summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile.todo40
-rwxr-xr-xbuilder.py4
-rwxr-xr-xcreate-kernel-namespace.sh2
m---------freebsd-org0
-rw-r--r--freebsd/contrib/libxo/libxo/libxo.c8080
-rw-r--r--freebsd/contrib/libxo/libxo/xo.h666
-rw-r--r--freebsd/contrib/libxo/libxo/xo_buf.h158
-rw-r--r--freebsd/contrib/libxo/libxo/xo_config.h254
-rw-r--r--freebsd/contrib/libxo/libxo/xo_encoder.c435
-rw-r--r--freebsd/contrib/libxo/libxo/xo_encoder.h116
-rw-r--r--freebsd/contrib/libxo/libxo/xo_humanize.h169
-rw-r--r--freebsd/contrib/libxo/libxo/xo_wcwidth.h313
-rw-r--r--freebsd/contrib/pf/pfctl/pfctl-data.h26
-rw-r--r--freebsd/contrib/pf/pfctl/pfctl_optimize-data.h13
-rw-r--r--freebsd/contrib/pf/pfctl/pfctl_radix-data.h3
-rw-r--r--freebsd/include/arpa/nameser.h2
-rw-r--r--freebsd/include/arpa/nameser_compat.h4
-rw-r--r--freebsd/include/gssapi/gssapi.h49
-rw-r--r--freebsd/include/ifaddrs.h2
-rw-r--r--freebsd/include/netdb.h23
-rw-r--r--freebsd/include/resolv.h2
-rw-r--r--freebsd/include/rpc/rpcent.h2
-rw-r--r--freebsd/include/rpc/svc.h9
-rw-r--r--freebsd/include/rpc/xdr.h16
-rw-r--r--freebsd/include/rpcsvc/nis.x66
-rw-r--r--freebsd/include/rpcsvc/nis_tags.h57
-rw-r--r--freebsd/lib/libc/db/btree/bt_open.c14
-rw-r--r--freebsd/lib/libc/db/btree/bt_put.c3
-rw-r--r--freebsd/lib/libc/db/btree/bt_split.c19
-rw-r--r--freebsd/lib/libc/db/db/db.c6
-rw-r--r--freebsd/lib/libc/db/recno/rec_open.c2
-rw-r--r--freebsd/lib/libc/db/recno/rec_put.c5
-rw-r--r--freebsd/lib/libc/gen/err.c13
-rw-r--r--freebsd/lib/libc/gen/feature_present.c1
-rw-r--r--freebsd/lib/libc/gen/getdomainname.c6
-rw-r--r--freebsd/lib/libc/gen/gethostname.c4
-rw-r--r--freebsd/lib/libc/include/libc_private.h185
-rw-r--r--freebsd/lib/libc/include/namespace.h216
-rw-r--r--freebsd/lib/libc/include/port_before.h2
-rw-r--r--freebsd/lib/libc/include/reentrant.h2
-rw-r--r--freebsd/lib/libc/include/un-namespace.h228
-rw-r--r--freebsd/lib/libc/inet/inet_addr.c1
-rw-r--r--freebsd/lib/libc/inet/inet_lnaof.c3
-rw-r--r--freebsd/lib/libc/inet/inet_makeaddr.c3
-rw-r--r--freebsd/lib/libc/inet/inet_net_ntop.c13
-rw-r--r--freebsd/lib/libc/inet/inet_neta.c5
-rw-r--r--freebsd/lib/libc/inet/inet_netof.c3
-rw-r--r--freebsd/lib/libc/inet/inet_network.c3
-rw-r--r--freebsd/lib/libc/inet/inet_ntop.c7
-rw-r--r--freebsd/lib/libc/inet/inet_pton.c1
-rw-r--r--freebsd/lib/libc/inet/nsap_addr.c1
-rw-r--r--freebsd/lib/libc/isc/ev_timers.c4
-rw-r--r--freebsd/lib/libc/isc/eventlib_p.h1
-rw-r--r--freebsd/lib/libc/nameser/ns_samedomain.c2
-rw-r--r--freebsd/lib/libc/net/base64.c30
-rw-r--r--freebsd/lib/libc/net/ether_addr.c17
-rw-r--r--freebsd/lib/libc/net/getaddrinfo.c344
-rw-r--r--freebsd/lib/libc/net/gethostbydns.c26
-rw-r--r--freebsd/lib/libc/net/gethostbyht.c4
-rw-r--r--freebsd/lib/libc/net/gethostbynis.c57
-rw-r--r--freebsd/lib/libc/net/gethostnamadr.c9
-rw-r--r--freebsd/lib/libc/net/getifaddrs.c172
-rw-r--r--freebsd/lib/libc/net/getifmaddrs.c28
-rw-r--r--freebsd/lib/libc/net/getnameinfo.c187
-rw-r--r--freebsd/lib/libc/net/getnetbydns.c2
-rw-r--r--freebsd/lib/libc/net/getnetbyht.c4
-rw-r--r--freebsd/lib/libc/net/getnetbynis.c2
-rw-r--r--freebsd/lib/libc/net/getnetnamadr.c9
-rw-r--r--freebsd/lib/libc/net/getproto.c6
-rw-r--r--freebsd/lib/libc/net/getprotoent.c5
-rw-r--r--freebsd/lib/libc/net/getservent.c27
-rw-r--r--freebsd/lib/libc/net/if_indextoname.c2
-rw-r--r--freebsd/lib/libc/net/if_nameindex.c2
-rw-r--r--freebsd/lib/libc/net/if_nametoindex.c5
-rw-r--r--freebsd/lib/libc/net/ip6opt.c38
-rw-r--r--freebsd/lib/libc/net/linkaddr.c9
-rw-r--r--freebsd/lib/libc/net/map_v4v6.c18
-rw-r--r--freebsd/lib/libc/net/name6.c33
-rw-r--r--freebsd/lib/libc/net/netdb_private.h22
-rw-r--r--freebsd/lib/libc/net/nsdispatch.c47
-rw-r--r--freebsd/lib/libc/net/nslexer.l12
-rw-r--r--freebsd/lib/libc/net/nsparser.y10
-rw-r--r--freebsd/lib/libc/net/rcmd.c78
-rw-r--r--freebsd/lib/libc/net/recv.c21
-rw-r--r--freebsd/lib/libc/net/res_config.h2
-rw-r--r--freebsd/lib/libc/net/rthdr.c32
-rw-r--r--freebsd/lib/libc/net/send.c21
-rw-r--r--freebsd/lib/libc/posix1e/mac.c2
-rw-r--r--freebsd/lib/libc/resolv/h_errno.c3
-rw-r--r--freebsd/lib/libc/resolv/herror.c3
-rw-r--r--freebsd/lib/libc/resolv/mtctxres.c11
-rw-r--r--freebsd/lib/libc/resolv/res_comp.c9
-rw-r--r--freebsd/lib/libc/resolv/res_data.c92
-rw-r--r--freebsd/lib/libc/resolv/res_debug.c19
-rw-r--r--freebsd/lib/libc/resolv/res_findzonecut.c2
-rw-r--r--freebsd/lib/libc/resolv/res_init.c65
-rw-r--r--freebsd/lib/libc/resolv/res_mkquery.c7
-rw-r--r--freebsd/lib/libc/resolv/res_mkupdate.c13
-rw-r--r--freebsd/lib/libc/resolv/res_private.h5
-rw-r--r--freebsd/lib/libc/resolv/res_query.c9
-rw-r--r--freebsd/lib/libc/resolv/res_send.c21
-rw-r--r--freebsd/lib/libc/resolv/res_state.c39
-rw-r--r--freebsd/lib/libc/rpc/auth_des.c8
-rw-r--r--freebsd/lib/libc/rpc/auth_none.c14
-rw-r--r--freebsd/lib/libc/rpc/auth_time.c45
-rw-r--r--freebsd/lib/libc/rpc/auth_unix.c34
-rw-r--r--freebsd/lib/libc/rpc/authdes_prot.c8
-rw-r--r--freebsd/lib/libc/rpc/authunix_prot.c4
-rw-r--r--freebsd/lib/libc/rpc/bindresvport.c8
-rw-r--r--freebsd/lib/libc/rpc/clnt_bcast.c77
-rw-r--r--freebsd/lib/libc/rpc/clnt_dg.c69
-rw-r--r--freebsd/lib/libc/rpc/clnt_generic.c2
-rw-r--r--freebsd/lib/libc/rpc/clnt_perror.c33
-rw-r--r--freebsd/lib/libc/rpc/clnt_raw.c36
-rw-r--r--freebsd/lib/libc/rpc/clnt_simple.c20
-rw-r--r--freebsd/lib/libc/rpc/clnt_vc.c117
-rw-r--r--freebsd/lib/libc/rpc/crypt_client.c6
-rw-r--r--freebsd/lib/libc/rpc/des_crypt.c23
-rw-r--r--freebsd/lib/libc/rpc/des_soft.c3
-rw-r--r--freebsd/lib/libc/rpc/getnetconfig.c48
-rw-r--r--freebsd/lib/libc/rpc/getnetpath.c21
-rw-r--r--freebsd/lib/libc/rpc/getpublickey.c14
-rw-r--r--freebsd/lib/libc/rpc/getrpcent.c9
-rw-r--r--freebsd/lib/libc/rpc/getrpcport.c4
-rw-r--r--freebsd/lib/libc/rpc/key_call.c48
-rw-r--r--freebsd/lib/libc/rpc/mt_misc.c7
-rw-r--r--freebsd/lib/libc/rpc/netname.c13
-rw-r--r--freebsd/lib/libc/rpc/netnamer.c46
-rw-r--r--freebsd/lib/libc/rpc/pmap_getmaps.c3
-rw-r--r--freebsd/lib/libc/rpc/pmap_getport.c7
-rw-r--r--freebsd/lib/libc/rpc/pmap_prot.c4
-rw-r--r--freebsd/lib/libc/rpc/pmap_prot2.c8
-rw-r--r--freebsd/lib/libc/rpc/pmap_rmt.c19
-rw-r--r--freebsd/lib/libc/rpc/rpc_callmsg.c12
-rw-r--r--freebsd/lib/libc/rpc/rpc_com.h4
-rw-r--r--freebsd/lib/libc/rpc/rpc_generic.c36
-rw-r--r--freebsd/lib/libc/rpc/rpc_prot.c48
-rw-r--r--freebsd/lib/libc/rpc/rpc_soc.c154
-rw-r--r--freebsd/lib/libc/rpc/rpcb_clnt.c133
-rw-r--r--freebsd/lib/libc/rpc/rpcb_prot.c46
-rw-r--r--freebsd/lib/libc/rpc/rpcb_st_xdr.c50
-rw-r--r--freebsd/lib/libc/rpc/rpcdname.c15
-rw-r--r--freebsd/lib/libc/rpc/rtime.c25
-rw-r--r--freebsd/lib/libc/rpc/svc.c111
-rw-r--r--freebsd/lib/libc/rpc/svc_auth.c19
-rw-r--r--freebsd/lib/libc/rpc/svc_auth_des.c37
-rw-r--r--freebsd/lib/libc/rpc/svc_auth_unix.c8
-rw-r--r--freebsd/lib/libc/rpc/svc_dg.c54
-rw-r--r--freebsd/lib/libc/rpc/svc_generic.c40
-rw-r--r--freebsd/lib/libc/rpc/svc_raw.c34
-rw-r--r--freebsd/lib/libc/rpc/svc_run.c4
-rw-r--r--freebsd/lib/libc/rpc/svc_simple.c34
-rw-r--r--freebsd/lib/libc/rpc/svc_vc.c106
-rw-r--r--freebsd/lib/libc/stdio/fgetln.c13
-rw-r--r--freebsd/lib/libc/stdio/local.h9
-rw-r--r--freebsd/lib/libc/stdlib/strtoimax.c2
-rw-r--r--freebsd/lib/libc/stdlib/strtonum.c14
-rw-r--r--freebsd/lib/libc/stdlib/strtoumax.c2
-rw-r--r--freebsd/lib/libc/string/strsep.c2
-rw-r--r--freebsd/lib/libc/xdr/xdr.c186
-rw-r--r--freebsd/lib/libc/xdr/xdr_array.c75
-rw-r--r--freebsd/lib/libc/xdr/xdr_float.c93
-rw-r--r--freebsd/lib/libc/xdr/xdr_mem.c104
-rw-r--r--freebsd/lib/libc/xdr/xdr_rec.c152
-rw-r--r--freebsd/lib/libc/xdr/xdr_reference.c70
-rw-r--r--freebsd/lib/libc/xdr/xdr_sizeof.c87
-rw-r--r--freebsd/lib/libc/xdr/xdr_stdio.c89
-rw-r--r--freebsd/lib/libipsec/pfkey.c2
-rw-r--r--freebsd/lib/libipsec/pfkey_dump.c3
-rw-r--r--freebsd/lib/libipsec/policy_parse.y1
-rw-r--r--freebsd/lib/libkvm/kvm.h22
-rw-r--r--freebsd/lib/libmemstat/memstat.c7
-rw-r--r--freebsd/lib/libmemstat/memstat.h1
-rw-r--r--freebsd/lib/libmemstat/memstat_internal.h1
-rw-r--r--freebsd/lib/libmemstat/memstat_uma.c4
-rw-r--r--freebsd/lib/libutil/expand_number.c30
-rw-r--r--freebsd/lib/libutil/humanize_number.c74
-rw-r--r--freebsd/lib/libutil/libutil.h17
-rw-r--r--freebsd/sbin/dhclient/bpf.c220
-rw-r--r--freebsd/sbin/dhclient/clparse.c4
-rw-r--r--freebsd/sbin/dhclient/conflex.c4
-rw-r--r--freebsd/sbin/dhclient/dhclient.c179
-rw-r--r--freebsd/sbin/dhclient/dhcpd.h20
-rw-r--r--freebsd/sbin/dhclient/options.c4
-rw-r--r--freebsd/sbin/dhclient/packet.c23
-rw-r--r--freebsd/sbin/dhclient/privsep.c5
-rw-r--r--freebsd/sbin/dhclient/privsep.h5
-rw-r--r--freebsd/sbin/ifconfig/af_inet.c75
-rw-r--r--freebsd/sbin/ifconfig/af_inet6.c170
-rw-r--r--freebsd/sbin/ifconfig/af_link.c30
-rw-r--r--freebsd/sbin/ifconfig/af_nd6.c22
-rw-r--r--freebsd/sbin/ifconfig/ifbridge.c33
-rw-r--r--freebsd/sbin/ifconfig/ifcarp.c10
-rw-r--r--freebsd/sbin/ifconfig/ifclone.c54
-rw-r--r--freebsd/sbin/ifconfig/ifconfig.c452
-rw-r--r--freebsd/sbin/ifconfig/ifconfig.h15
-rw-r--r--freebsd/sbin/ifconfig/ifgif.c25
-rw-r--r--freebsd/sbin/ifconfig/ifgre.c77
-rw-r--r--freebsd/sbin/ifconfig/ifgroup.c52
-rw-r--r--freebsd/sbin/ifconfig/ifieee80211.c26
-rw-r--r--freebsd/sbin/ifconfig/iflagg.c119
-rw-r--r--freebsd/sbin/ifconfig/ifmac.c18
-rw-r--r--freebsd/sbin/ifconfig/ifmedia.c190
-rw-r--r--freebsd/sbin/ifconfig/ifpfsync.c44
-rw-r--r--freebsd/sbin/ifconfig/ifvlan.c56
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_inet-data.h9
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_inet6-data.h13
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_link-data.h8
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_nd6-data.h4
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-data.h39
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifbridge-data.h9
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifclone-data.h7
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifconfig-data.h9
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifgif-data.h6
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifgre-data.h6
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifgroup-data.h7
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-iflagg-data.h6
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifmac-data.h6
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifmedia-data.h29
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifpfsync-data.h6
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifvlan-data.h7
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-namespace.h76
-rw-r--r--freebsd/sbin/ifconfig/rtems-bsd-ifconfig-sfp-data.h15
-rw-r--r--freebsd/sbin/ifconfig/sfp.c935
-rw-r--r--freebsd/sbin/pfctl/parse.c (renamed from freebsd/contrib/pf/pfctl/parse.c)4290
-rw-r--r--freebsd/sbin/pfctl/parse.h337
-rw-r--r--freebsd/sbin/pfctl/parse.y (renamed from freebsd/contrib/pf/pfctl/parse.y)391
-rw-r--r--freebsd/sbin/pfctl/pf_print_state.c (renamed from freebsd/contrib/pf/pfctl/pf_print_state.c)70
-rw-r--r--freebsd/sbin/pfctl/pfctl.c (renamed from freebsd/contrib/pf/pfctl/pfctl.c)160
-rw-r--r--freebsd/sbin/pfctl/pfctl.h (renamed from freebsd/contrib/pf/pfctl/pfctl.h)0
-rw-r--r--freebsd/sbin/pfctl/pfctl_altq.c (renamed from freebsd/contrib/pf/pfctl/pfctl_altq.c)306
-rw-r--r--freebsd/sbin/pfctl/pfctl_optimize.c (renamed from freebsd/contrib/pf/pfctl/pfctl_optimize.c)60
-rw-r--r--freebsd/sbin/pfctl/pfctl_osfp.c (renamed from freebsd/contrib/pf/pfctl/pfctl_osfp.c)30
-rw-r--r--freebsd/sbin/pfctl/pfctl_parser.c (renamed from freebsd/contrib/pf/pfctl/pfctl_parser.c)110
-rw-r--r--freebsd/sbin/pfctl/pfctl_parser.h (renamed from freebsd/contrib/pf/pfctl/pfctl_parser.h)17
-rw-r--r--freebsd/sbin/pfctl/pfctl_qstats.c (renamed from freebsd/contrib/pf/pfctl/pfctl_qstats.c)99
-rw-r--r--freebsd/sbin/pfctl/pfctl_radix.c (renamed from freebsd/contrib/pf/pfctl/pfctl_radix.c)25
-rw-r--r--freebsd/sbin/pfctl/pfctl_table.c (renamed from freebsd/contrib/pf/pfctl/pfctl_table.c)16
-rw-r--r--freebsd/sbin/pfctl/rtems-bsd-pfctl-data.h25
-rw-r--r--freebsd/sbin/pfctl/rtems-bsd-pfctl-namespace.h262
-rw-r--r--freebsd/sbin/pfctl/rtems-bsd-pfctl-parse-data.h (renamed from freebsd/contrib/pf/pfctl/parse-data.h)64
-rw-r--r--freebsd/sbin/pfctl/rtems-bsd-pfctl-pf_print_state-data.h4
-rw-r--r--freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl-data.h22
-rw-r--r--freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_altq-data.h (renamed from freebsd/contrib/pf/pfctl/pfctl_altq-data.h)9
-rw-r--r--freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_optimize-data.h11
-rw-r--r--freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_osfp-data.h (renamed from freebsd/contrib/pf/pfctl/pfctl_osfp-data.h)8
-rw-r--r--freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_parser-data.h (renamed from freebsd/contrib/pf/pfctl/pfctl_parser-data.h)4
-rw-r--r--freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_qstats-data.h (renamed from freebsd/contrib/pf/pfctl/pfctl_qstats-data.h)4
-rw-r--r--freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_radix-data.h4
-rw-r--r--freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_table-data.h6
-rw-r--r--freebsd/sbin/ping/ping.c396
-rw-r--r--freebsd/sbin/ping/rtems-bsd-ping-data.h3
-rw-r--r--freebsd/sbin/ping/rtems-bsd-ping-namespace.h2
-rw-r--r--freebsd/sbin/ping/rtems-bsd-ping-ping-data.h43
-rw-r--r--freebsd/sbin/ping6/ping6.c715
-rw-r--r--freebsd/sbin/ping6/rtems-bsd-ping6-data.h4
-rw-r--r--freebsd/sbin/ping6/rtems-bsd-ping6-namespace.h3
-rw-r--r--freebsd/sbin/ping6/rtems-bsd-ping6-ping6-data.h40
-rw-r--r--freebsd/sbin/route/keywords1
-rw-r--r--freebsd/sbin/route/route.c1452
-rw-r--r--freebsd/sbin/route/rtems-bsd-route-data.h3
-rw-r--r--freebsd/sbin/route/rtems-bsd-route-namespace.h2
-rw-r--r--freebsd/sbin/route/rtems-bsd-route-route-data.h30
-rw-r--r--freebsd/sbin/sysctl/rtems-bsd-sysctl-data.h3
-rw-r--r--freebsd/sbin/sysctl/rtems-bsd-sysctl-namespace.h2
-rw-r--r--freebsd/sbin/sysctl/rtems-bsd-sysctl-sysctl-data.h23
-rw-r--r--freebsd/sbin/sysctl/sysctl.c760
-rw-r--r--freebsd/sys/arm/include/machine/cpufunc.h407
-rw-r--r--freebsd/sys/arm/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/arm/xilinx/zy7_slcr.c296
-rw-r--r--freebsd/sys/arm/xilinx/zy7_slcr.h36
-rw-r--r--freebsd/sys/avr/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/bfin/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/bsm/audit.h1
-rw-r--r--freebsd/sys/bsm/audit_kevents.h18
-rw-r--r--freebsd/sys/cam/ata/ata_all.h20
-rw-r--r--freebsd/sys/cam/cam.c125
-rw-r--r--freebsd/sys/cam/cam.h267
-rw-r--r--freebsd/sys/cam/cam_ccb.h194
-rw-r--r--freebsd/sys/cam/cam_periph.h51
-rw-r--r--freebsd/sys/cam/cam_sim.h14
-rw-r--r--freebsd/sys/cam/cam_xpt.h17
-rw-r--r--freebsd/sys/cam/cam_xpt_sim.h4
-rw-r--r--freebsd/sys/cam/nvme/nvme_all.h48
-rw-r--r--freebsd/sys/cam/scsi/scsi_all.c2726
-rw-r--r--freebsd/sys/cam/scsi/scsi_all.h1761
-rw-r--r--freebsd/sys/cam/scsi/scsi_da.h236
-rw-r--r--freebsd/sys/contrib/altq/altq/altqconf.h29
-rw-r--r--freebsd/sys/crypto/rijndael/rijndael-api-fst.c19
-rw-r--r--freebsd/sys/crypto/rijndael/rijndael-api-fst.h18
-rw-r--r--freebsd/sys/crypto/sha1.c6
-rw-r--r--freebsd/sys/crypto/sha1.h14
-rw-r--r--freebsd/sys/crypto/sha2/sha256.h90
-rw-r--r--freebsd/sys/crypto/sha2/sha256c.c318
-rw-r--r--freebsd/sys/crypto/sha2/sha384.h87
-rw-r--r--freebsd/sys/crypto/sha2/sha512.h90
-rw-r--r--freebsd/sys/crypto/sha2/sha512c.c505
-rw-r--r--freebsd/sys/crypto/sha2/sha512t.h125
-rw-r--r--freebsd/sys/crypto/siphash/siphash.c244
-rw-r--r--freebsd/sys/crypto/siphash/siphash.h83
-rw-r--r--freebsd/sys/crypto/skein/skein.c860
-rw-r--r--freebsd/sys/crypto/skein/skein.h333
-rw-r--r--freebsd/sys/crypto/skein/skein_block.c708
-rw-r--r--freebsd/sys/crypto/skein/skein_debug.h48
-rw-r--r--freebsd/sys/crypto/skein/skein_freebsd.h79
-rw-r--r--freebsd/sys/crypto/skein/skein_iv.h200
-rw-r--r--freebsd/sys/crypto/skein/skein_port.h158
-rw-r--r--freebsd/sys/dev/bce/if_bce.c274
-rw-r--r--freebsd/sys/dev/bce/if_bcereg.h51
-rw-r--r--freebsd/sys/dev/bfe/if_bfe.c34
-rw-r--r--freebsd/sys/dev/bge/if_bge.c470
-rw-r--r--freebsd/sys/dev/bge/if_bgereg.h17
-rw-r--r--freebsd/sys/dev/cadence/if_cgem.c293
-rw-r--r--freebsd/sys/dev/dc/dcphy.c17
-rw-r--r--freebsd/sys/dev/dc/if_dc.c46
-rw-r--r--freebsd/sys/dev/dc/pnphy.c7
-rw-r--r--freebsd/sys/dev/dwc/if_dwc.c736
-rw-r--r--freebsd/sys/dev/dwc/if_dwc.h35
-rw-r--r--freebsd/sys/dev/dwc/if_dwcvar.h99
-rw-r--r--freebsd/sys/dev/e1000/e1000_80003es2lan.c35
-rw-r--r--freebsd/sys/dev/e1000/e1000_80003es2lan.h2
-rw-r--r--freebsd/sys/dev/e1000/e1000_82540.c12
-rw-r--r--freebsd/sys/dev/e1000/e1000_82541.c299
-rw-r--r--freebsd/sys/dev/e1000/e1000_82541.h76
-rw-r--r--freebsd/sys/dev/e1000/e1000_82542.c44
-rw-r--r--freebsd/sys/dev/e1000/e1000_82543.c153
-rw-r--r--freebsd/sys/dev/e1000/e1000_82543.h28
-rw-r--r--freebsd/sys/dev/e1000/e1000_82571.c4
-rw-r--r--freebsd/sys/dev/e1000/e1000_82571.h2
-rw-r--r--freebsd/sys/dev/e1000/e1000_82575.c177
-rw-r--r--freebsd/sys/dev/e1000/e1000_82575.h7
-rw-r--r--freebsd/sys/dev/e1000/e1000_api.c14
-rw-r--r--freebsd/sys/dev/e1000/e1000_api.h18
-rw-r--r--freebsd/sys/dev/e1000/e1000_defines.h16
-rw-r--r--freebsd/sys/dev/e1000/e1000_hw.h20
-rw-r--r--freebsd/sys/dev/e1000/e1000_i210.c32
-rw-r--r--freebsd/sys/dev/e1000/e1000_i210.h2
-rw-r--r--freebsd/sys/dev/e1000/e1000_ich8lan.c961
-rw-r--r--freebsd/sys/dev/e1000/e1000_ich8lan.h33
-rw-r--r--freebsd/sys/dev/e1000/e1000_mac.c2
-rw-r--r--freebsd/sys/dev/e1000/e1000_mac.h4
-rw-r--r--freebsd/sys/dev/e1000/e1000_manage.c3
-rw-r--r--freebsd/sys/dev/e1000/e1000_manage.h2
-rw-r--r--freebsd/sys/dev/e1000/e1000_mbx.c42
-rw-r--r--freebsd/sys/dev/e1000/e1000_mbx.h2
-rw-r--r--freebsd/sys/dev/e1000/e1000_nvm.c2
-rw-r--r--freebsd/sys/dev/e1000/e1000_nvm.h4
-rw-r--r--freebsd/sys/dev/e1000/e1000_osdep.c2
-rw-r--r--freebsd/sys/dev/e1000/e1000_osdep.h29
-rw-r--r--freebsd/sys/dev/e1000/e1000_phy.c37
-rw-r--r--freebsd/sys/dev/e1000/e1000_phy.h2
-rw-r--r--freebsd/sys/dev/e1000/e1000_regs.h17
-rw-r--r--freebsd/sys/dev/e1000/e1000_vf.c2
-rw-r--r--freebsd/sys/dev/e1000/e1000_vf.h2
-rw-r--r--freebsd/sys/dev/e1000/if_em.c1759
-rw-r--r--freebsd/sys/dev/e1000/if_em.h94
-rw-r--r--freebsd/sys/dev/e1000/if_igb.c819
-rw-r--r--freebsd/sys/dev/e1000/if_igb.h101
-rw-r--r--freebsd/sys/dev/e1000/if_lem.c653
-rw-r--r--freebsd/sys/dev/e1000/if_lem.h48
-rw-r--r--freebsd/sys/dev/fxp/if_fxp.c456
-rw-r--r--freebsd/sys/dev/fxp/if_fxpreg.h2
-rw-r--r--freebsd/sys/dev/fxp/if_fxpvar.h5
-rw-r--r--freebsd/sys/dev/fxp/rcvbundl.h2
-rw-r--r--freebsd/sys/dev/led/led.c24
-rw-r--r--freebsd/sys/dev/mii/brgphy.c95
-rw-r--r--freebsd/sys/dev/mii/e1000phy.c26
-rw-r--r--freebsd/sys/dev/mii/icsphy.c6
-rw-r--r--freebsd/sys/dev/mii/micphy.c148
-rw-r--r--freebsd/sys/dev/mii/mii.c57
-rw-r--r--freebsd/sys/dev/mii/mii.h43
-rw-r--r--freebsd/sys/dev/mii/mii_physubr.c172
-rw-r--r--freebsd/sys/dev/mii/miivar.h46
-rw-r--r--freebsd/sys/dev/mii/rgephy.c54
-rw-r--r--freebsd/sys/dev/mii/rgephyreg.h16
-rw-r--r--freebsd/sys/dev/mmc/mmcsd.c2
-rw-r--r--freebsd/sys/dev/nvme/nvme.h957
-rw-r--r--freebsd/sys/dev/ofw/openfirm.h40
-rw-r--r--freebsd/sys/dev/pci/pci.c1806
-rw-r--r--freebsd/sys/dev/pci/pci_pci.c1353
-rw-r--r--freebsd/sys/dev/pci/pci_private.h62
-rw-r--r--freebsd/sys/dev/pci/pci_user.c36
-rw-r--r--freebsd/sys/dev/pci/pcib_private.h85
-rw-r--r--freebsd/sys/dev/pci/pcireg.h256
-rw-r--r--freebsd/sys/dev/pci/pcivar.h156
-rw-r--r--freebsd/sys/dev/random/harvest.c137
-rw-r--r--freebsd/sys/dev/random/randomdev_soft.h94
-rw-r--r--freebsd/sys/dev/re/if_re.c114
-rw-r--r--freebsd/sys/dev/rl/if_rlreg.h (renamed from freebsd/sys/pci/if_rlreg.h)6
-rw-r--r--freebsd/sys/dev/smc/if_smc.c88
-rw-r--r--freebsd/sys/dev/tsec/if_tsec.c62
-rw-r--r--freebsd/sys/dev/tsec/if_tsec.h17
-rw-r--r--freebsd/sys/dev/tsec/if_tsecreg.h13
-rw-r--r--freebsd/sys/fs/devfs/devfs_int.h2
-rw-r--r--freebsd/sys/h8300/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/i386/include/machine/cpufunc.h97
-rw-r--r--freebsd/sys/i386/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/i386/include/machine/intr_machdep.h35
-rw-r--r--freebsd/sys/i386/include/machine/md_var.h56
-rw-r--r--freebsd/sys/i386/include/machine/specialreg.h642
-rw-r--r--freebsd/sys/kern/init_main.c129
-rw-r--r--freebsd/sys/kern/kern_condvar.c113
-rw-r--r--freebsd/sys/kern/kern_conf.c146
-rw-r--r--freebsd/sys/kern/kern_event.c831
-rw-r--r--freebsd/sys/kern/kern_hhook.c7
-rw-r--r--freebsd/sys/kern/kern_intr.c188
-rw-r--r--freebsd/sys/kern/kern_linker.c118
-rw-r--r--freebsd/sys/kern/kern_mbuf.c609
-rw-r--r--freebsd/sys/kern/kern_mib.c136
-rw-r--r--freebsd/sys/kern/kern_module.c12
-rw-r--r--freebsd/sys/kern/kern_mtxpool.c34
-rw-r--r--freebsd/sys/kern/kern_osd.c266
-rw-r--r--freebsd/sys/kern/kern_synch.c149
-rw-r--r--freebsd/sys/kern/kern_sysctl.c682
-rw-r--r--freebsd/sys/kern/kern_time.c164
-rw-r--r--freebsd/sys/kern/kern_timeout.c1336
-rw-r--r--freebsd/sys/kern/kern_uuid.c430
-rw-r--r--freebsd/sys/kern/subr_bus.c907
-rw-r--r--freebsd/sys/kern/subr_counter.c123
-rw-r--r--freebsd/sys/kern/subr_hash.c48
-rw-r--r--freebsd/sys/kern/subr_hints.c122
-rw-r--r--freebsd/sys/kern/subr_kobj.c4
-rw-r--r--freebsd/sys/kern/subr_lock.c67
-rw-r--r--freebsd/sys/kern/subr_module.c5
-rw-r--r--freebsd/sys/kern/subr_pcpu.c425
-rw-r--r--freebsd/sys/kern/subr_prf.c165
-rw-r--r--freebsd/sys/kern/subr_rman.c134
-rw-r--r--freebsd/sys/kern/subr_sbuf.c131
-rw-r--r--freebsd/sys/kern/subr_sleepqueue.c328
-rw-r--r--freebsd/sys/kern/subr_taskqueue.c299
-rw-r--r--freebsd/sys/kern/subr_uio.c364
-rw-r--r--freebsd/sys/kern/subr_unit.c214
-rw-r--r--freebsd/sys/kern/sys_generic.c494
-rwxr-xr-xfreebsd/sys/kern/sys_pipe.c405
-rw-r--r--freebsd/sys/kern/sys_socket.c581
-rw-r--r--freebsd/sys/kern/uipc_accf.c5
-rw-r--r--freebsd/sys/kern/uipc_domain.c69
-rw-r--r--freebsd/sys/kern/uipc_mbuf.c728
-rw-r--r--freebsd/sys/kern/uipc_mbuf2.c16
-rw-r--r--freebsd/sys/kern/uipc_mbufhash.c176
-rw-r--r--freebsd/sys/kern/uipc_sockbuf.c329
-rw-r--r--freebsd/sys/kern/uipc_socket.c676
-rw-r--r--freebsd/sys/kern/uipc_syscalls.c1717
-rw-r--r--freebsd/sys/kern/uipc_usrreq.c419
-rw-r--r--freebsd/sys/libkern/arc4random.c158
-rw-r--r--freebsd/sys/libkern/jenkins_hash.c465
-rw-r--r--freebsd/sys/libkern/murmur3_32.c134
-rw-r--r--freebsd/sys/libkern/random.c2
-rw-r--r--freebsd/sys/lm32/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/m32c/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/m32r/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/m68k/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/mips/include/machine/cpufunc.h137
-rw-r--r--freebsd/sys/mips/include/machine/cpuregs.h586
-rw-r--r--freebsd/sys/mips/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/net/altq/altq.h (renamed from freebsd/sys/contrib/altq/altq/altq.h)14
-rw-r--r--freebsd/sys/net/altq/altq_cbq.c (renamed from freebsd/sys/contrib/altq/altq/altq_cbq.c)38
-rw-r--r--freebsd/sys/net/altq/altq_cbq.h (renamed from freebsd/sys/contrib/altq/altq/altq_cbq.h)20
-rw-r--r--freebsd/sys/net/altq/altq_cdnr.c (renamed from freebsd/sys/contrib/altq/altq/altq_cdnr.c)23
-rw-r--r--freebsd/sys/net/altq/altq_cdnr.h (renamed from freebsd/sys/contrib/altq/altq/altq_cdnr.h)9
-rw-r--r--freebsd/sys/net/altq/altq_classq.h (renamed from freebsd/sys/contrib/altq/altq/altq_classq.h)13
-rw-r--r--freebsd/sys/net/altq/altq_codel.c479
-rw-r--r--freebsd/sys/net/altq/altq_codel.h129
-rw-r--r--freebsd/sys/net/altq/altq_fairq.c911
-rw-r--r--freebsd/sys/net/altq/altq_fairq.h145
-rw-r--r--freebsd/sys/net/altq/altq_hfsc.c (renamed from freebsd/sys/contrib/altq/altq/altq_hfsc.c)189
-rw-r--r--freebsd/sys/net/altq/altq_hfsc.h (renamed from freebsd/sys/contrib/altq/altq/altq_hfsc.h)45
-rw-r--r--freebsd/sys/net/altq/altq_priq.c (renamed from freebsd/sys/contrib/altq/altq/altq_priq.c)99
-rw-r--r--freebsd/sys/net/altq/altq_priq.h (renamed from freebsd/sys/contrib/altq/altq/altq_priq.h)26
-rw-r--r--freebsd/sys/net/altq/altq_red.c (renamed from freebsd/sys/contrib/altq/altq/altq_red.c)60
-rw-r--r--freebsd/sys/net/altq/altq_red.h (renamed from freebsd/sys/contrib/altq/altq/altq_red.h)9
-rw-r--r--freebsd/sys/net/altq/altq_rio.c (renamed from freebsd/sys/contrib/altq/altq/altq_rio.c)37
-rw-r--r--freebsd/sys/net/altq/altq_rio.h (renamed from freebsd/sys/contrib/altq/altq/altq_rio.h)9
-rw-r--r--freebsd/sys/net/altq/altq_rmclass.c (renamed from freebsd/sys/contrib/altq/altq/altq_rmclass.c)88
-rw-r--r--freebsd/sys/net/altq/altq_rmclass.h (renamed from freebsd/sys/contrib/altq/altq/altq_rmclass.h)19
-rw-r--r--freebsd/sys/net/altq/altq_rmclass_debug.h (renamed from freebsd/sys/contrib/altq/altq/altq_rmclass_debug.h)7
-rw-r--r--freebsd/sys/net/altq/altq_subr.c (renamed from freebsd/sys/contrib/altq/altq/altq_subr.c)139
-rw-r--r--freebsd/sys/net/altq/altq_var.h (renamed from freebsd/sys/contrib/altq/altq/altq_var.h)50
-rw-r--r--freebsd/sys/net/altq/if_altq.h (renamed from freebsd/sys/contrib/altq/altq/if_altq.h)17
-rw-r--r--freebsd/sys/net/bpf.c390
-rw-r--r--freebsd/sys/net/bpf.h246
-rw-r--r--freebsd/sys/net/bpf_buffer.c5
-rw-r--r--freebsd/sys/net/bpf_filter.c11
-rw-r--r--freebsd/sys/net/bridgestp.c8
-rw-r--r--freebsd/sys/net/ethernet.h27
-rw-r--r--freebsd/sys/net/flowtable.h119
-rw-r--r--freebsd/sys/net/ieee8023ad_lacp.c201
-rw-r--r--freebsd/sys/net/ieee8023ad_lacp.h16
-rw-r--r--freebsd/sys/net/if.c1411
-rw-r--r--freebsd/sys/net/if.h117
-rw-r--r--freebsd/sys/net/if_arc.h2
-rw-r--r--freebsd/sys/net/if_arcsubr.c139
-rw-r--r--freebsd/sys/net/if_arp.h44
-rw-r--r--freebsd/sys/net/if_atm.h4
-rw-r--r--freebsd/sys/net/if_atmsubr.c15
-rw-r--r--freebsd/sys/net/if_bridge.c476
-rw-r--r--freebsd/sys/net/if_clone.c389
-rw-r--r--freebsd/sys/net/if_clone.h95
-rw-r--r--freebsd/sys/net/if_dead.c10
-rw-r--r--freebsd/sys/net/if_disc.c75
-rw-r--r--freebsd/sys/net/if_dl.h8
-rw-r--r--freebsd/sys/net/if_edsc.c69
-rw-r--r--freebsd/sys/net/if_ef.c610
-rw-r--r--freebsd/sys/net/if_enc.c458
-rw-r--r--freebsd/sys/net/if_enc.h9
-rw-r--r--freebsd/sys/net/if_epair.c125
-rw-r--r--freebsd/sys/net/if_ethersubr.c894
-rw-r--r--freebsd/sys/net/if_faith.c353
-rw-r--r--freebsd/sys/net/if_fddisubr.c213
-rw-r--r--freebsd/sys/net/if_fwsubr.c44
-rw-r--r--freebsd/sys/net/if_gif.c1111
-rw-r--r--freebsd/sys/net/if_gif.h82
-rw-r--r--freebsd/sys/net/if_gre.c1541
-rw-r--r--freebsd/sys/net/if_gre.h215
-rw-r--r--freebsd/sys/net/if_iso88025subr.c207
-rw-r--r--freebsd/sys/net/if_lagg.c1253
-rw-r--r--freebsd/sys/net/if_lagg.h127
-rw-r--r--freebsd/sys/net/if_llatbl.c745
-rw-r--r--freebsd/sys/net/if_llatbl.h147
-rw-r--r--freebsd/sys/net/if_loop.c96
-rw-r--r--freebsd/sys/net/if_media.c105
-rw-r--r--freebsd/sys/net/if_media.h123
-rw-r--r--freebsd/sys/net/if_mib.c37
-rw-r--r--freebsd/sys/net/if_pflog.h66
-rw-r--r--freebsd/sys/net/if_pfsync.h265
-rw-r--r--freebsd/sys/net/if_sppp.h2
-rw-r--r--freebsd/sys/net/if_spppfr.c49
-rw-r--r--freebsd/sys/net/if_spppsubr.c186
-rw-r--r--freebsd/sys/net/if_stf.c279
-rw-r--r--freebsd/sys/net/if_tap.c103
-rw-r--r--freebsd/sys/net/if_tun.c108
-rw-r--r--freebsd/sys/net/if_tun.h4
-rw-r--r--freebsd/sys/net/if_types.h434
-rw-r--r--freebsd/sys/net/if_var.h925
-rw-r--r--freebsd/sys/net/if_vlan.c401
-rw-r--r--freebsd/sys/net/if_vlan_var.h61
-rw-r--r--freebsd/sys/net/ifq.h484
-rw-r--r--freebsd/sys/net/iso88025.h8
-rw-r--r--freebsd/sys/net/netisr.c276
-rw-r--r--freebsd/sys/net/netisr.h20
-rw-r--r--freebsd/sys/net/pfil.c175
-rw-r--r--freebsd/sys/net/pfil.h115
-rw-r--r--freebsd/sys/net/pfkeyv2.h45
-rw-r--r--freebsd/sys/net/pfvar.h1757
-rw-r--r--freebsd/sys/net/ppp_defs.h8
-rw-r--r--freebsd/sys/net/radix.c225
-rw-r--r--freebsd/sys/net/radix.h90
-rw-r--r--freebsd/sys/net/radix_mpath.c92
-rw-r--r--freebsd/sys/net/radix_mpath.h10
-rw-r--r--freebsd/sys/net/raw_cb.c2
-rw-r--r--freebsd/sys/net/raw_usrreq.c4
-rw-r--r--freebsd/sys/net/route.c1519
-rw-r--r--freebsd/sys/net/route.h234
-rw-r--r--freebsd/sys/net/route_var.h76
-rw-r--r--freebsd/sys/net/rss_config.h138
-rw-r--r--freebsd/sys/net/rtsock.c912
-rw-r--r--freebsd/sys/net/sff8436.h213
-rw-r--r--freebsd/sys/net/sff8472.h508
-rw-r--r--freebsd/sys/net/vnet.h126
-rw-r--r--freebsd/sys/netgraph/ng_socket.h69
-rw-r--r--freebsd/sys/netinet/accf_dns.c6
-rw-r--r--freebsd/sys/netinet/accf_http.c11
-rw-r--r--freebsd/sys/netinet/cc/cc.c15
-rw-r--r--freebsd/sys/netinet/cc/cc.h (renamed from freebsd/sys/netinet/cc.h)21
-rw-r--r--freebsd/sys/netinet/cc/cc_newreno.c18
-rw-r--r--freebsd/sys/netinet/icmp6.h91
-rw-r--r--freebsd/sys/netinet/icmp_var.h21
-rw-r--r--freebsd/sys/netinet/if_atm.c6
-rw-r--r--freebsd/sys/netinet/if_atm.h2
-rw-r--r--freebsd/sys/netinet/if_ether.c1122
-rw-r--r--freebsd/sys/netinet/if_ether.h32
-rw-r--r--freebsd/sys/netinet/igmp.c462
-rw-r--r--freebsd/sys/netinet/igmp_var.h88
-rw-r--r--freebsd/sys/netinet/in.c1782
-rw-r--r--freebsd/sys/netinet/in.h211
-rw-r--r--freebsd/sys/netinet/in_fib.c235
-rw-r--r--freebsd/sys/netinet/in_fib.h61
-rw-r--r--freebsd/sys/netinet/in_gif.c421
-rw-r--r--freebsd/sys/netinet/in_kdtrace.h72
-rw-r--r--freebsd/sys/netinet/in_mcast.c149
-rw-r--r--freebsd/sys/netinet/in_pcb.c386
-rw-r--r--freebsd/sys/netinet/in_pcb.h137
-rw-r--r--freebsd/sys/netinet/in_proto.c37
-rw-r--r--freebsd/sys/netinet/in_rmx.c377
-rw-r--r--freebsd/sys/netinet/in_rss.h57
-rw-r--r--freebsd/sys/netinet/in_systm.h16
-rw-r--r--freebsd/sys/netinet/in_var.h155
-rw-r--r--freebsd/sys/netinet/ip.h25
-rw-r--r--freebsd/sys/netinet/ip6.h6
-rw-r--r--freebsd/sys/netinet/ip_carp.c2681
-rw-r--r--freebsd/sys/netinet/ip_carp.h62
-rw-r--r--freebsd/sys/netinet/ip_divert.c59
-rw-r--r--freebsd/sys/netinet/ip_dummynet.h32
-rw-r--r--freebsd/sys/netinet/ip_ecn.h4
-rw-r--r--freebsd/sys/netinet/ip_encap.c68
-rw-r--r--freebsd/sys/netinet/ip_encap.h2
-rw-r--r--freebsd/sys/netinet/ip_fastfwd.c181
-rw-r--r--freebsd/sys/netinet/ip_fw.h469
-rw-r--r--freebsd/sys/netinet/ip_gre.c354
-rw-r--r--freebsd/sys/netinet/ip_icmp.c248
-rw-r--r--freebsd/sys/netinet/ip_icmp.h12
-rw-r--r--freebsd/sys/netinet/ip_id.c260
-rw-r--r--freebsd/sys/netinet/ip_input.c1046
-rw-r--r--freebsd/sys/netinet/ip_ipsec.h4
-rw-r--r--freebsd/sys/netinet/ip_mroute.c136
-rw-r--r--freebsd/sys/netinet/ip_mroute.h29
-rw-r--r--freebsd/sys/netinet/ip_options.c116
-rw-r--r--freebsd/sys/netinet/ip_options.h5
-rw-r--r--freebsd/sys/netinet/ip_output.c614
-rw-r--r--freebsd/sys/netinet/ip_reass.c660
-rw-r--r--freebsd/sys/netinet/ip_var.h116
-rw-r--r--freebsd/sys/netinet/libalias/alias.c32
-rw-r--r--freebsd/sys/netinet/libalias/alias_cuseeme.c36
-rw-r--r--freebsd/sys/netinet/libalias/alias_db.c21
-rw-r--r--freebsd/sys/netinet/libalias/alias_dummy.c42
-rw-r--r--freebsd/sys/netinet/libalias/alias_irc.c24
-rw-r--r--freebsd/sys/netinet/libalias/alias_local.h2
-rw-r--r--freebsd/sys/netinet/libalias/alias_mod.c192
-rw-r--r--freebsd/sys/netinet/libalias/alias_mod.h138
-rw-r--r--freebsd/sys/netinet/libalias/alias_nbt.c54
-rw-r--r--freebsd/sys/netinet/libalias/alias_pptp.c60
-rw-r--r--freebsd/sys/netinet/libalias/alias_sctp.h1
-rw-r--r--freebsd/sys/netinet/libalias/alias_skinny.c20
-rw-r--r--freebsd/sys/netinet/libalias/alias_smedia.c22
-rw-r--r--freebsd/sys/netinet/pim_var.h35
-rw-r--r--freebsd/sys/netinet/raw_ip.c106
-rw-r--r--freebsd/sys/netinet/sctp.h36
-rw-r--r--freebsd/sys/netinet/sctp_asconf.c221
-rw-r--r--freebsd/sys/netinet/sctp_auth.c70
-rw-r--r--freebsd/sys/netinet/sctp_auth.h3
-rw-r--r--freebsd/sys/netinet/sctp_bsd_addr.c21
-rw-r--r--freebsd/sys/netinet/sctp_cc_functions.c145
-rw-r--r--freebsd/sys/netinet/sctp_constants.h83
-rw-r--r--freebsd/sys/netinet/sctp_dtrace_declare.h1
-rw-r--r--freebsd/sys/netinet/sctp_dtrace_define.h177
-rw-r--r--freebsd/sys/netinet/sctp_header.h86
-rw-r--r--freebsd/sys/netinet/sctp_indata.c3483
-rw-r--r--freebsd/sys/netinet/sctp_indata.h26
-rw-r--r--freebsd/sys/netinet/sctp_input.c956
-rw-r--r--freebsd/sys/netinet/sctp_input.h4
-rw-r--r--freebsd/sys/netinet/sctp_lock_bsd.h2
-rw-r--r--freebsd/sys/netinet/sctp_os_bsd.h84
-rw-r--r--freebsd/sys/netinet/sctp_output.c2326
-rw-r--r--freebsd/sys/netinet/sctp_output.h22
-rw-r--r--freebsd/sys/netinet/sctp_pcb.c653
-rw-r--r--freebsd/sys/netinet/sctp_pcb.h33
-rw-r--r--freebsd/sys/netinet/sctp_peeloff.c9
-rw-r--r--freebsd/sys/netinet/sctp_structs.h144
-rw-r--r--freebsd/sys/netinet/sctp_sysctl.c692
-rw-r--r--freebsd/sys/netinet/sctp_sysctl.h90
-rw-r--r--freebsd/sys/netinet/sctp_timer.c72
-rw-r--r--freebsd/sys/netinet/sctp_timer.h4
-rw-r--r--freebsd/sys/netinet/sctp_uio.h76
-rw-r--r--freebsd/sys/netinet/sctp_usrreq.c2041
-rw-r--r--freebsd/sys/netinet/sctp_var.h93
-rw-r--r--freebsd/sys/netinet/sctputil.c1339
-rw-r--r--freebsd/sys/netinet/sctputil.h59
-rw-r--r--freebsd/sys/netinet/tcp.h16
-rw-r--r--freebsd/sys/netinet/tcp_debug.c3
-rw-r--r--freebsd/sys/netinet/tcp_hostcache.c81
-rw-r--r--freebsd/sys/netinet/tcp_hostcache.h2
-rw-r--r--freebsd/sys/netinet/tcp_input.c1134
-rw-r--r--freebsd/sys/netinet/tcp_lro.c458
-rw-r--r--freebsd/sys/netinet/tcp_lro.h45
-rw-r--r--freebsd/sys/netinet/tcp_offload.c3
-rw-r--r--freebsd/sys/netinet/tcp_output.c445
-rw-r--r--freebsd/sys/netinet/tcp_reass.c80
-rw-r--r--freebsd/sys/netinet/tcp_sack.c39
-rw-r--r--freebsd/sys/netinet/tcp_subr.c1387
-rw-r--r--freebsd/sys/netinet/tcp_syncache.c995
-rw-r--r--freebsd/sys/netinet/tcp_syncache.h47
-rw-r--r--freebsd/sys/netinet/tcp_timer.c583
-rw-r--r--freebsd/sys/netinet/tcp_timer.h39
-rw-r--r--freebsd/sys/netinet/tcp_timewait.c225
-rw-r--r--freebsd/sys/netinet/tcp_usrreq.c568
-rw-r--r--freebsd/sys/netinet/tcp_var.h490
-rw-r--r--freebsd/sys/netinet/udp_usrreq.c584
-rw-r--r--freebsd/sys/netinet/udp_var.h109
-rw-r--r--freebsd/sys/netinet/udplite.h (renamed from freebsd/sys/libkern/fls.c)34
-rw-r--r--freebsd/sys/netinet6/dest6.c1
-rw-r--r--freebsd/sys/netinet6/frag6.c109
-rw-r--r--freebsd/sys/netinet6/icmp6.c336
-rw-r--r--freebsd/sys/netinet6/in6.c1836
-rw-r--r--freebsd/sys/netinet6/in6.h70
-rw-r--r--freebsd/sys/netinet6/in6_cksum.c28
-rw-r--r--freebsd/sys/netinet6/in6_fib.c278
-rw-r--r--freebsd/sys/netinet6/in6_fib.h61
-rw-r--r--freebsd/sys/netinet6/in6_gif.c387
-rw-r--r--freebsd/sys/netinet6/in6_ifattach.c205
-rw-r--r--freebsd/sys/netinet6/in6_ifattach.h2
-rw-r--r--freebsd/sys/netinet6/in6_mcast.c74
-rw-r--r--freebsd/sys/netinet6/in6_pcb.c191
-rw-r--r--freebsd/sys/netinet6/in6_pcb.h5
-rw-r--r--freebsd/sys/netinet6/in6_proto.c343
-rw-r--r--freebsd/sys/netinet6/in6_rmx.c126
-rw-r--r--freebsd/sys/netinet6/in6_rss.h58
-rw-r--r--freebsd/sys/netinet6/in6_src.c320
-rw-r--r--freebsd/sys/netinet6/in6_var.h217
-rw-r--r--freebsd/sys/netinet6/ip6_forward.c216
-rw-r--r--freebsd/sys/netinet6/ip6_id.c11
-rw-r--r--freebsd/sys/netinet6/ip6_input.c712
-rw-r--r--freebsd/sys/netinet6/ip6_ipsec.h3
-rw-r--r--freebsd/sys/netinet6/ip6_mroute.c75
-rw-r--r--freebsd/sys/netinet6/ip6_mroute.h40
-rw-r--r--freebsd/sys/netinet6/ip6_output.c783
-rw-r--r--freebsd/sys/netinet6/ip6_var.h159
-rw-r--r--freebsd/sys/netinet6/ip6protosw.h35
-rw-r--r--freebsd/sys/netinet6/ip_fw_nat64.h154
-rw-r--r--freebsd/sys/netinet6/ip_fw_nptv6.h51
-rw-r--r--freebsd/sys/netinet6/mld6.c336
-rw-r--r--freebsd/sys/netinet6/mld6_var.h65
-rw-r--r--freebsd/sys/netinet6/nd6.c1911
-rw-r--r--freebsd/sys/netinet6/nd6.h96
-rw-r--r--freebsd/sys/netinet6/nd6_nbr.c787
-rw-r--r--freebsd/sys/netinet6/nd6_rtr.c493
-rw-r--r--freebsd/sys/netinet6/pim6_var.h21
-rw-r--r--freebsd/sys/netinet6/raw_ip6.c66
-rw-r--r--freebsd/sys/netinet6/raw_ip6.h24
-rw-r--r--freebsd/sys/netinet6/route6.c1
-rw-r--r--freebsd/sys/netinet6/scope6.c278
-rw-r--r--freebsd/sys/netinet6/scope6_var.h13
-rw-r--r--freebsd/sys/netinet6/sctp6_usrreq.c458
-rw-r--r--freebsd/sys/netinet6/sctp6_var.h7
-rw-r--r--freebsd/sys/netinet6/udp6_usrreq.c390
-rw-r--r--freebsd/sys/netinet6/udp6_var.h1
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_aqm.h167
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_aqm_codel.h222
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_aqm_pie.h153
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_heap.c554
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_heap.h4
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched.h14
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched_fifo.c122
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched_fq_codel.h167
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h187
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched_prio.c231
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched_qfq.c866
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched_rr.c309
-rw-r--r--freebsd/sys/netpfil/ipfw/dn_sched_wf2q.c375
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_dn_glue.c848
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_dn_io.c852
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_dn_private.h62
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_dummynet.c2309
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw2.c553
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_bpf.c211
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c1822
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_eaction.c383
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_iface.c541
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_log.c120
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_nat.c710
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_pfil.c210
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_private.h545
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c3904
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_table.c3595
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_table.h234
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_table_algo.c4112
-rw-r--r--freebsd/sys/netpfil/ipfw/ip_fw_table_value.c810
-rw-r--r--freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.c131
-rw-r--r--freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.h117
-rw-r--r--freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c1574
-rw-r--r--freebsd/sys/netpfil/ipfw/nat64/nat64_translate.h116
-rw-r--r--freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c1772
-rw-r--r--freebsd/sys/netpfil/ipfw/nat64/nat64lsn.h351
-rw-r--r--freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c919
-rw-r--r--freebsd/sys/netpfil/ipfw/nat64/nat64stl.c262
-rw-r--r--freebsd/sys/netpfil/ipfw/nat64/nat64stl.h58
-rw-r--r--freebsd/sys/netpfil/ipfw/nat64/nat64stl_control.c623
-rw-r--r--freebsd/sys/netpfil/ipfw/nptv6/ip_fw_nptv6.c101
-rw-r--r--freebsd/sys/netpfil/ipfw/nptv6/nptv6.c894
-rw-r--r--freebsd/sys/netpfil/ipfw/nptv6/nptv6.h65
-rw-r--r--freebsd/sys/netpfil/pf/if_pflog.c320
-rw-r--r--freebsd/sys/netpfil/pf/if_pfsync.c2421
-rw-r--r--freebsd/sys/netpfil/pf/in4_cksum.c122
-rw-r--r--freebsd/sys/netpfil/pf/pf.c6657
-rw-r--r--freebsd/sys/netpfil/pf/pf.h203
-rw-r--r--freebsd/sys/netpfil/pf/pf_altq.h121
-rw-r--r--freebsd/sys/netpfil/pf/pf_if.c924
-rw-r--r--freebsd/sys/netpfil/pf/pf_ioctl.c3872
-rw-r--r--freebsd/sys/netpfil/pf/pf_lb.c681
-rw-r--r--freebsd/sys/netpfil/pf/pf_mtag.h64
-rw-r--r--freebsd/sys/netpfil/pf/pf_norm.c1843
-rw-r--r--freebsd/sys/netpfil/pf/pf_osfp.c530
-rw-r--r--freebsd/sys/netpfil/pf/pf_ruleset.c426
-rw-r--r--freebsd/sys/netpfil/pf/pf_table.c2195
-rw-r--r--freebsd/sys/nios2/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/opencrypto/cast.c2
-rw-r--r--freebsd/sys/opencrypto/criov.c85
-rw-r--r--freebsd/sys/opencrypto/crypto.c21
-rw-r--r--freebsd/sys/opencrypto/cryptodeflate.c265
-rw-r--r--freebsd/sys/opencrypto/cryptodev.h183
-rw-r--r--freebsd/sys/opencrypto/cryptosoft.c793
-rw-r--r--freebsd/sys/opencrypto/deflate.h2
-rw-r--r--freebsd/sys/opencrypto/gfmult.c277
-rw-r--r--freebsd/sys/opencrypto/gfmult.h128
-rw-r--r--freebsd/sys/opencrypto/gmac.c121
-rw-r--r--freebsd/sys/opencrypto/gmac.h56
-rw-r--r--freebsd/sys/opencrypto/skipjack.h5
-rw-r--r--freebsd/sys/opencrypto/xform.c768
-rw-r--r--freebsd/sys/opencrypto/xform.h81
-rw-r--r--freebsd/sys/opencrypto/xform_aes_icm.c154
-rw-r--r--freebsd/sys/opencrypto/xform_aes_xts.c166
-rw-r--r--freebsd/sys/opencrypto/xform_auth.h89
-rw-r--r--freebsd/sys/opencrypto/xform_blf.c129
-rw-r--r--freebsd/sys/opencrypto/xform_cast5.c109
-rw-r--r--freebsd/sys/opencrypto/xform_cml.c115
-rw-r--r--freebsd/sys/opencrypto/xform_comp.h52
-rw-r--r--freebsd/sys/opencrypto/xform_deflate.c88
-rw-r--r--freebsd/sys/opencrypto/xform_des1.c118
-rw-r--r--freebsd/sys/opencrypto/xform_des3.c121
-rw-r--r--freebsd/sys/opencrypto/xform_enc.h92
-rw-r--r--freebsd/sys/opencrypto/xform_gmac.c101
-rw-r--r--freebsd/sys/opencrypto/xform_md5.c83
-rw-r--r--freebsd/sys/opencrypto/xform_null.c138
-rw-r--r--freebsd/sys/opencrypto/xform_rijndael.c115
-rw-r--r--freebsd/sys/opencrypto/xform_rmd160.c77
-rw-r--r--freebsd/sys/opencrypto/xform_sha1.c95
-rw-r--r--freebsd/sys/opencrypto/xform_sha2.c111
-rw-r--r--freebsd/sys/opencrypto/xform_skipjack.c119
-rw-r--r--freebsd/sys/opencrypto/xform_userland.h48
-rw-r--r--freebsd/sys/powerpc/include/machine/cpufunc.h10
-rw-r--r--freebsd/sys/powerpc/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/powerpc/include/machine/psl.h85
-rw-r--r--freebsd/sys/powerpc/include/machine/spr.h266
-rw-r--r--freebsd/sys/rpc/types.h2
-rw-r--r--freebsd/sys/security/audit/audit.h23
-rw-r--r--freebsd/sys/security/mac/mac_framework.h6
-rw-r--r--freebsd/sys/sh/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/sparc/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/sparc64/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/sys/_bitset.h58
-rw-r--r--freebsd/sys/sys/_callout.h10
-rw-r--r--freebsd/sys/sys/_cpuset.h50
-rw-r--r--freebsd/sys/sys/_mutex.h26
-rw-r--r--freebsd/sys/sys/_pctrie.h51
-rw-r--r--freebsd/sys/sys/_rmlock.h14
-rw-r--r--freebsd/sys/sys/_rwlock.h26
-rw-r--r--freebsd/sys/sys/_task.h21
-rw-r--r--freebsd/sys/sys/_unrhdr.h51
-rw-r--r--freebsd/sys/sys/acl.h3
-rw-r--r--freebsd/sys/sys/aio.h148
-rw-r--r--freebsd/sys/sys/ata.h392
-rw-r--r--freebsd/sys/sys/bitset.h208
-rw-r--r--freebsd/sys/sys/bitstring.h364
-rw-r--r--freebsd/sys/sys/buf.h117
-rw-r--r--freebsd/sys/sys/buf_ring.h127
-rw-r--r--freebsd/sys/sys/bufobj.h36
-rw-r--r--freebsd/sys/sys/bus.h214
-rw-r--r--freebsd/sys/sys/bus_dma.h29
-rw-r--r--freebsd/sys/sys/callout.h76
-rw-r--r--freebsd/sys/sys/capability.h178
-rw-r--r--freebsd/sys/sys/caprights.h61
-rw-r--r--freebsd/sys/sys/capsicum.h450
-rw-r--r--freebsd/sys/sys/condvar.h21
-rw-r--r--freebsd/sys/sys/conf.h144
-rw-r--r--freebsd/sys/sys/counter.h63
-rw-r--r--freebsd/sys/sys/cpu.h16
-rw-r--r--freebsd/sys/sys/domain.h14
-rw-r--r--freebsd/sys/sys/eventhandler.h31
-rw-r--r--freebsd/sys/sys/eventvar.h1
-rw-r--r--freebsd/sys/sys/fail.h366
-rw-r--r--freebsd/sys/sys/file.h129
-rw-r--r--freebsd/sys/sys/filedesc.h144
-rw-r--r--freebsd/sys/sys/fnv_hash.h2
-rw-r--r--freebsd/sys/sys/gpio.h108
-rw-r--r--freebsd/sys/sys/hash.h13
-rw-r--r--freebsd/sys/sys/hhook.h3
-rw-r--r--freebsd/sys/sys/interrupt.h8
-rw-r--r--freebsd/sys/sys/jail.h34
-rw-r--r--freebsd/sys/sys/kernel.h75
-rw-r--r--freebsd/sys/sys/khelp.h1
-rw-r--r--freebsd/sys/sys/kobj.h12
-rw-r--r--freebsd/sys/sys/kthread.h6
-rw-r--r--freebsd/sys/sys/ktr.h101
-rw-r--r--freebsd/sys/sys/ktr_class.h87
-rw-r--r--freebsd/sys/sys/libkern.h84
-rw-r--r--freebsd/sys/sys/linker.h10
-rw-r--r--freebsd/sys/sys/linker_set.h27
-rw-r--r--freebsd/sys/sys/lockmgr.h12
-rw-r--r--freebsd/sys/sys/lockstat.h225
-rw-r--r--freebsd/sys/sys/loginclass.h3
-rw-r--r--freebsd/sys/sys/malloc.h17
-rw-r--r--freebsd/sys/sys/mbuf.h1077
-rw-r--r--freebsd/sys/sys/module.h58
-rw-r--r--freebsd/sys/sys/mount.h140
-rw-r--r--freebsd/sys/sys/mutex.h191
-rw-r--r--freebsd/sys/sys/nlist_aout.h2
-rw-r--r--freebsd/sys/sys/osd.h8
-rw-r--r--freebsd/sys/sys/pcpu.h56
-rwxr-xr-xfreebsd/sys/sys/pipe.h11
-rw-r--r--freebsd/sys/sys/priv.h32
-rw-r--r--freebsd/sys/sys/proc.h284
-rw-r--r--freebsd/sys/sys/protosw.h34
-rw-r--r--freebsd/sys/sys/racct.h112
-rw-r--r--freebsd/sys/sys/random.h89
-rw-r--r--freebsd/sys/sys/reboot.h1
-rw-r--r--freebsd/sys/sys/refcount.h10
-rw-r--r--freebsd/sys/sys/resourcevar.h39
-rw-r--r--freebsd/sys/sys/rman.h57
-rw-r--r--freebsd/sys/sys/rmlock.h30
-rw-r--r--freebsd/sys/sys/rwlock.h101
-rw-r--r--freebsd/sys/sys/sbuf.h14
-rw-r--r--freebsd/sys/sys/sdt.h4
-rw-r--r--freebsd/sys/sys/seq.h129
-rw-r--r--freebsd/sys/sys/sf_buf.h169
-rw-r--r--freebsd/sys/sys/signalvar.h403
-rw-r--r--freebsd/sys/sys/sleepqueue.h13
-rw-r--r--freebsd/sys/sys/smp.h78
-rw-r--r--freebsd/sys/sys/sockbuf.h158
-rw-r--r--freebsd/sys/sys/socket.h116
-rw-r--r--freebsd/sys/sys/socketvar.h63
-rw-r--r--freebsd/sys/sys/stdint.h13
-rw-r--r--freebsd/sys/sys/sx.h47
-rw-r--r--freebsd/sys/sys/sysctl.h796
-rw-r--r--freebsd/sys/sys/syslog.h4
-rw-r--r--freebsd/sys/sys/sysproto.h324
-rw-r--r--freebsd/sys/sys/systm.h175
-rw-r--r--freebsd/sys/sys/taskqueue.h23
-rw-r--r--freebsd/sys/sys/tree.h86
-rw-r--r--freebsd/sys/sys/tty.h7
-rw-r--r--freebsd/sys/sys/ttydevsw.h41
-rw-r--r--freebsd/sys/sys/ucred.h10
-rw-r--r--freebsd/sys/sys/unpcb.h17
-rw-r--r--freebsd/sys/sys/user.h63
-rw-r--r--freebsd/sys/sys/uuid.h85
-rw-r--r--freebsd/sys/sys/vmmeter.h61
-rw-r--r--freebsd/sys/sys/vnode.h161
-rw-r--r--freebsd/sys/v850/include/machine/in_cksum.h2
-rw-r--r--freebsd/sys/vm/uma.h188
-rw-r--r--freebsd/sys/vm/uma_core.c2196
-rw-r--r--freebsd/sys/vm/uma_dbg.c166
-rw-r--r--freebsd/sys/vm/uma_dbg.h3
-rw-r--r--freebsd/sys/vm/uma_int.h249
-rw-r--r--freebsd/sys/vm/vm.h9
-rw-r--r--freebsd/sys/vm/vm_extern.h59
-rw-r--r--freebsd/sys/x86/include/machine/bus.h63
-rw-r--r--freebsd/sys/x86/include/machine/pci_cfgreg.h2
-rw-r--r--freebsd/sys/x86/pci/pci_bus.c60
-rw-r--r--freebsd/usr.bin/netstat/bpf.c69
-rw-r--r--freebsd/usr.bin/netstat/flowtable.c100
-rw-r--r--freebsd/usr.bin/netstat/if.c1000
-rw-r--r--freebsd/usr.bin/netstat/inet.c1300
-rw-r--r--freebsd/usr.bin/netstat/inet6.c789
-rw-r--r--freebsd/usr.bin/netstat/ipsec.c399
-rw-r--r--freebsd/usr.bin/netstat/main.c684
-rw-r--r--freebsd/usr.bin/netstat/mbuf.c189
-rw-r--r--freebsd/usr.bin/netstat/mroute.c283
-rw-r--r--freebsd/usr.bin/netstat/mroute6.c203
-rw-r--r--freebsd/usr.bin/netstat/netstat.h86
-rw-r--r--freebsd/usr.bin/netstat/nl_defs.h58
-rw-r--r--freebsd/usr.bin/netstat/nl_symbols.c75
-rw-r--r--freebsd/usr.bin/netstat/pfkey.c109
-rw-r--r--freebsd/usr.bin/netstat/route.c1198
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-bpf-data.h4
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-data.h41
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-flowtable-data.h4
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-if-data.h5
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-inet-data.h8
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-inet6-data.h8
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-ipsec-data.h4
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-main-data.h10
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-mbuf-data.h4
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-mroute-data.h4
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-mroute6-data.h4
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-namespace.h83
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-nl_symbols-data.h4
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-pfkey-data.h4
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-route-data.h15
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-sctp-data.h4
-rw-r--r--freebsd/usr.bin/netstat/rtems-bsd-netstat-unix-data.h4
-rw-r--r--freebsd/usr.bin/netstat/sctp.c603
-rw-r--r--freebsd/usr.bin/netstat/unix.c95
-rwxr-xr-xlibbsd.py319
-rw-r--r--libbsd.txt5
-rw-r--r--libbsd_waf.py141
-rw-r--r--rtemsbsd/include/bsp/nexus-devices.h2
-rw-r--r--rtemsbsd/include/ddb/ddb.h2
-rw-r--r--rtemsbsd/include/machine/counter.h90
-rw-r--r--rtemsbsd/include/machine/pcpu.h2
-rw-r--r--rtemsbsd/include/machine/rtems-bsd-kernel-namespace.h1432
-rw-r--r--rtemsbsd/include/machine/rtems-bsd-kernel-space.h2
-rw-r--r--rtemsbsd/include/machine/rtems-bsd-nexus-bus.h14
-rw-r--r--rtemsbsd/include/machine/rtems-bsd-user-space.h6
-rw-r--r--rtemsbsd/include/rtems/bsd/local/bus_if.h256
-rw-r--r--rtemsbsd/include/rtems/bsd/local/device_if.h49
-rw-r--r--rtemsbsd/include/rtems/bsd/local/gpio_if.h151
-rw-r--r--rtemsbsd/include/rtems/bsd/local/if_dwc_if.h55
-rw-r--r--rtemsbsd/include/rtems/bsd/local/miidevs.h18
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_callout_profiling.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_em.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_ifmedia.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_kld.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_kqueue.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_kstack_usage_prof.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_pci.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_rss.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_stack.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_timer.h1
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_verbose_sysinit.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/opt_vm.h0
-rw-r--r--rtemsbsd/include/rtems/bsd/local/pci_if.h181
-rw-r--r--rtemsbsd/include/rtems/bsd/local/pcib_if.h67
-rw-r--r--rtemsbsd/include/rtems/bsd/sys/cpuset.h94
-rw-r--r--rtemsbsd/include/rtems/bsd/sys/lock.h2
-rw-r--r--rtemsbsd/include/rtems/bsd/sys/param.h12
-rw-r--r--rtemsbsd/include/sys/_vm_domain.h0
-rw-r--r--rtemsbsd/include/sys/fail.h1
-rw-r--r--rtemsbsd/include/sys/signalvar.h1
-rw-r--r--rtemsbsd/include/sys/stack.h0
-rw-r--r--rtemsbsd/include/sys/stdatomic.h1
-rw-r--r--rtemsbsd/include/sys/zlib.h1
-rw-r--r--rtemsbsd/include/vm/memguard.h0
-rw-r--r--rtemsbsd/include/vm/vm_domain.h0
-rw-r--r--rtemsbsd/include/vm/vm_map.h2
-rw-r--r--rtemsbsd/local/bus_if.c196
-rw-r--r--rtemsbsd/local/device_if.c55
-rw-r--r--rtemsbsd/local/gpio_if.c88
-rw-r--r--rtemsbsd/local/if_dwc_if.c52
-rw-r--r--rtemsbsd/local/miibus_if.c30
-rw-r--r--rtemsbsd/local/mmcbr_if.c30
-rw-r--r--rtemsbsd/local/mmcbus_if.c18
-rw-r--r--rtemsbsd/local/pci_if.c149
-rw-r--r--rtemsbsd/local/pcib_if.c75
-rw-r--r--rtemsbsd/local/usb_if.c12
-rwxr-xr-xrtemsbsd/rtems/generate_kvm_symbols100
-rw-r--r--rtemsbsd/rtems/rtems-kernel-init.c29
-rw-r--r--rtemsbsd/rtems/rtems-kernel-irqs.c2
-rw-r--r--rtemsbsd/rtems/rtems-kernel-jail.c4
-rw-r--r--rtemsbsd/rtems/rtems-kernel-malloc.c3
-rw-r--r--rtemsbsd/rtems/rtems-kernel-mutex.c2
-rw-r--r--rtemsbsd/rtems/rtems-kernel-nexus.c6
-rw-r--r--rtemsbsd/rtems/rtems-kernel-page.c6
-rw-r--r--rtemsbsd/rtems/rtems-kernel-sx.c6
-rw-r--r--rtemsbsd/rtems/rtems-legacy-rtrequest.c3
-rw-r--r--rtemsbsd/sys/dev/ffec/if_ffec_mcf548x.c3
-rw-r--r--rtemsbsd/sys/dev/tsec/if_tsec_nexus.c1
-rw-r--r--rtemsbsd/sys/dev/usb/controller/ehci_mpc83xx.c2
-rwxr-xr-xrtemsbsd/sys/fs/devfs/devfs_devs.c16
-rw-r--r--rtemsbsd/sys/net/if_ppp.c32
-rw-r--r--rtemsbsd/sys/net/if_pppvar.h4
-rw-r--r--rtemsbsd/sys/net/ppp_tty.c18
-rw-r--r--testsuite/netshell01/shellconfig.c1
1040 files changed, 156261 insertions, 66434 deletions
diff --git a/Makefile.todo b/Makefile.todo
index fe86dbd1..dcc8c756 100644
--- a/Makefile.todo
+++ b/Makefile.todo
@@ -104,3 +104,43 @@ $(LOCAL_INC)/mmcbr_if.h: $(FREEBSD_SRC)/sys/dev/mmc/mmcbr_if.m
$(LOCAL_SRC)/mmcbr_if.c: $(FREEBSD_SRC)/sys/dev/mmc/mmcbr_if.m
awk -f $(TOOLS)/makeobjops.awk $< -c
mv mmcbr_if.c $@
+
+$(LOCAL_INC)/if_dwc_if.h: $(FREEBSD_SRC)/sys/dev/dwc/if_dwc_if.m
+ awk -f $(TOOLS)/makeobjops.awk $< -h
+ mv if_dwc_if.h $@
+
+$(LOCAL_SRC)/if_dwc_if.c: $(FREEBSD_SRC)/sys/dev/dwc/if_dwc_if.m
+ awk -f $(TOOLS)/makeobjops.awk $< -c
+ mv if_dwc_if.c $@
+
+$(LOCAL_INC)/gpio_if.h: $(FREEBSD_SRC)/sys/dev/gpio/gpio_if.m
+ awk -f $(TOOLS)/makeobjops.awk $< -h
+ mv gpio_if.h $@
+
+$(LOCAL_SRC)/gpio_if.c: $(FREEBSD_SRC)/sys/dev/gpio/gpio_if.m
+ awk -f $(TOOLS)/makeobjops.awk $< -c
+ mv gpio_if.c $@
+
+freebsd/usr.bin/netstat/nl_symbols.c: $(FREEBSD_SRC)/usr.bin/netstat/nlist_symbols
+ awk '\
+ BEGIN { \
+ print "#include <sys/param.h>"; \
+ print "#include <nlist.h>"; \
+ print "const struct nlist nl[] = {"; \
+ } \
+ !/^\#/ { printf("\t{ .n_name = \"%s\" },\n", $$2); } \
+ END { print "\t{ .n_name = NULL },\n};" } \
+ ' < $< > $@ || rm -f $@
+
+freebsd/usr.bin/netstat/nl_defs.h: $(FREEBSD_SRC)/usr.bin/netstat/nlist_symbols
+ awk '\
+ BEGIN { \
+ print "#include <nlist.h>"; \
+ print "extern const struct nlist nl[];"; \
+ i = 0; \
+ } \
+ !/^\#/ { printf("\#define\tN%s\t%s\n", toupper($$2), i++); }' \
+ < $< > $@ || rm -f $@
+
+clean:
+ rm $(GENERATED)
diff --git a/builder.py b/builder.py
index 4a749d02..29a6b49e 100755
--- a/builder.py
+++ b/builder.py
@@ -173,8 +173,8 @@ def commonNoWarnings():
def includes():
return ['-Irtemsbsd/include',
'-Ifreebsd/sys',
- '-Ifreebsd/sys/contrib/altq',
'-Ifreebsd/sys/contrib/pf',
+ '-Ifreebsd/sys/net',
'-Ifreebsd/include',
'-Ifreebsd/lib/libc/include',
'-Ifreebsd/lib/libc/isc/include',
@@ -184,6 +184,7 @@ def includes():
'-Ifreebsd/lib/libmemstat',
'-Ifreebsd/lib/libipsec',
'-Ifreebsd/contrib/libpcap',
+ '-Ifreebsd/contrib/libxo',
'-Irtemsbsd/sys',
'-ImDNSResponder/mDNSCore',
'-ImDNSResponder/mDNSShared',
@@ -205,7 +206,6 @@ def headerPaths():
return [('rtemsbsd/include', '*.h', ''),
('rtemsbsd/mghttpd', 'mongoose.h', 'mghttpd'),
('freebsd/include', '*.h', ''),
- ('freebsd/sys/contrib/altq/altq', '*.h', 'altq'),
('freebsd/sys/bsm', '*.h', 'bsm'),
('freebsd/sys/cam', '*.h', 'cam'),
('freebsd/sys/net', '*.h', 'net'),
diff --git a/create-kernel-namespace.sh b/create-kernel-namespace.sh
index 1b3391ce..ed4efa10 100755
--- a/create-kernel-namespace.sh
+++ b/create-kernel-namespace.sh
@@ -72,7 +72,6 @@ objdump --syms `for i in build/*rtems* ; do \
| sed '/^max_linkhdr$/d' \
| sed '/^max_protohdr$/d' \
| sed '/^maxsockets$/d' \
- | sed '/^mbstat$/d' \
| sed '/^nd6_debug$/d' \
| sed '/^nd6_delay$/d' \
| sed '/^nd6_gctimer$/d' \
@@ -92,6 +91,7 @@ objdump --syms `for i in build/*rtems* ; do \
| sed '/^pause$/d' \
| sed '/^pf_osfp_entry_pl$/d' \
| sed '/^pf_osfp_pl$/d' \
+ | sed '/^pipe$/d' \
| sed '/^poll$/d' \
| sed '/^random$/d' \
| sed '/^realloc$/d' \
diff --git a/freebsd-org b/freebsd-org
-Subproject 99a648a912e81e29d9c4c159cbbe263462f2d71
+Subproject 9fe7c416e6abb28b1398fd3e5687099846800cf
diff --git a/freebsd/contrib/libxo/libxo/libxo.c b/freebsd/contrib/libxo/libxo/libxo.c
new file mode 100644
index 00000000..fc7c3209
--- /dev/null
+++ b/freebsd/contrib/libxo/libxo/libxo.c
@@ -0,0 +1,8080 @@
+#include <machine/rtems-bsd-user-space.h>
+
+/*
+ * Copyright (c) 2014-2015, Juniper Networks, Inc.
+ * All rights reserved.
+ * This SOFTWARE is licensed under the LICENSE provided in the
+ * ../Copyright file. By downloading, installing, copying, or otherwise
+ * using the SOFTWARE, you agree to be bound by the terms of that
+ * LICENSE.
+ * Phil Shafer, July 2014
+ *
+ * This is the implementation of libxo, the formatting library that
+ * generates multiple styles of output from a single code path.
+ * Command line utilities can have their normal text output while
+ * automation tools can see XML or JSON output, and web tools can use
+ * HTML output that encodes the text output annotated with additional
+ * information. Specialized encoders can be built that allow custom
+ * encoding including binary ones like CBOR, thrift, protobufs, etc.
+ *
+ * Full documentation is available in ./doc/libxo.txt or online at:
+ * http://juniper.github.io/libxo/libxo-manual.html
+ *
+ * For first time readers, the core bits of code to start looking at are:
+ * - xo_do_emit() -- parse and emit a set of fields
+ * - xo_do_emit_fields -- the central function of the library
+ * - xo_do_format_field() -- handles formatting a single field
+ * - xo_transiton() -- the state machine that keeps things sane
+ * and of course the "xo_handle_t" data structure, which carries all
+ * configuration and state.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <wchar.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <ctype.h>
+#include <wctype.h>
+#include <getopt.h>
+
+#include "xo_config.h"
+#include "xo.h"
+#include "xo_encoder.h"
+#include "xo_buf.h"
+
+/*
+ * We ask wcwidth() to do an impossible job, really. It's supposed to
+ * need to tell us the number of columns consumed to display a unicode
+ * character. It returns that number without any sort of context, but
+ * we know they are characters whose glyph differs based on placement
+ * (end of word, middle of word, etc) and many that affect characters
+ * previously emitted. Without content, it can't hope to tell us.
+ * But it's the only standard tool we've got, so we use it. We would
+ * use wcswidth() but it typically just loops through adding the results
+ * of wcwidth() calls in an entirely unhelpful way.
+ *
+ * Even then, there are many poor implementations (macosx), so we have
+ * to carry our own. We could have configure.ac test this (with
+ * something like 'assert(wcwidth(0x200d) == 0)'), but it would have
+ * to run a binary, which breaks cross-compilation. Hmm... I could
+ * run this test at init time and make a warning for our dear user.
+ *
+ * Anyhow, it remains a best-effort sort of thing. And it's all made
+ * more hopeless because we assume the display code doing the rendering is
+ * playing by the same rules we are. If it display 0x200d as a square
+ * box or a funky question mark, the output will be hosed.
+ */
+#ifdef LIBXO_WCWIDTH
+#include "xo_wcwidth.h"
+#else /* LIBXO_WCWIDTH */
+#define xo_wcwidth(_x) wcwidth(_x)
+#endif /* LIBXO_WCWIDTH */
+
+#ifdef HAVE_STDIO_EXT_H
+#include <stdio_ext.h>
+#endif /* HAVE_STDIO_EXT_H */
+
+/*
+ * humanize_number is a great function, unless you don't have it. So
+ * we carry one in our pocket.
+ */
+#ifdef HAVE_HUMANIZE_NUMBER
+#include <libutil.h>
+#define xo_humanize_number humanize_number
+#else /* HAVE_HUMANIZE_NUMBER */
+#include "xo_humanize.h"
+#endif /* HAVE_HUMANIZE_NUMBER */
+
+#ifdef HAVE_GETTEXT
+#include <libintl.h>
+#endif /* HAVE_GETTEXT */
+
+/*
+ * Three styles of specifying thread-local variables are supported.
+ * configure.ac has the brains to run each possibility through the
+ * compiler and see what works; we are left to define the THREAD_LOCAL
+ * macro to the right value. Most toolchains (clang, gcc) use
+ * "before", but some (borland) use "after" and I've heard of some
+ * (ms) that use __declspec. Any others out there?
+ */
+#define THREAD_LOCAL_before 1
+#define THREAD_LOCAL_after 2
+#define THREAD_LOCAL_declspec 3
+
+#ifndef HAVE_THREAD_LOCAL
+#define THREAD_LOCAL(_x) _x
+#elif HAVE_THREAD_LOCAL == THREAD_LOCAL_before
+#define THREAD_LOCAL(_x) __thread _x
+#elif HAVE_THREAD_LOCAL == THREAD_LOCAL_after
+#define THREAD_LOCAL(_x) _x __thread
+#elif HAVE_THREAD_LOCAL == THREAD_LOCAL_declspec
+#define THREAD_LOCAL(_x) __declspec(_x)
+#else
+#error unknown thread-local setting
+#endif /* HAVE_THREADS_H */
+
+const char xo_version[] = LIBXO_VERSION;
+const char xo_version_extra[] = LIBXO_VERSION_EXTRA;
+static const char xo_default_format[] = "%s";
+
+#ifndef UNUSED
+#define UNUSED __attribute__ ((__unused__))
+#endif /* UNUSED */
+
+#define XO_INDENT_BY 2 /* Amount to indent when pretty printing */
+#define XO_DEPTH 128 /* Default stack depth */
+#define XO_MAX_ANCHOR_WIDTH (8*1024) /* Anything wider is just sillyb */
+
+#define XO_FAILURE_NAME "failure"
+
+/* Flags for the stack frame */
+typedef unsigned xo_xsf_flags_t; /* XSF_* flags */
+#define XSF_NOT_FIRST (1<<0) /* Not the first element */
+#define XSF_LIST (1<<1) /* Frame is a list */
+#define XSF_INSTANCE (1<<2) /* Frame is an instance */
+#define XSF_DTRT (1<<3) /* Save the name for DTRT mode */
+
+#define XSF_CONTENT (1<<4) /* Some content has been emitted */
+#define XSF_EMIT (1<<5) /* Some field has been emitted */
+#define XSF_EMIT_KEY (1<<6) /* A key has been emitted */
+#define XSF_EMIT_LEAF_LIST (1<<7) /* A leaf-list field has been emitted */
+
+/* These are the flags we propagate between markers and their parents */
+#define XSF_MARKER_FLAGS \
+ (XSF_NOT_FIRST | XSF_CONTENT | XSF_EMIT | XSF_EMIT_KEY | XSF_EMIT_LEAF_LIST )
+
+/*
+ * A word about states: We use a finite state machine (FMS) approach
+ * to help remove fragility from the caller's code. Instead of
+ * requiring a specific order of calls, we'll allow the caller more
+ * flexibility and make the library responsible for recovering from
+ * missed steps. The goal is that the library should not be capable
+ * of emitting invalid xml or json, but the developer shouldn't need
+ * to know or understand all the details about these encodings.
+ *
+ * You can think of states as either states or events, since they
+ * function rather like both. None of the XO_CLOSE_* events will
+ * persist as states, since the matching stack frame will be popped.
+ * Same is true of XSS_EMIT, which is an event that asks us to
+ * prep for emitting output fields.
+ */
+
+/* Stack frame states */
+typedef unsigned xo_state_t;
+#define XSS_INIT 0 /* Initial stack state */
+#define XSS_OPEN_CONTAINER 1
+#define XSS_CLOSE_CONTAINER 2
+#define XSS_OPEN_LIST 3
+#define XSS_CLOSE_LIST 4
+#define XSS_OPEN_INSTANCE 5
+#define XSS_CLOSE_INSTANCE 6
+#define XSS_OPEN_LEAF_LIST 7
+#define XSS_CLOSE_LEAF_LIST 8
+#define XSS_DISCARDING 9 /* Discarding data until recovered */
+#define XSS_MARKER 10 /* xo_open_marker's marker */
+#define XSS_EMIT 11 /* xo_emit has a leaf field */
+#define XSS_EMIT_LEAF_LIST 12 /* xo_emit has a leaf-list ({l:}) */
+#define XSS_FINISH 13 /* xo_finish was called */
+
+#define XSS_MAX 13
+
+#define XSS_TRANSITION(_old, _new) ((_old) << 8 | (_new))
+
+/*
+ * xo_stack_t: As we open and close containers and levels, we
+ * create a stack of frames to track them. This is needed for
+ * XOF_WARN and XOF_XPATH.
+ */
+typedef struct xo_stack_s {
+ xo_xsf_flags_t xs_flags; /* Flags for this frame */
+ xo_state_t xs_state; /* State for this stack frame */
+ char *xs_name; /* Name (for XPath value) */
+ char *xs_keys; /* XPath predicate for any key fields */
+} xo_stack_t;
+
+/*
+ * libxo supports colors and effects, for those who like them.
+ * XO_COL_* ("colors") refers to fancy ansi codes, while X__EFF_*
+ * ("effects") are bits since we need to maintain state.
+ */
+#define XO_COL_DEFAULT 0
+#define XO_COL_BLACK 1
+#define XO_COL_RED 2
+#define XO_COL_GREEN 3
+#define XO_COL_YELLOW 4
+#define XO_COL_BLUE 5
+#define XO_COL_MAGENTA 6
+#define XO_COL_CYAN 7
+#define XO_COL_WHITE 8
+
+#define XO_NUM_COLORS 9
+
+/*
+ * Yes, there's no blink. We're civilized. We like users. Blink
+ * isn't something one does to someone you like. Friends don't let
+ * friends use blink. On friends. You know what I mean. Blink is
+ * like, well, it's like bursting into show tunes at a funeral. It's
+ * just not done. Not something anyone wants. And on those rare
+ * instances where it might actually be appropriate, it's still wrong,
+ * since it's likely done by the wrong person for the wrong reason.
+ * Just like blink. And if I implemented blink, I'd be like a funeral
+ * director who adds "Would you like us to burst into show tunes?" on
+ * the list of questions asked while making funeral arrangements.
+ * It's formalizing wrongness in the wrong way. And we're just too
+ * civilized to do that. Hhhmph!
+ */
+#define XO_EFF_RESET (1<<0)
+#define XO_EFF_NORMAL (1<<1)
+#define XO_EFF_BOLD (1<<2)
+#define XO_EFF_UNDERLINE (1<<3)
+#define XO_EFF_INVERSE (1<<4)
+
+#define XO_EFF_CLEAR_BITS XO_EFF_RESET /* Reset gets reset, surprisingly */
+
+typedef uint8_t xo_effect_t;
+typedef uint8_t xo_color_t;
+typedef struct xo_colors_s {
+ xo_effect_t xoc_effects; /* Current effect set */
+ xo_color_t xoc_col_fg; /* Foreground color */
+ xo_color_t xoc_col_bg; /* Background color */
+} xo_colors_t;
+
+/*
+ * xo_handle_t: this is the principle data structure for libxo.
+ * It's used as a store for state, options, content, and all manor
+ * of other information.
+ */
+struct xo_handle_s {
+ xo_xof_flags_t xo_flags; /* Flags (XOF_*) from the user*/
+ xo_xof_flags_t xo_iflags; /* Internal flags (XOIF_*) */
+ xo_style_t xo_style; /* XO_STYLE_* value */
+ unsigned short xo_indent; /* Indent level (if pretty) */
+ unsigned short xo_indent_by; /* Indent amount (tab stop) */
+ xo_write_func_t xo_write; /* Write callback */
+ xo_close_func_t xo_close; /* Close callback */
+ xo_flush_func_t xo_flush; /* Flush callback */
+ xo_formatter_t xo_formatter; /* Custom formating function */
+ xo_checkpointer_t xo_checkpointer; /* Custom formating support function */
+ void *xo_opaque; /* Opaque data for write function */
+ xo_buffer_t xo_data; /* Output data */
+ xo_buffer_t xo_fmt; /* Work area for building format strings */
+ xo_buffer_t xo_attrs; /* Work area for building XML attributes */
+ xo_buffer_t xo_predicate; /* Work area for building XPath predicates */
+ xo_stack_t *xo_stack; /* Stack pointer */
+ int xo_depth; /* Depth of stack */
+ int xo_stack_size; /* Size of the stack */
+ xo_info_t *xo_info; /* Info fields for all elements */
+ int xo_info_count; /* Number of info entries */
+ va_list xo_vap; /* Variable arguments (stdargs) */
+ char *xo_leading_xpath; /* A leading XPath expression */
+ mbstate_t xo_mbstate; /* Multi-byte character conversion state */
+ unsigned xo_anchor_offset; /* Start of anchored text */
+ unsigned xo_anchor_columns; /* Number of columns since the start anchor */
+ int xo_anchor_min_width; /* Desired width of anchored text */
+ unsigned xo_units_offset; /* Start of units insertion point */
+ unsigned xo_columns; /* Columns emitted during this xo_emit call */
+ uint8_t xo_color_map_fg[XO_NUM_COLORS]; /* Foreground color mappings */
+ uint8_t xo_color_map_bg[XO_NUM_COLORS]; /* Background color mappings */
+ xo_colors_t xo_colors; /* Current color and effect values */
+ xo_buffer_t xo_color_buf; /* HTML: buffer of colors and effects */
+ char *xo_version; /* Version string */
+ int xo_errno; /* Saved errno for "%m" */
+ char *xo_gt_domain; /* Gettext domain, suitable for dgettext(3) */
+ xo_encoder_func_t xo_encoder; /* Encoding function */
+ void *xo_private; /* Private data for external encoders */
+};
+
+/* Flag operations: generic bit test/set/clear over any flag word */
+#define XOF_BIT_ISSET(_flag, _bit) (((_flag) & (_bit)) ? 1 : 0)
+#define XOF_BIT_SET(_flag, _bit) do { (_flag) |= (_bit); } while (0)
+#define XOF_BIT_CLEAR(_flag, _bit) do { (_flag) &= ~(_bit); } while (0)
+
+/* Convenience wrappers for a handle's user-visible flags (xo_flags) */
+#define XOF_ISSET(_xop, _bit) XOF_BIT_ISSET(_xop->xo_flags, _bit)
+#define XOF_SET(_xop, _bit) XOF_BIT_SET(_xop->xo_flags, _bit)
+#define XOF_CLEAR(_xop, _bit) XOF_BIT_CLEAR(_xop->xo_flags, _bit)
+
+/* Convenience wrappers for a handle's internal flags (xo_iflags) */
+#define XOIF_ISSET(_xop, _bit) XOF_BIT_ISSET(_xop->xo_iflags, _bit)
+#define XOIF_SET(_xop, _bit) XOF_BIT_SET(_xop->xo_iflags, _bit)
+#define XOIF_CLEAR(_xop, _bit) XOF_BIT_CLEAR(_xop->xo_iflags, _bit)
+
+/* Internal flags */
+#define XOIF_REORDER XOF_BIT(0) /* Reordering fields; record field info */
+#define XOIF_DIV_OPEN XOF_BIT(1) /* A <div> is open */
+#define XOIF_TOP_EMITTED XOF_BIT(2) /* The top JSON braces have been emitted */
+#define XOIF_ANCHOR XOF_BIT(3) /* An anchor is in place */
+
+#define XOIF_UNITS_PENDING XOF_BIT(4) /* We have a units-insertion pending */
+#define XOIF_INIT_IN_PROGRESS XOF_BIT(5) /* Init of handle is in progress */
+
+/* Flags for formatting functions (per-field behavior) */
+typedef unsigned long xo_xff_flags_t;
+#define XFF_COLON (1<<0) /* Append a ":" */
+#define XFF_COMMA (1<<1) /* Append a "," iff there's more output */
+#define XFF_WS (1<<2) /* Append a blank */
+#define XFF_ENCODE_ONLY (1<<3) /* Only emit for encoding styles (XML, JSON) */
+
+#define XFF_QUOTE (1<<4) /* Force quotes */
+#define XFF_NOQUOTE (1<<5) /* Force no quotes */
+#define XFF_DISPLAY_ONLY (1<<6) /* Only emit for display styles (text, html) */
+#define XFF_KEY (1<<7) /* Field is a key (for XPath) */
+
+#define XFF_XML (1<<8) /* Force XML encoding style (for XPath) */
+#define XFF_ATTR (1<<9) /* Escape value using attribute rules (XML) */
+#define XFF_BLANK_LINE (1<<10) /* Emit a blank line */
+#define XFF_NO_OUTPUT (1<<11) /* Do not make any output */
+
+#define XFF_TRIM_WS (1<<12) /* Trim whitespace off encoded values */
+#define XFF_LEAF_LIST (1<<13) /* A leaf-list (list of values) */
+#define XFF_UNESCAPE (1<<14) /* Need to printf-style unescape the value */
+#define XFF_HUMANIZE (1<<15) /* Humanize the value (for display styles) */
+
+#define XFF_HN_SPACE (1<<16) /* Humanize: put space before suffix */
+#define XFF_HN_DECIMAL (1<<17) /* Humanize: add one decimal place if <10 */
+#define XFF_HN_1000 (1<<18) /* Humanize: use 1000, not 1024 */
+#define XFF_GT_FIELD (1<<19) /* Call gettext() on a field */
+
+#define XFF_GT_PLURAL (1<<20) /* Call dngettext to find plural form */
+#define XFF_ARGUMENT (1<<21) /* Content provided via argument */
+
+/* Flags to turn off when we don't want i18n processing */
+#define XFF_GT_FLAGS (XFF_GT_FIELD | XFF_GT_PLURAL)
+
+/*
+ * Normal printf has width and precision, which for strings operate as
+ * min and max number of columns. But this depends on the idea that
+ * one byte means one column, which UTF-8 and multi-byte characters
+ * pitches on its ear. It may take 40 bytes of data to populate 14
+ * columns, but we can't go off looking at 40 bytes of data without the
+ * caller's permission for fear/knowledge that we'll generate core files.
+ *
+ * So we make three values, distinguishing between "max column" and
+ * "number of bytes that we will inspect inspect safely" We call the
+ * later "size", and make the format "%[[<min>].[[<size>].<max>]]s".
+ *
+ * Under the "first do no harm" theory, we default "max" to "size".
+ * This is a reasonable assumption for folks that don't grok the
+ * MBS/WCS/UTF-8 world, and while it will be annoying, it will never
+ * be evil.
+ *
+ * For example, xo_emit("{:tag/%-14.14s}", buf) will make 14
+ * columns of output, but will never look at more than 14 bytes of the
+ * input buffer. This is mostly compatible with printf and caller's
+ * expectations.
+ *
+ * In contrast xo_emit("{:tag/%-14..14s}", buf) will look at however
+ * many bytes (or until a NUL is seen) are needed to fill 14 columns
+ * of output. xo_emit("{:tag/%-14.*.14s}", xx, buf) will look at up
+ * to xx bytes (or until a NUL is seen) in order to fill 14 columns
+ * of output.
+ *
+ * It's fairly amazing how a good idea (handle all languages of the
+ * world) blows such a big hole in the bottom of the fairly weak boat
+ * that is C string handling. The simplicity and completenesss are
+ * sunk in ways we haven't even begun to understand.
+ */
+#define XF_WIDTH_MIN 0 /* Minimal width */
+#define XF_WIDTH_SIZE 1 /* Maximum number of bytes to examine */
+#define XF_WIDTH_MAX 2 /* Maximum width */
+#define XF_WIDTH_NUM 3 /* Numeric fields in printf (min.size.max) */
+
+/* Input and output string encodings */
+#define XF_ENC_WIDE 1 /* Wide characters (wchar_t) */
+#define XF_ENC_UTF8 2 /* UTF-8 */
+#define XF_ENC_LOCALE 3 /* Current locale */
+
+/*
+ * A place to parse printf-style format flags for each field
+ */
+typedef struct xo_format_s {
+ unsigned char xf_fc; /* Format character */
+ unsigned char xf_enc; /* Encoding of the string (XF_ENC_*) */
+ unsigned char xf_skip; /* Skip this field */
+ unsigned char xf_lflag; /* 'l' (long) */
+ unsigned char xf_hflag;; /* 'h' (half) */
+ unsigned char xf_jflag; /* 'j' (intmax_t) */
+ unsigned char xf_tflag; /* 't' (ptrdiff_t) */
+ unsigned char xf_zflag; /* 'z' (size_t) */
+ unsigned char xf_qflag; /* 'q' (quad_t) */
+ unsigned char xf_seen_minus; /* Seen a minus */
+ int xf_leading_zero; /* Seen a leading zero (zero fill) */
+ unsigned xf_dots; /* Seen one or more '.'s */
+ int xf_width[XF_WIDTH_NUM]; /* Width/precision/size numeric fields */
+ unsigned xf_stars; /* Seen one or more '*'s */
+ unsigned char xf_star[XF_WIDTH_NUM]; /* Seen one or more '*'s */
+} xo_format_t;
+
+/*
+ * This structure represents the parsed field information, suitable for
+ * processing by xo_do_emit and anything else that needs to parse fields.
+ * Note that all pointers point to the main format string.
+ *
+ * XXX This is a first step toward compilable or cachable format
+ * strings. We can also cache the results of dgettext when no format
+ * is used, assuming the 'p' modifier has _not_ been set.
+ */
+typedef struct xo_field_info_s {
+ xo_xff_flags_t xfi_flags; /* Flags for this field */
+ unsigned xfi_ftype; /* Field type, as character (e.g. 'V') */
+ const char *xfi_start; /* Start of field in the format string */
+ const char *xfi_content; /* Field's content */
+ const char *xfi_format; /* Field's Format */
+ const char *xfi_encoding; /* Field's encoding format */
+ const char *xfi_next; /* Next character in format string */
+ unsigned xfi_len; /* Length of field */
+ unsigned xfi_clen; /* Content length */
+ unsigned xfi_flen; /* Format length */
+ unsigned xfi_elen; /* Encoding length */
+ unsigned xfi_fnum; /* Field number (if used; 0 otherwise) */
+ unsigned xfi_renum; /* Reordered number (0 == no renumbering) */
+} xo_field_info_t;
+
+/*
+ * We keep a 'default' handle to allow callers to avoid having to
+ * allocate one. Passing NULL to any of our functions will use
+ * this default handle. Most functions have a variant that doesn't
+ * require a handle at all, since most output is to stdout, which
+ * the default handle handles handily.
+ */
+static THREAD_LOCAL(xo_handle_t) xo_default_handle;
+static THREAD_LOCAL(int) xo_default_inited;
+static int xo_locale_inited;
+static const char *xo_program;
+
+/*
+ * To allow libxo to be used in diverse environment, we allow the
+ * caller to give callbacks for memory allocation.
+ */
+xo_realloc_func_t xo_realloc = realloc;
+xo_free_func_t xo_free = free;
+
+/* Forward declarations */
+static void
+xo_failure (xo_handle_t *xop, const char *fmt, ...);
+
+static int
+xo_transition (xo_handle_t *xop, xo_xsf_flags_t flags, const char *name,
+ xo_state_t new_state);
+
+static void
+xo_buf_append_div (xo_handle_t *xop, const char *class, xo_xff_flags_t flags,
+ const char *name, int nlen,
+ const char *value, int vlen,
+ const char *encoding, int elen);
+
+static void
+xo_anchor_clear (xo_handle_t *xop);
+
+/*
+ * xo_style is used to retrieve the current style. When we're built
+ * for "text only" mode, we use this function to drive the removal
+ * of most of the code in libxo. We return a constant and the compiler
+ * happily removes the non-text code that is not longer executed. This
+ * trims our code nicely without needing to trampel perfectly readable
+ * code with ifdefs.
+ */
+static inline xo_style_t
+xo_style (xo_handle_t *xop UNUSED)
+{
+#ifdef LIBXO_TEXT_ONLY
+ return XO_STYLE_TEXT;
+#else /* LIBXO_TEXT_ONLY */
+ return xop->xo_style;
+#endif /* LIBXO_TEXT_ONLY */
+}
+
+/*
+ * Callback to write data to a FILE pointer
+ */
+static int
+xo_write_to_file (void *opaque, const char *data)
+{
+ FILE *fp = (FILE *) opaque;
+
+ return fprintf(fp, "%s", data);
+}
+
+/*
+ * Callback to close a file
+ */
+static void
+xo_close_file (void *opaque)
+{
+ FILE *fp = (FILE *) opaque;
+
+ fclose(fp);
+}
+
+/*
+ * Callback to flush a FILE pointer
+ */
+static int
+xo_flush_file (void *opaque)
+{
+ FILE *fp = (FILE *) opaque;
+
+ return fflush(fp);
+}
+
+/*
+ * Use a rotating stock of buffers to make a printable string
+ */
+#define XO_NUMBUFS 8
+#define XO_SMBUFSZ 128
+
+static const char *
+xo_printable (const char *str)
+{
+ static THREAD_LOCAL(char) bufset[XO_NUMBUFS][XO_SMBUFSZ];
+ static THREAD_LOCAL(int) bufnum = 0;
+
+ if (str == NULL)
+ return "";
+
+ if (++bufnum == XO_NUMBUFS)
+ bufnum = 0;
+
+ char *res = bufset[bufnum], *cp, *ep;
+
+ for (cp = res, ep = res + XO_SMBUFSZ - 1; *str && cp < ep; cp++, str++) {
+ if (*str == '\n') {
+ *cp++ = '\\';
+ *cp = 'n';
+ } else if (*str == '\r') {
+ *cp++ = '\\';
+ *cp = 'r';
+ } else if (*str == '\"') {
+ *cp++ = '\\';
+ *cp = '"';
+ } else
+ *cp = *str;
+ }
+
+ *cp = '\0';
+ return res;
+}
+
+/*
+ * Ensure the handle's stack can hold at least 'depth' entries,
+ * growing it (with XO_DEPTH slack) and zeroing the new entries.
+ * Returns 0 on success, -1 on allocation failure.
+ */
+static int
+xo_depth_check (xo_handle_t *xop, int depth)
+{
+ xo_stack_t *xsp;
+
+ if (depth >= xop->xo_stack_size) {
+ depth += XO_DEPTH; /* Extra room */
+
+ xsp = xo_realloc(xop->xo_stack, sizeof(xop->xo_stack[0]) * depth);
+ if (xsp == NULL) {
+ xo_failure(xop, "xo_depth_check: out of memory (%d)", depth);
+ return -1;
+ }
+
+ int count = depth - xop->xo_stack_size;
+
+ bzero(xsp + xop->xo_stack_size, count * sizeof(*xsp));
+ xop->xo_stack_size = depth;
+ xop->xo_stack = xsp;
+ }
+
+ return 0;
+}
+
+/* Tell libxo to skip its setlocale() call during initialization */
+void
+xo_no_setlocale (void)
+{
+ xo_locale_inited = 1; /* Skip initialization */
+}
+
+/*
+ * We need to decide if stdout is line buffered (_IOLBF). Lacking a
+ * standard way to decide this (e.g. getlinebuf()), we have configure
+ * look to find __flbf, which glibc supported. If not, we'll rely on
+ * isatty, with the assumption that terminals are the only thing
+ * that's line buffered. We _could_ test for "steam._flags & _IOLBF",
+ * which is all __flbf does, but that's even tackier. Like a
+ * bedazzled Elvis outfit on an ugly lap dog sort of tacky. Not
+ * something we're willing to do.
+ */
+static int
+xo_is_line_buffered (FILE *stream)
+{
+#if HAVE___FLBF
+ if (__flbf(stream))
+ return 1;
+#else /* HAVE___FLBF */
+ if (isatty(fileno(stream)))
+ return 1;
+#endif /* HAVE___FLBF */
+ return 0;
+}
+
+/*
+ * Initialize an xo_handle_t, using both static defaults and
+ * the global settings from the LIBXO_OPTIONS environment
+ * variable.
+ */
+static void
+xo_init_handle (xo_handle_t *xop)
+{
+ xop->xo_opaque = stdout;
+ xop->xo_write = xo_write_to_file;
+ xop->xo_flush = xo_flush_file;
+
+ if (xo_is_line_buffered(stdout))
+ XOF_SET(xop, XOF_FLUSH_LINE);
+
+ /*
+ * We only want to do color output on terminals, but we only want
+ * to do this if the user has asked for color.
+ */
+ if (XOF_ISSET(xop, XOF_COLOR_ALLOWED) && isatty(1))
+ XOF_SET(xop, XOF_COLOR);
+
+ /*
+ * We need to initialize the locale, which isn't really pretty.
+ * Libraries should depend on their caller to set up the
+ * environment. But we really can't count on the caller to do
+ * this, because well, they won't. Trust me.
+ */
+ if (!xo_locale_inited) {
+ xo_locale_inited = 1; /* Only do this once */
+
+ const char *cp = getenv("LC_CTYPE");
+ if (cp == NULL)
+ cp = getenv("LANG");
+ if (cp == NULL)
+ cp = getenv("LC_ALL");
+ if (cp == NULL)
+ cp = "C"; /* Default for C programs */
+ (void) setlocale(LC_CTYPE, cp);
+ }
+
+ /*
+ * Initialize only the xo_buffers we know we'll need; the others
+ * can be allocated as needed.
+ */
+ xo_buf_init(&xop->xo_data);
+ xo_buf_init(&xop->xo_fmt);
+
+ if (XOIF_ISSET(xop, XOIF_INIT_IN_PROGRESS))
+ return;
+ XOIF_SET(xop, XOIF_INIT_IN_PROGRESS);
+
+ xop->xo_indent_by = XO_INDENT_BY;
+ xo_depth_check(xop, XO_DEPTH);
+
+#if !defined(NO_LIBXO_OPTIONS)
+ if (!XOF_ISSET(xop, XOF_NO_ENV)) {
+ char *env = getenv("LIBXO_OPTIONS");
+ if (env)
+ xo_set_options(xop, env);
+
+ }
+#endif /* NO_GETENV */
+
+ XOIF_CLEAR(xop, XOIF_INIT_IN_PROGRESS);
+}
+
+/*
+ * Initialize the default handle.
+ */
+static void
+xo_default_init (void)
+{
+ xo_handle_t *xop = &xo_default_handle;
+
+ xo_init_handle(xop);
+
+ xo_default_inited = 1;
+}
+
+/*
+ * Cheap convenience function to return either the argument, or
+ * the internal handle, after it has been initialized. The usage
+ * is:
+ * xop = xo_default(xop);
+ */
+static xo_handle_t *
+xo_default (xo_handle_t *xop)
+{
+ if (xop == NULL) {
+ if (xo_default_inited == 0)
+ xo_default_init();
+ xop = &xo_default_handle;
+ }
+
+ return xop;
+}
+
+/*
+ * Return the number of spaces we should be indenting. If
+ * we are pretty-printing, this is indent * indent_by.
+ */
+static int
+xo_indent (xo_handle_t *xop)
+{
+ int rc = 0;
+
+ xop = xo_default(xop);
+
+ if (XOF_ISSET(xop, XOF_PRETTY)) {
+ rc = xop->xo_indent * xop->xo_indent_by;
+ if (XOIF_ISSET(xop, XOIF_TOP_EMITTED))
+ rc += xop->xo_indent_by;
+ }
+
+ return (rc > 0) ? rc : 0;
+}
+
+static void
+xo_buf_indent (xo_handle_t *xop, int indent)
+{
+ xo_buffer_t *xbp = &xop->xo_data;
+
+ if (indent <= 0)
+ indent = xo_indent(xop);
+
+ if (!xo_buf_has_room(xbp, indent))
+ return;
+
+ memset(xbp->xb_curp, ' ', indent);
+ xbp->xb_curp += indent;
+}
+
+/* XML entity replacements for the characters that must be escaped */
+static char xo_xml_amp[] = "&amp;";
+static char xo_xml_lt[] = "&lt;";
+static char xo_xml_gt[] = "&gt;";
+static char xo_xml_quot[] = "&quot;";
+
+/*
+ * Escape the last 'len' bytes at the buffer's current position for
+ * XML/HTML, in place. Double quotes are escaped only when XFF_ATTR
+ * is set (attribute context). Works in two passes: first count the
+ * extra bytes needed, then expand right-to-left so the data can
+ * grow without a scratch buffer. Returns the new (escaped) length,
+ * or 0 if the buffer could not be grown.
+ */
+static int
+xo_escape_xml (xo_buffer_t *xbp, int len, xo_xff_flags_t flags)
+{
+ int slen;
+ unsigned delta = 0;
+ char *cp, *ep, *ip;
+ const char *sp;
+ int attr = (flags & XFF_ATTR);
+
+ for (cp = xbp->xb_curp, ep = cp + len; cp < ep; cp++) {
+ /* We're subtracting 2: 1 for the NUL, 1 for the char we replace */
+ if (*cp == '<')
+ delta += sizeof(xo_xml_lt) - 2;
+ else if (*cp == '>')
+ delta += sizeof(xo_xml_gt) - 2;
+ else if (*cp == '&')
+ delta += sizeof(xo_xml_amp) - 2;
+ else if (attr && *cp == '"')
+ delta += sizeof(xo_xml_quot) - 2;
+ }
+
+ if (delta == 0) /* Nothing to escape; bail */
+ return len;
+
+ if (!xo_buf_has_room(xbp, delta)) /* No room; bail, but don't append */
+ return 0;
+
+ /* Walk backward, copying/expanding from cp (end of input) to ip */
+ ep = xbp->xb_curp;
+ cp = ep + len;
+ ip = cp + delta;
+ do {
+ cp -= 1;
+ ip -= 1;
+
+ if (*cp == '<')
+ sp = xo_xml_lt;
+ else if (*cp == '>')
+ sp = xo_xml_gt;
+ else if (*cp == '&')
+ sp = xo_xml_amp;
+ else if (attr && *cp == '"')
+ sp = xo_xml_quot;
+ else {
+ *ip = *cp;
+ continue;
+ }
+
+ slen = strlen(sp);
+ ip -= slen - 1;
+ memcpy(ip, sp, slen);
+
+ } while (cp > ep && cp != ip);
+
+ return len + delta;
+}
+
+/*
+ * Escape the last 'len' bytes at the buffer's current position for
+ * JSON, in place: backslash, double quote, newline and carriage
+ * return each gain a leading backslash (with \n and \r becoming
+ * "\\n"/"\\r"). Same two-pass right-to-left scheme as
+ * xo_escape_xml. Returns the new length, or 0 on no room.
+ */
+static int
+xo_escape_json (xo_buffer_t *xbp, int len, xo_xff_flags_t flags UNUSED)
+{
+ unsigned delta = 0;
+ char *cp, *ep, *ip;
+
+ for (cp = xbp->xb_curp, ep = cp + len; cp < ep; cp++) {
+ if (*cp == '\\' || *cp == '"')
+ delta += 1;
+ else if (*cp == '\n' || *cp == '\r')
+ delta += 1;
+ }
+
+ if (delta == 0) /* Nothing to escape; bail */
+ return len;
+
+ if (!xo_buf_has_room(xbp, delta)) /* No room; bail, but don't append */
+ return 0;
+
+ ep = xbp->xb_curp;
+ cp = ep + len;
+ ip = cp + delta;
+ do {
+ cp -= 1;
+ ip -= 1;
+
+ if (*cp == '\\' || *cp == '"') {
+ *ip-- = *cp;
+ *ip = '\\';
+ } else if (*cp == '\n') {
+ *ip-- = 'n';
+ *ip = '\\';
+ } else if (*cp == '\r') {
+ *ip-- = 'r';
+ *ip = '\\';
+ } else {
+ *ip = *cp;
+ }
+
+ } while (cp > ep && cp != ip);
+
+ return len + delta;
+}
+
+/*
+ * PARAM-VALUE = UTF-8-STRING ; characters '"', '\' and
+ * ; ']' MUST be escaped.
+ */
+static int
+xo_escape_sdparams (xo_buffer_t *xbp, int len, xo_xff_flags_t flags UNUSED)
+{
+ unsigned delta = 0;
+ char *cp, *ep, *ip;
+
+ for (cp = xbp->xb_curp, ep = cp + len; cp < ep; cp++) {
+ if (*cp == '\\' || *cp == '"' || *cp == ']')
+ delta += 1;
+ }
+
+ if (delta == 0) /* Nothing to escape; bail */
+ return len;
+
+ if (!xo_buf_has_room(xbp, delta)) /* No room; bail, but don't append */
+ return 0;
+
+ ep = xbp->xb_curp;
+ cp = ep + len;
+ ip = cp + delta;
+ do {
+ cp -= 1;
+ ip -= 1;
+
+ if (*cp == '\\' || *cp == '"' || *cp == ']') {
+ *ip-- = *cp;
+ *ip = '\\';
+ } else {
+ *ip = *cp;
+ }
+
+ } while (cp > ep && cp != ip);
+
+ return len + delta;
+}
+
+/*
+ * Copy 'len' bytes of 'str' into the buffer and escape them in place
+ * according to the handle's current output style. Silently drops
+ * the data if the buffer cannot be grown.
+ */
+static void
+xo_buf_escape (xo_handle_t *xop, xo_buffer_t *xbp,
+ const char *str, int len, xo_xff_flags_t flags)
+{
+ if (!xo_buf_has_room(xbp, len))
+ return;
+
+ memcpy(xbp->xb_curp, str, len);
+
+ switch (xo_style(xop)) {
+ case XO_STYLE_XML:
+ case XO_STYLE_HTML:
+ len = xo_escape_xml(xbp, len, flags);
+ break;
+
+ case XO_STYLE_JSON:
+ len = xo_escape_json(xbp, len, flags);
+ break;
+
+ case XO_STYLE_SDPARAMS:
+ len = xo_escape_sdparams(xbp, len, flags);
+ break;
+ }
+
+ xbp->xb_curp += len;
+}
+
+/*
+ * Write the current contents of the data buffer using the handle's
+ * xo_write function.
+ */
+static int
+xo_write (xo_handle_t *xop)
+{
+ int rc = 0;
+ xo_buffer_t *xbp = &xop->xo_data;
+
+ if (xbp->xb_curp != xbp->xb_bufp) {
+ xo_buf_append(xbp, "", 1); /* Append ending NUL */
+ xo_anchor_clear(xop);
+ if (xop->xo_write)
+ rc = xop->xo_write(xop->xo_opaque, xbp->xb_bufp);
+ xbp->xb_curp = xbp->xb_bufp;
+ }
+
+ /* Turn off the flags that don't survive across writes */
+ XOIF_CLEAR(xop, XOIF_UNITS_PENDING);
+
+ return rc;
+}
+
+/*
+ * Format arguments into our buffer. If a custom formatter has been set,
+ * we use that to do the work; otherwise we vsnprintf().
+ */
+static int
+xo_vsnprintf (xo_handle_t *xop, xo_buffer_t *xbp, const char *fmt, va_list vap)
+{
+ va_list va_local;
+ int rc;
+ int left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
+
+ va_copy(va_local, vap);
+
+ if (xop->xo_formatter)
+ rc = xop->xo_formatter(xop, xbp->xb_curp, left, fmt, va_local);
+ else
+ rc = vsnprintf(xbp->xb_curp, left, fmt, va_local);
+
+ if (rc >= left) {
+ if (!xo_buf_has_room(xbp, rc)) {
+ va_end(va_local);
+ return -1;
+ }
+
+ /*
+ * After we call vsnprintf(), the stage of vap is not defined.
+ * We need to copy it before we pass. Then we have to do our
+ * own logic below to move it along. This is because the
+ * implementation can have va_list be a pointer (bsd) or a
+ * structure (macosx) or anything in between.
+ */
+
+ va_end(va_local); /* Reset vap to the start */
+ va_copy(va_local, vap);
+
+ left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
+ if (xop->xo_formatter)
+ rc = xop->xo_formatter(xop, xbp->xb_curp, left, fmt, va_local);
+ else
+ rc = vsnprintf(xbp->xb_curp, left, fmt, va_local);
+ }
+ va_end(va_local);
+
+ return rc;
+}
+
+/*
+ * Print some data through the handle.
+ */
+static int
+xo_printf_v (xo_handle_t *xop, const char *fmt, va_list vap)
+{
+ xo_buffer_t *xbp = &xop->xo_data;
+ int left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
+ int rc;
+ va_list va_local;
+
+ va_copy(va_local, vap);
+
+ rc = vsnprintf(xbp->xb_curp, left, fmt, va_local);
+
+ if (rc >= left) {
+ if (!xo_buf_has_room(xbp, rc)) {
+ va_end(va_local);
+ return -1;
+ }
+
+ va_end(va_local); /* Reset vap to the start */
+ va_copy(va_local, vap);
+
+ left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
+ rc = vsnprintf(xbp->xb_curp, left, fmt, va_local);
+ }
+
+ va_end(va_local);
+
+ if (rc > 0)
+ xbp->xb_curp += rc;
+
+ return rc;
+}
+
+static int
+xo_printf (xo_handle_t *xop, const char *fmt, ...)
+{
+ int rc;
+ va_list vap;
+
+ va_start(vap, fmt);
+
+ rc = xo_printf_v(xop, fmt, vap);
+
+ va_end(vap);
+ return rc;
+}
+
+/*
+ * These next few function are make The Essential UTF-8 Ginsu Knife.
+ * Identify an input and output character, and convert it.
+ */
+static int xo_utf8_bits[7] = { 0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
+
+static int
+xo_is_utf8 (char ch)
+{
+ return (ch & 0x80);
+}
+
+static inline int
+xo_utf8_to_wc_len (const char *buf)
+{
+ unsigned b = (unsigned char) *buf;
+ int len;
+
+ if ((b & 0x80) == 0x0)
+ len = 1;
+ else if ((b & 0xe0) == 0xc0)
+ len = 2;
+ else if ((b & 0xf0) == 0xe0)
+ len = 3;
+ else if ((b & 0xf8) == 0xf0)
+ len = 4;
+ else if ((b & 0xfc) == 0xf8)
+ len = 5;
+ else if ((b & 0xfe) == 0xfc)
+ len = 6;
+ else
+ len = -1;
+
+ return len;
+}
+
+/*
+ * Return the length of the UTF-8 sequence starting at 'buf', after
+ * checking that it fits within 'bufsiz' and that its trailing bytes
+ * are valid continuation bytes (10xxxxxx). Returns -1 (and records
+ * a failure on the handle) on bad data.
+ */
+static int
+xo_buf_utf8_len (xo_handle_t *xop, const char *buf, int bufsiz)
+{
+
+ unsigned b = (unsigned char) *buf;
+ int len, i;
+
+ len = xo_utf8_to_wc_len(buf);
+ if (len == -1) {
+ xo_failure(xop, "invalid UTF-8 data: %02hhx", b);
+ return -1;
+ }
+
+ if (len > bufsiz) {
+ xo_failure(xop, "invalid UTF-8 data (short): %02hhx (%d/%d)",
+ b, len, bufsiz);
+ return -1;
+ }
+
+ /*
+ * NOTE(review): this loop starts at 2, so buf[1] is never checked
+ * here; xo_utf8_char does reject bad continuation bytes at any
+ * position, so bad data is still caught downstream — confirm
+ * whether the 2 is intentional.
+ */
+ for (i = 2; i < len; i++) {
+ b = (unsigned char ) buf[i];
+ if ((b & 0xc0) != 0x80) {
+ xo_failure(xop, "invalid UTF-8 data (byte %d): %x", i, b);
+ return -1;
+ }
+ }
+
+ return len;
+}
+
+/*
+ * Build a wide character from the input buffer; the number of
+ * bits we pull off the first character is dependent on the length,
+ * but we put 6 bits off all other bytes.
+ */
+static inline wchar_t
+xo_utf8_char (const char *buf, int len)
+{
+ /* Most common case: singleton byte */
+ if (len == 1)
+ return (unsigned char) buf[0];
+
+ int i;
+ wchar_t wc;
+ const unsigned char *cp = (const unsigned char *) buf;
+
+ wc = *cp & xo_utf8_bits[len];
+ for (i = 1; i < len; i++) {
+ wc <<= 6;
+ wc |= cp[i] & 0x3f;
+ if ((cp[i] & 0xc0) != 0x80)
+ return (wchar_t) -1;
+ }
+
+ return wc;
+}
+
+/*
+ * Determine the number of bytes needed to encode a wide character.
+ */
+static int
+xo_utf8_emit_len (wchar_t wc)
+{
+ int len;
+
+ if ((wc & ((1<<7) - 1)) == wc) /* Simple case */
+ len = 1;
+ else if ((wc & ((1<<11) - 1)) == wc)
+ len = 2;
+ else if ((wc & ((1<<16) - 1)) == wc)
+ len = 3;
+ else if ((wc & ((1<<21) - 1)) == wc)
+ len = 4;
+ else if ((wc & ((1<<26) - 1)) == wc)
+ len = 5;
+ else
+ len = 6;
+
+ return len;
+}
+
+static void
+xo_utf8_emit_char (char *buf, int len, wchar_t wc)
+{
+ int i;
+
+ if (len == 1) { /* Simple case */
+ buf[0] = wc & 0x7f;
+ return;
+ }
+
+ for (i = len - 1; i >= 0; i--) {
+ buf[i] = 0x80 | (wc & 0x3f);
+ wc >>= 6;
+ }
+
+ buf[0] &= xo_utf8_bits[len];
+ buf[0] |= ~xo_utf8_bits[len] << 1;
+}
+
+/*
+ * Convert one UTF-8 sequence ('ilen' bytes at 'ibuf') and append it
+ * to 'xbp': verbatim when XOF_NO_LOCALE is set, otherwise via
+ * wcrtomb() in the current locale. Returns the display-column width
+ * of the character (from xo_wcwidth), or 0 on failure.
+ */
+static int
+xo_buf_append_locale_from_utf8 (xo_handle_t *xop, xo_buffer_t *xbp,
+ const char *ibuf, int ilen)
+{
+ wchar_t wc;
+ int len;
+
+ /*
+ * Build our wide character from the input buffer; the number of
+ * bits we pull off the first character is dependent on the length,
+ * but we put 6 bits off all other bytes.
+ */
+ wc = xo_utf8_char(ibuf, ilen);
+ if (wc == (wchar_t) -1) {
+ xo_failure(xop, "invalid utf-8 byte sequence");
+ return 0;
+ }
+
+ if (XOF_ISSET(xop, XOF_NO_LOCALE)) {
+ if (!xo_buf_has_room(xbp, ilen))
+ return 0;
+
+ memcpy(xbp->xb_curp, ibuf, ilen);
+ xbp->xb_curp += ilen;
+
+ } else {
+ if (!xo_buf_has_room(xbp, MB_LEN_MAX + 1))
+ return 0;
+
+ bzero(&xop->xo_mbstate, sizeof(xop->xo_mbstate));
+ len = wcrtomb(xbp->xb_curp, wc, &xop->xo_mbstate);
+
+ if (len <= 0) {
+ xo_failure(xop, "could not convert wide char: %lx",
+ (unsigned long) wc);
+ return 0;
+ }
+ xbp->xb_curp += len;
+ }
+
+ return xo_wcwidth(wc);
+}
+
+/*
+ * Append 'len' bytes of (possibly UTF-8) text to 'xbp', converting
+ * any multi-byte sequences to the current locale's encoding (see
+ * xo_buf_append_locale_from_utf8). ASCII runs are copied in bulk.
+ * On invalid UTF-8, all output appended by this call is backed out.
+ * Also updates the handle's column accounting.
+ */
+static void
+xo_buf_append_locale (xo_handle_t *xop, xo_buffer_t *xbp,
+                      const char *cp, int len)
+{
+    const char *sp = cp, *ep = cp + len;
+    /*
+     * Save the current position as an offset (not a pointer), since
+     * xo_buf_append may realloc the buffer. Note this must be
+     * current-minus-start so the restore below reconstructs it.
+     */
+    unsigned save_off = xbp->xb_curp - xbp->xb_bufp;
+    int slen;
+    int cols = 0;
+
+    for ( ; cp < ep; cp++) {
+        if (!xo_is_utf8(*cp)) {
+            cols += 1;
+            continue;
+        }
+
+        /*
+         * We're looking at a non-ascii UTF-8 character.
+         * First we copy the previous data.
+         * Then we need find the length and validate it.
+         * Then we turn it into a wide string.
+         * Then we turn it into a localized string.
+         * Then we repeat. Isn't i18n fun?
+         */
+        if (sp != cp)
+            xo_buf_append(xbp, sp, cp - sp); /* Append previous data */
+
+        slen = xo_buf_utf8_len(xop, cp, ep - cp);
+        if (slen <= 0) {
+            /* Bad data; back it all out */
+            xbp->xb_curp = xbp->xb_bufp + save_off;
+            return;
+        }
+
+        cols += xo_buf_append_locale_from_utf8(xop, xbp, cp, slen);
+
+        /* Next time through, we'll start at the next character */
+        cp += slen - 1;
+        sp = cp + 1;
+    }
+
+    /* Update column values */
+    if (XOF_ISSET(xop, XOF_COLUMNS))
+        xop->xo_columns += cols;
+    if (XOIF_ISSET(xop, XOIF_ANCHOR))
+        xop->xo_anchor_columns += cols;
+
+    /* Before we fall into the basic logic below, we need reset len */
+    len = ep - sp;
+    if (len != 0) /* Append trailing data */
+        xo_buf_append(xbp, sp, len);
+}
+
+/*
+ * Append the given string to the given buffer, without escaping or
+ * character set conversion. This is the straight copy to the data
+ * buffer with no fanciness.
+ */
+static void
+xo_data_append (xo_handle_t *xop, const char *str, int len)
+{
+ xo_buf_append(&xop->xo_data, str, len);
+}
+
+/*
+ * Append the given string to the given buffer
+ */
+static void
+xo_data_escape (xo_handle_t *xop, const char *str, int len)
+{
+ xo_buf_escape(xop, &xop->xo_data, str, len, 0);
+}
+
+#ifdef LIBXO_NO_RETAIN
+/*
+ * Empty implementations of the retain logic: the public clear calls
+ * are no-ops and xo_retain_find always reports a miss, so every
+ * format string is re-parsed.
+ */
+
+void
+xo_retain_clear_all (void)
+{
+ return;
+}
+
+void
+xo_retain_clear (const char *fmt UNUSED)
+{
+ return;
+}
+static void
+xo_retain_add (const char *fmt UNUSED, xo_field_info_t *fields UNUSED,
+ unsigned num_fields UNUSED)
+{
+ return;
+}
+
+static int
+xo_retain_find (const char *fmt UNUSED, xo_field_info_t **valp UNUSED,
+ unsigned *nump UNUSED)
+{
+ return -1;
+}
+
+#else /* !LIBXO_NO_RETAIN */
+/*
+ * Retain: We retain parsed field definitions to enhance performance,
+ * especially inside loops. We depend on the caller treating the format
+ * strings as immutable, so that we can retain pointers into them. We
+ * hold the pointers in a hash table, so allow quick access. Retained
+ * information is retained until xo_retain_clear is called.
+ */
+
+/*
+ * xo_retain_entry_t holds information about one retained set of
+ * parsed fields.
+ */
+typedef struct xo_retain_entry_s {
+ struct xo_retain_entry_s *xre_next; /* Pointer to next (older) entry */
+ unsigned long xre_hits; /* Number of times we've hit */
+ const char *xre_format; /* Pointer to format string */
+ unsigned xre_num_fields; /* Number of fields saved */
+ xo_field_info_t *xre_fields; /* Pointer to fields */
+} xo_retain_entry_t;
+
+/*
+ * xo_retain_t holds a complete set of parsed fields as a hash table.
+ */
+#ifndef XO_RETAIN_SIZE
+#define XO_RETAIN_SIZE 6
+#endif /* XO_RETAIN_SIZE */
+#define RETAIN_HASH_SIZE (1<<XO_RETAIN_SIZE)
+
+typedef struct xo_retain_s {
+ xo_retain_entry_t *xr_bucket[RETAIN_HASH_SIZE];
+} xo_retain_t;
+
+/* Per-thread hash table and entry count */
+static THREAD_LOCAL(xo_retain_t) xo_retain;
+static THREAD_LOCAL(unsigned) xo_retain_count;
+
+/*
+ * Simple hash function based on Thomas Wang's paper. The original is
+ * gone, but an archive is available on the Way Back Machine:
+ *
+ * http://web.archive.org/web/20071223173210/\
+ * http://www.concentric.net/~Ttwang/tech/inthash.htm
+ *
+ * For our purposes, we can assume the low four bits are uninteresting
+ * since any string less that 16 bytes wouldn't be worthy of
+ * retaining. We toss the high bits also, since these bits are likely
+ * to be common among constant format strings. We then run Wang's
+ * algorithm, and cap the result at RETAIN_HASH_SIZE.
+ */
+static unsigned
+xo_retain_hash (const char *fmt)
+{
+ volatile uintptr_t iptr = (uintptr_t) (const void *) fmt;
+
+ /* Discard low four bits and high bits; they aren't interesting */
+ uint32_t val = (uint32_t) ((iptr >> 4) & (((1 << 24) - 1)));
+
+ val = (val ^ 61) ^ (val >> 16);
+ val = val + (val << 3);
+ val = val ^ (val >> 4);
+ val = val * 0x3a8f05c5; /* My large prime number */
+ val = val ^ (val >> 15);
+ val &= RETAIN_HASH_SIZE - 1;
+
+ return val;
+}
+
+/*
+ * Walk all buckets, clearing all retained entries
+ */
+void
+xo_retain_clear_all (void)
+{
+ int i;
+ xo_retain_entry_t *xrep, *next;
+
+ for (i = 0; i < RETAIN_HASH_SIZE; i++) {
+ for (xrep = xo_retain.xr_bucket[i]; xrep; xrep = next) {
+ next = xrep->xre_next;
+ xo_free(xrep);
+ }
+ xo_retain.xr_bucket[i] = NULL;
+ }
+ xo_retain_count = 0;
+}
+
+/*
+ * Walk all buckets, clearing all retained entries
+ */
+void
+xo_retain_clear (const char *fmt)
+{
+ xo_retain_entry_t **xrepp;
+ unsigned hash = xo_retain_hash(fmt);
+
+ for (xrepp = &xo_retain.xr_bucket[hash]; *xrepp;
+ xrepp = &(*xrepp)->xre_next) {
+ if ((*xrepp)->xre_format == fmt) {
+ *xrepp = (*xrepp)->xre_next;
+ xo_retain_count -= 1;
+ return;
+ }
+ }
+}
+
+/*
+ * Search the hash for an entry matching 'fmt'; return it's fields.
+ */
+static int
+xo_retain_find (const char *fmt, xo_field_info_t **valp, unsigned *nump)
+{
+ if (xo_retain_count == 0)
+ return -1;
+
+ unsigned hash = xo_retain_hash(fmt);
+ xo_retain_entry_t *xrep;
+
+ for (xrep = xo_retain.xr_bucket[hash]; xrep != NULL;
+ xrep = xrep->xre_next) {
+ if (xrep->xre_format == fmt) {
+ *valp = xrep->xre_fields;
+ *nump = xrep->xre_num_fields;
+ xrep->xre_hits += 1;
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+/*
+ * Record a parsed field set for 'fmt' in the retain hash. The entry
+ * header and a copy of the fields are carved from one allocation
+ * (fields follow the header); allocation failure is silently ignored
+ * since retention is only an optimization.
+ */
+static void
+xo_retain_add (const char *fmt, xo_field_info_t *fields, unsigned num_fields)
+{
+ unsigned hash = xo_retain_hash(fmt);
+ xo_retain_entry_t *xrep;
+ unsigned sz = sizeof(*xrep) + (num_fields + 1) * sizeof(*fields);
+ xo_field_info_t *xfip;
+
+ xrep = xo_realloc(NULL, sz);
+ if (xrep == NULL)
+ return;
+
+ xfip = (xo_field_info_t *) &xrep[1];
+ memcpy(xfip, fields, num_fields * sizeof(*fields));
+
+ /* Zero only the header; the copied fields follow it untouched */
+ bzero(xrep, sizeof(*xrep));
+
+ xrep->xre_format = fmt;
+ xrep->xre_fields = xfip;
+ xrep->xre_num_fields = num_fields;
+
+ /* Record the field info in the retain bucket */
+ xrep->xre_next = xo_retain.xr_bucket[hash];
+ xo_retain.xr_bucket[hash] = xrep;
+ xo_retain_count += 1;
+}
+
+#endif /* !LIBXO_NO_RETAIN */
+
+/*
+ * Generate a warning. Normally, this is a text message written to
+ * standard error. If the XOF_WARN_XML flag is set, then we generate
+ * XMLified content on standard output.
+ */
+static void
+xo_warn_hcv (xo_handle_t *xop, int code, int check_warn,
+ const char *fmt, va_list vap)
+{
+ xop = xo_default(xop);
+ if (check_warn && !XOF_ISSET(xop, XOF_WARN))
+ return;
+
+ if (fmt == NULL)
+ return;
+
+ int len = strlen(fmt);
+ int plen = xo_program ? strlen(xo_program) : 0;
+ char *newfmt = alloca(len + 1 + plen + 2); /* NUL, and ": " */
+
+ if (plen) {
+ memcpy(newfmt, xo_program, plen);
+ newfmt[plen++] = ':';
+ newfmt[plen++] = ' ';
+ }
+ memcpy(newfmt + plen, fmt, len);
+ newfmt[len + plen] = '\0';
+
+ if (XOF_ISSET(xop, XOF_WARN_XML)) {
+ static char err_open[] = "<error>";
+ static char err_close[] = "</error>";
+ static char msg_open[] = "<message>";
+ static char msg_close[] = "</message>";
+
+ xo_buffer_t *xbp = &xop->xo_data;
+
+ xo_buf_append(xbp, err_open, sizeof(err_open) - 1);
+ xo_buf_append(xbp, msg_open, sizeof(msg_open) - 1);
+
+ va_list va_local;
+ va_copy(va_local, vap);
+
+ int left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
+ int rc = vsnprintf(xbp->xb_curp, left, newfmt, vap);
+ if (rc >= left) {
+ if (!xo_buf_has_room(xbp, rc)) {
+ va_end(va_local);
+ return;
+ }
+
+ va_end(vap); /* Reset vap to the start */
+ va_copy(vap, va_local);
+
+ left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
+ rc = vsnprintf(xbp->xb_curp, left, fmt, vap);
+ }
+ va_end(va_local);
+
+ rc = xo_escape_xml(xbp, rc, 1);
+ xbp->xb_curp += rc;
+
+ xo_buf_append(xbp, msg_close, sizeof(msg_close) - 1);
+ xo_buf_append(xbp, err_close, sizeof(err_close) - 1);
+
+ if (code >= 0) {
+ const char *msg = strerror(code);
+ if (msg) {
+ xo_buf_append(xbp, ": ", 2);
+ xo_buf_append(xbp, msg, strlen(msg));
+ }
+ }
+
+ xo_buf_append(xbp, "\n", 1); /* Append newline and NUL to string */
+ (void) xo_write(xop);
+
+ } else {
+ vfprintf(stderr, newfmt, vap);
+ if (code >= 0) {
+ const char *msg = strerror(code);
+ if (msg)
+ fprintf(stderr, ": %s", msg);
+ }
+ fprintf(stderr, "\n");
+ }
+}
+
+void
+xo_warn_hc (xo_handle_t *xop, int code, const char *fmt, ...)
+{
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_warn_hcv(xop, code, 0, fmt, vap);
+ va_end(vap);
+}
+
+void
+xo_warn_c (int code, const char *fmt, ...)
+{
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_warn_hcv(NULL, code, 0, fmt, vap);
+ va_end(vap);
+}
+
+void
+xo_warn (const char *fmt, ...)
+{
+ int code = errno;
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_warn_hcv(NULL, code, 0, fmt, vap);
+ va_end(vap);
+}
+
+void
+xo_warnx (const char *fmt, ...)
+{
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_warn_hcv(NULL, -1, 0, fmt, vap);
+ va_end(vap);
+}
+
/*
 * err(3) analog: warn with the current errno's message, flush libxo
 * state, and exit with the given status.  Does not return.
 */
void
xo_err (int eval, const char *fmt, ...)
{
    int saved_errno = errno;
    va_list ap;

    va_start(ap, fmt);
    xo_warn_hcv(NULL, saved_errno, 0, fmt, ap);
    va_end(ap);
    xo_finish();
    exit(eval);
}
+
/*
 * errx(3) analog: warn without errno text, flush libxo state, and
 * exit with the given status.  Does not return.
 */
void
xo_errx (int eval, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    xo_warn_hcv(NULL, -1, 0, fmt, ap);
    va_end(ap);
    xo_finish();
    exit(eval);
}
+
/*
 * errc(3) analog: warn with an explicit error code, flush libxo
 * state, and exit with the given status.  Does not return.
 */
void
xo_errc (int eval, int code, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    xo_warn_hcv(NULL, code, 0, fmt, ap);
    va_end(ap);
    xo_finish();
    exit(eval);
}
+
/*
 * Generate a message.  Unlike a warning, a message is rendered into
 * the handle's own output stream in a style-appropriate way: a
 * <message> element for XML, a "message" div for HTML, plain text for
 * TEXT.  JSON/SDPARAMS/ENCODER styles have no representation and the
 * message is dropped.
 *
 * @xop handle (NULL means the default handle)
 * @code errno-style code appended as ": <strerror>" when > 0
 * @fmt printf-style format; @vap its arguments
 */
void
xo_message_hcv (xo_handle_t *xop, int code, const char *fmt, va_list vap)
{
    static char msg_open[] = "<message>";
    static char msg_close[] = "</message>";
    xo_buffer_t *xbp;
    int rc;
    va_list va_local;

    xop = xo_default(xop);

    if (fmt == NULL || *fmt == '\0')
	return;

    /* If the caller didn't end with a newline, we supply one (and errno) */
    int need_nl = (fmt[strlen(fmt) - 1] != '\n');

    switch (xo_style(xop)) {
    case XO_STYLE_XML:
	xbp = &xop->xo_data;
	if (XOF_ISSET(xop, XOF_PRETTY))
	    xo_buf_indent(xop, xop->xo_indent_by);
	xo_buf_append(xbp, msg_open, sizeof(msg_open) - 1);

	/* Keep a copy of vap so we can retry after growing the buffer */
	va_copy(va_local, vap);

	int left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
	rc = vsnprintf(xbp->xb_curp, left, fmt, vap);
	if (rc >= left) {	/* Truncated: grow the buffer and redo */
	    if (!xo_buf_has_room(xbp, rc)) {
		va_end(va_local);
		return;
	    }

	    va_end(vap);	/* Reset vap to the start */
	    va_copy(vap, va_local);

	    left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
	    rc = vsnprintf(xbp->xb_curp, left, fmt, vap);
	}
	va_end(va_local);

	/* Escape the formatted text in place; rc becomes escaped length */
	rc = xo_escape_xml(xbp, rc, 0);
	xbp->xb_curp += rc;

	if (need_nl && code > 0) {
	    const char *msg = strerror(code);
	    if (msg) {
		xo_buf_append(xbp, ": ", 2);
		xo_buf_append(xbp, msg, strlen(msg));
	    }
	}

	if (need_nl)
	    xo_buf_append(xbp, "\n", 1); /* Append newline and NUL to string */

	xo_buf_append(xbp, msg_close, sizeof(msg_close) - 1);

	if (XOF_ISSET(xop, XOF_PRETTY))
	    xo_buf_append(xbp, "\n", 1); /* Append newline and NUL to string */

	(void) xo_write(xop);
	break;

    case XO_STYLE_HTML:
	{
	    /* Format into a local buffer, growing via alloca if needed */
	    char buf[BUFSIZ], *bp = buf, *cp;
	    int bufsiz = sizeof(buf);
	    int rc2;

	    va_copy(va_local, vap);

	    rc = vsnprintf(bp, bufsiz, fmt, va_local);
	    /* NOTE(review): when rc == bufsiz the output was truncated
	       but no retry happens; this looks like it should be
	       'rc >= bufsiz' -- confirm against upstream libxo */
	    if (rc > bufsiz) {
		bufsiz = rc + BUFSIZ;
		bp = alloca(bufsiz);
		va_end(va_local);
		va_copy(va_local, vap);
		rc = vsnprintf(bp, bufsiz, fmt, va_local);
	    }
	    va_end(va_local);
	    cp = bp + rc;

	    if (need_nl) {
		rc2 = snprintf(cp, bufsiz - rc, "%s%s\n",
			       (code > 0) ? ": " : "",
			       (code > 0) ? strerror(code) : "");
		if (rc2 > 0)
		    rc += rc2;
	    }

	    xo_buf_append_div(xop, "message", 0, NULL, 0, bp, rc, NULL, 0);
	}
	break;

    case XO_STYLE_JSON:
    case XO_STYLE_SDPARAMS:
    case XO_STYLE_ENCODER:
	/* No means of representing messages */
	return;

    case XO_STYLE_TEXT:
	rc = xo_printf_v(xop, fmt, vap);
	/*
	 * XXX need to handle UTF-8 widths
	 */
	if (rc > 0) {
	    if (XOF_ISSET(xop, XOF_COLUMNS))
		xop->xo_columns += rc;
	    if (XOIF_ISSET(xop, XOIF_ANCHOR))
		xop->xo_anchor_columns += rc;
	}

	if (need_nl && code > 0) {
	    const char *msg = strerror(code);
	    if (msg) {
		xo_printf(xop, ": %s", msg);
	    }
	}
	if (need_nl)
	    xo_printf(xop, "\n");

	break;
    }

    /* The HTML path may have left a line <div> open; close it now */
    switch (xo_style(xop)) {
    case XO_STYLE_HTML:
	if (XOIF_ISSET(xop, XOIF_DIV_OPEN)) {
	    static char div_close[] = "</div>";
	    XOIF_CLEAR(xop, XOIF_DIV_OPEN);
	    xo_data_append(xop, div_close, sizeof(div_close) - 1);

	    if (XOF_ISSET(xop, XOF_PRETTY))
		xo_data_append(xop, "\n", 1);
	}
	break;
    }

    (void) xo_flush_h(xop);
}
+
+void
+xo_message_hc (xo_handle_t *xop, int code, const char *fmt, ...)
+{
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_message_hcv(xop, code, fmt, vap);
+ va_end(vap);
+}
+
+void
+xo_message_c (int code, const char *fmt, ...)
+{
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_message_hcv(NULL, code, fmt, vap);
+ va_end(vap);
+}
+
+void
+xo_message_e (const char *fmt, ...)
+{
+ int code = errno;
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_message_hcv(NULL, code, fmt, vap);
+ va_end(vap);
+}
+
+void
+xo_message (const char *fmt, ...)
+{
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_message_hcv(NULL, 0, fmt, vap);
+ va_end(vap);
+}
+
+static void
+xo_failure (xo_handle_t *xop, const char *fmt, ...)
+{
+ if (!XOF_ISSET(xop, XOF_WARN))
+ return;
+
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_warn_hcv(xop, -1, 1, fmt, vap);
+ va_end(vap);
+}
+
+/**
+ * Create a handle for use by later libxo functions.
+ *
+ * Note: normal use of libxo does not require a distinct handle, since
+ * the default handle (used when NULL is passed) generates text on stdout.
+ *
+ * @style Style of output desired (XO_STYLE_* value)
+ * @flags Set of XOF_* flags in use with this handle
+ */
+xo_handle_t *
+xo_create (xo_style_t style, xo_xof_flags_t flags)
+{
+ xo_handle_t *xop = xo_realloc(NULL, sizeof(*xop));
+
+ if (xop) {
+ bzero(xop, sizeof(*xop));
+
+ xop->xo_style = style;
+ XOF_SET(xop, flags);
+ xo_init_handle(xop);
+ xop->xo_style = style; /* Reset style (see LIBXO_OPTIONS) */
+ }
+
+ return xop;
+}
+
+/**
+ * Create a handle that will write to the given file. Use
+ * the XOF_CLOSE_FP flag to have the file closed on xo_destroy().
+ * @fp FILE pointer to use
+ * @style Style of output desired (XO_STYLE_* value)
+ * @flags Set of XOF_* flags to use with this handle
+ */
+xo_handle_t *
+xo_create_to_file (FILE *fp, xo_style_t style, xo_xof_flags_t flags)
+{
+ xo_handle_t *xop = xo_create(style, flags);
+
+ if (xop) {
+ xop->xo_opaque = fp;
+ xop->xo_write = xo_write_to_file;
+ xop->xo_close = xo_close_file;
+ xop->xo_flush = xo_flush_file;
+ }
+
+ return xop;
+}
+
+/**
+ * Set the default handler to output to a file.
+ * @xop libxo handle
+ * @fp FILE pointer to use
+ */
+int
+xo_set_file_h (xo_handle_t *xop, FILE *fp)
+{
+ xop = xo_default(xop);
+
+ if (fp == NULL) {
+ xo_failure(xop, "xo_set_file: NULL fp");
+ return -1;
+ }
+
+ xop->xo_opaque = fp;
+ xop->xo_write = xo_write_to_file;
+ xop->xo_close = xo_close_file;
+ xop->xo_flush = xo_flush_file;
+
+ return 0;
+}
+
/**
 * Point the default handle's output at a stdio stream.
 * @fp FILE pointer to use
 * Returns 0 on success, -1 if fp is NULL.
 */
int
xo_set_file (FILE *fp)
{
    return xo_set_file_h(NULL, fp);
}
+
/**
 * Release any resources held by the handle.
 * @xop_arg XO handle to destroy (or NULL for default handle)
 */
void
xo_destroy (xo_handle_t *xop_arg)
{
    xo_handle_t *xop = xo_default(xop_arg);

    /* Push out any buffered output before tearing down */
    xo_flush_h(xop);

    /* Close the underlying stream only if the caller asked us to */
    if (xop->xo_close && XOF_ISSET(xop, XOF_CLOSE_FP))
	xop->xo_close(xop->xo_opaque);

    /* Free all owned buffers and the container stack */
    xo_free(xop->xo_stack);
    xo_buf_cleanup(&xop->xo_data);
    xo_buf_cleanup(&xop->xo_fmt);
    xo_buf_cleanup(&xop->xo_predicate);
    xo_buf_cleanup(&xop->xo_attrs);
    xo_buf_cleanup(&xop->xo_color_buf);

    if (xop->xo_version)
	xo_free(xop->xo_version);

    /* The default handle is static storage: reset it instead of freeing */
    if (xop_arg == NULL) {
	bzero(&xo_default_handle, sizeof(xo_default_handle));
	xo_default_inited = 0;
    } else
	xo_free(xop);
}
+
+/**
+ * Record a new output style to use for the given handle (or default if
+ * handle is NULL). This output style will be used for any future output.
+ *
+ * @xop XO handle to alter (or NULL for default handle)
+ * @style new output style (XO_STYLE_*)
+ */
+void
+xo_set_style (xo_handle_t *xop, xo_style_t style)
+{
+ xop = xo_default(xop);
+ xop->xo_style = style;
+}
+
+xo_style_t
+xo_get_style (xo_handle_t *xop)
+{
+ xop = xo_default(xop);
+ return xo_style(xop);
+}
+
+static int
+xo_name_to_style (const char *name)
+{
+ if (strcmp(name, "xml") == 0)
+ return XO_STYLE_XML;
+ else if (strcmp(name, "json") == 0)
+ return XO_STYLE_JSON;
+ else if (strcmp(name, "encoder") == 0)
+ return XO_STYLE_ENCODER;
+ else if (strcmp(name, "text") == 0)
+ return XO_STYLE_TEXT;
+ else if (strcmp(name, "html") == 0)
+ return XO_STYLE_HTML;
+ else if (strcmp(name, "sdparams") == 0)
+ return XO_STYLE_SDPARAMS;
+
+ return -1;
+}
+
+/*
+ * Indicate if the style is an "encoding" one as opposed to a "display" one.
+ */
+static int
+xo_style_is_encoding (xo_handle_t *xop)
+{
+ if (xo_style(xop) == XO_STYLE_JSON
+ || xo_style(xop) == XO_STYLE_XML
+ || xo_style(xop) == XO_STYLE_SDPARAMS
+ || xo_style(xop) == XO_STYLE_ENCODER)
+ return 1;
+ return 0;
+}
+
/* Simple name-value mapping; tables are terminated by a NULL xm_name */
typedef struct xo_mapping_s {
    xo_xff_flags_t xm_value;	/* Flag/value this name maps to */
    const char *xm_name;	/* Textual name of the value */
} xo_mapping_t;
+
+static xo_xff_flags_t
+xo_name_lookup (xo_mapping_t *map, const char *value, int len)
+{
+ if (len == 0)
+ return 0;
+
+ if (len < 0)
+ len = strlen(value);
+
+ while (isspace((int) *value)) {
+ value += 1;
+ len -= 1;
+ }
+
+ while (isspace((int) value[len]))
+ len -= 1;
+
+ if (*value == '\0')
+ return 0;
+
+ for ( ; map->xm_name; map++)
+ if (strncmp(map->xm_name, value, len) == 0)
+ return map->xm_value;
+
+ return 0;
+}
+
#ifdef NOT_NEEDED_YET
/*
 * Reverse lookup: map a flag value back to its name, or NULL when the
 * value is 0 or not present in the table.  Currently unused (guarded
 * by NOT_NEEDED_YET).
 */
static const char *
xo_value_lookup (xo_mapping_t *map, xo_xff_flags_t value)
{
    if (value == 0)
	return NULL;

    for ( ; map->xm_name; map++)
	if (map->xm_value == value)
	    return map->xm_name;

    return NULL;
}
#endif /* NOT_NEEDED_YET */
+
/*
 * Option names accepted by xo_set_options(), mapped to XOF_* flag
 * bits.  Lookup is linear (xo_name_lookup); NULL name terminates.
 */
static xo_mapping_t xo_xof_names[] = {
    { XOF_COLOR_ALLOWED, "color" },
    { XOF_COLUMNS, "columns" },
    { XOF_DTRT, "dtrt" },
    { XOF_FLUSH, "flush" },
    { XOF_IGNORE_CLOSE, "ignore-close" },
    { XOF_INFO, "info" },
    { XOF_KEYS, "keys" },
    { XOF_LOG_GETTEXT, "log-gettext" },
    { XOF_LOG_SYSLOG, "log-syslog" },
    { XOF_NO_HUMANIZE, "no-humanize" },
    { XOF_NO_LOCALE, "no-locale" },
    { XOF_RETAIN_NONE, "no-retain" },
    { XOF_NO_TOP, "no-top" },
    { XOF_NOT_FIRST, "not-first" },
    { XOF_PRETTY, "pretty" },
    { XOF_RETAIN_ALL, "retain" },
    { XOF_UNDERSCORES, "underscores" },
    { XOF_UNITS, "units" },
    { XOF_WARN, "warn" },
    { XOF_WARN_XML, "warn-xml" },
    { XOF_XPATH, "xpath" },
    { 0, NULL }
};
+
/*
 * Convert string name to XOF_* flag value.
 * Not all are useful.  Or safe.  Or sane.
 * Returns 0 for an unknown name; note the cast narrows the flag type
 * to unsigned, so only the low bits survive.
 */
static unsigned
xo_name_to_flag (const char *name)
{
    return (unsigned) xo_name_lookup(xo_xof_names, name, -1);
}
+
+int
+xo_set_style_name (xo_handle_t *xop, const char *name)
+{
+ if (name == NULL)
+ return -1;
+
+ int style = xo_name_to_style(name);
+ if (style < 0)
+ return -1;
+
+ xo_set_style(xop, style);
+ return 0;
+}
+
+/*
+ * Set the options for a handle using a string of options
+ * passed in. The input is a comma-separated set of names
+ * and optional values: "xml,pretty,indent=4"
+ */
+int
+xo_set_options (xo_handle_t *xop, const char *input)
+{
+ char *cp, *ep, *vp, *np, *bp;
+ int style = -1, new_style, len, rc = 0;
+ xo_xof_flags_t new_flag;
+
+ if (input == NULL)
+ return 0;
+
+ xop = xo_default(xop);
+
+#ifdef LIBXO_COLOR_ON_BY_DEFAULT
+ /* If the installer used --enable-color-on-by-default, then we allow it */
+ XOF_SET(xop, XOF_COLOR_ALLOWED);
+#endif /* LIBXO_COLOR_ON_BY_DEFAULT */
+
+ /*
+ * We support a simpler, old-school style of giving option
+ * also, using a single character for each option. It's
+ * ideal for lazy people, such as myself.
+ */
+ if (*input == ':') {
+ int sz;
+
+ for (input++ ; *input; input++) {
+ switch (*input) {
+ case 'c':
+ XOF_SET(xop, XOF_COLOR_ALLOWED);
+ break;
+
+ case 'f':
+ XOF_SET(xop, XOF_FLUSH);
+ break;
+
+ case 'F':
+ XOF_SET(xop, XOF_FLUSH_LINE);
+ break;
+
+ case 'g':
+ XOF_SET(xop, XOF_LOG_GETTEXT);
+ break;
+
+ case 'H':
+ xop->xo_style = XO_STYLE_HTML;
+ break;
+
+ case 'I':
+ XOF_SET(xop, XOF_INFO);
+ break;
+
+ case 'i':
+ sz = strspn(input + 1, "0123456789");
+ if (sz > 0) {
+ xop->xo_indent_by = atoi(input + 1);
+ input += sz - 1; /* Skip value */
+ }
+ break;
+
+ case 'J':
+ xop->xo_style = XO_STYLE_JSON;
+ break;
+
+ case 'k':
+ XOF_SET(xop, XOF_KEYS);
+ break;
+
+ case 'n':
+ XOF_SET(xop, XOF_NO_HUMANIZE);
+ break;
+
+ case 'P':
+ XOF_SET(xop, XOF_PRETTY);
+ break;
+
+ case 'T':
+ xop->xo_style = XO_STYLE_TEXT;
+ break;
+
+ case 'U':
+ XOF_SET(xop, XOF_UNITS);
+ break;
+
+ case 'u':
+ XOF_SET(xop, XOF_UNDERSCORES);
+ break;
+
+ case 'W':
+ XOF_SET(xop, XOF_WARN);
+ break;
+
+ case 'X':
+ xop->xo_style = XO_STYLE_XML;
+ break;
+
+ case 'x':
+ XOF_SET(xop, XOF_XPATH);
+ break;
+ }
+ }
+ return 0;
+ }
+
+ len = strlen(input) + 1;
+ bp = alloca(len);
+ memcpy(bp, input, len);
+
+ for (cp = bp, ep = cp + len - 1; cp && cp < ep; cp = np) {
+ np = strchr(cp, ',');
+ if (np)
+ *np++ = '\0';
+
+ vp = strchr(cp, '=');
+ if (vp)
+ *vp++ = '\0';
+
+ if (strcmp("colors", cp) == 0) {
+ /* XXX Look for colors=red-blue+green-yellow */
+ continue;
+ }
+
+ /*
+ * For options, we don't allow "encoder" since we want to
+ * handle it explicitly below as "encoder=xxx".
+ */
+ new_style = xo_name_to_style(cp);
+ if (new_style >= 0 && new_style != XO_STYLE_ENCODER) {
+ if (style >= 0)
+ xo_warnx("ignoring multiple styles: '%s'", cp);
+ else
+ style = new_style;
+ } else {
+ new_flag = xo_name_to_flag(cp);
+ if (new_flag != 0)
+ XOF_SET(xop, new_flag);
+ else {
+ if (strcmp(cp, "no-color") == 0) {
+ XOF_CLEAR(xop, XOF_COLOR_ALLOWED);
+ } else if (strcmp(cp, "indent") == 0) {
+ if (vp)
+ xop->xo_indent_by = atoi(vp);
+ else
+ xo_failure(xop, "missing value for indent option");
+ } else if (strcmp(cp, "encoder") == 0) {
+ if (vp == NULL)
+ xo_failure(xop, "missing value for encoder option");
+ else {
+ if (xo_encoder_init(xop, vp)) {
+ xo_failure(xop, "encoder not found: %s", vp);
+ rc = -1;
+ }
+ }
+
+ } else {
+ xo_warnx("unknown libxo option value: '%s'", cp);
+ rc = -1;
+ }
+ }
+ }
+ }
+
+ if (style > 0)
+ xop->xo_style= style;
+
+ return rc;
+}
+
+/**
+ * Set one or more flags for a given handle (or default if handle is NULL).
+ * These flags will affect future output.
+ *
+ * @xop XO handle to alter (or NULL for default handle)
+ * @flags Flags to be set (XOF_*)
+ */
+void
+xo_set_flags (xo_handle_t *xop, xo_xof_flags_t flags)
+{
+ xop = xo_default(xop);
+
+ XOF_SET(xop, flags);
+}
+
+xo_xof_flags_t
+xo_get_flags (xo_handle_t *xop)
+{
+ xop = xo_default(xop);
+
+ return xop->xo_flags;
+}
+
/*
 * strndup with a twist: len < 0 means strlen.
 * Returns NULL on allocation failure.
 */
static char *
xo_strndup (const char *str, int len)
{
    char *copy;

    if (len < 0)
	len = strlen(str);

    copy = xo_realloc(NULL, len + 1);
    if (copy != NULL) {
	memcpy(copy, str, len);
	copy[len] = '\0';
    }

    return copy;
}
+
+/**
+ * Record a leading prefix for the XPath we generate. This allows the
+ * generated data to be placed within an XML hierarchy but still have
+ * accurate XPath expressions.
+ *
+ * @xop XO handle to alter (or NULL for default handle)
+ * @path The XPath expression
+ */
+void
+xo_set_leading_xpath (xo_handle_t *xop, const char *path)
+{
+ xop = xo_default(xop);
+
+ if (xop->xo_leading_xpath) {
+ xo_free(xop->xo_leading_xpath);
+ xop->xo_leading_xpath = NULL;
+ }
+
+ if (path == NULL)
+ return;
+
+ xop->xo_leading_xpath = xo_strndup(path, -1);
+}
+
+/**
+ * Record the info data for a set of tags
+ *
+ * @xop XO handle to alter (or NULL for default handle)
+ * @info Info data (xo_info_t) to be recorded (or NULL) (MUST BE SORTED)
+ * @count Number of entries in info (or -1 to count them ourselves)
+ */
+void
+xo_set_info (xo_handle_t *xop, xo_info_t *infop, int count)
+{
+ xop = xo_default(xop);
+
+ if (count < 0 && infop) {
+ xo_info_t *xip;
+
+ for (xip = infop, count = 0; xip->xi_name; xip++, count++)
+ continue;
+ }
+
+ xop->xo_info = infop;
+ xop->xo_info_count = count;
+}
+
+/**
+ * Set the formatter callback for a handle. The callback should
+ * return a newly formatting contents of a formatting instruction,
+ * meaning the bits inside the braces.
+ */
+void
+xo_set_formatter (xo_handle_t *xop, xo_formatter_t func,
+ xo_checkpointer_t cfunc)
+{
+ xop = xo_default(xop);
+
+ xop->xo_formatter = func;
+ xop->xo_checkpointer = cfunc;
+}
+
+/**
+ * Clear one or more flags for a given handle (or default if handle is NULL).
+ * These flags will affect future output.
+ *
+ * @xop XO handle to alter (or NULL for default handle)
+ * @flags Flags to be cleared (XOF_*)
+ */
+void
+xo_clear_flags (xo_handle_t *xop, xo_xof_flags_t flags)
+{
+ xop = xo_default(xop);
+
+ XOF_CLEAR(xop, flags);
+}
+
+static const char *
+xo_state_name (xo_state_t state)
+{
+ static const char *names[] = {
+ "init",
+ "open_container",
+ "close_container",
+ "open_list",
+ "close_list",
+ "open_instance",
+ "close_instance",
+ "open_leaf_list",
+ "close_leaf_list",
+ "discarding",
+ "marker",
+ "emit",
+ "emit_leaf_list",
+ "finish",
+ NULL
+ };
+
+ if (state < (sizeof(names) / sizeof(names[0])))
+ return names[state];
+
+ return "unknown";
+}
+
+static void
+xo_line_ensure_open (xo_handle_t *xop, xo_xff_flags_t flags UNUSED)
+{
+ static char div_open[] = "<div class=\"line\">";
+ static char div_open_blank[] = "<div class=\"blank-line\">";
+
+ if (XOIF_ISSET(xop, XOIF_DIV_OPEN))
+ return;
+
+ if (xo_style(xop) != XO_STYLE_HTML)
+ return;
+
+ XOIF_SET(xop, XOIF_DIV_OPEN);
+ if (flags & XFF_BLANK_LINE)
+ xo_data_append(xop, div_open_blank, sizeof(div_open_blank) - 1);
+ else
+ xo_data_append(xop, div_open, sizeof(div_open) - 1);
+
+ if (XOF_ISSET(xop, XOF_PRETTY))
+ xo_data_append(xop, "\n", 1);
+}
+
+static void
+xo_line_close (xo_handle_t *xop)
+{
+ static char div_close[] = "</div>";
+
+ switch (xo_style(xop)) {
+ case XO_STYLE_HTML:
+ if (!XOIF_ISSET(xop, XOIF_DIV_OPEN))
+ xo_line_ensure_open(xop, 0);
+
+ XOIF_CLEAR(xop, XOIF_DIV_OPEN);
+ xo_data_append(xop, div_close, sizeof(div_close) - 1);
+
+ if (XOF_ISSET(xop, XOF_PRETTY))
+ xo_data_append(xop, "\n", 1);
+ break;
+
+ case XO_STYLE_TEXT:
+ xo_data_append(xop, "\n", 1);
+ break;
+ }
+}
+
+static int
+xo_info_compare (const void *key, const void *data)
+{
+ const char *name = key;
+ const xo_info_t *xip = data;
+
+ return strcmp(name, xip->xi_name);
+}
+
+
/*
 * Find the xo_info_t entry for a tag name, or NULL when absent.  The
 * caller's info array (see xo_set_info) must be sorted by name, since
 * we use bsearch.  @name need not be NUL-terminated; @nlen gives its
 * length.
 */
static xo_info_t *
xo_info_find (xo_handle_t *xop, const char *name, int nlen)
{
    xo_info_t *xip;
    char *cp = alloca(nlen + 1); /* Need local copy for NUL termination */

    memcpy(cp, name, nlen);
    cp[nlen] = '\0';

    xip = bsearch(cp, xop->xo_info, xop->xo_info_count,
		  sizeof(xop->xo_info[0]), xo_info_compare);
    return xip;
}
+
/* Pack a (source, target) encoding pair into one switchable value */
#define CONVERT(_have, _need) (((_have) << 8) | (_need))

/*
 * Check to see that the conversion is safe and sane.
 * Returns 0 for a supported encoding pair, 1 (after a warning)
 * otherwise.  Note there is no path converting *to* wide characters.
 */
static int
xo_check_conversion (xo_handle_t *xop, int have_enc, int need_enc)
{
    switch (CONVERT(have_enc, need_enc)) {
    case CONVERT(XF_ENC_UTF8, XF_ENC_UTF8):
    case CONVERT(XF_ENC_UTF8, XF_ENC_LOCALE):
    case CONVERT(XF_ENC_WIDE, XF_ENC_UTF8):
    case CONVERT(XF_ENC_WIDE, XF_ENC_LOCALE):
    case CONVERT(XF_ENC_LOCALE, XF_ENC_LOCALE):
    case CONVERT(XF_ENC_LOCALE, XF_ENC_UTF8):
	return 0;

    default:
	xo_failure(xop, "invalid conversion (%c:%c)", have_enc, need_enc);
	return 1;
    }
}
+
/*
 * Copy a string into the output buffer, converting between encodings
 * (wide chars, UTF-8, or the locale's multibyte encoding) and escaping
 * characters as required by the output style.  Exactly one of 'wcp'
 * (wide input) and 'cp' (byte input) is used, selected by 'have_enc'.
 * 'len' < 0 means "until NUL"; 'max' > 0 caps the display width for
 * text/HTML output.  Returns the number of display columns emitted,
 * or -1 on malformed input.
 */
static int
xo_format_string_direct (xo_handle_t *xop, xo_buffer_t *xbp,
			 xo_xff_flags_t flags,
			 const wchar_t *wcp, const char *cp, int len, int max,
			 int need_enc, int have_enc)
{
    int cols = 0;
    wchar_t wc = 0;
    int ilen, olen, width;
    int attr = (flags & XFF_ATTR);
    const char *sp;

    if (len > 0 && !xo_buf_has_room(xbp, len))
	return 0;

    for (;;) {
	if (len == 0)
	    break;

	if (cp) {
	    if (*cp == '\0')
		break;
	    /* XFF_UNESCAPE: skip a '\' or '%' escape introducer */
	    if ((flags & XFF_UNESCAPE) && (*cp == '\\' || *cp == '%')) {
		cp += 1;
		len -= 1;
	    }
	}

	if (wcp && *wcp == L'\0')
	    break;

	ilen = 0;

	/* Decode one character into 'wc'; 'ilen' is bytes consumed */
	switch (have_enc) {
	case XF_ENC_WIDE:		/* Wide character */
	    wc = *wcp++;
	    ilen = 1;
	    break;

	case XF_ENC_UTF8:		/* UTF-8 */
	    ilen = xo_utf8_to_wc_len(cp);
	    if (ilen < 0) {
		xo_failure(xop, "invalid UTF-8 character: %02hhx", *cp);
		return -1;	/* Can't continue; we can't find the end */
	    }

	    if (len > 0 && len < ilen) {
		len = 0;	/* Break out of the loop */
		continue;
	    }

	    wc = xo_utf8_char(cp, ilen);
	    if (wc == (wchar_t) -1) {
		xo_failure(xop, "invalid UTF-8 character: %02hhx/%d",
			   *cp, ilen);
		return -1;	/* Can't continue; we can't find the end */
	    }
	    cp += ilen;
	    break;

	case XF_ENC_LOCALE:		/* Native locale */
	    ilen = (len > 0) ? len : MB_LEN_MAX;
	    ilen = mbrtowc(&wc, cp, ilen, &xop->xo_mbstate);
	    if (ilen < 0) {		/* Invalid data; skip */
		xo_failure(xop, "invalid mbs char: %02hhx", *cp);
		wc = L'?';
		ilen = 1;
	    }

	    if (ilen == 0) {		/* Hit a wide NUL character */
		len = 0;
		continue;
	    }

	    cp += ilen;
	    break;
	}

	/* Reduce len, but not below zero */
	if (len > 0) {
	    len -= ilen;
	    if (len < 0)
		len = 0;
	}

	/*
	 * Find the width-in-columns of this character, which must be done
	 * in wide characters, since we lack a mbswidth() function.  If
	 * it doesn't fit
	 */
	width = xo_wcwidth(wc);
	if (width < 0)
	    width = iswcntrl(wc) ? 0 : 1;

	/* The 'max' column cap only applies to display styles */
	if (xo_style(xop) == XO_STYLE_TEXT || xo_style(xop) == XO_STYLE_HTML) {
	    if (max > 0 && cols + width > max)
		break;
	}

	/* Encode 'wc' into the output, escaping per the output style */
	switch (need_enc) {
	case XF_ENC_UTF8:

	    /* Output in UTF-8 needs to be escaped, based on the style */
	    switch (xo_style(xop)) {
	    case XO_STYLE_XML:
	    case XO_STYLE_HTML:
		if (wc == '<')
		    sp = xo_xml_lt;
		else if (wc == '>')
		    sp = xo_xml_gt;
		else if (wc == '&')
		    sp = xo_xml_amp;
		else if (attr && wc == '"')
		    sp = xo_xml_quot;
		else
		    break;

		int slen = strlen(sp);
		if (!xo_buf_has_room(xbp, slen - 1))
		    return -1;

		memcpy(xbp->xb_curp, sp, slen);
		xbp->xb_curp += slen;
		goto done_with_encoding; /* Need multi-level 'break' */

	    case XO_STYLE_JSON:
		if (wc != '\\' && wc != '"' && wc != '\n' && wc != '\r')
		    break;

		if (!xo_buf_has_room(xbp, 2))
		    return -1;

		*xbp->xb_curp++ = '\\';
		if (wc == '\n')
		    wc = 'n';
		else if (wc == '\r')
		    wc = 'r';
		else wc = wc & 0x7f;

		*xbp->xb_curp++ = wc;
		goto done_with_encoding;

	    case XO_STYLE_SDPARAMS:
		if (wc != '\\' && wc != '"' && wc != ']')
		    break;

		if (!xo_buf_has_room(xbp, 2))
		    return -1;

		*xbp->xb_curp++ = '\\';
		wc = wc & 0x7f;
		*xbp->xb_curp++ = wc;
		goto done_with_encoding;
	    }

	    /* Not escaped above: emit the raw UTF-8 byte sequence */
	    olen = xo_utf8_emit_len(wc);
	    if (olen < 0) {
		xo_failure(xop, "ignoring bad length");
		continue;
	    }

	    if (!xo_buf_has_room(xbp, olen))
		return -1;

	    xo_utf8_emit_char(xbp->xb_curp, olen, wc);
	    xbp->xb_curp += olen;
	    break;

	case XF_ENC_LOCALE:
	    if (!xo_buf_has_room(xbp, MB_LEN_MAX + 1))
		return -1;

	    olen = wcrtomb(xbp->xb_curp, wc, &xop->xo_mbstate);
	    if (olen <= 0) {
		xo_failure(xop, "could not convert wide char: %lx",
			   (unsigned long) wc);
		width = 1;
		*xbp->xb_curp++ = '?';
	    } else
		xbp->xb_curp += olen;
	    break;
	}

    done_with_encoding:
	cols += width;
    }

    return cols;
}
+
+static int
+xo_needed_encoding (xo_handle_t *xop)
+{
+ if (XOF_ISSET(xop, XOF_UTF8)) /* Check the override flag */
+ return XF_ENC_UTF8;
+
+ if (xo_style(xop) == XO_STYLE_TEXT) /* Text means locale */
+ return XF_ENC_LOCALE;
+
+ return XF_ENC_UTF8; /* Otherwise, we love UTF-8 */
+}
+
/*
 * Format one string field ('%s', '%ls', or '%m') into 'xbp'.  Pulls
 * the argument from xop->xo_vap, applies encoding conversion and
 * min/max width handling, and returns the number of bytes written.
 * On return xb_curp is deliberately left where it was; the caller
 * advances it using the returned count.
 */
static int
xo_format_string (xo_handle_t *xop, xo_buffer_t *xbp, xo_xff_flags_t flags,
		  xo_format_t *xfp)
{
    static char null[] = "(null)";
    static char null_no_quotes[] = "null";

    char *cp = NULL;
    wchar_t *wcp = NULL;
    int len, cols = 0, rc = 0;
    int off = xbp->xb_curp - xbp->xb_bufp, off2;
    int need_enc = xo_needed_encoding(xop);

    if (xo_check_conversion(xop, xfp->xf_enc, need_enc))
	return 0;

    len = xfp->xf_width[XF_WIDTH_SIZE];

    if (xfp->xf_fc == 'm') {
	/* '%m' takes no argument; it formats the saved errno */
	cp = strerror(xop->xo_errno);
	if (len < 0)
	    len = cp ? strlen(cp) : 0;
	goto normal_string;

    } else if (xfp->xf_enc == XF_ENC_WIDE) {
	/* Note: va_arg is consumed even when the field is skipped */
	wcp = va_arg(xop->xo_vap, wchar_t *);
	if (xfp->xf_skip)
	    return 0;

	/*
	 * Dont' deref NULL; use the traditional "(null)" instead
	 * of the more accurate "who's been a naughty boy, then?".
	 */
	if (wcp == NULL) {
	    cp = null;
	    len = sizeof(null) - 1;
	}

    } else {
	cp = va_arg(xop->xo_vap, char *); /* UTF-8 or native */

    normal_string:
	if (xfp->xf_skip)
	    return 0;

	/* Echo "Dont' deref NULL" logic */
	if (cp == NULL) {
	    /* Encoding styles get a bare "null" when unquoted */
	    if ((flags & XFF_NOQUOTE) && xo_style_is_encoding(xop)) {
		cp = null_no_quotes;
		len = sizeof(null_no_quotes) - 1;
	    } else {
		cp = null;
		len = sizeof(null) - 1;
	    }
	}

	/*
	 * Optimize the most common case, which is "%s".  We just
	 * need to copy the complete string to the output buffer.
	 */
	if (xfp->xf_enc == need_enc
		&& xfp->xf_width[XF_WIDTH_MIN] < 0
		&& xfp->xf_width[XF_WIDTH_SIZE] < 0
		&& xfp->xf_width[XF_WIDTH_MAX] < 0
		&& !(XOIF_ISSET(xop, XOIF_ANCHOR)
		     || XOF_ISSET(xop, XOF_COLUMNS))) {
	    len = strlen(cp);
	    xo_buf_escape(xop, xbp, cp, len, flags);

	    /*
	     * Our caller expects xb_curp left untouched, so we have
	     * to reset it and return the number of bytes written to
	     * the buffer.
	     */
	    off2 = xbp->xb_curp - xbp->xb_bufp;
	    rc = off2 - off;
	    xbp->xb_curp = xbp->xb_bufp + off;

	    return rc;
	}
    }

    /* Slow path: per-character conversion with width accounting */
    cols = xo_format_string_direct(xop, xbp, flags, wcp, cp, len,
				   xfp->xf_width[XF_WIDTH_MAX],
				   need_enc, xfp->xf_enc);
    if (cols < 0)
	goto bail;

    /*
     * xo_buf_append* will move xb_curp, so we save/restore it.
     */
    off2 = xbp->xb_curp - xbp->xb_bufp;
    rc = off2 - off;
    xbp->xb_curp = xbp->xb_bufp + off;

    if (cols < xfp->xf_width[XF_WIDTH_MIN]) {
	/*
	 * Find the number of columns needed to display the string.
	 * If we have the original wide string, we just call wcswidth,
	 * but if we did the work ourselves, then we need to do it.
	 */
	int delta = xfp->xf_width[XF_WIDTH_MIN] - cols;
	if (!xo_buf_has_room(xbp, xfp->xf_width[XF_WIDTH_MIN]))
	    goto bail;

	/*
	 * If seen_minus, then pad on the right; otherwise move it so
	 * we can pad on the left.
	 */
	if (xfp->xf_seen_minus) {
	    cp = xbp->xb_curp + rc;
	} else {
	    cp = xbp->xb_curp;
	    memmove(xbp->xb_curp + delta, xbp->xb_curp, rc);
	}

	/* Set the padding */
	memset(cp, (xfp->xf_leading_zero > 0) ? '0' : ' ', delta);
	rc += delta;
	cols += delta;
    }

    if (XOF_ISSET(xop, XOF_COLUMNS))
	xop->xo_columns += cols;
    if (XOIF_ISSET(xop, XOIF_ANCHOR))
	xop->xo_anchor_columns += cols;

    return rc;

 bail:
    xbp->xb_curp = xbp->xb_bufp + off;
    return 0;
}
+
+/*
+ * Look backwards in a buffer to find a numeric value
+ */
+static int
+xo_buf_find_last_number (xo_buffer_t *xbp, int start_offset)
+{
+ int rc = 0; /* Fail with zero */
+ int digit = 1;
+ char *sp = xbp->xb_bufp;
+ char *cp = sp + start_offset;
+
+ while (--cp >= sp)
+ if (isdigit((int) *cp))
+ break;
+
+ for ( ; cp >= sp; cp--) {
+ if (!isdigit((int) *cp))
+ break;
+ rc += (*cp - '0') * digit;
+ digit *= 10;
+ }
+
+ return rc;
+}
+
/*
 * Count the display columns occupied by 'len' bytes of UTF-8 text.
 * Stops early (returning the count so far) on malformed input; only
 * printable characters contribute to the total.
 */
static int
xo_count_utf8_cols (const char *str, int len)
{
    const char *limit = str + len;
    int total = 0;

    while (str < limit) {
	int clen = xo_utf8_to_wc_len(str);
	if (clen < 0)		/* Broken input is very bad */
	    return total;

	wchar_t wc = xo_utf8_char(str, clen);
	if (wc == (wchar_t) -1)
	    return total;

	/* We only print printable characters */
	if (iswprint((wint_t) wc)) {
	    /*
	     * Find the width-in-columns of this character, which must
	     * be done in wide characters, since we lack a mbswidth()
	     * function.
	     */
	    int width = xo_wcwidth(wc);
	    if (width < 0)
		width = iswcntrl(wc) ? 0 : 1;

	    total += width;
	}

	str += clen;
    }

    return total;
}
+
#ifdef HAVE_GETTEXT
/*
 * dgettext(3) wrapper using the handle's domain; logs the lookup to
 * stderr when XOF_LOG_GETTEXT is set.
 */
static inline const char *
xo_dgettext (xo_handle_t *xop, const char *str)
{
    const char *domainname = xop->xo_gt_domain;
    const char *res;

    res = dgettext(domainname, str);

    if (XOF_ISSET(xop, XOF_LOG_GETTEXT))
	fprintf(stderr, "xo: gettext: %s%s%smsgid \"%s\" returns \"%s\"\n",
		domainname ? "domain \"" : "", xo_printable(domainname),
		domainname ? "\", " : "", xo_printable(str), xo_printable(res));

    return res;
}

/*
 * dngettext(3) wrapper (plural-aware lookup) using the handle's
 * domain; logs the lookup to stderr when XOF_LOG_GETTEXT is set.
 */
static inline const char *
xo_dngettext (xo_handle_t *xop, const char *sing, const char *plural,
	      unsigned long int n)
{
    const char *domainname = xop->xo_gt_domain;
    const char *res;

    res = dngettext(domainname, sing, plural, n);
    if (XOF_ISSET(xop, XOF_LOG_GETTEXT))
	fprintf(stderr, "xo: gettext: %s%s%s"
		"msgid \"%s\", msgid_plural \"%s\" (%lu) returns \"%s\"\n",
		domainname ? "domain \"" : "",
		xo_printable(domainname), domainname ? "\", " : "",
		xo_printable(sing),
		xo_printable(plural), n, xo_printable(res));

    return res;
}
#else /* HAVE_GETTEXT */
/* Without gettext, lookups are identity functions */
static inline const char *
xo_dgettext (xo_handle_t *xop UNUSED, const char *str)
{
    return str;
}

/* Without gettext, plural selection uses the English rule (n == 1) */
static inline const char *
xo_dngettext (xo_handle_t *xop UNUSED, const char *singular,
	      const char *plural, unsigned long int n)
{
    return (n == 1) ? singular : plural;
}
#endif /* HAVE_GETTEXT */
+
/*
 * This is really _re_formatting, since the normal format code has
 * generated a beautiful string into xo_data, starting at
 * start_offset.  We need to see if it's plural, which means
 * comma-separated options, or singular.  Then we make the appropriate
 * call to d[n]gettext() to get the locale-based version.  Note that
 * both input and output of gettext() this should be UTF-8.
 *
 * Returns the (possibly recomputed) display-column count of the
 * rendered text.
 */
static int
xo_format_gettext (xo_handle_t *xop, xo_xff_flags_t flags,
		   int start_offset, int cols, int need_enc)
{
    xo_buffer_t *xbp = &xop->xo_data;

    if (!xo_buf_has_room(xbp, 1))
	return cols;

    xbp->xb_curp[0] = '\0'; /* NUL-terminate the input string */

    char *cp = xbp->xb_bufp + start_offset;
    int len = xbp->xb_curp - cp;
    const char *newstr = NULL;

    /*
     * The plural flag asks us to look backwards at the last numeric
     * value rendered and disect the string into two pieces.
     */
    if (flags & XFF_GT_PLURAL) {
	int n = xo_buf_find_last_number(xbp, start_offset);
	char *two = memchr(cp, (int) ',', len);
	if (two == NULL) {
	    xo_failure(xop, "no comma in plural gettext field: '%s'", cp);
	    return cols;
	}

	if (two == cp) {
	    xo_failure(xop, "nothing before comma in plural gettext "
		       "field: '%s'", cp);
	    return cols;
	}

	if (two == xbp->xb_curp) {
	    xo_failure(xop, "nothing after comma in plural gettext "
		       "field: '%s'", cp);
	    return cols;
	}

	/* Split "singular,plural" into two NUL-terminated strings */
	*two++ = '\0';
	if (flags & XFF_GT_FIELD) {
	    newstr = xo_dngettext(xop, cp, two, n);
	} else {
	    /* Don't do a gettext() look up, just get the plural form */
	    newstr = (n == 1) ? cp : two;
	}

	/*
	 * If we returned the first string, optimize a bit by
	 * backing up over comma
	 */
	if (newstr == cp) {
	    xbp->xb_curp = two - 1; /* One for comma */
	    /*
	     * If the caller wanted UTF8, we're done; nothing changed,
	     * but we need to count the columns used.
	     */
	    if (need_enc == XF_ENC_UTF8)
		return xo_count_utf8_cols(cp, xbp->xb_curp - cp);
	}

    } else {
	/* The simple case (singular) */
	newstr = xo_dgettext(xop, cp);

	if (newstr == cp) {
	    /* If the caller wanted UTF8, we're done; nothing changed */
	    if (need_enc == XF_ENC_UTF8)
		return cols;
	}
    }

    /*
     * Since the new string string might be in gettext's buffer or
     * in the buffer (as the plural form), we make a copy.
     */
    int nlen = strlen(newstr);
    char *newcopy = alloca(nlen + 1);
    memcpy(newcopy, newstr, nlen + 1);

    /* Re-render the translated text in place of the original */
    xbp->xb_curp = xbp->xb_bufp + start_offset; /* Reset the buffer */
    return xo_format_string_direct(xop, xbp, flags, NULL, newcopy, nlen, 0,
				   need_enc, XF_ENC_UTF8);
}
+
+static void
+xo_data_append_content (xo_handle_t *xop, const char *str, int len,
+ xo_xff_flags_t flags)
+{
+ int cols;
+ int need_enc = xo_needed_encoding(xop);
+ int start_offset = xo_buf_offset(&xop->xo_data);
+
+ cols = xo_format_string_direct(xop, &xop->xo_data, XFF_UNESCAPE | flags,
+ NULL, str, len, -1,
+ need_enc, XF_ENC_UTF8);
+ if (flags & XFF_GT_FLAGS)
+ cols = xo_format_gettext(xop, flags, start_offset, cols, need_enc);
+
+ if (XOF_ISSET(xop, XOF_COLUMNS))
+ xop->xo_columns += cols;
+ if (XOIF_ISSET(xop, XOIF_ANCHOR))
+ xop->xo_anchor_columns += cols;
+}
+
+static void
+xo_bump_width (xo_format_t *xfp, int digit)
+{
+ int *ip = &xfp->xf_width[xfp->xf_dots];
+
+ *ip = ((*ip > 0) ? *ip : 0) * 10 + digit;
+}
+
+static int
+xo_trim_ws (xo_buffer_t *xbp, int len)
+{
+ char *cp, *sp, *ep;
+ int delta;
+
+ /* First trim leading space */
+ for (cp = sp = xbp->xb_curp, ep = cp + len; cp < ep; cp++) {
+ if (*cp != ' ')
+ break;
+ }
+
+ delta = cp - sp;
+ if (delta) {
+ len -= delta;
+ memmove(sp, cp, len);
+ }
+
+ /* Then trim off the end */
+ for (cp = xbp->xb_curp, sp = ep = cp + len; cp < ep; ep--) {
+ if (ep[-1] != ' ')
+ break;
+ }
+
+ delta = sp - ep;
+ if (delta) {
+ len -= delta;
+ cp[len] = '\0';
+ }
+
+ return len;
+}
+
+/*
+ * Interface to format a single field. The arguments are in xo_vap,
+ * and the format is in 'fmt'. If 'xbp' is null, we use xop->xo_data;
+ * this is the most common case.
+ */
+static int
+xo_do_format_field (xo_handle_t *xop, xo_buffer_t *xbp,
+ const char *fmt, int flen, xo_xff_flags_t flags)
+{
+ xo_format_t xf;
+ const char *cp, *ep, *sp, *xp = NULL;
+ int rc, cols;
+ int style = (flags & XFF_XML) ? XO_STYLE_XML : xo_style(xop);
+ unsigned make_output = !(flags & XFF_NO_OUTPUT);
+ int need_enc = xo_needed_encoding(xop);
+ int real_need_enc = need_enc;
+ int old_cols = xop->xo_columns;
+
+ /* The gettext interface is UTF-8, so we'll need that for now */
+ if (flags & XFF_GT_FIELD)
+ need_enc = XF_ENC_UTF8;
+
+ if (xbp == NULL)
+ xbp = &xop->xo_data;
+
+ unsigned start_offset = xo_buf_offset(xbp);
+
+ for (cp = fmt, ep = fmt + flen; cp < ep; cp++) {
+ /*
+ * Since we're starting a new field, save the starting offset.
+ * We'll need this later for field-related operations.
+ */
+
+ if (*cp != '%') {
+ add_one:
+ if (xp == NULL)
+ xp = cp;
+
+ if (*cp == '\\' && cp[1] != '\0')
+ cp += 1;
+ continue;
+
+ } if (cp + 1 < ep && cp[1] == '%') {
+ cp += 1;
+ goto add_one;
+ }
+
+ if (xp) {
+ if (make_output) {
+ cols = xo_format_string_direct(xop, xbp, flags | XFF_UNESCAPE,
+ NULL, xp, cp - xp, -1,
+ need_enc, XF_ENC_UTF8);
+ if (XOF_ISSET(xop, XOF_COLUMNS))
+ xop->xo_columns += cols;
+ if (XOIF_ISSET(xop, XOIF_ANCHOR))
+ xop->xo_anchor_columns += cols;
+ }
+
+ xp = NULL;
+ }
+
+ bzero(&xf, sizeof(xf));
+ xf.xf_leading_zero = -1;
+ xf.xf_width[0] = xf.xf_width[1] = xf.xf_width[2] = -1;
+
+ /*
+ * "%@" starts an XO-specific set of flags:
+ * @X@ - XML-only field; ignored if style isn't XML
+ */
+ if (cp[1] == '@') {
+ for (cp += 2; cp < ep; cp++) {
+ if (*cp == '@') {
+ break;
+ }
+ if (*cp == '*') {
+ /*
+ * '*' means there's a "%*.*s" value in vap that
+ * we want to ignore
+ */
+ if (!XOF_ISSET(xop, XOF_NO_VA_ARG))
+ va_arg(xop->xo_vap, int);
+ }
+ }
+ }
+
+ /* Hidden fields are only visible to JSON and XML */
+ if (XOF_ISSET(xop, XFF_ENCODE_ONLY)) {
+ if (style != XO_STYLE_XML
+ && !xo_style_is_encoding(xop))
+ xf.xf_skip = 1;
+ } else if (XOF_ISSET(xop, XFF_DISPLAY_ONLY)) {
+ if (style != XO_STYLE_TEXT
+ && xo_style(xop) != XO_STYLE_HTML)
+ xf.xf_skip = 1;
+ }
+
+ if (!make_output)
+ xf.xf_skip = 1;
+
+ /*
+ * Looking at one piece of a format; find the end and
+ * call snprintf. Then advance xo_vap on our own.
+ *
+ * Note that 'n', 'v', and '$' are not supported.
+ */
+ sp = cp; /* Save start pointer */
+ for (cp += 1; cp < ep; cp++) {
+ if (*cp == 'l')
+ xf.xf_lflag += 1;
+ else if (*cp == 'h')
+ xf.xf_hflag += 1;
+ else if (*cp == 'j')
+ xf.xf_jflag += 1;
+ else if (*cp == 't')
+ xf.xf_tflag += 1;
+ else if (*cp == 'z')
+ xf.xf_zflag += 1;
+ else if (*cp == 'q')
+ xf.xf_qflag += 1;
+ else if (*cp == '.') {
+ if (++xf.xf_dots >= XF_WIDTH_NUM) {
+ xo_failure(xop, "Too many dots in format: '%s'", fmt);
+ return -1;
+ }
+ } else if (*cp == '-')
+ xf.xf_seen_minus = 1;
+ else if (isdigit((int) *cp)) {
+ if (xf.xf_leading_zero < 0)
+ xf.xf_leading_zero = (*cp == '0');
+ xo_bump_width(&xf, *cp - '0');
+ } else if (*cp == '*') {
+ xf.xf_stars += 1;
+ xf.xf_star[xf.xf_dots] = 1;
+ } else if (strchr("diouxXDOUeEfFgGaAcCsSpm", *cp) != NULL)
+ break;
+ else if (*cp == 'n' || *cp == 'v') {
+ xo_failure(xop, "unsupported format: '%s'", fmt);
+ return -1;
+ }
+ }
+
+ if (cp == ep)
+ xo_failure(xop, "field format missing format character: %s",
+ fmt);
+
+ xf.xf_fc = *cp;
+
+ if (!XOF_ISSET(xop, XOF_NO_VA_ARG)) {
+ if (*cp == 's' || *cp == 'S') {
+ /* Handle "%*.*.*s" */
+ int s;
+ for (s = 0; s < XF_WIDTH_NUM; s++) {
+ if (xf.xf_star[s]) {
+ xf.xf_width[s] = va_arg(xop->xo_vap, int);
+
+ /* Normalize a negative width value */
+ if (xf.xf_width[s] < 0) {
+ if (s == 0) {
+ xf.xf_width[0] = -xf.xf_width[0];
+ xf.xf_seen_minus = 1;
+ } else
+ xf.xf_width[s] = -1; /* Ignore negative values */
+ }
+ }
+ }
+ }
+ }
+
+ /* If no max is given, it defaults to size */
+ if (xf.xf_width[XF_WIDTH_MAX] < 0 && xf.xf_width[XF_WIDTH_SIZE] >= 0)
+ xf.xf_width[XF_WIDTH_MAX] = xf.xf_width[XF_WIDTH_SIZE];
+
+ if (xf.xf_fc == 'D' || xf.xf_fc == 'O' || xf.xf_fc == 'U')
+ xf.xf_lflag = 1;
+
+ if (!xf.xf_skip) {
+ xo_buffer_t *fbp = &xop->xo_fmt;
+ int len = cp - sp + 1;
+ if (!xo_buf_has_room(fbp, len + 1))
+ return -1;
+
+ char *newfmt = fbp->xb_curp;
+ memcpy(newfmt, sp, len);
+ newfmt[0] = '%'; /* If we skipped over a "%@...@s" format */
+ newfmt[len] = '\0';
+
+ /*
+ * Bad news: our strings are UTF-8, but the stock printf
+ * functions won't handle field widths for wide characters
+ * correctly. So we have to handle this ourselves.
+ */
+ if (xop->xo_formatter == NULL
+ && (xf.xf_fc == 's' || xf.xf_fc == 'S'
+ || xf.xf_fc == 'm')) {
+
+ xf.xf_enc = (xf.xf_fc == 'm') ? XF_ENC_UTF8
+ : (xf.xf_lflag || (xf.xf_fc == 'S')) ? XF_ENC_WIDE
+ : xf.xf_hflag ? XF_ENC_LOCALE : XF_ENC_UTF8;
+
+ rc = xo_format_string(xop, xbp, flags, &xf);
+
+ if ((flags & XFF_TRIM_WS) && xo_style_is_encoding(xop))
+ rc = xo_trim_ws(xbp, rc);
+
+ } else {
+ int columns = rc = xo_vsnprintf(xop, xbp, newfmt, xop->xo_vap);
+
+ /*
+ * For XML and HTML, we need "&<>" processing; for JSON,
+ * it's quotes. Text gets nothing.
+ */
+ switch (style) {
+ case XO_STYLE_XML:
+ if (flags & XFF_TRIM_WS)
+ columns = rc = xo_trim_ws(xbp, rc);
+ /* FALLTHRU */
+ case XO_STYLE_HTML:
+ rc = xo_escape_xml(xbp, rc, (flags & XFF_ATTR));
+ break;
+
+ case XO_STYLE_JSON:
+ if (flags & XFF_TRIM_WS)
+ columns = rc = xo_trim_ws(xbp, rc);
+ rc = xo_escape_json(xbp, rc, 0);
+ break;
+
+ case XO_STYLE_SDPARAMS:
+ if (flags & XFF_TRIM_WS)
+ columns = rc = xo_trim_ws(xbp, rc);
+ rc = xo_escape_sdparams(xbp, rc, 0);
+ break;
+
+ case XO_STYLE_ENCODER:
+ if (flags & XFF_TRIM_WS)
+ columns = rc = xo_trim_ws(xbp, rc);
+ break;
+ }
+
+ /*
+ * We can assume all the non-%s data we've
+ * added is ASCII, so the columns and bytes are the
+ * same. xo_format_string handles all the fancy
+ * string conversions and updates xo_anchor_columns
+ * accordingly.
+ */
+ if (XOF_ISSET(xop, XOF_COLUMNS))
+ xop->xo_columns += columns;
+ if (XOIF_ISSET(xop, XOIF_ANCHOR))
+ xop->xo_anchor_columns += columns;
+ }
+
+ xbp->xb_curp += rc;
+ }
+
+ /*
+ * Now for the tricky part: we need to move the argument pointer
+ * along by the amount needed.
+ */
+ if (!XOF_ISSET(xop, XOF_NO_VA_ARG)) {
+
+ if (xf.xf_fc == 's' ||xf.xf_fc == 'S') {
+ /*
+ * The 'S' and 's' formats are normally handled in
+ * xo_format_string, but if we skipped it, then we
+ * need to pop it.
+ */
+ if (xf.xf_skip)
+ va_arg(xop->xo_vap, char *);
+
+ } else if (xf.xf_fc == 'm') {
+ /* Nothing on the stack for "%m" */
+
+ } else {
+ int s;
+ for (s = 0; s < XF_WIDTH_NUM; s++) {
+ if (xf.xf_star[s])
+ va_arg(xop->xo_vap, int);
+ }
+
+ if (strchr("diouxXDOU", xf.xf_fc) != NULL) {
+ if (xf.xf_hflag > 1) {
+ va_arg(xop->xo_vap, int);
+
+ } else if (xf.xf_hflag > 0) {
+ va_arg(xop->xo_vap, int);
+
+ } else if (xf.xf_lflag > 1) {
+ va_arg(xop->xo_vap, unsigned long long);
+
+ } else if (xf.xf_lflag > 0) {
+ va_arg(xop->xo_vap, unsigned long);
+
+ } else if (xf.xf_jflag > 0) {
+ va_arg(xop->xo_vap, intmax_t);
+
+ } else if (xf.xf_tflag > 0) {
+ va_arg(xop->xo_vap, ptrdiff_t);
+
+ } else if (xf.xf_zflag > 0) {
+ va_arg(xop->xo_vap, size_t);
+
+ } else if (xf.xf_qflag > 0) {
+ va_arg(xop->xo_vap, quad_t);
+
+ } else {
+ va_arg(xop->xo_vap, int);
+ }
+ } else if (strchr("eEfFgGaA", xf.xf_fc) != NULL)
+ if (xf.xf_lflag)
+ va_arg(xop->xo_vap, long double);
+ else
+ va_arg(xop->xo_vap, double);
+
+ else if (xf.xf_fc == 'C' || (xf.xf_fc == 'c' && xf.xf_lflag))
+ va_arg(xop->xo_vap, wint_t);
+
+ else if (xf.xf_fc == 'c')
+ va_arg(xop->xo_vap, int);
+
+ else if (xf.xf_fc == 'p')
+ va_arg(xop->xo_vap, void *);
+ }
+ }
+ }
+
+ if (xp) {
+ if (make_output) {
+ cols = xo_format_string_direct(xop, xbp, flags | XFF_UNESCAPE,
+ NULL, xp, cp - xp, -1,
+ need_enc, XF_ENC_UTF8);
+
+ if (XOF_ISSET(xop, XOF_COLUMNS))
+ xop->xo_columns += cols;
+ if (XOIF_ISSET(xop, XOIF_ANCHOR))
+ xop->xo_anchor_columns += cols;
+ }
+
+ xp = NULL;
+ }
+
+ if (flags & XFF_GT_FLAGS) {
+ /*
+ * Handle gettext()ing the field by looking up the value
+ * and then copying it in, while converting to locale, if
+ * needed.
+ */
+ int new_cols = xo_format_gettext(xop, flags, start_offset,
+ old_cols, real_need_enc);
+
+ if (XOF_ISSET(xop, XOF_COLUMNS))
+ xop->xo_columns += new_cols - old_cols;
+ if (XOIF_ISSET(xop, XOIF_ANCHOR))
+ xop->xo_anchor_columns += new_cols - old_cols;
+ }
+
+ return 0;
+}
+
+static char *
+xo_fix_encoding (xo_handle_t *xop UNUSED, char *encoding)
+{
+ char *cp = encoding;
+
+ if (cp[0] != '%' || !isdigit((int) cp[1]))
+ return encoding;
+
+ for (cp += 2; *cp; cp++) {
+ if (!isdigit((int) *cp))
+ break;
+ }
+
+ cp -= 1;
+ *cp = '%';
+
+ return cp;
+}
+
+static void
+xo_color_append_html (xo_handle_t *xop)
+{
+ /*
+ * If the color buffer has content, we add it now. It's already
+ * prebuilt and ready, since we want to add it to every <div>.
+ */
+ if (!xo_buf_is_empty(&xop->xo_color_buf)) {
+ xo_buffer_t *xbp = &xop->xo_color_buf;
+
+ xo_data_append(xop, xbp->xb_bufp, xbp->xb_curp - xbp->xb_bufp);
+ }
+}
+
+/*
+ * A wrapper for humanize_number that autoscales, since the
+ * HN_AUTOSCALE flag scales as needed based on the size of
+ * the output buffer, not the size of the value. I also
+ * wish HN_DECIMAL was more imperative, without the <10
+ * test. But the boat only goes where we want when we hold
+ * the rudder, so xo_humanize fixes part of the problem.
+ */
+static int
+xo_humanize (char *buf, int len, uint64_t value, int flags)
+{
+ int scale = 0;
+
+ if (value) {
+ uint64_t left = value;
+
+ if (flags & HN_DIVISOR_1000) {
+ for ( ; left; scale++)
+ left /= 1000;
+ } else {
+ for ( ; left; scale++)
+ left /= 1024;
+ }
+ scale -= 1;
+ }
+
+ return xo_humanize_number(buf, len, value, "", scale, flags);
+}
+
/*
 * This is an area where we can save information from the handle for
 * later restoration.  We need to know what data was rendered to know
 * what needs cleaned up.  Captured before a value is formatted so the
 * humanizing logic can rewind the buffer and column counters.
 */
typedef struct xo_humanize_save_s {
    unsigned xhs_offset;	/* Saved xo_offset */
    unsigned xhs_columns;	/* Saved xo_columns */
    unsigned xhs_anchor_columns; /* Saved xo_anchor_columns */
} xo_humanize_save_t;
+
+/*
+ * Format a "humanized" value for a numeric, meaning something nice
+ * like "44M" instead of "44470272". We autoscale, choosing the
+ * most appropriate value for K/M/G/T/P/E based on the value given.
+ */
+static void
+xo_format_humanize (xo_handle_t *xop, xo_buffer_t *xbp,
+ xo_humanize_save_t *savep, xo_xff_flags_t flags)
+{
+ if (XOF_ISSET(xop, XOF_NO_HUMANIZE))
+ return;
+
+ unsigned end_offset = xbp->xb_curp - xbp->xb_bufp;
+ if (end_offset == savep->xhs_offset) /* Huh? Nothing to render */
+ return;
+
+ /*
+ * We have a string that's allegedly a number. We want to
+ * humanize it, which means turning it back into a number
+ * and calling xo_humanize_number on it.
+ */
+ uint64_t value;
+ char *ep;
+
+ xo_buf_append(xbp, "", 1); /* NUL-terminate it */
+
+ value = strtoull(xbp->xb_bufp + savep->xhs_offset, &ep, 0);
+ if (!(value == ULLONG_MAX && errno == ERANGE)
+ && (ep != xbp->xb_bufp + savep->xhs_offset)) {
+ /*
+ * There are few values where humanize_number needs
+ * more bytes than the original value. I've used
+ * 10 as a rectal number to cover those scenarios.
+ */
+ if (xo_buf_has_room(xbp, 10)) {
+ xbp->xb_curp = xbp->xb_bufp + savep->xhs_offset;
+
+ int rc;
+ int left = (xbp->xb_bufp + xbp->xb_size) - xbp->xb_curp;
+ int hn_flags = HN_NOSPACE; /* On by default */
+
+ if (flags & XFF_HN_SPACE)
+ hn_flags &= ~HN_NOSPACE;
+
+ if (flags & XFF_HN_DECIMAL)
+ hn_flags |= HN_DECIMAL;
+
+ if (flags & XFF_HN_1000)
+ hn_flags |= HN_DIVISOR_1000;
+
+ rc = xo_humanize(xbp->xb_curp,
+ left, value, hn_flags);
+ if (rc > 0) {
+ xbp->xb_curp += rc;
+ xop->xo_columns = savep->xhs_columns + rc;
+ xop->xo_anchor_columns = savep->xhs_anchor_columns + rc;
+ }
+ }
+ }
+}
+
/*
 * Emit an HTML <div> for one field: class attribute, optional
 * data-tag/data-xpath/data-key/data-type/data-help attributes, then
 * the formatted value and closing tag.  For key fields with XPath
 * enabled, also records an XPath predicate on the current stack frame.
 */
static void
xo_buf_append_div (xo_handle_t *xop, const char *class, xo_xff_flags_t flags,
                   const char *name, int nlen,
                   const char *value, int vlen,
                   const char *encoding, int elen)
{
    static char div_start[] = "<div class=\"";
    static char div_tag[] = "\" data-tag=\"";
    static char div_xpath[] = "\" data-xpath=\"";
    static char div_key[] = "\" data-key=\"key";
    static char div_end[] = "\">";
    static char div_close[] = "</div>";

    /* The encoding format defaults to the normal format */
    if (encoding == NULL) {
        char *enc = alloca(vlen + 1);
        memcpy(enc, value, vlen);
        enc[vlen] = '\0';
        encoding = xo_fix_encoding(xop, enc);
        elen = strlen(encoding);
    }

    /*
     * To build our XPath predicate, we need to save the va_list before
     * we format our data, and then restore it before we format the
     * xpath expression.
     * Display-only keys implies that we've got an encode-only key
     * elsewhere, so we don't use them for making predicates.
     */
    int need_predidate =
        (name && (flags & XFF_KEY) && !(flags & XFF_DISPLAY_ONLY)
         && XOF_ISSET(xop, XOF_XPATH));

    if (need_predidate) {
        va_list va_local;

        /* Snapshot the va_list; formatting the predicate consumes args */
        va_copy(va_local, xop->xo_vap);
        if (xop->xo_checkpointer)
            xop->xo_checkpointer(xop, xop->xo_vap, 0);

        /*
         * Build an XPath predicate expression to match this key.
         * We use the format buffer.
         */
        xo_buffer_t *pbp = &xop->xo_predicate;
        pbp->xb_curp = pbp->xb_bufp; /* Restart buffer */

        xo_buf_append(pbp, "[", 1);
        xo_buf_escape(xop, pbp, name, nlen, 0);
        if (XOF_ISSET(xop, XOF_PRETTY))
            xo_buf_append(pbp, " = '", 4);
        else
            xo_buf_append(pbp, "='", 2);

        /* The value is rendered XML-style, without output suppression */
        xo_xff_flags_t pflags = flags | XFF_XML | XFF_ATTR;
        pflags &= ~(XFF_NO_OUTPUT | XFF_ENCODE_ONLY);
        xo_do_format_field(xop, pbp, encoding, elen, pflags);

        xo_buf_append(pbp, "']", 2);

        /* Now we record this predicate expression in the stack */
        xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth];
        int olen = xsp->xs_keys ? strlen(xsp->xs_keys) : 0;
        int dlen = pbp->xb_curp - pbp->xb_bufp;

        /* Append the new predicate to any existing keys for this frame */
        char *cp = xo_realloc(xsp->xs_keys, olen + dlen + 1);
        if (cp) {
            memcpy(cp + olen, pbp->xb_bufp, dlen);
            cp[olen + dlen] = '\0';
            xsp->xs_keys = cp;
        }

        /* Now we reset the xo_vap as if we were never here */
        va_end(xop->xo_vap);
        va_copy(xop->xo_vap, va_local);
        va_end(va_local);
        if (xop->xo_checkpointer)
            xop->xo_checkpointer(xop, xop->xo_vap, 1);
    }

    if (flags & XFF_ENCODE_ONLY) {
        /*
         * Even if this is encode-only, we need to go through the
         * work of formatting it to make sure the args are cleared
         * from xo_vap.
         */
        xo_do_format_field(xop, NULL, encoding, elen,
                           flags | XFF_NO_OUTPUT);
        return;
    }

    xo_line_ensure_open(xop, 0);

    if (XOF_ISSET(xop, XOF_PRETTY))
        xo_buf_indent(xop, xop->xo_indent_by);

    xo_data_append(xop, div_start, sizeof(div_start) - 1);
    xo_data_append(xop, class, strlen(class));

    /*
     * If the color buffer has content, we add it now.  It's already
     * prebuilt and ready, since we want to add it to every <div>.
     */
    if (!xo_buf_is_empty(&xop->xo_color_buf)) {
        xo_buffer_t *xbp = &xop->xo_color_buf;

        xo_data_append(xop, xbp->xb_bufp, xbp->xb_curp - xbp->xb_bufp);
    }

    if (name) {
        xo_data_append(xop, div_tag, sizeof(div_tag) - 1);
        xo_data_escape(xop, name, nlen);

        /*
         * Save the offset at which we'd place units.  See xo_format_units.
         */
        if (XOF_ISSET(xop, XOF_UNITS)) {
            XOIF_SET(xop, XOIF_UNITS_PENDING);
            /*
             * Note: We need the '+1' here because we know we've not
             * added the closing quote.  We add one, knowing the quote
             * will be added shortly.
             */
            xop->xo_units_offset =
                xop->xo_data.xb_curp -xop->xo_data.xb_bufp + 1;
        }

        if (XOF_ISSET(xop, XOF_XPATH)) {
            int i;
            xo_stack_t *xsp;

            xo_data_append(xop, div_xpath, sizeof(div_xpath) - 1);
            if (xop->xo_leading_xpath)
                xo_data_append(xop, xop->xo_leading_xpath,
                               strlen(xop->xo_leading_xpath));

            /* Walk the open hierarchy, emitting one path component each */
            for (i = 0; i <= xop->xo_depth; i++) {
                xsp = &xop->xo_stack[i];
                if (xsp->xs_name == NULL)
                    continue;

                /*
                 * XSS_OPEN_LIST and XSS_OPEN_LEAF_LIST stack frames
                 * are directly under XSS_OPEN_INSTANCE frames so we
                 * don't need to put these in our XPath expressions.
                 */
                if (xsp->xs_state == XSS_OPEN_LIST
                    || xsp->xs_state == XSS_OPEN_LEAF_LIST)
                    continue;

                xo_data_append(xop, "/", 1);
                xo_data_escape(xop, xsp->xs_name, strlen(xsp->xs_name));
                if (xsp->xs_keys) {
                    /* Don't show keys for the key field */
                    if (i != xop->xo_depth || !(flags & XFF_KEY))
                        xo_data_append(xop, xsp->xs_keys, strlen(xsp->xs_keys));
                }
            }

            xo_data_append(xop, "/", 1);
            xo_data_escape(xop, name, nlen);
        }

        if (XOF_ISSET(xop, XOF_INFO) && xop->xo_info) {
            static char in_type[] = "\" data-type=\"";
            static char in_help[] = "\" data-help=\"";

            xo_info_t *xip = xo_info_find(xop, name, nlen);
            if (xip) {
                if (xip->xi_type) {
                    xo_data_append(xop, in_type, sizeof(in_type) - 1);
                    xo_data_escape(xop, xip->xi_type, strlen(xip->xi_type));
                }
                if (xip->xi_help) {
                    xo_data_append(xop, in_help, sizeof(in_help) - 1);
                    xo_data_escape(xop, xip->xi_help, strlen(xip->xi_help));
                }
            }
        }

        if ((flags & XFF_KEY) && XOF_ISSET(xop, XOF_KEYS))
            xo_data_append(xop, div_key, sizeof(div_key) - 1);
    }

    xo_buffer_t *xbp = &xop->xo_data;
    unsigned base_offset = xbp->xb_curp - xbp->xb_bufp;

    xo_data_append(xop, div_end, sizeof(div_end) - 1);

    xo_humanize_save_t save;	/* Save values for humanizing logic */

    save.xhs_offset = xbp->xb_curp - xbp->xb_bufp;
    save.xhs_columns = xop->xo_columns;
    save.xhs_anchor_columns = xop->xo_anchor_columns;

    xo_do_format_field(xop, NULL, value, vlen, flags);

    if (flags & XFF_HUMANIZE) {
        /*
         * Unlike text style, we want to retain the original value and
         * stuff it into the "data-number" attribute.
         */
        static const char div_number[] = "\" data-number=\"";
        int div_len = sizeof(div_number) - 1;

        unsigned end_offset = xbp->xb_curp - xbp->xb_bufp;
        int olen = end_offset - save.xhs_offset;

        /* Copy the raw digits aside before humanizing overwrites them */
        char *cp = alloca(olen + 1);
        memcpy(cp, xbp->xb_bufp + save.xhs_offset, olen);
        cp[olen] = '\0';

        xo_format_humanize(xop, xbp, &save, flags);

        if (xo_buf_has_room(xbp, div_len + olen)) {
            unsigned new_offset = xbp->xb_curp - xbp->xb_bufp;


            /* Move the humanized string off to the left */
            memmove(xbp->xb_bufp + base_offset + div_len + olen,
                    xbp->xb_bufp + base_offset, new_offset - base_offset);

            /* Copy the data_number attribute name */
            memcpy(xbp->xb_bufp + base_offset, div_number, div_len);

            /* Copy the original long value */
            memcpy(xbp->xb_bufp + base_offset + div_len, cp, olen);
            xbp->xb_curp += div_len + olen;
        }
    }

    xo_data_append(xop, div_close, sizeof(div_close) - 1);

    if (XOF_ISSET(xop, XOF_PRETTY))
        xo_data_append(xop, "\n", 1);
}
+
+static void
+xo_format_text (xo_handle_t *xop, const char *str, int len)
+{
+ switch (xo_style(xop)) {
+ case XO_STYLE_TEXT:
+ xo_buf_append_locale(xop, &xop->xo_data, str, len);
+ break;
+
+ case XO_STYLE_HTML:
+ xo_buf_append_div(xop, "text", 0, NULL, 0, str, len, NULL, 0);
+ break;
+ }
+}
+
/*
 * Render a title field ({T:...}).  Titles are display-only: encoding
 * styles (XML/JSON/SDPARAMS/ENCODER) just consume the matching varargs
 * and emit nothing; TEXT emits the formatted title and HTML wraps it
 * in a <div class="title">.
 */
static void
xo_format_title (xo_handle_t *xop, xo_field_info_t *xfip,
                 const char *str, unsigned len)
{
    const char *fmt = xfip->xfi_format;
    unsigned flen = xfip->xfi_flen;
    xo_xff_flags_t flags = xfip->xfi_flags;

    static char div_open[] = "<div class=\"title";
    static char div_middle[] = "\">";
    static char div_close[] = "</div>";

    /* An empty format means "print the title text as-is" */
    if (flen == 0) {
        fmt = "%s";
        flen = 2;
    }

    switch (xo_style(xop)) {
    case XO_STYLE_XML:
    case XO_STYLE_JSON:
    case XO_STYLE_SDPARAMS:
    case XO_STYLE_ENCODER:
        /*
         * Even though we don't care about text, we need to do
         * enough parsing work to skip over the right bits of xo_vap.
         */
        if (len == 0)
            xo_do_format_field(xop, NULL, fmt, flen, flags | XFF_NO_OUTPUT);
        return;
    }

    xo_buffer_t *xbp = &xop->xo_data;
    int start = xbp->xb_curp - xbp->xb_bufp;
    int left = xbp->xb_size - start;
    int rc;

    if (xo_style(xop) == XO_STYLE_HTML) {
        xo_line_ensure_open(xop, 0);
        if (XOF_ISSET(xop, XOF_PRETTY))
            xo_buf_indent(xop, xop->xo_indent_by);
        xo_buf_append(&xop->xo_data, div_open, sizeof(div_open) - 1);
        xo_color_append_html(xop);
        xo_buf_append(&xop->xo_data, div_middle, sizeof(div_middle) - 1);
    }

    start = xbp->xb_curp - xbp->xb_bufp; /* Reset start */
    if (len) {
        /* NUL-terminate stack copies of the format and the title text */
        char *newfmt = alloca(flen + 1);
        memcpy(newfmt, fmt, flen);
        newfmt[flen] = '\0';

        /* If len is non-zero, the format string apply to the name */
        char *newstr = alloca(len + 1);
        memcpy(newstr, str, len);
        newstr[len] = '\0';

        /*
         * NOTE(review): this tests the last character of the title
         * *text*, not of the format; newfmt[flen - 1] == 's' looks
         * like the intended condition (string formats should take the
         * content-append path for correct UTF-8 column counting) --
         * confirm against upstream libxo before changing.
         */
        if (newstr[len - 1] == 's') {
            char *bp;

            /* First size the output, then render into a stack buffer */
            rc = snprintf(NULL, 0, newfmt, newstr);
            if (rc > 0) {
                /*
                 * We have to do this the hard way, since we might need
                 * the columns.
                 */
                bp = alloca(rc + 1);
                rc = snprintf(bp, rc + 1, newfmt, newstr);

                xo_data_append_content(xop, bp, rc, flags);
            }
            goto move_along;

        } else {
            rc = snprintf(xbp->xb_curp, left, newfmt, newstr);
            if (rc >= left) {
                /* Output was truncated; grow the buffer and retry */
                if (!xo_buf_has_room(xbp, rc))
                    return;
                left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
                rc = snprintf(xbp->xb_curp, left, newfmt, newstr);
            }

            if (rc > 0) {
                if (XOF_ISSET(xop, XOF_COLUMNS))
                    xop->xo_columns += rc;
                if (XOIF_ISSET(xop, XOIF_ANCHOR))
                    xop->xo_anchor_columns += rc;
            }
        }

    } else {
        xo_do_format_field(xop, NULL, fmt, flen, flags);

        /* xo_do_format_field moved curp, so we need to reset it */
        rc = xbp->xb_curp - (xbp->xb_bufp + start);
        xbp->xb_curp = xbp->xb_bufp + start;
    }

    /* If we're styling HTML, then we need to escape it */
    if (xo_style(xop) == XO_STYLE_HTML) {
        rc = xo_escape_xml(xbp, rc, 0);
    }

    if (rc > 0)
        xbp->xb_curp += rc;

 move_along:
    if (xo_style(xop) == XO_STYLE_HTML) {
        xo_data_append(xop, div_close, sizeof(div_close) - 1);
        if (XOF_ISSET(xop, XOF_PRETTY))
            xo_data_append(xop, "\n", 1);
    }
}
+
+static void
+xo_format_prep (xo_handle_t *xop, xo_xff_flags_t flags)
+{
+ if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) {
+ xo_data_append(xop, ",", 1);
+ if (!(flags & XFF_LEAF_LIST) && XOF_ISSET(xop, XOF_PRETTY))
+ xo_data_append(xop, "\n", 1);
+ } else
+ xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST;
+}
+
#if 0
/* Useful debugging function: pops and prints one int from xo_vap */
void
xo_arg (xo_handle_t *xop);
void
xo_arg (xo_handle_t *xop)
{
    xop = xo_default(xop);
    fprintf(stderr, "0x%x", va_arg(xop->xo_vap, unsigned));
}
#endif /* 0 */
+
/*
 * Emit one value field ({V:...}/{:...}) in the current output style.
 * 'format' renders the display form, 'encoding' (if non-NULL) renders
 * the machine form; 'flags' carries the XFF_* field modifiers.  Also
 * drives the state-machine transitions (xo_transition) needed before
 * a value may legally appear in the current container.
 */
static void
xo_format_value (xo_handle_t *xop, const char *name, int nlen,
                 const char *format, int flen,
                 const char *encoding, int elen, xo_xff_flags_t flags)
{
    int pretty = XOF_ISSET(xop, XOF_PRETTY);
    int quote;

    /*
     * Before we emit a value, we need to know that the frame is ready.
     */
    xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth];

    if (flags & XFF_LEAF_LIST) {
        /*
         * Check if we've already started to emit normal leafs
         * or if we're not in a leaf list.
         */
        if ((xsp->xs_flags & (XSF_EMIT | XSF_EMIT_KEY))
            || !(xsp->xs_flags & XSF_EMIT_LEAF_LIST)) {
            char nbuf[nlen + 1];
            memcpy(nbuf, name, nlen);
            nbuf[nlen] = '\0';

            int rc = xo_transition(xop, 0, nbuf, XSS_EMIT_LEAF_LIST);
            if (rc < 0)
                flags |= XFF_DISPLAY_ONLY | XFF_ENCODE_ONLY;
            else
                xop->xo_stack[xop->xo_depth].xs_flags |= XSF_EMIT_LEAF_LIST;
        }

        /* The transition may have changed depth; refetch the frame */
        xsp = &xop->xo_stack[xop->xo_depth];
        if (xsp->xs_name) {
            name = xsp->xs_name;
            nlen = strlen(name);
        }

    } else if (flags & XFF_KEY) {
        /* Emitting a 'k' (key) field */
        if ((xsp->xs_flags & XSF_EMIT) && !(flags & XFF_DISPLAY_ONLY)) {
            xo_failure(xop, "key field emitted after normal value field: '%.*s'",
                       nlen, name);

        } else if (!(xsp->xs_flags & XSF_EMIT_KEY)) {
            char nbuf[nlen + 1];
            memcpy(nbuf, name, nlen);
            nbuf[nlen] = '\0';

            int rc = xo_transition(xop, 0, nbuf, XSS_EMIT);
            if (rc < 0)
                flags |= XFF_DISPLAY_ONLY | XFF_ENCODE_ONLY;
            else
                xop->xo_stack[xop->xo_depth].xs_flags |= XSF_EMIT_KEY;

            xsp = &xop->xo_stack[xop->xo_depth];
            xsp->xs_flags |= XSF_EMIT_KEY;
        }

    } else {
        /* Emitting a normal value field */
        if ((xsp->xs_flags & XSF_EMIT_LEAF_LIST)
            || !(xsp->xs_flags & XSF_EMIT)) {
            char nbuf[nlen + 1];
            memcpy(nbuf, name, nlen);
            nbuf[nlen] = '\0';

            int rc = xo_transition(xop, 0, nbuf, XSS_EMIT);
            if (rc < 0)
                flags |= XFF_DISPLAY_ONLY | XFF_ENCODE_ONLY;
            else
                xop->xo_stack[xop->xo_depth].xs_flags |= XSF_EMIT;

            xsp = &xop->xo_stack[xop->xo_depth];
            xsp->xs_flags |= XSF_EMIT;
        }
    }

    xo_buffer_t *xbp = &xop->xo_data;
    xo_humanize_save_t save;	/* Save values for humanizing logic */

    switch (xo_style(xop)) {
    case XO_STYLE_TEXT:
        if (flags & XFF_ENCODE_ONLY)
            flags |= XFF_NO_OUTPUT;

        /* Record the rewind point in case we humanize afterwards */
        save.xhs_offset = xbp->xb_curp - xbp->xb_bufp;
        save.xhs_columns = xop->xo_columns;
        save.xhs_anchor_columns = xop->xo_anchor_columns;

        xo_do_format_field(xop, NULL, format, flen, flags);

        if (flags & XFF_HUMANIZE)
            xo_format_humanize(xop, xbp, &save, flags);
        break;

    case XO_STYLE_HTML:
        if (flags & XFF_ENCODE_ONLY)
            flags |= XFF_NO_OUTPUT;

        xo_buf_append_div(xop, "data", flags, name, nlen,
                          format, flen, encoding, elen);
        break;

    case XO_STYLE_XML:
        /*
         * Even though we're not making output, we still need to
         * let the formatting code handle the va_arg popping.
         */
        if (flags & XFF_DISPLAY_ONLY) {
            flags |= XFF_NO_OUTPUT;
            xo_do_format_field(xop, NULL, format, flen, flags);
            break;
        }

        if (encoding) {
            format = encoding;
            flen = elen;
        } else {
            /* No encoding format: derive one from the display format */
            char *enc = alloca(flen + 1);
            memcpy(enc, format, flen);
            enc[flen] = '\0';
            format = xo_fix_encoding(xop, enc);
            flen = strlen(format);
        }

        if (nlen == 0) {
            static char missing[] = "missing-field-name";
            xo_failure(xop, "missing field name: %s", format);
            name = missing;
            nlen = sizeof(missing) - 1;
        }

        if (pretty)
            xo_buf_indent(xop, -1);
        xo_data_append(xop, "<", 1);
        xo_data_escape(xop, name, nlen);

        /* Flush any attributes accumulated via xo_attr() */
        if (xop->xo_attrs.xb_curp != xop->xo_attrs.xb_bufp) {
            xo_data_append(xop, xop->xo_attrs.xb_bufp,
                           xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp);
            xop->xo_attrs.xb_curp = xop->xo_attrs.xb_bufp;
        }

        /*
         * We indicate 'key' fields using the 'key' attribute.  While
         * this is really committing the crime of mixing meta-data with
         * data, it's often useful.  Especially when format meta-data is
         * difficult to come by.
         */
        if ((flags & XFF_KEY) && XOF_ISSET(xop, XOF_KEYS)) {
            static char attr[] = " key=\"key\"";
            xo_data_append(xop, attr, sizeof(attr) - 1);
        }

        /*
         * Save the offset at which we'd place units.  See xo_format_units.
         */
        if (XOF_ISSET(xop, XOF_UNITS)) {
            XOIF_SET(xop, XOIF_UNITS_PENDING);
            xop->xo_units_offset = xop->xo_data.xb_curp -xop->xo_data.xb_bufp;
        }

        xo_data_append(xop, ">", 1);
        xo_do_format_field(xop, NULL, format, flen, flags);
        xo_data_append(xop, "</", 2);
        xo_data_escape(xop, name, nlen);
        xo_data_append(xop, ">", 1);
        if (pretty)
            xo_data_append(xop, "\n", 1);
        break;

    case XO_STYLE_JSON:
        if (flags & XFF_DISPLAY_ONLY) {
            flags |= XFF_NO_OUTPUT;
            xo_do_format_field(xop, NULL, format, flen, flags);
            break;
        }

        if (encoding) {
            format = encoding;
            flen = elen;
        } else {
            char *enc = alloca(flen + 1);
            memcpy(enc, format, flen);
            enc[flen] = '\0';
            format = xo_fix_encoding(xop, enc);
            flen = strlen(format);
        }

        int first = !(xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST);

        xo_format_prep(xop, flags);

        /* Decide quoting: explicit flags win, else numeric formats go bare */
        if (flags & XFF_QUOTE)
            quote = 1;
        else if (flags & XFF_NOQUOTE)
            quote = 0;
        else if (flen == 0) {
            quote = 0;
            format = "true";	/* JSON encodes empty tags as a boolean true */
            flen = 4;
        } else if (strchr("diouxXDOUeEfFgGaAcCp", format[flen - 1]) == NULL)
            quote = 1;
        else
            quote = 0;

        if (nlen == 0) {
            static char missing[] = "missing-field-name";
            xo_failure(xop, "missing field name: %s", format);
            name = missing;
            nlen = sizeof(missing) - 1;
        }

        if (flags & XFF_LEAF_LIST) {
            if (!first && pretty)
                xo_data_append(xop, "\n", 1);
            if (pretty)
                xo_buf_indent(xop, -1);
        } else {
            if (pretty)
                xo_buf_indent(xop, -1);
            xo_data_append(xop, "\"", 1);

            xbp = &xop->xo_data;
            int off = xbp->xb_curp - xbp->xb_bufp;

            xo_data_escape(xop, name, nlen);

            /* Optionally rewrite '-' to '_' in the emitted name */
            if (XOF_ISSET(xop, XOF_UNDERSCORES)) {
                int now = xbp->xb_curp - xbp->xb_bufp;
                for ( ; off < now; off++)
                    if (xbp->xb_bufp[off] == '-')
                        xbp->xb_bufp[off] = '_';
            }
            xo_data_append(xop, "\":", 2);
            if (pretty)
                xo_data_append(xop, " ", 1);
        }

        if (quote)
            xo_data_append(xop, "\"", 1);

        xo_do_format_field(xop, NULL, format, flen, flags);

        if (quote)
            xo_data_append(xop, "\"", 1);
        break;

    case XO_STYLE_SDPARAMS:
        if (flags & XFF_DISPLAY_ONLY) {
            flags |= XFF_NO_OUTPUT;
            xo_do_format_field(xop, NULL, format, flen, flags);
            break;
        }

        if (encoding) {
            format = encoding;
            flen = elen;
        } else {
            char *enc = alloca(flen + 1);
            memcpy(enc, format, flen);
            enc[flen] = '\0';
            format = xo_fix_encoding(xop, enc);
            flen = strlen(format);
        }

        if (nlen == 0) {
            static char missing[] = "missing-field-name";
            xo_failure(xop, "missing field name: %s", format);
            name = missing;
            nlen = sizeof(missing) - 1;
        }

        /* SD-PARAMS form: name="value" */
        xo_data_escape(xop, name, nlen);
        xo_data_append(xop, "=\"", 2);
        xo_do_format_field(xop, NULL, format, flen, flags);
        xo_data_append(xop, "\" ", 2);
        break;

    case XO_STYLE_ENCODER:
        if (flags & XFF_DISPLAY_ONLY) {
            flags |= XFF_NO_OUTPUT;
            xo_do_format_field(xop, NULL, format, flen, flags);
            break;
        }

        if (flags & XFF_QUOTE)
            quote = 1;
        else if (flags & XFF_NOQUOTE)
            quote = 0;
        else if (flen == 0) {
            quote = 0;
            format = "true";	/* JSON encodes empty tags as a boolean true */
            flen = 4;
        } else if (strchr("diouxXDOUeEfFgGaAcCp", format[flen - 1]) == NULL)
            quote = 1;
        else
            quote = 0;

        if (encoding) {
            format = encoding;
            flen = elen;
        } else {
            char *enc = alloca(flen + 1);
            memcpy(enc, format, flen);
            enc[flen] = '\0';
            format = xo_fix_encoding(xop, enc);
            flen = strlen(format);
        }

        if (nlen == 0) {
            static char missing[] = "missing-field-name";
            xo_failure(xop, "missing field name: %s", format);
            name = missing;
            nlen = sizeof(missing) - 1;
        }

        /* Render name and value into xo_data as two NUL-terminated strings */
        unsigned name_offset = xo_buf_offset(&xop->xo_data);
        xo_data_append(xop, name, nlen);
        xo_data_append(xop, "", 1);     /* NUL terminator for the name */

        unsigned value_offset = xo_buf_offset(&xop->xo_data);
        xo_do_format_field(xop, NULL, format, flen, flags);
        xo_data_append(xop, "", 1);     /* NUL terminator for the value */

        xo_encoder_handle(xop, quote ? XO_OP_STRING : XO_OP_CONTENT,
                          xo_buf_data(&xop->xo_data, name_offset),
                          xo_buf_data(&xop->xo_data, value_offset));
        xo_buf_reset(&xop->xo_data);
        break;
    }
}
+
/*
 * Handle a "{G:}" field: record the gettext domain to use for
 * subsequent translations.  The domain can come from static content
 * (str/len) or be rendered from the format string (fmt/flen), which
 * consumes the value from the variadic argument list.
 */
static void
xo_set_gettext_domain (xo_handle_t *xop, xo_field_info_t *xfip,
		       const char *str, unsigned len)
{
    const char *fmt = xfip->xfi_format;
    unsigned flen = xfip->xfi_flen;

    /* Start by discarding previous domain */
    if (xop->xo_gt_domain) {
	xo_free(xop->xo_gt_domain);
	xop->xo_gt_domain = NULL;
    }

    /* An empty {G:} means no domainname */
    if (len == 0 && flen == 0)
	return;

    int start_offset = -1;
    if (len == 0 && flen != 0) {
	/* Need to do format the data to get the domainname from args */
	start_offset = xop->xo_data.xb_curp - xop->xo_data.xb_bufp;
	xo_do_format_field(xop, NULL, fmt, flen, 0);

	/* The formatted name now sits in xo_data; point str/len at it */
	int end_offset = xop->xo_data.xb_curp - xop->xo_data.xb_bufp;
	len = end_offset - start_offset;
	str = xop->xo_data.xb_bufp + start_offset;
    }

    xop->xo_gt_domain = xo_strndup(str, len);

    /* Reset the current buffer point to avoid emitting the name as output */
    if (start_offset >= 0)
	xop->xo_data.xb_curp = xop->xo_data.xb_bufp + start_offset;
}
+
/*
 * Emit non-value content (labels, notes, errors, warnings, padding)
 * in the current output style.  'class_name' is the HTML div class;
 * 'tag_name', when non-NULL, wraps the content in a container for the
 * encoding styles.  Static content (str/len) takes precedence over
 * the format string (fmt/flen).
 */
static void
xo_format_content (xo_handle_t *xop, const char *class_name,
		   const char *tag_name,
		   const char *str, int len, const char *fmt, int flen,
		   xo_xff_flags_t flags)
{
    switch (xo_style(xop)) {
    case XO_STYLE_TEXT:
	if (len)
	    xo_data_append_content(xop, str, len, flags);
	else
	    xo_do_format_field(xop, NULL, fmt, flen, flags);
	break;

    case XO_STYLE_HTML:
	if (len == 0) {
	    str = fmt;
	    len = flen;
	}

	xo_buf_append_div(xop, class_name, flags, NULL, 0, str, len, NULL, 0);
	break;

    case XO_STYLE_XML:
    case XO_STYLE_JSON:
    case XO_STYLE_SDPARAMS:
	if (tag_name) {
	    if (len == 0) {
		str = fmt;
		len = flen;
	    }

	    /* Wrap the content as a "message" value inside the tag */
	    xo_open_container_h(xop, tag_name);
	    xo_format_value(xop, "message", 7, str, len, NULL, 0, flags);
	    xo_close_container_h(xop, tag_name);

	} else {
	    /*
	     * Even though we don't care about labels, we need to do
	     * enough parsing work to skip over the right bits of xo_vap.
	     */
	    if (len == 0)
		xo_do_format_field(xop, NULL, fmt, flen,
				   flags | XFF_NO_OUTPUT);
	}
	break;

    case XO_STYLE_ENCODER:
	/* No visible output; still parse the format to consume varargs */
	if (len == 0)
	    xo_do_format_field(xop, NULL, fmt, flen,
			       flags | XFF_NO_OUTPUT);
	break;
    }
}
+
/*
 * Color names, indexed by the XO_COL_* values; NULL-terminated so
 * xo_color_find() can walk it.
 */
static const char *xo_color_names[] = {
    "default",	/* XO_COL_DEFAULT */
    "black",	/* XO_COL_BLACK */
    "red",	/* XO_COL_RED */
    "green",	/* XO_COL_GREEN */
    "yellow",	/* XO_COL_YELLOW */
    "blue",	/* XO_COL_BLUE */
    "magenta",	/* XO_COL_MAGENTA */
    "cyan",	/* XO_COL_CYAN */
    "white",	/* XO_COL_WHITE */
    NULL
};
+
+static int
+xo_color_find (const char *str)
+{
+ int i;
+
+ for (i = 0; xo_color_names[i]; i++) {
+ if (strcmp(xo_color_names[i], str) == 0)
+ return i;
+ }
+
+ return -1;
+}
+
/*
 * Display-effect names, indexed by the XO_EFF_* bit positions;
 * NULL-terminated for xo_effect_find().
 */
static const char *xo_effect_names[] = {
    "reset",		/* XO_EFF_RESET */
    "normal",		/* XO_EFF_NORMAL */
    "bold",		/* XO_EFF_BOLD */
    "underline",	/* XO_EFF_UNDERLINE */
    "inverse",		/* XO_EFF_INVERSE */
    NULL
};
+
/*
 * ANSI SGR codes that turn each effect ON; indexed identically to
 * xo_effect_names.
 */
static const char *xo_effect_on_codes[] = {
    "0",	/* XO_EFF_RESET */
    "0",	/* XO_EFF_NORMAL */
    "1",	/* XO_EFF_BOLD */
    "4",	/* XO_EFF_UNDERLINE */
    "7",	/* XO_EFF_INVERSE */
    NULL
};
+
#if 0
/*
 * See comment below re: joy of terminal standards.  These can
 * be used by just adding:
 * + if (newp->xoc_effects & bit)
 *       code = xo_effect_on_codes[i];
 * + else
 * +     code = xo_effect_off_codes[i];
 * in xo_color_handle_text.
 */
static const char *xo_effect_off_codes[] = {
    "0",	/* XO_EFF_RESET */
    "0",	/* XO_EFF_NORMAL */
    "21",	/* XO_EFF_BOLD */
    "24",	/* XO_EFF_UNDERLINE */
    "27",	/* XO_EFF_INVERSE */
    NULL
};
#endif /* 0 */
+
+static int
+xo_effect_find (const char *str)
+{
+ int i;
+
+ for (i = 0; xo_effect_names[i]; i++) {
+ if (strcmp(xo_effect_names[i], str) == 0)
+ return i;
+ }
+
+ return -1;
+}
+
+static void
+xo_colors_parse (xo_handle_t *xop, xo_colors_t *xocp, char *str)
+{
+#ifdef LIBXO_TEXT_ONLY
+ return;
+#endif /* LIBXO_TEXT_ONLY */
+
+ char *cp, *ep, *np, *xp;
+ int len = strlen(str);
+ int rc;
+
+ /*
+ * Possible tokens: colors, bg-colors, effects, no-effects, "reset".
+ */
+ for (cp = str, ep = cp + len - 1; cp && cp < ep; cp = np) {
+ /* Trim leading whitespace */
+ while (isspace((int) *cp))
+ cp += 1;
+
+ np = strchr(cp, ',');
+ if (np)
+ *np++ = '\0';
+
+ /* Trim trailing whitespace */
+ xp = cp + strlen(cp) - 1;
+ while (isspace(*xp) && xp > cp)
+ *xp-- = '\0';
+
+ if (cp[0] == 'f' && cp[1] == 'g' && cp[2] == '-') {
+ rc = xo_color_find(cp + 3);
+ if (rc < 0)
+ goto unknown;
+
+ xocp->xoc_col_fg = rc;
+
+ } else if (cp[0] == 'b' && cp[1] == 'g' && cp[2] == '-') {
+ rc = xo_color_find(cp + 3);
+ if (rc < 0)
+ goto unknown;
+ xocp->xoc_col_bg = rc;
+
+ } else if (cp[0] == 'n' && cp[1] == 'o' && cp[2] == '-') {
+ rc = xo_effect_find(cp + 3);
+ if (rc < 0)
+ goto unknown;
+ xocp->xoc_effects &= ~(1 << rc);
+
+ } else {
+ rc = xo_effect_find(cp);
+ if (rc < 0)
+ goto unknown;
+ xocp->xoc_effects |= 1 << rc;
+
+ switch (1 << rc) {
+ case XO_EFF_RESET:
+ xocp->xoc_col_fg = xocp->xoc_col_bg = 0;
+ /* Note: not "|=" since we want to wipe out the old value */
+ xocp->xoc_effects = XO_EFF_RESET;
+ break;
+
+ case XO_EFF_NORMAL:
+ xocp->xoc_effects &= ~(XO_EFF_BOLD | XO_EFF_UNDERLINE
+ | XO_EFF_INVERSE | XO_EFF_NORMAL);
+ break;
+ }
+ }
+ continue;
+
+ unknown:
+ if (XOF_ISSET(xop, XOF_WARN))
+ xo_failure(xop, "unknown color/effect string detected: '%s'", cp);
+ }
+}
+
/*
 * Indicate whether color/effect handling is active for this handle;
 * always off when libxo is built text-only.  Returns the raw
 * XOF_COLOR flag value (nonzero == enabled), not a normalized 0/1.
 */
static inline int
xo_colors_enabled (xo_handle_t *xop UNUSED)
{
#ifdef LIBXO_TEXT_ONLY
    return 0;
#else /* LIBXO_TEXT_ONLY */
    return XOF_ISSET(xop, XOF_COLOR);
#endif /* LIBXO_TEXT_ONLY */
}
+
/*
 * Emit the ANSI escape sequence that moves the terminal from the
 * handle's current colors/effects (xop->xo_colors) to *newp.  The
 * sequence is built in a local buffer and appended to xo_data.
 */
static void
xo_colors_handle_text (xo_handle_t *xop, xo_colors_t *newp)
{
    char buf[BUFSIZ];
    char *cp = buf, *ep = buf + sizeof(buf);
    unsigned i, bit;
    xo_colors_t *oldp = &xop->xo_colors;
    const char *code = NULL;

    /*
     * Start the buffer with an escape.  We don't want to add the '['
     * now, since we let xo_effect_text_add unconditionally add the ';'.
     * We'll replace the first ';' with a '[' when we're done.
     */
    *cp++ = 0x1b;		/* Escape */

    /*
     * Terminals were designed back in the age before "certainty" was
     * invented, when standards were more what you'd call "guidelines"
     * than actual rules.  Anyway we can't depend on them to operate
     * correctly.  So when display attributes are changed, we punt,
     * reseting them all and turning back on the ones we want to keep.
     * Longer, but should be completely reliable.  Savvy?
     */
    if (oldp->xoc_effects != (newp->xoc_effects & oldp->xoc_effects)) {
	/* Some old effect must be turned off: force a full reset */
	newp->xoc_effects |= XO_EFF_RESET;
	oldp->xoc_effects = 0;
    }

    /* Emit an "on" code for every effect bit that changed */
    for (i = 0, bit = 1; xo_effect_names[i]; i++, bit <<= 1) {
	if ((newp->xoc_effects & bit) == (oldp->xoc_effects & bit))
	    continue;

	code = xo_effect_on_codes[i];

	cp += snprintf(cp, ep - cp, ";%s", code);
	if (cp >= ep)
	    return;		/* Should not occur */

	if (bit == XO_EFF_RESET) {
	    /* Mark up the old value so we can detect current values as new */
	    oldp->xoc_effects = 0;
	    oldp->xoc_col_fg = oldp->xoc_col_bg = XO_COL_DEFAULT;
	}
    }

    /* Foreground: "3<n>"; color indexes are origin-1, '9' means default */
    if (newp->xoc_col_fg != oldp->xoc_col_fg) {
	cp += snprintf(cp, ep - cp, ";3%u",
		       (newp->xoc_col_fg != XO_COL_DEFAULT)
		       ? newp->xoc_col_fg - 1 : 9);
    }

    /* Background: "4<n>", same encoding as foreground */
    if (newp->xoc_col_bg != oldp->xoc_col_bg) {
	cp += snprintf(cp, ep - cp, ";4%u",
		       (newp->xoc_col_bg != XO_COL_DEFAULT)
		       ? newp->xoc_col_bg - 1 : 9);
    }

    /* Only emit if something was added after the escape byte */
    if (cp - buf != 1 && cp < ep - 3) {
	buf[1] = '[';		/* Overwrite leading ';' */
	*cp++ = 'm';		/* Terminate the SGR sequence */
	*cp = '\0';
	xo_buf_append(&xop->xo_data, buf, cp - buf);
    }
}
+
/*
 * Rebuild xo_color_buf as a set of HTML class-name fragments
 * (" effect-bold color-fg-red ...") representing *newp.  The buffer
 * is later attached to each emitted <div> until the colors change.
 */
static void
xo_colors_handle_html (xo_handle_t *xop, xo_colors_t *newp)
{
    xo_colors_t *oldp = &xop->xo_colors;

    /*
     * HTML colors are mostly trivial: fill in xo_color_buf with
     * a set of class tags representing the colors and effects.
     */

    /* If nothing changed, then do nothing */
    if (oldp->xoc_effects == newp->xoc_effects
	&& oldp->xoc_col_fg == newp->xoc_col_fg
	&& oldp->xoc_col_bg == newp->xoc_col_bg)
	return;

    unsigned i, bit;
    xo_buffer_t *xbp = &xop->xo_color_buf;

    xo_buf_reset(xbp);		/* We rebuild content after each change */

    /* One " effect-<name>" class per active effect bit */
    for (i = 0, bit = 1; xo_effect_names[i]; i++, bit <<= 1) {
	if (!(newp->xoc_effects & bit))
	    continue;

	xo_buf_append_str(xbp, " effect-");
	xo_buf_append_str(xbp, xo_effect_names[i]);
    }

    const char *fg = NULL;
    const char *bg = NULL;

    if (newp->xoc_col_fg != XO_COL_DEFAULT)
	fg = xo_color_names[newp->xoc_col_fg];
    if (newp->xoc_col_bg != XO_COL_DEFAULT)
	bg = xo_color_names[newp->xoc_col_bg];

    /* Inverse swaps fg/bg; "inverse" is the class for a defaulted side */
    if (newp->xoc_effects & XO_EFF_INVERSE) {
	const char *tmp = fg;
	fg = bg;
	bg = tmp;
	if (fg == NULL)
	    fg = "inverse";
	if (bg == NULL)
	    bg = "inverse";

    }

    if (fg) {
	xo_buf_append_str(xbp, " color-fg-");
	xo_buf_append_str(xbp, fg);
    }

    if (bg) {
	xo_buf_append_str(xbp, " color-bg-");
	xo_buf_append_str(xbp, bg);
    }
}
+
/*
 * Handle a "{C:}" color field: collect the color/effect string
 * (static, or formatted from varargs), parse it, and hand the result
 * to the text or HTML color handler.  Encoding styles only consume
 * the varargs.
 */
static void
xo_format_colors (xo_handle_t *xop, xo_field_info_t *xfip,
		  const char *str, unsigned len)
{
    const char *fmt = xfip->xfi_format;
    unsigned flen = xfip->xfi_flen;

    xo_buffer_t xb;

    /* If the string is static and we've in an encoding style, bail */
    if (len != 0 && xo_style_is_encoding(xop))
	return;

    xo_buf_init(&xb);

    if (len)
	xo_buf_append(&xb, str, len);
    else if (flen)
	xo_do_format_field(xop, &xb, fmt, flen, 0);
    else
	xo_buf_append(&xb, "reset", 6); /* Default if empty; 6 bytes
					 * appends the trailing NUL too */

    if (xo_colors_enabled(xop)) {
	switch (xo_style(xop)) {
	case XO_STYLE_TEXT:
	case XO_STYLE_HTML:
	    /* NUL-terminate so xo_colors_parse can treat it as a string */
	    xo_buf_append(&xb, "", 1);

	    xo_colors_t xoc = xop->xo_colors;
	    xo_colors_parse(xop, &xoc, xb.xb_bufp);

	    if (xo_style(xop) == XO_STYLE_TEXT) {
		/*
		 * Text mode means emitting the colors as ANSI character
		 * codes.  This will allow people who like colors to have
		 * colors.  The issue is, of course conflicting with the
		 * user's perfectly reasonable color scheme.  Which leads
		 * to the hell of LSCOLORS, where even app need to have
		 * customization hooks for adjusting colors.  Instead we
		 * provide a simpler-but-still-annoying answer where one
		 * can map colors to other colors.
		 */
		xo_colors_handle_text(xop, &xoc);
		xoc.xoc_effects &= ~XO_EFF_RESET; /* After handling it */

	    } else {
		/*
		 * HTML output is wrapped in divs, so the color information
		 * must appear in every div until cleared.  Most pathetic.
		 * Most unavoidable.
		 */
		xoc.xoc_effects &= ~XO_EFF_RESET; /* Before handling effects */
		xo_colors_handle_html(xop, &xoc);
	    }

	    xop->xo_colors = xoc;
	    break;

	case XO_STYLE_XML:
	case XO_STYLE_JSON:
	case XO_STYLE_SDPARAMS:
	case XO_STYLE_ENCODER:
	    /*
	     * Nothing to do; we did all that work just to clear the stack of
	     * formatting arguments.
	     */
	    break;
	}
    }

    xo_buf_cleanup(&xb);
}
+
/*
 * Handle a "{U:}" units field.  Normally units are emitted as plain
 * content, but when XOIF_UNITS_PENDING is set the units must become
 * an attribute (units="..." / data-units="...") of the value emitted
 * earlier, so the attribute text is formatted at the end of xo_data
 * and then rotated back to xo_units_offset.
 */
static void
xo_format_units (xo_handle_t *xop, xo_field_info_t *xfip,
		 const char *str, unsigned len)
{
    const char *fmt = xfip->xfi_format;
    unsigned flen = xfip->xfi_flen;
    xo_xff_flags_t flags = xfip->xfi_flags;

    static char units_start_xml[] = " units=\"";
    static char units_start_html[] = " data-units=\"";

    /* No pending value to annotate: emit the units as normal content */
    if (!XOIF_ISSET(xop, XOIF_UNITS_PENDING)) {
	xo_format_content(xop, "units", NULL, str, len, fmt, flen, flags);
	return;
    }

    xo_buffer_t *xbp = &xop->xo_data;
    int start = xop->xo_units_offset;	/* Where the attribute belongs */
    int stop = xbp->xb_curp - xbp->xb_bufp; /* Current end of buffer */

    if (xo_style(xop) == XO_STYLE_XML)
	xo_buf_append(xbp, units_start_xml, sizeof(units_start_xml) - 1);
    else if (xo_style(xop) == XO_STYLE_HTML)
	xo_buf_append(xbp, units_start_html, sizeof(units_start_html) - 1);
    else
	return;

    if (len)
	xo_data_escape(xop, str, len);
    else
	xo_do_format_field(xop, NULL, fmt, flen, flags);

    xo_buf_append(xbp, "\"", 1);

    int now = xbp->xb_curp - xbp->xb_bufp;
    int delta = now - stop;		/* Size of the attribute text */
    if (delta <= 0) {			/* Strange; no output to move */
	xbp->xb_curp = xbp->xb_bufp + stop; /* Reset buffer to prior state */
	return;
    }

    /*
     * Now we're in it alright.  We've need to insert the unit value
     * we just created into the right spot.  We make a local copy,
     * move it and then insert our copy.  We know there's room in the
     * buffer, since we're just moving this around.
     */
    char *buf = alloca(delta);

    memcpy(buf, xbp->xb_bufp + stop, delta);
    memmove(xbp->xb_bufp + start + delta, xbp->xb_bufp + start, stop - start);
    memmove(xbp->xb_bufp + start, buf, delta);
}
+
/*
 * Return the width specified for an anchor field, either parsed from
 * static content (str/len) or pulled from the varargs via a "%d"
 * format.  Returns 0 (meaning "no width") on any parse failure.
 */
static int
xo_find_width (xo_handle_t *xop, xo_field_info_t *xfip,
	       const char *str, unsigned len)
{
    const char *fmt = xfip->xfi_format;
    unsigned flen = xfip->xfi_flen;

    long width = 0;
    char *bp;
    char *cp;

    if (len) {
	bp = alloca(len + 1);	/* Make local NUL-terminated copy of str */
	memcpy(bp, str, len);
	bp[len] = '\0';

	width = strtol(bp, &cp, 0);
	/* LONG_MIN/LONG_MAX also catch strtol overflow saturation */
	if (width == LONG_MIN || width == LONG_MAX
	    || bp == cp || *cp != '\0' ) {
	    width = 0;
	    xo_failure(xop, "invalid width for anchor: '%s'", bp);
	}
    } else if (flen) {
	/* Only "%d" is accepted, since the value is fetched as an int */
	if (flen != 2 || strncmp("%d", fmt, flen) != 0)
	    xo_failure(xop, "invalid width format: '%*.*s'", flen, flen, fmt);
	if (!XOF_ISSET(xop, XOF_NO_VA_ARG))
	    width = va_arg(xop->xo_vap, int);
    }

    /* NOTE(review): long is narrowed to int here; widths are assumed
     * to be small (see XO_MAX_ANCHOR_WIDTH check in callers) */
    return width;
}
+
+static void
+xo_anchor_clear (xo_handle_t *xop)
+{
+ XOIF_CLEAR(xop, XOIF_ANCHOR);
+ xop->xo_anchor_offset = 0;
+ xop->xo_anchor_columns = 0;
+ xop->xo_anchor_min_width = 0;
+}
+
+/*
+ * An anchor is a marker used to delay field width implications.
+ * Imagine the format string "{[:10}{min:%d}/{cur:%d}/{max:%d}{:]}".
+ * We are looking for output like " 1/4/5"
+ *
+ * To make this work, we record the anchor and then return to
+ * format it when the end anchor tag is seen.
+ */
/*
 * Handle a "{[:}" field: begin recording output for later width
 * adjustment (see comment above).  Only meaningful for the display
 * styles (text and HTML).
 */
static void
xo_anchor_start (xo_handle_t *xop, xo_field_info_t *xfip,
		 const char *str, unsigned len)
{
    if (xo_style(xop) != XO_STYLE_TEXT && xo_style(xop) != XO_STYLE_HTML)
	return;

    /* A new start anchor silently discards any unfinished one */
    if (XOIF_ISSET(xop, XOIF_ANCHOR))
	xo_failure(xop, "the anchor already recording is discarded");

    XOIF_SET(xop, XOIF_ANCHOR);
    xo_buffer_t *xbp = &xop->xo_data;
    xop->xo_anchor_offset = xbp->xb_curp - xbp->xb_bufp;
    xop->xo_anchor_columns = 0;

    /*
     * Now we find the width, if possible.  If it's not there,
     * we'll get it on the end anchor.
     */
    xop->xo_anchor_min_width = xo_find_width(xop, xfip, str, len);
}
+
/*
 * Handle a "{:]}" field: close the anchor opened by xo_anchor_start
 * and pad the recorded output to the requested width.  A positive
 * width right-justifies (padding is rotated in front of the anchored
 * text); a negative width left-justifies (padding stays at the end).
 */
static void
xo_anchor_stop (xo_handle_t *xop, xo_field_info_t *xfip,
		const char *str, unsigned len)
{
    if (xo_style(xop) != XO_STYLE_TEXT && xo_style(xop) != XO_STYLE_HTML)
	return;

    if (!XOIF_ISSET(xop, XOIF_ANCHOR)) {
	xo_failure(xop, "no start anchor");
	return;
    }

    XOIF_CLEAR(xop, XOIF_UNITS_PENDING);

    /* Width given here overrides the one from the start anchor */
    int width = xo_find_width(xop, xfip, str, len);
    if (width == 0)
	width = xop->xo_anchor_min_width;

    if (width == 0)		/* No width given; nothing to do */
	goto done;

    xo_buffer_t *xbp = &xop->xo_data;
    int start = xop->xo_anchor_offset;
    int stop = xbp->xb_curp - xbp->xb_bufp;
    int abswidth = (width > 0) ? width : -width;
    int blen = abswidth - xop->xo_anchor_columns; /* Columns of padding */

    if (blen <= 0)		/* Already over width */
	goto done;

    if (abswidth > XO_MAX_ANCHOR_WIDTH) {
	xo_failure(xop, "width over %u are not supported",
		   XO_MAX_ANCHOR_WIDTH);
	goto done;
    }

    /* Make a suitable padding field and emit it */
    char *buf = alloca(blen);
    memset(buf, ' ', blen);
    xo_format_content(xop, "padding", NULL, buf, blen, NULL, 0, 0);

    if (width < 0)		/* Already left justified */
	goto done;

    int now = xbp->xb_curp - xbp->xb_bufp;
    int delta = now - stop;	/* Size of the emitted padding */
    if (delta <= 0)		/* Strange; no output to move */
	goto done;

    /*
     * Now we're in it alright.  We've need to insert the padding data
     * we just created (which might be an HTML <div> or text) before
     * the formatted data.  We make a local copy, move it and then
     * insert our copy.  We know there's room in the buffer, since
     * we're just moving this around.
     */
    if (delta > blen)
	buf = alloca(delta);	/* Expand buffer if needed */

    memcpy(buf, xbp->xb_bufp + stop, delta);
    memmove(xbp->xb_bufp + start + delta, xbp->xb_bufp + start, stop - start);
    memmove(xbp->xb_bufp + start, buf, delta);

 done:
    xo_anchor_clear(xop);
}
+
/*
 * Return the HTML class name used for a given field role character,
 * or NULL for roles that have no class of their own.
 */
static const char *
xo_class_name (int ftype)
{
    const char *name = NULL;

    switch (ftype) {
    case 'D':
	name = "decoration";
	break;
    case 'E':
	name = "error";
	break;
    case 'L':
	name = "label";
	break;
    case 'N':
	name = "note";
	break;
    case 'P':
	name = "padding";
	break;
    case 'W':
	name = "warning";
	break;
    }

    return name;
}
+
/*
 * Return the container tag name used for a field role character in
 * the encoding styles, or NULL if the role gets no tag.
 */
static const char *
xo_tag_name (int ftype)
{
    if (ftype == 'E')
	return "__error";
    if (ftype == 'W')
	return "__warning";

    return NULL;
}
+
/*
 * Report whether a field role gets a default format when none is
 * given.  Colors, gettext fields, and anchors can be completely
 * empty and/or without formatting; everything else defaults.
 */
static int
xo_role_wants_default_format (int ftype)
{
    return (ftype == 'C' || ftype == 'G'
	    || ftype == '[' || ftype == ']') ? 0 : 1;
}
+
/*
 * Long-form role names accepted in comma-separated field modifiers
 * (e.g. "{,label:}"); see xo_parse_roles().
 */
static xo_mapping_t xo_role_names[] = {
    { 'C', "color" },
    { 'D', "decoration" },
    { 'E', "error" },
    { 'L', "label" },
    { 'N', "note" },
    { 'P', "padding" },
    { 'T', "title" },
    { 'U', "units" },
    { 'V', "value" },
    { 'W', "warning" },
    { '[', "start-anchor" },
    { ']', "stop-anchor" },
    { 0, NULL }
};
+
/* Pseudo-roles used internally by the field parser (never user-visible) */
#define XO_ROLE_EBRACE	'{'	/* Escaped braces ("{{...}}") */
#define XO_ROLE_TEXT	'+'	/* Plain text between fields */
#define XO_ROLE_NEWLINE	'\n'	/* A bare newline in the format string */
+
/*
 * Long-form modifier names accepted in comma-separated field
 * modifiers; several flags intentionally have more than one alias
 * (e.g. "humanize"/"hn", "quote"/"quotes").
 */
static xo_mapping_t xo_modifier_names[] = {
    { XFF_ARGUMENT, "argument" },
    { XFF_COLON, "colon" },
    { XFF_COMMA, "comma" },
    { XFF_DISPLAY_ONLY, "display" },
    { XFF_ENCODE_ONLY, "encoding" },
    { XFF_GT_FIELD, "gettext" },
    { XFF_HUMANIZE, "humanize" },
    { XFF_HUMANIZE, "hn" },
    { XFF_HN_SPACE, "hn-space" },
    { XFF_HN_DECIMAL, "hn-decimal" },
    { XFF_HN_1000, "hn-1000" },
    { XFF_KEY, "key" },
    { XFF_LEAF_LIST, "leaf-list" },
    { XFF_LEAF_LIST, "list" },
    { XFF_NOQUOTE, "no-quotes" },
    { XFF_NOQUOTE, "no-quote" },
    { XFF_GT_PLURAL, "plural" },
    { XFF_QUOTE, "quotes" },
    { XFF_QUOTE, "quote" },
    { XFF_TRIM_WS, "trim" },
    { XFF_WS, "white" },
    { 0, NULL }
};
+
#ifdef NOT_NEEDED_YET
/* Single-letter aliases for the modifiers above; currently unused */
static xo_mapping_t xo_modifier_short_names[] = {
    { XFF_COLON, "c" },
    { XFF_DISPLAY_ONLY, "d" },
    { XFF_ENCODE_ONLY, "e" },
    { XFF_GT_FIELD, "g" },
    { XFF_HUMANIZE, "h" },
    { XFF_KEY, "k" },
    { XFF_LEAF_LIST, "l" },
    { XFF_NOQUOTE, "n" },
    { XFF_GT_PLURAL, "p" },
    { XFF_QUOTE, "q" },
    { XFF_TRIM_WS, "t" },
    { XFF_WS, "w" },
    { 0, NULL }
};
#endif /* NOT_NEEDED_YET */
+
+static int
+xo_count_fields (xo_handle_t *xop UNUSED, const char *fmt)
+{
+ int rc = 1;
+ const char *cp;
+
+ for (cp = fmt; *cp; cp++)
+ if (*cp == '{' || *cp == '\n')
+ rc += 1;
+
+ return rc * 2 + 1;
+}
+
+/*
+ * The field format is:
+ * '{' modifiers ':' content [ '/' print-fmt [ '/' encode-fmt ]] '}'
+ * Roles are optional and include the following field types:
+ * 'D': decoration; something non-text and non-data (colons, commmas)
+ * 'E': error message
+ * 'G': gettext() the entire string; optional domainname as content
+ * 'L': label; text preceding data
+ * 'N': note; text following data
+ * 'P': padding; whitespace
+ * 'T': Title, where 'content' is a column title
+ * 'U': Units, where 'content' is the unit label
+ * 'V': value, where 'content' is the name of the field (the default)
+ * 'W': warning message
+ * '[': start a section of anchored text
+ * ']': end a section of anchored text
+ * The following modifiers are also supported:
+ * 'a': content is provided via argument (const char *), not descriptor
+ * 'c': flag: emit a colon after the label
+ * 'd': field is only emitted for display styles (text and html)
+ * 'e': field is only emitted for encoding styles (xml and json)
+ * 'g': gettext() the field
+ * 'h': humanize a numeric value (only for display styles)
+ * 'k': this field is a key, suitable for XPath predicates
+ * 'l': a leaf-list, a simple list of values
+ * 'n': no quotes around this field
+ * 'p': the field has plural gettext semantics (ngettext)
+ * 'q': add quotes around this field
+ * 't': trim whitespace around the value
+ * 'w': emit a blank after the label
 * The print-fmt and encode-fmt strings are the printf-style formatting
 * for this data.  JSON and XML will use the encode-fmt, if present.
+ * If the encode-fmt is not provided, it defaults to the print-fmt.
+ * If the print-fmt is not provided, it defaults to 's'.
+ */
/*
 * Parse the role/modifier prefix of a field descriptor (everything
 * between '{' and the first ':', '/', or '}').  Fills in xfip's
 * flags, role type ('V' by default), and explicit field number.
 * Returns a pointer to the terminating character, or NULL (after
 * calling xo_failure) on a malformed descriptor.
 */
static const char *
xo_parse_roles (xo_handle_t *xop, const char *fmt,
		const char *basep, xo_field_info_t *xfip)
{
    const char *sp;
    unsigned ftype = 0;
    xo_xff_flags_t flags = 0;
    uint8_t fnum = 0;

    for (sp = basep; sp && *sp; sp++) {
	if (*sp == ':' || *sp == '/' || *sp == '}')
	    break;

	if (*sp == '\\') {
	    if (sp[1] == '\0') {
		xo_failure(xop, "backslash at the end of string");
		return NULL;
	    }

	    /* Anything backslashed is ignored */
	    sp += 1;
	    continue;
	}

	/* A comma introduces a long-form keyword (role or modifier) */
	if (*sp == ',') {
	    const char *np;
	    for (np = ++sp; *np; np++)
		if (*np == ':' || *np == '/' || *np == '}' || *np == ',')
		    break;

	    int slen = np - sp;
	    if (slen > 0) {
		xo_xff_flags_t value;

		/* Try role names first, then modifier names */
		value = xo_name_lookup(xo_role_names, sp, slen);
		if (value)
		    ftype = value;
		else {
		    value = xo_name_lookup(xo_modifier_names, sp, slen);
		    if (value)
			flags |= value;
		    else
			xo_failure(xop, "unknown keyword ignored: '%.*s'",
				   slen, sp);
		}
	    }

	    sp = np - 1;	/* Loop increment moves past the keyword */
	    continue;
	}

	switch (*sp) {
	case 'C':
	case 'D':
	case 'E':
	case 'G':
	case 'L':
	case 'N':
	case 'P':
	case 'T':
	case 'U':
	case 'V':
	case 'W':
	case '[':
	case ']':
	    /* Role letters; at most one role per field */
	    if (ftype != 0) {
		xo_failure(xop, "field descriptor uses multiple types: '%s'",
			   xo_printable(fmt));
		return NULL;
	    }
	    ftype = *sp;
	    break;

	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
	    /* Digits accumulate into an explicit field number */
	    fnum = (fnum * 10) + (*sp - '0');
	    break;

	case 'a':
	    flags |= XFF_ARGUMENT;
	    break;

	case 'c':
	    flags |= XFF_COLON;
	    break;

	case 'd':
	    flags |= XFF_DISPLAY_ONLY;
	    break;

	case 'e':
	    flags |= XFF_ENCODE_ONLY;
	    break;

	case 'g':
	    flags |= XFF_GT_FIELD;
	    break;

	case 'h':
	    flags |= XFF_HUMANIZE;
	    break;

	case 'k':
	    flags |= XFF_KEY;
	    break;

	case 'l':
	    flags |= XFF_LEAF_LIST;
	    break;

	case 'n':
	    flags |= XFF_NOQUOTE;
	    break;

	case 'p':
	    flags |= XFF_GT_PLURAL;
	    break;

	case 'q':
	    flags |= XFF_QUOTE;
	    break;

	case 't':
	    flags |= XFF_TRIM_WS;
	    break;

	case 'w':
	    flags |= XFF_WS;
	    break;

	default:
	    xo_failure(xop, "field descriptor uses unknown modifier: '%s'",
		       xo_printable(fmt));
	    /*
	     * No good answer here; a bad format will likely
	     * mean a core file.  We just return and hope
	     * the caller notices there's no output, and while
	     * that seems, well, bad, there's nothing better.
	     */
	    return NULL;
	}

	/* 'N' and 'U' fields never take the colon modifier */
	if (ftype == 'N' || ftype == 'U') {
	    if (flags & XFF_COLON) {
		xo_failure(xop, "colon modifier on 'N' or 'U' field ignored: "
			   "'%s'", xo_printable(fmt));
		flags &= ~XFF_COLON;
	    }
	}
    }

    xfip->xfi_flags = flags;
    xfip->xfi_ftype = ftype ?: 'V';	/* Default role is value */
    xfip->xfi_fnum = fnum;

    return sp;
}
+
+/*
+ * Number any remaining fields that need numbers. Note that some
+ * field types (text, newline, escaped braces) never get numbers.
+ */
+static void
+xo_gettext_finish_numbering_fields (xo_handle_t *xop UNUSED,
+ const char *fmt UNUSED,
+ xo_field_info_t *fields)
+{
+ xo_field_info_t *xfip;
+ unsigned fnum, max_fields;
+ uint64_t bits = 0;
+
+ /* First make a list of add the explicitly used bits */
+ for (xfip = fields, fnum = 0; xfip->xfi_ftype; xfip++) {
+ switch (xfip->xfi_ftype) {
+ case XO_ROLE_NEWLINE: /* Don't get numbered */
+ case XO_ROLE_TEXT:
+ case XO_ROLE_EBRACE:
+ case 'G':
+ continue;
+ }
+
+ fnum += 1;
+ if (fnum >= 63)
+ break;
+
+ if (xfip->xfi_fnum)
+ bits |= 1 << xfip->xfi_fnum;
+ }
+
+ max_fields = fnum;
+
+ for (xfip = fields, fnum = 0; xfip->xfi_ftype; xfip++) {
+ switch (xfip->xfi_ftype) {
+ case XO_ROLE_NEWLINE: /* Don't get numbered */
+ case XO_ROLE_TEXT:
+ case XO_ROLE_EBRACE:
+ case 'G':
+ continue;
+ }
+
+ if (xfip->xfi_fnum != 0)
+ continue;
+
+ /* Find the next unassigned field */
+ for (fnum++; bits & (1 << fnum); fnum++)
+ continue;
+
+ if (fnum > max_fields)
+ break;
+
+ xfip->xfi_fnum = fnum; /* Mark the field number */
+ bits |= 1 << fnum; /* Mark it used */
+ }
+}
+
+/*
+ * The format string uses field numbers, so we need to whiffle through it
+ * and make sure everything's sane and lovely.
+ */
+static int
+xo_parse_field_numbers (xo_handle_t *xop, const char *fmt,
+ xo_field_info_t *fields, unsigned num_fields)
+{
+ xo_field_info_t *xfip;
+ unsigned field, fnum;
+ uint64_t bits = 0;
+
+ for (xfip = fields, field = 0; field < num_fields; xfip++, field++) {
+ /* Fields default to 1:1 with natural position */
+ if (xfip->xfi_fnum == 0)
+ xfip->xfi_fnum = field + 1;
+ else if (xfip->xfi_fnum > num_fields) {
+ xo_failure(xop, "field number exceeds number of fields: '%s'", fmt);
+ return -1;
+ }
+
+ fnum = xfip->xfi_fnum - 1; /* Move to zero origin */
+ if (fnum < 64) { /* Only test what fits */
+ if (bits & (1 << fnum)) {
+ xo_failure(xop, "field number %u reused: '%s'",
+ xfip->xfi_fnum, fmt);
+ return -1;
+ }
+ bits |= 1 << fnum;
+ }
+ }
+
+ return 0;
+}
+
/*
 * Parse a format string into the 'fields' array (at most num_fields
 * entries).  Recognizes newline fields, runs of plain text,
 * "{{escaped braces}}", and full "{roles:content/format/encoding}"
 * descriptors.  Returns 0 on success, -1 (after xo_failure) on a
 * malformed format string.
 */
static int
xo_parse_fields (xo_handle_t *xop, xo_field_info_t *fields,
		 unsigned num_fields, const char *fmt)
{
    const char *cp, *sp, *ep, *basep;
    unsigned field = 0;
    xo_field_info_t *xfip = fields;
    unsigned seen_fnum = 0;

    for (cp = fmt; *cp && field < num_fields; field++, xfip++) {
	xfip->xfi_start = cp;

	/* A bare newline is its own field */
	if (*cp == '\n') {
	    xfip->xfi_ftype = XO_ROLE_NEWLINE;
	    xfip->xfi_len = 1;
	    cp += 1;
	    continue;
	}

	if (*cp != '{') {
	    /* Normal text */
	    for (sp = cp; *sp; sp++) {
		if (*sp == '{' || *sp == '\n')
		    break;
	    }

	    xfip->xfi_ftype = XO_ROLE_TEXT;
	    xfip->xfi_content = cp;
	    xfip->xfi_clen = sp - cp;
	    xfip->xfi_next = sp;

	    cp = sp;
	    continue;
	}

	if (cp[1] == '{') {	/* Start of {{escaped braces}} */
	    xfip->xfi_start = cp + 1; /* Start at second brace */
	    xfip->xfi_ftype = XO_ROLE_EBRACE;

	    cp += 2;		/* Skip over _both_ characters */
	    for (sp = cp; *sp; sp++) {
		if (*sp == '}' && sp[1] == '}')
		    break;
	    }
	    if (*sp == '\0') {
		xo_failure(xop, "missing closing '}}': '%s'",
			   xo_printable(fmt));
		return -1;
	    }

	    xfip->xfi_len = sp - xfip->xfi_start + 1;

	    /* Move along the string, but don't run off the end */
	    if (*sp == '}' && sp[1] == '}')
		sp += 2;
	    /* NOTE(review): both arms are identical; this is just "cp = sp" */
	    cp = *sp ? sp : sp;
	    xfip->xfi_next = cp;
	    continue;
	}

	/* We are looking at the start of a field definition */
	xfip->xfi_start = basep = cp + 1;

	const char *format = NULL;
	int flen = 0;

	/* Looking at roles and modifiers */
	sp = xo_parse_roles(xop, fmt, basep, xfip);
	if (sp == NULL) {
	    /* xo_failure has already been called */
	    return -1;
	}

	if (xfip->xfi_fnum)
	    seen_fnum = 1;

	/* Looking at content */
	if (*sp == ':') {
	    for (ep = ++sp; *sp; sp++) {
		if (*sp == '}' || *sp == '/')
		    break;
		if (*sp == '\\') {
		    if (sp[1] == '\0') {
			xo_failure(xop, "backslash at the end of string");
			return -1;
		    }
		    sp += 1;	/* Skip the escaped character */
		    continue;
		}
	    }
	    if (ep != sp) {
		xfip->xfi_clen = sp - ep;
		xfip->xfi_content = ep;
	    }
	} else {
	    xo_failure(xop, "missing content (':'): '%s'", xo_printable(fmt));
	    return -1;
	}

	/* Looking at main (display) format */
	if (*sp == '/') {
	    for (ep = ++sp; *sp; sp++) {
		if (*sp == '}' || *sp == '/')
		    break;
		if (*sp == '\\') {
		    if (sp[1] == '\0') {
			xo_failure(xop, "backslash at the end of string");
			return -1;
		    }
		    sp += 1;	/* Skip the escaped character */
		    continue;
		}
	    }
	    flen = sp - ep;
	    format = ep;
	}

	/* Looking at encoding format */
	if (*sp == '/') {
	    for (ep = ++sp; *sp; sp++) {
		if (*sp == '}')
		    break;
	    }

	    xfip->xfi_encoding = ep;
	    xfip->xfi_elen = sp - ep;
	}

	if (*sp != '}') {
	    xo_failure(xop, "missing closing '}': %s", xo_printable(fmt));
	    return -1;
	}

	xfip->xfi_len = sp - xfip->xfi_start;
	xfip->xfi_next = ++sp;

	/* If we have content, then we have a default format */
	if (xfip->xfi_clen || format || (xfip->xfi_flags & XFF_ARGUMENT)) {
	    if (format) {
		xfip->xfi_format = format;
		xfip->xfi_flen = flen;
	    } else if (xo_role_wants_default_format(xfip->xfi_ftype)) {
		xfip->xfi_format = xo_default_format;
		xfip->xfi_flen = 2;
	    }
	}

	cp = sp;
    }

    int rc = 0;

    /*
     * If we saw a field number on at least one field, then we need
     * to enforce some rules and/or guidelines.
     */
    if (seen_fnum)
	rc = xo_parse_field_numbers(xop, fmt, fields, field);

    return rc;
}
+
/*
 * We are passed a pointer to a format string just past the "{G:}"
 * field.  We build a simplified version of the format string: only
 * role letter, field number, and content survive; display/encoding
 * formats and modifiers are dropped.  'field_cb', when given, is
 * invoked for each gettext-marked non-value field.
 */
static int
xo_gettext_simplify_format (xo_handle_t *xop UNUSED,
		       xo_buffer_t *xbp,
		       xo_field_info_t *fields,
		       int this_field,
		       const char *fmt UNUSED,
		       xo_simplify_field_func_t field_cb)
{
    unsigned ftype;
    xo_xff_flags_t flags;
    int field = this_field + 1;	/* Start after the {G:} field itself */
    xo_field_info_t *xfip;
    char ch;

    for (xfip = &fields[field]; xfip->xfi_ftype; xfip++, field++) {
	ftype = xfip->xfi_ftype;
	flags = xfip->xfi_flags;

	/* Report translatable (non-value) gettext fields to the callback */
	if ((flags & XFF_GT_FIELD) && xfip->xfi_content && ftype != 'V') {
	    if (field_cb)
		field_cb(xfip->xfi_content, xfip->xfi_clen,
			 (flags & XFF_GT_PLURAL) ? 1 : 0);
	}

	switch (ftype) {
	case 'G':
	    /* Ignore gettext roles */
	    break;

	case XO_ROLE_NEWLINE:
	    xo_buf_append(xbp, "\n", 1);
	    break;

	case XO_ROLE_EBRACE:
	    xo_buf_append(xbp, "{", 1);
	    xo_buf_append(xbp, xfip->xfi_content, xfip->xfi_clen);
	    xo_buf_append(xbp, "}", 1);
	    break;

	case XO_ROLE_TEXT:
	    xo_buf_append(xbp, xfip->xfi_content, xfip->xfi_clen);
	    break;

	default:
	    /* Re-emit as "{<role><num>:<content>}"; 'V' is implicit */
	    xo_buf_append(xbp, "{", 1);
	    if (ftype != 'V') {
		ch = ftype;
		xo_buf_append(xbp, &ch, 1);
	    }

	    unsigned fnum = xfip->xfi_fnum ?: 0;
	    if (fnum) {
		char num[12];
		/* Field numbers are origin 1, not 0, following printf(3) */
		snprintf(num, sizeof(num), "%u", fnum);
		xo_buf_append(xbp, num, strlen(num));
	    }

	    xo_buf_append(xbp, ":", 1);
	    xo_buf_append(xbp, xfip->xfi_content, xfip->xfi_clen);
	    xo_buf_append(xbp, "}", 1);
	}
    }

    /* NUL-terminate the simplified format */
    xo_buf_append(xbp, "", 1);
    return 0;
}
+
/*
 * Debug helper: print each parsed field's index, field number, flags,
 * role character, and content/format/encoding substrings to stdout.
 */
void
xo_dump_fields (xo_field_info_t *); /* Fake prototype for debug function */
void
xo_dump_fields (xo_field_info_t *fields)
{
    xo_field_info_t *xfip;

    for (xfip = fields; xfip->xfi_ftype; xfip++) {
	/* "?:" with "" guards the %.*s args against NULL pointers */
	printf("%lu(%u): %lx [%c/%u] [%.*s] [%.*s] [%.*s]\n",
	       (unsigned long) (xfip - fields), xfip->xfi_fnum,
	       (unsigned long) xfip->xfi_flags,
	       isprint((int) xfip->xfi_ftype) ? xfip->xfi_ftype : ' ',
	       xfip->xfi_ftype,
	       xfip->xfi_clen, xfip->xfi_content ?: "",
	       xfip->xfi_flen, xfip->xfi_format ?: "",
	       xfip->xfi_elen, xfip->xfi_encoding ?: "");
    }
}
+
+#ifdef HAVE_GETTEXT
+/*
+ * Find the field that matches the given field number
+ */
+static xo_field_info_t *
+xo_gettext_find_field (xo_field_info_t *fields, unsigned fnum)
+{
+ xo_field_info_t *xfip;
+
+ for (xfip = fields; xfip->xfi_ftype; xfip++)
+ if (xfip->xfi_fnum == fnum)
+ return xfip;
+
+ return NULL;
+}
+
+/*
+ * At this point, we need to consider if the fields have been reordered,
+ * such as "The {:adjective} {:noun}" to "La {:noun} {:adjective}".
+ *
+ * We need to rewrite the new_fields using the old fields order,
+ * so that we can render the message using the arguments as they
+ * appear on the stack. It's a lot of work, but we don't really
+ * want to (eventually) fall into the standard printf code which
+ * means using the arguments straight (and in order) from the
+ * varargs we were originally passed.
+ */
/*
 * Rewrite the (gettext-translated) field array so its numbered fields
 * appear in the original argument order, allowing the varargs to be
 * consumed in the order the caller supplied them.  Unnumbered roles
 * (newlines, text, escaped braces, {G:}) are copied through in place.
 * For each numbered slot, the field currently holding the next
 * sequential number is copied in, and xfi_renum records the position
 * it came from so the rendered output can be reassembled later.
 */
static void
xo_gettext_rewrite_fields (xo_handle_t *xop UNUSED,
                           xo_field_info_t *fields, unsigned max_fields)
{
    /* Scratch copy built in original-argument order, then copied back */
    xo_field_info_t tmp[max_fields];
    bzero(tmp, max_fields * sizeof(tmp[0]));

    unsigned fnum = 0;
    xo_field_info_t *newp, *outp, *zp;
    for (newp = fields, outp = tmp; newp->xfi_ftype; newp++, outp++) {
        switch (newp->xfi_ftype) {
        case XO_ROLE_NEWLINE:   /* Don't get numbered */
        case XO_ROLE_TEXT:
        case XO_ROLE_EBRACE:
        case 'G':
            *outp = *newp;
            outp->xfi_renum = 0;
            continue;
        }

        zp = xo_gettext_find_field(fields, ++fnum);
        if (zp == NULL) {       /* Should not occur */
            *outp = *newp;
            outp->xfi_renum = 0;
            continue;
        }

        *outp = *zp;
        outp->xfi_renum = newp->xfi_fnum;
    }

    memcpy(fields, tmp, max_fields * sizeof(tmp[0]));
}
+
+/*
+ * We've got two lists of fields, the old list from the original
+ * format string and the new one from the parsed gettext reply. The
+ * new list has the localized words, where the old list has the
+ * formatting information. We need to combine them into a single list
+ * (the new list).
+ *
+ * If the list needs to be reordered, then we've got more serious work
+ * to do.
+ */
/*
 * We've got two lists of fields, the old list from the original
 * format string and the new one from the parsed gettext reply.  The
 * new list has the localized words, where the old list has the
 * formatting information.  We need to combine them into a single list
 * (the new list).
 *
 * If the list needs to be reordered, then we've got more serious work
 * to do.
 *
 * Returns 0 on success, -1 when a field in the gettext reply cannot
 * be matched against the original format.  *reorderedp is set when
 * the translated fields appear in a different order than the
 * original, which triggers the rewrite/rebuild machinery.
 */
static int
xo_gettext_combine_formats (xo_handle_t *xop, const char *fmt UNUSED,
                    const char *gtfmt, xo_field_info_t *old_fields,
                    xo_field_info_t *new_fields, unsigned new_max_fields,
                    int *reorderedp)
{
    int reordered = 0;
    xo_field_info_t *newp, *oldp, *startp = old_fields;

    xo_gettext_finish_numbering_fields(xop, fmt, old_fields);

    for (newp = new_fields; newp->xfi_ftype; newp++) {
        switch (newp->xfi_ftype) {
        case XO_ROLE_NEWLINE:
        case XO_ROLE_TEXT:
        case XO_ROLE_EBRACE:
            continue;

        case 'V':
            /*
             * Value fields are matched by name: first scan forward
             * from where the previous match left off (startp) ...
             */
            for (oldp = startp; oldp->xfi_ftype; oldp++) {
                if (oldp->xfi_ftype != 'V')
                    continue;
                if (newp->xfi_clen != oldp->xfi_clen
                    || strncmp(newp->xfi_content, oldp->xfi_content,
                               oldp->xfi_clen) != 0) {
                    reordered = 1;
                    continue;
                }
                startp = oldp + 1;
                break;
            }

            /* Didn't find it on the first pass (starting from start) */
            if (oldp->xfi_ftype == 0) {
                /* ... then wrap around and scan the earlier fields */
                for (oldp = old_fields; oldp < startp; oldp++) {
                    if (oldp->xfi_ftype != 'V')
                        continue;
                    if (newp->xfi_clen != oldp->xfi_clen)
                        continue;
                    if (strncmp(newp->xfi_content, oldp->xfi_content,
                                oldp->xfi_clen) != 0)
                        continue;
                    reordered = 1;
                    break;
                }
                if (oldp == startp) {
                    /* Field not found */
                    xo_failure(xop, "post-gettext format can't find field "
                               "'%.*s' in format '%s'",
                               newp->xfi_clen, newp->xfi_content,
                               xo_printable(gtfmt));
                    return -1;
                }
            }
            break;

        default:
            /*
             * Other fields don't have names for us to use, so if
             * the types aren't the same, then we'll have to assume
             * the original field is a match.
             */
            for (oldp = startp; oldp->xfi_ftype; oldp++) {
                if (oldp->xfi_ftype == 'V') /* Can't go past these */
                    break;
                if (oldp->xfi_ftype == newp->xfi_ftype)
                    goto copy_it; /* Assumably we have a match */
            }
            continue;
        }

        /*
         * Found a match; copy over appropriate fields
         */
    copy_it:
        newp->xfi_flags = oldp->xfi_flags;
        newp->xfi_fnum = oldp->xfi_fnum;
        newp->xfi_format = oldp->xfi_format;
        newp->xfi_flen = oldp->xfi_flen;
        newp->xfi_encoding = oldp->xfi_encoding;
        newp->xfi_elen = oldp->xfi_elen;
    }

    *reorderedp = reordered;
    if (reordered) {
        xo_gettext_finish_numbering_fields(xop, fmt, new_fields);
        xo_gettext_rewrite_fields(xop, new_fields, new_max_fields);
    }

    return 0;
}
+
+/*
+ * We don't want to make gettext() calls here with a complete format
+ * string, since that means changing a flag would mean a
+ * labor-intensive re-translation expense. Instead we build a
+ * simplified form with a reduced level of detail, perform a lookup on
+ * that string and then re-insert the formating info.
+ *
+ * So something like:
+ * xo_emit("{G:}close {:fd/%ld} returned {g:error/%m} {:test/%6.6s}\n", ...)
+ * would have a lookup string of:
+ * "close {:fd} returned {:error} {:test}\n"
+ *
+ * We also need to handling reordering of fields, where the gettext()
+ * reply string uses fields in a different order than the original
+ * format string:
+ * "cluse-a {:fd} retoorned {:test}. Bork {:error} Bork. Bork.\n"
+ * If we have to reorder fields within the message, then things get
+ * complicated. See xo_gettext_rewrite_fields.
+ *
+ * Summary: i18n aighn't cheap.
+ */
/*
 * Build the simplified lookup string for 'fmt', ask gettext for a
 * translation, and return it.  On success *new_fmtp receives a
 * malloc'd copy of the translation (caller frees) and the same
 * pointer is returned; on any failure -- encoding style, simplify
 * error, no translation, or translation identical to the input --
 * *new_fmtp is NULL and the original 'fmt' is returned unchanged.
 */
static const char *
xo_gettext_build_format (xo_handle_t *xop,
                         xo_field_info_t *fields, int this_field,
                         const char *fmt, char **new_fmtp)
{
    /* Encoders (XML/JSON machinery) don't want localized output */
    if (xo_style_is_encoding(xop))
        goto bail;

    xo_buffer_t xb;
    xo_buf_init(&xb);

    if (xo_gettext_simplify_format(xop, &xb, fields,
                                   this_field, fmt, NULL))
        goto bail2;

    const char *gtfmt = xo_dgettext(xop, xb.xb_bufp);
    if (gtfmt == NULL || gtfmt == fmt || strcmp(gtfmt, fmt) == 0)
        goto bail2;

    xo_buf_cleanup(&xb);

    /* Copy out of gettext's static buffer before it can be reused */
    char *new_fmt = xo_strndup(gtfmt, -1);
    if (new_fmt == NULL)
        goto bail2;

    *new_fmtp = new_fmt;
    return new_fmt;

 bail2:
    xo_buf_cleanup(&xb);
 bail:
    *new_fmtp = NULL;
    return fmt;
}
+
/*
 * After rendering reordered gettext fields in stack-argument order,
 * shuffle the rendered byte ranges in xo_data back into the order the
 * translated message wants.  fstart[]/fend[] record each field's
 * rendered extent; the spanned region is copied aside and then copied
 * back piecewise in output order.  Allocation failure silently leaves
 * the buffer in argument order (best-effort).
 */
static void
xo_gettext_rebuild_content (xo_handle_t *xop, xo_field_info_t *fields,
                            unsigned *fstart, unsigned min_fstart,
                            unsigned *fend, unsigned max_fend)
{
    xo_field_info_t *xfip;
    char *buf;
    unsigned base = fstart[min_fstart];
    unsigned blen = fend[max_fend] - base;
    xo_buffer_t *xbp = &xop->xo_data;

    if (blen == 0)
        return;

    buf = xo_realloc(NULL, blen);
    if (buf == NULL)
        return;

    memcpy(buf, xbp->xb_bufp + fstart[min_fstart], blen); /* Copy our data */

    unsigned field = min_fstart, soff, doff = base, len, fnum;
    xo_field_info_t *zp;

    /*
     * Be aware there are two competing views of "field number": we
     * want the user to think in terms of "The {1:size}" where {G:},
     * newlines, escaped braces, and text don't have numbers.  But
     * there is also the internal view, where we have an array of
     * xo_field_info_t and every field has an index.  fnum, fstart[]
     * and fend[] are the latter, but xfi_renum is the former.
     */
    for (xfip = fields + field; xfip->xfi_ftype; xfip++, field++) {
        fnum = field;
        if (xfip->xfi_renum) {
            zp = xo_gettext_find_field(fields, xfip->xfi_renum);
            fnum = zp ? zp - fields : field;
        }

        soff = fstart[fnum];
        len = fend[fnum] - soff;

        if (len > 0) {
            soff -= base;
            memcpy(xbp->xb_bufp + doff, buf + soff, len);
            doff += len;
        }
    }

    xo_free(buf);
}
+#else /* HAVE_GETTEXT */
/* Stub for builds without gettext: never translates, returns fmt as-is */
static const char *
xo_gettext_build_format (xo_handle_t *xop UNUSED,
                         xo_field_info_t *fields UNUSED,
                         int this_field UNUSED,
                         const char *fmt UNUSED, char **new_fmtp)
{
    *new_fmtp = NULL;
    return fmt;
}
+
/* Stub for builds without gettext: always reports failure */
static int
xo_gettext_combine_formats (xo_handle_t *xop UNUSED, const char *fmt UNUSED,
                    const char *gtfmt UNUSED,
                    xo_field_info_t *old_fields UNUSED,
                    xo_field_info_t *new_fields UNUSED,
                    unsigned new_max_fields UNUSED,
                    int *reorderedp UNUSED)
{
    return -1;
}
+
+static void
+xo_gettext_rebuild_content (xo_handle_t *xop UNUSED,
+ xo_field_info_t *fields UNUSED,
+ unsigned *fstart UNUSED, unsigned min_fstart UNUSED,
+ unsigned *fend UNUSED, unsigned max_fend UNUSED)
+{
+ return;
+}
+#endif /* HAVE_GETTEXT */
+
+/*
+ * Emit a set of fields. This is really the core of libxo.
+ */
/*
 * Emit a set of fields.  This is really the core of libxo.
 *
 * Walks the parsed field array, dispatching each field by role/type
 * to the appropriate formatter.  Handles {G:} gettext redirection
 * (re-parsing the translated format mid-loop) and, when the
 * translation reorders fields, records per-field output extents so
 * the rendered bytes can be reshuffled afterwards.  Returns the
 * column count on success, negative on failure.
 */
static int
xo_do_emit_fields (xo_handle_t *xop, xo_field_info_t *fields,
                   unsigned max_fields, const char *fmt)
{
    int gettext_inuse = 0;
    int gettext_changed = 0;
    int gettext_reordered = 0;
    unsigned ftype;
    xo_xff_flags_t flags;
    xo_field_info_t *new_fields = NULL;
    xo_field_info_t *xfip;
    unsigned field;
    int rc = 0;

    int flush = XOF_ISSET(xop, XOF_FLUSH);
    int flush_line = XOF_ISSET(xop, XOF_FLUSH_LINE);
    char *new_fmt = NULL;

    if (XOIF_ISSET(xop, XOIF_REORDER) || xo_style(xop) == XO_STYLE_ENCODER)
        flush_line = 0;

    /*
     * Some overhead for gettext; if the fields in the msgstr returned
     * by gettext are reordered, then we need to record start and end
     * for each field.  We'll go ahead and render the fields in the
     * normal order, but later we can then reconstruct the reordered
     * fields using these fstart/fend values.
     */
    unsigned flimit = max_fields * 2; /* Pessimistic limit */
    unsigned min_fstart = flimit - 1;
    unsigned max_fend = 0;          /* Highest recorded fend[] entry */
    unsigned fstart[flimit];
    bzero(fstart, flimit * sizeof(fstart[0]));
    unsigned fend[flimit];
    bzero(fend, flimit * sizeof(fend[0]));

    for (xfip = fields, field = 0; xfip->xfi_ftype && field < max_fields;
         xfip++, field++) {
        ftype = xfip->xfi_ftype;
        flags = xfip->xfi_flags;

        /* Record field start offset */
        if (gettext_reordered) {
            fstart[field] = xo_buf_offset(&xop->xo_data);
            if (min_fstart > field)
                min_fstart = field;
        }

        const char *content = xfip->xfi_content;
        int clen = xfip->xfi_clen;

        if (flags & XFF_ARGUMENT) {
            /*
             * Argument flag means the content isn't given in the descriptor,
             * but as a UTF-8 string ('const char *') argument in xo_vap.
             */
            content = va_arg(xop->xo_vap, char *);
            clen = content ? strlen(content) : 0;
        }

        if (ftype == XO_ROLE_NEWLINE) {
            xo_line_close(xop);
            if (flush_line && xo_flush_h(xop) < 0)
                return -1;
            goto bottom;

        } else if (ftype == XO_ROLE_EBRACE) {
            xo_format_text(xop, xfip->xfi_start, xfip->xfi_len);
            goto bottom;

        } else if (ftype == XO_ROLE_TEXT) {
            /* Normal text */
            xo_format_text(xop, xfip->xfi_content, xfip->xfi_clen);
            goto bottom;
        }

        /*
         * Notes and units need the 'w' flag handled before the content.
         */
        if (ftype == 'N' || ftype == 'U') {
            if (flags & XFF_WS) {
                xo_format_content(xop, "padding", NULL, " ", 1,
                                  NULL, 0, flags);
                flags &= ~XFF_WS; /* Block later handling of this */
            }
        }

        if (ftype == 'V')
            xo_format_value(xop, content, clen,
                            xfip->xfi_format, xfip->xfi_flen,
                            xfip->xfi_encoding, xfip->xfi_elen, flags);
        else if (ftype == '[')
            xo_anchor_start(xop, xfip, content, clen);
        else if (ftype == ']')
            xo_anchor_stop(xop, xfip, content, clen);
        else if (ftype == 'C')
            xo_format_colors(xop, xfip, content, clen);

        else if (ftype == 'G') {
            /*
             * A {G:domain} field; disect the domain name and translate
             * the remaining portion of the input string.  If the user
             * didn't put the {G:} at the start of the format string, then
             * assumably they just want us to translate the rest of it.
             * Since gettext returns strings in a static buffer, we make
             * a copy in new_fmt.
             */
            xo_set_gettext_domain(xop, xfip, content, clen);

            if (!gettext_inuse) { /* Only translate once */
                gettext_inuse = 1;
                if (new_fmt) {
                    xo_free(new_fmt);
                    new_fmt = NULL;
                }

                xo_gettext_build_format(xop, fields, field,
                                        xfip->xfi_next, &new_fmt);
                if (new_fmt) {
                    gettext_changed = 1;

                    unsigned new_max_fields = xo_count_fields(xop, new_fmt);

                    if (++new_max_fields < max_fields)
                        new_max_fields = max_fields;

                    /* Leave a blank slot at the beginning */
                    int sz = (new_max_fields + 1) * sizeof(xo_field_info_t);
                    new_fields = alloca(sz);
                    bzero(new_fields, sz);

                    if (!xo_parse_fields(xop, new_fields + 1,
                                         new_max_fields, new_fmt)) {
                        gettext_reordered = 0;

                        if (!xo_gettext_combine_formats(xop, fmt, new_fmt,
                                        fields, new_fields + 1,
                                        new_max_fields, &gettext_reordered)) {

                            if (gettext_reordered) {
                                if (XOF_ISSET(xop, XOF_LOG_GETTEXT))
                                    xo_failure(xop, "gettext finds reordered "
                                               "fields in '%s' and '%s'",
                                               xo_printable(fmt),
                                               xo_printable(new_fmt));
                                flush_line = 0; /* Must keep at content */
                                XOIF_SET(xop, XOIF_REORDER);
                            }

                            /* Restart the loop over the translated fields */
                            field = -1; /* Will be incremented at top of loop */
                            xfip = new_fields;
                            max_fields = new_max_fields;
                        }
                    }
                }
            }
            continue;

        } else if (clen || xfip->xfi_format) {

            const char *class_name = xo_class_name(ftype);
            if (class_name)
                xo_format_content(xop, class_name, xo_tag_name(ftype),
                                  content, clen,
                                  xfip->xfi_format, xfip->xfi_flen, flags);
            else if (ftype == 'T')
                xo_format_title(xop, xfip, content, clen);
            else if (ftype == 'U')
                xo_format_units(xop, xfip, content, clen);
            else
                xo_failure(xop, "unknown field type: '%c'", ftype);
        }

        if (flags & XFF_COLON)
            xo_format_content(xop, "decoration", NULL, ":", 1, NULL, 0, 0);

        if (flags & XFF_WS)
            xo_format_content(xop, "padding", NULL, " ", 1, NULL, 0, 0);

    bottom:
        /* Record the end-of-field offset */
        if (gettext_reordered) {
            fend[field] = xo_buf_offset(&xop->xo_data);
            max_fend = field;
        }
    }

    if (gettext_changed && gettext_reordered) {
        /* Final step: rebuild the content using the rendered fields */
        xo_gettext_rebuild_content(xop, new_fields + 1, fstart, min_fstart,
                                   fend, max_fend);
    }

    XOIF_CLEAR(xop, XOIF_REORDER);

    /*
     * If we've got enough data, flush it.
     */
    if (xo_buf_offset(&xop->xo_data) > XO_BUF_HIGH_WATER)
        flush = 1;

    /* If we don't have an anchor, write the text out */
    if (flush && !XOIF_ISSET(xop, XOIF_ANCHOR)) {
        if (xo_write(xop) < 0)
            rc = -1;            /* Report failure */
        else if (xo_flush_h(xop) < 0)
            rc = -1;
    }

    if (new_fmt)
        xo_free(new_fmt);

    /*
     * We've carried the gettext domainname inside our handle just for
     * convenience, but we need to ensure it doesn't survive across
     * xo_emit calls.
     */
    if (xop->xo_gt_domain) {
        xo_free(xop->xo_gt_domain);
        xop->xo_gt_domain = NULL;
    }

    return (rc < 0) ? rc : (int) xop->xo_columns;
}
+
+/*
+ * Parse and emit a set of fields
+ */
/*
 * Parse and emit a set of fields.
 *
 * Parses 'fmt' into a field array (or reuses a previously retained
 * parse when XOEF_RETAIN is in effect) and hands the fields to
 * xo_do_emit_fields.  Returns that function's result, 0 for a NULL
 * format, or -1 on a parse failure.
 */
static int
xo_do_emit (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt)
{
    xop->xo_columns = 0;        /* Always reset it */
    xop->xo_errno = errno;      /* Save for "%m" */

    if (fmt == NULL)
        return 0;

    unsigned max_fields;
    xo_field_info_t *fields = NULL;

    /* Adjust XOEF_RETAIN based on global flags */
    if (XOF_ISSET(xop, XOF_RETAIN_ALL))
        flags |= XOEF_RETAIN;
    if (XOF_ISSET(xop, XOF_RETAIN_NONE))
        flags &= ~XOEF_RETAIN;

    /*
     * Check for 'retain' flag, telling us to retain the field
     * information.  If we've already saved it, then we can avoid
     * re-parsing the format string.
     */
    if (!(flags & XOEF_RETAIN)
        || xo_retain_find(fmt, &fields, &max_fields) != 0
        || fields == NULL) {

        /* Nothing retained; parse the format string */
        max_fields = xo_count_fields(xop, fmt);
        fields = alloca(max_fields * sizeof(fields[0]));
        bzero(fields, max_fields * sizeof(fields[0]));

        if (xo_parse_fields(xop, fields, max_fields, fmt))
            return -1;          /* Warning already displayed */

        if (flags & XOEF_RETAIN) {
            /* Retain the info */
            xo_retain_add(fmt, fields, max_fields);
        }
    }

    return xo_do_emit_fields(xop, fields, max_fields, fmt);
}
+
+/*
+ * Rebuild a format string in a gettext-friendly format. This function
+ * is exposed to tools can perform this function. See xo(1).
+ */
+char *
+xo_simplify_format (xo_handle_t *xop, const char *fmt, int with_numbers,
+ xo_simplify_field_func_t field_cb)
+{
+ xop = xo_default(xop);
+
+ xop->xo_columns = 0; /* Always reset it */
+ xop->xo_errno = errno; /* Save for "%m" */
+
+ unsigned max_fields = xo_count_fields(xop, fmt);
+ xo_field_info_t fields[max_fields];
+
+ bzero(fields, max_fields * sizeof(fields[0]));
+
+ if (xo_parse_fields(xop, fields, max_fields, fmt))
+ return NULL; /* Warning already displayed */
+
+ xo_buffer_t xb;
+ xo_buf_init(&xb);
+
+ if (with_numbers)
+ xo_gettext_finish_numbering_fields(xop, fmt, fields);
+
+ if (xo_gettext_simplify_format(xop, &xb, fields, -1, fmt, field_cb))
+ return NULL;
+
+ return xb.xb_bufp;
+}
+
+int
+xo_emit_hv (xo_handle_t *xop, const char *fmt, va_list vap)
+{
+ int rc;
+
+ xop = xo_default(xop);
+ va_copy(xop->xo_vap, vap);
+ rc = xo_do_emit(xop, 0, fmt);
+ va_end(xop->xo_vap);
+ bzero(&xop->xo_vap, sizeof(xop->xo_vap));
+
+ return rc;
+}
+
+int
+xo_emit_h (xo_handle_t *xop, const char *fmt, ...)
+{
+ int rc;
+
+ xop = xo_default(xop);
+ va_start(xop->xo_vap, fmt);
+ rc = xo_do_emit(xop, 0, fmt);
+ va_end(xop->xo_vap);
+ bzero(&xop->xo_vap, sizeof(xop->xo_vap));
+
+ return rc;
+}
+
+int
+xo_emit (const char *fmt, ...)
+{
+ xo_handle_t *xop = xo_default(NULL);
+ int rc;
+
+ va_start(xop->xo_vap, fmt);
+ rc = xo_do_emit(xop, 0, fmt);
+ va_end(xop->xo_vap);
+ bzero(&xop->xo_vap, sizeof(xop->xo_vap));
+
+ return rc;
+}
+
+int
+xo_emit_hvf (xo_handle_t *xop, xo_emit_flags_t flags,
+ const char *fmt, va_list vap)
+{
+ int rc;
+
+ xop = xo_default(xop);
+ va_copy(xop->xo_vap, vap);
+ rc = xo_do_emit(xop, flags, fmt);
+ va_end(xop->xo_vap);
+ bzero(&xop->xo_vap, sizeof(xop->xo_vap));
+
+ return rc;
+}
+
+int
+xo_emit_hf (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, ...)
+{
+ int rc;
+
+ xop = xo_default(xop);
+ va_start(xop->xo_vap, fmt);
+ rc = xo_do_emit(xop, flags, fmt);
+ va_end(xop->xo_vap);
+ bzero(&xop->xo_vap, sizeof(xop->xo_vap));
+
+ return rc;
+}
+
+int
+xo_emit_f (xo_emit_flags_t flags, const char *fmt, ...)
+{
+ xo_handle_t *xop = xo_default(NULL);
+ int rc;
+
+ va_start(xop->xo_vap, fmt);
+ rc = xo_do_emit(xop, flags, fmt);
+ va_end(xop->xo_vap);
+ bzero(&xop->xo_vap, sizeof(xop->xo_vap));
+
+ return rc;
+}
+
+/*
+ * Emit a single field by providing the info information typically provided
+ * inside the field description (role, modifiers, and formats). This is
+ * a convenience function to avoid callers using snprintf to build field
+ * descriptions.
+ */
/*
 * Emit a single field by providing the info information typically provided
 * inside the field description (role, modifiers, and formats).  This is
 * a convenience function to avoid callers using snprintf to build field
 * descriptions.
 *
 * 'rolmod' is the role/modifier string (defaults to "V"), 'contents'
 * the field name/content, 'fmt'/'efmt' the display and encoding
 * formats.  Returns xo_do_emit_fields' result, or -1 if the
 * role/modifier string cannot be parsed.
 */
int
xo_emit_field_hv (xo_handle_t *xop, const char *rolmod, const char *contents,
                  const char *fmt, const char *efmt,
                  va_list vap)
{
    int rc;

    xop = xo_default(xop);

    if (rolmod == NULL)
        rolmod = "V";

    /* Build a one-entry field array by hand instead of parsing a format */
    xo_field_info_t xfi;

    bzero(&xfi, sizeof(xfi));

    const char *cp;
    cp = xo_parse_roles(xop, rolmod, rolmod, &xfi);
    if (cp == NULL)
        return -1;

    xfi.xfi_start = fmt;
    xfi.xfi_content = contents;
    xfi.xfi_format = fmt;
    xfi.xfi_encoding = efmt;
    xfi.xfi_clen = contents ? strlen(contents) : 0;
    xfi.xfi_flen = fmt ? strlen(fmt) : 0;
    xfi.xfi_elen = efmt ? strlen(efmt) : 0;

    /* If we have content, then we have a default format */
    if (contents && fmt == NULL
        && xo_role_wants_default_format(xfi.xfi_ftype)) {
        xfi.xfi_format = xo_default_format;
        xfi.xfi_flen = 2;       /* Length of the default format string */
    }



    va_copy(xop->xo_vap, vap);

    rc = xo_do_emit_fields(xop, &xfi, 1, fmt ?: contents ?: "field");

    va_end(xop->xo_vap);

    return rc;
}
+
/* Varargs wrapper for xo_emit_field_hv with an explicit handle */
int
xo_emit_field_h (xo_handle_t *xop, const char *rolmod, const char *contents,
                 const char *fmt, const char *efmt, ...)
{
    int rc;
    va_list vap;

    va_start(vap, efmt);
    rc = xo_emit_field_hv(xop, rolmod, contents, fmt, efmt, vap);
    va_end(vap);

    return rc;
}

/* Varargs wrapper for xo_emit_field_hv using the default handle */
int
xo_emit_field (const char *rolmod, const char *contents,
               const char *fmt, const char *efmt, ...)
{
    int rc;
    va_list vap;

    va_start(vap, efmt);
    rc = xo_emit_field_hv(NULL, rolmod, contents, fmt, efmt, vap);
    va_end(vap);

    return rc;
}
+
/*
 * Record an attribute (name="value") to be attached to the next
 * emitted element.  For XML the attribute text is appended to the
 * handle's xo_attrs buffer; for the encoder style the name and value
 * are passed to the encoder via XO_OP_ATTRIBUTE.  Other styles
 * silently ignore attributes (rc stays 0; the switch intentionally
 * has no default).  Returns a byte count/handler result, or -1 when
 * the buffer can't grow.
 */
int
xo_attr_hv (xo_handle_t *xop, const char *name, const char *fmt, va_list vap)
{
    const int extra = 5;        /* space, equals, quote, quote, and nul */
    xop = xo_default(xop);

    int rc = 0;
    int nlen = strlen(name);
    xo_buffer_t *xbp = &xop->xo_attrs;
    unsigned name_offset, value_offset;

    switch (xo_style(xop)) {
    case XO_STYLE_XML:
        if (!xo_buf_has_room(xbp, nlen + extra))
            return -1;

        *xbp->xb_curp++ = ' ';
        memcpy(xbp->xb_curp, name, nlen);
        xbp->xb_curp += nlen;
        *xbp->xb_curp++ = '=';
        *xbp->xb_curp++ = '"';

        rc = xo_vsnprintf(xop, xbp, fmt, vap);

        if (rc >= 0) {
            rc = xo_escape_xml(xbp, rc, 1);
            xbp->xb_curp += rc;
        }

        if (!xo_buf_has_room(xbp, 2))
            return -1;

        *xbp->xb_curp++ = '"';
        *xbp->xb_curp = '\0';

        /* NOTE(review): if xo_vsnprintf failed, rc is negative here and
         * this adjustment yields a misleading count -- confirm intent */
        rc += nlen + extra;
        break;

    case XO_STYLE_ENCODER:
        /* Stage NUL-terminated name and value in the buffer, then hand
         * offsets-resolved pointers to the encoder */
        name_offset = xo_buf_offset(xbp);
        xo_buf_append(xbp, name, nlen);
        xo_buf_append(xbp, "", 1);

        value_offset = xo_buf_offset(xbp);
        rc = xo_vsnprintf(xop, xbp, fmt, vap);
        if (rc >= 0) {
            xbp->xb_curp += rc;
            *xbp->xb_curp = '\0';
            rc = xo_encoder_handle(xop, XO_OP_ATTRIBUTE,
                                   xo_buf_data(xbp, name_offset),
                                   xo_buf_data(xbp, value_offset));
        }
    }

    return rc;
}
+
/* Varargs wrapper for xo_attr_hv with an explicit handle */
int
xo_attr_h (xo_handle_t *xop, const char *name, const char *fmt, ...)
{
    int rc;
    va_list vap;

    va_start(vap, fmt);
    rc = xo_attr_hv(xop, name, fmt, vap);
    va_end(vap);

    return rc;
}

/* Varargs wrapper for xo_attr_hv using the default handle */
int
xo_attr (const char *name, const char *fmt, ...)
{
    int rc;
    va_list vap;

    va_start(vap, fmt);
    rc = xo_attr_hv(NULL, name, fmt, vap);
    va_end(vap);

    return rc;
}
+
+static void
+xo_stack_set_flags (xo_handle_t *xop)
+{
+ if (XOF_ISSET(xop, XOF_NOT_FIRST)) {
+ xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth];
+
+ xsp->xs_flags |= XSF_NOT_FIRST;
+ XOF_CLEAR(xop, XOF_NOT_FIRST);
+ }
+}
+
/*
 * Push (delta >= 0) or pop (delta < 0) a frame on the handle's
 * hierarchy stack, adjusting indent as well.  On push, the frame's
 * name is duplicated and its state/flags recorded.  On pop, the
 * frame's name and key list are freed; with XOF_WARN set, mismatched
 * names or list/instance flag disagreements are reported and the pop
 * is abandoned (depth still changes below).  HTML and TEXT styles
 * never indent.
 */
static void
xo_depth_change (xo_handle_t *xop, const char *name,
                 int delta, int indent, xo_state_t state, xo_xsf_flags_t flags)
{
    if (xo_style(xop) == XO_STYLE_HTML || xo_style(xop) == XO_STYLE_TEXT)
        indent = 0;

    if (XOF_ISSET(xop, XOF_DTRT))
        flags |= XSF_DTRT;

    if (delta >= 0) {                   /* Push operation */
        if (xo_depth_check(xop, xop->xo_depth + delta))
            return;

        xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth + delta];
        xsp->xs_flags = flags;
        xsp->xs_state = state;
        xo_stack_set_flags(xop);

        if (name == NULL)
            name = XO_FAILURE_NAME;

        xsp->xs_name = xo_strndup(name, -1);

    } else {                            /* Pop operation */
        if (xop->xo_depth == 0) {
            if (!XOF_ISSET(xop, XOF_IGNORE_CLOSE))
                xo_failure(xop, "close with empty stack: '%s'", name);
            return;
        }

        xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth];
        if (XOF_ISSET(xop, XOF_WARN)) {
            const char *top = xsp->xs_name;
            if (top && strcmp(name, top) != 0) {
                xo_failure(xop, "incorrect close: '%s' .vs. '%s'",
                           name, top);
                return;
            }
            if ((xsp->xs_flags & XSF_LIST) != (flags & XSF_LIST)) {
                xo_failure(xop, "list close on list confict: '%s'",
                           name);
                return;
            }
            if ((xsp->xs_flags & XSF_INSTANCE) != (flags & XSF_INSTANCE)) {
                xo_failure(xop, "list close on instance confict: '%s'",
                           name);
                return;
            }
        }

        if (xsp->xs_name) {
            xo_free(xsp->xs_name);
            xsp->xs_name = NULL;
        }
        if (xsp->xs_keys) {
            xo_free(xsp->xs_keys);
            xsp->xs_keys = NULL;
        }
    }

    xop->xo_depth += delta;             /* Record new depth */
    xop->xo_indent += indent;
}
+
+void
+xo_set_depth (xo_handle_t *xop, int depth)
+{
+ xop = xo_default(xop);
+
+ if (xo_depth_check(xop, depth))
+ return;
+
+ xop->xo_depth += depth;
+ xop->xo_indent += depth;
+}
+
+static xo_xsf_flags_t
+xo_stack_flags (unsigned xflags)
+{
+ if (xflags & XOF_DTRT)
+ return XSF_DTRT;
+ return 0;
+}
+
/*
 * Emit the opening top-level JSON brace (and the one-shot
 * "__version" member, if set), marking the handle so the wrapper is
 * only produced once.  'ppn' is the pretty-print newline ("" or "\n").
 */
static void
xo_emit_top (xo_handle_t *xop, const char *ppn)
{
    xo_printf(xop, "%*s{%s", xo_indent(xop), "", ppn);
    XOIF_SET(xop, XOIF_TOP_EMITTED);

    if (xop->xo_version) {
        xo_printf(xop, "%*s\"__version\": \"%s\", %s",
                  xo_indent(xop), "", xop->xo_version, ppn);
        /* Version is emitted once, then discarded */
        xo_free(xop->xo_version);
        xop->xo_version = NULL;
    }
}
+
/*
 * Open a container: "<name>...</name>" in XML, '"name": { ... }' in
 * JSON.  Pending XML attributes accumulated via xo_attr are flushed
 * into the open tag.  SDPARAMS ignores containers; encoders receive
 * XO_OP_OPEN_CONTAINER.  A stack frame is pushed in every style.
 * Returns the number of bytes emitted (style-dependent).
 */
static int
xo_do_open_container (xo_handle_t *xop, xo_xof_flags_t flags, const char *name)
{
    int rc = 0;
    const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : "";
    const char *pre_nl = "";

    if (name == NULL) {
        xo_failure(xop, "NULL passed for container name");
        name = XO_FAILURE_NAME;
    }

    flags |= xop->xo_flags;     /* Pick up handle flags */

    switch (xo_style(xop)) {
    case XO_STYLE_XML:
        rc = xo_printf(xop, "%*s<%s", xo_indent(xop), "", name);

        /* Drain any attributes recorded via xo_attr into the tag */
        if (xop->xo_attrs.xb_curp != xop->xo_attrs.xb_bufp) {
            rc += xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp;
            xo_data_append(xop, xop->xo_attrs.xb_bufp,
                           xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp);
            xop->xo_attrs.xb_curp = xop->xo_attrs.xb_bufp;
        }

        rc += xo_printf(xop, ">%s", ppn);
        break;

    case XO_STYLE_JSON:
        xo_stack_set_flags(xop);

        if (!XOF_ISSET(xop, XOF_NO_TOP)
            && !XOIF_ISSET(xop, XOIF_TOP_EMITTED))
            xo_emit_top(xop, ppn);

        /* Comma-separate siblings within the parent frame */
        if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST)
            pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? ",\n" : ", ";
        xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST;

        rc = xo_printf(xop, "%s%*s\"%s\": {%s",
                       pre_nl, xo_indent(xop), "", name, ppn);
        break;

    case XO_STYLE_SDPARAMS:
        break;

    case XO_STYLE_ENCODER:
        rc = xo_encoder_handle(xop, XO_OP_OPEN_CONTAINER, name, NULL);
        break;
    }

    xo_depth_change(xop, name, 1, 1, XSS_OPEN_CONTAINER,
                    xo_stack_flags(flags));

    return rc;
}
+
/* Route container opens through the state-transition machinery */
static int
xo_open_container_hf (xo_handle_t *xop, xo_xof_flags_t flags, const char *name)
{
    return xo_transition(xop, flags, name, XSS_OPEN_CONTAINER);
}

/* Open a container on an explicit handle */
int
xo_open_container_h (xo_handle_t *xop, const char *name)
{
    return xo_open_container_hf(xop, 0, name);
}

/* Open a container on the default handle */
int
xo_open_container (const char *name)
{
    return xo_open_container_hf(NULL, 0, name);
}

/* Open a container with "do the right thing" close semantics */
int
xo_open_container_hd (xo_handle_t *xop, const char *name)
{
    return xo_open_container_hf(xop, XOF_DTRT, name);
}

/* Open a DTRT container on the default handle */
int
xo_open_container_d (const char *name)
{
    return xo_open_container_hf(NULL, XOF_DTRT, name);
}
+
/*
 * Close the current container.  A NULL name means "close whatever is
 * open": the stacked name is used (copied locally since
 * xo_depth_change frees it), and its absence is only an error when
 * the frame lacks the DTRT flag.  Emits the style-appropriate
 * closing syntax and pops a stack frame.
 */
static int
xo_do_close_container (xo_handle_t *xop, const char *name)
{
    xop = xo_default(xop);

    int rc = 0;
    const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : "";
    const char *pre_nl = "";

    if (name == NULL) {
        xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth];

        name = xsp->xs_name;
        if (name) {
            int len = strlen(name) + 1;
            /* We need to make a local copy; xo_depth_change will free it */
            char *cp = alloca(len);
            memcpy(cp, name, len);
            name = cp;
        } else if (!(xsp->xs_flags & XSF_DTRT)) {
            xo_failure(xop, "missing name without 'dtrt' mode");
            name = XO_FAILURE_NAME;
        }
    }

    switch (xo_style(xop)) {
    case XO_STYLE_XML:
        xo_depth_change(xop, name, -1, -1, XSS_CLOSE_CONTAINER, 0);
        rc = xo_printf(xop, "%*s</%s>%s", xo_indent(xop), "", name, ppn);
        break;

    case XO_STYLE_JSON:
        pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : "";
        /* Newline after the very last closing brace only */
        ppn = (xop->xo_depth <= 1) ? "\n" : "";

        xo_depth_change(xop, name, -1, -1, XSS_CLOSE_CONTAINER, 0);
        rc = xo_printf(xop, "%s%*s}%s", pre_nl, xo_indent(xop), "", ppn);
        xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST;
        break;

    case XO_STYLE_HTML:
    case XO_STYLE_TEXT:
        xo_depth_change(xop, name, -1, 0, XSS_CLOSE_CONTAINER, 0);
        break;

    case XO_STYLE_SDPARAMS:
        break;

    case XO_STYLE_ENCODER:
        xo_depth_change(xop, name, -1, 0, XSS_CLOSE_CONTAINER, 0);
        rc = xo_encoder_handle(xop, XO_OP_CLOSE_CONTAINER, name, NULL);
        break;
    }

    return rc;
}
+
/* Close a container on an explicit handle */
int
xo_close_container_h (xo_handle_t *xop, const char *name)
{
    return xo_transition(xop, 0, name, XSS_CLOSE_CONTAINER);
}

/* Close a container on the default handle */
int
xo_close_container (const char *name)
{
    return xo_close_container_h(NULL, name);
}

/* Close the current ("do the right thing") container on a handle */
int
xo_close_container_hd (xo_handle_t *xop)
{
    return xo_close_container_h(xop, NULL);
}

/* Close the current container on the default handle */
int
xo_close_container_d (void)
{
    return xo_close_container_h(NULL, NULL);
}
+
/*
 * Open a list: '"name": [' in JSON; encoders receive
 * XO_OP_OPEN_LIST.  Other styles only push the stack frame (indent
 * stays unchanged for them).  Returns bytes emitted.
 */
static int
xo_do_open_list (xo_handle_t *xop, xo_xsf_flags_t flags, const char *name)
{
    int rc = 0;
    int indent = 0;

    xop = xo_default(xop);

    const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : "";
    const char *pre_nl = "";

    switch (xo_style(xop)) {
    case XO_STYLE_JSON:

        indent = 1;
        if (!XOF_ISSET(xop, XOF_NO_TOP)
            && !XOIF_ISSET(xop, XOIF_TOP_EMITTED))
            xo_emit_top(xop, ppn);

        if (name == NULL) {
            xo_failure(xop, "NULL passed for list name");
            name = XO_FAILURE_NAME;
        }

        xo_stack_set_flags(xop);

        /* Comma-separate siblings within the parent frame */
        if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST)
            pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? ",\n" : ", ";
        xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST;

        rc = xo_printf(xop, "%s%*s\"%s\": [%s",
                       pre_nl, xo_indent(xop), "", name, ppn);
        break;

    case XO_STYLE_ENCODER:
        rc = xo_encoder_handle(xop, XO_OP_OPEN_LIST, name, NULL);
        break;
    }

    xo_depth_change(xop, name, 1, indent, XSS_OPEN_LIST,
                    XSF_LIST | xo_stack_flags(flags));

    return rc;
}
+
/* Route list opens through the state-transition machinery */
static int
xo_open_list_hf (xo_handle_t *xop, xo_xsf_flags_t flags, const char *name)
{
    return xo_transition(xop, flags, name, XSS_OPEN_LIST);
}

/* Open a list on an explicit handle */
int
xo_open_list_h (xo_handle_t *xop, const char *name)
{
    return xo_open_list_hf(xop, 0, name);
}

/* Open a list on the default handle */
int
xo_open_list (const char *name)
{
    return xo_open_list_hf(NULL, 0, name);
}

/* Open a list with "do the right thing" close semantics */
int
xo_open_list_hd (xo_handle_t *xop, const char *name)
{
    return xo_open_list_hf(xop, XOF_DTRT, name);
}

/* Open a DTRT list on the default handle */
int
xo_open_list_d (const char *name)
{
    return xo_open_list_hf(NULL, XOF_DTRT, name);
}
+
/*
 * Close the current list.  As with containers, a NULL name pulls the
 * name from the top stack frame (copied locally since
 * xo_depth_change frees it), tolerated only in DTRT mode.  Emits ']'
 * for JSON, XO_OP_CLOSE_LIST for encoders, and pops the frame in all
 * styles.
 */
static int
xo_do_close_list (xo_handle_t *xop, const char *name)
{
    int rc = 0;
    const char *pre_nl = "";

    if (name == NULL) {
        xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth];

        name = xsp->xs_name;
        if (name) {
            int len = strlen(name) + 1;
            /* We need to make a local copy; xo_depth_change will free it */
            char *cp = alloca(len);
            memcpy(cp, name, len);
            name = cp;
        } else if (!(xsp->xs_flags & XSF_DTRT)) {
            xo_failure(xop, "missing name without 'dtrt' mode");
            name = XO_FAILURE_NAME;
        }
    }

    switch (xo_style(xop)) {
    case XO_STYLE_JSON:
        if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST)
            pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : "";
        xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST;

        xo_depth_change(xop, name, -1, -1, XSS_CLOSE_LIST, XSF_LIST);
        rc = xo_printf(xop, "%s%*s]", pre_nl, xo_indent(xop), "");
        xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST;
        break;

    case XO_STYLE_ENCODER:
        xo_depth_change(xop, name, -1, 0, XSS_CLOSE_LIST, XSF_LIST);
        rc = xo_encoder_handle(xop, XO_OP_CLOSE_LIST, name, NULL);
        break;

    default:
        xo_depth_change(xop, name, -1, 0, XSS_CLOSE_LIST, XSF_LIST);
        xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST;
        break;
    }

    return rc;
}
+
/* Close a list on an explicit handle */
int
xo_close_list_h (xo_handle_t *xop, const char *name)
{
    return xo_transition(xop, 0, name, XSS_CLOSE_LIST);
}

/* Close a list on the default handle */
int
xo_close_list (const char *name)
{
    return xo_close_list_h(NULL, name);
}

/* Close the current ("do the right thing") list on a handle */
int
xo_close_list_hd (xo_handle_t *xop)
{
    return xo_close_list_h(xop, NULL);
}

/* Close the current list on the default handle */
int
xo_close_list_d (void)
{
    return xo_close_list_h(NULL, NULL);
}
+
/*
 * Open a leaf list (a JSON array of scalar values): '"name": [' in
 * JSON, XO_OP_OPEN_LEAF_LIST for encoders.  Other styles just push
 * the stack frame.  Returns bytes emitted.
 */
static int
xo_do_open_leaf_list (xo_handle_t *xop, xo_xsf_flags_t flags, const char *name)
{
    int rc = 0;
    int indent = 0;

    xop = xo_default(xop);

    const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : "";
    const char *pre_nl = "";

    switch (xo_style(xop)) {
    case XO_STYLE_JSON:
        indent = 1;

        if (!XOF_ISSET(xop, XOF_NO_TOP)) {
            if (!XOIF_ISSET(xop, XOIF_TOP_EMITTED)) {
                /* Open the top-level JSON wrapper inline (no __version) */
                xo_printf(xop, "%*s{%s", xo_indent(xop), "", ppn);
                XOIF_SET(xop, XOIF_TOP_EMITTED);
            }
        }

        if (name == NULL) {
            xo_failure(xop, "NULL passed for list name");
            name = XO_FAILURE_NAME;
        }

        xo_stack_set_flags(xop);

        /* Comma-separate siblings within the parent frame */
        if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST)
            pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? ",\n" : ", ";
        xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST;

        rc = xo_printf(xop, "%s%*s\"%s\": [%s",
                       pre_nl, xo_indent(xop), "", name, ppn);
        break;

    case XO_STYLE_ENCODER:
        rc = xo_encoder_handle(xop, XO_OP_OPEN_LEAF_LIST, name, NULL);
        break;
    }

    xo_depth_change(xop, name, 1, indent, XSS_OPEN_LEAF_LIST,
                    XSF_LIST | xo_stack_flags(flags));

    return rc;
}
+
/*
 * Close the current leaf list.  NULL name is resolved from the top
 * stack frame (local copy, since xo_depth_change frees it), tolerated
 * only in DTRT mode.  Emits ']' for JSON; the encoder case notifies
 * the encoder and then deliberately falls through to the shared
 * depth-change handling.
 */
static int
xo_do_close_leaf_list (xo_handle_t *xop, const char *name)
{
    int rc = 0;
    const char *pre_nl = "";

    if (name == NULL) {
        xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth];

        name = xsp->xs_name;
        if (name) {
            int len = strlen(name) + 1;
            /* We need to make a local copy; xo_depth_change will free it */
            char *cp = alloca(len);
            memcpy(cp, name, len);
            name = cp;
        } else if (!(xsp->xs_flags & XSF_DTRT)) {
            xo_failure(xop, "missing name without 'dtrt' mode");
            name = XO_FAILURE_NAME;
        }
    }

    switch (xo_style(xop)) {
    case XO_STYLE_JSON:
        if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST)
            pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : "";
        xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST;

        xo_depth_change(xop, name, -1, -1, XSS_CLOSE_LEAF_LIST, XSF_LIST);
        rc = xo_printf(xop, "%s%*s]", pre_nl, xo_indent(xop), "");
        xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST;
        break;

    case XO_STYLE_ENCODER:
        rc = xo_encoder_handle(xop, XO_OP_CLOSE_LEAF_LIST, name, NULL);
        /* FALLTHRU */

    default:
        xo_depth_change(xop, name, -1, 0, XSS_CLOSE_LEAF_LIST, XSF_LIST);
        xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST;
        break;
    }

    return rc;
}
+
/*
 * Open an instance (one member of a list): "<name>" in XML (with any
 * pending attributes), an anonymous "{" in JSON, XO_OP_OPEN_INSTANCE
 * for encoders.  Pushes a stack frame in every style.  Returns bytes
 * emitted.
 */
static int
xo_do_open_instance (xo_handle_t *xop, xo_xsf_flags_t flags, const char *name)
{
    xop = xo_default(xop);

    int rc = 0;
    const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : "";
    const char *pre_nl = "";

    flags |= xop->xo_flags;

    if (name == NULL) {
        xo_failure(xop, "NULL passed for instance name");
        name = XO_FAILURE_NAME;
    }

    switch (xo_style(xop)) {
    case XO_STYLE_XML:
        rc = xo_printf(xop, "%*s<%s", xo_indent(xop), "", name);

        /* Drain any attributes recorded via xo_attr into the tag */
        if (xop->xo_attrs.xb_curp != xop->xo_attrs.xb_bufp) {
            rc += xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp;
            xo_data_append(xop, xop->xo_attrs.xb_bufp,
                           xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp);
            xop->xo_attrs.xb_curp = xop->xo_attrs.xb_bufp;
        }

        rc += xo_printf(xop, ">%s", ppn);
        break;

    case XO_STYLE_JSON:
        xo_stack_set_flags(xop);

        /* Comma-separate siblings within the parent frame */
        if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST)
            pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? ",\n" : ", ";
        xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST;

        rc = xo_printf(xop, "%s%*s{%s",
                       pre_nl, xo_indent(xop), "", ppn);
        break;

    case XO_STYLE_SDPARAMS:
        break;

    case XO_STYLE_ENCODER:
        rc = xo_encoder_handle(xop, XO_OP_OPEN_INSTANCE, name, NULL);
        break;
    }

    xo_depth_change(xop, name, 1, 1, XSS_OPEN_INSTANCE, xo_stack_flags(flags));

    return rc;
}
+
+/*
+ * Common worker for the public xo_open_instance*() entry points;
+ * routes the request through the state machine (xo_transition).
+ */
+static int
+xo_open_instance_hf (xo_handle_t *xop, xo_xsf_flags_t flags, const char *name)
+{
+ return xo_transition(xop, flags, name, XSS_OPEN_INSTANCE);
+}
+
+/* Open an instance on an explicit handle */
+int
+xo_open_instance_h (xo_handle_t *xop, const char *name)
+{
+ return xo_open_instance_hf(xop, 0, name);
+}
+
+/* Open an instance on the default handle */
+int
+xo_open_instance (const char *name)
+{
+ return xo_open_instance_hf(NULL, 0, name);
+}
+
+/* Open an instance on an explicit handle, in "do the right thing" mode */
+int
+xo_open_instance_hd (xo_handle_t *xop, const char *name)
+{
+ return xo_open_instance_hf(xop, XOF_DTRT, name);
+}
+
+/* Open an instance on the default handle, in "do the right thing" mode */
+int
+xo_open_instance_d (const char *name)
+{
+ return xo_open_instance_hf(NULL, XOF_DTRT, name);
+}
+
+/*
+ * Close an open instance.  A NULL name is resolved from the top of the
+ * stack (copied locally since xo_depth_change() frees it); outside
+ * "dtrt" mode a missing name is reported and XO_FAILURE_NAME is used.
+ * XML emits the close tag, JSON emits "}", other styles only adjust
+ * the stack.  Returns the number of bytes emitted.
+ */
+static int
+xo_do_close_instance (xo_handle_t *xop, const char *name)
+{
+ xop = xo_default(xop);
+
+ int rc = 0;
+ const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : "";
+ const char *pre_nl = "";
+
+ if (name == NULL) {
+ xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth];
+
+ name = xsp->xs_name;
+ if (name) {
+ int len = strlen(name) + 1;
+ /* We need to make a local copy; xo_depth_change will free it */
+ char *cp = alloca(len);
+ memcpy(cp, name, len);
+ name = cp;
+ } else if (!(xsp->xs_flags & XSF_DTRT)) {
+ xo_failure(xop, "missing name without 'dtrt' mode");
+ name = XO_FAILURE_NAME;
+ }
+ }
+
+ switch (xo_style(xop)) {
+ case XO_STYLE_XML:
+ xo_depth_change(xop, name, -1, -1, XSS_CLOSE_INSTANCE, 0);
+ rc = xo_printf(xop, "%*s</%s>%s", xo_indent(xop), "", name, ppn);
+ break;
+
+ case XO_STYLE_JSON:
+ pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : "";
+
+ xo_depth_change(xop, name, -1, -1, XSS_CLOSE_INSTANCE, 0);
+ rc = xo_printf(xop, "%s%*s}", pre_nl, xo_indent(xop), "");
+ xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST;
+ break;
+
+ case XO_STYLE_HTML:
+ case XO_STYLE_TEXT:
+ xo_depth_change(xop, name, -1, 0, XSS_CLOSE_INSTANCE, 0);
+ break;
+
+ case XO_STYLE_SDPARAMS:
+ break;
+
+ case XO_STYLE_ENCODER:
+ xo_depth_change(xop, name, -1, 0, XSS_CLOSE_INSTANCE, 0);
+ rc = xo_encoder_handle(xop, XO_OP_CLOSE_INSTANCE, name, NULL);
+ break;
+ }
+
+ return rc;
+}
+
+/* Close the named instance on an explicit handle */
+int
+xo_close_instance_h (xo_handle_t *xop, const char *name)
+{
+ return xo_transition(xop, 0, name, XSS_CLOSE_INSTANCE);
+}
+
+/* Close the named instance on the default handle */
+int
+xo_close_instance (const char *name)
+{
+ return xo_close_instance_h(NULL, name);
+}
+
+/* Close the top instance on an explicit handle ("dtrt": no name needed) */
+int
+xo_close_instance_hd (xo_handle_t *xop)
+{
+ return xo_close_instance_h(xop, NULL);
+}
+
+/* Close the top instance on the default handle ("dtrt": no name needed) */
+int
+xo_close_instance_d (void)
+{
+ return xo_close_instance_h(NULL, NULL);
+}
+
+/*
+ * Unwind the stack, closing every open construct from the top down to
+ * (and including) 'limit'.  Markers are popped and their flags merged
+ * into the entry below.  Individual close failures are reported via
+ * xo_failure() but do not stop the unwind; the function always
+ * returns 0.
+ */
+static int
+xo_do_close_all (xo_handle_t *xop, xo_stack_t *limit)
+{
+ xo_stack_t *xsp;
+ int rc = 0;
+ xo_xsf_flags_t flags;
+
+ for (xsp = &xop->xo_stack[xop->xo_depth]; xsp >= limit; xsp--) {
+ switch (xsp->xs_state) {
+ case XSS_INIT:
+ /* Nothing */
+ rc = 0;
+ break;
+
+ case XSS_OPEN_CONTAINER:
+ rc = xo_do_close_container(xop, NULL);
+ break;
+
+ case XSS_OPEN_LIST:
+ rc = xo_do_close_list(xop, NULL);
+ break;
+
+ case XSS_OPEN_INSTANCE:
+ rc = xo_do_close_instance(xop, NULL);
+ break;
+
+ case XSS_OPEN_LEAF_LIST:
+ rc = xo_do_close_leaf_list(xop, NULL);
+ break;
+
+ case XSS_MARKER:
+ /* Pop the marker, preserving its marker flags on the new top */
+ flags = xsp->xs_flags & XSF_MARKER_FLAGS;
+ xo_depth_change(xop, xsp->xs_name, -1, 0, XSS_MARKER, 0);
+ xop->xo_stack[xop->xo_depth].xs_flags |= flags;
+ rc = 0;
+ break;
+ }
+
+ if (rc < 0)
+ xo_failure(xop, "close %d failed: %d", xsp->xs_state, rc);
+ }
+
+ return 0;
+}
+
+/*
+ * This function is responsible for clearing out whatever is needed
+ * to get to the desired state, if possible.
+ */
+/*
+ * Close constructs until the one matching 'new_state' (and 'name', if
+ * given) has been closed.  The search walks down the stack for the
+ * corresponding "open" state; markers act as a barrier unless we are
+ * explicitly popping a marker.  Unknown target states, and targets
+ * that cannot be found, are reported via xo_failure() and ignored
+ * (return 0).
+ */
+static int
+xo_do_close (xo_handle_t *xop, const char *name, xo_state_t new_state)
+{
+ xo_stack_t *xsp, *limit = NULL;
+ int rc;
+ xo_state_t need_state = new_state;
+
+ /* Map each "close" request onto the "open" state we must find */
+ if (new_state == XSS_CLOSE_CONTAINER)
+ need_state = XSS_OPEN_CONTAINER;
+ else if (new_state == XSS_CLOSE_LIST)
+ need_state = XSS_OPEN_LIST;
+ else if (new_state == XSS_CLOSE_INSTANCE)
+ need_state = XSS_OPEN_INSTANCE;
+ else if (new_state == XSS_CLOSE_LEAF_LIST)
+ need_state = XSS_OPEN_LEAF_LIST;
+ else if (new_state == XSS_MARKER)
+ need_state = XSS_MARKER;
+ else
+ return 0; /* Unknown or useless new states are ignored */
+
+ for (xsp = &xop->xo_stack[xop->xo_depth]; xsp > xop->xo_stack; xsp--) {
+ /*
+ * Marker's normally stop us from going any further, unless
+ * we are popping a marker (new_state == XSS_MARKER).
+ */
+ if (xsp->xs_state == XSS_MARKER && need_state != XSS_MARKER) {
+ if (name) {
+ xo_failure(xop, "close (xo_%s) fails at marker '%s'; "
+ "not found '%s'",
+ xo_state_name(new_state),
+ xsp->xs_name, name);
+ return 0;
+
+ } else {
+ limit = xsp;
+ xo_failure(xop, "close stops at marker '%s'", xsp->xs_name);
+ }
+ break;
+ }
+
+ if (xsp->xs_state != need_state)
+ continue;
+
+ if (name && xsp->xs_name && strcmp(name, xsp->xs_name) != 0)
+ continue;
+
+ limit = xsp;
+ break;
+ }
+
+ if (limit == NULL) {
+ xo_failure(xop, "xo_%s can't find match for '%s'",
+ xo_state_name(new_state), name);
+ return 0;
+ }
+
+ rc = xo_do_close_all(xop, limit);
+
+ return rc;
+}
+
+/*
+ * We are in a given state and need to transition to the new state.
+ */
+/*
+ * State machine driver: given the current top-of-stack state and the
+ * requested 'new_state', perform whatever intermediate closes/opens
+ * are needed (e.g. closing an open leaf-list before opening a
+ * container).  Labels inside the switch (open_container:, open_list:,
+ * open_instance:, open_leaf_list:) are goto targets so that
+ * "close-then-open" transitions can fall into the plain "open" code.
+ * If a marker sits on top of the stack, closes are refused (see
+ * marker_prevents_close).  Returns bytes emitted, or -1 on error.
+ */
+static int
+xo_transition (xo_handle_t *xop, xo_xsf_flags_t flags, const char *name,
+ xo_state_t new_state)
+{
+ xo_stack_t *xsp;
+ int rc;
+ int old_state, on_marker;
+
+ xop = xo_default(xop);
+
+ rc = 0;
+ xsp = &xop->xo_stack[xop->xo_depth];
+ old_state = xsp->xs_state;
+ on_marker = (old_state == XSS_MARKER);
+
+ /* If there's a marker on top of the stack, we need to find a real state */
+ while (old_state == XSS_MARKER) {
+ if (xsp == xop->xo_stack)
+ break;
+ xsp -= 1;
+ old_state = xsp->xs_state;
+ }
+
+ /*
+ * At this point, the list of possible states are:
+ * XSS_INIT, XSS_OPEN_CONTAINER, XSS_OPEN_LIST,
+ * XSS_OPEN_INSTANCE, XSS_OPEN_LEAF_LIST, XSS_DISCARDING
+ */
+ switch (XSS_TRANSITION(old_state, new_state)) {
+
+ open_container:
+ case XSS_TRANSITION(XSS_INIT, XSS_OPEN_CONTAINER):
+ case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_OPEN_CONTAINER):
+ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_OPEN_CONTAINER):
+ rc = xo_do_open_container(xop, flags, name);
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_LIST, XSS_OPEN_CONTAINER):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close_list(xop, NULL);
+ if (rc >= 0)
+ goto open_container;
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_OPEN_CONTAINER):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close_leaf_list(xop, NULL);
+ if (rc >= 0)
+ goto open_container;
+ break;
+
+ /*close_container:*/
+ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_CLOSE_CONTAINER):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close(xop, name, new_state);
+ break;
+
+ case XSS_TRANSITION(XSS_INIT, XSS_CLOSE_CONTAINER):
+ /* This is an exception for "xo --close" */
+ rc = xo_do_close_container(xop, name);
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_LIST, XSS_CLOSE_CONTAINER):
+ case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_CLOSE_CONTAINER):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close(xop, name, new_state);
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_CLOSE_CONTAINER):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close_leaf_list(xop, NULL);
+ if (rc >= 0)
+ rc = xo_do_close(xop, name, new_state);
+ break;
+
+ open_list:
+ case XSS_TRANSITION(XSS_INIT, XSS_OPEN_LIST):
+ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_OPEN_LIST):
+ case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_OPEN_LIST):
+ rc = xo_do_open_list(xop, flags, name);
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_LIST, XSS_OPEN_LIST):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close_list(xop, NULL);
+ if (rc >= 0)
+ goto open_list;
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_OPEN_LIST):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close_leaf_list(xop, NULL);
+ if (rc >= 0)
+ goto open_list;
+ break;
+
+ /*close_list:*/
+ case XSS_TRANSITION(XSS_OPEN_LIST, XSS_CLOSE_LIST):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close(xop, name, new_state);
+ break;
+
+ case XSS_TRANSITION(XSS_INIT, XSS_CLOSE_LIST):
+ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_CLOSE_LIST):
+ case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_CLOSE_LIST):
+ case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_CLOSE_LIST):
+ rc = xo_do_close(xop, name, new_state);
+ break;
+
+ open_instance:
+ case XSS_TRANSITION(XSS_OPEN_LIST, XSS_OPEN_INSTANCE):
+ rc = xo_do_open_instance(xop, flags, name);
+ break;
+
+ case XSS_TRANSITION(XSS_INIT, XSS_OPEN_INSTANCE):
+ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_OPEN_INSTANCE):
+ /* Instances live inside lists; open the enclosing list first */
+ rc = xo_do_open_list(xop, flags, name);
+ if (rc >= 0)
+ goto open_instance;
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_OPEN_INSTANCE):
+ if (on_marker) {
+ rc = xo_do_open_list(xop, flags, name);
+ } else {
+ rc = xo_do_close_instance(xop, NULL);
+ }
+ if (rc >= 0)
+ goto open_instance;
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_OPEN_INSTANCE):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close_leaf_list(xop, NULL);
+ if (rc >= 0)
+ goto open_instance;
+ break;
+
+ /*close_instance:*/
+ case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_CLOSE_INSTANCE):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close_instance(xop, name);
+ break;
+
+ case XSS_TRANSITION(XSS_INIT, XSS_CLOSE_INSTANCE):
+ /* This one makes no sense; ignore it */
+ xo_failure(xop, "xo_close_instance ignored when called from "
+ "initial state ('%s')", name ?: "(unknown)");
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_CLOSE_INSTANCE):
+ case XSS_TRANSITION(XSS_OPEN_LIST, XSS_CLOSE_INSTANCE):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close(xop, name, new_state);
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_CLOSE_INSTANCE):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close_leaf_list(xop, NULL);
+ if (rc >= 0)
+ rc = xo_do_close(xop, name, new_state);
+ break;
+
+ open_leaf_list:
+ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_OPEN_LEAF_LIST):
+ case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_OPEN_LEAF_LIST):
+ case XSS_TRANSITION(XSS_INIT, XSS_OPEN_LEAF_LIST):
+ rc = xo_do_open_leaf_list(xop, flags, name);
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_LIST, XSS_OPEN_LEAF_LIST):
+ case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_OPEN_LEAF_LIST):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close_list(xop, NULL);
+ if (rc >= 0)
+ goto open_leaf_list;
+ break;
+
+ /*close_leaf_list:*/
+ case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_CLOSE_LEAF_LIST):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close_leaf_list(xop, name);
+ break;
+
+ case XSS_TRANSITION(XSS_INIT, XSS_CLOSE_LEAF_LIST):
+ /* Makes no sense; ignore */
+ xo_failure(xop, "xo_close_leaf_list ignored when called from "
+ "initial state ('%s')", name ?: "(unknown)");
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_CLOSE_LEAF_LIST):
+ case XSS_TRANSITION(XSS_OPEN_LIST, XSS_CLOSE_LEAF_LIST):
+ case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_CLOSE_LEAF_LIST):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close(xop, name, new_state);
+ break;
+
+ /*emit:*/
+ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_EMIT):
+ case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_EMIT):
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_LIST, XSS_EMIT):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close(xop, NULL, XSS_CLOSE_LIST);
+ break;
+
+ case XSS_TRANSITION(XSS_INIT, XSS_EMIT):
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_EMIT):
+ if (on_marker)
+ goto marker_prevents_close;
+ rc = xo_do_close_leaf_list(xop, NULL);
+ break;
+
+ /*emit_leaf_list:*/
+ case XSS_TRANSITION(XSS_INIT, XSS_EMIT_LEAF_LIST):
+ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_EMIT_LEAF_LIST):
+ case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_EMIT_LEAF_LIST):
+ rc = xo_do_open_leaf_list(xop, flags, name);
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_EMIT_LEAF_LIST):
+ break;
+
+ case XSS_TRANSITION(XSS_OPEN_LIST, XSS_EMIT_LEAF_LIST):
+ /*
+ * We need to be backward compatible with the pre-xo_open_leaf_list
+ * API, where both lists and leaf-lists were opened as lists. So
+ * if we find an open list that hasn't had anything written to it,
+ * we'll accept it.
+ */
+ break;
+
+ default:
+ xo_failure(xop, "unknown transition: (%u -> %u)",
+ xsp->xs_state, new_state);
+ }
+
+ /* Handle the flush flag */
+ if (rc >= 0 && XOF_ISSET(xop, XOF_FLUSH))
+ if (xo_flush_h(xop))
+ rc = -1;
+
+ return rc;
+
+ marker_prevents_close:
+ xo_failure(xop, "marker '%s' prevents transition from %s to %s",
+ xop->xo_stack[xop->xo_depth].xs_name,
+ xo_state_name(old_state), xo_state_name(new_state));
+ return -1;
+}
+
+/*
+ * Push a marker onto the stack.  Markers emit no output; they act as a
+ * barrier that protects enclosing constructs from being closed by
+ * later operations.  The current entry's marker flags are propagated.
+ */
+int
+xo_open_marker_h (xo_handle_t *xop, const char *name)
+{
+ xop = xo_default(xop);
+
+ xo_depth_change(xop, name, 1, 0, XSS_MARKER,
+ xop->xo_stack[xop->xo_depth].xs_flags & XSF_MARKER_FLAGS);
+
+ return 0;
+}
+
+/* Open a marker on the default handle */
+int
+xo_open_marker (const char *name)
+{
+ return xo_open_marker_h(NULL, name);
+}
+
+/* Close the named marker, unwinding everything opened since it */
+int
+xo_close_marker_h (xo_handle_t *xop, const char *name)
+{
+ xop = xo_default(xop);
+
+ return xo_do_close(xop, name, XSS_MARKER);
+}
+
+/* Close the named marker on the default handle */
+int
+xo_close_marker (const char *name)
+{
+ return xo_close_marker_h(NULL, name);
+}
+
+/*
+ * Record custom output functions into the xo handle, allowing
+ * integration with a variety of output frameworks.
+ */
+/*
+ * Record custom output functions into the xo handle, allowing
+ * integration with a variety of output frameworks.
+ *
+ * @param xop Handle (NULL for the default handle)
+ * @param opaque Caller cookie passed back to each callback
+ * @param write_func Called to emit buffered output
+ * @param close_func Called when the handle is destroyed
+ * @param flush_func Called by xo_flush_h() after writing
+ */
+void
+xo_set_writer (xo_handle_t *xop, void *opaque, xo_write_func_t write_func,
+ xo_close_func_t close_func, xo_flush_func_t flush_func)
+{
+ xop = xo_default(xop);
+
+ xop->xo_opaque = opaque;
+ xop->xo_write = write_func;
+ xop->xo_close = close_func;
+ xop->xo_flush = flush_func;
+}
+
+/* Install library-wide memory allocation hooks (globals, not per-handle) */
+void
+xo_set_allocator (xo_realloc_func_t realloc_func, xo_free_func_t free_func)
+{
+ xo_realloc = realloc_func;
+ xo_free = free_func;
+}
+
+/*
+ * Flush pending output for a handle: give an external encoder a chance
+ * to flush first, write buffered data via the handle's write function,
+ * then invoke the handle's flush callback, if any.
+ *
+ * Returns the result of xo_write(), or -1 if the flush callback fails.
+ */
+int
+xo_flush_h (xo_handle_t *xop)
+{
+ int rc;
+
+ xop = xo_default(xop);
+
+ switch (xo_style(xop)) {
+ case XO_STYLE_ENCODER:
+ xo_encoder_handle(xop, XO_OP_FLUSH, NULL, NULL);
+ break; /* explicit break: don't rely on falling out of the switch */
+ }
+
+ rc = xo_write(xop);
+ if (rc >= 0 && xop->xo_flush)
+ if (xop->xo_flush(xop->xo_opaque) < 0)
+ return -1;
+
+ return rc;
+}
+
+/* Convenience wrapper: flush pending output on the default handle */
+int
+xo_flush (void)
+{
+ /* A NULL handle selects the library's default handle */
+ int rc = xo_flush_h(NULL);
+ return rc;
+}
+
+/*
+ * Finish output on a handle: close all open constructs (unless
+ * XOF_NO_CLOSE is set), emit the top-level JSON close brace (unless
+ * XOF_NO_TOP), notify an external encoder, and flush.
+ * Returns the result of the final flush.
+ */
+int
+xo_finish_h (xo_handle_t *xop)
+{
+ const char *cp = "";
+ xop = xo_default(xop);
+
+ if (!XOF_ISSET(xop, XOF_NO_CLOSE))
+ xo_do_close_all(xop, xop->xo_stack);
+
+ switch (xo_style(xop)) {
+ case XO_STYLE_JSON:
+ if (!XOF_ISSET(xop, XOF_NO_TOP)) {
+ /* If the opening "{" was never emitted, emit both braces now */
+ if (XOIF_ISSET(xop, XOIF_TOP_EMITTED))
+ XOIF_CLEAR(xop, XOIF_TOP_EMITTED); /* Turn off before output */
+ else
+ cp = "{ ";
+ xo_printf(xop, "%*s%s}\n",xo_indent(xop), "", cp);
+ }
+ break;
+
+ case XO_STYLE_ENCODER:
+ xo_encoder_handle(xop, XO_OP_FINISH, NULL, NULL);
+ break;
+ }
+
+ return xo_flush_h(xop);
+}
+
+/* Finish output on the default handle */
+int
+xo_finish (void)
+{
+ return xo_finish_h(NULL);
+}
+
+/*
+ * xo_finish_atexit is suitable for atexit() calls, to force clear up
+ * and finalizing output.
+ */
+void
+xo_finish_atexit (void)
+{
+ (void) xo_finish_h(NULL);
+}
+
+/*
+ * Generate an error message, such as would be displayed on stderr
+ */
+/*
+ * Generate an error message, such as would be displayed on stderr.
+ * If 'fmt' does not end in a newline, one is appended (via a stack
+ * copy of the format).  Text style writes to stderr; HTML wraps the
+ * message in an "error" div; XML/JSON emit an "error" container with
+ * a "message" value; SDPARAMS and encoder styles emit nothing.
+ */
+void
+xo_error_hv (xo_handle_t *xop, const char *fmt, va_list vap)
+{
+ xop = xo_default(xop);
+
+ /*
+ * If the format string doesn't end with a newline, we pop
+ * one on ourselves.
+ */
+ int len = strlen(fmt);
+ if (len > 0 && fmt[len - 1] != '\n') {
+ char *newfmt = alloca(len + 2);
+ memcpy(newfmt, fmt, len);
+ newfmt[len] = '\n';
+ newfmt[len + 1] = '\0'; /* was newfmt[len], which clobbered the '\n' */
+ fmt = newfmt;
+ }
+
+ switch (xo_style(xop)) {
+ case XO_STYLE_TEXT:
+ vfprintf(stderr, fmt, vap);
+ break;
+
+ case XO_STYLE_HTML:
+ va_copy(xop->xo_vap, vap);
+
+ xo_buf_append_div(xop, "error", 0, NULL, 0, fmt, strlen(fmt), NULL, 0);
+
+ if (XOIF_ISSET(xop, XOIF_DIV_OPEN))
+ xo_line_close(xop);
+
+ xo_write(xop);
+
+ va_end(xop->xo_vap);
+ bzero(&xop->xo_vap, sizeof(xop->xo_vap));
+ break;
+
+ case XO_STYLE_XML:
+ case XO_STYLE_JSON:
+ va_copy(xop->xo_vap, vap);
+
+ xo_open_container_h(xop, "error");
+ xo_format_value(xop, "message", 7, fmt, strlen(fmt), NULL, 0, 0);
+ xo_close_container_h(xop, "error");
+
+ va_end(xop->xo_vap);
+ bzero(&xop->xo_vap, sizeof(xop->xo_vap));
+ break;
+
+ case XO_STYLE_SDPARAMS:
+ case XO_STYLE_ENCODER:
+ break;
+ }
+}
+
+/* Varargs wrapper for xo_error_hv() on an explicit handle */
+void
+xo_error_h (xo_handle_t *xop, const char *fmt, ...)
+{
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_error_hv(xop, fmt, vap);
+ va_end(vap);
+}
+
+/*
+ * Generate an error message, such as would be displayed on stderr
+ */
+void
+xo_error (const char *fmt, ...)
+{
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_error_hv(NULL, fmt, vap);
+ va_end(vap);
+}
+
+/*
+ * Parse any libxo-specific options from the command line, removing them
+ * so the main() argument parsing won't see them. We return the new value
+ * for argc or -1 for error. If an error occurred, the program should
+ * exit. A suitable error message has already been displayed.
+ */
+/*
+ * Scan argv for "--libxo", "--libxo <opts>", "--libxo=<opts>",
+ * "--libxo:<opts>" and "--libxo--check", applying the options and
+ * compacting argv in place so the caller's own option parsing never
+ * sees them.  Also records argv[0] (basename) as the program name.
+ * Returns the new argc, or -1 on error (a message has been printed).
+ */
+int
+xo_parse_args (int argc, char **argv)
+{
+ static char libxo_opt[] = "--libxo";
+ char *cp;
+ int i, save;
+
+ /* Save our program name for xo_err and friends */
+ xo_program = argv[0];
+ cp = strrchr(xo_program, '/');
+ if (cp)
+ xo_program = cp + 1;
+
+ for (save = i = 1; i < argc; i++) {
+ /* Anything that isn't a --libxo option is kept (shifted down) */
+ if (argv[i] == NULL
+ || strncmp(argv[i], libxo_opt, sizeof(libxo_opt) - 1) != 0) {
+ if (save != i)
+ argv[save] = argv[i];
+ save += 1;
+ continue;
+ }
+
+ cp = argv[i] + sizeof(libxo_opt) - 1;
+ if (*cp == '\0') {
+ /* "--libxo <opts>": options are in the next argument */
+ cp = argv[++i];
+ if (cp == NULL) {
+ xo_warnx("missing libxo option");
+ return -1;
+ }
+
+ if (xo_set_options(NULL, cp) < 0)
+ return -1;
+ } else if (*cp == ':') {
+ if (xo_set_options(NULL, cp) < 0)
+ return -1;
+
+ } else if (*cp == '=') {
+ if (xo_set_options(NULL, ++cp) < 0)
+ return -1;
+
+ } else if (*cp == '-') {
+ cp += 1;
+ if (strcmp(cp, "check") == 0) {
+ /* "--libxo--check": report libxo presence via exit status */
+ exit(XO_HAS_LIBXO);
+
+ } else {
+ xo_warnx("unknown libxo option: '%s'", argv[i]);
+ return -1;
+ }
+ } else {
+ xo_warnx("unknown libxo option: '%s'", argv[i]);
+ return -1;
+ }
+ }
+
+ /* Re-terminate the compacted argument vector */
+ argv[save] = NULL;
+ return save;
+}
+
+/*
+ * Debugging function that dumps the current stack of open libxo constructs,
+ * suitable for calling from the debugger.
+ */
+/*
+ * Debugging helper: print the handle's stack of open constructs to
+ * stderr, one line per entry (index, state name, name, flags).
+ * Intended to be called from a debugger.
+ */
+void
+xo_dump_stack (xo_handle_t *xop)
+{
+ int i;
+ xo_stack_t *xsp;
+
+ xop = xo_default(xop);
+
+ fprintf(stderr, "Stack dump:\n");
+
+ xsp = xop->xo_stack;
+ for (i = 1, xsp++; i <= xop->xo_depth; i++, xsp++) {
+ fprintf(stderr, " [%d] %s '%s' [%x]\n",
+ i, xo_state_name(xsp->xs_state),
+ xsp->xs_name ?: "--", xsp->xs_flags);
+ }
+}
+
+/*
+ * Record the program name used for error messages
+ */
+void
+xo_set_program (const char *name)
+{
+ /* NOTE(review): the pointer is stored, not copied; caller must keep
+ * 'name' alive for the life of the program. */
+ xo_program = name;
+}
+
+/*
+ * Record an API version string for the output.  Rejected if NULL or
+ * if it contains a double quote (it is embedded in quoted output).
+ * Only encoding styles (XML/JSON/encoder) carry a version.
+ */
+void
+xo_set_version_h (xo_handle_t *xop, const char *version)
+{
+ xop = xo_default(xop);
+
+ if (version == NULL || strchr(version, '"') != NULL)
+ return;
+
+ if (!xo_style_is_encoding(xop))
+ return;
+
+ switch (xo_style(xop)) {
+ case XO_STYLE_XML:
+ /* For XML, we record this as an attribute for the first tag */
+ xo_attr_h(xop, "__version", "%s", version);
+ break;
+
+ case XO_STYLE_JSON:
+ /*
+ * For JSON, we record the version string in our handle, and emit
+ * it in xo_emit_top.
+ */
+ xop->xo_version = xo_strndup(version, -1);
+ break;
+
+ case XO_STYLE_ENCODER:
+ xo_encoder_handle(xop, XO_OP_VERSION, NULL, version);
+ break;
+ }
+}
+
+/*
+ * Set the version number for the API content being carried through
+ * the xo handle.
+ */
+void
+xo_set_version (const char *version)
+{
+ xo_set_version_h(NULL, version);
+}
+
+/*
+ * Generate a warning. Normally, this is a text message written to
+ * standard error. If the XOF_WARN_XML flag is set, then we generate
+ * XMLified content on standard output.
+ */
+/*
+ * Core of the xo_warn/xo_err family: emit a warning (or error, per
+ * 'as_warning') wrapped in a "__warning"/"__error" container.  For
+ * XML/JSON, the formatted text is also rendered through a temporary
+ * text-style handle and emitted as a plain "message" value.  When
+ * 'code' > 0, the strerror() text is appended; a trailing newline is
+ * added if 'fmt' lacks one.  The whole construct is bracketed by a
+ * marker so it cannot disturb the caller's open hierarchy.
+ */
+void
+xo_emit_warn_hcv (xo_handle_t *xop, int as_warning, int code,
+ const char *fmt, va_list vap)
+{
+ xop = xo_default(xop);
+
+ if (fmt == NULL)
+ return;
+
+ xo_open_marker_h(xop, "xo_emit_warn_hcv");
+ xo_open_container_h(xop, as_warning ? "__warning" : "__error");
+
+ if (xo_program)
+ xo_emit("{wc:program}", xo_program);
+
+ if (xo_style(xop) == XO_STYLE_XML || xo_style(xop) == XO_STYLE_JSON) {
+ va_list ap;
+ xo_handle_t temp;
+
+ /* Render the message as plain text into a scratch handle */
+ bzero(&temp, sizeof(temp));
+ temp.xo_style = XO_STYLE_TEXT;
+ xo_buf_init(&temp.xo_data);
+ xo_depth_check(&temp, XO_DEPTH);
+
+ va_copy(ap, vap);
+ (void) xo_emit_hv(&temp, fmt, ap);
+ va_end(ap);
+
+ xo_buffer_t *src = &temp.xo_data;
+ xo_format_value(xop, "message", 7, src->xb_bufp,
+ src->xb_curp - src->xb_bufp, NULL, 0, 0);
+
+ xo_free(temp.xo_stack);
+ xo_buf_cleanup(src);
+ }
+
+ (void) xo_emit_hv(xop, fmt, vap);
+
+ int len = strlen(fmt);
+ if (len > 0 && fmt[len - 1] != '\n') {
+ if (code > 0) {
+ const char *msg = strerror(code);
+ if (msg)
+ xo_emit_h(xop, ": {G:strerror}{g:error/%s}", msg);
+ }
+ xo_emit("\n");
+ }
+
+ xo_close_marker_h(xop, "xo_emit_warn_hcv");
+ xo_flush_h(xop);
+}
+
+/* Warning with explicit handle and errno-style code */
+void
+xo_emit_warn_hc (xo_handle_t *xop, int code, const char *fmt, ...)
+{
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_emit_warn_hcv(xop, 1, code, fmt, vap);
+ va_end(vap);
+}
+
+/* Warning on the default handle with explicit errno-style code */
+void
+xo_emit_warn_c (int code, const char *fmt, ...)
+{
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_emit_warn_hcv(NULL, 1, code, fmt, vap);
+ va_end(vap);
+}
+
+/* Warning on the default handle; appends strerror(errno) */
+void
+xo_emit_warn (const char *fmt, ...)
+{
+ int code = errno;
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_emit_warn_hcv(NULL, 1, code, fmt, vap);
+ va_end(vap);
+}
+
+/* Warning on the default handle without any errno text (like warnx(3)) */
+void
+xo_emit_warnx (const char *fmt, ...)
+{
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_emit_warn_hcv(NULL, 1, -1, fmt, vap);
+ va_end(vap);
+}
+
+/*
+ * Emit an error message, finish output, and exit with status 'eval'.
+ * Note: this function does not return (it calls exit()).
+ */
+void
+xo_emit_err_v (int eval, int code, const char *fmt, va_list vap)
+{
+ xo_emit_warn_hcv(NULL, 0, code, fmt, vap);
+ xo_finish();
+ exit(eval);
+}
+
+/*
+ * err(3)-style fatal error: emit the message plus strerror(errno),
+ * finish output, and exit with status 'eval'.  Does not return.
+ */
+void
+xo_emit_err (int eval, const char *fmt, ...)
+{
+ int code = errno;
+ va_list vap;
+ va_start(vap, fmt);
+ /*
+ * Pass 'eval' through: xo_emit_err_v() calls exit() itself, so
+ * passing 0 here (as the old code did) made the process always
+ * exit with status 0 and left the exit(eval) below unreachable.
+ */
+ xo_emit_err_v(eval, code, fmt, vap);
+ va_end(vap);
+ exit(eval); /* not reached; kept as a safety net */
+}
+
+/*
+ * errx(3)-style fatal error: emit the message (no errno text), finish
+ * output, and exit with status 'eval'.  Does not return.
+ */
+void
+xo_emit_errx (int eval, const char *fmt, ...)
+{
+ va_list vap;
+
+ va_start(vap, fmt);
+ /*
+ * Pass 'eval' through: xo_emit_err_v() exits itself, so the old
+ * xo_emit_err_v(0, ...) always exited with status 0 and the
+ * xo_finish()/exit(eval) below were dead code.
+ */
+ xo_emit_err_v(eval, -1, fmt, vap);
+ va_end(vap);
+ xo_finish(); /* not reached; kept as a safety net */
+ exit(eval);
+}
+
+/*
+ * err(3)-style fatal error with an explicit error code instead of
+ * errno: emit the message plus strerror(code), finish output, and
+ * exit with status 'eval'.  Does not return.
+ */
+void
+xo_emit_errc (int eval, int code, const char *fmt, ...)
+{
+ va_list vap;
+
+ va_start(vap, fmt);
+ xo_emit_warn_hcv(NULL, 0, code, fmt, vap);
+ va_end(vap);
+ xo_finish();
+ exit(eval);
+}
+
+/*
+ * Get the opaque private pointer for an xo handle
+ */
+void *
+xo_get_private (xo_handle_t *xop)
+{
+ xop = xo_default(xop);
+ return xop->xo_private;
+}
+
+/*
+ * Set the opaque private pointer for an xo handle.
+ */
+void
+xo_set_private (xo_handle_t *xop, void *opaque)
+{
+ xop = xo_default(xop);
+ xop->xo_private = opaque;
+}
+
+/*
+ * Get the encoder function
+ */
+xo_encoder_func_t
+xo_get_encoder (xo_handle_t *xop)
+{
+ xop = xo_default(xop);
+ return xop->xo_encoder;
+}
+
+/*
+ * Record an encoder callback function in an xo handle.
+ */
+void
+xo_set_encoder (xo_handle_t *xop, xo_encoder_func_t encoder)
+{
+ xop = xo_default(xop);
+
+ /* Installing an encoder implicitly switches the handle's style */
+ xop->xo_style = XO_STYLE_ENCODER;
+ xop->xo_encoder = encoder;
+}
diff --git a/freebsd/contrib/libxo/libxo/xo.h b/freebsd/contrib/libxo/libxo/xo.h
new file mode 100644
index 00000000..310b21ca
--- /dev/null
+++ b/freebsd/contrib/libxo/libxo/xo.h
@@ -0,0 +1,666 @@
+/*
+ * Copyright (c) 2014-2015, Juniper Networks, Inc.
+ * All rights reserved.
+ * This SOFTWARE is licensed under the LICENSE provided in the
+ * ../Copyright file. By downloading, installing, copying, or otherwise
+ * using the SOFTWARE, you agree to be bound by the terms of that
+ * LICENSE.
+ * Phil Shafer, July 2014
+ */
+
+/**
+ * libxo provides a means of generating text, XML, JSON, and HTML output
+ * using a single set of function calls, maximizing the value of output
+ * while minimizing the cost/impact on the code.
+ *
+ * Full documentation is available in ./doc/libxo.txt or online at:
+ * http://juniper.github.io/libxo/libxo-manual.html
+ */
+
+#ifndef INCLUDE_XO_H
+#define INCLUDE_XO_H
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#ifdef __dead2
+#define NORETURN __dead2
+#else
+#define NORETURN
+#endif /* __dead2 */
+
+/*
+ * Normally we'd use the HAVE_PRINTFLIKE define triggered by the
+ * --enable-printflike option to configure, but we don't install
+ * our internal "xoconfig.h", and I'd rather not. Taking the
+ * coward's path, we'll turn it on inside a #if that allows
+ * others to turn it off where needed. Not ideal, but functional.
+ */
+#if !defined(NO_PRINTFLIKE) && !defined(__linux__)
+#define PRINTFLIKE(_x, _y) __printflike(_x, _y)
+#else
+#define PRINTFLIKE(_x, _y)
+#endif /* NO_PRINTFLIKE */
+
+/** Formatting types */
+typedef unsigned short xo_style_t;
+#define XO_STYLE_TEXT 0 /** Generate text output */
+#define XO_STYLE_XML 1 /** Generate XML output */
+#define XO_STYLE_JSON 2 /** Generate JSON output */
+#define XO_STYLE_HTML 3 /** Generate HTML output */
+#define XO_STYLE_SDPARAMS 4 /* Generate syslog structured data params */
+#define XO_STYLE_ENCODER 5 /* Generate calls to external encoder */
+
+/** Flags for libxo */
+typedef unsigned long long xo_xof_flags_t;
+#define XOF_BIT(_n) ((xo_xof_flags_t) 1 << (_n))
+#define XOF_CLOSE_FP XOF_BIT(0) /** Close file pointer on xo_close() */
+#define XOF_PRETTY XOF_BIT(1) /** Make 'pretty printed' output */
+#define XOF_LOG_SYSLOG XOF_BIT(2) /** Log (on stderr) our syslog content */
+#define XOF_RESV3 XOF_BIT(3) /* Unused */
+
+#define XOF_WARN XOF_BIT(4) /** Generate warnings for broken calls */
+#define XOF_XPATH XOF_BIT(5) /** Emit XPath attributes in HTML */
+#define XOF_INFO XOF_BIT(6) /** Emit additional info fields (HTML) */
+#define XOF_WARN_XML XOF_BIT(7) /** Emit warnings in XML (on stdout) */
+
+#define XOF_NO_ENV XOF_BIT(8) /** Don't look at LIBXO_OPTIONS env var */
+#define XOF_NO_VA_ARG XOF_BIT(9) /** Don't advance va_list w/ va_arg() */
+#define XOF_DTRT XOF_BIT(10) /** Enable "do the right thing" mode */
+#define XOF_KEYS XOF_BIT(11) /** Flag 'key' fields for xml and json */
+
+#define XOF_IGNORE_CLOSE XOF_BIT(12) /** Ignore errors on close tags */
+#define XOF_NOT_FIRST XOF_BIT(13) /* Not the first item (JSON) */
+#define XOF_NO_LOCALE XOF_BIT(14) /** Don't bother with locale */
+#define XOF_RESV15 XOF_BIT(15) /* Unused */
+
+#define XOF_NO_TOP XOF_BIT(16) /** Don't emit the top braces in JSON */
+#define XOF_RESV17 XOF_BIT(17) /* Unused */
+#define XOF_UNITS XOF_BIT(18) /** Encode units in XML */
+#define XOF_RESV19 XOF_BIT(19) /* Unused */
+
+#define XOF_UNDERSCORES XOF_BIT(20) /** Replace dashes with underscores (JSON)*/
+#define XOF_COLUMNS XOF_BIT(21) /** xo_emit should return a column count */
+#define XOF_FLUSH XOF_BIT(22) /** Flush after each xo_emit call */
+#define XOF_FLUSH_LINE XOF_BIT(23) /** Flush after each newline */
+
+#define XOF_NO_CLOSE XOF_BIT(24) /** xo_finish won't close open elements */
+#define XOF_COLOR_ALLOWED XOF_BIT(25) /** Allow color/effects to be enabled */
+#define XOF_COLOR XOF_BIT(26) /** Enable color and effects */
+#define XOF_NO_HUMANIZE XOF_BIT(27) /** Block the {h:} modifier */
+
+#define XOF_LOG_GETTEXT XOF_BIT(28) /** Log (stderr) gettext lookup strings */
+#define XOF_UTF8 XOF_BIT(29) /** Force text output to be UTF8 */
+#define XOF_RETAIN_ALL XOF_BIT(30) /** Force use of XOEF_RETAIN */
+#define XOF_RETAIN_NONE XOF_BIT(31) /** Prevent use of XOEF_RETAIN */
+
+typedef unsigned xo_emit_flags_t; /* Flags to xo_emit() and friends */
+#define XOEF_RETAIN (1<<0) /* Retain parsed formatting information */
+
+/*
+ * The xo_info_t structure provides a mapping between names and
+ * additional data emitted via HTML.
+ */
+typedef struct xo_info_s {
+ const char *xi_name; /* Name of the element */
+ const char *xi_type; /* Type of field */
+ const char *xi_help; /* Description of field */
+} xo_info_t;
+
+#define XO_INFO_NULL NULL, NULL, NULL /* Use '{ XO_INFO_NULL }' to end lists */
+
+struct xo_handle_s; /* Opaque structure forward */
+typedef struct xo_handle_s xo_handle_t; /* Handle for XO output */
+
+typedef int (*xo_write_func_t)(void *, const char *);
+typedef void (*xo_close_func_t)(void *);
+typedef int (*xo_flush_func_t)(void *);
+typedef void *(*xo_realloc_func_t)(void *, size_t);
+typedef void (*xo_free_func_t)(void *);
+
+/*
+ * The formatter function mirrors "vsnprintf", with an additional argument
+ * of the xo handle. The caller should return the number of bytes _needed_
+ * to fit the data, even if this exceeds 'len'.
+ */
+typedef int (*xo_formatter_t)(xo_handle_t *, char *, int,
+ const char *, va_list);
+typedef void (*xo_checkpointer_t)(xo_handle_t *, va_list, int);
+
+xo_handle_t *
+xo_create (xo_style_t style, xo_xof_flags_t flags);
+
+xo_handle_t *
+xo_create_to_file (FILE *fp, xo_style_t style, xo_xof_flags_t flags);
+
+void
+xo_destroy (xo_handle_t *xop);
+
+void
+xo_set_writer (xo_handle_t *xop, void *opaque, xo_write_func_t write_func,
+ xo_close_func_t close_func, xo_flush_func_t flush_func);
+
+void
+xo_set_allocator (xo_realloc_func_t realloc_func, xo_free_func_t free_func);
+
+void
+xo_set_style (xo_handle_t *xop, xo_style_t style);
+
+xo_style_t
+xo_get_style (xo_handle_t *xop);
+
+int
+xo_set_style_name (xo_handle_t *xop, const char *style);
+
+int
+xo_set_options (xo_handle_t *xop, const char *input);
+
+xo_xof_flags_t
+xo_get_flags (xo_handle_t *xop);
+
+void
+xo_set_flags (xo_handle_t *xop, xo_xof_flags_t flags);
+
+void
+xo_clear_flags (xo_handle_t *xop, xo_xof_flags_t flags);
+
+int
+xo_set_file_h (xo_handle_t *xop, FILE *fp);
+
+int
+xo_set_file (FILE *fp);
+
+void
+xo_set_info (xo_handle_t *xop, xo_info_t *infop, int count);
+
+void
+xo_set_formatter (xo_handle_t *xop, xo_formatter_t func, xo_checkpointer_t);
+
+void
+xo_set_depth (xo_handle_t *xop, int depth);
+
+int
+xo_emit_hv (xo_handle_t *xop, const char *fmt, va_list vap);
+
+int
+xo_emit_h (xo_handle_t *xop, const char *fmt, ...);
+
+int
+xo_emit (const char *fmt, ...);
+
+int
+xo_emit_hvf (xo_handle_t *xop, xo_emit_flags_t flags,
+ const char *fmt, va_list vap);
+
+int
+xo_emit_hf (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, ...);
+
+int
+xo_emit_f (xo_emit_flags_t flags, const char *fmt, ...);
+
+/* va_list wrapper for xo_emit_hv with printf-style format checking */
+PRINTFLIKE(2, 0)
+static inline int
+xo_emit_hvp (xo_handle_t *xop, const char *fmt, va_list vap)
+{
+ return xo_emit_hv(xop, fmt, vap);
+}
+
+/* Checked (printf-like) variant of xo_emit_h */
+PRINTFLIKE(2, 3)
+static inline int
+xo_emit_hp (xo_handle_t *xop, const char *fmt, ...)
+{
+ va_list vap;
+ va_start(vap, fmt);
+ int rc = xo_emit_hv(xop, fmt, vap);
+ va_end(vap);
+ return rc;
+}
+
+/* Checked (printf-like) variant of xo_emit on the default handle */
+PRINTFLIKE(1, 2)
+static inline int
+xo_emit_p (const char *fmt, ...)
+{
+ va_list vap;
+ va_start(vap, fmt);
+ int rc = xo_emit_hv(NULL, fmt, vap);
+ va_end(vap);
+ return rc;
+}
+
+/* va_list wrapper for xo_emit_hvf (flags-taking variant) */
+PRINTFLIKE(3, 0)
+static inline int
+xo_emit_hvfp (xo_handle_t *xop, xo_emit_flags_t flags,
+ const char *fmt, va_list vap)
+{
+ return xo_emit_hvf(xop, flags, fmt, vap);
+}
+
+/* Checked (printf-like) variant of xo_emit_hf */
+PRINTFLIKE(3, 4)
+static inline int
+xo_emit_hfp (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, ...)
+{
+ va_list vap;
+ va_start(vap, fmt);
+ int rc = xo_emit_hvf(xop, flags, fmt, vap);
+ va_end(vap);
+ return rc;
+}
+
+/* Checked (printf-like) variant of xo_emit_f on the default handle */
+PRINTFLIKE(2, 3)
+static inline int
+xo_emit_fp (xo_emit_flags_t flags, const char *fmt, ...)
+{
+ va_list vap;
+ va_start(vap, fmt);
+ int rc = xo_emit_hvf(NULL, flags, fmt, vap);
+ va_end(vap);
+ return rc;
+}
+
+int
+xo_open_container_h (xo_handle_t *xop, const char *name);
+
+int
+xo_open_container (const char *name);
+
+int
+xo_open_container_hd (xo_handle_t *xop, const char *name);
+
+int
+xo_open_container_d (const char *name);
+
+int
+xo_close_container_h (xo_handle_t *xop, const char *name);
+
+int
+xo_close_container (const char *name);
+
+int
+xo_close_container_hd (xo_handle_t *xop);
+
+int
+xo_close_container_d (void);
+
+int
+xo_open_list_h (xo_handle_t *xop, const char *name);
+
+int
+xo_open_list (const char *name);
+
+int
+xo_open_list_hd (xo_handle_t *xop, const char *name);
+
+int
+xo_open_list_d (const char *name);
+
+int
+xo_close_list_h (xo_handle_t *xop, const char *name);
+
+int
+xo_close_list (const char *name);
+
+int
+xo_close_list_hd (xo_handle_t *xop);
+
+int
+xo_close_list_d (void);
+
+int
+xo_open_instance_h (xo_handle_t *xop, const char *name);
+
+int
+xo_open_instance (const char *name);
+
+int
+xo_open_instance_hd (xo_handle_t *xop, const char *name);
+
+int
+xo_open_instance_d (const char *name);
+
+int
+xo_close_instance_h (xo_handle_t *xop, const char *name);
+
+int
+xo_close_instance (const char *name);
+
+int
+xo_close_instance_hd (xo_handle_t *xop);
+
+int
+xo_close_instance_d (void);
+
+int
+xo_open_marker_h (xo_handle_t *xop, const char *name);
+
+int
+xo_open_marker (const char *name);
+
+int
+xo_close_marker_h (xo_handle_t *xop, const char *name);
+
+int
+xo_close_marker (const char *name);
+
+int
+xo_attr_h (xo_handle_t *xop, const char *name, const char *fmt, ...);
+
+int
+xo_attr_hv (xo_handle_t *xop, const char *name, const char *fmt, va_list vap);
+
+int
+xo_attr (const char *name, const char *fmt, ...);
+
+void
+xo_error_hv (xo_handle_t *xop, const char *fmt, va_list vap);
+
+void
+xo_error_h (xo_handle_t *xop, const char *fmt, ...);
+
+void
+xo_error (const char *fmt, ...);
+
+int
+xo_flush_h (xo_handle_t *xop);
+
+int
+xo_flush (void);
+
+int
+xo_finish_h (xo_handle_t *xop);
+
+int
+xo_finish (void);
+
+void
+xo_finish_atexit (void);
+
+void
+xo_set_leading_xpath (xo_handle_t *xop, const char *path);
+
+void
+xo_warn_hc (xo_handle_t *xop, int code, const char *fmt, ...) PRINTFLIKE(3, 4);
+
+void
+xo_warn_c (int code, const char *fmt, ...) PRINTFLIKE(2, 3);
+
+void
+xo_warn (const char *fmt, ...) PRINTFLIKE(1, 2);
+
+void
+xo_warnx (const char *fmt, ...) PRINTFLIKE(1, 2);
+
+void
+xo_err (int eval, const char *fmt, ...) NORETURN PRINTFLIKE(2, 3);
+
+void
+xo_errx (int eval, const char *fmt, ...) NORETURN PRINTFLIKE(2, 3);
+
+void
+xo_errc (int eval, int code, const char *fmt, ...) NORETURN PRINTFLIKE(3, 4);
+
+void
+xo_message_hcv (xo_handle_t *xop, int code, const char *fmt, va_list vap) PRINTFLIKE(3, 0);
+
+void
+xo_message_hc (xo_handle_t *xop, int code, const char *fmt, ...) PRINTFLIKE(3, 4);
+
+void
+xo_message_c (int code, const char *fmt, ...) PRINTFLIKE(2, 3);
+
+void
+xo_message_e (const char *fmt, ...) PRINTFLIKE(1, 2);
+
+void
+xo_message (const char *fmt, ...) PRINTFLIKE(1, 2);
+
+void
+xo_emit_warn_hcv (xo_handle_t *xop, int as_warning, int code,
+ const char *fmt, va_list vap);
+
+void
+xo_emit_warn_hc (xo_handle_t *xop, int code, const char *fmt, ...);
+
+void
+xo_emit_warn_c (int code, const char *fmt, ...);
+
+void
+xo_emit_warn (const char *fmt, ...);
+
+void
+xo_emit_warnx (const char *fmt, ...);
+
+void
+xo_emit_err (int eval, const char *fmt, ...) NORETURN;
+
+void
+xo_emit_errx (int eval, const char *fmt, ...) NORETURN;
+
+void
+xo_emit_errc (int eval, int code, const char *fmt, ...) NORETURN;
+
+PRINTFLIKE(4, 0)
+static inline void
+xo_emit_warn_hcvp (xo_handle_t *xop, int as_warning, int code,
+ const char *fmt, va_list vap)
+{
+ xo_emit_warn_hcv(xop, as_warning, code, fmt, vap);
+}
+
+PRINTFLIKE(3, 4)
+static inline void
+xo_emit_warn_hcp (xo_handle_t *xop, int code, const char *fmt, ...)
+{
+ va_list vap;
+ va_start(vap, fmt);
+ xo_emit_warn_hcv(xop, 1, code, fmt, vap);
+ va_end(vap);
+}
+
+PRINTFLIKE(2, 3)
+static inline void
+xo_emit_warn_cp (int code, const char *fmt, ...)
+{
+ va_list vap;
+ va_start(vap, fmt);
+ xo_emit_warn_hcv(NULL, 1, code, fmt, vap);
+ va_end(vap);
+}
+
+PRINTFLIKE(1, 2)
+static inline void
+xo_emit_warn_p (const char *fmt, ...)
+{
+ int code = errno;
+ va_list vap;
+ va_start(vap, fmt);
+ xo_emit_warn_hcv(NULL, 1, code, fmt, vap);
+ va_end(vap);
+}
+
+PRINTFLIKE(1, 2)
+static inline void
+xo_emit_warnx_p (const char *fmt, ...)
+{
+ va_list vap;
+ va_start(vap, fmt);
+ xo_emit_warn_hcv(NULL, 1, -1, fmt, vap);
+ va_end(vap);
+}
+
+NORETURN PRINTFLIKE(2, 3)
+static inline void
+xo_emit_err_p (int eval, const char *fmt, ...)
+{
+ int code = errno;
+ va_list vap;
+ va_start(vap, fmt);
+ xo_emit_warn_hcv(NULL, 0, code, fmt, vap);
+ va_end(vap);
+
+ exit(eval);
+}
+
+PRINTFLIKE(2, 3)
+static inline void
+xo_emit_errx_p (int eval, const char *fmt, ...)
+{
+ va_list vap;
+ va_start(vap, fmt);
+ xo_emit_warn_hcv(NULL, 0, -1, fmt, vap);
+ va_end(vap);
+ exit(eval);
+}
+
+PRINTFLIKE(3, 4)
+static inline void
+xo_emit_errc_p (int eval, int code, const char *fmt, ...)
+{
+ va_list vap;
+ va_start(vap, fmt);
+ xo_emit_warn_hcv(NULL, 0, code, fmt, vap);
+ va_end(vap);
+ exit(eval);
+}
+
+void
+xo_emit_err_v (int eval, int code, const char *fmt, va_list vap) NORETURN PRINTFLIKE(3, 0);
+
+void
+xo_no_setlocale (void);
+
+/**
+ * @brief Lift libxo-specific arguments from a set of arguments
+ *
+ * libxo-enabled programs typically use command line options to enable
+ * all the nifty-cool libxo features. xo_parse_args() makes this simple
+ * by pre-processing the command line arguments given to main(), handling
+ * and removing the libxo-specific ones, meaning anything starting with
+ * "--libxo". A full description of these arguments is in the base
+ * documentation.
+ * @param[in] argc Number of arguments (ala #main())
+ * @param[in] argv Array of argument strings (ala #main())
+ * @return New number of arguments, or -1 for failure.
+ */
+int
+xo_parse_args (int argc, char **argv);
+
+/**
+ * This is the "magic" number returned by libxo-supporting commands
+ * when passed the equally magic "--libxo-check" option. If you
+ * return this, we can (unsafely) assume that since you know the magic
+ * handshake, you'll happily handle future --libxo options and not do
+ * something violent like reboot the box or create another hole in the
+ * ozone layer.
+ */
+#define XO_HAS_LIBXO 121
+
+/**
+ * externs for libxo's version number strings
+ */
+extern const char xo_version[]; /** Base version triple string */
+extern const char xo_version_extra[]; /** Extra version magic content */
+
+/**
+ * @brief Dump the internal stack of a libxo handle.
+ *
+ * This diagnostic function is something I will ask you to call from
+ * your program when you write to tell me libxo has gone bat-stink
+ * crazy and has discarded your list or container or content. Output
+ * content will be what we lovingly call "developer entertainment".
+ * @param[in] xop A valid libxo handle, or NULL for the default handle
+ */
+void
+xo_dump_stack (xo_handle_t *xop);
+
+/**
+ * @brief Record the name of the program, suitable for error output.
+ *
+ * libxo will record the given name for use while generating error
+ * messages. The contents are not copied, so the value must continue
+ * to point to a valid memory location. This allows the caller to change
+ * the value, but requires the caller to manage the memory. Typically
+ * this is called with argv[0] from main().
+ * @param[in] name The name of the current application program
+ */
+void
+xo_set_program (const char *name);
+
+/**
+ * @brief Add a version string to the output, where possible.
+ *
+ * Adds a version number to the output, suitable for tracking
+ * changes in the content. This is only important for the "encoding"
+ * format styles (XML and JSON) and allows a user of the data to
+ * discern which version of the data model is in use.
+ * @param[in] version The version number, encoded as a string
+ */
+void
+xo_set_version (const char *version);
+
+/**
+ * #xo_set_version with a handle.
+ * @param[in] xop A valid libxo handle, or NULL for the default handle
+ * @param[in] version The version number, encoded as a string
+ */
+void
+xo_set_version_h (xo_handle_t *xop, const char *version);
+
+void
+xo_open_log (const char *ident, int logopt, int facility);
+
+void
+xo_close_log (void);
+
+int
+xo_set_logmask (int maskpri);
+
+void
+xo_set_unit_test_mode (int value);
+
+void
+xo_syslog (int priority, const char *name, const char *message, ...);
+
+void
+xo_vsyslog (int priority, const char *name, const char *message, va_list args);
+
+typedef void (*xo_syslog_open_t)(void);
+typedef void (*xo_syslog_send_t)(const char *full_msg,
+ const char *v0_hdr, const char *text_only);
+typedef void (*xo_syslog_close_t)(void);
+
+void
+xo_set_syslog_handler (xo_syslog_open_t open_func, xo_syslog_send_t send_func,
+ xo_syslog_close_t close_func);
+
+void
+xo_set_syslog_enterprise_id (unsigned short eid);
+
+typedef void (*xo_simplify_field_func_t)(const char *, unsigned, int);
+
+char *
+xo_simplify_format (xo_handle_t *xop, const char *fmt, int with_numbers,
+ xo_simplify_field_func_t field_cb);
+
+int
+xo_emit_field_hv (xo_handle_t *xop, const char *rolmod, const char *contents,
+ const char *fmt, const char *efmt,
+ va_list vap);
+
+int
+xo_emit_field_h (xo_handle_t *xop, const char *rolmod, const char *contents,
+ const char *fmt, const char *efmt, ...);
+
+int
+xo_emit_field (const char *rolmod, const char *contents,
+ const char *fmt, const char *efmt, ...);
+
+void
+xo_retain_clear_all (void);
+
+void
+xo_retain_clear (const char *fmt);
+
+#endif /* INCLUDE_XO_H */
diff --git a/freebsd/contrib/libxo/libxo/xo_buf.h b/freebsd/contrib/libxo/libxo/xo_buf.h
new file mode 100644
index 00000000..01eb397d
--- /dev/null
+++ b/freebsd/contrib/libxo/libxo/xo_buf.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2015, Juniper Networks, Inc.
+ * All rights reserved.
+ * This SOFTWARE is licensed under the LICENSE provided in the
+ * ../Copyright file. By downloading, installing, copying, or otherwise
+ * using the SOFTWARE, you agree to be bound by the terms of that
+ * LICENSE.
+ * Phil Shafer, August 2015
+ */
+
+/*
+ * This file is an _internal_ part of the libxo plumbing, not suitable
+ * for external use. It is not considered part of the libxo API and
+ * will not be a stable part of that API. Mine, not yours, dude...
+ * The real hope is that something like this will become a standard part
+ * of libc and I can kill this off.
+ */
+
+#ifndef XO_BUF_H
+#define XO_BUF_H
+
+#define XO_BUFSIZ (8*1024) /* Initial buffer size */
+#define XO_BUF_HIGH_WATER (XO_BUFSIZ - 512) /* When to auto-flush */
+/*
+ * xo_buffer_t: a memory buffer that can be grown as needed. We
+ * use them for building format strings and output data.
+ */
+typedef struct xo_buffer_s {
+ char *xb_bufp; /* Buffer memory */
+ char *xb_curp; /* Current insertion point */
+ unsigned xb_size; /* Size of buffer */
+} xo_buffer_t;
+
+/*
+ * Initialize the contents of an xo_buffer_t.
+ */
+static inline void
+xo_buf_init (xo_buffer_t *xbp)
+{
+ xbp->xb_size = XO_BUFSIZ;
+ xbp->xb_bufp = xo_realloc(NULL, xbp->xb_size);
+ xbp->xb_curp = xbp->xb_bufp;
+}
+
+/*
+ * Reset the buffer to empty
+ */
+static inline void
+xo_buf_reset (xo_buffer_t *xbp)
+{
+ xbp->xb_curp = xbp->xb_bufp;
+}
+
+/*
+ * Return the number of bytes left in the buffer
+ */
+static inline int
+xo_buf_left (xo_buffer_t *xbp)
+{
+ return xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
+}
+
+/*
+ * See if the buffer is empty
+ */
+static inline int
+xo_buf_is_empty (xo_buffer_t *xbp)
+{
+ return (xbp->xb_curp == xbp->xb_bufp);
+}
+
+/*
+ * Return the current offset
+ */
+static inline unsigned
+xo_buf_offset (xo_buffer_t *xbp)
+{
+ return xbp ? (xbp->xb_curp - xbp->xb_bufp) : 0;
+}
+
+static inline char *
+xo_buf_data (xo_buffer_t *xbp, unsigned offset)
+{
+ if (xbp == NULL)
+ return NULL;
+ return xbp->xb_bufp + offset;
+}
+
+static inline char *
+xo_buf_cur (xo_buffer_t *xbp)
+{
+ if (xbp == NULL)
+ return NULL;
+ return xbp->xb_curp;
+}
+
+/*
+ * Release the contents of an xo_buffer_t.
+ */
+static inline void
+xo_buf_cleanup (xo_buffer_t *xbp)
+{
+ if (xbp->xb_bufp)
+ xo_free(xbp->xb_bufp);
+ bzero(xbp, sizeof(*xbp));
+}
+
+/*
+ * Does the buffer have room for the given number of bytes of data?
+ * If not, realloc the buffer to make room. If that fails, we
+ * return 0 to tell the caller they are in trouble.
+ */
+static inline int
+xo_buf_has_room (xo_buffer_t *xbp, int len)
+{
+ if (xbp->xb_curp + len >= xbp->xb_bufp + xbp->xb_size) {
+ int sz = xbp->xb_size + XO_BUFSIZ;
+ char *bp = xo_realloc(xbp->xb_bufp, sz);
+ if (bp == NULL)
+ return 0;
+
+ xbp->xb_curp = bp + (xbp->xb_curp - xbp->xb_bufp);
+ xbp->xb_bufp = bp;
+ xbp->xb_size = sz;
+ }
+
+ return 1;
+}
+
+/*
+ * Append the given string to the given buffer
+ */
+static inline void
+xo_buf_append (xo_buffer_t *xbp, const char *str, int len)
+{
+ if (!xo_buf_has_room(xbp, len))
+ return;
+
+ memcpy(xbp->xb_curp, str, len);
+ xbp->xb_curp += len;
+}
+
+/*
+ * Append the given NUL-terminated string to the given buffer
+ */
+static inline void
+xo_buf_append_str (xo_buffer_t *xbp, const char *str)
+{
+ int len = strlen(str);
+
+ if (!xo_buf_has_room(xbp, len))
+ return;
+
+ memcpy(xbp->xb_curp, str, len);
+ xbp->xb_curp += len;
+}
+
+#endif /* XO_BUF_H */
diff --git a/freebsd/contrib/libxo/libxo/xo_config.h b/freebsd/contrib/libxo/libxo/xo_config.h
new file mode 100644
index 00000000..9020b8c6
--- /dev/null
+++ b/freebsd/contrib/libxo/libxo/xo_config.h
@@ -0,0 +1,254 @@
+/* $FreeBSD$ */
+/* libxo/xo_config.h. Generated from xo_config.h.in by configure. */
+/* libxo/xo_config.h.in. Generated from configure.ac by autoheader. */
+
+/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
+ systems. This function is required for `alloca.c' support on those systems.
+ */
+/* #undef CRAY_STACKSEG_END */
+
+/* Define to 1 if using `alloca.c'. */
+/* #undef C_ALLOCA */
+
+/* Define to 1 if you have `alloca', as a function or macro. */
+#define HAVE_ALLOCA 1
+
+/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
+ */
+/* #undef HAVE_ALLOCA_H */
+
+/* Define to 1 if you have the `asprintf' function. */
+#define HAVE_ASPRINTF 1
+
+/* Define to 1 if you have the `bzero' function. */
+#define HAVE_BZERO 1
+
+/* Define to 1 if you have the `ctime' function. */
+#define HAVE_CTIME 1
+
+/* Define to 1 if you have the <ctype.h> header file. */
+#define HAVE_CTYPE_H 1
+
+/* Define to 1 if you have the declaration of `__isthreaded', and to 0 if you
+ don't. */
+#define HAVE_DECL___ISTHREADED 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the `dlfunc' function. */
+#define HAVE_DLFUNC 1
+
+/* Define to 1 if you have the <errno.h> header file. */
+#define HAVE_ERRNO_H 1
+
+/* Define to 1 if you have the `fdopen' function. */
+#define HAVE_FDOPEN 1
+
+/* Define to 1 if you have the `flock' function. */
+#define HAVE_FLOCK 1
+
+/* Define to 1 if you have the `getpass' function. */
+#define HAVE_GETPASS 1
+
+/* Define to 1 if you have the `getprogname' function. */
+#define HAVE_GETPROGNAME 1
+
+/* Define to 1 if you have the `getrusage' function. */
+#define HAVE_GETRUSAGE 1
+
+/* gettext(3) */
+/* #undef HAVE_GETTEXT */
+
+/* Define to 1 if you have the `gettimeofday' function. */
+#define HAVE_GETTIMEOFDAY 1
+
+/* humanize_number(3) */
+#define HAVE_HUMANIZE_NUMBER 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the `crypto' library (-lcrypto). */
+#define HAVE_LIBCRYPTO 1
+
+/* Define to 1 if you have the `m' library (-lm). */
+#define HAVE_LIBM 1
+
+/* Define to 1 if you have the <libutil.h> header file. */
+#define HAVE_LIBUTIL_H 1
+
+/* Define to 1 if your system has a GNU libc compatible `malloc' function, and
+ to 0 otherwise. */
+#define HAVE_MALLOC 1
+
+/* Define to 1 if you have the `memmove' function. */
+#define HAVE_MEMMOVE 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the <monitor.h> header file. */
+/* #undef HAVE_MONITOR_H */
+
+/* Support printflike */
+/* #undef HAVE_PRINTFLIKE */
+
+/* Define to 1 if your system has a GNU libc compatible `realloc' function,
+ and to 0 otherwise. */
+#define HAVE_REALLOC 1
+
+/* Define to 1 if you have the `srand' function. */
+#define HAVE_SRAND 1
+
+/* Define to 1 if you have the `sranddev' function. */
+#define HAVE_SRANDDEV 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdio_ext.h> header file. */
+/* #undef HAVE_STDIO_EXT_H */
+
+/* Define to 1 if you have the <stdio.h> header file. */
+#define HAVE_STDIO_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <stdtime/tzfile.h> header file. */
+/* #undef HAVE_STDTIME_TZFILE_H */
+
+/* Define to 1 if you have the `strchr' function. */
+#define HAVE_STRCHR 1
+
+/* Define to 1 if you have the `strcspn' function. */
+#define HAVE_STRCSPN 1
+
+/* Define to 1 if you have the `strerror' function. */
+#define HAVE_STRERROR 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the `strlcpy' function. */
+#define HAVE_STRLCPY 1
+
+/* Define to 1 if you have the `strspn' function. */
+#define HAVE_STRSPN 1
+
+/* Have struct sockaddr_un.sun_len */
+#define HAVE_SUN_LEN 1
+
+/* Define to 1 if you have the `sysctlbyname' function. */
+#define HAVE_SYSCTLBYNAME 1
+
+/* Define to 1 if you have the <sys/param.h> header file. */
+#define HAVE_SYS_PARAM_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/sysctl.h> header file. */
+#define HAVE_SYS_SYSCTL_H 1
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <threads.h> header file. */
+#define HAVE_THREADS_H 1
+
+/* thread-local setting */
+#define HAVE_THREAD_LOCAL THREAD_LOCAL_before
+
+/* Define to 1 if you have the <tzfile.h> header file. */
+/* #undef HAVE_TZFILE_H */
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the `__flbf' function. */
+/* #undef HAVE___FLBF */
+
+/* Enable debugging */
+/* #undef LIBXO_DEBUG */
+
+/* Enable text-only rendering */
+/* #undef LIBXO_TEXT_ONLY */
+
+/* Version number as dotted value */
+#define LIBXO_VERSION "0.6.2"
+
+/* Version number extra information */
+#define LIBXO_VERSION_EXTRA ""
+
+/* Version number as a number */
+#define LIBXO_VERSION_NUMBER 6002
+
+/* Version number as string */
+#define LIBXO_VERSION_STRING "6002"
+
+/* Enable local wcwidth implementation */
+#define LIBXO_WCWIDTH 1
+
+/* Define to the sub-directory where libtool stores uninstalled libraries. */
+#define LT_OBJDIR ".libs/"
+
+/* Name of package */
+#define PACKAGE "libxo"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "phil@juniper.net"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "libxo"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "libxo 0.6.2"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "libxo"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "0.6.2"
+
+/* If using the C implementation of alloca, define if you know the
+ direction of stack growth for your system; otherwise it will be
+ automatically deduced at runtime.
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown */
+/* #undef STACK_DIRECTION */
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Version number of package */
+#define VERSION "0.6.2"
+
+/* Retain hash bucket size */
+/* #undef XO_RETAIN_SIZE */
+
+/* Define to `__inline__' or `__inline' if that's what the C compiler
+ calls it, or to nothing if 'inline' is not supported under any name. */
+#ifndef __cplusplus
+/* #undef inline */
+#endif
+
+/* Define to rpl_malloc if the replacement function should be used. */
+/* #undef malloc */
+
+/* Define to rpl_realloc if the replacement function should be used. */
+/* #undef realloc */
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+/* #undef size_t */
diff --git a/freebsd/contrib/libxo/libxo/xo_encoder.c b/freebsd/contrib/libxo/libxo/xo_encoder.c
new file mode 100644
index 00000000..8c3d9dbb
--- /dev/null
+++ b/freebsd/contrib/libxo/libxo/xo_encoder.c
@@ -0,0 +1,435 @@
+#include <machine/rtems-bsd-user-space.h>
+
+/*
+ * Copyright (c) 2015, Juniper Networks, Inc.
+ * All rights reserved.
+ * This SOFTWARE is licensed under the LICENSE provided in the
+ * ../Copyright file. By downloading, installing, copying, or otherwise
+ * using the SOFTWARE, you agree to be bound by the terms of that
+ * LICENSE.
+ * Phil Shafer, August 2015
+ */
+
+#ifndef __rtems__
+/**
+ * libxo includes a number of fixed encoding styles. But other
+ * external encoders are needed to deal with new encodings. Rather
+ * than expose a swarm of libxo internals, we create a distinct
+ * API, with a simpler API than we use internally.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <rtems/bsd/sys/param.h>
+#include <dlfcn.h>
+
+#include "xo_config.h"
+#include "xo.h"
+#include "xo_encoder.h"
+
+#ifdef HAVE_DLFCN_H
+#include <dlfcn.h>
+#if !defined(HAVE_DLFUNC)
+#define dlfunc(_p, _n) dlsym(_p, _n)
+#endif
+#else /* HAVE_DLFCN_H */
+#define dlopen(_n, _f) NULL /* Fail */
+#define dlsym(_p, _n) NULL /* Fail */
+#define dlfunc(_p, _n) NULL /* Fail */
+#endif /* HAVE_DLFCN_H */
+
+static void xo_encoder_setup (void); /* Forward decl */
+
+/*
+ * Need a simple string collection
+ */
+typedef struct xo_string_node_s {
+ TAILQ_ENTRY(xo_string_node_s) xs_link; /* Next string */
+ char xs_data[0]; /* String data */
+} xo_string_node_t;
+
+typedef TAILQ_HEAD(xo_string_list_s, xo_string_node_s) xo_string_list_t;
+
+static inline void
+xo_string_list_init (xo_string_list_t *listp)
+{
+ if (listp->tqh_last == NULL)
+ TAILQ_INIT(listp);
+}
+
+static inline xo_string_node_t *
+xo_string_add (xo_string_list_t *listp, const char *str)
+{
+ if (listp == NULL || str == NULL)
+ return NULL;
+
+ xo_string_list_init(listp);
+ size_t len = strlen(str);
+ xo_string_node_t *xsp;
+
+ xsp = xo_realloc(NULL, sizeof(*xsp) + len + 1);
+ if (xsp) {
+ memcpy(xsp->xs_data, str, len);
+ xsp->xs_data[len] = '\0';
+ TAILQ_INSERT_TAIL(listp, xsp, xs_link);
+ }
+
+ return xsp;
+}
+
+#define XO_STRING_LIST_FOREACH(_xsp, _listp) \
+ xo_string_list_init(_listp); \
+ TAILQ_FOREACH(_xsp, _listp, xs_link)
+
+static inline void
+xo_string_list_clean (xo_string_list_t *listp)
+{
+ xo_string_node_t *xsp;
+
+ xo_string_list_init(listp);
+
+ for (;;) {
+ xsp = TAILQ_FIRST(listp);
+ if (xsp == NULL)
+ break;
+ TAILQ_REMOVE(listp, xsp, xs_link);
+ xo_free(xsp);
+ }
+}
+
+static xo_string_list_t xo_encoder_path;
+
+void
+xo_encoder_path_add (const char *path)
+{
+ xo_encoder_setup();
+
+ if (path)
+ xo_string_add(&xo_encoder_path, path);
+}
+
+/* ---------------------------------------------------------------------- */
+
+typedef struct xo_encoder_node_s {
+ TAILQ_ENTRY(xo_encoder_node_s) xe_link; /* Next session */
+ char *xe_name; /* Name for this encoder */
+ xo_encoder_func_t xe_handler; /* Callback function */
+ void *xe_dlhandle; /* dlopen handle */
+} xo_encoder_node_t;
+
+typedef TAILQ_HEAD(xo_encoder_list_s, xo_encoder_node_s) xo_encoder_list_t;
+
+#define XO_ENCODER_LIST_FOREACH(_xep, _listp) \
+ xo_encoder_list_init(_listp); \
+ TAILQ_FOREACH(_xep, _listp, xe_link)
+
+static xo_encoder_list_t xo_encoders;
+
+static void
+xo_encoder_list_init (xo_encoder_list_t *listp)
+{
+ if (listp->tqh_last == NULL)
+ TAILQ_INIT(listp);
+}
+
+static xo_encoder_node_t *
+xo_encoder_list_add (const char *name)
+{
+ if (name == NULL)
+ return NULL;
+
+ xo_encoder_node_t *xep = xo_realloc(NULL, sizeof(*xep));
+ if (xep) {
+ int len = strlen(name) + 1;
+ xep->xe_name = xo_realloc(NULL, len);
+ if (xep->xe_name == NULL) {
+ xo_free(xep);
+ return NULL;
+ }
+
+ memcpy(xep->xe_name, name, len);
+
+ TAILQ_INSERT_TAIL(&xo_encoders, xep, xe_link);
+ }
+
+ return xep;
+}
+
+void
+xo_encoders_clean (void)
+{
+ xo_encoder_node_t *xep;
+
+ xo_encoder_setup();
+
+ for (;;) {
+ xep = TAILQ_FIRST(&xo_encoders);
+ if (xep == NULL)
+ break;
+
+ TAILQ_REMOVE(&xo_encoders, xep, xe_link);
+
+ if (xep->xe_dlhandle)
+ dlclose(xep->xe_dlhandle);
+
+ xo_free(xep);
+ }
+
+ xo_string_list_clean(&xo_encoder_path);
+}
+
+static void
+xo_encoder_setup (void)
+{
+ static int initted;
+ if (!initted) {
+ initted = 1;
+
+ xo_string_list_init(&xo_encoder_path);
+ xo_encoder_list_init(&xo_encoders);
+
+ xo_encoder_path_add(XO_ENCODERDIR);
+ }
+}
+
+static xo_encoder_node_t *
+xo_encoder_find (const char *name)
+{
+ xo_encoder_node_t *xep;
+
+ xo_encoder_list_init(&xo_encoders);
+
+ XO_ENCODER_LIST_FOREACH(xep, &xo_encoders) {
+ if (strcmp(xep->xe_name, name) == 0)
+ return xep;
+ }
+
+ return NULL;
+}
+
+static xo_encoder_node_t *
+xo_encoder_discover (const char *name)
+{
+ void *dlp = NULL;
+ char buf[MAXPATHLEN];
+ xo_string_node_t *xsp;
+ xo_encoder_node_t *xep = NULL;
+
+ XO_STRING_LIST_FOREACH(xsp, &xo_encoder_path) {
+ static const char fmt[] = "%s/%s.enc";
+ char *dir = xsp->xs_data;
+ size_t len = snprintf(buf, sizeof(buf), fmt, dir, name);
+
+ if (len > sizeof(buf)) /* Should not occur */
+ continue;
+
+ dlp = dlopen((const char *) buf, RTLD_NOW);
+ if (dlp)
+ break;
+ }
+
+ if (dlp) {
+ /*
+ * If the library exists, find the initializer function and
+ * call it.
+ */
+ xo_encoder_init_func_t func;
+
+ func = (xo_encoder_init_func_t) dlfunc(dlp, XO_ENCODER_INIT_NAME);
+ if (func) {
+ xo_encoder_init_args_t xei;
+
+ bzero(&xei, sizeof(xei));
+
+ xei.xei_version = XO_ENCODER_VERSION;
+ int rc = func(&xei);
+ if (rc == 0 && xei.xei_handler) {
+ xep = xo_encoder_list_add(name);
+ if (xep) {
+ xep->xe_handler = xei.xei_handler;
+ xep->xe_dlhandle = dlp;
+ }
+ }
+ }
+
+ if (xep == NULL)
+ dlclose(dlp);
+ }
+
+ return xep;
+}
+
+void
+xo_encoder_register (const char *name, xo_encoder_func_t func)
+{
+ xo_encoder_setup();
+
+ xo_encoder_node_t *xep = xo_encoder_find(name);
+
+ if (xep) /* "We alla-ready got one" */
+ return;
+
+ xep = xo_encoder_list_add(name);
+ if (xep)
+ xep->xe_handler = func;
+}
+
+void
+xo_encoder_unregister (const char *name)
+{
+ xo_encoder_setup();
+
+ xo_encoder_node_t *xep = xo_encoder_find(name);
+ if (xep) {
+ TAILQ_REMOVE(&xo_encoders, xep, xe_link);
+ xo_free(xep);
+ }
+}
+
+int
+xo_encoder_init (xo_handle_t *xop, const char *name)
+{
+ xo_encoder_setup();
+
+ /* Can't have names containing '/' or ':' */
+ if (strchr(name, '/') != NULL || strchr(name, ':') != NULL)
+ return -1;
+
+ /*
+ * First we look on the list of known (registered) encoders.
+ * If we don't find it, we follow the set of paths to find
+ * the encoding library.
+ */
+ xo_encoder_node_t *xep = xo_encoder_find(name);
+ if (xep == NULL) {
+ xep = xo_encoder_discover(name);
+ if (xep == NULL)
+ return -1;
+ }
+
+ xo_set_encoder(xop, xep->xe_handler);
+
+ return xo_encoder_handle(xop, XO_OP_CREATE, NULL, NULL);
+}
+
+/*
+ * A couple of function varieties here, to allow for multiple
+ * use cases. This variant is for when the main program knows
+ * its own encoder needs.
+ */
+xo_handle_t *
+xo_encoder_create (const char *name, xo_xof_flags_t flags)
+{
+ xo_handle_t *xop;
+
+ xop = xo_create(XO_STYLE_ENCODER, flags);
+ if (xop) {
+ if (xo_encoder_init(xop, name)) {
+ xo_destroy(xop);
+ xop = NULL;
+ }
+ }
+
+ return xop;
+}
+
+int
+xo_encoder_handle (xo_handle_t *xop, xo_encoder_op_t op,
+ const char *name, const char *value)
+{
+ void *private = xo_get_private(xop);
+ xo_encoder_func_t func = xo_get_encoder(xop);
+
+ if (func == NULL)
+ return -1;
+
+ return func(xop, op, name, value, private);
+}
+
+const char *
+xo_encoder_op_name (xo_encoder_op_t op)
+{
+ static const char *names[] = {
+ /* 0 */ "unknown",
+ /* 1 */ "create",
+ /* 2 */ "open_container",
+ /* 3 */ "close_container",
+ /* 4 */ "open_list",
+ /* 5 */ "close_list",
+ /* 6 */ "open_leaf_list",
+ /* 7 */ "close_leaf_list",
+ /* 8 */ "open_instance",
+ /* 9 */ "close_instance",
+ /* 10 */ "string",
+ /* 11 */ "content",
+ /* 12 */ "finish",
+ /* 13 */ "flush",
+ /* 14 */ "destroy",
+ /* 15 */ "attr",
+ /* 16 */ "version",
+ };
+
+ if (op > sizeof(names) / sizeof(names[0]))
+ return "unknown";
+
+ return names[op];
+}
+#else /* __rtems__ */
+
+/*
+ * Not supported on RTEMS. Just return errors on all functions.
+ */
+#include "xo.h"
+#include "xo_encoder.h"
+
+void
+xo_encoder_register (const char *name, xo_encoder_func_t func)
+{
+ /* Nothing to do */
+}
+
+void
+xo_encoder_unregister (const char *name)
+{
+ /* Nothing to do */
+}
+
+void
+xo_encoder_path_add (const char *path)
+{
+ /* Nothing to do */
+}
+
+int
+xo_encoder_init (xo_handle_t *xop, const char *name)
+{
+ return -1;
+}
+
+xo_handle_t *
+xo_encoder_create (const char *name, xo_xof_flags_t flags)
+{
+ return NULL;
+}
+
+int
+xo_encoder_handle (xo_handle_t *xop, xo_encoder_op_t op,
+ const char *name, const char *value)
+{
+ return -1;
+}
+
+void
+xo_encoders_clean (void)
+{
+ /* Nothing to do */
+}
+
+const char *
+xo_encoder_op_name (xo_encoder_op_t op)
+{
+ return "unknown";
+}
+#endif /* __rtems__ */
diff --git a/freebsd/contrib/libxo/libxo/xo_encoder.h b/freebsd/contrib/libxo/libxo/xo_encoder.h
new file mode 100644
index 00000000..f73552b1
--- /dev/null
+++ b/freebsd/contrib/libxo/libxo/xo_encoder.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2015, Juniper Networks, Inc.
+ * All rights reserved.
+ * This SOFTWARE is licensed under the LICENSE provided in the
+ * ../Copyright file. By downloading, installing, copying, or otherwise
+ * using the SOFTWARE, you agree to be bound by the terms of that
+ * LICENSE.
+ * Phil Shafer, August 2015
+ */
+
+/*
+ * NOTE WELL: This file is needed by software that implements an
+ * external encoder for libxo that allows libxo data to be encoded in
+ * new and bizarre formats. General libxo code should _never_
+ * include this header file.
+ */
+
+#ifndef XO_ENCODER_H
+#define XO_ENCODER_H
+
+/*
+ * Expose libxo's memory allocation functions
+ */
+extern xo_realloc_func_t xo_realloc;
+extern xo_free_func_t xo_free;
+
+typedef unsigned xo_encoder_op_t;
+
+/* Encoder operations; names are in xo_encoder.c:xo_encoder_op_name() */
+#define XO_OP_UNKNOWN 0
+#define XO_OP_CREATE 1 /* Called when the handle is init'd */
+#define XO_OP_OPEN_CONTAINER 2
+#define XO_OP_CLOSE_CONTAINER 3
+#define XO_OP_OPEN_LIST 4
+#define XO_OP_CLOSE_LIST 5
+#define XO_OP_OPEN_LEAF_LIST 6
+#define XO_OP_CLOSE_LEAF_LIST 7
+#define XO_OP_OPEN_INSTANCE 8
+#define XO_OP_CLOSE_INSTANCE 9
+#define XO_OP_STRING 10 /* Quoted UTF-8 string */
+#define XO_OP_CONTENT 11 /* Other content */
+#define XO_OP_FINISH 12 /* Finish any pending output */
+#define XO_OP_FLUSH 13 /* Flush any buffered output */
+#define XO_OP_DESTROY 14 /* Clean up function */
+#define XO_OP_ATTRIBUTE 15 /* Attribute name/value */
+#define XO_OP_VERSION 16 /* Version string */
+
+#define XO_ENCODER_HANDLER_ARGS \
+ xo_handle_t *xop __attribute__ ((__unused__)), \
+ xo_encoder_op_t op __attribute__ ((__unused__)), \
+ const char *name __attribute__ ((__unused__)), \
+ const char *value __attribute__ ((__unused__)), \
+ void *private __attribute__ ((__unused__))
+
+typedef int (*xo_encoder_func_t)(XO_ENCODER_HANDLER_ARGS);
+
+typedef struct xo_encoder_init_args_s {
+ unsigned xei_version; /* Current version */
+ xo_encoder_func_t xei_handler; /* Encoding handler */
+} xo_encoder_init_args_t;
+
+#define XO_ENCODER_VERSION 1 /* Current version */
+
+#define XO_ENCODER_INIT_ARGS \
+ xo_encoder_init_args_t *arg __attribute__ ((__unused__))
+
+typedef int (*xo_encoder_init_func_t)(XO_ENCODER_INIT_ARGS);
+/*
+ * Each encoder library must define a function named xo_encoder_init
+ * that takes the arguments defined in XO_ENCODER_INIT_ARGS. It
+ * should return zero for success.
+ */
+#define XO_ENCODER_INIT_NAME_TOKEN xo_encoder_library_init
+#define XO_STRINGIFY(_x) #_x
+#define XO_STRINGIFY2(_x) XO_STRINGIFY(_x)
+#define XO_ENCODER_INIT_NAME XO_STRINGIFY2(XO_ENCODER_INIT_NAME_TOKEN)
+extern int XO_ENCODER_INIT_NAME_TOKEN (XO_ENCODER_INIT_ARGS);
+
+void
+xo_encoder_register (const char *name, xo_encoder_func_t func);
+
+void
+xo_encoder_unregister (const char *name);
+
+void *
+xo_get_private (xo_handle_t *xop);
+
+void
+xo_encoder_path_add (const char *path);
+
+void
+xo_set_private (xo_handle_t *xop, void *opaque);
+
+xo_encoder_func_t
+xo_get_encoder (xo_handle_t *xop);
+
+void
+xo_set_encoder (xo_handle_t *xop, xo_encoder_func_t encoder);
+
+int
+xo_encoder_init (xo_handle_t *xop, const char *name);
+
+xo_handle_t *
+xo_encoder_create (const char *name, xo_xof_flags_t flags);
+
+int
+xo_encoder_handle (xo_handle_t *xop, xo_encoder_op_t op,
+ const char *name, const char *value);
+
+void
+xo_encoders_clean (void);
+
+const char *
+xo_encoder_op_name (xo_encoder_op_t op);
+
+#endif /* XO_ENCODER_H */
diff --git a/freebsd/contrib/libxo/libxo/xo_humanize.h b/freebsd/contrib/libxo/libxo/xo_humanize.h
new file mode 100644
index 00000000..edf85b8b
--- /dev/null
+++ b/freebsd/contrib/libxo/libxo/xo_humanize.h
@@ -0,0 +1,169 @@
+/* $NetBSD: humanize_number.c,v 1.8 2004/07/27 01:56:24 enami Exp $ */
+
+/*
+ * Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
+ * NASA Ames Research Center, by Luke Mewburn and by Tomas Svensson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <locale.h>
+#include <stdint.h>
+#include <limits.h>
+#include <unistd.h>
+#include <stdbool.h>
+
+/* humanize_number(3) */
+#define HN_DECIMAL 0x01
+#define HN_NOSPACE 0x02
+#define HN_B 0x04
+#define HN_DIVISOR_1000 0x08
+
+#define HN_GETSCALE 0x10
+#define HN_AUTOSCALE 0x20
+
+static int
+xo_humanize_number (char *buf, size_t len, int64_t bytes,
+ const char *suffix, int scale, int flags)
+{
+ const char *prefixes, *sep;
+ int b, i, r, maxscale, s1, s2, sign;
+ int64_t divisor, max;
+ // We multiply bytes by 100 to deal with rounding, so we need something
+ // big enough to hold LLONG_MAX * 100. On 64-bit we can use 128-bit wide
+ // integers with __int128_t, but on 32-bit we have to use long double.
+#ifdef __LP64__
+ __int128_t scalable = (__int128_t)bytes;
+#else
+ long double scalable = (long double)bytes;
+#endif
+ size_t baselen;
+
+ assert(buf != NULL);
+ assert(suffix != NULL);
+ assert(scale >= 0);
+
+ if (flags & HN_DIVISOR_1000) {
+ /* SI for decimal multiplies */
+ divisor = 1000;
+ if (flags & HN_B)
+ prefixes = "B\0k\0M\0G\0T\0P\0E";
+ else
+ prefixes = "\0\0k\0M\0G\0T\0P\0E";
+ } else {
+ /*
+ * binary multiplies
+ * XXX IEC 60027-2 recommends Ki, Mi, Gi...
+ */
+ divisor = 1024;
+ if (flags & HN_B)
+ prefixes = "B\0K\0M\0G\0T\0P\0E";
+ else
+ prefixes = "\0\0K\0M\0G\0T\0P\0E";
+ }
+
+#define SCALE2PREFIX(scale) (&prefixes[(scale) << 1])
+ maxscale = 7;
+
+ if (scale >= maxscale &&
+ (scale & (HN_AUTOSCALE | HN_GETSCALE)) == 0)
+ return (-1);
+
+ if (buf == NULL || suffix == NULL)
+ return (-1);
+
+ if (len > 0)
+ buf[0] = '\0';
+ if (bytes < 0) {
+ sign = -1;
+ scalable *= -100;
+ baselen = 3; /* sign, digit, prefix */
+ } else {
+ sign = 1;
+ scalable *= 100;
+ baselen = 2; /* digit, prefix */
+ }
+ if (flags & HN_NOSPACE)
+ sep = "";
+ else {
+ sep = " ";
+ baselen++;
+ }
+ baselen += strlen(suffix);
+
+ /* Check if enough room for `x y' + suffix + `\0' */
+ if (len < baselen + 1)
+ return (-1);
+
+ if (scale & (HN_AUTOSCALE | HN_GETSCALE)) {
+ /* See if additional columns can be used. */
+ for (max = 100, i = len - baselen; i-- > 0;)
+ max *= 10;
+
+ for (i = 0; scalable >= max && i < maxscale; i++)
+ scalable /= divisor;
+
+ if (scale & HN_GETSCALE)
+ return (i);
+ } else
+ for (i = 0; i < scale && i < maxscale; i++)
+ scalable /= divisor;
+
+ /* If a value <= 9.9 after rounding and ... */
+ if (scalable < 995 && i > 0 && flags & HN_DECIMAL) {
+ /* baselen + \0 + .N */
+ if (len < baselen + 1 + 2)
+ return (-1);
+ b = ((int)scalable + 5) / 10;
+ s1 = b / 10;
+ s2 = b % 10;
+ r = snprintf(buf, len, "%s%d%s%d%s%s%s",
+ ((sign == -1) ? "-" : ""),
+ s1, localeconv()->decimal_point, s2,
+ sep, SCALE2PREFIX(i), suffix);
+ } else
+ r = snprintf(buf, len, "%s%lld%s%s%s",
+ /* LONGLONG */
+ ((sign == -1) ? "-" : ""),
+ (long long)((scalable + 50) / 100),
+ sep, SCALE2PREFIX(i), suffix);
+
+ return (r);
+}
diff --git a/freebsd/contrib/libxo/libxo/xo_wcwidth.h b/freebsd/contrib/libxo/libxo/xo_wcwidth.h
new file mode 100644
index 00000000..46d83f03
--- /dev/null
+++ b/freebsd/contrib/libxo/libxo/xo_wcwidth.h
@@ -0,0 +1,313 @@
+/*
+ * This is an implementation of wcwidth() and wcswidth() (defined in
+ * IEEE Std 1003.1-2001) for Unicode.
+ *
+ * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
+ * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
+ *
+ * In fixed-width output devices, Latin characters all occupy a single
+ * "cell" position of equal width, whereas ideographic CJK characters
+ * occupy two such cells. Interoperability between terminal-line
+ * applications and (teletype-style) character terminals using the
+ * UTF-8 encoding requires agreement on which character should advance
+ * the cursor by how many cell positions. No established formal
+ * standards exist at present on which Unicode character shall occupy
+ * how many cell positions on character terminals. These routines are
+ * a first attempt of defining such behavior based on simple rules
+ * applied to data provided by the Unicode Consortium.
+ *
+ * For some graphical characters, the Unicode standard explicitly
+ * defines a character-cell width via the definition of the East Asian
+ * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
+ * In all these cases, there is no ambiguity about which width a
+ * terminal shall use. For characters in the East Asian Ambiguous (A)
+ * class, the width choice depends purely on a preference of backward
+ * compatibility with either historic CJK or Western practice.
+ * Choosing single-width for these characters is easy to justify as
+ * the appropriate long-term solution, as the CJK practice of
+ * displaying these characters as double-width comes from historic
+ * implementation simplicity (8-bit encoded characters were displayed
+ * single-width and 16-bit ones double-width, even for Greek,
+ * Cyrillic, etc.) and not any typographic considerations.
+ *
+ * Much less clear is the choice of width for the Not East Asian
+ * (Neutral) class. Existing practice does not dictate a width for any
+ * of these characters. It would nevertheless make sense
+ * typographically to allocate two character cells to characters such
+ * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
+ * represented adequately with a single-width glyph. The following
+ * routines at present merely assign a single-cell width to all
+ * neutral characters, in the interest of simplicity. This is not
+ * entirely satisfactory and should be reconsidered before
+ * establishing a formal standard in this area. At the moment, the
+ * decision which Not East Asian (Neutral) characters should be
+ * represented by double-width glyphs cannot yet be answered by
+ * applying a simple rule from the Unicode database content. Setting
+ * up a proper standard for the behavior of UTF-8 character terminals
+ * will require a careful analysis not only of each Unicode character,
+ * but also of each presentation form, something the author of these
+ * routines has avoided to do so far.
+ *
+ * http://www.unicode.org/unicode/reports/tr11/
+ *
+ * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
+ *
+ * Permission to use, copy, modify, and distribute this software
+ * for any purpose and without fee is hereby granted. The author
+ * disclaims all warranties with regard to this software.
+ *
+ * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+ */
+
+#include <wchar.h>
+
+struct interval {
+ wchar_t first;
+ wchar_t last;
+};
+
+/* auxiliary function for binary search in interval table */
+static int
+xo_bisearch (wchar_t ucs, const struct interval *table, int max)
+{
+ int min = 0;
+ int mid;
+
+ if (ucs < table[0].first || ucs > table[max].last)
+ return 0;
+ while (max >= min) {
+ mid = (min + max) / 2;
+ if (ucs > table[mid].last)
+ min = mid + 1;
+ else if (ucs < table[mid].first)
+ max = mid - 1;
+ else
+ return 1;
+ }
+
+ return 0;
+}
+
+
+/* The following two functions define the column width of an ISO 10646
+ * character as follows:
+ *
+ * - The null character (U+0000) has a column width of 0.
+ *
+ * - Other C0/C1 control characters and DEL will lead to a return
+ * value of -1.
+ *
+ * - Non-spacing and enclosing combining characters (general
+ * category code Mn or Me in the Unicode database) have a
+ * column width of 0.
+ *
+ * - SOFT HYPHEN (U+00AD) has a column width of 1.
+ *
+ * - Other format characters (general category code Cf in the Unicode
+ * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
+ *
+ * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
+ * have a column width of 0.
+ *
+ * - Spacing characters in the East Asian Wide (W) or East Asian
+ * Full-width (F) category as defined in Unicode Technical
+ * Report #11 have a column width of 2.
+ *
+ * - All remaining characters (including all printable
+ * ISO 8859-1 and WGL4 characters, Unicode control characters,
+ * etc.) have a column width of 1.
+ *
+ * This implementation assumes that wchar_t characters are encoded
+ * in ISO 10646.
+ */
+
+static int
+xo_wcwidth (wchar_t ucs)
+{
+ /* sorted list of non-overlapping intervals of non-spacing characters */
+ /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
+ static const struct interval combining[] = {
+ { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
+ { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
+ { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
+ { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
+ { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
+ { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
+ { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
+ { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
+ { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
+ { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
+ { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
+ { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
+ { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
+ { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
+ { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
+ { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
+ { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
+ { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
+ { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
+ { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
+ { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
+ { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
+ { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
+ { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
+ { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
+ { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
+ { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
+ { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
+ { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
+ { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
+ { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
+ { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
+ { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
+ { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
+ { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
+ { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
+ { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
+ { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
+ { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
+ { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
+ { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
+ { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
+ { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
+ { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
+ { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
+ { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
+ { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
+ { 0xE0100, 0xE01EF }
+ };
+
+ /* test for 8-bit control characters */
+ if (ucs == 0)
+ return 0;
+ if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
+ return -1;
+
+ /* binary search in table of non-spacing characters */
+ if (xo_bisearch(ucs, combining,
+ sizeof(combining) / sizeof(struct interval) - 1))
+ return 0;
+
+ /* if we arrive here, ucs is not a combining or C0/C1 control character */
+
+ return 1 +
+ (ucs >= 0x1100 &&
+ (ucs <= 0x115f || /* Hangul Jamo init. consonants */
+ ucs == 0x2329 || ucs == 0x232a ||
+ (ucs >= 0x2e80 && ucs <= 0xa4cf &&
+ ucs != 0x303f) || /* CJK ... Yi */
+ (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
+ (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
+ (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */
+ (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
+ (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */
+ (ucs >= 0xffe0 && ucs <= 0xffe6) ||
+ (ucs >= 0x20000 && ucs <= 0x2fffd) ||
+ (ucs >= 0x30000 && ucs <= 0x3fffd)));
+}
+
+#if UNUSED_CODE
+static int xo_wcswidth(const wchar_t *pwcs, size_t n)
+{
+ int w, width = 0;
+
+ for (;*pwcs && n-- > 0; pwcs++)
+ if ((w = mk_wcwidth(*pwcs)) < 0)
+ return -1;
+ else
+ width += w;
+
+ return width;
+}
+
+
+/*
+ * The following functions are the same as mk_wcwidth() and
+ * mk_wcswidth(), except that spacing characters in the East Asian
+ * Ambiguous (A) category as defined in Unicode Technical Report #11
+ * have a column width of 2. This variant might be useful for users of
+ * CJK legacy encodings who want to migrate to UCS without changing
+ * the traditional terminal character-width behaviour. It is not
+ * otherwise recommended for general use.
+ */
+int mk_wcwidth_cjk(wchar_t ucs)
+{
+ /* sorted list of non-overlapping intervals of East Asian Ambiguous
+ * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */
+ static const struct interval ambiguous[] = {
+ { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
+ { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 },
+ { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
+ { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
+ { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED },
+ { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA },
+ { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 },
+ { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B },
+ { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 },
+ { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 },
+ { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 },
+ { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE },
+ { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 },
+ { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA },
+ { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
+ { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB },
+ { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB },
+ { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 },
+ { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 },
+ { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 },
+ { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
+ { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
+ { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
+ { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 },
+ { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC },
+ { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
+ { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
+ { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 },
+ { 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 },
+ { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 },
+ { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 },
+ { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B },
+ { 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 },
+ { 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 },
+ { 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E },
+ { 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 },
+ { 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 },
+ { 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F },
+ { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 },
+ { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF },
+ { 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x254B },
+ { 0x2550, 0x2573 }, { 0x2580, 0x258F }, { 0x2592, 0x2595 },
+ { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 }, { 0x25B2, 0x25B3 },
+ { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD }, { 0x25C0, 0x25C1 },
+ { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB }, { 0x25CE, 0x25D1 },
+ { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF }, { 0x2605, 0x2606 },
+ { 0x2609, 0x2609 }, { 0x260E, 0x260F }, { 0x2614, 0x2615 },
+ { 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 },
+ { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 },
+ { 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F },
+ { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF },
+ { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD }
+ };
+
+ /* binary search in table of non-spacing characters */
+ if (xo_bisearch(ucs, ambiguous,
+ sizeof(ambiguous) / sizeof(struct interval) - 1))
+ return 2;
+
+ return mk_wcwidth(ucs);
+}
+
+
+int mk_wcswidth_cjk(const wchar_t *pwcs, size_t n)
+{
+ int w, width = 0;
+
+ for (;*pwcs && n-- > 0; pwcs++)
+ if ((w = mk_wcwidth_cjk(*pwcs)) < 0)
+ return -1;
+ else
+ width += w;
+
+ return width;
+}
+#endif /* UNUSED_CODE */
diff --git a/freebsd/contrib/pf/pfctl/pfctl-data.h b/freebsd/contrib/pf/pfctl/pfctl-data.h
deleted file mode 100644
index a7c7e49d..00000000
--- a/freebsd/contrib/pf/pfctl/pfctl-data.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#include <rtems/linkersets.h>
-
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int altqsupport);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char*anchoropt);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static const char *clearopt);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static const char *debugopt);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int dev);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int first_title);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char*ifaceopt);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int labels);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int loadopt);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static const char *optiopt);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl,
- static struct pf_anchor_global pf_anchors);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static const char *pf_device);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl,
- static struct pf_anchor pf_main_anchor);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char*rulesopt);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static const char *showopt);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char*src_node_kill[2]);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int src_node_killers);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char*state_kill[2]);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int state_killers);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char*tableopt);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static const char *tblcmdopt);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static const char *tblcmdopt);
diff --git a/freebsd/contrib/pf/pfctl/pfctl_optimize-data.h b/freebsd/contrib/pf/pfctl/pfctl_optimize-data.h
deleted file mode 100644
index 1d55415a..00000000
--- a/freebsd/contrib/pf/pfctl/pfctl_optimize-data.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <rtems/linkersets.h>
-
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int add_opt_table_num);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int pf_opt_create_table_num);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl,
- static struct pf_rule_field pf_rule_desc[]);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int
- (*skip_comparitors[PF_SKIP_COUNT])(struct pf_rule *, struct pf_rule *));
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl,
- static const char *skip_comparitors_names[PF_SKIP_COUNT]);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl,
- static struct pfr_buffer table_buffer);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int table_identifier);
diff --git a/freebsd/contrib/pf/pfctl/pfctl_radix-data.h b/freebsd/contrib/pf/pfctl/pfctl_radix-data.h
deleted file mode 100644
index bcb5f3bb..00000000
--- a/freebsd/contrib/pf/pfctl/pfctl_radix-data.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <rtems/linkersets.h>
-
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char next_ch);
diff --git a/freebsd/include/arpa/nameser.h b/freebsd/include/arpa/nameser.h
index 8575989d..a1f87c6f 100644
--- a/freebsd/include/arpa/nameser.h
+++ b/freebsd/include/arpa/nameser.h
@@ -106,7 +106,7 @@ typedef enum __ns_sect {
} ns_sect;
/*%
- * Network name (compressed or not) type. Equivilent to a pointer when used
+ * Network name (compressed or not) type. Equivalent to a pointer when used
* in a function prototype. Can be const'd.
*/
typedef u_char ns_nname[NS_MAXNNAME];
diff --git a/freebsd/include/arpa/nameser_compat.h b/freebsd/include/arpa/nameser_compat.h
index 915700cf..edd7872f 100644
--- a/freebsd/include/arpa/nameser_compat.h
+++ b/freebsd/include/arpa/nameser_compat.h
@@ -67,7 +67,7 @@ typedef struct {
/* fields in third byte */
unsigned qr: 1; /*%< response flag */
unsigned opcode: 4; /*%< purpose of message */
- unsigned aa: 1; /*%< authoritive answer */
+ unsigned aa: 1; /*%< authoritative answer */
unsigned tc: 1; /*%< truncated message */
unsigned rd: 1; /*%< recursion desired */
/* fields in fourth byte */
@@ -81,7 +81,7 @@ typedef struct {
/* fields in third byte */
unsigned rd :1; /*%< recursion desired */
unsigned tc :1; /*%< truncated message */
- unsigned aa :1; /*%< authoritive answer */
+ unsigned aa :1; /*%< authoritative answer */
unsigned opcode :4; /*%< purpose of message */
unsigned qr :1; /*%< response flag */
/* fields in fourth byte */
diff --git a/freebsd/include/gssapi/gssapi.h b/freebsd/include/gssapi/gssapi.h
index 16a588e1..bd2722c9 100644
--- a/freebsd/include/gssapi/gssapi.h
+++ b/freebsd/include/gssapi/gssapi.h
@@ -31,21 +31,46 @@
#ifndef _GSSAPI_GSSAPI_H_
#define _GSSAPI_GSSAPI_H_
-/*
- * First, include stddef.h to get size_t defined.
- */
-#include <stddef.h>
+#include <sys/cdefs.h>
+#include <sys/_types.h>
-/*
- * Include stdint.h to get explicitly sized data types.
- */
-#include <stdint.h>
+#ifndef _SIZE_T_DECLARED
+typedef __size_t size_t;
+#define _SIZE_T_DECLARED
+#endif
#ifndef _SSIZE_T_DECLARED
typedef __ssize_t ssize_t;
#define _SSIZE_T_DECLARED
#endif
+/* Compatibility with Heimdal 1.5.1 */
+#ifndef GSSAPI_CPP_START
+#ifdef __cplusplus
+#define GSSAPI_CPP_START extern "C" {
+#define GSSAPI_CPP_END }
+#else
+#define GSSAPI_CPP_START
+#define GSSAPI_CPP_END
+#endif
+#endif
+
+/* Compatibility with Heimdal 1.5.1 */
+#ifndef BUILD_GSSAPI_LIB
+#define GSSAPI_LIB_FUNCTION
+#define GSSAPI_LIB_CALL
+#define GSSAPI_LIB_VARIABLE
+#endif
+
+/* Compatibility with Heimdal 1.5.1 */
+#ifndef GSSAPI_DEPRECATED_FUNCTION
+#if defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1 )))
+#define GSSAPI_DEPRECATED_FUNCTION(X) __attribute__((deprecated))
+#else
+#define GSSAPI_DEPRECATED_FUNCTION(X)
+#endif
+#endif
+
#if 0
/*
* If the platform supports the xom.h header file, it should be
@@ -67,7 +92,7 @@ typedef struct _gss_name_t *gss_name_t;
* unsigned integer supported by the platform that has at least
* 32 bits of precision.
*/
-typedef uint32_t gss_uint32;
+typedef __uint32_t gss_uint32;
#ifdef OM_STRING
@@ -89,7 +114,7 @@ typedef OM_object_identifier gss_OID_desc, *gss_OID;
*/
typedef gss_uint32 OM_uint32;
-typedef uint64_t OM_uint64;
+typedef __uint64_t OM_uint64;
typedef struct gss_OID_desc_struct {
OM_uint32 length;
@@ -756,11 +781,11 @@ OM_uint32 gss_release_oid
OM_uint32 gss_decapsulate_token
(const gss_buffer_t, /* mechanism independent token */
gss_OID, /* desired mechanism */
- gss_buffer_t /* decapsulated mechanism dependant token */
+ gss_buffer_t /* decapsulated mechanism dependent token */
);
OM_uint32 gss_encapsulate_token
- (const gss_buffer_t, /* mechanism dependant token */
+ (const gss_buffer_t, /* mechanism dependent token */
gss_OID, /* desired mechanism */
gss_buffer_t /* encapsulated mechanism independent token */
);
diff --git a/freebsd/include/ifaddrs.h b/freebsd/include/ifaddrs.h
index f0911a48..e768d50c 100644
--- a/freebsd/include/ifaddrs.h
+++ b/freebsd/include/ifaddrs.h
@@ -31,7 +31,7 @@
struct ifaddrs {
struct ifaddrs *ifa_next;
char *ifa_name;
- u_int ifa_flags;
+ unsigned int ifa_flags;
struct sockaddr *ifa_addr;
struct sockaddr *ifa_netmask;
struct sockaddr *ifa_dstaddr;
diff --git a/freebsd/include/netdb.h b/freebsd/include/netdb.h
index 2c2e4b3f..2da7755d 100644
--- a/freebsd/include/netdb.h
+++ b/freebsd/include/netdb.h
@@ -60,6 +60,16 @@
#include <sys/cdefs.h>
#include <sys/_types.h>
+#ifndef _IN_ADDR_T_DECLARED
+typedef __uint32_t in_addr_t;
+#define _IN_ADDR_T_DECLARED
+#endif
+
+#ifndef _IN_PORT_T_DECLARED
+typedef __uint16_t in_port_t;
+#define _IN_PORT_T_DECLARED
+#endif
+
#ifndef _SIZE_T_DECLARED
typedef __size_t size_t;
#define _SIZE_T_DECLARED
@@ -122,7 +132,7 @@ struct protoent {
struct addrinfo {
int ai_flags; /* AI_PASSIVE, AI_CANONNAME, AI_NUMERICHOST */
- int ai_family; /* PF_xxx */
+ int ai_family; /* AF_xxx */
int ai_socktype; /* SOCK_xxx */
int ai_protocol; /* 0 or IPPROTO_xxx for IPv4 and IPv6 */
socklen_t ai_addrlen; /* length of ai_addr */
@@ -131,6 +141,8 @@ struct addrinfo {
struct addrinfo *ai_next; /* next structure in linked list */
};
+#define IPPORT_RESERVED 1024
+
/*
* Error return codes from gethostbyname() and gethostbyaddr()
* (left in h_errno).
@@ -179,7 +191,7 @@ struct addrinfo {
/* valid flags for addrinfo (not a standard def, apps should not use it) */
#define AI_MASK \
(AI_PASSIVE | AI_CANONNAME | AI_NUMERICHOST | AI_NUMERICSERV | \
- AI_ADDRCONFIG)
+ AI_ADDRCONFIG | AI_ALL | AI_V4MAPPED)
#define AI_ALL 0x00000100 /* IPv6 and IPv4-mapped (with AI_V4MAPPED) */
#define AI_V4MAPPED_CFG 0x00000200 /* accept IPv4-mapped if kernel supports */
@@ -202,9 +214,7 @@ struct addrinfo {
#define NI_NAMEREQD 0x00000004
#define NI_NUMERICSERV 0x00000008
#define NI_DGRAM 0x00000010
-#if 0 /* obsolete */
-#define NI_WITHSCOPEID 0x00000020
-#endif
+#define NI_NUMERICSCOPE 0x00000020
/*
* Scope delimit character
@@ -263,6 +273,7 @@ int getnetbyname_r(const char *, struct netent *, char *, size_t,
int getnetent_r(struct netent *, char *, size_t, struct netent **,
int *);
int getnetgrent(char **, char **, char **);
+int getnetgrent_r(char **, char **, char **, char *, size_t);
int getprotobyname_r(const char *, struct protoent *, char *,
size_t, struct protoent **);
int getprotobynumber_r(int, struct protoent *, char *, size_t,
@@ -276,7 +287,7 @@ int getservbyport_r(int, const char *, struct servent *, char *,
int getservent_r(struct servent *, char *, size_t,
struct servent **);
void herror(const char *);
-__const char *hstrerror(int);
+const char *hstrerror(int);
int innetgr(const char *, const char *, const char *, const char *);
void setnetgrent(const char *);
#endif
diff --git a/freebsd/include/resolv.h b/freebsd/include/resolv.h
index e108635c..412e4a91 100644
--- a/freebsd/include/resolv.h
+++ b/freebsd/include/resolv.h
@@ -184,7 +184,7 @@ struct __res_state {
u_int16_t nscount;
u_int16_t nstimes[MAXNS]; /*%< ms. */
int nssocks[MAXNS];
- struct __res_state_ext *ext; /*%< extention for IPv6 */
+ struct __res_state_ext *ext; /*%< extension for IPv6 */
} _ext;
} _u;
u_char *_rnd; /*%< PRIVATE: random state */
diff --git a/freebsd/include/rpc/rpcent.h b/freebsd/include/rpc/rpcent.h
index 405ba678..c1650fa4 100644
--- a/freebsd/include/rpc/rpcent.h
+++ b/freebsd/include/rpc/rpcent.h
@@ -56,7 +56,7 @@ __BEGIN_DECLS
* These interfaces are currently implemented through nsswitch and are
* MT-safe.
*/
-extern struct rpcent *getrpcbyname(char *);
+extern struct rpcent *getrpcbyname(const char *);
extern struct rpcent *getrpcbynumber(int);
extern struct rpcent *getrpcent(void);
extern void setrpcent(int);
diff --git a/freebsd/include/rpc/svc.h b/freebsd/include/rpc/svc.h
index 51f278ae..c7989b03 100644
--- a/freebsd/include/rpc/svc.h
+++ b/freebsd/include/rpc/svc.h
@@ -90,6 +90,7 @@ enum xprt_stat {
*/
typedef struct __rpc_svcxprt {
int xp_fd;
+#define xp_sock xp_fd
u_short xp_port; /* associated port number */
const struct xp_ops {
/* receive incoming requests */
@@ -226,7 +227,7 @@ struct svc_req {
* const SVCXPRT *xprt;
* const rpcprog_t prog;
* const rpcvers_t vers;
- * const void (*dispatch)();
+ * const void (*dispatch)(struct svc_req *, SVCXPRT *);
* const struct netconfig *nconf;
*/
@@ -314,7 +315,7 @@ __END_DECLS
* Somebody has to wait for incoming requests and then call the correct
* service routine. The routine svc_run does infinite waiting; i.e.,
* svc_run never returns.
- * Since another (co-existant) package may wish to selectively wait for
+ * Since another (co-existent) package may wish to selectively wait for
* incoming calls or other events outside of the rpc architecture, the
* routine svc_getreq is provided. It must be passed readfds, the
* "in-place" results of a select system call (see select, section 2).
@@ -376,7 +377,7 @@ __BEGIN_DECLS
extern int svc_create(void (*)(struct svc_req *, SVCXPRT *),
const rpcprog_t, const rpcvers_t, const char *);
/*
- * void (*dispatch)(); -- dispatch routine
+ * void (*dispatch)(struct svc_req *, SVCXPRT *);
* const rpcprog_t prognum; -- program number
* const rpcvers_t versnum; -- version number
* const char *nettype; -- network type
@@ -392,7 +393,7 @@ extern SVCXPRT *svc_tp_create(void (*)(struct svc_req *, SVCXPRT *),
const rpcprog_t, const rpcvers_t,
const struct netconfig *);
/*
- * void (*dispatch)(); -- dispatch routine
+ * void (*dispatch)(struct svc_req *, SVCXPRT *);
* const rpcprog_t prognum; -- program number
* const rpcvers_t versnum; -- version number
* const struct netconfig *nconf; -- netconfig structure
diff --git a/freebsd/include/rpc/xdr.h b/freebsd/include/rpc/xdr.h
index 9456f70c..daee333f 100644
--- a/freebsd/include/rpc/xdr.h
+++ b/freebsd/include/rpc/xdr.h
@@ -219,15 +219,11 @@ xdr_putint32(XDR *xdrs, int32_t *ip)
(*(xdrs)->x_ops->x_control)(xdrs, req, op)
#define xdr_control(xdrs, req, op) XDR_CONTROL(xdrs, req, op)
-/*
- * Solaris strips the '_t' from these types -- not sure why.
- * But, let's be compatible.
- */
-#define xdr_rpcvers(xdrs, versp) xdr_u_int32(xdrs, versp)
-#define xdr_rpcprog(xdrs, progp) xdr_u_int32(xdrs, progp)
-#define xdr_rpcproc(xdrs, procp) xdr_u_int32(xdrs, procp)
-#define xdr_rpcprot(xdrs, protp) xdr_u_int32(xdrs, protp)
-#define xdr_rpcport(xdrs, portp) xdr_u_int32(xdrs, portp)
+#define xdr_rpcvers(xdrs, versp) xdr_u_int32_t(xdrs, versp)
+#define xdr_rpcprog(xdrs, progp) xdr_u_int32_t(xdrs, progp)
+#define xdr_rpcproc(xdrs, procp) xdr_u_int32_t(xdrs, procp)
+#define xdr_rpcprot(xdrs, protp) xdr_u_int32_t(xdrs, protp)
+#define xdr_rpcport(xdrs, portp) xdr_u_int32_t(xdrs, portp)
/*
* Support struct for discriminated unions.
@@ -355,7 +351,7 @@ extern void xdrrec_create(XDR *, u_int, u_int, void *,
int (*)(void *, void *, int));
/* make end of xdr record */
-extern bool_t xdrrec_endofrecord(XDR *, int);
+extern bool_t xdrrec_endofrecord(XDR *, bool_t);
/* move to beginning of next record */
extern bool_t xdrrec_skiprecord(XDR *);
diff --git a/freebsd/include/rpcsvc/nis.x b/freebsd/include/rpcsvc/nis.x
index 7ebb19df..ee822d03 100644
--- a/freebsd/include/rpcsvc/nis.x
+++ b/freebsd/include/rpcsvc/nis.x
@@ -1,31 +1,32 @@
-%/*
-% * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
-% * unrestricted use provided that this legend is included on all tape
-% * media and as a part of the software program in whole or part. Users
-% * may copy or modify Sun RPC without charge, but are not authorized
-% * to license or distribute it to anyone else except as part of a product or
-% * program developed by the user or with the express written consent of
-% * Sun Microsystems, Inc.
-% *
-% * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
-% * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
-% * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
-% *
-% * Sun RPC is provided with no support and without any obligation on the
-% * part of Sun Microsystems, Inc. to assist in its use, correction,
-% * modification or enhancement.
+%/*-
+% * Copyright (c) 2010, Oracle America, Inc.
% *
-% * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
-% * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
-% * OR ANY PART THEREOF.
+% * Redistribution and use in source and binary forms, with or without
+% * modification, are permitted provided that the following conditions are
+% * met:
% *
-% * In no event will Sun Microsystems, Inc. be liable for any lost revenue
-% * or profits or other special, indirect and consequential damages, even if
-% * Sun has been advised of the possibility of such damages.
+% * * Redistributions of source code must retain the above copyright
+% * notice, this list of conditions and the following disclaimer.
+% * * Redistributions in binary form must reproduce the above
+% * copyright notice, this list of conditions and the following
+% * disclaimer in the documentation and/or other materials
+% * provided with the distribution.
+% * * Neither the name of the "Oracle America, Inc." nor the names of its
+% * contributors may be used to endorse or promote products derived
+% * from this software without specific prior written permission.
% *
-% * Sun Microsystems, Inc.
-% * 2550 Garcia Avenue
-% * Mountain View, California 94043
+% * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+% * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+% * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+% * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+% * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+% * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+% * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+% * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+% * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+% * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+% * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+% * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
% */
#ifndef RPC_HDR
@@ -192,8 +193,8 @@ struct ping_args {
* note that modifications will appear as two entries, for names, they have
* an "OLD" entry followed by a "NEW" entry. For entries in tables, there
* is a remove followed by an add. It is done this way so that we can read
- * the log backwards to back out transactions and forwards to propogate
- * updated.
+ * the log backwards to back out transactions and forwards to propagate
+ * updates.
*/
enum log_entry_t {
LOG_NOP = 0,
@@ -399,10 +400,7 @@ program NIS_PROG {
%#define OARIGHTS(d, n) (((d)->do_armask.do_armask_val+n)->oa_rights)
%#define WORLD_DEFAULT (NIS_READ_ACC)
%#define GROUP_DEFAULT (NIS_READ_ACC << 8)
-%#define OWNER_DEFAULT ((NIS_READ_ACC +\
- NIS_MODIFY_ACC +\
- NIS_CREATE_ACC +\
- NIS_DESTROY_ACC) << 16)
+%#define OWNER_DEFAULT ((NIS_READ_ACC + NIS_MODIFY_ACC + NIS_CREATE_ACC + NIS_DESTROY_ACC) << 16)
%#define DEFAULT_RIGHTS (WORLD_DEFAULT | GROUP_DEFAULT | OWNER_DEFAULT)
%
%/* Result manipulation defines ... */
@@ -431,10 +429,8 @@ program NIS_PROG {
% * these definitions they take an nis_object *, and an int and return
% * a u_char * for Value, and an int for length.
% */
-%#define ENTRY_VAL(obj, col) \
- (obj)->EN_data.en_cols.en_cols_val[col].ec_value.ec_value_val
-%#define ENTRY_LEN(obj, col) \
- (obj)->EN_data.en_cols.en_cols_val[col].ec_value.ec_value_len
+%#define ENTRY_VAL(obj, col) (obj)->EN_data.en_cols.en_cols_val[col].ec_value.ec_value_val
+%#define ENTRY_LEN(obj, col) (obj)->EN_data.en_cols.en_cols_val[col].ec_value.ec_value_len
%
%#ifdef __cplusplus
%}
diff --git a/freebsd/include/rpcsvc/nis_tags.h b/freebsd/include/rpcsvc/nis_tags.h
index 0eaee6d4..7ee630e6 100644
--- a/freebsd/include/rpcsvc/nis_tags.h
+++ b/freebsd/include/rpcsvc/nis_tags.h
@@ -1,30 +1,32 @@
-/*
- * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
- * unrestricted use provided that this legend is included on all tape
- * media and as a part of the software program in whole or part. Users
- * may copy or modify Sun RPC without charge, but are not authorized
- * to license or distribute it to anyone else except as part of a product or
- * program developed by the user.
- *
- * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
- * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
- *
- * Sun RPC is provided with no support and without any obligation on the
- * part of Sun Microsystems, Inc. to assist in its use, correction,
- * modification or enhancement.
- *
- * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
- * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
- * OR ANY PART THEREOF.
- *
- * In no event will Sun Microsystems, Inc. be liable for any lost revenue
- * or profits or other special, indirect and consequential damages, even if
- * Sun has been advised of the possibility of such damages.
- *
- * Sun Microsystems, Inc.
- * 2550 Garcia Avenue
- * Mountain View, California 94043
+/*-
+ * Copyright (c) 2010, Oracle America, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ * * Neither the name of the "Oracle America, Inc." nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
@@ -41,6 +43,7 @@
#ifndef _RPCSVC_NIS_TAGS_H
#define _RPCSVC_NIS_TAGS_H
+/* $FreeBSD$ */
/* From: #pragma ident "@(#)nis_tags.h 1.10 94/05/03 SMI" */
/* from file: zns_tags.h 1.7 Copyright (c) 1990 Sun Microsystems */
diff --git a/freebsd/lib/libc/db/btree/bt_open.c b/freebsd/lib/libc/db/btree/bt_open.c
index b249f5cd..ce393bbb 100644
--- a/freebsd/lib/libc/db/btree/bt_open.c
+++ b/freebsd/lib/libc/db/btree/bt_open.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <unistd.h>
#include "un-namespace.h"
+#include "libc_private.h"
#include <db.h>
#include "btree.h"
@@ -198,7 +199,7 @@ __bt_open(const char *fname, int flags, int mode, const BTREEINFO *openinfo, int
goto einval;
}
- if ((t->bt_fd = _open(fname, flags, mode)) < 0)
+ if ((t->bt_fd = _open(fname, flags | O_CLOEXEC, mode)) < 0)
goto err;
} else {
@@ -209,9 +210,6 @@ __bt_open(const char *fname, int flags, int mode, const BTREEINFO *openinfo, int
F_SET(t, B_INMEM);
}
- if (_fcntl(t->bt_fd, F_SETFD, 1) == -1)
- goto err;
-
if (_fstat(t->bt_fd, &sb))
goto err;
if (sb.st_size) {
@@ -281,7 +279,7 @@ __bt_open(const char *fname, int flags, int mode, const BTREEINFO *openinfo, int
b.cachesize = b.psize * MINCACHE;
/* Calculate number of pages to cache. */
- ncache = (b.cachesize + t->bt_psize - 1) / t->bt_psize;
+ ncache = howmany(b.cachesize, t->bt_psize);
/*
* The btree data structure requires that at least two keys can fit on
@@ -406,10 +404,10 @@ tmp(void)
}
(void)sigfillset(&set);
- (void)_sigprocmask(SIG_BLOCK, &set, &oset);
- if ((fd = mkstemp(path)) != -1)
+ (void)__libc_sigprocmask(SIG_BLOCK, &set, &oset);
+ if ((fd = mkostemp(path, O_CLOEXEC)) != -1)
(void)unlink(path);
- (void)_sigprocmask(SIG_SETMASK, &oset, NULL);
+ (void)__libc_sigprocmask(SIG_SETMASK, &oset, NULL);
return(fd);
}
diff --git a/freebsd/lib/libc/db/btree/bt_put.c b/freebsd/lib/libc/db/btree/bt_put.c
index 885562b3..d0b54cf4 100644
--- a/freebsd/lib/libc/db/btree/bt_put.c
+++ b/freebsd/lib/libc/db/btree/bt_put.c
@@ -57,7 +57,7 @@ static EPG *bt_fast(BTREE *, const DBT *, const DBT *, int *);
* dbp: pointer to access method
* key: key
* data: data
- * flag: R_NOOVERWRITE
+ * flag: R_NOOVERWRITE, R_SETCURSOR, R_CURSOR
*
* Returns:
* RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is already in the
@@ -93,6 +93,7 @@ __bt_put(const DB *dbp, DBT *key, const DBT *data, u_int flags)
switch (flags) {
case 0:
case R_NOOVERWRITE:
+ case R_SETCURSOR:
break;
case R_CURSOR:
/*
diff --git a/freebsd/lib/libc/db/btree/bt_split.c b/freebsd/lib/libc/db/btree/bt_split.c
index 5ea902dd..7d47d660 100644
--- a/freebsd/lib/libc/db/btree/bt_split.c
+++ b/freebsd/lib/libc/db/btree/bt_split.c
@@ -38,7 +38,6 @@ static char sccsid[] = "@(#)bt_split.c 8.10 (Berkeley) 1/9/95";
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <limits.h>
@@ -238,9 +237,12 @@ __bt_split(BTREE *t, PAGE *sp, const DBT *key, const DBT *data, int flags,
WR_BINTERNAL(dest, nksize ? nksize : bl->ksize,
rchild->pgno, bl->flags & P_BIGKEY);
memmove(dest, bl->bytes, nksize ? nksize : bl->ksize);
- if (bl->flags & P_BIGKEY &&
- bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR)
- goto err1;
+ if (bl->flags & P_BIGKEY) {
+ pgno_t pgno;
+ memcpy(&pgno, bl->bytes, sizeof(pgno));
+ if (bt_preserve(t, pgno) == RET_ERROR)
+ goto err1;
+ }
break;
case P_RINTERNAL:
/*
@@ -546,9 +548,12 @@ bt_broot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
* If the key is on an overflow page, mark the overflow chain
* so it isn't deleted when the leaf copy of the key is deleted.
*/
- if (bl->flags & P_BIGKEY &&
- bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR)
- return (RET_ERROR);
+ if (bl->flags & P_BIGKEY) {
+ pgno_t pgno;
+ memcpy(&pgno, bl->bytes, sizeof(pgno));
+ if (bt_preserve(t, pgno) == RET_ERROR)
+ return (RET_ERROR);
+ }
break;
case P_BINTERNAL:
bi = GETBINTERNAL(r, 0);
diff --git a/freebsd/lib/libc/db/db/db.c b/freebsd/lib/libc/db/db/db.c
index ffd083c7..06b45935 100644
--- a/freebsd/lib/libc/db/db/db.c
+++ b/freebsd/lib/libc/db/db/db.c
@@ -46,6 +46,10 @@ __FBSDID("$FreeBSD$");
static int __dberr(void);
+#ifndef O_CLOEXEC
+#define O_CLOEXEC 0
+#endif
+
DB *
dbopen(const char *fname, int flags, int mode, DBTYPE type, const void *openinfo)
{
@@ -53,7 +57,7 @@ dbopen(const char *fname, int flags, int mode, DBTYPE type, const void *openinfo
#define DB_FLAGS (DB_LOCK | DB_SHMEM | DB_TXN)
#define USE_OPEN_FLAGS \
(O_CREAT | O_EXCL | O_EXLOCK | O_NOFOLLOW | O_NONBLOCK | \
- O_RDONLY | O_RDWR | O_SHLOCK | O_SYNC | O_TRUNC)
+ O_RDONLY | O_RDWR | O_SHLOCK | O_SYNC | O_TRUNC | O_CLOEXEC)
if ((flags & ~(USE_OPEN_FLAGS | DB_FLAGS)) == 0)
switch (type) {
diff --git a/freebsd/lib/libc/db/recno/rec_open.c b/freebsd/lib/libc/db/recno/rec_open.c
index 81945fea..1ad0bb6a 100644
--- a/freebsd/lib/libc/db/recno/rec_open.c
+++ b/freebsd/lib/libc/db/recno/rec_open.c
@@ -66,7 +66,7 @@ __rec_open(const char *fname, int flags, int mode, const RECNOINFO *openinfo,
int rfd, sverrno;
/* Open the user's file -- if this fails, we're done. */
- if (fname != NULL && (rfd = _open(fname, flags, mode)) < 0)
+ if (fname != NULL && (rfd = _open(fname, flags | O_CLOEXEC, mode)) < 0)
return (NULL);
/* Create a btree in memory (backed by disk). */
diff --git a/freebsd/lib/libc/db/recno/rec_put.c b/freebsd/lib/libc/db/recno/rec_put.c
index a667022e..f8253aa7 100644
--- a/freebsd/lib/libc/db/recno/rec_put.c
+++ b/freebsd/lib/libc/db/recno/rec_put.c
@@ -142,8 +142,7 @@ einval: errno = EINVAL;
return (RET_ERROR);
if (nrec > t->bt_nrecs + 1) {
if (F_ISSET(t, R_FIXLEN)) {
- if ((tdata.data =
- (void *)malloc(t->bt_reclen)) == NULL)
+ if ((tdata.data = malloc(t->bt_reclen)) == NULL)
return (RET_ERROR);
tdata.size = t->bt_reclen;
memset(tdata.data, t->bt_bval, tdata.size);
@@ -210,7 +209,7 @@ __rec_iput(BTREE *t, recno_t nrec, const DBT *data, u_int flags)
return (RET_ERROR);
tdata.data = db;
tdata.size = NOVFLSIZE;
- *(pgno_t *)db = pg;
+ memcpy(db, &pg, sizeof(pg));
*(u_int32_t *)(db + sizeof(pgno_t)) = data->size;
dflags = P_BIGDATA;
data = &tdata;
diff --git a/freebsd/lib/libc/gen/err.c b/freebsd/lib/libc/gen/err.c
index b83a86ed..d41d43f7 100644
--- a/freebsd/lib/libc/gen/err.c
+++ b/freebsd/lib/libc/gen/err.c
@@ -98,10 +98,7 @@ _err(int eval, const char *fmt, ...)
}
void
-verr(eval, fmt, ap)
- int eval;
- const char *fmt;
- va_list ap;
+verr(int eval, const char *fmt, va_list ap)
{
verrc(eval, errno, fmt, ap);
}
@@ -118,7 +115,7 @@ errc(int eval, int code, const char *fmt, ...)
void
verrc(int eval, int code, const char *fmt, va_list ap)
{
- if (err_file == 0)
+ if (err_file == NULL)
err_set_file((FILE *)0);
fprintf(err_file, "%s: ", _getprogname());
if (fmt != NULL) {
@@ -145,7 +142,7 @@ errx(int eval, const char *fmt, ...)
void
verrx(int eval, const char *fmt, va_list ap)
{
- if (err_file == 0)
+ if (err_file == NULL)
err_set_file((FILE *)0);
fprintf(err_file, "%s: ", _getprogname());
if (fmt != NULL)
@@ -187,7 +184,7 @@ warnc(int code, const char *fmt, ...)
void
vwarnc(int code, const char *fmt, va_list ap)
{
- if (err_file == 0)
+ if (err_file == NULL)
err_set_file((FILE *)0);
fprintf(err_file, "%s: ", _getprogname());
if (fmt != NULL) {
@@ -209,7 +206,7 @@ warnx(const char *fmt, ...)
void
vwarnx(const char *fmt, va_list ap)
{
- if (err_file == 0)
+ if (err_file == NULL)
err_set_file((FILE *)0);
fprintf(err_file, "%s: ", _getprogname());
if (fmt != NULL)
diff --git a/freebsd/lib/libc/gen/feature_present.c b/freebsd/lib/libc/gen/feature_present.c
index 9404a063..6eb44887 100644
--- a/freebsd/lib/libc/gen/feature_present.c
+++ b/freebsd/lib/libc/gen/feature_present.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>
+#include <unistd.h>
/*
* Returns true if the named feature is present in the currently
diff --git a/freebsd/lib/libc/gen/getdomainname.c b/freebsd/lib/libc/gen/getdomainname.c
index 39234deb..8aebb257 100644
--- a/freebsd/lib/libc/gen/getdomainname.c
+++ b/freebsd/lib/libc/gen/getdomainname.c
@@ -41,12 +41,10 @@ __FBSDID("$FreeBSD$");
#include <unistd.h>
int
-getdomainname(name, namelen)
- char *name;
#ifndef __rtems__
- int namelen;
+getdomainname(char *name, int namelen)
#else /* __rtems__ */
- size_t namelen;
+getdomainname(char *name, size_t namelen)
#endif /* __rtems__ */
{
int mib[2];
diff --git a/freebsd/lib/libc/gen/gethostname.c b/freebsd/lib/libc/gen/gethostname.c
index ea0532ea..bd236852 100644
--- a/freebsd/lib/libc/gen/gethostname.c
+++ b/freebsd/lib/libc/gen/gethostname.c
@@ -42,9 +42,7 @@ __FBSDID("$FreeBSD$");
#include <unistd.h>
int
-gethostname(name, namelen)
- char *name;
- size_t namelen;
+gethostname(char *name, size_t namelen)
{
int mib[2];
diff --git a/freebsd/lib/libc/include/libc_private.h b/freebsd/lib/libc/include/libc_private.h
index cebc035e..9fe6ab01 100644
--- a/freebsd/lib/libc/include/libc_private.h
+++ b/freebsd/lib/libc/include/libc_private.h
@@ -87,7 +87,7 @@ void _rtld_error(const char *fmt, ...);
#define FUNLOCKFILE(fp) if (__isthreaded) _funlockfile(fp)
struct _spinlock;
-extern struct _spinlock __stdio_thread_lock;
+extern struct _spinlock __stdio_thread_lock __hidden;
#define STDIO_THREAD_LOCK() \
do { \
if (__isthreaded) \
@@ -99,6 +99,9 @@ do { \
_SPINUNLOCK(&__stdio_thread_lock); \
} while (0)
+void __libc_spinlock_stub(struct _spinlock *);
+void __libc_spinunlock_stub(struct _spinlock *);
+
/*
* Indexes into the pthread jump table.
*
@@ -169,6 +172,9 @@ typedef enum {
PJT_CLEANUP_PUSH_IMP,
PJT_CANCEL_ENTER,
PJT_CANCEL_LEAVE,
+ PJT_MUTEX_CONSISTENT,
+ PJT_MUTEXATTR_GETROBUST,
+ PJT_MUTEXATTR_SETROBUST,
PJT_MAX
} pjt_index_t;
@@ -177,6 +183,59 @@ typedef pthread_func_t pthread_func_entry_t[2];
extern pthread_func_entry_t __thr_jtable[];
+void __set_error_selector(int *(*arg)(void));
+int _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
+ void *(calloc_cb)(__size_t, __size_t));
+
+typedef int (*interpos_func_t)(void);
+interpos_func_t *__libc_interposing_slot(int interposno);
+extern interpos_func_t __libc_interposing[] __hidden;
+
+enum {
+ INTERPOS_accept,
+ INTERPOS_accept4,
+ INTERPOS_aio_suspend,
+ INTERPOS_close,
+ INTERPOS_connect,
+ INTERPOS_fcntl,
+ INTERPOS_fsync,
+ INTERPOS_fork,
+ INTERPOS_msync,
+ INTERPOS_nanosleep,
+ INTERPOS_openat,
+ INTERPOS_poll,
+ INTERPOS_pselect,
+ INTERPOS_recvfrom,
+ INTERPOS_recvmsg,
+ INTERPOS_select,
+ INTERPOS_sendmsg,
+ INTERPOS_sendto,
+ INTERPOS_setcontext,
+ INTERPOS_sigaction,
+ INTERPOS_sigprocmask,
+ INTERPOS_sigsuspend,
+ INTERPOS_sigwait,
+ INTERPOS_sigtimedwait,
+ INTERPOS_sigwaitinfo,
+ INTERPOS_swapcontext,
+ INTERPOS_system,
+ INTERPOS_tcdrain,
+ INTERPOS_read,
+ INTERPOS_readv,
+ INTERPOS_wait4,
+ INTERPOS_write,
+ INTERPOS_writev,
+ INTERPOS__pthread_mutex_init_calloc_cb,
+ INTERPOS_spinlock,
+ INTERPOS_spinunlock,
+ INTERPOS_kevent,
+ INTERPOS_wait6,
+ INTERPOS_ppoll,
+ INTERPOS_map_stacks_exec,
+ INTERPOS_fdatasync,
+ INTERPOS_MAX
+};
+
/*
* yplib internal interfaces
*/
@@ -213,48 +272,123 @@ extern const char *__progname;
void _malloc_thread_cleanup(void);
/*
+ * This function is used by the threading libraries to notify libc that a
+ * thread is exiting, so its thread-local dtors should be called.
+ */
+void __cxa_thread_call_dtors(void);
+
+/*
* These functions are used by the threading libraries in order to protect
* malloc across fork().
*/
void _malloc_prefork(void);
void _malloc_postfork(void);
+void _malloc_first_thread(void);
+
/*
* Function to clean up streams, called from abort() and exit().
*/
-extern void (*__cleanup)(void);
+extern void (*__cleanup)(void) __hidden;
/*
* Get kern.osreldate to detect ABI revisions. Explicitly
- * ignores value of $OSVERSION and caches result. Prototypes
- * for the wrapped "new" pad-less syscalls are here for now.
+ * ignores value of $OSVERSION and caches result.
*/
-extern int __getosreldate(void);
+int __getosreldate(void);
#include <sys/_types.h>
-/* Without pad */
-extern __off_t __sys_lseek(int, __off_t, int);
-extern int __sys_ftruncate(int, __off_t);
-extern int __sys_truncate(const char *, __off_t);
-extern __ssize_t __sys_pread(int, void *, __size_t, __off_t);
-extern __ssize_t __sys_pwrite(int, const void *, __size_t, __off_t);
-extern void * __sys_mmap(void *, __size_t, int, int, int, __off_t);
-
-/* With pad */
-extern __off_t __sys_freebsd6_lseek(int, int, __off_t, int);
-extern int __sys_freebsd6_ftruncate(int, int, __off_t);
-extern int __sys_freebsd6_truncate(const char *, int, __off_t);
-extern __ssize_t __sys_freebsd6_pread(int, void *, __size_t, int, __off_t);
-extern __ssize_t __sys_freebsd6_pwrite(int, const void *, __size_t, int, __off_t);
-extern void * __sys_freebsd6_mmap(void *, __size_t, int, int, int, int, __off_t);
-
-/* Without back-compat translation */
-extern int __sys_fcntl(int, int, ...);
+#include <sys/_sigset.h>
+struct aiocb;
+struct fd_set;
+struct iovec;
+struct kevent;
+struct msghdr;
+struct pollfd;
+struct rusage;
+struct sigaction;
+struct sockaddr;
struct timespec;
struct timeval;
struct timezone;
-int __sys_gettimeofday(struct timeval *, struct timezone *);
-int __sys_clock_gettime(__clockid_t, struct timespec *ts);
+struct __siginfo;
+struct __ucontext;
+struct __wrusage;
+enum idtype;
+int __sys_aio_suspend(const struct aiocb * const[], int,
+ const struct timespec *);
+int __sys_accept(int, struct sockaddr *, __socklen_t *);
+int __sys_accept4(int, struct sockaddr *, __socklen_t *, int);
+int __sys_clock_gettime(__clockid_t, struct timespec *ts);
+int __sys_close(int);
+int __sys_connect(int, const struct sockaddr *, __socklen_t);
+int __sys_fcntl(int, int, ...);
+int __sys_fdatasync(int);
+int __sys_fsync(int);
+__pid_t __sys_fork(void);
+int __sys_ftruncate(int, __off_t);
+int __sys_gettimeofday(struct timeval *, struct timezone *);
+int __sys_kevent(int, const struct kevent *, int, struct kevent *,
+ int, const struct timespec *);
+__off_t __sys_lseek(int, __off_t, int);
+void *__sys_mmap(void *, __size_t, int, int, int, __off_t);
+int __sys_msync(void *, __size_t, int);
+int __sys_nanosleep(const struct timespec *, struct timespec *);
+int __sys_open(const char *, int, ...);
+int __sys_openat(int, const char *, int, ...);
+int __sys_pselect(int, struct fd_set *, struct fd_set *,
+ struct fd_set *, const struct timespec *,
+ const __sigset_t *);
+int __sys_poll(struct pollfd *, unsigned, int);
+int __sys_ppoll(struct pollfd *, unsigned, const struct timespec *,
+ const __sigset_t *);
+__ssize_t __sys_pread(int, void *, __size_t, __off_t);
+__ssize_t __sys_pwrite(int, const void *, __size_t, __off_t);
+__ssize_t __sys_read(int, void *, __size_t);
+__ssize_t __sys_readv(int, const struct iovec *, int);
+__ssize_t __sys_recv(int, void *, __size_t, int);
+__ssize_t __sys_recvfrom(int, void *, __size_t, int, struct sockaddr *,
+ __socklen_t *);
+__ssize_t __sys_recvmsg(int, struct msghdr *, int);
+int __sys_select(int, struct fd_set *, struct fd_set *,
+ struct fd_set *, struct timeval *);
+__ssize_t __sys_sendmsg(int, const struct msghdr *, int);
+__ssize_t __sys_sendto(int, const void *, __size_t, int,
+ const struct sockaddr *, __socklen_t);
+int __sys_setcontext(const struct __ucontext *);
+int __sys_sigaction(int, const struct sigaction *,
+ struct sigaction *);
+int __sys_sigprocmask(int, const __sigset_t *, __sigset_t *);
+int __sys_sigsuspend(const __sigset_t *);
+int __sys_sigtimedwait(const __sigset_t *, struct __siginfo *,
+ const struct timespec *);
+int __sys_sigwait(const __sigset_t *, int *);
+int __sys_sigwaitinfo(const __sigset_t *, struct __siginfo *);
+int __sys_swapcontext(struct __ucontext *,
+ const struct __ucontext *);
+int __sys_thr_kill(long, int);
+int __sys_thr_self(long *);
+int __sys_truncate(const char *, __off_t);
+__pid_t __sys_wait4(__pid_t, int *, int, struct rusage *);
+__pid_t __sys_wait6(enum idtype, __id_t, int *, int,
+ struct __wrusage *, struct __siginfo *);
+__ssize_t __sys_write(int, const void *, __size_t);
+__ssize_t __sys_writev(int, const struct iovec *, int);
+
+int __libc_sigaction(int, const struct sigaction *,
+ struct sigaction *) __hidden;
+int __libc_sigprocmask(int, const __sigset_t *, __sigset_t *)
+ __hidden;
+int __libc_sigsuspend(const __sigset_t *) __hidden;
+int __libc_sigwait(const __sigset_t * __restrict,
+ int * restrict sig);
+int __libc_system(const char *);
+int __libc_tcdrain(int);
+int __fcntl_compat(int fd, int cmd, ...);
+
+int __sys_futimens(int fd, const struct timespec *times) __hidden;
+int __sys_utimensat(int fd, const char *path,
+ const struct timespec *times, int flag) __hidden;
/* execve() with PATH processing to implement posix_spawnp() */
int _execvpe(const char *, char * const *, char * const *);
@@ -263,6 +397,7 @@ int _elf_aux_info(int aux, void *buf, int buflen);
struct dl_phdr_info;
int __elf_phdr_match_addr(struct dl_phdr_info *, void *);
void __init_elf_aux_vector(void);
+void __libc_map_stacks_exec(void);
void _pthread_cancel_enter(int);
void _pthread_cancel_leave(int);
diff --git a/freebsd/lib/libc/include/namespace.h b/freebsd/lib/libc/include/namespace.h
index 8b2f04b1..8af80817 100644
--- a/freebsd/lib/libc/include/namespace.h
+++ b/freebsd/lib/libc/include/namespace.h
@@ -35,16 +35,199 @@
*/
-#define _pthread_getspecific pthread_getspecific
-#define _pthread_key_create pthread_key_create
-#define _pthread_main_np pthread_main_np
-#define _pthread_once pthread_once
-#define _pthread_setspecific pthread_setspecific
-#define _pthread_mutex_trylock pthread_mutex_trylock
-#define _pthread_mutex_unlock pthread_mutex_unlock
-#define _pthread_rwlock_rdlock pthread_rwlock_rdlock
-#define _pthread_rwlock_unlock pthread_rwlock_unlock
-#define _pthread_rwlock_wrlock pthread_rwlock_wrlock
+/*
+ * Prototypes for syscalls/functions that need to be overridden
+ * in libc_r/libpthread.
+ */
+#ifndef __rtems__
+#define accept _accept
+#define __acl_aclcheck_fd ___acl_aclcheck_fd
+#define __acl_delete_fd ___acl_delete_fd
+#define __acl_get_fd ___acl_get_fd
+#define __acl_set_fd ___acl_set_fd
+#define bind _bind
+#define __cap_get_fd ___cap_get_fd
+#define __cap_set_fd ___cap_set_fd
+#define close _close
+#define connect _connect
+#define dup _dup
+#define dup2 _dup2
+#define execve _execve
+#define fcntl _fcntl
+/*#define flock _flock */
+#define flockfile _flockfile
+#define fpathconf _fpathconf
+#define fstat _fstat
+#define fstatfs _fstatfs
+#define fsync _fsync
+#define funlockfile _funlockfile
+#define getdirentries _getdirentries
+#define getlogin _getlogin
+#define getpeername _getpeername
+#define getprogname _getprogname
+#define getsockname _getsockname
+#define getsockopt _getsockopt
+#define ioctl _ioctl
+/* #define kevent _kevent */
+#define listen _listen
+#define nanosleep _nanosleep
+#define open _open
+#define openat _openat
+#define poll _poll
+#define pthread_atfork _pthread_atfork
+#define pthread_attr_destroy _pthread_attr_destroy
+#define pthread_attr_get_np _pthread_attr_get_np
+#define pthread_attr_getaffinity_np _pthread_attr_getaffinity_np
+#define pthread_attr_getdetachstate _pthread_attr_getdetachstate
+#define pthread_attr_getguardsize _pthread_attr_getguardsize
+#define pthread_attr_getinheritsched _pthread_attr_getinheritsched
+#define pthread_attr_getschedparam _pthread_attr_getschedparam
+#define pthread_attr_getschedpolicy _pthread_attr_getschedpolicy
+#define pthread_attr_getscope _pthread_attr_getscope
+#define pthread_attr_getstack _pthread_attr_getstack
+#define pthread_attr_getstackaddr _pthread_attr_getstackaddr
+#define pthread_attr_getstacksize _pthread_attr_getstacksize
+#define pthread_attr_init _pthread_attr_init
+#define pthread_attr_setaffinity_np _pthread_attr_setaffinity_np
+#define pthread_attr_setcreatesuspend_np _pthread_attr_setcreatesuspend_np
+#define pthread_attr_setdetachstate _pthread_attr_setdetachstate
+#define pthread_attr_setguardsize _pthread_attr_setguardsize
+#define pthread_attr_setinheritsched _pthread_attr_setinheritsched
+#define pthread_attr_setschedparam _pthread_attr_setschedparam
+#define pthread_attr_setschedpolicy _pthread_attr_setschedpolicy
+#define pthread_attr_setscope _pthread_attr_setscope
+#define pthread_attr_setstack _pthread_attr_setstack
+#define pthread_attr_setstackaddr _pthread_attr_setstackaddr
+#define pthread_attr_setstacksize _pthread_attr_setstacksize
+#define pthread_barrier_destroy _pthread_barrier_destroy
+#define pthread_barrier_init _pthread_barrier_init
+#define pthread_barrier_wait _pthread_barrier_wait
+#define pthread_barrierattr_destroy _pthread_barrierattr_destroy
+#define pthread_barrierattr_getpshared _pthread_barrierattr_getpshared
+#define pthread_barrierattr_init _pthread_barrierattr_init
+#define pthread_barrierattr_setpshared _pthread_barrierattr_setpshared
+#define pthread_cancel _pthread_cancel
+#define pthread_cond_broadcast _pthread_cond_broadcast
+#define pthread_cond_destroy _pthread_cond_destroy
+#define pthread_cond_init _pthread_cond_init
+#define pthread_cond_signal _pthread_cond_signal
+#define pthread_cond_timedwait _pthread_cond_timedwait
+#define pthread_cond_wait _pthread_cond_wait
+#define pthread_condattr_destroy _pthread_condattr_destroy
+#define pthread_condattr_getclock _pthread_condattr_getclock
+#define pthread_condattr_getpshared _pthread_condattr_getpshared
+#define pthread_condattr_init _pthread_condattr_init
+#define pthread_condattr_setclock _pthread_condattr_setclock
+#define pthread_condattr_setpshared _pthread_condattr_setpshared
+#define pthread_create _pthread_create
+#define pthread_detach _pthread_detach
+#define pthread_equal _pthread_equal
+#define pthread_exit _pthread_exit
+#define pthread_getaffinity_np _pthread_getaffinity_np
+#define pthread_getconcurrency _pthread_getconcurrency
+#define pthread_getcpuclockid _pthread_getcpuclockid
+#define pthread_getprio _pthread_getprio
+#define pthread_getschedparam _pthread_getschedparam
+#define pthread_getspecific _pthread_getspecific
+#define pthread_getthreadid_np _pthread_getthreadid_np
+#define pthread_join _pthread_join
+#define pthread_key_create _pthread_key_create
+#define pthread_key_delete _pthread_key_delete
+#define pthread_kill _pthread_kill
+#define pthread_main_np _pthread_main_np
+#define pthread_multi_np _pthread_multi_np
+#define pthread_mutex_destroy _pthread_mutex_destroy
+#define pthread_mutex_getprioceiling _pthread_mutex_getprioceiling
+#define pthread_mutex_init _pthread_mutex_init
+#define pthread_mutex_isowned_np _pthread_mutex_isowned_np
+#define pthread_mutex_lock _pthread_mutex_lock
+#define pthread_mutex_setprioceiling _pthread_mutex_setprioceiling
+#define pthread_mutex_timedlock _pthread_mutex_timedlock
+#define pthread_mutex_trylock _pthread_mutex_trylock
+#define pthread_mutex_unlock _pthread_mutex_unlock
+#define pthread_mutexattr_destroy _pthread_mutexattr_destroy
+#define pthread_mutexattr_getkind_np _pthread_mutexattr_getkind_np
+#define pthread_mutexattr_getprioceiling _pthread_mutexattr_getprioceiling
+#define pthread_mutexattr_getprotocol _pthread_mutexattr_getprotocol
+#define pthread_mutexattr_getpshared _pthread_mutexattr_getpshared
+#define pthread_mutexattr_gettype _pthread_mutexattr_gettype
+#define pthread_mutexattr_init _pthread_mutexattr_init
+#define pthread_mutexattr_setkind_np _pthread_mutexattr_setkind_np
+#define pthread_mutexattr_setprioceiling _pthread_mutexattr_setprioceiling
+#define pthread_mutexattr_setprotocol _pthread_mutexattr_setprotocol
+#define pthread_mutexattr_setpshared _pthread_mutexattr_setpshared
+#define pthread_mutexattr_settype _pthread_mutexattr_settype
+#define pthread_once _pthread_once
+#define pthread_resume_all_np _pthread_resume_all_np
+#define pthread_resume_np _pthread_resume_np
+#define pthread_rwlock_destroy _pthread_rwlock_destroy
+#define pthread_rwlock_init _pthread_rwlock_init
+#define pthread_rwlock_rdlock _pthread_rwlock_rdlock
+#define pthread_rwlock_timedrdlock _pthread_rwlock_timedrdlock
+#define pthread_rwlock_timedwrlock _pthread_rwlock_timedwrlock
+#define pthread_rwlock_tryrdlock _pthread_rwlock_tryrdlock
+#define pthread_rwlock_trywrlock _pthread_rwlock_trywrlock
+#define pthread_rwlock_unlock _pthread_rwlock_unlock
+#define pthread_rwlock_wrlock _pthread_rwlock_wrlock
+#define pthread_rwlockattr_destroy _pthread_rwlockattr_destroy
+#define pthread_rwlockattr_getpshared _pthread_rwlockattr_getpshared
+#define pthread_rwlockattr_init _pthread_rwlockattr_init
+#define pthread_rwlockattr_setpshared _pthread_rwlockattr_setpshared
+#define pthread_self _pthread_self
+#define pthread_set_name_np _pthread_set_name_np
+#define pthread_setaffinity_np _pthread_setaffinity_np
+#define pthread_setcancelstate _pthread_setcancelstate
+#define pthread_setcanceltype _pthread_setcanceltype
+#define pthread_setconcurrency _pthread_setconcurrency
+#define pthread_setprio _pthread_setprio
+#define pthread_setschedparam _pthread_setschedparam
+#define pthread_setspecific _pthread_setspecific
+#define pthread_sigmask _pthread_sigmask
+#define pthread_single_np _pthread_single_np
+#define pthread_spin_destroy _pthread_spin_destroy
+#define pthread_spin_init _pthread_spin_init
+#define pthread_spin_lock _pthread_spin_lock
+#define pthread_spin_trylock _pthread_spin_trylock
+#define pthread_spin_unlock _pthread_spin_unlock
+#define pthread_suspend_all_np _pthread_suspend_all_np
+#define pthread_suspend_np _pthread_suspend_np
+#define pthread_switch_add_np _pthread_switch_add_np
+#define pthread_switch_delete_np _pthread_switch_delete_np
+#define pthread_testcancel _pthread_testcancel
+#define pthread_timedjoin_np _pthread_timedjoin_np
+#define pthread_yield _pthread_yield
+#define read _read
+#define readv _readv
+#define recvfrom _recvfrom
+#define recvmsg _recvmsg
+#define recvmmsg _recvmmsg
+#define select _select
+#define sem_close _sem_close
+#define sem_destroy _sem_destroy
+#define sem_getvalue _sem_getvalue
+#define sem_init _sem_init
+#define sem_open _sem_open
+#define sem_post _sem_post
+#define sem_timedwait _sem_timedwait
+#define sem_trywait _sem_trywait
+#define sem_unlink _sem_unlink
+#define sem_wait _sem_wait
+#define sendmsg _sendmsg
+#define sendmmsg _sendmmsg
+#define sendto _sendto
+#define setsockopt _setsockopt
+/*#define sigaction _sigaction*/
+#define sigprocmask _sigprocmask
+#define sigsuspend _sigsuspend
+#define socket _socket
+#define socketpair _socketpair
+#define usleep _usleep
+#define wait4 _wait4
+#define wait6 _wait6
+#define waitpid _waitpid
+#define write _write
+#define writev _writev
+#endif /* __rtems__ */
#define _open open
#define _close close
@@ -68,9 +251,18 @@
#define _getprogname getprogname
#define _getsockname getsockname
#ifdef __rtems__
-#define _pthread_mutex_lock pthread_mutex_lock
-#define _getsockopt getsockopt
#define _bind bind
+#define _getsockopt getsockopt
+#define _poll poll
+#define _pthread_getspecific pthread_getspecific
+#define _pthread_key_create pthread_key_create
+#define _pthread_mutex_lock pthread_mutex_lock
+#define _pthread_mutex_unlock pthread_mutex_unlock
+#define _pthread_once pthread_once
+#define _pthread_rwlock_rdlock pthread_rwlock_rdlock
+#define _pthread_rwlock_unlock pthread_rwlock_unlock
+#define _pthread_rwlock_wrlock pthread_rwlock_wrlock
+#define _pthread_setspecific pthread_setspecific
#endif /* __rtems__ */
#endif /* _NAMESPACE_H_ */
diff --git a/freebsd/lib/libc/include/port_before.h b/freebsd/lib/libc/include/port_before.h
index 4b6e3590..430d2336 100644
--- a/freebsd/lib/libc/include/port_before.h
+++ b/freebsd/lib/libc/include/port_before.h
@@ -5,7 +5,7 @@
#define _LIBC 1
#define DO_PTHREADS 1
-#define USE_KQUEUE 1
+#define USE_POLL 1
#define HAVE_MD5 1
#define ISC_SOCKLEN_T socklen_t
diff --git a/freebsd/lib/libc/include/reentrant.h b/freebsd/lib/libc/include/reentrant.h
index 5debcf1a..8e0b90f5 100644
--- a/freebsd/lib/libc/include/reentrant.h
+++ b/freebsd/lib/libc/include/reentrant.h
@@ -49,7 +49,7 @@
* One approach for thread safety is to provide discrete versions of the
* library: one thread safe, the other not. The disadvantage of this is
* that libc is rather large, and two copies of a library which are 99%+
- * identical is not an efficent use of resources.
+ * identical is not an efficient use of resources.
*
* Another approach is to provide a single thread safe library. However,
* it should not add significant run time or code size overhead to non-
diff --git a/freebsd/lib/libc/include/un-namespace.h b/freebsd/lib/libc/include/un-namespace.h
index 287a2382..6cfb4ef0 100644
--- a/freebsd/lib/libc/include/un-namespace.h
+++ b/freebsd/lib/libc/include/un-namespace.h
@@ -29,4 +29,232 @@
#ifndef _UN_NAMESPACE_H_
#define _UN_NAMESPACE_H_
+#ifdef __rtems__
+#undef accept
+#undef __acl_aclcheck_fd
+#undef __acl_delete_fd
+#undef __acl_get_fd
+#undef __acl_set_fd
+#undef bind
+#undef __cap_get_fd
+#undef __cap_set_fd
+#undef close
+#undef connect
+#undef dup
+#undef dup2
+#undef execve
+#undef fcntl
+#undef flock
+#undef flockfile
+#undef fpathconf
+#undef fstat
+#undef fstatfs
+#undef fsync
+#undef funlockfile
+#undef getdirentries
+#undef getlogin
+#undef getpeername
+#undef getprogname
+#undef getsockname
+#undef getsockopt
+#undef ioctl
+#undef kevent
+#undef listen
+#undef nanosleep
+#undef open
+#undef openat
+#undef poll
+#undef pthread_atfork
+#undef pthread_attr_destroy
+#undef pthread_attr_get_np
+#undef pthread_attr_getaffinity_np
+#undef pthread_attr_getdetachstate
+#undef pthread_attr_getguardsize
+#undef pthread_attr_getinheritsched
+#undef pthread_attr_getschedparam
+#undef pthread_attr_getschedpolicy
+#undef pthread_attr_getscope
+#undef pthread_attr_getstack
+#undef pthread_attr_getstackaddr
+#undef pthread_attr_getstacksize
+#undef pthread_attr_init
+#undef pthread_attr_setaffinity_np
+#undef pthread_attr_setcreatesuspend_np
+#undef pthread_attr_setdetachstate
+#undef pthread_attr_setguardsize
+#undef pthread_attr_setinheritsched
+#undef pthread_attr_setschedparam
+#undef pthread_attr_setschedpolicy
+#undef pthread_attr_setscope
+#undef pthread_attr_setstack
+#undef pthread_attr_setstackaddr
+#undef pthread_attr_setstacksize
+#undef pthread_barrier_destroy
+#undef pthread_barrier_init
+#undef pthread_barrier_wait
+#undef pthread_barrierattr_destroy
+#undef pthread_barrierattr_getpshared
+#undef pthread_barrierattr_init
+#undef pthread_barrierattr_setpshared
+#undef pthread_cancel
+#undef pthread_cond_broadcast
+#undef pthread_cond_destroy
+#undef pthread_cond_init
+#undef pthread_cond_signal
+#undef pthread_cond_timedwait
+#undef pthread_cond_wait
+#undef pthread_condattr_destroy
+#undef pthread_condattr_getclock
+#undef pthread_condattr_getpshared
+#undef pthread_condattr_init
+#undef pthread_condattr_setclock
+#undef pthread_condattr_setpshared
+#undef pthread_create
+#undef pthread_detach
+#undef pthread_equal
+#undef pthread_exit
+#undef pthread_getaffinity_np
+#undef pthread_getconcurrency
+#undef pthread_getcpuclockid
+#undef pthread_getprio
+#undef pthread_getschedparam
+#undef pthread_getspecific
+#undef pthread_getthreadid_np
+#undef pthread_join
+#undef pthread_key_create
+#undef pthread_key_delete
+#undef pthread_kill
+#undef pthread_main_np
+#undef pthread_multi_np
+#undef pthread_mutex_destroy
+#undef pthread_mutex_getprioceiling
+#undef pthread_mutex_init
+#undef pthread_mutex_isowned_np
+#undef pthread_mutex_lock
+#undef pthread_mutex_setprioceiling
+#undef pthread_mutex_timedlock
+#undef pthread_mutex_trylock
+#undef pthread_mutex_unlock
+#undef pthread_mutexattr_destroy
+#undef pthread_mutexattr_getkind_np
+#undef pthread_mutexattr_getprioceiling
+#undef pthread_mutexattr_getprotocol
+#undef pthread_mutexattr_getpshared
+#undef pthread_mutexattr_gettype
+#undef pthread_mutexattr_init
+#undef pthread_mutexattr_setkind_np
+#undef pthread_mutexattr_setprioceiling
+#undef pthread_mutexattr_setprotocol
+#undef pthread_mutexattr_setpshared
+#undef pthread_mutexattr_settype
+#undef pthread_once
+#undef pthread_resume_all_np
+#undef pthread_resume_np
+#undef pthread_rwlock_destroy
+#undef pthread_rwlock_init
+#undef pthread_rwlock_rdlock
+#undef pthread_rwlock_timedrdlock
+#undef pthread_rwlock_timedwrlock
+#undef pthread_rwlock_tryrdlock
+#undef pthread_rwlock_trywrlock
+#undef pthread_rwlock_unlock
+#undef pthread_rwlock_wrlock
+#undef pthread_rwlockattr_destroy
+#undef pthread_rwlockattr_getpshared
+#undef pthread_rwlockattr_init
+#undef pthread_rwlockattr_setpshared
+#undef pthread_self
+#undef pthread_set_name_np
+#undef pthread_setaffinity_np
+#undef pthread_setcancelstate
+#undef pthread_setcanceltype
+#undef pthread_setconcurrency
+#undef pthread_setprio
+#undef pthread_setschedparam
+#undef pthread_setspecific
+#undef pthread_sigmask
+#undef pthread_single_np
+#undef pthread_spin_destroy
+#undef pthread_spin_init
+#undef pthread_spin_lock
+#undef pthread_spin_trylock
+#undef pthread_spin_unlock
+#undef pthread_suspend_all_np
+#undef pthread_suspend_np
+#undef pthread_switch_add_np
+#undef pthread_switch_delete_np
+#undef pthread_testcancel
+#undef pthread_timedjoin_np
+#undef pthread_yield
+#undef read
+#undef readv
+#undef recvfrom
+#undef recvmsg
+#undef recvmmsg
+#undef select
+#undef sem_close
+#undef sem_destroy
+#undef sem_getvalue
+#undef sem_init
+#undef sem_open
+#undef sem_post
+#undef sem_timedwait
+#undef sem_trywait
+#undef sem_unlink
+#undef sem_wait
+#undef sendmsg
+#undef sendmmsg
+#undef sendto
+#undef setsockopt
+#undef sigaction
+#undef sigprocmask
+#undef sigsuspend
+#undef socket
+#undef socketpair
+#undef usleep
+#undef wait4
+#undef wait6
+#undef waitpid
+#undef write
+#undef writev
+
+#if 0
+#undef creat
+#undef fchflags
+#undef fchmod
+#undef ftrylockfile
+#undef msync
+#undef nfssvc
+#undef pause
+#undef sched_yield
+#undef sendfile
+#undef shutdown
+#undef sigaltstack
+#undef sigpending
+#undef sigreturn
+#undef sigsetmask
+#undef sleep
+#undef system
+#undef tcdrain
+#undef wait
+#endif /* 0 */
+
+#ifdef _SIGNAL_H_
+int _sigaction(int, const struct sigaction *, struct sigaction *);
+#endif
+
+#ifdef _SYS_EVENT_H_
+int _kevent(int, const struct kevent *, int, struct kevent *,
+ int, const struct timespec *);
+#endif
+
+#ifdef _SYS_FCNTL_H_
+int _flock(int, int);
+#endif
+
+#undef err
+#undef warn
+#undef nsdispatch
+#endif /* __rtems__ */
+
#endif /* _UN_NAMESPACE_H_ */
diff --git a/freebsd/lib/libc/inet/inet_addr.c b/freebsd/lib/libc/inet/inet_addr.c
index e0980f6c..3df3b0c0 100644
--- a/freebsd/lib/libc/inet/inet_addr.c
+++ b/freebsd/lib/libc/inet/inet_addr.c
@@ -75,7 +75,6 @@ __FBSDID("$FreeBSD$");
#include "port_before.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <netinet/in.h>
diff --git a/freebsd/lib/libc/inet/inet_lnaof.c b/freebsd/lib/libc/inet/inet_lnaof.c
index 13699bed..868570bf 100644
--- a/freebsd/lib/libc/inet/inet_lnaof.c
+++ b/freebsd/lib/libc/inet/inet_lnaof.c
@@ -49,8 +49,7 @@ __FBSDID("$FreeBSD$");
* number formats.
*/
in_addr_t
-inet_lnaof(in)
- struct in_addr in;
+inet_lnaof(struct in_addr in)
{
in_addr_t i = ntohl(in.s_addr);
diff --git a/freebsd/lib/libc/inet/inet_makeaddr.c b/freebsd/lib/libc/inet/inet_makeaddr.c
index 8c60363d..f2637342 100644
--- a/freebsd/lib/libc/inet/inet_makeaddr.c
+++ b/freebsd/lib/libc/inet/inet_makeaddr.c
@@ -48,8 +48,7 @@ __FBSDID("$FreeBSD$");
* building addresses stored in the ifnet structure.
*/
struct in_addr
-inet_makeaddr(net, host)
- in_addr_t net, host;
+inet_makeaddr(in_addr_t net, in_addr_t host)
{
struct in_addr a;
diff --git a/freebsd/lib/libc/inet/inet_net_ntop.c b/freebsd/lib/libc/inet/inet_net_ntop.c
index 49e20fb8..31c04aba 100644
--- a/freebsd/lib/libc/inet/inet_net_ntop.c
+++ b/freebsd/lib/libc/inet/inet_net_ntop.c
@@ -59,12 +59,7 @@ static char * inet_net_ntop_ipv6(const u_char *src, int bits, char *dst,
* Paul Vixie (ISC), July 1996
*/
char *
-inet_net_ntop(af, src, bits, dst, size)
- int af;
- const void *src;
- int bits;
- char *dst;
- size_t size;
+inet_net_ntop(int af, const void *src, int bits, char *dst, size_t size)
{
switch (af) {
case AF_INET:
@@ -91,11 +86,7 @@ inet_net_ntop(af, src, bits, dst, size)
* Paul Vixie (ISC), July 1996
*/
static char *
-inet_net_ntop_ipv4(src, bits, dst, size)
- const u_char *src;
- int bits;
- char *dst;
- size_t size;
+inet_net_ntop_ipv4(const u_char *src, int bits, char *dst, size_t size)
{
char *odst = dst;
char *t;
diff --git a/freebsd/lib/libc/inet/inet_neta.c b/freebsd/lib/libc/inet/inet_neta.c
index 14b75c18..2cab5934 100644
--- a/freebsd/lib/libc/inet/inet_neta.c
+++ b/freebsd/lib/libc/inet/inet_neta.c
@@ -54,10 +54,7 @@ __FBSDID("$FreeBSD$");
* Paul Vixie (ISC), July 1996
*/
char *
-inet_neta(src, dst, size)
- in_addr_t src;
- char *dst;
- size_t size;
+inet_neta(in_addr_t src, char *dst, size_t size)
{
char *odst = dst;
char *tp;
diff --git a/freebsd/lib/libc/inet/inet_netof.c b/freebsd/lib/libc/inet/inet_netof.c
index b782395e..6dda5b1b 100644
--- a/freebsd/lib/libc/inet/inet_netof.c
+++ b/freebsd/lib/libc/inet/inet_netof.c
@@ -48,8 +48,7 @@ __FBSDID("$FreeBSD$");
* address; handles class a/b/c network #'s.
*/
in_addr_t
-inet_netof(in)
- struct in_addr in;
+inet_netof(struct in_addr in)
{
in_addr_t i = ntohl(in.s_addr);
diff --git a/freebsd/lib/libc/inet/inet_network.c b/freebsd/lib/libc/inet/inet_network.c
index 88a760c4..3c087ec4 100644
--- a/freebsd/lib/libc/inet/inet_network.c
+++ b/freebsd/lib/libc/inet/inet_network.c
@@ -50,8 +50,7 @@ __FBSDID("$FreeBSD$");
* network numbers.
*/
in_addr_t
-inet_network(cp)
- const char *cp;
+inet_network(const char *cp)
{
in_addr_t val, base, n;
char c;
diff --git a/freebsd/lib/libc/inet/inet_ntop.c b/freebsd/lib/libc/inet/inet_ntop.c
index 5cd6a7e0..a3f49d2f 100644
--- a/freebsd/lib/libc/inet/inet_ntop.c
+++ b/freebsd/lib/libc/inet/inet_ntop.c
@@ -26,7 +26,6 @@ __FBSDID("$FreeBSD$");
#include "port_before.h"
#include <rtems/bsd/sys/param.h>
-#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
@@ -47,7 +46,7 @@ __FBSDID("$FreeBSD$");
static const char *inet_ntop4(const u_char *src, char *dst, socklen_t size);
static const char *inet_ntop6(const u_char *src, char *dst, socklen_t size);
-/* char *
+/* const char *
* inet_ntop(af, src, dst, size)
* convert a network format address to presentation format.
* return:
@@ -171,8 +170,10 @@ inet_ntop6(const u_char *src, char *dst, socklen_t size)
if (i == 6 && best.base == 0 && (best.len == 6 ||
(best.len == 7 && words[7] != 0x0001) ||
(best.len == 5 && words[5] == 0xffff))) {
- if (!inet_ntop4(src+12, tp, sizeof tmp - (tp - tmp)))
+ if (!inet_ntop4(src+12, tp, sizeof tmp - (tp - tmp))) {
+ errno = ENOSPC;
return (NULL);
+ }
tp += strlen(tp);
break;
}
diff --git a/freebsd/lib/libc/inet/inet_pton.c b/freebsd/lib/libc/inet/inet_pton.c
index 738b9b50..605076e5 100644
--- a/freebsd/lib/libc/inet/inet_pton.c
+++ b/freebsd/lib/libc/inet/inet_pton.c
@@ -25,7 +25,6 @@ __FBSDID("$FreeBSD$");
#include "port_before.h"
#include <rtems/bsd/sys/param.h>
-#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
diff --git a/freebsd/lib/libc/inet/nsap_addr.c b/freebsd/lib/libc/inet/nsap_addr.c
index 5489f983..e46d8848 100644
--- a/freebsd/lib/libc/inet/nsap_addr.c
+++ b/freebsd/lib/libc/inet/nsap_addr.c
@@ -25,7 +25,6 @@ __FBSDID("$FreeBSD$");
#include "port_before.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/socket.h>
diff --git a/freebsd/lib/libc/isc/ev_timers.c b/freebsd/lib/libc/isc/ev_timers.c
index 74dda8ea..56efe06f 100644
--- a/freebsd/lib/libc/isc/ev_timers.c
+++ b/freebsd/lib/libc/isc/ev_timers.c
@@ -119,7 +119,7 @@ evCmpTime(struct timespec a, struct timespec b) {
}
struct timespec
-evNowTime() {
+evNowTime(void) {
struct timeval now;
#ifdef CLOCK_REALTIME
struct timespec tsnow;
@@ -138,7 +138,7 @@ evNowTime() {
}
struct timespec
-evUTCTime() {
+evUTCTime(void) {
struct timeval now;
#ifdef CLOCK_REALTIME
struct timespec tsnow;
diff --git a/freebsd/lib/libc/isc/eventlib_p.h b/freebsd/lib/libc/isc/eventlib_p.h
index 678f5ee7..495e8d1e 100644
--- a/freebsd/lib/libc/isc/eventlib_p.h
+++ b/freebsd/lib/libc/isc/eventlib_p.h
@@ -27,7 +27,6 @@
#define _EVENTLIB_P_H
#include <rtems/bsd/sys/param.h>
-#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/un.h>
diff --git a/freebsd/lib/libc/nameser/ns_samedomain.c b/freebsd/lib/libc/nameser/ns_samedomain.c
index 6bfe42d2..754e23b9 100644
--- a/freebsd/lib/libc/nameser/ns_samedomain.c
+++ b/freebsd/lib/libc/nameser/ns_samedomain.c
@@ -36,7 +36,7 @@ __FBSDID("$FreeBSD$");
* Check whether a name belongs to a domain.
*
* Inputs:
- *\li a - the domain whose ancestory is being verified
+ *\li a - the domain whose ancestry is being verified
*\li b - the potential ancestor we're checking against
*
* Return:
diff --git a/freebsd/lib/libc/net/base64.c b/freebsd/lib/libc/net/base64.c
index 227dc68e..86366ec2 100644
--- a/freebsd/lib/libc/net/base64.c
+++ b/freebsd/lib/libc/net/base64.c
@@ -45,7 +45,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/socket.h>
@@ -195,12 +194,10 @@ b64_ntop(u_char const *src, size_t srclength, char *target, size_t targsize) {
*/
int
-b64_pton(src, target, targsize)
- char const *src;
- u_char *target;
- size_t targsize;
+b64_pton(const char *src, u_char *target, size_t targsize)
{
int tarindex, state, ch;
+ u_char nextbyte;
char *pos;
state = 0;
@@ -214,7 +211,7 @@ b64_pton(src, target, targsize)
break;
pos = strchr(Base64, ch);
- if (pos == 0) /* A non-base64 character. */
+ if (pos == NULL) /* A non-base64 character. */
return (-1);
switch (state) {
@@ -228,22 +225,28 @@ b64_pton(src, target, targsize)
break;
case 1:
if (target) {
- if ((size_t)tarindex + 1 >= targsize)
+ if ((size_t)tarindex >= targsize)
return (-1);
target[tarindex] |= (pos - Base64) >> 4;
- target[tarindex+1] = ((pos - Base64) & 0x0f)
- << 4 ;
+ nextbyte = ((pos - Base64) & 0x0f) << 4;
+ if ((size_t)tarindex + 1 < targsize)
+ target[tarindex + 1] = nextbyte;
+ else if (nextbyte)
+ return (-1);
}
tarindex++;
state = 2;
break;
case 2:
if (target) {
- if ((size_t)tarindex + 1 >= targsize)
+ if ((size_t)tarindex >= targsize)
return (-1);
target[tarindex] |= (pos - Base64) >> 2;
- target[tarindex+1] = ((pos - Base64) & 0x03)
- << 6;
+ nextbyte = ((pos - Base64) & 0x03) << 6;
+ if ((size_t)tarindex + 1 < targsize)
+ target[tarindex + 1] = nextbyte;
+ else if (nextbyte)
+ return (-1);
}
tarindex++;
state = 3;
@@ -301,7 +304,8 @@ b64_pton(src, target, targsize)
* zeros. If we don't check them, they become a
* subliminal channel.
*/
- if (target && target[tarindex] != 0)
+ if (target && (size_t)tarindex < targsize &&
+ target[tarindex] != 0)
return (-1);
}
} else {
diff --git a/freebsd/lib/libc/net/ether_addr.c b/freebsd/lib/libc/net/ether_addr.c
index d5a35de2..ad2449c6 100644
--- a/freebsd/lib/libc/net/ether_addr.c
+++ b/freebsd/lib/libc/net/ether_addr.c
@@ -42,7 +42,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/socket.h>
@@ -74,11 +73,13 @@ ether_line(const char *l, struct ether_addr *e, char *hostname)
i = sscanf(l, "%x:%x:%x:%x:%x:%x %s", &o[0], &o[1], &o[2], &o[3],
&o[4], &o[5], hostname);
- if (i != 7)
- return (i);
- for (i=0; i<6; i++)
- e->octet[i] = o[i];
- return (0);
+ if (i == 7) {
+ for (i = 0; i < 6; i++)
+ e->octet[i] = o[i];
+ return (0);
+ } else {
+ return (-1);
+ }
}
/*
@@ -150,7 +151,7 @@ ether_ntohost(char *hostname, const struct ether_addr *e)
char *yp_domain;
#endif
- if ((fp = fopen(_PATH_ETHERS, "r")) == NULL)
+ if ((fp = fopen(_PATH_ETHERS, "re")) == NULL)
return (1);
while (fgets(buf,BUFSIZ,fp)) {
if (buf[0] == '#')
@@ -199,7 +200,7 @@ ether_hostton(const char *hostname, struct ether_addr *e)
char *yp_domain;
#endif
- if ((fp = fopen(_PATH_ETHERS, "r")) == NULL)
+ if ((fp = fopen(_PATH_ETHERS, "re")) == NULL)
return (1);
while (fgets(buf,BUFSIZ,fp)) {
if (buf[0] == '#')
diff --git a/freebsd/lib/libc/net/getaddrinfo.c b/freebsd/lib/libc/net/getaddrinfo.c
index 65e905c3..0fe1b5d8 100644
--- a/freebsd/lib/libc/net/getaddrinfo.c
+++ b/freebsd/lib/libc/net/getaddrinfo.c
@@ -32,8 +32,6 @@
*/
/*
- * "#ifdef FAITH" part is local hack for supporting IPv4-v6 translator.
- *
* Issues to be discussed:
* - Return values. There are nonstandard return values defined and used
* in the source code. This is because RFC2553 is silent about which error
@@ -59,7 +57,6 @@
__FBSDID("$FreeBSD$");
#include "namespace.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/socket.h>
#include <net/if.h>
@@ -68,7 +65,6 @@ __FBSDID("$FreeBSD$");
#include <ifaddrs.h>
#include <sys/queue.h>
#ifdef INET6
-#include <net/if_var.h>
#include <sys/sysctl.h>
#include <sys/ioctl.h>
#include <netinet6/in6_var.h>
@@ -98,15 +94,12 @@ __FBSDID("$FreeBSD$");
#include <stdarg.h>
#include <nsswitch.h>
#include "un-namespace.h"
+#include "netdb_private.h"
#include "libc_private.h"
#ifdef NS_CACHING
#include "nscache.h"
#endif
-#if defined(__KAME__) && defined(INET6)
-# define FAITH
-#endif
-
#define ANY 0
#define YES 1
#define NO 0
@@ -146,13 +139,20 @@ static const struct afd {
offsetof(struct sockaddr_in6, sin6_addr),
in6_addrany, in6_loopback, 1},
#define N_INET 1
+#define N_LOCAL 2
#else
#define N_INET 0
+#define N_LOCAL 1
#endif
{PF_INET, sizeof(struct in_addr),
sizeof(struct sockaddr_in),
offsetof(struct sockaddr_in, sin_addr),
in_addrany, in_loopback, 0},
+#define sizeofmember(type, member) (sizeof(((type *)0)->member))
+ {PF_LOCAL, sizeofmember(struct sockaddr_un, sun_path),
+ sizeof(struct sockaddr_un),
+ offsetof(struct sockaddr_un, sun_path),
+ NULL, NULL, 0},
{0, 0, 0, 0, NULL, NULL, 0},
};
@@ -160,30 +160,49 @@ struct explore {
int e_af;
int e_socktype;
int e_protocol;
- const char *e_protostr;
int e_wild;
-#define WILD_AF(ex) ((ex)->e_wild & 0x01)
-#define WILD_SOCKTYPE(ex) ((ex)->e_wild & 0x02)
-#define WILD_PROTOCOL(ex) ((ex)->e_wild & 0x04)
+#define AF_ANY 0x01
+#define SOCKTYPE_ANY 0x02
+#define PROTOCOL_ANY 0x04
+#define WILD_AF(ex) ((ex)->e_wild & AF_ANY)
+#define WILD_SOCKTYPE(ex) ((ex)->e_wild & SOCKTYPE_ANY)
+#define WILD_PROTOCOL(ex) ((ex)->e_wild & PROTOCOL_ANY)
};
static const struct explore explore[] = {
-#if 0
- { PF_LOCAL, ANY, ANY, NULL, 0x01 },
-#endif
#ifdef INET6
- { PF_INET6, SOCK_DGRAM, IPPROTO_UDP, "udp", 0x07 },
- { PF_INET6, SOCK_STREAM, IPPROTO_TCP, "tcp", 0x07 },
- { PF_INET6, SOCK_STREAM, IPPROTO_SCTP, "sctp", 0x03 },
- { PF_INET6, SOCK_SEQPACKET, IPPROTO_SCTP, "sctp", 0x07 },
- { PF_INET6, SOCK_RAW, ANY, NULL, 0x05 },
+ { PF_INET6, SOCK_DGRAM, IPPROTO_UDP,
+ AF_ANY | SOCKTYPE_ANY | PROTOCOL_ANY },
+ { PF_INET6, SOCK_STREAM, IPPROTO_TCP,
+ AF_ANY | SOCKTYPE_ANY | PROTOCOL_ANY },
+ { PF_INET6, SOCK_STREAM, IPPROTO_SCTP,
+ AF_ANY | SOCKTYPE_ANY },
+ { PF_INET6, SOCK_SEQPACKET, IPPROTO_SCTP,
+ AF_ANY | SOCKTYPE_ANY | PROTOCOL_ANY },
+ { PF_INET6, SOCK_DGRAM, IPPROTO_UDPLITE,
+ AF_ANY | SOCKTYPE_ANY },
+ { PF_INET6, SOCK_RAW, ANY,
+ AF_ANY | PROTOCOL_ANY },
#endif
- { PF_INET, SOCK_DGRAM, IPPROTO_UDP, "udp", 0x07 },
- { PF_INET, SOCK_STREAM, IPPROTO_TCP, "tcp", 0x07 },
- { PF_INET, SOCK_STREAM, IPPROTO_SCTP, "sctp", 0x03 },
- { PF_INET, SOCK_SEQPACKET, IPPROTO_SCTP, "sctp", 0x07 },
- { PF_INET, SOCK_RAW, ANY, NULL, 0x05 },
- { -1, 0, 0, NULL, 0 },
+ { PF_INET, SOCK_DGRAM, IPPROTO_UDP,
+ AF_ANY | SOCKTYPE_ANY | PROTOCOL_ANY },
+ { PF_INET, SOCK_STREAM, IPPROTO_TCP,
+ AF_ANY | SOCKTYPE_ANY | PROTOCOL_ANY },
+ { PF_INET, SOCK_STREAM, IPPROTO_SCTP,
+ AF_ANY | SOCKTYPE_ANY },
+ { PF_INET, SOCK_SEQPACKET, IPPROTO_SCTP,
+ AF_ANY | SOCKTYPE_ANY | PROTOCOL_ANY },
+ { PF_INET, SOCK_DGRAM, IPPROTO_UDPLITE,
+ AF_ANY | SOCKTYPE_ANY },
+ { PF_INET, SOCK_RAW, ANY,
+ AF_ANY | PROTOCOL_ANY },
+ { PF_LOCAL, SOCK_DGRAM, ANY,
+ AF_ANY | SOCKTYPE_ANY | PROTOCOL_ANY },
+ { PF_LOCAL, SOCK_STREAM, ANY,
+ AF_ANY | SOCKTYPE_ANY | PROTOCOL_ANY },
+ { PF_LOCAL, SOCK_SEQPACKET, ANY,
+ AF_ANY | SOCKTYPE_ANY | PROTOCOL_ANY },
+ { -1, 0, 0, 0 },
};
#ifdef INET6
@@ -380,7 +399,7 @@ getaddrinfo(const char *hostname, const char *servname,
struct addrinfo *pai;
const struct afd *afd;
const struct explore *ex;
- struct addrinfo *afailist[sizeof(afdl)/sizeof(afdl[0])];
+ struct addrinfo *afailist[nitems(afdl)];
struct addrinfo *afai_unspec;
int found;
int numeric = 0;
@@ -416,6 +435,7 @@ getaddrinfo(const char *hostname, const char *servname,
ERR(EAI_BADFLAGS);
switch (hints->ai_family) {
case PF_UNSPEC:
+ case PF_LOCAL:
case PF_INET:
#ifdef INET6
case PF_INET6:
@@ -452,6 +472,24 @@ getaddrinfo(const char *hostname, const char *servname,
}
/*
+ * RFC 3493: AI_ALL and AI_V4MAPPED are effective only against
+ * AF_INET6 query. They need to be ignored if specified in other
+ * occasions.
+ */
+ switch (pai->ai_flags & (AI_ALL | AI_V4MAPPED)) {
+ case AI_V4MAPPED:
+ case AI_ALL | AI_V4MAPPED:
+#ifdef INET6
+ if (pai->ai_family != AF_INET6)
+ pai->ai_flags &= ~(AI_ALL | AI_V4MAPPED);
+ break;
+#endif
+ case AI_ALL:
+ pai->ai_flags &= ~(AI_ALL | AI_V4MAPPED);
+ break;
+ }
+
+ /*
* check for special cases. (1) numeric servname is disallowed if
* socktype/protocol are left unspecified. (2) servname is disallowed
* for raw and other inet{,6} sockets.
@@ -699,13 +737,13 @@ get_addrselectpolicy(struct policyhead *head)
char *buf;
struct in6_addrpolicy *pol, *ep;
- if (sysctl(mib, sizeof(mib) / sizeof(mib[0]), NULL, &l, NULL, 0) < 0)
+ if (sysctl(mib, nitems(mib), NULL, &l, NULL, 0) < 0)
return (0);
if (l == 0)
return (0);
if ((buf = malloc(l)) == NULL)
return (0);
- if (sysctl(mib, sizeof(mib) / sizeof(mib[0]), buf, &l, NULL, 0) < 0) {
+ if (sysctl(mib, nitems(mib), buf, &l, NULL, 0) < 0) {
free(buf);
return (0);
}
@@ -760,10 +798,9 @@ match_addrselectpolicy(struct sockaddr *addr, struct policyhead *head)
memset(&key, 0, sizeof(key));
key.sin6_family = AF_INET6;
key.sin6_len = sizeof(key);
- key.sin6_addr.s6_addr[10] = 0xff;
- key.sin6_addr.s6_addr[11] = 0xff;
- memcpy(&key.sin6_addr.s6_addr[12],
- &((struct sockaddr_in *)addr)->sin_addr, 4);
+ _map_v4v6_address(
+ (char *)&((struct sockaddr_in *)addr)->sin_addr,
+ (char *)&key.sin6_addr);
break;
default:
return(NULL);
@@ -840,8 +877,19 @@ set_source(struct ai_order *aio, struct policyhead *ph)
get_port(&ai, "1", 0);
/* open a socket to get the source address for the given dst */
- if ((s = _socket(ai.ai_family, ai.ai_socktype, ai.ai_protocol)) < 0)
+ if ((s = _socket(ai.ai_family, ai.ai_socktype | SOCK_CLOEXEC,
+ ai.ai_protocol)) < 0)
return; /* give up */
+#ifdef INET6
+ if (ai.ai_family == AF_INET6) {
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ai.ai_addr;
+ int off = 0;
+
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
+ (void)_setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
+ (char *)&off, sizeof(off));
+ }
+#endif
if (_connect(s, ai.ai_addr, ai.ai_addrlen) < 0)
goto cleanup;
srclen = ai.ai_addrlen;
@@ -1137,11 +1185,14 @@ explore_null(const struct addrinfo *pai, const char *servname,
*res = NULL;
ai = NULL;
+ if (pai->ai_family == PF_LOCAL)
+ return (0);
+
/*
* filter out AFs that are not supported by the kernel
* XXX errno?
*/
- s = _socket(pai->ai_family, SOCK_DGRAM, 0);
+ s = _socket(pai->ai_family, SOCK_DGRAM | SOCK_CLOEXEC, 0);
if (s < 0) {
if (errno != EMFILE)
return 0;
@@ -1177,10 +1228,13 @@ explore_numeric(const struct addrinfo *pai, const char *hostname,
const char *servname, struct addrinfo **res, const char *canonname)
{
const struct afd *afd;
- struct addrinfo *ai;
+ struct addrinfo *ai, ai0;
int error;
- char pton[PTON_MAX];
+ char pton[PTON_MAX], path[PATH_MAX], *p;
+#ifdef CTASSERT
+ CTASSERT(sizeofmember(struct sockaddr_un, sun_path) <= PATH_MAX);
+#endif
*res = NULL;
ai = NULL;
@@ -1189,6 +1243,15 @@ explore_numeric(const struct addrinfo *pai, const char *hostname,
return 0;
switch (afd->a_af) {
+ case AF_LOCAL:
+ if (hostname[0] != '/')
+ ERR(EAI_NONAME);
+ if (strlen(hostname) > afd->a_addrlen)
+ ERR(EAI_MEMORY);
+ /* NUL-termination does not need to be guaranteed. */
+ strncpy(path, hostname, afd->a_addrlen);
+ p = &path[0];
+ break;
case AF_INET:
/*
* RFC3493 requires getaddrinfo() to accept AF_INET formats
@@ -1199,17 +1262,30 @@ explore_numeric(const struct addrinfo *pai, const char *hostname,
*/
if (inet_aton(hostname, (struct in_addr *)pton) != 1)
return 0;
+ p = pton;
break;
default:
- if (inet_pton(afd->a_af, hostname, pton) != 1)
- return 0;
+ if (inet_pton(afd->a_af, hostname, pton) != 1) {
+ if (pai->ai_family != AF_INET6 ||
+ (pai->ai_flags & AI_V4MAPPED) != AI_V4MAPPED)
+ return 0;
+ if (inet_aton(hostname, (struct in_addr *)pton) != 1)
+ return 0;
+ afd = &afdl[N_INET];
+ ai0 = *pai;
+ ai0.ai_family = AF_INET;
+ pai = &ai0;
+ }
+ p = pton;
break;
}
if (pai->ai_family == afd->a_af) {
- GET_AI(ai, afd, pton);
+ GET_AI(ai, afd, p);
GET_PORT(ai, servname);
- if ((pai->ai_flags & AI_CANONNAME)) {
+ if ((pai->ai_family == AF_INET ||
+ pai->ai_family == AF_INET6) &&
+ (pai->ai_flags & AI_CANONNAME)) {
/*
* Set the numeric address itself as the canonical
* name, based on a clarification in RFC3493.
@@ -1316,45 +1392,13 @@ get_ai(const struct addrinfo *pai, const struct afd *afd, const char *addr)
{
char *p;
struct addrinfo *ai;
-#ifdef FAITH
- struct in6_addr faith_prefix;
- char *fp_str;
- int translate = 0;
-#endif
+#ifdef INET6
+ struct in6_addr mapaddr;
-#ifdef FAITH
- /*
- * Transfrom an IPv4 addr into a special IPv6 addr format for
- * IPv6->IPv4 translation gateway. (only TCP is supported now)
- *
- * +-----------------------------------+------------+
- * | faith prefix part (12 bytes) | embedded |
- * | | IPv4 addr part (4 bytes)
- * +-----------------------------------+------------+
- *
- * faith prefix part is specified as ascii IPv6 addr format
- * in environmental variable GAI.
- * For FAITH to work correctly, routing to faith prefix must be
- * setup toward a machine where a FAITH daemon operates.
- * Also, the machine must enable some mechanizm
- * (e.g. faith interface hack) to divert those packet with
- * faith prefixed destination addr to user-land FAITH daemon.
- */
- fp_str = getenv("GAI");
- if (fp_str && inet_pton(AF_INET6, fp_str, &faith_prefix) == 1 &&
- afd->a_af == AF_INET && pai->ai_socktype == SOCK_STREAM) {
- u_int32_t v4a;
- u_int8_t v4a_top;
-
- memcpy(&v4a, addr, sizeof v4a);
- v4a_top = v4a >> IN_CLASSA_NSHIFT;
- if (!IN_MULTICAST(v4a) && !IN_EXPERIMENTAL(v4a) &&
- v4a_top != 0 && v4a != IN_LOOPBACKNET) {
- afd = &afdl[N_INET6];
- memcpy(&faith_prefix.s6_addr[12], addr,
- sizeof(struct in_addr));
- translate = 1;
- }
+ if (afd->a_af == AF_INET && (pai->ai_flags & AI_V4MAPPED) != 0) {
+ afd = &afdl[N_INET6];
+ _map_v4v6_address(addr, (char *)&mapaddr);
+ addr = (char *)&mapaddr;
}
#endif
@@ -1368,13 +1412,14 @@ get_ai(const struct addrinfo *pai, const struct afd *afd, const char *addr)
memset(ai->ai_addr, 0, (size_t)afd->a_socklen);
ai->ai_addr->sa_len = afd->a_socklen;
ai->ai_addrlen = afd->a_socklen;
+ if (ai->ai_family == PF_LOCAL) {
+ size_t n = strnlen(addr, afd->a_addrlen);
+
+ ai->ai_addrlen -= afd->a_addrlen - n;
+ ai->ai_addr->sa_len -= afd->a_addrlen - n;
+ }
ai->ai_addr->sa_family = ai->ai_family = afd->a_af;
p = (char *)(void *)(ai->ai_addr);
-#ifdef FAITH
- if (translate == 1)
- memcpy(p + afd->a_off, &faith_prefix, (size_t)afd->a_addrlen);
- else
-#endif
memcpy(p + afd->a_off, addr, (size_t)afd->a_addrlen);
return ai;
}
@@ -1431,6 +1476,9 @@ get_port(struct addrinfo *ai, const char *servname, int matchonly)
if (servname == NULL)
return 0;
switch (ai->ai_family) {
+ case AF_LOCAL:
+ /* AF_LOCAL ignores servname silently. */
+ return (0);
case AF_INET:
#ifdef AF_INET6
case AF_INET6:
@@ -1486,6 +1534,9 @@ get_port(struct addrinfo *ai, const char *servname, int matchonly)
case IPPROTO_SCTP:
proto = "sctp";
break;
+ case IPPROTO_UDPLITE:
+ proto = "udplite";
+ break;
default:
proto = NULL;
break;
@@ -1559,7 +1610,7 @@ addrconfig(struct addrinfo *pai)
if (seen_inet)
continue;
sin = (struct sockaddr_in *)(ifa->ifa_addr);
- if (IN_LOOPBACK(htonl(sin->sin_addr.s_addr)))
+ if (htonl(sin->sin_addr.s_addr) == INADDR_LOOPBACK)
continue;
seen_inet = 1;
break;
@@ -1603,7 +1654,7 @@ is_ifdisabled(char *name)
struct in6_ndireq nd;
int fd;
- if ((fd = _socket(AF_INET6, SOCK_DGRAM, 0)) < 0)
+ if ((fd = _socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)) < 0)
return (-1);
memset(&nd, 0, sizeof(nd));
strlcpy(nd.ifname, name, sizeof(nd.ifname));
@@ -2114,7 +2165,11 @@ getanswer(const querybuf *answer, int anslen, const char *qname, int qtype,
return sentinel.ai_next;
}
- RES_SET_H_ERRNO(res, NO_RECOVERY);
+ /*
+ * We could have walked a CNAME chain, but the ultimate target
+ * may not have what we looked for.
+ */
+ RES_SET_H_ERRNO(res, ntohs(hp->ancount) > 0 ? NO_DATA : NO_RECOVERY);
return NULL;
}
@@ -2188,7 +2243,7 @@ addr4sort(struct addrinfo *sentinel, res_state res)
static int
_dns_getaddrinfo(void *rv, void *cb_data, va_list ap)
{
- struct addrinfo *ai;
+ struct addrinfo *ai, ai0;
querybuf *buf, *buf2;
const char *hostname;
const struct addrinfo *pai;
@@ -2196,6 +2251,8 @@ _dns_getaddrinfo(void *rv, void *cb_data, va_list ap)
struct res_target q, q2;
res_state res;
+ ai = NULL;
+
hostname = va_arg(ap, char *);
pai = va_arg(ap, const struct addrinfo *);
@@ -2204,6 +2261,8 @@ _dns_getaddrinfo(void *rv, void *cb_data, va_list ap)
memset(&sentinel, 0, sizeof(sentinel));
cur = &sentinel;
+ res = __res_state();
+
buf = malloc(sizeof(*buf));
if (!buf) {
RES_SET_H_ERRNO(res, NETDB_INTERNAL);
@@ -2216,6 +2275,13 @@ _dns_getaddrinfo(void *rv, void *cb_data, va_list ap)
return NS_NOTFOUND;
}
+ if (pai->ai_family == AF_INET6 &&
+ (pai->ai_flags & AI_V4MAPPED) == AI_V4MAPPED) {
+ ai0 = *pai;
+ ai0.ai_family = AF_UNSPEC;
+ pai = &ai0;
+ }
+
switch (pai->ai_family) {
case AF_UNSPEC:
q.name = hostname;
@@ -2250,7 +2316,6 @@ _dns_getaddrinfo(void *rv, void *cb_data, va_list ap)
return NS_UNAVAIL;
}
- res = __res_state();
if ((res->options & RES_INIT) == 0 && res_ninit(res) == -1) {
RES_SET_H_ERRNO(res, NETDB_INTERNAL);
free(buf);
@@ -2266,20 +2331,24 @@ _dns_getaddrinfo(void *rv, void *cb_data, va_list ap)
/* prefer IPv6 */
if (q.next) {
ai = getanswer(buf2, q2.n, q2.name, q2.qtype, pai, res);
- if (ai) {
+ if (ai != NULL) {
cur->ai_next = ai;
while (cur && cur->ai_next)
cur = cur->ai_next;
}
}
- ai = getanswer(buf, q.n, q.name, q.qtype, pai, res);
- if (ai)
- cur->ai_next = ai;
+ if (ai == NULL || pai->ai_family != AF_UNSPEC ||
+ (pai->ai_flags & (AI_ALL | AI_V4MAPPED)) != AI_V4MAPPED) {
+ ai = getanswer(buf, q.n, q.name, q.qtype, pai, res);
+ if (ai != NULL)
+ cur->ai_next = ai;
+ }
free(buf);
free(buf2);
if (sentinel.ai_next == NULL)
switch (res->res_h_errno) {
case HOST_NOT_FOUND:
+ case NO_DATA:
return NS_NOTFOUND;
case TRY_AGAIN:
return NS_TRYAGAIN;
@@ -2294,7 +2363,7 @@ static void
_sethtent(FILE **hostf)
{
if (!*hostf)
- *hostf = fopen(_PATH_HOSTS, "r");
+ *hostf = fopen(_PATH_HOSTS, "re");
else
rewind(*hostf);
}
@@ -2318,7 +2387,7 @@ _gethtent(FILE **hostf, const char *name, const struct addrinfo *pai)
const char *addr;
char hostbuf[8*1024];
- if (!*hostf && !(*hostf = fopen(_PATH_HOSTS, "r")))
+ if (!*hostf && !(*hostf = fopen(_PATH_HOSTS, "re")))
return (NULL);
again:
if (!(p = fgets(hostbuf, sizeof hostbuf, *hostf)))
@@ -2356,6 +2425,9 @@ found:
hints.ai_socktype = SOCK_DGRAM;
hints.ai_protocol = 0;
hints.ai_flags = AI_NUMERICHOST;
+ if (pai->ai_family == AF_INET6 &&
+ (pai->ai_flags & AI_V4MAPPED) == AI_V4MAPPED)
+ hints.ai_flags |= AI_V4MAPPED;
error = getaddrinfo(addr, "0", &hints, &res0);
if (error)
goto again;
@@ -2383,6 +2455,20 @@ found:
return res0;
}
+static struct addrinfo *
+_getht(FILE **hostf, const char *name, const struct addrinfo *pai,
+ struct addrinfo *cur)
+{
+ struct addrinfo *p;
+
+ while ((p = _gethtent(hostf, name, pai)) != NULL) {
+ cur->ai_next = p;
+ while (cur && cur->ai_next)
+ cur = cur->ai_next;
+ }
+ return (cur);
+}
+
/*ARGSUSED*/
static int
_files_getaddrinfo(void *rv, void *cb_data, va_list ap)
@@ -2390,7 +2476,6 @@ _files_getaddrinfo(void *rv, void *cb_data, va_list ap)
const char *name;
const struct addrinfo *pai;
struct addrinfo sentinel, *cur;
- struct addrinfo *p;
FILE *hostf = NULL;
name = va_arg(ap, char *);
@@ -2400,11 +2485,19 @@ _files_getaddrinfo(void *rv, void *cb_data, va_list ap)
cur = &sentinel;
_sethtent(&hostf);
- while ((p = _gethtent(&hostf, name, pai)) != NULL) {
- cur->ai_next = p;
- while (cur && cur->ai_next)
- cur = cur->ai_next;
- }
+ if (pai->ai_family == AF_INET6 &&
+ (pai->ai_flags & (AI_ALL | AI_V4MAPPED)) == AI_V4MAPPED) {
+ struct addrinfo ai0 = *pai;
+
+ ai0.ai_flags &= ~AI_V4MAPPED;
+ cur = _getht(&hostf, name, &ai0, cur);
+ if (sentinel.ai_next == NULL) {
+ _sethtent(&hostf);
+ ai0.ai_flags |= AI_V4MAPPED;
+ cur = _getht(&hostf, name, &ai0, cur);
+ }
+ } else
+ cur = _getht(&hostf, name, pai, cur);
_endhtent(&hostf);
*((struct addrinfo **)rv) = sentinel.ai_next;
@@ -2464,6 +2557,9 @@ nextline:
hints = *pai;
hints.ai_flags = AI_NUMERICHOST;
+ if (pai->ai_family == AF_INET6 &&
+ (pai->ai_flags & AI_V4MAPPED) == AI_V4MAPPED)
+ hints.ai_flags |= AI_V4MAPPED;
error = getaddrinfo(addr, NULL, &hints, &res0);
if (error == 0) {
for (res = res0; res; res = res->ai_next) {
@@ -2511,15 +2607,46 @@ _yp_getaddrinfo(void *rv, void *cb_data, va_list ap)
memset(&sentinel, 0, sizeof(sentinel));
cur = &sentinel;
+ /* ipnodes.byname can hold both IPv4/v6 */
+ r = yp_match(ypdomain, "ipnodes.byname", name,
+ (int)strlen(name), &ypbuf, &ypbuflen);
+ if (r == 0) {
+ ai = _yphostent(ypbuf, pai);
+ if (ai) {
+ cur->ai_next = ai;
+ while (cur && cur->ai_next)
+ cur = cur->ai_next;
+ }
+ free(ypbuf);
+ }
+
+ if (ai != NULL) {
+ struct sockaddr_in6 *sin6;
+
+ switch (ai->ai_family) {
+ case AF_INET:
+ goto done;
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *)ai->ai_addr;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
+ goto done;
+ break;
+ }
+ }
+
/* hosts.byname is only for IPv4 (Solaris8) */
- if (pai->ai_family == PF_UNSPEC || pai->ai_family == PF_INET) {
+ if (pai->ai_family == AF_UNSPEC || pai->ai_family == AF_INET ||
+ ((pai->ai_family == AF_INET6 &&
+ (pai->ai_flags & AI_V4MAPPED) == AI_V4MAPPED) &&
+ (ai == NULL || (pai->ai_flags & AI_ALL) == AI_ALL))) {
r = yp_match(ypdomain, "hosts.byname", name,
(int)strlen(name), &ypbuf, &ypbuflen);
if (r == 0) {
struct addrinfo ai4;
ai4 = *pai;
- ai4.ai_family = AF_INET;
+ if (pai->ai_family == AF_UNSPEC)
+ ai4.ai_family = AF_INET;
ai = _yphostent(ypbuf, &ai4);
if (ai) {
cur->ai_next = ai;
@@ -2530,16 +2657,7 @@ _yp_getaddrinfo(void *rv, void *cb_data, va_list ap)
}
}
- /* ipnodes.byname can hold both IPv4/v6 */
- r = yp_match(ypdomain, "ipnodes.byname", name,
- (int)strlen(name), &ypbuf, &ypbuflen);
- if (r == 0) {
- ai = _yphostent(ypbuf, pai);
- if (ai)
- cur->ai_next = ai;
- free(ypbuf);
- }
-
+done:
if (sentinel.ai_next == NULL) {
RES_SET_H_ERRNO(__res_state(), HOST_NOT_FOUND);
return NS_NOTFOUND;
diff --git a/freebsd/lib/libc/net/gethostbydns.c b/freebsd/lib/libc/net/gethostbydns.c
index 764ec311..bae4d166 100644
--- a/freebsd/lib/libc/net/gethostbydns.c
+++ b/freebsd/lib/libc/net/gethostbydns.c
@@ -58,7 +58,6 @@ static char fromrcsid[] = "From: Id: gethnamaddr.c,v 8.23 1998/04/07 04:59:46 vi
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/socket.h>
#include <netinet/in.h>
@@ -90,7 +89,7 @@ static void addrsort(char **, int, res_state);
#endif
#ifdef DEBUG
-static void dprintf(char *, int, res_state) __printflike(1, 0);
+static void dbg_printf(char *, int, res_state) __printflike(1, 0);
#endif
#define MAXPACKET (64*1024)
@@ -109,10 +108,7 @@ int _dns_ttl_;
#ifdef DEBUG
static void
-dprintf(msg, num, res)
- char *msg;
- int num;
- res_state res;
+dbg_printf(char *msg, int num, res_state res)
{
if (res->options & RES_DEBUG) {
int save = errno;
@@ -122,7 +118,7 @@ dprintf(msg, num, res)
}
}
#else
-# define dprintf(msg, num, res) /*nada*/
+# define dbg_printf(msg, num, res) /*nada*/
#endif
#define BOUNDED_INCR(x) \
@@ -375,13 +371,13 @@ gethostanswer(const querybuf *answer, int anslen, const char *qname, int qtype,
bp += sizeof(align) - ((u_long)bp % sizeof(align));
if (bp + n >= ep) {
- dprintf("size (%d) too big\n", n, statp);
+ dbg_printf("size (%d) too big\n", n, statp);
had_error++;
continue;
}
if (hap >= &hed->h_addr_ptrs[_MAXADDRS-1]) {
if (!toobig++)
- dprintf("Too many addresses (%d)\n",
+ dbg_printf("Too many addresses (%d)\n",
_MAXADDRS, statp);
cp += n;
continue;
@@ -395,7 +391,7 @@ gethostanswer(const querybuf *answer, int anslen, const char *qname, int qtype,
}
break;
default:
- dprintf("Impossible condition (type=%d)\n", type,
+ dbg_printf("Impossible condition (type=%d)\n", type,
statp);
RES_SET_H_ERRNO(statp, NO_RECOVERY);
return (-1);
@@ -522,12 +518,12 @@ _dns_gethostbyname(void *rval, void *cb_data, va_list ap)
n = res_nsearch(statp, name, C_IN, type, buf->buf, sizeof(buf->buf));
if (n < 0) {
free(buf);
- dprintf("res_nsearch failed (%d)\n", n, statp);
+ dbg_printf("res_nsearch failed (%d)\n", n, statp);
*h_errnop = statp->res_h_errno;
return (NS_NOTFOUND);
} else if (n > sizeof(buf->buf)) {
free(buf);
- dprintf("static buffer is too small (%d)\n", n, statp);
+ dbg_printf("static buffer is too small (%d)\n", n, statp);
*h_errnop = statp->res_h_errno;
return (NS_UNAVAIL);
}
@@ -629,13 +625,13 @@ _dns_gethostbyaddr(void *rval, void *cb_data, va_list ap)
sizeof buf->buf);
if (n < 0) {
free(buf);
- dprintf("res_nquery failed (%d)\n", n, statp);
+ dbg_printf("res_nquery failed (%d)\n", n, statp);
*h_errnop = statp->res_h_errno;
return (NS_UNAVAIL);
}
if (n > sizeof buf->buf) {
free(buf);
- dprintf("static buffer is too small (%d)\n", n, statp);
+ dbg_printf("static buffer is too small (%d)\n", n, statp);
*h_errnop = statp->res_h_errno;
return (NS_UNAVAIL);
}
@@ -771,7 +767,7 @@ _sethostdnsent(int stayopen)
}
void
-_endhostdnsent()
+_endhostdnsent(void)
{
res_state statp;
diff --git a/freebsd/lib/libc/net/gethostbyht.c b/freebsd/lib/libc/net/gethostbyht.c
index f373fcbd..80a66922 100644
--- a/freebsd/lib/libc/net/gethostbyht.c
+++ b/freebsd/lib/libc/net/gethostbyht.c
@@ -74,7 +74,7 @@ void
_sethosthtent(int f, struct hostent_data *hed)
{
if (!hed->hostf)
- hed->hostf = fopen(_PATH_HOSTS, "r");
+ hed->hostf = fopen(_PATH_HOSTS, "re");
else
rewind(hed->hostf);
hed->stayopen = f;
@@ -98,7 +98,7 @@ gethostent_p(struct hostent *he, struct hostent_data *hed, int mapped,
int af, len;
char hostbuf[BUFSIZ + 1];
- if (!hed->hostf && !(hed->hostf = fopen(_PATH_HOSTS, "r"))) {
+ if (!hed->hostf && !(hed->hostf = fopen(_PATH_HOSTS, "re"))) {
RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
return (-1);
}
diff --git a/freebsd/lib/libc/net/gethostbynis.c b/freebsd/lib/libc/net/gethostbynis.c
index 959caae4..19d3f92a 100644
--- a/freebsd/lib/libc/net/gethostbynis.c
+++ b/freebsd/lib/libc/net/gethostbynis.c
@@ -93,7 +93,7 @@ _gethostbynis(const char *name, char *map, int af, struct hostent *he,
free(result);
result = (char *)&ypbuf;
- if ((cp = index(result, '\n')))
+ if ((cp = strchr(result, '\n')))
*cp = '\0';
cp = strpbrk(result, " \t");
@@ -200,61 +200,6 @@ _gethostbynisaddr_r(const void *addr, socklen_t len, int af,
}
#endif /* YP */
-/* XXX _gethostbynisname/_gethostbynisaddr only used by getipnodeby*() */
-struct hostent *
-_gethostbynisname(const char *name, int af)
-{
-#ifdef YP
- struct hostent *he;
- struct hostent_data *hed;
- u_long oresopt;
- int error;
- res_state statp;
-
- statp = __res_state();
- if ((he = __hostent_init()) == NULL ||
- (hed = __hostent_data_init()) == NULL) {
- RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
- return (NULL);
- }
-
- oresopt = statp->options;
- statp->options &= ~RES_USE_INET6;
- error = _gethostbynisname_r(name, af, he, hed);
- statp->options = oresopt;
- return (error == 0) ? he : NULL;
-#else
- return (NULL);
-#endif
-}
-
-struct hostent *
-_gethostbynisaddr(const void *addr, socklen_t len, int af)
-{
-#ifdef YP
- struct hostent *he;
- struct hostent_data *hed;
- u_long oresopt;
- int error;
- res_state statp;
-
- statp = __res_state();
- if ((he = __hostent_init()) == NULL ||
- (hed = __hostent_data_init()) == NULL) {
- RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
- return (NULL);
- }
-
- oresopt = statp->options;
- statp->options &= ~RES_USE_INET6;
- error = _gethostbynisaddr_r(addr, len, af, he, hed);
- statp->options = oresopt;
- return (error == 0) ? he : NULL;
-#else
- return (NULL);
-#endif
-}
-
int
_nis_gethostbyname(void *rval, void *cb_data, va_list ap)
{
diff --git a/freebsd/lib/libc/net/gethostnamadr.c b/freebsd/lib/libc/net/gethostnamadr.c
index 7960be7c..e5d41ac0 100644
--- a/freebsd/lib/libc/net/gethostnamadr.c
+++ b/freebsd/lib/libc/net/gethostnamadr.c
@@ -50,17 +50,10 @@ __FBSDID("$FreeBSD$");
#include "nscache.h"
#endif
-extern int _ht_gethostbyname(void *, void *, va_list);
-extern int _dns_gethostbyname(void *, void *, va_list);
-extern int _nis_gethostbyname(void *, void *, va_list);
-extern int _ht_gethostbyaddr(void *, void *, va_list);
-extern int _dns_gethostbyaddr(void *, void *, va_list);
-extern int _nis_gethostbyaddr(void *, void *, va_list);
-
static int gethostbyname_internal(const char *, int, struct hostent *, char *,
size_t, struct hostent **, int *, res_state);
-/* Host lookup order if nsswitch.conf is broken or nonexistant */
+/* Host lookup order if nsswitch.conf is broken or nonexistent */
static const ns_src default_src[] = {
{ NSSRC_FILES, NS_SUCCESS },
{ NSSRC_DNS, NS_SUCCESS },
diff --git a/freebsd/lib/libc/net/getifaddrs.c b/freebsd/lib/libc/net/getifaddrs.c
index 17ef0198..ee5ecd76 100644
--- a/freebsd/lib/libc/net/getifaddrs.c
+++ b/freebsd/lib/libc/net/getifaddrs.c
@@ -74,19 +74,6 @@ __FBSDID("$FreeBSD$");
#define ALIGN(p) (((u_long)(p) + ALIGNBYTES) &~ ALIGNBYTES)
#endif
-#if _BSDI_VERSION >= 199701
-#define HAVE_IFM_DATA
-#endif
-
-#if _BSDI_VERSION >= 199802
-/* ifam_data is very specific to recent versions of bsdi */
-#define HAVE_IFAM_DATA
-#endif
-
-#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__)
-#define HAVE_IFM_DATA
-#endif
-
#define MAX_SYSCTL_TRY 5
int
@@ -95,39 +82,31 @@ getifaddrs(struct ifaddrs **pif)
int icnt = 1;
int dcnt = 0;
int ncnt = 0;
-#ifdef NET_RT_IFLIST
int ntry = 0;
int mib[6];
size_t needed;
char *buf;
char *next;
- struct ifaddrs *cif = 0;
+ struct ifaddrs *cif;
char *p, *p0;
struct rt_msghdr *rtm;
- struct if_msghdr *ifm;
- struct ifa_msghdr *ifam;
+ struct if_msghdrl *ifm;
+ struct ifa_msghdrl *ifam;
struct sockaddr_dl *dl;
struct sockaddr *sa;
struct ifaddrs *ifa, *ift;
+ struct if_data *if_data;
u_short idx = 0;
-#else /* NET_RT_IFLIST */
- char buf[1024];
- int m, sock;
- struct ifconf ifc;
- struct ifreq *ifr;
- struct ifreq *lifr;
-#endif /* NET_RT_IFLIST */
int i;
size_t len, alen;
char *data;
char *names;
-#ifdef NET_RT_IFLIST
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
mib[2] = 0; /* protocol */
mib[3] = 0; /* wildcard address family */
- mib[4] = NET_RT_IFLIST;
+ mib[4] = NET_RT_IFLISTL;/* extra fields for extensible msghdr structs */
mib[5] = 0; /* no flags */
do {
/*
@@ -161,34 +140,33 @@ getifaddrs(struct ifaddrs **pif)
continue;
switch (rtm->rtm_type) {
case RTM_IFINFO:
- ifm = (struct if_msghdr *)(void *)rtm;
+ ifm = (struct if_msghdrl *)(void *)rtm;
if (ifm->ifm_addrs & RTA_IFP) {
idx = ifm->ifm_index;
++icnt;
- dl = (struct sockaddr_dl *)(void *)(ifm + 1);
+ if_data = IF_MSGHDRL_IFM_DATA(ifm);
+ dcnt += if_data->ifi_datalen;
+ dl = (struct sockaddr_dl *)IF_MSGHDRL_RTA(ifm);
dcnt += SA_RLEN((struct sockaddr *)(void*)dl) +
ALIGNBYTES;
-#ifdef HAVE_IFM_DATA
- dcnt += sizeof(ifm->ifm_data);
-#endif /* HAVE_IFM_DATA */
ncnt += dl->sdl_nlen + 1;
} else
idx = 0;
break;
case RTM_NEWADDR:
- ifam = (struct ifa_msghdr *)(void *)rtm;
+ ifam = (struct ifa_msghdrl *)(void *)rtm;
if (idx && ifam->ifam_index != idx)
abort(); /* this cannot happen */
#define RTA_MASKS (RTA_NETMASK | RTA_IFA | RTA_BRD)
if (idx == 0 || (ifam->ifam_addrs & RTA_MASKS) == 0)
break;
- p = (char *)(void *)(ifam + 1);
+ p = (char *)IFA_MSGHDRL_RTA(ifam);
++icnt;
-#ifdef HAVE_IFAM_DATA
- dcnt += sizeof(ifam->ifam_data) + ALIGNBYTES;
-#endif /* HAVE_IFAM_DATA */
+ if_data = IFA_MSGHDRL_IFAM_DATA(ifam);
+ dcnt += if_data->ifi_datalen + ALIGNBYTES;
+
/* Scan to look for length of address */
alen = 0;
for (p0 = p, i = 0; i < RTAX_MAX; i++) {
@@ -218,34 +196,6 @@ getifaddrs(struct ifaddrs **pif)
break;
}
}
-#else /* NET_RT_IFLIST */
- ifc.ifc_buf = buf;
- ifc.ifc_len = sizeof(buf);
-
- if ((sock = _socket(AF_INET, SOCK_STREAM, 0)) < 0)
- return (-1);
- i = _ioctl(sock, SIOCGIFCONF, (char *)&ifc);
- _close(sock);
- if (i < 0)
- return (-1);
-
- ifr = ifc.ifc_req;
- lifr = (struct ifreq *)&ifc.ifc_buf[ifc.ifc_len];
-
- while (ifr < lifr) {
- struct sockaddr *sa;
-
- sa = &ifr->ifr_addr;
- ++icnt;
- dcnt += SA_RLEN(sa);
- ncnt += sizeof(ifr->ifr_name) + 1;
-
- if (SA_LEN(sa) < sizeof(*sa))
- ifr = (struct ifreq *)(((char *)sa) + sizeof(*sa));
- else
- ifr = (struct ifreq *)(((char *)sa) + SA_LEN(sa));
- }
-#endif /* NET_RT_IFLIST */
if (icnt + dcnt + ncnt == 1) {
*pif = NULL;
@@ -265,49 +215,46 @@ getifaddrs(struct ifaddrs **pif)
memset(ifa, 0, sizeof(struct ifaddrs) * icnt);
ift = ifa;
-#ifdef NET_RT_IFLIST
idx = 0;
+ cif = NULL;
for (next = buf; next < buf + needed; next += rtm->rtm_msglen) {
rtm = (struct rt_msghdr *)(void *)next;
if (rtm->rtm_version != RTM_VERSION)
continue;
switch (rtm->rtm_type) {
case RTM_IFINFO:
- ifm = (struct if_msghdr *)(void *)rtm;
- if (ifm->ifm_addrs & RTA_IFP) {
- idx = ifm->ifm_index;
- dl = (struct sockaddr_dl *)(void *)(ifm + 1);
-
- cif = ift;
- ift->ifa_name = names;
- ift->ifa_flags = (int)ifm->ifm_flags;
- memcpy(names, dl->sdl_data,
- (size_t)dl->sdl_nlen);
- names[dl->sdl_nlen] = 0;
- names += dl->sdl_nlen + 1;
-
- ift->ifa_addr = (struct sockaddr *)(void *)data;
- memcpy(data, dl,
- (size_t)SA_LEN((struct sockaddr *)
- (void *)dl));
- data += SA_RLEN((struct sockaddr *)(void *)dl);
-
-#ifdef HAVE_IFM_DATA
- /* ifm_data needs to be aligned */
- ift->ifa_data = data = (void *)ALIGN(data);
- memcpy(data, &ifm->ifm_data, sizeof(ifm->ifm_data));
- data += sizeof(ifm->ifm_data);
-#else /* HAVE_IFM_DATA */
- ift->ifa_data = NULL;
-#endif /* HAVE_IFM_DATA */
-
- ift = (ift->ifa_next = ift + 1);
- } else
+ ifm = (struct if_msghdrl *)(void *)rtm;
+ if ((ifm->ifm_addrs & RTA_IFP) == 0) {
idx = 0;
+ break;
+ }
+
+ idx = ifm->ifm_index;
+ dl = (struct sockaddr_dl *)IF_MSGHDRL_RTA(ifm);
+
+ cif = ift;
+ ift->ifa_name = names;
+ ift->ifa_flags = (int)ifm->ifm_flags;
+ memcpy(names, dl->sdl_data, (size_t)dl->sdl_nlen);
+ names[dl->sdl_nlen] = 0;
+ names += dl->sdl_nlen + 1;
+
+ ift->ifa_addr = (struct sockaddr *)(void *)data;
+ memcpy(data, dl, (size_t)SA_LEN((struct sockaddr *)
+ (void *)dl));
+ data += SA_RLEN((struct sockaddr *)(void *)dl);
+
+ if_data = IF_MSGHDRL_IFM_DATA(ifm);
+ /* ifm_data needs to be aligned */
+ ift->ifa_data = data = (void *)ALIGN(data);
+ memcpy(data, if_data, if_data->ifi_datalen);
+ data += if_data->ifi_datalen;
+
+ ift = (ift->ifa_next = ift + 1);
break;
case RTM_NEWADDR:
- ifam = (struct ifa_msghdr *)(void *)rtm;
+ ifam = (struct ifa_msghdrl *)(void *)rtm;
if (idx && ifam->ifam_index != idx)
abort(); /* this cannot happen */
@@ -316,7 +263,8 @@ getifaddrs(struct ifaddrs **pif)
ift->ifa_name = cif->ifa_name;
ift->ifa_flags = cif->ifa_flags;
ift->ifa_data = NULL;
- p = (char *)(void *)(ifam + 1);
+
+ p = (char *)IFA_MSGHDRL_RTA(ifam);
/* Scan to look for length of address */
alen = 0;
for (p0 = p, i = 0; i < RTAX_MAX; i++) {
@@ -367,12 +315,11 @@ getifaddrs(struct ifaddrs **pif)
p += len;
}
-#ifdef HAVE_IFAM_DATA
+ if_data = IFA_MSGHDRL_IFAM_DATA(ifam);
/* ifam_data needs to be aligned */
ift->ifa_data = data = (void *)ALIGN(data);
- memcpy(data, &ifam->ifam_data, sizeof(ifam->ifam_data));
- data += sizeof(ifam->ifam_data);
-#endif /* HAVE_IFAM_DATA */
+ memcpy(data, if_data, if_data->ifi_datalen);
+ data += if_data->ifi_datalen;
ift = (ift->ifa_next = ift + 1);
break;
@@ -380,28 +327,7 @@ getifaddrs(struct ifaddrs **pif)
}
free(buf);
-#else /* NET_RT_IFLIST */
- ifr = ifc.ifc_req;
- lifr = (struct ifreq *)&ifc.ifc_buf[ifc.ifc_len];
-
- while (ifr < lifr) {
- struct sockaddr *sa;
-
- ift->ifa_name = names;
- names[sizeof(ifr->ifr_name)] = 0;
- strncpy(names, ifr->ifr_name, sizeof(ifr->ifr_name));
- while (*names++)
- ;
-
- ift->ifa_addr = (struct sockaddr *)data;
- sa = &ifr->ifr_addr;
- memcpy(data, sa, SA_LEN(sa));
- data += SA_RLEN(sa);
-
- ifr = (struct ifreq *)(((char *)sa) + SA_LEN(sa));
- ift = (ift->ifa_next = ift + 1);
- }
-#endif /* NET_RT_IFLIST */
+
if (--ift >= ifa) {
ift->ifa_next = NULL;
*pif = ifa;
diff --git a/freebsd/lib/libc/net/getifmaddrs.c b/freebsd/lib/libc/net/getifmaddrs.c
index 6d2bf84a..72d07733 100644
--- a/freebsd/lib/libc/net/getifmaddrs.c
+++ b/freebsd/lib/libc/net/getifmaddrs.c
@@ -12,24 +12,18 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Bruce M. Simpson.
- * 4. Neither the name of Bruce M. Simpson nor the names of other
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY BRUCE M. SIMPSON AND AFFILIATES
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BRUCE M. SIMPSON OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*/
#include <sys/cdefs.h>
diff --git a/freebsd/lib/libc/net/getnameinfo.c b/freebsd/lib/libc/net/getnameinfo.c
index 3a489c48..e042c549 100644
--- a/freebsd/lib/libc/net/getnameinfo.c
+++ b/freebsd/lib/libc/net/getnameinfo.c
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/socket.h>
+#include <sys/un.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
@@ -64,92 +65,123 @@ __FBSDID("$FreeBSD$");
#include <stddef.h>
#include <errno.h>
-static int getnameinfo_inet(const struct sockaddr *, socklen_t, char *,
+static const struct afd *find_afd(int);
+static int getnameinfo_inet(const struct afd *,
+ const struct sockaddr *, socklen_t, char *,
size_t, char *, size_t, int);
#ifdef INET6
static int ip6_parsenumeric(const struct sockaddr *, const char *, char *,
size_t, int);
static int ip6_sa2str(const struct sockaddr_in6 *, char *, size_t, int);
#endif
-static int getnameinfo_link(const struct sockaddr *, socklen_t, char *,
+static int getnameinfo_link(const struct afd *,
+ const struct sockaddr *, socklen_t, char *,
+ size_t, char *, size_t, int);
+static int hexname(const u_int8_t *, size_t, char *, size_t);
+static int getnameinfo_un(const struct afd *,
+ const struct sockaddr *, socklen_t, char *,
size_t, char *, size_t, int);
-static int hexname(const u_int8_t *, size_t, char *, size_t);
-
-int
-getnameinfo(const struct sockaddr *sa, socklen_t salen,
- char *host, size_t hostlen, char *serv, size_t servlen,
- int flags)
-{
-
- switch (sa->sa_family) {
- case AF_INET:
-#ifdef INET6
- case AF_INET6:
-#endif
- return getnameinfo_inet(sa, salen, host, hostlen, serv,
- servlen, flags);
- case AF_LINK:
- return getnameinfo_link(sa, salen, host, hostlen, serv,
- servlen, flags);
- default:
- return EAI_FAMILY;
- }
-}
static const struct afd {
int a_af;
size_t a_addrlen;
socklen_t a_socklen;
int a_off;
+ int (*a_func)(const struct afd *,
+ const struct sockaddr *, socklen_t, char *,
+ size_t, char *, size_t, int);
} afdl [] = {
#ifdef INET6
{PF_INET6, sizeof(struct in6_addr), sizeof(struct sockaddr_in6),
- offsetof(struct sockaddr_in6, sin6_addr)},
+ offsetof(struct sockaddr_in6, sin6_addr),
+ getnameinfo_inet},
#endif
{PF_INET, sizeof(struct in_addr), sizeof(struct sockaddr_in),
- offsetof(struct sockaddr_in, sin_addr)},
+ offsetof(struct sockaddr_in, sin_addr),
+ getnameinfo_inet},
+#define sizeofmember(type, member) (sizeof(((type *)0)->member))
+ {PF_LOCAL, sizeofmember(struct sockaddr_un, sun_path),
+ sizeof(struct sockaddr_un),
+ offsetof(struct sockaddr_un, sun_path),
+ getnameinfo_un},
+ {PF_LINK, sizeofmember(struct sockaddr_dl, sdl_data),
+ sizeof(struct sockaddr_dl),
+ offsetof(struct sockaddr_dl, sdl_data),
+ getnameinfo_link},
{0, 0, 0},
};
-struct sockinet {
- u_char si_len;
- u_char si_family;
- u_short si_port;
-};
+int
+getnameinfo(const struct sockaddr *sa, socklen_t salen,
+ char *host, size_t hostlen, char *serv, size_t servlen,
+ int flags)
+{
+ const struct afd *afd;
+
+ if (sa == NULL)
+ return (EAI_FAIL);
+
+ afd = find_afd(sa->sa_family);
+ if (afd == NULL)
+ return (EAI_FAMILY);
+ switch (sa->sa_family) {
+ case PF_LOCAL:
+		/*
+		 * PF_LOCAL uses a variable sa->sa_len that depends on
+		 * the content length of sun_path.  Require at least
+		 * one byte in sun_path.
+		 */
+ if (salen > afd->a_socklen ||
+ salen <= afd->a_socklen -
+ sizeofmember(struct sockaddr_un, sun_path))
+ return (EAI_FAIL);
+ break;
+ case PF_LINK:
+ if (salen <= afd->a_socklen -
+ sizeofmember(struct sockaddr_dl, sdl_data))
+ return (EAI_FAIL);
+ break;
+ default:
+ if (salen != afd->a_socklen)
+ return (EAI_FAIL);
+ break;
+ }
+
+ return ((*afd->a_func)(afd, sa, salen, host, hostlen,
+ serv, servlen, flags));
+}
+
+static const struct afd *
+find_afd(int af)
+{
+ const struct afd *afd;
+
+ if (af == PF_UNSPEC)
+ return (NULL);
+ for (afd = &afdl[0]; afd->a_af > 0; afd++) {
+ if (afd->a_af == af)
+ return (afd);
+ }
+ return (NULL);
+}
static int
-getnameinfo_inet(const struct sockaddr *sa, socklen_t salen,
+getnameinfo_inet(const struct afd *afd,
+ const struct sockaddr *sa, socklen_t salen,
char *host, size_t hostlen, char *serv, size_t servlen,
int flags)
{
- const struct afd *afd;
struct servent *sp;
struct hostent *hp;
u_short port;
- int family, i;
const char *addr;
u_int32_t v4a;
int h_error;
char numserv[512];
char numaddr[512];
- if (sa == NULL)
- return EAI_FAIL;
-
- family = sa->sa_family;
- for (i = 0; afdl[i].a_af; i++)
- if (afdl[i].a_af == family) {
- afd = &afdl[i];
- goto found;
- }
- return EAI_FAMILY;
-
- found:
- if (salen != afd->a_socklen)
- return EAI_FAIL;
-
/* network byte order */
- port = ((const struct sockinet *)sa)->si_port;
+ port = ((const struct sockaddr_in *)sa)->sin_port;
addr = (const char *)sa + afd->a_off;
if (serv == NULL || servlen == 0) {
@@ -348,7 +380,6 @@ ip6_sa2str(const struct sockaddr_in6 *sa6, char *buf, size_t bufsiz, int flags)
ifindex = (unsigned int)sa6->sin6_scope_id;
a6 = &sa6->sin6_addr;
-#ifdef NI_NUMERICSCOPE
if ((flags & NI_NUMERICSCOPE) != 0) {
n = snprintf(buf, bufsiz, "%u", sa6->sin6_scope_id);
if (n < 0 || n >= bufsiz)
@@ -356,7 +387,6 @@ ip6_sa2str(const struct sockaddr_in6 *sa6, char *buf, size_t bufsiz, int flags)
else
return n;
}
-#endif
/* if_indextoname() does not take buffer size. not a good api... */
if ((IN6_IS_ADDR_LINKLOCAL(a6) || IN6_IS_ADDR_MC_LINKLOCAL(a6) ||
@@ -383,7 +413,8 @@ ip6_sa2str(const struct sockaddr_in6 *sa6, char *buf, size_t bufsiz, int flags)
*/
/* ARGSUSED */
static int
-getnameinfo_link(const struct sockaddr *sa, socklen_t salen,
+getnameinfo_link(const struct afd *afd,
+ const struct sockaddr *sa, socklen_t salen,
char *host, size_t hostlen, char *serv, size_t servlen, int flags)
{
const struct sockaddr_dl *sdl =
@@ -396,11 +427,22 @@ getnameinfo_link(const struct sockaddr *sa, socklen_t salen,
if (sdl->sdl_nlen == 0 && sdl->sdl_alen == 0 && sdl->sdl_slen == 0) {
n = snprintf(host, hostlen, "link#%d", sdl->sdl_index);
- if (n > hostlen) {
+ if (n >= hostlen) {
*host = '\0';
- return EAI_MEMORY;
+ return (EAI_MEMORY);
}
- return 0;
+ return (0);
+ }
+
+ if (sdl->sdl_nlen > 0 && sdl->sdl_alen == 0) {
+ n = sdl->sdl_nlen;
+ if (n >= hostlen) {
+ *host = '\0';
+ return (EAI_MEMORY);
+ }
+ memcpy(host, sdl->sdl_data, sdl->sdl_nlen);
+ host[n] = '\0';
+ return (0);
}
switch (sdl->sdl_type) {
@@ -416,7 +458,6 @@ getnameinfo_link(const struct sockaddr *sa, socklen_t salen,
/*
* The following have zero-length addresses.
* IFT_ATM (net/if_atmsubr.c)
- * IFT_FAITH (net/if_faith.c)
* IFT_GIF (net/if_gif.c)
* IFT_LOOP (net/if_loop.c)
* IFT_PPP (net/if_ppp.c, net/if_spppsubr.c)
@@ -443,10 +484,7 @@ getnameinfo_link(const struct sockaddr *sa, socklen_t salen,
}
static int
-hexname(cp, len, host, hostlen)
- const u_int8_t *cp;
- char *host;
- size_t len, hostlen;
+hexname(const u_int8_t *cp, size_t len, char *host, size_t hostlen)
{
int i, n;
char *outp = host;
@@ -464,3 +502,30 @@ hexname(cp, len, host, hostlen)
}
return 0;
}
+
+/*
+ * getnameinfo_un():
+ * Format a UNIX IPC domain address (pathname).
+ */
+/* ARGSUSED */
+static int
+getnameinfo_un(const struct afd *afd,
+ const struct sockaddr *sa, socklen_t salen,
+ char *host, size_t hostlen, char *serv, size_t servlen, int flags)
+{
+ size_t pathlen;
+
+ if (serv != NULL && servlen > 0)
+ *serv = '\0';
+ if (host != NULL && hostlen > 0) {
+ pathlen = sa->sa_len - afd->a_off;
+
+ if (pathlen + 1 > hostlen) {
+ *host = '\0';
+ return (EAI_MEMORY);
+ }
+ strlcpy(host, (const char *)sa + afd->a_off, pathlen + 1);
+ }
+
+ return (0);
+}
diff --git a/freebsd/lib/libc/net/getnetbydns.c b/freebsd/lib/libc/net/getnetbydns.c
index 7f2b08fb..1e0cc3d1 100644
--- a/freebsd/lib/libc/net/getnetbydns.c
+++ b/freebsd/lib/libc/net/getnetbydns.c
@@ -457,7 +457,7 @@ _setnetdnsent(int stayopen)
}
void
-_endnetdnsent()
+_endnetdnsent(void)
{
res_state statp;
diff --git a/freebsd/lib/libc/net/getnetbyht.c b/freebsd/lib/libc/net/getnetbyht.c
index caf9c3a6..ee80bab0 100644
--- a/freebsd/lib/libc/net/getnetbyht.c
+++ b/freebsd/lib/libc/net/getnetbyht.c
@@ -65,7 +65,7 @@ _setnethtent(int f, struct netent_data *ned)
{
if (ned->netf == NULL)
- ned->netf = fopen(_PATH_NETWORKS, "r");
+ ned->netf = fopen(_PATH_NETWORKS, "re");
else
rewind(ned->netf);
ned->stayopen |= f;
@@ -91,7 +91,7 @@ getnetent_p(struct netent *ne, struct netent_data *ned)
char line[BUFSIZ + 1];
if (ned->netf == NULL &&
- (ned->netf = fopen(_PATH_NETWORKS, "r")) == NULL)
+ (ned->netf = fopen(_PATH_NETWORKS, "re")) == NULL)
return (-1);
again:
p = fgets(line, sizeof line, ned->netf);
diff --git a/freebsd/lib/libc/net/getnetbynis.c b/freebsd/lib/libc/net/getnetbynis.c
index dd9f506a..7b561abc 100644
--- a/freebsd/lib/libc/net/getnetbynis.c
+++ b/freebsd/lib/libc/net/getnetbynis.c
@@ -82,7 +82,7 @@ _getnetbynis(const char *name, char *map, int af, struct netent *ne,
free(result);
result = (char *)&ypbuf;
- if ((cp = index(result, '\n')))
+ if ((cp = strchr(result, '\n')))
*cp = '\0';
cp = strpbrk(result, " \t");
diff --git a/freebsd/lib/libc/net/getnetnamadr.c b/freebsd/lib/libc/net/getnetnamadr.c
index 2974d152..79cd5f62 100644
--- a/freebsd/lib/libc/net/getnetnamadr.c
+++ b/freebsd/lib/libc/net/getnetnamadr.c
@@ -48,14 +48,7 @@ __FBSDID("$FreeBSD$");
#include "nscache.h"
#endif
-extern int _ht_getnetbyname(void *, void *, va_list);
-extern int _dns_getnetbyname(void *, void *, va_list);
-extern int _nis_getnetbyname(void *, void *, va_list);
-extern int _ht_getnetbyaddr(void *, void *, va_list);
-extern int _dns_getnetbyaddr(void *, void *, va_list);
-extern int _nis_getnetbyaddr(void *, void *, va_list);
-
-/* Network lookup order if nsswitch.conf is broken or nonexistant */
+/* Network lookup order if nsswitch.conf is broken or nonexistent */
static const ns_src default_src[] = {
{ NSSRC_FILES, NS_SUCCESS },
{ NSSRC_DNS, NS_SUCCESS },
diff --git a/freebsd/lib/libc/net/getproto.c b/freebsd/lib/libc/net/getproto.c
index e0c30df0..5f9950ae 100644
--- a/freebsd/lib/libc/net/getproto.c
+++ b/freebsd/lib/libc/net/getproto.c
@@ -49,12 +49,6 @@ static const ns_src defaultsrc[] = {
{ NULL, 0 }
};
-#ifdef NS_CACHING
-extern int __proto_id_func(char *, size_t *, va_list, void *);
-extern int __proto_marshal_func(char *, size_t *, void *, va_list, void *);
-extern int __proto_unmarshal_func(char *, size_t, void *, va_list, void *);
-#endif
-
static int
files_getprotobynumber(void *retval, void *mdata, va_list ap)
{
diff --git a/freebsd/lib/libc/net/getprotoent.c b/freebsd/lib/libc/net/getprotoent.c
index d0a7a97e..774ae95a 100644
--- a/freebsd/lib/libc/net/getprotoent.c
+++ b/freebsd/lib/libc/net/getprotoent.c
@@ -36,7 +36,6 @@ static char sccsid[] = "@(#)getprotoent.c 8.1 (Berkeley) 6/4/93";
__FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
-#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>
#include <limits.h>
@@ -344,7 +343,7 @@ void
__setprotoent_p(int f, struct protoent_data *ped)
{
if (ped->fp == NULL)
- ped->fp = fopen(_PATH_PROTOCOLS, "r");
+ ped->fp = fopen(_PATH_PROTOCOLS, "re");
else
rewind(ped->fp);
ped->stayopen |= f;
@@ -367,7 +366,7 @@ __getprotoent_p(struct protoent *pe, struct protoent_data *ped)
char *cp, **q, *endp;
long l;
- if (ped->fp == NULL && (ped->fp = fopen(_PATH_PROTOCOLS, "r")) == NULL)
+ if (ped->fp == NULL && (ped->fp = fopen(_PATH_PROTOCOLS, "re")) == NULL)
return (-1);
again:
if ((p = fgets(ped->line, sizeof ped->line, ped->fp)) == NULL)
diff --git a/freebsd/lib/libc/net/getservent.c b/freebsd/lib/libc/net/getservent.c
index aff91ed2..63aba518 100644
--- a/freebsd/lib/libc/net/getservent.c
+++ b/freebsd/lib/libc/net/getservent.c
@@ -36,7 +36,6 @@ static char sccsid[] = "@(#)getservent.c 8.1 (Berkeley) 6/4/93";
__FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
-#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <db.h>
@@ -323,7 +322,7 @@ files_servent(void *retval, void *mdata, va_list ap)
break;
default:
return NS_NOTFOUND;
- };
+ }
serv = va_arg(ap, struct servent *);
buffer = va_arg(ap, char *);
@@ -337,7 +336,7 @@ files_servent(void *retval, void *mdata, va_list ap)
if (st->fp == NULL)
st->compat_mode_active = 0;
- if (st->fp == NULL && (st->fp = fopen(_PATH_SERVICES, "r")) == NULL) {
+ if (st->fp == NULL && (st->fp = fopen(_PATH_SERVICES, "re")) == NULL) {
*errnop = errno;
return (NS_UNAVAIL);
}
@@ -408,14 +407,14 @@ files_servent(void *retval, void *mdata, va_list ap)
continue;
gotname:
- if (proto == 0 || strcmp(serv->s_proto, proto) == 0)
+ if (proto == NULL || strcmp(serv->s_proto, proto) == 0)
rv = NS_SUCCESS;
break;
case nss_lt_id:
if (port != serv->s_port)
continue;
- if (proto == 0 || strcmp(serv->s_proto, proto) == 0)
+ if (proto == NULL || strcmp(serv->s_proto, proto) == 0)
rv = NS_SUCCESS;
break;
case nss_lt_all:
@@ -451,7 +450,7 @@ files_setservent(void *retval, void *mdata, va_list ap)
case SETSERVENT:
f = va_arg(ap,int);
if (st->fp == NULL)
- st->fp = fopen(_PATH_SERVICES, "r");
+ st->fp = fopen(_PATH_SERVICES, "re");
else
rewind(st->fp);
st->stayopen |= f;
@@ -465,7 +464,7 @@ files_setservent(void *retval, void *mdata, va_list ap)
break;
default:
break;
- };
+ }
st->compat_mode_active = 0;
return (NS_UNAVAIL);
@@ -524,7 +523,7 @@ db_servent(void *retval, void *mdata, va_list ap)
break;
default:
return NS_NOTFOUND;
- };
+ }
serv = va_arg(ap, struct servent *);
buffer = va_arg(ap, char *);
@@ -643,7 +642,7 @@ db_setservent(void *retval, void *mdata, va_list ap)
break;
default:
break;
- };
+ }
return (NS_UNAVAIL);
}
@@ -696,7 +695,7 @@ nis_servent(void *retval, void *mdata, va_list ap)
break;
default:
return NS_NOTFOUND;
- };
+ }
serv = va_arg(ap, struct servent *);
buffer = va_arg(ap, char *);
@@ -783,7 +782,7 @@ nis_servent(void *retval, void *mdata, va_list ap)
}
}
break;
- };
+ }
rv = parse_result(serv, buffer, bufsize, resultbuf,
resultbuflen, errnop);
@@ -817,7 +816,7 @@ nis_setservent(void *result, void *mdata, va_list ap)
break;
default:
break;
- };
+ }
return (NS_UNAVAIL);
}
@@ -1243,7 +1242,7 @@ setservent(int stayopen)
}
void
-endservent()
+endservent(void)
{
#ifdef NS_CACHING
static const nss_cache_info cache_info = NS_MP_CACHE_INFO_INITIALIZER(
@@ -1364,7 +1363,7 @@ getservbyport(int port, const char *proto)
}
struct servent *
-getservent()
+getservent(void)
{
struct key key;
diff --git a/freebsd/lib/libc/net/if_indextoname.c b/freebsd/lib/libc/net/if_indextoname.c
index b0f10f72..236ccbda 100644
--- a/freebsd/lib/libc/net/if_indextoname.c
+++ b/freebsd/lib/libc/net/if_indextoname.c
@@ -72,7 +72,7 @@ if_indextoname(unsigned int ifindex, char *ifname)
for (ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) {
if (ifa->ifa_addr &&
ifa->ifa_addr->sa_family == AF_LINK &&
- ifindex == ((struct sockaddr_dl*)ifa->ifa_addr)->sdl_index)
+ ifindex == LLINDEX((struct sockaddr_dl*)ifa->ifa_addr))
break;
}
diff --git a/freebsd/lib/libc/net/if_nameindex.c b/freebsd/lib/libc/net/if_nameindex.c
index 7cb14a80..6d2573e8 100644
--- a/freebsd/lib/libc/net/if_nameindex.c
+++ b/freebsd/lib/libc/net/if_nameindex.c
@@ -125,7 +125,7 @@ if_nameindex(void)
if (ifa->ifa_addr &&
ifa->ifa_addr->sa_family == AF_LINK) {
ifni2->if_index =
- ((struct sockaddr_dl*)ifa->ifa_addr)->sdl_index;
+ LLINDEX((struct sockaddr_dl*)ifa->ifa_addr);
ifni2->if_name = cp;
strcpy(cp, ifa->ifa_name);
ifni2++;
diff --git a/freebsd/lib/libc/net/if_nametoindex.c b/freebsd/lib/libc/net/if_nametoindex.c
index eff590e7..92a2abed 100644
--- a/freebsd/lib/libc/net/if_nametoindex.c
+++ b/freebsd/lib/libc/net/if_nametoindex.c
@@ -70,8 +70,9 @@ if_nametoindex(const char *ifname)
struct ifaddrs *ifaddrs, *ifa;
unsigned int ni;
- s = _socket(AF_INET, SOCK_DGRAM, 0);
+ s = _socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
if (s != -1) {
+ memset(&ifr, 0, sizeof(ifr));
strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
if (_ioctl(s, SIOCGIFINDEX, &ifr) != -1) {
_close(s);
@@ -89,7 +90,7 @@ if_nametoindex(const char *ifname)
if (ifa->ifa_addr &&
ifa->ifa_addr->sa_family == AF_LINK &&
strcmp(ifa->ifa_name, ifname) == 0) {
- ni = ((struct sockaddr_dl*)ifa->ifa_addr)->sdl_index;
+ ni = LLINDEX((struct sockaddr_dl*)ifa->ifa_addr);
break;
}
}
diff --git a/freebsd/lib/libc/net/ip6opt.c b/freebsd/lib/libc/net/ip6opt.c
index a09f05d8..2fbd6cc6 100644
--- a/freebsd/lib/libc/net/ip6opt.c
+++ b/freebsd/lib/libc/net/ip6opt.c
@@ -35,7 +35,6 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
-#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
@@ -47,6 +46,18 @@ __FBSDID("$FreeBSD$");
static int ip6optlen(u_int8_t *opt, u_int8_t *lim);
static void inet6_insert_padopt(u_char *p, int len);
+#ifndef IPV6_2292HOPOPTS
+#define IPV6_2292HOPOPTS 22
+#endif
+#ifndef IPV6_2292DSTOPTS
+#define IPV6_2292DSTOPTS 23
+#endif
+
+#define is_ipv6_hopopts(x) \
+ ((x) == IPV6_HOPOPTS || (x) == IPV6_2292HOPOPTS)
+#define is_ipv6_dstopts(x) \
+ ((x) == IPV6_DSTOPTS || (x) == IPV6_2292DSTOPTS)
+
/*
* This function returns the number of bytes required to hold an option
* when it is stored as ancillary data, including the cmsghdr structure
@@ -74,9 +85,9 @@ inet6_option_init(void *bp, struct cmsghdr **cmsgp, int type)
struct cmsghdr *ch = (struct cmsghdr *)bp;
/* argument validation */
- if (type != IPV6_HOPOPTS && type != IPV6_DSTOPTS)
+ if (!is_ipv6_hopopts(type) && !is_ipv6_dstopts(type))
return(-1);
-
+
ch->cmsg_level = IPPROTO_IPV6;
ch->cmsg_type = type;
ch->cmsg_len = CMSG_LEN(0);
@@ -121,8 +132,7 @@ inet6_option_append(struct cmsghdr *cmsg, const u_int8_t *typep, int multx,
/* calculate pad length before the option. */
off = bp - (u_char *)eh;
- padlen = (((off % multx) + (multx - 1)) & ~(multx - 1)) -
- (off % multx);
+ padlen = roundup2(off % multx, multx) - (off % multx);
padlen += plusy;
padlen %= multx; /* keep the pad as short as possible */
/* insert padding */
@@ -191,8 +201,7 @@ inet6_option_alloc(struct cmsghdr *cmsg, int datalen, int multx, int plusy)
/* calculate pad length before the option. */
off = bp - (u_char *)eh;
- padlen = (((off % multx) + (multx - 1)) & ~(multx - 1)) -
- (off % multx);
+ padlen = roundup2(off % multx, multx) - (off % multx);
padlen += plusy;
padlen %= multx; /* keep the pad as short as possible */
/* insert padding */
@@ -236,8 +245,8 @@ inet6_option_next(const struct cmsghdr *cmsg, u_int8_t **tptrp)
u_int8_t *lim;
if (cmsg->cmsg_level != IPPROTO_IPV6 ||
- (cmsg->cmsg_type != IPV6_HOPOPTS &&
- cmsg->cmsg_type != IPV6_DSTOPTS))
+ (!is_ipv6_hopopts(cmsg->cmsg_type) &&
+ !is_ipv6_dstopts(cmsg->cmsg_type)))
return(-1);
/* message length validation */
@@ -292,8 +301,8 @@ inet6_option_find(const struct cmsghdr *cmsg, u_int8_t **tptrp, int type)
u_int8_t *optp, *lim;
if (cmsg->cmsg_level != IPPROTO_IPV6 ||
- (cmsg->cmsg_type != IPV6_HOPOPTS &&
- cmsg->cmsg_type != IPV6_DSTOPTS))
+ (!is_ipv6_hopopts(cmsg->cmsg_type) &&
+ !is_ipv6_dstopts(cmsg->cmsg_type)))
return(-1);
/* message length validation */
@@ -383,11 +392,8 @@ inet6_opt_init(void *extbuf, socklen_t extlen)
{
struct ip6_ext *ext = (struct ip6_ext *)extbuf;
- if (extlen < 0 || (extlen % 8))
- return(-1);
-
if (ext) {
- if (extlen == 0)
+ if (extlen <= 0 || (extlen % 8))
return(-1);
ext->ip6e_len = (extlen >> 3) - 1;
}
@@ -412,7 +418,7 @@ inet6_opt_append(void *extbuf, socklen_t extlen, int offset, u_int8_t type,
* The option data length must have a value between 0 and 255,
* inclusive, and is the length of the option data that follows.
*/
- if (len < 0 || len > 255)
+ if (len > 255 || len < 0 )
return(-1);
/*
diff --git a/freebsd/lib/libc/net/linkaddr.c b/freebsd/lib/libc/net/linkaddr.c
index 10aff86c..1a2c3fd3 100644
--- a/freebsd/lib/libc/net/linkaddr.c
+++ b/freebsd/lib/libc/net/linkaddr.c
@@ -52,9 +52,7 @@ __FBSDID("$FreeBSD$");
#define LETTER (4*3)
void
-link_addr(addr, sdl)
- const char *addr;
- struct sockaddr_dl *sdl;
+link_addr(const char *addr, struct sockaddr_dl *sdl)
{
char *cp = sdl->sdl_data;
char *cplim = sdl->sdl_len + (char *)sdl;
@@ -120,11 +118,10 @@ link_addr(addr, sdl)
return;
}
-static char hexlist[] = "0123456789abcdef";
+static const char hexlist[] = "0123456789abcdef";
char *
-link_ntoa(sdl)
- const struct sockaddr_dl *sdl;
+link_ntoa(const struct sockaddr_dl *sdl)
{
static char obuf[64];
char *out = obuf;
diff --git a/freebsd/lib/libc/net/map_v4v6.c b/freebsd/lib/libc/net/map_v4v6.c
index e2fc9747..2923fb48 100644
--- a/freebsd/lib/libc/net/map_v4v6.c
+++ b/freebsd/lib/libc/net/map_v4v6.c
@@ -57,7 +57,6 @@ static char sccsid[] = "@(#)gethostnamadr.c 8.1 (Berkeley) 6/4/93";
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/socket.h>
#include <netinet/in.h>
@@ -70,6 +69,7 @@ __FBSDID("$FreeBSD$");
#include <resolv.h>
#include <ctype.h>
#include <syslog.h>
+#include "netdb_private.h"
typedef union {
int32_t al;
@@ -79,19 +79,11 @@ typedef union {
void
_map_v4v6_address(const char *src, char *dst)
{
- u_char *p = (u_char *)dst;
- char tmp[NS_INADDRSZ];
- int i;
-
- /* Stash a temporary copy so our caller can update in place. */
- memcpy(tmp, src, NS_INADDRSZ);
+ /* Our caller may update in place. */
+ memmove(&dst[12], src, NS_INADDRSZ);
/* Mark this ipv6 addr as a mapped ipv4. */
- for (i = 0; i < 10; i++)
- *p++ = 0x00;
- *p++ = 0xff;
- *p++ = 0xff;
- /* Retrieve the saved copy and we're done. */
- memcpy((void*)p, tmp, NS_INADDRSZ);
+ memset(&dst[10], 0xff, 2);
+ memset(&dst[0], 0, 10);
}
void
diff --git a/freebsd/lib/libc/net/name6.c b/freebsd/lib/libc/net/name6.c
index 2a93efce..a106ad98 100644
--- a/freebsd/lib/libc/net/name6.c
+++ b/freebsd/lib/libc/net/name6.c
@@ -44,11 +44,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
@@ -100,7 +96,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#ifdef INET6
#include <net/if.h>
-#include <net/if_var.h>
#include <sys/sysctl.h>
#include <sys/ioctl.h>
#include <netinet6/in6_var.h> /* XXX */
@@ -241,13 +236,13 @@ getipnodebyname(const char *name, int af, int flags, int *errp)
if (flags & AI_ADDRCONFIG) {
int s;
- if ((s = _socket(af, SOCK_DGRAM, 0)) < 0)
+ if ((s = _socket(af, SOCK_DGRAM | SOCK_CLOEXEC, 0)) < 0)
return NULL;
/*
* TODO:
* Note that implementation dependent test for address
- * configuration should be done everytime called
- * (or apropriate interval),
+ * configuration should be done every time called
+ * (or appropriate interval),
* because addresses will be dynamically assigned or deleted.
*/
_close(s);
@@ -337,7 +332,7 @@ getipnodebyaddr(const void *src, size_t len, int af, int *errp)
*errp = NO_RECOVERY;
return NULL;
}
- if ((long)src & ~(sizeof(struct in_addr) - 1)) {
+ if (rounddown2((long)src, sizeof(struct in_addr))) {
memcpy(&addrbuf, src, len);
src = &addrbuf;
}
@@ -350,7 +345,8 @@ getipnodebyaddr(const void *src, size_t len, int af, int *errp)
*errp = NO_RECOVERY;
return NULL;
}
- if ((long)src & ~(sizeof(struct in6_addr) / 2 - 1)) { /*XXX*/
+ if (rounddown2((long)src, sizeof(struct in6_addr) / 2)) {
+ /* XXX */
memcpy(&addrbuf, src, len);
src = &addrbuf;
}
@@ -661,7 +657,6 @@ _hpreorder(struct hostent *hp)
#endif
break;
default:
- free_addrselectpolicy(&policyhead);
return hp;
}
@@ -742,11 +737,11 @@ get_addrselectpolicy(struct policyhead *head)
char *buf;
struct in6_addrpolicy *pol, *ep;
- if (sysctl(mib, sizeof(mib) / sizeof(mib[0]), NULL, &l, NULL, 0) < 0)
+ if (sysctl(mib, nitems(mib), NULL, &l, NULL, 0) < 0)
return (0);
if ((buf = malloc(l)) == NULL)
return (0);
- if (sysctl(mib, sizeof(mib) / sizeof(mib[0]), buf, &l, NULL, 0) < 0) {
+ if (sysctl(mib, nitems(mib), buf, &l, NULL, 0) < 0) {
free(buf);
return (0);
}
@@ -801,10 +796,9 @@ match_addrselectpolicy(struct sockaddr *addr, struct policyhead *head)
memset(&key, 0, sizeof(key));
key.sin6_family = AF_INET6;
key.sin6_len = sizeof(key);
- key.sin6_addr.s6_addr[10] = 0xff;
- key.sin6_addr.s6_addr[11] = 0xff;
- memcpy(&key.sin6_addr.s6_addr[12],
- &((struct sockaddr_in *)addr)->sin_addr, 4);
+ _map_v4v6_address(
+ (char *)&((struct sockaddr_in *)addr)->sin_addr,
+ (char *)&key.sin6_addr);
break;
default:
return(NULL);
@@ -874,7 +868,8 @@ set_source(struct hp_order *aio, struct policyhead *ph)
}
/* open a socket to get the source address for the given dst */
- if ((s = _socket(ss.ss_family, SOCK_DGRAM, IPPROTO_UDP)) < 0)
+ if ((s = _socket(ss.ss_family, SOCK_DGRAM | SOCK_CLOEXEC,
+ IPPROTO_UDP)) < 0)
return; /* give up */
if (_connect(s, (struct sockaddr *)&ss, ss.ss_len) < 0)
goto cleanup;
diff --git a/freebsd/lib/libc/net/netdb_private.h b/freebsd/lib/libc/net/netdb_private.h
index ed3465e9..51f28f07 100644
--- a/freebsd/lib/libc/net/netdb_private.h
+++ b/freebsd/lib/libc/net/netdb_private.h
@@ -130,8 +130,6 @@ void _endhostdnsent(void);
void _endhosthtent(struct hostent_data *);
void _endnetdnsent(void);
void _endnethtent(struct netent_data *);
-struct hostent *_gethostbynisaddr(const void *, socklen_t, int);
-struct hostent *_gethostbynisname(const char *, int);
void _map_v4v6_address(const char *, char *);
void _map_v4v6_hostent(struct hostent *, char **, char *);
void _sethostdnsent(int);
@@ -139,7 +137,23 @@ void _sethosthtent(int, struct hostent_data *);
void _setnetdnsent(int);
void _setnethtent(int, struct netent_data *);
-int _nsdispatch(void *retval, const ns_dtab disp_tab[], const char *database,
- const char *method_name, const ns_src defaults[], ...);
+struct hostent *__dns_getanswer(const char *, int, const char *, int);
+int _dns_gethostbyaddr(void *, void *, va_list);
+int _dns_gethostbyname(void *, void *, va_list);
+int _dns_getnetbyaddr(void *, void *, va_list);
+int _dns_getnetbyname(void *, void *, va_list);
+int _ht_gethostbyaddr(void *, void *, va_list);
+int _ht_gethostbyname(void *, void *, va_list);
+int _ht_getnetbyaddr(void *, void *, va_list);
+int _ht_getnetbyname(void *, void *, va_list);
+int _nis_gethostbyaddr(void *, void *, va_list);
+int _nis_gethostbyname(void *, void *, va_list);
+int _nis_getnetbyaddr(void *, void *, va_list);
+int _nis_getnetbyname(void *, void *, va_list);
+#ifdef NS_CACHING
+int __proto_id_func(char *, size_t *, va_list, void *);
+int __proto_marshal_func(char *, size_t *, void *, va_list, void *);
+int __proto_unmarshal_func(char *, size_t, void *, va_list, void *);
+#endif
#endif /* _NETDB_PRIVATE_H_ */
diff --git a/freebsd/lib/libc/net/nsdispatch.c b/freebsd/lib/libc/net/nsdispatch.c
index f1a88d8d..8d0e1493 100644
--- a/freebsd/lib/libc/net/nsdispatch.c
+++ b/freebsd/lib/libc/net/nsdispatch.c
@@ -17,13 +17,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
@@ -147,14 +140,17 @@ static void *nss_cache_cycle_prevention_func = NULL;
#endif
/*
- * When this is set to 1, nsdispatch won't use nsswitch.conf
- * but will consult the 'defaults' source list only.
- * NOTE: nested fallbacks (when nsdispatch calls fallback functions,
- * which in turn calls nsdispatch, which should call fallback
- * function) are not supported
+ * We keep track of nsdispatch() nesting depth in dispatch_depth. When a
+ * fallback method is invoked from nsdispatch(), we temporarily set
+ * fallback_depth to the current dispatch depth plus one. Subsequent
+ * calls at that exact depth will run in fallback mode (restricted to the
+ * same source as the call that was handled by the fallback method), while
+ * calls below that depth will be handled normally, allowing fallback
+ * methods to perform arbitrary lookups.
*/
struct fb_state {
- int fb_dispatch;
+ int dispatch_depth;
+ int fallback_depth;
};
static void fb_endstate(void *);
NSS_TLS_HANDLING(fb);
@@ -352,7 +348,6 @@ _nsdbtdump(const ns_dbt *dbt)
static int
nss_configure(void)
{
- static pthread_mutex_t conf_lock = PTHREAD_MUTEX_INITIALIZER;
static time_t confmod;
struct stat statbuf;
int result, isthreaded;
@@ -376,15 +371,16 @@ nss_configure(void)
if (statbuf.st_mtime <= confmod)
return (0);
if (isthreaded) {
- result = _pthread_mutex_trylock(&conf_lock);
- if (result != 0)
- return (0);
(void)_pthread_rwlock_unlock(&nss_lock);
result = _pthread_rwlock_wrlock(&nss_lock);
if (result != 0)
- goto fin2;
+ return (result);
+ if (stat(path, &statbuf) != 0)
+ goto fin;
+ if (statbuf.st_mtime <= confmod)
+ goto fin;
}
- _nsyyin = fopen(path, "r");
+ _nsyyin = fopen(path, "re");
if (_nsyyin == NULL)
goto fin;
VECTOR_FREE(_nsmap, &_nsmapsize, sizeof(*_nsmap),
@@ -417,9 +413,6 @@ fin:
if (result == 0)
result = _pthread_rwlock_rdlock(&nss_lock);
}
-fin2:
- if (isthreaded)
- (void)_pthread_mutex_unlock(&conf_lock);
return (result);
}
@@ -681,6 +674,7 @@ _nsdispatch(void *retval, const ns_dtab disp_tab[], const char *database,
void *mdata;
int isthreaded, serrno, i, result, srclistsize;
struct fb_state *st;
+ int saved_depth;
#ifdef NS_CACHING
nss_cache_data cache_data;
@@ -712,7 +706,8 @@ _nsdispatch(void *retval, const ns_dtab disp_tab[], const char *database,
result = NS_UNAVAIL;
goto fin;
}
- if (st->fb_dispatch == 0) {
+ ++st->dispatch_depth;
+ if (st->dispatch_depth > st->fallback_depth) {
dbt = vector_search(&database, _nsmap, _nsmapsize, sizeof(*_nsmap),
string_compare);
fb_method = nss_method_lookup(NSSRC_FALLBACK, database,
@@ -781,12 +776,13 @@ _nsdispatch(void *retval, const ns_dtab disp_tab[], const char *database,
break;
} else {
if (fb_method != NULL) {
- st->fb_dispatch = 1;
+ saved_depth = st->fallback_depth;
+ st->fallback_depth = st->dispatch_depth + 1;
va_start(ap, defaults);
result = fb_method(retval,
(void *)srclist[i].name, ap);
va_end(ap);
- st->fb_dispatch = 0;
+ st->fallback_depth = saved_depth;
} else
nss_log(LOG_DEBUG, "%s, %s, %s, not found, "
"and no fallback provided",
@@ -818,6 +814,7 @@ _nsdispatch(void *retval, const ns_dtab disp_tab[], const char *database,
if (isthreaded)
(void)_pthread_rwlock_unlock(&nss_lock);
+ --st->dispatch_depth;
fin:
errno = serrno;
return (result);
diff --git a/freebsd/lib/libc/net/nslexer.l b/freebsd/lib/libc/net/nslexer.l
index bc36ea2b..c7a95f61 100644
--- a/freebsd/lib/libc/net/nslexer.l
+++ b/freebsd/lib/libc/net/nslexer.l
@@ -16,13 +16,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
@@ -104,14 +97,13 @@ STRING [a-zA-Z][a-zA-Z0-9_]*
#undef _nsyywrap
int
-_nsyywrap()
+_nsyywrap(void)
{
return 1;
} /* _nsyywrap */
void
-_nsyyerror(msg)
- const char *msg;
+_nsyyerror(const char *msg)
{
syslog(LOG_ERR, "NSSWITCH(nslexer): %s line %d: %s at '%s'",
diff --git a/freebsd/lib/libc/net/nsparser.y b/freebsd/lib/libc/net/nsparser.y
index 8f1d162d..773e23fc 100644
--- a/freebsd/lib/libc/net/nsparser.y
+++ b/freebsd/lib/libc/net/nsparser.y
@@ -16,13 +16,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the NetBSD
- * Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
@@ -152,8 +145,7 @@ Action
%%
static void
-_nsaddsrctomap(elem)
- const char *elem;
+_nsaddsrctomap(const char *elem)
{
int i, lineno;
extern int _nsyylineno;
diff --git a/freebsd/lib/libc/net/rcmd.c b/freebsd/lib/libc/net/rcmd.c
index 2f5e593f..8458a34c 100644
--- a/freebsd/lib/libc/net/rcmd.c
+++ b/freebsd/lib/libc/net/rcmd.c
@@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <arpa/nameser.h>
#include "un-namespace.h"
+#include "libc_private.h"
extern int innetgr( const char *, const char *, const char *, const char * );
@@ -74,22 +75,15 @@ static int __icheckhost(const struct sockaddr *, socklen_t, const char *);
char paddr[NI_MAXHOST];
int
-rcmd(ahost, rport, locuser, remuser, cmd, fd2p)
- char **ahost;
- u_short rport;
- const char *locuser, *remuser, *cmd;
- int *fd2p;
+rcmd(char **ahost, int rport, const char *locuser, const char *remuser,
+ const char *cmd, int *fd2p)
{
return rcmd_af(ahost, rport, locuser, remuser, cmd, fd2p, AF_INET);
}
int
-rcmd_af(ahost, rport, locuser, remuser, cmd, fd2p, af)
- char **ahost;
- u_short rport;
- const char *locuser, *remuser, *cmd;
- int *fd2p;
- int af;
+rcmd_af(char **ahost, int rport, const char *locuser, const char *remuser,
+ const char *cmd, int *fd2p, int af)
{
struct addrinfo hints, *res, *ai;
struct sockaddr_storage from;
@@ -150,7 +144,7 @@ rcmd_af(ahost, rport, locuser, remuser, cmd, fd2p, af)
refused = 0;
sigemptyset(&newmask);
sigaddset(&newmask, SIGURG);
- _sigprocmask(SIG_BLOCK, (const sigset_t *)&newmask, &oldmask);
+ __libc_sigprocmask(SIG_BLOCK, (const sigset_t *)&newmask, &oldmask);
for (timo = 1, lport = IPPORT_RESERVED - 1;;) {
s = rresvport_af(&lport, ai->ai_family);
if (s < 0) {
@@ -165,7 +159,7 @@ rcmd_af(ahost, rport, locuser, remuser, cmd, fd2p, af)
(void)fprintf(stderr, "rcmd: socket: %s\n",
strerror(errno));
freeaddrinfo(res);
- _sigprocmask(SIG_SETMASK, (const sigset_t *)&oldmask,
+ __libc_sigprocmask(SIG_SETMASK, (const sigset_t *)&oldmask,
NULL);
return (-1);
}
@@ -183,7 +177,7 @@ rcmd_af(ahost, rport, locuser, remuser, cmd, fd2p, af)
(void)fprintf(stderr, "%s: %s\n",
*ahost, strerror(errno));
freeaddrinfo(res);
- _sigprocmask(SIG_SETMASK, (const sigset_t *)&oldmask,
+ __libc_sigprocmask(SIG_SETMASK, (const sigset_t *)&oldmask,
NULL);
return (-1);
}
@@ -215,7 +209,7 @@ rcmd_af(ahost, rport, locuser, remuser, cmd, fd2p, af)
}
}
lport--;
- if (fd2p == 0) {
+ if (fd2p == NULL) {
_write(s, "", 1);
lport = 0;
} else {
@@ -308,7 +302,7 @@ again:
}
goto bad2;
}
- _sigprocmask(SIG_SETMASK, (const sigset_t *)&oldmask, NULL);
+ __libc_sigprocmask(SIG_SETMASK, (const sigset_t *)&oldmask, NULL);
freeaddrinfo(res);
return (s);
bad2:
@@ -316,21 +310,19 @@ bad2:
(void)_close(*fd2p);
bad:
(void)_close(s);
- _sigprocmask(SIG_SETMASK, (const sigset_t *)&oldmask, NULL);
+ __libc_sigprocmask(SIG_SETMASK, (const sigset_t *)&oldmask, NULL);
freeaddrinfo(res);
return (-1);
}
int
-rresvport(port)
- int *port;
+rresvport(int *port)
{
return rresvport_af(port, AF_INET);
}
int
-rresvport_af(alport, family)
- int *alport, family;
+rresvport_af(int *alport, int family)
{
int s;
struct sockaddr_storage ss;
@@ -381,9 +373,7 @@ int __check_rhosts_file = 1;
char *__rcmd_errstr;
int
-ruserok(rhost, superuser, ruser, luser)
- const char *rhost, *ruser, *luser;
- int superuser;
+ruserok(const char *rhost, int superuser, const char *ruser, const char *luser)
{
struct addrinfo hints, *res, *r;
int error;
@@ -416,10 +406,7 @@ ruserok(rhost, superuser, ruser, luser)
* Returns 0 if ok, -1 if not ok.
*/
int
-iruserok(raddr, superuser, ruser, luser)
- unsigned long raddr;
- int superuser;
- const char *ruser, *luser;
+iruserok(unsigned long raddr, int superuser, const char *ruser, const char *luser)
{
struct sockaddr_in sin;
@@ -437,11 +424,8 @@ iruserok(raddr, superuser, ruser, luser)
* Returns 0 if ok, -1 if not ok.
*/
int
-iruserok_sa(ra, rlen, superuser, ruser, luser)
- const void *ra;
- int rlen;
- int superuser;
- const char *ruser, *luser;
+iruserok_sa(const void *ra, int rlen, int superuser, const char *ruser,
+ const char *luser)
{
char *cp;
struct stat sbuf;
@@ -460,7 +444,7 @@ iruserok_sa(ra, rlen, superuser, ruser, luser)
raddr = (struct sockaddr *)&ss;
first = 1;
- hostf = superuser ? NULL : fopen(_PATH_HEQUIV, "r");
+ hostf = superuser ? NULL : fopen(_PATH_HEQUIV, "re");
again:
if (hostf) {
if (__ivaliduser_sa(hostf, raddr, rlen, luser, ruser) == 0) {
@@ -483,7 +467,7 @@ again:
*/
uid = geteuid();
(void)seteuid(pwd->pw_uid);
- hostf = fopen(pbuf, "r");
+ hostf = fopen(pbuf, "re");
(void)seteuid(uid);
if (hostf == NULL)
@@ -521,10 +505,7 @@ again:
* Returns 0 if ok, -1 if not ok.
*/
int
-__ivaliduser(hostf, raddr, luser, ruser)
- FILE *hostf;
- u_int32_t raddr;
- const char *luser, *ruser;
+__ivaliduser(FILE *hostf, u_int32_t raddr, const char *luser, const char *ruser)
{
struct sockaddr_in sin;
@@ -542,11 +523,8 @@ __ivaliduser(hostf, raddr, luser, ruser)
* XXX obsolete API.
*/
int
-__ivaliduser_af(hostf, raddr, luser, ruser, af, len)
- FILE *hostf;
- const void *raddr;
- const char *luser, *ruser;
- int af, len;
+__ivaliduser_af(FILE *hostf, const void *raddr, const char *luser,
+ const char *ruser, int af, int len)
{
struct sockaddr *sa = NULL;
struct sockaddr_in *sin = NULL;
@@ -585,11 +563,8 @@ __ivaliduser_af(hostf, raddr, luser, ruser, af, len)
}
int
-__ivaliduser_sa(hostf, raddr, salen, luser, ruser)
- FILE *hostf;
- const struct sockaddr *raddr;
- socklen_t salen;
- const char *luser, *ruser;
+__ivaliduser_sa(FILE *hostf, const struct sockaddr *raddr, socklen_t salen,
+ const char *luser, const char *ruser)
{
char *user, *p;
int ch;
@@ -708,10 +683,7 @@ __ivaliduser_sa(hostf, raddr, salen, luser, ruser)
* Returns "true" if match, 0 if no match.
*/
static int
-__icheckhost(raddr, salen, lhost)
- const struct sockaddr *raddr;
- socklen_t salen;
- const char *lhost;
+__icheckhost(const struct sockaddr *raddr, socklen_t salen, const char *lhost)
{
struct sockaddr_in sin;
struct sockaddr_in6 *sin6;
diff --git a/freebsd/lib/libc/net/recv.c b/freebsd/lib/libc/net/recv.c
index 168cb85e..71c531c8 100644
--- a/freebsd/lib/libc/net/recv.c
+++ b/freebsd/lib/libc/net/recv.c
@@ -35,18 +35,25 @@ static char sccsid[] = "@(#)recv.c 8.2 (Berkeley) 2/21/94";
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include "namespace.h"
#include <sys/types.h>
#include <sys/socket.h>
+#include "libc_private.h"
#include <stddef.h>
-#include "un-namespace.h"
ssize_t
-recv(s, buf, len, flags)
- int s, flags;
- size_t len;
- void *buf;
+recv(int s, void *buf, size_t len, int flags)
{
- return (_recvfrom(s, buf, len, flags, NULL, 0));
+ /*
+ * POSIX says recv() shall be a cancellation point, so call the
+ * cancellation-enabled recvfrom() and not _recvfrom().
+ */
+#ifndef __rtems__
+ return (((ssize_t (*)(int, void *, size_t, int,
+ struct sockaddr *, socklen_t *))
+ __libc_interposing[INTERPOS_recvfrom])(s, buf, len, flags,
+ NULL, NULL));
+#else /* __rtems__ */
+ return (recvfrom(s, buf, len, flags, NULL, 0));
+#endif /* __rtems__ */
}
diff --git a/freebsd/lib/libc/net/res_config.h b/freebsd/lib/libc/net/res_config.h
index 411542d1..c730aad3 100644
--- a/freebsd/lib/libc/net/res_config.h
+++ b/freebsd/lib/libc/net/res_config.h
@@ -2,7 +2,7 @@
#define DEBUG 1 /* enable debugging code (needed for dig) */
#define RESOLVSORT /* allow sorting of addresses in gethostbyname */
-#undef SUNSECURITY /* verify gethostbyaddr() calls - WE DONT NEED IT */
+#undef SUNSECURITY /* verify gethostbyaddr() calls - WE DON'T NEED IT */
#define MULTI_PTRS_ARE_ALIASES 1 /* fold multiple PTR records into aliases */
#ifdef __rtems__
#define dprintf gethostbydns_dprintf
diff --git a/freebsd/lib/libc/net/rthdr.c b/freebsd/lib/libc/net/rthdr.c
index f2a1d3d0..dd7c9a9a 100644
--- a/freebsd/lib/libc/net/rthdr.c
+++ b/freebsd/lib/libc/net/rthdr.c
@@ -35,7 +35,6 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
-#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
@@ -49,8 +48,7 @@ __FBSDID("$FreeBSD$");
*/
size_t
-inet6_rthdr_space(type, seg)
- int type, seg;
+inet6_rthdr_space(int type, int seg)
{
switch (type) {
case IPV6_RTHDR_TYPE_0:
@@ -69,9 +67,7 @@ inet6_rthdr_space(type, seg)
}
struct cmsghdr *
-inet6_rthdr_init(bp, type)
- void *bp;
- int type;
+inet6_rthdr_init(void *bp, int type)
{
struct cmsghdr *ch = (struct cmsghdr *)bp;
struct ip6_rthdr *rthdr;
@@ -100,10 +96,7 @@ inet6_rthdr_init(bp, type)
/* ARGSUSED */
int
-inet6_rthdr_add(cmsg, addr, flags)
- struct cmsghdr *cmsg;
- const struct in6_addr *addr;
- u_int flags;
+inet6_rthdr_add(struct cmsghdr *cmsg, const struct in6_addr *addr, u_int flags)
{
struct ip6_rthdr *rthdr;
@@ -145,9 +138,7 @@ inet6_rthdr_add(cmsg, addr, flags)
/* ARGSUSED */
int
-inet6_rthdr_lasthop(cmsg, flags)
- struct cmsghdr *cmsg;
- unsigned int flags;
+inet6_rthdr_lasthop(struct cmsghdr *cmsg, unsigned int flags)
{
struct ip6_rthdr *rthdr;
@@ -185,9 +176,7 @@ inet6_rthdr_lasthop(cmsg, flags)
#if 0
int
-inet6_rthdr_reverse(in, out)
- const struct cmsghdr *in;
- struct cmsghdr *out;
+inet6_rthdr_reverse(const struct cmsghdr *in, struct cmsghdr *out)
{
return (-1);
@@ -195,8 +184,7 @@ inet6_rthdr_reverse(in, out)
#endif
int
-inet6_rthdr_segments(cmsg)
- const struct cmsghdr *cmsg;
+inet6_rthdr_segments(const struct cmsghdr *cmsg)
{
struct ip6_rthdr *rthdr;
@@ -219,9 +207,7 @@ inet6_rthdr_segments(cmsg)
}
struct in6_addr *
-inet6_rthdr_getaddr(cmsg, idx)
- struct cmsghdr *cmsg;
- int idx;
+inet6_rthdr_getaddr(struct cmsghdr *cmsg, int idx)
{
struct ip6_rthdr *rthdr;
@@ -251,9 +237,7 @@ inet6_rthdr_getaddr(cmsg, idx)
}
int
-inet6_rthdr_getflags(cmsg, idx)
- const struct cmsghdr *cmsg;
- int idx;
+inet6_rthdr_getflags(const struct cmsghdr *cmsg, int idx)
{
struct ip6_rthdr *rthdr;
diff --git a/freebsd/lib/libc/net/send.c b/freebsd/lib/libc/net/send.c
index aac2e1f8..332f850d 100644
--- a/freebsd/lib/libc/net/send.c
+++ b/freebsd/lib/libc/net/send.c
@@ -35,18 +35,25 @@ static char sccsid[] = "@(#)send.c 8.2 (Berkeley) 2/21/94";
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include "namespace.h"
#include <sys/types.h>
#include <sys/socket.h>
+#include "libc_private.h"
#include <stddef.h>
-#include "un-namespace.h"
ssize_t
-send(s, msg, len, flags)
- int s, flags;
- size_t len;
- const void *msg;
+send(int s, const void *msg, size_t len, int flags)
{
- return (_sendto(s, msg, len, flags, NULL, 0));
+ /*
+ * POSIX says send() shall be a cancellation point, so call the
+ * cancellation-enabled sendto() and not _sendto().
+ */
+#ifndef __rtems__
+ return (((ssize_t (*)(int, const void *, size_t, int,
+ const struct sockaddr *, socklen_t))
+ __libc_interposing[INTERPOS_sendto])(s, msg, len, flags,
+ NULL, 0));
+#else /* __rtems__ */
+ return (sendto(s, msg, len, flags, NULL, 0));
+#endif /* __rtems__ */
}
diff --git a/freebsd/lib/libc/posix1e/mac.c b/freebsd/lib/libc/posix1e/mac.c
index 3806130a..16e45199 100644
--- a/freebsd/lib/libc/posix1e/mac.c
+++ b/freebsd/lib/libc/posix1e/mac.c
@@ -181,7 +181,7 @@ mac_init_internal(int ignore_errors)
filename = getenv("MAC_CONFFILE");
else
filename = MAC_CONFFILE;
- file = fopen(filename, "r");
+ file = fopen(filename, "re");
if (file == NULL)
return (0);
diff --git a/freebsd/lib/libc/resolv/h_errno.c b/freebsd/lib/libc/resolv/h_errno.c
index 4902e22c..aed29e84 100644
--- a/freebsd/lib/libc/resolv/h_errno.c
+++ b/freebsd/lib/libc/resolv/h_errno.c
@@ -35,6 +35,9 @@
#undef h_errno
extern int h_errno;
+int *__h_errno(void);
+void __h_errno_set(res_state res, int err);
+
int *
__h_errno(void)
{
diff --git a/freebsd/lib/libc/resolv/herror.c b/freebsd/lib/libc/resolv/herror.c
index aea3a745..1c6900b0 100644
--- a/freebsd/lib/libc/resolv/herror.c
+++ b/freebsd/lib/libc/resolv/herror.c
@@ -56,7 +56,6 @@ __FBSDID("$FreeBSD$");
#include "port_before.h"
#include "namespace.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/uio.h>
@@ -78,7 +77,7 @@ const char *h_errlist[] = {
"Unknown server error", /*%< 3 NO_RECOVERY */
"No address associated with name", /*%< 4 NO_ADDRESS */
};
-const int h_nerr = { sizeof h_errlist / sizeof h_errlist[0] };
+const int h_nerr = { nitems(h_errlist) };
#undef h_errno
int h_errno;
diff --git a/freebsd/lib/libc/resolv/mtctxres.c b/freebsd/lib/libc/resolv/mtctxres.c
index 6ba2bc97..f4fd36a5 100644
--- a/freebsd/lib/libc/resolv/mtctxres.c
+++ b/freebsd/lib/libc/resolv/mtctxres.c
@@ -77,7 +77,7 @@ __res_init_ctx(void) {
return (0);
}
- if ((mt = malloc(sizeof (mtctxres_t))) == 0) {
+ if ((mt = malloc(sizeof(mtctxres_t))) == NULL) {
errno = ENOMEM;
return (-1);
}
@@ -96,10 +96,7 @@ __res_init_ctx(void) {
static void
__res_destroy_ctx(void *value) {
- mtctxres_t *mt = (mtctxres_t *)value;
-
- if (mt != 0)
- free(mt);
+ free(value);
}
#endif
@@ -134,9 +131,9 @@ ___mtctxres(void) {
* that fails return a global context.
*/
if (mt_key_initialized) {
- if (((mt = pthread_getspecific(key)) != 0) ||
+ if (((mt = pthread_getspecific(key)) != NULL) ||
(__res_init_ctx() == 0 &&
- (mt = pthread_getspecific(key)) != 0)) {
+ (mt = pthread_getspecific(key)) != NULL)) {
return (mt);
}
}
diff --git a/freebsd/lib/libc/resolv/res_comp.c b/freebsd/lib/libc/resolv/res_comp.c
index 9b6ee369..9908da3c 100644
--- a/freebsd/lib/libc/resolv/res_comp.c
+++ b/freebsd/lib/libc/resolv/res_comp.c
@@ -74,7 +74,6 @@ static const char rcsid[] = "$Id: res_comp.c,v 1.5 2005/07/28 06:51:50 marka Exp
__FBSDID("$FreeBSD$");
#include "port_before.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <netinet/in.h>
#include <arpa/nameser.h>
@@ -88,7 +87,7 @@ __FBSDID("$FreeBSD$");
/*%
* Expand compressed domain name 'src' to full domain name.
*
- * \li 'msg' is a pointer to the begining of the message,
+ * \li 'msg' is a pointer to the beginning of the message,
* \li 'eom' points to the first location after the message,
* \li 'dst' is a pointer to a buffer of size 'dstsiz' for the result.
* \li Return size of compressed name or -1 if there was an error.
@@ -149,12 +148,12 @@ dn_skipname(const u_char *ptr, const u_char *eom) {
|| ((c) >= 0x61 && (c) <= 0x7a))
#define digitchar(c) ((c) >= 0x30 && (c) <= 0x39)
-#define borderchar(c) (alphachar(c) || digitchar(c))
#ifdef RES_ENFORCE_RFC1034
-#define middlechar(c) (borderchar(c) || hyphenchar(c))
+#define borderchar(c) (alphachar(c) || digitchar(c))
#else
-#define middlechar(c) (borderchar(c) || hyphenchar(c) || underscorechar(c))
+#define borderchar(c) (alphachar(c) || digitchar(c) || underscorechar(c))
#endif
+#define middlechar(c) (borderchar(c) || hyphenchar(c))
#define domainchar(c) ((c) > 0x20 && (c) < 0x7f)
int
diff --git a/freebsd/lib/libc/resolv/res_data.c b/freebsd/lib/libc/resolv/res_data.c
index 0ab7ed20..e89390f8 100644
--- a/freebsd/lib/libc/resolv/res_data.c
+++ b/freebsd/lib/libc/resolv/res_data.c
@@ -25,7 +25,6 @@ __FBSDID("$FreeBSD$");
#include "port_before.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/socket.h>
#include <sys/time.h>
@@ -79,9 +78,10 @@ const char *_res_sectioncodes[] = {
int res_ourserver_p(const res_state, const struct sockaddr_in *);
-int
+__noinline int
res_init(void) {
extern int __res_vinit(res_state, int);
+ res_state statp = &_res;
/*
* These three fields used to be statically initialized. This made
@@ -102,14 +102,14 @@ res_init(void) {
* set in RES_DEFAULT). Our solution is to declare such applications
* "broken". They could fool us by setting RES_INIT but none do (yet).
*/
- if (!_res.retrans)
- _res.retrans = RES_TIMEOUT;
- if (!_res.retry)
- _res.retry = RES_DFLRETRY;
- if (!(_res.options & RES_INIT))
- _res.options = RES_DEFAULT;
-
- return (__res_vinit(&_res, 1));
+ if (!statp->retrans)
+ statp->retrans = RES_TIMEOUT;
+ if (!statp->retry)
+ statp->retry = RES_DFLRETRY;
+ if (!(statp->options & RES_INIT))
+ statp->options = RES_DEFAULT;
+
+ return (__res_vinit(statp, 1));
}
void
@@ -124,10 +124,11 @@ fp_query(const u_char *msg, FILE *file) {
void
fp_nquery(const u_char *msg, int len, FILE *file) {
- if ((_res.options & RES_INIT) == 0U && res_init() == -1)
+ res_state statp = &_res;
+ if ((statp->options & RES_INIT) == 0U && res_init() == -1)
return;
- res_pquery(&_res, msg, len, file);
+ res_pquery(statp, msg, len, file);
}
int
@@ -140,23 +141,25 @@ res_mkquery(int op, /*!< opcode of query */
u_char *buf, /*!< buffer to put query */
int buflen) /*!< size of buffer */
{
- if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
- RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ res_state statp = &_res;
+ if ((statp->options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
return (-1);
}
- return (res_nmkquery(&_res, op, dname, class, type,
+ return (res_nmkquery(statp, op, dname, class, type,
data, datalen,
newrr_in, buf, buflen));
}
int
res_mkupdate(ns_updrec *rrecp_in, u_char *buf, int buflen) {
- if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
- RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ res_state statp = &_res;
+ if ((statp->options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
return (-1);
}
- return (res_nmkupdate(&_res, rrecp_in, buf, buflen));
+ return (res_nmkupdate(statp, rrecp_in, buf, buflen));
}
int
@@ -165,11 +168,12 @@ res_query(const char *name, /*!< domain name */
u_char *answer, /*!< buffer to put answer */
int anslen) /*!< size of answer buffer */
{
- if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
- RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ res_state statp = &_res;
+ if ((statp->options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
return (-1);
}
- return (res_nquery(&_res, name, class, type, answer, anslen));
+ return (res_nquery(statp, name, class, type, answer, anslen));
}
#ifndef _LIBC
@@ -191,12 +195,13 @@ res_isourserver(const struct sockaddr_in *inp) {
int
res_send(const u_char *buf, int buflen, u_char *ans, int anssiz) {
- if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ res_state statp = &_res;
+ if ((statp->options & RES_INIT) == 0U && res_init() == -1) {
/* errno should have been set by res_init() in this case. */
return (-1);
}
- return (res_nsend(&_res, buf, buflen, ans, anssiz));
+ return (res_nsend(statp, buf, buflen, ans, anssiz));
}
#ifndef _LIBC
@@ -204,12 +209,13 @@ int
res_sendsigned(const u_char *buf, int buflen, ns_tsig_key *key,
u_char *ans, int anssiz)
{
- if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ res_state statp = &_res;
+ if ((statp->options & RES_INIT) == 0U && res_init() == -1) {
/* errno should have been set by res_init() in this case. */
return (-1);
}
- return (res_nsendsigned(&_res, buf, buflen, key, ans, anssiz));
+ return (res_nsendsigned(statp, buf, buflen, key, ans, anssiz));
}
#endif
@@ -220,12 +226,13 @@ res_close(void) {
int
res_update(ns_updrec *rrecp_in) {
- if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
- RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ res_state statp = &_res;
+ if ((statp->options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
return (-1);
}
- return (res_nupdate(&_res, rrecp_in, NULL));
+ return (res_nupdate(statp, rrecp_in, NULL));
}
int
@@ -234,12 +241,13 @@ res_search(const char *name, /*!< domain name */
u_char *answer, /*!< buffer to put answer */
int anslen) /*!< size of answer */
{
- if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
- RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ res_state statp = &_res;
+ if ((statp->options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
return (-1);
}
- return (res_nsearch(&_res, name, class, type, answer, anslen));
+ return (res_nsearch(statp, name, class, type, answer, anslen));
}
int
@@ -249,24 +257,26 @@ res_querydomain(const char *name,
u_char *answer, /*!< buffer to put answer */
int anslen) /*!< size of answer */
{
- if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
- RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ res_state statp = &_res;
+ if ((statp->options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
return (-1);
}
- return (res_nquerydomain(&_res, name, domain,
+ return (res_nquerydomain(statp, name, domain,
class, type,
answer, anslen));
}
u_int
res_randomid(void) {
- if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
- RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ res_state statp = &_res;
+ if ((statp->options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
return (-1);
}
- return (res_nrandomid(&_res));
+ return (res_nrandomid(statp));
}
int
@@ -286,13 +296,15 @@ hostalias(const char *name) {
int
local_hostname_length(const char *hostname) {
int len_host, len_domain;
+ res_state statp;
- if (!*_res.defdname)
+ statp = &_res;
+ if (!*statp->defdname)
res_init();
len_host = strlen(hostname);
- len_domain = strlen(_res.defdname);
+ len_domain = strlen(statp->defdname);
if (len_host > len_domain &&
- !strcasecmp(hostname + len_host - len_domain, _res.defdname) &&
+ !strcasecmp(hostname + len_host - len_domain, statp->defdname) &&
hostname[len_host - len_domain - 1] == '.')
return (len_host - len_domain - 1);
return (0);
diff --git a/freebsd/lib/libc/resolv/res_debug.c b/freebsd/lib/libc/resolv/res_debug.c
index 632aa666..d2551e57 100644
--- a/freebsd/lib/libc/resolv/res_debug.c
+++ b/freebsd/lib/libc/resolv/res_debug.c
@@ -100,7 +100,6 @@ __FBSDID("$FreeBSD$");
#include "port_before.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/socket.h>
@@ -369,11 +368,8 @@ p_cdname(const u_char *cp, const u_char *msg, FILE *file) {
length supplied). */
const u_char *
-p_fqnname(cp, msg, msglen, name, namelen)
- const u_char *cp, *msg;
- int msglen;
- char *name;
- int namelen;
+p_fqnname(const u_char *cp, const u_char *msg, int msglen, char *name,
+ int namelen)
{
int n, newlen;
@@ -760,8 +756,7 @@ static unsigned int poweroften[10] = {1, 10, 100, 1000, 10000, 100000,
/*% takes an XeY precision/size value, returns a string representation. */
static const char *
-precsize_ntoa(prec)
- u_int8_t prec;
+precsize_ntoa(u_int8_t prec)
{
char *retbuf = precsize_ntoa_retbuf;
unsigned long val;
@@ -914,9 +909,7 @@ latlon2ul(const char **latlonstrptr, int *which) {
* converts a zone file representation in a string to an RDATA on-the-wire
* representation. */
int
-loc_aton(ascii, binary)
- const char *ascii;
- u_char *binary;
+loc_aton(const char *ascii, u_char *binary)
{
const char *cp, *maxcp;
u_char *bcp;
@@ -1025,9 +1018,7 @@ loc_aton(ascii, binary)
/*% takes an on-the-wire LOC RR and formats it in a human readable format. */
const char *
-loc_ntoa(binary, ascii)
- const u_char *binary;
- char *ascii;
+loc_ntoa(const u_char *binary, char *ascii)
{
static const char *error = "?";
static char tmpbuf[sizeof
diff --git a/freebsd/lib/libc/resolv/res_findzonecut.c b/freebsd/lib/libc/resolv/res_findzonecut.c
index e776a1eb..360bd498 100644
--- a/freebsd/lib/libc/resolv/res_findzonecut.c
+++ b/freebsd/lib/libc/resolv/res_findzonecut.c
@@ -121,7 +121,7 @@ static void res_dprintf(const char *, ...) ISC_FORMAT_PRINTF(1, 2);
* notes:
*\li this function calls res_nsend() which means it depends on correctly
* functioning recursive nameservers (usually defined in /etc/resolv.conf
- * or its local equivilent).
+ * or its local equivalent).
*
*\li we start by asking for an SOA<dname,class>. if we get one as an
* answer, that just means <dname,class> is a zone top, which is fine.
diff --git a/freebsd/lib/libc/resolv/res_init.c b/freebsd/lib/libc/resolv/res_init.c
index 75a8dff3..1c7b8c25 100644
--- a/freebsd/lib/libc/resolv/res_init.c
+++ b/freebsd/lib/libc/resolv/res_init.c
@@ -77,9 +77,9 @@ __FBSDID("$FreeBSD$");
#include "namespace.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/socket.h>
+#include <sys/stat.h>
#include <sys/time.h>
#include <netinet/in.h>
@@ -117,7 +117,9 @@ __FBSDID("$FreeBSD$");
/*% Options. Should all be left alone. */
#define RESOLVSORT
-#define DEBUG
+#ifndef DEBUG
+#define DEBUG
+#endif
#ifdef SOLARIS2
#include <sys/systeminfo.h>
@@ -144,7 +146,7 @@ static u_int32_t net_mask(struct in_addr);
* there will have precedence. Otherwise, the server address is set to
* INADDR_ANY and the default domain name comes from the gethostname().
*
- * An interrim version of this code (BIND 4.9, pre-4.4BSD) used 127.0.0.1
+ * An interim version of this code (BIND 4.9, pre-4.4BSD) used 127.0.0.1
* rather than INADDR_ANY ("0.0.0.0") as the default name server address
* since it was noted that INADDR_ANY actually meant ``the first interface
* you "ifconfig"'d at boot time'' and if this was a SLIP or PPP interface,
@@ -167,7 +169,7 @@ res_ninit(res_state statp) {
return (__res_vinit(statp, 0));
}
-/*% This function has to be reachable by res_data.c but not publically. */
+/*% This function has to be reachable by res_data.c but not publicly. */
int
__res_vinit(res_state statp, int preinit) {
FILE *fp;
@@ -238,6 +240,7 @@ __res_vinit(res_state statp, int preinit) {
statp->_u._ext.ext->nsaddrs[0].sin = statp->nsaddr;
strcpy(statp->_u._ext.ext->nsuffix, "ip6.arpa");
strcpy(statp->_u._ext.ext->nsuffix2, "ip6.int");
+ statp->_u._ext.ext->reload_period = 2;
} else {
/*
* Historically res_init() rarely, if at all, failed.
@@ -313,7 +316,7 @@ __res_vinit(res_state statp, int preinit) {
while (*cp != '\0' && *cp != ' ' && *cp != '\t' && *cp != '\n')
cp++;
*cp = '\0';
- *pp++ = 0;
+ *pp++ = NULL;
}
#define MATCH(line, name) \
@@ -322,7 +325,19 @@ __res_vinit(res_state statp, int preinit) {
line[sizeof(name) - 1] == '\t'))
nserv = 0;
- if ((fp = fopen(_PATH_RESCONF, "r")) != NULL) {
+ if ((fp = fopen(_PATH_RESCONF, "re")) != NULL) {
+ struct stat sb;
+ struct timespec now;
+
+ if (statp->_u._ext.ext != NULL) {
+ if (_fstat(fileno(fp), &sb) == 0) {
+ statp->_u._ext.ext->conf_mtim = sb.st_mtim;
+ if (clock_gettime(CLOCK_MONOTONIC_FAST, &now) == 0) {
+ statp->_u._ext.ext->conf_stat = now.tv_sec;
+ }
+ }
+ }
+
/* read the config file */
while (fgets(buf, sizeof(buf), fp) != NULL) {
/* skip comments */
@@ -377,7 +392,7 @@ __res_vinit(res_state statp, int preinit) {
while (*cp != '\0' && *cp != ' ' && *cp != '\t')
cp++;
*cp = '\0';
- *pp++ = 0;
+ *pp++ = NULL;
havesearch = 1;
continue;
}
@@ -398,20 +413,21 @@ __res_vinit(res_state statp, int preinit) {
hints.ai_socktype = SOCK_DGRAM; /*dummy*/
hints.ai_flags = AI_NUMERICHOST;
sprintf(sbuf, "%u", NAMESERVER_PORT);
- if (getaddrinfo(cp, sbuf, &hints, &ai) == 0 &&
- ai->ai_addrlen <= minsiz) {
- if (statp->_u._ext.ext != NULL) {
- memcpy(&statp->_u._ext.ext->nsaddrs[nserv],
- ai->ai_addr, ai->ai_addrlen);
+ if (getaddrinfo(cp, sbuf, &hints, &ai) == 0) {
+ if (ai->ai_addrlen <= minsiz) {
+ if (statp->_u._ext.ext != NULL) {
+ memcpy(&statp->_u._ext.ext->nsaddrs[nserv],
+ ai->ai_addr, ai->ai_addrlen);
+ }
+ if (ai->ai_addrlen <=
+ sizeof(statp->nsaddr_list[nserv])) {
+ memcpy(&statp->nsaddr_list[nserv],
+ ai->ai_addr, ai->ai_addrlen);
+ } else
+ statp->nsaddr_list[nserv].sin_family = 0;
+ nserv++;
}
- if (ai->ai_addrlen <=
- sizeof(statp->nsaddr_list[nserv])) {
- memcpy(&statp->nsaddr_list[nserv],
- ai->ai_addr, ai->ai_addrlen);
- } else
- statp->nsaddr_list[nserv].sin_family = 0;
freeaddrinfo(ai);
- nserv++;
}
}
continue;
@@ -583,9 +599,7 @@ res_setoptions(res_state statp, const char *options, const char *source)
{
const char *cp = options;
int i;
-#ifndef _LIBC
struct __res_state_ext *ext = statp->_u._ext.ext;
-#endif
#ifdef DEBUG
if (statp->options & RES_DEBUG)
@@ -668,6 +682,12 @@ res_setoptions(res_state statp, const char *options, const char *source)
} else if (!strncmp(cp, "no-check-names",
sizeof("no-check-names") - 1)) {
statp->options |= RES_NOCHECKNAME;
+ } else if (!strncmp(cp, "reload-period:",
+ sizeof("reload-period:") - 1)) {
+ if (ext != NULL) {
+ ext->reload_period = (u_short)
+ atoi(cp + sizeof("reload-period:") - 1);
+ }
}
#ifdef RES_USE_EDNS0
else if (!strncmp(cp, "edns0", sizeof("edns0") - 1)) {
@@ -720,8 +740,7 @@ res_setoptions(res_state statp, const char *options, const char *source)
#ifdef RESOLVSORT
/* XXX - should really support CIDR which means explicit masks always. */
static u_int32_t
-net_mask(in) /*!< XXX - should really use system's version of this */
- struct in_addr in;
+net_mask(struct in_addr in) /*!< XXX - should really use system's version of this */
{
u_int32_t i = ntohl(in.s_addr);
diff --git a/freebsd/lib/libc/resolv/res_mkquery.c b/freebsd/lib/libc/resolv/res_mkquery.c
index 84800095..866dbaf6 100644
--- a/freebsd/lib/libc/resolv/res_mkquery.c
+++ b/freebsd/lib/libc/resolv/res_mkquery.c
@@ -74,7 +74,6 @@ static const char rcsid[] = "$Id: res_mkquery.c,v 1.10 2008/12/11 09:59:00 marka
__FBSDID("$FreeBSD$");
#include "port_before.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <netinet/in.h>
#include <arpa/nameser.h>
@@ -85,7 +84,9 @@ __FBSDID("$FreeBSD$");
#include "port_after.h"
/* Options. Leave them on. */
-#define DEBUG
+#ifndef DEBUG
+#define DEBUG
+#endif
extern const char *_res_opcodes[];
@@ -133,7 +134,7 @@ res_nmkquery(res_state statp,
dpp = dnptrs;
*dpp++ = buf;
*dpp++ = NULL;
- lastdnptr = dnptrs + sizeof dnptrs / sizeof dnptrs[0];
+ lastdnptr = dnptrs + nitems(dnptrs);
/*
* perform opcode specific processing
*/
diff --git a/freebsd/lib/libc/resolv/res_mkupdate.c b/freebsd/lib/libc/resolv/res_mkupdate.c
index fade9356..6b0c484f 100644
--- a/freebsd/lib/libc/resolv/res_mkupdate.c
+++ b/freebsd/lib/libc/resolv/res_mkupdate.c
@@ -31,7 +31,6 @@ __FBSDID("$FreeBSD$");
#include "port_before.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <netinet/in.h>
@@ -56,7 +55,9 @@ __FBSDID("$FreeBSD$");
#include "port_after.h"
/* Options. Leave them on. */
-#define DEBUG
+#ifndef DEBUG
+#define DEBUG
+#endif
#define MAXPORT 1024
static int getnum_str(u_char **, u_char *);
@@ -127,7 +128,7 @@ res_nmkupdate(res_state statp, ns_updrec *rrecp_in, u_char *buf, int buflen) {
dpp = dnptrs;
*dpp++ = buf;
*dpp++ = NULL;
- lastdnptr = dnptrs + sizeof dnptrs / sizeof dnptrs[0];
+ lastdnptr = dnptrs + nitems(dnptrs);
if (rrecp_start == NULL)
return (-5);
@@ -975,7 +976,7 @@ struct valuelist {
static struct valuelist *servicelist, *protolist;
static void
-res_buildservicelist() {
+res_buildservicelist(void) {
struct servent *sp;
struct valuelist *slp;
@@ -1175,7 +1176,7 @@ res_protocolname(int num) {
if (protolist == (struct valuelist *)0)
res_buildprotolist();
pp = cgetprotobynumber(num);
- if (pp == 0) {
+ if (pp == NULL) {
(void) sprintf(number, "%d", num);
return (number);
}
@@ -1190,7 +1191,7 @@ res_servicename(u_int16_t port, const char *proto) { /*%< Host byte order. */
if (servicelist == (struct valuelist *)0)
res_buildservicelist();
ss = cgetservbyport(htons(port), proto);
- if (ss == 0) {
+ if (ss == NULL) {
(void) sprintf(number, "%d", port);
return (number);
}
diff --git a/freebsd/lib/libc/resolv/res_private.h b/freebsd/lib/libc/resolv/res_private.h
index 4e98157c..a986e95c 100644
--- a/freebsd/lib/libc/resolv/res_private.h
+++ b/freebsd/lib/libc/resolv/res_private.h
@@ -1,3 +1,5 @@
+/* $FreeBSD$ */
+
#ifndef res_private_h
#define res_private_h
@@ -12,6 +14,9 @@ struct __res_state_ext {
} sort_list[MAXRESOLVSORT];
char nsuffix[64];
char nsuffix2[64];
+ struct timespec conf_mtim; /* mod time of loaded resolv.conf */
+ time_t conf_stat; /* time of last stat(resolv.conf) */
+ u_short reload_period; /* seconds between stat(resolv.conf) */
};
extern int
diff --git a/freebsd/lib/libc/resolv/res_query.c b/freebsd/lib/libc/resolv/res_query.c
index 9dd8f186..7189e6e2 100644
--- a/freebsd/lib/libc/resolv/res_query.c
+++ b/freebsd/lib/libc/resolv/res_query.c
@@ -74,7 +74,6 @@ static const char rcsid[] = "$Id: res_query.c,v 1.11 2008/11/14 02:36:51 marka E
__FBSDID("$FreeBSD$");
#include "port_before.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
@@ -90,7 +89,9 @@ __FBSDID("$FreeBSD$");
#include "port_after.h"
/* Options. Leave them on. */
-#define DEBUG
+#ifndef DEBUG
+#define DEBUG
+#endif
#if PACKETSZ > 1024
#define MAXPACKET PACKETSZ
@@ -136,8 +137,8 @@ again:
if (n > 0 && (statp->_flags & RES_F_EDNS0ERR) == 0 &&
(statp->options & (RES_USE_EDNS0|RES_USE_DNSSEC|RES_NSID))) {
n = res_nopt(statp, n, buf, sizeof(buf), anslen);
- rdata = &buf[n];
if (n > 0 && (statp->options & RES_NSID) != 0U) {
+ rdata = &buf[n];
n = res_nopt_rdata(statp, n, buf, sizeof(buf), rdata,
NS_OPT_NSID, 0, NULL);
}
@@ -459,7 +460,7 @@ res_hostalias(const res_state statp, const char *name, char *dst, size_t siz) {
if (issetugid())
return (NULL);
file = getenv("HOSTALIASES");
- if (file == NULL || (fp = fopen(file, "r")) == NULL)
+ if (file == NULL || (fp = fopen(file, "re")) == NULL)
return (NULL);
setbuf(fp, NULL);
buf[sizeof(buf) - 1] = '\0';
diff --git a/freebsd/lib/libc/resolv/res_send.c b/freebsd/lib/libc/resolv/res_send.c
index c39d8df9..73817ad5 100644
--- a/freebsd/lib/libc/resolv/res_send.c
+++ b/freebsd/lib/libc/resolv/res_send.c
@@ -79,12 +79,11 @@ __FBSDID("$FreeBSD$");
*/
#include "port_before.h"
-#ifndef USE_KQUEUE
+#if !defined(USE_KQUEUE) && !defined(USE_POLL)
#include "fd_setsize.h"
#endif
#include "namespace.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/time.h>
#include <sys/socket.h>
@@ -121,7 +120,9 @@ __FBSDID("$FreeBSD$");
#include "un-namespace.h"
/* Options. Leave them on. */
-#define DEBUG
+#ifndef DEBUG
+#define DEBUG
+#endif
#include "res_debug.h"
#include "res_private.h"
@@ -576,8 +577,7 @@ res_nsend(res_state statp,
/* Private */
static int
-get_salen(sa)
- const struct sockaddr *sa;
+get_salen(const struct sockaddr *sa)
{
#ifdef HAVE_SA_LEN
@@ -598,9 +598,7 @@ get_salen(sa)
* pick appropriate nsaddr_list for use. see res_init() for initialization.
*/
static struct sockaddr *
-get_nsaddr(statp, n)
- res_state statp;
- size_t n;
+get_nsaddr(res_state statp, size_t n)
{
if (!statp->nsaddr_list[n].sin_family && EXT(statp).ext) {
@@ -662,7 +660,8 @@ send_vc(res_state statp,
if (statp->_vcsock >= 0)
res_nclose(statp);
- statp->_vcsock = _socket(nsap->sa_family, SOCK_STREAM, 0);
+ statp->_vcsock = _socket(nsap->sa_family, SOCK_STREAM |
+ SOCK_CLOEXEC, 0);
#if !defined(USE_POLL) && !defined(USE_KQUEUE)
if (statp->_vcsock > highestFD) {
res_nclose(statp);
@@ -853,7 +852,7 @@ send_dg(res_state statp,
nsaplen = get_salen(nsap);
if (EXT(statp).nssocks[ns] == -1) {
EXT(statp).nssocks[ns] = _socket(nsap->sa_family,
- SOCK_DGRAM, 0);
+ SOCK_DGRAM | SOCK_CLOEXEC, 0);
#if !defined(USE_POLL) && !defined(USE_KQUEUE)
if (EXT(statp).nssocks[ns] > highestFD) {
res_nclose(statp);
@@ -964,7 +963,7 @@ send_dg(res_state statp,
timeout.tv_nsec/1000000;
pollfd.fd = s;
pollfd.events = POLLRDNORM;
- n = poll(&pollfd, 1, polltimeout);
+ n = _poll(&pollfd, 1, polltimeout);
#endif /* USE_POLL */
if (n == 0) {
diff --git a/freebsd/lib/libc/resolv/res_state.c b/freebsd/lib/libc/resolv/res_state.c
index a89b9b5e..1cfe1700 100644
--- a/freebsd/lib/libc/resolv/res_state.c
+++ b/freebsd/lib/libc/resolv/res_state.c
@@ -28,6 +28,8 @@
*/
#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <resolv.h>
@@ -37,6 +39,8 @@
#include "reentrant.h"
#include "un-namespace.h"
+#include "res_private.h"
+
#undef _res
struct __res_state _res;
@@ -61,6 +65,37 @@ res_keycreate(void)
res_thr_keycreated = thr_keycreate(&res_key, free_res) == 0;
}
+static res_state
+res_check_reload(res_state statp)
+{
+ struct timespec now;
+ struct stat sb;
+ struct __res_state_ext *ext;
+
+ if ((statp->options & RES_INIT) == 0) {
+ return (statp);
+ }
+
+ ext = statp->_u._ext.ext;
+ if (ext == NULL || ext->reload_period == 0) {
+ return (statp);
+ }
+
+ if (clock_gettime(CLOCK_MONOTONIC_FAST, &now) != 0 ||
+ (now.tv_sec - ext->conf_stat) < ext->reload_period) {
+ return (statp);
+ }
+
+ ext->conf_stat = now.tv_sec;
+ if (stat(_PATH_RESCONF, &sb) == 0 &&
+ (sb.st_mtim.tv_sec != ext->conf_mtim.tv_sec ||
+ sb.st_mtim.tv_nsec != ext->conf_mtim.tv_nsec)) {
+ statp->options &= ~RES_INIT;
+ }
+
+ return (statp);
+}
+
res_state
__res_state(void)
{
@@ -68,7 +103,7 @@ __res_state(void)
#ifndef __rtems__
if (thr_main() != 0)
- return (&_res);
+ return res_check_reload(&_res);
#endif /* __rtems__ */
if (thr_once(&res_init_once, res_keycreate) != 0 ||
@@ -77,7 +112,7 @@ __res_state(void)
statp = thr_getspecific(res_key);
if (statp != NULL)
- return (statp);
+ return res_check_reload(statp);
statp = calloc(1, sizeof(*statp));
if (statp == NULL)
return (&_res);
diff --git a/freebsd/lib/libc/rpc/auth_des.c b/freebsd/lib/libc/rpc/auth_des.c
index b3002301..02943484 100644
--- a/freebsd/lib/libc/rpc/auth_des.c
+++ b/freebsd/lib/libc/rpc/auth_des.c
@@ -71,7 +71,7 @@ __FBSDID("$FreeBSD$");
extern bool_t xdr_authdes_cred( XDR *, struct authdes_cred *);
extern bool_t xdr_authdes_verf( XDR *, struct authdes_verf *);
-extern int key_encryptsession_pk();
+extern int key_encryptsession_pk(char *, netobj *, des_block *);
extern bool_t __rpc_get_time_offset(struct timeval *, nis_server *, char *,
char **, char **);
@@ -261,7 +261,7 @@ failed:
*/
/*ARGSUSED*/
static void
-authdes_nextverf(AUTH *auth)
+authdes_nextverf(AUTH *auth __unused)
{
/* what the heck am I supposed to do??? */
}
@@ -287,7 +287,7 @@ authdes_marshal(AUTH *auth, XDR *xdrs)
* Figure out the "time", accounting for any time difference
* with the server if necessary.
*/
- (void) gettimeofday(&ad->ad_timestamp, (struct timezone *)NULL);
+ (void)gettimeofday(&ad->ad_timestamp, NULL);
ad->ad_timestamp.tv_sec += ad->ad_timediff.tv_sec;
ad->ad_timestamp.tv_usec += ad->ad_timediff.tv_usec;
while (ad->ad_timestamp.tv_usec >= USEC_PER_SEC) {
@@ -422,7 +422,7 @@ authdes_validate(AUTH *auth, struct opaque_auth *rverf)
*/
/*ARGSUSED*/
static bool_t
-authdes_refresh(AUTH *auth, void *dummy)
+authdes_refresh(AUTH *auth, void *dummy __unused)
{
/* LINTED pointer alignment */
struct ad_private *ad = AUTH_PRIVATE(auth);
diff --git a/freebsd/lib/libc/rpc/auth_none.c b/freebsd/lib/libc/rpc/auth_none.c
index 821771de..9f91292a 100644
--- a/freebsd/lib/libc/rpc/auth_none.c
+++ b/freebsd/lib/libc/rpc/auth_none.c
@@ -67,9 +67,9 @@ static bool_t authnone_validate (AUTH *, struct opaque_auth *);
static bool_t authnone_refresh (AUTH *, void *);
static void authnone_destroy (AUTH *);
-extern bool_t xdr_opaque_auth();
+extern bool_t xdr_opaque_auth(XDR *, struct opaque_auth *);
-static struct auth_ops *authnone_ops();
+static struct auth_ops *authnone_ops(void);
static struct authnone_private {
AUTH no_client;
@@ -78,16 +78,16 @@ static struct authnone_private {
} *authnone_private;
AUTH *
-authnone_create()
+authnone_create(void)
{
struct authnone_private *ap = authnone_private;
XDR xdr_stream;
XDR *xdrs;
mutex_lock(&authnone_lock);
- if (ap == 0) {
- ap = (struct authnone_private *)calloc(1, sizeof (*ap));
- if (ap == 0) {
+ if (ap == NULL) {
+ ap = calloc(1, sizeof (*ap));
+ if (ap == NULL) {
mutex_unlock(&authnone_lock);
return (0);
}
@@ -158,7 +158,7 @@ authnone_destroy(AUTH *client)
}
static struct auth_ops *
-authnone_ops()
+authnone_ops(void)
{
static struct auth_ops ops;
diff --git a/freebsd/lib/libc/rpc/auth_time.c b/freebsd/lib/libc/rpc/auth_time.c
index 09e197a7..a685ebb5 100644
--- a/freebsd/lib/libc/rpc/auth_time.c
+++ b/freebsd/lib/libc/rpc/auth_time.c
@@ -63,8 +63,7 @@ extern int _rpc_dtablesize( void );
static int saw_alarm = 0;
static void
-alarm_hndler(s)
- int s;
+alarm_hndler(int s)
{
saw_alarm = 1;
return;
@@ -85,12 +84,7 @@ alarm_hndler(s)
* Turn a 'universal address' into a struct sockaddr_in.
* Bletch.
*/
-static int uaddr_to_sockaddr(uaddr, sin)
-#ifdef foo
- endpoint *endpt;
-#endif
- char *uaddr;
- struct sockaddr_in *sin;
+static int uaddr_to_sockaddr(char *uaddr, struct sockaddr_in *sin)
{
unsigned char p_bytes[2];
int i;
@@ -120,9 +114,7 @@ static int uaddr_to_sockaddr(uaddr, sin)
* Free the strings that were strduped into the eps structure.
*/
static void
-free_eps(eps, num)
- endpoint eps[];
- int num;
+free_eps(endpoint eps[], int num)
{
int i;
@@ -144,14 +136,15 @@ free_eps(eps, num)
* fact that gethostbyname() could do an NIS search. Ideally, the
* NIS+ server will call __rpc_get_time_offset() with the nis_server
* structure already populated.
+ *
+ * host - name of the time host
+ * srv - nis_server struct to use.
+ * eps[] - array of endpoints
+ * maxep - max array size
*/
static nis_server *
-get_server(sin, host, srv, eps, maxep)
- struct sockaddr_in *sin;
- char *host; /* name of the time host */
- nis_server *srv; /* nis_server struct to use. */
- endpoint eps[]; /* array of endpoints */
- int maxep; /* max array size */
+get_server(struct sockaddr_in *sin, char *host, nis_server *srv,
+ endpoint eps[], int maxep)
{
char hname[256];
int num_ep = 0, i;
@@ -238,14 +231,16 @@ get_server(sin, host, srv, eps, maxep)
* structure and to then contact the machine for the time.
*
* td = "server" - "client"
+ *
+ * td - Time difference
+ * srv - NIS Server description
+ * thost - if no server, this is the timehost
+ * uaddr - known universal address
+ * netid - known network identifier
*/
int
-__rpc_get_time_offset(td, srv, thost, uaddr, netid)
- struct timeval *td; /* Time difference */
- nis_server *srv; /* NIS Server description */
- char *thost; /* if no server, this is the timehost */
- char **uaddr; /* known universal address */
- struct sockaddr_in *netid; /* known network identifier */
+__rpc_get_time_offset(struct timeval *td, nis_server *srv, char *thost,
+ char **uaddr, struct sockaddr_in *netid)
{
CLIENT *clnt; /* Client handle */
endpoint *ep, /* useful endpoints */
@@ -262,7 +257,7 @@ __rpc_get_time_offset(td, srv, thost, uaddr, netid)
char ut[64], ipuaddr[64];
endpoint teps[32];
nis_server tsrv;
- void (*oldsig)() = NULL; /* old alarm handler */
+ void (*oldsig)(int) = NULL; /* old alarm handler */
struct sockaddr_in sin;
socklen_t len;
int s = RPC_ANYSOCK;
@@ -431,7 +426,7 @@ __rpc_get_time_offset(td, srv, thost, uaddr, netid)
} else {
int res;
- oldsig = (void (*)())signal(SIGALRM, alarm_hndler);
+ oldsig = (void (*)(int))signal(SIGALRM, alarm_hndler);
saw_alarm = 0; /* global tracking the alarm */
alarm(20); /* only wait 20 seconds */
res = _connect(s, (struct sockaddr *)&sin, sizeof(sin));
diff --git a/freebsd/lib/libc/rpc/auth_unix.c b/freebsd/lib/libc/rpc/auth_unix.c
index 1d9130df..b27236ad 100644
--- a/freebsd/lib/libc/rpc/auth_unix.c
+++ b/freebsd/lib/libc/rpc/auth_unix.c
@@ -93,12 +93,7 @@ struct audata {
* Returns an auth handle with the given stuff in it.
*/
AUTH *
-authunix_create(machname, uid, gid, len, aup_gids)
- char *machname;
- u_int uid;
- u_int gid;
- int len;
- u_int *aup_gids;
+authunix_create(char *machname, u_int uid, u_int gid, int len, u_int *aup_gids)
{
struct authunix_parms aup;
char mymem[MAX_AUTH_BYTES];
@@ -184,15 +179,15 @@ authunix_create(machname, uid, gid, len, aup_gids)
* syscalls.
*/
AUTH *
-authunix_create_default()
+authunix_create_default(void)
{
AUTH *auth;
int ngids;
long ngids_max;
char machname[MAXHOSTNAMELEN + 1];
- u_int uid;
- u_int gid;
- u_int *gids;
+ uid_t uid;
+ gid_t gid;
+ gid_t *gids;
ngids_max = sysconf(_SC_NGROUPS_MAX) + 1;
gids = malloc(sizeof(gid_t) * ngids_max);
@@ -220,16 +215,13 @@ authunix_create_default()
/* ARGSUSED */
static void
-authunix_nextverf(auth)
- AUTH *auth;
+authunix_nextverf(AUTH *auth)
{
/* no action necessary */
}
static bool_t
-authunix_marshal(auth, xdrs)
- AUTH *auth;
- XDR *xdrs;
+authunix_marshal(AUTH *auth, XDR *xdrs)
{
struct audata *au;
@@ -241,9 +233,7 @@ authunix_marshal(auth, xdrs)
}
static bool_t
-authunix_validate(auth, verf)
- AUTH *auth;
- struct opaque_auth *verf;
+authunix_validate(AUTH *auth, struct opaque_auth *verf)
{
struct audata *au;
XDR xdrs;
@@ -319,8 +309,7 @@ done:
}
static void
-authunix_destroy(auth)
- AUTH *auth;
+authunix_destroy(AUTH *auth)
{
struct audata *au;
@@ -345,8 +334,7 @@ authunix_destroy(auth)
* sets private data, au_marshed and au_mpos
*/
static void
-marshal_new_auth(auth)
- AUTH *auth;
+marshal_new_auth(AUTH *auth)
{
XDR xdr_stream;
XDR *xdrs = &xdr_stream;
@@ -365,7 +353,7 @@ marshal_new_auth(auth)
}
static struct auth_ops *
-authunix_ops()
+authunix_ops(void)
{
static struct auth_ops ops;
diff --git a/freebsd/lib/libc/rpc/authdes_prot.c b/freebsd/lib/libc/rpc/authdes_prot.c
index af77afdd..dd28c049 100644
--- a/freebsd/lib/libc/rpc/authdes_prot.c
+++ b/freebsd/lib/libc/rpc/authdes_prot.c
@@ -51,9 +51,7 @@ __FBSDID("$FreeBSD$");
#define ATTEMPT(xdr_op) if (!(xdr_op)) return (FALSE)
bool_t
-xdr_authdes_cred(xdrs, cred)
- XDR *xdrs;
- struct authdes_cred *cred;
+xdr_authdes_cred(XDR *xdrs, struct authdes_cred *cred)
{
enum authdes_namekind *padc_namekind = &cred->adc_namekind;
/*
@@ -80,9 +78,7 @@ xdr_authdes_cred(xdrs, cred)
bool_t
-xdr_authdes_verf(xdrs, verf)
- XDR *xdrs;
- struct authdes_verf *verf;
+xdr_authdes_verf(XDR *xdrs, struct authdes_verf *verf)
{
/*
* Unrolled xdr
diff --git a/freebsd/lib/libc/rpc/authunix_prot.c b/freebsd/lib/libc/rpc/authunix_prot.c
index e5413b2d..9eb5e8d3 100644
--- a/freebsd/lib/libc/rpc/authunix_prot.c
+++ b/freebsd/lib/libc/rpc/authunix_prot.c
@@ -57,9 +57,7 @@ __FBSDID("$FreeBSD$");
* XDR for unix authentication parameters.
*/
bool_t
-xdr_authunix_parms(xdrs, p)
- XDR *xdrs;
- struct authunix_parms *p;
+xdr_authunix_parms(XDR *xdrs, struct authunix_parms *p)
{
u_int **paup_gids;
diff --git a/freebsd/lib/libc/rpc/bindresvport.c b/freebsd/lib/libc/rpc/bindresvport.c
index baf5cbd4..77e03568 100644
--- a/freebsd/lib/libc/rpc/bindresvport.c
+++ b/freebsd/lib/libc/rpc/bindresvport.c
@@ -63,9 +63,7 @@ __FBSDID("$FreeBSD$");
* Bind a socket to a privileged IP port
*/
int
-bindresvport(sd, sin)
- int sd;
- struct sockaddr_in *sin;
+bindresvport(int sd, struct sockaddr_in *sin)
{
return bindresvport_sa(sd, (struct sockaddr *)sin);
}
@@ -74,9 +72,7 @@ bindresvport(sd, sin)
* Bind a socket to a privileged IP port
*/
int
-bindresvport_sa(sd, sa)
- int sd;
- struct sockaddr *sa;
+bindresvport_sa(int sd, struct sockaddr *sa)
{
int old, error, af;
struct sockaddr_storage myaddr;
diff --git a/freebsd/lib/libc/rpc/clnt_bcast.c b/freebsd/lib/libc/rpc/clnt_bcast.c
index 3a12c1e9..d17e9166 100644
--- a/freebsd/lib/libc/rpc/clnt_bcast.c
+++ b/freebsd/lib/libc/rpc/clnt_bcast.c
@@ -227,21 +227,26 @@ __rpc_broadenable(int af, int s, struct broadif *bip)
return 0;
}
-
+/*
+ * rpc_broadcast_exp()
+ *
+ * prog - program number
+ * vers - version number
+ * proc - procedure number
+ * xargs - xdr routine for args
+ * argsp - pointer to args
+ * xresults - xdr routine for results
+ * resultsp - pointer to results
+ * eachresult - call with each result obtained
+ * inittime - how long to wait initially
+ * waittime - maximum time to wait
+ * nettype - transport type
+ */
enum clnt_stat
-rpc_broadcast_exp(prog, vers, proc, xargs, argsp, xresults, resultsp,
- eachresult, inittime, waittime, nettype)
- rpcprog_t prog; /* program number */
- rpcvers_t vers; /* version number */
- rpcproc_t proc; /* procedure number */
- xdrproc_t xargs; /* xdr routine for args */
- caddr_t argsp; /* pointer to args */
- xdrproc_t xresults; /* xdr routine for results */
- caddr_t resultsp; /* pointer to results */
- resultproc_t eachresult; /* call with each result obtained */
- int inittime; /* how long to wait initially */
- int waittime; /* maximum time to wait */
- const char *nettype; /* transport type */
+rpc_broadcast_exp(rpcprog_t prog, rpcvers_t vers, rpcproc_t proc,
+ xdrproc_t xargs, caddr_t argsp, xdrproc_t xresults, caddr_t resultsp,
+ resultproc_t eachresult, int inittime, int waittime,
+ const char *nettype)
{
enum clnt_stat stat = RPC_SUCCESS; /* Return status */
XDR xdr_stream; /* XDR stream */
@@ -253,7 +258,7 @@ rpc_broadcast_exp(prog, vers, proc, xargs, argsp, xresults, resultsp,
int inlen;
u_int maxbufsize = 0;
AUTH *sys_auth = authunix_create_default();
- int i;
+ u_int i;
void *handle;
char uaddress[1024]; /* A self imposed limit */
char *uaddrp = uaddress;
@@ -343,7 +348,8 @@ rpc_broadcast_exp(prog, vers, proc, xargs, argsp, xresults, resultsp,
#ifdef PORTMAP
if (si.si_af == AF_INET && si.si_proto == IPPROTO_UDP) {
udpbufsz = fdlist[fdlistno].dsize;
- if ((outbuf_pmap = malloc(udpbufsz)) == NULL) {
+ outbuf_pmap = reallocf(outbuf_pmap, udpbufsz);
+ if (outbuf_pmap == NULL) {
_close(fd);
stat = RPC_SYSTEMERROR;
goto done_broad;
@@ -466,7 +472,7 @@ rpc_broadcast_exp(prog, vers, proc, xargs, argsp, xresults, resultsp,
"broadcast packet");
stat = RPC_CANTSEND;
continue;
- };
+ }
#ifdef RPC_DEBUG
if (!__rpc_lowvers)
fprintf(stderr, "Broadcast packet sent "
@@ -633,13 +639,10 @@ rpc_broadcast_exp(prog, vers, proc, xargs, argsp, xresults, resultsp,
} /* The giant for loop */
done_broad:
- if (inbuf)
- (void) free(inbuf);
- if (outbuf)
- (void) free(outbuf);
+ free(inbuf);
+ free(outbuf);
#ifdef PORTMAP
- if (outbuf_pmap)
- (void) free(outbuf_pmap);
+ free(outbuf_pmap);
#endif /* PORTMAP */
for (i = 0; i < fdlistno; i++) {
(void)_close(fdlist[i].fd);
@@ -651,19 +654,23 @@ done_broad:
return (stat);
}
-
+/*
+ * rpc_broadcast()
+ *
+ * prog - program number
+ * vers - version number
+ * proc - procedure number
+ * xargs - xdr routine for args
+ * argsp - pointer to args
+ * xresults - xdr routine for results
+ * resultsp - pointer to results
+ * eachresult - call with each result obtained
+ * nettype - transport type
+ */
enum clnt_stat
-rpc_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp,
- eachresult, nettype)
- rpcprog_t prog; /* program number */
- rpcvers_t vers; /* version number */
- rpcproc_t proc; /* procedure number */
- xdrproc_t xargs; /* xdr routine for args */
- caddr_t argsp; /* pointer to args */
- xdrproc_t xresults; /* xdr routine for results */
- caddr_t resultsp; /* pointer to results */
- resultproc_t eachresult; /* call with each result obtained */
- const char *nettype; /* transport type */
+rpc_broadcast(rpcprog_t prog, rpcvers_t vers, rpcproc_t proc, xdrproc_t xargs,
+ caddr_t argsp, xdrproc_t xresults, caddr_t resultsp,
+ resultproc_t eachresult, const char *nettype)
{
enum clnt_stat dummy;
diff --git a/freebsd/lib/libc/rpc/clnt_dg.c b/freebsd/lib/libc/rpc/clnt_dg.c
index c3694337..0c6db42d 100644
--- a/freebsd/lib/libc/rpc/clnt_dg.c
+++ b/freebsd/lib/libc/rpc/clnt_dg.c
@@ -155,15 +155,17 @@ struct cu_data {
* If they are 0, use the transport default.
*
* If svcaddr is NULL, returns NULL.
+ *
+ * fd - open file descriptor
+ * svcaddr - servers address
+ * program - program number
+ * version - version number
+ * sendsz - buffer recv size
+ * recvsz - buffer send size
*/
CLIENT *
-clnt_dg_create(fd, svcaddr, program, version, sendsz, recvsz)
- int fd; /* open file descriptor */
- const struct netbuf *svcaddr; /* servers address */
- rpcprog_t program; /* program number */
- rpcvers_t version; /* version number */
- u_int sendsz; /* buffer recv size */
- u_int recvsz; /* buffer send size */
+clnt_dg_create(int fd, const struct netbuf *svcaddr, rpcprog_t program,
+ rpcvers_t version, u_int sendsz, u_int recvsz)
{
CLIENT *cl = NULL; /* client handle */
struct cu_data *cu = NULL; /* private data */
@@ -303,15 +305,18 @@ err2:
return (NULL);
}
+/*
+ * cl - client handle
+ * proc - procedure number
+ * xargs - xdr routine for args
+ * argsp - pointer to args
+ * xresults - xdr routine for results
+ * resultsp - pointer to results
+ * utimeout - seconds to wait before giving up
+ */
static enum clnt_stat
-clnt_dg_call(cl, proc, xargs, argsp, xresults, resultsp, utimeout)
- CLIENT *cl; /* client handle */
- rpcproc_t proc; /* procedure number */
- xdrproc_t xargs; /* xdr routine for args */
- void *argsp; /* pointer to args */
- xdrproc_t xresults; /* xdr routine for results */
- void *resultsp; /* pointer to results */
- struct timeval utimeout; /* seconds to wait before giving up */
+clnt_dg_call(CLIENT *cl, rpcproc_t proc, xdrproc_t xargs, void *argsp,
+ xdrproc_t xresults, void *resultsp, struct timeval utimeout)
{
struct cu_data *cu = (struct cu_data *)cl->cl_private;
XDR *xdrs;
@@ -329,7 +334,7 @@ clnt_dg_call(cl, proc, xargs, argsp, xresults, resultsp, utimeout)
struct sockaddr *sa;
sigset_t mask;
sigset_t newmask;
- socklen_t inlen, salen;
+ socklen_t salen;
ssize_t recvlen = 0;
int kin_len, n, rpc_lock_value;
u_int32_t xid;
@@ -522,7 +527,6 @@ get_reply:
goto call_again_same_xid;
}
}
- inlen = (socklen_t)recvlen;
/*
* now decode and validate the response
@@ -580,7 +584,7 @@ get_reply:
}
} /* end successful completion */
/*
- * If unsuccesful AND error is an authentication error
+ * If unsuccessful AND error is an authentication error
* then refresh credentials and try again, else break
*/
else if (cu->cu_error.re_status == RPC_AUTHERROR)
@@ -605,9 +609,7 @@ out:
}
static void
-clnt_dg_geterr(cl, errp)
- CLIENT *cl;
- struct rpc_err *errp;
+clnt_dg_geterr(CLIENT *cl, struct rpc_err *errp)
{
struct cu_data *cu = (struct cu_data *)cl->cl_private;
@@ -615,10 +617,7 @@ clnt_dg_geterr(cl, errp)
}
static bool_t
-clnt_dg_freeres(cl, xdr_res, res_ptr)
- CLIENT *cl;
- xdrproc_t xdr_res;
- void *res_ptr;
+clnt_dg_freeres(CLIENT *cl, xdrproc_t xdr_res, void *res_ptr)
{
struct cu_data *cu = (struct cu_data *)cl->cl_private;
XDR *xdrs = &(cu->cu_outxdrs);
@@ -641,16 +640,12 @@ clnt_dg_freeres(cl, xdr_res, res_ptr)
/*ARGSUSED*/
static void
-clnt_dg_abort(h)
- CLIENT *h;
+clnt_dg_abort(CLIENT *h)
{
}
static bool_t
-clnt_dg_control(cl, request, info)
- CLIENT *cl;
- u_int request;
- void *info;
+clnt_dg_control(CLIENT *cl, u_int request, void *info)
{
struct cu_data *cu = (struct cu_data *)cl->cl_private;
struct netbuf *addr;
@@ -749,7 +744,7 @@ clnt_dg_control(cl, request, info)
/*
* This RELIES on the information that, in the call body,
* the version number field is the fifth field from the
- * begining of the RPC header. MUST be changed if the
+ * beginning of the RPC header. MUST be changed if the
* call_struct is changed
*/
*(u_int32_t *)info =
@@ -766,7 +761,7 @@ clnt_dg_control(cl, request, info)
/*
* This RELIES on the information that, in the call body,
* the program number field is the fourth field from the
- * begining of the RPC header. MUST be changed if the
+ * beginning of the RPC header. MUST be changed if the
* call_struct is changed
*/
*(u_int32_t *)info =
@@ -793,8 +788,7 @@ clnt_dg_control(cl, request, info)
}
static void
-clnt_dg_destroy(cl)
- CLIENT *cl;
+clnt_dg_destroy(CLIENT *cl)
{
struct cu_data *cu = (struct cu_data *)cl->cl_private;
int cu_fd = cu->cu_fd;
@@ -823,7 +817,7 @@ clnt_dg_destroy(cl)
}
static struct clnt_ops *
-clnt_dg_ops()
+clnt_dg_ops(void)
{
static struct clnt_ops ops;
sigset_t mask;
@@ -851,8 +845,7 @@ clnt_dg_ops()
* Make sure that the time is not garbage. -1 value is allowed.
*/
static bool_t
-time_not_ok(t)
- struct timeval *t;
+time_not_ok(struct timeval *t)
{
return (t->tv_sec < -1 || t->tv_sec > 100000000 ||
t->tv_usec < -1 || t->tv_usec > 1000000);
diff --git a/freebsd/lib/libc/rpc/clnt_generic.c b/freebsd/lib/libc/rpc/clnt_generic.c
index cca7f3e3..1f9960d2 100644
--- a/freebsd/lib/libc/rpc/clnt_generic.c
+++ b/freebsd/lib/libc/rpc/clnt_generic.c
@@ -404,7 +404,7 @@ clnt_tli_create(int fd, const struct netconfig *nconf,
if (madefd) {
(void) CLNT_CONTROL(cl, CLSET_FD_CLOSE, NULL);
/* (void) CLNT_CONTROL(cl, CLSET_POP_TIMOD, NULL); */
- };
+ }
return (cl);
diff --git a/freebsd/lib/libc/rpc/clnt_perror.c b/freebsd/lib/libc/rpc/clnt_perror.c
index 56155728..1dda69bc 100644
--- a/freebsd/lib/libc/rpc/clnt_perror.c
+++ b/freebsd/lib/libc/rpc/clnt_perror.c
@@ -63,11 +63,11 @@ static char *auth_errmsg(enum auth_stat);
#define CLNT_PERROR_BUFLEN 256
static char *
-_buf()
+_buf(void)
{
- if (buf == 0)
- buf = (char *)malloc(CLNT_PERROR_BUFLEN);
+ if (buf == NULL)
+ buf = malloc(CLNT_PERROR_BUFLEN);
return (buf);
}
@@ -75,9 +75,7 @@ _buf()
* Print reply error info
*/
char *
-clnt_sperror(rpch, s)
- CLIENT *rpch;
- const char *s;
+clnt_sperror(CLIENT *rpch, const char *s)
{
struct rpc_err e;
char *err;
@@ -89,7 +87,7 @@ clnt_sperror(rpch, s)
assert(s != NULL);
str = _buf(); /* side effect: sets CLNT_PERROR_BUFLEN */
- if (str == 0)
+ if (str == NULL)
return (0);
len = CLNT_PERROR_BUFLEN;
strstart = str;
@@ -182,9 +180,7 @@ clnt_sperror(rpch, s)
}
void
-clnt_perror(rpch, s)
- CLIENT *rpch;
- const char *s;
+clnt_perror(CLIENT *rpch, const char *s)
{
assert(rpch != NULL);
@@ -219,8 +215,7 @@ static const char *const rpc_errlist[] = {
* This interface for use by clntrpc
*/
char *
-clnt_sperrno(stat)
- enum clnt_stat stat;
+clnt_sperrno(enum clnt_stat stat)
{
unsigned int errnum = stat;
@@ -232,16 +227,14 @@ clnt_sperrno(stat)
}
void
-clnt_perrno(num)
- enum clnt_stat num;
+clnt_perrno(enum clnt_stat num)
{
(void) fprintf(stderr, "%s\n", clnt_sperrno(num));
}
char *
-clnt_spcreateerror(s)
- const char *s;
+clnt_spcreateerror(const char *s)
{
char *str;
size_t len, i;
@@ -249,7 +242,7 @@ clnt_spcreateerror(s)
assert(s != NULL);
str = _buf(); /* side effect: sets CLNT_PERROR_BUFLEN */
- if (str == 0)
+ if (str == NULL)
return(0);
len = CLNT_PERROR_BUFLEN;
i = snprintf(str, len, "%s: ", s);
@@ -293,8 +286,7 @@ clnt_spcreateerror(s)
}
void
-clnt_pcreateerror(s)
- const char *s;
+clnt_pcreateerror(const char *s)
{
assert(s != NULL);
@@ -321,8 +313,7 @@ static const char *const auth_errlist[] = {
};
static char *
-auth_errmsg(stat)
- enum auth_stat stat;
+auth_errmsg(enum auth_stat stat)
{
unsigned int errnum = stat;
diff --git a/freebsd/lib/libc/rpc/clnt_raw.c b/freebsd/lib/libc/rpc/clnt_raw.c
index ec130c14..f2f18b33 100644
--- a/freebsd/lib/libc/rpc/clnt_raw.c
+++ b/freebsd/lib/libc/rpc/clnt_raw.c
@@ -89,9 +89,7 @@ static struct clnt_ops *clnt_raw_ops(void);
* Create a client handle for memory based rpc.
*/
CLIENT *
-clnt_raw_create(prog, vers)
- rpcprog_t prog;
- rpcvers_t vers;
+clnt_raw_create(rpcprog_t prog, rpcvers_t vers)
{
struct clntraw_private *clp;
struct rpc_msg call_msg;
@@ -144,14 +142,8 @@ clnt_raw_create(prog, vers)
/* ARGSUSED */
static enum clnt_stat
-clnt_raw_call(h, proc, xargs, argsp, xresults, resultsp, timeout)
- CLIENT *h;
- rpcproc_t proc;
- xdrproc_t xargs;
- void *argsp;
- xdrproc_t xresults;
- void *resultsp;
- struct timeval timeout;
+clnt_raw_call(CLIENT *h, rpcproc_t proc, xdrproc_t xargs, void *argsp,
+ xdrproc_t xresults, void *resultsp, struct timeval timeout)
{
struct clntraw_private *clp = clntraw_private;
XDR *xdrs = &clp->xdr_stream;
@@ -242,19 +234,14 @@ call_again:
/*ARGSUSED*/
static void
-clnt_raw_geterr(cl, err)
- CLIENT *cl;
- struct rpc_err *err;
+clnt_raw_geterr(CLIENT *cl, struct rpc_err *err)
{
}
/* ARGSUSED */
static bool_t
-clnt_raw_freeres(cl, xdr_res, res_ptr)
- CLIENT *cl;
- xdrproc_t xdr_res;
- void *res_ptr;
+clnt_raw_freeres(CLIENT *cl, xdrproc_t xdr_res, void *res_ptr)
{
struct clntraw_private *clp = clntraw_private;
XDR *xdrs = &clp->xdr_stream;
@@ -273,30 +260,25 @@ clnt_raw_freeres(cl, xdr_res, res_ptr)
/*ARGSUSED*/
static void
-clnt_raw_abort(cl)
- CLIENT *cl;
+clnt_raw_abort(CLIENT *cl)
{
}
/*ARGSUSED*/
static bool_t
-clnt_raw_control(cl, ui, str)
- CLIENT *cl;
- u_int ui;
- void *str;
+clnt_raw_control(CLIENT *cl, u_int ui, void *str)
{
return (FALSE);
}
/*ARGSUSED*/
static void
-clnt_raw_destroy(cl)
- CLIENT *cl;
+clnt_raw_destroy(CLIENT *cl)
{
}
static struct clnt_ops *
-clnt_raw_ops()
+clnt_raw_ops(void)
{
static struct clnt_ops ops;
diff --git a/freebsd/lib/libc/rpc/clnt_simple.c b/freebsd/lib/libc/rpc/clnt_simple.c
index 56d76b8a..491172cf 100644
--- a/freebsd/lib/libc/rpc/clnt_simple.c
+++ b/freebsd/lib/libc/rpc/clnt_simple.c
@@ -109,17 +109,19 @@ rpc_call_key_init(void)
* the future calls to same prog, vers, host and nettype combination.
*
* The total time available is 25 seconds.
+ *
+ * host - host name
+ * prognum - program number
+ * versnum - version number
+ * procnum - procedure number
+ * inproc, outproc - in/out XDR procedures
+ * in, out - recv/send data
+ * nettype - nettype
*/
enum clnt_stat
-rpc_call(host, prognum, versnum, procnum, inproc, in, outproc, out, nettype)
- const char *host; /* host name */
- rpcprog_t prognum; /* program number */
- rpcvers_t versnum; /* version number */
- rpcproc_t procnum; /* procedure number */
- xdrproc_t inproc, outproc; /* in/out XDR procedures */
- const char *in;
- char *out; /* recv/send data */
- const char *nettype; /* nettype */
+rpc_call(const char *host, const rpcprog_t prognum, const rpcvers_t versnum,
+ const rpcproc_t procnum, const xdrproc_t inproc, const char *in,
+ const xdrproc_t outproc, char *out, const char *nettype)
{
struct rpc_call_private *rcp = (struct rpc_call_private *) 0;
enum clnt_stat clnt_stat;
diff --git a/freebsd/lib/libc/rpc/clnt_vc.c b/freebsd/lib/libc/rpc/clnt_vc.c
index 6c34cccc..8dc3de48 100644
--- a/freebsd/lib/libc/rpc/clnt_vc.c
+++ b/freebsd/lib/libc/rpc/clnt_vc.c
@@ -143,7 +143,6 @@ static cond_t *vc_cv;
static const char clnt_vc_errstr[] = "%s : %s";
static const char clnt_vc_str[] = "clnt_vc_create";
-static const char clnt_read_vc_str[] = "read_vc";
static const char __no_mem_str[] = "out of memory";
/*
@@ -156,15 +155,17 @@ static const char __no_mem_str[] = "out of memory";
* set this something more useful.
*
* fd should be an open socket
+ *
+ * fd - open file descriptor
+ * raddr - servers address
+ * prog - program number
+ * vers - version number
+ * sendsz - buffer send size
+ * recvsz - buffer recv size
*/
CLIENT *
-clnt_vc_create(fd, raddr, prog, vers, sendsz, recvsz)
- int fd; /* open file descriptor */
- const struct netbuf *raddr; /* servers address */
- const rpcprog_t prog; /* program number */
- const rpcvers_t vers; /* version number */
- u_int sendsz; /* buffer recv size */
- u_int recvsz; /* buffer send size */
+clnt_vc_create(int fd, const struct netbuf *raddr, const rpcprog_t prog,
+ const rpcvers_t vers, u_int sendsz, u_int recvsz)
{
CLIENT *cl; /* client handle */
struct ct_data *ct = NULL; /* client handle */
@@ -261,7 +262,7 @@ clnt_vc_create(fd, raddr, prog, vers, sendsz, recvsz)
if (ct->ct_addr.buf == NULL)
goto err;
memcpy(ct->ct_addr.buf, raddr->buf, raddr->len);
- ct->ct_addr.len = raddr->maxlen;
+ ct->ct_addr.len = raddr->len;
ct->ct_addr.maxlen = raddr->maxlen;
/*
@@ -314,14 +315,8 @@ err:
}
static enum clnt_stat
-clnt_vc_call(cl, proc, xdr_args, args_ptr, xdr_results, results_ptr, timeout)
- CLIENT *cl;
- rpcproc_t proc;
- xdrproc_t xdr_args;
- void *args_ptr;
- xdrproc_t xdr_results;
- void *results_ptr;
- struct timeval timeout;
+clnt_vc_call(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, void *args_ptr,
+ xdrproc_t xdr_results, void *results_ptr, struct timeval timeout)
{
struct ct_data *ct = (struct ct_data *) cl->cl_private;
XDR *xdrs = &(ct->ct_xdrs);
@@ -464,9 +459,7 @@ call_again:
}
static void
-clnt_vc_geterr(cl, errp)
- CLIENT *cl;
- struct rpc_err *errp;
+clnt_vc_geterr(CLIENT *cl, struct rpc_err *errp)
{
struct ct_data *ct;
@@ -478,10 +471,7 @@ clnt_vc_geterr(cl, errp)
}
static bool_t
-clnt_vc_freeres(cl, xdr_res, res_ptr)
- CLIENT *cl;
- xdrproc_t xdr_res;
- void *res_ptr;
+clnt_vc_freeres(CLIENT *cl, xdrproc_t xdr_res, void *res_ptr)
{
struct ct_data *ct;
XDR *xdrs;
@@ -510,16 +500,26 @@ clnt_vc_freeres(cl, xdr_res, res_ptr)
/*ARGSUSED*/
static void
-clnt_vc_abort(cl)
- CLIENT *cl;
+clnt_vc_abort(CLIENT *cl)
+{
+}
+
+static __inline void
+htonlp(void *dst, const void *src, uint32_t incr)
+{
+ /* We are aligned, so we think */
+ *(uint32_t *)dst = htonl(*(const uint32_t *)src + incr);
+}
+
+static __inline void
+ntohlp(void *dst, const void *src)
{
+ /* We are aligned, so we think */
+ *(uint32_t *)dst = htonl(*(const uint32_t *)src);
}
static bool_t
-clnt_vc_control(cl, request, info)
- CLIENT *cl;
- u_int request;
- void *info;
+clnt_vc_control(CLIENT *cl, u_int request, void *info)
{
struct ct_data *ct;
void *infop = info;
@@ -592,49 +592,39 @@ clnt_vc_control(cl, request, info)
* first element in the call structure
* This will get the xid of the PREVIOUS call
*/
- *(u_int32_t *)info =
- ntohl(*(u_int32_t *)(void *)&ct->ct_u.ct_mcalli);
+ ntohlp(info, &ct->ct_u.ct_mcalli);
break;
case CLSET_XID:
/* This will set the xid of the NEXT call */
- *(u_int32_t *)(void *)&ct->ct_u.ct_mcalli =
- htonl(*((u_int32_t *)info) + 1);
/* increment by 1 as clnt_vc_call() decrements once */
+ htonlp(&ct->ct_u.ct_mcalli, info, 1);
break;
case CLGET_VERS:
/*
* This RELIES on the information that, in the call body,
* the version number field is the fifth field from the
- * begining of the RPC header. MUST be changed if the
+ * beginning of the RPC header. MUST be changed if the
* call_struct is changed
*/
- *(u_int32_t *)info =
- ntohl(*(u_int32_t *)(void *)(ct->ct_u.ct_mcallc +
- 4 * BYTES_PER_XDR_UNIT));
+ ntohlp(info, ct->ct_u.ct_mcallc + 4 * BYTES_PER_XDR_UNIT);
break;
case CLSET_VERS:
- *(u_int32_t *)(void *)(ct->ct_u.ct_mcallc +
- 4 * BYTES_PER_XDR_UNIT) =
- htonl(*(u_int32_t *)info);
+ htonlp(ct->ct_u.ct_mcallc + 4 * BYTES_PER_XDR_UNIT, info, 0);
break;
case CLGET_PROG:
/*
* This RELIES on the information that, in the call body,
* the program number field is the fourth field from the
- * begining of the RPC header. MUST be changed if the
+ * beginning of the RPC header. MUST be changed if the
* call_struct is changed
*/
- *(u_int32_t *)info =
- ntohl(*(u_int32_t *)(void *)(ct->ct_u.ct_mcallc +
- 3 * BYTES_PER_XDR_UNIT));
+ ntohlp(info, ct->ct_u.ct_mcallc + 3 * BYTES_PER_XDR_UNIT);
break;
case CLSET_PROG:
- *(u_int32_t *)(void *)(ct->ct_u.ct_mcallc +
- 3 * BYTES_PER_XDR_UNIT) =
- htonl(*(u_int32_t *)info);
+ htonlp(ct->ct_u.ct_mcallc + 3 * BYTES_PER_XDR_UNIT, info, 0);
break;
default:
@@ -647,8 +637,7 @@ clnt_vc_control(cl, request, info)
static void
-clnt_vc_destroy(cl)
- CLIENT *cl;
+clnt_vc_destroy(CLIENT *cl)
{
struct ct_data *ct = (struct ct_data *) cl->cl_private;
int ct_fd = ct->ct_fd;
@@ -668,8 +657,7 @@ clnt_vc_destroy(cl)
(void)_close(ct->ct_fd);
}
XDR_DESTROY(&(ct->ct_xdrs));
- if (ct->ct_addr.buf)
- free(ct->ct_addr.buf);
+ free(ct->ct_addr.buf);
mem_free(ct, sizeof(struct ct_data));
if (cl->cl_netid && cl->cl_netid[0])
mem_free(cl->cl_netid, strlen(cl->cl_netid) +1);
@@ -687,10 +675,7 @@ clnt_vc_destroy(cl)
* around for the rpc level.
*/
static int
-read_vc(ctp, buf, len)
- void *ctp;
- void *buf;
- int len;
+read_vc(void *ctp, void *buf, int len)
{
struct sockaddr sa;
socklen_t sal;
@@ -744,10 +729,7 @@ read_vc(ctp, buf, len)
}
static int
-write_vc(ctp, buf, len)
- void *ctp;
- void *buf;
- int len;
+write_vc(void *ctp, void *buf, int len)
{
struct sockaddr sa;
socklen_t sal;
@@ -778,7 +760,7 @@ write_vc(ctp, buf, len)
}
static struct clnt_ops *
-clnt_vc_ops()
+clnt_vc_ops(void)
{
static struct clnt_ops ops;
sigset_t mask, newmask;
@@ -806,18 +788,14 @@ clnt_vc_ops()
* Note this is different from time_not_ok in clnt_dg.c
*/
static bool_t
-time_not_ok(t)
- struct timeval *t;
+time_not_ok(struct timeval *t)
{
return (t->tv_sec <= -1 || t->tv_sec > 100000000 ||
t->tv_usec <= -1 || t->tv_usec > 1000000);
}
static int
-__msgread(sock, buf, cnt)
- int sock;
- void *buf;
- size_t cnt;
+__msgread(int sock, void *buf, size_t cnt)
{
struct iovec iov[1];
struct msghdr msg;
@@ -842,10 +820,7 @@ __msgread(sock, buf, cnt)
}
static int
-__msgwrite(sock, buf, cnt)
- int sock;
- void *buf;
- size_t cnt;
+__msgwrite(int sock, void *buf, size_t cnt)
{
struct iovec iov[1];
struct msghdr msg;
diff --git a/freebsd/lib/libc/rpc/crypt_client.c b/freebsd/lib/libc/rpc/crypt_client.c
index 9ce94157..15e915ad 100644
--- a/freebsd/lib/libc/rpc/crypt_client.c
+++ b/freebsd/lib/libc/rpc/crypt_client.c
@@ -45,10 +45,7 @@ __FBSDID("$FreeBSD$");
#include "un-namespace.h"
int
-_des_crypt_call(buf, len, dparms)
- char *buf;
- int len;
- struct desparams *dparms;
+_des_crypt_call(char *buf, int len, struct desparams *dparms)
{
CLIENT *clnt;
desresp *result_1;
@@ -66,6 +63,7 @@ _des_crypt_call(buf, len, dparms)
}
if (nconf == NULL) {
warnx("getnetconfig: %s", nc_sperror());
+ endnetconfig(localhandle);
return(DESERR_HWERROR);
}
clnt = clnt_tp_create(NULL, CRYPT_PROG, CRYPT_VERS, nconf);
diff --git a/freebsd/lib/libc/rpc/des_crypt.c b/freebsd/lib/libc/rpc/des_crypt.c
index b5f14f82..31763422 100644
--- a/freebsd/lib/libc/rpc/des_crypt.c
+++ b/freebsd/lib/libc/rpc/des_crypt.c
@@ -43,7 +43,7 @@ static char sccsid[] = "@(#)des_crypt.c 2.2 88/08/10 4.0 RPCSRC; from 1.13 88/02
__FBSDID("$FreeBSD$");
static int common_crypt( char *, char *, unsigned, unsigned, struct desparams * );
-int (*__des_crypt_LOCAL)() = 0;
+int (*__des_crypt_LOCAL)(char *, unsigned, struct desparams *) = 0;
extern int _des_crypt_call(char *, int, struct desparams *);
/*
* Copy 8 bytes
@@ -72,12 +72,7 @@ extern int _des_crypt_call(char *, int, struct desparams *);
* CBC mode encryption
*/
int
-cbc_crypt(key, buf, len, mode, ivec)
- char *key;
- char *buf;
- unsigned len;
- unsigned mode;
- char *ivec;
+cbc_crypt(char *key, char *buf, unsigned len, unsigned mode, char *ivec)
{
int err;
struct desparams dp;
@@ -99,11 +94,7 @@ cbc_crypt(key, buf, len, mode, ivec)
* ECB mode encryption
*/
int
-ecb_crypt(key, buf, len, mode)
- char *key;
- char *buf;
- unsigned len;
- unsigned mode;
+ecb_crypt(char *key, char *buf, unsigned len, unsigned mode)
{
struct desparams dp;
@@ -122,12 +113,8 @@ ecb_crypt(key, buf, len, mode)
* Common code to cbc_crypt() & ecb_crypt()
*/
static int
-common_crypt(key, buf, len, mode, desp)
- char *key;
- char *buf;
- unsigned len;
- unsigned mode;
- struct desparams *desp;
+common_crypt(char *key, char *buf, unsigned len, unsigned mode,
+ struct desparams *desp)
{
int desdev;
diff --git a/freebsd/lib/libc/rpc/des_soft.c b/freebsd/lib/libc/rpc/des_soft.c
index e67f90d8..2b6b8606 100644
--- a/freebsd/lib/libc/rpc/des_soft.c
+++ b/freebsd/lib/libc/rpc/des_soft.c
@@ -60,8 +60,7 @@ static char partab[128] = {
* Add odd parity to low bit of 8 byte key
*/
void
-des_setparity(p)
- char *p;
+des_setparity(char *p)
{
int i;
diff --git a/freebsd/lib/libc/rpc/getnetconfig.c b/freebsd/lib/libc/rpc/getnetconfig.c
index f6279e04..bc3bc7ef 100644
--- a/freebsd/lib/libc/rpc/getnetconfig.c
+++ b/freebsd/lib/libc/rpc/getnetconfig.c
@@ -149,7 +149,7 @@ nc_key_init(void)
#define MAXNETCONFIGLINE 1000
static int *
-__nc_error()
+__nc_error(void)
{
static int nc_error = 0;
int *nc_addr;
@@ -166,8 +166,7 @@ __nc_error()
if ((nc_addr = (int *)thr_getspecific(nc_key)) == NULL) {
nc_addr = (int *)malloc(sizeof (int));
if (thr_setspecific(nc_key, (void *) nc_addr) != 0) {
- if (nc_addr)
- free(nc_addr);
+ free(nc_addr);
return (&nc_error);
}
*nc_addr = 0;
@@ -196,7 +195,7 @@ __nc_error()
* the netconfig database is not present).
*/
void *
-setnetconfig()
+setnetconfig(void)
{
struct netconfig_vars *nc_vars;
@@ -242,8 +241,7 @@ setnetconfig()
*/
struct netconfig *
-getnetconfig(handlep)
-void *handlep;
+getnetconfig(void *handlep)
{
struct netconfig_vars *ncp = (struct netconfig_vars *)handlep;
char *stringp; /* tmp string pointer */
@@ -380,8 +378,7 @@ void *handlep;
* previously).
*/
int
-endnetconfig(handlep)
-void *handlep;
+endnetconfig(void *handlep)
{
struct netconfig_vars *nc_handlep = (struct netconfig_vars *)handlep;
@@ -410,7 +407,7 @@ void *handlep;
}
/*
- * Noone needs these entries anymore, then frees them.
+ * No one needs these entries anymore, then frees them.
* Make sure all info in netconfig_info structure has been reinitialized.
*/
q = ni.head;
@@ -421,7 +418,7 @@ void *handlep;
while (q != NULL) {
p = q->next;
- if (q->ncp->nc_lookups != NULL) free(q->ncp->nc_lookups);
+ free(q->ncp->nc_lookups);
free(q->ncp);
free(q->linep);
free(q);
@@ -446,8 +443,7 @@ void *handlep;
*/
struct netconfig *
-getnetconfigent(netid)
- const char *netid;
+getnetconfigent(const char *netid)
{
FILE *file; /* NETCONFIG db's file pointer */
char *linep; /* holds current netconfig line */
@@ -538,13 +534,11 @@ getnetconfigent(netid)
*/
void
-freenetconfigent(netconfigp)
- struct netconfig *netconfigp;
+freenetconfigent(struct netconfig *netconfigp)
{
if (netconfigp != NULL) {
free(netconfigp->nc_netid); /* holds all netconfigp's strings */
- if (netconfigp->nc_lookups != NULL)
- free(netconfigp->nc_lookups);
+ free(netconfigp->nc_lookups);
free(netconfigp);
}
return;
@@ -560,12 +554,13 @@ freenetconfigent(netconfigp)
* Note that we modify stringp (putting NULLs after tokens) and
* we set the ncp's string field pointers to point to these tokens within
* stringp.
+ *
+ * stringp - string to parse
+ * ncp - where to put results
*/
static int
-parse_ncp(stringp, ncp)
-char *stringp; /* string to parse */
-struct netconfig *ncp; /* where to put results */
+parse_ncp(char *stringp, struct netconfig *ncp)
{
char *tokenp; /* for processing tokens */
char *lasts;
@@ -633,8 +628,7 @@ struct netconfig *ncp; /* where to put results */
} else {
char *cp; /* tmp string */
- if (ncp->nc_lookups != NULL) /* from last visit */
- free(ncp->nc_lookups);
+ free(ncp->nc_lookups); /* from last visit */
ncp->nc_lookups = NULL;
ncp->nc_nlookups = 0;
while ((cp = tokenp) != NULL) {
@@ -657,7 +651,7 @@ struct netconfig *ncp; /* where to put results */
* Returns a string describing the reason for failure.
*/
char *
-nc_sperror()
+nc_sperror(void)
{
const char *message;
@@ -688,8 +682,7 @@ nc_sperror()
* Prints a message onto standard error describing the reason for failure.
*/
void
-nc_perror(s)
- const char *s;
+nc_perror(const char *s)
{
fprintf(stderr, "%s: %s\n", s, nc_sperror());
}
@@ -698,11 +691,10 @@ nc_perror(s)
* Duplicates the matched netconfig buffer.
*/
static struct netconfig *
-dup_ncp(ncp)
-struct netconfig *ncp;
+dup_ncp(struct netconfig *ncp)
{
struct netconfig *p;
- char *tmp;
+ char *tmp, *tmp2;
u_int i;
if ((tmp=malloc(MAXNETCONFIGLINE)) == NULL)
@@ -711,6 +703,7 @@ struct netconfig *ncp;
free(tmp);
return(NULL);
}
+ tmp2 = tmp;
/*
* First we dup all the data from matched netconfig buffer. Then we
* adjust some of the member pointer to a pre-allocated buffer where
@@ -732,6 +725,7 @@ struct netconfig *ncp;
if (p->nc_lookups == NULL) {
free(p->nc_netid);
free(p);
+ free(tmp2);
return(NULL);
}
for (i=0; i < p->nc_nlookups; i++) {
diff --git a/freebsd/lib/libc/rpc/getnetpath.c b/freebsd/lib/libc/rpc/getnetpath.c
index 56493557..12def71b 100644
--- a/freebsd/lib/libc/rpc/getnetpath.c
+++ b/freebsd/lib/libc/rpc/getnetpath.c
@@ -84,7 +84,7 @@ char *_get_next_token(char *, int);
*/
void *
-setnetpath()
+setnetpath(void)
{
struct netpath_vars *np_sessionp; /* this session's variables */
@@ -99,9 +99,8 @@ setnetpath()
return (NULL);
}
if ((np_sessionp->nc_handlep = setnetconfig()) == NULL) {
- free(np_sessionp);
syslog (LOG_ERR, "rpc: failed to open " NETCONFIG);
- return (NULL);
+ goto failed;
}
np_sessionp->valid = NP_VALID;
np_sessionp->ncp_list = NULL;
@@ -144,8 +143,7 @@ failed:
*/
struct netconfig *
-getnetpath(handlep)
- void *handlep;
+getnetpath(void *handlep)
{
struct netpath_vars *np_sessionp = (struct netpath_vars *)handlep;
struct netconfig *ncp = NULL; /* temp. holds a netconfig session */
@@ -200,8 +198,7 @@ getnetpath(handlep)
* (e.g. if setnetpath() was not called previously.
*/
int
-endnetpath(handlep)
- void *handlep;
+endnetpath(void *handlep)
{
struct netpath_vars *np_sessionp = (struct netpath_vars *)handlep;
struct netpath_chain *chainp, *lastp;
@@ -234,12 +231,12 @@ endnetpath(handlep)
* Returns pointer to the rest-of-the-string after the current token.
* The token itself starts at arg, and we null terminate it. We return NULL
* if either the arg is empty, or if this is the last token.
+ *
+ * npp - string
+ * token - char to parse string for
*/
-
char *
-_get_next_token(npp, token)
-char *npp; /* string */
-int token; /* char to parse string for */
+_get_next_token(char *npp, int token)
{
char *cp; /* char pointer */
char *np; /* netpath pointer */
@@ -267,7 +264,7 @@ int token; /* char to parse string for */
*cp++ = '\0'; /* null-terminate token */
/* get rid of any backslash escapes */
ep = npp;
- while ((np = strchr(ep, '\\')) != 0) {
+ while ((np = strchr(ep, '\\')) != NULL) {
if (np[1] == '\\')
np++;
strcpy(np, (ep = &np[1])); /* XXX: overlapping string copy */
diff --git a/freebsd/lib/libc/rpc/getpublickey.c b/freebsd/lib/libc/rpc/getpublickey.c
index d7bac7c2..0fef3d38 100644
--- a/freebsd/lib/libc/rpc/getpublickey.c
+++ b/freebsd/lib/libc/rpc/getpublickey.c
@@ -58,15 +58,13 @@ __FBSDID("$FreeBSD$");
/*
* Hack to let ypserv/rpc.nisd use AUTH_DES.
*/
-int (*__getpublickey_LOCAL)() = 0;
+int (*__getpublickey_LOCAL)(const char *, char *) = 0;
/*
* Get somebody's public key
*/
static int
-__getpublickey_real(netname, publickey)
- const char *netname;
- char *publickey;
+__getpublickey_real(const char *netname, char *publickey)
{
char lookup[3 * HEXKEYBYTES];
char *p;
@@ -91,9 +89,7 @@ __getpublickey_real(netname, publickey)
*/
int
-getpublicandprivatekey(key, ret)
- const char *key;
- char *ret;
+getpublicandprivatekey(const char *key, char *ret)
{
char buf[1024]; /* big enough */
char *res;
@@ -168,9 +164,7 @@ getpublicandprivatekey(key, ret)
}
}
-int getpublickey(netname, publickey)
- const char *netname;
- char *publickey;
+int getpublickey(const char *netname, char *publickey)
{
if (__getpublickey_LOCAL != NULL)
return(__getpublickey_LOCAL(netname, publickey));
diff --git a/freebsd/lib/libc/rpc/getrpcent.c b/freebsd/lib/libc/rpc/getrpcent.c
index b4d51e0d..bed8074c 100644
--- a/freebsd/lib/libc/rpc/getrpcent.c
+++ b/freebsd/lib/libc/rpc/getrpcent.c
@@ -41,7 +41,6 @@ __FBSDID("$FreeBSD$");
*/
#include <rtems/bsd/sys/param.h>
-#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <assert.h>
@@ -514,6 +513,7 @@ nis_rpcent(void *retval, void *mdata, va_list ap)
sizeof(char *)) {
*errnop = ERANGE;
rv = NS_RETURN;
+ free(resultbuf);
break;
}
@@ -523,6 +523,7 @@ nis_rpcent(void *retval, void *mdata, va_list ap)
if (aliases_size < 1) {
*errnop = ERANGE;
rv = NS_RETURN;
+ free(resultbuf);
break;
}
@@ -971,7 +972,7 @@ getrpc(int (*fn)(union key, struct rpcent *, char *, size_t, struct rpcent **),
}
struct rpcent *
-getrpcbyname(char *name)
+getrpcbyname(const char *name)
{
union key key;
@@ -991,7 +992,7 @@ getrpcbynumber(int number)
}
struct rpcent *
-getrpcent()
+getrpcent(void)
{
union key key;
@@ -1025,7 +1026,7 @@ setrpcent(int stayopen)
}
void
-endrpcent()
+endrpcent(void)
{
#ifdef NS_CACHING
static const nss_cache_info cache_info = NS_MP_CACHE_INFO_INITIALIZER(
diff --git a/freebsd/lib/libc/rpc/getrpcport.c b/freebsd/lib/libc/rpc/getrpcport.c
index 161486dc..26e08b93 100644
--- a/freebsd/lib/libc/rpc/getrpcport.c
+++ b/freebsd/lib/libc/rpc/getrpcport.c
@@ -55,9 +55,7 @@ __FBSDID("$FreeBSD$");
#include "un-namespace.h"
int
-getrpcport(host, prognum, versnum, proto)
- char *host;
- int prognum, versnum, proto;
+getrpcport(char *host, int prognum, int versnum, int proto)
{
struct sockaddr_in addr;
struct hostent *hp;
diff --git a/freebsd/lib/libc/rpc/key_call.c b/freebsd/lib/libc/rpc/key_call.c
index ba2a3835..c28aa507 100644
--- a/freebsd/lib/libc/rpc/key_call.c
+++ b/freebsd/lib/libc/rpc/key_call.c
@@ -83,15 +83,14 @@ __FBSDID("$FreeBSD$");
* implementations of these functions, and to call those in key_call().
*/
-cryptkeyres *(*__key_encryptsession_pk_LOCAL)() = 0;
-cryptkeyres *(*__key_decryptsession_pk_LOCAL)() = 0;
-des_block *(*__key_gendes_LOCAL)() = 0;
+cryptkeyres *(*__key_encryptsession_pk_LOCAL)(uid_t, void *arg) = 0;
+cryptkeyres *(*__key_decryptsession_pk_LOCAL)(uid_t, void *arg) = 0;
+des_block *(*__key_gendes_LOCAL)(uid_t, void *) = 0;
static int key_call( u_long, xdrproc_t, void *, xdrproc_t, void *);
int
-key_setsecret(secretkey)
- const char *secretkey;
+key_setsecret(const char *secretkey)
{
keystatus status;
@@ -133,10 +132,7 @@ key_secretkey_is_set(void)
}
int
-key_encryptsession_pk(remotename, remotekey, deskey)
- char *remotename;
- netobj *remotekey;
- des_block *deskey;
+key_encryptsession_pk(char *remotename, netobj *remotekey, des_block *deskey)
{
cryptkeyarg2 arg;
cryptkeyres res;
@@ -157,10 +153,7 @@ key_encryptsession_pk(remotename, remotekey, deskey)
}
int
-key_decryptsession_pk(remotename, remotekey, deskey)
- char *remotename;
- netobj *remotekey;
- des_block *deskey;
+key_decryptsession_pk(char *remotename, netobj *remotekey, des_block *deskey)
{
cryptkeyarg2 arg;
cryptkeyres res;
@@ -181,9 +174,7 @@ key_decryptsession_pk(remotename, remotekey, deskey)
}
int
-key_encryptsession(remotename, deskey)
- const char *remotename;
- des_block *deskey;
+key_encryptsession(const char *remotename, des_block *deskey)
{
cryptkeyarg arg;
cryptkeyres res;
@@ -203,9 +194,7 @@ key_encryptsession(remotename, deskey)
}
int
-key_decryptsession(remotename, deskey)
- const char *remotename;
- des_block *deskey;
+key_decryptsession(const char *remotename, des_block *deskey)
{
cryptkeyarg arg;
cryptkeyres res;
@@ -225,8 +214,7 @@ key_decryptsession(remotename, deskey)
}
int
-key_gendes(key)
- des_block *key;
+key_gendes(des_block *key)
{
if (!key_call((u_long)KEY_GEN, (xdrproc_t)xdr_void, NULL,
(xdrproc_t)xdr_des_block, key)) {
@@ -236,8 +224,7 @@ key_gendes(key)
}
int
-key_setnet(arg)
-struct key_netstarg *arg;
+key_setnet(struct key_netstarg *arg)
{
keystatus status;
@@ -256,9 +243,7 @@ struct key_netstarg *arg;
int
-key_get_conv(pkey, deskey)
- char *pkey;
- des_block *deskey;
+key_get_conv(char *pkey, des_block *deskey)
{
cryptkeyres res;
@@ -307,8 +292,7 @@ key_call_init(void)
* Keep the handle cached. This call may be made quite often.
*/
static CLIENT *
-getkeyserv_handle(vers)
-int vers;
+getkeyserv_handle(int vers)
{
void *localhandle;
struct netconfig *nconf;
@@ -431,12 +415,8 @@ int vers;
/* returns 0 on failure, 1 on success */
static int
-key_call(proc, xdr_arg, arg, xdr_rslt, rslt)
- u_long proc;
- xdrproc_t xdr_arg;
- void *arg;
- xdrproc_t xdr_rslt;
- void *rslt;
+key_call(u_long proc, xdrproc_t xdr_arg, void *arg, xdrproc_t xdr_rslt,
+ void *rslt)
{
CLIENT *clnt;
struct timeval wait_time;
diff --git a/freebsd/lib/libc/rpc/mt_misc.c b/freebsd/lib/libc/rpc/mt_misc.c
index b494bef7..75649fd2 100644
--- a/freebsd/lib/libc/rpc/mt_misc.c
+++ b/freebsd/lib/libc/rpc/mt_misc.c
@@ -95,9 +95,9 @@ rce_key_init(void)
}
struct rpc_createerr *
-__rpc_createerr()
+__rpc_createerr(void)
{
- struct rpc_createerr *rce_addr = 0;
+ struct rpc_createerr *rce_addr = NULL;
if (thr_main())
return (&rpc_createerr);
@@ -108,8 +108,7 @@ __rpc_createerr()
rce_addr = (struct rpc_createerr *)
malloc(sizeof (struct rpc_createerr));
if (thr_setspecific(rce_key, (void *) rce_addr) != 0) {
- if (rce_addr)
- free(rce_addr);
+ free(rce_addr);
return (&rpc_createerr);
}
memset(rce_addr, 0, sizeof (struct rpc_createerr));
diff --git a/freebsd/lib/libc/rpc/netname.c b/freebsd/lib/libc/rpc/netname.c
index d992c05d..25439fda 100644
--- a/freebsd/lib/libc/rpc/netname.c
+++ b/freebsd/lib/libc/rpc/netname.c
@@ -81,8 +81,7 @@ static char *OPSYS = "unix";
* Figure out my fully qualified network name
*/
int
-getnetname(name)
- char name[MAXNETNAMELEN+1];
+getnetname(char name[MAXNETNAMELEN+1])
{
uid_t uid;
@@ -99,10 +98,7 @@ getnetname(name)
* Convert unix cred to network-name
*/
int
-user2netname(netname, uid, domain)
- char netname[MAXNETNAMELEN + 1];
- const uid_t uid;
- const char *domain;
+user2netname(char netname[MAXNETNAMELEN + 1], const uid_t uid, const char *domain)
{
char *dfltdom;
@@ -124,10 +120,7 @@ user2netname(netname, uid, domain)
* Convert host to network-name
*/
int
-host2netname(netname, host, domain)
- char netname[MAXNETNAMELEN + 1];
- const char *host;
- const char *domain;
+host2netname(char netname[MAXNETNAMELEN + 1], const char *host, const char *domain)
{
char *dfltdom;
char hostname[MAXHOSTNAMELEN+1];
diff --git a/freebsd/lib/libc/rpc/netnamer.c b/freebsd/lib/libc/rpc/netnamer.c
index dd09c257..be154eb0 100644
--- a/freebsd/lib/libc/rpc/netnamer.c
+++ b/freebsd/lib/libc/rpc/netnamer.c
@@ -70,12 +70,8 @@ static int _getgroups( char *, gid_t * );
* Convert network-name into unix credential
*/
int
-netname2user(netname, uidp, gidp, gidlenp, gidlist)
- char netname[MAXNETNAMELEN + 1];
- uid_t *uidp;
- gid_t *gidp;
- int *gidlenp;
- gid_t *gidlist;
+netname2user(char netname[MAXNETNAMELEN + 1], uid_t *uidp, gid_t *gidp,
+ int *gidlenp, gid_t *gidlist)
{
char *p;
int gidlen;
@@ -151,9 +147,7 @@ netname2user(netname, uidp, gidp, gidlenp, gidlist)
*/
static int
-_getgroups(uname, groups)
- char *uname;
- gid_t groups[NGRPS];
+_getgroups(char *uname, gid_t groups[NGRPS])
{
gid_t ngroups = 0;
struct group *grp;
@@ -192,10 +186,7 @@ toomany:
* Convert network-name to hostname
*/
int
-netname2host(netname, hostname, hostlen)
- char netname[MAXNETNAMELEN + 1];
- char *hostname;
- int hostlen;
+netname2host(char netname[MAXNETNAMELEN + 1], char *hostname, int hostlen)
{
int err;
char valbuf[1024];
@@ -241,8 +232,7 @@ netname2host(netname, hostname, hostlen)
* network information service.
*/
int
-getnetid(key, ret)
- char *key, *ret;
+getnetid(char *key, char *ret)
{
char buf[1024]; /* big enough */
char *res;
@@ -255,6 +245,9 @@ getnetid(key, ret)
char *lookup;
int len;
#endif
+ int rv;
+
+ rv = 0;
fd = fopen(NETIDFILE, "r");
if (fd == NULL) {
@@ -265,13 +258,11 @@ getnetid(key, ret)
return (0);
#endif
}
- for (;;) {
- if (fd == NULL)
- return (0); /* getnetidyp brings us here */
+ while (fd != NULL) {
res = fgets(buf, sizeof(buf), fd);
if (res == NULL) {
- fclose(fd);
- return (0);
+ rv = 0;
+ goto done;
}
if (res[0] == '#')
continue;
@@ -294,9 +285,8 @@ getnetid(key, ret)
lookup[len] = 0;
strcpy(ret, lookup);
free(lookup);
- if (fd != NULL)
- fclose(fd);
- return (2);
+ rv = 2;
+ goto done;
#else /* YP */
#ifdef DEBUG
fprintf(stderr,
@@ -322,10 +312,14 @@ getnetid(key, ret)
}
if (strcmp(mkey, key) == 0) {
strcpy(ret, mval);
- fclose(fd);
- return (1);
-
+ rv = 1;
+ goto done;
}
}
}
+
+done:
+ if (fd != NULL)
+ fclose(fd);
+ return (rv);
}
diff --git a/freebsd/lib/libc/rpc/pmap_getmaps.c b/freebsd/lib/libc/rpc/pmap_getmaps.c
index 7c35ff26..fd6c4a85 100644
--- a/freebsd/lib/libc/rpc/pmap_getmaps.c
+++ b/freebsd/lib/libc/rpc/pmap_getmaps.c
@@ -72,8 +72,7 @@ __FBSDID("$FreeBSD$");
* Calls the pmap service remotely to do get the maps.
*/
struct pmaplist *
-pmap_getmaps(address)
- struct sockaddr_in *address;
+pmap_getmaps(struct sockaddr_in *address)
{
struct pmaplist *head = NULL;
int sock = -1;
diff --git a/freebsd/lib/libc/rpc/pmap_getport.c b/freebsd/lib/libc/rpc/pmap_getport.c
index 7b6c4818..0ddfb68c 100644
--- a/freebsd/lib/libc/rpc/pmap_getport.c
+++ b/freebsd/lib/libc/rpc/pmap_getport.c
@@ -68,11 +68,8 @@ static const struct timeval tottimeout = { 60, 0 };
* Returns 0 if no map exists.
*/
u_short
-pmap_getport(address, program, version, protocol)
- struct sockaddr_in *address;
- u_long program;
- u_long version;
- u_int protocol;
+pmap_getport(struct sockaddr_in *address, u_long program, u_long version,
+ u_int protocol)
{
u_short port = 0;
int sock = -1;
diff --git a/freebsd/lib/libc/rpc/pmap_prot.c b/freebsd/lib/libc/rpc/pmap_prot.c
index 121af17a..03c303eb 100644
--- a/freebsd/lib/libc/rpc/pmap_prot.c
+++ b/freebsd/lib/libc/rpc/pmap_prot.c
@@ -54,9 +54,7 @@ __FBSDID("$FreeBSD$");
bool_t
-xdr_pmap(xdrs, regs)
- XDR *xdrs;
- struct pmap *regs;
+xdr_pmap(XDR *xdrs, struct pmap *regs)
{
assert(xdrs != NULL);
diff --git a/freebsd/lib/libc/rpc/pmap_prot2.c b/freebsd/lib/libc/rpc/pmap_prot2.c
index 9a50dfa7..0b5134a3 100644
--- a/freebsd/lib/libc/rpc/pmap_prot2.c
+++ b/freebsd/lib/libc/rpc/pmap_prot2.c
@@ -92,9 +92,7 @@ __FBSDID("$FreeBSD$");
* this sounds like a job for xdr_reference!
*/
bool_t
-xdr_pmaplist(xdrs, rp)
- XDR *xdrs;
- struct pmaplist **rp;
+xdr_pmaplist(XDR *xdrs, struct pmaplist **rp)
{
/*
* more_elements is pre-computed in case the direction is
@@ -136,9 +134,7 @@ xdr_pmaplist(xdrs, rp)
* functionality to xdr_pmaplist().
*/
bool_t
-xdr_pmaplist_ptr(xdrs, rp)
- XDR *xdrs;
- struct pmaplist *rp;
+xdr_pmaplist_ptr(XDR *xdrs, struct pmaplist *rp)
{
return xdr_pmaplist(xdrs, (struct pmaplist **)(void *)rp);
}
diff --git a/freebsd/lib/libc/rpc/pmap_rmt.c b/freebsd/lib/libc/rpc/pmap_rmt.c
index e2d45608..17f9d5b4 100644
--- a/freebsd/lib/libc/rpc/pmap_rmt.c
+++ b/freebsd/lib/libc/rpc/pmap_rmt.c
@@ -78,14 +78,9 @@ static const struct timeval timeout = { 3, 0 };
* programs to do a lookup and call in one step.
*/
enum clnt_stat
-pmap_rmtcall(addr, prog, vers, proc, xdrargs, argsp, xdrres, resp, tout,
- port_ptr)
- struct sockaddr_in *addr;
- u_long prog, vers, proc;
- xdrproc_t xdrargs, xdrres;
- caddr_t argsp, resp;
- struct timeval tout;
- u_long *port_ptr;
+pmap_rmtcall(struct sockaddr_in *addr, u_long prog, u_long vers, u_long proc,
+ xdrproc_t xdrargs, caddr_t argsp, xdrproc_t xdrres, caddr_t resp,
+ struct timeval tout, u_long *port_ptr)
{
int sock = -1;
CLIENT *client;
@@ -124,9 +119,7 @@ pmap_rmtcall(addr, prog, vers, proc, xdrargs, argsp, xdrres, resp, tout,
* written for XDR_ENCODE direction only
*/
bool_t
-xdr_rmtcall_args(xdrs, cap)
- XDR *xdrs;
- struct rmtcallargs *cap;
+xdr_rmtcall_args(XDR *xdrs, struct rmtcallargs *cap)
{
u_int lenposition, argposition, position;
@@ -158,9 +151,7 @@ xdr_rmtcall_args(xdrs, cap)
* written for XDR_DECODE direction only
*/
bool_t
-xdr_rmtcallres(xdrs, crp)
- XDR *xdrs;
- struct rmtcallres *crp;
+xdr_rmtcallres(XDR *xdrs, struct rmtcallres *crp)
{
caddr_t port_ptr;
diff --git a/freebsd/lib/libc/rpc/rpc_callmsg.c b/freebsd/lib/libc/rpc/rpc_callmsg.c
index 7d236d5e..f635205c 100644
--- a/freebsd/lib/libc/rpc/rpc_callmsg.c
+++ b/freebsd/lib/libc/rpc/rpc_callmsg.c
@@ -56,9 +56,7 @@ __FBSDID("$FreeBSD$");
* XDR a call message
*/
bool_t
-xdr_callmsg(xdrs, cmsg)
- XDR *xdrs;
- struct rpc_msg *cmsg;
+xdr_callmsg(XDR *xdrs, struct rpc_msg *cmsg)
{
enum msg_type *prm_direction;
int32_t *buf;
@@ -197,11 +195,11 @@ xdr_callmsg(xdrs, cmsg)
xdr_u_int32_t(xdrs, &(cmsg->rm_xid)) &&
xdr_enum(xdrs, (enum_t *) prm_direction) &&
(cmsg->rm_direction == CALL) &&
- xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_rpcvers)) &&
+ xdr_rpcvers(xdrs, &(cmsg->rm_call.cb_rpcvers)) &&
(cmsg->rm_call.cb_rpcvers == RPC_MSG_VERSION) &&
- xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_prog)) &&
- xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_vers)) &&
- xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_proc)) &&
+ xdr_rpcprog(xdrs, &(cmsg->rm_call.cb_prog)) &&
+ xdr_rpcvers(xdrs, &(cmsg->rm_call.cb_vers)) &&
+ xdr_rpcproc(xdrs, &(cmsg->rm_call.cb_proc)) &&
xdr_opaque_auth(xdrs, &(cmsg->rm_call.cb_cred)) )
return (xdr_opaque_auth(xdrs, &(cmsg->rm_call.cb_verf)));
return (FALSE);
diff --git a/freebsd/lib/libc/rpc/rpc_com.h b/freebsd/lib/libc/rpc/rpc_com.h
index 770faf91..bfa6a0bc 100644
--- a/freebsd/lib/libc/rpc/rpc_com.h
+++ b/freebsd/lib/libc/rpc/rpc_com.h
@@ -86,8 +86,8 @@ bool_t __xdrrec_setnonblock(XDR *, int);
bool_t __xdrrec_getrec(XDR *, enum xprt_stat *, bool_t);
void __xprt_unregister_unlocked(SVCXPRT *);
-SVCXPRT **__svc_xports;
-int __svc_maxrec;
+extern SVCXPRT **__svc_xports;
+extern int __svc_maxrec;
__END_DECLS
diff --git a/freebsd/lib/libc/rpc/rpc_generic.c b/freebsd/lib/libc/rpc/rpc_generic.c
index d58bbfc0..8471fd7c 100644
--- a/freebsd/lib/libc/rpc/rpc_generic.c
+++ b/freebsd/lib/libc/rpc/rpc_generic.c
@@ -44,7 +44,6 @@ __FBSDID("$FreeBSD$");
#include "namespace.h"
#include "reentrant.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/socket.h>
#include <sys/time.h>
@@ -113,7 +112,7 @@ static int getnettype(const char *);
* expensive call every time.
*/
int
-__rpc_dtbsize()
+__rpc_dtbsize(void)
{
static int tbsize;
struct rlimit rl;
@@ -134,12 +133,12 @@ __rpc_dtbsize()
/*
* Find the appropriate buffer size
+ *
+ * size - Size requested
*/
u_int
/*ARGSUSED*/
-__rpc_get_t_size(af, proto, size)
- int af, proto;
- int size; /* Size requested */
+__rpc_get_t_size(int af, int proto, int size)
{
int maxsize, defsize;
@@ -166,8 +165,7 @@ __rpc_get_t_size(af, proto, size)
* Find the appropriate address buffer size
*/
u_int
-__rpc_get_a_size(af)
- int af;
+__rpc_get_a_size(int af)
{
switch (af) {
case AF_INET:
@@ -186,8 +184,7 @@ __rpc_get_a_size(af)
#if 0
static char *
-strlocase(p)
- char *p;
+strlocase(char *p)
{
char *t = p;
@@ -203,8 +200,7 @@ strlocase(p)
* If nettype is NULL, it defaults to NETPATH.
*/
static int
-getnettype(nettype)
- const char *nettype;
+getnettype(const char *nettype)
{
int i;
@@ -239,8 +235,7 @@ keys_init(void)
* This should be freed by calling freenetconfigent()
*/
struct netconfig *
-__rpc_getconfip(nettype)
- const char *nettype;
+__rpc_getconfip(const char *nettype)
{
char *netid;
char *netid_tcp = (char *) NULL;
@@ -311,8 +306,7 @@ __rpc_getconfip(nettype)
* __rpc_getconf().
*/
void *
-__rpc_setconf(nettype)
- const char *nettype;
+__rpc_setconf(const char *nettype)
{
struct handle *handle;
@@ -355,8 +349,7 @@ failed:
* __rpc_setconf() should have been called previously.
*/
struct netconfig *
-__rpc_getconf(vhandle)
- void *vhandle;
+__rpc_getconf(void *vhandle)
{
struct handle *handle;
struct netconfig *nconf;
@@ -432,8 +425,7 @@ __rpc_getconf(vhandle)
}
void
-__rpc_endconf(vhandle)
- void * vhandle;
+__rpc_endconf(void *vhandle)
{
struct handle *handle;
@@ -454,8 +446,7 @@ __rpc_endconf(vhandle)
* Returns NULL if fails, else a non-NULL pointer.
*/
void *
-rpc_nullproc(clnt)
- CLIENT *clnt;
+rpc_nullproc(CLIENT *clnt)
{
struct timeval TIMEOUT = {25, 0};
@@ -471,8 +462,7 @@ rpc_nullproc(clnt)
* one succeeds in finding the netconf for the given fd.
*/
struct netconfig *
-__rpcgettp(fd)
- int fd;
+__rpcgettp(int fd)
{
const char *netid;
struct __rpc_sockinfo si;
diff --git a/freebsd/lib/libc/rpc/rpc_prot.c b/freebsd/lib/libc/rpc/rpc_prot.c
index 754f7cb9..d943ceaf 100644
--- a/freebsd/lib/libc/rpc/rpc_prot.c
+++ b/freebsd/lib/libc/rpc/rpc_prot.c
@@ -70,9 +70,7 @@ extern struct opaque_auth _null_auth;
* (see auth.h)
*/
bool_t
-xdr_opaque_auth(xdrs, ap)
- XDR *xdrs;
- struct opaque_auth *ap;
+xdr_opaque_auth(XDR *xdrs, struct opaque_auth *ap)
{
assert(xdrs != NULL);
@@ -88,9 +86,7 @@ xdr_opaque_auth(xdrs, ap)
* XDR a DES block
*/
bool_t
-xdr_des_block(xdrs, blkp)
- XDR *xdrs;
- des_block *blkp;
+xdr_des_block(XDR *xdrs, des_block *blkp)
{
assert(xdrs != NULL);
@@ -105,9 +101,7 @@ xdr_des_block(xdrs, blkp)
* XDR the MSG_ACCEPTED part of a reply message union
*/
bool_t
-xdr_accepted_reply(xdrs, ar)
- XDR *xdrs;
- struct accepted_reply *ar;
+xdr_accepted_reply(XDR *xdrs, struct accepted_reply *ar)
{
enum accept_stat *par_stat;
@@ -127,9 +121,9 @@ xdr_accepted_reply(xdrs, ar)
return ((*(ar->ar_results.proc))(xdrs, ar->ar_results.where));
case PROG_MISMATCH:
- if (! xdr_u_int32_t(xdrs, &(ar->ar_vers.low)))
+ if (!xdr_rpcvers(xdrs, &(ar->ar_vers.low)))
return (FALSE);
- return (xdr_u_int32_t(xdrs, &(ar->ar_vers.high)));
+ return (xdr_rpcvers(xdrs, &(ar->ar_vers.high)));
case GARBAGE_ARGS:
case SYSTEM_ERR:
@@ -144,9 +138,7 @@ xdr_accepted_reply(xdrs, ar)
* XDR the MSG_DENIED part of a reply message union
*/
bool_t
-xdr_rejected_reply(xdrs, rr)
- XDR *xdrs;
- struct rejected_reply *rr;
+xdr_rejected_reply(XDR *xdrs, struct rejected_reply *rr)
{
enum reject_stat *prj_stat;
enum auth_stat *prj_why;
@@ -162,9 +154,9 @@ xdr_rejected_reply(xdrs, rr)
switch (rr->rj_stat) {
case RPC_MISMATCH:
- if (! xdr_u_int32_t(xdrs, &(rr->rj_vers.low)))
+ if (! xdr_rpcvers(xdrs, &(rr->rj_vers.low)))
return (FALSE);
- return (xdr_u_int32_t(xdrs, &(rr->rj_vers.high)));
+ return (xdr_rpcvers(xdrs, &(rr->rj_vers.high)));
case AUTH_ERROR:
prj_why = &rr->rj_why;
@@ -184,9 +176,7 @@ static const struct xdr_discrim reply_dscrm[3] = {
* XDR a reply message
*/
bool_t
-xdr_replymsg(xdrs, rmsg)
- XDR *xdrs;
- struct rpc_msg *rmsg;
+xdr_replymsg(XDR *xdrs, struct rpc_msg *rmsg)
{
enum msg_type *prm_direction;
enum reply_stat *prp_stat;
@@ -214,9 +204,7 @@ xdr_replymsg(xdrs, rmsg)
* The rm_xid is not really static, but the user can easily munge on the fly.
*/
bool_t
-xdr_callhdr(xdrs, cmsg)
- XDR *xdrs;
- struct rpc_msg *cmsg;
+xdr_callhdr(XDR *xdrs, struct rpc_msg *cmsg)
{
enum msg_type *prm_direction;
@@ -231,8 +219,8 @@ xdr_callhdr(xdrs, cmsg)
(xdrs->x_op == XDR_ENCODE) &&
xdr_u_int32_t(xdrs, &(cmsg->rm_xid)) &&
xdr_enum(xdrs, (enum_t *) prm_direction) &&
- xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_rpcvers)) &&
- xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_prog)) )
+ xdr_rpcvers(xdrs, &(cmsg->rm_call.cb_rpcvers)) &&
+ xdr_rpcprog(xdrs, &(cmsg->rm_call.cb_prog)) )
return (xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_vers)));
return (FALSE);
}
@@ -240,9 +228,7 @@ xdr_callhdr(xdrs, cmsg)
/* ************************** Client utility routine ************* */
static void
-accepted(acpt_stat, error)
- enum accept_stat acpt_stat;
- struct rpc_err *error;
+accepted(enum accept_stat acpt_stat, struct rpc_err *error)
{
assert(error != NULL);
@@ -281,9 +267,7 @@ accepted(acpt_stat, error)
}
static void
-rejected(rjct_stat, error)
- enum reject_stat rjct_stat;
- struct rpc_err *error;
+rejected(enum reject_stat rjct_stat, struct rpc_err *error)
{
assert(error != NULL);
@@ -308,9 +292,7 @@ rejected(rjct_stat, error)
* given a reply message, fills in the error
*/
void
-_seterr_reply(msg, error)
- struct rpc_msg *msg;
- struct rpc_err *error;
+_seterr_reply(struct rpc_msg *msg, struct rpc_err *error)
{
assert(msg != NULL);
diff --git a/freebsd/lib/libc/rpc/rpc_soc.c b/freebsd/lib/libc/rpc/rpc_soc.c
index 6b4260c4..d5b5be3a 100644
--- a/freebsd/lib/libc/rpc/rpc_soc.c
+++ b/freebsd/lib/libc/rpc/rpc_soc.c
@@ -90,14 +90,8 @@ static bool_t rpc_wrap_bcast(char *, struct netbuf *, struct netconfig *);
* A common clnt create routine
*/
static CLIENT *
-clnt_com_create(raddr, prog, vers, sockp, sendsz, recvsz, tp)
- struct sockaddr_in *raddr;
- rpcprog_t prog;
- rpcvers_t vers;
- int *sockp;
- u_int sendsz;
- u_int recvsz;
- char *tp;
+clnt_com_create(struct sockaddr_in *raddr, rpcprog_t prog, rpcvers_t vers, int *sockp,
+ u_int sendsz, u_int recvsz, char *tp)
{
CLIENT *cl;
int madefd = FALSE;
@@ -166,14 +160,8 @@ err: if (madefd == TRUE)
}
CLIENT *
-clntudp_bufcreate(raddr, prog, vers, wait, sockp, sendsz, recvsz)
- struct sockaddr_in *raddr;
- u_long prog;
- u_long vers;
- struct timeval wait;
- int *sockp;
- u_int sendsz;
- u_int recvsz;
+clntudp_bufcreate(struct sockaddr_in *raddr, u_long prog, u_long vers,
+ struct timeval wait, int *sockp, u_int sendsz, u_int recvsz)
{
CLIENT *cl;
@@ -187,12 +175,8 @@ clntudp_bufcreate(raddr, prog, vers, wait, sockp, sendsz, recvsz)
}
CLIENT *
-clntudp_create(raddr, program, version, wait, sockp)
- struct sockaddr_in *raddr;
- u_long program;
- u_long version;
- struct timeval wait;
- int *sockp;
+clntudp_create(struct sockaddr_in *raddr, u_long program, u_long version,
+ struct timeval wait, int *sockp)
{
return clntudp_bufcreate(raddr, program, version, wait, sockp,
@@ -200,13 +184,8 @@ clntudp_create(raddr, program, version, wait, sockp)
}
CLIENT *
-clnttcp_create(raddr, prog, vers, sockp, sendsz, recvsz)
- struct sockaddr_in *raddr;
- u_long prog;
- u_long vers;
- int *sockp;
- u_int sendsz;
- u_int recvsz;
+clnttcp_create(struct sockaddr_in *raddr, u_long prog, u_long vers, int *sockp,
+ u_int sendsz, u_int recvsz)
{
return clnt_com_create(raddr, (rpcprog_t)prog, (rpcvers_t)vers, sockp,
@@ -214,9 +193,7 @@ clnttcp_create(raddr, prog, vers, sockp, sendsz, recvsz)
}
CLIENT *
-clntraw_create(prog, vers)
- u_long prog;
- u_long vers;
+clntraw_create(u_long prog, u_long vers)
{
return clnt_raw_create((rpcprog_t)prog, (rpcvers_t)vers);
@@ -226,11 +203,7 @@ clntraw_create(prog, vers)
* A common server create routine
*/
static SVCXPRT *
-svc_com_create(fd, sendsize, recvsize, netid)
- int fd;
- u_int sendsize;
- u_int recvsize;
- char *netid;
+svc_com_create(int fd, u_int sendsize, u_int recvsize, char *netid)
{
struct netconfig *nconf;
SVCXPRT *svc;
@@ -270,29 +243,21 @@ svc_com_create(fd, sendsize, recvsize, netid)
}
SVCXPRT *
-svctcp_create(fd, sendsize, recvsize)
- int fd;
- u_int sendsize;
- u_int recvsize;
+svctcp_create(int fd, u_int sendsize, u_int recvsize)
{
return svc_com_create(fd, sendsize, recvsize, "tcp");
}
SVCXPRT *
-svcudp_bufcreate(fd, sendsz, recvsz)
- int fd;
- u_int sendsz, recvsz;
+svcudp_bufcreate(int fd, u_int sendsz, u_int recvsz)
{
return svc_com_create(fd, sendsz, recvsz, "udp");
}
SVCXPRT *
-svcfd_create(fd, sendsize, recvsize)
- int fd;
- u_int sendsize;
- u_int recvsize;
+svcfd_create(int fd, u_int sendsize, u_int recvsize)
{
return svc_fd_create(fd, sendsize, recvsize);
@@ -300,23 +265,21 @@ svcfd_create(fd, sendsize, recvsize)
SVCXPRT *
-svcudp_create(fd)
- int fd;
+svcudp_create(int fd)
{
return svc_com_create(fd, UDPMSGSIZE, UDPMSGSIZE, "udp");
}
SVCXPRT *
-svcraw_create()
+svcraw_create(void)
{
return svc_raw_create();
}
int
-get_myaddress(addr)
- struct sockaddr_in *addr;
+get_myaddress(struct sockaddr_in *addr)
{
memset((void *) addr, 0, sizeof(*addr));
@@ -330,11 +293,8 @@ get_myaddress(addr)
* For connectionless "udp" transport. Obsoleted by rpc_call().
*/
int
-callrpc(host, prognum, versnum, procnum, inproc, in, outproc, out)
- const char *host;
- int prognum, versnum, procnum;
- xdrproc_t inproc, outproc;
- void *in, *out;
+callrpc(const char *host, int prognum, int versnum, int procnum,
+ xdrproc_t inproc, void *in, xdrproc_t outproc, void *out)
{
return (int)rpc_call(host, (rpcprog_t)prognum, (rpcvers_t)versnum,
@@ -345,10 +305,9 @@ callrpc(host, prognum, versnum, procnum, inproc, in, outproc, out)
* For connectionless kind of transport. Obsoleted by rpc_reg()
*/
int
-registerrpc(prognum, versnum, procnum, progname, inproc, outproc)
- int prognum, versnum, procnum;
- char *(*progname)(char [UDPMSGSIZE]);
- xdrproc_t inproc, outproc;
+registerrpc(int prognum, int versnum, int procnum,
+ char *(*progname)(char [UDPMSGSIZE]),
+ xdrproc_t inproc, xdrproc_t outproc)
{
return rpc_reg((rpcprog_t)prognum, (rpcvers_t)versnum,
@@ -376,10 +335,12 @@ clnt_broadcast_key_init(void)
*/
/* ARGSUSED */
static bool_t
-rpc_wrap_bcast(resultp, addr, nconf)
- char *resultp; /* results of the call */
- struct netbuf *addr; /* address of the guy who responded */
- struct netconfig *nconf; /* Netconf of the transport */
+rpc_wrap_bcast(char *resultp, struct netbuf *addr, struct netconfig *nconf)
+/*
+ * char *resultp; // results of the call
+ * struct netbuf *addr; // address of the guy who responded
+ * struct netconfig *nconf; // Netconf of the transport
+ */
{
resultproc_t clnt_broadcast_result;
@@ -397,15 +358,18 @@ rpc_wrap_bcast(resultp, addr, nconf)
* Broadcasts on UDP transport. Obsoleted by rpc_broadcast().
*/
enum clnt_stat
-clnt_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp, eachresult)
- u_long prog; /* program number */
- u_long vers; /* version number */
- u_long proc; /* procedure number */
- xdrproc_t xargs; /* xdr routine for args */
- void *argsp; /* pointer to args */
- xdrproc_t xresults; /* xdr routine for results */
- void *resultsp; /* pointer to results */
- resultproc_t eachresult; /* call with each result obtained */
+clnt_broadcast(u_long prog, u_long vers, u_long proc, xdrproc_t xargs,
+ void *argsp, xdrproc_t xresults, void *resultsp, resultproc_t eachresult)
+/*
+ * u_long prog; // program number
+ * u_long vers; // version number
+ * u_long proc; // procedure number
+ * xdrproc_t xargs; // xdr routine for args
+ * void *argsp; // pointer to args
+ * xdrproc_t xresults; // xdr routine for results
+ * void *resultsp; // pointer to results
+ * resultproc_t eachresult; // call with each result obtained
+ */
{
if (thr_main())
@@ -424,11 +388,14 @@ clnt_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp, eachresult)
* authdes_seccreate().
*/
AUTH *
-authdes_create(servername, window, syncaddr, ckey)
- char *servername; /* network name of server */
- u_int window; /* time to live */
- struct sockaddr *syncaddr; /* optional hostaddr to sync with */
- des_block *ckey; /* optional conversation key to use */
+authdes_create(char *servername, u_int window, struct sockaddr *syncaddr,
+ des_block *ckey)
+/*
+ * char *servername; // network name of server
+ * u_int window; // time to live
+ * struct sockaddr *syncaddr; // optional hostaddr to sync with
+ * des_block *ckey; // optional conversation key to use
+ */
{
AUTH *dummy;
AUTH *nauth;
@@ -455,27 +422,19 @@ fallback:
* Create a client handle for a unix connection. Obsoleted by clnt_vc_create()
*/
CLIENT *
-clntunix_create(raddr, prog, vers, sockp, sendsz, recvsz)
- struct sockaddr_un *raddr;
- u_long prog;
- u_long vers;
- int *sockp;
- u_int sendsz;
- u_int recvsz;
+clntunix_create(struct sockaddr_un *raddr, u_long prog, u_long vers, int *sockp,
+ u_int sendsz, u_int recvsz)
{
struct netbuf *svcaddr;
- struct netconfig *nconf;
CLIENT *cl;
int len;
cl = NULL;
- nconf = NULL;
svcaddr = NULL;
if ((raddr->sun_len == 0) ||
((svcaddr = malloc(sizeof(struct netbuf))) == NULL ) ||
((svcaddr->buf = malloc(sizeof(struct sockaddr_un))) == NULL)) {
- if (svcaddr != NULL)
- free(svcaddr);
+ free(svcaddr);
rpc_createerr.cf_stat = RPC_SYSTEMERROR;
rpc_createerr.cf_error.re_errno = errno;
return(cl);
@@ -508,11 +467,7 @@ done:
* Obsoleted by svc_vc_create().
*/
SVCXPRT *
-svcunix_create(sock, sendsize, recvsize, path)
- int sock;
- u_int sendsize;
- u_int recvsize;
- char *path;
+svcunix_create(int sock, u_int sendsize, u_int recvsize, char *path)
{
struct netconfig *nconf;
void *localhandle;
@@ -530,7 +485,7 @@ svcunix_create(sock, sendsize, recvsize, path)
break;
}
if (nconf == NULL)
- return(xprt);
+ goto done;
if ((sock = __rpc_nconf2fd(nconf)) < 0)
goto done;
@@ -572,10 +527,7 @@ done:
* descriptor as its first input. Obsoleted by svc_fd_create();
*/
SVCXPRT *
-svcunixfd_create(fd, sendsize, recvsize)
- int fd;
- u_int sendsize;
- u_int recvsize;
+svcunixfd_create(int fd, u_int sendsize, u_int recvsize)
{
return (svc_fd_create(fd, sendsize, recvsize));
}
diff --git a/freebsd/lib/libc/rpc/rpcb_clnt.c b/freebsd/lib/libc/rpc/rpcb_clnt.c
index ffee9659..b8f1dd0c 100644
--- a/freebsd/lib/libc/rpc/rpcb_clnt.c
+++ b/freebsd/lib/libc/rpc/rpcb_clnt.c
@@ -111,9 +111,7 @@ static struct netbuf *got_entry(rpcb_entry_list_ptr, const struct netconfig *);
* These are private routines that may not be provided in future releases.
*/
bool_t
-__rpc_control(request, info)
- int request;
- void *info;
+__rpc_control(int request, void *info)
{
switch (request) {
case CLCR_GET_RPCB_TIMEOUT:
@@ -152,8 +150,7 @@ __rpc_control(request, info)
*/
static struct address_cache *
-check_cache(host, netid)
- const char *host, *netid;
+check_cache(const char *host, const char *netid)
{
struct address_cache *cptr;
@@ -173,8 +170,7 @@ check_cache(host, netid)
}
static void
-delete_cache(addr)
- struct netbuf *addr;
+delete_cache(struct netbuf *addr)
{
struct address_cache *cptr, *prevptr = NULL;
@@ -185,8 +181,7 @@ delete_cache(addr)
free(cptr->ac_netid);
free(cptr->ac_taddr->buf);
free(cptr->ac_taddr);
- if (cptr->ac_uaddr)
- free(cptr->ac_uaddr);
+ free(cptr->ac_uaddr);
if (prevptr)
prevptr->ac_next = cptr->ac_next;
else
@@ -200,10 +195,8 @@ delete_cache(addr)
}
static void
-add_cache(host, netid, taddr, uaddr)
- const char *host, *netid;
- char *uaddr;
- struct netbuf *taddr;
+add_cache(const char *host, const char *netid, struct netbuf *taddr,
+ char *uaddr)
{
struct address_cache *ad_cache, *cptr, *prevptr;
@@ -224,14 +217,10 @@ add_cache(host, netid, taddr, uaddr)
ad_cache->ac_taddr->buf = (char *) malloc(taddr->len);
if (ad_cache->ac_taddr->buf == NULL) {
out:
- if (ad_cache->ac_host)
- free(ad_cache->ac_host);
- if (ad_cache->ac_netid)
- free(ad_cache->ac_netid);
- if (ad_cache->ac_uaddr)
- free(ad_cache->ac_uaddr);
- if (ad_cache->ac_taddr)
- free(ad_cache->ac_taddr);
+ free(ad_cache->ac_host);
+ free(ad_cache->ac_netid);
+ free(ad_cache->ac_uaddr);
+ free(ad_cache->ac_taddr);
free(ad_cache);
return;
}
@@ -264,8 +253,7 @@ out:
free(cptr->ac_netid);
free(cptr->ac_taddr->buf);
free(cptr->ac_taddr);
- if (cptr->ac_uaddr)
- free(cptr->ac_uaddr);
+ free(cptr->ac_uaddr);
if (prevptr) {
prevptr->ac_next = NULL;
@@ -288,10 +276,7 @@ out:
* On error, returns NULL and free's everything.
*/
static CLIENT *
-getclnthandle(host, nconf, targaddr)
- const char *host;
- const struct netconfig *nconf;
- char **targaddr;
+getclnthandle(const char *host, const struct netconfig *nconf, char **targaddr)
{
CLIENT *client;
struct netbuf *addr, taddr;
@@ -437,7 +422,7 @@ getclnthandle(host, nconf, targaddr)
* rpcbind. Returns NULL on error and free's everything.
*/
static CLIENT *
-local_rpcb()
+local_rpcb(void)
{
CLIENT *client;
static struct netconfig *loopnconf;
@@ -516,6 +501,7 @@ try_nconf:
hostname = IN6_LOCALHOST_STRING;
}
}
+ endnetconfig(nc_handle);
if (tmpnconf == NULL) {
rpc_createerr.cf_stat = RPC_UNKNOWNPROTO;
mutex_unlock(&loopnconf_lock);
@@ -523,7 +509,6 @@ try_nconf:
}
loopnconf = getnetconfigent(tmpnconf->nc_netid);
/* loopnconf is never freed */
- endnetconfig(nc_handle);
}
mutex_unlock(&loopnconf_lock);
client = getclnthandle(hostname, loopnconf, NULL);
@@ -533,13 +518,13 @@ try_nconf:
/*
* Set a mapping between program, version and address.
* Calls the rpcbind service to do the mapping.
+ *
+ * nconf - Network structure of transport
+ * address - Services netconfig address
*/
bool_t
-rpcb_set(program, version, nconf, address)
- rpcprog_t program;
- rpcvers_t version;
- const struct netconfig *nconf; /* Network structure of transport */
- const struct netbuf *address; /* Services netconfig address */
+rpcb_set(rpcprog_t program, rpcvers_t version, const struct netconfig *nconf,
+ const struct netbuf *address)
{
CLIENT *client;
bool_t rslt = FALSE;
@@ -596,10 +581,7 @@ rpcb_set(program, version, nconf, address)
* only for the given transport.
*/
bool_t
-rpcb_unset(program, version, nconf)
- rpcprog_t program;
- rpcvers_t version;
- const struct netconfig *nconf;
+rpcb_unset(rpcprog_t program, rpcvers_t version, const struct netconfig *nconf)
{
CLIENT *client;
bool_t rslt = FALSE;
@@ -636,9 +618,7 @@ rpcb_unset(program, version, nconf)
* From the merged list, find the appropriate entry
*/
static struct netbuf *
-got_entry(relp, nconf)
- rpcb_entry_list_ptr relp;
- const struct netconfig *nconf;
+got_entry(rpcb_entry_list_ptr relp, const struct netconfig *nconf)
{
struct netbuf *na = NULL;
rpcb_entry_list_ptr sp;
@@ -669,7 +649,7 @@ got_entry(relp, nconf)
* local transport.
*/
static bool_t
-__rpcbind_is_up()
+__rpcbind_is_up(void)
{
struct netconfig *nconf;
struct sockaddr_un sun;
@@ -683,11 +663,11 @@ __rpcbind_is_up()
strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0)
break;
}
+ endnetconfig(localhandle);
+
if (nconf == NULL)
return (FALSE);
- endnetconfig(localhandle);
-
memset(&sun, 0, sizeof sun);
sock = _socket(AF_LOCAL, SOCK_STREAM, 0);
if (sock < 0)
@@ -724,13 +704,9 @@ __rpcbind_is_up()
* starts working properly. Also look under clnt_vc.c.
*/
struct netbuf *
-__rpcb_findaddr_timed(program, version, nconf, host, clpp, tp)
- rpcprog_t program;
- rpcvers_t version;
- const struct netconfig *nconf;
- const char *host;
- CLIENT **clpp;
- struct timeval *tp;
+__rpcb_findaddr_timed(rpcprog_t program, rpcvers_t version,
+ const struct netconfig *nconf, const char *host,
+ CLIENT **clpp, struct timeval *tp)
{
static bool_t check_rpcbind = TRUE;
CLIENT *client = NULL;
@@ -818,10 +794,8 @@ __rpcb_findaddr_timed(program, version, nconf, host, clpp, tp)
malloc(remote.len)) == NULL)) {
rpc_createerr.cf_stat = RPC_SYSTEMERROR;
clnt_geterr(client, &rpc_createerr.cf_error);
- if (address) {
- free(address);
- address = NULL;
- }
+ free(address);
+ address = NULL;
goto error;
}
memcpy(address->buf, remote.buf, remote.len);
@@ -1039,12 +1013,8 @@ done:
* Assuming that the address is all properly allocated
*/
bool_t
-rpcb_getaddr(program, version, nconf, address, host)
- rpcprog_t program;
- rpcvers_t version;
- const struct netconfig *nconf;
- struct netbuf *address;
- const char *host;
+rpcb_getaddr(rpcprog_t program, rpcvers_t version, const struct netconfig *nconf,
+ struct netbuf *address, const char *host)
{
struct netbuf *na;
@@ -1075,9 +1045,7 @@ rpcb_getaddr(program, version, nconf, address, host)
* It returns NULL on failure.
*/
rpcblist *
-rpcb_getmaps(nconf, host)
- const struct netconfig *nconf;
- const char *host;
+rpcb_getmaps(const struct netconfig *nconf, const char *host)
{
rpcblist_ptr head = NULL;
CLIENT *client;
@@ -1125,19 +1093,20 @@ done:
* which will look up a service program in the address maps, and then
* remotely call that routine with the given parameters. This allows
* programs to do a lookup and call in one step.
-*/
+ *
+ * nconf -Netconfig structure
+ * host - Remote host name
+ * proc - Remote proc identifiers
+ * xdrargs, xdrres; XDR routines
+ * argsp, resp - Argument and Result
+ * tout - Timeout value for this call
+ * addr_ptr - Preallocated netbuf address
+ */
enum clnt_stat
-rpcb_rmtcall(nconf, host, prog, vers, proc, xdrargs, argsp,
- xdrres, resp, tout, addr_ptr)
- const struct netconfig *nconf; /* Netconfig structure */
- const char *host; /* Remote host name */
- rpcprog_t prog;
- rpcvers_t vers;
- rpcproc_t proc; /* Remote proc identifiers */
- xdrproc_t xdrargs, xdrres; /* XDR routines */
- caddr_t argsp, resp; /* Argument and Result */
- struct timeval tout; /* Timeout value for this call */
- const struct netbuf *addr_ptr; /* Preallocated netbuf address */
+rpcb_rmtcall(const struct netconfig *nconf, const char *host, rpcprog_t prog,
+ rpcvers_t vers, rpcproc_t proc, xdrproc_t xdrargs, caddr_t argsp,
+ xdrproc_t xdrres, caddr_t resp, struct timeval tout,
+ const struct netbuf *addr_ptr)
{
CLIENT *client;
enum clnt_stat stat;
@@ -1208,9 +1177,7 @@ error:
* Returns 1 if succeeds else 0.
*/
bool_t
-rpcb_gettime(host, timep)
- const char *host;
- time_t *timep;
+rpcb_gettime(const char *host, time_t *timep)
{
CLIENT *client = NULL;
void *handle;
@@ -1269,9 +1236,7 @@ rpcb_gettime(host, timep)
* really be called because local n2a libraries are always provided.
*/
char *
-rpcb_taddr2uaddr(nconf, taddr)
- struct netconfig *nconf;
- struct netbuf *taddr;
+rpcb_taddr2uaddr(struct netconfig *nconf, struct netbuf *taddr)
{
CLIENT *client;
char *uaddr = NULL;
@@ -1303,9 +1268,7 @@ rpcb_taddr2uaddr(nconf, taddr)
* really be called because local n2a libraries are always provided.
*/
struct netbuf *
-rpcb_uaddr2taddr(nconf, uaddr)
- struct netconfig *nconf;
- char *uaddr;
+rpcb_uaddr2taddr(struct netconfig *nconf, char *uaddr)
{
CLIENT *client;
struct netbuf *taddr;
diff --git a/freebsd/lib/libc/rpc/rpcb_prot.c b/freebsd/lib/libc/rpc/rpcb_prot.c
index c3a7ea8e..95072371 100644
--- a/freebsd/lib/libc/rpc/rpcb_prot.c
+++ b/freebsd/lib/libc/rpc/rpcb_prot.c
@@ -56,14 +56,12 @@ __FBSDID("$FreeBSD$");
#include "un-namespace.h"
bool_t
-xdr_rpcb(xdrs, objp)
- XDR *xdrs;
- RPCB *objp;
+xdr_rpcb(XDR *xdrs, RPCB *objp)
{
- if (!xdr_u_int32_t(xdrs, &objp->r_prog)) {
+ if (!xdr_rpcprog(xdrs, &objp->r_prog)) {
return (FALSE);
}
- if (!xdr_u_int32_t(xdrs, &objp->r_vers)) {
+ if (!xdr_rpcvers(xdrs, &objp->r_vers)) {
return (FALSE);
}
if (!xdr_string(xdrs, &objp->r_netid, (u_int)~0)) {
@@ -102,9 +100,7 @@ xdr_rpcb(xdrs, objp)
*/
bool_t
-xdr_rpcblist_ptr(xdrs, rp)
- XDR *xdrs;
- rpcblist_ptr *rp;
+xdr_rpcblist_ptr(XDR *xdrs, rpcblist_ptr *rp)
{
/*
* more_elements is pre-computed in case the direction is
@@ -156,9 +152,7 @@ xdr_rpcblist_ptr(xdrs, rp)
* functionality to xdr_rpcblist_ptr().
*/
bool_t
-xdr_rpcblist(xdrs, rp)
- XDR *xdrs;
- RPCBLIST **rp;
+xdr_rpcblist(XDR *xdrs, RPCBLIST **rp)
{
bool_t dummy;
@@ -168,9 +162,7 @@ xdr_rpcblist(xdrs, rp)
bool_t
-xdr_rpcb_entry(xdrs, objp)
- XDR *xdrs;
- rpcb_entry *objp;
+xdr_rpcb_entry(XDR *xdrs, rpcb_entry *objp)
{
if (!xdr_string(xdrs, &objp->r_maddr, (u_int)~0)) {
return (FALSE);
@@ -191,9 +183,7 @@ xdr_rpcb_entry(xdrs, objp)
}
bool_t
-xdr_rpcb_entry_list_ptr(xdrs, rp)
- XDR *xdrs;
- rpcb_entry_list_ptr *rp;
+xdr_rpcb_entry_list_ptr(XDR *xdrs, rpcb_entry_list_ptr *rp)
{
/*
* more_elements is pre-computed in case the direction is
@@ -219,14 +209,14 @@ xdr_rpcb_entry_list_ptr(xdrs, rp)
* the case of freeing we must remember the next object
* before we free the current object ...
*/
- if (freeing)
+ if (freeing && *rp)
next = (*rp)->rpcb_entry_next;
if (! xdr_reference(xdrs, (caddr_t *)rp,
(u_int)sizeof (rpcb_entry_list),
(xdrproc_t)xdr_rpcb_entry)) {
return (FALSE);
}
- if (freeing && *rp) {
+ if (freeing) {
next_copy = next;
rp = &next_copy;
/*
@@ -246,9 +236,7 @@ xdr_rpcb_entry_list_ptr(xdrs, rp)
* written for XDR_ENCODE direction only
*/
bool_t
-xdr_rpcb_rmtcallargs(xdrs, p)
- XDR *xdrs;
- struct rpcb_rmtcallargs *p;
+xdr_rpcb_rmtcallargs(XDR *xdrs, struct rpcb_rmtcallargs *p)
{
struct r_rpcb_rmtcallargs *objp =
(struct r_rpcb_rmtcallargs *)(void *)p;
@@ -257,13 +245,13 @@ xdr_rpcb_rmtcallargs(xdrs, p)
buf = XDR_INLINE(xdrs, 3 * BYTES_PER_XDR_UNIT);
if (buf == NULL) {
- if (!xdr_u_int32_t(xdrs, &objp->prog)) {
+ if (!xdr_rpcprog(xdrs, &objp->prog)) {
return (FALSE);
}
- if (!xdr_u_int32_t(xdrs, &objp->vers)) {
+ if (!xdr_rpcvers(xdrs, &objp->vers)) {
return (FALSE);
}
- if (!xdr_u_int32_t(xdrs, &objp->proc)) {
+ if (!xdr_rpcproc(xdrs, &objp->proc)) {
return (FALSE);
}
} else {
@@ -298,9 +286,7 @@ xdr_rpcb_rmtcallargs(xdrs, p)
* written for XDR_DECODE direction only
*/
bool_t
-xdr_rpcb_rmtcallres(xdrs, p)
- XDR *xdrs;
- struct rpcb_rmtcallres *p;
+xdr_rpcb_rmtcallres(XDR *xdrs, struct rpcb_rmtcallres *p)
{
bool_t dummy;
struct r_rpcb_rmtcallres *objp = (struct r_rpcb_rmtcallres *)(void *)p;
@@ -316,9 +302,7 @@ xdr_rpcb_rmtcallres(xdrs, p)
}
bool_t
-xdr_netbuf(xdrs, objp)
- XDR *xdrs;
- struct netbuf *objp;
+xdr_netbuf(XDR *xdrs, struct netbuf *objp)
{
bool_t dummy;
void **pp;
diff --git a/freebsd/lib/libc/rpc/rpcb_st_xdr.c b/freebsd/lib/libc/rpc/rpcb_st_xdr.c
index 375f06cd..6b8711ea 100644
--- a/freebsd/lib/libc/rpc/rpcb_st_xdr.c
+++ b/freebsd/lib/libc/rpc/rpcb_st_xdr.c
@@ -49,16 +49,14 @@ __FBSDID("$FreeBSD$");
/* Link list of all the stats about getport and getaddr */
bool_t
-xdr_rpcbs_addrlist(xdrs, objp)
- XDR *xdrs;
- rpcbs_addrlist *objp;
+xdr_rpcbs_addrlist(XDR *xdrs, rpcbs_addrlist *objp)
{
struct rpcbs_addrlist **pnext;
- if (!xdr_u_int32_t(xdrs, &objp->prog)) {
+ if (!xdr_rpcprog(xdrs, &objp->prog)) {
return (FALSE);
}
- if (!xdr_u_int32_t(xdrs, &objp->vers)) {
+ if (!xdr_rpcvers(xdrs, &objp->vers)) {
return (FALSE);
}
if (!xdr_int(xdrs, &objp->success)) {
@@ -85,9 +83,7 @@ xdr_rpcbs_addrlist(xdrs, objp)
/* Link list of all the stats about rmtcall */
bool_t
-xdr_rpcbs_rmtcalllist(xdrs, objp)
- XDR *xdrs;
- rpcbs_rmtcalllist *objp;
+xdr_rpcbs_rmtcalllist(XDR *xdrs, rpcbs_rmtcalllist *objp)
{
int32_t *buf;
struct rpcbs_rmtcalllist **pnext;
@@ -95,13 +91,13 @@ xdr_rpcbs_rmtcalllist(xdrs, objp)
if (xdrs->x_op == XDR_ENCODE) {
buf = XDR_INLINE(xdrs, 6 * BYTES_PER_XDR_UNIT);
if (buf == NULL) {
- if (!xdr_u_int32_t(xdrs, &objp->prog)) {
+ if (!xdr_rpcprog(xdrs, &objp->prog)) {
return (FALSE);
}
- if (!xdr_u_int32_t(xdrs, &objp->vers)) {
+ if (!xdr_rpcvers(xdrs, &objp->vers)) {
return (FALSE);
}
- if (!xdr_u_int32_t(xdrs, &objp->proc)) {
+ if (!xdr_rpcproc(xdrs, &objp->proc)) {
return (FALSE);
}
if (!xdr_int(xdrs, &objp->success)) {
@@ -134,13 +130,13 @@ xdr_rpcbs_rmtcalllist(xdrs, objp)
} else if (xdrs->x_op == XDR_DECODE) {
buf = XDR_INLINE(xdrs, 6 * BYTES_PER_XDR_UNIT);
if (buf == NULL) {
- if (!xdr_u_int32_t(xdrs, &objp->prog)) {
+ if (!xdr_rpcprog(xdrs, &objp->prog)) {
return (FALSE);
}
- if (!xdr_u_int32_t(xdrs, &objp->vers)) {
+ if (!xdr_rpcvers(xdrs, &objp->vers)) {
return (FALSE);
}
- if (!xdr_u_int32_t(xdrs, &objp->proc)) {
+ if (!xdr_rpcproc(xdrs, &objp->proc)) {
return (FALSE);
}
if (!xdr_int(xdrs, &objp->success)) {
@@ -170,13 +166,13 @@ xdr_rpcbs_rmtcalllist(xdrs, objp)
}
return (TRUE);
}
- if (!xdr_u_int32_t(xdrs, &objp->prog)) {
+ if (!xdr_rpcprog(xdrs, &objp->prog)) {
return (FALSE);
}
- if (!xdr_u_int32_t(xdrs, &objp->vers)) {
+ if (!xdr_rpcvers(xdrs, &objp->vers)) {
return (FALSE);
}
- if (!xdr_u_int32_t(xdrs, &objp->proc)) {
+ if (!xdr_rpcproc(xdrs, &objp->proc)) {
return (FALSE);
}
if (!xdr_int(xdrs, &objp->success)) {
@@ -200,9 +196,7 @@ xdr_rpcbs_rmtcalllist(xdrs, objp)
}
bool_t
-xdr_rpcbs_proc(xdrs, objp)
- XDR *xdrs;
- rpcbs_proc objp;
+xdr_rpcbs_proc(XDR *xdrs, rpcbs_proc objp)
{
if (!xdr_vector(xdrs, (char *)(void *)objp, RPCBSTAT_HIGHPROC,
sizeof (int), (xdrproc_t)xdr_int)) {
@@ -212,9 +206,7 @@ xdr_rpcbs_proc(xdrs, objp)
}
bool_t
-xdr_rpcbs_addrlist_ptr(xdrs, objp)
- XDR *xdrs;
- rpcbs_addrlist_ptr *objp;
+xdr_rpcbs_addrlist_ptr(XDR *xdrs, rpcbs_addrlist_ptr *objp)
{
if (!xdr_pointer(xdrs, (char **)objp, sizeof (rpcbs_addrlist),
(xdrproc_t)xdr_rpcbs_addrlist)) {
@@ -224,9 +216,7 @@ xdr_rpcbs_addrlist_ptr(xdrs, objp)
}
bool_t
-xdr_rpcbs_rmtcalllist_ptr(xdrs, objp)
- XDR *xdrs;
- rpcbs_rmtcalllist_ptr *objp;
+xdr_rpcbs_rmtcalllist_ptr(XDR *xdrs, rpcbs_rmtcalllist_ptr *objp)
{
if (!xdr_pointer(xdrs, (char **)objp, sizeof (rpcbs_rmtcalllist),
(xdrproc_t)xdr_rpcbs_rmtcalllist)) {
@@ -236,9 +226,7 @@ xdr_rpcbs_rmtcalllist_ptr(xdrs, objp)
}
bool_t
-xdr_rpcb_stat(xdrs, objp)
- XDR *xdrs;
- rpcb_stat *objp;
+xdr_rpcb_stat(XDR *xdrs, rpcb_stat *objp)
{
if (!xdr_rpcbs_proc(xdrs, objp->info)) {
@@ -264,9 +252,7 @@ xdr_rpcb_stat(xdrs, objp)
* being monitored.
*/
bool_t
-xdr_rpcb_stat_byvers(xdrs, objp)
- XDR *xdrs;
- rpcb_stat_byvers objp;
+xdr_rpcb_stat_byvers(XDR *xdrs, rpcb_stat_byvers objp)
{
if (!xdr_vector(xdrs, (char *)(void *)objp, RPCBVERS_STAT,
sizeof (rpcb_stat), (xdrproc_t)xdr_rpcb_stat)) {
diff --git a/freebsd/lib/libc/rpc/rpcdname.c b/freebsd/lib/libc/rpc/rpcdname.c
index 1805da80..63ea11e1 100644
--- a/freebsd/lib/libc/rpc/rpcdname.c
+++ b/freebsd/lib/libc/rpc/rpcdname.c
@@ -45,20 +45,20 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include "un-namespace.h"
-static char *default_domain = 0;
+static char *default_domain;
static char *
-get_default_domain()
+get_default_domain(void)
{
char temp[256];
- if (default_domain)
+ if (default_domain != NULL)
return (default_domain);
if (getdomainname(temp, sizeof(temp)) < 0)
return (0);
if ((int) strlen(temp) > 0) {
- default_domain = (char *)malloc((strlen(temp)+(unsigned)1));
- if (default_domain == 0)
+ default_domain = malloc((strlen(temp) + (unsigned)1));
+ if (default_domain == NULL)
return (0);
(void) strcpy(default_domain, temp);
return (default_domain);
@@ -73,10 +73,9 @@ get_default_domain()
* get rejected elsewhere in the NIS client package.
*/
int
-__rpc_get_default_domain(domain)
- char **domain;
+__rpc_get_default_domain(char **domain)
{
- if ((*domain = get_default_domain()) != 0)
+ if ((*domain = get_default_domain()) != NULL)
return (0);
return (-1);
}
diff --git a/freebsd/lib/libc/rpc/rtime.c b/freebsd/lib/libc/rpc/rtime.c
index b8f783e9..a6223863 100644
--- a/freebsd/lib/libc/rpc/rtime.c
+++ b/freebsd/lib/libc/rpc/rtime.c
@@ -63,16 +63,14 @@ __FBSDID("$FreeBSD$");
extern int _rpc_dtablesize( void );
-#define NYEARS (unsigned long)(1970 - 1900)
-#define TOFFSET (unsigned long)(60*60*24*(365*NYEARS + (NYEARS/4)))
+#define NYEARS (unsigned long)(1970 - 1900)
+#define TOFFSET (unsigned long)(60*60*24*(365*NYEARS + (NYEARS/4)))
static void do_close( int );
int
-rtime(addrp, timep, timeout)
- struct sockaddr_in *addrp;
- struct timeval *timep;
- struct timeval *timeout;
+rtime(struct sockaddr_in *addrp, struct timeval *timep,
+ struct timeval *timeout)
{
int s;
fd_set readfds;
@@ -102,11 +100,11 @@ rtime(addrp, timep, timeout)
addrp->sin_port = serv->s_port;
if (type == SOCK_DGRAM) {
- res = _sendto(s, (char *)&thetime, sizeof(thetime), 0,
+ res = _sendto(s, (char *)&thetime, sizeof(thetime), 0,
(struct sockaddr *)addrp, sizeof(*addrp));
if (res < 0) {
do_close(s);
- return(-1);
+ return(-1);
}
do {
FD_ZERO(&readfds);
@@ -119,14 +117,14 @@ rtime(addrp, timep, timeout)
errno = ETIMEDOUT;
}
do_close(s);
- return(-1);
+ return(-1);
}
fromlen = sizeof(from);
- res = _recvfrom(s, (char *)&thetime, sizeof(thetime), 0,
+ res = _recvfrom(s, (char *)&thetime, sizeof(thetime), 0,
(struct sockaddr *)&from, &fromlen);
do_close(s);
if (res < 0) {
- return(-1);
+ return(-1);
}
} else {
if (_connect(s, (struct sockaddr *)addrp, sizeof(*addrp)) < 0) {
@@ -141,7 +139,7 @@ rtime(addrp, timep, timeout)
}
if (res != sizeof(thetime)) {
errno = EIO;
- return(-1);
+ return(-1);
}
thetime = ntohl(thetime);
timep->tv_sec = thetime - TOFFSET;
@@ -150,8 +148,7 @@ rtime(addrp, timep, timeout)
}
static void
-do_close(s)
- int s;
+do_close(int s)
{
int save;
diff --git a/freebsd/lib/libc/rpc/svc.c b/freebsd/lib/libc/rpc/svc.c
index 709bfffa..64b79036 100644
--- a/freebsd/lib/libc/rpc/svc.c
+++ b/freebsd/lib/libc/rpc/svc.c
@@ -76,7 +76,7 @@ __FBSDID("$FreeBSD$");
* The services list
* Each entry represents a set of procedures (an rpc program).
* The dispatch routine takes request structs and runs the
- * apropriate procedure.
+ * appropriate procedure.
*/
static struct svc_callout {
struct svc_callout *sc_next;
@@ -86,6 +86,9 @@ static struct svc_callout {
void (*sc_dispatch)(struct svc_req *, SVCXPRT *);
} *svc_head;
+SVCXPRT **__svc_xports;
+int __svc_maxrec;
+
static struct svc_callout *svc_find(rpcprog_t, rpcvers_t,
struct svc_callout **, char *);
static void __xprt_do_unregister (SVCXPRT *xprt, bool_t dolock);
@@ -96,8 +99,7 @@ static void __xprt_do_unregister (SVCXPRT *xprt, bool_t dolock);
* Activate a transport handle.
*/
void
-xprt_register(xprt)
- SVCXPRT *xprt;
+xprt_register(SVCXPRT *xprt)
{
int sock;
@@ -139,9 +141,7 @@ __xprt_unregister_unlocked(SVCXPRT *xprt)
* De-activate a transport handle.
*/
static void
-__xprt_do_unregister(xprt, dolock)
- SVCXPRT *xprt;
- bool_t dolock;
+__xprt_do_unregister(SVCXPRT *xprt, bool_t dolock)
{
int sock;
@@ -170,12 +170,9 @@ __xprt_do_unregister(xprt, dolock)
* program number comes in.
*/
bool_t
-svc_reg(xprt, prog, vers, dispatch, nconf)
- SVCXPRT *xprt;
- const rpcprog_t prog;
- const rpcvers_t vers;
- void (*dispatch)(struct svc_req *, SVCXPRT *);
- const struct netconfig *nconf;
+svc_reg(SVCXPRT *xprt, const rpcprog_t prog, const rpcvers_t vers,
+ void (*dispatch)(struct svc_req *, SVCXPRT *),
+ const struct netconfig *nconf)
{
bool_t dummy;
struct svc_callout *prev;
@@ -203,8 +200,7 @@ svc_reg(xprt, prog, vers, dispatch, nconf)
rwlock_wrlock(&svc_lock);
if ((s = svc_find(prog, vers, &prev, netid)) != NULL) {
- if (netid)
- free(netid);
+ free(netid);
if (s->sc_dispatch == dispatch)
goto rpcb_it; /* he is registering another xptr */
rwlock_unlock(&svc_lock);
@@ -212,8 +208,7 @@ svc_reg(xprt, prog, vers, dispatch, nconf)
}
s = mem_alloc(sizeof (struct svc_callout));
if (s == NULL) {
- if (netid)
- free(netid);
+ free(netid);
rwlock_unlock(&svc_lock);
return (FALSE);
}
@@ -244,9 +239,7 @@ rpcb_it:
* Remove a service program from the callout list.
*/
void
-svc_unreg(prog, vers)
- const rpcprog_t prog;
- const rpcvers_t vers;
+svc_unreg(const rpcprog_t prog, const rpcvers_t vers)
{
struct svc_callout *prev;
struct svc_callout *s;
@@ -277,12 +270,9 @@ svc_unreg(prog, vers)
* program number comes in.
*/
bool_t
-svc_register(xprt, prog, vers, dispatch, protocol)
- SVCXPRT *xprt;
- u_long prog;
- u_long vers;
- void (*dispatch)(struct svc_req *, SVCXPRT *);
- int protocol;
+svc_register(SVCXPRT *xprt, u_long prog, u_long vers,
+ void (*dispatch)(struct svc_req *, SVCXPRT *),
+ int protocol)
{
struct svc_callout *prev;
struct svc_callout *s;
@@ -317,9 +307,7 @@ pmap_it:
* Remove a service program from the callout list.
*/
void
-svc_unregister(prog, vers)
- u_long prog;
- u_long vers;
+svc_unregister(u_long prog, u_long vers)
{
struct svc_callout *prev;
struct svc_callout *s;
@@ -344,11 +332,8 @@ svc_unregister(prog, vers)
* struct.
*/
static struct svc_callout *
-svc_find(prog, vers, prev, netid)
- rpcprog_t prog;
- rpcvers_t vers;
- struct svc_callout **prev;
- char *netid;
+svc_find(rpcprog_t prog, rpcvers_t vers, struct svc_callout **prev,
+ char *netid)
{
struct svc_callout *s, *p;
@@ -372,10 +357,8 @@ svc_find(prog, vers, prev, netid)
* Send a reply to an rpc request
*/
bool_t
-svc_sendreply(xprt, xdr_results, xdr_location)
- SVCXPRT *xprt;
- xdrproc_t xdr_results;
- void * xdr_location;
+svc_sendreply(SVCXPRT *xprt, xdrproc_t xdr_results,
+ void * xdr_location)
{
struct rpc_msg rply;
@@ -394,8 +377,7 @@ svc_sendreply(xprt, xdr_results, xdr_location)
* No procedure error reply
*/
void
-svcerr_noproc(xprt)
- SVCXPRT *xprt;
+svcerr_noproc(SVCXPRT *xprt)
{
struct rpc_msg rply;
@@ -412,8 +394,7 @@ svcerr_noproc(xprt)
* Can't decode args error reply
*/
void
-svcerr_decode(xprt)
- SVCXPRT *xprt;
+svcerr_decode(SVCXPRT *xprt)
{
struct rpc_msg rply;
@@ -430,8 +411,7 @@ svcerr_decode(xprt)
* Some system error
*/
void
-svcerr_systemerr(xprt)
- SVCXPRT *xprt;
+svcerr_systemerr(SVCXPRT *xprt)
{
struct rpc_msg rply;
@@ -452,31 +432,27 @@ svcerr_systemerr(xprt)
* protocol: the portmapper (or rpc binder).
*/
void
-__svc_versquiet_on(xprt)
- SVCXPRT *xprt;
+__svc_versquiet_on(SVCXPRT *xprt)
{
SVC_EXT(xprt)->xp_flags |= SVC_VERSQUIET;
}
void
-__svc_versquiet_off(xprt)
- SVCXPRT *xprt;
+__svc_versquiet_off(SVCXPRT *xprt)
{
SVC_EXT(xprt)->xp_flags &= ~SVC_VERSQUIET;
}
void
-svc_versquiet(xprt)
- SVCXPRT *xprt;
+svc_versquiet(SVCXPRT *xprt)
{
__svc_versquiet_on(xprt);
}
int
-__svc_versquiet_get(xprt)
- SVCXPRT *xprt;
+__svc_versquiet_get(SVCXPRT *xprt)
{
return (SVC_EXT(xprt)->xp_flags & SVC_VERSQUIET);
@@ -487,9 +463,7 @@ __svc_versquiet_get(xprt)
* Authentication error reply
*/
void
-svcerr_auth(xprt, why)
- SVCXPRT *xprt;
- enum auth_stat why;
+svcerr_auth(SVCXPRT *xprt, enum auth_stat why)
{
struct rpc_msg rply;
@@ -506,8 +480,7 @@ svcerr_auth(xprt, why)
* Auth too weak error reply
*/
void
-svcerr_weakauth(xprt)
- SVCXPRT *xprt;
+svcerr_weakauth(SVCXPRT *xprt)
{
assert(xprt != NULL);
@@ -519,8 +492,7 @@ svcerr_weakauth(xprt)
* Program unavailable error reply
*/
void
-svcerr_noprog(xprt)
- SVCXPRT *xprt;
+svcerr_noprog(SVCXPRT *xprt)
{
struct rpc_msg rply;
@@ -537,10 +509,7 @@ svcerr_noprog(xprt)
* Program version mismatch error reply
*/
void
-svcerr_progvers(xprt, low_vers, high_vers)
- SVCXPRT *xprt;
- rpcvers_t low_vers;
- rpcvers_t high_vers;
+svcerr_progvers(SVCXPRT *xprt, rpcvers_t low_vers, rpcvers_t high_vers)
{
struct rpc_msg rply;
@@ -562,7 +531,7 @@ svcerr_progvers(xprt, low_vers, high_vers)
* parameters.
*/
SVCXPRT *
-svc_xprt_alloc()
+svc_xprt_alloc(void)
{
SVCXPRT *xprt;
SVCXPRT_EXT *ext;
@@ -587,8 +556,7 @@ svc_xprt_alloc()
* Free a server transport structure.
*/
void
-svc_xprt_free(xprt)
- SVCXPRT *xprt;
+svc_xprt_free(SVCXPRT *xprt)
{
mem_free(xprt->xp_p3, sizeof(SVCXPRT_EXT));
@@ -614,8 +582,7 @@ svc_xprt_free(xprt)
*/
void
-svc_getreq(rdfds)
- int rdfds;
+svc_getreq(int rdfds)
{
fd_set readfds;
@@ -625,8 +592,7 @@ svc_getreq(rdfds)
}
void
-svc_getreqset(readfds)
- fd_set *readfds;
+svc_getreqset(fd_set *readfds)
{
int bit, fd;
fd_mask mask, *maskp;
@@ -646,8 +612,7 @@ svc_getreqset(readfds)
}
void
-svc_getreq_common(fd)
- int fd;
+svc_getreq_common(int fd)
{
SVCXPRT *xprt;
struct svc_req r;
@@ -742,9 +707,7 @@ call_done:
void
-svc_getreq_poll(pfdp, pollretval)
- struct pollfd *pfdp;
- int pollretval;
+svc_getreq_poll(struct pollfd *pfdp, int pollretval)
{
int i;
int fds_found;
diff --git a/freebsd/lib/libc/rpc/svc_auth.c b/freebsd/lib/libc/rpc/svc_auth.c
index 917e5e81..ea56b340 100644
--- a/freebsd/lib/libc/rpc/svc_auth.c
+++ b/freebsd/lib/libc/rpc/svc_auth.c
@@ -97,9 +97,7 @@ struct svc_auth_ops svc_auth_null_ops;
* invalid.
*/
enum auth_stat
-_authenticate(rqst, msg)
- struct svc_req *rqst;
- struct rpc_msg *msg;
+_authenticate(struct svc_req *rqst, struct rpc_msg *msg)
{
int cred_flavor;
struct authsvc *asp;
@@ -153,11 +151,7 @@ _authenticate(rqst, msg)
* that don't need to inspect or modify the message body.
*/
static bool_t
-svcauth_null_wrap(auth, xdrs, xdr_func, xdr_ptr)
- SVCAUTH *auth;
- XDR *xdrs;
- xdrproc_t xdr_func;
- caddr_t xdr_ptr;
+svcauth_null_wrap(SVCAUTH *auth, XDR *xdrs, xdrproc_t xdr_func, caddr_t xdr_ptr)
{
return (xdr_func(xdrs, xdr_ptr));
@@ -170,9 +164,7 @@ struct svc_auth_ops svc_auth_null_ops = {
/*ARGSUSED*/
enum auth_stat
-_svcauth_null(rqst, msg)
- struct svc_req *rqst;
- struct rpc_msg *msg;
+_svcauth_null(struct svc_req *rqst, struct rpc_msg *msg)
{
return (AUTH_OK);
}
@@ -192,9 +184,8 @@ _svcauth_null(rqst, msg)
*/
int
-svc_auth_reg(cred_flavor, handler)
- int cred_flavor;
- enum auth_stat (*handler)(struct svc_req *, struct rpc_msg *);
+svc_auth_reg(int cred_flavor,
+ enum auth_stat (*handler)(struct svc_req *, struct rpc_msg *))
{
struct authsvc *asp;
diff --git a/freebsd/lib/libc/rpc/svc_auth_des.c b/freebsd/lib/libc/rpc/svc_auth_des.c
index a9408019..55f3d3ee 100644
--- a/freebsd/lib/libc/rpc/svc_auth_des.c
+++ b/freebsd/lib/libc/rpc/svc_auth_des.c
@@ -92,11 +92,11 @@ struct cache_entry {
static struct cache_entry *authdes_cache/* [AUTHDES_CACHESZ] */;
static short *authdes_lru/* [AUTHDES_CACHESZ] */;
-static void cache_init(); /* initialize the cache */
-static short cache_spot(); /* find an entry in the cache */
-static void cache_ref(/*short sid*/); /* note that sid was ref'd */
+static void cache_init(void); /* initialize the cache */
+static short cache_spot(des_block *, char *, struct timeval *); /* find an entry in the cache */
+static void cache_ref(short sid); /* note that sid was ref'd */
-static void invalidate(); /* invalidate entry in cache */
+static void invalidate(char *); /* invalidate entry in cache */
/*
* cache statistics
@@ -111,9 +111,7 @@ static struct {
* Service side authenticator for AUTH_DES
*/
enum auth_stat
-_svcauth_des(rqst, msg)
- struct svc_req *rqst;
- struct rpc_msg *msg;
+_svcauth_des(struct svc_req *rqst, struct rpc_msg *msg)
{
long *ixdr;
@@ -272,7 +270,7 @@ _svcauth_des(rqst, msg)
debug("timestamp before last seen");
return (AUTH_REJECTEDVERF); /* replay */
}
- (void) gettimeofday(&current, (struct timezone *)NULL);
+ (void)gettimeofday(&current, NULL);
current.tv_sec -= window; /* allow for expiration */
if (!BEFORE(&current, &timestamp)) {
debug("timestamp expired");
@@ -357,7 +355,7 @@ _svcauth_des(rqst, msg)
* Initialize the cache
*/
static void
-cache_init()
+cache_init(void)
{
int i;
@@ -380,7 +378,7 @@ cache_init()
* Find the lru victim
*/
static short
-cache_victim()
+cache_victim(void)
{
return (authdes_lru[AUTHDES_CACHESZ-1]);
}
@@ -389,8 +387,7 @@ cache_victim()
* Note that sid was referenced
*/
static void
-cache_ref(sid)
- short sid;
+cache_ref(short sid)
{
int i;
short curr;
@@ -412,10 +409,7 @@ cache_ref(sid)
* return the spot in the cache.
*/
static short
-cache_spot(key, name, timestamp)
- des_block *key;
- char *name;
- struct timeval *timestamp;
+cache_spot(des_block *key, char *name, struct timeval *timestamp)
{
struct cache_entry *cp;
int i;
@@ -463,12 +457,8 @@ struct bsdcred {
* the credential.
*/
int
-authdes_getucred(adc, uid, gid, grouplen, groups)
- struct authdes_cred *adc;
- uid_t *uid;
- gid_t *gid;
- int *grouplen;
- gid_t *groups;
+authdes_getucred(struct authdes_cred *adc, uid_t *uid, gid_t *gid,
+ int *grouplen, gid_t *groups)
{
unsigned sid;
int i;
@@ -527,8 +517,7 @@ authdes_getucred(adc, uid, gid, grouplen, groups)
}
static void
-invalidate(cred)
- char *cred;
+invalidate(char *cred)
{
if (cred == NULL) {
return;
diff --git a/freebsd/lib/libc/rpc/svc_auth_unix.c b/freebsd/lib/libc/rpc/svc_auth_unix.c
index bd052b77..53459786 100644
--- a/freebsd/lib/libc/rpc/svc_auth_unix.c
+++ b/freebsd/lib/libc/rpc/svc_auth_unix.c
@@ -58,9 +58,7 @@ __FBSDID("$FreeBSD$");
* Unix longhand authenticator
*/
enum auth_stat
-_svcauth_unix(rqst, msg)
- struct svc_req *rqst;
- struct rpc_msg *msg;
+_svcauth_unix(struct svc_req *rqst, struct rpc_msg *msg)
{
enum auth_stat stat;
XDR xdrs;
@@ -149,9 +147,7 @@ done:
*/
/*ARGSUSED*/
enum auth_stat
-_svcauth_short(rqst, msg)
- struct svc_req *rqst;
- struct rpc_msg *msg;
+_svcauth_short(struct svc_req *rqst, struct rpc_msg *msg)
{
return (AUTH_REJECTEDCRED);
}
diff --git a/freebsd/lib/libc/rpc/svc_dg.c b/freebsd/lib/libc/rpc/svc_dg.c
index 0d75191c..377ce5f2 100644
--- a/freebsd/lib/libc/rpc/svc_dg.c
+++ b/freebsd/lib/libc/rpc/svc_dg.c
@@ -104,10 +104,7 @@ static const char svc_dg_err4[] = "cannot set IP_RECVDSTADDR";
static const char __no_mem_str[] = "out of memory";
SVCXPRT *
-svc_dg_create(fd, sendsize, recvsize)
- int fd;
- u_int sendsize;
- u_int recvsize;
+svc_dg_create(int fd, u_int sendsize, u_int recvsize)
{
SVCXPRT *xprt;
struct svc_dg_data *su = NULL;
@@ -190,8 +187,7 @@ freedata_nowarn:
/*ARGSUSED*/
static enum xprt_stat
-svc_dg_stat(xprt)
- SVCXPRT *xprt;
+svc_dg_stat(SVCXPRT *xprt)
{
return (XPRT_IDLE);
}
@@ -252,9 +248,7 @@ svc_dg_recvfrom(int fd, char *buf, int buflen,
}
static bool_t
-svc_dg_recv(xprt, msg)
- SVCXPRT *xprt;
- struct rpc_msg *msg;
+svc_dg_recv(SVCXPRT *xprt, struct rpc_msg *msg)
{
struct svc_dg_data *su = su_data(xprt);
XDR *xdrs = &(su->su_xdrs);
@@ -337,9 +331,7 @@ svc_dg_sendto(int fd, char *buf, int buflen,
}
static bool_t
-svc_dg_reply(xprt, msg)
- SVCXPRT *xprt;
- struct rpc_msg *msg;
+svc_dg_reply(SVCXPRT *xprt, struct rpc_msg *msg)
{
struct svc_dg_data *su = su_data(xprt);
XDR *xdrs = &(su->su_xdrs);
@@ -380,10 +372,7 @@ svc_dg_reply(xprt, msg)
}
static bool_t
-svc_dg_getargs(xprt, xdr_args, args_ptr)
- SVCXPRT *xprt;
- xdrproc_t xdr_args;
- void *args_ptr;
+svc_dg_getargs(SVCXPRT *xprt, xdrproc_t xdr_args, void *args_ptr)
{
struct svc_dg_data *su;
@@ -394,10 +383,7 @@ svc_dg_getargs(xprt, xdr_args, args_ptr)
}
static bool_t
-svc_dg_freeargs(xprt, xdr_args, args_ptr)
- SVCXPRT *xprt;
- xdrproc_t xdr_args;
- void *args_ptr;
+svc_dg_freeargs(SVCXPRT *xprt, xdrproc_t xdr_args, void *args_ptr)
{
XDR *xdrs = &(su_data(xprt)->su_xdrs);
@@ -406,8 +392,7 @@ svc_dg_freeargs(xprt, xdr_args, args_ptr)
}
static void
-svc_dg_destroy(xprt)
- SVCXPRT *xprt;
+svc_dg_destroy(SVCXPRT *xprt)
{
struct svc_dg_data *su = su_data(xprt);
@@ -423,24 +408,19 @@ svc_dg_destroy(xprt)
(void) mem_free(xprt->xp_rtaddr.buf, xprt->xp_rtaddr.maxlen);
if (xprt->xp_ltaddr.buf)
(void) mem_free(xprt->xp_ltaddr.buf, xprt->xp_ltaddr.maxlen);
- if (xprt->xp_tp)
- (void) free(xprt->xp_tp);
+ free(xprt->xp_tp);
svc_xprt_free(xprt);
}
static bool_t
/*ARGSUSED*/
-svc_dg_control(xprt, rq, in)
- SVCXPRT *xprt;
- const u_int rq;
- void *in;
+svc_dg_control(SVCXPRT *xprt, const u_int rq, void *in)
{
return (FALSE);
}
static void
-svc_dg_ops(xprt)
- SVCXPRT *xprt;
+svc_dg_ops(SVCXPRT *xprt)
{
static struct xp_ops ops;
static struct xp_ops2 ops2;
@@ -538,9 +518,7 @@ static const char alloc_err[] = "could not allocate cache ";
static const char enable_err[] = "cache already enabled";
int
-svc_dg_enablecache(transp, size)
- SVCXPRT *transp;
- u_int size;
+svc_dg_enablecache(SVCXPRT *transp, u_int size)
{
struct svc_dg_data *su = su_data(transp);
struct cl_cache *uc;
@@ -595,9 +573,7 @@ static const char cache_set_err2[] = "victim alloc failed";
static const char cache_set_err3[] = "could not allocate new rpc buffer";
static void
-cache_set(xprt, replylen)
- SVCXPRT *xprt;
- size_t replylen;
+cache_set(SVCXPRT *xprt, size_t replylen)
{
cache_ptr victim;
cache_ptr *vicp;
@@ -685,11 +661,7 @@ cache_set(xprt, replylen)
* return 1 if found, 0 if not found and set the stage for cache_set()
*/
static int
-cache_get(xprt, msg, replyp, replylenp)
- SVCXPRT *xprt;
- struct rpc_msg *msg;
- char **replyp;
- size_t *replylenp;
+cache_get(SVCXPRT *xprt, struct rpc_msg *msg, char **replyp, size_t *replylenp)
{
u_int loc;
cache_ptr ent;
diff --git a/freebsd/lib/libc/rpc/svc_generic.c b/freebsd/lib/libc/rpc/svc_generic.c
index 4d7a23a4..0643ae22 100644
--- a/freebsd/lib/libc/rpc/svc_generic.c
+++ b/freebsd/lib/libc/rpc/svc_generic.c
@@ -74,13 +74,14 @@ extern int __svc_vc_setflag(SVCXPRT *, int);
* It creates a link list of all the handles it could create.
* If svc_create() is called multiple times, it uses the handle
* created earlier instead of creating a new handle every time.
+ *
+ * prognum - Program number
+ * versnum - Version number
+ * nettype - Networktype token
*/
int
-svc_create(dispatch, prognum, versnum, nettype)
- void (*dispatch)(struct svc_req *, SVCXPRT *);
- rpcprog_t prognum; /* Program number */
- rpcvers_t versnum; /* Version number */
- const char *nettype; /* Networktype token */
+svc_create(void (*dispatch)(struct svc_req *, SVCXPRT *),
+ rpcprog_t prognum, rpcvers_t versnum, const char *nettype)
{
struct xlist {
SVCXPRT *xprt; /* Server handle */
@@ -123,7 +124,8 @@ svc_create(dispatch, prognum, versnum, nettype)
if (l == NULL) {
warnx("svc_create: no memory");
mutex_unlock(&xprtlist_lock);
- return (0);
+ num = 0;
+ goto done;
}
l->xprt = xprt;
l->next = xprtlist;
@@ -133,6 +135,7 @@ svc_create(dispatch, prognum, versnum, nettype)
}
mutex_unlock(&xprtlist_lock);
}
+done:
__rpc_endconf(handle);
/*
* In case of num == 0; the error messages are generated by the
@@ -145,13 +148,14 @@ svc_create(dispatch, prognum, versnum, nettype)
* The high level interface to svc_tli_create().
* It tries to create a server for "nconf" and registers the service
* with the rpcbind. It calls svc_tli_create();
+ *
+ * prognum - Program number
+ * versnum - Version number
+ * nconf - Netconfig structure for the network
*/
SVCXPRT *
-svc_tp_create(dispatch, prognum, versnum, nconf)
- void (*dispatch)(struct svc_req *, SVCXPRT *);
- rpcprog_t prognum; /* Program number */
- rpcvers_t versnum; /* Version number */
- const struct netconfig *nconf; /* Netconfig structure for the network */
+svc_tp_create(void (*dispatch)(struct svc_req *, SVCXPRT *),
+ rpcprog_t prognum, rpcvers_t versnum, const struct netconfig *nconf)
{
SVCXPRT *xprt;
@@ -186,14 +190,16 @@ svc_tp_create(dispatch, prognum, versnum, nconf)
* is set to 8.
*
* If sendsz or recvsz are zero, their default values are chosen.
+ *
+ * fd - Connection end point
+ * nconf - Netconfig struct for nettoken
+ * bindaddr - Local bind address
+ * sendsz - Max sendsize
+ * recvsz - Max recvsize
*/
SVCXPRT *
-svc_tli_create(fd, nconf, bindaddr, sendsz, recvsz)
- int fd; /* Connection end point */
- const struct netconfig *nconf; /* Netconfig struct for nettoken */
- const struct t_bind *bindaddr; /* Local bind address */
- u_int sendsz; /* Max sendsize */
- u_int recvsz; /* Max recvsize */
+svc_tli_create(int fd, const struct netconfig *nconf,
+ const struct t_bind *bindaddr, u_int sendsz, u_int recvsz)
{
SVCXPRT *xprt = NULL; /* service handle */
bool_t madefd = FALSE; /* whether fd opened here */
diff --git a/freebsd/lib/libc/rpc/svc_raw.c b/freebsd/lib/libc/rpc/svc_raw.c
index 7f9c4eb4..772e523f 100644
--- a/freebsd/lib/libc/rpc/svc_raw.c
+++ b/freebsd/lib/libc/rpc/svc_raw.c
@@ -84,7 +84,7 @@ static bool_t svc_raw_control(SVCXPRT *, const u_int, void *);
char *__rpc_rawcombuf = NULL;
SVCXPRT *
-svc_raw_create()
+svc_raw_create(void)
{
struct svc_raw_private *srp;
/* VARIABLES PROTECTED BY svcraw_lock: svc_raw_private, srp */
@@ -127,17 +127,14 @@ svc_raw_create()
/*ARGSUSED*/
static enum xprt_stat
-svc_raw_stat(xprt)
-SVCXPRT *xprt; /* args needed to satisfy ANSI-C typechecking */
+svc_raw_stat(SVCXPRT *xprt)
{
return (XPRT_IDLE);
}
/*ARGSUSED*/
static bool_t
-svc_raw_recv(xprt, msg)
- SVCXPRT *xprt;
- struct rpc_msg *msg;
+svc_raw_recv(SVCXPRT *xprt, struct rpc_msg *msg)
{
struct svc_raw_private *srp;
XDR *xdrs;
@@ -161,9 +158,7 @@ svc_raw_recv(xprt, msg)
/*ARGSUSED*/
static bool_t
-svc_raw_reply(xprt, msg)
- SVCXPRT *xprt;
- struct rpc_msg *msg;
+svc_raw_reply(SVCXPRT *xprt, struct rpc_msg *msg)
{
struct svc_raw_private *srp;
XDR *xdrs;
@@ -203,10 +198,7 @@ svc_raw_reply(xprt, msg)
/*ARGSUSED*/
static bool_t
-svc_raw_getargs(xprt, xdr_args, args_ptr)
- SVCXPRT *xprt;
- xdrproc_t xdr_args;
- void *args_ptr;
+svc_raw_getargs(SVCXPRT *xprt, xdrproc_t xdr_args, void *args_ptr)
{
struct svc_raw_private *srp;
@@ -224,10 +216,7 @@ svc_raw_getargs(xprt, xdr_args, args_ptr)
/*ARGSUSED*/
static bool_t
-svc_raw_freeargs(xprt, xdr_args, args_ptr)
- SVCXPRT *xprt;
- xdrproc_t xdr_args;
- void *args_ptr;
+svc_raw_freeargs(SVCXPRT *xprt, xdrproc_t xdr_args, void *args_ptr)
{
struct svc_raw_private *srp;
XDR *xdrs;
@@ -247,24 +236,19 @@ svc_raw_freeargs(xprt, xdr_args, args_ptr)
/*ARGSUSED*/
static void
-svc_raw_destroy(xprt)
-SVCXPRT *xprt;
+svc_raw_destroy(SVCXPRT *xprt)
{
}
/*ARGSUSED*/
static bool_t
-svc_raw_control(xprt, rq, in)
- SVCXPRT *xprt;
- const u_int rq;
- void *in;
+svc_raw_control(SVCXPRT *xprt, const u_int rq, void *in)
{
return (FALSE);
}
static void
-svc_raw_ops(xprt)
- SVCXPRT *xprt;
+svc_raw_ops(SVCXPRT *xprt)
{
static struct xp_ops ops;
static struct xp_ops2 ops2;
diff --git a/freebsd/lib/libc/rpc/svc_run.c b/freebsd/lib/libc/rpc/svc_run.c
index 85506c5c..4e6ed628 100644
--- a/freebsd/lib/libc/rpc/svc_run.c
+++ b/freebsd/lib/libc/rpc/svc_run.c
@@ -56,7 +56,7 @@ __FBSDID("$FreeBSD$");
#include "mt_misc.h"
void
-svc_run()
+svc_run(void)
{
fd_set readfds, cleanfds;
struct timeval timeout;
@@ -91,7 +91,7 @@ svc_run()
* more work to do.
*/
void
-svc_exit()
+svc_exit(void)
{
rwlock_wrlock(&svc_fd_lock);
FD_ZERO(&svc_fdset);
diff --git a/freebsd/lib/libc/rpc/svc_simple.c b/freebsd/lib/libc/rpc/svc_simple.c
index da3f1ebf..28da2898 100644
--- a/freebsd/lib/libc/rpc/svc_simple.c
+++ b/freebsd/lib/libc/rpc/svc_simple.c
@@ -44,7 +44,7 @@ __FBSDID("$FreeBSD$");
/*
* This interface creates a virtual listener for all the services
- * started thru rpc_reg(). It listens on the same endpoint for
+ * started through rpc_reg(). It listens on the same endpoint for
* all the services and then executes the corresponding service
* for the given prognum and procnum.
*/
@@ -94,16 +94,18 @@ static const char __no_mem_str[] = "out of memory";
* is also limited by the recvsize for that transport, even if it is
* a COTS transport. This may be wrong, but for cases like these, they
* should not use the simplified interfaces like this.
+ *
+ * prognum - program number
+ * versnum - version number
+ * procnum - procedure number
+ * progname - Server routine
+ * inproc, outproc - in/out XDR procedures
+ * nettype - nettype
*/
-
int
-rpc_reg(prognum, versnum, procnum, progname, inproc, outproc, nettype)
- rpcprog_t prognum; /* program number */
- rpcvers_t versnum; /* version number */
- rpcproc_t procnum; /* procedure number */
- char *(*progname)(char *); /* Server routine */
- xdrproc_t inproc, outproc; /* in/out XDR procedures */
- char *nettype; /* nettype */
+rpc_reg(rpcprog_t prognum, rpcvers_t versnum, rpcproc_t procnum,
+ char *(*progname)(char *), xdrproc_t inproc, xdrproc_t outproc,
+ char *nettype)
{
struct netconfig *nconf;
int done = FALSE;
@@ -166,10 +168,8 @@ rpc_reg(prognum, versnum, procnum, progname, inproc, outproc, nettype)
if (((xdrbuf = malloc((unsigned)recvsz)) == NULL) ||
((netid = strdup(nconf->nc_netid)) == NULL)) {
warnx(rpc_reg_err, rpc_reg_msg, __no_mem_str);
- if (xdrbuf != NULL)
- free(xdrbuf);
- if (netid != NULL)
- free(netid);
+ free(xdrbuf);
+ free(netid);
SVC_DESTROY(svcxprt);
break;
}
@@ -231,7 +231,7 @@ rpc_reg(prognum, versnum, procnum, progname, inproc, outproc, nettype)
mutex_unlock(&proglst_lock);
if (done == FALSE) {
- warnx("%s cant find suitable transport for %s",
+ warnx("%s can't find suitable transport for %s",
rpc_reg_msg, nettype);
return (-1);
}
@@ -244,9 +244,7 @@ rpc_reg(prognum, versnum, procnum, progname, inproc, outproc, nettype)
*/
static void
-universal(rqstp, transp)
- struct svc_req *rqstp;
- SVCXPRT *transp;
+universal(struct svc_req *rqstp, SVCXPRT *transp)
{
rpcprog_t prog;
rpcvers_t vers;
@@ -276,7 +274,7 @@ universal(rqstp, transp)
/* decode arguments into a CLEAN buffer */
xdrbuf = pl->p_xdrbuf;
/* Zero the arguments: reqd ! */
- (void) memset(xdrbuf, 0, sizeof (pl->p_recvsz));
+ (void) memset(xdrbuf, 0, (size_t)pl->p_recvsz);
/*
* Assuming that sizeof (xdrbuf) would be enough
* for the arguments; if not then the program
diff --git a/freebsd/lib/libc/rpc/svc_vc.c b/freebsd/lib/libc/rpc/svc_vc.c
index 5b17ca4b..5b4c7bd3 100644
--- a/freebsd/lib/libc/rpc/svc_vc.c
+++ b/freebsd/lib/libc/rpc/svc_vc.c
@@ -47,7 +47,6 @@ __FBSDID("$FreeBSD$");
#include "namespace.h"
#include "reentrant.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/poll.h>
#include <sys/socket.h>
@@ -125,12 +124,9 @@ struct cf_conn { /* kept in xprt->xp_p1 for actual connection */
* 0 => use the system default.
*/
SVCXPRT *
-svc_vc_create(fd, sendsize, recvsize)
- int fd;
- u_int sendsize;
- u_int recvsize;
+svc_vc_create(int fd, u_int sendsize, u_int recvsize)
{
- SVCXPRT *xprt;
+ SVCXPRT *xprt = NULL;
struct cf_rendezvous *r = NULL;
struct __rpc_sockinfo si;
struct sockaddr_storage sslocal;
@@ -188,10 +184,7 @@ cleanup_svc_vc_create:
* descriptor as its first input.
*/
SVCXPRT *
-svc_fd_create(fd, sendsize, recvsize)
- int fd;
- u_int sendsize;
- u_int recvsize;
+svc_fd_create(int fd, u_int sendsize, u_int recvsize)
{
struct sockaddr_storage ss;
socklen_t slen;
@@ -245,10 +238,7 @@ freedata:
}
static SVCXPRT *
-makefd_xprt(fd, sendsize, recvsize)
- int fd;
- u_int sendsize;
- u_int recvsize;
+makefd_xprt(int fd, u_int sendsize, u_int recvsize)
{
SVCXPRT *xprt;
struct cf_conn *cd;
@@ -274,7 +264,7 @@ makefd_xprt(fd, sendsize, recvsize)
xprt, read_vc, write_vc);
xprt->xp_p1 = cd;
xprt->xp_verf.oa_base = cd->verf_body;
- svc_vc_ops(xprt); /* truely deals with calls */
+ svc_vc_ops(xprt); /* truly deals with calls */
xprt->xp_port = 0; /* this is a connection, not a rendezvouser */
xprt->xp_fd = fd;
if (__rpc_fd2sockinfo(fd, &si) && __rpc_sockinfo2netid(&si, &netid))
@@ -287,15 +277,13 @@ done:
/*ARGSUSED*/
static bool_t
-rendezvous_request(xprt, msg)
- SVCXPRT *xprt;
- struct rpc_msg *msg;
+rendezvous_request(SVCXPRT *xprt, struct rpc_msg *msg)
{
int sock, flags;
struct cf_rendezvous *r;
struct cf_conn *cd;
- struct sockaddr_storage addr;
- socklen_t len;
+ struct sockaddr_storage addr, sslocal;
+ socklen_t len, slen;
struct __rpc_sockinfo si;
SVCXPRT *newxprt;
fd_set cleanfds;
@@ -360,6 +348,20 @@ again:
__xdrrec_setnonblock(&cd->xdrs, cd->maxrec);
} else
cd->nonblock = FALSE;
+ slen = sizeof(struct sockaddr_storage);
+ if(_getsockname(sock, (struct sockaddr *)(void *)&sslocal, &slen) < 0) {
+ warnx("svc_vc_create: could not retrieve local addr");
+ newxprt->xp_ltaddr.maxlen = newxprt->xp_ltaddr.len = 0;
+ } else {
+ newxprt->xp_ltaddr.maxlen = newxprt->xp_ltaddr.len = sslocal.ss_len;
+ newxprt->xp_ltaddr.buf = mem_alloc((size_t)sslocal.ss_len);
+ if (newxprt->xp_ltaddr.buf == NULL) {
+ warnx("svc_vc_create: no mem for local addr");
+ newxprt->xp_ltaddr.maxlen = newxprt->xp_ltaddr.len = 0;
+ } else {
+ memcpy(newxprt->xp_ltaddr.buf, &sslocal, (size_t)sslocal.ss_len);
+ }
+ }
gettimeofday(&cd->last_recv_time, NULL);
@@ -368,16 +370,14 @@ again:
/*ARGSUSED*/
static enum xprt_stat
-rendezvous_stat(xprt)
- SVCXPRT *xprt;
+rendezvous_stat(SVCXPRT *xprt)
{
return (XPRT_IDLE);
}
static void
-svc_vc_destroy(xprt)
- SVCXPRT *xprt;
+svc_vc_destroy(SVCXPRT *xprt)
{
assert(xprt != NULL);
@@ -386,8 +386,7 @@ svc_vc_destroy(xprt)
}
static void
-__svc_vc_dodestroy(xprt)
- SVCXPRT *xprt;
+__svc_vc_dodestroy(SVCXPRT *xprt)
{
struct cf_conn *cd;
struct cf_rendezvous *r;
@@ -410,28 +409,20 @@ __svc_vc_dodestroy(xprt)
mem_free(xprt->xp_rtaddr.buf, xprt->xp_rtaddr.maxlen);
if (xprt->xp_ltaddr.buf)
mem_free(xprt->xp_ltaddr.buf, xprt->xp_ltaddr.maxlen);
- if (xprt->xp_tp)
- free(xprt->xp_tp);
- if (xprt->xp_netid)
- free(xprt->xp_netid);
+ free(xprt->xp_tp);
+ free(xprt->xp_netid);
svc_xprt_free(xprt);
}
/*ARGSUSED*/
static bool_t
-svc_vc_control(xprt, rq, in)
- SVCXPRT *xprt;
- const u_int rq;
- void *in;
+svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in)
{
return (FALSE);
}
static bool_t
-svc_vc_rendezvous_control(xprt, rq, in)
- SVCXPRT *xprt;
- const u_int rq;
- void *in;
+svc_vc_rendezvous_control(SVCXPRT *xprt, const u_int rq, void *in)
{
struct cf_rendezvous *cfp;
@@ -459,10 +450,7 @@ svc_vc_rendezvous_control(xprt, rq, in)
* fatal for the connection.
*/
static int
-read_vc(xprtp, buf, len)
- void *xprtp;
- void *buf;
- int len;
+read_vc(void *xprtp, void *buf, int len)
{
SVCXPRT *xprt;
int sock;
@@ -522,10 +510,7 @@ fatal_err:
* Any error is fatal and the connection is closed.
*/
static int
-write_vc(xprtp, buf, len)
- void *xprtp;
- void *buf;
- int len;
+write_vc(void *xprtp, void *buf, int len)
{
SVCXPRT *xprt;
int i, cnt;
@@ -569,8 +554,7 @@ write_vc(xprtp, buf, len)
}
static enum xprt_stat
-svc_vc_stat(xprt)
- SVCXPRT *xprt;
+svc_vc_stat(SVCXPRT *xprt)
{
struct cf_conn *cd;
@@ -586,9 +570,7 @@ svc_vc_stat(xprt)
}
static bool_t
-svc_vc_recv(xprt, msg)
- SVCXPRT *xprt;
- struct rpc_msg *msg;
+svc_vc_recv(SVCXPRT *xprt, struct rpc_msg *msg)
{
struct cf_conn *cd;
XDR *xdrs;
@@ -616,10 +598,7 @@ svc_vc_recv(xprt, msg)
}
static bool_t
-svc_vc_getargs(xprt, xdr_args, args_ptr)
- SVCXPRT *xprt;
- xdrproc_t xdr_args;
- void *args_ptr;
+svc_vc_getargs(SVCXPRT *xprt, xdrproc_t xdr_args, void *args_ptr)
{
struct cf_conn *cd;
@@ -630,10 +609,7 @@ svc_vc_getargs(xprt, xdr_args, args_ptr)
}
static bool_t
-svc_vc_freeargs(xprt, xdr_args, args_ptr)
- SVCXPRT *xprt;
- xdrproc_t xdr_args;
- void *args_ptr;
+svc_vc_freeargs(SVCXPRT *xprt, xdrproc_t xdr_args, void *args_ptr)
{
XDR *xdrs;
@@ -647,9 +623,7 @@ svc_vc_freeargs(xprt, xdr_args, args_ptr)
}
static bool_t
-svc_vc_reply(xprt, msg)
- SVCXPRT *xprt;
- struct rpc_msg *msg;
+svc_vc_reply(SVCXPRT *xprt, struct rpc_msg *msg)
{
struct cf_conn *cd;
XDR *xdrs;
@@ -691,8 +665,7 @@ svc_vc_reply(xprt, msg)
}
static void
-svc_vc_ops(xprt)
- SVCXPRT *xprt;
+svc_vc_ops(SVCXPRT *xprt)
{
static struct xp_ops ops;
static struct xp_ops2 ops2;
@@ -715,8 +688,7 @@ svc_vc_ops(xprt)
}
static void
-svc_vc_rendezvous_ops(xprt)
- SVCXPRT *xprt;
+svc_vc_rendezvous_ops(SVCXPRT *xprt)
{
static struct xp_ops ops;
static struct xp_ops2 ops2;
@@ -730,7 +702,7 @@ svc_vc_rendezvous_ops(xprt)
ops.xp_reply =
(bool_t (*)(SVCXPRT *, struct rpc_msg *))abort;
ops.xp_freeargs =
- (bool_t (*)(SVCXPRT *, xdrproc_t, void *))abort,
+ (bool_t (*)(SVCXPRT *, xdrproc_t, void *))abort;
ops.xp_destroy = svc_vc_destroy;
ops2.xp_control = svc_vc_rendezvous_control;
}
diff --git a/freebsd/lib/libc/stdio/fgetln.c b/freebsd/lib/libc/stdio/fgetln.c
index 1a7b0514..7d9f6a53 100644
--- a/freebsd/lib/libc/stdio/fgetln.c
+++ b/freebsd/lib/libc/stdio/fgetln.c
@@ -15,7 +15,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
@@ -39,6 +39,8 @@ static char sccsid[] = "@(#)fgetln.c 8.2 (Berkeley) 1/2/94";
__FBSDID("$FreeBSD$");
#include "namespace.h"
+#include <errno.h>
+#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -63,6 +65,10 @@ __slbexpand(FILE *fp, size_t newsize)
#endif
if (fp->_lb._size >= newsize)
return (0);
+ if (newsize > INT_MAX) {
+ errno = ENOMEM;
+ return (-1);
+ }
if ((p = realloc(fp->_lb._base, newsize)) == NULL)
return (-1);
fp->_lb._base = p;
@@ -119,7 +125,7 @@ fgetln(FILE *fp, size_t *lenp)
* As a bonus, though, we can leave off the __SMOD.
*
* OPTIMISTIC is length that we (optimistically) expect will
- * accomodate the `rest' of the string, on each trip through the
+ * accommodate the `rest' of the string, on each trip through the
* loop below.
*/
#define OPTIMISTIC 80
@@ -156,13 +162,14 @@ fgetln(FILE *fp, size_t *lenp)
}
*lenp = len;
#ifdef notdef
- fp->_lb._base[len] = 0;
+ fp->_lb._base[len] = '\0';
#endif
FUNLOCKFILE(fp);
return ((char *)fp->_lb._base);
error:
*lenp = 0; /* ??? */
+ fp->_flags |= __SERR;
FUNLOCKFILE(fp);
return (NULL); /* ??? */
}
diff --git a/freebsd/lib/libc/stdio/local.h b/freebsd/lib/libc/stdio/local.h
index a9994b5f..664ca3b3 100644
--- a/freebsd/lib/libc/stdio/local.h
+++ b/freebsd/lib/libc/stdio/local.h
@@ -56,8 +56,8 @@ extern int _ftello(FILE *, fpos_t *);
extern int _fseeko(FILE *, off_t, int, int);
extern int __fflush(FILE *fp);
extern void __fcloseall(void);
+extern wint_t __fgetwc_mbs(FILE *, mbstate_t *, int *, locale_t);
#ifndef __rtems__
-extern wint_t __fgetwc(FILE *, locale_t);
extern wint_t __fputwc(wchar_t, FILE *, locale_t);
#endif /* __rtems__ */
extern int __sflush(FILE *);
@@ -104,6 +104,13 @@ extern size_t __fread(void * __restrict buf, size_t size, size_t count,
FILE * __restrict fp);
extern int __sdidinit;
+static inline wint_t
+__fgetwc(FILE *fp, locale_t locale)
+{
+ int nread;
+
+ return (__fgetwc_mbs(fp, &fp->_mbstate, &nread, locale));
+}
/*
* Prepare the given FILE for writing, and return 0 iff it
diff --git a/freebsd/lib/libc/stdlib/strtoimax.c b/freebsd/lib/libc/stdlib/strtoimax.c
index d7103b3a..e8a4f455 100644
--- a/freebsd/lib/libc/stdlib/strtoimax.c
+++ b/freebsd/lib/libc/stdlib/strtoimax.c
@@ -17,7 +17,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/lib/libc/stdlib/strtonum.c b/freebsd/lib/libc/stdlib/strtonum.c
index c55cb7d7..528380b7 100644
--- a/freebsd/lib/libc/stdlib/strtonum.c
+++ b/freebsd/lib/libc/stdlib/strtonum.c
@@ -16,7 +16,7 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
- * $OpenBSD: strtonum.c,v 1.6 2004/08/03 19:38:01 millert Exp $
+ * $OpenBSD: strtonum.c,v 1.7 2013/04/17 18:40:58 tedu Exp $
*/
#include <sys/cdefs.h>
@@ -26,17 +26,17 @@ __FBSDID("$FreeBSD$");
#include <limits.h>
#include <stdlib.h>
-#define INVALID 1
-#define TOOSMALL 2
-#define TOOLARGE 3
+#define INVALID 1
+#define TOOSMALL 2
+#define TOOLARGE 3
long long
strtonum(const char *numstr, long long minval, long long maxval,
const char **errstrp)
{
long long ll = 0;
- char *ep;
int error = 0;
+ char *ep;
struct errval {
const char *errstr;
int err;
@@ -49,9 +49,9 @@ strtonum(const char *numstr, long long minval, long long maxval,
ev[0].err = errno;
errno = 0;
- if (minval > maxval)
+ if (minval > maxval) {
error = INVALID;
- else {
+ } else {
ll = strtoll(numstr, &ep, 10);
if (errno == EINVAL || numstr == ep || *ep != '\0')
error = INVALID;
diff --git a/freebsd/lib/libc/stdlib/strtoumax.c b/freebsd/lib/libc/stdlib/strtoumax.c
index 4098ce81..e16dbf47 100644
--- a/freebsd/lib/libc/stdlib/strtoumax.c
+++ b/freebsd/lib/libc/stdlib/strtoumax.c
@@ -17,7 +17,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/lib/libc/string/strsep.c b/freebsd/lib/libc/string/strsep.c
index e2560e95..221eda29 100644
--- a/freebsd/lib/libc/string/strsep.c
+++ b/freebsd/lib/libc/string/strsep.c
@@ -12,7 +12,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/lib/libc/xdr/xdr.c b/freebsd/lib/libc/xdr/xdr.c
index 561f179b..c529bb95 100644
--- a/freebsd/lib/libc/xdr/xdr.c
+++ b/freebsd/lib/libc/xdr/xdr.c
@@ -2,33 +2,35 @@
/* $NetBSD: xdr.c,v 1.22 2000/07/06 03:10:35 christos Exp $ */
-/*
- * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
- * unrestricted use provided that this legend is included on all tape
- * media and as a part of the software program in whole or part. Users
- * may copy or modify Sun RPC without charge, but are not authorized
- * to license or distribute it to anyone else except as part of a product or
- * program developed by the user.
- *
- * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
- * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
- *
- * Sun RPC is provided with no support and without any obligation on the
- * part of Sun Microsystems, Inc. to assist in its use, correction,
- * modification or enhancement.
+/*-
+ * Copyright (c) 2010, Oracle America, Inc.
*
- * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
- * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
- * OR ANY PART THEREOF.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
*
- * In no event will Sun Microsystems, Inc. be liable for any lost revenue
- * or profits or other special, indirect and consequential damages, even if
- * Sun has been advised of the possibility of such damages.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ * * Neither the name of the "Oracle America, Inc." nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
- * Sun Microsystems, Inc.
- * 2550 Garcia Avenue
- * Mountain View, California 94043
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
@@ -41,8 +43,6 @@ __FBSDID("$FreeBSD$");
/*
* xdr.c, Generic XDR routines implementation.
*
- * Copyright (C) 1986, Sun Microsystems, Inc.
- *
* These are the "generic" xdr routines used to serialize and de-serialize
* most common data items. See xdr.h for more info on the interface to
* xdr.
@@ -78,9 +78,7 @@ static const char xdr_zero[BYTES_PER_XDR_UNIT] = { 0, 0, 0, 0 };
* Not a filter, but a convenient utility nonetheless
*/
void
-xdr_free(proc, objp)
- xdrproc_t proc;
- void *objp;
+xdr_free(xdrproc_t proc, void *objp)
{
XDR x;
@@ -103,9 +101,7 @@ xdr_void(void)
* XDR integers
*/
bool_t
-xdr_int(xdrs, ip)
- XDR *xdrs;
- int *ip;
+xdr_int(XDR *xdrs, int *ip)
{
long l;
@@ -133,9 +129,7 @@ xdr_int(xdrs, ip)
* XDR unsigned integers
*/
bool_t
-xdr_u_int(xdrs, up)
- XDR *xdrs;
- u_int *up;
+xdr_u_int(XDR *xdrs, u_int *up)
{
u_long l;
@@ -165,9 +159,7 @@ xdr_u_int(xdrs, up)
* same as xdr_u_long - open coded to save a proc call!
*/
bool_t
-xdr_long(xdrs, lp)
- XDR *xdrs;
- long *lp;
+xdr_long(XDR *xdrs, long *lp)
{
switch (xdrs->x_op) {
case XDR_ENCODE:
@@ -186,9 +178,7 @@ xdr_long(xdrs, lp)
* same as xdr_long - open coded to save a proc call!
*/
bool_t
-xdr_u_long(xdrs, ulp)
- XDR *xdrs;
- u_long *ulp;
+xdr_u_long(XDR *xdrs, u_long *ulp)
{
switch (xdrs->x_op) {
case XDR_ENCODE:
@@ -208,9 +198,7 @@ xdr_u_long(xdrs, ulp)
* same as xdr_u_int32_t - open coded to save a proc call!
*/
bool_t
-xdr_int32_t(xdrs, int32_p)
- XDR *xdrs;
- int32_t *int32_p;
+xdr_int32_t(XDR *xdrs, int32_t *int32_p)
{
long l;
@@ -239,9 +227,7 @@ xdr_int32_t(xdrs, int32_p)
* same as xdr_int32_t - open coded to save a proc call!
*/
bool_t
-xdr_u_int32_t(xdrs, u_int32_p)
- XDR *xdrs;
- u_int32_t *u_int32_p;
+xdr_u_int32_t(XDR *xdrs, u_int32_t *u_int32_p)
{
u_long l;
@@ -270,9 +256,7 @@ xdr_u_int32_t(xdrs, u_int32_p)
* same as xdr_int32_t - open coded to save a proc call!
*/
bool_t
-xdr_uint32_t(xdrs, u_int32_p)
- XDR *xdrs;
- uint32_t *u_int32_p;
+xdr_uint32_t(XDR *xdrs, uint32_t *u_int32_p)
{
u_long l;
@@ -300,9 +284,7 @@ xdr_uint32_t(xdrs, u_int32_p)
* XDR short integers
*/
bool_t
-xdr_short(xdrs, sp)
- XDR *xdrs;
- short *sp;
+xdr_short(XDR *xdrs, short *sp)
{
long l;
@@ -330,9 +312,7 @@ xdr_short(xdrs, sp)
* XDR unsigned short integers
*/
bool_t
-xdr_u_short(xdrs, usp)
- XDR *xdrs;
- u_short *usp;
+xdr_u_short(XDR *xdrs, u_short *usp)
{
u_long l;
@@ -361,9 +341,7 @@ xdr_u_short(xdrs, usp)
* XDR 16-bit integers
*/
bool_t
-xdr_int16_t(xdrs, int16_p)
- XDR *xdrs;
- int16_t *int16_p;
+xdr_int16_t(XDR *xdrs, int16_t *int16_p)
{
long l;
@@ -391,9 +369,7 @@ xdr_int16_t(xdrs, int16_p)
* XDR unsigned 16-bit integers
*/
bool_t
-xdr_u_int16_t(xdrs, u_int16_p)
- XDR *xdrs;
- u_int16_t *u_int16_p;
+xdr_u_int16_t(XDR *xdrs, u_int16_t *u_int16_p)
{
u_long l;
@@ -421,9 +397,7 @@ xdr_u_int16_t(xdrs, u_int16_p)
* XDR unsigned 16-bit integers
*/
bool_t
-xdr_uint16_t(xdrs, u_int16_p)
- XDR *xdrs;
- uint16_t *u_int16_p;
+xdr_uint16_t(XDR *xdrs, uint16_t *u_int16_p)
{
u_long l;
@@ -452,9 +426,7 @@ xdr_uint16_t(xdrs, u_int16_p)
* XDR a char
*/
bool_t
-xdr_char(xdrs, cp)
- XDR *xdrs;
- char *cp;
+xdr_char(XDR *xdrs, char *cp)
{
int i;
@@ -470,9 +442,7 @@ xdr_char(xdrs, cp)
* XDR an unsigned char
*/
bool_t
-xdr_u_char(xdrs, cp)
- XDR *xdrs;
- u_char *cp;
+xdr_u_char(XDR *xdrs, u_char *cp)
{
u_int u;
@@ -488,9 +458,7 @@ xdr_u_char(xdrs, cp)
* XDR booleans
*/
bool_t
-xdr_bool(xdrs, bp)
- XDR *xdrs;
- bool_t *bp;
+xdr_bool(XDR *xdrs, bool_t *bp)
{
long lb;
@@ -518,9 +486,7 @@ xdr_bool(xdrs, bp)
* XDR enumerations
*/
bool_t
-xdr_enum(xdrs, ep)
- XDR *xdrs;
- enum_t *ep;
+xdr_enum(XDR *xdrs, enum_t *ep)
{
/*
* enums are treated as ints
@@ -542,10 +508,7 @@ xdr_enum(xdrs, ep)
* cp points to the opaque object and cnt gives the byte length.
*/
bool_t
-xdr_opaque(xdrs, cp, cnt)
- XDR *xdrs;
- caddr_t cp;
- u_int cnt;
+xdr_opaque(XDR *xdrs, caddr_t cp, u_int cnt)
{
u_int rndup;
static int crud[BYTES_PER_XDR_UNIT];
@@ -594,11 +557,7 @@ xdr_opaque(xdrs, cp, cnt)
* If *cpp is NULL maxsize bytes are allocated
*/
bool_t
-xdr_bytes(xdrs, cpp, sizep, maxsize)
- XDR *xdrs;
- char **cpp;
- u_int *sizep;
- u_int maxsize;
+xdr_bytes(XDR *xdrs, char **cpp, u_int *sizep, u_int maxsize)
{
char *sp = *cpp; /* sp is the actual string pointer */
u_int nodesize;
@@ -650,9 +609,7 @@ xdr_bytes(xdrs, cpp, sizep, maxsize)
* Implemented here due to commonality of the object.
*/
bool_t
-xdr_netobj(xdrs, np)
- XDR *xdrs;
- struct netobj *np;
+xdr_netobj(XDR *xdrs, struct netobj *np)
{
return (xdr_bytes(xdrs, &np->n_bytes, &np->n_len, MAX_NETOBJ_SZ));
@@ -670,12 +627,14 @@ xdr_netobj(xdrs, np)
* If there is no specific or default routine an error is returned.
*/
bool_t
-xdr_union(xdrs, dscmp, unp, choices, dfault)
- XDR *xdrs;
- enum_t *dscmp; /* enum to decide which arm to work on */
- char *unp; /* the union itself */
- const struct xdr_discrim *choices; /* [value, xdr proc] for each arm */
- xdrproc_t dfault; /* default xdr routine */
+xdr_union(XDR *xdrs, enum_t *dscmp, char *unp, const struct xdr_discrim *choices, xdrproc_t dfault)
+/*
+ * XDR *xdrs;
+ * enum_t *dscmp; // enum to decide which arm to work on
+ * char *unp; // the union itself
+ * const struct xdr_discrim *choices; // [value, xdr proc] for each arm
+ * xdrproc_t dfault; // default xdr routine
+ */
{
enum_t dscm;
@@ -719,10 +678,7 @@ xdr_union(xdrs, dscmp, unp, choices, dfault)
* of the string as specified by a protocol.
*/
bool_t
-xdr_string(xdrs, cpp, maxsize)
- XDR *xdrs;
- char **cpp;
- u_int maxsize;
+xdr_string(XDR *xdrs, char **cpp, u_int maxsize)
{
char *sp = *cpp; /* sp is the actual string pointer */
u_int size;
@@ -786,9 +742,7 @@ xdr_string(xdrs, cpp, maxsize)
* routines like clnt_call
*/
bool_t
-xdr_wrapstring(xdrs, cpp)
- XDR *xdrs;
- char **cpp;
+xdr_wrapstring(XDR *xdrs, char **cpp)
{
return xdr_string(xdrs, cpp, LASTUNSIGNED);
}
@@ -805,9 +759,7 @@ xdr_wrapstring(xdrs, cpp)
* XDR 64-bit integers
*/
bool_t
-xdr_int64_t(xdrs, llp)
- XDR *xdrs;
- int64_t *llp;
+xdr_int64_t(XDR *xdrs, int64_t *llp)
{
u_long ul[2];
@@ -838,9 +790,7 @@ xdr_int64_t(xdrs, llp)
* XDR unsigned 64-bit integers
*/
bool_t
-xdr_u_int64_t(xdrs, ullp)
- XDR *xdrs;
- u_int64_t *ullp;
+xdr_u_int64_t(XDR *xdrs, u_int64_t *ullp)
{
u_long ul[2];
@@ -870,9 +820,7 @@ xdr_u_int64_t(xdrs, ullp)
* XDR unsigned 64-bit integers
*/
bool_t
-xdr_uint64_t(xdrs, ullp)
- XDR *xdrs;
- uint64_t *ullp;
+xdr_uint64_t(XDR *xdrs, uint64_t *ullp)
{
u_long ul[2];
@@ -903,9 +851,7 @@ xdr_uint64_t(xdrs, ullp)
* XDR hypers
*/
bool_t
-xdr_hyper(xdrs, llp)
- XDR *xdrs;
- longlong_t *llp;
+xdr_hyper(XDR *xdrs, longlong_t *llp)
{
/*
@@ -920,9 +866,7 @@ xdr_hyper(xdrs, llp)
* XDR unsigned hypers
*/
bool_t
-xdr_u_hyper(xdrs, ullp)
- XDR *xdrs;
- u_longlong_t *ullp;
+xdr_u_hyper(XDR *xdrs, u_longlong_t *ullp)
{
/*
@@ -937,9 +881,7 @@ xdr_u_hyper(xdrs, ullp)
* XDR longlong_t's
*/
bool_t
-xdr_longlong_t(xdrs, llp)
- XDR *xdrs;
- longlong_t *llp;
+xdr_longlong_t(XDR *xdrs, longlong_t *llp)
{
/*
@@ -954,9 +896,7 @@ xdr_longlong_t(xdrs, llp)
* XDR u_longlong_t's
*/
bool_t
-xdr_u_longlong_t(xdrs, ullp)
- XDR *xdrs;
- u_longlong_t *ullp;
+xdr_u_longlong_t(XDR *xdrs, u_longlong_t *ullp)
{
/*
diff --git a/freebsd/lib/libc/xdr/xdr_array.c b/freebsd/lib/libc/xdr/xdr_array.c
index eb0eefa0..2b9fa580 100644
--- a/freebsd/lib/libc/xdr/xdr_array.c
+++ b/freebsd/lib/libc/xdr/xdr_array.c
@@ -2,33 +2,35 @@
/* $NetBSD: xdr_array.c,v 1.12 2000/01/22 22:19:18 mycroft Exp $ */
-/*
- * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
- * unrestricted use provided that this legend is included on all tape
- * media and as a part of the software program in whole or part. Users
- * may copy or modify Sun RPC without charge, but are not authorized
- * to license or distribute it to anyone else except as part of a product or
- * program developed by the user.
- *
- * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
- * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
- *
- * Sun RPC is provided with no support and without any obligation on the
- * part of Sun Microsystems, Inc. to assist in its use, correction,
- * modification or enhancement.
+/*-
+ * Copyright (c) 2010, Oracle America, Inc.
*
- * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
- * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
- * OR ANY PART THEREOF.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
*
- * In no event will Sun Microsystems, Inc. be liable for any lost revenue
- * or profits or other special, indirect and consequential damages, even if
- * Sun has been advised of the possibility of such damages.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ * * Neither the name of the "Oracle America, Inc." nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
- * Sun Microsystems, Inc.
- * 2550 Garcia Avenue
- * Mountain View, California 94043
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
@@ -41,8 +43,6 @@ __FBSDID("$FreeBSD$");
/*
* xdr_array.c, Generic XDR routines impelmentation.
*
- * Copyright (C) 1984, Sun Microsystems, Inc.
- *
* These are the "non-trivial" xdr primitives used to serialize and de-serialize
* arrays. See xdr.h for more info on the interface to xdr.
*/
@@ -66,13 +66,15 @@ __FBSDID("$FreeBSD$");
* xdr procedure to call to handle each element of the array.
*/
bool_t
-xdr_array(xdrs, addrp, sizep, maxsize, elsize, elproc)
- XDR *xdrs;
- caddr_t *addrp; /* array pointer */
- u_int *sizep; /* number of elements */
- u_int maxsize; /* max numberof elements */
- u_int elsize; /* size in bytes of each element */
- xdrproc_t elproc; /* xdr routine to handle each element */
+xdr_array(XDR *xdrs, caddr_t *addrp, u_int *sizep, u_int maxsize, u_int elsize, xdrproc_t elproc)
+/*
+ * XDR *xdrs;
+ * caddr_t *addrp; // array pointer
+ * u_int *sizep; // number of elements
+ * u_int maxsize; // max number of elements
+ * u_int elsize; // size in bytes of each element
+ * xdrproc_t elproc; // xdr routine to handle each element
+ */
{
u_int i;
caddr_t target = *addrp;
@@ -144,12 +146,7 @@ xdr_array(xdrs, addrp, sizep, maxsize, elsize, elproc)
* > xdr_elem: routine to XDR each element
*/
bool_t
-xdr_vector(xdrs, basep, nelem, elemsize, xdr_elem)
- XDR *xdrs;
- char *basep;
- u_int nelem;
- u_int elemsize;
- xdrproc_t xdr_elem;
+xdr_vector(XDR *xdrs, char *basep, u_int nelem, u_int elemsize, xdrproc_t xdr_elem)
{
u_int i;
char *elptr;
diff --git a/freebsd/lib/libc/xdr/xdr_float.c b/freebsd/lib/libc/xdr/xdr_float.c
index 4ff153b3..3c99f192 100644
--- a/freebsd/lib/libc/xdr/xdr_float.c
+++ b/freebsd/lib/libc/xdr/xdr_float.c
@@ -2,33 +2,35 @@
/* $NetBSD: xdr_float.c,v 1.23 2000/07/17 04:59:51 matt Exp $ */
-/*
- * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
- * unrestricted use provided that this legend is included on all tape
- * media and as a part of the software program in whole or part. Users
- * may copy or modify Sun RPC without charge, but are not authorized
- * to license or distribute it to anyone else except as part of a product or
- * program developed by the user.
- *
- * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
- * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
+/*-
+ * Copyright (c) 2010, Oracle America, Inc.
*
- * Sun RPC is provided with no support and without any obligation on the
- * part of Sun Microsystems, Inc. to assist in its use, correction,
- * modification or enhancement.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
*
- * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
- * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
- * OR ANY PART THEREOF.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ * * Neither the name of the "Oracle America, Inc." nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
- * In no event will Sun Microsystems, Inc. be liable for any lost revenue
- * or profits or other special, indirect and consequential damages, even if
- * Sun has been advised of the possibility of such damages.
- *
- * Sun Microsystems, Inc.
- * 2550 Garcia Avenue
- * Mountain View, California 94043
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
@@ -41,15 +43,12 @@ __FBSDID("$FreeBSD$");
/*
* xdr_float.c, Generic XDR routines implementation.
*
- * Copyright (C) 1984, Sun Microsystems, Inc.
- *
* These are the "floating point" xdr routines used to (de)serialize
* most common data items. See xdr.h for more info on the interface to
* xdr.
*/
#include "namespace.h"
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <stdio.h>
@@ -63,16 +62,8 @@ __FBSDID("$FreeBSD$");
* This routine works on machines with IEEE754 FP and Vaxen.
*/
-#if defined(__m68k__) || defined(__sparc__) || defined(__i386__) || \
- defined(__mips__) || defined(__ns32k__) || defined(__alpha__) || \
- defined(__arm__) || defined(__ppc__) || defined(__ia64__) || \
- defined(__arm26__) || defined(__sparc64__) || defined(__amd64__)
#include <machine/endian.h>
#define IEEEFP
-#else /* __rtems__ */
-#include <machine/endian.h>
-#define IEEEFP
-#endif
#if defined(__vax__)
@@ -106,15 +97,13 @@ static struct sgl_limits {
#endif /* vax */
bool_t
-xdr_float(xdrs, fp)
- XDR *xdrs;
- float *fp;
+xdr_float(XDR *xdrs, float *fp)
{
#ifndef IEEEFP
struct ieee_single is;
struct vax_single vs, *vsp;
struct sgl_limits *lim;
- int i;
+ u_int i;
#endif
switch (xdrs->x_op) {
@@ -123,9 +112,8 @@ xdr_float(xdrs, fp)
return (XDR_PUTINT32(xdrs, (int32_t *)fp));
#else
vs = *((struct vax_single *)fp);
- for (i = 0, lim = sgl_limits;
- i < sizeof(sgl_limits)/sizeof(struct sgl_limits);
- i++, lim++) {
+ for (i = 0, lim = sgl_limits; i < nitems(sgl_limits);
+ i++, lim++) {
if ((vs.mantissa2 == lim->s.mantissa2) &&
(vs.exp == lim->s.exp) &&
(vs.mantissa1 == lim->s.mantissa1)) {
@@ -147,9 +135,8 @@ xdr_float(xdrs, fp)
vsp = (struct vax_single *)fp;
if (!XDR_GETINT32(xdrs, (int32_t *)&is))
return (FALSE);
- for (i = 0, lim = sgl_limits;
- i < sizeof(sgl_limits)/sizeof(struct sgl_limits);
- i++, lim++) {
+ for (i = 0, lim = sgl_limits; i < nitems(sgl_limits);
+ i++, lim++) {
if ((is.exp == lim->ieee.exp) &&
(is.mantissa == lim->ieee.mantissa)) {
*vsp = lim->s;
@@ -208,9 +195,7 @@ static struct dbl_limits {
bool_t
-xdr_double(xdrs, dp)
- XDR *xdrs;
- double *dp;
+xdr_double(XDR *xdrs, double *dp)
{
#ifdef IEEEFP
int32_t *i32p;
@@ -220,7 +205,7 @@ xdr_double(xdrs, dp)
struct ieee_double id;
struct vax_double vd;
struct dbl_limits *lim;
- int i;
+ u_int i;
#endif
switch (xdrs->x_op) {
@@ -242,9 +227,8 @@ xdr_double(xdrs, dp)
return (rv);
#else
vd = *((struct vax_double *)dp);
- for (i = 0, lim = dbl_limits;
- i < sizeof(dbl_limits)/sizeof(struct dbl_limits);
- i++, lim++) {
+ for (i = 0, lim = dbl_limits; i < nitems(dbl_limits);
+ i++, lim++) {
if ((vd.mantissa4 == lim->d.mantissa4) &&
(vd.mantissa3 == lim->d.mantissa3) &&
(vd.mantissa2 == lim->d.mantissa2) &&
@@ -284,9 +268,8 @@ xdr_double(xdrs, dp)
lp = (int32_t *)&id;
if (!XDR_GETINT32(xdrs, lp++) || !XDR_GETINT32(xdrs, lp))
return (FALSE);
- for (i = 0, lim = dbl_limits;
- i < sizeof(dbl_limits)/sizeof(struct dbl_limits);
- i++, lim++) {
+ for (i = 0, lim = dbl_limits; i < nitems(dbl_limits);
+ i++, lim++) {
if ((id.mantissa2 == lim->ieee.mantissa2) &&
(id.mantissa1 == lim->ieee.mantissa1) &&
(id.exp == lim->ieee.exp)) {
diff --git a/freebsd/lib/libc/xdr/xdr_mem.c b/freebsd/lib/libc/xdr/xdr_mem.c
index c97e752f..c7dafbae 100644
--- a/freebsd/lib/libc/xdr/xdr_mem.c
+++ b/freebsd/lib/libc/xdr/xdr_mem.c
@@ -2,33 +2,35 @@
/* $NetBSD: xdr_mem.c,v 1.15 2000/01/22 22:19:18 mycroft Exp $ */
-/*
- * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
- * unrestricted use provided that this legend is included on all tape
- * media and as a part of the software program in whole or part. Users
- * may copy or modify Sun RPC without charge, but are not authorized
- * to license or distribute it to anyone else except as part of a product or
- * program developed by the user.
- *
- * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
- * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
+/*-
+ * Copyright (c) 2010, Oracle America, Inc.
*
- * Sun RPC is provided with no support and without any obligation on the
- * part of Sun Microsystems, Inc. to assist in its use, correction,
- * modification or enhancement.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
*
- * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
- * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
- * OR ANY PART THEREOF.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ * * Neither the name of the "Oracle America, Inc." nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
- * In no event will Sun Microsystems, Inc. be liable for any lost revenue
- * or profits or other special, indirect and consequential damages, even if
- * Sun has been advised of the possibility of such damages.
- *
- * Sun Microsystems, Inc.
- * 2550 Garcia Avenue
- * Mountain View, California 94043
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
@@ -41,8 +43,6 @@ __FBSDID("$FreeBSD$");
/*
* xdr_mem.h, XDR implementation using memory buffers.
*
- * Copyright (C) 1984, Sun Microsystems, Inc.
- *
* If you have some data to be interpreted as external data representation
* or to be converted to external data representation in a memory buffer,
* then this is the package for you.
@@ -100,11 +100,7 @@ static const struct xdr_ops xdrmem_ops_unaligned = {
* memory buffer.
*/
void
-xdrmem_create(xdrs, addr, size, op)
- XDR *xdrs;
- char *addr;
- u_int size;
- enum xdr_op op;
+xdrmem_create(XDR *xdrs, char *addr, u_int size, enum xdr_op op)
{
xdrs->x_op = op;
@@ -116,16 +112,13 @@ xdrmem_create(xdrs, addr, size, op)
/*ARGSUSED*/
static void
-xdrmem_destroy(xdrs)
- XDR *xdrs;
+xdrmem_destroy(XDR *xdrs)
{
}
static bool_t
-xdrmem_getlong_aligned(xdrs, lp)
- XDR *xdrs;
- long *lp;
+xdrmem_getlong_aligned(XDR *xdrs, long *lp)
{
if (xdrs->x_handy < sizeof(int32_t))
@@ -137,9 +130,7 @@ xdrmem_getlong_aligned(xdrs, lp)
}
static bool_t
-xdrmem_putlong_aligned(xdrs, lp)
- XDR *xdrs;
- const long *lp;
+xdrmem_putlong_aligned(XDR *xdrs, const long *lp)
{
if (xdrs->x_handy < sizeof(int32_t))
@@ -151,9 +142,7 @@ xdrmem_putlong_aligned(xdrs, lp)
}
static bool_t
-xdrmem_getlong_unaligned(xdrs, lp)
- XDR *xdrs;
- long *lp;
+xdrmem_getlong_unaligned(XDR *xdrs, long *lp)
{
u_int32_t l;
@@ -167,9 +156,7 @@ xdrmem_getlong_unaligned(xdrs, lp)
}
static bool_t
-xdrmem_putlong_unaligned(xdrs, lp)
- XDR *xdrs;
- const long *lp;
+xdrmem_putlong_unaligned(XDR *xdrs, const long *lp)
{
u_int32_t l;
@@ -183,10 +170,7 @@ xdrmem_putlong_unaligned(xdrs, lp)
}
static bool_t
-xdrmem_getbytes(xdrs, addr, len)
- XDR *xdrs;
- char *addr;
- u_int len;
+xdrmem_getbytes(XDR *xdrs, char *addr, u_int len)
{
if (xdrs->x_handy < len)
@@ -198,10 +182,7 @@ xdrmem_getbytes(xdrs, addr, len)
}
static bool_t
-xdrmem_putbytes(xdrs, addr, len)
- XDR *xdrs;
- const char *addr;
- u_int len;
+xdrmem_putbytes(XDR *xdrs, const char *addr, u_int len)
{
if (xdrs->x_handy < len)
@@ -213,8 +194,7 @@ xdrmem_putbytes(xdrs, addr, len)
}
static u_int
-xdrmem_getpos(xdrs)
- XDR *xdrs;
+xdrmem_getpos(XDR *xdrs)
{
/* XXX w/64-bit pointers, u_int not enough! */
@@ -222,9 +202,7 @@ xdrmem_getpos(xdrs)
}
static bool_t
-xdrmem_setpos(xdrs, pos)
- XDR *xdrs;
- u_int pos;
+xdrmem_setpos(XDR *xdrs, u_int pos)
{
char *newaddr = xdrs->x_base + pos;
char *lastaddr = (char *)xdrs->x_private + xdrs->x_handy;
@@ -237,11 +215,9 @@ xdrmem_setpos(xdrs, pos)
}
static int32_t *
-xdrmem_inline_aligned(xdrs, len)
- XDR *xdrs;
- u_int len;
+xdrmem_inline_aligned(XDR *xdrs, u_int len)
{
- int32_t *buf = 0;
+ int32_t *buf = NULL;
if (xdrs->x_handy >= len) {
xdrs->x_handy -= len;
@@ -253,9 +229,7 @@ xdrmem_inline_aligned(xdrs, len)
/* ARGSUSED */
static int32_t *
-xdrmem_inline_unaligned(xdrs, len)
- XDR *xdrs;
- u_int len;
+xdrmem_inline_unaligned(XDR *xdrs, u_int len)
{
return (0);
diff --git a/freebsd/lib/libc/xdr/xdr_rec.c b/freebsd/lib/libc/xdr/xdr_rec.c
index a204b44b..a7eb890e 100644
--- a/freebsd/lib/libc/xdr/xdr_rec.c
+++ b/freebsd/lib/libc/xdr/xdr_rec.c
@@ -2,33 +2,35 @@
/* $NetBSD: xdr_rec.c,v 1.18 2000/07/06 03:10:35 christos Exp $ */
-/*
- * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
- * unrestricted use provided that this legend is included on all tape
- * media and as a part of the software program in whole or part. Users
- * may copy or modify Sun RPC without charge, but are not authorized
- * to license or distribute it to anyone else except as part of a product or
- * program developed by the user.
- *
- * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
- * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
- *
- * Sun RPC is provided with no support and without any obligation on the
- * part of Sun Microsystems, Inc. to assist in its use, correction,
- * modification or enhancement.
+/*-
+ * Copyright (c) 2010, Oracle America, Inc.
*
- * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
- * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
- * OR ANY PART THEREOF.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
*
- * In no event will Sun Microsystems, Inc. be liable for any lost revenue
- * or profits or other special, indirect and consequential damages, even if
- * Sun has been advised of the possibility of such damages.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ * * Neither the name of the "Oracle America, Inc." nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
- * Sun Microsystems, Inc.
- * 2550 Garcia Avenue
- * Mountain View, California 94043
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
@@ -42,8 +44,6 @@ __FBSDID("$FreeBSD$");
* xdr_rec.c, Implements TCP/IP based XDR streams with a "record marking"
* layer above tcp (for rpc's use).
*
- * Copyright (C) 1984, Sun Microsystems, Inc.
- *
* These routines interface XDRSTREAMS to a tcp/ip connection.
* There is a record marking layer between the xdr stream
* and the tcp transport level. A record is composed on one or more
@@ -108,7 +108,7 @@ static const struct xdr_ops xdrrec_ops = {
* meet the needs of xdr and rpc based on tcp.
*/
-#define LAST_FRAG ((u_int32_t)(1 << 31))
+#define LAST_FRAG ((u_int32_t)(1U << 31))
typedef struct rec_strm {
char *tcp_handle;
@@ -163,15 +163,18 @@ static bool_t realloc_stream(RECSTREAM *, int);
* calls expect that they take an opaque handle rather than an fd.
*/
void
-xdrrec_create(xdrs, sendsize, recvsize, tcp_handle, readit, writeit)
- XDR *xdrs;
- u_int sendsize;
- u_int recvsize;
- void *tcp_handle;
- /* like read, but pass it a tcp_handle, not sock */
- int (*readit)(void *, void *, int);
- /* like write, but pass it a tcp_handle, not sock */
- int (*writeit)(void *, void *, int);
+xdrrec_create(XDR *xdrs, u_int sendsize, u_int recvsize, void *tcp_handle,
+ int (*readit)(void *, void *, int), int (*writeit)(void *, void *, int))
+/*
+ * XDR *xdrs;
+ * u_int sendsize;
+ * u_int recvsize;
+ * void *tcp_handle;
+ * // like read, but pass it a tcp_handle, not sock
+ * int (*readit)(void *, void *, int);
+ * // like write, but pass it a tcp_handle, not sock
+ * int (*writeit)(void *, void *, int);
+ */
{
RECSTREAM *rstrm = mem_alloc(sizeof(RECSTREAM));
@@ -231,9 +234,7 @@ xdrrec_create(xdrs, sendsize, recvsize, tcp_handle, readit, writeit)
*/
static bool_t
-xdrrec_getlong(xdrs, lp)
- XDR *xdrs;
- long *lp;
+xdrrec_getlong(XDR *xdrs, long *lp)
{
RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
int32_t *buflp = (int32_t *)(void *)(rstrm->in_finger);
@@ -255,9 +256,7 @@ xdrrec_getlong(xdrs, lp)
}
static bool_t
-xdrrec_putlong(xdrs, lp)
- XDR *xdrs;
- const long *lp;
+xdrrec_putlong(XDR *xdrs, const long *lp)
{
RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
int32_t *dest_lp = ((int32_t *)(void *)(rstrm->out_finger));
@@ -279,10 +278,7 @@ xdrrec_putlong(xdrs, lp)
}
static bool_t /* must manage buffers, fragments, and records */
-xdrrec_getbytes(xdrs, addr, len)
- XDR *xdrs;
- char *addr;
- u_int len;
+xdrrec_getbytes(XDR *xdrs, char *addr, u_int len)
{
RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
int current;
@@ -307,10 +303,7 @@ xdrrec_getbytes(xdrs, addr, len)
}
static bool_t
-xdrrec_putbytes(xdrs, addr, len)
- XDR *xdrs;
- const char *addr;
- u_int len;
+xdrrec_putbytes(XDR *xdrs, const char *addr, u_int len)
{
RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
size_t current;
@@ -333,8 +326,7 @@ xdrrec_putbytes(xdrs, addr, len)
}
static u_int
-xdrrec_getpos(xdrs)
- XDR *xdrs;
+xdrrec_getpos(XDR *xdrs)
{
RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
off_t pos;
@@ -360,9 +352,7 @@ xdrrec_getpos(xdrs)
}
static bool_t
-xdrrec_setpos(xdrs, pos)
- XDR *xdrs;
- u_int pos;
+xdrrec_setpos(XDR *xdrs, u_int pos)
{
RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
u_int currpos = xdrrec_getpos(xdrs);
@@ -399,9 +389,7 @@ xdrrec_setpos(xdrs, pos)
}
static int32_t *
-xdrrec_inline(xdrs, len)
- XDR *xdrs;
- u_int len;
+xdrrec_inline(XDR *xdrs, u_int len)
{
RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
int32_t *buf = NULL;
@@ -431,8 +419,7 @@ xdrrec_inline(xdrs, len)
}
static void
-xdrrec_destroy(xdrs)
- XDR *xdrs;
+xdrrec_destroy(XDR *xdrs)
{
RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private;
@@ -451,8 +438,7 @@ xdrrec_destroy(xdrs)
* this procedure to guarantee proper record alignment.
*/
bool_t
-xdrrec_skiprecord(xdrs)
- XDR *xdrs;
+xdrrec_skiprecord(XDR *xdrs)
{
RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
enum xprt_stat xstat;
@@ -487,8 +473,7 @@ xdrrec_skiprecord(xdrs)
* after consuming the rest of the current record.
*/
bool_t
-xdrrec_eof(xdrs)
- XDR *xdrs;
+xdrrec_eof(XDR *xdrs)
{
RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
@@ -511,9 +496,7 @@ xdrrec_eof(xdrs)
* pipelined procedure calls.) TRUE => immmediate flush to tcp connection.
*/
bool_t
-xdrrec_endofrecord(xdrs, sendnow)
- XDR *xdrs;
- int sendnow;
+xdrrec_endofrecord(XDR *xdrs, bool_t sendnow)
{
RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
u_long len; /* fragment length */
@@ -537,10 +520,7 @@ xdrrec_endofrecord(xdrs, sendnow)
* Return true if a record is available in the buffer, false if not.
*/
bool_t
-__xdrrec_getrec(xdrs, statp, expectdata)
- XDR *xdrs;
- enum xprt_stat *statp;
- bool_t expectdata;
+__xdrrec_getrec(XDR *xdrs, enum xprt_stat *statp, bool_t expectdata)
{
RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
ssize_t n;
@@ -620,9 +600,7 @@ __xdrrec_getrec(xdrs, statp, expectdata)
}
bool_t
-__xdrrec_setnonblock(xdrs, maxrec)
- XDR *xdrs;
- int maxrec;
+__xdrrec_setnonblock(XDR *xdrs, int maxrec)
{
RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private);
@@ -637,9 +615,7 @@ __xdrrec_setnonblock(xdrs, maxrec)
* Internal useful routines
*/
static bool_t
-flush_out(rstrm, eor)
- RECSTREAM *rstrm;
- bool_t eor;
+flush_out(RECSTREAM *rstrm, bool_t eor)
{
u_int32_t eormask = (eor == TRUE) ? LAST_FRAG : 0;
u_int32_t len = (u_int32_t)((u_long)(rstrm->out_finger) -
@@ -657,8 +633,7 @@ flush_out(rstrm, eor)
}
static bool_t /* knows nothing about records! Only about input buffers */
-fill_input_buf(rstrm)
- RECSTREAM *rstrm;
+fill_input_buf(RECSTREAM *rstrm)
{
char *where;
u_int32_t i;
@@ -680,10 +655,7 @@ fill_input_buf(rstrm)
}
static bool_t /* knows nothing about records! Only about input buffers */
-get_input_bytes(rstrm, addr, len)
- RECSTREAM *rstrm;
- char *addr;
- int len;
+get_input_bytes(RECSTREAM *rstrm, char *addr, int len)
{
size_t current;
@@ -713,8 +685,7 @@ get_input_bytes(rstrm, addr, len)
}
static bool_t /* next two bytes of the input stream are treated as a header */
-set_input_fragment(rstrm)
- RECSTREAM *rstrm;
+set_input_fragment(RECSTREAM *rstrm)
{
u_int32_t header;
@@ -739,9 +710,7 @@ set_input_fragment(rstrm)
}
static bool_t /* consumes input bytes; knows nothing about records! */
-skip_input_bytes(rstrm, cnt)
- RECSTREAM *rstrm;
- long cnt;
+skip_input_bytes(RECSTREAM *rstrm, long cnt)
{
u_int32_t current;
@@ -761,8 +730,7 @@ skip_input_bytes(rstrm, cnt)
}
static u_int
-fix_buf_size(s)
- u_int s;
+fix_buf_size(u_int s)
{
if (s < 100)
@@ -774,9 +742,7 @@ fix_buf_size(s)
* Reallocate the input buffer for a non-block stream.
*/
static bool_t
-realloc_stream(rstrm, size)
- RECSTREAM *rstrm;
- int size;
+realloc_stream(RECSTREAM *rstrm, int size)
{
ptrdiff_t diff;
char *buf;
diff --git a/freebsd/lib/libc/xdr/xdr_reference.c b/freebsd/lib/libc/xdr/xdr_reference.c
index 44615daa..1c9dc5e5 100644
--- a/freebsd/lib/libc/xdr/xdr_reference.c
+++ b/freebsd/lib/libc/xdr/xdr_reference.c
@@ -2,33 +2,35 @@
/* $NetBSD: xdr_reference.c,v 1.13 2000/01/22 22:19:18 mycroft Exp $ */
-/*
- * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
- * unrestricted use provided that this legend is included on all tape
- * media and as a part of the software program in whole or part. Users
- * may copy or modify Sun RPC without charge, but are not authorized
- * to license or distribute it to anyone else except as part of a product or
- * program developed by the user.
- *
- * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
- * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
- *
- * Sun RPC is provided with no support and without any obligation on the
- * part of Sun Microsystems, Inc. to assist in its use, correction,
- * modification or enhancement.
+/*-
+ * Copyright (c) 2010, Oracle America, Inc.
*
- * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
- * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
- * OR ANY PART THEREOF.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
*
- * In no event will Sun Microsystems, Inc. be liable for any lost revenue
- * or profits or other special, indirect and consequential damages, even if
- * Sun has been advised of the possibility of such damages.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ * * Neither the name of the "Oracle America, Inc." nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
- * Sun Microsystems, Inc.
- * 2550 Garcia Avenue
- * Mountain View, California 94043
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
@@ -41,8 +43,6 @@ __FBSDID("$FreeBSD$");
/*
* xdr_reference.c, Generic XDR routines impelmentation.
*
- * Copyright (C) 1987, Sun Microsystems, Inc.
- *
* These are the "non-trivial" xdr primitives used to serialize and de-serialize
* "pointers". See xdr.h for more info on the interface to xdr.
*/
@@ -67,11 +67,13 @@ __FBSDID("$FreeBSD$");
* proc is the routine to handle the referenced structure.
*/
bool_t
-xdr_reference(xdrs, pp, size, proc)
- XDR *xdrs;
- caddr_t *pp; /* the pointer to work on */
- u_int size; /* size of the object pointed to */
- xdrproc_t proc; /* xdr routine to handle the object */
+xdr_reference(XDR *xdrs, caddr_t *pp, u_int size, xdrproc_t proc)
+/*
+ * XDR *xdrs;
+ * caddr_t *pp; // the pointer to work on
+ * u_int size; // size of the object pointed to
+ * xdrproc_t proc; // xdr routine to handle the object
+ */
{
caddr_t loc = *pp;
bool_t stat;
@@ -124,11 +126,7 @@ xdr_reference(xdrs, pp, size, proc)
*
*/
bool_t
-xdr_pointer(xdrs,objpp,obj_size,xdr_obj)
- XDR *xdrs;
- char **objpp;
- u_int obj_size;
- xdrproc_t xdr_obj;
+xdr_pointer(XDR *xdrs, char **objpp, u_int obj_size, xdrproc_t xdr_obj)
{
bool_t more_data;
diff --git a/freebsd/lib/libc/xdr/xdr_sizeof.c b/freebsd/lib/libc/xdr/xdr_sizeof.c
index dc401400..4e669cbe 100644
--- a/freebsd/lib/libc/xdr/xdr_sizeof.c
+++ b/freebsd/lib/libc/xdr/xdr_sizeof.c
@@ -1,38 +1,38 @@
#include <machine/rtems-bsd-user-space.h>
-/*
- * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
- * unrestricted use provided that this legend is included on all tape
- * media and as a part of the software program in whole or part. Users
- * may copy or modify Sun RPC without charge, but are not authorized
- * to license or distribute it to anyone else except as part of a product or
- * program developed by the user.
- *
- * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
- * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
- *
- * Sun RPC is provided with no support and without any obligation on the
- * part of Sun Microsystems, Inc. to assist in its use, correction,
- * modification or enhancement.
- *
- * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
- * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
- * OR ANY PART THEREOF.
- *
- * In no event will Sun Microsystems, Inc. be liable for any lost revenue
- * or profits or other special, indirect and consequential damages, even if
- * Sun has been advised of the possibility of such damages.
- *
- * Sun Microsystems, Inc.
- * 2550 Garcia Avenue
- * Mountain View, California 94043
+/*-
+ * Copyright (c) 2010, Oracle America, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ * * Neither the name of the "Oracle America, Inc." nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* xdr_sizeof.c
*
- * Copyright 1990 Sun Microsystems, Inc.
- *
* General purpose routine to see how much space something will use
* when serialized using XDR.
*/
@@ -49,9 +49,7 @@ __FBSDID("$FreeBSD$");
/* ARGSUSED */
static bool_t
-x_putlong(xdrs, longp)
- XDR *xdrs;
- long *longp;
+x_putlong(XDR *xdrs, const long *longp)
{
xdrs->x_handy += BYTES_PER_XDR_UNIT;
return (TRUE);
@@ -59,36 +57,28 @@ x_putlong(xdrs, longp)
/* ARGSUSED */
static bool_t
-x_putbytes(xdrs, bp, len)
- XDR *xdrs;
- char *bp;
- u_int len;
+x_putbytes(XDR *xdrs, const char *bp, u_int len)
{
xdrs->x_handy += len;
return (TRUE);
}
static u_int
-x_getpostn(xdrs)
- XDR *xdrs;
+x_getpostn(XDR *xdrs)
{
return (xdrs->x_handy);
}
/* ARGSUSED */
static bool_t
-x_setpostn(xdrs, pos)
- XDR *xdrs;
- u_int pos;
+x_setpostn(XDR *xdrs, u_int pos)
{
/* This is not allowed */
return (FALSE);
}
static int32_t *
-x_inline(xdrs, len)
- XDR *xdrs;
- u_int len;
+x_inline(XDR *xdrs, u_int len)
{
if (len == 0) {
return (NULL);
@@ -115,15 +105,14 @@ x_inline(xdrs, len)
}
static int
-harmless()
+harmless(void)
{
/* Always return FALSE/NULL, as the case may be */
return (0);
}
static void
-x_destroy(xdrs)
- XDR *xdrs;
+x_destroy(XDR *xdrs)
{
xdrs->x_handy = 0;
xdrs->x_base = 0;
@@ -135,9 +124,7 @@ x_destroy(xdrs)
}
unsigned long
-xdr_sizeof(func, data)
- xdrproc_t func;
- void *data;
+xdr_sizeof(xdrproc_t func, void *data)
{
XDR x;
struct xdr_ops ops;
diff --git a/freebsd/lib/libc/xdr/xdr_stdio.c b/freebsd/lib/libc/xdr/xdr_stdio.c
index 5267c604..3bbdb464 100644
--- a/freebsd/lib/libc/xdr/xdr_stdio.c
+++ b/freebsd/lib/libc/xdr/xdr_stdio.c
@@ -2,33 +2,35 @@
/* $NetBSD: xdr_stdio.c,v 1.14 2000/01/22 22:19:19 mycroft Exp $ */
-/*
- * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
- * unrestricted use provided that this legend is included on all tape
- * media and as a part of the software program in whole or part. Users
- * may copy or modify Sun RPC without charge, but are not authorized
- * to license or distribute it to anyone else except as part of a product or
- * program developed by the user.
- *
- * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
- * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
+/*-
+ * Copyright (c) 2010, Oracle America, Inc.
*
- * Sun RPC is provided with no support and without any obligation on the
- * part of Sun Microsystems, Inc. to assist in its use, correction,
- * modification or enhancement.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
*
- * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
- * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
- * OR ANY PART THEREOF.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ * * Neither the name of the "Oracle America, Inc." nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
- * In no event will Sun Microsystems, Inc. be liable for any lost revenue
- * or profits or other special, indirect and consequential damages, even if
- * Sun has been advised of the possibility of such damages.
- *
- * Sun Microsystems, Inc.
- * 2550 Garcia Avenue
- * Mountain View, California 94043
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
@@ -41,8 +43,6 @@ __FBSDID("$FreeBSD$");
/*
* xdr_stdio.c, XDR implementation on standard i/o file.
*
- * Copyright (C) 1984, Sun Microsystems, Inc.
- *
* This set of routines implements a XDR on a stdio stream.
* XDR_ENCODE serializes onto the stream, XDR_DECODE de-serializes
* from the stream.
@@ -85,10 +85,7 @@ static const struct xdr_ops xdrstdio_ops = {
* Operation flag is set to op.
*/
void
-xdrstdio_create(xdrs, file, op)
- XDR *xdrs;
- FILE *file;
- enum xdr_op op;
+xdrstdio_create(XDR *xdrs, FILE *file, enum xdr_op op)
{
xdrs->x_op = op;
@@ -103,17 +100,14 @@ xdrstdio_create(xdrs, file, op)
* Cleans up the xdr stream handle xdrs previously set up by xdrstdio_create.
*/
static void
-xdrstdio_destroy(xdrs)
- XDR *xdrs;
+xdrstdio_destroy(XDR *xdrs)
{
(void)fflush((FILE *)xdrs->x_private);
/* XXX: should we close the file ?? */
}
static bool_t
-xdrstdio_getlong(xdrs, lp)
- XDR *xdrs;
- long *lp;
+xdrstdio_getlong(XDR *xdrs, long *lp)
{
u_int32_t temp;
@@ -124,9 +118,7 @@ xdrstdio_getlong(xdrs, lp)
}
static bool_t
-xdrstdio_putlong(xdrs, lp)
- XDR *xdrs;
- const long *lp;
+xdrstdio_putlong(XDR *xdrs, const long *lp)
{
int32_t mycopy = htonl((u_int32_t)*lp);
@@ -136,10 +128,7 @@ xdrstdio_putlong(xdrs, lp)
}
static bool_t
-xdrstdio_getbytes(xdrs, addr, len)
- XDR *xdrs;
- char *addr;
- u_int len;
+xdrstdio_getbytes(XDR *xdrs, char *addr, u_int len)
{
if ((len != 0) && (fread(addr, (size_t)len, 1, (FILE *)xdrs->x_private) != 1))
@@ -148,10 +137,7 @@ xdrstdio_getbytes(xdrs, addr, len)
}
static bool_t
-xdrstdio_putbytes(xdrs, addr, len)
- XDR *xdrs;
- const char *addr;
- u_int len;
+xdrstdio_putbytes(XDR *xdrs, const char *addr, u_int len)
{
if ((len != 0) && (fwrite(addr, (size_t)len, 1,
@@ -161,17 +147,14 @@ xdrstdio_putbytes(xdrs, addr, len)
}
static u_int
-xdrstdio_getpos(xdrs)
- XDR *xdrs;
+xdrstdio_getpos(XDR *xdrs)
{
return ((u_int) ftell((FILE *)xdrs->x_private));
}
static bool_t
-xdrstdio_setpos(xdrs, pos)
- XDR *xdrs;
- u_int pos;
+xdrstdio_setpos(XDR *xdrs, u_int pos)
{
return ((fseek((FILE *)xdrs->x_private, (long)pos, 0) < 0) ?
@@ -180,9 +163,7 @@ xdrstdio_setpos(xdrs, pos)
/* ARGSUSED */
static int32_t *
-xdrstdio_inline(xdrs, len)
- XDR *xdrs;
- u_int len;
+xdrstdio_inline(XDR *xdrs, u_int len)
{
/*
diff --git a/freebsd/lib/libipsec/pfkey.c b/freebsd/lib/libipsec/pfkey.c
index c8ebfbf5..abb0cee1 100644
--- a/freebsd/lib/libipsec/pfkey.c
+++ b/freebsd/lib/libipsec/pfkey.c
@@ -1647,7 +1647,7 @@ pfkey_recv(so)
/* read real message */
reallen = PFKEY_UNUNIT64(buf.sadb_msg_len);
- if ((newmsg = CALLOC(reallen, struct sadb_msg *)) == 0) {
+ if ((newmsg = CALLOC(reallen, struct sadb_msg *)) == NULL) {
__ipsec_set_strerror(strerror(errno));
return NULL;
}
diff --git a/freebsd/lib/libipsec/pfkey_dump.c b/freebsd/lib/libipsec/pfkey_dump.c
index 8c4d5213..7a5c2f21 100644
--- a/freebsd/lib/libipsec/pfkey_dump.c
+++ b/freebsd/lib/libipsec/pfkey_dump.c
@@ -189,6 +189,9 @@ static struct val2str str_alg_enc[] = {
#ifdef SADB_X_EALG_AESCTR
{ SADB_X_EALG_AESCTR, "aes-ctr", },
#endif
+#ifdef SADB_X_EALG_AESGCM16
+ { SADB_X_EALG_AESGCM16, "aes-gcm-16", },
+#endif
#ifdef SADB_X_EALG_CAMELLIACBC
{ SADB_X_EALG_CAMELLIACBC, "camellia-cbc", },
#endif
diff --git a/freebsd/lib/libipsec/policy_parse.y b/freebsd/lib/libipsec/policy_parse.y
index 9e2f979c..46e54e55 100644
--- a/freebsd/lib/libipsec/policy_parse.y
+++ b/freebsd/lib/libipsec/policy_parse.y
@@ -88,7 +88,6 @@ static caddr_t policy_parse(char *msg, int msglen);
extern void __policy__strbuffer__init__(char *msg);
extern void __policy__strbuffer__free__(void);
-extern int yyparse(void);
extern int yylex(void);
extern char *__libipsecyytext; /*XXX*/
diff --git a/freebsd/lib/libkvm/kvm.h b/freebsd/lib/libkvm/kvm.h
index 912f1d4b..8b661807 100644
--- a/freebsd/lib/libkvm/kvm.h
+++ b/freebsd/lib/libkvm/kvm.h
@@ -34,7 +34,7 @@
#define _KVM_H_
#include <sys/cdefs.h>
-#include <sys/_types.h>
+#include <sys/types.h>
#include <nlist.h>
/* Default version symbol. */
@@ -51,6 +51,14 @@ typedef __ssize_t ssize_t;
#define _SSIZE_T_DECLARED
#endif
+typedef uint64_t kvaddr_t; /* An address in a target image. */
+
+struct kvm_nlist {
+ const char *n_name;
+ unsigned char n_type;
+ kvaddr_t n_value;
+};
+
typedef struct __kvm kvm_t;
struct kinfo_proc;
@@ -74,21 +82,27 @@ char **kvm_getargv(kvm_t *, const struct kinfo_proc *, int);
int kvm_getcptime(kvm_t *, long *);
char **kvm_getenvv(kvm_t *, const struct kinfo_proc *, int);
char *kvm_geterr(kvm_t *);
-char *kvm_getfiles(kvm_t *, int, int, int *);
int kvm_getloadavg(kvm_t *, double [], int);
int kvm_getmaxcpu(kvm_t *);
+int kvm_getncpus(kvm_t *);
void *kvm_getpcpu(kvm_t *, int);
+uint64_t kvm_counter_u64_fetch(kvm_t *, u_long);
struct kinfo_proc *
kvm_getprocs(kvm_t *, int, int, int *);
int kvm_getswapinfo(kvm_t *, struct kvm_swap *, int, int);
+int kvm_native(kvm_t *);
int kvm_nlist(kvm_t *, struct nlist *);
+int kvm_nlist2(kvm_t *, struct kvm_nlist *);
kvm_t *kvm_open
(const char *, const char *, const char *, int, const char *);
kvm_t *kvm_openfiles
(const char *, const char *, const char *, int, char *);
+kvm_t *kvm_open2
+ (const char *, const char *, int, char *,
+ int (*)(const char *, kvaddr_t *));
ssize_t kvm_read(kvm_t *, unsigned long, void *, size_t);
-ssize_t kvm_uread
- (kvm_t *, const struct kinfo_proc *, unsigned long, char *, size_t);
+ssize_t kvm_read_zpcpu(kvm_t *, unsigned long, void *, size_t, int);
+ssize_t kvm_read2(kvm_t *, kvaddr_t, void *, size_t);
ssize_t kvm_write(kvm_t *, unsigned long, const void *, size_t);
__END_DECLS
diff --git a/freebsd/lib/libmemstat/memstat.c b/freebsd/lib/libmemstat/memstat.c
index 536b47fb..e7e7d53f 100644
--- a/freebsd/lib/libmemstat/memstat.c
+++ b/freebsd/lib/libmemstat/memstat.c
@@ -256,6 +256,13 @@ memstat_get_size(const struct memory_type *mtp)
}
uint64_t
+memstat_get_rsize(const struct memory_type *mtp)
+{
+
+ return (mtp->mt_rsize);
+}
+
+uint64_t
memstat_get_memalloced(const struct memory_type *mtp)
{
diff --git a/freebsd/lib/libmemstat/memstat.h b/freebsd/lib/libmemstat/memstat.h
index cca75b32..8394dc1c 100644
--- a/freebsd/lib/libmemstat/memstat.h
+++ b/freebsd/lib/libmemstat/memstat.h
@@ -124,6 +124,7 @@ uint64_t memstat_get_countlimit(const struct memory_type *mtp);
uint64_t memstat_get_byteslimit(const struct memory_type *mtp);
uint64_t memstat_get_sizemask(const struct memory_type *mtp);
uint64_t memstat_get_size(const struct memory_type *mtp);
+uint64_t memstat_get_rsize(const struct memory_type *mtp);
uint64_t memstat_get_memalloced(const struct memory_type *mtp);
uint64_t memstat_get_memfreed(const struct memory_type *mtp);
uint64_t memstat_get_numallocs(const struct memory_type *mtp);
diff --git a/freebsd/lib/libmemstat/memstat_internal.h b/freebsd/lib/libmemstat/memstat_internal.h
index 2416e09b..9fdc2281 100644
--- a/freebsd/lib/libmemstat/memstat_internal.h
+++ b/freebsd/lib/libmemstat/memstat_internal.h
@@ -51,6 +51,7 @@ struct memory_type {
uint64_t mt_byteslimit; /* 0, or maximum bytes. */
uint64_t mt_sizemask; /* malloc: allocated size bitmask. */
uint64_t mt_size; /* uma: size of objects. */
+ uint64_t mt_rsize; /* uma: real size of objects. */
/*
* Zone or type information that includes all caches and any central
diff --git a/freebsd/lib/libmemstat/memstat_uma.c b/freebsd/lib/libmemstat/memstat_uma.c
index 1be0ada7..9c5e4024 100644
--- a/freebsd/lib/libmemstat/memstat_uma.c
+++ b/freebsd/lib/libmemstat/memstat_uma.c
@@ -214,6 +214,7 @@ retry:
}
mtp->mt_size = uthp->uth_size;
+ mtp->mt_rsize = uthp->uth_rsize;
mtp->mt_memalloced = mtp->mt_numallocs * uthp->uth_size;
mtp->mt_memfreed = mtp->mt_numfrees * uthp->uth_size;
mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
@@ -438,6 +439,7 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
}
skip_percpu:
mtp->mt_size = kz.uk_size;
+ mtp->mt_rsize = kz.uk_rsize;
mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size;
mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size;
mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
@@ -449,7 +451,7 @@ skip_percpu:
kz.uk_ipers;
mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size;
mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
- for (ubp = LIST_FIRST(&uz.uz_full_bucket); ubp !=
+ for (ubp = LIST_FIRST(&uz.uz_buckets); ubp !=
NULL; ubp = LIST_NEXT(&ub, ub_link)) {
ret = kread(kvm, ubp, &ub, sizeof(ub), 0);
mtp->mt_zonefree += ub.ub_cnt;
diff --git a/freebsd/lib/libutil/expand_number.c b/freebsd/lib/libutil/expand_number.c
index 893a1d77..b004f5fc 100644
--- a/freebsd/lib/libutil/expand_number.c
+++ b/freebsd/lib/libutil/expand_number.c
@@ -37,31 +37,24 @@ __FBSDID("$FreeBSD$");
#include <libutil.h>
#include <stdint.h>
-/*
- * Convert an expression of the following forms to a uint64_t.
- * 1) A positive decimal number.
- * 2) A positive decimal number followed by a 'b' or 'B' (mult by 1).
- * 3) A positive decimal number followed by a 'k' or 'K' (mult by 1 << 10).
- * 4) A positive decimal number followed by a 'm' or 'M' (mult by 1 << 20).
- * 5) A positive decimal number followed by a 'g' or 'G' (mult by 1 << 30).
- * 6) A positive decimal number followed by a 't' or 'T' (mult by 1 << 40).
- * 7) A positive decimal number followed by a 'p' or 'P' (mult by 1 << 50).
- * 8) A positive decimal number followed by a 'e' or 'E' (mult by 1 << 60).
- */
int
expand_number(const char *buf, uint64_t *num)
{
+ char *endptr;
+ uintmax_t umaxval;
uint64_t number;
unsigned shift;
- char *endptr;
-
- number = strtoumax(buf, &endptr, 0);
+ int serrno;
- if (endptr == buf) {
- /* No valid digits. */
- errno = EINVAL;
+ serrno = errno;
+ errno = 0;
+ umaxval = strtoumax(buf, &endptr, 0);
+ if (umaxval > UINT64_MAX)
+ errno = ERANGE;
+ if (errno != 0)
return (-1);
- }
+ errno = serrno;
+ number = umaxval;
switch (tolower((unsigned char)*endptr)) {
case 'e':
@@ -97,7 +90,6 @@ expand_number(const char *buf, uint64_t *num)
errno = ERANGE;
return (-1);
}
-
*num = number << shift;
return (0);
}
diff --git a/freebsd/lib/libutil/humanize_number.c b/freebsd/lib/libutil/humanize_number.c
index da5c5197..3bb729a9 100644
--- a/freebsd/lib/libutil/humanize_number.c
+++ b/freebsd/lib/libutil/humanize_number.c
@@ -4,6 +4,7 @@
/*
* Copyright (c) 1997, 1998, 1999, 2002 The NetBSD Foundation, Inc.
+ * Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org>
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -52,15 +53,26 @@ humanize_number(char *buf, size_t len, int64_t quotient,
{
const char *prefixes, *sep;
int i, r, remainder, s1, s2, sign;
+ int divisordeccut;
int64_t divisor, max;
size_t baselen;
- assert(buf != NULL);
- assert(suffix != NULL);
- assert(scale >= 0);
- assert(scale < maxscale || (((scale & (HN_AUTOSCALE | HN_GETSCALE)) != 0)));
- assert(!((flags & HN_DIVISOR_1000) && (flags & HN_IEC_PREFIXES)));
+ /* Since so many callers don't check -1, NUL terminate the buffer */
+ if (len > 0)
+ buf[0] = '\0';
+ /* validate args */
+ if (buf == NULL || suffix == NULL)
+ return (-1);
+ if (scale < 0)
+ return (-1);
+ else if (scale >= maxscale &&
+ ((scale & ~(HN_AUTOSCALE|HN_GETSCALE)) != 0))
+ return (-1);
+ if ((flags & HN_DIVISOR_1000) && (flags & HN_IEC_PREFIXES))
+ return (-1);
+
+ /* setup parameters */
remainder = 0;
if (flags & HN_IEC_PREFIXES) {
@@ -75,34 +87,32 @@ humanize_number(char *buf, size_t len, int64_t quotient,
* an assertion earlier).
*/
divisor = 1024;
+ divisordeccut = 973; /* ceil(.95 * 1024) */
if (flags & HN_B)
prefixes = "B\0\0Ki\0Mi\0Gi\0Ti\0Pi\0Ei";
else
prefixes = "\0\0\0Ki\0Mi\0Gi\0Ti\0Pi\0Ei";
} else {
baselen = 1;
- if (flags & HN_DIVISOR_1000)
+ if (flags & HN_DIVISOR_1000) {
divisor = 1000;
- else
+ divisordeccut = 950;
+ if (flags & HN_B)
+ prefixes = "B\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E";
+ else
+ prefixes = "\0\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E";
+ } else {
divisor = 1024;
-
- if (flags & HN_B)
- prefixes = "B\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E";
- else
- prefixes = "\0\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E";
+ divisordeccut = 973; /* ceil(.95 * 1024) */
+ if (flags & HN_B)
+ prefixes = "B\0\0K\0\0M\0\0G\0\0T\0\0P\0\0E";
+ else
+ prefixes = "\0\0\0K\0\0M\0\0G\0\0T\0\0P\0\0E";
+ }
}
#define SCALE2PREFIX(scale) (&prefixes[(scale) * 3])
- if (scale < 0 || (scale >= maxscale &&
- (scale & (HN_AUTOSCALE | HN_GETSCALE)) == 0))
- return (-1);
-
- if (buf == NULL || suffix == NULL)
- return (-1);
-
- if (len > 0)
- buf[0] = '\0';
if (quotient < 0) {
sign = -1;
quotient = -quotient;
@@ -134,8 +144,8 @@ humanize_number(char *buf, size_t len, int64_t quotient,
* divide once more.
*/
for (i = 0;
- (quotient >= max || (quotient == max - 1 && remainder >= 950)) &&
- i < maxscale; i++) {
+ (quotient >= max || (quotient == max - 1 &&
+ remainder >= divisordeccut)) && i < maxscale; i++) {
remainder = quotient % divisor;
quotient /= divisor;
}
@@ -150,20 +160,22 @@ humanize_number(char *buf, size_t len, int64_t quotient,
}
/* If a value <= 9.9 after rounding and ... */
- if (quotient <= 9 && remainder < 950 && i > 0 && flags & HN_DECIMAL) {
- /* baselen + \0 + .N */
- if (len < baselen + 1 + 2)
- return (-1);
- s1 = (int)quotient + ((remainder + 50) / 1000);
- s2 = ((remainder + 50) / 100) % 10;
+ /*
+ * XXX - should we make sure there is enough space for the decimal
+ * place and if not, don't do HN_DECIMAL?
+ */
+ if (((quotient == 9 && remainder < divisordeccut) || quotient < 9) &&
+ i > 0 && flags & HN_DECIMAL) {
+ s1 = (int)quotient + ((remainder * 10 + divisor / 2) /
+ divisor / 10);
+ s2 = ((remainder * 10 + divisor / 2) / divisor) % 10;
r = snprintf(buf, len, "%d%s%d%s%s%s",
sign * s1, localeconv()->decimal_point, s2,
sep, SCALE2PREFIX(i), suffix);
} else
r = snprintf(buf, len, "%" PRId64 "%s%s%s",
- sign * (quotient + (remainder + 50) / 1000),
+ sign * (quotient + (remainder + divisor / 2) / divisor),
sep, SCALE2PREFIX(i), suffix);
return (r);
}
-
diff --git a/freebsd/lib/libutil/libutil.h b/freebsd/lib/libutil/libutil.h
index 60ba3235..b20ffa29 100644
--- a/freebsd/lib/libutil/libutil.h
+++ b/freebsd/lib/libutil/libutil.h
@@ -102,6 +102,8 @@ struct kinfo_file *
kinfo_getfile(pid_t _pid, int *_cntp);
struct kinfo_vmentry *
kinfo_getvmmap(pid_t _pid, int *_cntp);
+struct kinfo_vmobject *
+ kinfo_getvmobject(int *_cntp);
struct kinfo_proc *
kinfo_getallproc(int *_cntp);
struct kinfo_proc *
@@ -162,16 +164,21 @@ int pw_tmp(int _mfd);
#endif
#ifdef _GRP_H_
-int gr_copy(int __ffd, int _tfd, const struct group *_gr, struct group *_old_gr);
-struct group *gr_dup(const struct group *gr);
-int gr_equal(const struct group *gr1, const struct group *gr2);
+int gr_copy(int __ffd, int _tfd, const struct group *_gr,
+ struct group *_old_gr);
+struct group *
+ gr_dup(const struct group *_gr);
+struct group *
+ gr_add(const struct group *_gr, const char *_newmember);
+int gr_equal(const struct group *_gr1, const struct group *_gr2);
void gr_fini(void);
int gr_init(const char *_dir, const char *_master);
int gr_lock(void);
-char *gr_make(const struct group *gr);
+char *gr_make(const struct group *_gr);
int gr_mkdb(void);
+struct group *
+ gr_scan(const char *_line);
int gr_tmp(int _mdf);
-struct group *gr_scan(const char *line);
#endif
#ifdef _UFS_UFS_QUOTA_H_
diff --git a/freebsd/sbin/dhclient/bpf.c b/freebsd/sbin/dhclient/bpf.c
index 8ff451b2..4d3d2276 100644
--- a/freebsd/sbin/dhclient/bpf.c
+++ b/freebsd/sbin/dhclient/bpf.c
@@ -46,6 +46,8 @@
__FBSDID("$FreeBSD$");
#include "dhcpd.h"
+#include "privsep.h"
+#include <sys/capsicum.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
@@ -63,15 +65,15 @@ __FBSDID("$FreeBSD$");
* mask.
*/
int
-if_register_bpf(struct interface_info *info)
+if_register_bpf(struct interface_info *info, int flags)
{
char filename[50];
int sock, b;
/* Open a BPF device */
- for (b = 0; 1; b++) {
+ for (b = 0;; b++) {
snprintf(filename, sizeof(filename), BPF_FORMAT, b);
- sock = open(filename, O_RDWR, 0);
+ sock = open(filename, flags);
if (sock < 0) {
if (errno == EBUSY)
continue;
@@ -89,16 +91,81 @@ if_register_bpf(struct interface_info *info)
return (sock);
}
+/*
+ * Packet write filter program:
+ * 'ip and udp and src port bootps and dst port (bootps or bootpc)'
+ */
+struct bpf_insn dhcp_bpf_wfilter[] = {
+ BPF_STMT(BPF_LD + BPF_B + BPF_IND, 14),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, (IPVERSION << 4) + 5, 0, 12),
+
+ /* Make sure this is an IP packet... */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 10),
+
+ /* Make sure it's a UDP packet... */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, 23),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 8),
+
+ /* Make sure this isn't a fragment... */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 20),
+ BPF_JUMP(BPF_JMP + BPF_JSET + BPF_K, 0x1fff, 6, 0), /* patched */
+
+ /* Get the IP header length... */
+ BPF_STMT(BPF_LDX + BPF_B + BPF_MSH, 14),
+
+ /* Make sure it's from the right port... */
+ BPF_STMT(BPF_LD + BPF_H + BPF_IND, 14),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 68, 0, 3),
+
+ /* Make sure it is to the right ports ... */
+ BPF_STMT(BPF_LD + BPF_H + BPF_IND, 16),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 67, 0, 1),
+
+ /* If we passed all the tests, ask for the whole packet. */
+ BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
+
+ /* Otherwise, drop it. */
+ BPF_STMT(BPF_RET+BPF_K, 0),
+};
+
+int dhcp_bpf_wfilter_len = sizeof(dhcp_bpf_wfilter) / sizeof(struct bpf_insn);
+
void
if_register_send(struct interface_info *info)
{
+ cap_rights_t rights;
+ struct bpf_version v;
+ struct bpf_program p;
int sock, on = 1;
- /*
- * If we're using the bpf API for sending and receiving, we
- * don't need to register this interface twice.
- */
- info->wfdesc = info->rfdesc;
+ /* Open a BPF device and hang it on this interface... */
+ info->wfdesc = if_register_bpf(info, O_WRONLY);
+
+ /* Make sure the BPF version is in range... */
+ if (ioctl(info->wfdesc, BIOCVERSION, &v) < 0)
+ error("Can't get BPF version: %m");
+
+ if (v.bv_major != BPF_MAJOR_VERSION ||
+ v.bv_minor < BPF_MINOR_VERSION)
+ error("Kernel BPF version out of range - recompile dhcpd!");
+
+ /* Set up the bpf write filter program structure. */
+ p.bf_len = dhcp_bpf_wfilter_len;
+ p.bf_insns = dhcp_bpf_wfilter;
+
+ if (dhcp_bpf_wfilter[7].k == 0x1fff)
+ dhcp_bpf_wfilter[7].k = htons(IP_MF|IP_OFFMASK);
+
+ if (ioctl(info->wfdesc, BIOCSETWF, &p) < 0)
+ error("Can't install write filter program: %m");
+
+ if (ioctl(info->wfdesc, BIOCLOCK, NULL) < 0)
+ error("Cannot lock bpf");
+
+ cap_rights_init(&rights, CAP_WRITE);
+ if (cap_rights_limit(info->wfdesc, &rights) < 0 && errno != ENOSYS)
+ error("Can't limit bpf descriptor: %m");
/*
* Use raw socket for unicast send.
@@ -146,55 +213,17 @@ struct bpf_insn dhcp_bpf_filter[] = {
int dhcp_bpf_filter_len = sizeof(dhcp_bpf_filter) / sizeof(struct bpf_insn);
-/*
- * Packet write filter program:
- * 'ip and udp and src port bootps and dst port (bootps or bootpc)'
- */
-struct bpf_insn dhcp_bpf_wfilter[] = {
- BPF_STMT(BPF_LD + BPF_B + BPF_IND, 14),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, (IPVERSION << 4) + 5, 0, 12),
-
- /* Make sure this is an IP packet... */
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 10),
-
- /* Make sure it's a UDP packet... */
- BPF_STMT(BPF_LD + BPF_B + BPF_ABS, 23),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 8),
-
- /* Make sure this isn't a fragment... */
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 20),
- BPF_JUMP(BPF_JMP + BPF_JSET + BPF_K, 0x1fff, 6, 0), /* patched */
-
- /* Get the IP header length... */
- BPF_STMT(BPF_LDX + BPF_B + BPF_MSH, 14),
-
- /* Make sure it's from the right port... */
- BPF_STMT(BPF_LD + BPF_H + BPF_IND, 14),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 68, 0, 3),
-
- /* Make sure it is to the right ports ... */
- BPF_STMT(BPF_LD + BPF_H + BPF_IND, 16),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 67, 0, 1),
-
- /* If we passed all the tests, ask for the whole packet. */
- BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
-
- /* Otherwise, drop it. */
- BPF_STMT(BPF_RET+BPF_K, 0),
-};
-
-int dhcp_bpf_wfilter_len = sizeof(dhcp_bpf_wfilter) / sizeof(struct bpf_insn);
-
void
if_register_receive(struct interface_info *info)
{
+ static const unsigned long cmds[2] = { SIOCGIFFLAGS, SIOCGIFMEDIA };
+ cap_rights_t rights;
struct bpf_version v;
struct bpf_program p;
int flag = 1, sz;
/* Open a BPF device and hang it on this interface... */
- info->rfdesc = if_register_bpf(info);
+ info->rfdesc = if_register_bpf(info, O_RDONLY);
/* Make sure the BPF version is in range... */
if (ioctl(info->rfdesc, BIOCVERSION, &v) < 0)
@@ -237,48 +266,94 @@ if_register_receive(struct interface_info *info)
if (ioctl(info->rfdesc, BIOCSETF, &p) < 0)
error("Can't install packet filter program: %m");
- /* Set up the bpf write filter program structure. */
- p.bf_len = dhcp_bpf_wfilter_len;
- p.bf_insns = dhcp_bpf_wfilter;
-
- if (dhcp_bpf_wfilter[7].k == 0x1fff)
- dhcp_bpf_wfilter[7].k = htons(IP_MF|IP_OFFMASK);
-
- if (ioctl(info->rfdesc, BIOCSETWF, &p) < 0)
- error("Can't install write filter program: %m");
-
if (ioctl(info->rfdesc, BIOCLOCK, NULL) < 0)
error("Cannot lock bpf");
+
+ cap_rights_init(&rights, CAP_IOCTL, CAP_EVENT, CAP_READ);
+ if (cap_rights_limit(info->rfdesc, &rights) < 0 && errno != ENOSYS)
+ error("Can't limit bpf descriptor: %m");
+ if (cap_ioctls_limit(info->rfdesc, cmds, 2) < 0 && errno != ENOSYS)
+ error("Can't limit ioctls for bpf descriptor: %m");
}
-ssize_t
-send_packet(struct interface_info *interface, struct dhcp_packet *raw,
- size_t len, struct in_addr from, struct sockaddr_in *to,
- struct hardware *hto)
+void
+send_packet_unpriv(int privfd, struct dhcp_packet *raw, size_t len,
+ struct in_addr from, struct in_addr to)
+{
+ struct imsg_hdr hdr;
+ struct buf *buf;
+ int errs;
+
+ hdr.code = IMSG_SEND_PACKET;
+ hdr.len = sizeof(hdr) +
+ sizeof(size_t) + len +
+ sizeof(from) + sizeof(to);
+
+ if ((buf = buf_open(hdr.len)) == NULL)
+ error("buf_open: %m");
+
+ errs = 0;
+ errs += buf_add(buf, &hdr, sizeof(hdr));
+ errs += buf_add(buf, &len, sizeof(len));
+ errs += buf_add(buf, raw, len);
+ errs += buf_add(buf, &from, sizeof(from));
+ errs += buf_add(buf, &to, sizeof(to));
+ if (errs)
+ error("buf_add: %m");
+
+ if (buf_close(privfd, buf) == -1)
+ error("buf_close: %m");
+}
+
+void
+send_packet_priv(struct interface_info *interface, struct imsg_hdr *hdr, int fd)
{
unsigned char buf[256];
struct iovec iov[2];
struct msghdr msg;
+ struct dhcp_packet raw;
+ size_t len;
+ struct in_addr from, to;
int result, bufp = 0;
+ if (hdr->len < sizeof(*hdr) + sizeof(size_t))
+ error("corrupted message received");
+ buf_read(fd, &len, sizeof(len));
+ if (hdr->len != sizeof(*hdr) + sizeof(size_t) + len +
+ sizeof(from) + sizeof(to)) {
+ error("corrupted message received");
+ }
+ if (len > sizeof(raw))
+ error("corrupted message received");
+ buf_read(fd, &raw, len);
+ buf_read(fd, &from, sizeof(from));
+ buf_read(fd, &to, sizeof(to));
+
/* Assemble the headers... */
- if (to->sin_addr.s_addr == INADDR_BROADCAST)
- assemble_hw_header(interface, buf, &bufp, hto);
- assemble_udp_ip_header(buf, &bufp, from.s_addr,
- to->sin_addr.s_addr, to->sin_port, (unsigned char *)raw, len);
+ if (to.s_addr == INADDR_BROADCAST)
+ assemble_hw_header(interface, buf, &bufp);
+ assemble_udp_ip_header(buf, &bufp, from.s_addr, to.s_addr,
+ htons(REMOTE_PORT), (unsigned char *)&raw, len);
- iov[0].iov_base = (char *)buf;
+ iov[0].iov_base = buf;
iov[0].iov_len = bufp;
- iov[1].iov_base = (char *)raw;
+ iov[1].iov_base = &raw;
iov[1].iov_len = len;
/* Fire it off */
- if (to->sin_addr.s_addr == INADDR_BROADCAST)
+ if (to.s_addr == INADDR_BROADCAST)
result = writev(interface->wfdesc, iov, 2);
else {
+ struct sockaddr_in sato;
+
+ sato.sin_addr = to;
+ sato.sin_port = htons(REMOTE_PORT);
+ sato.sin_family = AF_INET;
+ sato.sin_len = sizeof(sato);
+
memset(&msg, 0, sizeof(msg));
- msg.msg_name = (struct sockaddr *)to;
- msg.msg_namelen = sizeof(*to);
+ msg.msg_name = (struct sockaddr *)&sato;
+ msg.msg_namelen = sizeof(sato);
msg.msg_iov = iov;
msg.msg_iovlen = 2;
result = sendmsg(interface->ufdesc, &msg, 0);
@@ -286,7 +361,6 @@ send_packet(struct interface_info *interface, struct dhcp_packet *raw,
if (result < 0)
warning("send_packet: %m");
- return (result);
}
ssize_t
diff --git a/freebsd/sbin/dhclient/clparse.c b/freebsd/sbin/dhclient/clparse.c
index b52bc473..a65236fc 100644
--- a/freebsd/sbin/dhclient/clparse.c
+++ b/freebsd/sbin/dhclient/clparse.c
@@ -644,6 +644,10 @@ parse_client_lease_declaration(FILE *cfile, struct client_lease *lease,
case FILENAME:
lease->filename = parse_string(cfile);
return;
+ case NEXT_SERVER:
+ if (!parse_ip_addr(cfile, &lease->nextserver))
+ return;
+ break;
case SERVER_NAME:
lease->server_name = parse_string(cfile);
return;
diff --git a/freebsd/sbin/dhclient/conflex.c b/freebsd/sbin/dhclient/conflex.c
index 4990f45f..c4fd442c 100644
--- a/freebsd/sbin/dhclient/conflex.c
+++ b/freebsd/sbin/dhclient/conflex.c
@@ -99,8 +99,8 @@ get_char(FILE *cfile)
cur_line = line2;
prev_line = line1;
} else {
- cur_line = line2;
- prev_line = line1;
+ cur_line = line1;
+ prev_line = line2;
}
line++;
lpos = 1;
diff --git a/freebsd/sbin/dhclient/dhclient.c b/freebsd/sbin/dhclient/dhclient.c
index 97824985..c7b75c59 100644
--- a/freebsd/sbin/dhclient/dhclient.c
+++ b/freebsd/sbin/dhclient/dhclient.c
@@ -61,6 +61,8 @@ __FBSDID("$FreeBSD$");
#include "dhcpd.h"
#include "privsep.h"
+#include <sys/capsicum.h>
+
#include <net80211/ieee80211_freebsd.h>
#ifndef _PATH_VAREMPTY
@@ -93,9 +95,10 @@ int log_perror = 1;
int privfd;
int nullfd = -1;
+char hostname[_POSIX_HOST_NAME_MAX + 1];
+
struct iaddr iaddr_broadcast = { 4, { 255, 255, 255, 255 } };
-struct in_addr inaddr_any;
-struct sockaddr_in sockaddr_broadcast;
+struct in_addr inaddr_any, inaddr_broadcast;
char *path_dhclient_pidfile;
struct pidfh *pidfile;
@@ -345,6 +348,7 @@ main(int argc, char *argv[])
int immediate_daemon = 0;
struct passwd *pw;
pid_t otherpid;
+ cap_rights_t rights;
/* Initially, log errors to stderr as well as to syslogd. */
openlog(__progname, LOG_PID | LOG_NDELAY, DHCPD_LOG_FACILITY);
@@ -389,7 +393,7 @@ main(int argc, char *argv[])
if (path_dhclient_pidfile == NULL)
error("asprintf");
}
- pidfile = pidfile_open(path_dhclient_pidfile, 0600, &otherpid);
+ pidfile = pidfile_open(path_dhclient_pidfile, 0644, &otherpid);
if (pidfile == NULL) {
if (errno == EEXIST)
error("dhclient already running, pid: %d.", otherpid);
@@ -412,11 +416,7 @@ main(int argc, char *argv[])
tzset();
time(&cur_time);
- memset(&sockaddr_broadcast, 0, sizeof(sockaddr_broadcast));
- sockaddr_broadcast.sin_family = AF_INET;
- sockaddr_broadcast.sin_port = htons(REMOTE_PORT);
- sockaddr_broadcast.sin_addr.s_addr = INADDR_BROADCAST;
- sockaddr_broadcast.sin_len = sizeof(sockaddr_broadcast);
+ inaddr_broadcast.s_addr = INADDR_BROADCAST;
inaddr_any.s_addr = INADDR_ANY;
read_client_conf();
@@ -453,13 +453,36 @@ main(int argc, char *argv[])
error("no such user: nobody");
}
+ /*
+ * Obtain hostname before entering capability mode - it won't be
+ * possible then, as reading kern.hostname is not permitted.
+ */
+ if (gethostname(hostname, sizeof(hostname)) < 0)
+ hostname[0] = '\0';
+
+ priv_script_init("PREINIT", NULL);
+ if (ifi->client->alias)
+ priv_script_write_params("alias_", ifi->client->alias);
+ priv_script_go();
+
+ /* set up the interface */
+ discover_interfaces(ifi);
+
if (pipe(pipe_fd) == -1)
error("pipe");
fork_privchld(pipe_fd[0], pipe_fd[1]);
+ close(ifi->ufdesc);
+ ifi->ufdesc = -1;
+ close(ifi->wfdesc);
+ ifi->wfdesc = -1;
+
close(pipe_fd[0]);
privfd = pipe_fd[1];
+ cap_rights_init(&rights, CAP_READ, CAP_WRITE);
+ if (cap_rights_limit(privfd, &rights) < 0 && errno != ENOSYS)
+ error("can't limit private descriptor: %m");
if ((fd = open(path_dhclient_db, O_RDONLY|O_EXLOCK|O_CREAT, 0)) == -1)
error("can't open and lock %s: %m", path_dhclient_db);
@@ -467,16 +490,13 @@ main(int argc, char *argv[])
rewrite_client_leases();
close(fd);
- priv_script_init("PREINIT", NULL);
- if (ifi->client->alias)
- priv_script_write_params("alias_", ifi->client->alias);
- priv_script_go();
-
if ((routefd = socket(PF_ROUTE, SOCK_RAW, 0)) != -1)
add_protocol("AF_ROUTE", routefd, routehandler, ifi);
-
- /* set up the interface */
- discover_interfaces(ifi);
+ if (shutdown(routefd, SHUT_WR) < 0)
+ error("can't shutdown route socket: %m");
+ cap_rights_init(&rights, CAP_EVENT, CAP_READ);
+ if (cap_rights_limit(routefd, &rights) < 0 && errno != ENOSYS)
+ error("can't limit route socket: %m");
if (chroot(_PATH_VAREMPTY) == -1)
error("chroot");
@@ -492,6 +512,9 @@ main(int argc, char *argv[])
setproctitle("%s", ifi->name);
+ if (cap_enter() < 0 && errno != ENOSYS)
+ error("can't enter capability mode: %m");
+
if (immediate_daemon)
go_daemon();
@@ -1065,6 +1088,9 @@ packet_to_lease(struct packet *packet)
lease->address.len = sizeof(packet->raw->yiaddr);
memcpy(lease->address.iabuf, &packet->raw->yiaddr, lease->address.len);
+ lease->nextserver.len = sizeof(packet->raw->siaddr);
+ memcpy(lease->nextserver.iabuf, &packet->raw->siaddr, lease->nextserver.len);
+
/* If the server name was filled out, copy it.
Do not attempt to validate the server name as a host name.
RFC 2131 merely states that sname is NUL-terminated (which do
@@ -1225,13 +1251,12 @@ again:
ip->client->secs = ip->client->packet.secs;
note("DHCPDISCOVER on %s to %s port %d interval %d",
- ip->name, inet_ntoa(sockaddr_broadcast.sin_addr),
- ntohs(sockaddr_broadcast.sin_port),
+ ip->name, inet_ntoa(inaddr_broadcast), REMOTE_PORT,
(int)ip->client->interval);
/* Send out a packet. */
- (void)send_packet(ip, &ip->client->packet, ip->client->packet_length,
- inaddr_any, &sockaddr_broadcast, NULL);
+ send_packet_unpriv(privfd, &ip->client->packet,
+ ip->client->packet_length, inaddr_any, inaddr_broadcast);
add_timeout(cur_time + ip->client->interval, send_discover, ip);
}
@@ -1339,8 +1364,7 @@ void
send_request(void *ipp)
{
struct interface_info *ip = ipp;
- struct sockaddr_in destination;
- struct in_addr from;
+ struct in_addr from, to;
int interval;
/* Figure out how long it's been since we started transmitting. */
@@ -1428,18 +1452,13 @@ cancel:
/* If the lease T2 time has elapsed, or if we're not yet bound,
broadcast the DHCPREQUEST rather than unicasting. */
- memset(&destination, 0, sizeof(destination));
if (ip->client->state == S_REQUESTING ||
ip->client->state == S_REBOOTING ||
cur_time > ip->client->active->rebind)
- destination.sin_addr.s_addr = INADDR_BROADCAST;
+ to.s_addr = INADDR_BROADCAST;
else
- memcpy(&destination.sin_addr.s_addr,
- ip->client->destination.iabuf,
- sizeof(destination.sin_addr.s_addr));
- destination.sin_port = htons(REMOTE_PORT);
- destination.sin_family = AF_INET;
- destination.sin_len = sizeof(destination);
+ memcpy(&to.s_addr, ip->client->destination.iabuf,
+ sizeof(to.s_addr));
if (ip->client->state != S_REQUESTING)
memcpy(&from, ip->client->active->address.iabuf,
@@ -1457,12 +1476,12 @@ cancel:
ip->client->packet.secs = htons(65535);
}
- note("DHCPREQUEST on %s to %s port %d", ip->name,
- inet_ntoa(destination.sin_addr), ntohs(destination.sin_port));
+ note("DHCPREQUEST on %s to %s port %d", ip->name, inet_ntoa(to),
+ REMOTE_PORT);
/* Send out a packet. */
- (void) send_packet(ip, &ip->client->packet, ip->client->packet_length,
- from, &destination, NULL);
+ send_packet_unpriv(privfd, &ip->client->packet,
+ ip->client->packet_length, from, to);
add_timeout(cur_time + ip->client->interval, send_request, ip);
}
@@ -1473,12 +1492,11 @@ send_decline(void *ipp)
struct interface_info *ip = ipp;
note("DHCPDECLINE on %s to %s port %d", ip->name,
- inet_ntoa(sockaddr_broadcast.sin_addr),
- ntohs(sockaddr_broadcast.sin_port));
+ inet_ntoa(inaddr_broadcast), REMOTE_PORT);
/* Send out a packet. */
- (void) send_packet(ip, &ip->client->packet, ip->client->packet_length,
- inaddr_any, &sockaddr_broadcast, NULL);
+ send_packet_unpriv(privfd, &ip->client->packet,
+ ip->client->packet_length, inaddr_any, inaddr_broadcast);
}
void
@@ -1535,11 +1553,10 @@ make_discover(struct interface_info *ip, struct client_lease *lease)
ip->client->config->send_options[i].len;
options[i]->timeout = 0xFFFFFFFF;
}
-
+
/* send host name if not set via config file. */
- char hostname[_POSIX_HOST_NAME_MAX+1];
if (!options[DHO_HOST_NAME]) {
- if (gethostname(hostname, sizeof(hostname)) == 0) {
+ if (hostname[0] != '\0') {
size_t len;
char* posDot = strchr(hostname, '.');
if (posDot != NULL)
@@ -1555,12 +1572,12 @@ make_discover(struct interface_info *ip, struct client_lease *lease)
}
/* set unique client identifier */
- char client_ident[sizeof(struct hardware)];
+ char client_ident[sizeof(ip->hw_address.haddr) + 1];
if (!options[DHO_DHCP_CLIENT_IDENTIFIER]) {
int hwlen = (ip->hw_address.hlen < sizeof(client_ident)-1) ?
ip->hw_address.hlen : sizeof(client_ident)-1;
client_ident[0] = ip->hw_address.htype;
- memcpy(&client_ident[1], ip->hw_address.haddr, hwlen);
+ memcpy(&client_ident[1], ip->hw_address.haddr, hwlen);
options[DHO_DHCP_CLIENT_IDENTIFIER] = &option_elements[DHO_DHCP_CLIENT_IDENTIFIER];
options[DHO_DHCP_CLIENT_IDENTIFIER]->value = client_ident;
options[DHO_DHCP_CLIENT_IDENTIFIER]->len = hwlen+1;
@@ -1659,11 +1676,10 @@ make_request(struct interface_info *ip, struct client_lease * lease)
ip->client->config->send_options[i].len;
options[i]->timeout = 0xFFFFFFFF;
}
-
+
/* send host name if not set via config file. */
- char hostname[_POSIX_HOST_NAME_MAX+1];
if (!options[DHO_HOST_NAME]) {
- if (gethostname(hostname, sizeof(hostname)) == 0) {
+ if (hostname[0] != '\0') {
size_t len;
char* posDot = strchr(hostname, '.');
if (posDot != NULL)
@@ -1684,7 +1700,7 @@ make_request(struct interface_info *ip, struct client_lease * lease)
int hwlen = (ip->hw_address.hlen < sizeof(client_ident)-1) ?
ip->hw_address.hlen : sizeof(client_ident)-1;
client_ident[0] = ip->hw_address.htype;
- memcpy(&client_ident[1], ip->hw_address.haddr, hwlen);
+ memcpy(&client_ident[1], ip->hw_address.haddr, hwlen);
options[DHO_DHCP_CLIENT_IDENTIFIER] = &option_elements[DHO_DHCP_CLIENT_IDENTIFIER];
options[DHO_DHCP_CLIENT_IDENTIFIER]->value = client_ident;
options[DHO_DHCP_CLIENT_IDENTIFIER]->len = hwlen+1;
@@ -1825,11 +1841,22 @@ void
rewrite_client_leases(void)
{
struct client_lease *lp;
+ cap_rights_t rights;
if (!leaseFile) {
leaseFile = fopen(path_dhclient_db, "w");
if (!leaseFile)
error("can't create %s: %m", path_dhclient_db);
+ cap_rights_init(&rights, CAP_FCNTL, CAP_FSTAT, CAP_FSYNC,
+ CAP_FTRUNCATE, CAP_SEEK, CAP_WRITE);
+ if (cap_rights_limit(fileno(leaseFile), &rights) < 0 &&
+ errno != ENOSYS) {
+ error("can't limit lease descriptor: %m");
+ }
+ if (cap_fcntls_limit(fileno(leaseFile), CAP_FCNTL_GETFL) < 0 &&
+ errno != ENOSYS) {
+ error("can't limit lease descriptor fcntls: %m");
+ }
} else {
fflush(leaseFile);
rewind(leaseFile);
@@ -1876,6 +1903,11 @@ write_client_lease(struct interface_info *ip, struct client_lease *lease,
fprintf(leaseFile, " bootp;\n");
fprintf(leaseFile, " interface \"%s\";\n", ip->name);
fprintf(leaseFile, " fixed-address %s;\n", piaddr(lease->address));
+ if (lease->nextserver.len == sizeof(inaddr_any) &&
+ 0 != memcmp(lease->nextserver.iabuf, &inaddr_any,
+ sizeof(inaddr_any)))
+ fprintf(leaseFile, " next-server %s;\n",
+ piaddr(lease->nextserver));
if (lease->filename)
fprintf(leaseFile, " filename \"%s\";\n", lease->filename);
if (lease->server_name)
@@ -2245,6 +2277,17 @@ script_set_env(struct client_state *client, const char *prefix,
{
int i, j, namelen;
+ /* No `` or $() command substitution allowed in environment values! */
+ for (j=0; j < strlen(value); j++)
+ switch (value[j]) {
+ case '`':
+ case '$':
+ warning("illegal character (%c) in value '%s'",
+ value[j], value);
+ /* Ignore this option */
+ return;
+ }
+
namelen = strlen(name);
for (i = 0; client->scriptEnv[i]; i++)
@@ -2281,16 +2324,6 @@ script_set_env(struct client_state *client, const char *prefix,
strlen(value) + 1);
if (client->scriptEnv[i] == NULL)
error("script_set_env: no memory for variable assignment");
-
- /* No `` or $() command substitution allowed in environment values! */
- for (j=0; j < strlen(value); j++)
- switch (value[j]) {
- case '`':
- case '$':
- error("illegal character (%c) in value '%s'", value[j],
- value);
- /* not reached */
- }
snprintf(client->scriptEnv[i], strlen(prefix) + strlen(name) +
1 + strlen(value) + 1, "%s%s=%s", prefix, name, value);
}
@@ -2329,6 +2362,7 @@ void
go_daemon(void)
{
static int state = 0;
+ cap_rights_t rights;
if (no_daemon || state)
return;
@@ -2341,8 +2375,15 @@ go_daemon(void)
if (daemon(1, 0) == -1)
error("daemon");
- if (pidfile != NULL)
+ cap_rights_init(&rights);
+
+ if (pidfile != NULL) {
pidfile_write(pidfile);
+ if (cap_rights_limit(pidfile_fileno(pidfile), &rights) < 0 &&
+ errno != ENOSYS) {
+ error("can't limit pidfile descriptor: %m");
+ }
+ }
/* we are chrooted, daemon(3) fails to open /dev/null */
if (nullfd != -1) {
@@ -2352,6 +2393,14 @@ go_daemon(void)
close(nullfd);
nullfd = -1;
}
+
+ if (cap_rights_limit(STDIN_FILENO, &rights) < 0 && errno != ENOSYS)
+ error("can't limit stdin: %m");
+ cap_rights_init(&rights, CAP_WRITE);
+ if (cap_rights_limit(STDOUT_FILENO, &rights) < 0 && errno != ENOSYS)
+ error("can't limit stdout: %m");
+ if (cap_rights_limit(STDERR_FILENO, &rights) < 0 && errno != ENOSYS)
+ error("can't limit stderr: %m");
}
int
@@ -2496,19 +2545,19 @@ check_classless_option(unsigned char *data, int len)
i += 4;
continue;
} else if (width < 9) {
- addr = (in_addr_t)(data[i] << 24);
+ addr = (in_addr_t)(data[i] << 24);
i += 1;
} else if (width < 17) {
- addr = (in_addr_t)(data[i] << 24) +
+ addr = (in_addr_t)(data[i] << 24) +
(in_addr_t)(data[i + 1] << 16);
i += 2;
} else if (width < 25) {
- addr = (in_addr_t)(data[i] << 24) +
+ addr = (in_addr_t)(data[i] << 24) +
(in_addr_t)(data[i + 1] << 16) +
(in_addr_t)(data[i + 2] << 8);
i += 3;
} else if (width < 33) {
- addr = (in_addr_t)(data[i] << 24) +
+ addr = (in_addr_t)(data[i] << 24) +
(in_addr_t)(data[i + 1] << 16) +
(in_addr_t)(data[i + 2] << 8) +
data[i + 3];
@@ -2532,7 +2581,7 @@ check_classless_option(unsigned char *data, int len)
addr &= mask;
data[i - 1] = (unsigned char)(
(addr >> (((32 - width)/8)*8)) & 0xFF);
- }
+ }
i += 4;
}
if (i > len) {
@@ -2696,6 +2745,8 @@ fork_privchld(int fd, int fd2)
dup2(nullfd, STDERR_FILENO);
close(nullfd);
close(fd2);
+ close(ifi->rfdesc);
+ ifi->rfdesc = -1;
for (;;) {
pfd[0].fd = fd;
@@ -2707,6 +2758,6 @@ fork_privchld(int fd, int fd2)
if (nfds == 0 || !(pfd[0].revents & POLLIN))
continue;
- dispatch_imsg(fd);
+ dispatch_imsg(ifi, fd);
}
}
diff --git a/freebsd/sbin/dhclient/dhcpd.h b/freebsd/sbin/dhclient/dhcpd.h
index 030a96a0..852513a8 100644
--- a/freebsd/sbin/dhclient/dhcpd.h
+++ b/freebsd/sbin/dhclient/dhcpd.h
@@ -121,6 +121,7 @@ struct client_lease {
struct client_lease *next;
time_t expiry, renewal, rebind;
struct iaddr address;
+ struct iaddr nextserver;
char *server_name;
char *filename;
struct string_list *medium;
@@ -296,11 +297,13 @@ struct hash_table *new_hash_table(int);
struct hash_bucket *new_hash_bucket(void);
/* bpf.c */
-int if_register_bpf(struct interface_info *);
+int if_register_bpf(struct interface_info *, int);
void if_register_send(struct interface_info *);
void if_register_receive(struct interface_info *);
-ssize_t send_packet(struct interface_info *, struct dhcp_packet *, size_t,
- struct in_addr, struct sockaddr_in *, struct hardware *);
+void send_packet_unpriv(int, struct dhcp_packet *, size_t, struct in_addr,
+ struct in_addr);
+struct imsg_hdr;
+void send_packet_priv(struct interface_info *, struct imsg_hdr *, int);
ssize_t receive_packet(struct interface_info *, unsigned char *, size_t,
struct sockaddr_in *, struct hardware *);
@@ -404,20 +407,13 @@ void bootp(struct packet *);
void dhcp(struct packet *);
/* packet.c */
-void assemble_hw_header(struct interface_info *, unsigned char *,
- int *, struct hardware *);
+void assemble_hw_header(struct interface_info *, unsigned char *, int *);
void assemble_udp_ip_header(unsigned char *, int *, u_int32_t, u_int32_t,
unsigned int, unsigned char *, int);
ssize_t decode_hw_header(unsigned char *, int, struct hardware *);
ssize_t decode_udp_ip_header(unsigned char *, int, struct sockaddr_in *,
unsigned char *, int);
-/* ethernet.c */
-void assemble_ethernet_header(struct interface_info *, unsigned char *,
- int *, struct hardware *);
-ssize_t decode_ethernet_header(struct interface_info *, unsigned char *,
- int, struct hardware *);
-
/* clparse.c */
int read_client_conf(void);
void read_client_leases(void);
@@ -441,4 +437,4 @@ struct buf *buf_open(size_t);
int buf_add(struct buf *, void *, size_t);
int buf_close(int, struct buf *);
ssize_t buf_read(int, void *, size_t);
-void dispatch_imsg(int);
+void dispatch_imsg(struct interface_info *, int);
diff --git a/freebsd/sbin/dhclient/options.c b/freebsd/sbin/dhclient/options.c
index c86b7e3b..5ea8de7c 100644
--- a/freebsd/sbin/dhclient/options.c
+++ b/freebsd/sbin/dhclient/options.c
@@ -286,8 +286,8 @@ find_search_domain_name_len(struct option_data *option, int *offset)
option->data[i + 1];
if (pointer >= *offset) {
/*
- * The pointer must indicates a prior
- * occurance.
+ * The pointer must indicate a prior
+ * occurrence.
*/
warning("Invalid forward pointer in DHCP "
"Domain Search option compression.");
diff --git a/freebsd/sbin/dhclient/packet.c b/freebsd/sbin/dhclient/packet.c
index e4fa0e86..28a9ccec 100644
--- a/freebsd/sbin/dhclient/packet.c
+++ b/freebsd/sbin/dhclient/packet.c
@@ -57,11 +57,6 @@ __FBSDID("$FreeBSD$");
u_int32_t checksum(unsigned char *, unsigned, u_int32_t);
u_int32_t wrapsum(u_int32_t);
-void assemble_ethernet_header(struct interface_info *, unsigned char *,
- int *, struct hardware *);
-ssize_t decode_ethernet_header(struct interface_info *, unsigned char *,
- int bufix, struct hardware *);
-
u_int32_t
checksum(unsigned char *buf, unsigned nbytes, u_int32_t sum)
{
@@ -97,14 +92,11 @@ wrapsum(u_int32_t sum)
void
assemble_hw_header(struct interface_info *interface, unsigned char *buf,
- int *bufix, struct hardware *to)
+ int *bufix)
{
struct ether_header eh;
- if (to != NULL && to->hlen == 6) /* XXX */
- memcpy(eh.ether_dhost, to->haddr, sizeof(eh.ether_dhost));
- else
- memset(eh.ether_dhost, 0xff, sizeof(eh.ether_dhost));
+ memset(eh.ether_dhost, 0xff, sizeof(eh.ether_dhost));
if (interface->hw_address.hlen == sizeof(eh.ether_shost))
memcpy(eh.ether_shost, interface->hw_address.haddr,
sizeof(eh.ether_shost));
@@ -137,17 +129,6 @@ assemble_udp_ip_header(unsigned char *buf, int *bufix, u_int32_t from,
ip.ip_dst.s_addr = to;
ip.ip_sum = wrapsum(checksum((unsigned char *)&ip, sizeof(ip), 0));
-
- /*
- * While the BPF -- used for broadcasts -- expects a "true" IP header
- * with all the bytes in network byte order, the raw socket interface
- * which is used for unicasts expects the ip_len field to be in host
- * byte order. In both cases, the checksum has to be correct, so this
- * is as good a place as any to turn the bytes around again.
- */
- if (to != INADDR_BROADCAST)
- ip.ip_len = ntohs(ip.ip_len);
-
memcpy(&buf[*bufix], &ip, sizeof(ip));
*bufix += sizeof(ip);
diff --git a/freebsd/sbin/dhclient/privsep.c b/freebsd/sbin/dhclient/privsep.c
index 7b68059a..aa1042fd 100644
--- a/freebsd/sbin/dhclient/privsep.c
+++ b/freebsd/sbin/dhclient/privsep.c
@@ -103,7 +103,7 @@ buf_read(int sock, void *buf, size_t nbytes)
}
void
-dispatch_imsg(int fd)
+dispatch_imsg(struct interface_info *ifi, int fd)
{
struct imsg_hdr hdr;
char *medium, *reason, *filename,
@@ -234,6 +234,9 @@ dispatch_imsg(int fd)
if (buf_close(fd, buf) == -1)
error("buf_close: %m");
break;
+ case IMSG_SEND_PACKET:
+ send_packet_priv(ifi, &hdr, fd);
+ break;
default:
error("received unknown message, code %d", hdr.code);
}
diff --git a/freebsd/sbin/dhclient/privsep.h b/freebsd/sbin/dhclient/privsep.h
index f30284ee..d464da43 100644
--- a/freebsd/sbin/dhclient/privsep.h
+++ b/freebsd/sbin/dhclient/privsep.h
@@ -14,6 +14,8 @@
* WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
* OF OR IN CONNECTION WITH THE USE, ABUSE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * $FreeBSD$
*/
#include <sys/types.h>
@@ -33,7 +35,8 @@ enum imsg_code {
IMSG_SCRIPT_INIT,
IMSG_SCRIPT_WRITE_PARAMS,
IMSG_SCRIPT_GO,
- IMSG_SCRIPT_GO_RET
+ IMSG_SCRIPT_GO_RET,
+ IMSG_SEND_PACKET
};
struct imsg_hdr {
diff --git a/freebsd/sbin/ifconfig/af_inet.c b/freebsd/sbin/ifconfig/af_inet.c
index 1d713eec..d464df52 100644
--- a/freebsd/sbin/ifconfig/af_inet.c
+++ b/freebsd/sbin/ifconfig/af_inet.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*
* Copyright (c) 1983, 1993
* The Regents of the University of California. All rights reserved.
@@ -34,7 +38,10 @@ static const char rcsid[] =
"$FreeBSD$";
#endif /* not lint */
-#include <sys/types.h>
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
+#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
@@ -48,20 +55,25 @@ static const char rcsid[] =
#include <ifaddrs.h>
#include <netinet/in.h>
-#include <net/if_var.h> /* for struct ifaddr */
#include <netinet/in_var.h>
#include <arpa/inet.h>
#include <netdb.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-af_inet-data.h"
+#endif /* __rtems__ */
static struct in_aliasreq in_addreq;
static struct ifreq in_ridreq;
+static char addr_buf[NI_MAXHOST]; /*for getnameinfo()*/
+extern char *f_inet, *f_addr;
static void
in_status(int s __unused, const struct ifaddrs *ifa)
{
struct sockaddr_in *sin, null_sin;
+ int error, n_flags;
memset(&null_sin, 0, sizeof(null_sin));
@@ -69,25 +81,56 @@ in_status(int s __unused, const struct ifaddrs *ifa)
if (sin == NULL)
return;
- printf("\tinet %s ", inet_ntoa(sin->sin_addr));
+ if (f_addr != NULL && strcmp(f_addr, "fqdn") == 0)
+ n_flags = 0;
+ else if (f_addr != NULL && strcmp(f_addr, "host") == 0)
+ n_flags = NI_NOFQDN;
+ else
+ n_flags = NI_NUMERICHOST;
+
+ error = getnameinfo((struct sockaddr *)sin, sin->sin_len, addr_buf,
+ sizeof(addr_buf), NULL, 0, n_flags);
+
+ if (error)
+ inet_ntop(AF_INET, &sin->sin_addr, addr_buf, sizeof(addr_buf));
+
+ printf("\tinet %s", addr_buf);
if (ifa->ifa_flags & IFF_POINTOPOINT) {
sin = (struct sockaddr_in *)ifa->ifa_dstaddr;
if (sin == NULL)
sin = &null_sin;
- printf("--> %s ", inet_ntoa(sin->sin_addr));
+ printf(" --> %s ", inet_ntoa(sin->sin_addr));
}
sin = (struct sockaddr_in *)ifa->ifa_netmask;
if (sin == NULL)
sin = &null_sin;
- printf("netmask 0x%lx ", (unsigned long)ntohl(sin->sin_addr.s_addr));
+ if (f_inet != NULL && strcmp(f_inet, "cidr") == 0) {
+ int cidr = 32;
+ unsigned long smask;
+
+ smask = ntohl(sin->sin_addr.s_addr);
+ while ((smask & 1) == 0) {
+ smask = smask >> 1;
+ cidr--;
+ if (cidr == 0)
+ break;
+ }
+ printf("/%d ", cidr);
+ } else if (f_inet != NULL && strcmp(f_inet, "dotted") == 0)
+ printf(" netmask %s ", inet_ntoa(sin->sin_addr));
+ else
+ printf(" netmask 0x%lx ", (unsigned long)ntohl(sin->sin_addr.s_addr));
if (ifa->ifa_flags & IFF_BROADCAST) {
sin = (struct sockaddr_in *)ifa->ifa_broadaddr;
if (sin != NULL && sin->sin_addr.s_addr != 0)
- printf("broadcast %s", inet_ntoa(sin->sin_addr));
+ printf("broadcast %s ", inet_ntoa(sin->sin_addr));
}
+
+ print_vhid(ifa, " ");
+
putchar('\n');
}
@@ -100,7 +143,6 @@ static struct sockaddr_in *sintab[] = {
static void
in_getaddr(const char *s, int which)
{
-#define MIN(a,b) ((a)<(b)?(a):(b))
struct sockaddr_in *sin = sintab[which];
struct hostent *hp;
struct netent *np;
@@ -117,7 +159,11 @@ in_getaddr(const char *s, int which)
int masklen;
struct sockaddr_in *min = sintab[MASK];
*p = '\0';
+#ifndef __rtems__
+ if (!isdigit(*(p + 1)))
+#else /* __rtems__ */
if (!isdigit((unsigned char)*(p + 1)))
+#endif /* __rtems__ */
errstr = "invalid";
else
masklen = (int)strtonum(p + 1, 0, 32, &errstr);
@@ -134,14 +180,13 @@ in_getaddr(const char *s, int which)
if (inet_aton(s, &sin->sin_addr))
return;
- if ((hp = gethostbyname(s)) != 0)
+ if ((hp = gethostbyname(s)) != NULL)
bcopy(hp->h_addr, (char *)&sin->sin_addr,
MIN((size_t)hp->h_length, sizeof(sin->sin_addr)));
- else if ((np = getnetbyname(s)) != 0)
+ else if ((np = getnetbyname(s)) != NULL)
sin->sin_addr = inet_makeaddr(np->n_net, INADDR_ANY);
else
errx(1, "%s: bad value", s);
-#undef MIN
}
static void
@@ -153,7 +198,7 @@ in_status_tunnel(int s)
const struct sockaddr *sa = (const struct sockaddr *) &ifr.ifr_addr;
memset(&ifr, 0, sizeof(ifr));
- strncpy(ifr.ifr_name, name, IFNAMSIZ);
+ strlcpy(ifr.ifr_name, name, IFNAMSIZ);
if (ioctl(s, SIOCGIFPSRCADDR, (caddr_t)&ifr) < 0)
return;
@@ -178,7 +223,7 @@ in_set_tunnel(int s, struct addrinfo *srcres, struct addrinfo *dstres)
struct in_aliasreq addreq;
memset(&addreq, 0, sizeof(addreq));
- strncpy(addreq.ifra_name, name, IFNAMSIZ);
+ strlcpy(addreq.ifra_name, name, IFNAMSIZ);
memcpy(&addreq.ifra_addr, srcres->ai_addr, srcres->ai_addr->sa_len);
memcpy(&addreq.ifra_dstaddr, dstres->ai_addr, dstres->ai_addr->sa_len);
@@ -211,11 +256,5 @@ inet_ctor(void)
if (!feature_present("inet"))
return;
#endif
-
-#ifdef __rtems__
- memset(&in_addreq, 0, sizeof(in_addreq));
- memset(&in_ridreq, 0, sizeof(in_ridreq));
-#endif /* __rtems__ */
-
af_register(&af_inet);
}
diff --git a/freebsd/sbin/ifconfig/af_inet6.c b/freebsd/sbin/ifconfig/af_inet6.c
index 0e099ada..f743ee8f 100644
--- a/freebsd/sbin/ifconfig/af_inet6.c
+++ b/freebsd/sbin/ifconfig/af_inet6.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*
* Copyright (c) 1983, 1993
* The Regents of the University of California. All rights reserved.
@@ -34,6 +38,9 @@ static const char rcsid[] =
"$FreeBSD$";
#endif /* not lint */
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -45,12 +52,12 @@ static const char rcsid[] =
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <time.h>
#include <ifaddrs.h>
#include <arpa/inet.h>
#include <netinet/in.h>
-#include <net/if_var.h> /* for struct ifaddr */
#include <netinet/in_var.h>
#include <arpa/inet.h>
#include <netdb.h>
@@ -58,23 +65,26 @@ static const char rcsid[] =
#include <netinet6/nd6.h> /* Define ND6_INFINITE_LIFETIME */
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-af_inet6-data.h"
+#endif /* __rtems__ */
static struct in6_ifreq in6_ridreq;
-static struct in6_aliasreq in6_addreq =
- { .ifra_flags = 0,
+static struct in6_aliasreq in6_addreq =
+ { .ifra_flags = 0,
.ifra_lifetime = { 0, 0, ND6_INFINITE_LIFETIME, ND6_INFINITE_LIFETIME } };
static int ip6lifetime;
-static void in6_fillscopeid(struct sockaddr_in6 *sin6);
static int prefix(void *, int);
static char *sec2str(time_t);
static int explicit_prefix = 0;
+extern char *f_inet6, *f_addr;
extern void setnd6flags(const char *, int, int, const struct afswtch *);
extern void setnd6defif(const char *, int, int, const struct afswtch *);
extern void nd6_status(int);
-static char addr_buf[MAXHOSTNAMELEN *2 + 1]; /*for getnameinfo()*/
+static char addr_buf[NI_MAXHOST]; /*for getnameinfo()*/
static void
setifprefixlen(const char *addr, int dummy __unused, int s,
@@ -102,20 +112,21 @@ static void
setip6lifetime(const char *cmd, const char *val, int s,
const struct afswtch *afp)
{
- time_t newval, t;
+ struct timespec now;
+ time_t newval;
char *ep;
- t = time(NULL);
+ clock_gettime(CLOCK_MONOTONIC_FAST, &now);
newval = (time_t)strtoul(val, &ep, 0);
if (val == ep)
errx(1, "invalid %s", cmd);
if (afp->af_af != AF_INET6)
errx(1, "%s not allowed for the AF", cmd);
if (strcmp(cmd, "vltime") == 0) {
- in6_addreq.ifra_lifetime.ia6t_expire = t + newval;
+ in6_addreq.ifra_lifetime.ia6t_expire = now.tv_sec + newval;
in6_addreq.ifra_lifetime.ia6t_vltime = newval;
} else if (strcmp(cmd, "pltime") == 0) {
- in6_addreq.ifra_lifetime.ia6t_preferred = t + newval;
+ in6_addreq.ifra_lifetime.ia6t_preferred = now.tv_sec + newval;
in6_addreq.ifra_lifetime.ia6t_pltime = newval;
}
}
@@ -169,18 +180,6 @@ setip6eui64(const char *cmd, int dummy __unused, int s,
}
static void
-in6_fillscopeid(struct sockaddr_in6 *sin6)
-{
-#if defined(__KAME__) && defined(KAME_SCOPEID)
- if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
- sin6->sin6_scope_id =
- ntohs(*(u_int16_t *)&sin6->sin6_addr.s6_addr[2]);
- sin6->sin6_addr.s6_addr[2] = sin6->sin6_addr.s6_addr[3] = 0;
- }
-#endif
-}
-
-static void
in6_status(int s __unused, const struct ifaddrs *ifa)
{
struct sockaddr_in6 *sin, null_sin;
@@ -188,9 +187,10 @@ in6_status(int s __unused, const struct ifaddrs *ifa)
int s6;
u_int32_t flags6;
struct in6_addrlifetime lifetime;
- time_t t = time(NULL);
- int error;
- u_int32_t scopeid;
+ struct timespec now;
+ int error, n_flags;
+
+ clock_gettime(CLOCK_MONOTONIC_FAST, &now);
memset(&null_sin, 0, sizeof(null_sin));
@@ -198,7 +198,7 @@ in6_status(int s __unused, const struct ifaddrs *ifa)
if (sin == NULL)
return;
- strncpy(ifr6.ifr_name, ifr.ifr_name, sizeof(ifr.ifr_name));
+ strlcpy(ifr6.ifr_name, ifr.ifr_name, sizeof(ifr.ifr_name));
if ((s6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
warn("socket(AF_INET6,SOCK_DGRAM)");
return;
@@ -220,24 +220,19 @@ in6_status(int s __unused, const struct ifaddrs *ifa)
lifetime = ifr6.ifr_ifru.ifru_lifetime;
close(s6);
- /* XXX: embedded link local addr check */
- if (IN6_IS_ADDR_LINKLOCAL(&sin->sin6_addr) &&
- *(u_short *)&sin->sin6_addr.s6_addr[2] != 0) {
- u_short index;
-
- index = *(u_short *)&sin->sin6_addr.s6_addr[2];
- *(u_short *)&sin->sin6_addr.s6_addr[2] = 0;
- if (sin->sin6_scope_id == 0)
- sin->sin6_scope_id = ntohs(index);
- }
- scopeid = sin->sin6_scope_id;
-
- error = getnameinfo((struct sockaddr *)sin, sin->sin6_len, addr_buf,
- sizeof(addr_buf), NULL, 0, NI_NUMERICHOST);
+ if (f_addr != NULL && strcmp(f_addr, "fqdn") == 0)
+ n_flags = 0;
+ else if (f_addr != NULL && strcmp(f_addr, "host") == 0)
+ n_flags = NI_NOFQDN;
+ else
+ n_flags = NI_NUMERICHOST;
+ error = getnameinfo((struct sockaddr *)sin, sin->sin6_len,
+ addr_buf, sizeof(addr_buf), NULL, 0,
+ n_flags);
if (error != 0)
inet_ntop(AF_INET6, &sin->sin6_addr, addr_buf,
sizeof(addr_buf));
- printf("\tinet6 %s ", addr_buf);
+ printf("\tinet6 %s", addr_buf);
if (ifa->ifa_flags & IFF_POINTOPOINT) {
sin = (struct sockaddr_in6 *)ifa->ifa_dstaddr;
@@ -248,17 +243,6 @@ in6_status(int s __unused, const struct ifaddrs *ifa)
if (sin != NULL && sin->sin6_family == AF_INET6) {
int error;
- /* XXX: embedded link local addr check */
- if (IN6_IS_ADDR_LINKLOCAL(&sin->sin6_addr) &&
- *(u_short *)&sin->sin6_addr.s6_addr[2] != 0) {
- u_short index;
-
- index = *(u_short *)&sin->sin6_addr.s6_addr[2];
- *(u_short *)&sin->sin6_addr.s6_addr[2] = 0;
- if (sin->sin6_scope_id == 0)
- sin->sin6_scope_id = ntohs(index);
- }
-
error = getnameinfo((struct sockaddr *)sin,
sin->sin6_len, addr_buf,
sizeof(addr_buf), NULL, 0,
@@ -266,15 +250,19 @@ in6_status(int s __unused, const struct ifaddrs *ifa)
if (error != 0)
inet_ntop(AF_INET6, &sin->sin6_addr, addr_buf,
sizeof(addr_buf));
- printf("--> %s ", addr_buf);
+ printf(" --> %s ", addr_buf);
}
}
sin = (struct sockaddr_in6 *)ifa->ifa_netmask;
if (sin == NULL)
sin = &null_sin;
- printf("prefixlen %d ", prefix(&sin->sin6_addr,
- sizeof(struct in6_addr)));
+ if (f_inet6 != NULL && strcmp(f_inet6, "cidr") == 0)
+ printf("/%d ", prefix(&sin->sin6_addr,
+ sizeof(struct in6_addr)));
+ else
+ printf(" prefixlen %d ", prefix(&sin->sin6_addr,
+ sizeof(struct in6_addr)));
if ((flags6 & IN6_IFF_ANYCAST) != 0)
printf("anycast ");
@@ -293,25 +281,30 @@ in6_status(int s __unused, const struct ifaddrs *ifa)
if ((flags6 & IN6_IFF_PREFER_SOURCE) != 0)
printf("prefer_source ");
- if (scopeid)
- printf("scopeid 0x%x ", scopeid);
+ if (((struct sockaddr_in6 *)(ifa->ifa_addr))->sin6_scope_id)
+ printf("scopeid 0x%x ",
+ ((struct sockaddr_in6 *)(ifa->ifa_addr))->sin6_scope_id);
if (ip6lifetime && (lifetime.ia6t_preferred || lifetime.ia6t_expire)) {
printf("pltime ");
if (lifetime.ia6t_preferred) {
- printf("%s ", lifetime.ia6t_preferred < t
- ? "0" : sec2str(lifetime.ia6t_preferred - t));
+ printf("%s ", lifetime.ia6t_preferred < now.tv_sec
+ ? "0" :
+ sec2str(lifetime.ia6t_preferred - now.tv_sec));
} else
printf("infty ");
printf("vltime ");
if (lifetime.ia6t_expire) {
- printf("%s ", lifetime.ia6t_expire < t
- ? "0" : sec2str(lifetime.ia6t_expire - t));
+ printf("%s ", lifetime.ia6t_expire < now.tv_sec
+ ? "0" :
+ sec2str(lifetime.ia6t_expire - now.tv_sec));
} else
printf("infty ");
}
+ print_vhid(ifa, " ");
+
putchar('\n');
}
@@ -389,25 +382,25 @@ done:
static int
prefix(void *val, int size)
{
- u_char *name = (u_char *)val;
- int byte, bit, plen = 0;
+ u_char *name = (u_char *)val;
+ int byte, bit, plen = 0;
- for (byte = 0; byte < size; byte++, plen += 8)
- if (name[byte] != 0xff)
- break;
+ for (byte = 0; byte < size; byte++, plen += 8)
+ if (name[byte] != 0xff)
+ break;
if (byte == size)
return (plen);
for (bit = 7; bit != 0; bit--, plen++)
- if (!(name[byte] & (1 << bit)))
- break;
- for (; bit != 0; bit--)
- if (name[byte] & (1 << bit))
- return(0);
- byte++;
- for (; byte < size; byte++)
- if (name[byte])
- return(0);
- return (plen);
+ if (!(name[byte] & (1 << bit)))
+ break;
+ for (; bit != 0; bit--)
+ if (name[byte] & (1 << bit))
+ return(0);
+ byte++;
+ for (; byte < size; byte++)
+ if (name[byte])
+ return(0);
+ return (plen);
}
static char *
@@ -464,13 +457,12 @@ in6_status_tunnel(int s)
const struct sockaddr *sa = (const struct sockaddr *) &in6_ifr.ifr_addr;
memset(&in6_ifr, 0, sizeof(in6_ifr));
- strncpy(in6_ifr.ifr_name, name, IFNAMSIZ);
+ strlcpy(in6_ifr.ifr_name, name, sizeof(in6_ifr.ifr_name));
if (ioctl(s, SIOCGIFPSRCADDR_IN6, (caddr_t)&in6_ifr) < 0)
return;
if (sa->sa_family != AF_INET6)
return;
- in6_fillscopeid(&in6_ifr.ifr_addr);
if (getnameinfo(sa, sa->sa_len, src, sizeof(src), 0, 0,
NI_NUMERICHOST) != 0)
src[0] = '\0';
@@ -479,7 +471,6 @@ in6_status_tunnel(int s)
return;
if (sa->sa_family != AF_INET6)
return;
- in6_fillscopeid(&in6_ifr.ifr_addr);
if (getnameinfo(sa, sa->sa_len, dst, sizeof(dst), 0, 0,
NI_NUMERICHOST) != 0)
dst[0] = '\0';
@@ -493,7 +484,7 @@ in6_set_tunnel(int s, struct addrinfo *srcres, struct addrinfo *dstres)
struct in6_aliasreq in6_addreq;
memset(&in6_addreq, 0, sizeof(in6_addreq));
- strncpy(in6_addreq.ifra_name, name, IFNAMSIZ);
+ strlcpy(in6_addreq.ifra_name, name, sizeof(in6_addreq.ifra_name));
memcpy(&in6_addreq.ifra_addr, srcres->ai_addr, srcres->ai_addr->sa_len);
memcpy(&in6_addreq.ifra_dstaddr, dstres->ai_addr,
dstres->ai_addr->sa_len);
@@ -527,6 +518,8 @@ static struct cmd inet6_cmds[] = {
DEF_CMD("-auto_linklocal",-ND6_IFF_AUTO_LINKLOCAL,setnd6flags),
DEF_CMD("no_prefer_iface",ND6_IFF_NO_PREFER_IFACE,setnd6flags),
DEF_CMD("-no_prefer_iface",-ND6_IFF_NO_PREFER_IFACE,setnd6flags),
+ DEF_CMD("no_dad", ND6_IFF_NO_DAD, setnd6flags),
+ DEF_CMD("-no_dad", -ND6_IFF_NO_DAD, setnd6flags),
DEF_CMD_ARG("pltime", setip6pltime),
DEF_CMD_ARG("vltime", setip6vltime),
DEF_CMD("eui64", 0, setip6eui64),
@@ -553,7 +546,11 @@ in6_Lopt_cb(const char *optarg __unused)
{
ip6lifetime++; /* print IPv6 address lifetime */
}
-static struct option in6_Lopt = { .opt = "L", .opt_usage = "[-L]", .cb = in6_Lopt_cb };
+static struct option in6_Lopt = {
+ .opt = "L",
+ .opt_usage = "[-L]",
+ .cb = in6_Lopt_cb
+};
#ifndef __rtems__
static __constructor void
@@ -562,16 +559,6 @@ void
#endif /* __rtems__ */
inet6_ctor(void)
{
-#ifdef __rtems__
- memset(&in6_ridreq, 0, sizeof(in6_ridreq));
- memset(&in6_addreq, 0, sizeof(in6_addreq));
- in6_addreq.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
- in6_addreq.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
- ip6lifetime = 0;
- explicit_prefix = 0;
- memset(&addr_buf, 0, sizeof(addr_buf));
-#endif /* __rtems__ */
-#define N(a) (sizeof(a) / sizeof(a[0]))
size_t i;
#ifndef RESCUE
@@ -579,9 +566,8 @@ inet6_ctor(void)
return;
#endif
- for (i = 0; i < N(inet6_cmds); i++)
+ for (i = 0; i < nitems(inet6_cmds); i++)
cmd_register(&inet6_cmds[i]);
af_register(&af_inet6);
opt_register(&in6_Lopt);
-#undef N
}
diff --git a/freebsd/sbin/ifconfig/af_link.c b/freebsd/sbin/ifconfig/af_link.c
index ffd92e30..1c37496e 100644
--- a/freebsd/sbin/ifconfig/af_link.c
+++ b/freebsd/sbin/ifconfig/af_link.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*
* Copyright (c) 1983, 1993
* The Regents of the University of California. All rights reserved.
@@ -34,6 +38,9 @@ static const char rcsid[] =
"$FreeBSD$";
#endif /* not lint */
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -50,23 +57,35 @@ static const char rcsid[] =
#include <net/ethernet.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-af_link-data.h"
+#endif /* __rtems__ */
static struct ifreq link_ridreq;
+extern char *f_ether;
+
static void
link_status(int s __unused, const struct ifaddrs *ifa)
{
/* XXX no const 'cuz LLADDR is defined wrong */
struct sockaddr_dl *sdl = (struct sockaddr_dl *) ifa->ifa_addr;
+ char *ether_format, *format_char;
if (sdl != NULL && sdl->sdl_alen > 0) {
if ((sdl->sdl_type == IFT_ETHER ||
sdl->sdl_type == IFT_L2VLAN ||
sdl->sdl_type == IFT_BRIDGE) &&
- sdl->sdl_alen == ETHER_ADDR_LEN)
- printf("\tether %s\n",
- ether_ntoa((struct ether_addr *)LLADDR(sdl)));
- else {
+ sdl->sdl_alen == ETHER_ADDR_LEN) {
+ ether_format = ether_ntoa((struct ether_addr *)LLADDR(sdl));
+ if (f_ether != NULL && strcmp(f_ether, "dash") == 0) {
+ for (format_char = strchr(ether_format, ':');
+ format_char != NULL;
+ format_char = strchr(ether_format, ':'))
+ *format_char = '-';
+ }
+ printf("\tether %s\n", ether_format);
+ } else {
int n = sdl->sdl_nlen > 0 ? sdl->sdl_nlen + 1 : 0;
printf("\tlladdr %s\n", link_ntoa(sdl) + n);
@@ -129,9 +148,6 @@ void
#endif /* __rtems__ */
link_ctor(void)
{
-#ifdef __rtems__
- memset(&link_ridreq, 0, sizeof(link_ridreq));
-#endif /* __rtems__ */
af_register(&af_link);
af_register(&af_ether);
af_register(&af_lladdr);
diff --git a/freebsd/sbin/ifconfig/af_nd6.c b/freebsd/sbin/ifconfig/af_nd6.c
index a1e930b0..ac302c65 100644
--- a/freebsd/sbin/ifconfig/af_nd6.c
+++ b/freebsd/sbin/ifconfig/af_nd6.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*
* Copyright (c) 2009 Hiroki Sato. All rights reserved.
*
@@ -30,6 +34,9 @@ static const char rcsid[] =
"$FreeBSD$";
#endif /* not lint */
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -48,7 +55,6 @@ static const char rcsid[] =
#include <arpa/inet.h>
#include <netinet/in.h>
-#include <net/if_var.h>
#include <netinet/in_var.h>
#include <arpa/inet.h>
#include <netdb.h>
@@ -56,11 +62,15 @@ static const char rcsid[] =
#include <netinet6/nd6.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-af_nd6-data.h"
+#endif /* __rtems__ */
#define MAX_SYSCTL_TRY 5
#define ND6BITS "\020\001PERFORMNUD\002ACCEPT_RTADV\003PREFER_SOURCE" \
"\004IFDISABLED\005DONT_SET_IFROUTE\006AUTO_LINKLOCAL" \
- "\007NO_RADR\010NO_PREFER_IFACE\020DEFAULTIF"
+ "\007NO_RADR\010NO_PREFER_IFACE\011IGNORELOOP\012NO_DAD" \
+ "\020DEFAULTIF"
static int isnd6defif(int);
void setnd6flags(const char *, int, int, const struct afswtch *);
@@ -76,7 +86,7 @@ setnd6flags(const char *dummyaddr __unused,
int error;
memset(&nd, 0, sizeof(nd));
- strncpy(nd.ifname, ifr.ifr_name, sizeof(nd.ifname));
+ strlcpy(nd.ifname, ifr.ifr_name, sizeof(nd.ifname));
error = ioctl(s, SIOCGIFINFO_IN6, &nd);
if (error) {
warn("ioctl(SIOCGIFINFO_IN6)");
@@ -101,7 +111,7 @@ setnd6defif(const char *dummyaddr __unused,
int error;
memset(&ndifreq, 0, sizeof(ndifreq));
- strncpy(ndifreq.ifname, ifr.ifr_name, sizeof(ndifreq.ifname));
+ strlcpy(ndifreq.ifname, ifr.ifr_name, sizeof(ndifreq.ifname));
if (d < 0) {
if (isnd6defif(s)) {
@@ -128,7 +138,7 @@ isnd6defif(int s)
int error;
memset(&ndifreq, 0, sizeof(ndifreq));
- strncpy(ndifreq.ifname, ifr.ifr_name, sizeof(ndifreq.ifname));
+ strlcpy(ndifreq.ifname, ifr.ifr_name, sizeof(ndifreq.ifname));
ifindex = if_nametoindex(ndifreq.ifname);
error = ioctl(s, SIOCGDEFIFACE_IN6, (caddr_t)&ndifreq);
@@ -148,7 +158,7 @@ nd6_status(int s)
int isdefif;
memset(&nd, 0, sizeof(nd));
- strncpy(nd.ifname, ifr.ifr_name, sizeof(nd.ifname));
+ strlcpy(nd.ifname, ifr.ifr_name, sizeof(nd.ifname));
if ((s6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
if (errno != EAFNOSUPPORT && errno != EPROTONOSUPPORT)
warn("socket(AF_INET6, SOCK_DGRAM)");
diff --git a/freebsd/sbin/ifconfig/ifbridge.c b/freebsd/sbin/ifconfig/ifbridge.c
index 9a4d868d..26412a02 100644
--- a/freebsd/sbin/ifconfig/ifbridge.c
+++ b/freebsd/sbin/ifconfig/ifbridge.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright 2001 Wasabi Systems, Inc.
* All rights reserved.
@@ -40,6 +44,9 @@ static const char rcsid[] =
"$FreeBSD$";
#endif /* not lint */
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -62,6 +69,9 @@ static const char rcsid[] =
#include <errno.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-ifbridge-data.h"
+#endif /* __rtems__ */
#define PV2ID(pv, epri, eaddr) do { \
epri = pv >> 48; \
@@ -73,7 +83,7 @@ static const char rcsid[] =
eaddr[5] = pv >> 0; \
} while (0)
-static const char *const stpstates[] = {
+static const char *stpstates[] = {
"disabled",
"listening",
"learning",
@@ -81,12 +91,12 @@ static const char *const stpstates[] = {
"blocking",
"discarding"
};
-static const char *const stpproto[] = {
+static const char *stpproto[] = {
"stp",
"-",
"rstp"
};
-static const char *const stproles[] = {
+static const char *stproles[] = {
"disabled",
"root",
"designated",
@@ -157,7 +167,11 @@ bridge_interfaces(int s, const char *prefix)
err(1, "strdup");
/* replace the prefix with whitespace */
for (p = pad; *p != '\0'; p++) {
+#ifndef __rtems__
+ if(isprint(*p))
+#else /* __rtems__ */
if(isprint((unsigned char)*p))
+#endif /* __rtems__ */
*p = ' ';
}
@@ -187,22 +201,19 @@ bridge_interfaces(int s, const char *prefix)
printf(" path cost %u", req->ifbr_path_cost);
if (req->ifbr_ifsflags & IFBIF_STP) {
- if (req->ifbr_proto <
- sizeof(stpproto) / sizeof(stpproto[0]))
+ if (req->ifbr_proto < nitems(stpproto))
printf(" proto %s", stpproto[req->ifbr_proto]);
else
printf(" <unknown proto %d>",
req->ifbr_proto);
printf("\n%s", pad);
- if (req->ifbr_role <
- sizeof(stproles) / sizeof(stproles[0]))
+ if (req->ifbr_role < nitems(stproles))
printf("role %s", stproles[req->ifbr_role]);
else
printf("<unknown role %d>",
req->ifbr_role);
- if (req->ifbr_state <
- sizeof(stpstates) / sizeof(stpstates[0]))
+ if (req->ifbr_state < nitems(stpstates))
printf(" state %s", stpstates[req->ifbr_state]);
else
printf(" <unknown state %d>",
@@ -755,11 +766,9 @@ void
#endif /* __rtems__ */
bridge_ctor(void)
{
-#define N(a) (sizeof(a) / sizeof(a[0]))
int i;
- for (i = 0; i < N(bridge_cmds); i++)
+ for (i = 0; i < nitems(bridge_cmds); i++)
cmd_register(&bridge_cmds[i]);
af_register(&af_bridge);
-#undef N
}
diff --git a/freebsd/sbin/ifconfig/ifcarp.c b/freebsd/sbin/ifconfig/ifcarp.c
index d0f3616d..61e3fa36 100644
--- a/freebsd/sbin/ifconfig/ifcarp.c
+++ b/freebsd/sbin/ifconfig/ifcarp.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/* $FreeBSD$ */
/* from $OpenBSD: ifconfig.c,v 1.82 2003/10/19 05:43:35 mcbride Exp $ */
@@ -29,6 +33,9 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -51,6 +58,9 @@
#include <errno.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-ifcarp-data.h"
+#endif /* __rtems__ */
static const char *const carp_states[] = { CARP_STATES };
diff --git a/freebsd/sbin/ifconfig/ifclone.c b/freebsd/sbin/ifconfig/ifclone.c
index 8fb9d6c3..6dcd344c 100644
--- a/freebsd/sbin/ifconfig/ifclone.c
+++ b/freebsd/sbin/ifconfig/ifclone.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*
* Copyright (c) 1983, 1993
* The Regents of the University of California. All rights reserved.
@@ -35,20 +39,11 @@ static const char rcsid[] =
#endif /* not lint */
#ifdef __rtems__
-#define RTEMS_BSD_PROGRAM_NO_OPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_SOCKET_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FOPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FCLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_MALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_REALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FREE_WRAP
#include <machine/rtems-bsd-program.h>
#endif /* __rtems__ */
-#include <sys/queue.h>
-#include <sys/types.h>
+#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
+#include <sys/queue.h>
#include <sys/socket.h>
#include <net/if.h>
@@ -59,6 +54,9 @@ static const char rcsid[] =
#include <unistd.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-ifclone-data.h"
+#endif /* __rtems__ */
static void
list_cloners(void)
@@ -84,10 +82,8 @@ list_cloners(void)
ifcr.ifcr_count = ifcr.ifcr_total;
ifcr.ifcr_buffer = buf;
- if (ioctl(s, SIOCIFGCLONERS, &ifcr) < 0) {
- free(buf);
+ if (ioctl(s, SIOCIFGCLONERS, &ifcr) < 0)
err(1, "SIOCIFGCLONERS for names");
- }
/*
* In case some disappeared in the mean time, clamp it down.
@@ -111,7 +107,11 @@ struct clone_defcb {
SLIST_ENTRY(clone_defcb) next;
};
+#ifndef __rtems__
static SLIST_HEAD(, clone_defcb) clone_defcbh =
+#else /* __rtems__ */
+static SLIST_HEAD(clone_defcb_list, clone_defcb) clone_defcbh =
+#endif /* __rtems__ */
SLIST_HEAD_INITIALIZER(clone_defcbh);
void
@@ -165,11 +165,12 @@ ifclonecreate(int s, void *arg)
}
/*
- * If we get a different name back than we put in, print it.
+ * If we get a different name back than we put in, update record and
+ * indicate it should be printed later.
*/
if (strncmp(name, ifr.ifr_name, sizeof(name)) != 0) {
strlcpy(name, ifr.ifr_name, sizeof(name));
- printf("%s\n", name);
+ printifname = 1;
}
}
@@ -182,7 +183,7 @@ DECL_CMD_FUNC(clone_create, arg, d)
static
DECL_CMD_FUNC(clone_destroy, arg, d)
{
- (void) strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+ (void) strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
if (ioctl(s, SIOCIFDESTROY, &ifr) < 0)
err(1, "SIOCIFDESTROY");
}
@@ -209,26 +210,9 @@ void
#endif /* __rtems__ */
clone_ctor(void)
{
-#ifdef __rtems__
- SLIST_INIT(&clone_defcbh);
-#endif /* __rtems__ */
-#define N(a) (sizeof(a) / sizeof(a[0]))
size_t i;
- for (i = 0; i < N(clone_cmds); i++)
+ for (i = 0; i < nitems(clone_cmds); i++)
cmd_register(&clone_cmds[i]);
opt_register(&clone_Copt);
-#undef N
}
-#ifdef __rtems__
-void
-clone_dtor(void)
-{
- struct clone_defcb *dcp;
- struct clone_defcb *dcp_tmp;
-
- SLIST_FOREACH_SAFE(dcp, &clone_defcbh, next, dcp_tmp) {
- free(dcp);
- }
-}
-#endif /* __rtems__ */
diff --git a/freebsd/sbin/ifconfig/ifconfig.c b/freebsd/sbin/ifconfig/ifconfig.c
index 041e53d0..ad199be8 100644
--- a/freebsd/sbin/ifconfig/ifconfig.c
+++ b/freebsd/sbin/ifconfig/ifconfig.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*
* Copyright (c) 1983, 1993
* The Regents of the University of California. All rights reserved.
@@ -48,28 +52,19 @@ static const char rcsid[] =
#define option getopt_option
#include <getopt.h>
#undef option
-#define RTEMS_BSD_PROGRAM_NO_OPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_SOCKET_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FOPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FCLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_MALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_REALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FREE_WRAP
#include <machine/rtems-bsd-program.h>
#include <machine/rtems-bsd-commands.h>
#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/time.h>
#include <sys/module.h>
#include <sys/linker.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/time.h>
#include <net/ethernet.h>
#include <net/if.h>
-#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
@@ -85,13 +80,18 @@ static const char rcsid[] =
#include <err.h>
#include <errno.h>
#include <fcntl.h>
+#ifdef JAIL
#include <jail.h>
+#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-ifconfig-data.h"
+#endif /* __rtems__ */
/*
* Since "struct ifreq" is composed of various union members, callers
@@ -110,10 +110,14 @@ int clearaddr;
int newaddr = 1;
int verbose;
int noload;
+int printifname = 0;
int supmedia = 0;
int printkeys = 0; /* Print keying material for interfaces. */
+/* Formatter Strings */
+char *f_inet, *f_inet6, *f_ether, *f_addr;
+
static int ifconfig(int argc, char *const *argv, int iscreate,
const struct afswtch *afp);
static void status(const struct afswtch *afp, const struct sockaddr_dl *sdl,
@@ -125,8 +129,19 @@ static struct afswtch *af_getbyname(const char *name);
static struct afswtch *af_getbyfamily(int af);
static void af_other_status(int);
+void printifnamemaybe(void);
+
static struct option *opts = NULL;
+struct ifa_order_elt {
+ int if_order;
+ int af_orders[255];
+ struct ifaddrs *ifa;
+ TAILQ_ENTRY(ifa_order_elt) link;
+};
+
+TAILQ_HEAD(ifa_queue, ifa_order_elt);
+
void
opt_register(struct option *p)
{
@@ -148,8 +163,8 @@ usage(void)
}
fprintf(stderr,
- "usage: ifconfig %sinterface address_family [address [dest_address]]\n"
- " [parameters]\n"
+ "usage: ifconfig [-f type:format] %sinterface address_family\n"
+ " [address [dest_address]] [parameters]\n"
" ifconfig interface create\n"
" ifconfig -a %s[-d] [-m] [-u] [-v] [address_family]\n"
" ifconfig -l [-d] [-u] [address_family]\n"
@@ -158,39 +173,241 @@ usage(void)
exit(1);
}
+#define ORDERS_SIZE(x) sizeof(x) / sizeof(x[0])
+
+static int
+calcorders(struct ifaddrs *ifa, struct ifa_queue *q)
+{
+ struct ifaddrs *prev;
+ struct ifa_order_elt *cur;
+ unsigned int ord, af, ifa_ord;
+
+ prev = NULL;
+ cur = NULL;
+ ord = 0;
+ ifa_ord = 0;
+
+ while (ifa != NULL) {
+ if (prev == NULL ||
+ strcmp(ifa->ifa_name, prev->ifa_name) != 0) {
+ cur = calloc(1, sizeof(*cur));
+
+ if (cur == NULL)
+ return (-1);
+
+ TAILQ_INSERT_TAIL(q, cur, link);
+ cur->if_order = ifa_ord ++;
+ cur->ifa = ifa;
+ ord = 0;
+ }
+
+ if (ifa->ifa_addr) {
+ af = ifa->ifa_addr->sa_family;
+
+ if (af < ORDERS_SIZE(cur->af_orders) &&
+ cur->af_orders[af] == 0)
+ cur->af_orders[af] = ++ord;
+ }
+ prev = ifa;
+ ifa = ifa->ifa_next;
+ }
+
+ return (0);
+}
+
+static int
+cmpifaddrs(struct ifaddrs *a, struct ifaddrs *b, struct ifa_queue *q)
+{
+ struct ifa_order_elt *cur, *e1, *e2;
+ unsigned int af1, af2;
+ int ret;
+
+ e1 = e2 = NULL;
+
+ ret = strcmp(a->ifa_name, b->ifa_name);
+ if (ret != 0) {
+ TAILQ_FOREACH(cur, q, link) {
+ if (e1 && e2)
+ break;
+
+ if (strcmp(cur->ifa->ifa_name, a->ifa_name) == 0)
+ e1 = cur;
+ else if (strcmp(cur->ifa->ifa_name, b->ifa_name) == 0)
+ e2 = cur;
+ }
+
+ if (!e1 || !e2)
+ return (0);
+ else
+ return (e1->if_order - e2->if_order);
+
+ } else if (a->ifa_addr != NULL && b->ifa_addr != NULL) {
+ TAILQ_FOREACH(cur, q, link) {
+ if (strcmp(cur->ifa->ifa_name, a->ifa_name) == 0) {
+ e1 = cur;
+ break;
+ }
+ }
+
+ if (!e1)
+ return (0);
+
+ af1 = a->ifa_addr->sa_family;
+ af2 = b->ifa_addr->sa_family;
+
+ if (af1 < ORDERS_SIZE(e1->af_orders) &&
+ af2 < ORDERS_SIZE(e1->af_orders))
+ return (e1->af_orders[af1] - e1->af_orders[af2]);
+ }
+
+ return (0);
+}
+
+static void freeformat(void)
+{
+
+ if (f_inet != NULL)
+ free(f_inet);
+ if (f_inet6 != NULL)
+ free(f_inet6);
+ if (f_ether != NULL)
+ free(f_ether);
+ if (f_addr != NULL)
+ free(f_addr);
+}
+
+static void setformat(char *input)
+{
+ char *formatstr, *category, *modifier;
+
+ formatstr = strdup(input);
+ while ((category = strsep(&formatstr, ",")) != NULL) {
+ modifier = strchr(category, ':');
+ if (modifier == NULL || modifier[1] == '\0') {
+ warnx("Skipping invalid format specification: %s\n",
+ category);
+ continue;
+ }
+
+ /* Split the string on the separator, then seek past it */
+ modifier[0] = '\0';
+ modifier++;
+
+ if (strcmp(category, "addr") == 0)
+ f_addr = strdup(modifier);
+ else if (strcmp(category, "ether") == 0)
+ f_ether = strdup(modifier);
+ else if (strcmp(category, "inet") == 0)
+ f_inet = strdup(modifier);
+ else if (strcmp(category, "inet6") == 0)
+ f_inet6 = strdup(modifier);
+ }
+ free(formatstr);
+}
+
+#undef ORDERS_SIZE
+
+static struct ifaddrs *
+sortifaddrs(struct ifaddrs *list,
+ int (*compare)(struct ifaddrs *, struct ifaddrs *, struct ifa_queue *),
+ struct ifa_queue *q)
+{
+ struct ifaddrs *right, *temp, *last, *result, *next, *tail;
+
+ right = list;
+ temp = list;
+ last = list;
+ result = NULL;
+ next = NULL;
+ tail = NULL;
+
+ if (!list || !list->ifa_next)
+ return (list);
+
+ while (temp && temp->ifa_next) {
+ last = right;
+ right = right->ifa_next;
+ temp = temp->ifa_next->ifa_next;
+ }
+
+ last->ifa_next = NULL;
+
+ list = sortifaddrs(list, compare, q);
+ right = sortifaddrs(right, compare, q);
+
+ while (list || right) {
+
+ if (!right) {
+ next = list;
+ list = list->ifa_next;
+ } else if (!list) {
+ next = right;
+ right = right->ifa_next;
+ } else if (compare(list, right, q) <= 0) {
+ next = list;
+ list = list->ifa_next;
+ } else {
+ next = right;
+ right = right->ifa_next;
+ }
+
+ if (!result)
+ result = next;
+ else
+ tail->ifa_next = next;
+
+ tail = next;
+ }
+
+ return (result);
+}
+
+void printifnamemaybe()
+{
+ if (printifname)
+ printf("%s\n", name);
+}
+
#ifdef __rtems__
static void ifconfig_ctor(void);
-static void ifconfig_dtor(void);
static int main(int argc, char *argv[]);
-int rtems_bsd_command_ifconfig(int argc, char *argv[])
+static int
+mainwrapper(int argc, char *argv[])
{
- int exit_code;
-
- rtems_bsd_program_lock();
-
ifconfig_ctor();
-
bridge_ctor();
- carp_ctor();
clone_ctor();
gif_ctor();
gre_ctor();
group_ctor();
ifmedia_ctor();
- inet_ctor();
inet6_ctor();
+ inet_ctor();
lagg_ctor();
link_ctor();
mac_ctor();
pfsync_ctor();
vlan_ctor();
- exit_code = rtems_bsd_program_call_main("ifconfig", main, argc, argv);
+ return main(argc, argv);
+}
+
+RTEMS_LINKER_RWSET(bsd_prog_ifconfig, char);
+
+int
+rtems_bsd_command_ifconfig(int argc, char *argv[])
+{
+ int exit_code;
+ const void *data_begin;
+ size_t data_size;
- clone_dtor();
- ifconfig_dtor();
+ data_begin = RTEMS_LINKER_SET_BEGIN(bsd_prog_ifconfig);
+ data_size = RTEMS_LINKER_SET_SIZE(bsd_prog_ifconfig);
+ rtems_bsd_program_lock();
+ exit_code = rtems_bsd_program_call_main_with_data_restore("ifconfig",
+ mainwrapper, argc, argv, data_begin, data_size);
rtems_bsd_program_unlock();
return exit_code;
@@ -202,10 +419,12 @@ main(int argc, char *argv[])
int c, all, namesonly, downonly, uponly;
const struct afswtch *afp = NULL;
int ifindex;
- struct ifaddrs *ifap, *ifa;
+ struct ifaddrs *ifap, *sifap, *ifa;
struct ifreq paifr;
const struct sockaddr_dl *sdl;
- char options[1024], *cp, *namecp = NULL;
+ char options[1024], *cp, *envformat, *namecp = NULL;
+ struct ifa_queue q = TAILQ_HEAD_INITIALIZER(q);
+ struct ifa_order_elt *cur, *tmp;
const char *ifname;
struct option *p;
size_t iflen;
@@ -220,12 +439,23 @@ main(int argc, char *argv[])
#endif /* __rtems__ */
all = downonly = uponly = namesonly = noload = verbose = 0;
+ f_inet = f_inet6 = f_ether = f_addr = NULL;
+
+ envformat = getenv("IFCONFIG_FORMAT");
+ if (envformat != NULL)
+ setformat(envformat);
+
+ /*
+ * Ensure we print interface name when expected to,
+ * even if we terminate early due to error.
+ */
+ atexit(printifnamemaybe);
/* Parse leading line options */
#ifndef __rtems__
- strlcpy(options, "adklmnuv", sizeof(options));
+ strlcpy(options, "f:adklmnuv", sizeof(options));
#else /* __rtems__ */
- strlcpy(options, "+adklmnuv", sizeof(options));
+ strlcpy(options, "+f:adklmnuv", sizeof(options));
#endif /* __rtems__ */
for (p = opts; p != NULL; p = p->next)
strlcat(options, p->opt, sizeof(options));
@@ -237,6 +467,11 @@ main(int argc, char *argv[])
case 'd': /* restrict scan to "down" interfaces */
downonly++;
break;
+ case 'f':
+ if (optarg == NULL)
+ usage();
+ setformat(optarg);
+ break;
case 'k':
printkeys++;
break;
@@ -325,6 +560,7 @@ main(int argc, char *argv[])
ifconfig(argc, argv, 1, NULL);
exit(0);
}
+#ifdef JAIL
/*
* NOTE: We have to special-case the `-vnet' command
* right here as we would otherwise fail when trying
@@ -338,6 +574,7 @@ main(int argc, char *argv[])
ifconfig(argc, argv, 0, NULL);
exit(0);
}
+#endif
errx(1, "interface %s does not exist", ifname);
}
}
@@ -351,11 +588,21 @@ main(int argc, char *argv[])
if (getifaddrs(&ifap) != 0)
err(EXIT_FAILURE, "getifaddrs");
+
cp = NULL;
+
+ if (calcorders(ifap, &q) != 0)
+ err(EXIT_FAILURE, "calcorders");
+
+ sifap = sortifaddrs(ifap, cmpifaddrs, &q);
+
+ TAILQ_FOREACH_SAFE(cur, &q, link, tmp)
+ free(cur);
+
ifindex = 0;
- for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
+ for (ifa = sifap; ifa; ifa = ifa->ifa_next) {
memset(&paifr, 0, sizeof(paifr));
- strncpy(paifr.ifr_name, ifa->ifa_name, sizeof(paifr.ifr_name));
+ strlcpy(paifr.ifr_name, ifa->ifa_name, sizeof(paifr.ifr_name));
if (sizeof(paifr.ifr_addr) >= ifa->ifa_addr->sa_len) {
memcpy(&paifr.ifr_addr, ifa->ifa_addr,
ifa->ifa_addr->sa_len);
@@ -399,7 +646,8 @@ main(int argc, char *argv[])
sdl->sdl_alen != ETHER_ADDR_LEN)
continue;
} else {
- if (ifa->ifa_addr->sa_family != afp->af_af)
+ if (ifa->ifa_addr->sa_family
+ != afp->af_af)
continue;
}
}
@@ -421,6 +669,7 @@ main(int argc, char *argv[])
printf("\n");
freeifaddrs(ifap);
+ freeformat();
exit(0);
}
@@ -501,7 +750,6 @@ cmd_register(struct cmd *p)
static const struct cmd *
cmd_lookup(const char *name, int iscreate)
{
-#define N(a) (sizeof(a)/sizeof(a[0]))
const struct cmd *p;
for (p = cmds; p != NULL; p = p->c_next)
@@ -515,7 +763,6 @@ cmd_lookup(const char *name, int iscreate)
}
}
return NULL;
-#undef N
}
struct callback {
@@ -555,7 +802,7 @@ ifconfig(int argc, char *const *argv, int iscreate, const struct afswtch *uafp)
struct callback *cb;
int s;
- strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
+ strlcpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
afp = NULL;
if (uafp != NULL)
afp = uafp;
@@ -586,7 +833,7 @@ top:
AF_LOCAL : afp->af_af;
if ((s = socket(ifr.ifr_addr.sa_family, SOCK_DGRAM, 0)) < 0 &&
- (uafp != NULL || errno != EPROTONOSUPPORT ||
+ (uafp != NULL || errno != EAFNOSUPPORT ||
(s = socket(AF_LOCAL, SOCK_DGRAM, 0)) < 0))
err(1, "socket(family %u,SOCK_DGRAM", ifr.ifr_addr.sa_family);
@@ -676,7 +923,8 @@ top:
}
if (clearaddr) {
int ret;
- strncpy(afp->af_ridreq, name, sizeof ifr.ifr_name);
+ strlcpy(((struct ifreq *)afp->af_ridreq)->ifr_name, name,
+ sizeof ifr.ifr_name);
ret = ioctl(s, afp->af_difaddr, afp->af_ridreq);
if (ret < 0) {
if (errno == EADDRNOTAVAIL && (doalias >= 0)) {
@@ -693,7 +941,8 @@ top:
}
}
if (newaddr && (setaddr || setmask)) {
- strncpy(afp->af_addreq, name, sizeof ifr.ifr_name);
+ strlcpy(((struct ifreq *)afp->af_addreq)->ifr_name, name,
+ sizeof ifr.ifr_name);
if (ioctl(s, afp->af_aifaddr, afp->af_addreq) < 0)
Perror("ioctl (SIOCAIFADDR)");
}
@@ -735,7 +984,7 @@ settunnel(const char *src, const char *dst, int s, const struct afswtch *afp)
errx(1, "error in parsing address string: %s",
gai_strerror(ecode));
- if ((ecode = getaddrinfo(dst, NULL, NULL, &dstres)) != 0)
+ if ((ecode = getaddrinfo(dst, NULL, NULL, &dstres)) != 0)
errx(1, "error in parsing address string: %s",
gai_strerror(ecode));
@@ -758,7 +1007,7 @@ deletetunnel(const char *vname, int param, int s, const struct afswtch *afp)
err(1, "SIOCDIFPHYADDR");
}
-#ifndef __rtems__
+#ifdef JAIL
static void
setifvnet(const char *jname, int dummy __unused, int s,
const struct afswtch *afp)
@@ -786,7 +1035,7 @@ setifrvnet(const char *jname, int dummy __unused, int s,
if (ioctl(s, SIOCSIFRVNET, &my_ifr) < 0)
err(1, "SIOCSIFRVNET(%d, %s)", my_ifr.ifr_jid, my_ifr.ifr_name);
}
-#endif /* __rtems__ */
+#endif
static void
setifnetmask(const char *addr, int dummy __unused, int s,
@@ -807,20 +1056,6 @@ setifbroadaddr(const char *addr, int dummy __unused, int s,
}
static void
-setifipdst(const char *addr, int dummy __unused, int s,
- const struct afswtch *afp)
-{
- const struct afswtch *inet;
-
- inet = af_getbyname("inet");
- if (inet == NULL)
- return;
- inet->af_getaddr(addr, DSTADDR);
- clearaddr = 0;
- newaddr = 0;
-}
-
-static void
notealias(const char *addr, int param, int s, const struct afswtch *afp)
{
#define rqtosa(x) (&(((struct ifreq *)(afp->x))->ifr_addr))
@@ -903,20 +1138,20 @@ static void
setifmetric(const char *val, int dummy __unused, int s,
const struct afswtch *afp)
{
- strncpy(ifr.ifr_name, name, sizeof (ifr.ifr_name));
+ strlcpy(ifr.ifr_name, name, sizeof (ifr.ifr_name));
ifr.ifr_metric = atoi(val);
if (ioctl(s, SIOCSIFMETRIC, (caddr_t)&ifr) < 0)
- warn("ioctl (set metric)");
+ err(1, "ioctl SIOCSIFMETRIC (set metric)");
}
static void
setifmtu(const char *val, int dummy __unused, int s,
const struct afswtch *afp)
{
- strncpy(ifr.ifr_name, name, sizeof (ifr.ifr_name));
+ strlcpy(ifr.ifr_name, name, sizeof (ifr.ifr_name));
ifr.ifr_mtu = atoi(val);
if (ioctl(s, SIOCSIFMTU, (caddr_t)&ifr) < 0)
- warn("ioctl (set mtu)");
+ err(1, "ioctl SIOCSIFMTU (set mtu)");
}
static void
@@ -924,18 +1159,18 @@ setifname(const char *val, int dummy __unused, int s,
const struct afswtch *afp)
{
char *newname;
+
+ strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
newname = strdup(val);
- if (newname == NULL) {
- warn("no memory to set ifname");
- return;
- }
+ if (newname == NULL)
+ err(1, "no memory to set ifname");
ifr.ifr_data = newname;
if (ioctl(s, SIOCSIFNAME, (caddr_t)&ifr) < 0) {
- warn("ioctl (set name)");
free(newname);
- return;
+ err(1, "ioctl SIOCSIFNAME (set name)");
}
+ printifname = 1;
strlcpy(name, newname, sizeof(name));
free(newname);
}
@@ -947,6 +1182,8 @@ setifdescr(const char *val, int dummy __unused, int s,
{
char *newdescr;
+ strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+
ifr.ifr_buffer.length = strlen(val) + 1;
if (ifr.ifr_buffer.length == 1) {
ifr.ifr_buffer.buffer = newdescr = NULL;
@@ -961,7 +1198,7 @@ setifdescr(const char *val, int dummy __unused, int s,
}
if (ioctl(s, SIOCSIFDESCR, (caddr_t)&ifr) < 0)
- warn("ioctl (set descr)");
+ err(1, "ioctl SIOCSIFDESCR (set descr)");
free(newdescr);
}
@@ -975,7 +1212,7 @@ unsetifdescr(const char *val, int value, int s, const struct afswtch *afp)
}
#define IFFBITS \
-"\020\1UP\2BROADCAST\3DEBUG\4LOOPBACK\5POINTOPOINT\6SMART\7RUNNING" \
+"\020\1UP\2BROADCAST\3DEBUG\4LOOPBACK\5POINTOPOINT\7RUNNING" \
"\10NOARP\11PROMISC\12ALLMULTI\13OACTIVE\14SIMPLEX\15LINK0\16LINK1\17LINK2" \
"\20MULTICAST\22PPROMISC\23MONITOR\24STATICARP"
@@ -1009,7 +1246,7 @@ status(const struct afswtch *afp, const struct sockaddr_dl *sdl,
ifr.ifr_addr.sa_family =
afp->af_af == AF_LINK ? AF_LOCAL : afp->af_af;
}
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+ strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
s = socket(ifr.ifr_addr.sa_family, SOCK_DGRAM, 0);
if (s < 0)
@@ -1091,10 +1328,13 @@ status(const struct afswtch *afp, const struct sockaddr_dl *sdl,
else if (afp->af_other_status != NULL)
afp->af_other_status(s);
- strncpy(ifs.ifs_name, name, sizeof ifs.ifs_name);
+ strlcpy(ifs.ifs_name, name, sizeof ifs.ifs_name);
if (ioctl(s, SIOCGIFSTATUS, &ifs) == 0)
printf("%s", ifs.ascii);
+ if (verbose > 0)
+ sfp_status(s, &ifr, verbose);
+
close(s);
return;
}
@@ -1155,6 +1395,21 @@ printb(const char *s, unsigned v, const char *bits)
}
void
+print_vhid(const struct ifaddrs *ifa, const char *s)
+{
+ struct if_data *ifd;
+
+ if (ifa->ifa_data == NULL)
+ return;
+
+ ifd = ifa->ifa_data;
+ if (ifd->ifi_vhid == 0)
+ return;
+
+ printf("vhid %d ", ifd->ifi_vhid);
+}
+
+void
ifmaybeload(const char *name)
{
#ifndef __rtems__
@@ -1177,9 +1432,8 @@ ifmaybeload(const char *name)
}
/* turn interface and unit into module name */
- strcpy(ifkind, "if_");
- strlcpy(ifkind + MOD_PREFIX_LEN, ifname,
- sizeof(ifkind) - MOD_PREFIX_LEN);
+ strlcpy(ifkind, "if_", sizeof(ifkind));
+ strlcat(ifkind, ifname, sizeof(ifkind));
/* scan files in kernel */
mstat.version = sizeof(struct module_stat);
@@ -1196,8 +1450,8 @@ ifmaybeload(const char *name)
cp = mstat.name;
}
/* already loaded? */
- if (strncmp(ifname, cp, strlen(ifname) + 1) == 0 ||
- strncmp(ifkind, cp, strlen(ifkind) + 1) == 0)
+ if (strcmp(ifname, cp) == 0 ||
+ strcmp(ifkind, cp) == 0)
return;
}
}
@@ -1233,14 +1487,13 @@ static struct cmd basic_cmds[] = {
DEF_CMD_ARG("netmask", setifnetmask),
DEF_CMD_ARG("metric", setifmetric),
DEF_CMD_ARG("broadcast", setifbroadaddr),
- DEF_CMD_ARG("ipdst", setifipdst),
DEF_CMD_ARG2("tunnel", settunnel),
DEF_CMD("-tunnel", 0, deletetunnel),
DEF_CMD("deletetunnel", 0, deletetunnel),
-#ifndef __rtems__
+#ifdef JAIL
DEF_CMD_ARG("vnet", setifvnet),
DEF_CMD_ARG("-vnet", setifrvnet),
-#endif /* __rtems__ */
+#endif
DEF_CMD("link0", IFF_LINK0, setifflags),
DEF_CMD("-link0", -IFF_LINK0, setifflags),
DEF_CMD("link1", IFF_LINK1, setifflags),
@@ -1288,48 +1541,15 @@ static struct cmd basic_cmds[] = {
DEF_CMD_ARG("name", setifname),
};
+#ifndef __rtems__
static __constructor void
+#else /* __rtems__ */
+static void
+#endif /* __rtems__ */
ifconfig_ctor(void)
{
-#ifdef __rtems__
- memset(&ifr, 0, sizeof(ifr));
- memset(&name, 0, sizeof(name));
- descr = NULL;
- descrlen = 64;
- setaddr = 0;
- setmask = 0;
- doalias = 0;
- clearaddr = 0;
- newaddr = 1;
- verbose = 0;
- noload = 0;
- supmedia = 0;
- printkeys = 0;
- opts = NULL;
- afs = NULL;
- callbacks = NULL;
- cmds = NULL;
-#endif /* __rtems__ */
-#define N(a) (sizeof(a) / sizeof(a[0]))
size_t i;
- for (i = 0; i < N(basic_cmds); i++)
+ for (i = 0; i < nitems(basic_cmds); i++)
cmd_register(&basic_cmds[i]);
-#undef N
-}
-#ifdef __rtems__
-static void
-ifconfig_dtor(void)
-{
- struct callback *cb = callbacks;
-
- while (cb != NULL) {
- struct callback *to_free = cb;
-
- cb = to_free->cb_next;
- free(to_free);
- }
-
- free(descr);
}
-#endif /* __rtems__ */
diff --git a/freebsd/sbin/ifconfig/ifconfig.h b/freebsd/sbin/ifconfig/ifconfig.h
index 074e810e..d9be9c7b 100644
--- a/freebsd/sbin/ifconfig/ifconfig.h
+++ b/freebsd/sbin/ifconfig/ifconfig.h
@@ -34,11 +34,7 @@
* $FreeBSD$
*/
-#ifndef __rtems__
#define __constructor __attribute__((constructor))
-#else /* __rtems__ */
-#define __constructor
-#endif /* __rtems__ */
struct afswtch;
struct cmd;
@@ -78,6 +74,7 @@ void callback_register(callback_func *, void *);
#define DEF_CMD_ARG2(name, func) { name, NEXTARG2, { .c_func2 = func }, 0, NULL }
#define DEF_CLONE_CMD(name, param, func) { name, param, { .c_func = func }, 1, NULL }
#define DEF_CLONE_CMD_ARG(name, func) { name, NEXTARG, { .c_func = func }, 1, NULL }
+#define DEF_CLONE_CMD_ARG2(name, func) { name, NEXTARG2, { .c_func2 = func }, 1, NULL }
struct ifaddrs;
struct addrinfo;
@@ -136,6 +133,7 @@ extern int supmedia;
extern int printkeys;
extern int newaddr;
extern int verbose;
+extern int printifname;
void setifcap(const char *, int value, int s, const struct afswtch *);
@@ -147,19 +145,23 @@ void ifmaybeload(const char *name);
typedef void clone_callback_func(int, struct ifreq *);
void clone_setdefcallback(const char *, clone_callback_func *);
+void sfp_status(int s, struct ifreq *ifr, int verbose);
+
/*
* XXX expose this so modules that neeed to know of any pending
* operations on ifmedia can avoid cmd line ordering confusion.
*/
struct ifmediareq *ifmedia_getstate(int s);
+
+void print_vhid(const struct ifaddrs *, const char *);
#ifdef __rtems__
-void atalk_ctor(void);
void bridge_ctor(void);
void carp_ctor(void);
void clone_ctor(void);
void gif_ctor(void);
void gre_ctor(void);
void group_ctor(void);
+void ieee80211_ctor(void);
void ifmedia_ctor(void);
void inet6_ctor(void);
void inet_ctor(void);
@@ -169,5 +171,6 @@ void mac_ctor(void);
void pfsync_ctor(void);
void vlan_ctor(void);
-void clone_dtor(void);
+/* Necessary for struct ifmedia_description */
+#include <if_media.h>
#endif /* __rtems__ */
diff --git a/freebsd/sbin/ifconfig/ifgif.c b/freebsd/sbin/ifconfig/ifgif.c
index e55933a5..c13a0506 100644
--- a/freebsd/sbin/ifconfig/ifgif.c
+++ b/freebsd/sbin/ifconfig/ifgif.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright (c) 2009 Hiroki Sato. All rights reserved.
*
@@ -30,6 +34,9 @@ static const char rcsid[] =
"$FreeBSD$";
#endif
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -52,8 +59,11 @@ static const char rcsid[] =
#include <errno.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-ifgif-data.h"
+#endif /* __rtems__ */
-#define GIFBITS "\020\1ACCEPT_REV_ETHIP_VER\5SEND_REV_ETHIP_VER"
+#define GIFBITS "\020\2IGNORE_SOURCE"
static void gif_status(int);
@@ -72,8 +82,7 @@ gif_status(int s)
}
static void
-setgifopts(const char *val,
- int d, int s, const struct afswtch *afp)
+setgifopts(const char *val, int d, int s, const struct afswtch *afp)
{
int opts;
@@ -95,10 +104,8 @@ setgifopts(const char *val,
}
static struct cmd gif_cmds[] = {
- DEF_CMD("accept_rev_ethip_ver", GIF_ACCEPT_REVETHIP, setgifopts),
- DEF_CMD("-accept_rev_ethip_ver",-GIF_ACCEPT_REVETHIP, setgifopts),
- DEF_CMD("send_rev_ethip_ver", GIF_SEND_REVETHIP, setgifopts),
- DEF_CMD("-send_rev_ethip_ver", -GIF_SEND_REVETHIP, setgifopts),
+ DEF_CMD("ignore_source", GIF_IGNORE_SOURCE, setgifopts),
+ DEF_CMD("-ignore_source", -GIF_IGNORE_SOURCE, setgifopts),
};
static struct afswtch af_gif = {
@@ -114,11 +121,9 @@ void
#endif /* __rtems__ */
gif_ctor(void)
{
-#define N(a) (sizeof(a) / sizeof(a[0]))
size_t i;
- for (i = 0; i < N(gif_cmds); i++)
+ for (i = 0; i < nitems(gif_cmds); i++)
cmd_register(&gif_cmds[i]);
af_register(&af_gif);
-#undef N
}
diff --git a/freebsd/sbin/ifconfig/ifgre.c b/freebsd/sbin/ifconfig/ifgre.c
index 221371f6..6f72a89e 100644
--- a/freebsd/sbin/ifconfig/ifgre.c
+++ b/freebsd/sbin/ifconfig/ifgre.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright (c) 2008 Andrew Thompson. All rights reserved.
*
@@ -25,61 +29,92 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef lint
-static const char rcsid[] =
- "$FreeBSD$";
-#endif
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
-
-#include <stdlib.h>
-#include <unistd.h>
-
-#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_gre.h>
-#include <net/route.h>
#include <ctype.h>
+#include <limits.h>
#include <stdio.h>
-#include <string.h>
#include <stdlib.h>
-#include <unistd.h>
+#include <string.h>
#include <err.h>
-#include <errno.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-ifgre-data.h"
+#endif /* __rtems__ */
+
+#define GREBITS "\020\01ENABLE_CSUM\02ENABLE_SEQ"
static void gre_status(int s);
static void
gre_status(int s)
{
- int grekey = 0;
+ uint32_t opts = 0;
- ifr.ifr_data = (caddr_t)&grekey;
+ ifr.ifr_data = (caddr_t)&opts;
if (ioctl(s, GREGKEY, &ifr) == 0)
- if (grekey != 0)
- printf("\tgrekey: %d\n", grekey);
+ if (opts != 0)
+ printf("\tgrekey: 0x%x (%u)\n", opts, opts);
+ opts = 0;
+ if (ioctl(s, GREGOPTS, &ifr) != 0 || opts == 0)
+ return;
+ printb("\toptions", opts, GREBITS);
+ putchar('\n');
}
static void
setifgrekey(const char *val, int dummy __unused, int s,
const struct afswtch *afp)
{
- uint32_t grekey = atol(val);
+ uint32_t grekey = strtol(val, NULL, 0);
- strncpy(ifr.ifr_name, name, sizeof (ifr.ifr_name));
+ strlcpy(ifr.ifr_name, name, sizeof (ifr.ifr_name));
ifr.ifr_data = (caddr_t)&grekey;
if (ioctl(s, GRESKEY, (caddr_t)&ifr) < 0)
warn("ioctl (set grekey)");
}
+static void
+setifgreopts(const char *val, int d, int s, const struct afswtch *afp)
+{
+ uint32_t opts;
+
+ ifr.ifr_data = (caddr_t)&opts;
+ if (ioctl(s, GREGOPTS, &ifr) == -1) {
+ warn("ioctl(GREGOPTS)");
+ return;
+ }
+
+ if (d < 0)
+ opts &= ~(-d);
+ else
+ opts |= d;
+
+ if (ioctl(s, GRESOPTS, &ifr) == -1) {
+ warn("ioctl(GIFSOPTS)");
+ return;
+ }
+}
+
+
static struct cmd gre_cmds[] = {
DEF_CMD_ARG("grekey", setifgrekey),
+ DEF_CMD("enable_csum", GRE_ENABLE_CSUM, setifgreopts),
+ DEF_CMD("-enable_csum",-GRE_ENABLE_CSUM,setifgreopts),
+ DEF_CMD("enable_seq", GRE_ENABLE_SEQ, setifgreopts),
+ DEF_CMD("-enable_seq",-GRE_ENABLE_SEQ, setifgreopts),
};
static struct afswtch af_gre = {
.af_name = "af_gre",
@@ -94,11 +129,9 @@ void
#endif /* __rtems__ */
gre_ctor(void)
{
-#define N(a) (sizeof(a) / sizeof(a[0]))
size_t i;
- for (i = 0; i < N(gre_cmds); i++)
+ for (i = 0; i < nitems(gre_cmds); i++)
cmd_register(&gre_cmds[i]);
af_register(&af_gre);
-#undef N
}
diff --git a/freebsd/sbin/ifconfig/ifgroup.c b/freebsd/sbin/ifconfig/ifgroup.c
index 444a1c22..0e19d4ae 100644
--- a/freebsd/sbin/ifconfig/ifgroup.c
+++ b/freebsd/sbin/ifconfig/ifgroup.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright (c) 2006 Max Laier. All rights reserved.
*
@@ -31,18 +35,9 @@ static const char rcsid[] =
#endif /* not lint */
#ifdef __rtems__
-#define RTEMS_BSD_PROGRAM_NO_OPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_SOCKET_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FOPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FCLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_MALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_REALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FREE_WRAP
#include <machine/rtems-bsd-program.h>
#endif /* __rtems__ */
-#include <sys/types.h>
+#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
@@ -56,6 +51,9 @@ static const char rcsid[] =
#include <unistd.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-ifgroup-data.h"
+#endif /* __rtems__ */
/* ARGSUSED */
static void
@@ -66,7 +64,12 @@ setifgroup(const char *group_name, int d, int s, const struct afswtch *rafp)
memset(&ifgr, 0, sizeof(ifgr));
strlcpy(ifgr.ifgr_name, name, IFNAMSIZ);
- if (group_name[0] && isdigit((unsigned char)group_name[strlen(group_name) - 1]))
+#ifndef __rtems__
+ if (group_name[0] && isdigit(group_name[strlen(group_name) - 1]))
+#else /* __rtems__ */
+ if (group_name[0] && isdigit(
+ (unsigned char)group_name[strlen(group_name) - 1]))
+#endif /* __rtems__ */
errx(1, "setifgroup: group names may not end in a digit");
if (strlcpy(ifgr.ifgr_group, group_name, IFNAMSIZ) >= IFNAMSIZ)
@@ -84,7 +87,12 @@ unsetifgroup(const char *group_name, int d, int s, const struct afswtch *rafp)
memset(&ifgr, 0, sizeof(ifgr));
strlcpy(ifgr.ifgr_name, name, IFNAMSIZ);
- if (group_name[0] && isdigit((unsigned char)group_name[strlen(group_name) - 1]))
+#ifndef __rtems__
+ if (group_name[0] && isdigit(group_name[strlen(group_name) - 1]))
+#else /* __rtems__ */
+ if (group_name[0] && isdigit(
+ (unsigned char)group_name[strlen(group_name) - 1]))
+#endif /* __rtems__ */
errx(1, "unsetifgroup: group names may not end in a digit");
if (strlcpy(ifgr.ifgr_group, group_name, IFNAMSIZ) >= IFNAMSIZ)
@@ -100,9 +108,6 @@ getifgroups(int s)
struct ifgroupreq ifgr;
struct ifg_req *ifg;
- if (!verbose)
- return;
-
memset(&ifgr, 0, sizeof(ifgr));
strlcpy(ifgr.ifgr_name, name, IFNAMSIZ);
@@ -135,6 +140,8 @@ getifgroups(int s)
}
if (cnt)
printf("\n");
+
+ free(ifgr.ifgr_groups);
}
static void
@@ -151,7 +158,6 @@ printgroup(const char *groupname)
bzero(&ifgr, sizeof(ifgr));
strlcpy(ifgr.ifgr_name, groupname, sizeof(ifgr.ifgr_name));
if (ioctl(s, SIOCGIFGMEMB, (caddr_t)&ifgr) == -1) {
- close(s);
if (errno == EINVAL || errno == ENOTTY ||
errno == ENOENT)
exit(0);
@@ -160,15 +166,10 @@ printgroup(const char *groupname)
}
len = ifgr.ifgr_len;
- if ((ifgr.ifgr_groups = calloc(1, len)) == NULL) {
- close(s);
+ if ((ifgr.ifgr_groups = calloc(1, len)) == NULL)
err(1, "printgroup");
- }
- if (ioctl(s, SIOCGIFGMEMB, (caddr_t)&ifgr) == -1) {
- free(ifgr.ifgr_groups);
- close(s);
+ if (ioctl(s, SIOCGIFGMEMB, (caddr_t)&ifgr) == -1)
err(1, "SIOCGIFGMEMB");
- }
for (ifg = ifgr.ifgr_groups; ifg && len >= sizeof(struct ifg_req);
ifg++) {
@@ -177,7 +178,6 @@ printgroup(const char *groupname)
cnt++;
}
free(ifgr.ifgr_groups);
- close(s);
exit(0);
}
@@ -200,12 +200,10 @@ void
#endif /* __rtems__ */
group_ctor(void)
{
-#define N(a) (sizeof(a) / sizeof(a[0]))
int i;
- for (i = 0; i < N(group_cmds); i++)
+ for (i = 0; i < nitems(group_cmds); i++)
cmd_register(&group_cmds[i]);
af_register(&af_group);
opt_register(&group_gopt);
-#undef N
}
diff --git a/freebsd/sbin/ifconfig/ifieee80211.c b/freebsd/sbin/ifconfig/ifieee80211.c
index 29b49a5b..ead27e2b 100644
--- a/freebsd/sbin/ifconfig/ifieee80211.c
+++ b/freebsd/sbin/ifconfig/ifieee80211.c
@@ -1,3 +1,9 @@
+#include <machine/rtems-bsd-user-space.h>
+
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*
* Copyright 2001 The Aerospace Corporation. All rights reserved.
*
@@ -64,6 +70,9 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -98,6 +107,9 @@
#include "ifconfig.h"
#include "regdomain.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-ifieee80211-data.h"
+#endif /* __rtems__ */
#ifndef IEEE80211_FIXED_RATE_NONE
#define IEEE80211_FIXED_RATE_NONE 0xff
@@ -207,12 +219,22 @@ getchaninfo(int s)
gethtconf(s);
}
+#ifdef __rtems__
+static struct regdata *getregdata_rdp = NULL;
+#endif /* __rtems__ */
static struct regdata *
getregdata(void)
{
+#ifndef __rtems__
static struct regdata *rdp = NULL;
+#else /* __rtems__ */
+ struct regdata *rdp = getregdata_rdp;
+#endif /* __rtems__ */
if (rdp == NULL) {
rdp = lib80211_alloc_regdata();
+#ifdef __rtems__
+ getregdata_rdp = rdp;
+#endif /* __rtems__ */
if (rdp == NULL)
errx(-1, "missing or corrupted regdomain database");
}
@@ -5269,7 +5291,11 @@ static struct afswtch af_ieee80211 = {
.af_other_status = ieee80211_status,
};
+#ifndef __rtems__
static __constructor void
+#else /* __rtems__ */
+void
+#endif /* __rtems__ */
ieee80211_ctor(void)
{
#define N(a) (sizeof(a) / sizeof(a[0]))
diff --git a/freebsd/sbin/ifconfig/iflagg.c b/freebsd/sbin/ifconfig/iflagg.c
index 56970e57..78f81fdb 100644
--- a/freebsd/sbin/ifconfig/iflagg.c
+++ b/freebsd/sbin/ifconfig/iflagg.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*-
*/
@@ -8,6 +12,9 @@ static const char rcsid[] =
"$FreeBSD$";
#endif /* not lint */
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -19,6 +26,7 @@ static const char rcsid[] =
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_lagg.h>
+#include <net/ieee8023ad_lacp.h>
#include <net/route.h>
#include <ctype.h>
@@ -30,6 +38,9 @@ static const char rcsid[] =
#include <errno.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-iflagg-data.h"
+#endif /* __rtems__ */
char lacpbuf[120]; /* LACP peer '[(a,a,a),(p,p,p)]' */
@@ -70,7 +81,7 @@ setlaggproto(const char *val, int d, int s, const struct afswtch *afp)
bzero(&ra, sizeof(ra));
ra.ra_proto = LAGG_PROTO_MAX;
- for (i = 0; i < (sizeof(lpr) / sizeof(lpr[0])); i++) {
+ for (i = 0; i < nitems(lpr); i++) {
if (strcmp(val, lpr[i].lpr_name) == 0) {
ra.ra_proto = lpr[i].lpr_proto;
break;
@@ -85,6 +96,63 @@ setlaggproto(const char *val, int d, int s, const struct afswtch *afp)
}
static void
+setlaggflowidshift(const char *val, int d, int s, const struct afswtch *afp)
+{
+ struct lagg_reqopts ro;
+
+ bzero(&ro, sizeof(ro));
+ ro.ro_opts = LAGG_OPT_FLOWIDSHIFT;
+ strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname));
+ ro.ro_flowid_shift = (int)strtol(val, NULL, 10);
+ if (ro.ro_flowid_shift & ~LAGG_OPT_FLOWIDSHIFT_MASK)
+ errx(1, "Invalid flowid_shift option: %s", val);
+
+ if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0)
+ err(1, "SIOCSLAGGOPTS");
+}
+
+static void
+setlaggrr_limit(const char *val, int d, int s, const struct afswtch *afp)
+{
+ struct lagg_reqopts ro;
+
+ bzero(&ro, sizeof(ro));
+ strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname));
+ ro.ro_bkt = (int)strtol(val, NULL, 10);
+
+ if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0)
+ err(1, "SIOCSLAGG");
+}
+
+static void
+setlaggsetopt(const char *val, int d, int s, const struct afswtch *afp)
+{
+ struct lagg_reqopts ro;
+
+ bzero(&ro, sizeof(ro));
+ ro.ro_opts = d;
+ switch (ro.ro_opts) {
+ case LAGG_OPT_USE_FLOWID:
+ case -LAGG_OPT_USE_FLOWID:
+ case LAGG_OPT_LACP_STRICT:
+ case -LAGG_OPT_LACP_STRICT:
+ case LAGG_OPT_LACP_TXTEST:
+ case -LAGG_OPT_LACP_TXTEST:
+ case LAGG_OPT_LACP_RXTEST:
+ case -LAGG_OPT_LACP_RXTEST:
+ case LAGG_OPT_LACP_TIMEOUT:
+ case -LAGG_OPT_LACP_TIMEOUT:
+ break;
+ default:
+ err(1, "Invalid lagg option");
+ }
+ strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname));
+
+ if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0)
+ err(1, "SIOCSLAGGOPTS");
+}
+
+static void
setlagghash(const char *val, int d, int s, const struct afswtch *afp)
{
struct lagg_reqflags rf;
@@ -146,6 +214,7 @@ lagg_status(int s)
struct lagg_protos lpr[] = LAGG_PROTOS;
struct lagg_reqport rp, rpbuf[LAGG_MAX_PORTS];
struct lagg_reqall ra;
+ struct lagg_reqopts ro;
struct lagg_reqflags rf;
struct lacp_opreq *lp;
const char *proto = "<unknown>";
@@ -153,6 +222,7 @@ lagg_status(int s)
bzero(&rp, sizeof(rp));
bzero(&ra, sizeof(ra));
+ bzero(&ro, sizeof(ro));
strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname));
strlcpy(rp.rp_portname, name, sizeof(rp.rp_portname));
@@ -164,6 +234,9 @@ lagg_status(int s)
ra.ra_size = sizeof(rpbuf);
ra.ra_port = rpbuf;
+ strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname));
+ ioctl(s, SIOCGLAGGOPTS, &ro);
+
strlcpy(rf.rf_ifname, name, sizeof(rf.rf_ifname));
if (ioctl(s, SIOCGLAGGFLAGS, &rf) != 0)
rf.rf_flags = 0;
@@ -171,7 +244,7 @@ lagg_status(int s)
if (ioctl(s, SIOCGLAGG, &ra) == 0) {
lp = (struct lacp_opreq *)&ra.ra_lacpreq;
- for (i = 0; i < (sizeof(lpr) / sizeof(lpr[0])); i++) {
+ for (i = 0; i < nitems(lpr); i++) {
if (ra.ra_proto == lpr[i].lpr_proto) {
proto = lpr[i].lpr_name;
break;
@@ -199,16 +272,29 @@ lagg_status(int s)
if (isport)
printf(" laggdev %s", rp.rp_ifname);
putchar('\n');
- if (verbose && ra.ra_proto == LAGG_PROTO_LACP)
- printf("\tlag id: %s\n",
- lacp_format_peer(lp, "\n\t\t "));
+ if (verbose) {
+ printf("\tlagg options:\n");
+ printb("\t\tflags", ro.ro_opts, LAGG_OPT_BITS);
+ putchar('\n');
+ printf("\t\tflowid_shift: %d\n", ro.ro_flowid_shift);
+ if (ra.ra_proto == LAGG_PROTO_ROUNDROBIN)
+ printf("\t\trr_limit: %d\n", ro.ro_bkt);
+ printf("\tlagg statistics:\n");
+ printf("\t\tactive ports: %d\n", ro.ro_active);
+ printf("\t\tflapping: %u\n", ro.ro_flapping);
+ if (ra.ra_proto == LAGG_PROTO_LACP) {
+ printf("\tlag id: %s\n",
+ lacp_format_peer(lp, "\n\t\t "));
+ }
+ }
for (i = 0; i < ra.ra_ports; i++) {
lp = (struct lacp_opreq *)&rpbuf[i].rp_lacpreq;
printf("\tlaggport: %s ", rpbuf[i].rp_portname);
printb("flags", rpbuf[i].rp_flags, LAGG_PORT_BITS);
if (verbose && ra.ra_proto == LAGG_PROTO_LACP)
- printf(" state=%X", lp->actor_state);
+ printb(" state", lp->actor_state,
+ LACP_STATE_BITS);
putchar('\n');
if (verbose && ra.ra_proto == LAGG_PROTO_LACP)
printf("\t\t%s\n",
@@ -217,7 +303,7 @@ lagg_status(int s)
if (0 /* XXX */) {
printf("\tsupported aggregation protocols:\n");
- for (i = 0; i < (sizeof(lpr) / sizeof(lpr[0])); i++)
+ for (i = 0; i < nitems(lpr); i++)
printf("\t\tlaggproto %s\n", lpr[i].lpr_name);
}
}
@@ -228,6 +314,18 @@ static struct cmd lagg_cmds[] = {
DEF_CMD_ARG("-laggport", unsetlaggport),
DEF_CMD_ARG("laggproto", setlaggproto),
DEF_CMD_ARG("lagghash", setlagghash),
+ DEF_CMD("use_flowid", LAGG_OPT_USE_FLOWID, setlaggsetopt),
+ DEF_CMD("-use_flowid", -LAGG_OPT_USE_FLOWID, setlaggsetopt),
+ DEF_CMD("lacp_strict", LAGG_OPT_LACP_STRICT, setlaggsetopt),
+ DEF_CMD("-lacp_strict", -LAGG_OPT_LACP_STRICT, setlaggsetopt),
+ DEF_CMD("lacp_txtest", LAGG_OPT_LACP_TXTEST, setlaggsetopt),
+ DEF_CMD("-lacp_txtest", -LAGG_OPT_LACP_TXTEST, setlaggsetopt),
+ DEF_CMD("lacp_rxtest", LAGG_OPT_LACP_RXTEST, setlaggsetopt),
+ DEF_CMD("-lacp_rxtest", -LAGG_OPT_LACP_RXTEST, setlaggsetopt),
+ DEF_CMD("lacp_fast_timeout", LAGG_OPT_LACP_TIMEOUT, setlaggsetopt),
+ DEF_CMD("-lacp_fast_timeout", -LAGG_OPT_LACP_TIMEOUT, setlaggsetopt),
+ DEF_CMD_ARG("flowid_shift", setlaggflowidshift),
+ DEF_CMD_ARG("rr_limit", setlaggrr_limit),
};
static struct afswtch af_lagg = {
.af_name = "af_lagg",
@@ -242,14 +340,9 @@ void
#endif /* __rtems__ */
lagg_ctor(void)
{
-#ifdef __rtems__
- memset(&lacpbuf, 0, sizeof(lacpbuf));
-#endif /* __rtems__ */
-#define N(a) (sizeof(a) / sizeof(a[0]))
int i;
- for (i = 0; i < N(lagg_cmds); i++)
+ for (i = 0; i < nitems(lagg_cmds); i++)
cmd_register(&lagg_cmds[i]);
af_register(&af_lagg);
-#undef N
}
diff --git a/freebsd/sbin/ifconfig/ifmac.c b/freebsd/sbin/ifconfig/ifmac.c
index a64a0cbf..b9aa5eed 100644
--- a/freebsd/sbin/ifconfig/ifmac.c
+++ b/freebsd/sbin/ifconfig/ifmac.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright (c) 2001 Networks Associates Technology, Inc.
* All rights reserved.
@@ -36,6 +40,9 @@
* $FreeBSD$
*/
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/mac.h>
@@ -50,6 +57,9 @@
#include <string.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-ifmac-data.h"
+#endif /* __rtems__ */
static void
maclabel_status(int s)
@@ -59,7 +69,7 @@ maclabel_status(int s)
char *label_text;
memset(&ifr, 0, sizeof(ifr));
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+ strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
if (mac_prepare_ifnet_label(&label) == -1)
return;
@@ -92,7 +102,7 @@ setifmaclabel(const char *val, int d, int s, const struct afswtch *rafp)
}
memset(&ifr, 0, sizeof(ifr));
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+ strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
ifr.ifr_ifru.ifru_data = (void *)label;
error = ioctl(s, SIOCSIFMAC, &ifr);
@@ -117,11 +127,9 @@ void
#endif /* __rtems__ */
mac_ctor(void)
{
-#define N(a) (sizeof(a) / sizeof(a[0]))
size_t i;
- for (i = 0; i < N(mac_cmds); i++)
+ for (i = 0; i < nitems(mac_cmds); i++)
cmd_register(&mac_cmds[i]);
af_register(&af_mac);
-#undef N
}
diff --git a/freebsd/sbin/ifconfig/ifmedia.c b/freebsd/sbin/ifconfig/ifmedia.c
index ba029cb4..890ca8e3 100644
--- a/freebsd/sbin/ifconfig/ifmedia.c
+++ b/freebsd/sbin/ifconfig/ifmedia.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/* $NetBSD: ifconfig.c,v 1.34 1997/04/21 01:17:58 lukem Exp $ */
/* $FreeBSD$ */
@@ -64,6 +68,9 @@
* SUCH DAMAGE.
*/
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -86,19 +93,37 @@
#include <unistd.h>
#include "ifconfig.h"
+#ifdef __rtems__
+struct ifmedia_type_to_subtype {
+ struct {
+ struct ifmedia_description *desc;
+ int alias;
+ } subtypes[5];
+ struct {
+ struct ifmedia_description *desc;
+ int alias;
+ } options[4];
+ struct {
+ struct ifmedia_description *desc;
+ int alias;
+ } modes[3];
+};
+
+#include "rtems-bsd-ifconfig-ifmedia-data.h"
+#endif /* __rtems__ */
static void domediaopt(const char *, int, int);
static int get_media_subtype(int, const char *);
static int get_media_mode(int, const char *);
static int get_media_options(int, const char *);
-static int lookup_media_word(const struct ifmedia_description *, const char *);
+static int lookup_media_word(struct ifmedia_description *, const char *);
static void print_media_word(int, int);
static void print_media_word_ifconfig(int);
-static const struct ifmedia_description *get_toptype_desc(int);
-static const struct ifmedia_type_to_subtype *get_toptype_ttos(int);
-static const struct ifmedia_description *get_subtype_desc(int,
- const struct ifmedia_type_to_subtype *ttos);
+static struct ifmedia_description *get_toptype_desc(int);
+static struct ifmedia_type_to_subtype *get_toptype_ttos(int);
+static struct ifmedia_description *get_subtype_desc(int,
+ struct ifmedia_type_to_subtype *ttos);
#define IFM_OPMODE(x) \
((x) & (IFM_IEEE80211_ADHOC | IFM_IEEE80211_HOSTAP | \
@@ -111,11 +136,17 @@ media_status(int s)
{
struct ifmediareq ifmr;
int *media_list, i;
+ int xmedia = 1;
(void) memset(&ifmr, 0, sizeof(ifmr));
- (void) strncpy(ifmr.ifm_name, name, sizeof(ifmr.ifm_name));
+ (void) strlcpy(ifmr.ifm_name, name, sizeof(ifmr.ifm_name));
- if (ioctl(s, SIOCGIFMEDIA, (caddr_t)&ifmr) < 0) {
+ /*
+ * Check if interface supports extended media types.
+ */
+ if (ioctl(s, SIOCGIFXMEDIA, (caddr_t)&ifmr) < 0)
+ xmedia = 0;
+ if (xmedia == 0 && ioctl(s, SIOCGIFMEDIA, (caddr_t)&ifmr) < 0) {
/*
* Interface doesn't support SIOC{G,S}IFMEDIA.
*/
@@ -132,9 +163,12 @@ media_status(int s)
err(1, "malloc");
ifmr.ifm_ulist = media_list;
- if (ioctl(s, SIOCGIFMEDIA, (caddr_t)&ifmr) < 0) {
- free(media_list);
- err(1, "SIOCGIFMEDIA");
+ if (xmedia) {
+ if (ioctl(s, SIOCGIFXMEDIA, (caddr_t)&ifmr) < 0)
+ err(1, "SIOCGIFXMEDIA");
+ } else {
+ if (ioctl(s, SIOCGIFMEDIA, (caddr_t)&ifmr) < 0)
+ err(1, "SIOCGIFMEDIA");
}
printf("\tmedia: ");
@@ -194,23 +228,29 @@ media_status(int s)
}
#ifdef __rtems__
-static struct ifmediareq *ifmr = NULL;
+static struct ifmediareq *ifmedia_getstate_ifmr = NULL;
#endif /* __rtems__ */
struct ifmediareq *
ifmedia_getstate(int s)
{
#ifndef __rtems__
static struct ifmediareq *ifmr = NULL;
+#else /* __rtems__ */
+ struct ifmediareq *ifmr = ifmedia_getstate_ifmr;
#endif /* __rtems__ */
int *mwords;
+ int xmedia = 1;
if (ifmr == NULL) {
ifmr = (struct ifmediareq *)malloc(sizeof(struct ifmediareq));
+#ifdef __rtems__
+ ifmedia_getstate_ifmr = ifmr;
+#endif /* __rtems__ */
if (ifmr == NULL)
err(1, "malloc");
(void) memset(ifmr, 0, sizeof(struct ifmediareq));
- (void) strncpy(ifmr->ifm_name, name,
+ (void) strlcpy(ifmr->ifm_name, name,
sizeof(ifmr->ifm_name));
ifmr->ifm_count = 0;
@@ -222,7 +262,10 @@ ifmedia_getstate(int s)
* the current media type and the top-level type.
*/
- if (ioctl(s, SIOCGIFMEDIA, (caddr_t)ifmr) < 0) {
+ if (ioctl(s, SIOCGIFXMEDIA, (caddr_t)ifmr) < 0) {
+ xmedia = 0;
+ }
+ if (xmedia == 0 && ioctl(s, SIOCGIFMEDIA, (caddr_t)ifmr) < 0) {
err(1, "SIOCGIFMEDIA");
}
@@ -234,8 +277,13 @@ ifmedia_getstate(int s)
err(1, "malloc");
ifmr->ifm_ulist = mwords;
- if (ioctl(s, SIOCGIFMEDIA, (caddr_t)ifmr) < 0)
- err(1, "SIOCGIFMEDIA");
+ if (xmedia) {
+ if (ioctl(s, SIOCGIFXMEDIA, (caddr_t)ifmr) < 0)
+ err(1, "SIOCGIFXMEDIA");
+ } else {
+ if (ioctl(s, SIOCGIFMEDIA, (caddr_t)ifmr) < 0)
+ err(1, "SIOCGIFMEDIA");
+ }
}
return ifmr;
@@ -281,7 +329,7 @@ setmedia(const char *val, int d, int s, const struct afswtch *afp)
*/
subtype = get_media_subtype(IFM_TYPE(ifmr->ifm_ulist[0]), val);
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+ strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
ifr.ifr_media = (ifmr->ifm_current & IFM_IMASK) |
IFM_TYPE(ifmr->ifm_ulist[0]) | subtype;
@@ -313,7 +361,7 @@ domediaopt(const char *val, int clear, int s)
options = get_media_options(IFM_TYPE(ifmr->ifm_ulist[0]), val);
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+ strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
ifr.ifr_media = ifmr->ifm_current;
if (clear)
ifr.ifr_media &= ~options;
@@ -340,7 +388,7 @@ setmediainst(const char *val, int d, int s, const struct afswtch *afp)
if (inst < 0 || inst > (int)IFM_INST_MAX)
errx(1, "invalid media instance: %s", val);
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+ strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
ifr.ifr_media = (ifmr->ifm_current & ~IFM_IMASK) | inst << IFM_ISHIFT;
ifmr->ifm_current = ifr.ifr_media;
@@ -357,7 +405,7 @@ setmediamode(const char *val, int d, int s, const struct afswtch *afp)
mode = get_media_mode(IFM_TYPE(ifmr->ifm_ulist[0]), val);
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+ strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
ifr.ifr_media = (ifmr->ifm_current & ~IFM_MMASK) | mode;
ifmr->ifm_current = ifr.ifr_media;
@@ -368,89 +416,91 @@ setmediamode(const char *val, int d, int s, const struct afswtch *afp)
* A good chunk of this is duplicated from sys/net/ifmedia.c
**********************************************************************/
-static const struct ifmedia_description ifm_type_descriptions[] =
+static struct ifmedia_description ifm_type_descriptions[] =
IFM_TYPE_DESCRIPTIONS;
-static const struct ifmedia_description ifm_subtype_ethernet_descriptions[] =
+static struct ifmedia_description ifm_subtype_ethernet_descriptions[] =
IFM_SUBTYPE_ETHERNET_DESCRIPTIONS;
-static const struct ifmedia_description ifm_subtype_ethernet_aliases[] =
+static struct ifmedia_description ifm_subtype_ethernet_aliases[] =
IFM_SUBTYPE_ETHERNET_ALIASES;
-static const struct ifmedia_description ifm_subtype_ethernet_option_descriptions[] =
+static struct ifmedia_description ifm_subtype_ethernet_option_descriptions[] =
IFM_SUBTYPE_ETHERNET_OPTION_DESCRIPTIONS;
-static const struct ifmedia_description ifm_subtype_tokenring_descriptions[] =
+static struct ifmedia_description ifm_subtype_tokenring_descriptions[] =
IFM_SUBTYPE_TOKENRING_DESCRIPTIONS;
-static const struct ifmedia_description ifm_subtype_tokenring_aliases[] =
+static struct ifmedia_description ifm_subtype_tokenring_aliases[] =
IFM_SUBTYPE_TOKENRING_ALIASES;
-static const struct ifmedia_description ifm_subtype_tokenring_option_descriptions[] =
+static struct ifmedia_description ifm_subtype_tokenring_option_descriptions[] =
IFM_SUBTYPE_TOKENRING_OPTION_DESCRIPTIONS;
-static const struct ifmedia_description ifm_subtype_fddi_descriptions[] =
+static struct ifmedia_description ifm_subtype_fddi_descriptions[] =
IFM_SUBTYPE_FDDI_DESCRIPTIONS;
-static const struct ifmedia_description ifm_subtype_fddi_aliases[] =
+static struct ifmedia_description ifm_subtype_fddi_aliases[] =
IFM_SUBTYPE_FDDI_ALIASES;
-static const struct ifmedia_description ifm_subtype_fddi_option_descriptions[] =
+static struct ifmedia_description ifm_subtype_fddi_option_descriptions[] =
IFM_SUBTYPE_FDDI_OPTION_DESCRIPTIONS;
-static const struct ifmedia_description ifm_subtype_ieee80211_descriptions[] =
+static struct ifmedia_description ifm_subtype_ieee80211_descriptions[] =
IFM_SUBTYPE_IEEE80211_DESCRIPTIONS;
-static const struct ifmedia_description ifm_subtype_ieee80211_aliases[] =
+static struct ifmedia_description ifm_subtype_ieee80211_aliases[] =
IFM_SUBTYPE_IEEE80211_ALIASES;
-static const struct ifmedia_description ifm_subtype_ieee80211_option_descriptions[] =
+static struct ifmedia_description ifm_subtype_ieee80211_option_descriptions[] =
IFM_SUBTYPE_IEEE80211_OPTION_DESCRIPTIONS;
-static const struct ifmedia_description ifm_subtype_ieee80211_mode_descriptions[] =
+struct ifmedia_description ifm_subtype_ieee80211_mode_descriptions[] =
IFM_SUBTYPE_IEEE80211_MODE_DESCRIPTIONS;
-static const struct ifmedia_description ifm_subtype_ieee80211_mode_aliases[] =
+struct ifmedia_description ifm_subtype_ieee80211_mode_aliases[] =
IFM_SUBTYPE_IEEE80211_MODE_ALIASES;
-static const struct ifmedia_description ifm_subtype_atm_descriptions[] =
+static struct ifmedia_description ifm_subtype_atm_descriptions[] =
IFM_SUBTYPE_ATM_DESCRIPTIONS;
-static const struct ifmedia_description ifm_subtype_atm_aliases[] =
+static struct ifmedia_description ifm_subtype_atm_aliases[] =
IFM_SUBTYPE_ATM_ALIASES;
-static const struct ifmedia_description ifm_subtype_atm_option_descriptions[] =
+static struct ifmedia_description ifm_subtype_atm_option_descriptions[] =
IFM_SUBTYPE_ATM_OPTION_DESCRIPTIONS;
-static const struct ifmedia_description ifm_subtype_shared_descriptions[] =
+static struct ifmedia_description ifm_subtype_shared_descriptions[] =
IFM_SUBTYPE_SHARED_DESCRIPTIONS;
-static const struct ifmedia_description ifm_subtype_shared_aliases[] =
+static struct ifmedia_description ifm_subtype_shared_aliases[] =
IFM_SUBTYPE_SHARED_ALIASES;
-static const struct ifmedia_description ifm_shared_option_descriptions[] =
+static struct ifmedia_description ifm_shared_option_descriptions[] =
IFM_SHARED_OPTION_DESCRIPTIONS;
static struct ifmedia_description ifm_shared_option_aliases[] =
IFM_SHARED_OPTION_ALIASES;
+#ifndef __rtems__
struct ifmedia_type_to_subtype {
struct {
- const struct ifmedia_description *desc;
+ struct ifmedia_description *desc;
int alias;
} subtypes[5];
struct {
- const struct ifmedia_description *desc;
+ struct ifmedia_description *desc;
int alias;
} options[4];
struct {
- const struct ifmedia_description *desc;
+ struct ifmedia_description *desc;
int alias;
} modes[3];
};
+#endif /* __rtems__ */
/* must be in the same order as IFM_TYPE_DESCRIPTIONS */
-static const struct ifmedia_type_to_subtype ifmedia_types_to_subtypes[] = {
+static struct ifmedia_type_to_subtype ifmedia_types_to_subtypes[] = {
{
{
{ &ifm_subtype_shared_descriptions[0], 0 },
@@ -548,8 +598,8 @@ static const struct ifmedia_type_to_subtype ifmedia_types_to_subtypes[] = {
static int
get_media_subtype(int type, const char *val)
{
- const struct ifmedia_description *desc;
- const struct ifmedia_type_to_subtype *ttos;
+ struct ifmedia_description *desc;
+ struct ifmedia_type_to_subtype *ttos;
int rval, i;
/* Find the top-level interface type. */
@@ -572,8 +622,8 @@ get_media_subtype(int type, const char *val)
static int
get_media_mode(int type, const char *val)
{
- const struct ifmedia_description *desc;
- const struct ifmedia_type_to_subtype *ttos;
+ struct ifmedia_description *desc;
+ struct ifmedia_type_to_subtype *ttos;
int rval, i;
/* Find the top-level interface type. */
@@ -595,8 +645,8 @@ get_media_mode(int type, const char *val)
static int
get_media_options(int type, const char *val)
{
- const struct ifmedia_description *desc;
- const struct ifmedia_type_to_subtype *ttos;
+ struct ifmedia_description *desc;
+ struct ifmedia_type_to_subtype *ttos;
char *optlist, *optptr;
int option = 0, i, rval = 0;
@@ -634,7 +684,7 @@ get_media_options(int type, const char *val)
}
static int
-lookup_media_word(const struct ifmedia_description *desc, const char *val)
+lookup_media_word(struct ifmedia_description *desc, const char *val)
{
for (; desc->ifmt_string != NULL; desc++)
@@ -644,9 +694,9 @@ lookup_media_word(const struct ifmedia_description *desc, const char *val)
return (-1);
}
-static const struct ifmedia_description *get_toptype_desc(int ifmw)
+static struct ifmedia_description *get_toptype_desc(int ifmw)
{
- const struct ifmedia_description *desc;
+ struct ifmedia_description *desc;
for (desc = ifm_type_descriptions; desc->ifmt_string != NULL; desc++)
if (IFM_TYPE(ifmw) == desc->ifmt_word)
@@ -655,10 +705,10 @@ static const struct ifmedia_description *get_toptype_desc(int ifmw)
return desc;
}
-static const struct ifmedia_type_to_subtype *get_toptype_ttos(int ifmw)
+static struct ifmedia_type_to_subtype *get_toptype_ttos(int ifmw)
{
- const struct ifmedia_description *desc;
- const struct ifmedia_type_to_subtype *ttos;
+ struct ifmedia_description *desc;
+ struct ifmedia_type_to_subtype *ttos;
for (desc = ifm_type_descriptions, ttos = ifmedia_types_to_subtypes;
desc->ifmt_string != NULL; desc++, ttos++)
@@ -668,11 +718,11 @@ static const struct ifmedia_type_to_subtype *get_toptype_ttos(int ifmw)
return ttos;
}
-static const struct ifmedia_description *get_subtype_desc(int ifmw,
- const struct ifmedia_type_to_subtype *ttos)
+static struct ifmedia_description *get_subtype_desc(int ifmw,
+ struct ifmedia_type_to_subtype *ttos)
{
int i;
- const struct ifmedia_description *desc;
+ struct ifmedia_description *desc;
for (i = 0; ttos->subtypes[i].desc != NULL; i++) {
if (ttos->subtypes[i].alias)
@@ -687,11 +737,11 @@ static const struct ifmedia_description *get_subtype_desc(int ifmw,
return NULL;
}
-static const struct ifmedia_description *get_mode_desc(int ifmw,
- const struct ifmedia_type_to_subtype *ttos)
+static struct ifmedia_description *get_mode_desc(int ifmw,
+ struct ifmedia_type_to_subtype *ttos)
{
int i;
- const struct ifmedia_description *desc;
+ struct ifmedia_description *desc;
for (i = 0; ttos->modes[i].desc != NULL; i++) {
if (ttos->modes[i].alias)
@@ -709,8 +759,8 @@ static const struct ifmedia_description *get_mode_desc(int ifmw,
static void
print_media_word(int ifmw, int print_toptype)
{
- const struct ifmedia_description *desc;
- const struct ifmedia_type_to_subtype *ttos;
+ struct ifmedia_description *desc;
+ struct ifmedia_type_to_subtype *ttos;
int seen_option = 0, i;
/* Find the top-level interface type. */
@@ -769,8 +819,8 @@ print_media_word(int ifmw, int print_toptype)
static void
print_media_word_ifconfig(int ifmw)
{
- const struct ifmedia_description *desc;
- const struct ifmedia_type_to_subtype *ttos;
+ struct ifmedia_description *desc;
+ struct ifmedia_type_to_subtype *ttos;
int seen_option = 0, i;
/* Find the top-level interface type. */
@@ -843,15 +893,9 @@ void
#endif /* __rtems__ */
ifmedia_ctor(void)
{
-#ifdef __rtems__
- did_it = 0;
- ifmr = NULL;
-#endif /* __rtems__ */
-#define N(a) (sizeof(a) / sizeof(a[0]))
size_t i;
- for (i = 0; i < N(media_cmds); i++)
+ for (i = 0; i < nitems(media_cmds); i++)
cmd_register(&media_cmds[i]);
af_register(&af_media);
-#undef N
}
diff --git a/freebsd/sbin/ifconfig/ifpfsync.c b/freebsd/sbin/ifconfig/ifpfsync.c
index ad65e659..162219c8 100644
--- a/freebsd/sbin/ifconfig/ifpfsync.c
+++ b/freebsd/sbin/ifconfig/ifpfsync.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*
* Copyright (c) 2003 Ryan McBride. All rights reserved.
* Copyright (c) 2004 Max Laier. All rights reserved.
@@ -28,7 +32,10 @@
* $FreeBSD$
*/
-#include <sys/types.h>
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
+#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -47,6 +54,9 @@
#include <unistd.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-ifpfsync-data.h"
+#endif /* __rtems__ */
void setpfsync_syncdev(const char *, int, int, const struct afswtch *);
void unsetpfsync_syncdev(const char *, int, int, const struct afswtch *);
@@ -54,6 +64,7 @@ void setpfsync_syncpeer(const char *, int, int, const struct afswtch *);
void unsetpfsync_syncpeer(const char *, int, int, const struct afswtch *);
void setpfsync_syncpeer(const char *, int, int, const struct afswtch *);
void setpfsync_maxupd(const char *, int, int, const struct afswtch *);
+void setpfsync_defer(const char *, int, int, const struct afswtch *);
void pfsync_status(int);
void
@@ -164,6 +175,23 @@ setpfsync_maxupd(const char *val, int d, int s, const struct afswtch *rafp)
err(1, "SIOCSETPFSYNC");
}
+/* ARGSUSED */
+void
+setpfsync_defer(const char *val, int d, int s, const struct afswtch *rafp)
+{
+ struct pfsyncreq preq;
+
+ memset((char *)&preq, 0, sizeof(struct pfsyncreq));
+ ifr.ifr_data = (caddr_t)&preq;
+
+ if (ioctl(s, SIOCGETPFSYNC, (caddr_t)&ifr) == -1)
+ err(1, "SIOCGETPFSYNC");
+
+ preq.pfsyncr_defer = d;
+ if (ioctl(s, SIOCSETPFSYNC, (caddr_t)&ifr) == -1)
+ err(1, "SIOCSETPFSYNC");
+}
+
void
pfsync_status(int s)
{
@@ -185,8 +213,10 @@ pfsync_status(int s)
printf("syncpeer: %s ", inet_ntoa(preq.pfsyncr_syncpeer));
if (preq.pfsyncr_syncdev[0] != '\0' ||
- preq.pfsyncr_syncpeer.s_addr != INADDR_PFSYNC_GROUP)
- printf("maxupd: %d\n", preq.pfsyncr_maxupdates);
+ preq.pfsyncr_syncpeer.s_addr != INADDR_PFSYNC_GROUP) {
+ printf("maxupd: %d ", preq.pfsyncr_maxupdates);
+ printf("defer: %s\n", preq.pfsyncr_defer ? "on" : "off");
+ }
}
static struct cmd pfsync_cmds[] = {
@@ -196,7 +226,9 @@ static struct cmd pfsync_cmds[] = {
DEF_CMD("-syncif", 1, unsetpfsync_syncdev),
DEF_CMD_ARG("syncpeer", setpfsync_syncpeer),
DEF_CMD("-syncpeer", 1, unsetpfsync_syncpeer),
- DEF_CMD_ARG("maxupd", setpfsync_maxupd)
+ DEF_CMD_ARG("maxupd", setpfsync_maxupd),
+ DEF_CMD("defer", 1, setpfsync_defer),
+ DEF_CMD("-defer", 0, setpfsync_defer),
};
static struct afswtch af_pfsync = {
.af_name = "af_pfsync",
@@ -211,11 +243,9 @@ void
#endif /* __rtems__ */
pfsync_ctor(void)
{
-#define N(a) (sizeof(a) / sizeof(a[0]))
int i;
- for (i = 0; i < N(pfsync_cmds); i++)
+ for (i = 0; i < nitems(pfsync_cmds); i++)
cmd_register(&pfsync_cmds[i]);
af_register(&af_pfsync);
-#undef N
}
diff --git a/freebsd/sbin/ifconfig/ifvlan.c b/freebsd/sbin/ifconfig/ifvlan.c
index 9fc2971d..14350baf 100644
--- a/freebsd/sbin/ifconfig/ifvlan.c
+++ b/freebsd/sbin/ifconfig/ifvlan.c
@@ -1,8 +1,16 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
/*
- * Copyright (c) 1999
- * Bill Paul <wpaul@ctr.columbia.edu>. All rights reserved.
+ * Copyright (c) 1999 Bill Paul <wpaul@ctr.columbia.edu>
+ * Copyright (c) 2012 ADARA Networks, Inc.
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to ADARA Networks, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -32,6 +40,9 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -42,7 +53,6 @@
#include <net/ethernet.h>
#include <net/if.h>
-#include <net/if_var.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
@@ -55,6 +65,9 @@
#include <errno.h>
#include "ifconfig.h"
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-ifvlan-data.h"
+#endif /* __rtems__ */
#ifndef lint
static const char rcsid[] =
@@ -81,10 +94,14 @@ vlan_status(int s)
{
struct vlanreq vreq;
- if (getvlan(s, &ifr, &vreq) != -1)
- printf("\tvlan: %d parent interface: %s\n",
- vreq.vlr_tag, vreq.vlr_parent[0] == '\0' ?
- "<none>" : vreq.vlr_parent);
+ if (getvlan(s, &ifr, &vreq) == -1)
+ return;
+ printf("\tvlan: %d", vreq.vlr_tag);
+ if (ioctl(s, SIOCGVLANPCP, (caddr_t)&ifr) != -1)
+ printf(" vlanpcp: %u", ifr.ifr_vlan_pcp);
+ printf(" parent interface: %s", vreq.vlr_parent[0] == '\0' ?
+ "<none>" : vreq.vlr_parent);
+ printf("\n");
}
static void
@@ -152,6 +169,22 @@ DECL_CMD_FUNC(setvlandev, val, d)
}
static
+DECL_CMD_FUNC(setvlanpcp, val, d)
+{
+ u_long ul;
+ char *endp;
+
+ ul = strtoul(val, &endp, 0);
+ if (*endp != '\0')
+ errx(1, "invalid value for vlanpcp");
+ if (ul > 7)
+ errx(1, "value for vlanpcp out of range");
+ ifr.ifr_vlan_pcp = ul;
+ if (ioctl(s, SIOCSVLANPCP, (caddr_t)&ifr) == -1)
+ err(1, "SIOCSVLANPCP");
+}
+
+static
DECL_CMD_FUNC(unsetvlandev, val, d)
{
struct vlanreq vreq;
@@ -172,6 +205,7 @@ DECL_CMD_FUNC(unsetvlandev, val, d)
static struct cmd vlan_cmds[] = {
DEF_CLONE_CMD_ARG("vlan", setvlantag),
DEF_CLONE_CMD_ARG("vlandev", setvlandev),
+ DEF_CMD_ARG("vlanpcp", setvlanpcp),
/* NB: non-clone cmds */
DEF_CMD_ARG("vlan", setvlantag),
DEF_CMD_ARG("vlandev", setvlandev),
@@ -201,17 +235,11 @@ void
#endif /* __rtems__ */
vlan_ctor(void)
{
-#ifdef __rtems__
- memset(&params, 0, sizeof(params));
- params.vlr_tag = NOTAG;
-#endif /* __rtems__ */
-#define N(a) (sizeof(a) / sizeof(a[0]))
size_t i;
- for (i = 0; i < N(vlan_cmds); i++)
+ for (i = 0; i < nitems(vlan_cmds); i++)
cmd_register(&vlan_cmds[i]);
af_register(&af_vlan);
callback_register(vlan_cb, NULL);
clone_setdefcallback("vlan", vlan_create);
-#undef N
}
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_inet-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_inet-data.h
new file mode 100644
index 00000000..962974a0
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_inet-data.h
@@ -0,0 +1,9 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* af_inet.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct in_aliasreq in_addreq);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifreq in_ridreq);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_inet);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static char addr_buf[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct sockaddr_in *sintab[]);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_inet6-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_inet6-data.h
new file mode 100644
index 00000000..6038db41
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_inet6-data.h
@@ -0,0 +1,13 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* af_inet6.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct in6_ifreq in6_ridreq);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct in6_aliasreq in6_addreq);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static int ip6lifetime);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static int explicit_prefix);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_inet6);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct option in6_Lopt);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static char addr_buf[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct sockaddr_in6 *sin6tab[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct cmd inet6_cmds[]);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_link-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_link-data.h
new file mode 100644
index 00000000..b5f77c05
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_link-data.h
@@ -0,0 +1,8 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* af_link.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifreq link_ridreq);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_link);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_ether);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_lladdr);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_nd6-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_nd6-data.h
new file mode 100644
index 00000000..5ad9960a
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-af_nd6-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* af_nd6.c */
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-data.h
new file mode 100644
index 00000000..d5d6f294
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-data.h
@@ -0,0 +1,39 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+/* af_inet6.c */
+/* af_inet.c */
+/* af_link.c */
+/* af_nd6.c */
+/* ifbridge.c */
+/* ifclone.c */
+/* ifconfig.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern int setaddr);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern struct ifreq ifr);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern char name[16]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern char *descr);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern size_t descrlen);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern int setmask);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern int doalias);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern int clearaddr);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern int newaddr);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern int verbose);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern int noload);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern int printifname);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern int supmedia);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern int printkeys);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern char *f_inet);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern char *f_inet6);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern char *f_ether);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern char *f_addr);
+/* ifgif.c */
+/* ifgre.c */
+/* ifgroup.c */
+/* iflagg.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern char lacpbuf[120]);
+/* ifmac.c */
+/* ifmedia.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern struct ifmedia_description ifm_subtype_ieee80211_mode_descriptions[8]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, extern struct ifmedia_description ifm_subtype_ieee80211_mode_aliases[2]);
+/* ifpfsync.c */
+/* ifvlan.c */
+/* sfp.c */
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifbridge-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifbridge-data.h
new file mode 100644
index 00000000..ba3ee5c6
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifbridge-data.h
@@ -0,0 +1,9 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* ifbridge.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_bridge);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static char const *stpstates[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static char const *stpproto[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static char const *stproles[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct cmd bridge_cmds[]);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifclone-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifclone-data.h
new file mode 100644
index 00000000..746e12c5
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifclone-data.h
@@ -0,0 +1,7 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* ifclone.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct clone_defcb_list clone_defcbh);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct option clone_Copt);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct cmd clone_cmds[]);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifconfig-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifconfig-data.h
new file mode 100644
index 00000000..4ce74977
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifconfig-data.h
@@ -0,0 +1,9 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* ifconfig.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct option *opts);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch *afs);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct cmd *cmds);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct callback *callbacks);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct cmd basic_cmds[]);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifgif-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifgif-data.h
new file mode 100644
index 00000000..2364de3e
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifgif-data.h
@@ -0,0 +1,6 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* ifgif.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_gif);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct cmd gif_cmds[]);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifgre-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifgre-data.h
new file mode 100644
index 00000000..af37782d
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifgre-data.h
@@ -0,0 +1,6 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* ifgre.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_gre);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct cmd gre_cmds[]);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifgroup-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifgroup-data.h
new file mode 100644
index 00000000..3b306f50
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifgroup-data.h
@@ -0,0 +1,7 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* ifgroup.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_group);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct option group_gopt);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct cmd group_cmds[]);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-iflagg-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-iflagg-data.h
new file mode 100644
index 00000000..64f05f94
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-iflagg-data.h
@@ -0,0 +1,6 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* iflagg.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_lagg);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct cmd lagg_cmds[]);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifmac-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifmac-data.h
new file mode 100644
index 00000000..c7bc23f2
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifmac-data.h
@@ -0,0 +1,6 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* ifmac.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_mac);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct cmd mac_cmds[]);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifmedia-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifmedia-data.h
new file mode 100644
index 00000000..a14edd2b
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifmedia-data.h
@@ -0,0 +1,29 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* ifmedia.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmediareq *ifmedia_getstate_ifmr);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static int did_it);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_media);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_type_descriptions[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_ethernet_descriptions[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_ethernet_aliases[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_ethernet_option_descriptions[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_tokenring_descriptions[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_tokenring_aliases[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_tokenring_option_descriptions[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_fddi_descriptions[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_fddi_aliases[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_fddi_option_descriptions[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_ieee80211_descriptions[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_ieee80211_aliases[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_ieee80211_option_descriptions[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_atm_descriptions[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_atm_aliases[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_atm_option_descriptions[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_shared_descriptions[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_subtype_shared_aliases[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_shared_option_descriptions[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_description ifm_shared_option_aliases[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct ifmedia_type_to_subtype ifmedia_types_to_subtypes[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct cmd media_cmds[]);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifpfsync-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifpfsync-data.h
new file mode 100644
index 00000000..ad28104a
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifpfsync-data.h
@@ -0,0 +1,6 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* ifpfsync.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_pfsync);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct cmd pfsync_cmds[]);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifvlan-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifvlan-data.h
new file mode 100644
index 00000000..0d8ffe8a
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-ifvlan-data.h
@@ -0,0 +1,7 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* ifvlan.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct vlanreq params);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct afswtch af_vlan);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct cmd vlan_cmds[]);
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-namespace.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-namespace.h
new file mode 100644
index 00000000..5bf653b8
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-namespace.h
@@ -0,0 +1,76 @@
+/* generated by userspace-header-gen.py */
+/* af_inet6.c */
+#define inet6_ctor _bsd_ifconfig_inet6_ctor
+/* af_inet.c */
+#define inet_ctor _bsd_ifconfig_inet_ctor
+/* af_link.c */
+#define link_ctor _bsd_ifconfig_link_ctor
+/* af_nd6.c */
+#define nd6_status _bsd_ifconfig_nd6_status
+#define setnd6defif _bsd_ifconfig_setnd6defif
+#define setnd6flags _bsd_ifconfig_setnd6flags
+/* ifbridge.c */
+#define bridge_ctor _bsd_ifconfig_bridge_ctor
+/* ifclone.c */
+#define clone_ctor _bsd_ifconfig_clone_ctor
+#define clone_setdefcallback _bsd_ifconfig_clone_setdefcallback
+/* ifconfig.c */
+#define setaddr _bsd_ifconfig_setaddr
+#define ifr _bsd_ifconfig_ifr
+#define name _bsd_ifconfig_name
+#define descr _bsd_ifconfig_descr
+#define descrlen _bsd_ifconfig_descrlen
+#define setmask _bsd_ifconfig_setmask
+#define doalias _bsd_ifconfig_doalias
+#define clearaddr _bsd_ifconfig_clearaddr
+#define newaddr _bsd_ifconfig_newaddr
+#define verbose _bsd_ifconfig_verbose
+#define noload _bsd_ifconfig_noload
+#define printifname _bsd_ifconfig_printifname
+#define supmedia _bsd_ifconfig_supmedia
+#define printkeys _bsd_ifconfig_printkeys
+#define f_inet _bsd_ifconfig_f_inet
+#define f_inet6 _bsd_ifconfig_f_inet6
+#define f_ether _bsd_ifconfig_f_ether
+#define f_addr _bsd_ifconfig_f_addr
+#define ifmaybeload _bsd_ifconfig_ifmaybeload
+#define print_vhid _bsd_ifconfig_print_vhid
+#define printb _bsd_ifconfig_printb
+#define Perror _bsd_ifconfig_Perror
+#define setifcap _bsd_ifconfig_setifcap
+#define callback_register _bsd_ifconfig_callback_register
+#define cmd_register _bsd_ifconfig_cmd_register
+#define af_register _bsd_ifconfig_af_register
+#define printifnamemaybe _bsd_ifconfig_printifnamemaybe
+#define opt_register _bsd_ifconfig_opt_register
+/* ifgif.c */
+#define gif_ctor _bsd_ifconfig_gif_ctor
+/* ifgre.c */
+#define gre_ctor _bsd_ifconfig_gre_ctor
+/* ifgroup.c */
+#define group_ctor _bsd_ifconfig_group_ctor
+/* iflagg.c */
+#define lacpbuf _bsd_ifconfig_lacpbuf
+#define lagg_ctor _bsd_ifconfig_lagg_ctor
+/* ifmac.c */
+#define mac_ctor _bsd_ifconfig_mac_ctor
+/* ifmedia.c */
+#define ifm_subtype_ieee80211_mode_descriptions _bsd_ifconfig_ifm_subtype_ieee80211_mode_descriptions
+#define ifm_subtype_ieee80211_mode_aliases _bsd_ifconfig_ifm_subtype_ieee80211_mode_aliases
+#define ifmedia_ctor _bsd_ifconfig_ifmedia_ctor
+#define ifmedia_getstate _bsd_ifconfig_ifmedia_getstate
+/* ifpfsync.c */
+#define pfsync_ctor _bsd_ifconfig_pfsync_ctor
+#define pfsync_status _bsd_ifconfig_pfsync_status
+#define setpfsync_defer _bsd_ifconfig_setpfsync_defer
+#define setpfsync_maxupd _bsd_ifconfig_setpfsync_maxupd
+#define unsetpfsync_syncpeer _bsd_ifconfig_unsetpfsync_syncpeer
+#define setpfsync_syncpeer _bsd_ifconfig_setpfsync_syncpeer
+#define unsetpfsync_syncdev _bsd_ifconfig_unsetpfsync_syncdev
+#define setpfsync_syncdev _bsd_ifconfig_setpfsync_syncdev
+/* ifvlan.c */
+#define vlan_ctor _bsd_ifconfig_vlan_ctor
+/* sfp.c */
+#define sfp_status _bsd_ifconfig_sfp_status
+#define find_zero_bit _bsd_ifconfig_find_zero_bit
+#define find_value _bsd_ifconfig_find_value
diff --git a/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-sfp-data.h b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-sfp-data.h
new file mode 100644
index 00000000..994d2279
--- /dev/null
+++ b/freebsd/sbin/ifconfig/rtems-bsd-ifconfig-sfp-data.h
@@ -0,0 +1,15 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ifconfig-data.h"
+/* sfp.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static char const *sff_8024_id[23]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct _nv conn[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct _nv eth_10g[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct _nv eth_compat[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct _nv fc_len[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct _nv cab_tech[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct _nv fc_media[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct _nv fc_speed[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct _nv eth_1040g[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct _nv eth_extended_comp[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ifconfig, static struct _nv rev_compl[]);
diff --git a/freebsd/sbin/ifconfig/sfp.c b/freebsd/sbin/ifconfig/sfp.c
new file mode 100644
index 00000000..55fb7c7e
--- /dev/null
+++ b/freebsd/sbin/ifconfig/sfp.c
@@ -0,0 +1,935 @@
+#include <machine/rtems-bsd-user-space.h>
+
+#ifdef __rtems__
+#include "rtems-bsd-ifconfig-namespace.h"
+#endif /* __rtems__ */
+
+/*-
+ * Copyright (c) 2014 Alexander V. Chernikov. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static const char rcsid[] =
+ "$FreeBSD$";
+#endif /* not lint */
+
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
+#include <sys/types.h>
+#include <rtems/bsd/sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include <net/if.h>
+#include <net/sff8436.h>
+#include <net/sff8472.h>
+
+#include <math.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "ifconfig.h"
+#ifdef __rtems__
+struct _nv {
+ int v;
+ const char *n;
+};
+
+#include "rtems-bsd-ifconfig-sfp-data.h"
+#endif /* __rtems__ */
+
+struct i2c_info {
+ int fd; /* fd to issue SIOCGI2C */
+ int error; /* Store first error */
+ int qsfp; /* True if transceiver is QSFP */
+ int do_diag; /* True if we need to request DDM */
+ struct ifreq *ifr; /* Pointer to pre-filled ifreq */
+};
+
+static int read_i2c(struct i2c_info *ii, uint8_t addr, uint8_t off,
+ uint8_t len, uint8_t *buf);
+static void dump_i2c_data(struct i2c_info *ii, uint8_t addr, uint8_t off,
+ uint8_t len);
+
+#ifndef __rtems__
+struct _nv {
+ int v;
+ const char *n;
+};
+#endif /* __rtems__ */
+
+const char *find_value(struct _nv *x, int value);
+const char *find_zero_bit(struct _nv *x, int value, int sz);
+
+/* SFF-8472 Rev. 11.4 table 3.4: Connector values */
+static struct _nv conn[] = {
+ { 0x00, "Unknown" },
+ { 0x01, "SC" },
+ { 0x02, "Fibre Channel Style 1 copper" },
+ { 0x03, "Fibre Channel Style 2 copper" },
+ { 0x04, "BNC/TNC" },
+ { 0x05, "Fibre Channel coaxial" },
+ { 0x06, "FiberJack" },
+ { 0x07, "LC" },
+ { 0x08, "MT-RJ" },
+ { 0x09, "MU" },
+ { 0x0A, "SG" },
+ { 0x0B, "Optical pigtail" },
+ { 0x0C, "MPO Parallel Optic" },
+ { 0x20, "HSSDC II" },
+ { 0x21, "Copper pigtail" },
+ { 0x22, "RJ45" },
+ { 0x23, "No separate connector" }, /* SFF-8436 */
+ { 0, NULL }
+};
+
+/* SFF-8472 Rev. 11.4 table 3.5: Transceiver codes */
+/* 10G Ethernet/IB compliance codes, byte 3 */
+static struct _nv eth_10g[] = {
+ { 0x80, "10G Base-ER" },
+ { 0x40, "10G Base-LRM" },
+ { 0x20, "10G Base-LR" },
+ { 0x10, "10G Base-SR" },
+ { 0x08, "1X SX" },
+ { 0x04, "1X LX" },
+ { 0x02, "1X Copper Active" },
+ { 0x01, "1X Copper Passive" },
+ { 0, NULL }
+};
+
+/* Ethernet compliance codes, byte 6 */
+static struct _nv eth_compat[] = {
+ { 0x80, "BASE-PX" },
+ { 0x40, "BASE-BX10" },
+ { 0x20, "100BASE-FX" },
+ { 0x10, "100BASE-LX/LX10" },
+ { 0x08, "1000BASE-T" },
+ { 0x04, "1000BASE-CX" },
+ { 0x02, "1000BASE-LX" },
+ { 0x01, "1000BASE-SX" },
+ { 0, NULL }
+};
+
+/* FC link length, byte 7 */
+static struct _nv fc_len[] = {
+ { 0x80, "very long distance" },
+ { 0x40, "short distance" },
+ { 0x20, "intermediate distance" },
+ { 0x10, "long distance" },
+ { 0x08, "medium distance" },
+ { 0, NULL }
+};
+
+/* Channel/Cable technology, byte 7-8 */
+static struct _nv cab_tech[] = {
+ { 0x0400, "Shortwave laser (SA)" },
+ { 0x0200, "Longwave laser (LC)" },
+ { 0x0100, "Electrical inter-enclosure (EL)" },
+ { 0x80, "Electrical intra-enclosure (EL)" },
+ { 0x40, "Shortwave laser (SN)" },
+ { 0x20, "Shortwave laser (SL)" },
+ { 0x10, "Longwave laser (LL)" },
+ { 0x08, "Active Cable" },
+ { 0x04, "Passive Cable" },
+ { 0, NULL }
+};
+
+/* FC Transmission media, byte 9 */
+static struct _nv fc_media[] = {
+ { 0x80, "Twin Axial Pair" },
+ { 0x40, "Twisted Pair" },
+ { 0x20, "Miniature Coax" },
+ { 0x10, "Viao Coax" },
+ { 0x08, "Miltimode, 62.5um" },
+ { 0x04, "Multimode, 50um" },
+ { 0x02, "" },
+ { 0x01, "Single Mode" },
+ { 0, NULL }
+};
+
+/* FC Speed, byte 10 */
+static struct _nv fc_speed[] = {
+ { 0x80, "1200 MBytes/sec" },
+ { 0x40, "800 MBytes/sec" },
+ { 0x20, "1600 MBytes/sec" },
+ { 0x10, "400 MBytes/sec" },
+ { 0x08, "3200 MBytes/sec" },
+ { 0x04, "200 MBytes/sec" },
+ { 0x01, "100 MBytes/sec" },
+ { 0, NULL }
+};
+
+/* SFF-8436 Rev. 4.8 table 33: Specification compliance */
+
+/* 10/40G Ethernet compliance codes, byte 128 + 3 */
+static struct _nv eth_1040g[] = {
+ { 0x80, "Extended" },
+ { 0x40, "10GBASE-LRM" },
+ { 0x20, "10GBASE-LR" },
+ { 0x10, "10GBASE-SR" },
+ { 0x08, "40GBASE-CR4" },
+ { 0x04, "40GBASE-SR4" },
+ { 0x02, "40GBASE-LR4" },
+ { 0x01, "40G Active Cable" },
+ { 0, NULL }
+};
+#define SFF_8636_EXT_COMPLIANCE 0x80
+
+/* SFF-8024 Rev. 3.4 table 4.4: Extended Specification Compliance */
+static struct _nv eth_extended_comp[] = {
+ { 0xFF, "Reserved" },
+ { 0x1A, "2 lambda DWDM 100G" },
+ { 0x19, "100G ACC or 25GAUI C2M ACC" },
+ { 0x18, "100G AOC or 25GAUI C2M AOC" },
+ { 0x17, "100G CLR4" },
+ { 0x16, "10GBASE-T with SFI electrical interface" },
+ { 0x15, "G959.1 profile P1L1-2D2" },
+ { 0x14, "G959.1 profile P1S1-2D2" },
+ { 0x13, "G959.1 profile P1I1-2D1" },
+ { 0x12, "40G PSM4 Parallel SMF" },
+ { 0x11, "4 x 10GBASE-SR" },
+ { 0x10, "40GBASE-ER4" },
+ { 0x0F, "Reserved" },
+ { 0x0D, "25GBASE-CR CA-N" },
+ { 0x0C, "25GBASE-CR CA-S" },
+ { 0x0B, "100GBASE-CR4 or 25GBASE-CR CA-L" },
+ { 0x0A, "Reserved" },
+ { 0x09, "100G CWDM4 MSA without FEC" },
+ { 0x08, "100G ACC (Active Copper Cable)" },
+ { 0x07, "100G PSM4 Parallel SMF" },
+ { 0x06, "100G CWDM4 MSA with FEC" },
+ { 0x05, "100GBASE-SR10" },
+ { 0x04, "100GBASE-ER4" },
+ { 0x03, "100GBASE-LR4" },
+ { 0x02, "100GBASE-SR4" },
+ { 0x01, "100G AOC (Active Optical Cable) or 25GAUI C2M ACC" },
+ { 0x00, "Unspecified" }
+};
+
+/* SFF-8636 Rev. 2.5 table 6.3: Revision compliance */
+static struct _nv rev_compl[] = {
+ { 0x1, "SFF-8436 rev <=4.8" },
+ { 0x2, "SFF-8436 rev <=4.8" },
+ { 0x3, "SFF-8636 rev <=1.3" },
+ { 0x4, "SFF-8636 rev <=1.4" },
+ { 0x5, "SFF-8636 rev <=1.5" },
+ { 0x6, "SFF-8636 rev <=2.0" },
+ { 0x7, "SFF-8636 rev <=2.5" },
+ { 0x0, "Unspecified" }
+};
+
+const char *
+find_value(struct _nv *x, int value)
+{
+ for (; x->n != NULL; x++)
+ if (x->v == value)
+ return (x->n);
+ return (NULL);
+}
+
+const char *
+find_zero_bit(struct _nv *x, int value, int sz)
+{
+ int v, m;
+ const char *s;
+
+ v = 1;
+ for (v = 1, m = 1 << (8 * sz); v < m; v *= 2) {
+ if ((value & v) == 0)
+ continue;
+ if ((s = find_value(x, value & v)) != NULL) {
+ value &= ~v;
+ return (s);
+ }
+ }
+
+ return (NULL);
+}
+
+static void
+convert_sff_identifier(char *buf, size_t size, uint8_t value)
+{
+ const char *x;
+
+ x = NULL;
+ if (value <= SFF_8024_ID_LAST)
+ x = sff_8024_id[value];
+ else {
+ if (value > 0x80)
+ x = "Vendor specific";
+ else
+ x = "Reserved";
+ }
+
+ snprintf(buf, size, "%s", x);
+}
+
+static void
+convert_sff_connector(char *buf, size_t size, uint8_t value)
+{
+ const char *x;
+
+ if ((x = find_value(conn, value)) == NULL) {
+ if (value >= 0x0D && value <= 0x1F)
+ x = "Unallocated";
+ else if (value >= 0x24 && value <= 0x7F)
+ x = "Unallocated";
+ else
+ x = "Vendor specific";
+ }
+
+ snprintf(buf, size, "%s", x);
+}
+
+static void
+convert_sff_rev_compliance(char *buf, size_t size, uint8_t value)
+{
+ const char *x;
+
+ if (value > 0x07)
+ x = "Unallocated";
+ else
+ x = find_value(rev_compl, value);
+
+ snprintf(buf, size, "%s", x);
+}
+
+static void
+get_sfp_identifier(struct i2c_info *ii, char *buf, size_t size)
+{
+ uint8_t data;
+
+ read_i2c(ii, SFF_8472_BASE, SFF_8472_ID, 1, &data);
+ convert_sff_identifier(buf, size, data);
+}
+
+static void
+get_sfp_connector(struct i2c_info *ii, char *buf, size_t size)
+{
+ uint8_t data;
+
+ read_i2c(ii, SFF_8472_BASE, SFF_8472_CONNECTOR, 1, &data);
+ convert_sff_connector(buf, size, data);
+}
+
+static void
+get_qsfp_identifier(struct i2c_info *ii, char *buf, size_t size)
+{
+ uint8_t data;
+
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_ID, 1, &data);
+ convert_sff_identifier(buf, size, data);
+}
+
+static void
+get_qsfp_connector(struct i2c_info *ii, char *buf, size_t size)
+{
+ uint8_t data;
+
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_CONNECTOR, 1, &data);
+ convert_sff_connector(buf, size, data);
+}
+
+static void
+printf_sfp_transceiver_descr(struct i2c_info *ii, char *buf, size_t size)
+{
+ char xbuf[12];
+ const char *tech_class, *tech_len, *tech_tech, *tech_media, *tech_speed;
+
+ tech_class = NULL;
+ tech_len = NULL;
+ tech_tech = NULL;
+ tech_media = NULL;
+ tech_speed = NULL;
+
+ /* Read bytes 3-10 at once */
+ read_i2c(ii, SFF_8472_BASE, SFF_8472_TRANS_START, 8, &xbuf[3]);
+
+ /* Check 10G ethernet first */
+ tech_class = find_zero_bit(eth_10g, xbuf[3], 1);
+ if (tech_class == NULL) {
+ /* No match. Try 1G */
+ tech_class = find_zero_bit(eth_compat, xbuf[6], 1);
+ }
+
+ tech_len = find_zero_bit(fc_len, xbuf[7], 1);
+ tech_tech = find_zero_bit(cab_tech, xbuf[7] << 8 | xbuf[8], 2);
+ tech_media = find_zero_bit(fc_media, xbuf[9], 1);
+ tech_speed = find_zero_bit(fc_speed, xbuf[10], 1);
+
+ printf("Class: %s\n", tech_class);
+ printf("Length: %s\n", tech_len);
+ printf("Tech: %s\n", tech_tech);
+ printf("Media: %s\n", tech_media);
+ printf("Speed: %s\n", tech_speed);
+}
+
+static void
+get_sfp_transceiver_class(struct i2c_info *ii, char *buf, size_t size)
+{
+ const char *tech_class;
+ uint8_t code;
+
+ unsigned char qbuf[8];
+ read_i2c(ii, SFF_8472_BASE, SFF_8472_TRANS_START, 8, (uint8_t *)qbuf);
+
+ /* Check 10G Ethernet/IB first */
+ read_i2c(ii, SFF_8472_BASE, SFF_8472_TRANS_START, 1, &code);
+ tech_class = find_zero_bit(eth_10g, code, 1);
+ if (tech_class == NULL) {
+ /* No match. Try Ethernet 1G */
+ read_i2c(ii, SFF_8472_BASE, SFF_8472_TRANS_START + 3,
+ 1, (caddr_t)&code);
+ tech_class = find_zero_bit(eth_compat, code, 1);
+ }
+
+ if (tech_class == NULL)
+ tech_class = "Unknown";
+
+ snprintf(buf, size, "%s", tech_class);
+}
+
+static void
+get_qsfp_transceiver_class(struct i2c_info *ii, char *buf, size_t size)
+{
+ const char *tech_class;
+ uint8_t code;
+
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_CODE_E1040100G, 1, &code);
+
+ /* Check for extended specification compliance */
+ if (code & SFF_8636_EXT_COMPLIANCE) {
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_OPTIONS_START, 1, &code);
+ tech_class = find_value(eth_extended_comp, code);
+ } else
+ /* Check 10/40G Ethernet class only */
+ tech_class = find_zero_bit(eth_1040g, code, 1);
+
+ if (tech_class == NULL)
+ tech_class = "Unknown";
+
+ snprintf(buf, size, "%s", tech_class);
+}
+
+/*
+ * Print SFF-8472/SFF-8436 string to supplied buffer.
+ * All (vendor-specific) strings are padded right with '0x20'.
+ */
+static void
+convert_sff_name(char *buf, size_t size, char *xbuf)
+{
+ char *p;
+
+ for (p = &xbuf[16]; *(p - 1) == 0x20; p--)
+ ;
+ *p = '\0';
+ snprintf(buf, size, "%s", xbuf);
+}
+
+static void
+convert_sff_date(char *buf, size_t size, char *xbuf)
+{
+
+ snprintf(buf, size, "20%c%c-%c%c-%c%c", xbuf[0], xbuf[1],
+ xbuf[2], xbuf[3], xbuf[4], xbuf[5]);
+}
+
+static void
+get_sfp_vendor_name(struct i2c_info *ii, char *buf, size_t size)
+{
+ char xbuf[17];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8472_BASE, SFF_8472_VENDOR_START, 16, (uint8_t *)xbuf);
+ convert_sff_name(buf, size, xbuf);
+}
+
+static void
+get_sfp_vendor_pn(struct i2c_info *ii, char *buf, size_t size)
+{
+ char xbuf[17];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8472_BASE, SFF_8472_PN_START, 16, (uint8_t *)xbuf);
+ convert_sff_name(buf, size, xbuf);
+}
+
+static void
+get_sfp_vendor_sn(struct i2c_info *ii, char *buf, size_t size)
+{
+ char xbuf[17];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8472_BASE, SFF_8472_SN_START, 16, (uint8_t *)xbuf);
+ convert_sff_name(buf, size, xbuf);
+}
+
+static void
+get_sfp_vendor_date(struct i2c_info *ii, char *buf, size_t size)
+{
+ char xbuf[6];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ /* Date code, see Table 3.8 for description */
+ read_i2c(ii, SFF_8472_BASE, SFF_8472_DATE_START, 6, (uint8_t *)xbuf);
+ convert_sff_date(buf, size, xbuf);
+}
+
+static void
+get_qsfp_vendor_name(struct i2c_info *ii, char *buf, size_t size)
+{
+ char xbuf[17];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_VENDOR_START, 16, (uint8_t *)xbuf);
+ convert_sff_name(buf, size, xbuf);
+}
+
+static void
+get_qsfp_vendor_pn(struct i2c_info *ii, char *buf, size_t size)
+{
+ char xbuf[17];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_PN_START, 16, (uint8_t *)xbuf);
+ convert_sff_name(buf, size, xbuf);
+}
+
+static void
+get_qsfp_vendor_sn(struct i2c_info *ii, char *buf, size_t size)
+{
+ char xbuf[17];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_SN_START, 16, (uint8_t *)xbuf);
+ convert_sff_name(buf, size, xbuf);
+}
+
+static void
+get_qsfp_vendor_date(struct i2c_info *ii, char *buf, size_t size)
+{
+ char xbuf[6];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_DATE_START, 6, (uint8_t *)xbuf);
+ convert_sff_date(buf, size, xbuf);
+}
+
+static void
+print_sfp_vendor(struct i2c_info *ii, char *buf, size_t size)
+{
+ char xbuf[80];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ if (ii->qsfp != 0) {
+ get_qsfp_vendor_name(ii, xbuf, 20);
+ get_qsfp_vendor_pn(ii, &xbuf[20], 20);
+ get_qsfp_vendor_sn(ii, &xbuf[40], 20);
+ get_qsfp_vendor_date(ii, &xbuf[60], 20);
+ } else {
+ get_sfp_vendor_name(ii, xbuf, 20);
+ get_sfp_vendor_pn(ii, &xbuf[20], 20);
+ get_sfp_vendor_sn(ii, &xbuf[40], 20);
+ get_sfp_vendor_date(ii, &xbuf[60], 20);
+ }
+
+ snprintf(buf, size, "vendor: %s PN: %s SN: %s DATE: %s",
+ xbuf, &xbuf[20], &xbuf[40], &xbuf[60]);
+}
+
+/*
+ * Converts internal temperature (SFF-8472, SFF-8436)
+ * 16-bit unsigned value to human-readable representation:
+ *
+ * Internally measured Module temperature are represented
+ * as a 16-bit signed twos complement value in increments of
+ * 1/256 degrees Celsius, yielding a total range of –128C to +128C
+ * that is considered valid between –40 and +125C.
+ *
+ */
+static void
+convert_sff_temp(char *buf, size_t size, uint8_t *xbuf)
+{
+ double d;
+
+ d = (double)xbuf[0];
+ d += (double)xbuf[1] / 256;
+
+ snprintf(buf, size, "%.2f C", d);
+}
+
+/*
+ * Retrieves supplied voltage (SFF-8472, SFF-8436).
+ * 16-bit unsigned value, treated as range 0..+6.55 Volts
+ */
+static void
+convert_sff_voltage(char *buf, size_t size, uint8_t *xbuf)
+{
+ double d;
+
+ d = (double)((xbuf[0] << 8) | xbuf[1]);
+ snprintf(buf, size, "%.2f Volts", d / 10000);
+}
+
+/*
+ * Converts value in @xbuf to both milliwatts and dBm
+ * human representation.
+ */
+static void
+convert_sff_power(struct i2c_info *ii, char *buf, size_t size, uint8_t *xbuf)
+{
+ uint16_t mW;
+ double dbm;
+
+ mW = (xbuf[0] << 8) + xbuf[1];
+
+ /* Convert mw to dbm */
+ dbm = 10.0 * log10(1.0 * mW / 10000);
+
+ /*
+ * Assume internally-calibrated data.
+ * This is always true for SFF-8346, and explicitly
+ * checked for SFF-8472.
+ */
+
+ /* Table 3.9, bit 5 is set, internally calibrated */
+ snprintf(buf, size, "%d.%02d mW (%.2f dBm)",
+ mW / 10000, (mW % 10000) / 100, dbm);
+}
+
+static void
+get_sfp_temp(struct i2c_info *ii, char *buf, size_t size)
+{
+ uint8_t xbuf[2];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8472_DIAG, SFF_8472_TEMP, 2, xbuf);
+ convert_sff_temp(buf, size, xbuf);
+}
+
+static void
+get_sfp_voltage(struct i2c_info *ii, char *buf, size_t size)
+{
+ uint8_t xbuf[2];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8472_DIAG, SFF_8472_VCC, 2, xbuf);
+ convert_sff_voltage(buf, size, xbuf);
+}
+
+static int
+get_qsfp_temp(struct i2c_info *ii, char *buf, size_t size)
+{
+ uint8_t xbuf[2];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_TEMP, 2, xbuf);
+ if ((xbuf[0] == 0xFF && xbuf[1] == 0xFF) || (xbuf[0] == 0 && xbuf[1] == 0))
+ return (-1);
+ convert_sff_temp(buf, size, xbuf);
+ return (0);
+}
+
+static void
+get_qsfp_voltage(struct i2c_info *ii, char *buf, size_t size)
+{
+ uint8_t xbuf[2];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_VCC, 2, xbuf);
+ convert_sff_voltage(buf, size, xbuf);
+}
+
+static void
+get_sfp_rx_power(struct i2c_info *ii, char *buf, size_t size)
+{
+ uint8_t xbuf[2];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8472_DIAG, SFF_8472_RX_POWER, 2, xbuf);
+ convert_sff_power(ii, buf, size, xbuf);
+}
+
+static void
+get_sfp_tx_power(struct i2c_info *ii, char *buf, size_t size)
+{
+ uint8_t xbuf[2];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8472_DIAG, SFF_8472_TX_POWER, 2, xbuf);
+ convert_sff_power(ii, buf, size, xbuf);
+}
+
+static void
+get_qsfp_rx_power(struct i2c_info *ii, char *buf, size_t size, int chan)
+{
+ uint8_t xbuf[2];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_RX_CH1_MSB + (chan-1)*2, 2, xbuf);
+ convert_sff_power(ii, buf, size, xbuf);
+}
+
+static void
+get_qsfp_tx_power(struct i2c_info *ii, char *buf, size_t size, int chan)
+{
+ uint8_t xbuf[2];
+
+ memset(xbuf, 0, sizeof(xbuf));
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_TX_CH1_MSB + (chan-1)*2, 2, xbuf);
+ convert_sff_power(ii, buf, size, xbuf);
+}
+
+static void
+get_qsfp_rev_compliance(struct i2c_info *ii, char *buf, size_t size)
+{
+ uint8_t xbuf;
+
+ xbuf = 0;
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_STATUS, 1, &xbuf);
+ convert_sff_rev_compliance(buf, size, xbuf);
+}
+
+static uint32_t
+get_qsfp_br(struct i2c_info *ii)
+{
+ uint8_t xbuf;
+ uint32_t rate;
+
+ xbuf = 0;
+ read_i2c(ii, SFF_8436_BASE, SFF_8436_BITRATE, 1, &xbuf);
+ rate = xbuf * 100;
+ if (xbuf == 0xFF) {
+ read_i2c(ii, SFF_8436_BASE, SFF_8636_BITRATE, 1, &xbuf);
+ rate = xbuf * 250;
+ }
+
+ return (rate);
+}
+
+/*
+ * Reads i2c data from opened kernel socket.
+ */
+static int
+read_i2c(struct i2c_info *ii, uint8_t addr, uint8_t off, uint8_t len,
+ uint8_t *buf)
+{
+ struct ifi2creq req;
+ int i, l;
+
+ if (ii->error != 0)
+ return (ii->error);
+
+ ii->ifr->ifr_data = (caddr_t)&req;
+
+ i = 0;
+ l = 0;
+ memset(&req, 0, sizeof(req));
+ req.dev_addr = addr;
+ req.offset = off;
+ req.len = len;
+
+ while (len > 0) {
+ l = MIN(sizeof(req.data), len);
+ req.len = l;
+ if (ioctl(ii->fd, SIOCGI2C, ii->ifr) != 0) {
+ ii->error = errno;
+ return (errno);
+ }
+
+ memcpy(&buf[i], req.data, l);
+ len -= l;
+ i += l;
+ req.offset += l;
+ }
+
+ return (0);
+}
+
+static void
+dump_i2c_data(struct i2c_info *ii, uint8_t addr, uint8_t off, uint8_t len)
+{
+ unsigned char buf[16];
+ int i, read;
+
+ while (len > 0) {
+ memset(buf, 0, sizeof(buf));
+ read = MIN(sizeof(buf), len);
+ read_i2c(ii, addr, off, read, buf);
+ if (ii->error != 0) {
+ fprintf(stderr, "Error reading i2c info\n");
+ return;
+ }
+
+ printf("\t");
+ for (i = 0; i < read; i++)
+ printf("%02X ", buf[i]);
+ printf("\n");
+ len -= read;
+ off += read;
+ }
+}
+
+static void
+print_qsfp_status(struct i2c_info *ii, int verbose)
+{
+ char buf[80], buf2[40], buf3[40];
+ uint32_t bitrate;
+ int i;
+
+ ii->qsfp = 1;
+
+ /* Transceiver type */
+ get_qsfp_identifier(ii, buf, sizeof(buf));
+ get_qsfp_transceiver_class(ii, buf2, sizeof(buf2));
+ get_qsfp_connector(ii, buf3, sizeof(buf3));
+ if (ii->error == 0)
+ printf("\tplugged: %s %s (%s)\n", buf, buf2, buf3);
+ print_sfp_vendor(ii, buf, sizeof(buf));
+ if (ii->error == 0)
+ printf("\t%s\n", buf);
+
+ if (verbose > 1) {
+ get_qsfp_rev_compliance(ii, buf, sizeof(buf));
+ if (ii->error == 0)
+ printf("\tcompliance level: %s\n", buf);
+
+ bitrate = get_qsfp_br(ii);
+ if (ii->error == 0 && bitrate > 0)
+ printf("\tnominal bitrate: %u Mbps\n", bitrate);
+ }
+
+ /*
+ * The standards in this area are not clear when the
+ * additional measurements are present or not. Use a valid
+ * temperature reading as an indicator for the presence of
+ * voltage and TX/RX power measurements.
+ */
+ if (get_qsfp_temp(ii, buf, sizeof(buf)) == 0) {
+ get_qsfp_voltage(ii, buf2, sizeof(buf2));
+ printf("\tmodule temperature: %s voltage: %s\n", buf, buf2);
+ for (i = 1; i <= 4; i++) {
+ get_qsfp_rx_power(ii, buf, sizeof(buf), i);
+ get_qsfp_tx_power(ii, buf2, sizeof(buf2), i);
+ printf("\tlane %d: RX: %s TX: %s\n", i, buf, buf2);
+ }
+ }
+
+ if (verbose > 2) {
+ printf("\n\tSFF8436 DUMP (0xA0 128..255 range):\n");
+ dump_i2c_data(ii, SFF_8436_BASE, 128, 128);
+ printf("\n\tSFF8436 DUMP (0xA0 0..81 range):\n");
+ dump_i2c_data(ii, SFF_8436_BASE, 0, 82);
+ }
+}
+
+static void
+print_sfp_status(struct i2c_info *ii, int verbose)
+{
+ char buf[80], buf2[40], buf3[40];
+ uint8_t diag_type, flags;
+
+ /* Read diagnostic monitoring type */
+ read_i2c(ii, SFF_8472_BASE, SFF_8472_DIAG_TYPE, 1, (caddr_t)&diag_type);
+ if (ii->error != 0)
+ return;
+
+ /*
+ * Read monitoring data IFF it is supplied AND is
+ * internally calibrated
+ */
+ flags = SFF_8472_DDM_DONE | SFF_8472_DDM_INTERNAL;
+ if ((diag_type & flags) == flags)
+ ii->do_diag = 1;
+
+ /* Transceiver type */
+ get_sfp_identifier(ii, buf, sizeof(buf));
+ get_sfp_transceiver_class(ii, buf2, sizeof(buf2));
+ get_sfp_connector(ii, buf3, sizeof(buf3));
+ if (ii->error == 0)
+ printf("\tplugged: %s %s (%s)\n", buf, buf2, buf3);
+ print_sfp_vendor(ii, buf, sizeof(buf));
+ if (ii->error == 0)
+ printf("\t%s\n", buf);
+
+ if (verbose > 5)
+ printf_sfp_transceiver_descr(ii, buf, sizeof(buf));
+ /*
+ * Request current measurements iff they are provided:
+ */
+ if (ii->do_diag != 0) {
+ get_sfp_temp(ii, buf, sizeof(buf));
+ get_sfp_voltage(ii, buf2, sizeof(buf2));
+ printf("\tmodule temperature: %s Voltage: %s\n", buf, buf2);
+ get_sfp_rx_power(ii, buf, sizeof(buf));
+ get_sfp_tx_power(ii, buf2, sizeof(buf2));
+ printf("\tRX: %s TX: %s\n", buf, buf2);
+ }
+
+ if (verbose > 2) {
+ printf("\n\tSFF8472 DUMP (0xA0 0..127 range):\n");
+ dump_i2c_data(ii, SFF_8472_BASE, 0, 128);
+ }
+}
+
+void
+sfp_status(int s, struct ifreq *ifr, int verbose)
+{
+ struct i2c_info ii;
+ uint8_t id_byte;
+
+	/* Prepare necessary info to pass to i2c reader */
+ memset(&ii, 0, sizeof(ii));
+ ii.fd = s;
+ ii.ifr = ifr;
+
+ /*
+ * Try to read byte 0 from i2c:
+ * Both SFF-8472 and SFF-8436 use it as
+ * 'identification byte'.
+ * Stop reading status on zero as value -
+ * this might happen in case of empty transceiver slot.
+ */
+ id_byte = 0;
+ read_i2c(&ii, SFF_8472_BASE, SFF_8472_ID, 1, (caddr_t)&id_byte);
+ if (ii.error != 0 || id_byte == 0)
+ return;
+
+ switch (id_byte) {
+ case SFF_8024_ID_QSFP:
+ case SFF_8024_ID_QSFPPLUS:
+ case SFF_8024_ID_QSFP28:
+ print_qsfp_status(&ii, verbose);
+ break;
+ default:
+ print_sfp_status(&ii, verbose);
+ }
+}
+
diff --git a/freebsd/contrib/pf/pfctl/parse.c b/freebsd/sbin/pfctl/parse.c
index ffe7c1a8..1ae5fc95 100644
--- a/freebsd/contrib/pf/pfctl/parse.c
+++ b/freebsd/sbin/pfctl/parse.c
@@ -5,7 +5,7 @@
#define YYBYACC 1
#define YYMAJOR 1
#define YYMINOR 9
-#define YYPATCH 20141006
+#define YYPATCH 20160324
#define YYEMPTY (-1)
#define yyclearin (yychar = YYEMPTY)
@@ -97,17 +97,23 @@
#define YYPURE 0
-#line 30 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 30 "../../freebsd/sbin/pfctl/parse.y"
#ifdef __rtems__
#include <machine/rtems-bsd-user-space.h>
+#endif /* __rtems__ */
+
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-namespace.h"
+#endif /* __rtems__ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef __rtems__
#include <machine/rtems-bsd-program.h>
#define pf_find_or_create_ruleset _bsd_pf_find_or_create_ruleset
#define pf_anchor_setup _bsd_pf_anchor_setup
#define pf_remove_if_empty_ruleset _bsd_pf_remove_if_empty_ruleset
#endif /* __rtems__ */
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
@@ -122,10 +128,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/icmp6.h>
#include <net/pfvar.h>
#include <arpa/inet.h>
-#include <altq/altq.h>
-#include <altq/altq_cbq.h>
-#include <altq/altq_priq.h>
-#include <altq/altq_hfsc.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_cbq.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_priq.h>
+#include <net/altq/altq_hfsc.h>
+#include <net/altq/altq_fairq.h>
#include <stdio.h>
#include <unistd.h>
@@ -144,6 +152,9 @@ __FBSDID("$FreeBSD$");
#include "pfctl_parser.h"
#include "pfctl.h"
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-parse-data.h"
+#endif /* __rtems__ */
static struct pfctl *pf = NULL;
static int debug = 0;
@@ -156,11 +167,7 @@ static int blockpolicy = PFRULE_DROP;
static int require_order = 1;
static int default_statelock;
-#ifndef __rtems__
-TAILQ_HEAD(files, file) files = TAILQ_HEAD_INITIALIZER(files);
-#else /* __rtems__ */
static TAILQ_HEAD(files, file) files = TAILQ_HEAD_INITIALIZER(files);
-#endif /* __rtems__ */
static struct file {
TAILQ_ENTRY(file) entry;
FILE *stream;
@@ -180,11 +187,7 @@ int lgetc(int);
int lungetc(int);
int findeol(void);
-#ifndef __rtems__
-TAILQ_HEAD(symhead, sym) symhead = TAILQ_HEAD_INITIALIZER(symhead);
-#else /* __rtems__ */
static TAILQ_HEAD(symhead, sym) symhead = TAILQ_HEAD_INITIALIZER(symhead);
-#endif /* __rtems__ */
struct sym {
TAILQ_ENTRY(sym) entry;
int used;
@@ -245,8 +248,7 @@ enum { PF_STATE_OPT_MAX, PF_STATE_OPT_NOSYNC, PF_STATE_OPT_SRCTRACK,
PF_STATE_OPT_MAX_SRC_STATES, PF_STATE_OPT_MAX_SRC_CONN,
PF_STATE_OPT_MAX_SRC_CONN_RATE, PF_STATE_OPT_MAX_SRC_NODES,
PF_STATE_OPT_OVERLOAD, PF_STATE_OPT_STATELOCK,
- PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY,
- PF_STATE_OPT_PFLOW };
+ PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY, };
enum { PF_SRCTRACK_NONE, PF_SRCTRACK, PF_SRCTRACK_GLOBAL, PF_SRCTRACK_RULE };
@@ -281,11 +283,7 @@ struct peer {
struct node_port *port;
};
-#ifndef __rtems__
-struct node_queue {
-#else /* __rtems__ */
static struct node_queue {
-#endif /* __rtems__ */
char queue[PF_QNAME_SIZE];
char parent[PF_QNAME_SIZE];
char ifname[IFNAMSIZ];
@@ -299,17 +297,15 @@ struct node_qassign {
char *pqname;
};
-#ifndef __rtems__
-struct filter_opts {
-#else /* __rtems__ */
static struct filter_opts {
-#endif /* __rtems__ */
int marker;
#define FOM_FLAGS 0x01
#define FOM_ICMP 0x02
#define FOM_TOS 0x04
#define FOM_KEEP 0x08
#define FOM_SRCTRACK 0x10
+#define FOM_SETPRIO 0x0400
+#define FOM_PRIO 0x2000
struct node_uid *uid;
struct node_gid *gid;
struct {
@@ -333,26 +329,20 @@ static struct filter_opts {
char *match_tag;
u_int8_t match_tag_not;
u_int rtableid;
+ u_int8_t prio;
+ u_int8_t set_prio[2];
struct {
struct node_host *addr;
u_int16_t port;
} divert;
} filter_opts;
-#ifndef __rtems__
-struct antispoof_opts {
-#else /* __rtems__ */
static struct antispoof_opts {
-#endif /* __rtems__ */
char *label;
u_int rtableid;
} antispoof_opts;
-#ifndef __rtems__
-struct scrub_opts {
-#else /* __rtems__ */
static struct scrub_opts {
-#endif /* __rtems__ */
int marker;
#define SOM_MINTTL 0x01
#define SOM_MAXMSS 0x02
@@ -370,11 +360,7 @@ static struct scrub_opts {
u_int rtableid;
} scrub_opts;
-#ifndef __rtems__
-struct queue_opts {
-#else /* __rtems__ */
static struct queue_opts {
-#endif /* __rtems__ */
int marker;
#define QOM_BWSPEC 0x01
#define QOM_SCHEDULER 0x02
@@ -388,21 +374,13 @@ static struct queue_opts {
int qlimit;
} queue_opts;
-#ifndef __rtems__
-struct table_opts {
-#else /* __rtems__ */
static struct table_opts {
-#endif /* __rtems__ */
int flags;
int init_addr;
struct node_tinithead init_nodes;
} table_opts;
-#ifndef __rtems__
-struct pool_opts {
-#else /* __rtems__ */
static struct pool_opts {
-#endif /* __rtems__ */
int marker;
#define POM_TYPE 0x01
#define POM_STICKYADDRESS 0x02
@@ -413,14 +391,10 @@ static struct pool_opts {
} pool_opts;
-
-#ifndef __rtems__
-struct node_hfsc_opts hfsc_opts;
-struct node_state_opt *keep_state_defaults = NULL;
-#else /* __rtems__ */
+static struct codel_opts codel_opts;
static struct node_hfsc_opts hfsc_opts;
+static struct node_fairq_opts fairq_opts;
static struct node_state_opt *keep_state_defaults = NULL;
-#endif /* __rtems__ */
int disallow_table(struct node_host *, const char *);
int disallow_urpf_failed(struct node_host *, const char *);
@@ -465,11 +439,7 @@ void remove_invalid_hosts(struct node_host **, sa_family_t *);
int invalid_redirect(struct node_host *, sa_family_t);
u_int16_t parseicmpspec(char *, sa_family_t);
-#ifndef __rtems__
-TAILQ_HEAD(loadanchorshead, loadanchors)
-#else /* __rtems__ */
static TAILQ_HEAD(loadanchorshead, loadanchors)
-#endif /* __rtems__ */
loadanchorshead = TAILQ_HEAD_INITIALIZER(loadanchorshead);
struct loadanchors {
@@ -546,6 +516,8 @@ typedef struct {
struct table_opts table_opts;
struct pool_opts pool_opts;
struct node_hfsc_opts hfsc_opts;
+ struct node_fairq_opts fairq_opts;
+ struct codel_opts codel_opts;
} v;
int lineno;
} YYSTYPE;
@@ -558,7 +530,7 @@ int parseport(char *, struct range *r, int);
(!((addr).iflags & PFI_AFLAG_NOALIAS) || \
!isdigit((addr).v.ifname[strlen((addr).v.ifname)-1])))
-#line 562 "pfctly.tab.c"
+#line 534 "pfctly.tab.c"
/* compatibility with bison */
#ifdef YYPARSE_PARAM
@@ -677,89 +649,98 @@ extern int YYPARSE_DECL();
#define PROBABILITY 340
#define ALTQ 341
#define CBQ 342
-#define PRIQ 343
-#define HFSC 344
-#define BANDWIDTH 345
-#define TBRSIZE 346
-#define LINKSHARE 347
-#define REALTIME 348
-#define UPPERLIMIT 349
-#define QUEUE 350
-#define PRIORITY 351
-#define QLIMIT 352
-#define RTABLE 353
-#define LOAD 354
-#define RULESET_OPTIMIZATION 355
-#define STICKYADDRESS 356
-#define MAXSRCSTATES 357
-#define MAXSRCNODES 358
-#define SOURCETRACK 359
-#define GLOBAL 360
-#define RULE 361
-#define MAXSRCCONN 362
-#define MAXSRCCONNRATE 363
-#define OVERLOAD 364
-#define FLUSH 365
-#define SLOPPY 366
-#define PFLOW 367
-#define TAGGED 368
-#define TAG 369
-#define IFBOUND 370
-#define FLOATING 371
-#define STATEPOLICY 372
-#define STATEDEFAULTS 373
-#define ROUTE 374
-#define SETTOS 375
-#define DIVERTTO 376
-#define DIVERTREPLY 377
-#define STRING 378
-#define NUMBER 379
-#define PORTBINARY 380
+#define CODEL 343
+#define PRIQ 344
+#define HFSC 345
+#define FAIRQ 346
+#define BANDWIDTH 347
+#define TBRSIZE 348
+#define LINKSHARE 349
+#define REALTIME 350
+#define UPPERLIMIT 351
+#define QUEUE 352
+#define PRIORITY 353
+#define QLIMIT 354
+#define HOGS 355
+#define BUCKETS 356
+#define RTABLE 357
+#define TARGET 358
+#define INTERVAL 359
+#define LOAD 360
+#define RULESET_OPTIMIZATION 361
+#define PRIO 362
+#define STICKYADDRESS 363
+#define MAXSRCSTATES 364
+#define MAXSRCNODES 365
+#define SOURCETRACK 366
+#define GLOBAL 367
+#define RULE 368
+#define MAXSRCCONN 369
+#define MAXSRCCONNRATE 370
+#define OVERLOAD 371
+#define FLUSH 372
+#define SLOPPY 373
+#define TAGGED 374
+#define TAG 375
+#define IFBOUND 376
+#define FLOATING 377
+#define STATEPOLICY 378
+#define STATEDEFAULTS 379
+#define ROUTE 380
+#define SETTOS 381
+#define DIVERTTO 382
+#define DIVERTREPLY 383
+#define STRING 384
+#define NUMBER 385
+#define PORTBINARY 386
#define YYERRCODE 256
typedef int YYINT;
static const YYINT pfctlylhs[] = { -1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 127, 140, 140,
- 140, 140, 140, 140, 18, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 76, 76, 79, 79, 80, 80, 81, 81, 137,
- 78, 78, 146, 146, 146, 146, 148, 147, 147, 133,
- 133, 133, 133, 134, 26, 129, 149, 116, 116, 118,
- 118, 117, 117, 117, 117, 117, 117, 117, 117, 117,
- 17, 17, 17, 138, 91, 91, 92, 92, 93, 93,
- 151, 110, 110, 112, 112, 111, 111, 11, 11, 139,
- 152, 119, 119, 121, 121, 120, 120, 120, 120, 135,
- 136, 153, 113, 113, 115, 115, 114, 114, 114, 114,
- 114, 106, 106, 98, 98, 98, 98, 98, 98, 99,
- 99, 100, 101, 101, 102, 154, 105, 103, 103, 104,
- 104, 104, 104, 104, 104, 104, 95, 95, 95, 96,
- 96, 97, 132, 155, 107, 107, 109, 109, 108, 108,
- 108, 108, 108, 108, 108, 108, 108, 108, 108, 108,
- 108, 108, 108, 108, 108, 13, 13, 23, 23, 29,
- 29, 29, 29, 29, 29, 29, 29, 29, 29, 43,
- 43, 44, 44, 15, 15, 15, 87, 87, 86, 86,
- 86, 86, 86, 88, 88, 89, 89, 90, 90, 90,
- 90, 1, 1, 1, 2, 2, 3, 4, 16, 16,
- 16, 34, 34, 34, 35, 35, 36, 37, 37, 45,
- 45, 60, 60, 60, 61, 62, 62, 47, 47, 48,
- 48, 46, 46, 46, 142, 142, 49, 49, 49, 50,
- 50, 54, 54, 51, 51, 51, 52, 52, 52, 52,
- 52, 52, 52, 52, 5, 5, 53, 63, 63, 64,
- 64, 65, 65, 65, 30, 32, 66, 66, 67, 67,
- 68, 68, 68, 8, 8, 69, 69, 70, 70, 71,
- 71, 71, 9, 9, 28, 27, 27, 27, 38, 38,
- 38, 38, 39, 39, 41, 41, 40, 40, 40, 42,
- 42, 42, 6, 6, 7, 7, 10, 10, 19, 19,
- 19, 22, 22, 82, 82, 82, 82, 20, 20, 20,
- 83, 83, 84, 84, 85, 85, 85, 85, 85, 85,
- 85, 85, 85, 85, 85, 85, 75, 94, 94, 94,
- 14, 14, 31, 56, 56, 55, 55, 74, 74, 74,
- 33, 33, 156, 122, 122, 124, 124, 123, 123, 123,
- 123, 123, 123, 73, 73, 73, 25, 25, 25, 25,
- 24, 24, 130, 131, 77, 77, 125, 125, 126, 126,
- 57, 57, 58, 58, 59, 59, 72, 72, 72, 72,
- 72, 141, 143, 143, 144, 145, 145, 150, 150, 12,
- 12, 21, 21, 21, 21, 21, 21,
+ 0, 0, 0, 0, 0, 0, 0, 137, 150, 150,
+ 150, 150, 150, 150, 18, 138, 138, 138, 138, 138,
+ 138, 138, 138, 138, 138, 138, 138, 138, 138, 138,
+ 138, 77, 77, 80, 80, 81, 81, 82, 82, 147,
+ 79, 79, 156, 156, 156, 156, 158, 157, 157, 143,
+ 143, 143, 143, 144, 26, 139, 159, 126, 126, 128,
+ 128, 127, 127, 127, 127, 127, 127, 127, 127, 127,
+ 17, 17, 17, 148, 92, 92, 93, 93, 94, 94,
+ 161, 120, 120, 122, 122, 121, 121, 11, 11, 149,
+ 162, 129, 129, 131, 131, 130, 130, 130, 130, 145,
+ 146, 163, 123, 123, 125, 125, 124, 124, 124, 124,
+ 124, 113, 113, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 100, 100, 101, 102, 102, 103, 164,
+ 106, 104, 104, 105, 105, 105, 105, 105, 105, 105,
+ 165, 109, 107, 107, 108, 108, 108, 108, 108, 166,
+ 112, 110, 110, 111, 111, 111, 96, 96, 96, 97,
+ 97, 98, 142, 167, 114, 114, 116, 116, 115, 115,
+ 115, 115, 115, 115, 115, 115, 115, 115, 115, 115,
+ 115, 115, 115, 115, 115, 115, 115, 117, 117, 119,
+ 119, 118, 30, 30, 13, 13, 23, 23, 29, 29,
+ 29, 29, 29, 29, 29, 29, 29, 29, 44, 44,
+ 45, 45, 15, 15, 15, 88, 88, 87, 87, 87,
+ 87, 87, 89, 89, 90, 90, 91, 91, 91, 91,
+ 1, 1, 1, 2, 2, 3, 4, 16, 16, 16,
+ 35, 35, 35, 36, 36, 37, 38, 38, 46, 46,
+ 61, 61, 61, 62, 63, 63, 48, 48, 49, 49,
+ 47, 47, 47, 152, 152, 50, 50, 50, 51, 51,
+ 55, 55, 52, 52, 52, 53, 53, 53, 53, 53,
+ 53, 53, 5, 5, 54, 64, 64, 65, 65, 66,
+ 66, 66, 31, 33, 67, 67, 68, 68, 69, 69,
+ 69, 8, 8, 70, 70, 71, 71, 72, 72, 72,
+ 9, 9, 28, 27, 27, 27, 39, 39, 39, 39,
+ 40, 40, 42, 42, 41, 41, 41, 43, 43, 43,
+ 6, 6, 7, 7, 10, 10, 19, 19, 19, 22,
+ 22, 83, 83, 83, 83, 20, 20, 20, 84, 84,
+ 85, 85, 86, 86, 86, 86, 86, 86, 86, 86,
+ 86, 86, 86, 76, 95, 95, 95, 14, 14, 32,
+ 57, 57, 56, 56, 75, 75, 75, 34, 34, 168,
+ 132, 132, 134, 134, 133, 133, 133, 133, 133, 133,
+ 74, 74, 74, 25, 25, 25, 25, 24, 24, 140,
+ 141, 78, 78, 135, 135, 136, 136, 58, 58, 59,
+ 59, 60, 60, 73, 73, 73, 73, 73, 151, 151,
+ 153, 153, 154, 155, 155, 160, 160, 12, 12, 21,
+ 21, 21, 21, 21, 21,
};
static const YYINT pfctlylen[] = { 2,
0, 3, 2, 3, 3, 3, 3, 3, 3, 3,
@@ -775,1220 +756,1298 @@ static const YYINT pfctlylen[] = { 2,
0, 2, 0, 2, 1, 1, 3, 4, 2, 5,
5, 0, 2, 0, 2, 1, 2, 2, 2, 1,
2, 1, 1, 1, 4, 1, 4, 1, 4, 1,
- 3, 1, 1, 3, 1, 0, 2, 1, 3, 2,
- 8, 2, 8, 2, 8, 1, 0, 1, 4, 2,
+ 4, 1, 4, 1, 3, 1, 1, 3, 1, 0,
+ 2, 1, 3, 2, 8, 2, 8, 2, 8, 1,
+ 0, 2, 1, 3, 2, 6, 2, 2, 1, 0,
+ 2, 1, 3, 2, 2, 1, 0, 1, 4, 2,
4, 1, 9, 0, 2, 0, 2, 1, 2, 2,
- 1, 1, 2, 1, 1, 1, 1, 1, 2, 3,
- 2, 2, 2, 4, 1, 1, 1, 1, 2, 0,
- 1, 1, 5, 1, 1, 4, 4, 6, 1, 1,
- 1, 1, 1, 0, 1, 1, 0, 1, 0, 1,
- 1, 2, 2, 1, 4, 1, 3, 1, 1, 1,
- 2, 0, 2, 5, 2, 4, 2, 1, 0, 1,
- 1, 0, 2, 5, 2, 4, 1, 1, 1, 1,
- 3, 0, 2, 5, 1, 2, 4, 0, 2, 0,
- 2, 1, 3, 2, 2, 0, 1, 1, 4, 2,
- 0, 2, 4, 2, 2, 2, 1, 3, 3, 3,
- 1, 3, 3, 2, 1, 1, 3, 1, 4, 2,
- 4, 1, 2, 3, 1, 1, 1, 4, 2, 4,
- 1, 2, 3, 1, 1, 1, 4, 2, 4, 1,
- 2, 3, 1, 1, 1, 4, 3, 2, 2, 5,
- 2, 5, 2, 4, 2, 4, 1, 3, 3, 1,
- 3, 3, 1, 1, 1, 1, 1, 1, 1, 2,
- 2, 1, 1, 2, 3, 3, 3, 0, 1, 2,
- 3, 0, 1, 3, 2, 1, 2, 2, 4, 5,
- 2, 1, 1, 1, 1, 2, 2, 2, 4, 6,
- 0, 1, 1, 1, 4, 2, 4, 0, 2, 4,
- 0, 1, 0, 2, 0, 2, 1, 1, 1, 2,
- 1, 1, 1, 0, 2, 4, 0, 1, 2, 1,
- 3, 3, 10, 13, 0, 2, 0, 3, 0, 2,
- 1, 4, 2, 4, 1, 4, 0, 1, 3, 3,
- 3, 2, 4, 2, 2, 4, 2, 1, 0, 1,
- 1, 1, 2, 2, 1, 2, 1,
+ 1, 1, 2, 2, 1, 1, 1, 1, 1, 2,
+ 3, 2, 2, 2, 4, 1, 1, 4, 2, 3,
+ 1, 1, 2, 6, 1, 1, 1, 2, 0, 1,
+ 1, 5, 1, 1, 4, 4, 6, 1, 1, 1,
+ 1, 1, 0, 1, 1, 0, 1, 0, 1, 1,
+ 2, 2, 1, 4, 1, 3, 1, 1, 1, 2,
+ 0, 2, 5, 2, 4, 2, 1, 0, 1, 1,
+ 0, 2, 5, 2, 4, 1, 1, 1, 1, 3,
+ 0, 2, 5, 1, 2, 4, 0, 2, 0, 2,
+ 1, 3, 2, 2, 0, 1, 1, 4, 2, 0,
+ 2, 4, 2, 2, 2, 1, 3, 3, 3, 1,
+ 3, 3, 1, 1, 3, 1, 4, 2, 4, 1,
+ 2, 3, 1, 1, 1, 4, 2, 4, 1, 2,
+ 3, 1, 1, 1, 4, 2, 4, 1, 2, 3,
+ 1, 1, 1, 4, 3, 2, 2, 5, 2, 5,
+ 2, 4, 2, 4, 1, 3, 3, 1, 3, 3,
+ 1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
+ 1, 2, 3, 3, 3, 0, 1, 2, 3, 0,
+ 1, 3, 2, 1, 2, 2, 4, 5, 2, 1,
+ 1, 1, 2, 2, 2, 4, 6, 0, 1, 1,
+ 1, 4, 2, 4, 0, 2, 4, 0, 1, 0,
+ 2, 0, 2, 1, 1, 1, 2, 1, 1, 1,
+ 0, 2, 4, 0, 1, 2, 1, 3, 3, 10,
+ 13, 0, 2, 0, 3, 0, 2, 1, 4, 2,
+ 4, 1, 4, 0, 1, 3, 3, 3, 2, 2,
+ 4, 2, 2, 4, 2, 1, 0, 1, 1, 1,
+ 2, 2, 1, 2, 1,
};
static const YYINT pfctlydefred[] = { 0,
- 0, 0, 0, 0, 178, 0, 352, 0, 0, 0,
+ 0, 0, 0, 0, 207, 0, 379, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 19, 0,
- 0, 0, 0, 0, 0, 17, 189, 0, 0, 0,
- 181, 179, 0, 51, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 17, 218, 0, 0, 0,
+ 210, 208, 0, 51, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 18, 0, 0, 0,
- 0, 0, 65, 0, 0, 0, 195, 196, 0, 0,
+ 0, 0, 65, 0, 0, 0, 224, 225, 0, 0,
0, 2, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 24, 16, 22, 21, 23, 20,
0, 0, 0, 0, 0, 44, 0, 0, 0, 26,
- 0, 0, 28, 0, 0, 30, 43, 42, 32, 35,
- 34, 410, 411, 36, 37, 39, 40, 266, 265, 33,
- 27, 25, 322, 323, 38, 0, 336, 0, 0, 0,
- 0, 0, 0, 344, 345, 0, 342, 343, 0, 333,
- 0, 203, 0, 0, 202, 0, 98, 213, 0, 0,
- 0, 0, 0, 49, 48, 50, 0, 0, 382, 380,
- 381, 0, 0, 220, 221, 0, 0, 0, 190, 191,
- 0, 192, 193, 0, 0, 198, 0, 0, 0, 0,
- 402, 0, 0, 405, 0, 335, 337, 341, 320, 321,
- 338, 0, 0, 346, 408, 0, 0, 208, 209, 210,
- 0, 206, 218, 0, 0, 89, 85, 0, 0, 217,
- 0, 0, 0, 0, 0, 0, 0, 0, 120, 116,
- 0, 0, 0, 46, 379, 0, 0, 0, 0, 0,
- 0, 186, 0, 187, 100, 0, 0, 0, 0, 0,
- 245, 0, 0, 0, 0, 0, 0, 334, 211, 205,
- 0, 0, 0, 84, 0, 0, 0, 152, 0, 110,
- 148, 0, 0, 136, 122, 123, 117, 121, 118, 119,
- 115, 111, 64, 0, 398, 0, 0, 0, 0, 228,
- 229, 0, 223, 227, 0, 230, 0, 0, 0, 183,
- 0, 0, 106, 0, 105, 0, 0, 0, 0, 0,
- 404, 29, 0, 407, 31, 0, 339, 0, 207, 0,
- 0, 90, 0, 0, 96, 95, 0, 214, 0, 215,
- 0, 132, 0, 130, 135, 0, 133, 0, 0, 0,
- 391, 0, 0, 395, 0, 0, 0, 0, 0, 247,
- 0, 0, 0, 239, 0, 248, 0, 0, 0, 0,
- 0, 188, 109, 0, 104, 0, 0, 61, 62, 63,
- 0, 0, 0, 340, 86, 0, 87, 347, 97, 94,
- 0, 0, 0, 125, 0, 127, 0, 129, 0, 0,
- 0, 146, 0, 138, 0, 0, 0, 399, 0, 401,
- 400, 0, 0, 0, 0, 412, 0, 0, 0, 0,
- 0, 244, 268, 276, 0, 255, 256, 0, 0, 0,
- 0, 0, 254, 0, 0, 386, 0, 0, 235, 0,
- 233, 0, 231, 0, 107, 0, 0, 0, 390, 403,
- 406, 330, 0, 216, 149, 0, 150, 131, 134, 0,
- 140, 0, 142, 0, 144, 0, 0, 0, 0, 0,
- 368, 369, 0, 371, 372, 373, 367, 0, 0, 224,
- 0, 225, 0, 413, 414, 416, 273, 0, 0, 264,
- 0, 0, 0, 0, 0, 0, 243, 0, 0, 0,
- 241, 66, 0, 252, 108, 0, 0, 0, 88, 0,
- 0, 0, 0, 139, 0, 0, 393, 396, 0, 392,
- 370, 362, 366, 153, 0, 0, 0, 274, 249, 258,
- 259, 260, 267, 263, 262, 388, 0, 0, 0, 0,
- 72, 0, 0, 0, 0, 78, 0, 0, 0, 76,
- 71, 0, 0, 57, 60, 0, 0, 0, 0, 0,
- 166, 0, 165, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 175, 0, 161, 162, 167, 164, 168, 158,
- 0, 151, 0, 0, 0, 250, 0, 0, 226, 269,
- 0, 270, 0, 354, 0, 383, 236, 234, 0, 73,
- 81, 83, 82, 74, 77, 79, 317, 318, 75, 0,
- 70, 253, 0, 298, 295, 0, 0, 313, 314, 0,
- 0, 299, 315, 316, 0, 0, 301, 0, 0, 324,
- 284, 285, 0, 0, 0, 159, 277, 293, 294, 0,
- 0, 0, 160, 286, 163, 0, 176, 177, 171, 348,
- 0, 172, 169, 0, 173, 275, 0, 157, 0, 0,
- 0, 0, 394, 0, 0, 0, 0, 80, 53, 297,
- 0, 0, 0, 0, 0, 0, 325, 326, 0, 0,
- 282, 0, 0, 291, 327, 0, 0, 170, 0, 0,
- 0, 0, 271, 0, 0, 360, 353, 237, 0, 296,
- 0, 0, 308, 309, 0, 0, 311, 312, 0, 0,
- 0, 283, 0, 0, 292, 349, 0, 174, 0, 0,
- 0, 0, 384, 356, 355, 0, 54, 58, 0, 0,
- 300, 0, 303, 302, 0, 305, 331, 278, 0, 279,
- 287, 0, 288, 0, 141, 143, 145, 0, 0, 55,
- 56, 0, 0, 0, 0, 350, 0, 357, 304, 306,
- 280, 289, 376,
+ 0, 0, 0, 28, 0, 0, 30, 43, 42, 32,
+ 35, 34, 438, 439, 36, 37, 39, 40, 294, 293,
+ 33, 27, 25, 350, 351, 38, 0, 364, 0, 0,
+ 0, 0, 0, 0, 372, 0, 370, 371, 0, 361,
+ 0, 232, 0, 0, 231, 0, 98, 242, 0, 0,
+ 0, 0, 0, 49, 48, 50, 0, 0, 409, 407,
+ 408, 0, 0, 249, 250, 0, 0, 0, 219, 220,
+ 0, 221, 222, 0, 0, 227, 0, 0, 0, 0,
+ 430, 429, 0, 0, 433, 0, 363, 365, 369, 348,
+ 349, 366, 0, 0, 373, 436, 0, 0, 237, 238,
+ 239, 0, 235, 247, 0, 0, 89, 85, 0, 0,
+ 246, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 120, 116, 0, 0, 0, 46, 406, 0, 0,
+ 0, 0, 0, 0, 215, 0, 216, 100, 0, 0,
+ 0, 0, 0, 274, 0, 0, 0, 0, 0, 0,
+ 362, 240, 234, 0, 0, 0, 84, 0, 0, 0,
+ 172, 0, 110, 168, 0, 160, 0, 140, 151, 122,
+ 123, 117, 121, 118, 119, 115, 111, 64, 0, 425,
+ 0, 0, 0, 0, 257, 258, 0, 252, 256, 0,
+ 259, 0, 0, 0, 212, 0, 0, 106, 0, 105,
+ 0, 0, 0, 0, 0, 432, 29, 0, 435, 31,
+ 0, 367, 0, 236, 0, 0, 90, 0, 0, 96,
+ 95, 0, 243, 0, 244, 0, 136, 0, 134, 0,
+ 0, 139, 0, 137, 0, 0, 0, 0, 0, 418,
+ 0, 0, 422, 0, 0, 0, 0, 0, 276, 0,
+ 0, 0, 268, 0, 277, 0, 0, 0, 0, 0,
+ 217, 109, 0, 104, 0, 0, 61, 62, 63, 0,
+ 0, 0, 368, 86, 0, 87, 374, 97, 94, 0,
+ 0, 0, 125, 0, 133, 0, 0, 166, 0, 162,
+ 127, 0, 129, 0, 0, 0, 150, 0, 142, 131,
+ 0, 0, 0, 159, 0, 153, 0, 0, 0, 426,
+ 0, 428, 427, 0, 0, 0, 0, 440, 0, 0,
+ 0, 0, 0, 273, 296, 304, 0, 284, 285, 0,
+ 0, 0, 0, 283, 0, 0, 413, 0, 0, 264,
+ 0, 262, 0, 260, 0, 107, 0, 0, 0, 417,
+ 431, 434, 358, 0, 245, 169, 0, 170, 135, 165,
+ 164, 0, 138, 0, 144, 0, 146, 0, 148, 0,
+ 0, 155, 157, 158, 0, 0, 0, 0, 0, 395,
+ 396, 0, 398, 399, 400, 394, 0, 0, 253, 0,
+ 254, 0, 441, 442, 444, 301, 0, 0, 0, 0,
+ 0, 0, 0, 0, 272, 0, 0, 0, 270, 66,
+ 0, 281, 108, 0, 0, 0, 88, 0, 163, 0,
+ 0, 0, 143, 0, 154, 0, 0, 420, 423, 0,
+ 419, 397, 389, 393, 173, 0, 0, 0, 302, 278,
+ 287, 288, 289, 295, 292, 291, 415, 0, 0, 0,
+ 0, 72, 0, 0, 0, 0, 78, 0, 0, 0,
+ 76, 71, 0, 0, 57, 60, 0, 0, 0, 0,
+ 0, 187, 0, 186, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 196, 0, 181, 182, 188,
+ 185, 189, 178, 0, 197, 171, 0, 0, 0, 0,
+ 279, 0, 0, 255, 297, 0, 298, 0, 381, 0,
+ 410, 265, 263, 0, 73, 81, 83, 82, 74, 77,
+ 79, 345, 346, 75, 0, 70, 282, 0, 326, 323,
+ 0, 0, 341, 342, 0, 0, 327, 343, 344, 0,
+ 0, 329, 0, 0, 352, 312, 313, 0, 0, 0,
+ 179, 305, 321, 322, 0, 0, 0, 180, 314, 184,
+ 0, 0, 202, 199, 0, 205, 206, 192, 375, 0,
+ 193, 183, 190, 0, 194, 303, 0, 177, 0, 0,
+ 0, 0, 0, 421, 0, 0, 0, 0, 80, 53,
+ 325, 0, 0, 0, 0, 0, 0, 353, 354, 0,
+ 0, 310, 0, 0, 319, 203, 0, 201, 0, 355,
+ 0, 0, 191, 0, 0, 0, 156, 0, 299, 0,
+ 0, 387, 380, 266, 0, 324, 0, 0, 336, 337,
+ 0, 0, 339, 340, 0, 0, 0, 311, 0, 0,
+ 320, 0, 198, 0, 376, 0, 195, 0, 0, 0,
+ 0, 411, 383, 382, 0, 54, 58, 0, 0, 328,
+ 0, 331, 330, 0, 333, 359, 306, 0, 307, 315,
+ 0, 316, 0, 200, 0, 145, 147, 149, 0, 0,
+ 55, 56, 0, 0, 0, 0, 0, 377, 0, 384,
+ 332, 334, 308, 317, 204, 403,
};
static const YYINT pfctlydgoto[] = { 2,
- 79, 276, 168, 226, 140, 621, 626, 634, 641, 609,
- 363, 134, 649, 21, 89, 186, 550, 141, 157, 384,
- 420, 158, 22, 23, 179, 24, 575, 617, 52, 655,
- 696, 421, 521, 249, 413, 303, 304, 576, 701, 622,
- 705, 627, 191, 194, 307, 364, 308, 443, 365, 516,
- 366, 433, 434, 447, 695, 595, 354, 469, 355, 370,
- 441, 540, 422, 526, 423, 636, 710, 637, 643, 713,
- 644, 299, 723, 538, 335, 129, 368, 55, 57, 176,
- 424, 578, 677, 159, 160, 75, 197, 76, 221, 222,
- 164, 330, 227, 579, 280, 392, 281, 239, 343, 344,
- 346, 347, 403, 404, 348, 287, 507, 580, 581, 274,
- 336, 337, 170, 240, 241, 502, 551, 552, 255, 315,
- 316, 408, 477, 478, 438, 378, 25, 26, 27, 28,
+ 79, 279, 168, 227, 141, 656, 661, 669, 676, 644,
+ 372, 135, 688, 21, 89, 186, 581, 142, 157, 393,
+ 442, 158, 22, 23, 179, 24, 608, 652, 52, 683,
+ 695, 742, 443, 552, 252, 435, 308, 309, 609, 747,
+ 657, 751, 662, 191, 194, 312, 373, 313, 464, 374,
+ 547, 375, 454, 455, 468, 741, 630, 363, 498, 364,
+ 379, 462, 571, 444, 557, 445, 671, 756, 672, 678,
+ 759, 679, 304, 772, 569, 340, 130, 377, 55, 57,
+ 176, 446, 611, 718, 159, 160, 75, 197, 76, 222,
+ 223, 164, 335, 228, 612, 283, 401, 284, 242, 348,
+ 349, 353, 354, 418, 419, 355, 425, 426, 357, 409,
+ 410, 350, 292, 535, 613, 614, 615, 684, 729, 277,
+ 341, 342, 170, 243, 244, 530, 582, 583, 258, 320,
+ 321, 430, 506, 507, 459, 387, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 3,
- 123, 203, 263, 126, 265, 699, 555, 613, 503, 216,
- 275, 256, 171, 349, 508, 409,
+ 124, 204, 266, 127, 268, 745, 586, 648, 531, 217,
+ 278, 259, 171, 356, 358, 351, 536, 431,
};
-static const YYINT pfctlysindex[] = { -41,
- 0, 240, 1290, 91, 0, 289, 0, 36, -160, -144,
- -144, -144, 1628, 235, -129, 26, -113, 29, 258, 0,
- 423, -155, 26, -155, 321, 368, 372, 376, 388, 406,
- 437, 460, 479, 516, 520, 526, 563, 568, 0, 579,
- -57, 605, 633, 636, 638, 0, 0, 529, 614, 617,
- 0, 0, 231, 0, -155, -144, 26, 26, 26, 297,
- -89, -84, -196, -45, -189, 300, 302, 26, -124, -144,
- 134, 1742, 648, 427, 385, 440, 0, 8, 0, 26,
- -144, 168, 0, -162, -162, -162, 0, 0, 235, 354,
- 235, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+static const YYINT pfctlysindex[] = { -28,
+ 0, 118, 1618, 94, 0, 604, 0, 57, -163, -158,
+ -158, -158, 1728, 392, -129, 213, -35, -49, 463, 0,
+ 680, -20, 213, -20, 519, 525, 538, 552, 569, 622,
+ 651, 664, 670, 678, 684, 691, 711, 714, 0, 716,
+ 424, 720, 726, 731, 749, 0, 0, 545, 650, 657,
+ 0, 0, 230, 0, -20, -158, 213, 213, 213, 386,
+ -93, -72, -226, -58, -230, 390, 403, 213, -107, -158,
+ 371, 1487, 754, 540, 486, 556, 0, 27, 0, 213,
+ -158, 428, 0, 326, 326, 326, 0, 0, 392, 579,
+ 392, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 429, 280, 291, 688, 488, 0, 354, 354, 354, 0,
- 379, 753, 0, 436, 753, 0, 0, 0, 0, 0,
+ 516, 475, 532, 768, 601, 0, 579, 579, 579, 0,
+ 492, 496, 883, 0, 518, 883, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 438, 0, 471, 477, 312,
- 486, 497, 710, 0, 0, 507, 0, 0, 764, 0,
- 417, 0, 14, 354, 0, 753, 0, 0, 464, 548,
- 1505, 0, 580, 0, 0, 0, 168, 427, 0, 0,
- 0, 26, 26, 0, 0, 632, 26, 510, 0, 0,
- 370, 0, 0, 866, 0, 0, 26, 632, 632, 632,
- 0, 753, 544, 0, 550, 0, 0, 0, 0, 0,
- 0, 883, 556, 0, 0, 1742, -144, 0, 0, 0,
- 381, 0, 0, 753, 464, 0, 0, 0, 906, 0,
- -81, 903, 905, 908, 316, 570, 578, 594, 0, 0,
- 1505, -81, -144, 0, 0, 354, 671, -64, -59, 354,
- 909, 0, 291, 0, 0, -94, 354, -59, -59, -59,
- 0, 753, 49, 753, 63, 595, 898, 0, 0, 0,
- 417, -24, 938, 0, -211, 73, 753, 0, 753, 0,
- 0, 604, 613, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 632, 0, 24, 24, 24, 354, 0,
- 0, 753, 0, 0, -9, 0, 623, 721, 632, 0,
- 952, 618, 0, 753, 0, -94, 632, 645, 645, 645,
- 0, 0, 544, 0, 0, 550, 0, 639, 0, 74,
- 753, 0, 629, 635, 0, 0, -211, 0, 906, 0,
- 642, 0, 474, 0, 0, 523, 0, 975, 272, 750,
- 0, 753, 644, 0, 0, 0, 0, 632, 333, 0,
- 78, 753, 1053, 0, 741, 0, 649, 906, -58, 760,
- -59, 0, 0, -10, 0, -59, 651, 0, 0, 0,
- 753, 753, 678, 0, 0, -24, 0, 0, 0, 0,
- 753, 86, 753, 0, 604, 0, 613, 0, 64, 80,
- 85, 0, 764, 0, 206, -4, 206, 0, 1280, 0,
- 0, -59, 128, 753, 753, 0, 970, 978, 982, 168,
- 664, 0, 0, 0, -7, 0, 0, 667, 195, 999,
- 682, 687, 0, 1021, 78, 0, 701, 645, 0, 753,
- 0, -9, 0, 0, 0, 753, 131, 0, 0, 0,
- 0, 0, 753, 0, 0, 642, 0, 0, 0, 316,
- 0, 316, 0, 316, 0, 272, 802, 753, 182, 1032,
- 0, 0, -144, 0, 0, 0, 0, 1280, 0, 0,
- 333, 0, 130, 0, 0, 0, 0, 168, 197, 0,
- 698, 699, 700, 1039, 1020, 704, 0, -144, 798, 706,
- 0, 0, 815, 0, 0, -7, 964, 4116, 0, 753,
- 764, 764, 764, 0, -7, 623, 0, 0, -4, 0,
- 0, 0, 0, 0, 753, 213, 753, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 877, 0, 753, 217,
- 0, 715, 311, 719, 711, 0, 720, 336, 735, 0,
- 0, 815, 753, 0, 0, 5, -53, 99, 823, 824,
- 0, 835, 0, 116, 124, 336, 838, 346, 27, 743,
- -144, 377, 0, 748, 0, 0, 0, 0, 0, 0,
- 4116, 0, 744, 745, 749, 0, 906, 753, 0, 0,
- 130, 0, 753, 0, 843, 0, 0, 0, 706, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, -144,
- 0, 0, 1124, 0, 0, 762, 1091, 0, 0, 753,
- 862, 0, 0, 0, 753, 864, 0, 1105, 1105, 0,
- 0, 0, 753, 767, 390, 0, 0, 0, 0, 753,
- 768, 411, 0, 0, 0, 1105, 0, 0, 0, 0,
- 772, 0, 0, 867, 0, 0, -144, 0, 764, 764,
- 764, 645, 0, 753, 206, 168, 753, 0, 0, 0,
- 762, 414, 432, 446, 450, 1742, 0, 0, 163, 390,
- 0, 198, 411, 0, 0, 559, 168, 0, 316, 316,
- 316, 865, 0, 753, 325, 0, 0, 0, 1257, 0,
- 463, 753, 0, 0, 468, 753, 0, 0, 573, 476,
- 753, 0, 491, 753, 0, 0, 774, 0, 1113, 1115,
- 1116, 206, 0, 0, 0, 206, 0, 0, 1148, 1150,
- 0, 414, 0, 0, 446, 0, 0, 0, 163, 0,
- 0, 198, 0, 1122, 0, 0, 0, 880, 753, 0,
- 0, 753, 753, 753, 753, 0, 168, 0, 0, 0,
- 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 529, 0, 555, 559,
+ 489, 567, 577, 841, 0, 587, 0, 0, 931, 0,
+ 347, 0, 19, 579, 0, 883, 0, 0, 611, 655,
+ 1926, 0, 744, 0, 0, 0, 428, 540, 0, 0,
+ 0, 213, 213, 0, 0, 747, 213, 632, 0, 0,
+ 599, 0, 0, 997, 0, 0, 213, 747, 747, 747,
+ 0, 0, 883, -156, 0, 669, 0, 0, 0, 0,
+ 0, 0, 994, 689, 0, 0, 1487, -158, 0, 0,
+ 0, 603, 0, 0, 883, 611, 0, 0, 0, 1006,
+ 0, -57, 1022, 1024, 1028, 1035, 1050, 535, 707, 713,
+ 739, 0, 0, 1926, -57, -158, 0, 0, 579, 382,
+ -47, -187, 579, 1067, 0, 532, 0, 0, -81, 579,
+ -187, -187, -187, 0, 883, 13, 883, 49, 770, 1034,
+ 0, 0, 0, 347, 34, 1082, 0, -101, 67, 883,
+ 0, 883, 0, 0, 745, 0, 750, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 747, 0,
+ 39, 39, 39, 579, 0, 0, 883, 0, 0, 192,
+ 0, 758, 894, 747, 0, 1119, 777, 0, 883, 0,
+ -81, 747, 810, 810, 810, 0, 0, -156, 0, 0,
+ 669, 0, 801, 0, 82, 883, 0, 790, 791, 0,
+ 0, -101, 0, 1006, 0, 797, 0, 679, 0, 1134,
+ 349, 0, 724, 0, 1141, 483, 1142, -274, 922, 0,
+ 883, 807, 0, 0, 0, 0, 747, 541, 0, 83,
+ 883, 246, 0, 908, 0, 813, 1006, -45, 926, -187,
+ 0, 0, 28, 0, -187, 815, 0, 0, 0, 883,
+ 883, 834, 0, 0, 34, 0, 0, 0, 0, 883,
+ 86, 883, 0, 745, 0, -107, -107, 0, 931, 0,
+ 0, 750, 0, 24, 187, 214, 0, 931, 0, 0,
+ 224, 535, -107, 0, 931, 0, 190, 56, 190, 0,
+ 765, 0, 0, -187, 97, 883, 883, 0, 1143, 1144,
+ 1146, 428, 822, 0, 0, 0, 32, 0, 0, 575,
+ 1167, 838, 839, 0, 1171, 83, 0, 852, 810, 0,
+ 883, 0, 192, 0, 0, 0, 883, 108, 0, 0,
+ 0, 0, 0, 883, 0, 0, 797, 0, 0, 0,
+ 0, 349, 0, 535, 0, 535, 0, 535, 0, 483,
+ 535, 0, 0, 0, -274, 959, 883, 136, 1188, 0,
+ 0, -158, 0, 0, 0, 0, 765, 0, 0, 541,
+ 0, 61, 0, 0, 0, 0, 428, 137, 846, 847,
+ 848, 1193, 1176, 855, 0, -158, 953, 858, 0, 0,
+ 1405, 0, 0, 32, 1122, 4400, 0, 883, 0, 931,
+ 931, 931, 0, -107, 0, 32, 758, 0, 0, 56,
+ 0, 0, 0, 0, 0, 883, 155, 883, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 417, 0, 883,
+ 200, 0, 863, 595, 866, 868, 0, 869, 550, 879,
+ 0, 0, 1405, 883, 0, 0, 235, 361, 376, 972,
+ 975, 0, 976, 0, 135, 283, 550, 37, 978, 562,
+ 69, 877, 880, -158, 570, 0, 890, 0, 0, 0,
+ 0, 0, 0, 4400, 0, 0, 888, 892, 893, 535,
+ 0, 1006, 883, 0, 0, 61, 0, 883, 0, 984,
+ 0, 0, 0, 858, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, -158, 0, 0, 1265, 0, 0,
+ 896, 1234, 0, 0, 883, 1007, 0, 0, 0, 883,
+ 1008, 0, 1249, 1249, 0, 0, 0, 883, 904, 630,
+ 0, 0, 0, 0, 883, 909, 642, 0, 0, 0,
+ -16, 934, 0, 0, 1249, 0, 0, 0, 0, 913,
+ 0, 0, 0, 1014, 0, 0, -158, 0, 931, 931,
+ 931, 1262, 810, 0, 883, 190, 428, 883, 0, 0,
+ 0, 896, 644, 659, 661, 665, 1487, 0, 0, 116,
+ 630, 0, 158, 642, 0, 0, 919, 0, 728, 0,
+ 734, 428, 0, 535, 535, 535, 0, 1017, 0, 883,
+ 363, 0, 0, 0, 654, 0, 385, 883, 0, 0,
+ 429, 883, 0, 0, 773, 452, 883, 0, 472, 883,
+ 0, 931, 0, 934, 0, 927, 0, 1269, 1271, 1272,
+ 190, 0, 0, 0, 190, 0, 0, 1296, 1304, 0,
+ 644, 0, 0, 661, 0, 0, 0, 116, 0, 0,
+ 158, 0, 930, 0, 1276, 0, 0, 0, 1037, 883,
+ 0, 0, 883, 883, 883, 883, 1277, 0, 428, 0,
+ 0, 0, 0, 0, 0, 0,
};
-static const YYINT pfctlyrindex[] = { 43,
- 0, 545, 352, 0, 0, 1520, 0, 0, 2567, 0,
- 0, 0, 0, 834, 0, 1412, 0, 0, 0, 0,
- 0, 2174, 3980, 1569, 0, 0, 0, 0, 0, 0,
+static const YYINT pfctlyrindex[] = { 36,
+ 0, 723, 738, 0, 0, 1644, 0, 0, 2743, 0,
+ 0, 0, 0, 986, 0, 1156, 0, 0, 0, 0,
+ 0, 2290, 2701, 4182, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 1712, 1836, 1959,
+ 0, 0, 0, 0, 3033, 1283, 1115, 1115, 1115, 0,
+ 0, 0, 0, 0, 0, 0, 0, 1312, 0, 0,
+ 0, 0, 1155, 1394, 0, 1521, 0, 939, 1813, 1027,
+ 0, 0, 0, 4235, 4235, 878, 0, 0, 2811, 4313,
+ 4202, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 1637, 1757, 1874,
- 0, 0, 0, 0, 2798, 1183, 337, 337, 337, 0,
- 0, 0, 0, 0, 0, 0, 0, 1156, 0, 0,
- 0, 0, 1067, 1288, 0, 1400, 0, 801, 1428, 84,
- 0, 0, 0, 3867, 3867, 539, 0, 0, 2630, 4002,
- 1705, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 3144, 0, 629, 629, 629, 0,
+ 0, 0, -99, 0, 0, 942, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 2854, 0, 481, 481, 481, 0,
- 0, 803, 0, 0, 803, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 82,
- 0, 0, 0, 0, 0, 0, 0, 0, 960, 0,
- 0, 0, 0, 46, 0, -16, 0, 0, 0, 0,
- 0, 712, 0, 0, 0, 0, 1166, 3924, 0, 0,
- 0, 571, 2742, 0, 0, 4037, 3769, 0, 0, 0,
- 462, 0, 0, 0, 9, 0, 2975, 56, 56, 56,
- 0, 1591, 0, 0, 0, 0, 0, 0, 0, 0,
+ 289, 0, 0, 0, 0, 0, 0, 0, 793, 0,
+ 0, 0, 0, 15, 0, -23, 0, 0, 0, 0,
+ 0, 1055, 0, 0, 0, 0, 1318, 4267, 0, 0,
+ 0, 533, 2922, 0, 0, 4320, 3576, 0, 0, 0,
+ 687, 0, 0, 0, 2, 0, 3255, 219, 219, 219,
+ 0, 0, 1481, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 498, 0, 0, 34, 0, 0, 0, 47, 801, 0,
- 1170, 558, 569, 581, 0, 0, 0, 0, 0, 0,
- 1, 1170, 0, 0, 0, -54, 3093, 0, 2622, 2979,
- 0, 0, 0, 0, 0, 0, 3254, 23, 23, 23,
- 0, -34, 804, -34, 804, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, -12, -13, 0, 803, 0,
+ 0, 459, 0, 0, 78, 0, 0, 0, 30, 939,
+ 0, 1321, 258, 438, 795, 992, 1012, 0, 0, 0,
+ 0, 0, 0, 6, 1321, 0, 0, 0, -204, 3323,
+ 0, 505, 2139, 0, 0, 0, 0, 0, 0, 3459,
+ 73, 73, 73, 0, 574, -96, -27, 948, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, -13, -18,
+ 0, 942, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1066, 0,
+ 0, 0, 0, 3528, 0, 0, 697, 0, 0, 510,
+ 0, 154, 2167, 8, 0, 0, 0, 0, 1352, 0,
+ 684, 3645, 1331, 1331, 1331, 0, 0, 0, 0, 0,
+ 0, 0, 572, 0, 80, -17, 0, 0, 0, 0,
+ 0, 519, 0, 939, 0, 0, 0, 948, 0, 0,
+ 0, 0, 948, 0, 0, 0, 0, 0, 0, 0,
+ 78, 0, 0, 2499, 2499, 2499, 3697, 0, 0, 0,
+ 59, 0, 0, 2057, 0, 0, 748, 0, 2388, 2543,
+ 0, 0, 510, 0, 3795, 0, 0, 0, 0, 574,
+ -27, 690, 0, 0, 0, 0, 0, 0, 0, -18,
+ 948, -27, 0, 0, 0, 0, 0, 0, 287, 0,
+ 0, 0, 0, 0, 0, 0, 0, 285, 0, 0,
+ 0, 0, 0, 0, 514, 0, 0, 0, 0, 0,
+ 0, 0, 0, 3846, 687, 142, 175, 0, 0, 710,
+ 733, 0, 695, 0, 0, 0, 510, 0, 0, 414,
+ 0, 0, 0, 0, 547, 0, 0, 0, 195, 0,
+ 942, 0, 510, 0, 882, 0, 1184, 704, 3935, 0,
+ 0, 0, 0, -17, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 917, 0, 0, 0, 0, 3317, 0,
- 0, 485, 0, 0, 1089, 0, 391, 2058, 2062, 0,
- 0, 0, 0, 39, 0, 520, 3370, 1175, 1175, 1175,
- 0, 0, 0, 0, 0, 0, 0, 109, 0, 41,
- -22, 0, 0, 0, 0, 0, 321, 0, 801, 0,
- 0, 0, 804, 0, 0, 804, 0, 0, 0, 0,
- 0, 34, 0, 0, 2335, 2335, 2335, 3450, 0, 0,
- 0, 154, 0, 0, 1966, 0, 0, 943, 0, 2230,
- 2469, 0, 0, 1089, 0, 3530, 0, 0, 0, 0,
- -34, -34, 521, 0, 0, 0, 0, 0, 0, 0,
- -13, 804, -34, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 204, 0, 0, 0, 0, 0, 0, 0,
- 0, 3593, 462, 161, 218, 0, 0, 489, 496, 0,
- 673, 0, 0, 0, 1089, 0, 0, 0, 363, 0,
- 0, 0, 0, 483, 0, 0, 0, 650, 0, 803,
- 0, 1089, 0, 713, 0, 11, 1007, 3646, 0, 0,
- 0, 0, -22, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, -1, -22, 41, 0,
- 0, 0, 2447, 0, 0, 0, 0, 3149, 3869, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 1007, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 441, 0,
- 0, 0, 819, 0, 0, 1089, 1179, 819, 0, -34,
- 812, 812, 812, 0, 1089, -5, 0, 0, 0, 0,
- 0, 0, 0, 0, 161, 248, 16, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 534, -34, 804,
+ 0, 0, 0, 0, 0, 23, -17, 80, 0, 0,
+ 0, 2610, 0, 0, 0, 0, 3391, 4079, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 704, 0, 0,
+ 0, 0, 0, 0, 0, 0, 75, 0, 0, 0,
+ 973, 0, 0, 510, 1338, 973, 0, -27, 0, 964,
+ 964, 964, 0, 0, 0, 510, -1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 142, 240, 10, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 229, -27,
+ 948, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 11, 1184, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 3, 11, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 9, 0, 0, 0, 0, 0, 0,
+ 0, 43, -17, 0, 0, 0, 0, 209, 0, 653,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 4, 0, 0, 0, 0, 0, 2, -22, 0, 0,
- 0, 0, 421, 0, 697, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 485,
- 896, 0, 0, 0, 485, 936, 0, 3726, 3726, 0,
- 0, 0, 218, 793, 0, 0, 0, 0, 0, 218,
- 833, 0, 0, 0, 0, 3726, 0, 0, 0, 0,
- 0, 0, 0, 3806, 0, 0, 0, 0, 462, 462,
- 462, 17, 0, 16, 0, 0, -34, 0, 0, 0,
+ 0, 0, 0, 0, 697, 917, 0, 0, 0, 697,
+ 968, 0, 3981, 3981, 0, 0, 0, 175, 123, 0,
+ 0, 0, 0, 0, 175, 828, 0, 0, 0, 0,
+ 0, 0, 0, 0, 3981, 0, 0, 0, 0, 0,
+ 0, 0, 0, 4033, 0, 0, 0, 0, 687, 687,
+ 687, 0, 1, 0, 10, 0, 0, -27, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 804, 0, 0, 0, 0,
- 0, 1182, 0, 192, 616, 0, 0, 0, 0, 0,
- 462, 161, 0, 0, 462, 161, 0, 0, 1859, 248,
- 16, 0, 248, 16, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 999, 0,
+ 948, 0, 0, 0, 0, 0, 0, 1353, 0, 87,
+ 264, 0, 0, 0, 0, 0, 687, 142, 0, 0,
+ 687, 142, 0, 0, 1707, 240, 10, 0, 240, 10,
+ 0, 964, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 1186, 192, 0,
- 0, 161, 161, 16, 16, 0, 0, 0, 0, 0,
- 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1357, 87,
+ 0, 0, 142, 142, 10, 10, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
};
static const YYINT pfctlygindex[] = { 0,
- 1552, 0, -166, 51, 0, 0, 0, -470, -439, 631,
- -70, 0, 0, 1200, 66, 991, 0, 0, 0, 0,
- 737, 1135, 0, 0, 796, 0, 0, -423, 0, 532,
- 451, -322, 0, 1328, 0, -352, 0, 0, 0, -622,
- 0, -613, 0, 954, -174, 771, 0, 0, 7, 0,
- 0, 279, 0, 795, 0, 0, -348, 0, 599, 0,
- -431, 0, 786, 0, -443, 0, 0, -611, 0, 0,
- -607, 0, 0, 0, -375, 0, 707, 0, -8, 1048,
- -80, 0, -425, 553, 1010, 215, 0, 95, 0, 959,
- 0, 0, -242, 0, 990, 0, -295, 0, 0, 847,
- 0, 836, 0, 777, 0, 1038, 766, 666, 0, 0,
- 913, 0, 1080, 1012, 0, 0, 702, 0, 0, 941,
- 0, -205, 780, 0, 672, -282, 0, 0, 0, 1258,
- 1260, -3, -2, 0, 0, 0, 0, 0, 0, 0,
- -168, -119, 0, -180, 0, 0, 0, 0, 0, -176,
- 0, 0, 0, 0, 0, 0,
+ 1557, 0, -208, -102, -320, 0, 0, -476, -619, 771,
+ -70, 0, 0, 1359, 490, 1165, 0, 0, 0, 0,
+ 328, 1307, 0, 0, 874, 0, 0, -612, 0, 0,
+ 647, 578, -308, 0, 1650, 0, -331, 0, 0, 0,
+ -668, 0, -525, 0, 1128, 851, 923, 0, 0, -345,
+ 0, 0, -315, 0, 946, 0, 0, -379, 0, 677,
+ 0, -497, 0, 938, 0, -457, 0, 0, -554, 0,
+ 0, -444, 0, 0, 0, -421, 0, 860, 0, -8,
+ 1218, -80, 0, -182, 688, 1196, 537, 0, 130, 0,
+ 1136, 0, 0, -223, 0, 1170, 0, -312, 0, 0,
+ 1015, 0, 1004, 0, 936, 0, 0, 937, 0, 0,
+ 958, 0, 1472, 933, 821, 0, 0, -591, 0, 0,
+ 1100, 0, 1274, 1205, 0, 0, 872, 0, 0, 1135,
+ 0, -337, 955, 0, 842, -319, 0, 0, 0, 1462,
+ 1464, -3, -2, 0, 0, 0, 0, 0, 0, 0,
+ -169, -119, 0, -192, 0, 0, 0, 0, 0, -178,
+ 0, 0, 0, 0, 0, 0, 0, 0,
};
-#define YYTABLESIZE 4493
+#define YYTABLESIZE 4783
static const YYINT pfctlytable[] = { 44,
- 45, 177, 58, 59, 385, 205, 414, 169, 251, 246,
- 113, 387, 68, 155, 253, 225, 246, 246, 103, 246,
- 409, 246, 167, 167, 264, 167, 389, 385, 314, 331,
- 246, 251, 238, 122, 262, 353, 379, 380, 125, 527,
- 167, 279, 1, 246, 271, 393, 229, 116, 246, 702,
- 246, 616, 1, 225, 246, 219, 93, 468, 302, 246,
- 706, 142, 277, 353, 440, 222, 651, 711, 539, 620,
- 246, 246, 173, 246, 714, 246, 246, 246, 246, 127,
- 409, 1, 261, 318, 319, 320, 323, 333, 326, 91,
- 246, 319, 215, 212, 178, 53, 177, 487, 246, 339,
- 46, 73, 246, 460, 272, 87, 215, 88, 132, 752,
- 417, 246, 362, 362, 445, 362, 215, 215, 328, 462,
- 115, 753, 319, 113, 464, 319, 155, 754, 525, 215,
- 166, 101, 577, 246, 755, 246, 224, 418, 416, 419,
- 246, 334, 321, 453, 324, 382, 352, 664, 417, 328,
- 410, 411, 328, 386, 381, 499, 417, 340, 169, 341,
- 510, 246, 417, 246, 681, 528, 395, 667, 162, 397,
- 588, 215, 391, 322, 215, 418, 416, 419, 180, 180,
- 180, 128, 359, 418, 416, 419, 246, 325, 133, 418,
- 416, 419, 670, 246, 374, 417, 444, 338, 385, 312,
- 415, 448, 684, 678, 246, 577, 212, 305, 269, 712,
- 455, 387, 219, 246, 130, 456, 306, 54, 219, 230,
- 685, 625, 418, 416, 419, 215, 466, 84, 85, 86,
- 417, 246, 406, 56, 293, 246, 481, 479, 633, 491,
- 215, 492, 425, 715, 137, 431, 640, 700, 77, 20,
- 246, 246, 480, 138, 139, 505, 215, 418, 416, 419,
- 215, 450, 451, 131, 80, 432, 360, 360, 169, 360,
- 506, 454, 245, 457, 361, 273, 246, 246, 246, 246,
- 409, 614, 385, 313, 238, 246, 251, 246, 121, 387,
- 238, 78, 519, 124, 482, 483, 278, 437, 1, 1,
- 1, 1, 101, 183, 389, 187, 518, 409, 409, 409,
- 246, 246, 506, 300, 301, 246, 246, 222, 82, 439,
- 500, 529, 222, 222, 618, 619, 504, 1, 1, 1,
- 92, 222, 596, 509, 583, 584, 585, 590, 246, 246,
- 1, 598, 81, 246, 219, 91, 212, 385, 517, 591,
- 113, 251, 1, 223, 387, 246, 1, 1, 1, 1,
- 1, 246, 385, 599, 246, 409, 251, 251, 215, 99,
- 99, 99, 257, 351, 1, 238, 1, 93, 113, 692,
- 446, 94, 615, 1, 246, 95, 101, 319, 246, 246,
- 582, 223, 1, 246, 246, 257, 1, 96, 219, 91,
- 385, 351, 257, 257, 650, 589, 257, 592, 222, 319,
- 252, 246, 246, 215, 328, 97, 246, 246, 409, 597,
- 1, 270, 257, 385, 215, 212, 212, 212, 212, 212,
- 246, 446, 549, 612, 212, 212, 328, 574, 319, 319,
- 319, 285, 286, 319, 319, 319, 98, 319, 319, 725,
- 358, 319, 319, 246, 246, 174, 175, 285, 286, 319,
- 246, 212, 285, 286, 522, 328, 328, 328, 663, 99,
- 328, 328, 328, 665, 328, 328, 623, 624, 328, 328,
- 246, 549, 689, 690, 691, 257, 328, 257, 100, 536,
- 219, 656, 261, 631, 632, 4, 5, 6, 73, 74,
- 672, 638, 639, 143, 144, 674, 215, 174, 175, 717,
- 574, 215, 553, 679, 394, 261, 437, 215, 726, 215,
- 682, 586, 261, 261, 732, 101, 261, 246, 735, 102,
- 329, 246, 246, 739, 215, 103, 742, 7, 246, 246,
- 631, 632, 261, 365, 693, 174, 175, 698, 47, 8,
- 409, 409, 409, 9, 10, 11, 12, 13, 48, 49,
- 50, 329, 653, 396, 329, 246, 215, 124, 111, 246,
- 246, 14, 104, 15, 724, 638, 639, 105, 126, 428,
- 16, 409, 733, 429, 430, 697, 736, 731, 106, 17,
- 128, 740, 734, 18, 743, 246, 246, 51, 212, 716,
- 738, 668, 215, 212, 212, 261, 656, 261, 114, 212,
- 212, 212, 212, 737, 107, 741, 215, 19, 399, 400,
- 401, 601, 602, 603, 257, 409, 409, 184, 185, 758,
- 257, 257, 759, 760, 761, 762, 351, 351, 351, 257,
- 257, 257, 108, 257, 257, 109, 257, 110, 688, 402,
- 257, 257, 257, 112, 257, 409, 113, 189, 190, 389,
- 257, 257, 257, 257, 257, 257, 257, 257, 192, 193,
- 257, 209, 210, 257, 120, 409, 697, 135, 385, 136,
- 124, 83, 272, 467, 217, 470, 257, 161, 257, 212,
- 73, 126, 218, 285, 286, 729, 730, 257, 257, 257,
- 257, 257, 257, 128, 165, 272, 359, 84, 85, 86,
- 300, 301, 257, 607, 608, 257, 272, 163, 257, 219,
- 220, 114, 69, 647, 648, 385, 385, 385, 385, 385,
- 257, 257, 272, 272, 272, 188, 257, 257, 257, 257,
- 257, 257, 219, 385, 261, 67, 385, 219, 219, 195,
- 261, 261, 196, 219, 654, 175, 219, 201, 385, 261,
- 261, 261, 202, 261, 261, 409, 261, 631, 632, 213,
- 261, 261, 261, 409, 261, 358, 358, 358, 358, 358,
- 261, 261, 261, 261, 261, 261, 261, 261, 638, 639,
- 261, 618, 619, 261, 246, 272, 358, 272, 246, 246,
- 409, 409, 281, 351, 377, 377, 261, 215, 261, 703,
- 704, 377, 377, 377, 204, 594, 206, 261, 261, 261,
- 261, 261, 261, 623, 624, 281, 329, 707, 708, 351,
- 351, 351, 261, 219, 114, 261, 281, 212, 261, 409,
- 409, 223, 290, 212, 212, 212, 243, 167, 329, 207,
- 261, 261, 281, 281, 281, 208, 261, 261, 261, 261,
- 261, 261, 246, 246, 211, 290, 415, 415, 363, 363,
- 363, 363, 363, 417, 417, 212, 290, 329, 329, 329,
- 181, 182, 329, 329, 329, 214, 329, 329, 251, 363,
- 329, 329, 290, 290, 290, 356, 357, 231, 329, 124,
- 124, 124, 124, 124, 248, 307, 254, 124, 124, 124,
- 126, 126, 126, 126, 126, 281, 431, 281, 126, 126,
- 126, 121, 128, 128, 128, 128, 128, 124, 307, 266,
- 128, 128, 128, 267, 272, 124, 432, 389, 167, 307,
- 272, 272, 282, 694, 283, 310, 126, 284, 288, 310,
- 272, 272, 387, 272, 272, 290, 289, 290, 128, 328,
- 272, 272, 272, 295, 272, 296, 297, 298, 310, 41,
- 272, 272, 290, 327, 272, 272, 272, 272, 332, 310,
- 272, 342, 369, 272, 389, 389, 389, 389, 389, 409,
- 345, 367, 372, 409, 409, 373, 272, 377, 272, 593,
- 748, 67, 67, 383, 749, 389, 388, 272, 272, 272,
- 272, 272, 272, 389, 67, 398, 405, 67, 307, 278,
- 307, 407, 272, 67, 435, 272, 436, 442, 272, 449,
- 484, 359, 359, 359, 359, 359, 67, 452, 485, 409,
- 272, 272, 486, 488, 490, 493, 409, 272, 272, 272,
- 272, 272, 359, 112, 112, 112, 112, 112, 310, 494,
- 310, 281, 112, 112, 495, 67, 409, 496, 498, 515,
- 281, 281, 520, 281, 281, 530, 204, 531, 532, 533,
- 67, 534, 535, 439, 281, 537, 554, 67, 605, 114,
- 281, 281, 431, 600, 281, 281, 281, 604, 606, 204,
- 281, 290, 610, 541, 542, 628, 629, 198, 199, 200,
- 290, 290, 432, 290, 290, 657, 543, 630, 281, 544,
- 646, 652, 659, 660, 290, 545, 666, 661, 99, 409,
- 290, 290, 281, 669, 290, 290, 290, 671, 546, 615,
- 290, 673, 281, 675, 676, 281, 680, 683, 99, 686,
- 687, 744, 722, 745, 228, 746, 747, 750, 290, 751,
- 281, 281, 756, 757, 307, 212, 199, 547, 281, 281,
- 281, 281, 290, 307, 307, 47, 307, 307, 99, 147,
- 246, 409, 290, 222, 389, 290, 99, 307, 59, 548,
- 409, 374, 45, 307, 307, 375, 645, 307, 307, 307,
- 290, 290, 41, 307, 310, 145, 311, 763, 290, 290,
- 290, 290, 501, 310, 310, 45, 310, 310, 718, 489,
- 497, 307, 587, 45, 244, 268, 45, 310, 709, 329,
- 387, 292, 459, 310, 310, 307, 294, 310, 310, 310,
- 309, 458, 514, 310, 524, 307, 658, 317, 307, 390,
- 428, 242, 291, 611, 429, 430, 375, 523, 662, 0,
- 42, 310, 43, 307, 307, 409, 727, 0, 0, 0,
- 0, 307, 307, 307, 307, 310, 0, 387, 387, 387,
- 387, 387, 0, 409, 0, 310, 0, 409, 310, 358,
- 0, 0, 0, 0, 0, 387, 0, 201, 387, 39,
- 635, 642, 0, 310, 310, 45, 409, 409, 0, 0,
- 99, 310, 310, 310, 310, 0, 409, 409, 409, 0,
- 201, 409, 409, 409, 0, 409, 409, 0, 204, 409,
- 409, 204, 204, 204, 204, 204, 0, 409, 0, 204,
- 204, 204, 204, 0, 204, 204, 0, 204, 204, 0,
- 0, 0, 426, 427, 204, 204, 204, 0, 204, 204,
- 0, 204, 204, 204, 204, 204, 0, 0, 204, 204,
- 204, 204, 0, 0, 204, 0, 0, 204, 0, 0,
- 409, 728, 0, 0, 409, 409, 0, 0, 99, 99,
- 204, 0, 204, 0, 0, 0, 0, 0, 0, 204,
- 0, 204, 204, 204, 204, 204, 204, 0, 0, 200,
- 0, 0, 0, 0, 40, 635, 204, 0, 642, 204,
- 0, 0, 204, 0, 0, 0, 428, 0, 0, 0,
- 429, 430, 200, 0, 204, 204, 461, 463, 465, 0,
- 0, 204, 204, 204, 45, 0, 0, 0, 45, 45,
- 45, 45, 0, 0, 0, 45, 45, 45, 45, 0,
- 45, 45, 99, 45, 45, 0, 99, 99, 0, 0,
- 45, 45, 45, 0, 45, 635, 0, 0, 642, 0,
+ 45, 177, 58, 59, 388, 389, 206, 169, 412, 275,
+ 416, 103, 256, 267, 275, 113, 275, 251, 175, 437,
+ 68, 280, 275, 727, 248, 275, 275, 432, 433, 123,
+ 570, 412, 280, 402, 265, 1, 436, 467, 711, 93,
+ 251, 319, 275, 274, 748, 1, 230, 116, 497, 128,
+ 126, 336, 414, 275, 558, 280, 216, 725, 226, 167,
+ 167, 143, 248, 484, 167, 282, 231, 133, 248, 275,
+ 275, 275, 173, 226, 421, 307, 682, 461, 362, 310,
+ 422, 423, 267, 264, 385, 480, 481, 328, 311, 331,
+ 728, 275, 216, 439, 1, 362, 177, 275, 275, 746,
+ 344, 467, 494, 46, 761, 275, 275, 275, 690, 424,
+ 216, 496, 803, 499, 610, 439, 53, 275, 275, 437,
+ 440, 438, 441, 276, 101, 216, 275, 20, 113, 216,
+ 275, 175, 309, 516, 275, 400, 708, 327, 391, 527,
+ 216, 225, 440, 438, 441, 326, 275, 329, 439, 166,
+ 371, 216, 466, 134, 371, 309, 395, 129, 390, 169,
+ 345, 361, 346, 412, 538, 757, 309, 439, 705, 404,
+ 623, 474, 794, 330, 412, 440, 438, 441, 556, 216,
+ 216, 275, 309, 309, 309, 275, 412, 368, 584, 752,
+ 439, 343, 610, 722, 440, 438, 441, 338, 216, 383,
+ 621, 131, 121, 162, 416, 437, 394, 275, 559, 272,
+ 476, 275, 317, 180, 180, 180, 396, 440, 438, 441,
+ 54, 509, 477, 620, 167, 56, 486, 122, 251, 452,
+ 482, 631, 533, 805, 275, 275, 275, 298, 392, 490,
+ 87, 428, 88, 216, 758, 309, 495, 309, 275, 453,
+ 132, 447, 629, 488, 77, 339, 510, 668, 804, 275,
+ 549, 560, 437, 491, 81, 121, 275, 124, 275, 251,
+ 471, 472, 437, 169, 251, 251, 139, 140, 760, 625,
+ 475, 651, 478, 251, 275, 452, 412, 437, 416, 534,
+ 122, 1, 1, 1, 1, 101, 251, 251, 347, 437,
+ 437, 437, 318, 437, 369, 453, 458, 248, 369, 251,
+ 280, 125, 251, 248, 371, 439, 511, 512, 251, 550,
+ 1, 1, 1, 437, 633, 141, 281, 161, 91, 347,
+ 414, 251, 347, 1, 267, 275, 305, 306, 460, 534,
+ 267, 528, 440, 438, 441, 1, 806, 532, 80, 1,
+ 1, 1, 1, 1, 537, 412, 275, 113, 275, 275,
+ 275, 617, 618, 619, 251, 275, 275, 1, 726, 1,
+ 437, 248, 412, 4, 5, 6, 1, 548, 626, 280,
+ 124, 251, 99, 738, 99, 101, 91, 1, 251, 113,
+ 740, 309, 634, 275, 275, 1, 280, 280, 681, 414,
+ 309, 309, 224, 309, 309, 675, 216, 290, 291, 385,
+ 385, 385, 385, 385, 309, 7, 99, 224, 616, 1,
+ 309, 309, 360, 286, 309, 309, 309, 8, 216, 267,
+ 309, 9, 10, 11, 12, 13, 624, 385, 627, 360,
+ 309, 412, 275, 275, 174, 175, 286, 132, 309, 14,
+ 632, 15, 689, 286, 286, 799, 452, 286, 16, 800,
+ 580, 275, 309, 437, 647, 607, 174, 175, 369, 17,
+ 275, 275, 216, 286, 309, 370, 453, 18, 78, 309,
+ 251, 719, 416, 655, 309, 251, 251, 774, 412, 412,
+ 412, 412, 412, 553, 251, 216, 309, 309, 660, 666,
+ 667, 19, 730, 704, 309, 309, 309, 309, 706, 780,
+ 412, 649, 580, 91, 267, 216, 412, 567, 666, 667,
+ 734, 735, 736, 82, 696, 275, 275, 412, 92, 416,
+ 416, 416, 416, 416, 93, 713, 286, 267, 286, 628,
+ 715, 673, 674, 607, 115, 448, 449, 94, 720, 99,
+ 764, 458, 766, 783, 152, 723, 290, 416, 275, 275,
+ 132, 95, 775, 390, 390, 390, 390, 390, 781, 99,
+ 290, 291, 784, 450, 451, 251, 787, 788, 96, 290,
+ 791, 356, 178, 793, 111, 739, 290, 290, 744, 73,
+ 290, 390, 275, 275, 347, 693, 790, 290, 291, 124,
+ 124, 124, 124, 124, 124, 124, 290, 290, 291, 124,
+ 124, 124, 356, 114, 218, 356, 347, 275, 650, 519,
+ 773, 520, 219, 437, 437, 183, 743, 187, 782, 450,
+ 451, 97, 785, 437, 437, 437, 709, 789, 248, 255,
+ 792, 124, 216, 273, 437, 437, 216, 437, 437, 220,
+ 221, 696, 347, 347, 347, 73, 74, 347, 347, 347,
+ 98, 347, 386, 776, 347, 347, 673, 674, 437, 290,
+ 437, 290, 347, 99, 300, 286, 301, 302, 303, 100,
+ 810, 286, 286, 811, 812, 813, 814, 101, 733, 112,
+ 286, 286, 286, 102, 286, 286, 113, 286, 275, 357,
+ 103, 286, 286, 286, 300, 286, 406, 407, 84, 85,
+ 86, 286, 286, 286, 286, 286, 286, 286, 286, 403,
+ 104, 286, 216, 105, 286, 106, 437, 300, 743, 107,
+ 357, 286, 408, 357, 437, 108, 437, 286, 300, 286,
+ 109, 778, 779, 437, 653, 654, 144, 145, 286, 286,
+ 286, 286, 286, 286, 300, 300, 300, 414, 110, 658,
+ 659, 437, 437, 437, 411, 286, 267, 216, 763, 120,
+ 286, 216, 267, 136, 765, 286, 286, 216, 777, 132,
+ 132, 132, 132, 132, 132, 132, 137, 286, 286, 132,
+ 132, 132, 267, 161, 286, 286, 286, 286, 286, 241,
+ 450, 451, 41, 73, 126, 241, 241, 241, 290, 99,
+ 99, 174, 175, 786, 290, 290, 216, 300, 163, 300,
+ 165, 132, 188, 290, 290, 290, 437, 290, 290, 195,
+ 290, 414, 415, 416, 290, 290, 290, 318, 290, 267,
+ 267, 267, 267, 267, 290, 290, 290, 290, 290, 290,
+ 290, 290, 184, 185, 290, 210, 211, 290, 189, 190,
+ 318, 267, 437, 47, 290, 196, 417, 267, 437, 437,
+ 290, 318, 290, 48, 49, 50, 201, 356, 267, 267,
+ 202, 290, 290, 290, 290, 290, 290, 318, 318, 318,
+ 248, 69, 203, 99, 99, 248, 248, 437, 290, 356,
+ 214, 248, 205, 290, 248, 636, 637, 638, 290, 290,
+ 5, 6, 51, 207, 67, 192, 193, 126, 290, 291,
+ 290, 290, 670, 677, 305, 306, 335, 290, 290, 290,
+ 290, 290, 275, 642, 643, 356, 356, 356, 83, 208,
+ 356, 356, 356, 209, 356, 686, 687, 356, 356, 335,
+ 318, 212, 318, 694, 175, 356, 300, 275, 181, 182,
+ 335, 213, 300, 300, 84, 85, 86, 9, 10, 11,
+ 12, 215, 300, 300, 216, 300, 300, 338, 365, 366,
+ 437, 378, 300, 300, 300, 248, 300, 386, 386, 386,
+ 386, 386, 300, 300, 224, 357, 300, 300, 300, 300,
+ 338, 128, 300, 437, 437, 300, 232, 378, 378, 378,
+ 246, 338, 300, 666, 667, 386, 254, 357, 300, 251,
+ 300, 130, 378, 378, 378, 673, 674, 653, 654, 300,
+ 300, 300, 300, 300, 300, 414, 241, 257, 167, 335,
+ 269, 335, 749, 750, 658, 659, 300, 670, 753, 754,
+ 677, 300, 125, 357, 357, 357, 300, 300, 357, 357,
+ 357, 285, 357, 286, 114, 357, 357, 287, 300, 300,
+ 437, 437, 270, 357, 288, 300, 300, 300, 300, 300,
+ 275, 275, 414, 414, 414, 414, 414, 437, 437, 289,
+ 338, 293, 338, 443, 443, 333, 318, 294, 437, 500,
+ 501, 502, 503, 504, 414, 318, 318, 315, 318, 318,
+ 414, 323, 324, 325, 128, 670, 445, 445, 677, 318,
+ 437, 99, 337, 295, 241, 318, 318, 505, 347, 318,
+ 318, 318, 376, 352, 130, 318, 126, 126, 126, 126,
+ 126, 126, 126, 404, 404, 318, 126, 126, 126, 241,
+ 404, 404, 404, 318, 332, 378, 437, 437, 437, 381,
+ 382, 437, 437, 437, 233, 437, 386, 318, 437, 437,
+ 67, 67, 392, 397, 405, 398, 437, 114, 126, 318,
+ 281, 413, 420, 67, 318, 335, 67, 233, 427, 318,
+ 429, 456, 67, 463, 335, 335, 457, 335, 335, 470,
+ 473, 318, 318, 513, 514, 67, 515, 517, 335, 318,
+ 318, 318, 318, 521, 335, 335, 275, 524, 335, 335,
+ 335, 522, 523, 275, 335, 526, 546, 275, 551, 561,
+ 465, 562, 563, 564, 335, 469, 338, 565, 67, 566,
+ 568, 460, 335, 275, 585, 338, 338, 635, 338, 338,
+ 639, 640, 645, 641, 663, 67, 335, 664, 665, 338,
+ 685, 691, 67, 697, 692, 338, 338, 707, 335, 338,
+ 338, 338, 699, 335, 710, 338, 700, 701, 335, 650,
+ 712, 198, 199, 200, 508, 338, 714, 716, 717, 721,
+ 335, 335, 45, 338, 724, 681, 731, 732, 335, 335,
+ 335, 335, 737, 762, 771, 801, 275, 338, 275, 796,
+ 795, 797, 798, 802, 807, 45, 808, 815, 228, 338,
+ 809, 241, 99, 45, 338, 275, 45, 47, 229, 338,
+ 167, 437, 251, 128, 128, 128, 128, 128, 128, 128,
+ 416, 338, 338, 128, 128, 128, 99, 59, 437, 338,
+ 338, 338, 338, 130, 130, 130, 130, 130, 130, 130,
+ 437, 41, 401, 130, 130, 130, 402, 680, 241, 241,
+ 241, 241, 241, 241, 241, 128, 241, 146, 767, 241,
+ 241, 241, 241, 316, 275, 529, 816, 241, 241, 241,
+ 241, 275, 518, 525, 247, 130, 112, 112, 112, 112,
+ 112, 112, 112, 230, 755, 45, 622, 112, 112, 334,
+ 241, 275, 271, 299, 297, 483, 233, 314, 479, 233,
+ 233, 233, 233, 233, 322, 543, 230, 233, 233, 233,
+ 233, 545, 233, 233, 698, 233, 233, 167, 114, 539,
+ 555, 399, 233, 233, 233, 245, 233, 233, 296, 233,
+ 233, 233, 233, 233, 646, 384, 233, 233, 233, 233,
+ 275, 554, 233, 703, 42, 233, 43, 0, 367, 0,
+ 0, 241, 233, 0, 275, 0, 275, 0, 233, 0,
+ 233, 0, 0, 275, 275, 0, 0, 233, 0, 233,
+ 233, 233, 233, 233, 233, 0, 0, 241, 241, 241,
+ 241, 241, 241, 241, 0, 0, 233, 241, 241, 241,
+ 0, 233, 0, 275, 0, 0, 233, 233, 0, 0,
+ 275, 0, 0, 0, 275, 0, 0, 0, 233, 233,
+ 229, 0, 0, 0, 0, 233, 233, 233, 0, 0,
+ 275, 275, 275, 0, 45, 0, 0, 0, 45, 45,
+ 45, 45, 0, 229, 0, 45, 45, 45, 45, 0,
+ 45, 45, 0, 45, 45, 0, 0, 275, 275, 0,
+ 45, 45, 45, 0, 45, 0, 0, 0, 0, 90,
45, 45, 0, 0, 45, 45, 45, 45, 0, 0,
- 45, 0, 0, 45, 0, 0, 0, 511, 0, 512,
- 0, 513, 0, 0, 0, 0, 45, 0, 45, 0,
- 0, 0, 0, 5, 6, 0, 0, 45, 45, 45,
- 45, 45, 45, 0, 0, 258, 259, 260, 0, 180,
- 0, 0, 45, 0, 0, 45, 0, 0, 45, 0,
- 0, 0, 0, 0, 0, 38, 5, 6, 0, 201,
- 45, 45, 180, 201, 201, 201, 201, 45, 45, 45,
- 201, 201, 201, 201, 0, 201, 201, 0, 201, 201,
- 9, 10, 11, 12, 90, 0, 201, 201, 194, 201,
- 201, 0, 201, 201, 201, 201, 201, 7, 0, 201,
- 201, 201, 201, 0, 0, 201, 0, 0, 201, 0,
- 0, 194, 0, 9, 10, 11, 12, 0, 117, 118,
- 119, 201, 0, 201, 471, 472, 473, 474, 475, 137,
- 201, 350, 0, 246, 0, 0, 0, 201, 0, 0,
- 246, 172, 0, 0, 246, 476, 371, 201, 0, 0,
- 201, 0, 0, 0, 376, 0, 182, 0, 0, 0,
- 246, 246, 246, 0, 0, 201, 201, 0, 0, 0,
- 0, 200, 201, 201, 201, 200, 200, 200, 200, 182,
- 0, 0, 200, 200, 200, 200, 0, 200, 200, 0,
- 200, 200, 0, 0, 0, 412, 0, 0, 200, 200,
- 0, 200, 200, 0, 200, 200, 200, 200, 200, 0,
- 0, 200, 200, 200, 200, 0, 0, 200, 0, 0,
- 200, 0, 0, 246, 199, 246, 0, 0, 0, 0,
- 0, 0, 0, 200, 0, 200, 719, 720, 721, 0,
- 0, 0, 200, 246, 247, 0, 0, 199, 250, 200,
- 0, 0, 0, 0, 0, 0, 0, 0, 257, 200,
- 0, 0, 200, 212, 212, 212, 212, 212, 0, 0,
- 0, 212, 212, 212, 0, 0, 184, 200, 200, 112,
- 112, 112, 112, 112, 200, 200, 200, 114, 112, 112,
- 180, 180, 180, 180, 180, 180, 180, 180, 180, 184,
- 0, 0, 180, 180, 180, 180, 0, 180, 180, 0,
- 180, 180, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 180, 180, 0, 180, 180, 180, 180, 180, 0,
- 0, 180, 180, 180, 0, 0, 0, 180, 0, 0,
- 194, 0, 194, 194, 194, 194, 194, 0, 0, 0,
- 0, 194, 194, 194, 194, 180, 232, 233, 234, 235,
- 236, 0, 0, 0, 0, 237, 238, 194, 194, 180,
- 0, 0, 0, 0, 0, 0, 0, 246, 0, 180,
- 194, 0, 180, 194, 0, 0, 0, 0, 0, 194,
- 0, 0, 0, 185, 0, 0, 0, 180, 180, 0,
- 246, 246, 194, 0, 0, 180, 180, 182, 182, 182,
- 182, 182, 182, 182, 182, 182, 185, 0, 0, 182,
- 182, 182, 182, 0, 182, 182, 0, 182, 182, 0,
- 0, 194, 0, 0, 0, 0, 0, 0, 182, 182,
- 0, 182, 182, 182, 182, 182, 194, 0, 182, 182,
- 182, 0, 0, 194, 182, 0, 60, 61, 62, 63,
- 64, 0, 65, 0, 66, 0, 67, 68, 69, 0,
- 0, 0, 182, 0, 246, 0, 199, 0, 246, 246,
- 199, 199, 199, 0, 0, 242, 182, 199, 199, 199,
- 199, 0, 70, 0, 0, 0, 182, 0, 0, 182,
- 0, 0, 0, 199, 199, 0, 0, 0, 242, 71,
- 72, 0, 0, 0, 182, 182, 199, 0, 0, 199,
- 0, 0, 182, 182, 0, 199, 0, 184, 184, 184,
- 184, 184, 184, 184, 184, 184, 0, 0, 199, 184,
- 184, 184, 184, 0, 184, 184, 0, 184, 184, 0,
- 0, 0, 0, 0, 0, 0, 0, 146, 184, 184,
- 0, 184, 184, 184, 184, 184, 0, 199, 184, 184,
- 184, 0, 0, 0, 184, 0, 0, 232, 0, 147,
- 0, 222, 199, 0, 0, 0, 0, 0, 0, 199,
- 0, 0, 184, 0, 0, 0, 0, 0, 242, 0,
- 232, 0, 0, 0, 222, 0, 184, 0, 148, 149,
- 150, 0, 0, 151, 152, 153, 184, 154, 155, 184,
- 0, 143, 144, 0, 0, 0, 0, 0, 0, 156,
- 0, 0, 0, 0, 184, 184, 0, 0, 0, 0,
- 0, 0, 184, 184, 185, 185, 185, 185, 185, 185,
- 185, 185, 185, 0, 0, 0, 185, 185, 185, 185,
- 0, 185, 185, 0, 185, 185, 0, 0, 0, 0,
- 0, 0, 0, 0, 409, 185, 185, 0, 185, 185,
- 185, 185, 185, 0, 0, 185, 185, 185, 0, 0,
- 232, 185, 0, 194, 0, 0, 409, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 185,
- 0, 0, 0, 0, 0, 0, 194, 0, 0, 0,
- 0, 0, 0, 185, 0, 409, 409, 409, 0, 0,
- 409, 409, 409, 185, 409, 409, 185, 242, 409, 409,
- 0, 0, 0, 242, 242, 0, 409, 0, 0, 240,
- 0, 185, 185, 242, 242, 0, 242, 242, 0, 185,
- 185, 0, 0, 242, 242, 242, 0, 242, 0, 0,
- 0, 0, 240, 242, 242, 0, 0, 242, 242, 242,
- 242, 0, 0, 242, 0, 0, 242, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 242,
- 0, 242, 0, 0, 0, 0, 0, 0, 0, 0,
- 242, 242, 242, 242, 242, 242, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 242, 0, 0, 242, 0,
- 0, 242, 0, 222, 0, 232, 232, 0, 222, 222,
- 0, 0, 0, 242, 242, 232, 232, 222, 232, 232,
- 242, 242, 242, 0, 365, 232, 232, 232, 0, 232,
- 222, 222, 240, 0, 0, 232, 232, 0, 0, 232,
- 232, 232, 232, 222, 0, 232, 222, 365, 232, 0,
- 0, 0, 222, 0, 0, 0, 0, 0, 0, 0,
- 0, 232, 0, 232, 0, 222, 0, 0, 0, 0,
- 0, 0, 232, 232, 232, 232, 232, 232, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 232, 0, 0,
- 232, 0, 0, 232, 222, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 232, 232, 0, 0, 222,
- 0, 0, 232, 232, 232, 194, 222, 194, 194, 194,
- 194, 194, 194, 0, 0, 0, 194, 194, 194, 194,
- 0, 194, 194, 0, 194, 194, 361, 0, 0, 0,
- 0, 0, 0, 0, 0, 194, 194, 0, 194, 194,
- 194, 194, 194, 0, 0, 194, 194, 194, 238, 361,
- 0, 194, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 240, 194,
- 0, 238, 0, 0, 0, 0, 0, 240, 240, 0,
- 240, 240, 0, 194, 0, 0, 0, 240, 240, 240,
- 0, 240, 0, 194, 0, 0, 194, 240, 240, 0,
- 0, 240, 240, 240, 240, 0, 0, 240, 0, 0,
- 240, 194, 194, 0, 0, 0, 0, 0, 0, 194,
- 194, 0, 0, 240, 0, 240, 0, 0, 0, 0,
- 0, 0, 0, 0, 240, 240, 240, 240, 240, 240,
- 0, 0, 0, 0, 0, 0, 52, 0, 0, 240,
- 0, 0, 240, 0, 0, 240, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 365, 240, 240, 52,
- 0, 365, 365, 365, 240, 240, 240, 365, 365, 365,
- 365, 0, 365, 365, 0, 365, 365, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 365, 0, 0, 0,
- 0, 238, 365, 365, 0, 0, 365, 365, 365, 199,
- 0, 0, 365, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 238, 0, 0, 0, 0, 0,
- 365, 0, 199, 0, 0, 0, 0, 0, 0, 363,
- 363, 363, 363, 363, 365, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 365, 0, 0, 365, 0, 52,
- 363, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 365, 365, 0, 0, 0, 0, 361, 0,
- 365, 365, 0, 361, 361, 361, 0, 0, 0, 361,
- 361, 361, 361, 0, 361, 361, 0, 361, 361, 0,
- 238, 0, 0, 0, 0, 0, 238, 0, 361, 0,
- 0, 0, 0, 0, 361, 361, 0, 0, 361, 361,
- 361, 212, 0, 0, 361, 0, 0, 238, 238, 0,
+ 45, 0, 0, 45, 0, 0, 0, 0, 0, 0,
+ 45, 0, 0, 275, 0, 275, 45, 0, 45, 0,
+ 0, 0, 0, 117, 118, 119, 0, 45, 45, 45,
+ 45, 45, 45, 0, 138, 0, 0, 39, 275, 0,
+ 0, 0, 0, 0, 45, 0, 172, 0, 0, 45,
+ 0, 0, 0, 0, 45, 45, 0, 0, 0, 0,
+ 0, 275, 275, 209, 0, 230, 45, 45, 0, 230,
+ 230, 230, 230, 45, 45, 45, 230, 230, 230, 230,
+ 0, 230, 230, 0, 230, 230, 209, 0, 0, 0,
+ 0, 0, 230, 230, 0, 230, 230, 0, 230, 230,
+ 230, 230, 230, 572, 573, 230, 230, 230, 230, 0,
+ 0, 230, 0, 0, 230, 0, 574, 0, 0, 575,
+ 0, 230, 0, 0, 0, 576, 0, 230, 0, 230,
+ 0, 211, 0, 0, 0, 0, 230, 0, 577, 0,
+ 0, 0, 0, 230, 0, 275, 275, 0, 249, 250,
+ 0, 0, 40, 253, 211, 230, 0, 0, 0, 0,
+ 230, 0, 0, 260, 0, 230, 0, 275, 0, 0,
+ 0, 578, 0, 0, 0, 0, 0, 230, 230, 0,
+ 0, 0, 0, 0, 230, 230, 230, 0, 0, 0,
+ 275, 275, 229, 0, 0, 579, 229, 229, 229, 229,
+ 0, 0, 147, 229, 229, 229, 229, 0, 229, 229,
+ 0, 229, 229, 0, 0, 0, 0, 0, 0, 229,
+ 229, 0, 229, 229, 148, 229, 229, 229, 229, 229,
+ 0, 0, 229, 229, 229, 229, 0, 0, 229, 0,
+ 0, 229, 0, 0, 0, 0, 0, 0, 229, 275,
+ 0, 0, 0, 0, 229, 213, 229, 261, 262, 263,
+ 149, 150, 151, 229, 0, 152, 153, 154, 0, 155,
+ 229, 0, 144, 145, 275, 275, 0, 0, 213, 0,
+ 156, 0, 229, 38, 5, 6, 0, 229, 0, 0,
+ 0, 0, 229, 0, 0, 485, 487, 489, 0, 0,
+ 0, 0, 492, 493, 229, 229, 0, 0, 0, 0,
+ 0, 229, 229, 229, 209, 209, 209, 209, 209, 209,
+ 209, 209, 209, 0, 0, 7, 209, 209, 209, 209,
+ 0, 209, 209, 0, 209, 209, 0, 0, 0, 0,
+ 0, 9, 10, 11, 12, 209, 209, 0, 209, 209,
+ 209, 209, 209, 0, 0, 209, 209, 209, 359, 0,
+ 0, 209, 0, 0, 0, 540, 0, 541, 0, 542,
+ 0, 209, 544, 380, 0, 0, 0, 0, 214, 209,
+ 0, 385, 211, 211, 211, 211, 211, 211, 211, 211,
+ 211, 0, 0, 209, 211, 211, 211, 211, 0, 211,
+ 211, 214, 211, 211, 0, 209, 0, 0, 0, 0,
+ 209, 0, 0, 211, 211, 209, 211, 211, 211, 211,
+ 211, 0, 437, 211, 211, 211, 434, 209, 209, 211,
+ 0, 0, 0, 0, 0, 209, 209, 0, 0, 211,
+ 0, 0, 0, 0, 437, 0, 0, 211, 0, 0,
+ 0, 0, 0, 0, 0, 0, 60, 61, 62, 63,
+ 64, 211, 65, 0, 66, 0, 67, 68, 69, 0,
+ 0, 0, 0, 211, 0, 0, 271, 0, 211, 0,
+ 437, 437, 437, 211, 0, 437, 437, 437, 0, 437,
+ 0, 0, 437, 437, 0, 211, 211, 0, 70, 271,
+ 437, 702, 0, 211, 211, 0, 213, 213, 213, 213,
+ 213, 213, 213, 213, 213, 71, 72, 0, 213, 213,
+ 213, 213, 0, 213, 213, 0, 213, 213, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 213, 213, 0,
+ 213, 213, 213, 213, 213, 0, 0, 213, 213, 213,
+ 0, 0, 0, 213, 0, 0, 0, 0, 248, 0,
+ 0, 0, 0, 213, 112, 112, 112, 112, 112, 112,
+ 112, 213, 0, 0, 114, 112, 112, 0, 0, 0,
+ 0, 248, 0, 0, 0, 213, 261, 0, 0, 271,
+ 0, 0, 0, 0, 0, 0, 0, 213, 0, 0,
+ 0, 0, 213, 0, 0, 0, 0, 213, 0, 261,
+ 0, 0, 0, 0, 0, 768, 769, 770, 0, 213,
+ 213, 0, 0, 0, 0, 0, 0, 213, 213, 214,
+ 214, 214, 214, 214, 214, 214, 214, 214, 0, 0,
+ 0, 214, 214, 214, 214, 0, 214, 214, 0, 214,
+ 214, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 214, 214, 0, 214, 214, 214, 214, 214, 0, 0,
+ 214, 214, 214, 0, 0, 0, 214, 233, 234, 235,
+ 236, 237, 238, 239, 0, 0, 214, 0, 240, 241,
+ 0, 0, 0, 0, 214, 0, 0, 0, 0, 261,
+ 0, 0, 0, 0, 0, 0, 0, 0, 214, 223,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 214, 0, 0, 0, 0, 214, 0, 0, 271, 0,
+ 214, 0, 223, 0, 271, 271, 0, 0, 0, 0,
+ 0, 0, 214, 214, 271, 271, 0, 271, 271, 0,
+ 214, 214, 0, 0, 271, 271, 271, 0, 271, 0,
+ 0, 0, 0, 0, 271, 271, 0, 0, 271, 271,
+ 271, 271, 0, 0, 271, 0, 0, 271, 0, 0,
+ 0, 0, 0, 0, 271, 0, 0, 0, 0, 0,
+ 271, 0, 271, 0, 0, 0, 0, 0, 0, 0,
+ 0, 271, 271, 271, 271, 271, 271, 269, 0, 0,
+ 248, 0, 0, 0, 0, 248, 248, 0, 271, 0,
+ 0, 248, 0, 271, 248, 0, 0, 0, 271, 271,
+ 269, 0, 0, 0, 0, 0, 0, 248, 248, 0,
+ 271, 271, 0, 0, 261, 261, 0, 271, 271, 271,
+ 248, 0, 0, 248, 261, 261, 0, 261, 261, 248,
+ 0, 0, 0, 0, 261, 261, 261, 0, 261, 0,
+ 0, 0, 248, 0, 261, 261, 0, 0, 261, 261,
+ 261, 261, 0, 0, 261, 0, 0, 261, 0, 0,
+ 0, 0, 0, 0, 261, 0, 0, 0, 0, 0,
+ 261, 0, 261, 0, 0, 248, 0, 0, 0, 0,
+ 0, 261, 261, 261, 261, 261, 261, 0, 392, 0,
+ 269, 0, 248, 0, 0, 0, 0, 0, 261, 248,
+ 0, 0, 0, 261, 0, 0, 0, 0, 261, 261,
+ 0, 392, 0, 0, 0, 0, 0, 0, 0, 0,
+ 261, 261, 0, 0, 0, 0, 0, 261, 261, 261,
+ 0, 223, 267, 223, 223, 223, 223, 223, 223, 0,
+ 0, 0, 223, 223, 223, 223, 0, 223, 223, 0,
+ 223, 223, 0, 0, 0, 267, 0, 0, 0, 0,
+ 0, 223, 223, 0, 223, 223, 223, 223, 223, 0,
+ 0, 223, 223, 223, 0, 0, 0, 223, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 223, 0, 0,
+ 0, 0, 0, 0, 0, 223, 0, 0, 0, 388,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 223,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 238, 0, 361, 238, 212, 0, 0, 0, 0, 238,
- 0, 361, 361, 361, 361, 361, 361, 0, 0, 0,
- 0, 0, 238, 0, 0, 0, 361, 0, 0, 361,
- 0, 0, 361, 0, 0, 0, 0, 194, 0, 0,
- 0, 0, 0, 0, 361, 361, 0, 0, 0, 0,
- 0, 238, 361, 361, 0, 0, 0, 52, 52, 52,
- 194, 52, 52, 52, 52, 52, 238, 0, 0, 52,
- 52, 52, 52, 238, 52, 52, 0, 52, 52, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 52, 0,
- 0, 0, 0, 197, 52, 52, 0, 0, 52, 52,
- 52, 0, 0, 0, 52, 0, 0, 0, 0, 0,
- 0, 0, 0, 238, 0, 0, 197, 0, 0, 238,
- 0, 199, 52, 0, 0, 199, 199, 199, 199, 0,
- 0, 0, 199, 199, 199, 199, 52, 199, 199, 238,
- 199, 199, 0, 0, 0, 0, 52, 0, 0, 52,
- 194, 199, 199, 0, 199, 199, 199, 199, 199, 0,
- 0, 199, 199, 199, 52, 52, 0, 199, 0, 0,
- 0, 0, 52, 52, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 199, 238, 238, 238, 238,
- 238, 0, 0, 0, 0, 0, 0, 0, 0, 199,
- 0, 0, 0, 0, 238, 0, 197, 238, 0, 199,
- 0, 0, 199, 0, 212, 0, 0, 0, 219, 238,
- 238, 0, 0, 0, 0, 0, 0, 199, 199, 0,
- 0, 0, 0, 212, 0, 199, 199, 212, 212, 212,
- 212, 219, 0, 0, 212, 212, 212, 212, 0, 212,
- 212, 0, 212, 212, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 212, 212, 0, 212, 212, 212, 212,
- 212, 0, 0, 212, 212, 212, 0, 0, 0, 212,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 194,
- 0, 0, 194, 194, 194, 194, 194, 212, 0, 0,
- 194, 194, 194, 194, 0, 194, 194, 0, 194, 194,
- 0, 212, 0, 0, 0, 0, 0, 0, 0, 194,
- 0, 212, 0, 0, 212, 194, 194, 212, 0, 194,
- 194, 194, 397, 0, 0, 194, 0, 0, 0, 212,
- 212, 0, 0, 0, 0, 197, 0, 212, 212, 197,
- 197, 197, 197, 194, 0, 397, 197, 197, 197, 197,
- 0, 197, 197, 0, 197, 197, 0, 194, 0, 0,
- 0, 0, 0, 0, 0, 197, 0, 194, 0, 0,
- 194, 197, 197, 0, 0, 197, 197, 197, 364, 0,
- 0, 197, 0, 0, 0, 194, 194, 0, 0, 0,
- 0, 0, 0, 194, 194, 0, 0, 0, 0, 197,
- 0, 364, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 197, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 197, 0, 0, 197, 0, 0, 0,
+ 0, 223, 388, 0, 0, 0, 223, 0, 0, 0,
+ 0, 223, 0, 0, 0, 0, 269, 0, 0, 0,
+ 0, 0, 0, 223, 223, 269, 269, 0, 269, 269,
+ 0, 223, 223, 0, 0, 269, 269, 269, 0, 269,
+ 0, 0, 0, 0, 0, 269, 269, 0, 0, 269,
+ 269, 269, 269, 0, 0, 269, 0, 0, 269, 0,
+ 0, 0, 0, 0, 0, 269, 0, 0, 0, 0,
+ 241, 269, 0, 269, 0, 0, 0, 0, 0, 0,
+ 0, 0, 269, 269, 269, 269, 269, 269, 0, 0,
+ 0, 0, 0, 241, 0, 0, 0, 0, 0, 269,
+ 0, 0, 0, 0, 269, 0, 0, 0, 0, 269,
+ 269, 0, 52, 0, 0, 0, 0, 0, 0, 0,
+ 392, 269, 269, 0, 0, 392, 392, 392, 269, 269,
+ 269, 392, 392, 392, 392, 52, 392, 392, 0, 392,
+ 392, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 392, 0, 0, 0, 0, 0, 392, 392, 0, 0,
+ 392, 392, 392, 0, 267, 0, 392, 0, 0, 0,
+ 267, 0, 0, 0, 0, 0, 392, 0, 0, 0,
+ 228, 0, 0, 0, 392, 0, 0, 0, 0, 0,
+ 0, 267, 267, 390, 390, 390, 390, 390, 392, 0,
+ 0, 0, 0, 228, 267, 0, 0, 267, 0, 0,
+ 392, 0, 0, 267, 0, 392, 0, 0, 0, 0,
+ 392, 390, 0, 0, 0, 52, 267, 0, 0, 0,
+ 0, 388, 392, 392, 0, 0, 388, 388, 388, 0,
+ 392, 392, 388, 388, 388, 388, 0, 388, 388, 0,
+ 388, 388, 0, 0, 0, 0, 0, 0, 0, 267,
+ 0, 388, 0, 0, 0, 0, 0, 388, 388, 0,
+ 0, 388, 388, 388, 0, 0, 267, 388, 0, 0,
+ 0, 0, 0, 267, 0, 0, 0, 388, 0, 0,
+ 0, 241, 0, 0, 0, 388, 0, 0, 0, 0,
+ 0, 0, 0, 0, 388, 388, 388, 388, 388, 388,
+ 0, 0, 0, 0, 241, 0, 0, 0, 0, 0,
+ 0, 388, 241, 0, 0, 0, 388, 241, 241, 0,
+ 0, 388, 388, 241, 241, 241, 241, 0, 0, 0,
+ 0, 0, 0, 388, 388, 0, 0, 0, 241, 0,
+ 0, 388, 388, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 52, 52, 52, 0, 52, 52, 52,
+ 52, 52, 0, 0, 0, 52, 52, 52, 52, 0,
+ 52, 52, 0, 52, 52, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 52, 241, 241, 241, 241, 241,
+ 52, 52, 223, 0, 52, 52, 52, 0, 0, 0,
+ 52, 0, 0, 0, 0, 0, 0, 241, 0, 0,
+ 52, 0, 0, 241, 0, 223, 0, 0, 52, 0,
+ 0, 0, 228, 0, 241, 241, 228, 228, 228, 228,
+ 0, 0, 52, 228, 228, 228, 228, 0, 228, 228,
+ 0, 228, 228, 0, 52, 0, 0, 0, 0, 52,
+ 0, 0, 228, 228, 52, 228, 228, 228, 228, 228,
+ 0, 0, 228, 228, 228, 0, 52, 52, 228, 0,
+ 0, 0, 0, 0, 52, 52, 0, 0, 228, 0,
+ 0, 0, 0, 0, 0, 0, 228, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 197, 197, 0, 0, 0, 0, 0, 0, 197,
- 197, 0, 0, 0, 0, 0, 212, 0, 0, 0,
- 219, 212, 212, 212, 0, 219, 219, 212, 212, 212,
- 212, 219, 212, 212, 219, 212, 212, 0, 0, 0,
- 0, 0, 0, 219, 0, 0, 212, 219, 219, 0,
- 0, 0, 212, 212, 0, 0, 212, 212, 212, 0,
- 219, 0, 212, 219, 0, 0, 219, 0, 0, 219,
+ 228, 0, 0, 226, 0, 223, 0, 0, 0, 0,
+ 0, 0, 228, 0, 0, 0, 0, 228, 0, 0,
+ 0, 0, 228, 0, 0, 0, 226, 0, 0, 0,
+ 0, 0, 0, 241, 228, 228, 0, 0, 241, 241,
+ 241, 0, 228, 228, 241, 241, 241, 241, 0, 241,
+ 241, 0, 241, 241, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 241, 241, 0, 241, 241, 241, 241,
+ 241, 0, 0, 241, 241, 241, 0, 0, 0, 241,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 241,
+ 0, 0, 0, 0, 0, 0, 0, 241, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 212, 0, 219, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 212, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 212, 0, 219, 212, 0, 0,
- 0, 219, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 212, 212, 0, 0, 219, 0, 0, 219,
- 212, 212, 0, 219, 397, 0, 0, 0, 0, 397,
- 397, 397, 0, 0, 0, 397, 397, 397, 397, 0,
- 397, 397, 0, 397, 397, 0, 219, 0, 0, 222,
- 0, 0, 0, 0, 397, 0, 0, 0, 0, 0,
- 397, 397, 0, 0, 397, 397, 397, 0, 0, 0,
- 397, 0, 222, 0, 0, 0, 0, 0, 0, 0,
- 364, 0, 0, 0, 0, 364, 364, 364, 397, 0,
- 0, 364, 364, 364, 364, 0, 364, 364, 0, 364,
- 364, 0, 397, 0, 0, 0, 0, 0, 0, 0,
- 364, 0, 397, 0, 0, 397, 364, 364, 0, 0,
- 364, 364, 364, 0, 0, 0, 364, 0, 0, 222,
- 397, 397, 0, 0, 0, 0, 0, 0, 397, 397,
- 0, 0, 0, 0, 364, 0, 0, 0, 0, 0,
- 0, 0, 222, 0, 0, 0, 0, 0, 364, 0,
- 0, 0, 222, 0, 0, 0, 0, 0, 364, 0,
- 0, 364, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 219, 364, 364, 0, 0,
- 219, 219, 219, 0, 364, 364, 219, 0, 0, 219,
- 0, 219, 219, 0, 219, 219, 0, 0, 0, 238,
- 0, 0, 0, 0, 0, 219, 0, 0, 0, 0,
- 0, 219, 219, 0, 0, 219, 219, 219, 0, 0,
- 0, 219, 238, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 219, 219,
- 0, 0, 0, 219, 219, 219, 0, 0, 0, 219,
- 0, 0, 219, 219, 219, 219, 0, 219, 219, 0,
- 0, 0, 238, 219, 0, 0, 219, 0, 219, 0,
- 0, 0, 0, 0, 219, 219, 0, 0, 219, 219,
- 219, 219, 219, 0, 219, 238, 0, 0, 0, 219,
- 219, 222, 0, 0, 0, 0, 222, 222, 222, 0,
- 0, 0, 219, 0, 0, 222, 0, 222, 222, 0,
- 222, 222, 238, 0, 0, 156, 219, 0, 0, 0,
- 0, 222, 0, 0, 0, 0, 219, 222, 222, 219,
- 0, 222, 222, 222, 0, 0, 0, 222, 154, 0,
- 0, 0, 0, 0, 219, 219, 0, 0, 0, 0,
- 0, 0, 219, 219, 0, 222, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 222,
- 0, 222, 0, 0, 0, 0, 222, 222, 222, 222,
- 0, 0, 222, 0, 0, 222, 0, 222, 222, 0,
- 222, 222, 0, 0, 0, 332, 0, 222, 222, 0,
- 0, 222, 0, 0, 0, 222, 222, 222, 222, 0,
- 0, 222, 222, 222, 0, 0, 0, 222, 332, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 156, 0,
- 0, 0, 0, 0, 0, 222, 0, 0, 212, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 222,
- 0, 238, 0, 0, 0, 0, 0, 238, 238, 222,
- 0, 212, 222, 0, 0, 0, 0, 238, 238, 0,
- 238, 238, 0, 0, 0, 49, 0, 222, 222, 0,
- 0, 238, 0, 0, 0, 222, 222, 238, 238, 0,
- 0, 238, 238, 238, 0, 0, 0, 238, 49, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 332, 0,
- 0, 0, 0, 0, 238, 238, 0, 0, 0, 0,
- 238, 238, 0, 0, 0, 0, 0, 0, 0, 238,
- 238, 238, 0, 238, 238, 0, 377, 0, 156, 238,
- 0, 0, 238, 0, 238, 0, 0, 0, 0, 0,
- 238, 238, 0, 0, 238, 238, 238, 238, 238, 377,
- 238, 154, 0, 0, 0, 238, 238, 0, 0, 0,
- 0, 0, 0, 0, 154, 0, 0, 0, 238, 0,
- 0, 0, 0, 154, 154, 0, 154, 154, 49, 0,
- 0, 0, 238, 378, 0, 0, 0, 154, 0, 0,
- 0, 0, 238, 154, 154, 238, 0, 154, 154, 154,
- 0, 0, 0, 154, 0, 0, 378, 0, 0, 0,
- 238, 238, 0, 0, 0, 0, 0, 0, 238, 238,
- 0, 154, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 154, 0, 0, 0, 212,
- 0, 0, 0, 0, 332, 154, 0, 0, 154, 0,
- 0, 0, 0, 332, 332, 0, 332, 332, 0, 0,
- 0, 219, 212, 154, 154, 0, 0, 332, 0, 0,
- 0, 154, 154, 332, 332, 0, 0, 332, 332, 332,
- 212, 0, 0, 332, 219, 212, 212, 0, 0, 0,
- 0, 212, 212, 212, 212, 0, 222, 0, 0, 0,
- 0, 332, 0, 0, 0, 0, 0, 212, 212, 0,
- 0, 0, 0, 0, 0, 332, 0, 0, 0, 222,
- 212, 0, 0, 212, 49, 332, 0, 0, 332, 212,
- 0, 0, 0, 49, 49, 0, 49, 49, 0, 0,
- 0, 0, 212, 332, 332, 0, 0, 49, 0, 0,
- 0, 332, 332, 49, 49, 0, 0, 49, 49, 49,
- 0, 0, 0, 49, 0, 0, 0, 0, 0, 0,
- 0, 212, 0, 0, 0, 0, 0, 0, 377, 0,
- 0, 49, 377, 377, 377, 0, 212, 154, 0, 377,
- 377, 377, 377, 212, 0, 49, 154, 154, 167, 154,
- 154, 0, 0, 0, 377, 49, 0, 0, 49, 0,
- 154, 0, 0, 0, 0, 0, 154, 154, 0, 0,
- 154, 154, 154, 49, 49, 0, 154, 0, 0, 0,
- 0, 49, 49, 0, 0, 378, 0, 0, 0, 378,
- 378, 378, 0, 0, 154, 0, 378, 378, 378, 378,
- 0, 377, 377, 377, 377, 377, 0, 0, 154, 0,
- 0, 378, 0, 0, 0, 0, 0, 0, 154, 377,
- 0, 154, 377, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 377, 377, 154, 154, 0, 0,
- 0, 212, 0, 0, 154, 154, 212, 212, 0, 0,
- 0, 0, 212, 212, 212, 212, 0, 0, 378, 378,
- 378, 378, 378, 219, 0, 0, 0, 212, 219, 219,
- 0, 0, 0, 0, 219, 0, 378, 219, 0, 378,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 219,
- 0, 378, 378, 0, 0, 0, 0, 0, 222, 0,
- 0, 0, 0, 222, 222, 0, 0, 0, 0, 0,
- 0, 0, 222, 0, 212, 212, 212, 212, 212, 0,
- 0, 0, 0, 0, 222, 0, 0, 0, 0, 0,
- 0, 0, 212, 0, 0, 212, 219, 219, 219, 219,
- 219, 0, 0, 0, 0, 0, 0, 212, 212, 0,
- 0, 0, 0, 0, 219, 0, 0, 219, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 219,
- 219, 222, 222, 222, 222, 222, 0, 0, 0, 0,
- 0, 0, 0, 0, 556, 0, 0, 0, 0, 222,
- 0, 0, 222, 557, 558, 0, 559, 560, 0, 0,
- 0, 0, 0, 0, 222, 222, 0, 561, 0, 0,
- 0, 0, 0, 562, 333, 0, 0, 563, 564, 565,
- 0, 0, 0, 566, 0, 0, 0, 0, 0, 0,
+ 0, 241, 0, 0, 241, 0, 226, 0, 0, 0,
+ 0, 0, 0, 241, 0, 0, 0, 0, 241, 0,
+ 0, 0, 0, 241, 0, 0, 0, 241, 0, 0,
+ 0, 0, 0, 0, 223, 241, 241, 223, 223, 223,
+ 223, 223, 0, 241, 241, 223, 223, 223, 223, 0,
+ 223, 223, 0, 223, 223, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 223, 0, 0, 0, 0, 0,
+ 223, 223, 424, 0, 223, 223, 223, 0, 0, 0,
+ 223, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 223, 0, 0, 0, 0, 424, 0, 0, 223, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 567, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 568, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 569, 0, 0, 570, 0,
+ 0, 0, 223, 0, 0, 0, 0, 241, 0, 0,
+ 0, 0, 0, 0, 223, 0, 0, 0, 0, 223,
+ 0, 0, 0, 0, 223, 0, 0, 0, 0, 0,
+ 391, 0, 0, 0, 0, 226, 223, 223, 0, 226,
+ 226, 226, 226, 0, 223, 223, 226, 226, 226, 226,
+ 0, 226, 226, 391, 226, 226, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 226, 0, 0, 0, 0,
+ 0, 226, 226, 0, 0, 226, 226, 226, 0, 0,
+ 0, 226, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 226, 0, 0, 0, 0, 0, 0, 248, 226,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 571, 0, 0, 0, 0, 0,
- 0, 572, 573,
+ 0, 0, 0, 226, 0, 0, 0, 0, 0, 0,
+ 0, 248, 0, 0, 0, 226, 0, 0, 0, 0,
+ 226, 0, 0, 0, 0, 226, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 241, 226, 226, 0,
+ 0, 241, 241, 241, 0, 226, 226, 241, 241, 241,
+ 241, 0, 241, 241, 0, 241, 241, 248, 0, 0,
+ 0, 0, 0, 0, 0, 0, 241, 0, 0, 0,
+ 0, 0, 241, 241, 0, 0, 241, 241, 241, 0,
+ 248, 0, 241, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 241, 0, 0, 0, 0, 0, 0, 0,
+ 241, 248, 0, 0, 424, 241, 0, 0, 0, 424,
+ 424, 424, 0, 0, 241, 424, 424, 424, 424, 0,
+ 424, 424, 0, 424, 424, 0, 241, 0, 241, 0,
+ 0, 241, 0, 0, 424, 0, 241, 0, 0, 0,
+ 424, 424, 0, 0, 424, 424, 424, 0, 241, 241,
+ 424, 0, 0, 0, 0, 0, 241, 241, 0, 0,
+ 424, 0, 0, 0, 0, 0, 0, 0, 424, 0,
+ 0, 0, 391, 0, 251, 0, 0, 391, 391, 391,
+ 0, 0, 424, 391, 391, 391, 391, 0, 391, 391,
+ 0, 391, 391, 0, 424, 0, 0, 251, 0, 424,
+ 0, 0, 391, 0, 424, 0, 0, 0, 391, 391,
+ 0, 0, 391, 391, 391, 0, 424, 424, 391, 0,
+ 0, 0, 0, 0, 424, 424, 251, 0, 391, 0,
+ 0, 0, 0, 0, 0, 0, 391, 0, 0, 0,
+ 248, 0, 0, 0, 0, 248, 248, 248, 0, 251,
+ 391, 248, 0, 0, 248, 0, 248, 248, 0, 248,
+ 248, 0, 391, 0, 0, 0, 0, 391, 0, 0,
+ 248, 0, 391, 0, 0, 0, 248, 248, 0, 0,
+ 248, 248, 248, 0, 391, 391, 248, 251, 0, 0,
+ 0, 0, 391, 391, 0, 0, 248, 0, 0, 0,
+ 0, 0, 0, 0, 248, 0, 0, 0, 0, 248,
+ 0, 0, 0, 0, 248, 248, 248, 0, 248, 0,
+ 248, 0, 0, 248, 267, 248, 248, 0, 248, 248,
+ 248, 0, 0, 0, 0, 248, 0, 0, 0, 248,
+ 248, 0, 0, 0, 0, 248, 248, 267, 0, 248,
+ 248, 248, 248, 248, 0, 248, 0, 241, 0, 0,
+ 248, 248, 241, 241, 0, 248, 0, 0, 241, 241,
+ 241, 241, 0, 248, 0, 267, 0, 0, 0, 0,
+ 0, 0, 0, 0, 241, 241, 0, 248, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 241, 267, 248,
+ 241, 0, 0, 0, 248, 0, 241, 0, 0, 248,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 241,
+ 0, 248, 248, 0, 0, 0, 251, 0, 0, 248,
+ 248, 251, 251, 251, 0, 0, 0, 267, 0, 0,
+ 251, 0, 251, 251, 0, 251, 251, 0, 0, 0,
+ 0, 0, 241, 0, 0, 0, 251, 0, 0, 0,
+ 0, 0, 251, 251, 176, 0, 251, 251, 251, 241,
+ 0, 0, 251, 0, 0, 0, 241, 0, 251, 0,
+ 0, 0, 251, 251, 251, 251, 0, 174, 0, 0,
+ 251, 0, 251, 0, 251, 251, 0, 251, 251, 0,
+ 0, 0, 0, 0, 251, 0, 0, 0, 251, 0,
+ 360, 0, 0, 0, 251, 251, 251, 0, 251, 251,
+ 251, 251, 0, 0, 251, 0, 251, 0, 0, 0,
+ 0, 0, 0, 360, 251, 0, 0, 0, 251, 251,
+ 0, 0, 251, 0, 0, 0, 251, 251, 0, 0,
+ 0, 0, 0, 0, 0, 0, 251, 0, 0, 0,
+ 0, 0, 49, 0, 0, 0, 0, 0, 251, 0,
+ 0, 0, 0, 251, 0, 0, 267, 176, 251, 0,
+ 0, 0, 267, 267, 0, 49, 0, 0, 0, 0,
+ 251, 251, 267, 267, 0, 267, 267, 0, 251, 251,
+ 0, 0, 0, 0, 0, 0, 267, 0, 176, 0,
+ 0, 0, 267, 267, 0, 0, 267, 267, 267, 0,
+ 0, 0, 267, 360, 0, 0, 0, 267, 0, 0,
+ 0, 174, 267, 267, 267, 0, 0, 0, 0, 0,
+ 267, 0, 0, 267, 267, 0, 267, 267, 0, 0,
+ 0, 0, 0, 0, 267, 0, 0, 267, 0, 0,
+ 0, 0, 0, 267, 267, 0, 267, 267, 267, 267,
+ 0, 267, 0, 267, 0, 49, 267, 0, 0, 0,
+ 0, 0, 0, 267, 0, 0, 0, 0, 267, 267,
+ 0, 267, 0, 0, 0, 0, 267, 267, 0, 0,
+ 0, 0, 0, 0, 0, 267, 0, 0, 0, 0,
+ 0, 223, 0, 0, 0, 0, 0, 267, 0, 0,
+ 0, 0, 267, 174, 0, 0, 0, 267, 0, 0,
+ 0, 228, 174, 174, 223, 174, 174, 0, 0, 267,
+ 267, 0, 0, 0, 0, 0, 174, 267, 267, 0,
+ 0, 0, 174, 174, 228, 0, 174, 174, 174, 0,
+ 0, 0, 174, 0, 404, 0, 0, 0, 0, 360,
+ 0, 0, 174, 0, 0, 0, 0, 0, 360, 360,
+ 174, 360, 360, 0, 0, 0, 0, 404, 0, 0,
+ 0, 0, 360, 0, 174, 0, 405, 0, 360, 360,
+ 0, 0, 360, 360, 360, 0, 174, 0, 360, 0,
+ 0, 174, 0, 0, 0, 0, 174, 0, 360, 405,
+ 0, 49, 0, 0, 0, 0, 360, 0, 174, 174,
+ 49, 49, 0, 49, 49, 0, 174, 174, 0, 0,
+ 360, 0, 248, 0, 49, 0, 0, 0, 0, 251,
+ 49, 49, 360, 0, 49, 49, 49, 360, 0, 0,
+ 49, 0, 360, 0, 0, 248, 0, 174, 0, 0,
+ 49, 0, 251, 0, 360, 360, 174, 174, 49, 174,
+ 174, 0, 360, 360, 0, 0, 0, 0, 0, 0,
+ 174, 0, 49, 0, 0, 0, 174, 174, 0, 0,
+ 174, 174, 174, 0, 49, 0, 174, 0, 0, 49,
+ 0, 0, 0, 0, 49, 0, 174, 0, 0, 0,
+ 0, 0, 0, 0, 174, 0, 49, 49, 0, 0,
+ 0, 0, 0, 0, 49, 49, 0, 0, 174, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 174, 0, 167, 0, 0, 174, 0, 0, 0, 0,
+ 174, 0, 0, 223, 0, 223, 223, 223, 223, 223,
+ 0, 0, 174, 174, 223, 223, 223, 223, 0, 0,
+ 174, 174, 0, 228, 0, 0, 0, 228, 228, 228,
+ 223, 223, 0, 0, 228, 228, 228, 228, 0, 0,
+ 0, 0, 0, 223, 0, 0, 223, 0, 0, 0,
+ 228, 228, 223, 0, 0, 0, 404, 0, 0, 0,
+ 404, 404, 404, 228, 0, 223, 228, 404, 404, 404,
+ 404, 0, 228, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 404, 0, 0, 228, 0, 0, 405, 0,
+ 0, 0, 405, 405, 405, 0, 0, 0, 223, 405,
+ 405, 405, 405, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 405, 223, 0, 0, 228, 0,
+ 0, 0, 223, 0, 0, 0, 0, 0, 0, 404,
+ 404, 404, 404, 404, 248, 228, 0, 0, 0, 248,
+ 248, 251, 228, 0, 0, 248, 251, 251, 248, 0,
+ 0, 404, 0, 0, 0, 251, 0, 404, 0, 0,
+ 248, 405, 405, 405, 405, 405, 0, 251, 404, 404,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 405, 0, 0, 0, 0, 0, 405,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 405, 405, 0, 0, 0, 0, 0, 248, 248, 248,
+ 248, 248, 0, 0, 251, 251, 251, 251, 251, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 587, 248,
+ 0, 0, 0, 0, 0, 248, 251, 588, 589, 0,
+ 590, 591, 251, 0, 0, 0, 248, 248, 0, 0,
+ 0, 592, 0, 251, 251, 0, 0, 593, 338, 0,
+ 0, 594, 595, 596, 0, 0, 0, 597, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 598, 0, 0,
+ 0, 0, 0, 0, 0, 599, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 600,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 601, 0, 0, 0, 0, 602, 0, 0, 0,
+ 0, 603, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 604, 0, 0, 0, 0, 0,
+ 0, 605, 606,
};
static const YYINT pfctlycheck[] = { 3,
- 3, 82, 11, 12, 10, 125, 359, 78, 10, 44,
- 10, 10, 10, 10, 191, 40, 33, 40, 10, 33,
- 33, 44, 33, 33, 205, 33, 10, 33, 123, 272,
- 44, 33, 10, 123, 203, 40, 319, 320, 123, 483,
- 33, 123, 0, 33, 221, 341, 166, 56, 33, 672,
- 40, 47, 10, 40, 44, 10, 10, 406, 123, 44,
- 674, 70, 229, 40, 123, 10, 40, 679, 500, 123,
- 60, 33, 81, 40, 682, 60, 61, 62, 40, 276,
- 40, 123, 202, 258, 259, 260, 263, 299, 265, 24,
- 125, 10, 44, 10, 257, 60, 177, 420, 60, 276,
- 10, 264, 125, 40, 224, 261, 44, 263, 298, 732,
- 33, 125, 123, 123, 125, 123, 44, 44, 10, 40,
- 55, 735, 41, 123, 40, 44, 123, 739, 481, 44,
- 123, 123, 508, 123, 742, 125, 123, 60, 61, 62,
- 125, 353, 262, 386, 264, 326, 123, 591, 33, 41,
- 356, 357, 44, 330, 323, 438, 33, 277, 229, 279,
- 456, 123, 33, 125, 635, 488, 343, 599, 74, 346,
- 519, 44, 339, 125, 44, 60, 61, 62, 84, 85,
- 86, 378, 302, 60, 61, 62, 33, 125, 378, 60,
- 61, 62, 616, 40, 314, 33, 371, 125, 125, 294,
- 123, 376, 642, 629, 44, 581, 123, 267, 217, 680,
- 125, 331, 267, 60, 260, 392, 276, 378, 273, 169,
- 646, 123, 60, 61, 62, 44, 403, 285, 286, 287,
- 33, 40, 352, 378, 243, 44, 413, 412, 123, 45,
- 44, 47, 362, 683, 41, 40, 123, 671, 378, 10,
- 33, 60, 125, 378, 379, 125, 44, 60, 61, 62,
- 44, 381, 382, 309, 378, 60, 277, 277, 339, 277,
- 447, 391, 178, 393, 284, 225, 123, 60, 61, 62,
- 33, 277, 288, 378, 262, 125, 288, 277, 378, 288,
- 268, 266, 469, 378, 414, 415, 378, 368, 256, 257,
- 258, 259, 294, 89, 288, 91, 125, 60, 61, 62,
- 300, 301, 489, 378, 379, 277, 125, 262, 61, 378,
- 440, 125, 267, 268, 378, 379, 446, 285, 286, 287,
- 10, 276, 538, 453, 511, 512, 513, 125, 300, 301,
- 298, 125, 314, 378, 299, 299, 10, 353, 468, 526,
- 350, 353, 310, 378, 353, 378, 314, 315, 316, 317,
- 318, 378, 368, 540, 378, 378, 368, 369, 44, 368,
- 368, 368, 10, 378, 332, 353, 334, 10, 378, 662,
- 374, 10, 378, 341, 374, 10, 378, 306, 378, 379,
- 510, 378, 350, 378, 379, 33, 354, 10, 353, 353,
- 10, 378, 40, 41, 378, 525, 44, 527, 353, 328,
- 41, 378, 374, 44, 306, 10, 378, 379, 378, 539,
- 378, 41, 60, 33, 44, 342, 343, 344, 345, 346,
- 277, 425, 503, 553, 351, 352, 328, 508, 357, 358,
- 359, 378, 379, 362, 363, 364, 10, 366, 367, 125,
- 10, 370, 371, 300, 301, 378, 379, 378, 379, 378,
- 40, 378, 378, 379, 473, 357, 358, 359, 588, 10,
- 362, 363, 364, 593, 366, 367, 378, 379, 370, 371,
- 60, 552, 659, 660, 661, 123, 378, 125, 10, 498,
- 10, 572, 10, 378, 379, 256, 257, 258, 264, 265,
- 620, 378, 379, 370, 371, 625, 44, 378, 379, 686,
- 581, 44, 506, 633, 41, 33, 587, 44, 695, 44,
- 640, 515, 40, 41, 701, 10, 44, 374, 705, 10,
- 10, 378, 379, 710, 44, 10, 713, 298, 378, 379,
- 378, 379, 60, 10, 664, 378, 379, 667, 260, 310,
- 347, 348, 349, 314, 315, 316, 317, 318, 270, 271,
- 272, 41, 571, 41, 44, 374, 44, 10, 40, 378,
- 379, 332, 10, 334, 694, 378, 379, 10, 10, 374,
- 341, 378, 702, 378, 379, 666, 706, 125, 10, 350,
- 10, 711, 125, 354, 714, 378, 379, 309, 262, 41,
- 125, 610, 44, 267, 268, 123, 687, 125, 378, 273,
- 274, 275, 276, 41, 10, 125, 44, 378, 347, 348,
- 349, 311, 312, 313, 262, 378, 379, 274, 275, 749,
- 268, 269, 752, 753, 754, 755, 285, 286, 287, 277,
- 278, 279, 10, 281, 282, 10, 284, 10, 657, 378,
- 288, 289, 290, 40, 292, 40, 40, 378, 379, 10,
- 298, 299, 300, 301, 302, 303, 304, 305, 378, 379,
- 308, 360, 361, 311, 378, 60, 757, 378, 288, 378,
- 123, 259, 10, 405, 268, 407, 324, 40, 326, 353,
- 264, 123, 276, 378, 379, 699, 699, 335, 336, 337,
- 338, 339, 340, 123, 265, 33, 10, 285, 286, 287,
- 378, 379, 350, 378, 379, 353, 44, 333, 356, 303,
- 304, 10, 10, 378, 379, 335, 336, 337, 338, 339,
- 368, 369, 60, 61, 62, 307, 374, 375, 376, 377,
- 378, 379, 262, 353, 262, 33, 356, 267, 268, 62,
- 268, 269, 265, 273, 378, 379, 276, 379, 368, 277,
- 278, 279, 10, 281, 282, 268, 284, 378, 379, 60,
- 288, 289, 290, 276, 292, 335, 336, 337, 338, 339,
- 298, 299, 300, 301, 302, 303, 304, 305, 378, 379,
- 308, 378, 379, 311, 374, 123, 356, 125, 378, 379,
- 303, 304, 10, 259, 266, 267, 324, 44, 326, 378,
- 379, 273, 274, 275, 379, 537, 379, 335, 336, 337,
- 338, 339, 340, 378, 379, 33, 306, 378, 379, 285,
- 286, 287, 350, 353, 123, 353, 44, 267, 356, 378,
- 379, 378, 10, 273, 274, 275, 267, 33, 328, 379,
- 368, 369, 60, 61, 62, 379, 374, 375, 376, 377,
- 378, 379, 378, 379, 379, 33, 378, 379, 335, 336,
- 337, 338, 339, 378, 379, 379, 44, 357, 358, 359,
- 85, 86, 362, 363, 364, 379, 366, 367, 379, 356,
- 370, 371, 60, 61, 62, 297, 298, 350, 378, 342,
- 343, 344, 345, 346, 273, 10, 41, 350, 351, 352,
- 342, 343, 344, 345, 346, 123, 40, 125, 350, 351,
- 352, 378, 342, 343, 344, 345, 346, 378, 33, 47,
- 350, 351, 352, 378, 262, 378, 60, 288, 33, 44,
- 268, 269, 40, 665, 40, 10, 378, 40, 379, 41,
- 278, 279, 10, 281, 282, 123, 379, 125, 378, 62,
- 288, 289, 290, 293, 292, 295, 296, 297, 33, 10,
- 298, 299, 379, 379, 302, 303, 304, 305, 41, 44,
- 308, 378, 262, 311, 335, 336, 337, 338, 339, 374,
- 378, 369, 41, 378, 379, 378, 324, 353, 326, 123,
- 722, 289, 290, 365, 726, 356, 378, 335, 336, 337,
- 338, 339, 340, 379, 302, 41, 267, 305, 123, 378,
- 125, 378, 350, 311, 284, 353, 378, 268, 356, 379,
- 61, 335, 336, 337, 338, 339, 324, 360, 61, 33,
- 368, 369, 61, 380, 378, 47, 40, 375, 376, 377,
- 378, 379, 356, 342, 343, 344, 345, 346, 123, 378,
- 125, 269, 351, 352, 378, 353, 60, 47, 368, 268,
- 278, 279, 41, 281, 282, 378, 10, 379, 379, 41,
- 368, 62, 379, 378, 292, 288, 123, 375, 378, 378,
- 298, 299, 40, 379, 302, 303, 304, 379, 379, 33,
- 308, 269, 368, 289, 290, 283, 283, 117, 118, 119,
- 278, 279, 60, 281, 282, 368, 302, 283, 326, 305,
- 283, 379, 379, 379, 292, 311, 284, 379, 40, 123,
- 298, 299, 340, 10, 302, 303, 304, 47, 324, 378,
- 308, 280, 350, 280, 40, 353, 380, 380, 60, 378,
- 284, 378, 288, 41, 164, 41, 41, 10, 326, 10,
- 368, 369, 41, 284, 269, 10, 333, 353, 376, 377,
- 378, 379, 340, 278, 279, 10, 281, 282, 378, 10,
- 378, 378, 350, 267, 10, 353, 368, 292, 10, 375,
- 379, 10, 10, 298, 299, 10, 566, 302, 303, 304,
- 368, 369, 3, 308, 269, 71, 253, 757, 376, 377,
- 378, 379, 442, 278, 279, 33, 281, 282, 687, 425,
- 435, 326, 516, 41, 177, 216, 44, 292, 676, 271,
- 288, 242, 397, 298, 299, 340, 246, 302, 303, 304,
- 250, 395, 466, 308, 479, 350, 581, 257, 353, 337,
- 374, 172, 241, 552, 378, 379, 316, 478, 587, -1,
- 3, 326, 3, 368, 369, 306, 10, -1, -1, -1,
- -1, 376, 377, 378, 379, 340, -1, 335, 336, 337,
- 338, 339, -1, 277, -1, 350, -1, 328, 353, 299,
- -1, -1, -1, -1, -1, 353, -1, 10, 356, 10,
- 564, 565, -1, 368, 369, 123, 300, 301, -1, -1,
- 368, 376, 377, 378, 379, -1, 357, 358, 359, -1,
- 33, 362, 363, 364, -1, 366, 367, -1, 262, 370,
- 371, 265, 266, 267, 268, 269, -1, 378, -1, 273,
- 274, 275, 276, -1, 278, 279, -1, 281, 282, -1,
- -1, -1, 300, 301, 288, 289, 290, -1, 292, 293,
- -1, 295, 296, 297, 298, 299, -1, -1, 302, 303,
- 304, 305, -1, -1, 308, -1, -1, 311, -1, -1,
- 374, 125, -1, -1, 378, 379, -1, -1, 300, 301,
- 324, -1, 326, -1, -1, -1, -1, -1, -1, 333,
- -1, 335, 336, 337, 338, 339, 340, -1, -1, 10,
- -1, -1, -1, -1, 125, 679, 350, -1, 682, 353,
- -1, -1, 356, -1, -1, -1, 374, -1, -1, -1,
- 378, 379, 33, -1, 368, 369, 399, 400, 401, -1,
- -1, 375, 376, 377, 262, -1, -1, -1, 266, 267,
- 268, 269, -1, -1, -1, 273, 274, 275, 276, -1,
- 278, 279, 374, 281, 282, -1, 378, 379, -1, -1,
- 288, 289, 290, -1, 292, 739, -1, -1, 742, -1,
+ 3, 82, 11, 12, 324, 325, 126, 78, 10, 33,
+ 10, 10, 191, 206, 33, 10, 44, 10, 10, 33,
+ 10, 230, 40, 40, 10, 44, 44, 365, 366, 123,
+ 528, 33, 10, 346, 204, 0, 368, 383, 651, 10,
+ 33, 123, 33, 222, 713, 10, 166, 56, 428, 276,
+ 123, 275, 10, 44, 512, 33, 44, 677, 40, 33,
+ 33, 70, 267, 40, 33, 123, 169, 298, 273, 60,
+ 61, 62, 81, 40, 349, 123, 40, 123, 40, 267,
+ 355, 356, 10, 203, 10, 406, 407, 266, 276, 268,
+ 682, 33, 44, 33, 123, 40, 177, 125, 40, 712,
+ 279, 447, 423, 10, 724, 225, 125, 125, 40, 384,
+ 44, 427, 781, 429, 536, 33, 60, 40, 60, 40,
+ 60, 61, 62, 226, 123, 44, 40, 10, 123, 44,
+ 44, 123, 10, 442, 125, 344, 634, 125, 331, 459,
+ 44, 123, 60, 61, 62, 265, 60, 267, 33, 123,
+ 123, 44, 125, 384, 123, 33, 335, 384, 328, 230,
+ 280, 123, 282, 10, 477, 720, 44, 33, 626, 348,
+ 550, 395, 764, 125, 353, 60, 61, 62, 510, 44,
+ 44, 123, 60, 61, 62, 44, 33, 307, 534, 715,
+ 33, 125, 614, 670, 60, 61, 62, 299, 44, 319,
+ 546, 260, 359, 74, 10, 123, 125, 33, 517, 218,
+ 125, 125, 294, 84, 85, 86, 336, 60, 61, 62,
+ 384, 125, 401, 544, 33, 384, 40, 384, 10, 40,
+ 409, 569, 125, 788, 60, 61, 62, 246, 10, 418,
+ 261, 361, 263, 44, 721, 123, 425, 125, 40, 60,
+ 309, 371, 568, 40, 384, 357, 435, 123, 784, 359,
+ 125, 125, 359, 40, 314, 359, 125, 10, 60, 262,
+ 390, 391, 33, 344, 267, 268, 384, 385, 723, 125,
+ 400, 47, 402, 276, 384, 40, 288, 384, 288, 468,
+ 384, 256, 257, 258, 259, 294, 289, 290, 10, 60,
+ 61, 62, 384, 40, 277, 60, 377, 178, 277, 302,
+ 288, 384, 305, 299, 123, 33, 436, 437, 311, 498,
+ 285, 286, 287, 60, 125, 41, 384, 41, 299, 41,
+ 288, 324, 44, 298, 262, 277, 384, 385, 384, 518,
+ 268, 461, 60, 61, 62, 310, 791, 467, 384, 314,
+ 315, 316, 317, 318, 474, 357, 384, 352, 300, 301,
+ 384, 540, 541, 542, 357, 384, 384, 332, 385, 334,
+ 384, 357, 374, 256, 257, 258, 341, 497, 557, 357,
+ 123, 374, 374, 703, 374, 384, 357, 352, 381, 384,
+ 706, 269, 571, 384, 385, 360, 374, 375, 362, 357,
+ 278, 279, 384, 281, 282, 123, 44, 384, 385, 335,
+ 336, 337, 338, 339, 292, 298, 374, 384, 538, 384,
+ 298, 299, 384, 10, 302, 303, 304, 310, 44, 357,
+ 308, 314, 315, 316, 317, 318, 556, 363, 558, 384,
+ 318, 288, 384, 385, 384, 385, 33, 10, 326, 332,
+ 570, 334, 384, 40, 41, 771, 40, 44, 341, 775,
+ 531, 384, 340, 384, 584, 536, 384, 385, 277, 352,
+ 384, 385, 44, 60, 352, 284, 60, 360, 266, 357,
+ 262, 664, 288, 123, 362, 267, 268, 125, 335, 336,
+ 337, 338, 339, 502, 276, 44, 374, 375, 123, 384,
+ 385, 384, 685, 623, 382, 383, 384, 385, 628, 125,
+ 357, 277, 583, 24, 10, 44, 363, 526, 384, 385,
+ 699, 700, 701, 61, 605, 384, 385, 374, 10, 335,
+ 336, 337, 338, 339, 10, 655, 123, 33, 125, 123,
+ 660, 384, 385, 614, 55, 300, 301, 10, 668, 40,
+ 729, 622, 731, 125, 41, 675, 10, 363, 384, 385,
+ 123, 10, 741, 335, 336, 337, 338, 339, 747, 60,
+ 384, 385, 751, 384, 385, 357, 125, 756, 10, 33,
+ 759, 10, 257, 762, 40, 705, 40, 41, 708, 264,
+ 44, 363, 384, 385, 306, 604, 125, 384, 385, 342,
+ 343, 344, 345, 346, 347, 348, 60, 384, 385, 352,
+ 353, 354, 41, 384, 268, 44, 328, 44, 384, 45,
+ 740, 47, 276, 384, 385, 89, 707, 91, 748, 384,
+ 385, 10, 752, 349, 350, 351, 645, 757, 10, 41,
+ 760, 384, 44, 41, 358, 359, 44, 384, 385, 303,
+ 304, 732, 364, 365, 366, 264, 265, 369, 370, 371,
+ 10, 373, 10, 10, 376, 377, 384, 385, 384, 123,
+ 384, 125, 384, 10, 293, 262, 295, 296, 297, 10,
+ 800, 268, 269, 803, 804, 805, 806, 10, 697, 40,
+ 277, 278, 279, 10, 281, 282, 40, 284, 125, 10,
+ 10, 288, 289, 290, 10, 292, 358, 359, 285, 286,
+ 287, 298, 299, 300, 301, 302, 303, 304, 305, 41,
+ 10, 308, 44, 10, 311, 10, 268, 33, 809, 10,
+ 41, 318, 384, 44, 276, 10, 33, 324, 44, 326,
+ 10, 745, 745, 40, 384, 385, 376, 377, 335, 336,
+ 337, 338, 339, 340, 60, 61, 62, 10, 10, 384,
+ 385, 303, 304, 60, 41, 352, 262, 44, 41, 384,
+ 357, 44, 268, 384, 41, 362, 363, 44, 125, 342,
+ 343, 344, 345, 346, 347, 348, 384, 374, 375, 352,
+ 353, 354, 288, 40, 381, 382, 383, 384, 385, 267,
+ 384, 385, 10, 264, 10, 273, 274, 275, 262, 300,
+ 301, 384, 385, 41, 268, 269, 44, 123, 333, 125,
+ 265, 384, 307, 277, 278, 279, 123, 281, 282, 62,
+ 284, 349, 350, 351, 288, 289, 290, 10, 292, 335,
+ 336, 337, 338, 339, 298, 299, 300, 301, 302, 303,
+ 304, 305, 274, 275, 308, 367, 368, 311, 384, 385,
+ 33, 357, 349, 260, 318, 265, 384, 363, 355, 356,
+ 324, 44, 326, 270, 271, 272, 385, 306, 374, 375,
+ 385, 335, 336, 337, 338, 339, 340, 60, 61, 62,
+ 262, 10, 10, 384, 385, 267, 268, 384, 352, 328,
+ 60, 273, 385, 357, 276, 311, 312, 313, 362, 363,
+ 257, 258, 309, 385, 33, 384, 385, 123, 384, 385,
+ 374, 375, 595, 596, 384, 385, 10, 381, 382, 383,
+ 384, 385, 359, 384, 385, 364, 365, 366, 259, 385,
+ 369, 370, 371, 385, 373, 384, 385, 376, 377, 33,
+ 123, 385, 125, 384, 385, 384, 262, 384, 85, 86,
+ 44, 385, 268, 269, 285, 286, 287, 314, 315, 316,
+ 317, 385, 278, 279, 44, 281, 282, 10, 302, 303,
+ 277, 259, 288, 289, 290, 357, 292, 335, 336, 337,
+ 338, 339, 298, 299, 384, 306, 302, 303, 304, 305,
+ 33, 10, 308, 300, 301, 311, 352, 285, 286, 287,
+ 267, 44, 318, 384, 385, 363, 385, 328, 324, 273,
+ 326, 10, 285, 286, 287, 384, 385, 384, 385, 335,
+ 336, 337, 338, 339, 340, 288, 10, 41, 33, 123,
+ 47, 125, 384, 385, 384, 385, 352, 720, 384, 385,
+ 723, 357, 384, 364, 365, 366, 362, 363, 369, 370,
+ 371, 40, 373, 40, 10, 376, 377, 40, 374, 375,
+ 384, 385, 384, 384, 40, 381, 382, 383, 384, 385,
+ 384, 385, 335, 336, 337, 338, 339, 384, 385, 40,
+ 123, 385, 125, 384, 385, 62, 269, 385, 306, 335,
+ 336, 337, 338, 339, 357, 278, 279, 41, 281, 282,
+ 363, 261, 262, 263, 123, 788, 384, 385, 791, 292,
+ 328, 374, 41, 385, 10, 298, 299, 363, 384, 302,
+ 303, 304, 375, 384, 123, 308, 342, 343, 344, 345,
+ 346, 347, 348, 266, 267, 318, 352, 353, 354, 123,
+ 273, 274, 275, 326, 385, 262, 364, 365, 366, 41,
+ 384, 369, 370, 371, 10, 373, 357, 340, 376, 377,
+ 289, 290, 372, 384, 41, 385, 384, 123, 384, 352,
+ 384, 41, 41, 302, 357, 269, 305, 33, 267, 362,
+ 384, 284, 311, 268, 278, 279, 384, 281, 282, 385,
+ 367, 374, 375, 61, 61, 324, 61, 386, 292, 382,
+ 383, 384, 385, 47, 298, 299, 33, 47, 302, 303,
+ 304, 384, 384, 40, 308, 374, 268, 44, 41, 384,
+ 380, 385, 385, 41, 318, 385, 269, 62, 357, 385,
+ 288, 384, 326, 60, 123, 278, 279, 385, 281, 282,
+ 385, 384, 374, 385, 283, 374, 340, 283, 283, 292,
+ 283, 385, 381, 374, 385, 298, 299, 284, 352, 302,
+ 303, 304, 385, 357, 10, 308, 385, 385, 362, 384,
+ 47, 117, 118, 119, 434, 318, 280, 280, 40, 386,
+ 374, 375, 10, 326, 386, 362, 384, 284, 382, 383,
+ 384, 385, 41, 385, 288, 10, 123, 340, 125, 41,
+ 384, 41, 41, 10, 385, 33, 41, 41, 333, 352,
+ 284, 10, 384, 41, 357, 384, 44, 10, 164, 362,
+ 10, 384, 267, 342, 343, 344, 345, 346, 347, 348,
+ 10, 374, 375, 352, 353, 354, 374, 10, 385, 382,
+ 383, 384, 385, 342, 343, 344, 345, 346, 347, 348,
+ 362, 3, 10, 352, 353, 354, 10, 597, 342, 343,
+ 344, 345, 346, 347, 348, 384, 262, 71, 732, 353,
+ 354, 267, 268, 256, 33, 463, 809, 273, 274, 275,
+ 276, 40, 447, 456, 177, 384, 342, 343, 344, 345,
+ 346, 347, 348, 10, 717, 123, 547, 353, 354, 274,
+ 384, 60, 217, 249, 245, 412, 262, 253, 404, 265,
+ 266, 267, 268, 269, 260, 490, 33, 273, 274, 275,
+ 276, 495, 278, 279, 614, 281, 282, 33, 384, 482,
+ 508, 342, 288, 289, 290, 172, 292, 293, 244, 295,
+ 296, 297, 298, 299, 583, 321, 302, 303, 304, 305,
+ 277, 507, 308, 622, 3, 311, 3, -1, 304, -1,
+ -1, 357, 318, -1, 123, -1, 125, -1, 324, -1,
+ 326, -1, -1, 300, 301, -1, -1, 333, -1, 335,
+ 336, 337, 338, 339, 340, -1, -1, 342, 343, 344,
+ 345, 346, 347, 348, -1, -1, 352, 352, 353, 354,
+ -1, 357, -1, 33, -1, -1, 362, 363, -1, -1,
+ 40, -1, -1, -1, 44, -1, -1, -1, 374, 375,
+ 10, -1, -1, -1, -1, 381, 382, 383, -1, -1,
+ 60, 61, 62, -1, 262, -1, -1, -1, 266, 267,
+ 268, 269, -1, 33, -1, 273, 274, 275, 276, -1,
+ 278, 279, -1, 281, 282, -1, -1, 384, 385, -1,
+ 288, 289, 290, -1, 292, -1, -1, -1, -1, 23,
298, 299, -1, -1, 302, 303, 304, 305, -1, -1,
- 308, -1, -1, 311, -1, -1, -1, 460, -1, 462,
- -1, 464, -1, -1, -1, -1, 324, -1, 326, -1,
- -1, -1, -1, 257, 258, -1, -1, 335, 336, 337,
- 338, 339, 340, -1, -1, 198, 199, 200, -1, 10,
- -1, -1, 350, -1, -1, 353, -1, -1, 356, -1,
- -1, -1, -1, -1, -1, 256, 257, 258, -1, 262,
- 368, 369, 33, 266, 267, 268, 269, 375, 376, 377,
- 273, 274, 275, 276, -1, 278, 279, -1, 281, 282,
- 314, 315, 316, 317, 23, -1, 289, 290, 10, 292,
- 293, -1, 295, 296, 297, 298, 299, 298, -1, 302,
- 303, 304, 305, -1, -1, 308, -1, -1, 311, -1,
- -1, 33, -1, 314, 315, 316, 317, -1, 57, 58,
- 59, 324, -1, 326, 335, 336, 337, 338, 339, 68,
- 333, 294, -1, 33, -1, -1, -1, 340, -1, -1,
- 40, 80, -1, -1, 44, 356, 309, 350, -1, -1,
- 353, -1, -1, -1, 317, -1, 10, -1, -1, -1,
- 60, 61, 62, -1, -1, 368, 369, -1, -1, -1,
- -1, 262, 375, 376, 377, 266, 267, 268, 269, 33,
- -1, -1, 273, 274, 275, 276, -1, 278, 279, -1,
- 281, 282, -1, -1, -1, 358, -1, -1, 289, 290,
- -1, 292, 293, -1, 295, 296, 297, 298, 299, -1,
- -1, 302, 303, 304, 305, -1, -1, 308, -1, -1,
- 311, -1, -1, 123, 10, 125, -1, -1, -1, -1,
- -1, -1, -1, 324, -1, 326, 689, 690, 691, -1,
- -1, -1, 333, 182, 183, -1, -1, 33, 187, 340,
- -1, -1, -1, -1, -1, -1, -1, -1, 197, 350,
- -1, -1, 353, 342, 343, 344, 345, 346, -1, -1,
- -1, 350, 351, 352, -1, -1, 10, 368, 369, 342,
- 343, 344, 345, 346, 375, 376, 377, 350, 351, 352,
- 261, 262, 263, 264, 265, 266, 267, 268, 269, 33,
+ 308, -1, -1, 311, -1, -1, -1, -1, -1, -1,
+ 318, -1, -1, 123, -1, 125, 324, -1, 326, -1,
+ -1, -1, -1, 57, 58, 59, -1, 335, 336, 337,
+ 338, 339, 340, -1, 68, -1, -1, 10, 277, -1,
+ -1, -1, -1, -1, 352, -1, 80, -1, -1, 357,
+ -1, -1, -1, -1, 362, 363, -1, -1, -1, -1,
+ -1, 300, 301, 10, -1, 262, 374, 375, -1, 266,
+ 267, 268, 269, 381, 382, 383, 273, 274, 275, 276,
+ -1, 278, 279, -1, 281, 282, 33, -1, -1, -1,
+ -1, -1, 289, 290, -1, 292, 293, -1, 295, 296,
+ 297, 298, 299, 289, 290, 302, 303, 304, 305, -1,
+ -1, 308, -1, -1, 311, -1, 302, -1, -1, 305,
+ -1, 318, -1, -1, -1, 311, -1, 324, -1, 326,
+ -1, 10, -1, -1, -1, -1, 333, -1, 324, -1,
+ -1, -1, -1, 340, -1, 384, 385, -1, 182, 183,
+ -1, -1, 125, 187, 33, 352, -1, -1, -1, -1,
+ 357, -1, -1, 197, -1, 362, -1, 277, -1, -1,
+ -1, 357, -1, -1, -1, -1, -1, 374, 375, -1,
+ -1, -1, -1, -1, 381, 382, 383, -1, -1, -1,
+ 300, 301, 262, -1, -1, 381, 266, 267, 268, 269,
+ -1, -1, 306, 273, 274, 275, 276, -1, 278, 279,
+ -1, 281, 282, -1, -1, -1, -1, -1, -1, 289,
+ 290, -1, 292, 293, 328, 295, 296, 297, 298, 299,
+ -1, -1, 302, 303, 304, 305, -1, -1, 308, -1,
+ -1, 311, -1, -1, -1, -1, -1, -1, 318, 359,
+ -1, -1, -1, -1, 324, 10, 326, 198, 199, 200,
+ 364, 365, 366, 333, -1, 369, 370, 371, -1, 373,
+ 340, -1, 376, 377, 384, 385, -1, -1, 33, -1,
+ 384, -1, 352, 256, 257, 258, -1, 357, -1, -1,
+ -1, -1, 362, -1, -1, 414, 415, 416, -1, -1,
+ -1, -1, 421, 422, 374, 375, -1, -1, -1, -1,
+ -1, 381, 382, 383, 261, 262, 263, 264, 265, 266,
+ 267, 268, 269, -1, -1, 298, 273, 274, 275, 276,
+ -1, 278, 279, -1, 281, 282, -1, -1, -1, -1,
+ -1, 314, 315, 316, 317, 292, 293, -1, 295, 296,
+ 297, 298, 299, -1, -1, 302, 303, 304, 299, -1,
+ -1, 308, -1, -1, -1, 484, -1, 486, -1, 488,
+ -1, 318, 491, 314, -1, -1, -1, -1, 10, 326,
+ -1, 322, 261, 262, 263, 264, 265, 266, 267, 268,
+ 269, -1, -1, 340, 273, 274, 275, 276, -1, 278,
+ 279, 33, 281, 282, -1, 352, -1, -1, -1, -1,
+ 357, -1, -1, 292, 293, 362, 295, 296, 297, 298,
+ 299, -1, 306, 302, 303, 304, 367, 374, 375, 308,
+ -1, -1, -1, -1, -1, 382, 383, -1, -1, 318,
+ -1, -1, -1, -1, 328, -1, -1, 326, -1, -1,
+ -1, -1, -1, -1, -1, -1, 319, 320, 321, 322,
+ 323, 340, 325, -1, 327, -1, 329, 330, 331, -1,
+ -1, -1, -1, 352, -1, -1, 10, -1, 357, -1,
+ 364, 365, 366, 362, -1, 369, 370, 371, -1, 373,
+ -1, -1, 376, 377, -1, 374, 375, -1, 361, 33,
+ 384, 620, -1, 382, 383, -1, 261, 262, 263, 264,
+ 265, 266, 267, 268, 269, 378, 379, -1, 273, 274,
+ 275, 276, -1, 278, 279, -1, 281, 282, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 292, 293, -1,
+ 295, 296, 297, 298, 299, -1, -1, 302, 303, 304,
+ -1, -1, -1, 308, -1, -1, -1, -1, 10, -1,
+ -1, -1, -1, 318, 342, 343, 344, 345, 346, 347,
+ 348, 326, -1, -1, 352, 353, 354, -1, -1, -1,
+ -1, 33, -1, -1, -1, 340, 10, -1, -1, 123,
+ -1, -1, -1, -1, -1, -1, -1, 352, -1, -1,
+ -1, -1, 357, -1, -1, -1, -1, 362, -1, 33,
+ -1, -1, -1, -1, -1, 734, 735, 736, -1, 374,
+ 375, -1, -1, -1, -1, -1, -1, 382, 383, 261,
+ 262, 263, 264, 265, 266, 267, 268, 269, -1, -1,
+ -1, 273, 274, 275, 276, -1, 278, 279, -1, 281,
+ 282, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 292, 293, -1, 295, 296, 297, 298, 299, -1, -1,
+ 302, 303, 304, -1, -1, -1, 308, 342, 343, 344,
+ 345, 346, 347, 348, -1, -1, 318, -1, 353, 354,
+ -1, -1, -1, -1, 326, -1, -1, -1, -1, 123,
+ -1, -1, -1, -1, -1, -1, -1, -1, 340, 10,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 352, -1, -1, -1, -1, 357, -1, -1, 262, -1,
+ 362, -1, 33, -1, 268, 269, -1, -1, -1, -1,
+ -1, -1, 374, 375, 278, 279, -1, 281, 282, -1,
+ 382, 383, -1, -1, 288, 289, 290, -1, 292, -1,
+ -1, -1, -1, -1, 298, 299, -1, -1, 302, 303,
+ 304, 305, -1, -1, 308, -1, -1, 311, -1, -1,
+ -1, -1, -1, -1, 318, -1, -1, -1, -1, -1,
+ 324, -1, 326, -1, -1, -1, -1, -1, -1, -1,
+ -1, 335, 336, 337, 338, 339, 340, 10, -1, -1,
+ 262, -1, -1, -1, -1, 267, 268, -1, 352, -1,
+ -1, 273, -1, 357, 276, -1, -1, -1, 362, 363,
+ 33, -1, -1, -1, -1, -1, -1, 289, 290, -1,
+ 374, 375, -1, -1, 268, 269, -1, 381, 382, 383,
+ 302, -1, -1, 305, 278, 279, -1, 281, 282, 311,
+ -1, -1, -1, -1, 288, 289, 290, -1, 292, -1,
+ -1, -1, 324, -1, 298, 299, -1, -1, 302, 303,
+ 304, 305, -1, -1, 308, -1, -1, 311, -1, -1,
+ -1, -1, -1, -1, 318, -1, -1, -1, -1, -1,
+ 324, -1, 326, -1, -1, 357, -1, -1, -1, -1,
+ -1, 335, 336, 337, 338, 339, 340, -1, 10, -1,
+ 123, -1, 374, -1, -1, -1, -1, -1, 352, 381,
+ -1, -1, -1, 357, -1, -1, -1, -1, 362, 363,
+ -1, 33, -1, -1, -1, -1, -1, -1, -1, -1,
+ 374, 375, -1, -1, -1, -1, -1, 381, 382, 383,
+ -1, 262, 10, 264, 265, 266, 267, 268, 269, -1,
-1, -1, 273, 274, 275, 276, -1, 278, 279, -1,
- 281, 282, -1, -1, -1, -1, -1, -1, -1, -1,
+ 281, 282, -1, -1, -1, 33, -1, -1, -1, -1,
-1, 292, 293, -1, 295, 296, 297, 298, 299, -1,
-1, 302, 303, 304, -1, -1, -1, 308, -1, -1,
- 262, -1, 264, 265, 266, 267, 268, -1, -1, -1,
- -1, 273, 274, 275, 276, 326, 342, 343, 344, 345,
- 346, -1, -1, -1, -1, 351, 352, 289, 290, 340,
- -1, -1, -1, -1, -1, -1, -1, 277, -1, 350,
- 302, -1, 353, 305, -1, -1, -1, -1, -1, 311,
- -1, -1, -1, 10, -1, -1, -1, 368, 369, -1,
- 300, 301, 324, -1, -1, 376, 377, 261, 262, 263,
- 264, 265, 266, 267, 268, 269, 33, -1, -1, 273,
- 274, 275, 276, -1, 278, 279, -1, 281, 282, -1,
- -1, 353, -1, -1, -1, -1, -1, -1, 292, 293,
- -1, 295, 296, 297, 298, 299, 368, -1, 302, 303,
- 304, -1, -1, 375, 308, -1, 319, 320, 321, 322,
- 323, -1, 325, -1, 327, -1, 329, 330, 331, -1,
- -1, -1, 326, -1, 374, -1, 262, -1, 378, 379,
- 266, 267, 268, -1, -1, 10, 340, 273, 274, 275,
- 276, -1, 355, -1, -1, -1, 350, -1, -1, 353,
- -1, -1, -1, 289, 290, -1, -1, -1, 33, 372,
- 373, -1, -1, -1, 368, 369, 302, -1, -1, 305,
- -1, -1, 376, 377, -1, 311, -1, 261, 262, 263,
- 264, 265, 266, 267, 268, 269, -1, -1, 324, 273,
- 274, 275, 276, -1, 278, 279, -1, 281, 282, -1,
- -1, -1, -1, -1, -1, -1, -1, 306, 292, 293,
- -1, 295, 296, 297, 298, 299, -1, 353, 302, 303,
- 304, -1, -1, -1, 308, -1, -1, 10, -1, 328,
- -1, 10, 368, -1, -1, -1, -1, -1, -1, 375,
- -1, -1, 326, -1, -1, -1, -1, -1, 123, -1,
- 33, -1, -1, -1, 33, -1, 340, -1, 357, 358,
- 359, -1, -1, 362, 363, 364, 350, 366, 367, 353,
- -1, 370, 371, -1, -1, -1, -1, -1, -1, 378,
- -1, -1, -1, -1, 368, 369, -1, -1, -1, -1,
- -1, -1, 376, 377, 261, 262, 263, 264, 265, 266,
- 267, 268, 269, -1, -1, -1, 273, 274, 275, 276,
- -1, 278, 279, -1, 281, 282, -1, -1, -1, -1,
- -1, -1, -1, -1, 306, 292, 293, -1, 295, 296,
- 297, 298, 299, -1, -1, 302, 303, 304, -1, -1,
- 123, 308, -1, 10, -1, -1, 328, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, 326,
- -1, -1, -1, -1, -1, -1, 33, -1, -1, -1,
- -1, -1, -1, 340, -1, 357, 358, 359, -1, -1,
- 362, 363, 364, 350, 366, 367, 353, 262, 370, 371,
- -1, -1, -1, 268, 269, -1, 378, -1, -1, 10,
- -1, 368, 369, 278, 279, -1, 281, 282, -1, 376,
- 377, -1, -1, 288, 289, 290, -1, 292, -1, -1,
- -1, -1, 33, 298, 299, -1, -1, 302, 303, 304,
- 305, -1, -1, 308, -1, -1, 311, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, 324,
- -1, 326, -1, -1, -1, -1, -1, -1, -1, -1,
- 335, 336, 337, 338, 339, 340, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, 350, -1, -1, 353, -1,
- -1, 356, -1, 262, -1, 268, 269, -1, 267, 268,
- -1, -1, -1, 368, 369, 278, 279, 276, 281, 282,
- 375, 376, 377, -1, 10, 288, 289, 290, -1, 292,
- 289, 290, 123, -1, -1, 298, 299, -1, -1, 302,
- 303, 304, 305, 302, -1, 308, 305, 33, 311, -1,
- -1, -1, 311, -1, -1, -1, -1, -1, -1, -1,
- -1, 324, -1, 326, -1, 324, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 318, -1, -1,
+ -1, -1, -1, -1, -1, 326, -1, -1, -1, 10,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, 340,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 352, 33, -1, -1, -1, 357, -1, -1, -1,
+ -1, 362, -1, -1, -1, -1, 269, -1, -1, -1,
+ -1, -1, -1, 374, 375, 278, 279, -1, 281, 282,
+ -1, 382, 383, -1, -1, 288, 289, 290, -1, 292,
+ -1, -1, -1, -1, -1, 298, 299, -1, -1, 302,
+ 303, 304, 305, -1, -1, 308, -1, -1, 311, -1,
+ -1, -1, -1, -1, -1, 318, -1, -1, -1, -1,
+ 10, 324, -1, 326, -1, -1, -1, -1, -1, -1,
-1, -1, 335, 336, 337, 338, 339, 340, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, 350, -1, -1,
- 353, -1, -1, 356, 353, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, 368, 369, -1, -1, 368,
- -1, -1, 375, 376, 377, 262, 375, 264, 265, 266,
- 267, 268, 269, -1, -1, -1, 273, 274, 275, 276,
- -1, 278, 279, -1, 281, 282, 10, -1, -1, -1,
- -1, -1, -1, -1, -1, 292, 293, -1, 295, 296,
- 297, 298, 299, -1, -1, 302, 303, 304, 10, 33,
- -1, 308, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, 269, 326,
- -1, 33, -1, -1, -1, -1, -1, 278, 279, -1,
- 281, 282, -1, 340, -1, -1, -1, 288, 289, 290,
- -1, 292, -1, 350, -1, -1, 353, 298, 299, -1,
- -1, 302, 303, 304, 305, -1, -1, 308, -1, -1,
- 311, 368, 369, -1, -1, -1, -1, -1, -1, 376,
- 377, -1, -1, 324, -1, 326, -1, -1, -1, -1,
+ -1, -1, -1, 33, -1, -1, -1, -1, -1, 352,
+ -1, -1, -1, -1, 357, -1, -1, -1, -1, 362,
+ 363, -1, 10, -1, -1, -1, -1, -1, -1, -1,
+ 262, 374, 375, -1, -1, 267, 268, 269, 381, 382,
+ 383, 273, 274, 275, 276, 33, 278, 279, -1, 281,
+ 282, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 292, -1, -1, -1, -1, -1, 298, 299, -1, -1,
+ 302, 303, 304, -1, 262, -1, 308, -1, -1, -1,
+ 268, -1, -1, -1, -1, -1, 318, -1, -1, -1,
+ 10, -1, -1, -1, 326, -1, -1, -1, -1, -1,
+ -1, 289, 290, 335, 336, 337, 338, 339, 340, -1,
+ -1, -1, -1, 33, 302, -1, -1, 305, -1, -1,
+ 352, -1, -1, 311, -1, 357, -1, -1, -1, -1,
+ 362, 363, -1, -1, -1, 123, 324, -1, -1, -1,
+ -1, 262, 374, 375, -1, -1, 267, 268, 269, -1,
+ 382, 383, 273, 274, 275, 276, -1, 278, 279, -1,
+ 281, 282, -1, -1, -1, -1, -1, -1, -1, 357,
+ -1, 292, -1, -1, -1, -1, -1, 298, 299, -1,
+ -1, 302, 303, 304, -1, -1, 374, 308, -1, -1,
+ -1, -1, -1, 381, -1, -1, -1, 318, -1, -1,
+ -1, 10, -1, -1, -1, 326, -1, -1, -1, -1,
-1, -1, -1, -1, 335, 336, 337, 338, 339, 340,
- -1, -1, -1, -1, -1, -1, 10, -1, -1, 350,
- -1, -1, 353, -1, -1, 356, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, 262, 368, 369, 33,
- -1, 267, 268, 269, 375, 376, 377, 273, 274, 275,
- 276, -1, 278, 279, -1, 281, 282, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, 292, -1, -1, -1,
- -1, 10, 298, 299, -1, -1, 302, 303, 304, 10,
- -1, -1, 308, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, 33, -1, -1, -1, -1, -1,
- 326, -1, 33, -1, -1, -1, -1, -1, -1, 335,
- 336, 337, 338, 339, 340, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, 350, -1, -1, 353, -1, 123,
- 356, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, 368, 369, -1, -1, -1, -1, 262, -1,
- 376, 377, -1, 267, 268, 269, -1, -1, -1, 273,
- 274, 275, 276, -1, 278, 279, -1, 281, 282, -1,
- 262, -1, -1, -1, -1, -1, 268, -1, 292, -1,
- -1, -1, -1, -1, 298, 299, -1, -1, 302, 303,
- 304, 10, -1, -1, 308, -1, -1, 289, 290, -1,
+ -1, 352, 262, -1, -1, -1, 357, 267, 268, -1,
+ -1, 362, 363, 273, 274, 275, 276, -1, -1, -1,
+ -1, -1, -1, 374, 375, -1, -1, -1, 288, -1,
+ -1, 382, 383, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 261, 262, 263, -1, 265, 266, 267,
+ 268, 269, -1, -1, -1, 273, 274, 275, 276, -1,
+ 278, 279, -1, 281, 282, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 292, 335, 336, 337, 338, 339,
+ 298, 299, 10, -1, 302, 303, 304, -1, -1, -1,
+ 308, -1, -1, -1, -1, -1, -1, 357, -1, -1,
+ 318, -1, -1, 363, -1, 33, -1, -1, 326, -1,
+ -1, -1, 262, -1, 374, 375, 266, 267, 268, 269,
+ -1, -1, 340, 273, 274, 275, 276, -1, 278, 279,
+ -1, 281, 282, -1, 352, -1, -1, -1, -1, 357,
+ -1, -1, 292, 293, 362, 295, 296, 297, 298, 299,
+ -1, -1, 302, 303, 304, -1, 374, 375, 308, -1,
+ -1, -1, -1, -1, 382, 383, -1, -1, 318, -1,
+ -1, -1, -1, -1, -1, -1, 326, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 302, -1, 326, 305, 33, -1, -1, -1, -1, 311,
- -1, 335, 336, 337, 338, 339, 340, -1, -1, -1,
- -1, -1, 324, -1, -1, -1, 350, -1, -1, 353,
- -1, -1, 356, -1, -1, -1, -1, 10, -1, -1,
- -1, -1, -1, -1, 368, 369, -1, -1, -1, -1,
- -1, 353, 376, 377, -1, -1, -1, 261, 262, 263,
- 33, 265, 266, 267, 268, 269, 368, -1, -1, 273,
- 274, 275, 276, 375, 278, 279, -1, 281, 282, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, 292, -1,
- -1, -1, -1, 10, 298, 299, -1, -1, 302, 303,
- 304, -1, -1, -1, 308, -1, -1, -1, -1, -1,
- -1, -1, -1, 262, -1, -1, 33, -1, -1, 268,
- -1, 262, 326, -1, -1, 266, 267, 268, 269, -1,
- -1, -1, 273, 274, 275, 276, 340, 278, 279, 288,
- 281, 282, -1, -1, -1, -1, 350, -1, -1, 353,
- 123, 292, 293, -1, 295, 296, 297, 298, 299, -1,
- -1, 302, 303, 304, 368, 369, -1, 308, -1, -1,
- -1, -1, 376, 377, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, 326, 335, 336, 337, 338,
- 339, -1, -1, -1, -1, -1, -1, -1, -1, 340,
- -1, -1, -1, -1, 353, -1, 123, 356, -1, 350,
- -1, -1, 353, -1, 10, -1, -1, -1, 10, 368,
- 369, -1, -1, -1, -1, -1, -1, 368, 369, -1,
- -1, -1, -1, 262, -1, 376, 377, 33, 267, 268,
- 269, 33, -1, -1, 273, 274, 275, 276, -1, 278,
+ 340, -1, -1, 10, -1, 123, -1, -1, -1, -1,
+ -1, -1, 352, -1, -1, -1, -1, 357, -1, -1,
+ -1, -1, 362, -1, -1, -1, 33, -1, -1, -1,
+ -1, -1, -1, 262, 374, 375, -1, -1, 267, 268,
+ 269, -1, 382, 383, 273, 274, 275, 276, -1, 278,
279, -1, 281, 282, -1, -1, -1, -1, -1, -1,
-1, -1, -1, 292, 293, -1, 295, 296, 297, 298,
299, -1, -1, 302, 303, 304, -1, -1, -1, 308,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, 262,
- -1, -1, 265, 266, 267, 268, 269, 326, -1, -1,
- 273, 274, 275, 276, -1, 278, 279, -1, 281, 282,
- -1, 340, -1, -1, -1, -1, -1, -1, -1, 292,
- -1, 350, -1, -1, 353, 298, 299, 123, -1, 302,
- 303, 304, 10, -1, -1, 308, -1, -1, -1, 368,
- 369, -1, -1, -1, -1, 262, -1, 376, 377, 266,
- 267, 268, 269, 326, -1, 33, 273, 274, 275, 276,
- -1, 278, 279, -1, 281, 282, -1, 340, -1, -1,
- -1, -1, -1, -1, -1, 292, -1, 350, -1, -1,
- 353, 298, 299, -1, -1, 302, 303, 304, 10, -1,
- -1, 308, -1, -1, -1, 368, 369, -1, -1, -1,
- -1, -1, -1, 376, 377, -1, -1, -1, -1, 326,
- -1, 33, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, 340, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, 350, -1, -1, 353, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, 318,
+ -1, -1, -1, -1, -1, -1, -1, 326, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, 368, 369, -1, -1, -1, -1, -1, -1, 376,
- 377, -1, -1, -1, -1, -1, 262, -1, -1, -1,
- 262, 267, 268, 269, -1, 267, 268, 273, 274, 275,
- 276, 273, 278, 279, 276, 281, 282, -1, -1, -1,
- -1, -1, -1, 10, -1, -1, 292, 289, 290, -1,
- -1, -1, 298, 299, -1, -1, 302, 303, 304, -1,
- 302, -1, 308, 305, -1, -1, 33, -1, -1, 311,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 326, -1, 324, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, 340, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, 350, -1, 10, 353, -1, -1,
- -1, 353, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, 368, 369, -1, -1, 368, -1, -1, 33,
- 376, 377, -1, 375, 262, -1, -1, -1, -1, 267,
- 268, 269, -1, -1, -1, 273, 274, 275, 276, -1,
- 278, 279, -1, 281, 282, -1, 123, -1, -1, 10,
+ -1, 340, -1, -1, 10, -1, 123, -1, -1, -1,
+ -1, -1, -1, 352, -1, -1, -1, -1, 357, -1,
+ -1, -1, -1, 362, -1, -1, -1, 33, -1, -1,
+ -1, -1, -1, -1, 262, 374, 375, 265, 266, 267,
+ 268, 269, -1, 382, 383, 273, 274, 275, 276, -1,
+ 278, 279, -1, 281, 282, -1, -1, -1, -1, -1,
-1, -1, -1, -1, 292, -1, -1, -1, -1, -1,
- 298, 299, -1, -1, 302, 303, 304, -1, -1, -1,
- 308, -1, 33, -1, -1, -1, -1, -1, -1, -1,
- 262, -1, -1, -1, -1, 267, 268, 269, 326, -1,
- -1, 273, 274, 275, 276, -1, 278, 279, -1, 281,
- 282, -1, 340, -1, -1, -1, -1, -1, -1, -1,
- 292, -1, 350, -1, -1, 353, 298, 299, -1, -1,
- 302, 303, 304, -1, -1, -1, 308, -1, -1, 10,
- 368, 369, -1, -1, -1, -1, -1, -1, 376, 377,
- -1, -1, -1, -1, 326, -1, -1, -1, -1, -1,
- -1, -1, 33, -1, -1, -1, -1, -1, 340, -1,
- -1, -1, 123, -1, -1, -1, -1, -1, 350, -1,
- -1, 353, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, 262, 368, 369, -1, -1,
- 267, 268, 269, -1, 376, 377, 273, -1, -1, 276,
- -1, 278, 279, -1, 281, 282, -1, -1, -1, 10,
+ 298, 299, 10, -1, 302, 303, 304, -1, -1, -1,
+ 308, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 318, -1, -1, -1, -1, 33, -1, -1, 326, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 340, -1, -1, -1, -1, 123, -1, -1,
+ -1, -1, -1, -1, 352, -1, -1, -1, -1, 357,
+ -1, -1, -1, -1, 362, -1, -1, -1, -1, -1,
+ 10, -1, -1, -1, -1, 262, 374, 375, -1, 266,
+ 267, 268, 269, -1, 382, 383, 273, 274, 275, 276,
+ -1, 278, 279, 33, 281, 282, -1, -1, -1, -1,
-1, -1, -1, -1, -1, 292, -1, -1, -1, -1,
-1, 298, 299, -1, -1, 302, 303, 304, -1, -1,
- -1, 308, 33, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, 262, 326,
- -1, -1, -1, 267, 268, 269, -1, -1, -1, 273,
- -1, -1, 276, 340, 278, 279, -1, 281, 282, -1,
- -1, -1, 10, 350, -1, -1, 353, -1, 292, -1,
- -1, -1, -1, -1, 298, 299, -1, -1, 302, 303,
- 304, 368, 369, -1, 308, 33, -1, -1, -1, 376,
- 377, 262, -1, -1, -1, -1, 267, 268, 269, -1,
- -1, -1, 326, -1, -1, 276, -1, 278, 279, -1,
- 281, 282, 123, -1, -1, 10, 340, -1, -1, -1,
- -1, 292, -1, -1, -1, -1, 350, 298, 299, 353,
- -1, 302, 303, 304, -1, -1, -1, 308, 33, -1,
- -1, -1, -1, -1, 368, 369, -1, -1, -1, -1,
- -1, -1, 376, 377, -1, 326, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, 340,
- -1, 262, -1, -1, -1, -1, 267, 268, 269, 350,
- -1, -1, 353, -1, -1, 276, -1, 278, 279, -1,
- 281, 282, -1, -1, -1, 10, -1, 368, 369, -1,
- -1, 292, -1, -1, -1, 376, 377, 298, 299, -1,
- -1, 302, 303, 304, -1, -1, -1, 308, 33, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, 123, -1,
- -1, -1, -1, -1, -1, 326, -1, -1, 10, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, 340,
- -1, 262, -1, -1, -1, -1, -1, 268, 269, 350,
- -1, 33, 353, -1, -1, -1, -1, 278, 279, -1,
- 281, 282, -1, -1, -1, 10, -1, 368, 369, -1,
- -1, 292, -1, -1, -1, 376, 377, 298, 299, -1,
- -1, 302, 303, 304, -1, -1, -1, 308, 33, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, 123, -1,
- -1, -1, -1, -1, 262, 326, -1, -1, -1, -1,
- 268, 269, -1, -1, -1, -1, -1, -1, -1, 340,
- 278, 279, -1, 281, 282, -1, 10, -1, 10, 350,
- -1, -1, 353, -1, 292, -1, -1, -1, -1, -1,
- 298, 299, -1, -1, 302, 303, 304, 368, 369, 33,
- 308, 33, -1, -1, -1, 376, 377, -1, -1, -1,
- -1, -1, -1, -1, 269, -1, -1, -1, 326, -1,
- -1, -1, -1, 278, 279, -1, 281, 282, 123, -1,
- -1, -1, 340, 10, -1, -1, -1, 292, -1, -1,
- -1, -1, 350, 298, 299, 353, -1, 302, 303, 304,
- -1, -1, -1, 308, -1, -1, 33, -1, -1, -1,
- 368, 369, -1, -1, -1, -1, -1, -1, 376, 377,
- -1, 326, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, 340, -1, -1, -1, 10,
- -1, -1, -1, -1, 269, 350, -1, -1, 353, -1,
- -1, -1, -1, 278, 279, -1, 281, 282, -1, -1,
- -1, 10, 33, 368, 369, -1, -1, 292, -1, -1,
- -1, 376, 377, 298, 299, -1, -1, 302, 303, 304,
- 262, -1, -1, 308, 33, 267, 268, -1, -1, -1,
- -1, 273, 274, 275, 276, -1, 10, -1, -1, -1,
- -1, 326, -1, -1, -1, -1, -1, 289, 290, -1,
- -1, -1, -1, -1, -1, 340, -1, -1, -1, 33,
- 302, -1, -1, 305, 269, 350, -1, -1, 353, 311,
- -1, -1, -1, 278, 279, -1, 281, 282, -1, -1,
- -1, -1, 324, 368, 369, -1, -1, 292, -1, -1,
- -1, 376, 377, 298, 299, -1, -1, 302, 303, 304,
- -1, -1, -1, 308, -1, -1, -1, -1, -1, -1,
- -1, 353, -1, -1, -1, -1, -1, -1, 262, -1,
- -1, 326, 266, 267, 268, -1, 368, 269, -1, 273,
- 274, 275, 276, 375, -1, 340, 278, 279, 33, 281,
- 282, -1, -1, -1, 288, 350, -1, -1, 353, -1,
- 292, -1, -1, -1, -1, -1, 298, 299, -1, -1,
- 302, 303, 304, 368, 369, -1, 308, -1, -1, -1,
- -1, 376, 377, -1, -1, 262, -1, -1, -1, 266,
- 267, 268, -1, -1, 326, -1, 273, 274, 275, 276,
- -1, 335, 336, 337, 338, 339, -1, -1, 340, -1,
- -1, 288, -1, -1, -1, -1, -1, -1, 350, 353,
- -1, 353, 356, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, 368, 369, 368, 369, -1, -1,
- -1, 262, -1, -1, 376, 377, 267, 268, -1, -1,
- -1, -1, 273, 274, 275, 276, -1, -1, 335, 336,
- 337, 338, 339, 262, -1, -1, -1, 288, 267, 268,
- -1, -1, -1, -1, 273, -1, 353, 276, -1, 356,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, 288,
- -1, 368, 369, -1, -1, -1, -1, -1, 262, -1,
- -1, -1, -1, 267, 268, -1, -1, -1, -1, -1,
- -1, -1, 276, -1, 335, 336, 337, 338, 339, -1,
- -1, -1, -1, -1, 288, -1, -1, -1, -1, -1,
- -1, -1, 353, -1, -1, 356, 335, 336, 337, 338,
- 339, -1, -1, -1, -1, -1, -1, 368, 369, -1,
- -1, -1, -1, -1, 353, -1, -1, 356, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, 368,
- 369, 335, 336, 337, 338, 339, -1, -1, -1, -1,
- -1, -1, -1, -1, 269, -1, -1, -1, -1, 353,
- -1, -1, 356, 278, 279, -1, 281, 282, -1, -1,
- -1, -1, -1, -1, 368, 369, -1, 292, -1, -1,
- -1, -1, -1, 298, 299, -1, -1, 302, 303, 304,
- -1, -1, -1, 308, -1, -1, -1, -1, -1, -1,
+ -1, 308, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 318, -1, -1, -1, -1, -1, -1, 10, 326,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, 326, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 340, -1, -1, -1, -1, -1, -1,
+ -1, 33, -1, -1, -1, 352, -1, -1, -1, -1,
+ 357, -1, -1, -1, -1, 362, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 262, 374, 375, -1,
+ -1, 267, 268, 269, -1, 382, 383, 273, 274, 275,
+ 276, -1, 278, 279, -1, 281, 282, 10, -1, -1,
+ -1, -1, -1, -1, -1, -1, 292, -1, -1, -1,
+ -1, -1, 298, 299, -1, -1, 302, 303, 304, -1,
+ 33, -1, 308, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 318, -1, -1, -1, -1, -1, -1, -1,
+ 326, 123, -1, -1, 262, 10, -1, -1, -1, 267,
+ 268, 269, -1, -1, 340, 273, 274, 275, 276, -1,
+ 278, 279, -1, 281, 282, -1, 352, -1, 33, -1,
+ -1, 357, -1, -1, 292, -1, 362, -1, -1, -1,
+ 298, 299, -1, -1, 302, 303, 304, -1, 374, 375,
+ 308, -1, -1, -1, -1, -1, 382, 383, -1, -1,
+ 318, -1, -1, -1, -1, -1, -1, -1, 326, -1,
+ -1, -1, 262, -1, 10, -1, -1, 267, 268, 269,
+ -1, -1, 340, 273, 274, 275, 276, -1, 278, 279,
+ -1, 281, 282, -1, 352, -1, -1, 33, -1, 357,
+ -1, -1, 292, -1, 362, -1, -1, -1, 298, 299,
+ -1, -1, 302, 303, 304, -1, 374, 375, 308, -1,
+ -1, -1, -1, -1, 382, 383, 10, -1, 318, -1,
+ -1, -1, -1, -1, -1, -1, 326, -1, -1, -1,
+ 262, -1, -1, -1, -1, 267, 268, 269, -1, 33,
+ 340, 273, -1, -1, 276, -1, 278, 279, -1, 281,
+ 282, -1, 352, -1, -1, -1, -1, 357, -1, -1,
+ 292, -1, 362, -1, -1, -1, 298, 299, -1, -1,
+ 302, 303, 304, -1, 374, 375, 308, 123, -1, -1,
+ -1, -1, 382, 383, -1, -1, 318, -1, -1, -1,
+ -1, -1, -1, -1, 326, -1, -1, -1, -1, 262,
+ -1, -1, -1, -1, 267, 268, 269, -1, 340, -1,
+ 273, -1, -1, 276, 10, 278, 279, -1, 281, 282,
+ 352, -1, -1, -1, -1, 357, -1, -1, -1, 292,
+ 362, -1, -1, -1, -1, 298, 299, 33, -1, 302,
+ 303, 304, 374, 375, -1, 308, -1, 262, -1, -1,
+ 382, 383, 267, 268, -1, 318, -1, -1, 273, 274,
+ 275, 276, -1, 326, -1, 10, -1, -1, -1, -1,
+ -1, -1, -1, -1, 289, 290, -1, 340, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 302, 33, 352,
+ 305, -1, -1, -1, 357, -1, 311, -1, -1, 362,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, 324,
+ -1, 374, 375, -1, -1, -1, 262, -1, -1, 382,
+ 383, 267, 268, 269, -1, -1, -1, 123, -1, -1,
+ 276, -1, 278, 279, -1, 281, 282, -1, -1, -1,
+ -1, -1, 357, -1, -1, -1, 292, -1, -1, -1,
+ -1, -1, 298, 299, 10, -1, 302, 303, 304, 374,
+ -1, -1, 308, -1, -1, -1, 381, -1, 262, -1,
+ -1, -1, 318, 267, 268, 269, -1, 33, -1, -1,
+ 326, -1, 276, -1, 278, 279, -1, 281, 282, -1,
+ -1, -1, -1, -1, 340, -1, -1, -1, 292, -1,
+ 10, -1, -1, -1, 298, 299, 352, -1, 302, 303,
+ 304, 357, -1, -1, 308, -1, 362, -1, -1, -1,
+ -1, -1, -1, 33, 318, -1, -1, -1, 374, 375,
+ -1, -1, 326, -1, -1, -1, 382, 383, -1, -1,
+ -1, -1, -1, -1, -1, -1, 340, -1, -1, -1,
+ -1, -1, 10, -1, -1, -1, -1, -1, 352, -1,
+ -1, -1, -1, 357, -1, -1, 262, 123, 362, -1,
+ -1, -1, 268, 269, -1, 33, -1, -1, -1, -1,
+ 374, 375, 278, 279, -1, 281, 282, -1, 382, 383,
+ -1, -1, -1, -1, -1, -1, 292, -1, 10, -1,
+ -1, -1, 298, 299, -1, -1, 302, 303, 304, -1,
+ -1, -1, 308, 123, -1, -1, -1, 262, -1, -1,
+ -1, 33, 318, 268, 269, -1, -1, -1, -1, -1,
+ 326, -1, -1, 278, 279, -1, 281, 282, -1, -1,
+ -1, -1, -1, -1, 340, -1, -1, 292, -1, -1,
+ -1, -1, -1, 298, 299, -1, 352, 302, 303, 304,
+ -1, 357, -1, 308, -1, 123, 362, -1, -1, -1,
+ -1, -1, -1, 318, -1, -1, -1, -1, 374, 375,
+ -1, 326, -1, -1, -1, -1, 382, 383, -1, -1,
-1, -1, -1, -1, -1, 340, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, 350, -1, -1, 353, -1,
+ -1, 10, -1, -1, -1, -1, -1, 352, -1, -1,
+ -1, -1, 357, 269, -1, -1, -1, 362, -1, -1,
+ -1, 10, 278, 279, 33, 281, 282, -1, -1, 374,
+ 375, -1, -1, -1, -1, -1, 292, 382, 383, -1,
+ -1, -1, 298, 299, 33, -1, 302, 303, 304, -1,
+ -1, -1, 308, -1, 10, -1, -1, -1, -1, 269,
+ -1, -1, 318, -1, -1, -1, -1, -1, 278, 279,
+ 326, 281, 282, -1, -1, -1, -1, 33, -1, -1,
+ -1, -1, 292, -1, 340, -1, 10, -1, 298, 299,
+ -1, -1, 302, 303, 304, -1, 352, -1, 308, -1,
+ -1, 357, -1, -1, -1, -1, 362, -1, 318, 33,
+ -1, 269, -1, -1, -1, -1, 326, -1, 374, 375,
+ 278, 279, -1, 281, 282, -1, 382, 383, -1, -1,
+ 340, -1, 10, -1, 292, -1, -1, -1, -1, 10,
+ 298, 299, 352, -1, 302, 303, 304, 357, -1, -1,
+ 308, -1, 362, -1, -1, 33, -1, 269, -1, -1,
+ 318, -1, 33, -1, 374, 375, 278, 279, 326, 281,
+ 282, -1, 382, 383, -1, -1, -1, -1, -1, -1,
+ 292, -1, 340, -1, -1, -1, 298, 299, -1, -1,
+ 302, 303, 304, -1, 352, -1, 308, -1, -1, 357,
+ -1, -1, -1, -1, 362, -1, 318, -1, -1, -1,
+ -1, -1, -1, -1, 326, -1, 374, 375, -1, -1,
+ -1, -1, -1, -1, 382, 383, -1, -1, 340, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 352, -1, 33, -1, -1, 357, -1, -1, -1, -1,
+ 362, -1, -1, 262, -1, 264, 265, 266, 267, 268,
+ -1, -1, 374, 375, 273, 274, 275, 276, -1, -1,
+ 382, 383, -1, 262, -1, -1, -1, 266, 267, 268,
+ 289, 290, -1, -1, 273, 274, 275, 276, -1, -1,
+ -1, -1, -1, 302, -1, -1, 305, -1, -1, -1,
+ 289, 290, 311, -1, -1, -1, 262, -1, -1, -1,
+ 266, 267, 268, 302, -1, 324, 305, 273, 274, 275,
+ 276, -1, 311, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 288, -1, -1, 324, -1, -1, 262, -1,
+ -1, -1, 266, 267, 268, -1, -1, -1, 357, 273,
+ 274, 275, 276, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 288, 374, -1, -1, 357, -1,
+ -1, -1, 381, -1, -1, -1, -1, -1, -1, 335,
+ 336, 337, 338, 339, 262, 374, -1, -1, -1, 267,
+ 268, 262, 381, -1, -1, 273, 267, 268, 276, -1,
+ -1, 357, -1, -1, -1, 276, -1, 363, -1, -1,
+ 288, 335, 336, 337, 338, 339, -1, 288, 374, 375,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, 369, -1, -1, -1, -1, -1,
- -1, 376, 377,
+ -1, -1, -1, 357, -1, -1, -1, -1, -1, 363,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 374, 375, -1, -1, -1, -1, -1, 335, 336, 337,
+ 338, 339, -1, -1, 335, 336, 337, 338, 339, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, 269, 357,
+ -1, -1, -1, -1, -1, 363, 357, 278, 279, -1,
+ 281, 282, 363, -1, -1, -1, 374, 375, -1, -1,
+ -1, 292, -1, 374, 375, -1, -1, 298, 299, -1,
+ -1, 302, 303, 304, -1, -1, -1, 308, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 318, -1, -1,
+ -1, -1, -1, -1, -1, 326, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, 340,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 352, -1, -1, -1, -1, 357, -1, -1, -1,
+ -1, 362, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 375, -1, -1, -1, -1, -1,
+ -1, 382, 383,
};
#define YYFINAL 2
#ifndef YYDEBUG
#define YYDEBUG 0
#endif
-#define YYMAXTOKEN 380
-#define YYUNDFTOKEN 539
+#define YYMAXTOKEN 386
+#define YYUNDFTOKEN 557
#define YYTRANSLATE(a) ((a) > YYMAXTOKEN ? YYUNDFTOKEN : (a))
#if YYDEBUG
static const char *const pfctlyname[] = {
@@ -2010,17 +2069,18 @@ static const char *const pfctlyname[] = {
"BINATANCHOR","SET","OPTIMIZATION","TIMEOUT","LIMIT","LOGINTERFACE",
"BLOCKPOLICY","RANDOMID","REQUIREORDER","SYNPROXY","FINGERPRINTS","NOSYNC",
"DEBUG","SKIP","HOSTID","ANTISPOOF","FOR","INCLUDE","BITMASK","RANDOM",
-"SOURCEHASH","ROUNDROBIN","STATICPORT","PROBABILITY","ALTQ","CBQ","PRIQ","HFSC",
-"BANDWIDTH","TBRSIZE","LINKSHARE","REALTIME","UPPERLIMIT","QUEUE","PRIORITY",
-"QLIMIT","RTABLE","LOAD","RULESET_OPTIMIZATION","STICKYADDRESS","MAXSRCSTATES",
+"SOURCEHASH","ROUNDROBIN","STATICPORT","PROBABILITY","ALTQ","CBQ","CODEL",
+"PRIQ","HFSC","FAIRQ","BANDWIDTH","TBRSIZE","LINKSHARE","REALTIME","UPPERLIMIT",
+"QUEUE","PRIORITY","QLIMIT","HOGS","BUCKETS","RTABLE","TARGET","INTERVAL",
+"LOAD","RULESET_OPTIMIZATION","PRIO","STICKYADDRESS","MAXSRCSTATES",
"MAXSRCNODES","SOURCETRACK","GLOBAL","RULE","MAXSRCCONN","MAXSRCCONNRATE",
-"OVERLOAD","FLUSH","SLOPPY","PFLOW","TAGGED","TAG","IFBOUND","FLOATING",
-"STATEPOLICY","STATEDEFAULTS","ROUTE","SETTOS","DIVERTTO","DIVERTREPLY",
-"STRING","NUMBER","PORTBINARY",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+"OVERLOAD","FLUSH","SLOPPY","TAGGED","TAG","IFBOUND","FLOATING","STATEPOLICY",
+"STATEDEFAULTS","ROUTE","SETTOS","DIVERTTO","DIVERTREPLY","STRING","NUMBER",
+"PORTBINARY",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,"illegal-symbol",
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"illegal-symbol",
};
static const char *const pfctlyrule[] = {
"$accept : ruleset",
@@ -2153,6 +2213,10 @@ static const char *const pfctlyrule[] = {
"scheduler : PRIQ '(' priqflags_list ')'",
"scheduler : HFSC",
"scheduler : HFSC '(' hfsc_opts ')'",
+"scheduler : FAIRQ",
+"scheduler : FAIRQ '(' fairq_opts ')'",
+"scheduler : CODEL",
+"scheduler : CODEL '(' codel_opts ')'",
"cbqflags_list : cbqflags_item",
"cbqflags_list : cbqflags_list comma cbqflags_item",
"cbqflags_item : STRING",
@@ -2170,6 +2234,22 @@ static const char *const pfctlyrule[] = {
"hfscopts_item : UPPERLIMIT bandwidth",
"hfscopts_item : UPPERLIMIT '(' bandwidth comma NUMBER comma bandwidth ')'",
"hfscopts_item : STRING",
+"$$7 :",
+"fairq_opts : $$7 fairqopts_list",
+"fairqopts_list : fairqopts_item",
+"fairqopts_list : fairqopts_list comma fairqopts_item",
+"fairqopts_item : LINKSHARE bandwidth",
+"fairqopts_item : LINKSHARE '(' bandwidth number bandwidth ')'",
+"fairqopts_item : HOGS bandwidth",
+"fairqopts_item : BUCKETS number",
+"fairqopts_item : STRING",
+"$$8 :",
+"codel_opts : $$8 codelopts_list",
+"codelopts_list : codelopts_item",
+"codelopts_list : codelopts_list comma codelopts_item",
+"codelopts_item : INTERVAL number",
+"codelopts_item : TARGET number",
+"codelopts_item : STRING",
"qassign :",
"qassign : qassign_item",
"qassign : '{' optnl qassign_list '}'",
@@ -2177,8 +2257,8 @@ static const char *const pfctlyrule[] = {
"qassign_list : qassign_list comma qassign_item optnl",
"qassign_item : STRING",
"pfrule : action dir logquick interface route af proto fromto filter_opts",
-"$$7 :",
-"filter_opts : $$7 filter_opts_l",
+"$$9 :",
+"filter_opts : $$9 filter_opts_l",
"filter_opts :",
"filter_opts_l : filter_opts_l filter_opt",
"filter_opts_l : filter_opt",
@@ -2186,6 +2266,7 @@ static const char *const pfctlyrule[] = {
"filter_opt : GROUP gids",
"filter_opt : flags",
"filter_opt : icmpspec",
+"filter_opt : PRIO NUMBER",
"filter_opt : TOS tos",
"filter_opt : keep",
"filter_opt : FRAGMENT",
@@ -2199,6 +2280,14 @@ static const char *const pfctlyrule[] = {
"filter_opt : DIVERTTO portplain",
"filter_opt : DIVERTTO STRING PORT portplain",
"filter_opt : DIVERTREPLY",
+"filter_opt : filter_sets",
+"filter_sets : SET '(' filter_sets_l ')'",
+"filter_sets : SET filter_set",
+"filter_sets_l : filter_sets_l comma filter_set",
+"filter_sets_l : filter_set",
+"filter_set : prio",
+"prio : PRIO NUMBER",
+"prio : PRIO '(' NUMBER comma NUMBER ')'",
"probability : STRING",
"probability : NUMBER",
"action : PASS",
@@ -2287,7 +2376,6 @@ static const char *const pfctlyrule[] = {
"host : dynaddr",
"host : dynaddr '/' NUMBER",
"host : '<' STRING '>'",
-"host : ROUTE STRING",
"number : NUMBER",
"number : STRING",
"dynaddr : '(' STRING ')'",
@@ -2368,7 +2456,6 @@ static const char *const pfctlyrule[] = {
"state_opt_item : sourcetrack",
"state_opt_item : statelock",
"state_opt_item : SLOPPY",
-"state_opt_item : PFLOW",
"state_opt_item : STRING NUMBER",
"label : LABEL STRING",
"qname : QUEUE STRING",
@@ -2386,8 +2473,8 @@ static const char *const pfctlyrule[] = {
"redirpool : ARROW redirspec PORT portstar",
"hashkey :",
"hashkey : string",
-"$$8 :",
-"pool_opts : $$8 pool_opts_l",
+"$$10 :",
+"pool_opts : $$10 pool_opts_l",
"pool_opts :",
"pool_opts_l : pool_opts_l pool_opt",
"pool_opts_l : pool_opt",
@@ -2426,6 +2513,7 @@ static const char *const pfctlyrule[] = {
"route : REPLYTO routespec pool_opts",
"route : DUPTO routespec pool_opts",
"timeout_spec : STRING NUMBER",
+"timeout_spec : INTERVAL NUMBER",
"timeout_list : timeout_list comma timeout_spec optnl",
"timeout_list : timeout_spec optnl",
"limit_spec : STRING NUMBER",
@@ -2478,7 +2566,12 @@ typedef struct {
} YYSTACKDATA;
/* variables for the parser stack */
static YYSTACKDATA yystack;
-#line 4397 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 4545 "../../freebsd/sbin/pfctl/parse.y"
+#ifdef __rtems__
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern YYSTYPE pfctlyval);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern YYSTYPE pfctlylval);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static YYSTACKDATA yystack);
+#endif /* __rtems__ */
int
yyerror(const char *fmt, ...)
@@ -2907,7 +3000,8 @@ expand_altq(struct pf_altq *a, struct node_if *interfaces,
if ((pf->loadopt & PFCTL_FLAG_ALTQ) == 0) {
FREE_LIST(struct node_if, interfaces);
- FREE_LIST(struct node_queue, nqueues);
+ if (nqueues)
+ FREE_LIST(struct node_queue, nqueues);
return (0);
}
@@ -2998,7 +3092,8 @@ expand_altq(struct pf_altq *a, struct node_if *interfaces,
}
);
FREE_LIST(struct node_if, interfaces);
- FREE_LIST(struct node_queue, nqueues);
+ if (nqueues)
+ FREE_LIST(struct node_queue, nqueues);
return (errs);
}
@@ -3401,8 +3496,10 @@ lookup(char *s)
{ "bitmask", BITMASK},
{ "block", BLOCK},
{ "block-policy", BLOCKPOLICY},
+ { "buckets", BUCKETS},
{ "cbq", CBQ},
{ "code", CODE},
+ { "codelq", CODEL},
{ "crop", FRAGCROP},
{ "debug", DEBUG},
{ "divert-reply", DIVERTREPLY},
@@ -3410,6 +3507,7 @@ lookup(char *s)
{ "drop", DROP},
{ "drop-ovl", FRAGDROP},
{ "dup-to", DUPTO},
+ { "fairq", FAIRQ},
{ "fastroute", FASTROUTE},
{ "file", FILENAME},
{ "fingerprints", FINGERPRINTS},
@@ -3422,6 +3520,7 @@ lookup(char *s)
{ "global", GLOBAL},
{ "group", GROUP},
{ "hfsc", HFSC},
+ { "hogs", HOGS},
{ "hostid", HOSTID},
{ "icmp-type", ICMPTYPE},
{ "icmp6-type", ICMP6TYPE},
@@ -3430,6 +3529,7 @@ lookup(char *s)
{ "include", INCLUDE},
{ "inet", INET},
{ "inet6", INET6},
+ { "interval", INTERVAL},
{ "keep", KEEP},
{ "label", LABEL},
{ "limit", LIMIT},
@@ -3457,8 +3557,8 @@ lookup(char *s)
{ "out", OUT},
{ "overload", OVERLOAD},
{ "pass", PASS},
- { "pflow", PFLOW},
{ "port", PORT},
+ { "prio", PRIO},
{ "priority", PRIORITY},
{ "priq", PRIQ},
{ "probability", PROBABILITY},
@@ -3500,6 +3600,7 @@ lookup(char *s)
{ "table", TABLE},
{ "tag", TAG},
{ "tagged", TAGGED},
+ { "target", TARGET},
{ "tbrsize", TBRSIZE},
{ "timeout", TIMEOUT},
{ "to", TO},
@@ -3527,17 +3628,10 @@ lookup(char *s)
#define MAXPUSHBACK 128
-#ifndef __rtems__
-char *parsebuf;
-int parseindex;
-char pushback_buffer[MAXPUSHBACK];
-int pushback_index = 0;
-#else /* __rtems__ */
static char *parsebuf;
static int parseindex;
static char pushback_buffer[MAXPUSHBACK];
static int pushback_index = 0;
-#endif /* __rtems__ */
int
lgetc(int quotec)
@@ -4212,17 +4306,14 @@ rt_tableid_max(void)
/*
* As the OpenBSD code only compares > and not >= we need to adjust
* here given we only accept values of 0..n and want to avoid #ifdefs
- * in the grammer.
+ * in the grammar.
*/
return (fibs - 1);
#else
return (RT_TABLEID_MAX);
#endif
}
-#ifdef __rtems__
-#include "parse-data.h"
-#endif /* __rtems__ */
-#line 4226 "pfctly.tab.c"
+#line 4317 "pfctly.tab.c"
#if YYDEBUG
#include <stdio.h> /* needed for printf */
@@ -4425,11 +4516,11 @@ yyreduce:
switch (yyn)
{
case 17:
-#line 578 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 553 "../../freebsd/sbin/pfctl/parse.y"
{ file->errors++; }
break;
case 18:
-#line 581 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 556 "../../freebsd/sbin/pfctl/parse.y"
{
struct file *nfile;
@@ -4445,7 +4536,7 @@ case 18:
}
break;
case 25:
-#line 608 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 583 "../../freebsd/sbin/pfctl/parse.y"
{
if (!strcmp(yystack.l_mark[0].v.string, "none"))
yyval.v.i = 0;
@@ -4460,7 +4551,7 @@ case 25:
}
break;
case 26:
-#line 622 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 597 "../../freebsd/sbin/pfctl/parse.y"
{
if (check_rulestate(PFCTL_STATE_OPTION)) {
free(yystack.l_mark[0].v.string);
@@ -4475,7 +4566,7 @@ case 26:
}
break;
case 27:
-#line 634 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 609 "../../freebsd/sbin/pfctl/parse.y"
{
if (!(pf->opts & PF_OPT_OPTIMIZE)) {
pf->opts |= PF_OPT_OPTIMIZE;
@@ -4484,7 +4575,7 @@ case 27:
}
break;
case 32:
-#line 644 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 619 "../../freebsd/sbin/pfctl/parse.y"
{
if (check_rulestate(PFCTL_STATE_OPTION)) {
free(yystack.l_mark[0].v.string);
@@ -4499,7 +4590,7 @@ case 32:
}
break;
case 33:
-#line 656 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 631 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number == 0 || yystack.l_mark[0].v.number > UINT_MAX) {
yyerror("hostid must be non-zero");
@@ -4512,7 +4603,7 @@ case 33:
}
break;
case 34:
-#line 666 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 641 "../../freebsd/sbin/pfctl/parse.y"
{
if (pf->opts & PF_OPT_VERBOSE)
printf("set block-policy drop\n");
@@ -4522,7 +4613,7 @@ case 34:
}
break;
case 35:
-#line 673 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 648 "../../freebsd/sbin/pfctl/parse.y"
{
if (pf->opts & PF_OPT_VERBOSE)
printf("set block-policy return\n");
@@ -4532,7 +4623,7 @@ case 35:
}
break;
case 36:
-#line 680 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 655 "../../freebsd/sbin/pfctl/parse.y"
{
if (pf->opts & PF_OPT_VERBOSE)
printf("set require-order %s\n",
@@ -4541,7 +4632,7 @@ case 36:
}
break;
case 37:
-#line 686 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 661 "../../freebsd/sbin/pfctl/parse.y"
{
if (pf->opts & PF_OPT_VERBOSE)
printf("set fingerprints \"%s\"\n", yystack.l_mark[0].v.string);
@@ -4562,7 +4653,7 @@ case 37:
}
break;
case 38:
-#line 704 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 679 "../../freebsd/sbin/pfctl/parse.y"
{
if (pf->opts & PF_OPT_VERBOSE)
switch (yystack.l_mark[0].v.i) {
@@ -4577,7 +4668,7 @@ case 38:
}
break;
case 39:
-#line 716 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 691 "../../freebsd/sbin/pfctl/parse.y"
{
if (check_rulestate(PFCTL_STATE_OPTION)) {
free(yystack.l_mark[0].v.string);
@@ -4592,7 +4683,7 @@ case 39:
}
break;
case 40:
-#line 728 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 703 "../../freebsd/sbin/pfctl/parse.y"
{
if (expand_skip_interface(yystack.l_mark[0].v.interface) != 0) {
yyerror("error setting skip interface(s)");
@@ -4601,7 +4692,7 @@ case 40:
}
break;
case 41:
-#line 734 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 709 "../../freebsd/sbin/pfctl/parse.y"
{
if (keep_state_defaults != NULL) {
yyerror("cannot redefine state-defaults");
@@ -4611,11 +4702,11 @@ case 41:
}
break;
case 42:
-#line 743 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 718 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.string = yystack.l_mark[0].v.string; }
break;
case 43:
-#line 744 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 719 "../../freebsd/sbin/pfctl/parse.y"
{
if ((yyval.v.string = strdup("all")) == NULL) {
err(1, "stringall: strdup");
@@ -4623,7 +4714,7 @@ case 43:
}
break;
case 44:
-#line 751 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 726 "../../freebsd/sbin/pfctl/parse.y"
{
if (asprintf(&yyval.v.string, "%s %s", yystack.l_mark[-1].v.string, yystack.l_mark[0].v.string) == -1)
err(1, "string: asprintf");
@@ -4632,7 +4723,7 @@ case 44:
}
break;
case 46:
-#line 760 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 735 "../../freebsd/sbin/pfctl/parse.y"
{
if (asprintf(&yyval.v.string, "%s %s", yystack.l_mark[-1].v.string, yystack.l_mark[0].v.string) == -1)
err(1, "string: asprintf");
@@ -4641,7 +4732,7 @@ case 46:
}
break;
case 48:
-#line 769 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 744 "../../freebsd/sbin/pfctl/parse.y"
{
char *s;
if (asprintf(&s, "%lld", (long long)yystack.l_mark[0].v.number) == -1) {
@@ -4652,7 +4743,7 @@ case 48:
}
break;
case 50:
-#line 780 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 755 "../../freebsd/sbin/pfctl/parse.y"
{
if (pf->opts & PF_OPT_VERBOSE)
printf("%s = \"%s\"\n", yystack.l_mark[-2].v.string, yystack.l_mark[0].v.string);
@@ -4663,15 +4754,15 @@ case 50:
}
break;
case 51:
-#line 790 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 765 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.string = yystack.l_mark[0].v.string; }
break;
case 52:
-#line 791 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 766 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.string = NULL; }
break;
case 57:
-#line 801 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 776 "../../freebsd/sbin/pfctl/parse.y"
{
char ta[PF_ANCHOR_NAME_SIZE];
struct pf_ruleset *rs;
@@ -4691,7 +4782,7 @@ case 57:
}
break;
case 58:
-#line 818 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 793 "../../freebsd/sbin/pfctl/parse.y"
{
pf->alast = pf->anchor;
pf->asd--;
@@ -4699,7 +4790,7 @@ case 58:
}
break;
case 60:
-#line 828 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 803 "../../freebsd/sbin/pfctl/parse.y"
{
struct pf_rule r;
struct node_proto *proto;
@@ -4810,6 +4901,17 @@ case 60:
YYERROR;
}
r.match_tag_not = yystack.l_mark[-1].v.filter_opts.match_tag_not;
+ if (yystack.l_mark[-1].v.filter_opts.marker & FOM_PRIO) {
+ if (yystack.l_mark[-1].v.filter_opts.prio == 0)
+ r.prio = PF_PRIO_ZERO;
+ else
+ r.prio = yystack.l_mark[-1].v.filter_opts.prio;
+ }
+ if (yystack.l_mark[-1].v.filter_opts.marker & FOM_SETPRIO) {
+ r.set_prio[0] = yystack.l_mark[-1].v.filter_opts.set_prio[0];
+ r.set_prio[1] = yystack.l_mark[-1].v.filter_opts.set_prio[1];
+ r.scrub_flags |= PFSTATE_SETPRIO;
+ }
decide_address_family(yystack.l_mark[-2].v.fromto.src.host, &r.af);
decide_address_family(yystack.l_mark[-2].v.fromto.dst.host, &r.af);
@@ -4823,7 +4925,7 @@ case 60:
}
break;
case 61:
-#line 949 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 935 "../../freebsd/sbin/pfctl/parse.y"
{
struct pf_rule r;
@@ -4847,7 +4949,7 @@ case 61:
}
break;
case 62:
-#line 970 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 956 "../../freebsd/sbin/pfctl/parse.y"
{
struct pf_rule r;
@@ -4892,7 +4994,7 @@ case 62:
}
break;
case 63:
-#line 1012 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 998 "../../freebsd/sbin/pfctl/parse.y"
{
struct pf_rule r;
@@ -4930,7 +5032,7 @@ case 63:
}
break;
case 64:
-#line 1049 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1035 "../../freebsd/sbin/pfctl/parse.y"
{
struct loadanchors *loadanchor;
@@ -4963,7 +5065,7 @@ case 64:
}
break;
case 65:
-#line 1080 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1066 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.b.b2 = yyval.v.b.w = 0;
if (yystack.l_mark[-1].v.i)
@@ -4973,7 +5075,7 @@ case 65:
}
break;
case 66:
-#line 1090 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1076 "../../freebsd/sbin/pfctl/parse.y"
{
struct pf_rule r;
@@ -5031,18 +5133,18 @@ case 66:
}
break;
case 67:
-#line 1147 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1133 "../../freebsd/sbin/pfctl/parse.y"
{
bzero(&scrub_opts, sizeof scrub_opts);
scrub_opts.rtableid = -1;
}
break;
case 68:
-#line 1152 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1138 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.scrub_opts = scrub_opts; }
break;
case 69:
-#line 1153 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1139 "../../freebsd/sbin/pfctl/parse.y"
{
bzero(&scrub_opts, sizeof scrub_opts);
scrub_opts.rtableid = -1;
@@ -5050,7 +5152,7 @@ case 69:
}
break;
case 72:
-#line 1164 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1150 "../../freebsd/sbin/pfctl/parse.y"
{
if (scrub_opts.nodf) {
yyerror("no-df cannot be respecified");
@@ -5060,7 +5162,7 @@ case 72:
}
break;
case 73:
-#line 1171 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1157 "../../freebsd/sbin/pfctl/parse.y"
{
if (scrub_opts.marker & SOM_MINTTL) {
yyerror("min-ttl cannot be respecified");
@@ -5075,7 +5177,7 @@ case 73:
}
break;
case 74:
-#line 1183 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1169 "../../freebsd/sbin/pfctl/parse.y"
{
if (scrub_opts.marker & SOM_MAXMSS) {
yyerror("max-mss cannot be respecified");
@@ -5090,7 +5192,7 @@ case 74:
}
break;
case 75:
-#line 1195 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1181 "../../freebsd/sbin/pfctl/parse.y"
{
if (scrub_opts.marker & SOM_SETTOS) {
yyerror("set-tos cannot be respecified");
@@ -5101,7 +5203,7 @@ case 75:
}
break;
case 76:
-#line 1203 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1189 "../../freebsd/sbin/pfctl/parse.y"
{
if (scrub_opts.marker & SOM_FRAGCACHE) {
yyerror("fragcache cannot be respecified");
@@ -5112,7 +5214,7 @@ case 76:
}
break;
case 77:
-#line 1211 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1197 "../../freebsd/sbin/pfctl/parse.y"
{
if (strcasecmp(yystack.l_mark[0].v.string, "tcp") != 0) {
yyerror("scrub reassemble supports only tcp, "
@@ -5129,7 +5231,7 @@ case 77:
}
break;
case 78:
-#line 1225 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1211 "../../freebsd/sbin/pfctl/parse.y"
{
if (scrub_opts.randomid) {
yyerror("random-id cannot be respecified");
@@ -5139,7 +5241,7 @@ case 78:
}
break;
case 79:
-#line 1232 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1218 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > rt_tableid_max()) {
yyerror("invalid rtable id");
@@ -5149,26 +5251,26 @@ case 79:
}
break;
case 80:
-#line 1239 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1225 "../../freebsd/sbin/pfctl/parse.y"
{
scrub_opts.match_tag = yystack.l_mark[0].v.string;
scrub_opts.match_tag_not = yystack.l_mark[-2].v.number;
}
break;
case 81:
-#line 1245 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1231 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = 0; /* default */ }
break;
case 82:
-#line 1246 "../../freebsd/contrib/pf/pfctl/parse.y"
- { yyval.v.i = PFRULE_FRAGCROP; }
+#line 1232 "../../freebsd/sbin/pfctl/parse.y"
+ { yyval.v.i = 0; }
break;
case 83:
-#line 1247 "../../freebsd/contrib/pf/pfctl/parse.y"
- { yyval.v.i = PFRULE_FRAGDROP; }
+#line 1233 "../../freebsd/sbin/pfctl/parse.y"
+ { yyval.v.i = 0; }
break;
case 84:
-#line 1250 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1236 "../../freebsd/sbin/pfctl/parse.y"
{
struct pf_rule r;
struct node_host *h = NULL, *hh;
@@ -5256,19 +5358,19 @@ case 84:
}
break;
case 85:
-#line 1337 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1323 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.interface = yystack.l_mark[0].v.interface; }
break;
case 86:
-#line 1338 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1324 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.interface = yystack.l_mark[-1].v.interface; }
break;
case 87:
-#line 1341 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1327 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.interface = yystack.l_mark[-1].v.interface; }
break;
case 88:
-#line 1342 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1328 "../../freebsd/sbin/pfctl/parse.y"
{
yystack.l_mark[-3].v.interface->tail->next = yystack.l_mark[-1].v.interface;
yystack.l_mark[-3].v.interface->tail = yystack.l_mark[-1].v.interface;
@@ -5276,29 +5378,29 @@ case 88:
}
break;
case 89:
-#line 1349 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1335 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.interface = yystack.l_mark[0].v.interface; }
break;
case 90:
-#line 1350 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1336 "../../freebsd/sbin/pfctl/parse.y"
{
yystack.l_mark[-1].v.interface->dynamic = 1;
yyval.v.interface = yystack.l_mark[-1].v.interface;
}
break;
case 91:
-#line 1356 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1342 "../../freebsd/sbin/pfctl/parse.y"
{
bzero(&antispoof_opts, sizeof antispoof_opts);
antispoof_opts.rtableid = -1;
}
break;
case 92:
-#line 1361 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1347 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.antispoof_opts = antispoof_opts; }
break;
case 93:
-#line 1362 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1348 "../../freebsd/sbin/pfctl/parse.y"
{
bzero(&antispoof_opts, sizeof antispoof_opts);
antispoof_opts.rtableid = -1;
@@ -5306,7 +5408,7 @@ case 93:
}
break;
case 96:
-#line 1373 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1359 "../../freebsd/sbin/pfctl/parse.y"
{
if (antispoof_opts.label) {
yyerror("label cannot be redefined");
@@ -5316,7 +5418,7 @@ case 96:
}
break;
case 97:
-#line 1380 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1366 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > rt_tableid_max()) {
yyerror("invalid rtable id");
@@ -5326,15 +5428,15 @@ case 97:
}
break;
case 98:
-#line 1389 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1375 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.number = 1; }
break;
case 99:
-#line 1390 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1376 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.number = 0; }
break;
case 100:
-#line 1393 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1379 "../../freebsd/sbin/pfctl/parse.y"
{
struct node_host *h, *nh;
struct node_tinit *ti, *nti;
@@ -5365,18 +5467,18 @@ case 100:
}
break;
case 101:
-#line 1423 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1409 "../../freebsd/sbin/pfctl/parse.y"
{
bzero(&table_opts, sizeof table_opts);
SIMPLEQ_INIT(&table_opts.init_nodes);
}
break;
case 102:
-#line 1428 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1414 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.table_opts = table_opts; }
break;
case 103:
-#line 1430 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1416 "../../freebsd/sbin/pfctl/parse.y"
{
bzero(&table_opts, sizeof table_opts);
SIMPLEQ_INIT(&table_opts.init_nodes);
@@ -5384,7 +5486,7 @@ case 103:
}
break;
case 106:
-#line 1441 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1427 "../../freebsd/sbin/pfctl/parse.y"
{
if (!strcmp(yystack.l_mark[0].v.string, "const"))
table_opts.flags |= PFR_TFLAG_CONST;
@@ -5401,11 +5503,11 @@ case 106:
}
break;
case 107:
-#line 1455 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1441 "../../freebsd/sbin/pfctl/parse.y"
{ table_opts.init_addr = 1; }
break;
case 108:
-#line 1456 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1442 "../../freebsd/sbin/pfctl/parse.y"
{
struct node_host *n;
struct node_tinit *ti;
@@ -5448,7 +5550,7 @@ case 108:
}
break;
case 109:
-#line 1496 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1482 "../../freebsd/sbin/pfctl/parse.y"
{
struct node_tinit *ti;
@@ -5461,7 +5563,7 @@ case 109:
}
break;
case 110:
-#line 1508 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1494 "../../freebsd/sbin/pfctl/parse.y"
{
struct pf_altq a;
@@ -5476,7 +5578,7 @@ case 110:
a.scheduler = yystack.l_mark[-2].v.queue_opts.scheduler.qtype;
a.qlimit = yystack.l_mark[-2].v.queue_opts.qlimit;
a.tbrsize = yystack.l_mark[-2].v.queue_opts.tbrsize;
- if (yystack.l_mark[0].v.queue == NULL) {
+ if (yystack.l_mark[0].v.queue == NULL && yystack.l_mark[-2].v.queue_opts.scheduler.qtype != ALTQT_CODEL) {
yyerror("no child queues specified");
YYERROR;
}
@@ -5486,7 +5588,7 @@ case 110:
}
break;
case 111:
-#line 1532 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1518 "../../freebsd/sbin/pfctl/parse.y"
{
struct pf_altq a;
@@ -5524,7 +5626,7 @@ case 111:
}
break;
case 112:
-#line 1569 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1555 "../../freebsd/sbin/pfctl/parse.y"
{
bzero(&queue_opts, sizeof queue_opts);
queue_opts.priority = DEFAULT_PRIORITY;
@@ -5534,11 +5636,11 @@ case 112:
}
break;
case 113:
-#line 1577 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1563 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.queue_opts = queue_opts; }
break;
case 114:
-#line 1578 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1564 "../../freebsd/sbin/pfctl/parse.y"
{
bzero(&queue_opts, sizeof queue_opts);
queue_opts.priority = DEFAULT_PRIORITY;
@@ -5549,7 +5651,7 @@ case 114:
}
break;
case 117:
-#line 1592 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1578 "../../freebsd/sbin/pfctl/parse.y"
{
if (queue_opts.marker & QOM_BWSPEC) {
yyerror("bandwidth cannot be respecified");
@@ -5560,7 +5662,7 @@ case 117:
}
break;
case 118:
-#line 1600 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1586 "../../freebsd/sbin/pfctl/parse.y"
{
if (queue_opts.marker & QOM_PRIORITY) {
yyerror("priority cannot be respecified");
@@ -5575,7 +5677,7 @@ case 118:
}
break;
case 119:
-#line 1612 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1598 "../../freebsd/sbin/pfctl/parse.y"
{
if (queue_opts.marker & QOM_QLIMIT) {
yyerror("qlimit cannot be respecified");
@@ -5590,7 +5692,7 @@ case 119:
}
break;
case 120:
-#line 1624 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1610 "../../freebsd/sbin/pfctl/parse.y"
{
if (queue_opts.marker & QOM_SCHEDULER) {
yyerror("scheduler cannot be respecified");
@@ -5601,7 +5703,7 @@ case 120:
}
break;
case 121:
-#line 1632 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1618 "../../freebsd/sbin/pfctl/parse.y"
{
if (queue_opts.marker & QOM_TBRSIZE) {
yyerror("tbrsize cannot be respecified");
@@ -5616,7 +5718,7 @@ case 121:
}
break;
case 122:
-#line 1646 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1632 "../../freebsd/sbin/pfctl/parse.y"
{
double bps;
char *cp;
@@ -5625,13 +5727,22 @@ case 122:
bps = strtod(yystack.l_mark[0].v.string, &cp);
if (cp != NULL) {
+ if (strlen(cp) > 1) {
+ char *cu = cp + 1;
+ if (!strcmp(cu, "Bit") ||
+ !strcmp(cu, "B") ||
+ !strcmp(cu, "bit") ||
+ !strcmp(cu, "b")) {
+ *cu = 0;
+ }
+ }
if (!strcmp(cp, "b"))
; /* nothing */
- else if (!strcmp(cp, "Kb"))
+ else if (!strcmp(cp, "K"))
bps *= 1000;
- else if (!strcmp(cp, "Mb"))
+ else if (!strcmp(cp, "M"))
bps *= 1000 * 1000;
- else if (!strcmp(cp, "Gb"))
+ else if (!strcmp(cp, "G"))
bps *= 1000 * 1000 * 1000;
else if (!strcmp(cp, "%")) {
if (bps < 0 || bps > 100) {
@@ -5653,7 +5764,7 @@ case 122:
}
break;
case 123:
-#line 1680 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1675 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > UINT_MAX) {
yyerror("bandwidth number too big");
@@ -5664,35 +5775,35 @@ case 123:
}
break;
case 124:
-#line 1690 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1685 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.queue_options.qtype = ALTQT_CBQ;
yyval.v.queue_options.data.cbq_opts.flags = 0;
}
break;
case 125:
-#line 1694 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1689 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.queue_options.qtype = ALTQT_CBQ;
yyval.v.queue_options.data.cbq_opts.flags = yystack.l_mark[-1].v.number;
}
break;
case 126:
-#line 1698 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1693 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.queue_options.qtype = ALTQT_PRIQ;
yyval.v.queue_options.data.priq_opts.flags = 0;
}
break;
case 127:
-#line 1702 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1697 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.queue_options.qtype = ALTQT_PRIQ;
yyval.v.queue_options.data.priq_opts.flags = yystack.l_mark[-1].v.number;
}
break;
case 128:
-#line 1706 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1701 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.queue_options.qtype = ALTQT_HFSC;
bzero(&yyval.v.queue_options.data.hfsc_opts,
@@ -5700,22 +5811,52 @@ case 128:
}
break;
case 129:
-#line 1711 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1706 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.queue_options.qtype = ALTQT_HFSC;
yyval.v.queue_options.data.hfsc_opts = yystack.l_mark[-1].v.hfsc_opts;
}
break;
case 130:
-#line 1717 "../../freebsd/contrib/pf/pfctl/parse.y"
- { yyval.v.number |= yystack.l_mark[0].v.number; }
+#line 1710 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ yyval.v.queue_options.qtype = ALTQT_FAIRQ;
+ bzero(&yyval.v.queue_options.data.fairq_opts,
+ sizeof(struct node_fairq_opts));
+ }
break;
case 131:
-#line 1718 "../../freebsd/contrib/pf/pfctl/parse.y"
- { yyval.v.number |= yystack.l_mark[0].v.number; }
+#line 1715 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ yyval.v.queue_options.qtype = ALTQT_FAIRQ;
+ yyval.v.queue_options.data.fairq_opts = yystack.l_mark[-1].v.fairq_opts;
+ }
break;
case 132:
-#line 1721 "../../freebsd/contrib/pf/pfctl/parse.y"
+#line 1719 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ yyval.v.queue_options.qtype = ALTQT_CODEL;
+ bzero(&yyval.v.queue_options.data.codel_opts,
+ sizeof(struct codel_opts));
+ }
+break;
+case 133:
+#line 1724 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ yyval.v.queue_options.qtype = ALTQT_CODEL;
+ yyval.v.queue_options.data.codel_opts = yystack.l_mark[-1].v.codel_opts;
+ }
+break;
+case 134:
+#line 1730 "../../freebsd/sbin/pfctl/parse.y"
+ { yyval.v.number |= yystack.l_mark[0].v.number; }
+break;
+case 135:
+#line 1731 "../../freebsd/sbin/pfctl/parse.y"
+ { yyval.v.number |= yystack.l_mark[0].v.number; }
+break;
+case 136:
+#line 1734 "../../freebsd/sbin/pfctl/parse.y"
{
if (!strcmp(yystack.l_mark[0].v.string, "default"))
yyval.v.number = CBQCLF_DEFCLASS;
@@ -5727,6 +5868,8 @@ case 132:
yyval.v.number = CBQCLF_RED|CBQCLF_ECN;
else if (!strcmp(yystack.l_mark[0].v.string, "rio"))
yyval.v.number = CBQCLF_RIO;
+ else if (!strcmp(yystack.l_mark[0].v.string, "codel"))
+ yyval.v.number = CBQCLF_CODEL;
else {
yyerror("unknown cbq flag \"%s\"", yystack.l_mark[0].v.string);
free(yystack.l_mark[0].v.string);
@@ -5735,16 +5878,16 @@ case 132:
free(yystack.l_mark[0].v.string);
}
break;
-case 133:
-#line 1741 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 137:
+#line 1756 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.number |= yystack.l_mark[0].v.number; }
break;
-case 134:
-#line 1742 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 138:
+#line 1757 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.number |= yystack.l_mark[0].v.number; }
break;
-case 135:
-#line 1745 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 139:
+#line 1760 "../../freebsd/sbin/pfctl/parse.y"
{
if (!strcmp(yystack.l_mark[0].v.string, "default"))
yyval.v.number = PRCF_DEFAULTCLASS;
@@ -5754,6 +5897,8 @@ case 135:
yyval.v.number = PRCF_RED|PRCF_ECN;
else if (!strcmp(yystack.l_mark[0].v.string, "rio"))
yyval.v.number = PRCF_RIO;
+ else if (!strcmp(yystack.l_mark[0].v.string, "codel"))
+ yyval.v.number = PRCF_CODEL;
else {
yyerror("unknown priq flag \"%s\"", yystack.l_mark[0].v.string);
free(yystack.l_mark[0].v.string);
@@ -5762,21 +5907,21 @@ case 135:
free(yystack.l_mark[0].v.string);
}
break;
-case 136:
-#line 1763 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 140:
+#line 1780 "../../freebsd/sbin/pfctl/parse.y"
{
bzero(&hfsc_opts,
sizeof(struct node_hfsc_opts));
}
break;
-case 137:
-#line 1767 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 141:
+#line 1784 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.hfsc_opts = hfsc_opts;
}
break;
-case 140:
-#line 1776 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 144:
+#line 1793 "../../freebsd/sbin/pfctl/parse.y"
{
if (hfsc_opts.linkshare.used) {
yyerror("linkshare already specified");
@@ -5786,8 +5931,8 @@ case 140:
hfsc_opts.linkshare.used = 1;
}
break;
-case 141:
-#line 1785 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 145:
+#line 1802 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[-3].v.number < 0 || yystack.l_mark[-3].v.number > INT_MAX) {
yyerror("timing in curve out of range");
@@ -5803,8 +5948,8 @@ case 141:
hfsc_opts.linkshare.used = 1;
}
break;
-case 142:
-#line 1799 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 146:
+#line 1816 "../../freebsd/sbin/pfctl/parse.y"
{
if (hfsc_opts.realtime.used) {
yyerror("realtime already specified");
@@ -5814,8 +5959,8 @@ case 142:
hfsc_opts.realtime.used = 1;
}
break;
-case 143:
-#line 1808 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 147:
+#line 1825 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[-3].v.number < 0 || yystack.l_mark[-3].v.number > INT_MAX) {
yyerror("timing in curve out of range");
@@ -5831,8 +5976,8 @@ case 143:
hfsc_opts.realtime.used = 1;
}
break;
-case 144:
-#line 1822 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 148:
+#line 1839 "../../freebsd/sbin/pfctl/parse.y"
{
if (hfsc_opts.upperlimit.used) {
yyerror("upperlimit already specified");
@@ -5842,8 +5987,8 @@ case 144:
hfsc_opts.upperlimit.used = 1;
}
break;
-case 145:
-#line 1831 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 149:
+#line 1848 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[-3].v.number < 0 || yystack.l_mark[-3].v.number > INT_MAX) {
yyerror("timing in curve out of range");
@@ -5859,8 +6004,8 @@ case 145:
hfsc_opts.upperlimit.used = 1;
}
break;
-case 146:
-#line 1845 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 150:
+#line 1862 "../../freebsd/sbin/pfctl/parse.y"
{
if (!strcmp(yystack.l_mark[0].v.string, "default"))
hfsc_opts.flags |= HFCF_DEFAULTCLASS;
@@ -5870,6 +6015,8 @@ case 146:
hfsc_opts.flags |= HFCF_RED|HFCF_ECN;
else if (!strcmp(yystack.l_mark[0].v.string, "rio"))
hfsc_opts.flags |= HFCF_RIO;
+ else if (!strcmp(yystack.l_mark[0].v.string, "codel"))
+ hfsc_opts.flags |= HFCF_CODEL;
else {
yyerror("unknown hfsc flag \"%s\"", yystack.l_mark[0].v.string);
free(yystack.l_mark[0].v.string);
@@ -5878,32 +6025,148 @@ case 146:
free(yystack.l_mark[0].v.string);
}
break;
-case 147:
-#line 1863 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 151:
+#line 1882 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ bzero(&fairq_opts,
+ sizeof(struct node_fairq_opts));
+ }
+break;
+case 152:
+#line 1886 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ yyval.v.fairq_opts = fairq_opts;
+ }
+break;
+case 155:
+#line 1895 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ if (fairq_opts.linkshare.used) {
+ yyerror("linkshare already specified");
+ YYERROR;
+ }
+ fairq_opts.linkshare.m2 = yystack.l_mark[0].v.queue_bwspec;
+ fairq_opts.linkshare.used = 1;
+ }
+break;
+case 156:
+#line 1903 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ if (fairq_opts.linkshare.used) {
+ yyerror("linkshare already specified");
+ YYERROR;
+ }
+ fairq_opts.linkshare.m1 = yystack.l_mark[-3].v.queue_bwspec;
+ fairq_opts.linkshare.d = yystack.l_mark[-2].v.number;
+ fairq_opts.linkshare.m2 = yystack.l_mark[-1].v.queue_bwspec;
+ fairq_opts.linkshare.used = 1;
+ }
+break;
+case 157:
+#line 1913 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ fairq_opts.hogs_bw = yystack.l_mark[0].v.queue_bwspec;
+ }
+break;
+case 158:
+#line 1916 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ fairq_opts.nbuckets = yystack.l_mark[0].v.number;
+ }
+break;
+case 159:
+#line 1919 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ if (!strcmp(yystack.l_mark[0].v.string, "default"))
+ fairq_opts.flags |= FARF_DEFAULTCLASS;
+ else if (!strcmp(yystack.l_mark[0].v.string, "red"))
+ fairq_opts.flags |= FARF_RED;
+ else if (!strcmp(yystack.l_mark[0].v.string, "ecn"))
+ fairq_opts.flags |= FARF_RED|FARF_ECN;
+ else if (!strcmp(yystack.l_mark[0].v.string, "rio"))
+ fairq_opts.flags |= FARF_RIO;
+ else if (!strcmp(yystack.l_mark[0].v.string, "codel"))
+ fairq_opts.flags |= FARF_CODEL;
+ else {
+ yyerror("unknown fairq flag \"%s\"", yystack.l_mark[0].v.string);
+ free(yystack.l_mark[0].v.string);
+ YYERROR;
+ }
+ free(yystack.l_mark[0].v.string);
+ }
+break;
+case 160:
+#line 1939 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ bzero(&codel_opts,
+ sizeof(struct codel_opts));
+ }
+break;
+case 161:
+#line 1943 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ yyval.v.codel_opts = codel_opts;
+ }
+break;
+case 164:
+#line 1952 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ if (codel_opts.interval) {
+ yyerror("interval already specified");
+ YYERROR;
+ }
+ codel_opts.interval = yystack.l_mark[0].v.number;
+ }
+break;
+case 165:
+#line 1959 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ if (codel_opts.target) {
+ yyerror("target already specified");
+ YYERROR;
+ }
+ codel_opts.target = yystack.l_mark[0].v.number;
+ }
+break;
+case 166:
+#line 1966 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ if (!strcmp(yystack.l_mark[0].v.string, "ecn"))
+ codel_opts.ecn = 1;
+ else {
+ yyerror("unknown codel option \"%s\"", yystack.l_mark[0].v.string);
+ free(yystack.l_mark[0].v.string);
+ YYERROR;
+ }
+ free(yystack.l_mark[0].v.string);
+ }
+break;
+case 167:
+#line 1978 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.queue = NULL; }
break;
-case 148:
-#line 1864 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 168:
+#line 1979 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.queue = yystack.l_mark[0].v.queue; }
break;
-case 149:
-#line 1865 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 169:
+#line 1980 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.queue = yystack.l_mark[-1].v.queue; }
break;
-case 150:
-#line 1868 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 170:
+#line 1983 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.queue = yystack.l_mark[-1].v.queue; }
break;
-case 151:
-#line 1869 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 171:
+#line 1984 "../../freebsd/sbin/pfctl/parse.y"
{
yystack.l_mark[-3].v.queue->tail->next = yystack.l_mark[-1].v.queue;
yystack.l_mark[-3].v.queue->tail = yystack.l_mark[-1].v.queue;
yyval.v.queue = yystack.l_mark[-3].v.queue;
}
break;
-case 152:
-#line 1876 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 172:
+#line 1991 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.queue = calloc(1, sizeof(struct node_queue));
if (yyval.v.queue == NULL)
@@ -5921,8 +6184,8 @@ case 152:
yyval.v.queue->tail = yyval.v.queue;
}
break;
-case 153:
-#line 1896 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 173:
+#line 2011 "../../freebsd/sbin/pfctl/parse.y"
{
struct pf_rule r;
struct node_state_opt *o;
@@ -5961,6 +6224,18 @@ case 153:
r.prob = yystack.l_mark[0].v.filter_opts.prob;
r.rtableid = yystack.l_mark[0].v.filter_opts.rtableid;
+ if (yystack.l_mark[0].v.filter_opts.marker & FOM_PRIO) {
+ if (yystack.l_mark[0].v.filter_opts.prio == 0)
+ r.prio = PF_PRIO_ZERO;
+ else
+ r.prio = yystack.l_mark[0].v.filter_opts.prio;
+ }
+ if (yystack.l_mark[0].v.filter_opts.marker & FOM_SETPRIO) {
+ r.set_prio[0] = yystack.l_mark[0].v.filter_opts.set_prio[0];
+ r.set_prio[1] = yystack.l_mark[0].v.filter_opts.set_prio[1];
+ r.scrub_flags |= PFSTATE_SETPRIO;
+ }
+
r.af = yystack.l_mark[-3].v.i;
if (yystack.l_mark[0].v.filter_opts.tag)
if (strlcpy(r.tagname, yystack.l_mark[0].v.filter_opts.tag,
@@ -6163,15 +6438,6 @@ case 153:
}
r.rule_flag |= PFRULE_STATESLOPPY;
break;
- case PF_STATE_OPT_PFLOW:
- if (r.rule_flag & PFRULE_PFLOW) {
- yyerror("state pflow "
- "option: multiple "
- "definitions");
- YYERROR;
- }
- r.rule_flag |= PFRULE_PFLOW;
- break;
case PF_STATE_OPT_TIMEOUT:
if (o->data.timeout.number ==
PFTM_ADAPTIVE_START ||
@@ -6343,43 +6609,43 @@ case 153:
yystack.l_mark[0].v.filter_opts.uid, yystack.l_mark[0].v.filter_opts.gid, yystack.l_mark[0].v.filter_opts.icmpspec, "");
}
break;
-case 154:
-#line 2317 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 174:
+#line 2435 "../../freebsd/sbin/pfctl/parse.y"
{
bzero(&filter_opts, sizeof filter_opts);
filter_opts.rtableid = -1;
}
break;
-case 155:
-#line 2322 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 175:
+#line 2440 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.filter_opts = filter_opts; }
break;
-case 156:
-#line 2323 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 176:
+#line 2441 "../../freebsd/sbin/pfctl/parse.y"
{
bzero(&filter_opts, sizeof filter_opts);
filter_opts.rtableid = -1;
yyval.v.filter_opts = filter_opts;
}
break;
-case 159:
-#line 2334 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 179:
+#line 2452 "../../freebsd/sbin/pfctl/parse.y"
{
if (filter_opts.uid)
yystack.l_mark[0].v.uid->tail->next = filter_opts.uid;
filter_opts.uid = yystack.l_mark[0].v.uid;
}
break;
-case 160:
-#line 2339 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 180:
+#line 2457 "../../freebsd/sbin/pfctl/parse.y"
{
if (filter_opts.gid)
yystack.l_mark[0].v.gid->tail->next = filter_opts.gid;
filter_opts.gid = yystack.l_mark[0].v.gid;
}
break;
-case 161:
-#line 2344 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 181:
+#line 2462 "../../freebsd/sbin/pfctl/parse.y"
{
if (filter_opts.marker & FOM_FLAGS) {
yyerror("flags cannot be redefined");
@@ -6392,8 +6658,8 @@ case 161:
filter_opts.flags.w2 |= yystack.l_mark[0].v.b.w2;
}
break;
-case 162:
-#line 2355 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 182:
+#line 2473 "../../freebsd/sbin/pfctl/parse.y"
{
if (filter_opts.marker & FOM_ICMP) {
yyerror("icmp-type cannot be redefined");
@@ -6403,8 +6669,23 @@ case 162:
filter_opts.icmpspec = yystack.l_mark[0].v.icmp;
}
break;
-case 163:
-#line 2363 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 183:
+#line 2481 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ if (filter_opts.marker & FOM_PRIO) {
+ yyerror("prio cannot be redefined");
+ YYERROR;
+ }
+ if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > PF_PRIO_MAX) {
+ yyerror("prio must be 0 - %u", PF_PRIO_MAX);
+ YYERROR;
+ }
+ filter_opts.marker |= FOM_PRIO;
+ filter_opts.prio = yystack.l_mark[0].v.number;
+ }
+break;
+case 184:
+#line 2493 "../../freebsd/sbin/pfctl/parse.y"
{
if (filter_opts.marker & FOM_TOS) {
yyerror("tos cannot be redefined");
@@ -6414,8 +6695,8 @@ case 163:
filter_opts.tos = yystack.l_mark[0].v.number;
}
break;
-case 164:
-#line 2371 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 185:
+#line 2501 "../../freebsd/sbin/pfctl/parse.y"
{
if (filter_opts.marker & FOM_KEEP) {
yyerror("modulate or keep cannot be redefined");
@@ -6426,20 +6707,20 @@ case 164:
filter_opts.keep.options = yystack.l_mark[0].v.keep_state.options;
}
break;
-case 165:
-#line 2380 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 186:
+#line 2510 "../../freebsd/sbin/pfctl/parse.y"
{
filter_opts.fragment = 1;
}
break;
-case 166:
-#line 2383 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 187:
+#line 2513 "../../freebsd/sbin/pfctl/parse.y"
{
filter_opts.allowopts = 1;
}
break;
-case 167:
-#line 2386 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 188:
+#line 2516 "../../freebsd/sbin/pfctl/parse.y"
{
if (filter_opts.label) {
yyerror("label cannot be redefined");
@@ -6448,8 +6729,8 @@ case 167:
filter_opts.label = yystack.l_mark[0].v.string;
}
break;
-case 168:
-#line 2393 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 189:
+#line 2523 "../../freebsd/sbin/pfctl/parse.y"
{
if (filter_opts.queues.qname) {
yyerror("queue cannot be redefined");
@@ -6458,21 +6739,21 @@ case 168:
filter_opts.queues = yystack.l_mark[0].v.qassign;
}
break;
-case 169:
-#line 2400 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 190:
+#line 2530 "../../freebsd/sbin/pfctl/parse.y"
{
filter_opts.tag = yystack.l_mark[0].v.string;
}
break;
-case 170:
-#line 2403 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 191:
+#line 2533 "../../freebsd/sbin/pfctl/parse.y"
{
filter_opts.match_tag = yystack.l_mark[0].v.string;
filter_opts.match_tag_not = yystack.l_mark[-2].v.number;
}
break;
-case 171:
-#line 2407 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 192:
+#line 2537 "../../freebsd/sbin/pfctl/parse.y"
{
double p;
@@ -6486,8 +6767,8 @@ case 171:
filter_opts.prob = 1;
}
break;
-case 172:
-#line 2419 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 193:
+#line 2549 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > rt_tableid_max()) {
yyerror("invalid rtable id");
@@ -6496,8 +6777,8 @@ case 172:
filter_opts.rtableid = yystack.l_mark[0].v.number;
}
break;
-case 173:
-#line 2426 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 194:
+#line 2556 "../../freebsd/sbin/pfctl/parse.y"
{
#ifdef __FreeBSD__
filter_opts.divert.port = yystack.l_mark[0].v.range.a;
@@ -6508,8 +6789,8 @@ case 173:
#endif
}
break;
-case 174:
-#line 2435 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 195:
+#line 2565 "../../freebsd/sbin/pfctl/parse.y"
{
#ifndef __FreeBSD__
if ((filter_opts.divert.addr = host(yystack.l_mark[-2].v.string)) == NULL) {
@@ -6529,8 +6810,8 @@ case 174:
}
}
break;
-case 175:
-#line 2453 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 196:
+#line 2583 "../../freebsd/sbin/pfctl/parse.y"
{
#ifdef __FreeBSD__
yyerror("divert-reply has no meaning in FreeBSD pf(4)");
@@ -6540,8 +6821,50 @@ case 175:
#endif
}
break;
-case 176:
-#line 2463 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 198:
+#line 2594 "../../freebsd/sbin/pfctl/parse.y"
+ { yyval.v.filter_opts = filter_opts; }
+break;
+case 199:
+#line 2595 "../../freebsd/sbin/pfctl/parse.y"
+ { yyval.v.filter_opts = filter_opts; }
+break;
+case 202:
+#line 2602 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ if (filter_opts.marker & FOM_SETPRIO) {
+ yyerror("prio cannot be redefined");
+ YYERROR;
+ }
+ filter_opts.marker |= FOM_SETPRIO;
+ filter_opts.set_prio[0] = yystack.l_mark[0].v.b.b1;
+ filter_opts.set_prio[1] = yystack.l_mark[0].v.b.b2;
+ }
+break;
+case 203:
+#line 2611 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > PF_PRIO_MAX) {
+ yyerror("prio must be 0 - %u", PF_PRIO_MAX);
+ YYERROR;
+ }
+ yyval.v.b.b1 = yyval.v.b.b2 = yystack.l_mark[0].v.number;
+ }
+break;
+case 204:
+#line 2618 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ if (yystack.l_mark[-3].v.number < 0 || yystack.l_mark[-3].v.number > PF_PRIO_MAX ||
+ yystack.l_mark[-1].v.number < 0 || yystack.l_mark[-1].v.number > PF_PRIO_MAX) {
+ yyerror("prio must be 0 - %u", PF_PRIO_MAX);
+ YYERROR;
+ }
+ yyval.v.b.b1 = yystack.l_mark[-3].v.number;
+ yyval.v.b.b2 = yystack.l_mark[-1].v.number;
+ }
+break;
+case 205:
+#line 2629 "../../freebsd/sbin/pfctl/parse.y"
{
char *e;
double p = strtod(yystack.l_mark[0].v.string, &e);
@@ -6559,46 +6882,46 @@ case 176:
yyval.v.probability = p;
}
break;
-case 177:
-#line 2479 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 206:
+#line 2645 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.probability = (double)yystack.l_mark[0].v.number;
}
break;
-case 178:
-#line 2485 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 207:
+#line 2651 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.b.b1 = PF_PASS; yyval.v.b.b2 = yyval.v.b.w = 0; }
break;
-case 179:
-#line 2486 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 208:
+#line 2652 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.b = yystack.l_mark[0].v.b; yyval.v.b.b1 = PF_DROP; }
break;
-case 180:
-#line 2489 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 209:
+#line 2655 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.b.b2 = blockpolicy;
yyval.v.b.w = returnicmpdefault;
yyval.v.b.w2 = returnicmp6default;
}
break;
-case 181:
-#line 2494 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 210:
+#line 2660 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.b.b2 = PFRULE_DROP;
yyval.v.b.w = 0;
yyval.v.b.w2 = 0;
}
break;
-case 182:
-#line 2499 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 211:
+#line 2665 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.b.b2 = PFRULE_RETURNRST;
yyval.v.b.w = 0;
yyval.v.b.w2 = 0;
}
break;
-case 183:
-#line 2504 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 212:
+#line 2670 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[-1].v.number < 0 || yystack.l_mark[-1].v.number > 255) {
yyerror("illegal ttl value %d", yystack.l_mark[-1].v.number);
@@ -6609,56 +6932,56 @@ case 183:
yyval.v.b.w2 = 0;
}
break;
-case 184:
-#line 2513 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 213:
+#line 2679 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.b.b2 = PFRULE_RETURNICMP;
yyval.v.b.w = returnicmpdefault;
yyval.v.b.w2 = returnicmp6default;
}
break;
-case 185:
-#line 2518 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 214:
+#line 2684 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.b.b2 = PFRULE_RETURNICMP;
yyval.v.b.w = returnicmpdefault;
yyval.v.b.w2 = returnicmp6default;
}
break;
-case 186:
-#line 2523 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 215:
+#line 2689 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.b.b2 = PFRULE_RETURNICMP;
yyval.v.b.w = yystack.l_mark[-1].v.number;
yyval.v.b.w2 = returnicmpdefault;
}
break;
-case 187:
-#line 2528 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 216:
+#line 2694 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.b.b2 = PFRULE_RETURNICMP;
yyval.v.b.w = returnicmpdefault;
yyval.v.b.w2 = yystack.l_mark[-1].v.number;
}
break;
-case 188:
-#line 2533 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 217:
+#line 2699 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.b.b2 = PFRULE_RETURNICMP;
yyval.v.b.w = yystack.l_mark[-3].v.number;
yyval.v.b.w2 = yystack.l_mark[-1].v.number;
}
break;
-case 189:
-#line 2538 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 218:
+#line 2704 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.b.b2 = PFRULE_RETURN;
yyval.v.b.w = returnicmpdefault;
yyval.v.b.w2 = returnicmp6default;
}
break;
-case 190:
-#line 2545 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 219:
+#line 2711 "../../freebsd/sbin/pfctl/parse.y"
{
if (!(yyval.v.number = parseicmpspec(yystack.l_mark[0].v.string, AF_INET))) {
free(yystack.l_mark[0].v.string);
@@ -6667,8 +6990,8 @@ case 190:
free(yystack.l_mark[0].v.string);
}
break;
-case 191:
-#line 2552 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 220:
+#line 2718 "../../freebsd/sbin/pfctl/parse.y"
{
u_int8_t icmptype;
@@ -6680,8 +7003,8 @@ case 191:
yyval.v.number = (icmptype << 8 | yystack.l_mark[0].v.number);
}
break;
-case 192:
-#line 2564 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 221:
+#line 2730 "../../freebsd/sbin/pfctl/parse.y"
{
if (!(yyval.v.number = parseicmpspec(yystack.l_mark[0].v.string, AF_INET6))) {
free(yystack.l_mark[0].v.string);
@@ -6690,8 +7013,8 @@ case 192:
free(yystack.l_mark[0].v.string);
}
break;
-case 193:
-#line 2571 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 222:
+#line 2737 "../../freebsd/sbin/pfctl/parse.y"
{
u_int8_t icmptype;
@@ -6703,63 +7026,63 @@ case 193:
yyval.v.number = (icmptype << 8 | yystack.l_mark[0].v.number);
}
break;
-case 194:
-#line 2583 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 223:
+#line 2749 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = PF_INOUT; }
break;
-case 195:
-#line 2584 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 224:
+#line 2750 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = PF_IN; }
break;
-case 196:
-#line 2585 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 225:
+#line 2751 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = PF_OUT; }
break;
-case 197:
-#line 2588 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 226:
+#line 2754 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.logquick.quick = 0; }
break;
-case 198:
-#line 2589 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 227:
+#line 2755 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.logquick.quick = 1; }
break;
-case 199:
-#line 2592 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 228:
+#line 2758 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.logquick.log = 0; yyval.v.logquick.quick = 0; yyval.v.logquick.logif = 0; }
break;
-case 200:
-#line 2593 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 229:
+#line 2759 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.logquick = yystack.l_mark[0].v.logquick; yyval.v.logquick.quick = 0; }
break;
-case 201:
-#line 2594 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 230:
+#line 2760 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.logquick.quick = 1; yyval.v.logquick.log = 0; yyval.v.logquick.logif = 0; }
break;
-case 202:
-#line 2595 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 231:
+#line 2761 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.logquick = yystack.l_mark[-1].v.logquick; yyval.v.logquick.quick = 1; }
break;
-case 203:
-#line 2596 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 232:
+#line 2762 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.logquick = yystack.l_mark[0].v.logquick; yyval.v.logquick.quick = 1; }
break;
-case 204:
-#line 2599 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 233:
+#line 2765 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.logquick.log = PF_LOG; yyval.v.logquick.logif = 0; }
break;
-case 205:
-#line 2600 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 234:
+#line 2766 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.logquick.log = PF_LOG | yystack.l_mark[-1].v.logquick.log;
yyval.v.logquick.logif = yystack.l_mark[-1].v.logquick.logif;
}
break;
-case 206:
-#line 2606 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 235:
+#line 2772 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.logquick = yystack.l_mark[0].v.logquick; }
break;
-case 207:
-#line 2607 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 236:
+#line 2773 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.logquick.log = yystack.l_mark[-2].v.logquick.log | yystack.l_mark[0].v.logquick.log;
yyval.v.logquick.logif = yystack.l_mark[0].v.logquick.logif;
@@ -6767,20 +7090,20 @@ case 207:
yyval.v.logquick.logif = yystack.l_mark[-2].v.logquick.logif;
}
break;
-case 208:
-#line 2615 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 237:
+#line 2781 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.logquick.log = PF_LOG_ALL; yyval.v.logquick.logif = 0; }
break;
-case 209:
-#line 2616 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 238:
+#line 2782 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.logquick.log = PF_LOG_SOCKET_LOOKUP; yyval.v.logquick.logif = 0; }
break;
-case 210:
-#line 2617 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 239:
+#line 2783 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.logquick.log = PF_LOG_SOCKET_LOOKUP; yyval.v.logquick.logif = 0; }
break;
-case 211:
-#line 2618 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 240:
+#line 2784 "../../freebsd/sbin/pfctl/parse.y"
{
const char *errstr;
u_int i;
@@ -6801,36 +7124,36 @@ case 211:
yyval.v.logquick.logif = i;
}
break;
-case 212:
-#line 2639 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 241:
+#line 2805 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.interface = NULL; }
break;
-case 213:
-#line 2640 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 242:
+#line 2806 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.interface = yystack.l_mark[0].v.interface; }
break;
-case 214:
-#line 2641 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 243:
+#line 2807 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.interface = yystack.l_mark[-1].v.interface; }
break;
-case 215:
-#line 2644 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 244:
+#line 2810 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.interface = yystack.l_mark[-1].v.interface; }
break;
-case 216:
-#line 2645 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 245:
+#line 2811 "../../freebsd/sbin/pfctl/parse.y"
{
yystack.l_mark[-3].v.interface->tail->next = yystack.l_mark[-1].v.interface;
yystack.l_mark[-3].v.interface->tail = yystack.l_mark[-1].v.interface;
yyval.v.interface = yystack.l_mark[-3].v.interface;
}
break;
-case 217:
-#line 2652 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 246:
+#line 2818 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.interface = yystack.l_mark[0].v.interface; yyval.v.interface->not = yystack.l_mark[-1].v.number; }
break;
-case 218:
-#line 2655 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 247:
+#line 2821 "../../freebsd/sbin/pfctl/parse.y"
{
struct node_host *n;
@@ -6854,44 +7177,44 @@ case 218:
yyval.v.interface->tail = yyval.v.interface;
}
break;
-case 219:
-#line 2679 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 248:
+#line 2845 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = 0; }
break;
-case 220:
-#line 2680 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 249:
+#line 2846 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = AF_INET; }
break;
-case 221:
-#line 2681 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 250:
+#line 2847 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = AF_INET6; }
break;
-case 222:
-#line 2684 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 251:
+#line 2850 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.proto = NULL; }
break;
-case 223:
-#line 2685 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 252:
+#line 2851 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.proto = yystack.l_mark[0].v.proto; }
break;
-case 224:
-#line 2686 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 253:
+#line 2852 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.proto = yystack.l_mark[-1].v.proto; }
break;
-case 225:
-#line 2689 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 254:
+#line 2855 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.proto = yystack.l_mark[-1].v.proto; }
break;
-case 226:
-#line 2690 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 255:
+#line 2856 "../../freebsd/sbin/pfctl/parse.y"
{
yystack.l_mark[-3].v.proto->tail->next = yystack.l_mark[-1].v.proto;
yystack.l_mark[-3].v.proto->tail = yystack.l_mark[-1].v.proto;
yyval.v.proto = yystack.l_mark[-3].v.proto;
}
break;
-case 227:
-#line 2697 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 256:
+#line 2863 "../../freebsd/sbin/pfctl/parse.y"
{
u_int8_t pr;
@@ -6908,8 +7231,8 @@ case 227:
yyval.v.proto->tail = yyval.v.proto;
}
break;
-case 228:
-#line 2714 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 257:
+#line 2880 "../../freebsd/sbin/pfctl/parse.y"
{
struct protoent *p;
@@ -6923,8 +7246,8 @@ case 228:
free(yystack.l_mark[0].v.string);
}
break;
-case 229:
-#line 2726 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 258:
+#line 2892 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > 255) {
yyerror("protocol outside range");
@@ -6932,8 +7255,8 @@ case 229:
}
}
break;
-case 230:
-#line 2734 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 259:
+#line 2900 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.fromto.src.host = NULL;
yyval.v.fromto.src.port = NULL;
@@ -6942,28 +7265,28 @@ case 230:
yyval.v.fromto.src_os = NULL;
}
break;
-case 231:
-#line 2741 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 260:
+#line 2907 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.fromto.src = yystack.l_mark[-2].v.peer;
yyval.v.fromto.src_os = yystack.l_mark[-1].v.os;
yyval.v.fromto.dst = yystack.l_mark[0].v.peer;
}
break;
-case 232:
-#line 2748 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 261:
+#line 2914 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.os = NULL; }
break;
-case 233:
-#line 2749 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 262:
+#line 2915 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.os = yystack.l_mark[0].v.os; }
break;
-case 234:
-#line 2750 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 263:
+#line 2916 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.os = yystack.l_mark[-1].v.os; }
break;
-case 235:
-#line 2753 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 264:
+#line 2919 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.os = calloc(1, sizeof(struct node_os));
if (yyval.v.os == NULL)
@@ -6972,40 +7295,40 @@ case 235:
yyval.v.os->tail = yyval.v.os;
}
break;
-case 236:
-#line 2762 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 265:
+#line 2928 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.os = yystack.l_mark[-1].v.os; }
break;
-case 237:
-#line 2763 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 266:
+#line 2929 "../../freebsd/sbin/pfctl/parse.y"
{
yystack.l_mark[-3].v.os->tail->next = yystack.l_mark[-1].v.os;
yystack.l_mark[-3].v.os->tail = yystack.l_mark[-1].v.os;
yyval.v.os = yystack.l_mark[-3].v.os;
}
break;
-case 238:
-#line 2770 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 267:
+#line 2936 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.peer.host = NULL;
yyval.v.peer.port = NULL;
}
break;
-case 239:
-#line 2774 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 268:
+#line 2940 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.peer = yystack.l_mark[0].v.peer;
}
break;
-case 240:
-#line 2779 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 269:
+#line 2945 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.peer.host = NULL;
yyval.v.peer.port = NULL;
}
break;
-case 241:
-#line 2783 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 270:
+#line 2949 "../../freebsd/sbin/pfctl/parse.y"
{
if (disallow_urpf_failed(yystack.l_mark[0].v.peer.host, "\"urpf-failed\" is "
"not permitted in a destination address"))
@@ -7013,53 +7336,53 @@ case 241:
yyval.v.peer = yystack.l_mark[0].v.peer;
}
break;
-case 242:
-#line 2791 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 271:
+#line 2957 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.peer.host = yystack.l_mark[0].v.host;
yyval.v.peer.port = NULL;
}
break;
-case 243:
-#line 2795 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 272:
+#line 2961 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.peer.host = yystack.l_mark[-2].v.host;
yyval.v.peer.port = yystack.l_mark[0].v.port;
}
break;
-case 244:
-#line 2799 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 273:
+#line 2965 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.peer.host = NULL;
yyval.v.peer.port = yystack.l_mark[0].v.port;
}
break;
-case 247:
-#line 2809 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 276:
+#line 2975 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.host = NULL; }
break;
-case 248:
-#line 2810 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 277:
+#line 2976 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.host = yystack.l_mark[0].v.host; }
break;
-case 249:
-#line 2811 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 278:
+#line 2977 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.host = yystack.l_mark[-1].v.host; }
break;
-case 250:
-#line 2814 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 279:
+#line 2980 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.host = yystack.l_mark[0].v.host; }
break;
-case 251:
-#line 2815 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 280:
+#line 2981 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.host = NULL; }
break;
-case 252:
-#line 2818 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 281:
+#line 2984 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.host = yystack.l_mark[-1].v.host; }
break;
-case 253:
-#line 2819 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 282:
+#line 2985 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[-1].v.host == NULL)
yyval.v.host = yystack.l_mark[-3].v.host;
@@ -7072,8 +7395,8 @@ case 253:
}
}
break;
-case 254:
-#line 2832 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 283:
+#line 2998 "../../freebsd/sbin/pfctl/parse.y"
{
struct node_host *n;
@@ -7082,8 +7405,8 @@ case 254:
yyval.v.host = yystack.l_mark[0].v.host;
}
break;
-case 255:
-#line 2839 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 284:
+#line 3005 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.host = calloc(1, sizeof(struct node_host));
if (yyval.v.host == NULL)
@@ -7094,8 +7417,8 @@ case 255:
yyval.v.host->tail = yyval.v.host;
}
break;
-case 256:
-#line 2848 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 285:
+#line 3014 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.host = calloc(1, sizeof(struct node_host));
if (yyval.v.host == NULL)
@@ -7106,8 +7429,8 @@ case 256:
yyval.v.host->tail = yyval.v.host;
}
break;
-case 257:
-#line 2859 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 286:
+#line 3025 "../../freebsd/sbin/pfctl/parse.y"
{
if ((yyval.v.host = host(yystack.l_mark[0].v.string)) == NULL) {
/* error. "any" is handled elsewhere */
@@ -7119,8 +7442,8 @@ case 257:
}
break;
-case 258:
-#line 2869 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 287:
+#line 3035 "../../freebsd/sbin/pfctl/parse.y"
{
struct node_host *b, *e;
@@ -7155,8 +7478,8 @@ case 258:
free(yystack.l_mark[0].v.string);
}
break;
-case 259:
-#line 2902 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 288:
+#line 3068 "../../freebsd/sbin/pfctl/parse.y"
{
char *buf;
@@ -7172,8 +7495,8 @@ case 259:
free(buf);
}
break;
-case 260:
-#line 2916 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 289:
+#line 3082 "../../freebsd/sbin/pfctl/parse.y"
{
char *buf;
@@ -7193,8 +7516,8 @@ case 260:
free(buf);
}
break;
-case 262:
-#line 2935 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 291:
+#line 3101 "../../freebsd/sbin/pfctl/parse.y"
{
struct node_host *n;
@@ -7207,8 +7530,8 @@ case 262:
set_ipmask(n, yystack.l_mark[0].v.number);
}
break;
-case 263:
-#line 2946 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 292:
+#line 3112 "../../freebsd/sbin/pfctl/parse.y"
{
if (strlen(yystack.l_mark[-1].v.string) >= PF_TABLE_NAME_SIZE) {
yyerror("table name '%s' too long", yystack.l_mark[-1].v.string);
@@ -7228,31 +7551,8 @@ case 263:
yyval.v.host->tail = yyval.v.host;
}
break;
-case 264:
-#line 2964 "../../freebsd/contrib/pf/pfctl/parse.y"
- {
- yyval.v.host = calloc(1, sizeof(struct node_host));
- if (yyval.v.host == NULL) {
- free(yystack.l_mark[0].v.string);
- err(1, "host: calloc");
- }
- yyval.v.host->addr.type = PF_ADDR_RTLABEL;
- if (strlcpy(yyval.v.host->addr.v.rtlabelname, yystack.l_mark[0].v.string,
- sizeof(yyval.v.host->addr.v.rtlabelname)) >=
- sizeof(yyval.v.host->addr.v.rtlabelname)) {
- yyerror("route label too long, max %u chars",
- sizeof(yyval.v.host->addr.v.rtlabelname) - 1);
- free(yystack.l_mark[0].v.string);
- free(yyval.v.host);
- YYERROR;
- }
- yyval.v.host->next = NULL;
- yyval.v.host->tail = yyval.v.host;
- free(yystack.l_mark[0].v.string);
- }
-break;
-case 266:
-#line 2987 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 294:
+#line 3133 "../../freebsd/sbin/pfctl/parse.y"
{
u_long ulval;
@@ -7265,8 +7565,8 @@ case 266:
free(yystack.l_mark[0].v.string);
}
break;
-case 267:
-#line 3000 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 295:
+#line 3146 "../../freebsd/sbin/pfctl/parse.y"
{
int flags = 0;
char *p, *op;
@@ -7320,28 +7620,28 @@ case 267:
yyval.v.host->tail = yyval.v.host;
}
break;
-case 268:
-#line 3054 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 296:
+#line 3200 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.port = yystack.l_mark[0].v.port; }
break;
-case 269:
-#line 3055 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 297:
+#line 3201 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.port = yystack.l_mark[-1].v.port; }
break;
-case 270:
-#line 3058 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 298:
+#line 3204 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.port = yystack.l_mark[-1].v.port; }
break;
-case 271:
-#line 3059 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 299:
+#line 3205 "../../freebsd/sbin/pfctl/parse.y"
{
yystack.l_mark[-3].v.port->tail->next = yystack.l_mark[-1].v.port;
yystack.l_mark[-3].v.port->tail = yystack.l_mark[-1].v.port;
yyval.v.port = yystack.l_mark[-3].v.port;
}
break;
-case 272:
-#line 3066 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 300:
+#line 3212 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.port = calloc(1, sizeof(struct node_port));
if (yyval.v.port == NULL)
@@ -7356,8 +7656,8 @@ case 272:
yyval.v.port->tail = yyval.v.port;
}
break;
-case 273:
-#line 3079 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 301:
+#line 3225 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.range.t) {
yyerror("':' cannot be used with an other "
@@ -7374,8 +7674,8 @@ case 273:
yyval.v.port->tail = yyval.v.port;
}
break;
-case 274:
-#line 3094 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 302:
+#line 3240 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[-2].v.range.t || yystack.l_mark[0].v.range.t) {
yyerror("':' cannot be used with an other "
@@ -7392,8 +7692,8 @@ case 274:
yyval.v.port->tail = yyval.v.port;
}
break;
-case 275:
-#line 3111 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 303:
+#line 3257 "../../freebsd/sbin/pfctl/parse.y"
{
if (parseport(yystack.l_mark[0].v.string, &yyval.v.range, 0) == -1) {
free(yystack.l_mark[0].v.string);
@@ -7402,8 +7702,8 @@ case 275:
free(yystack.l_mark[0].v.string);
}
break;
-case 276:
-#line 3120 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 304:
+#line 3266 "../../freebsd/sbin/pfctl/parse.y"
{
if (parseport(yystack.l_mark[0].v.string, &yyval.v.range, PPORT_RANGE) == -1) {
free(yystack.l_mark[0].v.string);
@@ -7412,28 +7712,28 @@ case 276:
free(yystack.l_mark[0].v.string);
}
break;
-case 277:
-#line 3129 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 305:
+#line 3275 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.uid = yystack.l_mark[0].v.uid; }
break;
-case 278:
-#line 3130 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 306:
+#line 3276 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.uid = yystack.l_mark[-1].v.uid; }
break;
-case 279:
-#line 3133 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 307:
+#line 3279 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.uid = yystack.l_mark[-1].v.uid; }
break;
-case 280:
-#line 3134 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 308:
+#line 3280 "../../freebsd/sbin/pfctl/parse.y"
{
yystack.l_mark[-3].v.uid->tail->next = yystack.l_mark[-1].v.uid;
yystack.l_mark[-3].v.uid->tail = yystack.l_mark[-1].v.uid;
yyval.v.uid = yystack.l_mark[-3].v.uid;
}
break;
-case 281:
-#line 3141 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 309:
+#line 3287 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.uid = calloc(1, sizeof(struct node_uid));
if (yyval.v.uid == NULL)
@@ -7445,8 +7745,8 @@ case 281:
yyval.v.uid->tail = yyval.v.uid;
}
break;
-case 282:
-#line 3151 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 310:
+#line 3297 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number == UID_MAX && yystack.l_mark[-1].v.i != PF_OP_EQ && yystack.l_mark[-1].v.i != PF_OP_NE) {
yyerror("user unknown requires operator = or "
@@ -7463,8 +7763,8 @@ case 282:
yyval.v.uid->tail = yyval.v.uid;
}
break;
-case 283:
-#line 3166 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 311:
+#line 3312 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[-2].v.number == UID_MAX || yystack.l_mark[0].v.number == UID_MAX) {
yyerror("user unknown requires operator = or "
@@ -7481,8 +7781,8 @@ case 283:
yyval.v.uid->tail = yyval.v.uid;
}
break;
-case 284:
-#line 3183 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 312:
+#line 3329 "../../freebsd/sbin/pfctl/parse.y"
{
if (!strcmp(yystack.l_mark[0].v.string, "unknown"))
yyval.v.number = UID_MAX;
@@ -7499,8 +7799,8 @@ case 284:
free(yystack.l_mark[0].v.string);
}
break;
-case 285:
-#line 3198 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 313:
+#line 3344 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number >= UID_MAX) {
yyerror("illegal uid value %lu", yystack.l_mark[0].v.number);
@@ -7509,28 +7809,28 @@ case 285:
yyval.v.number = yystack.l_mark[0].v.number;
}
break;
-case 286:
-#line 3207 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 314:
+#line 3353 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.gid = yystack.l_mark[0].v.gid; }
break;
-case 287:
-#line 3208 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 315:
+#line 3354 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.gid = yystack.l_mark[-1].v.gid; }
break;
-case 288:
-#line 3211 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 316:
+#line 3357 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.gid = yystack.l_mark[-1].v.gid; }
break;
-case 289:
-#line 3212 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 317:
+#line 3358 "../../freebsd/sbin/pfctl/parse.y"
{
yystack.l_mark[-3].v.gid->tail->next = yystack.l_mark[-1].v.gid;
yystack.l_mark[-3].v.gid->tail = yystack.l_mark[-1].v.gid;
yyval.v.gid = yystack.l_mark[-3].v.gid;
}
break;
-case 290:
-#line 3219 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 318:
+#line 3365 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.gid = calloc(1, sizeof(struct node_gid));
if (yyval.v.gid == NULL)
@@ -7542,8 +7842,8 @@ case 290:
yyval.v.gid->tail = yyval.v.gid;
}
break;
-case 291:
-#line 3229 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 319:
+#line 3375 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number == GID_MAX && yystack.l_mark[-1].v.i != PF_OP_EQ && yystack.l_mark[-1].v.i != PF_OP_NE) {
yyerror("group unknown requires operator = or "
@@ -7560,8 +7860,8 @@ case 291:
yyval.v.gid->tail = yyval.v.gid;
}
break;
-case 292:
-#line 3244 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 320:
+#line 3390 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[-2].v.number == GID_MAX || yystack.l_mark[0].v.number == GID_MAX) {
yyerror("group unknown requires operator = or "
@@ -7578,8 +7878,8 @@ case 292:
yyval.v.gid->tail = yyval.v.gid;
}
break;
-case 293:
-#line 3261 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 321:
+#line 3407 "../../freebsd/sbin/pfctl/parse.y"
{
if (!strcmp(yystack.l_mark[0].v.string, "unknown"))
yyval.v.number = GID_MAX;
@@ -7596,8 +7896,8 @@ case 293:
free(yystack.l_mark[0].v.string);
}
break;
-case 294:
-#line 3276 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 322:
+#line 3422 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number >= GID_MAX) {
yyerror("illegal gid value %lu", yystack.l_mark[0].v.number);
@@ -7606,8 +7906,8 @@ case 294:
yyval.v.number = yystack.l_mark[0].v.number;
}
break;
-case 295:
-#line 3285 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 323:
+#line 3431 "../../freebsd/sbin/pfctl/parse.y"
{
int f;
@@ -7620,60 +7920,60 @@ case 295:
yyval.v.b.b1 = f;
}
break;
-case 296:
-#line 3298 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 324:
+#line 3444 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.b.b1 = yystack.l_mark[-2].v.b.b1; yyval.v.b.b2 = yystack.l_mark[0].v.b.b1; }
break;
-case 297:
-#line 3299 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 325:
+#line 3445 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.b.b1 = 0; yyval.v.b.b2 = yystack.l_mark[0].v.b.b1; }
break;
-case 298:
-#line 3300 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 326:
+#line 3446 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.b.b1 = 0; yyval.v.b.b2 = 0; }
break;
-case 299:
-#line 3303 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 327:
+#line 3449 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.icmp = yystack.l_mark[0].v.icmp; }
break;
-case 300:
-#line 3304 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 328:
+#line 3450 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.icmp = yystack.l_mark[-1].v.icmp; }
break;
-case 301:
-#line 3305 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 329:
+#line 3451 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.icmp = yystack.l_mark[0].v.icmp; }
break;
-case 302:
-#line 3306 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 330:
+#line 3452 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.icmp = yystack.l_mark[-1].v.icmp; }
break;
-case 303:
-#line 3309 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 331:
+#line 3455 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.icmp = yystack.l_mark[-1].v.icmp; }
break;
-case 304:
-#line 3310 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 332:
+#line 3456 "../../freebsd/sbin/pfctl/parse.y"
{
yystack.l_mark[-3].v.icmp->tail->next = yystack.l_mark[-1].v.icmp;
yystack.l_mark[-3].v.icmp->tail = yystack.l_mark[-1].v.icmp;
yyval.v.icmp = yystack.l_mark[-3].v.icmp;
}
break;
-case 305:
-#line 3317 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 333:
+#line 3463 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.icmp = yystack.l_mark[-1].v.icmp; }
break;
-case 306:
-#line 3318 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 334:
+#line 3464 "../../freebsd/sbin/pfctl/parse.y"
{
yystack.l_mark[-3].v.icmp->tail->next = yystack.l_mark[-1].v.icmp;
yystack.l_mark[-3].v.icmp->tail = yystack.l_mark[-1].v.icmp;
yyval.v.icmp = yystack.l_mark[-3].v.icmp;
}
break;
-case 307:
-#line 3325 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 335:
+#line 3471 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.icmp = calloc(1, sizeof(struct node_icmp));
if (yyval.v.icmp == NULL)
@@ -7685,8 +7985,8 @@ case 307:
yyval.v.icmp->tail = yyval.v.icmp;
}
break;
-case 308:
-#line 3335 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 336:
+#line 3481 "../../freebsd/sbin/pfctl/parse.y"
{
const struct icmpcodeent *p;
@@ -7707,8 +8007,8 @@ case 308:
yyval.v.icmp->tail = yyval.v.icmp;
}
break;
-case 309:
-#line 3354 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 337:
+#line 3500 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > 255) {
yyerror("illegal icmp-code %lu", yystack.l_mark[0].v.number);
@@ -7724,8 +8024,8 @@ case 309:
yyval.v.icmp->tail = yyval.v.icmp;
}
break;
-case 310:
-#line 3370 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 338:
+#line 3516 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.icmp = calloc(1, sizeof(struct node_icmp));
if (yyval.v.icmp == NULL)
@@ -7737,8 +8037,8 @@ case 310:
yyval.v.icmp->tail = yyval.v.icmp;
}
break;
-case 311:
-#line 3380 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 339:
+#line 3526 "../../freebsd/sbin/pfctl/parse.y"
{
const struct icmpcodeent *p;
@@ -7759,8 +8059,8 @@ case 311:
yyval.v.icmp->tail = yyval.v.icmp;
}
break;
-case 312:
-#line 3399 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 340:
+#line 3545 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > 255) {
yyerror("illegal icmp-code %lu", yystack.l_mark[0].v.number);
@@ -7776,8 +8076,8 @@ case 312:
yyval.v.icmp->tail = yyval.v.icmp;
}
break;
-case 313:
-#line 3415 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 341:
+#line 3561 "../../freebsd/sbin/pfctl/parse.y"
{
const struct icmptypeent *p;
@@ -7790,8 +8090,8 @@ case 313:
free(yystack.l_mark[0].v.string);
}
break;
-case 314:
-#line 3426 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 342:
+#line 3572 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > 255) {
yyerror("illegal icmp-type %lu", yystack.l_mark[0].v.number);
@@ -7800,8 +8100,8 @@ case 314:
yyval.v.number = yystack.l_mark[0].v.number + 1;
}
break;
-case 315:
-#line 3435 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 343:
+#line 3581 "../../freebsd/sbin/pfctl/parse.y"
{
const struct icmptypeent *p;
@@ -7815,8 +8115,8 @@ case 315:
free(yystack.l_mark[0].v.string);
}
break;
-case 316:
-#line 3447 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 344:
+#line 3593 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > 255) {
yyerror("illegal icmp6-type %lu", yystack.l_mark[0].v.number);
@@ -7825,8 +8125,8 @@ case 316:
yyval.v.number = yystack.l_mark[0].v.number + 1;
}
break;
-case 317:
-#line 3456 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 345:
+#line 3602 "../../freebsd/sbin/pfctl/parse.y"
{
if (!strcmp(yystack.l_mark[0].v.string, "lowdelay"))
yyval.v.number = IPTOS_LOWDELAY;
@@ -7837,8 +8137,8 @@ case 317:
else if (yystack.l_mark[0].v.string[0] == '0' && yystack.l_mark[0].v.string[1] == 'x')
yyval.v.number = strtoul(yystack.l_mark[0].v.string, NULL, 16);
else
- yyval.v.number = 0; /* flag bad argument */
- if (!yyval.v.number || yyval.v.number > 255) {
+ yyval.v.number = 256; /* flag bad argument */
+ if (yyval.v.number < 0 || yyval.v.number > 255) {
yyerror("illegal tos value %s", yystack.l_mark[0].v.string);
free(yystack.l_mark[0].v.string);
YYERROR;
@@ -7846,104 +8146,104 @@ case 317:
free(yystack.l_mark[0].v.string);
}
break;
-case 318:
-#line 3474 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 346:
+#line 3620 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.number = yystack.l_mark[0].v.number;
- if (!yyval.v.number || yyval.v.number > 255) {
+ if (yyval.v.number < 0 || yyval.v.number > 255) {
yyerror("illegal tos value %s", yystack.l_mark[0].v.number);
YYERROR;
}
}
break;
-case 319:
-#line 3483 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 347:
+#line 3629 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = PF_SRCTRACK; }
break;
-case 320:
-#line 3484 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 348:
+#line 3630 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = PF_SRCTRACK_GLOBAL; }
break;
-case 321:
-#line 3485 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 349:
+#line 3631 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = PF_SRCTRACK_RULE; }
break;
-case 322:
-#line 3488 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 350:
+#line 3634 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.i = PFRULE_IFBOUND;
}
break;
-case 323:
-#line 3491 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 351:
+#line 3637 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.i = 0;
}
break;
-case 324:
-#line 3496 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 352:
+#line 3642 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.keep_state.action = 0;
yyval.v.keep_state.options = NULL;
}
break;
-case 325:
-#line 3500 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 353:
+#line 3646 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.keep_state.action = PF_STATE_NORMAL;
yyval.v.keep_state.options = yystack.l_mark[0].v.state_opt;
}
break;
-case 326:
-#line 3504 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 354:
+#line 3650 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.keep_state.action = PF_STATE_MODULATE;
yyval.v.keep_state.options = yystack.l_mark[0].v.state_opt;
}
break;
-case 327:
-#line 3508 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 355:
+#line 3654 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.keep_state.action = PF_STATE_SYNPROXY;
yyval.v.keep_state.options = yystack.l_mark[0].v.state_opt;
}
break;
-case 328:
-#line 3514 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 356:
+#line 3660 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = 0; }
break;
-case 329:
-#line 3515 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 357:
+#line 3661 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = PF_FLUSH; }
break;
-case 330:
-#line 3516 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 358:
+#line 3662 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.i = PF_FLUSH | PF_FLUSH_GLOBAL;
}
break;
-case 331:
-#line 3521 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 359:
+#line 3667 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.state_opt = yystack.l_mark[-1].v.state_opt; }
break;
-case 332:
-#line 3522 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 360:
+#line 3668 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.state_opt = NULL; }
break;
-case 333:
-#line 3525 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 361:
+#line 3671 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.state_opt = yystack.l_mark[0].v.state_opt; }
break;
-case 334:
-#line 3526 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 362:
+#line 3672 "../../freebsd/sbin/pfctl/parse.y"
{
yystack.l_mark[-2].v.state_opt->tail->next = yystack.l_mark[0].v.state_opt;
yystack.l_mark[-2].v.state_opt->tail = yystack.l_mark[0].v.state_opt;
yyval.v.state_opt = yystack.l_mark[-2].v.state_opt;
}
break;
-case 335:
-#line 3533 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 363:
+#line 3679 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > UINT_MAX) {
yyerror("only positive values permitted");
@@ -7958,8 +8258,8 @@ case 335:
yyval.v.state_opt->tail = yyval.v.state_opt;
}
break;
-case 336:
-#line 3546 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 364:
+#line 3692 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.state_opt = calloc(1, sizeof(struct node_state_opt));
if (yyval.v.state_opt == NULL)
@@ -7969,8 +8269,8 @@ case 336:
yyval.v.state_opt->tail = yyval.v.state_opt;
}
break;
-case 337:
-#line 3554 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 365:
+#line 3700 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > UINT_MAX) {
yyerror("only positive values permitted");
@@ -7985,8 +8285,8 @@ case 337:
yyval.v.state_opt->tail = yyval.v.state_opt;
}
break;
-case 338:
-#line 3567 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 366:
+#line 3713 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > UINT_MAX) {
yyerror("only positive values permitted");
@@ -8001,8 +8301,8 @@ case 338:
yyval.v.state_opt->tail = yyval.v.state_opt;
}
break;
-case 339:
-#line 3580 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 367:
+#line 3726 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[-2].v.number < 0 || yystack.l_mark[-2].v.number > UINT_MAX ||
yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > UINT_MAX) {
@@ -8019,8 +8319,8 @@ case 339:
yyval.v.state_opt->tail = yyval.v.state_opt;
}
break;
-case 340:
-#line 3595 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 368:
+#line 3741 "../../freebsd/sbin/pfctl/parse.y"
{
if (strlen(yystack.l_mark[-2].v.string) >= PF_TABLE_NAME_SIZE) {
yyerror("table name '%s' too long", yystack.l_mark[-2].v.string);
@@ -8040,8 +8340,8 @@ case 340:
yyval.v.state_opt->tail = yyval.v.state_opt;
}
break;
-case 341:
-#line 3613 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 369:
+#line 3759 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > UINT_MAX) {
yyerror("only positive values permitted");
@@ -8056,8 +8356,8 @@ case 341:
yyval.v.state_opt->tail = yyval.v.state_opt;
}
break;
-case 342:
-#line 3626 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 370:
+#line 3772 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.state_opt = calloc(1, sizeof(struct node_state_opt));
if (yyval.v.state_opt == NULL)
@@ -8068,8 +8368,8 @@ case 342:
yyval.v.state_opt->tail = yyval.v.state_opt;
}
break;
-case 343:
-#line 3635 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 371:
+#line 3781 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.state_opt = calloc(1, sizeof(struct node_state_opt));
if (yyval.v.state_opt == NULL)
@@ -8080,8 +8380,8 @@ case 343:
yyval.v.state_opt->tail = yyval.v.state_opt;
}
break;
-case 344:
-#line 3644 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 372:
+#line 3790 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.state_opt = calloc(1, sizeof(struct node_state_opt));
if (yyval.v.state_opt == NULL)
@@ -8091,19 +8391,8 @@ case 344:
yyval.v.state_opt->tail = yyval.v.state_opt;
}
break;
-case 345:
-#line 3652 "../../freebsd/contrib/pf/pfctl/parse.y"
- {
- yyval.v.state_opt = calloc(1, sizeof(struct node_state_opt));
- if (yyval.v.state_opt == NULL)
- err(1, "state_opt_item: calloc");
- yyval.v.state_opt->type = PF_STATE_OPT_PFLOW;
- yyval.v.state_opt->next = NULL;
- yyval.v.state_opt->tail = yyval.v.state_opt;
- }
-break;
-case 346:
-#line 3660 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 373:
+#line 3798 "../../freebsd/sbin/pfctl/parse.y"
{
int i;
@@ -8135,43 +8424,43 @@ case 346:
yyval.v.state_opt->tail = yyval.v.state_opt;
}
break;
-case 347:
-#line 3692 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 374:
+#line 3830 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.string = yystack.l_mark[0].v.string;
}
break;
-case 348:
-#line 3697 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 375:
+#line 3835 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.qassign.qname = yystack.l_mark[0].v.string;
yyval.v.qassign.pqname = NULL;
}
break;
-case 349:
-#line 3701 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 376:
+#line 3839 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.qassign.qname = yystack.l_mark[-1].v.string;
yyval.v.qassign.pqname = NULL;
}
break;
-case 350:
-#line 3705 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 377:
+#line 3843 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.qassign.qname = yystack.l_mark[-3].v.string;
yyval.v.qassign.pqname = yystack.l_mark[-1].v.string;
}
break;
-case 351:
-#line 3711 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 378:
+#line 3849 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = 0; }
break;
-case 352:
-#line 3712 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 379:
+#line 3850 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = 1; }
break;
-case 353:
-#line 3715 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 380:
+#line 3853 "../../freebsd/sbin/pfctl/parse.y"
{
if (parseport(yystack.l_mark[0].v.string, &yyval.v.range, PPORT_RANGE|PPORT_STAR) == -1) {
free(yystack.l_mark[0].v.string);
@@ -8180,32 +8469,32 @@ case 353:
free(yystack.l_mark[0].v.string);
}
break;
-case 354:
-#line 3724 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 381:
+#line 3862 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.host = yystack.l_mark[0].v.host; }
break;
-case 355:
-#line 3725 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 382:
+#line 3863 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.host = yystack.l_mark[-1].v.host; }
break;
-case 356:
-#line 3728 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 383:
+#line 3866 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.host = yystack.l_mark[-1].v.host; }
break;
-case 357:
-#line 3729 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 384:
+#line 3867 "../../freebsd/sbin/pfctl/parse.y"
{
yystack.l_mark[-3].v.host->tail->next = yystack.l_mark[-1].v.host;
yystack.l_mark[-3].v.host->tail = yystack.l_mark[-1].v.host->tail;
yyval.v.host = yystack.l_mark[-3].v.host;
}
break;
-case 358:
-#line 3736 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 385:
+#line 3874 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.redirection = NULL; }
break;
-case 359:
-#line 3737 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 386:
+#line 3875 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.redirection = calloc(1, sizeof(struct redirection));
if (yyval.v.redirection == NULL)
@@ -8214,8 +8503,8 @@ case 359:
yyval.v.redirection->rport.a = yyval.v.redirection->rport.b = yyval.v.redirection->rport.t = 0;
}
break;
-case 360:
-#line 3744 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 387:
+#line 3882 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.redirection = calloc(1, sizeof(struct redirection));
if (yyval.v.redirection == NULL)
@@ -8224,8 +8513,8 @@ case 360:
yyval.v.redirection->rport = yystack.l_mark[0].v.range;
}
break;
-case 361:
-#line 3754 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 388:
+#line 3892 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.hashkey = calloc(1, sizeof(struct pf_poolhashkey));
if (yyval.v.hashkey == NULL)
@@ -8236,8 +8525,8 @@ case 361:
yyval.v.hashkey->key32[3] = arc4random();
}
break;
-case 362:
-#line 3764 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 389:
+#line 3902 "../../freebsd/sbin/pfctl/parse.y"
{
if (!strncmp(yystack.l_mark[0].v.string, "0x", 2)) {
if (strlen(yystack.l_mark[0].v.string) != 34) {
@@ -8276,23 +8565,23 @@ case 362:
free(yystack.l_mark[0].v.string);
}
break;
-case 363:
-#line 3803 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 390:
+#line 3941 "../../freebsd/sbin/pfctl/parse.y"
{ bzero(&pool_opts, sizeof pool_opts); }
break;
-case 364:
-#line 3805 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 391:
+#line 3943 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.pool_opts = pool_opts; }
break;
-case 365:
-#line 3806 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 392:
+#line 3944 "../../freebsd/sbin/pfctl/parse.y"
{
bzero(&pool_opts, sizeof pool_opts);
yyval.v.pool_opts = pool_opts;
}
break;
-case 368:
-#line 3816 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 395:
+#line 3954 "../../freebsd/sbin/pfctl/parse.y"
{
if (pool_opts.type) {
yyerror("pool type cannot be redefined");
@@ -8301,8 +8590,8 @@ case 368:
pool_opts.type = PF_POOL_BITMASK;
}
break;
-case 369:
-#line 3823 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 396:
+#line 3961 "../../freebsd/sbin/pfctl/parse.y"
{
if (pool_opts.type) {
yyerror("pool type cannot be redefined");
@@ -8311,8 +8600,8 @@ case 369:
pool_opts.type = PF_POOL_RANDOM;
}
break;
-case 370:
-#line 3830 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 397:
+#line 3968 "../../freebsd/sbin/pfctl/parse.y"
{
if (pool_opts.type) {
yyerror("pool type cannot be redefined");
@@ -8322,8 +8611,8 @@ case 370:
pool_opts.key = yystack.l_mark[0].v.hashkey;
}
break;
-case 371:
-#line 3838 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 398:
+#line 3976 "../../freebsd/sbin/pfctl/parse.y"
{
if (pool_opts.type) {
yyerror("pool type cannot be redefined");
@@ -8332,8 +8621,8 @@ case 371:
pool_opts.type = PF_POOL_ROUNDROBIN;
}
break;
-case 372:
-#line 3845 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 399:
+#line 3983 "../../freebsd/sbin/pfctl/parse.y"
{
if (pool_opts.staticport) {
yyerror("static-port cannot be redefined");
@@ -8342,8 +8631,8 @@ case 372:
pool_opts.staticport = 1;
}
break;
-case 373:
-#line 3852 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 400:
+#line 3990 "../../freebsd/sbin/pfctl/parse.y"
{
if (filter_opts.marker & POM_STICKYADDRESS) {
yyerror("sticky-address cannot be redefined");
@@ -8353,12 +8642,12 @@ case 373:
pool_opts.opts |= PF_POOL_STICKYADDR;
}
break;
-case 374:
-#line 3862 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 401:
+#line 4000 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.redirection = NULL; }
break;
-case 375:
-#line 3863 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 402:
+#line 4001 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.redirection = calloc(1, sizeof(struct redirection));
if (yyval.v.redirection == NULL)
@@ -8367,8 +8656,8 @@ case 375:
yyval.v.redirection->rport.a = yyval.v.redirection->rport.b = yyval.v.redirection->rport.t = 0;
}
break;
-case 376:
-#line 3870 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 403:
+#line 4008 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.redirection = calloc(1, sizeof(struct redirection));
if (yyval.v.redirection == NULL)
@@ -8377,24 +8666,24 @@ case 376:
yyval.v.redirection->rport = yystack.l_mark[0].v.range;
}
break;
-case 377:
-#line 3879 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 404:
+#line 4017 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.b.b1 = yyval.v.b.b2 = 0; yyval.v.b.w2 = 0; }
break;
-case 378:
-#line 3880 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 405:
+#line 4018 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.b.b1 = 1; yyval.v.b.b2 = 0; yyval.v.b.w2 = 0; }
break;
-case 379:
-#line 3881 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 406:
+#line 4019 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.b.b1 = 1; yyval.v.b.b2 = yystack.l_mark[0].v.logquick.log; yyval.v.b.w2 = yystack.l_mark[0].v.logquick.logif; }
break;
-case 380:
-#line 3882 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 407:
+#line 4020 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.b.b1 = 0; yyval.v.b.b2 = yystack.l_mark[0].v.logquick.log; yyval.v.b.w2 = yystack.l_mark[0].v.logquick.logif; }
break;
-case 381:
-#line 3885 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 408:
+#line 4023 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[-2].v.i && yystack.l_mark[0].v.b.b1) {
yyerror("\"pass\" not valid with \"no\"");
@@ -8409,8 +8698,8 @@ case 381:
yyval.v.b.w2 = yystack.l_mark[0].v.b.w2;
}
break;
-case 382:
-#line 3898 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 409:
+#line 4036 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[-2].v.i && yystack.l_mark[0].v.b.b1) {
yyerror("\"pass\" not valid with \"no\"");
@@ -8425,8 +8714,8 @@ case 382:
yyval.v.b.w2 = yystack.l_mark[0].v.b.w2;
}
break;
-case 383:
-#line 3915 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 410:
+#line 4053 "../../freebsd/sbin/pfctl/parse.y"
{
struct pf_rule r;
@@ -8583,8 +8872,8 @@ case 383:
free(yystack.l_mark[-1].v.redirection);
}
break;
-case 384:
-#line 4074 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 411:
+#line 4212 "../../freebsd/sbin/pfctl/parse.y"
{
struct pf_rule binat;
struct pf_pooladdr *pa;
@@ -8749,28 +9038,28 @@ case 384:
pfctl_add_rule(pf, &binat, "");
}
break;
-case 385:
-#line 4239 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 412:
+#line 4377 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.string = NULL; }
break;
-case 386:
-#line 4240 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 413:
+#line 4378 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.string = yystack.l_mark[0].v.string; }
break;
-case 387:
-#line 4243 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 414:
+#line 4381 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.tagged.neg = 0; yyval.v.tagged.name = NULL; }
break;
-case 388:
-#line 4244 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 415:
+#line 4382 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.tagged.neg = yystack.l_mark[-2].v.number; yyval.v.tagged.name = yystack.l_mark[0].v.string; }
break;
-case 389:
-#line 4247 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 416:
+#line 4385 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.rtableid = -1; }
break;
-case 390:
-#line 4248 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 417:
+#line 4386 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > rt_tableid_max()) {
yyerror("invalid rtable id");
@@ -8779,8 +9068,8 @@ case 390:
yyval.v.rtableid = yystack.l_mark[0].v.number;
}
break;
-case 391:
-#line 4257 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 418:
+#line 4395 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.host = calloc(1, sizeof(struct node_host));
if (yyval.v.host == NULL)
@@ -8791,19 +9080,19 @@ case 391:
yyval.v.host->tail = yyval.v.host;
}
break;
-case 392:
-#line 4266 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 419:
+#line 4404 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.host = yystack.l_mark[-1].v.host;
yyval.v.host->ifname = yystack.l_mark[-2].v.string;
}
break;
-case 393:
-#line 4272 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 420:
+#line 4410 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.host = yystack.l_mark[-1].v.host; }
break;
-case 394:
-#line 4273 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 421:
+#line 4411 "../../freebsd/sbin/pfctl/parse.y"
{
if (yystack.l_mark[-3].v.host->af == 0)
yystack.l_mark[-3].v.host->af = yystack.l_mark[-1].v.host->af;
@@ -8817,32 +9106,32 @@ case 394:
yyval.v.host = yystack.l_mark[-3].v.host;
}
break;
-case 395:
-#line 4287 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 422:
+#line 4425 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.host = yystack.l_mark[0].v.host; }
break;
-case 396:
-#line 4288 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 423:
+#line 4426 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.host = yystack.l_mark[-1].v.host; }
break;
-case 397:
-#line 4291 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 424:
+#line 4429 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.route.host = NULL;
yyval.v.route.rt = 0;
yyval.v.route.pool_opts = 0;
}
break;
-case 398:
-#line 4296 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 425:
+#line 4434 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.route.host = NULL;
yyval.v.route.rt = PF_FASTROUTE;
yyval.v.route.pool_opts = 0;
}
break;
-case 399:
-#line 4301 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 426:
+#line 4439 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.route.host = yystack.l_mark[-1].v.host;
yyval.v.route.rt = PF_ROUTETO;
@@ -8851,8 +9140,8 @@ case 399:
yyval.v.route.key = yystack.l_mark[0].v.pool_opts.key;
}
break;
-case 400:
-#line 4308 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 427:
+#line 4446 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.route.host = yystack.l_mark[-1].v.host;
yyval.v.route.rt = PF_REPLYTO;
@@ -8861,8 +9150,8 @@ case 400:
yyval.v.route.key = yystack.l_mark[0].v.pool_opts.key;
}
break;
-case 401:
-#line 4315 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 428:
+#line 4453 "../../freebsd/sbin/pfctl/parse.y"
{
yyval.v.route.host = yystack.l_mark[-1].v.host;
yyval.v.route.rt = PF_DUPTO;
@@ -8871,8 +9160,8 @@ case 401:
yyval.v.route.key = yystack.l_mark[0].v.pool_opts.key;
}
break;
-case 402:
-#line 4325 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 429:
+#line 4463 "../../freebsd/sbin/pfctl/parse.y"
{
if (check_rulestate(PFCTL_STATE_OPTION)) {
free(yystack.l_mark[-1].v.string);
@@ -8890,8 +9179,21 @@ case 402:
free(yystack.l_mark[-1].v.string);
}
break;
-case 405:
-#line 4348 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 430:
+#line 4479 "../../freebsd/sbin/pfctl/parse.y"
+ {
+ if (check_rulestate(PFCTL_STATE_OPTION))
+ YYERROR;
+ if (yystack.l_mark[0].v.number < 0 || yystack.l_mark[0].v.number > UINT_MAX) {
+ yyerror("only positive values permitted");
+ YYERROR;
+ }
+ if (pfctl_set_timeout(pf, "interval", yystack.l_mark[0].v.number, 0) != 0)
+ YYERROR;
+ }
+break;
+case 433:
+#line 4496 "../../freebsd/sbin/pfctl/parse.y"
{
if (check_rulestate(PFCTL_STATE_OPTION)) {
free(yystack.l_mark[-1].v.string);
@@ -8909,12 +9211,12 @@ case 405:
free(yystack.l_mark[-1].v.string);
}
break;
-case 410:
-#line 4374 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 438:
+#line 4522 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.number = 0; }
break;
-case 411:
-#line 4375 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 439:
+#line 4523 "../../freebsd/sbin/pfctl/parse.y"
{
if (!strcmp(yystack.l_mark[0].v.string, "yes"))
yyval.v.number = 1;
@@ -8927,31 +9229,31 @@ case 411:
free(yystack.l_mark[0].v.string);
}
break;
-case 412:
-#line 4388 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 440:
+#line 4536 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = PF_OP_EQ; }
break;
-case 413:
-#line 4389 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 441:
+#line 4537 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = PF_OP_NE; }
break;
-case 414:
-#line 4390 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 442:
+#line 4538 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = PF_OP_LE; }
break;
-case 415:
-#line 4391 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 443:
+#line 4539 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = PF_OP_LT; }
break;
-case 416:
-#line 4392 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 444:
+#line 4540 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = PF_OP_GE; }
break;
-case 417:
-#line 4393 "../../freebsd/contrib/pf/pfctl/parse.y"
+case 445:
+#line 4541 "../../freebsd/sbin/pfctl/parse.y"
{ yyval.v.i = PF_OP_GT; }
break;
-#line 8955 "pfctly.tab.c"
+#line 9257 "pfctly.tab.c"
}
yystack.s_mark -= yym;
yystate = *yystack.s_mark;
diff --git a/freebsd/sbin/pfctl/parse.h b/freebsd/sbin/pfctl/parse.h
new file mode 100644
index 00000000..929f9956
--- /dev/null
+++ b/freebsd/sbin/pfctl/parse.h
@@ -0,0 +1,337 @@
+/* A Bison parser, made by GNU Bison 2.7. */
+
+/* Bison interface for Yacc-like parsers in C
+
+ Copyright (C) 1984, 1989-1990, 2000-2012 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* As a special exception, you may create a larger work that contains
+ part or all of the Bison parser skeleton and distribute that work
+ under terms of your choice, so long as that work isn't itself a
+ parser generator using the skeleton or a modified version thereof
+ as a parser skeleton. Alternatively, if you modify or redistribute
+ the parser skeleton itself, you may (at your option) remove this
+ special exception, which will cause the skeleton and the resulting
+ Bison output files to be licensed under the GNU General Public
+ License without this special exception.
+
+ This special exception was added by the Free Software Foundation in
+ version 2.2 of Bison. */
+
+#ifndef YY_PFCTLY_PFCTLY_TAB_H_INCLUDED
+# define YY_PFCTLY_PFCTLY_TAB_H_INCLUDED
+/* Enabling traces. */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+#if YYDEBUG
+extern int pfctlydebug;
+#endif
+
+/* Tokens. */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
+ know about them. */
+ enum yytokentype {
+ PASS = 258,
+ BLOCK = 259,
+ SCRUB = 260,
+ RETURN = 261,
+ IN = 262,
+ OS = 263,
+ OUT = 264,
+ LOG = 265,
+ QUICK = 266,
+ ON = 267,
+ FROM = 268,
+ TO = 269,
+ FLAGS = 270,
+ RETURNRST = 271,
+ RETURNICMP = 272,
+ RETURNICMP6 = 273,
+ PROTO = 274,
+ INET = 275,
+ INET6 = 276,
+ ALL = 277,
+ ANY = 278,
+ ICMPTYPE = 279,
+ ICMP6TYPE = 280,
+ CODE = 281,
+ KEEP = 282,
+ MODULATE = 283,
+ STATE = 284,
+ PORT = 285,
+ RDR = 286,
+ NAT = 287,
+ BINAT = 288,
+ ARROW = 289,
+ NODF = 290,
+ MINTTL = 291,
+ ERROR = 292,
+ ALLOWOPTS = 293,
+ FASTROUTE = 294,
+ FILENAME = 295,
+ ROUTETO = 296,
+ DUPTO = 297,
+ REPLYTO = 298,
+ NO = 299,
+ LABEL = 300,
+ NOROUTE = 301,
+ URPFFAILED = 302,
+ FRAGMENT = 303,
+ USER = 304,
+ GROUP = 305,
+ MAXMSS = 306,
+ MAXIMUM = 307,
+ TTL = 308,
+ TOS = 309,
+ DROP = 310,
+ TABLE = 311,
+ REASSEMBLE = 312,
+ FRAGDROP = 313,
+ FRAGCROP = 314,
+ ANCHOR = 315,
+ NATANCHOR = 316,
+ RDRANCHOR = 317,
+ BINATANCHOR = 318,
+ SET = 319,
+ OPTIMIZATION = 320,
+ TIMEOUT = 321,
+ LIMIT = 322,
+ LOGINTERFACE = 323,
+ BLOCKPOLICY = 324,
+ RANDOMID = 325,
+ REQUIREORDER = 326,
+ SYNPROXY = 327,
+ FINGERPRINTS = 328,
+ NOSYNC = 329,
+ DEBUG = 330,
+ SKIP = 331,
+ HOSTID = 332,
+ ANTISPOOF = 333,
+ FOR = 334,
+ INCLUDE = 335,
+ BITMASK = 336,
+ RANDOM = 337,
+ SOURCEHASH = 338,
+ ROUNDROBIN = 339,
+ STATICPORT = 340,
+ PROBABILITY = 341,
+ ALTQ = 342,
+ CBQ = 343,
+ CODEL = 344,
+ PRIQ = 345,
+ HFSC = 346,
+ FAIRQ = 347,
+ BANDWIDTH = 348,
+ TBRSIZE = 349,
+ LINKSHARE = 350,
+ REALTIME = 351,
+ UPPERLIMIT = 352,
+ QUEUE = 353,
+ PRIORITY = 354,
+ QLIMIT = 355,
+ HOGS = 356,
+ BUCKETS = 357,
+ RTABLE = 358,
+ TARGET = 359,
+ INTERVAL = 360,
+ LOAD = 361,
+ RULESET_OPTIMIZATION = 362,
+ PRIO = 363,
+ STICKYADDRESS = 364,
+ MAXSRCSTATES = 365,
+ MAXSRCNODES = 366,
+ SOURCETRACK = 367,
+ GLOBAL = 368,
+ RULE = 369,
+ MAXSRCCONN = 370,
+ MAXSRCCONNRATE = 371,
+ OVERLOAD = 372,
+ FLUSH = 373,
+ SLOPPY = 374,
+ TAGGED = 375,
+ TAG = 376,
+ IFBOUND = 377,
+ FLOATING = 378,
+ STATEPOLICY = 379,
+ STATEDEFAULTS = 380,
+ ROUTE = 381,
+ SETTOS = 382,
+ DIVERTTO = 383,
+ DIVERTREPLY = 384,
+ STRING = 385,
+ NUMBER = 386,
+ PORTBINARY = 387
+ };
+#endif
+/* Tokens. */
+#define PASS 258
+#define BLOCK 259
+#define SCRUB 260
+#define RETURN 261
+#define IN 262
+#define OS 263
+#define OUT 264
+#define LOG 265
+#define QUICK 266
+#define ON 267
+#define FROM 268
+#define TO 269
+#define FLAGS 270
+#define RETURNRST 271
+#define RETURNICMP 272
+#define RETURNICMP6 273
+#define PROTO 274
+#define INET 275
+#define INET6 276
+#define ALL 277
+#define ANY 278
+#define ICMPTYPE 279
+#define ICMP6TYPE 280
+#define CODE 281
+#define KEEP 282
+#define MODULATE 283
+#define STATE 284
+#define PORT 285
+#define RDR 286
+#define NAT 287
+#define BINAT 288
+#define ARROW 289
+#define NODF 290
+#define MINTTL 291
+#define ERROR 292
+#define ALLOWOPTS 293
+#define FASTROUTE 294
+#define FILENAME 295
+#define ROUTETO 296
+#define DUPTO 297
+#define REPLYTO 298
+#define NO 299
+#define LABEL 300
+#define NOROUTE 301
+#define URPFFAILED 302
+#define FRAGMENT 303
+#define USER 304
+#define GROUP 305
+#define MAXMSS 306
+#define MAXIMUM 307
+#define TTL 308
+#define TOS 309
+#define DROP 310
+#define TABLE 311
+#define REASSEMBLE 312
+#define FRAGDROP 313
+#define FRAGCROP 314
+#define ANCHOR 315
+#define NATANCHOR 316
+#define RDRANCHOR 317
+#define BINATANCHOR 318
+#define SET 319
+#define OPTIMIZATION 320
+#define TIMEOUT 321
+#define LIMIT 322
+#define LOGINTERFACE 323
+#define BLOCKPOLICY 324
+#define RANDOMID 325
+#define REQUIREORDER 326
+#define SYNPROXY 327
+#define FINGERPRINTS 328
+#define NOSYNC 329
+#define DEBUG 330
+#define SKIP 331
+#define HOSTID 332
+#define ANTISPOOF 333
+#define FOR 334
+#define INCLUDE 335
+#define BITMASK 336
+#define RANDOM 337
+#define SOURCEHASH 338
+#define ROUNDROBIN 339
+#define STATICPORT 340
+#define PROBABILITY 341
+#define ALTQ 342
+#define CBQ 343
+#define CODEL 344
+#define PRIQ 345
+#define HFSC 346
+#define FAIRQ 347
+#define BANDWIDTH 348
+#define TBRSIZE 349
+#define LINKSHARE 350
+#define REALTIME 351
+#define UPPERLIMIT 352
+#define QUEUE 353
+#define PRIORITY 354
+#define QLIMIT 355
+#define HOGS 356
+#define BUCKETS 357
+#define RTABLE 358
+#define TARGET 359
+#define INTERVAL 360
+#define LOAD 361
+#define RULESET_OPTIMIZATION 362
+#define PRIO 363
+#define STICKYADDRESS 364
+#define MAXSRCSTATES 365
+#define MAXSRCNODES 366
+#define SOURCETRACK 367
+#define GLOBAL 368
+#define RULE 369
+#define MAXSRCCONN 370
+#define MAXSRCCONNRATE 371
+#define OVERLOAD 372
+#define FLUSH 373
+#define SLOPPY 374
+#define TAGGED 375
+#define TAG 376
+#define IFBOUND 377
+#define FLOATING 378
+#define STATEPOLICY 379
+#define STATEDEFAULTS 380
+#define ROUTE 381
+#define SETTOS 382
+#define DIVERTTO 383
+#define DIVERTREPLY 384
+#define STRING 385
+#define NUMBER 386
+#define PORTBINARY 387
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+extern YYSTYPE pfctlylval;
+
+#ifdef YYPARSE_PARAM
+#if defined __STDC__ || defined __cplusplus
+int pfctlyparse (void *YYPARSE_PARAM);
+#else
+int pfctlyparse ();
+#endif
+#else /* ! YYPARSE_PARAM */
+#if defined __STDC__ || defined __cplusplus
+int pfctlyparse (void);
+#else
+int pfctlyparse ();
+#endif
+#endif /* ! YYPARSE_PARAM */
+
+#endif /* !YY_PFCTLY_PFCTLY_TAB_H_INCLUDED */
diff --git a/freebsd/contrib/pf/pfctl/parse.y b/freebsd/sbin/pfctl/parse.y
index df865066..33edd3e5 100644
--- a/freebsd/contrib/pf/pfctl/parse.y
+++ b/freebsd/sbin/pfctl/parse.y
@@ -29,14 +29,20 @@
%{
#ifdef __rtems__
#include <machine/rtems-bsd-user-space.h>
+#endif /* __rtems__ */
+
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-namespace.h"
+#endif /* __rtems__ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef __rtems__
#include <machine/rtems-bsd-program.h>
#define pf_find_or_create_ruleset _bsd_pf_find_or_create_ruleset
#define pf_anchor_setup _bsd_pf_anchor_setup
#define pf_remove_if_empty_ruleset _bsd_pf_remove_if_empty_ruleset
#endif /* __rtems__ */
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
@@ -51,10 +57,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/icmp6.h>
#include <net/pfvar.h>
#include <arpa/inet.h>
-#include <altq/altq.h>
-#include <altq/altq_cbq.h>
-#include <altq/altq_priq.h>
-#include <altq/altq_hfsc.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_cbq.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_priq.h>
+#include <net/altq/altq_hfsc.h>
+#include <net/altq/altq_fairq.h>
#include <stdio.h>
#include <unistd.h>
@@ -73,6 +81,9 @@ __FBSDID("$FreeBSD$");
#include "pfctl_parser.h"
#include "pfctl.h"
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-parse-data.h"
+#endif /* __rtems__ */
static struct pfctl *pf = NULL;
static int debug = 0;
@@ -85,11 +96,7 @@ static int blockpolicy = PFRULE_DROP;
static int require_order = 1;
static int default_statelock;
-#ifndef __rtems__
-TAILQ_HEAD(files, file) files = TAILQ_HEAD_INITIALIZER(files);
-#else /* __rtems__ */
static TAILQ_HEAD(files, file) files = TAILQ_HEAD_INITIALIZER(files);
-#endif /* __rtems__ */
static struct file {
TAILQ_ENTRY(file) entry;
FILE *stream;
@@ -109,11 +116,7 @@ int lgetc(int);
int lungetc(int);
int findeol(void);
-#ifndef __rtems__
-TAILQ_HEAD(symhead, sym) symhead = TAILQ_HEAD_INITIALIZER(symhead);
-#else /* __rtems__ */
static TAILQ_HEAD(symhead, sym) symhead = TAILQ_HEAD_INITIALIZER(symhead);
-#endif /* __rtems__ */
struct sym {
TAILQ_ENTRY(sym) entry;
int used;
@@ -174,8 +177,7 @@ enum { PF_STATE_OPT_MAX, PF_STATE_OPT_NOSYNC, PF_STATE_OPT_SRCTRACK,
PF_STATE_OPT_MAX_SRC_STATES, PF_STATE_OPT_MAX_SRC_CONN,
PF_STATE_OPT_MAX_SRC_CONN_RATE, PF_STATE_OPT_MAX_SRC_NODES,
PF_STATE_OPT_OVERLOAD, PF_STATE_OPT_STATELOCK,
- PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY,
- PF_STATE_OPT_PFLOW };
+ PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY, };
enum { PF_SRCTRACK_NONE, PF_SRCTRACK, PF_SRCTRACK_GLOBAL, PF_SRCTRACK_RULE };
@@ -210,11 +212,7 @@ struct peer {
struct node_port *port;
};
-#ifndef __rtems__
-struct node_queue {
-#else /* __rtems__ */
static struct node_queue {
-#endif /* __rtems__ */
char queue[PF_QNAME_SIZE];
char parent[PF_QNAME_SIZE];
char ifname[IFNAMSIZ];
@@ -228,17 +226,15 @@ struct node_qassign {
char *pqname;
};
-#ifndef __rtems__
-struct filter_opts {
-#else /* __rtems__ */
static struct filter_opts {
-#endif /* __rtems__ */
int marker;
#define FOM_FLAGS 0x01
#define FOM_ICMP 0x02
#define FOM_TOS 0x04
#define FOM_KEEP 0x08
#define FOM_SRCTRACK 0x10
+#define FOM_SETPRIO 0x0400
+#define FOM_PRIO 0x2000
struct node_uid *uid;
struct node_gid *gid;
struct {
@@ -262,26 +258,20 @@ static struct filter_opts {
char *match_tag;
u_int8_t match_tag_not;
u_int rtableid;
+ u_int8_t prio;
+ u_int8_t set_prio[2];
struct {
struct node_host *addr;
u_int16_t port;
} divert;
} filter_opts;
-#ifndef __rtems__
-struct antispoof_opts {
-#else /* __rtems__ */
static struct antispoof_opts {
-#endif /* __rtems__ */
char *label;
u_int rtableid;
} antispoof_opts;
-#ifndef __rtems__
-struct scrub_opts {
-#else /* __rtems__ */
static struct scrub_opts {
-#endif /* __rtems__ */
int marker;
#define SOM_MINTTL 0x01
#define SOM_MAXMSS 0x02
@@ -299,11 +289,7 @@ static struct scrub_opts {
u_int rtableid;
} scrub_opts;
-#ifndef __rtems__
-struct queue_opts {
-#else /* __rtems__ */
static struct queue_opts {
-#endif /* __rtems__ */
int marker;
#define QOM_BWSPEC 0x01
#define QOM_SCHEDULER 0x02
@@ -317,21 +303,13 @@ static struct queue_opts {
int qlimit;
} queue_opts;
-#ifndef __rtems__
-struct table_opts {
-#else /* __rtems__ */
static struct table_opts {
-#endif /* __rtems__ */
int flags;
int init_addr;
struct node_tinithead init_nodes;
} table_opts;
-#ifndef __rtems__
-struct pool_opts {
-#else /* __rtems__ */
static struct pool_opts {
-#endif /* __rtems__ */
int marker;
#define POM_TYPE 0x01
#define POM_STICKYADDRESS 0x02
@@ -342,14 +320,10 @@ static struct pool_opts {
} pool_opts;
-
-#ifndef __rtems__
-struct node_hfsc_opts hfsc_opts;
-struct node_state_opt *keep_state_defaults = NULL;
-#else /* __rtems__ */
+static struct codel_opts codel_opts;
static struct node_hfsc_opts hfsc_opts;
+static struct node_fairq_opts fairq_opts;
static struct node_state_opt *keep_state_defaults = NULL;
-#endif /* __rtems__ */
int disallow_table(struct node_host *, const char *);
int disallow_urpf_failed(struct node_host *, const char *);
@@ -394,11 +368,7 @@ void remove_invalid_hosts(struct node_host **, sa_family_t *);
int invalid_redirect(struct node_host *, sa_family_t);
u_int16_t parseicmpspec(char *, sa_family_t);
-#ifndef __rtems__
-TAILQ_HEAD(loadanchorshead, loadanchors)
-#else /* __rtems__ */
static TAILQ_HEAD(loadanchorshead, loadanchors)
-#endif /* __rtems__ */
loadanchorshead = TAILQ_HEAD_INITIALIZER(loadanchorshead);
struct loadanchors {
@@ -475,6 +445,8 @@ typedef struct {
struct table_opts table_opts;
struct pool_opts pool_opts;
struct node_hfsc_opts hfsc_opts;
+ struct node_fairq_opts fairq_opts;
+ struct codel_opts codel_opts;
} v;
int lineno;
} YYSTYPE;
@@ -499,11 +471,11 @@ int parseport(char *, struct range *r, int);
%token REQUIREORDER SYNPROXY FINGERPRINTS NOSYNC DEBUG SKIP HOSTID
%token ANTISPOOF FOR INCLUDE
%token BITMASK RANDOM SOURCEHASH ROUNDROBIN STATICPORT PROBABILITY
-%token ALTQ CBQ PRIQ HFSC BANDWIDTH TBRSIZE LINKSHARE REALTIME UPPERLIMIT
-%token QUEUE PRIORITY QLIMIT RTABLE
-%token LOAD RULESET_OPTIMIZATION
+%token ALTQ CBQ CODEL PRIQ HFSC FAIRQ BANDWIDTH TBRSIZE LINKSHARE REALTIME
+%token UPPERLIMIT QUEUE PRIORITY QLIMIT HOGS BUCKETS RTABLE TARGET INTERVAL
+%token LOAD RULESET_OPTIMIZATION PRIO
%token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE
-%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY PFLOW
+%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY
%token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS
%token DIVERTTO DIVERTREPLY
%token <v.string> STRING
@@ -516,7 +488,7 @@ int parseport(char *, struct range *r, int);
%type <v.i> no dir af fragcache optimizer
%type <v.i> sourcetrack flush unaryop statelock
%type <v.b> action nataction natpasslog scrubaction
-%type <v.b> flags flag blockspec
+%type <v.b> flags flag blockspec prio
%type <v.range> portplain portstar portrange
%type <v.hashkey> hashkey
%type <v.proto> proto proto_list proto_item
@@ -548,8 +520,11 @@ int parseport(char *, struct range *r, int);
%type <v.number> cbqflags_list cbqflags_item
%type <v.number> priqflags_list priqflags_item
%type <v.hfsc_opts> hfscopts_list hfscopts_item hfsc_opts
+%type <v.fairq_opts> fairqopts_list fairqopts_item fairq_opts
+%type <v.codel_opts> codelopts_list codelopts_item codel_opts
%type <v.queue_bwspec> bandwidth
%type <v.filter_opts> filter_opts filter_opt filter_opts_l
+%type <v.filter_opts> filter_sets filter_set filter_sets_l
%type <v.antispoof_opts> antispoof_opts antispoof_opt antispoof_opts_l
%type <v.queue_opts> queue_opts queue_opt queue_opts_l
%type <v.scrub_opts> scrub_opts scrub_opt scrub_opts_l
@@ -935,6 +910,17 @@ anchorrule : ANCHOR anchorname dir quick interface af proto fromto
YYERROR;
}
r.match_tag_not = $9.match_tag_not;
+ if ($9.marker & FOM_PRIO) {
+ if ($9.prio == 0)
+ r.prio = PF_PRIO_ZERO;
+ else
+ r.prio = $9.prio;
+ }
+ if ($9.marker & FOM_SETPRIO) {
+ r.set_prio[0] = $9.set_prio[0];
+ r.set_prio[1] = $9.set_prio[1];
+ r.scrub_flags |= PFSTATE_SETPRIO;
+ }
decide_address_family($8.src.host, &r.af);
decide_address_family($8.dst.host, &r.af);
@@ -1243,8 +1229,8 @@ scrub_opt : NODF {
;
fragcache : FRAGMENT REASSEMBLE { $$ = 0; /* default */ }
- | FRAGMENT FRAGCROP { $$ = PFRULE_FRAGCROP; }
- | FRAGMENT FRAGDROP { $$ = PFRULE_FRAGDROP; }
+ | FRAGMENT FRAGCROP { $$ = 0; }
+ | FRAGMENT FRAGDROP { $$ = 0; }
;
antispoof : ANTISPOOF logquick antispoof_ifspc af antispoof_opts {
@@ -1519,7 +1505,7 @@ altqif : ALTQ interface queue_opts QUEUE qassign {
a.scheduler = $3.scheduler.qtype;
a.qlimit = $3.qlimit;
a.tbrsize = $3.tbrsize;
- if ($5 == NULL) {
+ if ($5 == NULL && $3.scheduler.qtype != ALTQT_CODEL) {
yyerror("no child queues specified");
YYERROR;
}
@@ -1651,13 +1637,22 @@ bandwidth : STRING {
bps = strtod($1, &cp);
if (cp != NULL) {
+ if (strlen(cp) > 1) {
+ char *cu = cp + 1;
+ if (!strcmp(cu, "Bit") ||
+ !strcmp(cu, "B") ||
+ !strcmp(cu, "bit") ||
+ !strcmp(cu, "b")) {
+ *cu = 0;
+ }
+ }
if (!strcmp(cp, "b"))
; /* nothing */
- else if (!strcmp(cp, "Kb"))
+ else if (!strcmp(cp, "K"))
bps *= 1000;
- else if (!strcmp(cp, "Mb"))
+ else if (!strcmp(cp, "M"))
bps *= 1000 * 1000;
- else if (!strcmp(cp, "Gb"))
+ else if (!strcmp(cp, "G"))
bps *= 1000 * 1000 * 1000;
else if (!strcmp(cp, "%")) {
if (bps < 0 || bps > 100) {
@@ -1712,6 +1707,24 @@ scheduler : CBQ {
$$.qtype = ALTQT_HFSC;
$$.data.hfsc_opts = $3;
}
+ | FAIRQ {
+ $$.qtype = ALTQT_FAIRQ;
+ bzero(&$$.data.fairq_opts,
+ sizeof(struct node_fairq_opts));
+ }
+ | FAIRQ '(' fairq_opts ')' {
+ $$.qtype = ALTQT_FAIRQ;
+ $$.data.fairq_opts = $3;
+ }
+ | CODEL {
+ $$.qtype = ALTQT_CODEL;
+ bzero(&$$.data.codel_opts,
+ sizeof(struct codel_opts));
+ }
+ | CODEL '(' codel_opts ')' {
+ $$.qtype = ALTQT_CODEL;
+ $$.data.codel_opts = $3;
+ }
;
cbqflags_list : cbqflags_item { $$ |= $1; }
@@ -1729,6 +1742,8 @@ cbqflags_item : STRING {
$$ = CBQCLF_RED|CBQCLF_ECN;
else if (!strcmp($1, "rio"))
$$ = CBQCLF_RIO;
+ else if (!strcmp($1, "codel"))
+ $$ = CBQCLF_CODEL;
else {
yyerror("unknown cbq flag \"%s\"", $1);
free($1);
@@ -1751,6 +1766,8 @@ priqflags_item : STRING {
$$ = PRCF_RED|PRCF_ECN;
else if (!strcmp($1, "rio"))
$$ = PRCF_RIO;
+ else if (!strcmp($1, "codel"))
+ $$ = PRCF_CODEL;
else {
yyerror("unknown priq flag \"%s\"", $1);
free($1);
@@ -1851,6 +1868,8 @@ hfscopts_item : LINKSHARE bandwidth {
hfsc_opts.flags |= HFCF_RED|HFCF_ECN;
else if (!strcmp($1, "rio"))
hfsc_opts.flags |= HFCF_RIO;
+ else if (!strcmp($1, "codel"))
+ hfsc_opts.flags |= HFCF_CODEL;
else {
yyerror("unknown hfsc flag \"%s\"", $1);
free($1);
@@ -1860,6 +1879,102 @@ hfscopts_item : LINKSHARE bandwidth {
}
;
+fairq_opts : {
+ bzero(&fairq_opts,
+ sizeof(struct node_fairq_opts));
+ }
+ fairqopts_list {
+ $$ = fairq_opts;
+ }
+ ;
+
+fairqopts_list : fairqopts_item
+ | fairqopts_list comma fairqopts_item
+ ;
+
+fairqopts_item : LINKSHARE bandwidth {
+ if (fairq_opts.linkshare.used) {
+ yyerror("linkshare already specified");
+ YYERROR;
+ }
+ fairq_opts.linkshare.m2 = $2;
+ fairq_opts.linkshare.used = 1;
+ }
+ | LINKSHARE '(' bandwidth number bandwidth ')' {
+ if (fairq_opts.linkshare.used) {
+ yyerror("linkshare already specified");
+ YYERROR;
+ }
+ fairq_opts.linkshare.m1 = $3;
+ fairq_opts.linkshare.d = $4;
+ fairq_opts.linkshare.m2 = $5;
+ fairq_opts.linkshare.used = 1;
+ }
+ | HOGS bandwidth {
+ fairq_opts.hogs_bw = $2;
+ }
+ | BUCKETS number {
+ fairq_opts.nbuckets = $2;
+ }
+ | STRING {
+ if (!strcmp($1, "default"))
+ fairq_opts.flags |= FARF_DEFAULTCLASS;
+ else if (!strcmp($1, "red"))
+ fairq_opts.flags |= FARF_RED;
+ else if (!strcmp($1, "ecn"))
+ fairq_opts.flags |= FARF_RED|FARF_ECN;
+ else if (!strcmp($1, "rio"))
+ fairq_opts.flags |= FARF_RIO;
+ else if (!strcmp($1, "codel"))
+ fairq_opts.flags |= FARF_CODEL;
+ else {
+ yyerror("unknown fairq flag \"%s\"", $1);
+ free($1);
+ YYERROR;
+ }
+ free($1);
+ }
+ ;
+
+codel_opts : {
+ bzero(&codel_opts,
+ sizeof(struct codel_opts));
+ }
+ codelopts_list {
+ $$ = codel_opts;
+ }
+ ;
+
+codelopts_list : codelopts_item
+ | codelopts_list comma codelopts_item
+ ;
+
+codelopts_item : INTERVAL number {
+ if (codel_opts.interval) {
+ yyerror("interval already specified");
+ YYERROR;
+ }
+ codel_opts.interval = $2;
+ }
+ | TARGET number {
+ if (codel_opts.target) {
+ yyerror("target already specified");
+ YYERROR;
+ }
+ codel_opts.target = $2;
+ }
+ | STRING {
+ if (!strcmp($1, "ecn"))
+ codel_opts.ecn = 1;
+ else {
+ yyerror("unknown codel option \"%s\"", $1);
+ free($1);
+ YYERROR;
+ }
+ free($1);
+ }
+ ;
+
qassign : /* empty */ { $$ = NULL; }
| qassign_item { $$ = $1; }
| '{' optnl qassign_list '}' { $$ = $3; }
@@ -1931,6 +2046,18 @@ pfrule : action dir logquick interface route af proto fromto
r.prob = $9.prob;
r.rtableid = $9.rtableid;
+ if ($9.marker & FOM_PRIO) {
+ if ($9.prio == 0)
+ r.prio = PF_PRIO_ZERO;
+ else
+ r.prio = $9.prio;
+ }
+ if ($9.marker & FOM_SETPRIO) {
+ r.set_prio[0] = $9.set_prio[0];
+ r.set_prio[1] = $9.set_prio[1];
+ r.scrub_flags |= PFSTATE_SETPRIO;
+ }
+
r.af = $6;
if ($9.tag)
if (strlcpy(r.tagname, $9.tag,
@@ -2133,15 +2260,6 @@ pfrule : action dir logquick interface route af proto fromto
}
r.rule_flag |= PFRULE_STATESLOPPY;
break;
- case PF_STATE_OPT_PFLOW:
- if (r.rule_flag & PFRULE_PFLOW) {
- yyerror("state pflow "
- "option: multiple "
- "definitions");
- YYERROR;
- }
- r.rule_flag |= PFRULE_PFLOW;
- break;
case PF_STATE_OPT_TIMEOUT:
if (o->data.timeout.number ==
PFTM_ADAPTIVE_START ||
@@ -2360,6 +2478,18 @@ filter_opt : USER uids {
filter_opts.marker |= FOM_ICMP;
filter_opts.icmpspec = $1;
}
+ | PRIO NUMBER {
+ if (filter_opts.marker & FOM_PRIO) {
+ yyerror("prio cannot be redefined");
+ YYERROR;
+ }
+ if ($2 < 0 || $2 > PF_PRIO_MAX) {
+ yyerror("prio must be 0 - %u", PF_PRIO_MAX);
+ YYERROR;
+ }
+ filter_opts.marker |= FOM_PRIO;
+ filter_opts.prio = $2;
+ }
| TOS tos {
if (filter_opts.marker & FOM_TOS) {
yyerror("tos cannot be redefined");
@@ -2458,6 +2588,42 @@ filter_opt : USER uids {
filter_opts.divert.port = 1; /* some random value */
#endif
}
+ | filter_sets
+ ;
+
+filter_sets : SET '(' filter_sets_l ')' { $$ = filter_opts; }
+ | SET filter_set { $$ = filter_opts; }
+ ;
+
+filter_sets_l : filter_sets_l comma filter_set
+ | filter_set
+ ;
+
+filter_set : prio {
+ if (filter_opts.marker & FOM_SETPRIO) {
+ yyerror("prio cannot be redefined");
+ YYERROR;
+ }
+ filter_opts.marker |= FOM_SETPRIO;
+ filter_opts.set_prio[0] = $1.b1;
+ filter_opts.set_prio[1] = $1.b2;
+ }
+prio : PRIO NUMBER {
+ if ($2 < 0 || $2 > PF_PRIO_MAX) {
+ yyerror("prio must be 0 - %u", PF_PRIO_MAX);
+ YYERROR;
+ }
+ $$.b1 = $$.b2 = $2;
+ }
+ | PRIO '(' NUMBER comma NUMBER ')' {
+ if ($3 < 0 || $3 > PF_PRIO_MAX ||
+ $5 < 0 || $5 > PF_PRIO_MAX) {
+ yyerror("prio must be 0 - %u", PF_PRIO_MAX);
+ YYERROR;
+ }
+ $$.b1 = $3;
+ $$.b2 = $5;
+ }
;
probability : STRING {
@@ -2961,26 +3127,6 @@ host : STRING {
$$->next = NULL;
$$->tail = $$;
}
- | ROUTE STRING {
- $$ = calloc(1, sizeof(struct node_host));
- if ($$ == NULL) {
- free($2);
- err(1, "host: calloc");
- }
- $$->addr.type = PF_ADDR_RTLABEL;
- if (strlcpy($$->addr.v.rtlabelname, $2,
- sizeof($$->addr.v.rtlabelname)) >=
- sizeof($$->addr.v.rtlabelname)) {
- yyerror("route label too long, max %u chars",
- sizeof($$->addr.v.rtlabelname) - 1);
- free($2);
- free($$);
- YYERROR;
- }
- $$->next = NULL;
- $$->tail = $$;
- free($2);
- }
;
number : NUMBER
@@ -3463,8 +3609,8 @@ tos : STRING {
else if ($1[0] == '0' && $1[1] == 'x')
$$ = strtoul($1, NULL, 16);
else
- $$ = 0; /* flag bad argument */
- if (!$$ || $$ > 255) {
+ $$ = 256; /* flag bad argument */
+ if ($$ < 0 || $$ > 255) {
yyerror("illegal tos value %s", $1);
free($1);
YYERROR;
@@ -3473,7 +3619,7 @@ tos : STRING {
}
| NUMBER {
$$ = $1;
- if (!$$ || $$ > 255) {
+ if ($$ < 0 || $$ > 255) {
yyerror("illegal tos value %s", $1);
YYERROR;
}
@@ -3649,14 +3795,6 @@ state_opt_item : MAXIMUM NUMBER {
$$->next = NULL;
$$->tail = $$;
}
- | PFLOW {
- $$ = calloc(1, sizeof(struct node_state_opt));
- if ($$ == NULL)
- err(1, "state_opt_item: calloc");
- $$->type = PF_STATE_OPT_PFLOW;
- $$->next = NULL;
- $$->tail = $$;
- }
| STRING NUMBER {
int i;
@@ -4338,6 +4476,16 @@ timeout_spec : STRING NUMBER
}
free($1);
}
+ | INTERVAL NUMBER {
+ if (check_rulestate(PFCTL_STATE_OPTION))
+ YYERROR;
+ if ($2 < 0 || $2 > UINT_MAX) {
+ yyerror("only positive values permitted");
+ YYERROR;
+ }
+ if (pfctl_set_timeout(pf, "interval", $2, 0) != 0)
+ YYERROR;
+ }
;
timeout_list : timeout_list comma timeout_spec optnl
@@ -4394,6 +4542,11 @@ unaryop : '=' { $$ = PF_OP_EQ; }
;
%%
+#ifdef __rtems__
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern YYSTYPE pfctlyval);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern YYSTYPE pfctlylval);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static YYSTACKDATA yystack);
+#endif /* __rtems__ */
int
yyerror(const char *fmt, ...)
@@ -4822,7 +4975,8 @@ expand_altq(struct pf_altq *a, struct node_if *interfaces,
if ((pf->loadopt & PFCTL_FLAG_ALTQ) == 0) {
FREE_LIST(struct node_if, interfaces);
- FREE_LIST(struct node_queue, nqueues);
+ if (nqueues)
+ FREE_LIST(struct node_queue, nqueues);
return (0);
}
@@ -4913,7 +5067,8 @@ expand_altq(struct pf_altq *a, struct node_if *interfaces,
}
);
FREE_LIST(struct node_if, interfaces);
- FREE_LIST(struct node_queue, nqueues);
+ if (nqueues)
+ FREE_LIST(struct node_queue, nqueues);
return (errs);
}
@@ -5316,8 +5471,10 @@ lookup(char *s)
{ "bitmask", BITMASK},
{ "block", BLOCK},
{ "block-policy", BLOCKPOLICY},
+ { "buckets", BUCKETS},
{ "cbq", CBQ},
{ "code", CODE},
+ { "codelq", CODEL},
{ "crop", FRAGCROP},
{ "debug", DEBUG},
{ "divert-reply", DIVERTREPLY},
@@ -5325,6 +5482,7 @@ lookup(char *s)
{ "drop", DROP},
{ "drop-ovl", FRAGDROP},
{ "dup-to", DUPTO},
+ { "fairq", FAIRQ},
{ "fastroute", FASTROUTE},
{ "file", FILENAME},
{ "fingerprints", FINGERPRINTS},
@@ -5337,6 +5495,7 @@ lookup(char *s)
{ "global", GLOBAL},
{ "group", GROUP},
{ "hfsc", HFSC},
+ { "hogs", HOGS},
{ "hostid", HOSTID},
{ "icmp-type", ICMPTYPE},
{ "icmp6-type", ICMP6TYPE},
@@ -5345,6 +5504,7 @@ lookup(char *s)
{ "include", INCLUDE},
{ "inet", INET},
{ "inet6", INET6},
+ { "interval", INTERVAL},
{ "keep", KEEP},
{ "label", LABEL},
{ "limit", LIMIT},
@@ -5372,8 +5532,8 @@ lookup(char *s)
{ "out", OUT},
{ "overload", OVERLOAD},
{ "pass", PASS},
- { "pflow", PFLOW},
{ "port", PORT},
+ { "prio", PRIO},
{ "priority", PRIORITY},
{ "priq", PRIQ},
{ "probability", PROBABILITY},
@@ -5415,6 +5575,7 @@ lookup(char *s)
{ "table", TABLE},
{ "tag", TAG},
{ "tagged", TAGGED},
+ { "target", TARGET},
{ "tbrsize", TBRSIZE},
{ "timeout", TIMEOUT},
{ "to", TO},
@@ -5442,17 +5603,10 @@ lookup(char *s)
#define MAXPUSHBACK 128
-#ifndef __rtems__
-char *parsebuf;
-int parseindex;
-char pushback_buffer[MAXPUSHBACK];
-int pushback_index = 0;
-#else /* __rtems__ */
static char *parsebuf;
static int parseindex;
static char pushback_buffer[MAXPUSHBACK];
static int pushback_index = 0;
-#endif /* __rtems__ */
int
lgetc(int quotec)
@@ -6127,13 +6281,10 @@ rt_tableid_max(void)
/*
* As the OpenBSD code only compares > and not >= we need to adjust
* here given we only accept values of 0..n and want to avoid #ifdefs
- * in the grammer.
+ * in the grammar.
*/
return (fibs - 1);
#else
return (RT_TABLEID_MAX);
#endif
}
-#ifdef __rtems__
-#include "parse-data.h"
-#endif /* __rtems__ */
diff --git a/freebsd/contrib/pf/pfctl/pf_print_state.c b/freebsd/sbin/pfctl/pf_print_state.c
index 237b3dea..1e09a01f 100644
--- a/freebsd/contrib/pf/pfctl/pf_print_state.c
+++ b/freebsd/sbin/pfctl/pf_print_state.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-namespace.h"
+#endif /* __rtems__ */
+
/* $OpenBSD: pf_print_state.c,v 1.52 2008/08/12 16:40:18 david Exp $ */
/*
@@ -32,18 +36,15 @@
*
*/
-#ifdef __rtems__
-#include <machine/rtems-bsd-program.h>
-#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/types.h>
#include <sys/socket.h>
-#ifdef __FreeBSD__
#include <sys/endian.h>
-#define betoh64 be64toh
-#endif
#include <net/if.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
@@ -51,11 +52,15 @@ __FBSDID("$FreeBSD$");
#include <arpa/inet.h>
#include <netdb.h>
+#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "pfctl_parser.h"
#include "pfctl.h"
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-pf_print_state-data.h"
+#endif /* __rtems__ */
void print_name(struct pf_addr *, sa_family_t);
@@ -124,9 +129,6 @@ print_addr(struct pf_addr_wrap *addr, sa_family_t af, int verbose)
case PF_ADDR_URPFFAILED:
printf("urpf-failed");
return;
- case PF_ADDR_RTLABEL:
- printf("route \"%s\"", addr->v.rtlabelname);
- return;
default:
printf("?");
return;
@@ -218,22 +220,30 @@ void
print_state(struct pfsync_state *s, int opts)
{
struct pfsync_state_peer *src, *dst;
- struct pfsync_state_key *sk, *nk;
+ struct pfsync_state_key *key, *sk, *nk;
struct protoent *p;
int min, sec;
+#ifndef __NO_STRICT_ALIGNMENT
+ struct pfsync_state_key aligned_key[2];
+
+ bcopy(&s->key, aligned_key, sizeof(aligned_key));
+ key = aligned_key;
+#else
+ key = s->key;
+#endif
if (s->direction == PF_OUT) {
src = &s->src;
dst = &s->dst;
- sk = &s->key[PF_SK_STACK];
- nk = &s->key[PF_SK_WIRE];
+ sk = &key[PF_SK_STACK];
+ nk = &key[PF_SK_WIRE];
if (s->proto == IPPROTO_ICMP || s->proto == IPPROTO_ICMPV6)
sk->port[0] = nk->port[0];
} else {
src = &s->dst;
dst = &s->src;
- sk = &s->key[PF_SK_WIRE];
- nk = &s->key[PF_SK_STACK];
+ sk = &key[PF_SK_WIRE];
+ nk = &key[PF_SK_STACK];
if (s->proto == IPPROTO_ICMP || s->proto == IPPROTO_ICMPV6)
sk->port[1] = nk->port[1];
}
@@ -295,8 +305,13 @@ print_state(struct pfsync_state *s, int opts)
const char *states[] = PFUDPS_NAMES;
printf(" %s:%s\n", states[src->state], states[dst->state]);
+#ifndef INET6
} else if (s->proto != IPPROTO_ICMP && src->state < PFOTHERS_NSTATES &&
dst->state < PFOTHERS_NSTATES) {
+#else
+ } else if (s->proto != IPPROTO_ICMP && s->proto != IPPROTO_ICMPV6 &&
+ src->state < PFOTHERS_NSTATES && dst->state < PFOTHERS_NSTATES) {
+#endif
/* XXX ICMP doesn't really have state levels */
const char *states[] = PFOTHERS_NAMES;
@@ -326,26 +341,17 @@ print_state(struct pfsync_state *s, int opts)
bcopy(s->packets[1], &packets[1], sizeof(u_int64_t));
bcopy(s->bytes[0], &bytes[0], sizeof(u_int64_t));
bcopy(s->bytes[1], &bytes[1], sizeof(u_int64_t));
- printf(", %llu:%llu pkts, %llu:%llu bytes",
-#ifdef __FreeBSD__
- (unsigned long long)betoh64(packets[0]),
- (unsigned long long)betoh64(packets[1]),
- (unsigned long long)betoh64(bytes[0]),
- (unsigned long long)betoh64(bytes[1]));
-#else
- betoh64(packets[0]),
- betoh64(packets[1]),
- betoh64(bytes[0]),
- betoh64(bytes[1]));
-#endif
+ printf(", %ju:%ju pkts, %ju:%ju bytes",
+ (uintmax_t )be64toh(packets[0]),
+ (uintmax_t )be64toh(packets[1]),
+ (uintmax_t )be64toh(bytes[0]),
+ (uintmax_t )be64toh(bytes[1]));
if (ntohl(s->anchor) != -1)
printf(", anchor %u", ntohl(s->anchor));
if (ntohl(s->rule) != -1)
printf(", rule %u", ntohl(s->rule));
if (s->state_flags & PFSTATE_SLOPPY)
printf(", sloppy");
- if (s->state_flags & PFSTATE_PFLOW)
- printf(", pflow");
if (s->sync_flags & PFSYNC_FLAG_SRCNODE)
printf(", source-track");
if (s->sync_flags & PFSYNC_FLAG_NATSRCNODE)
@@ -356,12 +362,8 @@ print_state(struct pfsync_state *s, int opts)
u_int64_t id;
bcopy(&s->id, &id, sizeof(u_int64_t));
- printf(" id: %016llx creatorid: %08x",
-#ifdef __FreeBSD__
- (unsigned long long)betoh64(id), ntohl(s->creatorid));
-#else
- betoh64(id), ntohl(s->creatorid));
-#endif
+ printf(" id: %016jx creatorid: %08x",
+ (uintmax_t )be64toh(id), ntohl(s->creatorid));
printf("\n");
}
}
diff --git a/freebsd/contrib/pf/pfctl/pfctl.c b/freebsd/sbin/pfctl/pfctl.c
index 19bd6c26..ab597068 100644
--- a/freebsd/contrib/pf/pfctl/pfctl.c
+++ b/freebsd/sbin/pfctl/pfctl.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-namespace.h"
+#endif /* __rtems__ */
+
/* $OpenBSD: pfctl.c,v 1.278 2008/08/31 20:18:17 jmc Exp $ */
/*
@@ -33,36 +37,28 @@
*
*/
-#ifdef __rtems__
-#include <machine/rtems-bsd-program.h>
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#ifdef __rtems__
#define __need_getopt_newlib
#include <getopt.h>
-
-/* We need some functions from kernel space. */
+#include <machine/rtems-bsd-program.h>
+#include <machine/rtems-bsd-commands.h>
#define pf_get_ruleset_number _bsd_pf_get_ruleset_number
#define pf_init_ruleset _bsd_pf_init_ruleset
-
-#include <rtems/linkersets.h>
-#include <machine/rtems-bsd-commands.h>
#endif /* __rtems__ */
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
-
-#ifdef __FreeBSD__
#include <sys/endian.h>
-#endif
#include <net/if.h>
#include <netinet/in.h>
#include <net/pfvar.h>
#include <arpa/inet.h>
-#include <altq/altq.h>
+#include <net/altq/altq.h>
#include <sys/sysctl.h>
#include <err.h>
@@ -70,6 +66,7 @@ __FBSDID("$FreeBSD$");
#include <fcntl.h>
#include <limits.h>
#include <netdb.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -77,6 +74,9 @@ __FBSDID("$FreeBSD$");
#include "pfctl_parser.h"
#include "pfctl.h"
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-pfctl-data.h"
+#endif /* __rtems__ */
void usage(void);
int pfctl_enable(int, int);
@@ -99,11 +99,7 @@ int pfctl_load_limit(struct pfctl *, unsigned int, unsigned int);
int pfctl_load_timeout(struct pfctl *, unsigned int, unsigned int);
int pfctl_load_debug(struct pfctl *, unsigned int);
int pfctl_load_logif(struct pfctl *, char *);
-#ifndef __rtems__
-int pfctl_load_hostid(struct pfctl *, unsigned int);
-#else /* __rtems__ */
int pfctl_load_hostid(struct pfctl *, u_int32_t);
-#endif /* __rtems__ */
int pfctl_get_pool(int, struct pf_pool *, u_int32_t, u_int32_t, int,
char *);
void pfctl_print_rule_counters(struct pf_rule *, int);
@@ -121,31 +117,8 @@ int pfctl_ruleset_trans(struct pfctl *, char *, struct pf_anchor *);
int pfctl_load_ruleset(struct pfctl *, char *,
struct pf_ruleset *, int, int);
int pfctl_load_rule(struct pfctl *, char *, struct pf_rule *, int);
-#ifndef __rtems__
-const char *pfctl_lookup_option(char *, const char **);
-#else /* __rtems__ */
const char *pfctl_lookup_option(char *, const char * const *);
-#endif /* __rtems__ */
-#ifndef __rtems__
-struct pf_anchor_global pf_anchors;
-struct pf_anchor pf_main_anchor;
-
-const char *clearopt;
-char *rulesopt;
-const char *showopt;
-const char *debugopt;
-char *anchoropt;
-const char *optiopt = NULL;
-char *pf_device = "/dev/pf";
-char *ifaceopt;
-char *tableopt;
-const char *tblcmdopt;
-int src_node_killers;
-char *src_node_kill[2];
-int state_killers;
-char *state_kill[2];
-#else /* __rtems__ */
static struct pf_anchor_global pf_anchors;
static struct pf_anchor pf_main_anchor;
@@ -163,18 +136,12 @@ static int src_node_killers;
static char *src_node_kill[2];
static int state_killers;
static char *state_kill[2];
-#endif /* __rtems__ */
-int loadopt;
-int altqsupport;
+int loadopt;
+int altqsupport;
-int dev = -1;
-#ifndef __rtems__
-int first_title = 1;
-int labels = 0;
-#else /* __rtems__ */
-static int first_title = 1;
-static int labels = 0;
-#endif /* __rtems__ */
+int dev = -1;
+static int first_title = 1;
+static int labels = 0;
#define INDENT(d, o) do { \
if (o) { \
@@ -192,7 +159,6 @@ static const struct {
{ "states", PF_LIMIT_STATES },
{ "src-nodes", PF_LIMIT_SRC_NODES },
{ "frags", PF_LIMIT_FRAGS },
- { "tables", PF_LIMIT_TABLES },
{ "table-entries", PF_LIMIT_TABLE_ENTRIES },
{ NULL, 0 }
};
@@ -254,53 +220,29 @@ static const struct {
{ NULL, NULL }
};
-#ifndef __rtems__
-static const char *clearopt_list[] = {
-#else /* __rtems__ */
static const char * const clearopt_list[] = {
-#endif /* __rtems__ */
"nat", "queue", "rules", "Sources",
"states", "info", "Tables", "osfp", "all", NULL
};
-#ifndef __rtems__
-static const char *showopt_list[] = {
-#else /* __rtems__ */
static const char * const showopt_list[] = {
-#endif /* __rtems__ */
"nat", "queue", "rules", "Anchors", "Sources", "states", "info",
"Interfaces", "labels", "timeouts", "memory", "Tables", "osfp",
"all", NULL
};
-#ifndef __rtems__
-static const char *tblcmdopt_list[] = {
-#else /* __rtems__ */
static const char * const tblcmdopt_list[] = {
-#endif /* __rtems__ */
"kill", "flush", "add", "delete", "load", "replace", "show",
"test", "zero", "expire", NULL
};
-#ifndef __rtems__
-static const char *debugopt_list[] = {
-#else /* __rtems__ */
static const char * const debugopt_list[] = {
-#endif /* __rtems__ */
"none", "urgent", "misc", "loud", NULL
};
-#ifndef __rtems__
-static const char *optiopt_list[] = {
-#else /* __rtems__ */
static const char * const optiopt_list[] = {
-#endif /* __rtems__ */
"none", "basic", "profile", NULL
};
-#ifdef __rtems__
-
-static const int nattype[3] = { PF_NAT, PF_RDR, PF_BINAT };
-#endif /* __rtems__ */
void
usage(void)
@@ -311,13 +253,13 @@ usage(void)
#define __progname "pfctl"
#endif /* __rtems__ */
- fprintf(stderr, "usage: %s [-AdeghmNnOPqRrvz] ", __progname);
- fprintf(stderr, "[-a anchor] [-D macro=value] [-F modifier]\n");
- fprintf(stderr, "\t[-f file] [-i interface] [-K host | network]\n");
- fprintf(stderr, "\t[-k host | network | label | id] ");
- fprintf(stderr, "[-o level] [-p device]\n");
- fprintf(stderr, "\t[-s modifier] ");
- fprintf(stderr, "[-t table -T command [address ...]] [-x level]\n");
+ fprintf(stderr,
+"usage: %s [-AdeghmNnOPqRrvz] [-a anchor] [-D macro=value] [-F modifier]\n"
+ "\t[-f file] [-i interface] [-K host | network]\n"
+ "\t[-k host | network | label | id] [-o level] [-p device]\n"
+ "\t[-s modifier] [-t table -T command [address ...]] [-x level]\n",
+ __progname);
+
exit(1);
}
@@ -327,10 +269,8 @@ pfctl_enable(int dev, int opts)
if (ioctl(dev, DIOCSTART)) {
if (errno == EEXIST)
errx(1, "pf already enabled");
-#ifdef __FreeBSD__
else if (errno == ESRCH)
errx(1, "pfil registeration failed");
-#endif
else
err(1, "DIOCSTART");
}
@@ -873,17 +813,17 @@ pfctl_print_rule_counters(struct pf_rule *rule, int opts)
}
if (opts & PF_OPT_VERBOSE) {
printf(" [ Evaluations: %-8llu Packets: %-8llu "
- "Bytes: %-10llu States: %-6u]\n",
+ "Bytes: %-10llu States: %-6ju]\n",
(unsigned long long)rule->evaluations,
(unsigned long long)(rule->packets[0] +
rule->packets[1]),
(unsigned long long)(rule->bytes[0] +
- rule->bytes[1]), rule->states_cur);
+ rule->bytes[1]), (uintmax_t)rule->u_states_cur);
if (!(opts & PF_OPT_DEBUG))
printf(" [ Inserted: uid %u pid %u "
- "State Creations: %-6u]\n",
+ "State Creations: %-6ju]\n",
(unsigned)rule->cuid, (unsigned)rule->cpid,
- rule->states_tot);
+ (uintmax_t)rule->u_states_tot);
}
}
@@ -985,7 +925,7 @@ pfctl_show_rules(int dev, char *path, int opts, enum pfctl_show format,
case PFCTL_SHOW_LABELS:
if (pr.rule.label[0]) {
printf("%s %llu %llu %llu %llu"
- " %llu %llu %llu %llu\n",
+ " %llu %llu %llu %ju\n",
pr.rule.label,
(unsigned long long)pr.rule.evaluations,
(unsigned long long)(pr.rule.packets[0] +
@@ -996,7 +936,7 @@ pfctl_show_rules(int dev, char *path, int opts, enum pfctl_show format,
(unsigned long long)pr.rule.bytes[0],
(unsigned long long)pr.rule.packets[1],
(unsigned long long)pr.rule.bytes[1],
- (unsigned long long)pr.rule.states_tot);
+ (uintmax_t)pr.rule.u_states_tot);
}
break;
case PFCTL_SHOW_RULES:
@@ -1050,6 +990,8 @@ pfctl_show_nat(int dev, int opts, char *anchorname)
u_int32_t mnr, nr;
#ifndef __rtems__
static int nattype[3] = { PF_NAT, PF_RDR, PF_BINAT };
+#else /* __rtems__ */
+ static const int nattype[3] = { PF_NAT, PF_RDR, PF_BINAT };
#endif /* __rtems__ */
int i, dotitle = opts & PF_OPT_SHOWALL;
@@ -1631,9 +1573,6 @@ pfctl_fopen(const char *name, const char *mode)
void
pfctl_init_options(struct pfctl *pf)
{
- int64_t mem;
- int mib[2];
- size_t size;
pf->timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
pf->timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
@@ -1659,23 +1598,8 @@ pfctl_init_options(struct pfctl *pf)
pf->limit[PF_LIMIT_STATES] = PFSTATE_HIWAT;
pf->limit[PF_LIMIT_FRAGS] = PFFRAG_FRENT_HIWAT;
pf->limit[PF_LIMIT_SRC_NODES] = PFSNODE_HIWAT;
- pf->limit[PF_LIMIT_TABLES] = PFR_KTABLE_HIWAT;
pf->limit[PF_LIMIT_TABLE_ENTRIES] = PFR_KENTRY_HIWAT;
-#ifndef __rtems__
- mib[0] = CTL_HW;
-#ifdef __FreeBSD__
- mib[1] = HW_PHYSMEM;
-#else
- mib[1] = HW_PHYSMEM64;
-#endif
- size = sizeof(mem);
- if (sysctl(mib, 2, &mem, &size, NULL, 0) == -1)
- err(1, "sysctl");
- if (mem <= 100*1024*1024)
-#endif /* __rtems__ */
- pf->limit[PF_LIMIT_TABLE_ENTRIES] = PFR_KENTRY_HIWAT_SMALL;
-
pf->debug = PF_DEBUG_URGENT;
}
@@ -1941,6 +1865,7 @@ pfctl_set_debug(struct pfctl *pf, char *d)
}
pf->debug_set = 1;
+ level = pf->debug;
if ((pf->opts & PF_OPT_NOACTION) == 0)
if (ioctl(dev, DIOCSETDEBUG, &level))
@@ -2025,7 +1950,7 @@ pfctl_test_altqsupport(int dev, int opts)
if (ioctl(dev, DIOCGETALTQS, &pa)) {
if (errno == ENODEV) {
- if (!(opts & PF_OPT_QUIET))
+ if (opts & PF_OPT_VERBOSE)
fprintf(stderr, "No ALTQ support in kernel\n"
"ALTQ related functions disabled\n");
return (0);
@@ -2075,11 +2000,7 @@ pfctl_show_anchors(int dev, int opts, char *anchorname)
}
const char *
-#ifndef __rtems__
-pfctl_lookup_option(char *cmd, const char **list)
-#else /* __rtems__ */
pfctl_lookup_option(char *cmd, const char * const *list)
-#endif /* __rtems__ */
{
if (cmd != NULL && *cmd)
for (; *list; list++)
@@ -2097,7 +2018,7 @@ int
rtems_bsd_command_pfctl(int argc, char *argv[])
{
int exit_code;
- const void *data_begin;
+ void *data_begin;
size_t data_size;
data_begin = RTEMS_LINKER_SET_BEGIN(bsd_prog_pfctl);
@@ -2110,7 +2031,6 @@ rtems_bsd_command_pfctl(int argc, char *argv[])
return exit_code;
}
-
#endif /* __rtems__ */
int
main(int argc, char *argv[])
@@ -2122,7 +2042,6 @@ main(int argc, char *argv[])
int optimize = PF_OPTIMIZE_BASIC;
char anchorname[MAXPATHLEN];
char *path;
-
#ifdef __rtems__
struct getopt_data getopt_data;
memset(&getopt_data, 0, sizeof(getopt_data));
@@ -2320,7 +2239,7 @@ main(int argc, char *argv[])
/* turn off options */
opts &= ~ (PF_OPT_DISABLE | PF_OPT_ENABLE);
clearopt = showopt = debugopt = NULL;
-#if defined(__FreeBSD__) && !defined(ENABLE_ALTQ)
+#if !defined(ENABLE_ALTQ)
altqsupport = 0;
#else
altqsupport = 1;
@@ -2524,6 +2443,3 @@ main(int argc, char *argv[])
exit(error);
}
-#ifdef __rtems__
-#include "pfctl-data.h"
-#endif /* __rtems__ */
diff --git a/freebsd/contrib/pf/pfctl/pfctl.h b/freebsd/sbin/pfctl/pfctl.h
index 2c69bc20..2c69bc20 100644
--- a/freebsd/contrib/pf/pfctl/pfctl.h
+++ b/freebsd/sbin/pfctl/pfctl.h
diff --git a/freebsd/contrib/pf/pfctl/pfctl_altq.c b/freebsd/sbin/pfctl/pfctl_altq.c
index a5dea13f..145d60ae 100644
--- a/freebsd/contrib/pf/pfctl/pfctl_altq.c
+++ b/freebsd/sbin/pfctl/pfctl_altq.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-namespace.h"
+#endif /* __rtems__ */
+
/* $OpenBSD: pfctl_altq.c,v 1.93 2007/10/15 02:16:35 deraadt Exp $ */
/*
@@ -20,12 +24,12 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
-#ifdef __rtems__
-#include <machine/rtems-bsd-program.h>
-#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -43,23 +47,23 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <unistd.h>
-#include <altq/altq.h>
-#include <altq/altq_cbq.h>
-#include <altq/altq_priq.h>
-#include <altq/altq_hfsc.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_cbq.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_priq.h>
+#include <net/altq/altq_hfsc.h>
+#include <net/altq/altq_fairq.h>
#include "pfctl_parser.h"
#include "pfctl.h"
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-pfctl_altq-data.h"
+#endif /* __rtems__ */
#define is_sc_null(sc) (((sc) == NULL) || ((sc)->m1 == 0 && (sc)->m2 == 0))
-#ifndef __rtems__
-TAILQ_HEAD(altqs, pf_altq) altqs = TAILQ_HEAD_INITIALIZER(altqs);
-LIST_HEAD(gen_sc, segment) rtsc, lssc;
-#else /* __rtems__ */
static TAILQ_HEAD(altqs, pf_altq) altqs = TAILQ_HEAD_INITIALIZER(altqs);
static LIST_HEAD(gen_sc, segment) rtsc, lssc;
-#endif /* __rtems__ */
struct pf_altq *qname_to_pfaltq(const char *, const char *);
u_int32_t qname_to_qid(const char *);
@@ -69,6 +73,9 @@ static int cbq_compute_idletime(struct pfctl *, struct pf_altq *);
static int check_commit_cbq(int, int, struct pf_altq *);
static int print_cbq_opts(const struct pf_altq *);
+static int print_codel_opts(const struct pf_altq *,
+ const struct node_queue_opt *);
+
static int eval_pfqueue_priq(struct pfctl *, struct pf_altq *);
static int check_commit_priq(int, int, struct pf_altq *);
static int print_priq_opts(const struct pf_altq *);
@@ -78,6 +85,11 @@ static int check_commit_hfsc(int, int, struct pf_altq *);
static int print_hfsc_opts(const struct pf_altq *,
const struct node_queue_opt *);
+static int eval_pfqueue_fairq(struct pfctl *, struct pf_altq *);
+static int print_fairq_opts(const struct pf_altq *,
+ const struct node_queue_opt *);
+static int check_commit_fairq(int, int, struct pf_altq *);
+
static void gsc_add_sc(struct gen_sc *, struct service_curve *);
static int is_gsc_under_sc(struct gen_sc *,
struct service_curve *);
@@ -98,6 +110,8 @@ int eval_queue_opts(struct pf_altq *, struct node_queue_opt *,
u_int32_t eval_bwspec(struct node_queue_bw *, u_int32_t);
void print_hfsc_sc(const char *, u_int, u_int, u_int,
const struct node_hfsc_sc *);
+void print_fairq_sc(const char *, u_int, u_int, u_int,
+ const struct node_fairq_sc *);
void
pfaltq_store(struct pf_altq *a)
@@ -183,6 +197,14 @@ print_altq(const struct pf_altq *a, unsigned int level,
if (!print_hfsc_opts(a, qopts))
printf("hfsc ");
break;
+ case ALTQT_FAIRQ:
+ if (!print_fairq_opts(a, qopts))
+ printf("fairq ");
+ break;
+ case ALTQT_CODEL:
+ if (!print_codel_opts(a, qopts))
+ printf("codel ");
+ break;
}
if (bw != NULL && bw->bw_percent > 0) {
@@ -213,7 +235,8 @@ print_queue(const struct pf_altq *a, unsigned int level,
printf("%s ", a->qname);
if (print_interface)
printf("on %s ", a->ifname);
- if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC) {
+ if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC ||
+ a->scheduler == ALTQT_FAIRQ) {
if (bw != NULL && bw->bw_percent > 0) {
if (bw->bw_percent < 100)
printf("bandwidth %u%% ", bw->bw_percent);
@@ -234,6 +257,9 @@ print_queue(const struct pf_altq *a, unsigned int level,
case ALTQT_HFSC:
print_hfsc_opts(a, qopts);
break;
+ case ALTQT_FAIRQ:
+ print_fairq_opts(a, qopts);
+ break;
}
}
@@ -304,6 +330,9 @@ check_commit_altq(int dev, int opts)
case ALTQT_HFSC:
error = check_commit_hfsc(dev, opts, altq);
break;
+ case ALTQT_FAIRQ:
+ error = check_commit_fairq(dev, opts, altq);
+ break;
default:
break;
}
@@ -352,7 +381,8 @@ eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
if (pa->qlimit == 0)
pa->qlimit = DEFAULT_QLIMIT;
- if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC) {
+ if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC ||
+ pa->scheduler == ALTQT_FAIRQ) {
pa->bandwidth = eval_bwspec(bw,
parent == NULL ? 0 : parent->bandwidth);
@@ -398,6 +428,9 @@ eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
case ALTQT_HFSC:
error = eval_pfqueue_hfsc(pf, pa);
break;
+ case ALTQT_FAIRQ:
+ error = eval_pfqueue_fairq(pf, pa);
+ break;
default:
break;
}
@@ -578,6 +611,8 @@ print_cbq_opts(const struct pf_altq *a)
printf(" ecn");
if (opts->flags & CBQCLF_RIO)
printf(" rio");
+ if (opts->flags & CBQCLF_CODEL)
+ printf(" codel");
if (opts->flags & CBQCLF_CLEARDSCP)
printf(" cleardscp");
if (opts->flags & CBQCLF_FLOWVALVE)
@@ -665,6 +700,8 @@ print_priq_opts(const struct pf_altq *a)
printf(" ecn");
if (opts->flags & PRCF_RIO)
printf(" rio");
+ if (opts->flags & PRCF_CODEL)
+ printf(" codel");
if (opts->flags & PRCF_CLEARDSCP)
printf(" cleardscp");
if (opts->flags & PRCF_DEFAULTCLASS)
@@ -817,6 +854,85 @@ err_ret:
return (-1);
}
+/*
+ * FAIRQ support functions
+ */
+static int
+eval_pfqueue_fairq(struct pfctl *pf __unused, struct pf_altq *pa)
+{
+ struct pf_altq *altq, *parent;
+ struct fairq_opts *opts;
+ struct service_curve sc;
+
+ opts = &pa->pq_u.fairq_opts;
+
+ if (pa->parent[0] == 0) {
+ /* root queue */
+ opts->lssc_m1 = pa->ifbandwidth;
+ opts->lssc_m2 = pa->ifbandwidth;
+ opts->lssc_d = 0;
+ return (0);
+ }
+
+ LIST_INIT(&lssc);
+
+ /* if link_share is not specified, use bandwidth */
+ if (opts->lssc_m2 == 0)
+ opts->lssc_m2 = pa->bandwidth;
+
+ /*
+ * admission control:
+ * for the real-time service curve, the sum of the service curves
+ * should not exceed 80% of the interface bandwidth. 20% is reserved
+ * not to over-commit the actual interface bandwidth.
+ * for the link-sharing service curve, the sum of the child service
+ * curve should not exceed the parent service curve.
+ * for the upper-limit service curve, the assigned bandwidth should
+ * be smaller than the interface bandwidth, and the upper-limit should
+ * be larger than the real-time service curve when both are defined.
+ */
+ parent = qname_to_pfaltq(pa->parent, pa->ifname);
+ if (parent == NULL)
+ errx(1, "parent %s not found for %s", pa->parent, pa->qname);
+
+ TAILQ_FOREACH(altq, &altqs, entries) {
+ if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
+ continue;
+ if (altq->qname[0] == 0) /* this is for interface */
+ continue;
+
+ if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0)
+ continue;
+
+ /* if the class has a link-sharing service curve, add it. */
+ if (opts->lssc_m2 != 0 && altq->pq_u.fairq_opts.lssc_m2 != 0) {
+ sc.m1 = altq->pq_u.fairq_opts.lssc_m1;
+ sc.d = altq->pq_u.fairq_opts.lssc_d;
+ sc.m2 = altq->pq_u.fairq_opts.lssc_m2;
+ gsc_add_sc(&lssc, &sc);
+ }
+ }
+
+ /* check the link-sharing service curve. */
+ if (opts->lssc_m2 != 0) {
+ sc.m1 = parent->pq_u.fairq_opts.lssc_m1;
+ sc.d = parent->pq_u.fairq_opts.lssc_d;
+ sc.m2 = parent->pq_u.fairq_opts.lssc_m2;
+ if (!is_gsc_under_sc(&lssc, &sc)) {
+ warnx("link-sharing sc exceeds parent's sc");
+ goto err_ret;
+ }
+ }
+
+ gsc_destroy(&lssc);
+
+ return (0);
+
+err_ret:
+ gsc_destroy(&lssc);
+ return (-1);
+}
+
static int
check_commit_hfsc(int dev, int opts, struct pf_altq *pa)
{
@@ -857,6 +973,43 @@ check_commit_hfsc(int dev, int opts, struct pf_altq *pa)
}
static int
+check_commit_fairq(int dev __unused, int opts __unused, struct pf_altq *pa)
+{
+ struct pf_altq *altq, *def = NULL;
+ int default_class;
+ int error = 0;
+
+ /* check if fairq has one default queue for this interface */
+ default_class = 0;
+ TAILQ_FOREACH(altq, &altqs, entries) {
+ if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
+ continue;
+ if (altq->qname[0] == 0) /* this is for interface */
+ continue;
+ if (altq->pq_u.fairq_opts.flags & FARF_DEFAULTCLASS) {
+ default_class++;
+ def = altq;
+ }
+ }
+ if (default_class != 1) {
+ warnx("should have one default queue on %s", pa->ifname);
+ return (1);
+ }
+ /* make sure the default queue is a leaf */
+ TAILQ_FOREACH(altq, &altqs, entries) {
+ if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
+ continue;
+ if (altq->qname[0] == 0) /* this is for interface */
+ continue;
+ if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) {
+ warnx("default queue is not a leaf");
+ error++;
+ }
+ }
+ return (error);
+}
+
+static int
print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
{
const struct hfsc_opts *opts;
@@ -881,6 +1034,8 @@ print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
printf(" ecn");
if (opts->flags & HFCF_RIO)
printf(" rio");
+ if (opts->flags & HFCF_CODEL)
+ printf(" codel");
if (opts->flags & HFCF_CLEARDSCP)
printf(" cleardscp");
if (opts->flags & HFCF_DEFAULTCLASS)
@@ -902,6 +1057,67 @@ print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
return (0);
}
+static int
+print_codel_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
+{
+ const struct codel_opts *opts;
+
+ opts = &a->pq_u.codel_opts;
+ if (opts->target || opts->interval || opts->ecn) {
+ printf("codel(");
+ if (opts->target)
+ printf(" target %d", opts->target);
+ if (opts->interval)
+ printf(" interval %d", opts->interval);
+ if (opts->ecn)
+ printf("ecn");
+ printf(" ) ");
+
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+print_fairq_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
+{
+ const struct fairq_opts *opts;
+ const struct node_fairq_sc *loc_lssc;
+
+ opts = &a->pq_u.fairq_opts;
+ if (qopts == NULL)
+ loc_lssc = NULL;
+ else
+ loc_lssc = &qopts->data.fairq_opts.linkshare;
+
+ if (opts->flags ||
+ (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
+ opts->lssc_d != 0))) {
+ printf("fairq(");
+ if (opts->flags & FARF_RED)
+ printf(" red");
+ if (opts->flags & FARF_ECN)
+ printf(" ecn");
+ if (opts->flags & FARF_RIO)
+ printf(" rio");
+ if (opts->flags & FARF_CODEL)
+ printf(" codel");
+ if (opts->flags & FARF_CLEARDSCP)
+ printf(" cleardscp");
+ if (opts->flags & FARF_DEFAULTCLASS)
+ printf(" default");
+ if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
+ opts->lssc_d != 0))
+ print_fairq_sc("linkshare", opts->lssc_m1, opts->lssc_d,
+ opts->lssc_m2, loc_lssc);
+ printf(" ) ");
+
+ return (1);
+ } else
+ return (0);
+}
+
/*
* admission control using generalized service curve
*/
@@ -1083,8 +1299,7 @@ sc_x2y(struct service_curve *sc, double x)
#ifdef __rtems__
static char r2sbuf[R2S_BUFS][RATESTR_MAX]; /* ring bufer */
-static int r2sidx = 0;
-
+static int idx = 0;
#endif /* __rtems__ */
char *
rate2str(double rate)
@@ -1097,15 +1312,9 @@ rate2str(double rate)
int i;
static const char unit[] = " KMG";
-#ifndef __rtems__
buf = r2sbuf[idx++];
if (idx == R2S_BUFS)
idx = 0;
-#else /* __rtems__ */
- buf = r2sbuf[r2sidx++];
- if (r2sidx == R2S_BUFS)
- r2sidx = 0;
-#endif /* __rtems__ */
for (i = 0; rate >= 1000 && i <= 3; i++)
rate /= 1000;
@@ -1145,7 +1354,7 @@ getifspeed(char *ifname)
struct ifreq ifr;
struct if_data ifrdat;
- if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
+ if ((s = socket(get_socket_domain(), SOCK_DGRAM, 0)) < 0)
err(1, "socket");
bzero(&ifr, sizeof(ifr));
if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
@@ -1166,7 +1375,7 @@ getifmtu(char *ifname)
int s;
struct ifreq ifr;
- if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
+ if ((s = socket(get_socket_domain(), SOCK_DGRAM, 0)) < 0)
err(1, "socket");
bzero(&ifr, sizeof(ifr));
if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
@@ -1234,6 +1443,28 @@ eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts,
opts->data.hfsc_opts.upperlimit.d;
}
break;
+ case ALTQT_FAIRQ:
+ pa->pq_u.fairq_opts.flags = opts->data.fairq_opts.flags;
+ pa->pq_u.fairq_opts.nbuckets = opts->data.fairq_opts.nbuckets;
+ pa->pq_u.fairq_opts.hogs_m1 =
+ eval_bwspec(&opts->data.fairq_opts.hogs_bw, ref_bw);
+
+ if (opts->data.fairq_opts.linkshare.used) {
+ pa->pq_u.fairq_opts.lssc_m1 =
+ eval_bwspec(&opts->data.fairq_opts.linkshare.m1,
+ ref_bw);
+ pa->pq_u.fairq_opts.lssc_m2 =
+ eval_bwspec(&opts->data.fairq_opts.linkshare.m2,
+ ref_bw);
+ pa->pq_u.fairq_opts.lssc_d =
+ opts->data.fairq_opts.linkshare.d;
+ }
+ break;
+ case ALTQT_CODEL:
+ pa->pq_u.codel_opts.target = opts->data.codel_opts.target;
+ pa->pq_u.codel_opts.interval = opts->data.codel_opts.interval;
+ pa->pq_u.codel_opts.ecn = opts->data.codel_opts.ecn;
+ break;
default:
warnx("eval_queue_opts: unknown scheduler type %u",
opts->qtype);
@@ -1279,6 +1510,27 @@ print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2,
if (d != 0)
printf(")");
}
-#ifdef __rtems__
-#include "pfctl_altq-data.h"
-#endif /* __rtems__ */
+
+void
+print_fairq_sc(const char *scname, u_int m1, u_int d, u_int m2,
+ const struct node_fairq_sc *sc)
+{
+ printf(" %s", scname);
+
+ if (d != 0) {
+ printf("(");
+ if (sc != NULL && sc->m1.bw_percent > 0)
+ printf("%u%%", sc->m1.bw_percent);
+ else
+ printf("%s", rate2str((double)m1));
+ printf(" %u", d);
+ }
+
+ if (sc != NULL && sc->m2.bw_percent > 0)
+ printf(" %u%%", sc->m2.bw_percent);
+ else
+ printf(" %s", rate2str((double)m2));
+
+ if (d != 0)
+ printf(")");
+}
diff --git a/freebsd/contrib/pf/pfctl/pfctl_optimize.c b/freebsd/sbin/pfctl/pfctl_optimize.c
index b62359f3..b8f44e8b 100644
--- a/freebsd/contrib/pf/pfctl/pfctl_optimize.c
+++ b/freebsd/sbin/pfctl/pfctl_optimize.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-namespace.h"
+#endif /* __rtems__ */
+
/* $OpenBSD: pfctl_optimize.c,v 1.17 2008/05/06 03:45:21 mpf Exp $ */
/*
@@ -18,15 +22,13 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
#ifdef __rtems__
#include <machine/rtems-bsd-program.h>
-
-/* We need some functions from kernel space. */
#define pf_find_or_create_ruleset _bsd_pf_find_or_create_ruleset
#endif /* __rtems__ */
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -48,6 +50,15 @@ __FBSDID("$FreeBSD$");
#include "pfctl_parser.h"
#include "pfctl.h"
+#ifdef __rtems__
+struct pf_rule_field {
+ const char *prf_name;
+ int prf_type;
+ size_t prf_offset;
+ size_t prf_size;
+};
+#include "rtems-bsd-pfctl-pfctl_optimize-data.h"
+#endif /* __rtems__ */
/* The size at which a table becomes faster than individual rules */
#define TABLE_THRESHOLD 6
@@ -99,15 +110,15 @@ enum {
DC, /* we just don't care about the field */
NEVER}; /* we should never see this field set?!? */
#ifndef __rtems__
-struct pf_rule_field {
-#else /* __rtems__ */
static struct pf_rule_field {
-#endif /* __rtems__ */
const char *prf_name;
int prf_type;
size_t prf_offset;
size_t prf_size;
} pf_rule_desc[] = {
+#else /* __rtems__ */
+static struct pf_rule_field pf_rule_desc[] = {
+#endif /* __rtems__ */
#define PF_RULE_FIELD(field, ty) \
{#field, \
ty, \
@@ -216,10 +227,6 @@ static struct pf_rule_field {
PF_RULE_FIELD(min_ttl, NEVER),
PF_RULE_FIELD(set_tos, NEVER),
};
-#ifdef __rtems__
-static int pf_opt_create_table_num;
-static int add_opt_table_num = 0;
-#endif /* __rtems__ */
@@ -258,14 +265,9 @@ int superblock_inclusive(struct superblock *, struct pf_opt_rule *);
void superblock_free(struct pfctl *, struct superblock *);
-#ifndef __rtems__
-int (*skip_comparitors[PF_SKIP_COUNT])(struct pf_rule *, struct pf_rule *);
-const char *skip_comparitors_names[PF_SKIP_COUNT];
-#else /* __rtems__ */
static int (*skip_comparitors[PF_SKIP_COUNT])(struct pf_rule *,
struct pf_rule *);
static const char *skip_comparitors_names[PF_SKIP_COUNT];
-#endif /* __rtems__ */
#define PF_SKIP_COMPARITORS { \
{ "ifp", PF_SKIP_IFP, skip_cmp_ifp }, \
{ "dir", PF_SKIP_DIR, skip_cmp_dir }, \
@@ -277,13 +279,8 @@ static const char *skip_comparitors_names[PF_SKIP_COUNT];
{ "dport", PF_SKIP_DST_PORT, skip_cmp_dst_port } \
}
-#ifndef __rtems__
-struct pfr_buffer table_buffer;
-int table_identifier;
-#else /* __rtems__ */
static struct pfr_buffer table_buffer;
static int table_identifier;
-#endif /* __rtems__ */
int
@@ -1246,6 +1243,9 @@ skip_init(void)
/*
* Add a host/netmask to a table
*/
+#ifdef __rtems__
+static int add_opt_tablenum = 0;
+#endif /* __rtems__ */
int
add_opt_table(struct pfctl *pf, struct pf_opt_tbl **tbl, sa_family_t af,
struct pf_rule_addr *addr)
@@ -1271,7 +1271,7 @@ add_opt_table(struct pfctl *pf, struct pf_opt_tbl **tbl, sa_family_t af,
#ifndef __rtems__
PF_OPT_TABLE_PREFIX, tablenum++);
#else /* __rtems__ */
- PF_OPT_TABLE_PREFIX, add_opt_table_num++);
+ PF_OPT_TABLE_PREFIX, add_opt_tablenum++);
#endif /* __rtems__ */
DEBUG("creating table <%s>", (*tbl)->pt_name);
}
@@ -1308,10 +1308,14 @@ add_opt_table(struct pfctl *pf, struct pf_opt_tbl **tbl, sa_family_t af,
return (0);
}
+
/*
* Do the dirty work of choosing an unused table name and creating it.
* (be careful with the table name, it might already be used in another anchor)
*/
+#ifdef __rtems__
+static int pf_opt_create_tablenum;
+#endif /* __rtems__ */
int
pf_opt_create_table(struct pfctl *pf, struct pf_opt_tbl *tbl)
{
@@ -1343,13 +1347,13 @@ again:
#ifndef __rtems__
PF_OPT_TABLE_PREFIX, table_identifier, tablenum);
#else /* __rtems__ */
- PF_OPT_TABLE_PREFIX, table_identifier, pf_opt_create_table_num);
+ PF_OPT_TABLE_PREFIX, table_identifier, pf_opt_create_tablenum);
#endif /* __rtems__ */
snprintf(tbl->pt_name, sizeof(tbl->pt_name), "%s%x_%d",
#ifndef __rtems__
PF_OPT_TABLE_PREFIX, table_identifier, tablenum);
#else /* __rtems__ */
- PF_OPT_TABLE_PREFIX, table_identifier, pf_opt_create_table_num);
+ PF_OPT_TABLE_PREFIX, table_identifier, pf_opt_create_tablenum);
#endif /* __rtems__ */
PFRB_FOREACH(t, &table_buffer) {
if (strcasecmp(t->pfrt_name, tbl->pt_name) == 0) {
@@ -1363,7 +1367,7 @@ again:
#ifndef __rtems__
tablenum++;
#else /* __rtems__ */
- pf_opt_create_table_num++;
+ pf_opt_create_tablenum++;
#endif /* __rtems__ */
@@ -1698,6 +1702,4 @@ superblock_free(struct pfctl *pf, struct superblock *block)
superblock_free(pf, block->sb_profiled_block);
free(block);
}
-#ifdef __rtems__
-#include "pfctl_optimize-data.h"
-#endif /* __rtems__ */
+
diff --git a/freebsd/contrib/pf/pfctl/pfctl_osfp.c b/freebsd/sbin/pfctl/pfctl_osfp.c
index cc9d86e8..dd672623 100644
--- a/freebsd/contrib/pf/pfctl/pfctl_osfp.c
+++ b/freebsd/sbin/pfctl/pfctl_osfp.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-namespace.h"
+#endif /* __rtems__ */
+
/* $OpenBSD: pfctl_osfp.c,v 1.14 2006/04/08 02:13:14 ray Exp $ */
/*
@@ -18,6 +22,9 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
#ifdef __rtems__
#include <machine/rtems-bsd-program.h>
#endif /* __rtems__ */
@@ -41,6 +48,9 @@
#include "pfctl_parser.h"
#include "pfctl.h"
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-pfctl_osfp-data.h"
+#endif /* __rtems__ */
#ifndef MIN
# define MIN(a,b) (((a) < (b)) ? (a) : (b))
@@ -69,15 +79,9 @@ struct name_entry {
struct name_list nm_sublist;
int nm_sublist_num;
};
-#ifndef __rtems__
-struct name_list classes = LIST_HEAD_INITIALIZER(&classes);
-int class_count;
-int fingerprint_count;
-#else /* __rtems__ */
static struct name_list classes = LIST_HEAD_INITIALIZER(&classes);
static int class_count;
static int fingerprint_count;
-#endif /* __rtems__ */
void add_fingerprint(int, int, struct pf_osfp_ioctl *);
struct name_entry *fingerprint_name_entry(struct name_list *, char *);
@@ -1019,18 +1023,7 @@ get_field(char **line, size_t *len, int *fieldlen)
const char *
print_ioctl(struct pf_osfp_ioctl *fp)
{
-#ifndef __rtems__
- static char buf[1024];
-#else /* __rtems__ */
- /* Note on RTEMS port:
- * This buffer is static. So normally it would have to be initialized to
- * zero every time the program starts. But in this special case it is
- * set to zero inside the function. Therefore it is not necessary to
- * move it. If it would be moved out of the function, the name would
- * have to be changed. This would be a lot of change in this function!
- */
static char buf[1024];
-#endif /* __rtems__ */
char tmp[32];
int i, opt;
@@ -1128,6 +1121,3 @@ print_ioctl(struct pf_osfp_ioctl *fp)
return (buf);
}
-#ifdef __rtems__
-#include "pfctl_osfp-data.h"
-#endif /* __rtems__ */
diff --git a/freebsd/contrib/pf/pfctl/pfctl_parser.c b/freebsd/sbin/pfctl/pfctl_parser.c
index 0efdaad9..0ff7deec 100644
--- a/freebsd/contrib/pf/pfctl/pfctl_parser.c
+++ b/freebsd/sbin/pfctl/pfctl_parser.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-namespace.h"
+#endif /* __rtems__ */
+
/* $OpenBSD: pfctl_parser.c,v 1.240 2008/06/10 20:55:02 mcbride Exp $ */
/*
@@ -33,19 +37,16 @@
*
*/
-#ifdef __rtems__
-#include <machine/rtems-bsd-program.h>
-#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <rtems/bsd/sys/param.h>
-#ifdef __rtems__
-#include <rtems/bsd/sys/errno.h>
-#endif /* __rtems__ */
#include <sys/proc.h>
#include <net/if.h>
#include <netinet/in.h>
@@ -69,6 +70,9 @@ __FBSDID("$FreeBSD$");
#include "pfctl_parser.h"
#include "pfctl.h"
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-pfctl_parser-data.h"
+#endif /* __rtems__ */
void print_op (u_int8_t, const char *, const char *);
void print_port (u_int8_t, u_int16_t, u_int16_t, const char *, int);
@@ -84,11 +88,7 @@ struct node_host *host_v4(const char *, int);
struct node_host *host_v6(const char *, int);
struct node_host *host_dns(const char *, int, int);
-#ifndef __rtems__
-const char *tcpflags = "FSRPAUEW";
-#else /* __rtems__ */
const char * const tcpflags = "FSRPAUEW";
-#endif /* __rtems__ */
static const struct icmptypeent icmp_type[] = {
{ "echoreq", ICMP_ECHO },
@@ -225,14 +225,12 @@ geticmptypebynumber(u_int8_t type, sa_family_t af)
unsigned int i;
if (af != AF_INET6) {
- for (i=0; i < (sizeof (icmp_type) / sizeof(icmp_type[0]));
- i++) {
+ for (i=0; i < nitems(icmp_type); i++) {
if (type == icmp_type[i].type)
return (&icmp_type[i]);
}
} else {
- for (i=0; i < (sizeof (icmp6_type) /
- sizeof(icmp6_type[0])); i++) {
+ for (i=0; i < nitems(icmp6_type); i++) {
if (type == icmp6_type[i].type)
return (&icmp6_type[i]);
}
@@ -246,14 +244,12 @@ geticmptypebyname(char *w, sa_family_t af)
unsigned int i;
if (af != AF_INET6) {
- for (i=0; i < (sizeof (icmp_type) / sizeof(icmp_type[0]));
- i++) {
+ for (i=0; i < nitems(icmp_type); i++) {
if (!strcmp(w, icmp_type[i].name))
return (&icmp_type[i]);
}
} else {
- for (i=0; i < (sizeof (icmp6_type) /
- sizeof(icmp6_type[0])); i++) {
+ for (i=0; i < nitems(icmp6_type); i++) {
if (!strcmp(w, icmp6_type[i].name))
return (&icmp6_type[i]);
}
@@ -267,15 +263,13 @@ geticmpcodebynumber(u_int8_t type, u_int8_t code, sa_family_t af)
unsigned int i;
if (af != AF_INET6) {
- for (i=0; i < (sizeof (icmp_code) / sizeof(icmp_code[0]));
- i++) {
+ for (i=0; i < nitems(icmp_code); i++) {
if (type == icmp_code[i].type &&
code == icmp_code[i].code)
return (&icmp_code[i]);
}
} else {
- for (i=0; i < (sizeof (icmp6_code) /
- sizeof(icmp6_code[0])); i++) {
+ for (i=0; i < nitems(icmp6_code); i++) {
if (type == icmp6_code[i].type &&
code == icmp6_code[i].code)
return (&icmp6_code[i]);
@@ -290,15 +284,13 @@ geticmpcodebyname(u_long type, char *w, sa_family_t af)
unsigned int i;
if (af != AF_INET6) {
- for (i=0; i < (sizeof (icmp_code) / sizeof(icmp_code[0]));
- i++) {
+ for (i=0; i < nitems(icmp_code); i++) {
if (type == icmp_code[i].type &&
!strcmp(w, icmp_code[i].name))
return (&icmp_code[i]);
}
} else {
- for (i=0; i < (sizeof (icmp6_code) /
- sizeof(icmp6_code[0])); i++) {
+ for (i=0; i < nitems(icmp6_code); i++) {
if (type == icmp6_code[i].type &&
!strcmp(w, icmp6_code[i].name))
return (&icmp6_code[i]);
@@ -493,17 +485,10 @@ print_pool(struct pf_pool *pool, u_int16_t p1, u_int16_t p2,
printf(" static-port");
}
-#ifndef __rtems__
-const char *pf_reasons[PFRES_MAX+1] = PFRES_NAMES;
-const char *pf_lcounters[LCNT_MAX+1] = LCNT_NAMES;
-const char *pf_fcounters[FCNT_MAX+1] = FCNT_NAMES;
-const char *pf_scounters[FCNT_MAX+1] = FCNT_NAMES;
-#else /* __rtems__ */
const char * const pf_reasons[PFRES_MAX+1] = PFRES_NAMES;
const char * const pf_lcounters[LCNT_MAX+1] = LCNT_NAMES;
const char * const pf_fcounters[FCNT_MAX+1] = FCNT_NAMES;
const char * const pf_scounters[FCNT_MAX+1] = FCNT_NAMES;
-#endif /* __rtems__ */
void
print_status(struct pf_status *s, int opts)
@@ -868,6 +853,21 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose, int numeric)
}
if (r->tos)
printf(" tos 0x%2.2x", r->tos);
+ if (r->prio)
+ printf(" prio %u", r->prio == PF_PRIO_ZERO ? 0 : r->prio);
+ if (r->scrub_flags & PFSTATE_SETMASK) {
+ char *comma = "";
+ printf(" set (");
+ if (r->scrub_flags & PFSTATE_SETPRIO) {
+ if (r->set_prio[0] == r->set_prio[1])
+ printf("%s prio %u", comma, r->set_prio[0]);
+ else
+ printf("%s prio(%u, %u)", comma, r->set_prio[0],
+ r->set_prio[1]);
+ comma = ",";
+ }
+ printf(" )");
+ }
if (!r->keep_state && r->action == PF_PASS && !anchor_call[0])
printf(" no state");
else if (r->keep_state == PF_STATE_NORMAL)
@@ -974,12 +974,6 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose, int numeric)
printf("sloppy");
opts = 0;
}
- if (r->rule_flag & PFRULE_PFLOW) {
- if (!opts)
- printf(", ");
- printf("pflow");
- opts = 0;
- }
for (i = 0; i < PFTM_MAX; ++i)
if (r->timeout[i]) {
int j;
@@ -1015,12 +1009,7 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose, int numeric)
if (r->rule_flag & PFRULE_REASSEMBLE_TCP)
printf(" reassemble tcp");
- if (r->rule_flag & PFRULE_FRAGDROP)
- printf(" fragment drop-ovl");
- else if (r->rule_flag & PFRULE_FRAGCROP)
- printf(" fragment crop");
- else
- printf(" fragment reassemble");
+ printf(" fragment reassemble");
}
if (r->label[0])
printf(" label \"%s\"", r->label);
@@ -1170,11 +1159,7 @@ check_netmask(struct node_host *h, sa_family_t af)
/* interface lookup routines */
-#ifndef __rtems__
-struct node_host *iftab;
-#else /* __rtems__ */
static struct node_host *iftab;
-#endif /* __rtems__ */
void
ifa_load(void)
@@ -1260,6 +1245,26 @@ ifa_load(void)
freeifaddrs(ifap);
}
+int
+get_socket_domain(void)
+{
+ int sdom;
+
+ sdom = AF_UNSPEC;
+#ifdef WITH_INET6
+ if (sdom == AF_UNSPEC && feature_present("inet6"))
+ sdom = AF_INET6;
+#endif
+#ifdef WITH_INET
+ if (sdom == AF_UNSPEC && feature_present("inet"))
+ sdom = AF_INET;
+#endif
+ if (sdom == AF_UNSPEC)
+ sdom = AF_LINK;
+
+ return (sdom);
+}
+
struct node_host *
ifa_exists(const char *ifa_name)
{
@@ -1271,7 +1276,7 @@ ifa_exists(const char *ifa_name)
ifa_load();
/* check wether this is a group */
- if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
+ if ((s = socket(get_socket_domain(), SOCK_DGRAM, 0)) == -1)
err(1, "socket");
bzero(&ifgr, sizeof(ifgr));
strlcpy(ifgr.ifgr_name, ifa_name, sizeof(ifgr.ifgr_name));
@@ -1302,7 +1307,7 @@ ifa_grouplookup(const char *ifa_name, int flags)
int s, len;
struct node_host *n, *h = NULL;
- if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
+ if ((s = socket(get_socket_domain(), SOCK_DGRAM, 0)) == -1)
err(1, "socket");
bzero(&ifgr, sizeof(ifgr));
strlcpy(ifgr.ifgr_name, ifa_name, sizeof(ifgr.ifgr_name));
@@ -1773,6 +1778,3 @@ pfctl_trans(int dev, struct pfr_buffer *buf, u_long cmd, int from)
trans.array = ((struct pfioc_trans_e *)buf->pfrb_caddr) + from;
return ioctl(dev, cmd, &trans);
}
-#ifdef __rtems__
-#include "pfctl_parser-data.h"
-#endif /* __rtems__ */
diff --git a/freebsd/contrib/pf/pfctl/pfctl_parser.h b/freebsd/sbin/pfctl/pfctl_parser.h
index 4560d66b..2b7fea7b 100644
--- a/freebsd/contrib/pf/pfctl/pfctl_parser.h
+++ b/freebsd/sbin/pfctl/pfctl_parser.h
@@ -150,12 +150,28 @@ struct node_hfsc_opts {
int flags;
};
+struct node_fairq_sc {
+ struct node_queue_bw m1; /* slope of 1st segment; bps */
+ u_int d; /* x-projection of m1; msec */
+ struct node_queue_bw m2; /* slope of 2nd segment; bps */
+ u_int8_t used;
+};
+
+struct node_fairq_opts {
+ struct node_fairq_sc linkshare;
+ struct node_queue_bw hogs_bw;
+ u_int nbuckets;
+ int flags;
+};
+
struct node_queue_opt {
int qtype;
union {
struct cbq_opts cbq_opts;
+ struct codel_opts codel_opts;
struct priq_opts priq_opts;
struct node_hfsc_opts hfsc_opts;
+ struct node_fairq_opts fairq_opts;
} data;
};
@@ -294,6 +310,7 @@ void set_ipmask(struct node_host *, u_int8_t);
int check_netmask(struct node_host *, sa_family_t);
int unmask(struct pf_addr *, sa_family_t);
void ifa_load(void);
+int get_socket_domain(void);
struct node_host *ifa_exists(const char *);
struct node_host *ifa_lookup(const char *, int);
struct node_host *host(const char *);
diff --git a/freebsd/contrib/pf/pfctl/pfctl_qstats.c b/freebsd/sbin/pfctl/pfctl_qstats.c
index 91bdb86c..dda3a494 100644
--- a/freebsd/contrib/pf/pfctl/pfctl_qstats.c
+++ b/freebsd/sbin/pfctl/pfctl_qstats.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-namespace.h"
+#endif /* __rtems__ */
+
/* $OpenBSD: pfctl_qstats.c,v 1.30 2004/04/27 21:47:32 kjc Exp $ */
/*
@@ -18,12 +22,12 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
-#ifdef __rtems__
-#include <machine/rtems-bsd-program.h>
-#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -39,18 +43,25 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <unistd.h>
-#include <altq/altq.h>
-#include <altq/altq_cbq.h>
-#include <altq/altq_priq.h>
-#include <altq/altq_hfsc.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_cbq.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_priq.h>
+#include <net/altq/altq_hfsc.h>
+#include <net/altq/altq_fairq.h>
#include "pfctl.h"
#include "pfctl_parser.h"
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-pfctl_qstats-data.h"
+#endif /* __rtems__ */
union class_stats {
class_stats_t cbq_stats;
struct priq_classstats priq_stats;
struct hfsc_classstats hfsc_stats;
+ struct fairq_classstats fairq_stats;
+ struct codel_ifstats codel_stats;
};
#define AVGN_MAX 8
@@ -72,10 +83,6 @@ struct pf_altq_node {
struct queue_stats qstats;
};
-#ifdef __rtems__
-static u_int32_t last_ticket;
-
-#endif /* __rtems__ */
int pfctl_update_qstats(int, struct pf_altq_node **);
void pfctl_insert_altq_node(struct pf_altq_node **,
const struct pf_altq, const struct queue_stats);
@@ -84,8 +91,10 @@ struct pf_altq_node *pfctl_find_altq_node(struct pf_altq_node *,
void pfctl_print_altq_node(int, const struct pf_altq_node *,
unsigned, int);
void print_cbqstats(struct queue_stats);
+void print_codelstats(struct queue_stats);
void print_priqstats(struct queue_stats);
void print_hfscstats(struct queue_stats);
+void print_fairqstats(struct queue_stats);
void pfctl_free_altq_node(struct pf_altq_node *);
void pfctl_print_altq_nodestat(int,
const struct pf_altq_node *);
@@ -138,6 +147,9 @@ pfctl_show_altq(int dev, const char *iface, int opts, int verbose2)
return (0);
}
+#ifdef __rtems__
+static u_int32_t last_ticket;
+#endif /* __rtems__ */
int
pfctl_update_qstats(int dev, struct pf_altq_node **root)
{
@@ -173,7 +185,7 @@ pfctl_update_qstats(int dev, struct pf_altq_node **root)
return (-1);
}
#ifdef __FreeBSD__
- if (pa.altq.qid > 0 &&
+ if ((pa.altq.qid > 0 || pa.altq.scheduler == ALTQT_CODEL) &&
!(pa.altq.local_flags & PFALTQ_FLAG_IF_REMOVED)) {
#else
if (pa.altq.qid > 0) {
@@ -311,7 +323,7 @@ pfctl_print_altq_node(int dev, const struct pf_altq_node *node,
void
pfctl_print_altq_nodestat(int dev, const struct pf_altq_node *a)
{
- if (a->altq.qid == 0)
+ if (a->altq.qid == 0 && a->altq.scheduler != ALTQT_CODEL)
return;
#ifdef __FreeBSD__
@@ -328,6 +340,12 @@ pfctl_print_altq_nodestat(int dev, const struct pf_altq_node *a)
case ALTQT_HFSC:
print_hfscstats(a->qstats);
break;
+ case ALTQT_FAIRQ:
+ print_fairqstats(a->qstats);
+ break;
+ case ALTQT_CODEL:
+ print_codelstats(a->qstats);
+ break;
}
}
@@ -353,6 +371,28 @@ print_cbqstats(struct queue_stats cur)
}
void
+print_codelstats(struct queue_stats cur)
+{
+ printf(" [ pkts: %10llu bytes: %10llu "
+ "dropped pkts: %6llu bytes: %6llu ]\n",
+ (unsigned long long)cur.data.codel_stats.cl_xmitcnt.packets,
+ (unsigned long long)cur.data.codel_stats.cl_xmitcnt.bytes,
+ (unsigned long long)cur.data.codel_stats.cl_dropcnt.packets +
+ cur.data.codel_stats.stats.drop_cnt.packets,
+ (unsigned long long)cur.data.codel_stats.cl_dropcnt.bytes +
+ cur.data.codel_stats.stats.drop_cnt.bytes);
+ printf(" [ qlength: %3d/%3d ]\n",
+ cur.data.codel_stats.qlength, cur.data.codel_stats.qlimit);
+
+ if (cur.avgn < 2)
+ return;
+
+ printf(" [ measured: %7.1f packets/s, %s/s ]\n",
+ cur.avg_packets / STAT_INTERVAL,
+ rate2str((8 * cur.avg_bytes) / STAT_INTERVAL));
+}
+
+void
print_priqstats(struct queue_stats cur)
{
printf(" [ pkts: %10llu bytes: %10llu "
@@ -393,6 +433,26 @@ print_hfscstats(struct queue_stats cur)
}
void
+print_fairqstats(struct queue_stats cur)
+{
+ printf(" [ pkts: %10llu bytes: %10llu "
+ "dropped pkts: %6llu bytes: %6llu ]\n",
+ (unsigned long long)cur.data.fairq_stats.xmit_cnt.packets,
+ (unsigned long long)cur.data.fairq_stats.xmit_cnt.bytes,
+ (unsigned long long)cur.data.fairq_stats.drop_cnt.packets,
+ (unsigned long long)cur.data.fairq_stats.drop_cnt.bytes);
+ printf(" [ qlength: %3d/%3d ]\n",
+ cur.data.fairq_stats.qlength, cur.data.fairq_stats.qlimit);
+
+ if (cur.avgn < 2)
+ return;
+
+ printf(" [ measured: %7.1f packets/s, %s/s ]\n",
+ cur.avg_packets / STAT_INTERVAL,
+ rate2str((8 * cur.avg_bytes) / STAT_INTERVAL));
+}
+
+void
pfctl_free_altq_node(struct pf_altq_node *node)
{
while (node != NULL) {
@@ -413,7 +473,7 @@ update_avg(struct pf_altq_node *a)
u_int64_t b, p;
int n;
- if (a->altq.qid == 0)
+ if (a->altq.qid == 0 && a->altq.scheduler != ALTQT_CODEL)
return;
qs = &a->qstats;
@@ -432,6 +492,14 @@ update_avg(struct pf_altq_node *a)
b = qs->data.hfsc_stats.xmit_cnt.bytes;
p = qs->data.hfsc_stats.xmit_cnt.packets;
break;
+ case ALTQT_FAIRQ:
+ b = qs->data.fairq_stats.xmit_cnt.bytes;
+ p = qs->data.fairq_stats.xmit_cnt.packets;
+ break;
+ case ALTQT_CODEL:
+ b = qs->data.codel_stats.cl_xmitcnt.bytes;
+ p = qs->data.codel_stats.cl_xmitcnt.packets;
+ break;
default:
b = 0;
p = 0;
@@ -458,6 +526,3 @@ update_avg(struct pf_altq_node *a)
if (n < AVGN_MAX)
qs->avgn++;
}
-#ifdef __rtems__
-#include "pfctl_qstats-data.h"
-#endif /* __rtems__ */
diff --git a/freebsd/contrib/pf/pfctl/pfctl_radix.c b/freebsd/sbin/pfctl/pfctl_radix.c
index cc5b0acd..c151f878 100644
--- a/freebsd/contrib/pf/pfctl/pfctl_radix.c
+++ b/freebsd/sbin/pfctl/pfctl_radix.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-namespace.h"
+#endif /* __rtems__ */
+
/* $OpenBSD: pfctl_radix.c,v 1.27 2005/05/21 21:03:58 henning Exp $ */
/*
@@ -32,12 +36,12 @@
*
*/
-#ifdef __rtems__
-#include <machine/rtems-bsd-program.h>
-#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -54,6 +58,9 @@ __FBSDID("$FreeBSD$");
#include <err.h>
#include "pfctl.h"
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-pfctl_radix-data.h"
+#endif /* __rtems__ */
#define BUF_SIZE 256
@@ -406,11 +413,7 @@ pfi_get_ifaces(const char *filter, struct pfi_kif *buf, int *size)
/* buffer management code */
-#ifndef __rtems__
-size_t buf_esize[PFRB_MAX] = { 0,
-#else /* __rtems__ */
const size_t buf_esize[PFRB_MAX] = { 0,
-#endif /* __rtems__ */
sizeof(struct pfr_table), sizeof(struct pfr_tstats),
sizeof(struct pfr_addr), sizeof(struct pfr_astats),
sizeof(struct pfi_kif), sizeof(struct pfioc_trans_e)
@@ -543,15 +546,10 @@ pfr_buf_load(struct pfr_buffer *b, char *file, int nonetwork,
return (rv);
}
-#ifdef __rtems__
-static char next_ch = ' ';
-#endif /* __rtems__ */
int
pfr_next_token(char buf[BUF_SIZE], FILE *fp)
{
-#ifndef __rtems__
static char next_ch = ' ';
-#endif /* __rtems__ */
int i = 0;
for (;;) {
@@ -597,6 +595,3 @@ pfr_strerror(int errnum)
return strerror(errnum);
}
}
-#ifdef __rtems__
-#include "pfctl_radix-data.h"
-#endif /* __rtems__ */
diff --git a/freebsd/contrib/pf/pfctl/pfctl_table.c b/freebsd/sbin/pfctl/pfctl_table.c
index 6fad082d..4dfb0689 100644
--- a/freebsd/contrib/pf/pfctl/pfctl_table.c
+++ b/freebsd/sbin/pfctl/pfctl_table.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-namespace.h"
+#endif /* __rtems__ */
+
/* $OpenBSD: pfctl_table.c,v 1.67 2008/06/10 20:55:02 mcbride Exp $ */
/*
@@ -32,12 +36,12 @@
*
*/
-#ifdef __rtems__
-#include <machine/rtems-bsd-program.h>
-#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -58,6 +62,9 @@ __FBSDID("$FreeBSD$");
#include "pfctl_parser.h"
#include "pfctl.h"
+#ifdef __rtems__
+#include "rtems-bsd-pfctl-pfctl_table-data.h"
+#endif /* __rtems__ */
extern void usage(void);
static int pfctl_table(int, char *[], char *, const char *, char *,
@@ -630,8 +637,7 @@ print_iface(struct pfi_kif *p, int opts)
if (!(opts & PF_OPT_VERBOSE2))
return;
printf("\tCleared: %s", ctime(&tzero));
- printf("\tReferences: [ States: %-18d Rules: %-18d ]\n",
- p->pfik_states, p->pfik_rules);
+ printf("\tReferences: %-18d\n", p->pfik_rulerefs);
for (i = 0; i < 8; i++) {
af = (i>>2) & 1;
dir = (i>>1) &1;
diff --git a/freebsd/sbin/pfctl/rtems-bsd-pfctl-data.h b/freebsd/sbin/pfctl/rtems-bsd-pfctl-data.h
new file mode 100644
index 00000000..3984522e
--- /dev/null
+++ b/freebsd/sbin/pfctl/rtems-bsd-pfctl-data.h
@@ -0,0 +1,25 @@
+/* generated by userspace-header-gen.py */
+/*
+ * NOTE: MANUALLY CHANGED.
+ * YACC needs a special treatment for some variables. They are commented here.
+ */
+#include <rtems/linkersets.h>
+/* parse.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int pfctlydebug);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int pfctlynerrs);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int pfctlyerrflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int pfctlychar);
+/* RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern YYSTYPE pfctlyval); */
+/* RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern YYSTYPE pfctlylval); */
+/* pfctl_altq.c */
+/* pfctl.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int loadopt);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int altqsupport);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int dev);
+/* pfctl_optimize.c */
+/* pfctl_osfp.c */
+/* pfctl_parser.c */
+/* pfctl_qstats.c */
+/* pfctl_radix.c */
+/* pfctl_table.c */
+/* pf_print_state.c */
diff --git a/freebsd/sbin/pfctl/rtems-bsd-pfctl-namespace.h b/freebsd/sbin/pfctl/rtems-bsd-pfctl-namespace.h
new file mode 100644
index 00000000..4e815fee
--- /dev/null
+++ b/freebsd/sbin/pfctl/rtems-bsd-pfctl-namespace.h
@@ -0,0 +1,262 @@
+/* generated by userspace-header-gen.py */
+/* parse.c */
+#define pfctlydebug _bsd_pfctl_pfctlydebug
+#define pfctlynerrs _bsd_pfctl_pfctlynerrs
+#define pfctlyerrflag _bsd_pfctl_pfctlyerrflag
+#define pfctlychar _bsd_pfctl_pfctlychar
+#define pfctlyval _bsd_pfctl_pfctlyval
+#define pfctlylval _bsd_pfctl_pfctlylval
+#define pfctlyparse _bsd_pfctl_pfctlyparse
+#define rt_tableid_max _bsd_pfctl_rt_tableid_max
+#define pfctl_load_anchors _bsd_pfctl_pfctl_load_anchors
+#define parseport _bsd_pfctl_parseport
+#define parseicmpspec _bsd_pfctl_parseicmpspec
+#define rule_label _bsd_pfctl_rule_label
+#define getservice _bsd_pfctl_getservice
+#define atoul _bsd_pfctl_atoul
+#define invalid_redirect _bsd_pfctl_invalid_redirect
+#define remove_invalid_hosts _bsd_pfctl_remove_invalid_hosts
+#define decide_address_family _bsd_pfctl_decide_address_family
+#define mv_rules _bsd_pfctl_mv_rules
+#define symget _bsd_pfctl_symget
+#define pfctl_cmdline_symset _bsd_pfctl_pfctl_cmdline_symset
+#define symset _bsd_pfctl_symset
+#define parse_config _bsd_pfctl_parse_config
+#define popfile _bsd_pfctl_popfile
+#define pushfile _bsd_pfctl_pushfile
+#define check_file_secrecy _bsd_pfctl_check_file_secrecy
+#define pfctlylex _bsd_pfctl_pfctlylex
+#define findeol _bsd_pfctl_findeol
+#define lungetc _bsd_pfctl_lungetc
+#define lgetc _bsd_pfctl_lgetc
+#define lookup _bsd_pfctl_lookup
+#define kw_cmp _bsd_pfctl_kw_cmp
+#define check_rulestate _bsd_pfctl_check_rulestate
+#define expand_skip_interface _bsd_pfctl_expand_skip_interface
+#define expand_rule _bsd_pfctl_expand_rule
+#define expand_queue _bsd_pfctl_expand_queue
+#define expand_altq _bsd_pfctl_expand_altq
+#define expand_label _bsd_pfctl_expand_label
+#define expand_label_nr _bsd_pfctl_expand_label_nr
+#define expand_label_proto _bsd_pfctl_expand_label_proto
+#define expand_label_port _bsd_pfctl_expand_label_port
+#define expand_label_addr _bsd_pfctl_expand_label_addr
+#define expand_label_if _bsd_pfctl_expand_label_if
+#define expand_label_str _bsd_pfctl_expand_label_str
+#define process_tabledef _bsd_pfctl_process_tabledef
+#define rdr_consistent _bsd_pfctl_rdr_consistent
+#define nat_consistent _bsd_pfctl_nat_consistent
+#define filter_consistent _bsd_pfctl_filter_consistent
+#define rule_consistent _bsd_pfctl_rule_consistent
+#define disallow_alias _bsd_pfctl_disallow_alias
+#define disallow_urpf_failed _bsd_pfctl_disallow_urpf_failed
+#define disallow_table _bsd_pfctl_disallow_table
+#define pfctlyerror _bsd_pfctl_pfctlyerror
+/* pfctl_altq.c */
+#define print_fairq_sc _bsd_pfctl_print_fairq_sc
+#define print_hfsc_sc _bsd_pfctl_print_hfsc_sc
+#define eval_bwspec _bsd_pfctl_eval_bwspec
+#define eval_queue_opts _bsd_pfctl_eval_queue_opts
+#define getifmtu _bsd_pfctl_getifmtu
+#define getifspeed _bsd_pfctl_getifspeed
+#define rate2str _bsd_pfctl_rate2str
+#define eval_pfqueue _bsd_pfctl_eval_pfqueue
+#define check_commit_altq _bsd_pfctl_check_commit_altq
+#define eval_pfaltq _bsd_pfctl_eval_pfaltq
+#define print_queue _bsd_pfctl_print_queue
+#define print_altq _bsd_pfctl_print_altq
+#define qname_to_qid _bsd_pfctl_qname_to_qid
+#define qname_to_pfaltq _bsd_pfctl_qname_to_pfaltq
+#define pfaltq_lookup _bsd_pfctl_pfaltq_lookup
+#define pfaltq_store _bsd_pfctl_pfaltq_store
+/* pfctl.c */
+#define loadopt _bsd_pfctl_loadopt
+#define altqsupport _bsd_pfctl_altqsupport
+#define dev _bsd_pfctl_dev
+#define pfctl_lookup_option _bsd_pfctl_pfctl_lookup_option
+#define pfctl_show_anchors _bsd_pfctl_pfctl_show_anchors
+#define pfctl_test_altqsupport _bsd_pfctl_pfctl_test_altqsupport
+#define pfctl_debug _bsd_pfctl_pfctl_debug
+#define pfctl_set_interface_flags _bsd_pfctl_pfctl_set_interface_flags
+#define pfctl_load_debug _bsd_pfctl_pfctl_load_debug
+#define pfctl_set_debug _bsd_pfctl_pfctl_set_debug
+#define pfctl_load_hostid _bsd_pfctl_pfctl_load_hostid
+#define pfctl_set_hostid _bsd_pfctl_pfctl_set_hostid
+#define pfctl_load_logif _bsd_pfctl_pfctl_load_logif
+#define pfctl_set_logif _bsd_pfctl_pfctl_set_logif
+#define pfctl_set_optimization _bsd_pfctl_pfctl_set_optimization
+#define pfctl_load_timeout _bsd_pfctl_pfctl_load_timeout
+#define pfctl_set_timeout _bsd_pfctl_pfctl_set_timeout
+#define pfctl_load_limit _bsd_pfctl_pfctl_load_limit
+#define pfctl_set_limit _bsd_pfctl_pfctl_set_limit
+#define pfctl_load_options _bsd_pfctl_pfctl_load_options
+#define pfctl_init_options _bsd_pfctl_pfctl_init_options
+#define pfctl_fopen _bsd_pfctl_pfctl_fopen
+#define pfctl_rules _bsd_pfctl_pfctl_rules
+#define pfctl_add_altq _bsd_pfctl_pfctl_add_altq
+#define pfctl_load_rule _bsd_pfctl_pfctl_load_rule
+#define pfctl_load_ruleset _bsd_pfctl_pfctl_load_ruleset
+#define pfctl_ruleset_trans _bsd_pfctl_pfctl_ruleset_trans
+#define pfctl_add_rule _bsd_pfctl_pfctl_add_rule
+#define pfctl_add_pool _bsd_pfctl_pfctl_add_pool
+#define pfctl_show_limits _bsd_pfctl_pfctl_show_limits
+#define pfctl_show_timeouts _bsd_pfctl_pfctl_show_timeouts
+#define pfctl_show_status _bsd_pfctl_pfctl_show_status
+#define pfctl_show_states _bsd_pfctl_pfctl_show_states
+#define pfctl_show_src_nodes _bsd_pfctl_pfctl_show_src_nodes
+#define pfctl_show_nat _bsd_pfctl_pfctl_show_nat
+#define pfctl_show_rules _bsd_pfctl_pfctl_show_rules
+#define pfctl_print_title _bsd_pfctl_pfctl_print_title
+#define pfctl_print_rule_counters _bsd_pfctl_pfctl_print_rule_counters
+#define pfctl_clear_pool _bsd_pfctl_pfctl_clear_pool
+#define pfctl_move_pool _bsd_pfctl_pfctl_move_pool
+#define pfctl_get_pool _bsd_pfctl_pfctl_get_pool
+#define pfctl_id_kill_states _bsd_pfctl_pfctl_id_kill_states
+#define pfctl_label_kill_states _bsd_pfctl_pfctl_label_kill_states
+#define pfctl_net_kill_states _bsd_pfctl_pfctl_net_kill_states
+#define pfctl_kill_src_nodes _bsd_pfctl_pfctl_kill_src_nodes
+#define pfctl_addrprefix _bsd_pfctl_pfctl_addrprefix
+#define pfctl_clear_states _bsd_pfctl_pfctl_clear_states
+#define pfctl_clear_src_nodes _bsd_pfctl_pfctl_clear_src_nodes
+#define pfctl_clear_altq _bsd_pfctl_pfctl_clear_altq
+#define pfctl_clear_nat _bsd_pfctl_pfctl_clear_nat
+#define pfctl_clear_rules _bsd_pfctl_pfctl_clear_rules
+#define pfctl_clear_interface_flags _bsd_pfctl_pfctl_clear_interface_flags
+#define pfctl_clear_stats _bsd_pfctl_pfctl_clear_stats
+#define pfctl_disable _bsd_pfctl_pfctl_disable
+#define pfctl_enable _bsd_pfctl_pfctl_enable
+#define usage _bsd_pfctl_usage
+/* pfctl_optimize.c */
+#define superblock_free _bsd_pfctl_superblock_free
+#define exclude_supersets _bsd_pfctl_exclude_supersets
+#define comparable_rule _bsd_pfctl_comparable_rule
+#define interface_group _bsd_pfctl_interface_group
+#define superblock_inclusive _bsd_pfctl_superblock_inclusive
+#define rules_combineable _bsd_pfctl_rules_combineable
+#define addrs_combineable _bsd_pfctl_addrs_combineable
+#define addrs_equal _bsd_pfctl_addrs_equal
+#define construct_superblocks _bsd_pfctl_construct_superblocks
+#define pf_opt_create_table _bsd_pfctl_pf_opt_create_table
+#define add_opt_table _bsd_pfctl_add_opt_table
+#define skip_init _bsd_pfctl_skip_init
+#define skip_cmp_src_port _bsd_pfctl_skip_cmp_src_port
+#define skip_cmp_src_addr _bsd_pfctl_skip_cmp_src_addr
+#define skip_cmp_proto _bsd_pfctl_skip_cmp_proto
+#define skip_cmp_ifp _bsd_pfctl_skip_cmp_ifp
+#define skip_cmp_dst_port _bsd_pfctl_skip_cmp_dst_port
+#define skip_cmp_dst_addr _bsd_pfctl_skip_cmp_dst_addr
+#define skip_cmp_dir _bsd_pfctl_skip_cmp_dir
+#define skip_cmp_af _bsd_pfctl_skip_cmp_af
+#define remove_from_skipsteps _bsd_pfctl_remove_from_skipsteps
+#define skip_append _bsd_pfctl_skip_append
+#define skip_compare _bsd_pfctl_skip_compare
+#define load_feedback_profile _bsd_pfctl_load_feedback_profile
+#define block_feedback _bsd_pfctl_block_feedback
+#define reorder_rules _bsd_pfctl_reorder_rules
+#define combine_rules _bsd_pfctl_combine_rules
+#define remove_identical_rules _bsd_pfctl_remove_identical_rules
+#define optimize_superblock _bsd_pfctl_optimize_superblock
+#define pfctl_optimize_ruleset _bsd_pfctl_pfctl_optimize_ruleset
+/* pfctl_osfp.c */
+#define print_ioctl _bsd_pfctl_print_ioctl
+#define get_field _bsd_pfctl_get_field
+#define get_tcpopts _bsd_pfctl_get_tcpopts
+#define get_str _bsd_pfctl_get_str
+#define get_int _bsd_pfctl_get_int
+#define sort_name_list _bsd_pfctl_sort_name_list
+#define print_name_list _bsd_pfctl_print_name_list
+#define fingerprint_name_entry _bsd_pfctl_fingerprint_name_entry
+#define import_fingerprint _bsd_pfctl_import_fingerprint
+#define add_fingerprint _bsd_pfctl_add_fingerprint
+#define lookup_name_list _bsd_pfctl_lookup_name_list
+#define pfctl_lookup_fingerprint _bsd_pfctl_pfctl_lookup_fingerprint
+#define pfctl_get_fingerprint _bsd_pfctl_pfctl_get_fingerprint
+#define pfctl_show_fingerprints _bsd_pfctl_pfctl_show_fingerprints
+#define pfctl_load_fingerprints _bsd_pfctl_pfctl_load_fingerprints
+#define pfctl_flush_my_fingerprints _bsd_pfctl_pfctl_flush_my_fingerprints
+#define pfctl_clear_fingerprints _bsd_pfctl_pfctl_clear_fingerprints
+#define pfctl_file_fingerprints _bsd_pfctl_pfctl_file_fingerprints
+/* pfctl_parser.c */
+#define pfctl_trans _bsd_pfctl_pfctl_trans
+#define pfctl_get_ticket _bsd_pfctl_pfctl_get_ticket
+#define pfctl_add_trans _bsd_pfctl_pfctl_add_trans
+#define append_addr_host _bsd_pfctl_append_addr_host
+#define append_addr _bsd_pfctl_append_addr
+#define host_dns _bsd_pfctl_host_dns
+#define host_v6 _bsd_pfctl_host_v6
+#define host_v4 _bsd_pfctl_host_v4
+#define host_if _bsd_pfctl_host_if
+#define host _bsd_pfctl_host
+#define ifa_skip_if _bsd_pfctl_ifa_skip_if
+#define ifa_lookup _bsd_pfctl_ifa_lookup
+#define ifa_grouplookup _bsd_pfctl_ifa_grouplookup
+#define ifa_exists _bsd_pfctl_ifa_exists
+#define get_socket_domain _bsd_pfctl_get_socket_domain
+#define ifa_load _bsd_pfctl_ifa_load
+#define check_netmask _bsd_pfctl_check_netmask
+#define set_ipmask _bsd_pfctl_set_ipmask
+#define parse_flags _bsd_pfctl_parse_flags
+#define print_tabledef _bsd_pfctl_print_tabledef
+#define print_rule _bsd_pfctl_print_rule
+#define print_src_node _bsd_pfctl_print_src_node
+#define print_status _bsd_pfctl_print_status
+#define print_pool _bsd_pfctl_print_pool
+#define print_fromto _bsd_pfctl_print_fromto
+#define print_flags _bsd_pfctl_print_flags
+#define print_ugid _bsd_pfctl_print_ugid
+#define print_port _bsd_pfctl_print_port
+#define print_op _bsd_pfctl_print_op
+#define geticmpcodebyname _bsd_pfctl_geticmpcodebyname
+#define geticmpcodebynumber _bsd_pfctl_geticmpcodebynumber
+#define geticmptypebyname _bsd_pfctl_geticmptypebyname
+#define geticmptypebynumber _bsd_pfctl_geticmptypebynumber
+/* pfctl_qstats.c */
+#define update_avg _bsd_pfctl_update_avg
+#define pfctl_free_altq_node _bsd_pfctl_pfctl_free_altq_node
+#define print_fairqstats _bsd_pfctl_print_fairqstats
+#define print_hfscstats _bsd_pfctl_print_hfscstats
+#define print_priqstats _bsd_pfctl_print_priqstats
+#define print_codelstats _bsd_pfctl_print_codelstats
+#define print_cbqstats _bsd_pfctl_print_cbqstats
+#define pfctl_print_altq_nodestat _bsd_pfctl_pfctl_print_altq_nodestat
+#define pfctl_print_altq_node _bsd_pfctl_pfctl_print_altq_node
+#define pfctl_find_altq_node _bsd_pfctl_pfctl_find_altq_node
+#define pfctl_insert_altq_node _bsd_pfctl_pfctl_insert_altq_node
+#define pfctl_update_qstats _bsd_pfctl_pfctl_update_qstats
+#define pfctl_show_altq _bsd_pfctl_pfctl_show_altq
+/* pfctl_radix.c */
+#define pfr_strerror _bsd_pfctl_pfr_strerror
+#define pfr_buf_load _bsd_pfctl_pfr_buf_load
+#define pfr_buf_clear _bsd_pfctl_pfr_buf_clear
+#define pfr_buf_grow _bsd_pfctl_pfr_buf_grow
+#define pfr_buf_next _bsd_pfctl_pfr_buf_next
+#define pfr_buf_add _bsd_pfctl_pfr_buf_add
+#define pfi_get_ifaces _bsd_pfctl_pfi_get_ifaces
+#define pfr_ina_define _bsd_pfctl_pfr_ina_define
+#define pfr_tst_addrs _bsd_pfctl_pfr_tst_addrs
+#define pfr_clr_tstats _bsd_pfctl_pfr_clr_tstats
+#define pfr_get_astats _bsd_pfctl_pfr_get_astats
+#define pfr_get_addrs _bsd_pfctl_pfr_get_addrs
+#define pfr_set_addrs _bsd_pfctl_pfr_set_addrs
+#define pfr_del_addrs _bsd_pfctl_pfr_del_addrs
+#define pfr_add_addrs _bsd_pfctl_pfr_add_addrs
+#define pfr_clr_addrs _bsd_pfctl_pfr_clr_addrs
+#define pfr_get_tstats _bsd_pfctl_pfr_get_tstats
+#define pfr_get_tables _bsd_pfctl_pfr_get_tables
+#define pfr_del_tables _bsd_pfctl_pfr_del_tables
+#define pfr_add_tables _bsd_pfctl_pfr_add_tables
+#define pfr_clr_tables _bsd_pfctl_pfr_clr_tables
+/* pfctl_table.c */
+#define pfctl_show_ifaces _bsd_pfctl_pfctl_show_ifaces
+#define warn_namespace_collision _bsd_pfctl_warn_namespace_collision
+#define pfctl_define_table _bsd_pfctl_pfctl_define_table
+#define pfctl_command_tables _bsd_pfctl_pfctl_command_tables
+#define pfctl_show_tables _bsd_pfctl_pfctl_show_tables
+#define pfctl_clear_tables _bsd_pfctl_pfctl_clear_tables
+/* pf_print_state.c */
+#define unmask _bsd_pfctl_unmask
+#define print_state _bsd_pfctl_print_state
+#define print_seq _bsd_pfctl_print_seq
+#define print_host _bsd_pfctl_print_host
+#define print_name _bsd_pfctl_print_name
+#define print_addr _bsd_pfctl_print_addr
diff --git a/freebsd/contrib/pf/pfctl/parse-data.h b/freebsd/sbin/pfctl/rtems-bsd-pfctl-parse-data.h
index 22f62958..ed13a43a 100644
--- a/freebsd/contrib/pf/pfctl/parse-data.h
+++ b/freebsd/sbin/pfctl/rtems-bsd-pfctl-parse-data.h
@@ -1,42 +1,36 @@
+/* generated by userspace-header-gen.py */
+/*
+ * NOTE: MANUALLY CHANGED.
+ * YACC needs a special treatment for some variables. They are commented here.
+ */
#include <rtems/linkersets.h>
-
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl,
- static struct antispoof_opts antispoof_opts);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int blockpolicy);
+#include "rtems-bsd-pfctl-data.h"
+/* parse.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct pfctl *pf);
RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int debug);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int rulestate);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static u_int16_t returnicmpdefault);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static u_int16_t returnicmp6default);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int blockpolicy);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int require_order);
RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int default_statelock);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct file *file);
RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct files files);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl,
- static struct filter_opts filter_opts);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl,
- static struct node_hfsc_opts hfsc_opts);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl,
- static struct node_state_opt *keep_state_defaults);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl,
- static struct loadanchorshead loadanchorshead);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char *parsebuf);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int parseindex);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct pfctl *pf);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct pool_opts pool_opts);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char pushback_buffer[]);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int pushback_index);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct queue_opts queue_opts);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct file *file);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct symhead symhead);
RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct node_queue *queues);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int require_order);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static u_int16_t returnicmp6default);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static u_int16_t returnicmpdefault);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int rulestate);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct filter_opts filter_opts);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct antispoof_opts antispoof_opts);
RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct scrub_opts scrub_opts);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct symhead symhead);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct queue_opts queue_opts);
RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct table_opts table_opts);
-
-/* NOTE: the following variables are generated by yacc and may change with yacc
- * version or generation options. */
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static YYSTACKDATA yystack);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int yychar);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int yydebug);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int yyerrflag);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern YYSTYPE yylval);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern int yynerrs);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, extern YYSTYPE yyval);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct pool_opts pool_opts);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct codel_opts codel_opts);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct node_hfsc_opts hfsc_opts);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct node_fairq_opts fairq_opts);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct node_state_opt *keep_state_defaults);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct loadanchorshead loadanchorshead);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char *parsebuf);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int parseindex);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int pushback_index);
+/* RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static YYSTACKDATA yystack); */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char pushback_buffer[]);
diff --git a/freebsd/sbin/pfctl/rtems-bsd-pfctl-pf_print_state-data.h b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pf_print_state-data.h
new file mode 100644
index 00000000..33366e0d
--- /dev/null
+++ b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pf_print_state-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-pfctl-data.h"
+/* pf_print_state.c */
diff --git a/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl-data.h b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl-data.h
new file mode 100644
index 00000000..65956d58
--- /dev/null
+++ b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl-data.h
@@ -0,0 +1,22 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-pfctl-data.h"
+/* pfctl.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct pf_anchor_global pf_anchors);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct pf_anchor pf_main_anchor);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char const *clearopt);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char *rulesopt);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char const *showopt);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char const *debugopt);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char *anchoropt);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char const *optiopt);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char const *pf_device);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char *ifaceopt);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char *tableopt);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char const *tblcmdopt);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int src_node_killers);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int state_killers);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int first_title);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int labels);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char *src_node_kill[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char *state_kill[]);
diff --git a/freebsd/contrib/pf/pfctl/pfctl_altq-data.h b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_altq-data.h
index 49898bc7..6a032132 100644
--- a/freebsd/contrib/pf/pfctl/pfctl_altq-data.h
+++ b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_altq-data.h
@@ -1,8 +1,9 @@
+/* generated by userspace-header-gen.py */
#include <rtems/linkersets.h>
-
+#include "rtems-bsd-pfctl-data.h"
+/* pfctl_altq.c */
RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct altqs altqs);
RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct gen_sc rtsc);
RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct gen_sc lssc);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl,
- static char r2sbuf[R2S_BUFS][RATESTR_MAX]);
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int r2sidx);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char r2sbuf[8][16]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int idx);
diff --git a/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_optimize-data.h b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_optimize-data.h
new file mode 100644
index 00000000..09598a79
--- /dev/null
+++ b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_optimize-data.h
@@ -0,0 +1,11 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-pfctl-data.h"
+/* pfctl_optimize.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct pf_rule_field pf_rule_desc[70]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int (*skip_comparitors[8])(struct pf_rule *, struct pf_rule *));
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char const *skip_comparitors_names[8]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct pfr_buffer table_buffer);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int table_identifier);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int add_opt_tablenum);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int pf_opt_create_tablenum);
diff --git a/freebsd/contrib/pf/pfctl/pfctl_osfp-data.h b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_osfp-data.h
index 1ac0ee31..53bf09c4 100644
--- a/freebsd/contrib/pf/pfctl/pfctl_osfp-data.h
+++ b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_osfp-data.h
@@ -1,7 +1,7 @@
+/* generated by userspace-header-gen.py */
#include <rtems/linkersets.h>
-
-RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int class_count);
+#include "rtems-bsd-pfctl-data.h"
+/* pfctl_osfp.c */
RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct name_list classes);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int class_count);
RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static int fingerprint_count);
-/* There is also one static buffer called "buf". But this can be ignored. See
- * comment in source file. */
diff --git a/freebsd/contrib/pf/pfctl/pfctl_parser-data.h b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_parser-data.h
index 5e9ff5c1..bb8832ac 100644
--- a/freebsd/contrib/pf/pfctl/pfctl_parser-data.h
+++ b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_parser-data.h
@@ -1,3 +1,5 @@
+/* generated by userspace-header-gen.py */
#include <rtems/linkersets.h>
-
+#include "rtems-bsd-pfctl-data.h"
+/* pfctl_parser.c */
RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static struct node_host *iftab);
diff --git a/freebsd/contrib/pf/pfctl/pfctl_qstats-data.h b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_qstats-data.h
index 275a184c..0ee02a9e 100644
--- a/freebsd/contrib/pf/pfctl/pfctl_qstats-data.h
+++ b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_qstats-data.h
@@ -1,3 +1,5 @@
+/* generated by userspace-header-gen.py */
#include <rtems/linkersets.h>
-
+#include "rtems-bsd-pfctl-data.h"
+/* pfctl_qstats.c */
RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static u_int32_t last_ticket);
diff --git a/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_radix-data.h b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_radix-data.h
new file mode 100644
index 00000000..b2a7340e
--- /dev/null
+++ b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_radix-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-pfctl-data.h"
+/* pfctl_radix.c */
diff --git a/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_table-data.h b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_table-data.h
new file mode 100644
index 00000000..db4e915b
--- /dev/null
+++ b/freebsd/sbin/pfctl/rtems-bsd-pfctl-pfctl_table-data.h
@@ -0,0 +1,6 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-pfctl-data.h"
+/* pfctl_table.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char const *stats_text[2][3]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_pfctl, static char const *istats_text[2][2][2]);
diff --git a/freebsd/sbin/ping/ping.c b/freebsd/sbin/ping/ping.c
index ad5515ab..f0d1f7ce 100644
--- a/freebsd/sbin/ping/ping.c
+++ b/freebsd/sbin/ping/ping.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ping-namespace.h"
+#endif /* __rtems__ */
+
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -43,21 +47,6 @@ static const char copyright[] =
static char sccsid[] = "@(#)ping.c 8.1 (Berkeley) 6/5/93";
#endif /* not lint */
#endif
-#ifdef __rtems__
-#define __need_getopt_newlib
-#include <getopt.h>
-#define RTEMS_BSD_PROGRAM_NO_OPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_SOCKET_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FOPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FCLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_MALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_REALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FREE_WRAP
-#include <machine/rtems-bsd-program.h>
-#include <machine/rtems-bsd-commands.h>
-#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -79,7 +68,14 @@ __FBSDID("$FreeBSD$");
* This program has to run SUID to ROOT to access the ICMP socket.
*/
+#ifdef __rtems__
+#define __need_getopt_newlib
+#include <getopt.h>
+#include <machine/rtems-bsd-program.h>
+#include <machine/rtems-bsd-commands.h>
+#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h> /* NB: we rely on this for <sys/types.h> */
+#include <sys/capsicum.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/time.h>
@@ -92,6 +88,11 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_var.h>
#include <arpa/inet.h>
+#ifdef HAVE_LIBCASPER
+#include <libcasper.h>
+#include <casper/cap_dns.h>
+#endif
+
#ifdef IPSEC
#include <netipsec/ipsec.h>
#endif /*IPSEC*/
@@ -107,6 +108,9 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
+#ifdef __rtems__
+#include "rtems-bsd-ping-ping-data.h"
+#endif /* __rtems__ */
#define INADDR_LEN ((int)sizeof(in_addr_t))
#define TIMEVAL_LEN ((int)sizeof(struct tv32))
@@ -168,20 +172,20 @@ static int options;
* to 8192 for complete accuracy...
*/
#define MAX_DUP_CHK (8 * 128)
-static const int mx_dup_ck = MAX_DUP_CHK;
static char rcvd_tbl[MAX_DUP_CHK / 8];
static struct sockaddr_in whereto; /* who to ping */
static int datalen = DEFDATALEN;
static int maxpayload;
-static int s; /* socket file descriptor */
+static int ssend; /* send socket file descriptor */
+static int srecv; /* receive socket file descriptor */
static u_char outpackhdr[IP_MAXPACKET], *outpack;
-static const char BBELL = '\a'; /* characters written for MISSED and AUDIBLE */
-static const char BSPACE = '\b'; /* characters written for flood */
+static const char BBELL = '\a'; /* characters written for MISSED and AUDIBLE */
+static const char BSPACE = '\b'; /* characters written for flood */
static const char DOT = '.';
static char *hostname;
static char *shostname;
-static int ident; /* process id to identify our packets */
+static int ident; /* process id to identify our packets */
static int uid; /* cached uid for micro-optimization */
static u_char icmp_type = ICMP_ECHO;
static u_char icmp_type_rsp = ICMP_ECHOREPLY;
@@ -190,31 +194,49 @@ static int send_len;
/* counters */
static long nmissedmax; /* max value of ntransmitted - nreceived - 1 */
-static long npackets; /* max packets to transmit */
-static long nreceived; /* # of packets we got back */
-static long nrepeats; /* number of duplicates */
-static long ntransmitted; /* sequence # for outbound packets = #sent */
+#ifndef __rtems__
+static long npackets; /* max packets to transmit */
+#else /* __rtems__ */
+static long npackets = 3; /* max packets to transmit */
+#endif /* __rtems__ */
+static long nreceived; /* # of packets we got back */
+static long nrepeats; /* number of duplicates */
+static long ntransmitted; /* sequence # for outbound packets = #sent */
static long snpackets; /* max packets to transmit in one sweep */
-static long sntransmitted; /* # of packets we sent in this sweep */
-static int sweepmax; /* max value of payload in sweep */
-static int sweepmin = 0; /* start value of payload in sweep */
-static int sweepincr = 1; /* payload increment in sweep */
-static int interval = 1000; /* interval between packets, ms */
-static int waittime = MAXWAIT; /* timeout for each packet */
-static long nrcvtimeout = 0; /* # of packets we got back after waittime */
+static long sntransmitted; /* # of packets we sent in this sweep */
+static int sweepmax; /* max value of payload in sweep */
+static int sweepmin = 0; /* start value of payload in sweep */
+static int sweepincr = 1; /* payload increment in sweep */
+static int interval = 1000; /* interval between packets, ms */
+static int waittime = MAXWAIT; /* timeout for each packet */
+static long nrcvtimeout = 0; /* # of packets we got back after waittime */
/* timing */
-static int timing; /* flag to do timing */
+static int timing; /* flag to do timing */
static double tmin = 999999999.0; /* minimum round trip time */
-static double tmax = 0.0; /* maximum round trip time */
-static double tsum = 0.0; /* sum of all times, for doing average */
-static double tsumsq = 0.0; /* sum of all times squared, for std. dev. */
+static double tmax = 0.0; /* maximum round trip time */
+static double tsum = 0.0; /* sum of all times, for doing average */
+static double tsumsq = 0.0; /* sum of all times squared, for std. dev. */
-static volatile sig_atomic_t finish_up; /* nonzero if we've been told to finish up */
+/* nonzero if we've been told to finish up */
+static volatile sig_atomic_t finish_up;
+#ifndef __rtems__
static volatile sig_atomic_t siginfo_p;
+#endif /* __rtems__ */
+
+#ifdef HAVE_LIBCASPER
+static cap_channel_t *capdns;
+#endif
+#ifdef __rtems__
+static u_char packet[IP_MAXPACKET] __aligned(4);
+static char hnamebuf[MAXHOSTNAMELEN], snamebuf[MAXHOSTNAMELEN];
+#endif /* __rtems__ */
static void fill(char *, char *);
static u_short in_cksum(u_short *, int);
+#ifdef HAVE_LIBCASPER
+static cap_channel_t *capdns_setup(void);
+#endif
static void check_status(void);
static void finish(void) __dead2;
static void pinger(void);
@@ -232,55 +254,32 @@ static void tvsub(struct timeval *, const struct timeval *);
static void usage(void) __dead2;
#ifdef __rtems__
-static int main(int argc, char **argv);
+static int main(int argc, char *argv[]);
+
+RTEMS_LINKER_RWSET(bsd_prog_ping, char);
-int rtems_bsd_command_ping(int argc, char *argv[])
+int
+rtems_bsd_command_ping(int argc, char *argv[])
{
int exit_code;
+ void *data_begin;
+ size_t data_size;
- rtems_bsd_program_lock();
-
- memset(&rcvd_tbl[0], 0, sizeof(rcvd_tbl));
- s = -1;
- memset(&outpackhdr[0], 0, sizeof(outpackhdr));
- icmp_type = ICMP_ECHO;
- icmp_type_rsp = ICMP_ECHOREPLY;
- phdr_len = 0;
- nmissedmax = 0;
- npackets = 3;
- nreceived = 0;
- nrepeats = 0;
- ntransmitted = 0;
- snpackets = 0;
- sntransmitted = 0;
- sweepmax = 0;
- sweepmin = 0;
- sweepincr = 1;
- interval = 1000;
- waittime = MAXWAIT;
- nrcvtimeout = 0;
- timing = 0;
- tmin = 999999999.0;
- tmax = 0.0;
- tsum = 0.0;
- tsumsq = 0.0;
- finish_up = 0;
- siginfo_p = 0;
-
- exit_code = rtems_bsd_program_call_main("ping", main, argc, argv);
+ data_begin = RTEMS_LINKER_SET_BEGIN(bsd_prog_ping);
+ data_size = RTEMS_LINKER_SET_SIZE(bsd_prog_ping);
+ rtems_bsd_program_lock();
+ exit_code = rtems_bsd_program_call_main_with_data_restore("ping",
+ main, argc, argv, data_begin, data_size);
rtems_bsd_program_unlock();
- close(s);
-
return exit_code;
}
-#endif /* __rtems__ */
int
-#ifndef __rtems__
-main(int argc, char *const *argv)
-#else /* __rtems__ */
main(int argc, char **argv)
+#else /* __rtems__ */
+int
+main(int argc, char *const *argv)
#endif /* __rtems__ */
{
struct sockaddr_in from, sock_in;
@@ -289,13 +288,14 @@ main(int argc, char **argv)
struct iovec iov;
struct ip *ip;
struct msghdr msg;
+#ifndef __rtems__
struct sigaction si_sa;
+#endif /* __rtems__ */
size_t sz;
#ifndef __rtems__
u_char *datap, packet[IP_MAXPACKET] __aligned(4);
#else /* __rtems__ */
u_char *datap;
- static u_char packet[IP_MAXPACKET] __aligned(4);
#endif /* __rtems__ */
char *ep, *source, *target, *payload;
struct hostent *hp;
@@ -305,19 +305,23 @@ main(int argc, char **argv)
struct sockaddr_in *to;
double t;
u_long alarmtimeout, ultmp;
- int almost_done, ch, df, hold, i, icmp_len, mib[4], preload, sockerrno,
- tos, ttl;
+ int almost_done, ch, df, hold, i, icmp_len, mib[4], preload;
+ int ssend_errno, srecv_errno, tos, ttl;
char ctrl[CMSG_SPACE(sizeof(struct timeval))];
#ifndef __rtems__
char hnamebuf[MAXHOSTNAMELEN], snamebuf[MAXHOSTNAMELEN];
-#else /* __rtems__ */
- static char hnamebuf[MAXHOSTNAMELEN];
- static char snamebuf[MAXHOSTNAMELEN];
#endif /* __rtems__ */
#ifdef IP_OPTIONS
char rspace[MAX_IPOPTLEN]; /* record route space */
#endif
unsigned char loop, mttl;
+
+ payload = source = NULL;
+#ifdef IPSEC_POLICY_IPSEC
+ policy_in = policy_out = NULL;
+#endif
+ cap_rights_t rights;
+ bool cansandbox;
#ifdef __rtems__
struct getopt_data getopt_data;
memset(&getopt_data, 0, sizeof(getopt_data));
@@ -328,22 +332,38 @@ main(int argc, char **argv)
#define getopt(argc, argv, opt) getopt_r(argc, argv, "+" opt, &getopt_data)
#endif /* __rtems__ */
- payload = source = NULL;
-#ifdef IPSEC_POLICY_IPSEC
- policy_in = policy_out = NULL;
-#endif
-
/*
* Do the stuff that we need root priv's for *first*, and
* then drop our setuid bit. Save error reporting for
* after arg parsing.
+ *
+ * Historicaly ping was using one socket 's' for sending and for
+ * receiving. After capsicum(4) related changes we use two
+ * sockets. It was done for special ping use case - when user
+ * issue ping on multicast or broadcast address replies come
+ * from different addresses, not from the address we
+ * connect(2)'ed to, and send socket do not receive those
+ * packets.
*/
- s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
- sockerrno = errno;
+ ssend = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
+ ssend_errno = errno;
+ srecv = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
+ srecv_errno = errno;
- setuid(getuid());
+ if (setuid(getuid()) != 0)
+ err(EX_NOPERM, "setuid() failed");
uid = getuid();
+ if (ssend < 0) {
+ errno = ssend_errno;
+ err(EX_OSERR, "ssend socket");
+ }
+
+ if (srecv < 0) {
+ errno = srecv_errno;
+ err(EX_OSERR, "srecv socket");
+ }
+
alarmtimeout = df = preload = tos = 0;
outpack = outpackhdr + sizeof(struct ip);
@@ -612,13 +632,22 @@ main(int argc, char **argv)
if (options & F_PINGFILLED) {
fill((char *)datap, payload);
}
+#ifdef HAVE_LIBCASPER
+ capdns = capdns_setup();
+#endif
if (source) {
bzero((char *)&sock_in, sizeof(sock_in));
sock_in.sin_family = AF_INET;
if (inet_aton(source, &sock_in.sin_addr) != 0) {
shostname = source;
} else {
- hp = gethostbyname2(source, AF_INET);
+#ifdef HAVE_LIBCASPER
+ if (capdns != NULL)
+ hp = cap_gethostbyname2(capdns, source,
+ AF_INET);
+ else
+#endif
+ hp = gethostbyname2(source, AF_INET);
if (!hp)
errx(EX_NOHOST, "cannot resolve %s: %s",
source, hstrerror(h_errno));
@@ -634,7 +663,8 @@ main(int argc, char **argv)
snamebuf[sizeof(snamebuf) - 1] = '\0';
shostname = snamebuf;
}
- if (bind(s, (struct sockaddr *)&sock_in, sizeof sock_in) == -1)
+ if (bind(ssend, (struct sockaddr *)&sock_in, sizeof sock_in) ==
+ -1)
err(1, "bind");
}
@@ -645,7 +675,12 @@ main(int argc, char **argv)
if (inet_aton(target, &to->sin_addr) != 0) {
hostname = target;
} else {
- hp = gethostbyname2(target, AF_INET);
+#ifdef HAVE_LIBCASPER
+ if (capdns != NULL)
+ hp = cap_gethostbyname2(capdns, target, AF_INET);
+ else
+#endif
+ hp = gethostbyname2(target, AF_INET);
if (!hp)
errx(EX_NOHOST, "cannot resolve %s: %s",
target, hstrerror(h_errno));
@@ -658,6 +693,20 @@ main(int argc, char **argv)
hostname = hnamebuf;
}
+#ifdef HAVE_LIBCASPER
+ /* From now on we will use only reverse DNS lookups. */
+ if (capdns != NULL) {
+ const char *types[1];
+
+ types[0] = "ADDR";
+ if (cap_dns_type_limit(capdns, types, 1) < 0)
+ err(1, "unable to limit access to system.dns service");
+ }
+#endif
+
+ if (connect(ssend, (struct sockaddr *)&whereto, sizeof(whereto)) != 0)
+ err(1, "connect");
+
if (options & F_FLOOD && options & F_INTERVAL)
errx(EX_USAGE, "-f and -i: incompatible options");
@@ -678,16 +727,15 @@ main(int argc, char **argv)
ident = getpid() & 0xFFFF;
- if (s < 0) {
- errno = sockerrno;
- err(EX_OSERR, "socket");
- }
hold = 1;
- if (options & F_SO_DEBUG)
- (void)setsockopt(s, SOL_SOCKET, SO_DEBUG, (char *)&hold,
+ if (options & F_SO_DEBUG) {
+ (void)setsockopt(ssend, SOL_SOCKET, SO_DEBUG, (char *)&hold,
sizeof(hold));
+ (void)setsockopt(srecv, SOL_SOCKET, SO_DEBUG, (char *)&hold,
+ sizeof(hold));
+ }
if (options & F_SO_DONTROUTE)
- (void)setsockopt(s, SOL_SOCKET, SO_DONTROUTE, (char *)&hold,
+ (void)setsockopt(ssend, SOL_SOCKET, SO_DONTROUTE, (char *)&hold,
sizeof(hold));
#ifdef IPSEC
#ifdef IPSEC_POLICY_IPSEC
@@ -697,7 +745,7 @@ main(int argc, char **argv)
buf = ipsec_set_policy(policy_in, strlen(policy_in));
if (buf == NULL)
errx(EX_CONFIG, "%s", ipsec_strerror());
- if (setsockopt(s, IPPROTO_IP, IP_IPSEC_POLICY,
+ if (setsockopt(srecv, IPPROTO_IP, IP_IPSEC_POLICY,
buf, ipsec_get_policylen(buf)) < 0)
err(EX_CONFIG,
"ipsec policy cannot be configured");
@@ -708,7 +756,7 @@ main(int argc, char **argv)
buf = ipsec_set_policy(policy_out, strlen(policy_out));
if (buf == NULL)
errx(EX_CONFIG, "%s", ipsec_strerror());
- if (setsockopt(s, IPPROTO_IP, IP_IPSEC_POLICY,
+ if (setsockopt(ssend, IPPROTO_IP, IP_IPSEC_POLICY,
buf, ipsec_get_policylen(buf)) < 0)
err(EX_CONFIG,
"ipsec policy cannot be configured");
@@ -729,17 +777,43 @@ main(int argc, char **argv)
if (sysctl(mib, 4, &ttl, &sz, NULL, 0) == -1)
err(1, "sysctl(net.inet.ip.ttl)");
}
- setsockopt(s, IPPROTO_IP, IP_HDRINCL, &hold, sizeof(hold));
+ setsockopt(ssend, IPPROTO_IP, IP_HDRINCL, &hold, sizeof(hold));
ip->ip_v = IPVERSION;
ip->ip_hl = sizeof(struct ip) >> 2;
ip->ip_tos = tos;
ip->ip_id = 0;
- ip->ip_off = df ? IP_DF : 0;
+ ip->ip_off = htons(df ? IP_DF : 0);
ip->ip_ttl = ttl;
ip->ip_p = IPPROTO_ICMP;
ip->ip_src.s_addr = source ? sock_in.sin_addr.s_addr : INADDR_ANY;
ip->ip_dst = to->sin_addr;
}
+
+ if (options & F_NUMERIC)
+ cansandbox = true;
+#ifdef HAVE_LIBCASPER
+ else if (capdns != NULL)
+ cansandbox = true;
+#endif
+ else
+ cansandbox = false;
+
+ /*
+ * Here we enter capability mode. Further down access to global
+ * namespaces (e.g filesystem) is restricted (see capsicum(4)).
+ * We must connect(2) our socket before this point.
+ */
+ if (cansandbox && cap_enter() < 0 && errno != ENOSYS)
+ err(1, "cap_enter");
+
+ cap_rights_init(&rights, CAP_RECV, CAP_EVENT, CAP_SETSOCKOPT);
+ if (cap_rights_limit(srecv, &rights) < 0 && errno != ENOSYS)
+ err(1, "cap_rights_limit srecv");
+
+ cap_rights_init(&rights, CAP_SEND, CAP_SETSOCKOPT);
+ if (cap_rights_limit(ssend, &rights) < 0 && errno != ENOSYS)
+ err(1, "cap_rights_limit ssend");
+
/* record route option */
if (options & F_RROUTE) {
#ifdef IP_OPTIONS
@@ -748,7 +822,7 @@ main(int argc, char **argv)
rspace[IPOPT_OLEN] = sizeof(rspace) - 1;
rspace[IPOPT_OFFSET] = IPOPT_MINOFF;
rspace[sizeof(rspace) - 1] = IPOPT_EOL;
- if (setsockopt(s, IPPROTO_IP, IP_OPTIONS, rspace,
+ if (setsockopt(ssend, IPPROTO_IP, IP_OPTIONS, rspace,
sizeof(rspace)) < 0)
err(EX_OSERR, "setsockopt IP_OPTIONS");
#else
@@ -758,38 +832,38 @@ main(int argc, char **argv)
}
if (options & F_TTL) {
- if (setsockopt(s, IPPROTO_IP, IP_TTL, &ttl,
+ if (setsockopt(ssend, IPPROTO_IP, IP_TTL, &ttl,
sizeof(ttl)) < 0) {
err(EX_OSERR, "setsockopt IP_TTL");
}
}
if (options & F_NOLOOP) {
- if (setsockopt(s, IPPROTO_IP, IP_MULTICAST_LOOP, &loop,
+ if (setsockopt(ssend, IPPROTO_IP, IP_MULTICAST_LOOP, &loop,
sizeof(loop)) < 0) {
err(EX_OSERR, "setsockopt IP_MULTICAST_LOOP");
}
}
if (options & F_MTTL) {
- if (setsockopt(s, IPPROTO_IP, IP_MULTICAST_TTL, &mttl,
+ if (setsockopt(ssend, IPPROTO_IP, IP_MULTICAST_TTL, &mttl,
sizeof(mttl)) < 0) {
err(EX_OSERR, "setsockopt IP_MULTICAST_TTL");
}
}
if (options & F_MIF) {
- if (setsockopt(s, IPPROTO_IP, IP_MULTICAST_IF, &ifaddr,
+ if (setsockopt(ssend, IPPROTO_IP, IP_MULTICAST_IF, &ifaddr,
sizeof(ifaddr)) < 0) {
err(EX_OSERR, "setsockopt IP_MULTICAST_IF");
}
}
#ifdef SO_TIMESTAMP
{ int on = 1;
- if (setsockopt(s, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on)) < 0)
+ if (setsockopt(srecv, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on)) < 0)
err(EX_OSERR, "setsockopt SO_TIMESTAMP");
}
#endif
if (sweepmax) {
- if (sweepmin >= sweepmax)
- errx(EX_USAGE, "Maximum packet size must be greater than the minimum packet size");
+ if (sweepmin > sweepmax)
+ errx(EX_USAGE, "Maximum packet size must be no less than the minimum packet size");
if (datalen != DEFDATALEN)
errx(EX_USAGE, "Packet size and ping sweep are mutually exclusive");
@@ -802,7 +876,7 @@ main(int argc, char **argv)
datalen = sweepmin;
send_len = icmp_len + sweepmin;
}
- if (options & F_SWEEP && !sweepmax)
+ if (options & F_SWEEP && !sweepmax)
errx(EX_USAGE, "Maximum sweep size must be specified");
/*
@@ -818,11 +892,19 @@ main(int argc, char **argv)
* as well.
*/
hold = IP_MAXPACKET + 128;
- (void)setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&hold,
+ (void)setsockopt(srecv, SOL_SOCKET, SO_RCVBUF, (char *)&hold,
sizeof(hold));
+ /* CAP_SETSOCKOPT removed */
+ cap_rights_init(&rights, CAP_RECV, CAP_EVENT);
+ if (cap_rights_limit(srecv, &rights) < 0 && errno != ENOSYS)
+ err(1, "cap_rights_limit srecv setsockopt");
if (uid == 0)
- (void)setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&hold,
+ (void)setsockopt(ssend, SOL_SOCKET, SO_SNDBUF, (char *)&hold,
sizeof(hold));
+ /* CAP_SETSOCKOPT removed */
+ cap_rights_init(&rights, CAP_SEND);
+ if (cap_rights_limit(ssend, &rights) < 0 && errno != ENOSYS)
+ err(1, "cap_rights_limit ssend setsockopt");
if (to->sin_family == AF_INET) {
(void)printf("PING %s (%s)", hostname,
@@ -832,9 +914,9 @@ main(int argc, char **argv)
if (sweepmax)
(void)printf(": (%d ... %d) data bytes\n",
sweepmin, sweepmax);
- else
+ else
(void)printf(": %d data bytes\n", datalen);
-
+
} else {
if (sweepmax)
(void)printf("PING %s: (%d ... %d) data bytes\n",
@@ -867,8 +949,6 @@ main(int argc, char **argv)
if (sigaction(SIGALRM, &si_sa, 0) == -1)
err(EX_OSERR, "sigaction SIGALRM");
}
-#else /* __rtems__ */
- (void) si_sa;
#endif /* __rtems__ */
bzero(&msg, sizeof(msg));
@@ -906,10 +986,10 @@ main(int argc, char **argv)
int cc, n;
check_status();
- if ((unsigned)s >= FD_SETSIZE)
+ if ((unsigned)srecv >= FD_SETSIZE)
errx(EX_OSERR, "descriptor too large");
FD_ZERO(&rfds);
- FD_SET(s, &rfds);
+ FD_SET(srecv, &rfds);
(void)gettimeofday(&now, NULL);
timeout.tv_sec = last.tv_sec + intvl.tv_sec - now.tv_sec;
timeout.tv_usec = last.tv_usec + intvl.tv_usec - now.tv_usec;
@@ -922,8 +1002,8 @@ main(int argc, char **argv)
timeout.tv_sec++;
}
if (timeout.tv_sec < 0)
- timeout.tv_sec = timeout.tv_usec = 0;
- n = select(s + 1, &rfds, NULL, NULL, &timeout);
+ timerclear(&timeout);
+ n = select(srecv + 1, &rfds, NULL, NULL, &timeout);
if (n < 0)
continue; /* Must be EINTR. */
if (n == 1) {
@@ -934,7 +1014,7 @@ main(int argc, char **argv)
msg.msg_controllen = sizeof(ctrl);
#endif
msg.msg_namelen = sizeof(from);
- if ((cc = recvmsg(s, &msg, 0)) < 0) {
+ if ((cc = recvmsg(srecv, &msg, 0)) < 0) {
if (errno == EINTR)
continue;
warn("recvmsg");
@@ -960,14 +1040,14 @@ main(int argc, char **argv)
}
if (n == 0 || options & F_FLOOD) {
if (sweepmax && sntransmitted == snpackets) {
- for (i = 0; i < sweepincr ; ++i)
+ for (i = 0; i < sweepincr ; ++i)
*datap++ = i;
datalen += sweepincr;
if (datalen > sweepmax)
break;
send_len = icmp_len + datalen;
sntransmitted = 0;
- }
+ }
if (!npackets || ntransmitted < npackets)
pinger();
else {
@@ -1044,7 +1124,7 @@ pinger(void)
icp->icmp_seq = htons(ntransmitted);
icp->icmp_id = ident; /* ID */
- CLR(ntransmitted % mx_dup_ck);
+ CLR(ntransmitted % MAX_DUP_CHK);
if ((options & F_TIME) || timing) {
(void)gettimeofday(&now, NULL);
@@ -1068,13 +1148,11 @@ pinger(void)
if (options & F_HDRINCL) {
cc += sizeof(struct ip);
ip = (struct ip *)outpackhdr;
- ip->ip_len = cc;
+ ip->ip_len = htons(cc);
ip->ip_sum = in_cksum((u_short *)outpackhdr, cc);
packet = outpackhdr;
}
- i = sendto(s, (char *)packet, cc, 0, (struct sockaddr *)&whereto,
- sizeof(whereto));
-
+ i = send(ssend, (char *)packet, cc, 0);
if (i < 0 || i != cc) {
if (i < 0) {
if (options & F_FLOOD && errno == ENOBUFS) {
@@ -1142,7 +1220,8 @@ pr_pack(char *buf, int cc, struct sockaddr_in *from, struct timeval *tv)
#endif
tp = (const char *)tp + phdr_len;
- if (cc - ICMP_MINLEN - phdr_len >= sizeof(tv1)) {
+ if ((size_t)(cc - ICMP_MINLEN - phdr_len) >=
+ sizeof(tv1)) {
/* Copy to avoid alignment problems: */
memcpy(&tv32, tp, sizeof(tv32));
tv1.tv_sec = ntohl(tv32.tv32_sec);
@@ -1162,18 +1241,18 @@ pr_pack(char *buf, int cc, struct sockaddr_in *from, struct timeval *tv)
seq = ntohs(icp->icmp_seq);
- if (TST(seq % mx_dup_ck)) {
+ if (TST(seq % MAX_DUP_CHK)) {
++nrepeats;
--nreceived;
dupflag = 1;
} else {
- SET(seq % mx_dup_ck);
+ SET(seq % MAX_DUP_CHK);
dupflag = 0;
}
if (options & F_QUIET)
return;
-
+
if (options & F_WAITTIME && triptime > waittime) {
++nrcvtimeout;
return;
@@ -1195,7 +1274,7 @@ pr_pack(char *buf, int cc, struct sockaddr_in *from, struct timeval *tv)
if (options & F_MASK) {
/* Just prentend this cast isn't ugly */
(void)printf(" mask=%s",
- pr_addr(*(struct in_addr *)&(icp->icmp_mask)));
+ inet_ntoa(*(struct in_addr *)&(icp->icmp_mask)));
}
if (options & F_TIME) {
(void)printf(" tso=%s", pr_ntime(icp->icmp_otime));
@@ -1437,6 +1516,7 @@ static void
check_status(void)
{
+#ifndef __rtems__
if (siginfo_p) {
siginfo_p = 0;
(void)fprintf(stderr, "\r%ld/%ld packets received (%.1f%%)",
@@ -1447,6 +1527,7 @@ check_status(void)
tmin, tsum / (nreceived + nrepeats), tmax);
(void)fprintf(stderr, "\n");
}
+#endif /* __rtems__ */
}
/*
@@ -1697,12 +1778,21 @@ pr_addr(struct in_addr ina)
struct hostent *hp;
static char buf[16 + 3 + MAXHOSTNAMELEN];
- if ((options & F_NUMERIC) ||
- !(hp = gethostbyaddr((char *)&ina, 4, AF_INET)))
+ if (options & F_NUMERIC)
return inet_ntoa(ina);
+
+#ifdef HAVE_LIBCASPER
+ if (capdns != NULL)
+ hp = cap_gethostbyaddr(capdns, (char *)&ina, 4, AF_INET);
else
- (void)snprintf(buf, sizeof(buf), "%s (%s)", hp->h_name,
- inet_ntoa(ina));
+#endif
+ hp = gethostbyaddr((char *)&ina, 4, AF_INET);
+
+ if (hp == NULL)
+ return inet_ntoa(ina);
+
+ (void)snprintf(buf, sizeof(buf), "%s (%s)", hp->h_name,
+ inet_ntoa(ina));
return(buf);
}
@@ -1752,7 +1842,7 @@ fill(char *bp, char *patp)
u_int ii, jj, kk;
for (cp = patp; *cp; cp++) {
- if (!isxdigit((unsigned char) *cp))
+ if (!isxdigit((unsigned char)*cp))
errx(EX_USAGE,
"patterns must be specified as hex digits");
@@ -1775,6 +1865,34 @@ fill(char *bp, char *patp)
}
}
+#ifdef HAVE_LIBCASPER
+static cap_channel_t *
+capdns_setup(void)
+{
+ cap_channel_t *capcas, *capdnsloc;
+ const char *types[2];
+ int families[1];
+
+ capcas = cap_init();
+ if (capcas == NULL)
+ err(1, "unable to create casper process");
+ capdnsloc = cap_service_open(capcas, "system.dns");
+ /* Casper capability no longer needed. */
+ cap_close(capcas);
+ if (capdnsloc == NULL)
+ err(1, "unable to open system.dns service");
+ types[0] = "NAME";
+ types[1] = "ADDR";
+ if (cap_dns_type_limit(capdnsloc, types, 2) < 0)
+ err(1, "unable to limit access to system.dns service");
+ families[0] = AF_INET;
+ if (cap_dns_family_limit(capdnsloc, families, 1) < 0)
+ err(1, "unable to limit access to system.dns service");
+
+ return (capdnsloc);
+}
+#endif /* HAVE_LIBCASPER */
+
#if defined(IPSEC) && defined(IPSEC_POLICY_IPSEC)
#define SECOPT " [-P policy]"
#else
diff --git a/freebsd/sbin/ping/rtems-bsd-ping-data.h b/freebsd/sbin/ping/rtems-bsd-ping-data.h
new file mode 100644
index 00000000..b89b2282
--- /dev/null
+++ b/freebsd/sbin/ping/rtems-bsd-ping-data.h
@@ -0,0 +1,3 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+/* ping.c */
diff --git a/freebsd/sbin/ping/rtems-bsd-ping-namespace.h b/freebsd/sbin/ping/rtems-bsd-ping-namespace.h
new file mode 100644
index 00000000..a46682da
--- /dev/null
+++ b/freebsd/sbin/ping/rtems-bsd-ping-namespace.h
@@ -0,0 +1,2 @@
+/* generated by userspace-header-gen.py */
+/* ping.c */
diff --git a/freebsd/sbin/ping/rtems-bsd-ping-ping-data.h b/freebsd/sbin/ping/rtems-bsd-ping-ping-data.h
new file mode 100644
index 00000000..46bada6b
--- /dev/null
+++ b/freebsd/sbin/ping/rtems-bsd-ping-ping-data.h
@@ -0,0 +1,43 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ping-data.h"
+/* ping.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int options);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static struct sockaddr_in whereto);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int datalen);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int maxpayload);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int ssend);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int srecv);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static u_char *outpack);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static char *hostname);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static char *shostname);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int ident);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int uid);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static u_char icmp_type);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static u_char icmp_type_rsp);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int phdr_len);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int send_len);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static long int nmissedmax);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static long int npackets);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static long int nreceived);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static long int nrepeats);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static long int ntransmitted);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static long int snpackets);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static long int sntransmitted);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int sweepmax);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int sweepmin);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int sweepincr);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int interval);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int waittime);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static long int nrcvtimeout);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static int timing);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static double tmin);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static double tmax);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static double tsum);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static double tsumsq);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static sig_atomic_t volatile finish_up);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static char rcvd_tbl[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static u_char outpackhdr[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static u_char packet[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static char hnamebuf[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping, static char snamebuf[]);
diff --git a/freebsd/sbin/ping6/ping6.c b/freebsd/sbin/ping6/ping6.c
index 054cebf2..b8f565c6 100644
--- a/freebsd/sbin/ping6/ping6.c
+++ b/freebsd/sbin/ping6/ping6.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-ping6-namespace.h"
+#endif /* __rtems__ */
+
/* $KAME: ping6.c,v 1.169 2003/07/25 06:01:47 itojun Exp $ */
/*
@@ -106,20 +110,8 @@ __FBSDID("$FreeBSD$");
#ifdef __rtems__
#define __need_getopt_newlib
#include <getopt.h>
-#define RTEMS_BSD_PROGRAM_NO_OPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_SOCKET_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FOPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FCLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_MALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_REALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FREE_WRAP
#include <machine/rtems-bsd-program.h>
#include <machine/rtems-bsd-commands.h>
-
-#define USE_RFC2292BIS
-#define HAVE_POLL_H
#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/uio.h>
@@ -145,10 +137,8 @@ __FBSDID("$FreeBSD$");
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sysexits.h>
#include <unistd.h>
-#ifdef HAVE_POLL_H
-#include <poll.h>
-#endif
#ifdef IPSEC
#include <netipsec/ah.h>
@@ -156,6 +146,9 @@ __FBSDID("$FreeBSD$");
#endif
#include <md5.h>
+#ifdef __rtems__
+#include "rtems-bsd-ping6-ping6-data.h"
+#endif /* __rtems__ */
struct tv32 {
u_int32_t tv32_sec;
@@ -174,6 +167,8 @@ struct tv32 {
#define DEFDATALEN ICMP6ECHOTMLEN
#define MAXDATALEN MAXPACKETLEN - IP6LEN - ICMP6ECHOLEN
#define NROUTES 9 /* number of record route slots */
+#define MAXWAIT 10000 /* max ms to wait for response */
+#define MAXALARM (60 * 60) /* max seconds for alarm timeout */
#define A(bit) rcvd_tbl[(bit)>>3] /* identify byte in array */
#define B(bit) (1 << ((bit) & 0x07)) /* identify bit in byte */
@@ -210,7 +205,8 @@ struct tv32 {
#define F_MISSED 0x800000
#define F_DONTFRAG 0x1000000
#define F_NOUSERDATA (F_NODEADDR | F_FQDN | F_FQDNOLD | F_SUPTYPES)
-static u_int options;
+#define F_WAITTIME 0x2000000
+u_int options;
#define IN6LEN sizeof(struct in6_addr)
#define SA6LEN sizeof(struct sockaddr_in6)
@@ -224,45 +220,40 @@ static u_int options;
* to 8192 for complete accuracy...
*/
#define MAX_DUP_CHK (8 * 8192)
-static const int mx_dup_ck = MAX_DUP_CHK;
+static int mx_dup_ck = MAX_DUP_CHK;
static char rcvd_tbl[MAX_DUP_CHK / 8];
-static struct addrinfo *res;
static struct sockaddr_in6 dst; /* who to ping6 */
static struct sockaddr_in6 src; /* src addr of this packet */
static socklen_t srclen;
-static int datalen;
-static int s; /* socket file descriptor */
+static size_t datalen = DEFDATALEN;
+static int s; /* socket file descriptor */
static u_char outpack[MAXPACKETLEN];
-static const char BSPACE = '\b'; /* characters written for flood */
-static const char BBELL = '\a'; /* characters written for AUDIBLE */
-static const char DOT = '.';
+static char BSPACE = '\b'; /* characters written for flood */
+static char BBELL = '\a'; /* characters written for AUDIBLE */
+static char DOT = '.';
static char *hostname;
-static int ident; /* process id to identify our packets */
-static u_int8_t nonce[8]; /* nonce field for node information */
-static int hoplimit; /* hoplimit */
-static u_char *packet;
-#ifdef HAVE_POLL_H
-static struct pollfd fdmaskp[1];
-#else
-static fd_set *fdmaskp = NULL;
-static int fdmasks;
-#endif
+static int ident; /* process id to identify our packets */
+static u_int8_t nonce[8]; /* nonce field for node information */
+static int hoplimit = -1; /* hoplimit */
+static u_char *packet = NULL;
/* counters */
static long nmissedmax; /* max value of ntransmitted - nreceived - 1 */
-static long npackets; /* max packets to transmit */
-static long nreceived; /* # of packets we got back */
-static long nrepeats; /* number of duplicates */
-static long ntransmitted; /* sequence # for outbound packets = #sent */
-static struct timeval interval; /* interval between packets */
+static long npackets; /* max packets to transmit */
+static long nreceived; /* # of packets we got back */
+static long nrepeats; /* number of duplicates */
+static long ntransmitted; /* sequence # for outbound packets = #sent */
+static int interval = 1000; /* interval between packets in ms */
+static int waittime = MAXWAIT; /* timeout for each packet */
+static long nrcvtimeout = 0; /* # of packets we got back after waittime */
/* timing */
-static int timing; /* flag to do timing */
-static double tmin; /* minimum round trip time */
-static double tmax; /* maximum round trip time */
-static double tsum; /* sum of all times, for doing average */
-static double tsumsq; /* sum of all times squared, for std. dev. */
+static int timing; /* flag to do timing */
+static double tmin = 999999999.0; /* minimum round trip time */
+static double tmax = 0.0; /* maximum round trip time */
+static double tsum = 0.0; /* sum of all times, for doing average */
+static double tsumsq = 0.0; /* sum of all times squared, for std. dev. */
/* for node addresses */
static u_short naflags;
@@ -272,22 +263,19 @@ static struct msghdr smsghdr;
static struct iovec smsgiov;
static char *scmsg = 0;
-static volatile sig_atomic_t seenalrm;
static volatile sig_atomic_t seenint;
#ifdef SIGINFO
static volatile sig_atomic_t seeninfo;
#endif
-/* For control (ancillary) data received from recvmsg() */
-static struct cmsghdr cm[CONTROLLEN];
-
-static int main(int, char *[]);
+#ifndef __rtems__
+int main(int, char *[]);
+#endif /* __rtems__ */
static void fill(char *, char *);
static int get_hoplim(struct msghdr *);
static int get_pathmtu(struct msghdr *);
static struct in6_pktinfo *get_rcvpktinfo(struct msghdr *);
static void onsignal(int);
-static void retransmit(void);
static void onint(int);
static size_t pingerlen(void);
static int pinger(void);
@@ -299,7 +287,7 @@ static void pr_nodeaddr(struct icmp6_nodeinfo *, int);
static int myechoreply(const struct icmp6_hdr *);
static int mynireply(const struct icmp6_nodeinfo *);
static char *dnsdecode(const u_char **, const u_char *, const u_char *,
- char *, size_t);
+ char *, size_t);
static void pr_pack(u_char *, int, struct msghdr *);
static void pr_exthdrs(struct msghdr *);
static void pr_ip6opt(void *, size_t);
@@ -308,85 +296,56 @@ static int pr_bitrange(u_int32_t, int, int);
static void pr_retip(struct ip6_hdr *, u_char *);
static void summary(void);
static void tvsub(struct timeval *, struct timeval *);
-#ifdef IPSEC
-#ifdef IPSEC_POLICY_IPSEC
static int setpolicy(int, char *);
-#endif
-#endif
static char *nigroup(char *, int);
static void usage(void);
#ifdef __rtems__
-int rtems_bsd_command_ping6(int argc, char **argv)
-{
- int exit_code;
+#define USE_RFC2292BIS
+#endif /* __rtems__ */
+#ifdef __rtems__
+static int main(int argc, char *argv[]);
- rtems_bsd_program_lock();
+RTEMS_LINKER_RWSET(bsd_prog_ping6, char);
- memset(&rcvd_tbl[0], 0, sizeof(rcvd_tbl));
- res = NULL;
- srclen = 0;
- datalen = DEFDATALEN;
- s = -1;
- memset(&outpack[0], 0, sizeof(outpack));
- hoplimit = -1;
- nmissedmax = 0;
- npackets = 0;
- nreceived = 0;
- nrepeats = 0;
- ntransmitted = 0;
- interval.tv_sec = 1;
- interval.tv_usec = 0;
- timing = 0;
- tmin = 999999999.0;
- tmax = 0.0;
- tsum = 0.0;
- tsumsq = 0.0;
- naflags = 0;
- scmsg = NULL;
- seenalrm = 0;
- seenint = 0;
-#ifdef SIGINFO
- seeninfo = 0;
-#endif
- packet = NULL;
+int
+rtems_bsd_command_ping6(int argc, char *argv[])
+{
+ int exit_code;
+ void *data_begin;
+ size_t data_size;
- exit_code = rtems_bsd_program_call_main("ping6", main, argc, argv);
+ data_begin = RTEMS_LINKER_SET_BEGIN(bsd_prog_ping6);
+ data_size = RTEMS_LINKER_SET_SIZE(bsd_prog_ping6);
+ rtems_bsd_program_lock();
+ exit_code = rtems_bsd_program_call_main_with_data_restore("ping6",
+ main, argc, argv, data_begin, data_size);
rtems_bsd_program_unlock();
- close(s);
- free(scmsg);
- free(packet);
-
- if (res != NULL) {
- freeaddrinfo(res);
- }
-
return exit_code;
}
#endif /* __rtems__ */
int
main(int argc, char *argv[])
{
- struct itimerval itimer;
- struct sockaddr_in6 from;
-#ifndef HAVE_ARC4RANDOM
- struct timeval seed;
-#endif
-#ifdef HAVE_POLL_H
- int timeout;
-#else
- struct timeval timeout, *tv;
-#endif
- struct addrinfo hints;
+ struct timeval last, intvl;
+ struct sockaddr_in6 from, *sin6;
+ struct addrinfo hints, *res;
+ struct sigaction si_sa;
int cc, i;
- int ch, hold, packlen, preload, optval, ret_ga;
+ int almost_done, ch, hold, packlen, preload, optval, error;
int nig_oldmcprefix = -1;
u_char *datap;
char *e, *target, *ifname = NULL, *gateway = NULL;
int ip6optlen = 0;
struct cmsghdr *scmsgp = NULL;
+ /* For control (ancillary) data received from recvmsg() */
+#ifndef __rtems__
+ struct cmsghdr cm[CONTROLLEN];
+#else /* __rtems__ */
+ static struct cmsghdr cm[CONTROLLEN];
+#endif /* __rtems__ */
#if defined(SO_SNDBUF) && defined(SO_RCVBUF)
u_long lsockbufsize;
int sockbufsize = 0;
@@ -400,7 +359,8 @@ main(int argc, char *argv[])
char *policy_in = NULL;
char *policy_out = NULL;
#endif
- double intval;
+ double t;
+ u_long alarmtimeout;
size_t rthlen;
#ifdef IPV6_USE_MIN_MTU
int mflag = 0;
@@ -419,7 +379,7 @@ main(int argc, char *argv[])
memset(&smsghdr, 0, sizeof(smsghdr));
memset(&smsgiov, 0, sizeof(smsgiov));
- preload = 0;
+ alarmtimeout = preload = 0;
datap = &outpack[ICMP6ECHOLEN + ICMP6ECHOTMLEN];
#ifndef IPSEC
#define ADDOPTS
@@ -431,7 +391,7 @@ main(int argc, char *argv[])
#endif /*IPSEC_POLICY_IPSEC*/
#endif
while ((ch = getopt(argc, argv,
- "a:b:c:DdfHg:h:I:i:l:mnNop:qrRS:s:tvwW" ADDOPTS)) != -1) {
+ "a:b:c:DdfHg:h:I:i:l:mnNop:qrRS:s:tvwWx:X:" ADDOPTS)) != -1) {
#undef ADDOPTS
switch (ch) {
case 'a':
@@ -482,9 +442,9 @@ main(int argc, char *argv[])
errno = 0;
e = NULL;
lsockbufsize = strtoul(optarg, &e, 10);
- sockbufsize = lsockbufsize;
+ sockbufsize = (int)lsockbufsize;
if (errno || !*optarg || *e ||
- sockbufsize != lsockbufsize)
+ lsockbufsize > INT_MAX)
errx(1, "invalid socket buffer size");
#else
errx(1,
@@ -533,22 +493,22 @@ main(int argc, char *argv[])
#endif
break;
case 'i': /* wait between sending packets */
- intval = strtod(optarg, &e);
+ t = strtod(optarg, &e);
if (*optarg == '\0' || *e != '\0')
errx(1, "illegal timing interval %s", optarg);
- if (intval < 1 && getuid()) {
+ if (t < 1 && getuid()) {
errx(1, "%s: only root may use interval < 1s",
strerror(EPERM));
}
- interval.tv_sec = (long)intval;
- interval.tv_usec =
- (long)((intval - interval.tv_sec) * 1000000);
- if (interval.tv_sec < 0)
+ intvl.tv_sec = (long)t;
+ intvl.tv_usec =
+ (long)((t - intvl.tv_sec) * 1000000);
+ if (intvl.tv_sec < 0)
errx(1, "illegal timing interval %s", optarg);
/* less than 1/hz does not make sense */
- if (interval.tv_sec == 0 && interval.tv_usec < 1) {
+ if (intvl.tv_sec == 0 && intvl.tv_usec < 1) {
warnx("too small interval, raised to .000001");
- interval.tv_usec = 1;
+ intvl.tv_usec = 1;
}
options |= F_INTERVAL;
break;
@@ -599,10 +559,10 @@ main(int argc, char *argv[])
hints.ai_socktype = SOCK_RAW;
hints.ai_protocol = IPPROTO_ICMPV6;
- ret_ga = getaddrinfo(optarg, NULL, &hints, &res);
- if (ret_ga) {
+ error = getaddrinfo(optarg, NULL, &hints, &res);
+ if (error) {
errx(1, "invalid source address: %s",
- gai_strerror(ret_ga));
+ gai_strerror(error));
}
/*
* res->ai_family must be AF_INET6 and res->ai_addrlen
@@ -611,7 +571,6 @@ main(int argc, char *argv[])
memcpy(&src, res->ai_addr, res->ai_addrlen);
srclen = res->ai_addrlen;
freeaddrinfo(res);
- res = NULL;
options |= F_SRCADDR;
break;
case 's': /* size of packet to send */
@@ -639,6 +598,24 @@ main(int argc, char *argv[])
options &= ~F_NOUSERDATA;
options |= F_FQDNOLD;
break;
+ case 'x':
+ t = strtod(optarg, &e);
+ if (*e || e == optarg || t > (double)INT_MAX)
+ err(EX_USAGE, "invalid timing interval: `%s'",
+ optarg);
+ options |= F_WAITTIME;
+ waittime = (int)t;
+ break;
+ case 'X':
+ alarmtimeout = strtoul(optarg, &e, 0);
+ if ((alarmtimeout < 1) || (alarmtimeout == ULONG_MAX))
+ errx(EX_USAGE, "invalid timeout: `%s'",
+ optarg);
+ if (alarmtimeout > MAXALARM)
+ errx(EX_USAGE, "invalid timeout: `%s' > %d",
+ optarg, MAXALARM);
+ alarm((int)alarmtimeout);
+ break;
#ifdef IPSEC
#ifdef IPSEC_POLICY_IPSEC
case 'P':
@@ -705,11 +682,11 @@ main(int argc, char *argv[])
hints.ai_socktype = SOCK_RAW;
hints.ai_protocol = IPPROTO_ICMPV6;
- ret_ga = getaddrinfo(target, NULL, &hints, &res);
- if (ret_ga)
- errx(1, "%s", gai_strerror(ret_ga));
+ error = getaddrinfo(target, NULL, &hints, &res);
+ if (error)
+ errx(1, "%s", gai_strerror(error));
if (res->ai_canonname)
- hostname = res->ai_canonname;
+ hostname = strdup(res->ai_canonname);
else
hostname = target;
@@ -721,40 +698,44 @@ main(int argc, char *argv[])
if ((s = socket(res->ai_family, res->ai_socktype,
res->ai_protocol)) < 0)
err(1, "socket");
+ freeaddrinfo(res);
/* set the source address if specified. */
- if ((options & F_SRCADDR) &&
- bind(s, (struct sockaddr *)&src, srclen) != 0) {
- err(1, "bind");
+ if ((options & F_SRCADDR) != 0) {
+ /* properly fill sin6_scope_id */
+ if (IN6_IS_ADDR_LINKLOCAL(&src.sin6_addr) && (
+ IN6_IS_ADDR_LINKLOCAL(&dst.sin6_addr) ||
+ IN6_IS_ADDR_MC_LINKLOCAL(&dst.sin6_addr) ||
+ IN6_IS_ADDR_MC_NODELOCAL(&dst.sin6_addr))) {
+ if (src.sin6_scope_id == 0)
+ src.sin6_scope_id = dst.sin6_scope_id;
+ if (dst.sin6_scope_id == 0)
+ dst.sin6_scope_id = src.sin6_scope_id;
+ }
+ if (bind(s, (struct sockaddr *)&src, srclen) != 0)
+ err(1, "bind");
}
-
/* set the gateway (next hop) if specified */
if (gateway) {
- struct addrinfo ghints, *gres;
- int error;
-
- memset(&ghints, 0, sizeof(ghints));
- ghints.ai_family = AF_INET6;
- ghints.ai_socktype = SOCK_RAW;
- ghints.ai_protocol = IPPROTO_ICMPV6;
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_INET6;
+ hints.ai_socktype = SOCK_RAW;
+ hints.ai_protocol = IPPROTO_ICMPV6;
- error = getaddrinfo(gateway, NULL, &hints, &gres);
+ error = getaddrinfo(gateway, NULL, &hints, &res);
if (error) {
- freeaddrinfo(gres);
errx(1, "getaddrinfo for the gateway %s: %s",
gateway, gai_strerror(error));
}
- if (gres->ai_next && (options & F_VERBOSE))
- freeaddrinfo(gres);
+ if (res->ai_next && (options & F_VERBOSE))
warnx("gateway resolves to multiple addresses");
if (setsockopt(s, IPPROTO_IPV6, IPV6_NEXTHOP,
- gres->ai_addr, gres->ai_addrlen)) {
- freeaddrinfo(gres);
+ res->ai_addr, res->ai_addrlen)) {
err(1, "setsockopt(IPV6_NEXTHOP)");
}
- freeaddrinfo(gres);
+ freeaddrinfo(res);
}
/*
@@ -790,8 +771,10 @@ main(int argc, char *argv[])
}
/* revoke root privilege */
- seteuid(getuid());
- setuid(getuid());
+ if (seteuid(getuid()) != 0)
+ err(1, "seteuid() failed");
+ if (setuid(getuid()) != 0)
+ err(1, "setuid() failed");
if ((options & F_FLOOD) && (options & F_INTERVAL))
errx(1, "-f and -i incompatible options");
@@ -821,17 +804,7 @@ main(int argc, char *argv[])
*datap++ = i;
ident = getpid() & 0xFFFF;
-#ifndef HAVE_ARC4RANDOM
- gettimeofday(&seed, NULL);
- srand((unsigned int)(seed.tv_sec ^ seed.tv_usec ^ (long)ident));
- memset(nonce, 0, sizeof(nonce));
- for (i = 0; i < sizeof(nonce); i += sizeof(int))
- *((int *)&nonce[i]) = rand();
-#else
- memset(nonce, 0, sizeof(nonce));
- for (i = 0; i < sizeof(nonce); i += sizeof(u_int32_t))
- *((u_int32_t *)&nonce[i]) = arc4random();
-#endif
+ arc4random_buf(nonce, sizeof(nonce));
optval = 1;
if (options & F_DONTFRAG)
if (setsockopt(s, IPPROTO_IPV6, IPV6_DONTFRAG,
@@ -946,7 +919,7 @@ main(int argc, char *argv[])
/* set IP6 packet options */
if (ip6optlen) {
- if ((scmsg = (char *)malloc(ip6optlen)) == 0)
+ if ((scmsg = (char *)malloc(ip6optlen)) == NULL)
errx(1, "can't allocate enough memory");
smsghdr.msg_control = (caddr_t)scmsg;
smsghdr.msg_controllen = ip6optlen;
@@ -982,7 +955,7 @@ main(int argc, char *argv[])
}
if (argc > 1) { /* some intermediate addrs are specified */
- int hops, error;
+ int hops;
#ifdef USE_RFC2292BIS
int rthdrlen;
#endif
@@ -999,31 +972,30 @@ main(int argc, char *argv[])
errx(1, "can't initialize rthdr");
#else /* old advanced API */
if ((scmsgp = (struct cmsghdr *)inet6_rthdr_init(scmsgp,
- IPV6_RTHDR_TYPE_0)) == 0)
+ IPV6_RTHDR_TYPE_0)) == NULL)
errx(1, "can't initialize rthdr");
#endif /* USE_RFC2292BIS */
for (hops = 0; hops < argc - 1; hops++) {
- struct addrinfo *iaip;
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_INET6;
if ((error = getaddrinfo(argv[hops], NULL, &hints,
- &iaip)))
+ &res)))
errx(1, "%s", gai_strerror(error));
- if (SIN6(iaip->ai_addr)->sin6_family != AF_INET6)
+ if (res->ai_addr->sa_family != AF_INET6)
errx(1,
"bad addr family of an intermediate addr");
-
+ sin6 = (struct sockaddr_in6 *)(void *)res->ai_addr;
#ifdef USE_RFC2292BIS
- if (inet6_rth_add(rthdr,
- &(SIN6(iaip->ai_addr))->sin6_addr))
+ if (inet6_rth_add(rthdr, &sin6->sin6_addr))
errx(1, "can't add an intermediate node");
#else /* old advanced API */
- if (inet6_rthdr_add(scmsgp,
- &(SIN6(iaip->ai_addr))->sin6_addr,
+ if (inet6_rthdr_add(scmsg, &sin6->sin6_addr,
IPV6_RTHDR_LOOSE))
errx(1, "can't add an intermediate node");
#endif /* USE_RFC2292BIS */
- freeaddrinfo(iaip);
+ freeaddrinfo(res);
}
#ifndef USE_RFC2292BIS
@@ -1088,7 +1060,7 @@ main(int argc, char *argv[])
#if defined(SO_SNDBUF) && defined(SO_RCVBUF)
if (sockbufsize) {
- if (datalen > sockbufsize)
+ if (datalen > (size_t)sockbufsize)
warnx("you need -b to increase socket buffer size");
if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &sockbufsize,
sizeof(sockbufsize)) < 0)
@@ -1139,58 +1111,50 @@ main(int argc, char *argv[])
printf("%s --> ", pr_addr((struct sockaddr *)&src, sizeof(src)));
printf("%s\n", pr_addr((struct sockaddr *)&dst, sizeof(dst)));
- while (preload--) /* Fire off them quickies. */
- (void)pinger();
-
- (void)signal(SIGINT, onsignal);
-#ifdef SIGINFO
- (void)signal(SIGINFO, onsignal);
-#endif
-
- if ((options & F_FLOOD) == 0) {
- (void)signal(SIGALRM, onsignal);
- itimer.it_interval = interval;
- itimer.it_value = interval;
- (void)setitimer(ITIMER_REAL, &itimer, NULL);
- if (ntransmitted == 0)
- retransmit();
+ if (preload == 0)
+ pinger();
+ else {
+ if (npackets != 0 && preload > npackets)
+ preload = npackets;
+ while (preload--)
+ pinger();
}
+ gettimeofday(&last, NULL);
-#ifndef HAVE_POLL_H
- fdmasks = howmany(s + 1, NFDBITS) * sizeof(fd_mask);
- if ((fdmaskp = malloc(fdmasks)) == NULL)
- err(1, "malloc");
-#endif
-
- seenalrm = seenint = 0;
+ sigemptyset(&si_sa.sa_mask);
+ si_sa.sa_flags = 0;
+ si_sa.sa_handler = onsignal;
+ if (sigaction(SIGINT, &si_sa, 0) == -1)
+ err(EX_OSERR, "sigaction SIGINT");
+ seenint = 0;
#ifdef SIGINFO
+ if (sigaction(SIGINFO, &si_sa, 0) == -1)
+ err(EX_OSERR, "sigaction SIGINFO");
seeninfo = 0;
#endif
+ if (alarmtimeout > 0) {
+ if (sigaction(SIGALRM, &si_sa, 0) == -1)
+ err(EX_OSERR, "sigaction SIGALRM");
+ }
+ if (options & F_FLOOD) {
+ intvl.tv_sec = 0;
+ intvl.tv_usec = 10000;
+ } else if ((options & F_INTERVAL) == 0) {
+ intvl.tv_sec = interval / 1000;
+ intvl.tv_usec = interval % 1000 * 1000;
+ }
- for (;;) {
+ almost_done = 0;
+ while (seenint == 0) {
+ struct timeval now, timeout;
struct msghdr m;
struct iovec iov[2];
+ fd_set rfds;
+ int n;
/* signal handling */
- if (seenalrm) {
- /* last packet sent, timeout reached? */
- if (npackets && ntransmitted >= npackets) {
- struct timeval zerotime = {0, 0};
- itimer.it_value = zerotime;
- itimer.it_interval = zerotime;
- (void)setitimer(ITIMER_REAL, &itimer, NULL);
- seenalrm = 0; /* clear flag */
- continue;
- }
- retransmit();
- seenalrm = 0;
- continue;
- }
- if (seenint) {
+ if (seenint)
onint(SIGINT);
- seenint = 0;
- continue;
- }
#ifdef SIGINFO
if (seeninfo) {
summary();
@@ -1198,122 +1162,121 @@ main(int argc, char *argv[])
continue;
}
#endif
-
- if (options & F_FLOOD) {
- (void)pinger();
-#ifdef HAVE_POLL_H
- timeout = 10;
-#else
- timeout.tv_sec = 0;
- timeout.tv_usec = 10000;
- tv = &timeout;
-#endif
- } else {
-#ifdef HAVE_POLL_H
- timeout = INFTIM;
-#else
- tv = NULL;
-#endif
+ FD_ZERO(&rfds);
+ FD_SET(s, &rfds);
+ gettimeofday(&now, NULL);
+ timeout.tv_sec = last.tv_sec + intvl.tv_sec - now.tv_sec;
+ timeout.tv_usec = last.tv_usec + intvl.tv_usec - now.tv_usec;
+ while (timeout.tv_usec < 0) {
+ timeout.tv_usec += 1000000;
+ timeout.tv_sec--;
}
-#ifdef HAVE_POLL_H
- fdmaskp[0].fd = s;
- fdmaskp[0].events = POLLIN;
- cc = poll(fdmaskp, 1, timeout);
-#else
- memset(fdmaskp, 0, fdmasks);
- FD_SET(s, fdmaskp);
- cc = select(s + 1, fdmaskp, NULL, NULL, tv);
-#endif
- if (cc < 0) {
- if (errno != EINTR) {
-#ifdef HAVE_POLL_H
- warn("poll");
-#else
- warn("select");
-#endif
- sleep(1);
- }
- continue;
- } else if (cc == 0)
- continue;
+ while (timeout.tv_usec > 1000000) {
+ timeout.tv_usec -= 1000000;
+ timeout.tv_sec++;
+ }
+ if (timeout.tv_sec < 0)
+ timeout.tv_sec = timeout.tv_usec = 0;
+
+ n = select(s + 1, &rfds, NULL, NULL, &timeout);
+ if (n < 0)
+ continue; /* EINTR */
+ if (n == 1) {
+ m.msg_name = (caddr_t)&from;
+ m.msg_namelen = sizeof(from);
+ memset(&iov, 0, sizeof(iov));
+ iov[0].iov_base = (caddr_t)packet;
+ iov[0].iov_len = packlen;
+ m.msg_iov = iov;
+ m.msg_iovlen = 1;
+ memset(cm, 0, CONTROLLEN);
+ m.msg_control = (void *)cm;
+ m.msg_controllen = CONTROLLEN;
+
+ cc = recvmsg(s, &m, 0);
+ if (cc < 0) {
+ if (errno != EINTR) {
+ warn("recvmsg");
+ sleep(1);
+ }
+ continue;
+ } else if (cc == 0) {
+ int mtu;
- m.msg_name = (caddr_t)&from;
- m.msg_namelen = sizeof(from);
- memset(&iov, 0, sizeof(iov));
- iov[0].iov_base = (caddr_t)packet;
- iov[0].iov_len = packlen;
- m.msg_iov = iov;
- m.msg_iovlen = 1;
- memset(cm, 0, CONTROLLEN);
- m.msg_control = (void *)cm;
- m.msg_controllen = CONTROLLEN;
-
- cc = recvmsg(s, &m, 0);
- if (cc < 0) {
- if (errno != EINTR) {
- warn("recvmsg");
- sleep(1);
+ /*
+ * receive control messages only. Process the
+ * exceptions (currently the only possibility is
+ * a path MTU notification.)
+ */
+ if ((mtu = get_pathmtu(&m)) > 0) {
+ if ((options & F_VERBOSE) != 0) {
+ printf("new path MTU (%d) is "
+ "notified\n", mtu);
+ }
+ }
+ continue;
+ } else {
+ /*
+ * an ICMPv6 message (probably an echoreply)
+ * arrived.
+ */
+ pr_pack(packet, cc, &m);
}
- continue;
- } else if (cc == 0) {
- int mtu;
-
+ if (((options & F_ONCE) != 0 && nreceived > 0) ||
+ (npackets > 0 && nreceived >= npackets))
+ break;
+ }
+ if (n == 0 || (options & F_FLOOD)) {
+ if (npackets == 0 || ntransmitted < npackets)
+ pinger();
+ else {
+ if (almost_done)
+ break;
+ almost_done = 1;
/*
- * receive control messages only. Process the
- * exceptions (currently the only possiblity is
- * a path MTU notification.)
+ * If we're not transmitting any more packets,
+ * change the timer to wait two round-trip times
+ * if we've received any packets or (waittime)
+ * milliseconds if we haven't.
*/
- if ((mtu = get_pathmtu(&m)) > 0) {
- if ((options & F_VERBOSE) != 0) {
- printf("new path MTU (%d) is "
- "notified\n", mtu);
+ intvl.tv_usec = 0;
+ if (nreceived) {
+ intvl.tv_sec = 2 * tmax / 1000;
+ if (intvl.tv_sec == 0)
+ intvl.tv_sec = 1;
+ } else {
+ intvl.tv_sec = waittime / 1000;
+ intvl.tv_usec = waittime % 1000 * 1000;
}
}
- continue;
- } else {
- /*
- * an ICMPv6 message (probably an echoreply) arrived.
- */
- pr_pack(packet, cc, &m);
- }
- if (((options & F_ONCE) != 0 && nreceived > 0) ||
- (npackets > 0 && nreceived >= npackets))
- break;
- if (ntransmitted - nreceived - 1 > nmissedmax) {
- nmissedmax = ntransmitted - nreceived - 1;
- if (options & F_MISSED)
- (void)write(STDOUT_FILENO, &BBELL, 1);
+ gettimeofday(&last, NULL);
+ if (ntransmitted - nreceived - 1 > nmissedmax) {
+ nmissedmax = ntransmitted - nreceived - 1;
+ if (options & F_MISSED)
+ (void)write(STDOUT_FILENO, &BBELL, 1);
+ }
}
}
+ sigemptyset(&si_sa.sa_mask);
+ si_sa.sa_flags = 0;
+ si_sa.sa_handler = SIG_IGN;
+ sigaction(SIGINT, &si_sa, 0);
+ sigaction(SIGALRM, &si_sa, 0);
summary();
- if (res != NULL) {
- freeaddrinfo(res);
- res = NULL;
- }
-
- if(packet != NULL) {
+ if(packet != NULL)
free(packet);
- packet = NULL;
- }
-
-#ifndef HAVE_POLL_H
- if(fdmaskp != NULL)
- free(fdmaskp);
-#endif
exit(nreceived == 0 ? 2 : 0);
}
-void
+static void
onsignal(int sig)
{
switch (sig) {
- case SIGALRM:
- seenalrm++;
- break;
case SIGINT:
+ case SIGALRM:
seenint++;
break;
#ifdef SIGINFO
@@ -1325,38 +1288,6 @@ onsignal(int sig)
}
/*
- * retransmit --
- * This routine transmits another ping6.
- */
-void
-retransmit(void)
-{
- struct itimerval itimer;
-
- if (pinger() == 0)
- return;
-
- /*
- * If we're not transmitting any more packets, change the timer
- * to wait two round-trip times if we've received any packets or
- * ten seconds if we haven't.
- */
-#define MAXWAIT 10
- if (nreceived) {
- itimer.it_value.tv_sec = 2 * tmax / 1000;
- if (itimer.it_value.tv_sec == 0)
- itimer.it_value.tv_sec = 1;
- } else
- itimer.it_value.tv_sec = MAXWAIT;
- itimer.it_interval.tv_sec = 0;
- itimer.it_interval.tv_usec = 0;
- itimer.it_value.tv_usec = 0;
-
- (void)signal(SIGALRM, onsignal);
- (void)setitimer(ITIMER_REAL, &itimer, NULL);
-}
-
-/*
* pinger --
* Compose and transmit an ICMP ECHO REQUEST packet. The IP packet
* will be added on by the kernel. The ID field is our UNIX process ID,
@@ -1364,7 +1295,7 @@ retransmit(void)
* of the data portion are used to hold a UNIX "timeval" struct in VAX
* byte-order, to compute the round-trip time.
*/
-size_t
+static size_t
pingerlen(void)
{
size_t l;
@@ -1383,7 +1314,7 @@ pingerlen(void)
return l;
}
-int
+static int
pinger(void)
{
struct icmp6_hdr *icp;
@@ -1498,7 +1429,7 @@ pinger(void)
return(0);
}
-int
+static int
myechoreply(const struct icmp6_hdr *icp)
{
if (ntohs(icp->icmp6_id) == ident)
@@ -1507,7 +1438,7 @@ myechoreply(const struct icmp6_hdr *icp)
return 0;
}
-int
+static int
mynireply(const struct icmp6_nodeinfo *nip)
{
if (memcmp(nip->icmp6_ni_nonce + sizeof(u_int16_t),
@@ -1518,7 +1449,7 @@ mynireply(const struct icmp6_nodeinfo *nip)
return 0;
}
-char *
+static char *
dnsdecode(const u_char **sp, const u_char *ep, const u_char *base, char *buf,
size_t bufsiz)
/*base for compressed name*/
@@ -1562,7 +1493,7 @@ dnsdecode(const u_char **sp, const u_char *ep, const u_char *base, char *buf,
while (i-- > 0 && cp < ep) {
l = snprintf(cresult, sizeof(cresult),
isprint(*cp) ? "%c" : "\\%03o", *cp & 0xff);
- if (l >= sizeof(cresult) || l < 0)
+ if ((size_t)l >= sizeof(cresult) || l < 0)
return NULL;
if (strlcat(buf, cresult, bufsiz) >= bufsiz)
return NULL; /*result overrun*/
@@ -1585,7 +1516,7 @@ dnsdecode(const u_char **sp, const u_char *ep, const u_char *base, char *buf,
* which arrive ('tis only fair). This permits multiple copies of this
* program to be run without having intermingled output (or statistics!).
*/
-void
+static void
pr_pack(u_char *buf, int cc, struct msghdr *mhdr)
{
#define safeputc(c) printf((isprint((c)) ? "%c" : "\\%03o"), c)
@@ -1617,7 +1548,7 @@ pr_pack(u_char *buf, int cc, struct msghdr *mhdr)
}
from = (struct sockaddr *)mhdr->msg_name;
fromlen = mhdr->msg_namelen;
- if (cc < sizeof(struct icmp6_hdr)) {
+ if (cc < (int)sizeof(struct icmp6_hdr)) {
if (options & F_VERBOSE)
warnx("packet too short (%d bytes) from %s", cc,
pr_addr(from, fromlen));
@@ -1669,6 +1600,11 @@ pr_pack(u_char *buf, int cc, struct msghdr *mhdr)
if (options & F_QUIET)
return;
+ if (options & F_WAITTIME && triptime > waittime) {
+ ++nrcvtimeout;
+ return;
+ }
+
if (options & F_FLOOD)
(void)write(STDOUT_FILENO, &BSPACE, 1);
else {
@@ -1866,7 +1802,7 @@ pr_pack(u_char *buf, int cc, struct msghdr *mhdr)
#undef safeputc
}
-void
+static void
pr_exthdrs(struct msghdr *mhdr)
{
ssize_t bufsize;
@@ -1904,7 +1840,7 @@ pr_exthdrs(struct msghdr *mhdr)
}
#ifdef USE_RFC2292BIS
-void
+static void
pr_ip6opt(void *extbuf, size_t bufsize)
{
struct ip6_hbh *ext;
@@ -1967,7 +1903,7 @@ pr_ip6opt(void *extbuf, size_t bufsize)
}
#else /* !USE_RFC2292BIS */
/* ARGSUSED */
-void
+static void
pr_ip6opt(void *extbuf, size_t bufsize __unused)
{
putchar('\n');
@@ -1976,7 +1912,7 @@ pr_ip6opt(void *extbuf, size_t bufsize __unused)
#endif /* USE_RFC2292BIS */
#ifdef USE_RFC2292BIS
-void
+static void
pr_rthdr(void *extbuf, size_t bufsize)
{
struct in6_addr *in6;
@@ -2033,7 +1969,7 @@ pr_rthdr(void *extbuf, size_t bufsize)
#else /* !USE_RFC2292BIS */
/* ARGSUSED */
-void
+static void
pr_rthdr(void *extbuf, size_t bufsize __unused)
{
putchar('\n');
@@ -2041,7 +1977,7 @@ pr_rthdr(void *extbuf, size_t bufsize __unused)
}
#endif /* USE_RFC2292BIS */
-int
+static int
pr_bitrange(u_int32_t v, int soff, int ii)
{
int off;
@@ -2087,7 +2023,7 @@ pr_bitrange(u_int32_t v, int soff, int ii)
return ii;
}
-void
+static void
pr_suptypes(struct icmp6_nodeinfo *ni, size_t nilen)
/* ni->qtype must be SUPTYPES */
{
@@ -2153,7 +2089,7 @@ pr_suptypes(struct icmp6_nodeinfo *ni, size_t nilen)
}
}
-void
+static void
pr_nodeaddr(struct icmp6_nodeinfo *ni, int nilen)
/* ni->qtype must be NODEADDR */
{
@@ -2219,7 +2155,7 @@ pr_nodeaddr(struct icmp6_nodeinfo *ni, int nilen)
}
}
-int
+static int
get_hoplim(struct msghdr *mhdr)
{
struct cmsghdr *cm;
@@ -2238,7 +2174,7 @@ get_hoplim(struct msghdr *mhdr)
return(-1);
}
-struct in6_pktinfo *
+static struct in6_pktinfo *
get_rcvpktinfo(struct msghdr *mhdr)
{
struct cmsghdr *cm;
@@ -2257,7 +2193,7 @@ get_rcvpktinfo(struct msghdr *mhdr)
return(NULL);
}
-int
+static int
get_pathmtu(struct msghdr *mhdr)
{
#ifdef IPV6_RECVPATHMTU
@@ -2317,7 +2253,7 @@ get_pathmtu(struct msghdr *mhdr)
* Subtract 2 timeval structs: out = out - in. Out is assumed to
* be >= in.
*/
-void
+static void
tvsub(struct timeval *out, struct timeval *in)
{
if ((out->tv_usec -= in->tv_usec) < 0) {
@@ -2332,34 +2268,22 @@ tvsub(struct timeval *out, struct timeval *in)
* SIGINT handler.
*/
/* ARGSUSED */
-void
+static void
onint(int notused __unused)
{
- summary();
-
- if (res != NULL)
- freeaddrinfo(res);
-
- if(packet != NULL)
- free(packet);
-
-#ifndef HAVE_POLL_H
- if(fdmaskp != NULL)
- free(fdmaskp);
-#endif
-
- (void)signal(SIGINT, SIG_DFL);
- (void)kill(getpid(), SIGINT);
-
- /* NOTREACHED */
- exit(1);
+ /*
+ * When doing reverse DNS lookups, the seenint flag might not
+ * be noticed for a while. Just exit if we get a second SIGINT.
+ */
+ if ((options & F_HOSTNAME) && seenint != 0)
+ _exit(nreceived ? 0 : 2);
}
/*
* summary --
* Print out statistics.
*/
-void
+static void
summary(void)
{
@@ -2376,6 +2300,8 @@ summary(void)
((((double)ntransmitted - nreceived) * 100.0) /
ntransmitted));
}
+ if (nrcvtimeout)
+ printf(", %ld packets out of wait time", nrcvtimeout);
(void)putchar('\n');
if (nreceived && timing) {
/* Only display average to microseconds */
@@ -2407,7 +2333,7 @@ static const char *nircode[] = {
* pr_icmph --
* Print a descriptive string about an ICMP header.
*/
-void
+static void
pr_icmph(struct icmp6_hdr *icp, u_char *end)
{
char ntop_buf[INET6_ADDRSTRLEN];
@@ -2622,7 +2548,7 @@ pr_icmph(struct icmp6_hdr *icp, u_char *end)
break;
}
if (options & F_VERBOSE) {
- if (ni->ni_code > sizeof(nircode) / sizeof(nircode[0]))
+ if (ni->ni_code > nitems(nircode))
printf(", invalid");
else
printf(", %s", nircode[ni->ni_code]);
@@ -2637,7 +2563,7 @@ pr_icmph(struct icmp6_hdr *icp, u_char *end)
* pr_iph --
* Print an IP6 header.
*/
-void
+static void
pr_iph(struct ip6_hdr *ip6)
{
u_int32_t flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
@@ -2665,7 +2591,7 @@ pr_iph(struct ip6_hdr *ip6)
* Return an ascii host address as a dotted quad and optionally with
* a hostname.
*/
-const char *
+static const char *
pr_addr(struct sockaddr *addr, int addrlen)
{
static char buf[NI_MAXHOST];
@@ -2684,13 +2610,13 @@ pr_addr(struct sockaddr *addr, int addrlen)
* pr_retip --
* Dump some info on a returned (via ICMPv6) IPv6 packet.
*/
-void
+static void
pr_retip(struct ip6_hdr *ip6, u_char *end)
{
u_char *cp = (u_char *)ip6, nh;
int hlen;
- if (end - (u_char *)ip6 < sizeof(*ip6)) {
+ if ((size_t)(end - (u_char *)ip6) < sizeof(*ip6)) {
printf("IP6");
goto trunc;
}
@@ -2764,7 +2690,7 @@ pr_retip(struct ip6_hdr *ip6, u_char *end)
return;
}
-void
+static void
fill(char *bp, char *patp)
{
int ii, jj, kk;
@@ -2772,7 +2698,11 @@ fill(char *bp, char *patp)
char *cp;
for (cp = patp; *cp; cp++)
- if (!isxdigit((unsigned char) *cp))
+#ifndef __rtems__
+ if (!isxdigit(*cp))
+#else /* __rtems__ */
+ if (!isxdigit((unsigned char)*cp))
+#endif /* __rtems__ */
errx(1, "patterns must be specified as hex digits");
ii = sscanf(patp,
"%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x",
@@ -2783,7 +2713,7 @@ fill(char *bp, char *patp)
/* xxx */
if (ii > 0)
for (kk = 0;
- kk <= MAXDATALEN - (8 + sizeof(struct tv32) + ii);
+ (size_t)kk <= MAXDATALEN - 8 + sizeof(struct tv32) + ii;
kk += ii)
for (jj = 0; jj < ii; ++jj)
bp[jj + kk] = pat[jj];
@@ -2797,7 +2727,7 @@ fill(char *bp, char *patp)
#ifdef IPSEC
#ifdef IPSEC_POLICY_IPSEC
-int
+static int
setpolicy(int so __unused, char *policy)
{
char *buf;
@@ -2818,7 +2748,7 @@ setpolicy(int so __unused, char *policy)
#endif
#endif
-char *
+static char *
nigroup(char *name, int nig_oldmcprefix)
{
char *p;
@@ -2877,7 +2807,7 @@ nigroup(char *name, int nig_oldmcprefix)
return strdup(hbuf);
}
-void
+static void
usage(void)
{
(void)fprintf(stderr,
@@ -2901,6 +2831,7 @@ usage(void)
#endif
"\n"
" [-p pattern] [-S sourceaddr] [-s packetsize] "
- "[hops ...] host\n");
+ "[-x waittime]\n"
+ " [-X timeout] [hops ...] host\n");
exit(1);
}
diff --git a/freebsd/sbin/ping6/rtems-bsd-ping6-data.h b/freebsd/sbin/ping6/rtems-bsd-ping6-data.h
new file mode 100644
index 00000000..40e781b8
--- /dev/null
+++ b/freebsd/sbin/ping6/rtems-bsd-ping6-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+/* ping6.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, extern u_int options);
diff --git a/freebsd/sbin/ping6/rtems-bsd-ping6-namespace.h b/freebsd/sbin/ping6/rtems-bsd-ping6-namespace.h
new file mode 100644
index 00000000..8b875692
--- /dev/null
+++ b/freebsd/sbin/ping6/rtems-bsd-ping6-namespace.h
@@ -0,0 +1,3 @@
+/* generated by userspace-header-gen.py */
+/* ping6.c */
+#define options _bsd_ping6_options
diff --git a/freebsd/sbin/ping6/rtems-bsd-ping6-ping6-data.h b/freebsd/sbin/ping6/rtems-bsd-ping6-ping6-data.h
new file mode 100644
index 00000000..54444845
--- /dev/null
+++ b/freebsd/sbin/ping6/rtems-bsd-ping6-ping6-data.h
@@ -0,0 +1,40 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-ping6-data.h"
+/* ping6.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static int mx_dup_ck);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static struct sockaddr_in6 dst);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static struct sockaddr_in6 src);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static socklen_t srclen);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static size_t datalen);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static int s);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static char BSPACE);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static char BBELL);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static char DOT);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static char *hostname);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static int ident);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static int hoplimit);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static u_char *packet);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static long int nmissedmax);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static long int npackets);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static long int nreceived);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static long int nrepeats);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static long int ntransmitted);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static int interval);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static int waittime);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static long int nrcvtimeout);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static int timing);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static double tmin);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static double tmax);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static double tsum);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static double tsumsq);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static u_short naflags);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static struct msghdr smsghdr);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static struct iovec smsgiov);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static char *scmsg);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static sig_atomic_t volatile seenint);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static char rcvd_tbl[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static u_char outpack[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static u_int8_t nonce[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static char const *niqcode[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_ping6, static char const *nircode[]);
diff --git a/freebsd/sbin/route/keywords b/freebsd/sbin/route/keywords
index 676f781d..82edc466 100644
--- a/freebsd/sbin/route/keywords
+++ b/freebsd/sbin/route/keywords
@@ -4,7 +4,6 @@
4
6
add
-atalk
blackhole
change
cloning
diff --git a/freebsd/sbin/route/route.c b/freebsd/sbin/route/route.c
index f2d73587..76c609af 100644
--- a/freebsd/sbin/route/route.c
+++ b/freebsd/sbin/route/route.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-route-namespace.h"
+#endif /* __rtems__ */
+
/*
* Copyright (c) 1983, 1989, 1991, 1993
* The Regents of the University of California. All rights reserved.
@@ -41,24 +45,15 @@ static char sccsid[] = "@(#)route.c 8.6 (Berkeley) 4/28/95";
#endif
#endif /* not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
#ifdef __rtems__
#define __need_getopt_newlib
#include <getopt.h>
-#define RTEMS_BSD_PROGRAM_NO_OPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_SOCKET_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FOPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FCLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_MALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_REALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FREE_WRAP
#include <machine/rtems-bsd-program.h>
#include <machine/rtems-bsd-commands.h>
-#endif
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
+#endif /* __rtems__ */
#include <rtems/bsd/sys/param.h>
#include <sys/file.h>
#include <sys/socket.h>
@@ -79,20 +74,18 @@ __FBSDID("$FreeBSD$");
#include <err.h>
#include <errno.h>
#include <paths.h>
+#include <signal.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
+#include <time.h>
#include <unistd.h>
#include <ifaddrs.h>
-
-static const struct keytab {
- const char *kt_cp;
- int kt_i;
-} keywords[] = {
-#include "keywords.h"
- {0, 0}
-};
+#ifdef __rtems__
+#include "rtems-bsd-route-route-data.h"
+#endif /* __rtems__ */
struct fibl {
TAILQ_ENTRY(fibl) fl_next;
@@ -102,152 +95,120 @@ struct fibl {
int fl_errno;
};
-struct rt_ctx {
- union sockunion {
- struct sockaddr sa;
- struct sockaddr_in sin;
-#ifdef INET6
- struct sockaddr_in6 sin6;
-#endif
- struct sockaddr_dl sdl;
- struct sockaddr_inarp sinarp;
- struct sockaddr_storage ss; /* added to avoid memory overrun */
- } so_dst, so_gate, so_mask, so_genmask, so_ifa, so_ifp;
-
- int pid, rtm_addrs;
- int s;
- int forcehost, forcenet, nflag, af, qflag, tflag;
- int verbose, aflen;
- int locking, lockrest, debugonly;
- struct rt_metrics rt_metrics;
- u_long rtm_inits;
- uid_t uid;
- int defaultfib;
- int numfibs;
- char domain[MAXHOSTNAMELEN + 1];
- int domain_initialized;
- int rtm_seq;
- char rt_line[MAXHOSTNAMELEN + 1];
- char net_line[MAXHOSTNAMELEN + 1];
- struct {
- struct rt_msghdr m_rtm;
- char m_space[512];
- } m_rtmsg;
- TAILQ_HEAD(fibl_head_t, fibl) fibl_head;
+static struct keytab {
+ const char *kt_cp;
+ int kt_i;
+} const keywords[] = {
+#include "keywords.h"
+ {0, 0}
};
+static struct sockaddr_storage so[RTAX_MAX];
+static int pid, rtm_addrs;
+static int s;
+static int nflag, af, qflag, tflag;
+static int verbose, aflen;
+static int locking, lockrest, debugonly;
+static struct rt_metrics rt_metrics;
+static u_long rtm_inits;
+static uid_t uid;
+static int defaultfib;
+static int numfibs;
+static char domain[MAXHOSTNAMELEN + 1];
+static bool domain_initialized;
+static int rtm_seq;
+static char rt_line[NI_MAXHOST];
+static char net_line[MAXHOSTNAMELEN + 1];
+
#ifndef __rtems__
-struct rt_ctx rt_ctx;
+static struct {
+#else /* __rtems__ */
+static struct m_rtmsg {
#endif /* __rtems__ */
+ struct rt_msghdr m_rtm;
+ char m_space[512];
+} m_rtmsg;
-typedef union sockunion *sup;
+static TAILQ_HEAD(fibl_head_t, fibl) fibl_head;
-static void bprintf(FILE *, int, const char *);
-static void flushroutes(struct rt_ctx *, int argc, char *argv[]);
-static int flushroutes_fib(struct rt_ctx *, int);
-static int getaddr(struct rt_ctx *, int, char *, struct hostent **, int);
+static void printb(int, const char *);
+static void flushroutes(int argc, char *argv[]);
+static int flushroutes_fib(int);
+static int getaddr(int, char *, struct hostent **, int);
static int keyword(const char *);
-static void inet_makenetandmask(struct rt_ctx *, u_long, struct sockaddr_in *, u_long);
+#ifdef INET
+static void inet_makenetandmask(u_long, struct sockaddr_in *,
+ struct sockaddr_in *, u_long);
+#endif
#ifdef INET6
-static int inet6_makenetandmask(struct rt_ctx *, struct sockaddr_in6 *, const char *);
+static int inet6_makenetandmask(struct sockaddr_in6 *, const char *);
#endif
-static void interfaces(struct rt_ctx *);
-static void mask_addr(struct rt_ctx *);
-static void monitor(struct rt_ctx *, int, char *[]);
-static const char *netname(struct rt_ctx *, struct sockaddr *);
-static void newroute(struct rt_ctx *, int, char **);
-static int newroute_fib(struct rt_ctx *, int, char *, int);
-static void pmsg_addrs(struct rt_ctx *, char *, int, size_t);
-static void pmsg_common(struct rt_ctx *, struct rt_msghdr *, size_t);
-static int prefixlen(struct rt_ctx *, const char *);
-static void print_getmsg(struct rt_ctx *, struct rt_msghdr *, int, int);
-static void print_rtmsg(struct rt_ctx *, struct rt_msghdr *, size_t);
-static const char *routename(struct rt_ctx *, struct sockaddr *);
-static int rtmsg(struct rt_ctx *, int, int, int);
-static void set_metric(struct rt_ctx *, char *, int);
-static int set_sofib(struct rt_ctx *, int);
-static int set_procfib(int);
-static void sockaddr(char *, struct sockaddr *);
-static void sodump(sup, const char *);
-extern char *iso_ntoa(void);
-
-static int fiboptlist_csv(struct rt_ctx *, const char *, struct fibl_head_t *);
-static int fiboptlist_range(struct rt_ctx *, const char *, struct fibl_head_t *);
+static void interfaces(void);
+static void monitor(int, char*[]);
+static const char *netname(struct sockaddr *);
+static void newroute(int, char **);
+static int newroute_fib(int, char *, int);
+static void pmsg_addrs(char *, int, size_t);
+static void pmsg_common(struct rt_msghdr *, size_t);
+static int prefixlen(const char *);
+static void print_getmsg(struct rt_msghdr *, int, int);
+static void print_rtmsg(struct rt_msghdr *, size_t);
+static const char *routename(struct sockaddr *);
+static int rtmsg(int, int, int);
+static void set_metric(char *, int);
+static int set_sofib(int);
+static void sockaddr(char *, struct sockaddr *, size_t);
+static void sodump(struct sockaddr *, const char *);
+static int fiboptlist_csv(const char *, struct fibl_head_t *);
+static int fiboptlist_range(const char *, struct fibl_head_t *);
static void usage(const char *) __dead2;
-void
+#define READ_TIMEOUT 10
+static volatile sig_atomic_t stop_read;
+
+static void
+stopit(int sig __unused)
+{
+
+ stop_read = 1;
+}
+
+static void
usage(const char *cp)
{
if (cp != NULL)
warnx("bad keyword: %s", cp);
- (void) fprintf(stderr,
- "usage: route [-46dnqtv] command [[modifiers] args]\n");
- exit(EX_USAGE);
+ errx(EX_USAGE, "usage: route [-46dnqtv] command [[modifiers] args]");
/* NOTREACHED */
}
#ifdef __rtems__
-static int main(int argc, char **argv, struct rt_ctx *c);
+static int main(int argc, char *argv[]);
-struct main_ctx {
- int argc;
- char **argv;
- struct rt_ctx *c;
-};
+RTEMS_LINKER_RWSET(bsd_prog_route, char);
-static int
-call_main(void *ctx)
-{
- const struct main_ctx *mc = ctx;
-
- return main(mc->argc, mc->argv, mc->c);
-}
-
-int rtems_bsd_command_route(int argc, char *argv[])
+int
+rtems_bsd_command_route(int argc, char *argv[])
{
- struct rt_ctx *c;
int exit_code;
+ void *data_begin;
+ size_t data_size;
- c = calloc(1, sizeof(*c));
- if (c != NULL) {
- struct main_ctx mc;
- struct fibl *fl;
- struct fibl *tfl;
-
- mc.argc = argc;
- mc.argv = argv;
- mc.c = c;
-
- c->s = -1;
- c->aflen = sizeof(struct sockaddr_in);
- TAILQ_INIT(&c->fibl_head);
+ data_begin = RTEMS_LINKER_SET_BEGIN(bsd_prog_route);
+ data_size = RTEMS_LINKER_SET_SIZE(bsd_prog_route);
- exit_code = rtems_bsd_program_call("route", call_main, &mc);
-
- close(c->s);
-
- TAILQ_FOREACH_SAFE(fl, &c->fibl_head, fl_next, tfl) {
- free(fl);
- }
-
- free(c);
- } else {
- exit_code = EXIT_FAILURE;
- }
+ rtems_bsd_program_lock();
+ exit_code = rtems_bsd_program_call_main_with_data_restore("route",
+ main, argc, argv, data_begin, data_size);
+ rtems_bsd_program_unlock();
return exit_code;
}
-
-int
-main(int argc, char **argv, struct rt_ctx *c)
-{
-#else /* __rtems__ */
+#endif /* __rtems__ */
int
main(int argc, char **argv)
{
- struct rt_ctx *c;
-#endif /* __rtems__ */
int ch;
size_t len;
#ifdef __rtems__
@@ -260,11 +221,6 @@ main(int argc, char **argv)
#define getopt(argc, argv, opt) getopt_r(argc, argv, "+" opt, &getopt_data)
#endif /* __rtems__ */
-#ifndef __rtems__
- c = &rt_ctx;
- c->aflen = sizeof (struct sockaddr_in);
-#endif /* __rtems__ */
-
if (argc < 2)
usage(NULL);
@@ -272,34 +228,34 @@ main(int argc, char **argv)
switch(ch) {
case '4':
#ifdef INET
- c->af = AF_INET;
- c->aflen = sizeof(struct sockaddr_in);
+ af = AF_INET;
+ aflen = sizeof(struct sockaddr_in);
#else
errx(1, "IPv4 support is not compiled in");
#endif
break;
case '6':
#ifdef INET6
- c->af = AF_INET6;
- c->aflen = sizeof(struct sockaddr_in6);
+ af = AF_INET6;
+ aflen = sizeof(struct sockaddr_in6);
#else
errx(1, "IPv6 support is not compiled in");
#endif
break;
case 'n':
- c->nflag = 1;
+ nflag = 1;
break;
case 'q':
- c->qflag = 1;
+ qflag = 1;
break;
case 'v':
- c->verbose = 1;
+ verbose = 1;
break;
case 't':
- c->tflag = 1;
+ tflag = 1;
break;
case 'd':
- c->debugonly = 1;
+ debugonly = 1;
break;
case '?':
default:
@@ -308,45 +264,45 @@ main(int argc, char **argv)
argc -= optind;
argv += optind;
- c->pid = getpid();
- c->uid = geteuid();
- if (c->tflag)
- c->s = open(_PATH_DEVNULL, O_WRONLY, 0);
+ pid = getpid();
+ uid = geteuid();
+ if (tflag)
+ s = open(_PATH_DEVNULL, O_WRONLY, 0);
else
- c->s = socket(PF_ROUTE, SOCK_RAW, 0);
- if (c->s < 0)
+ s = socket(PF_ROUTE, SOCK_RAW, 0);
+ if (s < 0)
err(EX_OSERR, "socket");
- len = sizeof(c->numfibs);
- if (sysctlbyname("net.fibs", (void *)&c->numfibs, &len, NULL, 0) == -1)
- c->numfibs = -1;
+ len = sizeof(numfibs);
+ if (sysctlbyname("net.fibs", (void *)&numfibs, &len, NULL, 0) == -1)
+ numfibs = -1;
- len = sizeof(c->defaultfib);
- if (c->numfibs != -1 &&
- sysctlbyname("net.my_fibnum", (void *)&c->defaultfib, &len, NULL,
+ len = sizeof(defaultfib);
+ if (numfibs != -1 &&
+ sysctlbyname("net.my_fibnum", (void *)&defaultfib, &len, NULL,
0) == -1)
- c->defaultfib = -1;
+ defaultfib = -1;
if (*argv != NULL)
switch (keyword(*argv)) {
case K_GET:
case K_SHOW:
- c->uid = 0;
+ uid = 0;
/* FALLTHROUGH */
case K_CHANGE:
case K_ADD:
case K_DEL:
case K_DELETE:
- newroute(c, argc, argv);
+ newroute(argc, argv);
/* NOTREACHED */
case K_MONITOR:
- monitor(c, argc, argv);
+ monitor(argc, argv);
/* NOTREACHED */
case K_FLUSH:
- flushroutes(c, argc, argv);
+ flushroutes(argc, argv);
exit(0);
/* NOTREACHED */
}
@@ -355,26 +311,17 @@ main(int argc, char **argv)
}
static int
-set_sofib(struct rt_ctx *c, int fib)
+set_sofib(int fib)
{
if (fib < 0)
return (0);
- return (setsockopt(c->s, SOL_SOCKET, SO_SETFIB, (void *)&fib,
+ return (setsockopt(s, SOL_SOCKET, SO_SETFIB, (void *)&fib,
sizeof(fib)));
}
static int
-set_procfib(int fib)
-{
-
- if (fib < 0)
- return (0);
- return (setfib(fib));
-}
-
-static int
-fiboptlist_range(struct rt_ctx *c, const char *arg, struct fibl_head_t *flh)
+fiboptlist_range(const char *arg, struct fibl_head_t *flh)
{
struct fibl *fl;
char *str0, *str, *token, *endptr;
@@ -392,7 +339,7 @@ fiboptlist_range(struct rt_ctx *c, const char *arg, struct fibl_head_t *flh)
if (errno == 0) {
if (*endptr != '\0' ||
fib[i] < 0 ||
- (c->numfibs != -1 && fib[i] > c->numfibs - 1))
+ (numfibs != -1 && fib[i] > numfibs - 1))
errno = EINVAL;
}
if (errno)
@@ -425,20 +372,21 @@ fiboptlist_range_ret:
#define ALLSTRLEN 64
static int
-fiboptlist_csv(struct rt_ctx *c, const char *arg, struct fibl_head_t *flh)
+fiboptlist_csv(const char *arg, struct fibl_head_t *flh)
{
struct fibl *fl;
char *str0, *str, *token, *endptr;
int fib, error;
+ str0 = str = NULL;
if (strcmp("all", arg) == 0) {
- str0 = str = calloc(1, ALLSTRLEN);
+ str = calloc(1, ALLSTRLEN);
if (str == NULL) {
error = 1;
goto fiboptlist_csv_ret;
}
- if (c->numfibs > 1)
- snprintf(str, ALLSTRLEN - 1, "%d-%d", 0, c->numfibs - 1);
+ if (numfibs > 1)
+ snprintf(str, ALLSTRLEN - 1, "%d-%d", 0, numfibs - 1);
else
snprintf(str, ALLSTRLEN - 1, "%d", 0);
} else if (strcmp("default", arg) == 0) {
@@ -447,14 +395,14 @@ fiboptlist_csv(struct rt_ctx *c, const char *arg, struct fibl_head_t *flh)
error = 1;
goto fiboptlist_csv_ret;
}
- snprintf(str, ALLSTRLEN - 1, "%d", c->defaultfib);
+ snprintf(str, ALLSTRLEN - 1, "%d", defaultfib);
} else
str0 = str = strdup(arg);
error = 0;
while ((token = strsep(&str, ",")) != NULL) {
if (*token != '-' && strchr(token, '-') != NULL) {
- error = fiboptlist_range(c, token, flh);
+ error = fiboptlist_range(token, flh);
if (error)
goto fiboptlist_csv_ret;
} else {
@@ -463,7 +411,7 @@ fiboptlist_csv(struct rt_ctx *c, const char *arg, struct fibl_head_t *flh)
if (errno == 0) {
if (*endptr != '\0' ||
fib < 0 ||
- (c->numfibs != -1 && fib > c->numfibs - 1))
+ (numfibs != -1 && fib > numfibs - 1))
errno = EINVAL;
}
if (errno) {
@@ -480,7 +428,8 @@ fiboptlist_csv(struct rt_ctx *c, const char *arg, struct fibl_head_t *flh)
}
}
fiboptlist_csv_ret:
- free(str0);
+ if (str0 != NULL)
+ free(str0);
return (error);
}
@@ -489,43 +438,41 @@ fiboptlist_csv_ret:
* associated with network interfaces.
*/
static void
-flushroutes(struct rt_ctx *c, int argc, char *argv[])
+flushroutes(int argc, char *argv[])
{
struct fibl *fl;
int error;
- if (c->uid != 0 && !c->debugonly) {
+ if (uid != 0 && !debugonly && !tflag)
errx(EX_NOPERM, "must be root to alter routing table");
- }
- shutdown(c->s, SHUT_RD); /* Don't want to read back our messages */
+ shutdown(s, SHUT_RD); /* Don't want to read back our messages */
- TAILQ_INIT(&c->fibl_head);
+ TAILQ_INIT(&fibl_head);
while (argc > 1) {
argc--;
argv++;
if (**argv != '-')
usage(*argv);
switch (keyword(*argv + 1)) {
+#ifdef INET
case K_4:
case K_INET:
- c->af = AF_INET;
+ af = AF_INET;
break;
+#endif
#ifdef INET6
case K_6:
case K_INET6:
- c->af = AF_INET6;
+ af = AF_INET6;
break;
#endif
- case K_ATALK:
- c->af = AF_APPLETALK;
- break;
case K_LINK:
- c->af = AF_LINK;
+ af = AF_LINK;
break;
case K_FIB:
if (!--argc)
usage(*argv);
- error = fiboptlist_csv(c, *++argv, &c->fibl_head);
+ error = fiboptlist_csv(*++argv, &fibl_head);
if (error)
errx(EX_USAGE, "invalid fib number: %s", *argv);
break;
@@ -533,26 +480,25 @@ flushroutes(struct rt_ctx *c, int argc, char *argv[])
usage(*argv);
}
}
- if (TAILQ_EMPTY(&c->fibl_head)) {
- error = fiboptlist_csv(c, "default", &c->fibl_head);
+ if (TAILQ_EMPTY(&fibl_head)) {
+ error = fiboptlist_csv("default", &fibl_head);
if (error)
errx(EX_OSERR, "fiboptlist_csv failed.");
}
- TAILQ_FOREACH(fl, &c->fibl_head, fl_next)
- flushroutes_fib(c, fl->fl_num);
+ TAILQ_FOREACH(fl, &fibl_head, fl_next)
+ flushroutes_fib(fl->fl_num);
}
static int
-flushroutes_fib(struct rt_ctx *c, int fib)
+flushroutes_fib(int fib)
{
struct rt_msghdr *rtm;
size_t needed;
char *buf, *next, *lim;
- int mib[6], rlen, seqno, count = 0;
+ int mib[7], rlen, seqno, count = 0;
int error;
- error = set_sofib(c, fib);
- error += set_procfib(fib);
+ error = set_sofib(fib);
if (error) {
warn("fib number %d is ignored", fib);
return (error);
@@ -562,14 +508,15 @@ retry:
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
mib[2] = 0; /* protocol */
- mib[3] = 0; /* wildcard address family */
+ mib[3] = AF_UNSPEC;
mib[4] = NET_RT_DUMP;
mib[5] = 0; /* no flags */
- if (sysctl(mib, 6, NULL, &needed, NULL, 0) < 0)
+ mib[6] = fib;
+ if (sysctl(mib, nitems(mib), NULL, &needed, NULL, 0) < 0)
err(EX_OSERR, "route-sysctl-estimate");
- if ((buf = malloc(needed)) == NULL && needed != 0)
+ if ((buf = malloc(needed)) == NULL)
errx(EX_OSERR, "malloc failed");
- if (sysctl(mib, 6, buf, &needed, NULL, 0) < 0) {
+ if (sysctl(mib, nitems(mib), buf, &needed, NULL, 0) < 0) {
if (errno == ENOMEM && count++ < 10) {
warnx("Routing table grew, retrying");
sleep(1);
@@ -579,145 +526,158 @@ retry:
err(EX_OSERR, "route-sysctl-get");
}
lim = buf + needed;
- if (c->verbose)
- (void) printf("Examining routing table from sysctl\n");
+ if (verbose)
+ (void)printf("Examining routing table from sysctl\n");
seqno = 0; /* ??? */
for (next = buf; next < lim; next += rtm->rtm_msglen) {
- rtm = (struct rt_msghdr *)next;
- if (c->verbose)
- print_rtmsg(c, rtm, rtm->rtm_msglen);
+ rtm = (struct rt_msghdr *)(void *)next;
+ if (verbose)
+ print_rtmsg(rtm, rtm->rtm_msglen);
if ((rtm->rtm_flags & RTF_GATEWAY) == 0)
continue;
- if (c->af != 0) {
+ if (af != 0) {
struct sockaddr *sa = (struct sockaddr *)(rtm + 1);
- if (sa->sa_family != c->af)
+ if (sa->sa_family != af)
continue;
}
- if (c->debugonly)
+ if (debugonly)
continue;
rtm->rtm_type = RTM_DELETE;
rtm->rtm_seq = seqno;
- rlen = write(c->s, next, rtm->rtm_msglen);
+ rlen = write(s, next, rtm->rtm_msglen);
if (rlen < 0 && errno == EPERM)
err(1, "write to routing socket");
if (rlen < (int)rtm->rtm_msglen) {
warn("write to routing socket");
- (void) printf("got only %d for rlen\n", rlen);
+ (void)printf("got only %d for rlen\n", rlen);
free(buf);
goto retry;
break;
}
seqno++;
- if (c->qflag)
+ if (qflag)
continue;
- if (c->verbose)
- print_rtmsg(c, rtm, rlen);
+ if (verbose)
+ print_rtmsg(rtm, rlen);
else {
struct sockaddr *sa = (struct sockaddr *)(rtm + 1);
printf("%-20.20s ", rtm->rtm_flags & RTF_HOST ?
- routename(c, sa) : netname(c, sa));
+ routename(sa) : netname(sa));
sa = (struct sockaddr *)(SA_SIZE(sa) + (char *)sa);
- printf("%-20.20s ", routename(c, sa));
+ printf("%-20.20s ", routename(sa));
if (fib >= 0)
printf("-fib %-3d ", fib);
printf("done\n");
}
}
- free(buf);
return (error);
}
static const char *
-routename(struct rt_ctx *c, struct sockaddr *sa)
+routename(struct sockaddr *sa)
{
+ struct sockaddr_dl *sdl;
const char *cp;
- struct hostent *hp;
int n;
- if (c->domain_initialized) {
- c->domain_initialized = 1;
- if (gethostname(c->domain, MAXHOSTNAMELEN) == 0 &&
- (cp = strchr(c->domain, '.'))) {
- c->domain[MAXHOSTNAMELEN] = '\0';
- (void) strcpy(c->domain, cp + 1);
+ if (!domain_initialized) {
+ domain_initialized = true;
+ if (gethostname(domain, MAXHOSTNAMELEN) == 0 &&
+ (cp = strchr(domain, '.'))) {
+ domain[MAXHOSTNAMELEN] = '\0';
+ (void)strcpy(domain, cp + 1);
} else
- c->domain[0] = 0;
+ domain[0] = '\0';
}
- if (sa->sa_len == 0)
- strcpy(c->rt_line, "default");
- else switch (sa->sa_family) {
-
+ /* If the address is zero-filled, use "default". */
+ if (sa->sa_len == 0 && nflag == 0)
+ return ("default");
+#if defined(INET) || defined(INET6)
+ switch (sa->sa_family) {
+#ifdef INET
case AF_INET:
- { struct in_addr in;
- in = ((struct sockaddr_in *)sa)->sin_addr;
-
- cp = NULL;
- if (in.s_addr == INADDR_ANY || sa->sa_len < 4)
- cp = "default";
- if (cp == NULL && !c->nflag) {
- hp = gethostbyaddr((char *)&in, sizeof (struct in_addr),
- AF_INET);
- if (hp != NULL) {
- char *cptr;
- cptr = strchr(hp->h_name, '.');
- if (cptr != NULL &&
- strcmp(cptr + 1, c->domain) == 0)
- *cptr = '\0';
- cp = hp->h_name;
- }
- }
- if (cp != NULL) {
- strncpy(c->rt_line, cp, sizeof(c->rt_line) - 1);
- c->rt_line[sizeof(c->rt_line) - 1] = '\0';
- } else
- (void) sprintf(c->rt_line, "%s", inet_ntoa(in));
+ /* If the address is zero-filled, use "default". */
+ if (nflag == 0 &&
+ ((struct sockaddr_in *)(void *)sa)->sin_addr.s_addr ==
+ INADDR_ANY)
+ return("default");
break;
- }
+#endif
+#ifdef INET6
+ case AF_INET6:
+ /* If the address is zero-filled, use "default". */
+ if (nflag == 0 &&
+ IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)(void *)sa)->sin6_addr))
+ return("default");
+ break;
+#endif
+ }
+#endif
+ switch (sa->sa_family) {
+#if defined(INET) || defined(INET6)
+#ifdef INET
+ case AF_INET:
+#endif
#ifdef INET6
case AF_INET6:
+#endif
{
- struct sockaddr_in6 sin6; /* use static var for safety */
- int niflags = 0;
+ struct sockaddr_storage ss;
+ int error;
+ char *p;
- memset(&sin6, 0, sizeof(sin6));
- memcpy(&sin6, sa, sa->sa_len);
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- sin6.sin6_family = AF_INET6;
-#ifdef __KAME__
- if (sa->sa_len == sizeof(struct sockaddr_in6) &&
- (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr) ||
- IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr) ||
- IN6_IS_ADDR_MC_NODELOCAL(&sin6.sin6_addr)) &&
- sin6.sin6_scope_id == 0) {
- sin6.sin6_scope_id =
- ntohs(*(u_int16_t *)&sin6.sin6_addr.s6_addr[2]);
- sin6.sin6_addr.s6_addr[2] = 0;
- sin6.sin6_addr.s6_addr[3] = 0;
+ memset(&ss, 0, sizeof(ss));
+ if (sa->sa_len == 0)
+ ss.ss_family = sa->sa_family;
+ else
+ memcpy(&ss, sa, sa->sa_len);
+ /* Expand sa->sa_len because it could be shortened. */
+ if (sa->sa_family == AF_INET)
+ ss.ss_len = sizeof(struct sockaddr_in);
+ else if (sa->sa_family == AF_INET6)
+ ss.ss_len = sizeof(struct sockaddr_in6);
+ error = getnameinfo((struct sockaddr *)&ss, ss.ss_len,
+ rt_line, sizeof(rt_line), NULL, 0,
+ (nflag == 0) ? 0 : NI_NUMERICHOST);
+ if (error) {
+ warnx("getnameinfo(): %s", gai_strerror(error));
+ strncpy(rt_line, "invalid", sizeof(rt_line));
}
-#endif
- if (c->nflag)
- niflags |= NI_NUMERICHOST;
- if (getnameinfo((struct sockaddr *)&sin6, sin6.sin6_len,
- c->rt_line, sizeof(c->rt_line), NULL, 0, niflags) != 0)
- strncpy(c->rt_line, "invalid", sizeof(c->rt_line));
- return(c->rt_line);
+ /* Remove the domain part if any. */
+ p = strchr(rt_line, '.');
+ if (p != NULL && strcmp(p + 1, domain) == 0)
+ *p = '\0';
+
+ return (rt_line);
+ break;
}
#endif
-
case AF_LINK:
- return (link_ntoa((struct sockaddr_dl *)sa));
+ sdl = (struct sockaddr_dl *)(void *)sa;
+
+ if (sdl->sdl_nlen == 0 &&
+ sdl->sdl_alen == 0 &&
+ sdl->sdl_slen == 0) {
+ n = snprintf(rt_line, sizeof(rt_line), "link#%d",
+ sdl->sdl_index);
+ if (n > (int)sizeof(rt_line))
+ rt_line[0] = '\0';
+ return (rt_line);
+ } else
+ return (link_ntoa(sdl));
+ break;
default:
{
- u_short *sp = (u_short *)sa;
+ u_short *sp = (u_short *)(void *)sa;
u_short *splim = sp + ((sa->sa_len + 1) >> 1);
- char *cps = c->rt_line + sprintf(c->rt_line, "(%d)", sa->sa_family);
- char *cpe = c->rt_line + sizeof(c->rt_line);
+ char *cps = rt_line + sprintf(rt_line, "(%d)", sa->sa_family);
+ char *cpe = rt_line + sizeof(rt_line);
while (++sp < splim && cps < cpe) /* start with sa->sa_data */
if ((n = snprintf(cps, cpe - cps, " %x", *sp)) > 0)
@@ -727,118 +687,98 @@ routename(struct rt_ctx *c, struct sockaddr *sa)
break;
}
}
- return (c->rt_line);
+ return (rt_line);
}
/*
* Return the name of the network whose address is given.
- * The address is assumed to be that of a net or subnet, not a host.
+ * The address is assumed to be that of a net, not a host.
*/
-const char *
-netname(struct rt_ctx *c, struct sockaddr *sa)
+static const char *
+netname(struct sockaddr *sa)
{
- const char *cp = NULL;
+ struct sockaddr_dl *sdl;
+ int n;
+#ifdef INET
struct netent *np = NULL;
- u_long net, mask;
+ const char *cp = NULL;
u_long i;
- int n, subnetshift;
+#endif
switch (sa->sa_family) {
-
+#ifdef INET
case AF_INET:
- { struct in_addr in;
- in = ((struct sockaddr_in *)sa)->sin_addr;
+ {
+ struct in_addr in;
+ in = ((struct sockaddr_in *)(void *)sa)->sin_addr;
i = in.s_addr = ntohl(in.s_addr);
if (in.s_addr == 0)
cp = "default";
- else if (!c->nflag) {
- if (IN_CLASSA(i)) {
- mask = IN_CLASSA_NET;
- subnetshift = 8;
- } else if (IN_CLASSB(i)) {
- mask = IN_CLASSB_NET;
- subnetshift = 8;
- } else {
- mask = IN_CLASSC_NET;
- subnetshift = 4;
- }
- /*
- * If there are more bits than the standard mask
- * would suggest, subnets must be in use.
- * Guess at the subnet mask, assuming reasonable
- * width subnet fields.
- */
- while (in.s_addr &~ mask)
- mask = (long)mask >> subnetshift;
- net = in.s_addr & mask;
- while ((mask & 1) == 0)
- mask >>= 1, net >>= 1;
- np = getnetbyaddr(net, AF_INET);
+ else if (!nflag) {
+ np = getnetbyaddr(i, AF_INET);
if (np != NULL)
cp = np->n_name;
}
#define C(x) (unsigned)((x) & 0xff)
if (cp != NULL)
- strncpy(c->net_line, cp, sizeof(c->net_line));
+ strncpy(net_line, cp, sizeof(net_line));
else if ((in.s_addr & 0xffffff) == 0)
- (void) sprintf(c->net_line, "%u", C(in.s_addr >> 24));
+ (void)sprintf(net_line, "%u", C(in.s_addr >> 24));
else if ((in.s_addr & 0xffff) == 0)
- (void) sprintf(c->net_line, "%u.%u", C(in.s_addr >> 24),
+ (void)sprintf(net_line, "%u.%u", C(in.s_addr >> 24),
C(in.s_addr >> 16));
else if ((in.s_addr & 0xff) == 0)
- (void) sprintf(c->net_line, "%u.%u.%u", C(in.s_addr >> 24),
+ (void)sprintf(net_line, "%u.%u.%u", C(in.s_addr >> 24),
C(in.s_addr >> 16), C(in.s_addr >> 8));
else
- (void) sprintf(c->net_line, "%u.%u.%u.%u", C(in.s_addr >> 24),
+ (void)sprintf(net_line, "%u.%u.%u.%u", C(in.s_addr >> 24),
C(in.s_addr >> 16), C(in.s_addr >> 8),
C(in.s_addr));
#undef C
break;
- }
-
+ }
+#endif
#ifdef INET6
case AF_INET6:
{
- struct sockaddr_in6 sin6; /* use static var for safety */
+ struct sockaddr_in6 sin6;
int niflags = 0;
memset(&sin6, 0, sizeof(sin6));
memcpy(&sin6, sa, sa->sa_len);
- sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_len = sizeof(sin6);
sin6.sin6_family = AF_INET6;
-#ifdef __KAME__
- if (sa->sa_len == sizeof(struct sockaddr_in6) &&
- (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr) ||
- IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr) ||
- IN6_IS_ADDR_MC_NODELOCAL(&sin6.sin6_addr)) &&
- sin6.sin6_scope_id == 0) {
- sin6.sin6_scope_id =
- ntohs(*(u_int16_t *)&sin6.sin6_addr.s6_addr[2]);
- sin6.sin6_addr.s6_addr[2] = 0;
- sin6.sin6_addr.s6_addr[3] = 0;
- }
-#endif
- if (c->nflag)
+ if (nflag)
niflags |= NI_NUMERICHOST;
if (getnameinfo((struct sockaddr *)&sin6, sin6.sin6_len,
- c->net_line, sizeof(c->net_line), NULL, 0, niflags) != 0)
- strncpy(c->net_line, "invalid", sizeof(c->net_line));
+ net_line, sizeof(net_line), NULL, 0, niflags) != 0)
+ strncpy(net_line, "invalid", sizeof(net_line));
- return(c->net_line);
+ return(net_line);
}
#endif
-
case AF_LINK:
- return (link_ntoa((struct sockaddr_dl *)sa));
-
+ sdl = (struct sockaddr_dl *)(void *)sa;
+
+ if (sdl->sdl_nlen == 0 &&
+ sdl->sdl_alen == 0 &&
+ sdl->sdl_slen == 0) {
+ n = snprintf(net_line, sizeof(net_line), "link#%d",
+ sdl->sdl_index);
+ if (n > (int)sizeof(net_line))
+ net_line[0] = '\0';
+ return (net_line);
+ } else
+ return (link_ntoa(sdl));
+ break;
default:
{
- u_short *sp = (u_short *)sa->sa_data;
+ u_short *sp = (u_short *)(void *)sa->sa_data;
u_short *splim = sp + ((sa->sa_len + 1)>>1);
- char *cps = c->net_line + sprintf(c->net_line, "af %d:", sa->sa_family);
- char *cpe = c->net_line + sizeof(c->net_line);
+ char *cps = net_line + sprintf(net_line, "af %d:", sa->sa_family);
+ char *cpe = net_line + sizeof(net_line);
while (sp < splim && cps < cpe)
if ((n = snprintf(cps, cpe - cps, " %x", *sp++)) > 0)
@@ -848,17 +788,18 @@ netname(struct rt_ctx *c, struct sockaddr *sa)
break;
}
}
- return (c->net_line);
+ return (net_line);
}
static void
-set_metric(struct rt_ctx *c, char *value, int key)
+set_metric(char *value, int key)
{
int flag = 0;
+ char *endptr;
u_long noval, *valp = &noval;
switch (key) {
-#define caseof(x, y, z) case x: valp = &c->rt_metrics.z; flag = y; break
+#define caseof(x, y, z) case x: valp = &rt_metrics.z; flag = y; break
caseof(K_MTU, RTV_MTU, rmx_mtu);
caseof(K_HOPCOUNT, RTV_HOPCOUNT, rmx_hopcount);
caseof(K_EXPIRE, RTV_EXPIRE, rmx_expire);
@@ -869,12 +810,23 @@ set_metric(struct rt_ctx *c, char *value, int key)
caseof(K_RTTVAR, RTV_RTTVAR, rmx_rttvar);
caseof(K_WEIGHT, RTV_WEIGHT, rmx_weight);
}
- c->rtm_inits |= flag;
- if (c->lockrest || c->locking)
- c->rt_metrics.rmx_locks |= flag;
- if (c->locking)
- c->locking = 0;
- *valp = atoi(value);
+ rtm_inits |= flag;
+ if (lockrest || locking)
+ rt_metrics.rmx_locks |= flag;
+ if (locking)
+ locking = 0;
+ errno = 0;
+ *valp = strtol(value, &endptr, 0);
+ if (errno == 0 && *endptr != '\0')
+ errno = EINVAL;
+ if (errno)
+ err(EX_USAGE, "%s", value);
+ if (flag & RTV_EXPIRE && (value[0] == '+' || value[0] == '-')) {
+ struct timespec ts;
+
+ clock_gettime(CLOCK_REALTIME_FAST, &ts);
+ *valp += ts.tv_sec;
+ }
}
#define F_ISHOST 0x01
@@ -884,51 +836,57 @@ set_metric(struct rt_ctx *c, char *value, int key)
#define F_INTERFACE 0x10
static void
-newroute(struct rt_ctx *c, int argc, char **argv)
+newroute(int argc, char **argv)
{
+ struct sigaction sa;
struct hostent *hp;
struct fibl *fl;
char *cmd;
const char *dest, *gateway, *errmsg;
int key, error, flags, nrflags, fibnum;
- if (c->uid != 0) {
+ if (uid != 0 && !debugonly && !tflag)
errx(EX_NOPERM, "must be root to alter routing table");
- }
-
dest = NULL;
gateway = NULL;
flags = RTF_STATIC;
nrflags = 0;
hp = NULL;
- TAILQ_INIT(&c->fibl_head);
+ TAILQ_INIT(&fibl_head);
+
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = 0;
+ sa.sa_handler = stopit;
+ if (sigaction(SIGALRM, &sa, 0) == -1)
+ warn("sigaction SIGALRM");
cmd = argv[0];
if (*cmd != 'g' && *cmd != 's')
- shutdown(c->s, SHUT_RD); /* Don't want to read back our messages */
-
+ shutdown(s, SHUT_RD); /* Don't want to read back our messages */
while (--argc > 0) {
if (**(++argv)== '-') {
switch (key = keyword(1 + *argv)) {
case K_LINK:
- c->af = AF_LINK;
- c->aflen = sizeof(struct sockaddr_dl);
+ af = AF_LINK;
+ aflen = sizeof(struct sockaddr_dl);
break;
+#ifdef INET
case K_4:
case K_INET:
- c->af = AF_INET;
- c->aflen = sizeof(struct sockaddr_in);
+ af = AF_INET;
+ aflen = sizeof(struct sockaddr_in);
break;
+#endif
#ifdef INET6
case K_6:
case K_INET6:
- c->af = AF_INET6;
- c->aflen = sizeof(struct sockaddr_in6);
+ af = AF_INET6;
+ aflen = sizeof(struct sockaddr_in6);
break;
#endif
case K_SA:
- c->af = PF_ROUTE;
- c->aflen = sizeof(union sockunion);
+ af = PF_ROUTE;
+ aflen = sizeof(struct sockaddr_storage);
break;
case K_IFACE:
case K_INTERFACE:
@@ -938,10 +896,10 @@ newroute(struct rt_ctx *c, int argc, char **argv)
flags &= ~RTF_STATIC;
break;
case K_LOCK:
- c->locking = 1;
+ locking = 1;
break;
case K_LOCKREST:
- c->lockrest = 1;
+ lockrest = 1;
break;
case K_HOST:
nrflags |= F_FORCEHOST;
@@ -976,7 +934,7 @@ newroute(struct rt_ctx *c, int argc, char **argv)
case K_FIB:
if (!--argc)
usage(NULL);
- error = fiboptlist_csv(c, *++argv, &c->fibl_head);
+ error = fiboptlist_csv(*++argv, &fibl_head);
if (error)
errx(EX_USAGE,
"invalid fib number: %s", *argv);
@@ -984,35 +942,35 @@ newroute(struct rt_ctx *c, int argc, char **argv)
case K_IFA:
if (!--argc)
usage(NULL);
- getaddr(c, RTA_IFA, *++argv, 0, nrflags);
+ getaddr(RTAX_IFA, *++argv, 0, nrflags);
break;
case K_IFP:
if (!--argc)
usage(NULL);
- getaddr(c, RTA_IFP, *++argv, 0, nrflags);
+ getaddr(RTAX_IFP, *++argv, 0, nrflags);
break;
case K_GENMASK:
if (!--argc)
usage(NULL);
- getaddr(c, RTA_GENMASK, *++argv, 0, nrflags);
+ getaddr(RTAX_GENMASK, *++argv, 0, nrflags);
break;
case K_GATEWAY:
if (!--argc)
usage(NULL);
- getaddr(c, RTA_GATEWAY, *++argv, 0, nrflags);
+ getaddr(RTAX_GATEWAY, *++argv, 0, nrflags);
gateway = *argv;
break;
case K_DST:
if (!--argc)
usage(NULL);
- if (getaddr(c, RTA_DST, *++argv, &hp, nrflags))
+ if (getaddr(RTAX_DST, *++argv, &hp, nrflags))
nrflags |= F_ISHOST;
dest = *argv;
break;
case K_NETMASK:
if (!--argc)
usage(NULL);
- getaddr(c, RTA_NETMASK, *++argv, 0, nrflags);
+ getaddr(RTAX_NETMASK, *++argv, 0, nrflags);
/* FALLTHROUGH */
case K_NET:
nrflags |= F_FORCENET;
@@ -1020,7 +978,7 @@ newroute(struct rt_ctx *c, int argc, char **argv)
case K_PREFIXLEN:
if (!--argc)
usage(NULL);
- if (prefixlen(c, *++argv) == -1) {
+ if (prefixlen(*++argv) == -1) {
nrflags &= ~F_FORCENET;
nrflags |= F_ISHOST;
} else {
@@ -1039,32 +997,44 @@ newroute(struct rt_ctx *c, int argc, char **argv)
case K_WEIGHT:
if (!--argc)
usage(NULL);
- set_metric(c, *++argv, key);
+ set_metric(*++argv, key);
break;
default:
usage(1+*argv);
}
} else {
- if ((c->rtm_addrs & RTA_DST) == 0) {
+ if ((rtm_addrs & RTA_DST) == 0) {
dest = *argv;
- if (getaddr(c, RTA_DST, *argv, &hp, nrflags))
+ if (getaddr(RTAX_DST, *argv, &hp, nrflags))
nrflags |= F_ISHOST;
- } else if ((c->rtm_addrs & RTA_GATEWAY) == 0) {
+ } else if ((rtm_addrs & RTA_GATEWAY) == 0) {
gateway = *argv;
- getaddr(c, RTA_GATEWAY, *argv, &hp, nrflags);
+ getaddr(RTAX_GATEWAY, *argv, &hp, nrflags);
} else {
- getaddr(c, RTA_NETMASK, *argv, 0, nrflags);
+ getaddr(RTAX_NETMASK, *argv, 0, nrflags);
nrflags |= F_FORCENET;
}
}
}
+ /* Do some sanity checks on resulting request */
+ if (so[RTAX_DST].ss_len == 0) {
+ warnx("destination parameter required");
+ usage(NULL);
+ }
+
+ if (so[RTAX_NETMASK].ss_len != 0 &&
+ so[RTAX_DST].ss_family != so[RTAX_NETMASK].ss_family) {
+ warnx("destination and netmask family need to be the same");
+ usage(NULL);
+ }
+
if (nrflags & F_FORCEHOST) {
nrflags |= F_ISHOST;
#ifdef INET6
- if (c->af == AF_INET6) {
- c->rtm_addrs &= ~RTA_NETMASK;
- memset((void *)&c->so_mask, 0, sizeof(c->so_mask));
+ if (af == AF_INET6) {
+ rtm_addrs &= ~RTA_NETMASK;
+ memset(&so[RTAX_NETMASK], 0, sizeof(so[RTAX_NETMASK]));
}
#endif
}
@@ -1075,23 +1045,21 @@ newroute(struct rt_ctx *c, int argc, char **argv)
flags |= RTF_HOST;
if ((nrflags & F_INTERFACE) == 0)
flags |= RTF_GATEWAY;
- if (nrflags & F_PROXY) {
- c->so_dst.sinarp.sin_other = SIN_PROXY;
+ if (nrflags & F_PROXY)
flags |= RTF_ANNOUNCE;
- }
if (dest == NULL)
dest = "";
if (gateway == NULL)
gateway = "";
- if (TAILQ_EMPTY(&c->fibl_head)) {
- error = fiboptlist_csv(c, "default", &c->fibl_head);
+ if (TAILQ_EMPTY(&fibl_head)) {
+ error = fiboptlist_csv("default", &fibl_head);
if (error)
errx(EX_OSERR, "fiboptlist_csv failed.");
}
error = 0;
- TAILQ_FOREACH(fl, &c->fibl_head, fl_next) {
- fl->fl_error = newroute_fib(c, fl->fl_num, cmd, flags);
+ TAILQ_FOREACH(fl, &fibl_head, fl_next) {
+ fl->fl_error = newroute_fib(fl->fl_num, cmd, flags);
if (fl->fl_error)
fl->fl_errno = errno;
error += fl->fl_error;
@@ -1100,9 +1068,9 @@ newroute(struct rt_ctx *c, int argc, char **argv)
exit(error);
error = 0;
- if (!c->qflag) {
+ if (!qflag) {
fibnum = 0;
- TAILQ_FOREACH(fl, &c->fibl_head, fl_next) {
+ TAILQ_FOREACH(fl, &fibl_head, fl_next) {
if (fl->fl_error == 0)
fibnum++;
}
@@ -1114,8 +1082,8 @@ newroute(struct rt_ctx *c, int argc, char **argv)
if (*gateway)
printf(": gateway %s", gateway);
- if (c->numfibs > 1) {
- TAILQ_FOREACH(fl, &c->fibl_head, fl_next) {
+ if (numfibs > 1) {
+ TAILQ_FOREACH(fl, &fibl_head, fl_next) {
if (fl->fl_error == 0
&& fl->fl_num >= 0) {
if (firstfib) {
@@ -1132,7 +1100,7 @@ newroute(struct rt_ctx *c, int argc, char **argv)
}
fibnum = 0;
- TAILQ_FOREACH(fl, &c->fibl_head, fl_next) {
+ TAILQ_FOREACH(fl, &fibl_head, fl_next) {
if (fl->fl_error != 0) {
printf("%s %s %s", cmd, (nrflags & F_ISHOST)
? "host" : "net", dest);
@@ -1175,47 +1143,45 @@ newroute(struct rt_ctx *c, int argc, char **argv)
}
static int
-newroute_fib(struct rt_ctx *c, int fib, char *cmd, int flags)
+newroute_fib(int fib, char *cmd, int flags)
{
int error;
- error = set_sofib(c, fib);
+ error = set_sofib(fib);
if (error) {
warn("fib number %d is ignored", fib);
return (error);
}
- error = rtmsg(c, *cmd, flags, fib);
+ error = rtmsg(*cmd, flags, fib);
return (error);
}
+#ifdef INET
static void
-inet_makenetandmask(struct rt_ctx *c, u_long net, struct sockaddr_in *sin, u_long bits)
+inet_makenetandmask(u_long net, struct sockaddr_in *sin,
+ struct sockaddr_in *sin_mask, u_long bits)
{
- u_long addr, mask = 0;
- char *cp;
+ u_long mask = 0;
+
+ rtm_addrs |= RTA_NETMASK;
- c->rtm_addrs |= RTA_NETMASK;
/*
- * XXX: This approach unable to handle 0.0.0.1/32 correctly
- * as inet_network() converts 0.0.0.1 and 1 equally.
+ * MSB of net should be meaningful. 0/0 is exception.
*/
- if (net <= 0xff)
- addr = net << IN_CLASSA_NSHIFT;
- else if (net <= 0xffff)
- addr = net << IN_CLASSB_NSHIFT;
- else if (net <= 0xffffff)
- addr = net << IN_CLASSC_NSHIFT;
- else
- addr = net;
+ if (net > 0)
+ while ((net & 0xff000000) == 0)
+ net <<= 8;
+
/*
* If no /xx was specified we must calculate the
* CIDR address.
*/
- if ((bits == 0) && (addr != 0)) {
+ if ((bits == 0) && (net != 0)) {
u_long i, j;
- for(i=0,j=0xff; i<4; i++) {
- if (addr & j) {
+
+ for(i = 0, j = 0xff; i < 4; i++) {
+ if (net & j) {
break;
}
j <<= 8;
@@ -1226,43 +1192,31 @@ inet_makenetandmask(struct rt_ctx *c, u_long net, struct sockaddr_in *sin, u_lon
if (bits != 0)
mask = 0xffffffff << (32 - bits);
- sin->sin_addr.s_addr = htonl(addr);
- sin = &c->so_mask.sin;
- sin->sin_addr.s_addr = htonl(mask);
- sin->sin_len = 0;
- sin->sin_family = 0;
- cp = (char *)(&sin->sin_addr + 1);
- while (*--cp == 0 && cp > (char *)sin)
- ;
- sin->sin_len = 1 + cp - (char *)sin;
+ sin->sin_addr.s_addr = htonl(net);
+ sin_mask->sin_addr.s_addr = htonl(mask);
+ sin_mask->sin_len = sizeof(struct sockaddr_in);
+ sin_mask->sin_family = AF_INET;
}
+#endif
#ifdef INET6
/*
* XXX the function may need more improvement...
*/
static int
-inet6_makenetandmask(struct rt_ctx *c, struct sockaddr_in6 *sin6, const char *plen)
+inet6_makenetandmask(struct sockaddr_in6 *sin6, const char *plen)
{
- struct in6_addr in6;
if (plen == NULL) {
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
- sin6->sin6_scope_id == 0) {
+ sin6->sin6_scope_id == 0)
plen = "0";
- } else if ((sin6->sin6_addr.s6_addr[0] & 0xe0) == 0x20) {
- /* aggregatable global unicast - RFC2374 */
- memset(&in6, 0, sizeof(in6));
- if (!memcmp(&sin6->sin6_addr.s6_addr[8],
- &in6.s6_addr[8], 8))
- plen = "64";
- }
}
if (plen == NULL || strcmp(plen, "128") == 0)
return (1);
- c->rtm_addrs |= RTA_NETMASK;
- prefixlen(c, plen);
+ rtm_addrs |= RTA_NETMASK;
+ prefixlen(plen);
return (0);
}
#endif
@@ -1272,33 +1226,50 @@ inet6_makenetandmask(struct rt_ctx *c, struct sockaddr_in6 *sin6, const char *pl
* returning 1 if a host address, 0 if a network address.
*/
static int
-getaddr(struct rt_ctx *c, int which, char *str, struct hostent **hpp, int nrflags)
+getaddr(int idx, char *str, struct hostent **hpp, int nrflags)
{
- sup su;
+ struct sockaddr *sa;
+#if defined(INET)
+ struct sockaddr_in *sin;
struct hostent *hp;
struct netent *np;
u_long val;
char *q;
- int afamily; /* local copy of af so we can change it */
+#elif defined(INET6)
+ char *q;
+#endif
- if (c->af == 0) {
- c->af = AF_INET;
- c->aflen = sizeof(struct sockaddr_in);
+ if (idx < 0 || idx >= RTAX_MAX)
+ usage("internal error");
+ if (af == 0) {
+#if defined(INET)
+ af = AF_INET;
+ aflen = sizeof(struct sockaddr_in);
+#elif defined(INET6)
+ af = AF_INET6;
+ aflen = sizeof(struct sockaddr_in6);
+#else
+ af = AF_LINK;
+ aflen = sizeof(struct sockaddr_dl);
+#endif
}
- afamily = c->af;
- c->rtm_addrs |= which;
- switch (which) {
- case RTA_DST:
- su = &c->so_dst;
- break;
- case RTA_GATEWAY:
- su = &c->so_gate;
+#ifndef INET
+ hpp = NULL;
+#endif
+ rtm_addrs |= (1 << idx);
+ sa = (struct sockaddr *)&so[idx];
+ sa->sa_family = af;
+ sa->sa_len = aflen;
+
+ switch (idx) {
+ case RTAX_GATEWAY:
if (nrflags & F_INTERFACE) {
struct ifaddrs *ifap, *ifa;
+ struct sockaddr_dl *sdl0 = (struct sockaddr_dl *)(void *)sa;
struct sockaddr_dl *sdl = NULL;
if (getifaddrs(&ifap))
- err(1, "getifaddrs");
+ err(EX_OSERR, "getifaddrs");
for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
if (ifa->ifa_addr->sa_family != AF_LINK)
@@ -1307,63 +1278,41 @@ getaddr(struct rt_ctx *c, int which, char *str, struct hostent **hpp, int nrflag
if (strcmp(str, ifa->ifa_name) != 0)
continue;
- sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ sdl = (struct sockaddr_dl *)(void *)ifa->ifa_addr;
}
/* If we found it, then use it */
if (sdl != NULL) {
/*
- * Copy is safe since we have a
- * sockaddr_storage member in sockunion{}.
* Note that we need to copy before calling
* freeifaddrs().
*/
- memcpy(&su->sdl, sdl, sdl->sdl_len);
+ memcpy(sdl0, sdl, sdl->sdl_len);
}
freeifaddrs(ifap);
if (sdl != NULL)
return(1);
+ else
+ errx(EX_DATAERR,
+ "interface '%s' does not exist", str);
}
break;
- case RTA_NETMASK:
- su = &c->so_mask;
+ case RTAX_IFP:
+ sa->sa_family = AF_LINK;
break;
- case RTA_GENMASK:
- su = &c->so_genmask;
- break;
- case RTA_IFP:
- su = &c->so_ifp;
- afamily = AF_LINK;
- break;
- case RTA_IFA:
- su = &c->so_ifa;
- break;
- default:
- usage("internal error");
- /*NOTREACHED*/
}
- su->sa.sa_len = c->aflen;
- su->sa.sa_family = afamily; /* cases that don't want it have left already */
if (strcmp(str, "default") == 0) {
/*
* Default is net 0.0.0.0/0
*/
- switch (which) {
- case RTA_DST:
- c->forcenet++;
-#if 0
- bzero(su, sizeof(*su)); /* for readability */
-#endif
- getaddr(c, RTA_NETMASK, str, 0, nrflags);
+ switch (idx) {
+ case RTAX_DST:
+ nrflags |= F_FORCENET;
+ getaddr(RTAX_NETMASK, str, 0, nrflags);
break;
-#if 0
- case RTA_NETMASK:
- case RTA_GENMASK:
- bzero(su, sizeof(*su)); /* for readability */
-#endif
}
return (0);
}
- switch (afamily) {
+ switch (sa->sa_family) {
#ifdef INET6
case AF_INET6:
{
@@ -1371,129 +1320,130 @@ getaddr(struct rt_ctx *c, int which, char *str, struct hostent **hpp, int nrflag
int ecode;
q = NULL;
- if (which == RTA_DST && (q = strchr(str, '/')) != NULL)
+ if (idx == RTAX_DST && (q = strchr(str, '/')) != NULL)
*q = '\0';
memset(&hints, 0, sizeof(hints));
- hints.ai_family = afamily; /*AF_INET6*/
- hints.ai_socktype = SOCK_DGRAM; /*dummy*/
+ hints.ai_family = sa->sa_family;
+ hints.ai_socktype = SOCK_DGRAM;
ecode = getaddrinfo(str, NULL, &hints, &res);
if (ecode != 0 || res->ai_family != AF_INET6 ||
- res->ai_addrlen != sizeof(su->sin6)) {
- (void) fprintf(stderr, "%s: %s\n", str,
- gai_strerror(ecode));
- exit(1);
- }
- memcpy(&su->sin6, res->ai_addr, sizeof(su->sin6));
-#ifdef __KAME__
- if ((IN6_IS_ADDR_LINKLOCAL(&su->sin6.sin6_addr) ||
- IN6_IS_ADDR_MC_LINKLOCAL(&su->sin6.sin6_addr) ||
- IN6_IS_ADDR_MC_NODELOCAL(&su->sin6.sin6_addr)) &&
- su->sin6.sin6_scope_id) {
- *(u_int16_t *)&su->sin6.sin6_addr.s6_addr[2] =
- htons(su->sin6.sin6_scope_id);
- su->sin6.sin6_scope_id = 0;
- }
-#endif
+ res->ai_addrlen != sizeof(struct sockaddr_in6))
+ errx(EX_OSERR, "%s: %s", str, gai_strerror(ecode));
+ memcpy(sa, res->ai_addr, res->ai_addrlen);
freeaddrinfo(res);
if (q != NULL)
*q++ = '/';
- if (which == RTA_DST)
- return (inet6_makenetandmask(c, &su->sin6, q));
+ if (idx == RTAX_DST)
+ return (inet6_makenetandmask((struct sockaddr_in6 *)(void *)sa, q));
return (0);
}
#endif /* INET6 */
-
case AF_LINK:
- link_addr(str, &su->sdl);
+ link_addr(str, (struct sockaddr_dl *)(void *)sa);
return (1);
-
case PF_ROUTE:
- su->sa.sa_len = sizeof(*su);
- sockaddr(str, &su->sa);
+ sockaddr(str, sa, sizeof(struct sockaddr_storage));
return (1);
-
+#ifdef INET
case AF_INET:
+#endif
default:
break;
}
+#ifdef INET
+ sin = (struct sockaddr_in *)(void *)sa;
if (hpp == NULL)
hpp = &hp;
*hpp = NULL;
q = strchr(str,'/');
- if (q != NULL && which == RTA_DST) {
+ if (q != NULL && idx == RTAX_DST) {
*q = '\0';
if ((val = inet_network(str)) != INADDR_NONE) {
- inet_makenetandmask(
- c, val, &su->sin, strtoul(q+1, 0, 0));
+ inet_makenetandmask(val, sin,
+ (struct sockaddr_in *)&so[RTAX_NETMASK],
+ strtoul(q+1, 0, 0));
return (0);
}
*q = '/';
}
- if ((which != RTA_DST || c->forcenet == 0) &&
- inet_aton(str, &su->sin.sin_addr)) {
- val = su->sin.sin_addr.s_addr;
- if (which != RTA_DST || c->forcehost ||
- inet_lnaof(su->sin.sin_addr) != INADDR_ANY)
+ if ((idx != RTAX_DST || (nrflags & F_FORCENET) == 0) &&
+ inet_aton(str, &sin->sin_addr)) {
+ val = sin->sin_addr.s_addr;
+ if (idx != RTAX_DST || nrflags & F_FORCEHOST ||
+ inet_lnaof(sin->sin_addr) != INADDR_ANY)
return (1);
else {
val = ntohl(val);
goto netdone;
}
}
- if (which == RTA_DST && c->forcehost == 0 &&
+ if (idx == RTAX_DST && (nrflags & F_FORCEHOST) == 0 &&
((val = inet_network(str)) != INADDR_NONE ||
((np = getnetbyname(str)) != NULL && (val = np->n_net) != 0))) {
netdone:
- inet_makenetandmask(c, val, &su->sin, 0);
+ inet_makenetandmask(val, sin,
+ (struct sockaddr_in *)&so[RTAX_NETMASK], 0);
return (0);
}
hp = gethostbyname(str);
if (hp != NULL) {
*hpp = hp;
- su->sin.sin_family = hp->h_addrtype;
- memmove((char *)&su->sin.sin_addr, hp->h_addr,
- MIN((size_t)hp->h_length, sizeof(su->sin.sin_addr)));
+ sin->sin_family = hp->h_addrtype;
+ memmove((char *)&sin->sin_addr, hp->h_addr,
+ MIN((size_t)hp->h_length, sizeof(sin->sin_addr)));
return (1);
}
+#endif
errx(EX_NOHOST, "bad address: %s", str);
}
static int
-prefixlen(struct rt_ctx *c, const char *str)
+prefixlen(const char *str)
{
int len = atoi(str), q, r;
int max;
char *p;
- c->rtm_addrs |= RTA_NETMASK;
- switch (c->af) {
+ rtm_addrs |= RTA_NETMASK;
+ switch (af) {
#ifdef INET6
case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6 =
+ (struct sockaddr_in6 *)&so[RTAX_NETMASK];
+
max = 128;
- p = (char *)&c->so_mask.sin6.sin6_addr;
+ p = (char *)&sin6->sin6_addr;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
break;
+ }
#endif
+#ifdef INET
case AF_INET:
+ {
+ struct sockaddr_in *sin =
+ (struct sockaddr_in *)&so[RTAX_NETMASK];
+
max = 32;
- p = (char *)&c->so_mask.sin.sin_addr;
+ p = (char *)&sin->sin_addr;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
break;
+ }
+#endif
default:
- fprintf(stderr, "prefixlen not supported in this af\n");
- exit(1);
+ errx(EX_OSERR, "prefixlen not supported in this af");
}
- if (len < 0 || max < len) {
- fprintf(stderr, "%s: bad value\n", str);
- exit(1);
- }
+ if (len < 0 || max < len)
+ errx(EX_USAGE, "%s: invalid prefixlen", str);
q = len >> 3;
r = len & 7;
- c->so_mask.sa.sa_family = c->af;
- c->so_mask.sa.sa_len = c->aflen;
memset((void *)p, 0, max / 8);
if (q > 0)
memset((void *)p, 0xff, q);
@@ -1506,7 +1456,7 @@ prefixlen(struct rt_ctx *c, const char *str)
}
static void
-interfaces(struct rt_ctx *c)
+interfaces(void)
{
size_t needed;
int mib[6];
@@ -1517,14 +1467,14 @@ retry2:
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
mib[2] = 0; /* protocol */
- mib[3] = 0; /* wildcard address family */
+ mib[3] = AF_UNSPEC;
mib[4] = NET_RT_IFLIST;
mib[5] = 0; /* no flags */
- if (sysctl(mib, 6, NULL, &needed, NULL, 0) < 0)
+ if (sysctl(mib, nitems(mib), NULL, &needed, NULL, 0) < 0)
err(EX_OSERR, "route-sysctl-estimate");
- if ((buf = malloc(needed)) == NULL && needed != 0)
+ if ((buf = malloc(needed)) == NULL)
errx(EX_OSERR, "malloc failed");
- if (sysctl(mib, 6, buf, &needed, NULL, 0) < 0) {
+ if (sysctl(mib, nitems(mib), buf, &needed, NULL, 0) < 0) {
if (errno == ENOMEM && count++ < 10) {
warnx("Routing table grew, retrying");
sleep(1);
@@ -1535,19 +1485,18 @@ retry2:
}
lim = buf + needed;
for (next = buf; next < lim; next += rtm->rtm_msglen) {
- rtm = (struct rt_msghdr *)next;
- print_rtmsg(c, rtm, rtm->rtm_msglen);
+ rtm = (struct rt_msghdr *)(void *)next;
+ print_rtmsg(rtm, rtm->rtm_msglen);
}
- free(buf);
}
static void
-monitor(struct rt_ctx *c, int argc, char *argv[])
+monitor(int argc, char *argv[])
{
int n, fib, error;
char msg[2048], *endptr;
- fib = c->defaultfib;
+ fib = defaultfib;
while (argc > 1) {
argc--;
argv++;
@@ -1562,7 +1511,7 @@ monitor(struct rt_ctx *c, int argc, char *argv[])
if (errno == 0) {
if (*endptr != '\0' ||
fib < 0 ||
- (c->numfibs != -1 && fib > c->numfibs - 1))
+ (numfibs != -1 && fib > numfibs - 1))
errno = EINVAL;
}
if (errno)
@@ -1572,124 +1521,115 @@ monitor(struct rt_ctx *c, int argc, char *argv[])
usage(*argv);
}
}
- error = set_sofib(c, fib);
+ error = set_sofib(fib);
if (error)
errx(EX_USAGE, "invalid fib number: %d", fib);
- c->verbose = 1;
- if (c->debugonly) {
- interfaces(c);
+ verbose = 1;
+ if (debugonly) {
+ interfaces();
exit(0);
}
for (;;) {
time_t now;
- n = read(c->s, msg, 2048);
+ n = read(s, msg, 2048);
now = time(NULL);
- (void) printf("\ngot message of size %d on %s", n, ctime(&now));
- print_rtmsg(c, (struct rt_msghdr *)msg, n);
+ (void)printf("\ngot message of size %d on %s", n, ctime(&now));
+ print_rtmsg((struct rt_msghdr *)(void *)msg, n);
}
}
static int
-rtmsg(struct rt_ctx *c, int cmd, int flags, int fib)
+rtmsg(int cmd, int flags, int fib)
{
int rlen;
- char *cp = c->m_rtmsg.m_space;
+ char *cp = m_rtmsg.m_space;
int l;
-#define NEXTADDR(w, u) \
- if (c->rtm_addrs & (w)) {\
- l = SA_SIZE(&(u.sa)); memmove(cp, &(u), l); cp += l;\
- if (c->verbose) sodump(&(u),#u);\
+#define NEXTADDR(w, u) \
+ if (rtm_addrs & (w)) { \
+ l = (((struct sockaddr *)&(u))->sa_len == 0) ? \
+ sizeof(long) : \
+ 1 + ((((struct sockaddr *)&(u))->sa_len - 1) \
+ | (sizeof(long) - 1)); \
+ memmove(cp, (char *)&(u), l); \
+ cp += l; \
+ if (verbose) \
+ sodump((struct sockaddr *)&(u), #w); \
}
errno = 0;
- memset(&c->m_rtmsg, 0, sizeof(c->m_rtmsg));
+ memset(&m_rtmsg, 0, sizeof(m_rtmsg));
if (cmd == 'a')
cmd = RTM_ADD;
else if (cmd == 'c')
cmd = RTM_CHANGE;
else if (cmd == 'g' || cmd == 's') {
cmd = RTM_GET;
- if (c->so_ifp.sa.sa_family == 0) {
- c->so_ifp.sa.sa_family = AF_LINK;
- c->so_ifp.sa.sa_len = sizeof(struct sockaddr_dl);
- c->rtm_addrs |= RTA_IFP;
+ if (so[RTAX_IFP].ss_family == 0) {
+ so[RTAX_IFP].ss_family = AF_LINK;
+ so[RTAX_IFP].ss_len = sizeof(struct sockaddr_dl);
+ rtm_addrs |= RTA_IFP;
}
} else
cmd = RTM_DELETE;
-#define rtm c->m_rtmsg.m_rtm
+#define rtm m_rtmsg.m_rtm
rtm.rtm_type = cmd;
rtm.rtm_flags = flags;
rtm.rtm_version = RTM_VERSION;
- rtm.rtm_seq = ++c->rtm_seq;
- rtm.rtm_addrs = c->rtm_addrs;
- rtm.rtm_rmx = c->rt_metrics;
- rtm.rtm_inits = c->rtm_inits;
-
- if (c->rtm_addrs & RTA_NETMASK)
- mask_addr(c);
- NEXTADDR(RTA_DST, c->so_dst);
- NEXTADDR(RTA_GATEWAY, c->so_gate);
- NEXTADDR(RTA_NETMASK, c->so_mask);
- NEXTADDR(RTA_GENMASK, c->so_genmask);
- NEXTADDR(RTA_IFP, c->so_ifp);
- NEXTADDR(RTA_IFA, c->so_ifa);
- rtm.rtm_msglen = l = cp - (char *)&c->m_rtmsg;
- if (c->verbose)
- print_rtmsg(c, &rtm, l);
- if (c->debugonly)
+ rtm.rtm_seq = ++rtm_seq;
+ rtm.rtm_addrs = rtm_addrs;
+ rtm.rtm_rmx = rt_metrics;
+ rtm.rtm_inits = rtm_inits;
+
+ NEXTADDR(RTA_DST, so[RTAX_DST]);
+ NEXTADDR(RTA_GATEWAY, so[RTAX_GATEWAY]);
+ NEXTADDR(RTA_NETMASK, so[RTAX_NETMASK]);
+ NEXTADDR(RTA_GENMASK, so[RTAX_GENMASK]);
+ NEXTADDR(RTA_IFP, so[RTAX_IFP]);
+ NEXTADDR(RTA_IFA, so[RTAX_IFA]);
+ rtm.rtm_msglen = l = cp - (char *)&m_rtmsg;
+ if (verbose)
+ print_rtmsg(&rtm, l);
+ if (debugonly)
return (0);
- if ((rlen = write(c->s, (char *)&c->m_rtmsg, l)) < 0) {
- if (errno == EPERM)
+ if ((rlen = write(s, (char *)&m_rtmsg, l)) < 0) {
+ switch (errno) {
+ case EPERM:
err(1, "writing to routing socket");
- warn("writing to routing socket");
+ break;
+ case ESRCH:
+ warnx("route has not been found");
+ break;
+ case EEXIST:
+ /* Handled by newroute() */
+ break;
+ default:
+ warn("writing to routing socket");
+ }
return (-1);
}
if (cmd == RTM_GET) {
+ stop_read = 0;
+ alarm(READ_TIMEOUT);
do {
- l = read(c->s, (char *)&c->m_rtmsg, sizeof(c->m_rtmsg));
- } while (l > 0 && (rtm.rtm_seq != c->rtm_seq || rtm.rtm_pid != c->pid));
+ l = read(s, (char *)&m_rtmsg, sizeof(m_rtmsg));
+ } while (l > 0 && stop_read == 0 &&
+ (rtm.rtm_seq != rtm_seq || rtm.rtm_pid != pid));
+ if (stop_read != 0) {
+ warnx("read from routing socket timed out");
+ return (-1);
+ } else
+ alarm(0);
if (l < 0)
warn("read from routing socket");
else
- print_getmsg(c, &rtm, l, fib);
+ print_getmsg(&rtm, l, fib);
}
#undef rtm
return (0);
}
-static void
-mask_addr(struct rt_ctx *c)
-{
- int olen = c->so_mask.sa.sa_len;
- char *cp1 = olen + (char *)&c->so_mask, *cp2;
-
- for (c->so_mask.sa.sa_len = 0; cp1 > (char *)&c->so_mask; )
- if (*--cp1 != 0) {
- c->so_mask.sa.sa_len = 1 + cp1 - (char *)&c->so_mask;
- break;
- }
- if ((c->rtm_addrs & RTA_DST) == 0)
- return;
- switch (c->so_dst.sa.sa_family) {
- case AF_INET:
-#ifdef INET6
- case AF_INET6:
-#endif
- case AF_APPLETALK:
- case 0:
- return;
- }
- cp1 = c->so_mask.sa.sa_len + 1 + (char *)&c->so_dst;
- cp2 = c->so_dst.sa.sa_len + 1 + (char *)&c->so_dst;
- while (cp2 > cp1)
- *--cp2 = 0;
- cp2 = c->so_mask.sa.sa_len + 1 + (char *)&c->so_mask;
- while (cp1 > c->so_dst.sa.sa_data)
- *--cp1 &= *--cp2;
-}
-
static const char *const msgtypes[] = {
"",
"RTM_ADD: Add Route",
@@ -1713,25 +1653,25 @@ static const char *const msgtypes[] = {
};
static const char metricnames[] =
-"\011weight\010rttvar\7rtt\6ssthresh\5sendpipe\4recvpipe\3expire"
-"\1mtu";
+ "\011weight\010rttvar\7rtt\6ssthresh\5sendpipe\4recvpipe\3expire"
+ "\1mtu";
static const char routeflags[] =
-"\1UP\2GATEWAY\3HOST\4REJECT\5DYNAMIC\6MODIFIED\7DONE"
-"\012XRESOLVE\013LLINFO\014STATIC\015BLACKHOLE"
-"\017PROTO2\020PROTO1\021PRCLONING\022WASCLONED\023PROTO3"
-"\025PINNED\026LOCAL\027BROADCAST\030MULTICAST\035STICKY";
+ "\1UP\2GATEWAY\3HOST\4REJECT\5DYNAMIC\6MODIFIED\7DONE"
+ "\012XRESOLVE\013LLINFO\014STATIC\015BLACKHOLE"
+ "\017PROTO2\020PROTO1\021PRCLONING\022WASCLONED\023PROTO3"
+ "\024FIXEDMTU\025PINNED\026LOCAL\027BROADCAST\030MULTICAST\035STICKY";
static const char ifnetflags[] =
-"\1UP\2BROADCAST\3DEBUG\4LOOPBACK\5PTP\6b6\7RUNNING\010NOARP"
-"\011PPROMISC\012ALLMULTI\013OACTIVE\014SIMPLEX\015LINK0\016LINK1"
-"\017LINK2\020MULTICAST";
+ "\1UP\2BROADCAST\3DEBUG\4LOOPBACK\5PTP\6b6\7RUNNING\010NOARP"
+ "\011PPROMISC\012ALLMULTI\013OACTIVE\014SIMPLEX\015LINK0\016LINK1"
+ "\017LINK2\020MULTICAST";
static const char addrnames[] =
-"\1DST\2GATEWAY\3NETMASK\4GENMASK\5IFP\6IFA\7AUTHOR\010BRD";
+ "\1DST\2GATEWAY\3NETMASK\4GENMASK\5IFP\6IFA\7AUTHOR\010BRD";
static const char errfmt[] =
-"\n%s: truncated route message, only %zu bytes left\n";
+ "\n%s: truncated route message, only %zu bytes left\n";
static void
-print_rtmsg(struct rt_ctx *c, struct rt_msghdr *rtm, size_t msglen)
+print_rtmsg(struct rt_msghdr *rtm, size_t msglen)
{
struct if_msghdr *ifm;
struct ifa_msghdr *ifam;
@@ -1741,14 +1681,14 @@ print_rtmsg(struct rt_ctx *c, struct rt_msghdr *rtm, size_t msglen)
struct if_announcemsghdr *ifan;
const char *state;
- if (c->verbose == 0)
+ if (verbose == 0)
return;
if (rtm->rtm_version != RTM_VERSION) {
- (void) printf("routing message version %d not understood\n",
+ (void)printf("routing message version %d not understood\n",
rtm->rtm_version);
return;
}
- if (rtm->rtm_type < sizeof(msgtypes) / sizeof(msgtypes[0]))
+ if (rtm->rtm_type < nitems(msgtypes))
(void)printf("%s: ", msgtypes[rtm->rtm_type]);
else
(void)printf("unknown type %d: ", rtm->rtm_type);
@@ -1765,7 +1705,7 @@ print_rtmsg(struct rt_ctx *c, struct rt_msghdr *rtm, size_t msglen)
case RTM_IFINFO:
REQUIRE(struct if_msghdr);
ifm = (struct if_msghdr *)rtm;
- (void) printf("if# %d, ", ifm->ifm_index);
+ (void)printf("if# %d, ", ifm->ifm_index);
switch (ifm->ifm_data.ifi_link_state) {
case LINK_STATE_DOWN:
state = "down";
@@ -1777,33 +1717,33 @@ print_rtmsg(struct rt_ctx *c, struct rt_msghdr *rtm, size_t msglen)
state = "unknown";
break;
}
- (void) printf("link: %s, flags:", state);
- bprintf(stdout, ifm->ifm_flags, ifnetflags);
- pmsg_addrs(c, (char *)(ifm + 1), ifm->ifm_addrs, msglen);
+ (void)printf("link: %s, flags:", state);
+ printb(ifm->ifm_flags, ifnetflags);
+ pmsg_addrs((char *)(ifm + 1), ifm->ifm_addrs, msglen);
break;
case RTM_NEWADDR:
case RTM_DELADDR:
REQUIRE(struct ifa_msghdr);
ifam = (struct ifa_msghdr *)rtm;
- (void) printf("metric %d, flags:", ifam->ifam_metric);
- bprintf(stdout, ifam->ifam_flags, routeflags);
- pmsg_addrs(c, (char *)(ifam + 1), ifam->ifam_addrs, msglen);
+ (void)printf("metric %d, flags:", ifam->ifam_metric);
+ printb(ifam->ifam_flags, routeflags);
+ pmsg_addrs((char *)(ifam + 1), ifam->ifam_addrs, msglen);
break;
#ifdef RTM_NEWMADDR
case RTM_NEWMADDR:
case RTM_DELMADDR:
REQUIRE(struct ifma_msghdr);
ifmam = (struct ifma_msghdr *)rtm;
- pmsg_addrs(c, (char *)(ifmam + 1), ifmam->ifmam_addrs, msglen);
+ pmsg_addrs((char *)(ifmam + 1), ifmam->ifmam_addrs, msglen);
break;
#endif
case RTM_IFANNOUNCE:
REQUIRE(struct if_announcemsghdr);
ifan = (struct if_announcemsghdr *)rtm;
- (void) printf("if# %d, what: ", ifan->ifan_index);
+ (void)printf("if# %d, what: ", ifan->ifan_index);
switch (ifan->ifan_what) {
case IFAN_ARRIVAL:
- printf("arrival");
+ (void)printf("arrival");
break;
case IFAN_DEPARTURE:
printf("departure");
@@ -1817,10 +1757,10 @@ print_rtmsg(struct rt_ctx *c, struct rt_msghdr *rtm, size_t msglen)
break;
default:
- (void) printf("pid: %ld, seq %d, errno %d, flags:",
+ printf("pid: %ld, seq %d, errno %d, flags:",
(long)rtm->rtm_pid, rtm->rtm_seq, rtm->rtm_errno);
- bprintf(stdout, rtm->rtm_flags, routeflags);
- pmsg_common(c, rtm, msglen);
+ printb(rtm->rtm_flags, routeflags);
+ pmsg_common(rtm, msglen);
}
return;
@@ -1831,16 +1771,16 @@ badlen:
}
static void
-print_getmsg(struct rt_ctx *c, struct rt_msghdr *rtm, int msglen, int fib)
+print_getmsg(struct rt_msghdr *rtm, int msglen, int fib)
{
- struct sockaddr *dst = NULL, *gate = NULL, *mask = NULL;
- struct sockaddr_dl *ifp = NULL;
- struct sockaddr *sa;
+ struct sockaddr *sp[RTAX_MAX];
+ struct timespec ts;
char *cp;
int i;
- (void) printf(" route to: %s\n",
- routename(c, (struct sockaddr *)&c->so_dst));
+ memset(sp, 0, sizeof(sp));
+ (void)printf(" route to: %s\n",
+ routename((struct sockaddr *)&so[RTAX_DST]));
if (rtm->rtm_version != RTM_VERSION) {
warnx("routing message version %d not understood",
rtm->rtm_version);
@@ -1849,6 +1789,7 @@ print_getmsg(struct rt_ctx *c, struct rt_msghdr *rtm, int msglen, int fib)
if (rtm->rtm_msglen > msglen) {
warnx("message length mismatch, in packet %d, returned %d",
rtm->rtm_msglen, msglen);
+ return;
}
if (rtm->rtm_errno) {
errno = rtm->rtm_errno;
@@ -1856,122 +1797,105 @@ print_getmsg(struct rt_ctx *c, struct rt_msghdr *rtm, int msglen, int fib)
return;
}
cp = ((char *)(rtm + 1));
- if (rtm->rtm_addrs)
- for (i = 1; i; i <<= 1)
- if (i & rtm->rtm_addrs) {
- sa = (struct sockaddr *)cp;
- switch (i) {
- case RTA_DST:
- dst = sa;
- break;
- case RTA_GATEWAY:
- gate = sa;
- break;
- case RTA_NETMASK:
- mask = sa;
- break;
- case RTA_IFP:
- if (sa->sa_family == AF_LINK &&
- ((struct sockaddr_dl *)sa)->sdl_nlen)
- ifp = (struct sockaddr_dl *)sa;
- break;
- }
- cp += SA_SIZE(sa);
- }
- if (dst && mask)
- mask->sa_family = dst->sa_family; /* XXX */
- if (dst)
- (void)printf("destination: %s\n", routename(c, dst));
- if (mask) {
- int savenflag = c->nflag;
-
- c->nflag = 1;
- (void)printf(" mask: %s\n", routename(c, mask));
- c->nflag = savenflag;
- }
- if (gate && rtm->rtm_flags & RTF_GATEWAY)
- (void)printf(" gateway: %s\n", routename(c, gate));
+ for (i = 0; i < RTAX_MAX; i++)
+ if (rtm->rtm_addrs & (1 << i)) {
+ sp[i] = (struct sockaddr *)cp;
+ cp += SA_SIZE((struct sockaddr *)cp);
+ }
+ if ((rtm->rtm_addrs & RTA_IFP) &&
+ (sp[RTAX_IFP]->sa_family != AF_LINK ||
+ ((struct sockaddr_dl *)(void *)sp[RTAX_IFP])->sdl_nlen == 0))
+ sp[RTAX_IFP] = NULL;
+ if (sp[RTAX_DST])
+ (void)printf("destination: %s\n", routename(sp[RTAX_DST]));
+ if (sp[RTAX_NETMASK])
+ (void)printf(" mask: %s\n", routename(sp[RTAX_NETMASK]));
+ if (sp[RTAX_GATEWAY] && (rtm->rtm_flags & RTF_GATEWAY))
+ (void)printf(" gateway: %s\n", routename(sp[RTAX_GATEWAY]));
if (fib >= 0)
(void)printf(" fib: %u\n", (unsigned int)fib);
- if (ifp)
+ if (sp[RTAX_IFP])
(void)printf(" interface: %.*s\n",
- ifp->sdl_nlen, ifp->sdl_data);
+ ((struct sockaddr_dl *)(void *)sp[RTAX_IFP])->sdl_nlen,
+ ((struct sockaddr_dl *)(void *)sp[RTAX_IFP])->sdl_data);
(void)printf(" flags: ");
- bprintf(stdout, rtm->rtm_flags, routeflags);
+ printb(rtm->rtm_flags, routeflags);
#define lock(f) ((rtm->rtm_rmx.rmx_locks & __CONCAT(RTV_,f)) ? 'L' : ' ')
#define msec(u) (((u) + 500) / 1000) /* usec to msec */
-
- (void) printf("\n%s\n", "\
- recvpipe sendpipe ssthresh rtt,msec mtu weight expire");
- printf("%8ld%c ", rtm->rtm_rmx.rmx_recvpipe, lock(RPIPE));
- printf("%8ld%c ", rtm->rtm_rmx.rmx_sendpipe, lock(SPIPE));
- printf("%8ld%c ", rtm->rtm_rmx.rmx_ssthresh, lock(SSTHRESH));
- printf("%8ld%c ", msec(rtm->rtm_rmx.rmx_rtt), lock(RTT));
- printf("%8ld%c ", rtm->rtm_rmx.rmx_mtu, lock(MTU));
- printf("%8ld%c ", rtm->rtm_rmx.rmx_weight, lock(WEIGHT));
- if (rtm->rtm_rmx.rmx_expire)
- rtm->rtm_rmx.rmx_expire -= time(0);
- printf("%8ld%c\n", rtm->rtm_rmx.rmx_expire, lock(EXPIRE));
+ printf("\n%9s %9s %9s %9s %9s %10s %9s\n", "recvpipe",
+ "sendpipe", "ssthresh", "rtt,msec", "mtu ", "weight", "expire");
+ printf("%8lu%c ", rtm->rtm_rmx.rmx_recvpipe, lock(RPIPE));
+ printf("%8lu%c ", rtm->rtm_rmx.rmx_sendpipe, lock(SPIPE));
+ printf("%8lu%c ", rtm->rtm_rmx.rmx_ssthresh, lock(SSTHRESH));
+ printf("%8lu%c ", msec(rtm->rtm_rmx.rmx_rtt), lock(RTT));
+ printf("%8lu%c ", rtm->rtm_rmx.rmx_mtu, lock(MTU));
+ printf("%8lu%c ", rtm->rtm_rmx.rmx_weight, lock(WEIGHT));
+ if (rtm->rtm_rmx.rmx_expire > 0)
+ clock_gettime(CLOCK_REALTIME_FAST, &ts);
+ else
+ ts.tv_sec = 0;
+ printf("%8ld%c\n", (long)(rtm->rtm_rmx.rmx_expire - ts.tv_sec),
+ lock(EXPIRE));
#undef lock
#undef msec
#define RTA_IGN (RTA_DST|RTA_GATEWAY|RTA_NETMASK|RTA_IFP|RTA_IFA|RTA_BRD)
- if (c->verbose)
- pmsg_common(c, rtm, msglen);
+ if (verbose)
+ pmsg_common(rtm, msglen);
else if (rtm->rtm_addrs &~ RTA_IGN) {
- (void) printf("sockaddrs: ");
- bprintf(stdout, rtm->rtm_addrs, addrnames);
+ (void)printf("sockaddrs: ");
+ printb(rtm->rtm_addrs, addrnames);
putchar('\n');
}
#undef RTA_IGN
}
static void
-pmsg_common(struct rt_ctx *c, struct rt_msghdr *rtm, size_t msglen)
+pmsg_common(struct rt_msghdr *rtm, size_t msglen)
{
- (void) printf("\nlocks: ");
- bprintf(stdout, rtm->rtm_rmx.rmx_locks, metricnames);
- (void) printf(" inits: ");
- bprintf(stdout, rtm->rtm_inits, metricnames);
+
+ (void)printf("\nlocks: ");
+ printb(rtm->rtm_rmx.rmx_locks, metricnames);
+ (void)printf(" inits: ");
+ printb(rtm->rtm_inits, metricnames);
if (msglen > sizeof(struct rt_msghdr))
- pmsg_addrs(c, ((char *)(rtm + 1)), rtm->rtm_addrs,
+ pmsg_addrs(((char *)(rtm + 1)), rtm->rtm_addrs,
msglen - sizeof(struct rt_msghdr));
else
- (void) fflush(stdout);
+ (void)fflush(stdout);
}
static void
-pmsg_addrs(struct rt_ctx *c, char *cp, int addrs, size_t len)
+pmsg_addrs(char *cp, int addrs, size_t len)
{
struct sockaddr *sa;
int i;
if (addrs == 0) {
- (void) putchar('\n');
+ (void)putchar('\n');
return;
}
- (void) printf("\nsockaddrs: ");
- bprintf(stdout, addrs, addrnames);
- (void) putchar('\n');
- for (i = 1; i != 0; i <<= 1)
- if (i & addrs) {
+ (void)printf("\nsockaddrs: ");
+ printb(addrs, addrnames);
+ putchar('\n');
+ for (i = 0; i < RTAX_MAX; i++)
+ if (addrs & (1 << i)) {
sa = (struct sockaddr *)cp;
if (len == 0 || len < SA_SIZE(sa)) {
- (void) printf(errfmt, __func__, len);
+ (void)printf(errfmt, __func__, len);
break;
}
- (void) printf(" %s", routename(c, sa));
+ (void)printf(" %s", routename(sa));
len -= SA_SIZE(sa);
cp += SA_SIZE(sa);
}
- (void) putchar('\n');
- (void) fflush(stdout);
+ (void)putchar('\n');
+ (void)fflush(stdout);
}
static void
-bprintf(FILE *fp, int b, const char *sstr)
+printb(int b, const char *str)
{
- const u_char *str = (const u_char *) sstr;
int i;
int gotsome = 0;
@@ -1983,16 +1907,16 @@ bprintf(FILE *fp, int b, const char *sstr)
i = '<';
else
i = ',';
- (void) putc(i, fp);
+ putchar(i);
gotsome = 1;
for (; (i = *str) > 32; str++)
- (void) putc(i, fp);
+ putchar(i);
} else
while (*str > 32)
str++;
}
if (gotsome)
- (void) putc('>', fp);
+ putchar('>');
}
int
@@ -2006,19 +1930,32 @@ keyword(const char *cp)
}
static void
-sodump(sup su, const char *which)
+sodump(struct sockaddr *sa, const char *which)
{
- switch (su->sa.sa_family) {
+#ifdef INET6
+ char nbuf[INET6_ADDRSTRLEN];
+#endif
+
+ switch (sa->sa_family) {
case AF_LINK:
- (void) printf("%s: link %s; ",
- which, link_ntoa(&su->sdl));
+ (void)printf("%s: link %s; ", which,
+ link_ntoa((struct sockaddr_dl *)(void *)sa));
break;
+#ifdef INET
case AF_INET:
- (void) printf("%s: inet %s; ",
- which, inet_ntoa(su->sin.sin_addr));
+ (void)printf("%s: inet %s; ", which,
+ inet_ntoa(((struct sockaddr_in *)(void *)sa)->sin_addr));
break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ (void)printf("%s: inet6 %s; ", which, inet_ntop(sa->sa_family,
+ &((struct sockaddr_in6 *)(void *)sa)->sin6_addr, nbuf,
+ sizeof(nbuf)));
+ break;
+#endif
}
- (void) fflush(stdout);
+ (void)fflush(stdout);
}
/* States*/
@@ -2031,10 +1968,9 @@ sodump(sup su, const char *which)
#define DELIM (4*2)
static void
-sockaddr(char *addr, struct sockaddr *sa)
+sockaddr(char *addr, struct sockaddr *sa, size_t size)
{
char *cp = (char *)sa;
- int size = sa->sa_len;
char *cplim = cp + size;
int byte = 0, state = VIRGIN, new = 0 /* foil gcc */;
diff --git a/freebsd/sbin/route/rtems-bsd-route-data.h b/freebsd/sbin/route/rtems-bsd-route-data.h
new file mode 100644
index 00000000..b3854595
--- /dev/null
+++ b/freebsd/sbin/route/rtems-bsd-route-data.h
@@ -0,0 +1,3 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+/* route.c */
diff --git a/freebsd/sbin/route/rtems-bsd-route-namespace.h b/freebsd/sbin/route/rtems-bsd-route-namespace.h
new file mode 100644
index 00000000..4f49513e
--- /dev/null
+++ b/freebsd/sbin/route/rtems-bsd-route-namespace.h
@@ -0,0 +1,2 @@
+/* generated by userspace-header-gen.py */
+/* route.c */
diff --git a/freebsd/sbin/route/rtems-bsd-route-route-data.h b/freebsd/sbin/route/rtems-bsd-route-route-data.h
new file mode 100644
index 00000000..8b54078f
--- /dev/null
+++ b/freebsd/sbin/route/rtems-bsd-route-route-data.h
@@ -0,0 +1,30 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-route-data.h"
+/* route.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int pid);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int rtm_addrs);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int s);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int nflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int af);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int qflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int tflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int verbose);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int aflen);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int locking);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int lockrest);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int debugonly);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static struct rt_metrics rt_metrics);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static u_long rtm_inits);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static uid_t uid);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int defaultfib);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int numfibs);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static _Bool domain_initialized);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static int rtm_seq);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static struct m_rtmsg m_rtmsg);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static struct fibl_head_t fibl_head);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static sig_atomic_t volatile stop_read);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static struct sockaddr_storage so[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static char domain[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static char rt_line[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_route, static char net_line[]);
diff --git a/freebsd/sbin/sysctl/rtems-bsd-sysctl-data.h b/freebsd/sbin/sysctl/rtems-bsd-sysctl-data.h
new file mode 100644
index 00000000..8746008a
--- /dev/null
+++ b/freebsd/sbin/sysctl/rtems-bsd-sysctl-data.h
@@ -0,0 +1,3 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+/* sysctl.c */
diff --git a/freebsd/sbin/sysctl/rtems-bsd-sysctl-namespace.h b/freebsd/sbin/sysctl/rtems-bsd-sysctl-namespace.h
new file mode 100644
index 00000000..858b515f
--- /dev/null
+++ b/freebsd/sbin/sysctl/rtems-bsd-sysctl-namespace.h
@@ -0,0 +1,2 @@
+/* generated by userspace-header-gen.py */
+/* sysctl.c */
diff --git a/freebsd/sbin/sysctl/rtems-bsd-sysctl-sysctl-data.h b/freebsd/sbin/sysctl/rtems-bsd-sysctl-sysctl-data.h
new file mode 100644
index 00000000..6c6b3453
--- /dev/null
+++ b/freebsd/sbin/sysctl/rtems-bsd-sysctl-sysctl-data.h
@@ -0,0 +1,23 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-sysctl-data.h"
+/* sysctl.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static char const *conffile);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int aflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int bflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int Bflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int dflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int eflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int hflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int iflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int Nflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int nflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int oflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int qflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int tflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int Tflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int Wflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int xflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int ctl_sign[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static int ctl_size[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_sysctl, static char const *ctl_typename[]);
diff --git a/freebsd/sbin/sysctl/sysctl.c b/freebsd/sbin/sysctl/sysctl.c
index 87d9da32..fdbecd5f 100644
--- a/freebsd/sbin/sysctl/sysctl.c
+++ b/freebsd/sbin/sysctl/sysctl.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-sysctl-namespace.h"
+#endif /* __rtems__ */
+
/*
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
@@ -42,22 +46,13 @@ static char sccsid[] = "@(#)from: sysctl.c 8.1 (Berkeley) 6/6/93";
static const char rcsid[] =
"$FreeBSD$";
#endif /* not lint */
+
#ifdef __rtems__
#define __need_getopt_newlib
#include <getopt.h>
-#define RTEMS_BSD_PROGRAM_NO_OPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_SOCKET_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FOPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FCLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_MALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_REALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FREE_WRAP
#include <machine/rtems-bsd-program.h>
#include <machine/rtems-bsd-commands.h>
#endif /* __rtems__ */
-
#include <rtems/bsd/sys/param.h>
#include <sys/time.h>
#include <rtems/bsd/sys/resource.h>
@@ -65,6 +60,16 @@ static const char rcsid[] =
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
+#ifdef __amd64__
+#include <sys/efi.h>
+#include <machine/metadata.h>
+#endif
+
+#if defined(__amd64__) || defined(__i386__)
+#include <machine/pc/bios.h>
+#endif
+
+#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
@@ -73,71 +78,112 @@ static const char rcsid[] =
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sysexits.h>
#include <unistd.h>
+#ifdef __rtems__
+#include "rtems-bsd-sysctl-sysctl-data.h"
+#endif /* __rtems__ */
+
+static const char *conffile;
-static int aflag, bflag, dflag, eflag, hflag, iflag;
-static int Nflag, nflag, oflag, qflag, xflag, warncount;
+static int aflag, bflag, Bflag, dflag, eflag, hflag, iflag;
+static int Nflag, nflag, oflag, qflag, tflag, Tflag, Wflag, xflag;
static int oidfmt(int *, int, char *, u_int *);
-static void parse(const char *);
+static int parsefile(const char *);
+static int parse(const char *, int);
static int show_var(int *, int);
static int sysctl_all(int *oid, int len);
-static int name2oid(char *, int *);
+static int name2oid(const char *, int *);
-static int set_IK(const char *, int *);
+static int strIKtoi(const char *, char **, const char *);
+
+static int ctl_sign[CTLTYPE+1] = {
+ [CTLTYPE_INT] = 1,
+ [CTLTYPE_LONG] = 1,
+ [CTLTYPE_S8] = 1,
+ [CTLTYPE_S16] = 1,
+ [CTLTYPE_S32] = 1,
+ [CTLTYPE_S64] = 1,
+};
+
+static int ctl_size[CTLTYPE+1] = {
+ [CTLTYPE_INT] = sizeof(int),
+ [CTLTYPE_UINT] = sizeof(u_int),
+ [CTLTYPE_LONG] = sizeof(long),
+ [CTLTYPE_ULONG] = sizeof(u_long),
+ [CTLTYPE_S8] = sizeof(int8_t),
+ [CTLTYPE_S16] = sizeof(int16_t),
+ [CTLTYPE_S32] = sizeof(int32_t),
+ [CTLTYPE_S64] = sizeof(int64_t),
+ [CTLTYPE_U8] = sizeof(uint8_t),
+ [CTLTYPE_U16] = sizeof(uint16_t),
+ [CTLTYPE_U32] = sizeof(uint32_t),
+ [CTLTYPE_U64] = sizeof(uint64_t),
+};
+
+static const char *ctl_typename[CTLTYPE+1] = {
+ [CTLTYPE_INT] = "integer",
+ [CTLTYPE_UINT] = "unsigned integer",
+ [CTLTYPE_LONG] = "long integer",
+ [CTLTYPE_ULONG] = "unsigned long",
+ [CTLTYPE_U8] = "uint8_t",
+ [CTLTYPE_U16] = "uint16_t",
+	[CTLTYPE_U32] = "uint32_t",
+ [CTLTYPE_U64] = "uint64_t",
+ [CTLTYPE_S8] = "int8_t",
+ [CTLTYPE_S16] = "int16_t",
+ [CTLTYPE_S32] = "int32_t",
+ [CTLTYPE_S64] = "int64_t",
+ [CTLTYPE_NODE] = "node",
+ [CTLTYPE_STRING] = "string",
+ [CTLTYPE_OPAQUE] = "opaque",
+};
static void
usage(void)
{
(void)fprintf(stderr, "%s\n%s\n",
- "usage: sysctl [-bdehiNnoqx] name[=value] ...",
- " sysctl [-bdehNnoqx] -a");
+ "usage: sysctl [-bdehiNnoqTtWx] [ -B <bufsize> ] [-f filename] name[=value] ...",
+ " sysctl [-bdehNnoqTtWx] [ -B <bufsize> ] -a");
exit(1);
}
#ifdef __rtems__
-static int main(int argc, char **argv);
+static int main(int argc, char *argv[]);
+
+RTEMS_LINKER_RWSET(bsd_prog_sysctl, char);
-int rtems_bsd_command_sysctl(int argc, char *argv[])
+int
+rtems_bsd_command_sysctl(int argc, char *argv[])
{
int exit_code;
+ void *data_begin;
+ size_t data_size;
- rtems_bsd_program_lock();
-
- aflag = bflag = dflag = eflag = hflag = iflag = 0;
- Nflag = nflag = oflag = qflag = xflag = warncount = 0;
-
- exit_code = rtems_bsd_program_call_main("sysctl", main, argc, argv);
+ data_begin = RTEMS_LINKER_SET_BEGIN(bsd_prog_sysctl);
+ data_size = RTEMS_LINKER_SET_SIZE(bsd_prog_sysctl);
+ rtems_bsd_program_lock();
+ exit_code = rtems_bsd_program_call_main_with_data_restore("sysctl",
+ main, argc, argv, data_begin, data_size);
rtems_bsd_program_unlock();
return exit_code;
}
#endif /* __rtems__ */
-
int
main(int argc, char **argv)
{
int ch;
+ int warncount = 0;
-#ifdef __rtems__
- struct getopt_data getopt_data;
- memset(&getopt_data, 0, sizeof(getopt_data));
-#define optind getopt_data.optind
-#define optarg getopt_data.optarg
-#define opterr getopt_data.opterr
-#define optopt getopt_data.optopt
-#define getopt(argc, argv, opt) getopt_r(argc, argv, "+" opt, &getopt_data)
-#endif /* __rtems__ */
-
-#ifndef __rtems__
setlocale(LC_NUMERIC, "");
setbuf(stdout,0);
setbuf(stderr,0);
-#endif /* __rtems__ */
- while ((ch = getopt(argc, argv, "AabdehiNnoqwxX")) != -1) {
+ while ((ch = getopt(argc, argv, "AabB:def:hiNnoqtTwWxX")) != -1) {
switch (ch) {
case 'A':
/* compatibility */
@@ -149,12 +195,18 @@ main(int argc, char **argv)
case 'b':
bflag = 1;
break;
+ case 'B':
+ Bflag = strtol(optarg, NULL, 0);
+ break;
case 'd':
dflag = 1;
break;
case 'e':
eflag = 1;
break;
+ case 'f':
+ conffile = optarg;
+ break;
case 'h':
hflag = 1;
break;
@@ -173,10 +225,19 @@ main(int argc, char **argv)
case 'q':
qflag = 1;
break;
+ case 't':
+ tflag = 1;
+ break;
+ case 'T':
+ Tflag = 1;
+ break;
case 'w':
/* compatibility */
/* ignored */
break;
+ case 'W':
+ Wflag = 1;
+ break;
case 'X':
/* compatibility */
aflag = xflag = 1;
@@ -195,13 +256,17 @@ main(int argc, char **argv)
usage();
if (aflag && argc == 0)
exit(sysctl_all(0, 0));
- if (argc == 0)
+ if (argc == 0 && conffile == NULL)
usage();
warncount = 0;
+ if (conffile != NULL)
+ warncount += parsefile(conffile);
+
while (argc-- > 0)
- parse(*argv++);
- exit(warncount);
+ warncount += parse(*argv++, 0);
+
+ return (warncount);
}
/*
@@ -209,47 +274,91 @@ main(int argc, char **argv)
* Lookup and print out the MIB entry if it exists.
* Set a new value if requested.
*/
-static void
-parse(const char *string)
+static int
+parse(const char *string, int lineno)
{
int len, i, j;
- void *newval = 0;
+ const void *newval;
+ const char *newvalstr = NULL;
+ int8_t i8val;
+ uint8_t u8val;
+ int16_t i16val;
+ uint16_t u16val;
+ int32_t i32val;
+ uint32_t u32val;
int intval;
unsigned int uintval;
long longval;
unsigned long ulongval;
- size_t newsize = 0;
+ size_t newsize = Bflag;
int64_t i64val;
uint64_t u64val;
int mib[CTL_MAXNAME];
- char *cp, *bufp, buf[BUFSIZ], *endptr, fmt[BUFSIZ];
+ char *cp, *bufp, buf[BUFSIZ], *endptr = NULL, fmt[BUFSIZ], line[BUFSIZ];
u_int kind;
+ if (lineno)
+ snprintf(line, sizeof(line), " at line %d", lineno);
+ else
+ line[0] = '\0';
+
cp = buf;
- if (snprintf(buf, BUFSIZ, "%s", string) >= BUFSIZ)
- errx(1, "oid too long: '%s'", string);
- bufp = strsep(&cp, "=");
+ if (snprintf(buf, BUFSIZ, "%s", string) >= BUFSIZ) {
+ warnx("oid too long: '%s'%s", string, line);
+ return (1);
+ }
+ bufp = strsep(&cp, "=:");
if (cp != NULL) {
+ /* Tflag just lists tunables, do not allow assignment */
+ if (Tflag || Wflag) {
+ warnx("Can't set variables when using -T or -W");
+ usage();
+ }
while (isspace(*cp))
cp++;
- newval = cp;
+ /* Strip a pair of " or ' if any. */
+ switch (*cp) {
+ case '\"':
+ case '\'':
+ if (cp[strlen(cp) - 1] == *cp)
+ cp[strlen(cp) - 1] = '\0';
+ cp++;
+ }
+ newvalstr = cp;
newsize = strlen(cp);
}
+ /* Trim spaces */
+ cp = bufp + strlen(bufp) - 1;
+ while (cp >= bufp && isspace((int)*cp)) {
+ *cp = '\0';
+ cp--;
+ }
len = name2oid(bufp, mib);
if (len < 0) {
if (iflag)
- return;
+ return (0);
if (qflag)
- exit(1);
- else
- errx(1, "unknown oid '%s'", bufp);
+ return (1);
+ else {
+ if (errno == ENOENT) {
+ warnx("unknown oid '%s'%s", bufp, line);
+ } else {
+ warn("unknown oid '%s'%s", bufp, line);
+ }
+ return (1);
+ }
}
- if (oidfmt(mib, len, fmt, &kind))
- err(1, "couldn't find format of oid '%s'", bufp);
+ if (oidfmt(mib, len, fmt, &kind)) {
+ warn("couldn't find format of oid '%s'%s", bufp, line);
+ if (iflag)
+ return (1);
+ else
+ exit(1);
+ }
- if (newval == NULL || dflag) {
+ if (newvalstr == NULL || dflag) {
if ((kind & CTLTYPE) == CTLTYPE_NODE) {
if (dflag) {
i = show_var(mib, len);
@@ -263,92 +372,131 @@ parse(const char *string)
putchar('\n');
}
} else {
- if ((kind & CTLTYPE) == CTLTYPE_NODE)
- errx(1, "oid '%s' isn't a leaf node", bufp);
+ if ((kind & CTLTYPE) == CTLTYPE_NODE) {
+ warnx("oid '%s' isn't a leaf node%s", bufp, line);
+ return (1);
+ }
if (!(kind & CTLFLAG_WR)) {
if (kind & CTLFLAG_TUN) {
- warnx("oid '%s' is a read only tunable", bufp);
- errx(1, "Tunable values are set in /boot/loader.conf");
- } else {
- errx(1, "oid '%s' is read only", bufp);
- }
+ warnx("oid '%s' is a read only tunable%s", bufp, line);
+ warnx("Tunable values are set in /boot/loader.conf");
+ } else
+ warnx("oid '%s' is read only%s", bufp, line);
+ return (1);
}
- if ((kind & CTLTYPE) == CTLTYPE_INT ||
- (kind & CTLTYPE) == CTLTYPE_UINT ||
- (kind & CTLTYPE) == CTLTYPE_LONG ||
- (kind & CTLTYPE) == CTLTYPE_ULONG ||
- (kind & CTLTYPE) == CTLTYPE_S64 ||
- (kind & CTLTYPE) == CTLTYPE_U64) {
- if (strlen(newval) == 0)
- errx(1, "empty numeric value");
+ switch (kind & CTLTYPE) {
+ case CTLTYPE_INT:
+ case CTLTYPE_UINT:
+ case CTLTYPE_LONG:
+ case CTLTYPE_ULONG:
+ case CTLTYPE_S8:
+ case CTLTYPE_S16:
+ case CTLTYPE_S32:
+ case CTLTYPE_S64:
+ case CTLTYPE_U8:
+ case CTLTYPE_U16:
+ case CTLTYPE_U32:
+ case CTLTYPE_U64:
+ if (strlen(newvalstr) == 0) {
+ warnx("empty numeric value");
+ return (1);
+ }
+ /* FALLTHROUGH */
+ case CTLTYPE_STRING:
+ break;
+ default:
+ warnx("oid '%s' is type %d,"
+ " cannot set that%s", bufp,
+ kind & CTLTYPE, line);
+ return (1);
}
+ errno = 0;
+
switch (kind & CTLTYPE) {
case CTLTYPE_INT:
- if (strcmp(fmt, "IK") == 0) {
- if (!set_IK(newval, &intval))
- errx(1, "invalid value '%s'",
- (char *)newval);
- } else {
- intval = (int)strtol(newval, &endptr,
+ if (strncmp(fmt, "IK", 2) == 0)
+ intval = strIKtoi(newvalstr, &endptr, fmt);
+ else
+ intval = (int)strtol(newvalstr, &endptr,
0);
- if (endptr == newval || *endptr != '\0')
- errx(1, "invalid integer '%s'",
- (char *)newval);
- }
newval = &intval;
newsize = sizeof(intval);
break;
case CTLTYPE_UINT:
- uintval = (int) strtoul(newval, &endptr, 0);
- if (endptr == newval || *endptr != '\0')
- errx(1, "invalid unsigned integer '%s'",
- (char *)newval);
+ uintval = (int) strtoul(newvalstr, &endptr, 0);
newval = &uintval;
newsize = sizeof(uintval);
break;
case CTLTYPE_LONG:
- longval = strtol(newval, &endptr, 0);
- if (endptr == newval || *endptr != '\0')
- errx(1, "invalid long integer '%s'",
- (char *)newval);
+ longval = strtol(newvalstr, &endptr, 0);
newval = &longval;
newsize = sizeof(longval);
break;
case CTLTYPE_ULONG:
- ulongval = strtoul(newval, &endptr, 0);
- if (endptr == newval || *endptr != '\0')
- errx(1, "invalid unsigned long integer"
- " '%s'", (char *)newval);
+ ulongval = strtoul(newvalstr, &endptr, 0);
newval = &ulongval;
newsize = sizeof(ulongval);
break;
case CTLTYPE_STRING:
+ newval = newvalstr;
+ break;
+ case CTLTYPE_S8:
+ i8val = (int8_t)strtol(newvalstr, &endptr, 0);
+ newval = &i8val;
+ newsize = sizeof(i8val);
+ break;
+ case CTLTYPE_S16:
+ i16val = (int16_t)strtol(newvalstr, &endptr,
+ 0);
+ newval = &i16val;
+ newsize = sizeof(i16val);
+ break;
+ case CTLTYPE_S32:
+ i32val = (int32_t)strtol(newvalstr, &endptr,
+ 0);
+ newval = &i32val;
+ newsize = sizeof(i32val);
break;
case CTLTYPE_S64:
- i64val = strtoimax(newval, &endptr, 0);
- if (endptr == newval || *endptr != '\0')
- errx(1, "invalid int64_t '%s'",
- (char *)newval);
+ i64val = strtoimax(newvalstr, &endptr, 0);
newval = &i64val;
newsize = sizeof(i64val);
break;
+ case CTLTYPE_U8:
+ u8val = (uint8_t)strtoul(newvalstr, &endptr, 0);
+ newval = &u8val;
+ newsize = sizeof(u8val);
+ break;
+ case CTLTYPE_U16:
+ u16val = (uint16_t)strtoul(newvalstr, &endptr,
+ 0);
+ newval = &u16val;
+ newsize = sizeof(u16val);
+ break;
+ case CTLTYPE_U32:
+ u32val = (uint32_t)strtoul(newvalstr, &endptr,
+ 0);
+ newval = &u32val;
+ newsize = sizeof(u32val);
+ break;
case CTLTYPE_U64:
- u64val = strtoumax(newval, &endptr, 0);
- if (endptr == newval || *endptr != '\0')
- errx(1, "invalid uint64_t '%s'",
- (char *)newval);
+ u64val = strtoumax(newvalstr, &endptr, 0);
newval = &u64val;
newsize = sizeof(u64val);
break;
- case CTLTYPE_OPAQUE:
- /* FALLTHROUGH */
default:
- errx(1, "oid '%s' is type %d,"
- " cannot set that", bufp,
- kind & CTLTYPE);
+ /* NOTREACHED */
+ abort();
+ }
+
+ if (errno != 0 || endptr == newvalstr ||
+ (endptr != NULL && *endptr != '\0')) {
+ warnx("invalid %s '%s'%s", ctl_typename[kind & CTLTYPE],
+ newvalstr, line);
+ return (1);
}
i = show_var(mib, len);
@@ -357,18 +505,20 @@ parse(const char *string)
putchar('\n');
switch (errno) {
case EOPNOTSUPP:
- errx(1, "%s: value is not available",
- string);
+ warnx("%s: value is not available%s",
+ string, line);
+ return (1);
case ENOTDIR:
- errx(1, "%s: specification is incomplete",
- string);
+ warnx("%s: specification is incomplete%s",
+ string, line);
+ return (1);
case ENOMEM:
- errx(1, "%s: type is unknown to this program",
- string);
+ warnx("%s: type is unknown to this program%s",
+ string, line);
+ return (1);
default:
- warn("%s", string);
- warncount++;
- return;
+ warn("%s%s", string, line);
+ return (1);
}
}
if (!bflag)
@@ -380,34 +530,86 @@ parse(const char *string)
putchar('\n');
nflag = i;
}
+
+ return (0);
+}
+
+static int
+parsefile(const char *filename)
+{
+ FILE *file;
+ char line[BUFSIZ], *p, *pq, *pdq;
+ int warncount = 0, lineno = 0;
+
+ file = fopen(filename, "r");
+ if (file == NULL)
+ err(EX_NOINPUT, "%s", filename);
+ while (fgets(line, sizeof(line), file) != NULL) {
+ lineno++;
+ p = line;
+ pq = strchr(line, '\'');
+ pdq = strchr(line, '\"');
+ /* Replace the first # with \0. */
+ while((p = strchr(p, '#')) != NULL) {
+ if (pq != NULL && p > pq) {
+ if ((p = strchr(pq+1, '\'')) != NULL)
+ *(++p) = '\0';
+ break;
+ } else if (pdq != NULL && p > pdq) {
+ if ((p = strchr(pdq+1, '\"')) != NULL)
+ *(++p) = '\0';
+ break;
+ } else if (p == line || *(p-1) != '\\') {
+ *p = '\0';
+ break;
+ }
+ p++;
+ }
+ /* Trim spaces */
+ p = line + strlen(line) - 1;
+ while (p >= line && isspace((int)*p)) {
+ *p = '\0';
+ p--;
+ }
+ p = line;
+ while (isspace((int)*p))
+ p++;
+ if (*p == '\0')
+ continue;
+ else
+ warncount += parse(p, lineno);
+ }
+ fclose(file);
+
+ return (warncount);
}
/* These functions will dump out various interesting structures. */
+#ifndef __rtems__
static int
-S_clockinfo(int l2, void *p)
+S_clockinfo(size_t l2, void *p)
{
-#ifndef __rtems__
struct clockinfo *ci = (struct clockinfo*)p;
if (l2 != sizeof(*ci)) {
- warnx("S_clockinfo %d != %zu", l2, sizeof(*ci));
+ warnx("S_clockinfo %zu != %zu", l2, sizeof(*ci));
return (1);
}
printf(hflag ? "{ hz = %'d, tick = %'d, profhz = %'d, stathz = %'d }" :
"{ hz = %d, tick = %d, profhz = %d, stathz = %d }",
ci->hz, ci->tick, ci->profhz, ci->stathz);
-#endif /* __rtems__ */
return (0);
}
+#endif /* __rtems__ */
static int
-S_loadavg(int l2, void *p)
+S_loadavg(size_t l2, void *p)
{
struct loadavg *tv = (struct loadavg*)p;
if (l2 != sizeof(*tv)) {
- warnx("S_loadavg %d != %zu", l2, sizeof(*tv));
+ warnx("S_loadavg %zu != %zu", l2, sizeof(*tv));
return (1);
}
printf(hflag ? "{ %'.2f %'.2f %'.2f }" : "{ %.2f %.2f %.2f }",
@@ -418,14 +620,14 @@ S_loadavg(int l2, void *p)
}
static int
-S_timeval(int l2, void *p)
+S_timeval(size_t l2, void *p)
{
struct timeval *tv = (struct timeval*)p;
time_t tv_sec;
char *p1, *p2;
if (l2 != sizeof(*tv)) {
- warnx("S_timeval %d != %zu", l2, sizeof(*tv));
+ warnx("S_timeval %zu != %zu", l2, sizeof(*tv));
return (1);
}
printf(hflag ? "{ sec = %'jd, usec = %'ld } " :
@@ -442,13 +644,13 @@ S_timeval(int l2, void *p)
}
static int
-S_vmtotal(int l2, void *p)
+S_vmtotal(size_t l2, void *p)
{
struct vmtotal *v = (struct vmtotal *)p;
int pageKilo = getpagesize() / 1024;
if (l2 != sizeof(*v)) {
- warnx("S_vmtotal %d != %zu", l2, sizeof(*v));
+ warnx("S_vmtotal %zu != %zu", l2, sizeof(*v));
return (1);
}
@@ -461,44 +663,179 @@ S_vmtotal(int l2, void *p)
"%hd Sleep: %hd)\n",
v->t_rq, v->t_dw, v->t_pw, v->t_sl);
printf(
- "Virtual Memory:\t\t(Total: %dK Active: %dK)\n",
- v->t_vm * pageKilo, v->t_avm * pageKilo);
- printf("Real Memory:\t\t(Total: %dK Active: %dK)\n",
- v->t_rm * pageKilo, v->t_arm * pageKilo);
- printf("Shared Virtual Memory:\t(Total: %dK Active: %dK)\n",
- v->t_vmshr * pageKilo, v->t_avmshr * pageKilo);
- printf("Shared Real Memory:\t(Total: %dK Active: %dK)\n",
- v->t_rmshr * pageKilo, v->t_armshr * pageKilo);
- printf("Free Memory:\t%dK\n", v->t_free * pageKilo);
+ "Virtual Memory:\t\t(Total: %jdK Active: %jdK)\n",
+ (intmax_t)v->t_vm * pageKilo, (intmax_t)v->t_avm * pageKilo);
+ printf("Real Memory:\t\t(Total: %jdK Active: %jdK)\n",
+ (intmax_t)v->t_rm * pageKilo, (intmax_t)v->t_arm * pageKilo);
+ printf("Shared Virtual Memory:\t(Total: %jdK Active: %jdK)\n",
+ (intmax_t)v->t_vmshr * pageKilo, (intmax_t)v->t_avmshr * pageKilo);
+ printf("Shared Real Memory:\t(Total: %jdK Active: %jdK)\n",
+ (intmax_t)v->t_rmshr * pageKilo, (intmax_t)v->t_armshr * pageKilo);
+ printf("Free Memory:\t%jdK", (intmax_t)v->t_free * pageKilo);
return (0);
}
+#ifdef __amd64__
+#define efi_next_descriptor(ptr, size) \
+ ((struct efi_md *)(((uint8_t *) ptr) + size))
+
static int
-set_IK(const char *str, int *val)
+S_efi_map(size_t l2, void *p)
{
- float temp;
- int len, kelv;
- const char *p;
- char *endptr;
+ struct efi_map_header *efihdr;
+ struct efi_md *map;
+ const char *type;
+ size_t efisz;
+ int ndesc, i;
+
+ static const char *types[] = {
+ "Reserved",
+ "LoaderCode",
+ "LoaderData",
+ "BootServicesCode",
+ "BootServicesData",
+ "RuntimeServicesCode",
+ "RuntimeServicesData",
+ "ConventionalMemory",
+ "UnusableMemory",
+ "ACPIReclaimMemory",
+ "ACPIMemoryNVS",
+ "MemoryMappedIO",
+ "MemoryMappedIOPortSpace",
+ "PalCode"
+ };
+
+ /*
+ * Memory map data provided by UEFI via the GetMemoryMap
+ * Boot Services API.
+ */
+ if (l2 < sizeof(*efihdr)) {
+ warnx("S_efi_map length less than header");
+ return (1);
+ }
+ efihdr = p;
+ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
+ map = (struct efi_md *)((uint8_t *)efihdr + efisz);
- if ((len = strlen(str)) == 0)
+ if (efihdr->descriptor_size == 0)
return (0);
+ if (l2 != efisz + efihdr->memory_size) {
+ warnx("S_efi_map length mismatch %zu vs %zu", l2, efisz +
+ efihdr->memory_size);
+ return (1);
+ }
+ ndesc = efihdr->memory_size / efihdr->descriptor_size;
+
+ printf("\n%23s %12s %12s %8s %4s",
+ "Type", "Physical", "Virtual", "#Pages", "Attr");
+
+ for (i = 0; i < ndesc; i++,
+ map = efi_next_descriptor(map, efihdr->descriptor_size)) {
+ if (map->md_type <= EFI_MD_TYPE_PALCODE)
+ type = types[map->md_type];
+ else
+ type = "<INVALID>";
+ printf("\n%23s %012lx %12p %08lx ", type, map->md_phys,
+ map->md_virt, map->md_pages);
+ if (map->md_attr & EFI_MD_ATTR_UC)
+ printf("UC ");
+ if (map->md_attr & EFI_MD_ATTR_WC)
+ printf("WC ");
+ if (map->md_attr & EFI_MD_ATTR_WT)
+ printf("WT ");
+ if (map->md_attr & EFI_MD_ATTR_WB)
+ printf("WB ");
+ if (map->md_attr & EFI_MD_ATTR_UCE)
+ printf("UCE ");
+ if (map->md_attr & EFI_MD_ATTR_WP)
+ printf("WP ");
+ if (map->md_attr & EFI_MD_ATTR_RP)
+ printf("RP ");
+ if (map->md_attr & EFI_MD_ATTR_XP)
+ printf("XP ");
+ if (map->md_attr & EFI_MD_ATTR_RT)
+ printf("RUNTIME");
+ }
+ return (0);
+}
+#endif
+
+#if defined(__amd64__) || defined(__i386__)
+static int
+S_bios_smap_xattr(size_t l2, void *p)
+{
+ struct bios_smap_xattr *smap, *end;
+
+ if (l2 % sizeof(*smap) != 0) {
+ warnx("S_bios_smap_xattr %zu is not a multiple of %zu", l2,
+ sizeof(*smap));
+ return (1);
+ }
+
+ end = (struct bios_smap_xattr *)((char *)p + l2);
+ for (smap = p; smap < end; smap++)
+ printf("\nSMAP type=%02x, xattr=%02x, base=%016jx, len=%016jx",
+ smap->type, smap->xattr, (uintmax_t)smap->base,
+ (uintmax_t)smap->length);
+ return (0);
+}
+#endif
+
+static int
+strIKtoi(const char *str, char **endptrp, const char *fmt)
+{
+ int kelv;
+ float temp;
+ size_t len;
+ const char *p;
+ int prec, i;
+
+ assert(errno == 0);
+
+ len = strlen(str);
+ /* caller already checked this */
+ assert(len > 0);
+
+ /*
+ * A format of "IK" is in deciKelvin. A format of "IK3" is in
+ * milliKelvin. The single digit following IK is log10 of the
+	 * multiplying factor to convert Kelvin into the units of this sysctl,
+ * or the dividing factor to convert the sysctl value to Kelvin. Numbers
+ * larger than 6 will run into precision issues with 32-bit integers.
+ * Characters that aren't ASCII digits after the 'K' are ignored. No
+ * localization is present because this is an interface from the kernel
+ * to this program (eg not an end-user interface), so isdigit() isn't
+ * used here.
+ */
+ if (fmt[2] != '\0' && fmt[2] >= '0' && fmt[2] <= '9')
+ prec = fmt[2] - '0';
+ else
+ prec = 1;
p = &str[len - 1];
- if (*p == 'C' || *p == 'F') {
- temp = strtof(str, &endptr);
- if (endptr == str || endptr != p)
- return (0);
- if (*p == 'F')
- temp = (temp - 32) * 5 / 9;
- kelv = temp * 10 + 2732;
+ if (*p == 'C' || *p == 'F' || *p == 'K') {
+ temp = strtof(str, endptrp);
+ if (*endptrp != str && *endptrp == p && errno == 0) {
+ if (*p == 'F')
+ temp = (temp - 32) * 5 / 9;
+ *endptrp = NULL;
+ if (*p != 'K')
+ temp += 273.15;
+ for (i = 0; i < prec; i++)
+ temp *= 10.0;
+ return ((int)(temp + 0.5));
+ }
} else {
- kelv = (int)strtol(str, &endptr, 10);
- if (endptr == str || *endptr != '\0')
- return (0);
+ /* No unit specified -> treat it as a raw number */
+ kelv = (int)strtol(str, endptrp, 10);
+ if (*endptrp != str && *endptrp == p && errno == 0) {
+ *endptrp = NULL;
+ return (kelv);
+ }
}
- *val = kelv;
- return (1);
+
+ errno = ERANGE;
+ return (0);
}
/*
@@ -511,7 +848,7 @@ set_IK(const char *str, int *val)
*/
static int
-name2oid(char *name, int *oidp)
+name2oid(const char *name, int *oidp)
{
int oid[2];
int i;
@@ -553,21 +890,6 @@ oidfmt(int *oid, int len, char *fmt, u_int *kind)
return (0);
}
-static int ctl_sign[CTLTYPE+1] = {
- [CTLTYPE_INT] = 1,
- [CTLTYPE_LONG] = 1,
- [CTLTYPE_S64] = 1,
-};
-
-static int ctl_size[CTLTYPE+1] = {
- [CTLTYPE_INT] = sizeof(int),
- [CTLTYPE_UINT] = sizeof(u_int),
- [CTLTYPE_LONG] = sizeof(long),
- [CTLTYPE_ULONG] = sizeof(u_long),
- [CTLTYPE_S64] = sizeof(int64_t),
- [CTLTYPE_U64] = sizeof(int64_t),
-};
-
/*
* This formats and outputs the value of one variable
*
@@ -579,8 +901,8 @@ static int
show_var(int *oid, int nlen)
{
u_char buf[BUFSIZ], *val, *oval, *p;
- char name[BUFSIZ], *fmt;
- const char *sep, *sep1;
+ char name[BUFSIZ], fmt[BUFSIZ];
+ const char *sep, *sep1, *prntype;
int qoid[CTL_MAXNAME+2];
uintmax_t umv;
intmax_t mv;
@@ -588,12 +910,15 @@ show_var(int *oid, int nlen)
size_t intlen;
size_t j, len;
u_int kind;
- int (*func)(int, void *);
+ float base;
+ int (*func)(size_t, void *);
+ int prec;
/* Silence GCC. */
umv = mv = intlen = 0;
bzero(buf, BUFSIZ);
+ bzero(fmt, BUFSIZ);
bzero(name, BUFSIZ);
qoid[0] = 0;
memcpy(qoid + 2, oid, nlen * sizeof(int));
@@ -604,6 +929,15 @@ show_var(int *oid, int nlen)
if (i || !j)
err(1, "sysctl name %d %zu %d", i, j, errno);
+ oidfmt(oid, nlen, fmt, &kind);
+ /* if Wflag then only list sysctls that are writeable and not stats. */
+ if (Wflag && ((kind & CTLFLAG_WR) == 0 || (kind & CTLFLAG_STATS) != 0))
+ return 1;
+
+ /* if Tflag then only list sysctls that are tuneables. */
+ if (Tflag && (kind & CTLFLAG_TUN) == 0)
+ return 1;
+
if (Nflag) {
printf("%s", name);
return (0);
@@ -614,19 +948,34 @@ show_var(int *oid, int nlen)
else
sep = ": ";
- if (dflag) { /* just print description */
+ ctltype = (kind & CTLTYPE);
+ if (tflag || dflag) {
+ if (!nflag)
+ printf("%s%s", name, sep);
+ if (ctl_typename[ctltype] != NULL)
+ prntype = ctl_typename[ctltype];
+ else
+ prntype = "unknown";
+ if (tflag && dflag)
+ printf("%s%s", prntype, sep);
+ else if (tflag) {
+ printf("%s", prntype);
+ return (0);
+ }
qoid[1] = 5;
j = sizeof(buf);
i = sysctl(qoid, nlen + 2, buf, &j, 0, 0);
- if (!nflag)
- printf("%s%s", name, sep);
printf("%s", buf);
return (0);
}
/* find an estimate of how much we need for this var */
- j = 0;
- i = sysctl(oid, nlen, 0, &j, 0, 0);
- j += j; /* we want to be sure :-) */
+ if (Bflag)
+ j = Bflag;
+ else {
+ j = 0;
+ i = sysctl(oid, nlen, 0, &j, 0, 0);
+ j += j; /* we want to be sure :-) */
+ }
val = oval = malloc(j + 1);
if (val == NULL) {
@@ -635,7 +984,7 @@ show_var(int *oid, int nlen)
}
len = j;
i = sysctl(oid, nlen, val, &len, 0, 0);
- if (i || !len) {
+ if (i != 0 || (len == 0 && ctltype != CTLTYPE_STRING)) {
free(oval);
return (1);
}
@@ -646,10 +995,7 @@ show_var(int *oid, int nlen)
return (0);
}
val[len] = '\0';
- fmt = buf;
- oidfmt(oid, nlen, fmt, &kind);
p = val;
- ctltype = (kind & CTLTYPE);
sign = ctl_sign[ctltype];
intlen = ctl_size[ctltype];
@@ -665,7 +1011,13 @@ show_var(int *oid, int nlen)
case CTLTYPE_UINT:
case CTLTYPE_LONG:
case CTLTYPE_ULONG:
+ case CTLTYPE_S8:
+ case CTLTYPE_S16:
+ case CTLTYPE_S32:
case CTLTYPE_S64:
+ case CTLTYPE_U8:
+ case CTLTYPE_U16:
+ case CTLTYPE_U32:
case CTLTYPE_U64:
if (!nflag)
printf("%s%s", name, sep);
@@ -683,6 +1035,21 @@ show_var(int *oid, int nlen)
umv = *(u_long *)p;
mv = *(long *)p;
break;
+ case CTLTYPE_S8:
+ case CTLTYPE_U8:
+ umv = *(uint8_t *)p;
+ mv = *(int8_t *)p;
+ break;
+ case CTLTYPE_S16:
+ case CTLTYPE_U16:
+ umv = *(uint16_t *)p;
+ mv = *(int16_t *)p;
+ break;
+ case CTLTYPE_S32:
+ case CTLTYPE_U32:
+ umv = *(uint32_t *)p;
+ mv = *(int32_t *)p;
+ break;
case CTLTYPE_S64:
case CTLTYPE_U64:
umv = *(uint64_t *)p;
@@ -697,8 +1064,19 @@ show_var(int *oid, int nlen)
else if (fmt[1] == 'K') {
if (mv < 0)
printf("%jd", mv);
- else
- printf("%.1fC", (mv - 2732.0) / 10);
+ else {
+ /*
+ * See strIKtoi for details on fmt.
+ */
+ prec = 1;
+ if (fmt[2] != '\0')
+ prec = fmt[2] - '0';
+ base = 1.0;
+ for (int i = 0; i < prec; i++)
+ base *= 10.0;
+ printf("%.*fC", prec,
+ (float)mv / base - 273.15);
+ }
} else
printf(hflag ? "%'jd" : "%jd", mv);
sep1 = " ";
@@ -711,13 +1089,25 @@ show_var(int *oid, int nlen)
case CTLTYPE_OPAQUE:
i = 0;
if (strcmp(fmt, "S,clockinfo") == 0)
+#ifndef __rtems__
func = S_clockinfo;
+#else /* __rtems__ */
+ func = NULL;
+#endif /* __rtems__ */
else if (strcmp(fmt, "S,timeval") == 0)
func = S_timeval;
else if (strcmp(fmt, "S,loadavg") == 0)
func = S_loadavg;
else if (strcmp(fmt, "S,vmtotal") == 0)
func = S_vmtotal;
+#ifdef __amd64__
+ else if (strcmp(fmt, "S,efi_map_header") == 0)
+ func = S_efi_map;
+#endif
+#if defined(__amd64__) || defined(__i386__)
+ else if (strcmp(fmt, "S,bios_smap_xattr") == 0)
+ func = S_bios_smap_xattr;
+#endif
else
func = NULL;
if (func) {
diff --git a/freebsd/sys/arm/include/machine/cpufunc.h b/freebsd/sys/arm/include/machine/cpufunc.h
index 8ec1574f..911fc14c 100644
--- a/freebsd/sys/arm/include/machine/cpufunc.h
+++ b/freebsd/sys/arm/include/machine/cpufunc.h
@@ -48,8 +48,8 @@
#ifndef __rtems__
#include <sys/types.h>
+#include <machine/armreg.h>
#include <machine/cpuconf.h>
-#include <machine/katelib.h> /* For in[bwl] and out[bwl] */
static __inline void
breakpoint(void)
@@ -60,33 +60,26 @@ breakpoint(void)
struct cpu_functions {
/* CPU functions */
-
- u_int (*cf_id) (void);
+
void (*cf_cpwait) (void);
/* MMU functions */
u_int (*cf_control) (u_int bic, u_int eor);
- void (*cf_domains) (u_int domains);
void (*cf_setttb) (u_int ttb);
- u_int (*cf_faultstatus) (void);
- u_int (*cf_faultaddress) (void);
/* TLB functions */
- void (*cf_tlb_flushID) (void);
- void (*cf_tlb_flushID_SE) (u_int va);
- void (*cf_tlb_flushI) (void);
- void (*cf_tlb_flushI_SE) (u_int va);
+ void (*cf_tlb_flushID) (void);
+ void (*cf_tlb_flushID_SE) (u_int va);
void (*cf_tlb_flushD) (void);
- void (*cf_tlb_flushD_SE) (u_int va);
+ void (*cf_tlb_flushD_SE) (u_int va);
/*
* Cache operations:
*
* We define the following primitives:
*
- * icache_sync_all Synchronize I-cache
* icache_sync_range Synchronize I-cache range
*
* dcache_wbinv_all Write-back and Invalidate D-cache
@@ -105,7 +98,13 @@ struct cpu_functions {
*
* There are some rules that must be followed:
*
- * I-cache Synch (all or range):
+ * ID-cache Invalidate All:
+ * Unlike other functions, this one must never write back.
+ * It is used to intialize the MMU when it is in an unknown
+ * state (such as when it may have lines tagged as valid
+ * that belong to a previous set of mappings).
+ *
+ * I-cache Sync range:
* The goal is to synchronize the instruction stream,
* so you may beed to write-back dirty D-cache blocks
* first. If a range is requested, and you can't
@@ -131,7 +130,6 @@ struct cpu_functions {
* Valid virtual addresses must be passed to each
* cache operation.
*/
- void (*cf_icache_sync_all) (void);
void (*cf_icache_sync_range) (vm_offset_t, vm_size_t);
void (*cf_dcache_wbinv_all) (void);
@@ -139,52 +137,44 @@ struct cpu_functions {
void (*cf_dcache_inv_range) (vm_offset_t, vm_size_t);
void (*cf_dcache_wb_range) (vm_offset_t, vm_size_t);
+ void (*cf_idcache_inv_all) (void);
void (*cf_idcache_wbinv_all) (void);
void (*cf_idcache_wbinv_range) (vm_offset_t, vm_size_t);
void (*cf_l2cache_wbinv_all) (void);
void (*cf_l2cache_wbinv_range) (vm_offset_t, vm_size_t);
void (*cf_l2cache_inv_range) (vm_offset_t, vm_size_t);
void (*cf_l2cache_wb_range) (vm_offset_t, vm_size_t);
+ void (*cf_l2cache_drain_writebuf) (void);
/* Other functions */
- void (*cf_flush_prefetchbuf) (void);
void (*cf_drain_writebuf) (void);
- void (*cf_flush_brnchtgt_C) (void);
- void (*cf_flush_brnchtgt_E) (u_int va);
void (*cf_sleep) (int mode);
/* Soft functions */
- int (*cf_dataabt_fixup) (void *arg);
- int (*cf_prefetchabt_fixup) (void *arg);
-
void (*cf_context_switch) (void);
- void (*cf_setup) (char *string);
+ void (*cf_setup) (void);
};
extern struct cpu_functions cpufuncs;
extern u_int cputype;
-#define cpu_id() cpufuncs.cf_id()
+#if __ARM_ARCH < 6
#define cpu_cpwait() cpufuncs.cf_cpwait()
+#endif
#define cpu_control(c, e) cpufuncs.cf_control(c, e)
-#define cpu_domains(d) cpufuncs.cf_domains(d)
+#if __ARM_ARCH < 6
#define cpu_setttb(t) cpufuncs.cf_setttb(t)
-#define cpu_faultstatus() cpufuncs.cf_faultstatus()
-#define cpu_faultaddress() cpufuncs.cf_faultaddress()
#define cpu_tlb_flushID() cpufuncs.cf_tlb_flushID()
#define cpu_tlb_flushID_SE(e) cpufuncs.cf_tlb_flushID_SE(e)
-#define cpu_tlb_flushI() cpufuncs.cf_tlb_flushI()
-#define cpu_tlb_flushI_SE(e) cpufuncs.cf_tlb_flushI_SE(e)
#define cpu_tlb_flushD() cpufuncs.cf_tlb_flushD()
#define cpu_tlb_flushD_SE(e) cpufuncs.cf_tlb_flushD_SE(e)
-#define cpu_icache_sync_all() cpufuncs.cf_icache_sync_all()
#define cpu_icache_sync_range(a, s) cpufuncs.cf_icache_sync_range((a), (s))
#define cpu_dcache_wbinv_all() cpufuncs.cf_dcache_wbinv_all()
@@ -192,110 +182,43 @@ extern u_int cputype;
#define cpu_dcache_inv_range(a, s) cpufuncs.cf_dcache_inv_range((a), (s))
#define cpu_dcache_wb_range(a, s) cpufuncs.cf_dcache_wb_range((a), (s))
+#define cpu_idcache_inv_all() cpufuncs.cf_idcache_inv_all()
#define cpu_idcache_wbinv_all() cpufuncs.cf_idcache_wbinv_all()
#define cpu_idcache_wbinv_range(a, s) cpufuncs.cf_idcache_wbinv_range((a), (s))
+#endif
#define cpu_l2cache_wbinv_all() cpufuncs.cf_l2cache_wbinv_all()
#define cpu_l2cache_wb_range(a, s) cpufuncs.cf_l2cache_wb_range((a), (s))
#define cpu_l2cache_inv_range(a, s) cpufuncs.cf_l2cache_inv_range((a), (s))
#define cpu_l2cache_wbinv_range(a, s) cpufuncs.cf_l2cache_wbinv_range((a), (s))
+#define cpu_l2cache_drain_writebuf() cpufuncs.cf_l2cache_drain_writebuf()
-#define cpu_flush_prefetchbuf() cpufuncs.cf_flush_prefetchbuf()
+#if __ARM_ARCH < 6
#define cpu_drain_writebuf() cpufuncs.cf_drain_writebuf()
-#define cpu_flush_brnchtgt_C() cpufuncs.cf_flush_brnchtgt_C()
-#define cpu_flush_brnchtgt_E(e) cpufuncs.cf_flush_brnchtgt_E(e)
-
+#endif
#define cpu_sleep(m) cpufuncs.cf_sleep(m)
-#define cpu_dataabt_fixup(a) cpufuncs.cf_dataabt_fixup(a)
-#define cpu_prefetchabt_fixup(a) cpufuncs.cf_prefetchabt_fixup(a)
-#define ABORT_FIXUP_OK 0 /* fixup succeeded */
-#define ABORT_FIXUP_FAILED 1 /* fixup failed */
-#define ABORT_FIXUP_RETURN 2 /* abort handler should return */
-
-#define cpu_setup(a) cpufuncs.cf_setup(a)
+#define cpu_setup() cpufuncs.cf_setup()
int set_cpufuncs (void);
#define ARCHITECTURE_NOT_PRESENT 1 /* known but not configured */
#define ARCHITECTURE_NOT_SUPPORTED 2 /* not known */
void cpufunc_nullop (void);
-int cpufunc_null_fixup (void *);
-int early_abort_fixup (void *);
-int late_abort_fixup (void *);
-u_int cpufunc_id (void);
+u_int cpu_ident (void);
u_int cpufunc_control (u_int clear, u_int bic);
-void cpufunc_domains (u_int domains);
-u_int cpufunc_faultstatus (void);
-u_int cpufunc_faultaddress (void);
-
-#ifdef CPU_ARM3
-u_int arm3_control (u_int clear, u_int bic);
-void arm3_cache_flush (void);
-#endif /* CPU_ARM3 */
-
-#if defined(CPU_ARM6) || defined(CPU_ARM7)
-void arm67_setttb (u_int ttb);
-void arm67_tlb_flush (void);
-void arm67_tlb_purge (u_int va);
-void arm67_cache_flush (void);
-void arm67_context_switch (void);
-#endif /* CPU_ARM6 || CPU_ARM7 */
-
-#ifdef CPU_ARM6
-void arm6_setup (char *string);
-#endif /* CPU_ARM6 */
-
-#ifdef CPU_ARM7
-void arm7_setup (char *string);
-#endif /* CPU_ARM7 */
-
-#ifdef CPU_ARM7TDMI
-int arm7_dataabt_fixup (void *arg);
-void arm7tdmi_setup (char *string);
-void arm7tdmi_setttb (u_int ttb);
-void arm7tdmi_tlb_flushID (void);
-void arm7tdmi_tlb_flushID_SE (u_int va);
-void arm7tdmi_cache_flushID (void);
-void arm7tdmi_context_switch (void);
-#endif /* CPU_ARM7TDMI */
-
-#ifdef CPU_ARM8
-void arm8_setttb (u_int ttb);
-void arm8_tlb_flushID (void);
-void arm8_tlb_flushID_SE (u_int va);
-void arm8_cache_flushID (void);
-void arm8_cache_flushID_E (u_int entry);
-void arm8_cache_cleanID (void);
-void arm8_cache_cleanID_E (u_int entry);
-void arm8_cache_purgeID (void);
-void arm8_cache_purgeID_E (u_int entry);
-
-void arm8_cache_syncI (void);
-void arm8_cache_cleanID_rng (vm_offset_t start, vm_size_t end);
-void arm8_cache_cleanD_rng (vm_offset_t start, vm_size_t end);
-void arm8_cache_purgeID_rng (vm_offset_t start, vm_size_t end);
-void arm8_cache_purgeD_rng (vm_offset_t start, vm_size_t end);
-void arm8_cache_syncI_rng (vm_offset_t start, vm_size_t end);
-
-void arm8_context_switch (void);
-
-void arm8_setup (char *string);
-
-u_int arm8_clock_config (u_int, u_int);
-#endif
-
-
-#if defined(CPU_FA526) || defined(CPU_FA626TE)
-void fa526_setup (char *arg);
+void cpu_domains (u_int domains);
+u_int cpu_faultstatus (void);
+u_int cpu_faultaddress (void);
+u_int cpu_get_control (void);
+u_int cpu_pfr (int);
+
+#if defined(CPU_FA526)
+void fa526_setup (void);
void fa526_setttb (u_int ttb);
void fa526_context_switch (void);
void fa526_cpu_sleep (int);
-void fa526_tlb_flushI_SE (u_int);
void fa526_tlb_flushID_SE (u_int);
-void fa526_flush_prefetchbuf (void);
-void fa526_flush_brnchtgt_E (u_int);
-void fa526_icache_sync_all (void);
void fa526_icache_sync_range(vm_offset_t start, vm_size_t end);
void fa526_dcache_wbinv_all (void);
void fa526_dcache_wbinv_range(vm_offset_t start, vm_size_t end);
@@ -306,54 +229,13 @@ void fa526_idcache_wbinv_range(vm_offset_t start, vm_size_t end);
#endif
-#ifdef CPU_SA110
-void sa110_setup (char *string);
-void sa110_context_switch (void);
-#endif /* CPU_SA110 */
-
-#if defined(CPU_SA1100) || defined(CPU_SA1110)
-void sa11x0_drain_readbuf (void);
-
-void sa11x0_context_switch (void);
-void sa11x0_cpu_sleep (int mode);
-
-void sa11x0_setup (char *string);
-#endif
-
-#if defined(CPU_SA110) || defined(CPU_SA1100) || defined(CPU_SA1110)
-void sa1_setttb (u_int ttb);
-
-void sa1_tlb_flushID_SE (u_int va);
-
-void sa1_cache_flushID (void);
-void sa1_cache_flushI (void);
-void sa1_cache_flushD (void);
-void sa1_cache_flushD_SE (u_int entry);
-
-void sa1_cache_cleanID (void);
-void sa1_cache_cleanD (void);
-void sa1_cache_cleanD_E (u_int entry);
-
-void sa1_cache_purgeID (void);
-void sa1_cache_purgeID_E (u_int entry);
-void sa1_cache_purgeD (void);
-void sa1_cache_purgeD_E (u_int entry);
-
-void sa1_cache_syncI (void);
-void sa1_cache_cleanID_rng (vm_offset_t start, vm_size_t end);
-void sa1_cache_cleanD_rng (vm_offset_t start, vm_size_t end);
-void sa1_cache_purgeID_rng (vm_offset_t start, vm_size_t end);
-void sa1_cache_purgeD_rng (vm_offset_t start, vm_size_t end);
-void sa1_cache_syncI_rng (vm_offset_t start, vm_size_t end);
-
-#endif
-
-#ifdef CPU_ARM9
+#if defined(CPU_ARM9) || defined(CPU_ARM9E)
void arm9_setttb (u_int);
-
void arm9_tlb_flushID_SE (u_int va);
+void arm9_context_switch (void);
+#endif
-void arm9_icache_sync_all (void);
+#if defined(CPU_ARM9)
void arm9_icache_sync_range (vm_offset_t, vm_size_t);
void arm9_dcache_wbinv_all (void);
@@ -364,9 +246,7 @@ void arm9_dcache_wb_range (vm_offset_t, vm_size_t);
void arm9_idcache_wbinv_all (void);
void arm9_idcache_wbinv_range (vm_offset_t, vm_size_t);
-void arm9_context_switch (void);
-
-void arm9_setup (char *string);
+void arm9_setup (void);
extern unsigned arm9_dcache_sets_max;
extern unsigned arm9_dcache_sets_inc;
@@ -374,31 +254,8 @@ extern unsigned arm9_dcache_index_max;
extern unsigned arm9_dcache_index_inc;
#endif
-#if defined(CPU_ARM9E) || defined(CPU_ARM10)
-void arm10_setttb (u_int);
-
-void arm10_tlb_flushID_SE (u_int);
-void arm10_tlb_flushI_SE (u_int);
-
-void arm10_icache_sync_all (void);
-void arm10_icache_sync_range (vm_offset_t, vm_size_t);
-
-void arm10_dcache_wbinv_all (void);
-void arm10_dcache_wbinv_range (vm_offset_t, vm_size_t);
-void arm10_dcache_inv_range (vm_offset_t, vm_size_t);
-void arm10_dcache_wb_range (vm_offset_t, vm_size_t);
-
-void arm10_idcache_wbinv_all (void);
-void arm10_idcache_wbinv_range (vm_offset_t, vm_size_t);
-
-void arm10_context_switch (void);
-
-void arm10_setup (char *string);
-
-extern unsigned arm10_dcache_sets_max;
-extern unsigned arm10_dcache_sets_inc;
-extern unsigned arm10_dcache_index_max;
-extern unsigned arm10_dcache_index_inc;
+#if defined(CPU_ARM9E)
+void arm10_setup (void);
u_int sheeva_control_ext (u_int, u_int);
void sheeva_cpu_sleep (int);
@@ -414,27 +271,65 @@ void sheeva_l2cache_wb_range (vm_offset_t, vm_size_t);
void sheeva_l2cache_wbinv_all (void);
#endif
-#ifdef CPU_ARM11
-void arm11_setttb (u_int);
-
-void arm11_tlb_flushID_SE (u_int);
-void arm11_tlb_flushI_SE (u_int);
-
-void arm11_context_switch (void);
+#if defined(CPU_MV_PJ4B)
+void armv6_idcache_wbinv_all (void);
+#endif
+#if defined(CPU_MV_PJ4B) || defined(CPU_CORTEXA) || defined(CPU_KRAIT)
+void armv7_setttb (u_int);
+void armv7_tlb_flushID (void);
+void armv7_tlb_flushID_SE (u_int);
+void armv7_icache_sync_range (vm_offset_t, vm_size_t);
+void armv7_idcache_wbinv_range (vm_offset_t, vm_size_t);
+void armv7_idcache_inv_all (void);
+void armv7_dcache_wbinv_all (void);
+void armv7_idcache_wbinv_all (void);
+void armv7_dcache_wbinv_range (vm_offset_t, vm_size_t);
+void armv7_dcache_inv_range (vm_offset_t, vm_size_t);
+void armv7_dcache_wb_range (vm_offset_t, vm_size_t);
+void armv7_cpu_sleep (int);
+void armv7_setup (void);
+void armv7_context_switch (void);
+void armv7_drain_writebuf (void);
+void armv7_sev (void);
+u_int armv7_auxctrl (u_int, u_int);
+
+void armadaxp_idcache_wbinv_all (void);
+
+void cortexa_setup (void);
+#endif
+#if defined(CPU_MV_PJ4B)
+void pj4b_config (void);
+void pj4bv7_setup (void);
+#endif
-void arm11_setup (char *string);
+#if defined(CPU_ARM1176)
void arm11_tlb_flushID (void);
-void arm11_tlb_flushI (void);
+void arm11_tlb_flushID_SE (u_int);
void arm11_tlb_flushD (void);
void arm11_tlb_flushD_SE (u_int va);
+void arm11_context_switch (void);
+
void arm11_drain_writebuf (void);
+
+void armv6_dcache_wbinv_range (vm_offset_t, vm_size_t);
+void armv6_dcache_inv_range (vm_offset_t, vm_size_t);
+void armv6_dcache_wb_range (vm_offset_t, vm_size_t);
+
+void armv6_idcache_inv_all (void);
+
+void arm11x6_setttb (u_int);
+void arm11x6_idcache_wbinv_all (void);
+void arm11x6_dcache_wbinv_all (void);
+void arm11x6_icache_sync_range (vm_offset_t, vm_size_t);
+void arm11x6_idcache_wbinv_range (vm_offset_t, vm_size_t);
+void arm11x6_setup (void);
+void arm11x6_sleep (int); /* no ref. for errata */
#endif
-#if defined(CPU_ARM9E) || defined (CPU_ARM10)
+#if defined(CPU_ARM9E)
void armv5_ec_setttb(u_int);
-void armv5_ec_icache_sync_all(void);
void armv5_ec_icache_sync_range(vm_offset_t, vm_size_t);
void armv5_ec_dcache_wbinv_all(void);
@@ -446,50 +341,21 @@ void armv5_ec_idcache_wbinv_all(void);
void armv5_ec_idcache_wbinv_range(vm_offset_t, vm_size_t);
#endif
-#if defined (CPU_ARM10) || defined (CPU_ARM11)
-void armv5_setttb(u_int);
-
-void armv5_icache_sync_all(void);
-void armv5_icache_sync_range(vm_offset_t, vm_size_t);
-
-void armv5_dcache_wbinv_all(void);
-void armv5_dcache_wbinv_range(vm_offset_t, vm_size_t);
-void armv5_dcache_inv_range(vm_offset_t, vm_size_t);
-void armv5_dcache_wb_range(vm_offset_t, vm_size_t);
-
-void armv5_idcache_wbinv_all(void);
-void armv5_idcache_wbinv_range(vm_offset_t, vm_size_t);
-
-extern unsigned armv5_dcache_sets_max;
-extern unsigned armv5_dcache_sets_inc;
-extern unsigned armv5_dcache_index_max;
-extern unsigned armv5_dcache_index_inc;
-#endif
-
-#if defined(CPU_ARM9) || defined(CPU_ARM9E) || defined(CPU_ARM10) || \
- defined(CPU_SA110) || defined(CPU_SA1100) || defined(CPU_SA1110) || \
- defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
- defined(CPU_FA526) || defined(CPU_FA626TE) || \
+#if defined(CPU_ARM9) || defined(CPU_ARM9E) || \
+ defined(CPU_FA526) || \
defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \
- defined(CPU_XSCALE_80219) || defined(CPU_XSCALE_81342)
-
+ defined(CPU_XSCALE_81342)
+
void armv4_tlb_flushID (void);
-void armv4_tlb_flushI (void);
void armv4_tlb_flushD (void);
void armv4_tlb_flushD_SE (u_int va);
void armv4_drain_writebuf (void);
+void armv4_idcache_inv_all (void);
#endif
-#if defined(CPU_IXP12X0)
-void ixp12x0_drain_readbuf (void);
-void ixp12x0_context_switch (void);
-void ixp12x0_setup (char *string);
-#endif
-
-#if defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
- defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \
- defined(CPU_XSCALE_80219) || defined(CPU_XSCALE_81342)
+#if defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \
+ defined(CPU_XSCALE_81342)
void xscale_cpwait (void);
void xscale_cpu_sleep (int mode);
@@ -526,9 +392,8 @@ void xscale_cache_flushD_rng (vm_offset_t start, vm_size_t end);
void xscale_context_switch (void);
-void xscale_setup (char *string);
-#endif /* CPU_XSCALE_80200 || CPU_XSCALE_80321 || CPU_XSCALE_PXA2X0 || CPU_XSCALE_IXP425
- CPU_XSCALE_80219 */
+void xscale_setup (void);
+#endif /* CPU_XSCALE_PXA2X0 || CPU_XSCALE_IXP425 */
#ifdef CPU_XSCALE_81342
@@ -555,48 +420,66 @@ void xscalec3_context_switch (void);
#endif /* CPU_XSCALE_81342 */
-#define tlb_flush cpu_tlb_flushID
-#define setttb cpu_setttb
-#define drain_writebuf cpu_drain_writebuf
-
/*
* Macros for manipulating CPU interrupts
*/
-static __inline u_int32_t __set_cpsr_c(u_int bic, u_int eor) __attribute__((__unused__));
+#if __ARM_ARCH < 6
+#define __ARM_INTR_BITS (PSR_I | PSR_F)
+#else
+#define __ARM_INTR_BITS (PSR_I | PSR_F | PSR_A)
+#endif
-static __inline u_int32_t
-__set_cpsr_c(u_int bic, u_int eor)
+static __inline uint32_t
+__set_cpsr(uint32_t bic, uint32_t eor)
{
- u_int32_t tmp, ret;
+ uint32_t tmp, ret;
__asm __volatile(
- "mrs %0, cpsr\n" /* Get the CPSR */
- "bic %1, %0, %2\n" /* Clear bits */
- "eor %1, %1, %3\n" /* XOR bits */
- "msr cpsr_c, %1\n" /* Set the control field of CPSR */
+ "mrs %0, cpsr\n" /* Get the CPSR */
+ "bic %1, %0, %2\n" /* Clear bits */
+ "eor %1, %1, %3\n" /* XOR bits */
+ "msr cpsr_xc, %1\n" /* Set the CPSR */
: "=&r" (ret), "=&r" (tmp)
: "r" (bic), "r" (eor) : "memory");
return ret;
}
-#define disable_interrupts(mask) \
- (__set_cpsr_c((mask) & (I32_bit | F32_bit), \
- (mask) & (I32_bit | F32_bit)))
+static __inline uint32_t
+disable_interrupts(uint32_t mask)
+{
+
+ return (__set_cpsr(mask & __ARM_INTR_BITS, mask & __ARM_INTR_BITS));
+}
+
+static __inline uint32_t
+enable_interrupts(uint32_t mask)
+{
-#define enable_interrupts(mask) \
- (__set_cpsr_c((mask) & (I32_bit | F32_bit), 0))
+ return (__set_cpsr(mask & __ARM_INTR_BITS, 0));
+}
-#define restore_interrupts(old_cpsr) \
- (__set_cpsr_c((I32_bit | F32_bit), (old_cpsr) & (I32_bit | F32_bit)))
+static __inline uint32_t
+restore_interrupts(uint32_t old_cpsr)
+{
-#define intr_disable() \
- disable_interrupts(I32_bit | F32_bit)
-#define intr_restore(s) \
- restore_interrupts(s)
-/* Functions to manipulate the CPSR. */
-u_int SetCPSR(u_int bic, u_int eor);
-u_int GetCPSR(void);
+ return (__set_cpsr(__ARM_INTR_BITS, old_cpsr & __ARM_INTR_BITS));
+}
+
+static __inline register_t
+intr_disable(void)
+{
+
+ return (disable_interrupts(PSR_I | PSR_F));
+}
+
+static __inline void
+intr_restore(register_t s)
+{
+
+ restore_interrupts(s);
+}
+#undef __ARM_INTR_BITS
/*
* Functions to manipulate cpu r13
@@ -629,7 +512,7 @@ extern int arm_picache_ways;
extern int arm_pdcache_size; /* and unified */
extern int arm_pdcache_line_size;
-extern int arm_pdcache_ways;
+extern int arm_pdcache_ways;
extern int arm_pcache_type;
extern int arm_pcache_unified;
@@ -637,6 +520,10 @@ extern int arm_pcache_unified;
extern int arm_dcache_align;
extern int arm_dcache_align_mask;
+extern u_int arm_cache_level;
+extern u_int arm_cache_loc;
+extern u_int arm_cache_type[14];
+
#endif /* __rtems__ */
#endif /* _KERNEL */
#endif /* _MACHINE_CPUFUNC_H_ */
diff --git a/freebsd/sys/arm/include/machine/in_cksum.h b/freebsd/sys/arm/include/machine/in_cksum.h
index 633efa1f..72edfba9 100644
--- a/freebsd/sys/arm/include/machine/in_cksum.h
+++ b/freebsd/sys/arm/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/arm/xilinx/zy7_slcr.c b/freebsd/sys/arm/xilinx/zy7_slcr.c
index 79be30a7..7ce502f9 100644
--- a/freebsd/sys/arm/xilinx/zy7_slcr.c
+++ b/freebsd/sys/arm/xilinx/zy7_slcr.c
@@ -88,7 +88,6 @@ extern void (*zynq7_cpu_reset);
#define ZYNQ_DEFAULT_PS_CLK_FREQUENCY 33333333 /* 33.3 Mhz */
-
SYSCTL_NODE(_hw, OID_AUTO, zynq, CTLFLAG_RD, 0, "Xilinx Zynq-7000");
static char zynq_bootmode[64];
@@ -135,7 +134,6 @@ zy7_slcr_lock(struct zy7_slcr_softc *sc)
WR4(sc, ZY7_SLCR_LOCK, ZY7_SLCR_LOCK_MAGIC);
}
-
#ifndef __rtems__
static void
zy7_slcr_cpu_reset(void)
@@ -185,11 +183,12 @@ zy7_slcr_preload_pl(void)
ZSLCR_UNLOCK(sc);
}
+#endif /* __rtems__ */
/* After PL configuration, enable level shifters and deassert top-level
* PL resets. Called from zy7_devcfg.c. Optionally, the level shifters
* can be left disabled but that's rare of an FPGA application. That option
- * is controled by a sysctl in the devcfg driver.
+ * is controlled by a sysctl in the devcfg driver.
*/
void
zy7_slcr_postload_pl(int en_level_shifters)
@@ -216,7 +215,6 @@ zy7_slcr_postload_pl(int en_level_shifters)
ZSLCR_UNLOCK(sc);
}
-#endif /* __rtems__ */
/* Override cgem_set_refclk() in gigabit ethernet driver
* (sys/dev/cadence/if_cgem.c). This function is called to
@@ -266,6 +264,296 @@ cgem_set_ref_clk(int unit, int frequency)
return (0);
}
+/*
+ * PL clocks management function
+ */
+int
+zy7_pl_fclk_set_source(int unit, int source)
+{
+ struct zy7_slcr_softc *sc = zy7_slcr_softc_p;
+ uint32_t reg;
+
+ if (!sc)
+ return (-1);
+
+ ZSLCR_LOCK(sc);
+
+ /* Unlock SLCR registers. */
+ zy7_slcr_unlock(sc);
+
+ /* Modify FPGAx source. */
+ reg = RD4(sc, ZY7_SLCR_FPGA_CLK_CTRL(unit));
+ reg &= ~(ZY7_SLCR_FPGA_CLK_CTRL_SRCSEL_MASK);
+ reg |= (source << ZY7_SLCR_FPGA_CLK_CTRL_SRCSEL_SHIFT);
+ WR4(sc, ZY7_SLCR_FPGA_CLK_CTRL(unit), reg);
+
+ /* Lock SLCR registers. */
+ zy7_slcr_lock(sc);
+
+ ZSLCR_UNLOCK(sc);
+
+ return (0);
+}
+
+int
+zy7_pl_fclk_get_source(int unit)
+{
+ struct zy7_slcr_softc *sc = zy7_slcr_softc_p;
+ uint32_t reg;
+ int source;
+
+ if (!sc)
+ return (-1);
+
+ ZSLCR_LOCK(sc);
+
+ /* Modify GEM reference clock. */
+ reg = RD4(sc, ZY7_SLCR_FPGA_CLK_CTRL(unit));
+ source = (reg & ZY7_SLCR_FPGA_CLK_CTRL_SRCSEL_MASK) >>
+ ZY7_SLCR_FPGA_CLK_CTRL_SRCSEL_SHIFT;
+
+ /* ZY7_PL_FCLK_SRC_IO is actually b0x */
+ if ((source & 2) == 0)
+ source = ZY7_PL_FCLK_SRC_IO;
+
+ ZSLCR_UNLOCK(sc);
+
+ return (source);
+}
+
+int
+zy7_pl_fclk_set_freq(int unit, int frequency)
+{
+ struct zy7_slcr_softc *sc = zy7_slcr_softc_p;
+ int div0, div1;
+ int base_frequency;
+ uint32_t reg;
+ int source;
+
+ if (!sc)
+ return (-1);
+
+ source = zy7_pl_fclk_get_source(unit);
+ switch (source) {
+ case ZY7_PL_FCLK_SRC_IO:
+ base_frequency = io_pll_frequency;
+ break;
+
+ case ZY7_PL_FCLK_SRC_ARM:
+ base_frequency = arm_pll_frequency;
+ break;
+
+ case ZY7_PL_FCLK_SRC_DDR:
+ base_frequency = ddr_pll_frequency;
+ break;
+
+ default:
+ return (-1);
+ }
+
+ /* Find suitable divisor pairs. Round result to nearest khz
+ * to test for match.
+ */
+ for (div1 = 1; div1 <= ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR_MAX; div1++) {
+ div0 = (base_frequency + div1 * frequency / 2) /
+ div1 / frequency;
+ if (div0 > 0 && div0 <= ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR_MAX &&
+ ((base_frequency / div0 / div1) + 500) / 1000 ==
+ (frequency + 500) / 1000)
+ break;
+ }
+
+ if (div1 > ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR_MAX)
+ return (-1);
+
+ ZSLCR_LOCK(sc);
+
+ /* Unlock SLCR registers. */
+ zy7_slcr_unlock(sc);
+
+ /* Modify FPGAx reference clock. */
+ reg = RD4(sc, ZY7_SLCR_FPGA_CLK_CTRL(unit));
+ reg &= ~(ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR1_MASK |
+ ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR0_MASK);
+ reg |= (div1 << ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR1_SHIFT) |
+ (div0 << ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR0_SHIFT);
+ WR4(sc, ZY7_SLCR_FPGA_CLK_CTRL(unit), reg);
+
+ /* Lock SLCR registers. */
+ zy7_slcr_lock(sc);
+
+ ZSLCR_UNLOCK(sc);
+
+ return (base_frequency / div0 / div1);
+}
+
+int
+zy7_pl_fclk_get_freq(int unit)
+{
+ struct zy7_slcr_softc *sc = zy7_slcr_softc_p;
+ int div0, div1;
+ int base_frequency;
+ int frequency;
+ uint32_t reg;
+ int source;
+
+ if (!sc)
+ return (-1);
+
+ source = zy7_pl_fclk_get_source(unit);
+ switch (source) {
+ case ZY7_PL_FCLK_SRC_IO:
+ base_frequency = io_pll_frequency;
+ break;
+
+ case ZY7_PL_FCLK_SRC_ARM:
+ base_frequency = arm_pll_frequency;
+ break;
+
+ case ZY7_PL_FCLK_SRC_DDR:
+ base_frequency = ddr_pll_frequency;
+ break;
+
+ default:
+ return (-1);
+ }
+
+ ZSLCR_LOCK(sc);
+
+ /* Modify FPGAx reference clock. */
+ reg = RD4(sc, ZY7_SLCR_FPGA_CLK_CTRL(unit));
+ div1 = (reg & ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR1_MASK) >>
+ ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR1_SHIFT;
+ div0 = (reg & ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR0_MASK) >>
+ ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR0_SHIFT;
+
+ ZSLCR_UNLOCK(sc);
+
+ if (div0 == 0)
+ div0 = 1;
+
+ if (div1 == 0)
+ div1 = 1;
+
+ frequency = (base_frequency / div0 / div1);
+ /* Round to KHz */
+ frequency = (frequency + 500) / 1000;
+ frequency = frequency * 1000;
+
+ return (frequency);
+}
+
+int
+zy7_pl_fclk_enable(int unit)
+{
+ struct zy7_slcr_softc *sc = zy7_slcr_softc_p;
+
+ if (!sc)
+ return (-1);
+
+ ZSLCR_LOCK(sc);
+
+ /* Unlock SLCR registers. */
+ zy7_slcr_unlock(sc);
+
+ WR4(sc, ZY7_SLCR_FPGA_THR_CTRL(unit), 0);
+ WR4(sc, ZY7_SLCR_FPGA_THR_CNT(unit), 0);
+
+ /* Lock SLCR registers. */
+ zy7_slcr_lock(sc);
+
+ ZSLCR_UNLOCK(sc);
+
+ return (0);
+}
+
+int
+zy7_pl_fclk_disable(int unit)
+{
+ struct zy7_slcr_softc *sc = zy7_slcr_softc_p;
+
+ if (!sc)
+ return (-1);
+
+ ZSLCR_LOCK(sc);
+
+ /* Unlock SLCR registers. */
+ zy7_slcr_unlock(sc);
+
+ WR4(sc, ZY7_SLCR_FPGA_THR_CTRL(unit), 0);
+ WR4(sc, ZY7_SLCR_FPGA_THR_CNT(unit), 1);
+
+ /* Lock SLCR registers. */
+ zy7_slcr_lock(sc);
+
+ ZSLCR_UNLOCK(sc);
+
+ return (0);
+}
+
+int
+zy7_pl_fclk_enabled(int unit)
+{
+ struct zy7_slcr_softc *sc = zy7_slcr_softc_p;
+ uint32_t reg;
+
+ if (!sc)
+ return (-1);
+
+ ZSLCR_LOCK(sc);
+ reg = RD4(sc, ZY7_SLCR_FPGA_THR_CNT(unit));
+ ZSLCR_UNLOCK(sc);
+
+ return !(reg & 1);
+}
+
+int
+zy7_pl_level_shifters_enabled()
+{
+ struct zy7_slcr_softc *sc = zy7_slcr_softc_p;
+
+ uint32_t reg;
+
+ if (!sc)
+ return (-1);
+
+ ZSLCR_LOCK(sc);
+ reg = RD4(sc, ZY7_SLCR_LVL_SHFTR_EN);
+ ZSLCR_UNLOCK(sc);
+
+ return (reg == ZY7_SLCR_LVL_SHFTR_EN_ALL);
+}
+
+void
+zy7_pl_level_shifters_enable()
+{
+ struct zy7_slcr_softc *sc = zy7_slcr_softc_p;
+
+ if (!sc)
+ return;
+
+ ZSLCR_LOCK(sc);
+ zy7_slcr_unlock(sc);
+ WR4(sc, ZY7_SLCR_LVL_SHFTR_EN, ZY7_SLCR_LVL_SHFTR_EN_ALL);
+ zy7_slcr_lock(sc);
+ ZSLCR_UNLOCK(sc);
+}
+
+void
+zy7_pl_level_shifters_disable()
+{
+ struct zy7_slcr_softc *sc = zy7_slcr_softc_p;
+
+ if (!sc)
+ return;
+
+ ZSLCR_LOCK(sc);
+ zy7_slcr_unlock(sc);
+ WR4(sc, ZY7_SLCR_LVL_SHFTR_EN, 0);
+ zy7_slcr_lock(sc);
+ ZSLCR_UNLOCK(sc);
+}
+
static int
zy7_slcr_probe(device_t dev)
{
diff --git a/freebsd/sys/arm/xilinx/zy7_slcr.h b/freebsd/sys/arm/xilinx/zy7_slcr.h
index 70c46619..3afec02a 100644
--- a/freebsd/sys/arm/xilinx/zy7_slcr.h
+++ b/freebsd/sys/arm/xilinx/zy7_slcr.h
@@ -37,7 +37,6 @@
* are in appendix B.28.
*/
-
#ifndef _ZY7_SLCR_H_
#define _ZY7_SLCR_H_
@@ -148,10 +147,19 @@
#define ZY7_SLCR_DBG_CLK_CTRL 0x0164
#define ZY7_SLCR_PCAP_CLK_CTRL 0x0168
#define ZY7_SLCR_TOPSW_CLK_CTRL 0x016c /* central intercnn clk ctrl */
-#define ZY7_SLCR_FPGA0_CLK_CTRL 0x0170
-#define ZY7_SLCR_FPGA1_CLK_CTRL 0x0180
-#define ZY7_SLCR_FPGA2_CLK_CTRL 0x0190
-#define ZY7_SLCR_FPGA3_CLK_CTRL 0x01a0
+#define ZY7_SLCR_FPGA_CLK_CTRL(unit) (0x0170 + 0x10*(unit))
+#define ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR1_SHIFT 20
+#define ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR1_MASK (0x3f << 20)
+#define ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR0_SHIFT 8
+#define ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR0_MASK (0x3f << 8)
+#define ZY7_SLCR_FPGA_CLK_CTRL_DIVISOR_MAX 0x3f
+#define ZY7_SLCR_FPGA_CLK_CTRL_SRCSEL_SHIFT 4
+#define ZY7_SLCR_FPGA_CLK_CTRL_SRCSEL_MASK (3 << 4)
+#define ZY7_SLCR_FPGA_THR_CTRL(unit) (0x0174 + 0x10*(unit))
+#define ZY7_SLCR_FPGA_THR_CTRL_CNT_RST (1 << 1)
+#define ZY7_SLCR_FPGA_THR_CTRL_CPU_START (1 << 0)
+#define ZY7_SLCR_FPGA_THR_CNT(unit) (0x0178 + 0x10*(unit))
+#define ZY7_SLCR_FPGA_THR_STA(unit) (0x017c + 0x10*(unit))
#define ZY7_SLCR_CLK_621_TRUE 0x01c4 /* cpu clock ratio mode */
/* Reset controls. */
@@ -288,5 +296,23 @@
extern void zy7_slcr_preload_pl(void);
extern void zy7_slcr_postload_pl(int en_level_shifters);
extern int cgem_set_ref_clk(int unit, int frequency);
+
+/* Should be consistent with SRCSEL field of FPGAx_CLK_CTRL */
+#define ZY7_PL_FCLK_SRC_IO 0
+#define ZY7_PL_FCLK_SRC_IO_ALT 1 /* ZY7_PL_FCLK_SRC_IO is b0x */
+#define ZY7_PL_FCLK_SRC_ARM 2
+#define ZY7_PL_FCLK_SRC_DDR 3
+
+int zy7_pl_fclk_set_source(int unit, int source);
+int zy7_pl_fclk_get_source(int unit);
+int zy7_pl_fclk_set_freq(int unit, int freq);
+int zy7_pl_fclk_get_freq(int unit);
+int zy7_pl_fclk_enable(int unit);
+int zy7_pl_fclk_disable(int unit);
+int zy7_pl_fclk_enabled(int unit);
+int zy7_pl_level_shifters_enabled(void);
+void zy7_pl_level_shifters_enable(void);
+void zy7_pl_level_shifters_disable(void);
+
#endif
#endif /* _ZY7_SLCR_H_ */
diff --git a/freebsd/sys/avr/include/machine/in_cksum.h b/freebsd/sys/avr/include/machine/in_cksum.h
index 633efa1f..72edfba9 100644
--- a/freebsd/sys/avr/include/machine/in_cksum.h
+++ b/freebsd/sys/avr/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/bfin/include/machine/in_cksum.h b/freebsd/sys/bfin/include/machine/in_cksum.h
index 633efa1f..72edfba9 100644
--- a/freebsd/sys/bfin/include/machine/in_cksum.h
+++ b/freebsd/sys/bfin/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/bsm/audit.h b/freebsd/sys/bsm/audit.h
index d1f18a91..7efc93a1 100644
--- a/freebsd/sys/bsm/audit.h
+++ b/freebsd/sys/bsm/audit.h
@@ -26,7 +26,6 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * P4: //depot/projects/trustedbsd/openbsm/sys/bsm/audit.h#10
* $FreeBSD$
*/
diff --git a/freebsd/sys/bsm/audit_kevents.h b/freebsd/sys/bsm/audit_kevents.h
index 3eb2e3ab..3c16c739 100644
--- a/freebsd/sys/bsm/audit_kevents.h
+++ b/freebsd/sys/bsm/audit_kevents.h
@@ -26,7 +26,6 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * P4: //depot/projects/trustedbsd/openbsm/sys/bsm/audit_kevents.h#7
* $FreeBSD$
*/
@@ -34,10 +33,10 @@
#define _BSM_AUDIT_KEVENTS_H_
/*
- * The reserved event numbers for kernel events are 1...2047 and 43001..44900.
+ * The reserved event numbers for kernel events are 1...2047 and 43001..44999.
*/
-#define AUE_IS_A_KEVENT(e) (((e) > 0 && (e) < 2048) || \
- ((e) > 43000 && (e) < 45000))
+#define AUE_IS_A_KEVENT(e) (((e) > 0 && (e) < 2048) || \
+ ((e) > 43000 && (e) < 45000))
/*
* Values marked as AUE_NULL are not required to be audited as per CAPP.
@@ -588,7 +587,8 @@
#define AUE_OPENAT 43184 /* FreeBSD. */
#define AUE_POSIX_OPENPT 43185 /* FreeBSD. */
#define AUE_CAP_NEW 43186 /* TrustedBSD. */
-#define AUE_CAP_GETRIGHTS 43187 /* TrustedBSD. */
+#define AUE_CAP_RIGHTS_GET 43187 /* TrustedBSD. */
+#define AUE_CAP_GETRIGHTS AUE_CAP_RIGHTS_GET
#define AUE_CAP_ENTER 43188 /* TrustedBSD. */
#define AUE_CAP_GETMODE 43189 /* TrustedBSD. */
#define AUE_POSIX_SPAWN 43190 /* Darwin. */
@@ -603,6 +603,14 @@
#define AUE_PDGETPID 43199 /* FreeBSD. */
#define AUE_PDWAIT 43200 /* FreeBSD. */
#define AUE_WAIT6 43201 /* FreeBSD. */
+#define AUE_CAP_RIGHTS_LIMIT 43202 /* TrustedBSD. */
+#define AUE_CAP_IOCTLS_LIMIT 43203 /* TrustedBSD. */
+#define AUE_CAP_IOCTLS_GET 43204 /* TrustedBSD. */
+#define AUE_CAP_FCNTLS_LIMIT 43205 /* TrustedBSD. */
+#define AUE_CAP_FCNTLS_GET 43206 /* TrustedBSD. */
+#define AUE_BINDAT 43207 /* TrustedBSD. */
+#define AUE_CONNECTAT 43208 /* TrustedBSD. */
+#define AUE_CHFLAGSAT 43209 /* FreeBSD-specific. */
/*
* Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
diff --git a/freebsd/sys/cam/ata/ata_all.h b/freebsd/sys/cam/ata/ata_all.h
index 91e941c8..ea902d09 100644
--- a/freebsd/sys/cam/ata/ata_all.h
+++ b/freebsd/sys/cam/ata/ata_all.h
@@ -103,10 +103,11 @@ int ata_version(int ver);
char * ata_op_string(struct ata_cmd *cmd);
char * ata_cmd_string(struct ata_cmd *cmd, char *cmd_string, size_t len);
+void ata_cmd_sbuf(struct ata_cmd *cmd, struct sbuf *sb);
char * ata_res_string(struct ata_res *res, char *res_string, size_t len);
int ata_command_sbuf(struct ccb_ataio *ataio, struct sbuf *sb);
int ata_status_sbuf(struct ccb_ataio *ataio, struct sbuf *sb);
-int ata_res_sbuf(struct ccb_ataio *ataio, struct sbuf *sb);
+int ata_res_sbuf(struct ata_res *res, struct sbuf *sb);
void ata_print_ident(struct ata_params *ident_data);
void ata_print_ident_short(struct ata_params *ident_data);
@@ -124,6 +125,11 @@ void ata_ncq_cmd(struct ccb_ataio *ataio, uint8_t cmd,
void ata_reset_cmd(struct ccb_ataio *ataio);
void ata_pm_read_cmd(struct ccb_ataio *ataio, int reg, int port);
void ata_pm_write_cmd(struct ccb_ataio *ataio, int reg, int port, uint32_t val);
+void ata_read_log(struct ccb_ataio *ataio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint32_t log_address, uint32_t page_number,
+ uint16_t block_count, uint32_t protocol,
+ uint8_t *data_ptr, uint32_t dxfer_len, uint32_t timeout);
void ata_bswap(int8_t *buf, int len);
void ata_btrim(int8_t *buf, int len);
@@ -166,4 +172,16 @@ void semb_write_buffer(struct ccb_ataio *ataio,
uint8_t tag_action, uint8_t *data_ptr, uint16_t param_list_length,
uint32_t timeout);
+void ata_zac_mgmt_out(struct ccb_ataio *ataio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ int use_ncq __unused, uint8_t zm_action, uint64_t zone_id,
+ uint8_t zone_flags, uint16_t sector_count, uint8_t *data_ptr,
+ uint32_t dxfer_len, uint32_t timeout);
+
+void ata_zac_mgmt_in(struct ccb_ataio *ataio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ int use_ncq __unused, uint8_t zm_action, uint64_t zone_id,
+ uint8_t zone_flags, uint8_t *data_ptr, uint32_t dxfer_len,
+ uint32_t timeout);
+
#endif
diff --git a/freebsd/sys/cam/cam.c b/freebsd/sys/cam/cam.c
index 18628cd0..71523db2 100644
--- a/freebsd/sys/cam/cam.c
+++ b/freebsd/sys/cam/cam.c
@@ -107,9 +107,6 @@ const struct cam_status_entry cam_status_table[] = {
{ CAM_SCSI_BUSY, "SCSI Bus Busy" },
};
-const int num_cam_status_entries =
- sizeof(cam_status_table)/sizeof(*cam_status_table);
-
#ifdef _KERNEL
SYSCTL_NODE(_kern, OID_AUTO, cam, CTLFLAG_RD, 0, "CAM Subsystem");
@@ -118,7 +115,6 @@ SYSCTL_NODE(_kern, OID_AUTO, cam, CTLFLAG_RD, 0, "CAM Subsystem");
#endif
int cam_sort_io_queues = CAM_DEFAULT_SORT_IO_QUEUES;
-TUNABLE_INT("kern.cam.sort_io_queues", &cam_sort_io_queues);
SYSCTL_INT(_kern_cam, OID_AUTO, sort_io_queues, CTLFLAG_RWTUN,
&cam_sort_io_queues, 0, "Sort IO queues to try and optimise disk access patterns");
#endif
@@ -160,35 +156,125 @@ cam_strvis(u_int8_t *dst, const u_int8_t *src, int srclen, int dstlen)
*dst = '\0';
}
+void
+cam_strvis_sbuf(struct sbuf *sb, const u_int8_t *src, int srclen,
+ uint32_t flags)
+{
+
+ /* Trim leading/trailing spaces, nulls. */
+ while (srclen > 0 && src[0] == ' ')
+ src++, srclen--;
+ while (srclen > 0
+ && (src[srclen-1] == ' ' || src[srclen-1] == '\0'))
+ srclen--;
+
+ while (srclen > 0) {
+ if (*src < 0x20 || *src >= 0x80) {
+ /* SCSI-II Specifies that these should never occur. */
+ /* non-printable character */
+ switch (flags & CAM_STRVIS_FLAG_NONASCII_MASK) {
+ case CAM_STRVIS_FLAG_NONASCII_ESC:
+ sbuf_printf(sb, "\\%c%c%c",
+ ((*src & 0300) >> 6) + '0',
+ ((*src & 0070) >> 3) + '0',
+ ((*src & 0007) >> 0) + '0');
+ break;
+ case CAM_STRVIS_FLAG_NONASCII_RAW:
+ /*
+ * If we run into a NUL, just transform it
+ * into a space.
+ */
+ if (*src != 0x00)
+ sbuf_putc(sb, *src);
+ else
+ sbuf_putc(sb, ' ');
+ break;
+ case CAM_STRVIS_FLAG_NONASCII_SPC:
+ sbuf_putc(sb, ' ');
+ break;
+ case CAM_STRVIS_FLAG_NONASCII_TRIM:
+ default:
+ break;
+ }
+ } else {
+ /* normal character */
+ sbuf_putc(sb, *src);
+ }
+ src++;
+ srclen--;
+ }
+}
+
+
/*
* Compare string with pattern, returning 0 on match.
* Short pattern matches trailing blanks in name,
- * wildcard '*' in pattern matches rest of name,
- * wildcard '?' matches a single non-space character.
+ * Shell globbing rules apply: * matches 0 or more characters,
+ * ? matches one character, [...] denotes a set to match one char,
+ * [^...] denotes a complemented set to match one character.
+ * A space in str formerly matched anything in the pattern string,
+ * but that behavior was removed because it was a bug. No current
+ * patterns require it, as far as I know, but it's impossible to
+ * know what strings drives have returned.
+ *
+ * Each '*' generates recursion, so keep the number of * in check.
*/
int
cam_strmatch(const u_int8_t *str, const u_int8_t *pattern, int str_len)
{
- while (*pattern != '\0'&& str_len > 0) {
-
+ while (*pattern != '\0' && str_len > 0) {
if (*pattern == '*') {
- return (0);
- }
- if ((*pattern != *str)
- && (*pattern != '?' || *str == ' ')) {
+ pattern++;
+ if (*pattern == '\0')
+ return (0);
+ do {
+ if (cam_strmatch(str, pattern, str_len) == 0)
+ return (0);
+ str++;
+ str_len--;
+ } while (str_len > 0);
return (1);
+ } else if (*pattern == '[') {
+ int negate_range, ok;
+ uint8_t pc, sc;
+
+ ok = 0;
+ sc = *str++;
+ str_len--;
+ if ((negate_range = (*pattern == '^')) != 0)
+ pattern++;
+ while (((pc = *pattern) != ']') && *pattern != '\0') {
+ pattern++;
+ if (*pattern == '-') {
+ if (pattern[1] == '\0') /* Bad pattern */
+ return (1);
+ if (sc >= pc && sc <= pattern[1])
+ ok = 1;
+ pattern += 2;
+ } else if (pc == sc)
+ ok = 1;
+ }
+ if (ok == negate_range)
+ return (1);
+ } else if (*pattern == '?') {
+ /* NB: || *str == ' ' of the old code is a bug and was removed */
+ /* if you add it back, keep this the last if before the naked else */
+ pattern++;
+ str++;
+ str_len--;
+ } else {
+ if (*str != *pattern)
+ return (1);
+ pattern++;
+ str++;
+ str_len--;
}
- pattern++;
- str++;
- str_len--;
}
while (str_len > 0 && *str == ' ') {
str++;
str_len--;
}
- if (str_len > 0 && *str == 0)
- str_len = 0;
return (str_len);
}
@@ -209,7 +295,7 @@ cam_fetch_status_entry(cam_status status)
{
status &= CAM_STATUS_MASK;
return (bsearch(&status, &cam_status_table,
- num_cam_status_entries,
+ nitems(cam_status_table),
sizeof(*cam_status_table),
camstatusentrycomp));
}
@@ -366,7 +452,8 @@ cam_error_string(struct cam_device *device, union ccb *ccb, char *str,
}
if (proto_flags & CAM_EAF_PRINT_RESULT) {
sbuf_cat(&sb, path_str);
- ata_res_sbuf(&ccb->ataio, &sb);
+ sbuf_printf(&sb, "RES: ");
+ ata_res_sbuf(&ccb->ataio.res, &sb);
sbuf_printf(&sb, "\n");
}
diff --git a/freebsd/sys/cam/cam.h b/freebsd/sys/cam/cam.h
index ba0e43c7..5eb0a776 100644
--- a/freebsd/sys/cam/cam.h
+++ b/freebsd/sys/cam/cam.h
@@ -43,12 +43,18 @@
typedef u_int path_id_t;
typedef u_int target_id_t;
-typedef u_int lun_id_t;
+typedef u_int64_t lun_id_t;
#define CAM_XPT_PATH_ID ((path_id_t)~0)
#define CAM_BUS_WILDCARD ((path_id_t)~0)
#define CAM_TARGET_WILDCARD ((target_id_t)~0)
-#define CAM_LUN_WILDCARD ((lun_id_t)~0)
+#define CAM_LUN_WILDCARD (~(u_int)0)
+
+#define CAM_EXTLUN_BYTE_SWIZZLE(lun) ( \
+ ((((u_int64_t)lun) & 0xffff000000000000L) >> 48) | \
+ ((((u_int64_t)lun) & 0x0000ffff00000000L) >> 16) | \
+ ((((u_int64_t)lun) & 0x00000000ffff0000L) << 16) | \
+ ((((u_int64_t)lun) & 0x000000000000ffffL) << 48))
/*
* Maximum length for a CAM CDB.
@@ -75,7 +81,7 @@ typedef enum {
CAM_RL_VALUES
} cam_rl;
/*
- * The generation number is incremented everytime a new entry is entered into
+ * The generation number is incremented every time a new entry is entered into
* the queue giving round robin per priority level scheduling.
*/
typedef struct {
@@ -116,7 +122,7 @@ typedef enum {
enum {
SF_RETRY_UA = 0x01, /* Retry UNIT ATTENTION conditions. */
SF_NO_PRINT = 0x02, /* Never print error status. */
- SF_QUIET_IR = 0x04, /* Be quiet about Illegal Request reponses */
+ SF_QUIET_IR = 0x04, /* Be quiet about Illegal Request responses */
SF_PRINT_ALWAYS = 0x08, /* Always print error status. */
SF_NO_RECOVERY = 0x10, /* Don't do active error recovery. */
SF_NO_RETRY = 0x20, /* Don't do any retries. */
@@ -125,69 +131,184 @@ enum {
/* CAM Status field values */
typedef enum {
- CAM_REQ_INPROG, /* CCB request is in progress */
- CAM_REQ_CMP, /* CCB request completed without error */
- CAM_REQ_ABORTED, /* CCB request aborted by the host */
- CAM_UA_ABORT, /* Unable to abort CCB request */
- CAM_REQ_CMP_ERR, /* CCB request completed with an error */
- CAM_BUSY, /* CAM subsystem is busy */
- CAM_REQ_INVALID, /* CCB request was invalid */
- CAM_PATH_INVALID, /* Supplied Path ID is invalid */
- CAM_DEV_NOT_THERE, /* SCSI Device Not Installed/there */
- CAM_UA_TERMIO, /* Unable to terminate I/O CCB request */
- CAM_SEL_TIMEOUT, /* Target Selection Timeout */
- CAM_CMD_TIMEOUT, /* Command timeout */
- CAM_SCSI_STATUS_ERROR, /* SCSI error, look at error code in CCB */
- CAM_MSG_REJECT_REC, /* Message Reject Received */
- CAM_SCSI_BUS_RESET, /* SCSI Bus Reset Sent/Received */
- CAM_UNCOR_PARITY, /* Uncorrectable parity error occurred */
- CAM_AUTOSENSE_FAIL = 0x10,/* Autosense: request sense cmd fail */
- CAM_NO_HBA, /* No HBA Detected error */
- CAM_DATA_RUN_ERR, /* Data Overrun error */
- CAM_UNEXP_BUSFREE, /* Unexpected Bus Free */
- CAM_SEQUENCE_FAIL, /* Target Bus Phase Sequence Failure */
- CAM_CCB_LEN_ERR, /* CCB length supplied is inadequate */
- CAM_PROVIDE_FAIL, /* Unable to provide requested capability */
- CAM_BDR_SENT, /* A SCSI BDR msg was sent to target */
- CAM_REQ_TERMIO, /* CCB request terminated by the host */
- CAM_UNREC_HBA_ERROR, /* Unrecoverable Host Bus Adapter Error */
- CAM_REQ_TOO_BIG, /* The request was too large for this host */
- CAM_REQUEUE_REQ, /*
- * This request should be requeued to preserve
- * transaction ordering. This typically occurs
- * when the SIM recognizes an error that should
- * freeze the queue and must place additional
- * requests for the target at the sim level
- * back into the XPT queue.
- */
- CAM_ATA_STATUS_ERROR, /* ATA error, look at error code in CCB */
- CAM_SCSI_IT_NEXUS_LOST, /* Initiator/Target Nexus lost. */
- CAM_SMP_STATUS_ERROR, /* SMP error, look at error code in CCB */
- CAM_IDE = 0x33, /* Initiator Detected Error */
- CAM_RESRC_UNAVAIL, /* Resource Unavailable */
- CAM_UNACKED_EVENT, /* Unacknowledged Event by Host */
- CAM_MESSAGE_RECV, /* Message Received in Host Target Mode */
- CAM_INVALID_CDB, /* Invalid CDB received in Host Target Mode */
- CAM_LUN_INVALID, /* Lun supplied is invalid */
- CAM_TID_INVALID, /* Target ID supplied is invalid */
- CAM_FUNC_NOTAVAIL, /* The requested function is not available */
- CAM_NO_NEXUS, /* Nexus is not established */
- CAM_IID_INVALID, /* The initiator ID is invalid */
- CAM_CDB_RECVD, /* The SCSI CDB has been received */
- CAM_LUN_ALRDY_ENA, /* The LUN is already enabled for target mode */
- CAM_SCSI_BUSY, /* SCSI Bus Busy */
-
- CAM_DEV_QFRZN = 0x40, /* The DEV queue is frozen w/this err */
-
- /* Autosense data valid for target */
- CAM_AUTOSNS_VALID = 0x80,
- CAM_RELEASE_SIMQ = 0x100,/* SIM ready to take more commands */
- CAM_SIM_QUEUED = 0x200,/* SIM has this command in it's queue */
-
- CAM_STATUS_MASK = 0x3F, /* Mask bits for just the status # */
-
- /* Target Specific Adjunct Status */
- CAM_SENT_SENSE = 0x40000000 /* sent sense with status */
+ /* CCB request is in progress */
+ CAM_REQ_INPROG = 0x00,
+
+ /* CCB request completed without error */
+ CAM_REQ_CMP = 0x01,
+
+ /* CCB request aborted by the host */
+ CAM_REQ_ABORTED = 0x02,
+
+ /* Unable to abort CCB request */
+ CAM_UA_ABORT = 0x03,
+
+ /* CCB request completed with an error */
+ CAM_REQ_CMP_ERR = 0x04,
+
+ /* CAM subsystem is busy */
+ CAM_BUSY = 0x05,
+
+ /* CCB request was invalid */
+ CAM_REQ_INVALID = 0x06,
+
+ /* Supplied Path ID is invalid */
+ CAM_PATH_INVALID = 0x07,
+
+ /* SCSI Device Not Installed/there */
+ CAM_DEV_NOT_THERE = 0x08,
+
+ /* Unable to terminate I/O CCB request */
+ CAM_UA_TERMIO = 0x09,
+
+ /* Target Selection Timeout */
+ CAM_SEL_TIMEOUT = 0x0a,
+
+ /* Command timeout */
+ CAM_CMD_TIMEOUT = 0x0b,
+
+ /* SCSI error, look at error code in CCB */
+ CAM_SCSI_STATUS_ERROR = 0x0c,
+
+ /* Message Reject Received */
+ CAM_MSG_REJECT_REC = 0x0d,
+
+ /* SCSI Bus Reset Sent/Received */
+ CAM_SCSI_BUS_RESET = 0x0e,
+
+ /* Uncorrectable parity error occurred */
+ CAM_UNCOR_PARITY = 0x0f,
+
+ /* Autosense: request sense cmd fail */
+ CAM_AUTOSENSE_FAIL = 0x10,
+
+ /* No HBA Detected error */
+ CAM_NO_HBA = 0x11,
+
+ /* Data Overrun error */
+ CAM_DATA_RUN_ERR = 0x12,
+
+ /* Unexpected Bus Free */
+ CAM_UNEXP_BUSFREE = 0x13,
+
+ /* Target Bus Phase Sequence Failure */
+ CAM_SEQUENCE_FAIL = 0x14,
+
+ /* CCB length supplied is inadequate */
+ CAM_CCB_LEN_ERR = 0x15,
+
+	/* Unable to provide requested capability */
+ CAM_PROVIDE_FAIL = 0x16,
+
+ /* A SCSI BDR msg was sent to target */
+ CAM_BDR_SENT = 0x17,
+
+ /* CCB request terminated by the host */
+ CAM_REQ_TERMIO = 0x18,
+
+ /* Unrecoverable Host Bus Adapter Error */
+ CAM_UNREC_HBA_ERROR = 0x19,
+
+ /* Request was too large for this host */
+ CAM_REQ_TOO_BIG = 0x1a,
+
+ /*
+ * This request should be requeued to preserve
+ * transaction ordering. This typically occurs
+ * when the SIM recognizes an error that should
+ * freeze the queue and must place additional
+ * requests for the target at the sim level
+ * back into the XPT queue.
+ */
+ CAM_REQUEUE_REQ = 0x1b,
+
+ /* ATA error, look at error code in CCB */
+ CAM_ATA_STATUS_ERROR = 0x1c,
+
+ /* Initiator/Target Nexus lost. */
+ CAM_SCSI_IT_NEXUS_LOST = 0x1d,
+
+ /* SMP error, look at error code in CCB */
+ CAM_SMP_STATUS_ERROR = 0x1e,
+
+ /*
+ * Command completed without error but exceeded the soft
+ * timeout threshold.
+ */
+ CAM_REQ_SOFTTIMEOUT = 0x1f,
+
+ /*
+ * 0x20 - 0x32 are unassigned
+ */
+
+ /* Initiator Detected Error */
+ CAM_IDE = 0x33,
+
+ /* Resource Unavailable */
+ CAM_RESRC_UNAVAIL = 0x34,
+
+ /* Unacknowledged Event by Host */
+ CAM_UNACKED_EVENT = 0x35,
+
+ /* Message Received in Host Target Mode */
+ CAM_MESSAGE_RECV = 0x36,
+
+ /* Invalid CDB received in Host Target Mode */
+ CAM_INVALID_CDB = 0x37,
+
+ /* Lun supplied is invalid */
+ CAM_LUN_INVALID = 0x38,
+
+ /* Target ID supplied is invalid */
+ CAM_TID_INVALID = 0x39,
+
+ /* The requested function is not available */
+ CAM_FUNC_NOTAVAIL = 0x3a,
+
+ /* Nexus is not established */
+ CAM_NO_NEXUS = 0x3b,
+
+ /* The initiator ID is invalid */
+ CAM_IID_INVALID = 0x3c,
+
+ /* The SCSI CDB has been received */
+ CAM_CDB_RECVD = 0x3d,
+
+ /* The LUN is already enabled for target mode */
+ CAM_LUN_ALRDY_ENA = 0x3e,
+
+ /* SCSI Bus Busy */
+ CAM_SCSI_BUSY = 0x3f,
+
+
+ /*
+ * Flags
+ */
+
+ /* The DEV queue is frozen w/this err */
+ CAM_DEV_QFRZN = 0x40,
+
+ /* Autosense data valid for target */
+ CAM_AUTOSNS_VALID = 0x80,
+
+ /* SIM ready to take more commands */
+ CAM_RELEASE_SIMQ = 0x100,
+
+	/* SIM has this command in its queue */
+ CAM_SIM_QUEUED = 0x200,
+
+ /* Quality of service data is valid */
+ CAM_QOS_VALID = 0x400,
+
+ /* Mask bits for just the status # */
+ CAM_STATUS_MASK = 0x3F,
+
+ /*
+ * Target Specific Adjunct Status
+ */
+
+ /* sent sense with status */
+ CAM_SENT_SENSE = 0x40000000
} cam_status;
typedef enum {
@@ -225,6 +346,15 @@ typedef enum {
CAM_EAF_PRINT_RESULT = 0x20
} cam_error_ata_flags;
+typedef enum {
+ CAM_STRVIS_FLAG_NONE = 0x00,
+ CAM_STRVIS_FLAG_NONASCII_MASK = 0x03,
+ CAM_STRVIS_FLAG_NONASCII_TRIM = 0x00,
+ CAM_STRVIS_FLAG_NONASCII_RAW = 0x01,
+ CAM_STRVIS_FLAG_NONASCII_SPC = 0x02,
+ CAM_STRVIS_FLAG_NONASCII_ESC = 0x03
+} cam_strvis_flags;
+
struct cam_status_entry
{
cam_status status_code;
@@ -237,6 +367,7 @@ extern const int num_cam_status_entries;
extern int cam_sort_io_queues;
#endif
union ccb;
+struct sbuf;
#ifdef SYSCTL_DECL /* from sysctl.h */
SYSCTL_DECL(_kern_cam);
@@ -249,6 +380,8 @@ caddr_t cam_quirkmatch(caddr_t target, caddr_t quirk_table, int num_entries,
int entry_size, cam_quirkmatch_t *comp_func);
void cam_strvis(u_int8_t *dst, const u_int8_t *src, int srclen, int dstlen);
+void cam_strvis_sbuf(struct sbuf *sb, const u_int8_t *src, int srclen,
+ uint32_t flags);
int cam_strmatch(const u_int8_t *str, const u_int8_t *pattern, int str_len);
const struct cam_status_entry*
diff --git a/freebsd/sys/cam/cam_ccb.h b/freebsd/sys/cam/cam_ccb.h
index 359064b6..e00b5bd3 100644
--- a/freebsd/sys/cam/cam_ccb.h
+++ b/freebsd/sys/cam/cam_ccb.h
@@ -41,7 +41,7 @@
#include <cam/cam_debug.h>
#include <cam/scsi/scsi_all.h>
#include <cam/ata/ata_all.h>
-
+#include <cam/nvme/nvme_all.h>
#ifdef __rtems__
#include <rtems/blkdev.h>
#endif /* __rtems__ */
@@ -67,7 +67,7 @@ typedef enum {
* Perform transport negotiation
* with this command.
*/
- CAM_DATA_ISPHYS = 0x00200000,/* Data type with physical addrs */
+ CAM_DATA_ISPHYS = 0x00000010,/* Data type with physical addrs */
CAM_DIS_AUTOSENSE = 0x00000020,/* Disable autosense feature */
CAM_DIR_BOTH = 0x00000000,/* Data direction (00:IN/OUT) */
CAM_DIR_IN = 0x00000040,/* Data direction (01:DATA IN) */
@@ -75,10 +75,10 @@ typedef enum {
CAM_DIR_NONE = 0x000000C0,/* Data direction (11:no data) */
CAM_DIR_MASK = 0x000000C0,/* Data direction Mask */
CAM_DATA_VADDR = 0x00000000,/* Data type (000:Virtual) */
- CAM_DATA_PADDR = 0x00200000,/* Data type (001:Physical) */
- CAM_DATA_SG = 0x00000010,/* Data type (010:sglist) */
- CAM_DATA_SG_PADDR = 0x00200010,/* Data type (011:sglist phys) */
- CAM_DATA_BIO = 0x00040000,/* Data type (100:bio) */
+ CAM_DATA_PADDR = 0x00000010,/* Data type (001:Physical) */
+ CAM_DATA_SG = 0x00040000,/* Data type (010:sglist) */
+ CAM_DATA_SG_PADDR = 0x00040010,/* Data type (011:sglist phys) */
+ CAM_DATA_BIO = 0x00200000,/* Data type (100:bio) */
CAM_DATA_MASK = 0x00240010,/* Data type mask */
CAM_SOFT_RST_OP = 0x00000100,/* Use Soft reset alternative */
CAM_ENG_SYNC = 0x00000200,/* Flush resid bytes on complete */
@@ -95,11 +95,6 @@ typedef enum {
CAM_CDB_PHYS = 0x00400000,/* CDB poiner is physical */
CAM_ENG_SGLIST = 0x00800000,/* SG list is for the HBA engine */
-/* Compatibility for FreeBSD 9.x*/
- CAM_SCATTER_VALID = 0x00000010,/* These exist for src compat for*/
- CAM_SG_LIST_PHYS = 0x00200010,/* old drivers. Hardly anything */
- CAM_DATA_PHYS = 0x00200000,/* uses them. */
-
/* Phase cognizant mode flags */
CAM_DIS_AUTOSRP = 0x01000000,/* Disable autosave/restore ptrs */
CAM_DIS_AUTODISC = 0x02000000,/* Disable auto disconnect */
@@ -113,9 +108,17 @@ typedef enum {
CAM_SEND_SENSE = 0x08000000,/* Send sense data with status */
CAM_TERM_IO = 0x10000000,/* Terminate I/O Message sup. */
CAM_DISCONNECT = 0x20000000,/* Disconnects are mandatory */
- CAM_SEND_STATUS = 0x40000000 /* Send status after data phase */
+ CAM_SEND_STATUS = 0x40000000,/* Send status after data phase */
+
+ CAM_UNLOCKED = 0x80000000 /* Call callback without lock. */
} ccb_flags;
+typedef enum {
+ CAM_USER_DATA_ADDR = 0x00000002,/* Userspace data pointers */
+ CAM_SG_FORMAT_IOVEC = 0x00000004,/* iovec instead of busdma S/G*/
+ CAM_UNMAPPED_BUF = 0x00000008 /* use unmapped I/O */
+} ccb_xflags;
+
/* XPT Opcodes for xpt_action */
typedef enum {
/* Function code flags are bits greater than 0xff */
@@ -156,6 +159,9 @@ typedef enum {
/* Device statistics (error counts, etc.) */
XPT_DEV_ADVINFO = 0x0e,
/* Get/Set Device advanced information */
+ XPT_ASYNC = 0x0f | XPT_FC_QUEUED | XPT_FC_USER_CCB
+ | XPT_FC_XPT_ONLY,
+ /* Asynchronous event */
/* SCSI Control Functions: 0x10->0x1F */
XPT_ABORT = 0x10,
/* Abort the specified CCB */
@@ -187,19 +193,27 @@ typedef enum {
XPT_ATA_IO = 0x18 | XPT_FC_DEV_QUEUED,
/* Execute the requested ATA I/O operation */
- XPT_GET_SIM_KNOB = 0x18,
- /*
- * Get SIM specific knob values.
- */
+ XPT_GET_SIM_KNOB_OLD = 0x18, /* Compat only */
XPT_SET_SIM_KNOB = 0x19,
/*
* Set SIM specific knob values.
*/
+ XPT_GET_SIM_KNOB = 0x1a,
+ /*
+ * Get SIM specific knob values.
+ */
+
XPT_SMP_IO = 0x1b | XPT_FC_DEV_QUEUED,
/* Serial Management Protocol */
+ XPT_NVME_IO = 0x1c | XPT_FC_DEV_QUEUED,
+	/* Execute the requested NVMe I/O operation */
+
+ XPT_MMCSD_IO = 0x1d | XPT_FC_DEV_QUEUED,
+ /* Placeholder for MMC / SD / SDIO I/O stuff */
+
XPT_SCAN_TGT = 0x1E | XPT_FC_QUEUED | XPT_FC_USER_CCB
| XPT_FC_XPT_ONLY,
/* Scan Target */
@@ -227,6 +241,8 @@ typedef enum {
/* Notify Host Target driver of event */
XPT_NOTIFY_ACKNOWLEDGE = 0x37 | XPT_FC_QUEUED | XPT_FC_USER_CCB,
/* Acknowledgement of event */
+ XPT_REPROBE_LUN = 0x38 | XPT_FC_QUEUED | XPT_FC_USER_CCB,
+ /* Query device capacity and notify GEOM */
/* Vendor Unique codes: 0x80->0x8F */
XPT_VUNIQUE = 0x80
@@ -253,6 +269,7 @@ typedef enum {
PROTO_ATAPI, /* AT Attachment Packetized Interface */
PROTO_SATAPM, /* SATA Port Multiplier */
PROTO_SEMB, /* SATA Enclosure Management Bridge */
+ PROTO_NVME, /* NVME */
} cam_proto;
typedef enum {
@@ -267,12 +284,15 @@ typedef enum {
XPORT_SAS, /* Serial Attached SCSI */
XPORT_SATA, /* Serial AT Attachment */
XPORT_ISCSI, /* iSCSI */
+ XPORT_SRP, /* SCSI RDMA Protocol */
+ XPORT_NVME, /* NVMe over PCIe */
} cam_xport;
+#define XPORT_IS_NVME(t) ((t) == XPORT_NVME)
#define XPORT_IS_ATA(t) ((t) == XPORT_ATA || (t) == XPORT_SATA)
#define XPORT_IS_SCSI(t) ((t) != XPORT_UNKNOWN && \
(t) != XPORT_UNSPECIFIED && \
- !XPORT_IS_ATA(t))
+ !XPORT_IS_ATA(t) && !XPORT_IS_NVME(t))
#define XPORT_DEVSTAT_TYPE(t) (XPORT_IS_ATA(t) ? DEVSTAT_TYPE_IF_IDE : \
XPORT_IS_SCSI(t) ? DEVSTAT_TYPE_IF_SCSI : \
DEVSTAT_TYPE_IF_OTHER)
@@ -305,6 +325,12 @@ typedef union {
u_int8_t bytes[CCB_SIM_PRIV_SIZE * sizeof(ccb_priv_entry)];
} ccb_spriv_area;
+typedef struct {
+ struct timeval *etime;
+ uintptr_t sim_data;
+ uintptr_t periph_data;
+} ccb_qos_area;
+
struct ccb_hdr {
#ifndef __rtems__
cam_pinfo pinfo; /* Info for priority scheduling */
@@ -326,18 +352,15 @@ struct ccb_hdr {
target_id_t target_id; /* Target device ID */
lun_id_t target_lun; /* Target LUN number */
u_int32_t flags; /* ccb_flags */
+ u_int32_t xflags; /* Extended flags */
#ifndef __rtems__
ccb_ppriv_area periph_priv;
ccb_spriv_area sim_priv;
+ ccb_qos_area qos;
#endif /* __rtems__ */
- u_int32_t timeout; /* Timeout value */
-
+ u_int32_t timeout; /* Hard timeout value in mseconds */
#ifndef __rtems__
- /*
- * Deprecated, only for use by non-MPSAFE SIMs. All others must
- * allocate and initialize their own callout storage.
- */
- struct callout_handle timeout_ch;
+ struct timeval softtimeout; /* Soft timeout value in sec + usec */
#endif /* __rtems__ */
};
@@ -350,6 +373,8 @@ struct ccb_getdev {
u_int8_t serial_num[252];
u_int8_t inq_flags;
u_int8_t serial_num_len;
+ const struct nvme_controller_data *nvme_cdata;
+ const struct nvme_namespace_data *nvme_data;
};
/* Device Statistics CCB */
@@ -357,8 +382,8 @@ struct ccb_getdevstats {
struct ccb_hdr ccb_h;
int dev_openings; /* Space left for more work on device*/
int dev_active; /* Transactions running on the device */
- int devq_openings; /* Space left for more queued work */
- int devq_queued; /* Transactions queued to be sent */
+ int allocated; /* CCBs allocated for the device */
+ int queued; /* CCBs queued to be sent to the device */
int held; /*
* CCBs held by peripheral drivers
* for this device
@@ -560,7 +585,7 @@ struct ccb_dev_match {
/*
* Definitions for the path inquiry CCB fields.
*/
-#define CAM_VERSION 0x17 /* Hex value for current version */
+#define CAM_VERSION 0x19 /* Hex value for current version */
typedef enum {
PI_MDP_ABLE = 0x80, /* Supports MDP message */
@@ -583,6 +608,8 @@ typedef enum {
} pi_tmflag;
typedef enum {
+ PIM_ATA_EXT = 0x200,/* ATA requests can understand ata_ext requests */
+ PIM_EXTLUNS = 0x100,/* 64bit extended LUNs supported */
PIM_SCANHILO = 0x80, /* Bus scans from high ID to low ID */
PIM_NOREMOVE = 0x40, /* Removeable devices not included in scan */
PIM_NOINITIATOR = 0x20, /* Initiator role not supported. */
@@ -608,14 +635,19 @@ struct ccb_pathinq_settings_fc {
struct ccb_pathinq_settings_sas {
u_int32_t bitrate; /* Mbps */
};
+
+struct ccb_pathinq_settings_nvme {
+ uint16_t nsid; /* Namespace ID for this path */
+};
+
#define PATHINQ_SETTINGS_SIZE 128
struct ccb_pathinq {
struct ccb_hdr ccb_h;
u_int8_t version_num; /* Version number for the SIM/HBA */
u_int8_t hba_inquiry; /* Mimic of INQ byte 7 for the HBA */
- u_int8_t target_sprt; /* Flags for target mode support */
- u_int8_t hba_misc; /* Misc HBA features */
+ u_int16_t target_sprt; /* Flags for target mode support */
+ u_int32_t hba_misc; /* Misc HBA features */
u_int16_t hba_eng_cnt; /* HBA engine count */
/* Vendor Unique capabilities */
u_int8_t vuhba_flags[VUHBALEN];
@@ -638,6 +670,7 @@ struct ccb_pathinq {
struct ccb_pathinq_settings_spi spi;
struct ccb_pathinq_settings_fc fc;
struct ccb_pathinq_settings_sas sas;
+ struct ccb_pathinq_settings_nvme nvme;
char ccb_pathinq_settings_opaque[PATHINQ_SETTINGS_SIZE];
} xport_specific;
u_int maxio; /* Max supported I/O size, in bytes. */
@@ -732,6 +765,13 @@ struct ccb_scsiio {
#endif /* __rtems__ */
};
+static __inline uint8_t *
+scsiio_cdb_ptr(struct ccb_scsiio *ccb)
+{
+ return ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
+ ccb->cdb_io.cdb_ptr : ccb->cdb_io.cdb_bytes);
+}
+
/*
* ATA I/O Request CCB used for the XPT_ATA_IO function code.
*/
@@ -743,15 +783,10 @@ struct ccb_ataio {
u_int8_t *data_ptr; /* Ptr to the data buf/SG list */
u_int32_t dxfer_len; /* Data transfer length */
u_int32_t resid; /* Transfer residual length: 2's comp */
- u_int8_t tag_action; /* What to do for tag queueing */
- /*
- * The tag action should be either the define below (to send a
- * non-tagged transaction) or one of the defined scsi tag messages
- * from scsi_message.h.
- */
-#define CAM_TAG_ACTION_NONE 0x00
- u_int tag_id; /* tag id from initator (target mode) */
- u_int init_id; /* initiator id of who selected */
+ u_int8_t ata_flags; /* Flags for the rest of the buffer */
+#define ATA_FLAG_AUX 0x1
+ uint32_t aux;
+ uint32_t unused;
};
struct ccb_accept_tio {
@@ -779,6 +814,19 @@ struct ccb_relsim {
};
/*
+ * NVMe I/O Request CCB used for the XPT_NVME_IO function code.
+ */
+struct ccb_nvmeio {
+ struct ccb_hdr ccb_h;
+ union ccb *next_ccb; /* Ptr for next CCB for action */
+ struct nvme_command cmd; /* NVME command, per NVME standard */
+ struct nvme_completion cpl; /* NVME completion, per NVME standard */
+ uint8_t *data_ptr; /* Ptr to the data buf/SG list */
+ uint32_t dxfer_len; /* Data transfer length */
+ uint32_t resid; /* Transfer residual length: 2's comp unused ?*/
+};
+
+/*
* Definitions for the asynchronous callback CCB fields.
*/
typedef enum {
@@ -959,6 +1007,18 @@ struct ccb_trans_settings_sata {
#define CTS_SATA_CAPS_D_APST 0x00020000
};
+struct ccb_trans_settings_nvme
+{
+ u_int valid; /* Which fields to honor */
+#define CTS_NVME_VALID_SPEC 0x01
+#define CTS_NVME_VALID_CAPS 0x02
+ u_int spec_major; /* Major version of spec supported */
+	u_int 	spec_minor;	/* Minor version of spec supported */
+ u_int spec_tiny; /* Tiny version of spec supported */
+	u_int	max_xfer;	/* Max transfer size (0 -> unlimited) */
+ u_int caps;
+};
+
/* Get/Set transfer rate/width/disconnection/tag queueing settings */
struct ccb_trans_settings {
struct ccb_hdr ccb_h;
@@ -971,6 +1031,7 @@ struct ccb_trans_settings {
u_int valid; /* Which fields to honor */
struct ccb_trans_settings_ata ata;
struct ccb_trans_settings_scsi scsi;
+ struct ccb_trans_settings_nvme nvme;
} proto_specific;
union {
u_int valid; /* Which fields to honor */
@@ -979,6 +1040,7 @@ struct ccb_trans_settings {
struct ccb_trans_settings_sas sas;
struct ccb_trans_settings_pata ata;
struct ccb_trans_settings_sata sata;
+ struct ccb_trans_settings_nvme nvme;
} xport_specific;
};
@@ -1093,7 +1155,17 @@ struct ccb_notify_acknowledge {
u_int tag_id; /* Tag for immediate notify */
u_int seq_id; /* Tar for target of notify */
u_int initiator_id; /* Initiator Identifier */
- u_int arg; /* Function specific */
+ u_int arg; /* Response information */
+ /*
+ * Lower byte of arg is one of RESPONSE CODE values defined below
+ * (subset of response codes from SPL-4 and FCP-4 specifications),
+ * upper 3 bytes is code-specific ADDITIONAL RESPONSE INFORMATION.
+ */
+#define CAM_RSP_TMF_COMPLETE 0x00
+#define CAM_RSP_TMF_REJECTED 0x04
+#define CAM_RSP_TMF_FAILED 0x05
+#define CAM_RSP_TMF_SUCCEEDED 0x08
+#define CAM_RSP_TMF_INCORRECT_LUN 0x09
};
/* HBA engine structures. */
@@ -1159,6 +1231,7 @@ struct ccb_eng_exec { /* This structure must match SCSIIO size */
struct ccb_dev_advinfo {
struct ccb_hdr ccb_h;
uint32_t flags;
+#define CDAI_FLAG_NONE 0x0 /* No flags set */
#define CDAI_FLAG_STORE 0x1 /* If set, action becomes store */
uint32_t buftype; /* IN: Type of data being requested */
/* NB: buftype is interpreted on a per-transport basis */
@@ -1166,6 +1239,7 @@ struct ccb_dev_advinfo {
#define CDAI_TYPE_SERIAL_NUM 2
#define CDAI_TYPE_PHYS_PATH 3
#define CDAI_TYPE_RCAPLONG 4
+#define CDAI_TYPE_EXT_INQ 5
off_t bufsiz; /* IN: Size of external buffer */
#define CAM_SCSI_DEVID_MAXLEN 65536 /* length in buffer is an uint16_t */
off_t provsiz; /* OUT: Size required/used */
@@ -1173,6 +1247,16 @@ struct ccb_dev_advinfo {
};
/*
+ * CCB for sending async events
+ */
+struct ccb_async {
+ struct ccb_hdr ccb_h;
+ uint32_t async_code;
+ off_t async_arg_size;
+ void *async_arg_ptr;
+};
+
+/*
* Union of all CCB types for kernel space allocation. This union should
* never be used for manipulating CCBs - its only use is for the allocation
* and deallocation of raw CCB space and is the return type of xpt_ccb_alloc
@@ -1211,8 +1295,14 @@ union ccb {
struct ccb_debug cdbg;
struct ccb_ataio ataio;
struct ccb_dev_advinfo cdai;
+ struct ccb_async casync;
+ struct ccb_nvmeio nvmeio;
};
+#define CCB_CLEAR_ALL_EXCEPT_HDR(ccbp) \
+ bzero((char *)(ccbp) + sizeof((ccbp)->ccb_h), \
+ sizeof(*(ccbp)) - sizeof((ccbp)->ccb_h))
+
__BEGIN_DECLS
static __inline void
cam_fill_csio(struct ccb_scsiio *csio, u_int32_t retries,
@@ -1223,6 +1313,12 @@ cam_fill_csio(struct ccb_scsiio *csio, u_int32_t retries,
u_int32_t timeout);
static __inline void
+cam_fill_nvmeio(struct ccb_nvmeio *nvmeio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int32_t flags, u_int8_t *data_ptr, u_int32_t dxfer_len,
+ u_int32_t timeout);
+
+static __inline void
cam_fill_ctio(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
u_int32_t flags, u_int tag_action, u_int tag_id,
@@ -1253,6 +1349,7 @@ cam_fill_csio(struct ccb_scsiio *csio, u_int32_t retries,
{
csio->ccb_h.func_code = XPT_SCSI_IO;
csio->ccb_h.flags = flags;
+ csio->ccb_h.xflags = 0;
csio->ccb_h.retry_count = retries;
csio->ccb_h.cbfcnp = cbfcnp;
csio->ccb_h.timeout = timeout;
@@ -1272,6 +1369,7 @@ cam_fill_ctio(struct ccb_scsiio *csio, u_int32_t retries,
{
csio->ccb_h.func_code = XPT_CONT_TARGET_IO;
csio->ccb_h.flags = flags;
+ csio->ccb_h.xflags = 0;
csio->ccb_h.retry_count = retries;
csio->ccb_h.cbfcnp = cbfcnp;
csio->ccb_h.timeout = timeout;
@@ -1286,7 +1384,7 @@ cam_fill_ctio(struct ccb_scsiio *csio, u_int32_t retries,
static __inline void
cam_fill_ataio(struct ccb_ataio *ataio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
- u_int32_t flags, u_int tag_action,
+ u_int32_t flags, u_int tag_action __unused,
u_int8_t *data_ptr, u_int32_t dxfer_len,
u_int32_t timeout)
{
@@ -1297,7 +1395,7 @@ cam_fill_ataio(struct ccb_ataio *ataio, u_int32_t retries,
ataio->ccb_h.timeout = timeout;
ataio->data_ptr = data_ptr;
ataio->dxfer_len = dxfer_len;
- ataio->tag_action = tag_action;
+ ataio->ata_flags = 0;
}
static __inline void
@@ -1341,6 +1439,20 @@ cam_ccb_status(union ccb *ccb)
void cam_calc_geometry(struct ccb_calc_geometry *ccg, int extended);
+static __inline void
+cam_fill_nvmeio(struct ccb_nvmeio *nvmeio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int32_t flags, u_int8_t *data_ptr, u_int32_t dxfer_len,
+ u_int32_t timeout)
+{
+ nvmeio->ccb_h.func_code = XPT_NVME_IO;
+ nvmeio->ccb_h.flags = flags;
+ nvmeio->ccb_h.retry_count = retries;
+ nvmeio->ccb_h.cbfcnp = cbfcnp;
+ nvmeio->ccb_h.timeout = timeout;
+ nvmeio->data_ptr = data_ptr;
+ nvmeio->dxfer_len = dxfer_len;
+}
__END_DECLS
#endif /* _CAM_CAM_CCB_H */
diff --git a/freebsd/sys/cam/cam_periph.h b/freebsd/sys/cam/cam_periph.h
index a58ec947..e28d5b11 100644
--- a/freebsd/sys/cam/cam_periph.h
+++ b/freebsd/sys/cam/cam_periph.h
@@ -35,6 +35,7 @@
#include <cam/cam_sim.h>
#ifdef _KERNEL
+#include <sys/taskqueue.h>
#include <cam/cam_xpt.h>
@@ -90,7 +91,7 @@ typedef enum {
CAM_PERIPH_BIO
} cam_periph_type;
-/* Generically usefull offsets into the peripheral private area */
+/* Generically useful offsets into the peripheral private area */
#define ppriv_ptr0 periph_priv.entries[0].ptr
#define ppriv_ptr1 periph_priv.entries[1].ptr
#define ppriv_field0 periph_priv.entries[0].field
@@ -103,7 +104,6 @@ typedef cam_status periph_ctor_t (struct cam_periph *periph,
typedef void periph_oninv_t (struct cam_periph *periph);
typedef void periph_dtor_t (struct cam_periph *periph);
struct cam_periph {
- cam_pinfo pinfo;
periph_start_t *periph_start;
periph_oninv_t *periph_oninval;
periph_dtor_t *periph_dtor;
@@ -120,15 +120,20 @@ struct cam_periph {
#define CAM_PERIPH_INVALID 0x08
#define CAM_PERIPH_NEW_DEV_FOUND 0x10
#define CAM_PERIPH_RECOVERY_INPROG 0x20
+#define CAM_PERIPH_RUN_TASK 0x40
#define CAM_PERIPH_FREE 0x80
#define CAM_PERIPH_ANNOUNCED 0x100
- u_int32_t immediate_priority;
+ uint32_t scheduled_priority;
+ uint32_t immediate_priority;
+ int periph_allocating;
+ int periph_allocated;
u_int32_t refcount;
SLIST_HEAD(, ccb_hdr) ccb_list; /* For "immediate" requests */
SLIST_ENTRY(cam_periph) periph_links;
TAILQ_ENTRY(cam_periph) unit_links;
ac_callback_t *deferred_callback;
ac_code deferred_ac;
+ struct task periph_run_task;
};
#define CAM_PERIPH_MAXMAPS 2
@@ -147,6 +152,7 @@ cam_status cam_periph_alloc(periph_ctor_t *periph_ctor,
struct cam_periph *cam_periph_find(struct cam_path *path, char *name);
int cam_periph_list(struct cam_path *, struct sbuf *);
cam_status cam_periph_acquire(struct cam_periph *periph);
+void cam_periph_doacquire(struct cam_periph *periph);
void cam_periph_release(struct cam_periph *periph);
void cam_periph_release_locked(struct cam_periph *periph);
void cam_periph_release_locked_buses(struct cam_periph *periph);
@@ -154,7 +160,8 @@ int cam_periph_hold(struct cam_periph *periph, int priority);
void cam_periph_unhold(struct cam_periph *periph);
void cam_periph_invalidate(struct cam_periph *periph);
int cam_periph_mapmem(union ccb *ccb,
- struct cam_periph_map_info *mapinfo);
+ struct cam_periph_map_info *mapinfo,
+ u_int maxmap);
void cam_periph_unmapmem(union ccb *ccb,
struct cam_periph_map_info *mapinfo);
union ccb *cam_periph_getccb(struct cam_periph *periph,
@@ -185,30 +192,26 @@ void cam_periph_freeze_after_event(struct cam_periph *periph,
int cam_periph_error(union ccb *ccb, cam_flags camflags,
u_int32_t sense_flags, union ccb *save_ccb);
-static __inline void
-cam_periph_lock(struct cam_periph *periph)
+static __inline struct mtx *
+cam_periph_mtx(struct cam_periph *periph)
{
- mtx_lock(periph->sim->mtx);
+ return (xpt_path_mtx(periph->path));
}
-static __inline void
-cam_periph_unlock(struct cam_periph *periph)
-{
- mtx_unlock(periph->sim->mtx);
-}
+#define cam_periph_owned(periph) \
+ mtx_owned(xpt_path_mtx((periph)->path))
-static __inline int
-cam_periph_owned(struct cam_periph *periph)
-{
- return (mtx_owned(periph->sim->mtx));
-}
+#define cam_periph_lock(periph) \
+ mtx_lock(xpt_path_mtx((periph)->path))
-static __inline int
-cam_periph_sleep(struct cam_periph *periph, void *chan, int priority,
- const char *wmesg, int timo)
-{
- return (msleep(chan, periph->sim->mtx, priority, wmesg, timo));
-}
+#define cam_periph_unlock(periph) \
+ mtx_unlock(xpt_path_mtx((periph)->path))
+
+#define cam_periph_assert(periph, what) \
+ mtx_assert(xpt_path_mtx((periph)->path), (what))
+
+#define cam_periph_sleep(periph, chan, priority, wmesg, timo) \
+ xpt_path_sleep((periph)->path, (chan), (priority), (wmesg), (timo))
static inline struct cam_periph *
cam_periph_acquire_first(struct periph_driver *driver)
@@ -230,7 +233,7 @@ cam_periph_acquire_next(struct cam_periph *pperiph)
{
struct cam_periph *periph = pperiph;
- mtx_assert(pperiph->sim->mtx, MA_NOTOWNED);
+ cam_periph_assert(pperiph, MA_NOTOWNED);
xpt_lock_buses();
do {
periph = TAILQ_NEXT(periph, unit_links);
diff --git a/freebsd/sys/cam/cam_sim.h b/freebsd/sys/cam/cam_sim.h
index ba0ac18f..7309e97c 100644
--- a/freebsd/sys/cam/cam_sim.h
+++ b/freebsd/sys/cam/cam_sim.h
@@ -145,24 +145,10 @@ struct cam_sim {
u_int32_t flags;
#define CAM_SIM_REL_TIMEOUT_PENDING 0x01
#define CAM_SIM_MPSAFE 0x02
-#define CAM_SIM_ON_DONEQ 0x04
-#define CAM_SIM_POLLED 0x08
-#define CAM_SIM_BATCH 0x10
struct callout callout;
struct cam_devq *devq; /* Device Queue to use for this SIM */
int refcount; /* References to the SIM. */
-
- /* "Pool" of inactive ccbs managed by xpt_get_ccb and xpt_release_ccb */
- SLIST_HEAD(,ccb_hdr) ccb_freeq;
- /*
- * Maximum size of ccb pool. Modified as devices are added/removed
- * or have their * opening counts changed.
- */
- u_int max_ccbs;
- /* Current count of allocated ccbs */
- u_int ccb_count;
#endif /* __rtems__ */
-
};
#define CAM_SIM_LOCK(sim) mtx_lock((sim)->mtx)
diff --git a/freebsd/sys/cam/cam_xpt.h b/freebsd/sys/cam/cam_xpt.h
index 97933b98..ba5c924a 100644
--- a/freebsd/sys/cam/cam_xpt.h
+++ b/freebsd/sys/cam/cam_xpt.h
@@ -56,6 +56,7 @@ struct cam_path;
struct async_node {
SLIST_ENTRY(async_node) links;
u_int32_t event_enable; /* Async Event enables */
+ u_int32_t event_lock; /* Take SIM lock for handlers. */
void (*callback)(void *arg, u_int32_t code,
struct cam_path *path, void *args);
void *callback_arg;
@@ -69,6 +70,10 @@ void xpt_action_default(union ccb *new_ccb);
union ccb *xpt_alloc_ccb(void);
union ccb *xpt_alloc_ccb_nowait(void);
void xpt_free_ccb(union ccb *free_ccb);
+void xpt_setup_ccb_flags(struct ccb_hdr *ccb_h,
+ struct cam_path *path,
+ u_int32_t priority,
+ u_int32_t flags);
void xpt_setup_ccb(struct ccb_hdr *ccb_h,
struct cam_path *path,
u_int32_t priority);
@@ -100,7 +105,6 @@ int xpt_path_string(struct cam_path *path, char *str,
path_id_t xpt_path_path_id(struct cam_path *path);
target_id_t xpt_path_target_id(struct cam_path *path);
lun_id_t xpt_path_lun_id(struct cam_path *path);
-int xpt_path_legacy_ata_id(struct cam_path *path);
struct cam_sim *xpt_path_sim(struct cam_path *path);
struct cam_periph *xpt_path_periph(struct cam_path *path);
void xpt_async(u_int32_t async_code, struct cam_path *path,
@@ -110,6 +114,13 @@ void xpt_hold_boot(void);
void xpt_release_boot(void);
void xpt_lock_buses(void);
void xpt_unlock_buses(void);
+struct mtx * xpt_path_mtx(struct cam_path *path);
+#define xpt_path_lock(path) mtx_lock(xpt_path_mtx(path))
+#define xpt_path_unlock(path) mtx_unlock(xpt_path_mtx(path))
+#define xpt_path_assert(path, what) mtx_assert(xpt_path_mtx(path), (what))
+#define xpt_path_owned(path) mtx_owned(xpt_path_mtx(path))
+#define xpt_path_sleep(path, chan, priority, wmesg, timo) \
+ msleep((chan), xpt_path_mtx(path), (priority), (wmesg), (timo))
cam_status xpt_register_async(int event, ac_callback_t *cbfunc,
void *cbarg, struct cam_path *path);
cam_status xpt_compile_path(struct cam_path *new_path,
@@ -117,6 +128,10 @@ cam_status xpt_compile_path(struct cam_path *new_path,
path_id_t path_id,
target_id_t target_id,
lun_id_t lun_id);
+cam_status xpt_clone_path(struct cam_path **new_path,
+ struct cam_path *path);
+void xpt_copy_path(struct cam_path *new_path,
+ struct cam_path *path);
void xpt_release_path(struct cam_path *path);
diff --git a/freebsd/sys/cam/cam_xpt_sim.h b/freebsd/sys/cam/cam_xpt_sim.h
index 62ded090..c3575deb 100644
--- a/freebsd/sys/cam/cam_xpt_sim.h
+++ b/freebsd/sys/cam/cam_xpt_sim.h
@@ -49,10 +49,8 @@ u_int32_t xpt_freeze_devq(struct cam_path *path, u_int count);
#endif /* __rtems__ */
void xpt_release_devq(struct cam_path *path,
u_int count, int run_queue);
-int xpt_sim_opened(struct cam_sim *sim);
void xpt_done(union ccb *done_ccb);
-void xpt_batch_start(struct cam_sim *sim);
-void xpt_batch_done(struct cam_sim *sim);
+void xpt_done_direct(union ccb *done_ccb);
#endif
#endif /* _CAM_CAM_XPT_SIM_H */
diff --git a/freebsd/sys/cam/nvme/nvme_all.h b/freebsd/sys/cam/nvme/nvme_all.h
new file mode 100644
index 00000000..3cff74d3
--- /dev/null
+++ b/freebsd/sys/cam/nvme/nvme_all.h
@@ -0,0 +1,48 @@
+/*-
+ * Copyright (c) 2015 Netflix, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer,
+ * without modification, immediately at the beginning of the file.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef CAM_NVME_NVME_ALL_H
+#define CAM_NVME_NVME_ALL_H 1
+
+#include <dev/nvme/nvme.h>
+
+struct ccb_nvmeio;
+
+#define NVME_REV_1 1 /* Supports NVMe 1.2 or earlier */
+
+void nvme_ns_cmd(struct ccb_nvmeio *nvmeio, uint8_t cmd, uint32_t nsid,
+ uint32_t cdw10, uint32_t cdw11, uint32_t cdw12, uint32_t cdw13,
+ uint32_t cdw14, uint32_t cdw15);
+
+int nvme_identify_match(caddr_t identbuffer, caddr_t table_entry);
+
+void nvme_print_ident(const struct nvme_controller_data *, const struct nvme_namespace_data *);
+const char *nvme_op_string(const struct nvme_command *);
+const char *nvme_cmd_string(const struct nvme_command *, char *, size_t);
+
+#endif /* CAM_NVME_NVME_ALL_H */
diff --git a/freebsd/sys/cam/scsi/scsi_all.c b/freebsd/sys/cam/scsi/scsi_all.c
index 7bb0425d..0cb7118a 100644
--- a/freebsd/sys/cam/scsi/scsi_all.c
+++ b/freebsd/sys/cam/scsi/scsi_all.c
@@ -50,11 +50,13 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
+#include <sys/ctype.h>
#else
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <ctype.h>
#endif
#include <cam/cam.h>
@@ -116,6 +118,7 @@ static void fetchtableentries(int sense_key, int asc, int ascq,
struct scsi_inquiry_data *,
const struct sense_key_table_entry **,
const struct asc_table_entry **);
+
#ifdef _KERNEL
static void init_scsi_delay(void);
static int sysctl_scsi_delay(SYSCTL_HANDLER_ARGS);
@@ -159,7 +162,7 @@ static struct scsi_op_quirk_entry scsi_op_quirk_table[] = {
* feel free to change this quirk entry.
*/
{T_CDROM, SIP_MEDIA_REMOVABLE, "PLEXTOR", "CD-ROM PX*", "*"},
- sizeof(plextor_cd_ops)/sizeof(struct op_table_entry),
+ nitems(plextor_cd_ops),
plextor_cd_ops
}
};
@@ -180,7 +183,7 @@ static struct op_table_entry scsi_op_codes[] = {
*
* SCSI Operation Codes
* Numeric Sorted Listing
- * as of 3/11/08
+ * as of 5/26/15
*
* D - DIRECT ACCESS DEVICE (SBC-2) device column key
* .T - SEQUENTIAL ACCESS DEVICE (SSC-2) -----------------
@@ -478,7 +481,8 @@ static struct op_table_entry scsi_op_codes[] = {
*/
/* 88 MM O O O READ(16) */
{ 0x88, D | T | W | O | B, "READ(16)" },
- /* 89 */
+ /* 89 O COMPARE AND WRITE*/
+ { 0x89, D, "COMPARE AND WRITE" },
/* 8A OM O O O WRITE(16) */
{ 0x8A, D | T | W | O | B, "WRITE(16)" },
/* 8B O ORWRITE */
@@ -505,20 +509,25 @@ static struct op_table_entry scsi_op_codes[] = {
{ 0x93, D, "WRITE SAME(16)" },
/* 93 M ERASE(16) */
{ 0x93, T, "ERASE(16)" },
- /* 94 [usage proposed by SCSI Socket Services project] */
- /* 95 [usage proposed by SCSI Socket Services project] */
- /* 96 [usage proposed by SCSI Socket Services project] */
- /* 97 [usage proposed by SCSI Socket Services project] */
+ /* 94 O ZBC OUT */
+ { 0x94, ALL, "ZBC OUT" },
+ /* 95 O ZBC IN */
+ { 0x95, ALL, "ZBC IN" },
+ /* 96 */
+ /* 97 */
/* 98 */
/* 99 */
- /* 9A */
- /* 9B */
- /* 9C */
- /* 9D */
+ /* 9A O WRITE STREAM(16) */
+ { 0x9A, D, "WRITE STREAM(16)" },
+ /* 9B OOOOOOOOOO OOO READ BUFFER(16) */
+ { 0x9B, ALL & ~(B) , "READ BUFFER(16)" },
+ /* 9C O WRITE ATOMIC(16) */
+ { 0x9C, D, "WRITE ATOMIC(16)" },
+ /* 9D SERVICE ACTION BIDIRECTIONAL */
+ { 0x9D, ALL, "SERVICE ACTION BIDIRECTIONAL" },
/* XXX KDM ALL for this? op-num.txt defines it for none.. */
/* 9E SERVICE ACTION IN(16) */
{ 0x9E, ALL, "SERVICE ACTION IN(16)" },
- /* XXX KDM ALL for this? op-num.txt defines it for ADC.. */
/* 9F M SERVICE ACTION OUT(16) */
{ 0x9F, ALL, "SERVICE ACTION OUT(16)" },
/* A0 MMOOO OMMM OMO REPORT LUNS */
@@ -643,8 +652,7 @@ scsi_op_desc(u_int16_t opcode, struct scsi_inquiry_data *inq_data)
match = cam_quirkmatch((caddr_t)inq_data,
(caddr_t)scsi_op_quirk_table,
- sizeof(scsi_op_quirk_table)/
- sizeof(*scsi_op_quirk_table),
+ nitems(scsi_op_quirk_table),
sizeof(*scsi_op_quirk_table),
scsi_inquiry_match);
}
@@ -653,7 +661,7 @@ scsi_op_desc(u_int16_t opcode, struct scsi_inquiry_data *inq_data)
table[0] = ((struct scsi_op_quirk_entry *)match)->op_table;
num_ops[0] = ((struct scsi_op_quirk_entry *)match)->num_ops;
table[1] = scsi_op_codes;
- num_ops[1] = sizeof(scsi_op_codes)/sizeof(scsi_op_codes[0]);
+ num_ops[1] = nitems(scsi_op_codes);
num_tables = 2;
} else {
/*
@@ -664,7 +672,7 @@ scsi_op_desc(u_int16_t opcode, struct scsi_inquiry_data *inq_data)
return("Vendor Specific Command");
table[0] = scsi_op_codes;
- num_ops[0] = sizeof(scsi_op_codes)/sizeof(scsi_op_codes[0]);
+ num_ops[0] = nitems(scsi_op_codes);
num_tables = 1;
}
@@ -672,6 +680,12 @@ scsi_op_desc(u_int16_t opcode, struct scsi_inquiry_data *inq_data)
if (pd_type == T_RBC)
pd_type = T_DIRECT;
+ /*
+ * Host managed drives are direct access for the most part.
+ */
+ if (pd_type == T_ZBC_HM)
+ pd_type = T_DIRECT;
+
/* Map NODEVICE to Direct Access Device to handle REPORT LUNS, etc. */
if (pd_type == T_NODEVICE)
pd_type = T_DIRECT;
@@ -735,9 +749,6 @@ const struct sense_key_table_entry sense_key_table[] =
{ SSD_KEY_COMPLETED, SS_NOP, "COMPLETED" }
};
-const int sense_key_table_size =
- sizeof(sense_key_table)/sizeof(sense_key_table[0]);
-
static struct asc_table_entry quantum_fireball_entries[] = {
{ SST(0x04, 0x0b, SS_START | SSQ_DECREMENT_COUNT | ENXIO,
"Logical unit not ready, initializing cmd. required") }
@@ -922,7 +933,7 @@ static struct scsi_sense_quirk_entry sense_quirk_table[] = {
*/
{T_DIRECT, SIP_MEDIA_FIXED, "QUANTUM", "FIREBALL S*", "*"},
/*num_sense_keys*/0,
- sizeof(quantum_fireball_entries)/sizeof(struct asc_table_entry),
+ nitems(quantum_fireball_entries),
/*sense key entries*/NULL,
quantum_fireball_entries
},
@@ -933,7 +944,7 @@ static struct scsi_sense_quirk_entry sense_quirk_table[] = {
*/
{T_DIRECT, SIP_MEDIA_REMOVABLE, "SONY", "SMO-*", "*"},
/*num_sense_keys*/0,
- sizeof(sony_mo_entries)/sizeof(struct asc_table_entry),
+ nitems(sony_mo_entries),
/*sense key entries*/NULL,
sony_mo_entries
},
@@ -943,7 +954,7 @@ static struct scsi_sense_quirk_entry sense_quirk_table[] = {
*/
{T_DIRECT, SIP_MEDIA_FIXED, "HGST", "*", "*"},
/*num_sense_keys*/0,
- sizeof(hgst_entries)/sizeof(struct asc_table_entry),
+ nitems(hgst_entries),
/*sense key entries*/NULL,
hgst_entries
},
@@ -953,14 +964,13 @@ static struct scsi_sense_quirk_entry sense_quirk_table[] = {
*/
{T_DIRECT, SIP_MEDIA_FIXED, "SEAGATE", "*", "*"},
/*num_sense_keys*/0,
- sizeof(seagate_entries)/sizeof(struct asc_table_entry),
+ nitems(seagate_entries),
/*sense key entries*/NULL,
seagate_entries
}
};
-const int sense_quirk_table_size =
- sizeof(sense_quirk_table)/sizeof(sense_quirk_table[0]);
+const u_int sense_quirk_table_size = nitems(sense_quirk_table);
static struct asc_table_entry asc_table[] = {
/*
@@ -972,7 +982,7 @@ static struct asc_table_entry asc_table[] = {
*
* SCSI ASC/ASCQ Assignments
* Numeric Sorted Listing
- * as of 5/20/12
+ * as of 8/12/15
*
* D - DIRECT ACCESS DEVICE (SBC-2) device column key
* .T - SEQUENTIAL ACCESS DEVICE (SSC) -------------------
@@ -1064,6 +1074,9 @@ static struct asc_table_entry asc_table[] = {
/* DT P B */
{ SST(0x00, 0x20, SS_RDEF, /* XXX TBD */
"Extended copy information available") },
+ /* D */
+ { SST(0x00, 0x21, SS_RDEF, /* XXX TBD */
+ "Atomic command aborted due to ACA") },
/* D W O BK */
{ SST(0x01, 0x00, SS_RDEF,
"No index/sector signal") },
@@ -1083,7 +1096,7 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x04, 0x00, SS_RDEF,
"Logical unit not ready, cause not reportable") },
/* DTLPWROMAEBKVF */
- { SST(0x04, 0x01, SS_TUR | SSQ_MANY | SSQ_DECREMENT_COUNT | EBUSY,
+ { SST(0x04, 0x01, SS_WAIT | EBUSY,
"Logical unit is in process of becoming ready") },
/* DTLPWROMAEBKVF */
{ SST(0x04, 0x02, SS_START | SSQ_DECREMENT_COUNT | ENXIO,
@@ -1110,7 +1123,7 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x04, 0x09, SS_RDEF, /* XXX TBD */
"Logical unit not ready, self-test in progress") },
/* DTLPWROMAEBKVF */
- { SST(0x04, 0x0A, SS_TUR | SSQ_MANY | SSQ_DECREMENT_COUNT | ENXIO,
+ { SST(0x04, 0x0A, SS_WAIT | ENXIO,
"Logical unit not accessible, asymmetric access state transition")},
/* DTLPWROMAEBKVF */
{ SST(0x04, 0x0B, SS_FATAL | ENXIO,
@@ -1121,11 +1134,14 @@ static struct asc_table_entry asc_table[] = {
/* F */
{ SST(0x04, 0x0D, SS_RDEF, /* XXX TBD */
"Logical unit not ready, structure check required") },
+ /* DTL WR MAEBKVF */
+ { SST(0x04, 0x0E, SS_RDEF, /* XXX TBD */
+ "Logical unit not ready, security session in progress") },
/* DT WROM B */
{ SST(0x04, 0x10, SS_RDEF, /* XXX TBD */
"Logical unit not ready, auxiliary memory not accessible") },
/* DT WRO AEB VF */
- { SST(0x04, 0x11, SS_TUR | SSQ_MANY | SSQ_DECREMENT_COUNT | EBUSY,
+ { SST(0x04, 0x11, SS_WAIT | EBUSY,
"Logical unit not ready, notify (enable spinup) required") },
/* M V */
{ SST(0x04, 0x12, SS_RDEF, /* XXX TBD */
@@ -1160,6 +1176,24 @@ static struct asc_table_entry asc_table[] = {
/* DT MAEB */
{ SST(0x04, 0x1C, SS_RDEF, /* XXX TBD */
"Logical unit not ready, additional power use not yet granted") },
+ /* D */
+ { SST(0x04, 0x1D, SS_RDEF, /* XXX TBD */
+ "Logical unit not ready, configuration in progress") },
+ /* D */
+ { SST(0x04, 0x1E, SS_FATAL | ENXIO,
+ "Logical unit not ready, microcode activation required") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x04, 0x1F, SS_FATAL | ENXIO,
+ "Logical unit not ready, microcode download required") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x04, 0x20, SS_RDEF, /* XXX TBD */
+ "Logical unit not ready, logical unit reset required") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x04, 0x21, SS_RDEF, /* XXX TBD */
+ "Logical unit not ready, hard reset required") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x04, 0x22, SS_RDEF, /* XXX TBD */
+ "Logical unit not ready, power cycle required") },
/* DTL WROMAEBKVF */
{ SST(0x05, 0x00, SS_RDEF,
"Logical unit does not respond to selection") },
@@ -1199,6 +1233,9 @@ static struct asc_table_entry asc_table[] = {
/* DT WRO B */
{ SST(0x09, 0x04, SS_RDEF,
"Head select fault") },
+ /* DT RO B */
+ { SST(0x09, 0x05, SS_RDEF,
+ "Vibration induced tracking error") },
/* DTLPWROMAEBKVF */
{ SST(0x0A, 0x00, SS_FATAL | ENOSPC,
"Error log overflow") },
@@ -1232,6 +1269,30 @@ static struct asc_table_entry asc_table[] = {
/* D */
{ SST(0x0B, 0x09, SS_RDEF, /* XXX TBD */
"Warning - device statistics notification available") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x0A, SS_RDEF, /* XXX TBD */
+ "Warning - High critical temperature limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x0B, SS_RDEF, /* XXX TBD */
+ "Warning - Low critical temperature limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x0C, SS_RDEF, /* XXX TBD */
+ "Warning - High operating temperature limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x0D, SS_RDEF, /* XXX TBD */
+ "Warning - Low operating temperature limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x0E, SS_RDEF, /* XXX TBD */
+ "Warning - High critical humidity limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x0F, SS_RDEF, /* XXX TBD */
+ "Warning - Low critical humidity limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x10, SS_RDEF, /* XXX TBD */
+ "Warning - High operating humidity limit exceeded") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x0B, 0x11, SS_RDEF, /* XXX TBD */
+ "Warning - Low operating humidity limit exceeded") },
/* T R */
{ SST(0x0C, 0x00, SS_RDEF,
"Write error") },
@@ -1280,6 +1341,15 @@ static struct asc_table_entry asc_table[] = {
/* R */
{ SST(0x0C, 0x0F, SS_RDEF, /* XXX TBD */
"Defects in error window") },
+ /* D */
+ { SST(0x0C, 0x10, SS_RDEF, /* XXX TBD */
+ "Incomplete multiple atomic write operations") },
+ /* D */
+ { SST(0x0C, 0x11, SS_RDEF, /* XXX TBD */
+ "Write error - recovery scan needed") },
+ /* D */
+ { SST(0x0C, 0x12, SS_RDEF, /* XXX TBD */
+ "Write error - insufficient zone resources") },
/* DTLPWRO A K */
{ SST(0x0D, 0x00, SS_RDEF, /* XXX TBD */
"Error detected by third party temporary initiator") },
@@ -1391,6 +1461,9 @@ static struct asc_table_entry asc_table[] = {
/* D */
{ SST(0x11, 0x14, SS_RDEF, /* XXX TBD */
"Read error - LBA marked bad by application client") },
+ /* D */
+ { SST(0x11, 0x15, SS_RDEF, /* XXX TBD */
+ "Write after sanitize required") },
/* D W O BK */
{ SST(0x12, 0x00, SS_RDEF,
"Address mark not found for ID field") },
@@ -1593,40 +1666,52 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x21, 0x03, SS_RDEF, /* XXX TBD */
"Invalid write crossing layer jump") },
/* D */
+ { SST(0x21, 0x04, SS_RDEF, /* XXX TBD */
+ "Unaligned write command") },
+ /* D */
+ { SST(0x21, 0x05, SS_RDEF, /* XXX TBD */
+ "Write boundary violation") },
+ /* D */
+ { SST(0x21, 0x06, SS_RDEF, /* XXX TBD */
+ "Attempt to read invalid data") },
+ /* D */
+ { SST(0x21, 0x07, SS_RDEF, /* XXX TBD */
+ "Read boundary violation") },
+ /* D */
{ SST(0x22, 0x00, SS_FATAL | EINVAL,
"Illegal function (use 20 00, 24 00, or 26 00)") },
/* DT P B */
- { SST(0x23, 0x00, SS_RDEF, /* XXX TBD */
+ { SST(0x23, 0x00, SS_FATAL | EINVAL,
"Invalid token operation, cause not reportable") },
/* DT P B */
- { SST(0x23, 0x01, SS_RDEF, /* XXX TBD */
+ { SST(0x23, 0x01, SS_FATAL | EINVAL,
"Invalid token operation, unsupported token type") },
/* DT P B */
- { SST(0x23, 0x02, SS_RDEF, /* XXX TBD */
+ { SST(0x23, 0x02, SS_FATAL | EINVAL,
"Invalid token operation, remote token usage not supported") },
/* DT P B */
- { SST(0x23, 0x03, SS_RDEF, /* XXX TBD */
+ { SST(0x23, 0x03, SS_FATAL | EINVAL,
"Invalid token operation, remote ROD token creation not supported") },
/* DT P B */
- { SST(0x23, 0x04, SS_RDEF, /* XXX TBD */
+ { SST(0x23, 0x04, SS_FATAL | EINVAL,
"Invalid token operation, token unknown") },
/* DT P B */
- { SST(0x23, 0x05, SS_RDEF, /* XXX TBD */
+ { SST(0x23, 0x05, SS_FATAL | EINVAL,
"Invalid token operation, token corrupt") },
/* DT P B */
- { SST(0x23, 0x06, SS_RDEF, /* XXX TBD */
+ { SST(0x23, 0x06, SS_FATAL | EINVAL,
"Invalid token operation, token revoked") },
/* DT P B */
- { SST(0x23, 0x07, SS_RDEF, /* XXX TBD */
+ { SST(0x23, 0x07, SS_FATAL | EINVAL,
"Invalid token operation, token expired") },
/* DT P B */
- { SST(0x23, 0x08, SS_RDEF, /* XXX TBD */
+ { SST(0x23, 0x08, SS_FATAL | EINVAL,
"Invalid token operation, token cancelled") },
/* DT P B */
- { SST(0x23, 0x09, SS_RDEF, /* XXX TBD */
+ { SST(0x23, 0x09, SS_FATAL | EINVAL,
"Invalid token operation, token deleted") },
/* DT P B */
- { SST(0x23, 0x0A, SS_RDEF, /* XXX TBD */
+ { SST(0x23, 0x0A, SS_FATAL | EINVAL,
"Invalid token operation, invalid token length") },
/* DTLPWROMAEBKVF */
{ SST(0x24, 0x00, SS_FATAL | EINVAL,
@@ -1677,28 +1762,28 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x26, 0x05, SS_RDEF, /* XXX TBD */
"Data decryption error") },
/* DTLPWRO K */
- { SST(0x26, 0x06, SS_RDEF, /* XXX TBD */
+ { SST(0x26, 0x06, SS_FATAL | EINVAL,
"Too many target descriptors") },
/* DTLPWRO K */
- { SST(0x26, 0x07, SS_RDEF, /* XXX TBD */
+ { SST(0x26, 0x07, SS_FATAL | EINVAL,
"Unsupported target descriptor type code") },
/* DTLPWRO K */
- { SST(0x26, 0x08, SS_RDEF, /* XXX TBD */
+ { SST(0x26, 0x08, SS_FATAL | EINVAL,
"Too many segment descriptors") },
/* DTLPWRO K */
- { SST(0x26, 0x09, SS_RDEF, /* XXX TBD */
+ { SST(0x26, 0x09, SS_FATAL | EINVAL,
"Unsupported segment descriptor type code") },
/* DTLPWRO K */
- { SST(0x26, 0x0A, SS_RDEF, /* XXX TBD */
+ { SST(0x26, 0x0A, SS_FATAL | EINVAL,
"Unexpected inexact segment") },
/* DTLPWRO K */
- { SST(0x26, 0x0B, SS_RDEF, /* XXX TBD */
+ { SST(0x26, 0x0B, SS_FATAL | EINVAL,
"Inline data length exceeded") },
/* DTLPWRO K */
- { SST(0x26, 0x0C, SS_RDEF, /* XXX TBD */
+ { SST(0x26, 0x0C, SS_FATAL | EINVAL,
"Invalid operation for copy source or destination") },
/* DTLPWRO K */
- { SST(0x26, 0x0D, SS_RDEF, /* XXX TBD */
+ { SST(0x26, 0x0D, SS_FATAL | EINVAL,
"Copy segment granularity violation") },
/* DT PWROMAEBK */
{ SST(0x26, 0x0E, SS_RDEF, /* XXX TBD */
@@ -1715,6 +1800,9 @@ static struct asc_table_entry asc_table[] = {
/* T */
{ SST(0x26, 0x12, SS_RDEF, /* XXX TBD */
"Vendor specific key reference not found") },
+ /* D */
+ { SST(0x26, 0x13, SS_RDEF, /* XXX TBD */
+ "Application tag mode page is invalid") },
/* DT WRO BK */
{ SST(0x27, 0x00, SS_FATAL | EACCES,
"Write protected") },
@@ -1737,8 +1825,11 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x27, 0x06, SS_RDEF, /* XXX TBD */
"Conditional write protect") },
/* D B */
- { SST(0x27, 0x07, SS_RDEF, /* XXX TBD */
+ { SST(0x27, 0x07, SS_FATAL | ENOSPC,
"Space allocation failed write protect") },
+ /* D */
+ { SST(0x27, 0x08, SS_FATAL | EACCES,
+ "Zone is read only") },
/* DTLPWROMAEBKVF */
{ SST(0x28, 0x00, SS_FATAL | ENXIO,
"Not ready to ready change, medium may have changed") },
@@ -1882,12 +1973,33 @@ static struct asc_table_entry asc_table[] = {
/* D */
{ SST(0x2C, 0x0C, SS_RDEF, /* XXX TBD */
"ORWRITE generation does not match") },
+ /* D */
+ { SST(0x2C, 0x0D, SS_RDEF, /* XXX TBD */
+ "Reset write pointer not allowed") },
+ /* D */
+ { SST(0x2C, 0x0E, SS_RDEF, /* XXX TBD */
+ "Zone is offline") },
+ /* D */
+ { SST(0x2C, 0x0F, SS_RDEF, /* XXX TBD */
+ "Stream not open") },
+ /* D */
+ { SST(0x2C, 0x10, SS_RDEF, /* XXX TBD */
+ "Unwritten data in zone") },
/* T */
{ SST(0x2D, 0x00, SS_RDEF,
"Overwrite error on update in place") },
/* R */
{ SST(0x2E, 0x00, SS_RDEF, /* XXX TBD */
"Insufficient time for operation") },
+ /* D */
+ { SST(0x2E, 0x01, SS_RDEF, /* XXX TBD */
+ "Command timeout before processing") },
+ /* D */
+ { SST(0x2E, 0x02, SS_RDEF, /* XXX TBD */
+ "Command timeout during processing") },
+ /* D */
+ { SST(0x2E, 0x03, SS_RDEF, /* XXX TBD */
+ "Command timeout during processing due to error recovery") },
/* DTLPWROMAEBKVF */
{ SST(0x2F, 0x00, SS_RDEF,
"Commands cleared by another initiator") },
@@ -1897,6 +2009,9 @@ static struct asc_table_entry asc_table[] = {
/* DTLPWROMAEBKVF */
{ SST(0x2F, 0x02, SS_RDEF, /* XXX TBD */
"Commands cleared by device server") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x2F, 0x03, SS_RDEF, /* XXX TBD */
+ "Some commands cleared by queuing layer event") },
/* DT WROM BK */
{ SST(0x30, 0x00, SS_RDEF,
"Incompatible medium installed") },
@@ -2194,6 +2309,15 @@ static struct asc_table_entry asc_table[] = {
/* DTLPWR MAEBK F */
{ SST(0x3F, 0x14, SS_RDEF, /* XXX TBD */
"iSCSI IP address changed") },
+ /* DTLPWR MAEBK */
+ { SST(0x3F, 0x15, SS_RDEF, /* XXX TBD */
+ "Inspect referrals sense descriptors") },
+ /* DTLPWROMAEBKVF */
+ { SST(0x3F, 0x16, SS_RDEF, /* XXX TBD */
+ "Microcode has been changed without reset") },
+ /* D */
+ { SST(0x3F, 0x17, SS_RDEF, /* XXX TBD */
+ "Zone transition to full") },
/* D */
{ SST(0x40, 0x00, SS_RDEF,
"RAM failure") }, /* deprecated - use 40 NN instead */
@@ -2303,6 +2427,30 @@ static struct asc_table_entry asc_table[] = {
/* DT PWROMAEBK F */
{ SST(0x4B, 0x0D, SS_RDEF, /* XXX TBD */
"Data-out buffer error") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x0E, SS_RDEF, /* XXX TBD */
+ "PCIe fabric error") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x0F, SS_RDEF, /* XXX TBD */
+ "PCIe completion timeout") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x10, SS_RDEF, /* XXX TBD */
+ "PCIe completer abort") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x11, SS_RDEF, /* XXX TBD */
+ "PCIe poisoned TLP received") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x12, SS_RDEF, /* XXX TBD */
+ "PCIe ECRC check failed") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x13, SS_RDEF, /* XXX TBD */
+ "PCIe unsupported request") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x14, SS_RDEF, /* XXX TBD */
+ "PCIe ACS violation") },
+ /* DT PWROMAEBK F */
+ { SST(0x4B, 0x15, SS_RDEF, /* XXX TBD */
+ "PCIe TLP prefix blocked") },
/* DTLPWROMAEBKVF */
{ SST(0x4C, 0x00, SS_RDEF,
"Logical unit failed self-configuration") },
@@ -2360,6 +2508,21 @@ static struct asc_table_entry asc_table[] = {
/* M */
{ SST(0x53, 0x08, SS_RDEF, /* XXX TBD */
"Element status unknown") },
+ /* M */
+ { SST(0x53, 0x09, SS_RDEF, /* XXX TBD */
+ "Data transfer device error - load failed") },
+ /* M */
+ { SST(0x53, 0x0A, SS_RDEF, /* XXX TBD */
+ "Data transfer device error - unload failed") },
+ /* M */
+ { SST(0x53, 0x0B, SS_RDEF, /* XXX TBD */
+ "Data transfer device error - unload missing") },
+ /* M */
+ { SST(0x53, 0x0C, SS_RDEF, /* XXX TBD */
+ "Data transfer device error - eject failed") },
+ /* M */
+ { SST(0x53, 0x0D, SS_RDEF, /* XXX TBD */
+ "Data transfer device error - library communication failed") },
/* P */
{ SST(0x54, 0x00, SS_RDEF,
"SCSI to host system interface failure") },
@@ -2405,6 +2568,15 @@ static struct asc_table_entry asc_table[] = {
/* DT P B */
{ SST(0x55, 0x0D, SS_RDEF, /* XXX TBD */
"Insufficient resources to create ROD token") },
+ /* D */
+ { SST(0x55, 0x0E, SS_RDEF, /* XXX TBD */
+ "Insufficient zone resources") },
+ /* D */
+ { SST(0x55, 0x0F, SS_RDEF, /* XXX TBD */
+ "Insufficient zone resources to complete write") },
+ /* D */
+ { SST(0x55, 0x10, SS_RDEF, /* XXX TBD */
+ "Maximum number of streams open") },
/* R */
{ SST(0x57, 0x00, SS_RDEF,
"Unable to recover table-of-contents") },
@@ -2825,6 +2997,9 @@ static struct asc_table_entry asc_table[] = {
/* A */
{ SST(0x68, 0x00, SS_RDEF,
"Logical unit not configured") },
+ /* D */
+ { SST(0x68, 0x01, SS_RDEF,
+ "Subsidiary logical unit not configured") },
/* A */
{ SST(0x69, 0x00, SS_RDEF,
"Data loss on logical unit") },
@@ -3031,7 +3206,7 @@ static struct asc_table_entry asc_table[] = {
"Security conflict in translated device") }
};
-const int asc_table_size = sizeof(asc_table)/sizeof(asc_table[0]);
+const u_int asc_table_size = nitems(asc_table);
struct asc_key
{
@@ -3124,14 +3299,14 @@ fetchtableentries(int sense_key, int asc, int ascq,
sense_tables[0] = quirk->sense_key_info;
sense_tables_size[0] = quirk->num_sense_keys;
sense_tables[1] = sense_key_table;
- sense_tables_size[1] = sense_key_table_size;
+ sense_tables_size[1] = nitems(sense_key_table);
num_sense_tables = 2;
} else {
asc_tables[0] = asc_table;
asc_tables_size[0] = asc_table_size;
num_asc_tables = 1;
sense_tables[0] = sense_key_table;
- sense_tables_size[0] = sense_key_table_size;
+ sense_tables_size[0] = nitems(sense_key_table);
num_sense_tables = 1;
}
@@ -3297,14 +3472,32 @@ scsi_error_action(struct ccb_scsiio *csio, struct scsi_inquiry_data *inq_data,
char *
scsi_cdb_string(u_int8_t *cdb_ptr, char *cdb_string, size_t len)
{
+ struct sbuf sb;
+ int error;
+
+ if (len == 0)
+ return ("");
+
+ sbuf_new(&sb, cdb_string, len, SBUF_FIXEDLEN);
+
+ scsi_cdb_sbuf(cdb_ptr, &sb);
+
+ /* ENOMEM just means that the fixed buffer is full, OK to ignore */
+ error = sbuf_finish(&sb);
+ if (error != 0 && error != ENOMEM)
+ return ("");
+
+ return(sbuf_data(&sb));
+}
+
+void
+scsi_cdb_sbuf(u_int8_t *cdb_ptr, struct sbuf *sb)
+{
u_int8_t cdb_len;
int i;
if (cdb_ptr == NULL)
- return("");
-
- /* Silence warnings */
- cdb_len = 0;
+ return;
/*
* This is taken from the SCSI-3 draft spec.
@@ -3341,12 +3534,11 @@ scsi_cdb_string(u_int8_t *cdb_ptr, char *cdb_string, size_t len)
cdb_len = 12;
break;
}
- *cdb_string = '\0';
+
for (i = 0; i < cdb_len; i++)
- snprintf(cdb_string + strlen(cdb_string),
- len - strlen(cdb_string), "%02hhx ", cdb_ptr[i]);
+ sbuf_printf(sb, "%02hhx ", cdb_ptr[i]);
- return(cdb_string);
+ return;
}
const char *
@@ -3395,7 +3587,6 @@ scsi_command_string(struct cam_device *device, struct ccb_scsiio *csio,
#endif /* _KERNEL/!_KERNEL */
{
struct scsi_inquiry_data *inq_data;
- char cdb_str[(SCSI_MAX_CDBLEN * 3) + 1];
#ifdef _KERNEL
struct ccb_getdev *cgd;
#endif /* _KERNEL */
@@ -3428,15 +3619,13 @@ scsi_command_string(struct cam_device *device, struct ccb_scsiio *csio,
#endif /* _KERNEL/!_KERNEL */
if ((csio->ccb_h.flags & CAM_CDB_POINTER) != 0) {
- sbuf_printf(sb, "%s. CDB: %s",
- scsi_op_desc(csio->cdb_io.cdb_ptr[0], inq_data),
- scsi_cdb_string(csio->cdb_io.cdb_ptr, cdb_str,
- sizeof(cdb_str)));
+ sbuf_printf(sb, "%s. CDB: ",
+ scsi_op_desc(csio->cdb_io.cdb_ptr[0], inq_data));
+ scsi_cdb_sbuf(csio->cdb_io.cdb_ptr, sb);
} else {
- sbuf_printf(sb, "%s. CDB: %s",
- scsi_op_desc(csio->cdb_io.cdb_bytes[0], inq_data),
- scsi_cdb_string(csio->cdb_io.cdb_bytes, cdb_str,
- sizeof(cdb_str)));
+ sbuf_printf(sb, "%s. CDB: ",
+ scsi_op_desc(csio->cdb_io.cdb_bytes[0], inq_data));
+ scsi_cdb_sbuf(csio->cdb_io.cdb_bytes, sb);
}
#ifdef _KERNEL
@@ -3468,7 +3657,7 @@ scsi_desc_iterate(struct scsi_sense_data_desc *sense, u_int sense_len,
/*
* The length of data actually returned may be different than the
- * extra_len recorded in the sturcture.
+ * extra_len recorded in the structure.
*/
desc_len = sense_len -offsetof(struct scsi_sense_data_desc, sense_desc);
@@ -3808,8 +3997,6 @@ scsi_set_sense_data_va(struct scsi_sense_data *sense_data,
*/
sense->extra_len = 10;
sense_len = (int)va_arg(ap, int);
- len_to_copy = MIN(sense_len, SSD_EXTRA_MAX -
- sense->extra_len);
data = (uint8_t *)va_arg(ap, uint8_t *);
switch (elem_type) {
@@ -3827,10 +4014,14 @@ scsi_set_sense_data_va(struct scsi_sense_data *sense_data,
uint8_t *data_dest;
int i;
- if (elem_type == SSD_ELEM_COMMAND)
+ if (elem_type == SSD_ELEM_COMMAND) {
data_dest = &sense->cmd_spec_info[0];
- else {
+ len_to_copy = MIN(sense_len,
+ sizeof(sense->cmd_spec_info));
+ } else {
data_dest = &sense->info[0];
+ len_to_copy = MIN(sense_len,
+ sizeof(sense->info));
/*
* We're setting the info field, so
* set the valid bit.
@@ -4083,6 +4274,7 @@ scsi_get_block_info(struct scsi_sense_data *sense_data, u_int sense_len,
switch (SID_TYPE(inq_data)) {
case T_DIRECT:
case T_RBC:
+ case T_ZBC_HM:
break;
default:
goto bailout;
@@ -4524,10 +4716,9 @@ scsi_sense_desc_sbuf(struct sbuf *sb, struct scsi_sense_data *sense,
struct scsi_inquiry_data *inq_data,
struct scsi_sense_desc_header *header)
{
- int i;
+ u_int i;
- for (i = 0; i < (sizeof(scsi_sense_printers) /
- sizeof(scsi_sense_printers[0])); i++) {
+ for (i = 0; i < nitems(scsi_sense_printers); i++) {
struct scsi_sense_desc_printer *printer;
printer = &scsi_sense_printers[i];
@@ -5236,6 +5427,9 @@ scsi_print_inquiry(struct scsi_inquiry_data *inq_data)
case T_ADC:
dtype = "Automation/Drive Interface";
break;
+ case T_ZBC_HM:
+ dtype = "Host Managed Zoned Block";
+ break;
case T_NODEVICE:
dtype = "Uninstalled";
break;
@@ -5304,8 +5498,8 @@ static struct {
u_int
scsi_calc_syncsrate(u_int period_factor)
{
- int i;
- int num_syncrates;
+ u_int i;
+ u_int num_syncrates;
/*
* It's a bug if period is zero, but if it is anyway, don't
@@ -5316,7 +5510,7 @@ scsi_calc_syncsrate(u_int period_factor)
return (3300);
}
- num_syncrates = sizeof(scsi_syncrates) / sizeof(scsi_syncrates[0]);
+ num_syncrates = nitems(scsi_syncrates);
/* See if the period is in the "exception" table */
for (i = 0; i < num_syncrates; i++) {
@@ -5334,21 +5528,21 @@ scsi_calc_syncsrate(u_int period_factor)
}
/*
- * Return the SCSI sync parameter that corresponsd to
+ * Return the SCSI sync parameter that corresponds to
* the passed in period in 10ths of ns.
*/
u_int
scsi_calc_syncparam(u_int period)
{
- int i;
- int num_syncrates;
+ u_int i;
+ u_int num_syncrates;
if (period == 0)
return (~0); /* Async */
/* Adjust for exception table being in 100ths. */
period *= 10;
- num_syncrates = sizeof(scsi_syncrates) / sizeof(scsi_syncrates[0]);
+ num_syncrates = nitems(scsi_syncrates);
/* See if the period is in the "exception" table */
for (i = 0; i < num_syncrates; i++) {
@@ -5450,33 +5644,1853 @@ scsi_devid_is_lun_name(uint8_t *bufp)
return 1;
}
+int
+scsi_devid_is_port_naa(uint8_t *bufp)
+{
+ struct scsi_vpd_id_descriptor *descr;
+
+ descr = (struct scsi_vpd_id_descriptor *)bufp;
+ if ((descr->id_type & SVPD_ID_ASSOC_MASK) != SVPD_ID_ASSOC_PORT)
+ return 0;
+ if ((descr->id_type & SVPD_ID_TYPE_MASK) != SVPD_ID_TYPE_NAA)
+ return 0;
+ return 1;
+}
+
struct scsi_vpd_id_descriptor *
-scsi_get_devid(struct scsi_vpd_device_id *id, uint32_t page_len,
+scsi_get_devid_desc(struct scsi_vpd_id_descriptor *desc, uint32_t len,
scsi_devid_checkfn_t ck_fn)
{
- struct scsi_vpd_id_descriptor *desc;
- uint8_t *page_end;
uint8_t *desc_buf_end;
- page_end = (uint8_t *)id + page_len;
- if (page_end < id->desc_list)
- return (NULL);
-
- desc_buf_end = MIN(id->desc_list + scsi_2btoul(id->length), page_end);
+ desc_buf_end = (uint8_t *)desc + len;
- for (desc = (struct scsi_vpd_id_descriptor *)id->desc_list;
- desc->identifier <= desc_buf_end
- && desc->identifier + desc->length <= desc_buf_end;
- desc = (struct scsi_vpd_id_descriptor *)(desc->identifier
+ for (; desc->identifier <= desc_buf_end &&
+ desc->identifier + desc->length <= desc_buf_end;
+ desc = (struct scsi_vpd_id_descriptor *)(desc->identifier
+ desc->length)) {
if (ck_fn == NULL || ck_fn((uint8_t *)desc) != 0)
return (desc);
}
+ return (NULL);
+}
+
+struct scsi_vpd_id_descriptor *
+scsi_get_devid(struct scsi_vpd_device_id *id, uint32_t page_len,
+ scsi_devid_checkfn_t ck_fn)
+{
+ uint32_t len;
+
+ if (page_len < sizeof(*id))
+ return (NULL);
+ len = MIN(scsi_2btoul(id->length), page_len - sizeof(*id));
+ return (scsi_get_devid_desc((struct scsi_vpd_id_descriptor *)
+ id->desc_list, len, ck_fn));
+}
+
+int
+scsi_transportid_sbuf(struct sbuf *sb, struct scsi_transportid_header *hdr,
+ uint32_t valid_len)
+{
+ switch (hdr->format_protocol & SCSI_TRN_PROTO_MASK) {
+ case SCSI_PROTO_FC: {
+ struct scsi_transportid_fcp *fcp;
+ uint64_t n_port_name;
+
+ fcp = (struct scsi_transportid_fcp *)hdr;
+
+ n_port_name = scsi_8btou64(fcp->n_port_name);
+
+ sbuf_printf(sb, "FCP address: 0x%.16jx",(uintmax_t)n_port_name);
+ break;
+ }
+ case SCSI_PROTO_SPI: {
+ struct scsi_transportid_spi *spi;
+
+ spi = (struct scsi_transportid_spi *)hdr;
+
+ sbuf_printf(sb, "SPI address: %u,%u",
+ scsi_2btoul(spi->scsi_addr),
+ scsi_2btoul(spi->rel_trgt_port_id));
+ break;
+ }
+ case SCSI_PROTO_SSA:
+ /*
+ * XXX KDM there is no transport ID defined in SPC-4 for
+ * SSA.
+ */
+ break;
+ case SCSI_PROTO_1394: {
+ struct scsi_transportid_1394 *sbp;
+ uint64_t eui64;
+
+ sbp = (struct scsi_transportid_1394 *)hdr;
+
+ eui64 = scsi_8btou64(sbp->eui64);
+ sbuf_printf(sb, "SBP address: 0x%.16jx", (uintmax_t)eui64);
+ break;
+ }
+ case SCSI_PROTO_RDMA: {
+ struct scsi_transportid_rdma *rdma;
+ unsigned int i;
+
+ rdma = (struct scsi_transportid_rdma *)hdr;
+
+ sbuf_printf(sb, "RDMA address: 0x");
+ for (i = 0; i < sizeof(rdma->initiator_port_id); i++)
+ sbuf_printf(sb, "%02x", rdma->initiator_port_id[i]);
+ break;
+ }
+ case SCSI_PROTO_ISCSI: {
+ uint32_t add_len, i;
+ uint8_t *iscsi_name = NULL;
+ int nul_found = 0;
+
+ sbuf_printf(sb, "iSCSI address: ");
+ if ((hdr->format_protocol & SCSI_TRN_FORMAT_MASK) ==
+ SCSI_TRN_ISCSI_FORMAT_DEVICE) {
+ struct scsi_transportid_iscsi_device *dev;
+
+ dev = (struct scsi_transportid_iscsi_device *)hdr;
+
+ /*
+ * Verify how much additional data we really have.
+ */
+ add_len = scsi_2btoul(dev->additional_length);
+ add_len = MIN(add_len, valid_len -
+ __offsetof(struct scsi_transportid_iscsi_device,
+ iscsi_name));
+ iscsi_name = &dev->iscsi_name[0];
+
+ } else if ((hdr->format_protocol & SCSI_TRN_FORMAT_MASK) ==
+ SCSI_TRN_ISCSI_FORMAT_PORT) {
+ struct scsi_transportid_iscsi_port *port;
+
+ port = (struct scsi_transportid_iscsi_port *)hdr;
+
+ add_len = scsi_2btoul(port->additional_length);
+ add_len = MIN(add_len, valid_len -
+ __offsetof(struct scsi_transportid_iscsi_port,
+ iscsi_name));
+ iscsi_name = &port->iscsi_name[0];
+ } else {
+ sbuf_printf(sb, "unknown format %x",
+ (hdr->format_protocol &
+ SCSI_TRN_FORMAT_MASK) >>
+ SCSI_TRN_FORMAT_SHIFT);
+ break;
+ }
+ if (add_len == 0) {
+ sbuf_printf(sb, "not enough data");
+ break;
+ }
+ /*
+ * This is supposed to be a NUL-terminated ASCII
+ * string, but you never know. So we're going to
+ * check. We need to do this because there is no
+ * sbuf equivalent of strncat().
+ */
+ for (i = 0; i < add_len; i++) {
+ if (iscsi_name[i] == '\0') {
+ nul_found = 1;
+ break;
+ }
+ }
+ /*
+ * If there is a NUL in the name, we can just use
+ * sbuf_cat(). Otherwise we need to use sbuf_bcat().
+ */
+ if (nul_found != 0)
+ sbuf_cat(sb, iscsi_name);
+ else
+ sbuf_bcat(sb, iscsi_name, add_len);
+ break;
+ }
+ case SCSI_PROTO_SAS: {
+ struct scsi_transportid_sas *sas;
+ uint64_t sas_addr;
+
+ sas = (struct scsi_transportid_sas *)hdr;
+
+ sas_addr = scsi_8btou64(sas->sas_address);
+ sbuf_printf(sb, "SAS address: 0x%.16jx", (uintmax_t)sas_addr);
+ break;
+ }
+ case SCSI_PROTO_ADITP:
+ case SCSI_PROTO_ATA:
+ case SCSI_PROTO_UAS:
+ /*
+ * No Transport ID format for ADI, ATA or USB is defined in
+ * SPC-4.
+ */
+ sbuf_printf(sb, "No known Transport ID format for protocol "
+ "%#x", hdr->format_protocol & SCSI_TRN_PROTO_MASK);
+ break;
+ case SCSI_PROTO_SOP: {
+ struct scsi_transportid_sop *sop;
+ struct scsi_sop_routing_id_norm *rid;
+
+ sop = (struct scsi_transportid_sop *)hdr;
+ rid = (struct scsi_sop_routing_id_norm *)sop->routing_id;
+
+ /*
+ * Note that there is no alternate format specified in SPC-4
+ * for the PCIe routing ID, so we don't really have a way
+ * to know whether the second byte of the routing ID is
+ * a device and function or just a function. So we just
+ * assume bus,device,function.
+ */
+ sbuf_printf(sb, "SOP Routing ID: %u,%u,%u",
+ rid->bus, rid->devfunc >> SCSI_TRN_SOP_DEV_SHIFT,
+ rid->devfunc & SCSI_TRN_SOP_FUNC_NORM_MAX);
+ break;
+ }
+ case SCSI_PROTO_NONE:
+ default:
+ sbuf_printf(sb, "Unknown protocol %#x",
+ hdr->format_protocol & SCSI_TRN_PROTO_MASK);
+ break;
+ }
+
+ return (0);
+}
+
+struct scsi_nv scsi_proto_map[] = {
+ { "fcp", SCSI_PROTO_FC },
+ { "spi", SCSI_PROTO_SPI },
+ { "ssa", SCSI_PROTO_SSA },
+ { "sbp", SCSI_PROTO_1394 },
+ { "1394", SCSI_PROTO_1394 },
+ { "srp", SCSI_PROTO_RDMA },
+ { "rdma", SCSI_PROTO_RDMA },
+ { "iscsi", SCSI_PROTO_ISCSI },
+ { "iqn", SCSI_PROTO_ISCSI },
+ { "sas", SCSI_PROTO_SAS },
+ { "aditp", SCSI_PROTO_ADITP },
+ { "ata", SCSI_PROTO_ATA },
+ { "uas", SCSI_PROTO_UAS },
+ { "usb", SCSI_PROTO_UAS },
+ { "sop", SCSI_PROTO_SOP }
+};
+
+const char *
+scsi_nv_to_str(struct scsi_nv *table, int num_table_entries, uint64_t value)
+{
+ int i;
+
+ for (i = 0; i < num_table_entries; i++) {
+ if (table[i].value == value)
+ return (table[i].name);
+ }
return (NULL);
}
+/*
+ * Given a name/value table, find a value matching the given name.
+ * Return values:
+ * SCSI_NV_FOUND - match found
+ * SCSI_NV_AMBIGUOUS - more than one match, none of them exact
+ * SCSI_NV_NOT_FOUND - no match found
+ */
+scsi_nv_status
+scsi_get_nv(struct scsi_nv *table, int num_table_entries,
+ char *name, int *table_entry, scsi_nv_flags flags)
+{
+ int i, num_matches = 0;
+
+ for (i = 0; i < num_table_entries; i++) {
+ size_t table_len, name_len;
+
+ table_len = strlen(table[i].name);
+ name_len = strlen(name);
+
+ if ((((flags & SCSI_NV_FLAG_IG_CASE) != 0)
+ && (strncasecmp(table[i].name, name, name_len) == 0))
+ || (((flags & SCSI_NV_FLAG_IG_CASE) == 0)
+ && (strncmp(table[i].name, name, name_len) == 0))) {
+ *table_entry = i;
+
+ /*
+ * Check for an exact match. If we have the same
+ * number of characters in the table as the argument,
+ * and we already know they're the same, we have
+ * an exact match.
+ */
+ if (table_len == name_len)
+ return (SCSI_NV_FOUND);
+
+ /*
+ * Otherwise, bump up the number of matches. We'll
+ * see later how many we have.
+ */
+ num_matches++;
+ }
+ }
+
+ if (num_matches > 1)
+ return (SCSI_NV_AMBIGUOUS);
+ else if (num_matches == 1)
+ return (SCSI_NV_FOUND);
+ else
+ return (SCSI_NV_NOT_FOUND);
+}
+
+/*
+ * Parse transport IDs for Fibre Channel, 1394 and SAS. Since these are
+ * all 64-bit numbers, the code is similar.
+ */
+int
+scsi_parse_transportid_64bit(int proto_id, char *id_str,
+ struct scsi_transportid_header **hdr,
+ unsigned int *alloc_len,
+#ifdef _KERNEL
+ struct malloc_type *type, int flags,
+#endif
+ char *error_str, int error_str_len)
+{
+ uint64_t value;
+ char *endptr;
+ int retval;
+ size_t alloc_size;
+
+ retval = 0;
+
+ value = strtouq(id_str, &endptr, 0);
+ if (*endptr != '\0') {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: error "
+ "parsing ID %s, 64-bit number required",
+ __func__, id_str);
+ }
+ retval = 1;
+ goto bailout;
+ }
+
+ switch (proto_id) {
+ case SCSI_PROTO_FC:
+ alloc_size = sizeof(struct scsi_transportid_fcp);
+ break;
+ case SCSI_PROTO_1394:
+ alloc_size = sizeof(struct scsi_transportid_1394);
+ break;
+ case SCSI_PROTO_SAS:
+ alloc_size = sizeof(struct scsi_transportid_sas);
+ break;
+ default:
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: unsupported "
+ "protocol %d", __func__, proto_id);
+ }
+ retval = 1;
+ goto bailout;
+ break; /* NOTREACHED */
+ }
+#ifdef _KERNEL
+ *hdr = malloc(alloc_size, type, flags);
+#else /* _KERNEL */
+ *hdr = malloc(alloc_size);
+#endif /*_KERNEL */
+ if (*hdr == NULL) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: unable to "
+ "allocate %zu bytes", __func__, alloc_size);
+ }
+ retval = 1;
+ goto bailout;
+ }
+
+ *alloc_len = alloc_size;
+
+ bzero(*hdr, alloc_size);
+
+ switch (proto_id) {
+ case SCSI_PROTO_FC: {
+ struct scsi_transportid_fcp *fcp;
+
+ fcp = (struct scsi_transportid_fcp *)(*hdr);
+ fcp->format_protocol = SCSI_PROTO_FC |
+ SCSI_TRN_FCP_FORMAT_DEFAULT;
+ scsi_u64to8b(value, fcp->n_port_name);
+ break;
+ }
+ case SCSI_PROTO_1394: {
+ struct scsi_transportid_1394 *sbp;
+
+ sbp = (struct scsi_transportid_1394 *)(*hdr);
+ sbp->format_protocol = SCSI_PROTO_1394 |
+ SCSI_TRN_1394_FORMAT_DEFAULT;
+ scsi_u64to8b(value, sbp->eui64);
+ break;
+ }
+ case SCSI_PROTO_SAS: {
+ struct scsi_transportid_sas *sas;
+
+ sas = (struct scsi_transportid_sas *)(*hdr);
+ sas->format_protocol = SCSI_PROTO_SAS |
+ SCSI_TRN_SAS_FORMAT_DEFAULT;
+ scsi_u64to8b(value, sas->sas_address);
+ break;
+ }
+ default:
+ break;
+ }
+bailout:
+ return (retval);
+}
+
+/*
+ * Parse a SPI (Parallel SCSI) address of the form: id,rel_tgt_port
+ */
+int
+scsi_parse_transportid_spi(char *id_str, struct scsi_transportid_header **hdr,
+ unsigned int *alloc_len,
+#ifdef _KERNEL
+ struct malloc_type *type, int flags,
+#endif
+ char *error_str, int error_str_len)
+{
+ unsigned long scsi_addr, target_port;
+ struct scsi_transportid_spi *spi;
+ char *tmpstr, *endptr;
+ int retval;
+
+ retval = 0;
+
+ tmpstr = strsep(&id_str, ",");
+ if (tmpstr == NULL) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len,
+ "%s: no ID found", __func__);
+ }
+ retval = 1;
+ goto bailout;
+ }
+ scsi_addr = strtoul(tmpstr, &endptr, 0);
+ if (*endptr != '\0') {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: error "
+ "parsing SCSI ID %s, number required",
+ __func__, tmpstr);
+ }
+ retval = 1;
+ goto bailout;
+ }
+
+ if (id_str == NULL) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: no relative "
+ "target port found", __func__);
+ }
+ retval = 1;
+ goto bailout;
+ }
+
+ target_port = strtoul(id_str, &endptr, 0);
+ if (*endptr != '\0') {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: error "
+ "parsing relative target port %s, number "
+ "required", __func__, id_str);
+ }
+ retval = 1;
+ goto bailout;
+ }
+#ifdef _KERNEL
+ spi = malloc(sizeof(*spi), type, flags);
+#else
+ spi = malloc(sizeof(*spi));
+#endif
+ if (spi == NULL) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: unable to "
+ "allocate %zu bytes", __func__,
+ sizeof(*spi));
+ }
+ retval = 1;
+ goto bailout;
+ }
+ *alloc_len = sizeof(*spi);
+ bzero(spi, sizeof(*spi));
+
+ spi->format_protocol = SCSI_PROTO_SPI | SCSI_TRN_SPI_FORMAT_DEFAULT;
+ scsi_ulto2b(scsi_addr, spi->scsi_addr);
+ scsi_ulto2b(target_port, spi->rel_trgt_port_id);
+
+ *hdr = (struct scsi_transportid_header *)spi;
+bailout:
+ return (retval);
+}
+
+/*
+ * Parse an RDMA/SRP Initiator Port ID string. This is 32 hexadecimal digits,
+ * optionally prefixed by "0x" or "0X".
+ */
+int
+scsi_parse_transportid_rdma(char *id_str, struct scsi_transportid_header **hdr,
+ unsigned int *alloc_len,
+#ifdef _KERNEL
+ struct malloc_type *type, int flags,
+#endif
+ char *error_str, int error_str_len)
+{
+ struct scsi_transportid_rdma *rdma;
+ int retval;
+ size_t id_len, rdma_id_size;
+ uint8_t rdma_id[SCSI_TRN_RDMA_PORT_LEN];
+ char *tmpstr;
+ unsigned int i, j;
+
+ retval = 0;
+ id_len = strlen(id_str);
+ rdma_id_size = SCSI_TRN_RDMA_PORT_LEN;
+
+ /*
+ * Check the size. It needs to be either 32 or 34 characters long.
+ */
+ if ((id_len != (rdma_id_size * 2))
+ && (id_len != ((rdma_id_size * 2) + 2))) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: RDMA ID "
+ "must be 32 hex digits (0x prefix "
+ "optional), only %zu seen", __func__, id_len);
+ }
+ retval = 1;
+ goto bailout;
+ }
+
+ tmpstr = id_str;
+ /*
+ * If the user gave us 34 characters, the string needs to start
+ * with '0x'.
+ */
+ if (id_len == ((rdma_id_size * 2) + 2)) {
+ if ((tmpstr[0] == '0')
+ && ((tmpstr[1] == 'x') || (tmpstr[1] == 'X'))) {
+ tmpstr += 2;
+ } else {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: RDMA "
+ "ID prefix, if used, must be \"0x\", "
+ "got %s", __func__, tmpstr);
+ }
+ retval = 1;
+ goto bailout;
+ }
+ }
+ bzero(rdma_id, sizeof(rdma_id));
+
+ /*
+ * Convert ASCII hex into binary bytes. There is no standard
+ * 128-bit integer type, and so no strtou128t() routine to convert
+ * from hex into a large integer. In the end, we're not going to
+ * an integer, but rather to a byte array, so that and the fact
+ * that we require the user to give us 32 hex digits simplifies the
+ * logic.
+ */
+ for (i = 0; i < (rdma_id_size * 2); i++) {
+ int cur_shift;
+ unsigned char c;
+
+ /* Increment the byte array one for every 2 hex digits */
+ j = i >> 1;
+
+ /*
+ * The first digit in every pair is the most significant
+ * 4 bits. The second is the least significant 4 bits.
+ */
+ if ((i % 2) == 0)
+ cur_shift = 4;
+ else
+ cur_shift = 0;
+
+ c = tmpstr[i];
+ /* Convert the ASCII hex character into a number */
+ if (isdigit(c))
+ c -= '0';
+ else if (isalpha(c))
+ c -= isupper(c) ? 'A' - 10 : 'a' - 10;
+ else {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: "
+ "RDMA ID must be hex digits, got "
+ "invalid character %c", __func__,
+ tmpstr[i]);
+ }
+ retval = 1;
+ goto bailout;
+ }
+ /*
+ * The converted number can't be less than 0; the type is
+ * unsigned, and the subtraction logic will not give us
+ * a negative number. So we only need to make sure that
+ * the value is not greater than 0xf. (i.e. make sure the
+ * user didn't give us a value like "0x12jklmno").
+ */
+ if (c > 0xf) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: "
+ "RDMA ID must be hex digits, got "
+ "invalid character %c", __func__,
+ tmpstr[i]);
+ }
+ retval = 1;
+ goto bailout;
+ }
+
+ rdma_id[j] |= c << cur_shift;
+ }
+
+#ifdef _KERNEL
+ rdma = malloc(sizeof(*rdma), type, flags);
+#else
+ rdma = malloc(sizeof(*rdma));
+#endif
+ if (rdma == NULL) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: unable to "
+ "allocate %zu bytes", __func__,
+ sizeof(*rdma));
+ }
+ retval = 1;
+ goto bailout;
+ }
+ *alloc_len = sizeof(*rdma);
+ bzero(rdma, *alloc_len);
+
+ rdma->format_protocol = SCSI_PROTO_RDMA | SCSI_TRN_RDMA_FORMAT_DEFAULT;
+ bcopy(rdma_id, rdma->initiator_port_id, SCSI_TRN_RDMA_PORT_LEN);
+
+ *hdr = (struct scsi_transportid_header *)rdma;
+
+bailout:
+ return (retval);
+}
+
+/*
+ * Parse an iSCSI name. The format is either just the name:
+ *
+ * iqn.2012-06.com.example:target0
+ * or the name, separator and initiator session ID:
+ *
+ * iqn.2012-06.com.example:target0,i,0x123
+ *
+ * The separator format is exact.
+ */
+int
+scsi_parse_transportid_iscsi(char *id_str, struct scsi_transportid_header **hdr,
+ unsigned int *alloc_len,
+#ifdef _KERNEL
+ struct malloc_type *type, int flags,
+#endif
+ char *error_str, int error_str_len)
+{
+ size_t id_len, sep_len, id_size, name_len;
+ int retval;
+ unsigned int i, sep_pos, sep_found;
+ const char *sep_template = ",i,0x";
+ const char *iqn_prefix = "iqn.";
+ struct scsi_transportid_iscsi_device *iscsi;
+
+ retval = 0;
+ sep_found = 0;
+
+ id_len = strlen(id_str);
+ sep_len = strlen(sep_template);
+
+ /*
+ * The separator is defined as exactly ',i,0x'. Any other commas,
+ * or any other form, is an error. So look for a comma, and once
+ * we find that, the next few characters must match the separator
+ * exactly. Once we get through the separator, there should be at
+ * least one character.
+ */
+ for (i = 0, sep_pos = 0; i < id_len; i++) {
+ if (sep_pos == 0) {
+ if (id_str[i] == sep_template[sep_pos])
+ sep_pos++;
+
+ continue;
+ }
+ if (sep_pos < sep_len) {
+ if (id_str[i] == sep_template[sep_pos]) {
+ sep_pos++;
+ continue;
+ }
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: "
+ "invalid separator in iSCSI name "
+ "\"%s\"",
+ __func__, id_str);
+ }
+ retval = 1;
+ goto bailout;
+ } else {
+ sep_found = 1;
+ break;
+ }
+ }
+
+ /*
+ * Check to see whether we have a separator but no digits after it.
+ */
+ if ((sep_pos != 0)
+ && (sep_found == 0)) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: no digits "
+ "found after separator in iSCSI name \"%s\"",
+ __func__, id_str);
+ }
+ retval = 1;
+ goto bailout;
+ }
+
+ /*
+ * The incoming ID string has the "iqn." prefix stripped off. We
+ * need enough space for the base structure (the structures are the
+ * same for the two iSCSI forms), the prefix, the ID string and a
+ * terminating NUL.
+ */
+ id_size = sizeof(*iscsi) + strlen(iqn_prefix) + id_len + 1;
+
+#ifdef _KERNEL
+ iscsi = malloc(id_size, type, flags);
+#else
+ iscsi = malloc(id_size);
+#endif
+ if (iscsi == NULL) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: unable to "
+ "allocate %zu bytes", __func__, id_size);
+ }
+ retval = 1;
+ goto bailout;
+ }
+ *alloc_len = id_size;
+ bzero(iscsi, id_size);
+
+ iscsi->format_protocol = SCSI_PROTO_ISCSI;
+ if (sep_found == 0)
+ iscsi->format_protocol |= SCSI_TRN_ISCSI_FORMAT_DEVICE;
+ else
+ iscsi->format_protocol |= SCSI_TRN_ISCSI_FORMAT_PORT;
+ name_len = id_size - sizeof(*iscsi);
+ scsi_ulto2b(name_len, iscsi->additional_length);
+ snprintf(iscsi->iscsi_name, name_len, "%s%s", iqn_prefix, id_str);
+
+ *hdr = (struct scsi_transportid_header *)iscsi;
+
+bailout:
+ return (retval);
+}
+
+/*
+ * Parse a SCSI over PCIe (SOP) identifier. The Routing ID can either be
+ * of the form 'bus,device,function' or 'bus,function'.
+ */
+int
+scsi_parse_transportid_sop(char *id_str, struct scsi_transportid_header **hdr,
+ unsigned int *alloc_len,
+#ifdef _KERNEL
+ struct malloc_type *type, int flags,
+#endif
+ char *error_str, int error_str_len)
+{
+ struct scsi_transportid_sop *sop;
+ unsigned long bus, device, function;
+ char *tmpstr, *endptr;
+ int retval, device_spec;
+
+ retval = 0;
+ device_spec = 0;
+ device = 0;
+
+ tmpstr = strsep(&id_str, ",");
+ if ((tmpstr == NULL)
+ || (*tmpstr == '\0')) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: no ID found",
+ __func__);
+ }
+ retval = 1;
+ goto bailout;
+ }
+ bus = strtoul(tmpstr, &endptr, 0);
+ if (*endptr != '\0') {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: error "
+ "parsing PCIe bus %s, number required",
+ __func__, tmpstr);
+ }
+ retval = 1;
+ goto bailout;
+ }
+ if ((id_str == NULL)
+ || (*id_str == '\0')) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: no PCIe "
+ "device or function found", __func__);
+ }
+ retval = 1;
+ goto bailout;
+ }
+ tmpstr = strsep(&id_str, ",");
+ function = strtoul(tmpstr, &endptr, 0);
+ if (*endptr != '\0') {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: error "
+ "parsing PCIe device/function %s, number "
+ "required", __func__, tmpstr);
+ }
+ retval = 1;
+ goto bailout;
+ }
+ /*
+ * Check to see whether the user specified a third value. If so,
+ * the second is the device.
+ */
+ if (id_str != NULL) {
+ if (*id_str == '\0') {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: "
+ "no PCIe function found", __func__);
+ }
+ retval = 1;
+ goto bailout;
+ }
+ device = function;
+ device_spec = 1;
+ function = strtoul(id_str, &endptr, 0);
+ if (*endptr != '\0') {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: "
+ "error parsing PCIe function %s, "
+ "number required", __func__, id_str);
+ }
+ retval = 1;
+ goto bailout;
+ }
+ }
+ if (bus > SCSI_TRN_SOP_BUS_MAX) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: bus value "
+ "%lu greater than maximum %u", __func__,
+ bus, SCSI_TRN_SOP_BUS_MAX);
+ }
+ retval = 1;
+ goto bailout;
+ }
+
+ if ((device_spec != 0)
+ && (device > SCSI_TRN_SOP_DEV_MASK)) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: device value "
+ "%lu greater than maximum %u", __func__,
+ device, SCSI_TRN_SOP_DEV_MAX);
+ }
+ retval = 1;
+ goto bailout;
+ }
+
+ if (((device_spec != 0)
+ && (function > SCSI_TRN_SOP_FUNC_NORM_MAX))
+ || ((device_spec == 0)
+ && (function > SCSI_TRN_SOP_FUNC_ALT_MAX))) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: function value "
+ "%lu greater than maximum %u", __func__,
+ function, (device_spec == 0) ?
+ SCSI_TRN_SOP_FUNC_ALT_MAX :
+ SCSI_TRN_SOP_FUNC_NORM_MAX);
+ }
+ retval = 1;
+ goto bailout;
+ }
+
+#ifdef _KERNEL
+ sop = malloc(sizeof(*sop), type, flags);
+#else
+ sop = malloc(sizeof(*sop));
+#endif
+ if (sop == NULL) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: unable to "
+ "allocate %zu bytes", __func__, sizeof(*sop));
+ }
+ retval = 1;
+ goto bailout;
+ }
+ *alloc_len = sizeof(*sop);
+ bzero(sop, sizeof(*sop));
+ sop->format_protocol = SCSI_PROTO_SOP | SCSI_TRN_SOP_FORMAT_DEFAULT;
+ if (device_spec != 0) {
+ struct scsi_sop_routing_id_norm rid;
+
+ rid.bus = bus;
+ rid.devfunc = (device << SCSI_TRN_SOP_DEV_SHIFT) | function;
+ bcopy(&rid, sop->routing_id, MIN(sizeof(rid),
+ sizeof(sop->routing_id)));
+ } else {
+ struct scsi_sop_routing_id_alt rid;
+
+ rid.bus = bus;
+ rid.function = function;
+ bcopy(&rid, sop->routing_id, MIN(sizeof(rid),
+ sizeof(sop->routing_id)));
+ }
+
+ *hdr = (struct scsi_transportid_header *)sop;
+bailout:
+ return (retval);
+}
+
+/*
+ * transportid_str: NUL-terminated string with format: protocol,id
+ * The ID is protocol specific.
+ * hdr: Storage will be allocated for the transport ID.
+ * alloc_len: The amount of memory allocated is returned here.
+ * type: Malloc bucket (kernel only).
+ * flags: Malloc flags (kernel only).
+ * error_str: If non-NULL, it will contain error information (without
+ * a terminating newline) if an error is returned.
+ * error_str_len: Allocated length of the error string.
+ *
+ * Returns 0 for success, non-zero for failure.
+ */
+int
+scsi_parse_transportid(char *transportid_str,
+ struct scsi_transportid_header **hdr,
+ unsigned int *alloc_len,
+#ifdef _KERNEL
+ struct malloc_type *type, int flags,
+#endif
+ char *error_str, int error_str_len)
+{
+ char *tmpstr;
+ scsi_nv_status status;
+ u_int num_proto_entries;
+ int retval, table_entry;
+
+ retval = 0;
+ table_entry = 0;
+
+ /*
+ * We do allow a period as well as a comma to separate the protocol
+ * from the ID string. This is to accommodate iSCSI names, which
+ * start with "iqn.".
+ */
+ tmpstr = strsep(&transportid_str, ",.");
+ if (tmpstr == NULL) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len,
+ "%s: transportid_str is NULL", __func__);
+ }
+ retval = 1;
+ goto bailout;
+ }
+
+ /*
+ * Map the protocol name onto a SCSI protocol number via the
+ * name/value table, ignoring case.
+ */
+ num_proto_entries = nitems(scsi_proto_map);
+ status = scsi_get_nv(scsi_proto_map, num_proto_entries, tmpstr,
+ &table_entry, SCSI_NV_FLAG_IG_CASE);
+ if (status != SCSI_NV_FOUND) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: %s protocol "
+ "name %s", __func__,
+ (status == SCSI_NV_AMBIGUOUS) ? "ambiguous" :
+ "invalid", tmpstr);
+ }
+ retval = 1;
+ goto bailout;
+ }
+ /*
+ * Hand the remainder of the string to the parser for this
+ * protocol's Transport ID format.
+ */
+ switch (scsi_proto_map[table_entry].value) {
+ case SCSI_PROTO_FC:
+ case SCSI_PROTO_1394:
+ case SCSI_PROTO_SAS:
+ retval = scsi_parse_transportid_64bit(
+ scsi_proto_map[table_entry].value, transportid_str, hdr,
+ alloc_len,
+#ifdef _KERNEL
+ type, flags,
+#endif
+ error_str, error_str_len);
+ break;
+ case SCSI_PROTO_SPI:
+ retval = scsi_parse_transportid_spi(transportid_str, hdr,
+ alloc_len,
+#ifdef _KERNEL
+ type, flags,
+#endif
+ error_str, error_str_len);
+ break;
+ case SCSI_PROTO_RDMA:
+ retval = scsi_parse_transportid_rdma(transportid_str, hdr,
+ alloc_len,
+#ifdef _KERNEL
+ type, flags,
+#endif
+ error_str, error_str_len);
+ break;
+ case SCSI_PROTO_ISCSI:
+ retval = scsi_parse_transportid_iscsi(transportid_str, hdr,
+ alloc_len,
+#ifdef _KERNEL
+ type, flags,
+#endif
+ error_str, error_str_len);
+ break;
+ case SCSI_PROTO_SOP:
+ retval = scsi_parse_transportid_sop(transportid_str, hdr,
+ alloc_len,
+#ifdef _KERNEL
+ type, flags,
+#endif
+ error_str, error_str_len);
+ break;
+ case SCSI_PROTO_SSA:
+ case SCSI_PROTO_ADITP:
+ case SCSI_PROTO_ATA:
+ case SCSI_PROTO_UAS:
+ case SCSI_PROTO_NONE:
+ default:
+ /*
+ * There is no format defined for a Transport ID for these
+ * protocols. So even if the user gives us something, we
+ * have no way to turn it into a standard SCSI Transport ID.
+ */
+ retval = 1;
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "%s: no Transport "
+ "ID format exists for protocol %s",
+ __func__, tmpstr);
+ }
+ goto bailout;
+ break; /* NOTREACHED */
+ }
+bailout:
+ return (retval);
+}
+
+/*
+ * Default table of known MAM attributes: attribute ID, formatting flags,
+ * a human-readable description, an optional unit suffix, the sbuf
+ * formatting callback (to_str) and a parse callback (parse_str, unused
+ * here).  The 0x0ff1-0x0ffe and 0x17f1-0x17ff entries are the "Spectra
+ * MLM" attributes named in their description strings, rendered as hex
+ * dumps.
+ */
+struct scsi_attrib_table_entry scsi_mam_attr_table[] = {
+ { SMA_ATTR_REM_CAP_PARTITION, SCSI_ATTR_FLAG_NONE,
+ "Remaining Capacity in Partition",
+ /*suffix*/ "MB", /*to_str*/ scsi_attrib_int_sbuf,/*parse_str*/ NULL },
+ { SMA_ATTR_MAX_CAP_PARTITION, SCSI_ATTR_FLAG_NONE,
+ "Maximum Capacity in Partition",
+ /*suffix*/"MB", /*to_str*/ scsi_attrib_int_sbuf, /*parse_str*/ NULL },
+ { SMA_ATTR_TAPEALERT_FLAGS, SCSI_ATTR_FLAG_HEX,
+ "TapeAlert Flags",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_int_sbuf, /*parse_str*/ NULL },
+ { SMA_ATTR_LOAD_COUNT, SCSI_ATTR_FLAG_NONE,
+ "Load Count",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_int_sbuf, /*parse_str*/ NULL },
+ { SMA_ATTR_MAM_SPACE_REMAINING, SCSI_ATTR_FLAG_NONE,
+ "MAM Space Remaining",
+ /*suffix*/"bytes", /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_DEV_ASSIGNING_ORG, SCSI_ATTR_FLAG_NONE,
+ "Assigning Organization",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_ascii_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_FORMAT_DENSITY_CODE, SCSI_ATTR_FLAG_HEX,
+ "Format Density Code",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_int_sbuf, /*parse_str*/ NULL },
+ { SMA_ATTR_INITIALIZATION_COUNT, SCSI_ATTR_FLAG_NONE,
+ "Initialization Count",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_int_sbuf, /*parse_str*/ NULL },
+ { SMA_ATTR_VOLUME_ID, SCSI_ATTR_FLAG_NONE,
+ "Volume Identifier",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_ascii_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_VOLUME_CHANGE_REF, SCSI_ATTR_FLAG_HEX,
+ "Volume Change Reference",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_DEV_SERIAL_LAST_LOAD, SCSI_ATTR_FLAG_NONE,
+ "Device Vendor/Serial at Last Load",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_vendser_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_DEV_SERIAL_LAST_LOAD_1, SCSI_ATTR_FLAG_NONE,
+ "Device Vendor/Serial at Last Load - 1",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_vendser_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_DEV_SERIAL_LAST_LOAD_2, SCSI_ATTR_FLAG_NONE,
+ "Device Vendor/Serial at Last Load - 2",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_vendser_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_DEV_SERIAL_LAST_LOAD_3, SCSI_ATTR_FLAG_NONE,
+ "Device Vendor/Serial at Last Load - 3",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_vendser_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_TOTAL_MB_WRITTEN_LT, SCSI_ATTR_FLAG_NONE,
+ "Total MB Written in Medium Life",
+ /*suffix*/ "MB", /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_TOTAL_MB_READ_LT, SCSI_ATTR_FLAG_NONE,
+ "Total MB Read in Medium Life",
+ /*suffix*/ "MB", /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_TOTAL_MB_WRITTEN_CUR, SCSI_ATTR_FLAG_NONE,
+ "Total MB Written in Current/Last Load",
+ /*suffix*/ "MB", /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_TOTAL_MB_READ_CUR, SCSI_ATTR_FLAG_NONE,
+ "Total MB Read in Current/Last Load",
+ /*suffix*/ "MB", /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_FIRST_ENC_BLOCK, SCSI_ATTR_FLAG_NONE,
+ "Logical Position of First Encrypted Block",
+ /*suffix*/ NULL, /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_NEXT_UNENC_BLOCK, SCSI_ATTR_FLAG_NONE,
+ "Logical Position of First Unencrypted Block after First "
+ "Encrypted Block",
+ /*suffix*/ NULL, /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_MEDIUM_USAGE_HIST, SCSI_ATTR_FLAG_NONE,
+ "Medium Usage History",
+ /*suffix*/ NULL, /*to_str*/ NULL,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_PART_USAGE_HIST, SCSI_ATTR_FLAG_NONE,
+ "Partition Usage History",
+ /*suffix*/ NULL, /*to_str*/ NULL,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_MED_MANUF, SCSI_ATTR_FLAG_NONE,
+ "Medium Manufacturer",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_ascii_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_MED_SERIAL, SCSI_ATTR_FLAG_NONE,
+ "Medium Serial Number",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_ascii_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_MED_LENGTH, SCSI_ATTR_FLAG_NONE,
+ "Medium Length",
+ /*suffix*/"m", /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_MED_WIDTH, SCSI_ATTR_FLAG_FP | SCSI_ATTR_FLAG_DIV_10 |
+ SCSI_ATTR_FLAG_FP_1DIGIT,
+ "Medium Width",
+ /*suffix*/"mm", /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_MED_ASSIGNING_ORG, SCSI_ATTR_FLAG_NONE,
+ "Assigning Organization",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_ascii_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_MED_DENSITY_CODE, SCSI_ATTR_FLAG_HEX,
+ "Medium Density Code",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_MED_MANUF_DATE, SCSI_ATTR_FLAG_NONE,
+ "Medium Manufacture Date",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_ascii_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_MAM_CAPACITY, SCSI_ATTR_FLAG_NONE,
+ "MAM Capacity",
+ /*suffix*/"bytes", /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_MED_TYPE, SCSI_ATTR_FLAG_HEX,
+ "Medium Type",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_MED_TYPE_INFO, SCSI_ATTR_FLAG_HEX,
+ "Medium Type Information",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_MED_SERIAL_NUM, SCSI_ATTR_FLAG_NONE,
+ "Medium Serial Number",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_APP_VENDOR, SCSI_ATTR_FLAG_NONE,
+ "Application Vendor",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_ascii_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_APP_NAME, SCSI_ATTR_FLAG_NONE,
+ "Application Name",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_ascii_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_APP_VERSION, SCSI_ATTR_FLAG_NONE,
+ "Application Version",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_ascii_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_USER_MED_TEXT_LABEL, SCSI_ATTR_FLAG_NONE,
+ "User Medium Text Label",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_text_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_LAST_WRITTEN_TIME, SCSI_ATTR_FLAG_NONE,
+ "Date and Time Last Written",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_ascii_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_TEXT_LOCAL_ID, SCSI_ATTR_FLAG_HEX,
+ "Text Localization Identifier",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_BARCODE, SCSI_ATTR_FLAG_NONE,
+ "Barcode",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_ascii_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_HOST_OWNER_NAME, SCSI_ATTR_FLAG_NONE,
+ "Owning Host Textual Name",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_text_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_MEDIA_POOL, SCSI_ATTR_FLAG_NONE,
+ "Media Pool",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_text_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_PART_USER_LABEL, SCSI_ATTR_FLAG_NONE,
+ "Partition User Text Label",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_ascii_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_LOAD_UNLOAD_AT_PART, SCSI_ATTR_FLAG_NONE,
+ "Load/Unload at Partition",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_int_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_APP_FORMAT_VERSION, SCSI_ATTR_FLAG_NONE,
+ "Application Format Version",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_ascii_sbuf,
+ /*parse_str*/ NULL },
+ { SMA_ATTR_VOL_COHERENCY_INFO, SCSI_ATTR_FLAG_NONE,
+ "Volume Coherency Information",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_volcoh_sbuf,
+ /*parse_str*/ NULL },
+ { 0x0ff1, SCSI_ATTR_FLAG_NONE,
+ "Spectra MLM Creation",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_hexdump_sbuf,
+ /*parse_str*/ NULL },
+ { 0x0ff2, SCSI_ATTR_FLAG_NONE,
+ "Spectra MLM C3",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_hexdump_sbuf,
+ /*parse_str*/ NULL },
+ { 0x0ff3, SCSI_ATTR_FLAG_NONE,
+ "Spectra MLM RW",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_hexdump_sbuf,
+ /*parse_str*/ NULL },
+ { 0x0ff4, SCSI_ATTR_FLAG_NONE,
+ "Spectra MLM SDC List",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_hexdump_sbuf,
+ /*parse_str*/ NULL },
+ { 0x0ff7, SCSI_ATTR_FLAG_NONE,
+ "Spectra MLM Post Scan",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_hexdump_sbuf,
+ /*parse_str*/ NULL },
+ { 0x0ffe, SCSI_ATTR_FLAG_NONE,
+ "Spectra MLM Checksum",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_hexdump_sbuf,
+ /*parse_str*/ NULL },
+ { 0x17f1, SCSI_ATTR_FLAG_NONE,
+ "Spectra MLM Creation",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_hexdump_sbuf,
+ /*parse_str*/ NULL },
+ { 0x17f2, SCSI_ATTR_FLAG_NONE,
+ "Spectra MLM C3",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_hexdump_sbuf,
+ /*parse_str*/ NULL },
+ { 0x17f3, SCSI_ATTR_FLAG_NONE,
+ "Spectra MLM RW",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_hexdump_sbuf,
+ /*parse_str*/ NULL },
+ { 0x17f4, SCSI_ATTR_FLAG_NONE,
+ "Spectra MLM SDC List",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_hexdump_sbuf,
+ /*parse_str*/ NULL },
+ { 0x17f7, SCSI_ATTR_FLAG_NONE,
+ "Spectra MLM Post Scan",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_hexdump_sbuf,
+ /*parse_str*/ NULL },
+ { 0x17ff, SCSI_ATTR_FLAG_NONE,
+ "Spectra MLM Checksum",
+ /*suffix*/NULL, /*to_str*/ scsi_attrib_hexdump_sbuf,
+ /*parse_str*/ NULL },
+};
+
+/*
+ * Print out Volume Coherency Information (Attribute 0x080c).
+ * This field has two variable length members, including one at the
+ * beginning, so it isn't practical to have a fixed structure definition.
+ * This is current as of SSC4r03 (see section 4.2.21.3), dated March 25,
+ * 2013.
+ */
+int
+scsi_attrib_volcoh_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr,
+   uint32_t valid_len, uint32_t flags,
+   uint32_t output_flags, char *error_str,
+   int error_str_len)
+{
+ size_t avail_len;
+ uint32_t field_size;
+ uint64_t tmp_val;
+ uint8_t *cur_ptr;
+ int retval;
+ int vcr_len, as_len;
+
+ retval = 0;
+ tmp_val = 0;
+
+ /* Fail if the header claims more data than the caller provided. */
+ field_size = scsi_2btoul(hdr->length);
+ avail_len = valid_len - sizeof(*hdr);
+ if (field_size > avail_len) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "Available "
+ "length of attribute ID 0x%.4x %zu < field "
+ "length %u", scsi_2btoul(hdr->id), avail_len,
+ field_size);
+ }
+ retval = 1;
+ goto bailout;
+ } else if (field_size == 0) {
+ /*
+ * It isn't clear from the spec whether a field length of
+ * 0 is invalid here. It probably is, but be lenient here
+ * to avoid inconveniencing the user.
+ */
+ goto bailout;
+ }
+ /*
+ * The attribute starts with a one-byte length for the Volume
+ * Change Reference value, followed by the value itself.
+ */
+ cur_ptr = hdr->attribute;
+ vcr_len = *cur_ptr;
+ cur_ptr++;
+
+ sbuf_printf(sb, "\n\tVolume Change Reference Value:");
+
+ switch (vcr_len) {
+ case 0:
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "Volume Change "
+ "Reference value has length of 0");
+ }
+ retval = 1;
+ goto bailout;
+ break; /*NOTREACHED*/
+ case 1:
+ tmp_val = *cur_ptr;
+ break;
+ case 2:
+ tmp_val = scsi_2btoul(cur_ptr);
+ break;
+ case 3:
+ tmp_val = scsi_3btoul(cur_ptr);
+ break;
+ case 4:
+ tmp_val = scsi_4btoul(cur_ptr);
+ break;
+ case 8:
+ tmp_val = scsi_8btou64(cur_ptr);
+ break;
+ default:
+ /* Odd sizes (5-7, >8) are dumped as raw hex instead. */
+ sbuf_printf(sb, "\n");
+ sbuf_hexdump(sb, cur_ptr, vcr_len, NULL, 0);
+ break;
+ }
+ if (vcr_len <= 8)
+ sbuf_printf(sb, " 0x%jx\n", (uintmax_t)tmp_val);
+
+ /* Next: 8-byte count, then 8-byte set identifier. */
+ cur_ptr += vcr_len;
+ tmp_val = scsi_8btou64(cur_ptr);
+ sbuf_printf(sb, "\tVolume Coherency Count: %ju\n", (uintmax_t)tmp_val);
+
+ cur_ptr += sizeof(tmp_val);
+ tmp_val = scsi_8btou64(cur_ptr);
+ sbuf_printf(sb, "\tVolume Coherency Set Identifier: 0x%jx\n",
+     (uintmax_t)tmp_val);
+
+ /*
+ * Figure out how long the Application Client Specific Information
+ * is and produce a hexdump.
+ */
+ cur_ptr += sizeof(tmp_val);
+ as_len = scsi_2btoul(cur_ptr);
+ cur_ptr += sizeof(uint16_t);
+ sbuf_printf(sb, "\tApplication Client Specific Information: ");
+ /*
+ * NOTE(review): cur_ptr is uint8_t * but strncmp()/%s below take
+ * char * — relies on an implicit pointer conversion; confirm this
+ * compiles cleanly with the project's warning flags.
+ */
+ if (((as_len == SCSI_LTFS_VER0_LEN)
+  || (as_len == SCSI_LTFS_VER1_LEN))
+  && (strncmp(cur_ptr, SCSI_LTFS_STR_NAME, SCSI_LTFS_STR_LEN) == 0)) {
+ sbuf_printf(sb, "LTFS\n");
+ cur_ptr += SCSI_LTFS_STR_LEN + 1;
+ /*
+ * NOTE(review): this writes a NUL into the caller's buffer
+ * at SCSI_LTFS_UUID_LEN without re-checking that the buffer
+ * extends that far past cur_ptr — confirm valid_len covers it.
+ */
+ if (cur_ptr[SCSI_LTFS_UUID_LEN] != '\0')
+ cur_ptr[SCSI_LTFS_UUID_LEN] = '\0';
+ sbuf_printf(sb, "\tLTFS UUID: %s\n", cur_ptr);
+ cur_ptr += SCSI_LTFS_UUID_LEN + 1;
+ /* XXX KDM check the length */
+ sbuf_printf(sb, "\tLTFS Version: %d\n", *cur_ptr);
+ } else {
+ sbuf_printf(sb, "Unknown\n");
+ sbuf_hexdump(sb, cur_ptr, as_len, NULL, 0);
+ }
+
+bailout:
+ return (retval);
+}
+
+/*
+ * Format a vendor/serial-number attribute into the supplied sbuf: the
+ * vendor ID, a space, then the serial number, with non-ASCII bytes
+ * trimmed, passed through raw, or escaped per output_flags.
+ *
+ * Returns 0 for success, non-zero for failure (truncated or empty field).
+ */
+int
+scsi_attrib_vendser_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr,
+   uint32_t valid_len, uint32_t flags,
+   uint32_t output_flags, char *error_str,
+   int error_str_len)
+{
+ size_t avail_len;
+ uint32_t field_size;
+ struct scsi_attrib_vendser *vendser;
+ cam_strvis_flags strvis_flags;
+ int retval = 0;
+
+ field_size = scsi_2btoul(hdr->length);
+ avail_len = valid_len - sizeof(*hdr);
+ if (field_size > avail_len) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "Available "
+ "length of attribute ID 0x%.4x %zu < field "
+ "length %u", scsi_2btoul(hdr->id), avail_len,
+ field_size);
+ }
+ retval = 1;
+ goto bailout;
+ } else if (field_size == 0) {
+ /*
+ * A field size of 0 doesn't make sense here. The device
+ * can at least give you the vendor ID, even if it can't
+ * give you the serial number.
+ */
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "The length of "
+ "attribute ID 0x%.4x is 0",
+ scsi_2btoul(hdr->id));
+ }
+ retval = 1;
+ goto bailout;
+ }
+ vendser = (struct scsi_attrib_vendser *)hdr->attribute;
+
+ /* Map the requested non-ASCII handling onto cam_strvis flags. */
+ switch (output_flags & SCSI_ATTR_OUTPUT_NONASCII_MASK) {
+ case SCSI_ATTR_OUTPUT_NONASCII_TRIM:
+ strvis_flags = CAM_STRVIS_FLAG_NONASCII_TRIM;
+ break;
+ case SCSI_ATTR_OUTPUT_NONASCII_RAW:
+ strvis_flags = CAM_STRVIS_FLAG_NONASCII_RAW;
+ break;
+ case SCSI_ATTR_OUTPUT_NONASCII_ESC:
+ default:
+ strvis_flags = CAM_STRVIS_FLAG_NONASCII_ESC;
+ break;
+ }
+ cam_strvis_sbuf(sb, vendser->vendor, sizeof(vendser->vendor),
+     strvis_flags);
+ sbuf_putc(sb, ' ');
+ cam_strvis_sbuf(sb, vendser->serial_num, sizeof(vendser->serial_num),
+     strvis_flags);
+bailout:
+ return (retval);
+}
+
+/*
+ * Produce a hex dump of an attribute's payload into the supplied sbuf,
+ * limited to whatever portion of the field fits in the valid data.
+ *
+ * Returns 0 (this formatter cannot fail; error_str is unused).
+ */
+int
+scsi_attrib_hexdump_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr,
+    uint32_t valid_len, uint32_t flags,
+    uint32_t output_flags, char *error_str,
+    int error_str_len)
+{
+ uint32_t field_size;
+ ssize_t avail_len;
+ uint32_t print_len;
+ uint8_t *num_ptr;
+ int retval = 0;
+
+ field_size = scsi_2btoul(hdr->length);
+ avail_len = valid_len - sizeof(*hdr);
+ /*
+ * Clamp a short valid_len: a negative avail_len would otherwise be
+ * chosen by the signed/unsigned MIN() below and then converted to a
+ * huge print_len, hexdumping far past the buffer.
+ */
+ if (avail_len < 0)
+ avail_len = 0;
+ print_len = MIN(avail_len, field_size);
+ num_ptr = hdr->attribute;
+
+ if (print_len > 0) {
+ sbuf_printf(sb, "\n");
+ sbuf_hexdump(sb, num_ptr, print_len, NULL, 0);
+ }
+
+ return (retval);
+}
+
+/*
+ * Format an integer attribute (1, 2, 3, 4, or 8 bytes, big endian) into
+ * the supplied sbuf, honoring the HEX / FP / DIV_10 / FP_1DIGIT flags.
+ * Oversized fields fall back to a hex dump.
+ *
+ * Returns 0 for success, non-zero for failure.
+ */
+int
+scsi_attrib_int_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr,
+       uint32_t valid_len, uint32_t flags,
+       uint32_t output_flags, char *error_str,
+       int error_str_len)
+{
+ uint64_t print_number;
+ size_t avail_len;
+ uint32_t number_size;
+ int retval = 0;
+
+ number_size = scsi_2btoul(hdr->length);
+
+ /* NOTE(review): assumes valid_len >= sizeof(*hdr); callers in this
+  * file guarantee it — confirm for any external callers. */
+ avail_len = valid_len - sizeof(*hdr);
+ if (avail_len < number_size) {
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "Available "
+ "length of attribute ID 0x%.4x %zu < field "
+ "length %u", scsi_2btoul(hdr->id), avail_len,
+ number_size);
+ }
+ retval = 1;
+ goto bailout;
+ }
+
+ switch (number_size) {
+ case 0:
+ /*
+ * We don't treat this as an error, since there may be
+ * scenarios where a device reports a field but then gives
+ * a length of 0. See the note in scsi_attrib_ascii_sbuf().
+ */
+ goto bailout;
+ break; /*NOTREACHED*/
+ case 1:
+ print_number = hdr->attribute[0];
+ break;
+ case 2:
+ print_number = scsi_2btoul(hdr->attribute);
+ break;
+ case 3:
+ print_number = scsi_3btoul(hdr->attribute);
+ break;
+ case 4:
+ print_number = scsi_4btoul(hdr->attribute);
+ break;
+ case 8:
+ print_number = scsi_8btou64(hdr->attribute);
+ break;
+ default:
+ /*
+ * If we wind up here, the number is too big to print
+ * normally, so just do a hexdump.
+ */
+ retval = scsi_attrib_hexdump_sbuf(sb, hdr, valid_len,
+         flags, output_flags,
+         error_str, error_str_len);
+ goto bailout;
+ break;
+ }
+
+ if (flags & SCSI_ATTR_FLAG_FP) {
+#ifndef _KERNEL
+ long double num_float;
+
+ num_float = (long double)print_number;
+
+ if (flags & SCSI_ATTR_FLAG_DIV_10)
+ num_float /= 10;
+
+ sbuf_printf(sb, "%.*Lf", (flags & SCSI_ATTR_FLAG_FP_1DIGIT) ?
+     1 : 0, num_float);
+#else /* _KERNEL */
+ /* No floating point in the kernel: integer-divide instead. */
+ sbuf_printf(sb, "%ju", (flags & SCSI_ATTR_FLAG_DIV_10) ?
+     (print_number / 10) : print_number);
+#endif /* _KERNEL */
+ } else if (flags & SCSI_ATTR_FLAG_HEX) {
+ sbuf_printf(sb, "0x%jx", (uintmax_t)print_number);
+ } else
+ sbuf_printf(sb, "%ju", (uintmax_t)print_number);
+
+bailout:
+ return (retval);
+}
+
+/*
+ * Format an ASCII attribute into the supplied sbuf via cam_strvis_sbuf(),
+ * with non-ASCII handling selected by output_flags.  A zero-length field
+ * is allowed by the spec and is not an error; only a field truncated by
+ * the caller's buffer is.
+ *
+ * Returns 0 for success, non-zero for failure.
+ */
+int
+scsi_attrib_ascii_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr,
+         uint32_t valid_len, uint32_t flags,
+         uint32_t output_flags, char *error_str,
+         int error_str_len)
+{
+ size_t avail_len;
+ uint32_t field_size, print_size;
+ int retval = 0;
+
+ /* NOTE(review): assumes valid_len >= sizeof(*hdr) — confirm for
+  * external callers; a short valid_len would wrap avail_len. */
+ avail_len = valid_len - sizeof(*hdr);
+ field_size = scsi_2btoul(hdr->length);
+ print_size = MIN(avail_len, field_size);
+
+ if (print_size > 0) {
+ cam_strvis_flags strvis_flags;
+
+ switch (output_flags & SCSI_ATTR_OUTPUT_NONASCII_MASK) {
+ case SCSI_ATTR_OUTPUT_NONASCII_TRIM:
+ strvis_flags = CAM_STRVIS_FLAG_NONASCII_TRIM;
+ break;
+ case SCSI_ATTR_OUTPUT_NONASCII_RAW:
+ strvis_flags = CAM_STRVIS_FLAG_NONASCII_RAW;
+ break;
+ case SCSI_ATTR_OUTPUT_NONASCII_ESC:
+ default:
+ strvis_flags = CAM_STRVIS_FLAG_NONASCII_ESC;
+ break;
+ }
+ cam_strvis_sbuf(sb, hdr->attribute, print_size, strvis_flags);
+ } else if (avail_len < field_size) {
+ /*
+ * We only report an error if the user didn't allocate
+ * enough space to hold the full value of this field. If
+ * the field length is 0, that is allowed by the spec.
+ * e.g. in SPC-4r37, section 7.4.2.2.5, VOLUME IDENTIFIER
+ * "This attribute indicates the current volume identifier
+ * (see SMC-3) of the medium. If the device server supports
+ * this attribute but does not have access to the volume
+ * identifier, the device server shall report this attribute
+ * with an attribute length value of zero."
+ */
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "Available "
+ "length of attribute ID 0x%.4x %zu < field "
+ "length %u", scsi_2btoul(hdr->id), avail_len,
+ field_size);
+ }
+ retval = 1;
+ }
+
+ return (retval);
+}
+
+/*
+ * Format a text attribute into the supplied sbuf.  NUL bytes are skipped;
+ * bytes >= 0x80 are escaped as "%xx" unless raw text output is requested
+ * via SCSI_ATTR_OUTPUT_TEXT_RAW.
+ *
+ * Returns 0 for success, non-zero for failure (field truncated by caller).
+ */
+int
+scsi_attrib_text_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr,
+        uint32_t valid_len, uint32_t flags,
+        uint32_t output_flags, char *error_str,
+        int error_str_len)
+{
+ size_t avail_len;
+ uint32_t field_size, print_size;
+ int retval = 0;
+ int esc_text = 1;
+
+ avail_len = valid_len - sizeof(*hdr);
+ field_size = scsi_2btoul(hdr->length);
+ print_size = MIN(avail_len, field_size);
+
+ /* Raw text output disables the %xx escaping of high bytes. */
+ if ((output_flags & SCSI_ATTR_OUTPUT_TEXT_MASK) ==
+      SCSI_ATTR_OUTPUT_TEXT_RAW)
+ esc_text = 0;
+
+ if (print_size > 0) {
+ uint32_t i;
+
+ for (i = 0; i < print_size; i++) {
+ if (hdr->attribute[i] == '\0')
+ continue;
+ else if (((unsigned char)hdr->attribute[i] < 0x80)
+       || (esc_text == 0))
+ sbuf_putc(sb, hdr->attribute[i]);
+ else
+ sbuf_printf(sb, "%%%02x",
+     (unsigned char)hdr->attribute[i]);
+ }
+ } else if (avail_len < field_size) {
+ /*
+ * We only report an error if the user didn't allocate
+ * enough space to hold the full value of this field.
+ */
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "Available "
+ "length of attribute ID 0x%.4x %zu < field "
+ "length %u", scsi_2btoul(hdr->id), avail_len,
+ field_size);
+ }
+ retval = 1;
+ }
+
+ return (retval);
+}
+
+struct scsi_attrib_table_entry *
+scsi_find_attrib_entry(struct scsi_attrib_table_entry *table,
+         size_t num_table_entries, uint32_t id)
+{
+ struct scsi_attrib_table_entry *cur, *end;
+
+ /*
+ * Linear scan of the table: return a pointer to the first entry
+ * whose attribute ID matches, or NULL if the ID is not present.
+ */
+ end = table + num_table_entries;
+ for (cur = table; cur != end; cur++) {
+ if (cur->id == id)
+ return (cur);
+ }
+
+ return (NULL);
+}
+
+struct scsi_attrib_table_entry *
+scsi_get_attrib_entry(uint32_t id)
+{
+ struct scsi_attrib_table_entry *entry;
+
+ /* Convenience wrapper: search the built-in MAM attribute table. */
+ entry = scsi_find_attrib_entry(scsi_mam_attr_table,
+     nitems(scsi_mam_attr_table), id);
+
+ return (entry);
+}
+
+/*
+ * Default attribute-value formatter: dispatch on the format bits in the
+ * header byte2 — ASCII via scsi_attrib_ascii_sbuf(), BINARY via the
+ * integer formatter (hexdump when longer than 8 bytes), TEXT via
+ * scsi_attrib_text_sbuf().  Trailing whitespace is trimmed on success.
+ *
+ * Returns 0 for success, non-zero for failure (including an unknown
+ * format code).
+ */
+int
+scsi_attrib_value_sbuf(struct sbuf *sb, uint32_t valid_len,
+         struct scsi_mam_attribute_header *hdr, uint32_t output_flags,
+         char *error_str, size_t error_str_len)
+{
+ int retval;
+
+ switch (hdr->byte2 & SMA_FORMAT_MASK) {
+ case SMA_FORMAT_ASCII:
+ retval = scsi_attrib_ascii_sbuf(sb, hdr, valid_len,
+     SCSI_ATTR_FLAG_NONE, output_flags, error_str,error_str_len);
+ break;
+ case SMA_FORMAT_BINARY:
+ if (scsi_2btoul(hdr->length) <= 8)
+ retval = scsi_attrib_int_sbuf(sb, hdr, valid_len,
+     SCSI_ATTR_FLAG_NONE, output_flags, error_str,
+     error_str_len);
+ else
+ retval = scsi_attrib_hexdump_sbuf(sb, hdr, valid_len,
+     SCSI_ATTR_FLAG_NONE, output_flags, error_str,
+     error_str_len);
+ break;
+ case SMA_FORMAT_TEXT:
+ retval = scsi_attrib_text_sbuf(sb, hdr, valid_len,
+     SCSI_ATTR_FLAG_NONE, output_flags, error_str,
+     error_str_len);
+ break;
+ default:
+ if (error_str != NULL) {
+ snprintf(error_str, error_str_len, "Unknown attribute "
+ "format 0x%x", hdr->byte2 & SMA_FORMAT_MASK);
+ }
+ retval = 1;
+ goto bailout;
+ break; /*NOTREACHED*/
+ }
+
+ sbuf_trim(sb);
+
+bailout:
+
+ return (retval);
+}
+
+/*
+ * Print the prefix for an attribute — any combination of description,
+ * numeric ID, allocated length, and read-only/read-write state, selected
+ * by output_flags — followed by ": ".  Does nothing if valid_len is too
+ * short to cover the header, or if no prefix fields were requested.
+ */
+void
+scsi_attrib_prefix_sbuf(struct sbuf *sb, uint32_t output_flags,
+   struct scsi_mam_attribute_header *hdr,
+   uint32_t valid_len, const char *desc)
+{
+ int need_space = 0;
+ uint32_t len;
+ uint32_t id;
+
+ /*
+ * We can't do anything if we don't have enough valid data for the
+ * header.
+ */
+ if (valid_len < sizeof(*hdr))
+ return;
+
+ id = scsi_2btoul(hdr->id);
+ /*
+ * Note that we print out the value of the attribute listed in the
+ * header, regardless of whether we actually got that many bytes
+ * back from the device through the controller. A truncated result
+ * could be the result of a failure to ask for enough data; the
+ * header indicates how many bytes are allocated for this attribute
+ * in the MAM.
+ */
+ len = scsi_2btoul(hdr->length);
+
+ if ((output_flags & SCSI_ATTR_OUTPUT_FIELD_MASK) ==
+     SCSI_ATTR_OUTPUT_FIELD_NONE)
+ return;
+
+ if ((output_flags & SCSI_ATTR_OUTPUT_FIELD_DESC)
+  && (desc != NULL)) {
+ sbuf_printf(sb, "%s", desc);
+ need_space = 1;
+ }
+
+ if (output_flags & SCSI_ATTR_OUTPUT_FIELD_NUM) {
+ sbuf_printf(sb, "%s(0x%.4x)", (need_space) ? " " : "", id);
+ need_space = 0;
+ }
+
+ if (output_flags & SCSI_ATTR_OUTPUT_FIELD_SIZE) {
+ sbuf_printf(sb, "%s[%d]", (need_space) ? " " : "", len);
+ need_space = 0;
+ }
+ if (output_flags & SCSI_ATTR_OUTPUT_FIELD_RW) {
+ sbuf_printf(sb, "%s(%s)", (need_space) ? " " : "",
+     (hdr->byte2 & SMA_READ_ONLY) ? "RO" : "RW");
+ }
+ sbuf_printf(sb, ": ");
+}
+
+/*
+ * Look the attribute up in the user-supplied and/or built-in attribute
+ * tables (search order controlled by prefer_user_table), print the field
+ * prefix, then format the value with the table entry's to_str callback,
+ * falling back to the format-based default formatter when there is no
+ * entry or no callback.  On success the entry's unit suffix (if any) is
+ * appended and the line is newline-terminated.
+ *
+ * Returns 0 for success, non-zero for failure.
+ */
+int
+scsi_attrib_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr,
+   uint32_t valid_len, struct scsi_attrib_table_entry *user_table,
+   size_t num_user_entries, int prefer_user_table,
+   uint32_t output_flags, char *error_str, int error_str_len)
+{
+ int retval;
+ struct scsi_attrib_table_entry *table1 = NULL, *table2 = NULL;
+ struct scsi_attrib_table_entry *entry = NULL;
+ size_t table1_size = 0, table2_size = 0;
+ uint32_t id;
+
+ retval = 0;
+
+ if (valid_len < sizeof(*hdr)) {
+ retval = 1;
+ goto bailout;
+ }
+
+ id = scsi_2btoul(hdr->id);
+
+ if (user_table != NULL) {
+ if (prefer_user_table != 0) {
+ table1 = user_table;
+ table1_size = num_user_entries;
+ table2 = scsi_mam_attr_table;
+ table2_size = nitems(scsi_mam_attr_table);
+ } else {
+ table1 = scsi_mam_attr_table;
+ table1_size = nitems(scsi_mam_attr_table);
+ table2 = user_table;
+ table2_size = num_user_entries;
+ }
+ } else {
+ table1 = scsi_mam_attr_table;
+ table1_size = nitems(scsi_mam_attr_table);
+ }
+
+ entry = scsi_find_attrib_entry(table1, table1_size, id);
+ if (entry != NULL) {
+ scsi_attrib_prefix_sbuf(sb, output_flags, hdr, valid_len,
+     entry->desc);
+ if (entry->to_str == NULL)
+ goto print_default;
+ retval = entry->to_str(sb, hdr, valid_len, entry->flags,
+     output_flags, error_str, error_str_len);
+ goto bailout;
+ }
+ if (table2 != NULL) {
+ entry = scsi_find_attrib_entry(table2, table2_size, id);
+ if (entry != NULL) {
+ /*
+ * Print the prefix before checking to_str, so the
+ * print_default fallback gets its prefix here too,
+ * just as on the first-table path above.
+ */
+ scsi_attrib_prefix_sbuf(sb, output_flags, hdr,
+     valid_len, entry->desc);
+ if (entry->to_str == NULL)
+ goto print_default;
+ retval = entry->to_str(sb, hdr, valid_len, entry->flags,
+            output_flags, error_str,
+            error_str_len);
+ goto bailout;
+ }
+ }
+
+ scsi_attrib_prefix_sbuf(sb, output_flags, hdr, valid_len, NULL);
+
+print_default:
+ retval = scsi_attrib_value_sbuf(sb, valid_len, hdr, output_flags,
+     error_str, error_str_len);
+bailout:
+ if (retval == 0) {
+ if ((entry != NULL)
+  && (entry->suffix != NULL))
+ sbuf_printf(sb, " %s", entry->suffix);
+
+ sbuf_trim(sb);
+ sbuf_printf(sb, "\n");
+ }
+
+ return (retval);
+}
+
void
scsi_test_unit_ready(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
@@ -5500,7 +7514,6 @@ scsi_test_unit_ready(struct ccb_scsiio *csio, u_int32_t retries,
scsi_cmd->opcode = TEST_UNIT_READY;
}
-#ifndef __rtems__
void
scsi_request_sense(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
@@ -5525,7 +7538,6 @@ scsi_request_sense(struct ccb_scsiio *csio, u_int32_t retries,
scsi_cmd->opcode = REQUEST_SENSE;
scsi_cmd->length = dxfer_len;
}
-#endif /* __rtems__ */
void
scsi_inquiry(struct ccb_scsiio *csio, u_int32_t retries,
@@ -5557,7 +7569,6 @@ scsi_inquiry(struct ccb_scsiio *csio, u_int32_t retries,
scsi_ulto2b(inq_len, scsi_cmd->length);
}
-#ifndef __rtems__
void
scsi_mode_sense(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
@@ -5790,7 +7801,6 @@ scsi_prevent(struct ccb_scsiio *csio, u_int32_t retries,
scsi_cmd->opcode = PREVENT_ALLOW;
scsi_cmd->how = action;
}
-#endif /* __rtems__ */
/* XXX allow specification of address and PMI bit and LBA */
void
@@ -5823,8 +7833,8 @@ void
scsi_read_capacity_16(struct ccb_scsiio *csio, uint32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
uint8_t tag_action, uint64_t lba, int reladr, int pmi,
- struct scsi_read_capacity_data_long *rcap_buf,
- uint8_t sense_len, uint32_t timeout)
+ uint8_t *rcap_buf, int rcap_buf_len, uint8_t sense_len,
+ uint32_t timeout)
{
struct scsi_read_capacity_16 *scsi_cmd;
@@ -5835,7 +7845,7 @@ scsi_read_capacity_16(struct ccb_scsiio *csio, uint32_t retries,
/*flags*/CAM_DIR_IN,
tag_action,
/*data_ptr*/(u_int8_t *)rcap_buf,
- /*dxfer_len*/sizeof(*rcap_buf),
+ /*dxfer_len*/rcap_buf_len,
sense_len,
sizeof(*scsi_cmd),
timeout);
@@ -5844,7 +7854,7 @@ scsi_read_capacity_16(struct ccb_scsiio *csio, uint32_t retries,
scsi_cmd->opcode = SERVICE_ACTION_IN;
scsi_cmd->service_action = SRC16_SERVICE_ACTION;
scsi_u64to8b(lba, scsi_cmd->addr);
- scsi_ulto4b(sizeof(*rcap_buf), scsi_cmd->alloc_len);
+ scsi_ulto4b(rcap_buf_len, scsi_cmd->alloc_len);
if (pmi)
reladr |= SRC16_PMI;
if (reladr)
@@ -6152,23 +8162,30 @@ scsi_ata_identify(struct ccb_scsiio *csio, u_int32_t retries,
u_int16_t dxfer_len, u_int8_t sense_len,
u_int32_t timeout)
{
- scsi_ata_pass_16(csio,
- retries,
- cbfcnp,
- /*flags*/CAM_DIR_IN,
- tag_action,
- /*protocol*/AP_PROTO_PIO_IN,
- /*ata_flags*/AP_FLAG_TDIR_FROM_DEV|
- AP_FLAG_BYT_BLOK_BYTES|AP_FLAG_TLEN_SECT_CNT,
- /*features*/0,
- /*sector_count*/dxfer_len,
- /*lba*/0,
- /*command*/ATA_ATA_IDENTIFY,
- /*control*/0,
- data_ptr,
- dxfer_len,
- sense_len,
- timeout);
+ scsi_ata_pass(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_IN,
+ tag_action,
+ /*protocol*/AP_PROTO_PIO_IN,
+ /*ata_flags*/AP_FLAG_TDIR_FROM_DEV |
+ AP_FLAG_BYT_BLOK_BYTES |
+ AP_FLAG_TLEN_SECT_CNT,
+ /*features*/0,
+ /*sector_count*/dxfer_len,
+ /*lba*/0,
+ /*command*/ATA_ATA_IDENTIFY,
+ /*device*/ 0,
+ /*icc*/ 0,
+ /*auxiliary*/ 0,
+ /*control*/0,
+ data_ptr,
+ dxfer_len,
+ /*cdb_storage*/ NULL,
+ /*cdb_storage_len*/ 0,
+ /*minimum_cmd_size*/ 0,
+ sense_len,
+ timeout);
}
void
@@ -6196,6 +8213,248 @@ scsi_ata_trim(struct ccb_scsiio *csio, u_int32_t retries,
timeout);
}
+int
+scsi_ata_read_log(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, uint32_t log_address,
+ uint32_t page_number, uint16_t block_count,
+ uint8_t protocol, uint8_t *data_ptr, uint32_t dxfer_len,
+ uint8_t sense_len, uint32_t timeout)
+{
+ uint8_t command, protocol_out;
+ uint16_t count_out;
+ uint64_t lba;
+ int retval;
+
+ retval = 0;
+
+ switch (protocol) {
+ case AP_PROTO_DMA:
+ count_out = block_count;
+ command = ATA_READ_LOG_DMA_EXT;
+ protocol_out = AP_PROTO_DMA;
+ break;
+ case AP_PROTO_PIO_IN:
+ default:
+ count_out = block_count;
+ command = ATA_READ_LOG_EXT;
+ protocol_out = AP_PROTO_PIO_IN;
+ break;
+ }
+
+ lba = (((uint64_t)page_number & 0xff00) << 32) |
+ ((page_number & 0x00ff) << 8) |
+ (log_address & 0xff);
+
+ protocol_out |= AP_EXTEND;
+
+ retval = scsi_ata_pass(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_IN,
+ tag_action,
+ /*protocol*/ protocol_out,
+ /*ata_flags*/AP_FLAG_TLEN_SECT_CNT |
+ AP_FLAG_BYT_BLOK_BLOCKS |
+ AP_FLAG_TDIR_FROM_DEV,
+ /*feature*/ 0,
+ /*sector_count*/ count_out,
+ /*lba*/ lba,
+ /*command*/ command,
+ /*device*/ 0,
+ /*icc*/ 0,
+ /*auxiliary*/ 0,
+ /*control*/0,
+ data_ptr,
+ dxfer_len,
+ /*cdb_storage*/ NULL,
+ /*cdb_storage_len*/ 0,
+ /*minimum_cmd_size*/ 0,
+ sense_len,
+ timeout);
+
+ return (retval);
+}
+
+/*
+ * Note! This is an unusual CDB building function because it can return
+ * an error in the event that the command in question requires a variable
+ * length CDB, but the caller has not given storage space for one or has not
+ * given enough storage space. If there is enough space available in the
+ * standard SCSI CCB CDB bytes, we'll prefer that over passed in storage.
+ */
+int
+scsi_ata_pass(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint32_t flags, uint8_t tag_action,
+ uint8_t protocol, uint8_t ata_flags, uint16_t features,
+ uint16_t sector_count, uint64_t lba, uint8_t command,
+ uint8_t device, uint8_t icc, uint32_t auxiliary,
+ uint8_t control, u_int8_t *data_ptr, uint32_t dxfer_len,
+ uint8_t *cdb_storage, size_t cdb_storage_len,
+ int minimum_cmd_size, u_int8_t sense_len, u_int32_t timeout)
+{
+ uint32_t cam_flags;
+ uint8_t *cdb_ptr;
+ int cmd_size;
+ int retval;
+ uint8_t cdb_len;
+
+ retval = 0;
+ cam_flags = flags;
+
+ /*
+ * Round the user's request to the nearest command size that is at
+ * least as big as what he requested.
+ */
+ if (minimum_cmd_size <= 12)
+ cmd_size = 12;
+ else if (minimum_cmd_size > 16)
+ cmd_size = 32;
+ else
+ cmd_size = 16;
+
+ /*
+ * If we have parameters that require a 48-bit ATA command, we have to
+ * use the 16 byte ATA PASS-THROUGH command at least.
+ */
+ if (((lba > ATA_MAX_28BIT_LBA)
+ || (sector_count > 255)
+ || (features > 255)
+ || (protocol & AP_EXTEND))
+ && ((cmd_size < 16)
+ || ((protocol & AP_EXTEND) == 0))) {
+ if (cmd_size < 16)
+ cmd_size = 16;
+ protocol |= AP_EXTEND;
+ }
+
+ /*
+ * The icc and auxiliary ATA registers are only supported in the
+ * 32-byte version of the ATA PASS-THROUGH command.
+ */
+ if ((icc != 0)
+ || (auxiliary != 0)) {
+ cmd_size = 32;
+ protocol |= AP_EXTEND;
+ }
+
+
+ if ((cmd_size > sizeof(csio->cdb_io.cdb_bytes))
+ && ((cdb_storage == NULL)
+ || (cdb_storage_len < cmd_size))) {
+ retval = 1;
+ goto bailout;
+ }
+
+ /*
+ * At this point we know we have enough space to store the command
+ * in one place or another. We prefer the built-in array, but use
+ * the passed in storage if necessary.
+ */
+ if (cmd_size <= sizeof(csio->cdb_io.cdb_bytes))
+ cdb_ptr = csio->cdb_io.cdb_bytes;
+ else {
+ cdb_ptr = cdb_storage;
+ cam_flags |= CAM_CDB_POINTER;
+ }
+
+ if (cmd_size <= 12) {
+ struct ata_pass_12 *cdb;
+
+ cdb = (struct ata_pass_12 *)cdb_ptr;
+ cdb_len = sizeof(*cdb);
+ bzero(cdb, cdb_len);
+
+ cdb->opcode = ATA_PASS_12;
+ cdb->protocol = protocol;
+ cdb->flags = ata_flags;
+ cdb->features = features;
+ cdb->sector_count = sector_count;
+ cdb->lba_low = lba & 0xff;
+ cdb->lba_mid = (lba >> 8) & 0xff;
+ cdb->lba_high = (lba >> 16) & 0xff;
+ cdb->device = ((lba >> 24) & 0xf) | ATA_DEV_LBA;
+ cdb->command = command;
+ cdb->control = control;
+ } else if (cmd_size <= 16) {
+ struct ata_pass_16 *cdb;
+
+ cdb = (struct ata_pass_16 *)cdb_ptr;
+ cdb_len = sizeof(*cdb);
+ bzero(cdb, cdb_len);
+
+ cdb->opcode = ATA_PASS_16;
+ cdb->protocol = protocol;
+ cdb->flags = ata_flags;
+ cdb->features = features & 0xff;
+ cdb->sector_count = sector_count & 0xff;
+ cdb->lba_low = lba & 0xff;
+ cdb->lba_mid = (lba >> 8) & 0xff;
+ cdb->lba_high = (lba >> 16) & 0xff;
+ /*
+ * If AP_EXTEND is set, we're sending a 48-bit command.
+ * Otherwise it's a 28-bit command.
+ */
+ if (protocol & AP_EXTEND) {
+ cdb->lba_low_ext = (lba >> 24) & 0xff;
+ cdb->lba_mid_ext = (lba >> 32) & 0xff;
+ cdb->lba_high_ext = (lba >> 40) & 0xff;
+ cdb->features_ext = (features >> 8) & 0xff;
+ cdb->sector_count_ext = (sector_count >> 8) & 0xff;
+ cdb->device = device | ATA_DEV_LBA;
+ } else {
+ cdb->lba_low_ext = (lba >> 24) & 0xf;
+ cdb->device = ((lba >> 24) & 0xf) | ATA_DEV_LBA;
+ }
+ cdb->command = command;
+ cdb->control = control;
+ } else {
+ struct ata_pass_32 *cdb;
+ uint8_t tmp_lba[8];
+
+ cdb = (struct ata_pass_32 *)cdb_ptr;
+ cdb_len = sizeof(*cdb);
+ bzero(cdb, cdb_len);
+ cdb->opcode = VARIABLE_LEN_CDB;
+ cdb->control = control;
+ cdb->length = sizeof(*cdb) - __offsetof(struct ata_pass_32,
+ service_action);
+ scsi_ulto2b(ATA_PASS_32_SA, cdb->service_action);
+ cdb->protocol = protocol;
+ cdb->flags = ata_flags;
+
+ if ((protocol & AP_EXTEND) == 0) {
+ lba &= 0x0fffffff;
+ cdb->device = ((lba >> 24) & 0xf) | ATA_DEV_LBA;
+ features &= 0xff;
+ sector_count &= 0xff;
+ } else {
+ cdb->device = device | ATA_DEV_LBA;
+ }
+ scsi_u64to8b(lba, tmp_lba);
+ bcopy(&tmp_lba[2], cdb->lba, sizeof(cdb->lba));
+ scsi_ulto2b(features, cdb->features);
+ scsi_ulto2b(sector_count, cdb->count);
+ cdb->command = command;
+ cdb->icc = icc;
+ scsi_ulto4b(auxiliary, cdb->auxiliary);
+ }
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ cam_flags,
+ tag_action,
+ data_ptr,
+ dxfer_len,
+ sense_len,
+ cmd_size,
+ timeout);
+bailout:
+ return (retval);
+}
+
void
scsi_ata_pass_16(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
@@ -6434,6 +8693,229 @@ scsi_start_stop(struct ccb_scsiio *csio, u_int32_t retries,
timeout);
}
+void
+scsi_read_attribute(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int8_t tag_action, u_int8_t service_action,
+ uint32_t element, u_int8_t elem_type, int logical_volume,
+ int partition, u_int32_t first_attribute, int cache,
+ u_int8_t *data_ptr, u_int32_t length, int sense_len,
+ u_int32_t timeout)
+{
+ struct scsi_read_attribute *scsi_cmd;
+
+ scsi_cmd = (struct scsi_read_attribute *)&csio->cdb_io.cdb_bytes;
+ bzero(scsi_cmd, sizeof(*scsi_cmd));
+
+ scsi_cmd->opcode = READ_ATTRIBUTE;
+ scsi_cmd->service_action = service_action;
+ scsi_ulto2b(element, scsi_cmd->element);
+ scsi_cmd->elem_type = elem_type;
+ scsi_cmd->logical_volume = logical_volume;
+ scsi_cmd->partition = partition;
+ scsi_ulto2b(first_attribute, scsi_cmd->first_attribute);
+ scsi_ulto4b(length, scsi_cmd->length);
+ if (cache != 0)
+ scsi_cmd->cache |= SRA_CACHE;
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_IN,
+ tag_action,
+ /*data_ptr*/data_ptr,
+ /*dxfer_len*/length,
+ sense_len,
+ sizeof(*scsi_cmd),
+ timeout);
+}
+
+void
+scsi_write_attribute(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int8_t tag_action, uint32_t element, int logical_volume,
+ int partition, int wtc, u_int8_t *data_ptr,
+ u_int32_t length, int sense_len, u_int32_t timeout)
+{
+ struct scsi_write_attribute *scsi_cmd;
+
+ scsi_cmd = (struct scsi_write_attribute *)&csio->cdb_io.cdb_bytes;
+ bzero(scsi_cmd, sizeof(*scsi_cmd));
+
+ scsi_cmd->opcode = WRITE_ATTRIBUTE;
+ if (wtc != 0)
+ scsi_cmd->byte2 = SWA_WTC;
+ scsi_ulto3b(element, scsi_cmd->element);
+ scsi_cmd->logical_volume = logical_volume;
+ scsi_cmd->partition = partition;
+ scsi_ulto4b(length, scsi_cmd->length);
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_OUT,
+ tag_action,
+ /*data_ptr*/data_ptr,
+ /*dxfer_len*/length,
+ sense_len,
+ sizeof(*scsi_cmd),
+ timeout);
+}
+
+void
+scsi_persistent_reserve_in(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, int service_action,
+ uint8_t *data_ptr, uint32_t dxfer_len, int sense_len,
+ int timeout)
+{
+ struct scsi_per_res_in *scsi_cmd;
+
+ scsi_cmd = (struct scsi_per_res_in *)&csio->cdb_io.cdb_bytes;
+ bzero(scsi_cmd, sizeof(*scsi_cmd));
+
+ scsi_cmd->opcode = PERSISTENT_RES_IN;
+ scsi_cmd->action = service_action;
+ scsi_ulto2b(dxfer_len, scsi_cmd->length);
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_IN,
+ tag_action,
+ data_ptr,
+ dxfer_len,
+ sense_len,
+ sizeof(*scsi_cmd),
+ timeout);
+}
+
+void
+scsi_persistent_reserve_out(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, int service_action,
+ int scope, int res_type, uint8_t *data_ptr,
+ uint32_t dxfer_len, int sense_len, int timeout)
+{
+ struct scsi_per_res_out *scsi_cmd;
+
+ scsi_cmd = (struct scsi_per_res_out *)&csio->cdb_io.cdb_bytes;
+ bzero(scsi_cmd, sizeof(*scsi_cmd));
+
+ scsi_cmd->opcode = PERSISTENT_RES_OUT;
+ scsi_cmd->action = service_action;
+ scsi_cmd->scope_type = scope | res_type;
+ scsi_ulto4b(dxfer_len, scsi_cmd->length);
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_OUT,
+ tag_action,
+ /*data_ptr*/data_ptr,
+ /*dxfer_len*/dxfer_len,
+ sense_len,
+ sizeof(*scsi_cmd),
+ timeout);
+}
+
+void
+scsi_security_protocol_in(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, uint32_t security_protocol,
+ uint32_t security_protocol_specific, int byte4,
+ uint8_t *data_ptr, uint32_t dxfer_len, int sense_len,
+ int timeout)
+{
+ struct scsi_security_protocol_in *scsi_cmd;
+
+ scsi_cmd = (struct scsi_security_protocol_in *)&csio->cdb_io.cdb_bytes;
+ bzero(scsi_cmd, sizeof(*scsi_cmd));
+
+ scsi_cmd->opcode = SECURITY_PROTOCOL_IN;
+
+ scsi_cmd->security_protocol = security_protocol;
+ scsi_ulto2b(security_protocol_specific,
+ scsi_cmd->security_protocol_specific);
+ scsi_cmd->byte4 = byte4;
+ scsi_ulto4b(dxfer_len, scsi_cmd->length);
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_IN,
+ tag_action,
+ data_ptr,
+ dxfer_len,
+ sense_len,
+ sizeof(*scsi_cmd),
+ timeout);
+}
+
+void
+scsi_security_protocol_out(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, uint32_t security_protocol,
+ uint32_t security_protocol_specific, int byte4,
+ uint8_t *data_ptr, uint32_t dxfer_len, int sense_len,
+ int timeout)
+{
+ struct scsi_security_protocol_out *scsi_cmd;
+
+ scsi_cmd = (struct scsi_security_protocol_out *)&csio->cdb_io.cdb_bytes;
+ bzero(scsi_cmd, sizeof(*scsi_cmd));
+
+ scsi_cmd->opcode = SECURITY_PROTOCOL_OUT;
+
+ scsi_cmd->security_protocol = security_protocol;
+ scsi_ulto2b(security_protocol_specific,
+ scsi_cmd->security_protocol_specific);
+ scsi_cmd->byte4 = byte4;
+ scsi_ulto4b(dxfer_len, scsi_cmd->length);
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_OUT,
+ tag_action,
+ data_ptr,
+ dxfer_len,
+ sense_len,
+ sizeof(*scsi_cmd),
+ timeout);
+}
+
+void
+scsi_report_supported_opcodes(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, int options, int req_opcode,
+ int req_service_action, uint8_t *data_ptr,
+ uint32_t dxfer_len, int sense_len, int timeout)
+{
+ struct scsi_report_supported_opcodes *scsi_cmd;
+
+ scsi_cmd = (struct scsi_report_supported_opcodes *)
+ &csio->cdb_io.cdb_bytes;
+ bzero(scsi_cmd, sizeof(*scsi_cmd));
+
+ scsi_cmd->opcode = MAINTENANCE_IN;
+ scsi_cmd->service_action = REPORT_SUPPORTED_OPERATION_CODES;
+ scsi_cmd->options = options;
+ scsi_cmd->requested_opcode = req_opcode;
+ scsi_ulto2b(req_service_action, scsi_cmd->requested_service_action);
+ scsi_ulto4b(dxfer_len, scsi_cmd->length);
+
+ cam_fill_csio(csio,
+ retries,
+ cbfcnp,
+ /*flags*/CAM_DIR_IN,
+ tag_action,
+ data_ptr,
+ dxfer_len,
+ sense_len,
+ sizeof(*scsi_cmd),
+ timeout);
+}
/*
* Try make as good a match as possible with
@@ -6500,7 +8982,7 @@ scsi_static_inquiry_match(caddr_t inqbuffer, caddr_t table_entry)
* \return 0 on a match, -1 otherwise.
*
* Treat rhs and lhs as arrays of vpd device id descriptors. Walk lhs matching
- * agains each element in rhs until all data are exhausted or we have found
+ * against each element in rhs until all data are exhausted or we have found
* a match.
*/
int
diff --git a/freebsd/sys/cam/scsi/scsi_all.h b/freebsd/sys/cam/scsi/scsi_all.h
index 326ce990..1fd45405 100644
--- a/freebsd/sys/cam/scsi/scsi_all.h
+++ b/freebsd/sys/cam/scsi/scsi_all.h
@@ -103,6 +103,9 @@ typedef enum {
/* The retyable, error action, with table specified error code */
#define SS_RET SS_RETRY|SSQ_DECREMENT_COUNT|SSQ_PRINT_SENSE
+/* Wait for transient error status to change */
+#define SS_WAIT SS_TUR|SSQ_MANY|SSQ_DECREMENT_COUNT|SSQ_PRINT_SENSE
+
/* Fatal error action, with table specified error code */
#define SS_FATAL SS_FAIL|SSQ_PRINT_SENSE
@@ -278,6 +281,7 @@ struct scsi_per_res_in
#define SPRI_RS 0x03
u_int8_t reserved[5];
u_int8_t length[2];
+#define SPRI_MAX_LEN 0xffff
u_int8_t control;
};
@@ -302,13 +306,22 @@ struct scsi_per_res_cap
{
uint8_t length[2];
uint8_t flags1;
-#define SPRI_CRH 0x10
-#define SPRI_SIP_C 0x08
-#define SPRI_ATP_C 0x04
-#define SPRI_PTPL_C 0x01
+#define SPRI_RLR_C 0x80
+#define SPRI_CRH 0x10
+#define SPRI_SIP_C 0x08
+#define SPRI_ATP_C 0x04
+#define SPRI_PTPL_C 0x01
uint8_t flags2;
-#define SPRI_TMV 0x80
-#define SPRI_PTPL_A 0x01
+#define SPRI_TMV 0x80
+#define SPRI_ALLOW_CMD_MASK 0x70
+#define SPRI_ALLOW_CMD_SHIFT 4
+#define SPRI_ALLOW_NA 0x00
+#define SPRI_ALLOW_1 0x10
+#define SPRI_ALLOW_2 0x20
+#define SPRI_ALLOW_3 0x30
+#define SPRI_ALLOW_4 0x40
+#define SPRI_ALLOW_5 0x50
+#define SPRI_PTPL_A 0x01
uint8_t type_mask[2];
#define SPRI_TM_WR_EX_AR 0x8000
#define SPRI_TM_EX_AC_RO 0x4000
@@ -322,7 +335,7 @@ struct scsi_per_res_cap
struct scsi_per_res_in_rsrv_data
{
uint8_t reservation[8];
- uint8_t obsolete1[4];
+ uint8_t scope_addr[4];
uint8_t reserved;
uint8_t scopetype;
#define SPRT_WE 0x01
@@ -331,7 +344,7 @@ struct scsi_per_res_in_rsrv_data
#define SPRT_EARO 0x06
#define SPRT_WEAR 0x07
#define SPRT_EAAR 0x08
- uint8_t obsolete2[2];
+ uint8_t extent_length[2];
};
struct scsi_per_res_in_rsrv
@@ -340,6 +353,26 @@ struct scsi_per_res_in_rsrv
struct scsi_per_res_in_rsrv_data data;
};
+struct scsi_per_res_in_full_desc
+{
+ struct scsi_per_res_key res_key;
+ uint8_t reserved1[4];
+ uint8_t flags;
+#define SPRI_FULL_ALL_TG_PT 0x02
+#define SPRI_FULL_R_HOLDER 0x01
+ uint8_t scopetype;
+ uint8_t reserved2[4];
+ uint8_t rel_trgt_port_id[2];
+ uint8_t additional_length[4];
+ uint8_t transport_id[];
+};
+
+struct scsi_per_res_in_full
+{
+ struct scsi_per_res_in_header header;
+ struct scsi_per_res_in_full_desc desc[];
+};
+
struct scsi_per_res_out
{
u_int8_t opcode;
@@ -352,13 +385,20 @@ struct scsi_per_res_out
#define SPRO_PRE_ABO 0x05
#define SPRO_REG_IGNO 0x06
#define SPRO_REG_MOVE 0x07
+#define SPRO_REPL_LOST_RES 0x08
#define SPRO_ACTION_MASK 0x1f
u_int8_t scope_type;
#define SPR_SCOPE_MASK 0xf0
+#define SPR_SCOPE_SHIFT 4
#define SPR_LU_SCOPE 0x00
+#define SPR_EXTENT_SCOPE 0x10
+#define SPR_ELEMENT_SCOPE 0x20
#define SPR_TYPE_MASK 0x0f
+#define SPR_TYPE_RD_SHARED 0x00
#define SPR_TYPE_WR_EX 0x01
+#define SPR_TYPE_RD_EX 0x02
#define SPR_TYPE_EX_AC 0x03
+#define SPR_TYPE_SHARED 0x04
#define SPR_TYPE_WR_EX_RO 0x05
#define SPR_TYPE_EX_AC_RO 0x06
#define SPR_TYPE_WR_EX_AR 0x07
@@ -372,15 +412,139 @@ struct scsi_per_res_out_parms
{
struct scsi_per_res_key res_key;
u_int8_t serv_act_res_key[8];
- u_int8_t obsolete1[4];
+ u_int8_t scope_spec_address[4];
u_int8_t flags;
#define SPR_SPEC_I_PT 0x08
#define SPR_ALL_TG_PT 0x04
#define SPR_APTPL 0x01
u_int8_t reserved1;
- u_int8_t obsolete2[2];
+ u_int8_t extent_length[2];
+ u_int8_t transport_id_list[];
+};
+
+struct scsi_per_res_out_trans_ids {
+ u_int8_t additional_length[4];
+ u_int8_t transport_ids[];
+};
+
+/*
+ * Used with REGISTER AND MOVE service action of the PERSISTENT RESERVE OUT
+ * command.
+ */
+struct scsi_per_res_reg_move
+{
+ struct scsi_per_res_key res_key;
+ u_int8_t serv_act_res_key[8];
+ u_int8_t reserved;
+ u_int8_t flags;
+#define SPR_REG_MOVE_UNREG 0x02
+#define SPR_REG_MOVE_APTPL 0x01
+ u_int8_t rel_trgt_port_id[2];
+ u_int8_t transport_id_length[4];
+ u_int8_t transport_id[];
};
+struct scsi_transportid_header
+{
+ uint8_t format_protocol;
+#define SCSI_TRN_FORMAT_MASK 0xc0
+#define SCSI_TRN_FORMAT_SHIFT 6
+#define SCSI_TRN_PROTO_MASK 0x0f
+};
+
+struct scsi_transportid_fcp
+{
+ uint8_t format_protocol;
+#define SCSI_TRN_FCP_FORMAT_DEFAULT 0x00
+ uint8_t reserved1[7];
+ uint8_t n_port_name[8];
+ uint8_t reserved2[8];
+};
+
+struct scsi_transportid_spi
+{
+ uint8_t format_protocol;
+#define SCSI_TRN_SPI_FORMAT_DEFAULT 0x00
+ uint8_t reserved1;
+ uint8_t scsi_addr[2];
+ uint8_t obsolete[2];
+ uint8_t rel_trgt_port_id[2];
+ uint8_t reserved2[16];
+};
+
+struct scsi_transportid_1394
+{
+ uint8_t format_protocol;
+#define SCSI_TRN_1394_FORMAT_DEFAULT 0x00
+ uint8_t reserved1[7];
+ uint8_t eui64[8];
+ uint8_t reserved2[8];
+};
+
+struct scsi_transportid_rdma
+{
+ uint8_t format_protocol;
+#define SCSI_TRN_RDMA_FORMAT_DEFAULT 0x00
+ uint8_t reserved[7];
+#define SCSI_TRN_RDMA_PORT_LEN 16
+ uint8_t initiator_port_id[SCSI_TRN_RDMA_PORT_LEN];
+};
+
+struct scsi_transportid_iscsi_device
+{
+ uint8_t format_protocol;
+#define SCSI_TRN_ISCSI_FORMAT_DEVICE 0x00
+ uint8_t reserved;
+ uint8_t additional_length[2];
+ uint8_t iscsi_name[];
+};
+
+struct scsi_transportid_iscsi_port
+{
+ uint8_t format_protocol;
+#define SCSI_TRN_ISCSI_FORMAT_PORT 0x40
+ uint8_t reserved;
+ uint8_t additional_length[2];
+ uint8_t iscsi_name[];
+ /*
+ * Followed by a separator and iSCSI initiator session ID
+ */
+};
+
+struct scsi_transportid_sas
+{
+ uint8_t format_protocol;
+#define SCSI_TRN_SAS_FORMAT_DEFAULT 0x00
+ uint8_t reserved1[3];
+ uint8_t sas_address[8];
+ uint8_t reserved2[12];
+};
+
+struct scsi_sop_routing_id_norm {
+ uint8_t bus;
+ uint8_t devfunc;
+#define SCSI_TRN_SOP_BUS_MAX 0xff
+#define SCSI_TRN_SOP_DEV_MAX 0x1f
+#define SCSI_TRN_SOP_DEV_MASK 0xf8
+#define SCSI_TRN_SOP_DEV_SHIFT 3
+#define SCSI_TRN_SOP_FUNC_NORM_MASK 0x07
+#define SCSI_TRN_SOP_FUNC_NORM_MAX 0x07
+};
+
+struct scsi_sop_routing_id_alt {
+ uint8_t bus;
+ uint8_t function;
+#define SCSI_TRN_SOP_FUNC_ALT_MAX 0xff
+};
+
+struct scsi_transportid_sop
+{
+ uint8_t format_protocol;
+#define SCSI_TRN_SOP_FORMAT_DEFAULT 0x00
+ uint8_t reserved1;
+ uint8_t routing_id[2];
+ uint8_t reserved2[20];
+};
struct scsi_log_sense
{
@@ -390,7 +554,7 @@ struct scsi_log_sense
#define SLS_PPC 0x02
u_int8_t page;
#define SLS_PAGE_CODE 0x3F
-#define SLS_ALL_PAGES_PAGE 0x00
+#define SLS_SUPPORTED_PAGES_PAGE 0x00
#define SLS_OVERRUN_PAGE 0x01
#define SLS_ERROR_WRITE_PAGE 0x02
#define SLS_ERROR_READ_PAGE 0x03
@@ -398,14 +562,18 @@ struct scsi_log_sense
#define SLS_ERROR_VERIFY_PAGE 0x05
#define SLS_ERROR_NONMEDIUM_PAGE 0x06
#define SLS_ERROR_LASTN_PAGE 0x07
+#define SLS_LOGICAL_BLOCK_PROVISIONING 0x0c
#define SLS_SELF_TEST_PAGE 0x10
+#define SLS_STAT_AND_PERF 0x19
#define SLS_IE_PAGE 0x2f
#define SLS_PAGE_CTRL_MASK 0xC0
#define SLS_PAGE_CTRL_THRESHOLD 0x00
#define SLS_PAGE_CTRL_CUMULATIVE 0x40
#define SLS_PAGE_CTRL_THRESH_DEFAULT 0x80
#define SLS_PAGE_CTRL_CUMUL_DEFAULT 0xC0
- u_int8_t reserved[2];
+ u_int8_t subpage;
+#define SLS_SUPPORTED_SUBPAGES_SUBPAGE 0xff
+ u_int8_t reserved;
u_int8_t paramptr[2];
u_int8_t length[2];
u_int8_t control;
@@ -431,7 +599,10 @@ struct scsi_log_select
struct scsi_log_header
{
u_int8_t page;
- u_int8_t reserved;
+#define SL_PAGE_CODE 0x3F
+#define SL_SPF 0x40
+#define SL_DS 0x80
+ u_int8_t subpage;
u_int8_t datalen[2];
};
@@ -452,6 +623,45 @@ struct scsi_log_param_header {
u_int8_t param_len;
};
+struct scsi_log_stat_and_perf {
+ struct scsi_log_param_header hdr;
+#define SLP_SAP 0x0001
+ uint8_t read_num[8];
+ uint8_t write_num[8];
+ uint8_t recvieved_lba[8];
+ uint8_t transmitted_lba[8];
+ uint8_t read_int[8];
+ uint8_t write_int[8];
+ uint8_t weighted_num[8];
+ uint8_t weighted_int[8];
+};
+
+struct scsi_log_idle_time {
+ struct scsi_log_param_header hdr;
+#define SLP_IT 0x0002
+ uint8_t idle_int[8];
+};
+
+struct scsi_log_time_interval {
+ struct scsi_log_param_header hdr;
+#define SLP_TI 0x0003
+ uint8_t exponent[4];
+ uint8_t integer[4];
+};
+
+struct scsi_log_fua_stat_and_perf {
+ struct scsi_log_param_header hdr;
+#define SLP_FUA_SAP 0x0004
+ uint8_t fua_read_num[8];
+ uint8_t fua_write_num[8];
+ uint8_t fuanv_read_num[8];
+ uint8_t fuanv_write_num[8];
+ uint8_t fua_read_int[8];
+ uint8_t fua_write_int[8];
+ uint8_t fuanv_read_int[8];
+ uint8_t fuanv_write_int[8];
+};
+
struct scsi_control_page {
u_int8_t page_code;
u_int8_t page_length;
@@ -470,15 +680,37 @@ struct scsi_control_page {
#define SCP_QUEUE_ALG_MASK 0xF0
#define SCP_QUEUE_ALG_RESTRICTED 0x00
#define SCP_QUEUE_ALG_UNRESTRICTED 0x10
+#define SCP_NUAR 0x08 /*No UA on release*/
#define SCP_QUEUE_ERR 0x02 /*Queued I/O aborted for CACs*/
#define SCP_QUEUE_DQUE 0x01 /*Queued I/O disabled*/
u_int8_t eca_and_aen;
#define SCP_EECA 0x80 /*Enable Extended CA*/
+#define SCP_RAC 0x40 /*Report a check*/
+#define SCP_SWP 0x08 /*Software Write Protect*/
#define SCP_RAENP 0x04 /*Ready AEN Permission*/
#define SCP_UAAENP 0x02 /*UA AEN Permission*/
#define SCP_EAENP 0x01 /*Error AEN Permission*/
- u_int8_t reserved;
+ u_int8_t flags4;
+#define SCP_ATO 0x80 /*Application tag owner*/
+#define SCP_TAS 0x40 /*Task aborted status*/
+#define SCP_ATMPE 0x20 /*Application tag mode page*/
+#define SCP_RWWP 0x10 /*Reject write without prot*/
u_int8_t aen_holdoff_period[2];
+ u_int8_t busy_timeout_period[2];
+ u_int8_t extended_selftest_completion_time[2];
+};
+
+struct scsi_control_ext_page {
+ uint8_t page_code;
+ uint8_t subpage_code;
+ uint8_t page_length[2];
+ uint8_t flags;
+#define SCEP_TCMOS 0x04 /* Timestamp Changeable by */
+#define SCEP_SCSIP 0x02 /* SCSI Precedence (clock) */
+#define SCEP_IALUAE 0x01 /* Implicit ALUA Enabled */
+ uint8_t prio;
+ uint8_t max_sense;
+ uint8_t reserve[25];
};
struct scsi_cache_page {
@@ -534,40 +766,6 @@ struct scsi_caching_page {
/*
* XXX KDM move this off to a vendor shim.
*/
-struct copan_power_subpage {
- uint8_t page_code;
-#define PWR_PAGE_CODE 0x00
- uint8_t subpage;
-#define PWR_SUBPAGE_CODE 0x02
- uint8_t page_length[2];
- uint8_t page_version;
-#define PWR_VERSION 0x01
- uint8_t total_luns;
- uint8_t max_active_luns;
-#define PWR_DFLT_MAX_LUNS 0x07
- uint8_t reserved[25];
-};
-
-/*
- * XXX KDM move this off to a vendor shim.
- */
-struct copan_aps_subpage {
- uint8_t page_code;
-#define APS_PAGE_CODE 0x00
- uint8_t subpage;
-#define APS_SUBPAGE_CODE 0x03
- uint8_t page_length[2];
- uint8_t page_version;
-#define APS_VERSION 0x00
- uint8_t lock_active;
-#define APS_LOCK_ACTIVE 0x01
-#define APS_LOCK_INACTIVE 0x00
- uint8_t reserved[26];
-};
-
-/*
- * XXX KDM move this off to a vendor shim.
- */
struct copan_debugconf_subpage {
uint8_t page_code;
#define DBGCNF_PAGE_CODE 0x00
@@ -597,21 +795,63 @@ struct scsi_info_exceptions_page {
u_int8_t report_count[4];
};
+struct scsi_logical_block_provisioning_page_descr {
+ uint8_t flags;
+#define SLBPPD_ENABLED 0x80
+#define SLBPPD_TYPE_MASK 0x38
+#define SLBPPD_ARMING_MASK 0x07
+#define SLBPPD_ARMING_DEC 0x02
+#define SLBPPD_ARMING_INC 0x01
+ uint8_t resource;
+ uint8_t reserved[2];
+ uint8_t count[4];
+};
+
+struct scsi_logical_block_provisioning_page {
+ uint8_t page_code;
+ uint8_t subpage_code;
+ uint8_t page_length[2];
+ uint8_t flags;
+#define SLBPP_SITUA 0x01
+ uint8_t reserved[11];
+ struct scsi_logical_block_provisioning_page_descr descr[0];
+};
+
+/*
+ * SCSI protocol identifier values, current as of SPC4r36l.
+ */
+#define SCSI_PROTO_FC 0x00 /* Fibre Channel */
+#define SCSI_PROTO_SPI 0x01 /* Parallel SCSI */
+#define SCSI_PROTO_SSA 0x02 /* Serial Storage Arch. */
+#define SCSI_PROTO_1394 0x03 /* IEEE 1394 (Firewire) */
+#define SCSI_PROTO_RDMA 0x04 /* SCSI RDMA Protocol */
+#define SCSI_PROTO_ISCSI 0x05 /* Internet SCSI */
+#define SCSI_PROTO_iSCSI 0x05 /* Internet SCSI */
+#define SCSI_PROTO_SAS 0x06 /* SAS Serial SCSI Protocol */
+#define SCSI_PROTO_ADT 0x07 /* Automation/Drive Int. Trans. Prot.*/
+#define SCSI_PROTO_ADITP 0x07 /* Automation/Drive Int. Trans. Prot.*/
+#define SCSI_PROTO_ATA 0x08 /* AT Attachment Interface */
+#define SCSI_PROTO_UAS 0x09 /* USB Attached SCSI */
+#define SCSI_PROTO_SOP 0x0a /* SCSI over PCI Express */
+#define SCSI_PROTO_NONE 0x0f /* No specific protocol */
+
struct scsi_proto_specific_page {
u_int8_t page_code;
#define SPSP_PAGE_SAVABLE 0x80 /* Page is savable */
u_int8_t page_length;
u_int8_t protocol;
-#define SPSP_PROTO_FC 0x00
-#define SPSP_PROTO_SPI 0x01
-#define SPSP_PROTO_SSA 0x02
-#define SPSP_PROTO_1394 0x03
-#define SPSP_PROTO_RDMA 0x04
-#define SPSP_PROTO_ISCSI 0x05
-#define SPSP_PROTO_SAS 0x06
-#define SPSP_PROTO_ADT 0x07
-#define SPSP_PROTO_ATA 0x08
-#define SPSP_PROTO_NONE 0x0f
+#define SPSP_PROTO_FC SCSI_PROTO_FC
+#define SPSP_PROTO_SPI SCSI_PROTO_SPI
+#define SPSP_PROTO_SSA SCSI_PROTO_SSA
+#define SPSP_PROTO_1394 SCSI_PROTO_1394
+#define SPSP_PROTO_RDMA SCSI_PROTO_RDMA
+#define SPSP_PROTO_ISCSI SCSI_PROTO_ISCSI
+#define SPSP_PROTO_SAS SCSI_PROTO_SAS
+#define SPSP_PROTO_ADT SCSI_PROTO_ADITP
+#define SPSP_PROTO_ATA SCSI_PROTO_ATA
+#define SPSP_PROTO_UAS SCSI_PROTO_UAS
+#define SPSP_PROTO_SOP SCSI_PROTO_SOP
+#define SPSP_PROTO_NONE SCSI_PROTO_NONE
};
struct scsi_reserve
@@ -746,18 +986,32 @@ struct scsi_read_buffer
{
u_int8_t opcode;
u_int8_t byte2;
-#define RWB_MODE 0x07
+#define RWB_MODE 0x1F
#define RWB_MODE_HDR_DATA 0x00
#define RWB_MODE_VENDOR 0x01
#define RWB_MODE_DATA 0x02
+#define RWB_MODE_DESCR 0x03
#define RWB_MODE_DOWNLOAD 0x04
#define RWB_MODE_DOWNLOAD_SAVE 0x05
+#define RWB_MODE_ECHO 0x0A
+#define RWB_MODE_ECHO_DESCR 0x0B
+#define RWB_MODE_ERROR_HISTORY 0x1C
u_int8_t buffer_id;
u_int8_t offset[3];
u_int8_t length[3];
u_int8_t control;
};
+struct scsi_read_buffer_16
+{
+ uint8_t opcode;
+ uint8_t byte2;
+ uint8_t offset[8];
+ uint8_t length[4];
+ uint8_t buffer_id;
+ uint8_t control;
+};
+
struct scsi_write_buffer
{
u_int8_t opcode;
@@ -768,6 +1022,216 @@ struct scsi_write_buffer
u_int8_t control;
};
+struct scsi_read_attribute
+{
+ u_int8_t opcode;
+ u_int8_t service_action;
+#define SRA_SA_ATTR_VALUES 0x00
+#define SRA_SA_ATTR_LIST 0x01
+#define SRA_SA_LOG_VOL_LIST 0x02
+#define SRA_SA_PART_LIST 0x03
+#define SRA_SA_RESTRICTED 0x04
+#define SRA_SA_SUPPORTED_ATTRS 0x05
+#define SRA_SA_MASK 0x1f
+ u_int8_t element[2];
+ u_int8_t elem_type;
+ u_int8_t logical_volume;
+ u_int8_t reserved1;
+ u_int8_t partition;
+ u_int8_t first_attribute[2];
+ u_int8_t length[4];
+ u_int8_t cache;
+#define SRA_CACHE 0x01
+ u_int8_t control;
+};
+
+struct scsi_write_attribute
+{
+ u_int8_t opcode;
+ u_int8_t byte2;
+#define SWA_WTC 0x01
+ u_int8_t element[3];
+ u_int8_t logical_volume;
+ u_int8_t reserved1;
+ u_int8_t partition;
+ u_int8_t reserved2[2];
+ u_int8_t length[4];
+ u_int8_t reserved3;
+ u_int8_t control;
+};
+
+
+struct scsi_read_attribute_values
+{
+ u_int8_t length[4];
+ u_int8_t attribute_0[0];
+};
+
+struct scsi_mam_attribute_header
+{
+ u_int8_t id[2];
+ /*
+ * Attributes obtained from SPC-4r36g (section 7.4.2.2) and
+ * SSC-4r03 (section 4.2.21).
+ */
+#define SMA_ATTR_ID_DEVICE_MIN 0x0000
+
+#define SMA_ATTR_REM_CAP_PARTITION 0x0000
+#define SMA_ATTR_MAX_CAP_PARTITION 0x0001
+#define SMA_ATTR_TAPEALERT_FLAGS 0x0002
+#define SMA_ATTR_LOAD_COUNT 0x0003
+#define SMA_ATTR_MAM_SPACE_REMAINING 0x0004
+
+#define SMA_ATTR_DEV_ASSIGNING_ORG 0x0005
+#define SMA_ATTR_FORMAT_DENSITY_CODE 0x0006
+#define SMA_ATTR_INITIALIZATION_COUNT 0x0007
+#define SMA_ATTR_VOLUME_ID 0x0008
+#define SMA_ATTR_VOLUME_CHANGE_REF 0x0009
+
+#define SMA_ATTR_DEV_SERIAL_LAST_LOAD 0x020a
+#define SMA_ATTR_DEV_SERIAL_LAST_LOAD_1 0x020b
+#define SMA_ATTR_DEV_SERIAL_LAST_LOAD_2 0x020c
+#define SMA_ATTR_DEV_SERIAL_LAST_LOAD_3 0x020d
+
+#define SMA_ATTR_TOTAL_MB_WRITTEN_LT 0x0220
+#define SMA_ATTR_TOTAL_MB_READ_LT 0x0221
+#define SMA_ATTR_TOTAL_MB_WRITTEN_CUR 0x0222
+#define SMA_ATTR_TOTAL_MB_READ_CUR 0x0223
+#define SMA_ATTR_FIRST_ENC_BLOCK 0x0224
+#define SMA_ATTR_NEXT_UNENC_BLOCK 0x0225
+
+#define SMA_ATTR_MEDIUM_USAGE_HIST 0x0340
+#define SMA_ATTR_PART_USAGE_HIST 0x0341
+
+#define SMA_ATTR_ID_DEVICE_MAX 0x03ff
+
+#define SMA_ATTR_ID_MEDIUM_MIN 0x0400
+
+#define SMA_ATTR_MED_MANUF 0x0400
+#define SMA_ATTR_MED_SERIAL 0x0401
+
+#define SMA_ATTR_MED_LENGTH 0x0402
+#define SMA_ATTR_MED_WIDTH 0x0403
+#define SMA_ATTR_MED_ASSIGNING_ORG 0x0404
+#define SMA_ATTR_MED_DENSITY_CODE 0x0405
+
+#define SMA_ATTR_MED_MANUF_DATE 0x0406
+#define SMA_ATTR_MAM_CAPACITY 0x0407
+#define SMA_ATTR_MED_TYPE 0x0408
+#define SMA_ATTR_MED_TYPE_INFO 0x0409
+#define SMA_ATTR_MED_SERIAL_NUM 0x040a
+
+#define SMA_ATTR_ID_MEDIUM_MAX 0x07ff
+
+#define SMA_ATTR_ID_HOST_MIN 0x0800
+
+#define SMA_ATTR_APP_VENDOR 0x0800
+#define SMA_ATTR_APP_NAME 0x0801
+#define SMA_ATTR_APP_VERSION 0x0802
+#define SMA_ATTR_USER_MED_TEXT_LABEL 0x0803
+#define SMA_ATTR_LAST_WRITTEN_TIME 0x0804
+#define SMA_ATTR_TEXT_LOCAL_ID 0x0805
+#define SMA_ATTR_BARCODE 0x0806
+#define SMA_ATTR_HOST_OWNER_NAME 0x0807
+#define SMA_ATTR_MEDIA_POOL 0x0808
+#define SMA_ATTR_PART_USER_LABEL 0x0809
+#define SMA_ATTR_LOAD_UNLOAD_AT_PART 0x080a
+#define SMA_ATTR_APP_FORMAT_VERSION 0x080b
+#define SMA_ATTR_VOL_COHERENCY_INFO 0x080c
+
+#define SMA_ATTR_ID_HOST_MAX 0x0bff
+
+#define SMA_ATTR_VENDOR_DEVICE_MIN 0x0c00
+#define SMA_ATTR_VENDOR_DEVICE_MAX 0x0fff
+#define SMA_ATTR_VENDOR_MEDIUM_MIN 0x1000
+#define SMA_ATTR_VENDOR_MEDIUM_MAX 0x13ff
+#define SMA_ATTR_VENDOR_HOST_MIN 0x1400
+#define SMA_ATTR_VENDOR_HOST_MAX 0x17ff
+ u_int8_t byte2;
+#define SMA_FORMAT_BINARY 0x00
+#define SMA_FORMAT_ASCII 0x01
+#define SMA_FORMAT_TEXT 0x02
+#define SMA_FORMAT_MASK 0x03
+#define SMA_READ_ONLY 0x80
+ u_int8_t length[2];
+ u_int8_t attribute[0];
+};
+
+struct scsi_attrib_list_header {
+ u_int8_t length[4];
+ u_int8_t first_attr_0[0];
+};
+
+struct scsi_attrib_lv_list {
+ u_int8_t length[2];
+ u_int8_t first_lv_number;
+ u_int8_t num_logical_volumes;
+};
+
+struct scsi_attrib_vendser {
+ uint8_t vendor[8];
+ uint8_t serial_num[32];
+};
+
+/*
+ * These values are used to decode the Volume Coherency Information
+ * Attribute (0x080c) for LTFS-format coherency information.
+ * Although the Application Client Specific lengths are different for
+ * Version 0 and Version 1, the data is in fact the same. The length
+ * difference was due to a code bug.
+ */
+#define SCSI_LTFS_VER0_LEN 42
+#define SCSI_LTFS_VER1_LEN 43
+#define SCSI_LTFS_UUID_LEN 36
+#define SCSI_LTFS_STR_NAME "LTFS"
+#define SCSI_LTFS_STR_LEN 4
+
+typedef enum {
+ SCSI_ATTR_FLAG_NONE = 0x00,
+ SCSI_ATTR_FLAG_HEX = 0x01,
+ SCSI_ATTR_FLAG_FP = 0x02,
+ SCSI_ATTR_FLAG_DIV_10 = 0x04,
+ SCSI_ATTR_FLAG_FP_1DIGIT = 0x08
+} scsi_attrib_flags;
+
+typedef enum {
+ SCSI_ATTR_OUTPUT_NONE = 0x00,
+ SCSI_ATTR_OUTPUT_TEXT_MASK = 0x03,
+ SCSI_ATTR_OUTPUT_TEXT_RAW = 0x00,
+ SCSI_ATTR_OUTPUT_TEXT_ESC = 0x01,
+ SCSI_ATTR_OUTPUT_TEXT_RSV1 = 0x02,
+ SCSI_ATTR_OUTPUT_TEXT_RSV2 = 0x03,
+ SCSI_ATTR_OUTPUT_NONASCII_MASK = 0x0c,
+ SCSI_ATTR_OUTPUT_NONASCII_TRIM = 0x00,
+ SCSI_ATTR_OUTPUT_NONASCII_ESC = 0x04,
+ SCSI_ATTR_OUTPUT_NONASCII_RAW = 0x08,
+ SCSI_ATTR_OUTPUT_NONASCII_RSV1 = 0x0c,
+ SCSI_ATTR_OUTPUT_FIELD_MASK = 0xf0,
+ SCSI_ATTR_OUTPUT_FIELD_ALL = 0xf0,
+ SCSI_ATTR_OUTPUT_FIELD_NONE = 0x00,
+ SCSI_ATTR_OUTPUT_FIELD_DESC = 0x10,
+ SCSI_ATTR_OUTPUT_FIELD_NUM = 0x20,
+ SCSI_ATTR_OUTPUT_FIELD_SIZE = 0x40,
+ SCSI_ATTR_OUTPUT_FIELD_RW = 0x80
+} scsi_attrib_output_flags;
+
+struct sbuf;
+
+struct scsi_attrib_table_entry
+{
+ u_int32_t id;
+ u_int32_t flags;
+ const char *desc;
+ const char *suffix;
+ int (*to_str)(struct sbuf *sb, struct scsi_mam_attribute_header *hdr,
+ uint32_t valid_len, uint32_t flags,
+ uint32_t output_flags, char *error_str,
+ int error_str_len);
+ int (*parse_str)(char *str, struct scsi_mam_attribute_header *hdr,
+ uint32_t alloc_len, uint32_t flags, char *error_str,
+ int error_str_len);
+};
+
struct scsi_rw_6
{
u_int8_t opcode;
@@ -819,6 +1283,17 @@ struct scsi_rw_16
u_int8_t control;
};
+struct scsi_write_atomic_16
+{
+ uint8_t opcode;
+ uint8_t byte2;
+ uint8_t addr[8];
+ uint8_t boundary[2];
+ uint8_t length[2];
+ uint8_t group;
+ uint8_t control;
+};
+
struct scsi_write_same_10
{
uint8_t opcode;
@@ -837,6 +1312,7 @@ struct scsi_write_same_16
{
uint8_t opcode;
uint8_t byte2;
+#define SWS_NDOB 0x01
uint8_t addr[8];
uint8_t length[4];
uint8_t group;
@@ -854,6 +1330,20 @@ struct scsi_unmap
uint8_t control;
};
+struct scsi_unmap_header
+{
+ uint8_t length[2];
+ uint8_t desc_length[2];
+ uint8_t reserved[4];
+};
+
+struct scsi_unmap_desc
+{
+ uint8_t lba[8];
+ uint8_t length[4];
+ uint8_t reserved[4];
+};
+
struct scsi_write_verify_10
{
uint8_t opcode;
@@ -924,6 +1414,7 @@ struct ata_pass_12 {
#define AP_PROTO_UDMA_OUT (0x0b << 1)
#define AP_PROTO_FPDMA (0x0c << 1)
#define AP_PROTO_RESP_INFO (0x0f << 1)
+#define AP_PROTO_MASK 0x1e
#define AP_MULTI 0xe0
u_int8_t flags;
#define AP_T_LEN 0x03
@@ -954,6 +1445,488 @@ struct scsi_maintenance_in
uint8_t control;
};
+struct scsi_report_supported_opcodes
+{
+ uint8_t opcode;
+ uint8_t service_action;
+ uint8_t options;
+#define RSO_RCTD 0x80
+#define RSO_OPTIONS_MASK 0x07
+#define RSO_OPTIONS_ALL 0x00
+#define RSO_OPTIONS_OC 0x01
+#define RSO_OPTIONS_OC_SA 0x02
+ uint8_t requested_opcode;
+ uint8_t requested_service_action[2];
+ uint8_t length[4];
+ uint8_t reserved1;
+ uint8_t control;
+};
+
+struct scsi_report_supported_opcodes_timeout
+{
+ uint8_t length[2];
+ uint8_t reserved;
+ uint8_t cmd_specific;
+ uint8_t nominal_time[4];
+ uint8_t recommended_time[4];
+};
+
+struct scsi_report_supported_opcodes_descr
+{
+ uint8_t opcode;
+ uint8_t reserved;
+ uint8_t service_action[2];
+ uint8_t reserved2;
+ uint8_t flags;
+#define RSO_SERVACTV 0x01
+#define RSO_CTDP 0x02
+ uint8_t cdb_length[2];
+ struct scsi_report_supported_opcodes_timeout timeout[0];
+};
+
+struct scsi_report_supported_opcodes_all
+{
+ uint8_t length[4];
+ struct scsi_report_supported_opcodes_descr descr[0];
+};
+
+struct scsi_report_supported_opcodes_one
+{
+ uint8_t reserved;
+ uint8_t support;
+#define RSO_ONE_CTDP 0x80
+#define RSO_ONE_SUP_MASK 0x07
+#define RSO_ONE_SUP_UNAVAIL 0x00
+#define RSO_ONE_SUP_NOT_SUP 0x01
+#define RSO_ONE_SUP_AVAIL 0x03
+#define RSO_ONE_SUP_VENDOR 0x05
+ uint8_t cdb_length[2];
+ uint8_t cdb_usage[];
+};
+
+struct scsi_report_supported_tmf
+{
+ uint8_t opcode;
+ uint8_t service_action;
+ uint8_t reserved[4];
+ uint8_t length[4];
+ uint8_t reserved1;
+ uint8_t control;
+};
+
+struct scsi_report_supported_tmf_data
+{
+ uint8_t byte1;
+#define RST_WAKES 0x01
+#define RST_TRS 0x02
+#define RST_QTS 0x04
+#define RST_LURS 0x08
+#define RST_CTSS 0x10
+#define RST_CACAS 0x20
+#define RST_ATSS 0x40
+#define RST_ATS 0x80
+ uint8_t byte2;
+#define RST_ITNRS 0x01
+#define RST_QTSS 0x02
+#define RST_QAES 0x04
+ uint8_t reserved[2];
+};
+
+struct scsi_report_timestamp
+{
+ uint8_t opcode;
+ uint8_t service_action;
+ uint8_t reserved[4];
+ uint8_t length[4];
+ uint8_t reserved1;
+ uint8_t control;
+};
+
+struct scsi_report_timestamp_data
+{
+ uint8_t length[2];
+ uint8_t origin;
+#define RTS_ORIG_MASK 0x00
+#define RTS_ORIG_ZERO 0x00
+#define RTS_ORIG_SET 0x02
+#define RTS_ORIG_OUTSIDE 0x03
+ uint8_t reserved;
+ uint8_t timestamp[6];
+ uint8_t reserve2[2];
+};
+
+struct scsi_receive_copy_status_lid1
+{
+ uint8_t opcode;
+ uint8_t service_action;
+#define RCS_RCS_LID1 0x00
+ uint8_t list_identifier;
+ uint8_t reserved[7];
+ uint8_t length[4];
+ uint8_t reserved1;
+ uint8_t control;
+};
+
+struct scsi_receive_copy_status_lid1_data
+{
+ uint8_t available_data[4];
+ uint8_t copy_command_status;
+#define RCS_CCS_INPROG 0x00
+#define RCS_CCS_COMPLETED 0x01
+#define RCS_CCS_ERROR 0x02
+ uint8_t segments_processed[2];
+ uint8_t transfer_count_units;
+#define RCS_TC_BYTES 0x00
+#define RCS_TC_KBYTES 0x01
+#define RCS_TC_MBYTES 0x02
+#define RCS_TC_GBYTES 0x03
+#define RCS_TC_TBYTES 0x04
+#define RCS_TC_PBYTES 0x05
+#define RCS_TC_EBYTES 0x06
+#define RCS_TC_LBAS 0xf1
+ uint8_t transfer_count[4];
+};
+
+struct scsi_receive_copy_failure_details
+{
+ uint8_t opcode;
+ uint8_t service_action;
+#define RCS_RCFD 0x04
+ uint8_t list_identifier;
+ uint8_t reserved[7];
+ uint8_t length[4];
+ uint8_t reserved1;
+ uint8_t control;
+};
+
+struct scsi_receive_copy_failure_details_data
+{
+ uint8_t available_data[4];
+ uint8_t reserved[52];
+ uint8_t copy_command_status;
+ uint8_t reserved2;
+ uint8_t sense_data_length[2];
+ uint8_t sense_data[];
+};
+
+struct scsi_receive_copy_status_lid4
+{
+ uint8_t opcode;
+ uint8_t service_action;
+#define RCS_RCS_LID4 0x05
+ uint8_t list_identifier[4];
+ uint8_t reserved[4];
+ uint8_t length[4];
+ uint8_t reserved1;
+ uint8_t control;
+};
+
+struct scsi_receive_copy_status_lid4_data
+{
+ uint8_t available_data[4];
+ uint8_t response_to_service_action;
+ uint8_t copy_command_status;
+#define RCS_CCS_COMPLETED_PROD 0x03
+#define RCS_CCS_COMPLETED_RESID 0x04
+#define RCS_CCS_INPROG_FGBG 0x10
+#define RCS_CCS_INPROG_FG 0x11
+#define RCS_CCS_INPROG_BG 0x12
+#define RCS_CCS_ABORTED 0x60
+ uint8_t operation_counter[2];
+ uint8_t estimated_status_update_delay[4];
+ uint8_t extended_copy_completion_status;
+ uint8_t length_of_the_sense_data_field;
+ uint8_t sense_data_length;
+ uint8_t transfer_count_units;
+ uint8_t transfer_count[8];
+ uint8_t segments_processed[2];
+ uint8_t reserved[6];
+ uint8_t sense_data[];
+};
+
+struct scsi_receive_copy_operating_parameters
+{
+ uint8_t opcode;
+ uint8_t service_action;
+#define RCS_RCOP 0x03
+ uint8_t reserved[8];
+ uint8_t length[4];
+ uint8_t reserved1;
+ uint8_t control;
+};
+
+struct scsi_receive_copy_operating_parameters_data
+{
+ uint8_t length[4];
+ uint8_t snlid;
+#define RCOP_SNLID 0x01
+ uint8_t reserved[3];
+ uint8_t maximum_cscd_descriptor_count[2];
+ uint8_t maximum_segment_descriptor_count[2];
+ uint8_t maximum_descriptor_list_length[4];
+ uint8_t maximum_segment_length[4];
+ uint8_t maximum_inline_data_length[4];
+ uint8_t held_data_limit[4];
+ uint8_t maximum_stream_device_transfer_size[4];
+ uint8_t reserved2[2];
+ uint8_t total_concurrent_copies[2];
+ uint8_t maximum_concurrent_copies;
+ uint8_t data_segment_granularity;
+ uint8_t inline_data_granularity;
+ uint8_t held_data_granularity;
+ uint8_t reserved3[3];
+ uint8_t implemented_descriptor_list_length;
+ uint8_t list_of_implemented_descriptor_type_codes[0];
+};
+
+struct scsi_extended_copy
+{
+ uint8_t opcode;
+ uint8_t service_action;
+#define EC_EC_LID1 0x00
+#define EC_EC_LID4 0x01
+ uint8_t reserved[8];
+ uint8_t length[4];
+ uint8_t reserved1;
+ uint8_t control;
+};
+
+struct scsi_ec_cscd_dtsp
+{
+ uint8_t flags;
+#define EC_CSCD_FIXED 0x01
+#define EC_CSCD_PAD 0x04
+ uint8_t block_length[3];
+};
+
+struct scsi_ec_cscd
+{
+ uint8_t type_code;
+#define EC_CSCD_EXT 0xff
+ uint8_t luidt_pdt;
+#define EC_NUL 0x20
+#define EC_LUIDT_MASK 0xc0
+#define EC_LUIDT_LUN 0x00
+#define EC_LUIDT_PROXY_TOKEN 0x40
+ uint8_t relative_initiator_port[2];
+ uint8_t cscd_params[24];
+ struct scsi_ec_cscd_dtsp dtsp;
+};
+
+struct scsi_ec_cscd_id
+{
+ uint8_t type_code;
+#define EC_CSCD_ID 0xe4
+ uint8_t luidt_pdt;
+ uint8_t relative_initiator_port[2];
+ uint8_t codeset;
+ uint8_t id_type;
+ uint8_t reserved;
+ uint8_t length;
+ uint8_t designator[20];
+ struct scsi_ec_cscd_dtsp dtsp;
+};
+
+struct scsi_ec_segment
+{
+ uint8_t type_code;
+ uint8_t flags;
+#define EC_SEG_DC 0x02
+#define EC_SEG_CAT 0x01
+ uint8_t descr_length[2];
+ uint8_t params[];
+};
+
+struct scsi_ec_segment_b2b
+{
+ uint8_t type_code;
+#define EC_SEG_B2B 0x02
+ uint8_t flags;
+ uint8_t descr_length[2];
+ uint8_t src_cscd[2];
+ uint8_t dst_cscd[2];
+ uint8_t reserved[2];
+ uint8_t number_of_blocks[2];
+ uint8_t src_lba[8];
+ uint8_t dst_lba[8];
+};
+
+struct scsi_ec_segment_verify
+{
+ uint8_t type_code;
+#define EC_SEG_VERIFY 0x07
+ uint8_t reserved;
+ uint8_t descr_length[2];
+ uint8_t src_cscd[2];
+ uint8_t reserved2[2];
+ uint8_t tur;
+ uint8_t reserved3[3];
+};
+
+struct scsi_ec_segment_register_key
+{
+ uint8_t type_code;
+#define EC_SEG_REGISTER_KEY 0x14
+ uint8_t reserved;
+ uint8_t descr_length[2];
+ uint8_t reserved2[2];
+ uint8_t dst_cscd[2];
+ uint8_t res_key[8];
+ uint8_t sa_res_key[8];
+ uint8_t reserved3[4];
+};
+
+struct scsi_extended_copy_lid1_data
+{
+ uint8_t list_identifier;
+ uint8_t flags;
+#define EC_PRIORITY 0x07
+#define EC_LIST_ID_USAGE_MASK 0x18
+#define EC_LIST_ID_USAGE_FULL 0x08
+#define EC_LIST_ID_USAGE_NOHOLD 0x10
+#define EC_LIST_ID_USAGE_NONE 0x18
+#define EC_STR 0x20
+ uint8_t cscd_list_length[2];
+ uint8_t reserved[4];
+ uint8_t segment_list_length[4];
+ uint8_t inline_data_length[4];
+ uint8_t data[];
+};
+
+struct scsi_extended_copy_lid4_data
+{
+ uint8_t list_format;
+#define EC_LIST_FORMAT 0x01
+ uint8_t flags;
+ uint8_t header_cscd_list_length[2];
+ uint8_t reserved[11];
+ uint8_t flags2;
+#define EC_IMMED 0x01
+#define EC_G_SENSE 0x02
+ uint8_t header_cscd_type_code;
+ uint8_t reserved2[3];
+ uint8_t list_identifier[4];
+ uint8_t reserved3[18];
+ uint8_t cscd_list_length[2];
+ uint8_t segment_list_length[2];
+ uint8_t inline_data_length[2];
+ uint8_t data[];
+};
+
+struct scsi_copy_operation_abort
+{
+ uint8_t opcode;
+ uint8_t service_action;
+#define EC_COA 0x1c
+ uint8_t list_identifier[4];
+ uint8_t reserved[9];
+ uint8_t control;
+};
+
+struct scsi_populate_token
+{
+ uint8_t opcode;
+ uint8_t service_action;
+#define EC_PT 0x10
+ uint8_t reserved[4];
+ uint8_t list_identifier[4];
+ uint8_t length[4];
+ uint8_t group_number;
+ uint8_t control;
+};
+
+struct scsi_range_desc
+{
+ uint8_t lba[8];
+ uint8_t length[4];
+ uint8_t reserved[4];
+};
+
+struct scsi_populate_token_data
+{
+ uint8_t length[2];
+ uint8_t flags;
+#define EC_PT_IMMED 0x01
+#define EC_PT_RTV 0x02
+ uint8_t reserved;
+ uint8_t inactivity_timeout[4];
+ uint8_t rod_type[4];
+ uint8_t reserved2[2];
+ uint8_t range_descriptor_length[2];
+ struct scsi_range_desc desc[];
+};
+
+struct scsi_write_using_token
+{
+ uint8_t opcode;
+ uint8_t service_action;
+#define EC_WUT 0x11
+ uint8_t reserved[4];
+ uint8_t list_identifier[4];
+ uint8_t length[4];
+ uint8_t group_number;
+ uint8_t control;
+};
+
+struct scsi_write_using_token_data
+{
+ uint8_t length[2];
+ uint8_t flags;
+#define EC_WUT_IMMED 0x01
+#define EC_WUT_DEL_TKN 0x02
+ uint8_t reserved[5];
+ uint8_t offset_into_rod[8];
+ uint8_t rod_token[512];
+ uint8_t reserved2[6];
+ uint8_t range_descriptor_length[2];
+ struct scsi_range_desc desc[];
+};
+
+struct scsi_receive_rod_token_information
+{
+ uint8_t opcode;
+ uint8_t service_action;
+#define RCS_RRTI 0x07
+ uint8_t list_identifier[4];
+ uint8_t reserved[4];
+ uint8_t length[4];
+ uint8_t reserved2;
+ uint8_t control;
+};
+
+struct scsi_token
+{
+ uint8_t type[4];
+#define ROD_TYPE_INTERNAL 0x00000000
+#define ROD_TYPE_AUR 0x00010000
+#define ROD_TYPE_PIT_DEF 0x00800000
+#define ROD_TYPE_PIT_VULN 0x00800001
+#define ROD_TYPE_PIT_PERS 0x00800002
+#define ROD_TYPE_PIT_ANY 0x0080FFFF
+#define ROD_TYPE_BLOCK_ZERO 0xFFFF0001
+ uint8_t reserved[2];
+ uint8_t length[2];
+ uint8_t body[0];
+};
+
+struct scsi_report_all_rod_tokens
+{
+ uint8_t opcode;
+ uint8_t service_action;
+#define RCS_RART 0x08
+ uint8_t reserved[8];
+ uint8_t length[4];
+ uint8_t reserved2;
+ uint8_t control;
+};
+
+struct scsi_report_all_rod_tokens_data
+{
+ uint8_t available_data[4];
+ uint8_t reserved[4];
+ uint8_t rod_management_token_list[];
+};
+
struct ata_pass_16 {
u_int8_t opcode;
u_int8_t protocol;
@@ -983,6 +1956,27 @@ struct ata_pass_16 {
u_int8_t control;
};
+struct ata_pass_32 {
+ uint8_t opcode;
+ uint8_t control;
+ uint8_t reserved1[5];
+ uint8_t length;
+ uint8_t service_action[2];
+#define ATA_PASS_32_SA 0x1ff0
+ uint8_t protocol;
+ uint8_t flags;
+ uint8_t reserved2[2];
+ uint8_t lba[6];
+ uint8_t features[2];
+ uint8_t count[2];
+ uint8_t device;
+ uint8_t command;
+ uint8_t reserved3;
+ uint8_t icc;
+ uint8_t auxiliary[4];
+};
+
+
#define SC_SCSI_1 0x01
#define SC_SCSI_2 0x03
@@ -1025,21 +2019,34 @@ struct ata_pass_16 {
#define MODE_SENSE_10 0x5A
#define PERSISTENT_RES_IN 0x5E
#define PERSISTENT_RES_OUT 0x5F
+#define EXTENDED_CDB 0x7E
+#define VARIABLE_LEN_CDB 0x7F
+#define EXTENDED_COPY 0x83
+#define RECEIVE_COPY_STATUS 0x84
#define ATA_PASS_16 0x85
#define READ_16 0x88
+#define COMPARE_AND_WRITE 0x89
#define WRITE_16 0x8A
+#define READ_ATTRIBUTE 0x8C
+#define WRITE_ATTRIBUTE 0x8D
#define WRITE_VERIFY_16 0x8E
+#define VERIFY_16 0x8F
#define SYNCHRONIZE_CACHE_16 0x91
#define WRITE_SAME_16 0x93
+#define READ_BUFFER_16 0x9B
+#define WRITE_ATOMIC_16 0x9C
#define SERVICE_ACTION_IN 0x9E
#define REPORT_LUNS 0xA0
#define ATA_PASS_12 0xA1
+#define SECURITY_PROTOCOL_IN 0xA2
#define MAINTENANCE_IN 0xA3
#define MAINTENANCE_OUT 0xA4
#define MOVE_MEDIUM 0xA5
#define READ_12 0xA8
#define WRITE_12 0xAA
#define WRITE_VERIFY_12 0xAE
+#define VERIFY_12 0xAF
+#define SECURITY_PROTOCOL_OUT 0xB5
#define READ_ELEMENT_STATUS 0xB8
#define READ_CD 0xBE
@@ -1081,6 +2088,7 @@ struct ata_pass_16 {
#define T_OCRW 0x0f
#define T_OSD 0x11
#define T_ADC 0x12
+#define T_ZBC_HM 0x14
#define T_NODEVICE 0x1f
#define T_ANY 0xff /* Used in Quirk table matches */
@@ -1135,7 +2143,9 @@ struct scsi_inquiry_data
#define SID_QUAL_IS_VENDOR_UNIQUE(inq_data) ((SID_QUAL(inq_data) & 0x04) != 0)
u_int8_t dev_qual2;
#define SID_QUAL2 0x7F
-#define SID_IS_REMOVABLE(inq_data) (((inq_data)->dev_qual2 & 0x80) != 0)
+#define SID_LU_CONG 0x40
+#define SID_RMB 0x80
+#define SID_IS_REMOVABLE(inq_data) (((inq_data)->dev_qual2 & SID_RMB) != 0)
u_int8_t version;
#define SID_ANSI_REV(inq_data) ((inq_data)->version & 0x07)
#define SCSI_REV_0 0
@@ -1283,15 +2293,9 @@ struct scsi_vpd_device_id
struct scsi_vpd_id_descriptor
{
u_int8_t proto_codeset;
-#define SCSI_PROTO_FC 0x00
-#define SCSI_PROTO_SPI 0x01
-#define SCSI_PROTO_SSA 0x02
-#define SCSI_PROTO_1394 0x03
-#define SCSI_PROTO_RDMA 0x04
-#define SCSI_PROTO_iSCSI 0x05
-#define SCSI_PROTO_SAS 0x06
-#define SCSI_PROTO_ADT 0x07
-#define SCSI_PROTO_ATA 0x08
+ /*
+ * See the SCSI_PROTO definitions above for the protocols.
+ */
#define SVPD_ID_PROTO_SHIFT 4
#define SVPD_ID_CODESET_BINARY 0x01
#define SVPD_ID_CODESET_ASCII 0x02
@@ -1312,6 +2316,8 @@ struct scsi_vpd_id_descriptor
#define SVPD_ID_TYPE_LUNGRP 0x06
#define SVPD_ID_TYPE_MD5_LUN_ID 0x07
#define SVPD_ID_TYPE_SCSI_NAME 0x08
+#define SVPD_ID_TYPE_PROTO 0x09
+#define SVPD_ID_TYPE_UUID 0x0a
#define SVPD_ID_TYPE_MASK 0x0f
u_int8_t reserved;
u_int8_t length;
@@ -1426,6 +2432,90 @@ struct scsi_service_action_in
uint8_t control;
};
+struct scsi_vpd_extended_inquiry_data
+{
+ uint8_t device;
+ uint8_t page_code;
+#define SVPD_EXTENDED_INQUIRY_DATA 0x86
+ uint8_t page_length[2];
+ uint8_t flags1;
+
+ /* These values are for direct access devices */
+#define SVPD_EID_AM_MASK 0xC0
+#define SVPD_EID_AM_DEFER 0x80
+#define SVPD_EID_AM_IMMED 0x40
+#define SVPD_EID_AM_UNDEFINED 0x00
+#define SVPD_EID_AM_RESERVED 0xc0
+#define SVPD_EID_SPT 0x38
+#define SVPD_EID_SPT_1 0x00
+#define SVPD_EID_SPT_12 0x08
+#define SVPD_EID_SPT_2 0x10
+#define SVPD_EID_SPT_13 0x18
+#define SVPD_EID_SPT_3 0x20
+#define SVPD_EID_SPT_23 0x28
+#define SVPD_EID_SPT_123 0x38
+
+ /* These values are for sequential access devices */
+#define SVPD_EID_SA_SPT_LBP 0x08
+
+#define SVPD_EID_GRD_CHK 0x04
+#define SVPD_EID_APP_CHK 0x02
+#define SVPD_EID_REF_CHK 0x01
+
+ uint8_t flags2;
+#define SVPD_EID_UASK_SUP 0x20
+#define SVPD_EID_GROUP_SUP 0x10
+#define SVPD_EID_PRIOR_SUP 0x08
+#define SVPD_EID_HEADSUP 0x04
+#define SVPD_EID_ORDSUP 0x02
+#define SVPD_EID_SIMPSUP 0x01
+ uint8_t flags3;
+#define SVPD_EID_WU_SUP 0x08
+#define SVPD_EID_CRD_SUP 0x04
+#define SVPD_EID_NV_SUP 0x02
+#define SVPD_EID_V_SUP 0x01
+ uint8_t flags4;
+#define SVPD_EID_P_I_I_SUP 0x10
+#define SVPD_EID_LUICLT 0x01
+ uint8_t flags5;
+#define SVPD_EID_R_SUP 0x10
+#define SVPD_EID_CBCS 0x01
+ uint8_t flags6;
+#define SVPD_EID_MULTI_I_T_FW 0x0F
+#define SVPD_EID_MC_VENDOR_SPEC 0x00
+#define SVPD_EID_MC_MODE_1 0x01
+#define SVPD_EID_MC_MODE_2 0x02
+#define SVPD_EID_MC_MODE_3 0x03
+ uint8_t est[2];
+ uint8_t flags7;
+#define SVPD_EID_POA_SUP 0x80
+#define SVPD_EID_HRA_SUP 0x80
+#define SVPD_EID_VSA_SUP 0x80
+ uint8_t max_sense_length;
+ uint8_t reserved2[50];
+};
+
+struct scsi_vpd_mode_page_policy_descr
+{
+ uint8_t page_code;
+ uint8_t subpage_code;
+ uint8_t policy;
+#define SVPD_MPP_SHARED 0x00
+#define SVPD_MPP_PORT 0x01
+#define SVPD_MPP_I_T 0x03
+#define SVPD_MPP_MLUS 0x80
+ uint8_t reserved;
+};
+
+struct scsi_vpd_mode_page_policy
+{
+ uint8_t device;
+ uint8_t page_code;
+#define SVPD_MODE_PAGE_POLICY 0x87
+ uint8_t page_length[2];
+ struct scsi_vpd_mode_page_policy_descr descr[0];
+};
+
struct scsi_diag_page {
uint8_t page_code;
uint8_t page_specific_flags;
@@ -1433,12 +2523,179 @@ struct scsi_diag_page {
uint8_t params[0];
};
+struct scsi_vpd_port_designation
+{
+ uint8_t reserved[2];
+ uint8_t relative_port_id[2];
+ uint8_t reserved2[2];
+ uint8_t initiator_transportid_length[2];
+ uint8_t initiator_transportid[0];
+};
+
+struct scsi_vpd_port_designation_cont
+{
+ uint8_t reserved[2];
+ uint8_t target_port_descriptors_length[2];
+ struct scsi_vpd_id_descriptor target_port_descriptors[0];
+};
+
+struct scsi_vpd_scsi_ports
+{
+ u_int8_t device;
+ u_int8_t page_code;
+#define SVPD_SCSI_PORTS 0x88
+ u_int8_t page_length[2];
+ struct scsi_vpd_port_designation design[];
+};
+
/*
* ATA Information VPD Page based on
* T10/2126-D Revision 04
*/
#define SVPD_ATA_INFORMATION 0x89
+
+struct scsi_vpd_tpc_descriptor
+{
+ uint8_t desc_type[2];
+ uint8_t desc_length[2];
+ uint8_t parameters[];
+};
+
+struct scsi_vpd_tpc_descriptor_bdrl
+{
+ uint8_t desc_type[2];
+#define SVPD_TPC_BDRL 0x0000
+ uint8_t desc_length[2];
+ uint8_t vendor_specific[6];
+ uint8_t maximum_ranges[2];
+ uint8_t maximum_inactivity_timeout[4];
+ uint8_t default_inactivity_timeout[4];
+ uint8_t maximum_token_transfer_size[8];
+ uint8_t optimal_transfer_count[8];
+};
+
+struct scsi_vpd_tpc_descriptor_sc_descr
+{
+ uint8_t opcode;
+ uint8_t sa_length;
+ uint8_t supported_service_actions[0];
+};
+
+struct scsi_vpd_tpc_descriptor_sc
+{
+ uint8_t desc_type[2];
+#define SVPD_TPC_SC 0x0001
+ uint8_t desc_length[2];
+ uint8_t list_length;
+ struct scsi_vpd_tpc_descriptor_sc_descr descr[];
+};
+
+struct scsi_vpd_tpc_descriptor_pd
+{
+ uint8_t desc_type[2];
+#define SVPD_TPC_PD 0x0004
+ uint8_t desc_length[2];
+ uint8_t reserved[4];
+ uint8_t maximum_cscd_descriptor_count[2];
+ uint8_t maximum_segment_descriptor_count[2];
+ uint8_t maximum_descriptor_list_length[4];
+ uint8_t maximum_inline_data_length[4];
+ uint8_t reserved2[12];
+};
+
+struct scsi_vpd_tpc_descriptor_sd
+{
+ uint8_t desc_type[2];
+#define SVPD_TPC_SD 0x0008
+ uint8_t desc_length[2];
+ uint8_t list_length;
+ uint8_t supported_descriptor_codes[];
+};
+
+struct scsi_vpd_tpc_descriptor_sdid
+{
+ uint8_t desc_type[2];
+#define SVPD_TPC_SDID 0x000C
+ uint8_t desc_length[2];
+ uint8_t list_length[2];
+ uint8_t supported_descriptor_ids[];
+};
+
+struct scsi_vpd_tpc_descriptor_rtf_block
+{
+ uint8_t type_format;
+#define SVPD_TPC_RTF_BLOCK 0x00
+ uint8_t reserved;
+ uint8_t desc_length[2];
+ uint8_t reserved2[2];
+ uint8_t optimal_length_granularity[2];
+ uint8_t maximum_bytes[8];
+ uint8_t optimal_bytes[8];
+ uint8_t optimal_bytes_to_token_per_segment[8];
+ uint8_t optimal_bytes_from_token_per_segment[8];
+ uint8_t reserved3[8];
+};
+
+struct scsi_vpd_tpc_descriptor_rtf
+{
+ uint8_t desc_type[2];
+#define SVPD_TPC_RTF 0x0106
+ uint8_t desc_length[2];
+ uint8_t remote_tokens;
+ uint8_t reserved[11];
+ uint8_t minimum_token_lifetime[4];
+ uint8_t maximum_token_lifetime[4];
+ uint8_t maximum_token_inactivity_timeout[4];
+ uint8_t reserved2[18];
+ uint8_t type_specific_features_length[2];
+ uint8_t type_specific_features[0];
+};
+
+struct scsi_vpd_tpc_descriptor_srtd
+{
+ uint8_t rod_type[4];
+ uint8_t flags;
+#define SVPD_TPC_SRTD_TOUT 0x01
+#define SVPD_TPC_SRTD_TIN 0x02
+#define SVPD_TPC_SRTD_ECPY 0x80
+ uint8_t reserved;
+ uint8_t preference_indicator[2];
+ uint8_t reserved2[56];
+};
+
+struct scsi_vpd_tpc_descriptor_srt
+{
+ uint8_t desc_type[2];
+#define SVPD_TPC_SRT 0x0108
+ uint8_t desc_length[2];
+ uint8_t reserved[2];
+ uint8_t rod_type_descriptors_length[2];
+ uint8_t rod_type_descriptors[0];
+};
+
+struct scsi_vpd_tpc_descriptor_gco
+{
+ uint8_t desc_type[2];
+#define SVPD_TPC_GCO 0x8001
+ uint8_t desc_length[2];
+ uint8_t total_concurrent_copies[4];
+ uint8_t maximum_identified_concurrent_copies[4];
+ uint8_t maximum_segment_length[4];
+ uint8_t data_segment_granularity;
+ uint8_t inline_data_granularity;
+ uint8_t reserved[18];
+};
+
+struct scsi_vpd_tpc
+{
+ uint8_t device;
+ uint8_t page_code;
+#define SVPD_SCSI_TPC 0x8F
+ uint8_t page_length[2];
+ struct scsi_vpd_tpc_descriptor descr[];
+};
+
/*
* Block Device Characteristics VPD Page based on
* T10/1799-D Revision 31
@@ -1451,7 +2708,7 @@ struct scsi_vpd_block_characteristics
u_int8_t page_length[2];
u_int8_t medium_rotation_rate[2];
#define SVPD_BDC_RATE_NOT_REPORTED 0x00
-#define SVPD_BDC_RATE_NONE_ROTATING 0x01
+#define SVPD_BDC_RATE_NON_ROTATING 0x01
u_int8_t reserved1;
u_int8_t nominal_form_factor;
#define SVPD_BDC_FORM_NOT_REPORTED 0x00
@@ -1464,6 +2721,34 @@ struct scsi_vpd_block_characteristics
};
/*
+ * Block Device Characteristics VPD Page
+ */
+struct scsi_vpd_block_device_characteristics
+{
+ uint8_t device;
+ uint8_t page_code;
+#define SVPD_BDC 0xB1
+ uint8_t page_length[2];
+ uint8_t medium_rotation_rate[2];
+#define SVPD_NOT_REPORTED 0x0000
+#define SVPD_NON_ROTATING 0x0001
+ uint8_t product_type;
+ uint8_t wab_wac_ff;
+ uint8_t flags;
+#define SVPD_VBULS 0x01
+#define SVPD_FUAB 0x02
+#define SVPD_ZBC_NR 0x00 /* Not Reported */
+#define SVPD_HAW_ZBC 0x10 /* Host Aware */
+#define SVPD_DM_ZBC 0x20 /* Drive Managed */
+#define SVPD_ZBC_MASK 0x30 /* Zoned mask */
+ uint8_t reserved[55];
+};
+
+#define SBDC_IS_PRESENT(bdc, length, field) \
+ ((length >= offsetof(struct scsi_vpd_block_device_characteristics, \
+ field) + sizeof(bdc->field)) ? 1 : 0)
+
+/*
* Logical Block Provisioning VPD Page based on
* T10/1799-D Revision 31
*/
@@ -1493,8 +2778,7 @@ struct scsi_vpd_logical_block_prov
};
/*
- * Block Limits VDP Page based on
- * T10/1799-D Revision 31
+ * Block Limits VDP Page based on SBC-4 Revision 2
*/
struct scsi_vpd_block_limits
{
@@ -1515,7 +2799,33 @@ struct scsi_vpd_block_limits
u_int8_t opt_unmap_grain[4];
u_int8_t unmap_grain_align[4];
u_int8_t max_write_same_length[8];
- u_int8_t reserved2[20];
+ u_int8_t max_atomic_transfer_length[4];
+ u_int8_t atomic_alignment[4];
+ u_int8_t atomic_transfer_length_granularity[4];
+ u_int8_t max_atomic_transfer_length_with_atomic_boundary[4];
+ u_int8_t max_atomic_boundary_size[4];
+};
+
+/*
+ * Zoned Block Device Characteristics VPD page.
+ * From ZBC-r04, dated August 12, 2015.
+ */
+struct scsi_vpd_zoned_bdc {
+ uint8_t device;
+ uint8_t page_code;
+#define SVPD_ZONED_BDC 0xB6
+ uint8_t page_length[2];
+#define SVPD_ZBDC_PL 0x3C
+ uint8_t flags;
+#define SVPD_ZBDC_URSWRZ 0x01
+ uint8_t reserved1[3];
+ uint8_t optimal_seq_zones[4];
+#define SVPD_ZBDC_OPT_SEQ_NR 0xffffffff
+ uint8_t optimal_nonseq_zones[4];
+#define SVPD_ZBDC_OPT_NONSEQ_NR 0xffffffff
+ uint8_t max_seq_req_zones[4];
+#define SVPD_ZBDC_MAX_SEQ_UNLIMITED 0xffffffff
+ uint8_t reserved2[44];
};
struct scsi_read_capacity
@@ -1574,6 +2884,33 @@ struct scsi_read_capacity_data_long
#define SRC16_LBPRZ_A 0x4000
#define SRC16_LBPME_A 0x8000
uint8_t lalba_lbp[2];
+ uint8_t reserved[16];
+};
+
+struct scsi_get_lba_status
+{
+ uint8_t opcode;
+#define SGLS_SERVICE_ACTION 0x12
+ uint8_t service_action;
+ uint8_t addr[8];
+ uint8_t alloc_len[4];
+ uint8_t reserved;
+ uint8_t control;
+};
+
+struct scsi_get_lba_status_data_descr
+{
+ uint8_t addr[8];
+ uint8_t length[4];
+ uint8_t status;
+ uint8_t reserved[3];
+};
+
+struct scsi_get_lba_status_data
+{
+ uint8_t length[4];
+ uint8_t reserved[4];
+ struct scsi_get_lba_status_data_descr descr[];
};
struct scsi_report_luns
@@ -1583,6 +2920,9 @@ struct scsi_report_luns
#define RPL_REPORT_DEFAULT 0x00
#define RPL_REPORT_WELLKNOWN 0x01
#define RPL_REPORT_ALL 0x02
+#define RPL_REPORT_ADMIN 0x10
+#define RPL_REPORT_NONSUBSID 0x11
+#define RPL_REPORT_CONGLOM 0x12
uint8_t select_report;
uint8_t reserved2[3];
uint8_t length[4];
@@ -1622,8 +2962,9 @@ struct scsi_target_group
{
uint8_t opcode;
uint8_t service_action;
+#define STG_PDF_MASK 0xe0
#define STG_PDF_LENGTH 0x00
-#define RPL_PDF_EXTENDED 0x20
+#define STG_PDF_EXTENDED 0x20
uint8_t reserved1[4];
uint8_t length[4];
uint8_t reserved2;
@@ -1673,12 +3014,47 @@ struct scsi_target_group_data {
struct scsi_target_group_data_extended {
uint8_t length[4]; /* length of returned data, in bytes */
- uint8_t format_type; /* STG_PDF_LENGTH or RPL_PDF_EXTENDED */
+ uint8_t format_type; /* STG_PDF_LENGTH or STG_PDF_EXTENDED */
uint8_t implicit_transition_time;
uint8_t reserved[2];
struct scsi_target_port_group_descriptor groups[];
};
+struct scsi_security_protocol_in
+{
+ uint8_t opcode;
+ uint8_t security_protocol;
+#define SPI_PROT_INFORMATION 0x00
+#define SPI_PROT_CBCS 0x07
+#define SPI_PROT_TAPE_DATA_ENC 0x20
+#define SPI_PROT_DATA_ENC_CONFIG 0x21
+#define SPI_PROT_SA_CREATE_CAP 0x40
+#define SPI_PROT_IKEV2_SCSI 0x41
+#define SPI_PROT_JEDEC_UFS 0xEC
+#define SPI_PROT_SDCARD_TFSSS 0xED
+#define SPI_PROT_AUTH_HOST_TRANSIENT 0xEE
+#define SPI_PROT_ATA_DEVICE_PASSWORD 0xEF
+ uint8_t security_protocol_specific[2];
+ uint8_t byte4;
+#define SPI_INC_512 0x80
+ uint8_t reserved1;
+ uint8_t length[4];
+ uint8_t reserved2;
+ uint8_t control;
+};
+
+struct scsi_security_protocol_out
+{
+ uint8_t opcode;
+ uint8_t security_protocol;
+ uint8_t security_protocol_specific[2];
+ uint8_t byte4;
+#define SPO_INC_512 0x80
+ uint8_t reserved1;
+ uint8_t length[4];
+ uint8_t reserved2;
+ uint8_t control;
+};
typedef enum {
SSD_TYPE_NONE,
@@ -1824,7 +3200,7 @@ struct scsi_sense_info
/*
* Command-specific information depends on the command for which the
- * reported condition occured.
+ * reported condition occurred.
*
* Note that any changes to the field names or positions in this structure,
* even reserved fields, should be accompanied by an examination of the
@@ -2023,6 +3399,29 @@ struct scsi_sense_osd_attr_id
};
/*
+ * ATA Return descriptor, used for the SCSI ATA PASS-THROUGH(12), (16) and
+ * (32) commands. Described in SAT-4r05.
+ */
+struct scsi_sense_ata_ret_desc
+{
+ uint8_t desc_type;
+#define SSD_DESC_ATA 0x09
+ uint8_t length;
+ uint8_t flags;
+#define SSD_DESC_ATA_FLAG_EXTEND 0x01
+ uint8_t error;
+ uint8_t count_15_8;
+ uint8_t count_7_0;
+ uint8_t lba_31_24;
+ uint8_t lba_7_0;
+ uint8_t lba_39_32;
+ uint8_t lba_15_8;
+ uint8_t lba_47_40;
+ uint8_t lba_23_16;
+ uint8_t device;
+ uint8_t status;
+};
+/*
* Used with Sense keys No Sense (0x00) and Not Ready (0x02).
*
* Maximum descriptors allowed: 32 (as of SPC-4)
@@ -2059,7 +3458,7 @@ struct scsi_sense_forwarded
/*
* Vendor-specific sense descriptor. The desc_type field will be in the
- * range bewteen MIN and MAX inclusive.
+ * range between MIN and MAX inclusive.
*/
struct scsi_sense_vendor
{
@@ -2187,6 +3586,22 @@ typedef enum {
SSS_FLAG_PRINT_COMMAND = 0x01
} scsi_sense_string_flags;
+struct scsi_nv {
+ const char *name;
+ uint64_t value;
+};
+
+typedef enum {
+ SCSI_NV_FOUND,
+ SCSI_NV_AMBIGUOUS,
+ SCSI_NV_NOT_FOUND
+} scsi_nv_status;
+
+typedef enum {
+ SCSI_NV_FLAG_NONE = 0x00,
+ SCSI_NV_FLAG_IG_CASE = 0x01 /* Case insensitive comparison */
+} scsi_nv_flags;
+
struct ccb_scsiio;
struct cam_periph;
union ccb;
@@ -2196,8 +3611,6 @@ struct cam_device;
extern const char *scsi_sense_key_text[];
-struct sbuf;
-
__BEGIN_DECLS
void scsi_sense_desc(int sense_key, int asc, int ascq,
struct scsi_inquiry_data *inq_data,
@@ -2310,6 +3723,7 @@ const char * scsi_op_desc(u_int16_t opcode,
struct scsi_inquiry_data *inq_data);
char * scsi_cdb_string(u_int8_t *cdb_ptr, char *cdb_string,
size_t len);
+void scsi_cdb_sbuf(u_int8_t *cdb_ptr, struct sbuf *sb);
void scsi_print_inquiry(struct scsi_inquiry_data *inq_data);
void scsi_print_inquiry_short(struct scsi_inquiry_data *inq_data);
@@ -2324,9 +3738,128 @@ int scsi_devid_is_lun_eui64(uint8_t *bufp);
int scsi_devid_is_lun_naa(uint8_t *bufp);
int scsi_devid_is_lun_name(uint8_t *bufp);
int scsi_devid_is_lun_t10(uint8_t *bufp);
+int scsi_devid_is_port_naa(uint8_t *bufp);
struct scsi_vpd_id_descriptor *
scsi_get_devid(struct scsi_vpd_device_id *id, uint32_t len,
scsi_devid_checkfn_t ck_fn);
+struct scsi_vpd_id_descriptor *
+ scsi_get_devid_desc(struct scsi_vpd_id_descriptor *desc, uint32_t len,
+ scsi_devid_checkfn_t ck_fn);
+
+int scsi_transportid_sbuf(struct sbuf *sb,
+ struct scsi_transportid_header *hdr,
+ uint32_t valid_len);
+
+const char * scsi_nv_to_str(struct scsi_nv *table, int num_table_entries,
+ uint64_t value);
+
+scsi_nv_status scsi_get_nv(struct scsi_nv *table, int num_table_entries,
+ char *name, int *table_entry, scsi_nv_flags flags);
+
+int scsi_parse_transportid_64bit(int proto_id, char *id_str,
+ struct scsi_transportid_header **hdr,
+ unsigned int *alloc_len,
+#ifdef _KERNEL
+ struct malloc_type *type, int flags,
+#endif
+ char *error_str, int error_str_len);
+
+int scsi_parse_transportid_spi(char *id_str,
+ struct scsi_transportid_header **hdr,
+ unsigned int *alloc_len,
+#ifdef _KERNEL
+ struct malloc_type *type, int flags,
+#endif
+ char *error_str, int error_str_len);
+
+int scsi_parse_transportid_rdma(char *id_str,
+ struct scsi_transportid_header **hdr,
+ unsigned int *alloc_len,
+#ifdef _KERNEL
+ struct malloc_type *type, int flags,
+#endif
+ char *error_str, int error_str_len);
+
+int scsi_parse_transportid_iscsi(char *id_str,
+ struct scsi_transportid_header **hdr,
+ unsigned int *alloc_len,
+#ifdef _KERNEL
+ struct malloc_type *type, int flags,
+#endif
+ char *error_str,int error_str_len);
+
+int scsi_parse_transportid_sop(char *id_str,
+ struct scsi_transportid_header **hdr,
+ unsigned int *alloc_len,
+#ifdef _KERNEL
+ struct malloc_type *type, int flags,
+#endif
+ char *error_str,int error_str_len);
+
+int scsi_parse_transportid(char *transportid_str,
+ struct scsi_transportid_header **hdr,
+ unsigned int *alloc_len,
+#ifdef _KERNEL
+ struct malloc_type *type, int flags,
+#endif
+ char *error_str, int error_str_len);
+
+
+int scsi_attrib_volcoh_sbuf(struct sbuf *sb,
+ struct scsi_mam_attribute_header *hdr,
+ uint32_t valid_len, uint32_t flags,
+ uint32_t output_flags, char *error_str,
+ int error_str_len);
+
+int scsi_attrib_vendser_sbuf(struct sbuf *sb,
+ struct scsi_mam_attribute_header *hdr,
+ uint32_t valid_len, uint32_t flags,
+ uint32_t output_flags, char *error_str,
+ int error_str_len);
+
+int scsi_attrib_hexdump_sbuf(struct sbuf *sb,
+ struct scsi_mam_attribute_header *hdr,
+ uint32_t valid_len, uint32_t flags,
+ uint32_t output_flags, char *error_str,
+ int error_str_len);
+
+int scsi_attrib_int_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr,
+ uint32_t valid_len, uint32_t flags,
+ uint32_t output_flags, char *error_str,
+ int error_str_len);
+
+int scsi_attrib_ascii_sbuf(struct sbuf *sb,
+ struct scsi_mam_attribute_header *hdr,
+ uint32_t valid_len, uint32_t flags,
+ uint32_t output_flags, char *error_str,
+ int error_str_len);
+
+int scsi_attrib_text_sbuf(struct sbuf *sb,
+ struct scsi_mam_attribute_header *hdr,
+ uint32_t valid_len, uint32_t flags,
+ uint32_t output_flags, char *error_str,
+ int error_str_len);
+
+struct scsi_attrib_table_entry *scsi_find_attrib_entry(
+ struct scsi_attrib_table_entry *table,
+ size_t num_table_entries, uint32_t id);
+
+struct scsi_attrib_table_entry *scsi_get_attrib_entry(uint32_t id);
+
+int scsi_attrib_value_sbuf(struct sbuf *sb, uint32_t valid_len,
+ struct scsi_mam_attribute_header *hdr,
+ uint32_t output_flags, char *error_str,
+ size_t error_str_len);
+
+void scsi_attrib_prefix_sbuf(struct sbuf *sb, uint32_t output_flags,
+ struct scsi_mam_attribute_header *hdr,
+ uint32_t valid_len, const char *desc);
+
+int scsi_attrib_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr,
+ uint32_t valid_len,
+ struct scsi_attrib_table_entry *user_table,
+ size_t num_user_entries, int prefer_user_table,
+ uint32_t output_flags, char *error_str, int error_str_len);
void scsi_test_unit_ready(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *,
@@ -2410,9 +3943,8 @@ void scsi_read_capacity_16(struct ccb_scsiio *csio, uint32_t retries,
void (*cbfcnp)(struct cam_periph *,
union ccb *), uint8_t tag_action,
uint64_t lba, int reladr, int pmi,
- struct scsi_read_capacity_data_long
- *rcap_buf, uint8_t sense_len,
- uint32_t timeout);
+ uint8_t *rcap_buf, int rcap_buf_len,
+ uint8_t sense_len, uint32_t timeout);
void scsi_report_luns(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *,
@@ -2505,6 +4037,23 @@ void scsi_ata_trim(struct ccb_scsiio *csio, u_int32_t retries,
u_int8_t *data_ptr, u_int16_t dxfer_len,
u_int8_t sense_len, u_int32_t timeout);
+int scsi_ata_read_log(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, uint32_t log_address,
+ uint32_t page_number, uint16_t block_count,
+ uint8_t protocol, uint8_t *data_ptr, uint32_t dxfer_len,
+ uint8_t sense_len, uint32_t timeout);
+
+int scsi_ata_pass(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint32_t flags, uint8_t tag_action,
+ uint8_t protocol, uint8_t ata_flags, uint16_t features,
+ uint16_t sector_count, uint64_t lba, uint8_t command,
+ uint8_t device, uint8_t icc, uint32_t auxiliary,
+ uint8_t control, u_int8_t *data_ptr, uint32_t dxfer_len,
+ uint8_t *cdb_storage, size_t cdb_storage_len,
+ int minimum_cmd_size, u_int8_t sense_len, u_int32_t timeout);
+
void scsi_ata_pass_16(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
u_int32_t flags, u_int8_t tag_action,
@@ -2523,6 +4072,54 @@ void scsi_start_stop(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
u_int8_t tag_action, int start, int load_eject,
int immediate, u_int8_t sense_len, u_int32_t timeout);
+void scsi_read_attribute(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int8_t tag_action, u_int8_t service_action,
+ uint32_t element, u_int8_t elem_type,
+ int logical_volume, int partition,
+ u_int32_t first_attribute, int cache, u_int8_t *data_ptr,
+ u_int32_t length, int sense_len, u_int32_t timeout);
+void scsi_write_attribute(struct ccb_scsiio *csio, u_int32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ u_int8_t tag_action, uint32_t element,
+ int logical_volume, int partition, int wtc, u_int8_t *data_ptr,
+ u_int32_t length, int sense_len, u_int32_t timeout);
+
+void scsi_security_protocol_in(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, uint32_t security_protocol,
+ uint32_t security_protocol_specific, int byte4,
+ uint8_t *data_ptr, uint32_t dxfer_len,
+ int sense_len, int timeout);
+
+void scsi_security_protocol_out(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *,union ccb *),
+ uint8_t tag_action, uint32_t security_protocol,
+ uint32_t security_protocol_specific, int byte4,
+ uint8_t *data_ptr, uint32_t dxfer_len,
+ int sense_len, int timeout);
+
+void scsi_persistent_reserve_in(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *,union ccb *),
+ uint8_t tag_action, int service_action,
+ uint8_t *data_ptr, uint32_t dxfer_len,
+ int sense_len, int timeout);
+
+void scsi_persistent_reserve_out(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *,
+ union ccb *),
+ uint8_t tag_action, int service_action,
+ int scope, int res_type, uint8_t *data_ptr,
+ uint32_t dxfer_len, int sense_len,
+ int timeout);
+
+void scsi_report_supported_opcodes(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *,
+ union ccb *),
+ uint8_t tag_action, int options,
+ int req_opcode, int req_service_action,
+ uint8_t *data_ptr, uint32_t dxfer_len,
+ int sense_len, int timeout);
int scsi_inquiry_match(caddr_t inqbuffer, caddr_t table_entry);
int scsi_static_inquiry_match(caddr_t inqbuffer,
diff --git a/freebsd/sys/cam/scsi/scsi_da.h b/freebsd/sys/cam/scsi/scsi_da.h
index 4fbd7256..e6eb95f1 100644
--- a/freebsd/sys/cam/scsi/scsi_da.h
+++ b/freebsd/sys/cam/scsi/scsi_da.h
@@ -92,28 +92,23 @@ struct scsi_reassign_blocks
struct scsi_read_defect_data_10
{
- u_int8_t opcode;
-
- /*
- * The most significant 3 bits are the LUN, the other 5 are
- * reserved.
- */
-#define SRDD10_LUN_MASK 0xE0
- u_int8_t byte2;
+ uint8_t opcode;
+ uint8_t byte2;
#define SRDD10_GLIST 0x08
#define SRDD10_PLIST 0x10
#define SRDD10_DLIST_FORMAT_MASK 0x07
#define SRDD10_BLOCK_FORMAT 0x00
+#define SRDD10_EXT_BFI_FORMAT 0x01
+#define SRDD10_EXT_PHYS_FORMAT 0x02
+#define SRDD10_LONG_BLOCK_FORMAT 0x03
#define SRDD10_BYTES_FROM_INDEX_FORMAT 0x04
#define SRDD10_PHYSICAL_SECTOR_FORMAT 0x05
- u_int8_t format;
-
- u_int8_t reserved[4];
-
- u_int8_t alloc_length[2];
+#define SRDD10_VENDOR_FORMAT 0x06
+ uint8_t format;
+ uint8_t reserved[4];
+ uint8_t alloc_length[2];
#define SRDD10_MAX_LENGTH 0xffff
-
- u_int8_t control;
+ uint8_t control;
};
struct scsi_sanitize
@@ -143,31 +138,99 @@ struct scsi_sanitize_parameter_list
struct scsi_read_defect_data_12
{
- u_int8_t opcode;
-
- /*
- * The most significant 3 bits are the LUN, the other 5 are
- * reserved.
- */
-#define SRDD12_LUN_MASK 0xE0
- u_int8_t byte2;
-
+ uint8_t opcode;
#define SRDD12_GLIST 0x08
#define SRDD12_PLIST 0x10
#define SRDD12_DLIST_FORMAT_MASK 0x07
-#define SRDD12_BLOCK_FORMAT 0x00
-#define SRDD12_BYTES_FROM_INDEX_FORMAT 0x04
-#define SRDD12_PHYSICAL_SECTOR_FORMAT 0x05
- u_int8_t format;
+#define SRDD12_BLOCK_FORMAT SRDD10_BLOCK_FORMAT
+#define SRDD12_BYTES_FROM_INDEX_FORMAT SRDD10_BYTES_FROM_INDEX_FORMAT
+#define SRDD12_PHYSICAL_SECTOR_FORMAT SRDD10_PHYSICAL_SECTOR_FORMAT
+ uint8_t format;
+ uint8_t address_descriptor_index[4];
+ uint8_t alloc_length[4];
+#define SRDD12_MAX_LENGTH 0xffffffff
+ uint8_t reserved;
+ uint8_t control;
+};
- u_int8_t reserved[4];
+struct scsi_zbc_out
+{
+ uint8_t opcode;
+ uint8_t service_action;
+#define ZBC_OUT_SA_CLOSE 0x01
+#define ZBC_OUT_SA_FINISH 0x02
+#define ZBC_OUT_SA_OPEN 0x03
+#define ZBC_OUT_SA_RWP 0x04
+ uint8_t zone_id[8];
+ uint8_t reserved[4];
+ uint8_t zone_flags;
+#define ZBC_OUT_ALL 0x01
+ uint8_t control;
+};
- u_int8_t alloc_length[4];
+struct scsi_zbc_in
+{
+ uint8_t opcode;
+ uint8_t service_action;
+#define ZBC_IN_SA_REPORT_ZONES 0x00
+ uint8_t zone_start_lba[8];
+ uint8_t length[4];
+ uint8_t zone_options;
+#define ZBC_IN_PARTIAL 0x80
+#define ZBC_IN_REP_ALL_ZONES 0x00
+#define ZBC_IN_REP_EMPTY 0x01
+#define ZBC_IN_REP_IMP_OPEN 0x02
+#define ZBC_IN_REP_EXP_OPEN 0x03
+#define ZBC_IN_REP_CLOSED 0x04
+#define ZBC_IN_REP_FULL 0x05
+#define ZBC_IN_REP_READONLY 0x06
+#define ZBC_IN_REP_OFFLINE 0x07
+#define ZBC_IN_REP_RESET 0x10
+#define ZBC_IN_REP_NON_SEQ 0x11
+#define ZBC_IN_REP_NON_WP 0x3f
+#define ZBC_IN_REP_MASK 0x3f
+ uint8_t control;
+};
- u_int8_t control;
-
+struct scsi_report_zones_desc {
+ uint8_t zone_type;
+#define SRZ_TYPE_CONVENTIONAL 0x01
+#define SRZ_TYPE_SEQ_REQUIRED 0x02
+#define SRZ_TYPE_SEQ_PREFERRED 0x03
+#define SRZ_TYPE_MASK 0x0f
+ uint8_t zone_flags;
+#define SRZ_ZONE_COND_SHIFT 4
+#define SRZ_ZONE_COND_MASK 0xf0
+#define SRZ_ZONE_COND_NWP 0x00
+#define SRZ_ZONE_COND_EMPTY 0x10
+#define SRZ_ZONE_COND_IMP_OPEN 0x20
+#define SRZ_ZONE_COND_EXP_OPEN 0x30
+#define SRZ_ZONE_COND_CLOSED 0x40
+#define SRZ_ZONE_COND_READONLY 0xd0
+#define SRZ_ZONE_COND_FULL 0xe0
+#define SRZ_ZONE_COND_OFFLINE 0xf0
+#define SRZ_ZONE_NON_SEQ 0x02
+#define SRZ_ZONE_RESET 0x01
+ uint8_t reserved[6];
+ uint8_t zone_length[8];
+ uint8_t zone_start_lba[8];
+ uint8_t write_pointer_lba[8];
+ uint8_t reserved2[32];
};
+struct scsi_report_zones_hdr {
+ uint8_t length[4];
+ uint8_t byte4;
+#define SRZ_SAME_ALL_DIFFERENT 0x00 /* Lengths and types vary */
+#define SRZ_SAME_ALL_SAME 0x01 /* Lengths and types the same */
+#define SRZ_SAME_LAST_DIFFERENT 0x02 /* Types same, last length varies */
+#define SRZ_SAME_TYPES_DIFFERENT 0x03 /* Types vary, length the same */
+#define SRZ_SAME_MASK 0x0f
+ uint8_t reserved[3];
+ uint8_t maximum_lba[8];
+ uint8_t reserved2[48];
+ struct scsi_report_zones_desc desc_list[];
+};
/*
* Opcodes
@@ -182,6 +245,8 @@ struct scsi_read_defect_data_12
#define VERIFY 0x2f
#define READ_DEFECT_DATA_10 0x37
#define SANITIZE 0x48
+#define ZBC_OUT 0x94
+#define ZBC_IN 0x95
#define READ_DEFECT_DATA_12 0xb7
struct format_defect_list_header
@@ -222,18 +287,49 @@ struct scsi_read_format_capacities
uint8_t reserved1[3];
};
-struct scsi_verify
+struct scsi_verify_10
{
- uint8_t opcode; /* VERIFY */
+ uint8_t opcode; /* VERIFY(10) */
uint8_t byte2;
#define SVFY_LUN_MASK 0xE0
#define SVFY_RELADR 0x01
-#define SVFY_BYTECHK 0x02
+#define SVFY_BYTCHK 0x02
#define SVFY_DPO 0x10
uint8_t addr[4]; /* LBA to begin verification at */
- uint8_t reserved0[1];
- uint8_t len[2]; /* number of blocks to verify */
- uint8_t reserved1[3];
+ uint8_t group;
+ uint8_t length[2]; /* number of blocks to verify */
+ uint8_t control;
+};
+
+struct scsi_verify_12
+{
+ uint8_t opcode; /* VERIFY(12) */
+ uint8_t byte2;
+ uint8_t addr[4]; /* LBA to begin verification at */
+ uint8_t length[4]; /* number of blocks to verify */
+ uint8_t group;
+ uint8_t control;
+};
+
+struct scsi_verify_16
+{
+ uint8_t opcode; /* VERIFY(16) */
+ uint8_t byte2;
+ uint8_t addr[8]; /* LBA to begin verification at */
+ uint8_t length[4]; /* number of blocks to verify */
+ uint8_t group;
+ uint8_t control;
+};
+
+struct scsi_compare_and_write
+{
+ uint8_t opcode; /* COMPARE AND WRITE */
+ uint8_t byte2;
+ uint8_t addr[8]; /* LBA to begin verification at */
+ uint8_t reserved[3];
+ uint8_t length; /* number of blocks */
+ uint8_t group;
+ uint8_t control;
};
struct scsi_write_and_verify
@@ -314,6 +410,8 @@ struct scsi_read_defect_data_hdr_10
#define SRDDH10_PHYSICAL_SECTOR_FORMAT 0x05
u_int8_t format;
u_int8_t length[2];
+#define SRDDH10_MAX_LENGTH SRDD10_MAX_LENGTH - \
+ sizeof(struct scsi_read_defect_data_hdr_10)
};
struct scsi_defect_desc_block
@@ -321,10 +419,18 @@ struct scsi_defect_desc_block
u_int8_t address[4];
};
+struct scsi_defect_desc_long_block
+{
+ u_int8_t address[8];
+};
+
struct scsi_defect_desc_bytes_from_index
{
u_int8_t cylinder[3];
u_int8_t head;
+#define SDD_EXT_BFI_MADS 0x80000000
+#define SDD_EXT_BFI_FLAG_MASK 0xf0000000
+#define SDD_EXT_BFI_ENTIRE_TRACK 0x0fffffff
u_int8_t bytes_from_index[4];
};
@@ -332,6 +438,9 @@ struct scsi_defect_desc_phys_sector
{
u_int8_t cylinder[3];
u_int8_t head;
+#define SDD_EXT_PHYS_MADS 0x80000000
+#define SDD_EXT_PHYS_FLAG_MASK 0xf0000000
+#define SDD_EXT_PHYS_ENTIRE_TRACK 0x0fffffff
u_int8_t sector[4];
};
@@ -345,7 +454,10 @@ struct scsi_read_defect_data_hdr_12
#define SRDDH12_BYTES_FROM_INDEX_FORMAT 0x04
#define SRDDH12_PHYSICAL_SECTOR_FORMAT 0x05
u_int8_t format;
+ u_int8_t generation[2];
u_int8_t length[4];
+#define SRDDH12_MAX_LENGTH SRDD12_MAX_LENGTH - \
+ sizeof(struct scsi_read_defect_data_hdr_12)
};
union disk_pages /* this is the structure copied from osf */
@@ -515,7 +627,8 @@ struct scsi_da_rw_recovery_page {
u_int8_t correction_span;
u_int8_t head_offset_count;
u_int8_t data_strobe_offset_cnt;
- u_int8_t reserved;
+ u_int8_t byte8;
+#define SMS_RWER_LBPERE 0x80
u_int8_t write_retry_count;
u_int8_t reserved2;
u_int8_t recovery_time_limit[2];
@@ -523,9 +636,9 @@ struct scsi_da_rw_recovery_page {
__BEGIN_DECLS
/*
- * XXX This is only left out of the kernel build to silence warnings. If,
- * for some reason this function is used in the kernel, the ifdefs should
- * be moved so it is included both in the kernel and userland.
+ * XXX These are only left out of the kernel build to silence warnings. If,
+ * for some reason these functions are used in the kernel, the ifdefs should
+ * be moved so they are included both in the kernel and userland.
*/
#ifndef _KERNEL
void scsi_format_unit(struct ccb_scsiio *csio, u_int32_t retries,
@@ -534,6 +647,13 @@ void scsi_format_unit(struct ccb_scsiio *csio, u_int32_t retries,
u_int8_t *data_ptr, u_int32_t dxfer_len,
u_int8_t sense_len, u_int32_t timeout);
+void scsi_read_defects(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, uint8_t list_format,
+ uint32_t addr_desc_index, uint8_t *data_ptr,
+ uint32_t dxfer_len, int minimum_cmd_size,
+ uint8_t sense_len, uint32_t timeout);
+
void scsi_sanitize(struct ccb_scsiio *csio, u_int32_t retries,
void (*cbfcnp)(struct cam_periph *, union ccb *),
u_int8_t tag_action, u_int8_t byte2, u_int16_t control,
@@ -541,6 +661,38 @@ void scsi_sanitize(struct ccb_scsiio *csio, u_int32_t retries,
u_int32_t timeout);
#endif /* !_KERNEL */
+
+void scsi_zbc_out(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, uint8_t service_action, uint64_t zone_id,
+ uint8_t zone_flags, uint8_t *data_ptr, uint32_t dxfer_len,
+ uint8_t sense_len, uint32_t timeout);
+
+void scsi_zbc_in(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, uint8_t service_action,
+ uint64_t zone_start_lba, uint8_t zone_options,
+ uint8_t *data_ptr, uint32_t dxfer_len, uint8_t sense_len,
+ uint32_t timeout);
+
+int scsi_ata_zac_mgmt_out(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, int use_ncq,
+ uint8_t zm_action, uint64_t zone_id,
+ uint8_t zone_flags, uint8_t *data_ptr,
+ uint32_t dxfer_len, uint8_t *cdb_storage,
+ size_t cdb_storage_len, uint8_t sense_len,
+ uint32_t timeout);
+
+int scsi_ata_zac_mgmt_in(struct ccb_scsiio *csio, uint32_t retries,
+ void (*cbfcnp)(struct cam_periph *, union ccb *),
+ uint8_t tag_action, int use_ncq,
+ uint8_t zm_action, uint64_t zone_id,
+ uint8_t zone_flags, uint8_t *data_ptr,
+ uint32_t dxfer_len, uint8_t *cdb_storage,
+ size_t cdb_storage_len, uint8_t sense_len,
+ uint32_t timeout);
+
__END_DECLS
#endif /* _SCSI_SCSI_DA_H */
diff --git a/freebsd/sys/contrib/altq/altq/altqconf.h b/freebsd/sys/contrib/altq/altq/altqconf.h
deleted file mode 100644
index 1b4271eb..00000000
--- a/freebsd/sys/contrib/altq/altq/altqconf.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* $OpenBSD: altqconf.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
-/* $NetBSD: altqconf.h,v 1.2 2001/05/30 11:57:16 mrg Exp $ */
-
-#if defined(_KERNEL_OPT) || defined(__OpenBSD__)
-
-#if defined(_KERNEL_OPT)
-#include <rtems/bsd/local/opt_altq_enabled.h>
-#endif
-
-#include <sys/conf.h>
-
-#ifdef ALTQ
-#define NALTQ 1
-#else
-#define NALTQ 0
-#endif
-
-cdev_decl(altq);
-
-#ifdef __OpenBSD__
-#define cdev_altq_init(c,n) { \
- dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \
- (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
- (dev_type_stop((*))) enodev, 0, (dev_type_select((*))) enodev, \
- (dev_type_mmap((*))) enodev }
-#else
-#define cdev_altq_init(x,y) cdev__oci_init(x,y)
-#endif
-#endif /* defined(_KERNEL_OPT) || defined(__OpenBSD__) */
diff --git a/freebsd/sys/crypto/rijndael/rijndael-api-fst.c b/freebsd/sys/crypto/rijndael/rijndael-api-fst.c
index 72e07840..2f7ba27b 100644
--- a/freebsd/sys/crypto/rijndael/rijndael-api-fst.c
+++ b/freebsd/sys/crypto/rijndael/rijndael-api-fst.c
@@ -36,7 +36,8 @@ __FBSDID("$FreeBSD$");
typedef u_int8_t BYTE;
-int rijndael_makeKey(keyInstance *key, BYTE direction, int keyLen, char *keyMaterial) {
+int rijndael_makeKey(keyInstance *key, BYTE direction, int keyLen,
+ const char *keyMaterial) {
u_int8_t cipherKey[RIJNDAEL_MAXKB];
if (key == NULL) {
@@ -85,7 +86,7 @@ int rijndael_cipherInit(cipherInstance *cipher, BYTE mode, char *IV) {
}
int rijndael_blockEncrypt(cipherInstance *cipher, keyInstance *key,
- BYTE *input, int inputLen, BYTE *outBuffer) {
+ const BYTE *input, int inputLen, BYTE *outBuffer) {
int i, k, numBlocks;
u_int8_t block[16], iv[4][4];
@@ -200,7 +201,7 @@ int rijndael_blockEncrypt(cipherInstance *cipher, keyInstance *key,
* @return length in octets (not bits) of the encrypted output buffer.
*/
int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key,
- BYTE *input, int inputOctets, BYTE *outBuffer) {
+ const BYTE *input, int inputOctets, BYTE *outBuffer) {
int i, numBlocks, padLen;
u_int8_t block[16], *iv, *cp;
@@ -234,10 +235,10 @@ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key,
case MODE_CBC:
iv = cipher->IV;
for (i = numBlocks; i > 0; i--) {
- ((u_int32_t*)block)[0] = ((u_int32_t*)input)[0] ^ ((u_int32_t*)iv)[0];
- ((u_int32_t*)block)[1] = ((u_int32_t*)input)[1] ^ ((u_int32_t*)iv)[1];
- ((u_int32_t*)block)[2] = ((u_int32_t*)input)[2] ^ ((u_int32_t*)iv)[2];
- ((u_int32_t*)block)[3] = ((u_int32_t*)input)[3] ^ ((u_int32_t*)iv)[3];
+ ((u_int32_t*)block)[0] = ((const u_int32_t*)input)[0] ^ ((u_int32_t*)iv)[0];
+ ((u_int32_t*)block)[1] = ((const u_int32_t*)input)[1] ^ ((u_int32_t*)iv)[1];
+ ((u_int32_t*)block)[2] = ((const u_int32_t*)input)[2] ^ ((u_int32_t*)iv)[2];
+ ((u_int32_t*)block)[3] = ((const u_int32_t*)input)[3] ^ ((u_int32_t*)iv)[3];
rijndaelEncrypt(key->rk, key->Nr, block, outBuffer);
iv = outBuffer;
input += 16;
@@ -263,7 +264,7 @@ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key,
}
int rijndael_blockDecrypt(cipherInstance *cipher, keyInstance *key,
- BYTE *input, int inputLen, BYTE *outBuffer) {
+ const BYTE *input, int inputLen, BYTE *outBuffer) {
int i, k, numBlocks;
u_int8_t block[16], iv[4][4];
@@ -362,7 +363,7 @@ int rijndael_blockDecrypt(cipherInstance *cipher, keyInstance *key,
}
int rijndael_padDecrypt(cipherInstance *cipher, keyInstance *key,
- BYTE *input, int inputOctets, BYTE *outBuffer) {
+ const BYTE *input, int inputOctets, BYTE *outBuffer) {
int i, numBlocks, padLen;
u_int8_t block[16];
u_int32_t iv[4];
diff --git a/freebsd/sys/crypto/rijndael/rijndael-api-fst.h b/freebsd/sys/crypto/rijndael/rijndael-api-fst.h
index 122bf52d..e5f596ac 100644
--- a/freebsd/sys/crypto/rijndael/rijndael-api-fst.h
+++ b/freebsd/sys/crypto/rijndael/rijndael-api-fst.h
@@ -56,18 +56,18 @@ typedef struct { /* changed order of the components */
/* Function prototypes */
-int rijndael_makeKey(keyInstance *, u_int8_t, int, char *);
+int rijndael_makeKey(keyInstance *, u_int8_t, int, const char *);
int rijndael_cipherInit(cipherInstance *, u_int8_t, char *);
-int rijndael_blockEncrypt(cipherInstance *, keyInstance *, u_int8_t *, int,
- u_int8_t *);
-int rijndael_padEncrypt(cipherInstance *, keyInstance *, u_int8_t *, int,
- u_int8_t *);
+int rijndael_blockEncrypt(cipherInstance *, keyInstance *, const u_int8_t *,
+ int, u_int8_t *);
+int rijndael_padEncrypt(cipherInstance *, keyInstance *, const u_int8_t *,
+ int, u_int8_t *);
-int rijndael_blockDecrypt(cipherInstance *, keyInstance *, u_int8_t *, int,
- u_int8_t *);
-int rijndael_padDecrypt(cipherInstance *, keyInstance *, u_int8_t *, int,
- u_int8_t *);
+int rijndael_blockDecrypt(cipherInstance *, keyInstance *, const u_int8_t *,
+ int, u_int8_t *);
+int rijndael_padDecrypt(cipherInstance *, keyInstance *, const u_int8_t *,
+ int, u_int8_t *);
#endif /* __RIJNDAEL_API_FST_H */
diff --git a/freebsd/sys/crypto/sha1.c b/freebsd/sys/crypto/sha1.c
index 2c3a4c15..c21677ec 100644
--- a/freebsd/sys/crypto/sha1.c
+++ b/freebsd/sys/crypto/sha1.c
@@ -251,16 +251,14 @@ sha1_loop(ctxt, input, len)
}
void
-sha1_result(ctxt, digest0)
- struct sha1_ctxt *ctxt;
- caddr_t digest0;
+sha1_result(struct sha1_ctxt *ctxt, char digest0[static SHA1_RESULTLEN])
{
u_int8_t *digest;
digest = (u_int8_t *)digest0;
sha1_pad(ctxt);
#if BYTE_ORDER == BIG_ENDIAN
- bcopy(&ctxt->h.b8[0], digest, 20);
+ bcopy(&ctxt->h.b8[0], digest, SHA1_RESULTLEN);
#else
digest[0] = ctxt->h.b8[3]; digest[1] = ctxt->h.b8[2];
digest[2] = ctxt->h.b8[1]; digest[3] = ctxt->h.b8[0];
diff --git a/freebsd/sys/crypto/sha1.h b/freebsd/sys/crypto/sha1.h
index 3686d7dd..d61709e2 100644
--- a/freebsd/sys/crypto/sha1.h
+++ b/freebsd/sys/crypto/sha1.h
@@ -35,8 +35,8 @@
* implemented by Jun-ichiro itojun Itoh <itojun@itojun.org>
*/
-#ifndef _NETINET6_SHA1_H_
-#define _NETINET6_SHA1_H_
+#ifndef _CRYPTO_SHA1_H_
+#define _CRYPTO_SHA1_H_
struct sha1_ctxt {
union {
@@ -53,20 +53,20 @@ struct sha1_ctxt {
} m;
u_int8_t count;
};
+typedef struct sha1_ctxt SHA1_CTX;
+
+#define SHA1_RESULTLEN (160/8)
#ifdef _KERNEL
extern void sha1_init(struct sha1_ctxt *);
extern void sha1_pad(struct sha1_ctxt *);
extern void sha1_loop(struct sha1_ctxt *, const u_int8_t *, size_t);
-extern void sha1_result(struct sha1_ctxt *, caddr_t);
+extern void sha1_result(struct sha1_ctxt *, char[static SHA1_RESULTLEN]);
/* compatibilty with other SHA1 source codes */
-typedef struct sha1_ctxt SHA1_CTX;
#define SHA1Init(x) sha1_init((x))
#define SHA1Update(x, y, z) sha1_loop((x), (y), (z))
#define SHA1Final(x, y) sha1_result((y), (x))
#endif /* _KERNEL */
-#define SHA1_RESULTLEN (160/8)
-
-#endif /*_NETINET6_SHA1_H_*/
+#endif /*_CRYPTO_SHA1_H_*/
diff --git a/freebsd/sys/crypto/sha2/sha256.h b/freebsd/sys/crypto/sha2/sha256.h
new file mode 100644
index 00000000..17aae7de
--- /dev/null
+++ b/freebsd/sys/crypto/sha2/sha256.h
@@ -0,0 +1,90 @@
+/*-
+ * Copyright 2005 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SHA256_H_
+#define _SHA256_H_
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#endif
+
+#define SHA256_BLOCK_LENGTH 64
+#define SHA256_DIGEST_LENGTH 32
+#define SHA256_DIGEST_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1)
+
+typedef struct SHA256Context {
+ uint32_t state[8];
+ uint64_t count;
+ uint8_t buf[SHA256_BLOCK_LENGTH];
+} SHA256_CTX;
+
+__BEGIN_DECLS
+
+/* Ensure libmd symbols do not clash with libcrypto */
+
+#ifndef SHA256_Init
+#define SHA256_Init _libmd_SHA256_Init
+#endif
+#ifndef SHA256_Update
+#define SHA256_Update _libmd_SHA256_Update
+#endif
+#ifndef SHA256_Final
+#define SHA256_Final _libmd_SHA256_Final
+#endif
+#ifndef SHA256_End
+#define SHA256_End _libmd_SHA256_End
+#endif
+#ifndef SHA256_File
+#define SHA256_File _libmd_SHA256_File
+#endif
+#ifndef SHA256_FileChunk
+#define SHA256_FileChunk _libmd_SHA256_FileChunk
+#endif
+#ifndef SHA256_Data
+#define SHA256_Data _libmd_SHA256_Data
+#endif
+
+#ifndef SHA256_Transform
+#define SHA256_Transform _libmd_SHA256_Transform
+#endif
+#ifndef SHA256_version
+#define SHA256_version _libmd_SHA256_version
+#endif
+
+void SHA256_Init(SHA256_CTX *);
+void SHA256_Update(SHA256_CTX *, const void *, size_t);
+void SHA256_Final(unsigned char [static SHA256_DIGEST_LENGTH], SHA256_CTX *);
+#ifndef _KERNEL
+char *SHA256_End(SHA256_CTX *, char *);
+char *SHA256_Data(const void *, unsigned int, char *);
+char *SHA256_File(const char *, char *);
+char *SHA256_FileChunk(const char *, char *, off_t, off_t);
+#endif
+__END_DECLS
+
+#endif /* !_SHA256_H_ */
diff --git a/freebsd/sys/crypto/sha2/sha256c.c b/freebsd/sys/crypto/sha2/sha256c.c
new file mode 100644
index 00000000..ea389694
--- /dev/null
+++ b/freebsd/sys/crypto/sha2/sha256c.c
@@ -0,0 +1,318 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright 2005 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/endian.h>
+#include <sys/types.h>
+
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <string.h>
+#endif
+
+#include "sha256.h"
+
+#if BYTE_ORDER == BIG_ENDIAN
+
+/* Copy a vector of big-endian uint32_t into a vector of bytes */
+#define be32enc_vect(dst, src, len) \
+ memcpy((void *)dst, (const void *)src, (size_t)len)
+
+/* Copy a vector of bytes into a vector of big-endian uint32_t */
+#define be32dec_vect(dst, src, len) \
+ memcpy((void *)dst, (const void *)src, (size_t)len)
+
+#else /* BYTE_ORDER != BIG_ENDIAN */
+
+/*
+ * Encode a length len/4 vector of (uint32_t) into a length len vector of
+ * (unsigned char) in big-endian form. Assumes len is a multiple of 4.
+ */
+static void
+be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len / 4; i++)
+ be32enc(dst + i * 4, src[i]);
+}
+
+/*
+ * Decode a big-endian length len vector of (unsigned char) into a length
+ * len/4 vector of (uint32_t). Assumes len is a multiple of 4.
+ */
+static void
+be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len / 4; i++)
+ dst[i] = be32dec(src + i * 4);
+}
+
+#endif /* BYTE_ORDER != BIG_ENDIAN */
+
+/* SHA256 round constants. */
+static const uint32_t K[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+/* Elementary functions used by SHA256 */
+#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
+#define Maj(x, y, z) ((x & (y | z)) | (y & z))
+#define SHR(x, n) (x >> n)
+#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
+#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
+#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
+#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
+#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
+
+/* SHA256 round function */
+#define RND(a, b, c, d, e, f, g, h, k) \
+ h += S1(e) + Ch(e, f, g) + k; \
+ d += h; \
+ h += S0(a) + Maj(a, b, c);
+
+/* Adjusted round function for rotating state */
+#define RNDr(S, W, i, ii) \
+ RND(S[(64 - i) % 8], S[(65 - i) % 8], \
+ S[(66 - i) % 8], S[(67 - i) % 8], \
+ S[(68 - i) % 8], S[(69 - i) % 8], \
+ S[(70 - i) % 8], S[(71 - i) % 8], \
+ W[i + ii] + K[i + ii])
+
+/* Message schedule computation */
+#define MSCH(W, ii, i) \
+ W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii]
+
+/*
+ * SHA256 block compression function. The 256-bit state is transformed via
+ * the 512-bit input block to produce a new state.
+ */
+static void
+SHA256_Transform(uint32_t * state, const unsigned char block[64])
+{
+ uint32_t W[64];
+ uint32_t S[8];
+ int i;
+
+ /* 1. Prepare the first part of the message schedule W. */
+ be32dec_vect(W, block, 64);
+
+ /* 2. Initialize working variables. */
+ memcpy(S, state, 32);
+
+ /* 3. Mix. */
+ for (i = 0; i < 64; i += 16) {
+ RNDr(S, W, 0, i);
+ RNDr(S, W, 1, i);
+ RNDr(S, W, 2, i);
+ RNDr(S, W, 3, i);
+ RNDr(S, W, 4, i);
+ RNDr(S, W, 5, i);
+ RNDr(S, W, 6, i);
+ RNDr(S, W, 7, i);
+ RNDr(S, W, 8, i);
+ RNDr(S, W, 9, i);
+ RNDr(S, W, 10, i);
+ RNDr(S, W, 11, i);
+ RNDr(S, W, 12, i);
+ RNDr(S, W, 13, i);
+ RNDr(S, W, 14, i);
+ RNDr(S, W, 15, i);
+
+ if (i == 48)
+ break;
+ MSCH(W, 0, i);
+ MSCH(W, 1, i);
+ MSCH(W, 2, i);
+ MSCH(W, 3, i);
+ MSCH(W, 4, i);
+ MSCH(W, 5, i);
+ MSCH(W, 6, i);
+ MSCH(W, 7, i);
+ MSCH(W, 8, i);
+ MSCH(W, 9, i);
+ MSCH(W, 10, i);
+ MSCH(W, 11, i);
+ MSCH(W, 12, i);
+ MSCH(W, 13, i);
+ MSCH(W, 14, i);
+ MSCH(W, 15, i);
+ }
+
+ /* 4. Mix local working variables into global state */
+ for (i = 0; i < 8; i++)
+ state[i] += S[i];
+}
+
+static unsigned char PAD[64] = {
+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Add padding and terminating bit-count. */
+static void
+SHA256_Pad(SHA256_CTX * ctx)
+{
+ size_t r;
+
+ /* Figure out how many bytes we have buffered. */
+ r = (ctx->count >> 3) & 0x3f;
+
+ /* Pad to 56 mod 64, transforming if we finish a block en route. */
+ if (r < 56) {
+ /* Pad to 56 mod 64. */
+ memcpy(&ctx->buf[r], PAD, 56 - r);
+ } else {
+ /* Finish the current block and mix. */
+ memcpy(&ctx->buf[r], PAD, 64 - r);
+ SHA256_Transform(ctx->state, ctx->buf);
+
+ /* The start of the final block is all zeroes. */
+ memset(&ctx->buf[0], 0, 56);
+ }
+
+ /* Add the terminating bit-count. */
+ be64enc(&ctx->buf[56], ctx->count);
+
+ /* Mix in the final block. */
+ SHA256_Transform(ctx->state, ctx->buf);
+}
+
+/* SHA-256 initialization. Begins a SHA-256 operation. */
+void
+SHA256_Init(SHA256_CTX * ctx)
+{
+
+ /* Zero bits processed so far */
+ ctx->count = 0;
+
+ /* Magic initialization constants */
+ ctx->state[0] = 0x6A09E667;
+ ctx->state[1] = 0xBB67AE85;
+ ctx->state[2] = 0x3C6EF372;
+ ctx->state[3] = 0xA54FF53A;
+ ctx->state[4] = 0x510E527F;
+ ctx->state[5] = 0x9B05688C;
+ ctx->state[6] = 0x1F83D9AB;
+ ctx->state[7] = 0x5BE0CD19;
+}
+
+/* Add bytes into the hash */
+void
+SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len)
+{
+ uint64_t bitlen;
+ uint32_t r;
+ const unsigned char *src = in;
+
+ /* Number of bytes left in the buffer from previous updates */
+ r = (ctx->count >> 3) & 0x3f;
+
+ /* Convert the length into a number of bits */
+ bitlen = len << 3;
+
+ /* Update number of bits */
+ ctx->count += bitlen;
+
+ /* Handle the case where we don't need to perform any transforms */
+ if (len < 64 - r) {
+ memcpy(&ctx->buf[r], src, len);
+ return;
+ }
+
+ /* Finish the current block */
+ memcpy(&ctx->buf[r], src, 64 - r);
+ SHA256_Transform(ctx->state, ctx->buf);
+ src += 64 - r;
+ len -= 64 - r;
+
+ /* Perform complete blocks */
+ while (len >= 64) {
+ SHA256_Transform(ctx->state, src);
+ src += 64;
+ len -= 64;
+ }
+
+ /* Copy left over data into buffer */
+ memcpy(ctx->buf, src, len);
+}
+
+/*
+ * SHA-256 finalization. Pads the input data, exports the hash value,
+ * and clears the context state.
+ */
+void
+SHA256_Final(unsigned char digest[static SHA256_DIGEST_LENGTH], SHA256_CTX *ctx)
+{
+
+ /* Add padding */
+ SHA256_Pad(ctx);
+
+ /* Write the hash */
+ be32enc_vect(digest, ctx->state, SHA256_DIGEST_LENGTH);
+
+ /* Clear the context state */
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+#ifdef WEAK_REFS
+/* When building libmd, provide weak references. Note: this is not
+ activated in the context of compiling these sources for internal
+ use in libcrypt.
+ */
+#undef SHA256_Init
+__weak_reference(_libmd_SHA256_Init, SHA256_Init);
+#undef SHA256_Update
+__weak_reference(_libmd_SHA256_Update, SHA256_Update);
+#undef SHA256_Final
+__weak_reference(_libmd_SHA256_Final, SHA256_Final);
+#undef SHA256_Transform
+__weak_reference(_libmd_SHA256_Transform, SHA256_Transform);
+#endif
diff --git a/freebsd/sys/crypto/sha2/sha384.h b/freebsd/sys/crypto/sha2/sha384.h
new file mode 100644
index 00000000..63dd948b
--- /dev/null
+++ b/freebsd/sys/crypto/sha2/sha384.h
@@ -0,0 +1,87 @@
+/*-
+ * Copyright 2005 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SHA384_H_
+#define _SHA384_H_
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#endif
+
+#define SHA384_BLOCK_LENGTH 128
+#define SHA384_DIGEST_LENGTH 48
+#define SHA384_DIGEST_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1)
+
+typedef struct SHA384Context {
+ uint64_t state[8];
+ uint64_t count[2];
+ uint8_t buf[SHA384_BLOCK_LENGTH];
+} SHA384_CTX;
+
+__BEGIN_DECLS
+
+/* Ensure libmd symbols do not clash with libcrypto */
+#ifndef SHA384_Init
+#define SHA384_Init _libmd_SHA384_Init
+#endif
+#ifndef SHA384_Update
+#define SHA384_Update _libmd_SHA384_Update
+#endif
+#ifndef SHA384_Final
+#define SHA384_Final _libmd_SHA384_Final
+#endif
+#ifndef SHA384_End
+#define SHA384_End _libmd_SHA384_End
+#endif
+#ifndef SHA384_File
+#define SHA384_File _libmd_SHA384_File
+#endif
+#ifndef SHA384_FileChunk
+#define SHA384_FileChunk _libmd_SHA384_FileChunk
+#endif
+#ifndef SHA384_Data
+#define SHA384_Data _libmd_SHA384_Data
+#endif
+
+#ifndef SHA384_version
+#define SHA384_version _libmd_SHA384_version
+#endif
+
+void SHA384_Init(SHA384_CTX *);
+void SHA384_Update(SHA384_CTX *, const void *, size_t);
+void SHA384_Final(unsigned char [static SHA384_DIGEST_LENGTH], SHA384_CTX *);
+#ifndef _KERNEL
+char *SHA384_End(SHA384_CTX *, char *);
+char *SHA384_Data(const void *, unsigned int, char *);
+char *SHA384_File(const char *, char *);
+char *SHA384_FileChunk(const char *, char *, off_t, off_t);
+#endif
+
+__END_DECLS
+
+#endif /* !_SHA384_H_ */
diff --git a/freebsd/sys/crypto/sha2/sha512.h b/freebsd/sys/crypto/sha2/sha512.h
new file mode 100644
index 00000000..b008aeae
--- /dev/null
+++ b/freebsd/sys/crypto/sha2/sha512.h
@@ -0,0 +1,90 @@
+/*-
+ * Copyright 2005 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SHA512_H_
+#define _SHA512_H_
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#endif
+
+#define SHA512_BLOCK_LENGTH 128
+#define SHA512_DIGEST_LENGTH 64
+#define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1)
+
+typedef struct SHA512Context {
+ uint64_t state[8];
+ uint64_t count[2];
+ uint8_t buf[SHA512_BLOCK_LENGTH];
+} SHA512_CTX;
+
+__BEGIN_DECLS
+
+/* Ensure libmd symbols do not clash with libcrypto */
+#ifndef SHA512_Init
+#define SHA512_Init _libmd_SHA512_Init
+#endif
+#ifndef SHA512_Update
+#define SHA512_Update _libmd_SHA512_Update
+#endif
+#ifndef SHA512_Final
+#define SHA512_Final _libmd_SHA512_Final
+#endif
+#ifndef SHA512_End
+#define SHA512_End _libmd_SHA512_End
+#endif
+#ifndef SHA512_File
+#define SHA512_File _libmd_SHA512_File
+#endif
+#ifndef SHA512_FileChunk
+#define SHA512_FileChunk _libmd_SHA512_FileChunk
+#endif
+#ifndef SHA512_Data
+#define SHA512_Data _libmd_SHA512_Data
+#endif
+
+#ifndef SHA512_Transform
+#define SHA512_Transform _libmd_SHA512_Transform
+#endif
+#ifndef SHA512_version
+#define SHA512_version _libmd_SHA512_version
+#endif
+
+void SHA512_Init(SHA512_CTX *);
+void SHA512_Update(SHA512_CTX *, const void *, size_t);
+void SHA512_Final(unsigned char [static SHA512_DIGEST_LENGTH], SHA512_CTX *);
+#ifndef _KERNEL
+char *SHA512_End(SHA512_CTX *, char *);
+char *SHA512_Data(const void *, unsigned int, char *);
+char *SHA512_File(const char *, char *);
+char *SHA512_FileChunk(const char *, char *, off_t, off_t);
+#endif
+
+__END_DECLS
+
+#endif /* !_SHA512_H_ */
diff --git a/freebsd/sys/crypto/sha2/sha512c.c b/freebsd/sys/crypto/sha2/sha512c.c
new file mode 100644
index 00000000..7aa4bf52
--- /dev/null
+++ b/freebsd/sys/crypto/sha2/sha512c.c
@@ -0,0 +1,505 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright 2005 Colin Percival
+ * Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/endian.h>
+#include <sys/types.h>
+
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <string.h>
+#endif
+
+#include "sha512.h"
+#include "sha512t.h"
+#include "sha384.h"
+
+#if BYTE_ORDER == BIG_ENDIAN
+
+/* Copy a vector of big-endian uint64_t into a vector of bytes */
+#define be64enc_vect(dst, src, len) \
+ memcpy((void *)dst, (const void *)src, (size_t)len)
+
+/* Copy a vector of bytes into a vector of big-endian uint64_t */
+#define be64dec_vect(dst, src, len) \
+ memcpy((void *)dst, (const void *)src, (size_t)len)
+
+#else /* BYTE_ORDER != BIG_ENDIAN */
+
+/*
+ * Encode a length len/8 vector of (uint64_t) into a length len vector of
+ * (unsigned char) in big-endian form. Assumes len is a multiple of 8.
+ */
+static void
+be64enc_vect(unsigned char *dst, const uint64_t *src, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len / 8; i++)
+ be64enc(dst + i * 8, src[i]);
+}
+
+/*
+ * Decode a big-endian length len vector of (unsigned char) into a length
+ * len/8 vector of (uint64_t).  Assumes len is a multiple of 8.
+ */
+static void
+be64dec_vect(uint64_t *dst, const unsigned char *src, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len / 8; i++)
+ dst[i] = be64dec(src + i * 8);
+}
+
+#endif /* BYTE_ORDER != BIG_ENDIAN */
+
+/* SHA512 round constants. */
+static const uint64_t K[80] = {
+ 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
+ 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
+ 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+ 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
+ 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
+ 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+ 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
+ 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
+ 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+ 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
+ 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
+ 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+ 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
+ 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
+ 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+ 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
+ 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
+ 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+ 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
+ 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
+ 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+ 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
+ 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
+ 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+ 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
+ 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
+ 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+ 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
+ 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
+ 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+ 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
+ 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
+ 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+ 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
+ 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
+ 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+ 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
+ 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
+ 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+ 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
+/* Elementary functions used by SHA512 */
+#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
+#define Maj(x, y, z) ((x & (y | z)) | (y & z))
+#define SHR(x, n) (x >> n)
+#define ROTR(x, n) ((x >> n) | (x << (64 - n)))
+#define S0(x) (ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39))
+#define S1(x) (ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41))
+#define s0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x, 7))
+#define s1(x) (ROTR(x, 19) ^ ROTR(x, 61) ^ SHR(x, 6))
+
+/* SHA512 round function */
+#define RND(a, b, c, d, e, f, g, h, k) \
+ h += S1(e) + Ch(e, f, g) + k; \
+ d += h; \
+ h += S0(a) + Maj(a, b, c);
+
+/* Adjusted round function for rotating state */
+#define RNDr(S, W, i, ii) \
+ RND(S[(80 - i) % 8], S[(81 - i) % 8], \
+ S[(82 - i) % 8], S[(83 - i) % 8], \
+ S[(84 - i) % 8], S[(85 - i) % 8], \
+ S[(86 - i) % 8], S[(87 - i) % 8], \
+ W[i + ii] + K[i + ii])
+
+/* Message schedule computation */
+#define MSCH(W, ii, i) \
+ W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii]
+
+/*
+ * SHA512 block compression function. The 512-bit state is transformed via
+ * the 512-bit input block to produce a new state.
+ */
+static void
+SHA512_Transform(uint64_t * state, const unsigned char block[SHA512_BLOCK_LENGTH])
+{
+ uint64_t W[80];
+ uint64_t S[8];
+ int i;
+
+ /* 1. Prepare the first part of the message schedule W. */
+ be64dec_vect(W, block, SHA512_BLOCK_LENGTH);
+
+ /* 2. Initialize working variables. */
+ memcpy(S, state, SHA512_DIGEST_LENGTH);
+
+ /* 3. Mix. */
+ for (i = 0; i < 80; i += 16) {
+ RNDr(S, W, 0, i);
+ RNDr(S, W, 1, i);
+ RNDr(S, W, 2, i);
+ RNDr(S, W, 3, i);
+ RNDr(S, W, 4, i);
+ RNDr(S, W, 5, i);
+ RNDr(S, W, 6, i);
+ RNDr(S, W, 7, i);
+ RNDr(S, W, 8, i);
+ RNDr(S, W, 9, i);
+ RNDr(S, W, 10, i);
+ RNDr(S, W, 11, i);
+ RNDr(S, W, 12, i);
+ RNDr(S, W, 13, i);
+ RNDr(S, W, 14, i);
+ RNDr(S, W, 15, i);
+
+ if (i == 64)
+ break;
+ MSCH(W, 0, i);
+ MSCH(W, 1, i);
+ MSCH(W, 2, i);
+ MSCH(W, 3, i);
+ MSCH(W, 4, i);
+ MSCH(W, 5, i);
+ MSCH(W, 6, i);
+ MSCH(W, 7, i);
+ MSCH(W, 8, i);
+ MSCH(W, 9, i);
+ MSCH(W, 10, i);
+ MSCH(W, 11, i);
+ MSCH(W, 12, i);
+ MSCH(W, 13, i);
+ MSCH(W, 14, i);
+ MSCH(W, 15, i);
+ }
+
+ /* 4. Mix local working variables into global state */
+ for (i = 0; i < 8; i++)
+ state[i] += S[i];
+}
+
+static unsigned char PAD[SHA512_BLOCK_LENGTH] = {
+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Add padding and terminating bit-count. */
+static void
+SHA512_Pad(SHA512_CTX * ctx)
+{
+ size_t r;
+
+ /* Figure out how many bytes we have buffered. */
+ r = (ctx->count[1] >> 3) & 0x7f;
+
+ /* Pad to 112 mod 128, transforming if we finish a block en route. */
+ if (r < 112) {
+ /* Pad to 112 mod 128. */
+ memcpy(&ctx->buf[r], PAD, 112 - r);
+ } else {
+ /* Finish the current block and mix. */
+ memcpy(&ctx->buf[r], PAD, 128 - r);
+ SHA512_Transform(ctx->state, ctx->buf);
+
+ /* The start of the final block is all zeroes. */
+ memset(&ctx->buf[0], 0, 112);
+ }
+
+ /* Add the terminating bit-count. */
+ be64enc_vect(&ctx->buf[112], ctx->count, 16);
+
+ /* Mix in the final block. */
+ SHA512_Transform(ctx->state, ctx->buf);
+}
+
+/* SHA-512 initialization. Begins a SHA-512 operation. */
+void
+SHA512_Init(SHA512_CTX * ctx)
+{
+
+ /* Zero bits processed so far */
+ ctx->count[0] = ctx->count[1] = 0;
+
+ /* Magic initialization constants */
+ ctx->state[0] = 0x6a09e667f3bcc908ULL;
+ ctx->state[1] = 0xbb67ae8584caa73bULL;
+ ctx->state[2] = 0x3c6ef372fe94f82bULL;
+ ctx->state[3] = 0xa54ff53a5f1d36f1ULL;
+ ctx->state[4] = 0x510e527fade682d1ULL;
+ ctx->state[5] = 0x9b05688c2b3e6c1fULL;
+ ctx->state[6] = 0x1f83d9abfb41bd6bULL;
+ ctx->state[7] = 0x5be0cd19137e2179ULL;
+}
+
+/* Add bytes into the hash */
+void
+SHA512_Update(SHA512_CTX * ctx, const void *in, size_t len)
+{
+ uint64_t bitlen[2];
+ uint64_t r;
+ const unsigned char *src = in;
+
+ /* Number of bytes left in the buffer from previous updates */
+ r = (ctx->count[1] >> 3) & 0x7f;
+
+ /* Convert the length into a number of bits */
+ bitlen[1] = ((uint64_t)len) << 3;
+ bitlen[0] = ((uint64_t)len) >> 61;
+
+ /* Update number of bits */
+ if ((ctx->count[1] += bitlen[1]) < bitlen[1])
+ ctx->count[0]++;
+ ctx->count[0] += bitlen[0];
+
+ /* Handle the case where we don't need to perform any transforms */
+ if (len < SHA512_BLOCK_LENGTH - r) {
+ memcpy(&ctx->buf[r], src, len);
+ return;
+ }
+
+ /* Finish the current block */
+ memcpy(&ctx->buf[r], src, SHA512_BLOCK_LENGTH - r);
+ SHA512_Transform(ctx->state, ctx->buf);
+ src += SHA512_BLOCK_LENGTH - r;
+ len -= SHA512_BLOCK_LENGTH - r;
+
+ /* Perform complete blocks */
+ while (len >= SHA512_BLOCK_LENGTH) {
+ SHA512_Transform(ctx->state, src);
+ src += SHA512_BLOCK_LENGTH;
+ len -= SHA512_BLOCK_LENGTH;
+ }
+
+ /* Copy left over data into buffer */
+ memcpy(ctx->buf, src, len);
+}
+
+/*
+ * SHA-512 finalization. Pads the input data, exports the hash value,
+ * and clears the context state.
+ */
+void
+SHA512_Final(unsigned char digest[static SHA512_DIGEST_LENGTH], SHA512_CTX *ctx)
+{
+
+ /* Add padding */
+ SHA512_Pad(ctx);
+
+ /* Write the hash */
+ be64enc_vect(digest, ctx->state, SHA512_DIGEST_LENGTH);
+
+ /* Clear the context state */
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+/*** SHA-512t: *********************************************************/
+/*
+ * the SHA512t transforms are identical to SHA512 so reuse the existing function
+ */
+void
+SHA512_224_Init(SHA512_CTX * ctx)
+{
+
+ /* Zero bits processed so far */
+ ctx->count[0] = ctx->count[1] = 0;
+
+ /* Magic initialization constants */
+ ctx->state[0] = 0x8c3d37c819544da2ULL;
+ ctx->state[1] = 0x73e1996689dcd4d6ULL;
+ ctx->state[2] = 0x1dfab7ae32ff9c82ULL;
+ ctx->state[3] = 0x679dd514582f9fcfULL;
+ ctx->state[4] = 0x0f6d2b697bd44da8ULL;
+ ctx->state[5] = 0x77e36f7304c48942ULL;
+ ctx->state[6] = 0x3f9d85a86a1d36c8ULL;
+ ctx->state[7] = 0x1112e6ad91d692a1ULL;
+}
+
+void
+SHA512_224_Update(SHA512_CTX * ctx, const void *in, size_t len)
+{
+
+ SHA512_Update(ctx, in, len);
+}
+
+void
+SHA512_224_Final(unsigned char digest[static SHA512_224_DIGEST_LENGTH], SHA512_CTX * ctx)
+{
+
+ /* Add padding */
+ SHA512_Pad(ctx);
+
+ /* Write the hash */
+ be64enc_vect(digest, ctx->state, SHA512_224_DIGEST_LENGTH);
+
+ /* Clear the context state */
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+void
+SHA512_256_Init(SHA512_CTX * ctx)
+{
+
+ /* Zero bits processed so far */
+ ctx->count[0] = ctx->count[1] = 0;
+
+ /* Magic initialization constants */
+ ctx->state[0] = 0x22312194fc2bf72cULL;
+ ctx->state[1] = 0x9f555fa3c84c64c2ULL;
+ ctx->state[2] = 0x2393b86b6f53b151ULL;
+ ctx->state[3] = 0x963877195940eabdULL;
+ ctx->state[4] = 0x96283ee2a88effe3ULL;
+ ctx->state[5] = 0xbe5e1e2553863992ULL;
+ ctx->state[6] = 0x2b0199fc2c85b8aaULL;
+ ctx->state[7] = 0x0eb72ddc81c52ca2ULL;
+}
+
+void
+SHA512_256_Update(SHA512_CTX * ctx, const void *in, size_t len)
+{
+
+ SHA512_Update(ctx, in, len);
+}
+
+void
+SHA512_256_Final(unsigned char digest[static SHA512_256_DIGEST_LENGTH], SHA512_CTX * ctx)
+{
+
+ /* Add padding */
+ SHA512_Pad(ctx);
+
+ /* Write the hash */
+ be64enc_vect(digest, ctx->state, SHA512_256_DIGEST_LENGTH);
+
+ /* Clear the context state */
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+/*** SHA-384: *********************************************************/
+/*
+ * the SHA384 and SHA512 transforms are identical, so SHA384 is skipped
+ */
+
+/* SHA-384 initialization. Begins a SHA-384 operation. */
+void
+SHA384_Init(SHA384_CTX * ctx)
+{
+
+ /* Zero bits processed so far */
+ ctx->count[0] = ctx->count[1] = 0;
+
+ /* Magic initialization constants */
+ ctx->state[0] = 0xcbbb9d5dc1059ed8ULL;
+ ctx->state[1] = 0x629a292a367cd507ULL;
+ ctx->state[2] = 0x9159015a3070dd17ULL;
+ ctx->state[3] = 0x152fecd8f70e5939ULL;
+ ctx->state[4] = 0x67332667ffc00b31ULL;
+ ctx->state[5] = 0x8eb44a8768581511ULL;
+ ctx->state[6] = 0xdb0c2e0d64f98fa7ULL;
+ ctx->state[7] = 0x47b5481dbefa4fa4ULL;
+}
+
+/* Add bytes into the SHA-384 hash */
+void
+SHA384_Update(SHA384_CTX * ctx, const void *in, size_t len)
+{
+
+ SHA512_Update((SHA512_CTX *)ctx, in, len);
+}
+
+/*
+ * SHA-384 finalization. Pads the input data, exports the hash value,
+ * and clears the context state.
+ */
+void
+SHA384_Final(unsigned char digest[static SHA384_DIGEST_LENGTH], SHA384_CTX *ctx)
+{
+
+ /* Add padding */
+ SHA512_Pad((SHA512_CTX *)ctx);
+
+ /* Write the hash */
+ be64enc_vect(digest, ctx->state, SHA384_DIGEST_LENGTH);
+
+ /* Clear the context state */
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+#ifdef WEAK_REFS
+/* When building libmd, provide weak references. Note: this is not
+ activated in the context of compiling these sources for internal
+ use in libcrypt.
+ */
+#undef SHA512_Init
+__weak_reference(_libmd_SHA512_Init, SHA512_Init);
+#undef SHA512_Update
+__weak_reference(_libmd_SHA512_Update, SHA512_Update);
+#undef SHA512_Final
+__weak_reference(_libmd_SHA512_Final, SHA512_Final);
+#undef SHA512_Transform
+__weak_reference(_libmd_SHA512_Transform, SHA512_Transform);
+
+#undef SHA512_224_Init
+__weak_reference(_libmd_SHA512_224_Init, SHA512_224_Init);
+#undef SHA512_224_Update
+__weak_reference(_libmd_SHA512_224_Update, SHA512_224_Update);
+#undef SHA512_224_Final
+__weak_reference(_libmd_SHA512_224_Final, SHA512_224_Final);
+
+#undef SHA512_256_Init
+__weak_reference(_libmd_SHA512_256_Init, SHA512_256_Init);
+#undef SHA512_256_Update
+__weak_reference(_libmd_SHA512_256_Update, SHA512_256_Update);
+#undef SHA512_256_Final
+__weak_reference(_libmd_SHA512_256_Final, SHA512_256_Final);
+
+#undef SHA384_Init
+__weak_reference(_libmd_SHA384_Init, SHA384_Init);
+#undef SHA384_Update
+__weak_reference(_libmd_SHA384_Update, SHA384_Update);
+#undef SHA384_Final
+__weak_reference(_libmd_SHA384_Final, SHA384_Final);
+#endif
diff --git a/freebsd/sys/crypto/sha2/sha512t.h b/freebsd/sys/crypto/sha2/sha512t.h
new file mode 100644
index 00000000..3f0c921f
--- /dev/null
+++ b/freebsd/sys/crypto/sha2/sha512t.h
@@ -0,0 +1,125 @@
+/*-
+ * Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SHA512T_H_
+#define _SHA512T_H_
+
+#include "sha512.h"
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#endif
+
+#define SHA512_224_DIGEST_LENGTH 28
+#define SHA512_224_DIGEST_STRING_LENGTH (SHA512_224_DIGEST_LENGTH * 2 + 1)
+#define SHA512_256_DIGEST_LENGTH 32
+#define SHA512_256_DIGEST_STRING_LENGTH (SHA512_256_DIGEST_LENGTH * 2 + 1)
+
+__BEGIN_DECLS
+
+/* Ensure libmd symbols do not clash with libcrypto */
+#ifndef SHA512_224_Init
+#define SHA512_224_Init _libmd_SHA512_224_Init
+#endif
+#ifndef SHA512_224_Update
+#define SHA512_224_Update _libmd_SHA512_224_Update
+#endif
+#ifndef SHA512_224_Final
+#define SHA512_224_Final _libmd_SHA512_224_Final
+#endif
+#ifndef SHA512_224_End
+#define SHA512_224_End _libmd_SHA512_224_End
+#endif
+#ifndef SHA512_224_File
+#define SHA512_224_File _libmd_SHA512_224_File
+#endif
+#ifndef SHA512_224_FileChunk
+#define SHA512_224_FileChunk _libmd_SHA512_224_FileChunk
+#endif
+#ifndef SHA512_224_Data
+#define SHA512_224_Data _libmd_SHA512_224_Data
+#endif
+
+#ifndef SHA512_224_Transform
+#define SHA512_224_Transform _libmd_SHA512_224_Transform
+#endif
+#ifndef SHA512_224_version
+#define SHA512_224_version _libmd_SHA512_224_version
+#endif
+
+#ifndef SHA512_256_Init
+#define SHA512_256_Init _libmd_SHA512_256_Init
+#endif
+#ifndef SHA512_256_Update
+#define SHA512_256_Update _libmd_SHA512_256_Update
+#endif
+#ifndef SHA512_256_Final
+#define SHA512_256_Final _libmd_SHA512_256_Final
+#endif
+#ifndef SHA512_256_End
+#define SHA512_256_End _libmd_SHA512_256_End
+#endif
+#ifndef SHA512_256_File
+#define SHA512_256_File _libmd_SHA512_256_File
+#endif
+#ifndef SHA512_256_FileChunk
+#define SHA512_256_FileChunk _libmd_SHA512_256_FileChunk
+#endif
+#ifndef SHA512_256_Data
+#define SHA512_256_Data _libmd_SHA512_256_Data
+#endif
+
+#ifndef SHA512_256_Transform
+#define SHA512_256_Transform _libmd_SHA512_256_Transform
+#endif
+#ifndef SHA512_256_version
+#define SHA512_256_version _libmd_SHA512_256_version
+#endif
+
+void SHA512_224_Init(SHA512_CTX *);
+void SHA512_224_Update(SHA512_CTX *, const void *, size_t);
+void SHA512_224_Final(unsigned char [static SHA512_224_DIGEST_LENGTH], SHA512_CTX *);
+#ifndef _KERNEL
+char *SHA512_224_End(SHA512_CTX *, char *);
+char *SHA512_224_Data(const void *, unsigned int, char *);
+char *SHA512_224_File(const char *, char *);
+char *SHA512_224_FileChunk(const char *, char *, off_t, off_t);
+#endif
+void SHA512_256_Init(SHA512_CTX *);
+void SHA512_256_Update(SHA512_CTX *, const void *, size_t);
+void SHA512_256_Final(unsigned char [static SHA512_256_DIGEST_LENGTH], SHA512_CTX *);
+#ifndef _KERNEL
+char *SHA512_256_End(SHA512_CTX *, char *);
+char *SHA512_256_Data(const void *, unsigned int, char *);
+char *SHA512_256_File(const char *, char *);
+char *SHA512_256_FileChunk(const char *, char *, off_t, off_t);
+#endif
+
+__END_DECLS
+
+#endif /* !_SHA512T_H_ */
diff --git a/freebsd/sys/crypto/siphash/siphash.c b/freebsd/sys/crypto/siphash/siphash.c
new file mode 100644
index 00000000..aaf69a01
--- /dev/null
+++ b/freebsd/sys/crypto/siphash/siphash.c
@@ -0,0 +1,244 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2013 Andre Oppermann <andre@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * SipHash is a family of PRFs SipHash-c-d where the integer parameters c and d
+ * are the number of compression rounds and the number of finalization rounds.
+ * A compression round is identical to a finalization round and this round
+ * function is called SipRound. Given a 128-bit key k and a (possibly empty)
+ * byte string m, SipHash-c-d returns a 64-bit value SipHash-c-d(k; m).
+ *
+ * Implemented from the paper "SipHash: a fast short-input PRF", 2012.09.18,
+ * by Jean-Philippe Aumasson and Daniel J. Bernstein,
+ * Permanent Document ID b9a943a805fbfc6fde808af9fc0ecdfa
+ * https://131002.net/siphash/siphash.pdf
+ * https://131002.net/siphash/
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/libkern.h>
+#include <sys/endian.h>
+
+#include <crypto/siphash/siphash.h>
+
+static void SipRounds(SIPHASH_CTX *ctx, int final);
+
+void
+SipHash_InitX(SIPHASH_CTX *ctx, int rc, int rf)
+{
+
+ ctx->v[0] = 0x736f6d6570736575ull;
+ ctx->v[1] = 0x646f72616e646f6dull;
+ ctx->v[2] = 0x6c7967656e657261ull;
+ ctx->v[3] = 0x7465646279746573ull;
+ ctx->buf.b64 = 0;
+ ctx->bytes = 0;
+ ctx->buflen = 0;
+ ctx->rounds_compr = rc;
+ ctx->rounds_final = rf;
+ ctx->initialized = 1;
+}
+
+void
+SipHash_SetKey(SIPHASH_CTX *ctx, const uint8_t key[static SIPHASH_KEY_LENGTH])
+{
+ uint64_t k[2];
+
+ KASSERT(ctx->v[0] == 0x736f6d6570736575ull &&
+ ctx->initialized == 1,
+ ("%s: context %p not properly initialized", __func__, ctx));
+
+ k[0] = le64dec(&key[0]);
+ k[1] = le64dec(&key[8]);
+
+ ctx->v[0] ^= k[0];
+ ctx->v[1] ^= k[1];
+ ctx->v[2] ^= k[0];
+ ctx->v[3] ^= k[1];
+
+ ctx->initialized = 2;
+}
+
+static size_t
+SipBuf(SIPHASH_CTX *ctx, const uint8_t **src, size_t len, int final)
+{
+ size_t x = 0;
+
+ KASSERT((!final && len > 0) || (final && len == 0),
+ ("%s: invalid parameters", __func__));
+
+ if (!final) {
+ x = MIN(len, sizeof(ctx->buf.b64) - ctx->buflen);
+ bcopy(*src, &ctx->buf.b8[ctx->buflen], x);
+ ctx->buflen += x;
+ *src += x;
+ } else
+ ctx->buf.b8[7] = (uint8_t)ctx->bytes;
+
+ if (ctx->buflen == 8 || final) {
+ ctx->v[3] ^= le64toh(ctx->buf.b64);
+ SipRounds(ctx, 0);
+ ctx->v[0] ^= le64toh(ctx->buf.b64);
+ ctx->buf.b64 = 0;
+ ctx->buflen = 0;
+ }
+ return (x);
+}
+
+void
+SipHash_Update(SIPHASH_CTX *ctx, const void *src, size_t len)
+{
+ uint64_t m;
+ const uint64_t *p;
+ const uint8_t *s;
+ size_t rem;
+
+ KASSERT(ctx->initialized == 2,
+ ("%s: context %p not properly initialized", __func__, ctx));
+
+ s = src;
+ ctx->bytes += len;
+
+ /*
+ * Push length smaller than block size into buffer or
+ * fill up the buffer if there is already something
+ * in it.
+ */
+ if (ctx->buflen > 0 || len < 8)
+ len -= SipBuf(ctx, &s, len, 0);
+ if (len == 0)
+ return;
+
+ rem = len & 0x7;
+ len >>= 3;
+
+	/* Optimize for 64-bit aligned/unaligned access. */
+ if (((uintptr_t)s & 0x7) == 0) {
+ for (p = (const uint64_t *)s; len > 0; len--, p++) {
+ m = le64toh(*p);
+ ctx->v[3] ^= m;
+ SipRounds(ctx, 0);
+ ctx->v[0] ^= m;
+ }
+ s = (const uint8_t *)p;
+ } else {
+ for (; len > 0; len--, s += 8) {
+ m = le64dec(s);
+ ctx->v[3] ^= m;
+ SipRounds(ctx, 0);
+ ctx->v[0] ^= m;
+ }
+ }
+
+ /* Push remainder into buffer. */
+ if (rem > 0)
+ (void)SipBuf(ctx, &s, rem, 0);
+}
+
+void
+SipHash_Final(uint8_t dst[static SIPHASH_DIGEST_LENGTH], SIPHASH_CTX *ctx)
+{
+ uint64_t r;
+
+ KASSERT(ctx->initialized == 2,
+ ("%s: context %p not properly initialized", __func__, ctx));
+
+ r = SipHash_End(ctx);
+ le64enc(dst, r);
+}
+
+uint64_t
+SipHash_End(SIPHASH_CTX *ctx)
+{
+ uint64_t r;
+
+ KASSERT(ctx->initialized == 2,
+ ("%s: context %p not properly initialized", __func__, ctx));
+
+ SipBuf(ctx, NULL, 0, 1);
+ ctx->v[2] ^= 0xff;
+ SipRounds(ctx, 1);
+ r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]);
+
+ bzero(ctx, sizeof(*ctx));
+ return (r);
+}
+
+uint64_t
+SipHashX(SIPHASH_CTX *ctx, int rc, int rf,
+ const uint8_t key[static SIPHASH_KEY_LENGTH], const void *src, size_t len)
+{
+
+ SipHash_InitX(ctx, rc, rf);
+ SipHash_SetKey(ctx, key);
+ SipHash_Update(ctx, src, len);
+
+ return (SipHash_End(ctx));
+}
+
+#define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ( (x) >> (64 - (b))))
+
+static void
+SipRounds(SIPHASH_CTX *ctx, int final)
+{
+ int rounds;
+
+ if (!final)
+ rounds = ctx->rounds_compr;
+ else
+ rounds = ctx->rounds_final;
+
+ while (rounds--) {
+ ctx->v[0] += ctx->v[1];
+ ctx->v[2] += ctx->v[3];
+ ctx->v[1] = SIP_ROTL(ctx->v[1], 13);
+ ctx->v[3] = SIP_ROTL(ctx->v[3], 16);
+
+ ctx->v[1] ^= ctx->v[0];
+ ctx->v[3] ^= ctx->v[2];
+ ctx->v[0] = SIP_ROTL(ctx->v[0], 32);
+
+ ctx->v[2] += ctx->v[1];
+ ctx->v[0] += ctx->v[3];
+ ctx->v[1] = SIP_ROTL(ctx->v[1], 17);
+ ctx->v[3] = SIP_ROTL(ctx->v[3], 21);
+
+ ctx->v[1] ^= ctx->v[2];
+ ctx->v[3] ^= ctx->v[0];
+ ctx->v[2] = SIP_ROTL(ctx->v[2], 32);
+ }
+}
+
diff --git a/freebsd/sys/crypto/siphash/siphash.h b/freebsd/sys/crypto/siphash/siphash.h
new file mode 100644
index 00000000..8bbda4f3
--- /dev/null
+++ b/freebsd/sys/crypto/siphash/siphash.h
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 2013 Andre Oppermann <andre@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * SipHash is a family of pseudorandom functions (a.k.a. keyed hash functions)
+ * optimized for speed on short messages returning a 64bit hash/digest value.
+ *
+ * The number of rounds is defined during the initialization:
+ * SipHash24_Init() for the fast and reasonably strong version
+ * SipHash48_Init() for the strong version (half as fast)
+ *
+ * struct SIPHASH_CTX ctx;
+ * SipHash24_Init(&ctx);
+ * SipHash_SetKey(&ctx, "16bytes long key");
+ * SipHash_Update(&ctx, pointer_to_string, length_of_string);
+ * SipHash_Final(output, &ctx);
+ */
+
+#ifndef _SIPHASH_H_
+#define _SIPHASH_H_
+
+#define SIPHASH_BLOCK_LENGTH 8
+#define SIPHASH_KEY_LENGTH 16
+#define SIPHASH_DIGEST_LENGTH 8
+
+typedef struct _SIPHASH_CTX {
+ uint64_t v[4];
+ union {
+ uint64_t b64;
+ uint8_t b8[8];
+ } buf;
+ uint64_t bytes;
+ uint8_t buflen;
+ uint8_t rounds_compr;
+ uint8_t rounds_final;
+ uint8_t initialized;
+} SIPHASH_CTX;
+
+
+#define SipHash24_Init(x) SipHash_InitX((x), 2, 4)
+#define SipHash48_Init(x) SipHash_InitX((x), 4, 8)
+void SipHash_InitX(SIPHASH_CTX *, int, int);
+void SipHash_SetKey(SIPHASH_CTX *, const uint8_t[static SIPHASH_KEY_LENGTH]);
+void SipHash_Update(SIPHASH_CTX *, const void *, size_t);
+void SipHash_Final(uint8_t[static SIPHASH_DIGEST_LENGTH], SIPHASH_CTX *);
+uint64_t SipHash_End(SIPHASH_CTX *);
+
+/* One-shot helpers; no trailing semicolon so they expand cleanly in expressions. */
+#define SipHash24(x, y, z, i) SipHashX((x), 2, 4, (y), (z), (i))
+#define SipHash48(x, y, z, i) SipHashX((x), 4, 8, (y), (z), (i))
+uint64_t SipHashX(SIPHASH_CTX *, int, int, const uint8_t[static SIPHASH_KEY_LENGTH], const void *,
+ size_t);
+
+int SipHash24_TestVectors(void);
+
+#endif /* _SIPHASH_H_ */
diff --git a/freebsd/sys/crypto/skein/skein.c b/freebsd/sys/crypto/skein/skein.c
new file mode 100644
index 00000000..c13f9ad4
--- /dev/null
+++ b/freebsd/sys/crypto/skein/skein.c
@@ -0,0 +1,860 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/***********************************************************************
+**
+** Implementation of the Skein hash function.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/endian.h>
+#include <sys/types.h>
+
+/* get the memcpy/memset functions */
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <string.h>
+#endif
+
+#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */
+
+#include "skein.h" /* get the Skein API definitions */
+#include "skein_iv.h" /* get precomputed IVs */
+
+/*****************************************************************/
+/* External function to process blkCnt (nonzero) full block(s) of data. */
+void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
+void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
+void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
+
+/*****************************************************************/
+/* 256-bit Skein */
+/*****************************************************************/
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a straight hashing operation */
+int Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen)
+ {
+ union
+ {
+ u08b_t b[SKEIN_256_STATE_BYTES];
+ u64b_t w[SKEIN_256_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen)
+ { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 256: memcpy(ctx->X,SKEIN_256_IV_256,sizeof(ctx->X)); break;
+ case 224: memcpy(ctx->X,SKEIN_256_IV_224,sizeof(ctx->X)); break;
+ case 160: memcpy(ctx->X,SKEIN_256_IV_160,sizeof(ctx->X)); break;
+ case 128: memcpy(ctx->X,SKEIN_256_IV_128,sizeof(ctx->X)); break;
+#endif
+ default:
+ /* here if there is no precomputed IV value available */
+ /* build/process the config block, type == CONFIG (could be precomputed) */
+ Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */
+
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */
+
+ /* compute the initial chaining values from config block */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */
+ Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+ break;
+ }
+ /* The chaining vars ctx->X are now initialized for the given hashBitLen. */
+ /* Set up to process the data message portion of the hash (default) */
+ Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a MAC and/or tree hash operation */
+/* [identical to Skein_256_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
+int Skein_256_InitExt(Skein_256_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes)
+ {
+ union
+ {
+ u08b_t b[SKEIN_256_STATE_BYTES];
+ u64b_t w[SKEIN_256_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) /* is there a key? */
+ {
+ memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */
+ }
+ else /* here to pre-process a key */
+ {
+ Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
+ /* do a mini-Init right here */
+ ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */
+ Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */
+ Skein_256_Update(ctx,key,keyBytes); /* hash the key */
+ Skein_256_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */
+ memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ for (i=0;i<SKEIN_256_STATE_WORDS;i++) /* convert key bytes to context words */
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /* build/process the config block, type == CONFIG (could be precomputed for each key) */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx,CFG_FINAL);
+
+ memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+
+ Skein_Show_Key(256,&ctx->h,key,keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx,MSG);
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* process the input bytes */
+int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
+ {
+ size_t n;
+
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN_256_BLOCK_BYTES)
+ {
+ if (ctx->h.bCnt) /* finish up any buffered message data */
+ {
+ n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */
+ if (n)
+ {
+ Skein_assert(n < msgByteCnt); /* check on our logic here */
+ memcpy(&ctx->b[ctx->h.bCnt],msg,n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES);
+ Skein_256_Process_Block(ctx,ctx->b,1,SKEIN_256_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /* now process any remaining full blocks, directly from input message data */
+ if (msgByteCnt > SKEIN_256_BLOCK_BYTES)
+ {
+ n = (msgByteCnt-1) / SKEIN_256_BLOCK_BYTES; /* number of full blocks to process */
+ Skein_256_Process_Block(ctx,msg,n,SKEIN_256_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN_256_BLOCK_BYTES;
+ msg += n * SKEIN_256_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt)
+ {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES);
+ memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the result */
+int Skein_256_Final(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ size_t i,n,byteCnt;
+ u64b_t X[SKEIN_256_STATE_WORDS];
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
+
+ Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
+
+ /* now output the result */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+ /* run Threefish in "counter mode" to generate output */
+ memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+ memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
+ for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++)
+ {
+ ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+ n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */
+ if (n >= SKEIN_256_BLOCK_BYTES)
+ n = SKEIN_256_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES);
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+ return SKEIN_SUCCESS;
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t Skein_256_API_CodeSize(void)
+ {
+ return ((u08b_t *) Skein_256_API_CodeSize) -
+ ((u08b_t *) Skein_256_Init);
+ }
+#endif
+
+/*****************************************************************/
+/* 512-bit Skein */
+/*****************************************************************/
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a straight hashing operation */
+int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
+ {
+ union
+ {
+ u08b_t b[SKEIN_512_STATE_BYTES];
+ u64b_t w[SKEIN_512_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen)
+ { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 512: memcpy(ctx->X,SKEIN_512_IV_512,sizeof(ctx->X)); break;
+ case 384: memcpy(ctx->X,SKEIN_512_IV_384,sizeof(ctx->X)); break;
+ case 256: memcpy(ctx->X,SKEIN_512_IV_256,sizeof(ctx->X)); break;
+ case 224: memcpy(ctx->X,SKEIN_512_IV_224,sizeof(ctx->X)); break;
+#endif
+ default:
+ /* here if there is no precomputed IV value available */
+ /* build/process the config block, type == CONFIG (could be precomputed) */
+ Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */
+
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */
+
+ /* compute the initial chaining values from config block */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */
+ Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+ break;
+ }
+
+ /* The chaining vars ctx->X are now initialized for the given hashBitLen. */
+ /* Set up to process the data message portion of the hash (default) */
+ Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a MAC and/or tree hash operation */
+/* [identical to Skein_512_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
+int Skein_512_InitExt(Skein_512_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes)
+ {
+ union
+ {
+ u08b_t b[SKEIN_512_STATE_BYTES];
+ u64b_t w[SKEIN_512_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) /* is there a key? */
+ {
+ memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */
+ }
+ else /* here to pre-process a key */
+ {
+ Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
+ /* do a mini-Init right here */
+ ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */
+ Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */
+ Skein_512_Update(ctx,key,keyBytes); /* hash the key */
+ Skein_512_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */
+ memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ for (i=0;i<SKEIN_512_STATE_WORDS;i++) /* convert key bytes to context words */
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /* build/process the config block, type == CONFIG (could be precomputed for each key) */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx,CFG_FINAL);
+
+ memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+
+ Skein_Show_Key(512,&ctx->h,key,keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx,MSG);
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* process the input bytes */
+int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
+ {
+ size_t n;
+
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES)
+ {
+ if (ctx->h.bCnt) /* finish up any buffered message data */
+ {
+ n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */
+ if (n)
+ {
+ Skein_assert(n < msgByteCnt); /* check on our logic here */
+ memcpy(&ctx->b[ctx->h.bCnt],msg,n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES);
+ Skein_512_Process_Block(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /* now process any remaining full blocks, directly from input message data */
+ if (msgByteCnt > SKEIN_512_BLOCK_BYTES)
+ {
+ n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES; /* number of full blocks to process */
+ Skein_512_Process_Block(ctx,msg,n,SKEIN_512_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN_512_BLOCK_BYTES;
+ msg += n * SKEIN_512_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt)
+ {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES);
+ memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the result */
+int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ size_t i,n,byteCnt;
+ u64b_t X[SKEIN_512_STATE_WORDS];
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+
+ Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
+
+ /* now output the result */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+ /* run Threefish in "counter mode" to generate output */
+ memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+ memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
+ for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++)
+ {
+ ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+ n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */
+ if (n >= SKEIN_512_BLOCK_BYTES)
+ n = SKEIN_512_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES);
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+ return SKEIN_SUCCESS;
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t Skein_512_API_CodeSize(void)
+ {
+ return ((u08b_t *) Skein_512_API_CodeSize) -
+ ((u08b_t *) Skein_512_Init);
+ }
+#endif
+
+/*****************************************************************/
+/* 1024-bit Skein */
+/*****************************************************************/
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a straight hashing operation */
+int Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen)
+ {
+ union
+ {
+ u08b_t b[SKEIN1024_STATE_BYTES];
+ u64b_t w[SKEIN1024_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen)
+ { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 512: memcpy(ctx->X,SKEIN1024_IV_512 ,sizeof(ctx->X)); break;
+ case 384: memcpy(ctx->X,SKEIN1024_IV_384 ,sizeof(ctx->X)); break;
+ case 1024: memcpy(ctx->X,SKEIN1024_IV_1024,sizeof(ctx->X)); break;
+#endif
+ default:
+ /* here if there is no precomputed IV value available */
+ /* build/process the config block, type == CONFIG (could be precomputed) */
+ Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */
+
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */
+
+ /* compute the initial chaining values from config block */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */
+ Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+ break;
+ }
+
+ /* The chaining vars ctx->X are now initialized for the given hashBitLen. */
+ /* Set up to process the data message portion of the hash (default) */
+ Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a MAC and/or tree hash operation */
+/* [identical to Skein1024_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
+int Skein1024_InitExt(Skein1024_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes)
+ {
+ union
+ {
+ u08b_t b[SKEIN1024_STATE_BYTES];
+ u64b_t w[SKEIN1024_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) /* is there a key? */
+ {
+ memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */
+ }
+ else /* here to pre-process a key */
+ {
+ Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
+ /* do a mini-Init right here */
+ ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */
+ Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */
+ Skein1024_Update(ctx,key,keyBytes); /* hash the key */
+ Skein1024_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */
+ memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ for (i=0;i<SKEIN1024_STATE_WORDS;i++) /* convert key bytes to context words */
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /* build/process the config block, type == CONFIG (could be precomputed for each key) */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx,CFG_FINAL);
+
+ memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+
+ Skein_Show_Key(1024,&ctx->h,key,keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx,MSG);
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* process the input bytes */
+int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
+ {
+ size_t n;
+
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN1024_BLOCK_BYTES)
+ {
+ if (ctx->h.bCnt) /* finish up any buffered message data */
+ {
+ n = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */
+ if (n)
+ {
+ Skein_assert(n < msgByteCnt); /* check on our logic here */
+ memcpy(&ctx->b[ctx->h.bCnt],msg,n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES);
+ Skein1024_Process_Block(ctx,ctx->b,1,SKEIN1024_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /* now process any remaining full blocks, directly from input message data */
+ if (msgByteCnt > SKEIN1024_BLOCK_BYTES)
+ {
+ n = (msgByteCnt-1) / SKEIN1024_BLOCK_BYTES; /* number of full blocks to process */
+ Skein1024_Process_Block(ctx,msg,n,SKEIN1024_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN1024_BLOCK_BYTES;
+ msg += n * SKEIN1024_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt)
+ {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES);
+ memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the result */
+int Skein1024_Final(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ size_t i,n,byteCnt;
+ u64b_t X[SKEIN1024_STATE_WORDS];
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
+
+ Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
+
+ /* now output the result */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+ /* run Threefish in "counter mode" to generate output */
+ memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+ memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
+ for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++)
+ {
+ ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+ n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */
+ if (n >= SKEIN1024_BLOCK_BYTES)
+ n = SKEIN1024_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(1024,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES);
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+ return SKEIN_SUCCESS;
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t Skein1024_API_CodeSize(void)
+ {
+ return ((u08b_t *) Skein1024_API_CodeSize) -
+ ((u08b_t *) Skein1024_Init);
+ }
+#endif
+
+/**************** Functions to support MAC/tree hashing ***************/
+/* (this code is identical for Optimized and Reference versions) */
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the block, no OUTPUT stage */
+int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
+ Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
+
+ Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_256_BLOCK_BYTES); /* "output" the state bytes */
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the block, no OUTPUT stage */
+int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+ Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
+
+ Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_512_BLOCK_BYTES); /* "output" the state bytes */
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the block, no OUTPUT stage */
+int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
+ Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
+
+ Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN1024_BLOCK_BYTES); /* "output" the state bytes */
+
+ return SKEIN_SUCCESS;
+ }
+
+#if SKEIN_TREE_HASH
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* just do the OUTPUT stage */
+int Skein_256_Output(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ size_t i,n,byteCnt;
+ u64b_t X[SKEIN_256_STATE_WORDS];
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* now output the result */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+ /* run Threefish in "counter mode" to generate output */
+ memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+ memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
+ for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++)
+ {
+ ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+ n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */
+ if (n >= SKEIN_256_BLOCK_BYTES)
+ n = SKEIN_256_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES);
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* just do the OUTPUT stage */
+int Skein_512_Output(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ size_t i,n,byteCnt;
+ u64b_t X[SKEIN_512_STATE_WORDS];
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* now output the result */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+ /* run Threefish in "counter mode" to generate output */
+ memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+ memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
+ for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++)
+ {
+ ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+ n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */
+ if (n >= SKEIN_512_BLOCK_BYTES)
+ n = SKEIN_512_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES); /* was 256: copy/paste from Skein_256_Output */
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* just do the OUTPUT stage */
+/* Expand the final chaining state into (hashBitLen+7)/8 output bytes by
+ running Threefish in "counter mode": output block i hashes the 64-bit
+ little-endian counter i under the OUT_FINAL tweak type. Returns
+ SKEIN_SUCCESS, or SKEIN_FAIL on an uninitialized context (when
+ SKEIN_ERR_CHECK is enabled). */
+int Skein1024_Output(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ size_t i,n,byteCnt;
+ u64b_t X[SKEIN1024_STATE_WORDS];
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* now output the result */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+ /* run Threefish in "counter mode" to generate output */
+ memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+ memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
+ for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++)
+ {
+ ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+ n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */
+ if (n >= SKEIN1024_BLOCK_BYTES)
+ n = SKEIN1024_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(1024,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES); /* was 256 (copy/paste from Skein_256_Output); only the debug display label -- no-op unless SKEIN_DEBUG */
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+ return SKEIN_SUCCESS;
+ }
+
+
+/* Adapt the functions to match the prototype expected by libmd */
+void
+SKEIN256_Init(SKEIN256_CTX * ctx)
+{
+
+ /* libmd-style wrapper: fixes the digest size at 256 bits */
+ Skein_256_Init(ctx, 256);
+}
+
+void
+SKEIN512_Init(SKEIN512_CTX * ctx)
+{
+
+ /* libmd-style wrapper: fixes the digest size at 512 bits */
+ Skein_512_Init(ctx, 512);
+}
+
+void
+SKEIN1024_Init(SKEIN1024_CTX * ctx)
+{
+
+ /* libmd-style wrapper: fixes the digest size at 1024 bits */
+ Skein1024_Init(ctx, 1024);
+}
+
+void
+SKEIN256_Update(SKEIN256_CTX * ctx, const void *in, size_t len)
+{
+
+ /* libmd-style wrapper: forward to the size-specific Skein update */
+ Skein_256_Update(ctx, in, len);
+}
+
+void
+SKEIN512_Update(SKEIN512_CTX * ctx, const void *in, size_t len)
+{
+
+ /* libmd-style wrapper: forward to the size-specific Skein update */
+ Skein_512_Update(ctx, in, len);
+}
+
+void
+SKEIN1024_Update(SKEIN1024_CTX * ctx, const void *in, size_t len)
+{
+
+ /* libmd-style wrapper: forward to the size-specific Skein update */
+ Skein1024_Update(ctx, in, len);
+}
+
+void
+SKEIN256_Final(unsigned char digest[static SKEIN_256_BLOCK_BYTES], SKEIN256_CTX *ctx)
+{
+
+ /* libmd puts the digest first; Skein puts the context first */
+ Skein_256_Final(ctx, digest);
+}
+
+void
+SKEIN512_Final(unsigned char digest[static SKEIN_512_BLOCK_BYTES], SKEIN512_CTX *ctx)
+{
+
+ /* libmd puts the digest first; Skein puts the context first */
+ Skein_512_Final(ctx, digest);
+}
+
+void
+SKEIN1024_Final(unsigned char digest[static SKEIN1024_BLOCK_BYTES], SKEIN1024_CTX *ctx)
+{
+
+ /* libmd puts the digest first; Skein puts the context first */
+ Skein1024_Final(ctx, digest);
+}
+
+#ifdef WEAK_REFS
+/* When building libmd, provide weak references. Note: this is not
+ activated in the context of compiling these sources for internal
+ use in libcrypt.
+ */
+#undef SKEIN256_Init
+__weak_reference(_libmd_SKEIN256_Init, SKEIN256_Init);
+#undef SKEIN256_Update
+__weak_reference(_libmd_SKEIN256_Update, SKEIN256_Update);
+#undef SKEIN256_Final
+__weak_reference(_libmd_SKEIN256_Final, SKEIN256_Final);
+
+#undef SKEIN512_Init
+__weak_reference(_libmd_SKEIN512_Init, SKEIN512_Init);
+#undef SKEIN512_Update
+__weak_reference(_libmd_SKEIN512_Update, SKEIN512_Update);
+#undef SKEIN512_Final
+__weak_reference(_libmd_SKEIN512_Final, SKEIN512_Final);
+
+#undef SKEIN1024_Init
+__weak_reference(_libmd_SKEIN1024_Init, SKEIN1024_Init);
+#undef SKEIN1024_Update
+__weak_reference(_libmd_SKEIN1024_Update, SKEIN1024_Update);
+#undef SKEIN1024_Final
+__weak_reference(_libmd_SKEIN1024_Final, SKEIN1024_Final);
+#endif
+
+#endif
diff --git a/freebsd/sys/crypto/skein/skein.h b/freebsd/sys/crypto/skein/skein.h
new file mode 100644
index 00000000..ca919aca
--- /dev/null
+++ b/freebsd/sys/crypto/skein/skein.h
@@ -0,0 +1,333 @@
+/* $FreeBSD$ */
+#ifndef _SKEIN_H_
+#define _SKEIN_H_ 1
+/**************************************************************************
+**
+** Interface declarations and internal definitions for Skein hashing.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+***************************************************************************
+**
+** The following compile-time switches may be defined to control some
+** tradeoffs between speed, code size, error checking, and security.
+**
+** The "default" note explains what happens when the switch is not defined.
+**
+** SKEIN_DEBUG -- make callouts from inside Skein code
+** to examine/display intermediate values.
+** [default: no callouts (no overhead)]
+**
+** SKEIN_ERR_CHECK -- how error checking is handled inside Skein
+** code. If not defined, most error checking
+** is disabled (for performance). Otherwise,
+** the switch value is interpreted as:
+** 0: use assert() to flag errors
+** 1: return SKEIN_FAIL to flag errors
+**
+***************************************************************************/
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifndef _KERNEL
+#include <stddef.h> /* get size_t definition */
+#endif
+#include "skein_port.h" /* get platform-specific definitions */
+
+enum
+ {
+ SKEIN_SUCCESS = 0, /* return codes from Skein calls */
+ SKEIN_FAIL = 1,
+ SKEIN_BAD_HASHLEN = 2
+ };
+
+#define SKEIN_MODIFIER_WORDS ( 2) /* number of modifier (tweak) words */
+
+#define SKEIN_256_STATE_WORDS ( 4)
+#define SKEIN_512_STATE_WORDS ( 8)
+#define SKEIN1024_STATE_WORDS (16)
+#define SKEIN_MAX_STATE_WORDS (16)
+
+#define SKEIN_256_STATE_BYTES ( 8*SKEIN_256_STATE_WORDS)
+#define SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS)
+#define SKEIN1024_STATE_BYTES ( 8*SKEIN1024_STATE_WORDS)
+
+#define SKEIN_256_STATE_BITS (64*SKEIN_256_STATE_WORDS)
+#define SKEIN_512_STATE_BITS (64*SKEIN_512_STATE_WORDS)
+#define SKEIN1024_STATE_BITS (64*SKEIN1024_STATE_WORDS)
+
+#define SKEIN_256_BLOCK_BYTES ( 8*SKEIN_256_STATE_WORDS)
+#define SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS)
+#define SKEIN1024_BLOCK_BYTES ( 8*SKEIN1024_STATE_WORDS)
+
+typedef struct
+ {
+ size_t hashBitLen; /* size of hash result, in bits */
+ size_t bCnt; /* current byte count in buffer b[] */
+ u64b_t T[SKEIN_MODIFIER_WORDS]; /* tweak words: T[0]=byte cnt, T[1]=flags */
+ } Skein_Ctxt_Hdr_t;
+
+typedef struct /* 256-bit Skein hash context structure */
+ {
+ Skein_Ctxt_Hdr_t h; /* common header context variables */
+ u64b_t X[SKEIN_256_STATE_WORDS]; /* chaining variables */
+ u08b_t b[SKEIN_256_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */
+ } Skein_256_Ctxt_t;
+
+typedef struct /* 512-bit Skein hash context structure */
+ {
+ Skein_Ctxt_Hdr_t h; /* common header context variables */
+ u64b_t X[SKEIN_512_STATE_WORDS]; /* chaining variables */
+ u08b_t b[SKEIN_512_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */
+ } Skein_512_Ctxt_t;
+
+typedef struct /* 1024-bit Skein hash context structure */
+ {
+ Skein_Ctxt_Hdr_t h; /* common header context variables */
+ u64b_t X[SKEIN1024_STATE_WORDS]; /* chaining variables */
+ u08b_t b[SKEIN1024_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */
+ } Skein1024_Ctxt_t;
+
+/* Skein APIs for (incremental) "straight hashing" */
+int Skein_256_Init (Skein_256_Ctxt_t *ctx, size_t hashBitLen);
+int Skein_512_Init (Skein_512_Ctxt_t *ctx, size_t hashBitLen);
+int Skein1024_Init (Skein1024_Ctxt_t *ctx, size_t hashBitLen);
+
+int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
+int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
+int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
+
+int Skein_256_Final (Skein_256_Ctxt_t *ctx, u08b_t * hashVal);
+int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
+int Skein1024_Final (Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
+
+/*
+** Skein APIs for "extended" initialization: MAC keys, tree hashing.
+** After an InitExt() call, just use Update/Final calls as with Init().
+**
+** Notes: Same parameters as _Init() calls, plus treeInfo/key/keyBytes.
+** When keyBytes == 0 and treeInfo == SKEIN_SEQUENTIAL,
+** the results of InitExt() are identical to calling Init().
+** The function Init() may be called once to "precompute" the IV for
+** a given hashBitLen value, then by saving a copy of the context
+** the IV computation may be avoided in later calls.
+** Similarly, the function InitExt() may be called once per MAC key
+** to precompute the MAC IV, then a copy of the context saved and
+** reused for each new MAC computation.
+**/
+int Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes);
+int Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes);
+int Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes);
+
+/*
+** Skein APIs for MAC and tree hash:
+** Final_Pad: pad, do final block, but no OUTPUT type
+** Output: do just the output stage
+*/
+int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t * hashVal);
+int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
+int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
+
+#ifndef SKEIN_TREE_HASH
+#define SKEIN_TREE_HASH (1)
+#endif
+#if SKEIN_TREE_HASH
+int Skein_256_Output (Skein_256_Ctxt_t *ctx, u08b_t * hashVal);
+int Skein_512_Output (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
+int Skein1024_Output (Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
+#endif
+
+/*****************************************************************
+** "Internal" Skein definitions
+** -- not needed for sequential hashing API, but will be
+** helpful for other uses of Skein (e.g., tree hash mode).
+** -- included here so that they can be shared between
+** reference and optimized code.
+******************************************************************/
+
+/* tweak word T[1]: bit field starting positions */
+#define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* offset 64 because it's the second word */
+
+#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112) /* bits 112..118: level in hash tree */
+#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* bit 119 : partial final input byte */
+#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* bits 120..125: type field */
+#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* bit 126 : first block flag */
+#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* bit 127 : final block flag */
+
+/* tweak word T[1]: flag bit definition(s) */
+#define SKEIN_T1_FLAG_FIRST (((u64b_t) 1 ) << SKEIN_T1_POS_FIRST)
+#define SKEIN_T1_FLAG_FINAL (((u64b_t) 1 ) << SKEIN_T1_POS_FINAL)
+#define SKEIN_T1_FLAG_BIT_PAD (((u64b_t) 1 ) << SKEIN_T1_POS_BIT_PAD)
+
+/* tweak word T[1]: tree level bit field mask */
+#define SKEIN_T1_TREE_LVL_MASK (((u64b_t)0x7F) << SKEIN_T1_POS_TREE_LVL)
+#define SKEIN_T1_TREE_LEVEL(n) (((u64b_t) (n)) << SKEIN_T1_POS_TREE_LVL)
+
+/* tweak word T[1]: block type field */
+#define SKEIN_BLK_TYPE_KEY ( 0) /* key, for MAC and KDF */
+#define SKEIN_BLK_TYPE_CFG ( 4) /* configuration block */
+#define SKEIN_BLK_TYPE_PERS ( 8) /* personalization string */
+#define SKEIN_BLK_TYPE_PK (12) /* public key (for digital signature hashing) */
+#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */
+#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */
+#define SKEIN_BLK_TYPE_MSG (48) /* message processing */
+#define SKEIN_BLK_TYPE_OUT (63) /* output stage */
+#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */
+
+#define SKEIN_T1_BLK_TYPE(T) (((u64b_t) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE)
+#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY) /* key, for MAC and KDF */
+#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG) /* configuration block */
+#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS) /* personalization string */
+#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK) /* public key (for digital signature hashing) */
+#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF) /* key identifier for KDF */
+#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)/* nonce for PRNG */
+#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */
+#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */
+#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK) /* field bit mask */
+
+#define SKEIN_T1_BLK_TYPE_CFG_FINAL (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL)
+#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL)
+
+#define SKEIN_VERSION (1)
+
+#ifndef SKEIN_ID_STRING_LE /* allow compile-time personalization */
+#define SKEIN_ID_STRING_LE (0x33414853) /* "SHA3" (little-endian)*/
+#endif
+
+#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((u64b_t) (hi32)) << 32))
+#define SKEIN_SCHEMA_VER SKEIN_MK_64(SKEIN_VERSION,SKEIN_ID_STRING_LE)
+#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22)
+
+#define SKEIN_CFG_STR_LEN (4*8)
+
+/* bit field definitions in config block treeInfo word */
+#define SKEIN_CFG_TREE_LEAF_SIZE_POS ( 0)
+#define SKEIN_CFG_TREE_NODE_SIZE_POS ( 8)
+#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16)
+
+#define SKEIN_CFG_TREE_LEAF_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS)
+#define SKEIN_CFG_TREE_NODE_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS)
+#define SKEIN_CFG_TREE_MAX_LEVEL_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS)
+
+#define SKEIN_CFG_TREE_INFO(leaf,node,maxLvl) \
+ ( (((u64b_t)(leaf )) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \
+ (((u64b_t)(node )) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \
+ (((u64b_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS) )
+
+#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0,0,0) /* use as treeInfo in InitExt() call for sequential processing */
+
+/*
+** Skein macros for getting/setting tweak words, etc.
+** These are useful for partial input bytes, hash tree init/update, etc.
+**/
+#define Skein_Get_Tweak(ctxPtr,TWK_NUM) ((ctxPtr)->h.T[TWK_NUM])
+#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal) {(ctxPtr)->h.T[TWK_NUM] = (tVal);}
+
+#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr,0)
+#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr,1)
+#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0)
+#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1)
+
+/* set both tweak words at once */
+#define Skein_Set_T0_T1(ctxPtr,T0,T1) \
+ { \
+ Skein_Set_T0(ctxPtr,(T0)); \
+ Skein_Set_T1(ctxPtr,(T1)); \
+ }
+
+#define Skein_Set_Type(ctxPtr,BLK_TYPE) \
+ Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE)
+
+/* set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0; */
+#define Skein_Start_New_Type(ctxPtr,BLK_TYPE) \
+ { Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; }
+
+#define Skein_Clear_First_Flag(hdr) { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; }
+#define Skein_Set_Bit_Pad_Flag(hdr) { (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; }
+
+#define Skein_Set_Tree_Level(hdr,height) { (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height);}
+
+/*****************************************************************
+** "Internal" Skein definitions for debugging and error checking
+******************************************************************/
+#ifdef SKEIN_DEBUG /* examine/display intermediate values? */
+#include "skein_debug.h"
+#else /* default is no callouts */
+#define Skein_Show_Block(bits,ctx,X,blkPtr,wPtr,ksEvenPtr,ksOddPtr)
+#define Skein_Show_Round(bits,ctx,r,X)
+#define Skein_Show_R_Ptr(bits,ctx,r,X_ptr)
+#define Skein_Show_Final(bits,ctx,cnt,outPtr)
+#define Skein_Show_Key(bits,ctx,key,keyBytes)
+#endif
+
+#ifndef SKEIN_ERR_CHECK /* run-time checks (e.g., bad params, uninitialized context)? */
+#define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */
+#define Skein_assert(x)
+#elif defined(SKEIN_ASSERT)
+#include <assert.h>
+#define Skein_Assert(x,retCode) assert(x)
+#define Skein_assert(x) assert(x)
+#else
+#include <assert.h>
+#define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /* caller error */
+#define Skein_assert(x) assert(x) /* internal error */
+#endif
+
+/*****************************************************************
+** Skein block function constants (shared across Ref and Opt code)
+******************************************************************/
+enum
+ {
+ /* Skein_256 round rotation constants */
+ R_256_0_0=14, R_256_0_1=16,
+ R_256_1_0=52, R_256_1_1=57,
+ R_256_2_0=23, R_256_2_1=40,
+ R_256_3_0= 5, R_256_3_1=37,
+ R_256_4_0=25, R_256_4_1=33,
+ R_256_5_0=46, R_256_5_1=12,
+ R_256_6_0=58, R_256_6_1=22,
+ R_256_7_0=32, R_256_7_1=32,
+
+ /* Skein_512 round rotation constants */
+ R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37,
+ R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42,
+ R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39,
+ R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56,
+ R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24,
+ R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17,
+ R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43,
+ R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22,
+
+ /* Skein1024 round rotation constants */
+ R1024_0_0=24, R1024_0_1=13, R1024_0_2= 8, R1024_0_3=47, R1024_0_4= 8, R1024_0_5=17, R1024_0_6=22, R1024_0_7=37,
+ R1024_1_0=38, R1024_1_1=19, R1024_1_2=10, R1024_1_3=55, R1024_1_4=49, R1024_1_5=18, R1024_1_6=23, R1024_1_7=52,
+ R1024_2_0=33, R1024_2_1= 4, R1024_2_2=51, R1024_2_3=13, R1024_2_4=34, R1024_2_5=41, R1024_2_6=59, R1024_2_7=17,
+ R1024_3_0= 5, R1024_3_1=20, R1024_3_2=48, R1024_3_3=41, R1024_3_4=47, R1024_3_5=28, R1024_3_6=16, R1024_3_7=25,
+ R1024_4_0=41, R1024_4_1= 9, R1024_4_2=37, R1024_4_3=31, R1024_4_4=12, R1024_4_5=47, R1024_4_6=44, R1024_4_7=30,
+ R1024_5_0=16, R1024_5_1=34, R1024_5_2=56, R1024_5_3=51, R1024_5_4= 4, R1024_5_5=53, R1024_5_6=42, R1024_5_7=41,
+ R1024_6_0=31, R1024_6_1=44, R1024_6_2=47, R1024_6_3=46, R1024_6_4=19, R1024_6_5=42, R1024_6_6=44, R1024_6_7=25,
+ R1024_7_0= 9, R1024_7_1=48, R1024_7_2=35, R1024_7_3=52, R1024_7_4=23, R1024_7_5=31, R1024_7_6=37, R1024_7_7=20
+ };
+
+#ifndef SKEIN_ROUNDS
+#define SKEIN_256_ROUNDS_TOTAL (72) /* number of rounds for the different block sizes */
+#define SKEIN_512_ROUNDS_TOTAL (72)
+#define SKEIN1024_ROUNDS_TOTAL (80)
+#else /* allow command-line define in range 8*(5..14) */
+#define SKEIN_256_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/100) + 5) % 10) + 5))
+#define SKEIN_512_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/ 10) + 5) % 10) + 5))
+#define SKEIN1024_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS ) + 5) % 10) + 5))
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Pull in FreeBSD specific shims */
+#include "skein_freebsd.h"
+
+#endif /* ifndef _SKEIN_H_ */
diff --git a/freebsd/sys/crypto/skein/skein_block.c b/freebsd/sys/crypto/skein/skein_block.c
new file mode 100644
index 00000000..831b45aa
--- /dev/null
+++ b/freebsd/sys/crypto/skein/skein_block.c
@@ -0,0 +1,708 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/***********************************************************************
+**
+** Implementation of the Skein block functions.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+** Compile-time switches:
+**
+** SKEIN_USE_ASM -- set bits (256/512/1024) to select which
+** versions use ASM code for block processing
+** [default: use C for all block sizes]
+**
+************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/endian.h>
+#include <sys/types.h>
+
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <string.h>
+#endif
+
+#include "skein.h"
+
+#ifndef SKEIN_USE_ASM
+#define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */
+#endif
+
+#ifndef SKEIN_LOOP
+#define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */
+#endif
+
+#define BLK_BITS (WCNT*64) /* some useful definitions for code here */
+#define KW_TWK_BASE (0)
+#define KW_KEY_BASE (3)
+#define ks (kw + KW_KEY_BASE)
+#define ts (kw + KW_TWK_BASE)
+
+#ifdef SKEIN_DEBUG
+#define DebugSaveTweak(ctx) { ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; }
+#else
+#define DebugSaveTweak(ctx)
+#endif
+
+/*****************************************************************/
+/* functions to process blkCnt (nonzero) full block(s) of data. */
+void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
+void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
+void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd);
+
+/***************************** Skein_256 ******************************/
+#if !(SKEIN_USE_ASM & 256)
+/* Core Threefish-256 compression: absorb blkCnt (nonzero) full 32-byte
+ blocks into the chaining variables ctx->X. byteCntAdd is added to the
+ processed-byte tweak word T[0] once per block (so pass the block size,
+ or sizeof(u64b_t) for counter-mode output blocks). */
+void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
+ { /* do it in C */
+ enum
+ {
+ WCNT = SKEIN_256_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN_256_ROUNDS_TOTAL/8)
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
+#define SKEIN_UNROLL_256 (((SKEIN_LOOP)/100)%10)
+#else
+#define SKEIN_UNROLL_256 (0)
+#endif
+
+#if SKEIN_UNROLL_256
+#if (RCNT % SKEIN_UNROLL_256)
+#error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */
+#endif
+ size_t r;
+ u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
+#else
+ u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
+#endif
+ u64b_t X0,X1,X2,X3; /* local copy of context vars, for speed */
+ u64b_t w [WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ const u64b_t *Xptr[4]; /* use for debugging (help compiler put Xn in registers) */
+ Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
+#endif
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ /* ts/ks are file-scope macros aliasing the tweak/key regions of kw[] */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do {
+ /* this implementation only supports 2**64 input bytes (no carry out here) */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[0] = ctx->X[0];
+ ks[1] = ctx->X[1];
+ ks[2] = ctx->X[2];
+ ks[3] = ctx->X[3];
+ ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY;
+
+ ts[2] = ts[0] ^ ts[1];
+
+ Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
+
+ X0 = w[0] + ks[0]; /* do the first full key injection */
+ X1 = w[1] + ks[1] + ts[0];
+ X2 = w[2] + ks[2] + ts[1];
+ X3 = w[3] + ks[3];
+
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); /* show starting state values */
+
+ blkPtr += SKEIN_256_BLOCK_BYTES;
+
+ /* run the rounds */
+
+/* one MIX layer: two parallel add/rotate/xor pairs on the permuted words */
+#define Round256(p0,p1,p2,p3,ROT,rNum) \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
+
+#if SKEIN_UNROLL_256 == 0
+#define R256(p0,p1,p2,p3,ROT,rNum) /* fully unrolled */ \
+ Round256(p0,p1,p2,p3,ROT,rNum) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
+
+#define I256(R) \
+ X0 += ks[((R)+1) % 5]; /* inject the key schedule value */ \
+ X1 += ks[((R)+2) % 5] + ts[((R)+1) % 3]; \
+ X2 += ks[((R)+3) % 5] + ts[((R)+2) % 3]; \
+ X3 += ks[((R)+4) % 5] + (R)+1; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+#else /* looping version */
+#define R256(p0,p1,p2,p3,ROT,rNum) \
+ Round256(p0,p1,p2,p3,ROT,rNum) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);
+
+#define I256(R) \
+ X0 += ks[r+(R)+0]; /* inject the key schedule value */ \
+ X1 += ks[r+(R)+1] + ts[r+(R)+0]; \
+ X2 += ks[r+(R)+2] + ts[r+(R)+1]; \
+ X3 += ks[r+(R)+3] + r+(R) ; \
+ ks[r + (R)+4 ] = ks[r+(R)-1]; /* rotate key schedule */\
+ ts[r + (R)+2 ] = ts[r+(R)-1]; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+
+ for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_256) /* loop thru it */
+#endif
+ {
+/* eight Threefish rounds plus the two key injections that follow them */
+#define R256_8_rounds(R) \
+ R256(0,1,2,3,R_256_0,8*(R) + 1); \
+ R256(0,3,2,1,R_256_1,8*(R) + 2); \
+ R256(0,1,2,3,R_256_2,8*(R) + 3); \
+ R256(0,3,2,1,R_256_3,8*(R) + 4); \
+ I256(2*(R)); \
+ R256(0,1,2,3,R_256_4,8*(R) + 5); \
+ R256(0,3,2,1,R_256_5,8*(R) + 6); \
+ R256(0,1,2,3,R_256_6,8*(R) + 7); \
+ R256(0,3,2,1,R_256_7,8*(R) + 8); \
+ I256(2*(R)+1);
+
+ R256_8_rounds( 0);
+
+#define R256_Unroll_R(NN) ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_256 > (NN)))
+
+ #if R256_Unroll_R( 1)
+ R256_8_rounds( 1);
+ #endif
+ #if R256_Unroll_R( 2)
+ R256_8_rounds( 2);
+ #endif
+ #if R256_Unroll_R( 3)
+ R256_8_rounds( 3);
+ #endif
+ #if R256_Unroll_R( 4)
+ R256_8_rounds( 4);
+ #endif
+ #if R256_Unroll_R( 5)
+ R256_8_rounds( 5);
+ #endif
+ #if R256_Unroll_R( 6)
+ R256_8_rounds( 6);
+ #endif
+ #if R256_Unroll_R( 7)
+ R256_8_rounds( 7);
+ #endif
+ #if R256_Unroll_R( 8)
+ R256_8_rounds( 8);
+ #endif
+ #if R256_Unroll_R( 9)
+ R256_8_rounds( 9);
+ #endif
+ #if R256_Unroll_R(10)
+ R256_8_rounds(10);
+ #endif
+ #if R256_Unroll_R(11)
+ R256_8_rounds(11);
+ #endif
+ #if R256_Unroll_R(12)
+ R256_8_rounds(12);
+ #endif
+ #if R256_Unroll_R(13)
+ R256_8_rounds(13);
+ #endif
+ #if R256_Unroll_R(14)
+ R256_8_rounds(14);
+ #endif
+ #if (SKEIN_UNROLL_256 > 14)
+#error "need more unrolling in Skein_256_Process_Block"
+ #endif
+ }
+ /* do the final "feedforward" xor, update context chaining vars */
+ ctx->X[0] = X0 ^ w[0];
+ ctx->X[1] = X1 ^ w[1];
+ ctx->X[2] = X2 ^ w[2];
+ ctx->X[3] = X3 ^ w[3];
+
+ Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
+
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST; /* clear "first block" flag for later blocks */
+ }
+ while (--blkCnt);
+ ctx->h.T[0] = ts[0];
+ ctx->h.T[1] = ts[1];
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t Skein_256_Process_Block_CodeSize(void)
+ {
+ /* estimate compiled code size by subtracting function addresses;
+ assumes the linker lays these two functions out contiguously --
+ NOTE(review): not guaranteed by the C standard, measurement only */
+ return ((u08b_t *) Skein_256_Process_Block_CodeSize) -
+ ((u08b_t *) Skein_256_Process_Block);
+ }
+uint_t Skein_256_Unroll_Cnt(void)
+ {
+ /* report the compile-time loop unroll factor (0 = fully unrolled) */
+ return SKEIN_UNROLL_256;
+ }
+#endif
+#endif
+
+/***************************** Skein_512 ******************************/
+#if !(SKEIN_USE_ASM & 512)
+/* Core Threefish-512 compression: absorb blkCnt (nonzero) full 64-byte
+ blocks into the chaining variables ctx->X. byteCntAdd is added to the
+ processed-byte tweak word T[0] once per block (so pass the block size,
+ or sizeof(u64b_t) for counter-mode output blocks). */
+void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
+ { /* do it in C */
+ enum
+ {
+ WCNT = SKEIN_512_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN_512_ROUNDS_TOTAL/8)
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
+#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10)
+#else
+#define SKEIN_UNROLL_512 (0)
+#endif
+
+#if SKEIN_UNROLL_512
+#if (RCNT % SKEIN_UNROLL_512)
+#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */
+#endif
+ size_t r;
+ u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
+#else
+ u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
+#endif
+ u64b_t X0,X1,X2,X3,X4,X5,X6,X7; /* local copy of vars, for speed */
+ u64b_t w [WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ const u64b_t *Xptr[8]; /* use for debugging (help compiler put Xn in registers) */
+ Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
+ Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; Xptr[7] = &X7;
+#endif
+
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ /* ts/ks are file-scope macros aliasing the tweak/key regions of kw[] */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do {
+ /* this implementation only supports 2**64 input bytes (no carry out here) */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[0] = ctx->X[0];
+ ks[1] = ctx->X[1];
+ ks[2] = ctx->X[2];
+ ks[3] = ctx->X[3];
+ ks[4] = ctx->X[4];
+ ks[5] = ctx->X[5];
+ ks[6] = ctx->X[6];
+ ks[7] = ctx->X[7];
+ ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
+ ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
+
+ ts[2] = ts[0] ^ ts[1];
+
+ Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
+
+ X0 = w[0] + ks[0]; /* do the first full key injection */
+ X1 = w[1] + ks[1];
+ X2 = w[2] + ks[2];
+ X3 = w[3] + ks[3];
+ X4 = w[4] + ks[4];
+ X5 = w[5] + ks[5] + ts[0];
+ X6 = w[6] + ks[6] + ts[1];
+ X7 = w[7] + ks[7];
+
+ blkPtr += SKEIN_512_BLOCK_BYTES;
+
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);
+ /* run the rounds */
+/* one MIX layer: four parallel add/rotate/xor pairs on the permuted words */
+#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
+ X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
+ X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
+
+#if SKEIN_UNROLL_512 == 0
+#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) /* unrolled */ \
+ Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
+
+#define I512(R) \
+ X0 += ks[((R)+1) % 9]; /* inject the key schedule value */ \
+ X1 += ks[((R)+2) % 9]; \
+ X2 += ks[((R)+3) % 9]; \
+ X3 += ks[((R)+4) % 9]; \
+ X4 += ks[((R)+5) % 9]; \
+ X5 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; \
+ X6 += ks[((R)+7) % 9] + ts[((R)+2) % 3]; \
+ X7 += ks[((R)+8) % 9] + (R)+1; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+#else /* looping version */
+#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
+ Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);
+
+#define I512(R) \
+ X0 += ks[r+(R)+0]; /* inject the key schedule value */ \
+ X1 += ks[r+(R)+1]; \
+ X2 += ks[r+(R)+2]; \
+ X3 += ks[r+(R)+3]; \
+ X4 += ks[r+(R)+4]; \
+ X5 += ks[r+(R)+5] + ts[r+(R)+0]; \
+ X6 += ks[r+(R)+6] + ts[r+(R)+1]; \
+ X7 += ks[r+(R)+7] + r+(R) ; \
+ ks[r + (R)+8] = ks[r+(R)-1]; /* rotate key schedule */ \
+ ts[r + (R)+2] = ts[r+(R)-1]; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+
+ for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_512) /* loop thru it */
+#endif /* end of looped code definitions */
+ {
+#define R512_8_rounds(R) /* do 8 full rounds */ \
+ R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1); \
+ R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2); \
+ R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3); \
+ R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4); \
+ I512(2*(R)); \
+ R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5); \
+ R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6); \
+ R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7); \
+ R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8); \
+ I512(2*(R)+1); /* and key injection */
+
+ R512_8_rounds( 0);
+
+#define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN)))
+
+ #if R512_Unroll_R( 1)
+ R512_8_rounds( 1);
+ #endif
+ #if R512_Unroll_R( 2)
+ R512_8_rounds( 2);
+ #endif
+ #if R512_Unroll_R( 3)
+ R512_8_rounds( 3);
+ #endif
+ #if R512_Unroll_R( 4)
+ R512_8_rounds( 4);
+ #endif
+ #if R512_Unroll_R( 5)
+ R512_8_rounds( 5);
+ #endif
+ #if R512_Unroll_R( 6)
+ R512_8_rounds( 6);
+ #endif
+ #if R512_Unroll_R( 7)
+ R512_8_rounds( 7);
+ #endif
+ #if R512_Unroll_R( 8)
+ R512_8_rounds( 8);
+ #endif
+ #if R512_Unroll_R( 9)
+ R512_8_rounds( 9);
+ #endif
+ #if R512_Unroll_R(10)
+ R512_8_rounds(10);
+ #endif
+ #if R512_Unroll_R(11)
+ R512_8_rounds(11);
+ #endif
+ #if R512_Unroll_R(12)
+ R512_8_rounds(12);
+ #endif
+ #if R512_Unroll_R(13)
+ R512_8_rounds(13);
+ #endif
+ #if R512_Unroll_R(14)
+ R512_8_rounds(14);
+ #endif
+ #if (SKEIN_UNROLL_512 > 14)
+#error "need more unrolling in Skein_512_Process_Block"
+ #endif
+ }
+
+ /* do the final "feedforward" xor, update context chaining vars */
+ ctx->X[0] = X0 ^ w[0];
+ ctx->X[1] = X1 ^ w[1];
+ ctx->X[2] = X2 ^ w[2];
+ ctx->X[3] = X3 ^ w[3];
+ ctx->X[4] = X4 ^ w[4];
+ ctx->X[5] = X5 ^ w[5];
+ ctx->X[6] = X6 ^ w[6];
+ ctx->X[7] = X7 ^ w[7];
+ Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
+
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST; /* clear "first block" flag for later blocks */
+ }
+ while (--blkCnt);
+ ctx->h.T[0] = ts[0];
+ ctx->h.T[1] = ts[1];
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+/* Report the compiled size (in bytes) of Skein_512_Process_Block by
+** subtracting function addresses.  This assumes the linker emits the
+** two functions adjacently in definition order; it is a benchmarking /
+** diagnostic aid only (pointer subtraction between distinct functions
+** is not strictly portable C). */
+size_t Skein_512_Process_Block_CodeSize(void)
+ {
+ return ((u08b_t *) Skein_512_Process_Block_CodeSize) -
+ ((u08b_t *) Skein_512_Process_Block);
+ }
+/* Report the loop-unroll factor compiled into Skein_512_Process_Block
+** (0 means the round loop is fully unrolled). */
+uint_t Skein_512_Unroll_Cnt(void)
+ {
+ return SKEIN_UNROLL_512;
+ }
+#endif
+#endif
+
+/***************************** Skein1024 ******************************/
+#if !(SKEIN_USE_ASM & 1024)
+/*
+** Process one or more full Skein-1024 blocks through the Threefish-1024
+** compression function.
+**  ctx        -- hash context: chaining variables ctx->X and tweak
+**                ctx->h.T are read and updated in place
+**  blkPtr     -- input data (blkCnt full blocks)
+**  blkCnt     -- number of full blocks to process (must be nonzero)
+**  byteCntAdd -- byte count added to the tweak ts[0] per block processed
+**
+** NOTE(review): ts[] and ks[] are not declared in this function --
+** presumably they are macros aliasing kw[] (ts == kw, ks == kw+3)
+** defined earlier in this file, as in the Skein reference code; confirm.
+*/
+void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
+ { /* do it in C, always looping (unrolled is bigger AND slower!) */
+ enum
+ {
+ WCNT = SKEIN1024_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN1024_ROUNDS_TOTAL/8)
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
+#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10)
+#else
+#define SKEIN_UNROLL_1024 (0)
+#endif
+
+#if (SKEIN_UNROLL_1024 != 0)
+#if (RCNT % SKEIN_UNROLL_1024)
+#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */
+#endif
+ size_t r;
+ u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
+#else
+ u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
+#endif
+
+ u64b_t X00,X01,X02,X03,X04,X05,X06,X07, /* local copy of vars, for speed */
+ X08,X09,X10,X11,X12,X13,X14,X15;
+ u64b_t w [WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ const u64b_t *Xptr[16]; /* use for debugging (help compiler put Xn in registers) */
+ Xptr[ 0] = &X00; Xptr[ 1] = &X01; Xptr[ 2] = &X02; Xptr[ 3] = &X03;
+ Xptr[ 4] = &X04; Xptr[ 5] = &X05; Xptr[ 6] = &X06; Xptr[ 7] = &X07;
+ Xptr[ 8] = &X08; Xptr[ 9] = &X09; Xptr[10] = &X10; Xptr[11] = &X11;
+ Xptr[12] = &X12; Xptr[13] = &X13; Xptr[14] = &X14; Xptr[15] = &X15;
+#endif
+
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do {
+ /* this implementation only supports 2**64 input bytes (no carry out here) */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[ 0] = ctx->X[ 0];
+ ks[ 1] = ctx->X[ 1];
+ ks[ 2] = ctx->X[ 2];
+ ks[ 3] = ctx->X[ 3];
+ ks[ 4] = ctx->X[ 4];
+ ks[ 5] = ctx->X[ 5];
+ ks[ 6] = ctx->X[ 6];
+ ks[ 7] = ctx->X[ 7];
+ ks[ 8] = ctx->X[ 8];
+ ks[ 9] = ctx->X[ 9];
+ ks[10] = ctx->X[10];
+ ks[11] = ctx->X[11];
+ ks[12] = ctx->X[12];
+ ks[13] = ctx->X[13];
+ ks[14] = ctx->X[14];
+ ks[15] = ctx->X[15];
+ /* ks[16] is the XOR of all key words with the key-schedule parity constant */
+ ks[16] = ks[ 0] ^ ks[ 1] ^ ks[ 2] ^ ks[ 3] ^
+ ks[ 4] ^ ks[ 5] ^ ks[ 6] ^ ks[ 7] ^
+ ks[ 8] ^ ks[ 9] ^ ks[10] ^ ks[11] ^
+ ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY;
+
+ ts[2] = ts[0] ^ ts[1]; /* third tweak word = XOR of the two tweak words */
+
+ Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
+
+ X00 = w[ 0] + ks[ 0]; /* do the first full key injection */
+ X01 = w[ 1] + ks[ 1];
+ X02 = w[ 2] + ks[ 2];
+ X03 = w[ 3] + ks[ 3];
+ X04 = w[ 4] + ks[ 4];
+ X05 = w[ 5] + ks[ 5];
+ X06 = w[ 6] + ks[ 6];
+ X07 = w[ 7] + ks[ 7];
+ X08 = w[ 8] + ks[ 8];
+ X09 = w[ 9] + ks[ 9];
+ X10 = w[10] + ks[10];
+ X11 = w[11] + ks[11];
+ X12 = w[12] + ks[12];
+ X13 = w[13] + ks[13] + ts[0]; /* tweak words enter at positions 13/14 */
+ X14 = w[14] + ks[14] + ts[1];
+ X15 = w[15] + ks[15];
+
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);
+
+/* one Threefish-1024 MIX round: eight parallel add/rotate/xor word pairs */
+#define Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rNum) \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
+ X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
+ X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
+ X##p8 += X##p9; X##p9 = RotL_64(X##p9,ROT##_4); X##p9 ^= X##p8; \
+ X##pA += X##pB; X##pB = RotL_64(X##pB,ROT##_5); X##pB ^= X##pA; \
+ X##pC += X##pD; X##pD = RotL_64(X##pD,ROT##_6); X##pD ^= X##pC; \
+ X##pE += X##pF; X##pF = RotL_64(X##pF,ROT##_7); X##pF ^= X##pE; \
+
+#if SKEIN_UNROLL_1024 == 0
+/* fully-unrolled variant: key schedule indexed mod 17, no rotation copies */
+#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
+ Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rn,Xptr);
+
+#define I1024(R) \
+ X00 += ks[((R)+ 1) % 17]; /* inject the key schedule value */ \
+ X01 += ks[((R)+ 2) % 17]; \
+ X02 += ks[((R)+ 3) % 17]; \
+ X03 += ks[((R)+ 4) % 17]; \
+ X04 += ks[((R)+ 5) % 17]; \
+ X05 += ks[((R)+ 6) % 17]; \
+ X06 += ks[((R)+ 7) % 17]; \
+ X07 += ks[((R)+ 8) % 17]; \
+ X08 += ks[((R)+ 9) % 17]; \
+ X09 += ks[((R)+10) % 17]; \
+ X10 += ks[((R)+11) % 17]; \
+ X11 += ks[((R)+12) % 17]; \
+ X12 += ks[((R)+13) % 17]; \
+ X13 += ks[((R)+14) % 17] + ts[((R)+1) % 3]; \
+ X14 += ks[((R)+15) % 17] + ts[((R)+2) % 3]; \
+ X15 += ks[((R)+16) % 17] + (R)+1; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+#else /* looping version */
+#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
+ Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rn,Xptr);
+
+#define I1024(R) \
+ X00 += ks[r+(R)+ 0]; /* inject the key schedule value */ \
+ X01 += ks[r+(R)+ 1]; \
+ X02 += ks[r+(R)+ 2]; \
+ X03 += ks[r+(R)+ 3]; \
+ X04 += ks[r+(R)+ 4]; \
+ X05 += ks[r+(R)+ 5]; \
+ X06 += ks[r+(R)+ 6]; \
+ X07 += ks[r+(R)+ 7]; \
+ X08 += ks[r+(R)+ 8]; \
+ X09 += ks[r+(R)+ 9]; \
+ X10 += ks[r+(R)+10]; \
+ X11 += ks[r+(R)+11]; \
+ X12 += ks[r+(R)+12]; \
+ X13 += ks[r+(R)+13] + ts[r+(R)+0]; \
+ X14 += ks[r+(R)+14] + ts[r+(R)+1]; \
+ X15 += ks[r+(R)+15] + r+(R) ; \
+ ks[r + (R)+16] = ks[r+(R)-1]; /* rotate key schedule */ \
+ ts[r + (R)+ 2] = ts[r+(R)-1]; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+
+ for (r=1;r <= 2*RCNT;r+=2*SKEIN_UNROLL_1024) /* loop thru it */
+#endif
+ {
+#define R1024_8_rounds(R) /* do 8 full rounds */ \
+ R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_0,8*(R) + 1); \
+ R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_1,8*(R) + 2); \
+ R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_2,8*(R) + 3); \
+ R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_3,8*(R) + 4); \
+ I1024(2*(R)); \
+ R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_4,8*(R) + 5); \
+ R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_5,8*(R) + 6); \
+ R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_6,8*(R) + 7); \
+ R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_7,8*(R) + 8); \
+ I1024(2*(R)+1);
+
+ R1024_8_rounds( 0);
+
+/* true when round-group NN must be emitted: always for the unrolled build
+** (up to ROUNDS_TOTAL/8 groups), or up to the unroll factor when looping */
+#define R1024_Unroll_R(NN) ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_1024 > (NN)))
+
+ #if R1024_Unroll_R( 1)
+ R1024_8_rounds( 1);
+ #endif
+ #if R1024_Unroll_R( 2)
+ R1024_8_rounds( 2);
+ #endif
+ #if R1024_Unroll_R( 3)
+ R1024_8_rounds( 3);
+ #endif
+ #if R1024_Unroll_R( 4)
+ R1024_8_rounds( 4);
+ #endif
+ #if R1024_Unroll_R( 5)
+ R1024_8_rounds( 5);
+ #endif
+ #if R1024_Unroll_R( 6)
+ R1024_8_rounds( 6);
+ #endif
+ #if R1024_Unroll_R( 7)
+ R1024_8_rounds( 7);
+ #endif
+ #if R1024_Unroll_R( 8)
+ R1024_8_rounds( 8);
+ #endif
+ #if R1024_Unroll_R( 9)
+ R1024_8_rounds( 9);
+ #endif
+ #if R1024_Unroll_R(10)
+ R1024_8_rounds(10);
+ #endif
+ #if R1024_Unroll_R(11)
+ R1024_8_rounds(11);
+ #endif
+ #if R1024_Unroll_R(12)
+ R1024_8_rounds(12);
+ #endif
+ #if R1024_Unroll_R(13)
+ R1024_8_rounds(13);
+ #endif
+ #if R1024_Unroll_R(14)
+ R1024_8_rounds(14);
+ #endif
+ #if (SKEIN_UNROLL_1024 > 14)
+#error "need more unrolling in Skein_1024_Process_Block"
+ #endif
+ }
+ /* do the final "feedforward" xor, update context chaining vars */
+
+ ctx->X[ 0] = X00 ^ w[ 0];
+ ctx->X[ 1] = X01 ^ w[ 1];
+ ctx->X[ 2] = X02 ^ w[ 2];
+ ctx->X[ 3] = X03 ^ w[ 3];
+ ctx->X[ 4] = X04 ^ w[ 4];
+ ctx->X[ 5] = X05 ^ w[ 5];
+ ctx->X[ 6] = X06 ^ w[ 6];
+ ctx->X[ 7] = X07 ^ w[ 7];
+ ctx->X[ 8] = X08 ^ w[ 8];
+ ctx->X[ 9] = X09 ^ w[ 9];
+ ctx->X[10] = X10 ^ w[10];
+ ctx->X[11] = X11 ^ w[11];
+ ctx->X[12] = X12 ^ w[12];
+ ctx->X[13] = X13 ^ w[13];
+ ctx->X[14] = X14 ^ w[14];
+ ctx->X[15] = X15 ^ w[15];
+
+ Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
+
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST; /* only the first block carries the FIRST flag */
+ blkPtr += SKEIN1024_BLOCK_BYTES;
+ }
+ while (--blkCnt);
+ ctx->h.T[0] = ts[0]; /* write updated tweak back to the context */
+ ctx->h.T[1] = ts[1];
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+/* Report the compiled size (in bytes) of Skein1024_Process_Block by
+** subtracting function addresses.  Assumes the linker emits the two
+** functions adjacently in definition order; benchmarking aid only. */
+size_t Skein1024_Process_Block_CodeSize(void)
+ {
+ return ((u08b_t *) Skein1024_Process_Block_CodeSize) -
+ ((u08b_t *) Skein1024_Process_Block);
+ }
+/* Report the loop-unroll factor compiled into Skein1024_Process_Block
+** (0 means the round loop is fully unrolled). */
+uint_t Skein1024_Unroll_Cnt(void)
+ {
+ return SKEIN_UNROLL_1024;
+ }
+#endif
+#endif
diff --git a/freebsd/sys/crypto/skein/skein_debug.h b/freebsd/sys/crypto/skein/skein_debug.h
new file mode 100644
index 00000000..7775c016
--- /dev/null
+++ b/freebsd/sys/crypto/skein/skein_debug.h
@@ -0,0 +1,48 @@
+#ifndef _SKEIN_DEBUG_H_
+#define _SKEIN_DEBUG_H_
+/***********************************************************************
+**
+** Interface definitions for Skein hashing debug output.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+************************************************************************/
+
+#ifdef SKEIN_DEBUG
+/* callout functions used inside Skein code */
+void Skein_Show_Block(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u64b_t *X,const u08b_t *blkPtr,
+ const u64b_t *wPtr,const u64b_t *ksPtr,const u64b_t *tsPtr);
+void Skein_Show_Round(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X);
+void Skein_Show_R_Ptr(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X_ptr[]);
+void Skein_Show_Final(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t cnt,const u08b_t *outPtr);
+void Skein_Show_Key (uint_t bits,const Skein_Ctxt_Hdr_t *h,const u08b_t *key,size_t keyBytes);
+
+extern uint_t skein_DebugFlag; /* flags to control debug output (0 --> none) */
+
+/* pseudo "round numbers" used to tag non-round events in debug output */
+#define SKEIN_RND_SPECIAL (1000u)
+#define SKEIN_RND_KEY_INITIAL (SKEIN_RND_SPECIAL+0u)
+#define SKEIN_RND_KEY_INJECT (SKEIN_RND_SPECIAL+1u)
+#define SKEIN_RND_FEED_FWD (SKEIN_RND_SPECIAL+2u)
+
+/* flag bits: skein_DebugFlag */
+#define SKEIN_DEBUG_KEY (1u << 1) /* show MAC key */
+#define SKEIN_DEBUG_CONFIG (1u << 2) /* show config block processing */
+#define SKEIN_DEBUG_STATE (1u << 3) /* show input state during Show_Block() */
+#define SKEIN_DEBUG_TWEAK (1u << 4) /* show tweak value during Show_Block() */
+#define SKEIN_DEBUG_KEYSCHED (1u << 5) /* show expanded key schedule */
+#define SKEIN_DEBUG_INPUT_64 (1u << 6) /* show input block as 64-bit words */
+#define SKEIN_DEBUG_INPUT_08 (1u << 7) /* show input block as 8-bit bytes */
+#define SKEIN_DEBUG_INJECT (1u << 8) /* show state after key injection & feedforward points */
+#define SKEIN_DEBUG_ROUNDS (1u << 9) /* show state after all rounds */
+#define SKEIN_DEBUG_FINAL (1u <<10) /* show final output of Skein */
+#define SKEIN_DEBUG_HDR (1u <<11) /* show block header */
+#define SKEIN_DEBUG_THREEFISH (1u <<12) /* use Threefish name instead of Skein */
+#define SKEIN_DEBUG_PERMUTE (1u <<13) /* use word permutations */
+/* "all" convenience masks: everything except the two presentation flags */
+#define SKEIN_DEBUG_ALL ((~0u) & ~(SKEIN_DEBUG_THREEFISH | SKEIN_DEBUG_PERMUTE))
+#define THREEFISH_DEBUG_ALL (SKEIN_DEBUG_ALL | SKEIN_DEBUG_THREEFISH)
+
+#endif /* SKEIN_DEBUG */
+
+#endif /* _SKEIN_DEBUG_H_ */
diff --git a/freebsd/sys/crypto/skein/skein_freebsd.h b/freebsd/sys/crypto/skein/skein_freebsd.h
new file mode 100644
index 00000000..935fa090
--- /dev/null
+++ b/freebsd/sys/crypto/skein/skein_freebsd.h
@@ -0,0 +1,79 @@
+/*-
+ * Copyright 2016 Allan Jude
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SKEIN_FREEBSD_H_
+#define _SKEIN_FREEBSD_H_
+
+/* block sizes in bytes (state words are 64-bit) */
+#define SKEIN_256_BLOCK_BYTES ( 8*SKEIN_256_STATE_WORDS)
+#define SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS)
+#define SKEIN1024_BLOCK_BYTES ( 8*SKEIN1024_STATE_WORDS)
+
+/* FreeBSD-style names; *_DIGEST_STRING_LENGTH = two hex chars per byte + NUL */
+#define SKEIN256_BLOCK_LENGTH SKEIN_256_BLOCK_BYTES
+#define SKEIN256_DIGEST_LENGTH 32
+#define SKEIN256_DIGEST_STRING_LENGTH (SKEIN256_DIGEST_LENGTH * 2 + 1)
+#define SKEIN512_BLOCK_LENGTH SKEIN_512_BLOCK_BYTES
+#define SKEIN512_DIGEST_LENGTH 64
+#define SKEIN512_DIGEST_STRING_LENGTH (SKEIN512_DIGEST_LENGTH * 2 + 1)
+#define SKEIN1024_BLOCK_LENGTH SKEIN1024_BLOCK_BYTES
+#define SKEIN1024_DIGEST_LENGTH 128
+#define SKEIN1024_DIGEST_STRING_LENGTH (SKEIN1024_DIGEST_LENGTH * 2 + 1)
+
+/* Make the context types look like the other hashes on FreeBSD */
+typedef Skein_256_Ctxt_t SKEIN256_CTX;
+typedef Skein_512_Ctxt_t SKEIN512_CTX;
+typedef Skein1024_Ctxt_t SKEIN1024_CTX;
+
+/* Make the prototypes look like the other hashes */
+void SKEIN256_Init (SKEIN256_CTX *ctx);
+void SKEIN512_Init (SKEIN512_CTX *ctx);
+void SKEIN1024_Init (SKEIN1024_CTX *ctx);
+
+void SKEIN256_Update(SKEIN256_CTX *ctx, const void *in, size_t len);
+void SKEIN512_Update(SKEIN512_CTX *ctx, const void *in, size_t len);
+void SKEIN1024_Update(SKEIN1024_CTX *ctx, const void *in, size_t len);
+
+/* C99 "[static N]" declares that callers must pass a buffer of at least
+** N bytes; the digest is written there and the context is finalized */
+void SKEIN256_Final(unsigned char digest[static SKEIN256_DIGEST_LENGTH], SKEIN256_CTX *ctx);
+void SKEIN512_Final(unsigned char digest[static SKEIN512_DIGEST_LENGTH], SKEIN512_CTX *ctx);
+void SKEIN1024_Final(unsigned char digest[static SKEIN1024_DIGEST_LENGTH], SKEIN1024_CTX *ctx);
+
+/* userland-only convenience helpers (libmd-style End/Data/File interfaces) */
+#ifndef _KERNEL
+char *SKEIN256_End(SKEIN256_CTX *, char *);
+char *SKEIN512_End(SKEIN512_CTX *, char *);
+char *SKEIN1024_End(SKEIN1024_CTX *, char *);
+char *SKEIN256_Data(const void *, unsigned int, char *);
+char *SKEIN512_Data(const void *, unsigned int, char *);
+char *SKEIN1024_Data(const void *, unsigned int, char *);
+char *SKEIN256_File(const char *, char *);
+char *SKEIN512_File(const char *, char *);
+char *SKEIN1024_File(const char *, char *);
+char *SKEIN256_FileChunk(const char *, char *, off_t, off_t);
+char *SKEIN512_FileChunk(const char *, char *, off_t, off_t);
+char *SKEIN1024_FileChunk(const char *, char *, off_t, off_t);
+#endif
+
+#endif /* ifndef _SKEIN_FREEBSD_H_ */
diff --git a/freebsd/sys/crypto/skein/skein_iv.h b/freebsd/sys/crypto/skein/skein_iv.h
new file mode 100644
index 00000000..6e661666
--- /dev/null
+++ b/freebsd/sys/crypto/skein/skein_iv.h
@@ -0,0 +1,200 @@
+/* $FreeBSD$ */
+#ifndef _SKEIN_IV_H_
+#define _SKEIN_IV_H_
+
+#include "skein.h" /* get Skein macros and types */
+
+/*
+***************** Pre-computed Skein IVs *******************
+**
+** NOTE: these values are not "magic" constants, but
+** are generated using the Threefish block function.
+** They are pre-computed here only for speed; i.e., to
+** avoid the need for a Threefish call during Init().
+**
+** The IV for any fixed hash length may be pre-computed.
+** Only the most common values are included here.
+**
+************************************************************
+**/
+
+/*
+** NOTE(review): these arrays are object *definitions* with external
+** linkage, so including this header from more than one translation unit
+** would produce duplicate-symbol link errors -- presumably it is only
+** included from the single Skein implementation file; confirm.
+*/
+
+#define MK_64 SKEIN_MK_64
+
+/* blkSize = 256 bits. hashSize = 128 bits */
+const u64b_t SKEIN_256_IV_128[] =
+ {
+ MK_64(0xE1111906,0x964D7260),
+ MK_64(0x883DAAA7,0x7C8D811C),
+ MK_64(0x10080DF4,0x91960F7A),
+ MK_64(0xCCF7DDE5,0xB45BC1C2)
+ };
+
+/* blkSize = 256 bits. hashSize = 160 bits */
+const u64b_t SKEIN_256_IV_160[] =
+ {
+ MK_64(0x14202314,0x72825E98),
+ MK_64(0x2AC4E9A2,0x5A77E590),
+ MK_64(0xD47A5856,0x8838D63E),
+ MK_64(0x2DD2E496,0x8586AB7D)
+ };
+
+/* blkSize = 256 bits. hashSize = 224 bits */
+const u64b_t SKEIN_256_IV_224[] =
+ {
+ MK_64(0xC6098A8C,0x9AE5EA0B),
+ MK_64(0x876D5686,0x08C5191C),
+ MK_64(0x99CB88D7,0xD7F53884),
+ MK_64(0x384BDDB1,0xAEDDB5DE)
+ };
+
+/* blkSize = 256 bits. hashSize = 256 bits */
+const u64b_t SKEIN_256_IV_256[] =
+ {
+ MK_64(0xFC9DA860,0xD048B449),
+ MK_64(0x2FCA6647,0x9FA7D833),
+ MK_64(0xB33BC389,0x6656840F),
+ MK_64(0x6A54E920,0xFDE8DA69)
+ };
+
+/* blkSize = 512 bits. hashSize = 128 bits */
+const u64b_t SKEIN_512_IV_128[] =
+ {
+ MK_64(0xA8BC7BF3,0x6FBF9F52),
+ MK_64(0x1E9872CE,0xBD1AF0AA),
+ MK_64(0x309B1790,0xB32190D3),
+ MK_64(0xBCFBB854,0x3F94805C),
+ MK_64(0x0DA61BCD,0x6E31B11B),
+ MK_64(0x1A18EBEA,0xD46A32E3),
+ MK_64(0xA2CC5B18,0xCE84AA82),
+ MK_64(0x6982AB28,0x9D46982D)
+ };
+
+/* blkSize = 512 bits. hashSize = 160 bits */
+const u64b_t SKEIN_512_IV_160[] =
+ {
+ MK_64(0x28B81A2A,0xE013BD91),
+ MK_64(0xC2F11668,0xB5BDF78F),
+ MK_64(0x1760D8F3,0xF6A56F12),
+ MK_64(0x4FB74758,0x8239904F),
+ MK_64(0x21EDE07F,0x7EAF5056),
+ MK_64(0xD908922E,0x63ED70B8),
+ MK_64(0xB8EC76FF,0xECCB52FA),
+ MK_64(0x01A47BB8,0xA3F27A6E)
+ };
+
+/* blkSize = 512 bits. hashSize = 224 bits */
+const u64b_t SKEIN_512_IV_224[] =
+ {
+ MK_64(0xCCD06162,0x48677224),
+ MK_64(0xCBA65CF3,0xA92339EF),
+ MK_64(0x8CCD69D6,0x52FF4B64),
+ MK_64(0x398AED7B,0x3AB890B4),
+ MK_64(0x0F59D1B1,0x457D2BD0),
+ MK_64(0x6776FE65,0x75D4EB3D),
+ MK_64(0x99FBC70E,0x997413E9),
+ MK_64(0x9E2CFCCF,0xE1C41EF7)
+ };
+
+/* blkSize = 512 bits. hashSize = 256 bits */
+const u64b_t SKEIN_512_IV_256[] =
+ {
+ MK_64(0xCCD044A1,0x2FDB3E13),
+ MK_64(0xE8359030,0x1A79A9EB),
+ MK_64(0x55AEA061,0x4F816E6F),
+ MK_64(0x2A2767A4,0xAE9B94DB),
+ MK_64(0xEC06025E,0x74DD7683),
+ MK_64(0xE7A436CD,0xC4746251),
+ MK_64(0xC36FBAF9,0x393AD185),
+ MK_64(0x3EEDBA18,0x33EDFC13)
+ };
+
+/* blkSize = 512 bits. hashSize = 384 bits */
+const u64b_t SKEIN_512_IV_384[] =
+ {
+ MK_64(0xA3F6C6BF,0x3A75EF5F),
+ MK_64(0xB0FEF9CC,0xFD84FAA4),
+ MK_64(0x9D77DD66,0x3D770CFE),
+ MK_64(0xD798CBF3,0xB468FDDA),
+ MK_64(0x1BC4A666,0x8A0E4465),
+ MK_64(0x7ED7D434,0xE5807407),
+ MK_64(0x548FC1AC,0xD4EC44D6),
+ MK_64(0x266E1754,0x6AA18FF8)
+ };
+
+/* blkSize = 512 bits. hashSize = 512 bits */
+const u64b_t SKEIN_512_IV_512[] =
+ {
+ MK_64(0x4903ADFF,0x749C51CE),
+ MK_64(0x0D95DE39,0x9746DF03),
+ MK_64(0x8FD19341,0x27C79BCE),
+ MK_64(0x9A255629,0xFF352CB1),
+ MK_64(0x5DB62599,0xDF6CA7B0),
+ MK_64(0xEABE394C,0xA9D5C3F4),
+ MK_64(0x991112C7,0x1A75B523),
+ MK_64(0xAE18A40B,0x660FCC33)
+ };
+
+/* blkSize = 1024 bits. hashSize = 384 bits */
+const u64b_t SKEIN1024_IV_384[] =
+ {
+ MK_64(0x5102B6B8,0xC1894A35),
+ MK_64(0xFEEBC9E3,0xFE8AF11A),
+ MK_64(0x0C807F06,0xE32BED71),
+ MK_64(0x60C13A52,0xB41A91F6),
+ MK_64(0x9716D35D,0xD4917C38),
+ MK_64(0xE780DF12,0x6FD31D3A),
+ MK_64(0x797846B6,0xC898303A),
+ MK_64(0xB172C2A8,0xB3572A3B),
+ MK_64(0xC9BC8203,0xA6104A6C),
+ MK_64(0x65909338,0xD75624F4),
+ MK_64(0x94BCC568,0x4B3F81A0),
+ MK_64(0x3EBBF51E,0x10ECFD46),
+ MK_64(0x2DF50F0B,0xEEB08542),
+ MK_64(0x3B5A6530,0x0DBC6516),
+ MK_64(0x484B9CD2,0x167BBCE1),
+ MK_64(0x2D136947,0xD4CBAFEA)
+ };
+
+/* blkSize = 1024 bits. hashSize = 512 bits */
+const u64b_t SKEIN1024_IV_512[] =
+ {
+ MK_64(0xCAEC0E5D,0x7C1B1B18),
+ MK_64(0xA01B0E04,0x5F03E802),
+ MK_64(0x33840451,0xED912885),
+ MK_64(0x374AFB04,0xEAEC2E1C),
+ MK_64(0xDF25A0E2,0x813581F7),
+ MK_64(0xE4004093,0x8B12F9D2),
+ MK_64(0xA662D539,0xC2ED39B6),
+ MK_64(0xFA8B85CF,0x45D8C75A),
+ MK_64(0x8316ED8E,0x29EDE796),
+ MK_64(0x053289C0,0x2E9F91B8),
+ MK_64(0xC3F8EF1D,0x6D518B73),
+ MK_64(0xBDCEC3C4,0xD5EF332E),
+ MK_64(0x549A7E52,0x22974487),
+ MK_64(0x67070872,0x5B749816),
+ MK_64(0xB9CD28FB,0xF0581BD1),
+ MK_64(0x0E2940B8,0x15804974)
+ };
+
+/* blkSize = 1024 bits. hashSize = 1024 bits */
+const u64b_t SKEIN1024_IV_1024[] =
+ {
+ MK_64(0xD593DA07,0x41E72355),
+ MK_64(0x15B5E511,0xAC73E00C),
+ MK_64(0x5180E5AE,0xBAF2C4F0),
+ MK_64(0x03BD41D3,0xFCBCAFAF),
+ MK_64(0x1CAEC6FD,0x1983A898),
+ MK_64(0x6E510B8B,0xCDD0589F),
+ MK_64(0x77E2BDFD,0xC6394ADA),
+ MK_64(0xC11E1DB5,0x24DCB0A3),
+ MK_64(0xD6D14AF9,0xC6329AB5),
+ MK_64(0x6A9B0BFC,0x6EB67E0D),
+ MK_64(0x9243C60D,0xCCFF1332),
+ MK_64(0x1A1F1DDE,0x743F02D4),
+ MK_64(0x0996753C,0x10ED0BB8),
+ MK_64(0x6572DD22,0xF2B4969A),
+ MK_64(0x61FD3062,0xD00A579A),
+ MK_64(0x1DE0536E,0x8682E539)
+ };
+
+#endif /* _SKEIN_IV_H_ */
diff --git a/freebsd/sys/crypto/skein/skein_port.h b/freebsd/sys/crypto/skein/skein_port.h
new file mode 100644
index 00000000..7025a516
--- /dev/null
+++ b/freebsd/sys/crypto/skein/skein_port.h
@@ -0,0 +1,158 @@
+/* $FreeBSD$ */
+#ifndef _SKEIN_PORT_H_
+#define _SKEIN_PORT_H_
+/*******************************************************************
+**
+** Platform-specific definitions for Skein hash function.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+** Many thanks to Brian Gladman for his portable header files.
+**
+** To port Skein to an "unsupported" platform, change the definitions
+** in this file appropriately.
+**
+********************************************************************/
+
+#include <sys/endian.h>
+#include <sys/types.h>
+
+#ifndef _OPENSOLARIS_SYS_TYPES_H_ /* Avoid redefining this typedef */
+typedef unsigned int uint_t; /* native unsigned integer */
+#endif
+typedef u_int8_t u08b_t; /* 8-bit unsigned integer */
+typedef u_int32_t uint_32t; /* 32-bit unsigned integer */
+typedef u_int64_t u64b_t; /* 64-bit unsigned integer */
+
+#ifndef RotL_64
+#define RotL_64(x,N) (((x) << (N)) | ((x) >> (64-(N))))
+#endif
+
+__BEGIN_DECLS
+
+/*
+ * Skein is "natively" little-endian (unlike SHA-xxx), for optimal
+ * performance on x86 CPUs. The Skein code requires the following
+ * definitions for dealing with endianness:
+ *
+ * SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian
+ * Skein_Put64_LSB_First
+ * Skein_Get64_LSB_First
+ * Skein_Swap64
+ *
+ * If SKEIN_NEED_SWAP is defined at compile time, it is used here
+ * along with the portable versions of Put64/Get64/Swap64, which
+ * are slow in general.
+ *
+ * Otherwise, an "auto-detect" of endianness is attempted below.
+ * If the default handling doesn't work well, the user may insert
+ * platform-specific code instead (e.g., for big-endian CPUs).
+ *
+ */
+#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */
+
+#if BYTE_ORDER == BIG_ENDIAN
+ /* here for big-endian CPUs */
+#define SKEIN_NEED_SWAP (1)
+#ifdef SKEIN_PORT_CODE
+void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt);
+void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt);
+#endif /* ifdef SKEIN_PORT_CODE */
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ /* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
+#define SKEIN_NEED_SWAP (0)
+#define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt)
+#define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt))
+#else
+#error "Skein needs endianness setting!"
+#endif
+
+#endif /* ifndef SKEIN_NEED_SWAP */
+
+/*
+ ******************************************************************
+ * Provide any definitions still needed.
+ ******************************************************************
+ */
+#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */
+#if SKEIN_NEED_SWAP
+#define Skein_Swap64(w64) bswap64(w64)
+#else
+#define Skein_Swap64(w64) (w64)
+#endif
+#endif /* ifndef Skein_Swap64 */
+
+
+#ifndef Skein_Put64_LSB_First
+void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt)
+#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */
+{
+ size_t n;
+
+ for (n = 0; n < bCnt / 8; n++)
+ le64enc(dst + n * 8, src[n]);
+}
+#else
+; /* output only the function prototype */
+#endif
+#endif /* ifndef Skein_Put64_LSB_First */
+
+
+#ifndef Skein_Get64_LSB_First
+void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt)
+#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */
+{
+ size_t n;
+
+ for (n = 0; n < wCnt; n++)
+ dst[n] = le64dec(src + n * 8);
+}
+#else
+; /* output only the function prototype */
+#endif
+#endif /* ifndef Skein_Get64_LSB_First */
+
+/* Start FreeBSD libmd shims */
+
+/* Ensure libmd symbols do not clash with libcrypto */
+#ifndef SKEIN256_Init
+#define SKEIN256_Init _libmd_SKEIN256_Init
+#define SKEIN512_Init _libmd_SKEIN512_Init
+#define SKEIN1024_Init _libmd_SKEIN1024_Init
+#endif
+#ifndef SKEIN256_Update
+#define SKEIN256_Update _libmd_SKEIN256_Update
+#define SKEIN512_Update _libmd_SKEIN512_Update
+#define SKEIN1024_Update _libmd_SKEIN1024_Update
+#endif
+#ifndef SKEIN256_Final
+#define SKEIN256_Final _libmd_SKEIN256_Final
+#define SKEIN512_Final _libmd_SKEIN512_Final
+#define SKEIN1024_Final _libmd_SKEIN1024_Final
+#endif
+#ifndef SKEIN256_End
+#define SKEIN256_End _libmd_SKEIN256_End
+#define SKEIN512_End _libmd_SKEIN512_End
+#define SKEIN1024_End _libmd_SKEIN1024_End
+#endif
+#ifndef SKEIN256_File
+#define SKEIN256_File _libmd_SKEIN256_File
+#define SKEIN512_File _libmd_SKEIN512_File
+#define SKEIN1024_File _libmd_SKEIN1024_File
+#endif
+#ifndef SKEIN256_FileChunk
+#define SKEIN256_FileChunk _libmd_SKEIN256_FileChunk
+#define SKEIN512_FileChunk _libmd_SKEIN512_FileChunk
+#define SKEIN1024_FileChunk _libmd_SKEIN1024_FileChunk
+#endif
+#ifndef SKEIN256_Data
+#define SKEIN256_Data _libmd_SKEIN256_Data
+#define SKEIN512_Data _libmd_SKEIN512_Data
+#define SKEIN1024_Data _libmd_SKEIN1024_Data
+#endif
+
+__END_DECLS
+
+#endif /* ifndef _SKEIN_PORT_H_ */
diff --git a/freebsd/sys/dev/bce/if_bce.c b/freebsd/sys/dev/bce/if_bce.c
index e186590f..26dacd7b 100644
--- a/freebsd/sys/dev/bce/if_bce.c
+++ b/freebsd/sys/dev/bce/if_bce.c
@@ -51,6 +51,54 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_bce.h>
+#include <rtems/bsd/sys/param.h>
+#include <sys/endian.h>
+#include <sys/systm.h>
+#include <sys/sockio.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/queue.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_arp.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+
+#include <dev/mii/mii.h>
+#include <dev/mii/miivar.h>
+#include <rtems/bsd/local/miidevs.h>
+#include <dev/mii/brgphyreg.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include <rtems/bsd/local/miibus_if.h>
+
#include <dev/bce/if_bcereg.h>
#include <dev/bce/if_bcefw.h>
@@ -411,9 +459,10 @@ static struct mbuf *bce_tso_setup (struct bce_softc *,
struct mbuf **, u16 *);
static int bce_tx_encap (struct bce_softc *, struct mbuf **);
static void bce_start_locked (struct ifnet *);
-static void bce_start (struct ifnet *);
-static int bce_ioctl (struct ifnet *, u_long, caddr_t);
-static void bce_watchdog (struct bce_softc *);
+static void bce_start (struct ifnet *);
+static int bce_ioctl (struct ifnet *, u_long, caddr_t);
+static uint64_t bce_get_counter (struct ifnet *, ift_counter);
+static void bce_watchdog (struct bce_softc *);
static int bce_ifmedia_upd (struct ifnet *);
static int bce_ifmedia_upd_locked (struct ifnet *);
static void bce_ifmedia_sts (struct ifnet *, struct ifmediareq *);
@@ -489,45 +538,38 @@ static SYSCTL_NODE(_hw, OID_AUTO, bce, CTLFLAG_RD, 0, "bce driver parameters");
/* Allowable values are TRUE or FALSE */
static int bce_verbose = TRUE;
-TUNABLE_INT("hw.bce.verbose", &bce_verbose);
SYSCTL_INT(_hw_bce, OID_AUTO, verbose, CTLFLAG_RDTUN, &bce_verbose, 0,
"Verbose output enable/disable");
/* Allowable values are TRUE or FALSE */
static int bce_tso_enable = TRUE;
-TUNABLE_INT("hw.bce.tso_enable", &bce_tso_enable);
SYSCTL_INT(_hw_bce, OID_AUTO, tso_enable, CTLFLAG_RDTUN, &bce_tso_enable, 0,
"TSO Enable/Disable");
/* Allowable values are 0 (IRQ), 1 (MSI/IRQ), and 2 (MSI-X/MSI/IRQ) */
/* ToDo: Add MSI-X support. */
static int bce_msi_enable = 1;
-TUNABLE_INT("hw.bce.msi_enable", &bce_msi_enable);
SYSCTL_INT(_hw_bce, OID_AUTO, msi_enable, CTLFLAG_RDTUN, &bce_msi_enable, 0,
"MSI-X|MSI|INTx selector");
/* Allowable values are 1, 2, 4, 8. */
static int bce_rx_pages = DEFAULT_RX_PAGES;
-TUNABLE_INT("hw.bce.rx_pages", &bce_rx_pages);
SYSCTL_UINT(_hw_bce, OID_AUTO, rx_pages, CTLFLAG_RDTUN, &bce_rx_pages, 0,
"Receive buffer descriptor pages (1 page = 255 buffer descriptors)");
/* Allowable values are 1, 2, 4, 8. */
static int bce_tx_pages = DEFAULT_TX_PAGES;
-TUNABLE_INT("hw.bce.tx_pages", &bce_tx_pages);
SYSCTL_UINT(_hw_bce, OID_AUTO, tx_pages, CTLFLAG_RDTUN, &bce_tx_pages, 0,
"Transmit buffer descriptor pages (1 page = 255 buffer descriptors)");
/* Allowable values are TRUE or FALSE. */
static int bce_hdr_split = TRUE;
-TUNABLE_INT("hw.bce.hdr_split", &bce_hdr_split);
SYSCTL_UINT(_hw_bce, OID_AUTO, hdr_split, CTLFLAG_RDTUN, &bce_hdr_split, 0,
"Frame header/payload splitting Enable/Disable");
/* Allowable values are TRUE or FALSE. */
static int bce_strict_rx_mtu = FALSE;
-TUNABLE_INT("hw.bce.strict_rx_mtu", &bce_strict_rx_mtu);
-SYSCTL_UINT(_hw_bce, OID_AUTO, loose_rx_mtu, CTLFLAG_RDTUN,
+SYSCTL_UINT(_hw_bce, OID_AUTO, strict_rx_mtu, CTLFLAG_RDTUN,
&bce_strict_rx_mtu, 0,
"Enable/Disable strict RX frame size checking");
@@ -539,7 +581,6 @@ static int bce_tx_quick_cons_trip_int = 1;
/* Generate 1 interrupt for every 20 transmit completions. */
static int bce_tx_quick_cons_trip_int = DEFAULT_TX_QUICK_CONS_TRIP_INT;
#endif
-TUNABLE_INT("hw.bce.tx_quick_cons_trip_int", &bce_tx_quick_cons_trip_int);
SYSCTL_UINT(_hw_bce, OID_AUTO, tx_quick_cons_trip_int, CTLFLAG_RDTUN,
&bce_tx_quick_cons_trip_int, 0,
"Transmit BD trip point during interrupts");
@@ -552,7 +593,6 @@ static int bce_tx_quick_cons_trip = 1;
/* Generate 1 interrupt for every 20 transmit completions. */
static int bce_tx_quick_cons_trip = DEFAULT_TX_QUICK_CONS_TRIP;
#endif
-TUNABLE_INT("hw.bce.tx_quick_cons_trip", &bce_tx_quick_cons_trip);
SYSCTL_UINT(_hw_bce, OID_AUTO, tx_quick_cons_trip, CTLFLAG_RDTUN,
&bce_tx_quick_cons_trip, 0,
"Transmit BD trip point");
@@ -565,7 +605,6 @@ static int bce_tx_ticks_int = 0;
/* Generate an interrupt if 80us have elapsed since the last TX completion. */
static int bce_tx_ticks_int = DEFAULT_TX_TICKS_INT;
#endif
-TUNABLE_INT("hw.bce.tx_ticks_int", &bce_tx_ticks_int);
SYSCTL_UINT(_hw_bce, OID_AUTO, tx_ticks_int, CTLFLAG_RDTUN,
&bce_tx_ticks_int, 0, "Transmit ticks count during interrupt");
@@ -577,7 +616,6 @@ static int bce_tx_ticks = 0;
/* Generate an interrupt if 80us have elapsed since the last TX completion. */
static int bce_tx_ticks = DEFAULT_TX_TICKS;
#endif
-TUNABLE_INT("hw.bce.tx_ticks", &bce_tx_ticks);
SYSCTL_UINT(_hw_bce, OID_AUTO, tx_ticks, CTLFLAG_RDTUN,
&bce_tx_ticks, 0, "Transmit ticks count");
@@ -589,7 +627,6 @@ static int bce_rx_quick_cons_trip_int = 1;
/* Generate 1 interrupt for every 6 received frames. */
static int bce_rx_quick_cons_trip_int = DEFAULT_RX_QUICK_CONS_TRIP_INT;
#endif
-TUNABLE_INT("hw.bce.rx_quick_cons_trip_int", &bce_rx_quick_cons_trip_int);
SYSCTL_UINT(_hw_bce, OID_AUTO, rx_quick_cons_trip_int, CTLFLAG_RDTUN,
&bce_rx_quick_cons_trip_int, 0,
"Receive BD trip point duirng interrupts");
@@ -602,7 +639,6 @@ static int bce_rx_quick_cons_trip = 1;
/* Generate 1 interrupt for every 6 received frames. */
static int bce_rx_quick_cons_trip = DEFAULT_RX_QUICK_CONS_TRIP;
#endif
-TUNABLE_INT("hw.bce.rx_quick_cons_trip", &bce_rx_quick_cons_trip);
SYSCTL_UINT(_hw_bce, OID_AUTO, rx_quick_cons_trip, CTLFLAG_RDTUN,
&bce_rx_quick_cons_trip, 0,
"Receive BD trip point");
@@ -615,7 +651,6 @@ static int bce_rx_ticks_int = 0;
/* Generate an int. if 18us have elapsed since the last received frame. */
static int bce_rx_ticks_int = DEFAULT_RX_TICKS_INT;
#endif
-TUNABLE_INT("hw.bce.rx_ticks_int", &bce_rx_ticks_int);
SYSCTL_UINT(_hw_bce, OID_AUTO, rx_ticks_int, CTLFLAG_RDTUN,
&bce_rx_ticks_int, 0, "Receive ticks count during interrupt");
@@ -627,7 +662,6 @@ static int bce_rx_ticks = 0;
/* Generate an int. if 18us have elapsed since the last received frame. */
static int bce_rx_ticks = DEFAULT_RX_TICKS;
#endif
-TUNABLE_INT("hw.bce.rx_ticks", &bce_rx_ticks);
SYSCTL_UINT(_hw_bce, OID_AUTO, rx_ticks, CTLFLAG_RDTUN,
&bce_rx_ticks, 0, "Receive ticks count");
@@ -1358,6 +1392,7 @@ bce_attach(device_t dev)
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = bce_ioctl;
ifp->if_start = bce_start;
+ ifp->if_get_counter = bce_get_counter;
ifp->if_init = bce_init;
ifp->if_mtu = ETHERMTU;
@@ -2328,7 +2363,7 @@ bce_nvram_erase_page(struct bce_softc *sc, u32 offset)
BCE_NVM_COMMAND_DOIT;
/*
- * Clear the DONE bit separately, set the NVRAM adress to erase,
+ * Clear the DONE bit separately, set the NVRAM address to erase,
* and issue the erase command.
*/
REG_WR(sc, BCE_NVM_COMMAND, BCE_NVM_COMMAND_DONE);
@@ -3014,7 +3049,7 @@ bce_get_rx_buffer_sizes(struct bce_softc *sc, int mtu)
sc->rx_bd_mbuf_alloc_size = MHLEN;
/* Make sure offset is 16 byte aligned for hardware. */
sc->rx_bd_mbuf_align_pad =
- roundup2((MSIZE - MHLEN), 16) - (MSIZE - MHLEN);
+ roundup2(MSIZE - MHLEN, 16) - (MSIZE - MHLEN);
sc->rx_bd_mbuf_data_len = sc->rx_bd_mbuf_alloc_size -
sc->rx_bd_mbuf_align_pad;
} else {
@@ -3205,6 +3240,13 @@ bce_dma_free(struct bce_softc *sc)
DBENTER(BCE_VERBOSE_RESET | BCE_VERBOSE_UNLOAD | BCE_VERBOSE_CTX);
/* Free, unmap, and destroy the status block. */
+ if (sc->status_block_paddr != 0) {
+ bus_dmamap_unload(
+ sc->status_tag,
+ sc->status_map);
+ sc->status_block_paddr = 0;
+ }
+
if (sc->status_block != NULL) {
bus_dmamem_free(
sc->status_tag,
@@ -3213,15 +3255,6 @@ bce_dma_free(struct bce_softc *sc)
sc->status_block = NULL;
}
- if (sc->status_map != NULL) {
- bus_dmamap_unload(
- sc->status_tag,
- sc->status_map);
- bus_dmamap_destroy(sc->status_tag,
- sc->status_map);
- sc->status_map = NULL;
- }
-
if (sc->status_tag != NULL) {
bus_dma_tag_destroy(sc->status_tag);
sc->status_tag = NULL;
@@ -3229,21 +3262,19 @@ bce_dma_free(struct bce_softc *sc)
/* Free, unmap, and destroy the statistics block. */
- if (sc->stats_block != NULL) {
- bus_dmamem_free(
+ if (sc->stats_block_paddr != 0) {
+ bus_dmamap_unload(
sc->stats_tag,
- sc->stats_block,
sc->stats_map);
- sc->stats_block = NULL;
+ sc->stats_block_paddr = 0;
}
- if (sc->stats_map != NULL) {
- bus_dmamap_unload(
+ if (sc->stats_block != NULL) {
+ bus_dmamem_free(
sc->stats_tag,
+ sc->stats_block,
sc->stats_map);
- bus_dmamap_destroy(sc->stats_tag,
- sc->stats_map);
- sc->stats_map = NULL;
+ sc->stats_block = NULL;
}
if (sc->stats_tag != NULL) {
@@ -3255,22 +3286,19 @@ bce_dma_free(struct bce_softc *sc)
/* Free, unmap and destroy all context memory pages. */
if (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5709) {
for (i = 0; i < sc->ctx_pages; i++ ) {
- if (sc->ctx_block[i] != NULL) {
- bus_dmamem_free(
+ if (sc->ctx_paddr[i] != 0) {
+ bus_dmamap_unload(
sc->ctx_tag,
- sc->ctx_block[i],
sc->ctx_map[i]);
- sc->ctx_block[i] = NULL;
+ sc->ctx_paddr[i] = 0;
}
- if (sc->ctx_map[i] != NULL) {
- bus_dmamap_unload(
- sc->ctx_tag,
- sc->ctx_map[i]);
- bus_dmamap_destroy(
+ if (sc->ctx_block[i] != NULL) {
+ bus_dmamem_free(
sc->ctx_tag,
+ sc->ctx_block[i],
sc->ctx_map[i]);
- sc->ctx_map[i] = NULL;
+ sc->ctx_block[i] = NULL;
}
}
@@ -3284,22 +3312,19 @@ bce_dma_free(struct bce_softc *sc)
/* Free, unmap and destroy all TX buffer descriptor chain pages. */
for (i = 0; i < sc->tx_pages; i++ ) {
- if (sc->tx_bd_chain[i] != NULL) {
- bus_dmamem_free(
+ if (sc->tx_bd_chain_paddr[i] != 0) {
+ bus_dmamap_unload(
sc->tx_bd_chain_tag,
- sc->tx_bd_chain[i],
sc->tx_bd_chain_map[i]);
- sc->tx_bd_chain[i] = NULL;
+ sc->tx_bd_chain_paddr[i] = 0;
}
- if (sc->tx_bd_chain_map[i] != NULL) {
- bus_dmamap_unload(
- sc->tx_bd_chain_tag,
- sc->tx_bd_chain_map[i]);
- bus_dmamap_destroy(
+ if (sc->tx_bd_chain[i] != NULL) {
+ bus_dmamem_free(
sc->tx_bd_chain_tag,
+ sc->tx_bd_chain[i],
sc->tx_bd_chain_map[i]);
- sc->tx_bd_chain_map[i] = NULL;
+ sc->tx_bd_chain[i] = NULL;
}
}
@@ -3312,22 +3337,19 @@ bce_dma_free(struct bce_softc *sc)
/* Free, unmap and destroy all RX buffer descriptor chain pages. */
for (i = 0; i < sc->rx_pages; i++ ) {
- if (sc->rx_bd_chain[i] != NULL) {
- bus_dmamem_free(
+ if (sc->rx_bd_chain_paddr[i] != 0) {
+ bus_dmamap_unload(
sc->rx_bd_chain_tag,
- sc->rx_bd_chain[i],
sc->rx_bd_chain_map[i]);
- sc->rx_bd_chain[i] = NULL;
+ sc->rx_bd_chain_paddr[i] = 0;
}
- if (sc->rx_bd_chain_map[i] != NULL) {
- bus_dmamap_unload(
- sc->rx_bd_chain_tag,
- sc->rx_bd_chain_map[i]);
- bus_dmamap_destroy(
+ if (sc->rx_bd_chain[i] != NULL) {
+ bus_dmamem_free(
sc->rx_bd_chain_tag,
+ sc->rx_bd_chain[i],
sc->rx_bd_chain_map[i]);
- sc->rx_bd_chain_map[i] = NULL;
+ sc->rx_bd_chain[i] = NULL;
}
}
@@ -3341,22 +3363,19 @@ bce_dma_free(struct bce_softc *sc)
/* Free, unmap and destroy all page buffer descriptor chain pages. */
if (bce_hdr_split == TRUE) {
for (i = 0; i < sc->pg_pages; i++ ) {
- if (sc->pg_bd_chain[i] != NULL) {
- bus_dmamem_free(
+ if (sc->pg_bd_chain_paddr[i] != 0) {
+ bus_dmamap_unload(
sc->pg_bd_chain_tag,
- sc->pg_bd_chain[i],
sc->pg_bd_chain_map[i]);
- sc->pg_bd_chain[i] = NULL;
+ sc->pg_bd_chain_paddr[i] = 0;
}
- if (sc->pg_bd_chain_map[i] != NULL) {
- bus_dmamap_unload(
- sc->pg_bd_chain_tag,
- sc->pg_bd_chain_map[i]);
- bus_dmamap_destroy(
+ if (sc->pg_bd_chain[i] != NULL) {
+ bus_dmamem_free(
sc->pg_bd_chain_tag,
+ sc->pg_bd_chain[i],
sc->pg_bd_chain_map[i]);
- sc->pg_bd_chain_map[i] = NULL;
+ sc->pg_bd_chain[i] = NULL;
}
}
@@ -6732,9 +6751,7 @@ bce_rx_intr(struct bce_softc *sc)
L2_FHDR_ERRORS_TOO_SHORT | L2_FHDR_ERRORS_GIANT_FRAME)) {
/* Log the error and release the mbuf. */
- ifp->if_ierrors++;
sc->l2fhdr_error_count++;
-
m_freem(m0);
m0 = NULL;
goto bce_rx_intr_next_rx;
@@ -6815,7 +6832,7 @@ bce_rx_intr(struct bce_softc *sc)
}
/* Increment received packet statistics. */
- ifp->if_ipackets++;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
bce_rx_intr_next_rx:
sw_rx_cons = NEXT_RX_BD(sw_rx_cons);
@@ -6973,7 +6990,7 @@ bce_tx_intr(struct bce_softc *sc)
sc->tx_mbuf_ptr[sw_tx_chain_cons] = NULL;
DBRUN(sc->debug_tx_mbuf_alloc--);
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
}
sc->used_tx_bd--;
@@ -7886,7 +7903,7 @@ bce_watchdog(struct bce_softc *sc)
sc->bce_ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
bce_init_locked(sc);
- sc->bce_ifp->if_oerrors++;
+ sc->watchdog_timeouts++;
bce_watchdog_exit:
REG_WR(sc, BCE_EMAC_RX_STATUS, status);
@@ -7942,7 +7959,7 @@ bce_intr(void *xsc)
goto bce_intr_exit;
}
- /* Ack the interrupt and stop others from occuring. */
+ /* Ack the interrupt and stop others from occurring. */
REG_WR(sc, BCE_PCICFG_INT_ACK_CMD,
BCE_PCICFG_INT_ACK_CMD_USE_INT_HC_PARAM |
BCE_PCICFG_INT_ACK_CMD_MASK_INT);
@@ -8142,28 +8159,15 @@ bce_set_rx_mode(struct bce_softc *sc)
static void
bce_stats_update(struct bce_softc *sc)
{
- struct ifnet *ifp;
struct statistics_block *stats;
DBENTER(BCE_EXTREME_MISC);
- ifp = sc->bce_ifp;
-
bus_dmamap_sync(sc->stats_tag, sc->stats_map, BUS_DMASYNC_POSTREAD);
stats = (struct statistics_block *) sc->stats_block;
/*
- * Certain controllers don't report
- * carrier sense errors correctly.
- * See errata E11_5708CA0_1165.
- */
- if (!(BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5706) &&
- !(BCE_CHIP_ID(sc) == BCE_CHIP_ID_5708_A0))
- ifp->if_oerrors +=
- (u_long) stats->stat_Dot3StatsCarrierSenseErrors;
-
- /*
* Update the sysctl statistics from the
* hardware statistics.
*/
@@ -8344,35 +8348,51 @@ bce_stats_update(struct bce_softc *sc)
sc->com_no_buffers = REG_RD_IND(sc, 0x120084);
- /*
- * Update the interface statistics from the
- * hardware statistics.
- */
- ifp->if_collisions =
- (u_long) sc->stat_EtherStatsCollisions;
-
- /* ToDo: This method loses soft errors. */
- ifp->if_ierrors =
- (u_long) sc->stat_EtherStatsUndersizePkts +
- (u_long) sc->stat_EtherStatsOversizePkts +
- (u_long) sc->stat_IfInMBUFDiscards +
- (u_long) sc->stat_Dot3StatsAlignmentErrors +
- (u_long) sc->stat_Dot3StatsFCSErrors +
- (u_long) sc->stat_IfInRuleCheckerDiscards +
- (u_long) sc->stat_IfInFTQDiscards +
- (u_long) sc->com_no_buffers;
-
- /* ToDo: This method loses soft errors. */
- ifp->if_oerrors =
- (u_long) sc->stat_emac_tx_stat_dot3statsinternalmactransmiterrors +
- (u_long) sc->stat_Dot3StatsExcessiveCollisions +
- (u_long) sc->stat_Dot3StatsLateCollisions;
-
/* ToDo: Add additional statistics? */
DBEXIT(BCE_EXTREME_MISC);
}
+static uint64_t
+bce_get_counter(struct ifnet *ifp, ift_counter cnt)
+{
+ struct bce_softc *sc;
+ uint64_t rv;
+
+ sc = if_getsoftc(ifp);
+
+ switch (cnt) {
+ case IFCOUNTER_COLLISIONS:
+ return (sc->stat_EtherStatsCollisions);
+ case IFCOUNTER_IERRORS:
+ return (sc->stat_EtherStatsUndersizePkts +
+ sc->stat_EtherStatsOversizePkts +
+ sc->stat_IfInMBUFDiscards +
+ sc->stat_Dot3StatsAlignmentErrors +
+ sc->stat_Dot3StatsFCSErrors +
+ sc->stat_IfInRuleCheckerDiscards +
+ sc->stat_IfInFTQDiscards +
+ sc->l2fhdr_error_count +
+ sc->com_no_buffers);
+ case IFCOUNTER_OERRORS:
+ rv = sc->stat_Dot3StatsExcessiveCollisions +
+ sc->stat_emac_tx_stat_dot3statsinternalmactransmiterrors +
+ sc->stat_Dot3StatsLateCollisions +
+ sc->watchdog_timeouts;
+ /*
+ * Certain controllers don't report
+ * carrier sense errors correctly.
+ * See errata E11_5708CA0_1165.
+ */
+ if (!(BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5706) &&
+ !(BCE_CHIP_ID(sc) == BCE_CHIP_ID_5708_A0))
+ rv += sc->stat_Dot3StatsCarrierSenseErrors;
+ return (rv);
+ default:
+ return (if_get_counter_default(ifp, cnt));
+ }
+}
+
/****************************************************************************/
/* Periodic function to notify the bootcode that the driver is still */
@@ -9821,14 +9841,8 @@ bce_dump_mbuf(struct bce_softc *sc, struct mbuf *m)
if (mp->m_flags & M_PKTHDR) {
BCE_PRINTF("- m_pkthdr: len = %d, flags = 0x%b, "
"csum_flags = %b\n", mp->m_pkthdr.len,
- mp->m_flags, "\20\12M_BCAST\13M_MCAST\14M_FRAG"
- "\15M_FIRSTFRAG\16M_LASTFRAG\21M_VLANTAG"
- "\22M_PROMISC\23M_NOFREE",
- mp->m_pkthdr.csum_flags,
- "\20\1CSUM_IP\2CSUM_TCP\3CSUM_UDP"
- "\5CSUM_FRAGMENT\6CSUM_TSO\11CSUM_IP_CHECKED"
- "\12CSUM_IP_VALID\13CSUM_DATA_VALID"
- "\14CSUM_PSEUDO_HDR");
+ mp->m_flags, M_FLAG_PRINTF,
+ mp->m_pkthdr.csum_flags, CSUM_BITS);
}
if (mp->m_flags & M_EXT) {
diff --git a/freebsd/sys/dev/bce/if_bcereg.h b/freebsd/sys/dev/bce/if_bcereg.h
index 1058c14a..2f166ead 100644
--- a/freebsd/sys/dev/bce/if_bcereg.h
+++ b/freebsd/sys/dev/bce/if_bcereg.h
@@ -28,51 +28,6 @@
#ifndef _BCEREG_H_DEFINED
#define _BCEREG_H_DEFINED
-#include <rtems/bsd/sys/param.h>
-#include <sys/endian.h>
-#include <sys/systm.h>
-#include <sys/sockio.h>
-#include <sys/mbuf.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/module.h>
-#include <sys/socket.h>
-#include <sys/sysctl.h>
-#include <sys/queue.h>
-
-#include <net/bpf.h>
-#include <net/ethernet.h>
-#include <net/if.h>
-#include <net/if_arp.h>
-#include <net/if_dl.h>
-#include <net/if_media.h>
-
-#include <net/if_types.h>
-#include <net/if_vlan_var.h>
-
-#include <netinet/in_systm.h>
-#include <netinet/in.h>
-#include <netinet/if_ether.h>
-#include <netinet/ip.h>
-#include <netinet/ip6.h>
-#include <netinet/tcp.h>
-#include <netinet/udp.h>
-
-#include <machine/bus.h>
-#include <machine/resource.h>
-#include <sys/bus.h>
-#include <sys/rman.h>
-
-#include <dev/mii/mii.h>
-#include <dev/mii/miivar.h>
-#include <rtems/bsd/local/miidevs.h>
-#include <dev/mii/brgphyreg.h>
-
-#include <dev/pci/pcireg.h>
-#include <dev/pci/pcivar.h>
-
-#include <rtems/bsd/local/miibus_if.h>
-
/****************************************************************************/
/* Conversion to FreeBSD type definitions. */
/****************************************************************************/
@@ -281,7 +236,6 @@
*
* #define BCE_DEBUG
* #define BCE_NVRAM_WRITE_SUPPORT
- * #define BCE_JUMBO_HDRSPLIT
*/
/****************************************************************************/
@@ -6135,7 +6089,7 @@ struct l2_fhdr {
/****************************************************************************/
/* The following definitions refer to pre-defined locations in processor */
/* memory space which allows the driver to enable particular functionality */
-/* within the firmware or read specfic information about the running */
+/* within the firmware or read specific information about the running */
/* firmware. */
/****************************************************************************/
@@ -6220,7 +6174,7 @@ struct l2_fhdr {
#define RX_IDX(x) ((x) & USABLE_RX_BD_PER_PAGE)
/*
- * To accomodate jumbo frames, the page chain should
+ * To accommodate jumbo frames, the page chain should
* be 4 times larger than the receive chain.
*/
#define DEFAULT_PG_PAGES (DEFAULT_RX_PAGES * 4)
@@ -6738,6 +6692,7 @@ struct bce_softc
u32 l2fhdr_error_count;
u32 dma_map_addr_tx_failed_count;
u32 dma_map_addr_rx_failed_count;
+ u32 watchdog_timeouts;
/* Host coalescing block command register */
u32 hc_command;
diff --git a/freebsd/sys/dev/bfe/if_bfe.c b/freebsd/sys/dev/bfe/if_bfe.c
index e805ad1c..c87dea13 100644
--- a/freebsd/sys/dev/bfe/if_bfe.c
+++ b/freebsd/sys/dev/bfe/if_bfe.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <net/bpf.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
@@ -365,12 +366,12 @@ bfe_dma_free(struct bfe_softc *sc)
/* Tx ring. */
if (sc->bfe_tx_tag != NULL) {
- if (sc->bfe_tx_map != NULL)
+ if (sc->bfe_tx_dma != 0)
bus_dmamap_unload(sc->bfe_tx_tag, sc->bfe_tx_map);
- if (sc->bfe_tx_map != NULL && sc->bfe_tx_list != NULL)
+ if (sc->bfe_tx_list != NULL)
bus_dmamem_free(sc->bfe_tx_tag, sc->bfe_tx_list,
sc->bfe_tx_map);
- sc->bfe_tx_map = NULL;
+ sc->bfe_tx_dma = 0;
sc->bfe_tx_list = NULL;
bus_dma_tag_destroy(sc->bfe_tx_tag);
sc->bfe_tx_tag = NULL;
@@ -378,12 +379,12 @@ bfe_dma_free(struct bfe_softc *sc)
/* Rx ring. */
if (sc->bfe_rx_tag != NULL) {
- if (sc->bfe_rx_map != NULL)
+ if (sc->bfe_rx_dma != 0)
bus_dmamap_unload(sc->bfe_rx_tag, sc->bfe_rx_map);
- if (sc->bfe_rx_map != NULL && sc->bfe_rx_list != NULL)
+ if (sc->bfe_rx_list != NULL)
bus_dmamem_free(sc->bfe_rx_tag, sc->bfe_rx_list,
sc->bfe_rx_map);
- sc->bfe_rx_map = NULL;
+ sc->bfe_rx_dma = 0;
sc->bfe_rx_list = NULL;
bus_dma_tag_destroy(sc->bfe_rx_tag);
sc->bfe_rx_tag = NULL;
@@ -491,7 +492,6 @@ bfe_attach(device_t dev)
ifp->if_ioctl = bfe_ioctl;
ifp->if_start = bfe_start;
ifp->if_init = bfe_init;
- ifp->if_mtu = ETHERMTU;
IFQ_SET_MAXLEN(&ifp->if_snd, BFE_TX_QLEN);
ifp->if_snd.ifq_drv_maxlen = BFE_TX_QLEN;
IFQ_SET_READY(&ifp->if_snd);
@@ -516,7 +516,7 @@ bfe_attach(device_t dev)
/*
* Tell the upper layer(s) we support long frames.
*/
- ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
+ ifp->if_hdrlen = sizeof(struct ether_vlan_header);
ifp->if_capabilities |= IFCAP_VLAN_MTU;
ifp->if_capenable |= IFCAP_VLAN_MTU;
@@ -1312,22 +1312,22 @@ bfe_stats_update(struct bfe_softc *sc)
stats->rx_control_frames += mib[MIB_RX_NPAUSE];
/* Update counters in ifnet. */
- ifp->if_opackets += (u_long)mib[MIB_TX_GOOD_P];
- ifp->if_collisions += (u_long)mib[MIB_TX_TCOLS];
- ifp->if_oerrors += (u_long)mib[MIB_TX_URUNS] +
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, (u_long)mib[MIB_TX_GOOD_P]);
+ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, (u_long)mib[MIB_TX_TCOLS]);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, (u_long)mib[MIB_TX_URUNS] +
(u_long)mib[MIB_TX_ECOLS] +
(u_long)mib[MIB_TX_DEFERED] +
- (u_long)mib[MIB_TX_CLOST];
+ (u_long)mib[MIB_TX_CLOST]);
- ifp->if_ipackets += (u_long)mib[MIB_RX_GOOD_P];
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, (u_long)mib[MIB_RX_GOOD_P]);
- ifp->if_ierrors += mib[MIB_RX_JABBER] +
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, mib[MIB_RX_JABBER] +
mib[MIB_RX_MISS] +
mib[MIB_RX_CRCA] +
mib[MIB_RX_USIZE] +
mib[MIB_RX_CRC] +
mib[MIB_RX_ALIGN] +
- mib[MIB_RX_SYM];
+ mib[MIB_RX_SYM]);
}
static void
@@ -1405,7 +1405,7 @@ bfe_rxeof(struct bfe_softc *sc)
* reuse mapped buffer from errored frame.
*/
if (bfe_list_newbuf(sc, cons) != 0) {
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
bfe_discard_buf(sc, cons);
continue;
}
@@ -1820,7 +1820,7 @@ bfe_watchdog(struct bfe_softc *sc)
device_printf(sc->bfe_dev, "watchdog timeout -- resetting\n");
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
bfe_init_locked(sc);
diff --git a/freebsd/sys/dev/bge/if_bge.c b/freebsd/sys/dev/bge/if_bge.c
index 5456f3d5..aadf5a99 100644
--- a/freebsd/sys/dev/bge/if_bge.c
+++ b/freebsd/sys/dev/bge/if_bge.c
@@ -85,6 +85,7 @@ __FBSDID("$FreeBSD$");
#include <sys/taskqueue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
@@ -172,6 +173,7 @@ static const struct bge_type {
{ BCOM_VENDORID, BCOM_DEVICEID_BCM5715 },
{ BCOM_VENDORID, BCOM_DEVICEID_BCM5715S },
{ BCOM_VENDORID, BCOM_DEVICEID_BCM5717 },
+ { BCOM_VENDORID, BCOM_DEVICEID_BCM5717C },
{ BCOM_VENDORID, BCOM_DEVICEID_BCM5718 },
{ BCOM_VENDORID, BCOM_DEVICEID_BCM5719 },
{ BCOM_VENDORID, BCOM_DEVICEID_BCM5720 },
@@ -312,6 +314,7 @@ static const struct bge_revision {
{ BGE_CHIPID_BCM5715_A3, "BCM5715 A3" },
{ BGE_CHIPID_BCM5717_A0, "BCM5717 A0" },
{ BGE_CHIPID_BCM5717_B0, "BCM5717 B0" },
+ { BGE_CHIPID_BCM5717_C0, "BCM5717 C0" },
{ BGE_CHIPID_BCM5719_A0, "BCM5719 A0" },
{ BGE_CHIPID_BCM5720_A0, "BCM5720 A0" },
{ BGE_CHIPID_BCM5755_A0, "BCM5755 A0" },
@@ -423,18 +426,19 @@ static int bge_encap(struct bge_softc *, struct mbuf **, uint32_t *);
static void bge_intr(void *);
static int bge_msi_intr(void *);
static void bge_intr_task(void *, int);
-static void bge_start_locked(struct ifnet *);
-static void bge_start(struct ifnet *);
-static int bge_ioctl(struct ifnet *, u_long, caddr_t);
+static void bge_start_locked(if_t);
+static void bge_start(if_t);
+static int bge_ioctl(if_t, u_long, caddr_t);
static void bge_init_locked(struct bge_softc *);
static void bge_init(void *);
static void bge_stop_block(struct bge_softc *, bus_size_t, uint32_t);
static void bge_stop(struct bge_softc *);
static void bge_watchdog(struct bge_softc *);
static int bge_shutdown(device_t);
-static int bge_ifmedia_upd_locked(struct ifnet *);
-static int bge_ifmedia_upd(struct ifnet *);
-static void bge_ifmedia_sts(struct ifnet *, struct ifmediareq *);
+static int bge_ifmedia_upd_locked(if_t);
+static int bge_ifmedia_upd(if_t);
+static void bge_ifmedia_sts(if_t, struct ifmediareq *);
+static uint64_t bge_get_counter(if_t, ift_counter);
static uint8_t bge_nvram_getbyte(struct bge_softc *, int, uint8_t *);
static int bge_read_nvram(struct bge_softc *, caddr_t, int, int);
@@ -475,7 +479,7 @@ static int bge_miibus_readreg(device_t, int, int);
static int bge_miibus_writereg(device_t, int, int, int);
static void bge_miibus_statchg(device_t);
#ifdef DEVICE_POLLING
-static int bge_poll(struct ifnet *ifp, enum poll_cmd cmd, int count);
+static int bge_poll(if_t ifp, enum poll_cmd cmd, int count);
#endif
#define BGE_RESET_SHUTDOWN 0
@@ -543,10 +547,8 @@ DRIVER_MODULE(miibus, bge, miibus_driver, miibus_devclass, 0, 0);
static int bge_allow_asf = 1;
-TUNABLE_INT("hw.bge.allow_asf", &bge_allow_asf);
-
static SYSCTL_NODE(_hw, OID_AUTO, bge, CTLFLAG_RD, 0, "BGE driver parameters");
-SYSCTL_INT(_hw_bge, OID_AUTO, allow_asf, CTLFLAG_RD, &bge_allow_asf, 0,
+SYSCTL_INT(_hw_bge, OID_AUTO, allow_asf, CTLFLAG_RDTUN, &bge_allow_asf, 0,
"Allow ASF mode if available");
#define SPARC64_BLADE_1500_MODEL "SUNW,Sun-Blade-1500"
@@ -1247,7 +1249,7 @@ bge_miibus_statchg(device_t dev)
uint32_t mac_mode, rx_mode, tx_mode;
sc = device_get_softc(dev);
- if ((sc->bge_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ if ((if_getdrvflags(sc->bge_ifp) & IFF_DRV_RUNNING) == 0)
return;
mii = device_get_softc(sc->bge_miibus);
@@ -1323,7 +1325,7 @@ bge_newbuf_std(struct bge_softc *sc, int i)
int error, nsegs;
if (sc->bge_flags & BGE_FLAG_JUMBO_STD &&
- (sc->bge_ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN +
+ (if_getmtu(sc->bge_ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN +
ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN))) {
m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES);
if (m == NULL)
@@ -1385,8 +1387,7 @@ bge_newbuf_jumbo(struct bge_softc *sc, int i)
if (m == NULL)
return (ENOBUFS);
- m_cljget(m, M_NOWAIT, MJUM9BYTES);
- if (!(m->m_flags & M_EXT)) {
+ if (m_cljget(m, M_NOWAIT, MJUM9BYTES) == NULL) {
m_freem(m);
return (ENOBUFS);
}
@@ -1601,14 +1602,14 @@ bge_init_tx_ring(struct bge_softc *sc)
static void
bge_setpromisc(struct bge_softc *sc)
{
- struct ifnet *ifp;
+ if_t ifp;
BGE_LOCK_ASSERT(sc);
ifp = sc->bge_ifp;
/* Enable or disable promiscuous mode as needed. */
- if (ifp->if_flags & IFF_PROMISC)
+ if (if_getflags(ifp) & IFF_PROMISC)
BGE_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC);
else
BGE_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC);
@@ -1617,18 +1618,30 @@ bge_setpromisc(struct bge_softc *sc)
static void
bge_setmulti(struct bge_softc *sc)
{
- struct ifnet *ifp;
- struct ifmultiaddr *ifma;
+ if_t ifp;
+ int mc_count = 0;
uint32_t hashes[4] = { 0, 0, 0, 0 };
- int h, i;
+ int h, i, mcnt;
+ unsigned char *mta;
BGE_LOCK_ASSERT(sc);
ifp = sc->bge_ifp;
- if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) {
+ mc_count = if_multiaddr_count(ifp, -1);
+ mta = malloc(sizeof(unsigned char) * ETHER_ADDR_LEN *
+ mc_count, M_DEVBUF, M_NOWAIT);
+
+ if(mta == NULL) {
+ device_printf(sc->bge_dev,
+ "Failed to allocated temp mcast list\n");
+ return;
+ }
+
+ if (if_getflags(ifp) & IFF_ALLMULTI || if_getflags(ifp) & IFF_PROMISC) {
for (i = 0; i < 4; i++)
CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0xFFFFFFFF);
+ free(mta, M_DEVBUF);
return;
}
@@ -1636,32 +1649,30 @@ bge_setmulti(struct bge_softc *sc)
for (i = 0; i < 4; i++)
CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0);
- /* Now program new ones. */
- if_maddr_rlock(ifp);
- TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_LINK)
- continue;
- h = ether_crc32_le(LLADDR((struct sockaddr_dl *)
- ifma->ifma_addr), ETHER_ADDR_LEN) & 0x7F;
+ if_multiaddr_array(ifp, mta, &mcnt, mc_count);
+ for(i = 0; i < mcnt; i++) {
+ h = ether_crc32_le(mta + (i * ETHER_ADDR_LEN),
+ ETHER_ADDR_LEN) & 0x7F;
hashes[(h & 0x60) >> 5] |= 1 << (h & 0x1F);
}
- if_maddr_runlock(ifp);
for (i = 0; i < 4; i++)
CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), hashes[i]);
+
+ free(mta, M_DEVBUF);
}
static void
bge_setvlan(struct bge_softc *sc)
{
- struct ifnet *ifp;
+ if_t ifp;
BGE_LOCK_ASSERT(sc);
ifp = sc->bge_ifp;
/* Enable or disable VLAN tag stripping as needed. */
- if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING)
BGE_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_KEEP_VLAN_DIAG);
else
BGE_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_KEEP_VLAN_DIAG);
@@ -1995,7 +2006,7 @@ bge_blockinit(struct bge_softc *sc)
/* Configure mbuf pool watermarks */
if (BGE_IS_5717_PLUS(sc)) {
CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0);
- if (sc->bge_ifp->if_mtu > ETHERMTU) {
+ if (if_getmtu(sc->bge_ifp) > ETHERMTU) {
CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x7e);
CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xea);
} else {
@@ -2691,6 +2702,10 @@ bge_chipid(device_t dev)
* registers.
*/
switch (pci_get_device(dev)) {
+ case BCOM_DEVICEID_BCM5717C:
+ /* 5717 C0 seems to belong to 5720 line. */
+ id = BGE_CHIPID_BCM5720_A0;
+ break;
case BCOM_DEVICEID_BCM5717:
case BCOM_DEVICEID_BCM5718:
case BCOM_DEVICEID_BCM5719:
@@ -2816,10 +2831,10 @@ bge_dma_free(struct bge_softc *sc)
bus_dma_tag_destroy(sc->bge_cdata.bge_tx_mtag);
/* Destroy standard RX ring. */
- if (sc->bge_cdata.bge_rx_std_ring_map)
+ if (sc->bge_ldata.bge_rx_std_ring_paddr)
bus_dmamap_unload(sc->bge_cdata.bge_rx_std_ring_tag,
sc->bge_cdata.bge_rx_std_ring_map);
- if (sc->bge_cdata.bge_rx_std_ring_map && sc->bge_ldata.bge_rx_std_ring)
+ if (sc->bge_ldata.bge_rx_std_ring)
bus_dmamem_free(sc->bge_cdata.bge_rx_std_ring_tag,
sc->bge_ldata.bge_rx_std_ring,
sc->bge_cdata.bge_rx_std_ring_map);
@@ -2828,12 +2843,11 @@ bge_dma_free(struct bge_softc *sc)
bus_dma_tag_destroy(sc->bge_cdata.bge_rx_std_ring_tag);
/* Destroy jumbo RX ring. */
- if (sc->bge_cdata.bge_rx_jumbo_ring_map)
+ if (sc->bge_ldata.bge_rx_jumbo_ring_paddr)
bus_dmamap_unload(sc->bge_cdata.bge_rx_jumbo_ring_tag,
sc->bge_cdata.bge_rx_jumbo_ring_map);
- if (sc->bge_cdata.bge_rx_jumbo_ring_map &&
- sc->bge_ldata.bge_rx_jumbo_ring)
+ if (sc->bge_ldata.bge_rx_jumbo_ring)
bus_dmamem_free(sc->bge_cdata.bge_rx_jumbo_ring_tag,
sc->bge_ldata.bge_rx_jumbo_ring,
sc->bge_cdata.bge_rx_jumbo_ring_map);
@@ -2842,12 +2856,11 @@ bge_dma_free(struct bge_softc *sc)
bus_dma_tag_destroy(sc->bge_cdata.bge_rx_jumbo_ring_tag);
/* Destroy RX return ring. */
- if (sc->bge_cdata.bge_rx_return_ring_map)
+ if (sc->bge_ldata.bge_rx_return_ring_paddr)
bus_dmamap_unload(sc->bge_cdata.bge_rx_return_ring_tag,
sc->bge_cdata.bge_rx_return_ring_map);
- if (sc->bge_cdata.bge_rx_return_ring_map &&
- sc->bge_ldata.bge_rx_return_ring)
+ if (sc->bge_ldata.bge_rx_return_ring)
bus_dmamem_free(sc->bge_cdata.bge_rx_return_ring_tag,
sc->bge_ldata.bge_rx_return_ring,
sc->bge_cdata.bge_rx_return_ring_map);
@@ -2856,11 +2869,11 @@ bge_dma_free(struct bge_softc *sc)
bus_dma_tag_destroy(sc->bge_cdata.bge_rx_return_ring_tag);
/* Destroy TX ring. */
- if (sc->bge_cdata.bge_tx_ring_map)
+ if (sc->bge_ldata.bge_tx_ring_paddr)
bus_dmamap_unload(sc->bge_cdata.bge_tx_ring_tag,
sc->bge_cdata.bge_tx_ring_map);
- if (sc->bge_cdata.bge_tx_ring_map && sc->bge_ldata.bge_tx_ring)
+ if (sc->bge_ldata.bge_tx_ring)
bus_dmamem_free(sc->bge_cdata.bge_tx_ring_tag,
sc->bge_ldata.bge_tx_ring,
sc->bge_cdata.bge_tx_ring_map);
@@ -2869,11 +2882,11 @@ bge_dma_free(struct bge_softc *sc)
bus_dma_tag_destroy(sc->bge_cdata.bge_tx_ring_tag);
/* Destroy status block. */
- if (sc->bge_cdata.bge_status_map)
+ if (sc->bge_ldata.bge_status_block_paddr)
bus_dmamap_unload(sc->bge_cdata.bge_status_tag,
sc->bge_cdata.bge_status_map);
- if (sc->bge_cdata.bge_status_map && sc->bge_ldata.bge_status_block)
+ if (sc->bge_ldata.bge_status_block)
bus_dmamem_free(sc->bge_cdata.bge_status_tag,
sc->bge_ldata.bge_status_block,
sc->bge_cdata.bge_status_map);
@@ -2882,11 +2895,11 @@ bge_dma_free(struct bge_softc *sc)
bus_dma_tag_destroy(sc->bge_cdata.bge_status_tag);
/* Destroy statistics block. */
- if (sc->bge_cdata.bge_stats_map)
+ if (sc->bge_ldata.bge_stats_paddr)
bus_dmamap_unload(sc->bge_cdata.bge_stats_tag,
sc->bge_cdata.bge_stats_map);
- if (sc->bge_cdata.bge_stats_map && sc->bge_ldata.bge_stats)
+ if (sc->bge_ldata.bge_stats)
bus_dmamem_free(sc->bge_cdata.bge_stats_tag,
sc->bge_ldata.bge_stats,
sc->bge_cdata.bge_stats_map);
@@ -2908,14 +2921,10 @@ bge_dma_ring_alloc(struct bge_softc *sc, bus_size_t alignment,
bus_addr_t *paddr, const char *msg)
{
struct bge_dmamap_arg ctx;
- bus_addr_t lowaddr;
- bus_size_t ring_end;
int error;
- lowaddr = BUS_SPACE_MAXADDR;
-again:
error = bus_dma_tag_create(sc->bge_cdata.bge_parent_tag,
- alignment, 0, lowaddr, BUS_SPACE_MAXADDR, NULL,
+ alignment, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
NULL, maxsize, 1, maxsize, 0, NULL, NULL, tag);
if (error != 0) {
device_printf(sc->bge_dev,
@@ -2940,25 +2949,6 @@ again:
return (ENOMEM);
}
*paddr = ctx.bge_busaddr;
- ring_end = *paddr + maxsize;
- if ((sc->bge_flags & BGE_FLAG_4G_BNDRY_BUG) != 0 &&
- BGE_ADDR_HI(*paddr) != BGE_ADDR_HI(ring_end)) {
- /*
- * 4GB boundary crossed. Limit maximum allowable DMA
- * address space to 32bit and try again.
- */
- bus_dmamap_unload(*tag, *map);
- bus_dmamem_free(*tag, *ring, *map);
- bus_dma_tag_destroy(*tag);
- if (bootverbose)
- device_printf(sc->bge_dev, "4GB boundary crossed, "
- "limit DMA address space to 32bit for %s\n", msg);
- *ring = NULL;
- *tag = NULL;
- *map = NULL;
- lowaddr = BUS_SPACE_MAXADDR_32BIT;
- goto again;
- }
return (0);
}
@@ -2966,7 +2956,7 @@ static int
bge_dma_alloc(struct bge_softc *sc)
{
bus_addr_t lowaddr;
- bus_size_t boundary, sbsz, rxmaxsegsz, txsegsz, txmaxsegsz;
+ bus_size_t rxmaxsegsz, sbsz, txsegsz, txmaxsegsz;
int i, error;
lowaddr = BUS_SPACE_MAXADDR;
@@ -3053,9 +3043,7 @@ bge_dma_alloc(struct bge_softc *sc)
}
/* Create parent tag for buffers. */
- boundary = 0;
if ((sc->bge_flags & BGE_FLAG_4G_BNDRY_BUG) != 0) {
- boundary = BGE_DMA_BNDRY;
/*
* XXX
* watchdog timeout issue was observed on BCM5704 which
@@ -3066,10 +3054,10 @@ bge_dma_alloc(struct bge_softc *sc)
if (sc->bge_pcixcap != 0)
lowaddr = BUS_SPACE_MAXADDR_32BIT;
}
- error = bus_dma_tag_create(bus_get_dma_tag(sc->bge_dev),
- 1, boundary, lowaddr, BUS_SPACE_MAXADDR, NULL,
- NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT,
- 0, NULL, NULL, &sc->bge_cdata.bge_buffer_tag);
+ error = bus_dma_tag_create(bus_get_dma_tag(sc->bge_dev), 1, 0, lowaddr,
+ BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0,
+ BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL,
+ &sc->bge_cdata.bge_buffer_tag);
if (error != 0) {
device_printf(sc->bge_dev,
"could not allocate buffer dma tag\n");
@@ -3322,7 +3310,7 @@ bge_devinfo(struct bge_softc *sc)
static int
bge_attach(device_t dev)
{
- struct ifnet *ifp;
+ if_t ifp;
struct bge_softc *sc;
uint32_t hwcfg = 0, misccfg, pcistate;
u_char eaddr[ETHER_ADDR_LEN];
@@ -3751,28 +3739,28 @@ bge_attach(device_t dev)
error = ENXIO;
goto fail;
}
- ifp->if_softc = sc;
+ if_setsoftc(ifp, sc);
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_ioctl = bge_ioctl;
- ifp->if_start = bge_start;
- ifp->if_init = bge_init;
- ifp->if_snd.ifq_drv_maxlen = BGE_TX_RING_CNT - 1;
- IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
- IFQ_SET_READY(&ifp->if_snd);
- ifp->if_hwassist = sc->bge_csum_features;
- ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING |
- IFCAP_VLAN_MTU;
+ if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
+ if_setioctlfn(ifp, bge_ioctl);
+ if_setstartfn(ifp, bge_start);
+ if_setinitfn(ifp, bge_init);
+ if_setgetcounterfn(ifp, bge_get_counter);
+ if_setsendqlen(ifp, BGE_TX_RING_CNT - 1);
+ if_setsendqready(ifp);
+ if_sethwassist(ifp, sc->bge_csum_features);
+ if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING |
+ IFCAP_VLAN_MTU);
if ((sc->bge_flags & (BGE_FLAG_TSO | BGE_FLAG_TSO3)) != 0) {
- ifp->if_hwassist |= CSUM_TSO;
- ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO;
+ if_sethwassistbits(ifp, CSUM_TSO, 0);
+ if_setcapabilitiesbit(ifp, IFCAP_TSO4 | IFCAP_VLAN_HWTSO, 0);
}
#ifdef IFCAP_VLAN_HWCSUM
- ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
+ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWCSUM, 0);
#endif
- ifp->if_capenable = ifp->if_capabilities;
+ if_setcapenable(ifp, if_getcapabilities(ifp));
#ifdef DEVICE_POLLING
- ifp->if_capabilities |= IFCAP_POLLING;
+ if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
#endif
/*
@@ -3780,9 +3768,9 @@ bge_attach(device_t dev)
* to hardware bugs.
*/
if (sc->bge_chipid == BGE_CHIPID_BCM5700_B0) {
- ifp->if_capabilities &= ~IFCAP_HWCSUM;
- ifp->if_capenable &= ~IFCAP_HWCSUM;
- ifp->if_hwassist = 0;
+ if_setcapabilitiesbit(ifp, 0, IFCAP_HWCSUM);
+ if_setcapenablebit(ifp, 0, IFCAP_HWCSUM);
+ if_sethwassist(ifp, 0);
}
/*
@@ -3878,9 +3866,10 @@ bge_attach(device_t dev)
again:
bge_asf_driver_up(sc);
- error = mii_attach(dev, &sc->bge_miibus, ifp, bge_ifmedia_upd,
- bge_ifmedia_sts, capmask, sc->bge_phy_addr, MII_OFFSET_ANY,
- MIIF_DOPAUSE);
+ error = mii_attach(dev, &sc->bge_miibus, ifp,
+ (ifm_change_cb_t)bge_ifmedia_upd,
+ (ifm_stat_cb_t)bge_ifmedia_sts, capmask, sc->bge_phy_addr,
+ MII_OFFSET_ANY, MIIF_DOPAUSE);
if (error != 0) {
if (trys++ < 4) {
device_printf(sc->bge_dev, "Try again\n");
@@ -3917,7 +3906,7 @@ again:
ether_ifattach(ifp, eaddr);
/* Tell upper layer we support long frames. */
- ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
+ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
/*
* Hookup IRQ last.
@@ -3964,13 +3953,13 @@ static int
bge_detach(device_t dev)
{
struct bge_softc *sc;
- struct ifnet *ifp;
+ if_t ifp;
sc = device_get_softc(dev);
ifp = sc->bge_ifp;
#ifdef DEVICE_POLLING
- if (ifp->if_capenable & IFCAP_POLLING)
+ if (if_getcapenable(ifp) & IFCAP_POLLING)
ether_poll_deregister(ifp);
#endif
@@ -4326,7 +4315,7 @@ bge_rxreuse_jumbo(struct bge_softc *sc, int i)
static int
bge_rxeof(struct bge_softc *sc, uint16_t rx_prod, int holdlck)
{
- struct ifnet *ifp;
+ if_t ifp;
int rx_npkts = 0, stdcnt = 0, jumbocnt = 0;
uint16_t rx_cons;
@@ -4343,8 +4332,8 @@ bge_rxeof(struct bge_softc *sc, uint16_t rx_prod, int holdlck)
bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag,
sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_POSTWRITE);
if (BGE_IS_JUMBO_CAPABLE(sc) &&
- ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN >
- (MCLBYTES - ETHER_ALIGN))
+ if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN +
+ ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN))
bus_dmamap_sync(sc->bge_cdata.bge_rx_jumbo_ring_tag,
sc->bge_cdata.bge_rx_jumbo_ring_map, BUS_DMASYNC_POSTWRITE);
@@ -4356,7 +4345,7 @@ bge_rxeof(struct bge_softc *sc, uint16_t rx_prod, int holdlck)
int have_tag = 0;
#ifdef DEVICE_POLLING
- if (ifp->if_capenable & IFCAP_POLLING) {
+ if (if_getcapenable(ifp) & IFCAP_POLLING) {
if (sc->rxcycles <= 0)
break;
sc->rxcycles--;
@@ -4368,7 +4357,7 @@ bge_rxeof(struct bge_softc *sc, uint16_t rx_prod, int holdlck)
rxidx = cur_rx->bge_idx;
BGE_INC(rx_cons, sc->bge_return_ring_cnt);
- if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING &&
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING &&
cur_rx->bge_flags & BGE_RXBDFLAG_VLAN_TAG) {
have_tag = 1;
vlan_tag = cur_rx->bge_vlan_tag;
@@ -4383,7 +4372,7 @@ bge_rxeof(struct bge_softc *sc, uint16_t rx_prod, int holdlck)
}
if (bge_newbuf_jumbo(sc, rxidx) != 0) {
bge_rxreuse_jumbo(sc, rxidx);
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
continue;
}
BGE_INC(sc->bge_jumbo, BGE_JUMBO_RX_RING_CNT);
@@ -4396,13 +4385,13 @@ bge_rxeof(struct bge_softc *sc, uint16_t rx_prod, int holdlck)
}
if (bge_newbuf_std(sc, rxidx) != 0) {
bge_rxreuse_std(sc, rxidx);
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
continue;
}
BGE_INC(sc->bge_std, BGE_STD_RX_RING_CNT);
}
- ifp->if_ipackets++;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
#ifndef __NO_STRICT_ALIGNMENT
/*
* For architectures with strict alignment we must make sure
@@ -4417,7 +4406,7 @@ bge_rxeof(struct bge_softc *sc, uint16_t rx_prod, int holdlck)
m->m_pkthdr.len = m->m_len = cur_rx->bge_len - ETHER_CRC_LEN;
m->m_pkthdr.rcvif = ifp;
- if (ifp->if_capenable & IFCAP_RXCSUM)
+ if (if_getcapenable(ifp) & IFCAP_RXCSUM)
bge_rxcsum(sc, cur_rx, m);
/*
@@ -4431,13 +4420,13 @@ bge_rxeof(struct bge_softc *sc, uint16_t rx_prod, int holdlck)
if (holdlck != 0) {
BGE_UNLOCK(sc);
- (*ifp->if_input)(ifp, m);
+ if_input(ifp, m);
BGE_LOCK(sc);
} else
- (*ifp->if_input)(ifp, m);
+ if_input(ifp, m);
rx_npkts++;
- if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
return (rx_npkts);
}
@@ -4465,7 +4454,7 @@ bge_rxeof(struct bge_softc *sc, uint16_t rx_prod, int holdlck)
* If you need correct statistics, you can enable this check.
*/
if (BGE_IS_5705_PLUS(sc))
- ifp->if_ierrors += CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS);
+ if_incierrors(ifp, CSR_READ_4(sc, BGE_RXLP_LOCSTAT_IFIN_DROPS));
#endif
return (rx_npkts);
}
@@ -4509,7 +4498,7 @@ static void
bge_txeof(struct bge_softc *sc, uint16_t tx_cons)
{
struct bge_tx_bd *cur_tx;
- struct ifnet *ifp;
+ if_t ifp;
BGE_LOCK_ASSERT(sc);
@@ -4531,7 +4520,7 @@ bge_txeof(struct bge_softc *sc, uint16_t tx_cons)
idx = sc->bge_tx_saved_considx;
cur_tx = &sc->bge_ldata.bge_tx_ring[idx];
if (cur_tx->bge_flags & BGE_TXBDFLAG_END)
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
if (sc->bge_cdata.bge_tx_chain[idx] != NULL) {
bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag,
sc->bge_cdata.bge_tx_dmamap[idx],
@@ -4545,22 +4534,22 @@ bge_txeof(struct bge_softc *sc, uint16_t tx_cons)
BGE_INC(sc->bge_tx_saved_considx, BGE_TX_RING_CNT);
}
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
if (sc->bge_txcnt == 0)
sc->bge_timer = 0;
}
#ifdef DEVICE_POLLING
static int
-bge_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
+bge_poll(if_t ifp, enum poll_cmd cmd, int count)
{
- struct bge_softc *sc = ifp->if_softc;
+ struct bge_softc *sc = if_getsoftc(ifp);
uint16_t rx_prod, tx_cons;
uint32_t statusword;
int rx_npkts = 0;
BGE_LOCK(sc);
- if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) {
BGE_UNLOCK(sc);
return (rx_npkts);
}
@@ -4592,12 +4581,12 @@ bge_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
sc->rxcycles = count;
rx_npkts = bge_rxeof(sc, rx_prod, 1);
- if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) {
BGE_UNLOCK(sc);
return (rx_npkts);
}
bge_txeof(sc, tx_cons);
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
bge_start_locked(ifp);
BGE_UNLOCK(sc);
@@ -4623,7 +4612,7 @@ static void
bge_intr_task(void *arg, int pending)
{
struct bge_softc *sc;
- struct ifnet *ifp;
+ if_t ifp;
uint32_t status, status_tag;
uint16_t rx_prod, tx_cons;
@@ -4631,7 +4620,7 @@ bge_intr_task(void *arg, int pending)
ifp = sc->bge_ifp;
BGE_LOCK(sc);
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
BGE_UNLOCK(sc);
return;
}
@@ -4660,17 +4649,17 @@ bge_intr_task(void *arg, int pending)
/* Let controller work. */
bge_writembx(sc, BGE_MBX_IRQ0_LO, status_tag);
- if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING &&
sc->bge_rx_saved_considx != rx_prod) {
/* Check RX return ring producer/consumer. */
BGE_UNLOCK(sc);
bge_rxeof(sc, rx_prod, 0);
BGE_LOCK(sc);
}
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
/* Check TX ring producer/consumer. */
bge_txeof(sc, tx_cons);
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
bge_start_locked(ifp);
}
BGE_UNLOCK(sc);
@@ -4680,7 +4669,7 @@ static void
bge_intr(void *xsc)
{
struct bge_softc *sc;
- struct ifnet *ifp;
+ if_t ifp;
uint32_t statusword;
uint16_t rx_prod, tx_cons;
@@ -4691,7 +4680,7 @@ bge_intr(void *xsc)
ifp = sc->bge_ifp;
#ifdef DEVICE_POLLING
- if (ifp->if_capenable & IFCAP_POLLING) {
+ if (if_getcapenable(ifp) & IFCAP_POLLING) {
BGE_UNLOCK(sc);
return;
}
@@ -4740,18 +4729,18 @@ bge_intr(void *xsc)
statusword || sc->bge_link_evt)
bge_link_upd(sc);
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
/* Check RX return ring producer/consumer. */
bge_rxeof(sc, rx_prod, 1);
}
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
/* Check TX ring producer/consumer. */
bge_txeof(sc, tx_cons);
}
- if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
- !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING &&
+ !if_sendq_empty(ifp))
bge_start_locked(ifp);
BGE_UNLOCK(sc);
@@ -4815,7 +4804,7 @@ bge_tick(void *xsc)
*/
#ifdef DEVICE_POLLING
/* In polling mode we poll link state in bge_poll(). */
- if (!(sc->bge_ifp->if_capenable & IFCAP_POLLING))
+ if (!(if_getcapenable(sc->bge_ifp) & IFCAP_POLLING))
#endif
{
sc->bge_link_evt++;
@@ -4836,7 +4825,7 @@ bge_tick(void *xsc)
static void
bge_stats_update_regs(struct bge_softc *sc)
{
- struct ifnet *ifp;
+ if_t ifp;
struct bge_mac_stats *stats;
uint32_t val;
@@ -4936,10 +4925,6 @@ bge_stats_update_regs(struct bge_softc *sc)
stats->RecvThresholdHit +=
CSR_READ_4(sc, BGE_RXLP_LOCSTAT_RXTHRESH_HIT);
- ifp->if_collisions = (u_long)stats->etherStatsCollisions;
- ifp->if_ierrors = (u_long)(stats->NoMoreRxBDs + stats->InputDiscards +
- stats->InputErrors);
-
if (sc->bge_flags & BGE_FLAG_RDMA_BUG) {
/*
* If controller transmitted more than BGE_NUM_RDMA_CHANNELS
@@ -5004,7 +4989,7 @@ bge_stats_clear_regs(struct bge_softc *sc)
static void
bge_stats_update(struct bge_softc *sc)
{
- struct ifnet *ifp;
+ if_t ifp;
bus_size_t stats;
uint32_t cnt; /* current register value */
@@ -5016,21 +5001,21 @@ bge_stats_update(struct bge_softc *sc)
CSR_READ_4(sc, stats + offsetof(struct bge_stats, stat))
cnt = READ_STAT(sc, stats, txstats.etherStatsCollisions.bge_addr_lo);
- ifp->if_collisions += (uint32_t)(cnt - sc->bge_tx_collisions);
+ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, cnt - sc->bge_tx_collisions);
sc->bge_tx_collisions = cnt;
cnt = READ_STAT(sc, stats, nicNoMoreRxBDs.bge_addr_lo);
- ifp->if_ierrors += (uint32_t)(cnt - sc->bge_rx_nobds);
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, cnt - sc->bge_rx_nobds);
sc->bge_rx_nobds = cnt;
cnt = READ_STAT(sc, stats, ifInErrors.bge_addr_lo);
- ifp->if_ierrors += (uint32_t)(cnt - sc->bge_rx_inerrs);
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, cnt - sc->bge_rx_inerrs);
sc->bge_rx_inerrs = cnt;
cnt = READ_STAT(sc, stats, ifInDiscards.bge_addr_lo);
- ifp->if_ierrors += (uint32_t)(cnt - sc->bge_rx_discards);
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, cnt - sc->bge_rx_discards);
sc->bge_rx_discards = cnt;
cnt = READ_STAT(sc, stats, txstats.ifOutDiscards.bge_addr_lo);
- ifp->if_oerrors += (uint32_t)(cnt - sc->bge_tx_discards);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, cnt - sc->bge_tx_discards);
sc->bge_tx_discards = cnt;
#undef READ_STAT
@@ -5357,29 +5342,29 @@ bge_encap(struct bge_softc *sc, struct mbuf **m_head, uint32_t *txidx)
* to the mbuf data regions directly in the transmit descriptors.
*/
static void
-bge_start_locked(struct ifnet *ifp)
+bge_start_locked(if_t ifp)
{
struct bge_softc *sc;
struct mbuf *m_head;
uint32_t prodidx;
int count;
- sc = ifp->if_softc;
+ sc = if_getsoftc(ifp);
BGE_LOCK_ASSERT(sc);
if (!sc->bge_link ||
- (ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
+ (if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING)
return;
prodidx = sc->bge_tx_prodidx;
- for (count = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd);) {
+ for (count = 0; !if_sendq_empty(ifp);) {
if (sc->bge_txcnt > BGE_TX_RING_CNT - 16) {
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
break;
}
- IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
+ m_head = if_dequeue(ifp);
if (m_head == NULL)
break;
@@ -5391,8 +5376,8 @@ bge_start_locked(struct ifnet *ifp)
if (bge_encap(sc, &m_head, &prodidx)) {
if (m_head == NULL)
break;
- IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if_sendq_prepend(ifp, m_head);
+ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
break;
}
++count;
@@ -5401,11 +5386,7 @@ bge_start_locked(struct ifnet *ifp)
* If there's a BPF listener, bounce a copy of this frame
* to him.
*/
-#ifdef ETHER_BPF_MTAP
- ETHER_BPF_MTAP(ifp, m_head);
-#else
- BPF_MTAP(ifp, m_head);
-#endif
+ if_bpfmtap(ifp, m_head);
}
if (count > 0) {
@@ -5431,11 +5412,11 @@ bge_start_locked(struct ifnet *ifp)
* to the mbuf data regions directly in the transmit descriptors.
*/
static void
-bge_start(struct ifnet *ifp)
+bge_start(if_t ifp)
{
struct bge_softc *sc;
- sc = ifp->if_softc;
+ sc = if_getsoftc(ifp);
BGE_LOCK(sc);
bge_start_locked(ifp);
BGE_UNLOCK(sc);
@@ -5444,7 +5425,7 @@ bge_start(struct ifnet *ifp)
static void
bge_init_locked(struct bge_softc *sc)
{
- struct ifnet *ifp;
+ if_t ifp;
uint16_t *m;
uint32_t mode;
@@ -5452,7 +5433,7 @@ bge_init_locked(struct bge_softc *sc)
ifp = sc->bge_ifp;
- if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
return;
/* Cancel pending I/O and flush buffers. */
@@ -5478,9 +5459,9 @@ bge_init_locked(struct bge_softc *sc)
ifp = sc->bge_ifp;
/* Specify MTU. */
- CSR_WRITE_4(sc, BGE_RX_MTU, ifp->if_mtu +
+ CSR_WRITE_4(sc, BGE_RX_MTU, if_getmtu(ifp) +
ETHER_HDR_LEN + ETHER_CRC_LEN +
- (ifp->if_capenable & IFCAP_VLAN_MTU ? ETHER_VLAN_ENCAP_LEN : 0));
+ (if_getcapenable(ifp) & IFCAP_VLAN_MTU ? ETHER_VLAN_ENCAP_LEN : 0));
/* Load our MAC address. */
m = (uint16_t *)IF_LLADDR(sc->bge_ifp);
@@ -5501,10 +5482,10 @@ bge_init_locked(struct bge_softc *sc)
sc->bge_csum_features &= ~CSUM_UDP;
else
sc->bge_csum_features |= CSUM_UDP;
- if (ifp->if_capabilities & IFCAP_TXCSUM &&
- ifp->if_capenable & IFCAP_TXCSUM) {
- ifp->if_hwassist &= ~(BGE_CSUM_FEATURES | CSUM_UDP);
- ifp->if_hwassist |= sc->bge_csum_features;
+ if (if_getcapabilities(ifp) & IFCAP_TXCSUM &&
+ if_getcapenable(ifp) & IFCAP_TXCSUM) {
+ if_sethwassistbits(ifp, 0, (BGE_CSUM_FEATURES | CSUM_UDP));
+ if_sethwassistbits(ifp, sc->bge_csum_features, 0);
}
/* Init RX ring. */
@@ -5534,8 +5515,8 @@ bge_init_locked(struct bge_softc *sc)
/* Init jumbo RX ring. */
if (BGE_IS_JUMBO_CAPABLE(sc) &&
- ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN >
- (MCLBYTES - ETHER_ALIGN)) {
+ if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN +
+ ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN)) {
if (bge_init_rx_ring_jumbo(sc) != 0) {
device_printf(sc->bge_dev,
"no memory for jumbo Rx buffers.\n");
@@ -5571,6 +5552,8 @@ bge_init_locked(struct bge_softc *sc)
mode = CSR_READ_4(sc, BGE_RX_MODE);
if (BGE_IS_5755_PLUS(sc))
mode |= BGE_RXMODE_IPV6_ENABLE;
+ if (sc->bge_asicrev == BGE_ASICREV_BCM5762)
+ mode |= BGE_RXMODE_IPV4_FRAG_FIX;
CSR_WRITE_4(sc,BGE_RX_MODE, mode | BGE_RXMODE_ENABLE);
DELAY(10);
@@ -5594,7 +5577,7 @@ bge_init_locked(struct bge_softc *sc)
#ifdef DEVICE_POLLING
/* Disable interrupts if we are polling. */
- if (ifp->if_capenable & IFCAP_POLLING) {
+ if (if_getcapenable(ifp) & IFCAP_POLLING) {
BGE_SETBIT(sc, BGE_PCI_MISC_CTL,
BGE_PCIMISCCTL_MASK_PCI_INTR);
bge_writembx(sc, BGE_MBX_IRQ0_LO, 1);
@@ -5608,8 +5591,8 @@ bge_init_locked(struct bge_softc *sc)
bge_writembx(sc, BGE_MBX_IRQ0_LO, 0);
}
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
+ if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
bge_ifmedia_upd_locked(ifp);
@@ -5630,9 +5613,9 @@ bge_init(void *xsc)
* Set media options.
*/
static int
-bge_ifmedia_upd(struct ifnet *ifp)
+bge_ifmedia_upd(if_t ifp)
{
- struct bge_softc *sc = ifp->if_softc;
+ struct bge_softc *sc = if_getsoftc(ifp);
int res;
BGE_LOCK(sc);
@@ -5643,9 +5626,9 @@ bge_ifmedia_upd(struct ifnet *ifp)
}
static int
-bge_ifmedia_upd_locked(struct ifnet *ifp)
+bge_ifmedia_upd_locked(if_t ifp)
{
- struct bge_softc *sc = ifp->if_softc;
+ struct bge_softc *sc = if_getsoftc(ifp);
struct mii_data *mii;
struct mii_softc *miisc;
struct ifmedia *ifm;
@@ -5728,14 +5711,14 @@ bge_ifmedia_upd_locked(struct ifnet *ifp)
* Report current media status.
*/
static void
-bge_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
+bge_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
{
- struct bge_softc *sc = ifp->if_softc;
+ struct bge_softc *sc = if_getsoftc(ifp);
struct mii_data *mii;
BGE_LOCK(sc);
- if ((ifp->if_flags & IFF_UP) == 0) {
+ if ((if_getflags(ifp) & IFF_UP) == 0) {
BGE_UNLOCK(sc);
return;
}
@@ -5768,9 +5751,9 @@ bge_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
}
static int
-bge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+bge_ioctl(if_t ifp, u_long command, caddr_t data)
{
- struct bge_softc *sc = ifp->if_softc;
+ struct bge_softc *sc = if_getsoftc(ifp);
struct ifreq *ifr = (struct ifreq *) data;
struct mii_data *mii;
int flags, mask, error = 0;
@@ -5789,10 +5772,10 @@ bge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
break;
}
BGE_LOCK(sc);
- if (ifp->if_mtu != ifr->ifr_mtu) {
- ifp->if_mtu = ifr->ifr_mtu;
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if (if_getmtu(ifp) != ifr->ifr_mtu) {
+ if_setmtu(ifp, ifr->ifr_mtu);
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
+ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
bge_init_locked(sc);
}
}
@@ -5800,7 +5783,7 @@ bge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
break;
case SIOCSIFFLAGS:
BGE_LOCK(sc);
- if (ifp->if_flags & IFF_UP) {
+ if (if_getflags(ifp) & IFF_UP) {
/*
* If only the state of the PROMISC flag changed,
* then just use the 'set promisc mode' command
@@ -5809,8 +5792,8 @@ bge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
* waiting for it to start up, which may take a
* second or two. Similarly for ALLMULTI.
*/
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- flags = ifp->if_flags ^ sc->bge_if_flags;
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
+ flags = if_getflags(ifp) ^ sc->bge_if_flags;
if (flags & IFF_PROMISC)
bge_setpromisc(sc);
if (flags & IFF_ALLMULTI)
@@ -5818,17 +5801,17 @@ bge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
} else
bge_init_locked(sc);
} else {
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
bge_stop(sc);
}
}
- sc->bge_if_flags = ifp->if_flags;
+ sc->bge_if_flags = if_getflags(ifp);
BGE_UNLOCK(sc);
error = 0;
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
BGE_LOCK(sc);
bge_setmulti(sc);
BGE_UNLOCK(sc);
@@ -5847,7 +5830,7 @@ bge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
}
break;
case SIOCSIFCAP:
- mask = ifr->ifr_reqcap ^ ifp->if_capenable;
+ mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
if (mask & IFCAP_POLLING) {
if (ifr->ifr_reqcap & IFCAP_POLLING) {
@@ -5858,7 +5841,7 @@ bge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
BGE_SETBIT(sc, BGE_PCI_MISC_CTL,
BGE_PCIMISCCTL_MASK_PCI_INTR);
bge_writembx(sc, BGE_MBX_IRQ0_LO, 1);
- ifp->if_capenable |= IFCAP_POLLING;
+ if_setcapenablebit(ifp, IFCAP_POLLING, 0);
BGE_UNLOCK(sc);
} else {
error = ether_poll_deregister(ifp);
@@ -5867,53 +5850,55 @@ bge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
BGE_CLRBIT(sc, BGE_PCI_MISC_CTL,
BGE_PCIMISCCTL_MASK_PCI_INTR);
bge_writembx(sc, BGE_MBX_IRQ0_LO, 0);
- ifp->if_capenable &= ~IFCAP_POLLING;
+ if_setcapenablebit(ifp, 0, IFCAP_POLLING);
BGE_UNLOCK(sc);
}
}
#endif
if ((mask & IFCAP_TXCSUM) != 0 &&
- (ifp->if_capabilities & IFCAP_TXCSUM) != 0) {
- ifp->if_capenable ^= IFCAP_TXCSUM;
- if ((ifp->if_capenable & IFCAP_TXCSUM) != 0)
- ifp->if_hwassist |= sc->bge_csum_features;
+ (if_getcapabilities(ifp) & IFCAP_TXCSUM) != 0) {
+ if_togglecapenable(ifp, IFCAP_TXCSUM);
+ if ((if_getcapenable(ifp) & IFCAP_TXCSUM) != 0)
+ if_sethwassistbits(ifp,
+ sc->bge_csum_features, 0);
else
- ifp->if_hwassist &= ~sc->bge_csum_features;
+ if_sethwassistbits(ifp, 0,
+ sc->bge_csum_features);
}
if ((mask & IFCAP_RXCSUM) != 0 &&
- (ifp->if_capabilities & IFCAP_RXCSUM) != 0)
- ifp->if_capenable ^= IFCAP_RXCSUM;
+ (if_getcapabilities(ifp) & IFCAP_RXCSUM) != 0)
+ if_togglecapenable(ifp, IFCAP_RXCSUM);
if ((mask & IFCAP_TSO4) != 0 &&
- (ifp->if_capabilities & IFCAP_TSO4) != 0) {
- ifp->if_capenable ^= IFCAP_TSO4;
- if ((ifp->if_capenable & IFCAP_TSO4) != 0)
- ifp->if_hwassist |= CSUM_TSO;
+ (if_getcapabilities(ifp) & IFCAP_TSO4) != 0) {
+ if_togglecapenable(ifp, IFCAP_TSO4);
+ if ((if_getcapenable(ifp) & IFCAP_TSO4) != 0)
+ if_sethwassistbits(ifp, CSUM_TSO, 0);
else
- ifp->if_hwassist &= ~CSUM_TSO;
+ if_sethwassistbits(ifp, 0, CSUM_TSO);
}
if (mask & IFCAP_VLAN_MTU) {
- ifp->if_capenable ^= IFCAP_VLAN_MTU;
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_togglecapenable(ifp, IFCAP_VLAN_MTU);
+ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
bge_init(sc);
}
if ((mask & IFCAP_VLAN_HWTSO) != 0 &&
- (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0)
- ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
+ (if_getcapabilities(ifp) & IFCAP_VLAN_HWTSO) != 0)
+ if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
if ((mask & IFCAP_VLAN_HWTAGGING) != 0 &&
- (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) {
- ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
- if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
- ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
+ (if_getcapabilities(ifp) & IFCAP_VLAN_HWTAGGING) != 0) {
+ if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
+ if ((if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) == 0)
+ if_setcapenablebit(ifp, 0, IFCAP_VLAN_HWTSO);
BGE_LOCK(sc);
bge_setvlan(sc);
BGE_UNLOCK(sc);
}
#ifdef VLAN_CAPABILITIES
- VLAN_CAPABILITIES(ifp);
+ if_vlancap(ifp);
#endif
break;
default:
@@ -5927,7 +5912,7 @@ bge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
static void
bge_watchdog(struct bge_softc *sc)
{
- struct ifnet *ifp;
+ if_t ifp;
uint32_t status;
BGE_LOCK_ASSERT(sc);
@@ -5966,10 +5951,10 @@ bge_watchdog(struct bge_softc *sc)
if_printf(ifp, "watchdog timeout -- resetting\n");
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
bge_init_locked(sc);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
static void
@@ -5993,7 +5978,7 @@ bge_stop_block(struct bge_softc *sc, bus_size_t reg, uint32_t bit)
static void
bge_stop(struct bge_softc *sc)
{
- struct ifnet *ifp;
+ if_t ifp;
BGE_LOCK_ASSERT(sc);
@@ -6083,7 +6068,7 @@ bge_stop(struct bge_softc *sc)
if_printf(sc->bge_ifp, "link DOWN\n");
sc->bge_link = 0;
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+ if_setdrvflagbits(ifp, 0, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE));
}
/*
@@ -6120,14 +6105,14 @@ static int
bge_resume(device_t dev)
{
struct bge_softc *sc;
- struct ifnet *ifp;
+ if_t ifp;
sc = device_get_softc(dev);
BGE_LOCK(sc);
ifp = sc->bge_ifp;
- if (ifp->if_flags & IFF_UP) {
+ if (if_getflags(ifp) & IFF_UP) {
bge_init_locked(sc);
- if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
bge_start_locked(ifp);
}
BGE_UNLOCK(sc);
@@ -6261,7 +6246,6 @@ bge_add_sysctls(struct bge_softc *sc)
{
struct sysctl_ctx_list *ctx;
struct sysctl_oid_list *children;
- char tn[32];
int unit;
ctx = device_get_sysctl_ctx(sc->bge_dev);
@@ -6300,18 +6284,14 @@ bge_add_sysctls(struct bge_softc *sc)
* consumes a lot of CPU cycles, so leave it off by default.
*/
sc->bge_forced_collapse = 0;
- snprintf(tn, sizeof(tn), "dev.bge.%d.forced_collapse", unit);
- TUNABLE_INT_FETCH(tn, &sc->bge_forced_collapse);
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "forced_collapse",
- CTLFLAG_RW, &sc->bge_forced_collapse, 0,
+ CTLFLAG_RWTUN, &sc->bge_forced_collapse, 0,
"Number of fragmented TX buffers of a frame allowed before "
"forced collapsing");
sc->bge_msi = 1;
- snprintf(tn, sizeof(tn), "dev.bge.%d.msi", unit);
- TUNABLE_INT_FETCH(tn, &sc->bge_msi);
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "msi",
- CTLFLAG_RD, &sc->bge_msi, 0, "Enable MSI");
+ CTLFLAG_RDTUN, &sc->bge_msi, 0, "Enable MSI");
/*
* It seems all Broadcom controllers have a bug that can generate UDP
@@ -6324,10 +6304,8 @@ bge_add_sysctls(struct bge_softc *sc)
* dev.bge.0.forced_udpcsum.
*/
sc->bge_forced_udpcsum = 0;
- snprintf(tn, sizeof(tn), "dev.bge.%d.bge_forced_udpcsum", unit);
- TUNABLE_INT_FETCH(tn, &sc->bge_forced_udpcsum);
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "forced_udpcsum",
- CTLFLAG_RW, &sc->bge_forced_udpcsum, 0,
+ CTLFLAG_RWTUN, &sc->bge_forced_udpcsum, 0,
"Enable UDP checksum offloading even if controller can "
"generate UDP checksum value 0");
@@ -6796,3 +6774,25 @@ bge_get_eaddr(struct bge_softc *sc, uint8_t eaddr[])
}
return (*func == NULL ? ENXIO : 0);
}
+
+static uint64_t
+bge_get_counter(if_t ifp, ift_counter cnt)
+{
+ struct bge_softc *sc;
+ struct bge_mac_stats *stats;
+
+ sc = if_getsoftc(ifp);
+ if (!BGE_IS_5705_PLUS(sc))
+ return (if_get_counter_default(ifp, cnt));
+ stats = &sc->bge_mac_stats;
+
+ switch (cnt) {
+ case IFCOUNTER_IERRORS:
+ return (stats->NoMoreRxBDs + stats->InputDiscards +
+ stats->InputErrors);
+ case IFCOUNTER_COLLISIONS:
+ return (stats->etherStatsCollisions);
+ default:
+ return (if_get_counter_default(ifp, cnt));
+ }
+}
diff --git a/freebsd/sys/dev/bge/if_bgereg.h b/freebsd/sys/dev/bge/if_bgereg.h
index acac8b28..0cf9ca18 100644
--- a/freebsd/sys/dev/bge/if_bgereg.h
+++ b/freebsd/sys/dev/bge/if_bgereg.h
@@ -329,6 +329,7 @@
#define BGE_CHIPID_BCM57780_A1 0x57780001
#define BGE_CHIPID_BCM5717_A0 0x05717000
#define BGE_CHIPID_BCM5717_B0 0x05717100
+#define BGE_CHIPID_BCM5717_C0 0x05717200
#define BGE_CHIPID_BCM5719_A0 0x05719000
#define BGE_CHIPID_BCM5720_A0 0x05720000
#define BGE_CHIPID_BCM5762_A0 0x05762000
@@ -791,11 +792,11 @@
#define BGE_LEDCTL_10MBPS_LED 0x00000008
#define BGE_LEDCTL_TRAFLED_OVERRIDE 0x00000010
#define BGE_LEDCTL_TRAFLED_BLINK 0x00000020
-#define BGE_LEDCTL_TREFLED_BLINK_2 0x00000040
+#define BGE_LEDCTL_TRAFLED_BLINK_2 0x00000040
#define BGE_LEDCTL_1000MBPS_STS 0x00000080
#define BGE_LEDCTL_100MBPS_STS 0x00000100
#define BGE_LEDCTL_10MBPS_STS 0x00000200
-#define BGE_LEDCTL_TRADLED_STS 0x00000400
+#define BGE_LEDCTL_TRAFLED_STS 0x00000400
#define BGE_LEDCTL_BLINKPERIOD 0x7FF80000
#define BGE_LEDCTL_BLINKPERIOD_OVERRIDE 0x80000000
@@ -841,6 +842,7 @@
#define BGE_RXMODE_RX_NO_CRC_CHECK 0x00000200
#define BGE_RXMODE_RX_KEEP_VLAN_DIAG 0x00000400
#define BGE_RXMODE_IPV6_ENABLE 0x01000000
+#define BGE_RXMODE_IPV4_FRAG_FIX 0x02000000
/* Receive MAC status register */
#define BGE_RXSTAT_REMOTE_XOFFED 0x00000001
@@ -2451,6 +2453,7 @@ struct bge_status_block {
#define BCOM_DEVICEID_BCM5715 0x1678
#define BCOM_DEVICEID_BCM5715S 0x1679
#define BCOM_DEVICEID_BCM5717 0x1655
+#define BCOM_DEVICEID_BCM5717C 0x1665
#define BCOM_DEVICEID_BCM5718 0x1656
#define BCOM_DEVICEID_BCM5719 0x1657
#define BCOM_DEVICEID_BCM5720_PP 0x1658 /* Not released to public. */
@@ -2861,16 +2864,6 @@ struct bge_gib {
#define BGE_DMA_MAXADDR 0xFFFFFFFFFF
#endif
-#ifdef PAE
-#define BGE_DMA_BNDRY 0x80000000
-#else
-#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
-#define BGE_DMA_BNDRY 0x100000000
-#else
-#define BGE_DMA_BNDRY 0
-#endif
-#endif
-
/*
* Ring structures. Most of these reside in host memory and we tell
* the NIC where they are via the ring control blocks. The exceptions
diff --git a/freebsd/sys/dev/cadence/if_cgem.c b/freebsd/sys/dev/cadence/if_cgem.c
index 950208e6..3098ee5d 100644
--- a/freebsd/sys/dev/cadence/if_cgem.c
+++ b/freebsd/sys/dev/cadence/if_cgem.c
@@ -54,7 +54,6 @@ __FBSDID("$FreeBSD$");
#include <net/ethernet.h>
#include <net/if.h>
-#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
@@ -85,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/miibus_if.h>
#ifdef __rtems__
#pragma GCC diagnostic ignored "-Wpointer-sign"
+#pragma GCC diagnostic ignored "-Wincompatible-pointer-types"
#include <rtems/bsd/bsd.h>
#endif /* __rtems__ */
@@ -106,7 +106,7 @@ __FBSDID("$FreeBSD$");
CSUM_TCP_IPV6 | CSUM_UDP_IPV6)
struct cgem_softc {
- struct ifnet *ifp;
+ if_t ifp;
struct mtx sc_mtx;
device_t dev;
device_t miibus;
@@ -124,7 +124,7 @@ struct cgem_softc {
bus_dma_tag_t mbuf_dma_tag;
/* receive descriptor ring */
- struct cgem_rx_desc volatile *rxring;
+ struct cgem_rx_desc *rxring;
bus_addr_t rxring_physaddr;
struct mbuf *rxring_m[CGEM_NUM_RX_DESCS];
#ifndef __rtems__
@@ -142,7 +142,7 @@ struct cgem_softc {
uint32_t rx_frames_prev;
/* transmit descriptor ring */
- struct cgem_tx_desc volatile *txring;
+ struct cgem_tx_desc *txring;
bus_addr_t txring_physaddr;
struct mbuf *txring_m[CGEM_NUM_TX_DESCS];
#ifndef __rtems__
@@ -266,7 +266,7 @@ cgem_get_mac(struct cgem_softc *sc, u_char eaddr[])
eaddr[5] = rnd & 0xff;
#else /* __rtems__ */
rtems_bsd_get_mac_address(device_get_name(sc->dev),
- device_get_unit(sc->dev), eaddr);
+ device_get_unit(sc->dev), eaddr);
#endif /* __rtems__ */
device_printf(sc->dev, "no mac address found, assigning "
@@ -317,9 +317,10 @@ cgem_mac_hash(u_char eaddr[])
static void
cgem_rx_filter(struct cgem_softc *sc)
{
- struct ifnet *ifp = sc->ifp;
- struct ifmultiaddr *ifma;
- int index;
+ if_t ifp = sc->ifp;
+ u_char *mta;
+
+ int index, i, mcnt;
uint32_t hash_hi, hash_lo;
uint32_t net_cfg;
@@ -332,28 +333,34 @@ cgem_rx_filter(struct cgem_softc *sc)
CGEM_NET_CFG_NO_BCAST |
CGEM_NET_CFG_COPY_ALL);
- if ((ifp->if_flags & IFF_PROMISC) != 0)
+ if ((if_getflags(ifp) & IFF_PROMISC) != 0)
net_cfg |= CGEM_NET_CFG_COPY_ALL;
else {
- if ((ifp->if_flags & IFF_BROADCAST) == 0)
+ if ((if_getflags(ifp) & IFF_BROADCAST) == 0)
net_cfg |= CGEM_NET_CFG_NO_BCAST;
- if ((ifp->if_flags & IFF_ALLMULTI) != 0) {
+ if ((if_getflags(ifp) & IFF_ALLMULTI) != 0) {
hash_hi = 0xffffffff;
hash_lo = 0xffffffff;
} else {
- if_maddr_rlock(ifp);
- TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_LINK)
- continue;
+ mcnt = if_multiaddr_count(ifp, -1);
+ mta = malloc(ETHER_ADDR_LEN * mcnt, M_DEVBUF,
+ M_NOWAIT);
+ if (mta == NULL) {
+ device_printf(sc->dev,
+ "failed to allocate temp mcast list\n");
+ return;
+ }
+ if_multiaddr_array(ifp, mta, &mcnt, mcnt);
+ for (i = 0; i < mcnt; i++) {
index = cgem_mac_hash(
LLADDR((struct sockaddr_dl *)
- ifma->ifma_addr));
+ (mta + (i * ETHER_ADDR_LEN))));
if (index > 31)
- hash_hi |= (1<<(index-32));
+ hash_hi |= (1 << (index - 32));
else
- hash_lo |= (1<<index);
+ hash_lo |= (1 << index);
}
- if_maddr_runlock(ifp);
+ free(mta, M_DEVBUF);
}
if (hash_hi != 0 || hash_lo != 0)
@@ -438,10 +445,7 @@ cgem_setup_descs(struct cgem_softc *sc)
sc->rxring[i].ctl = 0;
sc->rxring_m[i] = NULL;
#ifndef __rtems__
- err = bus_dmamap_create(sc->mbuf_dma_tag, 0,
- &sc->rxring_m_dmamap[i]);
- if (err)
- return (err);
+ sc->rxring_m_dmamap[i] = NULL;
#endif /* __rtems__ */
}
sc->rxring[CGEM_NUM_RX_DESCS - 1].addr |= CGEM_RXDESC_WRAP;
@@ -473,10 +477,7 @@ cgem_setup_descs(struct cgem_softc *sc)
sc->txring[i].ctl = CGEM_TXDESC_USED;
sc->txring_m[i] = NULL;
#ifndef __rtems__
- err = bus_dmamap_create(sc->mbuf_dma_tag, 0,
- &sc->txring_m_dmamap[i]);
- if (err)
- return (err);
+ sc->txring_m_dmamap[i] = NULL;
#endif /* __rtems__ */
}
sc->txring[CGEM_NUM_TX_DESCS - 1].ctl |= CGEM_TXDESC_WRAP;
@@ -512,20 +513,29 @@ cgem_fill_rqueue(struct cgem_softc *sc)
m->m_pkthdr.len = MCLBYTES;
m->m_pkthdr.rcvif = sc->ifp;
-#ifndef __rtems__
/* Load map and plug in physical address. */
+#ifndef __rtems__
+ if (bus_dmamap_create(sc->mbuf_dma_tag, 0,
+ &sc->rxring_m_dmamap[sc->rxring_hd_ptr])) {
+ sc->rxdmamapfails++;
+ m_free(m);
+ break;
+ }
if (bus_dmamap_load_mbuf_sg(sc->mbuf_dma_tag,
sc->rxring_m_dmamap[sc->rxring_hd_ptr], m,
segs, &nsegs, BUS_DMA_NOWAIT)) {
sc->rxdmamapfails++;
+ bus_dmamap_destroy(sc->mbuf_dma_tag,
+ sc->rxring_m_dmamap[sc->rxring_hd_ptr]);
+ sc->rxring_m_dmamap[sc->rxring_hd_ptr] = NULL;
m_free(m);
break;
}
#endif /* __rtems__ */
sc->rxring_m[sc->rxring_hd_ptr] = m;
-#ifndef __rtems__
/* Sync cache with receive buffer. */
+#ifndef __rtems__
bus_dmamap_sync(sc->mbuf_dma_tag,
sc->rxring_m_dmamap[sc->rxring_hd_ptr],
BUS_DMASYNC_PREREAD);
@@ -551,7 +561,7 @@ cgem_fill_rqueue(struct cgem_softc *sc)
static void
cgem_recv(struct cgem_softc *sc)
{
- struct ifnet *ifp = sc->ifp;
+ if_t ifp = sc->ifp;
struct mbuf *m, *m_hd, **m_tl;
uint32_t ctl;
@@ -569,17 +579,22 @@ cgem_recv(struct cgem_softc *sc)
m = sc->rxring_m[sc->rxring_tl_ptr];
sc->rxring_m[sc->rxring_tl_ptr] = NULL;
-#ifndef __rtems__
/* Sync cache with receive buffer. */
+#ifndef __rtems__
bus_dmamap_sync(sc->mbuf_dma_tag,
sc->rxring_m_dmamap[sc->rxring_tl_ptr],
BUS_DMASYNC_POSTREAD);
+#else /* __rtems__ */
+ rtems_cache_invalidate_multiple_data_lines(m->m_data, m->m_len);
+#endif /* __rtems__ */
- /* Unload dmamap. */
+#ifndef __rtems__
+ /* Unload and destroy dmamap. */
bus_dmamap_unload(sc->mbuf_dma_tag,
sc->rxring_m_dmamap[sc->rxring_tl_ptr]);
-#else /* __rtems__ */
- rtems_cache_invalidate_multiple_data_lines(m->m_data, m->m_len);
+ bus_dmamap_destroy(sc->mbuf_dma_tag,
+ sc->rxring_m_dmamap[sc->rxring_tl_ptr]);
+ sc->rxring_m_dmamap[sc->rxring_tl_ptr] = NULL;
#endif /* __rtems__ */
/* Increment tail pointer. */
@@ -596,11 +611,7 @@ cgem_recv(struct cgem_softc *sc)
(CGEM_RXDESC_SOF | CGEM_RXDESC_EOF)) {
/* discard. */
m_free(m);
-#ifndef __rtems__
if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
-#else /* __rtems__ */
- ifp->if_ierrors++;
-#endif /* __rtems__ */
continue;
}
@@ -613,7 +624,7 @@ cgem_recv(struct cgem_softc *sc)
/* Are we using hardware checksumming? Check the
* status in the receive descriptor.
*/
- if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
+ if ((if_getcapenable(ifp) & IFCAP_RXCSUM) != 0) {
/* TCP or UDP checks out, IP checks out too. */
if ((ctl & CGEM_RXDESC_CKSUM_STAT_MASK) ==
CGEM_RXDESC_CKSUM_STAT_TCP_GOOD ||
@@ -646,12 +657,8 @@ cgem_recv(struct cgem_softc *sc)
m = m_hd;
m_hd = m_hd->m_next;
m->m_next = NULL;
-#ifndef __rtems__
if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
-#else /* __rtems__ */
- ifp->if_ipackets++;
-#endif /* __rtems__ */
- (*ifp->if_input)(ifp, m);
+ if_input(ifp, m);
}
CGEM_LOCK(sc);
}
@@ -670,15 +677,18 @@ cgem_clean_tx(struct cgem_softc *sc)
((ctl = sc->txring[sc->txring_tl_ptr].ctl) &
CGEM_TXDESC_USED) != 0) {
+ /* Sync cache. */
#ifndef __rtems__
- /* Sync cache. nop? */
bus_dmamap_sync(sc->mbuf_dma_tag,
sc->txring_m_dmamap[sc->txring_tl_ptr],
BUS_DMASYNC_POSTWRITE);
- /* Unload DMA map. */
+ /* Unload and destroy DMA map. */
bus_dmamap_unload(sc->mbuf_dma_tag,
sc->txring_m_dmamap[sc->txring_tl_ptr]);
+ bus_dmamap_destroy(sc->mbuf_dma_tag,
+ sc->txring_m_dmamap[sc->txring_tl_ptr]);
+ sc->txring_m_dmamap[sc->txring_tl_ptr] = NULL;
#endif /* __rtems__ */
/* Free up the mbuf. */
@@ -694,17 +704,9 @@ cgem_clean_tx(struct cgem_softc *sc)
sc->txring[sc->txring_tl_ptr].addr);
} else if ((ctl & (CGEM_TXDESC_RETRY_ERR |
CGEM_TXDESC_LATE_COLL)) != 0) {
-#ifndef __rtems__
if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1);
-#else /* __rtems__ */
- sc->ifp->if_oerrors++;
-#endif /* __rtems__ */
} else
-#ifndef __rtems__
if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1);
-#else /* __rtems__ */
- sc->ifp->if_opackets++;
-#endif /* __rtems__ */
/* If the packet spanned more than one tx descriptor,
* skip descriptors until we find the end so that only
@@ -730,7 +732,7 @@ cgem_clean_tx(struct cgem_softc *sc)
sc->txring_tl_ptr++;
sc->txring_queued--;
- sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if_setdrvflagbits(sc->ifp, 0, IFF_DRV_OACTIVE);
}
}
@@ -763,9 +765,9 @@ cgem_get_segs_for_tx(struct mbuf *m, bus_dma_segment_t segs[TX_MAX_DMA_SEGS],
#endif /* __rtems__ */
/* Start transmits. */
static void
-cgem_start_locked(struct ifnet *ifp)
+cgem_start_locked(if_t ifp)
{
- struct cgem_softc *sc = (struct cgem_softc *) ifp->if_softc;
+ struct cgem_softc *sc = (struct cgem_softc *) if_getsoftc(ifp);
struct mbuf *m;
bus_dma_segment_t segs[TX_MAX_DMA_SEGS];
uint32_t ctl;
@@ -773,7 +775,7 @@ cgem_start_locked(struct ifnet *ifp)
CGEM_ASSERT_LOCKED(sc);
- if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0)
+ if ((if_getdrvflags(ifp) & IFF_DRV_OACTIVE) != 0)
return;
for (;;) {
@@ -787,19 +789,25 @@ cgem_start_locked(struct ifnet *ifp)
/* Still no room? */
if (sc->txring_queued >=
CGEM_NUM_TX_DESCS - TX_MAX_DMA_SEGS * 2) {
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
sc->txfull++;
break;
}
}
/* Grab next transmit packet. */
- IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
+ m = if_dequeue(ifp);
if (m == NULL)
break;
#ifndef __rtems__
- /* Load DMA map. */
+ /* Create and load DMA map. */
+ if (bus_dmamap_create(sc->mbuf_dma_tag, 0,
+ &sc->txring_m_dmamap[sc->txring_hd_ptr])) {
+ m_freem(m);
+ sc->txdmamapfails++;
+ continue;
+ }
err = bus_dmamap_load_mbuf_sg(sc->mbuf_dma_tag,
sc->txring_m_dmamap[sc->txring_hd_ptr],
m, segs, &nsegs, BUS_DMA_NOWAIT);
@@ -813,6 +821,11 @@ cgem_start_locked(struct ifnet *ifp)
if (m2 == NULL) {
sc->txdefragfails++;
m_freem(m);
+#ifndef __rtems__
+ bus_dmamap_destroy(sc->mbuf_dma_tag,
+ sc->txring_m_dmamap[sc->txring_hd_ptr]);
+ sc->txring_m_dmamap[sc->txring_hd_ptr] = NULL;
+#endif /* __rtems__ */
continue;
}
m = m2;
@@ -828,6 +841,11 @@ cgem_start_locked(struct ifnet *ifp)
if (err) {
/* Give up. */
m_freem(m);
+#ifndef __rtems__
+ bus_dmamap_destroy(sc->mbuf_dma_tag,
+ sc->txring_m_dmamap[sc->txring_hd_ptr]);
+ sc->txring_m_dmamap[sc->txring_hd_ptr] = NULL;
+#endif /* __rtems__ */
sc->txdmamapfails++;
continue;
}
@@ -881,9 +899,9 @@ cgem_start_locked(struct ifnet *ifp)
}
static void
-cgem_start(struct ifnet *ifp)
+cgem_start(if_t ifp)
{
- struct cgem_softc *sc = (struct cgem_softc *) ifp->if_softc;
+ struct cgem_softc *sc = (struct cgem_softc *) if_getsoftc(ifp);
CGEM_LOCK(sc);
cgem_start_locked(ifp);
@@ -914,32 +932,16 @@ cgem_poll_hw_stats(struct cgem_softc *sc)
n = RD4(sc, CGEM_SINGLE_COLL_FRAMES);
sc->stats.tx_single_collisn += n;
-#ifndef __rtems__
if_inc_counter(sc->ifp, IFCOUNTER_COLLISIONS, n);
-#else /* __rtems__ */
- sc->ifp->if_collisions += n;
-#endif /* __rtems__ */
n = RD4(sc, CGEM_MULTI_COLL_FRAMES);
sc->stats.tx_multi_collisn += n;
-#ifndef __rtems__
if_inc_counter(sc->ifp, IFCOUNTER_COLLISIONS, n);
-#else /* __rtems__ */
- sc->ifp->if_collisions += n;
-#endif /* __rtems__ */
n = RD4(sc, CGEM_EXCESSIVE_COLL_FRAMES);
sc->stats.tx_excsv_collisn += n;
-#ifndef __rtems__
if_inc_counter(sc->ifp, IFCOUNTER_COLLISIONS, n);
-#else /* __rtems__ */
- sc->ifp->if_collisions += n;
-#endif /* __rtems__ */
n = RD4(sc, CGEM_LATE_COLL);
sc->stats.tx_late_collisn += n;
-#ifndef __rtems__
if_inc_counter(sc->ifp, IFCOUNTER_COLLISIONS, n);
-#else /* __rtems__ */
- sc->ifp->if_collisions += n;
-#endif /* __rtems__ */
sc->stats.tx_deferred_frames += RD4(sc, CGEM_DEFERRED_TX_FRAMES);
sc->stats.tx_carrier_sense_errs += RD4(sc, CGEM_CARRIER_SENSE_ERRS);
@@ -1011,11 +1013,12 @@ static void
cgem_intr(void *arg)
{
struct cgem_softc *sc = (struct cgem_softc *)arg;
+ if_t ifp = sc->ifp;
uint32_t istatus;
CGEM_LOCK(sc);
- if ((sc->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
CGEM_UNLOCK(sc);
return;
}
@@ -1054,8 +1057,8 @@ cgem_intr(void *arg)
}
/* Restart transmitter if needed. */
- if (!IFQ_DRV_IS_EMPTY(&sc->ifp->if_snd))
- cgem_start_locked(sc->ifp);
+ if (!if_sendq_empty(ifp))
+ cgem_start_locked(ifp);
CGEM_UNLOCK(sc);
}
@@ -1091,9 +1094,10 @@ cgem_reset(struct cgem_softc *sc)
static void
cgem_config(struct cgem_softc *sc)
{
+ if_t ifp = sc->ifp;
uint32_t net_cfg;
uint32_t dma_cfg;
- u_char *eaddr = IF_LLADDR(sc->ifp);
+ u_char *eaddr = if_getlladdr(ifp);
CGEM_ASSERT_LOCKED(sc);
@@ -1108,7 +1112,7 @@ cgem_config(struct cgem_softc *sc)
CGEM_NET_CFG_SPEED100;
/* Enable receive checksum offloading? */
- if ((sc->ifp->if_capenable & IFCAP_RXCSUM) != 0)
+ if ((if_getcapenable(ifp) & IFCAP_RXCSUM) != 0)
net_cfg |= CGEM_NET_CFG_RX_CHKSUM_OFFLD_EN;
WR4(sc, CGEM_NET_CFG, net_cfg);
@@ -1121,7 +1125,7 @@ cgem_config(struct cgem_softc *sc)
CGEM_DMA_CFG_DISC_WHEN_NO_AHB;
/* Enable transmit checksum offloading? */
- if ((sc->ifp->if_capenable & IFCAP_TXCSUM) != 0)
+ if ((if_getcapenable(ifp) & IFCAP_TXCSUM) != 0)
dma_cfg |= CGEM_DMA_CFG_CHKSUM_GEN_OFFLOAD_EN;
WR4(sc, CGEM_DMA_CFG, dma_cfg);
@@ -1154,14 +1158,13 @@ cgem_init_locked(struct cgem_softc *sc)
CGEM_ASSERT_LOCKED(sc);
- if ((sc->ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
+ if ((if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) != 0)
return;
cgem_config(sc);
cgem_fill_rqueue(sc);
- sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
- sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if_setdrvflagbits(sc->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
mii = device_get_softc(sc->miibus);
mii_mediachg(mii);
@@ -1198,8 +1201,12 @@ cgem_stop(struct cgem_softc *sc)
sc->txring[i].addr = 0;
if (sc->txring_m[i]) {
#ifndef __rtems__
+ /* Unload and destroy dmamap. */
bus_dmamap_unload(sc->mbuf_dma_tag,
sc->txring_m_dmamap[i]);
+ bus_dmamap_destroy(sc->mbuf_dma_tag,
+ sc->txring_m_dmamap[i]);
+ sc->txring_m_dmamap[i] = NULL;
#endif /* __rtems__ */
m_freem(sc->txring_m[i]);
sc->txring_m[i] = NULL;
@@ -1217,9 +1224,12 @@ cgem_stop(struct cgem_softc *sc)
sc->rxring[i].ctl = 0;
if (sc->rxring_m[i]) {
#ifndef __rtems__
- /* Unload dmamap. */
+ /* Unload and destroy dmamap. */
bus_dmamap_unload(sc->mbuf_dma_tag,
- sc->rxring_m_dmamap[sc->rxring_tl_ptr]);
+ sc->rxring_m_dmamap[i]);
+ bus_dmamap_destroy(sc->mbuf_dma_tag,
+ sc->rxring_m_dmamap[i]);
+ sc->rxring_m_dmamap[i] = NULL;
#endif /* __rtems__ */
m_freem(sc->rxring_m[i]);
@@ -1238,9 +1248,9 @@ cgem_stop(struct cgem_softc *sc)
static int
-cgem_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+cgem_ioctl(if_t ifp, u_long cmd, caddr_t data)
{
- struct cgem_softc *sc = ifp->if_softc;
+ struct cgem_softc *sc = if_getsoftc(ifp);
struct ifreq *ifr = (struct ifreq *)data;
struct mii_data *mii;
int error = 0, mask;
@@ -1248,27 +1258,27 @@ cgem_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
switch (cmd) {
case SIOCSIFFLAGS:
CGEM_LOCK(sc);
- if ((ifp->if_flags & IFF_UP) != 0) {
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
- if (((ifp->if_flags ^ sc->if_old_flags) &
+ if ((if_getflags(ifp) & IFF_UP) != 0) {
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
+ if (((if_getflags(ifp) ^ sc->if_old_flags) &
(IFF_PROMISC | IFF_ALLMULTI)) != 0) {
cgem_rx_filter(sc);
}
} else {
cgem_init_locked(sc);
}
- } else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ } else if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
+ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
cgem_stop(sc);
}
- sc->if_old_flags = ifp->if_flags;
+ sc->if_old_flags = if_getflags(ifp);
CGEM_UNLOCK(sc);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
/* Set up multi-cast filters. */
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
CGEM_LOCK(sc);
cgem_rx_filter(sc);
CGEM_UNLOCK(sc);
@@ -1283,23 +1293,23 @@ cgem_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCSIFCAP:
CGEM_LOCK(sc);
- mask = ifp->if_capenable ^ ifr->ifr_reqcap;
+ mask = if_getcapenable(ifp) ^ ifr->ifr_reqcap;
if ((mask & IFCAP_TXCSUM) != 0) {
if ((ifr->ifr_reqcap & IFCAP_TXCSUM) != 0) {
/* Turn on TX checksumming. */
- ifp->if_capenable |= (IFCAP_TXCSUM |
- IFCAP_TXCSUM_IPV6);
- ifp->if_hwassist |= CGEM_CKSUM_ASSIST;
+ if_setcapenablebit(ifp, IFCAP_TXCSUM |
+ IFCAP_TXCSUM_IPV6, 0);
+ if_sethwassistbits(ifp, CGEM_CKSUM_ASSIST, 0);
WR4(sc, CGEM_DMA_CFG,
RD4(sc, CGEM_DMA_CFG) |
CGEM_DMA_CFG_CHKSUM_GEN_OFFLOAD_EN);
} else {
/* Turn off TX checksumming. */
- ifp->if_capenable &= ~(IFCAP_TXCSUM |
- IFCAP_TXCSUM_IPV6);
- ifp->if_hwassist &= ~CGEM_CKSUM_ASSIST;
+ if_setcapenablebit(ifp, 0, IFCAP_TXCSUM |
+ IFCAP_TXCSUM_IPV6);
+ if_sethwassistbits(ifp, 0, CGEM_CKSUM_ASSIST);
WR4(sc, CGEM_DMA_CFG,
RD4(sc, CGEM_DMA_CFG) &
@@ -1309,25 +1319,25 @@ cgem_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
if ((mask & IFCAP_RXCSUM) != 0) {
if ((ifr->ifr_reqcap & IFCAP_RXCSUM) != 0) {
/* Turn on RX checksumming. */
- ifp->if_capenable |= (IFCAP_RXCSUM |
- IFCAP_RXCSUM_IPV6);
+ if_setcapenablebit(ifp, IFCAP_RXCSUM |
+ IFCAP_RXCSUM_IPV6, 0);
WR4(sc, CGEM_NET_CFG,
RD4(sc, CGEM_NET_CFG) |
CGEM_NET_CFG_RX_CHKSUM_OFFLD_EN);
} else {
/* Turn off RX checksumming. */
- ifp->if_capenable &= ~(IFCAP_RXCSUM |
- IFCAP_RXCSUM_IPV6);
+ if_setcapenablebit(ifp, 0, IFCAP_RXCSUM |
+ IFCAP_RXCSUM_IPV6);
WR4(sc, CGEM_NET_CFG,
RD4(sc, CGEM_NET_CFG) &
~CGEM_NET_CFG_RX_CHKSUM_OFFLD_EN);
}
}
- if ((ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_TXCSUM)) ==
+ if ((if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_TXCSUM)) ==
(IFCAP_RXCSUM | IFCAP_TXCSUM))
- ifp->if_capenable |= IFCAP_VLAN_HWCSUM;
+ if_setcapenablebit(ifp, IFCAP_VLAN_HWCSUM, 0);
else
- ifp->if_capenable &= ~IFCAP_VLAN_HWCSUM;
+ if_setcapenablebit(ifp, 0, IFCAP_VLAN_HWCSUM);
CGEM_UNLOCK(sc);
break;
@@ -1351,16 +1361,16 @@ cgem_child_detached(device_t dev, device_t child)
}
static int
-cgem_ifmedia_upd(struct ifnet *ifp)
+cgem_ifmedia_upd(if_t ifp)
{
- struct cgem_softc *sc = (struct cgem_softc *) ifp->if_softc;
+ struct cgem_softc *sc = (struct cgem_softc *) if_getsoftc(ifp);
struct mii_data *mii;
struct mii_softc *miisc;
int error = 0;
mii = device_get_softc(sc->miibus);
CGEM_LOCK(sc);
- if ((ifp->if_flags & IFF_UP) != 0) {
+ if ((if_getflags(ifp) & IFF_UP) != 0) {
LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
PHY_RESET(miisc);
error = mii_mediachg(mii);
@@ -1371,9 +1381,9 @@ cgem_ifmedia_upd(struct ifnet *ifp)
}
static void
-cgem_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
+cgem_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
{
- struct cgem_softc *sc = (struct cgem_softc *) ifp->if_softc;
+ struct cgem_softc *sc = (struct cgem_softc *) if_getsoftc(ifp);
struct mii_data *mii;
mii = device_get_softc(sc->miibus);
@@ -1709,6 +1719,9 @@ cgem_probe(device_t dev)
{
#ifndef __rtems__
+ if (!ofw_bus_status_okay(dev))
+ return (ENXIO);
+
if (!ofw_bus_is_compatible(dev, "cadence,gem"))
return (ENXIO);
#endif /* __rtems__ */
@@ -1721,7 +1734,7 @@ static int
cgem_attach(device_t dev)
{
struct cgem_softc *sc = device_get_softc(dev);
- struct ifnet *ifp = NULL;
+ if_t ifp = NULL;
#ifndef __rtems__
phandle_t node;
pcell_t cell;
@@ -1768,23 +1781,23 @@ cgem_attach(device_t dev)
cgem_detach(dev);
return (ENOMEM);
}
- ifp->if_softc = sc;
+ if_setsoftc(ifp, sc);
if_initname(ifp, IF_CGEM_NAME, device_get_unit(dev));
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_start = cgem_start;
- ifp->if_ioctl = cgem_ioctl;
- ifp->if_init = cgem_init;
- ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
- IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM;
+ if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
+ if_setinitfn(ifp, cgem_init);
+ if_setioctlfn(ifp, cgem_ioctl);
+ if_setstartfn(ifp, cgem_start);
+ if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
+ IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM, 0);
+ if_setsendqlen(ifp, CGEM_NUM_TX_DESCS);
+ if_setsendqready(ifp);
+
/* Disable hardware checksumming by default. */
- ifp->if_hwassist = 0;
- ifp->if_capenable = ifp->if_capabilities &
- ~(IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_VLAN_HWCSUM);
- ifp->if_snd.ifq_drv_maxlen = CGEM_NUM_TX_DESCS;
- IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
- IFQ_SET_READY(&ifp->if_snd);
-
- sc->if_old_flags = ifp->if_flags;
+ if_sethwassist(ifp, 0);
+ if_setcapenable(ifp, if_getcapabilities(ifp) &
+ ~(IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_VLAN_HWCSUM));
+
+ sc->if_old_flags = if_getflags(ifp);
sc->rxbufs = DEFAULT_NUM_RX_BUFS;
sc->rxhangwar = 1;
@@ -1849,7 +1862,7 @@ cgem_detach(device_t dev)
cgem_stop(sc);
CGEM_UNLOCK(sc);
callout_drain(&sc->tick_ch);
- sc->ifp->if_flags &= ~IFF_UP;
+ if_setflagbits(sc->ifp, 0, IFF_UP);
ether_ifdetach(sc->ifp);
}
@@ -1875,10 +1888,11 @@ cgem_detach(device_t dev)
/* Release DMA resources. */
if (sc->rxring != NULL) {
if (sc->rxring_physaddr != 0) {
- bus_dmamap_unload(sc->desc_dma_tag, sc->rxring_dma_map);
+ bus_dmamap_unload(sc->desc_dma_tag,
+ sc->rxring_dma_map);
sc->rxring_physaddr = 0;
}
- bus_dmamem_free(sc->desc_dma_tag, __DEVOLATILE(void *, sc->rxring),
+ bus_dmamem_free(sc->desc_dma_tag, sc->rxring,
sc->rxring_dma_map);
sc->rxring = NULL;
#ifndef __rtems__
@@ -1892,10 +1906,11 @@ cgem_detach(device_t dev)
}
if (sc->txring != NULL) {
if (sc->txring_physaddr != 0) {
- bus_dmamap_unload(sc->desc_dma_tag, sc->txring_dma_map);
+ bus_dmamap_unload(sc->desc_dma_tag,
+ sc->txring_dma_map);
sc->txring_physaddr = 0;
}
- bus_dmamem_free(sc->desc_dma_tag, __DEVOLATILE(void *, sc->txring),
+ bus_dmamem_free(sc->desc_dma_tag, sc->txring,
sc->txring_dma_map);
sc->txring = NULL;
#ifndef __rtems__
diff --git a/freebsd/sys/dev/dc/dcphy.c b/freebsd/sys/dev/dc/dcphy.c
index 32d11c61..76170aa0 100644
--- a/freebsd/sys/dev/dc/dcphy.c
+++ b/freebsd/sys/dev/dc/dcphy.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_media.h>
@@ -154,7 +155,7 @@ dcphy_attach(device_t dev)
&dcphy_funcs, 0);
/*PHY_RESET(sc);*/
- dc_sc = sc->mii_pdata->mii_ifp->if_softc;
+ dc_sc = if_getsoftc(sc->mii_pdata->mii_ifp);
CSR_WRITE_4(dc_sc, DC_10BTSTAT, 0);
CSR_WRITE_4(dc_sc, DC_10BTCTRL, 0);
@@ -192,7 +193,7 @@ dcphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
int reg;
u_int32_t mode;
- dc_sc = mii->mii_ifp->if_softc;
+ dc_sc = if_getsoftc(mii->mii_ifp);
switch (cmd) {
case MII_POLLSTAT:
@@ -202,7 +203,7 @@ dcphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
/*
* If the interface is not up, don't do anything.
*/
- if ((mii->mii_ifp->if_flags & IFF_UP) == 0)
+ if ((if_getflags(mii->mii_ifp) & IFF_UP) == 0)
break;
mii->mii_media_active = IFM_NONE;
@@ -252,7 +253,7 @@ dcphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
/*
* Is the interface even up?
*/
- if ((mii->mii_ifp->if_flags & IFF_UP) == 0)
+ if ((if_getflags(mii->mii_ifp) & IFF_UP) == 0)
return (0);
/*
@@ -299,12 +300,12 @@ dcphy_status(struct mii_softc *sc)
int anlpar, tstat;
struct dc_softc *dc_sc;
- dc_sc = mii->mii_ifp->if_softc;
+ dc_sc = if_getsoftc(mii->mii_ifp);
mii->mii_media_status = IFM_AVALID;
mii->mii_media_active = IFM_ETHER;
- if ((mii->mii_ifp->if_flags & IFF_UP) == 0)
+ if ((if_getflags(mii->mii_ifp) & IFF_UP) == 0)
return;
tstat = CSR_READ_4(dc_sc, DC_10BTSTAT);
@@ -379,7 +380,7 @@ dcphy_auto(struct mii_softc *mii)
{
struct dc_softc *sc;
- sc = mii->mii_pdata->mii_ifp->if_softc;
+ sc = if_getsoftc(mii->mii_pdata->mii_ifp);
DC_CLRBIT(sc, DC_NETCFG, DC_NETCFG_PORTSEL);
DC_SETBIT(sc, DC_NETCFG, DC_NETCFG_FULLDUPLEX);
@@ -400,7 +401,7 @@ dcphy_reset(struct mii_softc *mii)
{
struct dc_softc *sc;
- sc = mii->mii_pdata->mii_ifp->if_softc;
+ sc = if_getsoftc(mii->mii_pdata->mii_ifp);
DC_CLRBIT(sc, DC_SIARESET, DC_SIA_RESET);
DELAY(1000);
diff --git a/freebsd/sys/dev/dc/if_dc.c b/freebsd/sys/dev/dc/if_dc.c
index 0c7a46cb..5b0aaffd 100644
--- a/freebsd/sys/dev/dc/if_dc.c
+++ b/freebsd/sys/dev/dc/if_dc.c
@@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
@@ -672,20 +673,16 @@ dc_miibus_readreg(device_t dev, int phy, int reg)
* code think there's a PHY here.
*/
return (BMSR_MEDIAMASK);
- break;
case MII_PHYIDR1:
if (DC_IS_PNIC(sc))
return (DC_VENDORID_LO);
return (DC_VENDORID_DEC);
- break;
case MII_PHYIDR2:
if (DC_IS_PNIC(sc))
return (DC_DEVICEID_82C168);
return (DC_DEVICEID_21143);
- break;
default:
return (0);
- break;
}
} else
return (0);
@@ -749,7 +746,6 @@ dc_miibus_readreg(device_t dev, int phy, int reg)
device_printf(dev, "phy_read: bad phy register %x\n",
reg);
return (0);
- break;
}
rval = CSR_READ_4(sc, phy_reg) & 0x0000FFFF;
@@ -1983,9 +1979,9 @@ dc_dma_free(struct dc_softc *sc)
/* RX descriptor list. */
if (sc->dc_rx_ltag) {
- if (sc->dc_rx_lmap != NULL)
+ if (sc->dc_ldata.dc_rx_list_paddr != 0)
bus_dmamap_unload(sc->dc_rx_ltag, sc->dc_rx_lmap);
- if (sc->dc_rx_lmap != NULL && sc->dc_ldata.dc_rx_list != NULL)
+ if (sc->dc_ldata.dc_rx_list != NULL)
bus_dmamem_free(sc->dc_rx_ltag, sc->dc_ldata.dc_rx_list,
sc->dc_rx_lmap);
bus_dma_tag_destroy(sc->dc_rx_ltag);
@@ -1993,9 +1989,9 @@ dc_dma_free(struct dc_softc *sc)
/* TX descriptor list. */
if (sc->dc_tx_ltag) {
- if (sc->dc_tx_lmap != NULL)
+ if (sc->dc_ldata.dc_tx_list_paddr != 0)
bus_dmamap_unload(sc->dc_tx_ltag, sc->dc_tx_lmap);
- if (sc->dc_tx_lmap != NULL && sc->dc_ldata.dc_tx_list != NULL)
+ if (sc->dc_ldata.dc_tx_list != NULL)
bus_dmamem_free(sc->dc_tx_ltag, sc->dc_ldata.dc_tx_list,
sc->dc_tx_lmap);
bus_dma_tag_destroy(sc->dc_tx_ltag);
@@ -2003,9 +1999,9 @@ dc_dma_free(struct dc_softc *sc)
/* multicast setup frame. */
if (sc->dc_stag) {
- if (sc->dc_smap != NULL)
+ if (sc->dc_saddr != 0)
bus_dmamap_unload(sc->dc_stag, sc->dc_smap);
- if (sc->dc_smap != NULL && sc->dc_cdata.dc_sbuf != NULL)
+ if (sc->dc_cdata.dc_sbuf != NULL)
bus_dmamem_free(sc->dc_stag, sc->dc_cdata.dc_sbuf,
sc->dc_smap);
bus_dma_tag_destroy(sc->dc_stag);
@@ -2485,7 +2481,7 @@ dc_attach(device_t dev)
/*
* Tell the upper layer(s) we support long frames.
*/
- ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
+ ifp->if_hdrlen = sizeof(struct ether_vlan_header);
ifp->if_capabilities |= IFCAP_VLAN_MTU;
ifp->if_capenable = ifp->if_capabilities;
#ifdef DEVICE_POLLING
@@ -2916,9 +2912,9 @@ dc_rxeof(struct dc_softc *sc)
(rxstat & (DC_RXSTAT_CRCERR | DC_RXSTAT_DRIBBLE |
DC_RXSTAT_MIIERE | DC_RXSTAT_COLLSEEN |
DC_RXSTAT_RUNT | DC_RXSTAT_DE))) {
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
if (rxstat & DC_RXSTAT_COLLSEEN)
- ifp->if_collisions++;
+ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1);
dc_discard_rxbuf(sc, i);
if (rxstat & DC_RXSTAT_CRCERR)
continue;
@@ -2944,7 +2940,7 @@ dc_rxeof(struct dc_softc *sc)
*/
if (dc_newbuf(sc, i) != 0) {
dc_discard_rxbuf(sc, i);
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
continue;
}
m->m_pkthdr.rcvif = ifp;
@@ -2957,14 +2953,14 @@ dc_rxeof(struct dc_softc *sc)
ETHER_ALIGN, ifp, NULL);
dc_discard_rxbuf(sc, i);
if (m0 == NULL) {
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
continue;
}
m = m0;
}
#endif
- ifp->if_ipackets++;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
DC_UNLOCK(sc);
(*ifp->if_input)(ifp, m);
DC_LOCK(sc);
@@ -3055,19 +3051,19 @@ dc_txeof(struct dc_softc *sc)
}
if (txstat & DC_TXSTAT_ERRSUM) {
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
if (txstat & DC_TXSTAT_EXCESSCOLL)
- ifp->if_collisions++;
+ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1);
if (txstat & DC_TXSTAT_LATECOLL)
- ifp->if_collisions++;
+ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1);
if (!(txstat & DC_TXSTAT_UNDERRUN)) {
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
dc_init_locked(sc);
return;
}
} else
- ifp->if_opackets++;
- ifp->if_collisions += (txstat & DC_TXSTAT_COLLCNT) >> 3;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, (txstat & DC_TXSTAT_COLLCNT) >> 3);
bus_dmamap_sync(sc->dc_tx_mtag, sc->dc_cdata.dc_tx_map[idx],
BUS_DMASYNC_POSTWRITE);
@@ -3262,7 +3258,7 @@ dc_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
if (status & (DC_ISR_RX_WATDOGTIMEO | DC_ISR_RX_NOBUF)) {
uint32_t r = CSR_READ_4(sc, DC_FRAMESDISCARDED);
- ifp->if_ierrors += (r & 0xffff) + ((r >> 17) & 0x7ff);
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, (r & 0xffff) + ((r >> 17) & 0x7ff));
if (dc_rx_resync(sc))
dc_rxeof(sc);
@@ -3344,7 +3340,7 @@ dc_intr(void *arg)
if ((status & DC_ISR_RX_WATDOGTIMEO)
|| (status & DC_ISR_RX_NOBUF)) {
r = CSR_READ_4(sc, DC_FRAMESDISCARDED);
- ifp->if_ierrors += (r & 0xffff) + ((r >> 17) & 0x7ff);
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, (r & 0xffff) + ((r >> 17) & 0x7ff));
if (dc_rxeof(sc) == 0) {
while (dc_rx_resync(sc))
dc_rxeof(sc);
@@ -3942,7 +3938,7 @@ dc_watchdog(void *xsc)
}
ifp = sc->dc_ifp;
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
device_printf(sc->dc_dev, "watchdog timeout\n");
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
diff --git a/freebsd/sys/dev/dc/pnphy.c b/freebsd/sys/dev/dc/pnphy.c
index a9a8ec93..5f1c52f2 100644
--- a/freebsd/sys/dev/dc/pnphy.c
+++ b/freebsd/sys/dev/dc/pnphy.c
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_media.h>
@@ -153,7 +154,7 @@ pnphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
/*
* If the interface is not up, don't do anything.
*/
- if ((mii->mii_ifp->if_flags & IFF_UP) == 0)
+ if ((if_getflags(mii->mii_ifp) & IFF_UP) == 0)
break;
/*
@@ -181,7 +182,7 @@ pnphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
/*
* Is the interface even up?
*/
- if ((mii->mii_ifp->if_flags & IFF_UP) == 0)
+ if ((if_getflags(mii->mii_ifp) & IFF_UP) == 0)
return (0);
break;
@@ -202,7 +203,7 @@ pnphy_status(struct mii_softc *sc)
int reg;
struct dc_softc *dc_sc;
- dc_sc = mii->mii_ifp->if_softc;
+ dc_sc = if_getsoftc(mii->mii_ifp);
mii->mii_media_status = IFM_AVALID;
mii->mii_media_active = IFM_ETHER;
diff --git a/freebsd/sys/dev/dwc/if_dwc.c b/freebsd/sys/dev/dwc/if_dwc.c
index a3716a15..c7284ee1 100644
--- a/freebsd/sys/dev/dwc/if_dwc.c
+++ b/freebsd/sys/dev/dwc/if_dwc.c
@@ -44,24 +44,16 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/gpio.h>
#include <sys/kernel.h>
-#include <sys/module.h>
-#include <sys/malloc.h>
-#include <sys/rman.h>
-#include <sys/endian.h>
#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/module.h>
#include <sys/mutex.h>
+#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
-#include <sys/sysctl.h>
-
-#ifndef __rtems__
-#include <dev/fdt/fdt_common.h>
-#include <dev/ofw/openfirm.h>
-#include <dev/ofw/ofw_bus.h>
-#include <dev/ofw/ofw_bus_subr.h>
-#endif /* __rtems__ */
#include <net/bpf.h>
#include <net/if.h>
@@ -70,21 +62,27 @@ __FBSDID("$FreeBSD$");
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
-#include <net/if_vlan_var.h>
#include <machine/bus.h>
-#ifndef __rtems__
-#include <machine/fdt.h>
-#endif /* __rtems__ */
+#include <dev/dwc/if_dwc.h>
+#include <dev/dwc/if_dwcvar.h>
#include <dev/mii/mii.h>
#include <dev/mii/miivar.h>
-#include <rtems/bsd/local/miibus_if.h>
-#ifdef __rtems__
-#pragma GCC diagnostic ignored "-Wpointer-sign"
-#include <rtems/bsd/bsd.h>
+#ifndef __rtems__
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
#endif /* __rtems__ */
+#ifdef EXT_RESOURCES
+#include <dev/extres/clk/clk.h>
+#include <dev/extres/hwreset/hwreset.h>
+#endif
+
+#include <rtems/bsd/local/if_dwc_if.h>
+#include <rtems/bsd/local/gpio_if.h>
+#include <rtems/bsd/local/miibus_if.h>
+
#define READ4(_sc, _reg) \
bus_read_4((_sc)->res[0], _reg)
#define WRITE4(_sc, _reg, _val) \
@@ -93,128 +91,53 @@ __FBSDID("$FreeBSD$");
#define MAC_RESET_TIMEOUT 100
#define WATCHDOG_TIMEOUT_SECS 5
#define STATS_HARVEST_INTERVAL 2
-#define MII_CLK_VAL 2
-
-#include <dev/dwc/if_dwc.h>
#define DWC_LOCK(sc) mtx_lock(&(sc)->mtx)
#define DWC_UNLOCK(sc) mtx_unlock(&(sc)->mtx)
-#define DWC_ASSERT_LOCKED(sc) mtx_assert(&(sc)->mtx, MA_OWNED);
-#define DWC_ASSERT_UNLOCKED(sc) mtx_assert(&(sc)->mtx, MA_NOTOWNED);
-
-#define DDESC_TDES0_OWN (1 << 31)
-#define DDESC_TDES0_TXINT (1 << 30)
-#define DDESC_TDES0_TXLAST (1 << 29)
-#define DDESC_TDES0_TXFIRST (1 << 28)
-#define DDESC_TDES0_TXCRCDIS (1 << 27)
-#define DDESC_TDES0_CIC_IP_HDR (0x1 << 22)
-#define DDESC_TDES0_CIC_IP_HDR_PYL (0x2 << 22)
-#define DDESC_TDES0_CIC_IP_HDR_PYL_PHDR (0x3 << 22)
-#define DDESC_TDES0_TXRINGEND (1 << 21)
-#define DDESC_TDES0_TXCHAIN (1 << 20)
-
-#define DDESC_RDES0_OWN (1 << 31)
+#define DWC_ASSERT_LOCKED(sc) mtx_assert(&(sc)->mtx, MA_OWNED)
+#define DWC_ASSERT_UNLOCKED(sc) mtx_assert(&(sc)->mtx, MA_NOTOWNED)
+
+#define DDESC_TDES0_OWN (1U << 31)
+#define DDESC_TDES0_TXINT (1U << 30)
+#define DDESC_TDES0_TXLAST (1U << 29)
+#define DDESC_TDES0_TXFIRST (1U << 28)
+#define DDESC_TDES0_TXCRCDIS (1U << 27)
+#define DDESC_TDES0_TXRINGEND (1U << 21)
+#define DDESC_TDES0_TXCHAIN (1U << 20)
+
+#define DDESC_RDES0_OWN (1U << 31)
#define DDESC_RDES0_FL_MASK 0x3fff
#define DDESC_RDES0_FL_SHIFT 16 /* Frame Length */
-#define DDESC_RDES0_ESA (1 << 0)
-#define DDESC_RDES1_CHAINED (1 << 14)
-#define DDESC_RDES4_IP_PYL_ERR (1 << 4)
-#define DDESC_RDES4_IP_HDR_ERR (1 << 3)
-#define DDESC_RDES4_IP_PYL_TYPE_MSK 0x7
-#define DDESC_RDES4_IP_PYL_UDP 1
-#define DDESC_RDES4_IP_PYL_TCP 2
-
-struct dwc_bufmap {
-#ifndef __rtems__
- bus_dmamap_t map;
-#endif /* __rtems__ */
- struct mbuf *mbuf;
-};
+#define DDESC_RDES1_CHAINED (1U << 14)
+
+/* Alt descriptor bits. */
+#define DDESC_CNTL_TXINT (1U << 31)
+#define DDESC_CNTL_TXLAST (1U << 30)
+#define DDESC_CNTL_TXFIRST (1U << 29)
+#define DDESC_CNTL_TXCRCDIS (1U << 26)
+#define DDESC_CNTL_TXRINGEND (1U << 25)
+#define DDESC_CNTL_TXCHAIN (1U << 24)
+
+#define DDESC_CNTL_CHAINED (1U << 24)
/*
* A hardware buffer descriptor. Rx and Tx buffers have the same descriptor
- * layout, but the bits in the flags field have different meanings.
+ * layout, but the bits in the fields have different meanings.
*/
struct dwc_hwdesc
{
- uint32_t tdes0;
- uint32_t tdes1;
+ uint32_t tdes0; /* status for alt layout */
+ uint32_t tdes1; /* cntl for alt layout */
uint32_t addr; /* pointer to buffer data */
uint32_t addr_next; /* link to next descriptor */
- uint32_t tdes4;
- uint32_t tdes5;
- uint32_t timestamp_low;
- uint32_t timestamp_high;
};
/*
- * Driver data and defines.
- */
-#ifndef __rtems__
-#define RX_DESC_COUNT 1024
-#else /* __rtems__ */
-#define RX_DESC_COUNT 256
-#endif /* __rtems__ */
-#define RX_DESC_SIZE (sizeof(struct dwc_hwdesc) * RX_DESC_COUNT)
-#define TX_DESC_COUNT 1024
-#define TX_DESC_SIZE (sizeof(struct dwc_hwdesc) * TX_DESC_COUNT)
-#define TX_MAX_DMA_SEGS 8 /* maximum segs in a tx mbuf dma */
-
-/*
* The hardware imposes alignment restrictions on various objects involved in
* DMA transfers. These values are expressed in bytes (not bits).
*/
#define DWC_DESC_RING_ALIGN 2048
-#define DWC_CKSUM_ASSIST (CSUM_IP | CSUM_TCP | CSUM_UDP | \
- CSUM_TCP_IPV6 | CSUM_UDP_IPV6)
-
-struct dwc_softc {
- struct resource *res[2];
- bus_space_tag_t bst;
- bus_space_handle_t bsh;
- device_t dev;
- int mii_clk;
- device_t miibus;
- struct mii_data * mii_softc;
- struct ifnet *ifp;
- int if_flags;
- struct mtx mtx;
- void * intr_cookie;
- struct callout dwc_callout;
- uint8_t phy_conn_type;
- uint8_t mactype;
- boolean_t link_is_up;
- boolean_t is_attached;
- boolean_t is_detaching;
- int tx_watchdog_count;
- int stats_harvest_count;
-
- /* RX */
- bus_dma_tag_t rxdesc_tag;
- bus_dmamap_t rxdesc_map;
- struct dwc_hwdesc *rxdesc_ring;
-#ifndef __rtems__
- bus_addr_t rxdesc_ring_paddr;
- bus_dma_tag_t rxbuf_tag;
-#endif /* __rtems__ */
- struct dwc_bufmap rxbuf_map[RX_DESC_COUNT];
- uint32_t rx_idx;
-
- /* TX */
- bus_dma_tag_t txdesc_tag;
- bus_dmamap_t txdesc_map;
- struct dwc_hwdesc *txdesc_ring;
-#ifndef __rtems__
- bus_addr_t txdesc_ring_paddr;
- bus_dma_tag_t txbuf_tag;
-#endif /* __rtems__ */
- struct dwc_bufmap txbuf_map[TX_DESC_COUNT];
- uint32_t tx_idx_head;
- uint32_t tx_idx_tail;
- int txcount;
-};
-
static struct resource_spec dwc_spec[] = {
{ SYS_RES_MEMORY, 0, RF_ACTIVE },
{ SYS_RES_IRQ, 0, RF_ACTIVE },
@@ -234,13 +157,12 @@ next_rxidx(struct dwc_softc *sc, uint32_t curidx)
}
static inline uint32_t
-next_txidx(struct dwc_softc *sc, uint32_t curidx, int inc)
+next_txidx(struct dwc_softc *sc, uint32_t curidx)
{
- return ((curidx + (uint32_t)inc) % TX_DESC_COUNT);
+ return ((curidx + 1) % TX_DESC_COUNT);
}
-#ifndef __rtems__
static void
dwc_get1paddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
@@ -249,130 +171,75 @@ dwc_get1paddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
return;
*(bus_addr_t *)arg = segs[0].ds_addr;
}
-#endif /* __rtems__ */
-static void
-dwc_setup_txdesc(struct dwc_softc *sc, int csum_flags, int idx,
- bus_dma_segment_t segs[TX_MAX_DMA_SEGS], int nsegs)
+inline static uint32_t
+dwc_setup_txdesc(struct dwc_softc *sc, int idx, bus_addr_t paddr,
+ uint32_t len)
{
- int i;
-
- sc->txcount += nsegs;
+ uint32_t flags;
+ uint32_t nidx;
- idx = next_txidx(sc, idx, nsegs);
- sc->tx_idx_head = idx;
+ nidx = next_txidx(sc, idx);
- /*
- * Fill in the TX descriptors back to front so that OWN bit in first
- * descriptor is set last.
- */
- for (i = nsegs - 1; i >= 0; i--) {
- uint32_t tdes0;
+ /* Addr/len 0 means we're clearing the descriptor after xmit done. */
+ if (paddr == 0 || len == 0) {
+ flags = 0;
+ --sc->txcount;
+ } else {
+ if (sc->mactype == DWC_GMAC_ALT_DESC)
+ flags = DDESC_CNTL_TXCHAIN | DDESC_CNTL_TXFIRST
+ | DDESC_CNTL_TXLAST | DDESC_CNTL_TXINT;
+ else
+ flags = DDESC_TDES0_TXCHAIN | DDESC_TDES0_TXFIRST
+ | DDESC_TDES0_TXLAST | DDESC_TDES0_TXINT;
+ ++sc->txcount;
+ }
- idx = next_txidx(sc, idx, -1);
+ sc->txdesc_ring[idx].addr = (uint32_t)(paddr);
+ if (sc->mactype == DWC_GMAC_ALT_DESC) {
+ sc->txdesc_ring[idx].tdes0 = 0;
+ sc->txdesc_ring[idx].tdes1 = flags | len;
+ } else {
+ sc->txdesc_ring[idx].tdes0 = flags;
+ sc->txdesc_ring[idx].tdes1 = len;
+ }
- sc->txdesc_ring[idx].addr = segs[i].ds_addr;
- sc->txdesc_ring[idx].tdes1 = segs[i].ds_len;
+ if (paddr && len) {
wmb();
-
- tdes0 = DDESC_TDES0_TXCHAIN | DDESC_TDES0_TXINT |
- DDESC_TDES0_OWN;
-
- if (i == 0) {
- tdes0 |= DDESC_TDES0_TXFIRST;
-
- if ((csum_flags & (CSUM_TCP | CSUM_UDP |
- CSUM_TCP_IPV6 | CSUM_UDP_IPV6)) != 0)
- tdes0 |= DDESC_TDES0_CIC_IP_HDR_PYL_PHDR;
- else if ((csum_flags & CSUM_IP) != 0)
- tdes0 |= DDESC_TDES0_CIC_IP_HDR;
- }
-
- if (i == nsegs - 1)
- tdes0 |= DDESC_TDES0_TXLAST;
-
- sc->txdesc_ring[idx].tdes0 = tdes0;
+ sc->txdesc_ring[idx].tdes0 |= DDESC_TDES0_OWN;
wmb();
-
- if (i != 0)
- sc->txbuf_map[idx].mbuf = NULL;
}
+
+ return (nidx);
}
-#ifdef __rtems__
static int
-dwc_get_segs_for_tx(struct mbuf *m, bus_dma_segment_t segs[TX_MAX_DMA_SEGS],
- int *nsegs)
+dwc_setup_txbuf(struct dwc_softc *sc, int idx, struct mbuf **mp)
{
- int i = 0;
-
- do {
- if (m->m_len > 0) {
- segs[i].ds_addr = mtod(m, bus_addr_t);
- segs[i].ds_len = m->m_len;
- rtems_cache_flush_multiple_data_lines(m->m_data, m->m_len);
- ++i;
- }
-
- m = m->m_next;
-
- if (m == NULL) {
- *nsegs = i;
-
- return (0);
- }
- } while (i < TX_MAX_DMA_SEGS);
+ struct bus_dma_segment seg;
+ int error, nsegs;
+ struct mbuf * m;
- return (EFBIG);
-}
-#endif /* __rtems__ */
-static void
-dwc_setup_txbuf(struct dwc_softc *sc, struct mbuf *m, int *start_tx)
-{
- bus_dma_segment_t segs[TX_MAX_DMA_SEGS];
- int error, nsegs, idx;
+ if ((m = m_defrag(*mp, M_NOWAIT)) == NULL)
+ return (ENOMEM);
+ *mp = m;
- idx = sc->tx_idx_head;
-#ifndef __rtems__
error = bus_dmamap_load_mbuf_sg(sc->txbuf_tag, sc->txbuf_map[idx].map,
- m, &seg, &nsegs, BUS_DMA_NOWAIT);
-
-#else /* __rtems__ */
- error = dwc_get_segs_for_tx(m, segs, &nsegs);
-#endif /* __rtems__ */
- if (error == EFBIG) {
- /* Too many segments! Defrag and try again. */
- struct mbuf *m2 = m_defrag(m, M_NOWAIT);
-
- if (m2 == NULL) {
- m_freem(m);
- return;
- }
- m = m2;
-#ifndef __rtems__
- error = bus_dmamap_load_mbuf_sg(sc->txbuf_tag,
- sc->txbuf_map[idx].map, m, &seg, &nsegs, BUS_DMA_NOWAIT);
-#else /* __rtems__ */
- error = dwc_get_segs_for_tx(m, segs, &nsegs);
-#endif /* __rtems__ */
- }
+ m, &seg, &nsegs, 0);
if (error != 0) {
- /* Give up. */
- m_freem(m);
- return;
+ return (ENOMEM);
}
- sc->txbuf_map[idx].mbuf = m;
+ KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
-#ifndef __rtems__
bus_dmamap_sync(sc->txbuf_tag, sc->txbuf_map[idx].map,
BUS_DMASYNC_PREWRITE);
-#endif /* __rtems__ */
- dwc_setup_txdesc(sc, m->m_pkthdr.csum_flags, idx, segs, nsegs);
+ sc->txbuf_map[idx].mbuf = m;
+
+ dwc_setup_txdesc(sc, idx, seg.ds_addr, seg.ds_len);
- ETHER_BPF_MTAP(sc->ifp, m);
- *start_tx = 1;
+ return (0);
}
static void
@@ -380,7 +247,7 @@ dwc_txstart_locked(struct dwc_softc *sc)
{
struct ifnet *ifp;
struct mbuf *m;
- int start_tx;
+ int enqueued;
DWC_ASSERT_LOCKED(sc);
@@ -389,10 +256,14 @@ dwc_txstart_locked(struct dwc_softc *sc)
ifp = sc->ifp;
- start_tx = 0;
+ if (ifp->if_drv_flags & IFF_DRV_OACTIVE) {
+ return;
+ }
+
+ enqueued = 0;
for (;;) {
- if (sc->txcount >= (TX_DESC_COUNT - 1 - TX_MAX_DMA_SEGS)) {
+ if (sc->txcount == (TX_DESC_COUNT-1)) {
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
break;
}
@@ -400,11 +271,16 @@ dwc_txstart_locked(struct dwc_softc *sc)
IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
if (m == NULL)
break;
-
- dwc_setup_txbuf(sc, m, &start_tx);
+ if (dwc_setup_txbuf(sc, sc->tx_idx_head, &m) != 0) {
+ IFQ_DRV_PREPEND(&ifp->if_snd, m);
+ break;
+ }
+ BPF_MTAP(ifp, m);
+ sc->tx_idx_head = next_txidx(sc, sc->tx_idx_head);
+ ++enqueued;
}
- if (start_tx != 0) {
+ if (enqueued != 0) {
WRITE4(sc, TRANSMIT_POLL_DEMAND, 0x1);
sc->tx_watchdog_count = WATCHDOG_TIMEOUT_SECS;
}
@@ -416,8 +292,7 @@ dwc_txstart(struct ifnet *ifp)
struct dwc_softc *sc = ifp->if_softc;
DWC_LOCK(sc);
- if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0)
- dwc_txstart_locked(sc);
+ dwc_txstart_locked(sc);
DWC_UNLOCK(sc);
}
@@ -425,7 +300,7 @@ static void
dwc_stop_locked(struct dwc_softc *sc)
{
struct ifnet *ifp;
- int reg;
+ uint32_t reg;
DWC_ASSERT_LOCKED(sc);
@@ -459,7 +334,7 @@ dwc_stop_locked(struct dwc_softc *sc)
static void dwc_clear_stats(struct dwc_softc *sc)
{
- int reg;
+ uint32_t reg;
reg = READ4(sc, MMC_CONTROL);
reg |= (MMC_CONTROL_CNTRST);
@@ -478,7 +353,6 @@ dwc_harvest_stats(struct dwc_softc *sc)
sc->stats_harvest_count = 0;
ifp = sc->ifp;
-#ifndef __rtems__
if_inc_counter(ifp, IFCOUNTER_IPACKETS, READ4(sc, RXFRAMECOUNT_GB));
if_inc_counter(ifp, IFCOUNTER_IMCASTS, READ4(sc, RXMULTICASTFRAMES_G));
if_inc_counter(ifp, IFCOUNTER_IERRORS,
@@ -495,24 +369,6 @@ dwc_harvest_stats(struct dwc_softc *sc)
if_inc_counter(ifp, IFCOUNTER_COLLISIONS,
READ4(sc, TXEXESSCOL) + READ4(sc, TXLATECOL));
-#else /* __rtems__ */
- ifp->if_ipackets += READ4(sc, RXFRAMECOUNT_GB);
- ifp->if_imcasts += READ4(sc, RXMULTICASTFRAMES_G);
- ifp->if_ierrors +=
- READ4(sc, RXOVERSIZE_G) + READ4(sc, RXUNDERSIZE_G) +
- READ4(sc, RXCRCERROR) + READ4(sc, RXALIGNMENTERROR) +
- READ4(sc, RXRUNTERROR) + READ4(sc, RXJABBERERROR) +
- READ4(sc, RXLENGTHERROR);
-
- ifp->if_opackets += READ4(sc, TXFRAMECOUNT_G);
- ifp->if_omcasts += READ4(sc, TXMULTICASTFRAMES_G);
- ifp->if_oerrors +=
- READ4(sc, TXOVERSIZE_G) + READ4(sc, TXEXCESSDEF) +
- READ4(sc, TXCARRIERERR) + READ4(sc, TXUNDERFLOWERROR);
-
- ifp->if_collisions +=
- READ4(sc, TXEXESSCOL) + READ4(sc, TXLATECOL);
-#endif /* __rtems__ */
dwc_clear_stats(sc);
}
@@ -561,7 +417,7 @@ static void
dwc_init_locked(struct dwc_softc *sc)
{
struct ifnet *ifp = sc->ifp;
- int reg;
+ uint32_t reg;
DWC_ASSERT_LOCKED(sc);
@@ -588,7 +444,6 @@ dwc_init_locked(struct dwc_softc *sc)
/* Enable transmitters */
reg = READ4(sc, MAC_CONFIGURATION);
- reg |= (CONF_IPC);
reg |= (CONF_JD | CONF_ACS | CONF_BE);
reg |= (CONF_TE | CONF_RE);
WRITE4(sc, MAC_CONFIGURATION, reg);
@@ -618,13 +473,12 @@ dwc_setup_rxdesc(struct dwc_softc *sc, int idx, bus_addr_t paddr)
sc->rxdesc_ring[idx].addr = (uint32_t)paddr;
nidx = next_rxidx(sc, idx);
-#ifndef __rtems__
sc->rxdesc_ring[idx].addr_next = sc->rxdesc_ring_paddr + \
(nidx * sizeof(struct dwc_hwdesc));
-#else /* __rtems__ */
- sc->rxdesc_ring[idx].addr_next = (uint32_t)&sc->rxdesc_ring[nidx];
-#endif /* __rtems__ */
- sc->rxdesc_ring[idx].tdes1 = DDESC_RDES1_CHAINED | MCLBYTES;
+ if (sc->mactype == DWC_GMAC_ALT_DESC)
+ sc->rxdesc_ring[idx].tdes1 = DDESC_CNTL_CHAINED | RX_MAX_PACKET;
+ else
+ sc->rxdesc_ring[idx].tdes1 = DDESC_RDES1_CHAINED | MCLBYTES;
wmb();
sc->rxdesc_ring[idx].tdes0 = DDESC_RDES0_OWN;
@@ -636,14 +490,11 @@ dwc_setup_rxdesc(struct dwc_softc *sc, int idx, bus_addr_t paddr)
static int
dwc_setup_rxbuf(struct dwc_softc *sc, int idx, struct mbuf *m)
{
- bus_dma_segment_t seg;
-#ifndef __rtems__
+ struct bus_dma_segment seg;
int error, nsegs;
-#endif /* __rtems__ */
m_adj(m, ETHER_ALIGN);
-#ifndef __rtems__
error = bus_dmamap_load_mbuf_sg(sc->rxbuf_tag, sc->rxbuf_map[idx].map,
m, &seg, &nsegs, 0);
if (error != 0) {
@@ -654,10 +505,6 @@ dwc_setup_rxbuf(struct dwc_softc *sc, int idx, struct mbuf *m)
bus_dmamap_sync(sc->rxbuf_tag, sc->rxbuf_map[idx].map,
BUS_DMASYNC_PREREAD);
-#else /* __rtems__ */
- rtems_cache_invalidate_multiple_data_lines(m->m_data, m->m_len);
- seg.ds_addr = mtod(m, bus_addr_t);
-#endif /* __rtems__ */
sc->rxbuf_map[idx].mbuf = m;
dwc_setup_rxdesc(sc, idx, seg.ds_addr);
@@ -743,27 +590,26 @@ dwc_setup_rxfilter(struct dwc_softc *sc)
{
struct ifmultiaddr *ifma;
struct ifnet *ifp;
- uint8_t *eaddr;
- uint32_t crc;
- uint8_t val;
- int hashbit;
- int hashreg;
- int ffval;
- int reg;
- int lo;
- int hi;
+ uint8_t *eaddr, val;
+ uint32_t crc, ffval, hashbit, hashreg, hi, lo, hash[8];
+ int nhash, i;
DWC_ASSERT_LOCKED(sc);
ifp = sc->ifp;
+ nhash = sc->mactype == DWC_GMAC_ALT_DESC ? 2 : 8;
/*
* Set the multicast (group) filter hash.
*/
- if ((ifp->if_flags & IFF_ALLMULTI))
+ if ((ifp->if_flags & IFF_ALLMULTI) != 0) {
ffval = (FRAME_FILTER_PM);
- else {
+ for (i = 0; i < nhash; i++)
+ hash[i] = ~0;
+ } else {
ffval = (FRAME_FILTER_HMC);
+ for (i = 0; i < nhash; i++)
+ hash[i] = 0;
if_maddr_rlock(ifp);
TAILQ_FOREACH(ifma, &sc->ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
@@ -773,12 +619,11 @@ dwc_setup_rxfilter(struct dwc_softc *sc)
/* Take lower 8 bits and reverse it */
val = bitreverse(~crc & 0xff);
+ if (sc->mactype == DWC_GMAC_ALT_DESC)
+ val >>= nhash; /* Only need lower 6 bits */
hashreg = (val >> 5);
hashbit = (val & 31);
-
- reg = READ4(sc, HASH_TABLE_REG(hashreg));
- reg |= (1 << hashbit);
- WRITE4(sc, HASH_TABLE_REG(hashreg), reg);
+ hash[hashreg] |= (1 << hashbit);
}
if_maddr_runlock(ifp);
}
@@ -799,6 +644,13 @@ dwc_setup_rxfilter(struct dwc_softc *sc)
WRITE4(sc, MAC_ADDRESS_LOW(0), lo);
WRITE4(sc, MAC_ADDRESS_HIGH(0), hi);
WRITE4(sc, MAC_FRAME_FILTER, ffval);
+ if (sc->mactype == DWC_GMAC_ALT_DESC) {
+ WRITE4(sc, GMAC_MAC_HTLOW, hash[0]);
+ WRITE4(sc, GMAC_MAC_HTHIGH, hash[1]);
+ } else {
+ for (i = 0; i < nhash; i++)
+ WRITE4(sc, HASH_TABLE_REG(i), hash[i]);
+ }
}
static int
@@ -866,28 +718,27 @@ dwc_txfinish_locked(struct dwc_softc *sc)
{
struct dwc_bufmap *bmap;
struct dwc_hwdesc *desc;
+ struct ifnet *ifp;
DWC_ASSERT_LOCKED(sc);
+ ifp = sc->ifp;
while (sc->tx_idx_tail != sc->tx_idx_head) {
desc = &sc->txdesc_ring[sc->tx_idx_tail];
if ((desc->tdes0 & DDESC_TDES0_OWN) != 0)
break;
bmap = &sc->txbuf_map[sc->tx_idx_tail];
-#ifndef __rtems__
bus_dmamap_sync(sc->txbuf_tag, bmap->map,
BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(sc->txbuf_tag, bmap->map);
-#endif /* __rtems__ */
m_freem(bmap->mbuf);
bmap->mbuf = NULL;
- --sc->txcount;
- sc->tx_idx_tail = next_txidx(sc, sc->tx_idx_tail, 1);
+ dwc_setup_txdesc(sc, sc->tx_idx_tail, 0, 0);
+ sc->tx_idx_tail = next_txidx(sc, sc->tx_idx_tail);
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
}
- sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- dwc_txstart_locked(sc);
-
/* If there are no buffers outstanding, muzzle the watchdog. */
if (sc->tx_idx_tail == sc->tx_idx_head) {
sc->tx_watchdog_count = 0;
@@ -900,11 +751,8 @@ dwc_rxfinish_locked(struct dwc_softc *sc)
struct ifnet *ifp;
struct mbuf *m0;
struct mbuf *m;
- int error;
+ int error, idx, len;
uint32_t rdes0;
- uint32_t rdes4;
- int idx;
- int len;
ifp = sc->ifp;
@@ -915,78 +763,20 @@ dwc_rxfinish_locked(struct dwc_softc *sc)
if ((rdes0 & DDESC_RDES0_OWN) != 0)
break;
- sc->rx_idx = next_rxidx(sc, idx);
-
- m = sc->rxbuf_map[idx].mbuf;
-
- m0 = dwc_alloc_mbufcl(sc);
- if (m0 == NULL) {
- m0 = m;
-
- /* Account for m_adj() in dwc_setup_rxbuf() */
- m0->m_data = m0->m_ext.ext_buf;
- }
-
- if ((error = dwc_setup_rxbuf(sc, idx, m0)) != 0) {
- /*
- * XXX Now what?
- * We've got a hole in the rx ring.
- */
- }
-
- if (m0 == m) {
- /* Discard frame and continue */
-#ifndef __rtems__
- if_inc_counter(sc->ifp, IFCOUNTER_IQDROPS, 1);
-#else /* __rtems__ */
- ++ifp->if_iqdrops;
-#endif /* __rtems__ */
- continue;
- }
-
-#ifndef __rtems__
bus_dmamap_sync(sc->rxbuf_tag, sc->rxbuf_map[idx].map,
BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(sc->rxbuf_tag, sc->rxbuf_map[idx].map);
-#endif /* __rtems__ */
len = (rdes0 >> DDESC_RDES0_FL_SHIFT) & DDESC_RDES0_FL_MASK;
if (len != 0) {
+ m = sc->rxbuf_map[idx].mbuf;
m->m_pkthdr.rcvif = ifp;
m->m_pkthdr.len = len;
m->m_len = len;
-
- /* Check checksum offload flags. */
- if ((rdes0 & DDESC_RDES0_ESA) != 0) {
- rdes4 = sc->rxdesc_ring[idx].tdes4;
-
- /* TCP or UDP checks out, IP checks out too. */
- if ((rdes4 & DDESC_RDES4_IP_PYL_TYPE_MSK) ==
- DDESC_RDES4_IP_PYL_UDP ||
- (rdes4 & DDESC_RDES4_IP_PYL_TYPE_MSK) ==
- DDESC_RDES4_IP_PYL_TCP) {
- m->m_pkthdr.csum_flags |=
- CSUM_IP_CHECKED |
- CSUM_IP_VALID |
- CSUM_DATA_VALID |
- CSUM_PSEUDO_HDR;
- m->m_pkthdr.csum_data = 0xffff;
- } else if ((rdes4 & (DDESC_RDES4_IP_PYL_ERR |
- DDESC_RDES4_IP_HDR_ERR)) == 0) {
- /* Only IP checks out. */
- m->m_pkthdr.csum_flags |=
- CSUM_IP_CHECKED |
- CSUM_IP_VALID;
- m->m_pkthdr.csum_data = 0xffff;
- }
- }
-
-#ifndef __rtems__
if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
-#else /* __rtems__ */
- ++ifp->if_ipackets;
- rtems_cache_invalidate_multiple_data_lines(m->m_data, m->m_len);
-#endif /* __rtems__ */
+
+ /* Remove trailing FCS */
+ m_adj(m, -ETHER_CRC_LEN);
DWC_UNLOCK(sc);
(*ifp->if_input)(ifp, m);
@@ -994,6 +784,18 @@ dwc_rxfinish_locked(struct dwc_softc *sc)
} else {
/* XXX Zero-length packet ? */
}
+
+ if ((m0 = dwc_alloc_mbufcl(sc)) != NULL) {
+ if ((error = dwc_setup_rxbuf(sc, idx, m0)) != 0) {
+ /*
+ * XXX Now what?
+ * We've got a hole in the rx ring.
+ */
+ }
+ } else
+ if_inc_counter(sc->ifp, IFCOUNTER_IQDROPS, 1);
+
+ sc->rx_idx = next_rxidx(sc, sc->rx_idx);
}
}
@@ -1008,28 +810,31 @@ dwc_intr(void *arg)
DWC_LOCK(sc);
reg = READ4(sc, INTERRUPT_STATUS);
- if (reg) {
- mii_mediachg(sc->mii_softc);
+ if (reg)
READ4(sc, SGMII_RGMII_SMII_CTRL_STATUS);
- }
reg = READ4(sc, DMA_STATUS);
- WRITE4(sc, DMA_STATUS, reg & DMA_STATUS_INTR_MASK);
-
- if (reg & (DMA_STATUS_RI | DMA_STATUS_RU))
- dwc_rxfinish_locked(sc);
+ if (reg & DMA_STATUS_NIS) {
+ if (reg & DMA_STATUS_RI)
+ dwc_rxfinish_locked(sc);
- if (reg & DMA_STATUS_TI)
- dwc_txfinish_locked(sc);
+ if (reg & DMA_STATUS_TI) {
+ dwc_txfinish_locked(sc);
+ dwc_txstart_locked(sc);
+ }
+ }
- if (reg & DMA_STATUS_FBI) {
- /* Fatal bus error */
- device_printf(sc->dev,
- "Ethernet DMA error, restarting controller.\n");
- dwc_stop_locked(sc);
- dwc_init_locked(sc);
+ if (reg & DMA_STATUS_AIS) {
+ if (reg & DMA_STATUS_FBI) {
+ /* Fatal bus error */
+ device_printf(sc->dev,
+ "Ethernet DMA error, restarting controller.\n");
+ dwc_stop_locked(sc);
+ dwc_init_locked(sc);
+ }
}
+ WRITE4(sc, DMA_STATUS, reg & DMA_STATUS_INTR_MASK);
DWC_UNLOCK(sc);
}
@@ -1070,7 +875,6 @@ setup_dma(struct dwc_softc *sc)
goto out;
}
-#ifndef __rtems__
error = bus_dmamap_load(sc->txdesc_tag, sc->txdesc_map,
sc->txdesc_ring, TX_DESC_SIZE, dwc_get1paddr,
&sc->txdesc_ring_paddr, 0);
@@ -1079,30 +883,20 @@ setup_dma(struct dwc_softc *sc)
"could not load TX descriptor ring map.\n");
goto out;
}
-#endif /* __rtems__ */
for (idx = 0; idx < TX_DESC_COUNT; idx++) {
- sc->txdesc_ring[idx].addr = 0;
- sc->txdesc_ring[idx].tdes0 = DDESC_TDES0_TXCHAIN;
- sc->txdesc_ring[idx].tdes1 = 0;
- nidx = next_txidx(sc, idx, 1);
-#ifndef __rtems__
- sc->txdesc_ring[idx].addr_next = sc->txdesc_ring_paddr + \
+ nidx = next_txidx(sc, idx);
+ sc->txdesc_ring[idx].addr_next = sc->txdesc_ring_paddr +
(nidx * sizeof(struct dwc_hwdesc));
-#else /* __rtems__ */
- sc->txdesc_ring[idx].addr_next =
- (uint32_t)&sc->txdesc_ring[nidx];
-#endif /* __rtems__ */
}
-#ifndef __rtems__
error = bus_dma_tag_create(
bus_get_dma_tag(sc->dev), /* Parent tag. */
1, 0, /* alignment, boundary */
BUS_SPACE_MAXADDR_32BIT, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
- MCLBYTES, TX_MAX_DMA_SEGS, /* maxsize, nsegments */
+ MCLBYTES, 1, /* maxsize, nsegments */
MCLBYTES, /* maxsegsize */
0, /* flags */
NULL, NULL, /* lockfunc, lockarg */
@@ -1112,10 +906,8 @@ setup_dma(struct dwc_softc *sc)
"could not create TX ring DMA tag.\n");
goto out;
}
-#endif /* __rtems__ */
for (idx = 0; idx < TX_DESC_COUNT; idx++) {
-#ifndef __rtems__
error = bus_dmamap_create(sc->txbuf_tag, BUS_DMA_COHERENT,
&sc->txbuf_map[idx].map);
if (error != 0) {
@@ -1123,7 +915,7 @@ setup_dma(struct dwc_softc *sc)
"could not create TX buffer DMA map.\n");
goto out;
}
-#endif /* __rtems__ */
+ dwc_setup_txdesc(sc, idx, 0, 0);
}
/*
@@ -1155,7 +947,6 @@ setup_dma(struct dwc_softc *sc)
goto out;
}
-#ifndef __rtems__
error = bus_dmamap_load(sc->rxdesc_tag, sc->rxdesc_map,
sc->rxdesc_ring, RX_DESC_SIZE, dwc_get1paddr,
&sc->rxdesc_ring_paddr, 0);
@@ -1181,10 +972,8 @@ setup_dma(struct dwc_softc *sc)
"could not create RX buf DMA tag.\n");
goto out;
}
-#endif /* __rtems__ */
for (idx = 0; idx < RX_DESC_COUNT; idx++) {
-#ifndef __rtems__
error = bus_dmamap_create(sc->rxbuf_tag, BUS_DMA_COHERENT,
&sc->rxbuf_map[idx].map);
if (error != 0) {
@@ -1192,7 +981,6 @@ setup_dma(struct dwc_softc *sc)
"could not create RX buffer DMA map.\n");
goto out;
}
-#endif /* __rtems__ */
if ((m = dwc_alloc_mbufcl(sc)) == NULL) {
device_printf(sc->dev, "Could not alloc mbuf\n");
error = ENOMEM;
@@ -1203,7 +991,6 @@ setup_dma(struct dwc_softc *sc)
"could not create new RX buffer.\n");
goto out;
}
- sc->rxdesc_ring[idx].tdes4 = 0;
}
out:
@@ -1216,11 +1003,7 @@ out:
static int
dwc_get_hwaddr(struct dwc_softc *sc, uint8_t *hwaddr)
{
-#ifndef __rtems__
- int rnd;
-#endif /* __rtems__ */
- int lo;
- int hi;
+ uint32_t hi, lo, rnd;
/*
* Try to recover a MAC address from the running hardware. If there's
@@ -1241,7 +1024,6 @@ dwc_get_hwaddr(struct dwc_softc *sc, uint8_t *hwaddr)
hwaddr[4] = (hi >> 0) & 0xff;
hwaddr[5] = (hi >> 8) & 0xff;
} else {
-#ifndef __rtems__
rnd = arc4random() & 0x00ffffff;
hwaddr[0] = 'b';
hwaddr[1] = 's';
@@ -1249,14 +1031,98 @@ dwc_get_hwaddr(struct dwc_softc *sc, uint8_t *hwaddr)
hwaddr[3] = rnd >> 16;
hwaddr[4] = rnd >> 8;
hwaddr[5] = rnd >> 0;
-#else /* __rtems__ */
- rtems_bsd_get_mac_address(device_get_name(sc->dev),
- device_get_unit(sc->dev), hwaddr);
+ }
+
+ return (0);
+}
+
+#define GPIO_ACTIVE_LOW 1
+
+static int
+dwc_reset(device_t dev)
+{
+#ifndef __rtems__
+ pcell_t gpio_prop[4];
+ pcell_t delay_prop[3];
+ phandle_t node, gpio_node;
+ device_t gpio;
+ uint32_t pin, flags;
+ uint32_t pin_value;
+
+ node = ofw_bus_get_node(dev);
+ if (OF_getencprop(node, "snps,reset-gpio",
+ gpio_prop, sizeof(gpio_prop)) <= 0)
+ return (0);
+
+ if (OF_getencprop(node, "snps,reset-delays-us",
+ delay_prop, sizeof(delay_prop)) <= 0) {
+ device_printf(dev,
+ "Wrong property for snps,reset-delays-us");
+ return (ENXIO);
+ }
+
+ gpio_node = OF_node_from_xref(gpio_prop[0]);
+ if ((gpio = OF_device_from_xref(gpio_prop[0])) == NULL) {
+ device_printf(dev,
+ "Can't find gpio controller for phy reset\n");
+ return (ENXIO);
+ }
+
+ if (GPIO_MAP_GPIOS(gpio, node, gpio_node,
+ nitems(gpio_prop) - 1,
+ gpio_prop + 1, &pin, &flags) != 0) {
+ device_printf(dev, "Can't map gpio for phy reset\n");
+ return (ENXIO);
+ }
+
+ pin_value = GPIO_PIN_LOW;
+ if (OF_hasprop(node, "snps,reset-active-low"))
+ pin_value = GPIO_PIN_HIGH;
+
+ if (flags & GPIO_ACTIVE_LOW)
+ pin_value = !pin_value;
+
+ GPIO_PIN_SETFLAGS(gpio, pin, GPIO_PIN_OUTPUT);
+ GPIO_PIN_SET(gpio, pin, pin_value);
+ DELAY(delay_prop[0]);
+ GPIO_PIN_SET(gpio, pin, !pin_value);
+ DELAY(delay_prop[1]);
+ GPIO_PIN_SET(gpio, pin, pin_value);
+ DELAY(delay_prop[2]);
#endif /* __rtems__ */
+
+ return (0);
+}
+
+#ifdef EXT_RESOURCES
+static int
+dwc_clock_init(device_t dev)
+{
+ hwreset_t rst;
+ clk_t clk;
+ int error;
+
+ /* Enable clock */
+ if (clk_get_by_ofw_name(dev, 0, "stmmaceth", &clk) == 0) {
+ error = clk_enable(clk);
+ if (error != 0) {
+ device_printf(dev, "could not enable main clock\n");
+ return (error);
+ }
+ }
+
+ /* De-assert reset */
+ if (hwreset_get_by_ofw_name(dev, 0, "stmmaceth", &rst) == 0) {
+ error = hwreset_deassert(rst);
+ if (error != 0) {
+ device_printf(dev, "could not de-assert reset\n");
+ return (error);
+ }
}
return (0);
}
+#endif
static int
dwc_probe(device_t dev)
@@ -1280,15 +1146,23 @@ dwc_attach(device_t dev)
uint8_t macaddr[ETHER_ADDR_LEN];
struct dwc_softc *sc;
struct ifnet *ifp;
- int error;
- int reg;
- int i;
+ int error, i;
+ uint32_t reg;
sc = device_get_softc(dev);
sc->dev = dev;
- sc->mii_clk = MII_CLK_VAL;
sc->rx_idx = 0;
- sc->txcount = 0;
+ sc->txcount = TX_DESC_COUNT;
+ sc->mii_clk = IF_DWC_MII_CLK(dev);
+ sc->mactype = IF_DWC_MAC_TYPE(dev);
+
+ if (IF_DWC_INIT(dev) != 0)
+ return (ENXIO);
+
+#ifdef EXT_RESOURCES
+ if (dwc_clock_init(dev) != 0)
+ return (ENXIO);
+#endif
if (bus_alloc_resources(dev, dwc_spec, sc->res)) {
device_printf(dev, "could not allocate resources\n");
@@ -1305,6 +1179,12 @@ dwc_attach(device_t dev)
return (ENXIO);
}
+ /* Reset the PHY if needed */
+ if (dwc_reset(dev) != 0) {
+ device_printf(dev, "Can't reset the PHY\n");
+ return (ENXIO);
+ }
+
/* Reset */
reg = READ4(sc, BUS_MODE);
reg |= (BUS_MODE_SWR);
@@ -1320,9 +1200,11 @@ dwc_attach(device_t dev)
return (ENXIO);
}
- reg = READ4(sc, BUS_MODE);
- reg |= (BUS_MODE_ATDS);
- reg |= (BUS_MODE_EIGHTXPBL);
+ if (sc->mactype == DWC_GMAC_ALT_DESC) {
+ reg = BUS_MODE_FIXEDBURST;
+ reg |= (BUS_MODE_PRIORXTX_41 << BUS_MODE_PRIORXTX_SHIFT);
+ } else
+ reg = (BUS_MODE_EIGHTXPBL);
reg |= (BUS_MODE_PBL_BEATS_8 << BUS_MODE_PBL_SHIFT);
WRITE4(sc, BUS_MODE, reg);
@@ -1337,36 +1219,36 @@ dwc_attach(device_t dev)
return (ENXIO);
/* Setup addresses */
-#ifndef __rtems__
WRITE4(sc, RX_DESCR_LIST_ADDR, sc->rxdesc_ring_paddr);
WRITE4(sc, TX_DESCR_LIST_ADDR, sc->txdesc_ring_paddr);
-#else /* __rtems__ */
- WRITE4(sc, RX_DESCR_LIST_ADDR, (uint32_t)&sc->rxdesc_ring[0]);
- WRITE4(sc, TX_DESCR_LIST_ADDR, (uint32_t)&sc->txdesc_ring[0]);
-#endif /* __rtems__ */
mtx_init(&sc->mtx, device_get_nameunit(sc->dev),
MTX_NETWORK_LOCK, MTX_DEF);
callout_init_mtx(&sc->dwc_callout, &sc->mtx, 0);
+ /* Setup interrupt handler. */
+ error = bus_setup_intr(dev, sc->res[1], INTR_TYPE_NET | INTR_MPSAFE,
+ NULL, dwc_intr, sc, &sc->intr_cookie);
+ if (error != 0) {
+ device_printf(dev, "could not setup interrupt handler.\n");
+ return (ENXIO);
+ }
+
/* Set up the ethernet interface. */
sc->ifp = ifp = if_alloc(IFT_ETHER);
ifp->if_softc = sc;
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
- IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM;
+ ifp->if_capabilities = IFCAP_VLAN_MTU;
ifp->if_capenable = ifp->if_capabilities;
- ifp->if_hwassist = DWC_CKSUM_ASSIST;
ifp->if_start = dwc_txstart;
ifp->if_ioctl = dwc_ioctl;
ifp->if_init = dwc_init;
IFQ_SET_MAXLEN(&ifp->if_snd, TX_DESC_COUNT - 1);
ifp->if_snd.ifq_drv_maxlen = TX_DESC_COUNT - 1;
IFQ_SET_READY(&ifp->if_snd);
- ifp->if_hdrlen = sizeof(struct ether_vlan_header);
/* Attach the mii driver. */
error = mii_attach(dev, &sc->miibus, ifp, dwc_media_change,
@@ -1379,14 +1261,6 @@ dwc_attach(device_t dev)
}
sc->mii_softc = device_get_softc(sc->miibus);
- /* Setup interrupt handler. */
- error = bus_setup_intr(dev, sc->res[1], INTR_TYPE_NET | INTR_MPSAFE,
- NULL, dwc_intr, sc, &sc->intr_cookie);
- if (error != 0) {
- device_printf(dev, "could not setup interrupt handler.\n");
- return (ENXIO);
- }
-
/* All ready to run, attach the ethernet interface. */
ether_ifattach(ifp, macaddr);
sc->is_attached = true;
@@ -1454,7 +1328,7 @@ dwc_miibus_statchg(device_t dev)
{
struct dwc_softc *sc;
struct mii_data *mii;
- int reg;
+ uint32_t reg;
/*
* Called by the MII bus driver when the PHY establishes
@@ -1513,7 +1387,7 @@ static device_method_t dwc_methods[] = {
{ 0, 0 }
};
-static driver_t dwc_driver = {
+driver_t dwc_driver = {
"dwc",
dwc_methods,
sizeof(struct dwc_softc),
@@ -1521,11 +1395,7 @@ static driver_t dwc_driver = {
static devclass_t dwc_devclass;
-#ifndef __rtems__
DRIVER_MODULE(dwc, simplebus, dwc_driver, dwc_devclass, 0, 0);
-#else /* __rtems__ */
-DRIVER_MODULE(dwc, nexus, dwc_driver, dwc_devclass, 0, 0);
-#endif /* __rtems__ */
DRIVER_MODULE(miibus, dwc, miibus_driver, miibus_devclass, 0, 0);
MODULE_DEPEND(dwc, ether, 1, 1, 1);
diff --git a/freebsd/sys/dev/dwc/if_dwc.h b/freebsd/sys/dev/dwc/if_dwc.h
index 2ba9860e..d88ca0db 100644
--- a/freebsd/sys/dev/dwc/if_dwc.h
+++ b/freebsd/sys/dev/dwc/if_dwc.h
@@ -34,23 +34,27 @@
* Register names were taken almost as is from the documentation.
*/
+#ifndef __IF_DWC_H__
+#define __IF_DWC_H__
+
#define MAC_CONFIGURATION 0x0
#define CONF_JD (1 << 22) /* jabber timer disable */
#define CONF_BE (1 << 21) /* Frame Burst Enable */
#define CONF_PS (1 << 15) /* GMII/MII */
#define CONF_FES (1 << 14) /* MII speed select */
#define CONF_DM (1 << 11) /* Full Duplex Enable */
-#define CONF_IPC (1 << 10) /* Checksum Enable */
#define CONF_ACS (1 << 7)
#define CONF_TE (1 << 3)
#define CONF_RE (1 << 2)
#define MAC_FRAME_FILTER 0x4
-#define FRAME_FILTER_RA (1 << 31) /* Receive All */
+#define FRAME_FILTER_RA (1U << 31) /* Receive All */
#define FRAME_FILTER_HPF (1 << 10) /* Hash or Perfect Filter */
#define FRAME_FILTER_PM (1 << 4) /* Pass multicast */
#define FRAME_FILTER_HMC (1 << 2)
#define FRAME_FILTER_HUC (1 << 1)
#define FRAME_FILTER_PR (1 << 0) /* All Incoming Frames */
+#define GMAC_MAC_HTHIGH 0x08
+#define GMAC_MAC_HTLOW 0x0c
#define GMII_ADDRESS 0x10
#define GMII_ADDRESS_PA_MASK 0x1f /* Phy device */
#define GMII_ADDRESS_PA_SHIFT 11
@@ -208,9 +212,14 @@
/* DMA */
#define BUS_MODE 0x1000
#define BUS_MODE_EIGHTXPBL (1 << 24) /* Multiplies PBL by 8 */
+#define BUS_MODE_FIXEDBURST (1 << 16)
+#define BUS_MODE_PRIORXTX_SHIFT 14
+#define BUS_MODE_PRIORXTX_41 3
+#define BUS_MODE_PRIORXTX_31 2
+#define BUS_MODE_PRIORXTX_21 1
+#define BUS_MODE_PRIORXTX_11 0
#define BUS_MODE_PBL_SHIFT 8 /* Single block transfer size */
#define BUS_MODE_PBL_BEATS_8 8
-#define BUS_MODE_ATDS (1 << 7) /* Alternate Descriptor Size */
#define BUS_MODE_SWR (1 << 0) /* Reset */
#define TRANSMIT_POLL_DEMAND 0x1004
#define RECEIVE_POLL_DEMAND 0x1008
@@ -220,7 +229,6 @@
#define DMA_STATUS_NIS (1 << 16)
#define DMA_STATUS_AIS (1 << 15)
#define DMA_STATUS_FBI (1 << 13)
-#define DMA_STATUS_RU (1 << 7)
#define DMA_STATUS_RI (1 << 6)
#define DMA_STATUS_TI (1 << 0)
#define DMA_STATUS_INTR_MASK 0x1ffff
@@ -263,3 +271,22 @@
#define CURRENT_HOST_TRANSMIT_BUF_ADDR 0x1050
#define CURRENT_HOST_RECEIVE_BUF_ADDR 0x1054
#define HW_FEATURE 0x1058
+
+#define DWC_GMAC 0x1
+#define DWC_GMAC_ALT_DESC 0x2
+#define GMAC_MII_CLK_60_100M_DIV42 0x0
+#define GMAC_MII_CLK_100_150M_DIV62 0x1
+#define GMAC_MII_CLK_25_35M_DIV16 0x2
+#define GMAC_MII_CLK_35_60M_DIV26 0x3
+#define GMAC_MII_CLK_150_250M_DIV102 0x4
+#define GMAC_MII_CLK_250_300M_DIV124 0x5
+#define GMAC_MII_CLK_DIV4 0x8
+#define GMAC_MII_CLK_DIV6 0x9
+#define GMAC_MII_CLK_DIV8 0xa
+#define GMAC_MII_CLK_DIV10 0xb
+#define GMAC_MII_CLK_DIV12 0xc
+#define GMAC_MII_CLK_DIV14 0xd
+#define GMAC_MII_CLK_DIV16 0xe
+#define GMAC_MII_CLK_DIV18 0xf
+
+#endif /* __IF_DWC_H__ */
diff --git a/freebsd/sys/dev/dwc/if_dwcvar.h b/freebsd/sys/dev/dwc/if_dwcvar.h
new file mode 100644
index 00000000..d9198bda
--- /dev/null
+++ b/freebsd/sys/dev/dwc/if_dwcvar.h
@@ -0,0 +1,99 @@
+/*-
+ * Copyright (c) 2014 Ruslan Bukin <br@bsdpad.com>
+ * All rights reserved.
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
+ * ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Ethernet media access controller (EMAC)
+ * Chapter 17, Altera Cyclone V Device Handbook (CV-5V2 2014.07.22)
+ *
+ * EMAC is an instance of the Synopsys DesignWare 3504-0
+ * Universal 10/100/1000 Ethernet MAC (DWC_gmac).
+ */
+
+#ifndef __IF_DWCVAR_H__
+#define __IF_DWCVAR_H__
+
+/*
+ * Driver data and defines.
+ */
+#define RX_MAX_PACKET 0x7ff
+#define RX_DESC_COUNT 1024
+#define RX_DESC_SIZE (sizeof(struct dwc_hwdesc) * RX_DESC_COUNT)
+#define TX_DESC_COUNT 1024
+#define TX_DESC_SIZE (sizeof(struct dwc_hwdesc) * TX_DESC_COUNT)
+
+struct dwc_bufmap {
+ bus_dmamap_t map;
+ struct mbuf *mbuf;
+};
+
+struct dwc_softc {
+ struct resource *res[2];
+ bus_space_tag_t bst;
+ bus_space_handle_t bsh;
+ device_t dev;
+ int mactype;
+ int mii_clk;
+ device_t miibus;
+ struct mii_data * mii_softc;
+ struct ifnet *ifp;
+ int if_flags;
+ struct mtx mtx;
+ void * intr_cookie;
+ struct callout dwc_callout;
+ boolean_t link_is_up;
+ boolean_t is_attached;
+ boolean_t is_detaching;
+ int tx_watchdog_count;
+ int stats_harvest_count;
+
+ /* RX */
+ bus_dma_tag_t rxdesc_tag;
+ bus_dmamap_t rxdesc_map;
+ struct dwc_hwdesc *rxdesc_ring;
+ bus_addr_t rxdesc_ring_paddr;
+ bus_dma_tag_t rxbuf_tag;
+ struct dwc_bufmap rxbuf_map[RX_DESC_COUNT];
+ uint32_t rx_idx;
+
+ /* TX */
+ bus_dma_tag_t txdesc_tag;
+ bus_dmamap_t txdesc_map;
+ struct dwc_hwdesc *txdesc_ring;
+ bus_addr_t txdesc_ring_paddr;
+ bus_dma_tag_t txbuf_tag;
+ struct dwc_bufmap txbuf_map[TX_DESC_COUNT];
+ uint32_t tx_idx_head;
+ uint32_t tx_idx_tail;
+ int txcount;
+};
+
+#endif /* __IF_DWCVAR_H__ */
diff --git a/freebsd/sys/dev/e1000/e1000_80003es2lan.c b/freebsd/sys/dev/e1000/e1000_80003es2lan.c
index 62e9fc42..bd666441 100644
--- a/freebsd/sys/dev/e1000/e1000_80003es2lan.c
+++ b/freebsd/sys/dev/e1000/e1000_80003es2lan.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2013, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -853,11 +853,17 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw)
e1000_release_phy_80003es2lan(hw);
/* Disable IBIST slave mode (far-end loopback) */
- e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
- &kum_reg_data);
- kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
- e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
- kum_reg_data);
+ ret_val = e1000_read_kmrn_reg_80003es2lan(hw,
+ E1000_KMRNCTRLSTA_INBAND_PARAM, &kum_reg_data);
+ if (!ret_val) {
+ kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
+ ret_val = e1000_write_kmrn_reg_80003es2lan(hw,
+ E1000_KMRNCTRLSTA_INBAND_PARAM,
+ kum_reg_data);
+ if (ret_val)
+ DEBUGOUT("Error disabling far-end loopback\n");
+ } else
+ DEBUGOUT("Error disabling far-end loopback\n");
ret_val = e1000_get_auto_rd_done_generic(hw);
if (ret_val)
@@ -913,11 +919,18 @@ static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw)
return ret_val;
/* Disable IBIST slave mode (far-end loopback) */
- e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
- &kum_reg_data);
- kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
- e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
- kum_reg_data);
+ ret_val =
+ e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
+ &kum_reg_data);
+ if (!ret_val) {
+ kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
+ ret_val = e1000_write_kmrn_reg_80003es2lan(hw,
+ E1000_KMRNCTRLSTA_INBAND_PARAM,
+ kum_reg_data);
+ if (ret_val)
+ DEBUGOUT("Error disabling far-end loopback\n");
+ } else
+ DEBUGOUT("Error disabling far-end loopback\n");
/* Set the transmit descriptor write-back policy */
reg_data = E1000_READ_REG(hw, E1000_TXDCTL(0));
diff --git a/freebsd/sys/dev/e1000/e1000_80003es2lan.h b/freebsd/sys/dev/e1000/e1000_80003es2lan.h
index 3807e463..89b15512 100644
--- a/freebsd/sys/dev/e1000/e1000_80003es2lan.h
+++ b/freebsd/sys/dev/e1000/e1000_80003es2lan.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2013, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/dev/e1000/e1000_82540.c b/freebsd/sys/dev/e1000/e1000_82540.c
index c769c901..e6311762 100644
--- a/freebsd/sys/dev/e1000/e1000_82540.c
+++ b/freebsd/sys/dev/e1000/e1000_82540.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2011, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -68,7 +68,7 @@ static s32 e1000_read_mac_addr_82540(struct e1000_hw *hw);
static s32 e1000_init_phy_params_82540(struct e1000_hw *hw)
{
struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val = E1000_SUCCESS;
+ s32 ret_val;
phy->addr = 1;
phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT;
@@ -331,7 +331,7 @@ static s32 e1000_init_hw_82540(struct e1000_hw *hw)
{
struct e1000_mac_info *mac = &hw->mac;
u32 txdctl, ctrl_ext;
- s32 ret_val = E1000_SUCCESS;
+ s32 ret_val;
u16 i;
DEBUGFUNC("e1000_init_hw_82540");
@@ -413,7 +413,7 @@ static s32 e1000_init_hw_82540(struct e1000_hw *hw)
static s32 e1000_setup_copper_link_82540(struct e1000_hw *hw)
{
u32 ctrl;
- s32 ret_val = E1000_SUCCESS;
+ s32 ret_val;
u16 data;
DEBUGFUNC("e1000_setup_copper_link_82540");
@@ -500,7 +500,7 @@ out:
**/
static s32 e1000_adjust_serdes_amplitude_82540(struct e1000_hw *hw)
{
- s32 ret_val = E1000_SUCCESS;
+ s32 ret_val;
u16 nvm_data;
DEBUGFUNC("e1000_adjust_serdes_amplitude_82540");
@@ -530,7 +530,7 @@ out:
**/
static s32 e1000_set_vco_speed_82540(struct e1000_hw *hw)
{
- s32 ret_val = E1000_SUCCESS;
+ s32 ret_val;
u16 default_page = 0;
u16 phy_data;
diff --git a/freebsd/sys/dev/e1000/e1000_82541.c b/freebsd/sys/dev/e1000/e1000_82541.c
index 6bfbb33b..770b3b7b 100644
--- a/freebsd/sys/dev/e1000/e1000_82541.c
+++ b/freebsd/sys/dev/e1000/e1000_82541.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2011, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -51,34 +51,34 @@ static s32 e1000_init_mac_params_82541(struct e1000_hw *hw);
static s32 e1000_reset_hw_82541(struct e1000_hw *hw);
static s32 e1000_init_hw_82541(struct e1000_hw *hw);
static s32 e1000_get_link_up_info_82541(struct e1000_hw *hw, u16 *speed,
- u16 *duplex);
+ u16 *duplex);
static s32 e1000_phy_hw_reset_82541(struct e1000_hw *hw);
static s32 e1000_setup_copper_link_82541(struct e1000_hw *hw);
static s32 e1000_check_for_link_82541(struct e1000_hw *hw);
static s32 e1000_get_cable_length_igp_82541(struct e1000_hw *hw);
static s32 e1000_set_d3_lplu_state_82541(struct e1000_hw *hw,
- bool active);
+ bool active);
static s32 e1000_setup_led_82541(struct e1000_hw *hw);
static s32 e1000_cleanup_led_82541(struct e1000_hw *hw);
static void e1000_clear_hw_cntrs_82541(struct e1000_hw *hw);
static s32 e1000_read_mac_addr_82541(struct e1000_hw *hw);
static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw,
- bool link_up);
+ bool link_up);
static s32 e1000_phy_init_script_82541(struct e1000_hw *hw);
static void e1000_power_down_phy_copper_82541(struct e1000_hw *hw);
-static const u16 e1000_igp_cable_length_table[] =
- { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 25, 25, 25,
- 25, 25, 25, 25, 30, 30, 30, 30, 40, 40, 40, 40, 40, 40, 40, 40,
- 40, 50, 50, 50, 50, 50, 50, 50, 60, 60, 60, 60, 60, 60, 60, 60,
- 60, 70, 70, 70, 70, 70, 70, 80, 80, 80, 80, 80, 80, 90, 90, 90,
- 90, 90, 90, 90, 90, 90, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
- 100, 100, 100, 100, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
- 110, 110, 110, 110, 110, 110, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120};
+static const u16 e1000_igp_cable_length_table[] = {
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 10, 10, 10,
+ 10, 10, 20, 20, 20, 20, 20, 25, 25, 25, 25, 25, 25, 25, 30, 30, 30, 30,
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 50, 50, 50, 50, 50, 50, 50, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 70, 70, 70, 70, 70, 70, 80, 80, 80, 80, 80,
+ 80, 90, 90, 90, 90, 90, 90, 90, 90, 90, 100, 100, 100, 100, 100, 100,
+ 100, 100, 100, 100, 100, 100, 100, 100, 110, 110, 110, 110, 110, 110,
+ 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 120, 120,
+ 120, 120, 120, 120, 120, 120, 120, 120};
#define IGP01E1000_AGC_LENGTH_TABLE_SIZE \
- (sizeof(e1000_igp_cable_length_table) / \
- sizeof(e1000_igp_cable_length_table[0]))
+ (sizeof(e1000_igp_cable_length_table) / \
+ sizeof(e1000_igp_cable_length_table[0]))
/**
* e1000_init_phy_params_82541 - Init PHY func ptrs.
@@ -87,27 +87,27 @@ static const u16 e1000_igp_cable_length_table[] =
static s32 e1000_init_phy_params_82541(struct e1000_hw *hw)
{
struct e1000_phy_info *phy = &hw->phy;
- s32 ret_val = E1000_SUCCESS;
+ s32 ret_val;
DEBUGFUNC("e1000_init_phy_params_82541");
- phy->addr = 1;
- phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT;
- phy->reset_delay_us = 10000;
- phy->type = e1000_phy_igp;
+ phy->addr = 1;
+ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT;
+ phy->reset_delay_us = 10000;
+ phy->type = e1000_phy_igp;
/* Function Pointers */
- phy->ops.check_polarity = e1000_check_polarity_igp;
- phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp;
- phy->ops.get_cable_length = e1000_get_cable_length_igp_82541;
- phy->ops.get_cfg_done = e1000_get_cfg_done_generic;
- phy->ops.get_info = e1000_get_phy_info_igp;
- phy->ops.read_reg = e1000_read_phy_reg_igp;
- phy->ops.reset = e1000_phy_hw_reset_82541;
- phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82541;
- phy->ops.write_reg = e1000_write_phy_reg_igp;
- phy->ops.power_up = e1000_power_up_phy_copper;
- phy->ops.power_down = e1000_power_down_phy_copper_82541;
+ phy->ops.check_polarity = e1000_check_polarity_igp;
+ phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp;
+ phy->ops.get_cable_length = e1000_get_cable_length_igp_82541;
+ phy->ops.get_cfg_done = e1000_get_cfg_done_generic;
+ phy->ops.get_info = e1000_get_phy_info_igp;
+ phy->ops.read_reg = e1000_read_phy_reg_igp;
+ phy->ops.reset = e1000_phy_hw_reset_82541;
+ phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82541;
+ phy->ops.write_reg = e1000_write_phy_reg_igp;
+ phy->ops.power_up = e1000_power_up_phy_copper;
+ phy->ops.power_down = e1000_power_down_phy_copper_82541;
ret_val = e1000_get_phy_id(hw);
if (ret_val)
@@ -129,8 +129,8 @@ out:
**/
static s32 e1000_init_nvm_params_82541(struct e1000_hw *hw)
{
- struct e1000_nvm_info *nvm = &hw->nvm;
- s32 ret_val = E1000_SUCCESS;
+ struct e1000_nvm_info *nvm = &hw->nvm;
+ s32 ret_val = E1000_SUCCESS;
u32 eecd = E1000_READ_REG(hw, E1000_EECD);
u16 size;
@@ -154,28 +154,25 @@ static s32 e1000_init_nvm_params_82541(struct e1000_hw *hw)
eecd &= ~E1000_EECD_SIZE;
break;
default:
- nvm->type = eecd & E1000_EECD_TYPE
- ? e1000_nvm_eeprom_spi
- : e1000_nvm_eeprom_microwire;
+ nvm->type = eecd & E1000_EECD_TYPE ? e1000_nvm_eeprom_spi
+ : e1000_nvm_eeprom_microwire;
break;
}
if (nvm->type == e1000_nvm_eeprom_spi) {
- nvm->address_bits = (eecd & E1000_EECD_ADDR_BITS)
- ? 16 : 8;
- nvm->delay_usec = 1;
- nvm->opcode_bits = 8;
- nvm->page_size = (eecd & E1000_EECD_ADDR_BITS)
- ? 32 : 8;
+ nvm->address_bits = (eecd & E1000_EECD_ADDR_BITS) ? 16 : 8;
+ nvm->delay_usec = 1;
+ nvm->opcode_bits = 8;
+ nvm->page_size = (eecd & E1000_EECD_ADDR_BITS) ? 32 : 8;
/* Function Pointers */
- nvm->ops.acquire = e1000_acquire_nvm_generic;
- nvm->ops.read = e1000_read_nvm_spi;
- nvm->ops.release = e1000_release_nvm_generic;
- nvm->ops.update = e1000_update_nvm_checksum_generic;
+ nvm->ops.acquire = e1000_acquire_nvm_generic;
+ nvm->ops.read = e1000_read_nvm_spi;
+ nvm->ops.release = e1000_release_nvm_generic;
+ nvm->ops.update = e1000_update_nvm_checksum_generic;
nvm->ops.valid_led_default = e1000_valid_led_default_generic;
- nvm->ops.validate = e1000_validate_nvm_checksum_generic;
- nvm->ops.write = e1000_write_nvm_spi;
+ nvm->ops.validate = e1000_validate_nvm_checksum_generic;
+ nvm->ops.write = e1000_write_nvm_spi;
/*
* nvm->word_size must be discovered after the pointers
@@ -198,21 +195,19 @@ static s32 e1000_init_nvm_params_82541(struct e1000_hw *hw)
nvm->word_size = 1 << size;
}
} else {
- nvm->address_bits = (eecd & E1000_EECD_ADDR_BITS)
- ? 8 : 6;
- nvm->delay_usec = 50;
- nvm->opcode_bits = 3;
- nvm->word_size = (eecd & E1000_EECD_ADDR_BITS)
- ? 256 : 64;
+ nvm->address_bits = (eecd & E1000_EECD_ADDR_BITS) ? 8 : 6;
+ nvm->delay_usec = 50;
+ nvm->opcode_bits = 3;
+ nvm->word_size = (eecd & E1000_EECD_ADDR_BITS) ? 256 : 64;
/* Function Pointers */
- nvm->ops.acquire = e1000_acquire_nvm_generic;
- nvm->ops.read = e1000_read_nvm_microwire;
- nvm->ops.release = e1000_release_nvm_generic;
- nvm->ops.update = e1000_update_nvm_checksum_generic;
+ nvm->ops.acquire = e1000_acquire_nvm_generic;
+ nvm->ops.read = e1000_read_nvm_microwire;
+ nvm->ops.release = e1000_release_nvm_generic;
+ nvm->ops.update = e1000_update_nvm_checksum_generic;
nvm->ops.valid_led_default = e1000_valid_led_default_generic;
- nvm->ops.validate = e1000_validate_nvm_checksum_generic;
- nvm->ops.write = e1000_write_nvm_microwire;
+ nvm->ops.validate = e1000_validate_nvm_checksum_generic;
+ nvm->ops.write = e1000_write_nvm_microwire;
}
out:
@@ -302,7 +297,7 @@ void e1000_init_function_pointers_82541(struct e1000_hw *hw)
**/
static s32 e1000_reset_hw_82541(struct e1000_hw *hw)
{
- u32 ledctl, ctrl, icr, manc;
+ u32 ledctl, ctrl, manc;
DEBUGFUNC("e1000_reset_hw_82541");
@@ -324,6 +319,7 @@ static s32 e1000_reset_hw_82541(struct e1000_hw *hw)
/* Must reset the Phy before resetting the MAC */
if ((hw->mac.type == e1000_82541) || (hw->mac.type == e1000_82547)) {
E1000_WRITE_REG(hw, E1000_CTRL, (ctrl | E1000_CTRL_PHY_RST));
+ E1000_WRITE_FLUSH(hw);
msec_delay(5);
}
@@ -366,7 +362,7 @@ static s32 e1000_reset_hw_82541(struct e1000_hw *hw)
E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF);
/* Clear any pending interrupt events. */
- icr = E1000_READ_REG(hw, E1000_ICR);
+ E1000_READ_REG(hw, E1000_ICR);
return E1000_SUCCESS;
}
@@ -392,11 +388,10 @@ static s32 e1000_init_hw_82541(struct e1000_hw *hw)
DEBUGOUT("Error initializing identification LED\n");
/* This is not fatal and we should not stop init due to this */
}
-
+
/* Storing the Speed Power Down value for later use */
- ret_val = hw->phy.ops.read_reg(hw,
- IGP01E1000_GMII_FIFO,
- &dev_spec->spd_default);
+ ret_val = hw->phy.ops.read_reg(hw, IGP01E1000_GMII_FIFO,
+ &dev_spec->spd_default);
if (ret_val)
goto out;
@@ -425,7 +420,7 @@ static s32 e1000_init_hw_82541(struct e1000_hw *hw)
txdctl = E1000_READ_REG(hw, E1000_TXDCTL(0));
txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) |
- E1000_TXDCTL_FULL_TX_DESC_WB;
+ E1000_TXDCTL_FULL_TX_DESC_WB;
E1000_WRITE_REG(hw, E1000_TXDCTL(0), txdctl);
/*
@@ -449,7 +444,7 @@ out:
* Retrieve the current speed and duplex configuration.
**/
static s32 e1000_get_link_up_info_82541(struct e1000_hw *hw, u16 *speed,
- u16 *duplex)
+ u16 *duplex)
{
struct e1000_phy_info *phy = &hw->phy;
s32 ret_val;
@@ -551,6 +546,7 @@ static s32 e1000_setup_copper_link_82541(struct e1000_hw *hw)
ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
/* Earlier revs of the IGP phy require us to force MDI. */
if (hw->mac.type == e1000_82541 || hw->mac.type == e1000_82547) {
dev_spec->dsp_config = e1000_dsp_config_disabled;
@@ -653,9 +649,8 @@ static s32 e1000_check_for_link_82541(struct e1000_hw *hw)
* different link partner.
*/
ret_val = e1000_config_fc_after_link_up_generic(hw);
- if (ret_val) {
+ if (ret_val)
DEBUGOUT("Error configuring flow control\n");
- }
out:
return ret_val;
@@ -673,7 +668,7 @@ out:
* gigabit link is achieved to improve link quality.
**/
static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw,
- bool link_up)
+ bool link_up)
{
struct e1000_phy_info *phy = &hw->phy;
struct e1000_dev_spec_82541 *dev_spec = &hw->dev_spec._82541;
@@ -681,11 +676,11 @@ static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw,
u32 idle_errs = 0;
u16 phy_data, phy_saved_data, speed, duplex, i;
u16 ffe_idle_err_timeout = FFE_IDLE_ERR_COUNT_TIMEOUT_20;
- u16 dsp_reg_array[IGP01E1000_PHY_CHANNEL_NUM] =
- {IGP01E1000_PHY_AGC_PARAM_A,
- IGP01E1000_PHY_AGC_PARAM_B,
- IGP01E1000_PHY_AGC_PARAM_C,
- IGP01E1000_PHY_AGC_PARAM_D};
+ u16 dsp_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = {
+ IGP01E1000_PHY_AGC_PARAM_A,
+ IGP01E1000_PHY_AGC_PARAM_B,
+ IGP01E1000_PHY_AGC_PARAM_C,
+ IGP01E1000_PHY_AGC_PARAM_D};
DEBUGFUNC("e1000_config_dsp_after_link_change_82541");
@@ -710,16 +705,16 @@ static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw,
for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) {
ret_val = phy->ops.read_reg(hw,
- dsp_reg_array[i],
- &phy_data);
+ dsp_reg_array[i],
+ &phy_data);
if (ret_val)
goto out;
phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX;
ret_val = phy->ops.write_reg(hw,
- dsp_reg_array[i],
- phy_data);
+ dsp_reg_array[i],
+ phy_data);
if (ret_val)
goto out;
}
@@ -739,9 +734,8 @@ static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw,
for (i = 0; i < ffe_idle_err_timeout; i++) {
usec_delay(1000);
- ret_val = phy->ops.read_reg(hw,
- PHY_1000T_STATUS,
- &phy_data);
+ ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS,
+ &phy_data);
if (ret_val)
goto out;
@@ -750,8 +744,8 @@ static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw,
dev_spec->ffe_config = e1000_ffe_config_active;
ret_val = phy->ops.write_reg(hw,
- IGP01E1000_PHY_DSP_FFE,
- IGP01E1000_PHY_DSP_FFE_CM_CP);
+ IGP01E1000_PHY_DSP_FFE,
+ IGP01E1000_PHY_DSP_FFE_CM_CP);
if (ret_val)
goto out;
break;
@@ -759,7 +753,7 @@ static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw,
if (idle_errs)
ffe_idle_err_timeout =
- FFE_IDLE_ERR_COUNT_TIMEOUT_100;
+ FFE_IDLE_ERR_COUNT_TIMEOUT_100;
}
} else {
if (dev_spec->dsp_config == e1000_dsp_config_activated) {
@@ -767,9 +761,8 @@ static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw,
* Save off the current value of register 0x2F5B
* to be restored at the end of the routines.
*/
- ret_val = phy->ops.read_reg(hw,
- 0x2F5B,
- &phy_saved_data);
+ ret_val = phy->ops.read_reg(hw, 0x2F5B,
+ &phy_saved_data);
if (ret_val)
goto out;
@@ -780,15 +773,14 @@ static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw,
msec_delay_irq(20);
- ret_val = phy->ops.write_reg(hw,
- 0x0000,
- IGP01E1000_IEEE_FORCE_GIG);
+ ret_val = phy->ops.write_reg(hw, 0x0000,
+ IGP01E1000_IEEE_FORCE_GIG);
if (ret_val)
goto out;
for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) {
ret_val = phy->ops.read_reg(hw,
- dsp_reg_array[i],
- &phy_data);
+ dsp_reg_array[i],
+ &phy_data);
if (ret_val)
goto out;
@@ -796,24 +788,22 @@ static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw,
phy_data |= IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS;
ret_val = phy->ops.write_reg(hw,
- dsp_reg_array[i],
- phy_data);
+ dsp_reg_array[i],
+ phy_data);
if (ret_val)
goto out;
}
- ret_val = phy->ops.write_reg(hw,
- 0x0000,
- IGP01E1000_IEEE_RESTART_AUTONEG);
+ ret_val = phy->ops.write_reg(hw, 0x0000,
+ IGP01E1000_IEEE_RESTART_AUTONEG);
if (ret_val)
goto out;
msec_delay_irq(20);
/* Now enable the transmitter */
- ret_val = phy->ops.write_reg(hw,
- 0x2F5B,
- phy_saved_data);
+ ret_val = phy->ops.write_reg(hw, 0x2F5B,
+ phy_saved_data);
if (ret_val)
goto out;
@@ -840,21 +830,18 @@ static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw,
msec_delay_irq(20);
- ret_val = phy->ops.write_reg(hw,
- 0x0000,
- IGP01E1000_IEEE_FORCE_GIG);
+ ret_val = phy->ops.write_reg(hw, 0x0000,
+ IGP01E1000_IEEE_FORCE_GIG);
if (ret_val)
goto out;
- ret_val = phy->ops.write_reg(hw,
- IGP01E1000_PHY_DSP_FFE,
- IGP01E1000_PHY_DSP_FFE_DEFAULT);
+ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_DSP_FFE,
+ IGP01E1000_PHY_DSP_FFE_DEFAULT);
if (ret_val)
goto out;
- ret_val = phy->ops.write_reg(hw,
- 0x0000,
- IGP01E1000_IEEE_RESTART_AUTONEG);
+ ret_val = phy->ops.write_reg(hw, 0x0000,
+ IGP01E1000_IEEE_RESTART_AUTONEG);
if (ret_val)
goto out;
@@ -891,11 +878,10 @@ static s32 e1000_get_cable_length_igp_82541(struct e1000_hw *hw)
u16 i, data;
u16 cur_agc_value, agc_value = 0;
u16 min_agc_value = IGP01E1000_AGC_LENGTH_TABLE_SIZE;
- u16 agc_reg_array[IGP01E1000_PHY_CHANNEL_NUM] =
- {IGP01E1000_PHY_AGC_A,
- IGP01E1000_PHY_AGC_B,
- IGP01E1000_PHY_AGC_C,
- IGP01E1000_PHY_AGC_D};
+ u16 agc_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = {IGP01E1000_PHY_AGC_A,
+ IGP01E1000_PHY_AGC_B,
+ IGP01E1000_PHY_AGC_C,
+ IGP01E1000_PHY_AGC_D};
DEBUGFUNC("e1000_get_cable_length_igp_82541");
@@ -931,12 +917,12 @@ static s32 e1000_get_cable_length_igp_82541(struct e1000_hw *hw)
}
phy->min_cable_length = (e1000_igp_cable_length_table[agc_value] >
- IGP01E1000_AGC_RANGE)
- ? (e1000_igp_cable_length_table[agc_value] -
- IGP01E1000_AGC_RANGE)
- : 0;
+ IGP01E1000_AGC_RANGE)
+ ? (e1000_igp_cable_length_table[agc_value] -
+ IGP01E1000_AGC_RANGE)
+ : 0;
phy->max_cable_length = e1000_igp_cable_length_table[agc_value] +
- IGP01E1000_AGC_RANGE;
+ IGP01E1000_AGC_RANGE;
phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2;
@@ -994,50 +980,48 @@ static s32 e1000_set_d3_lplu_state_82541(struct e1000_hw *hw, bool active)
*/
if (phy->smart_speed == e1000_smart_speed_on) {
ret_val = phy->ops.read_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- &data);
+ IGP01E1000_PHY_PORT_CONFIG,
+ &data);
if (ret_val)
goto out;
data |= IGP01E1000_PSCFR_SMART_SPEED;
ret_val = phy->ops.write_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- data);
+ IGP01E1000_PHY_PORT_CONFIG,
+ data);
if (ret_val)
goto out;
} else if (phy->smart_speed == e1000_smart_speed_off) {
ret_val = phy->ops.read_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- &data);
+ IGP01E1000_PHY_PORT_CONFIG,
+ &data);
if (ret_val)
goto out;
data &= ~IGP01E1000_PSCFR_SMART_SPEED;
ret_val = phy->ops.write_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- data);
+ IGP01E1000_PHY_PORT_CONFIG,
+ data);
if (ret_val)
goto out;
}
} else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) ||
- (phy->autoneg_advertised == E1000_ALL_NOT_GIG) ||
- (phy->autoneg_advertised == E1000_ALL_10_SPEED)) {
+ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) ||
+ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) {
data |= IGP01E1000_GMII_FLEX_SPD;
ret_val = phy->ops.write_reg(hw, IGP01E1000_GMII_FIFO, data);
if (ret_val)
goto out;
/* When LPLU is enabled, we should disable SmartSpeed */
- ret_val = phy->ops.read_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- &data);
+ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+ &data);
if (ret_val)
goto out;
data &= ~IGP01E1000_PSCFR_SMART_SPEED;
- ret_val = phy->ops.write_reg(hw,
- IGP01E1000_PHY_PORT_CONFIG,
- data);
+ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+ data);
}
out:
@@ -1058,16 +1042,14 @@ static s32 e1000_setup_led_82541(struct e1000_hw *hw)
DEBUGFUNC("e1000_setup_led_82541");
- ret_val = hw->phy.ops.read_reg(hw,
- IGP01E1000_GMII_FIFO,
- &dev_spec->spd_default);
+ ret_val = hw->phy.ops.read_reg(hw, IGP01E1000_GMII_FIFO,
+ &dev_spec->spd_default);
if (ret_val)
goto out;
- ret_val = hw->phy.ops.write_reg(hw,
- IGP01E1000_GMII_FIFO,
- (u16)(dev_spec->spd_default &
- ~IGP01E1000_GMII_SPD));
+ ret_val = hw->phy.ops.write_reg(hw, IGP01E1000_GMII_FIFO,
+ (u16)(dev_spec->spd_default &
+ ~IGP01E1000_GMII_SPD));
if (ret_val)
goto out;
@@ -1091,9 +1073,8 @@ static s32 e1000_cleanup_led_82541(struct e1000_hw *hw)
DEBUGFUNC("e1000_cleanup_led_82541");
- ret_val = hw->phy.ops.write_reg(hw,
- IGP01E1000_GMII_FIFO,
- dev_spec->spd_default);
+ ret_val = hw->phy.ops.write_reg(hw, IGP01E1000_GMII_FIFO,
+ dev_spec->spd_default);
if (ret_val)
goto out;
@@ -1180,14 +1161,12 @@ static s32 e1000_phy_init_script_82541(struct e1000_hw *hw)
u16 fused, fine, coarse;
/* Move to analog registers page */
- hw->phy.ops.read_reg(hw,
- IGP01E1000_ANALOG_SPARE_FUSE_STATUS,
- &fused);
+ hw->phy.ops.read_reg(hw, IGP01E1000_ANALOG_SPARE_FUSE_STATUS,
+ &fused);
if (!(fused & IGP01E1000_ANALOG_SPARE_FUSE_ENABLED)) {
- hw->phy.ops.read_reg(hw,
- IGP01E1000_ANALOG_FUSE_STATUS,
- &fused);
+ hw->phy.ops.read_reg(hw, IGP01E1000_ANALOG_FUSE_STATUS,
+ &fused);
fine = fused & IGP01E1000_ANALOG_FUSE_FINE_MASK;
coarse = fused & IGP01E1000_ANALOG_FUSE_COARSE_MASK;
@@ -1196,19 +1175,19 @@ static s32 e1000_phy_init_script_82541(struct e1000_hw *hw)
coarse -= IGP01E1000_ANALOG_FUSE_COARSE_10;
fine -= IGP01E1000_ANALOG_FUSE_FINE_1;
} else if (coarse ==
- IGP01E1000_ANALOG_FUSE_COARSE_THRESH)
+ IGP01E1000_ANALOG_FUSE_COARSE_THRESH)
fine -= IGP01E1000_ANALOG_FUSE_FINE_10;
fused = (fused & IGP01E1000_ANALOG_FUSE_POLY_MASK) |
- (fine & IGP01E1000_ANALOG_FUSE_FINE_MASK) |
- (coarse & IGP01E1000_ANALOG_FUSE_COARSE_MASK);
+ (fine & IGP01E1000_ANALOG_FUSE_FINE_MASK) |
+ (coarse & IGP01E1000_ANALOG_FUSE_COARSE_MASK);
hw->phy.ops.write_reg(hw,
- IGP01E1000_ANALOG_FUSE_CONTROL,
- fused);
+ IGP01E1000_ANALOG_FUSE_CONTROL,
+ fused);
hw->phy.ops.write_reg(hw,
- IGP01E1000_ANALOG_FUSE_BYPASS,
- IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL);
+ IGP01E1000_ANALOG_FUSE_BYPASS,
+ IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL);
}
}
diff --git a/freebsd/sys/dev/e1000/e1000_82541.h b/freebsd/sys/dev/e1000/e1000_82541.h
index 3b6b9612..7a01fd43 100644
--- a/freebsd/sys/dev/e1000/e1000_82541.h
+++ b/freebsd/sys/dev/e1000/e1000_82541.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2008, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -37,55 +37,55 @@
#define NVM_WORD_SIZE_BASE_SHIFT_82541 (NVM_WORD_SIZE_BASE_SHIFT + 1)
-#define IGP01E1000_PHY_CHANNEL_NUM 4
+#define IGP01E1000_PHY_CHANNEL_NUM 4
-#define IGP01E1000_PHY_AGC_A 0x1172
-#define IGP01E1000_PHY_AGC_B 0x1272
-#define IGP01E1000_PHY_AGC_C 0x1472
-#define IGP01E1000_PHY_AGC_D 0x1872
+#define IGP01E1000_PHY_AGC_A 0x1172
+#define IGP01E1000_PHY_AGC_B 0x1272
+#define IGP01E1000_PHY_AGC_C 0x1472
+#define IGP01E1000_PHY_AGC_D 0x1872
-#define IGP01E1000_PHY_AGC_PARAM_A 0x1171
-#define IGP01E1000_PHY_AGC_PARAM_B 0x1271
-#define IGP01E1000_PHY_AGC_PARAM_C 0x1471
-#define IGP01E1000_PHY_AGC_PARAM_D 0x1871
+#define IGP01E1000_PHY_AGC_PARAM_A 0x1171
+#define IGP01E1000_PHY_AGC_PARAM_B 0x1271
+#define IGP01E1000_PHY_AGC_PARAM_C 0x1471
+#define IGP01E1000_PHY_AGC_PARAM_D 0x1871
-#define IGP01E1000_PHY_EDAC_MU_INDEX 0xC000
-#define IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS 0x8000
+#define IGP01E1000_PHY_EDAC_MU_INDEX 0xC000
+#define IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS 0x8000
-#define IGP01E1000_PHY_DSP_RESET 0x1F33
+#define IGP01E1000_PHY_DSP_RESET 0x1F33
-#define IGP01E1000_PHY_DSP_FFE 0x1F35
-#define IGP01E1000_PHY_DSP_FFE_CM_CP 0x0069
-#define IGP01E1000_PHY_DSP_FFE_DEFAULT 0x002A
+#define IGP01E1000_PHY_DSP_FFE 0x1F35
+#define IGP01E1000_PHY_DSP_FFE_CM_CP 0x0069
+#define IGP01E1000_PHY_DSP_FFE_DEFAULT 0x002A
-#define IGP01E1000_IEEE_FORCE_GIG 0x0140
-#define IGP01E1000_IEEE_RESTART_AUTONEG 0x3300
+#define IGP01E1000_IEEE_FORCE_GIG 0x0140
+#define IGP01E1000_IEEE_RESTART_AUTONEG 0x3300
-#define IGP01E1000_AGC_LENGTH_SHIFT 7
-#define IGP01E1000_AGC_RANGE 10
+#define IGP01E1000_AGC_LENGTH_SHIFT 7
+#define IGP01E1000_AGC_RANGE 10
-#define FFE_IDLE_ERR_COUNT_TIMEOUT_20 20
-#define FFE_IDLE_ERR_COUNT_TIMEOUT_100 100
+#define FFE_IDLE_ERR_COUNT_TIMEOUT_20 20
+#define FFE_IDLE_ERR_COUNT_TIMEOUT_100 100
-#define IGP01E1000_ANALOG_FUSE_STATUS 0x20D0
-#define IGP01E1000_ANALOG_SPARE_FUSE_STATUS 0x20D1
-#define IGP01E1000_ANALOG_FUSE_CONTROL 0x20DC
-#define IGP01E1000_ANALOG_FUSE_BYPASS 0x20DE
+#define IGP01E1000_ANALOG_FUSE_STATUS 0x20D0
+#define IGP01E1000_ANALOG_SPARE_FUSE_STATUS 0x20D1
+#define IGP01E1000_ANALOG_FUSE_CONTROL 0x20DC
+#define IGP01E1000_ANALOG_FUSE_BYPASS 0x20DE
-#define IGP01E1000_ANALOG_SPARE_FUSE_ENABLED 0x0100
-#define IGP01E1000_ANALOG_FUSE_FINE_MASK 0x0F80
-#define IGP01E1000_ANALOG_FUSE_COARSE_MASK 0x0070
-#define IGP01E1000_ANALOG_FUSE_COARSE_THRESH 0x0040
-#define IGP01E1000_ANALOG_FUSE_COARSE_10 0x0010
-#define IGP01E1000_ANALOG_FUSE_FINE_1 0x0080
-#define IGP01E1000_ANALOG_FUSE_FINE_10 0x0500
-#define IGP01E1000_ANALOG_FUSE_POLY_MASK 0xF000
+#define IGP01E1000_ANALOG_SPARE_FUSE_ENABLED 0x0100
+#define IGP01E1000_ANALOG_FUSE_FINE_MASK 0x0F80
+#define IGP01E1000_ANALOG_FUSE_COARSE_MASK 0x0070
+#define IGP01E1000_ANALOG_FUSE_COARSE_THRESH 0x0040
+#define IGP01E1000_ANALOG_FUSE_COARSE_10 0x0010
+#define IGP01E1000_ANALOG_FUSE_FINE_1 0x0080
+#define IGP01E1000_ANALOG_FUSE_FINE_10 0x0500
+#define IGP01E1000_ANALOG_FUSE_POLY_MASK 0xF000
#define IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL 0x0002
-#define IGP01E1000_MSE_CHANNEL_D 0x000F
-#define IGP01E1000_MSE_CHANNEL_C 0x00F0
-#define IGP01E1000_MSE_CHANNEL_B 0x0F00
-#define IGP01E1000_MSE_CHANNEL_A 0xF000
+#define IGP01E1000_MSE_CHANNEL_D 0x000F
+#define IGP01E1000_MSE_CHANNEL_C 0x00F0
+#define IGP01E1000_MSE_CHANNEL_B 0x0F00
+#define IGP01E1000_MSE_CHANNEL_A 0xF000
void e1000_init_script_state_82541(struct e1000_hw *hw, bool state);
diff --git a/freebsd/sys/dev/e1000/e1000_82542.c b/freebsd/sys/dev/e1000/e1000_82542.c
index 95323394..a21fec73 100644
--- a/freebsd/sys/dev/e1000/e1000_82542.c
+++ b/freebsd/sys/dev/e1000/e1000_82542.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -64,7 +64,7 @@ static s32 e1000_init_phy_params_82542(struct e1000_hw *hw)
DEBUGFUNC("e1000_init_phy_params_82542");
- phy->type = e1000_phy_none;
+ phy->type = e1000_phy_none;
return ret_val;
}
@@ -79,18 +79,18 @@ static s32 e1000_init_nvm_params_82542(struct e1000_hw *hw)
DEBUGFUNC("e1000_init_nvm_params_82542");
- nvm->address_bits = 6;
- nvm->delay_usec = 50;
- nvm->opcode_bits = 3;
- nvm->type = e1000_nvm_eeprom_microwire;
- nvm->word_size = 64;
+ nvm->address_bits = 6;
+ nvm->delay_usec = 50;
+ nvm->opcode_bits = 3;
+ nvm->type = e1000_nvm_eeprom_microwire;
+ nvm->word_size = 64;
/* Function Pointers */
- nvm->ops.read = e1000_read_nvm_microwire;
- nvm->ops.release = e1000_stop_nvm;
- nvm->ops.write = e1000_write_nvm_microwire;
- nvm->ops.update = e1000_update_nvm_checksum_generic;
- nvm->ops.validate = e1000_validate_nvm_checksum_generic;
+ nvm->ops.read = e1000_read_nvm_microwire;
+ nvm->ops.release = e1000_stop_nvm;
+ nvm->ops.write = e1000_write_nvm_microwire;
+ nvm->ops.update = e1000_update_nvm_checksum_generic;
+ nvm->ops.validate = e1000_validate_nvm_checksum_generic;
return E1000_SUCCESS;
}
@@ -126,7 +126,8 @@ static s32 e1000_init_mac_params_82542(struct e1000_hw *hw)
/* link setup */
mac->ops.setup_link = e1000_setup_link_82542;
/* phy/fiber/serdes setup */
- mac->ops.setup_physical_interface = e1000_setup_fiber_serdes_link_generic;
+ mac->ops.setup_physical_interface =
+ e1000_setup_fiber_serdes_link_generic;
/* check for link */
mac->ops.check_for_link = e1000_check_for_fiber_link_generic;
/* multicast address update */
@@ -145,7 +146,8 @@ static s32 e1000_init_mac_params_82542(struct e1000_hw *hw)
/* clear hardware counters */
mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82542;
/* link info */
- mac->ops.get_link_up_info = e1000_get_speed_and_duplex_fiber_serdes_generic;
+ mac->ops.get_link_up_info =
+ e1000_get_speed_and_duplex_fiber_serdes_generic;
return E1000_SUCCESS;
}
@@ -317,7 +319,7 @@ static s32 e1000_init_hw_82542(struct e1000_hw *hw)
static s32 e1000_setup_link_82542(struct e1000_hw *hw)
{
struct e1000_mac_info *mac = &hw->mac;
- s32 ret_val = E1000_SUCCESS;
+ s32 ret_val;
DEBUGFUNC("e1000_setup_link_82542");
@@ -327,7 +329,7 @@ static s32 e1000_setup_link_82542(struct e1000_hw *hw)
hw->fc.requested_mode &= ~e1000_fc_tx_pause;
- if (mac->report_tx_early == 1)
+ if (mac->report_tx_early)
hw->fc.requested_mode &= ~e1000_fc_rx_pause;
/*
@@ -337,7 +339,7 @@ static s32 e1000_setup_link_82542(struct e1000_hw *hw)
hw->fc.current_mode = hw->fc.requested_mode;
DEBUGOUT1("After fix-ups FlowControl is now = %x\n",
- hw->fc.current_mode);
+ hw->fc.current_mode);
/* Call the necessary subroutine to configure the link. */
ret_val = mac->ops.setup_physical_interface(hw);
@@ -421,9 +423,8 @@ static int e1000_rar_set_82542(struct e1000_hw *hw, u8 *addr, u32 index)
* HW expects these in little endian so we reverse the byte order
* from network order (big endian) to little endian
*/
- rar_low = ((u32) addr[0] |
- ((u32) addr[1] << 8) |
- ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
+ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
+ ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
@@ -433,6 +434,7 @@ static int e1000_rar_set_82542(struct e1000_hw *hw, u8 *addr, u32 index)
E1000_WRITE_REG_ARRAY(hw, E1000_RA, (index << 1), rar_low);
E1000_WRITE_REG_ARRAY(hw, E1000_RA, ((index << 1) + 1), rar_high);
+
return E1000_SUCCESS;
}
@@ -565,7 +567,7 @@ static void e1000_clear_hw_cntrs_82542(struct e1000_hw *hw)
*
* Reads the device MAC address from the EEPROM and stores the value.
**/
-static s32 e1000_read_mac_addr_82542(struct e1000_hw *hw)
+s32 e1000_read_mac_addr_82542(struct e1000_hw *hw)
{
s32 ret_val = E1000_SUCCESS;
u16 offset, nvm_data, i;
diff --git a/freebsd/sys/dev/e1000/e1000_82543.c b/freebsd/sys/dev/e1000/e1000_82543.c
index 47cf69c4..4930fc95 100644
--- a/freebsd/sys/dev/e1000/e1000_82543.c
+++ b/freebsd/sys/dev/e1000/e1000_82543.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2011, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -49,9 +49,9 @@ static s32 e1000_init_phy_params_82543(struct e1000_hw *hw);
static s32 e1000_init_nvm_params_82543(struct e1000_hw *hw);
static s32 e1000_init_mac_params_82543(struct e1000_hw *hw);
static s32 e1000_read_phy_reg_82543(struct e1000_hw *hw, u32 offset,
- u16 *data);
+ u16 *data);
static s32 e1000_write_phy_reg_82543(struct e1000_hw *hw, u32 offset,
- u16 data);
+ u16 data);
static s32 e1000_phy_force_speed_duplex_82543(struct e1000_hw *hw);
static s32 e1000_phy_hw_reset_82543(struct e1000_hw *hw);
static s32 e1000_reset_hw_82543(struct e1000_hw *hw);
@@ -64,7 +64,7 @@ static s32 e1000_check_for_fiber_link_82543(struct e1000_hw *hw);
static s32 e1000_led_on_82543(struct e1000_hw *hw);
static s32 e1000_led_off_82543(struct e1000_hw *hw);
static void e1000_write_vfta_82543(struct e1000_hw *hw, u32 offset,
- u32 value);
+ u32 value);
static void e1000_clear_hw_cntrs_82543(struct e1000_hw *hw);
static s32 e1000_config_mac_to_phy_82543(struct e1000_hw *hw);
static bool e1000_init_phy_disabled_82543(struct e1000_hw *hw);
@@ -73,7 +73,7 @@ static s32 e1000_polarity_reversal_workaround_82543(struct e1000_hw *hw);
static void e1000_raise_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl);
static u16 e1000_shift_in_mdi_bits_82543(struct e1000_hw *hw);
static void e1000_shift_out_mdi_bits_82543(struct e1000_hw *hw, u32 data,
- u16 count);
+ u16 count);
static bool e1000_tbi_compatibility_enabled_82543(struct e1000_hw *hw);
static void e1000_set_tbi_sbp_82543(struct e1000_hw *hw, bool state);
static s32 e1000_read_mac_addr_82543(struct e1000_hw *hw);
@@ -91,34 +91,34 @@ static s32 e1000_init_phy_params_82543(struct e1000_hw *hw)
DEBUGFUNC("e1000_init_phy_params_82543");
if (hw->phy.media_type != e1000_media_type_copper) {
- phy->type = e1000_phy_none;
+ phy->type = e1000_phy_none;
goto out;
} else {
- phy->ops.power_up = e1000_power_up_phy_copper;
- phy->ops.power_down = e1000_power_down_phy_copper;
+ phy->ops.power_up = e1000_power_up_phy_copper;
+ phy->ops.power_down = e1000_power_down_phy_copper;
}
- phy->addr = 1;
- phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT;
- phy->reset_delay_us = 10000;
- phy->type = e1000_phy_m88;
+ phy->addr = 1;
+ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT;
+ phy->reset_delay_us = 10000;
+ phy->type = e1000_phy_m88;
/* Function Pointers */
- phy->ops.check_polarity = e1000_check_polarity_m88;
- phy->ops.commit = e1000_phy_sw_reset_generic;
- phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_82543;
- phy->ops.get_cable_length = e1000_get_cable_length_m88;
- phy->ops.get_cfg_done = e1000_get_cfg_done_generic;
- phy->ops.read_reg = (hw->mac.type == e1000_82543)
- ? e1000_read_phy_reg_82543
- : e1000_read_phy_reg_m88;
- phy->ops.reset = (hw->mac.type == e1000_82543)
- ? e1000_phy_hw_reset_82543
- : e1000_phy_hw_reset_generic;
- phy->ops.write_reg = (hw->mac.type == e1000_82543)
- ? e1000_write_phy_reg_82543
- : e1000_write_phy_reg_m88;
- phy->ops.get_info = e1000_get_phy_info_m88;
+ phy->ops.check_polarity = e1000_check_polarity_m88;
+ phy->ops.commit = e1000_phy_sw_reset_generic;
+ phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_82543;
+ phy->ops.get_cable_length = e1000_get_cable_length_m88;
+ phy->ops.get_cfg_done = e1000_get_cfg_done_generic;
+ phy->ops.read_reg = (hw->mac.type == e1000_82543)
+ ? e1000_read_phy_reg_82543
+ : e1000_read_phy_reg_m88;
+ phy->ops.reset = (hw->mac.type == e1000_82543)
+ ? e1000_phy_hw_reset_82543
+ : e1000_phy_hw_reset_generic;
+ phy->ops.write_reg = (hw->mac.type == e1000_82543)
+ ? e1000_write_phy_reg_82543
+ : e1000_write_phy_reg_m88;
+ phy->ops.get_info = e1000_get_phy_info_m88;
/*
* The external PHY of the 82543 can be in a funky state.
@@ -172,18 +172,18 @@ static s32 e1000_init_nvm_params_82543(struct e1000_hw *hw)
DEBUGFUNC("e1000_init_nvm_params_82543");
- nvm->type = e1000_nvm_eeprom_microwire;
- nvm->word_size = 64;
- nvm->delay_usec = 50;
- nvm->address_bits = 6;
- nvm->opcode_bits = 3;
+ nvm->type = e1000_nvm_eeprom_microwire;
+ nvm->word_size = 64;
+ nvm->delay_usec = 50;
+ nvm->address_bits = 6;
+ nvm->opcode_bits = 3;
/* Function Pointers */
- nvm->ops.read = e1000_read_nvm_microwire;
- nvm->ops.update = e1000_update_nvm_checksum_generic;
+ nvm->ops.read = e1000_read_nvm_microwire;
+ nvm->ops.update = e1000_update_nvm_checksum_generic;
nvm->ops.valid_led_default = e1000_valid_led_default_generic;
- nvm->ops.validate = e1000_validate_nvm_checksum_generic;
- nvm->ops.write = e1000_write_nvm_microwire;
+ nvm->ops.validate = e1000_validate_nvm_checksum_generic;
+ nvm->ops.write = e1000_write_nvm_microwire;
return E1000_SUCCESS;
}
@@ -228,19 +228,18 @@ static s32 e1000_init_mac_params_82543(struct e1000_hw *hw)
mac->ops.setup_link = e1000_setup_link_82543;
/* physical interface setup */
mac->ops.setup_physical_interface =
- (hw->phy.media_type == e1000_media_type_copper)
- ? e1000_setup_copper_link_82543
- : e1000_setup_fiber_link_82543;
+ (hw->phy.media_type == e1000_media_type_copper)
+ ? e1000_setup_copper_link_82543 : e1000_setup_fiber_link_82543;
/* check for link */
mac->ops.check_for_link =
- (hw->phy.media_type == e1000_media_type_copper)
- ? e1000_check_for_copper_link_82543
- : e1000_check_for_fiber_link_82543;
+ (hw->phy.media_type == e1000_media_type_copper)
+ ? e1000_check_for_copper_link_82543
+ : e1000_check_for_fiber_link_82543;
/* link info */
mac->ops.get_link_up_info =
- (hw->phy.media_type == e1000_media_type_copper)
- ? e1000_get_speed_and_duplex_copper_generic
- : e1000_get_speed_and_duplex_fiber_serdes_generic;
+ (hw->phy.media_type == e1000_media_type_copper)
+ ? e1000_get_speed_and_duplex_copper_generic
+ : e1000_get_speed_and_duplex_fiber_serdes_generic;
/* multicast address update */
mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic;
/* writing VFTA */
@@ -297,8 +296,7 @@ static bool e1000_tbi_compatibility_enabled_82543(struct e1000_hw *hw)
goto out;
}
- state = (dev_spec->tbi_compatibility & TBI_COMPAT_ENABLED)
- ? TRUE : FALSE;
+ state = !!(dev_spec->tbi_compatibility & TBI_COMPAT_ENABLED);
out:
return state;
@@ -350,8 +348,7 @@ bool e1000_tbi_sbp_enabled_82543(struct e1000_hw *hw)
goto out;
}
- state = (dev_spec->tbi_compatibility & TBI_SBP_ENABLED)
- ? TRUE : FALSE;
+ state = !!(dev_spec->tbi_compatibility & TBI_SBP_ENABLED);
out:
return state;
@@ -414,8 +411,8 @@ out:
* Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT
**/
void e1000_tbi_adjust_stats_82543(struct e1000_hw *hw,
- struct e1000_hw_stats *stats, u32 frame_len,
- u8 *mac_addr, u32 max_frame_size)
+ struct e1000_hw_stats *stats, u32 frame_len,
+ u8 *mac_addr, u32 max_frame_size)
{
if (!(e1000_tbi_sbp_enabled_82543(hw)))
goto out;
@@ -427,12 +424,12 @@ void e1000_tbi_adjust_stats_82543(struct e1000_hw *hw,
* counters overcount this packet as a CRC error and undercount
* the packet as a good packet
*/
- /* This packet should not be counted as a CRC error. */
+ /* This packet should not be counted as a CRC error. */
stats->crcerrs--;
- /* This packet does count as a Good Packet Received. */
+ /* This packet does count as a Good Packet Received. */
stats->gprc++;
- /* Adjust the Good Octets received counters */
+ /* Adjust the Good Octets received counters */
stats->gorc += frame_len;
/*
@@ -448,7 +445,7 @@ void e1000_tbi_adjust_stats_82543(struct e1000_hw *hw,
stats->mprc++;
/*
- * In this case, the hardware has overcounted the number of
+ * In this case, the hardware has over counted the number of
* oversize frames.
*/
if ((frame_len == max_frame_size) && (stats->roc > 0))
@@ -515,7 +512,7 @@ static s32 e1000_read_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 *data)
* e1000_shift_out_mdi_bits routine five different times. The format
* of an MII read instruction consists of a shift out of 14 bits and
* is defined as follows:
- * <Preamble><SOF><Op Code><Phy Addr><Offset>
+ * <Preamble><SOF><Op Code><Phy Addr><Offset>
* followed by a shift in of 18 bits. This first two bits shifted in
* are TurnAround bits used to avoid contention on the MDIO pin when a
* READ operation is performed. These two bits are thrown away
@@ -574,9 +571,9 @@ static s32 e1000_write_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 data)
* <Preamble><SOF><Op Code><Phy Addr><Reg Addr><Turnaround><Data>.
*/
mdic = ((PHY_TURNAROUND) | (offset << 2) | (hw->phy.addr << 7) |
- (PHY_OP_WRITE << 12) | (PHY_SOF << 14));
+ (PHY_OP_WRITE << 12) | (PHY_SOF << 14));
mdic <<= 16;
- mdic |= (u32) data;
+ mdic |= (u32)data;
e1000_shift_out_mdi_bits_82543(hw, mdic, 32);
@@ -633,7 +630,7 @@ static void e1000_lower_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl)
* In order to do this, "data" must be broken down into bits.
**/
static void e1000_shift_out_mdi_bits_82543(struct e1000_hw *hw, u32 data,
- u16 count)
+ u16 count)
{
u32 ctrl, mask;
@@ -644,7 +641,7 @@ static void e1000_shift_out_mdi_bits_82543(struct e1000_hw *hw, u32 data,
* into bits.
*/
mask = 0x01;
- mask <<= (count -1);
+ mask <<= (count - 1);
ctrl = E1000_READ_REG(hw, E1000_CTRL);
@@ -658,8 +655,10 @@ static void e1000_shift_out_mdi_bits_82543(struct e1000_hw *hw, u32 data,
* A "0" is shifted out to the PHY by setting the MDIO bit to
* "0" and then raising and lowering the clock.
*/
- if (data & mask) ctrl |= E1000_CTRL_MDIO;
- else ctrl &= ~E1000_CTRL_MDIO;
+ if (data & mask)
+ ctrl |= E1000_CTRL_MDIO;
+ else
+ ctrl &= ~E1000_CTRL_MDIO;
E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
E1000_WRITE_FLUSH(hw);
@@ -751,8 +750,8 @@ static s32 e1000_phy_force_speed_duplex_82543(struct e1000_hw *hw)
if (ret_val)
goto out;
- if (!hw->mac.autoneg &&
- (hw->mac.forced_speed_duplex & E1000_ALL_10_SPEED))
+ if (!hw->mac.autoneg && (hw->mac.forced_speed_duplex &
+ E1000_ALL_10_SPEED))
ret_val = e1000_polarity_reversal_workaround_82543(hw);
out:
@@ -810,7 +809,7 @@ static s32 e1000_polarity_reversal_workaround_82543(struct e1000_hw *hw)
if (ret_val)
goto out;
- if ((mii_status_reg & ~MII_SR_LINK_STATUS) == 0)
+ if (!(mii_status_reg & ~MII_SR_LINK_STATUS))
break;
msec_delay_irq(100);
}
@@ -903,7 +902,7 @@ static s32 e1000_phy_hw_reset_82543(struct e1000_hw *hw)
**/
static s32 e1000_reset_hw_82543(struct e1000_hw *hw)
{
- u32 ctrl, icr;
+ u32 ctrl;
s32 ret_val = E1000_SUCCESS;
DEBUGFUNC("e1000_reset_hw_82543");
@@ -945,7 +944,7 @@ static s32 e1000_reset_hw_82543(struct e1000_hw *hw)
/* Masking off and clearing any pending interrupts */
E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
- icr = E1000_READ_REG(hw, E1000_ICR);
+ E1000_READ_REG(hw, E1000_ICR);
return ret_val;
}
@@ -1042,7 +1041,7 @@ static s32 e1000_setup_link_82543(struct e1000_hw *hw)
goto out;
}
ctrl_ext = ((data & NVM_WORD0F_SWPDIO_EXT_MASK) <<
- NVM_SWDPIO_EXT_SHIFT);
+ NVM_SWDPIO_EXT_SHIFT);
E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
}
@@ -1116,10 +1115,8 @@ static s32 e1000_setup_copper_link_82543(struct e1000_hw *hw)
* Check link status. Wait up to 100 microseconds for link to become
* valid.
*/
- ret_val = e1000_phy_has_link_generic(hw,
- COPPER_LINK_UP_LIMIT,
- 10,
- &link);
+ ret_val = e1000_phy_has_link_generic(hw, COPPER_LINK_UP_LIMIT, 10,
+ &link);
if (ret_val)
goto out;
@@ -1179,11 +1176,10 @@ static s32 e1000_setup_fiber_link_82543(struct e1000_hw *hw)
* optics detect a signal. If we have a signal, then poll for a
* "Link-Up" indication.
*/
- if (!(E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) {
+ if (!(E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1))
ret_val = e1000_poll_fiber_serdes_link_generic(hw);
- } else {
+ else
DEBUGOUT("No signal detected\n");
- }
out:
return ret_val;
@@ -1277,9 +1273,8 @@ static s32 e1000_check_for_copper_link_82543(struct e1000_hw *hw)
* different link partner.
*/
ret_val = e1000_config_fc_after_link_up_generic(hw);
- if (ret_val) {
+ if (ret_val)
DEBUGOUT("Error configuring flow control\n");
- }
/*
* At this point we know that we are on copper and we have
@@ -1361,8 +1356,8 @@ static s32 e1000_check_for_fiber_link_82543(struct e1000_hw *hw)
if ((!(ctrl & E1000_CTRL_SWDPIN1)) &&
(!(status & E1000_STATUS_LU)) &&
(!(rxcw & E1000_RXCW_C))) {
- if (mac->autoneg_failed == 0) {
- mac->autoneg_failed = 1;
+ if (!mac->autoneg_failed) {
+ mac->autoneg_failed = TRUE;
ret_val = 0;
goto out;
}
diff --git a/freebsd/sys/dev/e1000/e1000_82543.h b/freebsd/sys/dev/e1000/e1000_82543.h
index 60e5c15d..e8edda5d 100644
--- a/freebsd/sys/dev/e1000/e1000_82543.h
+++ b/freebsd/sys/dev/e1000/e1000_82543.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2008, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -35,23 +35,23 @@
#ifndef _E1000_82543_H_
#define _E1000_82543_H_
-#define PHY_PREAMBLE 0xFFFFFFFF
-#define PHY_PREAMBLE_SIZE 32
-#define PHY_SOF 0x1
-#define PHY_OP_READ 0x2
-#define PHY_OP_WRITE 0x1
-#define PHY_TURNAROUND 0x2
+#define PHY_PREAMBLE 0xFFFFFFFF
+#define PHY_PREAMBLE_SIZE 32
+#define PHY_SOF 0x1
+#define PHY_OP_READ 0x2
+#define PHY_OP_WRITE 0x1
+#define PHY_TURNAROUND 0x2
-#define TBI_COMPAT_ENABLED 0x1 /* Global "knob" for the workaround */
+#define TBI_COMPAT_ENABLED 0x1 /* Global "knob" for the workaround */
/* If TBI_COMPAT_ENABLED, then this is the current state (on/off) */
-#define TBI_SBP_ENABLED 0x2
-
+#define TBI_SBP_ENABLED 0x2
+
void e1000_tbi_adjust_stats_82543(struct e1000_hw *hw,
- struct e1000_hw_stats *stats,
- u32 frame_len, u8 *mac_addr,
- u32 max_frame_size);
+ struct e1000_hw_stats *stats,
+ u32 frame_len, u8 *mac_addr,
+ u32 max_frame_size);
void e1000_set_tbi_compatibility_82543(struct e1000_hw *hw,
- bool state);
+ bool state);
bool e1000_tbi_sbp_enabled_82543(struct e1000_hw *hw);
#endif
diff --git a/freebsd/sys/dev/e1000/e1000_82571.c b/freebsd/sys/dev/e1000/e1000_82571.c
index dadd3a82..2b72489a 100644
--- a/freebsd/sys/dev/e1000/e1000_82571.c
+++ b/freebsd/sys/dev/e1000/e1000_82571.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -398,7 +398,7 @@ static s32 e1000_init_mac_params_82571(struct e1000_hw *hw)
}
/* Ensure that the inter-port SWSM.SMBI lock bit is clear before
- * first NVM or PHY acess. This should be done for single-port
+ * first NVM or PHY access. This should be done for single-port
* devices, and for one port only on dual-port devices so that
* for those devices we can still use the SMBI lock to synchronize
* inter-port accesses to the PHY & NVM.
diff --git a/freebsd/sys/dev/e1000/e1000_82571.h b/freebsd/sys/dev/e1000/e1000_82571.h
index 41d5df0e..8e5ca56a 100644
--- a/freebsd/sys/dev/e1000/e1000_82571.h
+++ b/freebsd/sys/dev/e1000/e1000_82571.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/dev/e1000/e1000_82575.c b/freebsd/sys/dev/e1000/e1000_82575.c
index 0e06471f..83116e8e 100644
--- a/freebsd/sys/dev/e1000/e1000_82575.c
+++ b/freebsd/sys/dev/e1000/e1000_82575.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -280,6 +280,11 @@ static s32 e1000_init_phy_params_82575(struct e1000_hw *hw)
if (ret_val)
goto out;
}
+ if (phy->id == M88E1543_E_PHY_ID) {
+ ret_val = e1000_initialize_M88E1543_phy(hw);
+ if (ret_val)
+ goto out;
+ }
break;
case IGP03E1000_E_PHY_ID:
case IGP04E1000_E_PHY_ID:
@@ -1053,7 +1058,7 @@ static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask)
u32 swmask = mask;
u32 fwmask = mask << 16;
s32 ret_val = E1000_SUCCESS;
- s32 i = 0, timeout = 200; /* FIXME: find real value to use here */
+ s32 i = 0, timeout = 200;
DEBUGFUNC("e1000_acquire_swfw_sync_82575");
@@ -1237,7 +1242,7 @@ static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw)
DEBUGFUNC("e1000_check_for_link_media_swap");
- /* Check the copper medium. */
+ /* Check for copper. */
ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0);
if (ret_val)
return ret_val;
@@ -1249,7 +1254,7 @@ static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw)
if (data & E1000_M88E1112_STATUS_LINK)
port = E1000_MEDIA_PORT_COPPER;
- /* Check the other medium. */
+ /* Check for other. */
ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 1);
if (ret_val)
return ret_val;
@@ -1258,11 +1263,6 @@ static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw)
if (ret_val)
return ret_val;
- /* reset page to 0 */
- ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0);
- if (ret_val)
- return ret_val;
-
if (data & E1000_M88E1112_STATUS_LINK)
port = E1000_MEDIA_PORT_OTHER;
@@ -1270,8 +1270,20 @@ static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw)
if (port && (hw->dev_spec._82575.media_port != port)) {
hw->dev_spec._82575.media_port = port;
hw->dev_spec._82575.media_changed = TRUE;
+ }
+
+ if (port == E1000_MEDIA_PORT_COPPER) {
+ /* reset page to 0 */
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0);
+ if (ret_val)
+ return ret_val;
+ e1000_check_for_link_82575(hw);
} else {
- ret_val = e1000_check_for_link_82575(hw);
+ e1000_check_for_link_82575(hw);
+ /* reset page to 0 */
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0);
+ if (ret_val)
+ return ret_val;
}
return E1000_SUCCESS;
@@ -2128,7 +2140,7 @@ static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw)
* e1000_rx_fifo_flush_82575 - Clean rx fifo after Rx enable
* @hw: pointer to the HW structure
*
- * After rx enable if managability is enabled then there is likely some
+ * After Rx enable, if manageability is enabled then there is likely some
* bad data at the start of the fifo and possibly in the DMA fifo. This
* function clears the fifos and flushes any packets that came in as rx was
* being enabled.
@@ -2138,7 +2150,13 @@ void e1000_rx_fifo_flush_82575(struct e1000_hw *hw)
u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled;
int i, ms_wait;
- DEBUGFUNC("e1000_rx_fifo_workaround_82575");
+ DEBUGFUNC("e1000_rx_fifo_flush_82575");
+
+ /* disable IPv6 options as per hardware errata */
+ rfctl = E1000_READ_REG(hw, E1000_RFCTL);
+ rfctl |= E1000_RFCTL_IPV6_EX_DIS;
+ E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
+
if (hw->mac.type != e1000_82575 ||
!(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_RCV_TCO_EN))
return;
@@ -2166,7 +2184,6 @@ void e1000_rx_fifo_flush_82575(struct e1000_hw *hw)
* incoming packets are rejected. Set enable and wait 2ms so that
* any packet that was coming in as RCTL.EN was set is flushed
*/
- rfctl = E1000_READ_REG(hw, E1000_RFCTL);
E1000_WRITE_REG(hw, E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF);
rlpml = E1000_READ_REG(hw, E1000_RLPML);
@@ -2401,7 +2418,7 @@ out:
* e1000_reset_mdicnfg_82580 - Reset MDICNFG destination and com_mdio bits
* @hw: pointer to the HW structure
*
- * This resets the the MDICNFG.Destination and MDICNFG.Com_MDIO bits based on
+ * This resets the MDICNFG.Destination and MDICNFG.Com_MDIO bits based on
* the values found in the EEPROM. This addresses an issue in which these
* bits are not restored from EEPROM after reset.
**/
@@ -2808,7 +2825,7 @@ s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data)
* e1000_initialize_M88E1512_phy - Initialize M88E1512 PHY
* @hw: pointer to the HW structure
*
- * Initialize Marverl 1512 to work correctly with Avoton.
+ * Initialize Marvell 1512 to work correctly with Avoton.
**/
s32 e1000_initialize_M88E1512_phy(struct e1000_hw *hw)
{
@@ -2894,13 +2911,114 @@ out:
}
/**
+ * e1000_initialize_M88E1543_phy - Initialize M88E1543 PHY
+ * @hw: pointer to the HW structure
+ *
+ * Initialize Marvell 1543 to work correctly with Avoton.
+ **/
+s32 e1000_initialize_M88E1543_phy(struct e1000_hw *hw)
+{
+ struct e1000_phy_info *phy = &hw->phy;
+ s32 ret_val = E1000_SUCCESS;
+
+ DEBUGFUNC("e1000_initialize_M88E1543_phy");
+
+ /* Check if this is correct PHY. */
+ if (phy->id != M88E1543_E_PHY_ID)
+ goto out;
+
+ /* Switch to PHY page 0xFF. */
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FF);
+ if (ret_val)
+ goto out;
+
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x214B);
+ if (ret_val)
+ goto out;
+
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2144);
+ if (ret_val)
+ goto out;
+
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x0C28);
+ if (ret_val)
+ goto out;
+
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2146);
+ if (ret_val)
+ goto out;
+
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xB233);
+ if (ret_val)
+ goto out;
+
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x214D);
+ if (ret_val)
+ goto out;
+
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xDC0C);
+ if (ret_val)
+ goto out;
+
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2159);
+ if (ret_val)
+ goto out;
+
+ /* Switch to PHY page 0xFB. */
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FB);
+ if (ret_val)
+ goto out;
+
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_3, 0xC00D);
+ if (ret_val)
+ goto out;
+
+ /* Switch to PHY page 0x12. */
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x12);
+ if (ret_val)
+ goto out;
+
+ /* Change mode to SGMII-to-Copper */
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1512_MODE, 0x8001);
+ if (ret_val)
+ goto out;
+
+ /* Switch to PHY page 1. */
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x1);
+ if (ret_val)
+ goto out;
+
+ /* Change mode to 1000BASE-X/SGMII and autoneg enable; reset */
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_FIBER_CTRL, 0x9140);
+ if (ret_val)
+ goto out;
+
+ /* Return the PHY to page 0. */
+ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0);
+ if (ret_val)
+ goto out;
+
+ ret_val = phy->ops.commit(hw);
+ if (ret_val) {
+ DEBUGOUT("Error committing the PHY changes\n");
+ return ret_val;
+ }
+
+ msec_delay(1000);
+out:
+ return ret_val;
+}
+
+/**
* e1000_set_eee_i350 - Enable/disable EEE support
* @hw: pointer to the HW structure
+ * @adv1g: boolean flag enabling 1G EEE advertisement
+ * @adv100m: boolean flag enabling 100M EEE advertisement
*
* Enable/disable EEE based on setting in dev_spec structure.
*
**/
-s32 e1000_set_eee_i350(struct e1000_hw *hw)
+s32 e1000_set_eee_i350(struct e1000_hw *hw, bool adv1G, bool adv100M)
{
u32 ipcnfg, eeer;
@@ -2916,7 +3034,16 @@ s32 e1000_set_eee_i350(struct e1000_hw *hw)
if (!(hw->dev_spec._82575.eee_disable)) {
u32 eee_su = E1000_READ_REG(hw, E1000_EEE_SU);
- ipcnfg |= (E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN);
+ if (adv100M)
+ ipcnfg |= E1000_IPCNFG_EEE_100M_AN;
+ else
+ ipcnfg &= ~E1000_IPCNFG_EEE_100M_AN;
+
+ if (adv1G)
+ ipcnfg |= E1000_IPCNFG_EEE_1G_AN;
+ else
+ ipcnfg &= ~E1000_IPCNFG_EEE_1G_AN;
+
eeer |= (E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN |
E1000_EEER_LPI_FC);
@@ -2940,11 +3067,13 @@ out:
/**
* e1000_set_eee_i354 - Enable/disable EEE support
* @hw: pointer to the HW structure
+ * @adv1g: boolean flag enabling 1G EEE advertisement
+ * @adv100m: boolean flag enabling 100M EEE advertisement
*
* Enable/disable EEE legacy mode based on setting in dev_spec structure.
*
**/
-s32 e1000_set_eee_i354(struct e1000_hw *hw)
+s32 e1000_set_eee_i354(struct e1000_hw *hw, bool adv1G, bool adv100M)
{
struct e1000_phy_info *phy = &hw->phy;
s32 ret_val = E1000_SUCCESS;
@@ -2986,8 +3115,16 @@ s32 e1000_set_eee_i354(struct e1000_hw *hw)
if (ret_val)
goto out;
- phy_data |= E1000_EEE_ADV_100_SUPPORTED |
- E1000_EEE_ADV_1000_SUPPORTED;
+ if (adv100M)
+ phy_data |= E1000_EEE_ADV_100_SUPPORTED;
+ else
+ phy_data &= ~E1000_EEE_ADV_100_SUPPORTED;
+
+ if (adv1G)
+ phy_data |= E1000_EEE_ADV_1000_SUPPORTED;
+ else
+ phy_data &= ~E1000_EEE_ADV_1000_SUPPORTED;
+
ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
E1000_EEE_ADV_DEV_I354,
phy_data);
diff --git a/freebsd/sys/dev/e1000/e1000_82575.h b/freebsd/sys/dev/e1000/e1000_82575.h
index 6569b988..45fe132e 100644
--- a/freebsd/sys/dev/e1000/e1000_82575.h
+++ b/freebsd/sys/dev/e1000/e1000_82575.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -495,10 +495,11 @@ void e1000_rlpml_set_vf(struct e1000_hw *, u16);
s32 e1000_promisc_set_vf(struct e1000_hw *, enum e1000_promisc_type type);
u16 e1000_rxpbs_adjust_82580(u32 data);
s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data);
-s32 e1000_set_eee_i350(struct e1000_hw *);
-s32 e1000_set_eee_i354(struct e1000_hw *);
+s32 e1000_set_eee_i350(struct e1000_hw *hw, bool adv1G, bool adv100M);
+s32 e1000_set_eee_i354(struct e1000_hw *hw, bool adv1G, bool adv100M);
s32 e1000_get_eee_status_i354(struct e1000_hw *, bool *);
s32 e1000_initialize_M88E1512_phy(struct e1000_hw *hw);
+s32 e1000_initialize_M88E1543_phy(struct e1000_hw *hw);
/* I2C SDA and SCL timing parameters for standard mode */
#define E1000_I2C_T_HD_STA 4
diff --git a/freebsd/sys/dev/e1000/e1000_api.c b/freebsd/sys/dev/e1000/e1000_api.c
index 71315bde..829e7be0 100644
--- a/freebsd/sys/dev/e1000/e1000_api.c
+++ b/freebsd/sys/dev/e1000/e1000_api.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -301,6 +301,17 @@ s32 e1000_set_mac_type(struct e1000_hw *hw)
case E1000_DEV_ID_PCH_I218_V3:
mac->type = e1000_pch_lpt;
break;
+ case E1000_DEV_ID_PCH_SPT_I219_LM:
+ case E1000_DEV_ID_PCH_SPT_I219_V:
+ case E1000_DEV_ID_PCH_SPT_I219_LM2:
+ case E1000_DEV_ID_PCH_SPT_I219_V2:
+ case E1000_DEV_ID_PCH_LBG_I219_LM3:
+ case E1000_DEV_ID_PCH_SPT_I219_LM4:
+ case E1000_DEV_ID_PCH_SPT_I219_V4:
+ case E1000_DEV_ID_PCH_SPT_I219_LM5:
+ case E1000_DEV_ID_PCH_SPT_I219_V5:
+ mac->type = e1000_pch_spt;
+ break;
case E1000_DEV_ID_82575EB_COPPER:
case E1000_DEV_ID_82575EB_FIBER_SERDES:
case E1000_DEV_ID_82575GB_QUAD_COPPER:
@@ -451,6 +462,7 @@ s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device)
case e1000_pchlan:
case e1000_pch2lan:
case e1000_pch_lpt:
+ case e1000_pch_spt:
e1000_init_function_pointers_ich8lan(hw);
break;
case e1000_82575:
diff --git a/freebsd/sys/dev/e1000/e1000_api.h b/freebsd/sys/dev/e1000/e1000_api.h
index a2ffa169..074197bf 100644
--- a/freebsd/sys/dev/e1000/e1000_api.h
+++ b/freebsd/sys/dev/e1000/e1000_api.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -124,14 +124,14 @@ u32 e1000_translate_register_82542(u32 reg);
* TBI_ACCEPT macro definition:
*
* This macro requires:
- * adapter = a pointer to struct e1000_hw
+ * a = a pointer to struct e1000_hw
* status = the 8 bit status field of the Rx descriptor with EOP set
- * error = the 8 bit error field of the Rx descriptor with EOP set
+ * errors = the 8 bit error field of the Rx descriptor with EOP set
* length = the sum of all the length fields of the Rx descriptors that
* make up the current frame
* last_byte = the last byte of the frame DMAed by the hardware
- * max_frame_length = the maximum frame length we want to accept.
- * min_frame_length = the minimum frame length we want to accept.
+ * min_frame_size = the minimum frame length we want to accept.
+ * max_frame_size = the maximum frame length we want to accept.
*
* This macro is a conditional that should be used in the interrupt
* handler's Rx processing routine when RxErrors have been detected.
@@ -157,10 +157,10 @@ u32 e1000_translate_register_82542(u32 reg);
(((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \
((last_byte) == CARRIER_EXTENSION) && \
(((status) & E1000_RXD_STAT_VP) ? \
- (((length) > (min_frame_size - VLAN_TAG_SIZE)) && \
- ((length) <= (max_frame_size + 1))) : \
- (((length) > min_frame_size) && \
- ((length) <= (max_frame_size + VLAN_TAG_SIZE + 1)))))
+ (((length) > ((min_frame_size) - VLAN_TAG_SIZE)) && \
+ ((length) <= ((max_frame_size) + 1))) : \
+ (((length) > (min_frame_size)) && \
+ ((length) <= ((max_frame_size) + VLAN_TAG_SIZE + 1)))))
#define E1000_MAX(a, b) ((a) > (b) ? (a) : (b))
#define E1000_DIVIDE_ROUND_UP(a, b) (((a) + (b) - 1) / (b)) /* ceil(a/b) */
diff --git a/freebsd/sys/dev/e1000/e1000_defines.h b/freebsd/sys/dev/e1000/e1000_defines.h
index 72a8b14f..e33fe0fb 100644
--- a/freebsd/sys/dev/e1000/e1000_defines.h
+++ b/freebsd/sys/dev/e1000/e1000_defines.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -158,11 +158,12 @@
E1000_RXDEXT_STATERR_CXE | \
E1000_RXDEXT_STATERR_RXE)
-#define E1000_MRQC_ENABLE_RSS_2Q 0x00000001
+#define E1000_MRQC_RSS_ENABLE_2Q 0x00000001
#define E1000_MRQC_RSS_FIELD_MASK 0xFFFF0000
#define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000
#define E1000_MRQC_RSS_FIELD_IPV4 0x00020000
#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000
+#define E1000_MRQC_RSS_FIELD_IPV6_EX 0x00080000
#define E1000_MRQC_RSS_FIELD_IPV6 0x00100000
#define E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000
@@ -196,6 +197,8 @@
#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */
#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */
#define E1000_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */
+#define E1000_RCTL_RDMTS_HEX 0x00010000
+#define E1000_RCTL_RDMTS1_HEX E1000_RCTL_RDMTS_HEX
#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */
#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */
#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */
@@ -752,6 +755,12 @@
#define E1000_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */
#define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */
+/* HH Time Sync */
+#define E1000_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK 0x0000F000 /* max delay */
+#define E1000_TSYNCTXCTL_SYNC_COMP_ERR 0x20000000 /* sync err */
+#define E1000_TSYNCTXCTL_SYNC_COMP 0x40000000 /* sync complete */
+#define E1000_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */
+
#define E1000_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */
#define E1000_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */
#define E1000_TSYNCRXCTL_TYPE_L2_V2 0x00
@@ -848,6 +857,7 @@
#define E1000_M88E1543_PAGE_ADDR 0x16 /* Page Offset Register */
#define E1000_M88E1543_EEE_CTRL_1 0x0
#define E1000_M88E1543_EEE_CTRL_1_MS 0x0001 /* EEE Master/Slave */
+#define E1000_M88E1543_FIBER_CTRL 0x0 /* Fiber Control Register */
#define E1000_EEE_ADV_DEV_I354 7
#define E1000_EEE_ADV_ADDR_I354 60
#define E1000_EEE_ADV_100_SUPPORTED (1 << 1) /* 100BaseTx EEE Supported */
@@ -1019,9 +1029,7 @@
/* NVM Addressing bits based on type 0=small, 1=large */
#define E1000_EECD_ADDR_BITS 0x00000400
#define E1000_EECD_TYPE 0x00002000 /* NVM Type (1-SPI, 0-Microwire) */
-#ifndef E1000_NVM_GRANT_ATTEMPTS
#define E1000_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */
-#endif
#define E1000_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */
#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */
#define E1000_EECD_SIZE_EX_SHIFT 11
diff --git a/freebsd/sys/dev/e1000/e1000_hw.h b/freebsd/sys/dev/e1000/e1000_hw.h
index faf64a37..e1464a7b 100644
--- a/freebsd/sys/dev/e1000/e1000_hw.h
+++ b/freebsd/sys/dev/e1000/e1000_hw.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -137,6 +137,15 @@ struct e1000_hw;
#define E1000_DEV_ID_PCH_I218_V2 0x15A1
#define E1000_DEV_ID_PCH_I218_LM3 0x15A2 /* Wildcat Point PCH */
#define E1000_DEV_ID_PCH_I218_V3 0x15A3 /* Wildcat Point PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_LM 0x156F /* Sunrise Point PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_V 0x1570 /* Sunrise Point PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_LM2 0x15B7 /* Sunrise Point-H PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_V2 0x15B8 /* Sunrise Point-H PCH */
+#define E1000_DEV_ID_PCH_LBG_I219_LM3 0x15B9 /* LEWISBURG PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_LM4 0x15D7
+#define E1000_DEV_ID_PCH_SPT_I219_V4 0x15D8
+#define E1000_DEV_ID_PCH_SPT_I219_LM5 0x15E3
+#define E1000_DEV_ID_PCH_SPT_I219_V5 0x15D6
#define E1000_DEV_ID_82576 0x10C9
#define E1000_DEV_ID_82576_FIBER 0x10E6
#define E1000_DEV_ID_82576_SERDES 0x10E7
@@ -222,6 +231,7 @@ enum e1000_mac_type {
e1000_pchlan,
e1000_pch2lan,
e1000_pch_lpt,
+ e1000_pch_spt,
e1000_82575,
e1000_82576,
e1000_82580,
@@ -787,7 +797,7 @@ struct e1000_mac_info {
u16 uta_reg_count;
/* Maximum size of the MTA register table in all supported adapters */
- #define MAX_MTA_REG 128
+#define MAX_MTA_REG 128
u32 mta_shadow[MAX_MTA_REG];
u16 rar_entry_count;
@@ -805,7 +815,7 @@ struct e1000_mac_info {
enum e1000_serdes_link_state serdes_link_state;
bool serdes_has_link;
bool tx_pkt_filtering;
- u32 max_frame_size;
+ u32 max_frame_size;
};
struct e1000_phy_info {
@@ -951,9 +961,13 @@ struct e1000_dev_spec_ich8lan {
E1000_MUTEX nvm_mutex;
E1000_MUTEX swflag_mutex;
bool nvm_k1_enabled;
+ bool disable_k1_off;
bool eee_disable;
u16 eee_lp_ability;
enum e1000_ulp_state ulp_state;
+ bool ulp_capability_disabled;
+ bool during_suspend_flow;
+ bool during_dpg_exit;
};
struct e1000_dev_spec_82575 {
diff --git a/freebsd/sys/dev/e1000/e1000_i210.c b/freebsd/sys/dev/e1000/e1000_i210.c
index 413b6a1d..8f427b9a 100644
--- a/freebsd/sys/dev/e1000/e1000_i210.c
+++ b/freebsd/sys/dev/e1000/e1000_i210.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -885,6 +885,35 @@ static s32 e1000_pll_workaround_i210(struct e1000_hw *hw)
}
/**
+ * e1000_get_cfg_done_i210 - Read config done bit
+ * @hw: pointer to the HW structure
+ *
+ * Read the management control register for the config done bit for
+ * completion status. NOTE: silicon which is EEPROM-less will fail trying
+ * to read the config done bit, so an error is *ONLY* logged and returns
+ * E1000_SUCCESS. If we were to return with error, EEPROM-less silicon
+ * would not be able to be reset or change link.
+ **/
+static s32 e1000_get_cfg_done_i210(struct e1000_hw *hw)
+{
+ s32 timeout = PHY_CFG_TIMEOUT;
+ u32 mask = E1000_NVM_CFG_DONE_PORT_0;
+
+ DEBUGFUNC("e1000_get_cfg_done_i210");
+
+ while (timeout) {
+ if (E1000_READ_REG(hw, E1000_EEMNGCTL_I210) & mask)
+ break;
+ msec_delay(1);
+ timeout--;
+ }
+ if (!timeout)
+ DEBUGOUT("MNG configuration cycle has not completed.\n");
+
+ return E1000_SUCCESS;
+}
+
+/**
* e1000_init_hw_i210 - Init hw for I210/I211
* @hw: pointer to the HW structure
*
@@ -901,6 +930,7 @@ s32 e1000_init_hw_i210(struct e1000_hw *hw)
if (ret_val != E1000_SUCCESS)
return ret_val;
}
+ hw->phy.ops.get_cfg_done = e1000_get_cfg_done_i210;
ret_val = e1000_init_hw_82575(hw);
return ret_val;
}
diff --git a/freebsd/sys/dev/e1000/e1000_i210.h b/freebsd/sys/dev/e1000/e1000_i210.h
index 2a20ca1e..f940915b 100644
--- a/freebsd/sys/dev/e1000/e1000_i210.h
+++ b/freebsd/sys/dev/e1000/e1000_i210.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/dev/e1000/e1000_ich8lan.c b/freebsd/sys/dev/e1000/e1000_ich8lan.c
index b5c75f26..007488b2 100644
--- a/freebsd/sys/dev/e1000/e1000_ich8lan.c
+++ b/freebsd/sys/dev/e1000/e1000_ich8lan.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -94,10 +94,13 @@ static s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw,
bool active);
static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset,
u16 words, u16 *data);
+static s32 e1000_read_nvm_spt(struct e1000_hw *hw, u16 offset, u16 words,
+ u16 *data);
static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset,
u16 words, u16 *data);
static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw);
static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw);
+static s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw);
static s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw,
u16 *data);
static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw);
@@ -125,6 +128,14 @@ static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw,
u32 offset, u8 *data);
static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset,
u8 size, u16 *data);
+static s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset,
+ u32 *data);
+static s32 e1000_read_flash_dword_ich8lan(struct e1000_hw *hw,
+ u32 offset, u32 *data);
+static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw,
+ u32 offset, u32 data);
+static s32 e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw,
+ u32 offset, u32 dword);
static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw,
u32 offset, u16 *data);
static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw,
@@ -234,16 +245,21 @@ static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw)
if (ret_val)
return FALSE;
out:
- if (hw->mac.type == e1000_pch_lpt) {
- /* Unforce SMBus mode in PHY */
- hw->phy.ops.read_reg_locked(hw, CV_SMB_CTRL, &phy_reg);
- phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
- hw->phy.ops.write_reg_locked(hw, CV_SMB_CTRL, phy_reg);
+ if ((hw->mac.type == e1000_pch_lpt) ||
+ (hw->mac.type == e1000_pch_spt)) {
+ /* Only unforce SMBus if ME is not active */
+ if (!(E1000_READ_REG(hw, E1000_FWSM) &
+ E1000_ICH_FWSM_FW_VALID)) {
+ /* Unforce SMBus mode in PHY */
+ hw->phy.ops.read_reg_locked(hw, CV_SMB_CTRL, &phy_reg);
+ phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
+ hw->phy.ops.write_reg_locked(hw, CV_SMB_CTRL, phy_reg);
- /* Unforce SMBus mode in MAC */
- mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
- mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
- E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg);
+ /* Unforce SMBus mode in MAC */
+ mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+ E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg);
+ }
}
return TRUE;
@@ -274,7 +290,7 @@ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw)
mac_reg &= ~E1000_CTRL_LANPHYPC_VALUE;
E1000_WRITE_REG(hw, E1000_CTRL, mac_reg);
E1000_WRITE_FLUSH(hw);
- usec_delay(10);
+ msec_delay(1);
mac_reg &= ~E1000_CTRL_LANPHYPC_OVERRIDE;
E1000_WRITE_REG(hw, E1000_CTRL, mac_reg);
E1000_WRITE_FLUSH(hw);
@@ -330,6 +346,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
*/
switch (hw->mac.type) {
case e1000_pch_lpt:
+ case e1000_pch_spt:
if (e1000_phy_is_accessible_pchlan(hw))
break;
@@ -477,6 +494,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
/* fall-through */
case e1000_pch2lan:
case e1000_pch_lpt:
+ case e1000_pch_spt:
/* In case the PHY needs to be in mdio slow mode,
* set slow mode and try to get the PHY id again.
*/
@@ -619,36 +637,57 @@ static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw)
struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
u32 gfpreg, sector_base_addr, sector_end_addr;
u16 i;
+ u32 nvm_size;
DEBUGFUNC("e1000_init_nvm_params_ich8lan");
- /* Can't read flash registers if the register set isn't mapped. */
nvm->type = e1000_nvm_flash_sw;
- if (!hw->flash_address) {
- DEBUGOUT("ERROR: Flash registers not mapped\n");
- return -E1000_ERR_CONFIG;
- }
- gfpreg = E1000_READ_FLASH_REG(hw, ICH_FLASH_GFPREG);
+ if (hw->mac.type == e1000_pch_spt) {
+ /* in SPT, gfpreg doesn't exist. NVM size is taken from the
+ * STRAP register. This is because in SPT the GbE Flash region
+ * is no longer accessed through the flash registers. Instead,
+ * the mechanism has changed, and the Flash region access
+ * registers are now implemented in GbE memory space.
+ */
+ nvm->flash_base_addr = 0;
+ nvm_size =
+ (((E1000_READ_REG(hw, E1000_STRAP) >> 1) & 0x1F) + 1)
+ * NVM_SIZE_MULTIPLIER;
+ nvm->flash_bank_size = nvm_size / 2;
+ /* Adjust to word count */
+ nvm->flash_bank_size /= sizeof(u16);
+ /* Set the base address for flash register access */
+ hw->flash_address = hw->hw_addr + E1000_FLASH_BASE_ADDR;
+ } else {
+ /* Can't read flash registers if register set isn't mapped. */
+ if (!hw->flash_address) {
+ DEBUGOUT("ERROR: Flash registers not mapped\n");
+ return -E1000_ERR_CONFIG;
+ }
- /* sector_X_addr is a "sector"-aligned address (4096 bytes)
- * Add 1 to sector_end_addr since this sector is included in
- * the overall size.
- */
- sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK;
- sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1;
+ gfpreg = E1000_READ_FLASH_REG(hw, ICH_FLASH_GFPREG);
- /* flash_base_addr is byte-aligned */
- nvm->flash_base_addr = sector_base_addr << FLASH_SECTOR_ADDR_SHIFT;
+ /* sector_X_addr is a "sector"-aligned address (4096 bytes)
+ * Add 1 to sector_end_addr since this sector is included in
+ * the overall size.
+ */
+ sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK;
+ sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1;
- /* find total size of the NVM, then cut in half since the total
- * size represents two separate NVM banks.
- */
- nvm->flash_bank_size = ((sector_end_addr - sector_base_addr)
- << FLASH_SECTOR_ADDR_SHIFT);
- nvm->flash_bank_size /= 2;
- /* Adjust to word count */
- nvm->flash_bank_size /= sizeof(u16);
+ /* flash_base_addr is byte-aligned */
+ nvm->flash_base_addr = sector_base_addr
+ << FLASH_SECTOR_ADDR_SHIFT;
+
+ /* find total size of the NVM, then cut in half since the total
+ * size represents two separate NVM banks.
+ */
+ nvm->flash_bank_size = ((sector_end_addr - sector_base_addr)
+ << FLASH_SECTOR_ADDR_SHIFT);
+ nvm->flash_bank_size /= 2;
+ /* Adjust to word count */
+ nvm->flash_bank_size /= sizeof(u16);
+ }
nvm->word_size = E1000_SHADOW_RAM_WORDS;
@@ -664,8 +703,13 @@ static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw)
/* Function Pointers */
nvm->ops.acquire = e1000_acquire_nvm_ich8lan;
nvm->ops.release = e1000_release_nvm_ich8lan;
- nvm->ops.read = e1000_read_nvm_ich8lan;
- nvm->ops.update = e1000_update_nvm_checksum_ich8lan;
+ if (hw->mac.type == e1000_pch_spt) {
+ nvm->ops.read = e1000_read_nvm_spt;
+ nvm->ops.update = e1000_update_nvm_checksum_spt;
+ } else {
+ nvm->ops.read = e1000_read_nvm_ich8lan;
+ nvm->ops.update = e1000_update_nvm_checksum_ich8lan;
+ }
nvm->ops.valid_led_default = e1000_valid_led_default_ich8lan;
nvm->ops.validate = e1000_validate_nvm_checksum_ich8lan;
nvm->ops.write = e1000_write_nvm_ich8lan;
@@ -751,9 +795,11 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw)
mac->ops.rar_set = e1000_rar_set_pch2lan;
/* fall-through */
case e1000_pch_lpt:
+ case e1000_pch_spt:
/* multicast address update for pch2 */
mac->ops.update_mc_addr_list =
e1000_update_mc_addr_list_pch2lan;
+ /* fall-through */
case e1000_pchlan:
/* check management mode */
mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan;
@@ -771,7 +817,8 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw)
break;
}
- if (mac->type == e1000_pch_lpt) {
+ if ((mac->type == e1000_pch_lpt) ||
+ (mac->type == e1000_pch_spt)) {
mac->rar_entry_count = E1000_PCH_LPT_RAR_ENTRIES;
mac->ops.rar_set = e1000_rar_set_pch_lpt;
mac->ops.setup_physical_interface = e1000_setup_copper_link_pch_lpt;
@@ -1001,8 +1048,9 @@ release:
/* clear FEXTNVM6 bit 8 on link down or 10/100 */
fextnvm6 &= ~E1000_FEXTNVM6_REQ_PLL_CLK;
- if (!link || ((status & E1000_STATUS_SPEED_100) &&
- (status & E1000_STATUS_FD)))
+ if ((hw->phy.revision > 5) || !link ||
+ ((status & E1000_STATUS_SPEED_100) &&
+ (status & E1000_STATUS_FD)))
goto update_fextnvm6;
ret_val = hw->phy.ops.read_reg(hw, I217_INBAND_CTRL, &reg);
@@ -1081,7 +1129,7 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)
u16 speed, duplex, scale = 0;
u16 max_snoop, max_nosnoop;
u16 max_ltr_enc; /* max LTR latency encoded */
- s64 lat_ns; /* latency (ns) */
+ s64 lat_ns;
s64 value;
u32 rxa;
@@ -1113,8 +1161,8 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)
lat_ns = 0;
else
lat_ns /= speed;
-
value = lat_ns;
+
while (value > E1000_LTRV_VALUE_MASK) {
scale++;
value = E1000_DIVIDE_ROUND_UP(value, (1 << 5));
@@ -1215,6 +1263,7 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
u32 mac_reg;
s32 ret_val = E1000_SUCCESS;
u16 phy_reg;
+ u16 oem_reg = 0;
if ((hw->mac.type < e1000_pch_lpt) ||
(hw->device_id == E1000_DEV_ID_PCH_LPT_I217_LM) ||
@@ -1270,6 +1319,25 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg);
+ /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable
+ * LPLU and disable Gig speed when entering ULP
+ */
+ if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6)) {
+ ret_val = e1000_read_phy_reg_hv_locked(hw, HV_OEM_BITS,
+ &oem_reg);
+ if (ret_val)
+ goto release;
+
+ phy_reg = oem_reg;
+ phy_reg |= HV_OEM_BITS_LPLU | HV_OEM_BITS_GBE_DIS;
+
+ ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS,
+ phy_reg);
+
+ if (ret_val)
+ goto release;
+ }
+
/* Set Inband ULP Exit, Reset to SMBus mode and
* Disable SMBus Release on PERST# in PHY
*/
@@ -1281,10 +1349,15 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
if (to_sx) {
if (E1000_READ_REG(hw, E1000_WUFC) & E1000_WUFC_LNKC)
phy_reg |= I218_ULP_CONFIG1_WOL_HOST;
+ else
+ phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST;
phy_reg |= I218_ULP_CONFIG1_STICKY_ULP;
+ phy_reg &= ~I218_ULP_CONFIG1_INBAND_EXIT;
} else {
phy_reg |= I218_ULP_CONFIG1_INBAND_EXIT;
+ phy_reg &= ~I218_ULP_CONFIG1_STICKY_ULP;
+ phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST;
}
e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
@@ -1296,12 +1369,21 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
/* Commit ULP changes in PHY by starting auto ULP configuration */
phy_reg |= I218_ULP_CONFIG1_START;
e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
+
+ if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6) &&
+ to_sx && (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
+ ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS,
+ oem_reg);
+ if (ret_val)
+ goto release;
+ }
+
release:
hw->phy.ops.release(hw);
out:
- if (ret_val) {
+ if (ret_val)
DEBUGOUT1("Error in ULP enable flow: %d\n", ret_val);
- } else
+ else
hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_on;
return ret_val;
@@ -1346,10 +1428,10 @@ s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
E1000_WRITE_REG(hw, E1000_H2ME, mac_reg);
}
- /* Poll up to 100msec for ME to clear ULP_CFG_DONE */
+ /* Poll up to 300msec for ME to clear ULP_CFG_DONE. */
while (E1000_READ_REG(hw, E1000_FWSM) &
E1000_FWSM_ULP_CFG_DONE) {
- if (i++ == 10) {
+ if (i++ == 30) {
ret_val = -E1000_ERR_PHY;
goto out;
}
@@ -1423,6 +1505,8 @@ s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
I218_ULP_CONFIG1_RESET_TO_SMBUS |
I218_ULP_CONFIG1_WOL_HOST |
I218_ULP_CONFIG1_INBAND_EXIT |
+ I218_ULP_CONFIG1_EN_ULP_LANPHYPC |
+ I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST |
I218_ULP_CONFIG1_DISABLE_SMB_PERST);
e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
@@ -1442,9 +1526,9 @@ release:
msec_delay(50);
}
out:
- if (ret_val) {
+ if (ret_val)
DEBUGOUT1("Error in ULP disable flow: %d\n", ret_val);
- } else
+ else
hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_off;
return ret_val;
@@ -1461,7 +1545,8 @@ out:
static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
{
struct e1000_mac_info *mac = &hw->mac;
- s32 ret_val;
+ s32 ret_val, tipg_reg = 0;
+ u16 emi_addr, emi_val = 0;
bool link;
u16 phy_reg;
@@ -1494,35 +1579,129 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
* the IPG and reduce Rx latency in the PHY.
*/
if (((hw->mac.type == e1000_pch2lan) ||
- (hw->mac.type == e1000_pch_lpt)) && link) {
- u32 reg;
- reg = E1000_READ_REG(hw, E1000_STATUS);
- if (!(reg & (E1000_STATUS_FD | E1000_STATUS_SPEED_MASK))) {
- u16 emi_addr;
+ (hw->mac.type == e1000_pch_lpt) ||
+ (hw->mac.type == e1000_pch_spt)) && link) {
+ u16 speed, duplex;
- reg = E1000_READ_REG(hw, E1000_TIPG);
- reg &= ~E1000_TIPG_IPGT_MASK;
- reg |= 0xFF;
- E1000_WRITE_REG(hw, E1000_TIPG, reg);
+ e1000_get_speed_and_duplex_copper_generic(hw, &speed, &duplex);
+ tipg_reg = E1000_READ_REG(hw, E1000_TIPG);
+ tipg_reg &= ~E1000_TIPG_IPGT_MASK;
+ if (duplex == HALF_DUPLEX && speed == SPEED_10) {
+ tipg_reg |= 0xFF;
/* Reduce Rx latency in analog PHY */
- ret_val = hw->phy.ops.acquire(hw);
- if (ret_val)
- return ret_val;
+ emi_val = 0;
+ } else if (hw->mac.type == e1000_pch_spt &&
+ duplex == FULL_DUPLEX && speed != SPEED_1000) {
+ tipg_reg |= 0xC;
+ emi_val = 1;
+ } else {
+ /* Roll back the default values */
+ tipg_reg |= 0x08;
+ emi_val = 1;
+ }
- if (hw->mac.type == e1000_pch2lan)
- emi_addr = I82579_RX_CONFIG;
+ E1000_WRITE_REG(hw, E1000_TIPG, tipg_reg);
+
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+
+ if (hw->mac.type == e1000_pch2lan)
+ emi_addr = I82579_RX_CONFIG;
+ else
+ emi_addr = I217_RX_CONFIG;
+ ret_val = e1000_write_emi_reg_locked(hw, emi_addr, emi_val);
+
+ if (hw->mac.type == e1000_pch_lpt ||
+ hw->mac.type == e1000_pch_spt) {
+ u16 phy_reg;
+
+ hw->phy.ops.read_reg_locked(hw, I217_PLL_CLOCK_GATE_REG,
+ &phy_reg);
+ phy_reg &= ~I217_PLL_CLOCK_GATE_MASK;
+ if (speed == SPEED_100 || speed == SPEED_10)
+ phy_reg |= 0x3E8;
else
- emi_addr = I217_RX_CONFIG;
- ret_val = e1000_write_emi_reg_locked(hw, emi_addr, 0);
+ phy_reg |= 0xFA;
+ hw->phy.ops.write_reg_locked(hw,
+ I217_PLL_CLOCK_GATE_REG,
+ phy_reg);
- hw->phy.ops.release(hw);
+ if (speed == SPEED_1000) {
+ hw->phy.ops.read_reg_locked(hw, HV_PM_CTRL,
+ &phy_reg);
- if (ret_val)
- return ret_val;
+ phy_reg |= HV_PM_CTRL_K1_CLK_REQ;
+
+ hw->phy.ops.write_reg_locked(hw, HV_PM_CTRL,
+ phy_reg);
+ }
+ }
+ hw->phy.ops.release(hw);
+
+ if (ret_val)
+ return ret_val;
+
+ if (hw->mac.type == e1000_pch_spt) {
+ u16 data;
+ u16 ptr_gap;
+
+ if (speed == SPEED_1000) {
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = hw->phy.ops.read_reg_locked(hw,
+ PHY_REG(776, 20),
+ &data);
+ if (ret_val) {
+ hw->phy.ops.release(hw);
+ return ret_val;
+ }
+
+ ptr_gap = (data & (0x3FF << 2)) >> 2;
+ if (ptr_gap < 0x18) {
+ data &= ~(0x3FF << 2);
+ data |= (0x18 << 2);
+ ret_val =
+ hw->phy.ops.write_reg_locked(hw,
+ PHY_REG(776, 20), data);
+ }
+ hw->phy.ops.release(hw);
+ if (ret_val)
+ return ret_val;
+ } else {
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = hw->phy.ops.write_reg_locked(hw,
+ PHY_REG(776, 20),
+ 0xC023);
+ hw->phy.ops.release(hw);
+ if (ret_val)
+ return ret_val;
+
+ }
}
}
+ /* I217 Packet Loss issue:
+ * ensure that FEXTNVM4 Beacon Duration is set correctly
+ * on power up.
+ * Set the Beacon Duration for I217 to 8 usec
+ */
+ if ((hw->mac.type == e1000_pch_lpt) ||
+ (hw->mac.type == e1000_pch_spt)) {
+ u32 mac_reg;
+
+ mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM4);
+ mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK;
+ mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC;
+ E1000_WRITE_REG(hw, E1000_FEXTNVM4, mac_reg);
+ }
+
/* Work-around I218 hang issue */
if ((hw->device_id == E1000_DEV_ID_PCH_LPTLP_I218_LM) ||
(hw->device_id == E1000_DEV_ID_PCH_LPTLP_I218_V) ||
@@ -1532,7 +1711,8 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
if (ret_val)
return ret_val;
}
- if (hw->mac.type == e1000_pch_lpt) {
+ if ((hw->mac.type == e1000_pch_lpt) ||
+ (hw->mac.type == e1000_pch_spt)) {
/* Set platform power management values for
* Latency Tolerance Reporting (LTR)
* Optimized Buffer Flush/Fill (OBFF)
@@ -1545,6 +1725,20 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
/* Clear link partner's EEE ability */
hw->dev_spec.ich8lan.eee_lp_ability = 0;
+ /* FEXTNVM6 K1-off workaround */
+ if (hw->mac.type == e1000_pch_spt) {
+ u32 pcieanacfg = E1000_READ_REG(hw, E1000_PCIEANACFG);
+ u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6);
+
+ if ((pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE) &&
+ (hw->dev_spec.ich8lan.disable_k1_off == FALSE))
+ fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE;
+ else
+ fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE;
+
+ E1000_WRITE_REG(hw, E1000_FEXTNVM6, fextnvm6);
+ }
+
if (!link)
return E1000_SUCCESS; /* No link detected */
@@ -1638,6 +1832,7 @@ void e1000_init_function_pointers_ich8lan(struct e1000_hw *hw)
case e1000_pchlan:
case e1000_pch2lan:
case e1000_pch_lpt:
+ case e1000_pch_spt:
hw->phy.ops.init_params = e1000_init_phy_params_pchlan;
break;
default:
@@ -2020,7 +2215,7 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw)
continue;
}
blocked = FALSE;
- } while (blocked && (i++ < 10));
+ } while (blocked && (i++ < 30));
return blocked ? E1000_BLK_PHY_RESET : E1000_SUCCESS;
}
@@ -2101,6 +2296,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw)
case e1000_pchlan:
case e1000_pch2lan:
case e1000_pch_lpt:
+ case e1000_pch_spt:
sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M;
break;
default:
@@ -2991,7 +3187,6 @@ static s32 e1000_set_lplu_state_pchlan(struct e1000_hw *hw, bool active)
u16 oem_reg;
DEBUGFUNC("e1000_set_lplu_state_pchlan");
-
ret_val = hw->phy.ops.read_reg(hw, HV_OEM_BITS, &oem_reg);
if (ret_val)
return ret_val;
@@ -3211,12 +3406,47 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
struct e1000_nvm_info *nvm = &hw->nvm;
u32 bank1_offset = nvm->flash_bank_size * sizeof(u16);
u32 act_offset = E1000_ICH_NVM_SIG_WORD * 2 + 1;
+ u32 nvm_dword = 0;
u8 sig_byte = 0;
s32 ret_val;
DEBUGFUNC("e1000_valid_nvm_bank_detect_ich8lan");
switch (hw->mac.type) {
+ case e1000_pch_spt:
+ bank1_offset = nvm->flash_bank_size;
+ act_offset = E1000_ICH_NVM_SIG_WORD;
+
+ /* set bank to 0 in case flash read fails */
+ *bank = 0;
+
+ /* Check bank 0 */
+ ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset,
+ &nvm_dword);
+ if (ret_val)
+ return ret_val;
+ sig_byte = (u8)((nvm_dword & 0xFF00) >> 8);
+ if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
+ E1000_ICH_NVM_SIG_VALUE) {
+ *bank = 0;
+ return E1000_SUCCESS;
+ }
+
+ /* Check bank 1 */
+ ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset +
+ bank1_offset,
+ &nvm_dword);
+ if (ret_val)
+ return ret_val;
+ sig_byte = (u8)((nvm_dword & 0xFF00) >> 8);
+ if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
+ E1000_ICH_NVM_SIG_VALUE) {
+ *bank = 1;
+ return E1000_SUCCESS;
+ }
+
+ DEBUGOUT("ERROR: No valid NVM bank present\n");
+ return -E1000_ERR_NVM;
case e1000_ich8lan:
case e1000_ich9lan:
eecd = E1000_READ_REG(hw, E1000_EECD);
@@ -3264,6 +3494,99 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
}
/**
+ * e1000_read_nvm_spt - NVM access for SPT
+ * @hw: pointer to the HW structure
+ * @offset: The offset (in bytes) of the word(s) to read.
+ * @words: Size of data to read in words.
+ * @data: pointer to the word(s) to read at offset.
+ *
+ * Reads a word(s) from the NVM
+ **/
+static s32 e1000_read_nvm_spt(struct e1000_hw *hw, u16 offset, u16 words,
+ u16 *data)
+{
+ struct e1000_nvm_info *nvm = &hw->nvm;
+ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+ u32 act_offset;
+ s32 ret_val = E1000_SUCCESS;
+ u32 bank = 0;
+ u32 dword = 0;
+ u16 offset_to_read;
+ u16 i;
+
+ DEBUGFUNC("e1000_read_nvm_spt");
+
+ if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) ||
+ (words == 0)) {
+ DEBUGOUT("nvm parameter(s) out of bounds\n");
+ ret_val = -E1000_ERR_NVM;
+ goto out;
+ }
+
+ nvm->ops.acquire(hw);
+
+ ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank);
+ if (ret_val != E1000_SUCCESS) {
+ DEBUGOUT("Could not detect valid bank, assuming bank 0\n");
+ bank = 0;
+ }
+
+ act_offset = (bank) ? nvm->flash_bank_size : 0;
+ act_offset += offset;
+
+ ret_val = E1000_SUCCESS;
+
+ for (i = 0; i < words; i += 2) {
+ if (words - i == 1) {
+ if (dev_spec->shadow_ram[offset+i].modified) {
+ data[i] = dev_spec->shadow_ram[offset+i].value;
+ } else {
+ offset_to_read = act_offset + i -
+ ((act_offset + i) % 2);
+ ret_val =
+ e1000_read_flash_dword_ich8lan(hw,
+ offset_to_read,
+ &dword);
+ if (ret_val)
+ break;
+ if ((act_offset + i) % 2 == 0)
+ data[i] = (u16)(dword & 0xFFFF);
+ else
+ data[i] = (u16)((dword >> 16) & 0xFFFF);
+ }
+ } else {
+ offset_to_read = act_offset + i;
+ if (!(dev_spec->shadow_ram[offset+i].modified) ||
+ !(dev_spec->shadow_ram[offset+i+1].modified)) {
+ ret_val =
+ e1000_read_flash_dword_ich8lan(hw,
+ offset_to_read,
+ &dword);
+ if (ret_val)
+ break;
+ }
+ if (dev_spec->shadow_ram[offset+i].modified)
+ data[i] = dev_spec->shadow_ram[offset+i].value;
+ else
+ data[i] = (u16) (dword & 0xFFFF);
+ if (dev_spec->shadow_ram[offset+i].modified)
+ data[i+1] =
+ dev_spec->shadow_ram[offset+i+1].value;
+ else
+ data[i+1] = (u16) (dword >> 16 & 0xFFFF);
+ }
+ }
+
+ nvm->ops.release(hw);
+
+out:
+ if (ret_val)
+ DEBUGOUT1("NVM read error: %d\n", ret_val);
+
+ return ret_val;
+}
+
+/**
* e1000_read_nvm_ich8lan - Read word(s) from the NVM
* @hw: pointer to the HW structure
* @offset: The offset (in bytes) of the word(s) to read.
@@ -3350,7 +3673,11 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw)
/* Clear FCERR and DAEL in hw status by writing 1 */
hsfsts.hsf_status.flcerr = 1;
hsfsts.hsf_status.dael = 1;
- E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval);
+ if (hw->mac.type == e1000_pch_spt)
+ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+ hsfsts.regval & 0xFFFF);
+ else
+ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval);
/* Either we should have a hardware SPI cycle in progress
* bit to check against, in order to start a new cycle or
@@ -3366,7 +3693,12 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw)
* Begin by setting Flash Cycle Done.
*/
hsfsts.hsf_status.flcdone = 1;
- E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval);
+ if (hw->mac.type == e1000_pch_spt)
+ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+ hsfsts.regval & 0xFFFF);
+ else
+ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS,
+ hsfsts.regval);
ret_val = E1000_SUCCESS;
} else {
s32 i;
@@ -3388,8 +3720,12 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw)
* now set the Flash Cycle Done.
*/
hsfsts.hsf_status.flcdone = 1;
- E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS,
- hsfsts.regval);
+ if (hw->mac.type == e1000_pch_spt)
+ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+ hsfsts.regval & 0xFFFF);
+ else
+ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS,
+ hsfsts.regval);
} else {
DEBUGOUT("Flash controller busy, cannot get access\n");
}
@@ -3414,10 +3750,17 @@ static s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout)
DEBUGFUNC("e1000_flash_cycle_ich8lan");
/* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */
- hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL);
+ if (hw->mac.type == e1000_pch_spt)
+ hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16;
+ else
+ hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL);
hsflctl.hsf_ctrl.flcgo = 1;
- E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval);
+ if (hw->mac.type == e1000_pch_spt)
+ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+ hsflctl.regval << 16);
+ else
+ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval);
/* wait till FDONE bit is set to 1 */
do {
@@ -3434,6 +3777,29 @@ static s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout)
}
/**
+ * e1000_read_flash_dword_ich8lan - Read dword from flash
+ * @hw: pointer to the HW structure
+ * @offset: offset to data location
+ * @data: pointer to the location for storing the data
+ *
+ * Reads the flash dword at offset into data. Offset is converted
+ * to bytes before read.
+ **/
+static s32 e1000_read_flash_dword_ich8lan(struct e1000_hw *hw, u32 offset,
+ u32 *data)
+{
+ DEBUGFUNC("e1000_read_flash_dword_ich8lan");
+
+ if (!data)
+ return -E1000_ERR_NVM;
+
+ /* Must convert word offset into bytes. */
+ offset <<= 1;
+
+ return e1000_read_flash_data32_ich8lan(hw, offset, data);
+}
+
+/**
* e1000_read_flash_word_ich8lan - Read word from flash
* @hw: pointer to the HW structure
* @offset: offset to data location
@@ -3470,7 +3836,13 @@ static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset,
s32 ret_val;
u16 word = 0;
- ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word);
+ /* In SPT, only 32 bits access is supported,
+ * so this function should not be called.
+ */
+ if (hw->mac.type == e1000_pch_spt)
+ return -E1000_ERR_NVM;
+ else
+ ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word);
if (ret_val)
return ret_val;
@@ -3556,6 +3928,83 @@ static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset,
return ret_val;
}
+/**
+ * e1000_read_flash_data32_ich8lan - Read dword from NVM
+ * @hw: pointer to the HW structure
+ * @offset: The offset (in bytes) of the dword to read.
+ * @data: Pointer to the dword to store the value read.
+ *
+ * Reads a byte or word from the NVM using the flash access registers.
+ **/
+static s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset,
+ u32 *data)
+{
+ union ich8_hws_flash_status hsfsts;
+ union ich8_hws_flash_ctrl hsflctl;
+ u32 flash_linear_addr;
+ s32 ret_val = -E1000_ERR_NVM;
+ u8 count = 0;
+
+ DEBUGFUNC("e1000_read_flash_data_ich8lan");
+
+ if (offset > ICH_FLASH_LINEAR_ADDR_MASK ||
+ hw->mac.type != e1000_pch_spt)
+ return -E1000_ERR_NVM;
+ flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) +
+ hw->nvm.flash_base_addr);
+
+ do {
+ usec_delay(1);
+ /* Steps */
+ ret_val = e1000_flash_cycle_init_ich8lan(hw);
+ if (ret_val != E1000_SUCCESS)
+ break;
+ /* In SPT, This register is in Lan memory space, not flash.
+ * Therefore, only 32 bit access is supported
+ */
+ hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16;
+
+ /* 0b/1b corresponds to 1 or 2 byte size, respectively. */
+ hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1;
+ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ;
+ /* In SPT, This register is in Lan memory space, not flash.
+ * Therefore, only 32 bit access is supported
+ */
+ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+ (u32)hsflctl.regval << 16);
+ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr);
+
+ ret_val = e1000_flash_cycle_ich8lan(hw,
+ ICH_FLASH_READ_COMMAND_TIMEOUT);
+
+ /* Check if FCERR is set to 1, if set to 1, clear it
+ * and try the whole sequence a few more times, else
+ * read in (shift in) the Flash Data0, the order is
+ * least significant byte first msb to lsb
+ */
+ if (ret_val == E1000_SUCCESS) {
+ *data = E1000_READ_FLASH_REG(hw, ICH_FLASH_FDATA0);
+ break;
+ } else {
+ /* If we've gotten here, then things are probably
+ * completely hosed, but if the error condition is
+ * detected, it won't hurt to give it another try...
+ * ICH_FLASH_CYCLE_REPEAT_COUNT times.
+ */
+ hsfsts.regval = E1000_READ_FLASH_REG16(hw,
+ ICH_FLASH_HSFSTS);
+ if (hsfsts.hsf_status.flcerr) {
+ /* Repeat for some time before giving up. */
+ continue;
+ } else if (!hsfsts.hsf_status.flcdone) {
+ DEBUGOUT("Timeout error - flash cycle did not complete.\n");
+ break;
+ }
+ }
+ } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT);
+
+ return ret_val;
+}
/**
* e1000_write_nvm_ich8lan - Write word(s) to the NVM
@@ -3594,6 +4043,175 @@ static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words,
}
/**
+ * e1000_update_nvm_checksum_spt - Update the checksum for NVM
+ * @hw: pointer to the HW structure
+ *
+ * The NVM checksum is updated by calling the generic update_nvm_checksum,
+ * which writes the checksum to the shadow ram. The changes in the shadow
+ * ram are then committed to the EEPROM by processing each bank at a time
+ * checking for the modified bit and writing only the pending changes.
+ * After a successful commit, the shadow ram is cleared and is ready for
+ * future writes.
+ **/
+static s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw)
+{
+ struct e1000_nvm_info *nvm = &hw->nvm;
+ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+ u32 i, act_offset, new_bank_offset, old_bank_offset, bank;
+ s32 ret_val;
+ u32 dword = 0;
+
+ DEBUGFUNC("e1000_update_nvm_checksum_spt");
+
+ ret_val = e1000_update_nvm_checksum_generic(hw);
+ if (ret_val)
+ goto out;
+
+ if (nvm->type != e1000_nvm_flash_sw)
+ goto out;
+
+ nvm->ops.acquire(hw);
+
+ /* We're writing to the opposite bank so if we're on bank 1,
+ * write to bank 0 etc. We also need to erase the segment that
+ * is going to be written
+ */
+ ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank);
+ if (ret_val != E1000_SUCCESS) {
+ DEBUGOUT("Could not detect valid bank, assuming bank 0\n");
+ bank = 0;
+ }
+
+ if (bank == 0) {
+ new_bank_offset = nvm->flash_bank_size;
+ old_bank_offset = 0;
+ ret_val = e1000_erase_flash_bank_ich8lan(hw, 1);
+ if (ret_val)
+ goto release;
+ } else {
+ old_bank_offset = nvm->flash_bank_size;
+ new_bank_offset = 0;
+ ret_val = e1000_erase_flash_bank_ich8lan(hw, 0);
+ if (ret_val)
+ goto release;
+ }
+ for (i = 0; i < E1000_SHADOW_RAM_WORDS; i += 2) {
+ /* Determine whether to write the value stored
+ * in the other NVM bank or a modified value stored
+ * in the shadow RAM
+ */
+ ret_val = e1000_read_flash_dword_ich8lan(hw,
+ i + old_bank_offset,
+ &dword);
+
+ if (dev_spec->shadow_ram[i].modified) {
+ dword &= 0xffff0000;
+ dword |= (dev_spec->shadow_ram[i].value & 0xffff);
+ }
+ if (dev_spec->shadow_ram[i + 1].modified) {
+ dword &= 0x0000ffff;
+ dword |= ((dev_spec->shadow_ram[i + 1].value & 0xffff)
+ << 16);
+ }
+ if (ret_val)
+ break;
+
+ /* If the word is 0x13, then make sure the signature bits
+ * (15:14) are 11b until the commit has completed.
+ * This will allow us to write 10b which indicates the
+ * signature is valid. We want to do this after the write
+ * has completed so that we don't mark the segment valid
+ * while the write is still in progress
+ */
+ if (i == E1000_ICH_NVM_SIG_WORD - 1)
+ dword |= E1000_ICH_NVM_SIG_MASK << 16;
+
+ /* Convert offset to bytes. */
+ act_offset = (i + new_bank_offset) << 1;
+
+ usec_delay(100);
+
+ /* Write the data to the new bank. Offset in words*/
+ act_offset = i + new_bank_offset;
+ ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset,
+ dword);
+ if (ret_val)
+ break;
+ }
+
+ /* Don't bother writing the segment valid bits if sector
+ * programming failed.
+ */
+ if (ret_val) {
+ DEBUGOUT("Flash commit failed.\n");
+ goto release;
+ }
+
+ /* Finally validate the new segment by setting bit 15:14
+ * to 10b in word 0x13 , this can be done without an
+ * erase as well since these bits are 11 to start with
+ * and we need to change bit 14 to 0b
+ */
+ act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD;
+
+ /*offset in words but we read dword*/
+ --act_offset;
+ ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword);
+
+ if (ret_val)
+ goto release;
+
+ dword &= 0xBFFFFFFF;
+ ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword);
+
+ if (ret_val)
+ goto release;
+
+ /* And invalidate the previously valid segment by setting
+ * its signature word (0x13) high_byte to 0b. This can be
+ * done without an erase because flash erase sets all bits
+ * to 1's. We can write 1's to 0's without an erase
+ */
+ act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1;
+
+ /* offset in words but we read dword*/
+ act_offset = old_bank_offset + E1000_ICH_NVM_SIG_WORD - 1;
+ ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword);
+
+ if (ret_val)
+ goto release;
+
+ dword &= 0x00FFFFFF;
+ ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword);
+
+ if (ret_val)
+ goto release;
+
+ /* Great! Everything worked, we can now clear the cached entries. */
+ for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) {
+ dev_spec->shadow_ram[i].modified = FALSE;
+ dev_spec->shadow_ram[i].value = 0xFFFF;
+ }
+
+release:
+ nvm->ops.release(hw);
+
+ /* Reload the EEPROM, or else modifications will not appear
+ * until after the next adapter reset.
+ */
+ if (!ret_val) {
+ nvm->ops.reload(hw);
+ msec_delay(10);
+ }
+
+out:
+ if (ret_val)
+ DEBUGOUT1("NVM update error: %d\n", ret_val);
+
+ return ret_val;
+}
+
+/**
* e1000_update_nvm_checksum_ich8lan - Update the checksum for NVM
* @hw: pointer to the HW structure
*
@@ -3770,6 +4388,7 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw)
*/
switch (hw->mac.type) {
case e1000_pch_lpt:
+ case e1000_pch_spt:
word = NVM_COMPAT;
valid_csum_mask = NVM_COMPAT_VALID_CSUM;
break;
@@ -3817,8 +4436,13 @@ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset,
DEBUGFUNC("e1000_write_ich8_data");
- if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK)
- return -E1000_ERR_NVM;
+ if (hw->mac.type == e1000_pch_spt) {
+ if (size != 4 || offset > ICH_FLASH_LINEAR_ADDR_MASK)
+ return -E1000_ERR_NVM;
+ } else {
+ if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK)
+ return -E1000_ERR_NVM;
+ }
flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) +
hw->nvm.flash_base_addr);
@@ -3829,12 +4453,29 @@ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset,
ret_val = e1000_flash_cycle_init_ich8lan(hw);
if (ret_val != E1000_SUCCESS)
break;
- hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL);
+ /* In SPT, This register is in Lan memory space, not
+ * flash. Therefore, only 32 bit access is supported
+ */
+ if (hw->mac.type == e1000_pch_spt)
+ hsflctl.regval =
+ E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16;
+ else
+ hsflctl.regval =
+ E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL);
/* 0b/1b corresponds to 1 or 2 byte size, respectively. */
hsflctl.hsf_ctrl.fldbcount = size - 1;
hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE;
- E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval);
+ /* In SPT, This register is in Lan memory space,
+ * not flash. Therefore, only 32 bit access is
+ * supported
+ */
+ if (hw->mac.type == e1000_pch_spt)
+ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+ hsflctl.regval << 16);
+ else
+ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL,
+ hsflctl.regval);
E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr);
@@ -3872,6 +4513,94 @@ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset,
return ret_val;
}
+/**
+* e1000_write_flash_data32_ich8lan - Writes 4 bytes to the NVM
+* @hw: pointer to the HW structure
+* @offset: The offset (in bytes) of the dwords to read.
+* @data: The 4 bytes to write to the NVM.
+*
+* Writes one/two/four bytes to the NVM using the flash access registers.
+**/
+static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset,
+ u32 data)
+{
+ union ich8_hws_flash_status hsfsts;
+ union ich8_hws_flash_ctrl hsflctl;
+ u32 flash_linear_addr;
+ s32 ret_val;
+ u8 count = 0;
+
+ DEBUGFUNC("e1000_write_flash_data32_ich8lan");
+
+ if (hw->mac.type == e1000_pch_spt) {
+ if (offset > ICH_FLASH_LINEAR_ADDR_MASK)
+ return -E1000_ERR_NVM;
+ }
+ flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) +
+ hw->nvm.flash_base_addr);
+ do {
+ usec_delay(1);
+ /* Steps */
+ ret_val = e1000_flash_cycle_init_ich8lan(hw);
+ if (ret_val != E1000_SUCCESS)
+ break;
+
+ /* In SPT, This register is in Lan memory space, not
+ * flash. Therefore, only 32 bit access is supported
+ */
+ if (hw->mac.type == e1000_pch_spt)
+ hsflctl.regval = E1000_READ_FLASH_REG(hw,
+ ICH_FLASH_HSFSTS)
+ >> 16;
+ else
+ hsflctl.regval = E1000_READ_FLASH_REG16(hw,
+ ICH_FLASH_HSFCTL);
+
+ hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1;
+ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE;
+
+ /* In SPT, This register is in Lan memory space,
+ * not flash. Therefore, only 32 bit access is
+ * supported
+ */
+ if (hw->mac.type == e1000_pch_spt)
+ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+ hsflctl.regval << 16);
+ else
+ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL,
+ hsflctl.regval);
+
+ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr);
+
+ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FDATA0, data);
+
+ /* check if FCERR is set to 1 , if set to 1, clear it
+ * and try the whole sequence a few more times else done
+ */
+ ret_val = e1000_flash_cycle_ich8lan(hw,
+ ICH_FLASH_WRITE_COMMAND_TIMEOUT);
+
+ if (ret_val == E1000_SUCCESS)
+ break;
+
+ /* If we're here, then things are most likely
+ * completely hosed, but if the error condition
+ * is detected, it won't hurt to give it another
+ * try...ICH_FLASH_CYCLE_REPEAT_COUNT times.
+ */
+ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS);
+
+ if (hsfsts.hsf_status.flcerr)
+ /* Repeat for some time before giving up. */
+ continue;
+ if (!hsfsts.hsf_status.flcdone) {
+ DEBUGOUT("Timeout error - flash cycle did not complete.\n");
+ break;
+ }
+ } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT);
+
+ return ret_val;
+}
/**
* e1000_write_flash_byte_ich8lan - Write a single byte to NVM
@@ -3891,7 +4620,42 @@ static s32 e1000_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset,
return e1000_write_flash_data_ich8lan(hw, offset, 1, word);
}
+/**
+* e1000_retry_write_flash_dword_ich8lan - Writes a dword to NVM
+* @hw: pointer to the HW structure
+* @offset: The offset of the word to write.
+* @dword: The dword to write to the NVM.
+*
+* Writes a single dword to the NVM using the flash access registers.
+* Goes through a retry algorithm before giving up.
+**/
+static s32 e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw,
+ u32 offset, u32 dword)
+{
+ s32 ret_val;
+ u16 program_retries;
+
+ DEBUGFUNC("e1000_retry_write_flash_dword_ich8lan");
+
+ /* Must convert word offset into bytes. */
+ offset <<= 1;
+
+ ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword);
+
+ if (!ret_val)
+ return ret_val;
+ for (program_retries = 0; program_retries < 100; program_retries++) {
+ DEBUGOUT2("Retrying Byte %8.8X at offset %u\n", dword, offset);
+ usec_delay(100);
+ ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword);
+ if (ret_val == E1000_SUCCESS)
+ break;
+ }
+ if (program_retries == 100)
+ return -E1000_ERR_NVM;
+ return E1000_SUCCESS;
+}
/**
* e1000_retry_write_flash_byte_ich8lan - Writes a single byte to NVM
@@ -4001,12 +4765,22 @@ static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank)
/* Write a value 11 (block Erase) in Flash
* Cycle field in hw flash control
*/
- hsflctl.regval =
- E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL);
+ if (hw->mac.type == e1000_pch_spt)
+ hsflctl.regval =
+ E1000_READ_FLASH_REG(hw,
+ ICH_FLASH_HSFSTS)>>16;
+ else
+ hsflctl.regval =
+ E1000_READ_FLASH_REG16(hw,
+ ICH_FLASH_HSFCTL);
hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_ERASE;
- E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL,
- hsflctl.regval);
+ if (hw->mac.type == e1000_pch_spt)
+ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS,
+ hsflctl.regval << 16);
+ else
+ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL,
+ hsflctl.regval);
/* Write the last 24 bits of an index within the
* block into Flash Linear address field in Flash
@@ -4143,7 +4917,7 @@ static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw)
* @hw: pointer to the HW structure
*
* ICH8 use the PCI Express bus, but does not contain a PCI Express Capability
- * register, so the the bus width is hard coded.
+ * register, so the bus width is hard coded.
**/
static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw)
{
@@ -4439,7 +5213,8 @@ static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw)
E1000_WRITE_REG(hw, E1000_RFCTL, reg);
/* Enable ECC on Lynxpoint */
- if (hw->mac.type == e1000_pch_lpt) {
+ if ((hw->mac.type == e1000_pch_lpt) ||
+ (hw->mac.type == e1000_pch_spt)) {
reg = E1000_READ_REG(hw, E1000_PBECCSTS);
reg |= E1000_PBECCSTS_ECC_ENABLE;
E1000_WRITE_REG(hw, E1000_PBECCSTS, reg);
@@ -4871,7 +5646,8 @@ void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw)
if ((device_id == E1000_DEV_ID_PCH_LPTLP_I218_LM) ||
(device_id == E1000_DEV_ID_PCH_LPTLP_I218_V) ||
(device_id == E1000_DEV_ID_PCH_I218_LM3) ||
- (device_id == E1000_DEV_ID_PCH_I218_V3)) {
+ (device_id == E1000_DEV_ID_PCH_I218_V3) ||
+ (hw->mac.type == e1000_pch_spt)) {
u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6);
E1000_WRITE_REG(hw, E1000_FEXTNVM6,
@@ -4987,19 +5763,18 @@ out:
* the PHY.
* On i217, setup Intel Rapid Start Technology.
**/
-void e1000_resume_workarounds_pchlan(struct e1000_hw *hw)
+u32 e1000_resume_workarounds_pchlan(struct e1000_hw *hw)
{
s32 ret_val;
DEBUGFUNC("e1000_resume_workarounds_pchlan");
-
if (hw->mac.type < e1000_pch2lan)
- return;
+ return E1000_SUCCESS;
ret_val = e1000_init_phy_workarounds_pchlan(hw);
if (ret_val) {
DEBUGOUT1("Failed to init PHY flow ret_val=%d\n", ret_val);
- return;
+ return ret_val;
}
/* For i217 Intel Rapid Start Technology support when the system
@@ -5013,7 +5788,7 @@ void e1000_resume_workarounds_pchlan(struct e1000_hw *hw)
ret_val = hw->phy.ops.acquire(hw);
if (ret_val) {
DEBUGOUT("Failed to setup iRST\n");
- return;
+ return ret_val;
}
/* Clear Auto Enable LPI after link up */
@@ -5047,7 +5822,9 @@ release:
if (ret_val)
DEBUGOUT1("Error %d in resume workarounds\n", ret_val);
hw->phy.ops.release(hw);
+ return ret_val;
}
+ return E1000_SUCCESS;
}
/**
diff --git a/freebsd/sys/dev/e1000/e1000_ich8lan.h b/freebsd/sys/dev/e1000/e1000_ich8lan.h
index 999e856b..6d812911 100644
--- a/freebsd/sys/dev/e1000/e1000_ich8lan.h
+++ b/freebsd/sys/dev/e1000/e1000_ich8lan.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -107,9 +107,23 @@
#define E1000_FEXTNVM6_REQ_PLL_CLK 0x00000100
#define E1000_FEXTNVM6_ENABLE_K1_ENTRY_CONDITION 0x00000200
-
+#define E1000_FEXTNVM6_K1_OFF_ENABLE 0x80000000
+/* bit for disabling packet buffer read */
+#define E1000_FEXTNVM7_DISABLE_PB_READ 0x00040000
+#define E1000_FEXTNVM7_SIDE_CLK_UNGATE 0x00000004
#define E1000_FEXTNVM7_DISABLE_SMB_PERST 0x00000020
-
+#define E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS 0x00000800
+#define E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS 0x00001000
+#define E1000_FEXTNVM11_DISABLE_PB_READ 0x00000200
+#define E1000_FEXTNVM11_DISABLE_MULR_FIX 0x00002000
+
+/* bit24: RXDCTL thresholds granularity: 0 - cache lines, 1 - descriptors */
+#define E1000_RXDCTL_THRESH_UNIT_DESC 0x01000000
+
+#define NVM_SIZE_MULTIPLIER 4096 /*multiplier for NVMS field*/
+#define E1000_FLASH_BASE_ADDR 0xE000 /*offset of NVM access regs*/
+#define E1000_CTRL_EXT_NVMVS 0x3 /*NVM valid sector */
+#define E1000_TARC0_CB_MULTIQ_3_REQ (1 << 28 | 1 << 29)
#define PCIE_ICH8_SNOOP_ALL PCIE_NO_SNOOP_ALL
#define E1000_ICH_RAR_ENTRIES 7
@@ -171,6 +185,8 @@
#define E1000_NVM_K1_CONFIG 0x1B /* NVM K1 Config Word */
#define E1000_NVM_K1_ENABLE 0x1 /* NVM Enable K1 bit */
+#define K1_ENTRY_LATENCY 0
+#define K1_MIN_TIME 1
/* SMBus Control Phy Register */
#define CV_SMB_CTRL PHY_REG(769, 23)
@@ -184,6 +200,10 @@
#define I218_ULP_CONFIG1_INBAND_EXIT 0x0020 /* Inband on ULP exit */
#define I218_ULP_CONFIG1_WOL_HOST 0x0040 /* WoL Host on ULP exit */
#define I218_ULP_CONFIG1_RESET_TO_SMBUS 0x0100 /* Reset to SMBus mode */
+/* enable ULP even if when phy powered down via lanphypc */
+#define I218_ULP_CONFIG1_EN_ULP_LANPHYPC 0x0400
+/* disable clear of sticky ULP on PERST */
+#define I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST 0x0800
#define I218_ULP_CONFIG1_DISABLE_SMB_PERST 0x1000 /* Disable on PERST# */
/* SMBus Address Phy Register */
@@ -219,9 +239,12 @@
/* PHY Power Management Control */
#define HV_PM_CTRL PHY_REG(770, 17)
-#define HV_PM_CTRL_PLL_STOP_IN_K1_GIGA 0x100
+#define HV_PM_CTRL_K1_CLK_REQ 0x200
#define HV_PM_CTRL_K1_ENABLE 0x4000
+#define I217_PLL_CLOCK_GATE_REG PHY_REG(772, 28)
+#define I217_PLL_CLOCK_GATE_MASK 0x07FF
+
#define SW_FLAG_TIMEOUT 1000 /* SW Semaphore flag timeout in ms */
/* Inband Control */
@@ -307,7 +330,7 @@ void e1000_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw,
void e1000_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw);
void e1000_gig_downshift_workaround_ich8lan(struct e1000_hw *hw);
void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw);
-void e1000_resume_workarounds_pchlan(struct e1000_hw *hw);
+u32 e1000_resume_workarounds_pchlan(struct e1000_hw *hw);
s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable);
void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw);
s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable);
diff --git a/freebsd/sys/dev/e1000/e1000_mac.c b/freebsd/sys/dev/e1000/e1000_mac.c
index 3967a25a..2af37399 100644
--- a/freebsd/sys/dev/e1000/e1000_mac.c
+++ b/freebsd/sys/dev/e1000/e1000_mac.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/dev/e1000/e1000_mac.h b/freebsd/sys/dev/e1000/e1000_mac.h
index 2c1bfe32..ef9789bb 100644
--- a/freebsd/sys/dev/e1000/e1000_mac.h
+++ b/freebsd/sys/dev/e1000/e1000_mac.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -36,9 +36,7 @@
#define _E1000_MAC_H_
void e1000_init_mac_ops_generic(struct e1000_hw *hw);
-#ifndef E1000_REMOVED
#define E1000_REMOVED(a) (0)
-#endif /* E1000_REMOVED */
void e1000_null_mac_generic(struct e1000_hw *hw);
s32 e1000_null_ops_generic(struct e1000_hw *hw);
s32 e1000_null_link_info(struct e1000_hw *hw, u16 *s, u16 *d);
diff --git a/freebsd/sys/dev/e1000/e1000_manage.c b/freebsd/sys/dev/e1000/e1000_manage.c
index 0648ac9d..82456912 100644
--- a/freebsd/sys/dev/e1000/e1000_manage.c
+++ b/freebsd/sys/dev/e1000/e1000_manage.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -35,7 +35,6 @@
/*$FreeBSD$*/
#include "e1000_api.h"
-
/**
* e1000_calculate_checksum - Calculate checksum for buffer
* @buffer: pointer to EEPROM
diff --git a/freebsd/sys/dev/e1000/e1000_manage.h b/freebsd/sys/dev/e1000/e1000_manage.h
index 51f17671..303e99e3 100644
--- a/freebsd/sys/dev/e1000/e1000_manage.h
+++ b/freebsd/sys/dev/e1000/e1000_manage.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2012, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/dev/e1000/e1000_mbx.c b/freebsd/sys/dev/e1000/e1000_mbx.c
index 1b5bb70d..0c6bb2c2 100644
--- a/freebsd/sys/dev/e1000/e1000_mbx.c
+++ b/freebsd/sys/dev/e1000/e1000_mbx.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -69,7 +69,7 @@ static s32 e1000_null_mbx_transact(struct e1000_hw E1000_UNUSEDARG *hw,
* @size: Length of buffer
* @mbx_id: id of mailbox to read
*
- * returns SUCCESS if it successfuly read message from buffer
+ * returns SUCCESS if it successfully read message from buffer
**/
s32 e1000_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
{
@@ -428,15 +428,21 @@ static s32 e1000_check_for_rst_vf(struct e1000_hw *hw,
static s32 e1000_obtain_mbx_lock_vf(struct e1000_hw *hw)
{
s32 ret_val = -E1000_ERR_MBX;
+ int count = 10;
DEBUGFUNC("e1000_obtain_mbx_lock_vf");
- /* Take ownership of the buffer */
- E1000_WRITE_REG(hw, E1000_V2PMAILBOX(0), E1000_V2PMAILBOX_VFU);
+ do {
+ /* Take ownership of the buffer */
+ E1000_WRITE_REG(hw, E1000_V2PMAILBOX(0), E1000_V2PMAILBOX_VFU);
- /* reserve mailbox for vf use */
- if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU)
- ret_val = E1000_SUCCESS;
+ /* reserve mailbox for vf use */
+ if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU) {
+ ret_val = E1000_SUCCESS;
+ break;
+ }
+ usec_delay(1000);
+ } while (count-- > 0);
return ret_val;
}
@@ -489,7 +495,7 @@ out_no_write:
* @size: Length of buffer
* @mbx_id: id of mailbox to read
*
- * returns SUCCESS if it successfuly read message from buffer
+ * returns SUCCESS if it successfully read message from buffer
**/
static s32 e1000_read_mbx_vf(struct e1000_hw *hw, u32 *msg, u16 size,
u16 E1000_UNUSEDARG mbx_id)
@@ -641,18 +647,26 @@ static s32 e1000_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number)
{
s32 ret_val = -E1000_ERR_MBX;
u32 p2v_mailbox;
+ int count = 10;
DEBUGFUNC("e1000_obtain_mbx_lock_pf");
- /* Take ownership of the buffer */
- E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU);
+ do {
+ /* Take ownership of the buffer */
+ E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number),
+ E1000_P2VMAILBOX_PFU);
- /* reserve mailbox for vf use */
- p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number));
- if (p2v_mailbox & E1000_P2VMAILBOX_PFU)
- ret_val = E1000_SUCCESS;
+ /* reserve mailbox for pf use */
+ p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number));
+ if (p2v_mailbox & E1000_P2VMAILBOX_PFU) {
+ ret_val = E1000_SUCCESS;
+ break;
+ }
+ usec_delay(1000);
+ } while (count-- > 0);
return ret_val;
+
}
/**
diff --git a/freebsd/sys/dev/e1000/e1000_mbx.h b/freebsd/sys/dev/e1000/e1000_mbx.h
index d2aea5c4..fadd8494 100644
--- a/freebsd/sys/dev/e1000/e1000_mbx.h
+++ b/freebsd/sys/dev/e1000/e1000_mbx.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/dev/e1000/e1000_nvm.c b/freebsd/sys/dev/e1000/e1000_nvm.c
index ad0c9544..a1e881ad 100644
--- a/freebsd/sys/dev/e1000/e1000_nvm.c
+++ b/freebsd/sys/dev/e1000/e1000_nvm.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/dev/e1000/e1000_nvm.h b/freebsd/sys/dev/e1000/e1000_nvm.h
index 34077b24..64a4083e 100644
--- a/freebsd/sys/dev/e1000/e1000_nvm.h
+++ b/freebsd/sys/dev/e1000/e1000_nvm.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2013, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -35,12 +35,10 @@
#ifndef _E1000_NVM_H_
#define _E1000_NVM_H_
-#if !defined(NO_READ_PBA_RAW) || !defined(NO_WRITE_PBA_RAW)
struct e1000_pba {
u16 word[2];
u16 *pba_block;
};
-#endif
void e1000_init_nvm_ops_generic(struct e1000_hw *hw);
diff --git a/freebsd/sys/dev/e1000/e1000_osdep.c b/freebsd/sys/dev/e1000/e1000_osdep.c
index 7eef489c..45aae330 100644
--- a/freebsd/sys/dev/e1000/e1000_osdep.c
+++ b/freebsd/sys/dev/e1000/e1000_osdep.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2010, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/dev/e1000/e1000_osdep.h b/freebsd/sys/dev/e1000/e1000_osdep.h
index b58a85b4..4ef3ce76 100644
--- a/freebsd/sys/dev/e1000/e1000_osdep.h
+++ b/freebsd/sys/dev/e1000/e1000_osdep.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -60,24 +60,24 @@
#define ASSERT(x) if(!(x)) panic("EM: x")
#define usec_delay(x) DELAY(x)
-#define usec_delay_irq(x) DELAY(x)
+#define usec_delay_irq(x) usec_delay(x)
#define msec_delay(x) DELAY(1000*(x))
#define msec_delay_irq(x) DELAY(1000*(x))
-#define DEBUGFUNC(F) DEBUGOUT(F);
-#define DEBUGOUT(S) do {} while (0)
-#define DEBUGOUT1(S,A) do {} while (0)
-#define DEBUGOUT2(S,A,B) do {} while (0)
-#define DEBUGOUT3(S,A,B,C) do {} while (0)
-#define DEBUGOUT7(S,A,B,C,D,E,F,G) do {} while (0)
+/* Enable/disable debugging statements in shared code */
+#define DBG 0
+
+#define DEBUGOUT(...) \
+ do { if (DBG) printf(__VA_ARGS__); } while (0)
+#define DEBUGOUT1(...) DEBUGOUT(__VA_ARGS__)
+#define DEBUGOUT2(...) DEBUGOUT(__VA_ARGS__)
+#define DEBUGOUT3(...) DEBUGOUT(__VA_ARGS__)
+#define DEBUGOUT7(...) DEBUGOUT(__VA_ARGS__)
+#define DEBUGFUNC(F) DEBUGOUT(F "\n")
#define STATIC static
#define FALSE 0
#define TRUE 1
-#ifndef __bool_true_false_are_defined
-#define false FALSE
-#define true TRUE
-#endif
#define CMD_MEM_WRT_INVALIDATE 0x0010 /* BIT_4 */
#define PCI_COMMAND_REGISTER PCIR_COMMAND
@@ -99,9 +99,6 @@ typedef int64_t s64;
typedef int32_t s32;
typedef int16_t s16;
typedef int8_t s8;
-#ifndef __bool_true_false_are_defined
-typedef boolean_t bool;
-#endif
#define __le16 u16
#define __le32 u32
@@ -137,7 +134,7 @@ struct e1000_osdep
bus_space_handle_t io_bus_space_handle;
bus_space_tag_t flash_bus_space_tag;
bus_space_handle_t flash_bus_space_handle;
- struct device *dev;
+ device_t dev;
};
#define E1000_REGISTER(hw, reg) (((hw)->mac.type >= e1000_82543) \
diff --git a/freebsd/sys/dev/e1000/e1000_phy.c b/freebsd/sys/dev/e1000/e1000_phy.c
index cb92973b..87753e19 100644
--- a/freebsd/sys/dev/e1000/e1000_phy.c
+++ b/freebsd/sys/dev/e1000/e1000_phy.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -1829,9 +1829,9 @@ s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw)
phy_data);
if (ret_val)
return ret_val;
- }
- DEBUGOUT1("M88E1000 PSCR: %X\n", phy_data);
+ DEBUGOUT1("M88E1000 PSCR: %X\n", phy_data);
+ }
ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
if (ret_val)
@@ -3124,7 +3124,7 @@ s32 e1000_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data)
/* Page 800 works differently than the rest so it has its own func */
if (page == BM_WUC_PAGE) {
ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data,
- FALSE, FALSE);
+ FALSE, false);
goto release;
}
@@ -3288,7 +3288,7 @@ s32 e1000_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data)
/* Page 800 works differently than the rest so it has its own func */
if (page == BM_WUC_PAGE) {
ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data,
- FALSE, FALSE);
+ FALSE, false);
goto release;
}
@@ -3431,12 +3431,11 @@ static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset,
u16 *data, bool read, bool page_set)
{
s32 ret_val;
- u16 reg, page;
+ u16 reg = BM_PHY_REG_NUM(offset);
+ u16 page = BM_PHY_REG_PAGE(offset);
u16 phy_reg = 0;
DEBUGFUNC("e1000_access_phy_wakeup_reg_bm");
- reg = BM_PHY_REG_NUM(offset);
- page = BM_PHY_REG_PAGE(offset);
/* Gig must be disabled for MDIO accesses to Host Wakeup reg page */
if ((hw->mac.type == e1000_pchlan) &&
@@ -3546,7 +3545,6 @@ static s32 __e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data,
if (ret_val)
return ret_val;
}
-
/* Page 800 works differently than the rest so it has its own func */
if (page == BM_WUC_PAGE) {
ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data,
@@ -3600,7 +3598,7 @@ out:
**/
s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data)
{
- return __e1000_read_phy_reg_hv(hw, offset, data, FALSE, FALSE);
+ return __e1000_read_phy_reg_hv(hw, offset, data, FALSE, false);
}
/**
@@ -3656,7 +3654,6 @@ static s32 __e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data,
if (ret_val)
return ret_val;
}
-
/* Page 800 works differently than the rest so it has its own func */
if (page == BM_WUC_PAGE) {
ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data,
@@ -3726,7 +3723,7 @@ out:
**/
s32 e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data)
{
- return __e1000_write_phy_reg_hv(hw, offset, data, FALSE, FALSE);
+ return __e1000_write_phy_reg_hv(hw, offset, data, FALSE, false);
}
/**
@@ -4153,10 +4150,10 @@ s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data)
/* Disable access to mPHY if it was originally disabled */
if (locked)
ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
- E1000_MPHY_DIS_ACCESS);
+ if (!ready)
+ return -E1000_ERR_PHY;
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
+ E1000_MPHY_DIS_ACCESS);
return E1000_SUCCESS;
}
@@ -4218,10 +4215,10 @@ s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data,
/* Disable access to mPHY if it was originally disabled */
if (locked)
ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
- E1000_MPHY_DIS_ACCESS);
+ if (!ready)
+ return -E1000_ERR_PHY;
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
+ E1000_MPHY_DIS_ACCESS);
return E1000_SUCCESS;
}
diff --git a/freebsd/sys/dev/e1000/e1000_phy.h b/freebsd/sys/dev/e1000/e1000_phy.h
index 0e5b2e6a..d3d563f7 100644
--- a/freebsd/sys/dev/e1000/e1000_phy.h
+++ b/freebsd/sys/dev/e1000/e1000_phy.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/dev/e1000/e1000_regs.h b/freebsd/sys/dev/e1000/e1000_regs.h
index 5c2e3f78..37d70172 100644
--- a/freebsd/sys/dev/e1000/e1000_regs.h
+++ b/freebsd/sys/dev/e1000/e1000_regs.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -65,6 +65,9 @@
#define E1000_FEXTNVM4 0x00024 /* Future Extended NVM 4 - RW */
#define E1000_FEXTNVM6 0x00010 /* Future Extended NVM 6 - RW */
#define E1000_FEXTNVM7 0x000E4 /* Future Extended NVM 7 - RW */
+#define E1000_FEXTNVM9 0x5BB4 /* Future Extended NVM 9 - RW */
+#define E1000_FEXTNVM11 0x5BBC /* Future Extended NVM 11 - RW */
+#define E1000_PCIEANACFG 0x00F18 /* PCIE Analog Config */
#define E1000_FCT 0x00030 /* Flow Control Type - RW */
#define E1000_CONNSW 0x00034 /* Copper/Fiber switch control - RW */
#define E1000_VET 0x00038 /* VLAN Ether Type - RW */
@@ -107,7 +110,9 @@
#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */
#define E1000_PBS 0x01008 /* Packet Buffer Size */
#define E1000_PBECCSTS 0x0100C /* Packet Buffer ECC Status - RW */
+#define E1000_IOSFPC 0x00F28 /* TX corrupted data */
#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */
+#define E1000_EEMNGCTL_I210 0x01010 /* i210 MNG EEprom Mode Control */
#define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */
#define E1000_EEARBC_I210 0x12024 /* EEPROM Auto Read Bus Control */
#define E1000_FLASHT 0x01028 /* FLASH Timer Register */
@@ -202,7 +207,7 @@
/* Queues fetch arbitration priority control register */
#define E1000_I210_TQAVARBCTRL 0x3574
/* Queues priority masks where _n and _p can be 0-3. */
-#define E1000_TQAVARBCTRL_QUEUE_PRI(_n, _p) ((_p) << (2 * _n))
+#define E1000_TQAVARBCTRL_QUEUE_PRI(_n, _p) ((_p) << (2 * (_n)))
/* QAV Tx mode control registers where _n can be 0 or 1. */
#define E1000_I210_TQAVCC(_n) (0x3004 + 0x40 * (_n))
@@ -215,7 +220,7 @@
#define E1000_PQGPTC(_n) (0x010014 + (0x100 * (_n)))
/* Queues packet buffer size masks where _n can be 0-3 and _s 0-63 [kB] */
-#define E1000_I210_TXPBS_SIZE(_n, _s) ((_s) << (6 * _n))
+#define E1000_I210_TXPBS_SIZE(_n, _s) ((_s) << (6 * (_n)))
#define E1000_MMDAC 13 /* MMD Access Control */
#define E1000_MMDAAD 14 /* MMD Access Address/Data */
@@ -552,7 +557,7 @@
#define E1000_WVBR 0x03554 /* VM Wrong Behavior - RWS */
#define E1000_RPLOLR 0x05AF0 /* Replication Offload - RW */
#define E1000_UTA 0x0A000 /* Unicast Table Array - RW */
-#define E1000_IOVTCL 0x05BBC /* IOV Control Register */
+#define E1000_IOVCTL 0x05BBC /* IOV Control Register */
#define E1000_VMRCTL 0X05D80 /* Virtual Mirror Rule Control */
#define E1000_VMRVLAN 0x05D90 /* Virtual Mirror Rule VLAN */
#define E1000_VMRVM 0x05DA0 /* Virtual Mirror Rule VM */
@@ -588,6 +593,10 @@
#define E1000_TIMADJL 0x0B60C /* Time sync time adjustment offset Low - RW */
#define E1000_TIMADJH 0x0B610 /* Time sync time adjustment offset High - RW */
#define E1000_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */
+#define E1000_SYSSTMPL 0x0B648 /* HH Timesync system stamp low register */
+#define E1000_SYSSTMPH 0x0B64C /* HH Timesync system stamp hi register */
+#define E1000_PLTSTMPL 0x0B640 /* HH Timesync platform stamp low register */
+#define E1000_PLTSTMPH 0x0B644 /* HH Timesync platform stamp hi register */
#define E1000_SYSTIMR 0x0B6F8 /* System time register Residue */
#define E1000_TSICR 0x0B66C /* Interrupt Cause Register */
#define E1000_TSIM 0x0B674 /* Interrupt Mask Register */
diff --git a/freebsd/sys/dev/e1000/e1000_vf.c b/freebsd/sys/dev/e1000/e1000_vf.c
index 17fd7cb0..0e46641e 100644
--- a/freebsd/sys/dev/e1000/e1000_vf.c
+++ b/freebsd/sys/dev/e1000/e1000_vf.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/dev/e1000/e1000_vf.h b/freebsd/sys/dev/e1000/e1000_vf.h
index 2a780741..e6f834e7 100644
--- a/freebsd/sys/dev/e1000/e1000_vf.h
+++ b/freebsd/sys/dev/e1000/e1000_vf.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/freebsd/sys/dev/e1000/if_em.c b/freebsd/sys/dev/e1000/if_em.c
index 05d7fbc2..fa34dd62 100644
--- a/freebsd/sys/dev/e1000/if_em.c
+++ b/freebsd/sys/dev/e1000/if_em.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -34,6 +34,8 @@
******************************************************************************/
/*$FreeBSD$*/
+#include <rtems/bsd/local/opt_em.h>
+#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
@@ -43,6 +45,10 @@
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#ifdef DDB
+#include <sys/types.h>
+#include <ddb/ddb.h>
+#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
@@ -54,6 +60,7 @@
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
+#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
@@ -65,6 +72,7 @@
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
@@ -90,14 +98,9 @@
#include "if_em.h"
/*********************************************************************
- * Set this to one to display debug statistics
- *********************************************************************/
-int em_display_debug_stats = 0;
-
-/*********************************************************************
* Driver version:
*********************************************************************/
-char em_driver_version[] = "7.4.2";
+char em_driver_version[] = "7.6.1-k";
/*********************************************************************
* PCI Device ID Table
@@ -185,6 +188,19 @@ static em_vendor_info_t em_vendor_info_array[] =
{ 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
/* required last entry */
{ 0, 0, 0, 0, 0}
};
@@ -207,20 +223,21 @@ static int em_shutdown(device_t);
static int em_suspend(device_t);
static int em_resume(device_t);
#ifdef EM_MULTIQUEUE
-static int em_mq_start(struct ifnet *, struct mbuf *);
-static int em_mq_start_locked(struct ifnet *,
- struct tx_ring *, struct mbuf *);
-static void em_qflush(struct ifnet *);
+static int em_mq_start(if_t, struct mbuf *);
+static int em_mq_start_locked(if_t,
+ struct tx_ring *);
+static void em_qflush(if_t);
#else
-static void em_start(struct ifnet *);
-static void em_start_locked(struct ifnet *, struct tx_ring *);
+static void em_start(if_t);
+static void em_start_locked(if_t, struct tx_ring *);
#endif
-static int em_ioctl(struct ifnet *, u_long, caddr_t);
+static int em_ioctl(if_t, u_long, caddr_t);
+static uint64_t em_get_counter(if_t, ift_counter);
static void em_init(void *);
static void em_init_locked(struct adapter *);
static void em_stop(void *);
-static void em_media_status(struct ifnet *, struct ifmediareq *);
-static int em_media_change(struct ifnet *);
+static void em_media_status(if_t, struct ifmediareq *);
+static int em_media_change(if_t);
static void em_identify_hardware(struct adapter *);
static int em_allocate_pci_resources(struct adapter *);
static int em_allocate_legacy(struct adapter *);
@@ -231,6 +248,7 @@ static void em_free_pci_resources(struct adapter *);
static void em_local_timer(void *);
static void em_reset(struct adapter *);
static int em_setup_interface(device_t, struct adapter *);
+static void em_flush_desc_rings(struct adapter *);
static void em_setup_transmit_structures(struct adapter *);
static void em_initialize_transmit_unit(struct adapter *);
@@ -253,7 +271,9 @@ static bool em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int em_fixup_rx(struct rx_ring *);
#endif
-static void em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
+static void em_setup_rxdesc(union e1000_rx_desc_extended *,
+ const struct em_rxbuffer *rxbuf);
+static void em_receive_checksum(uint32_t status, struct mbuf *);
static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
struct ip *, u32 *, u32 *);
static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
@@ -263,8 +283,8 @@ static void em_disable_promisc(struct adapter *);
static void em_set_multi(struct adapter *);
static void em_update_link_status(struct adapter *);
static void em_refresh_mbufs(struct rx_ring *, int);
-static void em_register_vlan(void *, struct ifnet *, u16);
-static void em_unregister_vlan(void *, struct ifnet *, u16);
+static void em_register_vlan(void *, if_t, u16);
+static void em_unregister_vlan(void *, if_t, u16);
static void em_setup_vlan_hw_support(struct adapter *);
static int em_xmit(struct tx_ring *, struct mbuf **);
static int em_dma_malloc(struct adapter *, bus_size_t,
@@ -299,6 +319,10 @@ static void em_handle_tx(void *context, int pending);
static void em_handle_rx(void *context, int pending);
static void em_handle_link(void *context, int pending);
+#ifdef EM_MULTIQUEUE
+static void em_enable_vectors_82574(struct adapter *);
+#endif
+
static void em_set_sysctl_value(struct adapter *, const char *,
const char *, int *, int);
static int em_set_flowcntl(SYSCTL_HANDLER_ARGS);
@@ -333,6 +357,9 @@ devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(em, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
/*********************************************************************
* Tunable default values.
@@ -350,12 +377,16 @@ MODULE_DEPEND(em, ether, 1, 1, 1);
#define CSUM_TSO 0
#endif
+#define TSO_WORKAROUND 4
+
static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
+static int em_disable_crc_stripping = 0;
+SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
+ &em_disable_crc_stripping, 0, "Disable CRC Stripping");
+
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
-TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
-TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
@@ -363,8 +394,6 @@ SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
-TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
-TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
&em_tx_abs_int_delay_dflt, 0,
"Default transmit interrupt delay limit in usecs");
@@ -374,32 +403,39 @@ SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
-TUNABLE_INT("hw.em.rxd", &em_rxd);
-TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
"Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
"Number of transmit descriptors per queue");
static int em_smart_pwr_down = FALSE;
-TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
0, "Set to true to leave smart power down enabled on newer adapters");
/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
-TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
"Show bad packets in promiscuous mode");
static int em_enable_msix = TRUE;
-TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
"Enable MSI-X interrupts");
+#ifdef EM_MULTIQUEUE
+static int em_num_queues = 1;
+SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
+ "82574 only: Number of queues to configure, 0 indicates autoconfigure");
+#endif
+
+/*
+** Global variable to store last used CPU when binding queues
+** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a
+** queue is bound to a cpu.
+*/
+static int em_last_bind_cpu = -1;
+
/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
-TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
&em_rx_process_limit, 0,
"Maximum number of received packets to process "
@@ -407,7 +443,6 @@ SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
-TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
"Enable Energy Efficient Ethernet");
@@ -431,10 +466,10 @@ static int
em_probe(device_t dev)
{
char adapter_name[60];
- u16 pci_vendor_id = 0;
- u16 pci_device_id = 0;
- u16 pci_subvendor_id = 0;
- u16 pci_subdevice_id = 0;
+ uint16_t pci_vendor_id = 0;
+ uint16_t pci_device_id = 0;
+ uint16_t pci_subvendor_id = 0;
+ uint16_t pci_subdevice_id = 0;
em_vendor_info_t *ent;
INIT_DEBUGOUT("em_probe: begin");
@@ -553,14 +588,34 @@ em_attach(device_t dev)
adapter->osdep.flash_bus_space_handle =
rman_get_bushandle(adapter->flash);
}
+ /*
+ ** In the new SPT device flash is not a
+ ** separate BAR, rather it is also in BAR0,
+ ** so use the same tag and an offset handle for the
+ ** FLASH read/write macros in the shared code.
+ */
+ else if (hw->mac.type == e1000_pch_spt) {
+ adapter->osdep.flash_bus_space_tag =
+ adapter->osdep.mem_bus_space_tag;
+ adapter->osdep.flash_bus_space_handle =
+ adapter->osdep.mem_bus_space_handle
+ + E1000_FLASH_BASE_ADDR;
+ }
/* Do Shared Code initialization */
- if (e1000_setup_init_funcs(hw, TRUE)) {
- device_printf(dev, "Setup of Shared code failed\n");
+ error = e1000_setup_init_funcs(hw, TRUE);
+ if (error) {
+ device_printf(dev, "Setup of Shared code failed, error %d\n",
+ error);
error = ENXIO;
goto err_pci;
}
+ /*
+ * Setup MSI/X or MSI if PCI Express
+ */
+ adapter->msix = em_setup_msix(adapter);
+
e1000_get_bus_info(hw);
/* Set up some sysctls for the tunable interrupt delays */
@@ -604,7 +659,7 @@ em_attach(device_t dev)
} else
adapter->num_tx_desc = em_txd;
- if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
+ if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
(em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
device_printf(dev, "Using %d RX descriptors instead of %d!\n",
EM_DEFAULT_RXD, em_rxd);
@@ -746,8 +801,7 @@ em_attach(device_t dev)
em_get_hw_control(adapter);
/* Tell the stack that the interface is not active */
- adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
adapter->led_dev = led_create(em_led_func, adapter,
device_get_nameunit(dev));
@@ -763,7 +817,7 @@ err_late:
em_free_transmit_structures(adapter);
em_free_receive_structures(adapter);
em_release_hw_control(adapter);
- if (adapter->ifp != NULL)
+ if (adapter->ifp != (void *)NULL)
if_free(adapter->ifp);
err_pci:
em_free_pci_resources(adapter);
@@ -787,18 +841,18 @@ static int
em_detach(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
INIT_DEBUGOUT("em_detach: begin");
/* Make sure VLANS are not using driver */
- if (adapter->ifp->if_vlantrunk != NULL) {
+ if (if_vlantrunkinuse(ifp)) {
device_printf(dev,"Vlan in use, detach first\n");
return (EBUSY);
}
#ifdef DEVICE_POLLING
- if (ifp->if_capenable & IFCAP_POLLING)
+ if (if_getcapenable(ifp) & IFCAP_POLLING)
ether_poll_deregister(ifp);
#endif
@@ -878,7 +932,7 @@ em_resume(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
struct tx_ring *txr = adapter->tx_rings;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
EM_CORE_LOCK(adapter);
if (adapter->hw.mac.type == e1000_pch2lan)
@@ -886,15 +940,15 @@ em_resume(device_t dev)
em_init_locked(adapter);
em_init_manageability(adapter);
- if ((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
+ if ((if_getflags(ifp) & IFF_UP) &&
+ (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
for (int i = 0; i < adapter->num_queues; i++, txr++) {
EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
#endif
EM_TX_UNLOCK(txr);
@@ -906,7 +960,70 @@ em_resume(device_t dev)
}
-#ifdef EM_MULTIQUEUE
+#ifndef EM_MULTIQUEUE
+static void
+em_start_locked(if_t ifp, struct tx_ring *txr)
+{
+ struct adapter *adapter = if_getsoftc(ifp);
+ struct mbuf *m_head;
+
+ EM_TX_LOCK_ASSERT(txr);
+
+ if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
+ IFF_DRV_RUNNING)
+ return;
+
+ if (!adapter->link_active)
+ return;
+
+ while (!if_sendq_empty(ifp)) {
+ /* Call cleanup if number of TX descriptors low */
+ if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
+ em_txeof(txr);
+ if (txr->tx_avail < EM_MAX_SCATTER) {
+ if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
+ break;
+ }
+ m_head = if_dequeue(ifp);
+ if (m_head == NULL)
+ break;
+ /*
+ * Encapsulation can modify our pointer, and or make it
+ * NULL on failure. In that event, we can't requeue.
+ */
+ if (em_xmit(txr, &m_head)) {
+ if (m_head == NULL)
+ break;
+ if_sendq_prepend(ifp, m_head);
+ break;
+ }
+
+ /* Mark the queue as having work */
+ if (txr->busy == EM_TX_IDLE)
+ txr->busy = EM_TX_BUSY;
+
+ /* Send a copy of the frame to the BPF listener */
+ ETHER_BPF_MTAP(ifp, m_head);
+
+ }
+
+ return;
+}
+
+static void
+em_start(if_t ifp)
+{
+ struct adapter *adapter = if_getsoftc(ifp);
+ struct tx_ring *txr = adapter->tx_rings;
+
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
+ EM_TX_LOCK(txr);
+ em_start_locked(ifp, txr);
+ EM_TX_UNLOCK(txr);
+ }
+ return;
+}
+#else /* EM_MULTIQUEUE */
/*********************************************************************
* Multiqueue Transmit routines
*
@@ -915,85 +1032,95 @@ em_resume(device_t dev)
* than do an immediate send. It is this that is an advantage
* in this driver, rather than also having multiple tx queues.
**********************************************************************/
+/*
+** Multiqueue capable stack interface
+*/
+static int
+em_mq_start(if_t ifp, struct mbuf *m)
+{
+ struct adapter *adapter = if_getsoftc(ifp);
+ struct tx_ring *txr = adapter->tx_rings;
+ unsigned int i, error;
+
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ i = m->m_pkthdr.flowid % adapter->num_queues;
+ else
+ i = curcpu % adapter->num_queues;
+
+ txr = &adapter->tx_rings[i];
+
+ error = drbr_enqueue(ifp, txr->br, m);
+ if (error)
+ return (error);
+
+ if (EM_TX_TRYLOCK(txr)) {
+ em_mq_start_locked(ifp, txr);
+ EM_TX_UNLOCK(txr);
+ } else
+ taskqueue_enqueue(txr->tq, &txr->tx_task);
+
+ return (0);
+}
+
static int
-em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
+em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
struct mbuf *next;
int err = 0, enq = 0;
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
- IFF_DRV_RUNNING || adapter->link_active == 0) {
- if (m != NULL)
- err = drbr_enqueue(ifp, txr->br, m);
- return (err);
- }
+ EM_TX_LOCK_ASSERT(txr);
- enq = 0;
- if (m != NULL) {
- err = drbr_enqueue(ifp, txr->br, m);
- if (err)
- return (err);
- }
+ if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
+ adapter->link_active == 0) {
+ return (ENETDOWN);
+ }
/* Process the queue */
while ((next = drbr_peek(ifp, txr->br)) != NULL) {
if ((err = em_xmit(txr, &next)) != 0) {
- if (next == NULL)
+ if (next == NULL) {
+ /* It was freed, move forward */
drbr_advance(ifp, txr->br);
- else
+ } else {
+ /*
+ * Still have one left, it may not be
+ * the same since the transmit function
+ * may have changed it.
+ */
drbr_putback(ifp, txr->br, next);
+ }
break;
}
drbr_advance(ifp, txr->br);
enq++;
- ifp->if_obytes += next->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
if (next->m_flags & M_MCAST)
- ifp->if_omcasts++;
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
ETHER_BPF_MTAP(ifp, next);
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
break;
}
- if (enq > 0) {
- /* Set the watchdog */
- txr->queue_status = EM_QUEUE_WORKING;
- txr->watchdog_time = ticks;
- }
+ /* Mark the queue as having work */
+ if ((enq > 0) && (txr->busy == EM_TX_IDLE))
+ txr->busy = EM_TX_BUSY;
if (txr->tx_avail < EM_MAX_SCATTER)
em_txeof(txr);
- if (txr->tx_avail < EM_MAX_SCATTER)
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if (txr->tx_avail < EM_MAX_SCATTER) {
+ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
+ }
return (err);
}
/*
-** Multiqueue capable stack interface
-*/
-static int
-em_mq_start(struct ifnet *ifp, struct mbuf *m)
-{
- struct adapter *adapter = ifp->if_softc;
- struct tx_ring *txr = adapter->tx_rings;
- int error;
-
- if (EM_TX_TRYLOCK(txr)) {
- error = em_mq_start_locked(ifp, txr, m);
- EM_TX_UNLOCK(txr);
- } else
- error = drbr_enqueue(ifp, txr->br, m);
-
- return (error);
-}
-
-/*
** Flush all ring buffers
*/
static void
-em_qflush(struct ifnet *ifp)
+em_qflush(if_t ifp)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
struct tx_ring *txr = adapter->tx_rings;
struct mbuf *m;
@@ -1005,69 +1132,6 @@ em_qflush(struct ifnet *ifp)
}
if_qflush(ifp);
}
-#else /* !EM_MULTIQUEUE */
-
-static void
-em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
-{
- struct adapter *adapter = ifp->if_softc;
- struct mbuf *m_head;
-
- EM_TX_LOCK_ASSERT(txr);
-
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
- IFF_DRV_RUNNING)
- return;
-
- if (!adapter->link_active)
- return;
-
- while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
- /* Call cleanup if number of TX descriptors low */
- if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
- em_txeof(txr);
- if (txr->tx_avail < EM_MAX_SCATTER) {
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- break;
- }
- IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
- if (m_head == NULL)
- break;
- /*
- * Encapsulation can modify our pointer, and or make it
- * NULL on failure. In that event, we can't requeue.
- */
- if (em_xmit(txr, &m_head)) {
- if (m_head == NULL)
- break;
- IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
- break;
- }
-
- /* Send a copy of the frame to the BPF listener */
- ETHER_BPF_MTAP(ifp, m_head);
-
- /* Set timeout in case hardware has problems transmitting. */
- txr->watchdog_time = ticks;
- txr->queue_status = EM_QUEUE_WORKING;
- }
-
- return;
-}
-
-static void
-em_start(struct ifnet *ifp)
-{
- struct adapter *adapter = ifp->if_softc;
- struct tx_ring *txr = adapter->tx_rings;
-
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- EM_TX_LOCK(txr);
- em_start_locked(ifp, txr);
- EM_TX_UNLOCK(txr);
- }
- return;
-}
#endif /* EM_MULTIQUEUE */
/*********************************************************************
@@ -1080,9 +1144,9 @@ em_start(struct ifnet *ifp)
**********************************************************************/
static int
-em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+em_ioctl(if_t ifp, u_long command, caddr_t data)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
struct ifreq *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
struct ifaddr *ifa = (struct ifaddr *)data;
@@ -1108,11 +1172,11 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
** so we avoid doing it when possible.
*/
if (avoid_reset) {
- ifp->if_flags |= IFF_UP;
- if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if_setflagbits(ifp,IFF_UP,0);
+ if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
em_init(adapter);
#ifdef INET
- if (!(ifp->if_flags & IFF_NOARP))
+ if (!(if_getflags(ifp) & IFF_NOARP))
arp_ifinit(ifp, ifa);
#endif
} else
@@ -1132,6 +1196,7 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
case e1000_ich10lan:
case e1000_pch2lan:
case e1000_pch_lpt:
+ case e1000_pch_spt:
case e1000_82574:
case e1000_82583:
case e1000_80003es2lan: /* 9K Jumbo Frame size */
@@ -1154,10 +1219,11 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
break;
}
- ifp->if_mtu = ifr->ifr_mtu;
+ if_setmtu(ifp, ifr->ifr_mtu);
adapter->hw.mac.max_frame_size =
- ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
- em_init_locked(adapter);
+ if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
+ em_init_locked(adapter);
EM_CORE_UNLOCK(adapter);
break;
}
@@ -1165,9 +1231,9 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
IOCTL_DEBUGOUT("ioctl rcv'd:\
SIOCSIFFLAGS (Set Interface Flags)");
EM_CORE_LOCK(adapter);
- if (ifp->if_flags & IFF_UP) {
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
- if ((ifp->if_flags ^ adapter->if_flags) &
+ if (if_getflags(ifp) & IFF_UP) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
+ if ((if_getflags(ifp) ^ adapter->if_flags) &
(IFF_PROMISC | IFF_ALLMULTI)) {
em_disable_promisc(adapter);
em_set_promisc(adapter);
@@ -1175,20 +1241,20 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
} else
em_init_locked(adapter);
} else
- if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
em_stop(adapter);
- adapter->if_flags = ifp->if_flags;
+ adapter->if_flags = if_getflags(ifp);
EM_CORE_UNLOCK(adapter);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
EM_CORE_LOCK(adapter);
em_disable_intr(adapter);
em_set_multi(adapter);
#ifdef DEVICE_POLLING
- if (!(ifp->if_capenable & IFCAP_POLLING))
+ if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
em_enable_intr(adapter);
EM_CORE_UNLOCK(adapter);
@@ -1216,7 +1282,7 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
reinit = 0;
- mask = ifr->ifr_reqcap ^ ifp->if_capenable;
+ mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
if (mask & IFCAP_POLLING) {
if (ifr->ifr_reqcap & IFCAP_POLLING) {
@@ -1225,48 +1291,48 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
return (error);
EM_CORE_LOCK(adapter);
em_disable_intr(adapter);
- ifp->if_capenable |= IFCAP_POLLING;
+ if_setcapenablebit(ifp, IFCAP_POLLING, 0);
EM_CORE_UNLOCK(adapter);
} else {
error = ether_poll_deregister(ifp);
/* Enable interrupt even in error case */
EM_CORE_LOCK(adapter);
em_enable_intr(adapter);
- ifp->if_capenable &= ~IFCAP_POLLING;
+ if_setcapenablebit(ifp, 0, IFCAP_POLLING);
EM_CORE_UNLOCK(adapter);
}
}
#endif
if (mask & IFCAP_HWCSUM) {
- ifp->if_capenable ^= IFCAP_HWCSUM;
+ if_togglecapenable(ifp,IFCAP_HWCSUM);
reinit = 1;
}
if (mask & IFCAP_TSO4) {
- ifp->if_capenable ^= IFCAP_TSO4;
+ if_togglecapenable(ifp,IFCAP_TSO4);
reinit = 1;
}
if (mask & IFCAP_VLAN_HWTAGGING) {
- ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
+ if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
reinit = 1;
}
if (mask & IFCAP_VLAN_HWFILTER) {
- ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
+ if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
reinit = 1;
}
if (mask & IFCAP_VLAN_HWTSO) {
- ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
+ if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
reinit = 1;
}
if ((mask & IFCAP_WOL) &&
- (ifp->if_capabilities & IFCAP_WOL) != 0) {
+ (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
if (mask & IFCAP_WOL_MCAST)
- ifp->if_capenable ^= IFCAP_WOL_MCAST;
+ if_togglecapenable(ifp, IFCAP_WOL_MCAST);
if (mask & IFCAP_WOL_MAGIC)
- ifp->if_capenable ^= IFCAP_WOL_MAGIC;
+ if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
}
- if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
em_init(adapter);
- VLAN_CAPABILITIES(ifp);
+ if_vlancap(ifp);
break;
}
@@ -1293,7 +1359,7 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
static void
em_init_locked(struct adapter *adapter)
{
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
device_t dev = adapter->dev;
INIT_DEBUGOUT("em_init: begin");
@@ -1304,7 +1370,7 @@ em_init_locked(struct adapter *adapter)
callout_stop(&adapter->timer);
/* Get the latest mac address, User can use a LAA */
- bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
+ bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
ETHER_ADDR_LEN);
/* Put the address into the Receive Address Array */
@@ -1330,11 +1396,18 @@ em_init_locked(struct adapter *adapter)
E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
/* Set hardware offload abilities */
- ifp->if_hwassist = 0;
- if (ifp->if_capenable & IFCAP_TXCSUM)
- ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
- if (ifp->if_capenable & IFCAP_TSO4)
- ifp->if_hwassist |= CSUM_TSO;
+ if_clearhwassist(ifp);
+ if (if_getcapenable(ifp) & IFCAP_TXCSUM)
+ if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
+ /*
+ ** There have proven to be problems with TSO when not
+ ** at full gigabit speed, so disable the assist automatically
+ ** when at lower speeds. -jfv
+ */
+ if (if_getcapenable(ifp) & IFCAP_TSO4) {
+ if (adapter->link_speed == SPEED_1000)
+ if_sethwassistbits(ifp, CSUM_TSO, 0);
+ }
/* Configure for OS presence */
em_init_manageability(adapter);
@@ -1366,8 +1439,8 @@ em_init_locked(struct adapter *adapter)
em_initialize_receive_unit(adapter);
/* Use real VLAN Filter support? */
- if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
/* Use real VLAN Filter support */
em_setup_vlan_hw_support(adapter);
else {
@@ -1382,8 +1455,7 @@ em_init_locked(struct adapter *adapter)
em_set_promisc(adapter);
/* Set the interface as ACTIVE */
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
callout_reset(&adapter->timer, hz, em_local_timer, adapter);
e1000_clear_hw_cntrs_base_generic(&adapter->hw);
@@ -1403,7 +1475,7 @@ em_init_locked(struct adapter *adapter)
* Only enable interrupts if we are not polling, make sure
* they are off otherwise.
*/
- if (ifp->if_capenable & IFCAP_POLLING)
+ if (if_getcapenable(ifp) & IFCAP_POLLING)
em_disable_intr(adapter);
else
#endif /* DEVICE_POLLING */
@@ -1432,16 +1504,16 @@ em_init(void *arg)
*
*********************************************************************/
static int
-em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
+em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
u32 reg_icr;
int rx_done;
EM_CORE_LOCK(adapter);
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
EM_CORE_UNLOCK(adapter);
return (0);
}
@@ -1464,9 +1536,9 @@ em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
#endif
EM_TX_UNLOCK(txr);
@@ -1485,7 +1557,7 @@ static int
em_irq_fast(void *arg)
{
struct adapter *adapter = arg;
- struct ifnet *ifp;
+ if_t ifp;
u32 reg_icr;
ifp = adapter->ifp;
@@ -1527,20 +1599,20 @@ static void
em_handle_que(void *context, int pending)
{
struct adapter *adapter = context;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
-
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
+
EM_TX_LOCK(txr);
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
#endif
EM_TX_UNLOCK(txr);
@@ -1565,18 +1637,19 @@ em_msix_tx(void *arg)
{
struct tx_ring *txr = arg;
struct adapter *adapter = txr->adapter;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
++txr->tx_irq;
EM_TX_LOCK(txr);
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
#endif
+
/* Reenable this interrupt */
E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
EM_TX_UNLOCK(txr);
@@ -1597,14 +1670,15 @@ em_msix_rx(void *arg)
bool more;
++rxr->rx_irq;
- if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
return;
more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
if (more)
taskqueue_enqueue(rxr->tq, &rxr->rx_task);
- else
+ else {
/* Reenable this interrupt */
E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
+ }
return;
}
@@ -1622,12 +1696,25 @@ em_msix_link(void *arg)
++adapter->link_irq;
reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
+ if (reg_icr & E1000_ICR_RXO)
+ adapter->rx_overruns++;
+
if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
adapter->hw.mac.get_link_status = 1;
em_handle_link(adapter, 0);
} else
E1000_WRITE_REG(&adapter->hw, E1000_IMS,
EM_MSIX_LINK | E1000_IMS_LSC);
+ /*
+ ** Because we must read the ICR for this interrupt
+ ** it may clear other causes using autoclear, for
+ ** this reason we simply create a soft interrupt
+ ** for all these vectors.
+ */
+ if (reg_icr) {
+ E1000_WRITE_REG(&adapter->hw,
+ E1000_ICS, adapter->ims);
+ }
return;
}
@@ -1641,9 +1728,10 @@ em_handle_rx(void *context, int pending)
more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
if (more)
taskqueue_enqueue(rxr->tq, &rxr->rx_task);
- else
+ else {
/* Reenable this interrupt */
E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
+ }
}
static void
@@ -1651,15 +1739,15 @@ em_handle_tx(void *context, int pending)
{
struct tx_ring *txr = context;
struct adapter *adapter = txr->adapter;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
EM_TX_LOCK(txr);
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
#endif
E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
@@ -1671,9 +1759,9 @@ em_handle_link(void *context, int pending)
{
struct adapter *adapter = context;
struct tx_ring *txr = adapter->tx_rings;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
- if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
return;
EM_CORE_LOCK(adapter);
@@ -1687,9 +1775,9 @@ em_handle_link(void *context, int pending)
EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
#endif
EM_TX_UNLOCK(txr);
@@ -1708,9 +1796,9 @@ em_handle_link(void *context, int pending)
*
**********************************************************************/
static void
-em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
+em_media_status(if_t ifp, struct ifmediareq *ifmr)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
u_char fiber_type = IFM_1000_SX;
INIT_DEBUGOUT("em_media_status: begin");
@@ -1760,9 +1848,9 @@ em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
*
**********************************************************************/
static int
-em_media_change(struct ifnet *ifp)
+em_media_change(if_t ifp)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
struct ifmedia *ifm = &adapter->media;
INIT_DEBUGOUT("em_media_change: begin");
@@ -1821,20 +1909,21 @@ em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
struct adapter *adapter = txr->adapter;
bus_dma_segment_t segs[EM_MAX_SCATTER];
bus_dmamap_t map;
- struct em_buffer *tx_buffer, *tx_buffer_mapped;
+ struct em_txbuffer *tx_buffer, *tx_buffer_mapped;
struct e1000_tx_desc *ctxd = NULL;
struct mbuf *m_head;
struct ether_header *eh;
struct ip *ip = NULL;
struct tcphdr *tp = NULL;
- u32 txd_upper, txd_lower, txd_used, txd_saved;
+ u32 txd_upper = 0, txd_lower = 0;
int ip_off, poff;
int nsegs, i, j, first, last = 0;
- int error, do_tso, tso_desc = 0, remap = 1;
+ int error;
+ bool do_tso, tso_desc, remap = TRUE;
m_head = *m_headp;
- txd_upper = txd_lower = txd_used = txd_saved = 0;
- do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
+ do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
+ tso_desc = FALSE;
ip_off = poff = 0;
/*
@@ -1849,7 +1938,7 @@ em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
* so we firstly get a writable mbuf chain then coalesce ethernet/
* IP/TCP header into a single buffer to meet the requirement of
* controller. This also simplifies IP/TCP/UDP checksum offloading
- * which also has similiar restrictions.
+ * which also has similar restrictions.
*/
if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
if (do_tso || (m_head->m_next != NULL &&
@@ -1870,74 +1959,82 @@ em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
* for IPv6 yet.
*/
ip_off = sizeof(struct ether_header);
- m_head = m_pullup(m_head, ip_off);
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
+ if (m_head->m_len < ip_off) {
+ m_head = m_pullup(m_head, ip_off);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
}
eh = mtod(m_head, struct ether_header *);
if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
ip_off = sizeof(struct ether_vlan_header);
- m_head = m_pullup(m_head, ip_off);
+ if (m_head->m_len < ip_off) {
+ m_head = m_pullup(m_head, ip_off);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ }
+ }
+ if (m_head->m_len < ip_off + sizeof(struct ip)) {
+ m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
if (m_head == NULL) {
*m_headp = NULL;
return (ENOBUFS);
}
}
- m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
- }
ip = (struct ip *)(mtod(m_head, char *) + ip_off);
poff = ip_off + (ip->ip_hl << 2);
- if (do_tso) {
- m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
+
+ if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
+ if (m_head->m_len < poff + sizeof(struct tcphdr)) {
+ m_head = m_pullup(m_head, poff +
+ sizeof(struct tcphdr));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
}
tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
/*
* TSO workaround:
* pull 4 more bytes of data into it.
*/
- m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
+ if (m_head->m_len < poff + (tp->th_off << 2)) {
+ m_head = m_pullup(m_head, poff +
+ (tp->th_off << 2) +
+ TSO_WORKAROUND);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
}
ip = (struct ip *)(mtod(m_head, char *) + ip_off);
- ip->ip_len = 0;
- ip->ip_sum = 0;
- /*
- * The pseudo TCP checksum does not include TCP payload
- * length so driver should recompute the checksum here
- * what hardware expect to see. This is adherence of
- * Microsoft's Large Send specification.
- */
tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
- tp->th_sum = in_pseudo(ip->ip_src.s_addr,
- ip->ip_dst.s_addr, htons(IPPROTO_TCP));
- } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
- m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
- }
- tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
- m_head = m_pullup(m_head, poff + (tp->th_off << 2));
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
+ if (do_tso) {
+ ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
+ (ip->ip_hl << 2) +
+ (tp->th_off << 2));
+ ip->ip_sum = 0;
+ /*
+ * The pseudo TCP checksum does not include TCP
+ * payload length so driver should recompute
+ * the checksum here what hardware expect to
+ * see. This is adherence of Microsoft's Large
+ * Send specification.
+ */
+ tp->th_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htons(IPPROTO_TCP));
}
- ip = (struct ip *)(mtod(m_head, char *) + ip_off);
- tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
- m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
+ if (m_head->m_len < poff + sizeof(struct udphdr)) {
+ m_head = m_pullup(m_head, poff +
+ sizeof(struct udphdr));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
}
ip = (struct ip *)(mtod(m_head, char *) + ip_off);
}
@@ -1973,9 +2070,9 @@ retry:
if (error == EFBIG && remap) {
struct mbuf *m;
- m = m_defrag(*m_headp, M_NOWAIT);
+ m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
if (m == NULL) {
- adapter->mbuf_alloc_failed++;
+ adapter->mbuf_defrag_failed++;
m_freem(*m_headp);
*m_headp = NULL;
return (ENOBUFS);
@@ -1983,11 +2080,8 @@ retry:
*m_headp = m;
/* Try it again, but only once */
- remap = 0;
+ remap = FALSE;
goto retry;
- } else if (error == ENOMEM) {
- adapter->no_tx_dma_setup++;
- return (error);
} else if (error != 0) {
adapter->no_tx_dma_setup++;
m_freem(*m_headp);
@@ -2001,13 +2095,13 @@ retry:
* it follows a TSO burst, then we need to add a
* sentinel descriptor to prevent premature writeback.
*/
- if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
+ if ((!do_tso) && (txr->tx_tso == TRUE)) {
if (nsegs == 1)
tso_desc = TRUE;
txr->tx_tso = FALSE;
}
- if (nsegs > (txr->tx_avail - 2)) {
+ if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
txr->no_desc_avail++;
bus_dmamap_unload(txr->txtag, map);
return (ENOBUFS);
@@ -2026,8 +2120,7 @@ retry:
if (m_head->m_flags & M_VLANTAG) {
/* Set the vlan id. */
- txd_upper |=
- (htole16(m_head->m_pkthdr.ether_vtag) << 16);
+ txd_upper |= htole16(if_getvtag(m_head)) << 16;
/* Tell hardware to add tag */
txd_lower |= htole32(E1000_TXD_CMD_VLE);
}
@@ -2048,23 +2141,23 @@ retry:
** If this is the last descriptor, we want to
** split it so we have a small final sentinel
*/
- if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
- seg_len -= 4;
+ if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
+ seg_len -= TSO_WORKAROUND;
ctxd->buffer_addr = htole64(seg_addr);
ctxd->lower.data = htole32(
- adapter->txd_cmd | txd_lower | seg_len);
- ctxd->upper.data =
- htole32(txd_upper);
+ adapter->txd_cmd | txd_lower | seg_len);
+ ctxd->upper.data = htole32(txd_upper);
if (++i == adapter->num_tx_desc)
i = 0;
+
/* Now make the sentinel */
- ++txd_used; /* using an extra txd */
+ txr->tx_avail--;
ctxd = &txr->tx_base[i];
tx_buffer = &txr->tx_buffers[i];
ctxd->buffer_addr =
htole64(seg_addr + seg_len);
ctxd->lower.data = htole32(
- adapter->txd_cmd | txd_lower | 4);
+ adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
ctxd->upper.data =
htole32(txd_upper);
last = i;
@@ -2074,8 +2167,7 @@ retry:
ctxd->buffer_addr = htole64(seg_addr);
ctxd->lower.data = htole32(
adapter->txd_cmd | txd_lower | seg_len);
- ctxd->upper.data =
- htole32(txd_upper);
+ ctxd->upper.data = htole32(txd_upper);
last = i;
if (++i == adapter->num_tx_desc)
i = 0;
@@ -2086,8 +2178,6 @@ retry:
txr->next_avail_desc = i;
txr->tx_avail -= nsegs;
- if (tso_desc) /* TSO used an extra for sentinel */
- txr->tx_avail -= txd_used;
tx_buffer->m_head = m_head;
/*
@@ -2113,8 +2203,6 @@ retry:
*/
tx_buffer = &txr->tx_buffers[first];
tx_buffer->next_eop = last;
- /* Update the watchdog time early and often */
- txr->watchdog_time = ticks;
/*
* Advance the Transmit Descriptor Tail (TDT), this tells the E1000
@@ -2130,18 +2218,18 @@ retry:
static void
em_set_promisc(struct adapter *adapter)
{
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
u32 reg_rctl;
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
- if (ifp->if_flags & IFF_PROMISC) {
+ if (if_getflags(ifp) & IFF_PROMISC) {
reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
/* Turn this on if you want to see bad packets */
if (em_debug_sbp)
reg_rctl |= E1000_RCTL_SBP;
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
- } else if (ifp->if_flags & IFF_ALLMULTI) {
+ } else if (if_getflags(ifp) & IFF_ALLMULTI) {
reg_rctl |= E1000_RCTL_MPE;
reg_rctl &= ~E1000_RCTL_UPE;
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
@@ -2151,34 +2239,16 @@ em_set_promisc(struct adapter *adapter)
static void
em_disable_promisc(struct adapter *adapter)
{
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
u32 reg_rctl;
int mcnt = 0;
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
reg_rctl &= (~E1000_RCTL_UPE);
- if (ifp->if_flags & IFF_ALLMULTI)
+ if (if_getflags(ifp) & IFF_ALLMULTI)
mcnt = MAX_NUM_MULTICAST_ADDRESSES;
- else {
- struct ifmultiaddr *ifma;
-#if __FreeBSD_version < 800000
- IF_ADDR_LOCK(ifp);
-#else
- if_maddr_rlock(ifp);
-#endif
- TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_LINK)
- continue;
- if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
- break;
- mcnt++;
- }
-#if __FreeBSD_version < 800000
- IF_ADDR_UNLOCK(ifp);
-#else
- if_maddr_runlock(ifp);
-#endif
- }
+ else
+ mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
/* Don't disable if in MAX groups */
if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
reg_rctl &= (~E1000_RCTL_MPE);
@@ -2197,8 +2267,7 @@ em_disable_promisc(struct adapter *adapter)
static void
em_set_multi(struct adapter *adapter)
{
- struct ifnet *ifp = adapter->ifp;
- struct ifmultiaddr *ifma;
+ if_t ifp = adapter->ifp;
u32 reg_rctl = 0;
u8 *mta; /* Multicast array memory */
int mcnt = 0;
@@ -2218,27 +2287,8 @@ em_set_multi(struct adapter *adapter)
msec_delay(5);
}
-#if __FreeBSD_version < 800000
- IF_ADDR_LOCK(ifp);
-#else
- if_maddr_rlock(ifp);
-#endif
- TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_LINK)
- continue;
-
- if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
- break;
+ if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
- bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
- &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
- mcnt++;
- }
-#if __FreeBSD_version < 800000
- IF_ADDR_UNLOCK(ifp);
-#else
- if_maddr_runlock(ifp);
-#endif
if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
reg_rctl |= E1000_RCTL_MPE;
@@ -2269,10 +2319,10 @@ static void
em_local_timer(void *arg)
{
struct adapter *adapter = arg;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
- u32 trigger;
+ u32 trigger = 0;
EM_CORE_LOCK_ASSERT(adapter);
@@ -2285,9 +2335,11 @@ em_local_timer(void *arg)
e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
/* Mask to use in the irq trigger */
- if (adapter->msix_mem)
- trigger = rxr->ims;
- else
+ if (adapter->msix_mem) {
+ for (int i = 0; i < adapter->num_queues; i++, rxr++)
+ trigger |= rxr->ims;
+ rxr = adapter->rx_rings;
+ } else
trigger = E1000_ICS_RXDMT0;
/*
@@ -2296,15 +2348,15 @@ em_local_timer(void *arg)
** and the HUNG state will be static if set.
*/
for (int i = 0; i < adapter->num_queues; i++, txr++) {
- if ((txr->queue_status == EM_QUEUE_HUNG) &&
- (adapter->pause_frames == 0))
+ if (txr->busy == EM_TX_HUNG)
goto hung;
+ if (txr->busy >= EM_TX_MAXTRIES)
+ txr->busy = EM_TX_HUNG;
/* Schedule a TX tasklet if needed */
if (txr->tx_avail <= EM_MAX_SCATTER)
taskqueue_enqueue(txr->tq, &txr->tx_task);
}
- adapter->pause_frames = 0;
callout_reset(&adapter->timer, hz, em_local_timer, adapter);
#ifndef DEVICE_POLLING
/* Trigger an RX interrupt to guarantee mbuf refresh */
@@ -2313,17 +2365,11 @@ em_local_timer(void *arg)
return;
hung:
/* Looks like we're hung */
- device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
- device_printf(adapter->dev,
- "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
- E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
- E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
- device_printf(adapter->dev,"TX(%d) desc avail = %d,"
- "Next TX to Clean = %d\n",
- txr->me, txr->tx_avail, txr->next_to_clean);
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
+ txr->me);
+ em_print_debug_info(adapter);
+ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
adapter->watchdog_events++;
- adapter->pause_frames = 0;
em_init_locked(adapter);
}
@@ -2332,7 +2378,7 @@ static void
em_update_link_status(struct adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
device_t dev = adapter->dev;
struct tx_ring *txr = adapter->tx_rings;
u32 link_check = 0;
@@ -2341,6 +2387,8 @@ em_update_link_status(struct adapter *adapter)
switch (hw->phy.media_type) {
case e1000_media_type_copper:
if (hw->mac.get_link_status) {
+ if (hw->mac.type == e1000_pch_spt)
+ msec_delay(50);
/* Do the work to read phy */
e1000_check_for_link(hw);
link_check = !hw->mac.get_link_status;
@@ -2373,7 +2421,7 @@ em_update_link_status(struct adapter *adapter)
(hw->mac.type == e1000_82572))) {
int tarc0;
tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
- tarc0 &= ~SPEED_MODE_BIT;
+ tarc0 &= ~TARC_SPEED_MODE_BIT;
E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
}
if (bootverbose)
@@ -2383,17 +2431,18 @@ em_update_link_status(struct adapter *adapter)
"Full Duplex" : "Half Duplex"));
adapter->link_active = 1;
adapter->smartspeed = 0;
- ifp->if_baudrate = adapter->link_speed * 1000000;
+ if_setbaudrate(ifp, adapter->link_speed * 1000000);
if_link_state_change(ifp, LINK_STATE_UP);
} else if (!link_check && (adapter->link_active == 1)) {
- ifp->if_baudrate = adapter->link_speed = 0;
+ if_setbaudrate(ifp, 0);
+ adapter->link_speed = 0;
adapter->link_duplex = 0;
if (bootverbose)
device_printf(dev, "Link is Down\n");
adapter->link_active = 0;
- /* Link down, disable watchdog */
+ /* Link down, disable hang detection */
for (int i = 0; i < adapter->num_queues; i++, txr++)
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
if_link_state_change(ifp, LINK_STATE_DOWN);
}
}
@@ -2411,7 +2460,7 @@ static void
em_stop(void *arg)
{
struct adapter *adapter = arg;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
struct tx_ring *txr = adapter->tx_rings;
EM_CORE_LOCK_ASSERT(adapter);
@@ -2422,16 +2471,19 @@ em_stop(void *arg)
callout_stop(&adapter->timer);
/* Tell the stack that the interface is no longer active */
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
- /* Unarm watchdog timer. */
+ /* Disarm Hang Detection. */
for (int i = 0; i < adapter->num_queues; i++, txr++) {
EM_TX_LOCK(txr);
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
EM_TX_UNLOCK(txr);
}
+ /* I219 needs some special flushing to avoid hangs */
+ if (adapter->hw.mac.type == e1000_pch_spt)
+ em_flush_desc_rings(adapter);
+
e1000_reset_hw(&adapter->hw);
E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
@@ -2489,14 +2541,6 @@ em_allocate_pci_resources(struct adapter *adapter)
rman_get_bushandle(adapter->memory);
adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
- /* Default to a single queue */
- adapter->num_queues = 1;
-
- /*
- * Setup MSI/X or MSI if PCI Express
- */
- adapter->msix = em_setup_msix(adapter);
-
adapter->hw.back = &adapter->osdep;
return (0);
@@ -2560,7 +2604,7 @@ em_allocate_legacy(struct adapter *adapter)
*
* Setup the MSIX Interrupt handlers
* This is not really Multiqueue, rather
- * its just seperate interrupt vectors
+ * its just separate interrupt vectors
* for TX, RX, and Link.
*
**********************************************************************/
@@ -2571,13 +2615,14 @@ em_allocate_msix(struct adapter *adapter)
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
int error, rid, vector = 0;
+ int cpu_id = 0;
/* Make sure all interrupts are disabled */
E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
/* First set up ring resources */
- for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+ for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
/* RX ring */
rid = vector + 1;
@@ -2597,14 +2642,20 @@ em_allocate_msix(struct adapter *adapter)
return (error);
}
#if __FreeBSD_version >= 800504
- bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
+ bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
#endif
- rxr->msix = vector++; /* NOTE increment vector for TX */
+ rxr->msix = vector;
+
+ if (em_last_bind_cpu < 0)
+ em_last_bind_cpu = CPU_FIRST();
+ cpu_id = em_last_bind_cpu;
+ bus_bind_intr(dev, rxr->res, cpu_id);
+
TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
taskqueue_thread_enqueue, &rxr->tq);
- taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
- device_get_nameunit(adapter->dev));
+ taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
+ device_get_nameunit(adapter->dev), cpu_id);
/*
** Set the bit to enable interrupt
** in E1000_IMS -- bits 20 and 21
@@ -2612,8 +2663,13 @@ em_allocate_msix(struct adapter *adapter)
** NOTHING to do with the MSIX vector
*/
rxr->ims = 1 << (20 + i);
+ adapter->ims |= rxr->ims;
adapter->ivars |= (8 | rxr->msix) << (i * 4);
+ em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
+ }
+
+ for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
/* TX ring */
rid = vector + 1;
txr->res = bus_alloc_resource_any(dev,
@@ -2631,14 +2687,20 @@ em_allocate_msix(struct adapter *adapter)
return (error);
}
#if __FreeBSD_version >= 800504
- bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
+ bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
#endif
- txr->msix = vector++; /* Increment vector for next pass */
+ txr->msix = vector;
+
+ if (em_last_bind_cpu < 0)
+ em_last_bind_cpu = CPU_FIRST();
+ cpu_id = em_last_bind_cpu;
+ bus_bind_intr(dev, txr->res, cpu_id);
+
TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
taskqueue_thread_enqueue, &txr->tq);
- taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
- device_get_nameunit(adapter->dev));
+ taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
+ device_get_nameunit(adapter->dev), cpu_id);
/*
** Set the bit to enable interrupt
** in E1000_IMS -- bits 22 and 23
@@ -2646,13 +2708,16 @@ em_allocate_msix(struct adapter *adapter)
** NOTHING to do with the MSIX vector
*/
txr->ims = 1 << (22 + i);
+ adapter->ims |= txr->ims;
adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
+
+ em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
}
/* Link interrupt */
- ++rid;
+ rid = vector + 1;
adapter->res = bus_alloc_resource_any(dev,
- SYS_RES_IRQ, &rid, RF_ACTIVE);
+ SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
if (!adapter->res) {
device_printf(dev,"Unable to allocate "
"bus resource: Link interrupt [%d]\n", rid);
@@ -2668,7 +2733,7 @@ em_allocate_msix(struct adapter *adapter)
return (error);
}
#if __FreeBSD_version >= 800504
- bus_describe_intr(dev, adapter->res, adapter->tag, "link");
+ bus_describe_intr(dev, adapter->res, adapter->tag, "link");
#endif
adapter->linkvec = vector;
adapter->ivars |= (8 | vector) << 16;
@@ -2692,9 +2757,8 @@ em_free_pci_resources(struct adapter *adapter)
*/
for (int i = 0; i < adapter->num_queues; i++) {
txr = &adapter->tx_rings[i];
- rxr = &adapter->rx_rings[i];
/* an early abort? */
- if ((txr == NULL) || (rxr == NULL))
+ if (txr == NULL)
break;
rid = txr->msix +1;
if (txr->tag != NULL) {
@@ -2704,6 +2768,11 @@ em_free_pci_resources(struct adapter *adapter)
if (txr->res != NULL)
bus_release_resource(dev, SYS_RES_IRQ,
rid, txr->res);
+
+ rxr = &adapter->rx_rings[i];
+ /* an early abort? */
+ if (rxr == NULL)
+ break;
rid = rxr->msix +1;
if (rxr->tag != NULL) {
bus_teardown_intr(dev, rxr->res, rxr->tag);
@@ -2753,14 +2822,19 @@ em_setup_msix(struct adapter *adapter)
device_t dev = adapter->dev;
int val;
+ /* Nearly always going to use one queue */
+ adapter->num_queues = 1;
+
/*
- ** Setup MSI/X for Hartwell: tests have shown
- ** use of two queues to be unstable, and to
- ** provide no great gain anyway, so we simply
- ** seperate the interrupts and use a single queue.
+ ** Try using MSI-X for Hartwell adapters
*/
if ((adapter->hw.mac.type == e1000_82574) &&
(em_enable_msix == TRUE)) {
+#ifdef EM_MULTIQUEUE
+ adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
+ if (adapter->num_queues > 1)
+ em_enable_vectors_82574(adapter);
+#endif
/* Map the MSIX BAR */
int rid = PCIR_BAR(EM_MSIX_BAR);
adapter->msix_mem = bus_alloc_resource_any(dev,
@@ -2772,16 +2846,34 @@ em_setup_msix(struct adapter *adapter)
goto msi;
}
val = pci_msix_count(dev);
- /* We only need/want 3 vectors */
- if (val >= 3)
- val = 3;
- else {
- device_printf(adapter->dev,
- "MSIX: insufficient vectors, using MSI\n");
- goto msi;
+
+#ifdef EM_MULTIQUEUE
+ /* We need 5 vectors in the multiqueue case */
+ if (adapter->num_queues > 1 ) {
+ if (val >= 5)
+ val = 5;
+ else {
+ adapter->num_queues = 1;
+ device_printf(adapter->dev,
+ "Insufficient MSIX vectors for >1 queue, "
+ "using single queue...\n");
+ goto msix_one;
+ }
+ } else {
+msix_one:
+#endif
+ if (val >= 3)
+ val = 3;
+ else {
+ device_printf(adapter->dev,
+ "Insufficient MSIX vectors, using MSI\n");
+ goto msi;
+ }
+#ifdef EM_MULTIQUEUE
}
+#endif
- if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
+ if ((pci_alloc_msix(dev, &val) == 0)) {
device_printf(adapter->dev,
"Using MSIX interrupts "
"with %d vectors\n", val);
@@ -2802,7 +2894,7 @@ msi:
}
val = 1;
if (pci_alloc_msi(dev, &val) == 0) {
- device_printf(adapter->dev,"Using an MSI interrupt\n");
+ device_printf(adapter->dev, "Using an MSI interrupt\n");
return (val);
}
/* Should only happen due to manual configuration */
@@ -2811,6 +2903,116 @@ msi:
}
+/*
+** The 3 following flush routines are used as a workaround in the
+** I219 client parts and only for them.
+**
+** em_flush_tx_ring - remove all descriptors from the tx_ring
+**
+** We want to clear all pending descriptors from the TX ring.
+** zeroing happens when the HW reads the regs. We assign the ring itself as
+** the data of the next descriptor. We don't care about the data we are about
+** to reset the HW.
+*/
+static void
+em_flush_tx_ring(struct adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct tx_ring *txr = adapter->tx_rings;
+ struct e1000_tx_desc *txd;
+ u32 tctl, txd_lower = E1000_TXD_CMD_IFCS;
+ u16 size = 512;
+
+ tctl = E1000_READ_REG(hw, E1000_TCTL);
+ E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
+
+ txd = &txr->tx_base[txr->next_avail_desc++];
+ if (txr->next_avail_desc == adapter->num_tx_desc)
+ txr->next_avail_desc = 0;
+
+ /* Just use the ring as a dummy buffer addr */
+ txd->buffer_addr = txr->txdma.dma_paddr;
+ txd->lower.data = htole32(txd_lower | size);
+ txd->upper.data = 0;
+
+ /* flush descriptors to memory before notifying the HW */
+ wmb();
+
+ E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
+ mb();
+ usec_delay(250);
+}
+
+/*
+** em_flush_rx_ring - remove all descriptors from the rx_ring
+**
+** Mark all descriptors in the RX ring as consumed and disable the rx ring
+*/
+static void
+em_flush_rx_ring(struct adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 rctl, rxdctl;
+
+ rctl = E1000_READ_REG(hw, E1000_RCTL);
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+ E1000_WRITE_FLUSH(hw);
+ usec_delay(150);
+
+ rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
+ /* zero the lower 14 bits (prefetch and host thresholds) */
+ rxdctl &= 0xffffc000;
+ /*
+ * update thresholds: prefetch threshold to 31, host threshold to 1
+ * and make sure the granularity is "descriptors" and not "cache lines"
+ */
+ rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
+ E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
+
+ /* momentarily enable the RX ring for the changes to take effect */
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
+ E1000_WRITE_FLUSH(hw);
+ usec_delay(150);
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+}
+
+/*
+** em_flush_desc_rings - remove all descriptors from the descriptor rings
+**
+** In i219, the descriptor rings must be emptied before resetting the HW
+** or before changing the device state to D3 during runtime (runtime PM).
+**
+** Failure to do this will cause the HW to enter a unit hang state which can
+** only be released by PCI reset on the device
+**
+*/
+static void
+em_flush_desc_rings(struct adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ device_t dev = adapter->dev;
+ u16 hang_state;
+ u32 fext_nvm11, tdlen;
+
+ /* First, disable MULR fix in FEXTNVM11 */
+ fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
+ fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
+ E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
+
+ /* do nothing if we're not in faulty state, or if the queue is empty */
+ tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
+ hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
+ if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
+ return;
+ em_flush_tx_ring(adapter);
+
+ /* recheck, maybe the fault is caused by the rx ring */
+ hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
+ if (hang_state & FLUSH_DESC_REQUIRED)
+ em_flush_rx_ring(adapter);
+}
+
+
/*********************************************************************
*
* Initialize the hardware to a configuration
@@ -2821,7 +3023,7 @@ static void
em_reset(struct adapter *adapter)
{
device_t dev = adapter->dev;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
struct e1000_hw *hw = &adapter->hw;
u16 rx_buffer_size;
u32 pba;
@@ -2872,6 +3074,7 @@ em_reset(struct adapter *adapter)
case e1000_pchlan:
case e1000_pch2lan:
case e1000_pch_lpt:
+ case e1000_pch_spt:
pba = E1000_PBA_26K;
break;
default:
@@ -2889,7 +3092,7 @@ em_reset(struct adapter *adapter)
* received after sending an XOFF.
* - Low water mark works best when it is very near the high water mark.
* This allows the receiver to restart by sending XON when it has
- * drained a bit. Here we use an arbitary value of 1500 which will
+ * drained a bit. Here we use an arbitrary value of 1500 which will
* restart after one full frame is pulled from the buffer. There
* could be several smaller frames in the buffer and if so they will
* not trigger the XON until their total number reduces the buffer
@@ -2919,7 +3122,7 @@ em_reset(struct adapter *adapter)
/* Workaround: no TX flow ctrl for PCH */
hw->fc.requested_mode = e1000_fc_rx_pause;
hw->fc.pause_time = 0xFFFF; /* override */
- if (ifp->if_mtu > ETHERMTU) {
+ if (if_getmtu(ifp) > ETHERMTU) {
hw->fc.high_water = 0x3500;
hw->fc.low_water = 0x1500;
} else {
@@ -2930,19 +3133,20 @@ em_reset(struct adapter *adapter)
break;
case e1000_pch2lan:
case e1000_pch_lpt:
+ case e1000_pch_spt:
hw->fc.high_water = 0x5C20;
hw->fc.low_water = 0x5048;
hw->fc.pause_time = 0x0650;
hw->fc.refresh_time = 0x0400;
/* Jumbos need adjusted PBA */
- if (ifp->if_mtu > ETHERMTU)
+ if (if_getmtu(ifp) > ETHERMTU)
E1000_WRITE_REG(hw, E1000_PBA, 12);
else
E1000_WRITE_REG(hw, E1000_PBA, 26);
break;
case e1000_ich9lan:
case e1000_ich10lan:
- if (ifp->if_mtu > ETHERMTU) {
+ if (if_getmtu(ifp) > ETHERMTU) {
hw->fc.high_water = 0x2800;
hw->fc.low_water = hw->fc.high_water - 8;
break;
@@ -2954,6 +3158,10 @@ em_reset(struct adapter *adapter)
break;
}
+ /* I219 needs some special flushing to avoid hangs */
+ if (hw->mac.type == e1000_pch_spt)
+ em_flush_desc_rings(adapter);
+
/* Issue a global reset */
e1000_reset_hw(hw);
E1000_WRITE_REG(hw, E1000_WUC, 0);
@@ -2978,47 +3186,55 @@ em_reset(struct adapter *adapter)
static int
em_setup_interface(device_t dev, struct adapter *adapter)
{
- struct ifnet *ifp;
+ if_t ifp;
INIT_DEBUGOUT("em_setup_interface: begin");
- ifp = adapter->ifp = if_alloc(IFT_ETHER);
- if (ifp == NULL) {
+ ifp = adapter->ifp = if_gethandle(IFT_ETHER);
+ if (ifp == 0) {
device_printf(dev, "can not allocate ifnet structure\n");
return (-1);
}
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
- ifp->if_init = em_init;
- ifp->if_softc = adapter;
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_ioctl = em_ioctl;
+ if_setdev(ifp, dev);
+ if_setinitfn(ifp, em_init);
+ if_setsoftc(ifp, adapter);
+ if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
+ if_setioctlfn(ifp, em_ioctl);
+ if_setgetcounterfn(ifp, em_get_counter);
+
+ /* TSO parameters */
+ ifp->if_hw_tsomax = IP_MAXPACKET;
+ /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */
+ ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
+ ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
+
#ifdef EM_MULTIQUEUE
/* Multiqueue stack interface */
- ifp->if_transmit = em_mq_start;
- ifp->if_qflush = em_qflush;
+ if_settransmitfn(ifp, em_mq_start);
+ if_setqflushfn(ifp, em_qflush);
#else
- ifp->if_start = em_start;
- IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
- ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
- IFQ_SET_READY(&ifp->if_snd);
+ if_setstartfn(ifp, em_start);
+ if_setsendqlen(ifp, adapter->num_tx_desc - 1);
+ if_setsendqready(ifp);
#endif
ether_ifattach(ifp, adapter->hw.mac.addr);
- ifp->if_capabilities = ifp->if_capenable = 0;
+ if_setcapabilities(ifp, 0);
+ if_setcapenable(ifp, 0);
- ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
- ifp->if_capabilities |= IFCAP_TSO4;
+ if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
+ IFCAP_TSO4, 0);
/*
* Tell the upper layer(s) we
* support full VLAN capability
*/
- ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
- ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
- | IFCAP_VLAN_HWTSO
- | IFCAP_VLAN_MTU;
- ifp->if_capenable = ifp->if_capabilities;
+ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
+ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
+ IFCAP_VLAN_MTU, 0);
+ if_setcapenable(ifp, if_getcapabilities(ifp));
/*
** Don't turn this on by default, if vlans are
@@ -3028,16 +3244,16 @@ em_setup_interface(device_t dev, struct adapter *adapter)
** using vlans directly on the em driver you can
** enable this and get full hardware tag filtering.
*/
- ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
+ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
#ifdef DEVICE_POLLING
- ifp->if_capabilities |= IFCAP_POLLING;
+ if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
#endif
/* Enable only WOL MAGIC by default */
if (adapter->wol) {
- ifp->if_capabilities |= IFCAP_WOL;
- ifp->if_capenable |= IFCAP_WOL_MAGIC;
+ if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
+ if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
}
/*
@@ -3137,7 +3353,6 @@ fail_2:
bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
bus_dma_tag_destroy(dma->dma_tag);
fail_0:
- dma->dma_map = NULL;
dma->dma_tag = NULL;
return (error);
@@ -3148,12 +3363,15 @@ em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
if (dma->dma_tag == NULL)
return;
- if (dma->dma_map != NULL) {
+ if (dma->dma_paddr != 0) {
bus_dmamap_sync(dma->dma_tag, dma->dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(dma->dma_tag, dma->dma_map);
+ dma->dma_paddr = 0;
+ }
+ if (dma->dma_vaddr != NULL) {
bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
- dma->dma_map = NULL;
+ dma->dma_vaddr = NULL;
}
bus_dma_tag_destroy(dma->dma_tag);
dma->dma_tag = NULL;
@@ -3239,7 +3457,7 @@ em_allocate_queues(struct adapter *adapter)
* Next the RX queues...
*/
rsize = roundup2(adapter->num_rx_desc *
- sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
+ sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
rxr = &adapter->rx_rings[i];
rxr->adapter = adapter;
@@ -3257,7 +3475,7 @@ em_allocate_queues(struct adapter *adapter)
error = ENOMEM;
goto err_rx_desc;
}
- rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
+ rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
bzero((void *)rxr->rx_base, rsize);
/* Allocate receive buffers for the ring*/
@@ -3300,7 +3518,7 @@ em_allocate_transmit_buffers(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
device_t dev = adapter->dev;
- struct em_buffer *txbuf;
+ struct em_txbuffer *txbuf;
int error, i;
/*
@@ -3323,7 +3541,7 @@ em_allocate_transmit_buffers(struct tx_ring *txr)
}
if (!(txr->tx_buffers =
- (struct em_buffer *) malloc(sizeof(struct em_buffer) *
+ (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
device_printf(dev, "Unable to allocate tx_buffer memory\n");
error = ENOMEM;
@@ -3356,11 +3574,11 @@ static void
em_setup_transmit_ring(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
- struct em_buffer *txbuf;
+ struct em_txbuffer *txbuf;
int i;
#ifdef DEV_NETMAP
- struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_slot *slot;
+ struct netmap_adapter *na = netmap_getna(adapter->ifp);
#endif /* DEV_NETMAP */
/* Clear the old descriptor contents */
@@ -3391,10 +3609,10 @@ em_setup_transmit_ring(struct tx_ring *txr)
uint64_t paddr;
void *addr;
- addr = PNMB(slot + si, &paddr);
+ addr = PNMB(na, slot + si, &paddr);
txr->tx_base[i].buffer_addr = htole64(paddr);
/* reload the map for netmap mode */
- netmap_load_map(txr->txtag, txbuf->map, addr);
+ netmap_load_map(na, txr->txtag, txbuf->map, addr);
}
#endif /* DEV_NETMAP */
@@ -3404,7 +3622,7 @@ em_setup_transmit_ring(struct tx_ring *txr)
/* Set number of descriptors available */
txr->tx_avail = adapter->num_tx_desc;
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
/* Clear checksum offload context. */
txr->last_hw_offload = 0;
@@ -3444,7 +3662,7 @@ em_initialize_transmit_unit(struct adapter *adapter)
{
struct tx_ring *txr = adapter->tx_rings;
struct e1000_hw *hw = &adapter->hw;
- u32 tctl, tarc, tipg = 0;
+ u32 tctl, txdctl = 0, tarc, tipg = 0;
INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
@@ -3465,7 +3683,16 @@ em_initialize_transmit_unit(struct adapter *adapter)
E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
+ txdctl = 0; /* clear txdctl */
+ txdctl |= 0x1f; /* PTHRESH */
+ txdctl |= 1 << 8; /* HTHRESH */
+ txdctl |= 1 << 16;/* WTHRESH */
+ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
+ txdctl |= E1000_TXDCTL_GRAN;
+ txdctl |= 1 << 25; /* LWTHRESH */
+
+ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
}
/* Set the default values for the Tx Inter Packet Gap timer */
@@ -3496,15 +3723,25 @@ em_initialize_transmit_unit(struct adapter *adapter)
if ((adapter->hw.mac.type == e1000_82571) ||
(adapter->hw.mac.type == e1000_82572)) {
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
- tarc |= SPEED_MODE_BIT;
+ tarc |= TARC_SPEED_MODE_BIT;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
} else if (adapter->hw.mac.type == e1000_80003es2lan) {
+ /* errata: program both queues to unweighted RR */
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
tarc |= 1;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
tarc |= 1;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
+ } else if (adapter->hw.mac.type == e1000_82574) {
+ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
+ tarc |= TARC_ERRATA_BIT;
+ if ( adapter->num_queues > 1) {
+ tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
+ E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
+ E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
+ } else
+ E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
}
adapter->txd_cmd = E1000_TXD_CMD_IFCS;
@@ -3523,6 +3760,15 @@ em_initialize_transmit_unit(struct adapter *adapter)
/* This write will effectively turn on the transmit unit. */
E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
+ if (hw->mac.type == e1000_pch_spt) {
+ u32 reg;
+ reg = E1000_READ_REG(hw, E1000_IOSFPC);
+ reg |= E1000_RCTL_RDMTS_HEX;
+ E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
+ reg = E1000_READ_REG(hw, E1000_TARC(0));
+ reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
+ E1000_WRITE_REG(hw, E1000_TARC(0), reg);
+ }
}
@@ -3556,7 +3802,7 @@ static void
em_free_transmit_buffers(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
- struct em_buffer *txbuf;
+ struct em_txbuffer *txbuf;
INIT_DEBUGOUT("free_transmit_ring: begin");
@@ -3623,7 +3869,7 @@ em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
{
struct adapter *adapter = txr->adapter;
struct e1000_context_desc *TXD = NULL;
- struct em_buffer *tx_buffer;
+ struct em_txbuffer *tx_buffer;
int cur, hdr_len;
u32 cmd = 0;
u16 offload = 0;
@@ -3657,29 +3903,38 @@ em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
offload |= CSUM_TCP;
tucss = hdr_len;
tucso = hdr_len + offsetof(struct tcphdr, th_sum);
- /*
- * Setting up new checksum offload context for every frames
- * takes a lot of processing time for hardware. This also
- * reduces performance a lot for small sized frames so avoid
- * it if driver can use previously configured checksum
- * offload context.
- */
- if (txr->last_hw_offload == offload) {
- if (offload & CSUM_IP) {
- if (txr->last_hw_ipcss == ipcss &&
- txr->last_hw_ipcso == ipcso &&
- txr->last_hw_tucss == tucss &&
- txr->last_hw_tucso == tucso)
- return;
- } else {
- if (txr->last_hw_tucss == tucss &&
- txr->last_hw_tucso == tucso)
- return;
- }
- }
- txr->last_hw_offload = offload;
- txr->last_hw_tucss = tucss;
- txr->last_hw_tucso = tucso;
+ /*
+ * The 82574L can only remember the *last* context used
+ * regardless of queue that it was use for. We cannot reuse
+ * contexts on this hardware platform and must generate a new
+ * context every time. 82574L hardware spec, section 7.2.6,
+ * second note.
+ */
+ if (adapter->num_queues < 2) {
+ /*
+ * Setting up new checksum offload context for every
+ * frame takes a lot of processing time for hardware.
+ * This also reduces performance a lot for small sized
+ * frames so avoid it if driver can use previously
+ * configured checksum offload context.
+ */
+ if (txr->last_hw_offload == offload) {
+ if (offload & CSUM_IP) {
+ if (txr->last_hw_ipcss == ipcss &&
+ txr->last_hw_ipcso == ipcso &&
+ txr->last_hw_tucss == tucss &&
+ txr->last_hw_tucso == tucso)
+ return;
+ } else {
+ if (txr->last_hw_tucss == tucss &&
+ txr->last_hw_tucso == tucso)
+ return;
+ }
+ }
+ txr->last_hw_offload = offload;
+ txr->last_hw_tucss = tucss;
+ txr->last_hw_tucso = tucso;
+ }
/*
* Start offset for payload checksum calculation.
* End offset for payload checksum calculation.
@@ -3695,29 +3950,38 @@ em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
tucss = hdr_len;
tucso = hdr_len + offsetof(struct udphdr, uh_sum);
- /*
- * Setting up new checksum offload context for every frames
- * takes a lot of processing time for hardware. This also
- * reduces performance a lot for small sized frames so avoid
- * it if driver can use previously configured checksum
- * offload context.
- */
- if (txr->last_hw_offload == offload) {
- if (offload & CSUM_IP) {
- if (txr->last_hw_ipcss == ipcss &&
- txr->last_hw_ipcso == ipcso &&
- txr->last_hw_tucss == tucss &&
- txr->last_hw_tucso == tucso)
- return;
- } else {
- if (txr->last_hw_tucss == tucss &&
- txr->last_hw_tucso == tucso)
- return;
+ /*
+ * The 82574L can only remember the *last* context used
+ * regardless of queue that it was used for. We cannot reuse
+ * contexts on this hardware platform and must generate a new
+ * context every time. 82574L hardware spec, section 7.2.6,
+ * second note.
+ */
+ if (adapter->num_queues < 2) {
+ /*
+ * Setting up new checksum offload context for every
+ * frame takes a lot of processing time for hardware.
+ * This also reduces performance a lot for small sized
+ * frames so avoid it if driver can use previously
+ * configured checksum offload context.
+ */
+ if (txr->last_hw_offload == offload) {
+ if (offload & CSUM_IP) {
+ if (txr->last_hw_ipcss == ipcss &&
+ txr->last_hw_ipcso == ipcso &&
+ txr->last_hw_tucss == tucss &&
+ txr->last_hw_tucso == tucso)
+ return;
+ } else {
+ if (txr->last_hw_tucss == tucss &&
+ txr->last_hw_tucso == tucso)
+ return;
+ }
}
- }
- txr->last_hw_offload = offload;
- txr->last_hw_tucss = tucss;
- txr->last_hw_tucso = tucso;
+ txr->last_hw_offload = offload;
+ txr->last_hw_tucss = tucss;
+ txr->last_hw_tucso = tucso;
+ }
/*
* Start offset for header checksum calculation.
* End offset for header checksum calculation.
@@ -3760,7 +4024,7 @@ em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
{
struct adapter *adapter = txr->adapter;
struct e1000_context_desc *TXD;
- struct em_buffer *tx_buffer;
+ struct em_txbuffer *tx_buffer;
int cur, hdr_len;
/*
@@ -3838,9 +4102,9 @@ em_txeof(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
int first, last, done, processed;
- struct em_buffer *tx_buffer;
+ struct em_txbuffer *tx_buffer;
struct e1000_tx_desc *tx_desc, *eop_desc;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
EM_TX_LOCK_ASSERT(txr);
#ifdef DEV_NETMAP
@@ -3848,9 +4112,9 @@ em_txeof(struct tx_ring *txr)
return;
#endif /* DEV_NETMAP */
- /* No work, make sure watchdog is off */
+ /* No work, make sure hang detection is disabled */
if (txr->tx_avail == adapter->num_tx_desc) {
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
return;
}
@@ -3893,7 +4157,6 @@ em_txeof(struct tx_ring *txr)
tx_buffer->m_head = NULL;
}
tx_buffer->next_eop = -1;
- txr->watchdog_time = ticks;
if (++first == adapter->num_tx_desc)
first = 0;
@@ -3901,7 +4164,7 @@ em_txeof(struct tx_ring *txr)
tx_buffer = &txr->tx_buffers[first];
tx_desc = &txr->tx_base[first];
}
- ++ifp->if_opackets;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
/* See if we can continue to the next packet */
last = tx_buffer->next_eop;
if (last != -1) {
@@ -3918,14 +4181,16 @@ em_txeof(struct tx_ring *txr)
txr->next_to_clean = first;
/*
- ** Watchdog calculation, we know there's
- ** work outstanding or the first return
- ** would have been taken, so none processed
- ** for too long indicates a hang. local timer
- ** will examine this and do a reset if needed.
+ ** Hang detection: we know there's work outstanding
+ ** or the entry return would have been taken, so no
+ ** descriptor processed here indicates a potential hang.
+ ** The local timer will examine this and do a reset if needed.
*/
- if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
- txr->queue_status = EM_QUEUE_HUNG;
+ if (processed == 0) {
+ if (txr->busy != EM_TX_HUNG)
+ ++txr->busy;
+ } else /* At least one descriptor was cleaned */
+ txr->busy = EM_TX_BUSY; /* note this clears HUNG */
/*
* If we have a minimum free, clear IFF_DRV_OACTIVE
@@ -3934,16 +4199,15 @@ em_txeof(struct tx_ring *txr)
* TX lock which, with a single queue, guarantees
* sanity.
*/
- if (txr->tx_avail >= EM_MAX_SCATTER)
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if (txr->tx_avail >= EM_MAX_SCATTER) {
+ if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
+ }
- /* Disable watchdog if all clean */
- if (txr->tx_avail == adapter->num_tx_desc) {
- txr->queue_status = EM_QUEUE_IDLE;
- }
+ /* Disable hang detection if all clean */
+ if (txr->tx_avail == adapter->num_tx_desc)
+ txr->busy = EM_TX_IDLE;
}
-
/*********************************************************************
*
* Refresh RX descriptor mbufs from system mbuf buffer pool.
@@ -3954,8 +4218,8 @@ em_refresh_mbufs(struct rx_ring *rxr, int limit)
{
struct adapter *adapter = rxr->adapter;
struct mbuf *m;
- bus_dma_segment_t segs[1];
- struct em_buffer *rxbuf;
+ bus_dma_segment_t segs;
+ struct em_rxbuffer *rxbuf;
int i, j, error, nsegs;
bool cleaned = FALSE;
@@ -3990,7 +4254,7 @@ em_refresh_mbufs(struct rx_ring *rxr, int limit)
/* Use bus_dma machinery to setup the memory mapping */
error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
- m, segs, &nsegs, BUS_DMA_NOWAIT);
+ m, &segs, &nsegs, BUS_DMA_NOWAIT);
if (error != 0) {
printf("Refresh mbufs: hdr dmamap load"
" failure - %d\n", error);
@@ -3999,9 +4263,10 @@ em_refresh_mbufs(struct rx_ring *rxr, int limit)
goto update;
}
rxbuf->m_head = m;
+ rxbuf->paddr = segs.ds_addr;
bus_dmamap_sync(rxr->rxtag,
rxbuf->map, BUS_DMASYNC_PREREAD);
- rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
+ em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
cleaned = TRUE;
i = j; /* Next is precalulated for us */
@@ -4036,10 +4301,10 @@ em_allocate_receive_buffers(struct rx_ring *rxr)
{
struct adapter *adapter = rxr->adapter;
device_t dev = adapter->dev;
- struct em_buffer *rxbuf;
+ struct em_rxbuffer *rxbuf;
int error;
- rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
+ rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
if (rxr->rx_buffers == NULL) {
device_printf(dev, "Unable to allocate rx_buffer memory\n");
@@ -4092,22 +4357,22 @@ static int
em_setup_receive_ring(struct rx_ring *rxr)
{
struct adapter *adapter = rxr->adapter;
- struct em_buffer *rxbuf;
+ struct em_rxbuffer *rxbuf;
bus_dma_segment_t seg[1];
int rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
- struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_slot *slot;
+ struct netmap_adapter *na = netmap_getna(adapter->ifp);
#endif
/* Clear the ring contents */
EM_RX_LOCK(rxr);
rsize = roundup2(adapter->num_rx_desc *
- sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
+ sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
bzero((void *)rxr->rx_base, rsize);
#ifdef DEV_NETMAP
- slot = netmap_reset(na, NR_RX, 0, 0);
+ slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif
/*
@@ -4133,10 +4398,10 @@ em_setup_receive_ring(struct rx_ring *rxr)
uint64_t paddr;
void *addr;
- addr = PNMB(slot + si, &paddr);
- netmap_load_map(rxr->rxtag, rxbuf->map, addr);
- /* Update descriptor */
- rxr->rx_base[j].buffer_addr = htole64(paddr);
+ addr = PNMB(na, slot + si, &paddr);
+ netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
+ rxbuf->paddr = paddr;
+ em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
continue;
}
#endif /* DEV_NETMAP */
@@ -4162,8 +4427,8 @@ em_setup_receive_ring(struct rx_ring *rxr)
bus_dmamap_sync(rxr->rxtag,
rxbuf->map, BUS_DMASYNC_PREREAD);
- /* Update descriptor */
- rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
+ rxbuf->paddr = seg[0].ds_addr;
+ em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
}
rxr->next_to_check = 0;
rxr->next_to_refresh = 0;
@@ -4200,7 +4465,7 @@ fail:
for (int i = 0; i < q; ++i) {
rxr = &adapter->rx_rings[i];
for (int n = 0; n < adapter->num_rx_desc; n++) {
- struct em_buffer *rxbuf;
+ struct em_rxbuffer *rxbuf;
rxbuf = &rxr->rx_buffers[n];
if (rxbuf->m_head != NULL) {
bus_dmamap_sync(rxr->rxtag, rxbuf->map,
@@ -4247,7 +4512,7 @@ static void
em_free_receive_buffers(struct rx_ring *rxr)
{
struct adapter *adapter = rxr->adapter;
- struct em_buffer *rxbuf = NULL;
+ struct em_rxbuffer *rxbuf = NULL;
INIT_DEBUGOUT("free_receive_buffers: begin");
@@ -4289,11 +4554,10 @@ em_free_receive_buffers(struct rx_ring *rxr)
static void
em_initialize_receive_unit(struct adapter *adapter)
{
- struct rx_ring *rxr = adapter->rx_rings;
- struct ifnet *ifp = adapter->ifp;
+ struct rx_ring *rxr = adapter->rx_rings;
+ if_t ifp = adapter->ifp;
struct e1000_hw *hw = &adapter->hw;
- u64 bus_addr;
- u32 rctl, rxcsum;
+ u32 rctl, rxcsum, rfctl;
INIT_DEBUGOUT("em_initialize_receive_units: begin");
@@ -4306,14 +4570,39 @@ em_initialize_receive_unit(struct adapter *adapter)
if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+ /* Setup the Receive Control Register */
+ rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
+ rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
+ E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
+ (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
+
+ /* Do not store bad packets */
+ rctl &= ~E1000_RCTL_SBP;
+
+ /* Enable Long Packet receive */
+ if (if_getmtu(ifp) > ETHERMTU)
+ rctl |= E1000_RCTL_LPE;
+ else
+ rctl &= ~E1000_RCTL_LPE;
+
+ /* Strip the CRC */
+ if (!em_disable_crc_stripping)
+ rctl |= E1000_RCTL_SECRC;
+
E1000_WRITE_REG(&adapter->hw, E1000_RADV,
adapter->rx_abs_int_delay.value);
+
+ E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
+ adapter->rx_int_delay.value);
/*
* Set the interrupt throttling rate. Value is calculated
* as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
*/
E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
+ /* Use extended rx descriptor formats */
+ rfctl = E1000_READ_REG(hw, E1000_RFCTL);
+ rfctl |= E1000_RFCTL_EXTEN;
/*
** When using MSIX interrupts we need to throttle
** using the EITR register (82574 only)
@@ -4323,16 +4612,65 @@ em_initialize_receive_unit(struct adapter *adapter)
E1000_WRITE_REG(hw, E1000_EITR_82574(i),
DEFAULT_ITR);
/* Disable accelerated acknowledge */
- E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
+ rfctl |= E1000_RFCTL_ACK_DIS;
}
+ E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
- if (ifp->if_capenable & IFCAP_RXCSUM)
+ if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
+#ifdef EM_MULTIQUEUE
+ rxcsum |= E1000_RXCSUM_TUOFL |
+ E1000_RXCSUM_IPOFL |
+ E1000_RXCSUM_PCSD;
+#else
rxcsum |= E1000_RXCSUM_TUOFL;
- else
+#endif
+ } else
rxcsum &= ~E1000_RXCSUM_TUOFL;
+
E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
+#ifdef EM_MULTIQUEUE
+#define RSSKEYLEN 10
+ if (adapter->num_queues > 1) {
+ uint8_t rss_key[4 * RSSKEYLEN];
+ uint32_t reta = 0;
+ int i;
+
+ /*
+ * Configure RSS key
+ */
+ arc4rand(rss_key, sizeof(rss_key), 0);
+ for (i = 0; i < RSSKEYLEN; ++i) {
+ uint32_t rssrk = 0;
+
+ rssrk = EM_RSSRK_VAL(rss_key, i);
+ E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
+ }
+
+ /*
+ * Configure RSS redirect table in following fashion:
+ * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
+ */
+ for (i = 0; i < sizeof(reta); ++i) {
+ uint32_t q;
+
+ q = (i % adapter->num_queues) << 7;
+ reta |= q << (8 * i);
+ }
+
+ for (i = 0; i < 32; ++i) {
+ E1000_WRITE_REG(hw, E1000_RETA(i), reta);
+ }
+
+ E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
+ E1000_MRQC_RSS_FIELD_IPV4_TCP |
+ E1000_MRQC_RSS_FIELD_IPV4 |
+ E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
+ E1000_MRQC_RSS_FIELD_IPV6_EX |
+ E1000_MRQC_RSS_FIELD_IPV6);
+ }
+#endif
/*
** XXX TEMPORARY WORKAROUND: on some systems with 82573
** long latencies are observed, like Lenovo X60. This
@@ -4345,11 +4683,11 @@ em_initialize_receive_unit(struct adapter *adapter)
for (int i = 0; i < adapter->num_queues; i++, rxr++) {
/* Setup the Base and Length of the Rx Descriptor Ring */
+ u64 bus_addr = rxr->rxdma.dma_paddr;
u32 rdt = adapter->num_rx_desc - 1; /* default */
- bus_addr = rxr->rxdma.dma_paddr;
E1000_WRITE_REG(hw, E1000_RDLEN(i),
- adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
+ adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
/* Setup the Head and Tail Descriptor Pointers */
@@ -4359,40 +4697,48 @@ em_initialize_receive_unit(struct adapter *adapter)
* an init() while a netmap client is active must
* preserve the rx buffers passed to userspace.
*/
- if (ifp->if_capenable & IFCAP_NETMAP)
- rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
+ if (if_getcapenable(ifp) & IFCAP_NETMAP) {
+ struct netmap_adapter *na = netmap_getna(adapter->ifp);
+ rdt -= nm_kr_rxspace(&na->rx_rings[i]);
+ }
#endif /* DEV_NETMAP */
E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
}
- /* Set PTHRESH for improved jumbo performance */
+ /*
+ * Set PTHRESH for improved jumbo performance
+ * According to 10.2.5.11 of Intel 82574 Datasheet,
+ * RXDCTL(1) is written whenever RXDCTL(0) is written.
+ * Only write to RXDCTL(1) if there is a need for different
+ * settings.
+ */
if (((adapter->hw.mac.type == e1000_ich9lan) ||
(adapter->hw.mac.type == e1000_pch2lan) ||
(adapter->hw.mac.type == e1000_ich10lan)) &&
- (ifp->if_mtu > ETHERMTU)) {
+ (if_getmtu(ifp) > ETHERMTU)) {
u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
+ } else if (adapter->hw.mac.type == e1000_82574) {
+ for (int i = 0; i < adapter->num_queues; i++) {
+ u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
+
+ rxdctl |= 0x20; /* PTHRESH */
+ rxdctl |= 4 << 8; /* HTHRESH */
+ rxdctl |= 4 << 16;/* WTHRESH */
+ rxdctl |= 1 << 24; /* Switch to granularity */
+ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
+ }
}
if (adapter->hw.mac.type >= e1000_pch2lan) {
- if (ifp->if_mtu > ETHERMTU)
+ if (if_getmtu(ifp) > ETHERMTU)
e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
else
e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
}
- /* Setup the Receive Control Register */
- rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
- rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
- E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
- (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
-
- /* Strip the CRC */
- rctl |= E1000_RCTL_SECRC;
-
/* Make sure VLAN Filters are off */
rctl &= ~E1000_RCTL_VFE;
- rctl &= ~E1000_RCTL_SBP;
if (adapter->rx_mbuf_sz == MCLBYTES)
rctl |= E1000_RCTL_SZ_2048;
@@ -4401,11 +4747,8 @@ em_initialize_receive_unit(struct adapter *adapter)
else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
- if (ifp->if_mtu > ETHERMTU)
- rctl |= E1000_RCTL_LPE;
- else
- rctl &= ~E1000_RCTL_LPE;
-
+ /* ensure we clear use DTYPE of 00 here */
+ rctl &= ~0x00000C00;
/* Write out the settings */
E1000_WRITE_REG(hw, E1000_RCTL, rctl);
@@ -4428,16 +4771,21 @@ static bool
em_rxeof(struct rx_ring *rxr, int count, int *done)
{
struct adapter *adapter = rxr->adapter;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
struct mbuf *mp, *sendmp;
- u8 status = 0;
+ u32 status = 0;
u16 len;
int i, processed, rxdone = 0;
bool eop;
- struct e1000_rx_desc *cur;
+ union e1000_rx_desc_extended *cur;
EM_RX_LOCK(rxr);
+ /* Sync the ring */
+ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+
#ifdef DEV_NETMAP
if (netmap_rx_irq(ifp, rxr->me, &processed)) {
EM_RX_UNLOCK(rxr);
@@ -4446,24 +4794,20 @@ em_rxeof(struct rx_ring *rxr, int count, int *done)
#endif /* DEV_NETMAP */
for (i = rxr->next_to_check, processed = 0; count != 0;) {
-
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
break;
- bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
- BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
-
cur = &rxr->rx_base[i];
- status = cur->status;
+ status = le32toh(cur->wb.upper.status_error);
mp = sendmp = NULL;
if ((status & E1000_RXD_STAT_DD) == 0)
break;
- len = le16toh(cur->length);
+ len = le16toh(cur->wb.upper.length);
eop = (status & E1000_RXD_STAT_EOP) != 0;
- if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
+ if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
(rxr->discard == TRUE)) {
adapter->dropped_pkts++;
++rxr->rx_discarded;
@@ -4498,9 +4842,9 @@ em_rxeof(struct rx_ring *rxr, int count, int *done)
if (eop) {
--count;
sendmp = rxr->fmp;
- sendmp->m_pkthdr.rcvif = ifp;
- ifp->if_ipackets++;
- em_receive_checksum(cur, sendmp);
+ if_setrcvif(sendmp, ifp);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ em_receive_checksum(status, sendmp);
#ifndef __NO_STRICT_ALIGNMENT
if (adapter->hw.mac.max_frame_size >
(MCLBYTES - ETHER_ALIGN) &&
@@ -4508,8 +4852,8 @@ em_rxeof(struct rx_ring *rxr, int count, int *done)
goto skip;
#endif
if (status & E1000_RXD_STAT_VP) {
- sendmp->m_pkthdr.ether_vtag =
- le16toh(cur->special);
+ if_setvtag(sendmp,
+ le16toh(cur->wb.upper.vlan));
sendmp->m_flags |= M_VLANTAG;
}
#ifndef __NO_STRICT_ALIGNMENT
@@ -4518,8 +4862,12 @@ skip:
rxr->fmp = rxr->lmp = NULL;
}
next_desc:
+ /* Sync the ring */
+ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
/* Zero out the receive descriptors status. */
- cur->status = 0;
+ cur->wb.upper.status_error &= htole32(~0xFF);
++rxdone; /* cumulative for POLL */
++processed;
@@ -4531,7 +4879,7 @@ next_desc:
if (sendmp != NULL) {
rxr->next_to_check = i;
EM_RX_UNLOCK(rxr);
- (*ifp->if_input)(ifp, sendmp);
+ if_input(ifp, sendmp);
EM_RX_LOCK(rxr);
i = rxr->next_to_check;
}
@@ -4558,7 +4906,7 @@ next_desc:
static __inline void
em_rx_discard(struct rx_ring *rxr, int i)
{
- struct em_buffer *rbuf;
+ struct em_rxbuffer *rbuf;
rbuf = &rxr->rx_buffers[i];
bus_dmamap_unload(rxr->rxtag, rbuf->map);
@@ -4630,6 +4978,14 @@ em_fixup_rx(struct rx_ring *rxr)
}
#endif
+static void
+em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
+{
+ rxd->read.buffer_addr = htole64(rxbuf->paddr);
+ /* DD bits must be cleared */
+ rxd->wb.upper.status_error= 0;
+}
+
/*********************************************************************
*
* Verify that the hardware indicated that the checksum is valid.
@@ -4638,23 +4994,27 @@ em_fixup_rx(struct rx_ring *rxr)
*
*********************************************************************/
static void
-em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
+em_receive_checksum(uint32_t status, struct mbuf *mp)
{
mp->m_pkthdr.csum_flags = 0;
/* Ignore Checksum bit is set */
- if (rx_desc->status & E1000_RXD_STAT_IXSM)
+ if (status & E1000_RXD_STAT_IXSM)
return;
- if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
- return;
-
- /* IP Checksum Good? */
- if (rx_desc->status & E1000_RXD_STAT_IPCS)
+ /* If the IP checksum exists and there is no IP Checksum error */
+ if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
+ E1000_RXD_STAT_IPCS) {
mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
+ }
/* TCP or UDP checksum */
- if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
+ if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
+ E1000_RXD_STAT_TCPCS) {
+ mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+ mp->m_pkthdr.csum_data = htons(0xffff);
+ }
+ if (status & E1000_RXD_STAT_UDPCS) {
mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
mp->m_pkthdr.csum_data = htons(0xffff);
}
@@ -4665,12 +5025,12 @@ em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
* config EVENT
*/
static void
-em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
+em_register_vlan(void *arg, if_t ifp, u16 vtag)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
u32 index, bit;
- if (ifp->if_softc != arg) /* Not our event */
+ if ((void*)adapter != arg) /* Not our event */
return;
if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */
@@ -4682,7 +5042,7 @@ em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
adapter->shadow_vfta[index] |= (1 << bit);
++adapter->num_vlans;
/* Re-init to load the changes */
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
em_init_locked(adapter);
EM_CORE_UNLOCK(adapter);
}
@@ -4692,12 +5052,12 @@ em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
* unconfig EVENT
*/
static void
-em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
+em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
u32 index, bit;
- if (ifp->if_softc != arg)
+ if (adapter != arg)
return;
if ((vtag == 0) || (vtag > 4095)) /* Invalid */
@@ -4709,7 +5069,7 @@ em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
adapter->shadow_vfta[index] &= ~(1 << bit);
--adapter->num_vlans;
/* Re-init to load the changes */
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
em_init_locked(adapter);
EM_CORE_UNLOCK(adapter);
}
@@ -4756,8 +5116,8 @@ em_enable_intr(struct adapter *adapter)
u32 ims_mask = IMS_ENABLE_MASK;
if (hw->mac.type == e1000_82574) {
- E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
- ims_mask |= EM_MSIX_MASK;
+ E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
+ ims_mask |= adapter->ims;
}
E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
}
@@ -4963,7 +5323,7 @@ static void
em_enable_wakeup(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
u32 pmc, ctrl, ctrl_ext, rctl;
u16 status;
@@ -4994,10 +5354,10 @@ em_enable_wakeup(device_t dev)
** Determine type of Wakeup: note that wol
** is set with all bits on by default.
*/
- if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
+ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
adapter->wol &= ~E1000_WUFC_MAG;
- if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
+ if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
adapter->wol &= ~E1000_WUFC_MC;
else {
rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
@@ -5020,7 +5380,7 @@ em_enable_wakeup(device_t dev)
/* Request PME */
status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
- if (ifp->if_capenable & IFCAP_WOL)
+ if (if_getcapenable(ifp) & IFCAP_WOL)
status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
@@ -5156,7 +5516,6 @@ em_disable_aspm(struct adapter *adapter)
static void
em_update_stats_counters(struct adapter *adapter)
{
- struct ifnet *ifp;
if(adapter->hw.phy.media_type == e1000_media_type_copper ||
(E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
@@ -5175,12 +5534,7 @@ em_update_stats_counters(struct adapter *adapter)
adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
- /*
- ** For watchdog management we need to know if we have been
- ** paused during the last interval, so capture that here.
- */
- adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
- adapter->stats.xoffrxc += adapter->pause_frames;
+ adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
@@ -5248,19 +5602,29 @@ em_update_stats_counters(struct adapter *adapter)
adapter->stats.tsctfc +=
E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
}
- ifp = adapter->ifp;
-
- ifp->if_collisions = adapter->stats.colc;
+}
- /* Rx Errors */
- ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
- adapter->stats.crcerrs + adapter->stats.algnerrc +
- adapter->stats.ruc + adapter->stats.roc +
- adapter->stats.mpc + adapter->stats.cexterr;
+static uint64_t
+em_get_counter(if_t ifp, ift_counter cnt)
+{
+ struct adapter *adapter;
- /* Tx Errors */
- ifp->if_oerrors = adapter->stats.ecol +
- adapter->stats.latecol + adapter->watchdog_events;
+ adapter = if_getsoftc(ifp);
+
+ switch (cnt) {
+ case IFCOUNTER_COLLISIONS:
+ return (adapter->stats.colc);
+ case IFCOUNTER_IERRORS:
+ return (adapter->dropped_pkts + adapter->stats.rxerrc +
+ adapter->stats.crcerrs + adapter->stats.algnerrc +
+ adapter->stats.ruc + adapter->stats.roc +
+ adapter->stats.mpc + adapter->stats.cexterr);
+ case IFCOUNTER_OERRORS:
+ return (adapter->stats.ecol + adapter->stats.latecol +
+ adapter->watchdog_events);
+ default:
+ return (if_get_counter_default(ifp, cnt));
+ }
}
/* Export a single 32-bit register via a read-only sysctl. */
@@ -5298,18 +5662,15 @@ em_add_hw_stats(struct adapter *adapter)
char namebuf[QUEUE_NAME_LEN];
/* Driver Statistics */
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
- CTLFLAG_RD, &adapter->link_irq,
- "Link MSIX IRQ Handled");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
- CTLFLAG_RD, &adapter->mbuf_alloc_failed,
- "Std mbuf failed");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
- CTLFLAG_RD, &adapter->mbuf_cluster_failed,
- "Std mbuf cluster failed");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
CTLFLAG_RD, &adapter->dropped_pkts,
"Driver dropped packets");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
+ CTLFLAG_RD, &adapter->link_irq,
+ "Link MSIX IRQ Handled");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
+ CTLFLAG_RD, &adapter->mbuf_defrag_failed,
+ "Defragmenting mbuf chain failed");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
CTLFLAG_RD, &adapter->no_tx_dma_setup,
"Driver tx dma failure in xmit");
@@ -5335,10 +5696,10 @@ em_add_hw_stats(struct adapter *adapter)
CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
"Flow Control Low Watermark");
- for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
- snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
+ for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+ snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
- CTLFLAG_RD, NULL, "Queue Name");
+ CTLFLAG_RD, NULL, "TX Queue Name");
queue_list = SYSCTL_CHILDREN(queue_node);
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
@@ -5357,7 +5718,12 @@ em_add_hw_stats(struct adapter *adapter)
SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
CTLFLAG_RD, &txr->no_desc_avail,
"Queue No Descriptor Available");
-
+
+ snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
+ queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
+ CTLFLAG_RD, NULL, "RX Queue Name");
+ queue_list = SYSCTL_CHILDREN(queue_node);
+
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
CTLTYPE_UINT | CTLFLAG_RD, adapter,
E1000_RDH(rxr->me),
@@ -5781,29 +6147,98 @@ em_print_debug_info(struct adapter *adapter)
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
- if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
+ if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
printf("Interface is RUNNING ");
else
printf("Interface is NOT RUNNING\n");
- if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
+ if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
printf("and INACTIVE\n");
else
printf("and ACTIVE\n");
- device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
- E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
- E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
- device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
- E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
- E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
- device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
- device_printf(dev, "TX descriptors avail = %d\n",
- txr->tx_avail);
- device_printf(dev, "Tx Descriptors avail failure = %ld\n",
- txr->no_desc_avail);
- device_printf(dev, "RX discarded packets = %ld\n",
- rxr->rx_discarded);
- device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
- device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
+ for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+ device_printf(dev, "TX Queue %d ------\n", i);
+ device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
+ E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
+ E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
+ device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
+ device_printf(dev, "TX descriptors avail = %d\n",
+ txr->tx_avail);
+ device_printf(dev, "Tx Descriptors avail failure = %ld\n",
+ txr->no_desc_avail);
+ device_printf(dev, "RX Queue %d ------\n", i);
+ device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
+ E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
+ E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
+ device_printf(dev, "RX discarded packets = %ld\n",
+ rxr->rx_discarded);
+ device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
+ device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
+ }
+}
+
+#ifdef EM_MULTIQUEUE
+/*
+ * 82574 only:
+ * Write a new value to the EEPROM increasing the number of MSIX
+ * vectors from 3 to 5, for proper multiqueue support.
+ */
+static void
+em_enable_vectors_82574(struct adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ device_t dev = adapter->dev;
+ u16 edata;
+
+ e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
+	device_printf(dev, "Current cap: %#06x\n", edata);
+ if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
+ device_printf(dev, "Writing to eeprom: increasing "
+ "reported MSIX vectors from 3 to 5...\n");
+ edata &= ~(EM_NVM_MSIX_N_MASK);
+ edata |= 4 << EM_NVM_MSIX_N_SHIFT;
+ e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
+ e1000_update_nvm_checksum(hw);
+ device_printf(dev, "Writing to eeprom: done\n");
+ }
+}
+#endif
+
+#ifdef DDB
+DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
+{
+ devclass_t dc;
+ int max_em;
+
+ dc = devclass_find("em");
+ max_em = devclass_get_maxunit(dc);
+
+	for (int index = 0; index < max_em; index++) {
+		device_t dev;
+		dev = devclass_get_device(dc, index);
+		if (dev != NULL && device_get_driver(dev) == &em_driver) {
+ struct adapter *adapter = device_get_softc(dev);
+ EM_CORE_LOCK(adapter);
+ em_init_locked(adapter);
+ EM_CORE_UNLOCK(adapter);
+ }
+ }
}
+DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
+{
+ devclass_t dc;
+ int max_em;
+
+ dc = devclass_find("em");
+ max_em = devclass_get_maxunit(dc);
+
+	for (int index = 0; index < max_em; index++) {
+		device_t dev;
+		dev = devclass_get_device(dc, index);
+		if (dev != NULL && device_get_driver(dev) == &em_driver)
+ em_print_debug_info(device_get_softc(dev));
+ }
+
+}
+#endif
diff --git a/freebsd/sys/dev/e1000/if_em.h b/freebsd/sys/dev/e1000/if_em.h
index 09d81d25..2a2bf2cc 100644
--- a/freebsd/sys/dev/e1000/if_em.h
+++ b/freebsd/sys/dev/e1000/if_em.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2011, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -53,7 +53,11 @@
*/
#define EM_MIN_TXD 80
#define EM_MAX_TXD 4096
+#ifdef EM_MULTIQUEUE
+#define EM_DEFAULT_TXD 4096
+#else
#define EM_DEFAULT_TXD 1024
+#endif
/*
* EM_RXD - Maximum number of receive Descriptors
@@ -70,7 +74,11 @@
*/
#define EM_MIN_RXD 80
#define EM_MAX_RXD 4096
+#ifdef EM_MULTIQUEUE
+#define EM_DEFAULT_RXD 4096
+#else
#define EM_DEFAULT_RXD 1024
+#endif
/*
* EM_TIDV - Transmit Interrupt Delay Value
@@ -117,7 +125,11 @@
* restoring the network connection. To eliminate the potential
* for the hang ensure that EM_RDTR is set to 0.
*/
+#ifdef EM_MULTIQUEUE
+#define EM_RDTR 64
+#else
#define EM_RDTR 0
+#endif
/*
* Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544)
@@ -130,7 +142,11 @@
* along with EM_RDTR, may improve traffic throughput in specific network
* conditions.
*/
+#ifdef EM_MULTIQUEUE
+#define EM_RADV 128
+#else
#define EM_RADV 64
+#endif
/*
* This parameter controls the max duration of transmit watchdog.
@@ -188,9 +204,22 @@
#define EM_EEPROM_APME 0x400;
#define EM_82544_APME 0x0004;
-#define EM_QUEUE_IDLE 0
-#define EM_QUEUE_WORKING 1
-#define EM_QUEUE_HUNG 2
+/*
+ * Driver state logic for the detection of a hung state
+ * in hardware. Set TX_HUNG whenever a TX packet is used
+ * (data is sent) and clear it when txeof() is invoked if
+ * any descriptors from the ring are cleaned/reclaimed.
+ * Increment internal counter if no descriptors are cleaned
+ * and compare to TX_MAXTRIES. When counter > TX_MAXTRIES,
+ * reset adapter.
+ */
+#define EM_TX_IDLE 0x00000000
+#define EM_TX_BUSY 0x00000001
+#define EM_TX_HUNG 0x80000000
+#define EM_TX_MAXTRIES 10
+
+#define PCICFG_DESC_RING_STATUS 0xe4
+#define FLUSH_DESC_REQUIRED 0x100
/*
* TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be
@@ -199,7 +228,15 @@
*/
#define EM_DBA_ALIGN 128
-#define SPEED_MODE_BIT (1<<21) /* On PCI-E MACs only */
+/*
+ * See Intel 82574 Driver Programming Interface Manual, Section 10.2.6.9
+ */
+#define TARC_COMPENSATION_MODE (1 << 7) /* Compensation Mode */
+#define TARC_SPEED_MODE_BIT (1 << 21) /* On PCI-E MACs only */
+#define	TARC_MQ_FIX		((1 << 23) | \
+				 (1 << 24) | \
+				 (1 << 25))	/* Handle errata in MQ mode */
+#define TARC_ERRATA_BIT (1 << 26) /* Note from errata on 82574 */
/* PCI Config defines */
#define EM_BAR_TYPE(v) ((v) & EM_BAR_TYPE_MASK)
@@ -232,7 +269,7 @@
#define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A)
#define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B)
-#define EM_MAX_SCATTER 32
+#define EM_MAX_SCATTER 40
#define EM_VFTA_SIZE 128
#define EM_TSO_SIZE (65535 + sizeof(struct ether_vlan_header))
#define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */
@@ -249,6 +286,14 @@
* solve it just using this define.
*/
#define EM_EIAC 0x000DC
+/*
+ * 82574 only reports 3 MSI-X vectors by default;
+ * defines assisting with making it report 5 are
+ * located here.
+ */
+#define EM_NVM_PCIE_CTRL 0x1B
+#define EM_NVM_MSIX_N_MASK (0x7 << EM_NVM_MSIX_N_SHIFT)
+#define EM_NVM_MSIX_N_SHIFT 7
/*
* Bus dma allocation structure used by
@@ -281,15 +326,14 @@ struct tx_ring {
u32 me;
u32 msix;
u32 ims;
- int queue_status;
- int watchdog_time;
+ int busy;
struct em_dma_alloc txdma;
struct e1000_tx_desc *tx_base;
struct task tx_task;
struct taskqueue *tq;
u32 next_avail_desc;
u32 next_to_clean;
- struct em_buffer *tx_buffers;
+ struct em_txbuffer *tx_buffers;
volatile u16 tx_avail;
u32 tx_tso; /* last tx was tso */
u16 last_hw_offload;
@@ -321,11 +365,11 @@ struct rx_ring {
u32 payload;
struct task rx_task;
struct taskqueue *tq;
- struct e1000_rx_desc *rx_base;
+ union e1000_rx_desc_extended *rx_base;
struct em_dma_alloc rxdma;
u32 next_to_refresh;
u32 next_to_check;
- struct em_buffer *rx_buffers;
+ struct em_rxbuffer *rx_buffers;
struct mbuf *fmp;
struct mbuf *lmp;
@@ -345,12 +389,12 @@ struct rx_ring {
/* Our adapter structure */
struct adapter {
- struct ifnet *ifp;
+ if_t ifp;
struct e1000_hw hw;
/* FreeBSD operating-system-specific structures. */
struct e1000_osdep osdep;
- struct device *dev;
+ device_t dev;
struct cdev *led_dev;
struct resource *memory;
@@ -368,7 +412,6 @@ struct adapter {
int if_flags;
int max_frame_size;
int min_frame_size;
- int pause_frames;
struct mtx core_mtx;
int em_insert_vlan_header;
u32 ims;
@@ -383,7 +426,7 @@ struct adapter {
eventhandler_tag vlan_detach;
u16 num_vlans;
- u16 num_queues;
+ u8 num_queues;
/*
* Transmit rings:
@@ -433,13 +476,12 @@ struct adapter {
/* Misc stats maintained by the driver */
unsigned long dropped_pkts;
- unsigned long mbuf_alloc_failed;
- unsigned long mbuf_cluster_failed;
+ unsigned long link_irq;
+ unsigned long mbuf_defrag_failed;
+ unsigned long no_tx_dma_setup;
unsigned long no_tx_map_avail;
- unsigned long no_tx_dma_setup;
unsigned long rx_overruns;
unsigned long watchdog_events;
- unsigned long link_irq;
struct e1000_hw_stats stats;
};
@@ -459,10 +501,17 @@ typedef struct _em_vendor_info_t {
unsigned int index;
} em_vendor_info_t;
-struct em_buffer {
+struct em_txbuffer {
+ int next_eop; /* Index of the desc to watch */
+ struct mbuf *m_head;
+ bus_dmamap_t map; /* bus_dma map for packet */
+};
+
+struct em_rxbuffer {
int next_eop; /* Index of the desc to watch */
struct mbuf *m_head;
bus_dmamap_t map; /* bus_dma map for packet */
+ bus_addr_t paddr;
};
@@ -501,4 +550,9 @@ e1000_rx_unrefreshed(struct rx_ring *rxr)
#define EM_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_mtx, MA_OWNED)
#define EM_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_mtx, MA_OWNED)
+#define EM_RSSRK_SIZE 4
+#define EM_RSSRK_VAL(key, i) (key[(i) * EM_RSSRK_SIZE] | \
+ key[(i) * EM_RSSRK_SIZE + 1] << 8 | \
+ key[(i) * EM_RSSRK_SIZE + 2] << 16 | \
+ key[(i) * EM_RSSRK_SIZE + 3] << 24)
#endif /* _EM_H_DEFINED_ */
diff --git a/freebsd/sys/dev/e1000/if_igb.c b/freebsd/sys/dev/e1000/if_igb.c
index 27ae386c..d683b85f 100644
--- a/freebsd/sys/dev/e1000/if_igb.c
+++ b/freebsd/sys/dev/e1000/if_igb.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2013, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -37,73 +37,19 @@
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_rss.h>
#ifdef HAVE_KERNEL_OPTION_HEADERS
#include <rtems/bsd/local/opt_device_polling.h>
#include <rtems/bsd/local/opt_altq.h>
#endif
-#include <rtems/bsd/sys/param.h>
-#include <sys/systm.h>
-#ifndef IGB_LEGACY_TX
-#include <sys/buf_ring.h>
-#endif
-#include <sys/bus.h>
-#include <sys/endian.h>
-#include <sys/kernel.h>
-#include <sys/kthread.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <sys/rman.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
-#include <sys/sysctl.h>
-#include <sys/taskqueue.h>
-#include <sys/eventhandler.h>
-#include <sys/pcpu.h>
-#include <sys/smp.h>
-#include <machine/smp.h>
-#include <machine/bus.h>
-#include <machine/resource.h>
-
-#include <net/bpf.h>
-#include <net/ethernet.h>
-#include <net/if.h>
-#include <net/if_arp.h>
-#include <net/if_dl.h>
-#include <net/if_media.h>
-
-#include <net/if_types.h>
-#include <net/if_vlan_var.h>
-
-#include <netinet/in_systm.h>
-#include <netinet/in.h>
-#include <netinet/if_ether.h>
-#include <netinet/ip.h>
-#include <netinet/ip6.h>
-#include <netinet/tcp.h>
-#include <netinet/tcp_lro.h>
-#include <netinet/udp.h>
-
-#include <machine/in_cksum.h>
-#include <dev/led/led.h>
-#include <dev/pci/pcivar.h>
-#include <dev/pci/pcireg.h>
-
-#include "e1000_api.h"
-#include "e1000_82575.h"
#include "if_igb.h"
/*********************************************************************
- * Set this to one to display debug statistics
- *********************************************************************/
-int igb_display_debug_stats = 0;
-
-/*********************************************************************
* Driver version:
*********************************************************************/
-char igb_driver_version[] = "version - 2.4.0";
+char igb_driver_version[] = "2.5.3-k";
/*********************************************************************
@@ -118,60 +64,47 @@ char igb_driver_version[] = "version - 2.4.0";
static igb_vendor_info_t igb_vendor_info_array[] =
{
- { 0x8086, E1000_DEV_ID_82575EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
- PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
- PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82576, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82576_NS, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82576_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82576_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
- PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
- PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
- PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82576_VF, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82580_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82580_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82580_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82580_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
- PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
- PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_DH89XXCC_SFP, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
- PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I350_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I350_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I350_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I350_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I350_VF, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I210_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I210_COPPER_IT, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
- PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS,
- PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS,
- PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I210_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I210_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I210_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I211_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS,
- PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS,
- PCI_ANY_ID, PCI_ANY_ID, 0},
- { 0x8086, E1000_DEV_ID_I354_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
+ {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII, 0, 0, 0},
/* required last entry */
- { 0, 0, 0, 0, 0}
+ {0, 0, 0, 0, 0}
};
/*********************************************************************
@@ -201,6 +134,7 @@ static void igb_start(struct ifnet *);
static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int igb_ioctl(struct ifnet *, u_long, caddr_t);
+static uint64_t igb_get_counter(if_t, ift_counter);
static void igb_init(void *);
static void igb_init_locked(struct adapter *);
static void igb_stop(void *);
@@ -318,6 +252,9 @@ static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(igb, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
/*********************************************************************
* Tunable default values.
@@ -328,8 +265,6 @@ static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
-TUNABLE_INT("hw.igb.rxd", &igb_rxd);
-TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
"Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
@@ -342,8 +277,7 @@ SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
** traffic for that interrupt vector
*/
static int igb_enable_aim = TRUE;
-TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
-SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
+SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
"Enable adaptive interrupt moderation");
/*
@@ -351,7 +285,6 @@ SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
* but this allows it to be forced off for testing.
*/
static int igb_enable_msix = 1;
-TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
"Enable MSI-X interrupts");
@@ -359,7 +292,6 @@ SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
** Tuneable Interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
-TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
&igb_max_interrupt_rate, 0, "Maximum interrupts per second");
@@ -368,21 +300,19 @@ SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
** Tuneable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
-TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
&igb_buf_ring_size, 0, "Size of the bufring");
#endif
/*
** Header split causes the packet header to
-** be dma'd to a seperate mbuf from the payload.
+** be dma'd to a separate mbuf from the payload.
** this can have memory alignment benefits. But
** another plus is that small packets often fit
** into the header and thus use no cluster. Its
** a very workload dependent type feature.
*/
static int igb_header_split = FALSE;
-TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
"Enable receive mbuf header split");
@@ -392,7 +322,6 @@ SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
-TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
"Number of queues to configure, 0 indicates autoconfigure");
@@ -405,11 +334,16 @@ static int igb_last_bind_cpu = -1;
/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
-TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
&igb_rx_process_limit, 0,
"Maximum number of received packets to process at a time, -1 means unlimited");
+/* How many packets txeof tries to clean at a time */
+static int igb_tx_process_limit = -1;
+SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
+ &igb_tx_process_limit, 0,
+ "Maximum number of sent packets to process at a time, -1 means unlimited");
+
#ifdef DEV_NETMAP /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
@@ -425,7 +359,7 @@ SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
static int
igb_probe(device_t dev)
{
- char adapter_name[60];
+ char adapter_name[256];
uint16_t pci_vendor_id = 0;
uint16_t pci_device_id = 0;
uint16_t pci_subvendor_id = 0;
@@ -435,7 +369,7 @@ igb_probe(device_t dev)
INIT_DEBUGOUT("igb_probe: begin");
pci_vendor_id = pci_get_vendor(dev);
- if (pci_vendor_id != IGB_VENDOR_ID)
+ if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
return (ENXIO);
pci_device_id = pci_get_device(dev);
@@ -448,11 +382,11 @@ igb_probe(device_t dev)
(pci_device_id == ent->device_id) &&
((pci_subvendor_id == ent->subvendor_id) ||
- (ent->subvendor_id == PCI_ANY_ID)) &&
+ (ent->subvendor_id == 0)) &&
((pci_subdevice_id == ent->subdevice_id) ||
- (ent->subdevice_id == PCI_ANY_ID))) {
- sprintf(adapter_name, "%s %s",
+ (ent->subdevice_id == 0))) {
+ sprintf(adapter_name, "%s, Version - %s",
igb_strings[ent->index],
igb_driver_version);
device_set_desc_copy(dev, adapter_name);
@@ -460,7 +394,6 @@ igb_probe(device_t dev)
}
ent++;
}
-
return (ENXIO);
}
@@ -492,7 +425,7 @@ igb_attach(device_t dev)
adapter->dev = adapter->osdep.dev = dev;
IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
- /* SYSCTL stuff */
+ /* SYSCTLs */
SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
@@ -528,11 +461,15 @@ igb_attach(device_t dev)
e1000_get_bus_info(&adapter->hw);
- /* Sysctl for limiting the amount of work done in the taskqueue */
+ /* Sysctls for limiting the amount of work done in the taskqueues */
igb_set_sysctl_value(adapter, "rx_processing_limit",
"max number of rx packets to process",
&adapter->rx_process_limit, igb_rx_process_limit);
+ igb_set_sysctl_value(adapter, "tx_processing_limit",
+ "max number of tx packets to process",
+ &adapter->tx_process_limit, igb_tx_process_limit);
+
/*
* Validate number of transmit and receive descriptors. It
* must not exceed hardware maximum, and must be multiple
@@ -616,9 +553,9 @@ igb_attach(device_t dev)
"Disable Energy Efficient Ethernet");
if (adapter->hw.phy.media_type == e1000_media_type_copper) {
if (adapter->hw.mac.type == e1000_i354)
- e1000_set_eee_i354(&adapter->hw);
+ e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
else
- e1000_set_eee_i350(&adapter->hw);
+ e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
}
}
@@ -724,7 +661,8 @@ igb_attach(device_t dev)
return (0);
err_late:
- igb_detach(dev);
+ if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */
+ return(error);
igb_free_transmit_structures(adapter);
igb_free_receive_structures(adapter);
igb_release_hw_control(adapter);
@@ -977,12 +915,33 @@ igb_mq_start(struct ifnet *ifp, struct mbuf *m)
struct igb_queue *que;
struct tx_ring *txr;
int i, err = 0;
+#ifdef RSS
+ uint32_t bucket_id;
+#endif
/* Which queue to use */
- if ((m->m_flags & M_FLOWID) != 0)
- i = m->m_pkthdr.flowid % adapter->num_queues;
- else
+ /*
+ * When doing RSS, map it to the same outbound queue
+ * as the incoming flow would be mapped to.
+ *
+ * If everything is setup correctly, it should be the
+ * same bucket that the current CPU we're on is.
+ */
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
+#ifdef RSS
+ if (rss_hash2bucket(m->m_pkthdr.flowid,
+ M_HASHTYPE_GET(m), &bucket_id) == 0) {
+ /* XXX TODO: spit out something if bucket_id > num_queues? */
+ i = bucket_id % adapter->num_queues;
+ } else {
+#endif
+ i = m->m_pkthdr.flowid % adapter->num_queues;
+#ifdef RSS
+ }
+#endif
+ } else {
i = curcpu % adapter->num_queues;
+ }
txr = &adapter->tx_rings[i];
que = &adapter->queues[i];
@@ -1011,7 +970,6 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
adapter->link_active == 0)
return (ENETDOWN);
-
/* Process the queue */
while ((next = drbr_peek(ifp, txr->br)) != NULL) {
if ((err = igb_xmit(txr, &next)) != 0) {
@@ -1030,9 +988,8 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
}
drbr_advance(ifp, txr->br);
enq++;
- ifp->if_obytes += next->m_pkthdr.len;
- if (next->m_flags & M_MCAST)
- ifp->if_omcasts++;
+ if (next->m_flags & M_MCAST && adapter->vf_ifp)
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
ETHER_BPF_MTAP(ifp, next);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
@@ -1151,7 +1108,8 @@ igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
ifp->if_mtu = ifr->ifr_mtu;
adapter->max_frame_size =
ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
- igb_init_locked(adapter);
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ igb_init_locked(adapter);
IGB_CORE_UNLOCK(adapter);
break;
}
@@ -1230,10 +1188,27 @@ igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
}
}
#endif
+#if __FreeBSD_version >= 1000000
+ /* HW cannot turn these on/off separately */
+ if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
+ ifp->if_capenable ^= IFCAP_RXCSUM;
+ ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
+ reinit = 1;
+ }
+ if (mask & IFCAP_TXCSUM) {
+ ifp->if_capenable ^= IFCAP_TXCSUM;
+ reinit = 1;
+ }
+ if (mask & IFCAP_TXCSUM_IPV6) {
+ ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
+ reinit = 1;
+ }
+#else
if (mask & IFCAP_HWCSUM) {
ifp->if_capenable ^= IFCAP_HWCSUM;
reinit = 1;
}
+#endif
if (mask & IFCAP_TSO4) {
ifp->if_capenable ^= IFCAP_TSO4;
reinit = 1;
@@ -1312,16 +1287,32 @@ igb_init_locked(struct adapter *adapter)
/* Set hardware offload abilities */
ifp->if_hwassist = 0;
if (ifp->if_capenable & IFCAP_TXCSUM) {
+#if __FreeBSD_version >= 1000000
+ ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
+ if (adapter->hw.mac.type != e1000_82575)
+ ifp->if_hwassist |= CSUM_IP_SCTP;
+#else
ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
- if (adapter->hw.mac.type == e1000_82576)
+ if (adapter->hw.mac.type != e1000_82575)
ifp->if_hwassist |= CSUM_SCTP;
#endif
+#endif
}
+#if __FreeBSD_version >= 1000000
+ if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
+ ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
+ if (adapter->hw.mac.type != e1000_82575)
+ ifp->if_hwassist |= CSUM_IP6_SCTP;
+ }
+#endif
if (ifp->if_capenable & IFCAP_TSO)
ifp->if_hwassist |= CSUM_TSO;
+ /* Clear bad data from Rx FIFOs */
+ e1000_rx_fifo_flush_82575(&adapter->hw);
+
/* Configure for OS presence */
igb_init_manageability(adapter);
@@ -1385,9 +1376,9 @@ igb_init_locked(struct adapter *adapter)
/* Set Energy Efficient Ethernet */
if (adapter->hw.phy.media_type == e1000_media_type_copper) {
if (adapter->hw.mac.type == e1000_i354)
- e1000_set_eee_i354(&adapter->hw);
+ e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
else
- e1000_set_eee_i350(&adapter->hw);
+ e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
}
}
@@ -1879,7 +1870,8 @@ retry:
/* Try it again? - one try */
if (remap == TRUE) {
remap = FALSE;
- m = m_defrag(*m_headp, M_NOWAIT);
+ m = m_collapse(*m_headp, M_NOWAIT,
+ IGB_MAX_SCATTER);
if (m == NULL) {
adapter->mbuf_defrag_failed++;
m_freem(*m_headp);
@@ -1890,9 +1882,6 @@ retry:
goto retry;
} else
return (error);
- case ENOMEM:
- txr->no_tx_dma_setup++;
- return (error);
default:
txr->no_tx_dma_setup++;
m_freem(*m_headp);
@@ -1902,7 +1891,7 @@ retry:
}
/* Make certain there are enough descriptors */
- if (nsegs > txr->tx_avail - 2) {
+ if (txr->tx_avail < (nsegs + 2)) {
txr->no_desc_avail++;
bus_dmamap_unload(txr->txtag, map);
return (ENOBUFS);
@@ -2443,11 +2432,37 @@ igb_allocate_msix(struct adapter *adapter)
device_t dev = adapter->dev;
struct igb_queue *que = adapter->queues;
int error, rid, vector = 0;
+ int cpu_id = 0;
+#ifdef RSS
+ cpuset_t cpu_mask;
+#endif
/* Be sure to start with all interrupts disabled */
E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
E1000_WRITE_FLUSH(&adapter->hw);
+#ifdef RSS
+ /*
+ * If we're doing RSS, the number of queues needs to
+ * match the number of RSS buckets that are configured.
+ *
+ * + If there's more queues than RSS buckets, we'll end
+ * up with queues that get no traffic.
+ *
+ * + If there's more RSS buckets than queues, we'll end
+ * up having multiple RSS buckets map to the same queue,
+ * so there'll be some contention.
+ */
+ if (adapter->num_queues != rss_getnumbuckets()) {
+ device_printf(dev,
+ "%s: number of queues (%d) != number of RSS buckets (%d)"
+ "; performance will be impacted.\n",
+ __func__,
+ adapter->num_queues,
+ rss_getnumbuckets());
+ }
+#endif
+
for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
rid = vector +1;
que->res = bus_alloc_resource_any(dev,
@@ -2474,19 +2489,42 @@ igb_allocate_msix(struct adapter *adapter)
que->eims = E1000_EICR_TX_QUEUE0 << i;
else
que->eims = 1 << vector;
+
+#ifdef RSS
/*
- ** Bind the msix vector, and thus the
- ** rings to the corresponding cpu.
- */
+ * The queue ID is used as the RSS layer bucket ID.
+ * We look up the queue ID -> RSS CPU ID and select
+ * that.
+ */
+ cpu_id = rss_getcpu(i % rss_getnumbuckets());
+#else
+ /*
+ * Bind the msix vector, and thus the
+ * rings to the corresponding cpu.
+ *
+ * This just happens to match the default RSS round-robin
+ * bucket -> queue -> CPU allocation.
+ */
if (adapter->num_queues > 1) {
if (igb_last_bind_cpu < 0)
igb_last_bind_cpu = CPU_FIRST();
- bus_bind_intr(dev, que->res, igb_last_bind_cpu);
+ cpu_id = igb_last_bind_cpu;
+ }
+#endif
+
+ if (adapter->num_queues > 1) {
+ bus_bind_intr(dev, que->res, cpu_id);
+#ifdef RSS
+ device_printf(dev,
+ "Bound queue %d to RSS bucket %d\n",
+ i, cpu_id);
+#else
device_printf(dev,
"Bound queue %d to cpu %d\n",
- i,igb_last_bind_cpu);
- igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
+ i, cpu_id);
+#endif
}
+
#ifndef IGB_LEGACY_TX
TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
que->txr);
@@ -2495,8 +2533,35 @@ igb_allocate_msix(struct adapter *adapter)
TASK_INIT(&que->que_task, 0, igb_handle_que, que);
que->tq = taskqueue_create("igb_que", M_NOWAIT,
taskqueue_thread_enqueue, &que->tq);
- taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
- device_get_nameunit(adapter->dev));
+ if (adapter->num_queues > 1) {
+ /*
+ * Only pin the taskqueue thread to a CPU if
+ * RSS is in use.
+ *
+ * This again just happens to match the default RSS
+ * round-robin bucket -> queue -> CPU allocation.
+ */
+#ifdef RSS
+ CPU_SETOF(cpu_id, &cpu_mask);
+ taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
+ &cpu_mask,
+ "%s que (bucket %d)",
+ device_get_nameunit(adapter->dev),
+ cpu_id);
+#else
+ taskqueue_start_threads(&que->tq, 1, PI_NET,
+ "%s que (qid %d)",
+ device_get_nameunit(adapter->dev),
+ cpu_id);
+#endif
+ } else {
+ taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
+ device_get_nameunit(adapter->dev));
+ }
+
+ /* Finally update the last bound CPU id */
+ if (adapter->num_queues > 1)
+ igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
}
/* And Link */
@@ -2770,13 +2835,19 @@ igb_setup_msix(struct adapter *adapter)
goto msi;
}
- /* Figure out a reasonable auto config value */
queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
- /* Manual override */
+ /* Override via tuneable */
if (igb_num_queues != 0)
queues = igb_num_queues;
+#ifdef RSS
+ /* If we're doing RSS, clamp at the number of RSS buckets */
+ if (queues > rss_getnumbuckets())
+ queues = rss_getnumbuckets();
+#endif
+
+
/* Sanity check based on HW */
switch (adapter->hw.mac.type) {
case e1000_82575:
@@ -2798,13 +2869,11 @@ igb_setup_msix(struct adapter *adapter)
maxqueues = 1;
break;
}
+
+ /* Final clamp on the actual hardware capability */
if (queues > maxqueues)
queues = maxqueues;
- /* Manual override */
- if (igb_num_queues != 0)
- queues = igb_num_queues;
-
/*
** One vector (RX/TX pair) per queue
** plus an additional for Link interrupt
@@ -2913,21 +2982,6 @@ igb_init_dmac(struct adapter *adapter, u32 pba)
E1000_WRITE_REG(hw, E1000_DMACR, reg);
-#ifdef I210_OBFF_SUPPORT
- /*
- * Set the OBFF Rx threshold to DMA Coalescing Rx
- * threshold - 2KB and enable the feature in the
- * hardware for I210.
- */
- if (hw->mac.type == e1000_i210) {
- int obff = dmac - 2;
- reg = E1000_READ_REG(hw, E1000_DOBFFCTL);
- reg &= ~E1000_DOBFFCTL_OBFFTHR_MASK;
- reg |= (obff & E1000_DOBFFCTL_OBFFTHR_MASK)
- | E1000_DOBFFCTL_EXIT_ACT_MASK;
- E1000_WRITE_REG(hw, E1000_DOBFFCTL, reg);
- }
-#endif
E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
/* Set the interval before transition */
@@ -3117,6 +3171,13 @@ igb_setup_interface(device_t dev, struct adapter *adapter)
ifp->if_softc = adapter;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = igb_ioctl;
+ ifp->if_get_counter = igb_get_counter;
+
+ /* TSO parameters */
+ ifp->if_hw_tsomax = IP_MAXPACKET;
+ ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER;
+ ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE;
+
#ifndef IGB_LEGACY_TX
ifp->if_transmit = igb_mq_start;
ifp->if_qflush = igb_qflush;
@@ -3132,6 +3193,9 @@ igb_setup_interface(device_t dev, struct adapter *adapter)
ifp->if_capabilities = ifp->if_capenable = 0;
ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
+#if __FreeBSD_version >= 1000000
+ ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
+#endif
ifp->if_capabilities |= IFCAP_TSO;
ifp->if_capabilities |= IFCAP_JUMBO_MTU;
ifp->if_capenable = ifp->if_capabilities;
@@ -3147,7 +3211,7 @@ igb_setup_interface(device_t dev, struct adapter *adapter)
* Tell the upper layer(s) we
* support full VLAN capability.
*/
- ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
+ ifp->if_hdrlen = sizeof(struct ether_vlan_header);
ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
| IFCAP_VLAN_HWTSO
| IFCAP_VLAN_MTU;
@@ -3260,7 +3324,6 @@ fail_2:
bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
bus_dma_tag_destroy(dma->dma_tag);
fail_0:
- dma->dma_map = NULL;
dma->dma_tag = NULL;
return (error);
@@ -3271,12 +3334,15 @@ igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
{
if (dma->dma_tag == NULL)
return;
- if (dma->dma_map != NULL) {
+ if (dma->dma_paddr != 0) {
bus_dmamap_sync(dma->dma_tag, dma->dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(dma->dma_tag, dma->dma_map);
+ dma->dma_paddr = 0;
+ }
+ if (dma->dma_vaddr != NULL) {
bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
- dma->dma_map = NULL;
+ dma->dma_vaddr = NULL;
}
bus_dma_tag_destroy(dma->dma_tag);
dma->dma_tag = NULL;
@@ -3533,7 +3599,7 @@ igb_setup_transmit_ring(struct tx_ring *txr)
if (slot) {
int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
/* no need to set the address */
- netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
+ netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
}
#endif /* DEV_NETMAP */
/* clear the watch index */
@@ -3903,17 +3969,29 @@ igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
switch (ipproto) {
case IPPROTO_TCP:
+#if __FreeBSD_version >= 1000000
+ if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
+#else
if (mp->m_pkthdr.csum_flags & CSUM_TCP)
+#endif
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
break;
case IPPROTO_UDP:
+#if __FreeBSD_version >= 1000000
+ if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
+#else
if (mp->m_pkthdr.csum_flags & CSUM_UDP)
+#endif
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
break;
#if __FreeBSD_version >= 800000
case IPPROTO_SCTP:
+#if __FreeBSD_version >= 1000000
+ if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
+#else
if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
+#endif
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
break;
#endif
@@ -3956,9 +4034,11 @@ static bool
igb_txeof(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
+#ifdef DEV_NETMAP
struct ifnet *ifp = adapter->ifp;
+#endif /* DEV_NETMAP */
u32 work, processed = 0;
- u16 limit = txr->process_limit;
+ int limit = adapter->tx_process_limit;
struct igb_tx_buf *buf;
union e1000_adv_tx_desc *txd;
@@ -4031,7 +4111,6 @@ igb_txeof(struct tx_ring *txr)
}
++txr->packets;
++processed;
- ++ifp->if_opackets;
txr->watchdog_time = ticks;
/* Try the next packet */
@@ -4332,13 +4411,13 @@ igb_setup_receive_ring(struct rx_ring *rxr)
rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
if (slot) {
- /* slot sj is mapped to the i-th NIC-ring entry */
+ /* slot sj is mapped to the j-th NIC-ring entry */
int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
uint64_t paddr;
void *addr;
- addr = PNMB(slot + sj, &paddr);
- netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
+ addr = PNMB(na, slot + sj, &paddr);
+ netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
/* Update descriptor */
rxr->rx_base[j].read.pkt_addr = htole64(paddr);
continue;
@@ -4399,7 +4478,6 @@ skip_head:
rxr->fmp = NULL;
rxr->lmp = NULL;
- rxr->discard = FALSE;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
@@ -4408,7 +4486,7 @@ skip_head:
** Now set up the LRO interface, we
** also only do head split when LRO
** is enabled, since so often they
- ** are undesireable in similar setups.
+ ** are undesirable in similar setups.
*/
if (ifp->if_capenable & IFCAP_LRO) {
error = tcp_lro_init(lro);
@@ -4463,6 +4541,106 @@ fail:
return (ENOBUFS);
}
+/*
+ * Initialise the RSS mapping for NICs that support multiple transmit/
+ * receive rings.
+ */
+static void
+igb_initialise_rss_mapping(struct adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int i;
+ int queue_id;
+ u32 reta;
+ u32 rss_key[10], mrqc, shift = 0;
+
+ /* XXX? */
+ if (adapter->hw.mac.type == e1000_82575)
+ shift = 6;
+
+ /*
+ * The redirection table controls which destination
+ * queue each bucket redirects traffic to.
+ * Each DWORD represents four queues, with the LSB
+ * being the first queue in the DWORD.
+ *
+ * This just allocates buckets to queues using round-robin
+ * allocation.
+ *
+ * NOTE: It Just Happens to line up with the default
+ * RSS allocation method.
+ */
+
+ /* Warning FM follows */
+ reta = 0;
+ for (i = 0; i < 128; i++) {
+#ifdef RSS
+ queue_id = rss_get_indirection_to_bucket(i);
+ /*
+ * If we have more queues than buckets, we'll
+ * end up mapping buckets to a subset of the
+ * queues.
+ *
+ * If we have more buckets than queues, we'll
+ * end up instead assigning multiple buckets
+ * to queues.
+ *
+ * Both are suboptimal, but we need to handle
+ * the case so we don't go out of bounds
+ * indexing arrays and such.
+ */
+ queue_id = queue_id % adapter->num_queues;
+#else
+ queue_id = (i % adapter->num_queues);
+#endif
+ /* Adjust if required */
+ queue_id = queue_id << shift;
+
+ /*
+ * The low 8 bits are for hash value (n+0);
+ * The next 8 bits are for hash value (n+1), etc.
+ */
+ reta = reta >> 8;
+ reta = reta | ( ((uint32_t) queue_id) << 24);
+ if ((i & 3) == 3) {
+ E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
+ reta = 0;
+ }
+ }
+
+ /* Now fill in hash table */
+
+ /*
+ * MRQC: Multiple Receive Queues Command
+ * Set queuing to RSS control, number depends on the device.
+ */
+ mrqc = E1000_MRQC_ENABLE_RSS_8Q;
+
+#ifdef RSS
+ /* XXX ew typecasting */
+ rss_getkey((uint8_t *) &rss_key);
+#else
+ arc4rand(&rss_key, sizeof(rss_key), 0);
+#endif
+ for (i = 0; i < 10; i++)
+ E1000_WRITE_REG_ARRAY(hw,
+ E1000_RSSRK(0), i, rss_key[i]);
+
+ /*
+ * Configure the RSS fields to hash upon.
+ */
+ mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
+ E1000_MRQC_RSS_FIELD_IPV4_TCP);
+ mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
+ E1000_MRQC_RSS_FIELD_IPV6_TCP);
+ mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
+ E1000_MRQC_RSS_FIELD_IPV6_UDP);
+ mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
+ E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
+
+ E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
+}
+
/*********************************************************************
*
* Enable receive unit.
@@ -4519,6 +4697,18 @@ igb_initialize_receive_units(struct adapter *adapter)
rctl |= E1000_RCTL_SZ_2048;
}
+ /*
+ * If TX flow control is disabled and there's >1 queue defined,
+ * enable DROP.
+ *
+ * This drops frames rather than hanging the RX MAC for all queues.
+ */
+ if ((adapter->num_queues > 1) &&
+ (adapter->fc == e1000_fc_none ||
+ adapter->fc == e1000_fc_rx_pause)) {
+ srrctl |= E1000_SRRCTL_DROP_EN;
+ }
+
/* Setup the Base and Length of the Rx Descriptor Rings */
for (int i = 0; i < adapter->num_queues; i++, rxr++) {
u64 bus_addr = rxr->rxdma.dma_paddr;
@@ -4546,39 +4736,9 @@ igb_initialize_receive_units(struct adapter *adapter)
*/
rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
if (adapter->num_queues >1) {
- u32 random[10], mrqc, shift = 0;
- union igb_reta {
- u32 dword;
- u8 bytes[4];
- } reta;
- arc4rand(&random, sizeof(random), 0);
- if (adapter->hw.mac.type == e1000_82575)
- shift = 6;
- /* Warning FM follows */
- for (int i = 0; i < 128; i++) {
- reta.bytes[i & 3] =
- (i % adapter->num_queues) << shift;
- if ((i & 3) == 3)
- E1000_WRITE_REG(hw,
- E1000_RETA(i >> 2), reta.dword);
- }
- /* Now fill in hash table */
- mrqc = E1000_MRQC_ENABLE_RSS_4Q;
- for (int i = 0; i < 10; i++)
- E1000_WRITE_REG_ARRAY(hw,
- E1000_RSSRK(0), i, random[i]);
-
- mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
- E1000_MRQC_RSS_FIELD_IPV4_TCP);
- mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
- E1000_MRQC_RSS_FIELD_IPV6_TCP);
- mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
- E1000_MRQC_RSS_FIELD_IPV6_UDP);
- mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
- E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
-
- E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
+ /* rss setup */
+ igb_initialise_rss_mapping(adapter);
/*
** NOTE: Receive Full-Packet Checksum Offload
@@ -4589,8 +4749,8 @@ igb_initialize_receive_units(struct adapter *adapter)
rxcsum |= E1000_RXCSUM_PCSD;
#if __FreeBSD_version >= 800000
/* For SCTP Offload */
- if ((hw->mac.type == e1000_82576)
- && (ifp->if_capenable & IFCAP_RXCSUM))
+ if ((hw->mac.type != e1000_82575) &&
+ (ifp->if_capenable & IFCAP_RXCSUM))
rxcsum |= E1000_RXCSUM_CRCOFL;
#endif
} else {
@@ -4598,7 +4758,7 @@ igb_initialize_receive_units(struct adapter *adapter)
if (ifp->if_capenable & IFCAP_RXCSUM) {
rxcsum |= E1000_RXCSUM_IPPCSE;
#if __FreeBSD_version >= 800000
- if (adapter->hw.mac.type == e1000_82576)
+ if (adapter->hw.mac.type != e1000_82575)
rxcsum |= E1000_RXCSUM_CRCOFL;
#endif
} else
@@ -4818,7 +4978,6 @@ igb_rxeof(struct igb_queue *que, int count, int *done)
struct rx_ring *rxr = que->rxr;
struct ifnet *ifp = adapter->ifp;
struct lro_ctrl *lro = &rxr->lro;
- struct lro_entry *queued;
int i, processed = 0, rxdone = 0;
u32 ptype, staterr = 0;
union e1000_adv_rx_desc *cur;
@@ -4839,7 +4998,7 @@ igb_rxeof(struct igb_queue *que, int count, int *done)
for (i = rxr->next_to_check; count != 0;) {
struct mbuf *sendmp, *mh, *mp;
struct igb_rx_buf *rxbuf;
- u16 hlen, plen, hdr, vtag;
+ u16 hlen, plen, hdr, vtag, pkt_info;
bool eop = FALSE;
cur = &rxr->rx_base[i];
@@ -4861,17 +5020,19 @@ igb_rxeof(struct igb_queue *que, int count, int *done)
else
vtag = le16toh(cur->wb.upper.vlan);
hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
+ pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
- /* Make sure all segments of a bad packet are discarded */
- if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
- (rxr->discard)) {
+ /*
+ * Free the frame (all segments) if we're at EOP and
+ * it's an error.
+ *
+ * The datasheet states that EOP + status is only valid for
+ * the final segment in a multi-segment frame.
+ */
+ if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
adapter->dropped_pkts++;
++rxr->rx_discarded;
- if (!eop) /* Catch subsequent segs */
- rxr->discard = TRUE;
- else
- rxr->discard = FALSE;
igb_rx_discard(rxr, i);
goto next_desc;
}
@@ -4945,7 +5106,6 @@ igb_rxeof(struct igb_queue *que, int count, int *done)
if (eop) {
rxr->fmp->m_pkthdr.rcvif = ifp;
- ifp->if_ipackets++;
rxr->rx_packets++;
/* capture data for AIM */
rxr->packets++;
@@ -4960,10 +5120,51 @@ igb_rxeof(struct igb_queue *que, int count, int *done)
rxr->fmp->m_pkthdr.ether_vtag = vtag;
rxr->fmp->m_flags |= M_VLANTAG;
}
+
+ /*
+ * In case of multiqueue, we have RXCSUM.PCSD bit set
+ * and never cleared. This means we have RSS hash
+ * available to be used.
+ */
+ if (adapter->num_queues > 1) {
+ rxr->fmp->m_pkthdr.flowid =
+ le32toh(cur->wb.lower.hi_dword.rss);
+ switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
+ case E1000_RXDADV_RSSTYPE_IPV4_TCP:
+ M_HASHTYPE_SET(rxr->fmp,
+ M_HASHTYPE_RSS_TCP_IPV4);
+ break;
+ case E1000_RXDADV_RSSTYPE_IPV4:
+ M_HASHTYPE_SET(rxr->fmp,
+ M_HASHTYPE_RSS_IPV4);
+ break;
+ case E1000_RXDADV_RSSTYPE_IPV6_TCP:
+ M_HASHTYPE_SET(rxr->fmp,
+ M_HASHTYPE_RSS_TCP_IPV6);
+ break;
+ case E1000_RXDADV_RSSTYPE_IPV6_EX:
+ M_HASHTYPE_SET(rxr->fmp,
+ M_HASHTYPE_RSS_IPV6_EX);
+ break;
+ case E1000_RXDADV_RSSTYPE_IPV6:
+ M_HASHTYPE_SET(rxr->fmp,
+ M_HASHTYPE_RSS_IPV6);
+ break;
+ case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
+ M_HASHTYPE_SET(rxr->fmp,
+ M_HASHTYPE_RSS_TCP_IPV6_EX);
+ break;
+ default:
+ /* XXX fallthrough */
+ M_HASHTYPE_SET(rxr->fmp,
+ M_HASHTYPE_OPAQUE_HASH);
+ }
+ } else {
#ifndef IGB_LEGACY_TX
- rxr->fmp->m_pkthdr.flowid = que->msix;
- rxr->fmp->m_flags |= M_FLOWID;
+ rxr->fmp->m_pkthdr.flowid = que->msix;
+ M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
#endif
+ }
sendmp = rxr->fmp;
/* Make sure to set M_PKTHDR. */
sendmp->m_flags |= M_PKTHDR;
@@ -5004,10 +5205,7 @@ next_desc:
/*
* Flush any outstanding LRO work
*/
- while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
- SLIST_REMOVE_HEAD(&lro->lro_active, next);
- tcp_lro_flush(lro, queued);
- }
+ tcp_lro_flush_all(lro);
if (done != NULL)
*done += rxdone;
@@ -5336,6 +5534,100 @@ igb_led_func(void *arg, int onoff)
IGB_CORE_UNLOCK(adapter);
}
+static uint64_t
+igb_get_vf_counter(if_t ifp, ift_counter cnt)
+{
+ struct adapter *adapter;
+ struct e1000_vf_stats *stats;
+#ifndef IGB_LEGACY_TX
+ struct tx_ring *txr;
+ uint64_t rv;
+#endif
+
+ adapter = if_getsoftc(ifp);
+ stats = (struct e1000_vf_stats *)adapter->stats;
+
+ switch (cnt) {
+ case IFCOUNTER_IPACKETS:
+ return (stats->gprc);
+ case IFCOUNTER_OPACKETS:
+ return (stats->gptc);
+ case IFCOUNTER_IBYTES:
+ return (stats->gorc);
+ case IFCOUNTER_OBYTES:
+ return (stats->gotc);
+ case IFCOUNTER_IMCASTS:
+ return (stats->mprc);
+ case IFCOUNTER_IERRORS:
+ return (adapter->dropped_pkts);
+ case IFCOUNTER_OERRORS:
+ return (adapter->watchdog_events);
+#ifndef IGB_LEGACY_TX
+ case IFCOUNTER_OQDROPS:
+ rv = 0;
+ txr = adapter->tx_rings;
+ for (int i = 0; i < adapter->num_queues; i++, txr++)
+ rv += txr->br->br_drops;
+ return (rv);
+#endif
+ default:
+ return (if_get_counter_default(ifp, cnt));
+ }
+}
+
+static uint64_t
+igb_get_counter(if_t ifp, ift_counter cnt)
+{
+ struct adapter *adapter;
+ struct e1000_hw_stats *stats;
+#ifndef IGB_LEGACY_TX
+ struct tx_ring *txr;
+ uint64_t rv;
+#endif
+
+ adapter = if_getsoftc(ifp);
+ if (adapter->vf_ifp)
+ return (igb_get_vf_counter(ifp, cnt));
+
+ stats = (struct e1000_hw_stats *)adapter->stats;
+
+ switch (cnt) {
+ case IFCOUNTER_IPACKETS:
+ return (stats->gprc);
+ case IFCOUNTER_OPACKETS:
+ return (stats->gptc);
+ case IFCOUNTER_IBYTES:
+ return (stats->gorc);
+ case IFCOUNTER_OBYTES:
+ return (stats->gotc);
+ case IFCOUNTER_IMCASTS:
+ return (stats->mprc);
+ case IFCOUNTER_OMCASTS:
+ return (stats->mptc);
+ case IFCOUNTER_IERRORS:
+ return (adapter->dropped_pkts + stats->rxerrc +
+ stats->crcerrs + stats->algnerrc +
+ stats->ruc + stats->roc + stats->cexterr);
+ case IFCOUNTER_OERRORS:
+ return (stats->ecol + stats->latecol +
+ adapter->watchdog_events);
+ case IFCOUNTER_COLLISIONS:
+ return (stats->colc);
+ case IFCOUNTER_IQDROPS:
+ return (stats->mpc);
+#ifndef IGB_LEGACY_TX
+ case IFCOUNTER_OQDROPS:
+ rv = 0;
+ txr = adapter->tx_rings;
+ for (int i = 0; i < adapter->num_queues; i++, txr++)
+ rv += txr->br->br_drops;
+ return (rv);
+#endif
+ default:
+ return (if_get_counter_default(ifp, cnt));
+ }
+}
+
/**********************************************************************
*
* Update the board statistics counters.
@@ -5344,7 +5636,6 @@ igb_led_func(void *arg, int onoff)
static void
igb_update_stats_counters(struct adapter *adapter)
{
- struct ifnet *ifp;
struct e1000_hw *hw = &adapter->hw;
struct e1000_hw_stats *stats;
@@ -5360,7 +5651,7 @@ igb_update_stats_counters(struct adapter *adapter)
stats = (struct e1000_hw_stats *)adapter->stats;
- if(adapter->hw.phy.media_type == e1000_media_type_copper ||
+ if (adapter->hw.phy.media_type == e1000_media_type_copper ||
(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
stats->symerrs +=
E1000_READ_REG(hw,E1000_SYMERRS);
@@ -5468,18 +5759,6 @@ igb_update_stats_counters(struct adapter *adapter)
stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
- ifp = adapter->ifp;
- ifp->if_collisions = stats->colc;
-
- /* Rx Errors */
- ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
- stats->crcerrs + stats->algnerrc +
- stats->ruc + stats->roc + stats->mpc + stats->cexterr;
-
- /* Tx Errors */
- ifp->if_oerrors = stats->ecol +
- stats->latecol + adapter->watchdog_events;
-
/* Driver specific counters */
adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
@@ -5598,12 +5877,15 @@ igb_add_hw_stats(struct adapter *adapter)
char namebuf[QUEUE_NAME_LEN];
/* Driver Statistics */
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
- CTLFLAG_RD, &adapter->link_irq,
- "Link MSIX IRQ Handled");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
CTLFLAG_RD, &adapter->dropped_pkts,
"Driver dropped packets");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
+ CTLFLAG_RD, &adapter->link_irq,
+ "Link MSIX IRQ Handled");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
+ CTLFLAG_RD, &adapter->mbuf_defrag_failed,
+ "Defragmenting mbuf chain failed");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
CTLFLAG_RD, &adapter->no_tx_dma_setup,
"Driver tx dma failure in xmit");
@@ -5663,7 +5945,7 @@ igb_add_hw_stats(struct adapter *adapter)
"Transmit Descriptor Tail");
SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
CTLFLAG_RD, &txr->no_desc_avail,
- "Queue No Descriptor Available");
+ "Queue Descriptors Unavailable");
SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
CTLFLAG_RD, &txr->total_packets,
"Queue Packets Transmitted");
@@ -5682,10 +5964,10 @@ igb_add_hw_stats(struct adapter *adapter)
SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
CTLFLAG_RD, &rxr->rx_bytes,
"Queue Bytes Received");
- SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
+ SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
CTLFLAG_RD, &lro->lro_queued, 0,
"LRO Queued");
- SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
+ SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
CTLFLAG_RD, &lro->lro_flushed, 0,
"LRO Flushed");
}
@@ -5837,18 +6119,18 @@ igb_add_hw_stats(struct adapter *adapter)
"1023-1522 byte frames received");
SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
CTLFLAG_RD, &stats->gorc,
- "Good Octets Received");
- SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
- CTLFLAG_RD, &stats->tor,
- "Total Octets Received");
+ "Good Octets Received");
+ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
+ CTLFLAG_RD, &stats->tor,
+ "Total Octets Received");
/* Packet Transmission Stats */
SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
CTLFLAG_RD, &stats->gotc,
"Good Octets Transmitted");
- SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
- CTLFLAG_RD, &stats->tot,
- "Total Octets Transmitted");
+ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
+ CTLFLAG_RD, &stats->tot,
+ "Total Octets Transmitted");
SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
CTLFLAG_RD, &stats->tpt,
"Total Packets Transmitted");
@@ -6085,6 +6367,7 @@ igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
e1000_force_mac_fc(&adapter->hw);
+ /* XXX TODO: update DROP_EN on each RX queue if appropriate */
return (error);
}
@@ -6108,7 +6391,7 @@ igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
switch (adapter->dmac) {
case 0:
- /*Disabling */
+ /* Disabling */
break;
case 1: /* Just enable and use default */
adapter->dmac = 1000;
diff --git a/freebsd/sys/dev/e1000/if_igb.h b/freebsd/sys/dev/e1000/if_igb.h
index 0c447412..ea5ba649 100644
--- a/freebsd/sys/dev/e1000/if_igb.h
+++ b/freebsd/sys/dev/e1000/if_igb.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2013, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -32,11 +32,70 @@
******************************************************************************/
/*$FreeBSD$*/
-#ifndef _IGB_H_DEFINED_
-#define _IGB_H_DEFINED_
+#ifndef _IF_IGB_H_
+#define _IF_IGB_H_
-/* Tunables */
+#ifdef ALTQ
+#define IGB_LEGACY_TX
+#endif
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#ifndef IGB_LEGACY_TX
+#include <sys/buf_ring.h>
+#endif
+#include <sys/bus.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/eventhandler.h>
+#include <sys/pcpu.h>
+#include <sys/smp.h>
+#include <machine/smp.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_arp.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#ifdef RSS
+#include <net/rss_config.h>
+#include <netinet/in_rss.h>
+#endif
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_lro.h>
+#include <netinet/udp.h>
+
+#include <machine/in_cksum.h>
+#include <dev/led/led.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include "e1000_api.h"
+#include "e1000_82575.h"
+
+/* Tunables */
/*
* IGB_TXD: Maximum number of Transmit Descriptors
*
@@ -168,7 +227,7 @@
/*
* Micellaneous constants
*/
-#define IGB_VENDOR_ID 0x8086
+#define IGB_INTEL_VENDOR_ID 0x8086
#define IGB_JUMBO_PBA 0x00000028
#define IGB_DEFAULT_PBA 0x00000030
@@ -223,7 +282,7 @@
#define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A)
#define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B)
-#define IGB_MAX_SCATTER 64
+#define IGB_MAX_SCATTER 40
#define IGB_VFTA_SIZE 128
#define IGB_BR_SIZE 4096 /* ring buf size */
#define IGB_TSO_SIZE (65535 + sizeof(struct ether_vlan_header))
@@ -236,7 +295,11 @@
#define ETH_ADDR_LEN 6
/* Offload bits in mbuf flag */
-#if __FreeBSD_version >= 800000
+#if __FreeBSD_version >= 1000000
+#define CSUM_OFFLOAD_IPV4 (CSUM_IP|CSUM_IP_TCP|CSUM_IP_UDP|CSUM_IP_SCTP)
+#define CSUM_OFFLOAD_IPV6 (CSUM_IP6_TCP|CSUM_IP6_UDP|CSUM_IP6_SCTP)
+#define CSUM_OFFLOAD (CSUM_OFFLOAD_IPV4|CSUM_OFFLOAD_IPV6)
+#elif __FreeBSD_version >= 800000
#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP)
#else
#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP)
@@ -300,7 +363,6 @@ struct tx_ring {
volatile u16 tx_avail;
u16 next_avail_desc;
u16 next_to_clean;
- u16 process_limit;
u16 num_desc;
enum {
IGB_QUEUE_IDLE = 1,
@@ -336,7 +398,6 @@ struct rx_ring {
struct lro_ctrl lro;
bool lro_enabled;
bool hdr_split;
- bool discard;
struct mtx rx_mtx;
char mtx_name[16];
u32 next_to_refresh;
@@ -368,7 +429,7 @@ struct adapter {
struct e1000_hw hw;
struct e1000_osdep osdep;
- struct device *dev;
+ device_t dev;
struct cdev *led_dev;
struct resource *pci_mem;
@@ -459,20 +520,19 @@ struct adapter {
u8 *mta;
/* Misc stats maintained by the driver */
+ unsigned long device_control;
unsigned long dropped_pkts;
+ unsigned long eint_mask;
+ unsigned long int_mask;
+ unsigned long link_irq;
unsigned long mbuf_defrag_failed;
- unsigned long mbuf_header_failed;
- unsigned long mbuf_packet_failed;
unsigned long no_tx_dma_setup;
- unsigned long watchdog_events;
- unsigned long link_irq;
- unsigned long rx_overruns;
- unsigned long device_control;
- unsigned long rx_control;
- unsigned long int_mask;
- unsigned long eint_mask;
unsigned long packet_buf_alloc_rx;
unsigned long packet_buf_alloc_tx;
+ unsigned long rx_control;
+ unsigned long rx_overruns;
+ unsigned long watchdog_events;
+
/* Used in pf and vf */
void *stats;
@@ -480,6 +540,7 @@ struct adapter {
int has_manage;
int wol;
int rx_process_limit;
+ int tx_process_limit;
u16 vf_ifp; /* a VF interface */
bool in_detach; /* Used only in igb_ioctl */
@@ -568,6 +629,6 @@ drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br)
}
#endif
-#endif /* _IGB_H_DEFINED_ */
+#endif /* _IF_IGB_H_ */
diff --git a/freebsd/sys/dev/e1000/if_lem.c b/freebsd/sys/dev/e1000/if_lem.c
index 7c22200d..c46c3728 100644
--- a/freebsd/sys/dev/e1000/if_lem.c
+++ b/freebsd/sys/dev/e1000/if_lem.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2012, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -34,6 +34,15 @@
******************************************************************************/
/*$FreeBSD$*/
+/*
+ * Uncomment the following extensions for better performance in a VM,
+ * especially if you have support in the hypervisor.
+ * See http://info.iet.unipi.it/~luigi/netmap/
+ */
+// #define BATCH_DISPATCH
+// #define NIC_SEND_COMBINING
+// #define NIC_PARAVIRT /* enable virtio-like synchronization */
+
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
@@ -43,6 +52,7 @@
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/buf_ring.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
@@ -62,6 +72,7 @@
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
@@ -88,7 +99,7 @@
/*********************************************************************
* Legacy Em Driver version:
*********************************************************************/
-char lem_driver_version[] = "1.0.6";
+char lem_driver_version[] = "1.1.0";
/*********************************************************************
* PCI Device ID Table
@@ -168,14 +179,15 @@ static int lem_detach(device_t);
static int lem_shutdown(device_t);
static int lem_suspend(device_t);
static int lem_resume(device_t);
-static void lem_start(struct ifnet *);
-static void lem_start_locked(struct ifnet *ifp);
-static int lem_ioctl(struct ifnet *, u_long, caddr_t);
+static void lem_start(if_t);
+static void lem_start_locked(if_t ifp);
+static int lem_ioctl(if_t, u_long, caddr_t);
+static uint64_t lem_get_counter(if_t, ift_counter);
static void lem_init(void *);
static void lem_init_locked(struct adapter *);
static void lem_stop(void *);
-static void lem_media_status(struct ifnet *, struct ifmediareq *);
-static int lem_media_change(struct ifnet *);
+static void lem_media_status(if_t, struct ifmediareq *);
+static int lem_media_change(if_t);
static void lem_identify_hardware(struct adapter *);
static int lem_allocate_pci_resources(struct adapter *);
static int lem_allocate_irq(struct adapter *adapter);
@@ -210,8 +222,8 @@ static void lem_disable_promisc(struct adapter *);
static void lem_set_multi(struct adapter *);
static void lem_update_link_status(struct adapter *);
static int lem_get_buf(struct adapter *, int);
-static void lem_register_vlan(void *, struct ifnet *, u16);
-static void lem_unregister_vlan(void *, struct ifnet *, u16);
+static void lem_register_vlan(void *, if_t, u16);
+static void lem_unregister_vlan(void *, if_t, u16);
static void lem_setup_vlan_hw_support(struct adapter *);
static int lem_xmit(struct adapter *, struct mbuf **);
static void lem_smartspeed(struct adapter *);
@@ -276,6 +288,9 @@ extern devclass_t em_devclass;
DRIVER_MODULE(lem, pci, lem_driver, em_devclass, 0, 0);
MODULE_DEPEND(lem, pci, 1, 1, 1);
MODULE_DEPEND(lem, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(lem, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
/*********************************************************************
* Tunable default values.
@@ -291,6 +306,10 @@ static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int lem_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
+/*
+ * increase lem_rxd and lem_txd to at least 2048 in netmap mode
+ * for better performance.
+ */
static int lem_rxd = EM_DEFAULT_RXD;
static int lem_txd = EM_DEFAULT_TXD;
static int lem_smart_pwr_down = FALSE;
@@ -460,6 +479,20 @@ lem_attach(device_t dev)
"max number of rx packets to process", &adapter->rx_process_limit,
lem_rx_process_limit);
+#ifdef NIC_SEND_COMBINING
+ /* Sysctls to control mitigation */
+ lem_add_rx_process_limit(adapter, "sc_enable",
+ "driver TDT mitigation", &adapter->sc_enable, 0);
+#endif /* NIC_SEND_COMBINING */
+#ifdef BATCH_DISPATCH
+ lem_add_rx_process_limit(adapter, "batch_enable",
+ "driver rx batch", &adapter->batch_enable, 0);
+#endif /* BATCH_DISPATCH */
+#ifdef NIC_PARAVIRT
+ lem_add_rx_process_limit(adapter, "rx_retries",
+ "driver rx retries", &adapter->rx_retries, 0);
+#endif /* NIC_PARAVIRT */
+
/* Sysctl for setting the interface flow control */
lem_set_flow_cntrl(adapter, "flow_control",
"flow control setting",
@@ -517,6 +550,49 @@ lem_attach(device_t dev)
*/
adapter->hw.mac.report_tx_early = 1;
+#ifdef NIC_PARAVIRT
+ device_printf(dev, "driver supports paravirt, subdev 0x%x\n",
+ adapter->hw.subsystem_device_id);
+ if (adapter->hw.subsystem_device_id == E1000_PARA_SUBDEV) {
+ uint64_t bus_addr;
+
+ device_printf(dev, "paravirt support on dev %p\n", adapter);
+ tsize = 4096; // XXX one page for the csb
+ if (lem_dma_malloc(adapter, tsize, &adapter->csb_mem, BUS_DMA_NOWAIT)) {
+ device_printf(dev, "Unable to allocate csb memory\n");
+ error = ENOMEM;
+ goto err_csb;
+ }
+ /* Setup the Base of the CSB */
+ adapter->csb = (struct paravirt_csb *)adapter->csb_mem.dma_vaddr;
+ /* force the first kick */
+ adapter->csb->host_need_txkick = 1; /* txring empty */
+ adapter->csb->guest_need_rxkick = 1; /* no rx packets */
+ bus_addr = adapter->csb_mem.dma_paddr;
+ lem_add_rx_process_limit(adapter, "csb_on",
+ "enable paravirt.", &adapter->csb->guest_csb_on, 0);
+ lem_add_rx_process_limit(adapter, "txc_lim",
+ "txc_lim", &adapter->csb->host_txcycles_lim, 1);
+
+ /* some stats */
+#define PA_SC(name, var, val) \
+ lem_add_rx_process_limit(adapter, name, name, var, val)
+ PA_SC("host_need_txkick",&adapter->csb->host_need_txkick, 1);
+ PA_SC("host_rxkick_at",&adapter->csb->host_rxkick_at, ~0);
+ PA_SC("guest_need_txkick",&adapter->csb->guest_need_txkick, 0);
+ PA_SC("guest_need_rxkick",&adapter->csb->guest_need_rxkick, 1);
+ PA_SC("tdt_reg_count",&adapter->tdt_reg_count, 0);
+ PA_SC("tdt_csb_count",&adapter->tdt_csb_count, 0);
+ PA_SC("tdt_int_count",&adapter->tdt_int_count, 0);
+ PA_SC("guest_need_kick_count",&adapter->guest_need_kick_count, 0);
+ /* tell the host where the block is */
+ E1000_WRITE_REG(&adapter->hw, E1000_CSBAH,
+ (u32)(bus_addr >> 32));
+ E1000_WRITE_REG(&adapter->hw, E1000_CSBAL,
+ (u32)bus_addr);
+ }
+#endif /* NIC_PARAVIRT */
+
tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc),
EM_DBA_ALIGN);
@@ -654,7 +730,7 @@ lem_attach(device_t dev)
lem_get_hw_control(adapter);
/* Tell the stack that the interface is not active */
- adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+ if_setdrvflagbits(adapter->ifp, 0, IFF_DRV_OACTIVE | IFF_DRV_RUNNING);
adapter->led_dev = led_create(lem_led_func, adapter,
device_get_nameunit(dev));
@@ -675,8 +751,13 @@ err_hw_init:
err_rx_desc:
lem_dma_free(adapter, &adapter->txdma);
err_tx_desc:
+#ifdef NIC_PARAVIRT
+ lem_dma_free(adapter, &adapter->csb_mem);
+err_csb:
+#endif /* NIC_PARAVIRT */
+
err_pci:
- if (adapter->ifp != NULL)
+ if (adapter->ifp != (void *)NULL)
if_free(adapter->ifp);
lem_free_pci_resources(adapter);
free(adapter->mta, M_DEVBUF);
@@ -701,18 +782,18 @@ static int
lem_detach(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
INIT_DEBUGOUT("em_detach: begin");
/* Make sure VLANS are not using driver */
- if (adapter->ifp->if_vlantrunk != NULL) {
+ if (if_vlantrunkinuse(ifp)) {
device_printf(dev,"Vlan in use, detach first\n");
return (EBUSY);
}
#ifdef DEVICE_POLLING
- if (ifp->if_capenable & IFCAP_POLLING)
+ if (if_getcapenable(ifp) & IFCAP_POLLING)
ether_poll_deregister(ifp);
#endif
@@ -762,6 +843,12 @@ lem_detach(device_t dev)
adapter->rx_desc_base = NULL;
}
+#ifdef NIC_PARAVIRT
+ if (adapter->csb) {
+ lem_dma_free(adapter, &adapter->csb_mem);
+ adapter->csb = NULL;
+ }
+#endif /* NIC_PARAVIRT */
lem_release_hw_control(adapter);
free(adapter->mta, M_DEVBUF);
EM_TX_LOCK_DESTROY(adapter);
@@ -806,7 +893,7 @@ static int
lem_resume(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
EM_CORE_LOCK(adapter);
lem_init_locked(adapter);
@@ -819,14 +906,14 @@ lem_resume(device_t dev)
static void
-lem_start_locked(struct ifnet *ifp)
+lem_start_locked(if_t ifp)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
struct mbuf *m_head;
EM_TX_LOCK_ASSERT(adapter);
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
+ if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING)
return;
if (!adapter->link_active)
@@ -845,9 +932,9 @@ lem_start_locked(struct ifnet *ifp)
}
}
- while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
+ while (!if_sendq_empty(ifp)) {
+ m_head = if_dequeue(ifp);
- IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
if (m_head == NULL)
break;
/*
@@ -857,31 +944,41 @@ lem_start_locked(struct ifnet *ifp)
if (lem_xmit(adapter, &m_head)) {
if (m_head == NULL)
break;
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
+ if_sendq_prepend(ifp, m_head);
break;
}
/* Send a copy of the frame to the BPF listener */
- ETHER_BPF_MTAP(ifp, m_head);
+ if_etherbpfmtap(ifp, m_head);
/* Set timeout in case hardware has problems transmitting. */
adapter->watchdog_check = TRUE;
adapter->watchdog_time = ticks;
}
if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD)
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
+#ifdef NIC_PARAVIRT
+ if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE && adapter->csb &&
+ adapter->csb->guest_csb_on &&
+ !(adapter->csb->guest_need_txkick & 1)) {
+ adapter->csb->guest_need_txkick = 1;
+ adapter->guest_need_kick_count++;
+ // XXX memory barrier
+ lem_txeof(adapter); // XXX possibly clear IFF_DRV_OACTIVE
+ }
+#endif /* NIC_PARAVIRT */
return;
}
static void
-lem_start(struct ifnet *ifp)
+lem_start(if_t ifp)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
EM_TX_LOCK(adapter);
- if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
lem_start_locked(ifp);
EM_TX_UNLOCK(adapter);
}
@@ -896,9 +993,9 @@ lem_start(struct ifnet *ifp)
**********************************************************************/
static int
-lem_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+lem_ioctl(if_t ifp, u_long command, caddr_t data)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
struct ifreq *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
struct ifaddr *ifa = (struct ifaddr *)data;
@@ -924,11 +1021,11 @@ lem_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
** so we avoid doing it when possible.
*/
if (avoid_reset) {
- ifp->if_flags |= IFF_UP;
- if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if_setflagbits(ifp, IFF_UP, 0);
+ if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
lem_init(adapter);
#ifdef INET
- if (!(ifp->if_flags & IFF_NOARP))
+ if (!(if_getflags(ifp) & IFF_NOARP))
arp_ifinit(ifp, ifa);
#endif
} else
@@ -955,10 +1052,11 @@ lem_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
break;
}
- ifp->if_mtu = ifr->ifr_mtu;
+ if_setmtu(ifp, ifr->ifr_mtu);
adapter->max_frame_size =
- ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
- lem_init_locked(adapter);
+ if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
+ lem_init_locked(adapter);
EM_CORE_UNLOCK(adapter);
break;
}
@@ -966,9 +1064,9 @@ lem_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
IOCTL_DEBUGOUT("ioctl rcv'd:\
SIOCSIFFLAGS (Set Interface Flags)");
EM_CORE_LOCK(adapter);
- if (ifp->if_flags & IFF_UP) {
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
- if ((ifp->if_flags ^ adapter->if_flags) &
+ if (if_getflags(ifp) & IFF_UP) {
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING)) {
+ if ((if_getflags(ifp) ^ adapter->if_flags) &
(IFF_PROMISC | IFF_ALLMULTI)) {
lem_disable_promisc(adapter);
lem_set_promisc(adapter);
@@ -976,18 +1074,18 @@ lem_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
} else
lem_init_locked(adapter);
} else
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
EM_TX_LOCK(adapter);
lem_stop(adapter);
EM_TX_UNLOCK(adapter);
}
- adapter->if_flags = ifp->if_flags;
+ adapter->if_flags = if_getflags(ifp);
EM_CORE_UNLOCK(adapter);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
EM_CORE_LOCK(adapter);
lem_disable_intr(adapter);
lem_set_multi(adapter);
@@ -996,7 +1094,7 @@ lem_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
lem_initialize_receive_unit(adapter);
}
#ifdef DEVICE_POLLING
- if (!(ifp->if_capenable & IFCAP_POLLING))
+ if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
lem_enable_intr(adapter);
EM_CORE_UNLOCK(adapter);
@@ -1023,7 +1121,7 @@ lem_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
reinit = 0;
- mask = ifr->ifr_reqcap ^ ifp->if_capenable;
+ mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
if (mask & IFCAP_POLLING) {
if (ifr->ifr_reqcap & IFCAP_POLLING) {
@@ -1032,36 +1130,36 @@ lem_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
return (error);
EM_CORE_LOCK(adapter);
lem_disable_intr(adapter);
- ifp->if_capenable |= IFCAP_POLLING;
+ if_setcapenablebit(ifp, IFCAP_POLLING, 0);
EM_CORE_UNLOCK(adapter);
} else {
error = ether_poll_deregister(ifp);
/* Enable interrupt even in error case */
EM_CORE_LOCK(adapter);
lem_enable_intr(adapter);
- ifp->if_capenable &= ~IFCAP_POLLING;
+ if_setcapenablebit(ifp, 0, IFCAP_POLLING);
EM_CORE_UNLOCK(adapter);
}
}
#endif
if (mask & IFCAP_HWCSUM) {
- ifp->if_capenable ^= IFCAP_HWCSUM;
+ if_togglecapenable(ifp, IFCAP_HWCSUM);
reinit = 1;
}
if (mask & IFCAP_VLAN_HWTAGGING) {
- ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
+ if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
reinit = 1;
}
if ((mask & IFCAP_WOL) &&
- (ifp->if_capabilities & IFCAP_WOL) != 0) {
+ (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
if (mask & IFCAP_WOL_MCAST)
- ifp->if_capenable ^= IFCAP_WOL_MCAST;
+ if_togglecapenable(ifp, IFCAP_WOL_MCAST);
if (mask & IFCAP_WOL_MAGIC)
- ifp->if_capenable ^= IFCAP_WOL_MAGIC;
+ if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
}
- if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
lem_init(adapter);
- VLAN_CAPABILITIES(ifp);
+ if_vlancap(ifp);
break;
}
@@ -1088,7 +1186,7 @@ lem_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
static void
lem_init_locked(struct adapter *adapter)
{
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
device_t dev = adapter->dev;
u32 pba;
@@ -1135,7 +1233,7 @@ lem_init_locked(struct adapter *adapter)
E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
/* Get the latest mac address, User can use a LAA */
- bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
+ bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
ETHER_ADDR_LEN);
/* Put the address into the Receive Address Array */
@@ -1152,10 +1250,10 @@ lem_init_locked(struct adapter *adapter)
E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
/* Set hardware offload abilities */
- ifp->if_hwassist = 0;
+ if_clearhwassist(ifp);
if (adapter->hw.mac.type >= e1000_82543) {
- if (ifp->if_capenable & IFCAP_TXCSUM)
- ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
+ if (if_getcapenable(ifp) & IFCAP_TXCSUM)
+ if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
}
/* Configure for OS presence */
@@ -1179,8 +1277,8 @@ lem_init_locked(struct adapter *adapter)
lem_initialize_receive_unit(adapter);
/* Use real VLAN Filter support? */
- if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
/* Use real VLAN Filter support */
lem_setup_vlan_hw_support(adapter);
else {
@@ -1194,8 +1292,7 @@ lem_init_locked(struct adapter *adapter)
/* Don't lose promiscuous settings */
lem_set_promisc(adapter);
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
callout_reset(&adapter->timer, hz, lem_local_timer, adapter);
e1000_clear_hw_cntrs_base_generic(&adapter->hw);
@@ -1205,7 +1302,7 @@ lem_init_locked(struct adapter *adapter)
* Only enable interrupts if we are not polling, make sure
* they are off otherwise.
*/
- if (ifp->if_capenable & IFCAP_POLLING)
+ if (if_getcapenable(ifp) & IFCAP_POLLING)
lem_disable_intr(adapter);
else
#endif /* DEVICE_POLLING */
@@ -1234,13 +1331,13 @@ lem_init(void *arg)
*
*********************************************************************/
static int
-lem_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
+lem_poll(if_t ifp, enum poll_cmd cmd, int count)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
u32 reg_icr, rx_done = 0;
EM_CORE_LOCK(adapter);
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
EM_CORE_UNLOCK(adapter);
return (rx_done);
}
@@ -1261,7 +1358,7 @@ lem_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
EM_TX_LOCK(adapter);
lem_txeof(adapter);
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if(!if_sendq_empty(ifp))
lem_start_locked(ifp);
EM_TX_UNLOCK(adapter);
return (rx_done);
@@ -1277,12 +1374,12 @@ static void
lem_intr(void *arg)
{
struct adapter *adapter = arg;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
u32 reg_icr;
- if ((ifp->if_capenable & IFCAP_POLLING) ||
- ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))
+ if ((if_getcapenable(ifp) & IFCAP_POLLING) ||
+ ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
return;
EM_CORE_LOCK(adapter);
@@ -1312,8 +1409,8 @@ lem_intr(void *arg)
EM_TX_LOCK(adapter);
lem_txeof(adapter);
- if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
- !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) &&
+ (!if_sendq_empty(ifp)))
lem_start_locked(ifp);
EM_TX_UNLOCK(adapter);
return;
@@ -1324,9 +1421,9 @@ static void
lem_handle_link(void *context, int pending)
{
struct adapter *adapter = context;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
- if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
return;
EM_CORE_LOCK(adapter);
@@ -1344,14 +1441,14 @@ static void
lem_handle_rxtx(void *context, int pending)
{
struct adapter *adapter = context;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
bool more = lem_rxeof(adapter, adapter->rx_process_limit, NULL);
EM_TX_LOCK(adapter);
lem_txeof(adapter);
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if(!if_sendq_empty(ifp))
lem_start_locked(ifp);
EM_TX_UNLOCK(adapter);
if (more) {
@@ -1360,7 +1457,7 @@ lem_handle_rxtx(void *context, int pending)
}
}
- if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
lem_enable_intr(adapter);
}
@@ -1373,7 +1470,7 @@ static int
lem_irq_fast(void *arg)
{
struct adapter *adapter = arg;
- struct ifnet *ifp;
+ if_t ifp;
u32 reg_icr;
ifp = adapter->ifp;
@@ -1417,9 +1514,9 @@ lem_irq_fast(void *arg)
*
**********************************************************************/
static void
-lem_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
+lem_media_status(if_t ifp, struct ifmediareq *ifmr)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
u_char fiber_type = IFM_1000_SX;
INIT_DEBUGOUT("lem_media_status: begin");
@@ -1471,9 +1568,9 @@ lem_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
*
**********************************************************************/
static int
-lem_media_change(struct ifnet *ifp)
+lem_media_change(if_t ifp)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
struct ifmedia *ifm = &adapter->media;
INIT_DEBUGOUT("lem_media_change: begin");
@@ -1581,9 +1678,9 @@ lem_xmit(struct adapter *adapter, struct mbuf **m_headp)
if (error == EFBIG) {
struct mbuf *m;
- m = m_defrag(*m_headp, M_NOWAIT);
+ m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
if (m == NULL) {
- adapter->mbuf_alloc_failed++;
+ adapter->mbuf_defrag_failed++;
m_freem(*m_headp);
*m_headp = NULL;
return (ENOBUFS);
@@ -1605,7 +1702,7 @@ lem_xmit(struct adapter *adapter, struct mbuf **m_headp)
return (error);
}
- if (nsegs > (adapter->num_tx_desc_avail - 2)) {
+ if (adapter->num_tx_desc_avail < (nsegs + 2)) {
adapter->no_tx_desc_avail2++;
bus_dmamap_unload(adapter->txtag, map);
return (ENOBUFS);
@@ -1717,6 +1814,37 @@ lem_xmit(struct adapter *adapter, struct mbuf **m_headp)
*/
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+#ifdef NIC_PARAVIRT
+ if (adapter->csb) {
+ adapter->csb->guest_tdt = i;
+ /* XXX memory barrier ? */
+ if (adapter->csb->guest_csb_on &&
+ !(adapter->csb->host_need_txkick & 1)) {
+ /* XXX maybe useless
+ * clean the ring. maybe do it before ?
+ * maybe a little bit of histeresys ?
+ */
+ if (adapter->num_tx_desc_avail <= 64) {// XXX
+ lem_txeof(adapter);
+ }
+ return (0);
+ }
+ }
+#endif /* NIC_PARAVIRT */
+
+#ifdef NIC_SEND_COMBINING
+ if (adapter->sc_enable) {
+ if (adapter->shadow_tdt & MIT_PENDING_INT) {
+ /* signal intr and data pending */
+ adapter->shadow_tdt = MIT_PENDING_TDT | (i & 0xffff);
+ return (0);
+ } else {
+ adapter->shadow_tdt = MIT_PENDING_INT;
+ }
+ }
+#endif /* NIC_SEND_COMBINING */
+
if (adapter->hw.mac.type == e1000_82547 &&
adapter->link_duplex == HALF_DUPLEX)
lem_82547_move_tail(adapter);
@@ -1850,18 +1978,18 @@ lem_82547_tx_fifo_reset(struct adapter *adapter)
static void
lem_set_promisc(struct adapter *adapter)
{
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
u32 reg_rctl;
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
- if (ifp->if_flags & IFF_PROMISC) {
+ if (if_getflags(ifp) & IFF_PROMISC) {
reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
/* Turn this on if you want to see bad packets */
if (lem_debug_sbp)
reg_rctl |= E1000_RCTL_SBP;
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
- } else if (ifp->if_flags & IFF_ALLMULTI) {
+ } else if (if_getflags(ifp) & IFF_ALLMULTI) {
reg_rctl |= E1000_RCTL_MPE;
reg_rctl &= ~E1000_RCTL_UPE;
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
@@ -1871,34 +1999,17 @@ lem_set_promisc(struct adapter *adapter)
static void
lem_disable_promisc(struct adapter *adapter)
{
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
u32 reg_rctl;
int mcnt = 0;
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
reg_rctl &= (~E1000_RCTL_UPE);
- if (ifp->if_flags & IFF_ALLMULTI)
+ if (if_getflags(ifp) & IFF_ALLMULTI)
mcnt = MAX_NUM_MULTICAST_ADDRESSES;
- else {
- struct ifmultiaddr *ifma;
-#if __FreeBSD_version < 800000
- IF_ADDR_LOCK(ifp);
-#else
- if_maddr_rlock(ifp);
-#endif
- TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_LINK)
- continue;
- if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
- break;
- mcnt++;
- }
-#if __FreeBSD_version < 800000
- IF_ADDR_UNLOCK(ifp);
-#else
- if_maddr_runlock(ifp);
-#endif
- }
+ else
+ mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
+
/* Don't disable if in MAX groups */
if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
reg_rctl &= (~E1000_RCTL_MPE);
@@ -1917,8 +2028,7 @@ lem_disable_promisc(struct adapter *adapter)
static void
lem_set_multi(struct adapter *adapter)
{
- struct ifnet *ifp = adapter->ifp;
- struct ifmultiaddr *ifma;
+ if_t ifp = adapter->ifp;
u32 reg_rctl = 0;
u8 *mta; /* Multicast array memory */
int mcnt = 0;
@@ -1938,27 +2048,8 @@ lem_set_multi(struct adapter *adapter)
msec_delay(5);
}
-#if __FreeBSD_version < 800000
- IF_ADDR_LOCK(ifp);
-#else
- if_maddr_rlock(ifp);
-#endif
- TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_LINK)
- continue;
-
- if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
- break;
+ if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
- bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
- &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
- mcnt++;
- }
-#if __FreeBSD_version < 800000
- IF_ADDR_UNLOCK(ifp);
-#else
- if_maddr_runlock(ifp);
-#endif
if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
reg_rctl |= E1000_RCTL_MPE;
@@ -1997,6 +2088,20 @@ lem_local_timer(void *arg)
lem_smartspeed(adapter);
+#ifdef NIC_PARAVIRT
+ /* recover space if needed */
+ if (adapter->csb && adapter->csb->guest_csb_on &&
+ (adapter->watchdog_check == TRUE) &&
+ (ticks - adapter->watchdog_time > EM_WATCHDOG) &&
+ (adapter->num_tx_desc_avail != adapter->num_tx_desc) ) {
+ lem_txeof(adapter);
+ /*
+ * lem_txeof() normally (except when space in the queue
+ * runs low XXX) cleans watchdog_check so that
+ * we do not hang.
+ */
+ }
+#endif /* NIC_PARAVIRT */
/*
* We check the watchdog: the time since
* the last TX descriptor was cleaned.
@@ -2010,7 +2115,7 @@ lem_local_timer(void *arg)
return;
hung:
device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
- adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_setdrvflagbits(adapter->ifp, 0, IFF_DRV_RUNNING);
adapter->watchdog_events++;
lem_init_locked(adapter);
}
@@ -2019,7 +2124,7 @@ static void
lem_update_link_status(struct adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
device_t dev = adapter->dev;
u32 link_check = 0;
@@ -2060,10 +2165,11 @@ lem_update_link_status(struct adapter *adapter)
"Full Duplex" : "Half Duplex"));
adapter->link_active = 1;
adapter->smartspeed = 0;
- ifp->if_baudrate = adapter->link_speed * 1000000;
+ if_setbaudrate(ifp, adapter->link_speed * 1000000);
if_link_state_change(ifp, LINK_STATE_UP);
} else if (!link_check && (adapter->link_active == 1)) {
- ifp->if_baudrate = adapter->link_speed = 0;
+ if_setbaudrate(ifp, 0);
+ adapter->link_speed = 0;
adapter->link_duplex = 0;
if (bootverbose)
device_printf(dev, "Link is Down\n");
@@ -2087,7 +2193,7 @@ static void
lem_stop(void *arg)
{
struct adapter *adapter = arg;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
EM_CORE_LOCK_ASSERT(adapter);
EM_TX_LOCK_ASSERT(adapter);
@@ -2099,7 +2205,7 @@ lem_stop(void *arg)
callout_stop(&adapter->tx_fifo_timer);
/* Tell the stack that the interface is no longer active */
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+ if_setdrvflagbits(ifp, 0, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE));
e1000_reset_hw(&adapter->hw);
if (adapter->hw.mac.type >= e1000_82544)
@@ -2309,7 +2415,7 @@ lem_hardware_init(struct adapter *adapter)
* received after sending an XOFF.
* - Low water mark works best when it is very near the high water mark.
* This allows the receiver to restart by sending XON when it has
- * drained a bit. Here we use an arbitary value of 1500 which will
+ * drained a bit. Here we use an arbitrary value of 1500 which will
* restart after one full frame is pulled from the buffer. There
* could be several smaller frames in the buffer and if so they will
* not trigger the XON until their total number reduces the buffer
@@ -2350,40 +2456,40 @@ lem_hardware_init(struct adapter *adapter)
static int
lem_setup_interface(device_t dev, struct adapter *adapter)
{
- struct ifnet *ifp;
+ if_t ifp;
INIT_DEBUGOUT("lem_setup_interface: begin");
- ifp = adapter->ifp = if_alloc(IFT_ETHER);
- if (ifp == NULL) {
+ ifp = adapter->ifp = if_gethandle(IFT_ETHER);
+ if (ifp == (void *)NULL) {
device_printf(dev, "can not allocate ifnet structure\n");
return (-1);
}
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
- ifp->if_init = lem_init;
- ifp->if_softc = adapter;
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_ioctl = lem_ioctl;
- ifp->if_start = lem_start;
- IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
- ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
- IFQ_SET_READY(&ifp->if_snd);
+ if_setinitfn(ifp, lem_init);
+ if_setsoftc(ifp, adapter);
+ if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
+ if_setioctlfn(ifp, lem_ioctl);
+ if_setstartfn(ifp, lem_start);
+ if_setgetcounterfn(ifp, lem_get_counter);
+ if_setsendqlen(ifp, adapter->num_tx_desc - 1);
+ if_setsendqready(ifp);
ether_ifattach(ifp, adapter->hw.mac.addr);
- ifp->if_capabilities = ifp->if_capenable = 0;
+ if_setcapabilities(ifp, 0);
if (adapter->hw.mac.type >= e1000_82543) {
- ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
- ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
+ if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM, 0);
+ if_setcapenablebit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM, 0);
}
/*
* Tell the upper layer(s) we support long frames.
*/
- ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
- ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
- ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
+ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
+ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU, 0);
+ if_setcapenablebit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU, 0);
/*
** Dont turn this on by default, if vlans are
@@ -2393,16 +2499,16 @@ lem_setup_interface(device_t dev, struct adapter *adapter)
** using vlans directly on the em driver you can
** enable this and get full hardware tag filtering.
*/
- ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
+ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
#ifdef DEVICE_POLLING
- ifp->if_capabilities |= IFCAP_POLLING;
+ if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
#endif
/* Enable only WOL MAGIC by default */
if (adapter->wol) {
- ifp->if_capabilities |= IFCAP_WOL;
- ifp->if_capenable |= IFCAP_WOL_MAGIC;
+ if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
+ if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
}
/*
@@ -2565,7 +2671,6 @@ fail_2:
bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
bus_dma_tag_destroy(dma->dma_tag);
fail_0:
- dma->dma_map = NULL;
dma->dma_tag = NULL;
return (error);
@@ -2576,12 +2681,15 @@ lem_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
if (dma->dma_tag == NULL)
return;
- if (dma->dma_map != NULL) {
+ if (dma->dma_paddr != 0) {
bus_dmamap_sync(dma->dma_tag, dma->dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(dma->dma_tag, dma->dma_map);
+ dma->dma_paddr = 0;
+ }
+ if (dma->dma_vaddr != NULL) {
bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
- dma->dma_map = NULL;
+ dma->dma_vaddr = NULL;
}
bus_dma_tag_destroy(dma->dma_tag);
dma->dma_tag = NULL;
@@ -2656,7 +2764,7 @@ lem_setup_transmit_structures(struct adapter *adapter)
struct em_buffer *tx_buffer;
#ifdef DEV_NETMAP
/* we are already locked */
- struct netmap_adapter *na = NA(adapter->ifp);
+ struct netmap_adapter *na = netmap_getna(adapter->ifp);
struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0);
#endif /* DEV_NETMAP */
@@ -2679,10 +2787,10 @@ lem_setup_transmit_structures(struct adapter *adapter)
uint64_t paddr;
void *addr;
- addr = PNMB(slot + si, &paddr);
+ addr = PNMB(na, slot + si, &paddr);
adapter->tx_desc_base[i].buffer_addr = htole64(paddr);
/* reload the map for netmap mode */
- netmap_load_map(adapter->txtag, tx_buffer->map, addr);
+ netmap_load_map(na, adapter->txtag, tx_buffer->map, addr);
}
#endif /* DEV_NETMAP */
tx_buffer->next_eop = -1;
@@ -2808,10 +2916,6 @@ lem_free_transmit_structures(struct adapter *adapter)
bus_dma_tag_destroy(adapter->txtag);
adapter->txtag = NULL;
}
-#if __FreeBSD_version >= 800000
- if (adapter->br != NULL)
- buf_ring_free(adapter->br, M_DEVBUF);
-#endif
}
/*********************************************************************
@@ -2982,7 +3086,7 @@ lem_txeof(struct adapter *adapter)
int first, last, done, num_avail;
struct em_buffer *tx_buffer;
struct e1000_tx_desc *tx_desc, *eop_desc;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
EM_TX_LOCK_ASSERT(adapter);
@@ -3022,7 +3126,7 @@ lem_txeof(struct adapter *adapter)
++num_avail;
if (tx_buffer->m_head) {
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
bus_dmamap_sync(adapter->txtag,
tx_buffer->map,
BUS_DMASYNC_POSTWRITE);
@@ -3057,13 +3161,29 @@ lem_txeof(struct adapter *adapter)
adapter->next_tx_to_clean = first;
adapter->num_tx_desc_avail = num_avail;
+#ifdef NIC_SEND_COMBINING
+ if ((adapter->shadow_tdt & MIT_PENDING_TDT) == MIT_PENDING_TDT) {
+ /* a tdt write is pending, do it */
+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0),
+ 0xffff & adapter->shadow_tdt);
+ adapter->shadow_tdt = MIT_PENDING_INT;
+ } else {
+ adapter->shadow_tdt = 0; // disable
+ }
+#endif /* NIC_SEND_COMBINING */
/*
* If we have enough room, clear IFF_DRV_OACTIVE to
* tell the stack that it is OK to send packets.
* If there are no pending descriptors, clear the watchdog.
*/
if (adapter->num_tx_desc_avail > EM_TX_CLEANUP_THRESHOLD) {
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
+#ifdef NIC_PARAVIRT
+ if (adapter->csb) { // XXX also csb_on ?
+ adapter->csb->guest_need_txkick = 2; /* acked */
+ // XXX memory barrier
+ }
+#endif /* NIC_PARAVIRT */
if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
adapter->watchdog_check = FALSE;
return;
@@ -3184,8 +3304,7 @@ lem_allocate_receive_structures(struct adapter *adapter)
}
/* Create the spare map (used by getbuf) */
- error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
- &adapter->rx_sparemap);
+ error = bus_dmamap_create(adapter->rxtag, 0, &adapter->rx_sparemap);
if (error) {
device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
__func__, error);
@@ -3194,8 +3313,7 @@ lem_allocate_receive_structures(struct adapter *adapter)
rx_buffer = adapter->rx_buffer_area;
for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
- error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
- &rx_buffer->map);
+ error = bus_dmamap_create(adapter->rxtag, 0, &rx_buffer->map);
if (error) {
device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
__func__, error);
@@ -3222,7 +3340,7 @@ lem_setup_receive_structures(struct adapter *adapter)
int i, error;
#ifdef DEV_NETMAP
/* we are already under lock */
- struct netmap_adapter *na = NA(adapter->ifp);
+ struct netmap_adapter *na = netmap_getna(adapter->ifp);
struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0);
#endif
@@ -3251,8 +3369,8 @@ lem_setup_receive_structures(struct adapter *adapter)
uint64_t paddr;
void *addr;
- addr = PNMB(slot + si, &paddr);
- netmap_load_map(adapter->rxtag, rx_buffer->map, addr);
+ addr = PNMB(na, slot + si, &paddr);
+ netmap_load_map(na, adapter->rxtag, rx_buffer->map, addr);
/* Update descriptor */
adapter->rx_desc_base[i].buffer_addr = htole64(paddr);
continue;
@@ -3280,7 +3398,7 @@ lem_setup_receive_structures(struct adapter *adapter)
static void
lem_initialize_receive_unit(struct adapter *adapter)
{
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
u64 bus_addr;
u32 rctl, rxcsum;
@@ -3345,14 +3463,14 @@ lem_initialize_receive_unit(struct adapter *adapter)
break;
}
- if (ifp->if_mtu > ETHERMTU)
+ if (if_getmtu(ifp) > ETHERMTU)
rctl |= E1000_RCTL_LPE;
else
rctl &= ~E1000_RCTL_LPE;
/* Enable 82543 Receive Checksum Offload for TCP and UDP */
if ((adapter->hw.mac.type >= e1000_82543) &&
- (ifp->if_capenable & IFCAP_RXCSUM)) {
+ (if_getcapenable(ifp) & IFCAP_RXCSUM)) {
rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
@@ -3369,8 +3487,10 @@ lem_initialize_receive_unit(struct adapter *adapter)
rctl = adapter->num_rx_desc - 1; /* default RDT value */
#ifdef DEV_NETMAP
/* preserve buffers already made available to clients */
- if (ifp->if_capenable & IFCAP_NETMAP)
- rctl -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[0]);
+ if (if_getcapenable(ifp) & IFCAP_NETMAP) {
+ struct netmap_adapter *na = netmap_getna(adapter->ifp);
+ rctl -= nm_kr_rxspace(&na->rx_rings[0]);
+ }
#endif /* DEV_NETMAP */
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rctl);
@@ -3442,14 +3562,30 @@ lem_free_receive_structures(struct adapter *adapter)
static bool
lem_rxeof(struct adapter *adapter, int count, int *done)
{
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
struct mbuf *mp;
u8 status = 0, accept_frame = 0, eop = 0;
u16 len, desc_len, prev_len_adj;
int i, rx_sent = 0;
struct e1000_rx_desc *current_desc;
+#ifdef BATCH_DISPATCH
+ struct mbuf *mh = NULL, *mt = NULL;
+#endif /* BATCH_DISPATCH */
+#ifdef NIC_PARAVIRT
+ int retries = 0;
+ struct paravirt_csb* csb = adapter->csb;
+ int csb_mode = csb && csb->guest_csb_on;
+
+ //ND("clear guest_rxkick at %d", adapter->next_rx_desc_to_check);
+ if (csb_mode && csb->guest_need_rxkick)
+ csb->guest_need_rxkick = 0;
+#endif /* NIC_PARAVIRT */
EM_RX_LOCK(adapter);
+
+#ifdef BATCH_DISPATCH
+ batch_again:
+#endif /* BATCH_DISPATCH */
i = adapter->next_rx_desc_to_check;
current_desc = &adapter->rx_desc_base[i];
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
@@ -3462,19 +3598,45 @@ lem_rxeof(struct adapter *adapter, int count, int *done)
}
#endif /* DEV_NETMAP */
+#if 1 // XXX optimization ?
if (!((current_desc->status) & E1000_RXD_STAT_DD)) {
if (done != NULL)
*done = rx_sent;
EM_RX_UNLOCK(adapter);
return (FALSE);
}
+#endif /* 0 */
- while (count != 0 && ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ while (count != 0 && if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
struct mbuf *m = NULL;
status = current_desc->status;
- if ((status & E1000_RXD_STAT_DD) == 0)
+ if ((status & E1000_RXD_STAT_DD) == 0) {
+#ifdef NIC_PARAVIRT
+ if (csb_mode) {
+ /* buffer not ready yet. Retry a few times before giving up */
+ if (++retries <= adapter->rx_retries) {
+ continue;
+ }
+ if (csb->guest_need_rxkick == 0) {
+ // ND("set guest_rxkick at %d", adapter->next_rx_desc_to_check);
+ csb->guest_need_rxkick = 1;
+ // XXX memory barrier, status volatile ?
+ continue; /* double check */
+ }
+ }
+ /* no buffer ready, give up */
+#endif /* NIC_PARAVIRT */
break;
+ }
+#ifdef NIC_PARAVIRT
+ if (csb_mode) {
+ if (csb->guest_need_rxkick)
+ // ND("clear again guest_rxkick at %d", adapter->next_rx_desc_to_check);
+ csb->guest_need_rxkick = 0;
+ retries = 0;
+ }
+#endif /* NIC_PARAVIRT */
mp = adapter->rx_buffer_area[i].m_head;
/*
@@ -3523,7 +3685,7 @@ lem_rxeof(struct adapter *adapter, int count, int *done)
if (accept_frame) {
if (lem_get_buf(adapter, i) != 0) {
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
goto discard;
}
@@ -3553,8 +3715,8 @@ lem_rxeof(struct adapter *adapter, int count, int *done)
}
if (eop) {
- adapter->fmp->m_pkthdr.rcvif = ifp;
- ifp->if_ipackets++;
+ if_setrcvif(adapter->fmp, ifp);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
lem_receive_checksum(adapter, current_desc,
adapter->fmp);
#ifndef __NO_STRICT_ALIGNMENT
@@ -3599,14 +3761,39 @@ discard:
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+#ifdef NIC_PARAVIRT
+ if (csb_mode) {
+ /* the buffer at i has been already replaced by lem_get_buf()
+ * so it is safe to set guest_rdt = i and possibly send a kick.
+ * XXX see if we can optimize it later.
+ */
+ csb->guest_rdt = i;
+ // XXX memory barrier
+ if (i == csb->host_rxkick_at)
+ E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i);
+ }
+#endif /* NIC_PARAVIRT */
/* Advance our pointers to the next descriptor. */
if (++i == adapter->num_rx_desc)
i = 0;
/* Call into the stack */
if (m != NULL) {
+#ifdef BATCH_DISPATCH
+ if (adapter->batch_enable) {
+ if (mh == NULL)
+ mh = mt = m;
+ else
+ mt->m_nextpkt = m;
+ mt = m;
+ m->m_nextpkt = NULL;
+ rx_sent++;
+ current_desc = &adapter->rx_desc_base[i];
+ continue;
+ }
+#endif /* BATCH_DISPATCH */
adapter->next_rx_desc_to_check = i;
EM_RX_UNLOCK(adapter);
- (*ifp->if_input)(ifp, m);
+ if_input(ifp, m);
EM_RX_LOCK(adapter);
rx_sent++;
i = adapter->next_rx_desc_to_check;
@@ -3614,10 +3801,27 @@ discard:
current_desc = &adapter->rx_desc_base[i];
}
adapter->next_rx_desc_to_check = i;
+#ifdef BATCH_DISPATCH
+ if (mh) {
+ EM_RX_UNLOCK(adapter);
+ while ( (mt = mh) != NULL) {
+ mh = mh->m_nextpkt;
+ mt->m_nextpkt = NULL;
+ if_input(ifp, mt);
+ }
+ EM_RX_LOCK(adapter);
+ i = adapter->next_rx_desc_to_check; /* in case of interrupts */
+ if (count > 0)
+ goto batch_again;
+ }
+#endif /* BATCH_DISPATCH */
/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
if (--i < 0)
i = adapter->num_rx_desc - 1;
+#ifdef NIC_PARAVIRT
+ if (!csb_mode) /* filter out writes */
+#endif /* NIC_PARAVIRT */
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i);
if (done != NULL)
*done = rx_sent;
@@ -3637,7 +3841,7 @@ discard:
* copy ethernet header to the new mbuf. The new mbuf is prepended into the
* existing mbuf chain.
*
- * Be aware, best performance of the 8254x is achived only when jumbo frame is
+ * Be aware, best performance of the 8254x is achieved only when jumbo frame is
* not used at all on architectures with strict alignment.
*/
static int
@@ -3719,12 +3923,12 @@ lem_receive_checksum(struct adapter *adapter,
* config EVENT
*/
static void
-lem_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
+lem_register_vlan(void *arg, if_t ifp, u16 vtag)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
u32 index, bit;
- if (ifp->if_softc != arg) /* Not our event */
+ if (if_getsoftc(ifp) != arg) /* Not our event */
return;
if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */
@@ -3736,7 +3940,7 @@ lem_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
adapter->shadow_vfta[index] |= (1 << bit);
++adapter->num_vlans;
/* Re-init to load the changes */
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
lem_init_locked(adapter);
EM_CORE_UNLOCK(adapter);
}
@@ -3746,12 +3950,12 @@ lem_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
* unconfig EVENT
*/
static void
-lem_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
+lem_unregister_vlan(void *arg, if_t ifp, u16 vtag)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
u32 index, bit;
- if (ifp->if_softc != arg)
+ if (if_getsoftc(ifp) != arg)
return;
if ((vtag == 0) || (vtag > 4095)) /* Invalid */
@@ -3763,7 +3967,7 @@ lem_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
adapter->shadow_vfta[index] &= ~(1 << bit);
--adapter->num_vlans;
/* Re-init to load the changes */
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
lem_init_locked(adapter);
EM_CORE_UNLOCK(adapter);
}
@@ -3980,7 +4184,7 @@ static void
lem_enable_wakeup(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
u32 pmc, ctrl, ctrl_ext, rctl;
u16 status;
@@ -4005,10 +4209,10 @@ lem_enable_wakeup(device_t dev)
** Determine type of Wakeup: note that wol
** is set with all bits on by default.
*/
- if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
+ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
adapter->wol &= ~E1000_WUFC_MAG;
- if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
+ if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
adapter->wol &= ~E1000_WUFC_MC;
else {
rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
@@ -4028,7 +4232,7 @@ lem_enable_wakeup(device_t dev)
/* Request PME */
status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
- if (ifp->if_capenable & IFCAP_WOL)
+ if (if_getcapenable(ifp) & IFCAP_WOL)
status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
@@ -4197,7 +4401,6 @@ lem_fill_descriptors (bus_addr_t address, u32 length,
static void
lem_update_stats_counters(struct adapter *adapter)
{
- struct ifnet *ifp;
if(adapter->hw.phy.media_type == e1000_media_type_copper ||
(E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
@@ -4272,19 +4475,29 @@ lem_update_stats_counters(struct adapter *adapter)
adapter->stats.tsctfc +=
E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
}
- ifp = adapter->ifp;
-
- ifp->if_collisions = adapter->stats.colc;
+}
- /* Rx Errors */
- ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
- adapter->stats.crcerrs + adapter->stats.algnerrc +
- adapter->stats.ruc + adapter->stats.roc +
- adapter->stats.mpc + adapter->stats.cexterr;
+static uint64_t
+lem_get_counter(if_t ifp, ift_counter cnt)
+{
+ struct adapter *adapter;
- /* Tx Errors */
- ifp->if_oerrors = adapter->stats.ecol +
- adapter->stats.latecol + adapter->watchdog_events;
+ adapter = if_getsoftc(ifp);
+
+ switch (cnt) {
+ case IFCOUNTER_COLLISIONS:
+ return (adapter->stats.colc);
+ case IFCOUNTER_IERRORS:
+ return (adapter->dropped_pkts + adapter->stats.rxerrc +
+ adapter->stats.crcerrs + adapter->stats.algnerrc +
+ adapter->stats.ruc + adapter->stats.roc +
+ adapter->stats.mpc + adapter->stats.cexterr);
+ case IFCOUNTER_OERRORS:
+ return (adapter->stats.ecol + adapter->stats.latecol +
+ adapter->watchdog_events);
+ default:
+ return (if_get_counter_default(ifp, cnt));
+ }
}
/* Export a single 32-bit register via a read-only sysctl. */
@@ -4316,12 +4529,12 @@ lem_add_hw_stats(struct adapter *adapter)
struct sysctl_oid_list *stat_list;
/* Driver Statistics */
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
- CTLFLAG_RD, &adapter->mbuf_alloc_failed,
- "Std mbuf failed");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
CTLFLAG_RD, &adapter->mbuf_cluster_failed,
"Std mbuf cluster failed");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
+ CTLFLAG_RD, &adapter->mbuf_defrag_failed,
+ "Defragmenting mbuf chain failed");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
CTLFLAG_RD, &adapter->dropped_pkts,
"Driver dropped packets");
diff --git a/freebsd/sys/dev/e1000/if_lem.h b/freebsd/sys/dev/e1000/if_lem.h
index 235277d7..4a27c34b 100644
--- a/freebsd/sys/dev/e1000/if_lem.h
+++ b/freebsd/sys/dev/e1000/if_lem.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2011, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -236,10 +236,8 @@
#define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A)
#define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B)
-#define EM_MAX_SCATTER 64
+#define EM_MAX_SCATTER 40
#define EM_VFTA_SIZE 128
-#define EM_TSO_SIZE (65535 + sizeof(struct ether_vlan_header))
-#define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */
#define EM_MSIX_MASK 0x01F00000 /* For 82574 use */
#define ETH_ZLEN 60
#define ETH_ADDR_LEN 6
@@ -265,6 +263,13 @@
#define PICOSECS_PER_TICK 20833
#define TSYNC_PORT 319 /* UDP port for the protocol */
+#ifdef NIC_PARAVIRT
+#define E1000_PARA_SUBDEV 0x1101 /* special id */
+#define E1000_CSBAL 0x02830 /* csb phys. addr. low */
+#define E1000_CSBAH 0x02834 /* csb phys. addr. hi */
+#include <net/paravirt.h>
+#endif /* NIC_PARAVIRT */
+
/*
* Bus dma allocation structure used by
* e1000_dma_malloc and e1000_dma_free.
@@ -288,15 +293,12 @@ struct em_int_delay_info {
/* Our adapter structure */
struct adapter {
- struct ifnet *ifp;
-#if __FreeBSD_version >= 800000
- struct buf_ring *br;
-#endif
+ if_t ifp;
struct e1000_hw hw;
/* FreeBSD operating-system-specific structures. */
struct e1000_osdep osdep;
- struct device *dev;
+ device_t dev;
struct cdev *led_dev;
struct resource *memory;
@@ -413,17 +415,17 @@ struct adapter {
/* Misc stats maintained by the driver */
unsigned long dropped_pkts;
- unsigned long mbuf_alloc_failed;
+ unsigned long link_irq;
unsigned long mbuf_cluster_failed;
+ unsigned long mbuf_defrag_failed;
unsigned long no_tx_desc_avail1;
unsigned long no_tx_desc_avail2;
+ unsigned long no_tx_dma_setup;
unsigned long no_tx_map_avail;
- unsigned long no_tx_dma_setup;
unsigned long watchdog_events;
- unsigned long rx_overruns;
unsigned long rx_irq;
+ unsigned long rx_overruns;
unsigned long tx_irq;
- unsigned long link_irq;
/* 82547 workaround */
uint32_t tx_fifo_size;
@@ -437,6 +439,26 @@ struct adapter {
boolean_t pcix_82544;
boolean_t in_detach;
+#ifdef NIC_SEND_COMBINING
+ /* 0 = idle; 1xxxx int-pending; 3xxxx int + d pending + tdt */
+#define MIT_PENDING_INT 0x10000 /* pending interrupt */
+#define MIT_PENDING_TDT 0x30000 /* both intr and tdt write are pending */
+ uint32_t shadow_tdt;
+ uint32_t sc_enable;
+#endif /* NIC_SEND_COMBINING */
+#ifdef BATCH_DISPATCH
+ uint32_t batch_enable;
+#endif /* BATCH_DISPATCH */
+
+#ifdef NIC_PARAVIRT
+ struct em_dma_alloc csb_mem; /* phys address */
+ struct paravirt_csb *csb; /* virtual addr */
+ uint32_t rx_retries; /* optimize rx loop */
+ uint32_t tdt_csb_count;// XXX stat
+ uint32_t tdt_reg_count;// XXX stat
+ uint32_t tdt_int_count;// XXX stat
+ uint32_t guest_need_kick_count;// XXX stat
+#endif /* NIC_PARAVIRT */
struct e1000_hw_stats stats;
};
diff --git a/freebsd/sys/dev/fxp/if_fxp.c b/freebsd/sys/dev/fxp/if_fxp.c
index 806163a2..b3d4fa15 100644
--- a/freebsd/sys/dev/fxp/if_fxp.c
+++ b/freebsd/sys/dev/fxp/if_fxp.c
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/rman.h>
@@ -57,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
@@ -158,52 +160,52 @@ static const u_char fxp_cb_config_template[] = {
* them.
*/
static const struct fxp_ident fxp_ident_table[] = {
- { 0x1029, -1, 0, "Intel 82559 PCI/CardBus Pro/100" },
- { 0x1030, -1, 0, "Intel 82559 Pro/100 Ethernet" },
- { 0x1031, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VE Ethernet" },
- { 0x1032, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VE Ethernet" },
- { 0x1033, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VM Ethernet" },
- { 0x1034, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VM Ethernet" },
- { 0x1035, -1, 3, "Intel 82801CAM (ICH3) Pro/100 Ethernet" },
- { 0x1036, -1, 3, "Intel 82801CAM (ICH3) Pro/100 Ethernet" },
- { 0x1037, -1, 3, "Intel 82801CAM (ICH3) Pro/100 Ethernet" },
- { 0x1038, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VM Ethernet" },
- { 0x1039, -1, 4, "Intel 82801DB (ICH4) Pro/100 VE Ethernet" },
- { 0x103A, -1, 4, "Intel 82801DB (ICH4) Pro/100 Ethernet" },
- { 0x103B, -1, 4, "Intel 82801DB (ICH4) Pro/100 VM Ethernet" },
- { 0x103C, -1, 4, "Intel 82801DB (ICH4) Pro/100 Ethernet" },
- { 0x103D, -1, 4, "Intel 82801DB (ICH4) Pro/100 VE Ethernet" },
- { 0x103E, -1, 4, "Intel 82801DB (ICH4) Pro/100 VM Ethernet" },
- { 0x1050, -1, 5, "Intel 82801BA (D865) Pro/100 VE Ethernet" },
- { 0x1051, -1, 5, "Intel 82562ET (ICH5/ICH5R) Pro/100 VE Ethernet" },
- { 0x1059, -1, 0, "Intel 82551QM Pro/100 M Mobile Connection" },
- { 0x1064, -1, 6, "Intel 82562EZ (ICH6)" },
- { 0x1065, -1, 6, "Intel 82562ET/EZ/GT/GZ PRO/100 VE Ethernet" },
- { 0x1068, -1, 6, "Intel 82801FBM (ICH6-M) Pro/100 VE Ethernet" },
- { 0x1069, -1, 6, "Intel 82562EM/EX/GX Pro/100 Ethernet" },
- { 0x1091, -1, 7, "Intel 82562GX Pro/100 Ethernet" },
- { 0x1092, -1, 7, "Intel Pro/100 VE Network Connection" },
- { 0x1093, -1, 7, "Intel Pro/100 VM Network Connection" },
- { 0x1094, -1, 7, "Intel Pro/100 946GZ (ICH7) Network Connection" },
- { 0x1209, -1, 0, "Intel 82559ER Embedded 10/100 Ethernet" },
- { 0x1229, 0x01, 0, "Intel 82557 Pro/100 Ethernet" },
- { 0x1229, 0x02, 0, "Intel 82557 Pro/100 Ethernet" },
- { 0x1229, 0x03, 0, "Intel 82557 Pro/100 Ethernet" },
- { 0x1229, 0x04, 0, "Intel 82558 Pro/100 Ethernet" },
- { 0x1229, 0x05, 0, "Intel 82558 Pro/100 Ethernet" },
- { 0x1229, 0x06, 0, "Intel 82559 Pro/100 Ethernet" },
- { 0x1229, 0x07, 0, "Intel 82559 Pro/100 Ethernet" },
- { 0x1229, 0x08, 0, "Intel 82559 Pro/100 Ethernet" },
- { 0x1229, 0x09, 0, "Intel 82559ER Pro/100 Ethernet" },
- { 0x1229, 0x0c, 0, "Intel 82550 Pro/100 Ethernet" },
- { 0x1229, 0x0d, 0, "Intel 82550C Pro/100 Ethernet" },
- { 0x1229, 0x0e, 0, "Intel 82550 Pro/100 Ethernet" },
- { 0x1229, 0x0f, 0, "Intel 82551 Pro/100 Ethernet" },
- { 0x1229, 0x10, 0, "Intel 82551 Pro/100 Ethernet" },
- { 0x1229, -1, 0, "Intel 82557/8/9 Pro/100 Ethernet" },
- { 0x2449, -1, 2, "Intel 82801BA/CAM (ICH2/3) Pro/100 Ethernet" },
- { 0x27dc, -1, 7, "Intel 82801GB (ICH7) 10/100 Ethernet" },
- { 0, -1, 0, NULL },
+ { 0x8086, 0x1029, -1, 0, "Intel 82559 PCI/CardBus Pro/100" },
+ { 0x8086, 0x1030, -1, 0, "Intel 82559 Pro/100 Ethernet" },
+ { 0x8086, 0x1031, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VE Ethernet" },
+ { 0x8086, 0x1032, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VE Ethernet" },
+ { 0x8086, 0x1033, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VM Ethernet" },
+ { 0x8086, 0x1034, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VM Ethernet" },
+ { 0x8086, 0x1035, -1, 3, "Intel 82801CAM (ICH3) Pro/100 Ethernet" },
+ { 0x8086, 0x1036, -1, 3, "Intel 82801CAM (ICH3) Pro/100 Ethernet" },
+ { 0x8086, 0x1037, -1, 3, "Intel 82801CAM (ICH3) Pro/100 Ethernet" },
+ { 0x8086, 0x1038, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VM Ethernet" },
+ { 0x8086, 0x1039, -1, 4, "Intel 82801DB (ICH4) Pro/100 VE Ethernet" },
+ { 0x8086, 0x103A, -1, 4, "Intel 82801DB (ICH4) Pro/100 Ethernet" },
+ { 0x8086, 0x103B, -1, 4, "Intel 82801DB (ICH4) Pro/100 VM Ethernet" },
+ { 0x8086, 0x103C, -1, 4, "Intel 82801DB (ICH4) Pro/100 Ethernet" },
+ { 0x8086, 0x103D, -1, 4, "Intel 82801DB (ICH4) Pro/100 VE Ethernet" },
+ { 0x8086, 0x103E, -1, 4, "Intel 82801DB (ICH4) Pro/100 VM Ethernet" },
+ { 0x8086, 0x1050, -1, 5, "Intel 82801BA (D865) Pro/100 VE Ethernet" },
+ { 0x8086, 0x1051, -1, 5, "Intel 82562ET (ICH5/ICH5R) Pro/100 VE Ethernet" },
+ { 0x8086, 0x1059, -1, 0, "Intel 82551QM Pro/100 M Mobile Connection" },
+ { 0x8086, 0x1064, -1, 6, "Intel 82562EZ (ICH6)" },
+ { 0x8086, 0x1065, -1, 6, "Intel 82562ET/EZ/GT/GZ PRO/100 VE Ethernet" },
+ { 0x8086, 0x1068, -1, 6, "Intel 82801FBM (ICH6-M) Pro/100 VE Ethernet" },
+ { 0x8086, 0x1069, -1, 6, "Intel 82562EM/EX/GX Pro/100 Ethernet" },
+ { 0x8086, 0x1091, -1, 7, "Intel 82562GX Pro/100 Ethernet" },
+ { 0x8086, 0x1092, -1, 7, "Intel Pro/100 VE Network Connection" },
+ { 0x8086, 0x1093, -1, 7, "Intel Pro/100 VM Network Connection" },
+ { 0x8086, 0x1094, -1, 7, "Intel Pro/100 946GZ (ICH7) Network Connection" },
+ { 0x8086, 0x1209, -1, 0, "Intel 82559ER Embedded 10/100 Ethernet" },
+ { 0x8086, 0x1229, 0x01, 0, "Intel 82557 Pro/100 Ethernet" },
+ { 0x8086, 0x1229, 0x02, 0, "Intel 82557 Pro/100 Ethernet" },
+ { 0x8086, 0x1229, 0x03, 0, "Intel 82557 Pro/100 Ethernet" },
+ { 0x8086, 0x1229, 0x04, 0, "Intel 82558 Pro/100 Ethernet" },
+ { 0x8086, 0x1229, 0x05, 0, "Intel 82558 Pro/100 Ethernet" },
+ { 0x8086, 0x1229, 0x06, 0, "Intel 82559 Pro/100 Ethernet" },
+ { 0x8086, 0x1229, 0x07, 0, "Intel 82559 Pro/100 Ethernet" },
+ { 0x8086, 0x1229, 0x08, 0, "Intel 82559 Pro/100 Ethernet" },
+ { 0x8086, 0x1229, 0x09, 0, "Intel 82559ER Pro/100 Ethernet" },
+ { 0x8086, 0x1229, 0x0c, 0, "Intel 82550 Pro/100 Ethernet" },
+ { 0x8086, 0x1229, 0x0d, 0, "Intel 82550C Pro/100 Ethernet" },
+ { 0x8086, 0x1229, 0x0e, 0, "Intel 82550 Pro/100 Ethernet" },
+ { 0x8086, 0x1229, 0x0f, 0, "Intel 82551 Pro/100 Ethernet" },
+ { 0x8086, 0x1229, 0x10, 0, "Intel 82551 Pro/100 Ethernet" },
+ { 0x8086, 0x1229, -1, 0, "Intel 82557/8/9 Pro/100 Ethernet" },
+ { 0x8086, 0x2449, -1, 2, "Intel 82801BA/CAM (ICH2/3) Pro/100 Ethernet" },
+ { 0x8086, 0x27dc, -1, 7, "Intel 82801GB (ICH7) 10/100 Ethernet" },
+ { 0, 0, -1, 0, NULL },
};
#ifdef FXP_IP_CSUM_WAR
@@ -221,20 +223,20 @@ static int fxp_resume(device_t dev);
static const struct fxp_ident *fxp_find_ident(device_t dev);
static void fxp_intr(void *xsc);
-static void fxp_rxcsum(struct fxp_softc *sc, struct ifnet *ifp,
+static void fxp_rxcsum(struct fxp_softc *sc, if_t ifp,
struct mbuf *m, uint16_t status, int pos);
-static int fxp_intr_body(struct fxp_softc *sc, struct ifnet *ifp,
+static int fxp_intr_body(struct fxp_softc *sc, if_t ifp,
uint8_t statack, int count);
static void fxp_init(void *xsc);
static void fxp_init_body(struct fxp_softc *sc, int);
static void fxp_tick(void *xsc);
-static void fxp_start(struct ifnet *ifp);
-static void fxp_start_body(struct ifnet *ifp);
+static void fxp_start(if_t ifp);
+static void fxp_start_body(if_t ifp);
static int fxp_encap(struct fxp_softc *sc, struct mbuf **m_head);
static void fxp_txeof(struct fxp_softc *sc);
static void fxp_stop(struct fxp_softc *sc);
static void fxp_release(struct fxp_softc *sc);
-static int fxp_ioctl(struct ifnet *ifp, u_long command,
+static int fxp_ioctl(if_t ifp, u_long command,
caddr_t data);
static void fxp_watchdog(struct fxp_softc *sc);
static void fxp_add_rfabuf(struct fxp_softc *sc,
@@ -255,11 +257,11 @@ static void fxp_read_eeprom(struct fxp_softc *sc, u_short *data,
int offset, int words);
static void fxp_write_eeprom(struct fxp_softc *sc, u_short *data,
int offset, int words);
-static int fxp_ifmedia_upd(struct ifnet *ifp);
-static void fxp_ifmedia_sts(struct ifnet *ifp,
+static int fxp_ifmedia_upd(if_t ifp);
+static void fxp_ifmedia_sts(if_t ifp,
struct ifmediareq *ifmr);
-static int fxp_serial_ifmedia_upd(struct ifnet *ifp);
-static void fxp_serial_ifmedia_sts(struct ifnet *ifp,
+static int fxp_serial_ifmedia_upd(if_t ifp);
+static void fxp_serial_ifmedia_sts(if_t ifp,
struct ifmediareq *ifmr);
static int fxp_miibus_readreg(device_t dev, int phy, int reg);
static int fxp_miibus_writereg(device_t dev, int phy, int reg,
@@ -375,18 +377,18 @@ fxp_dma_wait(struct fxp_softc *sc, volatile uint16_t *status,
static const struct fxp_ident *
fxp_find_ident(device_t dev)
{
- uint16_t devid;
+ uint16_t vendor;
+ uint16_t device;
uint8_t revid;
const struct fxp_ident *ident;
- if (pci_get_vendor(dev) == FXP_VENDORID_INTEL) {
- devid = pci_get_device(dev);
- revid = pci_get_revid(dev);
- for (ident = fxp_ident_table; ident->name != NULL; ident++) {
- if (ident->devid == devid &&
- (ident->revid == revid || ident->revid == -1)) {
- return (ident);
- }
+ vendor = pci_get_vendor(dev);
+ device = pci_get_device(dev);
+ revid = pci_get_revid(dev);
+ for (ident = fxp_ident_table; ident->name != NULL; ident++) {
+ if (ident->vendor == vendor && ident->device == device &&
+ (ident->revid == revid || ident->revid == -1)) {
+ return (ident);
}
}
return (NULL);
@@ -428,7 +430,7 @@ fxp_attach(device_t dev)
struct fxp_cb_tx *tcbp;
struct fxp_tx *txp;
struct fxp_rx *rxp;
- struct ifnet *ifp;
+ if_t ifp;
uint32_t val;
uint16_t data;
u_char eaddr[ETHER_ADDR_LEN];
@@ -443,8 +445,8 @@ fxp_attach(device_t dev)
ifmedia_init(&sc->sc_media, 0, fxp_serial_ifmedia_upd,
fxp_serial_ifmedia_sts);
- ifp = sc->ifp = if_alloc(IFT_ETHER);
- if (ifp == NULL) {
+ ifp = sc->ifp = if_gethandle(IFT_ETHER);
+ if (ifp == (void *)NULL) {
device_printf(dev, "can not if_alloc()\n");
error = ENOSPC;
goto fail;
@@ -629,7 +631,7 @@ fxp_attach(device_t dev)
/* For 82559 or later chips, Rx checksum offload is supported. */
if (sc->revision >= FXP_REV_82559_A0) {
/* 82559ER does not support Rx checksum offloading. */
- if (sc->ident->devid != 0x1209)
+ if (sc->ident->device != 0x1209)
sc->flags |= FXP_FLAG_82559_RXCSUM;
}
/*
@@ -828,9 +830,10 @@ fxp_attach(device_t dev)
flags = MIIF_NOISOLATE;
if (sc->revision >= FXP_REV_82558_A4)
flags |= MIIF_DOPAUSE;
- error = mii_attach(dev, &sc->miibus, ifp, fxp_ifmedia_upd,
- fxp_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY,
- MII_OFFSET_ANY, flags);
+ error = mii_attach(dev, &sc->miibus, ifp,
+ (ifm_change_cb_t)fxp_ifmedia_upd,
+ (ifm_stat_cb_t)fxp_ifmedia_sts, BMSR_DEFCAPMASK,
+ MII_PHY_ANY, MII_OFFSET_ANY, flags);
if (error != 0) {
device_printf(dev, "attaching PHYs failed\n");
goto fail;
@@ -838,34 +841,36 @@ fxp_attach(device_t dev)
}
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
- ifp->if_init = fxp_init;
- ifp->if_softc = sc;
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_ioctl = fxp_ioctl;
- ifp->if_start = fxp_start;
+ if_setdev(ifp, dev);
+ if_setinitfn(ifp, fxp_init);
+ if_setsoftc(ifp, sc);
+ if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
+ if_setioctlfn(ifp, fxp_ioctl);
+ if_setstartfn(ifp, fxp_start);
- ifp->if_capabilities = ifp->if_capenable = 0;
+ if_setcapabilities(ifp, 0);
+ if_setcapenable(ifp, 0);
/* Enable checksum offload/TSO for 82550 or better chips */
if (sc->flags & FXP_FLAG_EXT_RFA) {
- ifp->if_hwassist = FXP_CSUM_FEATURES | CSUM_TSO;
- ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO4;
- ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_TSO4;
+ if_sethwassist(ifp, FXP_CSUM_FEATURES | CSUM_TSO);
+ if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_TSO4, 0);
+ if_setcapenablebit(ifp, IFCAP_HWCSUM | IFCAP_TSO4, 0);
}
if (sc->flags & FXP_FLAG_82559_RXCSUM) {
- ifp->if_capabilities |= IFCAP_RXCSUM;
- ifp->if_capenable |= IFCAP_RXCSUM;
+ if_setcapabilitiesbit(ifp, IFCAP_RXCSUM, 0);
+ if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
}
if (sc->flags & FXP_FLAG_WOLCAP) {
- ifp->if_capabilities |= IFCAP_WOL_MAGIC;
- ifp->if_capenable |= IFCAP_WOL_MAGIC;
+ if_setcapabilitiesbit(ifp, IFCAP_WOL_MAGIC, 0);
+ if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
}
#ifdef DEVICE_POLLING
/* Inform the world we support polling. */
- ifp->if_capabilities |= IFCAP_POLLING;
+ if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
#endif
/*
@@ -878,23 +883,22 @@ fxp_attach(device_t dev)
* Must appear after the call to ether_ifattach() because
* ether_ifattach() sets ifi_hdrlen to the default value.
*/
- ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
- ifp->if_capabilities |= IFCAP_VLAN_MTU;
- ifp->if_capenable |= IFCAP_VLAN_MTU; /* the hw bits already set */
+ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
+ if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
+ if_setcapenablebit(ifp, IFCAP_VLAN_MTU, 0);
if ((sc->flags & FXP_FLAG_EXT_RFA) != 0) {
- ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING |
- IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO;
- ifp->if_capenable |= IFCAP_VLAN_HWTAGGING |
- IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO;
+ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING |
+ IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO, 0);
+ if_setcapenablebit(ifp, IFCAP_VLAN_HWTAGGING |
+ IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO, 0);
}
/*
* Let the system queue as many packets as we have available
* TX descriptors.
*/
- IFQ_SET_MAXLEN(&ifp->if_snd, FXP_NTXCB - 1);
- ifp->if_snd.ifq_drv_maxlen = FXP_NTXCB - 1;
- IFQ_SET_READY(&ifp->if_snd);
+ if_setsendqlen(ifp, FXP_NTXCB - 1);
+ if_setsendqready(ifp);
/*
* Hook our interrupt after all initialization is complete.
@@ -1006,7 +1010,7 @@ fxp_detach(device_t dev)
struct fxp_softc *sc = device_get_softc(dev);
#ifdef DEVICE_POLLING
- if (sc->ifp->if_capenable & IFCAP_POLLING)
+ if (if_getcapenable(sc->ifp) & IFCAP_POLLING)
ether_poll_deregister(sc->ifp);
#endif
@@ -1062,7 +1066,7 @@ static int
fxp_suspend(device_t dev)
{
struct fxp_softc *sc = device_get_softc(dev);
- struct ifnet *ifp;
+ if_t ifp;
int pmc;
uint16_t pmstat;
@@ -1072,12 +1076,12 @@ fxp_suspend(device_t dev)
if (pci_find_cap(sc->dev, PCIY_PMG, &pmc) == 0) {
pmstat = pci_read_config(sc->dev, pmc + PCIR_POWER_STATUS, 2);
pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
- if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) {
+ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) != 0) {
/* Request PME. */
pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
sc->flags |= FXP_FLAG_WOL;
/* Reconfigure hardware to accept magic frames. */
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
fxp_init_body(sc, 0);
}
pci_write_config(sc->dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
@@ -1098,7 +1102,7 @@ static int
fxp_resume(device_t dev)
{
struct fxp_softc *sc = device_get_softc(dev);
- struct ifnet *ifp = sc->ifp;
+ if_t ifp = sc->ifp;
int pmc;
uint16_t pmstat;
@@ -1119,7 +1123,7 @@ fxp_resume(device_t dev)
DELAY(10);
/* reinitialize interface if necessary */
- if (ifp->if_flags & IFF_UP)
+ if (if_getflags(ifp) & IFF_UP)
fxp_init_body(sc, 1);
sc->suspended = 0;
@@ -1261,7 +1265,7 @@ fxp_eeprom_putword(struct fxp_softc *sc, int offset, uint16_t data)
*
* 559's can have either 64-word or 256-word EEPROMs, the 558
* datasheet only talks about 64-word EEPROMs, and the 557 datasheet
- * talks about the existance of 16 to 256 word EEPROMs.
+ * talks about the existence of 16 to 256 word EEPROMs.
*
* The only known sizes are 64 and 256, where the 256 version is used
* by CardBus cards to store CIS information.
@@ -1324,9 +1328,9 @@ fxp_load_eeprom(struct fxp_softc *sc)
* Grab the softc lock and call the real fxp_start_body() routine
*/
static void
-fxp_start(struct ifnet *ifp)
+fxp_start(if_t ifp)
{
- struct fxp_softc *sc = ifp->if_softc;
+ struct fxp_softc *sc = if_getsoftc(ifp);
FXP_LOCK(sc);
fxp_start_body(ifp);
@@ -1339,15 +1343,15 @@ fxp_start(struct ifnet *ifp)
* internal entry point only.
*/
static void
-fxp_start_body(struct ifnet *ifp)
+fxp_start_body(if_t ifp)
{
- struct fxp_softc *sc = ifp->if_softc;
+ struct fxp_softc *sc = if_getsoftc(ifp);
struct mbuf *mb_head;
int txqueued;
FXP_LOCK_ASSERT(sc, MA_OWNED);
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
+ if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING)
return;
@@ -1360,27 +1364,26 @@ fxp_start_body(struct ifnet *ifp)
* a NOP command when needed.
*/
txqueued = 0;
- while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
- sc->tx_queued < FXP_NTXCB - 1) {
+ while (!if_sendq_empty(ifp) && sc->tx_queued < FXP_NTXCB - 1) {
/*
* Grab a packet to transmit.
*/
- IFQ_DRV_DEQUEUE(&ifp->if_snd, mb_head);
+ mb_head = if_dequeue(ifp);
if (mb_head == NULL)
break;
if (fxp_encap(sc, &mb_head)) {
if (mb_head == NULL)
break;
- IFQ_DRV_PREPEND(&ifp->if_snd, mb_head);
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if_sendq_prepend(ifp, mb_head);
+ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
}
txqueued++;
/*
* Pass packet to bpf if there is a listener.
*/
- BPF_MTAP(ifp, mb_head);
+ if_bpfmtap(ifp, mb_head);
}
/*
@@ -1403,7 +1406,7 @@ fxp_start_body(struct ifnet *ifp)
static int
fxp_encap(struct fxp_softc *sc, struct mbuf **m_head)
{
- struct ifnet *ifp;
+ if_t ifp;
struct mbuf *m;
struct fxp_tx *txp;
struct fxp_cb_tx *cbp;
@@ -1673,14 +1676,14 @@ fxp_encap(struct fxp_softc *sc, struct mbuf **m_head)
static poll_handler_t fxp_poll;
static int
-fxp_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
+fxp_poll(if_t ifp, enum poll_cmd cmd, int count)
{
- struct fxp_softc *sc = ifp->if_softc;
+ struct fxp_softc *sc = if_getsoftc(ifp);
uint8_t statack;
int rx_npkts = 0;
FXP_LOCK(sc);
- if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) {
FXP_UNLOCK(sc);
return (rx_npkts);
}
@@ -1714,7 +1717,7 @@ static void
fxp_intr(void *xsc)
{
struct fxp_softc *sc = xsc;
- struct ifnet *ifp = sc->ifp;
+ if_t ifp = sc->ifp;
uint8_t statack;
FXP_LOCK(sc);
@@ -1724,7 +1727,7 @@ fxp_intr(void *xsc)
}
#ifdef DEVICE_POLLING
- if (ifp->if_capenable & IFCAP_POLLING) {
+ if (if_getcapenable(ifp) & IFCAP_POLLING) {
FXP_UNLOCK(sc);
return;
}
@@ -1745,7 +1748,7 @@ fxp_intr(void *xsc)
* First ACK all the interrupts in this pass.
*/
CSR_WRITE_1(sc, FXP_CSR_SCB_STATACK, statack);
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)
fxp_intr_body(sc, ifp, statack, -1);
}
FXP_UNLOCK(sc);
@@ -1754,7 +1757,7 @@ fxp_intr(void *xsc)
static void
fxp_txeof(struct fxp_softc *sc)
{
- struct ifnet *ifp;
+ if_t ifp;
struct fxp_tx *txp;
ifp = sc->ifp;
@@ -1773,7 +1776,7 @@ fxp_txeof(struct fxp_softc *sc)
txp->tx_cb->tbd[0].tb_addr = 0;
}
sc->tx_queued--;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
}
sc->fxp_desc.tx_first = txp;
bus_dmamap_sync(sc->cbl_tag, sc->cbl_map,
@@ -1783,7 +1786,7 @@ fxp_txeof(struct fxp_softc *sc)
}
static void
-fxp_rxcsum(struct fxp_softc *sc, struct ifnet *ifp, struct mbuf *m,
+fxp_rxcsum(struct fxp_softc *sc, if_t ifp, struct mbuf *m,
uint16_t status, int pos)
{
struct ether_header *eh;
@@ -1861,7 +1864,7 @@ fxp_rxcsum(struct fxp_softc *sc, struct ifnet *ifp, struct mbuf *m,
}
static int
-fxp_intr_body(struct fxp_softc *sc, struct ifnet *ifp, uint8_t statack,
+fxp_intr_body(struct fxp_softc *sc, if_t ifp, uint8_t statack,
int count)
{
struct mbuf *m;
@@ -1903,7 +1906,7 @@ fxp_intr_body(struct fxp_softc *sc, struct ifnet *ifp, uint8_t statack,
/*
* Try to start more packets transmitting.
*/
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
fxp_start_body(ifp);
/*
@@ -1970,7 +1973,7 @@ fxp_intr_body(struct fxp_softc *sc, struct ifnet *ifp, uint8_t statack,
*/
total_len = le16toh(rfa->actual_size) & 0x3fff;
if ((sc->flags & FXP_FLAG_82559_RXCSUM) != 0 &&
- (ifp->if_capenable & IFCAP_RXCSUM) != 0) {
+ (if_getcapenable(ifp) & IFCAP_RXCSUM) != 0) {
/* Adjust for appended checksum bytes. */
total_len -= 2;
}
@@ -1985,12 +1988,12 @@ fxp_intr_body(struct fxp_softc *sc, struct ifnet *ifp, uint8_t statack,
}
m->m_pkthdr.len = m->m_len = total_len;
- m->m_pkthdr.rcvif = ifp;
+ if_setrcvif(m, ifp);
/* Do IP checksum checking. */
- if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
+ if ((if_getcapenable(ifp) & IFCAP_RXCSUM) != 0)
fxp_rxcsum(sc, ifp, m, status, total_len);
- if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
+ if ((if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) != 0 &&
(status & FXP_RFA_STATUS_VLAN) != 0) {
m->m_pkthdr.ether_vtag =
ntohs(rfa->rfax_vlan_id);
@@ -2005,14 +2008,14 @@ fxp_intr_body(struct fxp_softc *sc, struct ifnet *ifp, uint8_t statack,
* calling if_input() on each one.
*/
FXP_UNLOCK(sc);
- (*ifp->if_input)(ifp, m);
+ if_input(ifp, m);
FXP_LOCK(sc);
rx_npkts++;
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
return (rx_npkts);
} else {
/* Reuse RFA and loaded DMA map. */
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
fxp_discard_rfabuf(sc, rxp);
}
fxp_add_rfabuf(sc, rxp);
@@ -2029,7 +2032,7 @@ fxp_intr_body(struct fxp_softc *sc, struct ifnet *ifp, uint8_t statack,
static void
fxp_update_stats(struct fxp_softc *sc)
{
- struct ifnet *ifp = sc->ifp;
+ if_t ifp = sc->ifp;
struct fxp_stats *sp = sc->fxp_stats;
struct fxp_hwstats *hsp;
uint32_t *status;
@@ -2070,10 +2073,12 @@ fxp_update_stats(struct fxp_softc *sc)
hsp->tx_tco += le16toh(sp->tx_tco);
hsp->rx_tco += le16toh(sp->rx_tco);
- ifp->if_opackets += le32toh(sp->tx_good);
- ifp->if_collisions += le32toh(sp->tx_total_collisions);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, le32toh(sp->tx_good));
+ if_inc_counter(ifp, IFCOUNTER_COLLISIONS,
+ le32toh(sp->tx_total_collisions));
if (sp->rx_good) {
- ifp->if_ipackets += le32toh(sp->rx_good);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS,
+ le32toh(sp->rx_good));
sc->rx_idle_secs = 0;
} else if (sc->flags & FXP_FLAG_RXBUG) {
/*
@@ -2081,17 +2086,18 @@ fxp_update_stats(struct fxp_softc *sc)
*/
sc->rx_idle_secs++;
}
- ifp->if_ierrors +=
+ if_inc_counter(ifp, IFCOUNTER_IERRORS,
le32toh(sp->rx_crc_errors) +
le32toh(sp->rx_alignment_errors) +
le32toh(sp->rx_rnr_errors) +
- le32toh(sp->rx_overrun_errors);
+ le32toh(sp->rx_overrun_errors));
/*
- * If any transmit underruns occured, bump up the transmit
+ * If any transmit underruns occurred, bump up the transmit
* threshold by another 512 bytes (64 * 8).
*/
if (sp->tx_underruns) {
- ifp->if_oerrors += le32toh(sp->tx_underruns);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS,
+ le32toh(sp->tx_underruns));
if (tx_threshold < 192)
tx_threshold += 64;
}
@@ -2116,7 +2122,7 @@ static void
fxp_tick(void *xsc)
{
struct fxp_softc *sc = xsc;
- struct ifnet *ifp = sc->ifp;
+ if_t ifp = sc->ifp;
FXP_LOCK_ASSERT(sc, MA_OWNED);
@@ -2137,15 +2143,15 @@ fxp_tick(void *xsc)
* then assume the receiver has locked up and attempt to clear
* the condition by reprogramming the multicast filter. This is
* a work-around for a bug in the 82557 where the receiver locks
- * up if it gets certain types of garbage in the syncronization
+ * up if it gets certain types of garbage in the synchronization
* bits prior to the packet header. This bug is supposed to only
* occur in 10Mbps mode, but has been seen to occur in 100Mbps
* mode as well (perhaps due to a 10/100 speed transition).
*/
if (sc->rx_idle_secs > FXP_MAX_RX_IDLE) {
sc->rx_idle_secs = 0;
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
+ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
fxp_init_body(sc, 1);
}
return;
@@ -2181,11 +2187,11 @@ fxp_tick(void *xsc)
static void
fxp_stop(struct fxp_softc *sc)
{
- struct ifnet *ifp = sc->ifp;
+ if_t ifp = sc->ifp;
struct fxp_tx *txp;
int i;
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+ if_setdrvflagbits(ifp, 0, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE));
sc->watchdog_timer = 0;
/*
@@ -2236,6 +2242,7 @@ fxp_stop(struct fxp_softc *sc)
static void
fxp_watchdog(struct fxp_softc *sc)
{
+ if_t ifp = sc->ifp;
FXP_LOCK_ASSERT(sc, MA_OWNED);
@@ -2243,9 +2250,9 @@ fxp_watchdog(struct fxp_softc *sc)
return;
device_printf(sc->dev, "device timeout\n");
- sc->ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
fxp_init_body(sc, 1);
}
@@ -2271,7 +2278,7 @@ fxp_init(void *xsc)
static void
fxp_init_body(struct fxp_softc *sc, int setmedia)
{
- struct ifnet *ifp = sc->ifp;
+ if_t ifp = sc->ifp;
struct mii_data *mii;
struct fxp_cb_config *cbp;
struct fxp_cb_ias *cb_ias;
@@ -2281,7 +2288,7 @@ fxp_init_body(struct fxp_softc *sc, int setmedia)
FXP_LOCK_ASSERT(sc, MA_OWNED);
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
return;
/*
@@ -2296,7 +2303,7 @@ fxp_init_body(struct fxp_softc *sc, int setmedia)
CSR_WRITE_4(sc, FXP_CSR_PORT, FXP_PORT_SOFTWARE_RESET);
DELAY(50);
- prm = (ifp->if_flags & IFF_PROMISC) ? 1 : 0;
+ prm = (if_getflags(ifp) & IFF_PROMISC) ? 1 : 0;
/*
* Initialize base of CBL and RFA memory. Loading with zero
@@ -2323,7 +2330,7 @@ fxp_init_body(struct fxp_softc *sc, int setmedia)
* For ICH based controllers do not load microcode.
*/
if (sc->ident->ich == 0) {
- if (ifp->if_flags & IFF_LINK0 &&
+ if (if_getflags(ifp) & IFF_LINK0 &&
(sc->flags & FXP_FLAG_UCODE) == 0)
fxp_load_ucode(sc);
}
@@ -2379,7 +2386,7 @@ fxp_init_body(struct fxp_softc *sc, int setmedia)
cbp->mediatype = sc->flags & FXP_FLAG_SERIAL_MEDIA ? 0 : 1;
cbp->csma_dis = 0; /* (don't) disable link */
cbp->tcp_udp_cksum = ((sc->flags & FXP_FLAG_82559_RXCSUM) != 0 &&
- (ifp->if_capenable & IFCAP_RXCSUM) != 0) ? 1 : 0;
+ (if_getcapenable(ifp) & IFCAP_RXCSUM) != 0) ? 1 : 0;
cbp->vlan_tco = 0; /* (don't) enable vlan wakeup */
cbp->link_wake_en = 0; /* (don't) assert PME# on link change */
cbp->arp_wake_en = 0; /* (don't) assert PME# on arp */
@@ -2406,10 +2413,10 @@ fxp_init_body(struct fxp_softc *sc, int setmedia)
cbp->force_fdx = 0; /* (don't) force full duplex */
cbp->fdx_pin_en = 1; /* (enable) FDX# pin */
cbp->multi_ia = 0; /* (don't) accept multiple IAs */
- cbp->mc_all = ifp->if_flags & IFF_ALLMULTI ? 1 : prm;
+ cbp->mc_all = if_getflags(ifp) & IFF_ALLMULTI ? 1 : prm;
cbp->gamla_rx = sc->flags & FXP_FLAG_EXT_RFA ? 1 : 0;
cbp->vlan_strip_en = ((sc->flags & FXP_FLAG_EXT_RFA) != 0 &&
- (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) ? 1 : 0;
+ (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) != 0) ? 1 : 0;
if (sc->revision == FXP_REV_82557) {
/*
@@ -2488,7 +2495,7 @@ fxp_init_body(struct fxp_softc *sc, int setmedia)
cb_ias->cb_status = 0;
cb_ias->cb_command = htole16(FXP_CB_COMMAND_IAS | FXP_CB_COMMAND_EL);
cb_ias->link_addr = 0xffffffff;
- bcopy(IF_LLADDR(sc->ifp), cb_ias->macaddr, ETHER_ADDR_LEN);
+ bcopy(if_getlladdr(sc->ifp), cb_ias->macaddr, ETHER_ADDR_LEN);
/*
* Start the IAS (Individual Address Setup) command/DMA.
@@ -2550,8 +2557,7 @@ fxp_init_body(struct fxp_softc *sc, int setmedia)
if (sc->miibus != NULL && setmedia != 0)
mii_mediachg(device_get_softc(sc->miibus));
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
/*
* Enable interrupts.
@@ -2561,7 +2567,7 @@ fxp_init_body(struct fxp_softc *sc, int setmedia)
* ... but only do that if we are not polling. And because (presumably)
* the default is interrupts on, we need to disable them explicitly!
*/
- if (ifp->if_capenable & IFCAP_POLLING )
+ if (if_getcapenable(ifp) & IFCAP_POLLING )
CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, FXP_SCB_INTR_DISABLE);
else
#endif /* DEVICE_POLLING */
@@ -2574,14 +2580,14 @@ fxp_init_body(struct fxp_softc *sc, int setmedia)
}
static int
-fxp_serial_ifmedia_upd(struct ifnet *ifp)
+fxp_serial_ifmedia_upd(if_t ifp)
{
return (0);
}
static void
-fxp_serial_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
+fxp_serial_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
{
ifmr->ifm_active = IFM_ETHER|IFM_MANUAL;
@@ -2591,11 +2597,11 @@ fxp_serial_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
* Change media according to request.
*/
static int
-fxp_ifmedia_upd(struct ifnet *ifp)
+fxp_ifmedia_upd(if_t ifp)
{
- struct fxp_softc *sc = ifp->if_softc;
+ struct fxp_softc *sc = if_getsoftc(ifp);
struct mii_data *mii;
- struct mii_softc *miisc;
+ struct mii_softc *miisc;
mii = device_get_softc(sc->miibus);
FXP_LOCK(sc);
@@ -2610,9 +2616,9 @@ fxp_ifmedia_upd(struct ifnet *ifp)
* Notify the world which media we're using.
*/
static void
-fxp_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
+fxp_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
{
- struct fxp_softc *sc = ifp->if_softc;
+ struct fxp_softc *sc = if_getsoftc(ifp);
struct mii_data *mii;
mii = device_get_softc(sc->miibus);
@@ -2801,13 +2807,13 @@ fxp_miibus_statchg(device_t dev)
{
struct fxp_softc *sc;
struct mii_data *mii;
- struct ifnet *ifp;
+ if_t ifp;
sc = device_get_softc(dev);
mii = device_get_softc(sc->miibus);
ifp = sc->ifp;
- if (mii == NULL || ifp == NULL ||
- (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+ if (mii == NULL || ifp == (void *)NULL ||
+ (if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
(mii->mii_media_status & (IFM_AVALID | IFM_ACTIVE)) !=
(IFM_AVALID | IFM_ACTIVE))
return;
@@ -2823,14 +2829,14 @@ fxp_miibus_statchg(device_t dev)
*/
if (sc->revision == FXP_REV_82557)
return;
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
fxp_init_body(sc, 0);
}
static int
-fxp_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+fxp_ioctl(if_t ifp, u_long command, caddr_t data)
{
- struct fxp_softc *sc = ifp->if_softc;
+ struct fxp_softc *sc = if_getsoftc(ifp);
struct ifreq *ifr = (struct ifreq *)data;
struct mii_data *mii;
int flag, mask, error = 0, reinit;
@@ -2844,27 +2850,27 @@ fxp_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
* XXX If it's up then re-initialize it. This is so flags
* such as IFF_PROMISC are handled.
*/
- if (ifp->if_flags & IFF_UP) {
- if (((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) &&
- ((ifp->if_flags ^ sc->if_flags) &
+ if (if_getflags(ifp) & IFF_UP) {
+ if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) &&
+ ((if_getflags(ifp) ^ sc->if_flags) &
(IFF_PROMISC | IFF_ALLMULTI | IFF_LINK0)) != 0) {
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
fxp_init_body(sc, 0);
- } else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ } else if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
fxp_init_body(sc, 1);
} else {
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)
fxp_stop(sc);
}
- sc->if_flags = ifp->if_flags;
+ sc->if_flags = if_getflags(ifp);
FXP_UNLOCK(sc);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
FXP_LOCK(sc);
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
+ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
fxp_init_body(sc, 0);
}
FXP_UNLOCK(sc);
@@ -2883,7 +2889,7 @@ fxp_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
case SIOCSIFCAP:
reinit = 0;
- mask = ifp->if_capenable ^ ifr->ifr_reqcap;
+ mask = if_getcapenable(ifp) ^ ifr->ifr_reqcap;
#ifdef DEVICE_POLLING
if (mask & IFCAP_POLLING) {
if (ifr->ifr_reqcap & IFCAP_POLLING) {
@@ -2893,75 +2899,76 @@ fxp_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
FXP_LOCK(sc);
CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL,
FXP_SCB_INTR_DISABLE);
- ifp->if_capenable |= IFCAP_POLLING;
+ if_setcapenablebit(ifp, IFCAP_POLLING, 0);
FXP_UNLOCK(sc);
} else {
error = ether_poll_deregister(ifp);
/* Enable interrupts in any case */
FXP_LOCK(sc);
CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, 0);
- ifp->if_capenable &= ~IFCAP_POLLING;
+ if_setcapenablebit(ifp, 0, IFCAP_POLLING);
FXP_UNLOCK(sc);
}
}
#endif
FXP_LOCK(sc);
if ((mask & IFCAP_TXCSUM) != 0 &&
- (ifp->if_capabilities & IFCAP_TXCSUM) != 0) {
- ifp->if_capenable ^= IFCAP_TXCSUM;
- if ((ifp->if_capenable & IFCAP_TXCSUM) != 0)
- ifp->if_hwassist |= FXP_CSUM_FEATURES;
+ (if_getcapabilities(ifp) & IFCAP_TXCSUM) != 0) {
+ if_togglecapenable(ifp, IFCAP_TXCSUM);
+ if ((if_getcapenable(ifp) & IFCAP_TXCSUM) != 0)
+ if_sethwassistbits(ifp, FXP_CSUM_FEATURES, 0);
else
- ifp->if_hwassist &= ~FXP_CSUM_FEATURES;
+ if_sethwassistbits(ifp, 0, FXP_CSUM_FEATURES);
}
if ((mask & IFCAP_RXCSUM) != 0 &&
- (ifp->if_capabilities & IFCAP_RXCSUM) != 0) {
- ifp->if_capenable ^= IFCAP_RXCSUM;
+ (if_getcapabilities(ifp) & IFCAP_RXCSUM) != 0) {
+ if_togglecapenable(ifp, IFCAP_RXCSUM);
if ((sc->flags & FXP_FLAG_82559_RXCSUM) != 0)
reinit++;
}
if ((mask & IFCAP_TSO4) != 0 &&
- (ifp->if_capabilities & IFCAP_TSO4) != 0) {
- ifp->if_capenable ^= IFCAP_TSO4;
- if ((ifp->if_capenable & IFCAP_TSO4) != 0)
- ifp->if_hwassist |= CSUM_TSO;
+ (if_getcapabilities(ifp) & IFCAP_TSO4) != 0) {
+ if_togglecapenable(ifp, IFCAP_TSO4);
+ if ((if_getcapenable(ifp) & IFCAP_TSO4) != 0)
+ if_sethwassistbits(ifp, CSUM_TSO, 0);
else
- ifp->if_hwassist &= ~CSUM_TSO;
+ if_sethwassistbits(ifp, 0, CSUM_TSO);
}
if ((mask & IFCAP_WOL_MAGIC) != 0 &&
- (ifp->if_capabilities & IFCAP_WOL_MAGIC) != 0)
- ifp->if_capenable ^= IFCAP_WOL_MAGIC;
+ (if_getcapabilities(ifp) & IFCAP_WOL_MAGIC) != 0)
+ if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
if ((mask & IFCAP_VLAN_MTU) != 0 &&
- (ifp->if_capabilities & IFCAP_VLAN_MTU) != 0) {
- ifp->if_capenable ^= IFCAP_VLAN_MTU;
+ (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) != 0) {
+ if_togglecapenable(ifp, IFCAP_VLAN_MTU);
if (sc->revision != FXP_REV_82557)
flag = FXP_FLAG_LONG_PKT_EN;
else /* a hack to get long frames on the old chip */
flag = FXP_FLAG_SAVE_BAD;
sc->flags ^= flag;
- if (ifp->if_flags & IFF_UP)
+ if (if_getflags(ifp) & IFF_UP)
reinit++;
}
if ((mask & IFCAP_VLAN_HWCSUM) != 0 &&
- (ifp->if_capabilities & IFCAP_VLAN_HWCSUM) != 0)
- ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
+ (if_getcapabilities(ifp) & IFCAP_VLAN_HWCSUM) != 0)
+ if_togglecapenable(ifp, IFCAP_VLAN_HWCSUM);
if ((mask & IFCAP_VLAN_HWTSO) != 0 &&
- (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0)
- ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
+ (if_getcapabilities(ifp) & IFCAP_VLAN_HWTSO) != 0)
+ if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
if ((mask & IFCAP_VLAN_HWTAGGING) != 0 &&
- (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) {
- ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
- if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
- ifp->if_capenable &=
- ~(IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM);
+ (if_getcapabilities(ifp) & IFCAP_VLAN_HWTAGGING) != 0) {
+ if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
+ if ((if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) == 0)
+ if_setcapenablebit(ifp, 0, IFCAP_VLAN_HWTSO |
+ IFCAP_VLAN_HWCSUM);
reinit++;
}
- if (reinit > 0 && (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if (reinit > 0 &&
+ (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
+ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
fxp_init_body(sc, 0);
}
FXP_UNLOCK(sc);
- VLAN_CAPABILITIES(ifp);
+ if_vlancap(ifp);
break;
default:
@@ -2977,24 +2984,15 @@ static int
fxp_mc_addrs(struct fxp_softc *sc)
{
struct fxp_cb_mcs *mcsp = sc->mcsp;
- struct ifnet *ifp = sc->ifp;
- struct ifmultiaddr *ifma;
- int nmcasts;
+ if_t ifp = sc->ifp;
+ int nmcasts = 0;
- nmcasts = 0;
- if ((ifp->if_flags & IFF_ALLMULTI) == 0) {
+ if ((if_getflags(ifp) & IFF_ALLMULTI) == 0) {
if_maddr_rlock(ifp);
- TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_LINK)
- continue;
- if (nmcasts >= MAXMCADDR) {
- ifp->if_flags |= IFF_ALLMULTI;
- nmcasts = 0;
- break;
- }
- bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
- &sc->mcsp->mc_addr[nmcasts][0], ETHER_ADDR_LEN);
- nmcasts++;
+ if_setupmultiaddr(ifp, mcsp->mc_addr, &nmcasts, MAXMCADDR);
+ if (nmcasts >= MAXMCADDR) {
+ if_setflagbits(ifp, IFF_ALLMULTI, 0);
+ nmcasts = 0;
}
if_maddr_runlock(ifp);
}
diff --git a/freebsd/sys/dev/fxp/if_fxpreg.h b/freebsd/sys/dev/fxp/if_fxpreg.h
index 7fd60afd..7ee85884 100644
--- a/freebsd/sys/dev/fxp/if_fxpreg.h
+++ b/freebsd/sys/dev/fxp/if_fxpreg.h
@@ -28,8 +28,6 @@
* $FreeBSD$
*/
-#define FXP_VENDORID_INTEL 0x8086
-
#define FXP_PCI_MMBA 0x10
#define FXP_PCI_IOBA 0x14
diff --git a/freebsd/sys/dev/fxp/if_fxpvar.h b/freebsd/sys/dev/fxp/if_fxpvar.h
index 253c4ff1..78200ce3 100644
--- a/freebsd/sys/dev/fxp/if_fxpvar.h
+++ b/freebsd/sys/dev/fxp/if_fxpvar.h
@@ -143,7 +143,8 @@ struct fxp_desc_list {
};
struct fxp_ident {
- uint16_t devid;
+ uint16_t vendor;
+ uint16_t device;
int16_t revid; /* -1 matches anything */
uint8_t ich;
const char *name;
@@ -178,7 +179,7 @@ struct fxp_hwstats {
* for functional grouping.
*/
struct fxp_softc {
- struct ifnet *ifp; /* per-interface network data */
+ void *ifp; /* per-interface network data */
struct resource *fxp_res[2]; /* I/O and IRQ resources */
struct resource_spec *fxp_spec; /* the resource spec we used */
void *ih; /* interrupt handler cookie */
diff --git a/freebsd/sys/dev/fxp/rcvbundl.h b/freebsd/sys/dev/fxp/rcvbundl.h
index 2510f420..dac793df 100644
--- a/freebsd/sys/dev/fxp/rcvbundl.h
+++ b/freebsd/sys/dev/fxp/rcvbundl.h
@@ -62,7 +62,7 @@ rcvbundl.h file given above).
* driver can change algorithm.
*
* CPUSAVER_DWORD - This is the location of the instruction that loads
-* the dead-man timer with its inital value. By writing a 16-bit
+* the dead-man timer with its initial value. By writing a 16-bit
* value to the low word of this instruction, the driver can change
* the timer value. The current default is either x600 or x800;
* experiments show that the value probably should stay within the
diff --git a/freebsd/sys/dev/led/led.c b/freebsd/sys/dev/led/led.c
index 1b01132f..87cf9993 100644
--- a/freebsd/sys/dev/led/led.c
+++ b/freebsd/sys/dev/led/led.c
@@ -45,6 +45,7 @@ static struct mtx led_mtx;
static struct sx led_sx;
static LIST_HEAD(, ledsc) led_list = LIST_HEAD_INITIALIZER(led_list);
static struct callout led_ch;
+static int blinkers = 0;
static MALLOC_DEFINE(M_LED, "LED", "LED driver");
@@ -53,7 +54,6 @@ led_timeout(void *p)
{
struct ledsc *sc;
- mtx_lock(&led_mtx);
LIST_FOREACH(sc, &led_list, list) {
if (sc->ptr == NULL)
continue;
@@ -63,6 +63,7 @@ led_timeout(void *p)
}
if (*sc->ptr == '.') {
sc->ptr = NULL;
+ blinkers--;
continue;
} else if (*sc->ptr == 'U' || *sc->ptr == 'u') {
if (sc->last_second == time_second)
@@ -80,9 +81,8 @@ led_timeout(void *p)
if (*sc->ptr == '\0')
sc->ptr = sc->str;
}
- mtx_unlock(&led_mtx);
- callout_reset(&led_ch, hz / 10, led_timeout, p);
- return;
+ if (blinkers > 0)
+ callout_reset(&led_ch, hz / 10, led_timeout, p);
}
static int
@@ -94,9 +94,15 @@ led_state(struct ledsc *sc, struct sbuf **sb, int state)
sc->spec = *sb;
if (*sb != NULL) {
sc->str = sbuf_data(*sb);
+ if (sc->ptr == NULL) {
+ blinkers++;
+ callout_reset(&led_ch, hz / 10, led_timeout, NULL);
+ }
sc->ptr = sc->str;
} else {
sc->str = NULL;
+ if (sc->ptr != NULL)
+ blinkers--;
sc->ptr = NULL;
sc->func(sc->private, state);
}
@@ -288,10 +294,9 @@ led_create_state(led_t *func, void *priv, char const *name, int state)
mtx_lock(&led_mtx);
sc->dev->si_drv1 = sc;
- if (LIST_EMPTY(&led_list))
- callout_reset(&led_ch, hz / 10, led_timeout, NULL);
LIST_INSERT_HEAD(&led_list, sc, list);
- sc->func(sc->private, state != 0);
+ if (state != -1)
+ sc->func(sc->private, state != 0);
mtx_unlock(&led_mtx);
return (sc->dev);
@@ -305,7 +310,8 @@ led_destroy(struct cdev *dev)
mtx_lock(&led_mtx);
sc = dev->si_drv1;
dev->si_drv1 = NULL;
-
+ if (sc->ptr != NULL)
+ blinkers--;
LIST_REMOVE(sc, list);
if (LIST_EMPTY(&led_list))
callout_stop(&led_ch);
@@ -328,7 +334,7 @@ led_drvinit(void *unused)
led_unit = new_unrhdr(0, INT_MAX, NULL);
mtx_init(&led_mtx, "LED mtx", NULL, MTX_DEF);
sx_init(&led_sx, "LED sx");
- callout_init(&led_ch, CALLOUT_MPSAFE);
+ callout_init_mtx(&led_ch, &led_mtx, 0);
}
SYSINIT(leddev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, led_drvinit, NULL);
diff --git a/freebsd/sys/dev/mii/brgphy.c b/freebsd/sys/dev/mii/brgphy.c
index 75c15774..e07cd968 100644
--- a/freebsd/sys/dev/mii/brgphy.c
+++ b/freebsd/sys/dev/mii/brgphy.c
@@ -45,8 +45,10 @@ __FBSDID("$FreeBSD$");
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/bus.h>
+#include <sys/taskqueue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/ethernet.h>
#include <net/if_media.h>
@@ -118,7 +120,10 @@ static void brgphy_jumbo_settings(struct mii_softc *, u_long);
static const struct mii_phydesc brgphys[] = {
MII_PHY_DESC(BROADCOM, BCM5400),
MII_PHY_DESC(BROADCOM, BCM5401),
+ MII_PHY_DESC(BROADCOM, BCM5402),
MII_PHY_DESC(BROADCOM, BCM5411),
+ MII_PHY_DESC(BROADCOM, BCM5404),
+ MII_PHY_DESC(BROADCOM, BCM5424),
MII_PHY_DESC(BROADCOM, BCM54K2),
MII_PHY_DESC(BROADCOM, BCM5701),
MII_PHY_DESC(BROADCOM, BCM5703),
@@ -131,6 +136,9 @@ static const struct mii_phydesc brgphys[] = {
MII_PHY_DESC(BROADCOM, BCM5752),
MII_PHY_DESC(BROADCOM, BCM5780),
MII_PHY_DESC(BROADCOM, BCM5708C),
+ MII_PHY_DESC(BROADCOM, BCM5466),
+ MII_PHY_DESC(BROADCOM2, BCM5478),
+ MII_PHY_DESC(BROADCOM2, BCM5488),
MII_PHY_DESC(BROADCOM2, BCM5482),
MII_PHY_DESC(BROADCOM2, BCM5708S),
MII_PHY_DESC(BROADCOM2, BCM5709C),
@@ -160,25 +168,33 @@ static const struct mii_phy_funcs brgphy_funcs = {
brgphy_reset
};
-#define HS21_PRODUCT_ID "IBM eServer BladeCenter HS21"
-#define HS21_BCM_CHIPID 0x57081021
+static const struct hs21_type {
+ const uint32_t id;
+ const char *prod;
+} hs21_type_lists[] = {
+ { 0x57081021, "IBM eServer BladeCenter HS21" },
+ { 0x57081011, "IBM eServer BladeCenter HS21 -[8853PAU]-" },
+};
static int
detect_hs21(struct bce_softc *bce_sc)
{
char *sysenv;
- int found;
+ int found, i;
found = 0;
- if (bce_sc->bce_chipid == HS21_BCM_CHIPID) {
- sysenv = getenv("smbios.system.product");
- if (sysenv != NULL) {
- if (strncmp(sysenv, HS21_PRODUCT_ID,
- strlen(HS21_PRODUCT_ID)) == 0)
- found = 1;
- freeenv(sysenv);
+ sysenv = kern_getenv("smbios.system.product");
+ if (sysenv == NULL)
+ return (found);
+ for (i = 0; i < nitems(hs21_type_lists); i++) {
+ if (bce_sc->bce_chipid == hs21_type_lists[i].id &&
+ strncmp(sysenv, hs21_type_lists[i].prod,
+ strlen(hs21_type_lists[i].prod)) == 0) {
+ found++;
+ break;
}
}
+ freeenv(sysenv);
return (found);
}
@@ -198,7 +214,6 @@ brgphy_attach(device_t dev)
struct bge_softc *bge_sc = NULL;
struct bce_softc *bce_sc = NULL;
struct mii_softc *sc;
- struct ifnet *ifp;
bsc = device_get_softc(dev);
sc = &bsc->mii_sc;
@@ -207,13 +222,12 @@ brgphy_attach(device_t dev)
&brgphy_funcs, 0);
bsc->serdes_flags = 0;
- ifp = sc->mii_pdata->mii_ifp;
/* Find the MAC driver associated with this PHY. */
- if (strcmp(ifp->if_dname, "bge") == 0)
- bge_sc = ifp->if_softc;
- else if (strcmp(ifp->if_dname, "bce") == 0)
- bce_sc = ifp->if_softc;
+ if (mii_dev_mac_match(dev, "bge"))
+ bge_sc = mii_dev_mac_softc(dev);
+ else if (mii_dev_mac_match(dev, "bce"))
+ bce_sc = mii_dev_mac_softc(dev);
/* Handle any special cases based on the PHY ID */
switch (sc->mii_mpd_oui) {
@@ -268,20 +282,25 @@ brgphy_attach(device_t dev)
sc->mii_extcapabilities = PHY_READ(sc, MII_EXTSR);
device_printf(dev, " ");
-#define ADD(m, c) ifmedia_add(&sc->mii_pdata->mii_media, (m), (c), NULL)
-
/* Add the supported media types */
if ((sc->mii_flags & MIIF_HAVEFIBER) == 0) {
mii_phy_add_media(sc);
printf("\n");
} else {
sc->mii_anegticks = MII_ANEGTICKS_GIGE;
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_1000_SX, IFM_FDX, sc->mii_inst),
- BRGPHY_S1000 | BRGPHY_BMCR_FDX);
+ ifmedia_add(&sc->mii_pdata->mii_media,
+ IFM_MAKEWORD(IFM_ETHER, IFM_1000_SX, IFM_FDX, sc->mii_inst),
+ 0, NULL);
printf("1000baseSX-FDX, ");
- /* 2.5G support is a software enabled feature on the 5708S and 5709S. */
- if (bce_sc && (bce_sc->bce_phy_flags & BCE_PHY_2_5G_CAPABLE_FLAG)) {
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, sc->mii_inst), 0);
+ /*
+ * 2.5G support is a software enabled feature
+ * on the 5708S and 5709S.
+ */
+ if (bce_sc && (bce_sc->bce_phy_flags &
+ BCE_PHY_2_5G_CAPABLE_FLAG)) {
+ ifmedia_add(&sc->mii_pdata->mii_media,
+ IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX,
+ sc->mii_inst), 0, NULL);
printf("2500baseSX-FDX, ");
} else if ((bsc->serdes_flags & BRGPHY_5708S) && bce_sc &&
(detect_hs21(bce_sc) != 0)) {
@@ -297,11 +316,11 @@ brgphy_attach(device_t dev)
printf("auto-neg workaround, ");
bsc->serdes_flags |= BRGPHY_NOANWAIT;
}
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_AUTO, 0, sc->mii_inst), 0);
+ ifmedia_add(&sc->mii_pdata->mii_media, IFM_MAKEWORD(IFM_ETHER,
+ IFM_AUTO, 0, sc->mii_inst), 0, NULL);
printf("auto\n");
}
-#undef ADD
MIIBUS_MEDIAINIT(sc->mii_dev);
return (0);
}
@@ -316,10 +335,6 @@ brgphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
case MII_POLLSTAT:
break;
case MII_MEDIACHG:
- /* If the interface is not up, don't do anything. */
- if ((mii->mii_ifp->if_flags & IFF_UP) == 0)
- break;
-
/* Todo: Why is this here? Is it really needed? */
PHY_RESET(sc); /* XXX hardware bug work-around */
@@ -339,11 +354,6 @@ brgphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
}
break;
case MII_TICK:
- /* Bail if the interface isn't up. */
- if ((mii->mii_ifp->if_flags & IFF_UP) == 0)
- return (0);
-
-
/* Bail if autoneg isn't in process. */
if (IFM_SUBTYPE(ife->ifm_media) != IFM_AUTO) {
sc->mii_ticks = 0;
@@ -889,7 +899,7 @@ brgphy_reset(struct mii_softc *sc)
{
struct bge_softc *bge_sc = NULL;
struct bce_softc *bce_sc = NULL;
- struct ifnet *ifp;
+ if_t ifp;
int i, val;
/*
@@ -942,11 +952,10 @@ brgphy_reset(struct mii_softc *sc)
ifp = sc->mii_pdata->mii_ifp;
/* Find the driver associated with this PHY. */
- if (strcmp(ifp->if_dname, "bge") == 0) {
- bge_sc = ifp->if_softc;
- } else if (strcmp(ifp->if_dname, "bce") == 0) {
- bce_sc = ifp->if_softc;
- }
+ if (mii_phy_mac_match(sc, "bge"))
+ bge_sc = mii_phy_mac_softc(sc);
+ else if (mii_phy_mac_match(sc, "bce"))
+ bce_sc = mii_phy_mac_softc(sc);
if (bge_sc) {
/* Fix up various bugs */
@@ -964,7 +973,7 @@ brgphy_reset(struct mii_softc *sc)
brgphy_fixup_jitter_bug(sc);
if (bge_sc->bge_flags & BGE_FLAG_JUMBO)
- brgphy_jumbo_settings(sc, ifp->if_mtu);
+ brgphy_jumbo_settings(sc, if_getmtu(ifp));
if ((bge_sc->bge_phy_flags & BGE_PHY_NO_WIRESPEED) == 0)
brgphy_ethernet_wirespeed(sc);
@@ -1075,11 +1084,11 @@ brgphy_reset(struct mii_softc *sc)
(BCE_CHIP_REV(bce_sc) == BCE_CHIP_REV_Bx))
brgphy_fixup_disable_early_dac(sc);
- brgphy_jumbo_settings(sc, ifp->if_mtu);
+ brgphy_jumbo_settings(sc, if_getmtu(ifp));
brgphy_ethernet_wirespeed(sc);
} else {
brgphy_fixup_ber_bug(sc);
- brgphy_jumbo_settings(sc, ifp->if_mtu);
+ brgphy_jumbo_settings(sc, if_getmtu(ifp));
brgphy_ethernet_wirespeed(sc);
}
}
diff --git a/freebsd/sys/dev/mii/e1000phy.c b/freebsd/sys/dev/mii/e1000phy.c
index f50d41a9..10d6fc68 100644
--- a/freebsd/sys/dev/mii/e1000phy.c
+++ b/freebsd/sys/dev/mii/e1000phy.c
@@ -52,8 +52,8 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/bus.h>
-
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_media.h>
#include <dev/mii/mii.h>
@@ -114,7 +114,9 @@ static const struct mii_phydesc e1000phys[] = {
MII_PHY_DESC(xxMARVELL, E1111),
MII_PHY_DESC(xxMARVELL, E1116),
MII_PHY_DESC(xxMARVELL, E1116R),
+ MII_PHY_DESC(xxMARVELL, E1116R_29),
MII_PHY_DESC(xxMARVELL, E1118),
+ MII_PHY_DESC(xxMARVELL, E1145),
MII_PHY_DESC(xxMARVELL, E1149R),
MII_PHY_DESC(xxMARVELL, E3016),
MII_PHY_DESC(xxMARVELL, PHYG65G),
@@ -141,14 +143,12 @@ static int
e1000phy_attach(device_t dev)
{
struct mii_softc *sc;
- struct ifnet *ifp;
sc = device_get_softc(dev);
mii_phy_dev_attach(dev, MIIF_NOMANPAUSE, &e1000phy_funcs, 0);
- ifp = sc->mii_pdata->mii_ifp;
- if (strcmp(ifp->if_dname, "msk") == 0 &&
+ if (mii_dev_mac_match(dev, "msk") &&
(sc->mii_flags & MIIF_MACPRIV0) != 0)
sc->mii_flags |= MIIF_PHYPRIV0;
@@ -223,6 +223,7 @@ e1000phy_reset(struct mii_softc *sc)
case MII_MODEL_xxMARVELL_E1111:
case MII_MODEL_xxMARVELL_E1112:
case MII_MODEL_xxMARVELL_E1116:
+ case MII_MODEL_xxMARVELL_E1116R_29:
case MII_MODEL_xxMARVELL_E1118:
case MII_MODEL_xxMARVELL_E1149:
case MII_MODEL_xxMARVELL_E1149R:
@@ -230,7 +231,8 @@ e1000phy_reset(struct mii_softc *sc)
/* Disable energy detect mode. */
reg &= ~E1000_SCR_EN_DETECT_MASK;
reg |= E1000_SCR_AUTO_X_MODE;
- if (sc->mii_mpd_model == MII_MODEL_xxMARVELL_E1116)
+ if (sc->mii_mpd_model == MII_MODEL_xxMARVELL_E1116 ||
+ sc->mii_mpd_model == MII_MODEL_xxMARVELL_E1116R_29)
reg &= ~E1000_SCR_POWER_DOWN;
reg |= E1000_SCR_ASSERT_CRS_ON_TX;
break;
@@ -258,6 +260,7 @@ e1000phy_reset(struct mii_softc *sc)
PHY_WRITE(sc, E1000_SCR, reg);
if (sc->mii_mpd_model == MII_MODEL_xxMARVELL_E1116 ||
+ sc->mii_mpd_model == MII_MODEL_xxMARVELL_E1116R_29 ||
sc->mii_mpd_model == MII_MODEL_xxMARVELL_E1149 ||
sc->mii_mpd_model == MII_MODEL_xxMARVELL_E1149R) {
PHY_WRITE(sc, E1000_EADR, 2);
@@ -274,6 +277,7 @@ e1000phy_reset(struct mii_softc *sc)
case MII_MODEL_xxMARVELL_E1118:
break;
case MII_MODEL_xxMARVELL_E1116:
+ case MII_MODEL_xxMARVELL_E1116R_29:
page = PHY_READ(sc, E1000_EADR);
/* Select page 3, LED control register. */
PHY_WRITE(sc, E1000_EADR, 3);
@@ -320,12 +324,6 @@ e1000phy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
break;
case MII_MEDIACHG:
- /*
- * If the interface is not up, don't do anything.
- */
- if ((mii->mii_ifp->if_flags & IFF_UP) == 0)
- break;
-
if (IFM_SUBTYPE(ife->ifm_media) == IFM_AUTO) {
e1000phy_mii_phy_auto(sc, ife->ifm_media);
break;
@@ -382,12 +380,6 @@ done:
break;
case MII_TICK:
/*
- * Is the interface even up?
- */
- if ((mii->mii_ifp->if_flags & IFF_UP) == 0)
- return (0);
-
- /*
* Only used for autonegotiation.
*/
if (IFM_SUBTYPE(ife->ifm_media) != IFM_AUTO) {
diff --git a/freebsd/sys/dev/mii/icsphy.c b/freebsd/sys/dev/mii/icsphy.c
index 3d5e6384..29444f38 100644
--- a/freebsd/sys/dev/mii/icsphy.c
+++ b/freebsd/sys/dev/mii/icsphy.c
@@ -147,12 +147,6 @@ icsphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
break;
case MII_MEDIACHG:
- /*
- * If the interface is not up, don't do anything.
- */
- if ((mii->mii_ifp->if_flags & IFF_UP) == 0)
- break;
-
mii_phy_setmedia(sc);
break;
diff --git a/freebsd/sys/dev/mii/micphy.c b/freebsd/sys/dev/mii/micphy.c
index ab40dab5..acaa8b73 100644
--- a/freebsd/sys/dev/mii/micphy.c
+++ b/freebsd/sys/dev/mii/micphy.c
@@ -71,6 +71,14 @@ __FBSDID("$FreeBSD$");
#define MII_KSZPHY_CLK_CONTROL_PAD_SKEW 0x104
#define MII_KSZPHY_RX_DATA_PAD_SKEW 0x105
#define MII_KSZPHY_TX_DATA_PAD_SKEW 0x106
+/* KSZ9031 */
+#define MII_KSZ9031_MMD_ACCESS_CTRL 0x0d
+#define MII_KSZ9031_MMD_ACCESS_DATA 0x0e
+#define MII_KSZ9031_MMD_DATA_NOINC (1 << 14)
+#define MII_KSZ9031_CONTROL_PAD_SKEW 0x4
+#define MII_KSZ9031_RX_DATA_PAD_SKEW 0x5
+#define MII_KSZ9031_TX_DATA_PAD_SKEW 0x6
+#define MII_KSZ9031_CLOCK_PAD_SKEW 0x8
#define PS_TO_REG(p) ((p) / 200)
@@ -99,6 +107,7 @@ DRIVER_MODULE(micphy, miibus, micphy_driver, micphy_devclass, 0, 0);
static const struct mii_phydesc micphys[] = {
MII_PHY_DESC(MICREL, KSZ9021),
+ MII_PHY_DESC(MICREL, KSZ9031),
MII_PHY_END
};
@@ -108,8 +117,50 @@ static const struct mii_phy_funcs micphy_funcs = {
mii_phy_reset
};
+#ifndef __rtems__
+static uint32_t
+ksz9031_read(struct mii_softc *sc, uint32_t devaddr, uint32_t reg)
+{
+ /* Set up device address and register. */
+ PHY_WRITE(sc, MII_KSZ9031_MMD_ACCESS_CTRL, devaddr);
+ PHY_WRITE(sc, MII_KSZ9031_MMD_ACCESS_DATA, reg);
+
+ /* Select register data for MMD and read the value. */
+ PHY_WRITE(sc, MII_KSZ9031_MMD_ACCESS_CTRL,
+ MII_KSZ9031_MMD_DATA_NOINC | devaddr);
+
+ return (PHY_READ(sc, MII_KSZ9031_MMD_ACCESS_DATA));
+}
+#endif /* __rtems__ */
+
static void
-micphy_write(struct mii_softc *sc, uint32_t reg, uint32_t val)
+ksz9031_write(struct mii_softc *sc, uint32_t devaddr, uint32_t reg,
+ uint32_t val)
+{
+
+ /* Set up device address and register. */
+ PHY_WRITE(sc, MII_KSZ9031_MMD_ACCESS_CTRL, devaddr);
+ PHY_WRITE(sc, MII_KSZ9031_MMD_ACCESS_DATA, reg);
+
+ /* Select register data for MMD and write the value. */
+ PHY_WRITE(sc, MII_KSZ9031_MMD_ACCESS_CTRL,
+ MII_KSZ9031_MMD_DATA_NOINC | devaddr);
+ PHY_WRITE(sc, MII_KSZ9031_MMD_ACCESS_DATA, val);
+}
+
+#ifndef __rtems__
+static uint32_t
+ksz9021_read(struct mii_softc *sc, uint32_t reg)
+{
+
+ PHY_WRITE(sc, MII_KSZPHY_EXTREG, reg);
+
+ return (PHY_READ(sc, MII_KSZPHY_EXTREG_READ));
+}
+#endif /* __rtems__ */
+
+static void
+ksz9021_write(struct mii_softc *sc, uint32_t reg, uint32_t val)
{
PHY_WRITE(sc, MII_KSZPHY_EXTREG, KSZPHY_EXTREG_WRITE | reg);
@@ -117,40 +168,82 @@ micphy_write(struct mii_softc *sc, uint32_t reg, uint32_t val)
}
#ifndef __rtems__
-static int
-ksz9021_load_values(struct mii_softc *sc, phandle_t node, uint32_t reg,
- char *field1, char *field2,
- char *field3, char *field4)
+static void
+ksz90x1_load_values(struct mii_softc *sc, phandle_t node,
+ uint32_t dev, uint32_t reg, char *field1, uint32_t f1mask, int f1off,
+ char *field2, uint32_t f2mask, int f2off, char *field3, uint32_t f3mask,
+ int f3off, char *field4, uint32_t f4mask, int f4off)
{
pcell_t dts_value[1];
int len;
int val;
- val = 0;
+ if (sc->mii_mpd_model == MII_MODEL_MICREL_KSZ9031)
+ val = ksz9031_read(sc, dev, reg);
+ else
+ val = ksz9021_read(sc, reg);
if ((len = OF_getproplen(node, field1)) > 0) {
OF_getencprop(node, field1, dts_value, len);
- val = PS_TO_REG(dts_value[0]);
+ val &= ~(f1mask << f1off);
+ val |= (PS_TO_REG(dts_value[0]) & f1mask) << f1off;
}
- if ((len = OF_getproplen(node, field2)) > 0) {
+ if (field2 != NULL && (len = OF_getproplen(node, field2)) > 0) {
OF_getencprop(node, field2, dts_value, len);
- val |= PS_TO_REG(dts_value[0]) << 4;
+ val &= ~(f2mask << f2off);
+ val |= (PS_TO_REG(dts_value[0]) & f2mask) << f2off;
}
- if ((len = OF_getproplen(node, field3)) > 0) {
+ if (field3 != NULL && (len = OF_getproplen(node, field3)) > 0) {
OF_getencprop(node, field3, dts_value, len);
- val |= PS_TO_REG(dts_value[0]) << 8;
+ val &= ~(f3mask << f3off);
+ val |= (PS_TO_REG(dts_value[0]) & f3mask) << f3off;
}
- if ((len = OF_getproplen(node, field4)) > 0) {
+ if (field4 != NULL && (len = OF_getproplen(node, field4)) > 0) {
OF_getencprop(node, field4, dts_value, len);
- val |= PS_TO_REG(dts_value[0]) << 12;
+ val &= ~(f4mask << f4off);
+ val |= (PS_TO_REG(dts_value[0]) & f4mask) << f4off;
}
- micphy_write(sc, reg, val);
+ if (sc->mii_mpd_model == MII_MODEL_MICREL_KSZ9031)
+ ksz9031_write(sc, dev, reg, val);
+ else
+ ksz9021_write(sc, reg, val);
+}
- return (0);
+static void
+ksz9031_load_values(struct mii_softc *sc, phandle_t node)
+{
+
+ ksz90x1_load_values(sc, node, 2, MII_KSZ9031_CONTROL_PAD_SKEW,
+ "txen-skew-ps", 0xf, 0, "rxdv-skew-ps", 0xf, 4,
+ NULL, 0, 0, NULL, 0, 0);
+ ksz90x1_load_values(sc, node, 2, MII_KSZ9031_RX_DATA_PAD_SKEW,
+ "rxd0-skew-ps", 0xf, 0, "rxd1-skew-ps", 0xf, 4,
+ "rxd2-skew-ps", 0xf, 8, "rxd3-skew-ps", 0xf, 12);
+ ksz90x1_load_values(sc, node, 2, MII_KSZ9031_TX_DATA_PAD_SKEW,
+ "txd0-skew-ps", 0xf, 0, "txd1-skew-ps", 0xf, 4,
+ "txd2-skew-ps", 0xf, 8, "txd3-skew-ps", 0xf, 12);
+ ksz90x1_load_values(sc, node, 2, MII_KSZ9031_CLOCK_PAD_SKEW,
+ "rxc-skew-ps", 0x1f, 0, "txc-skew-ps", 0x1f, 5,
+ NULL, 0, 0, NULL, 0, 0);
+}
+
+static void
+ksz9021_load_values(struct mii_softc *sc, phandle_t node)
+{
+
+ ksz90x1_load_values(sc, node, 0, MII_KSZPHY_CLK_CONTROL_PAD_SKEW,
+ "txen-skew-ps", 0xf, 0, "txc-skew-ps", 0xf, 4,
+ "rxdv-skew-ps", 0xf, 8, "rxc-skew-ps", 0xf, 12);
+ ksz90x1_load_values(sc, node, 0, MII_KSZPHY_RX_DATA_PAD_SKEW,
+ "rxd0-skew-ps", 0xf, 0, "rxd1-skew-ps", 0xf, 4,
+ "rxd2-skew-ps", 0xf, 8, "rxd3-skew-ps", 0xf, 12);
+ ksz90x1_load_values(sc, node, 0, MII_KSZPHY_TX_DATA_PAD_SKEW,
+ "txd0-skew-ps", 0xf, 0, "txd1-skew-ps", 0xf, 4,
+ "txd2-skew-ps", 0xf, 8, "txd3-skew-ps", 0xf, 12);
}
#endif /* __rtems__ */
@@ -183,22 +276,19 @@ micphy_attach(device_t dev)
if ((node = ofw_bus_get_node(parent)) == -1)
return (ENXIO);
- ksz9021_load_values(sc, node, MII_KSZPHY_CLK_CONTROL_PAD_SKEW,
- "txen-skew-ps", "txc-skew-ps",
- "rxdv-skew-ps", "rxc-skew-ps");
-
- ksz9021_load_values(sc, node, MII_KSZPHY_RX_DATA_PAD_SKEW,
- "rxd0-skew-ps", "rxd1-skew-ps",
- "rxd2-skew-ps", "rxd3-skew-ps");
-
- ksz9021_load_values(sc, node, MII_KSZPHY_TX_DATA_PAD_SKEW,
- "txd0-skew-ps", "txd1-skew-ps",
- "txd2-skew-ps", "txd3-skew-ps");
+ if (sc->mii_mpd_model == MII_MODEL_MICREL_KSZ9031)
+ ksz9031_load_values(sc, node);
+ else
+ ksz9021_load_values(sc, node);
#else /* __rtems__ */
/* FIXME */
- micphy_write(sc, MII_KSZPHY_CLK_CONTROL_PAD_SKEW, 0xf0f0);
- micphy_write(sc, MII_KSZPHY_RX_DATA_PAD_SKEW, 0x0000);
- micphy_write(sc, MII_KSZPHY_TX_DATA_PAD_SKEW, 0x0000);
+ if (sc->mii_mpd_model == MII_MODEL_MICREL_KSZ9031) {
+ BSD_ASSERT(0);
+ } else {
+ ksz9021_write(sc, MII_KSZPHY_CLK_CONTROL_PAD_SKEW, 0xf0f0);
+ ksz9021_write(sc, MII_KSZPHY_RX_DATA_PAD_SKEW, 0x0000);
+ ksz9021_write(sc, MII_KSZPHY_TX_DATA_PAD_SKEW, 0x0000);
+ }
#endif /* __rtems__ */
return (0);
diff --git a/freebsd/sys/dev/mii/mii.c b/freebsd/sys/dev/mii/mii.c
index d1f55cb0..1f0ead72 100644
--- a/freebsd/sys/dev/mii/mii.c
+++ b/freebsd/sys/dev/mii/mii.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_media.h>
#include <dev/mii/mii.h>
@@ -107,7 +108,7 @@ driver_t miibus_driver = {
};
struct miibus_ivars {
- struct ifnet *ifp;
+ if_t ifp;
ifm_change_cb_t ifmedia_upd;
ifm_stat_cb_t ifmedia_sts;
u_int mii_flags;
@@ -148,8 +149,8 @@ miibus_attach(device_t dev)
ifmedia_init(&mii->mii_media, IFM_IMASK, ivars->ifmedia_upd,
ivars->ifmedia_sts);
mii->mii_ifp = ivars->ifp;
- mii->mii_ifp->if_capabilities |= IFCAP_LINKSTATE;
- mii->mii_ifp->if_capenable |= IFCAP_LINKSTATE;
+ if_setcapabilitiesbit(mii->mii_ifp, IFCAP_LINKSTATE, 0);
+ if_setcapenablebit(mii->mii_ifp, IFCAP_LINKSTATE, 0);
LIST_INIT(&mii->mii_phys);
return (bus_generic_attach(dev));
@@ -309,7 +310,7 @@ miibus_statchg(device_t dev)
MIIBUS_STATCHG(parent);
mii = device_get_softc(dev);
- mii->mii_ifp->if_baudrate = ifmedia_baudrate(mii->mii_media_active);
+ if_setbaudrate(mii->mii_ifp, ifmedia_baudrate(mii->mii_media_active));
}
static void
@@ -359,7 +360,7 @@ miibus_mediainit(device_t dev)
* the PHYs to the network interface driver parent.
*/
int
-mii_attach(device_t dev, device_t *miibus, struct ifnet *ifp,
+mii_attach(device_t dev, device_t *miibus, if_t ifp,
ifm_change_cb_t ifmedia_upd, ifm_stat_cb_t ifmedia_sts, int capmask,
int phyloc, int offloc, int flags)
{
@@ -375,7 +376,7 @@ mii_attach(device_t dev, device_t *miibus, struct ifnet *ifp,
}
if (offloc != MII_OFFSET_ANY && (offloc < 0 || offloc >= MII_NPHY)) {
- printf("%s: ivalid offloc %d\n", __func__, offloc);
+ printf("%s: invalid offloc %d\n", __func__, offloc);
return (EINVAL);
}
@@ -384,7 +385,7 @@ mii_attach(device_t dev, device_t *miibus, struct ifnet *ifp,
phymax = MII_NPHY - 1;
} else {
if (phyloc < 0 || phyloc >= MII_NPHY) {
- printf("%s: ivalid phyloc %d\n", __func__, phyloc);
+ printf("%s: invalid phyloc %d\n", __func__, phyloc);
return (EINVAL);
}
phymin = phymax = phyloc;
@@ -613,18 +614,6 @@ mii_pollstat(struct mii_data *mii)
}
}
-/*
- * Inform the PHYs that the interface is down.
- */
-void
-mii_down(struct mii_data *mii)
-{
- struct mii_softc *child;
-
- LIST_FOREACH(child, &mii->mii_phys, mii_list)
- mii_phy_down(child);
-}
-
static unsigned char
mii_bitreverse(unsigned char x)
{
@@ -646,3 +635,33 @@ mii_oui(u_int id1, u_int id2)
(mii_bitreverse((h >> 8) & 0xff) << 8) |
mii_bitreverse(h & 0xff));
}
+
+int
+mii_phy_mac_match(struct mii_softc *mii, const char *name)
+{
+
+ return (strcmp(device_get_name(device_get_parent(mii->mii_dev)),
+ name) == 0);
+}
+
+int
+mii_dev_mac_match(device_t parent, const char *name)
+{
+
+ return (strcmp(device_get_name(device_get_parent(
+ device_get_parent(parent))), name) == 0);
+}
+
+void *
+mii_phy_mac_softc(struct mii_softc *mii)
+{
+
+ return (device_get_softc(device_get_parent(mii->mii_dev)));
+}
+
+void *
+mii_dev_mac_softc(device_t parent)
+{
+
+ return (device_get_softc(device_get_parent(device_get_parent(parent))));
+}
diff --git a/freebsd/sys/dev/mii/mii.h b/freebsd/sys/dev/mii/mii.h
index 668fb8fb..fa1ec84e 100644
--- a/freebsd/sys/dev/mii/mii.h
+++ b/freebsd/sys/dev/mii/mii.h
@@ -1,4 +1,4 @@
-/* $NetBSD: mii.h,v 1.9 2001/05/31 03:07:14 thorpej Exp $ */
+/* $NetBSD: mii.h,v 1.18 2014/06/16 14:43:22 msaitoh Exp $ */
/*-
* Copyright (c) 1997 Manuel Bouyer. All rights reserved.
@@ -87,7 +87,7 @@
/*
* Note that the EXTSTAT bit indicates that there is extended status
* info available in register 15, but 802.3 section 22.2.4.3 also
- * states that that all 1000 Mb/s capable PHYs will set this bit to 1.
+ * states that all 1000 Mb/s capable PHYs will set this bit to 1.
*/
#define BMSR_MEDIAMASK (BMSR_100T4|BMSR_100TXFDX|BMSR_100TXHDX| \
@@ -111,6 +111,7 @@
#define ANAR_NP 0x8000 /* Next page (ro) */
#define ANAR_ACK 0x4000 /* link partner abilities acknowledged (ro) */
#define ANAR_RF 0x2000 /* remote fault (ro) */
+ /* Annex 28B.2 */
#define ANAR_FC 0x0400 /* local device supports PAUSE */
#define ANAR_T4 0x0200 /* local device supports 100bT4 */
#define ANAR_TX_FD 0x0100 /* local device supports 100bTx FD */
@@ -123,6 +124,7 @@
#define ANAR_PAUSE_ASYM (2 << 10)
#define ANAR_PAUSE_TOWARDS (3 << 10)
+ /* Annex 28D */
#define ANAR_X_FD 0x0020 /* local device supports 1000BASE-X FD */
#define ANAR_X_HD 0x0040 /* local device supports 1000BASE-X HD */
#define ANAR_X_PAUSE_NONE (0 << 7)
@@ -184,12 +186,47 @@
#define GTSR_MAN_MS_FLT 0x8000 /* master/slave config fault */
#define GTSR_MS_RES 0x4000 /* result: 1 = master, 0 = slave */
#define GTSR_LRS 0x2000 /* local rx status, 1 = ok */
-#define GTSR_RRS 0x1000 /* remove rx status, 1 = ok */
+#define GTSR_RRS 0x1000 /* remote rx status, 1 = ok */
#define GTSR_LP_1000TFDX 0x0800 /* link partner 1000baseT FDX capable */
#define GTSR_LP_1000THDX 0x0400 /* link partner 1000baseT HDX capable */
#define GTSR_LP_ASM_DIR 0x0200 /* link partner asym. pause dir. capable */
#define GTSR_IDLE_ERR 0x00ff /* IDLE error count */
+#define MII_PSECR 0x0b /* PSE control register */
+#define PSECR_PACTLMASK 0x000c /* pair control mask */
+#define PSECR_PSEENMASK 0x0003 /* PSE enable mask */
+#define PSECR_PINOUTB 0x0008 /* PSE pinout Alternative B */
+#define PSECR_PINOUTA 0x0004 /* PSE pinout Alternative A */
+#define PSECR_FOPOWTST 0x0002 /* Force Power Test Mode */
+#define PSECR_PSEEN 0x0001 /* PSE Enabled */
+#define PSECR_PSEDIS 0x0000 /* PSE Disabled */
+
+#define MII_PSESR 0x0c /* PSE status register */
+#define PSESR_PWRDENIED 0x1000 /* Power Denied */
+#define PSESR_VALSIG 0x0800 /* Valid PD signature detected */
+#define PSESR_INVALSIG 0x0400 /* Invalid PD signature detected */
+#define PSESR_SHORTCIRC 0x0200 /* Short circuit condition detected */
+#define PSESR_OVERLOAD 0x0100 /* Overload condition detected */
+#define PSESR_MPSABSENT 0x0080 /* MPS absent condition detected */
+#define PSESR_PDCLMASK 0x0070 /* PD Class mask */
+#define PSESR_STATMASK 0x000e /* PSE Status mask */
+#define PSESR_PAIRCTABL 0x0001 /* PAIR Control Ability */
+#define PSESR_PDCL_4 (4 << 4) /* Class 4 */
+#define PSESR_PDCL_3 (3 << 4) /* Class 3 */
+#define PSESR_PDCL_2 (2 << 4) /* Class 2 */
+#define PSESR_PDCL_1 (1 << 4) /* Class 1 */
+#define PSESR_PDCL_0 (0 << 4) /* Class 0 */
+
+#define MII_MMDACR 0x0d /* MMD access control register */
+#define MMDACR_FUNCMASK 0xc000 /* function */
+#define MMDACR_DADDRMASK 0x001f /* device address */
+#define MMDACR_FN_ADDRESS (0 << 14) /* address */
+#define MMDACR_FN_DATANPI (1 << 14) /* data, no post increment */
+#define MMDACR_FN_DATAPIRW (2 << 14) /* data, post increment on r/w */
+#define MMDACR_FN_DATAPIW (3 << 14) /* data, post increment on wr only */
+
+#define MII_MMDAADR 0x0e /* MMD access address data register */
+
#define MII_EXTSR 0x0f /* Extended status register */
#define EXTSR_1000XFDX 0x8000 /* 1000X full-duplex capable */
#define EXTSR_1000XHDX 0x4000 /* 1000X half-duplex capable */
diff --git a/freebsd/sys/dev/mii/mii_physubr.c b/freebsd/sys/dev/mii/mii_physubr.c
index e2725ba6..e03d153a 100644
--- a/freebsd/sys/dev/mii/mii_physubr.c
+++ b/freebsd/sys/dev/mii/mii_physubr.c
@@ -56,9 +56,28 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/miibus_if.h>
/*
- * Media to register setting conversion table. Order matters.
+ *
+ * An array of structures to map MII media types to BMCR/ANAR settings.
*/
-static const struct mii_media mii_media_table[MII_NMEDIA] = {
+enum {
+ MII_MEDIA_NONE = 0,
+ MII_MEDIA_10_T,
+ MII_MEDIA_10_T_FDX,
+ MII_MEDIA_100_T4,
+ MII_MEDIA_100_TX,
+ MII_MEDIA_100_TX_FDX,
+ MII_MEDIA_1000_X,
+ MII_MEDIA_1000_X_FDX,
+ MII_MEDIA_1000_T,
+ MII_MEDIA_1000_T_FDX,
+ MII_NMEDIA,
+};
+
+static const struct mii_media {
+ u_int mm_bmcr; /* BMCR settings for this media */
+ u_int mm_anar; /* ANAR settings for this media */
+ u_int mm_gtcr; /* 100base-T2 or 1000base-T CR */
+} mii_media_table[MII_NMEDIA] = {
/* None */
{ BMCR_ISO, ANAR_CSMA,
0, },
@@ -106,8 +125,10 @@ mii_phy_setmedia(struct mii_softc *sc)
struct mii_data *mii = sc->mii_pdata;
struct ifmedia_entry *ife = mii->mii_media.ifm_cur;
int bmcr, anar, gtcr;
+ int index = -1;
- if (IFM_SUBTYPE(ife->ifm_media) == IFM_AUTO) {
+ switch (IFM_SUBTYPE(ife->ifm_media)) {
+ case IFM_AUTO:
/*
* Force renegotiation if MIIF_DOPAUSE or MIIF_FORCEANEG.
* The former is necessary as we might switch from flow-
@@ -117,19 +138,78 @@ mii_phy_setmedia(struct mii_softc *sc)
(sc->mii_flags & (MIIF_DOPAUSE | MIIF_FORCEANEG)) != 0)
(void)mii_phy_auto(sc);
return;
- }
- /*
- * Table index is stored in the media entry.
- */
+ case IFM_NONE:
+ index = MII_MEDIA_NONE;
+ break;
+
+ case IFM_HPNA_1:
+ index = MII_MEDIA_10_T;
+ break;
+
+ case IFM_10_T:
+ switch (IFM_OPTIONS(ife->ifm_media)) {
+ case 0:
+ index = MII_MEDIA_10_T;
+ break;
+ case IFM_FDX:
+ case (IFM_FDX | IFM_FLOW):
+ index = MII_MEDIA_10_T_FDX;
+ break;
+ }
+ break;
+
+ case IFM_100_TX:
+ case IFM_100_FX:
+ switch (IFM_OPTIONS(ife->ifm_media)) {
+ case 0:
+ index = MII_MEDIA_100_TX;
+ break;
+ case IFM_FDX:
+ case (IFM_FDX | IFM_FLOW):
+ index = MII_MEDIA_100_TX_FDX;
+ break;
+ }
+ break;
+
+ case IFM_100_T4:
+ index = MII_MEDIA_100_T4;
+ break;
- KASSERT(ife->ifm_data >=0 && ife->ifm_data < MII_NMEDIA,
- ("invalid ife->ifm_data (0x%x) in mii_phy_setmedia",
- ife->ifm_data));
+ case IFM_1000_SX:
+ switch (IFM_OPTIONS(ife->ifm_media)) {
+ case 0:
+ index = MII_MEDIA_1000_X;
+ break;
+ case IFM_FDX:
+ case (IFM_FDX | IFM_FLOW):
+ index = MII_MEDIA_1000_X_FDX;
+ break;
+ }
+ break;
+
+ case IFM_1000_T:
+ switch (IFM_OPTIONS(ife->ifm_media)) {
+ case 0:
+ case IFM_ETH_MASTER:
+ index = MII_MEDIA_1000_T;
+ break;
+ case IFM_FDX:
+ case (IFM_FDX | IFM_ETH_MASTER):
+ case (IFM_FDX | IFM_FLOW):
+ case (IFM_FDX | IFM_FLOW | IFM_ETH_MASTER):
+ index = MII_MEDIA_1000_T_FDX;
+ break;
+ }
+ break;
+ }
- anar = mii_media_table[ife->ifm_data].mm_anar;
- bmcr = mii_media_table[ife->ifm_data].mm_bmcr;
- gtcr = mii_media_table[ife->ifm_data].mm_gtcr;
+ KASSERT(index != -1, ("%s: failed to map media word %d",
+ __func__, ife->ifm_media));
+
+ anar = mii_media_table[index].mm_anar;
+ bmcr = mii_media_table[index].mm_bmcr;
+ gtcr = mii_media_table[index].mm_gtcr;
if (IFM_SUBTYPE(ife->ifm_media) == IFM_1000_T) {
gtcr |= GTCR_MAN_MS;
@@ -211,13 +291,8 @@ int
mii_phy_tick(struct mii_softc *sc)
{
struct ifmedia_entry *ife = sc->mii_pdata->mii_media.ifm_cur;
- struct ifnet *ifp = sc->mii_pdata->mii_ifp;
int reg;
- /* Just bail now if the interface is down. */
- if ((ifp->if_flags & IFF_UP) == 0)
- return (EJUSTRETURN);
-
/*
* If we're not doing autonegotiation, we don't need to do
* any extra work here. However, we need to check the link
@@ -286,12 +361,6 @@ mii_phy_reset(struct mii_softc *sc)
}
void
-mii_phy_down(struct mii_softc *sc)
-{
-
-}
-
-void
mii_phy_update(struct mii_softc *sc, int cmd)
{
struct mii_data *mii = sc->mii_pdata;
@@ -331,12 +400,11 @@ mii_phy_add_media(struct mii_softc *sc)
*/
sc->mii_anegticks = MII_ANEGTICKS;
-#define ADD(m, c) ifmedia_add(&mii->mii_media, (m), (c), NULL)
+#define ADD(m) ifmedia_add(&mii->mii_media, (m), 0, NULL)
#define PRINT(s) printf("%s%s", sep, s); sep = ", "
if ((sc->mii_flags & MIIF_NOISOLATE) == 0) {
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_NONE, 0, sc->mii_inst),
- MII_MEDIA_NONE);
+ ADD(IFM_MAKEWORD(IFM_ETHER, IFM_NONE, 0, sc->mii_inst));
PRINT("none");
}
@@ -348,51 +416,44 @@ mii_phy_add_media(struct mii_softc *sc)
if ((sc->mii_flags & MIIF_IS_HPNA) != 0) {
if ((sc->mii_capabilities & BMSR_10THDX) != 0) {
ADD(IFM_MAKEWORD(IFM_ETHER, IFM_HPNA_1, 0,
- sc->mii_inst), MII_MEDIA_10_T);
+ sc->mii_inst));
PRINT("HomePNA1");
}
return;
}
if ((sc->mii_capabilities & BMSR_10THDX) != 0) {
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_T, 0, sc->mii_inst),
- MII_MEDIA_10_T);
+ ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_T, 0, sc->mii_inst));
PRINT("10baseT");
}
if ((sc->mii_capabilities & BMSR_10TFDX) != 0) {
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_T, IFM_FDX, sc->mii_inst),
- MII_MEDIA_10_T_FDX);
+ ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_T, IFM_FDX, sc->mii_inst));
PRINT("10baseT-FDX");
if ((sc->mii_flags & MIIF_DOPAUSE) != 0 &&
(sc->mii_flags & MIIF_NOMANPAUSE) == 0) {
ADD(IFM_MAKEWORD(IFM_ETHER, IFM_10_T,
- IFM_FDX | IFM_FLOW, sc->mii_inst),
- MII_MEDIA_10_T_FDX);
+ IFM_FDX | IFM_FLOW, sc->mii_inst));
PRINT("10baseT-FDX-flow");
}
fdx = 1;
}
if ((sc->mii_capabilities & BMSR_100TXHDX) != 0) {
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, 0, sc->mii_inst),
- MII_MEDIA_100_TX);
+ ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, 0, sc->mii_inst));
PRINT("100baseTX");
}
if ((sc->mii_capabilities & BMSR_100TXFDX) != 0) {
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, IFM_FDX, sc->mii_inst),
- MII_MEDIA_100_TX_FDX);
+ ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, IFM_FDX, sc->mii_inst));
PRINT("100baseTX-FDX");
if ((sc->mii_flags & MIIF_DOPAUSE) != 0 &&
(sc->mii_flags & MIIF_NOMANPAUSE) == 0) {
ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_TX,
- IFM_FDX | IFM_FLOW, sc->mii_inst),
- MII_MEDIA_100_TX_FDX);
+ IFM_FDX | IFM_FLOW, sc->mii_inst));
PRINT("100baseTX-FDX-flow");
}
fdx = 1;
}
if ((sc->mii_capabilities & BMSR_100T4) != 0) {
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_T4, 0, sc->mii_inst),
- MII_MEDIA_100_T4);
+ ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_T4, 0, sc->mii_inst));
PRINT("100baseT4");
}
@@ -405,20 +466,19 @@ mii_phy_add_media(struct mii_softc *sc)
sc->mii_anegticks = MII_ANEGTICKS_GIGE;
sc->mii_flags |= MIIF_IS_1000X;
ADD(IFM_MAKEWORD(IFM_ETHER, IFM_1000_SX, 0,
- sc->mii_inst), MII_MEDIA_1000_X);
+ sc->mii_inst));
PRINT("1000baseSX");
}
if ((sc->mii_extcapabilities & EXTSR_1000XFDX) != 0) {
sc->mii_anegticks = MII_ANEGTICKS_GIGE;
sc->mii_flags |= MIIF_IS_1000X;
ADD(IFM_MAKEWORD(IFM_ETHER, IFM_1000_SX, IFM_FDX,
- sc->mii_inst), MII_MEDIA_1000_X_FDX);
+ sc->mii_inst));
PRINT("1000baseSX-FDX");
if ((sc->mii_flags & MIIF_DOPAUSE) != 0 &&
(sc->mii_flags & MIIF_NOMANPAUSE) == 0) {
ADD(IFM_MAKEWORD(IFM_ETHER, IFM_1000_SX,
- IFM_FDX | IFM_FLOW, sc->mii_inst),
- MII_MEDIA_1000_X_FDX);
+ IFM_FDX | IFM_FLOW, sc->mii_inst));
PRINT("1000baseSX-FDX-flow");
}
fdx = 1;
@@ -434,31 +494,29 @@ mii_phy_add_media(struct mii_softc *sc)
sc->mii_anegticks = MII_ANEGTICKS_GIGE;
sc->mii_flags |= MIIF_HAVE_GTCR;
ADD(IFM_MAKEWORD(IFM_ETHER, IFM_1000_T, 0,
- sc->mii_inst), MII_MEDIA_1000_T);
+ sc->mii_inst));
PRINT("1000baseT");
ADD(IFM_MAKEWORD(IFM_ETHER, IFM_1000_T,
- IFM_ETH_MASTER, sc->mii_inst), MII_MEDIA_1000_T);
+ IFM_ETH_MASTER, sc->mii_inst));
PRINT("1000baseT-master");
}
if ((sc->mii_extcapabilities & EXTSR_1000TFDX) != 0) {
sc->mii_anegticks = MII_ANEGTICKS_GIGE;
sc->mii_flags |= MIIF_HAVE_GTCR;
ADD(IFM_MAKEWORD(IFM_ETHER, IFM_1000_T, IFM_FDX,
- sc->mii_inst), MII_MEDIA_1000_T_FDX);
+ sc->mii_inst));
PRINT("1000baseT-FDX");
ADD(IFM_MAKEWORD(IFM_ETHER, IFM_1000_T,
- IFM_FDX | IFM_ETH_MASTER, sc->mii_inst),
- MII_MEDIA_1000_T_FDX);
+ IFM_FDX | IFM_ETH_MASTER, sc->mii_inst));
PRINT("1000baseT-FDX-master");
if ((sc->mii_flags & MIIF_DOPAUSE) != 0 &&
(sc->mii_flags & MIIF_NOMANPAUSE) == 0) {
ADD(IFM_MAKEWORD(IFM_ETHER, IFM_1000_T,
- IFM_FDX | IFM_FLOW, sc->mii_inst),
- MII_MEDIA_1000_T_FDX);
+ IFM_FDX | IFM_FLOW, sc->mii_inst));
PRINT("1000baseT-FDX-flow");
ADD(IFM_MAKEWORD(IFM_ETHER, IFM_1000_T,
IFM_FDX | IFM_FLOW | IFM_ETH_MASTER,
- sc->mii_inst), MII_MEDIA_1000_T_FDX);
+ sc->mii_inst));
PRINT("1000baseT-FDX-flow-master");
}
fdx = 1;
@@ -467,12 +525,11 @@ mii_phy_add_media(struct mii_softc *sc)
if ((sc->mii_capabilities & BMSR_ANEG) != 0) {
/* intentionally invalid index */
- ADD(IFM_MAKEWORD(IFM_ETHER, IFM_AUTO, 0, sc->mii_inst),
- MII_NMEDIA);
+ ADD(IFM_MAKEWORD(IFM_ETHER, IFM_AUTO, 0, sc->mii_inst));
PRINT("auto");
if (fdx != 0 && (sc->mii_flags & MIIF_DOPAUSE) != 0) {
ADD(IFM_MAKEWORD(IFM_ETHER, IFM_AUTO, IFM_FLOW,
- sc->mii_inst), MII_NMEDIA);
+ sc->mii_inst));
PRINT("auto-flow");
}
}
@@ -486,7 +543,6 @@ mii_phy_detach(device_t dev)
struct mii_softc *sc;
sc = device_get_softc(dev);
- mii_phy_down(sc);
sc->mii_dev = NULL;
LIST_REMOVE(sc, mii_list);
return (0);
diff --git a/freebsd/sys/dev/mii/miivar.h b/freebsd/sys/dev/mii/miivar.h
index 34b0e9ed..498e7204 100644
--- a/freebsd/sys/dev/mii/miivar.h
+++ b/freebsd/sys/dev/mii/miivar.h
@@ -36,6 +36,7 @@
#define _DEV_MII_MIIVAR_H_
#include <sys/queue.h>
+#include <net/if_var.h> /* XXX driver API temporary */
/*
* Media Independent Interface data structure defintions
@@ -44,20 +45,13 @@
struct mii_softc;
/*
- * Callbacks from MII layer into network interface device driver.
- */
-typedef int (*mii_readreg_t)(struct device *, int, int);
-typedef void (*mii_writereg_t)(struct device *, int, int, int);
-typedef void (*mii_statchg_t)(struct device *);
-
-/*
* A network interface driver has one of these structures in its softc.
* It is the interface from the network interface driver to the MII
* layer.
*/
struct mii_data {
struct ifmedia mii_media; /* media information */
- struct ifnet *mii_ifp; /* pointer back to network interface */
+ if_t mii_ifp; /* pointer back to network interface */
/*
* For network interfaces with multiple PHYs, a list of all
@@ -72,13 +66,6 @@ struct mii_data {
*/
u_int mii_media_status;
u_int mii_media_active;
-
- /*
- * Calls from MII layer into network interface driver.
- */
- mii_readreg_t mii_readreg;
- mii_writereg_t mii_writereg;
- mii_statchg_t mii_statchg;
};
typedef struct mii_data mii_data_t;
@@ -193,27 +180,6 @@ struct mii_phydesc {
MII_STR_ ## a ## _ ## b }
#define MII_PHY_END { 0, 0, NULL }
-/*
- * An array of these structures map MII media types to BMCR/ANAR settings.
- */
-struct mii_media {
- u_int mm_bmcr; /* BMCR settings for this media */
- u_int mm_anar; /* ANAR settings for this media */
- u_int mm_gtcr; /* 100base-T2 or 1000base-T CR */
-};
-
-#define MII_MEDIA_NONE 0
-#define MII_MEDIA_10_T 1
-#define MII_MEDIA_10_T_FDX 2
-#define MII_MEDIA_100_T4 3
-#define MII_MEDIA_100_TX 4
-#define MII_MEDIA_100_TX_FDX 5
-#define MII_MEDIA_1000_X 6
-#define MII_MEDIA_1000_X_FDX 7
-#define MII_MEDIA_1000_T 8
-#define MII_MEDIA_1000_T_FDX 9
-#define MII_NMEDIA 10
-
#ifdef _KERNEL
#define PHY_READ(p, r) \
@@ -246,9 +212,8 @@ MIIBUS_ACCESSOR(flags, FLAGS, u_int)
extern devclass_t miibus_devclass;
extern driver_t miibus_driver;
-int mii_attach(device_t, device_t *, struct ifnet *, ifm_change_cb_t,
+int mii_attach(device_t, device_t *, if_t, ifm_change_cb_t,
ifm_stat_cb_t, int, int, int, int);
-void mii_down(struct mii_data *);
int mii_mediachg(struct mii_data *);
void mii_tick(struct mii_data *);
void mii_pollstat(struct mii_data *);
@@ -256,12 +221,15 @@ void mii_phy_add_media(struct mii_softc *);
int mii_phy_auto(struct mii_softc *);
int mii_phy_detach(device_t dev);
-void mii_phy_down(struct mii_softc *);
u_int mii_phy_flowstatus(struct mii_softc *);
void mii_phy_reset(struct mii_softc *);
void mii_phy_setmedia(struct mii_softc *sc);
void mii_phy_update(struct mii_softc *, int);
int mii_phy_tick(struct mii_softc *);
+int mii_phy_mac_match(struct mii_softc *, const char *);
+int mii_dev_mac_match(device_t, const char *);
+void *mii_phy_mac_softc(struct mii_softc *);
+void *mii_dev_mac_softc(device_t);
const struct mii_phydesc * mii_phy_match(const struct mii_attach_args *ma,
const struct mii_phydesc *mpd);
diff --git a/freebsd/sys/dev/mii/rgephy.c b/freebsd/sys/dev/mii/rgephy.c
index 8dacb0e8..067bbadf 100644
--- a/freebsd/sys/dev/mii/rgephy.c
+++ b/freebsd/sys/dev/mii/rgephy.c
@@ -44,9 +44,11 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/socket.h>
+#include <sys/taskqueue.h>
#include <sys/bus.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_media.h>
@@ -59,7 +61,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/miibus_if.h>
#include <machine/bus.h>
-#include <pci/if_rlreg.h>
+#include <dev/rl/if_rlreg.h>
static int rgephy_probe(device_t);
static int rgephy_attach(device_t);
@@ -90,6 +92,7 @@ static void rgephy_reset(struct mii_softc *);
static int rgephy_linkup(struct mii_softc *);
static void rgephy_loop(struct mii_softc *);
static void rgephy_load_dspcode(struct mii_softc *);
+static void rgephy_disable_eee(struct mii_softc *);
static const struct mii_phydesc rgephys[] = {
MII_PHY_DESC(REALTEK, RTL8169S),
@@ -114,13 +117,11 @@ static int
rgephy_attach(device_t dev)
{
struct mii_softc *sc;
- struct mii_attach_args *ma;
u_int flags;
sc = device_get_softc(dev);
- ma = device_get_ivars(dev);
flags = 0;
- if (strcmp(ma->mii_data->mii_ifp->if_dname, "re") == 0)
+ if (mii_dev_mac_match(dev, "re"))
flags |= MIIF_PHYPRIV0;
mii_phy_dev_attach(dev, flags, &rgephy_funcs, 0);
@@ -157,12 +158,6 @@ rgephy_service(struct mii_softc *sc, struct mii_data *mii, int cmd)
break;
case MII_MEDIACHG:
- /*
- * If the interface is not up, don't do anything.
- */
- if ((mii->mii_ifp->if_flags & IFF_UP) == 0)
- break;
-
PHY_RESET(sc); /* XXX hardware bug work-around */
anar = PHY_READ(sc, RGEPHY_MII_ANAR);
@@ -235,12 +230,6 @@ setit:
case MII_TICK:
/*
- * Is the interface even up?
- */
- if ((mii->mii_ifp->if_flags & IFF_UP) == 0)
- return (0);
-
- /*
* Only used for autonegotiation.
*/
if (IFM_SUBTYPE(ife->ifm_media) != IFM_AUTO) {
@@ -531,10 +520,9 @@ rgephy_reset(struct mii_softc *sc)
switch (sc->mii_mpd_rev) {
case RGEPHY_8211F:
pcr = PHY_READ(sc, RGEPHY_F_MII_PCR1);
- if ((pcr & RGEPHY_F_PCR1_MDI_MM) != 0) {
- pcr &= ~RGEPHY_F_PCR1_MDI_MM;
- PHY_WRITE(sc, RGEPHY_F_MII_PCR1, pcr);
- }
+ pcr &= ~(RGEPHY_F_PCR1_MDI_MM | RGEPHY_F_PCR1_ALDPS_EN);
+ PHY_WRITE(sc, RGEPHY_F_MII_PCR1, pcr);
+ rgephy_disable_eee(sc);
break;
case RGEPHY_8211C:
if ((sc->mii_flags & MIIF_PHYPRIV0) == 0) {
@@ -562,3 +550,29 @@ rgephy_reset(struct mii_softc *sc)
DELAY(1000);
rgephy_load_dspcode(sc);
}
+
+static void
+rgephy_disable_eee(struct mii_softc *sc)
+{
+ uint16_t anar;
+
+ PHY_WRITE(sc, RGEPHY_F_EPAGSR, 0x0000);
+ PHY_WRITE(sc, MII_MMDACR, MMDACR_FN_ADDRESS |
+ (MMDACR_DADDRMASK & RGEPHY_F_MMD_DEV_7));
+ PHY_WRITE(sc, MII_MMDAADR, RGEPHY_F_MMD_EEEAR);
+ PHY_WRITE(sc, MII_MMDACR, MMDACR_FN_DATANPI |
+ (MMDACR_DADDRMASK & RGEPHY_F_MMD_DEV_7));
+ PHY_WRITE(sc, MII_MMDAADR, 0x0000);
+ PHY_WRITE(sc, MII_MMDACR, 0x0000);
+ /*
+ * XXX
+ * Restart auto-negotiation to take changes effect.
+ * This may result in link establishment.
+ */
+ anar = BMSR_MEDIA_TO_ANAR(sc->mii_capabilities) | ANAR_CSMA;
+ PHY_WRITE(sc, RGEPHY_MII_ANAR, anar);
+ PHY_WRITE(sc, RGEPHY_MII_1000CTL, RGEPHY_1000CTL_AHD |
+ RGEPHY_1000CTL_AFD);
+ PHY_WRITE(sc, RGEPHY_MII_BMCR, RGEPHY_BMCR_RESET |
+ RGEPHY_BMCR_AUTOEN | RGEPHY_BMCR_STARTNEG);
+}
diff --git a/freebsd/sys/dev/mii/rgephyreg.h b/freebsd/sys/dev/mii/rgephyreg.h
index 2a00517e..7c24a1f7 100644
--- a/freebsd/sys/dev/mii/rgephyreg.h
+++ b/freebsd/sys/dev/mii/rgephyreg.h
@@ -183,4 +183,20 @@
#define RGEPHY_F_SSR_MDI 0x0002 /* MDI/MDIX */
#define RGEPHY_F_SSR_JABBER 0x0001 /* Jabber */
+/* RTL8211F */
+#define RGEPHY_F_EPAGSR 0x1F /* Extension page select register */
+
+/* RTL8211F */
+#define RGEPHY_F_MMD_DEV_7 0x07
+
+/* RTL8211F MMD device 7 */
+#define RGEPHY_F_MMD_EEEAR 0x3C /* EEE advertisement */
+#define EEEAR_1000T 0x0004 /* adv. 1000baseT EEE */
+#define EEEAR_100TX 0x0002 /* adv. 100baseTX EEE */
+
+/* RTL8211F MMD device 7 */
+#define RGEPHY_F_MMD_EEELPAR 0x3D /* EEE link partner abilities */
+#define EEELPAR_1000T 0x0004 /* link partner 1000baseT EEE capable */
+#define EEELPAR_100TX 0x0002 /* link partner 100baseTX EEE capable */
+
#endif /* _DEV_RGEPHY_MIIREG_H_ */
diff --git a/freebsd/sys/dev/mmc/mmcsd.c b/freebsd/sys/dev/mmc/mmcsd.c
index d02abf1c..a39d51fe 100644
--- a/freebsd/sys/dev/mmc/mmcsd.c
+++ b/freebsd/sys/dev/mmc/mmcsd.c
@@ -71,8 +71,8 @@ __FBSDID("$FreeBSD$");
#include <geom/geom_disk.h>
#include <dev/mmc/mmcbrvar.h>
-#include <dev/mmc/mmcvar.h>
#include <dev/mmc/mmcreg.h>
+#include <dev/mmc/mmcvar.h>
#include <rtems/bsd/local/mmcbus_if.h>
#ifdef __rtems__
diff --git a/freebsd/sys/dev/nvme/nvme.h b/freebsd/sys/dev/nvme/nvme.h
new file mode 100644
index 00000000..755e3766
--- /dev/null
+++ b/freebsd/sys/dev/nvme/nvme.h
@@ -0,0 +1,957 @@
+/*-
+ * Copyright (C) 2012-2013 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __NVME_H__
+#define __NVME_H__
+
+#ifdef _KERNEL
+#include <sys/types.h>
+#endif
+
+#include <rtems/bsd/sys/param.h>
+
+#define NVME_PASSTHROUGH_CMD _IOWR('n', 0, struct nvme_pt_command)
+#define NVME_RESET_CONTROLLER _IO('n', 1)
+
+#define NVME_IO_TEST _IOWR('n', 100, struct nvme_io_test)
+#define NVME_BIO_TEST _IOWR('n', 101, struct nvme_io_test)
+
+/*
+ * Use to mark a command to apply to all namespaces, or to retrieve global
+ * log pages.
+ */
+#define NVME_GLOBAL_NAMESPACE_TAG ((uint32_t)0xFFFFFFFF)
+
+/* Cap nvme to 1MB transfers; the driver explodes with larger sizes. */
+#define NVME_MAX_XFER_SIZE (MAXPHYS < (1<<20) ? MAXPHYS : (1<<20))
+
+union cap_lo_register {
+ uint32_t raw;
+ struct {
+ /** maximum queue entries supported */
+ uint32_t mqes : 16;
+
+ /** contiguous queues required */
+ uint32_t cqr : 1;
+
+ /** arbitration mechanism supported */
+ uint32_t ams : 2;
+
+ uint32_t reserved1 : 5;
+
+ /** timeout */
+ uint32_t to : 8;
+ } bits __packed;
+} __packed;
+
+union cap_hi_register {
+ uint32_t raw;
+ struct {
+ /** doorbell stride */
+ uint32_t dstrd : 4;
+
+ uint32_t reserved3 : 1;
+
+ /** command sets supported */
+ uint32_t css_nvm : 1;
+
+ uint32_t css_reserved : 3;
+ uint32_t reserved2 : 7;
+
+ /** memory page size minimum */
+ uint32_t mpsmin : 4;
+
+ /** memory page size maximum */
+ uint32_t mpsmax : 4;
+
+ uint32_t reserved1 : 8;
+ } bits __packed;
+} __packed;
+
+union cc_register {
+ uint32_t raw;
+ struct {
+ /** enable */
+ uint32_t en : 1;
+
+ uint32_t reserved1 : 3;
+
+ /** i/o command set selected */
+ uint32_t css : 3;
+
+ /** memory page size */
+ uint32_t mps : 4;
+
+ /** arbitration mechanism selected */
+ uint32_t ams : 3;
+
+ /** shutdown notification */
+ uint32_t shn : 2;
+
+ /** i/o submission queue entry size */
+ uint32_t iosqes : 4;
+
+ /** i/o completion queue entry size */
+ uint32_t iocqes : 4;
+
+ uint32_t reserved2 : 8;
+ } bits __packed;
+} __packed;
+
+enum shn_value {
+ NVME_SHN_NORMAL = 0x1,
+ NVME_SHN_ABRUPT = 0x2,
+};
+
+union csts_register {
+ uint32_t raw;
+ struct {
+ /** ready */
+ uint32_t rdy : 1;
+
+ /** controller fatal status */
+ uint32_t cfs : 1;
+
+ /** shutdown status */
+ uint32_t shst : 2;
+
+ uint32_t reserved1 : 28;
+ } bits __packed;
+} __packed;
+
+enum shst_value {
+ NVME_SHST_NORMAL = 0x0,
+ NVME_SHST_OCCURRING = 0x1,
+ NVME_SHST_COMPLETE = 0x2,
+};
+
+union aqa_register {
+ uint32_t raw;
+ struct {
+ /** admin submission queue size */
+ uint32_t asqs : 12;
+
+ uint32_t reserved1 : 4;
+
+ /** admin completion queue size */
+ uint32_t acqs : 12;
+
+ uint32_t reserved2 : 4;
+ } bits __packed;
+} __packed;
+
+struct nvme_registers
+{
+ /** controller capabilities */
+ union cap_lo_register cap_lo;
+ union cap_hi_register cap_hi;
+
+ uint32_t vs; /* version */
+ uint32_t intms; /* interrupt mask set */
+ uint32_t intmc; /* interrupt mask clear */
+
+ /** controller configuration */
+ union cc_register cc;
+
+ uint32_t reserved1;
+
+ /** controller status */
+ union csts_register csts;
+
+ uint32_t reserved2;
+
+ /** admin queue attributes */
+ union aqa_register aqa;
+
+ uint64_t asq; /* admin submission queue base addr */
+ uint64_t acq; /* admin completion queue base addr */
+ uint32_t reserved3[0x3f2];
+
+ struct {
+ uint32_t sq_tdbl; /* submission queue tail doorbell */
+ uint32_t cq_hdbl; /* completion queue head doorbell */
+ } doorbell[1] __packed;
+} __packed;
+
+struct nvme_command
+{
+ /* dword 0 */
+ uint16_t opc : 8; /* opcode */
+ uint16_t fuse : 2; /* fused operation */
+ uint16_t rsvd1 : 6;
+ uint16_t cid; /* command identifier */
+
+ /* dword 1 */
+ uint32_t nsid; /* namespace identifier */
+
+ /* dword 2-3 */
+ uint32_t rsvd2;
+ uint32_t rsvd3;
+
+ /* dword 4-5 */
+ uint64_t mptr; /* metadata pointer */
+
+ /* dword 6-7 */
+ uint64_t prp1; /* prp entry 1 */
+
+ /* dword 8-9 */
+ uint64_t prp2; /* prp entry 2 */
+
+ /* dword 10-15 */
+ uint32_t cdw10; /* command-specific */
+ uint32_t cdw11; /* command-specific */
+ uint32_t cdw12; /* command-specific */
+ uint32_t cdw13; /* command-specific */
+ uint32_t cdw14; /* command-specific */
+ uint32_t cdw15; /* command-specific */
+} __packed;
+
+struct nvme_status {
+
+ uint16_t p : 1; /* phase tag */
+ uint16_t sc : 8; /* status code */
+ uint16_t sct : 3; /* status code type */
+ uint16_t rsvd2 : 2;
+ uint16_t m : 1; /* more */
+ uint16_t dnr : 1; /* do not retry */
+} __packed;
+
+struct nvme_completion {
+
+ /* dword 0 */
+ uint32_t cdw0; /* command-specific */
+
+ /* dword 1 */
+ uint32_t rsvd1;
+
+ /* dword 2 */
+ uint16_t sqhd; /* submission queue head pointer */
+ uint16_t sqid; /* submission queue identifier */
+
+ /* dword 3 */
+ uint16_t cid; /* command identifier */
+ struct nvme_status status;
+} __packed;
+
+struct nvme_dsm_range {
+
+ uint32_t attributes;
+ uint32_t length;
+ uint64_t starting_lba;
+} __packed;
+
+/* status code types */
+enum nvme_status_code_type {
+ NVME_SCT_GENERIC = 0x0,
+ NVME_SCT_COMMAND_SPECIFIC = 0x1,
+ NVME_SCT_MEDIA_ERROR = 0x2,
+ /* 0x3-0x6 - reserved */
+ NVME_SCT_VENDOR_SPECIFIC = 0x7,
+};
+
+/* generic command status codes */
+enum nvme_generic_command_status_code {
+ NVME_SC_SUCCESS = 0x00,
+ NVME_SC_INVALID_OPCODE = 0x01,
+ NVME_SC_INVALID_FIELD = 0x02,
+ NVME_SC_COMMAND_ID_CONFLICT = 0x03,
+ NVME_SC_DATA_TRANSFER_ERROR = 0x04,
+ NVME_SC_ABORTED_POWER_LOSS = 0x05,
+ NVME_SC_INTERNAL_DEVICE_ERROR = 0x06,
+ NVME_SC_ABORTED_BY_REQUEST = 0x07,
+ NVME_SC_ABORTED_SQ_DELETION = 0x08,
+ NVME_SC_ABORTED_FAILED_FUSED = 0x09,
+ NVME_SC_ABORTED_MISSING_FUSED = 0x0a,
+ NVME_SC_INVALID_NAMESPACE_OR_FORMAT = 0x0b,
+ NVME_SC_COMMAND_SEQUENCE_ERROR = 0x0c,
+
+ NVME_SC_LBA_OUT_OF_RANGE = 0x80,
+ NVME_SC_CAPACITY_EXCEEDED = 0x81,
+ NVME_SC_NAMESPACE_NOT_READY = 0x82,
+};
+
+/* command specific status codes */
+enum nvme_command_specific_status_code {
+ NVME_SC_COMPLETION_QUEUE_INVALID = 0x00,
+ NVME_SC_INVALID_QUEUE_IDENTIFIER = 0x01,
+ NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED = 0x02,
+ NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED = 0x03,
+ /* 0x04 - reserved */
+ NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED = 0x05,
+ NVME_SC_INVALID_FIRMWARE_SLOT = 0x06,
+ NVME_SC_INVALID_FIRMWARE_IMAGE = 0x07,
+ NVME_SC_INVALID_INTERRUPT_VECTOR = 0x08,
+ NVME_SC_INVALID_LOG_PAGE = 0x09,
+ NVME_SC_INVALID_FORMAT = 0x0a,
+ NVME_SC_FIRMWARE_REQUIRES_RESET = 0x0b,
+
+ NVME_SC_CONFLICTING_ATTRIBUTES = 0x80,
+ NVME_SC_INVALID_PROTECTION_INFO = 0x81,
+ NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE = 0x82,
+};
+
+/* media error status codes */
+enum nvme_media_error_status_code {
+ NVME_SC_WRITE_FAULTS = 0x80,
+ NVME_SC_UNRECOVERED_READ_ERROR = 0x81,
+ NVME_SC_GUARD_CHECK_ERROR = 0x82,
+ NVME_SC_APPLICATION_TAG_CHECK_ERROR = 0x83,
+ NVME_SC_REFERENCE_TAG_CHECK_ERROR = 0x84,
+ NVME_SC_COMPARE_FAILURE = 0x85,
+ NVME_SC_ACCESS_DENIED = 0x86,
+};
+
+/* admin opcodes */
+enum nvme_admin_opcode {
+ NVME_OPC_DELETE_IO_SQ = 0x00,
+ NVME_OPC_CREATE_IO_SQ = 0x01,
+ NVME_OPC_GET_LOG_PAGE = 0x02,
+ /* 0x03 - reserved */
+ NVME_OPC_DELETE_IO_CQ = 0x04,
+ NVME_OPC_CREATE_IO_CQ = 0x05,
+ NVME_OPC_IDENTIFY = 0x06,
+ /* 0x07 - reserved */
+ NVME_OPC_ABORT = 0x08,
+ NVME_OPC_SET_FEATURES = 0x09,
+ NVME_OPC_GET_FEATURES = 0x0a,
+ /* 0x0b - reserved */
+ NVME_OPC_ASYNC_EVENT_REQUEST = 0x0c,
+ /* 0x0d-0x0f - reserved */
+ NVME_OPC_FIRMWARE_ACTIVATE = 0x10,
+ NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD = 0x11,
+
+ NVME_OPC_FORMAT_NVM = 0x80,
+ NVME_OPC_SECURITY_SEND = 0x81,
+ NVME_OPC_SECURITY_RECEIVE = 0x82,
+};
+
+/* nvme nvm opcodes */
+enum nvme_nvm_opcode {
+ NVME_OPC_FLUSH = 0x00,
+ NVME_OPC_WRITE = 0x01,
+ NVME_OPC_READ = 0x02,
+ /* 0x03 - reserved */
+ NVME_OPC_WRITE_UNCORRECTABLE = 0x04,
+ NVME_OPC_COMPARE = 0x05,
+ /* 0x06-0x07 - reserved */
+ NVME_OPC_DATASET_MANAGEMENT = 0x09,
+};
+
+enum nvme_feature {
+ /* 0x00 - reserved */
+ NVME_FEAT_ARBITRATION = 0x01,
+ NVME_FEAT_POWER_MANAGEMENT = 0x02,
+ NVME_FEAT_LBA_RANGE_TYPE = 0x03,
+ NVME_FEAT_TEMPERATURE_THRESHOLD = 0x04,
+ NVME_FEAT_ERROR_RECOVERY = 0x05,
+ NVME_FEAT_VOLATILE_WRITE_CACHE = 0x06,
+ NVME_FEAT_NUMBER_OF_QUEUES = 0x07,
+ NVME_FEAT_INTERRUPT_COALESCING = 0x08,
+ NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION = 0x09,
+ NVME_FEAT_WRITE_ATOMICITY = 0x0A,
+ NVME_FEAT_ASYNC_EVENT_CONFIGURATION = 0x0B,
+ /* 0x0C-0x7F - reserved */
+ NVME_FEAT_SOFTWARE_PROGRESS_MARKER = 0x80,
+ /* 0x81-0xBF - command set specific (reserved) */
+ /* 0xC0-0xFF - vendor specific */
+};
+
+enum nvme_dsm_attribute {
+ NVME_DSM_ATTR_INTEGRAL_READ = 0x1,
+ NVME_DSM_ATTR_INTEGRAL_WRITE = 0x2,
+ NVME_DSM_ATTR_DEALLOCATE = 0x4,
+};
+
+enum nvme_activate_action {
+ NVME_AA_REPLACE_NO_ACTIVATE = 0x0,
+ NVME_AA_REPLACE_ACTIVATE = 0x1,
+ NVME_AA_ACTIVATE = 0x2,
+};
+
+struct nvme_power_state {
+ /** Maximum Power */
+ uint16_t mp; /* Maximum Power */
+ uint8_t ps_rsvd1;
+ uint8_t mps : 1; /* Max Power Scale */
+ uint8_t nops : 1; /* Non-Operational State */
+ uint8_t ps_rsvd2 : 6;
+ uint32_t enlat; /* Entry Latency */
+ uint32_t exlat; /* Exit Latency */
+ uint8_t rrt : 5; /* Relative Read Throughput */
+ uint8_t ps_rsvd3 : 3;
+ uint8_t rrl : 5; /* Relative Read Latency */
+ uint8_t ps_rsvd4 : 3;
+ uint8_t rwt : 5; /* Relative Write Throughput */
+ uint8_t ps_rsvd5 : 3;
+ uint8_t rwl : 5; /* Relative Write Latency */
+ uint8_t ps_rsvd6 : 3;
+ uint16_t idlp; /* Idle Power */
+ uint8_t ps_rsvd7 : 6;
+ uint8_t ips : 2; /* Idle Power Scale */
+ uint8_t ps_rsvd8;
+ uint16_t actp; /* Active Power */
+ uint8_t apw : 3; /* Active Power Workload */
+ uint8_t ps_rsvd9 : 3;
+ uint8_t aps : 2; /* Active Power Scale */
+ uint8_t ps_rsvd10[9];
+} __packed;
+
+#define NVME_SERIAL_NUMBER_LENGTH 20
+#define NVME_MODEL_NUMBER_LENGTH 40
+#define NVME_FIRMWARE_REVISION_LENGTH 8
+
+struct nvme_controller_data {
+
+ /* bytes 0-255: controller capabilities and features */
+
+ /** pci vendor id */
+ uint16_t vid;
+
+ /** pci subsystem vendor id */
+ uint16_t ssvid;
+
+ /** serial number */
+ uint8_t sn[NVME_SERIAL_NUMBER_LENGTH];
+
+ /** model number */
+ uint8_t mn[NVME_MODEL_NUMBER_LENGTH];
+
+ /** firmware revision */
+ uint8_t fr[NVME_FIRMWARE_REVISION_LENGTH];
+
+ /** recommended arbitration burst */
+ uint8_t rab;
+
+ /** ieee oui identifier */
+ uint8_t ieee[3];
+
+ /** multi-interface capabilities */
+ uint8_t mic;
+
+ /** maximum data transfer size */
+ uint8_t mdts;
+
+ uint8_t reserved1[178];
+
+ /* bytes 256-511: admin command set attributes */
+
+ /** optional admin command support */
+ struct {
+ /* supports security send/receive commands */
+ uint16_t security : 1;
+
+ /* supports format nvm command */
+ uint16_t format : 1;
+
+ /* supports firmware activate/download commands */
+ uint16_t firmware : 1;
+
+ uint16_t oacs_rsvd : 13;
+ } __packed oacs;
+
+ /** abort command limit */
+ uint8_t acl;
+
+ /** asynchronous event request limit */
+ uint8_t aerl;
+
+ /** firmware updates */
+ struct {
+ /* first slot is read-only */
+ uint8_t slot1_ro : 1;
+
+ /* number of firmware slots */
+ uint8_t num_slots : 3;
+
+ uint8_t frmw_rsvd : 4;
+ } __packed frmw;
+
+ /** log page attributes */
+ struct {
+ /* per namespace smart/health log page */
+ uint8_t ns_smart : 1;
+
+ uint8_t lpa_rsvd : 7;
+ } __packed lpa;
+
+ /** error log page entries */
+ uint8_t elpe;
+
+ /** number of power states supported */
+ uint8_t npss;
+
+ /** admin vendor specific command configuration */
+ struct {
+ /* admin vendor specific commands use spec format */
+ uint8_t spec_format : 1;
+
+ uint8_t avscc_rsvd : 7;
+ } __packed avscc;
+
+ uint8_t reserved2[247];
+
+ /* bytes 512-703: nvm command set attributes */
+
+ /** submission queue entry size */
+ struct {
+ uint8_t min : 4;
+ uint8_t max : 4;
+ } __packed sqes;
+
+ /** completion queue entry size */
+ struct {
+ uint8_t min : 4;
+ uint8_t max : 4;
+ } __packed cqes;
+
+ uint8_t reserved3[2];
+
+ /** number of namespaces */
+ uint32_t nn;
+
+ /** optional nvm command support */
+ struct {
+ uint16_t compare : 1;
+ uint16_t write_unc : 1;
+ uint16_t dsm: 1;
+ uint16_t reserved: 13;
+ } __packed oncs;
+
+ /** fused operation support */
+ uint16_t fuses;
+
+ /** format nvm attributes */
+ uint8_t fna;
+
+ /** volatile write cache */
+ struct {
+ uint8_t present : 1;
+ uint8_t reserved : 7;
+ } __packed vwc;
+
+ /* TODO: flesh out remaining nvm command set attributes */
+ uint8_t reserved4[178];
+
+ /* bytes 704-2047: i/o command set attributes */
+ uint8_t reserved5[1344];
+
+ /* bytes 2048-3071: power state descriptors */
+ struct nvme_power_state power_state[32];
+
+ /* bytes 3072-4095: vendor specific */
+ uint8_t vs[1024];
+} __packed __aligned(4);
+
+struct nvme_namespace_data {
+
+ /** namespace size */
+ uint64_t nsze;
+
+ /** namespace capacity */
+ uint64_t ncap;
+
+ /** namespace utilization */
+ uint64_t nuse;
+
+ /** namespace features */
+ struct {
+ /** thin provisioning */
+ uint8_t thin_prov : 1;
+ uint8_t reserved1 : 7;
+ } __packed nsfeat;
+
+ /** number of lba formats */
+ uint8_t nlbaf;
+
+ /** formatted lba size */
+ struct {
+ uint8_t format : 4;
+ uint8_t extended : 1;
+ uint8_t reserved2 : 3;
+ } __packed flbas;
+
+ /** metadata capabilities */
+ struct {
+ /* metadata can be transferred as part of data prp list */
+ uint8_t extended : 1;
+
+ /* metadata can be transferred with separate metadata pointer */
+ uint8_t pointer : 1;
+
+ uint8_t reserved3 : 6;
+ } __packed mc;
+
+ /** end-to-end data protection capabilities */
+ struct {
+ /* protection information type 1 */
+ uint8_t pit1 : 1;
+
+ /* protection information type 2 */
+ uint8_t pit2 : 1;
+
+ /* protection information type 3 */
+ uint8_t pit3 : 1;
+
+ /* first eight bytes of metadata */
+ uint8_t md_start : 1;
+
+ /* last eight bytes of metadata */
+ uint8_t md_end : 1;
+ } __packed dpc;
+
+ /** end-to-end data protection type settings */
+ struct {
+ /* protection information type */
+ uint8_t pit : 3;
+
+ /* 1 == protection info transferred at start of metadata */
+ /* 0 == protection info transferred at end of metadata */
+ uint8_t md_start : 1;
+
+ uint8_t reserved4 : 4;
+ } __packed dps;
+
+ uint8_t reserved5[98];
+
+ /** lba format support */
+ struct {
+ /** metadata size */
+ uint32_t ms : 16;
+
+ /** lba data size */
+ uint32_t lbads : 8;
+
+ /** relative performance */
+ uint32_t rp : 2;
+
+ uint32_t reserved6 : 6;
+ } __packed lbaf[16];
+
+ uint8_t reserved6[192];
+
+ uint8_t vendor_specific[3712];
+} __packed __aligned(4);
+
+enum nvme_log_page {
+
+ /* 0x00 - reserved */
+ NVME_LOG_ERROR = 0x01,
+ NVME_LOG_HEALTH_INFORMATION = 0x02,
+ NVME_LOG_FIRMWARE_SLOT = 0x03,
+ /* 0x04-0x7F - reserved */
+ /* 0x80-0xBF - I/O command set specific */
+ /* 0xC0-0xFF - vendor specific */
+};
+
+struct nvme_error_information_entry {
+
+ uint64_t error_count;
+ uint16_t sqid;
+ uint16_t cid;
+ struct nvme_status status;
+ uint16_t error_location;
+ uint64_t lba;
+ uint32_t nsid;
+ uint8_t vendor_specific;
+ uint8_t reserved[35];
+} __packed __aligned(4);
+
+union nvme_critical_warning_state {
+
+ uint8_t raw;
+
+ struct {
+ uint8_t available_spare : 1;
+ uint8_t temperature : 1;
+ uint8_t device_reliability : 1;
+ uint8_t read_only : 1;
+ uint8_t volatile_memory_backup : 1;
+ uint8_t reserved : 3;
+ } __packed bits;
+} __packed;
+
+struct nvme_health_information_page {
+
+ union nvme_critical_warning_state critical_warning;
+
+ uint16_t temperature;
+ uint8_t available_spare;
+ uint8_t available_spare_threshold;
+ uint8_t percentage_used;
+
+ uint8_t reserved[26];
+
+ /*
+ * Note that the following are 128-bit values, but are
+ * defined as an array of 2 64-bit values.
+ */
+ /* Data Units Read is always in 512-byte units. */
+ uint64_t data_units_read[2];
+ /* Data Units Written is always in 512-byte units. */
+ uint64_t data_units_written[2];
+ /* For NVM command set, this includes Compare commands. */
+ uint64_t host_read_commands[2];
+ uint64_t host_write_commands[2];
+ /* Controller Busy Time is reported in minutes. */
+ uint64_t controller_busy_time[2];
+ uint64_t power_cycles[2];
+ uint64_t power_on_hours[2];
+ uint64_t unsafe_shutdowns[2];
+ uint64_t media_errors[2];
+ uint64_t num_error_info_log_entries[2];
+
+ uint8_t reserved2[320];
+} __packed __aligned(4);
+
+struct nvme_firmware_page {
+
+ struct {
+ uint8_t slot : 3; /* slot for current FW */
+ uint8_t reserved : 5;
+ } __packed afi;
+
+ uint8_t reserved[7];
+ uint64_t revision[7]; /* revisions for 7 slots */
+ uint8_t reserved2[448];
+} __packed __aligned(4);
+
+#define NVME_TEST_MAX_THREADS 128
+
+struct nvme_io_test {
+
+ enum nvme_nvm_opcode opc;
+ uint32_t size;
+ uint32_t time; /* in seconds */
+ uint32_t num_threads;
+ uint32_t flags;
+ uint64_t io_completed[NVME_TEST_MAX_THREADS];
+};
+
+enum nvme_io_test_flags {
+
+ /*
+ * Specifies whether dev_refthread/dev_relthread should be
+ * called during NVME_BIO_TEST. Ignored for other test
+ * types.
+ */
+ NVME_TEST_FLAG_REFTHREAD = 0x1,
+};
+
+struct nvme_pt_command {
+
+ /*
+ * cmd is used to specify a passthrough command to a controller or
+ * namespace.
+ *
+ * The following fields from cmd may be specified by the caller:
+ * * opc (opcode)
+ * * nsid (namespace id) - for admin commands only
+ * * cdw10-cdw15
+ *
+ * Remaining fields must be set to 0 by the caller.
+ */
+ struct nvme_command cmd;
+
+ /*
+ * cpl returns completion status for the passthrough command
+ * specified by cmd.
+ *
+ * The following fields will be filled out by the driver, for
+ * consumption by the caller:
+ * * cdw0
+ * * status (except for phase)
+ *
+ * Remaining fields will be set to 0 by the driver.
+ */
+ struct nvme_completion cpl;
+
+ /* buf is the data buffer associated with this passthrough command. */
+ void * buf;
+
+ /*
+ * len is the length of the data buffer associated with this
+ * passthrough command.
+ */
+ uint32_t len;
+
+ /*
+ * is_read = 1 if the passthrough command will read data into the
+ * supplied buffer from the controller.
+ *
+ * is_read = 0 if the passthrough command will write data from the
+ * supplied buffer to the controller.
+ */
+ uint32_t is_read;
+
+ /*
+ * driver_lock is used by the driver only. It must be set to 0
+ * by the caller.
+ */
+ struct mtx * driver_lock;
+};
+
+#define nvme_completion_is_error(cpl) \
+ ((cpl)->status.sc != 0 || (cpl)->status.sct != 0)
+
+void nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen);
+
+#ifdef _KERNEL
+
+struct bio;
+
+struct nvme_namespace;
+struct nvme_controller;
+struct nvme_consumer;
+
+typedef void (*nvme_cb_fn_t)(void *, const struct nvme_completion *);
+
+typedef void *(*nvme_cons_ns_fn_t)(struct nvme_namespace *, void *);
+typedef void *(*nvme_cons_ctrlr_fn_t)(struct nvme_controller *);
+typedef void (*nvme_cons_async_fn_t)(void *, const struct nvme_completion *,
+ uint32_t, void *, uint32_t);
+typedef void (*nvme_cons_fail_fn_t)(void *);
+
+enum nvme_namespace_flags {
+ NVME_NS_DEALLOCATE_SUPPORTED = 0x1,
+ NVME_NS_FLUSH_SUPPORTED = 0x2,
+};
+
+int nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
+ struct nvme_pt_command *pt,
+ uint32_t nsid, int is_user_buffer,
+ int is_admin_cmd);
+
+/* Admin functions */
+void nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr,
+ uint8_t feature, uint32_t cdw11,
+ void *payload, uint32_t payload_size,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+void nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr,
+ uint8_t feature, uint32_t cdw11,
+ void *payload, uint32_t payload_size,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+void nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr,
+ uint8_t log_page, uint32_t nsid,
+ void *payload, uint32_t payload_size,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+
+/* NVM I/O functions */
+int nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload,
+ uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
+ void *cb_arg);
+int nvme_ns_cmd_write_bio(struct nvme_namespace *ns, struct bio *bp,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+int nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload,
+ uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
+ void *cb_arg);
+int nvme_ns_cmd_read_bio(struct nvme_namespace *ns, struct bio *bp,
+ nvme_cb_fn_t cb_fn, void *cb_arg);
+int nvme_ns_cmd_deallocate(struct nvme_namespace *ns, void *payload,
+ uint8_t num_ranges, nvme_cb_fn_t cb_fn,
+ void *cb_arg);
+int nvme_ns_cmd_flush(struct nvme_namespace *ns, nvme_cb_fn_t cb_fn,
+ void *cb_arg);
+int nvme_ns_dump(struct nvme_namespace *ns, void *virt, off_t offset,
+ size_t len);
+
+/* Registration functions */
+struct nvme_consumer * nvme_register_consumer(nvme_cons_ns_fn_t ns_fn,
+ nvme_cons_ctrlr_fn_t ctrlr_fn,
+ nvme_cons_async_fn_t async_fn,
+ nvme_cons_fail_fn_t fail_fn);
+void nvme_unregister_consumer(struct nvme_consumer *consumer);
+
+/* Controller helper functions */
+device_t nvme_ctrlr_get_device(struct nvme_controller *ctrlr);
+const struct nvme_controller_data *
+ nvme_ctrlr_get_data(struct nvme_controller *ctrlr);
+
+/* Namespace helper functions */
+uint32_t nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns);
+uint32_t nvme_ns_get_sector_size(struct nvme_namespace *ns);
+uint64_t nvme_ns_get_num_sectors(struct nvme_namespace *ns);
+uint64_t nvme_ns_get_size(struct nvme_namespace *ns);
+uint32_t nvme_ns_get_flags(struct nvme_namespace *ns);
+const char * nvme_ns_get_serial_number(struct nvme_namespace *ns);
+const char * nvme_ns_get_model_number(struct nvme_namespace *ns);
+const struct nvme_namespace_data *
+ nvme_ns_get_data(struct nvme_namespace *ns);
+uint32_t nvme_ns_get_stripesize(struct nvme_namespace *ns);
+
+int nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
+ nvme_cb_fn_t cb_fn);
+
+/* Command building helper functions -- shared with CAM */
+static inline
+void nvme_ns_flush_cmd(struct nvme_command *cmd, uint16_t nsid)
+{
+
+ cmd->opc = NVME_OPC_FLUSH;
+ cmd->nsid = nsid;
+}
+
+static inline
+void nvme_ns_rw_cmd(struct nvme_command *cmd, uint32_t rwcmd, uint16_t nsid,
+ uint64_t lba, uint32_t count)
+{
+ cmd->opc = rwcmd;
+ cmd->nsid = nsid;
+ *(uint64_t *)&cmd->cdw10 = lba;
+ cmd->cdw12 = count-1;
+ cmd->cdw13 = 0;
+ cmd->cdw14 = 0;
+ cmd->cdw15 = 0;
+}
+
+static inline
+void nvme_ns_write_cmd(struct nvme_command *cmd, uint16_t nsid,
+ uint64_t lba, uint32_t count)
+{
+ nvme_ns_rw_cmd(cmd, NVME_OPC_WRITE, nsid, lba, count);
+}
+
+static inline
+void nvme_ns_read_cmd(struct nvme_command *cmd, uint16_t nsid,
+ uint64_t lba, uint32_t count)
+{
+ nvme_ns_rw_cmd(cmd, NVME_OPC_READ, nsid, lba, count);
+}
+
+static inline
+void nvme_ns_trim_cmd(struct nvme_command *cmd, uint16_t nsid,
+ uint32_t num_ranges)
+{
+ cmd->opc = NVME_OPC_DATASET_MANAGEMENT;
+ cmd->nsid = nsid;
+ cmd->cdw10 = num_ranges - 1;
+ cmd->cdw11 = NVME_DSM_ATTR_DEALLOCATE;
+}
+
+#endif /* _KERNEL */
+
+#endif /* __NVME_H__ */
diff --git a/freebsd/sys/dev/ofw/openfirm.h b/freebsd/sys/dev/ofw/openfirm.h
index feac8401..4e9fb29c 100644
--- a/freebsd/sys/dev/ofw/openfirm.h
+++ b/freebsd/sys/dev/ofw/openfirm.h
@@ -61,6 +61,7 @@
#define _DEV_OPENFIRM_H_
#include <sys/types.h>
+#include <machine/_bus.h>
/*
* Prototypes for Open Firmware Interface Routines
@@ -105,10 +106,18 @@ phandle_t OF_parent(phandle_t node);
ssize_t OF_getproplen(phandle_t node, const char *propname);
ssize_t OF_getprop(phandle_t node, const char *propname, void *buf,
size_t len);
+ssize_t OF_getencprop(phandle_t node, const char *prop, pcell_t *buf,
+ size_t len); /* Same as getprop, but maintains endianness */
+int OF_hasprop(phandle_t node, const char *propname);
ssize_t OF_searchprop(phandle_t node, const char *propname, void *buf,
size_t len);
+ssize_t OF_searchencprop(phandle_t node, const char *propname,
+ void *buf, size_t len);
ssize_t OF_getprop_alloc(phandle_t node, const char *propname,
int elsz, void **buf);
+ssize_t OF_getencprop_alloc(phandle_t node, const char *propname,
+ int elsz, void **buf);
+void OF_prop_free(void *buf);
int OF_nextprop(phandle_t node, const char *propname, char *buf,
size_t len);
int OF_setprop(phandle_t node, const char *name, const void *buf,
@@ -117,6 +126,26 @@ ssize_t OF_canon(const char *path, char *buf, size_t len);
phandle_t OF_finddevice(const char *path);
ssize_t OF_package_to_path(phandle_t node, char *buf, size_t len);
+/*
+ * Some OF implementations (IBM, FDT) have a concept of effective phandles
+ * used for device-tree cross-references. Given one of these, returns the
+ * real phandle. If one can't be found (or running on OF implementations
+ * without this property), returns its input.
+ */
+phandle_t OF_node_from_xref(phandle_t xref);
+phandle_t OF_xref_from_node(phandle_t node);
+
+/*
+ * When properties contain references to other nodes using xref handles it is
+ * often necessary to use interfaces provided by the driver for the referenced
+ * instance. These routines allow a driver that provides such an interface to
+ * register its association with an xref handle, and for other drivers to obtain
+ * the device_t associated with an xref handle.
+ */
+device_t OF_device_from_xref(phandle_t xref);
+phandle_t OF_xref_from_device(device_t dev);
+int OF_device_register_xref(phandle_t xref, device_t dev);
+
/* Device I/O functions */
ihandle_t OF_open(const char *path);
void OF_close(ihandle_t instance);
@@ -140,5 +169,16 @@ void OF_exit(void) __attribute__((noreturn));
/* User interface functions */
int OF_interpret(const char *cmd, int nreturns, ...);
+/*
+ * Decode the Nth register property of the given device node and create a bus
+ * space tag and handle for accessing it. This is for use in setting up things
+ * like early console output before newbus is available. The implementation is
+ * machine-dependent, and sparc uses a different function signature as well.
+ */
+#ifndef __sparc64__
+int OF_decode_addr(phandle_t dev, int regno, bus_space_tag_t *ptag,
+ bus_space_handle_t *phandle, bus_size_t *sz);
+#endif
+
#endif /* _KERNEL */
#endif /* _DEV_OPENFIRM_H_ */
diff --git a/freebsd/sys/dev/pci/pci.c b/freebsd/sys/dev/pci/pci.c
index e76b6b9e..789825dc 100644
--- a/freebsd/sys/dev/pci/pci.c
+++ b/freebsd/sys/dev/pci/pci.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/module.h>
+#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/fcntl.h>
#include <sys/conf.h>
@@ -64,6 +65,11 @@ __FBSDID("$FreeBSD$");
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
+#ifdef PCI_IOV
+#include <sys/nv.h>
+#include <dev/pci/pci_iov_private.h>
+#endif
+
#include <dev/usb/controller/xhcireg.h>
#include <dev/usb/controller/ehcireg.h>
#include <dev/usb/controller/ohcireg.h>
@@ -72,21 +78,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/pcib_if.h>
#include <rtems/bsd/local/pci_if.h>
-/*
- * XXX: Due to a limitation of the bus_dma_tag_create() API, we cannot
- * specify a 4GB boundary on 32-bit targets. Usually this does not
- * matter as it is ok to use a boundary of 0 on these systems.
- * However, in the case of PAE, DMA addresses can cross a 4GB
- * boundary, so as a workaround use a 2GB boundary.
- */
-#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
-#ifdef PAE
-#define PCI_DMA_BOUNDARY 0x80000000
-#else
-#define PCI_DMA_BOUNDARY 0x100000000
-#endif
-#endif
-
#define PCIR_IS_BIOS(cfg, reg) \
(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) || \
((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
@@ -94,7 +85,6 @@ __FBSDID("$FreeBSD$");
static int pci_has_quirk(uint32_t devid, int quirk);
static pci_addr_t pci_mapbase(uint64_t mapreg);
static const char *pci_maptype(uint64_t mapreg);
-static int pci_mapsize(uint64_t testval);
static int pci_maprange(uint64_t mapreg);
static pci_addr_t pci_rombase(uint64_t mapreg);
static int pci_romsize(uint64_t testval);
@@ -109,11 +99,11 @@ static int pci_add_map(device_t bus, device_t dev, int reg,
struct resource_list *rl, int force, int prefetch);
static int pci_probe(device_t dev);
static int pci_attach(device_t dev);
+static int pci_detach(device_t dev);
static void pci_load_vendor_data(void);
static int pci_describe_parse_line(char **ptr, int *vendor,
int *device, char **desc);
static char *pci_describe_device(device_t dev);
-static bus_dma_tag_t pci_get_dma_tag(device_t bus, device_t dev);
static int pci_modevent(module_t mod, int what, void *arg);
static void pci_hdrtypedata(device_t pcib, int b, int s, int f,
pcicfgregs *cfg);
@@ -125,11 +115,6 @@ static int pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
int reg, uint32_t data);
#endif
static void pci_read_vpd(device_t pcib, pcicfgregs *cfg);
-static void pci_disable_msi(device_t dev);
-static void pci_enable_msi(device_t dev, uint64_t address,
- uint16_t data);
-static void pci_enable_msix(device_t dev, u_int index,
- uint64_t address, uint32_t data);
static void pci_mask_msix(device_t dev, u_int index);
static void pci_unmask_msix(device_t dev, u_int index);
static int pci_msi_blacklisted(void);
@@ -139,13 +124,19 @@ static void pci_resume_msix(device_t dev);
static int pci_remap_intr_method(device_t bus, device_t dev,
u_int irq);
+static int pci_get_id_method(device_t dev, device_t child,
+ enum pci_id_type type, uintptr_t *rid);
+
+static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
+ int b, int s, int f, uint16_t vid, uint16_t did);
+
static device_method_t pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pci_probe),
DEVMETHOD(device_attach, pci_attach),
- DEVMETHOD(device_detach, bus_generic_detach),
+ DEVMETHOD(device_detach, pci_detach),
DEVMETHOD(device_shutdown, bus_generic_shutdown),
- DEVMETHOD(device_suspend, pci_suspend),
+ DEVMETHOD(device_suspend, bus_generic_suspend),
DEVMETHOD(device_resume, pci_resume),
/* Bus interface */
@@ -167,9 +158,14 @@ static device_method_t pci_methods[] = {
DEVMETHOD(bus_release_resource, pci_release_resource),
DEVMETHOD(bus_activate_resource, pci_activate_resource),
DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
+ DEVMETHOD(bus_child_deleted, pci_child_deleted),
+ DEVMETHOD(bus_child_detached, pci_child_detached),
DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
DEVMETHOD(bus_remap_intr, pci_remap_intr_method),
+ DEVMETHOD(bus_suspend_child, pci_suspend_child),
+ DEVMETHOD(bus_resume_child, pci_resume_child),
+ DEVMETHOD(bus_rescan, pci_rescan_method),
/* PCI interface */
DEVMETHOD(pci_read_config, pci_read_config_method),
@@ -183,13 +179,28 @@ static device_method_t pci_methods[] = {
DEVMETHOD(pci_get_powerstate, pci_get_powerstate_method),
DEVMETHOD(pci_set_powerstate, pci_set_powerstate_method),
DEVMETHOD(pci_assign_interrupt, pci_assign_interrupt_method),
+ DEVMETHOD(pci_find_cap, pci_find_cap_method),
DEVMETHOD(pci_find_extcap, pci_find_extcap_method),
+ DEVMETHOD(pci_find_htcap, pci_find_htcap_method),
DEVMETHOD(pci_alloc_msi, pci_alloc_msi_method),
DEVMETHOD(pci_alloc_msix, pci_alloc_msix_method),
+ DEVMETHOD(pci_enable_msi, pci_enable_msi_method),
+ DEVMETHOD(pci_enable_msix, pci_enable_msix_method),
+ DEVMETHOD(pci_disable_msi, pci_disable_msi_method),
DEVMETHOD(pci_remap_msix, pci_remap_msix_method),
DEVMETHOD(pci_release_msi, pci_release_msi_method),
DEVMETHOD(pci_msi_count, pci_msi_count_method),
DEVMETHOD(pci_msix_count, pci_msix_count_method),
+ DEVMETHOD(pci_msix_pba_bar, pci_msix_pba_bar_method),
+ DEVMETHOD(pci_msix_table_bar, pci_msix_table_bar_method),
+ DEVMETHOD(pci_get_id, pci_get_id_method),
+ DEVMETHOD(pci_alloc_devinfo, pci_alloc_devinfo_method),
+ DEVMETHOD(pci_child_added, pci_child_added_method),
+#ifdef PCI_IOV
+ DEVMETHOD(pci_iov_attach, pci_iov_attach_method),
+ DEVMETHOD(pci_iov_detach, pci_iov_detach_method),
+ DEVMETHOD(pci_create_iov_child, pci_create_iov_child_method),
+#endif
DEVMETHOD_END
};
@@ -272,12 +283,13 @@ static const struct pci_quirk pci_quirks[] = {
{ 0x43851002, PCI_QUIRK_UNMAP_REG, 0x14, 0 },
/*
- * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
- * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
- * command register is set.
+ * Atheros AR8161/AR8162/E2200/E2400 Ethernet controllers have a
+ * bug that MSI interrupt does not assert if PCIM_CMD_INTxDIS bit
+ * of the command register is set.
*/
{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG, 0, 0 },
{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG, 0, 0 },
+ { 0xE0A11969, PCI_QUIRK_MSI_INTX_BUG, 0, 0 },
{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG, 0, 0 },
/*
@@ -308,53 +320,46 @@ static int pcie_chipset, pcix_chipset;
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
static int pci_enable_io_modes = 1;
-TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
-SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
+SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
&pci_enable_io_modes, 1,
"Enable I/O and memory bits in the config register. Some BIOSes do not\n\
enable these bits correctly. We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");
static int pci_do_realloc_bars = 0;
-TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
-SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
+SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
&pci_do_realloc_bars, 0,
- "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");
+ "Attempt to allocate a new range for any BARs whose original "
+ "firmware-assigned ranges fail to allocate during the initial device scan.");
static int pci_do_power_nodriver = 0;
-TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
-SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
+SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
&pci_do_power_nodriver, 0,
"Place a function into D3 state when no driver attaches to it. 0 means\n\
disable. 1 means conservatively place devices into D3 state. 2 means\n\
-agressively place devices into D3 state. 3 means put absolutely everything\n\
+aggressively place devices into D3 state. 3 means put absolutely everything\n\
in D3 state.");
int pci_do_power_resume = 1;
-TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
-SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
+SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
&pci_do_power_resume, 1,
"Transition from D3 -> D0 on resume.");
int pci_do_power_suspend = 1;
-TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
-SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
+SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
&pci_do_power_suspend, 1,
"Transition from D0 -> D3 on suspend.");
static int pci_do_msi = 1;
-TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
-SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
+SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
"Enable support for MSI interrupts");
static int pci_do_msix = 1;
-TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
-SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
+SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
"Enable support for MSI-X interrupts");
static int pci_honor_msi_blacklist = 1;
-TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
-SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
+SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
&pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
#if defined(__i386__) || defined(__amd64__)
@@ -362,17 +367,25 @@ static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
-TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
&pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
static int pci_clear_bars;
-TUNABLE_INT("hw.pci.clear_bars", &pci_clear_bars);
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
"Ignore firmware-assigned resources for BARs.");
+#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
+static int pci_clear_buses;
+SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
+ "Ignore firmware-assigned bus numbers.");
+#endif
+
+static int pci_enable_ari = 1;
+SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
+ 0, "Enable support for PCIe Alternative RID Interpretation");
+
static int
pci_has_quirk(uint32_t devid, int quirk)
{
@@ -488,7 +501,7 @@ pci_maptype(uint64_t mapreg)
/* return log2 of map size decoded for memory or port map */
-static int
+int
pci_mapsize(uint64_t testval)
{
int ln2size;
@@ -532,7 +545,7 @@ pci_romsize(uint64_t testval)
}
return (ln2size);
}
-
+
/* return log2 of address range supported by map register */
static int
@@ -580,12 +593,24 @@ pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
case PCIM_HDRTYPE_NORMAL:
cfg->subvendor = REG(PCIR_SUBVEND_0, 2);
cfg->subdevice = REG(PCIR_SUBDEV_0, 2);
+ cfg->mingnt = REG(PCIR_MINGNT, 1);
+ cfg->maxlat = REG(PCIR_MAXLAT, 1);
cfg->nummaps = PCI_MAXMAPS_0;
break;
case PCIM_HDRTYPE_BRIDGE:
+ cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
+ cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
+ cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
+ cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
+ cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
cfg->nummaps = PCI_MAXMAPS_1;
break;
case PCIM_HDRTYPE_CARDBUS:
+ cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
+ cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
+ cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
+ cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
+ cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
cfg->subvendor = REG(PCIR_SUBVEND_2, 2);
cfg->subdevice = REG(PCIR_SUBDEV_2, 2);
cfg->nummaps = PCI_MAXMAPS_2;
@@ -596,79 +621,167 @@ pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
-pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
+pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
{
#define REG(n, w) PCIB_READ_CONFIG(pcib, b, s, f, n, w)
- pcicfgregs *cfg = NULL;
- struct pci_devinfo *devlist_entry;
- struct devlist *devlist_head;
+ uint16_t vid, did;
- devlist_head = &pci_devq;
+ vid = REG(PCIR_VENDOR, 2);
+ did = REG(PCIR_DEVICE, 2);
+ if (vid != 0xffff)
+ return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));
- devlist_entry = NULL;
+ return (NULL);
+}
- if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
- devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
- if (devlist_entry == NULL)
- return (NULL);
+struct pci_devinfo *
+pci_alloc_devinfo_method(device_t dev)
+{
+
+ return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
+ M_WAITOK | M_ZERO));
+}
+
+static struct pci_devinfo *
+pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
+ uint16_t vid, uint16_t did)
+{
+ struct pci_devinfo *devlist_entry;
+ pcicfgregs *cfg;
+
+ devlist_entry = PCI_ALLOC_DEVINFO(bus);
+
+ cfg = &devlist_entry->cfg;
+
+ cfg->domain = d;
+ cfg->bus = b;
+ cfg->slot = s;
+ cfg->func = f;
+ cfg->vendor = vid;
+ cfg->device = did;
+ cfg->cmdreg = REG(PCIR_COMMAND, 2);
+ cfg->statreg = REG(PCIR_STATUS, 2);
+ cfg->baseclass = REG(PCIR_CLASS, 1);
+ cfg->subclass = REG(PCIR_SUBCLASS, 1);
+ cfg->progif = REG(PCIR_PROGIF, 1);
+ cfg->revid = REG(PCIR_REVID, 1);
+ cfg->hdrtype = REG(PCIR_HDRTYPE, 1);
+ cfg->cachelnsz = REG(PCIR_CACHELNSZ, 1);
+ cfg->lattimer = REG(PCIR_LATTIMER, 1);
+ cfg->intpin = REG(PCIR_INTPIN, 1);
+ cfg->intline = REG(PCIR_INTLINE, 1);
+
+ cfg->mfdev = (cfg->hdrtype & PCIM_MFDEV) != 0;
+ cfg->hdrtype &= ~PCIM_MFDEV;
+ STAILQ_INIT(&cfg->maps);
+
+ cfg->iov = NULL;
+
+ pci_fixancient(cfg);
+ pci_hdrtypedata(pcib, b, s, f, cfg);
+
+ if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
+ pci_read_cap(pcib, cfg);
+
+ STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);
+
+ devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
+ devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
+ devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
+ devlist_entry->conf.pc_sel.pc_func = cfg->func;
+ devlist_entry->conf.pc_hdr = cfg->hdrtype;
+
+ devlist_entry->conf.pc_subvendor = cfg->subvendor;
+ devlist_entry->conf.pc_subdevice = cfg->subdevice;
+ devlist_entry->conf.pc_vendor = cfg->vendor;
+ devlist_entry->conf.pc_device = cfg->device;
+
+ devlist_entry->conf.pc_class = cfg->baseclass;
+ devlist_entry->conf.pc_subclass = cfg->subclass;
+ devlist_entry->conf.pc_progif = cfg->progif;
+ devlist_entry->conf.pc_revid = cfg->revid;
+
+ pci_numdevs++;
+ pci_generation++;
- cfg = &devlist_entry->cfg;
-
- cfg->domain = d;
- cfg->bus = b;
- cfg->slot = s;
- cfg->func = f;
- cfg->vendor = REG(PCIR_VENDOR, 2);
- cfg->device = REG(PCIR_DEVICE, 2);
- cfg->cmdreg = REG(PCIR_COMMAND, 2);
- cfg->statreg = REG(PCIR_STATUS, 2);
- cfg->baseclass = REG(PCIR_CLASS, 1);
- cfg->subclass = REG(PCIR_SUBCLASS, 1);
- cfg->progif = REG(PCIR_PROGIF, 1);
- cfg->revid = REG(PCIR_REVID, 1);
- cfg->hdrtype = REG(PCIR_HDRTYPE, 1);
- cfg->cachelnsz = REG(PCIR_CACHELNSZ, 1);
- cfg->lattimer = REG(PCIR_LATTIMER, 1);
- cfg->intpin = REG(PCIR_INTPIN, 1);
- cfg->intline = REG(PCIR_INTLINE, 1);
-
- cfg->mingnt = REG(PCIR_MINGNT, 1);
- cfg->maxlat = REG(PCIR_MAXLAT, 1);
-
- cfg->mfdev = (cfg->hdrtype & PCIM_MFDEV) != 0;
- cfg->hdrtype &= ~PCIM_MFDEV;
- STAILQ_INIT(&cfg->maps);
-
- pci_fixancient(cfg);
- pci_hdrtypedata(pcib, b, s, f, cfg);
-
- if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
- pci_read_cap(pcib, cfg);
-
- STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
-
- devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
- devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
- devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
- devlist_entry->conf.pc_sel.pc_func = cfg->func;
- devlist_entry->conf.pc_hdr = cfg->hdrtype;
-
- devlist_entry->conf.pc_subvendor = cfg->subvendor;
- devlist_entry->conf.pc_subdevice = cfg->subdevice;
- devlist_entry->conf.pc_vendor = cfg->vendor;
- devlist_entry->conf.pc_device = cfg->device;
-
- devlist_entry->conf.pc_class = cfg->baseclass;
- devlist_entry->conf.pc_subclass = cfg->subclass;
- devlist_entry->conf.pc_progif = cfg->progif;
- devlist_entry->conf.pc_revid = cfg->revid;
-
- pci_numdevs++;
- pci_generation++;
- }
return (devlist_entry);
+}
#undef REG
+
+static void
+pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
+{
+#define REG(n, w) PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
+ cfg->ea.ea_location + (n), w)
+ int num_ent;
+ int ptr;
+ int a, b;
+ uint32_t val;
+ int ent_size;
+ uint32_t dw[4];
+ uint64_t base, max_offset;
+ struct pci_ea_entry *eae;
+
+ if (cfg->ea.ea_location == 0)
+ return;
+
+ STAILQ_INIT(&cfg->ea.ea_entries);
+
+ /* Determine the number of entries */
+ num_ent = REG(PCIR_EA_NUM_ENT, 2);
+ num_ent &= PCIM_EA_NUM_ENT_MASK;
+
+	/* Find the first entry we care about */
+ ptr = PCIR_EA_FIRST_ENT;
+
+ /* Skip DWORD 2 for type 1 functions */
+ if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
+ ptr += 4;
+
+ for (a = 0; a < num_ent; a++) {
+
+ eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
+ eae->eae_cfg_offset = cfg->ea.ea_location + ptr;
+
+ /* Read a number of dwords in the entry */
+ val = REG(ptr, 4);
+ ptr += 4;
+ ent_size = (val & PCIM_EA_ES);
+
+ for (b = 0; b < ent_size; b++) {
+ dw[b] = REG(ptr, 4);
+ ptr += 4;
+ }
+
+ eae->eae_flags = val;
+ eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;
+
+ base = dw[0] & PCIM_EA_FIELD_MASK;
+ max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
+ b = 2;
+ if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
+ base |= (uint64_t)dw[b] << 32UL;
+ b++;
+ }
+ if (((dw[1] & PCIM_EA_IS_64) != 0)
+ && (b < ent_size)) {
+ max_offset |= (uint64_t)dw[b] << 32UL;
+ b++;
+ }
+
+ eae->eae_base = base;
+ eae->eae_max_offset = max_offset;
+
+ STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);
+
+ if (bootverbose) {
+ printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
+ cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
+ (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
+ }
+ }
}
+#undef REG
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
@@ -795,6 +908,7 @@ pci_read_cap(device_t pcib, pcicfgregs *cfg)
if ((cfg->hdrtype & PCIM_HDRTYPE) ==
PCIM_HDRTYPE_BRIDGE)
pcix_chipset = 1;
+ cfg->pcix.pcix_location = ptr;
break;
case PCIY_EXPRESS: /* PCI-express */
/*
@@ -802,6 +916,13 @@ pci_read_cap(device_t pcib, pcicfgregs *cfg)
* at least one PCI-express device.
*/
pcie_chipset = 1;
+ cfg->pcie.pcie_location = ptr;
+ val = REG(ptr + PCIER_FLAGS, 2);
+ cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
+ break;
+ case PCIY_EA: /* Enhanced Allocation */
+ cfg->ea.ea_location = ptr;
+ pci_ea_fill_info(pcib, cfg);
break;
default:
break;
@@ -958,10 +1079,9 @@ pci_read_vpd(device_t pcib, pcicfgregs *cfg)
remain |= byte2 << 8;
if (remain > (0x7f*4 - vrs.off)) {
state = -1;
- printf(
- "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
- cfg->domain, cfg->bus, cfg->slot,
- cfg->func, remain);
+ pci_printf(cfg,
+ "invalid VPD data, remain %#x\n",
+ remain);
}
name = byte & 0x7f;
} else {
@@ -1033,10 +1153,8 @@ pci_read_vpd(device_t pcib, pcicfgregs *cfg)
* if this happens, we can't trust the rest
* of the VPD.
*/
- printf(
- "pci%d:%d:%d:%d: bad keyword length: %d\n",
- cfg->domain, cfg->bus, cfg->slot,
- cfg->func, dflen);
+ pci_printf(cfg, "bad keyword length: %d\n",
+ dflen);
cksumvalid = 0;
state = -1;
break;
@@ -1069,10 +1187,8 @@ pci_read_vpd(device_t pcib, pcicfgregs *cfg)
cksumvalid = 1;
else {
if (bootverbose)
- printf(
- "pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
- cfg->domain, cfg->bus,
- cfg->slot, cfg->func,
+ pci_printf(cfg,
+ "bad VPD cksum, remain %hhu\n",
vrs.cksum);
cksumvalid = 0;
state = -1;
@@ -1150,9 +1266,7 @@ pci_read_vpd(device_t pcib, pcicfgregs *cfg)
break;
default:
- printf("pci%d:%d:%d:%d: invalid state: %d\n",
- cfg->domain, cfg->bus, cfg->slot, cfg->func,
- state);
+ pci_printf(cfg, "invalid state: %d\n", state);
state = -1;
break;
}
@@ -1169,8 +1283,7 @@ pci_read_vpd(device_t pcib, pcicfgregs *cfg)
}
if (state < -1) {
/* I/O error, clean up */
- printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
- cfg->domain, cfg->bus, cfg->slot, cfg->func);
+ pci_printf(cfg, "failed to read VPD data.\n");
if (cfg->vpd.vpd_ident != NULL) {
free(cfg->vpd.vpd_ident, M_DEVBUF);
cfg->vpd.vpd_ident = NULL;
@@ -1238,12 +1351,55 @@ pci_fetch_vpd_list(device_t dev)
}
/*
- * Find the requested extended capability and return the offset in
- * configuration space via the pointer provided. The function returns
- * 0 on success and error code otherwise.
+ * Find the requested HyperTransport capability and return the offset
+ * in configuration space via the pointer provided. The function
+ * returns 0 on success and an error code otherwise.
*/
int
-pci_find_extcap_method(device_t dev, device_t child, int capability,
+pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
+{
+ int ptr, error;
+ uint16_t val;
+
+ error = pci_find_cap(child, PCIY_HT, &ptr);
+ if (error)
+ return (error);
+
+ /*
+ * Traverse the capabilities list checking each HT capability
+ * to see if it matches the requested HT capability.
+ */
+ while (ptr != 0) {
+ val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
+ if (capability == PCIM_HTCAP_SLAVE ||
+ capability == PCIM_HTCAP_HOST)
+ val &= 0xe000;
+ else
+ val &= PCIM_HTCMD_CAP_MASK;
+ if (val == capability) {
+ if (capreg != NULL)
+ *capreg = ptr;
+ return (0);
+ }
+
+ /* Skip to the next HT capability. */
+ while (ptr != 0) {
+ ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
+ if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
+ PCIY_HT)
+ break;
+ }
+ }
+ return (ENOENT);
+}
+
+/*
+ * Find the requested capability and return the offset in
+ * configuration space via the pointer provided. The function returns
+ * 0 on success and an error code otherwise.
+ */
+int
+pci_find_cap_method(device_t dev, device_t child, int capability,
int *capreg)
{
struct pci_devinfo *dinfo = device_get_ivars(child);
@@ -1291,12 +1447,50 @@ pci_find_extcap_method(device_t dev, device_t child, int capability,
}
/*
+ * Find the requested extended capability and return the offset in
+ * configuration space via the pointer provided. The function returns
+ * 0 on success and an error code otherwise.
+ */
+int
+pci_find_extcap_method(device_t dev, device_t child, int capability,
+ int *capreg)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(child);
+ pcicfgregs *cfg = &dinfo->cfg;
+ uint32_t ecap;
+ uint16_t ptr;
+
+ /* Only supported for PCI-express devices. */
+ if (cfg->pcie.pcie_location == 0)
+ return (ENXIO);
+
+ ptr = PCIR_EXTCAP;
+ ecap = pci_read_config(child, ptr, 4);
+ if (ecap == 0xffffffff || ecap == 0)
+ return (ENOENT);
+ for (;;) {
+ if (PCI_EXTCAP_ID(ecap) == capability) {
+ if (capreg != NULL)
+ *capreg = ptr;
+ return (0);
+ }
+ ptr = PCI_EXTCAP_NEXTPTR(ecap);
+ if (ptr == 0)
+ break;
+ ecap = pci_read_config(child, ptr, 4);
+ }
+
+ return (ENOENT);
+}
+
+/*
* Support for MSI-X message interrupts.
*/
void
-pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
+pci_enable_msix_method(device_t dev, device_t child, u_int index,
+ uint64_t address, uint32_t data)
{
- struct pci_devinfo *dinfo = device_get_ivars(dev);
+ struct pci_devinfo *dinfo = device_get_ivars(child);
struct pcicfg_msix *msix = &dinfo->cfg.msix;
uint32_t offset;
@@ -1307,7 +1501,7 @@ pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
bus_write_4(msix->msix_table_res, offset + 8, data);
/* Enable MSI -> HT mapping. */
- pci_ht_map_msi(dev, address);
+ pci_ht_map_msi(child, address);
}
void
@@ -1459,7 +1653,7 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count)
if (bootverbose) {
rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
if (actual == 1)
- device_printf(child, "using IRQ %lu for MSI-X\n",
+ device_printf(child, "using IRQ %ju for MSI-X\n",
rle->start);
else {
int run;
@@ -1469,7 +1663,7 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count)
* IRQ values as ranges. 'irq' is the previous IRQ.
* 'run' is true if we are in a range.
*/
- device_printf(child, "using IRQs %lu", rle->start);
+ device_printf(child, "using IRQs %ju", rle->start);
irq = rle->start;
run = 0;
for (i = 1; i < actual; i++) {
@@ -1490,7 +1684,7 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count)
}
/* Start new range. */
- printf(",%lu", rle->start);
+ printf(",%ju", rle->start);
irq = rle->start;
}
@@ -1558,7 +1752,7 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count)
* 3. Call the three vectors allocated by pci_alloc_msix() A, B, and
* C. After the call to pci_alloc_msix(), the device will be setup to
* have an MSI-X table of ABC--- (where - means no vector assigned).
- * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
+ * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
* then the MSI-X table will look like A-AB-B, and the 'C' vector will
* be freed back to the system. This device will also have valid
* SYS_RES_IRQ rids of 1, 3, 4, and 6.
@@ -1608,17 +1802,21 @@ pci_remap_msix_method(device_t dev, device_t child, int count,
free(used, M_DEVBUF);
return (EINVAL);
}
-
+
/* Make sure none of the resources are allocated. */
for (i = 0; i < msix->msix_table_len; i++) {
if (msix->msix_table[i].mte_vector == 0)
continue;
- if (msix->msix_table[i].mte_handlers > 0)
+ if (msix->msix_table[i].mte_handlers > 0) {
+ free(used, M_DEVBUF);
return (EBUSY);
+ }
rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
KASSERT(rle != NULL, ("missing resource"));
- if (rle->res != NULL)
+ if (rle->res != NULL) {
+ free(used, M_DEVBUF);
return (EBUSY);
+ }
}
/* Free the existing resource list entries. */
@@ -1663,7 +1861,7 @@ pci_remap_msix_method(device_t dev, device_t child, int count,
for (i = 0; i < count; i++) {
if (vectors[i] == 0)
continue;
- irq = msix->msix_vectors[vectors[i]].mv_irq;
+ irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
irq, 1);
}
@@ -1677,7 +1875,7 @@ pci_remap_msix_method(device_t dev, device_t child, int count,
printf("---");
else
printf("%d",
- msix->msix_vectors[vectors[i]].mv_irq);
+ msix->msix_vectors[vectors[i] - 1].mv_irq);
}
printf("\n");
}
@@ -1749,6 +1947,28 @@ pci_msix_count_method(device_t dev, device_t child)
return (0);
}
+int
+pci_msix_pba_bar_method(device_t dev, device_t child)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(child);
+ struct pcicfg_msix *msix = &dinfo->cfg.msix;
+
+ if (pci_do_msix && msix->msix_location != 0)
+ return (msix->msix_pba_bar);
+ return (-1);
+}
+
+int
+pci_msix_table_bar_method(device_t dev, device_t child)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(child);
+ struct pcicfg_msix *msix = &dinfo->cfg.msix;
+
+ if (pci_do_msix && msix->msix_location != 0)
+ return (msix->msix_table_bar);
+ return (-1);
+}
+
/*
* HyperTransport MSI mapping control
*/
@@ -1778,12 +1998,30 @@ pci_ht_map_msi(device_t dev, uint64_t addr)
}
int
+pci_get_max_payload(device_t dev)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(dev);
+ int cap;
+ uint16_t val;
+
+ cap = dinfo->cfg.pcie.pcie_location;
+ if (cap == 0)
+ return (0);
+ val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
+ val &= PCIEM_CTL_MAX_PAYLOAD;
+ val >>= 5;
+ return (1 << (val + 7));
+}
+
+int
pci_get_max_read_req(device_t dev)
{
+ struct pci_devinfo *dinfo = device_get_ivars(dev);
int cap;
uint16_t val;
- if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
+ cap = dinfo->cfg.pcie.pcie_location;
+ if (cap == 0)
return (0);
val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
val &= PCIEM_CTL_MAX_READ_REQUEST;
@@ -1794,10 +2032,12 @@ pci_get_max_read_req(device_t dev)
int
pci_set_max_read_req(device_t dev, int size)
{
+ struct pci_devinfo *dinfo = device_get_ivars(dev);
int cap;
uint16_t val;
- if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
+ cap = dinfo->cfg.pcie.pcie_location;
+ if (cap == 0)
return (0);
if (size < 128)
size = 128;
@@ -1811,49 +2051,107 @@ pci_set_max_read_req(device_t dev, int size)
return (size);
}
+uint32_t
+pcie_read_config(device_t dev, int reg, int width)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(dev);
+ int cap;
+
+ cap = dinfo->cfg.pcie.pcie_location;
+ if (cap == 0) {
+ if (width == 2)
+ return (0xffff);
+ return (0xffffffff);
+ }
+
+ return (pci_read_config(dev, cap + reg, width));
+}
+
+void
+pcie_write_config(device_t dev, int reg, uint32_t value, int width)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(dev);
+ int cap;
+
+ cap = dinfo->cfg.pcie.pcie_location;
+ if (cap == 0)
+ return;
+ pci_write_config(dev, cap + reg, value, width);
+}
+
+/*
+ * Adjusts a PCI-e capability register by clearing the bits in mask
+ * and setting the bits in (value & mask). Bits not set in mask are
+ * not adjusted.
+ *
+ * Returns the old value on success or all ones on failure.
+ */
+uint32_t
+pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
+ int width)
+{
+ struct pci_devinfo *dinfo = device_get_ivars(dev);
+ uint32_t old, new;
+ int cap;
+
+ cap = dinfo->cfg.pcie.pcie_location;
+ if (cap == 0) {
+ if (width == 2)
+ return (0xffff);
+ return (0xffffffff);
+ }
+
+ old = pci_read_config(dev, cap + reg, width);
+ new = old & ~mask;
+ new |= (value & mask);
+ pci_write_config(dev, cap + reg, new, width);
+ return (old);
+}
+
/*
* Support for MSI message signalled interrupts.
*/
void
-pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
+pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
+ uint16_t data)
{
- struct pci_devinfo *dinfo = device_get_ivars(dev);
+ struct pci_devinfo *dinfo = device_get_ivars(child);
struct pcicfg_msi *msi = &dinfo->cfg.msi;
/* Write data and address values. */
- pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
+ pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
address & 0xffffffff, 4);
if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
- pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
+ pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
address >> 32, 4);
- pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
+ pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
data, 2);
} else
- pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
+ pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
2);
/* Enable MSI in the control register. */
msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
- pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
- 2);
+ pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
+ msi->msi_ctrl, 2);
/* Enable MSI -> HT mapping. */
- pci_ht_map_msi(dev, address);
+ pci_ht_map_msi(child, address);
}
void
-pci_disable_msi(device_t dev)
+pci_disable_msi_method(device_t dev, device_t child)
{
- struct pci_devinfo *dinfo = device_get_ivars(dev);
+ struct pci_devinfo *dinfo = device_get_ivars(child);
struct pcicfg_msi *msi = &dinfo->cfg.msi;
/* Disable MSI -> HT mapping. */
- pci_ht_map_msi(dev, 0);
+ pci_ht_map_msi(child, 0);
/* Disable MSI in the control register. */
msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
- pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
- 2);
+ pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
+ msi->msi_ctrl, 2);
}
/*
@@ -1896,7 +2194,7 @@ pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
struct msix_table_entry *mte;
struct msix_vector *mv;
uint64_t addr;
- uint32_t data;
+ uint32_t data;
int error, i, j;
/*
@@ -2282,7 +2580,7 @@ pci_set_powerstate_method(device_t dev, device_t child, int state)
struct pci_devinfo *dinfo = device_get_ivars(child);
pcicfgregs *cfg = &dinfo->cfg;
uint16_t status;
- int result, oldstate, highest, delay;
+ int oldstate, highest, delay;
if (cfg->pp.pp_cap == 0)
return (EOPNOTSUPP);
@@ -2317,7 +2615,6 @@ pci_set_powerstate_method(device_t dev, device_t child, int state)
delay = 0;
status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
& ~PCIM_PSTAT_DMASK;
- result = 0;
switch (state) {
case PCI_POWERSTATE_D0:
status |= PCIM_PSTAT_D0;
@@ -2533,8 +2830,9 @@ pci_memen(device_t dev)
return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
}
-static void
-pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
+void
+pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
+ int *bar64)
{
struct pci_devinfo *dinfo;
pci_addr_t map, testval;
@@ -2554,6 +2852,8 @@ pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
pci_write_config(dev, reg, map, 4);
*mapp = map;
*testvalp = testval;
+ if (bar64 != NULL)
+ *bar64 = 0;
return;
}
@@ -2595,6 +2895,8 @@ pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
*mapp = map;
*testvalp = testval;
+ if (bar64 != NULL)
+ *bar64 = (ln2range == 64);
}
static void
@@ -2649,7 +2951,7 @@ pci_bar_enabled(device_t dev, struct pci_map *pm)
return ((cmd & PCIM_CMD_PORTEN) != 0);
}
-static struct pci_map *
+struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
struct pci_devinfo *dinfo;
@@ -2720,7 +3022,7 @@ pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
return (barlen);
}
- pci_read_bar(dev, reg, &map, &testval);
+ pci_read_bar(dev, reg, &map, &testval, NULL);
if (PCI_BAR_MEM(map)) {
type = SYS_RES_MEMORY;
if (map & PCIM_BAR_MEM_PREFETCH)
@@ -2816,7 +3118,7 @@ pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
flags |= RF_PREFETCHABLE;
if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
start = 0; /* Let the parent decide. */
- end = ~0ul;
+ end = ~0;
} else {
start = base;
end = base + count - 1;
@@ -2831,7 +3133,7 @@ pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
*/
res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
flags);
- if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
+ if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
/*
* If the allocation fails, try to allocate a resource for
* this BAR using any available range. The firmware felt
@@ -2839,8 +3141,8 @@ pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
* disable decoding if we can help it.
*/
resource_list_delete(rl, type, reg);
- resource_list_add(rl, type, reg, 0, ~0ul, count);
- res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
+ resource_list_add(rl, type, reg, 0, ~0, count);
+ res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
count, flags);
}
if (res == NULL) {
@@ -2877,7 +3179,6 @@ static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
uint32_t prefetchmask)
{
- struct resource *r;
int rid, type, progif;
#if 0
/* if this device supports PCI native addressing use it */
@@ -2900,11 +3201,11 @@ pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
} else {
rid = PCIR_BAR(0);
resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
- r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
+ (void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
0x1f7, 8, 0);
rid = PCIR_BAR(1);
resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
- r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
+ (void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
0x3f6, 1, 0);
}
if (progif & PCIP_STORAGE_IDE_MODESEC) {
@@ -2915,11 +3216,11 @@ pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
} else {
rid = PCIR_BAR(2);
resource_list_add(rl, type, rid, 0x170, 0x177, 8);
- r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
+ (void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
0x177, 8, 0);
rid = PCIR_BAR(3);
resource_list_add(rl, type, rid, 0x376, 0x376, 1);
- r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
+ (void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
0x376, 1, 0);
}
pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
@@ -3159,6 +3460,335 @@ xhci_early_takeover(device_t self)
bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
+#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
+static void
+pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
+ struct resource_list *rl)
+{
+ struct resource *res;
+ char *cp;
+ rman_res_t start, end, count;
+ int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;
+
+ switch (cfg->hdrtype & PCIM_HDRTYPE) {
+ case PCIM_HDRTYPE_BRIDGE:
+ sec_reg = PCIR_SECBUS_1;
+ sub_reg = PCIR_SUBBUS_1;
+ break;
+ case PCIM_HDRTYPE_CARDBUS:
+ sec_reg = PCIR_SECBUS_2;
+ sub_reg = PCIR_SUBBUS_2;
+ break;
+ default:
+ return;
+ }
+
+ /*
+ * If the existing bus range is valid, attempt to reserve it
+ * from our parent. If this fails for any reason, clear the
+ * secbus and subbus registers.
+ *
+ * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
+ * This would at least preserve the existing sec_bus if it is
+ * valid.
+ */
+ sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
+ sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);
+
+ /* Quirk handling. */
+ switch (pci_get_devid(dev)) {
+ case 0x12258086: /* Intel 82454KX/GX (Orion) */
+ sup_bus = pci_read_config(dev, 0x41, 1);
+ if (sup_bus != 0xff) {
+ sec_bus = sup_bus + 1;
+ sub_bus = sup_bus + 1;
+ PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
+ PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
+ }
+ break;
+
+ case 0x00dd10de:
+ /* Compaq R3000 BIOS sets wrong subordinate bus number. */
+ if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
+ break;
+ if (strncmp(cp, "Compal", 6) != 0) {
+ freeenv(cp);
+ break;
+ }
+ freeenv(cp);
+ if ((cp = kern_getenv("smbios.planar.product")) == NULL)
+ break;
+ if (strncmp(cp, "08A0", 4) != 0) {
+ freeenv(cp);
+ break;
+ }
+ freeenv(cp);
+ if (sub_bus < 0xa) {
+ sub_bus = 0xa;
+ PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
+ }
+ break;
+ }
+
+ if (bootverbose)
+ printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
+ if (sec_bus > 0 && sub_bus >= sec_bus) {
+ start = sec_bus;
+ end = sub_bus;
+ count = end - start + 1;
+
+ resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);
+
+ /*
+ * If requested, clear secondary bus registers in
+ * bridge devices to force a complete renumbering
+ * rather than reserving the existing range. However,
+ * preserve the existing size.
+ */
+ if (pci_clear_buses)
+ goto clear;
+
+ rid = 0;
+ res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
+ start, end, count, 0);
+ if (res != NULL)
+ return;
+
+ if (bootverbose)
+ device_printf(bus,
+ "pci%d:%d:%d:%d secbus failed to allocate\n",
+ pci_get_domain(dev), pci_get_bus(dev),
+ pci_get_slot(dev), pci_get_function(dev));
+ }
+
+clear:
+ PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
+ PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
+}
+
+static struct resource *
+pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
+ rman_res_t end, rman_res_t count, u_int flags)
+{
+ struct pci_devinfo *dinfo;
+ pcicfgregs *cfg;
+ struct resource_list *rl;
+ struct resource *res;
+ int sec_reg, sub_reg;
+
+ dinfo = device_get_ivars(child);
+ cfg = &dinfo->cfg;
+ rl = &dinfo->resources;
+ switch (cfg->hdrtype & PCIM_HDRTYPE) {
+ case PCIM_HDRTYPE_BRIDGE:
+ sec_reg = PCIR_SECBUS_1;
+ sub_reg = PCIR_SUBBUS_1;
+ break;
+ case PCIM_HDRTYPE_CARDBUS:
+ sec_reg = PCIR_SECBUS_2;
+ sub_reg = PCIR_SUBBUS_2;
+ break;
+ default:
+ return (NULL);
+ }
+
+ if (*rid != 0)
+ return (NULL);
+
+ if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
+ resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
+ if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
+ res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
+ start, end, count, flags & ~RF_ACTIVE);
+ if (res == NULL) {
+ resource_list_delete(rl, PCI_RES_BUS, *rid);
+ device_printf(child, "allocating %ju bus%s failed\n",
+ count, count == 1 ? "" : "es");
+ return (NULL);
+ }
+ if (bootverbose)
+ device_printf(child,
+ "Lazy allocation of %ju bus%s at %ju\n", count,
+ count == 1 ? "" : "es", rman_get_start(res));
+ PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
+ PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
+ }
+ return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
+ end, count, flags));
+}
+#endif
+
+static int
+pci_ea_bei_to_rid(device_t dev, int bei)
+{
+#ifdef PCI_IOV
+ struct pci_devinfo *dinfo;
+ int iov_pos;
+ struct pcicfg_iov *iov;
+
+ dinfo = device_get_ivars(dev);
+ iov = dinfo->cfg.iov;
+ if (iov != NULL)
+ iov_pos = iov->iov_pos;
+ else
+ iov_pos = 0;
+#endif
+
+ /* Check if matches BAR */
+ if ((bei >= PCIM_EA_BEI_BAR_0) &&
+ (bei <= PCIM_EA_BEI_BAR_5))
+ return (PCIR_BAR(bei));
+
+ /* Check ROM */
+ if (bei == PCIM_EA_BEI_ROM)
+ return (PCIR_BIOS);
+
+#ifdef PCI_IOV
+ /* Check if matches VF_BAR */
+ if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
+ (bei <= PCIM_EA_BEI_VF_BAR_5))
+ return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
+ iov_pos);
+#endif
+
+ return (-1);
+}
+
+int
+pci_ea_is_enabled(device_t dev, int rid)
+{
+ struct pci_ea_entry *ea;
+ struct pci_devinfo *dinfo;
+
+ dinfo = device_get_ivars(dev);
+
+ STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
+ if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
+ return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
+ }
+
+ return (0);
+}
+
+void
+pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
+{
+ struct pci_ea_entry *ea;
+ struct pci_devinfo *dinfo;
+ pci_addr_t start, end, count;
+ struct resource_list *rl;
+ int type, flags, rid;
+ struct resource *res;
+ uint32_t tmp;
+#ifdef PCI_IOV
+ struct pcicfg_iov *iov;
+#endif
+
+ dinfo = device_get_ivars(dev);
+ rl = &dinfo->resources;
+ flags = 0;
+
+#ifdef PCI_IOV
+ iov = dinfo->cfg.iov;
+#endif
+
+ if (dinfo->cfg.ea.ea_location == 0)
+ return;
+
+ STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
+
+ /*
+ * TODO: Ignore an EA-BAR if it is not enabled.
+ * Currently the EA implementation supports
+ * only the situation where the EA structure
+ * contains predefined entries. If they are not
+ * enabled, leave them unallocated and proceed
+ * with the legacy-BAR mechanism.
+ */
+ if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
+ continue;
+
+ switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
+ case PCIM_EA_P_MEM_PREFETCH:
+ case PCIM_EA_P_VF_MEM_PREFETCH:
+ flags = RF_PREFETCHABLE;
+ /* FALLTHROUGH */
+ case PCIM_EA_P_VF_MEM:
+ case PCIM_EA_P_MEM:
+ type = SYS_RES_MEMORY;
+ break;
+ case PCIM_EA_P_IO:
+ type = SYS_RES_IOPORT;
+ break;
+ default:
+ continue;
+ }
+
+ if (alloc_iov != 0) {
+#ifdef PCI_IOV
+ /* Allocating IOV, confirm BEI matches */
+ if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
+ (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
+ continue;
+#else
+ continue;
+#endif
+ } else {
+ /* Allocating BAR, confirm BEI matches */
+ if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
+ (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
+ (ea->eae_bei != PCIM_EA_BEI_ROM))
+ continue;
+ }
+
+ rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
+ if (rid < 0)
+ continue;
+
+ /* Skip resources already allocated by EA */
+ if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
+ (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
+ continue;
+
+ start = ea->eae_base;
+ count = ea->eae_max_offset + 1;
+#ifdef PCI_IOV
+ if (iov != NULL)
+ count = count * iov->iov_num_vfs;
+#endif
+ end = start + count - 1;
+ if (count == 0)
+ continue;
+
+ resource_list_add(rl, type, rid, start, end, count);
+ res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
+ flags);
+ if (res == NULL) {
+ resource_list_delete(rl, type, rid);
+
+ /*
+ * Failed to allocate using EA, disable entry.
+ * Another attempt at allocation will be performed
+ * later, but this time using the legacy BAR registers.
+ */
+ tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
+ tmp &= ~PCIM_EA_ENABLE;
+ pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);
+
+ /*
+ * Disabling entry might fail in case it is hardwired.
+ * Read flags again to match current status.
+ */
+ ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);
+
+ continue;
+ }
+
+ /* As per specification, fill BAR with zeros */
+ pci_write_config(dev, rid, 0, 4);
+ }
+}
+
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
@@ -3174,6 +3804,9 @@ pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
rl = &dinfo->resources;
devid = (cfg->device << 16) | cfg->vendor;
+ /* Allocate resources using Enhanced Allocation */
+ pci_add_resources_ea(bus, dev, 0);
+
/* ATA devices needs special map treatment */
if ((pci_get_class(dev) == PCIC_STORAGE) &&
(pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
@@ -3183,6 +3816,14 @@ pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
pci_ata_maps(bus, dev, rl, force, prefetchmask);
else
for (i = 0; i < cfg->nummaps;) {
+ /* Skip resources already managed by EA */
+ if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
+ (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
+ pci_ea_is_enabled(dev, PCIR_BAR(i))) {
+ i++;
+ continue;
+ }
+
/*
* Skip quirked resources.
*/
@@ -3233,10 +3874,31 @@ pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
uhci_early_takeover(dev);
#endif /* __rtems__ */
}
+
+#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
+ /*
+ * Reserve resources for secondary bus ranges behind bridge
+ * devices.
+ */
+ pci_reserve_secbus(bus, dev, cfg, rl);
+#endif
+}
+
+static struct pci_devinfo *
+pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
+ int slot, int func)
+{
+ struct pci_devinfo *dinfo;
+
+ dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
+ if (dinfo != NULL)
+ pci_add_child(dev, dinfo);
+
+ return (dinfo);
}
void
-pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
+pci_add_children(device_t dev, int domain, int busno)
{
#define REG(n, w) PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
device_t pcib = device_get_parent(dev);
@@ -3244,11 +3906,26 @@ pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
int maxslots;
int s, f, pcifunchigh;
uint8_t hdrtype;
+ int first_func;
+
+ /*
+ * Try to detect a device at slot 0, function 0. If it exists, try to
+ * enable ARI. We must enable ARI before detecting the rest of the
+ * functions on this bus as ARI changes the set of slots and functions
+ * that are legal on this bus.
+ */
+ dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
+ if (dinfo != NULL && pci_enable_ari)
+ PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);
+
+ /*
+ * Start looking for new devices on slot 0 at function 1 because we
+ * just identified the device at slot 0, function 0.
+ */
+ first_func = 1;
- KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
- ("dinfo_size too small"));
maxslots = PCIB_MAXSLOTS(pcib);
- for (s = 0; s <= maxslots; s++) {
+ for (s = 0; s <= maxslots; s++, first_func = 0) {
pcifunchigh = 0;
f = 0;
DELAY(1);
@@ -3256,18 +3933,143 @@ pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
continue;
if (hdrtype & PCIM_MFDEV)
- pcifunchigh = PCI_FUNCMAX;
+ pcifunchigh = PCIB_MAXFUNCS(pcib);
+ for (f = first_func; f <= pcifunchigh; f++)
+ pci_identify_function(pcib, dev, domain, busno, s, f);
+ }
+#undef REG
+}
+
+int
+pci_rescan_method(device_t dev)
+{
+#define REG(n, w) PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
+ device_t pcib = device_get_parent(dev);
+ struct pci_softc *sc;
+ device_t child, *devlist, *unchanged;
+ int devcount, error, i, j, maxslots, oldcount;
+ int busno, domain, s, f, pcifunchigh;
+ uint8_t hdrtype;
+
+ /* No need to check for ARI on a rescan. */
+ error = device_get_children(dev, &devlist, &devcount);
+ if (error)
+ return (error);
+ if (devcount != 0) {
+ unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
+ M_NOWAIT | M_ZERO);
+ if (unchanged == NULL) {
+ free(devlist, M_TEMP);
+ return (ENOMEM);
+ }
+ } else
+ unchanged = NULL;
+
+ sc = device_get_softc(dev);
+ domain = pcib_get_domain(dev);
+ busno = pcib_get_bus(dev);
+ maxslots = PCIB_MAXSLOTS(pcib);
+ for (s = 0; s <= maxslots; s++) {
+ /* If function 0 is not present, skip to the next slot. */
+ f = 0;
+ if (REG(PCIR_VENDOR, 2) == 0xffff)
+ continue;
+ pcifunchigh = 0;
+ hdrtype = REG(PCIR_HDRTYPE, 1);
+ if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
+ continue;
+ if (hdrtype & PCIM_MFDEV)
+ pcifunchigh = PCIB_MAXFUNCS(pcib);
for (f = 0; f <= pcifunchigh; f++) {
- dinfo = pci_read_device(pcib, domain, busno, s, f,
- dinfo_size);
- if (dinfo != NULL) {
- pci_add_child(dev, dinfo);
+ if (REG(PCIR_VENDOR, 2) == 0xffff)
+ continue;
+
+ /*
+ * Found a valid function. Check if a
+ * device_t for this device already exists.
+ */
+ for (i = 0; i < devcount; i++) {
+ child = devlist[i];
+ if (child == NULL)
+ continue;
+ if (pci_get_slot(child) == s &&
+ pci_get_function(child) == f) {
+ unchanged[i] = child;
+ goto next_func;
+ }
}
+
+ pci_identify_function(pcib, dev, domain, busno, s, f);
+ next_func:;
}
}
+
+ /* Remove devices that are no longer present. */
+ for (i = 0; i < devcount; i++) {
+ if (unchanged[i] != NULL)
+ continue;
+ device_delete_child(dev, devlist[i]);
+ }
+
+ free(devlist, M_TEMP);
+ oldcount = devcount;
+
+ /* Try to attach the devices just added. */
+ error = device_get_children(dev, &devlist, &devcount);
+ if (error) {
+ free(unchanged, M_TEMP);
+ return (error);
+ }
+
+ for (i = 0; i < devcount; i++) {
+ for (j = 0; j < oldcount; j++) {
+ if (devlist[i] == unchanged[j])
+ goto next_device;
+ }
+
+ device_probe_and_attach(devlist[i]);
+ next_device:;
+ }
+
+ free(unchanged, M_TEMP);
+ free(devlist, M_TEMP);
+ return (0);
#undef REG
}
+#ifdef PCI_IOV
+device_t
+pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
+ uint16_t did)
+{
+ struct pci_devinfo *pf_dinfo, *vf_dinfo;
+ device_t pcib;
+ int busno, slot, func;
+
+ pf_dinfo = device_get_ivars(pf);
+
+ pcib = device_get_parent(bus);
+
+ PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
+
+ vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
+ slot, func, vid, did);
+
+ vf_dinfo->cfg.flags |= PCICFG_VF;
+ pci_add_child(bus, vf_dinfo);
+
+ return (vf_dinfo->cfg.dev);
+}
+
+device_t
+pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
+ uint16_t vid, uint16_t did)
+{
+
+ return (pci_add_iov_child(bus, pf, rid, vid, did));
+}
+#endif
+
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
@@ -3278,6 +4080,13 @@ pci_add_child(device_t bus, struct pci_devinfo *dinfo)
pci_cfg_restore(dinfo->cfg.dev, dinfo);
pci_print_verbose(dinfo);
pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
+ pci_child_added(dinfo->cfg.dev);
+}
+
+void
+pci_child_added_method(device_t dev, device_t child)
+{
+
}
static int
@@ -3298,10 +4107,22 @@ pci_attach_common(device_t dev)
#ifdef PCI_DMA_BOUNDARY
int error, tag_valid;
#endif
+#ifdef PCI_RES_BUS
+ int rid;
+#endif
sc = device_get_softc(dev);
domain = pcib_get_domain(dev);
busno = pcib_get_bus(dev);
+#ifdef PCI_RES_BUS
+ rid = 0;
+ sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
+ 1, 0);
+ if (sc->sc_bus == NULL) {
+ device_printf(dev, "failed to allocate bus number\n");
+ return (ENXIO);
+ }
+#endif
if (bootverbose)
device_printf(dev, "domain=%d, physical bus=%d\n",
domain, busno);
@@ -3335,24 +4156,42 @@ pci_attach(device_t dev)
return (error);
/*
- * Since there can be multiple independantly numbered PCI
+ * Since there can be multiple independently numbered PCI
* busses on systems with multiple PCI domains, we can't use
* the unit number to decide which bus we are probing. We ask
* the parent pcib what our domain and bus numbers are.
*/
domain = pcib_get_domain(dev);
busno = pcib_get_bus(dev);
- pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
+ pci_add_children(dev, domain, busno);
return (bus_generic_attach(dev));
}
+static int
+pci_detach(device_t dev)
+{
+#ifdef PCI_RES_BUS
+ struct pci_softc *sc;
+#endif
+ int error;
+
+ error = bus_generic_detach(dev);
+ if (error)
+ return (error);
+#ifdef PCI_RES_BUS
+ sc = device_get_softc(dev);
+ error = bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus);
+ if (error)
+ return (error);
+#endif
+ return (device_delete_children(dev));
+}
+
static void
-pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
- int state)
+pci_set_power_child(device_t dev, device_t child, int state)
{
- device_t child, pcib;
- struct pci_devinfo *dinfo;
- int dstate, i;
+ device_t pcib;
+ int dstate;
/*
* Set the device to the given state. If the firmware suggests
@@ -3362,45 +4201,53 @@ pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
* are handled separately.
*/
pcib = device_get_parent(dev);
- for (i = 0; i < numdevs; i++) {
- child = devlist[i];
- dinfo = device_get_ivars(child);
- dstate = state;
- if (device_is_attached(child) &&
- PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
- pci_set_powerstate(child, dstate);
- }
+ dstate = state;
+ if (device_is_attached(child) &&
+ PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
+ pci_set_powerstate(child, dstate);
}
int
-pci_suspend(device_t dev)
+pci_suspend_child(device_t dev, device_t child)
{
- device_t child, *devlist;
struct pci_devinfo *dinfo;
- int error, i, numdevs;
+ int error;
+
+ dinfo = device_get_ivars(child);
/*
- * Save the PCI configuration space for each child and set the
+ * Save the PCI configuration space for the child and set the
* device in the appropriate power state for this sleep state.
*/
- if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
- return (error);
- for (i = 0; i < numdevs; i++) {
- child = devlist[i];
- dinfo = device_get_ivars(child);
- pci_cfg_save(child, dinfo, 0);
- }
+ pci_cfg_save(child, dinfo, 0);
/* Suspend devices before potentially powering them down. */
- error = bus_generic_suspend(dev);
- if (error) {
- free(devlist, M_TEMP);
+ error = bus_generic_suspend_child(dev, child);
+
+ if (error)
return (error);
- }
+
if (pci_do_power_suspend)
- pci_set_power_children(dev, devlist, numdevs,
- PCI_POWERSTATE_D3);
- free(devlist, M_TEMP);
+ pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
+
+ return (0);
+}
+
+int
+pci_resume_child(device_t dev, device_t child)
+{
+ struct pci_devinfo *dinfo;
+
+ if (pci_do_power_resume)
+ pci_set_power_child(dev, child, PCI_POWERSTATE_D0);
+
+ dinfo = device_get_ivars(child);
+ pci_cfg_restore(child, dinfo);
+ if (!device_is_attached(child))
+ pci_cfg_save(child, dinfo, 1);
+
+ bus_generic_resume_child(dev, child);
+
return (0);
}
@@ -3408,27 +4255,10 @@ int
pci_resume(device_t dev)
{
device_t child, *devlist;
- struct pci_devinfo *dinfo;
int error, i, numdevs;
- /*
- * Set each child to D0 and restore its PCI configuration space.
- */
if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
return (error);
- if (pci_do_power_resume)
- pci_set_power_children(dev, devlist, numdevs,
- PCI_POWERSTATE_D0);
-
- /* Now the device is powered up, restore its config space. */
- for (i = 0; i < numdevs; i++) {
- child = devlist[i];
- dinfo = device_get_ivars(child);
-
- pci_cfg_restore(child, dinfo);
- if (!device_is_attached(child))
- pci_cfg_save(child, dinfo, 1);
- }
/*
* Resume critical devices first, then everything else later.
@@ -3440,7 +4270,7 @@ pci_resume(device_t dev)
case PCIC_MEMORY:
case PCIC_BRIDGE:
case PCIC_BASEPERIPH:
- DEVICE_RESUME(child);
+ BUS_RESUME_CHILD(dev, child);
break;
}
}
@@ -3453,7 +4283,7 @@ pci_resume(device_t dev)
case PCIC_BASEPERIPH:
break;
default:
- DEVICE_RESUME(child);
+ BUS_RESUME_CHILD(dev, child);
}
}
free(devlist, M_TEMP);
@@ -3504,7 +4334,7 @@ pci_driver_added(device_t dev, driver_t *driver)
pci_printf(&dinfo->cfg, "reprobing on driver added\n");
pci_cfg_restore(child, dinfo);
if (device_probe_and_attach(child) != 0)
- pci_cfg_save(child, dinfo, 1);
+ pci_child_detached(dev, child);
}
free(devlist, M_TEMP);
}
@@ -3680,15 +4510,16 @@ pci_print_child(device_t dev, device_t child)
retval += bus_print_child_header(dev, child);
- retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
- retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
- retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
+ retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
+ retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
+ retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
if (device_get_flags(dev))
retval += printf(" flags %#x", device_get_flags(dev));
retval += printf(" at device %d.%d", pci_get_slot(child),
pci_get_function(child));
+ retval += bus_print_child_domain(dev, child);
retval += bus_print_child_footer(dev, child);
return (retval);
@@ -3754,6 +4585,7 @@ static const struct
{PCIC_BASEPERIPH, PCIS_BASEPERIPH_RTC, 1, "realtime clock"},
{PCIC_BASEPERIPH, PCIS_BASEPERIPH_PCIHOT, 1, "PCI hot-plug controller"},
{PCIC_BASEPERIPH, PCIS_BASEPERIPH_SDHC, 1, "SD host controller"},
+ {PCIC_BASEPERIPH, PCIS_BASEPERIPH_IOMMU, 1, "IOMMU"},
{PCIC_INPUTDEV, -1, 1, "input device"},
{PCIC_INPUTDEV, PCIS_INPUTDEV_KEYBOARD, 1, "keyboard"},
{PCIC_INPUTDEV, PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
@@ -3835,6 +4667,38 @@ pci_probe_nomatch(device_t dev, device_t child)
pci_cfg_save(child, device_get_ivars(child), 1);
}
+void
+pci_child_detached(device_t dev, device_t child)
+{
+ struct pci_devinfo *dinfo;
+ struct resource_list *rl;
+
+ dinfo = device_get_ivars(child);
+ rl = &dinfo->resources;
+
+ /*
+ * Have to deallocate IRQs before releasing any MSI messages and
+ * have to release MSI messages before deallocating any memory
+ * BARs.
+ */
+ if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
+ pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
+ if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
+ pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
+ (void)pci_release_msi(child);
+ }
+ if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
+ pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
+ if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
+ pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
+#ifdef PCI_RES_BUS
+ if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
+ pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
+#endif
+
+ pci_cfg_save(child, dinfo, 1);
+}
+
/*
* Parse the PCI device database, if loaded, and return a pointer to a
* description of the device.
@@ -4031,9 +4895,17 @@ pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
*result = cfg->cachelnsz;
break;
case PCI_IVAR_MINGNT:
+ if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
+ *result = -1;
+ return (EINVAL);
+ }
*result = cfg->mingnt;
break;
case PCI_IVAR_MAXLAT:
+ if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
+ *result = -1;
+ return (EINVAL);
+ }
*result = cfg->maxlat;
break;
case PCI_IVAR_LATTIMER:
@@ -4130,7 +5002,8 @@ DB_SHOW_COMMAND(pciregs, db_pci_dump)
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
- u_long start, u_long end, u_long count, u_int flags)
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
+ u_int flags)
{
struct pci_devinfo *dinfo = device_get_ivars(child);
struct resource_list *rl = &dinfo->resources;
@@ -4140,6 +5013,11 @@ pci_reserve_map(device_t dev, device_t child, int type, int *rid,
int mapsize;
res = NULL;
+
+ /* If rid is managed by EA, ignore it */
+ if (pci_ea_is_enabled(child, *rid))
+ goto out;
+
pm = pci_find_bar(child, *rid);
if (pm != NULL) {
/* This is a BAR that we failed to allocate earlier. */
@@ -4154,7 +5032,7 @@ pci_reserve_map(device_t dev, device_t child, int type, int *rid,
* have a atapci device in legacy mode and it fails
* here, that other code is broken.
*/
- pci_read_bar(child, *rid, &map, &testval);
+ pci_read_bar(child, *rid, &map, &testval, NULL);
/*
* Determine the size of the BAR and ignore BARs with a size
@@ -4196,7 +5074,7 @@ pci_reserve_map(device_t dev, device_t child, int type, int *rid,
* situation where we might allocate the excess to
* another driver, which won't work.
*/
- count = (pci_addr_t)1 << mapsize;
+ count = ((pci_addr_t)1 << mapsize) * num;
if (RF_ALIGNMENT(flags) < mapsize)
flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
@@ -4221,13 +5099,13 @@ pci_reserve_map(device_t dev, device_t child, int type, int *rid,
if (res == NULL) {
resource_list_delete(rl, type, *rid);
device_printf(child,
- "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
+ "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
count, *rid, type, start, end);
goto out;
}
if (bootverbose)
device_printf(child,
- "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
+ "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
count, *rid, type, rman_get_start(res));
map = rman_get_start(res);
pci_write_bar(child, pm, map);
@@ -4236,8 +5114,9 @@ out:
}
struct resource *
-pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
- u_long start, u_long end, u_long count, u_int flags)
+pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
+ rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
+ u_int flags)
{
struct pci_devinfo *dinfo;
struct resource_list *rl;
@@ -4245,10 +5124,6 @@ pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
struct resource *res;
pcicfgregs *cfg;
- if (device_get_parent(child) != dev)
- return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
- type, rid, start, end, count, flags));
-
/*
* Perform lazy resource allocation
*/
@@ -4256,6 +5131,11 @@ pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
rl = &dinfo->resources;
cfg = &dinfo->cfg;
switch (type) {
+#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
+ case PCI_RES_BUS:
+ return (pci_alloc_secbus(dev, child, rid, start, end, count,
+ flags));
+#endif
case SYS_RES_IRQ:
/*
* Can't alloc legacy interrupt once MSI messages have
@@ -4300,7 +5180,7 @@ pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
rle = resource_list_find(rl, type, *rid);
if (rle == NULL) {
res = pci_reserve_map(dev, child, type, rid, start, end,
- count, flags);
+ count, num, flags);
if (res == NULL)
return (NULL);
}
@@ -4309,6 +5189,38 @@ pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
start, end, count, flags));
}
+struct resource *
+pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
+{
+#ifdef PCI_IOV
+ struct pci_devinfo *dinfo;
+#endif
+
+ if (device_get_parent(child) != dev)
+ return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
+ type, rid, start, end, count, flags));
+
+#ifdef PCI_IOV
+ dinfo = device_get_ivars(child);
+ if (dinfo->cfg.flags & PCICFG_VF) {
+ switch (type) {
+ /* VFs can't have I/O BARs. */
+ case SYS_RES_IOPORT:
+ return (NULL);
+ case SYS_RES_MEMORY:
+ return (pci_vf_alloc_mem_resource(dev, child, rid,
+ start, end, count, flags));
+ }
+
+ /* Fall through for other types of resource allocations. */
+ }
+#endif
+
+ return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
+ count, 1, flags));
+}
+
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
struct resource *r)
@@ -4323,6 +5235,22 @@ pci_release_resource(device_t dev, device_t child, int type, int rid,
dinfo = device_get_ivars(child);
cfg = &dinfo->cfg;
+
+#ifdef PCI_IOV
+ if (dinfo->cfg.flags & PCICFG_VF) {
+ switch (type) {
+ /* VFs can't have I/O BARs. */
+ case SYS_RES_IOPORT:
+ return (EDOOFUS);
+ case SYS_RES_MEMORY:
+ return (pci_vf_release_mem_resource(dev, child, rid,
+ r));
+ }
+
+ /* Fall through for other types of resource allocations. */
+ }
+#endif
+
#ifdef NEW_PCIB
/*
* PCI-PCI bridge I/O window resources are not BARs. For
@@ -4383,7 +5311,7 @@ pci_deactivate_resource(device_t dev, device_t child, int type,
if (error)
return (error);
- /* Disable decoding for device ROMs. */
+ /* Disable decoding for device ROMs. */
if (device_get_parent(child) == dev) {
dinfo = device_get_ivars(child);
if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
@@ -4394,7 +5322,7 @@ pci_deactivate_resource(device_t dev, device_t child, int type,
}
void
-pci_delete_child(device_t dev, device_t child)
+pci_child_deleted(device_t dev, device_t child)
{
struct resource_list_entry *rle;
struct resource_list *rl;
@@ -4403,12 +5331,13 @@ pci_delete_child(device_t dev, device_t child)
dinfo = device_get_ivars(child);
rl = &dinfo->resources;
- if (device_is_attached(child))
- device_detach(child);
-
/* Turn off access to resources we're about to free */
- pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
- PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
+ if (bus_child_present(child) != 0) {
+ pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
+ PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
+
+ pci_disable_busmaster(child);
+ }
/* Free all allocated resources */
STAILQ_FOREACH(rle, rl, link) {
@@ -4429,7 +5358,6 @@ pci_delete_child(device_t dev, device_t child)
}
resource_list_free(rl);
- device_delete_child(dev, child);
pci_freecfg(dinfo);
}
@@ -4454,7 +5382,7 @@ pci_delete_resource(device_t dev, device_t child, int type, int rid)
resource_list_busy(rl, type, rid)) {
device_printf(dev, "delete_resource: "
"Resource still owned by child, oops. "
- "(type=%d, rid=%d, addr=%lx)\n",
+ "(type=%d, rid=%d, addr=%jx)\n",
type, rid, rman_get_start(rle->res));
return;
}
@@ -4485,6 +5413,37 @@ pci_read_config_method(device_t dev, device_t child, int reg, int width)
struct pci_devinfo *dinfo = device_get_ivars(child);
pcicfgregs *cfg = &dinfo->cfg;
+#ifdef PCI_IOV
+ /*
+ * SR-IOV VFs don't implement the VID or DID registers, so we have to
+ * emulate them here.
+ */
+ if (cfg->flags & PCICFG_VF) {
+ if (reg == PCIR_VENDOR) {
+ switch (width) {
+ case 4:
+ return (cfg->device << 16 | cfg->vendor);
+ case 2:
+ return (cfg->vendor);
+ case 1:
+ return (cfg->vendor & 0xff);
+ default:
+ return (0xffffffff);
+ }
+ } else if (reg == PCIR_DEVICE) {
+ switch (width) {
+ /* Note that an unaligned 4-byte read is an error. */
+ case 2:
+ return (cfg->device);
+ case 1:
+ return (cfg->device & 0xff);
+ default:
+ return (0xffffffff);
+ }
+ }
+ }
+#endif
+
return (PCIB_READ_CONFIG(device_get_parent(dev),
cfg->bus, cfg->slot, cfg->func, reg, width));
}
@@ -4505,8 +5464,9 @@ pci_child_location_str_method(device_t dev, device_t child, char *buf,
size_t buflen)
{
- snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
- pci_get_function(child));
+ snprintf(buf, buflen, "slot=%d function=%d dbsf=pci%d:%d:%d:%d",
+ pci_get_slot(child), pci_get_function(child), pci_get_domain(child),
+ pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
return (0);
}
@@ -4536,10 +5496,60 @@ pci_assign_interrupt_method(device_t dev, device_t child)
cfg->intpin));
}
+static void
+pci_lookup(void *arg, const char *name, device_t *dev)
+{
+ long val;
+ char *end;
+ int domain, bus, slot, func;
+
+ if (*dev != NULL)
+ return;
+
+ /*
+ * Accept pciconf-style selectors of either pciD:B:S:F or
+ * pciB:S:F. In the latter case, the domain is assumed to
+ * be zero.
+ */
+ if (strncmp(name, "pci", 3) != 0)
+ return;
+ val = strtol(name + 3, &end, 10);
+ if (val < 0 || val > INT_MAX || *end != ':')
+ return;
+ domain = val;
+ val = strtol(end + 1, &end, 10);
+ if (val < 0 || val > INT_MAX || *end != ':')
+ return;
+ bus = val;
+ val = strtol(end + 1, &end, 10);
+ if (val < 0 || val > INT_MAX)
+ return;
+ slot = val;
+ if (*end == ':') {
+ val = strtol(end + 1, &end, 10);
+ if (val < 0 || val > INT_MAX || *end != '\0')
+ return;
+ func = val;
+ } else if (*end == '\0') {
+ func = slot;
+ slot = bus;
+ bus = domain;
+ domain = 0;
+ } else
+ return;
+
+ if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
+ func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
+ return;
+
+ *dev = pci_find_dbsf(domain, bus, slot, func);
+}
+
static int
pci_modevent(module_t mod, int what, void *arg)
{
static struct cdev *pci_cdev;
+ static eventhandler_tag tag;
switch (what) {
case MOD_LOAD:
@@ -4548,9 +5558,13 @@ pci_modevent(module_t mod, int what, void *arg)
pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
"pci");
pci_load_vendor_data();
+ tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
+ 1000);
break;
case MOD_UNLOAD:
+ if (tag != NULL)
+ EVENTHANDLER_DEREGISTER(dev_lookup, tag);
destroy_dev(pci_cdev);
break;
}
@@ -4558,21 +5572,54 @@ pci_modevent(module_t mod, int what, void *arg)
return (0);
}
+static void
+pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
+{
+#define WREG(n, v) pci_write_config(dev, pos + (n), (v), 2)
+ struct pcicfg_pcie *cfg;
+ int version, pos;
+
+ cfg = &dinfo->cfg.pcie;
+ pos = cfg->pcie_location;
+
+ version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
+
+ WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);
+
+ if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
+ cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
+ cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
+ WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);
+
+ if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
+ (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
+ (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
+ WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);
+
+ if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
+ cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
+ WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);
+
+ if (version > 1) {
+ WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
+ WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
+ WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
+ }
+#undef WREG
+}
+
+static void
+pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
+{
+ pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
+ dinfo->cfg.pcix.pcix_command, 2);
+}
+
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
/*
- * Only do header type 0 devices. Type 1 devices are bridges,
- * which we know need special treatment. Type 2 devices are
- * cardbus bridges which also require special treatment.
- * Other types are unknown, and we err on the side of safety
- * by ignoring them.
- */
- if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
- return;
-
- /*
* Restore the device to full power mode. We must do this
* before we restore the registers because moving from D3 to
* D0 will cause the chip's BARs and some other registers to
@@ -4582,22 +5629,108 @@ pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
*/
if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
pci_set_powerstate(dev, PCI_POWERSTATE_D0);
- pci_restore_bars(dev);
pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
- pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
- pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
+ switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
+ case PCIM_HDRTYPE_NORMAL:
+ pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
+ pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
+ break;
+ case PCIM_HDRTYPE_BRIDGE:
+ pci_write_config(dev, PCIR_SECLAT_1,
+ dinfo->cfg.bridge.br_seclat, 1);
+ pci_write_config(dev, PCIR_SUBBUS_1,
+ dinfo->cfg.bridge.br_subbus, 1);
+ pci_write_config(dev, PCIR_SECBUS_1,
+ dinfo->cfg.bridge.br_secbus, 1);
+ pci_write_config(dev, PCIR_PRIBUS_1,
+ dinfo->cfg.bridge.br_pribus, 1);
+ pci_write_config(dev, PCIR_BRIDGECTL_1,
+ dinfo->cfg.bridge.br_control, 2);
+ break;
+ case PCIM_HDRTYPE_CARDBUS:
+ pci_write_config(dev, PCIR_SECLAT_2,
+ dinfo->cfg.bridge.br_seclat, 1);
+ pci_write_config(dev, PCIR_SUBBUS_2,
+ dinfo->cfg.bridge.br_subbus, 1);
+ pci_write_config(dev, PCIR_SECBUS_2,
+ dinfo->cfg.bridge.br_secbus, 1);
+ pci_write_config(dev, PCIR_PRIBUS_2,
+ dinfo->cfg.bridge.br_pribus, 1);
+ pci_write_config(dev, PCIR_BRIDGECTL_2,
+ dinfo->cfg.bridge.br_control, 2);
+ break;
+ }
+ pci_restore_bars(dev);
+
+ /*
+ * Restore extended capabilities for PCI-Express and PCI-X
+ */
+ if (dinfo->cfg.pcie.pcie_location != 0)
+ pci_cfg_restore_pcie(dev, dinfo);
+ if (dinfo->cfg.pcix.pcix_location != 0)
+ pci_cfg_restore_pcix(dev, dinfo);
/* Restore MSI and MSI-X configurations if they are present. */
if (dinfo->cfg.msi.msi_location != 0)
pci_resume_msi(dev);
if (dinfo->cfg.msix.msix_location != 0)
pci_resume_msix(dev);
+
+#ifdef PCI_IOV
+ if (dinfo->cfg.iov != NULL)
+ pci_iov_cfg_restore(dev, dinfo);
+#endif
+}
+
+static void
+pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
+{
+#define RREG(n) pci_read_config(dev, pos + (n), 2)
+ struct pcicfg_pcie *cfg;
+ int version, pos;
+
+ cfg = &dinfo->cfg.pcie;
+ pos = cfg->pcie_location;
+
+ cfg->pcie_flags = RREG(PCIER_FLAGS);
+
+ version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
+
+ cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
+
+ if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
+ cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
+ cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
+ cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
+
+ if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
+ (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
+ (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
+ cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
+
+ if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
+ cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
+ cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
+
+ if (version > 1) {
+ cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
+ cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
+ cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
+ }
+#undef RREG
+}
+
+static void
+pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
+{
+ dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
+ dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
}
void
@@ -4607,40 +5740,68 @@ pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
int ps;
/*
- * Only do header type 0 devices. Type 1 devices are bridges, which
- * we know need special treatment. Type 2 devices are cardbus bridges
- * which also require special treatment. Other types are unknown, and
- * we err on the side of safety by ignoring them. Powering down
- * bridges should not be undertaken lightly.
- */
- if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
- return;
-
- /*
* Some drivers apparently write to these registers w/o updating our
* cached copy. No harm happens if we update the copy, so do so here
* so we can restore them. The COMMAND register is modified by the
* bus w/o updating the cache. This should represent the normally
- * writable portion of the 'defined' part of type 0 headers. In
- * theory we also need to save/restore the PCI capability structures
- * we know about, but apart from power we don't know any that are
- * writable.
+ * writable portion of the 'defined' part of type 0/1/2 headers.
*/
- dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
- dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
- dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
- dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
+ switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
+ case PCIM_HDRTYPE_NORMAL:
+ dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
+ dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
+ dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
+ dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
+ break;
+ case PCIM_HDRTYPE_BRIDGE:
+ dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
+ PCIR_SECLAT_1, 1);
+ dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
+ PCIR_SUBBUS_1, 1);
+ dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
+ PCIR_SECBUS_1, 1);
+ dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
+ PCIR_PRIBUS_1, 1);
+ dinfo->cfg.bridge.br_control = pci_read_config(dev,
+ PCIR_BRIDGECTL_1, 2);
+ break;
+ case PCIM_HDRTYPE_CARDBUS:
+ dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
+ PCIR_SECLAT_2, 1);
+ dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
+ PCIR_SUBBUS_2, 1);
+ dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
+ PCIR_SECBUS_2, 1);
+ dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
+ PCIR_PRIBUS_2, 1);
+ dinfo->cfg.bridge.br_control = pci_read_config(dev,
+ PCIR_BRIDGECTL_2, 2);
+ dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
+ dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
+ break;
+ }
+
+ if (dinfo->cfg.pcie.pcie_location != 0)
+ pci_cfg_save_pcie(dev, dinfo);
+
+ if (dinfo->cfg.pcix.pcix_location != 0)
+ pci_cfg_save_pcix(dev, dinfo);
+
+#ifdef PCI_IOV
+ if (dinfo->cfg.iov != NULL)
+ pci_iov_cfg_save(dev, dinfo);
+#endif
/*
* don't set the state for display devices, base peripherals and
@@ -4661,7 +5822,7 @@ pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
if (cls == PCIC_STORAGE)
return;
/*FALLTHROUGH*/
- case 2: /* Agressive about what to power down */
+ case 2: /* Aggressive about what to power down */
if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
cls == PCIC_BASEPERIPH)
return;
@@ -4698,3 +5859,52 @@ pci_restore_state(device_t dev)
dinfo = device_get_ivars(dev);
pci_cfg_restore(dev, dinfo);
}
+
+static int
+pci_get_id_method(device_t dev, device_t child, enum pci_id_type type,
+ uintptr_t *id)
+{
+
+ return (PCIB_GET_ID(device_get_parent(dev), child, type, id));
+}
+
+/* Find the upstream port of a given PCI device in a root complex. */
+device_t
+pci_find_pcie_root_port(device_t dev)
+{
+ struct pci_devinfo *dinfo;
+ devclass_t pci_class;
+ device_t pcib, bus;
+
+ pci_class = devclass_find("pci");
+ KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
+ ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
+
+ /*
+ * Walk the bridge hierarchy until we find a PCI-e root
+ * port or a non-PCI device.
+ */
+ for (;;) {
+ bus = device_get_parent(dev);
+ KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
+ device_get_nameunit(dev)));
+
+ pcib = device_get_parent(bus);
+ KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
+ device_get_nameunit(bus)));
+
+ /*
+ * pcib's parent must be a PCI bus for this to be a
+ * PCI-PCI bridge.
+ */
+ if (device_get_devclass(device_get_parent(pcib)) != pci_class)
+ return (NULL);
+
+ dinfo = device_get_ivars(pcib);
+ if (dinfo->cfg.pcie.pcie_location != 0 &&
+ dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
+ return (pcib);
+
+ dev = pcib;
+ }
+}
diff --git a/freebsd/sys/dev/pci/pci_pci.c b/freebsd/sys/dev/pci/pci_pci.c
index 6c159aec..7d763dd9 100644
--- a/freebsd/sys/dev/pci/pci_pci.c
+++ b/freebsd/sys/dev/pci/pci_pci.c
@@ -37,6 +37,8 @@ __FBSDID("$FreeBSD$");
* PCI:PCI bridge support.
*/
+#include <rtems/bsd/local/opt_pci.h>
+
#include <rtems/bsd/sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
@@ -45,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
+#include <sys/taskqueue.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
@@ -58,17 +61,35 @@ static int pcib_suspend(device_t dev);
static int pcib_resume(device_t dev);
static int pcib_power_for_sleep(device_t pcib, device_t dev,
int *pstate);
+static int pcib_ari_get_id(device_t pcib, device_t dev,
+ enum pci_id_type type, uintptr_t *id);
+static uint32_t pcib_read_config(device_t dev, u_int b, u_int s,
+ u_int f, u_int reg, int width);
+static void pcib_write_config(device_t dev, u_int b, u_int s,
+ u_int f, u_int reg, uint32_t val, int width);
+static int pcib_ari_maxslots(device_t dev);
+static int pcib_ari_maxfuncs(device_t dev);
+static int pcib_try_enable_ari(device_t pcib, device_t dev);
+static int pcib_ari_enabled(device_t pcib);
+static void pcib_ari_decode_rid(device_t pcib, uint16_t rid,
+ int *bus, int *slot, int *func);
+#ifdef PCI_HP
+static void pcib_pcie_ab_timeout(void *arg);
+static void pcib_pcie_cc_timeout(void *arg);
+static void pcib_pcie_dll_timeout(void *arg);
+#endif
static device_method_t pcib_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pcib_probe),
DEVMETHOD(device_attach, pcib_attach),
- DEVMETHOD(device_detach, bus_generic_detach),
+ DEVMETHOD(device_detach, pcib_detach),
DEVMETHOD(device_shutdown, bus_generic_shutdown),
DEVMETHOD(device_suspend, pcib_suspend),
DEVMETHOD(device_resume, pcib_resume),
/* Bus interface */
+ DEVMETHOD(bus_child_present, pcib_child_present),
DEVMETHOD(bus_read_ivar, pcib_read_ivar),
DEVMETHOD(bus_write_ivar, pcib_write_ivar),
DEVMETHOD(bus_alloc_resource, pcib_alloc_resource),
@@ -85,7 +106,8 @@ static device_method_t pcib_methods[] = {
DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr),
/* pcib interface */
- DEVMETHOD(pcib_maxslots, pcib_maxslots),
+ DEVMETHOD(pcib_maxslots, pcib_ari_maxslots),
+ DEVMETHOD(pcib_maxfuncs, pcib_ari_maxfuncs),
DEVMETHOD(pcib_read_config, pcib_read_config),
DEVMETHOD(pcib_write_config, pcib_write_config),
DEVMETHOD(pcib_route_interrupt, pcib_route_interrupt),
@@ -95,6 +117,10 @@ static device_method_t pcib_methods[] = {
DEVMETHOD(pcib_release_msix, pcib_release_msix),
DEVMETHOD(pcib_map_msi, pcib_map_msi),
DEVMETHOD(pcib_power_for_sleep, pcib_power_for_sleep),
+ DEVMETHOD(pcib_get_id, pcib_ari_get_id),
+ DEVMETHOD(pcib_try_enable_ari, pcib_try_enable_ari),
+ DEVMETHOD(pcib_ari_enabled, pcib_ari_enabled),
+ DEVMETHOD(pcib_decode_rid, pcib_ari_decode_rid),
DEVMETHOD_END
};
@@ -104,11 +130,12 @@ static devclass_t pcib_devclass;
DEFINE_CLASS_0(pcib, pcib_driver, pcib_methods, sizeof(struct pcib_softc));
DRIVER_MODULE(pcib, pci, pcib_driver, pcib_devclass, NULL, NULL);
-#ifdef NEW_PCIB
+#if defined(NEW_PCIB) || defined(PCI_HP)
SYSCTL_DECL(_hw_pci);
+#endif
+#ifdef NEW_PCIB
static int pci_clear_pcib;
-TUNABLE_INT("hw.pci.clear_pcib", &pci_clear_pcib);
SYSCTL_INT(_hw_pci, OID_AUTO, clear_pcib, CTLFLAG_RDTUN, &pci_clear_pcib, 0,
"Clear firmware-assigned resources for PCI-PCI bridge I/O windows.");
@@ -121,6 +148,10 @@ pcib_is_resource_managed(struct pcib_softc *sc, int type, struct resource *r)
{
switch (type) {
+#ifdef PCI_RES_BUS
+ case PCI_RES_BUS:
+ return (rman_is_region_manager(r, &sc->bus.rman));
+#endif
case SYS_RES_IOPORT:
return (rman_is_region_manager(r, &sc->io.rman));
case SYS_RES_MEMORY:
@@ -195,9 +226,10 @@ pcib_write_windows(struct pcib_softc *sc, int mask)
* ISA alias range.
*/
static int
-pcib_is_isa_range(struct pcib_softc *sc, u_long start, u_long end, u_long count)
+pcib_is_isa_range(struct pcib_softc *sc, rman_res_t start, rman_res_t end,
+ rman_res_t count)
{
- u_long next_alias;
+ rman_res_t next_alias;
if (!(sc->bridgectl & PCIB_BCR_ISA_ENABLE))
return (0);
@@ -229,7 +261,7 @@ pcib_is_isa_range(struct pcib_softc *sc, u_long start, u_long end, u_long count)
alias:
if (bootverbose)
device_printf(sc->dev,
- "I/O range %#lx-%#lx overlaps with an ISA alias\n", start,
+ "I/O range %#jx-%#jx overlaps with an ISA alias\n", start,
end);
return (1);
}
@@ -249,7 +281,7 @@ pcib_add_window_resources(struct pcib_window *w, struct resource **res,
free(w->res, M_DEVBUF);
w->res = newarray;
w->count += count;
-
+
for (i = 0; i < count; i++) {
error = rman_manage_region(&w->rman, rman_get_start(res[i]),
rman_get_end(res[i]));
@@ -258,13 +290,13 @@ pcib_add_window_resources(struct pcib_window *w, struct resource **res,
}
}
-typedef void (nonisa_callback)(u_long start, u_long end, void *arg);
+typedef void (nonisa_callback)(rman_res_t start, rman_res_t end, void *arg);
static void
-pcib_walk_nonisa_ranges(u_long start, u_long end, nonisa_callback *cb,
+pcib_walk_nonisa_ranges(rman_res_t start, rman_res_t end, nonisa_callback *cb,
void *arg)
{
- u_long next_end;
+ rman_res_t next_end;
/*
* If start is within an ISA alias range, move up to the start
@@ -292,7 +324,7 @@ pcib_walk_nonisa_ranges(u_long start, u_long end, nonisa_callback *cb,
}
static void
-count_ranges(u_long start, u_long end, void *arg)
+count_ranges(rman_res_t start, rman_res_t end, void *arg)
{
int *countp;
@@ -307,7 +339,7 @@ struct alloc_state {
};
static void
-alloc_ranges(u_long start, u_long end, void *arg)
+alloc_ranges(rman_res_t start, rman_res_t end, void *arg)
{
struct alloc_state *as;
struct pcib_window *w;
@@ -321,7 +353,7 @@ alloc_ranges(u_long start, u_long end, void *arg)
rid = w->reg;
if (bootverbose)
device_printf(as->sc->dev,
- "allocating non-ISA range %#lx-%#lx\n", start, end);
+ "allocating non-ISA range %#jx-%#jx\n", start, end);
as->res[as->count] = bus_alloc_resource(as->sc->dev, SYS_RES_IOPORT,
&rid, start, end, end - start + 1, 0);
if (as->res[as->count] == NULL)
@@ -331,7 +363,7 @@ alloc_ranges(u_long start, u_long end, void *arg)
}
static int
-pcib_alloc_nonisa_ranges(struct pcib_softc *sc, u_long start, u_long end)
+pcib_alloc_nonisa_ranges(struct pcib_softc *sc, rman_res_t start, rman_res_t end)
{
struct alloc_state as;
int i, new_count;
@@ -370,8 +402,8 @@ pcib_alloc_window(struct pcib_softc *sc, struct pcib_window *w, int type,
char buf[64];
int error, rid;
- if (max_address != (u_long)max_address)
- max_address = ~0ul;
+ if (max_address != (rman_res_t)max_address)
+ max_address = ~0;
w->rman.rm_start = 0;
w->rman.rm_end = max_address;
w->rman.rm_type = RMAN_ARRAY;
@@ -425,16 +457,7 @@ pcib_probe_windows(struct pcib_softc *sc)
dev = sc->dev;
if (pci_clear_pcib) {
- pci_write_config(dev, PCIR_IOBASEL_1, 0xff, 1);
- pci_write_config(dev, PCIR_IOBASEH_1, 0xffff, 2);
- pci_write_config(dev, PCIR_IOLIMITL_1, 0, 1);
- pci_write_config(dev, PCIR_IOLIMITH_1, 0, 2);
- pci_write_config(dev, PCIR_MEMBASE_1, 0xffff, 2);
- pci_write_config(dev, PCIR_MEMLIMIT_1, 0, 2);
- pci_write_config(dev, PCIR_PMBASEL_1, 0xffff, 2);
- pci_write_config(dev, PCIR_PMBASEH_1, 0xffffffff, 4);
- pci_write_config(dev, PCIR_PMLIMITL_1, 0, 2);
- pci_write_config(dev, PCIR_PMLIMITH_1, 0, 4);
+ pcib_bridge_init(dev);
}
/* Determine if the I/O port window is implemented. */
@@ -525,6 +548,231 @@ pcib_probe_windows(struct pcib_softc *sc)
}
}
+static void
+pcib_release_window(struct pcib_softc *sc, struct pcib_window *w, int type)
+{
+ device_t dev;
+ int error, i;
+
+ if (!w->valid)
+ return;
+
+ dev = sc->dev;
+ error = rman_fini(&w->rman);
+ if (error) {
+ device_printf(dev, "failed to release %s rman\n", w->name);
+ return;
+ }
+ free(__DECONST(char *, w->rman.rm_descr), M_DEVBUF);
+
+ for (i = 0; i < w->count; i++) {
+ error = bus_free_resource(dev, type, w->res[i]);
+ if (error)
+ device_printf(dev,
+ "failed to release %s resource: %d\n", w->name,
+ error);
+ }
+ free(w->res, M_DEVBUF);
+}
+
+static void
+pcib_free_windows(struct pcib_softc *sc)
+{
+
+ pcib_release_window(sc, &sc->pmem, SYS_RES_MEMORY);
+ pcib_release_window(sc, &sc->mem, SYS_RES_MEMORY);
+ pcib_release_window(sc, &sc->io, SYS_RES_IOPORT);
+}
+
+#ifdef PCI_RES_BUS
+/*
+ * Allocate a suitable secondary bus for this bridge if needed and
+ * initialize the resource manager for the secondary bus range. Note
+ * that the minimum count is a desired value and this may allocate a
+ * smaller range.
+ */
+void
+pcib_setup_secbus(device_t dev, struct pcib_secbus *bus, int min_count)
+{
+ char buf[64];
+ int error, rid, sec_reg;
+
+ switch (pci_read_config(dev, PCIR_HDRTYPE, 1) & PCIM_HDRTYPE) {
+ case PCIM_HDRTYPE_BRIDGE:
+ sec_reg = PCIR_SECBUS_1;
+ bus->sub_reg = PCIR_SUBBUS_1;
+ break;
+ case PCIM_HDRTYPE_CARDBUS:
+ sec_reg = PCIR_SECBUS_2;
+ bus->sub_reg = PCIR_SUBBUS_2;
+ break;
+ default:
+ panic("not a PCI bridge");
+ }
+ bus->sec = pci_read_config(dev, sec_reg, 1);
+ bus->sub = pci_read_config(dev, bus->sub_reg, 1);
+ bus->dev = dev;
+ bus->rman.rm_start = 0;
+ bus->rman.rm_end = PCI_BUSMAX;
+ bus->rman.rm_type = RMAN_ARRAY;
+ snprintf(buf, sizeof(buf), "%s bus numbers", device_get_nameunit(dev));
+ bus->rman.rm_descr = strdup(buf, M_DEVBUF);
+ error = rman_init(&bus->rman);
+ if (error)
+ panic("Failed to initialize %s bus number rman",
+ device_get_nameunit(dev));
+
+ /*
+ * Allocate a bus range. This will return an existing bus range
+ * if one exists, or a new bus range if one does not.
+ */
+ rid = 0;
+ bus->res = bus_alloc_resource_anywhere(dev, PCI_RES_BUS, &rid,
+ min_count, 0);
+ if (bus->res == NULL) {
+ /*
+ * Fall back to just allocating a range of a single bus
+ * number.
+ */
+ bus->res = bus_alloc_resource_anywhere(dev, PCI_RES_BUS, &rid,
+ 1, 0);
+ } else if (rman_get_size(bus->res) < min_count)
+ /*
+ * Attempt to grow the existing range to satisfy the
+ * minimum desired count.
+ */
+ (void)bus_adjust_resource(dev, PCI_RES_BUS, bus->res,
+ rman_get_start(bus->res), rman_get_start(bus->res) +
+ min_count - 1);
+
+ /*
+ * Add the initial resource to the rman.
+ */
+ if (bus->res != NULL) {
+ error = rman_manage_region(&bus->rman, rman_get_start(bus->res),
+ rman_get_end(bus->res));
+ if (error)
+ panic("Failed to add resource to rman");
+ bus->sec = rman_get_start(bus->res);
+ bus->sub = rman_get_end(bus->res);
+ }
+}
+
+void
+pcib_free_secbus(device_t dev, struct pcib_secbus *bus)
+{
+ int error;
+
+ error = rman_fini(&bus->rman);
+ if (error) {
+ device_printf(dev, "failed to release bus number rman\n");
+ return;
+ }
+ free(__DECONST(char *, bus->rman.rm_descr), M_DEVBUF);
+
+ error = bus_free_resource(dev, PCI_RES_BUS, bus->res);
+ if (error)
+ device_printf(dev,
+ "failed to release bus numbers resource: %d\n", error);
+}
+
+static struct resource *
+pcib_suballoc_bus(struct pcib_secbus *bus, device_t child, int *rid,
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
+{
+ struct resource *res;
+
+ res = rman_reserve_resource(&bus->rman, start, end, count, flags,
+ child);
+ if (res == NULL)
+ return (NULL);
+
+ if (bootverbose)
+ device_printf(bus->dev,
+ "allocated bus range (%ju-%ju) for rid %d of %s\n",
+ rman_get_start(res), rman_get_end(res), *rid,
+ pcib_child_name(child));
+ rman_set_rid(res, *rid);
+ return (res);
+}
+
+/*
+ * Attempt to grow the secondary bus range. This is much simpler than
+ * for I/O windows as the range can only be grown by increasing
+ * subbus.
+ */
+static int
+pcib_grow_subbus(struct pcib_secbus *bus, rman_res_t new_end)
+{
+ rman_res_t old_end;
+ int error;
+
+ old_end = rman_get_end(bus->res);
+ KASSERT(new_end > old_end, ("attempt to shrink subbus"));
+ error = bus_adjust_resource(bus->dev, PCI_RES_BUS, bus->res,
+ rman_get_start(bus->res), new_end);
+ if (error)
+ return (error);
+ if (bootverbose)
+ device_printf(bus->dev, "grew bus range to %ju-%ju\n",
+ rman_get_start(bus->res), rman_get_end(bus->res));
+ error = rman_manage_region(&bus->rman, old_end + 1,
+ rman_get_end(bus->res));
+ if (error)
+ panic("Failed to add resource to rman");
+ bus->sub = rman_get_end(bus->res);
+ pci_write_config(bus->dev, bus->sub_reg, bus->sub, 1);
+ return (0);
+}
+
+struct resource *
+pcib_alloc_subbus(struct pcib_secbus *bus, device_t child, int *rid,
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
+{
+ struct resource *res;
+ rman_res_t start_free, end_free, new_end;
+
+ /*
+ * First, see if the request can be satisified by the existing
+ * bus range.
+ */
+ res = pcib_suballoc_bus(bus, child, rid, start, end, count, flags);
+ if (res != NULL)
+ return (res);
+
+ /*
+ * Figure out a range to grow the bus range. First, find the
+ * first bus number after the last allocated bus in the rman and
+ * enforce that as a minimum starting point for the range.
+ */
+ if (rman_last_free_region(&bus->rman, &start_free, &end_free) != 0 ||
+ end_free != bus->sub)
+ start_free = bus->sub + 1;
+ if (start_free < start)
+ start_free = start;
+ new_end = start_free + count - 1;
+
+ /*
+ * See if this new range would satisfy the request if it
+ * succeeds.
+ */
+ if (new_end > end)
+ return (NULL);
+
+ /* Finally, attempt to grow the existing resource. */
+ if (bootverbose) {
+ device_printf(bus->dev,
+ "attempting to grow bus range for %ju buses\n", count);
+ printf("\tback candidate range: %ju-%ju\n", start_free,
+ new_end);
+ }
+ if (pcib_grow_subbus(bus, new_end) == 0)
+ return (pcib_suballoc_bus(bus, child, rid, start, end, count,
+ flags));
+ return (NULL);
+}
+#endif
+
#else
/*
@@ -604,7 +852,7 @@ pcib_get_mem_decode(struct pcib_softc *sc)
sc->pmembase = PCI_PPBMEMBASE(0, pmemlow);
pmemlow = pci_read_config(dev, PCIR_PMLIMITL_1, 2);
- if ((pmemlow & PCIM_BRPM_MASK) == PCIM_BRPM_64)
+ if ((pmemlow & PCIM_BRPM_MASK) == PCIM_BRPM_64)
sc->pmemlimit = PCI_PPBMEMLIMIT(
pci_read_config(dev, PCIR_PMLIMITH_1, 4), pmemlow);
else
@@ -659,26 +907,564 @@ pcib_set_mem_decode(struct pcib_softc *sc)
}
#endif
+#ifdef PCI_HP
+/*
+ * PCI-express HotPlug support.
+ */
+static int pci_enable_pcie_hp = 1;
+SYSCTL_INT(_hw_pci, OID_AUTO, enable_pcie_hp, CTLFLAG_RDTUN,
+ &pci_enable_pcie_hp, 0,
+ "Enable support for native PCI-express HotPlug.");
+
+static void
+pcib_probe_hotplug(struct pcib_softc *sc)
+{
+ device_t dev;
+ uint16_t link_sta, slot_sta;
+
+ if (!pci_enable_pcie_hp)
+ return;
+
+ dev = sc->dev;
+ if (pci_find_cap(dev, PCIY_EXPRESS, NULL) != 0)
+ return;
+
+ if (!(pcie_read_config(dev, PCIER_FLAGS, 2) & PCIEM_FLAGS_SLOT))
+ return;
+
+ sc->pcie_link_cap = pcie_read_config(dev, PCIER_LINK_CAP, 4);
+ sc->pcie_slot_cap = pcie_read_config(dev, PCIER_SLOT_CAP, 4);
+
+ if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_HPC) == 0)
+ return;
+
+ /*
+ * Some devices report that they have an MRL when they actually
+ * do not. Since they always report that the MRL is open, child
+ * devices would be ignored. Try to detect these devices and
+ * ignore their claim of HotPlug support.
+ *
+ * If there is an open MRL but the Data Link Layer is active,
+ * the MRL is not real.
+ */
+ if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP) != 0 &&
+ (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) != 0) {
+ link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2);
+ slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2);
+ if ((slot_sta & PCIEM_SLOT_STA_MRLSS) != 0 &&
+ (link_sta & PCIEM_LINK_STA_DL_ACTIVE) != 0) {
+ return;
+ }
+ }
+
+ sc->flags |= PCIB_HOTPLUG;
+}
+
+/*
+ * Send a HotPlug command to the slot control register. If this slot
+ * uses command completion interrupts and a previous command is still
+ * in progress, then the command is dropped. Once the previous
+ * command completes or times out, pcib_pcie_hotplug_update() will be
+ * invoked to post a new command based on the slot's state at that
+ * time.
+ */
+static void
+pcib_pcie_hotplug_command(struct pcib_softc *sc, uint16_t val, uint16_t mask)
+{
+ device_t dev;
+ uint16_t ctl, new;
+
+ dev = sc->dev;
+
+ if (sc->flags & PCIB_HOTPLUG_CMD_PENDING)
+ return;
+
+ ctl = pcie_read_config(dev, PCIER_SLOT_CTL, 2);
+ new = (ctl & ~mask) | val;
+ if (new == ctl)
+ return;
+ if (bootverbose)
+ device_printf(dev, "HotPlug command: %04x -> %04x\n", ctl, new);
+ pcie_write_config(dev, PCIER_SLOT_CTL, new, 2);
+ if (!(sc->pcie_slot_cap & PCIEM_SLOT_CAP_NCCS) &&
+ (ctl & new) & PCIEM_SLOT_CTL_CCIE) {
+ sc->flags |= PCIB_HOTPLUG_CMD_PENDING;
+ if (!cold)
+ callout_reset(&sc->pcie_cc_timer, hz,
+ pcib_pcie_cc_timeout, sc);
+ }
+}
+
+static void
+pcib_pcie_hotplug_command_completed(struct pcib_softc *sc)
+{
+ device_t dev;
+
+ dev = sc->dev;
+
+ if (bootverbose)
+ device_printf(dev, "Command Completed\n");
+ if (!(sc->flags & PCIB_HOTPLUG_CMD_PENDING))
+ return;
+ callout_stop(&sc->pcie_cc_timer);
+ sc->flags &= ~PCIB_HOTPLUG_CMD_PENDING;
+ wakeup(sc);
+}
+
+/*
+ * Returns true if a card is fully inserted from the user's
+ * perspective. It may not yet be ready for access, but the driver
+ * can now start enabling access if necessary.
+ */
+static bool
+pcib_hotplug_inserted(struct pcib_softc *sc)
+{
+
+ /* Pretend the card isn't present if a detach is forced. */
+ if (sc->flags & PCIB_DETACHING)
+ return (false);
+
+ /* Card must be present in the slot. */
+ if ((sc->pcie_slot_sta & PCIEM_SLOT_STA_PDS) == 0)
+ return (false);
+
+ /* A power fault implicitly turns off power to the slot. */
+ if (sc->pcie_slot_sta & PCIEM_SLOT_STA_PFD)
+ return (false);
+
+ /* If the MRL is disengaged, the slot is powered off. */
+ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP &&
+ (sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSS) != 0)
+ return (false);
+
+ return (true);
+}
+
+/*
+ * Returns -1 if the card is fully inserted, powered, and ready for
+ * access. Otherwise, returns 0.
+ */
+static int
+pcib_hotplug_present(struct pcib_softc *sc)
+{
+
+ /* Card must be inserted. */
+ if (!pcib_hotplug_inserted(sc))
+ return (0);
+
+ /*
+ * Require the Electromechanical Interlock to be engaged if
+ * present.
+ */
+ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_EIP &&
+ (sc->pcie_slot_sta & PCIEM_SLOT_STA_EIS) == 0)
+ return (0);
+
+ /* Require the Data Link Layer to be active. */
+ if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) {
+ if (!(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE))
+ return (0);
+ }
+
+ return (-1);
+}
+
+static void
+pcib_pcie_hotplug_update(struct pcib_softc *sc, uint16_t val, uint16_t mask,
+ bool schedule_task)
+{
+ bool card_inserted, ei_engaged;
+
+ /* Clear DETACHING if Presence Detect has cleared. */
+ if ((sc->pcie_slot_sta & (PCIEM_SLOT_STA_PDC | PCIEM_SLOT_STA_PDS)) ==
+ PCIEM_SLOT_STA_PDC)
+ sc->flags &= ~PCIB_DETACHING;
+
+ card_inserted = pcib_hotplug_inserted(sc);
+
+ /* Turn the power indicator on if a card is inserted. */
+ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PIP) {
+ mask |= PCIEM_SLOT_CTL_PIC;
+ if (card_inserted)
+ val |= PCIEM_SLOT_CTL_PI_ON;
+ else if (sc->flags & PCIB_DETACH_PENDING)
+ val |= PCIEM_SLOT_CTL_PI_BLINK;
+ else
+ val |= PCIEM_SLOT_CTL_PI_OFF;
+ }
+
+ /* Turn the power on via the Power Controller if a card is inserted. */
+ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PCP) {
+ mask |= PCIEM_SLOT_CTL_PCC;
+ if (card_inserted)
+ val |= PCIEM_SLOT_CTL_PC_ON;
+ else
+ val |= PCIEM_SLOT_CTL_PC_OFF;
+ }
+
+ /*
+ * If a card is inserted, enable the Electromechanical
+ * Interlock. If a card is not inserted (or we are in the
+ * process of detaching), disable the Electromechanical
+ * Interlock.
+ */
+ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_EIP) {
+ mask |= PCIEM_SLOT_CTL_EIC;
+ ei_engaged = (sc->pcie_slot_sta & PCIEM_SLOT_STA_EIS) != 0;
+ if (card_inserted != ei_engaged)
+ val |= PCIEM_SLOT_CTL_EIC;
+ }
+
+ /*
+ * Start a timer to see if the Data Link Layer times out.
+ * Note that we only start the timer if Presence Detect or MRL Sensor
+ * changed on this interrupt. Stop any scheduled timer if
+ * the Data Link Layer is active.
+ */
+ if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) {
+ if (card_inserted &&
+ !(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) &&
+ sc->pcie_slot_sta &
+ (PCIEM_SLOT_STA_MRLSC | PCIEM_SLOT_STA_PDC)) {
+ if (cold)
+ device_printf(sc->dev,
+ "Data Link Layer inactive\n");
+ else
+ callout_reset(&sc->pcie_dll_timer, hz,
+ pcib_pcie_dll_timeout, sc);
+ } else if (sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE)
+ callout_stop(&sc->pcie_dll_timer);
+ }
+
+ pcib_pcie_hotplug_command(sc, val, mask);
+
+ /*
+ * During attach the child "pci" device is added synchronously;
+ * otherwise, the task is scheduled to manage the child
+ * device.
+ */
+ if (schedule_task &&
+ (pcib_hotplug_present(sc) != 0) != (sc->child != NULL))
+ taskqueue_enqueue(taskqueue_thread, &sc->pcie_hp_task);
+}
+
+/*
+ * Interrupt handler for PCI-express HotPlug slot events.  Reads and
+ * acknowledges the slot status register, reports each latched event
+ * (attention button, power fault, MRL sensor, presence detect,
+ * command completion, data-link-layer state change), then re-runs
+ * the hotplug state machine via pcib_pcie_hotplug_update().
+ */
+static void
+pcib_pcie_intr(void *arg)
+{
+ struct pcib_softc *sc;
+ device_t dev;
+
+ sc = arg;
+ dev = sc->dev;
+ sc->pcie_slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2);
+
+ /* Clear the events just reported. */
+ pcie_write_config(dev, PCIER_SLOT_STA, sc->pcie_slot_sta, 2);
+
+ if (bootverbose)
+ device_printf(dev, "HotPlug interrupt: %#x\n",
+ sc->pcie_slot_sta);
+
+ if (sc->pcie_slot_sta & PCIEM_SLOT_STA_ABP) {
+ /* A second button press cancels a pending detach. */
+ if (sc->flags & PCIB_DETACH_PENDING) {
+ device_printf(dev,
+ "Attention Button Pressed: Detach Cancelled\n");
+ sc->flags &= ~PCIB_DETACH_PENDING;
+ callout_stop(&sc->pcie_ab_timer);
+ } else {
+ device_printf(dev,
+ "Attention Button Pressed: Detaching in 5 seconds\n");
+ sc->flags |= PCIB_DETACH_PENDING;
+ callout_reset(&sc->pcie_ab_timer, 5 * hz,
+ pcib_pcie_ab_timeout, sc);
+ }
+ }
+ if (sc->pcie_slot_sta & PCIEM_SLOT_STA_PFD)
+ device_printf(dev, "Power Fault Detected\n");
+ if (sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSC)
+ device_printf(dev, "MRL Sensor Changed to %s\n",
+ sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSS ? "open" :
+ "closed");
+ if (bootverbose && sc->pcie_slot_sta & PCIEM_SLOT_STA_PDC)
+ device_printf(dev, "Presence Detect Changed to %s\n",
+ sc->pcie_slot_sta & PCIEM_SLOT_STA_PDS ? "card present" :
+ "empty");
+ if (sc->pcie_slot_sta & PCIEM_SLOT_STA_CC)
+ pcib_pcie_hotplug_command_completed(sc);
+ if (sc->pcie_slot_sta & PCIEM_SLOT_STA_DLLSC) {
+ /* Cache the new link state for the update below. */
+ sc->pcie_link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2);
+ if (bootverbose)
+ device_printf(dev,
+ "Data Link Layer State Changed to %s\n",
+ sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE ?
+ "active" : "inactive");
+ }
+
+ pcib_pcie_hotplug_update(sc, 0, 0, true);
+}
+
+/*
+ * Taskqueue handler that brings the child "pci" bus in sync with the
+ * slot's presence state: adds and attaches the bus when a card is
+ * present, deletes it when the slot is empty.  Giant is held across
+ * the newbus operations.
+ */
+static void
+pcib_pcie_hotplug_task(void *context, int pending)
+{
+ struct pcib_softc *sc;
+ device_t dev;
+
+ sc = context;
+ mtx_lock(&Giant);
+ dev = sc->dev;
+ if (pcib_hotplug_present(sc) != 0) {
+ /* Card present but no child bus yet: create one. */
+ if (sc->child == NULL) {
+ sc->child = device_add_child(dev, "pci", -1);
+ bus_generic_attach(dev);
+ }
+ } else {
+ /* Slot empty: tear down the child bus if it exists. */
+ if (sc->child != NULL) {
+ if (device_delete_child(dev, sc->child) == 0)
+ sc->child = NULL;
+ }
+ }
+ mtx_unlock(&Giant);
+}
+
+/*
+ * Attention-button timer.  Fires five seconds after the button press;
+ * if the detach request is still pending, commit to detaching and
+ * re-run the hotplug state machine.
+ */
+static void
+pcib_pcie_ab_timeout(void *arg)
+{
+ struct pcib_softc *sc;
+ device_t dev; /* NOTE(review): assigned but unused here */
+
+ sc = arg;
+ dev = sc->dev;
+ mtx_assert(&Giant, MA_OWNED);
+ if (sc->flags & PCIB_DETACH_PENDING) {
+ sc->flags |= PCIB_DETACHING;
+ sc->flags &= ~PCIB_DETACH_PENDING;
+ pcib_pcie_hotplug_update(sc, 0, 0, true);
+ }
+}
+
+/*
+ * Command-completion timer.  If the Command Completed bit has still
+ * not been latched, assume the slot controller is wedged and force a
+ * detach; otherwise we merely missed the interrupt, so invoke the
+ * interrupt handler by hand to process the pending status.
+ */
+static void
+pcib_pcie_cc_timeout(void *arg)
+{
+ struct pcib_softc *sc;
+ device_t dev;
+ uint16_t sta;
+
+ sc = arg;
+ dev = sc->dev;
+ mtx_assert(&Giant, MA_OWNED);
+ sta = pcie_read_config(dev, PCIER_SLOT_STA, 2);
+ if (!(sta & PCIEM_SLOT_STA_CC)) {
+ device_printf(dev,
+ "HotPlug Command Timed Out - forcing detach\n");
+ sc->flags &= ~(PCIB_HOTPLUG_CMD_PENDING | PCIB_DETACH_PENDING);
+ sc->flags |= PCIB_DETACHING;
+ pcib_pcie_hotplug_update(sc, 0, 0, true);
+ } else {
+ device_printf(dev,
+ "Missed HotPlug interrupt waiting for Command Completion\n");
+ pcib_pcie_intr(sc);
+ }
+}
+
+/*
+ * Data Link Layer activation timer.  If the link never became active
+ * after a card was inserted, give up and force a detach; if the link
+ * status changed without an interrupt being seen, run the interrupt
+ * handler by hand.
+ */
+static void
+pcib_pcie_dll_timeout(void *arg)
+{
+ struct pcib_softc *sc;
+ device_t dev;
+ uint16_t sta;
+
+ sc = arg;
+ dev = sc->dev;
+ mtx_assert(&Giant, MA_OWNED);
+ sta = pcie_read_config(dev, PCIER_LINK_STA, 2);
+ if (!(sta & PCIEM_LINK_STA_DL_ACTIVE)) {
+ device_printf(dev,
+ "Timed out waiting for Data Link Layer Active\n");
+ sc->flags |= PCIB_DETACHING;
+ pcib_pcie_hotplug_update(sc, 0, 0, true);
+ } else if (sta != sc->pcie_link_sta) {
+ device_printf(dev,
+ "Missed HotPlug interrupt waiting for DLL Active\n");
+ pcib_pcie_intr(sc);
+ }
+}
+
+/*
+ * Allocate and set up the interrupt used to receive PCI-express
+ * HotPlug events.  Preference order: a single MSI-X message, then a
+ * single MSI message (both rid 1), finally the legacy INTx line
+ * (rid 0).  Returns 0 on success or ENXIO/an errno on failure.
+ */
+static int
+pcib_alloc_pcie_irq(struct pcib_softc *sc)
+{
+ device_t dev;
+ int count, error, rid;
+
+ rid = -1;
+ dev = sc->dev;
+
+ /*
+ * For simplicity, only use MSI-X if there is a single message.
+ * To support a device with multiple messages we would have to
+ * use remap intr if the MSI number is not 0.
+ */
+ count = pci_msix_count(dev);
+ if (count == 1) {
+ error = pci_alloc_msix(dev, &count);
+ if (error == 0)
+ rid = 1;
+ }
+
+ if (rid < 0 && pci_msi_count(dev) > 0) {
+ count = 1;
+ error = pci_alloc_msi(dev, &count);
+ if (error == 0)
+ rid = 1;
+ }
+
+ /* Neither MSI-X nor MSI worked: fall back to INTx. */
+ if (rid < 0)
+ rid = 0;
+
+ sc->pcie_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
+ RF_ACTIVE);
+ if (sc->pcie_irq == NULL) {
+ device_printf(dev,
+ "Failed to allocate interrupt for PCI-e events\n");
+ if (rid > 0)
+ pci_release_msi(dev);
+ return (ENXIO);
+ }
+
+ error = bus_setup_intr(dev, sc->pcie_irq, INTR_TYPE_MISC,
+ NULL, pcib_pcie_intr, sc, &sc->pcie_ihand);
+ if (error) {
+ device_printf(dev, "Failed to setup PCI-e interrupt handler\n");
+ bus_release_resource(dev, SYS_RES_IRQ, rid, sc->pcie_irq);
+ if (rid > 0)
+ pci_release_msi(dev);
+ return (error);
+ }
+ return (0);
+}
+
+/*
+ * Tear down the HotPlug interrupt handler and release the IRQ
+ * resource along with any MSI/MSI-X messages allocated for it.
+ * Returns 0 on success or the first errno encountered.
+ */
+static int
+pcib_release_pcie_irq(struct pcib_softc *sc)
+{
+ device_t dev;
+ int error;
+
+ dev = sc->dev;
+ error = bus_teardown_intr(dev, sc->pcie_irq, sc->pcie_ihand);
+ if (error)
+ return (error);
+ error = bus_free_resource(dev, SYS_RES_IRQ, sc->pcie_irq);
+ if (error)
+ return (error);
+ return (pci_release_msi(dev));
+}
+
+/*
+ * One-time HotPlug initialisation: set up the timers and the
+ * attach/detach task, allocate the event interrupt, clear any stale
+ * slot events, and enable the event sources this slot's capabilities
+ * advertise.  Returns silently if the IRQ cannot be allocated.
+ */
+static void
+pcib_setup_hotplug(struct pcib_softc *sc)
+{
+ device_t dev;
+ uint16_t mask, val;
+
+ dev = sc->dev;
+ callout_init(&sc->pcie_ab_timer, 0);
+ callout_init(&sc->pcie_cc_timer, 0);
+ callout_init(&sc->pcie_dll_timer, 0);
+ TASK_INIT(&sc->pcie_hp_task, 0, pcib_pcie_hotplug_task, sc);
+
+ /* Allocate IRQ. */
+ if (pcib_alloc_pcie_irq(sc) != 0)
+ return;
+
+ sc->pcie_link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2);
+ sc->pcie_slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2);
+
+ /* Clear any events previously pending. */
+ pcie_write_config(dev, PCIER_SLOT_STA, sc->pcie_slot_sta, 2);
+
+ /* Enable HotPlug events. */
+ mask = PCIEM_SLOT_CTL_DLLSCE | PCIEM_SLOT_CTL_HPIE |
+ PCIEM_SLOT_CTL_CCIE | PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_MRLSCE |
+ PCIEM_SLOT_CTL_PFDE | PCIEM_SLOT_CTL_ABPE;
+ /* Each optional event source is enabled only if the slot has it. */
+ val = PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_HPIE;
+ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_APB)
+ val |= PCIEM_SLOT_CTL_ABPE;
+ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PCP)
+ val |= PCIEM_SLOT_CTL_PFDE;
+ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP)
+ val |= PCIEM_SLOT_CTL_MRLSCE;
+ /* NCCS set means the slot generates no Command Completed events. */
+ if (!(sc->pcie_slot_cap & PCIEM_SLOT_CAP_NCCS))
+ val |= PCIEM_SLOT_CTL_CCIE;
+ if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE)
+ val |= PCIEM_SLOT_CTL_DLLSCE;
+
+ /* Turn the attention indicator off. */
+ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_AIP) {
+ mask |= PCIEM_SLOT_CTL_AIC;
+ val |= PCIEM_SLOT_CTL_AI_OFF;
+ }
+
+ pcib_pcie_hotplug_update(sc, val, mask, false);
+}
+
+/*
+ * Undo pcib_setup_hotplug() on driver detach: force the slot into the
+ * detaching state, wait for any in-flight hotplug command, disable
+ * all HotPlug event sources, release the interrupt and drain the
+ * timers and the attach/detach task.  Returns 0 or an errno.
+ */
+static int
+pcib_detach_hotplug(struct pcib_softc *sc)
+{
+ uint16_t mask, val;
+ int error;
+
+ /* Disable the card in the slot and force it to detach. */
+ if (sc->flags & PCIB_DETACH_PENDING) {
+ sc->flags &= ~PCIB_DETACH_PENDING;
+ callout_stop(&sc->pcie_ab_timer);
+ }
+ sc->flags |= PCIB_DETACHING;
+
+ if (sc->flags & PCIB_HOTPLUG_CMD_PENDING) {
+ callout_stop(&sc->pcie_cc_timer);
+ /* Wait up to one second for the pending command. */
+ tsleep(sc, 0, "hpcmd", hz);
+ sc->flags &= ~PCIB_HOTPLUG_CMD_PENDING;
+ }
+
+ /* Disable HotPlug events. */
+ mask = PCIEM_SLOT_CTL_DLLSCE | PCIEM_SLOT_CTL_HPIE |
+ PCIEM_SLOT_CTL_CCIE | PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_MRLSCE |
+ PCIEM_SLOT_CTL_PFDE | PCIEM_SLOT_CTL_ABPE;
+ val = 0;
+
+ /* Turn the attention indicator off. */
+ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_AIP) {
+ mask |= PCIEM_SLOT_CTL_AIC;
+ val |= PCIEM_SLOT_CTL_AI_OFF;
+ }
+
+ pcib_pcie_hotplug_update(sc, val, mask, false);
+
+ error = pcib_release_pcie_irq(sc);
+ if (error)
+ return (error);
+ taskqueue_drain(taskqueue_thread, &sc->pcie_hp_task);
+ callout_drain(&sc->pcie_ab_timer);
+ callout_drain(&sc->pcie_cc_timer);
+ callout_drain(&sc->pcie_dll_timer);
+ return (0);
+}
+#endif
+
/*
* Get current bridge configuration.
*/
static void
pcib_cfg_save(struct pcib_softc *sc)
{
+#ifndef NEW_PCIB
device_t dev;
+ uint16_t command;
dev = sc->dev;
- sc->command = pci_read_config(dev, PCIR_COMMAND, 2);
- sc->pribus = pci_read_config(dev, PCIR_PRIBUS_1, 1);
- sc->secbus = pci_read_config(dev, PCIR_SECBUS_1, 1);
- sc->subbus = pci_read_config(dev, PCIR_SUBBUS_1, 1);
- sc->bridgectl = pci_read_config(dev, PCIR_BRIDGECTL_1, 2);
- sc->seclat = pci_read_config(dev, PCIR_SECLAT_1, 1);
-#ifndef NEW_PCIB
- if (sc->command & PCIM_CMD_PORTEN)
+ command = pci_read_config(dev, PCIR_COMMAND, 2);
+ if (command & PCIM_CMD_PORTEN)
pcib_get_io_decode(sc);
- if (sc->command & PCIM_CMD_MEMEN)
+ if (command & PCIM_CMD_MEMEN)
pcib_get_mem_decode(sc);
#endif
}
@@ -690,21 +1476,18 @@ static void
pcib_cfg_restore(struct pcib_softc *sc)
{
device_t dev;
-
+#ifndef NEW_PCIB
+ uint16_t command;
+#endif
dev = sc->dev;
- pci_write_config(dev, PCIR_COMMAND, sc->command, 2);
- pci_write_config(dev, PCIR_PRIBUS_1, sc->pribus, 1);
- pci_write_config(dev, PCIR_SECBUS_1, sc->secbus, 1);
- pci_write_config(dev, PCIR_SUBBUS_1, sc->subbus, 1);
- pci_write_config(dev, PCIR_BRIDGECTL_1, sc->bridgectl, 2);
- pci_write_config(dev, PCIR_SECLAT_1, sc->seclat, 1);
#ifdef NEW_PCIB
pcib_write_windows(sc, WIN_IO | WIN_MEM | WIN_PMEM);
#else
- if (sc->command & PCIM_CMD_PORTEN)
+ command = pci_read_config(dev, PCIR_COMMAND, 2);
+ if (command & PCIM_CMD_PORTEN)
pcib_set_io_decode(sc);
- if (sc->command & PCIM_CMD_MEMEN)
+ if (command & PCIM_CMD_MEMEN)
pcib_set_mem_decode(sc);
#endif
}
@@ -738,10 +1521,21 @@ pcib_attach_common(device_t dev)
* Get current bridge configuration.
*/
sc->domain = pci_get_domain(dev);
- sc->secstat = pci_read_config(dev, PCIR_SECSTAT_1, 2);
+#if !(defined(NEW_PCIB) && defined(PCI_RES_BUS))
+ sc->bus.sec = pci_read_config(dev, PCIR_SECBUS_1, 1);
+ sc->bus.sub = pci_read_config(dev, PCIR_SUBBUS_1, 1);
+#endif
+ sc->bridgectl = pci_read_config(dev, PCIR_BRIDGECTL_1, 2);
pcib_cfg_save(sc);
/*
+ * The primary bus register should always be the bus of the
+ * parent.
+ */
+ sc->pribus = pci_get_bus(dev);
+ pci_write_config(dev, PCIR_PRIBUS_1, sc->pribus, 1);
+
+ /*
* Setup sysctl reporting nodes
*/
sctx = device_get_sysctl_ctx(dev);
@@ -751,65 +1545,68 @@ pcib_attach_common(device_t dev)
SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "pribus",
CTLFLAG_RD, &sc->pribus, 0, "Primary bus number");
SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "secbus",
- CTLFLAG_RD, &sc->secbus, 0, "Secondary bus number");
+ CTLFLAG_RD, &sc->bus.sec, 0, "Secondary bus number");
SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "subbus",
- CTLFLAG_RD, &sc->subbus, 0, "Subordinate bus number");
+ CTLFLAG_RD, &sc->bus.sub, 0, "Subordinate bus number");
/*
* Quirk handling.
*/
switch (pci_get_devid(dev)) {
+#if !(defined(NEW_PCIB) && defined(PCI_RES_BUS))
case 0x12258086: /* Intel 82454KX/GX (Orion) */
{
uint8_t supbus;
supbus = pci_read_config(dev, 0x41, 1);
if (supbus != 0xff) {
- sc->secbus = supbus + 1;
- sc->subbus = supbus + 1;
+ sc->bus.sec = supbus + 1;
+ sc->bus.sub = supbus + 1;
}
break;
}
+#endif
/*
* The i82380FB mobile docking controller is a PCI-PCI bridge,
* and it is a subtractive bridge. However, the ProgIf is wrong
* so the normal setting of PCIB_SUBTRACTIVE bit doesn't
- * happen. There's also a Toshiba bridge that behaves this
- * way.
+ * happen. There are also Toshiba and Cavium ThunderX bridges
+ * that behave this way.
*/
+ case 0xa002177d: /* Cavium ThunderX */
case 0x124b8086: /* Intel 82380FB Mobile */
case 0x060513d7: /* Toshiba ???? */
sc->flags |= PCIB_SUBTRACTIVE;
break;
-#ifndef __rtems__
+#if !(defined(NEW_PCIB) && defined(PCI_RES_BUS))
/* Compaq R3000 BIOS sets wrong subordinate bus number. */
case 0x00dd10de:
{
char *cp;
- if ((cp = getenv("smbios.planar.maker")) == NULL)
+ if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
break;
if (strncmp(cp, "Compal", 6) != 0) {
freeenv(cp);
break;
}
freeenv(cp);
- if ((cp = getenv("smbios.planar.product")) == NULL)
+ if ((cp = kern_getenv("smbios.planar.product")) == NULL)
break;
if (strncmp(cp, "08A0", 4) != 0) {
freeenv(cp);
break;
}
freeenv(cp);
- if (sc->subbus < 0xa) {
+ if (sc->bus.sub < 0xa) {
pci_write_config(dev, PCIR_SUBBUS_1, 0xa, 1);
- sc->subbus = pci_read_config(dev, PCIR_SUBBUS_1, 1);
+ sc->bus.sub = pci_read_config(dev, PCIR_SUBBUS_1, 1);
}
break;
}
-#endif /* __rtems__ */
+#endif
}
if (pci_msi_device_blacklisted(dev))
@@ -830,13 +1627,23 @@ pcib_attach_common(device_t dev)
pci_read_config(dev, PCIR_PROGIF, 1) == PCIP_BRIDGE_PCI_SUBTRACTIVE)
sc->flags |= PCIB_SUBTRACTIVE;
+#ifdef PCI_HP
+ pcib_probe_hotplug(sc);
+#endif
#ifdef NEW_PCIB
+#ifdef PCI_RES_BUS
+ pcib_setup_secbus(dev, &sc->bus, 1);
+#endif
pcib_probe_windows(sc);
#endif
+#ifdef PCI_HP
+ if (sc->flags & PCIB_HOTPLUG)
+ pcib_setup_hotplug(sc);
+#endif
if (bootverbose) {
device_printf(dev, " domain %d\n", sc->domain);
- device_printf(dev, " secondary bus %d\n", sc->secbus);
- device_printf(dev, " subordinate bus %d\n", sc->subbus);
+ device_printf(dev, " secondary bus %d\n", sc->bus.sec);
+ device_printf(dev, " subordinate bus %d\n", sc->bus.sub);
#ifdef NEW_PCIB
if (pcib_is_window_open(&sc->io))
device_printf(dev, " I/O decode 0x%jx-0x%jx\n",
@@ -877,20 +1684,6 @@ pcib_attach_common(device_t dev)
}
/*
- * XXX If the secondary bus number is zero, we should assign a bus number
- * since the BIOS hasn't, then initialise the bridge. A simple
- * bus_alloc_resource with the a couple of busses seems like the right
- * approach, but we don't know what busses the BIOS might have already
- * assigned to other bridges on this bus that probe later than we do.
- *
- * If the subordinate bus number is less than the secondary bus number,
- * we should pick a better value. One sensible alternative would be to
- * pick 255; the only tradeoff here is that configuration transactions
- * would be more widely routed than absolutely necessary. We could
- * then do a walk of the tree later and fix it.
- */
-
- /*
* Always enable busmastering on bridges so that transactions
* initiated on the secondary bus are passed through to the
* primary bus.
@@ -898,66 +1691,138 @@ pcib_attach_common(device_t dev)
pci_enable_busmaster(dev);
}
+#ifdef PCI_HP
+/*
+ * Returns non-zero if a device is present below this bridge: always
+ * true for non-HotPlug bridges, otherwise defer to the HotPlug
+ * presence logic.
+ */
+static int
+pcib_present(struct pcib_softc *sc)
+{
+
+ if (sc->flags & PCIB_HOTPLUG)
+ return (pcib_hotplug_present(sc) != 0);
+ return (1);
+}
+#endif
+
+/*
+ * Add and attach the child "pci" bus, unless the bridge has no
+ * secondary bus or is an empty HotPlug slot (in which case the bus is
+ * added later, when a card arrives).  Returns 0 or the error from
+ * bus_generic_attach().
+ */
+int
+pcib_attach_child(device_t dev)
+{
+ struct pcib_softc *sc;
+
+ sc = device_get_softc(dev);
+ if (sc->bus.sec == 0) {
+ /* no secondary bus; we should have fixed this */
+ return(0);
+ }
+
+#ifdef PCI_HP
+ if (!pcib_present(sc)) {
+ /* An empty HotPlug slot, so don't add a PCI bus yet. */
+ return (0);
+ }
+#endif
+
+ sc->child = device_add_child(dev, "pci", -1);
+ return (bus_generic_attach(dev));
+}
+
int
pcib_attach(device_t dev)
{
- struct pcib_softc *sc;
- device_t child;
pcib_attach_common(dev);
- sc = device_get_softc(dev);
- if (sc->secbus != 0) {
- child = device_add_child(dev, "pci", sc->secbus);
- if (child != NULL)
- return(bus_generic_attach(dev));
- }
+ return (pcib_attach_child(dev));
+}
- /* no secondary bus; we should have fixed this */
- return(0);
+/*
+ * Device detach method: detach the children, tear down HotPlug state
+ * if it was enabled, delete the child devices and finally release the
+ * bridge's resource windows and secondary bus range.  Returns 0 or
+ * the first errno encountered.
+ */
+int
+pcib_detach(device_t dev)
+{
+#if defined(PCI_HP) || defined(NEW_PCIB)
+ struct pcib_softc *sc;
+#endif
+ int error;
+
+#if defined(PCI_HP) || defined(NEW_PCIB)
+ sc = device_get_softc(dev);
+#endif
+ error = bus_generic_detach(dev);
+ if (error)
+ return (error);
+#ifdef PCI_HP
+ if (sc->flags & PCIB_HOTPLUG) {
+ error = pcib_detach_hotplug(sc);
+ if (error)
+ return (error);
+ }
+#endif
+ error = device_delete_children(dev);
+ if (error)
+ return (error);
+#ifdef NEW_PCIB
+ pcib_free_windows(sc);
+#ifdef PCI_RES_BUS
+ pcib_free_secbus(dev, &sc->bus);
+#endif
+#endif
+ return (0);
 }
int
pcib_suspend(device_t dev)
{
- device_t pcib;
- int dstate, error;
pcib_cfg_save(device_get_softc(dev));
- error = bus_generic_suspend(dev);
- if (error == 0 && pci_do_power_suspend) {
- dstate = PCI_POWERSTATE_D3;
- pcib = device_get_parent(device_get_parent(dev));
- if (PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
- pci_set_powerstate(dev, dstate);
- }
- return (error);
+ return (bus_generic_suspend(dev));
}
int
pcib_resume(device_t dev)
{
- device_t pcib;
- if (pci_do_power_resume) {
- pcib = device_get_parent(device_get_parent(dev));
- if (PCIB_POWER_FOR_SLEEP(pcib, dev, NULL) == 0)
- pci_set_powerstate(dev, PCI_POWERSTATE_D0);
- }
pcib_cfg_restore(device_get_softc(dev));
return (bus_generic_resume(dev));
}
+/*
+ * Reset the bridge's I/O, memory and prefetchable-memory window
+ * registers so that each base is above its limit, leaving every
+ * window closed until real ranges are assigned.
+ */
+void
+pcib_bridge_init(device_t dev)
+{
+ pci_write_config(dev, PCIR_IOBASEL_1, 0xff, 1);
+ pci_write_config(dev, PCIR_IOBASEH_1, 0xffff, 2);
+ pci_write_config(dev, PCIR_IOLIMITL_1, 0, 1);
+ pci_write_config(dev, PCIR_IOLIMITH_1, 0, 2);
+ pci_write_config(dev, PCIR_MEMBASE_1, 0xffff, 2);
+ pci_write_config(dev, PCIR_MEMLIMIT_1, 0, 2);
+ pci_write_config(dev, PCIR_PMBASEL_1, 0xffff, 2);
+ pci_write_config(dev, PCIR_PMBASEH_1, 0xffffffff, 4);
+ pci_write_config(dev, PCIR_PMLIMITL_1, 0, 2);
+ pci_write_config(dev, PCIR_PMLIMITH_1, 0, 4);
+}
+
+/*
+ * Bus method: a child is considered present only if our parent bus
+ * reports us present and, for HotPlug bridges, a card is actually in
+ * the slot.
+ */
+int
+pcib_child_present(device_t dev, device_t child)
+{
+#ifdef PCI_HP
+ struct pcib_softc *sc = device_get_softc(dev);
+ int retval;
+
+ retval = bus_child_present(dev);
+ if (retval != 0 && sc->flags & PCIB_HOTPLUG)
+ retval = pcib_hotplug_present(sc);
+ return (retval);
+#else
+ return (bus_child_present(dev));
+#endif
+}
+
int
pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
struct pcib_softc *sc = device_get_softc(dev);
-
+
switch (which) {
case PCIB_IVAR_DOMAIN:
*result = sc->domain;
return(0);
case PCIB_IVAR_BUS:
- *result = sc->secbus;
+ *result = sc->bus.sec;
return(0);
}
return(ENOENT);
@@ -966,14 +1831,12 @@ pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
int
pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
- struct pcib_softc *sc = device_get_softc(dev);
switch (which) {
case PCIB_IVAR_DOMAIN:
return(EINVAL);
case PCIB_IVAR_BUS:
- sc->secbus = value;
- return(0);
+ return(EINVAL);
}
return(ENOENT);
}
@@ -985,8 +1848,8 @@ pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
*/
static struct resource *
pcib_suballoc_resource(struct pcib_softc *sc, struct pcib_window *w,
- device_t child, int type, int *rid, u_long start, u_long end, u_long count,
- u_int flags)
+ device_t child, int type, int *rid, rman_res_t start, rman_res_t end,
+ rman_res_t count, u_int flags)
{
struct resource *res;
@@ -1000,7 +1863,7 @@ pcib_suballoc_resource(struct pcib_softc *sc, struct pcib_window *w,
if (bootverbose)
device_printf(sc->dev,
- "allocated %s range (%#lx-%#lx) for rid %x of %s\n",
+ "allocated %s range (%#jx-%#jx) for rid %x of %s\n",
w->name, rman_get_start(res), rman_get_end(res), *rid,
pcib_child_name(child));
rman_set_rid(res, *rid);
@@ -1023,10 +1886,10 @@ pcib_suballoc_resource(struct pcib_softc *sc, struct pcib_window *w,
/* Allocate a fresh resource range for an unconfigured window. */
static int
pcib_alloc_new_window(struct pcib_softc *sc, struct pcib_window *w, int type,
- u_long start, u_long end, u_long count, u_int flags)
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
struct resource *res;
- u_long base, limit, wmask;
+ rman_res_t base, limit, wmask;
int rid;
/*
@@ -1070,17 +1933,17 @@ pcib_alloc_new_window(struct pcib_softc *sc, struct pcib_window *w, int type,
return (0);
}
}
- return (ENOSPC);
+ return (ENOSPC);
}
-
- wmask = (1ul << w->step) - 1;
+
+ wmask = ((rman_res_t)1 << w->step) - 1;
if (RF_ALIGNMENT(flags) < w->step) {
flags &= ~RF_ALIGNMENT_MASK;
flags |= RF_ALIGNMENT_LOG2(w->step);
}
start &= ~wmask;
end |= wmask;
- count = roundup2(count, 1ul << w->step);
+ count = roundup2(count, (rman_res_t)1 << w->step);
rid = w->reg;
res = bus_alloc_resource(sc->dev, type, &rid, start, end, count,
flags & ~RF_ACTIVE);
@@ -1096,7 +1959,7 @@ pcib_alloc_new_window(struct pcib_softc *sc, struct pcib_window *w, int type,
/* Try to expand an existing window to the requested base and limit. */
static int
pcib_expand_window(struct pcib_softc *sc, struct pcib_window *w, int type,
- u_long base, u_long limit)
+ rman_res_t base, rman_res_t limit)
{
struct resource *res;
int error, i, force_64k_base;
@@ -1164,7 +2027,7 @@ pcib_expand_window(struct pcib_softc *sc, struct pcib_window *w, int type,
KASSERT(w->base == rman_get_start(res),
("existing resource mismatch"));
force_64k_base = 0;
- }
+ }
error = bus_adjust_resource(sc->dev, type, res, force_64k_base ?
rman_get_start(res) : base, limit);
@@ -1194,9 +2057,9 @@ pcib_expand_window(struct pcib_softc *sc, struct pcib_window *w, int type,
*/
static int
pcib_grow_window(struct pcib_softc *sc, struct pcib_window *w, int type,
- u_long start, u_long end, u_long count, u_int flags)
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
- u_long align, start_free, end_free, front, back, wmask;
+ rman_res_t align, start_free, end_free, front, back, wmask;
int error;
/*
@@ -1215,7 +2078,7 @@ pcib_grow_window(struct pcib_softc *sc, struct pcib_window *w, int type,
end = w->rman.rm_end;
if (start + count - 1 > end || start + count < start)
return (EINVAL);
- wmask = (1ul << w->step) - 1;
+ wmask = ((rman_res_t)1 << w->step) - 1;
/*
* If there is no resource at all, just try to allocate enough
@@ -1227,7 +2090,7 @@ pcib_grow_window(struct pcib_softc *sc, struct pcib_window *w, int type,
if (error) {
if (bootverbose)
device_printf(sc->dev,
- "failed to allocate initial %s window (%#lx-%#lx,%#lx)\n",
+ "failed to allocate initial %s window (%#jx-%#jx,%#jx)\n",
w->name, start, end, count);
return (error);
}
@@ -1259,9 +2122,9 @@ pcib_grow_window(struct pcib_softc *sc, struct pcib_window *w, int type,
*/
if (bootverbose)
device_printf(sc->dev,
- "attempting to grow %s window for (%#lx-%#lx,%#lx)\n",
+ "attempting to grow %s window for (%#jx-%#jx,%#jx)\n",
w->name, start, end, count);
- align = 1ul << RF_ALIGNMENT(flags);
+ align = (rman_res_t)1 << RF_ALIGNMENT(flags);
if (start < w->base) {
if (rman_first_free_region(&w->rman, &start_free, &end_free) !=
0 || start_free != w->base)
@@ -1283,7 +2146,7 @@ pcib_grow_window(struct pcib_softc *sc, struct pcib_window *w, int type,
*/
if (front >= start && front <= end_free) {
if (bootverbose)
- printf("\tfront candidate range: %#lx-%#lx\n",
+ printf("\tfront candidate range: %#jx-%#jx\n",
front, end_free);
front &= ~wmask;
front = w->base - front;
@@ -1311,7 +2174,7 @@ pcib_grow_window(struct pcib_softc *sc, struct pcib_window *w, int type,
*/
if (back <= end && start_free <= back) {
if (bootverbose)
- printf("\tback candidate range: %#lx-%#lx\n",
+ printf("\tback candidate range: %#jx-%#jx\n",
start_free, back);
back |= wmask;
back -= w->limit;
@@ -1361,7 +2224,7 @@ updatewin:
*/
struct resource *
pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
- u_long start, u_long end, u_long count, u_int flags)
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
struct pcib_softc *sc;
struct resource *r;
@@ -1383,6 +2246,11 @@ pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
}
switch (type) {
+#ifdef PCI_RES_BUS
+ case PCI_RES_BUS:
+ return (pcib_alloc_subbus(&sc->bus, child, rid, start, end,
+ count, flags));
+#endif
case SYS_RES_IOPORT:
if (pcib_is_isa_range(sc, start, end, count))
return (NULL);
@@ -1445,7 +2313,7 @@ pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
int
pcib_adjust_resource(device_t bus, device_t child, int type, struct resource *r,
- u_long start, u_long end)
+ rman_res_t start, rman_res_t end)
{
struct pcib_softc *sc;
@@ -1479,8 +2347,8 @@ pcib_release_resource(device_t dev, device_t child, int type, int rid,
* is set up to, or capable of handling them.
*/
struct resource *
-pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
- u_long start, u_long end, u_long count, u_int flags)
+pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
struct pcib_softc *sc = device_get_softc(dev);
const char *name, *suffix;
@@ -1530,7 +2398,7 @@ pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
#endif
}
if (end < start) {
- device_printf(dev, "ioport: end (%lx) < start (%lx)\n",
+ device_printf(dev, "ioport: end (%jx) < start (%jx)\n",
end, start);
start = 0;
end = 0;
@@ -1538,13 +2406,13 @@ pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
}
if (!ok) {
device_printf(dev, "%s%srequested unsupported I/O "
- "range 0x%lx-0x%lx (decoding 0x%x-0x%x)\n",
+ "range 0x%jx-0x%jx (decoding 0x%x-0x%x)\n",
name, suffix, start, end, sc->iobase, sc->iolimit);
return (NULL);
}
if (bootverbose)
device_printf(dev,
- "%s%srequested I/O range 0x%lx-0x%lx: in range\n",
+ "%s%srequested I/O range 0x%jx-0x%jx: in range\n",
name, suffix, start, end);
break;
@@ -1599,7 +2467,7 @@ pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
#endif
}
if (end < start) {
- device_printf(dev, "memory: end (%lx) < start (%lx)\n",
+ device_printf(dev, "memory: end (%jx) < start (%jx)\n",
end, start);
start = 0;
end = 0;
@@ -1607,7 +2475,7 @@ pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
}
if (!ok && bootverbose)
device_printf(dev,
- "%s%srequested unsupported memory range %#lx-%#lx "
+ "%s%srequested unsupported memory range %#jx-%#jx "
"(decoding %#jx-%#jx, %#jx-%#jx)\n",
name, suffix, start, end,
(uintmax_t)sc->membase, (uintmax_t)sc->memlimit,
@@ -1616,7 +2484,7 @@ pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
return (NULL);
if (bootverbose)
device_printf(dev,"%s%srequested memory range "
- "0x%lx-0x%lx: good\n",
+ "0x%jx-0x%jx: good\n",
name, suffix, start, end);
break;
@@ -1632,27 +2500,132 @@ pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
#endif
/*
+ * If ARI is enabled on this downstream port, translate the function number
+ * to the non-ARI slot/function. The downstream port will convert it back in
+ * hardware. If ARI is not enabled slot and func are not modified.
+ */
+static __inline void
+pcib_xlate_ari(device_t pcib, int bus, int *slot, int *func)
+{
+ struct pcib_softc *sc;
+ int ari_func; /* function number before translation */
+
+ /* NOTE(review): the bus parameter is currently unused. */
+ sc = device_get_softc(pcib);
+ ari_func = *func;
+
+ if (sc->flags & PCIB_ENABLE_ARI) {
+ /* ARI devices always address slot 0. */
+ KASSERT(*slot == 0,
+ ("Non-zero slot number with ARI enabled!"));
+ *slot = PCIE_ARI_SLOT(ari_func);
+ *func = PCIE_ARI_FUNC(ari_func);
+ }
+}
+
+
+/*
+ * Set the ARI Forwarding Enable bit in the downstream port's Device
+ * Control 2 register and record the fact in the softc flags so that
+ * config accesses are translated from now on.
+ */
+static void
+pcib_enable_ari(struct pcib_softc *sc, uint32_t pcie_pos)
+{
+ uint32_t ctl2;
+
+ ctl2 = pci_read_config(sc->dev, pcie_pos + PCIER_DEVICE_CTL2, 4);
+ ctl2 |= PCIEM_CTL2_ARI;
+ pci_write_config(sc->dev, pcie_pos + PCIER_DEVICE_CTL2, ctl2, 4);
+
+ sc->flags |= PCIB_ENABLE_ARI;
+}
+
+/*
* PCIB interface.
*/
int
pcib_maxslots(device_t dev)
{
- return(PCI_SLOTMAX);
+ return (PCI_SLOTMAX);
+}
+
+/*
+ * Maximum slot number below this bridge, taking ARI into account
+ * (ARI repurposes the slot bits of the routing ID).
+ */
+static int
+pcib_ari_maxslots(device_t dev)
+{
+ struct pcib_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ if (sc->flags & PCIB_ENABLE_ARI)
+ return (PCIE_ARI_SLOTMAX);
+ else
+ return (PCI_SLOTMAX);
+}
+
+/*
+ * Maximum function number below this bridge, taking ARI into account.
+ */
+static int
+pcib_ari_maxfuncs(device_t dev)
+{
+ struct pcib_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ if (sc->flags & PCIB_ENABLE_ARI)
+ return (PCIE_ARI_FUNCMAX);
+ else
+ return (PCI_FUNCMAX);
+}
+
+/*
+ * Split a routing ID into its bus/slot/function components, using the
+ * ARI field layout when ARI forwarding is enabled on this port and
+ * the conventional layout otherwise.
+ */
+static void
+pcib_ari_decode_rid(device_t pcib, uint16_t rid, int *bus, int *slot,
+ int *func)
+{
+ struct pcib_softc *sc;
+
+ sc = device_get_softc(pcib);
+
+ *bus = PCI_RID2BUS(rid);
+ if (sc->flags & PCIB_ENABLE_ARI) {
+ *slot = PCIE_ARI_RID2SLOT(rid);
+ *func = PCIE_ARI_RID2FUNC(rid);
+ } else {
+ *slot = PCI_RID2SLOT(rid);
+ *func = PCI_RID2FUNC(rid);
+ }
+}
/*
* Since we are a child of a PCI bus, its parent must support the pcib interface.
*/
-uint32_t
+static uint32_t
pcib_read_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, int width)
{
- return(PCIB_READ_CONFIG(device_get_parent(device_get_parent(dev)), b, s, f, reg, width));
+#ifdef PCI_HP
+ struct pcib_softc *sc;
+
+ sc = device_get_softc(dev);
+ if (!pcib_present(sc)) {
+ switch (width) {
+ case 2:
+ return (0xffff);
+ case 1:
+ return (0xff);
+ default:
+ return (0xffffffff);
+ }
+ }
+#endif
+ pcib_xlate_ari(dev, b, &s, &f);
+ return(PCIB_READ_CONFIG(device_get_parent(device_get_parent(dev)), b, s,
+ f, reg, width));
}
-void
+static void
pcib_write_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, uint32_t val, int width)
{
- PCIB_WRITE_CONFIG(device_get_parent(device_get_parent(dev)), b, s, f, reg, val, width);
+#ifdef PCI_HP
+ struct pcib_softc *sc;
+
+ sc = device_get_softc(dev);
+ if (!pcib_present(sc))
+ return;
+#endif
+ pcib_xlate_ari(dev, b, &s, &f);
+ PCIB_WRITE_CONFIG(device_get_parent(device_get_parent(dev)), b, s, f,
+ reg, val, width);
}
/*
@@ -1665,7 +2638,7 @@ pcib_route_interrupt(device_t pcib, device_t dev, int pin)
int parent_intpin;
int intnum;
- /*
+ /*
*
* The PCI standard defines a swizzle of the child-side device/intpin to
* the parent-side intpin as follows.
@@ -1764,3 +2737,101 @@ pcib_power_for_sleep(device_t pcib, device_t dev, int *pstate)
bus = device_get_parent(pcib);
return (PCIB_POWER_FOR_SLEEP(bus, dev, pstate));
}
+
+/*
+ * Returns non-zero if ARI forwarding has been enabled on this
+ * downstream port.
+ */
+static int
+pcib_ari_enabled(device_t pcib)
+{
+ struct pcib_softc *sc;
+
+ sc = device_get_softc(pcib);
+
+ return ((sc->flags & PCIB_ENABLE_ARI) != 0);
+}
+
+/*
+ * Compute an identifier of the requested type for a child device.
+ * Routing IDs are synthesised locally, ARI-aware (slot bits folded
+ * into the function number); any other ID type is forwarded to the
+ * parent bridge driver.  Returns 0 or the parent's error.
+ */
+static int
+pcib_ari_get_id(device_t pcib, device_t dev, enum pci_id_type type,
+ uintptr_t *id)
+{
+ struct pcib_softc *sc;
+ device_t bus_dev;
+ uint8_t bus, slot, func;
+
+ if (type != PCI_ID_RID) {
+ bus_dev = device_get_parent(pcib);
+ return (PCIB_GET_ID(device_get_parent(bus_dev), dev, type, id));
+ }
+
+ sc = device_get_softc(pcib);
+
+ if (sc->flags & PCIB_ENABLE_ARI) {
+ bus = pci_get_bus(dev);
+ func = pci_get_function(dev);
+
+ *id = (PCI_ARI_RID(bus, func));
+ } else {
+ bus = pci_get_bus(dev);
+ slot = pci_get_slot(dev);
+ func = pci_get_function(dev);
+
+ *id = (PCI_RID(bus, slot, func));
+ }
+
+ return (0);
+}
+
+/*
+ * Check that the downstream port (pcib) and the endpoint device (dev) both
+ * support ARI. If so, enable it and return 0, otherwise return an error.
+ */
+static int
+pcib_try_enable_ari(device_t pcib, device_t dev)
+{
+ struct pcib_softc *sc;
+ int error;
+ uint32_t cap2;
+ int ari_cap_off;
+ uint32_t ari_ver;
+ uint32_t pcie_pos;
+
+ sc = device_get_softc(pcib);
+
+ /*
+ * ARI is controlled in a register in the PCIe capability structure.
+ * If the downstream port does not have the PCIe capability structure
+ * then it does not support ARI.
+ */
+ error = pci_find_cap(pcib, PCIY_EXPRESS, &pcie_pos);
+ if (error != 0)
+ return (ENODEV);
+
+ /* Check that the PCIe port advertises ARI support. */
+ cap2 = pci_read_config(pcib, pcie_pos + PCIER_DEVICE_CAP2, 4);
+ if (!(cap2 & PCIEM_CAP2_ARI))
+ return (ENODEV);
+
+ /*
+ * Check that the endpoint device advertises ARI support via the ARI
+ * extended capability structure.
+ */
+ error = pci_find_extcap(dev, PCIZ_ARI, &ari_cap_off);
+ if (error != 0)
+ return (ENODEV);
+
+ /*
+ * Finally, check that the endpoint device supports the same version
+ * of ARI that we do.
+ */
+ ari_ver = pci_read_config(dev, ari_cap_off, 4);
+ if (PCI_EXTCAP_VER(ari_ver) != PCIB_SUPPORTED_ARI_VER) {
+ if (bootverbose)
+ device_printf(pcib,
+ "Unsupported version of ARI (%d) detected\n",
+ PCI_EXTCAP_VER(ari_ver));
+
+ return (ENXIO);
+ }
+
+ /* All checks passed: turn on ARI forwarding. */
+ pcib_enable_ari(sc, pcie_pos);
+
+ return (0);
+}
diff --git a/freebsd/sys/dev/pci/pci_private.h b/freebsd/sys/dev/pci/pci_private.h
index 92f36d5c..b0f14818 100644
--- a/freebsd/sys/dev/pci/pci_private.h
+++ b/freebsd/sys/dev/pci/pci_private.h
@@ -40,19 +40,26 @@ DECLARE_CLASS(pci_driver);
struct pci_softc {
bus_dma_tag_t sc_dma_tag;
+#ifdef PCI_RES_BUS
+ struct resource *sc_bus;
+#endif
};
extern int pci_do_power_resume;
extern int pci_do_power_suspend;
-void pci_add_children(device_t dev, int domain, int busno,
- size_t dinfo_size);
+void pci_add_children(device_t dev, int domain, int busno);
void pci_add_child(device_t bus, struct pci_devinfo *dinfo);
+device_t pci_add_iov_child(device_t bus, device_t pf, uint16_t rid,
+ uint16_t vid, uint16_t did);
void pci_add_resources(device_t bus, device_t dev, int force,
uint32_t prefetchmask);
+void pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov);
+struct pci_devinfo *pci_alloc_devinfo_method(device_t dev);
int pci_attach_common(device_t dev);
-void pci_delete_child(device_t dev, device_t child);
+int pci_rescan_method(device_t dev);
void pci_driver_added(device_t dev, driver_t *driver);
+int pci_ea_is_enabled(device_t dev, int rid);
int pci_print_child(device_t dev, device_t child);
void pci_probe_nomatch(device_t dev, device_t child);
int pci_read_ivar(device_t dev, device_t child, int which,
@@ -79,18 +86,29 @@ int pci_enable_busmaster_method(device_t dev, device_t child);
int pci_disable_busmaster_method(device_t dev, device_t child);
int pci_enable_io_method(device_t dev, device_t child, int space);
int pci_disable_io_method(device_t dev, device_t child, int space);
+int pci_find_cap_method(device_t dev, device_t child,
+ int capability, int *capreg);
int pci_find_extcap_method(device_t dev, device_t child,
int capability, int *capreg);
+int pci_find_htcap_method(device_t dev, device_t child,
+ int capability, int *capreg);
int pci_alloc_msi_method(device_t dev, device_t child, int *count);
int pci_alloc_msix_method(device_t dev, device_t child, int *count);
+void pci_enable_msi_method(device_t dev, device_t child,
+ uint64_t address, uint16_t data);
+void pci_enable_msix_method(device_t dev, device_t child,
+ u_int index, uint64_t address, uint32_t data);
+void pci_disable_msi_method(device_t dev, device_t child);
int pci_remap_msix_method(device_t dev, device_t child,
int count, const u_int *vectors);
int pci_release_msi_method(device_t dev, device_t child);
int pci_msi_count_method(device_t dev, device_t child);
int pci_msix_count_method(device_t dev, device_t child);
+int pci_msix_pba_bar_method(device_t dev, device_t child);
+int pci_msix_table_bar_method(device_t dev, device_t child);
struct resource *pci_alloc_resource(device_t dev, device_t child,
- int type, int *rid, u_long start, u_long end, u_long count,
- u_int flags);
+ int type, int *rid, rman_res_t start, rman_res_t end,
+ rman_res_t count, u_int flags);
int pci_release_resource(device_t dev, device_t child, int type,
int rid, struct resource *r);
int pci_activate_resource(device_t dev, device_t child, int type,
@@ -100,17 +118,22 @@ int pci_deactivate_resource(device_t dev, device_t child, int type,
void pci_delete_resource(device_t dev, device_t child,
int type, int rid);
struct resource_list *pci_get_resource_list (device_t dev, device_t child);
-struct pci_devinfo *pci_read_device(device_t pcib, int d, int b, int s, int f,
- size_t size);
+struct pci_devinfo *pci_read_device(device_t pcib, device_t bus, int d, int b,
+ int s, int f);
void pci_print_verbose(struct pci_devinfo *dinfo);
int pci_freecfg(struct pci_devinfo *dinfo);
+void pci_child_deleted(device_t dev, device_t child);
+void pci_child_detached(device_t dev, device_t child);
int pci_child_location_str_method(device_t cbdev, device_t child,
char *buf, size_t buflen);
int pci_child_pnpinfo_str_method(device_t cbdev, device_t child,
char *buf, size_t buflen);
int pci_assign_interrupt_method(device_t dev, device_t child);
int pci_resume(device_t dev);
-int pci_suspend(device_t dev);
+int pci_resume_child(device_t dev, device_t child);
+int pci_suspend_child(device_t dev, device_t child);
+bus_dma_tag_t pci_get_dma_tag(device_t bus, device_t dev);
+void pci_child_added_method(device_t dev, device_t child);
/** Restore the config register state. The state must be previously
* saved with pci_cfg_save. However, the pci bus driver takes care of
@@ -124,4 +147,27 @@ void pci_cfg_restore(device_t, struct pci_devinfo *);
*/
void pci_cfg_save(device_t, struct pci_devinfo *, int);
+int pci_mapsize(uint64_t testval);
+void pci_read_bar(device_t dev, int reg, pci_addr_t *mapp,
+ pci_addr_t *testvalp, int *bar64);
+struct pci_map *pci_add_bar(device_t dev, int reg, pci_addr_t value,
+ pci_addr_t size);
+
+struct resource *pci_alloc_multi_resource(device_t dev, device_t child,
+ int type, int *rid, rman_res_t start, rman_res_t end,
+ rman_res_t count, u_long num, u_int flags);
+
+int pci_iov_attach_method(device_t bus, device_t dev,
+ struct nvlist *pf_schema, struct nvlist *vf_schema,
+ const char *name);
+int pci_iov_detach_method(device_t bus, device_t dev);
+
+device_t pci_create_iov_child_method(device_t bus, device_t pf,
+ uint16_t rid, uint16_t vid, uint16_t did);
+
+struct resource *pci_vf_alloc_mem_resource(device_t dev, device_t child,
+ int *rid, rman_res_t start, rman_res_t end,
+ rman_res_t count, u_int flags);
+int pci_vf_release_mem_resource(device_t dev, device_t child,
+ int rid, struct resource *r);
#endif /* _PCI_PRIVATE_H_ */
diff --git a/freebsd/sys/dev/pci/pci_user.c b/freebsd/sys/dev/pci/pci_user.c
index 01eacf30..5f2b934f 100644
--- a/freebsd/sys/dev/pci/pci_user.c
+++ b/freebsd/sys/dev/pci/pci_user.c
@@ -494,7 +494,7 @@ pci_list_vpd(device_t dev, struct pci_list_vpd_io *lvio)
static int
pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
- device_t pcidev, brdev;
+ device_t pcidev;
void *confdata;
const char *name;
struct devlist *devlist_head;
@@ -710,10 +710,9 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
* Go through the list of devices and copy out the devices
* that match the user's criteria.
*/
- for (cio->num_matches = 0, error = 0, i = 0,
- dinfo = STAILQ_FIRST(devlist_head);
- (dinfo != NULL) && (cio->num_matches < ionum)
- && (error == 0) && (i < pci_numdevs) && (dinfo != NULL);
+ for (cio->num_matches = 0, i = 0,
+ dinfo = STAILQ_FIRST(devlist_head);
+ dinfo != NULL;
dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
if (i < cio->offset)
@@ -835,11 +834,12 @@ pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *t
} else
#endif /* PRE7_COMPAT */
confdata = &dinfo->conf;
- /* Only if we can copy it out do we count it. */
- if (!(error = copyout(confdata,
+ error = copyout(confdata,
(caddr_t)cio->matches +
- confsz * cio->num_matches, confsz)))
- cio->num_matches++;
+ confsz * cio->num_matches, confsz);
+ if (error)
+ break;
+ cio->num_matches++;
}
}
@@ -924,37 +924,25 @@ getconfexit:
io->pi_sel.pc_bus, io->pi_sel.pc_dev,
io->pi_sel.pc_func);
if (pcidev) {
- brdev = device_get_parent(
- device_get_parent(pcidev));
-
#ifdef PRE7_COMPAT
if (cmd == PCIOCWRITE || cmd == PCIOCWRITE_OLD)
#else
if (cmd == PCIOCWRITE)
#endif
- PCIB_WRITE_CONFIG(brdev,
- io->pi_sel.pc_bus,
- io->pi_sel.pc_dev,
- io->pi_sel.pc_func,
+ pci_write_config(pcidev,
io->pi_reg,
io->pi_data,
io->pi_width);
#ifdef PRE7_COMPAT
else if (cmd == PCIOCREAD_OLD)
io_old->pi_data =
- PCIB_READ_CONFIG(brdev,
- io->pi_sel.pc_bus,
- io->pi_sel.pc_dev,
- io->pi_sel.pc_func,
+ pci_read_config(pcidev,
io->pi_reg,
io->pi_width);
#endif
else
io->pi_data =
- PCIB_READ_CONFIG(brdev,
- io->pi_sel.pc_bus,
- io->pi_sel.pc_dev,
- io->pi_sel.pc_func,
+ pci_read_config(pcidev,
io->pi_reg,
io->pi_width);
error = 0;
diff --git a/freebsd/sys/dev/pci/pcib_private.h b/freebsd/sys/dev/pci/pcib_private.h
index e9d4c4bf..65aec8d4 100644
--- a/freebsd/sys/dev/pci/pcib_private.h
+++ b/freebsd/sys/dev/pci/pcib_private.h
@@ -33,6 +33,9 @@
#ifndef __PCIB_PRIVATE_H__
#define __PCIB_PRIVATE_H__
+#include <sys/_callout.h>
+#include <sys/_task.h>
+
#ifdef NEW_PCIB
/*
* Data structure and routines that Host to PCI bridge drivers can use
@@ -49,13 +52,13 @@ int pcib_host_res_init(device_t pcib,
int pcib_host_res_free(device_t pcib,
struct pcib_host_resources *hr);
int pcib_host_res_decodes(struct pcib_host_resources *hr, int type,
- u_long start, u_long end, u_int flags);
+ rman_res_t start, rman_res_t end, u_int flags);
struct resource *pcib_host_res_alloc(struct pcib_host_resources *hr,
- device_t dev, int type, int *rid, u_long start, u_long end,
- u_long count, u_int flags);
+ device_t dev, int type, int *rid, rman_res_t start,
+ rman_res_t end, rman_res_t count, u_int flags);
int pcib_host_res_adjust(struct pcib_host_resources *hr,
- device_t dev, int type, struct resource *r, u_long start,
- u_long end);
+ device_t dev, int type, struct resource *r, rman_res_t start,
+ rman_res_t end);
#endif
/*
@@ -83,21 +86,37 @@ struct pcib_window {
};
#endif
+struct pcib_secbus {
+ u_int sec;
+ u_int sub;
+#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
+ device_t dev;
+ struct rman rman;
+ struct resource *res;
+ const char *name;
+ int sub_reg;
+#endif
+};
+
/*
* Bridge-specific data.
*/
struct pcib_softc
{
device_t dev;
+ device_t child;
uint32_t flags; /* flags */
#define PCIB_SUBTRACTIVE 0x1
#define PCIB_DISABLE_MSI 0x2
#define PCIB_DISABLE_MSIX 0x4
- uint16_t command; /* command register */
+#define PCIB_ENABLE_ARI 0x8
+#define PCIB_HOTPLUG 0x10
+#define PCIB_HOTPLUG_CMD_PENDING 0x20
+#define PCIB_DETACH_PENDING 0x40
+#define PCIB_DETACHING 0x80
u_int domain; /* domain number */
u_int pribus; /* primary bus number */
- u_int secbus; /* secondary bus number */
- u_int subbus; /* subordinate bus number */
+ struct pcib_secbus bus; /* secondary bus numbers */
#ifdef NEW_PCIB
struct pcib_window io; /* I/O port window */
struct pcib_window mem; /* memory window */
@@ -110,38 +129,70 @@ struct pcib_softc
uint32_t iobase; /* base address of port window */
uint32_t iolimit; /* topmost address of port window */
#endif
- uint16_t secstat; /* secondary bus status register */
uint16_t bridgectl; /* bridge control register */
- uint8_t seclat; /* secondary bus latency timer */
+ uint16_t pcie_link_sta;
+ uint16_t pcie_slot_sta;
+ uint32_t pcie_link_cap;
+ uint32_t pcie_slot_cap;
+ struct resource *pcie_irq;
+ void *pcie_ihand;
+ struct task pcie_hp_task;
+ struct callout pcie_ab_timer;
+ struct callout pcie_cc_timer;
+ struct callout pcie_dll_timer;
};
+#define PCIB_SUPPORTED_ARI_VER 1
+
typedef uint32_t pci_read_config_fn(int b, int s, int f, int reg, int width);
-#ifdef NEW_PCIB
-const char *pcib_child_name(device_t child);
-#endif
int host_pcib_get_busno(pci_read_config_fn read_config, int bus,
int slot, int func, uint8_t *busnum);
+#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
+struct resource *pci_domain_alloc_bus(int domain, device_t dev, int *rid,
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags);
+int pci_domain_adjust_bus(int domain, device_t dev,
+ struct resource *r, rman_res_t start, rman_res_t end);
+int pci_domain_release_bus(int domain, device_t dev, int rid,
+ struct resource *r);
+struct resource *pcib_alloc_subbus(struct pcib_secbus *bus, device_t child,
+ int *rid, rman_res_t start, rman_res_t end, rman_res_t count,
+ u_int flags);
+void pcib_free_secbus(device_t dev, struct pcib_secbus *bus);
+void pcib_setup_secbus(device_t dev, struct pcib_secbus *bus,
+ int min_count);
+#endif
int pcib_attach(device_t dev);
+int pcib_attach_child(device_t dev);
void pcib_attach_common(device_t dev);
+void pcib_bridge_init(device_t dev);
+#ifdef NEW_PCIB
+const char *pcib_child_name(device_t child);
+#endif
+int pcib_child_present(device_t dev, device_t child);
+int pcib_detach(device_t dev);
int pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result);
int pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value);
struct resource *pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
- u_long start, u_long end, u_long count, u_int flags);
+ rman_res_t start, rman_res_t end,
+ rman_res_t count, u_int flags);
#ifdef NEW_PCIB
int pcib_adjust_resource(device_t bus, device_t child, int type,
- struct resource *r, u_long start, u_long end);
+ struct resource *r, rman_res_t start, rman_res_t end);
int pcib_release_resource(device_t dev, device_t child, int type, int rid,
struct resource *r);
#endif
int pcib_maxslots(device_t dev);
-uint32_t pcib_read_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, int width);
-void pcib_write_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, uint32_t val, int width);
+int pcib_maxfuncs(device_t dev);
int pcib_route_interrupt(device_t pcib, device_t dev, int pin);
int pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs);
int pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs);
int pcib_alloc_msix(device_t pcib, device_t dev, int *irq);
int pcib_release_msix(device_t pcib, device_t dev, int irq);
int pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data);
+int pcib_get_id(device_t pcib, device_t dev, enum pci_id_type type,
+ uintptr_t *id);
+void pcib_decode_rid(device_t pcib, uint16_t rid, int *bus,
+ int *slot, int *func);
#endif
diff --git a/freebsd/sys/dev/pci/pcireg.h b/freebsd/sys/dev/pci/pcireg.h
index f2d1ccbe..d463b7a5 100644
--- a/freebsd/sys/dev/pci/pcireg.h
+++ b/freebsd/sys/dev/pci/pcireg.h
@@ -48,6 +48,34 @@
#define PCIE_REGMAX 4095 /* highest supported config register addr. */
#define PCI_MAXHDRTYPE 2
+#define PCIE_ARI_SLOTMAX 0
+#define PCIE_ARI_FUNCMAX 255
+
+#define PCI_RID_DOMAIN_SHIFT 16
+#define PCI_RID_BUS_SHIFT 8
+#define PCI_RID_SLOT_SHIFT 3
+#define PCI_RID_FUNC_SHIFT 0
+
+#define PCI_RID(bus, slot, func) \
+ ((((bus) & PCI_BUSMAX) << PCI_RID_BUS_SHIFT) | \
+ (((slot) & PCI_SLOTMAX) << PCI_RID_SLOT_SHIFT) | \
+ (((func) & PCI_FUNCMAX) << PCI_RID_FUNC_SHIFT))
+
+#define PCI_ARI_RID(bus, func) \
+ ((((bus) & PCI_BUSMAX) << PCI_RID_BUS_SHIFT) | \
+ (((func) & PCIE_ARI_FUNCMAX) << PCI_RID_FUNC_SHIFT))
+
+#define PCI_RID2BUS(rid) (((rid) >> PCI_RID_BUS_SHIFT) & PCI_BUSMAX)
+#define PCI_RID2SLOT(rid) (((rid) >> PCI_RID_SLOT_SHIFT) & PCI_SLOTMAX)
+#define PCI_RID2FUNC(rid) (((rid) >> PCI_RID_FUNC_SHIFT) & PCI_FUNCMAX)
+
+#define PCIE_ARI_RID2SLOT(rid) (0)
+#define PCIE_ARI_RID2FUNC(rid) \
+ (((rid) >> PCI_RID_FUNC_SHIFT) & PCIE_ARI_FUNCMAX)
+
+#define PCIE_ARI_SLOT(func) (((func) >> PCI_RID_SLOT_SHIFT) & PCI_SLOTMAX)
+#define PCIE_ARI_FUNC(func) (((func) >> PCI_RID_FUNC_SHIFT) & PCI_FUNCMAX)
+
/* PCI config header registers for all devices */
#define PCIR_DEVVENDOR 0x00
@@ -118,6 +146,7 @@
#define PCIY_MSIX 0x11 /* MSI-X */
#define PCIY_SATA 0x12 /* SATA */
#define PCIY_PCIAF 0x13 /* PCI Advanced Features */
+#define PCIY_EA 0x14 /* PCI Extended Allocation */
/* Extended Capability Register Fields */
@@ -132,25 +161,35 @@
/* Extended Capability Identification Numbers */
#define PCIZ_AER 0x0001 /* Advanced Error Reporting */
-#define PCIZ_VC 0x0002 /* Virtual Channel */
+#define PCIZ_VC 0x0002 /* Virtual Channel if MFVC Ext Cap not set */
#define PCIZ_SERNUM 0x0003 /* Device Serial Number */
#define PCIZ_PWRBDGT 0x0004 /* Power Budgeting */
#define PCIZ_RCLINK_DCL 0x0005 /* Root Complex Link Declaration */
#define PCIZ_RCLINK_CTL 0x0006 /* Root Complex Internal Link Control */
#define PCIZ_RCEC_ASSOC 0x0007 /* Root Complex Event Collector Association */
#define PCIZ_MFVC 0x0008 /* Multi-Function Virtual Channel */
+#define PCIZ_VC2 0x0009 /* Virtual Channel if MFVC Ext Cap set */
#define PCIZ_RCRB 0x000a /* RCRB Header */
#define PCIZ_VENDOR 0x000b /* Vendor Unique */
+#define PCIZ_CAC 0x000c /* Configuration Access Correction -- obsolete */
#define PCIZ_ACS 0x000d /* Access Control Services */
#define PCIZ_ARI 0x000e /* Alternative Routing-ID Interpretation */
#define PCIZ_ATS 0x000f /* Address Translation Services */
#define PCIZ_SRIOV 0x0010 /* Single Root IO Virtualization */
+#define PCIZ_MRIOV 0x0011 /* Multiple Root IO Virtualization */
#define PCIZ_MULTICAST 0x0012 /* Multicast */
+#define PCIZ_PAGE_REQ 0x0013 /* Page Request */
+#define PCIZ_AMD 0x0014 /* Reserved for AMD */
#define PCIZ_RESIZE_BAR 0x0015 /* Resizable BAR */
#define PCIZ_DPA 0x0016 /* Dynamic Power Allocation */
#define PCIZ_TPH_REQ 0x0017 /* TPH Requester */
#define PCIZ_LTR 0x0018 /* Latency Tolerance Reporting */
#define PCIZ_SEC_PCIE 0x0019 /* Secondary PCI Express */
+#define PCIZ_PMUX 0x001a /* Protocol Multiplexing */
+#define PCIZ_PASID 0x001b /* Process Address Space ID */
+#define PCIZ_LN_REQ 0x001c /* LN Requester */
+#define PCIZ_DPC 0x001d /* Downstream Port Containment */
+#define PCIZ_L1PM 0x001e /* L1 PM Substates */
/* config registers for header type 0 devices */
@@ -183,7 +222,7 @@
#define PCIM_CIS_ASI_ROM 7
#define PCIM_CIS_ADDR_MASK 0x0ffffff8
#define PCIM_CIS_ROM_MASK 0xf0000000
-#define PCIM_CIS_CONFIG_MASK 0xff
+#define PCIM_CIS_CONFIG_MASK 0xff
#define PCIR_SUBVEND_0 0x2c
#define PCIR_SUBDEV_0 0x2e
#define PCIR_BIOS 0x30
@@ -227,6 +266,11 @@
#define PCIR_BIOS_1 0x38
#define PCIR_BRIDGECTL_1 0x3e
+#define PCI_PPBMEMBASE(h,l) ((((uint64_t)(h) << 32) + ((l)<<16)) & ~0xfffff)
+#define PCI_PPBMEMLIMIT(h,l) ((((uint64_t)(h) << 32) + ((l)<<16)) | 0xfffff)
+#define PCI_PPBIOBASE(h,l) ((((h)<<16) + ((l)<<8)) & ~0xfff)
+#define PCI_PPBIOLIMIT(h,l) ((((h)<<16) + ((l)<<8)) | 0xfff)
+
/* config registers for header type 2 (CardBus) devices */
#define PCIR_MAX_BAR_2 0
@@ -246,6 +290,9 @@
#define PCIR_IOLIMIT0_2 0x30
#define PCIR_IOBASE1_2 0x34
#define PCIR_IOLIMIT1_2 0x38
+#define PCIM_CBBIO_16 0x0
+#define PCIM_CBBIO_32 0x1
+#define PCIM_CBBIO_MASK 0x3
#define PCIR_BRIDGECTL_2 0x3e
@@ -254,6 +301,11 @@
#define PCIR_PCCARDIF_2 0x44
+#define PCI_CBBMEMBASE(l) ((l) & ~0xfffff)
+#define PCI_CBBMEMLIMIT(l) ((l) | 0xfffff)
+#define PCI_CBBIOBASE(l) ((l) & ~0x3)
+#define PCI_CBBIOLIMIT(l) ((l) | 0x3)
+
/* PCI device class, subclass and programming interface definitions */
#define PCIC_OLD 0x00
@@ -351,6 +403,7 @@
#define PCIS_BASEPERIPH_RTC 0x03
#define PCIS_BASEPERIPH_PCIHOT 0x04
#define PCIS_BASEPERIPH_SDHC 0x05
+#define PCIS_BASEPERIPH_IOMMU 0x06
#define PCIS_BASEPERIPH_OTHER 0x80
#define PCIC_INPUTDEV 0x09
@@ -440,6 +493,17 @@
#define PCIB_BCR_DISCARD_TIMER_STATUS 0x0400
#define PCIB_BCR_DISCARD_TIMER_SERREN 0x0800
+#define CBB_BCR_PERR_ENABLE 0x0001
+#define CBB_BCR_SERR_ENABLE 0x0002
+#define CBB_BCR_ISA_ENABLE 0x0004
+#define CBB_BCR_VGA_ENABLE 0x0008
+#define CBB_BCR_MASTER_ABORT_MODE 0x0020
+#define CBB_BCR_CARDBUS_RESET 0x0040
+#define CBB_BCR_IREQ_INT_ENABLE 0x0080
+#define CBB_BCR_PREFETCH_0_ENABLE 0x0100
+#define CBB_BCR_PREFETCH_1_ENABLE 0x0200
+#define CBB_BCR_WRITE_POSTING_ENABLE 0x0400
+
/* PCI power manangement */
#define PCIR_POWER_CAP 0x2
#define PCIM_PCAP_SPEC 0x0007
@@ -523,6 +587,52 @@
#define PCIR_MSI_MASK 0x10
#define PCIR_MSI_PENDING 0x14
+/* PCI Enhanced Allocation registers */
+#define PCIR_EA_NUM_ENT 2 /* Number of Capability Entries */
+#define PCIM_EA_NUM_ENT_MASK 0x3f /* Num Entries Mask */
+#define PCIR_EA_FIRST_ENT 4 /* First EA Entry in List */
+#define PCIR_EA_FIRST_ENT_BRIDGE 8 /* First EA Entry for Bridges */
+#define PCIM_EA_ES 0x00000007 /* Entry Size */
+#define PCIM_EA_BEI 0x000000f0 /* BAR Equivalent Indicator */
+#define PCIM_EA_BEI_OFFSET 4
+/* 0-5 map to BARs 0-5 respectively */
+#define PCIM_EA_BEI_BAR_0 0
+#define PCIM_EA_BEI_BAR_5 5
+#define PCIM_EA_BEI_BAR(x) (((x) >> PCIM_EA_BEI_OFFSET) & 0xf)
+#define PCIM_EA_BEI_BRIDGE 0x6 /* Resource behind bridge */
+#define PCIM_EA_BEI_ENI 0x7 /* Equivalent Not Indicated */
+#define PCIM_EA_BEI_ROM 0x8 /* Expansion ROM */
+/* 9-14 map to VF BARs 0-5 respectively */
+#define PCIM_EA_BEI_VF_BAR_0 9
+#define PCIM_EA_BEI_VF_BAR_5 14
+#define PCIM_EA_BEI_RESERVED 0xf /* Reserved - Treat like ENI */
+#define PCIM_EA_PP 0x0000ff00 /* Primary Properties */
+#define PCIM_EA_PP_OFFSET 8
+#define PCIM_EA_SP_OFFSET 16
+#define PCIM_EA_SP 0x00ff0000 /* Secondary Properties */
+#define PCIM_EA_P_MEM 0x00 /* Non-Prefetch Memory */
+#define PCIM_EA_P_MEM_PREFETCH 0x01 /* Prefetchable Memory */
+#define PCIM_EA_P_IO 0x02 /* I/O Space */
+#define PCIM_EA_P_VF_MEM_PREFETCH 0x03 /* VF Prefetchable Memory */
+#define PCIM_EA_P_VF_MEM 0x04 /* VF Non-Prefetch Memory */
+#define PCIM_EA_P_BRIDGE_MEM 0x05 /* Bridge Non-Prefetch Memory */
+#define PCIM_EA_P_BRIDGE_MEM_PREFETCH 0x06 /* Bridge Prefetchable Memory */
+#define PCIM_EA_P_BRIDGE_IO 0x07 /* Bridge I/O Space */
+/* 0x08-0xfc reserved */
+#define PCIM_EA_P_MEM_RESERVED 0xfd /* Reserved Memory */
+#define PCIM_EA_P_IO_RESERVED 0xfe /* Reserved I/O Space */
+#define PCIM_EA_P_UNAVAILABLE 0xff /* Entry Unavailable */
+#define PCIM_EA_WRITABLE 0x40000000 /* Writable: 1 = RW, 0 = HwInit */
+#define PCIM_EA_ENABLE 0x80000000 /* Enable for this entry */
+#define PCIM_EA_BASE 4 /* Base Address Offset */
+#define PCIM_EA_MAX_OFFSET 8 /* MaxOffset (resource length) */
+/* bit 0 is reserved */
+#define PCIM_EA_IS_64 0x00000002 /* 64-bit field flag */
+#define PCIM_EA_FIELD_MASK 0xfffffffc /* For Base & Max Offset */
+/* Bridge config register */
+#define PCIM_EA_SEC_NR(reg) ((reg) & 0xff)
+#define PCIM_EA_SUB_NR(reg) (((reg) >> 8) & 0xff)
+
/* PCI-X definitions */
/* For header type 0 devices */
@@ -627,6 +737,9 @@
#define PCIR_VENDOR_LENGTH 0x2
#define PCIR_VENDOR_DATA 0x3
+/* PCI Device capability definitions */
+#define PCIR_DEVICE_LENGTH 0x2
+
/* PCI EHCI Debug Port definitions */
#define PCIR_DEBUG_PORT 0x2
#define PCIM_DEBUG_PORT_OFFSET 0x1FFF
@@ -737,8 +850,16 @@
#define PCIEM_SLOT_CTL_CCIE 0x0010
#define PCIEM_SLOT_CTL_HPIE 0x0020
#define PCIEM_SLOT_CTL_AIC 0x00c0
+#define PCIEM_SLOT_CTL_AI_ON 0x0040
+#define PCIEM_SLOT_CTL_AI_BLINK 0x0080
+#define PCIEM_SLOT_CTL_AI_OFF 0x00c0
#define PCIEM_SLOT_CTL_PIC 0x0300
+#define PCIEM_SLOT_CTL_PI_ON 0x0100
+#define PCIEM_SLOT_CTL_PI_BLINK 0x0200
+#define PCIEM_SLOT_CTL_PI_OFF 0x0300
#define PCIEM_SLOT_CTL_PCC 0x0400
+#define PCIEM_SLOT_CTL_PC_ON 0x0000
+#define PCIEM_SLOT_CTL_PC_OFF 0x0400
#define PCIEM_SLOT_CTL_EIC 0x0800
#define PCIEM_SLOT_CTL_DLLSCE 0x1000
#define PCIER_SLOT_STA 0x1a
@@ -764,6 +885,7 @@
#define PCIEM_ROOT_STA_PME_STATUS 0x00010000
#define PCIEM_ROOT_STA_PME_PEND 0x00020000
#define PCIER_DEVICE_CAP2 0x24
+#define PCIEM_CAP2_ARI 0x20
#define PCIER_DEVICE_CTL2 0x28
#define PCIEM_CTL2_COMP_TIMEOUT_VAL 0x000f
#define PCIEM_CTL2_COMP_TIMEOUT_DIS 0x0010
@@ -787,117 +909,6 @@
#define PCIER_SLOT_CTL2 0x38
#define PCIER_SLOT_STA2 0x3a
-/* Old compatibility definitions for PCI Express registers */
-#define PCIR_EXPRESS_FLAGS PCIER_FLAGS
-#define PCIM_EXP_FLAGS_VERSION PCIEM_FLAGS_VERSION
-#define PCIM_EXP_FLAGS_TYPE PCIEM_FLAGS_TYPE
-#define PCIM_EXP_TYPE_ENDPOINT PCIEM_TYPE_ENDPOINT
-#define PCIM_EXP_TYPE_LEGACY_ENDPOINT PCIEM_TYPE_LEGACY_ENDPOINT
-#define PCIM_EXP_TYPE_ROOT_PORT PCIEM_TYPE_ROOT_PORT
-#define PCIM_EXP_TYPE_UPSTREAM_PORT PCIEM_TYPE_UPSTREAM_PORT
-#define PCIM_EXP_TYPE_DOWNSTREAM_PORT PCIEM_TYPE_DOWNSTREAM_PORT
-#define PCIM_EXP_TYPE_PCI_BRIDGE PCIEM_TYPE_PCI_BRIDGE
-#define PCIM_EXP_TYPE_PCIE_BRIDGE PCIEM_TYPE_PCIE_BRIDGE
-#define PCIM_EXP_TYPE_ROOT_INT_EP PCIEM_TYPE_ROOT_INT_EP
-#define PCIM_EXP_TYPE_ROOT_EC PCIEM_TYPE_ROOT_EC
-#define PCIM_EXP_FLAGS_SLOT PCIEM_FLAGS_SLOT
-#define PCIM_EXP_FLAGS_IRQ PCIEM_FLAGS_IRQ
-#define PCIR_EXPRESS_DEVICE_CAP PCIER_DEVICE_CAP
-#define PCIM_EXP_CAP_MAX_PAYLOAD PCIEM_CAP_MAX_PAYLOAD
-#define PCIM_EXP_CAP_PHANTHOM_FUNCS PCIEM_CAP_PHANTHOM_FUNCS
-#define PCIM_EXP_CAP_EXT_TAG_FIELD PCIEM_CAP_EXT_TAG_FIELD
-#define PCIM_EXP_CAP_L0S_LATENCY PCIEM_CAP_L0S_LATENCY
-#define PCIM_EXP_CAP_L1_LATENCY PCIEM_CAP_L1_LATENCY
-#define PCIM_EXP_CAP_ROLE_ERR_RPT PCIEM_CAP_ROLE_ERR_RPT
-#define PCIM_EXP_CAP_SLOT_PWR_LIM_VAL PCIEM_CAP_SLOT_PWR_LIM_VAL
-#define PCIM_EXP_CAP_SLOT_PWR_LIM_SCALE PCIEM_CAP_SLOT_PWR_LIM_SCALE
-#define PCIM_EXP_CAP_FLR PCIEM_CAP_FLR
-#define PCIR_EXPRESS_DEVICE_CTL PCIER_DEVICE_CTL
-#define PCIM_EXP_CTL_COR_ENABLE PCIEM_CTL_COR_ENABLE
-#define PCIM_EXP_CTL_NFER_ENABLE PCIEM_CTL_NFER_ENABLE
-#define PCIM_EXP_CTL_FER_ENABLE PCIEM_CTL_FER_ENABLE
-#define PCIM_EXP_CTL_URR_ENABLE PCIEM_CTL_URR_ENABLE
-#define PCIM_EXP_CTL_RELAXED_ORD_ENABLE PCIEM_CTL_RELAXED_ORD_ENABLE
-#define PCIM_EXP_CTL_MAX_PAYLOAD PCIEM_CTL_MAX_PAYLOAD
-#define PCIM_EXP_CTL_EXT_TAG_FIELD PCIEM_CTL_EXT_TAG_FIELD
-#define PCIM_EXP_CTL_PHANTHOM_FUNCS PCIEM_CTL_PHANTHOM_FUNCS
-#define PCIM_EXP_CTL_AUX_POWER_PM PCIEM_CTL_AUX_POWER_PM
-#define PCIM_EXP_CTL_NOSNOOP_ENABLE PCIEM_CTL_NOSNOOP_ENABLE
-#define PCIM_EXP_CTL_MAX_READ_REQUEST PCIEM_CTL_MAX_READ_REQUEST
-#define PCIM_EXP_CTL_BRDG_CFG_RETRY PCIEM_CTL_BRDG_CFG_RETRY
-#define PCIM_EXP_CTL_INITIATE_FLR PCIEM_CTL_INITIATE_FLR
-#define PCIR_EXPRESS_DEVICE_STA PCIER_DEVICE_STA
-#define PCIM_EXP_STA_CORRECTABLE_ERROR PCIEM_STA_CORRECTABLE_ERROR
-#define PCIM_EXP_STA_NON_FATAL_ERROR PCIEM_STA_NON_FATAL_ERROR
-#define PCIM_EXP_STA_FATAL_ERROR PCIEM_STA_FATAL_ERROR
-#define PCIM_EXP_STA_UNSUPPORTED_REQ PCIEM_STA_UNSUPPORTED_REQ
-#define PCIM_EXP_STA_AUX_POWER PCIEM_STA_AUX_POWER
-#define PCIM_EXP_STA_TRANSACTION_PND PCIEM_STA_TRANSACTION_PND
-#define PCIR_EXPRESS_LINK_CAP PCIER_LINK_CAP
-#define PCIM_LINK_CAP_MAX_SPEED PCIEM_LINK_CAP_MAX_SPEED
-#define PCIM_LINK_CAP_MAX_WIDTH PCIEM_LINK_CAP_MAX_WIDTH
-#define PCIM_LINK_CAP_ASPM PCIEM_LINK_CAP_ASPM
-#define PCIM_LINK_CAP_L0S_EXIT PCIEM_LINK_CAP_L0S_EXIT
-#define PCIM_LINK_CAP_L1_EXIT PCIEM_LINK_CAP_L1_EXIT
-#define PCIM_LINK_CAP_CLOCK_PM PCIEM_LINK_CAP_CLOCK_PM
-#define PCIM_LINK_CAP_SURPRISE_DOWN PCIEM_LINK_CAP_SURPRISE_DOWN
-#define PCIM_LINK_CAP_DL_ACTIVE PCIEM_LINK_CAP_DL_ACTIVE
-#define PCIM_LINK_CAP_LINK_BW_NOTIFY PCIEM_LINK_CAP_LINK_BW_NOTIFY
-#define PCIM_LINK_CAP_ASPM_COMPLIANCE PCIEM_LINK_CAP_ASPM_COMPLIANCE
-#define PCIM_LINK_CAP_PORT PCIEM_LINK_CAP_PORT
-#define PCIR_EXPRESS_LINK_CTL PCIER_LINK_CTL
-#define PCIM_EXP_LINK_CTL_ASPMC_DIS PCIEM_LINK_CTL_ASPMC_DIS
-#define PCIM_EXP_LINK_CTL_ASPMC_L0S PCIEM_LINK_CTL_ASPMC_L0S
-#define PCIM_EXP_LINK_CTL_ASPMC_L1 PCIEM_LINK_CTL_ASPMC_L1
-#define PCIM_EXP_LINK_CTL_ASPMC PCIEM_LINK_CTL_ASPMC
-#define PCIM_EXP_LINK_CTL_RCB PCIEM_LINK_CTL_RCB
-#define PCIM_EXP_LINK_CTL_LINK_DIS PCIEM_LINK_CTL_LINK_DIS
-#define PCIM_EXP_LINK_CTL_RETRAIN_LINK PCIEM_LINK_CTL_RETRAIN_LINK
-#define PCIM_EXP_LINK_CTL_COMMON_CLOCK PCIEM_LINK_CTL_COMMON_CLOCK
-#define PCIM_EXP_LINK_CTL_EXTENDED_SYNC PCIEM_LINK_CTL_EXTENDED_SYNC
-#define PCIM_EXP_LINK_CTL_ECPM PCIEM_LINK_CTL_ECPM
-#define PCIM_EXP_LINK_CTL_HAWD PCIEM_LINK_CTL_HAWD
-#define PCIM_EXP_LINK_CTL_LBMIE PCIEM_LINK_CTL_LBMIE
-#define PCIM_EXP_LINK_CTL_LABIE PCIEM_LINK_CTL_LABIE
-#define PCIR_EXPRESS_LINK_STA PCIER_LINK_STA
-#define PCIM_LINK_STA_SPEED PCIEM_LINK_STA_SPEED
-#define PCIM_LINK_STA_WIDTH PCIEM_LINK_STA_WIDTH
-#define PCIM_LINK_STA_TRAINING_ERROR PCIEM_LINK_STA_TRAINING_ERROR
-#define PCIM_LINK_STA_TRAINING PCIEM_LINK_STA_TRAINING
-#define PCIM_LINK_STA_SLOT_CLOCK PCIEM_LINK_STA_SLOT_CLOCK
-#define PCIM_LINK_STA_DL_ACTIVE PCIEM_LINK_STA_DL_ACTIVE
-#define PCIM_LINK_STA_LINK_BW_MGMT PCIEM_LINK_STA_LINK_BW_MGMT
-#define PCIM_LINK_STA_LINK_AUTO_BW PCIEM_LINK_STA_LINK_AUTO_BW
-#define PCIR_EXPRESS_SLOT_CAP PCIER_SLOT_CAP
-#define PCIR_EXPRESS_SLOT_CTL PCIER_SLOT_CTL
-#define PCIR_EXPRESS_SLOT_STA PCIER_SLOT_STA
-#define PCIR_EXPRESS_ROOT_CTL PCIER_ROOT_CTL
-#define PCIR_EXPRESS_ROOT_CAP PCIER_ROOT_CAP
-#define PCIR_EXPRESS_ROOT_STA PCIER_ROOT_STA
-#define PCIR_EXPRESS_DEVICE_CAP2 PCIER_DEVICE_CAP2
-#define PCIR_EXPRESS_DEVICE_CTL2 PCIER_DEVICE_CTL2
-#define PCIM_EXP_CTL2_COMP_TIMEOUT_VAL PCIEM_CTL2_COMP_TIMEOUT_VAL
-#define PCIM_EXP_CTL2_COMP_TIMEOUT_DIS PCIEM_CTL2_COMP_TIMEOUT_DIS
-#define PCIM_EXP_CTL2_ARI PCIEM_CTL2_ARI
-#define PCIM_EXP_CTL2_ATOMIC_REQ_ENABLE PCIEM_CTL2_ATOMIC_REQ_ENABLE
-#define PCIM_EXP_CTL2_ATOMIC_EGR_BLOCK PCIEM_CTL2_ATOMIC_EGR_BLOCK
-#define PCIM_EXP_CTL2_ID_ORDERED_REQ_EN PCIEM_CTL2_ID_ORDERED_REQ_EN
-#define PCIM_EXP_CTL2_ID_ORDERED_CMP_EN PCIEM_CTL2_ID_ORDERED_CMP_EN
-#define PCIM_EXP_CTL2_LTR_ENABLE PCIEM_CTL2_LTR_ENABLE
-#define PCIM_EXP_CTL2_OBFF PCIEM_CTL2_OBFF
-#define PCIM_EXP_OBFF_DISABLE PCIEM_OBFF_DISABLE
-#define PCIM_EXP_OBFF_MSGA_ENABLE PCIEM_OBFF_MSGA_ENABLE
-#define PCIM_EXP_OBFF_MSGB_ENABLE PCIEM_OBFF_MSGB_ENABLE
-#define PCIM_EXP_OBFF_WAKE_ENABLE PCIEM_OBFF_WAKE_ENABLE
-#define PCIM_EXP_CTL2_END2END_TLP PCIEM_CTL2_END2END_TLP
-#define PCIR_EXPRESS_DEVICE_STA2 PCIER_DEVICE_STA2
-#define PCIR_EXPRESS_LINK_CAP2 PCIER_LINK_CAP2
-#define PCIR_EXPRESS_LINK_CTL2 PCIER_LINK_CTL2
-#define PCIR_EXPRESS_LINK_STA2 PCIER_LINK_STA2
-#define PCIR_EXPRESS_SLOT_CAP2 PCIER_SLOT_CAP2
-#define PCIR_EXPRESS_SLOT_CTL2 PCIER_SLOT_CTL2
-#define PCIR_EXPRESS_SLOT_STA2 PCIER_SLOT_STA2
-
/* MSI-X definitions */
#define PCIR_MSIX_CTRL 0x2
#define PCIM_MSIXCTRL_MSIX_ENABLE 0x8000
@@ -995,3 +1006,22 @@
/* Serial Number definitions */
#define PCIR_SERIAL_LOW 0x04
#define PCIR_SERIAL_HIGH 0x08
+
+/* SR-IOV definitions */
+#define PCIR_SRIOV_CTL 0x08
+#define PCIM_SRIOV_VF_EN 0x01
+#define PCIM_SRIOV_VF_MSE 0x08 /* Memory space enable. */
+#define PCIM_SRIOV_ARI_EN 0x10
+#define PCIR_SRIOV_TOTAL_VFS 0x0E
+#define PCIR_SRIOV_NUM_VFS 0x10
+#define PCIR_SRIOV_VF_OFF 0x14
+#define PCIR_SRIOV_VF_STRIDE 0x16
+#define PCIR_SRIOV_VF_DID 0x1A
+#define PCIR_SRIOV_PAGE_CAP 0x1C
+#define PCIR_SRIOV_PAGE_SIZE 0x20
+
+#define PCI_SRIOV_BASE_PAGE_SHIFT 12
+
+#define PCIR_SRIOV_BARS 0x24
+#define PCIR_SRIOV_BAR(x) (PCIR_SRIOV_BARS + (x) * 4)
+
diff --git a/freebsd/sys/dev/pci/pcivar.h b/freebsd/sys/dev/pci/pcivar.h
index e28eb936..fc3fa5fb 100644
--- a/freebsd/sys/dev/pci/pcivar.h
+++ b/freebsd/sys/dev/pci/pcivar.h
@@ -39,6 +39,15 @@
typedef uint64_t pci_addr_t;
+/* Config registers for PCI-PCI and PCI-Cardbus bridges. */
+struct pcicfg_bridge {
+ uint8_t br_seclat;
+ uint8_t br_subbus;
+ uint8_t br_secbus;
+ uint8_t br_pribus;
+ uint16_t br_control;
+};
+
/* Interesting values for PCI power management */
struct pcicfg_pp {
uint16_t pp_cap; /* PCI power management capabilities */
@@ -50,7 +59,7 @@ struct pcicfg_pp {
struct pci_map {
pci_addr_t pm_value; /* Raw BAR value */
pci_addr_t pm_size;
- uint8_t pm_reg;
+ uint16_t pm_reg;
STAILQ_ENTRY(pci_map) pm_link;
};
@@ -124,9 +133,48 @@ struct pcicfg_ht {
uint64_t ht_msiaddr; /* MSI mapping base address */
};
+/* Interesting values for PCI-express */
+struct pcicfg_pcie {
+ uint8_t pcie_location; /* Offset of PCI-e capability registers. */
+ uint8_t pcie_type; /* Device type. */
+ uint16_t pcie_flags; /* Device capabilities register. */
+ uint16_t pcie_device_ctl; /* Device control register. */
+ uint16_t pcie_link_ctl; /* Link control register. */
+ uint16_t pcie_slot_ctl; /* Slot control register. */
+ uint16_t pcie_root_ctl; /* Root control register. */
+ uint16_t pcie_device_ctl2; /* Second device control register. */
+ uint16_t pcie_link_ctl2; /* Second link control register. */
+ uint16_t pcie_slot_ctl2; /* Second slot control register. */
+};
+
+struct pcicfg_pcix {
+ uint16_t pcix_command;
+ uint8_t pcix_location; /* Offset of PCI-X capability registers. */
+};
+
+struct pcicfg_vf {
+ int index;
+};
+
+struct pci_ea_entry {
+ int eae_bei;
+ uint32_t eae_flags;
+ uint64_t eae_base;
+ uint64_t eae_max_offset;
+ uint32_t eae_cfg_offset;
+ STAILQ_ENTRY(pci_ea_entry) eae_link;
+};
+
+struct pcicfg_ea {
+ int ea_location; /* Structure offset in Configuration Header */
+ STAILQ_HEAD(, pci_ea_entry) ea_entries; /* EA entries */
+};
+
+#define PCICFG_VF 0x0001 /* Device is an SR-IOV Virtual Function */
+
/* config header information common to all header types */
typedef struct pcicfg {
- struct device *dev; /* device which owns this */
+ device_t dev; /* device which owns this */
STAILQ_HEAD(, pci_map) maps; /* BARs */
@@ -160,20 +208,23 @@ typedef struct pcicfg {
uint8_t slot; /* config space slot address */
uint8_t func; /* config space function number */
+ uint32_t flags; /* flags defined above */
+
+ struct pcicfg_bridge bridge; /* Bridges */
struct pcicfg_pp pp; /* Power management */
struct pcicfg_vpd vpd; /* Vital product data */
struct pcicfg_msi msi; /* PCI MSI */
struct pcicfg_msix msix; /* PCI MSI-X */
struct pcicfg_ht ht; /* HyperTransport */
+ struct pcicfg_pcie pcie; /* PCI Express */
+ struct pcicfg_pcix pcix; /* PCI-X */
+ struct pcicfg_iov *iov; /* SR-IOV */
+ struct pcicfg_vf vf; /* SR-IOV Virtual Function */
+ struct pcicfg_ea ea; /* Enhanced Allocation */
} pcicfgregs;
/* additional type 1 device config header information (PCI to PCI bridge) */
-#define PCI_PPBMEMBASE(h,l) ((((pci_addr_t)(h) << 32) + ((l)<<16)) & ~0xfffff)
-#define PCI_PPBMEMLIMIT(h,l) ((((pci_addr_t)(h) << 32) + ((l)<<16)) | 0xfffff)
-#define PCI_PPBIOBASE(h,l) ((((h)<<16) + ((l)<<8)) & ~0xfff)
-#define PCI_PPBIOLIMIT(h,l) ((((h)<<16) + ((l)<<8)) | 0xfff)
-
typedef struct {
pci_addr_t pmembase; /* base address of prefetchable memory */
pci_addr_t pmemlimit; /* topmost address of prefetchable memory */
@@ -351,16 +402,16 @@ pci_get_vpd_ident(device_t dev, const char **identptr)
}
static __inline int
-pci_get_vpd_readonly(device_t dev, const char *kw, const char **identptr)
+pci_get_vpd_readonly(device_t dev, const char *kw, const char **vptr)
{
- return(PCI_GET_VPD_READONLY(device_get_parent(dev), dev, kw, identptr));
+ return(PCI_GET_VPD_READONLY(device_get_parent(dev), dev, kw, vptr));
}
/*
* Check if the address range falls within the VGA defined address range(s)
*/
static __inline int
-pci_is_vga_ioport_range(u_long start, u_long end)
+pci_is_vga_ioport_range(rman_res_t start, rman_res_t end)
{
return (((start >= 0x3b0 && end <= 0x3bb) ||
@@ -368,7 +419,7 @@ pci_is_vga_ioport_range(u_long start, u_long end)
}
static __inline int
-pci_is_vga_memory_range(u_long start, u_long end)
+pci_is_vga_memory_range(rman_res_t start, rman_res_t end)
{
return ((start >= 0xa0000 && end <= 0xbffff) ? 1 : 0);
@@ -410,7 +461,7 @@ pci_get_powerstate(device_t dev)
static __inline int
pci_find_cap(device_t dev, int capability, int *capreg)
{
- return (PCI_FIND_EXTCAP(device_get_parent(dev), dev, capability, capreg));
+ return (PCI_FIND_CAP(device_get_parent(dev), dev, capability, capreg));
}
static __inline int
@@ -420,6 +471,12 @@ pci_find_extcap(device_t dev, int capability, int *capreg)
}
static __inline int
+pci_find_htcap(device_t dev, int capability, int *capreg)
+{
+ return (PCI_FIND_HTCAP(device_get_parent(dev), dev, capability, capreg));
+}
+
+static __inline int
pci_alloc_msi(device_t dev, int *count)
{
return (PCI_ALLOC_MSI(device_get_parent(dev), dev, count));
@@ -431,6 +488,24 @@ pci_alloc_msix(device_t dev, int *count)
return (PCI_ALLOC_MSIX(device_get_parent(dev), dev, count));
}
+static __inline void
+pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
+{
+ PCI_ENABLE_MSI(device_get_parent(dev), dev, address, data);
+}
+
+static __inline void
+pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
+{
+ PCI_ENABLE_MSIX(device_get_parent(dev), dev, index, address, data);
+}
+
+static __inline void
+pci_disable_msi(device_t dev)
+{
+ PCI_DISABLE_MSI(device_get_parent(dev), dev);
+}
+
static __inline int
pci_remap_msix(device_t dev, int count, const u_int *vectors)
{
@@ -455,6 +530,47 @@ pci_msix_count(device_t dev)
return (PCI_MSIX_COUNT(device_get_parent(dev), dev));
}
+static __inline int
+pci_msix_pba_bar(device_t dev)
+{
+ return (PCI_MSIX_PBA_BAR(device_get_parent(dev), dev));
+}
+
+static __inline int
+pci_msix_table_bar(device_t dev)
+{
+ return (PCI_MSIX_TABLE_BAR(device_get_parent(dev), dev));
+}
+
+static __inline int
+pci_get_id(device_t dev, enum pci_id_type type, uintptr_t *id)
+{
+ return (PCI_GET_ID(device_get_parent(dev), dev, type, id));
+}
+
+/*
+ * This is the deprecated interface, there is no way to tell the difference
+ * between a failure and a valid value that happens to be the same as the
+ * failure value.
+ */
+static __inline uint16_t
+pci_get_rid(device_t dev)
+{
+ uintptr_t rid;
+
+ if (pci_get_id(dev, PCI_ID_RID, &rid) != 0)
+ return (0);
+
+ return (rid);
+}
+
+static __inline void
+pci_child_added(device_t dev)
+{
+
+ return (PCI_CHILD_ADDED(device_get_parent(dev), dev));
+}
+
device_t pci_find_bsf(uint8_t, uint8_t, uint8_t);
device_t pci_find_dbsf(uint32_t, uint8_t, uint8_t, uint8_t);
device_t pci_find_device(uint16_t, uint16_t);
@@ -468,10 +584,25 @@ int pci_msix_device_blacklisted(device_t dev);
void pci_ht_map_msi(device_t dev, uint64_t addr);
+device_t pci_find_pcie_root_port(device_t dev);
+int pci_get_max_payload(device_t dev);
int pci_get_max_read_req(device_t dev);
void pci_restore_state(device_t dev);
void pci_save_state(device_t dev);
int pci_set_max_read_req(device_t dev, int size);
+uint32_t pcie_read_config(device_t dev, int reg, int width);
+void pcie_write_config(device_t dev, int reg, uint32_t value, int width);
+uint32_t pcie_adjust_config(device_t dev, int reg, uint32_t mask,
+ uint32_t value, int width);
+
+
+#ifdef BUS_SPACE_MAXADDR
+#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
+#define PCI_DMA_BOUNDARY 0x100000000
+#else
+#define PCI_DMA_BOUNDARY 0
+#endif
+#endif
#endif /* _SYS_BUS_H_ */
@@ -498,5 +629,6 @@ struct pcicfg_vpd *pci_fetch_vpd_list(device_t dev);
int vga_pci_is_boot_display(device_t dev);
void * vga_pci_map_bios(device_t dev, size_t *size);
void vga_pci_unmap_bios(device_t dev, void *bios);
+int vga_pci_repost(device_t dev);
#endif /* _PCIVAR_H_ */
diff --git a/freebsd/sys/dev/random/harvest.c b/freebsd/sys/dev/random/harvest.c
deleted file mode 100644
index f42d35d6..00000000
--- a/freebsd/sys/dev/random/harvest.c
+++ /dev/null
@@ -1,137 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*-
- * Copyright (c) 2000-2004 Mark R V Murray
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer
- * in this position and unchanged.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <rtems/bsd/sys/param.h>
-#include <sys/kthread.h>
-#include <rtems/bsd/sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mutex.h>
-#include <sys/poll.h>
-#include <sys/queue.h>
-#include <sys/random.h>
-#include <sys/selinfo.h>
-#include <sys/systm.h>
-#include <sys/sysctl.h>
-
-#include <machine/cpu.h>
-
-#include <dev/random/randomdev_soft.h>
-
-static int read_random_phony(void *, int);
-
-/* Structure holding the desired entropy sources */
-struct harvest_select harvest = { 1, 1, 1, 0 };
-
-/* hold the address of the routine which is actually called if
- * the randomdev is loaded
- */
-static void (*reap_func)(u_int64_t, const void *, u_int, u_int, u_int,
- enum esource) = NULL;
-static int (*read_func)(void *, int) = read_random_phony;
-
-/* Initialise the harvester at load time */
-void
-random_yarrow_init_harvester(void (*reaper)(u_int64_t, const void *, u_int,
- u_int, u_int, enum esource), int (*reader)(void *, int))
-{
- reap_func = reaper;
- read_func = reader;
-}
-
-/* Deinitialise the harvester at unload time */
-void
-random_yarrow_deinit_harvester(void)
-{
- reap_func = NULL;
- read_func = read_random_phony;
-}
-
-/* Entropy harvesting routine. This is supposed to be fast; do
- * not do anything slow in here!
- * Implemented as in indirect call to allow non-inclusion of
- * the entropy device.
- *
- * XXXRW: get_cyclecount() is cheap on most modern hardware, where cycle
- * counters are built in, but on older hardware it will do a real time clock
- * read which can be quite expensive.
- */
-void
-random_harvest(void *entropy, u_int count, u_int bits, u_int frac,
- enum esource origin)
-{
- if (reap_func)
- (*reap_func)(get_cyclecount(), entropy, count, bits, frac,
- origin);
-}
-
-/* Userland-visible version of read_random */
-int
-read_random(void *buf, int count)
-{
- return ((*read_func)(buf, count));
-}
-
-/* If the entropy device is not loaded, make a token effort to
- * provide _some_ kind of randomness. This should only be used
- * inside other RNG's, like arc4random(9).
- */
-static int
-read_random_phony(void *buf, int count)
-{
- u_long randval;
- int size, i;
-
- /* srandom() is called in kern/init_main.c:proc0_post() */
-
- /* Fill buf[] with random(9) output */
- for (i = 0; i < count; i+= (int)sizeof(u_long)) {
- randval = random();
- size = MIN(count - i, sizeof(u_long));
- memcpy(&((char *)buf)[i], &randval, (size_t)size);
- }
-
- return (count);
-}
-
-/* Helper routine to enable kproc_exit() to work while the module is
- * being (or has been) unloaded.
- * This routine is in this file because it is always linked into the kernel,
- * and will thus never be unloaded. This is critical for unloadable modules
- * that have threads.
- */
-void
-random_set_wakeup_exit(void *control)
-{
- wakeup(control);
- kproc_exit(0);
- /* NOTREACHED */
-}
diff --git a/freebsd/sys/dev/random/randomdev_soft.h b/freebsd/sys/dev/random/randomdev_soft.h
deleted file mode 100644
index 489d45a3..00000000
--- a/freebsd/sys/dev/random/randomdev_soft.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*-
- * Copyright (c) 2000-2004 Mark R V Murray
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer
- * in this position and unchanged.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-/* This header contains only those definitions that are global
- * and harvester-specific for the entropy processor
- */
-
-/* #define ENTROPYSOURCE nn entropy sources (actually classes)
- * This is properly defined in
- * an enum in sys/random.h
- */
-
-/* Cryptographic block size in bits */
-#define BLOCKSIZE 256
-
-/* The ring size _MUST_ be a power of 2 */
-#define HARVEST_RING_SIZE 1024 /* harvest ring buffer size */
-#define HARVEST_RING_MASK (HARVEST_RING_SIZE - 1)
-
-#define HARVESTSIZE 16 /* max size of each harvested entropy unit */
-
-MALLOC_DECLARE(M_ENTROPY);
-
-/* These are used to queue harvested packets of entropy. The entropy
- * buffer size is pretty arbitrary.
- */
-struct harvest {
- uintmax_t somecounter; /* fast counter for clock jitter */
- u_char entropy[HARVESTSIZE]; /* the harvested entropy */
- u_int size, bits, frac; /* stats about the entropy */
- enum esource source; /* stats about the entropy */
- STAILQ_ENTRY(harvest) next; /* next item on the list */
-};
-
-void random_yarrow_init(void);
-void random_yarrow_deinit(void);
-
-int random_yarrow_read(void *, int);
-void random_yarrow_write(void *, int);
-
-void random_yarrow_init_harvester(void (*)(u_int64_t, const void *, u_int,
- u_int, u_int, enum esource), int (*)(void *, int));
-void random_yarrow_deinit_harvester(void);
-
-void random_set_wakeup_exit(void *);
-void random_process_event(struct harvest *event);
-void random_yarrow_reseed(void);
-void random_yarrow_unblock(void);
-
-void random_yarrow_init_alg(struct sysctl_ctx_list *, struct sysctl_oid *);
-void random_yarrow_deinit_alg(void);
-
-extern struct random_systat random_yarrow;
-extern struct mtx random_reseed_mtx;
-
-/* If this was c++, this would be a template */
-#define RANDOM_CHECK_UINT(name, min, max) \
-static int \
-random_check_uint_##name(SYSCTL_HANDLER_ARGS) \
-{ \
- if (oidp->oid_arg1 != NULL) { \
- if (*(u_int *)(oidp->oid_arg1) <= (min)) \
- *(u_int *)(oidp->oid_arg1) = (min); \
- else if (*(u_int *)(oidp->oid_arg1) > (max)) \
- *(u_int *)(oidp->oid_arg1) = (max); \
- } \
- return sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, \
- req); \
-}
diff --git a/freebsd/sys/dev/re/if_re.c b/freebsd/sys/dev/re/if_re.c
index b574f9a2..554af66a 100644
--- a/freebsd/sys/dev/re/if_re.c
+++ b/freebsd/sys/dev/re/if_re.c
@@ -129,6 +129,7 @@ __FBSDID("$FreeBSD$");
#include <sys/taskqueue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
@@ -149,7 +150,7 @@ __FBSDID("$FreeBSD$");
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
-#include <pci/if_rlreg.h>
+#include <dev/rl/if_rlreg.h>
MODULE_DEPEND(re, pci, 1, 1, 1);
MODULE_DEPEND(re, ether, 1, 1, 1);
@@ -239,6 +240,7 @@ static const struct rl_hwrev re_hwrevs[] = {
{ RL_HWREV_8168F, RL_8169, "8168F/8111F", RL_JUMBO_MTU_9K},
{ RL_HWREV_8168G, RL_8169, "8168G/8111G", RL_JUMBO_MTU_9K},
{ RL_HWREV_8168GU, RL_8169, "8168GU/8111GU", RL_JUMBO_MTU_9K},
+ { RL_HWREV_8168H, RL_8169, "8168H/8111H", RL_JUMBO_MTU_9K},
{ RL_HWREV_8411, RL_8169, "8411", RL_JUMBO_MTU_9K},
{ RL_HWREV_8411B, RL_8169, "8411B", RL_JUMBO_MTU_9K},
{ 0, 0, NULL, 0 }
@@ -305,6 +307,7 @@ static void re_set_linkspeed (struct rl_softc *);
#ifdef DEV_NETMAP /* see ixgbe.c for details */
#include <dev/netmap/if_re_netmap.h>
+MODULE_DEPEND(re, netmap, 1, 1, 1);
#endif /* !DEV_NETMAP */
#ifdef RE_DIAG
@@ -635,9 +638,8 @@ re_miibus_statchg(device_t dev)
}
}
/*
- * RealTek controllers does not provide any interface to
- * Tx/Rx MACs for resolved speed, duplex and flow-control
- * parameters.
+ * RealTek controllers do not provide any interface to the RX/TX
+ * MACs for resolved speed, duplex and flow-control parameters.
*/
}
@@ -659,7 +661,7 @@ re_set_rxmode(struct rl_softc *sc)
rxfilt = RL_RXCFG_CONFIG | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_BROAD;
if ((sc->rl_flags & RL_FLAG_EARLYOFF) != 0)
rxfilt |= RL_RXCFG_EARLYOFF;
- else if ((sc->rl_flags & RL_FLAG_EARLYOFFV2) != 0)
+ else if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0)
rxfilt |= RL_RXCFG_EARLYOFFV2;
if (ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) {
@@ -953,7 +955,7 @@ re_probe(device_t dev)
}
t = re_devs;
- for (i = 0; i < sizeof(re_devs) / sizeof(re_devs[0]); i++, t++) {
+ for (i = 0; i < nitems(re_devs); i++, t++) {
if (vendor == t->rl_vid && devid == t->rl_did) {
device_set_desc(dev, t->rl_name);
return (BUS_PROBE_DEFAULT);
@@ -1206,11 +1208,10 @@ re_attach(device_t dev)
struct rl_softc *sc;
struct ifnet *ifp;
const struct rl_hwrev *hw_rev;
+ int capmask, error = 0, hwrev, i, msic, msixc,
+ phy, reg, rid;
u_int32_t cap, ctl;
- int hwrev;
u_int16_t devid, re_did = 0;
- int error = 0, i, phy, rid;
- int msic, msixc, reg;
uint8_t cfg;
sc = device_get_softc(dev);
@@ -1338,7 +1339,7 @@ re_attach(device_t dev)
SYS_RES_IRQ, &rid, RF_ACTIVE);
if (sc->rl_irq[i] == NULL) {
device_printf(dev,
- "couldn't llocate IRQ resources for "
+ "couldn't allocate IRQ resources for "
"message %d\n", rid);
error = ENXIO;
goto fail;
@@ -1364,8 +1365,8 @@ re_attach(device_t dev)
if ((cap & PCIEM_LINK_CAP_ASPM) != 0) {
ctl = pci_read_config(dev, sc->rl_expcap +
PCIER_LINK_CTL, 2);
- if ((ctl & 0x0003) != 0) {
- ctl &= ~0x0003;
+ if ((ctl & PCIEM_LINK_CTL_ASPMC) != 0) {
+ ctl &= ~PCIEM_LINK_CTL_ASPMC;
pci_write_config(dev, sc->rl_expcap +
PCIER_LINK_CTL, ctl, 2);
device_printf(dev, "ASPM disabled\n");
@@ -1490,11 +1491,12 @@ re_attach(device_t dev)
RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP |
RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 |
RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_WOL_MANLINK |
- RL_FLAG_EARLYOFFV2 | RL_FLAG_RXDV_GATED;
+ RL_FLAG_8168G_PLUS;
break;
case RL_HWREV_8168GU:
+ case RL_HWREV_8168H:
if (pci_get_device(dev) == RT_DEVICEID_8101E) {
- /* RTL8106EUS */
+ /* RTL8106E(US), RTL8107E */
sc->rl_flags |= RL_FLAG_FASTETHER;
} else
sc->rl_flags |= RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK;
@@ -1502,7 +1504,7 @@ re_attach(device_t dev)
sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR |
RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP |
RL_FLAG_AUTOPAD | RL_FLAG_CMDSTOP_WAIT_TXQ |
- RL_FLAG_EARLYOFFV2 | RL_FLAG_RXDV_GATED;
+ RL_FLAG_8168G_PLUS;
break;
case RL_HWREV_8169_8110SB:
case RL_HWREV_8169_8110SBL:
@@ -1652,8 +1654,11 @@ re_attach(device_t dev)
phy = RE_PHYAD_INTERNAL;
if (sc->rl_type == RL_8169)
phy = 1;
+ capmask = BMSR_DEFCAPMASK;
+ if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0)
+ capmask &= ~BMSR_EXTSTAT;
error = mii_attach(dev, &sc->rl_miibus, ifp, re_ifmedia_upd,
- re_ifmedia_sts, BMSR_DEFCAPMASK, phy, MII_OFFSET_ANY, MIIF_DOPAUSE);
+ re_ifmedia_sts, capmask, phy, MII_OFFSET_ANY, MIIF_DOPAUSE);
if (error != 0) {
device_printf(dev, "attaching PHYs failed\n");
goto fail;
@@ -1676,7 +1681,7 @@ re_attach(device_t dev)
/*
* Don't enable TSO by default. It is known to generate
* corrupted TCP segments(bad TCP options) under certain
- * circumtances.
+ * circumstances.
*/
ifp->if_hwassist &= ~CSUM_TSO;
ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO);
@@ -1688,18 +1693,18 @@ re_attach(device_t dev)
* Must appear after the call to ether_ifattach() because
* ether_ifattach() sets ifi_hdrlen to the default value.
*/
- ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
+ ifp->if_hdrlen = sizeof(struct ether_vlan_header);
#ifdef DEV_NETMAP
re_netmap_attach(sc);
#endif /* DEV_NETMAP */
+
#ifdef RE_DIAG
/*
* Perform hardware diagnostic on the original RTL8169.
* Some 32-bit cards were incorrectly wired and would
* malfunction if plugged into a 64-bit slot.
*/
-
if (hwrev == RL_HWREV_8169) {
error = re_diag(sc);
if (error) {
@@ -1731,7 +1736,6 @@ re_attach(device_t dev)
}
fail:
-
if (error)
re_detach(dev);
@@ -1825,10 +1829,10 @@ re_detach(device_t dev)
/* Unload and free the RX DMA ring memory and map */
if (sc->rl_ldata.rl_rx_list_tag) {
- if (sc->rl_ldata.rl_rx_list_map)
+ if (sc->rl_ldata.rl_rx_list_addr)
bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag,
sc->rl_ldata.rl_rx_list_map);
- if (sc->rl_ldata.rl_rx_list_map && sc->rl_ldata.rl_rx_list)
+ if (sc->rl_ldata.rl_rx_list)
bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag,
sc->rl_ldata.rl_rx_list,
sc->rl_ldata.rl_rx_list_map);
@@ -1838,10 +1842,10 @@ re_detach(device_t dev)
/* Unload and free the TX DMA ring memory and map */
if (sc->rl_ldata.rl_tx_list_tag) {
- if (sc->rl_ldata.rl_tx_list_map)
+ if (sc->rl_ldata.rl_tx_list_addr)
bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag,
sc->rl_ldata.rl_tx_list_map);
- if (sc->rl_ldata.rl_tx_list_map && sc->rl_ldata.rl_tx_list)
+ if (sc->rl_ldata.rl_tx_list)
bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag,
sc->rl_ldata.rl_tx_list,
sc->rl_ldata.rl_tx_list_map);
@@ -1883,10 +1887,10 @@ re_detach(device_t dev)
/* Unload and free the stats buffer and map */
if (sc->rl_ldata.rl_stag) {
- if (sc->rl_ldata.rl_smap)
+ if (sc->rl_ldata.rl_stats_addr)
bus_dmamap_unload(sc->rl_ldata.rl_stag,
sc->rl_ldata.rl_smap);
- if (sc->rl_ldata.rl_smap && sc->rl_ldata.rl_stats)
+ if (sc->rl_ldata.rl_stats)
bus_dmamem_free(sc->rl_ldata.rl_stag,
sc->rl_ldata.rl_stats, sc->rl_ldata.rl_smap);
bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
@@ -2248,7 +2252,7 @@ re_rxeof(struct rl_softc *sc, int *rx_npktsp)
(rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT)
rxerr = 0;
if (rxerr != 0) {
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
/*
* If this is part of a multi-fragment packet,
* discard all the pieces.
@@ -2271,7 +2275,7 @@ re_rxeof(struct rl_softc *sc, int *rx_npktsp)
else
rxerr = re_newbuf(sc, i);
if (rxerr != 0) {
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
if (sc->rl_head != NULL) {
m_freem(sc->rl_head);
sc->rl_head = sc->rl_tail = NULL;
@@ -2313,7 +2317,7 @@ re_rxeof(struct rl_softc *sc, int *rx_npktsp)
#ifdef RE_FIXUP_RX
re_fixup_rx(m);
#endif
- ifp->if_ipackets++;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
m->m_pkthdr.rcvif = ifp;
/* Do RX checksumming if enabled */
@@ -2432,11 +2436,11 @@ re_txeof(struct rl_softc *sc)
txd->tx_m = NULL;
if (txstat & (RL_TDESC_STAT_EXCESSCOL|
RL_TDESC_STAT_COLCNT))
- ifp->if_collisions++;
+ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1);
if (txstat & RL_TDESC_STAT_TXERRSUM)
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
else
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
}
sc->rl_ldata.rl_tx_free++;
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
@@ -2551,7 +2555,7 @@ re_intr(void *arg)
return (FILTER_STRAY);
CSR_WRITE_2(sc, RL_IMR, 0);
- taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
+ taskqueue_enqueue(taskqueue_fast, &sc->rl_inttask);
return (FILTER_HANDLED);
}
@@ -2619,7 +2623,7 @@ re_int_task(void *arg, int npending)
RL_UNLOCK(sc);
if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) {
- taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
+ taskqueue_enqueue(taskqueue_fast, &sc->rl_inttask);
return;
}
@@ -2827,7 +2831,7 @@ re_encap(struct rl_softc *sc, struct mbuf **m_head)
/*
* Unconditionally enable IP checksum if TCP or UDP
* checksum is required. Otherwise, TCP/UDP checksum
- * does't make effects.
+ * doesn't make effects.
*/
if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) {
if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
@@ -2937,6 +2941,7 @@ re_start_locked(struct ifnet *ifp)
return;
}
#endif /* DEV_NETMAP */
+
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0)
return;
@@ -3192,14 +3197,18 @@ re_init_locked(struct rl_softc *sc)
CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO,
RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr));
- if ((sc->rl_flags & RL_FLAG_RXDV_GATED) != 0)
+ if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) {
+ /* Disable RXDV gate. */
CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) &
~0x00080000);
+ }
/*
- * Enable transmit and receive.
+ * Enable transmit and receive for pre-RTL8168G controllers.
+ * RX/TX MACs should be enabled before RX/TX configuration.
*/
- CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
+ if ((sc->rl_flags & RL_FLAG_8168G_PLUS) == 0)
+ CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB | RL_CMD_RX_ENB);
/*
* Set the initial TX configuration.
@@ -3227,6 +3236,13 @@ re_init_locked(struct rl_softc *sc)
CSR_WRITE_2(sc, RL_INTRMOD, 0x5100);
}
+ /*
+ * Enable transmit and receive for RTL8168G and later controllers.
+ * RX/TX MACs should be enabled after RX/TX configuration.
+ */
+ if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0)
+ CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB | RL_CMD_RX_ENB);
+
#ifdef DEVICE_POLLING
/*
* Disable interrupts if we are polling.
@@ -3250,10 +3266,6 @@ re_init_locked(struct rl_softc *sc)
/* Start RX/TX process. */
CSR_WRITE_4(sc, RL_MISSEDPKT, 0);
-#ifdef notdef
- /* Enable receiver and transmitter. */
- CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
-#endif
/*
* Initialize the timer interrupt register so that
@@ -3294,7 +3306,7 @@ re_init_locked(struct rl_softc *sc)
if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) {
/*
* For controllers that use new jumbo frame scheme,
- * set maximum size of jumbo frame depedning on
+ * set maximum size of jumbo frame depending on
* controller revisions.
*/
if (ifp->if_mtu > RL_MTU)
@@ -3546,7 +3558,7 @@ re_watchdog(struct rl_softc *sc)
}
if_printf(ifp, "watchdog timeout\n");
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
re_rxeof(sc, NULL);
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
@@ -3585,6 +3597,12 @@ re_stop(struct rl_softc *sc)
~(RL_RXCFG_RX_ALLPHYS | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_MULTI |
RL_RXCFG_RX_BROAD));
+ if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) {
+ /* Enable RXDV gate. */
+ CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) |
+ 0x00080000);
+ }
+
if ((sc->rl_flags & RL_FLAG_WAIT_TXPOLL) != 0) {
for (i = RL_TIMEOUT; i > 0; i--) {
if ((CSR_READ_1(sc, sc->rl_txstart) &
@@ -3836,6 +3854,11 @@ re_setwol(struct rl_softc *sc)
CSR_READ_1(sc, RL_GPIO) & ~0x01);
}
if ((ifp->if_capenable & IFCAP_WOL) != 0) {
+ if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) {
+ /* Disable RXDV gate. */
+ CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) &
+ ~0x00080000);
+ }
re_set_rxmode(sc);
if ((sc->rl_flags & RL_FLAG_WOL_MANLINK) != 0)
re_set_linkspeed(sc);
@@ -3948,7 +3971,6 @@ re_add_sysctls(struct rl_softc *sc)
sc->rl_int_rx_mod = RL_TIMER_DEFAULT;
}
}
-
}
static int
@@ -3990,7 +4012,7 @@ re_sysctl_stats(SYSCTL_HANDLER_ARGS)
RL_UNLOCK(sc);
if (i == 0) {
device_printf(sc->rl_dev,
- "DUMP statistics request timedout\n");
+ "DUMP statistics request timed out\n");
return (ETIMEDOUT);
}
done:
diff --git a/freebsd/sys/pci/if_rlreg.h b/freebsd/sys/dev/rl/if_rlreg.h
index b0de60f4..2cef251b 100644
--- a/freebsd/sys/pci/if_rlreg.h
+++ b/freebsd/sys/dev/rl/if_rlreg.h
@@ -195,6 +195,7 @@
#define RL_HWREV_8168G 0x4C000000
#define RL_HWREV_8168EP 0x50000000
#define RL_HWREV_8168GU 0x50800000
+#define RL_HWREV_8168H 0x54000000
#define RL_HWREV_8411B 0x5C800000
#define RL_HWREV_8139 0x60000000
#define RL_HWREV_8139A 0x70000000
@@ -680,7 +681,7 @@ struct rl_desc {
#define RL_TDESC_STAT_LINKFAIL 0x00200000 /* link faulure */
#define RL_TDESC_STAT_OWINCOL 0x00400000 /* out-of-window collision */
#define RL_TDESC_STAT_TXERRSUM 0x00800000 /* transmit error summary */
-#define RL_TDESC_STAT_UNDERRUN 0x02000000 /* TX underrun occured */
+#define RL_TDESC_STAT_UNDERRUN 0x02000000 /* TX underrun occurred */
#define RL_TDESC_STAT_OWN 0x80000000
/*
@@ -930,8 +931,7 @@ struct rl_softc {
#define RL_FLAG_CMDSTOP_WAIT_TXQ 0x00008000
#define RL_FLAG_WOL_MANLINK 0x00010000
#define RL_FLAG_EARLYOFF 0x00020000
-#define RL_FLAG_EARLYOFFV2 0x00040000
-#define RL_FLAG_RXDV_GATED 0x00080000
+#define RL_FLAG_8168G_PLUS 0x00040000
#define RL_FLAG_PCIE 0x40000000
#define RL_FLAG_LINK 0x80000000
};
diff --git a/freebsd/sys/dev/smc/if_smc.c b/freebsd/sys/dev/smc/if_smc.c
index 8d3740cc..37f89a51 100644
--- a/freebsd/sys/dev/smc/if_smc.c
+++ b/freebsd/sys/dev/smc/if_smc.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
@@ -235,7 +236,7 @@ smc_probe(device_t dev)
if (sc->smc_usemem)
type = SYS_RES_MEMORY;
- reg = bus_alloc_resource(dev, type, &rid, 0, ~0, 16, RF_ACTIVE);
+ reg = bus_alloc_resource_anywhere(dev, type, &rid, 16, RF_ACTIVE);
if (reg == NULL) {
if (bootverbose)
device_printf(dev,
@@ -329,15 +330,15 @@ smc_attach(device_t dev)
type = SYS_RES_MEMORY;
sc->smc_reg_rid = 0;
- sc->smc_reg = bus_alloc_resource(dev, type, &sc->smc_reg_rid, 0, ~0,
+ sc->smc_reg = bus_alloc_resource_anywhere(dev, type, &sc->smc_reg_rid,
16, RF_ACTIVE);
if (sc->smc_reg == NULL) {
error = ENXIO;
goto done;
}
- sc->smc_irq = bus_alloc_resource(dev, SYS_RES_IRQ, &sc->smc_irq_rid, 0,
- ~0, 1, RF_ACTIVE | RF_SHAREABLE);
+ sc->smc_irq = bus_alloc_resource_anywhere(dev, SYS_RES_IRQ,
+ &sc->smc_irq_rid, 1, RF_ACTIVE | RF_SHAREABLE);
if (sc->smc_irq == NULL) {
error = ENXIO;
goto done;
@@ -513,7 +514,7 @@ smc_start_locked(struct ifnet *ifp)
len += (len & 1);
if (len > ETHER_MAX_LEN - ETHER_CRC_LEN) {
if_printf(ifp, "large packet discarded\n");
- ++ifp->if_oerrors;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
m_freem(m);
return; /* XXX readcheck? */
}
@@ -528,7 +529,7 @@ smc_start_locked(struct ifnet *ifp)
* Work out how many 256 byte "pages" we need. We have to include the
* control data for the packet in this calculation.
*/
- npages = (len * PKT_CTRL_DATA_LEN) >> 8;
+ npages = (len + PKT_CTRL_DATA_LEN) >> 8;
if (npages == 0)
npages = 1;
@@ -561,7 +562,7 @@ smc_start_locked(struct ifnet *ifp)
return;
}
- taskqueue_enqueue_fast(sc->smc_tq, &sc->smc_tx);
+ taskqueue_enqueue(sc->smc_tq, &sc->smc_tx);
}
static void
@@ -599,7 +600,7 @@ smc_task_tx(void *context, int pending)
*/
if (packet & ARR_FAILED) {
IFQ_DRV_PREPEND(&ifp->if_snd, m);
- ++ifp->if_oerrors;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
smc_start_locked(ifp);
SMC_UNLOCK(sc);
@@ -656,7 +657,7 @@ smc_task_tx(void *context, int pending)
/*
* Finish up.
*/
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
SMC_UNLOCK(sc);
BPF_MTAP(ifp, m0);
@@ -694,8 +695,7 @@ smc_task_rx(void *context, int pending)
if (m == NULL) {
break;
}
- MCLGET(m, M_NOWAIT);
- if ((m->m_flags & M_EXT) == 0) {
+ if (!(MCLGET(m, M_NOWAIT))) {
m_freem(m);
break;
}
@@ -722,7 +722,7 @@ smc_task_rx(void *context, int pending)
if (status & (RX_TOOSHORT | RX_TOOLNG | RX_BADCRC | RX_ALGNERR)) {
smc_mmu_wait(sc);
smc_write_2(sc, MMUCR, MMUCR_CMD_RELEASE);
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
m_freem(m);
break;
}
@@ -778,7 +778,7 @@ smc_task_rx(void *context, int pending)
m = mhead;
mhead = mhead->m_next;
m->m_next = NULL;
- ifp->if_ipackets++;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
(*ifp->if_input)(ifp, m);
}
}
@@ -799,7 +799,7 @@ smc_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
SMC_UNLOCK(sc);
if (cmd == POLL_AND_CHECK_STATUS)
- taskqueue_enqueue_fast(sc->smc_tq, &sc->smc_intr);
+ taskqueue_enqueue(sc->smc_tq, &sc->smc_intr);
}
#endif
@@ -807,15 +807,31 @@ static int
smc_intr(void *context)
{
struct smc_softc *sc;
-
+ uint32_t curbank;
+
sc = (struct smc_softc *)context;
#ifdef __rtems__
SMC_LOCK(sc);
+#endif /* __rtems__ */
+
+ /*
+ * Save current bank and restore later in this function
+ */
+ curbank = (smc_read_2(sc, BSR) & BSR_BANK_MASK);
+
+ /*
+ * Block interrupts in order to let smc_task_intr to kick in
+ */
smc_select_bank(sc, 2);
smc_write_1(sc, MSK, 0);
+
+ /* Restore bank */
+ smc_select_bank(sc, curbank);
+#ifdef __rtems__
SMC_UNLOCK(sc);
#endif /* __rtems__ */
- taskqueue_enqueue_fast(sc->smc_tq, &sc->smc_intr);
+
+ taskqueue_enqueue(sc->smc_tq, &sc->smc_intr);
return (FILTER_HANDLED);
}
@@ -835,13 +851,6 @@ smc_task_intr(void *context, int pending)
smc_select_bank(sc, 2);
/*
- * Get the current mask, and then block all interrupts while we're
- * working.
- */
- if ((ifp->if_capenable & IFCAP_POLLING) == 0)
- smc_write_1(sc, MSK, 0);
-
- /*
* Find out what interrupts are flagged.
*/
status = smc_read_1(sc, IST) & sc->smc_mask;
@@ -855,13 +864,19 @@ smc_task_intr(void *context, int pending)
*/
packet = smc_read_1(sc, FIFO_TX);
if ((packet & FIFO_EMPTY) == 0) {
+ callout_stop(&sc->smc_watchdog);
+ smc_select_bank(sc, 2);
smc_write_1(sc, PNR, packet);
smc_write_2(sc, PTR, 0 | PTR_READ |
PTR_AUTO_INCR);
- tcr = smc_read_2(sc, DATA0);
+ smc_select_bank(sc, 0);
+ tcr = smc_read_2(sc, EPHSR);
+#if 0
if ((tcr & EPHSR_TX_SUC) == 0)
device_printf(sc->smc_dev,
"bad packet\n");
+#endif
+ smc_select_bank(sc, 2);
smc_mmu_wait(sc);
smc_write_2(sc, MMUCR, MMUCR_CMD_RELEASE_PKT);
@@ -870,7 +885,7 @@ smc_task_intr(void *context, int pending)
tcr |= TCR_TXENA | TCR_PAD_EN;
smc_write_2(sc, TCR, tcr);
smc_select_bank(sc, 2);
- taskqueue_enqueue_fast(sc->smc_tq, &sc->smc_tx);
+ taskqueue_enqueue(sc->smc_tq, &sc->smc_tx);
}
/*
@@ -885,7 +900,7 @@ smc_task_intr(void *context, int pending)
if (status & RCV_INT) {
smc_write_1(sc, ACK, RCV_INT);
sc->smc_mask &= ~RCV_INT;
- taskqueue_enqueue_fast(sc->smc_tq, &sc->smc_rx);
+ taskqueue_enqueue(sc->smc_tq, &sc->smc_rx);
}
/*
@@ -894,7 +909,7 @@ smc_task_intr(void *context, int pending)
if (status & ALLOC_INT) {
smc_write_1(sc, ACK, ALLOC_INT);
sc->smc_mask &= ~ALLOC_INT;
- taskqueue_enqueue_fast(sc->smc_tq, &sc->smc_tx);
+ taskqueue_enqueue(sc->smc_tq, &sc->smc_tx);
}
/*
@@ -902,7 +917,7 @@ smc_task_intr(void *context, int pending)
*/
if (status & RX_OVRN_INT) {
smc_write_1(sc, ACK, RX_OVRN_INT);
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
/*
@@ -919,20 +934,20 @@ smc_task_intr(void *context, int pending)
smc_select_bank(sc, 0);
counter = smc_read_2(sc, ECR);
smc_select_bank(sc, 2);
- ifp->if_collisions +=
- (counter & ECR_SNGLCOL_MASK) >> ECR_SNGLCOL_SHIFT;
- ifp->if_collisions +=
- (counter & ECR_MULCOL_MASK) >> ECR_MULCOL_SHIFT;
+ if_inc_counter(ifp, IFCOUNTER_COLLISIONS,
+ ((counter & ECR_SNGLCOL_MASK) >> ECR_SNGLCOL_SHIFT) +
+ ((counter & ECR_MULCOL_MASK) >> ECR_MULCOL_SHIFT));
/*
* See if there are any packets to transmit.
*/
- taskqueue_enqueue_fast(sc->smc_tq, &sc->smc_tx);
+ taskqueue_enqueue(sc->smc_tq, &sc->smc_tx);
}
/*
* Update the interrupt mask.
*/
+ smc_select_bank(sc, 2);
if ((ifp->if_capenable & IFCAP_POLLING) == 0)
smc_write_1(sc, MSK, sc->smc_mask);
@@ -1226,7 +1241,7 @@ smc_watchdog(void *arg)
sc = (struct smc_softc *)arg;
device_printf(sc->smc_dev, "watchdog timeout\n");
- taskqueue_enqueue_fast(sc->smc_tq, &sc->smc_intr);
+ taskqueue_enqueue(sc->smc_tq, &sc->smc_intr);
}
static void
@@ -1245,9 +1260,10 @@ smc_init_locked(struct smc_softc *sc)
{
struct ifnet *ifp;
- ifp = sc->smc_ifp;
-
SMC_ASSERT_LOCKED(sc);
+ ifp = sc->smc_ifp;
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
+ return;
smc_reset(sc);
smc_enable(sc);
diff --git a/freebsd/sys/dev/tsec/if_tsec.c b/freebsd/sys/dev/tsec/if_tsec.c
index 45b4716e..5b94af9c 100644
--- a/freebsd/sys/dev/tsec/if_tsec.c
+++ b/freebsd/sys/dev/tsec/if_tsec.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
@@ -115,6 +116,8 @@ DRIVER_MODULE(miibus, tsec, miibus_driver, miibus_devclass, 0, 0);
MODULE_DEPEND(tsec, ether, 1, 1, 1);
MODULE_DEPEND(tsec, miibus, 1, 1, 1);
+struct mtx tsec_phy_mtx;
+
int
tsec_attach(struct tsec_softc *sc)
{
@@ -125,6 +128,10 @@ tsec_attach(struct tsec_softc *sc)
int error = 0;
int i;
+ /* Initialize global (because potentially shared) MII lock */
+ if (!mtx_initialized(&tsec_phy_mtx))
+ mtx_init(&tsec_phy_mtx, "tsec mii", NULL, MTX_DEF);
+
/* Reset all TSEC counters */
TSEC_TX_RX_COUNTERS_INIT(sc);
@@ -251,7 +258,6 @@ tsec_attach(struct tsec_softc *sc)
ifp->if_softc = sc;
if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev));
- ifp->if_mtu = ETHERMTU;
ifp->if_flags = IFF_SIMPLEX | IFF_MULTICAST | IFF_BROADCAST;
ifp->if_init = tsec_init;
ifp->if_start = tsec_start;
@@ -420,21 +426,24 @@ tsec_init_locked(struct tsec_softc *sc)
*/
TSEC_WRITE(sc, TSEC_REG_TBIPA, 5);
+ TSEC_PHY_LOCK(sc);
+
/* Step 6: Reset the management interface */
- TSEC_WRITE(sc->phy_sc, TSEC_REG_MIIMCFG, TSEC_MIIMCFG_RESETMGMT);
+ TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCFG, TSEC_MIIMCFG_RESETMGMT);
/* Step 7: Setup the MII Mgmt clock speed */
- TSEC_WRITE(sc->phy_sc, TSEC_REG_MIIMCFG, TSEC_MIIMCFG_CLKDIV28);
+ TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCFG, TSEC_MIIMCFG_CLKDIV28);
/* Step 8: Read MII Mgmt indicator register and check for Busy = 0 */
timeout = TSEC_READ_RETRY;
- while (--timeout && (TSEC_READ(sc->phy_sc, TSEC_REG_MIIMIND) &
+ while (--timeout && (TSEC_PHY_READ(sc, TSEC_REG_MIIMIND) &
TSEC_MIIMIND_BUSY))
DELAY(TSEC_READ_DELAY);
if (timeout == 0) {
if_printf(ifp, "tsec_init_locked(): Mgmt busy timeout\n");
return;
}
+ TSEC_PHY_UNLOCK(sc);
/* Step 9: Setup the MII Mgmt */
#ifdef __rtems__
@@ -568,7 +577,7 @@ tsec_set_mac_address(struct tsec_softc *sc)
TSEC_GLOBAL_LOCK_ASSERT(sc);
KASSERT((ETHER_ADDR_LEN <= sizeof(macbuf)),
- ("tsec_set_mac_address: (%d <= %d", ETHER_ADDR_LEN,
+ ("tsec_set_mac_address: (%d <= %zd", ETHER_ADDR_LEN,
sizeof(macbuf)));
macbufp = (char *)macbuf;
@@ -694,7 +703,7 @@ tsec_watchdog(struct tsec_softc *sc)
return;
ifp = sc->tsec_ifp;
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
if_printf(ifp, "watchdog timeout\n");
tsec_stop(sc);
@@ -1372,7 +1381,7 @@ tsec_receive_intr_locked(struct tsec_softc *sc, int count)
if (tsec_new_rxbuf(sc->tsec_rx_mtag, rx_data[i].map,
&rx_data[i].mbuf, &rx_data[i].paddr)) {
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
/*
* We ran out of mbufs; didn't consume current
* descriptor and have to return it to the queue.
@@ -1453,7 +1462,7 @@ tsec_transmit_intr_locked(struct tsec_softc *sc)
ifp = sc->tsec_ifp;
/* Update collision statistics */
- ifp->if_collisions += TSEC_READ(sc, TSEC_REG_MON_TNCL);
+ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, TSEC_READ(sc, TSEC_REG_MON_TNCL));
/* Reset collision counters in hardware */
TSEC_WRITE(sc, TSEC_REG_MON_TSCL, 0);
@@ -1488,7 +1497,7 @@ tsec_transmit_intr_locked(struct tsec_softc *sc)
TSEC_FREE_TX_MAP(sc, mapp);
m_freem(m0);
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
send = 1;
}
bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap,
@@ -1545,18 +1554,18 @@ tsec_error_intr_locked(struct tsec_softc *sc, int count)
/* Check transmitter errors */
if (eflags & TSEC_IEVENT_TXE) {
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
if (eflags & TSEC_IEVENT_LC)
- ifp->if_collisions++;
+ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1);
TSEC_WRITE(sc, TSEC_REG_TSTAT, TSEC_TSTAT_THLT);
}
/* Check receiver errors */
if (eflags & TSEC_IEVENT_BSY) {
- ifp->if_ierrors++;
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
/* Get data from RX buffers */
tsec_receive_intr_locked(sc, count);
@@ -1573,10 +1582,10 @@ tsec_error_intr_locked(struct tsec_softc *sc, int count)
}
if (eflags & TSEC_IEVENT_BABT)
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
if (eflags & TSEC_IEVENT_BABR)
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
void
@@ -1594,22 +1603,27 @@ tsec_miibus_readreg(device_t dev, int phy, int reg)
{
struct tsec_softc *sc;
uint32_t timeout;
+ int rv;
sc = device_get_softc(dev);
- TSEC_WRITE(sc->phy_sc, TSEC_REG_MIIMADD, (phy << 8) | reg);
- TSEC_WRITE(sc->phy_sc, TSEC_REG_MIIMCOM, 0);
- TSEC_WRITE(sc->phy_sc, TSEC_REG_MIIMCOM, TSEC_MIIMCOM_READCYCLE);
+ TSEC_PHY_LOCK();
+ TSEC_PHY_WRITE(sc, TSEC_REG_MIIMADD, (phy << 8) | reg);
+ TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCOM, 0);
+ TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCOM, TSEC_MIIMCOM_READCYCLE);
timeout = TSEC_READ_RETRY;
- while (--timeout && TSEC_READ(sc->phy_sc, TSEC_REG_MIIMIND) &
+ while (--timeout && TSEC_PHY_READ(sc, TSEC_REG_MIIMIND) &
(TSEC_MIIMIND_NOTVALID | TSEC_MIIMIND_BUSY))
DELAY(TSEC_READ_DELAY);
if (timeout == 0)
device_printf(dev, "Timeout while reading from PHY!\n");
- return (TSEC_READ(sc->phy_sc, TSEC_REG_MIIMSTAT));
+ rv = TSEC_PHY_READ(sc, TSEC_REG_MIIMSTAT);
+ TSEC_PHY_UNLOCK();
+
+ return (rv);
}
int
@@ -1620,13 +1634,15 @@ tsec_miibus_writereg(device_t dev, int phy, int reg, int value)
sc = device_get_softc(dev);
- TSEC_WRITE(sc->phy_sc, TSEC_REG_MIIMADD, (phy << 8) | reg);
- TSEC_WRITE(sc->phy_sc, TSEC_REG_MIIMCON, value);
+ TSEC_PHY_LOCK();
+ TSEC_PHY_WRITE(sc, TSEC_REG_MIIMADD, (phy << 8) | reg);
+ TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCON, value);
timeout = TSEC_READ_RETRY;
- while (--timeout && (TSEC_READ(sc->phy_sc, TSEC_REG_MIIMIND) &
+ while (--timeout && (TSEC_READ(sc, TSEC_REG_MIIMIND) &
TSEC_MIIMIND_BUSY))
DELAY(TSEC_READ_DELAY);
+ TSEC_PHY_UNLOCK();
if (timeout == 0)
device_printf(dev, "Timeout while writing to PHY!\n");
diff --git a/freebsd/sys/dev/tsec/if_tsec.h b/freebsd/sys/dev/tsec/if_tsec.h
index e4bbc9ae..108b0f21 100644
--- a/freebsd/sys/dev/tsec/if_tsec.h
+++ b/freebsd/sys/dev/tsec/if_tsec.h
@@ -62,12 +62,12 @@ struct tsec_softc {
bus_dma_tag_t tsec_tx_dtag; /* TX descriptors tag */
bus_dmamap_t tsec_tx_dmap; /* TX descriptors map */
struct tsec_desc *tsec_tx_vaddr;/* vadress of TX descriptors */
- uint32_t tsec_tx_raddr; /* real adress of TX descriptors */
+ uint32_t tsec_tx_raddr; /* real address of TX descriptors */
bus_dma_tag_t tsec_rx_dtag; /* RX descriptors tag */
bus_dmamap_t tsec_rx_dmap; /* RX descriptors map */
struct tsec_desc *tsec_rx_vaddr; /* vadress of RX descriptors */
- uint32_t tsec_rx_raddr; /* real adress of RX descriptors */
+ uint32_t tsec_rx_raddr; /* real address of RX descriptors */
bus_dma_tag_t tsec_tx_mtag; /* TX mbufs tag */
bus_dma_tag_t tsec_rx_mtag; /* TX mbufs tag */
@@ -75,7 +75,7 @@ struct tsec_softc {
struct rx_data_type {
bus_dmamap_t map; /* mbuf map */
struct mbuf *mbuf;
- uint32_t paddr; /* DMA addres of buffer */
+ uint32_t paddr; /* DMA address of buffer */
} rx_data[TSEC_RX_NUM_DESC];
uint32_t tx_cur_desc_cnt;
@@ -135,7 +135,8 @@ struct tsec_softc {
struct mbuf *frame;
int phyaddr;
- struct tsec_softc *phy_sc;
+ bus_space_tag_t phy_bst;
+ bus_space_handle_t phy_bsh;
};
/* interface to get/put generic objects */
@@ -255,6 +256,14 @@ struct tsec_softc {
#define TSEC_WRITE(sc, reg, val) \
bus_space_write_4((sc)->sc_bas.bst, (sc)->sc_bas.bsh, (reg), (val))
+extern struct mtx tsec_phy_mtx;
+#define TSEC_PHY_LOCK(sc) mtx_lock(&tsec_phy_mtx)
+#define TSEC_PHY_UNLOCK(sc) mtx_unlock(&tsec_phy_mtx)
+#define TSEC_PHY_READ(sc, reg) \
+ bus_space_read_4((sc)->phy_bst, (sc)->phy_bsh, (reg))
+#define TSEC_PHY_WRITE(sc, reg, val) \
+ bus_space_write_4((sc)->phy_bst, (sc)->phy_bsh, (reg), (val))
+
/* Lock for transmitter */
#define TSEC_TRANSMIT_LOCK(sc) do { \
mtx_assert(&(sc)->receive_lock, MA_NOTOWNED); \
diff --git a/freebsd/sys/dev/tsec/if_tsecreg.h b/freebsd/sys/dev/tsec/if_tsecreg.h
index 4ba1997e..1994298c 100644
--- a/freebsd/sys/dev/tsec/if_tsecreg.h
+++ b/freebsd/sys/dev/tsec/if_tsecreg.h
@@ -77,12 +77,13 @@
* register */
#define TSEC_REG_HAFDUP 0x50c /* Half-duplex register */
#define TSEC_REG_MAXFRM 0x510 /* Maximum frame length register */
-#define TSEC_REG_MIIMCFG 0x520 /* MII Management configuration register */
-#define TSEC_REG_MIIMCOM 0x524 /* MII Management command register */
-#define TSEC_REG_MIIMADD 0x528 /* MII Management address register */
-#define TSEC_REG_MIIMCON 0x52c /* MII Management control register */
-#define TSEC_REG_MIIMSTAT 0x530 /* MII Management status register */
-#define TSEC_REG_MIIMIND 0x534 /* MII Management indicator register */
+#define TSEC_REG_MIIBASE 0x520 /* MII Management base, rest offsets */
+#define TSEC_REG_MIIMCFG 0x0 /* MII Management configuration register */
+#define TSEC_REG_MIIMCOM 0x4 /* MII Management command register */
+#define TSEC_REG_MIIMADD 0x8 /* MII Management address register */
+#define TSEC_REG_MIIMCON 0xc /* MII Management control register */
+#define TSEC_REG_MIIMSTAT 0x10 /* MII Management status register */
+#define TSEC_REG_MIIMIND 0x14 /* MII Management indicator register */
#define TSEC_REG_IFSTAT 0x53c /* Interface status register */
#define TSEC_REG_MACSTNADDR1 0x540 /* Station address register, part 1 */
#define TSEC_REG_MACSTNADDR2 0x544 /* Station address register, part 2 */
diff --git a/freebsd/sys/fs/devfs/devfs_int.h b/freebsd/sys/fs/devfs/devfs_int.h
index 1f4f3e69..670aba16 100644
--- a/freebsd/sys/fs/devfs/devfs_int.h
+++ b/freebsd/sys/fs/devfs/devfs_int.h
@@ -72,7 +72,7 @@ struct cdev_priv {
#endif /* __rtems__ */
};
-#define cdev2priv(c) member2struct(cdev_priv, cdp_c, c)
+#define cdev2priv(c) __containerof(c, struct cdev_priv, cdp_c)
struct cdev *devfs_alloc(int);
int devfs_dev_exists(const char *);
diff --git a/freebsd/sys/h8300/include/machine/in_cksum.h b/freebsd/sys/h8300/include/machine/in_cksum.h
index 633efa1f..72edfba9 100644
--- a/freebsd/sys/h8300/include/machine/in_cksum.h
+++ b/freebsd/sys/h8300/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/i386/include/machine/cpufunc.h b/freebsd/sys/i386/include/machine/cpufunc.h
index 1c519920..df283e73 100644
--- a/freebsd/sys/i386/include/machine/cpufunc.h
+++ b/freebsd/sys/i386/include/machine/cpufunc.h
@@ -42,17 +42,6 @@
#error this file needs sys/cdefs.h as a prerequisite
#endif
-#ifdef XEN
-extern void xen_cli(void);
-extern void xen_sti(void);
-extern u_int xen_rcr2(void);
-extern void xen_load_cr3(u_int data);
-extern void xen_tlb_flush(void);
-extern void xen_invlpg(u_int addr);
-extern void write_eflags(u_int eflags);
-extern u_int read_eflags(void);
-#endif
-
struct region_descriptor;
#define readb(va) (*(volatile uint8_t *) (va))
@@ -99,6 +88,13 @@ clflush(u_long addr)
}
static __inline void
+clflushopt(u_long addr)
+{
+
+ __asm __volatile(".byte 0x66;clflush %0" : : "m" (*(char *)addr));
+}
+
+static __inline void
clts(void)
{
@@ -108,11 +104,8 @@ clts(void)
static __inline void
disable_intr(void)
{
-#ifdef XEN
- xen_cli();
-#else
+
__asm __volatile("cli" : : : "memory");
-#endif
}
static __inline void
@@ -134,11 +127,8 @@ cpuid_count(u_int ax, u_int cx, u_int *p)
static __inline void
enable_intr(void)
{
-#ifdef XEN
- xen_sti();
-#else
+
__asm __volatile("sti");
-#endif
}
static __inline void
@@ -187,6 +177,14 @@ ffs(int mask)
return (mask == 0 ? mask : (int)bsfl((u_int)mask) + 1);
}
+#define HAVE_INLINE_FFSL
+
+static __inline int
+ffsl(long mask)
+{
+ return (ffs((int)mask));
+}
+
#define HAVE_INLINE_FLS
#endif /* __rtems__ */
@@ -198,6 +196,14 @@ fls(int mask)
}
#endif
+#define HAVE_INLINE_FLSL
+
+static __inline int
+flsl(long mask)
+{
+ return (fls((int)mask));
+}
+
#endif /* _KERNEL */
static __inline void
@@ -315,11 +321,7 @@ ia32_pause(void)
}
static __inline u_int
-#ifdef XEN
-_read_eflags(void)
-#else
read_eflags(void)
-#endif
{
u_int ef;
@@ -336,6 +338,15 @@ rdmsr(u_int msr)
return (rv);
}
+static __inline uint32_t
+rdmsr32(u_int msr)
+{
+ uint32_t low;
+
+ __asm __volatile("rdmsr" : "=a" (low) : "c" (msr) : "edx");
+ return (low);
+}
+
static __inline uint64_t
rdpmc(u_int pmc)
{
@@ -370,11 +381,7 @@ wbinvd(void)
}
static __inline void
-#ifdef XEN
-_write_eflags(u_int ef)
-#else
write_eflags(u_int ef)
-#endif
{
__asm __volatile("pushl %0; popfl" : : "r" (ef));
}
@@ -406,9 +413,6 @@ rcr2(void)
{
u_int data;
-#ifdef XEN
- return (xen_rcr2());
-#endif
__asm __volatile("movl %%cr2,%0" : "=r" (data));
return (data);
}
@@ -416,11 +420,8 @@ rcr2(void)
static __inline void
load_cr3(u_int data)
{
-#ifdef XEN
- xen_load_cr3(data);
-#else
+
__asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory");
-#endif
}
static __inline u_int
@@ -447,17 +448,33 @@ rcr4(void)
return (data);
}
+static __inline uint64_t
+rxcr(u_int reg)
+{
+ u_int low, high;
+
+ __asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg));
+ return (low | ((uint64_t)high << 32));
+}
+
+static __inline void
+load_xcr(u_int reg, uint64_t val)
+{
+ u_int low, high;
+
+ low = val;
+ high = val >> 32;
+ __asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high));
+}
+
/*
* Global TLB flush (except for thise for pages marked PG_G)
*/
static __inline void
invltlb(void)
{
-#ifdef XEN
- xen_tlb_flush();
-#else
+
load_cr3(rcr3());
-#endif
}
/*
@@ -468,11 +485,7 @@ static __inline void
invlpg(u_int addr)
{
-#ifdef XEN
- xen_invlpg(addr);
-#else
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
-#endif
}
static __inline u_short
diff --git a/freebsd/sys/i386/include/machine/in_cksum.h b/freebsd/sys/i386/include/machine/in_cksum.h
index 34d85be2..8816f3c8 100644
--- a/freebsd/sys/i386/include/machine/in_cksum.h
+++ b/freebsd/sys/i386/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/i386/include/machine/intr_machdep.h b/freebsd/sys/i386/include/machine/intr_machdep.h
index 123966eb..a8dd4564 100644
--- a/freebsd/sys/i386/include/machine/intr_machdep.h
+++ b/freebsd/sys/i386/include/machine/intr_machdep.h
@@ -44,12 +44,25 @@
* allocate IDT vectors.
*
* The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs.
- * IRQ values beyond 256 are used by MSI. We leave 255 unused to avoid
- * confusion since 255 is used in PCI to indicate an invalid IRQ.
+ * IRQ values from 256 to 767 are used by MSI. When running under the Xen
+ * Hypervisor, IRQ values from 768 to 4863 are available for binding to
+ * event channel events. We leave 255 unused to avoid confusion since 255 is
+ * used in PCI to indicate an invalid IRQ.
*/
#define NUM_MSI_INTS 512
#define FIRST_MSI_INT 256
-#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS)
+#ifdef XENHVM
+#include <xen/xen-os.h>
+#include <xen/interface/event_channel.h>
+#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS
+#define FIRST_EVTCHN_INT \
+ (FIRST_MSI_INT + NUM_MSI_INTS)
+#define LAST_EVTCHN_INT \
+ (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1)
+#else /* !XENHVM */
+#define NUM_EVTCHN_INTS 0
+#endif
+#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS)
/*
* Default base address for MSI messages on x86 platforms.
@@ -70,7 +83,7 @@
#ifndef LOCORE
-typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
+typedef void inthand_t(void);
#define IDTVEC(name) __CONCAT(X,name)
@@ -90,10 +103,11 @@ struct pic {
int (*pic_vector)(struct intsrc *);
int (*pic_source_pending)(struct intsrc *);
void (*pic_suspend)(struct pic *);
- void (*pic_resume)(struct pic *);
+ void (*pic_resume)(struct pic *, bool suspend_cancelled);
int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
enum intr_polarity);
int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
+ void (*pic_reprogram_pin)(struct intsrc *);
TAILQ_ENTRY(pic) pics;
};
@@ -120,8 +134,14 @@ struct intsrc {
struct trapframe;
+#ifdef SMP
+extern cpuset_t intr_cpus;
+#endif
extern struct mtx icu_lock;
extern int elcr_found;
+#ifdef SMP
+extern int msix_disable_migration;
+#endif
#ifndef DEV_ATPIC
void atpic_reset(void);
@@ -131,7 +151,9 @@ int elcr_probe(void);
enum intr_trigger elcr_read_trigger(u_int irq);
void elcr_resume(void);
void elcr_write_trigger(u_int irq, enum intr_trigger trigger);
+#ifdef SMP
void intr_add_cpu(u_int cpu);
+#endif
int intr_add_handler(const char *name, int vector, driver_filter_t filter,
driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep);
#ifdef SMP
@@ -146,8 +168,9 @@ struct intsrc *intr_lookup_source(int vector);
int intr_register_pic(struct pic *pic);
int intr_register_source(struct intsrc *isrc);
int intr_remove_handler(void *cookie);
-void intr_resume(void);
+void intr_resume(bool suspend_cancelled);
void intr_suspend(void);
+void intr_reprogram(void);
void intrcnt_add(const char *name, u_long **countp);
void nexus_add_irq(u_long irq);
int msi_alloc(device_t dev, int count, int maxcount, int *irqs);
diff --git a/freebsd/sys/i386/include/machine/md_var.h b/freebsd/sys/i386/include/machine/md_var.h
index 3563e6ce..c3caf07a 100644
--- a/freebsd/sys/i386/include/machine/md_var.h
+++ b/freebsd/sys/i386/include/machine/md_var.h
@@ -32,34 +32,12 @@
#ifndef _MACHINE_MD_VAR_H_
#define _MACHINE_MD_VAR_H_
-/*
- * Miscellaneous machine-dependent declarations.
- */
+#include <x86/x86_var.h>
-extern long Maxmem;
-extern u_int basemem; /* PA of original top of base memory */
-extern int busdma_swi_pending;
-extern u_int cpu_exthigh;
-extern u_int cpu_feature;
-extern u_int cpu_feature2;
-extern u_int amd_feature;
-extern u_int amd_feature2;
-extern u_int amd_pminfo;
-extern u_int via_feature_rng;
-extern u_int via_feature_xcrypt;
-extern u_int cpu_clflush_line_size;
-extern u_int cpu_fxsr;
-extern u_int cpu_high;
-extern u_int cpu_id;
-extern u_int cpu_mxcsr_mask;
-extern u_int cpu_procinfo;
-extern u_int cpu_procinfo2;
-extern char cpu_vendor[];
-extern u_int cpu_vendor_id;
extern u_int cyrix_did;
-extern char kstack[];
-extern char sigcode[];
-extern int szsigcode;
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+extern int has_f00f_bug;
+#endif
#ifdef COMPAT_FREEBSD4
extern int szfreebsd4_sigcode;
#endif
@@ -67,19 +45,11 @@ extern int szfreebsd4_sigcode;
extern int szosigcode;
#endif
extern uint32_t *vm_page_dump;
-extern int vm_page_dump_size;
-extern int workaround_erratum383;
-typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
-struct thread;
-struct reg;
-struct fpreg;
-struct dbreg;
-struct dumperinfo;
+struct segment_descriptor;
+union savefpu;
void bcopyb(const void *from, void *to, size_t len);
-void busdma_swi(void);
-void cpu_setregs(void);
void cpu_switch_load_gs(void) __asm(__STRING(cpu_switch_load_gs));
void doreti_iret(void) __asm(__STRING(doreti_iret));
void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
@@ -89,19 +59,15 @@ void doreti_popl_es(void) __asm(__STRING(doreti_popl_es));
void doreti_popl_es_fault(void) __asm(__STRING(doreti_popl_es_fault));
void doreti_popl_fs(void) __asm(__STRING(doreti_popl_fs));
void doreti_popl_fs_fault(void) __asm(__STRING(doreti_popl_fs_fault));
-void dump_add_page(vm_paddr_t);
-void dump_drop_page(vm_paddr_t);
-void initializecpu(void);
-void enable_sse(void);
-void fillw(int /*u_short*/ pat, void *base, size_t cnt);
+void finishidentcpu(void);
+void fill_based_sd(struct segment_descriptor *sdp, uint32_t base);
void i686_pagezero(void *addr);
void sse2_pagezero(void *addr);
void init_AMD_Elan_sc520(void);
-int is_physical_memory(vm_paddr_t addr);
-int isa_nmi(int cd);
vm_paddr_t kvtop(void *addr);
+void ppro_reenable_apic(void);
void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int selec);
-int user_dbreg_trap(void);
-void minidumpsys(struct dumperinfo *);
+union savefpu *get_pcb_user_save_td(struct thread *td);
+union savefpu *get_pcb_user_save_pcb(struct pcb *pcb);
#endif /* !_MACHINE_MD_VAR_H_ */
diff --git a/freebsd/sys/i386/include/machine/specialreg.h b/freebsd/sys/i386/include/machine/specialreg.h
index be36a914..aace4bfd 100644
--- a/freebsd/sys/i386/include/machine/specialreg.h
+++ b/freebsd/sys/i386/include/machine/specialreg.h
@@ -1,642 +1,6 @@
/*-
- * Copyright (c) 1991 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)specialreg.h 7.1 (Berkeley) 5/9/91
- * $FreeBSD$
+ * This file is in the public domain.
*/
+/* $FreeBSD$ */
-#ifndef _MACHINE_SPECIALREG_H_
-#define _MACHINE_SPECIALREG_H_
-
-/*
- * Bits in 386 special registers:
- */
-#define CR0_PE 0x00000001 /* Protected mode Enable */
-#define CR0_MP 0x00000002 /* "Math" (fpu) Present */
-#define CR0_EM 0x00000004 /* EMulate FPU instructions. (trap ESC only) */
-#define CR0_TS 0x00000008 /* Task Switched (if MP, trap ESC and WAIT) */
-#define CR0_PG 0x80000000 /* PaGing enable */
-
-/*
- * Bits in 486 special registers:
- */
-#define CR0_NE 0x00000020 /* Numeric Error enable (EX16 vs IRQ13) */
-#define CR0_WP 0x00010000 /* Write Protect (honor page protect in
- all modes) */
-#define CR0_AM 0x00040000 /* Alignment Mask (set to enable AC flag) */
-#define CR0_NW 0x20000000 /* Not Write-through */
-#define CR0_CD 0x40000000 /* Cache Disable */
-
-/*
- * Bits in PPro special registers
- */
-#define CR4_VME 0x00000001 /* Virtual 8086 mode extensions */
-#define CR4_PVI 0x00000002 /* Protected-mode virtual interrupts */
-#define CR4_TSD 0x00000004 /* Time stamp disable */
-#define CR4_DE 0x00000008 /* Debugging extensions */
-#define CR4_PSE 0x00000010 /* Page size extensions */
-#define CR4_PAE 0x00000020 /* Physical address extension */
-#define CR4_MCE 0x00000040 /* Machine check enable */
-#define CR4_PGE 0x00000080 /* Page global enable */
-#define CR4_PCE 0x00000100 /* Performance monitoring counter enable */
-#define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */
-#define CR4_XMM 0x00000400 /* enable SIMD/MMX2 to use except 16 */
-#define CR4_XSAVE 0x00040000 /* XSETBV/XGETBV */
-
-/*
- * Bits in AMD64 special registers. EFER is 64 bits wide.
- */
-#define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */
-
-/*
- * CPUID instruction features register
- */
-#define CPUID_FPU 0x00000001
-#define CPUID_VME 0x00000002
-#define CPUID_DE 0x00000004
-#define CPUID_PSE 0x00000008
-#define CPUID_TSC 0x00000010
-#define CPUID_MSR 0x00000020
-#define CPUID_PAE 0x00000040
-#define CPUID_MCE 0x00000080
-#define CPUID_CX8 0x00000100
-#define CPUID_APIC 0x00000200
-#define CPUID_B10 0x00000400
-#define CPUID_SEP 0x00000800
-#define CPUID_MTRR 0x00001000
-#define CPUID_PGE 0x00002000
-#define CPUID_MCA 0x00004000
-#define CPUID_CMOV 0x00008000
-#define CPUID_PAT 0x00010000
-#define CPUID_PSE36 0x00020000
-#define CPUID_PSN 0x00040000
-#define CPUID_CLFSH 0x00080000
-#define CPUID_B20 0x00100000
-#define CPUID_DS 0x00200000
-#define CPUID_ACPI 0x00400000
-#define CPUID_MMX 0x00800000
-#define CPUID_FXSR 0x01000000
-#define CPUID_SSE 0x02000000
-#define CPUID_XMM 0x02000000
-#define CPUID_SSE2 0x04000000
-#define CPUID_SS 0x08000000
-#define CPUID_HTT 0x10000000
-#define CPUID_TM 0x20000000
-#define CPUID_IA64 0x40000000
-#define CPUID_PBE 0x80000000
-
-#define CPUID2_SSE3 0x00000001
-#define CPUID2_PCLMULQDQ 0x00000002
-#define CPUID2_DTES64 0x00000004
-#define CPUID2_MON 0x00000008
-#define CPUID2_DS_CPL 0x00000010
-#define CPUID2_VMX 0x00000020
-#define CPUID2_SMX 0x00000040
-#define CPUID2_EST 0x00000080
-#define CPUID2_TM2 0x00000100
-#define CPUID2_SSSE3 0x00000200
-#define CPUID2_CNXTID 0x00000400
-#define CPUID2_FMA 0x00001000
-#define CPUID2_CX16 0x00002000
-#define CPUID2_XTPR 0x00004000
-#define CPUID2_PDCM 0x00008000
-#define CPUID2_PCID 0x00020000
-#define CPUID2_DCA 0x00040000
-#define CPUID2_SSE41 0x00080000
-#define CPUID2_SSE42 0x00100000
-#define CPUID2_X2APIC 0x00200000
-#define CPUID2_MOVBE 0x00400000
-#define CPUID2_POPCNT 0x00800000
-#define CPUID2_TSCDLT 0x01000000
-#define CPUID2_AESNI 0x02000000
-#define CPUID2_XSAVE 0x04000000
-#define CPUID2_OSXSAVE 0x08000000
-#define CPUID2_AVX 0x10000000
-#define CPUID2_F16C 0x20000000
-#define CPUID2_RDRAND 0x40000000
-#define CPUID2_HV 0x80000000
-
-/*
- * Important bits in the Thermal and Power Management flags
- * CPUID.6 EAX and ECX.
- */
-#define CPUTPM1_SENSOR 0x00000001
-#define CPUTPM1_TURBO 0x00000002
-#define CPUTPM1_ARAT 0x00000004
-#define CPUTPM2_EFFREQ 0x00000001
-
-/*
- * Important bits in the AMD extended cpuid flags
- */
-#define AMDID_SYSCALL 0x00000800
-#define AMDID_MP 0x00080000
-#define AMDID_NX 0x00100000
-#define AMDID_EXT_MMX 0x00400000
-#define AMDID_FFXSR 0x01000000
-#define AMDID_PAGE1GB 0x04000000
-#define AMDID_RDTSCP 0x08000000
-#define AMDID_LM 0x20000000
-#define AMDID_EXT_3DNOW 0x40000000
-#define AMDID_3DNOW 0x80000000
-
-#define AMDID2_LAHF 0x00000001
-#define AMDID2_CMP 0x00000002
-#define AMDID2_SVM 0x00000004
-#define AMDID2_EXT_APIC 0x00000008
-#define AMDID2_CR8 0x00000010
-#define AMDID2_ABM 0x00000020
-#define AMDID2_SSE4A 0x00000040
-#define AMDID2_MAS 0x00000080
-#define AMDID2_PREFETCH 0x00000100
-#define AMDID2_OSVW 0x00000200
-#define AMDID2_IBS 0x00000400
-#define AMDID2_XOP 0x00000800
-#define AMDID2_SKINIT 0x00001000
-#define AMDID2_WDT 0x00002000
-#define AMDID2_LWP 0x00008000
-#define AMDID2_FMA4 0x00010000
-#define AMDID2_TCE 0x00020000
-#define AMDID2_NODE_ID 0x00080000
-#define AMDID2_TBM 0x00200000
-#define AMDID2_TOPOLOGY 0x00400000
-#define AMDID2_PCXC 0x00800000
-#define AMDID2_PNXC 0x01000000
-#define AMDID2_DBE 0x04000000
-#define AMDID2_PTSC 0x08000000
-#define AMDID2_PTSCEL2I 0x10000000
-
-/*
- * CPUID instruction 1 eax info
- */
-#define CPUID_STEPPING 0x0000000f
-#define CPUID_MODEL 0x000000f0
-#define CPUID_FAMILY 0x00000f00
-#define CPUID_EXT_MODEL 0x000f0000
-#define CPUID_EXT_FAMILY 0x0ff00000
-#define CPUID_TO_MODEL(id) \
- ((((id) & CPUID_MODEL) >> 4) | \
- ((((id) & CPUID_FAMILY) >= 0x600) ? \
- (((id) & CPUID_EXT_MODEL) >> 12) : 0))
-#define CPUID_TO_FAMILY(id) \
- ((((id) & CPUID_FAMILY) >> 8) + \
- ((((id) & CPUID_FAMILY) == 0xf00) ? \
- (((id) & CPUID_EXT_FAMILY) >> 20) : 0))
-
-/*
- * CPUID instruction 1 ebx info
- */
-#define CPUID_BRAND_INDEX 0x000000ff
-#define CPUID_CLFUSH_SIZE 0x0000ff00
-#define CPUID_HTT_CORES 0x00ff0000
-#define CPUID_LOCAL_APIC_ID 0xff000000
-
-/*
- * CPUID instruction 6 ecx info
- */
-#define CPUID_PERF_STAT 0x00000001
-#define CPUID_PERF_BIAS 0x00000008
-
-/*
- * CPUID instruction 0xb ebx info.
- */
-#define CPUID_TYPE_INVAL 0
-#define CPUID_TYPE_SMT 1
-#define CPUID_TYPE_CORE 2
-
-/*
- * CPUID instruction 0xd Processor Extended State Enumeration Sub-leaf 1
- */
-#define CPUID_EXTSTATE_XSAVEOPT 0x00000001
-
-/*
- * AMD extended function 8000_0007h edx info
- */
-#define AMDPM_TS 0x00000001
-#define AMDPM_FID 0x00000002
-#define AMDPM_VID 0x00000004
-#define AMDPM_TTP 0x00000008
-#define AMDPM_TM 0x00000010
-#define AMDPM_STC 0x00000020
-#define AMDPM_100MHZ_STEPS 0x00000040
-#define AMDPM_HW_PSTATE 0x00000080
-#define AMDPM_TSC_INVARIANT 0x00000100
-#define AMDPM_CPB 0x00000200
-
-/*
- * AMD extended function 8000_0008h ecx info
- */
-#define AMDID_CMP_CORES 0x000000ff
-#define AMDID_COREID_SIZE 0x0000f000
-#define AMDID_COREID_SIZE_SHIFT 12
-
-/*
- * CPUID manufacturers identifiers
- */
-#define AMD_VENDOR_ID "AuthenticAMD"
-#define CENTAUR_VENDOR_ID "CentaurHauls"
-#define CYRIX_VENDOR_ID "CyrixInstead"
-#define INTEL_VENDOR_ID "GenuineIntel"
-#define NEXGEN_VENDOR_ID "NexGenDriven"
-#define NSC_VENDOR_ID "Geode by NSC"
-#define RISE_VENDOR_ID "RiseRiseRise"
-#define SIS_VENDOR_ID "SiS SiS SiS "
-#define TRANSMETA_VENDOR_ID "GenuineTMx86"
-#define UMC_VENDOR_ID "UMC UMC UMC "
-
-/*
- * Model-specific registers for the i386 family
- */
-#define MSR_P5_MC_ADDR 0x000
-#define MSR_P5_MC_TYPE 0x001
-#define MSR_TSC 0x010
-#define MSR_P5_CESR 0x011
-#define MSR_P5_CTR0 0x012
-#define MSR_P5_CTR1 0x013
-#define MSR_IA32_PLATFORM_ID 0x017
-#define MSR_APICBASE 0x01b
-#define MSR_EBL_CR_POWERON 0x02a
-#define MSR_TEST_CTL 0x033
-#define MSR_BIOS_UPDT_TRIG 0x079
-#define MSR_BBL_CR_D0 0x088
-#define MSR_BBL_CR_D1 0x089
-#define MSR_BBL_CR_D2 0x08a
-#define MSR_BIOS_SIGN 0x08b
-#define MSR_PERFCTR0 0x0c1
-#define MSR_PERFCTR1 0x0c2
-#define MSR_MPERF 0x0e7
-#define MSR_APERF 0x0e8
-#define MSR_IA32_EXT_CONFIG 0x0ee /* Undocumented. Core Solo/Duo only */
-#define MSR_MTRRcap 0x0fe
-#define MSR_BBL_CR_ADDR 0x116
-#define MSR_BBL_CR_DECC 0x118
-#define MSR_BBL_CR_CTL 0x119
-#define MSR_BBL_CR_TRIG 0x11a
-#define MSR_BBL_CR_BUSY 0x11b
-#define MSR_BBL_CR_CTL3 0x11e
-#define MSR_SYSENTER_CS_MSR 0x174
-#define MSR_SYSENTER_ESP_MSR 0x175
-#define MSR_SYSENTER_EIP_MSR 0x176
-#define MSR_MCG_CAP 0x179
-#define MSR_MCG_STATUS 0x17a
-#define MSR_MCG_CTL 0x17b
-#define MSR_EVNTSEL0 0x186
-#define MSR_EVNTSEL1 0x187
-#define MSR_THERM_CONTROL 0x19a
-#define MSR_THERM_INTERRUPT 0x19b
-#define MSR_THERM_STATUS 0x19c
-#define MSR_IA32_MISC_ENABLE 0x1a0
-#define MSR_IA32_TEMPERATURE_TARGET 0x1a2
-#define MSR_DEBUGCTLMSR 0x1d9
-#define MSR_LASTBRANCHFROMIP 0x1db
-#define MSR_LASTBRANCHTOIP 0x1dc
-#define MSR_LASTINTFROMIP 0x1dd
-#define MSR_LASTINTTOIP 0x1de
-#define MSR_ROB_CR_BKUPTMPDR6 0x1e0
-#define MSR_MTRRVarBase 0x200
-#define MSR_MTRR64kBase 0x250
-#define MSR_MTRR16kBase 0x258
-#define MSR_MTRR4kBase 0x268
-#define MSR_PAT 0x277
-#define MSR_MC0_CTL2 0x280
-#define MSR_MTRRdefType 0x2ff
-#define MSR_MC0_CTL 0x400
-#define MSR_MC0_STATUS 0x401
-#define MSR_MC0_ADDR 0x402
-#define MSR_MC0_MISC 0x403
-#define MSR_MC1_CTL 0x404
-#define MSR_MC1_STATUS 0x405
-#define MSR_MC1_ADDR 0x406
-#define MSR_MC1_MISC 0x407
-#define MSR_MC2_CTL 0x408
-#define MSR_MC2_STATUS 0x409
-#define MSR_MC2_ADDR 0x40a
-#define MSR_MC2_MISC 0x40b
-#define MSR_MC3_CTL 0x40c
-#define MSR_MC3_STATUS 0x40d
-#define MSR_MC3_ADDR 0x40e
-#define MSR_MC3_MISC 0x40f
-#define MSR_MC4_CTL 0x410
-#define MSR_MC4_STATUS 0x411
-#define MSR_MC4_ADDR 0x412
-#define MSR_MC4_MISC 0x413
-
-/*
- * Constants related to MSR's.
- */
-#define APICBASE_RESERVED 0x000006ff
-#define APICBASE_BSP 0x00000100
-#define APICBASE_ENABLED 0x00000800
-#define APICBASE_ADDRESS 0xfffff000
-
-/*
- * PAT modes.
- */
-#define PAT_UNCACHEABLE 0x00
-#define PAT_WRITE_COMBINING 0x01
-#define PAT_WRITE_THROUGH 0x04
-#define PAT_WRITE_PROTECTED 0x05
-#define PAT_WRITE_BACK 0x06
-#define PAT_UNCACHED 0x07
-#define PAT_VALUE(i, m) ((long long)(m) << (8 * (i)))
-#define PAT_MASK(i) PAT_VALUE(i, 0xff)
-
-/*
- * Constants related to MTRRs
- */
-#define MTRR_UNCACHEABLE 0x00
-#define MTRR_WRITE_COMBINING 0x01
-#define MTRR_WRITE_THROUGH 0x04
-#define MTRR_WRITE_PROTECTED 0x05
-#define MTRR_WRITE_BACK 0x06
-#define MTRR_N64K 8 /* numbers of fixed-size entries */
-#define MTRR_N16K 16
-#define MTRR_N4K 64
-#define MTRR_CAP_WC 0x0000000000000400
-#define MTRR_CAP_FIXED 0x0000000000000100
-#define MTRR_CAP_VCNT 0x00000000000000ff
-#define MTRR_DEF_ENABLE 0x0000000000000800
-#define MTRR_DEF_FIXED_ENABLE 0x0000000000000400
-#define MTRR_DEF_TYPE 0x00000000000000ff
-#define MTRR_PHYSBASE_PHYSBASE 0x000ffffffffff000
-#define MTRR_PHYSBASE_TYPE 0x00000000000000ff
-#define MTRR_PHYSMASK_PHYSMASK 0x000ffffffffff000
-#define MTRR_PHYSMASK_VALID 0x0000000000000800
-
-/*
- * Cyrix configuration registers, accessible as IO ports.
- */
-#define CCR0 0xc0 /* Configuration control register 0 */
-#define CCR0_NC0 0x01 /* First 64K of each 1M memory region is
- non-cacheable */
-#define CCR0_NC1 0x02 /* 640K-1M region is non-cacheable */
-#define CCR0_A20M 0x04 /* Enables A20M# input pin */
-#define CCR0_KEN 0x08 /* Enables KEN# input pin */
-#define CCR0_FLUSH 0x10 /* Enables FLUSH# input pin */
-#define CCR0_BARB 0x20 /* Flushes internal cache when entering hold
- state */
-#define CCR0_CO 0x40 /* Cache org: 1=direct mapped, 0=2x set
- assoc */
-#define CCR0_SUSPEND 0x80 /* Enables SUSP# and SUSPA# pins */
-
-#define CCR1 0xc1 /* Configuration control register 1 */
-#define CCR1_RPL 0x01 /* Enables RPLSET and RPLVAL# pins */
-#define CCR1_SMI 0x02 /* Enables SMM pins */
-#define CCR1_SMAC 0x04 /* System management memory access */
-#define CCR1_MMAC 0x08 /* Main memory access */
-#define CCR1_NO_LOCK 0x10 /* Negate LOCK# */
-#define CCR1_SM3 0x80 /* SMM address space address region 3 */
-
-#define CCR2 0xc2
-#define CCR2_WB 0x02 /* Enables WB cache interface pins */
-#define CCR2_SADS 0x02 /* Slow ADS */
-#define CCR2_LOCK_NW 0x04 /* LOCK NW Bit */
-#define CCR2_SUSP_HLT 0x08 /* Suspend on HALT */
-#define CCR2_WT1 0x10 /* WT region 1 */
-#define CCR2_WPR1 0x10 /* Write-protect region 1 */
-#define CCR2_BARB 0x20 /* Flushes write-back cache when entering
- hold state. */
-#define CCR2_BWRT 0x40 /* Enables burst write cycles */
-#define CCR2_USE_SUSP 0x80 /* Enables suspend pins */
-
-#define CCR3 0xc3
-#define CCR3_SMILOCK 0x01 /* SMM register lock */
-#define CCR3_NMI 0x02 /* Enables NMI during SMM */
-#define CCR3_LINBRST 0x04 /* Linear address burst cycles */
-#define CCR3_SMMMODE 0x08 /* SMM Mode */
-#define CCR3_MAPEN0 0x10 /* Enables Map0 */
-#define CCR3_MAPEN1 0x20 /* Enables Map1 */
-#define CCR3_MAPEN2 0x40 /* Enables Map2 */
-#define CCR3_MAPEN3 0x80 /* Enables Map3 */
-
-#define CCR4 0xe8
-#define CCR4_IOMASK 0x07
-#define CCR4_MEM 0x08 /* Enables momory bypassing */
-#define CCR4_DTE 0x10 /* Enables directory table entry cache */
-#define CCR4_FASTFPE 0x20 /* Fast FPU exception */
-#define CCR4_CPUID 0x80 /* Enables CPUID instruction */
-
-#define CCR5 0xe9
-#define CCR5_WT_ALLOC 0x01 /* Write-through allocate */
-#define CCR5_SLOP 0x02 /* LOOP instruction slowed down */
-#define CCR5_LBR1 0x10 /* Local bus region 1 */
-#define CCR5_ARREN 0x20 /* Enables ARR region */
-
-#define CCR6 0xea
-
-#define CCR7 0xeb
-
-/* Performance Control Register (5x86 only). */
-#define PCR0 0x20
-#define PCR0_RSTK 0x01 /* Enables return stack */
-#define PCR0_BTB 0x02 /* Enables branch target buffer */
-#define PCR0_LOOP 0x04 /* Enables loop */
-#define PCR0_AIS 0x08 /* Enables all instrcutions stalled to
- serialize pipe. */
-#define PCR0_MLR 0x10 /* Enables reordering of misaligned loads */
-#define PCR0_BTBRT 0x40 /* Enables BTB test register. */
-#define PCR0_LSSER 0x80 /* Disable reorder */
-
-/* Device Identification Registers */
-#define DIR0 0xfe
-#define DIR1 0xff
-
-/*
- * Machine Check register constants.
- */
-#define MCG_CAP_COUNT 0x000000ff
-#define MCG_CAP_CTL_P 0x00000100
-#define MCG_CAP_EXT_P 0x00000200
-#define MCG_CAP_CMCI_P 0x00000400
-#define MCG_CAP_TES_P 0x00000800
-#define MCG_CAP_EXT_CNT 0x00ff0000
-#define MCG_CAP_SER_P 0x01000000
-#define MCG_STATUS_RIPV 0x00000001
-#define MCG_STATUS_EIPV 0x00000002
-#define MCG_STATUS_MCIP 0x00000004
-#define MCG_CTL_ENABLE 0xffffffffffffffff
-#define MCG_CTL_DISABLE 0x0000000000000000
-#define MSR_MC_CTL(x) (MSR_MC0_CTL + (x) * 4)
-#define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4)
-#define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4)
-#define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4)
-#define MSR_MC_CTL2(x) (MSR_MC0_CTL2 + (x)) /* If MCG_CAP_CMCI_P */
-#define MC_STATUS_MCA_ERROR 0x000000000000ffff
-#define MC_STATUS_MODEL_ERROR 0x00000000ffff0000
-#define MC_STATUS_OTHER_INFO 0x01ffffff00000000
-#define MC_STATUS_COR_COUNT 0x001fffc000000000 /* If MCG_CAP_CMCI_P */
-#define MC_STATUS_TES_STATUS 0x0060000000000000 /* If MCG_CAP_TES_P */
-#define MC_STATUS_AR 0x0080000000000000 /* If MCG_CAP_TES_P */
-#define MC_STATUS_S 0x0100000000000000 /* If MCG_CAP_TES_P */
-#define MC_STATUS_PCC 0x0200000000000000
-#define MC_STATUS_ADDRV 0x0400000000000000
-#define MC_STATUS_MISCV 0x0800000000000000
-#define MC_STATUS_EN 0x1000000000000000
-#define MC_STATUS_UC 0x2000000000000000
-#define MC_STATUS_OVER 0x4000000000000000
-#define MC_STATUS_VAL 0x8000000000000000
-#define MC_MISC_RA_LSB 0x000000000000003f /* If MCG_CAP_SER_P */
-#define MC_MISC_ADDRESS_MODE 0x00000000000001c0 /* If MCG_CAP_SER_P */
-#define MC_CTL2_THRESHOLD 0x0000000000007fff
-#define MC_CTL2_CMCI_EN 0x0000000040000000
-
-/*
- * The following four 3-byte registers control the non-cacheable regions.
- * These registers must be written as three separate bytes.
- *
- * NCRx+0: A31-A24 of starting address
- * NCRx+1: A23-A16 of starting address
- * NCRx+2: A15-A12 of starting address | NCR_SIZE_xx.
- *
- * The non-cacheable region's starting address must be aligned to the
- * size indicated by the NCR_SIZE_xx field.
- */
-#define NCR1 0xc4
-#define NCR2 0xc7
-#define NCR3 0xca
-#define NCR4 0xcd
-
-#define NCR_SIZE_0K 0
-#define NCR_SIZE_4K 1
-#define NCR_SIZE_8K 2
-#define NCR_SIZE_16K 3
-#define NCR_SIZE_32K 4
-#define NCR_SIZE_64K 5
-#define NCR_SIZE_128K 6
-#define NCR_SIZE_256K 7
-#define NCR_SIZE_512K 8
-#define NCR_SIZE_1M 9
-#define NCR_SIZE_2M 10
-#define NCR_SIZE_4M 11
-#define NCR_SIZE_8M 12
-#define NCR_SIZE_16M 13
-#define NCR_SIZE_32M 14
-#define NCR_SIZE_4G 15
-
-/*
- * The address region registers are used to specify the location and
- * size for the eight address regions.
- *
- * ARRx + 0: A31-A24 of start address
- * ARRx + 1: A23-A16 of start address
- * ARRx + 2: A15-A12 of start address | ARR_SIZE_xx
- */
-#define ARR0 0xc4
-#define ARR1 0xc7
-#define ARR2 0xca
-#define ARR3 0xcd
-#define ARR4 0xd0
-#define ARR5 0xd3
-#define ARR6 0xd6
-#define ARR7 0xd9
-
-#define ARR_SIZE_0K 0
-#define ARR_SIZE_4K 1
-#define ARR_SIZE_8K 2
-#define ARR_SIZE_16K 3
-#define ARR_SIZE_32K 4
-#define ARR_SIZE_64K 5
-#define ARR_SIZE_128K 6
-#define ARR_SIZE_256K 7
-#define ARR_SIZE_512K 8
-#define ARR_SIZE_1M 9
-#define ARR_SIZE_2M 10
-#define ARR_SIZE_4M 11
-#define ARR_SIZE_8M 12
-#define ARR_SIZE_16M 13
-#define ARR_SIZE_32M 14
-#define ARR_SIZE_4G 15
-
-/*
- * The region control registers specify the attributes associated with
- * the ARRx addres regions.
- */
-#define RCR0 0xdc
-#define RCR1 0xdd
-#define RCR2 0xde
-#define RCR3 0xdf
-#define RCR4 0xe0
-#define RCR5 0xe1
-#define RCR6 0xe2
-#define RCR7 0xe3
-
-#define RCR_RCD 0x01 /* Disables caching for ARRx (x = 0-6). */
-#define RCR_RCE 0x01 /* Enables caching for ARR7. */
-#define RCR_WWO 0x02 /* Weak write ordering. */
-#define RCR_WL 0x04 /* Weak locking. */
-#define RCR_WG 0x08 /* Write gathering. */
-#define RCR_WT 0x10 /* Write-through. */
-#define RCR_NLB 0x20 /* LBA# pin is not asserted. */
-
-/* AMD Write Allocate Top-Of-Memory and Control Register */
-#define AMD_WT_ALLOC_TME 0x40000 /* top-of-memory enable */
-#define AMD_WT_ALLOC_PRE 0x20000 /* programmable range enable */
-#define AMD_WT_ALLOC_FRE 0x10000 /* fixed (A0000-FFFFF) range enable */
-
-/* AMD64 MSR's */
-#define MSR_EFER 0xc0000080 /* extended features */
-#define MSR_HWCR 0xc0010015
-#define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */
-#define MSR_MC0_CTL_MASK 0xc0010044
-
-/* VIA ACE crypto featureset: for via_feature_rng */
-#define VIA_HAS_RNG 1 /* cpu has RNG */
-
-/* VIA ACE crypto featureset: for via_feature_xcrypt */
-#define VIA_HAS_AES 1 /* cpu has AES */
-#define VIA_HAS_SHA 2 /* cpu has SHA1 & SHA256 */
-#define VIA_HAS_MM 4 /* cpu has RSA instructions */
-#define VIA_HAS_AESCTR 8 /* cpu has AES-CTR instructions */
-
-/* Centaur Extended Feature flags */
-#define VIA_CPUID_HAS_RNG 0x000004
-#define VIA_CPUID_DO_RNG 0x000008
-#define VIA_CPUID_HAS_ACE 0x000040
-#define VIA_CPUID_DO_ACE 0x000080
-#define VIA_CPUID_HAS_ACE2 0x000100
-#define VIA_CPUID_DO_ACE2 0x000200
-#define VIA_CPUID_HAS_PHE 0x000400
-#define VIA_CPUID_DO_PHE 0x000800
-#define VIA_CPUID_HAS_PMM 0x001000
-#define VIA_CPUID_DO_PMM 0x002000
-
-/* VIA ACE xcrypt-* instruction context control options */
-#define VIA_CRYPT_CWLO_ROUND_M 0x0000000f
-#define VIA_CRYPT_CWLO_ALG_M 0x00000070
-#define VIA_CRYPT_CWLO_ALG_AES 0x00000000
-#define VIA_CRYPT_CWLO_KEYGEN_M 0x00000080
-#define VIA_CRYPT_CWLO_KEYGEN_HW 0x00000000
-#define VIA_CRYPT_CWLO_KEYGEN_SW 0x00000080
-#define VIA_CRYPT_CWLO_NORMAL 0x00000000
-#define VIA_CRYPT_CWLO_INTERMEDIATE 0x00000100
-#define VIA_CRYPT_CWLO_ENCRYPT 0x00000000
-#define VIA_CRYPT_CWLO_DECRYPT 0x00000200
-#define VIA_CRYPT_CWLO_KEY128 0x0000000a /* 128bit, 10 rds */
-#define VIA_CRYPT_CWLO_KEY192 0x0000040c /* 192bit, 12 rds */
-#define VIA_CRYPT_CWLO_KEY256 0x0000080e /* 256bit, 15 rds */
-
-#endif /* !_MACHINE_SPECIALREG_H_ */
+#include <x86/specialreg.h>
diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c
index 7311bb02..627c01e0 100644
--- a/freebsd/sys/kern/init_main.c
+++ b/freebsd/sys/kern/init_main.c
@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/local/opt_init_path.h>
+#include <rtems/bsd/local/opt_verbose_sysinit.h>
#include <rtems/bsd/sys/param.h>
#include <sys/kernel.h>
@@ -88,6 +89,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
+#include <vm/vm_domain.h>
#include <sys/copyright.h>
#include <ddb/ddb.h>
@@ -100,17 +102,28 @@ void mi_startup(void); /* Should be elsewhere */
static struct session session0;
static struct pgrp pgrp0;
struct proc proc0;
-struct thread thread0 __aligned(16);
+struct thread0_storage thread0_st __aligned(16);
struct vmspace vmspace0;
struct proc *initproc;
-int boothowto = 0; /* initialized so that it can be patched */
+#ifndef BOOTHOWTO
+#define BOOTHOWTO 0
+#endif
+int boothowto = BOOTHOWTO; /* initialized so that it can be patched */
SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0,
"Boot control flags, passed from loader");
-int bootverbose;
+
+#ifndef BOOTVERBOSE
+#define BOOTVERBOSE 0
+#endif
+int bootverbose = BOOTVERBOSE;
SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0,
"Control the output of verbose kernel messages");
+#ifdef INVARIANTS
+FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance");
+#endif
+
/*
* This ensures that there is at least one entry so that the sysinit_set
* symbol is not undefined. A sybsystem ID of SI_SUB_DUMMY is never
@@ -398,8 +411,6 @@ struct sysentvec null_sysvec = {
.sv_size = 0,
.sv_table = NULL,
.sv_mask = 0,
- .sv_sigsize = 0,
- .sv_sigtbl = NULL,
.sv_errsize = 0,
.sv_errtbl = NULL,
.sv_transtrap = NULL,
@@ -407,7 +418,6 @@ struct sysentvec null_sysvec = {
.sv_sendsig = NULL,
.sv_sigcode = NULL,
.sv_szsigcode = NULL,
- .sv_prepsyscall = NULL,
.sv_name = "null",
.sv_coredump = NULL,
.sv_imgact_try = NULL,
@@ -427,8 +437,9 @@ struct sysentvec null_sysvec = {
.sv_fetch_syscall_args = null_fetch_syscall_args,
.sv_syscallnames = NULL,
.sv_schedtail = NULL,
+ .sv_thread_detach = NULL,
+ .sv_trap = NULL,
};
-#endif /* __rtems__ */
/*
***************************************************************************
@@ -447,9 +458,9 @@ struct sysentvec null_sysvec = {
static void
proc0_init(void *dummy __unused)
{
-#ifndef __rtems__
struct proc *p;
struct thread *td;
+ struct ucred *newcred;
vm_paddr_t pageablemem;
int i;
@@ -474,17 +485,6 @@ proc0_init(void *dummy __unused)
* Add scheduler specific parts to proc, thread as needed.
*/
schedinit(); /* scheduler gets its house in order */
-#endif /* __rtems__ */
- /*
- * Initialize sleep queue hash table
- */
- sleepinit();
-
-#ifndef __rtems__
- /*
- * additional VM structures
- */
- vm_init2();
/*
* Create process 0 (the swapper).
@@ -503,10 +503,10 @@ proc0_init(void *dummy __unused)
session0.s_leader = p;
p->p_sysent = &null_sysvec;
- p->p_flag = P_SYSTEM | P_INMEM;
+ p->p_flag = P_SYSTEM | P_INMEM | P_KPROC;
p->p_flag2 = 0;
p->p_state = PRS_NORMAL;
- knlist_init_mtx(&p->p_klist, &p->p_mtx);
+ p->p_klist = knlist_alloc(&p->p_mtx);
STAILQ_INIT(&p->p_ktr);
p->p_nice = NZERO;
/* pid_max cannot be greater than PID_MAX */
@@ -519,36 +519,41 @@ proc0_init(void *dummy __unused)
td->td_lend_user_pri = PRI_MAX;
td->td_priority = PVM;
td->td_base_pri = PVM;
- td->td_oncpu = 0;
- td->td_flags = TDF_INMEM|TDP_KTHREAD;
+ td->td_oncpu = curcpu;
+ td->td_flags = TDF_INMEM;
+ td->td_pflags = TDP_KTHREAD;
td->td_cpuset = cpuset_thread0();
- prison0.pr_cpuset = cpuset_ref(td->td_cpuset);
+ vm_domain_policy_init(&td->td_vm_dom_policy);
+ vm_domain_policy_set(&td->td_vm_dom_policy, VM_POLICY_NONE, -1);
+ vm_domain_policy_init(&p->p_vm_dom_policy);
+ vm_domain_policy_set(&p->p_vm_dom_policy, VM_POLICY_NONE, -1);
+ prison0_init();
p->p_peers = 0;
p->p_leader = p;
-
+ p->p_reaper = p;
+ LIST_INIT(&p->p_reaplist);
strncpy(p->p_comm, "kernel", sizeof (p->p_comm));
strncpy(td->td_name, "swapper", sizeof (td->td_name));
callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0);
callout_init_mtx(&p->p_limco, &p->p_mtx, 0);
- callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
+ callout_init(&td->td_slpcallout, 1);
/* Create credentials. */
- p->p_ucred = crget();
- p->p_ucred->cr_ngroups = 1; /* group 0 */
- p->p_ucred->cr_uidinfo = uifind(0);
- p->p_ucred->cr_ruidinfo = uifind(0);
- p->p_ucred->cr_prison = &prison0;
- p->p_ucred->cr_loginclass = loginclass_find("default");
+ newcred = crget();
+ newcred->cr_ngroups = 1; /* group 0 */
+ newcred->cr_uidinfo = uifind(0);
+ newcred->cr_ruidinfo = uifind(0);
+ newcred->cr_prison = &prison0;
+ newcred->cr_loginclass = loginclass_find("default");
+ proc_set_cred_init(p, newcred);
#ifdef AUDIT
- audit_cred_kproc0(p->p_ucred);
+ audit_cred_kproc0(newcred);
#endif
#ifdef MAC
- mac_cred_create_swapper(p->p_ucred);
+ mac_cred_create_swapper(newcred);
#endif
- td->td_ucred = crhold(p->p_ucred);
-
/* Create sigacts. */
p->p_sigacts = sigacts_alloc();
@@ -556,7 +561,7 @@ proc0_init(void *dummy __unused)
siginit(&proc0);
/* Create the file descriptor table. */
- p->p_fd = fdinit(NULL);
+ p->p_fd = fdinit(NULL, false);
p->p_fdtol = NULL;
/* Create the limits structures. */
@@ -573,22 +578,26 @@ proc0_init(void *dummy __unused)
p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz;
p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz;
/* Cast to avoid overflow on i386/PAE. */
- pageablemem = ptoa((vm_paddr_t)cnt.v_free_count);
+ pageablemem = ptoa((vm_paddr_t)vm_cnt.v_free_count);
p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur =
p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem;
p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3;
p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem;
p->p_cpulimit = RLIM_INFINITY;
+ PROC_LOCK(p);
+ thread_cow_get_proc(td, p);
+ PROC_UNLOCK(p);
+
/* Initialize resource accounting structures. */
racct_create(&p->p_racct);
p->p_stats = pstats_alloc();
/* Allocate a prototype map so we have something to fork. */
- pmap_pinit0(vmspace_pmap(&vmspace0));
p->p_vmspace = &vmspace0;
vmspace0.vm_refcnt = 1;
+ pmap_pinit0(vmspace_pmap(&vmspace0));
/*
* proc0 is not expected to enter usermode, so there is no special
@@ -613,11 +622,9 @@ proc0_init(void *dummy __unused)
PROC_LOCK(p);
racct_add_force(p, RACCT_NPROC, 1);
PROC_UNLOCK(p);
-#endif /* __rtems__ */
}
SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL);
-#ifndef __rtems__
/* ARGSUSED*/
static void
proc0_post(void *dummy __unused)
@@ -634,9 +641,9 @@ proc0_post(void *dummy __unused)
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
microuptime(&p->p_stats->p_start);
- PROC_SLOCK(p);
+ PROC_STATLOCK(p);
rufetch(p, &ru); /* Clears thread stats */
- PROC_SUNLOCK(p);
+ PROC_STATUNLOCK(p);
p->p_rux.rux_runtime = 0;
p->p_rux.rux_uticks = 0;
p->p_rux.rux_sticks = 0;
@@ -696,7 +703,7 @@ static char init_path[MAXPATHLEN] =
#ifdef INIT_PATH
__XSTRING(INIT_PATH);
#else
- "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init:/stand/sysinstall";
+ "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init";
#endif
SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
"Path used to search the init process");
@@ -737,17 +744,20 @@ start_init(void *dummy)
vfs_mountroot();
+ /* Wipe GELI passphrase from the environment. */
+ kern_unsetenv("kern.geom.eli.passphrase");
+
/*
* Need just enough stack to hold the faked-up "execve()" arguments.
*/
addr = p->p_sysent->sv_usrstack - PAGE_SIZE;
- if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
- FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
+ if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE, 0,
+ VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
panic("init: couldn't allocate argument space");
p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
p->p_vmspace->vm_ssize = 1;
- if ((var = getenv("init_path")) != NULL) {
+ if ((var = kern_getenv("init_path")) != NULL) {
strlcpy(init_path, var, sizeof(init_path));
freeenv(var);
}
@@ -801,7 +811,7 @@ start_init(void *dummy)
/*
* Move out the arg pointers.
*/
- uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1));
+ uap = (char **)rounddown2((intptr_t)ucp, sizeof(intptr_t));
(void)suword((caddr_t)--uap, (long)0); /* terminator */
(void)suword((caddr_t)--uap, (long)(intptr_t)arg1);
(void)suword((caddr_t)--uap, (long)(intptr_t)arg0);
@@ -842,18 +852,25 @@ start_init(void *dummy)
static void
create_init(const void *udata __unused)
{
+ struct fork_req fr;
struct ucred *newcred, *oldcred;
+ struct thread *td;
int error;
- error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc,
- NULL, 0);
+ bzero(&fr, sizeof(fr));
+ fr.fr_flags = RFFDG | RFPROC | RFSTOPPED;
+ fr.fr_procp = &initproc;
+ error = fork1(&thread0, &fr);
if (error)
panic("cannot fork init: %d\n", error);
KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
/* divorce init's credentials from the kernel's */
newcred = crget();
+ sx_xlock(&proctree_lock);
PROC_LOCK(initproc);
initproc->p_flag |= P_SYSTEM | P_INMEM;
+ initproc->p_treeflag |= P_TREE_REAPER;
+ LIST_INSERT_HEAD(&initproc->p_reaplist, &proc0, p_reapsibling);
oldcred = initproc->p_ucred;
crcopy(newcred, oldcred);
#ifdef MAC
@@ -862,11 +879,15 @@ create_init(const void *udata __unused)
#ifdef AUDIT
audit_cred_proc1(newcred);
#endif
- initproc->p_ucred = newcred;
+ proc_set_cred(initproc, newcred);
+ td = FIRST_THREAD_IN_PROC(initproc);
+ crfree(td->td_ucred);
+ td->td_ucred = crhold(initproc->p_ucred);
PROC_UNLOCK(initproc);
+ sx_xunlock(&proctree_lock);
crfree(oldcred);
- cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
- cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
+ cpu_fork_kthread_handler(FIRST_THREAD_IN_PROC(initproc),
+ start_init, NULL);
}
SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL);
@@ -884,5 +905,5 @@ kick_init(const void *udata __unused)
sched_add(td, SRQ_BORING);
thread_unlock(td);
}
-SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL);
+SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL);
#endif /* __rtems__ */
diff --git a/freebsd/sys/kern/kern_condvar.c b/freebsd/sys/kern/kern_condvar.c
index f3be4271..239640e2 100644
--- a/freebsd/sys/kern/kern_condvar.c
+++ b/freebsd/sys/kern/kern_condvar.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/limits.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
@@ -49,6 +50,17 @@ __FBSDID("$FreeBSD$");
#endif
/*
+ * A bound below which cv_waiters is valid. Once cv_waiters reaches this bound,
+ * cv_signal must manually check the wait queue for threads.
+ */
+#define CV_WAITERS_BOUND INT_MAX
+
+#define CV_WAITERS_INC(cvp) do { \
+ if ((cvp)->cv_waiters < CV_WAITERS_BOUND) \
+ (cvp)->cv_waiters++; \
+} while (0)
+
+/*
* Common sanity checks for cv_wait* functions.
*/
#define CV_ASSERT(cvp, lock, td) do { \
@@ -99,7 +111,7 @@ _cv_wait(struct cv *cvp, struct lock_object *lock)
WITNESS_SAVE_DECL(lock_witness);
struct lock_class *class;
struct thread *td;
- int lock_state;
+ uintptr_t lock_state;
td = curthread;
lock_state = 0;
@@ -112,19 +124,12 @@ _cv_wait(struct cv *cvp, struct lock_object *lock)
"Waiting on \"%s\"", cvp->cv_description);
class = LOCK_CLASS(lock);
- if (cold || panicstr) {
- /*
- * During autoconfiguration, just give interrupts
- * a chance, then just return. Don't run any other
- * thread or panic below, in case this is the idle
- * process and already asleep.
- */
+ if (SCHEDULER_STOPPED())
return;
- }
sleepq_lock(cvp);
- cvp->cv_waiters++;
+ CV_WAITERS_INC(cvp);
if (lock == &Giant.lock_object)
mtx_assert(&Giant, MA_OWNED);
DROP_GIANT();
@@ -153,7 +158,7 @@ _cv_wait(struct cv *cvp, struct lock_object *lock)
/*
* Wait on a condition variable. This function differs from cv_wait by
- * not aquiring the mutex after condition variable was signaled.
+ * not acquiring the mutex after condition variable was signaled.
*/
void
_cv_wait_unlock(struct cv *cvp, struct lock_object *lock)
@@ -173,20 +178,14 @@ _cv_wait_unlock(struct cv *cvp, struct lock_object *lock)
("cv_wait_unlock cannot be used with Giant"));
class = LOCK_CLASS(lock);
- if (cold || panicstr) {
- /*
- * During autoconfiguration, just give interrupts
- * a chance, then just return. Don't run any other
- * thread or panic below, in case this is the idle
- * process and already asleep.
- */
+ if (SCHEDULER_STOPPED()) {
class->lc_unlock(lock);
return;
}
sleepq_lock(cvp);
- cvp->cv_waiters++;
+ CV_WAITERS_INC(cvp);
DROP_GIANT();
sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
@@ -217,7 +216,8 @@ _cv_wait_sig(struct cv *cvp, struct lock_object *lock)
WITNESS_SAVE_DECL(lock_witness);
struct lock_class *class;
struct thread *td;
- int lock_state, rval;
+ uintptr_t lock_state;
+ int rval;
td = curthread;
lock_state = 0;
@@ -230,19 +230,12 @@ _cv_wait_sig(struct cv *cvp, struct lock_object *lock)
"Waiting on \"%s\"", cvp->cv_description);
class = LOCK_CLASS(lock);
- if (cold || panicstr) {
- /*
- * After a panic, or during autoconfiguration, just give
- * interrupts a chance, then just return; don't run any other
- * procs or panic below, in case this is the idle process and
- * already asleep.
- */
+ if (SCHEDULER_STOPPED())
return (0);
- }
sleepq_lock(cvp);
- cvp->cv_waiters++;
+ CV_WAITERS_INC(cvp);
if (lock == &Giant.lock_object)
mtx_assert(&Giant, MA_OWNED);
DROP_GIANT();
@@ -278,12 +271,13 @@ _cv_wait_sig(struct cv *cvp, struct lock_object *lock)
}
/*
- * Wait on a condition variable for at most timo/hz seconds. Returns 0 if the
- * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
- * expires.
+ * Wait on a condition variable for (at most) the value specified in sbt
+ * argument. Returns 0 if the process was resumed by cv_signal or cv_broadcast,
+ * EWOULDBLOCK if the timeout expires.
*/
int
-_cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo)
+_cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt,
+ sbintime_t pr, int flags)
{
WITNESS_SAVE_DECL(lock_witness);
struct lock_class *class;
@@ -301,25 +295,18 @@ _cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo)
"Waiting on \"%s\"", cvp->cv_description);
class = LOCK_CLASS(lock);
- if (cold || panicstr) {
- /*
- * After a panic, or during autoconfiguration, just give
- * interrupts a chance, then just return; don't run any other
- * thread or panic below, in case this is the idle process and
- * already asleep.
- */
- return 0;
- }
+ if (SCHEDULER_STOPPED())
+ return (0);
sleepq_lock(cvp);
- cvp->cv_waiters++;
+ CV_WAITERS_INC(cvp);
if (lock == &Giant.lock_object)
mtx_assert(&Giant, MA_OWNED);
DROP_GIANT();
sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
- sleepq_set_timeout(cvp, timo);
+ sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
if (lock != &Giant.lock_object) {
if (class->lc_flags & LC_SLEEPABLE)
sleepq_release(cvp);
@@ -345,13 +332,15 @@ _cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo)
#ifndef __rtems__
/*
- * Wait on a condition variable for at most timo/hz seconds, allowing
- * interruption by signals. Returns 0 if the thread was resumed by cv_signal
- * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
- * a signal was caught.
+ * Wait on a condition variable for (at most) the value specified in sbt
+ * argument, allowing interruption by signals.
+ * Returns 0 if the thread was resumed by cv_signal or cv_broadcast,
+ * EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if a signal
+ * was caught.
*/
int
-_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo)
+_cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock,
+ sbintime_t sbt, sbintime_t pr, int flags)
{
WITNESS_SAVE_DECL(lock_witness);
struct lock_class *class;
@@ -369,26 +358,19 @@ _cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo)
"Waiting on \"%s\"", cvp->cv_description);
class = LOCK_CLASS(lock);
- if (cold || panicstr) {
- /*
- * After a panic, or during autoconfiguration, just give
- * interrupts a chance, then just return; don't run any other
- * thread or panic below, in case this is the idle process and
- * already asleep.
- */
- return 0;
- }
+ if (SCHEDULER_STOPPED())
+ return (0);
sleepq_lock(cvp);
- cvp->cv_waiters++;
+ CV_WAITERS_INC(cvp);
if (lock == &Giant.lock_object)
mtx_assert(&Giant, MA_OWNED);
DROP_GIANT();
sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
SLEEPQ_INTERRUPTIBLE, 0);
- sleepq_set_timeout(cvp, timo);
+ sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
if (lock != &Giant.lock_object) {
if (class->lc_flags & LC_SLEEPABLE)
sleepq_release(cvp);
@@ -428,8 +410,15 @@ cv_signal(struct cv *cvp)
wakeup_swapper = 0;
sleepq_lock(cvp);
if (cvp->cv_waiters > 0) {
- cvp->cv_waiters--;
- wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0, 0);
+ if (cvp->cv_waiters == CV_WAITERS_BOUND &&
+ sleepq_lookup(cvp) == NULL) {
+ cvp->cv_waiters = 0;
+ } else {
+ if (cvp->cv_waiters < CV_WAITERS_BOUND)
+ cvp->cv_waiters--;
+ wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0,
+ 0);
+ }
}
sleepq_release(cvp);
if (wakeup_swapper)
diff --git a/freebsd/sys/kern/kern_conf.c b/freebsd/sys/kern/kern_conf.c
index 589e5941..fb43c243 100644
--- a/freebsd/sys/kern/kern_conf.c
+++ b/freebsd/sys/kern/kern_conf.c
@@ -62,6 +62,7 @@ static void destroy_devl(struct cdev *dev);
#ifndef __rtems__
static int destroy_dev_sched_cbl(struct cdev *dev,
void (*cb)(void *), void *arg);
+static void destroy_dev_tq(void *ctx, int pending);
#endif /* __rtems__ */
static int make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw,
int unit, struct ucred *cr, uid_t uid, gid_t gid, int mode, const char *fmt,
@@ -206,7 +207,7 @@ dev_refthread(struct cdev *dev, int *ref)
if (csw != NULL) {
cdp = cdev2priv(dev);
if ((cdp->cdp_flags & CDP_SCHED_DTR) == 0)
- dev->si_threadcount++;
+ atomic_add_long(&dev->si_threadcount, 1);
else
csw = NULL;
}
@@ -248,7 +249,7 @@ devvn_refthread(struct vnode *vp, struct cdev **devp, int *ref)
if ((cdp->cdp_flags & CDP_SCHED_DTR) == 0) {
csw = dev->si_devsw;
if (csw != NULL)
- dev->si_threadcount++;
+ atomic_add_long(&dev->si_threadcount, 1);
}
dev_unlock();
if (csw != NULL) {
@@ -266,11 +267,9 @@ dev_relthread(struct cdev *dev, int ref)
mtx_assert(&devmtx, MA_NOTOWNED);
if (!ref)
return;
- dev_lock();
KASSERT(dev->si_threadcount > 0,
("%s threadcount is wrong", dev->si_name));
- dev->si_threadcount--;
- dev_unlock();
+ atomic_subtract_rel_long(&dev->si_threadcount, 1);
}
int
@@ -597,22 +596,26 @@ notify_destroy(struct cdev *dev)
}
static struct cdev *
-newdev(struct cdevsw *csw, int unit, struct cdev *si)
+newdev(struct make_dev_args *args, struct cdev *si)
{
struct cdev *si2;
+ struct cdevsw *csw;
mtx_assert(&devmtx, MA_OWNED);
+ csw = args->mda_devsw;
if (csw->d_flags & D_NEEDMINOR) {
/* We may want to return an existing device */
LIST_FOREACH(si2, &csw->d_devs, si_list) {
- if (dev2unit(si2) == unit) {
+ if (dev2unit(si2) == args->mda_unit) {
dev_free_devlocked(si);
return (si2);
}
}
}
- si->si_drv0 = unit;
+ si->si_drv0 = args->mda_unit;
si->si_devsw = csw;
+ si->si_drv1 = args->mda_si_drv1;
+ si->si_drv2 = args->mda_si_drv2;
LIST_INSERT_HEAD(&csw->d_devs, si, si_list);
return (si);
}
@@ -728,16 +731,22 @@ prep_devname(struct cdev *dev, const char *fmt, va_list ap)
mtx_assert(&devmtx, MA_OWNED);
- len = vsnrprintf(dev->__si_namebuf, sizeof(dev->__si_namebuf), 32,
- fmt, ap);
- if (len > sizeof(dev->__si_namebuf) - 1)
+ len = vsnrprintf(dev->si_name, sizeof(dev->si_name), 32, fmt, ap);
+ if (len > sizeof(dev->si_name) - 1)
return (ENAMETOOLONG);
/* Strip leading slashes. */
- for (from = dev->__si_namebuf; *from == '/'; from++)
+ for (from = dev->si_name; *from == '/'; from++)
;
- for (to = dev->__si_namebuf; *from != '\0'; from++, to++) {
+ for (to = dev->si_name; *from != '\0'; from++, to++) {
+ /*
+ * Spaces and double quotation marks cause
+ * problems for the devctl(4) protocol.
+ * Reject names containing those characters.
+ */
+ if (isspace(*from) || *from == '"')
+ return (EINVAL);
/* Treat multiple sequential slashes as single. */
while (from[0] == '/' && from[1] == '/')
from++;
@@ -748,11 +757,11 @@ prep_devname(struct cdev *dev, const char *fmt, va_list ap)
}
*to = '\0';
- if (dev->__si_namebuf[0] == '\0')
+ if (dev->si_name[0] == '\0')
return (EINVAL);
/* Disallow "." and ".." components. */
- for (s = dev->__si_namebuf;;) {
+ for (s = dev->si_name;;) {
for (q = s; *q != '/' && *q != '\0'; q++)
;
if (q - s == 1 && s[0] == '.')
@@ -764,39 +773,52 @@ prep_devname(struct cdev *dev, const char *fmt, va_list ap)
s = q + 1;
}
- if (devfs_dev_exists(dev->__si_namebuf) != 0)
+ if (devfs_dev_exists(dev->si_name) != 0)
return (EEXIST);
return (0);
}
+void
+make_dev_args_init_impl(struct make_dev_args *args, size_t sz)
+{
+
+ bzero(args, sz);
+ args->mda_size = sz;
+}
+
static int
-make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit,
- struct ucred *cr, uid_t uid, gid_t gid, int mode, const char *fmt,
- va_list ap)
+make_dev_sv(struct make_dev_args *args1, struct cdev **dres,
+ const char *fmt, va_list ap)
{
struct cdev *dev, *dev_new;
+ struct make_dev_args args;
int res;
- KASSERT((flags & MAKEDEV_WAITOK) == 0 || (flags & MAKEDEV_NOWAIT) == 0,
- ("make_dev_credv: both WAITOK and NOWAIT specified"));
- dev_new = devfs_alloc(flags);
+ bzero(&args, sizeof(args));
+ if (sizeof(args) < args1->mda_size)
+ return (EINVAL);
+ bcopy(args1, &args, args1->mda_size);
+ KASSERT((args.mda_flags & MAKEDEV_WAITOK) == 0 ||
+ (args.mda_flags & MAKEDEV_NOWAIT) == 0,
+ ("make_dev_sv: both WAITOK and NOWAIT specified"));
+ dev_new = devfs_alloc(args.mda_flags);
if (dev_new == NULL)
return (ENOMEM);
dev_lock();
- res = prep_cdevsw(devsw, flags);
+ res = prep_cdevsw(args.mda_devsw, args.mda_flags);
if (res != 0) {
dev_unlock();
devfs_free(dev_new);
return (res);
}
- dev = newdev(devsw, unit, dev_new);
+ dev = newdev(&args, dev_new);
if ((dev->si_flags & SI_NAMED) == 0) {
res = prep_devname(dev, fmt, ap);
if (res != 0) {
- if ((flags & MAKEDEV_CHECKNAME) == 0) {
+ if ((args.mda_flags & MAKEDEV_CHECKNAME) == 0) {
panic(
- "make_dev_credv: bad si_name (error=%d, si_name=%s)",
+ "make_dev_sv: bad si_name (error=%d, si_name=%s)",
res, dev->si_name);
}
if (dev == dev_new) {
@@ -808,9 +830,9 @@ make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit,
return (res);
}
}
- if (flags & MAKEDEV_REF)
+ if ((args.mda_flags & MAKEDEV_REF) != 0)
dev_refl(dev);
- if (flags & MAKEDEV_ETERNAL)
+ if ((args.mda_flags & MAKEDEV_ETERNAL) != 0)
dev->si_flags |= SI_ETERNAL;
if (dev->si_flags & SI_CHEAPCLONE &&
dev->si_flags & SI_NAMED) {
@@ -825,14 +847,14 @@ make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit,
}
KASSERT(!(dev->si_flags & SI_NAMED),
("make_dev() by driver %s on pre-existing device (min=%x, name=%s)",
- devsw->d_name, dev2unit(dev), devtoname(dev)));
+ args.mda_devsw->d_name, dev2unit(dev), devtoname(dev)));
dev->si_flags |= SI_NAMED;
#ifndef __rtems__
- if (cr != NULL)
- dev->si_cred = crhold(cr);
- dev->si_uid = uid;
- dev->si_gid = gid;
- dev->si_mode = mode;
+ if (args.mda_cr != NULL)
+ dev->si_cred = crhold(args.mda_cr);
+ dev->si_uid = args.mda_uid;
+ dev->si_gid = args.mda_gid;
+ dev->si_mode = args.mda_mode;
#endif /* __rtems__ */
devfs_create(dev);
@@ -841,12 +863,43 @@ make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit,
#endif /* __rtems__ */
dev_unlock_and_free();
- notify_create(dev, flags);
+ notify_create(dev, args.mda_flags);
*dres = dev;
return (0);
}
+int
+make_dev_s(struct make_dev_args *args, struct cdev **dres,
+ const char *fmt, ...)
+{
+ va_list ap;
+ int res;
+
+ va_start(ap, fmt);
+ res = make_dev_sv(args, dres, fmt, ap);
+ va_end(ap);
+ return (res);
+}
+
+static int
+make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit,
+ struct ucred *cr, uid_t uid, gid_t gid, int mode, const char *fmt,
+ va_list ap)
+{
+ struct make_dev_args args;
+
+ make_dev_args_init(&args);
+ args.mda_flags = flags;
+ args.mda_devsw = devsw;
+ args.mda_cr = cr;
+ args.mda_uid = uid;
+ args.mda_gid = gid;
+ args.mda_mode = mode;
+ args.mda_unit = unit;
+ return (make_dev_sv(&args, dres, fmt, ap));
+}
+
struct cdev *
make_dev(struct cdevsw *devsw, int unit, uid_t uid, gid_t gid, int mode,
const char *fmt, ...)
@@ -1299,6 +1352,7 @@ clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up,
{
struct clonedevs *cd;
struct cdev *dev, *ndev, *dl, *de;
+ struct make_dev_args args;
int unit, low, u;
KASSERT(*cdp != NULL,
@@ -1350,7 +1404,10 @@ clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up,
}
if (unit == -1)
unit = low & CLONE_UNITMASK;
- dev = newdev(csw, unit | extra, ndev);
+ make_dev_args_init(&args);
+ args.mda_unit = unit | extra;
+ args.mda_devsw = csw;
+ dev = newdev(&args, ndev);
if (dev->si_flags & SI_CLONELIST) {
printf("dev %p (%s) is on clonelist\n", dev, dev->si_name);
printf("unit=%d, low=%d, extra=0x%x\n", unit, low, extra);
@@ -1398,7 +1455,8 @@ clone_cleanup(struct clonedevs **cdp)
if (!(cp->cdp_flags & CDP_SCHED_DTR)) {
cp->cdp_flags |= CDP_SCHED_DTR;
KASSERT(dev->si_flags & SI_NAMED,
- ("Driver has goofed in cloning underways udev %x unit %x", dev2udev(dev), dev2unit(dev)));
+ ("Driver has goofed in cloning underways udev %jx unit %x",
+ (uintmax_t)dev2udev(dev), dev2unit(dev)));
destroy_devl(dev);
}
}
@@ -1409,7 +1467,7 @@ clone_cleanup(struct clonedevs **cdp)
static TAILQ_HEAD(, cdev_priv) dev_ddtr =
TAILQ_HEAD_INITIALIZER(dev_ddtr);
-static struct task dev_dtr_task;
+static struct task dev_dtr_task = TASK_INITIALIZER(0, destroy_dev_tq, NULL);
static void
destroy_dev_tq(void *ctx, int pending)
@@ -1497,15 +1555,6 @@ drain_dev_clone_events(void)
sx_xunlock(&clone_drain_lock);
}
-static void
-devdtr_init(void *dummy __unused)
-{
-
- TASK_INIT(&dev_dtr_task, 0, destroy_dev_tq, NULL);
-}
-
-SYSINIT(devdtr, SI_SUB_DEVFS, SI_ORDER_SECOND, devdtr_init, NULL);
-
#include <rtems/bsd/local/opt_ddb.h>
#ifdef DDB
#include <sys/kernel.h>
@@ -1551,10 +1600,7 @@ DB_SHOW_COMMAND(cdev, db_show_cdev)
SI_FLAG(SI_NAMED);
SI_FLAG(SI_CHEAPCLONE);
SI_FLAG(SI_CHILD);
- SI_FLAG(SI_DEVOPEN);
- SI_FLAG(SI_CONSOPEN);
SI_FLAG(SI_DUMPDEV);
- SI_FLAG(SI_CANDELETE);
SI_FLAG(SI_CLONELIST);
db_printf("si_flags %s\n", buf);
diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c
index 50f4c696..ca9c1de7 100644
--- a/freebsd/sys/kern/kern_event.c
+++ b/freebsd/sys/kern/kern_event.c
@@ -32,13 +32,15 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ktrace.h>
+#include <rtems/bsd/local/opt_kqueue.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mutex.h>
+#include <sys/rwlock.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <rtems/bsd/sys/unistd.h>
@@ -48,11 +50,13 @@ __FBSDID("$FreeBSD$");
#include <sys/fcntl.h>
#include <sys/kthread.h>
#include <sys/selinfo.h>
+#include <sys/stdatomic.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/protosw.h>
+#include <sys/resourcevar.h>
#include <sys/sigio.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
@@ -63,6 +67,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/uio.h>
+#include <sys/user.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
@@ -96,7 +101,7 @@ MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF);
haslck = 0; \
} while (0)
-TASKQUEUE_DEFINE_THREAD(kqueue);
+TASKQUEUE_DEFINE_THREAD(kqueue_ctx);
static int kevent_copyout(void *arg, struct kevent *kevp, int count);
static int kevent_copyin(void *arg, struct kevent *kevp, int count);
@@ -104,6 +109,8 @@ static int kqueue_register(struct kqueue *kq, struct kevent *kev,
struct thread *td, int waitok);
static int kqueue_acquire(struct file *fp, struct kqueue **kqp);
static void kqueue_release(struct kqueue *kq, int locked);
+static void kqueue_destroy(struct kqueue *kq);
+static void kqueue_drain(struct kqueue *kq, struct thread *td);
static int kqueue_expand(struct kqueue *kq, struct filterops *fops,
uintptr_t ident, int waitok);
static void kqueue_task(void *arg, int pending);
@@ -124,11 +131,12 @@ static fo_poll_t kqueue_poll;
static fo_kqfilter_t kqueue_kqfilter;
static fo_stat_t kqueue_stat;
static fo_close_t kqueue_close;
+static fo_fill_kinfo_t kqueue_fill_kinfo;
static struct fileops kqueueops = {
- .fo_read = kqueue_read,
- .fo_write = kqueue_write,
- .fo_truncate = kqueue_truncate,
+ .fo_read = invfo_rdwr,
+ .fo_write = invfo_rdwr,
+ .fo_truncate = invfo_truncate,
.fo_ioctl = kqueue_ioctl,
.fo_poll = kqueue_poll,
.fo_kqfilter = kqueue_kqfilter,
@@ -136,6 +144,8 @@ static struct fileops kqueueops = {
.fo_close = kqueue_close,
.fo_chmod = invfo_chmod,
.fo_chown = invfo_chown,
+ .fo_sendfile = invfo_sendfile,
+ .fo_fill_kinfo = kqueue_fill_kinfo,
};
#else /* __rtems__ */
static const rtems_filesystem_file_handlers_r kqueueops;
@@ -199,9 +209,9 @@ static struct filterops user_filtops = {
};
static uma_zone_t knote_zone;
-static int kq_ncallouts = 0;
-static int kq_calloutmax = (4 * 1024);
-SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
+static atomic_uint kq_ncallouts = ATOMIC_VAR_INIT(0);
+static unsigned int kq_calloutmax = 4 * 1024;
+SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
&kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");
/* XXX - ensure not KN_INFLUX?? */
@@ -238,14 +248,33 @@ SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
#define KQ_NOTOWNED(kq) do { \
mtx_assert(&(kq)->kq_lock, MA_NOTOWNED); \
} while (0)
-#define KN_LIST_LOCK(kn) do { \
- if (kn->kn_knlist != NULL) \
- kn->kn_knlist->kl_lock(kn->kn_knlist->kl_lockarg); \
-} while (0)
-#define KN_LIST_UNLOCK(kn) do { \
- if (kn->kn_knlist != NULL) \
- kn->kn_knlist->kl_unlock(kn->kn_knlist->kl_lockarg); \
-} while (0)
+
+static struct knlist *
+kn_list_lock(struct knote *kn)
+{
+ struct knlist *knl;
+
+ knl = kn->kn_knlist;
+ if (knl != NULL)
+ knl->kl_lock(knl->kl_lockarg);
+ return (knl);
+}
+
+static void
+kn_list_unlock(struct knlist *knl)
+{
+ bool do_free;
+
+ if (knl == NULL)
+ return;
+ do_free = knl->kl_autodestroy && knlist_empty(knl);
+ knl->kl_unlock(knl->kl_lockarg);
+ if (do_free) {
+ knlist_destroy(knl);
+ free(knl, M_KQUEUE);
+ }
+}
+
#define KNL_ASSERT_LOCK(knl, islocked) do { \
if (islocked) \
KNL_ASSERT_LOCKED(knl); \
@@ -264,7 +293,10 @@ SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
#define KNL_ASSERT_UNLOCKED(knl) do {} while (0)
#endif /* INVARIANTS */
+#ifndef KN_HASHSIZE
#define KN_HASHSIZE 64 /* XXX should be tunable */
+#endif
+
#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask))
static int
@@ -291,28 +323,30 @@ MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops",
MTX_DEF);
static struct {
struct filterops *for_fop;
+ int for_nolock;
int for_refcnt;
} sysfilt_ops[EVFILT_SYSCOUNT] = {
- { &file_filtops }, /* EVFILT_READ */
- { &file_filtops }, /* EVFILT_WRITE */
+ { &file_filtops, 1 }, /* EVFILT_READ */
+ { &file_filtops, 1 }, /* EVFILT_WRITE */
{ &null_filtops }, /* EVFILT_AIO */
- { &file_filtops }, /* EVFILT_VNODE */
+ { &file_filtops, 1 }, /* EVFILT_VNODE */
#ifndef __rtems__
- { &proc_filtops }, /* EVFILT_PROC */
- { &sig_filtops }, /* EVFILT_SIGNAL */
+ { &proc_filtops, 1 }, /* EVFILT_PROC */
+ { &sig_filtops, 1 }, /* EVFILT_SIGNAL */
#else /* __rtems__ */
{ &null_filtops }, /* EVFILT_PROC */
{ &null_filtops }, /* EVFILT_SIGNAL */
#endif /* __rtems__ */
- { &timer_filtops }, /* EVFILT_TIMER */
+ { &timer_filtops, 1 }, /* EVFILT_TIMER */
{ &null_filtops }, /* former EVFILT_NETDEV */
#ifndef __rtems__
- { &fs_filtops }, /* EVFILT_FS */
+ { &fs_filtops, 1 }, /* EVFILT_FS */
#else /* __rtems__ */
{ &null_filtops }, /* EVFILT_FS */
#endif /* __rtems__ */
{ &null_filtops }, /* EVFILT_LIO */
- { &user_filtops }, /* EVFILT_USER */
+ { &user_filtops, 1 }, /* EVFILT_USER */
+ { &null_filtops }, /* EVFILT_SENDFILE */
};
/*
@@ -375,16 +409,16 @@ static int
filt_procattach(struct knote *kn)
{
struct proc *p;
- int immediate;
int error;
+ bool exiting, immediate;
- immediate = 0;
+ exiting = immediate = false;
p = pfind(kn->kn_id);
if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
p = zpfind(kn->kn_id);
- immediate = 1;
+ exiting = true;
} else if (p != NULL && (p->p_flag & P_WEXIT)) {
- immediate = 1;
+ exiting = true;
}
if (p == NULL)
@@ -398,23 +432,33 @@ filt_procattach(struct knote *kn)
kn->kn_flags |= EV_CLEAR; /* automatically set */
/*
- * internal flag indicating registration done by kernel
+ * Internal flag indicating registration done by kernel for the
+ * purposes of getting a NOTE_CHILD notification.
*/
- if (kn->kn_flags & EV_FLAG1) {
+ if (kn->kn_flags & EV_FLAG2) {
+ kn->kn_flags &= ~EV_FLAG2;
kn->kn_data = kn->kn_sdata; /* ppid */
kn->kn_fflags = NOTE_CHILD;
+ kn->kn_sfflags &= ~(NOTE_EXIT | NOTE_EXEC | NOTE_FORK);
+ immediate = true; /* Force immediate activation of child note. */
+ }
+ /*
+ * Internal flag indicating registration done by kernel (for other than
+ * NOTE_CHILD).
+ */
+ if (kn->kn_flags & EV_FLAG1) {
kn->kn_flags &= ~EV_FLAG1;
}
- if (immediate == 0)
- knlist_add(&p->p_klist, kn, 1);
+ knlist_add(p->p_klist, kn, 1);
/*
- * Immediately activate any exit notes if the target process is a
- * zombie. This is necessary to handle the case where the target
- * process, e.g. a child, dies before the kevent is registered.
+ * Immediately activate any child notes or, in the case of a zombie
+ * target process, exit notes. The latter is necessary to handle the
+ * case where the target process, e.g. a child, dies before the kevent
+ * is registered.
*/
- if (immediate && filt_proc(kn, NOTE_EXIT))
+ if (immediate || (exiting && filt_proc(kn, NOTE_EXIT)))
KNOTE_ACTIVATE(kn, 0);
PROC_UNLOCK(p);
@@ -434,10 +478,8 @@ filt_procattach(struct knote *kn)
static void
filt_procdetach(struct knote *kn)
{
- struct proc *p;
- p = kn->kn_ptr.p_proc;
- knlist_remove(&p->p_klist, kn, 0);
+ knlist_remove(kn->kn_knlist, kn, 0);
kn->kn_ptr.p_proc = NULL;
}
@@ -445,30 +487,26 @@ filt_procdetach(struct knote *kn)
static int
filt_proc(struct knote *kn, long hint)
{
- struct proc *p = kn->kn_ptr.p_proc;
+ struct proc *p;
u_int event;
- /*
- * mask off extra data
- */
+ p = kn->kn_ptr.p_proc;
+ if (p == NULL) /* already activated, from attach filter */
+ return (0);
+
+ /* Mask off extra data. */
event = (u_int)hint & NOTE_PCTRLMASK;
- /*
- * if the user is interested in this event, record it.
- */
+ /* If the user is interested in this event, record it. */
if (kn->kn_sfflags & event)
kn->kn_fflags |= event;
- /*
- * process is gone, so flag the event as finished.
- */
+ /* Process is gone, so flag the event as finished. */
if (event == NOTE_EXIT) {
- if (!(kn->kn_status & KN_DETACHED))
- knlist_remove_inevent(&p->p_klist, kn);
- kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+ kn->kn_flags |= EV_EOF | EV_ONESHOT;
kn->kn_ptr.p_proc = NULL;
if (kn->kn_fflags & NOTE_EXIT)
- kn->kn_data = p->p_xstat;
+ kn->kn_data = KW_EXITCODE(p->p_xexit, p->p_xsig);
if (kn->kn_fflags == 0)
kn->kn_flags |= EV_DROP;
return (1);
@@ -498,8 +536,6 @@ knote_fork(struct knlist *list, int pid)
list->kl_lock(list->kl_lockarg);
SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
- if ((kn->kn_status & KN_INFLUX) == KN_INFLUX)
- continue;
kq = kn->kn_kq;
KQ_LOCK(kq);
if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) {
@@ -521,7 +557,7 @@ knote_fork(struct knlist *list, int pid)
/*
* The NOTE_TRACK case. In addition to the activation
- * of the event, we need to register new event to
+ * of the event, we need to register new events to
* track the child. Drop the locks in preparation for
* the call to kqueue_register().
*/
@@ -530,8 +566,28 @@ knote_fork(struct knlist *list, int pid)
list->kl_unlock(list->kl_lockarg);
/*
- * Activate existing knote and register a knote with
+ * Activate existing knote and register tracking knotes with
* new process.
+ *
+ * First register a knote to get just the child notice. This
+ * must be a separate note from a potential NOTE_EXIT
+ * notification since both NOTE_CHILD and NOTE_EXIT are defined
+ * to use the data field (in conflicting ways).
+ */
+ kev.ident = pid;
+ kev.filter = kn->kn_filter;
+ kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT |
+ EV_FLAG2;
+ kev.fflags = kn->kn_sfflags;
+ kev.data = kn->kn_id; /* parent */
+ kev.udata = kn->kn_kevent.udata;/* preserve udata */
+ error = kqueue_register(kq, &kev, NULL, 0);
+ if (error)
+ kn->kn_fflags |= NOTE_TRACKERR;
+
+ /*
+ * Then register another knote to track other potential events
+ * from the new process.
*/
kev.ident = pid;
kev.filter = kn->kn_filter;
@@ -557,64 +613,123 @@ knote_fork(struct knlist *list, int pid)
* XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the
* interval timer support code.
*/
-static int
-timertoticks(intptr_t data)
-{
- struct timeval tv;
- int tticks;
-
- tv.tv_sec = data / 1000;
- tv.tv_usec = (data % 1000) * 1000;
- tticks = tvtohz(&tv);
- return tticks;
+#define NOTE_TIMER_PRECMASK (NOTE_SECONDS|NOTE_MSECONDS|NOTE_USECONDS| \
+ NOTE_NSECONDS)
+
+static sbintime_t
+timer2sbintime(intptr_t data, int flags)
+{
+
+ /*
+ * Macros for converting to the fractional second portion of an
+ * sbintime_t using 64bit multiplication to improve precision.
+ */
+#define NS_TO_SBT(ns) (((ns) * (((uint64_t)1 << 63) / 500000000)) >> 32)
+#define US_TO_SBT(us) (((us) * (((uint64_t)1 << 63) / 500000)) >> 32)
+#define MS_TO_SBT(ms) (((ms) * (((uint64_t)1 << 63) / 500)) >> 32)
+ switch (flags & NOTE_TIMER_PRECMASK) {
+ case NOTE_SECONDS:
+#ifdef __LP64__
+ if (data > (SBT_MAX / SBT_1S))
+ return SBT_MAX;
+#endif
+ return ((sbintime_t)data << 32);
+ case NOTE_MSECONDS: /* FALLTHROUGH */
+ case 0:
+ if (data >= 1000) {
+ int64_t secs = data / 1000;
+#ifdef __LP64__
+ if (secs > (SBT_MAX / SBT_1S))
+ return SBT_MAX;
+#endif
+ return (secs << 32 | MS_TO_SBT(data % 1000));
+ }
+ return MS_TO_SBT(data);
+ case NOTE_USECONDS:
+ if (data >= 1000000) {
+ int64_t secs = data / 1000000;
+#ifdef __LP64__
+ if (secs > (SBT_MAX / SBT_1S))
+ return SBT_MAX;
+#endif
+ return (secs << 32 | US_TO_SBT(data % 1000000));
+ }
+ return US_TO_SBT(data);
+ case NOTE_NSECONDS:
+ if (data >= 1000000000) {
+ int64_t secs = data / 1000000000;
+#ifdef __LP64__
+ if (secs > (SBT_MAX / SBT_1S))
+ return SBT_MAX;
+#endif
+ return (secs << 32 | US_TO_SBT(data % 1000000000));
+ }
+ return NS_TO_SBT(data);
+ default:
+ break;
+ }
+ return (-1);
}
static void
filt_timerexpire(void *knx)
{
- struct knote *kn = knx;
struct callout *calloutp;
+ struct knote *kn;
+ kn = knx;
kn->kn_data++;
KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */
- /*
- * timertoticks() uses tvtohz() which always adds 1 to allow
- * for the time until the next clock interrupt being strictly
- * less than 1 clock tick. We don't want that here since we
- * want to appear to be in sync with the clock interrupt even
- * when we're delayed.
- */
if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) {
calloutp = (struct callout *)kn->kn_hook;
- callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata) - 1,
- filt_timerexpire, kn);
+ *kn->kn_ptr.p_nexttime += timer2sbintime(kn->kn_sdata,
+ kn->kn_sfflags);
+ callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0,
+ filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE);
}
}
/*
- * data contains amount of time to sleep, in milliseconds
+ * data contains amount of time to sleep
*/
static int
filt_timerattach(struct knote *kn)
{
struct callout *calloutp;
+ sbintime_t to;
+ unsigned int ncallouts;
- atomic_add_int(&kq_ncallouts, 1);
+ if ((intptr_t)kn->kn_sdata < 0)
+ return (EINVAL);
+ if ((intptr_t)kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0)
+ kn->kn_sdata = 1;
+ /* Only precision unit are supported in flags so far */
+ if (kn->kn_sfflags & ~NOTE_TIMER_PRECMASK)
+ return (EINVAL);
- if (kq_ncallouts >= kq_calloutmax) {
- atomic_add_int(&kq_ncallouts, -1);
- return (ENOMEM);
- }
+ to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags);
+ if (to < 0)
+ return (EINVAL);
+
+ ncallouts = atomic_load_explicit(&kq_ncallouts, memory_order_relaxed);
+ do {
+ if (ncallouts >= kq_calloutmax)
+ return (ENOMEM);
+ } while (!atomic_compare_exchange_weak_explicit(&kq_ncallouts,
+ &ncallouts, ncallouts + 1, memory_order_relaxed,
+ memory_order_relaxed));
kn->kn_flags |= EV_CLEAR; /* automatically set */
- kn->kn_status &= ~KN_DETACHED; /* knlist_add usually sets it */
+ kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */
+ kn->kn_ptr.p_nexttime = malloc(sizeof(sbintime_t), M_KQUEUE, M_WAITOK);
calloutp = malloc(sizeof(*calloutp), M_KQUEUE, M_WAITOK);
- callout_init(calloutp, CALLOUT_MPSAFE);
+ callout_init(calloutp, 1);
kn->kn_hook = calloutp;
- callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata),
- filt_timerexpire, kn);
+ *kn->kn_ptr.p_nexttime = to + sbinuptime();
+ callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0,
+ filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE);
return (0);
}
@@ -623,12 +738,15 @@ static void
filt_timerdetach(struct knote *kn)
{
struct callout *calloutp;
+ unsigned int old;
calloutp = (struct callout *)kn->kn_hook;
callout_drain(calloutp);
free(calloutp, M_KQUEUE);
- atomic_add_int(&kq_ncallouts, -1);
- kn->kn_status |= KN_DETACHED; /* knlist_remove usually clears it */
+ free(kn->kn_ptr.p_nexttime, M_KQUEUE);
+ old = atomic_fetch_sub_explicit(&kq_ncallouts, 1, memory_order_relaxed);
+ KASSERT(old > 0, ("Number of callouts cannot become negative"));
+ kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */
}
static int
@@ -727,34 +845,60 @@ filt_usertouch(struct knote *kn, struct kevent *kev, u_long type)
}
#ifdef __rtems__
+static int
+kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps);
+
static
#endif /* __rtems__ */
int
sys_kqueue(struct thread *td, struct kqueue_args *uap)
{
+
+ return (kern_kqueue(td, 0, NULL));
+}
+
+static void
+kqueue_init(struct kqueue *kq)
+{
+
+ mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK);
+ TAILQ_INIT(&kq->kq_head);
+ knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
+ TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);
+}
+
+int
+kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps)
+{
struct filedesc *fdp;
struct kqueue *kq;
struct file *fp;
+ struct ucred *cred;
int fd, error;
#ifndef __rtems__
fdp = td->td_proc->p_fd;
+ cred = td->td_ucred;
+ if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES)))
+ return (ENOMEM);
#else /* __rtems__ */
- (void) fdp;
+ (void)fdp;
+ (void)cred;
#endif /* __rtems__ */
- error = falloc(td, &fp, &fd, 0);
- if (error)
- goto done2;
- /* An extra reference on `nfp' has been held for us by falloc(). */
+ error = falloc_caps(td, &fp, &fd, flags, fcaps);
+ if (error != 0) {
+ chgkqcnt(cred->cr_ruidinfo, -1, 0);
+ return (error);
+ }
+
+ /* An extra reference on `fp' has been held for us by falloc(). */
kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO);
- mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK);
- TAILQ_INIT(&kq->kq_head);
+ kqueue_init(kq);
#ifndef __rtems__
kq->kq_fdp = fdp;
+ kq->kq_cred = crhold(cred);
#endif /* __rtems__ */
- knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
- TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);
#ifndef __rtems__
FILEDESC_XLOCK(fdp);
@@ -770,8 +914,7 @@ sys_kqueue(struct thread *td, struct kqueue_args *uap)
fdrop(fp, td);
td->td_retval[0] = fd;
-done2:
- return (error);
+ return (0);
}
#ifdef __rtems__
int
@@ -806,9 +949,11 @@ struct kevent_args {
};
#endif
#ifdef __rtems__
-static int
-kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct
- kevent_copyops *k_ops, const struct timespec *timeout);
+static int kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
+ struct kevent_copyops *k_ops, const struct timespec *timeout);
+
+static int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges,
+ int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout);
static
#endif /* __rtems__ */
@@ -936,24 +1081,39 @@ int
kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
struct kevent_copyops *k_ops, const struct timespec *timeout)
{
- struct kevent keva[KQ_NEVENTS];
- struct kevent *kevp, *changes;
- struct kqueue *kq;
+ cap_rights_t rights;
struct file *fp;
- int i, n, nerrors, error;
+ int error;
- if ((error = fget(td, fd, CAP_POST_EVENT, &fp)) != 0)
+ cap_rights_init(&rights);
+ if (nchanges > 0)
+ cap_rights_set(&rights, CAP_KQUEUE_CHANGE);
+ if (nevents > 0)
+ cap_rights_set(&rights, CAP_KQUEUE_EVENT);
+ error = fget(td, fd, &rights, &fp);
+ if (error != 0)
return (error);
- if ((error = kqueue_acquire(fp, &kq)) != 0)
- goto done_norel;
- nerrors = 0;
+ error = kern_kevent_fp(td, fp, nchanges, nevents, k_ops, timeout);
+ fdrop(fp, td);
+ return (error);
+}
+
+static int
+kqueue_kevent(struct kqueue *kq, struct thread *td, int nchanges, int nevents,
+ struct kevent_copyops *k_ops, const struct timespec *timeout)
+{
+ struct kevent keva[KQ_NEVENTS];
+ struct kevent *kevp, *changes;
+ int i, n, nerrors, error;
+
+ nerrors = 0;
while (nchanges > 0) {
n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges;
error = k_ops->k_copyin(k_ops->arg, keva, n);
if (error)
- goto done;
+ return (error);
changes = keva;
for (i = 0; i < n; i++) {
kevp = &changes[i];
@@ -962,31 +1122,56 @@ kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
kevp->flags &= ~EV_SYSFLAGS;
error = kqueue_register(kq, kevp, td, 1);
if (error || (kevp->flags & EV_RECEIPT)) {
- if (nevents != 0) {
- kevp->flags = EV_ERROR;
- kevp->data = error;
- (void) k_ops->k_copyout(k_ops->arg,
- kevp, 1);
- nevents--;
- nerrors++;
- } else {
- goto done;
- }
+ if (nevents == 0)
+ return (error);
+ kevp->flags = EV_ERROR;
+ kevp->data = error;
+ (void)k_ops->k_copyout(k_ops->arg, kevp, 1);
+ nevents--;
+ nerrors++;
}
}
nchanges -= n;
}
if (nerrors) {
td->td_retval[0] = nerrors;
- error = 0;
- goto done;
+ return (0);
}
- error = kqueue_scan(kq, nevents, k_ops, timeout, keva, td);
-done:
+ return (kqueue_scan(kq, nevents, k_ops, timeout, keva, td));
+}
+
+int
+kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents,
+ struct kevent_copyops *k_ops, const struct timespec *timeout)
+{
+ struct kqueue *kq;
+ int error;
+
+ error = kqueue_acquire(fp, &kq);
+ if (error != 0)
+ return (error);
+ error = kqueue_kevent(kq, td, nchanges, nevents, k_ops, timeout);
kqueue_release(kq, 0);
-done_norel:
- fdrop(fp, td);
+ return (error);
+}
+
+/*
+ * Performs a kevent() call on a temporarily created kqueue. This can be
+ * used to perform one-shot polling, similar to poll() and select().
+ */
+int
+kern_kevent_anonymous(struct thread *td, int nevents,
+ struct kevent_copyops *k_ops)
+{
+ struct kqueue kq = {};
+ int error;
+
+ kqueue_init(&kq);
+ kq.kq_refcnt = 1;
+ error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL);
+ kqueue_drain(&kq, td);
+ kqueue_destroy(&kq);
return (error);
}
@@ -1046,6 +1231,9 @@ kqueue_fo_find(int filt)
if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
return NULL;
+ if (sysfilt_ops[~filt].for_nolock)
+ return sysfilt_ops[~filt].for_fop;
+
mtx_lock(&filterops_lock);
sysfilt_ops[~filt].for_refcnt++;
if (sysfilt_ops[~filt].for_fop == NULL)
@@ -1062,6 +1250,9 @@ kqueue_fo_release(int filt)
if (filt > 0 || filt + EVFILT_SYSCOUNT < 0)
return;
+ if (sysfilt_ops[~filt].for_nolock)
+ return;
+
mtx_lock(&filterops_lock);
KASSERT(sysfilt_ops[~filt].for_refcnt > 0,
("filter object refcount not valid on release"));
@@ -1080,11 +1271,17 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int wa
struct filterops *fops;
struct file *fp;
struct knote *kn, *tkn;
+ struct knlist *knl;
+ cap_rights_t rights;
int error, filt, event;
int haskqglobal, filedesc_unlock;
+ if ((kev->flags & (EV_ENABLE | EV_DISABLE)) == (EV_ENABLE | EV_DISABLE))
+ return (EINVAL);
+
fp = NULL;
kn = NULL;
+ knl = NULL;
error = 0;
haskqglobal = 0;
filedesc_unlock = 0;
@@ -1094,12 +1291,25 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int wa
if (fops == NULL)
return EINVAL;
- tkn = knote_alloc(waitok); /* prevent waiting with locks */
+ if (kev->flags & EV_ADD) {
+ /*
+ * Prevent waiting with locks. Non-sleepable
+ * allocation failures are handled in the loop, only
+ * if the spare knote appears to be actually required.
+ */
+ tkn = knote_alloc(waitok);
+ } else {
+ tkn = NULL;
+ }
findkn:
if (fops->f_isfd) {
KASSERT(td != NULL, ("td is NULL"));
- error = fget(td, kev->ident, CAP_POLL_EVENT, &fp);
+ if (kev->ident > INT_MAX)
+ error = EBADF;
+ else
+ error = fget(td, kev->ident,
+ cap_rights_init(&rights, CAP_EVENT), &fp);
if (error)
goto done;
@@ -1120,7 +1330,7 @@ findkn:
if (fp->f_io.pathinfo.handlers == &kqueueops) {
#endif /* __rtems__ */
/*
- * if we add some inteligence about what we are doing,
+ * If we add some intelligence about what we are doing,
* we should be able to support events on ourselves.
* We need to know when we are doing this to prevent
* getting both the knlist lock and the kq lock since
@@ -1152,7 +1362,18 @@ findkn:
kqueue_expand(kq, fops, kev->ident, waitok);
KQ_LOCK(kq);
- if (kq->kq_knhashmask != 0) {
+
+ /*
+ * If possible, find an existing knote to use for this kevent.
+ */
+ if (kev->filter == EVFILT_PROC &&
+ (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) {
+ /* This is an internal creation of a process tracking
+ * note. Don't attempt to coalesce this with an
+ * existing note.
+ */
+ ;
+ } else if (kq->kq_knhashmask != 0) {
struct klist *list;
list = &kq->kq_knhash[
@@ -1164,7 +1385,7 @@ findkn:
}
}
- /* knote is in the process of changing, wait for it to stablize. */
+ /* knote is in the process of changing, wait for it to stabilize. */
if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
if (filedesc_unlock) {
@@ -1208,7 +1429,7 @@ findkn:
kev->data = 0;
kn->kn_kevent = *kev;
kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE |
- EV_ENABLE | EV_DISABLE);
+ EV_ENABLE | EV_DISABLE | EV_FORCEONESHOT);
kn->kn_status = KN_INFLUX|KN_DETACHED;
error = knote_attach(kn, kq);
@@ -1222,7 +1443,7 @@ findkn:
knote_drop(kn, td);
goto done;
}
- KN_LIST_LOCK(kn);
+ knl = kn_list_lock(kn);
goto done_ev_add;
} else {
/* No matching knote and the EV_ADD flag is not set. */
@@ -1241,6 +1462,11 @@ findkn:
goto done;
}
+ if (kev->flags & EV_FORCEONESHOT) {
+ kn->kn_flags |= EV_ONESHOT;
+ KNOTE_ACTIVATE(kn, 1);
+ }
+
/*
* The user may change some filter values after the initial EV_ADD,
* but doing so will not reset any filter which has already been
@@ -1248,7 +1474,7 @@ findkn:
*/
kn->kn_status |= KN_INFLUX | KN_SCAN;
KQ_UNLOCK(kq);
- KN_LIST_LOCK(kn);
+ knl = kn_list_lock(kn);
kn->kn_kevent.udata = kev->udata;
if (!fops->f_isfd && fops->f_touch != NULL) {
fops->f_touch(kn, kev, EVENT_REGISTER);
@@ -1265,24 +1491,24 @@ findkn:
* kn_knlist.
*/
done_ev_add:
- event = kn->kn_fop->f_event(kn, 0);
+ if ((kev->flags & EV_ENABLE) != 0)
+ kn->kn_status &= ~KN_DISABLED;
+ else if ((kev->flags & EV_DISABLE) != 0)
+ kn->kn_status |= KN_DISABLED;
+
+ if ((kn->kn_status & KN_DISABLED) == 0)
+ event = kn->kn_fop->f_event(kn, 0);
+ else
+ event = 0;
+
KQ_LOCK(kq);
if (event)
- KNOTE_ACTIVATE(kn, 1);
+ kn->kn_status |= KN_ACTIVE;
+ if ((kn->kn_status & (KN_ACTIVE | KN_DISABLED | KN_QUEUED)) ==
+ KN_ACTIVE)
+ knote_enqueue(kn);
kn->kn_status &= ~(KN_INFLUX | KN_SCAN);
- KN_LIST_UNLOCK(kn);
-
- if ((kev->flags & EV_DISABLE) &&
- ((kn->kn_status & KN_DISABLED) == 0)) {
- kn->kn_status |= KN_DISABLED;
- }
-
- if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
- kn->kn_status &= ~KN_DISABLED;
- if ((kn->kn_status & KN_ACTIVE) &&
- ((kn->kn_status & KN_QUEUED) == 0))
- knote_enqueue(kn);
- }
+ kn_list_unlock(knl);
KQ_UNLOCK_FLUX(kq);
done:
@@ -1291,8 +1517,7 @@ done:
FILEDESC_XUNLOCK(td->td_proc->p_fd);
if (fp != NULL)
fdrop(fp, td);
- if (tkn != NULL)
- knote_free(tkn);
+ knote_free(tkn);
if (fops != NULL)
kqueue_fo_release(filt);
return (error);
@@ -1348,7 +1573,7 @@ kqueue_schedtask(struct kqueue *kq)
("scheduling kqueue task while draining"));
if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) {
- taskqueue_enqueue(taskqueue_kqueue, &kq->kq_task);
+ taskqueue_enqueue(taskqueue_kqueue_ctx, &kq->kq_task);
kq->kq_state |= KQ_TASKSCHED;
}
}
@@ -1456,10 +1681,10 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops,
const struct timespec *tsp, struct kevent *keva, struct thread *td)
{
struct kevent *kevp;
- struct timeval atv, rtv, ttv;
struct knote *kn, *marker;
- int count, timeout, nkev, error, influx;
- int haskqglobal, touch;
+ struct knlist *knl;
+ sbintime_t asbt, rsbt;
+ int count, error, haskqglobal, influx, nkev, touch;
count = maxevents;
nkev = 0;
@@ -1469,53 +1694,42 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops,
if (maxevents == 0)
goto done_nl;
+ rsbt = 0;
if (tsp != NULL) {
- TIMESPEC_TO_TIMEVAL(&atv, tsp);
- if (itimerfix(&atv)) {
+ if (tsp->tv_sec < 0 || tsp->tv_nsec < 0 ||
+ tsp->tv_nsec >= 1000000000) {
error = EINVAL;
goto done_nl;
}
- if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
- timeout = -1;
- else
- timeout = atv.tv_sec > 24 * 60 * 60 ?
- 24 * 60 * 60 * hz : tvtohz(&atv);
- getmicrouptime(&rtv);
- timevaladd(&atv, &rtv);
- } else {
- atv.tv_sec = 0;
- atv.tv_usec = 0;
- timeout = 0;
- }
+ if (timespecisset(tsp)) {
+ if (tsp->tv_sec <= INT32_MAX) {
+ rsbt = tstosbt(*tsp);
+ if (TIMESEL(&asbt, rsbt))
+ asbt += tc_tick_sbt;
+ if (asbt <= SBT_MAX - rsbt)
+ asbt += rsbt;
+ else
+ asbt = 0;
+ rsbt >>= tc_precexp;
+ } else
+ asbt = 0;
+ } else
+ asbt = -1;
+ } else
+ asbt = 0;
marker = knote_alloc(1);
- if (marker == NULL) {
- error = ENOMEM;
- goto done_nl;
- }
marker->kn_status = KN_MARKER;
KQ_LOCK(kq);
- goto start;
retry:
- if (atv.tv_sec || atv.tv_usec) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=))
- goto done;
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timeout = ttv.tv_sec > 24 * 60 * 60 ?
- 24 * 60 * 60 * hz : tvtohz(&ttv);
- }
-
-start:
kevp = keva;
if (kq->kq_count == 0) {
- if (timeout < 0) {
+ if (asbt == -1) {
error = EWOULDBLOCK;
} else {
kq->kq_state |= KQ_SLEEP;
- error = msleep(kq, &kq->kq_lock, PSOCK | PCATCH,
- "kqread", timeout);
+ error = msleep_sbt(kq, &kq->kq_lock, PSOCK | PCATCH,
+ "kqread", asbt, rsbt, C_ABSOLUTE);
}
if (error == 0)
goto retry;
@@ -1594,7 +1808,7 @@ start:
KQ_UNLOCK(kq);
if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE)
KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
- KN_LIST_LOCK(kn);
+ knl = kn_list_lock(kn);
if (kn->kn_fop->f_event(kn, 0) == 0) {
KQ_LOCK(kq);
KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
@@ -1602,7 +1816,7 @@ start:
~(KN_QUEUED | KN_ACTIVE | KN_INFLUX |
KN_SCAN);
kq->kq_count--;
- KN_LIST_UNLOCK(kn);
+ kn_list_unlock(knl);
influx = 1;
continue;
}
@@ -1614,7 +1828,7 @@ start:
*kevp = kn->kn_kevent;
KQ_LOCK(kq);
KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
- if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
+ if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
/*
* Manually clear knotes who weren't
* 'touch'ed.
@@ -1631,7 +1845,7 @@ start:
TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
kn->kn_status &= ~(KN_INFLUX | KN_SCAN);
- KN_LIST_UNLOCK(kn);
+ kn_list_unlock(knl);
influx = 1;
}
@@ -1665,35 +1879,6 @@ done_nl:
}
#ifndef __rtems__
-/*
- * XXX
- * This could be expanded to call kqueue_scan, if desired.
- */
-/*ARGSUSED*/
-static int
-kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
- int flags, struct thread *td)
-{
- return (ENXIO);
-}
-
-/*ARGSUSED*/
-static int
-kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
- int flags, struct thread *td)
-{
- return (ENXIO);
-}
-
-/*ARGSUSED*/
-static int
-kqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred,
- struct thread *td)
-{
-
- return (EINVAL);
-}
-
/*ARGSUSED*/
static int
kqueue_ioctl(struct file *fp, u_long cmd, void *data,
@@ -1813,26 +1998,12 @@ rtems_bsd_kqueue_stat(const rtems_filesystem_location_info_t *loc,
return (0);
}
-/*ARGSUSED*/
-static int
-kqueue_close(struct file *fp, struct thread *td)
+static void
+kqueue_drain(struct kqueue *kq, struct thread *td)
{
- struct kqueue *kq = fp->f_data;
- struct filedesc *fdp;
struct knote *kn;
int i;
- int error;
- int filedesc_unlock;
-
-#ifdef __rtems__
- /* FIXME: Move this to the RTEMS close() function */
- knote_fdclose(td, rtems_bsd_fp_to_fd(fp));
-#endif /* __rtems__ */
- if ((error = kqueue_acquire(fp, &kq)))
- return error;
-
- filedesc_unlock = 0;
KQ_LOCK(kq);
KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING,
@@ -1842,11 +2013,6 @@ kqueue_close(struct file *fp, struct thread *td)
msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0);
KASSERT(kq->kq_refcnt == 1, ("other refs are out there!"));
-#ifndef __rtems__
- fdp = kq->kq_fdp;
-#else /* __rtems__ */
- (void) fdp;
-#endif /* __rtems__ */
KASSERT(knlist_empty(&kq->kq_sel.si_note),
("kqueue's knlist not empty"));
@@ -1897,6 +2063,38 @@ kqueue_close(struct file *fp, struct thread *td)
}
KQ_UNLOCK(kq);
+}
+
+static void
+kqueue_destroy(struct kqueue *kq)
+{
+
+ KASSERT(kq->kq_fdp == NULL,
+ ("kqueue still attached to a file descriptor"));
+ seldrain(&kq->kq_sel);
+ knlist_destroy(&kq->kq_sel.si_note);
+ mtx_destroy(&kq->kq_lock);
+
+ if (kq->kq_knhash != NULL)
+ free(kq->kq_knhash, M_KQUEUE);
+ if (kq->kq_knlist != NULL)
+ free(kq->kq_knlist, M_KQUEUE);
+
+ funsetown(&kq->kq_sigio);
+}
+
+/*ARGSUSED*/
+static int
+kqueue_close(struct file *fp, struct thread *td)
+{
+ struct kqueue *kq = fp->f_data;
+ struct filedesc *fdp;
+ int error;
+ int filedesc_unlock;
+
+ if ((error = kqueue_acquire(fp, &kq)))
+ return error;
+ kqueue_drain(kq, td);
#ifndef __rtems__
/*
@@ -1905,6 +2103,8 @@ kqueue_close(struct file *fp, struct thread *td)
* lock is owned, and filedesc sx is locked before, to not
* take the sleepable lock after non-sleepable.
*/
+ fdp = kq->kq_fdp;
+ kq->kq_fdp = NULL;
if (!sx_xlocked(FILEDESC_LOCK(fdp))) {
FILEDESC_XLOCK(fdp);
filedesc_unlock = 1;
@@ -1920,19 +2120,9 @@ kqueue_close(struct file *fp, struct thread *td)
rtems_libio_unlock();
#endif /* __rtems__ */
- seldrain(&kq->kq_sel);
- knlist_destroy(&kq->kq_sel.si_note);
- mtx_destroy(&kq->kq_lock);
-#ifndef __rtems__
- kq->kq_fdp = NULL;
-#endif /* __rtems__ */
-
- if (kq->kq_knhash != NULL)
- free(kq->kq_knhash, M_KQUEUE);
- if (kq->kq_knlist != NULL)
- free(kq->kq_knlist, M_KQUEUE);
-
- funsetown(&kq->kq_sigio);
+ kqueue_destroy(kq);
+ chgkqcnt(kq->kq_cred->cr_ruidinfo, -1, 0);
+ crfree(kq->kq_cred);
free(kq, M_KQUEUE);
fp->f_data = NULL;
@@ -1956,6 +2146,16 @@ rtems_bsd_kqueue_close(rtems_libio_t *iop)
}
#endif /* __rtems__ */
+#ifndef __rtems__
+static int
+kqueue_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
+{
+
+ kif->kf_type = KF_TYPE_KQUEUE;
+ return (0);
+}
+#endif /* __rtems__ */
+
static void
kqueue_wakeup(struct kqueue *kq)
{
@@ -1993,7 +2193,7 @@ void
knote(struct knlist *list, long hint, int lockflags)
{
struct kqueue *kq;
- struct knote *kn;
+ struct knote *kn, *tkn;
int error;
if (list == NULL)
@@ -2005,14 +2205,13 @@ knote(struct knlist *list, long hint, int lockflags)
list->kl_lock(list->kl_lockarg);
/*
- * If we unlock the list lock (and set KN_INFLUX), we can eliminate
- * the kqueue scheduling, but this will introduce four
- * lock/unlock's for each knote to test. If we do, continue to use
- * SLIST_FOREACH, SLIST_FOREACH_SAFE is not safe in our case, it is
- * only safe if you want to remove the current item, which we are
- * not doing.
+ * If we unlock the list lock (and set KN_INFLUX), we can
+ * eliminate the kqueue scheduling, but this will introduce
+ * four lock/unlock's for each knote to test. Also, marker
+ * would be needed to keep iteration position, since filters
+ * or other threads could remove events.
*/
- SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
+ SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) {
kq = kn->kn_kq;
KQ_LOCK(kq);
if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) {
@@ -2068,7 +2267,8 @@ knlist_add(struct knlist *knl, struct knote *kn, int islocked)
}
static void
-knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked)
+knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked,
+ int kqislocked)
{
KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked"));
KNL_ASSERT_LOCK(knl, knlislocked);
@@ -2081,7 +2281,7 @@ knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqis
SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext);
kn->kn_knlist = NULL;
if (!knlislocked)
- knl->kl_unlock(knl->kl_lockarg);
+ kn_list_unlock(knl);
if (!kqislocked)
KQ_LOCK(kn->kn_kq);
kn->kn_status |= KN_DETACHED;
@@ -2090,7 +2290,7 @@ knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqis
}
/*
- * remove all knotes from a specified klist
+ * remove knote from the specified knlist
*/
void
knlist_remove(struct knlist *knl, struct knote *kn, int islocked)
@@ -2099,20 +2299,10 @@ knlist_remove(struct knlist *knl, struct knote *kn, int islocked)
knlist_remove_kq(knl, kn, islocked, 0);
}
-/*
- * remove knote from a specified klist while in f_event handler.
- */
-void
-knlist_remove_inevent(struct knlist *knl, struct knote *kn)
-{
-
- knlist_remove_kq(knl, kn, 1,
- (kn->kn_status & KN_HASKQLOCK) == KN_HASKQLOCK);
-}
-
int
knlist_empty(struct knlist *knl)
{
+
KNL_ASSERT_LOCKED(knl);
return SLIST_EMPTY(&knl->kl_list);
}
@@ -2126,27 +2316,61 @@ static void knlist_mtx_unlock(void *arg);
static void
knlist_mtx_lock(void *arg)
{
+
mtx_lock((struct mtx *)arg);
}
static void
knlist_mtx_unlock(void *arg)
{
+
mtx_unlock((struct mtx *)arg);
}
static void
knlist_mtx_assert_locked(void *arg)
{
+
mtx_assert((struct mtx *)arg, MA_OWNED);
}
static void
knlist_mtx_assert_unlocked(void *arg)
{
+
mtx_assert((struct mtx *)arg, MA_NOTOWNED);
}
+#ifndef __rtems__
+static void
+knlist_rw_rlock(void *arg)
+{
+
+ rw_rlock((struct rwlock *)arg);
+}
+
+static void
+knlist_rw_runlock(void *arg)
+{
+
+ rw_runlock((struct rwlock *)arg);
+}
+
+static void
+knlist_rw_assert_locked(void *arg)
+{
+
+ rw_assert((struct rwlock *)arg, RA_LOCKED);
+}
+
+static void
+knlist_rw_assert_unlocked(void *arg)
+{
+
+ rw_assert((struct rwlock *)arg, RA_UNLOCKED);
+}
+#endif /* __rtems__ */
+
void
knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *),
void (*kl_unlock)(void *),
@@ -2175,6 +2399,7 @@ knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *),
else
knl->kl_assert_unlocked = kl_assert_unlocked;
+ knl->kl_autodestroy = 0;
SLIST_INIT(&knl->kl_list);
}
@@ -2185,6 +2410,26 @@ knlist_init_mtx(struct knlist *knl, struct mtx *lock)
knlist_init(knl, lock, NULL, NULL, NULL, NULL);
}
+struct knlist *
+knlist_alloc(struct mtx *lock)
+{
+ struct knlist *knl;
+
+ knl = malloc(sizeof(struct knlist), M_KQUEUE, M_WAITOK);
+ knlist_init_mtx(knl, lock);
+ return (knl);
+}
+
+#ifndef __rtems__
+void
+knlist_init_rw_reader(struct knlist *knl, struct rwlock *lock)
+{
+
+ knlist_init(knl, lock, knlist_rw_rlock, knlist_rw_runlock,
+ knlist_rw_assert_locked, knlist_rw_assert_unlocked);
+}
+#endif /* __rtems__ */
+
void
knlist_destroy(struct knlist *knl)
{
@@ -2192,7 +2437,7 @@ knlist_destroy(struct knlist *knl)
#ifdef INVARIANTS
/*
* if we run across this error, we need to find the offending
- * driver and have it call knlist_clear.
+ * driver and have it call knlist_clear or knlist_delete.
*/
if (!SLIST_EMPTY(&knl->kl_list))
printf("WARNING: destroying knlist w/ knotes on it!\n");
@@ -2202,6 +2447,18 @@ knlist_destroy(struct knlist *knl)
SLIST_INIT(&knl->kl_list);
}
+void
+knlist_detach(struct knlist *knl)
+{
+
+ KNL_ASSERT_LOCKED(knl);
+ knl->kl_autodestroy = 1;
+ if (knlist_empty(knl)) {
+ knlist_destroy(knl);
+ free(knl, M_KQUEUE);
+ }
+}
+
/*
* Even if we are locked, we may need to drop the lock to allow any influx
* knotes time to "settle".
@@ -2212,6 +2469,7 @@ knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn)
struct knote *kn, *kn2;
struct kqueue *kq;
+ KASSERT(!knl->kl_autodestroy, ("cleardel for autodestroy %p", knl));
if (islocked)
KNL_ASSERT_LOCKED(knl);
else {
@@ -2421,15 +2679,16 @@ SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL);
static struct knote *
knote_alloc(int waitok)
{
- return ((struct knote *)uma_zalloc(knote_zone,
- (waitok ? M_WAITOK : M_NOWAIT)|M_ZERO));
+
+ return (uma_zalloc(knote_zone, (waitok ? M_WAITOK : M_NOWAIT) |
+ M_ZERO));
}
static void
knote_free(struct knote *kn)
{
- if (kn != NULL)
- uma_zfree(knote_zone, kn);
+
+ uma_zfree(knote_zone, kn);
}
/*
@@ -2440,9 +2699,11 @@ kqfd_register(int fd, struct kevent *kev, struct thread *td, int waitok)
{
struct kqueue *kq;
struct file *fp;
+ cap_rights_t rights;
int error;
- if ((error = fget(td, fd, CAP_POST_EVENT, &fp)) != 0)
+ error = fget(td, fd, cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &fp);
+ if (error != 0)
return (error);
if ((error = kqueue_acquire(fp, &kq)) != 0)
goto noacquire;
diff --git a/freebsd/sys/kern/kern_hhook.c b/freebsd/sys/kern/kern_hhook.c
index 21239b24..2efdc533 100644
--- a/freebsd/sys/kern/kern_hhook.c
+++ b/freebsd/sys/kern/kern_hhook.c
@@ -103,7 +103,8 @@ hhook_run_hooks(struct hhook_head *hhh, void *ctx_data, struct osd *hosd)
HHH_RLOCK(hhh, &rmpt);
STAILQ_FOREACH(hhk, &hhh->hhh_hooks, hhk_next) {
- if (hhk->hhk_helper->h_flags & HELPER_NEEDS_OSD) {
+ if (hhk->hhk_helper != NULL &&
+ hhk->hhk_helper->h_flags & HELPER_NEEDS_OSD) {
hdata = osd_get(OSD_KHELP, hosd, hhk->hhk_helper->h_id);
if (hdata == NULL)
continue;
@@ -511,7 +512,7 @@ hhook_vnet_uninit(const void *unused __unused)
/*
* When a vnet is created and being initialised, init the V_hhook_vhead_list.
*/
-VNET_SYSINIT(hhook_vnet_init, SI_SUB_MBUF, SI_ORDER_FIRST,
+VNET_SYSINIT(hhook_vnet_init, SI_SUB_INIT_IF, SI_ORDER_FIRST,
hhook_vnet_init, NULL);
/*
@@ -519,5 +520,5 @@ VNET_SYSINIT(hhook_vnet_init, SI_SUB_MBUF, SI_ORDER_FIRST,
* points to clean up on vnet tear down, but in case the KPI is misused,
* provide a function to clean up and free memory for a vnet being destroyed.
*/
-VNET_SYSUNINIT(hhook_vnet_uninit, SI_SUB_MBUF, SI_ORDER_ANY,
+VNET_SYSUNINIT(hhook_vnet_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
hhook_vnet_uninit, NULL);
diff --git a/freebsd/sys/kern/kern_intr.c b/freebsd/sys/kern/kern_intr.c
index 87b7241d..501b453e 100644
--- a/freebsd/sys/kern/kern_intr.c
+++ b/freebsd/sys/kern/kern_intr.c
@@ -30,6 +30,7 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ddb.h>
+#include <rtems/bsd/local/opt_kstack_usage_prof.h>
#include <rtems/bsd/sys/param.h>
#include <sys/bus.h>
@@ -95,18 +96,15 @@ struct intr_event *clk_intr_event;
#ifndef __rtems__
struct intr_event *tty_intr_event;
void *vm_ih;
-#endif /* __rtems__ */
struct proc *intrproc;
+#endif /* __rtems__ */
static MALLOC_DEFINE(M_ITHREAD, "ithread", "Interrupt Threads");
static int intr_storm_threshold = 1000;
-#ifndef __rtems__
-TUNABLE_INT("hw.intr_storm_threshold", &intr_storm_threshold);
-SYSCTL_INT(_hw, OID_AUTO, intr_storm_threshold, CTLFLAG_RW,
+SYSCTL_INT(_hw, OID_AUTO, intr_storm_threshold, CTLFLAG_RWTUN,
&intr_storm_threshold, 0,
"Number of consecutive interrupts before storm protection is enabled");
-#endif /* __rtems__ */
static TAILQ_HEAD(, intr_event) event_list =
TAILQ_HEAD_INITIALIZER(event_list);
static struct mtx event_lock;
@@ -272,7 +270,7 @@ intr_event_update(struct intr_event *ie)
int
intr_event_create(struct intr_event **event, void *source, int flags, int irq,
void (*pre_ithread)(void *), void (*post_ithread)(void *),
- void (*post_filter)(void *), int (*assign_cpu)(void *, u_char),
+ void (*post_filter)(void *), int (*assign_cpu)(void *, int),
const char *fmt, ...)
{
struct intr_event *ie;
@@ -316,9 +314,8 @@ intr_event_create(struct intr_event **event, void *source, int flags, int irq,
* the interrupt event.
*/
int
-intr_event_bind(struct intr_event *ie, u_char cpu)
+intr_event_bind(struct intr_event *ie, int cpu)
{
- cpuset_t mask;
lwpid_t id;
int error;
@@ -339,14 +336,9 @@ intr_event_bind(struct intr_event *ie, u_char cpu)
*/
mtx_lock(&ie->ie_lock);
if (ie->ie_thread != NULL) {
- CPU_ZERO(&mask);
- if (cpu == NOCPU)
- CPU_COPY(cpuset_root, &mask);
- else
- CPU_SET(cpu, &mask);
id = ie->ie_thread->it_thread->td_tid;
mtx_unlock(&ie->ie_lock);
- error = cpuset_setthread(id, &mask);
+ error = cpuset_setithread(id, cpu);
if (error)
return (error);
} else
@@ -355,14 +347,10 @@ intr_event_bind(struct intr_event *ie, u_char cpu)
if (error) {
mtx_lock(&ie->ie_lock);
if (ie->ie_thread != NULL) {
- CPU_ZERO(&mask);
- if (ie->ie_cpu == NOCPU)
- CPU_COPY(cpuset_root, &mask);
- else
- CPU_SET(cpu, &mask);
+ cpu = ie->ie_cpu;
id = ie->ie_thread->it_thread->td_tid;
mtx_unlock(&ie->ie_lock);
- (void)cpuset_setthread(id, &mask);
+ (void)cpuset_setithread(id, cpu);
} else
mtx_unlock(&ie->ie_lock);
return (error);
@@ -395,8 +383,7 @@ intr_setaffinity(int irq, void *m)
{
struct intr_event *ie;
cpuset_t *mask;
- u_char cpu;
- int n;
+ int cpu, n;
mask = m;
cpu = NOCPU;
@@ -410,7 +397,7 @@ intr_setaffinity(int irq, void *m)
continue;
if (cpu != NOCPU)
return (EINVAL);
- cpu = (u_char)n;
+ cpu = n;
}
}
ie = intr_lookup(irq);
@@ -469,6 +456,9 @@ intr_event_destroy(struct intr_event *ie)
static struct intr_thread *
ithread_create(const char *name)
{
+#ifdef __rtems__
+ struct proc *intrproc;
+#endif /* __rtems__ */
struct intr_thread *ithd;
struct thread *td;
int error;
@@ -498,6 +488,9 @@ ithread_create(const char *name)
static struct intr_thread *
ithread_create(const char *name, struct intr_handler *ih)
{
+#ifdef __rtems__
+ struct proc *intrproc;
+#endif /* __rtems__ */
struct intr_thread *ithd;
struct thread *td;
int error;
@@ -659,7 +652,7 @@ intr_event_add_handler(struct intr_event *ie, const char *name,
mtx_lock(&ie->ie_lock);
it->it_event = ie;
ih->ih_thread = it;
- ithread_update(it); // XXX - do we really need this?!?!?
+ ithread_update(it); /* XXX - do we really need this?!?!? */
} else { /* Create the global per-event thread if we need one. */
while (ie->ie_thread == NULL && handler != NULL) {
if (ie->ie_flags & IE_ADDING_THREAD)
@@ -733,9 +726,9 @@ intr_event_describe_handler(struct intr_event *ie, void *cookie,
* description at that point. If one is not found, find the
* end of the name to use as the insertion point.
*/
- start = index(ih->ih_name, ':');
+ start = strchr(ih->ih_name, ':');
if (start == NULL)
- start = index(ih->ih_name, 0);
+ start = strchr(ih->ih_name, 0);
/*
* See if there is enough remaining room in the string for the
@@ -877,6 +870,12 @@ ok:
* Ensure that the thread will process the handler list
* again and remove this handler if it has already passed
* it on the list.
+ *
+ * The release part of the following store ensures
+ * that the update of ih_flags is ordered before the
+ * it_need setting. See the comment before
+ * atomic_cmpset_acq(&ithd->it_need, ...) operation in
+ * the ithread_execute_handlers().
*/
atomic_store_rel_int(&ie->ie_thread->it_need, 1);
} else
@@ -938,13 +937,10 @@ intr_event_schedule_thread(struct intr_event *ie)
* If any of the handlers for this ithread claim to be good
* sources of entropy, then gather some.
*/
- if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) {
- CTR3(KTR_INTR, "%s: pid %d (%s) gathering entropy", __func__,
- p->p_pid, td->td_name);
+ if (ie->ie_flags & IE_ENTROPY) {
entropy.event = (uintptr_t)ie;
entropy.td = ctd;
- random_harvest(&entropy, sizeof(entropy), 2, 0,
- RANDOM_INTERRUPT);
+ random_harvest_queue(&entropy, sizeof(entropy), 2, RANDOM_INTERRUPT);
}
#ifndef __rtems__
@@ -955,6 +951,10 @@ intr_event_schedule_thread(struct intr_event *ie)
* Set it_need to tell the thread to keep running if it is already
* running. Then, lock the thread and see if we actually need to
* put it on the runqueue.
+ *
+ * Use store_rel to arrange that the store to ih_need in
+ * swi_sched() is before the store to it_need and prepare for
+ * transfer of this order to loads in the ithread.
*/
atomic_store_rel_int(&it->it_need, 1);
thread_lock(td);
@@ -1042,6 +1042,12 @@ ok:
* Ensure that the thread will process the handler list
* again and remove this handler if it has already passed
* it on the list.
+ *
+ * The release part of the following store ensures
+ * that the update of ih_flags is ordered before the
+ * it_need setting. See the comment before
+ * atomic_cmpset_acq(&ithd->it_need, ...) operation in
+ * the ithread_execute_handlers().
*/
atomic_store_rel_int(&it->it_need, 1);
} else
@@ -1107,13 +1113,10 @@ intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it)
* If any of the handlers for this ithread claim to be good
* sources of entropy, then gather some.
*/
- if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) {
- CTR3(KTR_INTR, "%s: pid %d (%s) gathering entropy", __func__,
- p->p_pid, td->td_name);
+ if (ie->ie_flags & IE_ENTROPY) {
entropy.event = (uintptr_t)ie;
entropy.td = ctd;
- random_harvest(&entropy, sizeof(entropy), 2, 0,
- RANDOM_INTERRUPT);
+ random_harvest_queue(&entropy, sizeof(entropy), 2, RANDOM_INTERRUPT);
}
KASSERT(p != NULL, ("ithread %s has no process", ie->ie_name));
@@ -1122,6 +1125,10 @@ intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it)
* Set it_need to tell the thread to keep running if it is already
* running. Then, lock the thread and see if we actually need to
* put it on the runqueue.
+ *
+ * Use store_rel to arrange that the store to ih_need in
+ * swi_sched() is before the store to it_need and prepare for
+ * transfer of this order to loads in the ithread.
*/
atomic_store_rel_int(&it->it_need, 1);
thread_lock(td);
@@ -1147,7 +1154,7 @@ intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it)
* a PIC.
*/
static int
-swi_assign_cpu(void *arg, u_char cpu)
+swi_assign_cpu(void *arg, int cpu)
{
return (0);
@@ -1161,7 +1168,6 @@ int
swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler,
void *arg, int pri, enum intr_type flags, void **cookiep)
{
- struct thread *td;
struct intr_event *ie;
int error;
@@ -1183,19 +1189,7 @@ swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler,
}
error = intr_event_add_handler(ie, name, NULL, handler, arg,
PI_SWI(pri), flags, cookiep);
- if (error)
- return (error);
-#ifndef __rtems__
- if (pri == SWI_CLOCK) {
- td = ie->ie_thread->it_thread;
- thread_lock(td);
- td->td_flags |= TDF_NOLOAD;
- thread_unlock(td);
- }
-#else /* __rtems__ */
- // Do _not_ ignore the thread in the load avarage
-#endif /* __rtems__ */
- return (0);
+ return (error);
}
/*
@@ -1212,21 +1206,16 @@ swi_sched(void *cookie, int flags)
CTR3(KTR_INTR, "swi_sched: %s %s need=%d", ie->ie_name, ih->ih_name,
ih->ih_need);
- if (harvest.swi) {
- CTR2(KTR_INTR, "swi_sched: pid %d (%s) gathering entropy",
- curproc->p_pid, curthread->td_name);
- entropy.event = (uintptr_t)ih;
- entropy.td = curthread;
- random_harvest(&entropy, sizeof(entropy), 1, 0,
- RANDOM_INTERRUPT);
- }
+ entropy.event = (uintptr_t)ih;
+ entropy.td = curthread;
+ random_harvest_queue(&entropy, sizeof(entropy), 1, RANDOM_SWI);
/*
* Set ih_need for this handler so that if the ithread is already
* running it will execute this handler on the next pass. Otherwise,
* it will execute it the next time it runs.
*/
- atomic_store_rel_int(&ih->ih_need, 1);
+ ih->ih_need = 1;
if (!(flags & SWI_DELAY)) {
#ifndef __rtems__
@@ -1320,13 +1309,14 @@ intr_event_execute_handlers(struct proc *p, struct intr_event *ie)
* For software interrupt threads, we only execute
* handlers that have their need flag set. Hardware
* interrupt threads always invoke all of their handlers.
+ *
+ * ih_need can only be 0 or 1. Failed cmpset below
+ * means that there is no request to execute handlers,
+ * so a retry of the cmpset is not needed.
*/
- if (ie->ie_flags & IE_SOFT) {
- if (atomic_load_acq_int(&ih->ih_need) == 0)
- continue;
- else
- atomic_store_rel_int(&ih->ih_need, 0);
- }
+ if ((ie->ie_flags & IE_SOFT) != 0 &&
+ atomic_cmpset_int(&ih->ih_need, 1, 0) == 0)
+ continue;
/* Execute this handler. */
CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x",
@@ -1434,17 +1424,13 @@ ithread_loop(void *arg)
* Service interrupts. If another interrupt arrives while
* we are running, it will set it_need to note that we
* should make another pass.
+ *
+ * The load_acq part of the following cmpset ensures
+ * that the load of ih_need in ithread_execute_handlers()
+ * is ordered after the load of it_need here.
*/
- while (atomic_load_acq_int(&ithd->it_need) != 0) {
- /*
- * This might need a full read and write barrier
- * to make sure that this write posts before any
- * of the memory or device accesses in the
- * handlers.
- */
- atomic_store_rel_int(&ithd->it_need, 0);
+ while (atomic_cmpset_acq_int(&ithd->it_need, 1, 0) != 0)
ithread_execute_handlers(p, ie);
- }
WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread");
mtx_assert(&Giant, MA_NOTOWNED);
@@ -1454,8 +1440,8 @@ ithread_loop(void *arg)
* set again, so we have to check it again.
*/
thread_lock(td);
- if ((atomic_load_acq_int(&ithd->it_need) == 0) &&
- !(ithd->it_flags & (IT_DEAD | IT_WAIT))) {
+ if (atomic_load_acq_int(&ithd->it_need) == 0 &&
+ (ithd->it_flags & (IT_DEAD | IT_WAIT)) == 0) {
#ifndef __rtems__
TD_SET_IWAIT(td);
ie->ie_count = 0;
@@ -1507,6 +1493,10 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
td = curthread;
+#ifdef KSTACK_USAGE_PROF
+ intr_prof_stack_use(td, frame);
+#endif
+
/* An interrupt with no event or handlers is a stray interrupt. */
if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers))
return (EINVAL);
@@ -1573,12 +1563,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
/* Schedule the ithread if needed. */
if (thread) {
error = intr_event_schedule_thread(ie);
-#ifndef XEN
KASSERT(error == 0, ("bad stray interrupt"));
-#else
- if (error != 0)
- log(LOG_WARNING, "bad stray interrupt");
-#endif
}
critical_exit();
td->td_intr_nesting_level--;
@@ -1631,15 +1616,12 @@ ithread_loop(void *arg)
* Service interrupts. If another interrupt arrives while
* we are running, it will set it_need to note that we
* should make another pass.
+ *
+ * The load_acq part of the following cmpset ensures
+ * that the load of ih_need in ithread_execute_handlers()
+ * is ordered after the load of it_need here.
*/
- while (atomic_load_acq_int(&ithd->it_need) != 0) {
- /*
- * This might need a full read and write barrier
- * to make sure that this write posts before any
- * of the memory or device accesses in the
- * handlers.
- */
- atomic_store_rel_int(&ithd->it_need, 0);
+ while (atomic_cmpset_acq_int(&ithd->it_need, 1, 0) != 0) {
if (priv)
priv_ithread_execute_handler(p, ih);
else
@@ -1654,8 +1636,8 @@ ithread_loop(void *arg)
* set again, so we have to check it again.
*/
thread_lock(td);
- if ((atomic_load_acq_int(&ithd->it_need) == 0) &&
- !(ithd->it_flags & (IT_DEAD | IT_WAIT))) {
+ if (atomic_load_acq_int(&ithd->it_need) == 0 &&
+ (ithd->it_flags & (IT_DEAD | IT_WAIT)) == 0) {
TD_SET_IWAIT(td);
ie->ie_count = 0;
mi_switch(SW_VOL | SWT_IWAIT, NULL);
@@ -1953,8 +1935,8 @@ DB_SHOW_COMMAND(intr, db_show_intr)
struct intr_event *ie;
int all, verbose;
- verbose = index(modif, 'v') != NULL;
- all = index(modif, 'a') != NULL;
+ verbose = strchr(modif, 'v') != NULL;
+ all = strchr(modif, 'a') != NULL;
TAILQ_FOREACH(ie, &event_list, ie_list) {
if (!all && TAILQ_EMPTY(&ie->ie_handlers))
continue;
@@ -1999,6 +1981,24 @@ SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD,
static int
sysctl_intrcnt(SYSCTL_HANDLER_ARGS)
{
+#ifdef SCTL_MASK32
+ uint32_t *intrcnt32;
+ unsigned i;
+ int error;
+
+ if (req->flags & SCTL_MASK32) {
+ if (!req->oldptr)
+ return (sysctl_handle_opaque(oidp, NULL, sintrcnt / 2, req));
+ intrcnt32 = malloc(sintrcnt / 2, M_TEMP, M_NOWAIT);
+ if (intrcnt32 == NULL)
+ return (ENOMEM);
+ for (i = 0; i < sintrcnt / sizeof (u_long); i++)
+ intrcnt32[i] = intrcnt[i];
+ error = sysctl_handle_opaque(oidp, intrcnt32, sintrcnt / 2, req);
+ free(intrcnt32, M_TEMP);
+ return (error);
+ }
+#endif
return (sysctl_handle_opaque(oidp, intrcnt, sintrcnt, req));
}
diff --git a/freebsd/sys/kern/kern_linker.c b/freebsd/sys/kern/kern_linker.c
index 39664a85..a115aa96 100644
--- a/freebsd/sys/kern/kern_linker.c
+++ b/freebsd/sys/kern/kern_linker.c
@@ -30,6 +30,7 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ddb.h>
+#include <rtems/bsd/local/opt_kld.h>
#include <rtems/bsd/local/opt_hwpmc_hooks.h>
#include <rtems/bsd/sys/param.h>
@@ -55,6 +56,10 @@ __FBSDID("$FreeBSD$");
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
#include <net/vnet.h>
#include <security/mac/mac_framework.h>
@@ -68,10 +73,16 @@ __FBSDID("$FreeBSD$");
#ifndef __rtems__
#ifdef KLD_DEBUG
int kld_debug = 0;
-SYSCTL_INT(_debug, OID_AUTO, kld_debug, CTLFLAG_RW,
- &kld_debug, 0, "Set various levels of KLD debug");
+SYSCTL_INT(_debug, OID_AUTO, kld_debug, CTLFLAG_RWTUN,
+ &kld_debug, 0, "Set various levels of KLD debug");
#endif
+/* These variables are used by kernel debuggers to enumerate loaded files. */
+const int kld_off_address = offsetof(struct linker_file, address);
+const int kld_off_filename = offsetof(struct linker_file, filename);
+const int kld_off_pathname = offsetof(struct linker_file, pathname);
+const int kld_off_next = offsetof(struct linker_file, link.tqe_next);
+
/*
* static char *linker_search_path(const char *name, struct mod_depend
* *verinfo);
@@ -143,18 +154,8 @@ static caddr_t linker_file_lookup_symbol_internal(linker_file_t file,
const char* name, int deps);
static int linker_load_module(const char *kldname,
const char *modname, struct linker_file *parent,
- struct mod_depend *verinfo, struct linker_file **lfpp);
-static modlist_t modlist_lookup2(const char *name, struct mod_depend *verinfo);
-
-static char *
-linker_strdup(const char *str)
-{
- char *result;
-
- if ((result = malloc((strlen(str) + 1), M_LINKER, M_WAITOK)) != NULL)
- strcpy(result, str);
- return (result);
-}
+ const struct mod_depend *verinfo, struct linker_file **lfpp);
+static modlist_t modlist_lookup2(const char *name, const struct mod_depend *verinfo);
static void
linker_init(void *arg)
@@ -306,10 +307,10 @@ linker_file_register_sysctls(linker_file_t lf)
return;
sx_xunlock(&kld_sx);
- sysctl_lock();
+ sysctl_wlock();
for (oidp = start; oidp < stop; oidp++)
sysctl_register_oid(*oidp);
- sysctl_unlock();
+ sysctl_wunlock();
sx_xlock(&kld_sx);
}
@@ -318,7 +319,7 @@ linker_file_unregister_sysctls(linker_file_t lf)
{
struct sysctl_oid **start, **stop, **oidp;
- KLD_DPF(FILE, ("linker_file_unregister_sysctls: registering SYSCTLs"
+ KLD_DPF(FILE, ("linker_file_unregister_sysctls: unregistering SYSCTLs"
" for %s\n", lf->filename));
sx_assert(&kld_sx, SA_XLOCKED);
@@ -327,10 +328,10 @@ linker_file_unregister_sysctls(linker_file_t lf)
return;
sx_xunlock(&kld_sx);
- sysctl_lock();
+ sysctl_wlock();
for (oidp = start; oidp < stop; oidp++)
sysctl_unregister_oid(*oidp);
- sysctl_unlock();
+ sysctl_wunlock();
sx_xlock(&kld_sx);
}
#endif /* __rtems__ */
@@ -598,11 +599,13 @@ linker_make_file(const char *pathname, linker_class_t lc)
lf = (linker_file_t)kobj_create((kobj_class_t)lc, M_LINKER, M_WAITOK);
if (lf == NULL)
return (NULL);
+ lf->ctors_addr = 0;
+ lf->ctors_size = 0;
lf->refs = 1;
lf->userrefs = 0;
lf->flags = 0;
- lf->filename = linker_strdup(filename);
- lf->pathname = linker_strdup(pathname);
+ lf->filename = strdup(filename, M_LINKER);
+ lf->pathname = strdup(pathname, M_LINKER);
LINKER_GET_NEXT_FILE_ID(lf->id);
lf->ndeps = 0;
lf->deps = NULL;
@@ -966,7 +969,7 @@ linker_debug_search_symbol_name(caddr_t value, char *buf, u_int buflen,
*
* Note that we do not obey list locking protocols here. We really don't need
* DDB to hang because somebody's got the lock held. We'll take the chance
- * that the files list is inconsistant instead.
+ * that the files list is inconsistent instead.
*/
#ifdef DDB
int
@@ -1009,9 +1012,9 @@ linker_search_symbol_name(caddr_t value, char *buf, u_int buflen,
{
int error;
- sx_xlock(&kld_sx);
+ sx_slock(&kld_sx);
error = linker_debug_search_symbol_name(value, buf, buflen, offset);
- sx_xunlock(&kld_sx);
+ sx_sunlock(&kld_sx);
return (error);
}
@@ -1042,7 +1045,7 @@ kern_kldload(struct thread *td, const char *file, int *fileid)
* (kldname.ko, or kldname.ver.ko) treat it as an interface
* name.
*/
- if (index(file, '/') || index(file, '.')) {
+ if (strchr(file, '/') || strchr(file, '.')) {
kldname = file;
modname = NULL;
} else {
@@ -1273,6 +1276,23 @@ kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat)
return (0);
}
+#ifdef DDB
+DB_COMMAND(kldstat, db_kldstat)
+{
+ linker_file_t lf;
+
+#define POINTER_WIDTH ((int)(sizeof(void *) * 2 + 2))
+ db_printf("Id Refs Address%*c Size Name\n", POINTER_WIDTH - 7, ' ');
+#undef POINTER_WIDTH
+ TAILQ_FOREACH(lf, &linker_files, link) {
+ if (db_pager_quit)
+ return;
+ db_printf("%2d %4d %p %-8zx %s\n", lf->id, lf->refs,
+ lf->address, lf->size, lf->filename);
+ }
+}
+#endif /* DDB */
+
int
sys_kldfirstmod(struct thread *td, struct kldfirstmod_args *uap)
{
@@ -1376,7 +1396,7 @@ modlist_lookup(const char *name, int ver)
}
static modlist_t
-modlist_lookup2(const char *name, struct mod_depend *verinfo)
+modlist_lookup2(const char *name, const struct mod_depend *verinfo)
{
modlist_t mod, bestmod;
int ver;
@@ -1426,7 +1446,7 @@ linker_addmodules(linker_file_t lf, struct mod_metadata **start,
if (mp->md_type != MDT_VERSION)
continue;
modname = mp->md_cval;
- ver = ((struct mod_version *)mp->md_data)->mv_version;
+ ver = ((const struct mod_version *)mp->md_data)->mv_version;
if (modlist_lookup(modname, ver) != NULL) {
printf("module %s already present!\n", modname);
/* XXX what can we do? this is a build error. :-( */
@@ -1449,7 +1469,7 @@ linker_preload(void *arg)
linker_file_list_t depended_files;
struct mod_metadata *mp, *nmp;
struct mod_metadata **start, **stop, **mdp, **nmdp;
- struct mod_depend *verinfo;
+ const struct mod_depend *verinfo;
int nver;
int resolves;
modlist_t mod;
@@ -1547,7 +1567,7 @@ restart:
if (mp->md_type != MDT_VERSION)
continue;
modname = mp->md_cval;
- nver = ((struct mod_version *)
+ nver = ((const struct mod_version *)
mp->md_data)->mv_version;
if (modlist_lookup(modname,
nver) != NULL) {
@@ -1667,7 +1687,7 @@ SYSINIT(preload, SI_SUB_KLD, SI_ORDER_MIDDLE, linker_preload, 0);
static char linker_hintfile[] = "linker.hints";
static char linker_path[MAXPATHLEN] = "/boot/kernel;/boot/modules";
-SYSCTL_STRING(_kern, OID_AUTO, module_path, CTLFLAG_RW, linker_path,
+SYSCTL_STRING(_kern, OID_AUTO, module_path, CTLFLAG_RWTUN, linker_path,
sizeof(linker_path), "module load search path");
TUNABLE_STR("module_path", linker_path, sizeof(linker_path));
@@ -1690,7 +1710,7 @@ linker_lookup_file(const char *path, int pathlen, const char *name,
struct nameidata nd;
struct thread *td = curthread; /* XXX */
char *result, **cpp, *sep;
- int error, len, extlen, reclen, flags, vfslocked;
+ int error, len, extlen, reclen, flags;
enum vtype type;
extlen = 0;
@@ -1711,18 +1731,16 @@ linker_lookup_file(const char *path, int pathlen, const char *name,
* Attempt to open the file, and return the path if
* we succeed and it's a regular file.
*/
- NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, result, td);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, result, td);
flags = FREAD;
error = vn_open(&nd, &flags, 0, NULL);
if (error == 0) {
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
type = nd.ni_vp->v_type;
if (vap)
VOP_GETATTR(nd.ni_vp, vap, td->td_ucred);
VOP_UNLOCK(nd.ni_vp, 0);
vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
if (type == VREG)
return (result);
}
@@ -1732,7 +1750,7 @@ linker_lookup_file(const char *path, int pathlen, const char *name,
}
#define INT_ALIGN(base, ptr) ptr = \
- (base) + (((ptr) - (base) + sizeof(int) - 1) & ~(sizeof(int) - 1))
+ (base) + roundup2((ptr) - (base), sizeof(int))
/*
* Lookup KLD which contains requested module in the "linker.hints" file. If
@@ -1741,7 +1759,7 @@ linker_lookup_file(const char *path, int pathlen, const char *name,
*/
static char *
linker_hints_lookup(const char *path, int pathlen, const char *modname,
- int modnamelen, struct mod_depend *verinfo)
+ int modnamelen, const struct mod_depend *verinfo)
{
struct thread *td = curthread; /* XXX */
struct ucred *cred = td ? td->td_ucred : NULL;
@@ -1751,7 +1769,6 @@ linker_hints_lookup(const char *path, int pathlen, const char *modname,
u_char *cp, *recptr, *bufend, *result, *best, *pathbuf, *sep;
int error, ival, bestver, *intp, found, flags, clen, blen;
ssize_t reclen;
- int vfslocked = 0;
result = NULL;
bestver = found = 0;
@@ -1763,12 +1780,11 @@ linker_hints_lookup(const char *path, int pathlen, const char *modname,
snprintf(pathbuf, reclen, "%.*s%s%s", pathlen, path, sep,
linker_hintfile);
- NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, pathbuf, td);
+ NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, pathbuf, td);
flags = FREAD;
error = vn_open(&nd, &flags, 0, NULL);
if (error)
goto bad;
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_vp->v_type != VREG)
goto bad;
@@ -1779,20 +1795,17 @@ linker_hints_lookup(const char *path, int pathlen, const char *modname,
/*
* XXX: we need to limit this number to some reasonable value
*/
- if (vattr.va_size > 100 * 1024) {
+ if (vattr.va_size > LINKER_HINTS_MAX) {
printf("hints file too large %ld\n", (long)vattr.va_size);
goto bad;
}
hints = malloc(vattr.va_size, M_TEMP, M_WAITOK);
- if (hints == NULL)
- goto bad;
error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)hints, vattr.va_size, 0,
UIO_SYSSPACE, IO_NODELOCKED, cred, NOCRED, &reclen, td);
if (error)
goto bad;
VOP_UNLOCK(nd.ni_vp, 0);
vn_close(nd.ni_vp, FREAD, cred, td);
- VFS_UNLOCK_GIANT(vfslocked);
nd.ni_vp = NULL;
if (reclen != 0) {
printf("can't read %zd\n", reclen);
@@ -1861,7 +1874,6 @@ bad:
if (nd.ni_vp != NULL) {
VOP_UNLOCK(nd.ni_vp, 0);
vn_close(nd.ni_vp, FREAD, cred, td);
- VFS_UNLOCK_GIANT(vfslocked);
}
/*
* If nothing found or hints is absent - fallback to the old
@@ -1878,7 +1890,7 @@ bad:
*/
static char *
linker_search_module(const char *modname, int modnamelen,
- struct mod_depend *verinfo)
+ const struct mod_depend *verinfo)
{
char *cp, *ep, *result;
@@ -1908,8 +1920,8 @@ linker_search_kld(const char *name)
int len;
/* qualified at all? */
- if (index(name, '/'))
- return (linker_strdup(name));
+ if (strchr(name, '/'))
+ return (strdup(name, M_LINKER));
/* traverse the linker path */
len = strlen(name);
@@ -1929,7 +1941,7 @@ linker_basename(const char *path)
{
const char *filename;
- filename = rindex(path, '/');
+ filename = strrchr(path, '/');
if (filename == NULL)
return path;
if (filename[1])
@@ -1982,7 +1994,7 @@ linker_hwpmc_list_objects(void)
*/
static int
linker_load_module(const char *kldname, const char *modname,
- struct linker_file *parent, struct mod_depend *verinfo,
+ struct linker_file *parent, const struct mod_depend *verinfo,
struct linker_file **lfpp)
{
linker_file_t lfdep;
@@ -2002,7 +2014,7 @@ linker_load_module(const char *kldname, const char *modname,
if (modlist_lookup2(modname, verinfo) != NULL)
return (EEXIST);
if (kldname != NULL)
- pathname = linker_strdup(kldname);
+ pathname = strdup(kldname, M_LINKER);
else if (rootvnode == NULL)
pathname = NULL;
else
@@ -2056,13 +2068,13 @@ linker_load_dependencies(linker_file_t lf)
linker_file_t lfdep;
struct mod_metadata **start, **stop, **mdp, **nmdp;
struct mod_metadata *mp, *nmp;
- struct mod_depend *verinfo;
+ const struct mod_depend *verinfo;
modlist_t mod;
const char *modname, *nmodname;
int ver, error = 0, count;
/*
- * All files are dependant on /kernel.
+ * All files are dependent on /kernel.
*/
sx_assert(&kld_sx, SA_XLOCKED);
if (linker_kernel_file) {
@@ -2079,7 +2091,7 @@ linker_load_dependencies(linker_file_t lf)
if (mp->md_type != MDT_VERSION)
continue;
modname = mp->md_cval;
- ver = ((struct mod_version *)mp->md_data)->mv_version;
+ ver = ((const struct mod_version *)mp->md_data)->mv_version;
mod = modlist_lookup(modname, ver);
if (mod != NULL) {
printf("interface %s.%d already present in the KLD"
diff --git a/freebsd/sys/kern/kern_mbuf.c b/freebsd/sys/kern/kern_mbuf.c
index 74e7aa10..6229b836 100644
--- a/freebsd/sys/kern/kern_mbuf.c
+++ b/freebsd/sys/kern/kern_mbuf.c
@@ -34,24 +34,24 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/malloc.h>
+#include <sys/types.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
-#include <security/mac/mac_framework.h>
-
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
-#include <vm/uma_int.h>
#include <vm/uma_dbg.h>
#ifdef __rtems__
#include <rtems/bsd/bsd.h>
@@ -97,7 +97,7 @@ __FBSDID("$FreeBSD$");
*
* Whenever an object is allocated from the underlying global
* memory pool it gets pre-initialized with the _zinit_ functions.
- * When the Keg's are overfull objects get decomissioned with
+ * When the Keg's are overfull objects get decommissioned with
* _zfini_ functions and free'd back to the global memory pool.
*
*/
@@ -107,12 +107,11 @@ int nmbclusters; /* limits number of mbuf clusters */
int nmbjumbop; /* limits number of page size jumbo clusters */
int nmbjumbo9; /* limits number of 9k jumbo clusters */
int nmbjumbo16; /* limits number of 16k jumbo clusters */
-struct mbstat mbstat;
static quad_t maxmbufmem; /* overall real memory limit for all mbufs */
-SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN, &maxmbufmem, 0,
- "Maximum real memory allocateable to various mbuf types");
+SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0,
+ "Maximum real memory allocatable to various mbuf types");
/*
* tunable_mbinit() has to be run before any mbuf allocations are done.
@@ -128,8 +127,7 @@ tunable_mbinit(void *dummy)
* available kernel memory (physical or kmem).
* At most it can be 3/4 of available kernel memory.
*/
- realmem = qmin((quad_t)physmem * PAGE_SIZE,
- vm_map_max(kmem_map) - vm_map_min(kmem_map));
+ realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size);
maxmbufmem = realmem / 2;
TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
if (maxmbufmem > realmem / 4 * 3)
@@ -274,9 +272,6 @@ SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
&nmbufs, 0, sysctl_nmbufs, "IU",
"Maximum number of mbufs allowed");
-SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
- "Mbuf general information and statistics");
-
/*
* Zones from which we allocate.
*/
@@ -286,7 +281,6 @@ uma_zone_t zone_pack;
uma_zone_t zone_jumbop;
uma_zone_t zone_jumbo9;
uma_zone_t zone_jumbo16;
-uma_zone_t zone_ext_refcnt;
/*
* Local prototypes.
@@ -295,15 +289,13 @@ static int mb_ctor_mbuf(void *, int, void *, int);
static int mb_ctor_clust(void *, int, void *, int);
static int mb_ctor_pack(void *, int, void *, int);
static void mb_dtor_mbuf(void *, int, void *);
-static void mb_dtor_clust(void *, int, void *);
static void mb_dtor_pack(void *, int, void *);
static int mb_zinit_pack(void *, int, int);
static void mb_zfini_pack(void *, int);
+static void mb_reclaim(uma_zone_t, int);
+static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
-static void mb_reclaim(void *);
-static void *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int);
-
-/* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */
+/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
/*
@@ -326,63 +318,66 @@ mbuf_init(void *dummy)
MSIZE - 1, UMA_ZONE_MAXBUCKET);
if (nmbufs > 0)
nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
+ uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");
+ uma_zone_set_maxaction(zone_mbuf, mb_reclaim);
zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
- mb_ctor_clust, mb_dtor_clust,
+ mb_ctor_clust,
#ifdef INVARIANTS
- trash_init, trash_fini,
+ trash_dtor, trash_init, trash_fini,
#else
- NULL, NULL,
+ NULL, NULL, NULL,
#endif
- UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+ UMA_ALIGN_PTR, 0);
if (nmbclusters > 0)
nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
+ uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");
+ uma_zone_set_maxaction(zone_clust, mb_reclaim);
zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
/* Make jumbo frame zone too. Page size, 9k and 16k. */
zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
- mb_ctor_clust, mb_dtor_clust,
+ mb_ctor_clust,
#ifdef INVARIANTS
- trash_init, trash_fini,
+ trash_dtor, trash_init, trash_fini,
#else
- NULL, NULL,
+ NULL, NULL, NULL,
#endif
- UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+ UMA_ALIGN_PTR, 0);
if (nmbjumbop > 0)
nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
+ uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");
+ uma_zone_set_maxaction(zone_jumbop, mb_reclaim);
zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
- mb_ctor_clust, mb_dtor_clust,
+ mb_ctor_clust,
#ifdef INVARIANTS
- trash_init, trash_fini,
+ trash_dtor, trash_init, trash_fini,
#else
- NULL, NULL,
+ NULL, NULL, NULL,
#endif
- UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+ UMA_ALIGN_PTR, 0);
uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
if (nmbjumbo9 > 0)
nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
+ uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");
+ uma_zone_set_maxaction(zone_jumbo9, mb_reclaim);
zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
- mb_ctor_clust, mb_dtor_clust,
+ mb_ctor_clust,
#ifdef INVARIANTS
- trash_init, trash_fini,
+ trash_dtor, trash_init, trash_fini,
#else
- NULL, NULL,
+ NULL, NULL, NULL,
#endif
- UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+ UMA_ALIGN_PTR, 0);
uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
if (nmbjumbo16 > 0)
nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
-
- zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
- NULL, NULL,
- NULL, NULL,
- UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
-
- /* uma_prealloc() goes here... */
+ uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
+ uma_zone_set_maxaction(zone_jumbo16, mb_reclaim);
/*
* Hook event handler for low-memory situation, used to
@@ -393,25 +388,6 @@ mbuf_init(void *dummy)
EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
EVENTHANDLER_PRI_FIRST);
#endif /* __rtems__ */
-
- /*
- * [Re]set counters and local statistics knobs.
- * XXX Some of these should go and be replaced, but UMA stat
- * gathering needs to be revised.
- */
- mbstat.m_mbufs = 0;
- mbstat.m_mclusts = 0;
- mbstat.m_drain = 0;
- mbstat.m_msize = MSIZE;
- mbstat.m_mclbytes = MCLBYTES;
- mbstat.m_minclsize = MINCLSIZE;
- mbstat.m_mlen = MLEN;
- mbstat.m_mhlen = MHLEN;
- mbstat.m_numtypes = MT_NTYPES;
-
- mbstat.m_mcfail = mbstat.m_mpfail = 0;
- mbstat.sf_iocnt = 0;
- mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
@@ -422,13 +398,13 @@ SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
* pages.
*/
static void *
-mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
+mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
{
/* Inform UMA that this allocator uses kernel_map/object. */
*flags = UMA_SLAB_KERNEL;
#ifndef __rtems__
- return ((void *)kmem_alloc_contig(kernel_map, bytes, wait,
+ return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait,
(vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
#else /* __rtems__ */
return ((void *)malloc(bytes, M_TEMP, wait));
@@ -447,18 +423,14 @@ mb_ctor_mbuf(void *mem, int size, void *arg, int how)
{
struct mbuf *m;
struct mb_args *args;
-#ifdef MAC
int error;
-#endif
int flags;
short type;
#ifdef INVARIANTS
trash_ctor(mem, size, arg, how);
#endif
- m = (struct mbuf *)mem;
args = (struct mb_args *)arg;
- flags = args->flags;
type = args->type;
/*
@@ -468,31 +440,13 @@ mb_ctor_mbuf(void *mem, int size, void *arg, int how)
if (type == MT_NOINIT)
return (0);
- m->m_next = NULL;
- m->m_nextpkt = NULL;
- m->m_len = 0;
- m->m_flags = flags;
- m->m_type = type;
- if (flags & M_PKTHDR) {
- m->m_data = m->m_pktdat;
- m->m_pkthdr.rcvif = NULL;
- m->m_pkthdr.header = NULL;
- m->m_pkthdr.len = 0;
- m->m_pkthdr.csum_flags = 0;
- m->m_pkthdr.csum_data = 0;
- m->m_pkthdr.tso_segsz = 0;
- m->m_pkthdr.ether_vtag = 0;
- m->m_pkthdr.flowid = 0;
- SLIST_INIT(&m->m_pkthdr.tags);
-#ifdef MAC
- /* If the label init fails, fail the alloc */
- error = mac_mbuf_init(m, how);
- if (error)
- return (error);
-#endif
- } else
- m->m_data = m->m_dat;
- return (0);
+ m = (struct mbuf *)mem;
+ flags = args->flags;
+ MPASS((flags & M_NOFREE) == 0);
+
+ error = m_init(m, how, type, flags);
+
+ return (error);
}
/*
@@ -507,10 +461,9 @@ mb_dtor_mbuf(void *mem, int size, void *arg)
m = (struct mbuf *)mem;
flags = (unsigned long)arg;
- if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0)
- m_tag_delete_chain(m, NULL);
- KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
+ if (!(flags & MB_DTOR_SKIP) && (m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags))
+ m_tag_delete_chain(m, NULL);
#ifdef INVARIANTS
trash_dtor(mem, size, arg);
#endif
@@ -536,7 +489,6 @@ mb_dtor_pack(void *mem, int size, void *arg)
KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__));
KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__));
KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__));
- KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__));
#ifdef INVARIANTS
trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
#endif
@@ -564,40 +516,11 @@ static int
mb_ctor_clust(void *mem, int size, void *arg, int how)
{
struct mbuf *m;
- u_int *refcnt;
- int type;
- uma_zone_t zone;
#ifdef INVARIANTS
trash_ctor(mem, size, arg, how);
#endif
- switch (size) {
- case MCLBYTES:
- type = EXT_CLUSTER;
- zone = zone_clust;
- break;
-#if MJUMPAGESIZE != MCLBYTES
- case MJUMPAGESIZE:
- type = EXT_JUMBOP;
- zone = zone_jumbop;
- break;
-#endif
- case MJUM9BYTES:
- type = EXT_JUMBO9;
- zone = zone_jumbo9;
- break;
- case MJUM16BYTES:
- type = EXT_JUMBO16;
- zone = zone_jumbo16;
- break;
- default:
- panic("unknown cluster size");
- break;
- }
-
m = (struct mbuf *)arg;
- refcnt = uma_find_refcnt(zone, mem);
- *refcnt = 1;
if (m != NULL) {
m->m_ext.ext_buf = (caddr_t)mem;
m->m_data = m->m_ext.ext_buf;
@@ -606,32 +529,15 @@ mb_ctor_clust(void *mem, int size, void *arg, int how)
m->m_ext.ext_arg1 = NULL;
m->m_ext.ext_arg2 = NULL;
m->m_ext.ext_size = size;
- m->m_ext.ext_type = type;
- m->m_ext.ref_cnt = refcnt;
+ m->m_ext.ext_type = m_gettype(size);
+ m->m_ext.ext_flags = EXT_FLAG_EMBREF;
+ m->m_ext.ext_count = 1;
}
return (0);
}
/*
- * The Mbuf Cluster zone destructor.
- */
-static void
-mb_dtor_clust(void *mem, int size, void *arg)
-{
-#ifdef INVARIANTS
- uma_zone_t zone;
-
- zone = m_getzone(size);
- KASSERT(*(uma_find_refcnt(zone, mem)) <= 1,
- ("%s: refcnt incorrect %u", __func__,
- *(uma_find_refcnt(zone, mem))) );
-
- trash_dtor(mem, size, arg);
-#endif
-}
-
-/*
* The Packet secondary zone's init routine, executed on the
* object's transition from mbuf keg slab to zone cache.
*/
@@ -678,93 +584,392 @@ mb_ctor_pack(void *mem, int size, void *arg, int how)
{
struct mbuf *m;
struct mb_args *args;
-#ifdef MAC
- int error;
-#endif
- int flags;
+ int error, flags;
short type;
m = (struct mbuf *)mem;
args = (struct mb_args *)arg;
flags = args->flags;
type = args->type;
+ MPASS((flags & M_NOFREE) == 0);
#ifdef INVARIANTS
trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
#endif
- m->m_next = NULL;
- m->m_nextpkt = NULL;
- m->m_data = m->m_ext.ext_buf;
- m->m_len = 0;
- m->m_flags = (flags | M_EXT);
- m->m_type = type;
-
- if (flags & M_PKTHDR) {
- m->m_pkthdr.rcvif = NULL;
- m->m_pkthdr.len = 0;
- m->m_pkthdr.header = NULL;
- m->m_pkthdr.csum_flags = 0;
- m->m_pkthdr.csum_data = 0;
- m->m_pkthdr.tso_segsz = 0;
- m->m_pkthdr.ether_vtag = 0;
- m->m_pkthdr.flowid = 0;
- SLIST_INIT(&m->m_pkthdr.tags);
-#ifdef MAC
- /* If the label init fails, fail the alloc */
- error = mac_mbuf_init(m, how);
- if (error)
- return (error);
-#endif
- }
- /* m_ext is already initialized. */
- return (0);
-}
+ error = m_init(m, how, type, flags);
-int
-m_pkthdr_init(struct mbuf *m, int how)
-{
-#ifdef MAC
- int error;
-#endif
- m->m_data = m->m_pktdat;
- SLIST_INIT(&m->m_pkthdr.tags);
- m->m_pkthdr.rcvif = NULL;
- m->m_pkthdr.header = NULL;
- m->m_pkthdr.len = 0;
- m->m_pkthdr.flowid = 0;
- m->m_pkthdr.csum_flags = 0;
- m->m_pkthdr.csum_data = 0;
- m->m_pkthdr.tso_segsz = 0;
- m->m_pkthdr.ether_vtag = 0;
-#ifdef MAC
- /* If the label init fails, fail the alloc */
- error = mac_mbuf_init(m, how);
- if (error)
- return (error);
-#endif
+ /* m_ext is already initialized. */
+ m->m_data = m->m_ext.ext_buf;
+ m->m_flags = (flags | M_EXT);
- return (0);
+ return (error);
}
/*
- * This is the protocol drain routine.
+ * This is the protocol drain routine. Called by UMA whenever any of the
+ * mbuf zones is closed to its limit.
*
* No locks should be held when this is called. The drain routines have to
* presently acquire some locks which raises the possibility of lock order
* reversal.
*/
static void
-mb_reclaim(void *junk)
+mb_reclaim(uma_zone_t zone __unused, int pending __unused)
{
struct domain *dp;
struct protosw *pr;
- WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
- "mb_reclaim()");
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, __func__);
for (dp = domains; dp != NULL; dp = dp->dom_next)
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
if (pr->pr_drain != NULL)
(*pr->pr_drain)();
}
+
+/*
+ * Clean up after mbufs with M_EXT storage attached to them if the
+ * reference count hits 1.
+ */
+void
+mb_free_ext(struct mbuf *m)
+{
+ volatile u_int *refcnt;
+ struct mbuf *mref;
+ int freembuf;
+
+ KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
+
+ /* See if this is the mbuf that holds the embedded refcount. */
+ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
+ refcnt = &m->m_ext.ext_count;
+ mref = m;
+ } else {
+ KASSERT(m->m_ext.ext_cnt != NULL,
+ ("%s: no refcounting pointer on %p", __func__, m));
+ refcnt = m->m_ext.ext_cnt;
+ mref = __containerof(refcnt, struct mbuf, m_ext.ext_count);
+ }
+
+ /*
+ * Check if the header is embedded in the cluster. It is
+ * important that we can't touch any of the mbuf fields
+ * after we have freed the external storage, since mbuf
+ * could have been embedded in it. For now, the mbufs
+ * embedded into the cluster are always of type EXT_EXTREF,
+ * and for this type we won't free the mref.
+ */
+ if (m->m_flags & M_NOFREE) {
+ freembuf = 0;
+ KASSERT(m->m_ext.ext_type == EXT_EXTREF,
+ ("%s: no-free mbuf %p has wrong type", __func__, m));
+ } else
+ freembuf = 1;
+
+ /* Free attached storage if this mbuf is the only reference to it. */
+ if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) {
+ switch (m->m_ext.ext_type) {
+ case EXT_PACKET:
+ /* The packet zone is special. */
+ if (*refcnt == 0)
+ *refcnt = 1;
+ uma_zfree(zone_pack, mref);
+ break;
+ case EXT_CLUSTER:
+ uma_zfree(zone_clust, m->m_ext.ext_buf);
+ uma_zfree(zone_mbuf, mref);
+ break;
+ case EXT_JUMBOP:
+ uma_zfree(zone_jumbop, m->m_ext.ext_buf);
+ uma_zfree(zone_mbuf, mref);
+ break;
+ case EXT_JUMBO9:
+ uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
+ uma_zfree(zone_mbuf, mref);
+ break;
+ case EXT_JUMBO16:
+ uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
+ uma_zfree(zone_mbuf, mref);
+ break;
+#ifndef __rtems__
+ case EXT_SFBUF:
+ sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
+ uma_zfree(zone_mbuf, mref);
+ break;
+ case EXT_SFBUF_NOCACHE:
+ sf_ext_free_nocache(m->m_ext.ext_arg1,
+ m->m_ext.ext_arg2);
+ uma_zfree(zone_mbuf, mref);
+ break;
+#endif /* __rtems__ */
+ case EXT_NET_DRV:
+ case EXT_MOD_TYPE:
+ case EXT_DISPOSABLE:
+ KASSERT(m->m_ext.ext_free != NULL,
+ ("%s: ext_free not set", __func__));
+ (*(m->m_ext.ext_free))(m, m->m_ext.ext_arg1,
+ m->m_ext.ext_arg2);
+ uma_zfree(zone_mbuf, mref);
+ break;
+ case EXT_EXTREF:
+ KASSERT(m->m_ext.ext_free != NULL,
+ ("%s: ext_free not set", __func__));
+ (*(m->m_ext.ext_free))(m, m->m_ext.ext_arg1,
+ m->m_ext.ext_arg2);
+ break;
+ default:
+ KASSERT(m->m_ext.ext_type == 0,
+ ("%s: unknown ext_type", __func__));
+ }
+ }
+
+ if (freembuf && m != mref)
+ uma_zfree(zone_mbuf, m);
+}
+
+/*
+ * Official mbuf(9) allocation KPI for stack and drivers:
+ *
+ * m_get() - a single mbuf without any attachments, sys/mbuf.h.
+ * m_gethdr() - a single mbuf initialized as M_PKTHDR, sys/mbuf.h.
+ * m_getcl() - an mbuf + 2k cluster, sys/mbuf.h.
+ * m_clget() - attach cluster to already allocated mbuf.
+ * m_cljget() - attach jumbo cluster to already allocated mbuf.
+ * m_get2() - allocate minimum mbuf that would fit size argument.
+ * m_getm2() - allocate a chain of mbufs/clusters.
+ * m_extadd() - attach external cluster to mbuf.
+ *
+ * m_free() - free single mbuf with its tags and ext, sys/mbuf.h.
+ * m_freem() - free chain of mbufs.
+ */
+
+int
+m_clget(struct mbuf *m, int how)
+{
+
+ KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT",
+ __func__, m));
+ m->m_ext.ext_buf = (char *)NULL;
+ uma_zalloc_arg(zone_clust, m, how);
+ /*
+ * On a cluster allocation failure, drain the packet zone and retry,
+ * we might be able to loosen a few clusters up on the drain.
+ */
+ if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
+ zone_drain(zone_pack);
+ uma_zalloc_arg(zone_clust, m, how);
+ }
+ MBUF_PROBE2(m__clget, m, how);
+ return (m->m_flags & M_EXT);
+}
+
+/*
+ * m_cljget() is different from m_clget() as it can allocate clusters without
+ * attaching them to an mbuf. In that case the return value is the pointer
+ * to the cluster of the requested size. If an mbuf was specified, it gets
+ * the cluster attached to it and the return value can be safely ignored.
+ * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
+ */
+void *
+m_cljget(struct mbuf *m, int how, int size)
+{
+ uma_zone_t zone;
+ void *retval;
+
+ if (m != NULL) {
+ KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT",
+ __func__, m));
+ m->m_ext.ext_buf = NULL;
+ }
+
+ zone = m_getzone(size);
+ retval = uma_zalloc_arg(zone, m, how);
+
+ MBUF_PROBE4(m__cljget, m, how, size, retval);
+
+ return (retval);
+}
+
+/*
+ * m_get2() allocates minimum mbuf that would fit "size" argument.
+ */
+struct mbuf *
+m_get2(int size, int how, short type, int flags)
+{
+ struct mb_args args;
+ struct mbuf *m, *n;
+
+ args.flags = flags;
+ args.type = type;
+
+ if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0))
+ return (uma_zalloc_arg(zone_mbuf, &args, how));
+ if (size <= MCLBYTES)
+ return (uma_zalloc_arg(zone_pack, &args, how));
+
+ if (size > MJUMPAGESIZE)
+ return (NULL);
+
+ m = uma_zalloc_arg(zone_mbuf, &args, how);
+ if (m == NULL)
+ return (NULL);
+
+ n = uma_zalloc_arg(zone_jumbop, m, how);
+ if (n == NULL) {
+ uma_zfree(zone_mbuf, m);
+ return (NULL);
+ }
+
+ return (m);
+}
+
+/*
+ * m_getjcl() returns an mbuf with a cluster of the specified size attached.
+ * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
+ */
+struct mbuf *
+m_getjcl(int how, short type, int flags, int size)
+{
+ struct mb_args args;
+ struct mbuf *m, *n;
+ uma_zone_t zone;
+
+ if (size == MCLBYTES)
+ return m_getcl(how, type, flags);
+
+ args.flags = flags;
+ args.type = type;
+
+ m = uma_zalloc_arg(zone_mbuf, &args, how);
+ if (m == NULL)
+ return (NULL);
+
+ zone = m_getzone(size);
+ n = uma_zalloc_arg(zone, m, how);
+ if (n == NULL) {
+ uma_zfree(zone_mbuf, m);
+ return (NULL);
+ }
+ return (m);
+}
+
+/*
+ * Allocate a given length worth of mbufs and/or clusters (whatever fits
+ * best) and return a pointer to the top of the allocated chain. If an
+ * existing mbuf chain is provided, then we will append the new chain
+ * to the existing one but still return the top of the newly allocated
+ * chain.
+ */
+struct mbuf *
+m_getm2(struct mbuf *m, int len, int how, short type, int flags)
+{
+ struct mbuf *mb, *nm = NULL, *mtail = NULL;
+
+ KASSERT(len >= 0, ("%s: len is < 0", __func__));
+
+ /* Validate flags. */
+ flags &= (M_PKTHDR | M_EOR);
+
+ /* Packet header mbuf must be first in chain. */
+ if ((flags & M_PKTHDR) && m != NULL)
+ flags &= ~M_PKTHDR;
+
+ /* Loop and append maximum sized mbufs to the chain tail. */
+ while (len > 0) {
+ if (len > MCLBYTES)
+ mb = m_getjcl(how, type, (flags & M_PKTHDR),
+ MJUMPAGESIZE);
+ else if (len >= MINCLSIZE)
+ mb = m_getcl(how, type, (flags & M_PKTHDR));
+ else if (flags & M_PKTHDR)
+ mb = m_gethdr(how, type);
+ else
+ mb = m_get(how, type);
+
+ /* Fail the whole operation if one mbuf can't be allocated. */
+ if (mb == NULL) {
+ if (nm != NULL)
+ m_freem(nm);
+ return (NULL);
+ }
+
+ /* Book keeping. */
+ len -= M_SIZE(mb);
+ if (mtail != NULL)
+ mtail->m_next = mb;
+ else
+ nm = mb;
+ mtail = mb;
+ flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */
+ }
+ if (flags & M_EOR)
+ mtail->m_flags |= M_EOR; /* Only valid on the last mbuf. */
+
+ /* If mbuf was supplied, append new chain to the end of it. */
+ if (m != NULL) {
+ for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next)
+ ;
+ mtail->m_next = nm;
+ mtail->m_flags &= ~M_EOR;
+ } else
+ m = nm;
+
+ return (m);
+}
+
+/*-
+ * Configure a provided mbuf to refer to the provided external storage
+ * buffer and setup a reference count for said buffer.
+ *
+ * Arguments:
+ * mb The existing mbuf to which to attach the provided buffer.
+ * buf The address of the provided external storage buffer.
+ * size The size of the provided buffer.
+ * freef A pointer to a routine that is responsible for freeing the
+ * provided external storage buffer.
+ * args A pointer to an argument structure (of any type) to be passed
+ * to the provided freef routine (may be NULL).
+ * flags Any other flags to be passed to the provided mbuf.
+ * type The type that the external storage buffer should be
+ * labeled with.
+ *
+ * Returns:
+ * Nothing.
+ */
+void
+m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
+ void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2,
+ int flags, int type)
+{
+
+ KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));
+
+ mb->m_flags |= (M_EXT | flags);
+ mb->m_ext.ext_buf = buf;
+ mb->m_data = mb->m_ext.ext_buf;
+ mb->m_ext.ext_size = size;
+ mb->m_ext.ext_free = freef;
+ mb->m_ext.ext_arg1 = arg1;
+ mb->m_ext.ext_arg2 = arg2;
+ mb->m_ext.ext_type = type;
+
+ if (type != EXT_EXTREF) {
+ mb->m_ext.ext_count = 1;
+ mb->m_ext.ext_flags = EXT_FLAG_EMBREF;
+ } else
+ mb->m_ext.ext_flags = 0;
+}
+
+/*
+ * Free an entire chain of mbufs and associated external buffers, if
+ * applicable.
+ */
+void
+m_freem(struct mbuf *mb)
+{
+
+ MBUF_PROBE1(m__freem, mb);
+ while (mb != NULL)
+ mb = m_free(mb);
+}
diff --git a/freebsd/sys/kern/kern_mib.c b/freebsd/sys/kern/kern_mib.c
index d1494fbf..aa1c5774 100644
--- a/freebsd/sys/kern/kern_mib.c
+++ b/freebsd/sys/kern/kern_mib.c
@@ -45,63 +45,61 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_config.h>
#include <rtems/bsd/sys/param.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
-#include <sys/sbuf.h>
-#include <sys/systm.h>
-#include <sys/sysctl.h>
-#include <sys/proc.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mutex.h>
-#include <sys/jail.h>
+#include <sys/proc.h>
+#include <sys/random.h>
+#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/sx.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
#include <rtems/bsd/sys/unistd.h>
-SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(0, sysctl, CTLFLAG_RW, 0,
"Sysctl internal magic");
-SYSCTL_NODE(, CTL_KERN, kern, CTLFLAG_RW|CTLFLAG_CAPRD, 0,
+SYSCTL_ROOT_NODE(CTL_KERN, kern, CTLFLAG_RW|CTLFLAG_CAPRD, 0,
"High kernel, proc, limits &c");
-SYSCTL_NODE(, CTL_VM, vm, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_VM, vm, CTLFLAG_RW, 0,
"Virtual memory");
#ifndef __rtems__
-SYSCTL_NODE(, CTL_VFS, vfs, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_VFS, vfs, CTLFLAG_RW, 0,
"File system");
#endif /* __rtems__ */
-SYSCTL_NODE(, CTL_NET, net, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_NET, net, CTLFLAG_RW, 0,
"Network, (see socket.h)");
-SYSCTL_NODE(, CTL_DEBUG, debug, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_DEBUG, debug, CTLFLAG_RW, 0,
"Debugging");
#ifndef __rtems__
SYSCTL_NODE(_debug, OID_AUTO, sizeof, CTLFLAG_RW, 0,
"Sizeof various things");
#endif /* __rtems__ */
-SYSCTL_NODE(, CTL_HW, hw, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_HW, hw, CTLFLAG_RW, 0,
"hardware");
#ifndef __rtems__
-SYSCTL_NODE(, CTL_MACHDEP, machdep, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_MACHDEP, machdep, CTLFLAG_RW, 0,
"machine dependent");
-SYSCTL_NODE(, CTL_USER, user, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_USER, user, CTLFLAG_RW, 0,
"user-level");
-SYSCTL_NODE(, CTL_P1003_1B, p1003_1b, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_P1003_1B, p1003_1b, CTLFLAG_RW, 0,
"p1003_1b, (see p1003_1b.h)");
-SYSCTL_NODE(, OID_AUTO, compat, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(OID_AUTO, compat, CTLFLAG_RW, 0,
"Compatibility code");
#endif /* __rtems__ */
-SYSCTL_NODE(, OID_AUTO, security, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(OID_AUTO, security, CTLFLAG_RW, 0,
"Security");
#ifndef __rtems__
#ifdef REGRESSION
-SYSCTL_NODE(, OID_AUTO, regression, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(OID_AUTO, regression, CTLFLAG_RW, 0,
"Regression test MIB");
#endif
SYSCTL_STRING(_kern, OID_AUTO, ident, CTLFLAG_RD|CTLFLAG_MPSAFE,
kern_ident, 0, "Kernel identifier");
-SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD|CTLFLAG_MPSAFE|
- CTLFLAG_CAPRD, osrelease, 0, "Operating system release");
-
SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD|CTLFLAG_CAPRD,
SYSCTL_NULL_INT_PTR, BSD, "Operating system revision");
@@ -114,20 +112,13 @@ SYSCTL_STRING(_kern, OID_AUTO, compiler_version, CTLFLAG_RD|CTLFLAG_MPSAFE,
SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD|CTLFLAG_MPSAFE|
CTLFLAG_CAPRD, ostype, 0, "Operating system type");
-/*
- * NOTICE: The *userland* release date is available in
- * /usr/include/osreldate.h
- */
-SYSCTL_INT(_kern, KERN_OSRELDATE, osreldate, CTLFLAG_RD|CTLFLAG_CAPRD,
- &osreldate, 0, "Kernel release date");
-
-SYSCTL_INT(_kern, KERN_MAXPROC, maxproc, CTLFLAG_RDTUN,
+SYSCTL_INT(_kern, KERN_MAXPROC, maxproc, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
&maxproc, 0, "Maximum number of processes");
SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW,
&maxprocperuid, 0, "Maximum processes allowed per userid");
-SYSCTL_INT(_kern, OID_AUTO, maxusers, CTLFLAG_RDTUN,
+SYSCTL_INT(_kern, OID_AUTO, maxusers, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
&maxusers, 0, "Hint for kernel tuning");
SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD|CTLFLAG_CAPRD,
@@ -136,8 +127,8 @@ SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD|CTLFLAG_CAPRD,
SYSCTL_INT(_kern, KERN_POSIX1, posix1version, CTLFLAG_RD|CTLFLAG_CAPRD,
SYSCTL_NULL_INT_PTR, _POSIX_VERSION, "Version of POSIX attempting to comply to");
-SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RDTUN|CTLFLAG_CAPRD,
- &ngroups_max, 0,
+SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RDTUN |
+ CTLFLAG_NOFETCH | CTLFLAG_CAPRD, &ngroups_max, 0,
"Maximum number of supplemental groups a user can belong to");
SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, CTLFLAG_RD|CTLFLAG_CAPRD,
@@ -171,10 +162,15 @@ sysctl_kern_arnd(SYSCTL_HANDLER_ARGS)
char buf[256];
size_t len;
- len = req->oldlen;
- if (len > sizeof(buf))
- len = sizeof(buf);
- arc4rand(buf, len, 0);
+ /*-
+ * This is one of the very few legitimate uses of read_random(9).
+ * Use of arc4random(9) is not recommended as that will ignore
+ * an unsafe (i.e. unseeded) random(4).
+ *
+ * If random(4) is not seeded, then this returns 0, so the
+ * sysctl will return a zero-length buffer.
+ */
+ len = read_random(buf, MIN(req->oldlen, sizeof(buf)));
return (SYSCTL_OUT(req, buf, len));
}
@@ -208,7 +204,7 @@ sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
{
u_long val;
- val = ctob(physmem - cnt.v_wire_count);
+ val = ctob(physmem - vm_cnt.v_wire_count);
return (sysctl_handle_long(oidp, &val, 0, req));
}
@@ -411,15 +407,8 @@ SYSCTL_PROC(_kern, KERN_SECURELVL, securelevel,
/* Actual kernel configuration options. */
extern char kernconfstring[];
-static int
-sysctl_kern_config(SYSCTL_HANDLER_ARGS)
-{
- return (sysctl_handle_string(oidp, kernconfstring,
- strlen(kernconfstring), req));
-}
-
-SYSCTL_PROC(_kern, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RW,
- 0, 0, sysctl_kern_config, "", "Kernel configuration file");
+SYSCTL_STRING(_kern, OID_AUTO, conftxt, CTLFLAG_RD, kernconfstring, 0,
+ "Kernel configuration file");
#endif
static int
@@ -457,8 +446,50 @@ sysctl_hostid(SYSCTL_HANDLER_ARGS)
}
SYSCTL_PROC(_kern, KERN_HOSTID, hostid,
- CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE | CTLFLAG_CAPRD,
NULL, 0, sysctl_hostid, "LU", "Host ID");
+
+/*
+ * The osrelease string is copied from the global (osrelease in vers.c) into
+ * prison0 by a sysinit and is inherited by child jails if not changed at jail
+ * creation, so we always return the copy from the current prison data.
+ */
+static int
+sysctl_osrelease(SYSCTL_HANDLER_ARGS)
+{
+ struct prison *pr;
+
+ pr = req->td->td_ucred->cr_prison;
+ return (SYSCTL_OUT(req, pr->pr_osrelease, strlen(pr->pr_osrelease) + 1));
+
+}
+
+SYSCTL_PROC(_kern, KERN_OSRELEASE, osrelease,
+ CTLTYPE_STRING | CTLFLAG_CAPRD | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_osrelease, "A", "Operating system release");
+
+/*
+ * The osreldate number is copied from the global (osreldate in vers.c) into
+ * prison0 by a sysinit and is inherited by child jails if not changed at jail
+ * creation, so we always return the value from the current prison data.
+ */
+static int
+sysctl_osreldate(SYSCTL_HANDLER_ARGS)
+{
+ struct prison *pr;
+
+ pr = req->td->td_ucred->cr_prison;
+ return (SYSCTL_OUT(req, &pr->pr_osreldate, sizeof(pr->pr_osreldate)));
+
+}
+
+/*
+ * NOTICE: The *userland* release date is available in
+ * /usr/include/osreldate.h
+ */
+SYSCTL_PROC(_kern, KERN_OSRELDATE, osreldate,
+ CTLTYPE_INT | CTLFLAG_CAPRD | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_osreldate, "I", "Kernel release date");
#endif /* __rtems__ */
SYSCTL_NODE(_kern, OID_AUTO, features, CTLFLAG_RD, 0, "Kernel Features");
@@ -560,9 +591,9 @@ sysctl_kern_pid_max(SYSCTL_HANDLER_ARGS)
sx_xunlock(&proctree_lock);
return (error);
}
-SYSCTL_PROC(_kern, OID_AUTO, pid_max, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_TUN |
- CTLFLAG_MPSAFE, 0, 0, sysctl_kern_pid_max, "I",
- "Maximum allowed pid");
+SYSCTL_PROC(_kern, OID_AUTO, pid_max, CTLTYPE_INT |
+ CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE,
+ 0, 0, sysctl_kern_pid_max, "I", "Maximum allowed pid");
#include <sys/bio.h>
#include <sys/buf.h>
@@ -575,6 +606,11 @@ SYSCTL_INT(_debug_sizeof, OID_AUTO, buf, CTLFLAG_RD,
SYSCTL_INT(_debug_sizeof, OID_AUTO, kinfo_proc, CTLFLAG_RD,
SYSCTL_NULL_INT_PTR, sizeof(struct kinfo_proc), "sizeof(struct kinfo_proc)");
+/* Used by kernel debuggers. */
+const int pcb_size = sizeof(struct pcb);
+SYSCTL_INT(_debug_sizeof, OID_AUTO, pcb, CTLFLAG_RD,
+ SYSCTL_NULL_INT_PTR, sizeof(struct pcb), "sizeof(struct pcb)");
+
/* XXX compatibility, remove for 6.0 */
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
diff --git a/freebsd/sys/kern/kern_module.c b/freebsd/sys/kern/kern_module.c
index 72c9d99d..fb910e3c 100644
--- a/freebsd/sys/kern/kern_module.c
+++ b/freebsd/sys/kern/kern_module.c
@@ -144,7 +144,7 @@ module_register_init(const void *arg)
MOD_XLOCK;
if (mod->file) {
/*
- * Once a module is succesfully loaded, move
+ * Once a module is successfully loaded, move
* it to the head of the module list for this
* linker file. This resorts the list so that
* when the kernel linker iterates over the
@@ -170,16 +170,14 @@ module_register(const moduledata_t *data, linker_file_t container)
newmod = module_lookupbyname(data->name);
if (newmod != NULL) {
MOD_XUNLOCK;
- printf("module_register: module %s already exists!\n",
- data->name);
+#ifndef __rtems__
+ printf("%s: cannot register %s from %s; already loaded from %s\n",
+ __func__, data->name, container->filename, newmod->file->filename);
+#endif /* __rtems__ */
return (EEXIST);
}
namelen = strlen(data->name) + 1;
newmod = malloc(sizeof(struct module) + namelen, M_MODULE, M_WAITOK);
- if (newmod == NULL) {
- MOD_XUNLOCK;
- return (ENOMEM);
- }
#ifndef __rtems__
newmod->refs = 1;
#endif /* __rtems__ */
diff --git a/freebsd/sys/kern/kern_mtxpool.c b/freebsd/sys/kern/kern_mtxpool.c
index df1b1132..e778ec31 100644
--- a/freebsd/sys/kern/kern_mtxpool.c
+++ b/freebsd/sys/kern/kern_mtxpool.c
@@ -41,7 +41,7 @@
*
* Disadvantages:
* - should generally only be used as leaf mutexes.
- * - pool/pool dependancy ordering cannot be depended on.
+ * - pool/pool dependency ordering cannot be depended on.
* - possible L1 cache mastersip contention between cpus.
*/
@@ -61,9 +61,6 @@ __FBSDID("$FreeBSD$");
static MALLOC_DEFINE(M_MTXPOOL, "mtx_pool", "mutex pool");
/* Pool sizes must be a power of two */
-#ifndef MTX_POOL_LOCKBUILDER_SIZE
-#define MTX_POOL_LOCKBUILDER_SIZE 128
-#endif
#ifndef MTX_POOL_SLEEP_SIZE
#define MTX_POOL_SLEEP_SIZE 128
#endif
@@ -80,13 +77,6 @@ struct mtx_pool {
struct mtx mtx_pool_ary[1];
};
-#ifndef __rtems__
-static struct mtx_pool_lockbuilder {
- struct mtxpool_header mtx_pool_header;
- struct mtx mtx_pool_ary[MTX_POOL_LOCKBUILDER_SIZE];
-} lockbuilder_pool;
-#endif /* __rtems__ */
-
#define mtx_pool_size mtx_pool_header.mtxpool_size
#define mtx_pool_mask mtx_pool_header.mtxpool_mask
#define mtx_pool_shift mtx_pool_header.mtxpool_shift
@@ -94,7 +84,6 @@ static struct mtx_pool_lockbuilder {
#ifndef __rtems__
struct mtx_pool *mtxpool_sleep;
-struct mtx_pool *mtxpool_lockbuilder;
#endif /* __rtems__ */
#if UINTPTR_MAX == UINT64_MAX /* 64 bits */
@@ -173,15 +162,6 @@ mtx_pool_destroy(struct mtx_pool **poolp)
#ifndef __rtems__
static void
-mtx_pool_setup_static(void *dummy __unused)
-{
- mtx_pool_initialize((struct mtx_pool *)&lockbuilder_pool,
- "lockbuilder mtxpool", MTX_POOL_LOCKBUILDER_SIZE,
- MTX_DEF | MTX_NOWITNESS | MTX_QUIET);
- mtxpool_lockbuilder = (struct mtx_pool *)&lockbuilder_pool;
-}
-
-static void
mtx_pool_setup_dynamic(void *dummy __unused)
{
mtxpool_sleep = mtx_pool_create("sleep mtxpool",
@@ -211,18 +191,6 @@ mtx_pool_alloc(struct mtx_pool *pool)
}
#ifndef __rtems__
-/*
- * The lockbuilder pool must be initialized early because the lockmgr
- * and sx locks depend on it. The sx locks are used in the kernel
- * memory allocator. The lockmgr subsystem is initialized by
- * SYSINIT(..., SI_SUB_LOCKMGR, ...).
- *
- * We can't call malloc() to dynamically allocate the sleep pool
- * until after kmeminit() has been called, which is done by
- * SYSINIT(..., SI_SUB_KMEM, ...).
- */
-SYSINIT(mtxpooli1, SI_SUB_MTX_POOL_STATIC, SI_ORDER_FIRST,
- mtx_pool_setup_static, NULL);
SYSINIT(mtxpooli2, SI_SUB_MTX_POOL_DYNAMIC, SI_ORDER_FIRST,
mtx_pool_setup_dynamic, NULL);
#endif /* __rtems__ */
diff --git a/freebsd/sys/kern/kern_osd.c b/freebsd/sys/kern/kern_osd.c
index 8461648b..71dd6289 100644
--- a/freebsd/sys/kern/kern_osd.c
+++ b/freebsd/sys/kern/kern_osd.c
@@ -46,11 +46,27 @@ __FBSDID("$FreeBSD$");
/* OSD (Object Specific Data) */
+/*
+ * Lock key:
+ * (m) osd_module_lock
+ * (o) osd_object_lock
+ * (l) osd_list_lock
+ */
+struct osd_master {
+ struct sx osd_module_lock;
+ struct rmlock osd_object_lock;
+ struct mtx osd_list_lock;
+ LIST_HEAD(, osd) osd_list; /* (l) */
+ osd_destructor_t *osd_destructors; /* (o) */
+ osd_method_t *osd_methods; /* (m) */
+ u_int osd_ntslots; /* (m) */
+ const u_int osd_nmethods;
+};
+
static MALLOC_DEFINE(M_OSD, "osd", "Object Specific Data");
static int osd_debug = 0;
-TUNABLE_INT("debug.osd", &osd_debug);
-SYSCTL_INT(_debug, OID_AUTO, osd, CTLFLAG_RW, &osd_debug, 0, "OSD debug level");
+SYSCTL_INT(_debug, OID_AUTO, osd, CTLFLAG_RWTUN, &osd_debug, 0, "OSD debug level");
#define OSD_DEBUG(...) do { \
if (osd_debug) { \
@@ -64,25 +80,12 @@ static void do_osd_del(u_int type, struct osd *osd, u_int slot,
int list_locked);
/*
- * Lists of objects with OSD.
- *
- * Lock key:
- * (m) osd_module_lock
- * (o) osd_object_lock
- * (l) osd_list_lock
+ * List of objects with OSD.
*/
-static LIST_HEAD(, osd) osd_list[OSD_LAST + 1]; /* (m) */
-static osd_method_t *osd_methods[OSD_LAST + 1]; /* (m) */
-static u_int osd_nslots[OSD_LAST + 1]; /* (m) */
-static osd_destructor_t *osd_destructors[OSD_LAST + 1]; /* (o) */
-static const u_int osd_nmethods[OSD_LAST + 1] = {
- [OSD_JAIL] = PR_MAXMETHOD,
+struct osd_master osdm[OSD_LAST + 1] = {
+ [OSD_JAIL] = { .osd_nmethods = PR_MAXMETHOD },
};
-static struct sx osd_module_lock[OSD_LAST + 1];
-static struct rmlock osd_object_lock[OSD_LAST + 1];
-static struct mtx osd_list_lock[OSD_LAST + 1];
-
static void
osd_default_destructor(void *value __unused)
{
@@ -104,12 +107,12 @@ osd_register(u_int type, osd_destructor_t destructor, osd_method_t *methods)
if (destructor == NULL)
destructor = osd_default_destructor;
- sx_xlock(&osd_module_lock[type]);
+ sx_xlock(&osdm[type].osd_module_lock);
/*
* First, we try to find unused slot.
*/
- for (i = 0; i < osd_nslots[type]; i++) {
- if (osd_destructors[type][i] == NULL) {
+ for (i = 0; i < osdm[type].osd_ntslots; i++) {
+ if (osdm[type].osd_destructors[i] == NULL) {
OSD_DEBUG("Unused slot found (type=%u, slot=%u).",
type, i);
break;
@@ -118,31 +121,31 @@ osd_register(u_int type, osd_destructor_t destructor, osd_method_t *methods)
/*
* If no unused slot was found, allocate one.
*/
- if (i == osd_nslots[type]) {
- osd_nslots[type]++;
- if (osd_nmethods[type] != 0)
- osd_methods[type] = realloc(osd_methods[type],
- sizeof(osd_method_t) * osd_nslots[type] *
- osd_nmethods[type], M_OSD, M_WAITOK);
- newptr = malloc(sizeof(osd_destructor_t) * osd_nslots[type],
- M_OSD, M_WAITOK);
- rm_wlock(&osd_object_lock[type]);
- bcopy(osd_destructors[type], newptr,
+ if (i == osdm[type].osd_ntslots) {
+ osdm[type].osd_ntslots++;
+ if (osdm[type].osd_nmethods != 0)
+ osdm[type].osd_methods = realloc(osdm[type].osd_methods,
+ sizeof(osd_method_t) * osdm[type].osd_ntslots *
+ osdm[type].osd_nmethods, M_OSD, M_WAITOK);
+ newptr = malloc(sizeof(osd_destructor_t) *
+ osdm[type].osd_ntslots, M_OSD, M_WAITOK);
+ rm_wlock(&osdm[type].osd_object_lock);
+ bcopy(osdm[type].osd_destructors, newptr,
sizeof(osd_destructor_t) * i);
- free(osd_destructors[type], M_OSD);
- osd_destructors[type] = newptr;
- rm_wunlock(&osd_object_lock[type]);
+ free(osdm[type].osd_destructors, M_OSD);
+ osdm[type].osd_destructors = newptr;
+ rm_wunlock(&osdm[type].osd_object_lock);
OSD_DEBUG("New slot allocated (type=%u, slot=%u).",
type, i + 1);
}
- osd_destructors[type][i] = destructor;
- if (osd_nmethods[type] != 0) {
- for (m = 0; m < osd_nmethods[type]; m++)
- osd_methods[type][i * osd_nmethods[type] + m] =
- methods != NULL ? methods[m] : NULL;
+ osdm[type].osd_destructors[i] = destructor;
+ if (osdm[type].osd_nmethods != 0) {
+ for (m = 0; m < osdm[type].osd_nmethods; m++)
+ osdm[type].osd_methods[i * osdm[type].osd_nmethods + m]
+ = methods != NULL ? methods[m] : NULL;
}
- sx_xunlock(&osd_module_lock[type]);
+ sx_xunlock(&osdm[type].osd_module_lock);
return (i + 1);
}
@@ -153,105 +156,142 @@ osd_deregister(u_int type, u_int slot)
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
- sx_xlock(&osd_module_lock[type]);
- rm_wlock(&osd_object_lock[type]);
+ sx_xlock(&osdm[type].osd_module_lock);
+ rm_wlock(&osdm[type].osd_object_lock);
/*
* Free all OSD for the given slot.
*/
- mtx_lock(&osd_list_lock[type]);
- LIST_FOREACH_SAFE(osd, &osd_list[type], osd_next, tosd)
+ mtx_lock(&osdm[type].osd_list_lock);
+ LIST_FOREACH_SAFE(osd, &osdm[type].osd_list, osd_next, tosd)
do_osd_del(type, osd, slot, 1);
- mtx_unlock(&osd_list_lock[type]);
+ mtx_unlock(&osdm[type].osd_list_lock);
/*
* Set destructor to NULL to free the slot.
*/
- osd_destructors[type][slot - 1] = NULL;
- if (slot == osd_nslots[type]) {
- osd_nslots[type]--;
- osd_destructors[type] = realloc(osd_destructors[type],
- sizeof(osd_destructor_t) * osd_nslots[type], M_OSD,
+ osdm[type].osd_destructors[slot - 1] = NULL;
+ if (slot == osdm[type].osd_ntslots) {
+ osdm[type].osd_ntslots--;
+ osdm[type].osd_destructors = realloc(osdm[type].osd_destructors,
+ sizeof(osd_destructor_t) * osdm[type].osd_ntslots, M_OSD,
M_NOWAIT | M_ZERO);
- if (osd_nmethods[type] != 0)
- osd_methods[type] = realloc(osd_methods[type],
- sizeof(osd_method_t) * osd_nslots[type] *
- osd_nmethods[type], M_OSD, M_NOWAIT | M_ZERO);
+ if (osdm[type].osd_nmethods != 0)
+ osdm[type].osd_methods = realloc(osdm[type].osd_methods,
+ sizeof(osd_method_t) * osdm[type].osd_ntslots *
+ osdm[type].osd_nmethods, M_OSD, M_NOWAIT | M_ZERO);
/*
* We always reallocate to smaller size, so we assume it will
* always succeed.
*/
- KASSERT(osd_destructors[type] != NULL &&
- (osd_nmethods[type] == 0 || osd_methods[type] != NULL),
- ("realloc() failed"));
+ KASSERT(osdm[type].osd_destructors != NULL &&
+ (osdm[type].osd_nmethods == 0 ||
+ osdm[type].osd_methods != NULL), ("realloc() failed"));
OSD_DEBUG("Deregistration of the last slot (type=%u, slot=%u).",
type, slot);
} else {
OSD_DEBUG("Slot deregistration (type=%u, slot=%u).",
type, slot);
}
- rm_wunlock(&osd_object_lock[type]);
- sx_xunlock(&osd_module_lock[type]);
+ rm_wunlock(&osdm[type].osd_object_lock);
+ sx_xunlock(&osdm[type].osd_module_lock);
}
int
osd_set(u_int type, struct osd *osd, u_int slot, void *value)
{
+
+ return (osd_set_reserved(type, osd, slot, NULL, value));
+}
+
+void **
+osd_reserve(u_int slot)
+{
+
+ KASSERT(slot > 0, ("Invalid slot."));
+
+ OSD_DEBUG("Reserving slot array (slot=%u).", slot);
+ return (malloc(sizeof(void *) * slot, M_OSD, M_WAITOK | M_ZERO));
+}
+
+int
+osd_set_reserved(u_int type, struct osd *osd, u_int slot, void **rsv,
+ void *value)
+{
struct rm_priotracker tracker;
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
if (slot > osd->osd_nslots) {
+ void **newptr;
+
if (value == NULL) {
OSD_DEBUG(
"Not allocating null slot (type=%u, slot=%u).",
type, slot);
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
+ if (rsv)
+ osd_free_reserved(rsv);
return (0);
- } else if (osd->osd_nslots == 0) {
+ }
+
+ /*
+ * Too few slots allocated here, so we need to extend or create
+ * the array.
+ */
+ if (rsv) {
/*
- * First OSD for this object, so we need to allocate
- * space and put it onto the list.
+ * Use the reserve passed in (assumed to be
+ * the right size).
*/
- osd->osd_slots = malloc(sizeof(void *) * slot, M_OSD,
- M_NOWAIT | M_ZERO);
- if (osd->osd_slots == NULL) {
- rm_runlock(&osd_object_lock[type], &tracker);
- return (ENOMEM);
+ newptr = rsv;
+ if (osd->osd_nslots != 0) {
+ memcpy(newptr, osd->osd_slots,
+ sizeof(void *) * osd->osd_nslots);
+ free(osd->osd_slots, M_OSD);
}
- osd->osd_nslots = slot;
- mtx_lock(&osd_list_lock[type]);
- LIST_INSERT_HEAD(&osd_list[type], osd, osd_next);
- mtx_unlock(&osd_list_lock[type]);
- OSD_DEBUG("Setting first slot (type=%u).", type);
} else {
- void *newptr;
-
- /*
- * Too few slots allocated here, needs to extend
- * the array.
- */
newptr = realloc(osd->osd_slots, sizeof(void *) * slot,
M_OSD, M_NOWAIT | M_ZERO);
if (newptr == NULL) {
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock,
+ &tracker);
return (ENOMEM);
}
- osd->osd_slots = newptr;
- osd->osd_nslots = slot;
- OSD_DEBUG("Growing slots array (type=%u).", type);
}
- }
+ if (osd->osd_nslots == 0) {
+ /*
+ * First OSD for this object, so we need to put it
+ * onto the list.
+ */
+ mtx_lock(&osdm[type].osd_list_lock);
+ LIST_INSERT_HEAD(&osdm[type].osd_list, osd, osd_next);
+ mtx_unlock(&osdm[type].osd_list_lock);
+ OSD_DEBUG("Setting first slot (type=%u).", type);
+ } else
+ OSD_DEBUG("Growing slots array (type=%u).", type);
+ osd->osd_slots = newptr;
+ osd->osd_nslots = slot;
+ } else if (rsv)
+ osd_free_reserved(rsv);
OSD_DEBUG("Setting slot value (type=%u, slot=%u, value=%p).", type,
slot, value);
osd->osd_slots[slot - 1] = value;
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
return (0);
}
+void
+osd_free_reserved(void **rsv)
+{
+
+ OSD_DEBUG("Discarding reserved slot array.");
+ free(rsv, M_OSD);
+}
+
void *
osd_get(u_int type, struct osd *osd, u_int slot)
{
@@ -260,9 +300,9 @@ osd_get(u_int type, struct osd *osd, u_int slot)
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
if (slot > osd->osd_nslots) {
value = NULL;
OSD_DEBUG("Slot doesn't exist (type=%u, slot=%u).", type, slot);
@@ -271,7 +311,7 @@ osd_get(u_int type, struct osd *osd, u_int slot)
OSD_DEBUG("Returning slot value (type=%u, slot=%u, value=%p).",
type, slot, value);
}
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
return (value);
}
@@ -280,9 +320,9 @@ osd_del(u_int type, struct osd *osd, u_int slot)
{
struct rm_priotracker tracker;
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
do_osd_del(type, osd, slot, 0);
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
}
static void
@@ -292,7 +332,7 @@ do_osd_del(u_int type, struct osd *osd, u_int slot, int list_locked)
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
OSD_DEBUG("Deleting slot (type=%u, slot=%u).", type, slot);
@@ -301,7 +341,7 @@ do_osd_del(u_int type, struct osd *osd, u_int slot, int list_locked)
return;
}
if (osd->osd_slots[slot - 1] != NULL) {
- osd_destructors[type][slot - 1](osd->osd_slots[slot - 1]);
+ osdm[type].osd_destructors[slot - 1](osd->osd_slots[slot - 1]);
osd->osd_slots[slot - 1] = NULL;
}
for (i = osd->osd_nslots - 1; i >= 0; i--) {
@@ -315,10 +355,10 @@ do_osd_del(u_int type, struct osd *osd, u_int slot, int list_locked)
/* No values left for this object. */
OSD_DEBUG("No more slots left (type=%u).", type);
if (!list_locked)
- mtx_lock(&osd_list_lock[type]);
+ mtx_lock(&osdm[type].osd_list_lock);
LIST_REMOVE(osd, osd_next);
if (!list_locked)
- mtx_unlock(&osd_list_lock[type]);
+ mtx_unlock(&osdm[type].osd_list_lock);
free(osd->osd_slots, M_OSD);
osd->osd_slots = NULL;
osd->osd_nslots = 0;
@@ -344,21 +384,21 @@ osd_call(u_int type, u_int method, void *obj, void *data)
int error, i;
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
- KASSERT(method < osd_nmethods[type], ("Invalid method."));
+ KASSERT(method < osdm[type].osd_nmethods, ("Invalid method."));
/*
* Call this method for every slot that defines it, stopping if an
* error is encountered.
*/
error = 0;
- sx_slock(&osd_module_lock[type]);
- for (i = 0; i < osd_nslots[type]; i++) {
- methodfun =
- osd_methods[type][i * osd_nmethods[type] + method];
+ sx_slock(&osdm[type].osd_module_lock);
+ for (i = 0; i < osdm[type].osd_ntslots; i++) {
+ methodfun = osdm[type].osd_methods[i * osdm[type].osd_nmethods +
+ method];
if (methodfun != NULL && (error = methodfun(obj, data)) != 0)
break;
}
- sx_sunlock(&osd_module_lock[type]);
+ sx_sunlock(&osdm[type].osd_module_lock);
return (error);
}
@@ -376,14 +416,14 @@ osd_exit(u_int type, struct osd *osd)
return;
}
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
for (i = 1; i <= osd->osd_nslots; i++) {
- if (osd_destructors[type][i - 1] != NULL)
+ if (osdm[type].osd_destructors[i - 1] != NULL)
do_osd_del(type, osd, i, 0);
else
OSD_DEBUG("Unused slot (type=%u, slot=%u).", type, i);
}
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
OSD_DEBUG("Object exit (type=%u).", type);
}
@@ -393,13 +433,13 @@ osd_init(void *arg __unused)
u_int i;
for (i = OSD_FIRST; i <= OSD_LAST; i++) {
- osd_nslots[i] = 0;
- LIST_INIT(&osd_list[i]);
- sx_init(&osd_module_lock[i], "osd_module");
- rm_init(&osd_object_lock[i], "osd_object");
- mtx_init(&osd_list_lock[i], "osd_list", NULL, MTX_DEF);
- osd_destructors[i] = NULL;
- osd_methods[i] = NULL;
+ sx_init(&osdm[i].osd_module_lock, "osd_module");
+ rm_init(&osdm[i].osd_object_lock, "osd_object");
+ mtx_init(&osdm[i].osd_list_lock, "osd_list", NULL, MTX_DEF);
+ LIST_INIT(&osdm[i].osd_list);
+ osdm[i].osd_destructors = NULL;
+ osdm[i].osd_ntslots = 0;
+ osdm[i].osd_methods = NULL;
}
}
SYSINIT(osd, SI_SUB_LOCK, SI_ORDER_ANY, osd_init, NULL);
diff --git a/freebsd/sys/kern/kern_synch.c b/freebsd/sys/kern/kern_synch.c
index 2824c9a9..6ecedfd2 100644
--- a/freebsd/sys/kern/kern_synch.c
+++ b/freebsd/sys/kern/kern_synch.c
@@ -39,7 +39,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <rtems/bsd/local/opt_kdtrace.h>
#include <rtems/bsd/local/opt_ktrace.h>
#include <rtems/bsd/local/opt_sched.h>
@@ -69,12 +68,6 @@ __FBSDID("$FreeBSD$");
#include <machine/cpu.h>
-#ifdef XEN
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-#endif
-
#define KTDSTATE(td) \
(((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \
((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \
@@ -89,7 +82,7 @@ SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup,
int hogticks;
#endif /* __rtems__ */
-static int pause_wchan;
+static uint8_t pause_wchan[MAXCPU];
#ifndef __rtems__
static struct callout loadav_callout;
@@ -113,21 +106,10 @@ static void loadav(void *arg);
SDT_PROVIDER_DECLARE(sched);
SDT_PROBE_DEFINE(sched, , , preempt);
-
-/*
- * These probes reference Solaris features that are not implemented in FreeBSD.
- * Create the probes anyway for compatibility with existing D scripts; they'll
- * just never fire.
- */
-SDT_PROBE_DEFINE(sched, , , cpucaps__sleep);
-SDT_PROBE_DEFINE(sched, , , cpucaps__wakeup);
-SDT_PROBE_DEFINE(sched, , , schedctl__nopreempt);
-SDT_PROBE_DEFINE(sched, , , schedctl__preempt);
-SDT_PROBE_DEFINE(sched, , , schedctl__yield);
#endif /* __rtems__ */
-void
-sleepinit(void)
+static void
+sleepinit(void *unused)
{
#ifndef __rtems__
@@ -137,13 +119,19 @@ sleepinit(void)
}
/*
+ * vmem tries to lock the sleepq mutexes when free'ing kva, so make sure
+ * it is available.
+ */
+SYSINIT(sleepinit, SI_SUB_KMEM, SI_ORDER_ANY, sleepinit, 0);
+
+/*
* General sleep call. Suspends the current thread until a wakeup is
* performed on the specified identifier. The thread will then be made
- * runnable with the specified priority. Sleeps at most timo/hz seconds
- * (0 means no timeout). If pri includes PCATCH flag, signals are checked
- * before and after sleeping, else signals are not checked. Returns 0 if
+ * runnable with the specified priority. Sleeps at most sbt units of time
+ * (0 means no timeout). If pri includes the PCATCH flag, let signals
+ * interrupt the sleep, otherwise ignore them while sleeping. Returns 0 if
* awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a
- * signal needs to be delivered, ERESTART is returned if the current system
+ * signal becomes pending, ERESTART is returned if the current system
* call should be restarted if possible, and EINTR is returned if the system
* call should be interrupted by the signal (return EINTR).
*
@@ -153,18 +141,15 @@ sleepinit(void)
*/
int
_sleep(void *ident, struct lock_object *lock, int priority,
- const char *wmesg, int timo)
+ const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
{
struct thread *td;
#ifndef __rtems__
struct proc *p;
#endif /* __rtems__ */
struct lock_class *class;
-#ifndef __rtems__
- int catch, flags, lock_state, pri, rval;
-#else /* __rtems__ */
- int flags, lock_state, pri, rval;
-#endif /* __rtems__ */
+ uintptr_t lock_state;
+ int catch, pri, rval, sleepq_flags;
WITNESS_SAVE_DECL(lock_witness);
td = curthread;
@@ -177,7 +162,7 @@ _sleep(void *ident, struct lock_object *lock, int priority,
#endif
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
"Sleeping on \"%s\"", wmesg);
- KASSERT(timo != 0 || mtx_owned(&Giant) || lock != NULL,
+ KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL,
("sleeping without a lock"));
#ifndef __rtems__
KASSERT(p != NULL, ("msleep1"));
@@ -194,15 +179,7 @@ _sleep(void *ident, struct lock_object *lock, int priority,
class = NULL;
#ifndef __rtems__
- if (cold || SCHEDULER_STOPPED()) {
- /*
- * During autoconfiguration, just return;
- * don't run any other threads or panic below,
- * in case this is the idle thread and already asleep.
- * XXX: this used to do "s = splhigh(); splx(safepri);
- * splx(s);" to give interrupts a chance, but there is
- * no way to give interrupts a chance now.
- */
+ if (SCHEDULER_STOPPED()) {
if (lock != NULL && priority & PDROP)
class->lc_unlock(lock);
return (0);
@@ -210,6 +187,7 @@ _sleep(void *ident, struct lock_object *lock, int priority,
catch = priority & PCATCH;
pri = priority & PRIMASK;
#else /* __rtems__ */
+ (void)catch;
pri = priority;
#endif /* __rtems__ */
@@ -221,15 +199,14 @@ _sleep(void *ident, struct lock_object *lock, int priority,
if (TD_ON_SLEEPQ(td))
sleepq_remove(td, td->td_wchan);
- if (ident == &pause_wchan)
- flags = SLEEPQ_PAUSE;
+ if ((uint8_t *)ident >= &pause_wchan[0] &&
+ (uint8_t *)ident <= &pause_wchan[MAXCPU - 1])
+ sleepq_flags = SLEEPQ_PAUSE;
else
- flags = SLEEPQ_SLEEP;
+ sleepq_flags = SLEEPQ_SLEEP;
#ifndef __rtems__
if (catch)
- flags |= SLEEPQ_INTERRUPTIBLE;
- if (priority & PBDRY)
- flags |= SLEEPQ_STOP_ON_BDRY;
+ sleepq_flags |= SLEEPQ_INTERRUPTIBLE;
#endif /* __rtems__ */
sleepq_lock(ident);
@@ -256,9 +233,9 @@ _sleep(void *ident, struct lock_object *lock, int priority,
* stopped, then td will no longer be on a sleep queue upon
* return from cursig().
*/
- sleepq_add(ident, lock, wmesg, flags, 0);
- if (timo)
- sleepq_set_timeout(ident, timo);
+ sleepq_add(ident, lock, wmesg, sleepq_flags, 0);
+ if (sbt != 0)
+ sleepq_set_timeout_sbt(ident, sbt, pr, flags);
if (lock != NULL && class->lc_flags & LC_SLEEPABLE) {
sleepq_release(ident);
WITNESS_SAVE(lock, lock_witness);
@@ -266,11 +243,11 @@ _sleep(void *ident, struct lock_object *lock, int priority,
sleepq_lock(ident);
}
#ifndef __rtems__
- if (timo && catch)
+ if (sbt != 0 && catch)
rval = sleepq_timedwait_sig(ident, pri);
- else if (timo)
+ else if (sbt != 0)
#else /* __rtems__ */
- if (timo)
+ if (sbt != 0)
#endif /* __rtems__ */
rval = sleepq_timedwait(ident, pri);
#ifndef __rtems__
@@ -295,7 +272,8 @@ _sleep(void *ident, struct lock_object *lock, int priority,
#ifndef __rtems__
int
-msleep_spin(void *ident, struct mtx *mtx, const char *wmesg, int timo)
+msleep_spin_sbt(void *ident, struct mtx *mtx, const char *wmesg,
+ sbintime_t sbt, sbintime_t pr, int flags)
{
struct thread *td;
struct proc *p;
@@ -308,17 +286,8 @@ msleep_spin(void *ident, struct mtx *mtx, const char *wmesg, int timo)
KASSERT(p != NULL, ("msleep1"));
KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep"));
- if (cold || SCHEDULER_STOPPED()) {
- /*
- * During autoconfiguration, just return;
- * don't run any other threads or panic below,
- * in case this is the idle thread and already asleep.
- * XXX: this used to do "s = splhigh(); splx(safepri);
- * splx(s);" to give interrupts a chance, but there is
- * no way to give interrupts a chance now.
- */
+ if (SCHEDULER_STOPPED())
return (0);
- }
sleepq_lock(ident);
CTR5(KTR_PROC, "msleep_spin: thread %ld (pid %ld, %s) on %s (%p)",
@@ -333,8 +302,8 @@ msleep_spin(void *ident, struct mtx *mtx, const char *wmesg, int timo)
* We put ourselves on the sleep queue and start our timeout.
*/
sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0);
- if (timo)
- sleepq_set_timeout(ident, timo);
+ if (sbt != 0)
+ sleepq_set_timeout_sbt(ident, sbt, pr, flags);
/*
* Can't call ktrace with any spin locks held so it can lock the
@@ -356,7 +325,7 @@ msleep_spin(void *ident, struct mtx *mtx, const char *wmesg, int timo)
wmesg);
sleepq_lock(ident);
#endif
- if (timo)
+ if (sbt != 0)
rval = sleepq_timedwait(ident, 0);
else {
sleepq_wait(ident, 0);
@@ -381,28 +350,32 @@ msleep_spin(void *ident, struct mtx *mtx, const char *wmesg, int timo)
* to a "timo" value of one.
*/
int
-pause(const char *wmesg, int timo)
+pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
{
- KASSERT(timo >= 0, ("pause: timo must be >= 0"));
+ KASSERT(sbt >= 0, ("pause: timeout must be >= 0"));
/* silently convert invalid timeouts */
- if (timo < 1)
- timo = 1;
+ if (sbt == 0)
+ sbt = tick_sbt;
- if (cold) {
+#ifndef __rtems__
+ if (cold || kdb_active || SCHEDULER_STOPPED()) {
/*
- * We delay one HZ at a time to avoid overflowing the
+ * We delay one second at a time to avoid overflowing the
* system specific DELAY() function(s):
*/
- while (timo >= hz) {
+ while (sbt >= SBT_1S) {
DELAY(1000000);
- timo -= hz;
+ sbt -= SBT_1S;
}
- if (timo > 0)
- DELAY(timo * tick);
+ /* Do the delay remainder, if any */
+ sbt = howmany(sbt, SBT_1US);
+ if (sbt > 0)
+ DELAY(sbt);
return (0);
}
- return (tsleep(&pause_wchan, 0, wmesg, timo));
+#endif /* __rtems__ */
+ return (_sleep(&pause_wchan[curcpu], NULL, 0, wmesg, sbt, pr, flags));
}
/*
@@ -460,11 +433,9 @@ mi_switch(int flags, struct thread *newtd)
{
uint64_t runtime, new_switchtime;
struct thread *td;
- struct proc *p;
td = curthread; /* XXX */
THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
- p = td->td_proc; /* XXX */
KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code"));
#ifdef INVARIANTS
if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td))
@@ -486,8 +457,10 @@ mi_switch(int flags, struct thread *newtd)
if (flags & SW_VOL) {
td->td_ru.ru_nvcsw++;
td->td_swvoltick = ticks;
- } else
+ } else {
td->td_ru.ru_nivcsw++;
+ td->td_swinvoltick = ticks;
+ }
#ifdef SCHED_STATS
SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]);
#endif
@@ -504,7 +477,7 @@ mi_switch(int flags, struct thread *newtd)
PCPU_INC(cnt.v_swtch);
PCPU_SET(switchticks, ticks);
CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)",
- td->td_tid, td->td_sched, p->p_pid, td->td_name);
+ td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name);
#if (KTR_COMPILE & KTR_SCHED) != 0
if (TD_IS_IDLETHREAD(td))
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle",
@@ -515,15 +488,12 @@ mi_switch(int flags, struct thread *newtd)
"lockname:\"%s\"", td->td_lockname);
#endif
SDT_PROBE0(sched, , , preempt);
-#ifdef XEN
- PT_UPDATES_FLUSH();
-#endif
sched_switch(td, newtd, flags);
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
"prio:%d", td->td_priority);
CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)",
- td->td_tid, td->td_sched, p->p_pid, td->td_name);
+ td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name);
/*
* If the last thread was exiting, finish cleaning it up.
@@ -596,15 +566,16 @@ loadav(void *arg)
* random variation to avoid synchronisation with processes that
* run at regular intervals.
*/
- callout_reset(&loadav_callout, hz * 4 + (int)(random() % (hz * 2 + 1)),
- loadav, NULL);
+ callout_reset_sbt(&loadav_callout,
+ SBT_1US * (4000000 + (int)(random() % 2000001)), SBT_1US,
+ loadav, NULL, C_DIRECT_EXEC | C_PREL(32));
}
/* ARGSUSED */
static void
synch_setup(void *dummy)
{
- callout_init(&loadav_callout, CALLOUT_MPSAFE);
+ callout_init(&loadav_callout, 1);
/* Kick off timeout driven events by calling first time. */
loadav(NULL);
diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c
index 38201cd3..3bcf6688 100644
--- a/freebsd/sys/kern/kern_sysctl.c
+++ b/freebsd/sys/kern/kern_sysctl.c
@@ -47,7 +47,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/fail.h>
#include <sys/systm.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/jail.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mutex.h>
+#include <sys/rmlock.h>
#include <sys/sbuf.h>
#include <sys/sx.h>
#include <sys/sysproto.h>
@@ -79,7 +80,7 @@ static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer");
* The sysctllock protects the MIB tree. It also protects sysctl
* contexts used with dynamic sysctls. The sysctl_register_oid() and
* sysctl_unregister_oid() routines require the sysctllock to already
- * be held, so the sysctl_lock() and sysctl_unlock() routines are
+ * be held, so the sysctl_wlock() and sysctl_wunlock() routines are
* provided for the few places in the kernel which need to use that
* API rather than using the dynamic API. Use of the dynamic API is
* strongly encouraged for most code.
@@ -88,29 +89,38 @@ static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer");
* sysctl requests. This is implemented by serializing any userland
* sysctl requests larger than a single page via an exclusive lock.
*/
-static struct sx sysctllock;
+static struct rmlock sysctllock;
static struct sx sysctlmemlock;
-#define SYSCTL_XLOCK() sx_xlock(&sysctllock)
-#define SYSCTL_XUNLOCK() sx_xunlock(&sysctllock)
-#define SYSCTL_ASSERT_XLOCKED() sx_assert(&sysctllock, SA_XLOCKED)
-#define SYSCTL_INIT() sx_init(&sysctllock, "sysctl lock")
+#define SYSCTL_WLOCK() rm_wlock(&sysctllock)
+#define SYSCTL_WUNLOCK() rm_wunlock(&sysctllock)
+#define SYSCTL_RLOCK(tracker) rm_rlock(&sysctllock, (tracker))
+#define SYSCTL_RUNLOCK(tracker) rm_runlock(&sysctllock, (tracker))
+#define SYSCTL_WLOCKED() rm_wowned(&sysctllock)
+#define SYSCTL_ASSERT_LOCKED() rm_assert(&sysctllock, RA_LOCKED)
+#define SYSCTL_ASSERT_WLOCKED() rm_assert(&sysctllock, RA_WLOCKED)
+#define SYSCTL_ASSERT_RLOCKED() rm_assert(&sysctllock, RA_RLOCKED)
+#define SYSCTL_INIT() rm_init_flags(&sysctllock, "sysctl lock", \
+ RM_SLEEPABLE)
#define SYSCTL_SLEEP(ch, wmesg, timo) \
- sx_sleep(ch, &sysctllock, 0, wmesg, timo)
+ rm_sleep(ch, &sysctllock, 0, wmesg, timo)
static int sysctl_root(SYSCTL_HANDLER_ARGS);
-struct sysctl_oid_list sysctl__children; /* root list */
+/* Root list */
+struct sysctl_oid_list sysctl__children = SLIST_HEAD_INITIALIZER(&sysctl__children);
static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del,
int recurse);
+static int sysctl_old_kernel(struct sysctl_req *, const void *, size_t);
+static int sysctl_new_kernel(struct sysctl_req *, void *, size_t);
static struct sysctl_oid *
sysctl_find_oidname(const char *name, struct sysctl_oid_list *list)
{
struct sysctl_oid *oidp;
- SYSCTL_ASSERT_XLOCKED();
+ SYSCTL_ASSERT_LOCKED();
SLIST_FOREACH(oidp, list, oid_link) {
if (strcmp(oidp->oid_name, name) == 0) {
return (oidp);
@@ -125,31 +135,212 @@ sysctl_find_oidname(const char *name, struct sysctl_oid_list *list)
* Order by number in each list.
*/
void
-sysctl_lock(void)
+sysctl_wlock(void)
{
- SYSCTL_XLOCK();
+ SYSCTL_WLOCK();
}
void
-sysctl_unlock(void)
+sysctl_wunlock(void)
{
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
}
+static int
+sysctl_root_handler_locked(struct sysctl_oid *oid, void *arg1, intmax_t arg2,
+ struct sysctl_req *req, struct rm_priotracker *tracker)
+{
+ int error;
+
+ if (oid->oid_kind & CTLFLAG_DYN)
+ atomic_add_int(&oid->oid_running, 1);
+
+ if (tracker != NULL)
+ SYSCTL_RUNLOCK(tracker);
+ else
+ SYSCTL_WUNLOCK();
+
+ if (!(oid->oid_kind & CTLFLAG_MPSAFE))
+ mtx_lock(&Giant);
+ error = oid->oid_handler(oid, arg1, arg2, req);
+ if (!(oid->oid_kind & CTLFLAG_MPSAFE))
+ mtx_unlock(&Giant);
+
+ KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error);
+
+ if (tracker != NULL)
+ SYSCTL_RLOCK(tracker);
+ else
+ SYSCTL_WLOCK();
+
+ if (oid->oid_kind & CTLFLAG_DYN) {
+ if (atomic_fetchadd_int(&oid->oid_running, -1) == 1 &&
+ (oid->oid_kind & CTLFLAG_DYING) != 0)
+ wakeup(&oid->oid_running);
+ }
+
+ return (error);
+}
+
+#ifndef __rtems__
+static void
+sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp)
+{
+ struct sysctl_req req;
+ struct sysctl_oid *curr;
+ char *penv = NULL;
+ char path[64];
+ ssize_t rem = sizeof(path);
+ ssize_t len;
+ uint8_t val_8;
+ uint16_t val_16;
+ uint32_t val_32;
+ int val_int;
+ long val_long;
+ int64_t val_64;
+ quad_t val_quad;
+ int error;
+
+ path[--rem] = 0;
+
+ for (curr = oidp; curr != NULL; curr = SYSCTL_PARENT(curr)) {
+ len = strlen(curr->oid_name);
+ rem -= len;
+ if (curr != oidp)
+ rem -= 1;
+ if (rem < 0) {
+ printf("OID path exceeds %d bytes\n", (int)sizeof(path));
+ return;
+ }
+ memcpy(path + rem, curr->oid_name, len);
+ if (curr != oidp)
+ path[rem + len] = '.';
+ }
+
+ memset(&req, 0, sizeof(req));
+
+ req.td = curthread;
+ req.oldfunc = sysctl_old_kernel;
+ req.newfunc = sysctl_new_kernel;
+ req.lock = REQ_UNWIRED;
+
+ switch (oidp->oid_kind & CTLTYPE) {
+ case CTLTYPE_INT:
+ if (getenv_int(path + rem, &val_int) == 0)
+ return;
+ req.newlen = sizeof(val_int);
+ req.newptr = &val_int;
+ break;
+ case CTLTYPE_UINT:
+ if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0)
+ return;
+ req.newlen = sizeof(val_int);
+ req.newptr = &val_int;
+ break;
+ case CTLTYPE_LONG:
+ if (getenv_long(path + rem, &val_long) == 0)
+ return;
+ req.newlen = sizeof(val_long);
+ req.newptr = &val_long;
+ break;
+ case CTLTYPE_ULONG:
+ if (getenv_ulong(path + rem, (unsigned long *)&val_long) == 0)
+ return;
+ req.newlen = sizeof(val_long);
+ req.newptr = &val_long;
+ break;
+ case CTLTYPE_S8:
+ if (getenv_int(path + rem, &val_int) == 0)
+ return;
+ val_8 = val_int;
+ req.newlen = sizeof(val_8);
+ req.newptr = &val_8;
+ break;
+ case CTLTYPE_S16:
+ if (getenv_int(path + rem, &val_int) == 0)
+ return;
+ val_16 = val_int;
+ req.newlen = sizeof(val_16);
+ req.newptr = &val_16;
+ break;
+ case CTLTYPE_S32:
+ if (getenv_long(path + rem, &val_long) == 0)
+ return;
+ val_32 = val_long;
+ req.newlen = sizeof(val_32);
+ req.newptr = &val_32;
+ break;
+ case CTLTYPE_S64:
+ if (getenv_quad(path + rem, &val_quad) == 0)
+ return;
+ val_64 = val_quad;
+ req.newlen = sizeof(val_64);
+ req.newptr = &val_64;
+ break;
+ case CTLTYPE_U8:
+ if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0)
+ return;
+ val_8 = val_int;
+ req.newlen = sizeof(val_8);
+ req.newptr = &val_8;
+ break;
+ case CTLTYPE_U16:
+ if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0)
+ return;
+ val_16 = val_int;
+ req.newlen = sizeof(val_16);
+ req.newptr = &val_16;
+ break;
+ case CTLTYPE_U32:
+ if (getenv_ulong(path + rem, (unsigned long *)&val_long) == 0)
+ return;
+ val_32 = val_long;
+ req.newlen = sizeof(val_32);
+ req.newptr = &val_32;
+ break;
+ case CTLTYPE_U64:
+ /* XXX there is no getenv_uquad() */
+ if (getenv_quad(path + rem, &val_quad) == 0)
+ return;
+ val_64 = val_quad;
+ req.newlen = sizeof(val_64);
+ req.newptr = &val_64;
+ break;
+ case CTLTYPE_STRING:
+ penv = kern_getenv(path + rem);
+ if (penv == NULL)
+ return;
+ req.newlen = strlen(penv);
+ req.newptr = penv;
+ break;
+ default:
+ return;
+ }
+ error = sysctl_root_handler_locked(oidp, oidp->oid_arg1,
+ oidp->oid_arg2, &req, NULL);
+ if (error != 0)
+ printf("Setting sysctl %s failed: %d\n", path + rem, error);
+ if (penv != NULL)
+ freeenv(penv);
+}
+#endif /* __rtems__ */
+
void
sysctl_register_oid(struct sysctl_oid *oidp)
{
struct sysctl_oid_list *parent = oidp->oid_parent;
struct sysctl_oid *p;
struct sysctl_oid *q;
+ int oid_number;
+ int timeout = 2;
/*
* First check if another oid with the same name already
* exists in the parent's list.
*/
- SYSCTL_ASSERT_XLOCKED();
+ SYSCTL_ASSERT_WLOCKED();
p = sysctl_find_oidname(oidp->oid_name, parent);
if (p != NULL) {
if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
@@ -160,40 +351,83 @@ sysctl_register_oid(struct sysctl_oid *oidp)
return;
}
}
+ /* get current OID number */
+ oid_number = oidp->oid_number;
+
+#if (OID_AUTO >= 0)
+#error "OID_AUTO is expected to be a negative value"
+#endif
/*
- * If this oid has a number OID_AUTO, give it a number which
- * is greater than any current oid.
+ * Any negative OID number qualifies as OID_AUTO. Valid OID
+ * numbers should always be positive.
+ *
* NOTE: DO NOT change the starting value here, change it in
* <sys/sysctl.h>, and make sure it is at least 256 to
- * accomodate e.g. net.inet.raw as a static sysctl node.
+ * accommodate e.g. net.inet.raw as a static sysctl node.
*/
- if (oidp->oid_number == OID_AUTO) {
- static int newoid = CTL_AUTO_START;
+ if (oid_number < 0) {
+ static int newoid;
- oidp->oid_number = newoid++;
- if (newoid == 0x7fffffff)
- panic("out of oids");
- }
-#if 0
- else if (oidp->oid_number >= CTL_AUTO_START) {
- /* do not panic; this happens when unregistering sysctl sets */
- printf("static sysctl oid too high: %d", oidp->oid_number);
+ /*
+ * By decrementing the next OID number we spend less
+ * time inserting the OIDs into a sorted list.
+ */
+ if (--newoid < CTL_AUTO_START)
+ newoid = 0x7fffffff;
+
+ oid_number = newoid;
}
-#endif
/*
- * Insert the oid into the parent's list in order.
+ * Insert the OID into the parent's list sorted by OID number.
*/
+retry:
q = NULL;
SLIST_FOREACH(p, parent, oid_link) {
- if (oidp->oid_number < p->oid_number)
+ /* check if the current OID number is in use */
+ if (oid_number == p->oid_number) {
+ /* get the next valid OID number */
+ if (oid_number < CTL_AUTO_START ||
+ oid_number == 0x7fffffff) {
+ /* wraparound - restart */
+ oid_number = CTL_AUTO_START;
+ /* don't loop forever */
+ if (!timeout--)
+ panic("sysctl: Out of OID numbers\n");
+ goto retry;
+ } else {
+ oid_number++;
+ }
+ } else if (oid_number < p->oid_number)
break;
q = p;
}
- if (q)
+ /* check for non-auto OID number collision */
+ if (oidp->oid_number >= 0 && oidp->oid_number < CTL_AUTO_START &&
+ oid_number >= CTL_AUTO_START) {
+ printf("sysctl: OID number(%d) is already in use for '%s'\n",
+ oidp->oid_number, oidp->oid_name);
+ }
+ /* update the OID number, if any */
+ oidp->oid_number = oid_number;
+ if (q != NULL)
SLIST_INSERT_AFTER(q, oidp, oid_link);
else
SLIST_INSERT_HEAD(parent, oidp, oid_link);
+
+ if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE &&
+#ifdef VIMAGE
+ (oidp->oid_kind & CTLFLAG_VNET) == 0 &&
+#endif
+ (oidp->oid_kind & CTLFLAG_TUN) != 0 &&
+ (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) {
+ /* only fetch value once */
+ oidp->oid_kind |= CTLFLAG_NOFETCH;
+#ifndef __rtems__
+ /* try to fetch value from kernel environment */
+ sysctl_load_tunable_by_oid_locked(oidp);
+#endif /* __rtems__ */
+ }
}
void
@@ -202,7 +436,7 @@ sysctl_unregister_oid(struct sysctl_oid *oidp)
struct sysctl_oid *p;
int error;
- SYSCTL_ASSERT_XLOCKED();
+ SYSCTL_ASSERT_WLOCKED();
error = ENOENT;
if (oidp->oid_number == OID_AUTO) {
error = EINVAL;
@@ -258,7 +492,7 @@ sysctl_ctx_free(struct sysctl_ctx_list *clist)
* XXX This algorithm is a hack. But I don't know any
* XXX better solution for now...
*/
- SYSCTL_XLOCK();
+ SYSCTL_WLOCK();
TAILQ_FOREACH(e, clist, link) {
error = sysctl_remove_oid_locked(e->entry, 0, 0);
if (error)
@@ -266,7 +500,7 @@ sysctl_ctx_free(struct sysctl_ctx_list *clist)
}
/*
* Restore deregistered entries, either from the end,
- * or from the place where error occured.
+ * or from the place where error occurred.
* e contains the entry that was not unregistered
*/
if (error)
@@ -278,7 +512,7 @@ sysctl_ctx_free(struct sysctl_ctx_list *clist)
e1 = TAILQ_PREV(e1, sysctl_ctx_list, link);
}
if (error) {
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
return(EBUSY);
}
/* Now really delete the entries */
@@ -292,7 +526,7 @@ sysctl_ctx_free(struct sysctl_ctx_list *clist)
free(e, M_SYSCTLOID);
e = e1;
}
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
return (error);
}
@@ -302,7 +536,7 @@ sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
{
struct sysctl_ctx_entry *e;
- SYSCTL_ASSERT_XLOCKED();
+ SYSCTL_ASSERT_WLOCKED();
if (clist == NULL || oidp == NULL)
return(NULL);
e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK);
@@ -317,7 +551,7 @@ sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
{
struct sysctl_ctx_entry *e;
- SYSCTL_ASSERT_XLOCKED();
+ SYSCTL_ASSERT_WLOCKED();
if (clist == NULL || oidp == NULL)
return(NULL);
TAILQ_FOREACH(e, clist, link) {
@@ -339,15 +573,15 @@ sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
if (clist == NULL || oidp == NULL)
return (EINVAL);
- SYSCTL_XLOCK();
+ SYSCTL_WLOCK();
e = sysctl_ctx_entry_find(clist, oidp);
if (e != NULL) {
TAILQ_REMOVE(clist, e, link);
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
free(e, M_SYSCTLOID);
return (0);
} else {
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
return (ENOENT);
}
}
@@ -363,9 +597,9 @@ sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse)
{
int error;
- SYSCTL_XLOCK();
+ SYSCTL_WLOCK();
error = sysctl_remove_oid_locked(oidp, del, recurse);
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
return (error);
}
@@ -377,14 +611,14 @@ sysctl_remove_name(struct sysctl_oid *parent, const char *name,
int error;
error = ENOENT;
- SYSCTL_XLOCK();
+ SYSCTL_WLOCK();
SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) {
if (strcmp(p->oid_name, name) == 0) {
error = sysctl_remove_oid_locked(p, del, recurse);
break;
}
}
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
return (error);
}
@@ -396,7 +630,7 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse)
struct sysctl_oid *p, *tmp;
int error;
- SYSCTL_ASSERT_XLOCKED();
+ SYSCTL_ASSERT_WLOCKED();
if (oidp == NULL)
return(EINVAL);
if ((oidp->oid_kind & CTLFLAG_DYN) == 0) {
@@ -414,15 +648,17 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse)
if (oidp->oid_refcnt == 1) {
SLIST_FOREACH_SAFE(p,
SYSCTL_CHILDREN(oidp), oid_link, tmp) {
- if (!recurse)
+ if (!recurse) {
+ printf("Warning: failed attempt to "
+ "remove oid %s with child %s\n",
+ oidp->oid_name, p->oid_name);
return (ENOTEMPTY);
+ }
error = sysctl_remove_oid_locked(p, del,
recurse);
if (error)
return (error);
}
- if (del)
- free(SYSCTL_CHILDREN(oidp), M_SYSCTLOID);
}
}
if (oidp->oid_refcnt > 1 ) {
@@ -460,7 +696,7 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse)
*/
struct sysctl_oid *
sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
- int number, const char *name, int kind, void *arg1, intptr_t arg2,
+ int number, const char *name, int kind, void *arg1, intmax_t arg2,
int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr)
{
struct sysctl_oid *oidp;
@@ -469,7 +705,7 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
if (parent == NULL)
return(NULL);
/* Check if the node already exists, otherwise create it */
- SYSCTL_XLOCK();
+ SYSCTL_WLOCK();
oidp = sysctl_find_oidname(name, parent);
if (oidp != NULL) {
if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
@@ -477,41 +713,33 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
/* Update the context */
if (clist != NULL)
sysctl_ctx_entry_add(clist, oidp);
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
return (oidp);
} else {
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
printf("can't re-use a leaf (%s)!\n", name);
return (NULL);
}
}
oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO);
oidp->oid_parent = parent;
- SLIST_NEXT(oidp, oid_link) = NULL;
+ SLIST_INIT(&oidp->oid_children);
oidp->oid_number = number;
oidp->oid_refcnt = 1;
oidp->oid_name = strdup(name, M_SYSCTLOID);
oidp->oid_handler = handler;
oidp->oid_kind = CTLFLAG_DYN | kind;
- if ((kind & CTLTYPE) == CTLTYPE_NODE) {
- /* Allocate space for children */
- SYSCTL_CHILDREN_SET(oidp, malloc(sizeof(struct sysctl_oid_list),
- M_SYSCTLOID, M_WAITOK));
- SLIST_INIT(SYSCTL_CHILDREN(oidp));
- oidp->oid_arg2 = arg2;
- } else {
- oidp->oid_arg1 = arg1;
- oidp->oid_arg2 = arg2;
- }
+ oidp->oid_arg1 = arg1;
+ oidp->oid_arg2 = arg2;
oidp->oid_fmt = fmt;
- if (descr)
+ if (descr != NULL)
oidp->oid_descr = strdup(descr, M_SYSCTLOID);
/* Update the context, if used */
if (clist != NULL)
sysctl_ctx_entry_add(clist, oidp);
/* Register this oid */
sysctl_register_oid(oidp);
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
return (oidp);
}
@@ -525,10 +753,10 @@ sysctl_rename_oid(struct sysctl_oid *oidp, const char *name)
char *oldname;
newname = strdup(name, M_SYSCTLOID);
- SYSCTL_XLOCK();
+ SYSCTL_WLOCK();
oldname = __DECONST(char *, oidp->oid_name);
oidp->oid_name = newname;
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
free(oldname, M_SYSCTLOID);
}
@@ -540,21 +768,21 @@ sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent)
{
struct sysctl_oid *oidp;
- SYSCTL_XLOCK();
+ SYSCTL_WLOCK();
if (oid->oid_parent == parent) {
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
return (0);
}
oidp = sysctl_find_oidname(oid->oid_name, parent);
if (oidp != NULL) {
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
return (EEXIST);
}
sysctl_unregister_oid(oid);
oid->oid_parent = parent;
oid->oid_number = OID_AUTO;
sysctl_register_oid(oid);
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
return (0);
}
@@ -570,12 +798,12 @@ sysctl_register_all(void *arg)
sx_init(&sysctlmemlock, "sysctl mem");
SYSCTL_INIT();
- SYSCTL_XLOCK();
+ SYSCTL_WLOCK();
SET_FOREACH(oidp, sysctl_set)
sysctl_register_oid(*oidp);
- SYSCTL_XUNLOCK();
+ SYSCTL_WUNLOCK();
}
-SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_ANY, sysctl_register_all, 0);
+SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, 0);
/*
* "Staff-functions"
@@ -603,7 +831,7 @@ sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
int k;
struct sysctl_oid *oidp;
- SYSCTL_ASSERT_XLOCKED();
+ SYSCTL_ASSERT_LOCKED();
SLIST_FOREACH(oidp, l, oid_link) {
for (k=0; k<i; k++)
@@ -623,7 +851,7 @@ sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
printf(" Node\n");
if (!oidp->oid_handler) {
sysctl_sysctl_debug_dump_node(
- oidp->oid_arg1, i+2);
+ SYSCTL_CHILDREN(oidp), i + 2);
}
break;
case CTLTYPE_INT: printf(" Int\n"); break;
@@ -631,8 +859,14 @@ sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
case CTLTYPE_LONG: printf(" Long\n"); break;
case CTLTYPE_ULONG: printf(" u_long\n"); break;
case CTLTYPE_STRING: printf(" String\n"); break;
- case CTLTYPE_U64: printf(" uint64_t\n"); break;
+ case CTLTYPE_S8: printf(" int8_t\n"); break;
+ case CTLTYPE_S16: printf(" int16_t\n"); break;
+ case CTLTYPE_S32: printf(" int32_t\n"); break;
case CTLTYPE_S64: printf(" int64_t\n"); break;
+ case CTLTYPE_U8: printf(" uint8_t\n"); break;
+ case CTLTYPE_U16: printf(" uint16_t\n"); break;
+ case CTLTYPE_U32: printf(" uint32_t\n"); break;
+ case CTLTYPE_U64: printf(" uint64_t\n"); break;
case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break;
default: printf("\n");
}
@@ -643,18 +877,19 @@ sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
static int
sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS)
{
+ struct rm_priotracker tracker;
int error;
error = priv_check(req->td, PRIV_SYSCTL_DEBUG);
if (error)
return (error);
- SYSCTL_XLOCK();
+ SYSCTL_RLOCK(&tracker);
sysctl_sysctl_debug_dump_node(&sysctl__children, 0);
- SYSCTL_XUNLOCK();
+ SYSCTL_RUNLOCK(&tracker);
return (ENOENT);
}
-SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD,
+SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE,
0, 0, sysctl_sysctl_debug, "-", "");
#endif
@@ -666,9 +901,10 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
int error = 0;
struct sysctl_oid *oid;
struct sysctl_oid_list *lsp = &sysctl__children, *lsp2;
+ struct rm_priotracker tracker;
char buf[10];
- SYSCTL_XLOCK();
+ SYSCTL_RLOCK(&tracker);
while (namelen) {
if (!lsp) {
snprintf(buf,sizeof(buf),"%d",*name);
@@ -682,7 +918,7 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
name++;
continue;
}
- lsp2 = 0;
+ lsp2 = NULL;
SLIST_FOREACH(oid, lsp, oid_link) {
if (oid->oid_number != *name)
continue;
@@ -711,7 +947,7 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
}
error = SYSCTL_OUT(req, "", 1);
out:
- SYSCTL_XUNLOCK();
+ SYSCTL_RUNLOCK(&tracker);
return (error);
}
@@ -719,7 +955,7 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
* XXXRW/JA: Shouldn't return name data for nodes that we don't permit in
* capability mode.
*/
-static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_CAPRD,
+static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD,
sysctl_sysctl_name, "");
static int
@@ -728,7 +964,7 @@ sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen,
{
struct sysctl_oid *oidp;
- SYSCTL_ASSERT_XLOCKED();
+ SYSCTL_ASSERT_LOCKED();
*len = level;
SLIST_FOREACH(oidp, lsp, oid_link) {
*next = oidp->oid_number;
@@ -790,11 +1026,12 @@ sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)
int i, j, error;
struct sysctl_oid *oid;
struct sysctl_oid_list *lsp = &sysctl__children;
+ struct rm_priotracker tracker;
int newoid[CTL_MAXNAME];
- SYSCTL_XLOCK();
+ SYSCTL_RLOCK(&tracker);
i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid);
- SYSCTL_XUNLOCK();
+ SYSCTL_RUNLOCK(&tracker);
if (i)
return (ENOENT);
error = SYSCTL_OUT(req, newoid, j * sizeof (int));
@@ -805,7 +1042,7 @@ sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)
* XXXRW/JA: Shouldn't return next data for nodes that we don't permit in
* capability mode.
*/
-static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_CAPRD,
+static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD,
sysctl_sysctl_next, "");
static int
@@ -815,7 +1052,7 @@ name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp)
struct sysctl_oid_list *lsp = &sysctl__children;
char *p;
- SYSCTL_ASSERT_XLOCKED();
+ SYSCTL_ASSERT_LOCKED();
for (*len = 0; *len < CTL_MAXNAME;) {
p = strsep(&name, ".");
@@ -852,7 +1089,8 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
{
char *p;
int error, oid[CTL_MAXNAME], len = 0;
- struct sysctl_oid *op = 0;
+ struct sysctl_oid *op = NULL;
+ struct rm_priotracker tracker;
if (!req->newlen)
return (ENOENT);
@@ -869,9 +1107,9 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
p [req->newlen] = '\0';
- SYSCTL_XLOCK();
+ SYSCTL_RLOCK(&tracker);
error = name2oid(p, oid, &len, &op);
- SYSCTL_XUNLOCK();
+ SYSCTL_RUNLOCK(&tracker);
free(p, M_SYSCTL);
@@ -894,9 +1132,10 @@ static int
sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
{
struct sysctl_oid *oid;
+ struct rm_priotracker tracker;
int error;
- SYSCTL_XLOCK();
+ SYSCTL_RLOCK(&tracker);
error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
if (error)
goto out;
@@ -910,7 +1149,7 @@ sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
goto out;
error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1);
out:
- SYSCTL_XUNLOCK();
+ SYSCTL_RUNLOCK(&tracker);
return (error);
}
@@ -922,9 +1161,10 @@ static int
sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
{
struct sysctl_oid *oid;
+ struct rm_priotracker tracker;
int error;
- SYSCTL_XLOCK();
+ SYSCTL_RLOCK(&tracker);
error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
if (error)
goto out;
@@ -935,11 +1175,11 @@ sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
}
error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1);
out:
- SYSCTL_XUNLOCK();
+ SYSCTL_RUNLOCK(&tracker);
return (error);
}
-static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_CAPRD,
+static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD,
sysctl_sysctl_oiddescr, "");
/*
@@ -947,6 +1187,137 @@ static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_CAPRD,
*/
/*
+ * Handle a bool.
+ * Two cases:
+ * a variable: point arg1 at it.
+ * a constant: pass it in arg2.
+ */
+
+int
+sysctl_handle_bool(SYSCTL_HANDLER_ARGS)
+{
+ uint8_t temp;
+ int error;
+
+ /*
+ * Attempt to get a coherent snapshot by making a copy of the data.
+ */
+ if (arg1)
+ temp = *(bool *)arg1 ? 1 : 0;
+ else
+ temp = arg2 ? 1 : 0;
+
+ error = SYSCTL_OUT(req, &temp, sizeof(temp));
+ if (error || !req->newptr)
+ return (error);
+
+ if (!arg1)
+ error = EPERM;
+ else {
+ error = SYSCTL_IN(req, &temp, sizeof(temp));
+ if (!error)
+ *(bool *)arg1 = temp ? 1 : 0;
+ }
+ return (error);
+}
+
+/*
+ * Handle an int8_t, signed or unsigned.
+ * Two cases:
+ * a variable: point arg1 at it.
+ * a constant: pass it in arg2.
+ */
+
+int
+sysctl_handle_8(SYSCTL_HANDLER_ARGS)
+{
+ int8_t tmpout;
+ int error = 0;
+
+ /*
+ * Attempt to get a coherent snapshot by making a copy of the data.
+ */
+ if (arg1)
+ tmpout = *(int8_t *)arg1;
+ else
+ tmpout = arg2;
+ error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
+
+ if (error || !req->newptr)
+ return (error);
+
+ if (!arg1)
+ error = EPERM;
+ else
+ error = SYSCTL_IN(req, arg1, sizeof(tmpout));
+ return (error);
+}
+
+/*
+ * Handle an int16_t, signed or unsigned.
+ * Two cases:
+ * a variable: point arg1 at it.
+ * a constant: pass it in arg2.
+ */
+
+int
+sysctl_handle_16(SYSCTL_HANDLER_ARGS)
+{
+ int16_t tmpout;
+ int error = 0;
+
+ /*
+ * Attempt to get a coherent snapshot by making a copy of the data.
+ */
+ if (arg1)
+ tmpout = *(int16_t *)arg1;
+ else
+ tmpout = arg2;
+ error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
+
+ if (error || !req->newptr)
+ return (error);
+
+ if (!arg1)
+ error = EPERM;
+ else
+ error = SYSCTL_IN(req, arg1, sizeof(tmpout));
+ return (error);
+}
+
+/*
+ * Handle an int32_t, signed or unsigned.
+ * Two cases:
+ * a variable: point arg1 at it.
+ * a constant: pass it in arg2.
+ */
+
+int
+sysctl_handle_32(SYSCTL_HANDLER_ARGS)
+{
+ int32_t tmpout;
+ int error = 0;
+
+ /*
+ * Attempt to get a coherent snapshot by making a copy of the data.
+ */
+ if (arg1)
+ tmpout = *(int32_t *)arg1;
+ else
+ tmpout = arg2;
+ error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
+
+ if (error || !req->newptr)
+ return (error);
+
+ if (!arg1)
+ error = EPERM;
+ else
+ error = SYSCTL_IN(req, arg1, sizeof(tmpout));
+ return (error);
+}
+
+/*
* Handle an int, signed or unsigned.
* Two cases:
* a variable: point arg1 at it.
@@ -1091,26 +1462,38 @@ sysctl_handle_64(SYSCTL_HANDLER_ARGS)
int
sysctl_handle_string(SYSCTL_HANDLER_ARGS)
{
- int error=0;
- char *tmparg;
size_t outlen;
+ int error = 0, ro_string = 0;
/*
- * Attempt to get a coherent snapshot by copying to a
- * temporary kernel buffer.
+ * A zero-length buffer indicates a fixed size read-only
+ * string:
*/
-retry:
- outlen = strlen((char *)arg1)+1;
- tmparg = malloc(outlen, M_SYSCTLTMP, M_WAITOK);
-
- if (strlcpy(tmparg, (char *)arg1, outlen) >= outlen) {
- free(tmparg, M_SYSCTLTMP);
- goto retry;
+ if (arg2 == 0) {
+ arg2 = strlen((char *)arg1) + 1;
+ ro_string = 1;
}
- error = SYSCTL_OUT(req, tmparg, outlen);
- free(tmparg, M_SYSCTLTMP);
+ if (req->oldptr != NULL) {
+ char *tmparg;
+ if (ro_string) {
+ tmparg = arg1;
+ } else {
+ /* try to make a coherent snapshot of the string */
+ tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK);
+ memcpy(tmparg, arg1, arg2);
+ }
+
+ outlen = strnlen(tmparg, arg2 - 1) + 1;
+ error = SYSCTL_OUT(req, tmparg, outlen);
+
+ if (!ro_string)
+ free(tmparg, M_SYSCTLTMP);
+ } else {
+ outlen = strnlen((char *)arg1, arg2 - 1) + 1;
+ error = SYSCTL_OUT(req, NULL, outlen);
+ }
if (error || !req->newptr)
return (error);
@@ -1121,7 +1504,6 @@ retry:
error = SYSCTL_IN(req, arg1, arg2);
((char *)arg1)[arg2] = '\0';
}
-
return (error);
}
@@ -1243,9 +1625,7 @@ kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old,
req.newfunc = sysctl_new_kernel;
req.lock = REQ_UNWIRED;
- SYSCTL_XLOCK();
error = sysctl_root(0, name, namelen, &req);
- SYSCTL_XUNLOCK();
if (req.lock == REQ_WIRED && req.validlen > 0)
vsunlock(req.oldptr, req.validlen);
@@ -1381,7 +1761,7 @@ sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid,
struct sysctl_oid *oid;
int indx;
- SYSCTL_ASSERT_XLOCKED();
+ SYSCTL_ASSERT_LOCKED();
lsp = &sysctl__children;
indx = 0;
while (indx < CTL_MAXNAME) {
@@ -1426,13 +1806,14 @@ static int
sysctl_root(SYSCTL_HANDLER_ARGS)
{
struct sysctl_oid *oid;
+ struct rm_priotracker tracker;
int error, indx, lvl;
- SYSCTL_ASSERT_XLOCKED();
+ SYSCTL_RLOCK(&tracker);
error = sysctl_find_oid(arg1, arg2, &oid, &indx, req);
if (error)
- return (error);
+ goto out;
if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
/*
@@ -1440,13 +1821,17 @@ sysctl_root(SYSCTL_HANDLER_ARGS)
* no handler. Inform the user that it's a node.
* The indx may or may not be the same as namelen.
*/
- if (oid->oid_handler == NULL)
- return (EISDIR);
+ if (oid->oid_handler == NULL) {
+ error = EISDIR;
+ goto out;
+ }
}
/* Is this sysctl writable? */
- if (req->newptr && !(oid->oid_kind & CTLFLAG_WR))
- return (EPERM);
+ if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) {
+ error = EPERM;
+ goto out;
+ }
#ifndef __rtems__
KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL"));
@@ -1457,10 +1842,11 @@ sysctl_root(SYSCTL_HANDLER_ARGS)
* writing unless specifically granted for the node.
*/
if (IN_CAPABILITY_MODE(req->td)) {
- if (req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD))
- return (EPERM);
- if (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR))
- return (EPERM);
+ if ((req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) ||
+ (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR))) {
+ error = EPERM;
+ goto out;
+ }
}
#endif
@@ -1469,7 +1855,7 @@ sysctl_root(SYSCTL_HANDLER_ARGS)
lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE;
error = securelevel_gt(req->td->td_ucred, lvl);
if (error)
- return (error);
+ goto out;
}
/* Is this sysctl writable by only privileged users? */
@@ -1487,14 +1873,16 @@ sysctl_root(SYSCTL_HANDLER_ARGS)
priv = PRIV_SYSCTL_WRITE;
error = priv_check(req->td, priv);
if (error)
- return (error);
+ goto out;
}
#else /* __rtems__ */
(void) lvl;
#endif /* __rtems__ */
- if (!oid->oid_handler)
- return (EINVAL);
+ if (!oid->oid_handler) {
+ error = EINVAL;
+ goto out;
+ }
if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
arg1 = (int *)arg1 + indx;
@@ -1507,25 +1895,16 @@ sysctl_root(SYSCTL_HANDLER_ARGS)
error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2,
req);
if (error != 0)
- return (error);
+ goto out;
#endif
- oid->oid_running++;
- SYSCTL_XUNLOCK();
-
- if (!(oid->oid_kind & CTLFLAG_MPSAFE))
- mtx_lock(&Giant);
- error = oid->oid_handler(oid, arg1, arg2, req);
- if (!(oid->oid_kind & CTLFLAG_MPSAFE))
- mtx_unlock(&Giant);
-
-#ifndef __rtems__
- KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error);
-#endif /* __rtems__ */
+#ifdef VIMAGE
+ if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL)
+ arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
+#endif
+ error = sysctl_root_handler_locked(oid, arg1, arg2, req, &tracker);
- SYSCTL_XLOCK();
- oid->oid_running--;
- if (oid->oid_running == 0 && (oid->oid_kind & CTLFLAG_DYING) != 0)
- wakeup(&oid->oid_running);
+out:
+ SYSCTL_RUNLOCK(&tracker);
return (error);
}
@@ -1616,7 +1995,7 @@ userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
ktrsysctl(name, namelen);
#endif
- if (req.oldlen > PAGE_SIZE) {
+ if (req.oldptr && req.oldlen > PAGE_SIZE) {
memlocked = 1;
sx_xlock(&sysctlmemlock);
} else
@@ -1626,9 +2005,7 @@ userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
for (;;) {
req.oldidx = 0;
req.newidx = 0;
- SYSCTL_XLOCK();
error = sysctl_root(0, name, namelen, &req);
- SYSCTL_XUNLOCK();
if (error != EAGAIN)
break;
kern_yield(PRI_USER);
@@ -1674,7 +2051,10 @@ sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length,
struct sysctl_req *req)
{
- s = sbuf_new(s, buf, length, SBUF_FIXEDLEN);
+ /* Supply a default buffer size if none given. */
+ if (buf == NULL && length == 0)
+ length = 64;
+ s = sbuf_new(s, buf, length, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
sbuf_set_drain(s, sbuf_sysctl_drain, req);
return (s);
}
diff --git a/freebsd/sys/kern/kern_time.c b/freebsd/sys/kern/kern_time.c
index 44bd206c..2fb4dd2e 100644
--- a/freebsd/sys/kern/kern_time.c
+++ b/freebsd/sys/kern/kern_time.c
@@ -34,6 +34,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_ktrace.h>
+
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/limits.h>
@@ -45,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
+#include <sys/sleepqueue.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
@@ -55,6 +58,9 @@ __FBSDID("$FreeBSD$");
#include <sys/timers.h>
#include <sys/timetc.h>
#include <sys/vnode.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
#include <vm/vm.h>
#include <vm/vm_extern.h>
@@ -119,9 +125,7 @@ settime(struct thread *td, struct timeval *tv)
struct timeval delta, tv1, tv2;
static struct timeval maxtime, laststep;
struct timespec ts;
- int s;
- s = splclock();
microtime(&tv1);
delta = *tv;
timevalsub(&delta, &tv1);
@@ -151,10 +155,8 @@ settime(struct thread *td, struct timeval *tv)
printf("Time adjustment clamped to -1 second\n");
}
} else {
- if (tv1.tv_sec == laststep.tv_sec) {
- splx(s);
+ if (tv1.tv_sec == laststep.tv_sec)
return (EPERM);
- }
if (delta.tv_sec > 1) {
tv->tv_sec = tv1.tv_sec + 1;
printf("Time adjustment clamped to +1 second\n");
@@ -165,10 +167,8 @@ settime(struct thread *td, struct timeval *tv)
ts.tv_sec = tv->tv_sec;
ts.tv_nsec = tv->tv_usec * 1000;
- mtx_lock(&Giant);
tc_setclock(&ts);
resettodr();
- mtx_unlock(&Giant);
return (0);
}
@@ -280,10 +280,10 @@ get_process_cputime(struct proc *targetp, struct timespec *ats)
uint64_t runtime;
struct rusage ru;
- PROC_SLOCK(targetp);
+ PROC_STATLOCK(targetp);
rufetch(targetp, &ru);
runtime = targetp->p_rux.rux_runtime;
- PROC_SUNLOCK(targetp);
+ PROC_STATUNLOCK(targetp);
cputick2timespec(runtime, ats);
}
@@ -332,17 +332,17 @@ kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats)
break;
case CLOCK_VIRTUAL:
PROC_LOCK(p);
- PROC_SLOCK(p);
+ PROC_STATLOCK(p);
calcru(p, &user, &sys);
- PROC_SUNLOCK(p);
+ PROC_STATUNLOCK(p);
PROC_UNLOCK(p);
TIMEVAL_TO_TIMESPEC(&user, ats);
break;
case CLOCK_PROF:
PROC_LOCK(p);
- PROC_SLOCK(p);
+ PROC_STATLOCK(p);
calcru(p, &user, &sys);
- PROC_SUNLOCK(p);
+ PROC_STATUNLOCK(p);
PROC_UNLOCK(p);
timevaladd(&user, &sys);
TIMEVAL_TO_TIMESPEC(&user, ats);
@@ -407,7 +407,8 @@ kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats)
return (error);
if (clock_id != CLOCK_REALTIME)
return (EINVAL);
- if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000)
+ if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000 ||
+ ats->tv_sec < 0)
return (EINVAL);
/* XXX Don't convert nsec->usec and back */
TIMESPEC_TO_TIMEVAL(&atv, ats);
@@ -463,7 +464,7 @@ kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts)
case CLOCK_VIRTUAL:
case CLOCK_PROF:
/* Accurately round up here because we can do so cheaply. */
- ts->tv_nsec = (1000000000 + hz - 1) / hz;
+ ts->tv_nsec = howmany(1000000000, hz);
break;
case CLOCK_SECOND:
ts->tv_sec = 1;
@@ -486,43 +487,50 @@ kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts)
}
#endif
-static int nanowait;
+static uint8_t nanowait[MAXCPU];
int
kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt)
{
- struct timespec ts, ts2, ts3;
- struct timeval tv;
+ struct timespec ts;
+ sbintime_t sbt, sbtt, prec, tmp;
+ time_t over;
int error;
if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000)
return (EINVAL);
if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0))
return (0);
- getnanouptime(&ts);
- timespecadd(&ts, rqt);
- TIMESPEC_TO_TIMEVAL(&tv, rqt);
- for (;;) {
- error = tsleep(&nanowait, PWAIT | PCATCH, "nanslp",
- tvtohz(&tv));
- getnanouptime(&ts2);
- if (error != EWOULDBLOCK) {
- if (error == ERESTART)
- error = EINTR;
- if (rmt != NULL) {
- timespecsub(&ts, &ts2);
- if (ts.tv_sec < 0)
- timespecclear(&ts);
- *rmt = ts;
- }
- return (error);
+ ts = *rqt;
+ if (ts.tv_sec > INT32_MAX / 2) {
+ over = ts.tv_sec - INT32_MAX / 2;
+ ts.tv_sec -= over;
+ } else
+ over = 0;
+ tmp = tstosbt(ts);
+ prec = tmp;
+ prec >>= tc_precexp;
+ if (TIMESEL(&sbt, tmp))
+ sbt += tc_tick_sbt;
+ sbt += tmp;
+ error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp",
+ sbt, prec, C_ABSOLUTE);
+ if (error != EWOULDBLOCK) {
+ if (error == ERESTART)
+ error = EINTR;
+ TIMESEL(&sbtt, tmp);
+ if (rmt != NULL) {
+ ts = sbttots(sbt - sbtt);
+ ts.tv_sec += over;
+ if (ts.tv_sec < 0)
+ timespecclear(&ts);
+ *rmt = ts;
}
- if (timespeccmp(&ts2, &ts, >=))
+ if (sbtt >= sbt)
return (0);
- ts3 = ts;
- timespecsub(&ts3, &ts2);
- TIMESPEC_TO_TIMEVAL(&tv, &ts3);
+ return (error);
}
+ return (0);
}
#ifndef _SYS_SYSPROTO_H_
@@ -623,7 +631,8 @@ kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp)
return (error);
/* Verify all parameters before changing time. */
if (tv) {
- if (tv->tv_usec < 0 || tv->tv_usec >= 1000000)
+ if (tv->tv_usec < 0 || tv->tv_usec >= 1000000 ||
+ tv->tv_sec < 0)
return (EINVAL);
error = settime(td, tv);
}
@@ -693,17 +702,21 @@ kern_getitimer(struct thread *td, u_int which, struct itimerval *aitv)
*aitv = p->p_realtimer;
PROC_UNLOCK(p);
if (timevalisset(&aitv->it_value)) {
- getmicrouptime(&ctv);
+ microuptime(&ctv);
if (timevalcmp(&aitv->it_value, &ctv, <))
timevalclear(&aitv->it_value);
else
timevalsub(&aitv->it_value, &ctv);
}
} else {
- PROC_SLOCK(p);
+ PROC_ITIMLOCK(p);
*aitv = p->p_stats->p_timer[which];
- PROC_SUNLOCK(p);
+ PROC_ITIMUNLOCK(p);
}
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_STRUCT))
+ ktritimerval(aitv);
+#endif
return (0);
}
@@ -738,28 +751,37 @@ kern_setitimer(struct thread *td, u_int which, struct itimerval *aitv,
{
struct proc *p = td->td_proc;
struct timeval ctv;
+ sbintime_t sbt, pr;
if (aitv == NULL)
return (kern_getitimer(td, which, oitv));
if (which > ITIMER_PROF)
return (EINVAL);
- if (itimerfix(&aitv->it_value))
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_STRUCT))
+ ktritimerval(aitv);
+#endif
+ if (itimerfix(&aitv->it_value) ||
+ aitv->it_value.tv_sec > INT32_MAX / 2)
return (EINVAL);
if (!timevalisset(&aitv->it_value))
timevalclear(&aitv->it_interval);
- else if (itimerfix(&aitv->it_interval))
+ else if (itimerfix(&aitv->it_interval) ||
+ aitv->it_interval.tv_sec > INT32_MAX / 2)
return (EINVAL);
if (which == ITIMER_REAL) {
PROC_LOCK(p);
if (timevalisset(&p->p_realtimer.it_value))
callout_stop(&p->p_itcallout);
- getmicrouptime(&ctv);
+ microuptime(&ctv);
if (timevalisset(&aitv->it_value)) {
- callout_reset(&p->p_itcallout, tvtohz(&aitv->it_value),
- realitexpire, p);
+ pr = tvtosbt(aitv->it_value) >> tc_precexp;
timevaladd(&aitv->it_value, &ctv);
+ sbt = tvtosbt(aitv->it_value);
+ callout_reset_sbt(&p->p_itcallout, sbt, pr,
+ realitexpire, p, C_ABSOLUTE);
}
*oitv = p->p_realtimer;
p->p_realtimer = *aitv;
@@ -771,11 +793,23 @@ kern_setitimer(struct thread *td, u_int which, struct itimerval *aitv,
timevalsub(&oitv->it_value, &ctv);
}
} else {
- PROC_SLOCK(p);
+ if (aitv->it_interval.tv_sec == 0 &&
+ aitv->it_interval.tv_usec != 0 &&
+ aitv->it_interval.tv_usec < tick)
+ aitv->it_interval.tv_usec = tick;
+ if (aitv->it_value.tv_sec == 0 &&
+ aitv->it_value.tv_usec != 0 &&
+ aitv->it_value.tv_usec < tick)
+ aitv->it_value.tv_usec = tick;
+ PROC_ITIMLOCK(p);
*oitv = p->p_stats->p_timer[which];
p->p_stats->p_timer[which] = *aitv;
- PROC_SUNLOCK(p);
+ PROC_ITIMUNLOCK(p);
}
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_STRUCT))
+ ktritimerval(oitv);
+#endif
return (0);
}
@@ -795,7 +829,8 @@ void
realitexpire(void *arg)
{
struct proc *p;
- struct timeval ctv, ntv;
+ struct timeval ctv;
+ sbintime_t isbt;
p = (struct proc *)arg;
kern_psignal(p, SIGALRM);
@@ -805,19 +840,17 @@ realitexpire(void *arg)
wakeup(&p->p_itcallout);
return;
}
- for (;;) {
+ isbt = tvtosbt(p->p_realtimer.it_interval);
+ if (isbt >= sbt_timethreshold)
+ getmicrouptime(&ctv);
+ else
+ microuptime(&ctv);
+ do {
timevaladd(&p->p_realtimer.it_value,
&p->p_realtimer.it_interval);
- getmicrouptime(&ctv);
- if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) {
- ntv = p->p_realtimer.it_value;
- timevalsub(&ntv, &ctv);
- callout_reset(&p->p_itcallout, tvtohz(&ntv) - 1,
- realitexpire, p);
- return;
- }
- }
- /*NOTREACHED*/
+ } while (timevalcmp(&p->p_realtimer.it_value, &ctv, <=));
+ callout_reset_sbt(&p->p_itcallout, tvtosbt(p->p_realtimer.it_value),
+ isbt >> tc_precexp, realitexpire, p, C_ABSOLUTE);
}
#endif /* __rtems__ */
@@ -833,8 +866,9 @@ itimerfix(struct timeval *tv)
if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000)
return (EINVAL);
- if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick)
- tv->tv_usec = tick;
+ if (tv->tv_sec == 0 && tv->tv_usec != 0 &&
+ tv->tv_usec < (u_int)tick / 16)
+ tv->tv_usec = (u_int)tick / 16;
return (0);
}
@@ -977,7 +1011,7 @@ ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
return (maxpps != 0);
} else {
(*curpps)++; /* NB: ignore potential overflow */
- return (maxpps < 0 || *curpps < maxpps);
+ return (maxpps < 0 || *curpps <= maxpps);
}
}
diff --git a/freebsd/sys/kern/kern_timeout.c b/freebsd/sys/kern/kern_timeout.c
index 00024aa3..37ec0956 100644
--- a/freebsd/sys/kern/kern_timeout.c
+++ b/freebsd/sys/kern/kern_timeout.c
@@ -39,13 +39,18 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <rtems/bsd/local/opt_kdtrace.h>
+#include <rtems/bsd/local/opt_callout_profiling.h>
+#include <rtems/bsd/local/opt_ddb.h>
+#if defined(__arm__) || defined(__rtems__)
+#include <rtems/bsd/local/opt_timer.h>
+#endif
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/callout.h>
-#include <sys/condvar.h>
+#include <sys/file.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
@@ -58,19 +63,24 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/smp.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#include <machine/_inttypes.h>
+#endif
+
#ifdef SMP
#include <machine/cpu.h>
#endif
-#ifdef __rtems__
-#define ncallout 16
-#endif /* __rtems__ */
+#ifndef NO_EVENTTIMERS
+DPCPU_DECLARE(sbintime_t, hardclocktime);
+#endif
+
SDT_PROVIDER_DEFINE(callout_execute);
-SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__start,
- "struct callout *");
-SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__end,
- "struct callout *");
+SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *");
+SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *");
+#ifdef CALLOUT_PROFILING
static int avg_depth;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
"Average number of items examined per softclock call. Units = 1/1000");
@@ -83,65 +93,106 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
static int avg_mpcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
"Average number of MP callouts made per softclock call. Units = 1/1000");
+static int avg_depth_dir;
+SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
+ "Average number of direct callouts examined per callout_process call. "
+ "Units = 1/1000");
+static int avg_lockcalls_dir;
+SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
+ &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
+ "callout_process call. Units = 1/1000");
+static int avg_mpcalls_dir;
+SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
+ 0, "Average number of MP direct callouts made per callout_process call. "
+ "Units = 1/1000");
+#endif
+
+#ifndef __rtems__
+static int ncallout;
+SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0,
+ "Number of entries in callwheel and size of timeout() preallocation");
+#else /* __rtems__ */
+#define ncallout 16
+#endif /* __rtems__ */
+
+#ifdef RSS
+static int pin_default_swi = 1;
+static int pin_pcpu_swi = 1;
+#else
+static int pin_default_swi = 0;
+static int pin_pcpu_swi = 0;
+#endif
+
+SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi,
+ 0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)");
+SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi,
+ 0, "Pin the per-CPU swis (except PCPU 0, which is also default");
+
/*
* TODO:
* allocate more timeout table slots when table overflows.
*/
-int callwheelsize, callwheelbits, callwheelmask;
+u_int callwheelsize, callwheelmask;
/*
- * The callout cpu migration entity represents informations necessary for
- * describing the migrating callout to the new callout cpu.
+ * The callout cpu exec entities represent informations necessary for
+ * describing the state of callouts currently running on the CPU and the ones
+ * necessary for migrating callouts to the new callout cpu. In particular,
+ * the first entry of the array cc_exec_entity holds informations for callout
+ * running in SWI thread context, while the second one holds informations
+ * for callout running directly from hardware interrupt context.
* The cached informations are very important for deferring migration when
* the migrating callout is already running.
*/
-struct cc_mig_ent {
+struct cc_exec {
+ struct callout *cc_curr;
+ void (*cc_drain)(void *);
#ifdef SMP
- void (*ce_migration_func)(void *);
- void *ce_migration_arg;
- int ce_migration_cpu;
- int ce_migration_ticks;
+ void (*ce_migration_func)(void *);
+ void *ce_migration_arg;
+ int ce_migration_cpu;
+ sbintime_t ce_migration_time;
+ sbintime_t ce_migration_prec;
#endif
+ bool cc_cancel;
+ bool cc_waiting;
};
-
+
/*
* There is one struct callout_cpu per cpu, holding all relevant
* state for the callout processing thread on the individual CPU.
- * In particular:
- * cc_ticks is incremented once per tick in callout_cpu().
- * It tracks the global 'ticks' but in a way that the individual
- * threads should not worry about races in the order in which
- * hardclock() and hardclock_cpu() run on the various CPUs.
- * cc_softclock is advanced in callout_cpu() to point to the
- * first entry in cc_callwheel that may need handling. In turn,
- * a softclock() is scheduled so it can serve the various entries i
- * such that cc_softclock <= i <= cc_ticks .
- * XXX maybe cc_softclock and cc_ticks should be volatile ?
- *
- * cc_ticks is also used in callout_reset_cpu() to determine
- * when the callout should be served.
*/
struct callout_cpu {
- struct cc_mig_ent cc_migrating_entity;
- struct mtx cc_lock;
- struct callout *cc_callout;
- struct callout_tailq *cc_callwheel;
- struct callout_list cc_callfree;
+ struct mtx_padalign cc_lock;
+ struct cc_exec cc_exec_entity[2];
struct callout *cc_next;
- struct callout *cc_curr;
+ struct callout *cc_callout;
+ struct callout_list *cc_callwheel;
+#ifndef __rtems__
+ struct callout_tailq cc_expireq;
+#endif /* __rtems__ */
+ struct callout_slist cc_callfree;
+ sbintime_t cc_firstevent;
+ sbintime_t cc_lastscan;
void *cc_cookie;
- int cc_ticks;
- int cc_softticks;
- int cc_cancel;
- int cc_waiting;
- int cc_firsttick;
+ u_int cc_bucket;
+ u_int cc_inited;
+ char cc_ktr_event_name[20];
};
+#define callout_migrating(c) ((c)->c_iflags & CALLOUT_DFRMIGRATION)
+
+#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr
+#define cc_exec_drain(cc, dir) cc->cc_exec_entity[dir].cc_drain
+#define cc_exec_next(cc) cc->cc_next
+#define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel
+#define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting
#ifdef SMP
-#define cc_migration_func cc_migrating_entity.ce_migration_func
-#define cc_migration_arg cc_migrating_entity.ce_migration_arg
-#define cc_migration_cpu cc_migrating_entity.ce_migration_cpu
-#define cc_migration_ticks cc_migrating_entity.ce_migration_ticks
+#define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func
+#define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg
+#define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu
+#define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time
+#define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec
struct callout_cpu cc_cpu[MAXCPU];
#define CPUBLOCK MAXCPU
@@ -157,39 +208,49 @@ struct callout_cpu cc_cpu;
#define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED)
static int timeout_cpu;
-void (*callout_new_inserted)(int cpu, int ticks) = NULL;
+
+static void callout_cpu_init(struct callout_cpu *cc, int cpu);
+static void softclock_call_cc(struct callout *c, struct callout_cpu *cc,
+#ifdef CALLOUT_PROFILING
+ int *mpcalls, int *lockcalls, int *gcalls,
+#endif
+ int direct);
static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
/**
* Locked by cc_lock:
- * cc_curr - If a callout is in progress, it is curr_callout.
- * If curr_callout is non-NULL, threads waiting in
+ * cc_curr - If a callout is in progress, it is cc_curr.
+ * If cc_curr is non-NULL, threads waiting in
* callout_drain() will be woken up as soon as the
* relevant callout completes.
- * cc_cancel - Changing to 1 with both callout_lock and c_lock held
+ * cc_cancel - Changing to 1 with both callout_lock and cc_lock held
* guarantees that the current callout will not run.
* The softclock() function sets this to 0 before it
* drops callout_lock to acquire c_lock, and it calls
* the handler only if curr_cancelled is still 0 after
- * c_lock is successfully acquired.
+ * cc_lock is successfully acquired.
* cc_waiting - If a thread is waiting in callout_drain(), then
* callout_wait is nonzero. Set only when
- * curr_callout is non-NULL.
+ * cc_curr is non-NULL.
*/
/*
- * Resets the migration entity tied to a specific callout cpu.
+ * Resets the execution entity tied to a specific callout cpu.
*/
static void
-cc_cme_cleanup(struct callout_cpu *cc)
+cc_cce_cleanup(struct callout_cpu *cc, int direct)
{
+ cc_exec_curr(cc, direct) = NULL;
+ cc_exec_cancel(cc, direct) = false;
+ cc_exec_waiting(cc, direct) = false;
#ifdef SMP
- cc->cc_migration_cpu = CPUBLOCK;
- cc->cc_migration_ticks = 0;
- cc->cc_migration_func = NULL;
- cc->cc_migration_arg = NULL;
+ cc_migration_cpu(cc, direct) = CPUBLOCK;
+ cc_migration_time(cc, direct) = 0;
+ cc_migration_prec(cc, direct) = 0;
+ cc_migration_func(cc, direct) = NULL;
+ cc_migration_arg(cc, direct) = NULL;
#endif
}
@@ -197,27 +258,23 @@ cc_cme_cleanup(struct callout_cpu *cc)
* Checks if migration is requested by a specific callout cpu.
*/
static int
-cc_cme_migrating(struct callout_cpu *cc)
+cc_cce_migrating(struct callout_cpu *cc, int direct)
{
#ifdef SMP
- return (cc->cc_migration_cpu != CPUBLOCK);
+ return (cc_migration_cpu(cc, direct) != CPUBLOCK);
#else
return (0);
#endif
}
/*
- * kern_timeout_callwheel_alloc() - kernel low level callwheel initialization
- *
- * This code is called very early in the kernel initialization sequence,
- * and may be called more then once.
+ * Kernel low level callwheel initialization
+ * called on cpu0 during kernel startup.
*/
#ifdef __rtems__
static void rtems_bsd_timeout_init_early(void *);
-static void callout_cpu_init(struct callout_cpu *);
-
static void
rtems_bsd_callout_timer(rtems_id id, void *arg)
{
@@ -228,7 +285,7 @@ rtems_bsd_callout_timer(rtems_id id, void *arg)
sc = rtems_timer_reset(id);
BSD_ASSERT(sc == RTEMS_SUCCESSFUL);
- callout_tick();
+ callout_process(sbinuptime());
}
static void
@@ -253,63 +310,90 @@ SYSINIT(rtems_bsd_timeout_late, SI_SUB_LAST, SI_ORDER_FIRST,
rtems_bsd_timeout_init_late, NULL);
static void
-rtems_bsd_timeout_init_early(void *unused)
+rtems_bsd_timeout_init_early(void *dummy)
#else /* __rtems__ */
-caddr_t
-kern_timeout_callwheel_alloc(caddr_t v)
+static void
+callout_callwheel_init(void *dummy)
#endif /* __rtems__ */
{
struct callout_cpu *cc;
#ifdef __rtems__
- caddr_t v;
+ (void) dummy;
+#endif /* __rtems__ */
- (void) unused;
+ /*
+ * Calculate the size of the callout wheel and the preallocated
+ * timeout() structures.
+ * XXX: Clip callout to result of previous function of maxusers
+ * maximum 384. This is still huge, but acceptable.
+ */
+ memset(CC_CPU(0), 0, sizeof(cc_cpu));
+#ifndef __rtems__
+ ncallout = imin(16 + maxproc + maxfiles, 18508);
+ TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
#endif /* __rtems__ */
- timeout_cpu = PCPU_GET(cpuid);
- cc = CC_CPU(timeout_cpu);
/*
- * Calculate callout wheel size
+ * Calculate callout wheel size, should be next power of two higher
+ * than 'ncallout'.
*/
- for (callwheelsize = 1, callwheelbits = 0;
- callwheelsize < ncallout;
- callwheelsize <<= 1, ++callwheelbits)
- ;
+ callwheelsize = 1 << fls(ncallout);
callwheelmask = callwheelsize - 1;
-#ifdef __rtems__
- v = malloc(ncallout * sizeof(*cc->cc_callout) + callwheelsize
- * sizeof(*cc->cc_callwheel), M_CALLOUT, M_ZERO | M_WAITOK);
-#endif /* __rtems__ */
- cc->cc_callout = (struct callout *)v;
- v = (caddr_t)(cc->cc_callout + ncallout);
- cc->cc_callwheel = (struct callout_tailq *)v;
- v = (caddr_t)(cc->cc_callwheel + callwheelsize);
#ifndef __rtems__
- return(v);
-#else /* __rtems__ */
- callout_cpu_init(cc);
+ /*
+ * Fetch whether we're pinning the swi's or not.
+ */
+ TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi);
+ TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi);
#endif /* __rtems__ */
+
+ /*
+ * Only cpu0 handles timeout(9) and receives a preallocation.
+ *
+ * XXX: Once all timeout(9) consumers are converted this can
+ * be removed.
+ */
+ timeout_cpu = PCPU_GET(cpuid);
+ cc = CC_CPU(timeout_cpu);
+ cc->cc_callout = malloc(ncallout * sizeof(struct callout),
+ M_CALLOUT, M_WAITOK);
+ callout_cpu_init(cc, timeout_cpu);
}
+#ifndef __rtems__
+SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL);
+#endif /* __rtems__ */
+/*
+ * Initialize the per-cpu callout structures.
+ */
static void
-callout_cpu_init(struct callout_cpu *cc)
+callout_cpu_init(struct callout_cpu *cc, int cpu)
{
struct callout *c;
int i;
mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
SLIST_INIT(&cc->cc_callfree);
- for (i = 0; i < callwheelsize; i++) {
- TAILQ_INIT(&cc->cc_callwheel[i]);
- }
- cc_cme_cleanup(cc);
- if (cc->cc_callout == NULL)
+ cc->cc_inited = 1;
+ cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
+ M_CALLOUT, M_WAITOK);
+ for (i = 0; i < callwheelsize; i++)
+ LIST_INIT(&cc->cc_callwheel[i]);
+#ifndef __rtems__
+ TAILQ_INIT(&cc->cc_expireq);
+#endif /* __rtems__ */
+ cc->cc_firstevent = SBT_MAX;
+ for (i = 0; i < 2; i++)
+ cc_cce_cleanup(cc, i);
+ snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
+ "callwheel cpu %d", cpu);
+ if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */
return;
for (i = 0; i < ncallout; i++) {
c = &cc->cc_callout[i];
callout_init(c, 0);
- c->c_flags = CALLOUT_LOCAL_ALLOC;
+ c->c_iflags = CALLOUT_LOCAL_ALLOC;
SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}
}
@@ -346,109 +430,201 @@ callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
#ifndef __rtems__
/*
- * kern_timeout_callwheel_init() - initialize previously reserved callwheel
- * space.
- *
- * This code is called just once, after the space reserved for the
- * callout wheel has been finalized.
- */
-void
-kern_timeout_callwheel_init(void)
-{
- callout_cpu_init(CC_CPU(timeout_cpu));
-}
-#endif /* __rtems__ */
-
-/*
* Start standard softclock thread.
*/
static void
start_softclock(void *dummy)
{
struct callout_cpu *cc;
+ char name[MAXCOMLEN];
#ifdef SMP
int cpu;
+ struct intr_event *ie;
#endif
cc = CC_CPU(timeout_cpu);
- if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK,
+ snprintf(name, sizeof(name), "clock (%d)", timeout_cpu);
+ if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK,
INTR_MPSAFE, &cc->cc_cookie))
panic("died while creating standard software ithreads");
+ if (pin_default_swi &&
+ (intr_event_bind(clk_intr_event, timeout_cpu) != 0)) {
+ printf("%s: timeout clock couldn't be pinned to cpu %d\n",
+ __func__,
+ timeout_cpu);
+ }
+
#ifdef SMP
CPU_FOREACH(cpu) {
if (cpu == timeout_cpu)
continue;
cc = CC_CPU(cpu);
- if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK,
+ cc->cc_callout = NULL; /* Only cpu0 handles timeout(9). */
+ callout_cpu_init(cc, cpu);
+ snprintf(name, sizeof(name), "clock (%d)", cpu);
+ ie = NULL;
+ if (swi_add(&ie, name, softclock, cc, SWI_CLOCK,
INTR_MPSAFE, &cc->cc_cookie))
panic("died while creating standard software ithreads");
- cc->cc_callout = NULL; /* Only cpu0 handles timeout(). */
- cc->cc_callwheel = malloc(
- sizeof(struct callout_tailq) * callwheelsize, M_CALLOUT,
- M_WAITOK);
- callout_cpu_init(cc);
+ if (pin_pcpu_swi && (intr_event_bind(ie, cpu) != 0)) {
+ printf("%s: per-cpu clock couldn't be pinned to "
+ "cpu %d\n",
+ __func__,
+ cpu);
+ }
}
#endif
}
-
SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);
+#endif /* __rtems__ */
+
+#define CC_HASH_SHIFT 8
+
+static inline u_int
+callout_hash(sbintime_t sbt)
+{
+
+ return (sbt >> (32 - CC_HASH_SHIFT));
+}
+
+static inline u_int
+callout_get_bucket(sbintime_t sbt)
+{
+
+ return (callout_hash(sbt) & callwheelmask);
+}
void
-callout_tick(void)
+callout_process(sbintime_t now)
{
+ struct callout *tmp, *tmpn;
struct callout_cpu *cc;
- int need_softclock;
- int bucket;
+ struct callout_list *sc;
+ sbintime_t first, last, max, tmp_max;
+ uint32_t lookahead;
+ u_int firstb, lastb, nowb;
+#ifdef CALLOUT_PROFILING
+ int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
+#endif
- /*
- * Process callouts at a very low cpu priority, so we don't keep the
- * relatively high clock interrupt priority any longer than necessary.
- */
- need_softclock = 0;
cc = CC_SELF();
mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
- cc->cc_firsttick = cc->cc_ticks = ticks;
- for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) {
- bucket = cc->cc_softticks & callwheelmask;
- if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) {
- need_softclock = 1;
- break;
- }
+
+ /* Compute the buckets of the last scan and present times. */
+ firstb = callout_hash(cc->cc_lastscan);
+ cc->cc_lastscan = now;
+ nowb = callout_hash(now);
+
+ /* Compute the last bucket and minimum time of the bucket after it. */
+ if (nowb == firstb)
+ lookahead = (SBT_1S / 16);
+ else if (nowb - firstb == 1)
+ lookahead = (SBT_1S / 8);
+ else
+ lookahead = (SBT_1S / 2);
+ first = last = now;
+ first += (lookahead / 2);
+ last += lookahead;
+ last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT));
+ lastb = callout_hash(last) - 1;
+ max = last;
+
+ /*
+ * Check if we wrapped around the entire wheel from the last scan.
+ * In case, we need to scan entirely the wheel for pending callouts.
+ */
+ if (lastb - firstb >= callwheelsize) {
+ lastb = firstb + callwheelsize - 1;
+ if (nowb - firstb >= callwheelsize)
+ nowb = lastb;
}
+
+ /* Iterate callwheel from firstb to nowb and then up to lastb. */
+ do {
+ sc = &cc->cc_callwheel[firstb & callwheelmask];
+ tmp = LIST_FIRST(sc);
+ while (tmp != NULL) {
+ /* Run the callout if present time within allowed. */
+ if (tmp->c_time <= now) {
+#ifndef __rtems__
+ /*
+ * Consumer told us the callout may be run
+ * directly from hardware interrupt context.
+ */
+ if (tmp->c_iflags & CALLOUT_DIRECT) {
+#endif /* __rtems__ */
+#ifdef CALLOUT_PROFILING
+ ++depth_dir;
+#endif
+ cc_exec_next(cc) =
+ LIST_NEXT(tmp, c_links.le);
+ cc->cc_bucket = firstb & callwheelmask;
+ LIST_REMOVE(tmp, c_links.le);
+ softclock_call_cc(tmp, cc,
+#ifdef CALLOUT_PROFILING
+ &mpcalls_dir, &lockcalls_dir, NULL,
+#endif
+ 1);
+ tmp = cc_exec_next(cc);
+ cc_exec_next(cc) = NULL;
+#ifndef __rtems__
+ } else {
+ tmpn = LIST_NEXT(tmp, c_links.le);
+ LIST_REMOVE(tmp, c_links.le);
+ TAILQ_INSERT_TAIL(&cc->cc_expireq,
+ tmp, c_links.tqe);
+ tmp->c_iflags |= CALLOUT_PROCESSED;
+ tmp = tmpn;
+ }
+#endif /* __rtems__ */
+ continue;
+ }
+ /* Skip events from distant future. */
+ if (tmp->c_time >= max)
+ goto next;
+ /*
+ * Event minimal time is bigger than present maximal
+ * time, so it cannot be aggregated.
+ */
+ if (tmp->c_time > last) {
+ lastb = nowb;
+ goto next;
+ }
+ /* Update first and last time, respecting this event. */
+ if (tmp->c_time < first)
+ first = tmp->c_time;
+ tmp_max = tmp->c_time + tmp->c_precision;
+ if (tmp_max < last)
+ last = tmp_max;
+next:
+ tmp = LIST_NEXT(tmp, c_links.le);
+ }
+ /* Proceed with the next bucket. */
+ firstb++;
+ /*
+ * Stop if we looked after present time and found
+ * some event we can't execute at now.
+ * Stop if we looked far enough into the future.
+ */
+ } while (((int)(firstb - lastb)) <= 0);
+ cc->cc_firstevent = last;
+#ifndef NO_EVENTTIMERS
+ cpu_new_callout(curcpu, last, first);
+#endif
+#ifdef CALLOUT_PROFILING
+ avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
+ avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
+ avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
+#endif
mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
+#ifndef __rtems__
/*
* swi_sched acquires the thread lock, so we don't want to call it
* with cc_lock held; incorrect locking order.
*/
- if (need_softclock)
+ if (!TAILQ_EMPTY(&cc->cc_expireq))
swi_sched(cc->cc_cookie, 0);
-}
-
-int
-callout_tickstofirst(int limit)
-{
- struct callout_cpu *cc;
- struct callout *c;
- struct callout_tailq *sc;
- int curticks;
- int skip = 1;
-
- cc = CC_SELF();
- mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
- curticks = cc->cc_ticks;
- while( skip < ncallout && skip < limit ) {
- sc = &cc->cc_callwheel[ (curticks+skip) & callwheelmask ];
- /* search scanning ticks */
- TAILQ_FOREACH( c, sc, c_links.tqe ){
- if (c->c_time - curticks <= ncallout)
- goto out;
- }
- skip++;
- }
-out:
- cc->cc_firsttick = curticks + skip;
- mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
- return (skip);
+#endif /* __rtems__ */
}
static struct callout_cpu *
@@ -476,169 +652,224 @@ callout_lock(struct callout *c)
}
static void
-callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks,
- void (*func)(void *), void *arg, int cpu)
+callout_cc_add(struct callout *c, struct callout_cpu *cc,
+ sbintime_t sbt, sbintime_t precision, void (*func)(void *),
+ void *arg, int cpu, int flags)
{
+ int bucket;
CC_LOCK_ASSERT(cc);
-
- if (to_ticks <= 0)
- to_ticks = 1;
+ if (sbt < cc->cc_lastscan)
+ sbt = cc->cc_lastscan;
c->c_arg = arg;
- c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+ c->c_iflags |= CALLOUT_PENDING;
+ c->c_iflags &= ~CALLOUT_PROCESSED;
+ c->c_flags |= CALLOUT_ACTIVE;
+ if (flags & C_DIRECT_EXEC)
+ c->c_iflags |= CALLOUT_DIRECT;
c->c_func = func;
- c->c_time = ticks + to_ticks;
- TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
- c, c_links.tqe);
- if ((c->c_time - cc->cc_firsttick) < 0 &&
- callout_new_inserted != NULL) {
- cc->cc_firsttick = c->c_time;
- (*callout_new_inserted)(cpu,
- to_ticks + (ticks - cc->cc_ticks));
+ c->c_time = sbt;
+ c->c_precision = precision;
+ bucket = callout_get_bucket(c->c_time);
+ CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
+ c, (int)(c->c_precision >> 32),
+ (u_int)(c->c_precision & 0xffffffff));
+ LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
+ if (cc->cc_bucket == bucket)
+ cc_exec_next(cc) = c;
+#ifndef NO_EVENTTIMERS
+ /*
+ * Inform the eventtimers(4) subsystem there's a new callout
+ * that has been inserted, but only if really required.
+ */
+ if (SBT_MAX - c->c_time < c->c_precision)
+ c->c_precision = SBT_MAX - c->c_time;
+ sbt = c->c_time + c->c_precision;
+ if (sbt < cc->cc_firstevent) {
+ cc->cc_firstevent = sbt;
+ cpu_new_callout(cpu, sbt, c->c_time);
}
+#endif
}
static void
callout_cc_del(struct callout *c, struct callout_cpu *cc)
{
- if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0)
+ if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0)
return;
c->c_func = NULL;
SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}
static void
-softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
- int *lockcalls, int *gcalls)
+softclock_call_cc(struct callout *c, struct callout_cpu *cc,
+#ifdef CALLOUT_PROFILING
+ int *mpcalls, int *lockcalls, int *gcalls,
+#endif
+ int direct)
{
+#ifndef __rtems__
+ struct rm_priotracker tracker;
+#endif /* __rtems__ */
void (*c_func)(void *);
void *c_arg;
struct lock_class *class;
struct lock_object *c_lock;
- int c_flags, sharedlock;
+ uintptr_t lock_status;
+ int c_iflags;
#ifdef SMP
struct callout_cpu *new_cc;
void (*new_func)(void *);
void *new_arg;
- int new_cpu, new_ticks;
+ int flags, new_cpu;
+ sbintime_t new_prec, new_time;
#endif
-#ifdef DIAGNOSTIC
- struct bintime bt1, bt2;
+#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
+ sbintime_t sbt1, sbt2;
struct timespec ts2;
- static uint64_t maxdt = 36893488147419102LL; /* 2 msec */
+ static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */
static timeout_t *lastfunc;
#endif
- KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) ==
- (CALLOUT_PENDING | CALLOUT_ACTIVE),
- ("softclock_call_cc: pend|act %p %x", c, c->c_flags));
+ KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING,
+ ("softclock_call_cc: pend %p %x", c, c->c_iflags));
+ KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE,
+ ("softclock_call_cc: act %p %x", c, c->c_flags));
class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
- sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
+ lock_status = 0;
+ if (c->c_flags & CALLOUT_SHAREDLOCK) {
+#ifndef __rtems__
+ if (class == &lock_class_rm)
+ lock_status = (uintptr_t)&tracker;
+ else
+#endif /* __rtems__ */
+ lock_status = 1;
+ }
c_lock = c->c_lock;
c_func = c->c_func;
c_arg = c->c_arg;
- c_flags = c->c_flags;
- if (c->c_flags & CALLOUT_LOCAL_ALLOC)
- c->c_flags = CALLOUT_LOCAL_ALLOC;
+ c_iflags = c->c_iflags;
+ if (c->c_iflags & CALLOUT_LOCAL_ALLOC)
+ c->c_iflags = CALLOUT_LOCAL_ALLOC;
else
- c->c_flags &= ~CALLOUT_PENDING;
- cc->cc_curr = c;
- cc->cc_cancel = 0;
+ c->c_iflags &= ~CALLOUT_PENDING;
+
+ cc_exec_curr(cc, direct) = c;
+ cc_exec_cancel(cc, direct) = false;
+ cc_exec_drain(cc, direct) = NULL;
CC_UNLOCK(cc);
if (c_lock != NULL) {
- class->lc_lock(c_lock, sharedlock);
+ class->lc_lock(c_lock, lock_status);
/*
* The callout may have been cancelled
* while we switched locks.
*/
- if (cc->cc_cancel) {
+ if (cc_exec_cancel(cc, direct)) {
class->lc_unlock(c_lock);
goto skip;
}
/* The callout cannot be stopped now. */
- cc->cc_cancel = 1;
-
+ cc_exec_cancel(cc, direct) = true;
if (c_lock == &Giant.lock_object) {
+#ifdef CALLOUT_PROFILING
(*gcalls)++;
- CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
+#endif
+ CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
c, c_func, c_arg);
} else {
+#ifdef CALLOUT_PROFILING
(*lockcalls)++;
+#endif
CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
c, c_func, c_arg);
}
} else {
+#ifdef CALLOUT_PROFILING
(*mpcalls)++;
- CTR3(KTR_CALLOUT, "callout mpsafe %p func %p arg %p",
+#endif
+ CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
c, c_func, c_arg);
}
-#ifdef DIAGNOSTIC
- binuptime(&bt1);
+ KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
+ "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
+#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
+ sbt1 = sbinuptime();
#endif
#ifndef __rtems__
THREAD_NO_SLEEPING();
- SDT_PROBE(callout_execute, kernel, , callout__start, c, 0, 0, 0, 0);
+ SDT_PROBE1(callout_execute, , , callout__start, c);
#endif /* __rtems__ */
c_func(c_arg);
#ifndef __rtems__
- SDT_PROBE(callout_execute, kernel, , callout__end, c, 0, 0, 0, 0);
+ SDT_PROBE1(callout_execute, , , callout__end, c);
THREAD_SLEEPING_OK();
#endif /* __rtems__ */
-#ifdef DIAGNOSTIC
- binuptime(&bt2);
- bintime_sub(&bt2, &bt1);
- if (bt2.frac > maxdt) {
- if (lastfunc != c_func || bt2.frac > maxdt * 2) {
- bintime2timespec(&bt2, &ts2);
+#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
+ sbt2 = sbinuptime();
+ sbt2 -= sbt1;
+ if (sbt2 > maxdt) {
+ if (lastfunc != c_func || sbt2 > maxdt * 2) {
+ ts2 = sbttots(sbt2);
printf(
"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
}
- maxdt = bt2.frac;
+ maxdt = sbt2;
lastfunc = c_func;
}
#endif
+ KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
CTR1(KTR_CALLOUT, "callout %p finished", c);
- if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
+ if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0)
class->lc_unlock(c_lock);
skip:
CC_LOCK(cc);
- KASSERT(cc->cc_curr == c, ("mishandled cc_curr"));
- cc->cc_curr = NULL;
- if (cc->cc_waiting) {
+ KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
+ cc_exec_curr(cc, direct) = NULL;
+ if (cc_exec_drain(cc, direct)) {
+ void (*drain)(void *);
+
+ drain = cc_exec_drain(cc, direct);
+ cc_exec_drain(cc, direct) = NULL;
+ CC_UNLOCK(cc);
+ drain(c_arg);
+ CC_LOCK(cc);
+ }
+ if (cc_exec_waiting(cc, direct)) {
/*
* There is someone waiting for the
* callout to complete.
* If the callout was scheduled for
* migration just cancel it.
*/
- if (cc_cme_migrating(cc)) {
- cc_cme_cleanup(cc);
+ if (cc_cce_migrating(cc, direct)) {
+ cc_cce_cleanup(cc, direct);
/*
* It should be assert here that the callout is not
* destroyed but that is not easy.
*/
- c->c_flags &= ~CALLOUT_DFRMIGRATION;
+ c->c_iflags &= ~CALLOUT_DFRMIGRATION;
}
- cc->cc_waiting = 0;
+ cc_exec_waiting(cc, direct) = false;
CC_UNLOCK(cc);
- wakeup(&cc->cc_waiting);
+ wakeup(&cc_exec_waiting(cc, direct));
CC_LOCK(cc);
- } else if (cc_cme_migrating(cc)) {
- KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0,
+ } else if (cc_cce_migrating(cc, direct)) {
+ KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0,
("Migrating legacy callout %p", c));
#ifdef SMP
/*
* If the callout was scheduled for
* migration just perform it now.
*/
- new_cpu = cc->cc_migration_cpu;
- new_ticks = cc->cc_migration_ticks;
- new_func = cc->cc_migration_func;
- new_arg = cc->cc_migration_arg;
- cc_cme_cleanup(cc);
+ new_cpu = cc_migration_cpu(cc, direct);
+ new_time = cc_migration_time(cc, direct);
+ new_prec = cc_migration_prec(cc, direct);
+ new_func = cc_migration_func(cc, direct);
+ new_arg = cc_migration_arg(cc, direct);
+ cc_cce_cleanup(cc, direct);
/*
* It should be assert here that the callout is not destroyed
@@ -646,18 +877,19 @@ skip:
*
* As first thing, handle deferred callout stops.
*/
- if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) {
+ if (!callout_migrating(c)) {
CTR3(KTR_CALLOUT,
"deferred cancelled %p func %p arg %p",
c, new_func, new_arg);
callout_cc_del(c, cc);
return;
}
- c->c_flags &= ~CALLOUT_DFRMIGRATION;
+ c->c_iflags &= ~CALLOUT_DFRMIGRATION;
new_cc = callout_cpu_switch(c, cc, new_cpu);
- callout_cc_add(c, new_cc, new_ticks, new_func, new_arg,
- new_cpu);
+ flags = (direct) ? C_DIRECT_EXEC : 0;
+ callout_cc_add(c, new_cc, new_time, new_prec, new_func,
+ new_arg, new_cpu, flags);
CC_UNLOCK(new_cc);
CC_LOCK(cc);
#else
@@ -668,19 +900,19 @@ skip:
* If the current callout is locally allocated (from
* timeout(9)) then put it on the freelist.
*
- * Note: we need to check the cached copy of c_flags because
+ * Note: we need to check the cached copy of c_iflags because
* if it was not local, then it's not safe to deref the
* callout pointer.
*/
- KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 ||
- c->c_flags == CALLOUT_LOCAL_ALLOC,
+ KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 ||
+ c->c_iflags == CALLOUT_LOCAL_ALLOC,
("corrupted callout"));
- if (c_flags & CALLOUT_LOCAL_ALLOC)
+ if (c_iflags & CALLOUT_LOCAL_ALLOC)
callout_cc_del(c, cc);
}
/*
- * The callout mechanism is based on the work of Adam M. Costello and
+ * The callout mechanism is based on the work of Adam M. Costello and
* George Varghese, published in a technical report entitled "Redesigning
* the BSD Callout and Timer Facilities" and modified slightly for inclusion
* in FreeBSD by Justin T. Gibbs. The original work on the data structures
@@ -691,6 +923,7 @@ skip:
* Austin, Texas Nov 1987.
*/
+#ifndef __rtems__
/*
* Software (low priority) clock interrupt.
* Run periodic events from timeout queue.
@@ -700,65 +933,32 @@ softclock(void *arg)
{
struct callout_cpu *cc;
struct callout *c;
- struct callout_tailq *bucket;
- int curticks;
- int steps; /* #steps since we last allowed interrupts */
- int depth;
- int mpcalls;
- int lockcalls;
- int gcalls;
-
-#ifndef MAX_SOFTCLOCK_STEPS
-#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
-#endif /* MAX_SOFTCLOCK_STEPS */
-
- mpcalls = 0;
- lockcalls = 0;
- gcalls = 0;
- depth = 0;
- steps = 0;
+#ifdef CALLOUT_PROFILING
+ int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
+#endif
+
cc = (struct callout_cpu *)arg;
CC_LOCK(cc);
- while (cc->cc_softticks - 1 != cc->cc_ticks) {
- /*
- * cc_softticks may be modified by hard clock, so cache
- * it while we work on a given bucket.
- */
- curticks = cc->cc_softticks;
- cc->cc_softticks++;
- bucket = &cc->cc_callwheel[curticks & callwheelmask];
- c = TAILQ_FIRST(bucket);
- while (c != NULL) {
- depth++;
- if (c->c_time != curticks) {
- c = TAILQ_NEXT(c, c_links.tqe);
- ++steps;
- if (steps >= MAX_SOFTCLOCK_STEPS) {
- cc->cc_next = c;
- /* Give interrupts a chance. */
- CC_UNLOCK(cc);
- ; /* nothing */
- CC_LOCK(cc);
- c = cc->cc_next;
- steps = 0;
- }
- } else {
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
- TAILQ_REMOVE(bucket, c, c_links.tqe);
- softclock_call_cc(c, cc, &mpcalls,
- &lockcalls, &gcalls);
- steps = 0;
- c = cc->cc_next;
- }
- }
+ while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+ softclock_call_cc(c, cc,
+#ifdef CALLOUT_PROFILING
+ &mpcalls, &lockcalls, &gcalls,
+#endif
+ 0);
+#ifdef CALLOUT_PROFILING
+ ++depth;
+#endif
}
+#ifdef CALLOUT_PROFILING
avg_depth += (depth * 1000 - avg_depth) >> 8;
avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
- cc->cc_next = NULL;
+#endif
CC_UNLOCK(cc);
}
+#endif /* __rtems__ */
/*
* timeout --
@@ -771,16 +971,13 @@ softclock(void *arg)
* Initialize a handle so that using it with untimeout is benign.
*
* See AT&T BCI Driver Reference Manual for specification. This
- * implementation differs from that one in that although an
+ * implementation differs from that one in that although an
* identification value is returned from timeout, the original
* arguments to timeout as well as the identifier are used to
* identify entries for untimeout.
*/
struct callout_handle
-timeout(ftn, arg, to_ticks)
- timeout_t *ftn;
- void *arg;
- int to_ticks;
+timeout(timeout_t *ftn, void *arg, int to_ticks)
{
struct callout_cpu *cc;
struct callout *new;
@@ -802,10 +999,7 @@ timeout(ftn, arg, to_ticks)
}
void
-untimeout(ftn, arg, handle)
- timeout_t *ftn;
- void *arg;
- struct callout_handle handle;
+untimeout(timeout_t *ftn, void *arg, struct callout_handle handle)
{
struct callout_cpu *cc;
@@ -829,6 +1023,56 @@ callout_handle_init(struct callout_handle *handle)
handle->callout = NULL;
}
+void
+callout_when(sbintime_t sbt, sbintime_t precision, int flags,
+ sbintime_t *res, sbintime_t *prec_res)
+{
+ sbintime_t to_sbt, to_pr;
+
+ if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) {
+ *res = sbt;
+ *prec_res = precision;
+ return;
+ }
+ if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt)
+ sbt = tick_sbt;
+ if ((flags & C_HARDCLOCK) != 0 ||
+#ifdef NO_EVENTTIMERS
+ sbt >= sbt_timethreshold) {
+ to_sbt = getsbinuptime();
+
+ /* Add safety belt for the case of hz > 1000. */
+ to_sbt += tc_tick_sbt - tick_sbt;
+#else
+ sbt >= sbt_tickthreshold) {
+ /*
+ * Obtain the time of the last hardclock() call on
+ * this CPU directly from the kern_clocksource.c.
+ * This value is per-CPU, but it is equal for all
+ * active ones.
+ */
+#ifdef __LP64__
+ to_sbt = DPCPU_GET(hardclocktime);
+#else
+ spinlock_enter();
+ to_sbt = DPCPU_GET(hardclocktime);
+ spinlock_exit();
+#endif
+#endif
+ if ((flags & C_HARDCLOCK) == 0)
+ to_sbt += tick_sbt;
+ } else
+ to_sbt = sbinuptime();
+ if (SBT_MAX - to_sbt < sbt)
+ to_sbt = SBT_MAX;
+ else
+ to_sbt += sbt;
+ *res = to_sbt;
+ to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
+ sbt >> C_PRELGET(flags));
+ *prec_res = to_pr > precision ? to_pr : precision;
+}
+
/*
* New interface; clients allocate their own callout structures.
*
@@ -846,28 +1090,56 @@ callout_handle_init(struct callout_handle *handle)
* callout_deactivate() - marks the callout as having been serviced
*/
int
-callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *),
- void *arg, int cpu)
+callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec,
+ void (*ftn)(void *), void *arg, int cpu, int flags)
{
+ sbintime_t to_sbt, precision;
struct callout_cpu *cc;
- int cancelled = 0;
+ int cancelled, direct;
+ int ignore_cpu=0;
+
+ cancelled = 0;
+ if (cpu == -1) {
+ ignore_cpu = 1;
+ } else if ((cpu >= MAXCPU) ||
+ ((CC_CPU(cpu))->cc_inited == 0)) {
+ /* Invalid CPU spec */
+ panic("Invalid CPU in callout %d", cpu);
+ }
+ callout_when(sbt, prec, flags, &to_sbt, &precision);
+ /*
+ * This flag used to be added by callout_cc_add, but the
+ * first time you call this we could end up with the
+ * wrong direct flag if we don't do it before we add.
+ */
+ if (flags & C_DIRECT_EXEC) {
+ direct = 1;
+ } else {
+ direct = 0;
+ }
+ KASSERT(!direct || c->c_lock == NULL,
+ ("%s: direct callout %p has lock", __func__, c));
+ cc = callout_lock(c);
/*
* Don't allow migration of pre-allocated callouts lest they
- * become unbalanced.
+ * become unbalanced or handle the case where the user does
+ * not care.
*/
- if (c->c_flags & CALLOUT_LOCAL_ALLOC)
+ if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) ||
+ ignore_cpu) {
cpu = c->c_cpu;
- cc = callout_lock(c);
- if (cc->cc_curr == c) {
+ }
+
+ if (cc_exec_curr(cc, direct) == c) {
/*
* We're being asked to reschedule a callout which is
* currently in progress. If there is a lock then we
* can cancel the callout if it has not really started.
*/
- if (c->c_lock != NULL && !cc->cc_cancel)
- cancelled = cc->cc_cancel = 1;
- if (cc->cc_waiting) {
+ if (c->c_lock != NULL && !cc_exec_cancel(cc, direct))
+ cancelled = cc_exec_cancel(cc, direct) = true;
+ if (cc_exec_waiting(cc, direct) || cc_exec_drain(cc, direct)) {
/*
* Someone has called callout_drain to kill this
* callout. Don't reschedule.
@@ -878,16 +1150,41 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *),
CC_UNLOCK(cc);
return (cancelled);
}
+#ifdef SMP
+ if (callout_migrating(c)) {
+ /*
+ * This only occurs when a second callout_reset_sbt_on
+ * is made after a previous one moved it into
+ * deferred migration (below). Note we do *not* change
+ * the prev_cpu even though the previous target may
+ * be different.
+ */
+ cc_migration_cpu(cc, direct) = cpu;
+ cc_migration_time(cc, direct) = to_sbt;
+ cc_migration_prec(cc, direct) = precision;
+ cc_migration_func(cc, direct) = ftn;
+ cc_migration_arg(cc, direct) = arg;
+ cancelled = 1;
+ CC_UNLOCK(cc);
+ return (cancelled);
+ }
+#endif
}
- if (c->c_flags & CALLOUT_PENDING) {
- if (cc->cc_next == c) {
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+ if (c->c_iflags & CALLOUT_PENDING) {
+#ifndef __rtems__
+ if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
+#endif /* __rtems__ */
+ if (cc_exec_next(cc) == c)
+ cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
+ LIST_REMOVE(c, c_links.le);
+#ifndef __rtems__
+ } else {
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
}
- TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
- c_links.tqe);
-
+#endif /* __rtems__ */
cancelled = 1;
- c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
+ c->c_iflags &= ~ CALLOUT_PENDING;
+ c->c_flags &= ~ CALLOUT_ACTIVE;
}
#ifdef SMP
@@ -897,15 +1194,34 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *),
* to a more appropriate moment.
*/
if (c->c_cpu != cpu) {
- if (cc->cc_curr == c) {
- cc->cc_migration_cpu = cpu;
- cc->cc_migration_ticks = to_ticks;
- cc->cc_migration_func = ftn;
- cc->cc_migration_arg = arg;
- c->c_flags |= CALLOUT_DFRMIGRATION;
- CTR5(KTR_CALLOUT,
- "migration of %p func %p arg %p in %d to %u deferred",
- c, c->c_func, c->c_arg, to_ticks, cpu);
+ if (cc_exec_curr(cc, direct) == c) {
+ /*
+ * Pending will have been removed since we are
+ * actually executing the callout on another
+ * CPU. That callout should be waiting on the
+ * lock the caller holds. If we set both
+ * active/and/pending after we return and the
+ * lock on the executing callout proceeds, it
+ * will then see pending is true and return.
+ * At the return from the actual callout execution
+ * the migration will occur in softclock_call_cc
+ * and this new callout will be placed on the
+ * new CPU via a call to callout_cpu_switch() which
+ * will get the lock on the right CPU followed
+ * by a call callout_cc_add() which will add it there.
+ * (see above in softclock_call_cc()).
+ */
+ cc_migration_cpu(cc, direct) = cpu;
+ cc_migration_time(cc, direct) = to_sbt;
+ cc_migration_prec(cc, direct) = precision;
+ cc_migration_func(cc, direct) = ftn;
+ cc_migration_arg(cc, direct) = arg;
+ c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING);
+ c->c_flags |= CALLOUT_ACTIVE;
+ CTR6(KTR_CALLOUT,
+ "migration of %p func %p arg %p in %d.%08x to %u deferred",
+ c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
+ (u_int)(to_sbt & 0xffffffff), cpu);
CC_UNLOCK(cc);
return (cancelled);
}
@@ -913,9 +1229,10 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *),
}
#endif
- callout_cc_add(c, cc, to_ticks, ftn, arg, cpu);
- CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d",
- cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks);
+ callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
+ CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
+ cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
+ (u_int)(to_sbt & 0xffffffff));
CC_UNLOCK(cc);
return (cancelled);
@@ -937,25 +1254,26 @@ callout_schedule(struct callout *c, int to_ticks)
}
int
-_callout_stop_safe(c, safe)
- struct callout *c;
- int safe;
+_callout_stop_safe(struct callout *c, int flags, void (*drain)(void *))
{
-#ifndef __rtems__
struct callout_cpu *cc, *old_cc;
struct lock_class *class;
- int use_lock, sq_locked;
-#else /* __rtems__ */
- struct callout_cpu *cc;
- struct lock_class *class;
- int use_lock;
+ int direct, sq_locked, use_lock;
+ int cancelled, not_on_a_list;
+#ifdef __rtems__
+ (void)old_cc;
+ (void)sq_locked;
#endif /* __rtems__ */
+ if ((flags & CS_DRAIN) != 0)
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock,
+ "calling %s", __func__);
+
/*
* Some old subsystems don't hold Giant while running a callout_stop(),
* so just discard this check for the moment.
*/
- if (!safe && c->c_lock != NULL) {
+ if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) {
if (c->c_lock == &Giant.lock_object)
use_lock = mtx_owned(&Giant);
else {
@@ -965,6 +1283,11 @@ _callout_stop_safe(c, safe)
}
} else
use_lock = 0;
+ if (c->c_iflags & CALLOUT_DIRECT) {
+ direct = 1;
+ } else {
+ direct = 0;
+ }
#ifndef __rtems__
sq_locked = 0;
@@ -973,6 +1296,28 @@ again:
#endif /* __rtems__ */
cc = callout_lock(c);
+ if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) ==
+ (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) &&
+ ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) {
+ /*
+ * Special case where this slipped in while we
+ * were migrating *as* the callout is about to
+ * execute. The caller probably holds the lock
+ * the callout wants.
+ *
+ * Get rid of the migration first. Then set
+ * the flag that tells this code *not* to
+ * try to remove it from any lists (its not
+ * on one yet). When the callout wheel runs,
+ * it will ignore this callout.
+ */
+ c->c_iflags &= ~CALLOUT_PENDING;
+ c->c_flags &= ~CALLOUT_ACTIVE;
+ not_on_a_list = 1;
+ } else {
+ not_on_a_list = 0;
+ }
+
#ifndef __rtems__
/*
* If the callout was migrating while the callout cpu lock was
@@ -982,7 +1327,7 @@ again:
if (sq_locked != 0 && cc != old_cc) {
#ifdef SMP
CC_UNLOCK(cc);
- sleepq_release(&old_cc->cc_waiting);
+ sleepq_release(&cc_exec_waiting(old_cc, direct));
sq_locked = 0;
old_cc = NULL;
goto again;
@@ -993,36 +1338,23 @@ again:
#endif /* __rtems__ */
/*
- * If the callout isn't pending, it's not on the queue, so
- * don't attempt to remove it from the queue. We can try to
- * stop it by other means however.
+ * If the callout is running, try to stop it or drain it.
*/
- if (!(c->c_flags & CALLOUT_PENDING)) {
- c->c_flags &= ~CALLOUT_ACTIVE;
-
+ if (cc_exec_curr(cc, direct) == c) {
/*
- * If it wasn't on the queue and it isn't the current
- * callout, then we can't stop it, so just bail.
+ * Succeed we to stop it or not, we must clear the
+ * active flag - this is what API users expect.
*/
- if (cc->cc_curr != c) {
- CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
- c, c->c_func, c->c_arg);
- CC_UNLOCK(cc);
-#ifndef __rtems__
- if (sq_locked)
- sleepq_release(&cc->cc_waiting);
-#endif /* __rtems__ */
- return (0);
- }
+ c->c_flags &= ~CALLOUT_ACTIVE;
- if (safe) {
+ if ((flags & CS_DRAIN) != 0) {
/*
* The current callout is running (or just
* about to run) and blocking is allowed, so
* just wait for the current invocation to
* finish.
*/
- while (cc->cc_curr == c) {
+ while (cc_exec_curr(cc, direct) == c) {
#ifndef __rtems__
/*
@@ -1044,7 +1376,8 @@ again:
*/
if (!sq_locked) {
CC_UNLOCK(cc);
- sleepq_lock(&cc->cc_waiting);
+ sleepq_lock(
+ &cc_exec_waiting(cc, direct));
sq_locked = 1;
old_cc = cc;
goto again;
@@ -1056,13 +1389,16 @@ again:
* will be packed up, just let softclock()
* take care of it.
*/
- cc->cc_waiting = 1;
+ cc_exec_waiting(cc, direct) = true;
DROP_GIANT();
CC_UNLOCK(cc);
- sleepq_add(&cc->cc_waiting,
+ sleepq_add(
+ &cc_exec_waiting(cc, direct),
&cc->cc_lock.lock_object, "codrain",
SLEEPQ_SLEEP, 0);
- sleepq_wait(&cc->cc_waiting, 0);
+ sleepq_wait(
+ &cc_exec_waiting(cc, direct),
+ 0);
sq_locked = 0;
old_cc = NULL;
@@ -1076,84 +1412,144 @@ again:
* sleepq_set_timeout() and instead use the
* RTEMS watchdog.
*/
- cc->cc_waiting = 1;
- msleep_spin(&cc->cc_waiting, &cc->cc_lock,
- "codrain", 0);
+ cc_exec_waiting(cc, direct) = true;
+ msleep_spin(&cc_exec_waiting(cc, direct),
+ &cc->cc_lock, "codrain", 0);
#endif /* __rtems__ */
}
- } else if (use_lock && !cc->cc_cancel) {
+ } else if (use_lock &&
+ !cc_exec_cancel(cc, direct) && (drain == NULL)) {
+
/*
* The current callout is waiting for its
* lock which we hold. Cancel the callout
* and return. After our caller drops the
* lock, the callout will be skipped in
- * softclock().
+ * softclock(). This *only* works with a
+ * callout_stop() *not* callout_drain() or
+ * callout_async_drain().
*/
- cc->cc_cancel = 1;
+ cc_exec_cancel(cc, direct) = true;
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
- KASSERT(!cc_cme_migrating(cc),
+ KASSERT(!cc_cce_migrating(cc, direct),
("callout wrongly scheduled for migration"));
+ if (callout_migrating(c)) {
+ c->c_iflags &= ~CALLOUT_DFRMIGRATION;
+#ifdef SMP
+ cc_migration_cpu(cc, direct) = CPUBLOCK;
+ cc_migration_time(cc, direct) = 0;
+ cc_migration_prec(cc, direct) = 0;
+ cc_migration_func(cc, direct) = NULL;
+ cc_migration_arg(cc, direct) = NULL;
+#endif
+ }
CC_UNLOCK(cc);
#ifndef __rtems__
KASSERT(!sq_locked, ("sleepqueue chain locked"));
#endif /* __rtems__ */
return (1);
- } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) {
- c->c_flags &= ~CALLOUT_DFRMIGRATION;
+ } else if (callout_migrating(c)) {
+ /*
+ * The callout is currently being serviced
+ * and the "next" callout is scheduled at
+ * its completion with a migration. We remove
+ * the migration flag so it *won't* get rescheduled,
+ * but we can't stop the one thats running so
+ * we return 0.
+ */
+ c->c_iflags &= ~CALLOUT_DFRMIGRATION;
+#ifdef SMP
+ /*
+ * We can't call cc_cce_cleanup here since
+ * if we do it will remove .ce_curr and
+ * its still running. This will prevent a
+ * reschedule of the callout when the
+ * execution completes.
+ */
+ cc_migration_cpu(cc, direct) = CPUBLOCK;
+ cc_migration_time(cc, direct) = 0;
+ cc_migration_prec(cc, direct) = 0;
+ cc_migration_func(cc, direct) = NULL;
+ cc_migration_arg(cc, direct) = NULL;
+#endif
CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
c, c->c_func, c->c_arg);
+ if (drain) {
+ cc_exec_drain(cc, direct) = drain;
+ }
CC_UNLOCK(cc);
- return (1);
+ return ((flags & CS_EXECUTING) != 0);
}
CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
c, c->c_func, c->c_arg);
- CC_UNLOCK(cc);
+ if (drain) {
+ cc_exec_drain(cc, direct) = drain;
+ }
#ifndef __rtems__
KASSERT(!sq_locked, ("sleepqueue chain still locked"));
#endif /* __rtems__ */
- return (0);
- }
+ cancelled = ((flags & CS_EXECUTING) != 0);
+ } else
+ cancelled = 1;
+
#ifndef __rtems__
if (sq_locked)
- sleepq_release(&cc->cc_waiting);
+ sleepq_release(&cc_exec_waiting(cc, direct));
#endif /* __rtems__ */
- c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
+ if ((c->c_iflags & CALLOUT_PENDING) == 0) {
+ CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
+ c, c->c_func, c->c_arg);
+ /*
+ * For not scheduled and not executing callout return
+ * negative value.
+ */
+ if (cc_exec_curr(cc, direct) != c)
+ cancelled = -1;
+ CC_UNLOCK(cc);
+ return (cancelled);
+ }
+
+ c->c_iflags &= ~CALLOUT_PENDING;
+ c->c_flags &= ~CALLOUT_ACTIVE;
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
- if (cc->cc_next == c)
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
- TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
- c_links.tqe);
+ if (not_on_a_list == 0) {
+#ifndef __rtems__
+ if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
+#endif /* __rtems__ */
+ if (cc_exec_next(cc) == c)
+ cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
+ LIST_REMOVE(c, c_links.le);
+#ifndef __rtems__
+ } else {
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+ }
+#endif /* __rtems__ */
+ }
callout_cc_del(c, cc);
-
CC_UNLOCK(cc);
- return (1);
+ return (cancelled);
}
void
-callout_init(c, mpsafe)
- struct callout *c;
- int mpsafe;
+callout_init(struct callout *c, int mpsafe)
{
bzero(c, sizeof *c);
if (mpsafe) {
c->c_lock = NULL;
- c->c_flags = CALLOUT_RETURNUNLOCKED;
+ c->c_iflags = CALLOUT_RETURNUNLOCKED;
} else {
c->c_lock = &Giant.lock_object;
- c->c_flags = 0;
+ c->c_iflags = 0;
}
c->c_cpu = timeout_cpu;
}
void
-_callout_init_lock(c, lock, flags)
- struct callout *c;
- struct lock_object *lock;
- int flags;
+_callout_init_lock(struct callout *c, struct lock_object *lock, int flags)
{
bzero(c, sizeof *c);
c->c_lock = lock;
@@ -1164,7 +1560,7 @@ _callout_init_lock(c, lock, flags)
KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
(LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
__func__));
- c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
+ c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
c->c_cpu = timeout_cpu;
}
@@ -1181,12 +1577,11 @@ _callout_init_lock(c, lock, flags)
* which set the timer can do the maintanence the timer was for as close
* as possible to the originally intended time. Testing this code for a
* week showed that resuming from a suspend resulted in 22 to 25 timers
- * firing, which seemed independant on whether the suspend was 2 hours or
+ * firing, which seemed independent on whether the suspend was 2 hours or
* 2 days. Your milage may vary. - Ken Key <key@cs.utk.edu>
*/
void
-adjust_timeout_calltodo(time_change)
- struct timeval *time_change;
+adjust_timeout_calltodo(struct timeval *time_change)
{
register struct callout *p;
unsigned long delta_ticks;
@@ -1200,11 +1595,11 @@ adjust_timeout_calltodo(time_change)
if (time_change->tv_sec < 0)
return;
else if (time_change->tv_sec <= LONG_MAX / 1000000)
- delta_ticks = (time_change->tv_sec * 1000000 +
- time_change->tv_usec + (tick - 1)) / tick + 1;
+ delta_ticks = howmany(time_change->tv_sec * 1000000 +
+ time_change->tv_usec, tick) + 1;
else if (time_change->tv_sec <= LONG_MAX / hz)
delta_ticks = time_change->tv_sec * hz +
- (time_change->tv_usec + (tick - 1)) / tick + 1;
+ howmany(time_change->tv_usec, tick) + 1;
else
delta_ticks = LONG_MAX;
@@ -1233,3 +1628,152 @@ adjust_timeout_calltodo(time_change)
return;
}
#endif /* APM_FIXUP_CALLTODO */
+
+static int
+flssbt(sbintime_t sbt)
+{
+
+ sbt += (uint64_t)sbt >> 1;
+ if (sizeof(long) >= sizeof(sbintime_t))
+ return (flsl(sbt));
+ if (sbt >= SBT_1S)
+ return (flsl(((uint64_t)sbt) >> 32) + 32);
+ return (flsl(sbt));
+}
+
+/*
+ * Dump immediate statistic snapshot of the scheduled callouts.
+ */
+static int
+sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
+{
+ struct callout *tmp;
+ struct callout_cpu *cc;
+ struct callout_list *sc;
+ sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t;
+ int ct[64], cpr[64], ccpbk[32];
+ int error, val, i, count, tcum, pcum, maxc, c, medc;
+#ifdef SMP
+ int cpu;
+#endif
+
+ val = 0;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ count = maxc = 0;
+ st = spr = maxt = maxpr = 0;
+ bzero(ccpbk, sizeof(ccpbk));
+ bzero(ct, sizeof(ct));
+ bzero(cpr, sizeof(cpr));
+ now = sbinuptime();
+#ifdef SMP
+ CPU_FOREACH(cpu) {
+ cc = CC_CPU(cpu);
+#else
+ cc = CC_CPU(timeout_cpu);
+#endif
+ CC_LOCK(cc);
+ for (i = 0; i < callwheelsize; i++) {
+ sc = &cc->cc_callwheel[i];
+ c = 0;
+ LIST_FOREACH(tmp, sc, c_links.le) {
+ c++;
+ t = tmp->c_time - now;
+ if (t < 0)
+ t = 0;
+ st += t / SBT_1US;
+ spr += tmp->c_precision / SBT_1US;
+ if (t > maxt)
+ maxt = t;
+ if (tmp->c_precision > maxpr)
+ maxpr = tmp->c_precision;
+ ct[flssbt(t)]++;
+ cpr[flssbt(tmp->c_precision)]++;
+ }
+ if (c > maxc)
+ maxc = c;
+ ccpbk[fls(c + c / 2)]++;
+ count += c;
+ }
+ CC_UNLOCK(cc);
+#ifdef SMP
+ }
+#endif
+
+ for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
+ tcum += ct[i];
+ medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
+ for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
+ pcum += cpr[i];
+ medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
+ for (i = 0, c = 0; i < 32 && c < count / 2; i++)
+ c += ccpbk[i];
+ medc = (i >= 2) ? (1 << (i - 2)) : 0;
+
+ printf("Scheduled callouts statistic snapshot:\n");
+ printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n",
+ count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
+ printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n",
+ medc,
+ count / callwheelsize / mp_ncpus,
+ (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
+ maxc);
+ printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
+ medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
+ (st / count) / 1000000, (st / count) % 1000000,
+ maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
+ printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
+ medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
+ (spr / count) / 1000000, (spr / count) % 1000000,
+ maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
+ printf(" Distribution: \tbuckets\t time\t tcum\t"
+ " prec\t pcum\n");
+ for (i = 0, tcum = pcum = 0; i < 64; i++) {
+ if (ct[i] == 0 && cpr[i] == 0)
+ continue;
+ t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
+ tcum += ct[i];
+ pcum += cpr[i];
+ printf(" %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
+ t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
+ i - 1 - (32 - CC_HASH_SHIFT),
+ ct[i], tcum, cpr[i], pcum);
+ }
+ return (error);
+}
+SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ 0, 0, sysctl_kern_callout_stat, "I",
+ "Dump immediate statistic snapshot of the scheduled callouts");
+
+#ifdef DDB
+static void
+_show_callout(struct callout *c)
+{
+
+ db_printf("callout %p\n", c);
+#define C_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, c->e);
+ db_printf(" &c_links = %p\n", &(c->c_links));
+ C_DB_PRINTF("%" PRId64, c_time);
+ C_DB_PRINTF("%" PRId64, c_precision);
+ C_DB_PRINTF("%p", c_arg);
+ C_DB_PRINTF("%p", c_func);
+ C_DB_PRINTF("%p", c_lock);
+ C_DB_PRINTF("%#x", c_flags);
+ C_DB_PRINTF("%#x", c_iflags);
+ C_DB_PRINTF("%d", c_cpu);
+#undef C_DB_PRINTF
+}
+
+DB_SHOW_COMMAND(callout, db_show_callout)
+{
+
+ if (!have_addr) {
+ db_printf("usage: show callout <struct callout *>\n");
+ return;
+ }
+
+ _show_callout((struct callout *)addr);
+}
+#endif /* DDB */
diff --git a/freebsd/sys/kern/kern_uuid.c b/freebsd/sys/kern/kern_uuid.c
new file mode 100644
index 00000000..ebcdfab8
--- /dev/null
+++ b/freebsd/sys/kern/kern_uuid.c
@@ -0,0 +1,430 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2002 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sbuf.h>
+#include <sys/socket.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/jail.h>
+#include <sys/uuid.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/vnet.h>
+
+/*
+ * See also:
+ * http://www.opengroup.org/dce/info/draft-leach-uuids-guids-01.txt
+ * http://www.opengroup.org/onlinepubs/009629399/apdxa.htm
+ *
+ * Note that the generator state is itself a UUID, but the time and clock
+ * sequence fields are written in the native byte order.
+ */
+
+CTASSERT(sizeof(struct uuid) == 16);
+
+/* We use an alternative, more convenient representation in the generator. */
+struct uuid_private {
+ union {
+ uint64_t ll; /* internal. */
+ struct {
+ uint32_t low;
+ uint16_t mid;
+ uint16_t hi;
+ } x;
+ } time;
+ uint16_t seq; /* Big-endian. */
+ uint16_t node[UUID_NODE_LEN>>1];
+};
+
+CTASSERT(sizeof(struct uuid_private) == 16);
+
+struct uuid_macaddr {
+ uint16_t state;
+#define UUID_ETHER_EMPTY 0
+#define UUID_ETHER_RANDOM 1
+#define UUID_ETHER_UNIQUE 2
+ uint16_t node[UUID_NODE_LEN>>1];
+};
+
+static struct uuid_private uuid_last;
+
+#define UUID_NETHER 4
+static struct uuid_macaddr uuid_ether[UUID_NETHER];
+
+static struct mtx uuid_mutex;
+MTX_SYSINIT(uuid_lock, &uuid_mutex, "UUID generator mutex lock", MTX_DEF);
+
+/*
+ * Return the first MAC address added in the array. If it's empty, then
+ * construct a sufficiently random multicast MAC address first. Any
+ * addresses added later will bump the random MAC address up to the next
+ * index.
+ */
+static void
+uuid_node(uint16_t *node)
+{
+ int i;
+
+ if (uuid_ether[0].state == UUID_ETHER_EMPTY) {
+ for (i = 0; i < (UUID_NODE_LEN>>1); i++)
+ uuid_ether[0].node[i] = (uint16_t)arc4random();
+ *((uint8_t*)uuid_ether[0].node) |= 0x01;
+ uuid_ether[0].state = UUID_ETHER_RANDOM;
+ }
+ for (i = 0; i < (UUID_NODE_LEN>>1); i++)
+ node[i] = uuid_ether[0].node[i];
+}
+
+/*
+ * Get the current time as a 60 bit count of 100-nanosecond intervals
+ * since 00:00:00.00, October 15, 1582. We apply a magic offset to convert
+ * the Unix time since 00:00:00.00, January 1, 1970 to the date of the
+ * Gregorian reform to the Christian calendar.
+ */
+static uint64_t
+uuid_time(void)
+{
+ struct bintime bt;
+ uint64_t time = 0x01B21DD213814000LL;
+
+ bintime(&bt);
+ time += (uint64_t)bt.sec * 10000000LL;
+ time += (10000000LL * (uint32_t)(bt.frac >> 32)) >> 32;
+ return (time & ((1LL << 60) - 1LL));
+}
+
+struct uuid *
+kern_uuidgen(struct uuid *store, size_t count)
+{
+ struct uuid_private uuid;
+ uint64_t time;
+ size_t n;
+
+ mtx_lock(&uuid_mutex);
+
+ uuid_node(uuid.node);
+ time = uuid_time();
+
+ if (uuid_last.time.ll == 0LL || uuid_last.node[0] != uuid.node[0] ||
+ uuid_last.node[1] != uuid.node[1] ||
+ uuid_last.node[2] != uuid.node[2])
+ uuid.seq = (uint16_t)arc4random() & 0x3fff;
+ else if (uuid_last.time.ll >= time)
+ uuid.seq = (uuid_last.seq + 1) & 0x3fff;
+ else
+ uuid.seq = uuid_last.seq;
+
+ uuid_last = uuid;
+ uuid_last.time.ll = (time + count - 1) & ((1LL << 60) - 1LL);
+
+ mtx_unlock(&uuid_mutex);
+
+ /* Set sequence and variant and deal with byte order. */
+ uuid.seq = htobe16(uuid.seq | 0x8000);
+
+ for (n = 0; n < count; n++) {
+ /* Set time and version (=1). */
+ uuid.time.x.low = (uint32_t)time;
+ uuid.time.x.mid = (uint16_t)(time >> 32);
+ uuid.time.x.hi = ((uint16_t)(time >> 48) & 0xfff) | (1 << 12);
+ store[n] = *(struct uuid *)&uuid;
+ time++;
+ }
+
+ return (store);
+}
+
+#ifndef __rtems__
+#ifndef _SYS_SYSPROTO_H_
+struct uuidgen_args {
+ struct uuid *store;
+ int count;
+};
+#endif
+int
+sys_uuidgen(struct thread *td, struct uuidgen_args *uap)
+{
+ struct uuid *store;
+ size_t count;
+ int error;
+
+ /*
+ * Limit the number of UUIDs that can be created at the same time
+ * to some arbitrary number. This isn't really necessary, but I
+ * like to have some sort of upper-bound that's less than 2G :-)
+ * XXX probably needs to be tunable.
+ */
+ if (uap->count < 1 || uap->count > 2048)
+ return (EINVAL);
+
+ count = uap->count;
+ store = malloc(count * sizeof(struct uuid), M_TEMP, M_WAITOK);
+ kern_uuidgen(store, count);
+ error = copyout(store, uap->store, count * sizeof(struct uuid));
+ free(store, M_TEMP);
+ return (error);
+}
+#endif /* __rtems__ */
+
+int
+uuid_ether_add(const uint8_t *addr)
+{
+ int i, sum;
+
+ /*
+ * Validate input. No multicast (flag 0x1), no locally administered
+ * (flag 0x2) and no 'all-zeroes' addresses.
+ */
+ if (addr[0] & 0x03)
+ return (EINVAL);
+ sum = 0;
+ for (i = 0; i < UUID_NODE_LEN; i++)
+ sum += addr[i];
+ if (sum == 0)
+ return (EINVAL);
+
+ mtx_lock(&uuid_mutex);
+
+ /* Make sure the MAC isn't known already and that there's space. */
+ i = 0;
+ while (i < UUID_NETHER && uuid_ether[i].state == UUID_ETHER_UNIQUE) {
+ if (!bcmp(addr, uuid_ether[i].node, UUID_NODE_LEN)) {
+ mtx_unlock(&uuid_mutex);
+ return (EEXIST);
+ }
+ i++;
+ }
+ if (i == UUID_NETHER) {
+ mtx_unlock(&uuid_mutex);
+ return (ENOSPC);
+ }
+
+ /* Insert MAC at index, moving the non-empty entry if possible. */
+ if (uuid_ether[i].state == UUID_ETHER_RANDOM && i < UUID_NETHER - 1)
+ uuid_ether[i + 1] = uuid_ether[i];
+ uuid_ether[i].state = UUID_ETHER_UNIQUE;
+ bcopy(addr, uuid_ether[i].node, UUID_NODE_LEN);
+ mtx_unlock(&uuid_mutex);
+ return (0);
+}
+
+int
+uuid_ether_del(const uint8_t *addr)
+{
+ int i;
+
+ mtx_lock(&uuid_mutex);
+ i = 0;
+ while (i < UUID_NETHER && uuid_ether[i].state == UUID_ETHER_UNIQUE &&
+ bcmp(addr, uuid_ether[i].node, UUID_NODE_LEN))
+ i++;
+ if (i == UUID_NETHER || uuid_ether[i].state != UUID_ETHER_UNIQUE) {
+ mtx_unlock(&uuid_mutex);
+ return (ENOENT);
+ }
+
+ /* Remove it by shifting higher index entries down. */
+ while (i < UUID_NETHER - 1 && uuid_ether[i].state != UUID_ETHER_EMPTY) {
+ uuid_ether[i] = uuid_ether[i + 1];
+ i++;
+ }
+ if (uuid_ether[i].state != UUID_ETHER_EMPTY) {
+ uuid_ether[i].state = UUID_ETHER_EMPTY;
+ bzero(uuid_ether[i].node, UUID_NODE_LEN);
+ }
+ mtx_unlock(&uuid_mutex);
+ return (0);
+}
+
+int
+snprintf_uuid(char *buf, size_t sz, struct uuid *uuid)
+{
+ struct uuid_private *id;
+ int cnt;
+
+ id = (struct uuid_private *)uuid;
+ cnt = snprintf(buf, sz, "%08x-%04x-%04x-%04x-%04x%04x%04x",
+ id->time.x.low, id->time.x.mid, id->time.x.hi, be16toh(id->seq),
+ be16toh(id->node[0]), be16toh(id->node[1]), be16toh(id->node[2]));
+ return (cnt);
+}
+
+int
+printf_uuid(struct uuid *uuid)
+{
+ char buf[38];
+
+ snprintf_uuid(buf, sizeof(buf), uuid);
+ return (printf("%s", buf));
+}
+
+int
+sbuf_printf_uuid(struct sbuf *sb, struct uuid *uuid)
+{
+ char buf[38];
+
+ snprintf_uuid(buf, sizeof(buf), uuid);
+ return (sbuf_printf(sb, "%s", buf));
+}
+
+/*
+ * Encode/Decode UUID into byte-stream.
+ * http://www.opengroup.org/dce/info/draft-leach-uuids-guids-01.txt
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | time_low |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | time_mid | time_hi_and_version |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |clk_seq_hi_res | clk_seq_low | node (0-1) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | node (2-5) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+void
+le_uuid_enc(void *buf, struct uuid const *uuid)
+{
+ u_char *p;
+ int i;
+
+ p = buf;
+ le32enc(p, uuid->time_low);
+ le16enc(p + 4, uuid->time_mid);
+ le16enc(p + 6, uuid->time_hi_and_version);
+ p[8] = uuid->clock_seq_hi_and_reserved;
+ p[9] = uuid->clock_seq_low;
+ for (i = 0; i < _UUID_NODE_LEN; i++)
+ p[10 + i] = uuid->node[i];
+}
+
+void
+le_uuid_dec(void const *buf, struct uuid *uuid)
+{
+ u_char const *p;
+ int i;
+
+ p = buf;
+ uuid->time_low = le32dec(p);
+ uuid->time_mid = le16dec(p + 4);
+ uuid->time_hi_and_version = le16dec(p + 6);
+ uuid->clock_seq_hi_and_reserved = p[8];
+ uuid->clock_seq_low = p[9];
+ for (i = 0; i < _UUID_NODE_LEN; i++)
+ uuid->node[i] = p[10 + i];
+}
+
+void
+be_uuid_enc(void *buf, struct uuid const *uuid)
+{
+ u_char *p;
+ int i;
+
+ p = buf;
+ be32enc(p, uuid->time_low);
+ be16enc(p + 4, uuid->time_mid);
+ be16enc(p + 6, uuid->time_hi_and_version);
+ p[8] = uuid->clock_seq_hi_and_reserved;
+ p[9] = uuid->clock_seq_low;
+ for (i = 0; i < _UUID_NODE_LEN; i++)
+ p[10 + i] = uuid->node[i];
+}
+
+void
+be_uuid_dec(void const *buf, struct uuid *uuid)
+{
+ u_char const *p;
+ int i;
+
+ p = buf;
+ uuid->time_low = be32dec(p);
+ uuid->time_mid = be16dec(p + 4);
+ uuid->time_hi_and_version = be16dec(p + 6);
+ uuid->clock_seq_hi_and_reserved = p[8];
+ uuid->clock_seq_low = p[9];
+ for (i = 0; i < _UUID_NODE_LEN; i++)
+ uuid->node[i] = p[10 + i];
+}
+
+int
+parse_uuid(const char *str, struct uuid *uuid)
+{
+ u_int c[11];
+ int n;
+
+ /* An empty string represents a nil UUID. */
+ if (*str == '\0') {
+ bzero(uuid, sizeof(*uuid));
+ return (0);
+ }
+
+ /* The UUID string representation has a fixed length. */
+ if (strlen(str) != 36)
+ return (EINVAL);
+
+ /*
+ * We only work with "new" UUIDs. New UUIDs have the form:
+ * 01234567-89ab-cdef-0123-456789abcdef
+ * The so called "old" UUIDs, which we don't support, have the form:
+ * 0123456789ab.cd.ef.01.23.45.67.89.ab
+ */
+ if (str[8] != '-')
+ return (EINVAL);
+
+ n = sscanf(str, "%8x-%4x-%4x-%2x%2x-%2x%2x%2x%2x%2x%2x", c + 0, c + 1,
+ c + 2, c + 3, c + 4, c + 5, c + 6, c + 7, c + 8, c + 9, c + 10);
+ /* Make sure we have all conversions. */
+ if (n != 11)
+ return (EINVAL);
+
+ /* Successful scan. Build the UUID. */
+ uuid->time_low = c[0];
+ uuid->time_mid = c[1];
+ uuid->time_hi_and_version = c[2];
+ uuid->clock_seq_hi_and_reserved = c[3];
+ uuid->clock_seq_low = c[4];
+ for (n = 0; n < 6; n++)
+ uuid->node[n] = c[n + 5];
+
+ /* Check semantics... */
+ return (((c[3] & 0x80) != 0x00 && /* variant 0? */
+ (c[3] & 0xc0) != 0x80 && /* variant 1? */
+ (c[3] & 0xe0) != 0xc0) ? EINVAL : 0); /* variant 2? */
+}
diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c
index 3d7a1629..3eb7d7e9 100644
--- a/freebsd/sys/kern/subr_bus.c
+++ b/freebsd/sys/kern/subr_bus.c
@@ -42,27 +42,33 @@ __FBSDID("$FreeBSD$");
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/poll.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/condvar.h>
#include <sys/queue.h>
#include <machine/bus.h>
+#include <sys/random.h>
#include <sys/rman.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
+#include <rtems/bsd/sys/cpuset.h>
#include <net/vnet.h>
+#include <machine/cpu.h>
#include <machine/stdarg.h>
#include <vm/uma.h>
+#include <vm/vm.h>
SYSCTL_NODE(_hw, OID_AUTO, bus, CTLFLAG_RW, NULL, NULL);
-SYSCTL_NODE(, OID_AUTO, dev, CTLFLAG_RW, NULL, NULL);
+SYSCTL_ROOT_NODE(OID_AUTO, dev, CTLFLAG_RW, NULL, NULL);
/*
* Used to attach drivers to devclasses.
@@ -126,14 +132,6 @@ struct device {
device_state_t state; /**< current device state */
uint32_t devflags; /**< api level flags for device_get_flags() */
u_int flags; /**< internal device flags */
-#define DF_ENABLED 0x01 /* device should be probed/attached */
-#define DF_FIXEDCLASS 0x02 /* devclass specified at create time */
-#define DF_WILDCARD 0x04 /* unit was originally wildcard */
-#define DF_DESCMALLOCED 0x08 /* description was malloced */
-#define DF_QUIET 0x10 /* don't print verbose attach message */
-#define DF_DONENOMATCH 0x20 /* don't execute DEVICE_NOMATCH again */
-#define DF_EXTERNALSOFTC 0x40 /* softc not allocated by us */
-#define DF_REBID 0x80 /* Can rebid after attach */
u_int order; /**< order from device_add_child_ordered() */
void *ivars; /**< instance variables */
void *softc; /**< current driver's variables */
@@ -145,14 +143,15 @@ struct device {
static MALLOC_DEFINE(M_BUS, "bus", "Bus data structures");
static MALLOC_DEFINE(M_BUS_SC, "bus-sc", "Bus data structures, softc");
+#ifndef __rtems__
+static void devctl2_init(void);
+#endif /* __rtems__ */
+
#ifdef BUS_DEBUG
static int bus_debug = 1;
-#ifndef __rtems__
-TUNABLE_INT("bus.debug", &bus_debug);
-SYSCTL_INT(_debug, OID_AUTO, bus_debug, CTLFLAG_RW, &bus_debug, 0,
- "Debug bus code");
-#endif /* __rtems__ */
+SYSCTL_INT(_debug, OID_AUTO, bus_debug, CTLFLAG_RWTUN, &bus_debug, 0,
+ "Bus debug level");
#define PDEBUG(a) if (bus_debug) {printf("%s:%d: ", __func__, __LINE__), printf a; printf("\n");}
#define DEVICENAME(d) ((d)? device_get_name(d): "no device")
@@ -218,7 +217,7 @@ devclass_sysctl_handler(SYSCTL_HANDLER_ARGS)
default:
return (EINVAL);
}
- return (SYSCTL_OUT(req, value, strlen(value)));
+ return (SYSCTL_OUT_STR(req, value));
}
static void
@@ -275,7 +274,7 @@ device_sysctl_handler(SYSCTL_HANDLER_ARGS)
default:
return (EINVAL);
}
- error = SYSCTL_OUT(req, value, strlen(value));
+ error = SYSCTL_OUT_STR(req, value);
if (buf != NULL)
free(buf, M_BUS);
return (error);
@@ -285,6 +284,7 @@ static void
device_sysctl_init(device_t dev)
{
devclass_t dc = dev->devclass;
+ int domain;
if (dev->sysctl_tree != NULL)
return;
@@ -314,6 +314,10 @@ device_sysctl_init(device_t dev)
OID_AUTO, "%parent", CTLTYPE_STRING | CTLFLAG_RD,
dev, DEVICE_SYSCTL_PARENT, device_sysctl_handler, "A",
"parent device");
+ if (bus_get_domain(dev, &domain) == 0)
+ SYSCTL_ADD_INT(&dev->sysctl_ctx,
+ SYSCTL_CHILDREN(dev->sysctl_tree), OID_AUTO, "%domain",
+ CTLFLAG_RD, NULL, domain, "NUMA domain");
}
static void
@@ -361,9 +365,9 @@ device_sysctl_fini(device_t dev)
#ifndef __rtems__
/* Deprecated way to adjust queue length */
static int sysctl_devctl_disable(SYSCTL_HANDLER_ARGS);
-/* XXX Need to support old-style tunable hw.bus.devctl_disable" */
-SYSCTL_PROC(_hw_bus, OID_AUTO, devctl_disable, CTLTYPE_INT | CTLFLAG_RW, NULL,
- 0, sysctl_devctl_disable, "I", "devctl disable -- deprecated");
+SYSCTL_PROC(_hw_bus, OID_AUTO, devctl_disable, CTLTYPE_INT | CTLFLAG_RWTUN |
+ CTLFLAG_MPSAFE, NULL, 0, sysctl_devctl_disable, "I",
+ "devctl disable -- deprecated");
#endif /* __rtems__ */
#define DEVCTL_DEFAULT_QUEUE_LEN 1000
@@ -372,24 +376,24 @@ static int sysctl_devctl_queue(SYSCTL_HANDLER_ARGS);
#endif /* __rtems__ */
static int devctl_queue_length = DEVCTL_DEFAULT_QUEUE_LEN;
#ifndef __rtems__
-TUNABLE_INT("hw.bus.devctl_queue", &devctl_queue_length);
-SYSCTL_PROC(_hw_bus, OID_AUTO, devctl_queue, CTLTYPE_INT | CTLFLAG_RW, NULL,
- 0, sysctl_devctl_queue, "I", "devctl queue length");
+SYSCTL_PROC(_hw_bus, OID_AUTO, devctl_queue, CTLTYPE_INT | CTLFLAG_RWTUN |
+ CTLFLAG_MPSAFE, NULL, 0, sysctl_devctl_queue, "I", "devctl queue length");
static d_open_t devopen;
static d_close_t devclose;
static d_read_t devread;
static d_ioctl_t devioctl;
static d_poll_t devpoll;
+static d_kqfilter_t devkqfilter;
static struct cdevsw dev_cdevsw = {
.d_version = D_VERSION,
- .d_flags = D_NEEDGIANT,
.d_open = devopen,
.d_close = devclose,
.d_read = devread,
.d_ioctl = devioctl,
.d_poll = devpoll,
+ .d_kqfilter = devkqfilter,
.d_name = "devctl",
};
@@ -406,13 +410,23 @@ static struct dev_softc
int inuse;
int nonblock;
int queued;
+ int async;
struct mtx mtx;
struct cv cv;
struct selinfo sel;
struct devq devq;
- struct proc *async_proc;
+ struct sigio *sigio;
} devsoftc;
+static void filt_devctl_detach(struct knote *kn);
+static int filt_devctl_read(struct knote *kn, long hint);
+
+struct filterops devctl_rfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_devctl_detach,
+ .f_event = filt_devctl_read,
+};
+
static struct cdev *devctl_dev;
#else /* __rtems__ */
#define devctl_disable 0
@@ -427,6 +441,8 @@ devinit(void)
mtx_init(&devsoftc.mtx, "dev mtx", "devd", MTX_DEF);
cv_init(&devsoftc.cv, "dev cv");
TAILQ_INIT(&devsoftc.devq);
+ knlist_init_mtx(&devsoftc.sel.si_note, &devsoftc.mtx);
+ devctl2_init();
#endif /* __rtems__ */
}
@@ -434,23 +450,29 @@ devinit(void)
static int
devopen(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
- if (devsoftc.inuse)
+
+ mtx_lock(&devsoftc.mtx);
+ if (devsoftc.inuse) {
+ mtx_unlock(&devsoftc.mtx);
return (EBUSY);
+ }
/* move to init */
devsoftc.inuse = 1;
- devsoftc.nonblock = 0;
- devsoftc.async_proc = NULL;
+ mtx_unlock(&devsoftc.mtx);
return (0);
}
static int
devclose(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
- devsoftc.inuse = 0;
+
mtx_lock(&devsoftc.mtx);
+ devsoftc.inuse = 0;
+ devsoftc.nonblock = 0;
+ devsoftc.async = 0;
cv_broadcast(&devsoftc.cv);
+ funsetown(&devsoftc.sigio);
mtx_unlock(&devsoftc.mtx);
- devsoftc.async_proc = NULL;
return (0);
}
@@ -506,17 +528,20 @@ devioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *t
return (0);
case FIOASYNC:
if (*(int*)data)
- devsoftc.async_proc = td->td_proc;
+ devsoftc.async = 1;
else
- devsoftc.async_proc = NULL;
+ devsoftc.async = 0;
+ return (0);
+ case FIOSETOWN:
+ return fsetown(*(int *)data, &devsoftc.sigio);
+ case FIOGETOWN:
+ *(int *)data = fgetown(&devsoftc.sigio);
return (0);
/* (un)Support for other fcntl() calls. */
case FIOCLEX:
case FIONCLEX:
case FIONREAD:
- case FIOSETOWN:
- case FIOGETOWN:
default:
break;
}
@@ -540,6 +565,34 @@ devpoll(struct cdev *dev, int events, struct thread *td)
return (revents);
}
+static int
+devkqfilter(struct cdev *dev, struct knote *kn)
+{
+ int error;
+
+ if (kn->kn_filter == EVFILT_READ) {
+ kn->kn_fop = &devctl_rfiltops;
+ knlist_add(&devsoftc.sel.si_note, kn, 0);
+ error = 0;
+ } else
+ error = EINVAL;
+ return (error);
+}
+
+static void
+filt_devctl_detach(struct knote *kn)
+{
+
+ knlist_remove(&devsoftc.sel.si_note, kn, 0);
+}
+
+static int
+filt_devctl_read(struct knote *kn, long hint)
+{
+ kn->kn_data = devsoftc.queued;
+ return (kn->kn_data != 0);
+}
+
/**
* @brief Return whether the userland process is running
*/
@@ -562,7 +615,6 @@ devctl_queue_data_f(char *data, int flags)
{
#ifndef __rtems__
struct dev_event_info *n1 = NULL, *n2 = NULL;
- struct proc *p;
#endif /* __rtems__ */
if (strlen(data) == 0)
@@ -592,14 +644,11 @@ devctl_queue_data_f(char *data, int flags)
TAILQ_INSERT_TAIL(&devsoftc.devq, n1, dei_link);
devsoftc.queued++;
cv_broadcast(&devsoftc.cv);
+ KNOTE_LOCKED(&devsoftc.sel.si_note, 0);
mtx_unlock(&devsoftc.mtx);
selwakeup(&devsoftc.sel);
- p = devsoftc.async_proc;
- if (p != NULL) {
- PROC_LOCK(p);
- kern_psignal(p, SIGIO);
- PROC_UNLOCK(p);
- }
+ if (devsoftc.async && devsoftc.sigio != NULL)
+ pgsigio(&devsoftc.sigio, SIGIO, 0);
return;
#endif /* __rtems__ */
out:
@@ -765,11 +814,12 @@ sysctl_devctl_disable(SYSCTL_HANDLER_ARGS)
struct dev_event_info *n1;
int dis, error;
- dis = devctl_queue_length == 0;
+ dis = (devctl_queue_length == 0);
error = sysctl_handle_int(oidp, &dis, 0, req);
if (error || !req->newptr)
return (error);
- mtx_lock(&devsoftc.mtx);
+ if (mtx_initialized(&devsoftc.mtx))
+ mtx_lock(&devsoftc.mtx);
if (dis) {
while (!TAILQ_EMPTY(&devsoftc.devq)) {
n1 = TAILQ_FIRST(&devsoftc.devq);
@@ -782,7 +832,8 @@ sysctl_devctl_disable(SYSCTL_HANDLER_ARGS)
} else {
devctl_queue_length = DEVCTL_DEFAULT_QUEUE_LEN;
}
- mtx_unlock(&devsoftc.mtx);
+ if (mtx_initialized(&devsoftc.mtx))
+ mtx_unlock(&devsoftc.mtx);
return (0);
}
@@ -798,7 +849,8 @@ sysctl_devctl_queue(SYSCTL_HANDLER_ARGS)
return (error);
if (q < 0)
return (EINVAL);
- mtx_lock(&devsoftc.mtx);
+ if (mtx_initialized(&devsoftc.mtx))
+ mtx_lock(&devsoftc.mtx);
devctl_queue_length = q;
while (devsoftc.queued > devctl_queue_length) {
n1 = TAILQ_FIRST(&devsoftc.devq);
@@ -807,10 +859,43 @@ sysctl_devctl_queue(SYSCTL_HANDLER_ARGS)
free(n1, M_BUS);
devsoftc.queued--;
}
- mtx_unlock(&devsoftc.mtx);
+ if (mtx_initialized(&devsoftc.mtx))
+ mtx_unlock(&devsoftc.mtx);
return (0);
}
+/**
+ * @brief safely quotes strings that might have double quotes in them.
+ *
+ * The devctl protocol relies on quoted strings having matching quotes.
+ * This routine quotes any internal quotes so the resulting string
+ * is safe to pass to snprintf to construct, for example pnp info strings.
+ * Strings are always terminated with a NUL, but may be truncated if longer
+ * than @p len bytes after quotes.
+ *
+ * @param dst Buffer to hold the string. Must be at least @p len bytes long
+ * @param src Original buffer.
+ * @param len Length of buffer pointed to by @p dst, including trailing NUL
+ */
+void
+devctl_safe_quote(char *dst, const char *src, size_t len)
+{
+ char *walker = dst, *ep = dst + len - 1;
+
+ if (len == 0)
+ return;
+ while (src != NULL && walker < ep)
+ {
+ if (*src == '"' || *src == '\\') {
+ if (ep - walker < 2)
+ break;
+ *walker++ = '\\';
+ }
+ *walker++ = *src++;
+ }
+ *walker = '\0';
+}
+
/* End of /dev/devctl code */
#endif /* __rtems__ */
@@ -1566,7 +1651,9 @@ devclass_get_sysctl_tree(devclass_t dc)
static int
devclass_alloc_unit(devclass_t dc, device_t dev, int *unitp)
{
+#ifndef __rtems__
const char *s;
+#endif /* __rtems__ */
int unit = *unitp;
PDEBUG(("unit %d in devclass %s", unit, DEVCLANAME(dc)));
@@ -1739,7 +1826,7 @@ make_device(device_t parent, const char *name, int unit)
dc = NULL;
}
- dev = malloc(sizeof(struct device), M_BUS, M_NOWAIT|M_ZERO);
+ dev = malloc(sizeof(*dev), M_BUS, M_NOWAIT|M_ZERO);
if (!dev)
return (NULL);
@@ -2055,9 +2142,15 @@ device_probe_child(device_t dev, device_t child)
if (!hasclass) {
if (device_set_devclass(child,
dl->driver->name) != 0) {
+ char const * devname =
+ device_get_name(child);
+ if (devname == NULL)
+ devname = "(unknown)";
printf("driver bug: Unable to set "
- "devclass (devname: %s)\n",
- device_get_name(child));
+ "devclass (class: %s "
+ "devname: %s)\n",
+ dl->driver->name,
+ devname);
(void)device_set_driver(child, NULL);
continue;
}
@@ -2087,6 +2180,16 @@ device_probe_child(device_t dev, device_t child)
}
/*
+ * Probes that return BUS_PROBE_NOWILDCARD or lower
+ * only match on devices whose driver was explicitly
+ * specified.
+ */
+ if (result <= BUS_PROBE_NOWILDCARD &&
+ !(child->flags & DF_FIXEDCLASS)) {
+ result = ENXIO;
+ }
+
+ /*
* The driver returned an error so it
* certainly doesn't match.
*/
@@ -2101,14 +2204,6 @@ device_probe_child(device_t dev, device_t child)
* of pri for the first match.
*/
if (best == NULL || result > pri) {
- /*
- * Probes that return BUS_PROBE_NOWILDCARD
- * or lower only match when they are set
- * in stone by the parent bus.
- */
- if (result <= BUS_PROBE_NOWILDCARD &&
- child->flags & DF_WILDCARD)
- continue;
best = dl;
pri = result;
continue;
@@ -2619,6 +2714,15 @@ device_is_attached(device_t dev)
}
/**
+ * @brief Return non-zero if the device is currently suspended.
+ */
+int
+device_is_suspended(device_t dev)
+{
+ return ((dev->flags & DF_SUSPENDED) != 0);
+}
+
+/**
* @brief Set the devclass of a device
* @see devclass_add_device().
*/
@@ -2650,6 +2754,25 @@ device_set_devclass(device_t dev, const char *classname)
}
/**
+ * @brief Set the devclass of a device and mark the devclass fixed.
+ * @see device_set_devclass()
+ */
+int
+device_set_devclass_fixed(device_t dev, const char *classname)
+{
+ int error;
+
+ if (classname == NULL)
+ return (EINVAL);
+
+ error = device_set_devclass(dev, classname);
+ if (error)
+ return (error);
+ dev->flags |= DF_FIXEDCLASS;
+ return (0);
+}
+
+/**
* @brief Set the driver of a device
*
* @retval 0 success
@@ -2794,6 +2917,7 @@ device_probe_and_attach(device_t dev)
int
device_attach(device_t dev)
{
+ uint64_t attachtime;
int error;
#ifndef __rtems__
@@ -2808,6 +2932,7 @@ device_attach(device_t dev)
device_sysctl_init(dev);
if (!device_is_quiet(dev))
device_print_child(dev->parent, dev);
+ attachtime = get_cyclecount();
dev->state = DS_ATTACHING;
if ((error = DEVICE_ATTACH(dev)) != 0) {
printf("device_attach: %s%d attach returned %d\n",
@@ -2820,6 +2945,19 @@ device_attach(device_t dev)
dev->state = DS_NOTPRESENT;
return (error);
}
+ attachtime = get_cyclecount() - attachtime;
+ /*
+ * 4 bits per device is a reasonable value for desktop and server
+ * hardware with good get_cyclecount() implementations, but WILL
+ * need to be adjusted on other platforms.
+ */
+#define RANDOM_PROBE_BIT_GUESS 4
+ if (bootverbose)
+ printf("random: harvesting attach, %zu bytes (%d bits) from %s%d\n",
+ sizeof(attachtime), RANDOM_PROBE_BIT_GUESS,
+ dev->driver->name, dev->unit);
+ random_harvest_direct(&attachtime, sizeof(attachtime),
+ RANDOM_PROBE_BIT_GUESS, RANDOM_ATTACH);
device_sysctl_update(dev);
if (dev->busy)
dev->state = DS_BUSY;
@@ -2951,6 +3089,17 @@ device_set_unit(device_t dev, int unit)
* Some useful method implementations to make life easier for bus drivers.
*/
+#ifndef __rtems__
+void
+resource_init_map_request_impl(struct resource_map_request *args, size_t sz)
+{
+
+ bzero(args, sz);
+ args->size = sz;
+ args->memattr = VM_MEMATTR_UNCACHEABLE;
+}
+#endif /* __rtems__ */
+
/**
* @brief Initialise a resource list.
*
@@ -2997,8 +3146,8 @@ resource_list_free(struct resource_list *rl)
* @param count XXX end-start+1
*/
int
-resource_list_add_next(struct resource_list *rl, int type, u_long start,
- u_long end, u_long count)
+resource_list_add_next(struct resource_list *rl, int type, rman_res_t start,
+ rman_res_t end, rman_res_t count)
{
int rid;
@@ -3026,7 +3175,7 @@ resource_list_add_next(struct resource_list *rl, int type, u_long start,
*/
struct resource_list_entry *
resource_list_add(struct resource_list *rl, int type, int rid,
- u_long start, u_long end, u_long count)
+ rman_res_t start, rman_res_t end, rman_res_t count)
{
struct resource_list_entry *rle;
@@ -3170,9 +3319,9 @@ resource_list_delete(struct resource_list *rl, int type, int rid)
* @param type the type of resource to allocate
* @param rid a pointer to the resource identifier
* @param start hint at the start of the resource range - pass
- * @c 0UL for any start address
+ * @c 0 for any start address
* @param end hint at the end of the resource range - pass
- * @c ~0UL for any end address
+ * @c ~0 for any end address
* @param count hint at the size of range required - pass @c 1
* for any size
* @param flags any extra flags to control the resource
@@ -3184,7 +3333,7 @@ resource_list_delete(struct resource_list *rl, int type, int rid)
*/
struct resource *
resource_list_reserve(struct resource_list *rl, device_t bus, device_t child,
- int type, int *rid, u_long start, u_long end, u_long count, u_int flags)
+ int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
struct resource_list_entry *rle = NULL;
int passthrough = (device_get_parent(child) != bus);
@@ -3227,9 +3376,9 @@ resource_list_reserve(struct resource_list *rl, device_t bus, device_t child,
* @param type the type of resource to allocate
* @param rid a pointer to the resource identifier
* @param start hint at the start of the resource range - pass
- * @c 0UL for any start address
+ * @c 0 for any start address
* @param end hint at the end of the resource range - pass
- * @c ~0UL for any end address
+ * @c ~0 for any end address
* @param count hint at the size of range required - pass @c 1
* for any size
* @param flags any extra flags to control the resource
@@ -3241,11 +3390,11 @@ resource_list_reserve(struct resource_list *rl, device_t bus, device_t child,
*/
struct resource *
resource_list_alloc(struct resource_list *rl, device_t bus, device_t child,
- int type, int *rid, u_long start, u_long end, u_long count, u_int flags)
+ int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
struct resource_list_entry *rle = NULL;
int passthrough = (device_get_parent(child) != bus);
- int isdefault = (start == 0UL && end == ~0UL);
+ int isdefault = RMAN_IS_DEFAULT_RANGE(start, end);
if (passthrough) {
return (BUS_ALLOC_RESOURCE(device_get_parent(bus), child,
@@ -3355,9 +3504,51 @@ resource_list_release(struct resource_list *rl, device_t bus, device_t child,
}
/**
+ * @brief Release all active resources of a given type
+ *
+ * Release all active resources of a specified type. This is intended
+ * to be used to cleanup resources leaked by a driver after detach or
+ * a failed attach.
+ *
+ * @param rl the resource list which was allocated from
+ * @param bus the parent device of @p child
+ * @param child the device whose active resources are being released
+ * @param type the type of resources to release
+ *
+ * @retval 0 success
+ * @retval EBUSY at least one resource was active
+ */
+int
+resource_list_release_active(struct resource_list *rl, device_t bus,
+ device_t child, int type)
+{
+ struct resource_list_entry *rle;
+ int error, retval;
+
+ retval = 0;
+ STAILQ_FOREACH(rle, rl, link) {
+ if (rle->type != type)
+ continue;
+ if (rle->res == NULL)
+ continue;
+ if ((rle->flags & (RLE_RESERVED | RLE_ALLOCATED)) ==
+ RLE_RESERVED)
+ continue;
+ retval = EBUSY;
+ error = resource_list_release(rl, bus, child, type,
+ rman_get_rid(rle->res), rle->res);
+ if (error != 0)
+ device_printf(bus,
+ "Failed to release active resource: %d\n", error);
+ }
+ return (retval);
+}
+
+
+/**
* @brief Fully release a reserved resource
*
- * Fully releases a resouce reserved via resource_list_reserve().
+ * Fully releases a resource reserved via resource_list_reserve().
*
* @param rl the resource list which was allocated from
* @param bus the parent device of @p child
@@ -3559,6 +3750,39 @@ bus_generic_shutdown(device_t dev)
}
/**
+ * @brief Default function for suspending a child device.
+ *
+ * This function is to be used by a bus's DEVICE_SUSPEND_CHILD().
+ */
+int
+bus_generic_suspend_child(device_t dev, device_t child)
+{
+ int error;
+
+ error = DEVICE_SUSPEND(child);
+
+ if (error == 0)
+ child->flags |= DF_SUSPENDED;
+
+ return (error);
+}
+
+/**
+ * @brief Default function for resuming a child device.
+ *
+ * This function is to be used by a bus's DEVICE_RESUME_CHILD().
+ */
+int
+bus_generic_resume_child(device_t dev, device_t child)
+{
+
+ DEVICE_RESUME(child);
+ child->flags &= ~DF_SUSPENDED;
+
+ return (0);
+}
+
+/**
* @brief Helper function for implementing DEVICE_SUSPEND()
*
* This function can be used to help implement the DEVICE_SUSPEND()
@@ -3574,12 +3798,12 @@ bus_generic_suspend(device_t dev)
device_t child, child2;
TAILQ_FOREACH(child, &dev->children, link) {
- error = DEVICE_SUSPEND(child);
+ error = BUS_SUSPEND_CHILD(dev, child);
if (error) {
for (child2 = TAILQ_FIRST(&dev->children);
child2 && child2 != child;
child2 = TAILQ_NEXT(child2, link))
- DEVICE_RESUME(child2);
+ BUS_RESUME_CHILD(dev, child2);
return (error);
}
}
@@ -3598,7 +3822,7 @@ bus_generic_resume(device_t dev)
device_t child;
TAILQ_FOREACH(child, &dev->children, link) {
- DEVICE_RESUME(child);
+ BUS_RESUME_CHILD(dev, child);
/* if resume fails, there's nothing we can usefully do... */
}
return (0);
@@ -3645,6 +3869,25 @@ bus_print_child_footer(device_t dev, device_t child)
/**
* @brief Helper function for implementing BUS_PRINT_CHILD().
*
+ * This function prints out the VM domain for the given device.
+ *
+ * @returns the number of characters printed
+ */
+int
+bus_print_child_domain(device_t dev, device_t child)
+{
+ int domain;
+
+ /* No domain? Don't print anything */
+ if (BUS_GET_DOMAIN(dev, child, &domain) != 0)
+ return (0);
+
+ return (printf(" numa-domain %d", domain));
+}
+
+/**
+ * @brief Helper function for implementing BUS_PRINT_CHILD().
+ *
* This function simply calls bus_print_child_header() followed by
* bus_print_child_footer().
*
@@ -3656,6 +3899,7 @@ bus_generic_print_child(device_t dev, device_t child)
int retval = 0;
retval += bus_print_child_header(dev, child);
+ retval += bus_print_child_domain(dev, child);
retval += bus_print_child_footer(dev, child);
return (retval);
@@ -3788,7 +4032,7 @@ bus_generic_teardown_intr(device_t dev, device_t child, struct resource *irq,
*/
int
bus_generic_adjust_resource(device_t dev, device_t child, int type,
- struct resource *r, u_long start, u_long end)
+ struct resource *r, rman_res_t start, rman_res_t end)
{
/* Propagate up the bus hierarchy until someone handles it. */
if (dev->parent)
@@ -3805,7 +4049,7 @@ bus_generic_adjust_resource(device_t dev, device_t child, int type,
*/
struct resource *
bus_generic_alloc_resource(device_t dev, device_t child, int type, int *rid,
- u_long start, u_long end, u_long count, u_int flags)
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
/* Propagate up the bus hierarchy until someone handles it. */
if (dev->parent)
@@ -3866,6 +4110,40 @@ bus_generic_deactivate_resource(device_t dev, device_t child, int type,
}
/**
+ * @brief Helper function for implementing BUS_MAP_RESOURCE().
+ *
+ * This simple implementation of BUS_MAP_RESOURCE() simply calls the
+ * BUS_MAP_RESOURCE() method of the parent of @p dev.
+ */
+int
+bus_generic_map_resource(device_t dev, device_t child, int type,
+ struct resource *r, struct resource_map_request *args,
+ struct resource_map *map)
+{
+ /* Propagate up the bus hierarchy until someone handles it. */
+ if (dev->parent)
+ return (BUS_MAP_RESOURCE(dev->parent, child, type, r, args,
+ map));
+ return (EINVAL);
+}
+
+/**
+ * @brief Helper function for implementing BUS_UNMAP_RESOURCE().
+ *
+ * This simple implementation of BUS_UNMAP_RESOURCE() simply calls the
+ * BUS_UNMAP_RESOURCE() method of the parent of @p dev.
+ */
+int
+bus_generic_unmap_resource(device_t dev, device_t child, int type,
+ struct resource *r, struct resource_map *map)
+{
+ /* Propagate up the bus hierarchy until someone handles it. */
+ if (dev->parent)
+ return (BUS_UNMAP_RESOURCE(dev->parent, child, type, r, map));
+ return (EINVAL);
+}
+
+/**
* @brief Helper function for implementing BUS_BIND_INTR().
*
* This simple implementation of BUS_BIND_INTR() simply calls the
@@ -3918,6 +4196,23 @@ bus_generic_describe_intr(device_t dev, device_t child, struct resource *irq,
}
/**
+ * @brief Helper function for implementing BUS_GET_CPUS().
+ *
+ * This simple implementation of BUS_GET_CPUS() simply calls the
+ * BUS_GET_CPUS() method of the parent of @p dev.
+ */
+int
+bus_generic_get_cpus(device_t dev, device_t child, enum cpu_sets op,
+ size_t setsize, cpuset_t *cpuset)
+{
+
+ /* Propagate up the bus hierarchy until someone handles it. */
+ if (dev->parent != NULL)
+ return (BUS_GET_CPUS(dev->parent, child, op, setsize, cpuset));
+ return (EINVAL);
+}
+
+/**
* @brief Helper function for implementing BUS_GET_DMA_TAG().
*
* This simple implementation of BUS_GET_DMA_TAG() simply calls the
@@ -3934,6 +4229,22 @@ bus_generic_get_dma_tag(device_t dev, device_t child)
}
/**
+ * @brief Helper function for implementing BUS_GET_BUS_TAG().
+ *
+ * This simple implementation of BUS_GET_BUS_TAG() simply calls the
+ * BUS_GET_BUS_TAG() method of the parent of @p dev.
+ */
+bus_space_tag_t
+bus_generic_get_bus_tag(device_t dev, device_t child)
+{
+
+ /* Propagate up the bus hierarchy until someone handles it. */
+ if (dev->parent != NULL)
+ return (BUS_GET_BUS_TAG(dev->parent, child));
+ return ((bus_space_tag_t)0);
+}
+
+/**
* @brief Helper function for implementing BUS_GET_RESOURCE().
*
* This implementation of BUS_GET_RESOURCE() uses the
@@ -3943,7 +4254,7 @@ bus_generic_get_dma_tag(device_t dev, device_t child)
*/
int
bus_generic_rl_get_resource(device_t dev, device_t child, int type, int rid,
- u_long *startp, u_long *countp)
+ rman_res_t *startp, rman_res_t *countp)
{
struct resource_list * rl = NULL;
struct resource_list_entry * rle = NULL;
@@ -3974,7 +4285,7 @@ bus_generic_rl_get_resource(device_t dev, device_t child, int type, int rid,
*/
int
bus_generic_rl_set_resource(device_t dev, device_t child, int type, int rid,
- u_long start, u_long count)
+ rman_res_t start, rman_res_t count)
{
struct resource_list * rl = NULL;
@@ -4042,7 +4353,7 @@ bus_generic_rl_release_resource(device_t dev, device_t child, int type,
*/
struct resource *
bus_generic_rl_alloc_resource(device_t dev, device_t child, int type,
- int *rid, u_long start, u_long end, u_long count, u_int flags)
+ int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
struct resource_list * rl = NULL;
@@ -4070,6 +4381,29 @@ bus_generic_child_present(device_t dev, device_t child)
return (BUS_CHILD_PRESENT(device_get_parent(dev), dev));
}
+int
+bus_generic_get_domain(device_t dev, device_t child, int *domain)
+{
+
+ if (dev->parent)
+ return (BUS_GET_DOMAIN(dev->parent, dev, domain));
+
+ return (ENOENT);
+}
+
+/**
+ * @brief Helper function for implementing BUS_RESCAN().
+ *
+ * This null implementation of BUS_RESCAN() always fails to indicate
+ * the bus does not support rescanning.
+ */
+int
+bus_null_rescan(device_t dev)
+{
+
+ return (ENXIO);
+}
+
/*
* Some convenience functions to make it easier for drivers to use the
* resource-management functions. All these really do is hide the
@@ -4118,13 +4452,16 @@ bus_release_resources(device_t dev, const struct resource_spec *rs,
* parent of @p dev.
*/
struct resource *
-bus_alloc_resource(device_t dev, int type, int *rid, u_long start, u_long end,
- u_long count, u_int flags)
+bus_alloc_resource(device_t dev, int type, int *rid, rman_res_t start,
+ rman_res_t end, rman_res_t count, u_int flags)
{
+ struct resource *res;
+
if (dev->parent == NULL)
return (NULL);
- return (BUS_ALLOC_RESOURCE(dev->parent, dev, type, rid, start, end,
- count, flags));
+ res = BUS_ALLOC_RESOURCE(dev->parent, dev, type, rid, start, end,
+ count, flags);
+ return (res);
}
/**
@@ -4134,8 +4471,8 @@ bus_alloc_resource(device_t dev, int type, int *rid, u_long start, u_long end,
* parent of @p dev.
*/
int
-bus_adjust_resource(device_t dev, int type, struct resource *r, u_long start,
- u_long end)
+bus_adjust_resource(device_t dev, int type, struct resource *r, rman_res_t start,
+ rman_res_t end)
{
if (dev->parent == NULL)
return (EINVAL);
@@ -4171,6 +4508,36 @@ bus_deactivate_resource(device_t dev, int type, int rid, struct resource *r)
}
/**
+ * @brief Wrapper function for BUS_MAP_RESOURCE().
+ *
+ * This function simply calls the BUS_MAP_RESOURCE() method of the
+ * parent of @p dev.
+ */
+int
+bus_map_resource(device_t dev, int type, struct resource *r,
+ struct resource_map_request *args, struct resource_map *map)
+{
+ if (dev->parent == NULL)
+ return (EINVAL);
+ return (BUS_MAP_RESOURCE(dev->parent, dev, type, r, args, map));
+}
+
+/**
+ * @brief Wrapper function for BUS_UNMAP_RESOURCE().
+ *
+ * This function simply calls the BUS_UNMAP_RESOURCE() method of the
+ * parent of @p dev.
+ */
+int
+bus_unmap_resource(device_t dev, int type, struct resource *r,
+ struct resource_map *map)
+{
+ if (dev->parent == NULL)
+ return (EINVAL);
+ return (BUS_UNMAP_RESOURCE(dev->parent, dev, type, r, map));
+}
+
+/**
* @brief Wrapper function for BUS_RELEASE_RESOURCE().
*
* This function simply calls the BUS_RELEASE_RESOURCE() method of the
@@ -4179,9 +4546,12 @@ bus_deactivate_resource(device_t dev, int type, int rid, struct resource *r)
int
bus_release_resource(device_t dev, int type, int rid, struct resource *r)
{
+ int rv;
+
if (dev->parent == NULL)
return (EINVAL);
- return (BUS_RELEASE_RESOURCE(dev->parent, dev, type, rid, r));
+ rv = BUS_RELEASE_RESOURCE(dev->parent, dev, type, rid, r);
+ return (rv);
}
/**
@@ -4265,7 +4635,7 @@ bus_describe_intr(device_t dev, struct resource *irq, void *cookie,
*/
int
bus_set_resource(device_t dev, int type, int rid,
- u_long start, u_long count)
+ rman_res_t start, rman_res_t count)
{
return (BUS_SET_RESOURCE(device_get_parent(dev), dev, type, rid,
start, count));
@@ -4279,7 +4649,7 @@ bus_set_resource(device_t dev, int type, int rid,
*/
int
bus_get_resource(device_t dev, int type, int rid,
- u_long *startp, u_long *countp)
+ rman_res_t *startp, rman_res_t *countp)
{
return (BUS_GET_RESOURCE(device_get_parent(dev), dev, type, rid,
startp, countp));
@@ -4291,10 +4661,11 @@ bus_get_resource(device_t dev, int type, int rid,
* This function simply calls the BUS_GET_RESOURCE() method of the
* parent of @p dev and returns the start value.
*/
-u_long
+rman_res_t
bus_get_resource_start(device_t dev, int type, int rid)
{
- u_long start, count;
+ rman_res_t start;
+ rman_res_t count;
int error;
error = BUS_GET_RESOURCE(device_get_parent(dev), dev, type, rid,
@@ -4310,10 +4681,11 @@ bus_get_resource_start(device_t dev, int type, int rid)
* This function simply calls the BUS_GET_RESOURCE() method of the
* parent of @p dev and returns the count value.
*/
-u_long
+rman_res_t
bus_get_resource_count(device_t dev, int type, int rid)
{
- u_long start, count;
+ rman_res_t start;
+ rman_res_t count;
int error;
error = BUS_GET_RESOURCE(device_get_parent(dev), dev, type, rid,
@@ -4386,6 +4758,23 @@ bus_child_location_str(device_t child, char *buf, size_t buflen)
}
/**
+ * @brief Wrapper function for BUS_GET_CPUS().
+ *
+ * This function simply calls the BUS_GET_CPUS() method of the
+ * parent of @p dev.
+ */
+int
+bus_get_cpus(device_t dev, enum cpu_sets op, size_t setsize, cpuset_t *cpuset)
+{
+ device_t parent;
+
+ parent = device_get_parent(dev);
+ if (parent == NULL)
+ return (EINVAL);
+ return (BUS_GET_CPUS(parent, dev, op, setsize, cpuset));
+}
+
+/**
* @brief Wrapper function for BUS_GET_DMA_TAG().
*
* This function simply calls the BUS_GET_DMA_TAG() method of the
@@ -4402,6 +4791,35 @@ bus_get_dma_tag(device_t dev)
return (BUS_GET_DMA_TAG(parent, dev));
}
+/**
+ * @brief Wrapper function for BUS_GET_BUS_TAG().
+ *
+ * This function simply calls the BUS_GET_BUS_TAG() method of the
+ * parent of @p dev.
+ */
+bus_space_tag_t
+bus_get_bus_tag(device_t dev)
+{
+ device_t parent;
+
+ parent = device_get_parent(dev);
+ if (parent == NULL)
+ return ((bus_space_tag_t)0);
+ return (BUS_GET_BUS_TAG(parent, dev));
+}
+
+/**
+ * @brief Wrapper function for BUS_GET_DOMAIN().
+ *
+ * This function simply calls the BUS_GET_DOMAIN() method of the
+ * parent of @p dev.
+ */
+int
+bus_get_domain(device_t dev, int *domain)
+{
+ return (BUS_GET_DOMAIN(device_get_parent(dev), dev, domain));
+}
+
/* Resume all devices and then notify userland that we're up again. */
static int
root_resume(device_t dev)
@@ -4436,7 +4854,7 @@ root_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
}
/*
- * If we get here, assume that the device is permanant and really is
+ * If we get here, assume that the device is permanent and really is
* present in the system. Removable bus drivers are expected to intercept
* this call long before it gets here. We return -1 so that drivers that
* really care can check vs -1 or some ERRNO returned higher in the food
@@ -4448,6 +4866,25 @@ root_child_present(device_t dev, device_t child)
return (-1);
}
+#ifndef __rtems__
+static int
+root_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize,
+ cpuset_t *cpuset)
+{
+
+ switch (op) {
+ case INTR_CPUS:
+ /* Default to returning the set of all CPUs. */
+ if (setsize != sizeof(cpuset_t))
+ return (EINVAL);
+ *cpuset = all_cpus;
+ return (0);
+ default:
+ return (EINVAL);
+ }
+}
+#endif /* __rtems__ */
+
static kobj_method_t root_methods[] = {
/* Device interface */
KOBJMETHOD(device_shutdown, bus_generic_shutdown),
@@ -4460,6 +4897,9 @@ static kobj_method_t root_methods[] = {
KOBJMETHOD(bus_write_ivar, bus_generic_write_ivar),
KOBJMETHOD(bus_setup_intr, root_setup_intr),
KOBJMETHOD(bus_child_present, root_child_present),
+#ifndef __rtems__
+ KOBJMETHOD(bus_get_cpus, root_get_cpus),
+#endif /* __rtems__ */
KOBJMETHOD_END
};
@@ -4805,7 +5245,7 @@ sysctl_devices(SYSCTL_HANDLER_ARGS)
int *name = (int *)arg1;
u_int namelen = arg2;
int index;
- struct device *dev;
+ device_t dev;
struct u_device udev; /* XXX this is a bit big */
int error;
@@ -4877,4 +5317,291 @@ bus_free_resource(device_t dev, int type, struct resource *r)
return (0);
return (bus_release_resource(dev, type, rman_get_rid(r), r));
}
+
+device_t
+device_lookup_by_name(const char *name)
+{
+ device_t dev;
+
+ TAILQ_FOREACH(dev, &bus_data_devices, devlink) {
+ if (dev->nameunit != NULL && strcmp(dev->nameunit, name) == 0)
+ return (dev);
+ }
+ return (NULL);
+}
+
+/*
+ * /dev/devctl2 implementation. The existing /dev/devctl device has
+ * implicit semantics on open, so it could not be reused for this.
+ * Another option would be to call this /dev/bus?
+ */
+static int
+find_device(struct devreq *req, device_t *devp)
+{
+ device_t dev;
+
+ /*
+ * First, ensure that the name is nul terminated.
+ */
+ if (memchr(req->dr_name, '\0', sizeof(req->dr_name)) == NULL)
+ return (EINVAL);
+
+ /*
+ * Second, try to find an attached device whose name matches
+ * 'name'.
+ */
+ dev = device_lookup_by_name(req->dr_name);
+ if (dev != NULL) {
+ *devp = dev;
+ return (0);
+ }
+
+ /* Finally, give device enumerators a chance. */
+ dev = NULL;
+ EVENTHANDLER_INVOKE(dev_lookup, req->dr_name, &dev);
+ if (dev == NULL)
+ return (ENOENT);
+ *devp = dev;
+ return (0);
+}
+
+static bool
+driver_exists(device_t bus, const char *driver)
+{
+ devclass_t dc;
+
+ for (dc = bus->devclass; dc != NULL; dc = dc->parent) {
+ if (devclass_find_driver_internal(dc, driver) != NULL)
+ return (true);
+ }
+ return (false);
+}
+
+static int
+devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
+ struct thread *td)
+{
+ struct devreq *req;
+ device_t dev;
+ int error, old;
+
+ /* Locate the device to control. */
+ mtx_lock(&Giant);
+ req = (struct devreq *)data;
+ switch (cmd) {
+ case DEV_ATTACH:
+ case DEV_DETACH:
+ case DEV_ENABLE:
+ case DEV_DISABLE:
+ case DEV_SUSPEND:
+ case DEV_RESUME:
+ case DEV_SET_DRIVER:
+ case DEV_RESCAN:
+ case DEV_DELETE:
+ error = priv_check(td, PRIV_DRIVER);
+ if (error == 0)
+ error = find_device(req, &dev);
+ break;
+ default:
+ error = ENOTTY;
+ break;
+ }
+ if (error) {
+ mtx_unlock(&Giant);
+ return (error);
+ }
+
+ /* Perform the requested operation. */
+ switch (cmd) {
+ case DEV_ATTACH:
+ if (device_is_attached(dev) && (dev->flags & DF_REBID) == 0)
+ error = EBUSY;
+ else if (!device_is_enabled(dev))
+ error = ENXIO;
+ else
+ error = device_probe_and_attach(dev);
+ break;
+ case DEV_DETACH:
+ if (!device_is_attached(dev)) {
+ error = ENXIO;
+ break;
+ }
+ if (!(req->dr_flags & DEVF_FORCE_DETACH)) {
+ error = device_quiesce(dev);
+ if (error)
+ break;
+ }
+ error = device_detach(dev);
+ break;
+ case DEV_ENABLE:
+ if (device_is_enabled(dev)) {
+ error = EBUSY;
+ break;
+ }
+
+ /*
+ * If the device has been probed but not attached (e.g.
+ * when it has been disabled by a loader hint), just
+ * attach the device rather than doing a full probe.
+ */
+ device_enable(dev);
+ if (device_is_alive(dev)) {
+ /*
+ * If the device was disabled via a hint, clear
+ * the hint.
+ */
+ if (resource_disabled(dev->driver->name, dev->unit))
+ resource_unset_value(dev->driver->name,
+ dev->unit, "disabled");
+ error = device_attach(dev);
+ } else
+ error = device_probe_and_attach(dev);
+ break;
+ case DEV_DISABLE:
+ if (!device_is_enabled(dev)) {
+ error = ENXIO;
+ break;
+ }
+
+ if (!(req->dr_flags & DEVF_FORCE_DETACH)) {
+ error = device_quiesce(dev);
+ if (error)
+ break;
+ }
+
+ /*
+ * Force DF_FIXEDCLASS on around detach to preserve
+ * the existing name.
+ */
+ old = dev->flags;
+ dev->flags |= DF_FIXEDCLASS;
+ error = device_detach(dev);
+ if (!(old & DF_FIXEDCLASS))
+ dev->flags &= ~DF_FIXEDCLASS;
+ if (error == 0)
+ device_disable(dev);
+ break;
+ case DEV_SUSPEND:
+ if (device_is_suspended(dev)) {
+ error = EBUSY;
+ break;
+ }
+ if (device_get_parent(dev) == NULL) {
+ error = EINVAL;
+ break;
+ }
+ error = BUS_SUSPEND_CHILD(device_get_parent(dev), dev);
+ break;
+ case DEV_RESUME:
+ if (!device_is_suspended(dev)) {
+ error = EINVAL;
+ break;
+ }
+ if (device_get_parent(dev) == NULL) {
+ error = EINVAL;
+ break;
+ }
+ error = BUS_RESUME_CHILD(device_get_parent(dev), dev);
+ break;
+ case DEV_SET_DRIVER: {
+ devclass_t dc;
+ char driver[128];
+
+ error = copyinstr(req->dr_data, driver, sizeof(driver), NULL);
+ if (error)
+ break;
+ if (driver[0] == '\0') {
+ error = EINVAL;
+ break;
+ }
+ if (dev->devclass != NULL &&
+ strcmp(driver, dev->devclass->name) == 0)
+ /* XXX: Could possibly force DF_FIXEDCLASS on? */
+ break;
+
+ /*
+ * Scan drivers for this device's bus looking for at
+ * least one matching driver.
+ */
+ if (dev->parent == NULL) {
+ error = EINVAL;
+ break;
+ }
+ if (!driver_exists(dev->parent, driver)) {
+ error = ENOENT;
+ break;
+ }
+ dc = devclass_create(driver);
+ if (dc == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+ /* Detach device if necessary. */
+ if (device_is_attached(dev)) {
+ if (req->dr_flags & DEVF_SET_DRIVER_DETACH)
+ error = device_detach(dev);
+ else
+ error = EBUSY;
+ if (error)
+ break;
+ }
+
+ /* Clear any previously-fixed device class and unit. */
+ if (dev->flags & DF_FIXEDCLASS)
+ devclass_delete_device(dev->devclass, dev);
+ dev->flags |= DF_WILDCARD;
+ dev->unit = -1;
+
+ /* Force the new device class. */
+ error = devclass_add_device(dc, dev);
+ if (error)
+ break;
+ dev->flags |= DF_FIXEDCLASS;
+ error = device_probe_and_attach(dev);
+ break;
+ }
+ case DEV_RESCAN:
+ if (!device_is_attached(dev)) {
+ error = ENXIO;
+ break;
+ }
+ error = BUS_RESCAN(dev);
+ break;
+ case DEV_DELETE: {
+ device_t parent;
+
+ parent = device_get_parent(dev);
+ if (parent == NULL) {
+ error = EINVAL;
+ break;
+ }
+ if (!(req->dr_flags & DEVF_FORCE_DELETE)) {
+ if (bus_child_present(dev) != 0) {
+ error = EBUSY;
+ break;
+ }
+ }
+
+ error = device_delete_child(parent, dev);
+ break;
+ }
+ }
+ mtx_unlock(&Giant);
+ return (error);
+}
+
+static struct cdevsw devctl2_cdevsw = {
+ .d_version = D_VERSION,
+ .d_ioctl = devctl2_ioctl,
+ .d_name = "devctl2",
+};
+
+static void
+devctl2_init(void)
+{
+
+ make_dev_credf(MAKEDEV_ETERNAL, &devctl2_cdevsw, 0, NULL,
+ UID_ROOT, GID_WHEEL, 0600, "devctl2");
+}
#endif /* __rtems__ */
diff --git a/freebsd/sys/kern/subr_counter.c b/freebsd/sys/kern/subr_counter.c
new file mode 100644
index 00000000..2625b179
--- /dev/null
+++ b/freebsd/sys/kern/subr_counter.c
@@ -0,0 +1,123 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <vm/uma.h>
+
+#define IN_SUBR_COUNTER_C
+#include <sys/counter.h>
+
+void
+counter_u64_zero(counter_u64_t c)
+{
+
+ counter_u64_zero_inline(c);
+}
+
+uint64_t
+counter_u64_fetch(counter_u64_t c)
+{
+
+ return (counter_u64_fetch_inline(c));
+}
+
+counter_u64_t
+counter_u64_alloc(int flags)
+{
+ counter_u64_t r;
+
+ r = uma_zalloc(pcpu_zone_64, flags);
+ if (r != NULL)
+ counter_u64_zero(r);
+
+ return (r);
+}
+
+void
+counter_u64_free(counter_u64_t c)
+{
+
+ uma_zfree(pcpu_zone_64, c);
+}
+
+int
+sysctl_handle_counter_u64(SYSCTL_HANDLER_ARGS)
+{
+ uint64_t out;
+ int error;
+
+ out = counter_u64_fetch(*(counter_u64_t *)arg1);
+
+ error = SYSCTL_OUT(req, &out, sizeof(uint64_t));
+
+ if (error || !req->newptr)
+ return (error);
+
+ /*
+ * Any write attempt to a counter zeroes it.
+ */
+ counter_u64_zero(*(counter_u64_t *)arg1);
+
+ return (0);
+}
+
+int
+sysctl_handle_counter_u64_array(SYSCTL_HANDLER_ARGS)
+{
+ uint64_t *out;
+ int error;
+
+ out = malloc(arg2 * sizeof(uint64_t), M_TEMP, M_WAITOK);
+ for (int i = 0; i < arg2; i++)
+ out[i] = counter_u64_fetch(((counter_u64_t *)arg1)[i]);
+
+ error = SYSCTL_OUT(req, out, arg2 * sizeof(uint64_t));
+ free(out, M_TEMP);
+
+ if (error || !req->newptr)
+ return (error);
+
+ /*
+ * Any write attempt to a counter zeroes it.
+ */
+ for (int i = 0; i < arg2; i++)
+ counter_u64_zero(((counter_u64_t *)arg1)[i]);
+
+ return (0);
+}
diff --git a/freebsd/sys/kern/subr_hash.c b/freebsd/sys/kern/subr_hash.c
index e526a866..1371a345 100644
--- a/freebsd/sys/kern/subr_hash.c
+++ b/freebsd/sys/kern/subr_hash.c
@@ -43,6 +43,13 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/malloc.h>
+static __inline int
+hash_mflags(int flags)
+{
+
+ return ((flags & HASH_NOWAIT) ? M_NOWAIT : M_WAITOK);
+}
+
/*
* General routine to allocate a hash table with control of memory flags.
*/
@@ -63,13 +70,8 @@ hashinit_flags(int elements, struct malloc_type *type, u_long *hashmask,
continue;
hashsize >>= 1;
- if (flags & HASH_NOWAIT)
- hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl),
- type, M_NOWAIT);
- else
- hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl),
- type, M_WAITOK);
-
+ hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type,
+ hash_mflags(flags));
if (hashtbl != NULL) {
for (i = 0; i < hashsize; i++)
LIST_INIT(&hashtbl[i]);
@@ -95,26 +97,32 @@ hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask)
hashtbl = vhashtbl;
for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++)
- KASSERT(LIST_EMPTY(hp), ("%s: hash not empty", __func__));
+ KASSERT(LIST_EMPTY(hp), ("%s: hashtbl %p not empty "
+ "(malloc type %s)", __func__, hashtbl, type->ks_shortdesc));
free(hashtbl, type);
}
static const int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531,
2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143,
6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 };
-#define NPRIMES (sizeof(primes) / sizeof(primes[0]))
+#define NPRIMES nitems(primes)
/*
- * General routine to allocate a prime number sized hash table.
+ * General routine to allocate a prime number sized hash table with control of
+ * memory flags.
*/
void *
-phashinit(int elements, struct malloc_type *type, u_long *nentries)
+phashinit_flags(int elements, struct malloc_type *type, u_long *nentries, int flags)
{
long hashsize;
LIST_HEAD(generic, generic) *hashtbl;
int i;
KASSERT(elements > 0, ("%s: bad elements", __func__));
+ /* Exactly one of HASH_WAITOK and HASH_NOWAIT must be set. */
+ KASSERT((flags & HASH_WAITOK) ^ (flags & HASH_NOWAIT),
+ ("Bad flags (0x%x) passed to phashinit_flags", flags));
+
for (i = 1, hashsize = primes[1]; hashsize <= elements;) {
i++;
if (i == NPRIMES)
@@ -122,9 +130,25 @@ phashinit(int elements, struct malloc_type *type, u_long *nentries)
hashsize = primes[i];
}
hashsize = primes[i - 1];
- hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
+
+ hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type,
+ hash_mflags(flags));
+ if (hashtbl == NULL)
+ return (NULL);
+
for (i = 0; i < hashsize; i++)
LIST_INIT(&hashtbl[i]);
*nentries = hashsize;
return (hashtbl);
}
+
+/*
+ * Allocate and initialize a prime number sized hash table with default flag:
+ * may sleep.
+ */
+void *
+phashinit(int elements, struct malloc_type *type, u_long *nentries)
+{
+
+ return (phashinit_flags(elements, type, nentries, HASH_WAITOK));
+}
diff --git a/freebsd/sys/kern/subr_hints.c b/freebsd/sys/kern/subr_hints.c
index 13fb3934..7fcf3275 100644
--- a/freebsd/sys/kern/subr_hints.c
+++ b/freebsd/sys/kern/subr_hints.c
@@ -31,7 +31,9 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/bus.h>
@@ -51,6 +53,87 @@ static char __used default_static_hints[] = "";
__weak_reference(default_static_hints, static_hints);
#endif /* __rtems__ */
+#ifndef __rtems__
+/*
+ * Define the kern.hintmode sysctl, which accepts only the value 2 and
+ * causes a switch from static KENV mode to dynamic KENV.  Systems with
+ * hints compiled into the kernel can then see/modify the KENV (and the
+ * hints as well).
+ */
+
+static int
+sysctl_hintmode(SYSCTL_HANDLER_ARGS)
+{
+ const char *cp;
+ char *line, *eq;
+ int eqidx, error, from_kenv, i, value;
+
+ from_kenv = 0;
+ cp = kern_envp;
+ value = hintmode;
+
+ /* Fetch candidate for new hintmode value */
+ error = sysctl_handle_int(oidp, &value, 0, req);
+ if (error || req->newptr == NULL)
+ return (error);
+
+ if (value != 2)
+		/* Only accept switching to hintmode 2 */
+ return (EINVAL);
+
+ /* Migrate from static to dynamic hints */
+ switch (hintmode) {
+ case 0:
+ if (dynamic_kenv) {
+ /*
+			 * Already in dynamic KENV mode.  Set hintmode
+			 * to 2 so this check is skipped in the future.
+ */
+ hintmode = 2;
+ return (0);
+ }
+ from_kenv = 1;
+ cp = kern_envp;
+ break;
+ case 1:
+ cp = static_hints;
+ break;
+ case 2:
+ /* Nothing to do, hintmode already 2 */
+ return (0);
+ }
+
+ while (cp) {
+ i = strlen(cp);
+ if (i == 0)
+ break;
+ if (from_kenv) {
+ if (strncmp(cp, "hint.", 5) != 0)
+				/* the kenv may contain more than hints */
+ continue;
+ }
+ eq = strchr(cp, '=');
+ if (eq == NULL)
+ /* Bad hint value */
+ continue;
+ eqidx = eq - cp;
+
+ line = malloc(i+1, M_TEMP, M_WAITOK);
+ strcpy(line, cp);
+ line[eqidx] = '\0';
+ kern_setenv(line, line + eqidx + 1);
+ free(line, M_TEMP);
+ cp += i + 1;
+ }
+
+ hintmode = value;
+ use_kenv = 1;
+ return (0);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, hintmode, CTLTYPE_INT|CTLFLAG_RW,
+ &hintmode, 0, sysctl_hintmode, "I", "Get/set current hintmode");
+#endif /* __rtems__ */
+
/*
* Evil wildcarding resource string lookup.
* This walks the supplied env string table and returns a match.
@@ -145,8 +228,7 @@ res_find(int *line, int *startln,
r_name, &r_unit, r_resname, r_value);
if (hit && n != 4) {
printf("CONFIG: invalid hint '%s'\n", cp);
- /* XXX: abuse bogus index() declaration */
- p = index(cp, 'h');
+ p = strchr(cp, 'h');
*p = 'H';
hit = 0;
}
@@ -190,18 +272,18 @@ res_find(int *line, int *startln,
s = cp;
/* This is a bit of a hack, but at least is reentrant */
/* Note that it returns some !unterminated! strings. */
- s = index(s, '.') + 1; /* start of device */
+ s = strchr(s, '.') + 1; /* start of device */
if (ret_name)
*ret_name = s;
- s = index(s, '.') + 1; /* start of unit */
+ s = strchr(s, '.') + 1; /* start of unit */
if (ret_namelen && ret_name)
*ret_namelen = s - *ret_name - 1; /* device length */
if (ret_unit)
*ret_unit = r_unit;
- s = index(s, '.') + 1; /* start of resname */
+ s = strchr(s, '.') + 1; /* start of resname */
if (ret_resname)
*ret_resname = s;
- s = index(s, '=') + 1; /* start of value */
+ s = strchr(s, '=') + 1; /* start of value */
if (ret_resnamelen && ret_resname)
*ret_resnamelen = s - *ret_resname - 1; /* value len */
if (ret_value)
@@ -403,3 +485,31 @@ resource_disabled(const char *name, int unit)
return (0);
return (value);
}
+
+/*
+ * Clear a value associated with a device by removing it from
+ * the kernel environment. This only removes a hint for an
+ * exact unit.
+ */
+int
+resource_unset_value(const char *name, int unit, const char *resname)
+{
+ char varname[128];
+ const char *retname, *retvalue;
+ int error, line;
+ size_t len;
+
+ line = 0;
+ error = resource_find(&line, NULL, name, &unit, resname, NULL,
+ &retname, NULL, NULL, NULL, NULL, &retvalue);
+ if (error)
+ return (error);
+
+ retname -= strlen("hint.");
+ len = retvalue - retname - 1;
+ if (len > sizeof(varname) - 1)
+ return (ENAMETOOLONG);
+ memcpy(varname, retname, len);
+ varname[len] = '\0';
+ return (kern_unsetenv(varname));
+}
diff --git a/freebsd/sys/kern/subr_kobj.c b/freebsd/sys/kern/subr_kobj.c
index 5666f274..91a25cf4 100644
--- a/freebsd/sys/kern/subr_kobj.c
+++ b/freebsd/sys/kern/subr_kobj.c
@@ -85,7 +85,7 @@ SYSINIT(kobj, SI_SUB_LOCK, SI_ORDER_ANY, kobj_init_mutex, NULL);
* desc pointer is NULL, it is guaranteed never to match any read
* descriptors.
*/
-static struct kobj_method null_method = {
+static const struct kobj_method null_method = {
0, 0,
};
@@ -226,7 +226,7 @@ kobj_lookup_method(kobj_class_t cls,
ce = kobj_lookup_method_mi(cls, desc);
if (!ce)
- ce = desc->deflt;
+ ce = &desc->deflt;
*cep = ce;
return ce;
}
diff --git a/freebsd/sys/kern/subr_lock.c b/freebsd/sys/kern/subr_lock.c
index 4a55a95a..83c63010 100644
--- a/freebsd/sys/kern/subr_lock.c
+++ b/freebsd/sys/kern/subr_lock.c
@@ -81,8 +81,8 @@ lock_init(struct lock_object *lock, struct lock_class *class, const char *name,
int i;
/* Check for double-init and zero object. */
- KASSERT(!lock_initalized(lock), ("lock \"%s\" %p already initialized",
- name, lock));
+ KASSERT(flags & LO_NEW || !lock_initialized(lock),
+ ("lock \"%s\" %p already initialized", name, lock));
/* Look up lock class to find its index. */
for (i = 0; i < LOCK_CLASS_MAX; i++)
@@ -104,15 +104,45 @@ lock_destroy(struct lock_object *lock)
{
#ifndef __rtems__
- KASSERT(lock_initalized(lock), ("lock %p is not initialized", lock));
+ KASSERT(lock_initialized(lock), ("lock %p is not initialized", lock));
#else /* __rtems__ */
- BSD_ASSERT(lock_initalized(lock));
+ BSD_ASSERT(lock_initialized(lock));
#endif /* __rtems__ */
WITNESS_DESTROY(lock);
LOCK_LOG_DESTROY(lock, 0);
lock->lo_flags &= ~LO_INITIALIZED;
}
+#ifndef __rtems__
+void
+lock_delay(struct lock_delay_arg *la)
+{
+ u_int i, delay, backoff, min, max;
+ struct lock_delay_config *lc = la->config;
+
+ delay = la->delay;
+
+ if (delay == 0)
+ delay = lc->initial;
+ else {
+ delay += lc->step;
+ max = lc->max;
+ if (delay > max)
+ delay = max;
+ }
+
+ backoff = cpu_ticks() % delay;
+ min = lc->min;
+ if (backoff < min)
+ backoff = min;
+ for (i = 0; i < backoff; i++)
+ cpu_spinwait();
+
+ la->delay = delay;
+ la->spin_cnt += backoff;
+}
+#endif /* __rtems__ */
+
#ifdef DDB
DB_SHOW_COMMAND(lock, db_show_lock)
{
@@ -248,34 +278,13 @@ lock_prof_init(void *arg)
}
SYSINIT(lockprof, SI_SUB_SMP, SI_ORDER_ANY, lock_prof_init, NULL);
-/*
- * To be certain that lock profiling has idled on all cpus before we
- * reset, we schedule the resetting thread on all active cpus. Since
- * all operations happen within critical sections we can be sure that
- * it is safe to zero the profiling structures.
- */
-static void
-lock_prof_idle(void)
-{
- struct thread *td;
- int cpu;
-
- td = curthread;
- thread_lock(td);
- CPU_FOREACH(cpu) {
- sched_bind(td, cpu);
- }
- sched_unbind(td);
- thread_unlock(td);
-}
-
static void
lock_prof_reset_wait(void)
{
/*
- * Spin relinquishing our cpu so that lock_prof_idle may
- * run on it.
+ * Spin relinquishing our cpu so that quiesce_all_cpus may
+ * complete.
*/
while (lock_prof_resetting)
sched_relinquish(curthread);
@@ -297,7 +306,7 @@ lock_prof_reset(void)
atomic_store_rel_int(&lock_prof_resetting, 1);
enabled = lock_prof_enable;
lock_prof_enable = 0;
- lock_prof_idle();
+ quiesce_all_cpus("profreset", 0);
/*
* Some objects may have migrated between CPUs. Clear all links
* before we zero the structures. Some items may still be linked
@@ -409,7 +418,7 @@ dump_lock_prof_stats(SYSCTL_HANDLER_ARGS)
"max", "wait_max", "total", "wait_total", "count", "avg", "wait_avg", "cnt_hold", "cnt_lock", "name");
enabled = lock_prof_enable;
lock_prof_enable = 0;
- lock_prof_idle();
+ quiesce_all_cpus("profstat", 0);
t = ticks;
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (lp_cpu[cpu] == NULL)
diff --git a/freebsd/sys/kern/subr_module.c b/freebsd/sys/kern/subr_module.c
index f2aa7026..531600c4 100644
--- a/freebsd/sys/kern/subr_module.c
+++ b/freebsd/sys/kern/subr_module.c
@@ -162,6 +162,9 @@ preload_search_info(caddr_t mod, int inf)
uint32_t type = 0;
int next;
+ if (mod == NULL)
+ return (NULL);
+
curp = mod;
for (;;) {
hdr = (uint32_t *)curp;
@@ -255,7 +258,7 @@ preload_fetch_size(caddr_t mod)
return (*mdp);
}
-/* Called from locore on i386. Convert physical pointers to kvm. Sigh. */
+/* Called from locore. Convert physical pointers to kvm. Sigh. */
void
preload_bootstrap_relocate(vm_offset_t offset)
{
diff --git a/freebsd/sys/kern/subr_pcpu.c b/freebsd/sys/kern/subr_pcpu.c
new file mode 100644
index 00000000..4d223899
--- /dev/null
+++ b/freebsd/sys/kern/subr_pcpu.c
@@ -0,0 +1,425 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2001 Wind River Systems, Inc.
+ * All rights reserved.
+ * Written by: John Baldwin <jhb@FreeBSD.org>
+ *
+ * Copyright (c) 2009 Jeffrey Roberson <jeff@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This module provides MI support for per-cpu data.
+ *
+ * Each architecture determines the mapping of logical CPU IDs to physical
+ * CPUs. The requirements of this mapping are as follows:
+ * - Logical CPU IDs must reside in the range 0 ... MAXCPU - 1.
+ * - The mapping is not required to be dense. That is, there may be
+ * gaps in the mappings.
+ * - The platform sets the value of MAXCPU in <machine/param.h>.
+ * - It is suggested, but not required, that in the non-SMP case, the
+ * platform define MAXCPU to be 1 and define the logical ID of the
+ * sole CPU as 0.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_ddb.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+#include <sys/sx.h>
+#include <vm/uma.h>
+#include <ddb/ddb.h>
+
+#ifndef __rtems__
+static MALLOC_DEFINE(M_PCPU, "Per-cpu", "Per-cpu resource accouting.");
+
+struct dpcpu_free {
+ uintptr_t df_start;
+ int df_len;
+ TAILQ_ENTRY(dpcpu_free) df_link;
+};
+
+static DPCPU_DEFINE(char, modspace[DPCPU_MODMIN]);
+static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head);
+static struct sx dpcpu_lock;
+uintptr_t dpcpu_off[MAXCPU];
+struct pcpu *cpuid_to_pcpu[MAXCPU];
+struct cpuhead cpuhead = STAILQ_HEAD_INITIALIZER(cpuhead);
+
+/*
+ * Initialize the MI portions of a struct pcpu.
+ */
+void
+pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
+{
+
+ bzero(pcpu, size);
+ KASSERT(cpuid >= 0 && cpuid < MAXCPU,
+ ("pcpu_init: invalid cpuid %d", cpuid));
+ pcpu->pc_cpuid = cpuid;
+ cpuid_to_pcpu[cpuid] = pcpu;
+ STAILQ_INSERT_TAIL(&cpuhead, pcpu, pc_allcpu);
+ cpu_pcpu_init(pcpu, cpuid, size);
+ pcpu->pc_rm_queue.rmq_next = &pcpu->pc_rm_queue;
+ pcpu->pc_rm_queue.rmq_prev = &pcpu->pc_rm_queue;
+}
+
+void
+dpcpu_init(void *dpcpu, int cpuid)
+{
+ struct pcpu *pcpu;
+
+ pcpu = pcpu_find(cpuid);
+ pcpu->pc_dynamic = (uintptr_t)dpcpu - DPCPU_START;
+
+ /*
+ * Initialize defaults from our linker section.
+ */
+ memcpy(dpcpu, (void *)DPCPU_START, DPCPU_BYTES);
+
+ /*
+ * Place it in the global pcpu offset array.
+ */
+ dpcpu_off[cpuid] = pcpu->pc_dynamic;
+}
+
+static void
+dpcpu_startup(void *dummy __unused)
+{
+ struct dpcpu_free *df;
+
+ df = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO);
+ df->df_start = (uintptr_t)&DPCPU_NAME(modspace);
+ df->df_len = DPCPU_MODMIN;
+ TAILQ_INSERT_HEAD(&dpcpu_head, df, df_link);
+ sx_init(&dpcpu_lock, "dpcpu alloc lock");
+}
+SYSINIT(dpcpu, SI_SUB_KLD, SI_ORDER_FIRST, dpcpu_startup, 0);
+#endif /* __rtems__ */
+
+/*
+ * UMA_PCPU_ZONE zones, that are available for all kernel
+ * consumers. Right now 64 bit zone is used for counter(9)
+ * and pointer zone is used by flowtable.
+ */
+
+uma_zone_t pcpu_zone_64;
+uma_zone_t pcpu_zone_ptr;
+
+static void
+pcpu_zones_startup(void)
+{
+
+ pcpu_zone_64 = uma_zcreate("64 pcpu", sizeof(uint64_t),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
+
+ if (sizeof(uint64_t) == sizeof(void *))
+ pcpu_zone_ptr = pcpu_zone_64;
+ else
+ pcpu_zone_ptr = uma_zcreate("ptr pcpu", sizeof(void *),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
+}
+SYSINIT(pcpu_zones, SI_SUB_KMEM, SI_ORDER_ANY, pcpu_zones_startup, NULL);
+
+#ifndef __rtems__
+/*
+ * First-fit extent based allocator for allocating space in the per-cpu
+ * region reserved for modules. This is only intended for use by the
+ * kernel linkers to place module linker sets.
+ */
+void *
+dpcpu_alloc(int size)
+{
+ struct dpcpu_free *df;
+ void *s;
+
+ s = NULL;
+ size = roundup2(size, sizeof(void *));
+ sx_xlock(&dpcpu_lock);
+ TAILQ_FOREACH(df, &dpcpu_head, df_link) {
+ if (df->df_len < size)
+ continue;
+ if (df->df_len == size) {
+ s = (void *)df->df_start;
+ TAILQ_REMOVE(&dpcpu_head, df, df_link);
+ free(df, M_PCPU);
+ break;
+ }
+ s = (void *)df->df_start;
+ df->df_len -= size;
+ df->df_start = df->df_start + size;
+ break;
+ }
+ sx_xunlock(&dpcpu_lock);
+
+ return (s);
+}
+
+/*
+ * Free dynamic per-cpu space at module unload time.
+ */
+void
+dpcpu_free(void *s, int size)
+{
+ struct dpcpu_free *df;
+ struct dpcpu_free *dn;
+ uintptr_t start;
+ uintptr_t end;
+
+ size = roundup2(size, sizeof(void *));
+ start = (uintptr_t)s;
+ end = start + size;
+ /*
+ * Free a region of space and merge it with as many neighbors as
+ * possible. Keeping the list sorted simplifies this operation.
+ */
+ sx_xlock(&dpcpu_lock);
+ TAILQ_FOREACH(df, &dpcpu_head, df_link) {
+ if (df->df_start > end)
+ break;
+ /*
+ * If we expand at the end of an entry we may have to
+ * merge it with the one following it as well.
+ */
+ if (df->df_start + df->df_len == start) {
+ df->df_len += size;
+ dn = TAILQ_NEXT(df, df_link);
+ if (df->df_start + df->df_len == dn->df_start) {
+ df->df_len += dn->df_len;
+ TAILQ_REMOVE(&dpcpu_head, dn, df_link);
+ free(dn, M_PCPU);
+ }
+ sx_xunlock(&dpcpu_lock);
+ return;
+ }
+ if (df->df_start == end) {
+ df->df_start = start;
+ df->df_len += size;
+ sx_xunlock(&dpcpu_lock);
+ return;
+ }
+ }
+ dn = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO);
+ dn->df_start = start;
+ dn->df_len = size;
+ if (df)
+ TAILQ_INSERT_BEFORE(df, dn, df_link);
+ else
+ TAILQ_INSERT_TAIL(&dpcpu_head, dn, df_link);
+ sx_xunlock(&dpcpu_lock);
+}
+
+/*
+ * Initialize the per-cpu storage from an updated linker-set region.
+ */
+void
+dpcpu_copy(void *s, int size)
+{
+#ifdef SMP
+ uintptr_t dpcpu;
+ int i;
+
+ CPU_FOREACH(i) {
+ dpcpu = dpcpu_off[i];
+ if (dpcpu == 0)
+ continue;
+ memcpy((void *)(dpcpu + (uintptr_t)s), s, size);
+ }
+#else
+ memcpy((void *)(dpcpu_off[0] + (uintptr_t)s), s, size);
+#endif
+}
+
+/*
+ * Destroy a struct pcpu.
+ */
+void
+pcpu_destroy(struct pcpu *pcpu)
+{
+
+ STAILQ_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu);
+ cpuid_to_pcpu[pcpu->pc_cpuid] = NULL;
+ dpcpu_off[pcpu->pc_cpuid] = 0;
+}
+
+/*
+ * Locate a struct pcpu by cpu id.
+ */
+struct pcpu *
+pcpu_find(u_int cpuid)
+{
+
+ return (cpuid_to_pcpu[cpuid]);
+}
+
+int
+sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS)
+{
+ uintptr_t dpcpu;
+ int64_t count;
+ int i;
+
+ count = 0;
+ CPU_FOREACH(i) {
+ dpcpu = dpcpu_off[i];
+ if (dpcpu == 0)
+ continue;
+ count += *(int64_t *)(dpcpu + (uintptr_t)arg1);
+ }
+ return (SYSCTL_OUT(req, &count, sizeof(count)));
+}
+
+int
+sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS)
+{
+ uintptr_t dpcpu;
+ long count;
+ int i;
+
+ count = 0;
+ CPU_FOREACH(i) {
+ dpcpu = dpcpu_off[i];
+ if (dpcpu == 0)
+ continue;
+ count += *(long *)(dpcpu + (uintptr_t)arg1);
+ }
+ return (SYSCTL_OUT(req, &count, sizeof(count)));
+}
+
+int
+sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS)
+{
+ uintptr_t dpcpu;
+ int count;
+ int i;
+
+ count = 0;
+ CPU_FOREACH(i) {
+ dpcpu = dpcpu_off[i];
+ if (dpcpu == 0)
+ continue;
+ count += *(int *)(dpcpu + (uintptr_t)arg1);
+ }
+ return (SYSCTL_OUT(req, &count, sizeof(count)));
+}
+
+#ifdef DDB
+DB_SHOW_COMMAND(dpcpu_off, db_show_dpcpu_off)
+{
+ int id;
+
+ CPU_FOREACH(id) {
+ db_printf("dpcpu_off[%2d] = 0x%jx (+ DPCPU_START = %p)\n",
+ id, (uintmax_t)dpcpu_off[id],
+ (void *)(uintptr_t)(dpcpu_off[id] + DPCPU_START));
+ }
+}
+
+static void
+show_pcpu(struct pcpu *pc)
+{
+ struct thread *td;
+
+ db_printf("cpuid = %d\n", pc->pc_cpuid);
+ db_printf("dynamic pcpu = %p\n", (void *)pc->pc_dynamic);
+ db_printf("curthread = ");
+ td = pc->pc_curthread;
+ if (td != NULL)
+ db_printf("%p: pid %d tid %d \"%s\"\n", td, td->td_proc->p_pid,
+ td->td_tid, td->td_name);
+ else
+ db_printf("none\n");
+ db_printf("curpcb = %p\n", pc->pc_curpcb);
+ db_printf("fpcurthread = ");
+ td = pc->pc_fpcurthread;
+ if (td != NULL)
+ db_printf("%p: pid %d \"%s\"\n", td, td->td_proc->p_pid,
+ td->td_name);
+ else
+ db_printf("none\n");
+ db_printf("idlethread = ");
+ td = pc->pc_idlethread;
+ if (td != NULL)
+ db_printf("%p: tid %d \"%s\"\n", td, td->td_tid, td->td_name);
+ else
+ db_printf("none\n");
+ db_show_mdpcpu(pc);
+
+#ifdef VIMAGE
+ db_printf("curvnet = %p\n", pc->pc_curthread->td_vnet);
+#endif
+
+#ifdef WITNESS
+ db_printf("spin locks held:\n");
+ witness_list_locks(&pc->pc_spinlocks, db_printf);
+#endif
+}
+
+DB_SHOW_COMMAND(pcpu, db_show_pcpu)
+{
+ struct pcpu *pc;
+ int id;
+
+ if (have_addr)
+ id = ((addr >> 4) % 16) * 10 + (addr % 16);
+ else
+ id = PCPU_GET(cpuid);
+ pc = pcpu_find(id);
+ if (pc == NULL) {
+ db_printf("CPU %d not found\n", id);
+ return;
+ }
+ show_pcpu(pc);
+}
+
+DB_SHOW_ALL_COMMAND(pcpu, db_show_cpu_all)
+{
+ struct pcpu *pc;
+ int id;
+
+ db_printf("Current CPU: %d\n\n", PCPU_GET(cpuid));
+ for (id = 0; id <= mp_maxid; id++) {
+ pc = pcpu_find(id);
+ if (pc != NULL) {
+ show_pcpu(pc);
+ db_printf("\n");
+ }
+ }
+}
+DB_SHOW_ALIAS(allpcpu, db_show_cpu_all);
+#endif
+#endif /* __rtems__ */
diff --git a/freebsd/sys/kern/subr_prf.c b/freebsd/sys/kern/subr_prf.c
index 8e627d1a..9273cd2a 100644
--- a/freebsd/sys/kern/subr_prf.c
+++ b/freebsd/sys/kern/subr_prf.c
@@ -39,10 +39,13 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#ifdef _KERNEL
#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/local/opt_printf.h>
+#endif /* _KERNEL */
#include <rtems/bsd/sys/param.h>
+#ifdef _KERNEL
#include <sys/systm.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/kdb.h>
@@ -65,7 +68,9 @@ __FBSDID("$FreeBSD$");
#include <sys/cons.h>
#endif /* __rtems__ */
#include <sys/uio.h>
+#endif
#include <sys/ctype.h>
+#include <sys/sbuf.h>
#ifdef DDB
#include <ddb/ddb.h>
@@ -77,6 +82,8 @@ __FBSDID("$FreeBSD$");
*/
#include <machine/stdarg.h>
+#ifdef _KERNEL
+
#ifndef __rtems__
#define TOCONS 0x01
#define TOTTY 0x02
@@ -117,23 +124,20 @@ static void snprintf_func(int ch, void *arg);
static int msgbufmapped; /* Set when safe to use msgbuf */
int msgbuftrigger;
-static int log_console_output = 1;
-TUNABLE_INT("kern.log_console_output", &log_console_output);
-SYSCTL_INT(_kern, OID_AUTO, log_console_output, CTLFLAG_RW,
- &log_console_output, 0, "Duplicate console output to the syslog.");
+static int log_console_output = 1;
+SYSCTL_INT(_kern, OID_AUTO, log_console_output, CTLFLAG_RWTUN,
+ &log_console_output, 0, "Duplicate console output to the syslog");
/*
* See the comment in log_console() below for more explanation of this.
*/
-static int log_console_add_linefeed = 0;
-TUNABLE_INT("kern.log_console_add_linefeed", &log_console_add_linefeed);
-SYSCTL_INT(_kern, OID_AUTO, log_console_add_linefeed, CTLFLAG_RW,
- &log_console_add_linefeed, 0, "log_console() adds extra newlines.");
+static int log_console_add_linefeed;
+SYSCTL_INT(_kern, OID_AUTO, log_console_add_linefeed, CTLFLAG_RWTUN,
+ &log_console_add_linefeed, 0, "log_console() adds extra newlines");
-static int always_console_output = 0;
-TUNABLE_INT("kern.always_console_output", &always_console_output);
-SYSCTL_INT(_kern, OID_AUTO, always_console_output, CTLFLAG_RW,
- &always_console_output, 0, "Always output to console despite TIOCCONS.");
+static int always_console_output;
+SYSCTL_INT(_kern, OID_AUTO, always_console_output, CTLFLAG_RWTUN,
+ &always_console_output, 0, "Always output to console despite TIOCCONS");
/*
* Warn that a system table is full.
@@ -189,15 +193,24 @@ uprintf(const char *fmt, ...)
}
/*
- * tprintf prints on the controlling terminal associated with the given
- * session, possibly to the log as well.
+ * tprintf and vtprintf print on the controlling terminal associated with the
+ * given session, possibly to the log as well.
*/
void
tprintf(struct proc *p, int pri, const char *fmt, ...)
{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vtprintf(p, pri, fmt, ap);
+ va_end(ap);
+}
+
+void
+vtprintf(struct proc *p, int pri, const char *fmt, va_list ap)
+{
struct tty *tp = NULL;
int flags = 0;
- va_list ap;
struct putchar_arg pca;
struct session *sess = NULL;
@@ -222,14 +235,12 @@ tprintf(struct proc *p, int pri, const char *fmt, ...)
pca.tty = tp;
pca.flags = flags;
pca.p_bufr = NULL;
- va_start(ap, fmt);
if (pca.tty != NULL)
tty_lock(pca.tty);
sx_sunlock(&proctree_lock);
kvprintf(fmt, putchar, &pca, 10, ap);
if (pca.tty != NULL)
tty_unlock(pca.tty);
- va_end(ap);
if (sess != NULL)
sess_release(sess);
msgbuftrigger = 1;
@@ -310,9 +321,15 @@ log(int level, const char *fmt, ...)
va_list ap;
va_start(ap, fmt);
- (void)_vprintf(level, log_open ? TOLOG : TOCONS, fmt, ap);
+ vlog(level, fmt, ap);
va_end(ap);
+}
+
+void
+vlog(int level, const char *fmt, va_list ap)
+{
+ (void)_vprintf(level, log_open ? TOLOG : TOCONS | TOLOG, fmt, ap);
#ifndef __rtems__
msgbuftrigger = 1;
#endif /* __rtems__ */
@@ -478,25 +495,19 @@ putchar(int c, void *arg)
struct putchar_arg *ap = (struct putchar_arg*) arg;
struct tty *tp = ap->tty;
int flags = ap->flags;
- int putbuf_done = 0;
/* Don't use the tty code after a panic or while in ddb. */
if (kdb_active) {
if (c != '\0')
cnputc(c);
- } else {
- if ((panicstr == NULL) && (flags & TOTTY) && (tp != NULL))
- tty_putchar(tp, c);
-
- if (flags & TOCONS) {
- putbuf(c, ap);
- putbuf_done = 1;
- }
- }
- if ((flags & TOLOG) && (putbuf_done == 0)) {
- if (c != '\0')
- putbuf(c, ap);
+ return;
}
+
+ if ((flags & TOTTY) && tp != NULL && panicstr == NULL)
+ tty_putchar(tp, c);
+
+ if ((flags & (TOCONS | TOLOG)) && c != '\0')
+ putbuf(c, ap);
}
#endif /* __rtems__ */
@@ -627,7 +638,7 @@ ksprintn(char *nbuf, uintmax_t num, int base, int *lenp, int upper)
* the next characters (up to a control character, i.e. a character <= 32),
* give the name of the register. Thus:
*
- * kvprintf("reg=%b\n", 3, "\10\2BITTWO\1BITONE\n");
+ * kvprintf("reg=%b\n", 3, "\10\2BITTWO\1BITONE");
*
* would produce output:
*
@@ -746,7 +757,15 @@ reswitch: switch (ch = (u_char)*fmt++) {
PCHAR('>');
break;
case 'c':
+ width -= 1;
+
+ if (!ladjust && width > 0)
+ while (width--)
+ PCHAR(padc);
PCHAR(va_arg(ap, int));
+ if (ladjust && width > 0)
+ while (width--)
+ PCHAR(padc);
break;
case 'D':
up = va_arg(ap, u_char *);
@@ -948,7 +967,7 @@ number:
while (percent < fmt)
PCHAR(*percent++);
/*
- * Since we ignore an formatting argument it is no
+ * Since we ignore a formatting argument it is no
* longer safe to obey the remaining formatting
* arguments as the arguments will no longer match
* the format specs.
@@ -1047,7 +1066,7 @@ sysctl_kern_msgbuf(SYSCTL_HANDLER_ARGS)
len = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq);
mtx_unlock(&msgbuf_lock);
if (len == 0)
- return (0);
+ return (SYSCTL_OUT(req, "", 1)); /* add nulterm */
error = sysctl_handle_opaque(oidp, buf, len, req);
if (error)
@@ -1157,5 +1176,83 @@ hexdump(const void *ptr, int length, const char *hdr, int flags)
printf("\n");
}
}
+#endif /* __rtems__ */
+#endif /* _KERNEL */
+#ifndef __rtems__
+void
+sbuf_hexdump(struct sbuf *sb, const void *ptr, int length, const char *hdr,
+ int flags)
+{
+ int i, j, k;
+ int cols;
+ const unsigned char *cp;
+ char delim;
+
+ if ((flags & HD_DELIM_MASK) != 0)
+ delim = (flags & HD_DELIM_MASK) >> 8;
+ else
+ delim = ' ';
+
+ if ((flags & HD_COLUMN_MASK) != 0)
+ cols = flags & HD_COLUMN_MASK;
+ else
+ cols = 16;
+
+ cp = ptr;
+ for (i = 0; i < length; i+= cols) {
+ if (hdr != NULL)
+ sbuf_printf(sb, "%s", hdr);
+
+ if ((flags & HD_OMIT_COUNT) == 0)
+ sbuf_printf(sb, "%04x ", i);
+
+ if ((flags & HD_OMIT_HEX) == 0) {
+ for (j = 0; j < cols; j++) {
+ k = i + j;
+ if (k < length)
+ sbuf_printf(sb, "%c%02x", delim, cp[k]);
+ else
+ sbuf_printf(sb, " ");
+ }
+ }
+
+ if ((flags & HD_OMIT_CHARS) == 0) {
+ sbuf_printf(sb, " |");
+ for (j = 0; j < cols; j++) {
+ k = i + j;
+ if (k >= length)
+ sbuf_printf(sb, " ");
+ else if (cp[k] >= ' ' && cp[k] <= '~')
+ sbuf_printf(sb, "%c", cp[k]);
+ else
+ sbuf_printf(sb, ".");
+ }
+ sbuf_printf(sb, "|");
+ }
+ sbuf_printf(sb, "\n");
+ }
+}
+
+#ifdef _KERNEL
+void
+counted_warning(unsigned *counter, const char *msg)
+{
+ struct thread *td;
+ unsigned c;
+
+ for (;;) {
+ c = *counter;
+ if (c == 0)
+ break;
+ if (atomic_cmpset_int(counter, c, c - 1)) {
+ td = curthread;
+ log(LOG_INFO, "pid %d (%s) %s%s\n",
+ td->td_proc->p_pid, td->td_name, msg,
+ c > 1 ? "" : " - not logging anymore");
+ break;
+ }
+ }
+}
+#endif
#endif /* __rtems__ */
diff --git a/freebsd/sys/kern/subr_rman.c b/freebsd/sys/kern/subr_rman.c
index 69f8d359..115d5d8d 100644
--- a/freebsd/sys/kern/subr_rman.c
+++ b/freebsd/sys/kern/subr_rman.c
@@ -92,18 +92,17 @@ struct resource_i {
TAILQ_ENTRY(resource_i) r_link;
LIST_ENTRY(resource_i) r_sharelink;
LIST_HEAD(, resource_i) *r_sharehead;
- u_long r_start; /* index of the first entry in this resource */
- u_long r_end; /* index of the last entry (inclusive) */
+ rman_res_t r_start; /* index of the first entry in this resource */
+ rman_res_t r_end; /* index of the last entry (inclusive) */
u_int r_flags;
void *r_virtual; /* virtual address of this resource */
- struct device *r_dev; /* device which has allocated this resource */
+ device_t r_dev; /* device which has allocated this resource */
struct rman *r_rm; /* resource manager from whence this came */
int r_rid; /* optional rid for this resource. */
};
static int rman_debug = 0;
-TUNABLE_INT("debug.rman_debug", &rman_debug);
-SYSCTL_INT(_debug, OID_AUTO, rman_debug, CTLFLAG_RW,
+SYSCTL_INT(_debug, OID_AUTO, rman_debug, CTLFLAG_RWTUN,
&rman_debug, 0, "rman debug");
#define DPRINTF(params) if (rman_debug) printf params
@@ -138,7 +137,7 @@ rman_init(struct rman *rm)
}
if (rm->rm_start == 0 && rm->rm_end == 0)
- rm->rm_end = ~0ul;
+ rm->rm_end = ~0;
if (rm->rm_type == RMAN_UNINIT)
panic("rman_init");
if (rm->rm_type == RMAN_GAUGE)
@@ -157,11 +156,12 @@ rman_init(struct rman *rm)
}
int
-rman_manage_region(struct rman *rm, u_long start, u_long end)
+rman_manage_region(struct rman *rm, rman_res_t start, rman_res_t end)
{
struct resource_i *r, *s, *t;
+ int rv = 0;
- DPRINTF(("rman_manage_region: <%s> request: start %#lx, end %#lx\n",
+ DPRINTF(("rman_manage_region: <%s> request: start %#jx, end %#jx\n",
rm->rm_descr, start, end));
if (start < rm->rm_start || end > rm->rm_end)
return EINVAL;
@@ -176,7 +176,7 @@ rman_manage_region(struct rman *rm, u_long start, u_long end)
/* Skip entries before us. */
TAILQ_FOREACH(s, &rm->rm_list, r_link) {
- if (s->r_end == ULONG_MAX)
+ if (s->r_end == ~0)
break;
if (s->r_end + 1 >= r->r_start)
break;
@@ -187,13 +187,17 @@ rman_manage_region(struct rman *rm, u_long start, u_long end)
TAILQ_INSERT_TAIL(&rm->rm_list, r, r_link);
} else {
/* Check for any overlap with the current region. */
- if (r->r_start <= s->r_end && r->r_end >= s->r_start)
- return EBUSY;
+ if (r->r_start <= s->r_end && r->r_end >= s->r_start) {
+ rv = EBUSY;
+ goto out;
+ }
/* Check for any overlap with the next region. */
t = TAILQ_NEXT(s, r_link);
- if (t && r->r_start <= t->r_end && r->r_end >= t->r_start)
- return EBUSY;
+ if (t && r->r_start <= t->r_end && r->r_end >= t->r_start) {
+ rv = EBUSY;
+ goto out;
+ }
/*
* See if this region can be merged with the next region. If
@@ -224,9 +228,9 @@ rman_manage_region(struct rman *rm, u_long start, u_long end)
TAILQ_INSERT_BEFORE(s, r, r_link);
}
}
-
+out:
mtx_unlock(rm->rm_mtx);
- return 0;
+ return rv;
}
int
@@ -272,7 +276,7 @@ rman_fini(struct rman *rm)
}
int
-rman_first_free_region(struct rman *rm, u_long *start, u_long *end)
+rman_first_free_region(struct rman *rm, rman_res_t *start, rman_res_t *end)
{
struct resource_i *r;
@@ -290,7 +294,7 @@ rman_first_free_region(struct rman *rm, u_long *start, u_long *end)
}
int
-rman_last_free_region(struct rman *rm, u_long *start, u_long *end)
+rman_last_free_region(struct rman *rm, rman_res_t *start, rman_res_t *end)
{
struct resource_i *r;
@@ -309,7 +313,7 @@ rman_last_free_region(struct rman *rm, u_long *start, u_long *end)
/* Shrink or extend one or both ends of an allocated resource. */
int
-rman_adjust_resource(struct resource *rr, u_long start, u_long end)
+rman_adjust_resource(struct resource *rr, rman_res_t start, rman_res_t end)
{
struct resource_i *r, *s, *t, *new;
struct rman *rm;
@@ -432,18 +436,18 @@ rman_adjust_resource(struct resource *rr, u_long start, u_long end)
#define SHARE_TYPE(f) (f & (RF_SHAREABLE | RF_PREFETCHABLE))
struct resource *
-rman_reserve_resource_bound(struct rman *rm, u_long start, u_long end,
- u_long count, u_long bound, u_int flags,
- struct device *dev)
+rman_reserve_resource_bound(struct rman *rm, rman_res_t start, rman_res_t end,
+ rman_res_t count, rman_res_t bound, u_int flags,
+ device_t dev)
{
u_int new_rflags;
struct resource_i *r, *s, *rv;
- u_long rstart, rend, amask, bmask;
+ rman_res_t rstart, rend, amask, bmask;
rv = NULL;
- DPRINTF(("rman_reserve_resource_bound: <%s> request: [%#lx, %#lx], "
- "length %#lx, flags %u, device %s\n", rm->rm_descr, start, end,
+ DPRINTF(("rman_reserve_resource_bound: <%s> request: [%#jx, %#jx], "
+ "length %#jx, flags %x, device %s\n", rm->rm_descr, start, end,
count, flags,
dev == NULL ? "<null>" : device_get_nameunit(dev)));
KASSERT((flags & RF_FIRSTSHARE) == 0,
@@ -452,19 +456,29 @@ rman_reserve_resource_bound(struct rman *rm, u_long start, u_long end,
mtx_lock(rm->rm_mtx);
+ r = TAILQ_FIRST(&rm->rm_list);
+ if (r == NULL) {
+ DPRINTF(("NULL list head\n"));
+ } else {
+ DPRINTF(("rman_reserve_resource_bound: trying %#jx <%#jx,%#jx>\n",
+ r->r_end, start, count-1));
+ }
for (r = TAILQ_FIRST(&rm->rm_list);
r && r->r_end < start + count - 1;
- r = TAILQ_NEXT(r, r_link))
+ r = TAILQ_NEXT(r, r_link)) {
;
+ DPRINTF(("rman_reserve_resource_bound: tried %#jx <%#jx,%#jx>\n",
+ r->r_end, start, count-1));
+ }
if (r == NULL) {
DPRINTF(("could not find a region\n"));
goto out;
}
- amask = (1ul << RF_ALIGNMENT(flags)) - 1;
- KASSERT(start <= ULONG_MAX - amask,
- ("start (%#lx) + amask (%#lx) would wrap around", start, amask));
+ amask = (1ull << RF_ALIGNMENT(flags)) - 1;
+ KASSERT(start <= RM_MAX_END - amask,
+ ("start (%#jx) + amask (%#jx) would wrap around", start, amask));
/* If bound is 0, bmask will also be 0 */
bmask = ~(bound - 1);
@@ -472,18 +486,18 @@ rman_reserve_resource_bound(struct rman *rm, u_long start, u_long end,
* First try to find an acceptable totally-unshared region.
*/
for (s = r; s; s = TAILQ_NEXT(s, r_link)) {
- DPRINTF(("considering [%#lx, %#lx]\n", s->r_start, s->r_end));
+ DPRINTF(("considering [%#jx, %#jx]\n", s->r_start, s->r_end));
/*
* The resource list is sorted, so there is no point in
* searching further once r_start is too large.
*/
if (s->r_start > end - (count - 1)) {
- DPRINTF(("s->r_start (%#lx) + count - 1> end (%#lx)\n",
+ DPRINTF(("s->r_start (%#jx) + count - 1> end (%#jx)\n",
s->r_start, end));
break;
}
- if (s->r_start > ULONG_MAX - amask) {
- DPRINTF(("s->r_start (%#lx) + amask (%#lx) too large\n",
+ if (s->r_start > RM_MAX_END - amask) {
+ DPRINTF(("s->r_start (%#jx) + amask (%#jx) too large\n",
s->r_start, amask));
break;
}
@@ -491,7 +505,7 @@ rman_reserve_resource_bound(struct rman *rm, u_long start, u_long end,
DPRINTF(("region is allocated\n"));
continue;
}
- rstart = ulmax(s->r_start, start);
+ rstart = ummax(s->r_start, start);
/*
* Try to find a region by adjusting to boundary and alignment
* until both conditions are satisfied. This is not an optimal
@@ -503,16 +517,16 @@ rman_reserve_resource_bound(struct rman *rm, u_long start, u_long end,
rstart += bound - (rstart & ~bmask);
} while ((rstart & amask) != 0 && rstart < end &&
rstart < s->r_end);
- rend = ulmin(s->r_end, ulmax(rstart + count - 1, end));
+ rend = ummin(s->r_end, ummax(rstart + count - 1, end));
if (rstart > rend) {
DPRINTF(("adjusted start exceeds end\n"));
continue;
}
- DPRINTF(("truncated region: [%#lx, %#lx]; size %#lx (requested %#lx)\n",
+ DPRINTF(("truncated region: [%#jx, %#jx]; size %#jx (requested %#jx)\n",
rstart, rend, (rend - rstart + 1), count));
if ((rend - rstart + 1) >= count) {
- DPRINTF(("candidate region: [%#lx, %#lx], size %#lx\n",
+ DPRINTF(("candidate region: [%#jx, %#jx], size %#jx\n",
rstart, rend, (rend - rstart + 1)));
if ((s->r_end - s->r_start + 1) == count) {
DPRINTF(("candidate region is entire chunk\n"));
@@ -543,7 +557,7 @@ rman_reserve_resource_bound(struct rman *rm, u_long start, u_long end,
if (s->r_start < rv->r_start && s->r_end > rv->r_end) {
DPRINTF(("splitting region in three parts: "
- "[%#lx, %#lx]; [%#lx, %#lx]; [%#lx, %#lx]\n",
+ "[%#jx, %#jx]; [%#jx, %#jx]; [%#jx, %#jx]\n",
s->r_start, rv->r_start - 1,
rv->r_start, rv->r_end,
rv->r_end + 1, s->r_end));
@@ -639,8 +653,8 @@ out:
}
struct resource *
-rman_reserve_resource(struct rman *rm, u_long start, u_long end, u_long count,
- u_int flags, struct device *dev)
+rman_reserve_resource(struct rman *rm, rman_res_t start, rman_res_t end,
+ rman_res_t count, u_int flags, device_t dev)
{
return (rman_reserve_resource_bound(rm, start, end, count, 0, flags,
@@ -801,13 +815,13 @@ rman_make_alignment_flags(uint32_t size)
}
void
-rman_set_start(struct resource *r, u_long start)
+rman_set_start(struct resource *r, rman_res_t start)
{
r->__r_i->r_start = start;
}
-u_long
+rman_res_t
rman_get_start(struct resource *r)
{
@@ -815,20 +829,20 @@ rman_get_start(struct resource *r)
}
void
-rman_set_end(struct resource *r, u_long end)
+rman_set_end(struct resource *r, rman_res_t end)
{
r->__r_i->r_end = end;
}
-u_long
+rman_res_t
rman_get_end(struct resource *r)
{
return (r->__r_i->r_end);
}
-u_long
+rman_res_t
rman_get_size(struct resource *r)
{
@@ -885,6 +899,27 @@ rman_get_bushandle(struct resource *r)
}
void
+rman_set_mapping(struct resource *r, struct resource_map *map)
+{
+
+ KASSERT(rman_get_size(r) == map->r_size,
+ ("rman_set_mapping: size mismatch"));
+ rman_set_bustag(r, map->r_bustag);
+ rman_set_bushandle(r, map->r_bushandle);
+ rman_set_virtual(r, map->r_vaddr);
+}
+
+void
+rman_get_mapping(struct resource *r, struct resource_map *map)
+{
+
+ map->r_bustag = rman_get_bustag(r);
+ map->r_bushandle = rman_get_bushandle(r);
+ map->r_size = rman_get_size(r);
+ map->r_vaddr = rman_get_virtual(r);
+}
+
+void
rman_set_rid(struct resource *r, int rid)
{
@@ -899,13 +934,13 @@ rman_get_rid(struct resource *r)
}
void
-rman_set_device(struct resource *r, struct device *dev)
+rman_set_device(struct resource *r, device_t dev)
{
r->__r_i->r_dev = dev;
}
-struct device *
+device_t
rman_get_device(struct resource *r)
{
@@ -1030,8 +1065,8 @@ dump_rman_header(struct rman *rm)
if (db_pager_quit)
return;
- db_printf("rman %p: %s (0x%lx-0x%lx full range)\n",
- rm, rm->rm_descr, rm->rm_start, rm->rm_end);
+ db_printf("rman %p: %s (0x%jx-0x%jx full range)\n",
+ rm, rm->rm_descr, (rman_res_t)rm->rm_start, (rman_res_t)rm->rm_end);
}
static void
@@ -1049,7 +1084,8 @@ dump_rman(struct rman *rm)
devname = "nomatch";
} else
devname = NULL;
- db_printf(" 0x%lx-0x%lx ", r->r_start, r->r_end);
+ db_printf(" 0x%jx-0x%jx (RID=%d) ",
+ r->r_start, r->r_end, r->r_rid);
if (devname != NULL)
db_printf("(%s)\n", devname);
else
diff --git a/freebsd/sys/kern/subr_sbuf.c b/freebsd/sys/kern/subr_sbuf.c
index e61b0844..73a9b383 100644
--- a/freebsd/sys/kern/subr_sbuf.c
+++ b/freebsd/sys/kern/subr_sbuf.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <sys/ctype.h>
#include <rtems/bsd/sys/errno.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/uio.h>
@@ -44,6 +45,7 @@ __FBSDID("$FreeBSD$");
#else /* _KERNEL */
#include <ctype.h>
#include <errno.h>
+#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -54,11 +56,11 @@ __FBSDID("$FreeBSD$");
#ifdef _KERNEL
static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
-#define SBMALLOC(size) malloc(size, M_SBUF, M_WAITOK)
+#define SBMALLOC(size) malloc(size, M_SBUF, M_WAITOK|M_ZERO)
#define SBFREE(buf) free(buf, M_SBUF)
#else /* _KERNEL */
#define KASSERT(e, m)
-#define SBMALLOC(size) malloc(size)
+#define SBMALLOC(size) calloc(1, size)
#define SBFREE(buf) free(buf)
#endif /* _KERNEL */
@@ -72,6 +74,7 @@ static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
#define SBUF_FREESPACE(s) ((s)->s_size - ((s)->s_len + 1))
#define SBUF_CANEXTEND(s) ((s)->s_flags & SBUF_AUTOEXTEND)
#define SBUF_ISSECTION(s) ((s)->s_flags & SBUF_INSECTION)
+#define SBUF_NULINCLUDED(s) ((s)->s_flags & SBUF_INCLUDENUL)
/*
* Set / clear flags
@@ -79,6 +82,7 @@ static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
#define SBUF_SETFLAG(s, f) do { (s)->s_flags |= (f); } while (0)
#define SBUF_CLEARFLAG(s, f) do { (s)->s_flags &= ~(f); } while (0)
+#define SBUF_MINSIZE 2 /* Min is 1 byte + nulterm. */
#define SBUF_MINEXTENDSIZE 16 /* Should be power of 2. */
#ifdef PAGE_SIZE
@@ -102,9 +106,15 @@ _assert_sbuf_integrity(const char *fun, struct sbuf *s)
("%s called with a NULL sbuf pointer", fun));
KASSERT(s->s_buf != NULL,
("%s called with uninitialized or corrupt sbuf", fun));
- KASSERT(s->s_len < s->s_size,
- ("wrote past end of sbuf (%jd >= %jd)",
- (intmax_t)s->s_len, (intmax_t)s->s_size));
+ if (SBUF_ISFINISHED(s) && SBUF_NULINCLUDED(s)) {
+ KASSERT(s->s_len <= s->s_size,
+ ("wrote past end of sbuf (%jd >= %jd)",
+ (intmax_t)s->s_len, (intmax_t)s->s_size));
+ } else {
+ KASSERT(s->s_len < s->s_size,
+ ("wrote past end of sbuf (%jd >= %jd)",
+ (intmax_t)s->s_len, (intmax_t)s->s_size));
+ }
}
static void
@@ -187,8 +197,9 @@ sbuf_newbuf(struct sbuf *s, char *buf, int length, int flags)
s->s_buf = buf;
if ((s->s_flags & SBUF_AUTOEXTEND) == 0) {
- KASSERT(s->s_size >= 0,
- ("attempt to create a too small sbuf"));
+ KASSERT(s->s_size >= SBUF_MINSIZE,
+ ("attempt to create an sbuf smaller than %d bytes",
+ SBUF_MINSIZE));
}
if (s->s_buf != NULL)
@@ -264,6 +275,28 @@ sbuf_uionew(struct sbuf *s, struct uio *uio, int *error)
}
#endif
+int
+sbuf_get_flags(struct sbuf *s)
+{
+
+ return (s->s_flags & SBUF_USRFLAGMSK);
+}
+
+void
+sbuf_clear_flags(struct sbuf *s, int flags)
+{
+
+ s->s_flags &= ~(flags & SBUF_USRFLAGMSK);
+}
+
+void
+sbuf_set_flags(struct sbuf *s, int flags)
+{
+
+
+ s->s_flags |= (flags & SBUF_USRFLAGMSK);
+}
+
/*
* Clear an sbuf and reset its position.
*/
@@ -354,34 +387,51 @@ sbuf_drain(struct sbuf *s)
}
/*
- * Append a byte to an sbuf. This is the core function for appending
+ * Append bytes to an sbuf. This is the core function for appending
* to an sbuf and is the main place that deals with extending the
* buffer and marking overflow.
*/
static void
-sbuf_put_byte(struct sbuf *s, int c)
+sbuf_put_bytes(struct sbuf *s, const char *buf, size_t len)
{
+ size_t n;
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
if (s->s_error != 0)
return;
- if (SBUF_FREESPACE(s) <= 0) {
- /*
- * If there is a drain, use it, otherwise extend the
- * buffer.
- */
- if (s->s_drain_func != NULL)
- (void)sbuf_drain(s);
- else if (sbuf_extend(s, 1) < 0)
- s->s_error = ENOMEM;
- if (s->s_error != 0)
- return;
+ while (len > 0) {
+ if (SBUF_FREESPACE(s) <= 0) {
+ /*
+ * If there is a drain, use it, otherwise extend the
+ * buffer.
+ */
+ if (s->s_drain_func != NULL)
+ (void)sbuf_drain(s);
+ else if (sbuf_extend(s, len > INT_MAX ? INT_MAX : len)
+ < 0)
+ s->s_error = ENOMEM;
+ if (s->s_error != 0)
+ return;
+ }
+ n = SBUF_FREESPACE(s);
+ if (len < n)
+ n = len;
+ memcpy(&s->s_buf[s->s_len], buf, n);
+ s->s_len += n;
+ if (SBUF_ISSECTION(s))
+ s->s_sect_len += n;
+ len -= n;
+ buf += n;
}
- s->s_buf[s->s_len++] = c;
- if (SBUF_ISSECTION(s))
- s->s_sect_len++;
+}
+
+static void
+sbuf_put_byte(struct sbuf *s, char c)
+{
+
+ sbuf_put_bytes(s, &c, 1);
}
/*
@@ -390,19 +440,10 @@ sbuf_put_byte(struct sbuf *s, int c)
int
sbuf_bcat(struct sbuf *s, const void *buf, size_t len)
{
- const char *str = buf;
- const char *end = str + len;
-
- assert_sbuf_integrity(s);
- assert_sbuf_state(s, 0);
+ sbuf_put_bytes(s, buf, len);
if (s->s_error != 0)
return (-1);
- for (; str < end; str++) {
- sbuf_put_byte(s, *str);
- if (s->s_error != 0)
- return (-1);
- }
return (0);
}
@@ -456,18 +497,12 @@ sbuf_bcpy(struct sbuf *s, const void *buf, size_t len)
int
sbuf_cat(struct sbuf *s, const char *str)
{
+ size_t n;
- assert_sbuf_integrity(s);
- assert_sbuf_state(s, 0);
-
+ n = strlen(str);
+ sbuf_put_bytes(s, str, n);
if (s->s_error != 0)
return (-1);
-
- while (*str != '\0') {
- sbuf_put_byte(s, *str++);
- if (s->s_error != 0)
- return (-1);
- }
return (0);
}
@@ -590,6 +625,10 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap)
va_copy(ap_copy, ap);
len = vsnprintf(&s->s_buf[s->s_len], SBUF_FREESPACE(s) + 1,
fmt, ap_copy);
+ if (len < 0) {
+ s->s_error = errno;
+ return (-1);
+ }
va_end(ap_copy);
if (SBUF_FREESPACE(s) >= len)
@@ -699,11 +738,13 @@ sbuf_finish(struct sbuf *s)
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
+ s->s_buf[s->s_len] = '\0';
+ if (SBUF_NULINCLUDED(s))
+ s->s_len++;
if (s->s_drain_func != NULL) {
while (s->s_len > 0 && s->s_error == 0)
s->s_error = sbuf_drain(s);
}
- s->s_buf[s->s_len] = '\0';
SBUF_SETFLAG(s, SBUF_FINISHED);
#ifdef _KERNEL
return (s->s_error);
@@ -745,6 +786,10 @@ sbuf_len(struct sbuf *s)
if (s->s_error != 0)
return (-1);
+
+ /* If finished, nulterm is already in len, else add one. */
+ if (SBUF_NULINCLUDED(s) && !SBUF_ISFINISHED(s))
+ return (s->s_len + 1);
return (s->s_len);
}
diff --git a/freebsd/sys/kern/subr_sleepqueue.c b/freebsd/sys/kern/subr_sleepqueue.c
index 28eb10b0..be8e7721 100644
--- a/freebsd/sys/kern/subr_sleepqueue.c
+++ b/freebsd/sys/kern/subr_sleepqueue.c
@@ -64,8 +64,8 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_sleepqueue_profiling.h>
#include <rtems/bsd/local/opt_ddb.h>
-#include <rtems/bsd/local/opt_kdtrace.h>
#include <rtems/bsd/local/opt_sched.h>
+#include <rtems/bsd/local/opt_stack.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -79,6 +79,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
+#include <sys/stack.h>
#include <sys/sysctl.h>
#include <vm/uma.h>
@@ -93,17 +94,16 @@ __FBSDID("$FreeBSD$");
#include <rtems/score/watchdogimpl.h>
#endif /* __rtems__ */
+
/*
- * Constants for the hash table of sleep queue chains. These constants are
- * the same ones that 4BSD (and possibly earlier versions of BSD) used.
- * Basically, we ignore the lower 8 bits of the address since most wait
- * channel pointers are aligned and only look at the next 7 bits for the
- * hash. SC_TABLESIZE must be a power of two for SC_MASK to work properly.
+ * Constants for the hash table of sleep queue chains.
+ * SC_TABLESIZE must be a power of two for SC_MASK to work properly.
*/
-#define SC_TABLESIZE 128 /* Must be power of 2. */
+#define SC_TABLESIZE 256 /* Must be power of 2. */
#define SC_MASK (SC_TABLESIZE - 1)
#define SC_SHIFT 8
-#define SC_HASH(wc) (((uintptr_t)(wc) >> SC_SHIFT) & SC_MASK)
+#define SC_HASH(wc) ((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \
+ SC_MASK)
#define SC_LOOKUP(wc) &sleepq_chains[SC_HASH(wc)]
#define NR_SLEEPQS 2
/*
@@ -184,24 +184,20 @@ SDT_PROBE_DECLARE(sched, , , sleep);
SDT_PROBE_DECLARE(sched, , , wakeup);
/*
- * Early initialization of sleep queues that is called from the sleepinit()
- * SYSINIT.
+ * Initialize SLEEPQUEUE_PROFILING specific sysctl nodes.
+ * Note that it must happen after sleepinit() has been fully executed, so
+ * it must happen after SI_SUB_KMEM SYSINIT() subsystem setup.
*/
-void
-init_sleepqueues(void)
-{
#ifdef SLEEPQUEUE_PROFILING
- struct sysctl_oid *chain_oid;
+static void
+init_sleepqueue_profiling(void)
+{
char chain_name[10];
-#endif
- int i;
+ struct sysctl_oid *chain_oid;
+ u_int i;
for (i = 0; i < SC_TABLESIZE; i++) {
- LIST_INIT(&sleepq_chains[i].sc_queues);
- mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
- MTX_SPIN | MTX_RECURSE);
-#ifdef SLEEPQUEUE_PROFILING
- snprintf(chain_name, sizeof(chain_name), "%d", i);
+ snprintf(chain_name, sizeof(chain_name), "%u", i);
chain_oid = SYSCTL_ADD_NODE(NULL,
SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
chain_name, CTLFLAG_RD, NULL, "sleepq chain stats");
@@ -210,7 +206,26 @@ init_sleepqueues(void)
SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
"max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0,
NULL);
+ }
+}
+
+SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY,
+ init_sleepqueue_profiling, NULL);
#endif
+
+/*
+ * Early initialization of sleep queues that is called from the sleepinit()
+ * SYSINIT.
+ */
+void
+init_sleepqueues(void)
+{
+ int i;
+
+ for (i = 0; i < SC_TABLESIZE; i++) {
+ LIST_INIT(&sleepq_chains[i].sc_queues);
+ mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
+ MTX_SPIN | MTX_RECURSE);
}
sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
#ifdef INVARIANTS
@@ -316,8 +331,9 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
/* If this thread is not allowed to sleep, die a horrible death. */
#ifndef __rtems__
- KASSERT(!(td->td_pflags & TDP_NOSLEEPING),
- ("Trying sleep, but thread marked as sleeping prohibited"));
+ KASSERT(td->td_no_sleeping == 0,
+ ("%s: td %p to sleep on wchan %p with sleeping prohibited",
+ __func__, td, wchan));
#endif /* __rtems__ */
/* Look up the sleep queue associated with the wait channel 'wchan'. */
@@ -404,11 +420,13 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
* sleep queue after timo ticks if the thread has not already been awakened.
*/
void
-sleepq_set_timeout(void *wchan, int timo)
+sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
+ int flags)
{
#ifndef __rtems__
struct sleepqueue_chain *sc;
struct thread *td;
+ sbintime_t pr1;
td = curthread;
sc = SC_LOOKUP(wchan);
@@ -416,7 +434,16 @@ sleepq_set_timeout(void *wchan, int timo)
MPASS(TD_ON_SLEEPQ(td));
MPASS(td->td_sleepqueue == NULL);
MPASS(wchan != NULL);
- callout_reset_curcpu(&td->td_slpcallout, timo, sleepq_timeout, td);
+ if (cold)
+ panic("timed sleep before timers are working");
+ KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
+ td->td_tid, td, (uintmax_t)td->td_sleeptimo));
+ thread_lock(td);
+ callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
+ thread_unlock(td);
+ callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
+ sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
+ C_DIRECT_EXEC);
#else /* __rtems__ */
Per_CPU_Control *cpu_self;
Thread_Control *executing;
@@ -425,8 +452,15 @@ sleepq_set_timeout(void *wchan, int timo)
executing = _Per_CPU_Get_executing(cpu_self);
BSD_ASSERT(_Watchdog_Get_state(&executing->Timer.Watchdog) ==
WATCHDOG_INACTIVE);
- _Thread_Timer_insert_relative(executing, cpu_self, sleepq_timeout,
- (Watchdog_Interval)timo);
+
+ if ((flags & C_ABSOLUTE) == 0) {
+ _Thread_Timer_insert_relative(executing, cpu_self, sleepq_timeout,
+ (Watchdog_Interval)((sbt + tick_sbt - 1) / tick_sbt));
+ } else {
+ _Thread_Timer_insert_absolute(executing, cpu_self, sleepq_timeout,
+ _Watchdog_Ticks_from_sbintime(sbt));
+ }
+
_Thread_Dispatch_direct(cpu_self);
#endif /* __rtems__ */
}
@@ -462,7 +496,7 @@ sleepq_catch_signals(void *wchan, int pri)
struct thread *td;
struct proc *p;
struct sigacts *ps;
- int sig, ret, stop_allowed;
+ int sig, ret;
td = curthread;
p = curproc;
@@ -486,8 +520,6 @@ sleepq_catch_signals(void *wchan, int pri)
sleepq_switch(wchan, pri);
return (0);
}
- stop_allowed = (td->td_flags & TDF_SBDRY) ? SIG_STOP_NOT_ALLOWED :
- SIG_STOP_ALLOWED;
thread_unlock(td);
mtx_unlock_spin(&sc->sc_lock);
CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
@@ -495,8 +527,17 @@ sleepq_catch_signals(void *wchan, int pri)
PROC_LOCK(p);
ps = p->p_sigacts;
mtx_lock(&ps->ps_mtx);
- sig = cursig(td, stop_allowed);
- if (sig == 0) {
+ sig = cursig(td);
+ if (sig == -1) {
+ mtx_unlock(&ps->ps_mtx);
+ KASSERT((td->td_flags & TDF_SBDRY) != 0, ("lost TDF_SBDRY"));
+ KASSERT(TD_SBDRY_INTR(td),
+ ("lost TDF_SERESTART of TDF_SEINTR"));
+ KASSERT((td->td_flags & (TDF_SEINTR | TDF_SERESTART)) !=
+ (TDF_SEINTR | TDF_SERESTART),
+ ("both TDF_SEINTR and TDF_SERESTART"));
+ ret = TD_SBDRY_ERRNO(td);
+ } else if (sig == 0) {
mtx_unlock(&ps->ps_mtx);
ret = thread_suspend_check(1);
MPASS(ret == 0 || ret == EINTR || ret == ERESTART);
@@ -703,38 +744,39 @@ static int
sleepq_check_timeout(void)
{
struct thread *td;
+ int res;
td = curthread;
#ifndef __rtems__
THREAD_LOCK_ASSERT(td, MA_OWNED);
/*
- * If TDF_TIMEOUT is set, we timed out.
+ * If TDF_TIMEOUT is set, we timed out. But recheck
+ * td_sleeptimo anyway.
*/
- if (td->td_flags & TDF_TIMEOUT) {
- td->td_flags &= ~TDF_TIMEOUT;
- return (EWOULDBLOCK);
+ res = 0;
+ if (td->td_sleeptimo != 0) {
+ if (td->td_sleeptimo <= sbinuptime())
+ res = EWOULDBLOCK;
+ td->td_sleeptimo = 0;
}
-
- /*
- * If TDF_TIMOFAIL is set, the timeout ran after we had
- * already been woken up.
- */
- if (td->td_flags & TDF_TIMOFAIL)
- td->td_flags &= ~TDF_TIMOFAIL;
-
- /*
- * If callout_stop() fails, then the timeout is running on
- * another CPU, so synchronize with it to avoid having it
- * accidentally wake up a subsequent sleep.
- */
- else if (callout_stop(&td->td_slpcallout) == 0) {
- td->td_flags |= TDF_TIMEOUT;
- TD_SET_SLEEPING(td);
- mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
- }
- return (0);
+ if (td->td_flags & TDF_TIMEOUT)
+ td->td_flags &= ~TDF_TIMEOUT;
+ else
+ /*
+ * We ignore the situation where timeout subsystem was
+ * unable to stop our callout. The struct thread is
+ * type-stable, the callout will use the correct
+ * memory when running. The checks of the
+ * td_sleeptimo value in this function and in
+ * sleepq_timeout() ensure that the thread does not
+ * get spurious wakeups, even if the callout was reset
+ * or thread reused.
+ */
+ callout_stop(&td->td_slpcallout);
+ return (res);
#else /* __rtems__ */
+ (void)res;
return (td->td_sq_state);
#endif /* __rtems__ */
}
@@ -1069,7 +1111,7 @@ int
sleepq_broadcast(void *wchan, int flags, int pri, int queue)
{
struct sleepqueue *sq;
- struct thread *td, *tdn;
+ struct thread *td;
int wakeup_swapper;
CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
@@ -1083,10 +1125,9 @@ sleepq_broadcast(void *wchan, int flags, int pri, int queue)
/* Resume all blocked threads on the sleep queue. */
wakeup_swapper = 0;
- TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
+ while ((td = TAILQ_FIRST(&sq->sq_blocked[queue])) != NULL) {
thread_lock(td);
- if (sleepq_resume_thread(sq, td, pri))
- wakeup_swapper = 1;
+ wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
thread_unlock(td);
}
return (wakeup_swapper);
@@ -1111,12 +1152,17 @@ sleepq_timeout(void *arg)
CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
- /*
- * First, see if the thread is asleep and get the wait channel if
- * it is.
- */
thread_lock(td);
- if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
+
+ if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) {
+ /*
+ * The thread does not want a timeout (yet).
+ */
+ } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
+ /*
+ * See if the thread is asleep and get the wait
+ * channel if it is.
+ */
wchan = td->td_wchan;
sc = SC_LOOKUP(wchan);
THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
@@ -1124,40 +1170,16 @@ sleepq_timeout(void *arg)
MPASS(sq != NULL);
td->td_flags |= TDF_TIMEOUT;
wakeup_swapper = sleepq_resume_thread(sq, td, 0);
- thread_unlock(td);
- if (wakeup_swapper)
- kick_proc0();
- return;
- }
-
- /*
- * If the thread is on the SLEEPQ but isn't sleeping yet, it
- * can either be on another CPU in between sleepq_add() and
- * one of the sleepq_*wait*() routines or it can be in
- * sleepq_catch_signals().
- */
- if (TD_ON_SLEEPQ(td)) {
+ } else if (TD_ON_SLEEPQ(td)) {
+ /*
+ * If the thread is on the SLEEPQ but isn't sleeping
+ * yet, it can either be on another CPU in between
+ * sleepq_add() and one of the sleepq_*wait*()
+ * routines or it can be in sleepq_catch_signals().
+ */
td->td_flags |= TDF_TIMEOUT;
- thread_unlock(td);
- return;
}
- /*
- * Now check for the edge cases. First, if TDF_TIMEOUT is set,
- * then the other thread has already yielded to us, so clear
- * the flag and resume it. If TDF_TIMEOUT is not set, then the
- * we know that the other thread is not on a sleep queue, but it
- * hasn't resumed execution yet. In that case, set TDF_TIMOFAIL
- * to let it know that the timeout has already run and doesn't
- * need to be canceled.
- */
- if (td->td_flags & TDF_TIMEOUT) {
- MPASS(TD_IS_SLEEPING(td));
- td->td_flags &= ~TDF_TIMEOUT;
- TD_CLR_SLEEPING(td);
- wakeup_swapper = setrunnable(td);
- } else
- td->td_flags |= TDF_TIMOFAIL;
thread_unlock(td);
if (wakeup_swapper)
kick_proc0();
@@ -1290,6 +1312,122 @@ sleepq_abort(struct thread *td, int intrval)
}
#endif /* __rtems__ */
+/*
+ * Prints the stacks of all threads presently sleeping on wchan/queue to
+ * the sbuf sb. Sets count_stacks_printed to the number of stacks actually
+ * printed. Typically, this will equal the number of threads sleeping on the
+ * queue, but may be less if sb overflowed before all stacks were printed.
+ */
+#ifdef STACK
+int
+sleepq_sbuf_print_stacks(struct sbuf *sb, void *wchan, int queue,
+ int *count_stacks_printed)
+{
+ struct thread *td, *td_next;
+ struct sleepqueue *sq;
+ struct stack **st;
+ struct sbuf **td_infos;
+ int i, stack_idx, error, stacks_to_allocate;
+ bool finished, partial_print;
+
+ error = 0;
+ finished = false;
+ partial_print = false;
+
+ KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
+ MPASS((queue >= 0) && (queue < NR_SLEEPQS));
+
+ stacks_to_allocate = 10;
+ for (i = 0; i < 3 && !finished ; i++) {
+ /* We cannot malloc while holding the queue's spinlock, so
+ * we do our mallocs now, and hope it is enough. If it
+ * isn't, we will free these, drop the lock, malloc more,
+ * and try again, up to a point. After that point we will
+ * give up and report ENOMEM. We also cannot write to sb
+ * during this time since the client may have set the
+ * SBUF_AUTOEXTEND flag on their sbuf, which could cause a
+ * malloc as we print to it. So we defer actually printing
+ * to sb until after we drop the spinlock.
+ */
+
+ /* Where we will store the stacks. */
+ st = malloc(sizeof(struct stack *) * stacks_to_allocate,
+ M_TEMP, M_WAITOK);
+ for (stack_idx = 0; stack_idx < stacks_to_allocate;
+ stack_idx++)
+ st[stack_idx] = stack_create();
+
+ /* Where we will store the td name, tid, etc. */
+ td_infos = malloc(sizeof(struct sbuf *) * stacks_to_allocate,
+ M_TEMP, M_WAITOK);
+ for (stack_idx = 0; stack_idx < stacks_to_allocate;
+ stack_idx++)
+ td_infos[stack_idx] = sbuf_new(NULL, NULL,
+ MAXCOMLEN + sizeof(struct thread *) * 2 + 40,
+ SBUF_FIXEDLEN);
+
+ sleepq_lock(wchan);
+ sq = sleepq_lookup(wchan);
+ if (sq == NULL) {
+ /* This sleepq does not exist; exit and return ENOENT. */
+ error = ENOENT;
+ finished = true;
+ sleepq_release(wchan);
+ goto loop_end;
+ }
+
+ stack_idx = 0;
+ /* Save thread info */
+ TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq,
+ td_next) {
+ if (stack_idx >= stacks_to_allocate)
+ goto loop_end;
+
+ /* Note the td_lock is equal to the sleepq_lock here. */
+ stack_save_td(st[stack_idx], td);
+
+ sbuf_printf(td_infos[stack_idx], "%d: %s %p",
+ td->td_tid, td->td_name, td);
+
+ ++stack_idx;
+ }
+
+ finished = true;
+ sleepq_release(wchan);
+
+ /* Print the stacks */
+ for (i = 0; i < stack_idx; i++) {
+ sbuf_finish(td_infos[i]);
+ sbuf_printf(sb, "--- thread %s: ---\n", sbuf_data(td_infos[i]));
+ stack_sbuf_print(sb, st[i]);
+ sbuf_printf(sb, "\n");
+
+ error = sbuf_error(sb);
+ if (error == 0)
+ *count_stacks_printed = stack_idx;
+ }
+
+loop_end:
+ if (!finished)
+ sleepq_release(wchan);
+ for (stack_idx = 0; stack_idx < stacks_to_allocate;
+ stack_idx++)
+ stack_destroy(st[stack_idx]);
+ for (stack_idx = 0; stack_idx < stacks_to_allocate;
+ stack_idx++)
+ sbuf_delete(td_infos[stack_idx]);
+ free(st, M_TEMP);
+ free(td_infos, M_TEMP);
+ stacks_to_allocate *= 10;
+ }
+
+ if (!finished && error == 0)
+ error = ENOMEM;
+
+ return (error);
+}
+#endif
+
#ifdef SLEEPQUEUE_PROFILING
#define SLEEPQ_PROF_LOCATIONS 1024
#define SLEEPQ_SBUFSIZE 512
diff --git a/freebsd/sys/kern/subr_taskqueue.c b/freebsd/sys/kern/subr_taskqueue.c
index 99640026..8580e8fc 100644
--- a/freebsd/sys/kern/subr_taskqueue.c
+++ b/freebsd/sys/kern/subr_taskqueue.c
@@ -32,15 +32,18 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <rtems/bsd/sys/cpuset.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
+#include <sys/libkern.h>
#include <sys/limits.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
+#include <sys/smp.h>
#include <sys/taskqueue.h>
#include <rtems/bsd/sys/unistd.h>
#include <machine/stdarg.h>
@@ -50,16 +53,24 @@ static MALLOC_DEFINE(M_TASKQUEUE, "taskqueue", "Task Queues");
static void *taskqueue_giant_ih;
#endif /* __rtems__ */
static void *taskqueue_ih;
+static void taskqueue_fast_enqueue(void *);
+static void taskqueue_swi_enqueue(void *);
+#ifndef __rtems__
+static void taskqueue_swi_giant_enqueue(void *);
+#endif /* __rtems__ */
struct taskqueue_busy {
struct task *tb_running;
TAILQ_ENTRY(taskqueue_busy) tb_link;
};
+struct task * const TB_DRAIN_WAITER = (struct task *)0x1;
+
struct taskqueue {
STAILQ_HEAD(, task) tq_queue;
taskqueue_enqueue_fn tq_enqueue;
void *tq_context;
+ char *tq_name;
TAILQ_HEAD(, taskqueue_busy) tq_active;
struct mtx tq_mutex;
struct thread **tq_threads;
@@ -69,11 +80,13 @@ struct taskqueue {
#endif /* __rtems__ */
int tq_flags;
int tq_callouts;
+ taskqueue_callback_fn tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
+ void *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
};
#define TQ_FLAGS_ACTIVE (1 << 0)
#define TQ_FLAGS_BLOCKED (1 << 1)
-#define TQ_FLAGS_PENDING (1 << 2)
+#define TQ_FLAGS_UNLOCKED_ENQUEUE (1 << 2)
#define DT_CALLOUT_ARMED (1 << 0)
@@ -85,7 +98,15 @@ struct taskqueue {
else \
mtx_lock(&(tq)->tq_mutex); \
} while (0)
+#else /* __rtems__ */
+#define TQ_LOCK(tq) \
+ do { \
+ mtx_lock(&(tq)->tq_mutex); \
+ } while (0)
+#endif /* __rtems__ */
+#define TQ_ASSERT_LOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_OWNED)
+#ifndef __rtems__
#define TQ_UNLOCK(tq) \
do { \
if ((tq)->tq_spin) \
@@ -94,16 +115,12 @@ struct taskqueue {
mtx_unlock(&(tq)->tq_mutex); \
} while (0)
#else /* __rtems__ */
-#define TQ_LOCK(tq) \
- do { \
- mtx_lock(&(tq)->tq_mutex); \
- } while (0)
-
#define TQ_UNLOCK(tq) \
do { \
mtx_unlock(&(tq)->tq_mutex); \
} while (0)
#endif /* __rtems__ */
+#define TQ_ASSERT_UNLOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
void
_timeout_task_init(struct taskqueue *queue, struct timeout_task *timeout_task,
@@ -111,7 +128,8 @@ _timeout_task_init(struct taskqueue *queue, struct timeout_task *timeout_task,
{
TASK_INIT(&timeout_task->t, priority, func, context);
- callout_init_mtx(&timeout_task->c, &queue->tq_mutex, 0);
+ callout_init_mtx(&timeout_task->c, &queue->tq_mutex,
+ CALLOUT_RETURNUNLOCKED);
timeout_task->q = queue;
timeout_task->f = 0;
}
@@ -128,20 +146,30 @@ TQ_SLEEP(struct taskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
}
static struct taskqueue *
-_taskqueue_create(const char *name __unused, int mflags,
+_taskqueue_create(const char *name, int mflags,
taskqueue_enqueue_fn enqueue, void *context,
- int mtxflags, const char *mtxname)
+ int mtxflags, const char *mtxname __unused)
{
struct taskqueue *queue;
+ char *tq_name;
+
+ tq_name = malloc(TASKQUEUE_NAMELEN, M_TASKQUEUE, mflags | M_ZERO);
+ if (tq_name == NULL)
+ return (NULL);
queue = malloc(sizeof(struct taskqueue), M_TASKQUEUE, mflags | M_ZERO);
- if (!queue)
- return NULL;
+ if (queue == NULL) {
+ free(tq_name, M_TASKQUEUE);
+ return (NULL);
+ }
+
+ snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
STAILQ_INIT(&queue->tq_queue);
TAILQ_INIT(&queue->tq_active);
queue->tq_enqueue = enqueue;
queue->tq_context = context;
+ queue->tq_name = tq_name;
#ifndef __rtems__
queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
#else /* __rtems__ */
@@ -151,17 +179,42 @@ _taskqueue_create(const char *name __unused, int mflags,
*/
#endif /* __rtems__ */
queue->tq_flags |= TQ_FLAGS_ACTIVE;
- mtx_init(&queue->tq_mutex, mtxname, NULL, mtxflags);
+ if (enqueue == taskqueue_fast_enqueue ||
+ enqueue == taskqueue_swi_enqueue ||
+#ifndef __rtems__
+ enqueue == taskqueue_swi_giant_enqueue ||
+#endif /* __rtems__ */
+ enqueue == taskqueue_thread_enqueue)
+ queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
+ mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
- return queue;
+ return (queue);
}
struct taskqueue *
taskqueue_create(const char *name, int mflags,
taskqueue_enqueue_fn enqueue, void *context)
{
+
return _taskqueue_create(name, mflags, enqueue, context,
- MTX_DEF, "taskqueue");
+ MTX_DEF, name);
+}
+
+void
+taskqueue_set_callback(struct taskqueue *queue,
+ enum taskqueue_callback_type cb_type, taskqueue_callback_fn callback,
+ void *context)
+{
+
+ KASSERT(((cb_type >= TASKQUEUE_CALLBACK_TYPE_MIN) &&
+ (cb_type <= TASKQUEUE_CALLBACK_TYPE_MAX)),
+ ("Callback type %d not valid, must be %d-%d", cb_type,
+ TASKQUEUE_CALLBACK_TYPE_MIN, TASKQUEUE_CALLBACK_TYPE_MAX));
+ KASSERT((queue->tq_callbacks[cb_type] == NULL),
+ ("Re-initialization of taskqueue callback?"));
+
+ queue->tq_callbacks[cb_type] = callback;
+ queue->tq_cb_contexts[cb_type] = context;
}
/*
@@ -188,6 +241,7 @@ taskqueue_free(struct taskqueue *queue)
KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
mtx_destroy(&queue->tq_mutex);
free(queue->tq_threads, M_TASKQUEUE);
+ free(queue->tq_name, M_TASKQUEUE);
free(queue, M_TASKQUEUE);
}
@@ -197,12 +251,14 @@ taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task)
struct task *ins;
struct task *prev;
+ KASSERT(task->ta_func != NULL, ("enqueueing task with NULL func"));
/*
* Count multiple enqueues.
*/
if (task->ta_pending) {
if (task->ta_pending < USHRT_MAX)
task->ta_pending++;
+ TQ_UNLOCK(queue);
return (0);
}
@@ -226,13 +282,17 @@ taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task)
}
task->ta_pending = 1;
+ if ((queue->tq_flags & TQ_FLAGS_UNLOCKED_ENQUEUE) != 0)
+ TQ_UNLOCK(queue);
if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
queue->tq_enqueue(queue->tq_context);
- else
- queue->tq_flags |= TQ_FLAGS_PENDING;
+ if ((queue->tq_flags & TQ_FLAGS_UNLOCKED_ENQUEUE) == 0)
+ TQ_UNLOCK(queue);
+ /* Return with lock released. */
return (0);
}
+
int
taskqueue_enqueue(struct taskqueue *queue, struct task *task)
{
@@ -240,7 +300,7 @@ taskqueue_enqueue(struct taskqueue *queue, struct task *task)
TQ_LOCK(queue);
res = taskqueue_enqueue_locked(queue, task);
- TQ_UNLOCK(queue);
+ /* The lock is released inside. */
return (res);
}
@@ -257,6 +317,7 @@ taskqueue_timeout_func(void *arg)
timeout_task->f &= ~DT_CALLOUT_ARMED;
queue->tq_callouts--;
taskqueue_enqueue_locked(timeout_task->q, &timeout_task->t);
+ /* The lock is released inside. */
}
int
@@ -275,6 +336,7 @@ taskqueue_enqueue_timeout(struct taskqueue *queue,
res = timeout_task->t.ta_pending;
if (ticks == 0) {
taskqueue_enqueue_locked(queue, &timeout_task->t);
+ /* The lock is released inside. */
} else {
if ((timeout_task->f & DT_CALLOUT_ARMED) != 0) {
res++;
@@ -288,18 +350,87 @@ taskqueue_enqueue_timeout(struct taskqueue *queue,
callout_reset(&timeout_task->c, ticks,
taskqueue_timeout_func, timeout_task);
}
+ TQ_UNLOCK(queue);
}
- TQ_UNLOCK(queue);
return (res);
}
static void
-taskqueue_drain_running(struct taskqueue *queue)
+taskqueue_task_nop_fn(void *context, int pending)
+{
+}
+
+/*
+ * Block until all currently queued tasks in this taskqueue
+ * have begun execution. Tasks queued during execution of
+ * this function are ignored.
+ */
+static void
+taskqueue_drain_tq_queue(struct taskqueue *queue)
+{
+ struct task t_barrier;
+
+ if (STAILQ_EMPTY(&queue->tq_queue))
+ return;
+
+ /*
+ * Enqueue our barrier after all current tasks, but with
+ * the highest priority so that newly queued tasks cannot
+ * pass it. Because of the high priority, we can not use
+ * taskqueue_enqueue_locked directly (which drops the lock
+ * anyway) so just insert it at tail while we have the
+ * queue lock.
+ */
+ TASK_INIT(&t_barrier, USHRT_MAX, taskqueue_task_nop_fn, &t_barrier);
+ STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
+ t_barrier.ta_pending = 1;
+
+ /*
+ * Once the barrier has executed, all previously queued tasks
+ * have completed or are currently executing.
+ */
+ while (t_barrier.ta_pending != 0)
+ TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
+}
+
+/*
+ * Block until all currently executing tasks for this taskqueue
+ * complete. Tasks that begin execution during the execution
+ * of this function are ignored.
+ */
+static void
+taskqueue_drain_tq_active(struct taskqueue *queue)
{
+ struct taskqueue_busy tb_marker, *tb_first;
+
+ if (TAILQ_EMPTY(&queue->tq_active))
+ return;
+
+ /* Block taskq_terminate().*/
+ queue->tq_callouts++;
+
+ /*
+ * Wait for all currently executing taskqueue threads
+ * to go idle.
+ */
+ tb_marker.tb_running = TB_DRAIN_WAITER;
+ TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
+ while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
+ TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
+ TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
+
+ /*
+ * Wakeup any other drain waiter that happened to queue up
+ * without any intervening active thread.
+ */
+ tb_first = TAILQ_FIRST(&queue->tq_active);
+ if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
+ wakeup(tb_first);
- while (!TAILQ_EMPTY(&queue->tq_active))
- TQ_SLEEP(queue, &queue->tq_active, &queue->tq_mutex,
- PWAIT, "-", 0);
+ /* Release taskqueue_terminate(). */
+ queue->tq_callouts--;
+ if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
+ wakeup_one(queue->tq_threads);
}
void
@@ -317,10 +448,8 @@ taskqueue_unblock(struct taskqueue *queue)
TQ_LOCK(queue);
queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
- if (queue->tq_flags & TQ_FLAGS_PENDING) {
- queue->tq_flags &= ~TQ_FLAGS_PENDING;
+ if (!STAILQ_EMPTY(&queue->tq_queue))
queue->tq_enqueue(queue->tq_context);
- }
TQ_UNLOCK(queue);
}
@@ -328,34 +457,42 @@ static void
taskqueue_run_locked(struct taskqueue *queue)
{
struct taskqueue_busy tb;
+ struct taskqueue_busy *tb_first;
struct task *task;
int pending;
- mtx_assert(&queue->tq_mutex, MA_OWNED);
+ KASSERT(queue != NULL, ("tq is NULL"));
+ TQ_ASSERT_LOCKED(queue);
tb.tb_running = NULL;
- TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
while (STAILQ_FIRST(&queue->tq_queue)) {
+ TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
+
/*
* Carefully remove the first task from the queue and
* zero its pending count.
*/
task = STAILQ_FIRST(&queue->tq_queue);
+ KASSERT(task != NULL, ("task is NULL"));
STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
pending = task->ta_pending;
task->ta_pending = 0;
tb.tb_running = task;
TQ_UNLOCK(queue);
+ KASSERT(task->ta_func != NULL, ("task->ta_func is NULL"));
task->ta_func(task->ta_context, pending);
TQ_LOCK(queue);
tb.tb_running = NULL;
wakeup(task);
+
+ TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
+ tb_first = TAILQ_FIRST(&queue->tq_active);
+ if (tb_first != NULL &&
+ tb_first->tb_running == TB_DRAIN_WAITER)
+ wakeup(tb_first);
}
- TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
- if (TAILQ_EMPTY(&queue->tq_active))
- wakeup(&queue->tq_active);
}
void
@@ -372,7 +509,7 @@ task_is_running(struct taskqueue *queue, struct task *task)
{
struct taskqueue_busy *tb;
- mtx_assert(&queue->tq_mutex, MA_OWNED);
+ TQ_ASSERT_LOCKED(queue);
TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
if (tb->tb_running == task)
return (1);
@@ -413,7 +550,7 @@ taskqueue_cancel_timeout(struct taskqueue *queue,
int error;
TQ_LOCK(queue);
- pending = !!callout_stop(&timeout_task->c);
+ pending = !!(callout_stop(&timeout_task->c) > 0);
error = taskqueue_cancel_locked(queue, &timeout_task->t, &pending1);
if ((timeout_task->f & DT_CALLOUT_ARMED) != 0) {
timeout_task->f &= ~DT_CALLOUT_ARMED;
@@ -444,7 +581,6 @@ taskqueue_drain(struct taskqueue *queue, struct task *task)
void
taskqueue_drain_all(struct taskqueue *queue)
{
- struct task *task;
#ifndef __rtems__
if (!queue->tq_spin)
@@ -452,13 +588,8 @@ taskqueue_drain_all(struct taskqueue *queue)
#endif /* __rtems__ */
TQ_LOCK(queue);
- task = STAILQ_LAST(&queue->tq_queue, task, ta_link);
- if (task != NULL)
- while (task->ta_pending != 0)
- TQ_SLEEP(queue, task, &queue->tq_mutex, PWAIT, "-", 0);
- taskqueue_drain_running(queue);
- KASSERT(STAILQ_EMPTY(&queue->tq_queue),
- ("taskqueue queue is not empty after draining"));
+ taskqueue_drain_tq_queue(queue);
+ taskqueue_drain_tq_active(queue);
TQ_UNLOCK(queue);
}
@@ -497,24 +628,20 @@ taskqueue_swi_giant_run(void *dummy)
}
#endif /* __rtems__ */
-int
-taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
- const char *name, ...)
+static int
+_taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
+ cpuset_t *mask, const char *name, va_list ap)
{
- va_list ap;
+ char ktname[MAXCOMLEN + 1];
struct thread *td;
struct taskqueue *tq;
int i, error;
- char ktname[MAXCOMLEN + 1];
if (count <= 0)
return (EINVAL);
- tq = *tqp;
-
- va_start(ap, name);
vsnprintf(ktname, sizeof(ktname), name, ap);
- va_end(ap);
+ tq = *tqp;
tq->tq_threads = malloc(sizeof(struct thread *) * count, M_TASKQUEUE,
M_NOWAIT | M_ZERO);
@@ -544,6 +671,19 @@ taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
if (tq->tq_threads[i] == NULL)
continue;
td = tq->tq_threads[i];
+ if (mask) {
+ error = cpuset_setthread(td->td_tid, mask);
+ /*
+ * Failing to pin is rarely an actual fatal error;
+ * it'll just affect performance.
+ */
+ if (error)
+ printf("%s: curthread=%llu: can't pin; "
+ "error=%d\n",
+ __func__,
+ (unsigned long long) td->td_tid,
+ error);
+ }
thread_lock(td);
sched_prio(td, pri);
sched_add(td, SRQ_BORING);
@@ -556,6 +696,44 @@ taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
return (0);
}
+int
+taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
+ const char *name, ...)
+{
+ va_list ap;
+ int error;
+
+ va_start(ap, name);
+ error = _taskqueue_start_threads(tqp, count, pri, NULL, name, ap);
+ va_end(ap);
+ return (error);
+}
+
+int
+taskqueue_start_threads_cpuset(struct taskqueue **tqp, int count, int pri,
+ cpuset_t *mask, const char *name, ...)
+{
+ va_list ap;
+ int error;
+
+ va_start(ap, name);
+ error = _taskqueue_start_threads(tqp, count, pri, mask, name, ap);
+ va_end(ap);
+ return (error);
+}
+
+static inline void
+taskqueue_run_callback(struct taskqueue *tq,
+ enum taskqueue_callback_type cb_type)
+{
+ taskqueue_callback_fn tq_callback;
+
+ TQ_ASSERT_UNLOCKED(tq);
+ tq_callback = tq->tq_callbacks[cb_type];
+ if (tq_callback != NULL)
+ tq_callback(tq->tq_cb_contexts[cb_type]);
+}
+
void
taskqueue_thread_loop(void *arg)
{
@@ -563,8 +741,10 @@ taskqueue_thread_loop(void *arg)
tqp = arg;
tq = *tqp;
+ taskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
TQ_LOCK(tq);
while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
+ /* XXX ? */
taskqueue_run_locked(tq);
/*
* Because taskqueue_run() can drop tq_mutex, we need to
@@ -576,6 +756,14 @@ taskqueue_thread_loop(void *arg)
TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
}
taskqueue_run_locked(tq);
+ /*
+ * This thread is on its way out, so just drop the lock temporarily
+ * in order to call the shutdown callback. This allows the callback
+ * to look at the taskqueue, even just before it dies.
+ */
+ TQ_UNLOCK(tq);
+ taskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
+ TQ_LOCK(tq);
/* rendezvous with thread that asked us to terminate */
tq->tq_tcount--;
@@ -591,19 +779,17 @@ taskqueue_thread_enqueue(void *context)
tqp = context;
tq = *tqp;
-
- mtx_assert(&tq->tq_mutex, MA_OWNED);
wakeup_one(tq);
}
TASKQUEUE_DEFINE(swi, taskqueue_swi_enqueue, NULL,
swi_add(NULL, "task queue", taskqueue_swi_run, NULL, SWI_TQ,
- INTR_MPSAFE, &taskqueue_ih));
+ INTR_MPSAFE, &taskqueue_ih));
#ifndef __rtems__
TASKQUEUE_DEFINE(swi_giant, taskqueue_swi_giant_enqueue, NULL,
swi_add(NULL, "Giant taskq", taskqueue_swi_giant_run,
- NULL, SWI_TQ_GIANT, 0, &taskqueue_giant_ih));
+ NULL, SWI_TQ_GIANT, 0, &taskqueue_giant_ih));
#endif /* __rtems__ */
TASKQUEUE_DEFINE_THREAD(thread);
@@ -616,13 +802,6 @@ taskqueue_create_fast(const char *name, int mflags,
MTX_SPIN, "fast_taskqueue");
}
-/* NB: for backwards compatibility */
-int
-taskqueue_enqueue_fast(struct taskqueue *queue, struct task *task)
-{
- return taskqueue_enqueue(queue, task);
-}
-
static void *taskqueue_fast_ih;
static void
diff --git a/freebsd/sys/kern/subr_uio.c b/freebsd/sys/kern/subr_uio.c
index 73f2db08..a319685a 100644
--- a/freebsd/sys/kern/subr_uio.c
+++ b/freebsd/sys/kern/subr_uio.c
@@ -9,6 +9,11 @@
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
+ * Copyright (c) 2014 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -39,17 +44,15 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <rtems/bsd/local/opt_zero.h>
-
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mman.h>
-#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
+#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
@@ -58,10 +61,10 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
-#ifdef ZERO_COPY_SOCKETS
-#include <vm/vm_object.h>
-#endif
+
+#include <machine/bus.h>
#ifndef __rtems__
SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, UIO_MAXIOV,
@@ -71,70 +74,6 @@ SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, UIO_MA
static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault);
#ifndef __rtems__
-#ifdef ZERO_COPY_SOCKETS
-/* Declared in uipc_socket.c */
-extern int so_zero_copy_receive;
-
-/*
- * Identify the physical page mapped at the given kernel virtual
- * address. Insert this physical page into the given address space at
- * the given virtual address, replacing the physical page, if any,
- * that already exists there.
- */
-static int
-vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr)
-{
- vm_map_t map = mapa;
- vm_page_t kern_pg, user_pg;
- vm_object_t uobject;
- vm_map_entry_t entry;
- vm_pindex_t upindex;
- vm_prot_t prot;
- boolean_t wired;
-
- KASSERT((uaddr & PAGE_MASK) == 0,
- ("vm_pgmoveco: uaddr is not page aligned"));
-
- /*
- * Herein the physical page is validated and dirtied. It is
- * unwired in sf_buf_mext().
- */
- kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));
- kern_pg->valid = VM_PAGE_BITS_ALL;
- KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1,
- ("vm_pgmoveco: kern_pg is not correctly wired"));
-
- if ((vm_map_lookup(&map, uaddr,
- VM_PROT_WRITE, &entry, &uobject,
- &upindex, &prot, &wired)) != KERN_SUCCESS) {
- return(EFAULT);
- }
- VM_OBJECT_LOCK(uobject);
-retry:
- if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
- if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco"))
- goto retry;
- vm_page_lock(user_pg);
- pmap_remove_all(user_pg);
- vm_page_free(user_pg);
- vm_page_unlock(user_pg);
- } else {
- /*
- * Even if a physical page does not exist in the
- * object chain's first object, a physical page from a
- * backing object may be mapped read only.
- */
- if (uobject->backing_object != NULL)
- pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE);
- }
- vm_page_insert(kern_pg, uobject, upindex);
- vm_page_dirty(kern_pg);
- VM_OBJECT_UNLOCK(uobject);
- vm_map_lookup_done(map, entry);
- return(KERN_SUCCESS);
-}
-#endif /* ZERO_COPY_SOCKETS */
-
int
copyin_nofault(const void *udaddr, void *kaddr, size_t len)
{
@@ -202,6 +141,58 @@ physcopyout(vm_paddr_t src, void *dst, size_t len)
}
#undef PHYS_PAGE_COUNT
+
+int
+physcopyin_vlist(bus_dma_segment_t *src, off_t offset, vm_paddr_t dst,
+ size_t len)
+{
+ size_t seg_len;
+ int error;
+
+ error = 0;
+ while (offset >= src->ds_len) {
+ offset -= src->ds_len;
+ src++;
+ }
+
+ while (len > 0 && error == 0) {
+ seg_len = MIN(src->ds_len - offset, len);
+ error = physcopyin((void *)(uintptr_t)(src->ds_addr + offset),
+ dst, seg_len);
+ offset = 0;
+ src++;
+ len -= seg_len;
+ dst += seg_len;
+ }
+
+ return (error);
+}
+
+int
+physcopyout_vlist(vm_paddr_t src, bus_dma_segment_t *dst, off_t offset,
+ size_t len)
+{
+ size_t seg_len;
+ int error;
+
+ error = 0;
+ while (offset >= dst->ds_len) {
+ offset -= dst->ds_len;
+ dst++;
+ }
+
+ while (len > 0 && error == 0) {
+ seg_len = MIN(dst->ds_len - offset, len);
+ error = physcopyout(src, (void *)(uintptr_t)(dst->ds_addr +
+ offset), seg_len);
+ offset = 0;
+ dst++;
+ len -= seg_len;
+ src += seg_len;
+ }
+
+ return (error);
+}
#endif /* __rtems__ */
int
@@ -329,103 +320,6 @@ uiomove_frombuf(void *buf, int buflen, struct uio *uio)
return (uiomove((char *)buf + offset, n, uio));
}
-#ifdef ZERO_COPY_SOCKETS
-/*
- * Experimental support for zero-copy I/O
- */
-static int
-userspaceco(void *cp, u_int cnt, struct uio *uio, int disposable)
-{
- struct iovec *iov;
- int error;
-
- iov = uio->uio_iov;
- if (uio->uio_rw == UIO_READ) {
- if ((so_zero_copy_receive != 0)
- && ((cnt & PAGE_MASK) == 0)
- && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0)
- && ((uio->uio_offset & PAGE_MASK) == 0)
- && ((((intptr_t) cp) & PAGE_MASK) == 0)
- && (disposable != 0)) {
- /* SOCKET: use page-trading */
- /*
- * We only want to call vm_pgmoveco() on
- * disposeable pages, since it gives the
- * kernel page to the userland process.
- */
- error = vm_pgmoveco(&curproc->p_vmspace->vm_map,
- (vm_offset_t)cp, (vm_offset_t)iov->iov_base);
-
- /*
- * If we get an error back, attempt
- * to use copyout() instead. The
- * disposable page should be freed
- * automatically if we weren't able to move
- * it into userland.
- */
- if (error != 0)
- error = copyout(cp, iov->iov_base, cnt);
- } else {
- error = copyout(cp, iov->iov_base, cnt);
- }
- } else {
- error = copyin(iov->iov_base, cp, cnt);
- }
- return (error);
-}
-
-int
-uiomoveco(void *cp, int n, struct uio *uio, int disposable)
-{
- struct iovec *iov;
- u_int cnt;
- int error;
-
- KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
- ("uiomoveco: mode"));
- KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
- ("uiomoveco proc"));
-
- while (n > 0 && uio->uio_resid) {
- iov = uio->uio_iov;
- cnt = iov->iov_len;
- if (cnt == 0) {
- uio->uio_iov++;
- uio->uio_iovcnt--;
- continue;
- }
- if (cnt > n)
- cnt = n;
-
- switch (uio->uio_segflg) {
-
- case UIO_USERSPACE:
- maybe_yield();
- error = userspaceco(cp, cnt, uio, disposable);
- if (error)
- return (error);
- break;
-
- case UIO_SYSSPACE:
- if (uio->uio_rw == UIO_READ)
- bcopy(cp, iov->iov_base, cnt);
- else
- bcopy(iov->iov_base, cp, cnt);
- break;
- case UIO_NOCOPY:
- break;
- }
- iov->iov_base = (char *)iov->iov_base + cnt;
- iov->iov_len -= cnt;
- uio->uio_resid -= cnt;
- uio->uio_offset += cnt;
- cp = (char *)cp + cnt;
- n -= cnt;
- }
- return (0);
-}
-#endif /* ZERO_COPY_SOCKETS */
-
/*
* Give next character to user as result of read.
*/
@@ -457,7 +351,6 @@ again:
case UIO_SYSSPACE:
iov_base = iov->iov_base;
*iov_base = c;
- iov->iov_base = iov_base;
break;
case UIO_NOCOPY:
@@ -529,7 +422,7 @@ copyiniov(const struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error)
#ifndef __rtems__
int
-copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop)
+copyinuio(const struct iovec *iovp, u_int iovcnt, struct uio **uiop)
{
struct iovec *iov;
struct uio *uio;
@@ -594,15 +487,13 @@ copyout_map(struct thread *td, vm_offset_t *addr, size_t sz)
/*
* Map somewhere after heap in process memory.
*/
- PROC_LOCK(td->td_proc);
*addr = round_page((vm_offset_t)vms->vm_daddr +
- lim_max(td->td_proc, RLIMIT_DATA));
- PROC_UNLOCK(td->td_proc);
+ lim_max(td, RLIMIT_DATA));
- /* round size up to page boundry */
+ /* round size up to page boundary */
size = (vm_size_t)round_page(sz);
- error = vm_mmap(&vms->vm_map, addr, size, PROT_READ | PROT_WRITE,
+ error = vm_mmap(&vms->vm_map, addr, size, VM_PROT_READ | VM_PROT_WRITE,
VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, OBJT_DEFAULT, NULL, 0);
return (error);
@@ -628,4 +519,129 @@ copyout_unmap(struct thread *td, vm_offset_t addr, size_t sz)
return (0);
}
+
+#ifdef NO_FUEWORD
+/*
+ * XXXKIB The temporal implementation of fue*() functions which do not
+ * handle usermode -1 properly, mixing it with the fault code. Keep
+ * this until MD code is written. Currently sparc64 and mips do not
+ * have proper implementation.
+ */
+
+int
+fueword(volatile const void *base, long *val)
+{
+ long res;
+
+ res = fuword(base);
+ if (res == -1)
+ return (-1);
+ *val = res;
+ return (0);
+}
+
+int
+fueword32(volatile const void *base, int32_t *val)
+{
+ int32_t res;
+
+ res = fuword32(base);
+ if (res == -1)
+ return (-1);
+ *val = res;
+ return (0);
+}
+
+#ifdef _LP64
+int
+fueword64(volatile const void *base, int64_t *val)
+{
+ int32_t res;
+
+ res = fuword64(base);
+ if (res == -1)
+ return (-1);
+ *val = res;
+ return (0);
+}
+#endif
+
+int
+casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp,
+ uint32_t newval)
+{
+ int32_t ov;
+
+ ov = casuword32(base, oldval, newval);
+ if (ov == -1)
+ return (-1);
+ *oldvalp = ov;
+ return (0);
+}
+
+int
+casueword(volatile u_long *p, u_long oldval, u_long *oldvalp, u_long newval)
+{
+ u_long ov;
+
+ ov = casuword(p, oldval, newval);
+ if (ov == -1)
+ return (-1);
+ *oldvalp = ov;
+ return (0);
+}
+#else /* NO_FUEWORD */
+int32_t
+fuword32(volatile const void *addr)
+{
+ int rv;
+ int32_t val;
+
+ rv = fueword32(addr, &val);
+ return (rv == -1 ? -1 : val);
+}
+
+#ifdef _LP64
+int64_t
+fuword64(volatile const void *addr)
+{
+ int rv;
+ int64_t val;
+
+ rv = fueword64(addr, &val);
+ return (rv == -1 ? -1 : val);
+}
+#endif /* _LP64 */
+
+long
+fuword(volatile const void *addr)
+{
+ long val;
+ int rv;
+
+ rv = fueword(addr, &val);
+ return (rv == -1 ? -1 : val);
+}
+
+uint32_t
+casuword32(volatile uint32_t *addr, uint32_t old, uint32_t new)
+{
+ int rv;
+ uint32_t val;
+
+ rv = casueword32(addr, old, &val, new);
+ return (rv == -1 ? -1 : val);
+}
+
+u_long
+casuword(volatile u_long *addr, u_long old, u_long new)
+{
+ int rv;
+ u_long val;
+
+ rv = casueword(addr, old, &val, new);
+ return (rv == -1 ? -1 : val);
+}
+
+#endif /* NO_FUEWORD */
#endif /* __rtems__ */
diff --git a/freebsd/sys/kern/subr_unit.c b/freebsd/sys/kern/subr_unit.c
index a560eb50..678916f8 100644
--- a/freebsd/sys/kern/subr_unit.c
+++ b/freebsd/sys/kern/subr_unit.c
@@ -69,13 +69,13 @@
* N is the number of the highest unit allocated.
*/
+#include <rtems/bsd/sys/param.h>
#include <sys/types.h>
-#include <sys/queue.h>
-#include <sys/bitstring.h>
+#include <sys/_unrhdr.h>
#ifdef _KERNEL
-#include <rtems/bsd/sys/param.h>
+#include <sys/bitstring.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/systm.h>
@@ -100,6 +100,11 @@ MTX_SYSINIT(unit, &unitmtx, "unit# allocation", MTX_DEF);
#else /* ...USERLAND */
+#include <bitstring.h>
+#include <err.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -166,7 +171,7 @@ mtx_assert(struct mtx *mp, int flag)
* element:
* If ptr is NULL, it represents a run of free items.
* If ptr points to the unrhdr it represents a run of allocated items.
- * Otherwise it points to an bitstring of allocated items.
+ * Otherwise it points to a bitstring of allocated items.
*
* For runs the len field is the length of the run.
* For bitmaps the len field represents the number of allocated items.
@@ -180,29 +185,32 @@ struct unr {
};
struct unrb {
- u_char busy;
- bitstr_t map[sizeof(struct unr) - 1];
+ bitstr_t map[sizeof(struct unr) / sizeof(bitstr_t)];
};
-CTASSERT(sizeof(struct unr) == sizeof(struct unrb));
+CTASSERT((sizeof(struct unr) % sizeof(bitstr_t)) == 0);
-/* Number of bits in the bitmap */
-#define NBITS ((int)sizeof(((struct unrb *)NULL)->map) * 8)
+/* Number of bits we can store in the bitmap */
+#define NBITS (8 * sizeof(((struct unrb*)NULL)->map))
-/* Header element for a unr number space. */
+/* Is the unrb empty in at least the first len bits? */
+static inline bool
+ub_empty(struct unrb *ub, int len) {
+ int first_set;
-struct unrhdr {
- TAILQ_HEAD(unrhd,unr) head;
- u_int low; /* Lowest item */
- u_int high; /* Highest item */
- u_int busy; /* Count of allocated items */
- u_int alloc; /* Count of memory allocations */
- u_int first; /* items in allocated from start */
- u_int last; /* items free at end */
- struct mtx *mtx;
- TAILQ_HEAD(unrfr,unr) ppfree; /* Items to be freed after mtx
- lock dropped */
-};
+ bit_ffs(ub->map, len, &first_set);
+ return (first_set == -1);
+}
+
+/* Is the unrb full? That is, is the number of set elements equal to len? */
+static inline bool
+ub_full(struct unrb *ub, int len)
+{
+ int first_clear;
+
+ bit_ffc(ub->map, len, &first_clear);
+ return (first_clear == -1);
+}
#if defined(DIAGNOSTIC) || !defined(_KERNEL)
@@ -218,7 +226,8 @@ check_unrhdr(struct unrhdr *uh, int line)
{
struct unr *up;
struct unrb *ub;
- u_int x, y, z, w;
+ int w;
+ u_int y, z;
y = uh->first;
z = 0;
@@ -227,16 +236,11 @@ check_unrhdr(struct unrhdr *uh, int line)
if (up->ptr != uh && up->ptr != NULL) {
ub = up->ptr;
KASSERT (up->len <= NBITS,
- ("UNR inconsistency: len %u max %d (line %d)\n",
+ ("UNR inconsistency: len %u max %zd (line %d)\n",
up->len, NBITS, line));
z++;
w = 0;
- for (x = 0; x < up->len; x++)
- if (bit_test(ub->map, x))
- w++;
- KASSERT (w == ub->busy,
- ("UNR inconsistency: busy %u found %u (line %d)\n",
- ub->busy, w, line));
+ bit_count(ub->map, 0, up->len, &w);
y += w;
} else if (up->ptr != NULL)
y += up->len;
@@ -252,7 +256,7 @@ check_unrhdr(struct unrhdr *uh, int line)
#else
static __inline void
-check_unrhdr(struct unrhdr *uh, int line)
+check_unrhdr(struct unrhdr *uh __unused, int line __unused)
{
}
@@ -317,20 +321,12 @@ clean_unrhdr(struct unrhdr *uh)
mtx_unlock(uh->mtx);
}
-/*
- * Allocate a new unrheader set.
- *
- * Highest and lowest valid values given as parameters.
- */
-
-struct unrhdr *
-new_unrhdr(int low, int high, struct mtx *mutex)
+void
+init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex)
{
- struct unrhdr *uh;
KASSERT(low >= 0 && low <= high,
("UNR: use error: new_unrhdr(%d, %d)", low, high));
- uh = Malloc(sizeof *uh);
if (mutex != NULL)
uh->mtx = mutex;
else
@@ -342,6 +338,21 @@ new_unrhdr(int low, int high, struct mtx *mutex)
uh->first = 0;
uh->last = 1 + (high - low);
check_unrhdr(uh, __LINE__);
+}
+
+/*
+ * Allocate a new unrheader set.
+ *
+ * Highest and lowest valid values given as parameters.
+ */
+
+struct unrhdr *
+new_unrhdr(int low, int high, struct mtx *mutex)
+{
+ struct unrhdr *uh;
+
+ uh = Malloc(sizeof *uh);
+ init_unrhdr(uh, low, high, mutex);
return (uh);
}
@@ -423,32 +434,24 @@ optimize_unr(struct unrhdr *uh)
a = us->len;
l = us->ptr == uh ? 1 : 0;
ub = (void *)us;
- ub->busy = 0;
- if (l) {
+ bit_nclear(ub->map, 0, NBITS - 1);
+ if (l)
bit_nset(ub->map, 0, a);
- ub->busy += a;
- } else {
- bit_nclear(ub->map, 0, a);
- }
if (!is_bitmap(uh, uf)) {
- if (uf->ptr == NULL) {
+ if (uf->ptr == NULL)
bit_nclear(ub->map, a, a + uf->len - 1);
- } else {
+ else
bit_nset(ub->map, a, a + uf->len - 1);
- ub->busy += uf->len;
- }
uf->ptr = ub;
uf->len += a;
us = uf;
} else {
ubf = uf->ptr;
for (l = 0; l < uf->len; l++, a++) {
- if (bit_test(ubf->map, l)) {
+ if (bit_test(ubf->map, l))
bit_set(ub->map, a);
- ub->busy++;
- } else {
+ else
bit_clear(ub->map, a);
- }
}
uf->len = a;
delete_unr(uh, uf->ptr);
@@ -470,19 +473,16 @@ optimize_unr(struct unrhdr *uh)
delete_unr(uh, uf);
} else if (uf->ptr == uh) {
bit_nset(ub->map, us->len, us->len + uf->len - 1);
- ub->busy += uf->len;
us->len += uf->len;
TAILQ_REMOVE(&uh->head, uf, list);
delete_unr(uh, uf);
} else {
ubf = uf->ptr;
for (l = 0; l < uf->len; l++, us->len++) {
- if (bit_test(ubf->map, l)) {
+ if (bit_test(ubf->map, l))
bit_set(ub->map, us->len);
- ub->busy++;
- } else {
+ else
bit_clear(ub->map, us->len);
- }
}
TAILQ_REMOVE(&uh->head, uf, list);
delete_unr(uh, ubf);
@@ -505,10 +505,10 @@ collapse_unr(struct unrhdr *uh, struct unr *up)
/* If bitmap is all set or clear, change it to runlength */
if (is_bitmap(uh, up)) {
ub = up->ptr;
- if (ub->busy == up->len) {
+ if (ub_full(ub, up->len)) {
delete_unr(uh, up->ptr);
up->ptr = uh;
- } else if (ub->busy == 0) {
+ } else if (ub_empty(ub, up->len)) {
delete_unr(uh, up->ptr);
up->ptr = NULL;
}
@@ -606,11 +606,9 @@ alloc_unrl(struct unrhdr *uh)
up->len--;
} else { /* bitmap */
ub = up->ptr;
- KASSERT(ub->busy < up->len, ("UNR bitmap confusion"));
bit_ffc(ub->map, up->len, &y);
KASSERT(y != -1, ("UNR corruption: No clear bit in bitmap."));
bit_set(ub->map, y);
- ub->busy++;
x += y;
}
uh->busy++;
@@ -694,7 +692,6 @@ alloc_unr_specificl(struct unrhdr *uh, u_int item, void **p1, void **p2)
ub = up->ptr;
if (bit_test(ub->map, i) == 0) {
bit_set(ub->map, i);
- ub->busy++;
goto done;
} else
return (-1);
@@ -813,7 +810,6 @@ free_unrl(struct unrhdr *uh, u_int item, void **p1, void **p2)
("UNR: Freeing free item %d (bitmap)\n", item));
bit_clear(ub->map, item);
uh->busy--;
- ub->busy--;
collapse_unr(uh, up);
return;
}
@@ -891,9 +887,13 @@ free_unr(struct unrhdr *uh, u_int item)
#ifndef _KERNEL /* USERLAND test driver */
/*
- * Simple stochastic test driver for the above functions
+ * Simple stochastic test driver for the above functions. The code resides
+ * here so that it can access static functions and structures.
*/
+static bool verbose;
+#define VPRINTF(...) {if (verbose) printf(__VA_ARGS__);}
+
static void
print_unr(struct unrhdr *uh, struct unr *up)
{
@@ -907,7 +907,7 @@ print_unr(struct unrhdr *uh, struct unr *up)
printf("alloc\n");
else {
ub = up->ptr;
- printf("bitmap(%d) [", ub->busy);
+ printf("bitmap [");
for (x = 0; x < up->len; x++) {
if (bit_test(ub->map, x))
printf("#");
@@ -944,7 +944,7 @@ test_alloc_unr(struct unrhdr *uh, u_int i, char a[])
int j;
if (a[i]) {
- printf("F %u\n", i);
+ VPRINTF("F %u\n", i);
free_unr(uh, i);
a[i] = 0;
} else {
@@ -952,7 +952,7 @@ test_alloc_unr(struct unrhdr *uh, u_int i, char a[])
j = alloc_unr(uh);
if (j != -1) {
a[j] = 1;
- printf("A %d\n", j);
+ VPRINTF("A %d\n", j);
}
no_alloc = 0;
}
@@ -965,40 +965,73 @@ test_alloc_unr_specific(struct unrhdr *uh, u_int i, char a[])
j = alloc_unr_specific(uh, i);
if (j == -1) {
- printf("F %u\n", i);
+ VPRINTF("F %u\n", i);
a[i] = 0;
free_unr(uh, i);
} else {
a[i] = 1;
- printf("A %d\n", j);
+ VPRINTF("A %d\n", j);
}
}
-/* Number of unrs to test */
-#define NN 10000
+static void
+usage(char** argv)
+{
+ printf("%s [-h] [-r REPETITIONS] [-v]\n", argv[0]);
+}
int
-main(int argc __unused, const char **argv __unused)
+main(int argc, char **argv)
{
struct unrhdr *uh;
- u_int i, x, m, j;
- char a[NN];
+ char *a;
+ long count = 10000; /* Number of unrs to test */
+ long reps = 1, m;
+ int ch;
+ u_int i, x, j;
+
+ verbose = false;
+
+ while ((ch = getopt(argc, argv, "hr:v")) != -1) {
+ switch (ch) {
+ case 'r':
+ errno = 0;
+ reps = strtol(optarg, NULL, 0);
+ if (errno == ERANGE || errno == EINVAL) {
+ usage(argv);
+ exit(2);
+ }
+
+ break;
+ case 'v':
+ verbose = true;
+ break;
+ case 'h':
+ default:
+ usage(argv);
+ exit(2);
+ }
+
+
+ }
setbuf(stdout, NULL);
- uh = new_unrhdr(0, NN - 1, NULL);
+ uh = new_unrhdr(0, count - 1, NULL);
print_unrhdr(uh);
- memset(a, 0, sizeof a);
+ a = calloc(count, sizeof(char));
+ if (a == NULL)
+ err(1, "calloc failed");
srandomdev();
- fprintf(stderr, "sizeof(struct unr) %zu\n", sizeof(struct unr));
- fprintf(stderr, "sizeof(struct unrb) %zu\n", sizeof(struct unrb));
- fprintf(stderr, "sizeof(struct unrhdr) %zu\n", sizeof(struct unrhdr));
- fprintf(stderr, "NBITS %d\n", NBITS);
+ printf("sizeof(struct unr) %zu\n", sizeof(struct unr));
+ printf("sizeof(struct unrb) %zu\n", sizeof(struct unrb));
+ printf("sizeof(struct unrhdr) %zu\n", sizeof(struct unrhdr));
+ printf("NBITS %lu\n", (unsigned long)NBITS);
x = 1;
- for (m = 0; m < NN * 100; m++) {
+ for (m = 0; m < count * reps; m++) {
j = random();
- i = (j >> 1) % NN;
+ i = (j >> 1) % count;
#if 0
if (a[i] && (j & 1))
continue;
@@ -1008,19 +1041,22 @@ main(int argc __unused, const char **argv __unused)
else
test_alloc_unr_specific(uh, i, a);
- if (1) /* XXX: change this for detailed debug printout */
+ if (verbose)
print_unrhdr(uh);
check_unrhdr(uh, __LINE__);
}
- for (i = 0; i < NN; i++) {
+ for (i = 0; i < (u_int)count; i++) {
if (a[i]) {
- printf("C %u\n", i);
+ if (verbose) {
+ printf("C %u\n", i);
+ print_unrhdr(uh);
+ }
free_unr(uh, i);
- print_unrhdr(uh);
}
}
print_unrhdr(uh);
delete_unrhdr(uh);
+ free(a);
return (0);
}
#endif
diff --git a/freebsd/sys/kern/sys_generic.c b/freebsd/sys/kern/sys_generic.c
index 91733ca3..26cd9d36 100644
--- a/freebsd/sys/kern/sys_generic.c
+++ b/freebsd/sys/kern/sys_generic.c
@@ -46,11 +46,12 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/file.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/socketvar.h>
@@ -94,12 +95,14 @@ __FBSDID("$FreeBSD$");
#define SYS_IOCTL_SMALL_ALIGN 8 /* bytes */
#ifndef __rtems__
-int iosize_max_clamp = 1;
+#ifdef __LP64__
+static int iosize_max_clamp = 0;
SYSCTL_INT(_debug, OID_AUTO, iosize_max_clamp, CTLFLAG_RW,
&iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX");
-int devfs_iosize_max_clamp = 1;
+static int devfs_iosize_max_clamp = 1;
SYSCTL_INT(_debug, OID_AUTO, devfs_iosize_max_clamp, CTLFLAG_RW,
&devfs_iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX for devices");
+#endif
/*
* Assert that the return value of read(2) and write(2) syscalls fits
@@ -131,7 +134,7 @@ static int dofilewrite(struct thread *, int, struct file *, struct uio *,
#endif /* __rtems__ */
static void doselwakeup(struct selinfo *, int);
static void seltdinit(struct thread *);
-static int seltdwait(struct thread *, int);
+static int seltdwait(struct thread *, sbintime_t, sbintime_t);
static void seltdclear(struct thread *);
/*
@@ -163,12 +166,31 @@ struct selfd {
struct mtx *sf_mtx; /* Pointer to selinfo mtx. */
struct seltd *sf_td; /* (k) owning seltd. */
void *sf_cookie; /* (k) fd or pollfd. */
+ u_int sf_refs;
};
static uma_zone_t selfd_zone;
static struct mtx_pool *mtxpool_select;
#ifndef __rtems__
+#ifdef __LP64__
+size_t
+devfs_iosize_max(void)
+{
+
+ return (devfs_iosize_max_clamp || SV_CURPROC_FLAG(SV_ILP32) ?
+ INT_MAX : SSIZE_MAX);
+}
+
+size_t
+iosize_max(void)
+{
+
+ return (iosize_max_clamp || SV_CURPROC_FLAG(SV_ILP32) ?
+ INT_MAX : SSIZE_MAX);
+}
+#endif
+
#ifndef _SYS_SYSPROTO_H_
struct read_args {
int fd;
@@ -230,6 +252,7 @@ sys_pread(td, uap)
return(error);
}
+#if defined(COMPAT_FREEBSD6)
int
freebsd6_pread(td, uap)
struct thread *td;
@@ -243,6 +266,7 @@ freebsd6_pread(td, uap)
oargs.offset = uap->offset;
return (sys_pread(td, &oargs));
}
+#endif
/*
* Scatter read system call.
@@ -272,9 +296,10 @@ int
kern_readv(struct thread *td, int fd, struct uio *auio)
{
struct file *fp;
+ cap_rights_t rights;
int error;
- error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp);
+ error = fget_read(td, fd, cap_rights_init(&rights, CAP_READ), &fp);
if (error)
return (error);
error = dofileread(td, fd, fp, auio, (off_t)-1, 0);
@@ -315,9 +340,10 @@ kern_preadv(td, fd, auio, offset)
off_t offset;
{
struct file *fp;
+ cap_rights_t rights;
int error;
- error = fget_read(td, fd, CAP_READ, &fp);
+ error = fget_read(td, fd, cap_rights_init(&rights, CAP_PREAD), &fp);
if (error)
return (error);
if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
@@ -349,6 +375,8 @@ dofileread(td, fd, fp, auio, offset, flags)
struct uio *ktruio = NULL;
#endif
+ AUDIT_ARG_FD(fd);
+
/* Finish zero length reads right here */
if (auio->uio_resid == 0) {
td->td_retval[0] = 0;
@@ -439,6 +467,7 @@ sys_pwrite(td, uap)
return(error);
}
+#if defined(COMPAT_FREEBSD6)
int
freebsd6_pwrite(td, uap)
struct thread *td;
@@ -452,6 +481,7 @@ freebsd6_pwrite(td, uap)
oargs.offset = uap->offset;
return (sys_pwrite(td, &oargs));
}
+#endif
/*
* Gather write system call.
@@ -481,9 +511,10 @@ int
kern_writev(struct thread *td, int fd, struct uio *auio)
{
struct file *fp;
+ cap_rights_t rights;
int error;
- error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp);
+ error = fget_write(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
if (error)
return (error);
error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0);
@@ -524,9 +555,10 @@ kern_pwritev(td, fd, auio, offset)
off_t offset;
{
struct file *fp;
+ cap_rights_t rights;
int error;
- error = fget_write(td, fd, CAP_WRITE, &fp);
+ error = fget_write(td, fd, cap_rights_init(&rights, CAP_PWRITE), &fp);
if (error)
return (error);
if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
@@ -558,6 +590,7 @@ dofilewrite(td, fd, fp, auio, offset, flags)
struct uio *ktruio = NULL;
#endif
+ AUDIT_ARG_FD(fd);
auio->uio_rw = UIO_WRITE;
auio->uio_td = td;
auio->uio_offset = offset;
@@ -604,12 +637,13 @@ kern_ftruncate(td, fd, length)
off_t length;
{
struct file *fp;
+ cap_rights_t rights;
int error;
AUDIT_ARG_FD(fd);
if (length < 0)
return (EINVAL);
- error = fget(td, fd, CAP_FTRUNCATE, &fp);
+ error = fget(td, fd, cap_rights_init(&rights, CAP_FTRUNCATE), &fp);
if (error)
return (error);
AUDIT_ARG_FILE(td->td_proc, fp);
@@ -737,28 +771,64 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
{
struct file *fp;
struct filedesc *fdp;
- int error;
- int tmp;
+#ifndef CAPABILITIES
+ cap_rights_t rights;
+#endif
+ int error, tmp, locked;
AUDIT_ARG_FD(fd);
AUDIT_ARG_CMD(com);
- if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0)
- return (error);
- if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
- fdrop(fp, td);
- return (EBADF);
- }
+
fdp = td->td_proc->p_fd;
+
switch (com) {
case FIONCLEX:
+ case FIOCLEX:
FILEDESC_XLOCK(fdp);
- fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
- FILEDESC_XUNLOCK(fdp);
+ locked = LA_XLOCKED;
+ break;
+ default:
+#ifdef CAPABILITIES
+ FILEDESC_SLOCK(fdp);
+ locked = LA_SLOCKED;
+#else
+ locked = LA_UNLOCKED;
+#endif
+ break;
+ }
+
+#ifdef CAPABILITIES
+ if ((fp = fget_locked(fdp, fd)) == NULL) {
+ error = EBADF;
+ goto out;
+ }
+ if ((error = cap_ioctl_check(fdp, fd, com)) != 0) {
+ fp = NULL; /* fhold() was not called yet */
+ goto out;
+ }
+ fhold(fp);
+ if (locked == LA_SLOCKED) {
+ FILEDESC_SUNLOCK(fdp);
+ locked = LA_UNLOCKED;
+ }
+#else
+ error = fget(td, fd, cap_rights_init(&rights, CAP_IOCTL), &fp);
+ if (error != 0) {
+ fp = NULL;
+ goto out;
+ }
+#endif
+ if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
+ error = EBADF;
+ goto out;
+ }
+
+ switch (com) {
+ case FIONCLEX:
+ fdp->fd_ofiles[fd].fde_flags &= ~UF_EXCLOSE;
goto out;
case FIOCLEX:
- FILEDESC_XLOCK(fdp);
- fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
- FILEDESC_XUNLOCK(fdp);
+ fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE;
goto out;
case FIONBIO:
if ((tmp = *(int *)data))
@@ -778,7 +848,21 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
error = fo_ioctl(fp, com, data, td->td_ucred, td);
out:
- fdrop(fp, td);
+ switch (locked) {
+ case LA_XLOCKED:
+ FILEDESC_XUNLOCK(fdp);
+ break;
+#ifdef CAPABILITIES
+ case LA_SLOCKED:
+ FILEDESC_SUNLOCK(fdp);
+ break;
+#endif
+ default:
+ FILEDESC_UNLOCK_ASSERT(fdp);
+ break;
+ }
+ if (fp != NULL)
+ fdrop(fp, td);
return (error);
}
#endif /* __rtems__ */
@@ -939,9 +1023,10 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
*/
fd_mask s_selbits[howmany(2048, NFDBITS)];
fd_mask *ibits[3], *obits[3], *selbits, *sbp;
- struct timeval atv, rtv, ttv;
- int error, lf, ndu, timo;
+ struct timeval rtv;
+ sbintime_t asbt, precision, rsbt;
u_int nbufbytes, ncpbytes, ncpubytes, nfdbits;
+ int error, lf, ndu;
if (nd < 0)
return (EINVAL);
@@ -1038,35 +1123,37 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
if (nbufbytes != 0)
bzero(selbits, nbufbytes / 2);
+ precision = 0;
if (tvp != NULL) {
- atv = *tvp;
- if (itimerfix(&atv)) {
+ rtv = *tvp;
+ if (rtv.tv_sec < 0 || rtv.tv_usec < 0 ||
+ rtv.tv_usec >= 1000000) {
error = EINVAL;
goto done;
}
- getmicrouptime(&rtv);
- timevaladd(&atv, &rtv);
- } else {
- atv.tv_sec = 0;
- atv.tv_usec = 0;
- }
- timo = 0;
+ if (!timevalisset(&rtv))
+ asbt = 0;
+ else if (rtv.tv_sec <= INT32_MAX) {
+ rsbt = tvtosbt(rtv);
+ precision = rsbt;
+ precision >>= tc_precexp;
+ if (TIMESEL(&asbt, rsbt))
+ asbt += tc_tick_sbt;
+ if (asbt <= SBT_MAX - rsbt)
+ asbt += rsbt;
+ else
+ asbt = -1;
+ } else
+ asbt = -1;
+ } else
+ asbt = -1;
seltdinit(td);
/* Iterate until the timeout expires or descriptors become ready. */
for (;;) {
error = selscan(td, ibits, obits, nd);
if (error || td->td_retval[0] != 0)
break;
- if (atv.tv_sec || atv.tv_usec) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=))
- break;
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timo = ttv.tv_sec > 24 * 60 * 60 ?
- 24 * 60 * 60 * hz : tvtohz(&ttv);
- }
- error = seltdwait(td, timo);
+ error = seltdwait(td, asbt, precision);
if (error)
break;
error = selrescan(td, ibits, obits);
@@ -1196,32 +1283,11 @@ selsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events)
static __inline int
getselfd_cap(struct filedesc *fdp, int fd, struct file **fpp)
{
- struct file *fp;
-#ifdef CAPABILITIES
- struct file *fp_fromcap;
- int error;
-#endif
+ cap_rights_t rights;
- if ((fp = fget_unlocked(fdp, fd)) == NULL)
- return (EBADF);
-#ifdef CAPABILITIES
- /*
- * If the file descriptor is for a capability, test rights and use
- * the file descriptor references by the capability.
- */
- error = cap_funwrap(fp, CAP_POLL_EVENT, &fp_fromcap);
- if (error) {
- fdrop(fp, curthread);
- return (error);
- }
- if (fp != fp_fromcap) {
- fhold(fp_fromcap);
- fdrop(fp, curthread);
- fp = fp_fromcap;
- }
-#endif /* CAPABILITIES */
- *fpp = fp;
- return (0);
+ cap_rights_init(&rights, CAP_EVENT);
+
+ return (fget_unlocked(fdp, fd, &rights, fpp, NULL));
}
/*
@@ -1315,29 +1381,66 @@ selscan(td, ibits, obits, nfd)
return (0);
}
-#ifndef _SYS_SYSPROTO_H_
-struct poll_args {
- struct pollfd *fds;
- u_int nfds;
- int timeout;
-};
-#endif
#ifdef __rtems__
+static int kern_poll(struct thread *td, struct pollfd *fds, u_int nfds,
+ struct timespec *tsp, sigset_t *uset);
+
static
#endif /* __rtems__ */
int
-sys_poll(td, uap)
- struct thread *td;
- struct poll_args *uap;
+sys_poll(struct thread *td, struct poll_args *uap)
+{
+ struct timespec ts, *tsp;
+
+ if (uap->timeout != INFTIM) {
+ if (uap->timeout < 0)
+ return (EINVAL);
+ ts.tv_sec = uap->timeout / 1000;
+ ts.tv_nsec = (uap->timeout % 1000) * 1000000;
+ tsp = &ts;
+ } else
+ tsp = NULL;
+
+ return (kern_poll(td, uap->fds, uap->nfds, tsp, NULL));
+}
+
+int
+kern_poll(struct thread *td, struct pollfd *fds, u_int nfds,
+ struct timespec *tsp, sigset_t *uset)
{
struct pollfd *bits;
struct pollfd smallbits[32];
- struct timeval atv, rtv, ttv;
- int error, timo;
- u_int nfds;
+ sbintime_t sbt, precision, tmp;
+ time_t over;
+ struct timespec ts;
+ int error;
size_t ni;
- nfds = uap->nfds;
+ precision = 0;
+ if (tsp != NULL) {
+ if (tsp->tv_sec < 0)
+ return (EINVAL);
+ if (tsp->tv_nsec < 0 || tsp->tv_nsec >= 1000000000)
+ return (EINVAL);
+ if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
+ sbt = 0;
+ else {
+ ts = *tsp;
+ if (ts.tv_sec > INT32_MAX / 2) {
+ over = ts.tv_sec - INT32_MAX / 2;
+ ts.tv_sec -= over;
+ } else
+ over = 0;
+ tmp = tstosbt(ts);
+ precision = tmp;
+ precision >>= tc_precexp;
+ if (TIMESEL(&sbt, tmp))
+ sbt += tc_tick_sbt;
+ sbt += tmp;
+ }
+ } else
+ sbt = -1;
+
#ifndef __rtems__
if (nfds > maxfilesperproc && nfds > FD_SETSIZE)
#else /* __rtems__ */
@@ -1349,39 +1452,35 @@ sys_poll(td, uap)
bits = malloc(ni, M_TEMP, M_WAITOK);
else
bits = smallbits;
- error = copyin(uap->fds, bits, ni);
+ error = copyin(fds, bits, ni);
if (error)
goto done;
- if (uap->timeout != INFTIM) {
- atv.tv_sec = uap->timeout / 1000;
- atv.tv_usec = (uap->timeout % 1000) * 1000;
- if (itimerfix(&atv)) {
- error = EINVAL;
+
+#ifndef __rtems__
+ if (uset != NULL) {
+ error = kern_sigprocmask(td, SIG_SETMASK, uset,
+ &td->td_oldsigmask, 0);
+ if (error)
goto done;
- }
- getmicrouptime(&rtv);
- timevaladd(&atv, &rtv);
- } else {
- atv.tv_sec = 0;
- atv.tv_usec = 0;
+ td->td_pflags |= TDP_OLDMASK;
+ /*
+ * Make sure that ast() is called on return to
+ * usermode and TDP_OLDMASK is cleared, restoring old
+ * sigmask.
+ */
+ thread_lock(td);
+ td->td_flags |= TDF_ASTPENDING;
+ thread_unlock(td);
}
- timo = 0;
+#endif /* __rtems__ */
+
seltdinit(td);
/* Iterate until the timeout expires or descriptors become ready. */
for (;;) {
error = pollscan(td, bits, nfds);
if (error || td->td_retval[0] != 0)
break;
- if (atv.tv_sec || atv.tv_usec) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=))
- break;
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timo = ttv.tv_sec > 24 * 60 * 60 ?
- 24 * 60 * 60 * hz : tvtohz(&ttv);
- }
- error = seltdwait(td, timo);
+ error = seltdwait(td, sbt, precision);
if (error)
break;
error = pollrescan(td);
@@ -1397,7 +1496,7 @@ done:
if (error == EWOULDBLOCK)
error = 0;
if (error == 0) {
- error = pollout(td, bits, uap->fds, nfds);
+ error = pollout(td, bits, fds, nfds);
if (error)
goto out;
}
@@ -1432,6 +1531,37 @@ poll(struct pollfd fds[], nfds_t nfds, int timeout)
}
#endif /* __rtems__ */
+#ifndef __rtems__
+int
+sys_ppoll(struct thread *td, struct ppoll_args *uap)
+{
+ struct timespec ts, *tsp;
+ sigset_t set, *ssp;
+ int error;
+
+ if (uap->ts != NULL) {
+ error = copyin(uap->ts, &ts, sizeof(ts));
+ if (error)
+ return (error);
+ tsp = &ts;
+ } else
+ tsp = NULL;
+ if (uap->set != NULL) {
+ error = copyin(uap->set, &set, sizeof(set));
+ if (error)
+ return (error);
+ ssp = &set;
+ } else
+ ssp = NULL;
+ /*
+ * fds is still a pointer to user space. kern_poll() will
+ * take care of copyin that array to the kernel space.
+ */
+
+ return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp));
+}
+#endif /* __rtems__ */
+
static int
pollrescan(struct thread *td)
{
@@ -1442,6 +1572,9 @@ pollrescan(struct thread *td)
struct filedesc *fdp;
struct file *fp;
struct pollfd *fd;
+#ifdef CAPABILITIES
+ cap_rights_t rights;
+#endif
int n;
n = 0;
@@ -1460,16 +1593,18 @@ pollrescan(struct thread *td)
if (si != NULL)
continue;
#ifndef __rtems__
- fp = fdp->fd_ofiles[fd->fd];
+ fp = fdp->fd_ofiles[fd->fd].fde_file;
#else /* __rtems__ */
- fp = fget_unlocked(fdp, fd->fd);
+ fget_unlocked(fdp, fd->fd, NULL, &fp, NULL);
#endif /* __rtems__ */
#ifdef CAPABILITIES
- if ((fp == NULL)
- || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) {
+ if (fp == NULL ||
+ cap_check(cap_rights(fdp, fd->fd),
+ cap_rights_init(&rights, CAP_EVENT)) != 0)
#else
- if (fp == NULL) {
+ if (fp == NULL)
#endif
+ {
fd->revents = POLLNVAL;
n++;
continue;
@@ -1526,14 +1661,16 @@ pollscan(td, fds, nfd)
#else /* __rtems__ */
struct filedesc *fdp = NULL;
#endif /* __rtems__ */
- int i;
struct file *fp;
- int n = 0;
+#ifdef CAPABILITIES
+ cap_rights_t rights;
+#endif
+ int i, n = 0;
FILEDESC_SLOCK(fdp);
for (i = 0; i < nfd; i++, fds++) {
#ifndef __rtems__
- if (fds->fd >= fdp->fd_nfiles) {
+ if (fds->fd > fdp->fd_lastfile) {
#else /* __rtems__ */
if (fds->fd >= rtems_libio_number_iops) {
#endif /* __rtems__ */
@@ -1543,16 +1680,18 @@ pollscan(td, fds, nfd)
fds->revents = 0;
} else {
#ifndef __rtems__
- fp = fdp->fd_ofiles[fds->fd];
+ fp = fdp->fd_ofiles[fds->fd].fde_file;
#else /* __rtems__ */
- fp = fget_unlocked(fdp, fds->fd);
+ fget_unlocked(fdp, fds->fd, NULL, &fp, NULL);
#endif /* __rtems__ */
#ifdef CAPABILITIES
- if ((fp == NULL)
- || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) {
+ if (fp == NULL ||
+ cap_check(cap_rights(fdp, fds->fd),
+ cap_rights_init(&rights, CAP_EVENT)) != 0)
#else
- if (fp == NULL) {
+ if (fp == NULL)
#endif
+ {
fds->revents = POLLNVAL;
n++;
} else {
@@ -1582,26 +1721,6 @@ pollscan(td, fds, nfd)
#ifndef __rtems__
/*
- * OpenBSD poll system call.
- *
- * XXX this isn't quite a true representation.. OpenBSD uses select ops.
- */
-#ifndef _SYS_SYSPROTO_H_
-struct openbsd_poll_args {
- struct pollfd *fds;
- u_int nfds;
- int timeout;
-};
-#endif
-int
-sys_openbsd_poll(td, uap)
- register struct thread *td;
- register struct openbsd_poll_args *uap;
-{
- return (sys_poll(td, (struct poll_args *)uap));
-}
-
-/*
* XXX This was created specifically to support netncp and netsmb. This
* allows the caller to specify a socket to wait for events on. It returns
* 0 if any events matched and an error otherwise. There is no way to
@@ -1610,21 +1729,32 @@ sys_openbsd_poll(td, uap)
int
selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td)
{
- struct timeval atv, rtv, ttv;
- int error, timo;
+ struct timeval rtv;
+ sbintime_t asbt, precision, rsbt;
+ int error;
+ precision = 0; /* stupid gcc! */
if (tvp != NULL) {
- atv = *tvp;
- if (itimerfix(&atv))
+ rtv = *tvp;
+ if (rtv.tv_sec < 0 || rtv.tv_usec < 0 ||
+ rtv.tv_usec >= 1000000)
return (EINVAL);
- getmicrouptime(&rtv);
- timevaladd(&atv, &rtv);
- } else {
- atv.tv_sec = 0;
- atv.tv_usec = 0;
- }
-
- timo = 0;
+ if (!timevalisset(&rtv))
+ asbt = 0;
+ else if (rtv.tv_sec <= INT32_MAX) {
+ rsbt = tvtosbt(rtv);
+ precision = rsbt;
+ precision >>= tc_precexp;
+ if (TIMESEL(&asbt, rsbt))
+ asbt += tc_tick_sbt;
+ if (asbt <= SBT_MAX - rsbt)
+ asbt += rsbt;
+ else
+ asbt = -1;
+ } else
+ asbt = -1;
+ } else
+ asbt = -1;
seltdinit(td);
/*
* Iterate until the timeout expires or the socket becomes ready.
@@ -1635,22 +1765,11 @@ selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td)
/* error here is actually the ready events. */
if (error)
return (0);
- if (atv.tv_sec || atv.tv_usec) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=)) {
- seltdclear(td);
- return (EWOULDBLOCK);
- }
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timo = ttv.tv_sec > 24 * 60 * 60 ?
- 24 * 60 * 60 * hz : tvtohz(&ttv);
- }
- error = seltdwait(td, timo);
- seltdclear(td);
+ error = seltdwait(td, asbt, precision);
if (error)
break;
}
+ seltdclear(td);
/* XXX Duplicates ncp/smb behavior. */
if (error == ERESTART)
error = 0;
@@ -1682,11 +1801,16 @@ static void
selfdfree(struct seltd *stp, struct selfd *sfp)
{
STAILQ_REMOVE(&stp->st_selq, sfp, selfd, sf_link);
- mtx_lock(sfp->sf_mtx);
- if (sfp->sf_si)
- TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads);
- mtx_unlock(sfp->sf_mtx);
- uma_zfree(selfd_zone, sfp);
+ if (sfp->sf_si != NULL) {
+ mtx_lock(sfp->sf_mtx);
+ if (sfp->sf_si != NULL) {
+ TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads);
+ refcount_release(&sfp->sf_refs);
+ }
+ mtx_unlock(sfp->sf_mtx);
+ }
+ if (refcount_release(&sfp->sf_refs))
+ uma_zfree(selfd_zone, sfp);
}
/* Drain the waiters tied to all the selfd belonging the specified selinfo. */
@@ -1742,6 +1866,7 @@ selrecord(selector, sip)
*/
sfp->sf_si = sip;
sfp->sf_mtx = mtxp;
+ refcount_init(&sfp->sf_refs, 2);
STAILQ_INSERT_TAIL(&stp->st_selq, sfp, sf_link);
/*
* Now that we've locked the sip, check for initialization.
@@ -1806,6 +1931,8 @@ doselwakeup(sip, pri)
stp->st_flags |= SELTD_PENDING;
cv_broadcastpri(&stp->st_wait, pri);
mtx_unlock(&stp->st_mtx);
+ if (refcount_release(&sfp->sf_refs))
+ uma_zfree(selfd_zone, sfp);
}
mtx_unlock(sip->si_mtx);
}
@@ -1826,7 +1953,7 @@ out:
}
static int
-seltdwait(struct thread *td, int timo)
+seltdwait(struct thread *td, sbintime_t sbt, sbintime_t precision)
{
struct seltd *stp;
int error;
@@ -1845,8 +1972,11 @@ seltdwait(struct thread *td, int timo)
mtx_unlock(&stp->st_mtx);
return (0);
}
- if (timo > 0)
- error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo);
+ if (sbt == 0)
+ error = EWOULDBLOCK;
+ else if (sbt != -1)
+ error = cv_timedwait_sig_sbt(&stp->st_wait, &stp->st_mtx,
+ sbt, precision, C_ABSOLUTE);
else
error = cv_wait_sig(&stp->st_wait, &stp->st_mtx);
mtx_unlock(&stp->st_mtx);
@@ -1897,6 +2027,24 @@ selectinit(void *dummy __unused)
NULL, NULL, UMA_ALIGN_PTR, 0);
mtxpool_select = mtx_pool_create("select mtxpool", 128, MTX_DEF);
}
+
+#ifndef __rtems__
+/*
+ * Set up a syscall return value that follows the convention specified for
+ * posix_* functions.
+ */
+int
+kern_posix_error(struct thread *td, int error)
+{
+
+ if (error <= 0)
+ return (error);
+ td->td_errno = error;
+ td->td_pflags |= TDP_NERRNO;
+ td->td_retval[0] = error;
+ return (0);
+}
+#endif /* __rtems__ */
#ifdef __rtems__
#include <machine/rtems-bsd-thread.h>
diff --git a/freebsd/sys/kern/sys_pipe.c b/freebsd/sys/kern/sys_pipe.c
index 45d3ed13..7ff0c815 100755
--- a/freebsd/sys/kern/sys_pipe.c
+++ b/freebsd/sys/kern/sys_pipe.c
@@ -2,6 +2,7 @@
/*-
* Copyright (c) 1996 John S. Dyson
+ * Copyright (c) 2012 Giovanni Trematerra
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -90,6 +91,8 @@
* in the structure may have changed.
*/
+#include <rtems/bsd/local/opt_compat.h>
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -116,6 +119,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
+#include <sys/user.h>
#include <sys/event.h>
#include <security/mac/mac_framework.h>
@@ -130,12 +134,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/uma.h>
-/* XXX */
-#ifdef __rtems__
-static
-#endif /* __rtems__ */
-int do_pipe(struct thread *td, int fildes[2], int flags);
-
/*
* Use this define if you want to disable *fancy* VM things. Expect an
* approx 30% decrease in transfer rate. This could be useful for
@@ -143,6 +141,9 @@ int do_pipe(struct thread *td, int fildes[2], int flags);
*/
/* #define PIPE_NODIRECT */
+#define PIPE_PEER(pipe) \
+ (((pipe)->pipe_state & PIPE_NAMED) ? (pipe) : ((pipe)->pipe_peer))
+
/*
* interfaces to the outside world
*/
@@ -155,8 +156,11 @@ static fo_poll_t pipe_poll;
static fo_kqfilter_t pipe_kqfilter;
static fo_stat_t pipe_stat;
static fo_close_t pipe_close;
+static fo_chmod_t pipe_chmod;
+static fo_chown_t pipe_chown;
+static fo_fill_kinfo_t pipe_fill_kinfo;
-static struct fileops pipeops = {
+struct fileops pipeops = {
.fo_read = pipe_read,
.fo_write = pipe_write,
.fo_truncate = pipe_truncate,
@@ -165,8 +169,10 @@ static struct fileops pipeops = {
.fo_kqfilter = pipe_kqfilter,
.fo_stat = pipe_stat,
.fo_close = pipe_close,
- .fo_chmod = invfo_chmod,
- .fo_chown = invfo_chown,
+ .fo_chmod = pipe_chmod,
+ .fo_chown = pipe_chown,
+ .fo_sendfile = invfo_sendfile,
+ .fo_fill_kinfo = pipe_fill_kinfo,
.fo_flags = DFLAG_PASSABLE
};
#else /* __rtems__ */
@@ -204,9 +210,16 @@ long maxpipekva; /* Limit on pipe KVA */
#endif /* __rtems__ */
static void filt_pipedetach(struct knote *kn);
+static void filt_pipedetach_notsup(struct knote *kn);
+static int filt_pipenotsup(struct knote *kn, long hint);
static int filt_piperead(struct knote *kn, long hint);
static int filt_pipewrite(struct knote *kn, long hint);
+static struct filterops pipe_nfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_pipedetach_notsup,
+ .f_event = filt_pipenotsup
+};
static struct filterops pipe_rfiltops = {
.f_isfd = 1,
.f_detach = filt_pipedetach,
@@ -233,7 +246,7 @@ static int pipeallocfail;
static int piperesizefail;
static int piperesizeallowed = 1;
-SYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN,
+SYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
&maxpipekva, 0, "Pipe KVA limit");
SYSCTL_LONG(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
&amountpipekva, 0, "Pipe KVA usage");
@@ -249,10 +262,10 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW,
static void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
-static int pipe_create(struct pipe *pipe, int backing);
+static void pipe_create(struct pipe *pipe, int backing);
+static void pipe_paircreate(struct thread *td, struct pipepair **p_pp);
static __inline int pipelock(struct pipe *cpipe, int catch);
static __inline void pipeunlock(struct pipe *cpipe);
-static __inline void pipeselwakeup(struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
static void pipe_destroy_write_buffer(struct pipe *wpipe);
@@ -347,7 +360,7 @@ pipe_zone_init(void *mem, int size, int flags)
pp = (struct pipepair *)mem;
- mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
+ mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_NEW);
return (0);
}
@@ -363,31 +376,13 @@ pipe_zone_fini(void *mem, int size)
mtx_destroy(&pp->pp_mtx);
}
-/*
- * The pipe system call for the DTYPE_PIPE type of pipes. If we fail, let
- * the zone pick up the pieces via pipeclose().
- */
-int
-kern_pipe(struct thread *td, int fildes[2])
-{
-
- return (do_pipe(td, fildes, 0));
-}
-
-int
-do_pipe(struct thread *td, int fildes[2], int flags)
+static void
+pipe_paircreate(struct thread *td, struct pipepair **p_pp)
{
-#ifndef __rtems__
- struct filedesc *fdp = td->td_proc->p_fd;
-#else /* __rtems__ */
- struct filedesc *fdp = NULL;
-#endif /* __rtems__ */
- struct file *rf, *wf;
struct pipepair *pp;
struct pipe *rpipe, *wpipe;
- int fd, fflags, error;
- pp = uma_zalloc(pipe_zone, M_WAITOK);
+ *p_pp = pp = uma_zalloc(pipe_zone, M_WAITOK);
#ifdef MAC
/*
* The MAC label is shared between the connected endpoints. As a
@@ -404,23 +399,64 @@ do_pipe(struct thread *td, int fildes[2], int flags)
knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe));
/* Only the forward direction pipe is backed by default */
- if ((error = pipe_create(rpipe, 1)) != 0 ||
- (error = pipe_create(wpipe, 0)) != 0) {
- pipeclose(rpipe);
- pipeclose(wpipe);
- return (error);
- }
+ pipe_create(rpipe, 1);
+ pipe_create(wpipe, 0);
rpipe->pipe_state |= PIPE_DIRECTOK;
wpipe->pipe_state |= PIPE_DIRECTOK;
+}
+
+void
+pipe_named_ctor(struct pipe **ppipe, struct thread *td)
+{
+ struct pipepair *pp;
+
+ pipe_paircreate(td, &pp);
+ pp->pp_rpipe.pipe_state |= PIPE_NAMED;
+ *ppipe = &pp->pp_rpipe;
+}
+
+void
+pipe_dtor(struct pipe *dpipe)
+{
+ struct pipe *peer;
+ ino_t ino;
- error = falloc(td, &rf, &fd, flags);
+ ino = dpipe->pipe_ino;
+ peer = (dpipe->pipe_state & PIPE_NAMED) != 0 ? dpipe->pipe_peer : NULL;
+ funsetown(&dpipe->pipe_sigio);
+ pipeclose(dpipe);
+ if (peer != NULL) {
+ funsetown(&peer->pipe_sigio);
+ pipeclose(peer);
+ }
+ if (ino != 0 && ino != (ino_t)-1)
+ free_unr(pipeino_unr, ino);
+}
+
+/*
+ * The pipe system call for the DTYPE_PIPE type of pipes. If we fail, let
+ * the zone pick up the pieces via pipeclose().
+ */
+int
+kern_pipe(struct thread *td, int fildes[2], int flags, struct filecaps *fcaps1,
+ struct filecaps *fcaps2)
+{
+ struct file *rf, *wf;
+ struct pipe *rpipe, *wpipe;
+ struct pipepair *pp;
+ int fd, fflags, error;
+
+ pipe_paircreate(td, &pp);
+ rpipe = &pp->pp_rpipe;
+ wpipe = &pp->pp_wpipe;
+ error = falloc_caps(td, &rf, &fd, flags, fcaps1);
if (error) {
pipeclose(rpipe);
pipeclose(wpipe);
return (error);
}
- /* An extra reference on `rf' has been held for us by falloc(). */
+ /* An extra reference on `rf' has been held for us by falloc_caps(). */
fildes[0] = fd;
fflags = FREAD | FWRITE;
@@ -434,15 +470,15 @@ do_pipe(struct thread *td, int fildes[2], int flags)
* side while we are blocked trying to allocate the write side.
*/
finit(rf, fflags, DTYPE_PIPE, rpipe, &pipeops);
- error = falloc(td, &wf, &fd, flags);
+ error = falloc_caps(td, &wf, &fd, flags, fcaps2);
if (error) {
- fdclose(fdp, rf, fildes[0], td);
+ fdclose(td, rf, fildes[0]);
fdrop(rf, td);
/* rpipe has been closed by fdrop(). */
pipeclose(wpipe);
return (error);
}
- /* An extra reference on `wf' has been held for us by falloc(). */
+ /* An extra reference on `wf' has been held for us by falloc_caps(). */
finit(wf, fflags, DTYPE_PIPE, wpipe, &pipeops);
fdrop(wf, td);
fildes[1] = fd;
@@ -451,14 +487,15 @@ do_pipe(struct thread *td, int fildes[2], int flags)
return (0);
}
+#ifdef COMPAT_FREEBSD10
/* ARGSUSED */
int
-sys_pipe(struct thread *td, struct pipe_args *uap)
+freebsd10_pipe(struct thread *td, struct freebsd10_pipe_args *uap __unused)
{
int error;
int fildes[2];
- error = kern_pipe(td, fildes);
+ error = kern_pipe(td, fildes, 0, NULL, NULL);
if (error)
return (error);
@@ -467,6 +504,28 @@ sys_pipe(struct thread *td, struct pipe_args *uap)
return (0);
}
+#endif
+
+#ifndef __rtems__
+int
+sys_pipe2(struct thread *td, struct pipe2_args *uap)
+{
+ int error, fildes[2];
+
+ if (uap->flags & ~(O_CLOEXEC | O_NONBLOCK))
+ return (EINVAL);
+ error = kern_pipe(td, fildes, uap->flags, NULL, NULL);
+ if (error)
+ return (error);
+ error = copyout(fildes, uap->fildes, 2 * sizeof(int));
+ if (error) {
+ (void)kern_close(td, fildes[0]);
+ (void)kern_close(td, fildes[1]);
+ }
+ return (error);
+}
+#endif /* __rtems__ */
+
#ifdef __rtems__
int
pipe(int fildes[2])
@@ -475,14 +534,12 @@ pipe(int fildes[2])
int error;
if (td != NULL) {
- error = sys_pipe(td, NULL);
+ error = kern_pipe(td, fildes, 0, NULL, NULL);
} else {
error = ENOMEM;
}
if (error == 0) {
- fildes[0] = td->td_retval[0];
- fildes[1] = td->td_retval[1];
return error;
} else {
rtems_set_errno_and_return_minus_one(error);
@@ -519,10 +576,11 @@ retry:
buffer = (caddr_t) vm_map_min(pipe_map);
error = vm_map_find(pipe_map, NULL, 0,
- (vm_offset_t *) &buffer, size, 1,
+ (vm_offset_t *) &buffer, size, 0, VMFS_ANY_SPACE,
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error != KERN_SUCCESS) {
#else /* __rtems__ */
+ (void)error;
buffer = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
if (buffer == NULL) {
#endif /* __rtems__ */
@@ -621,7 +679,7 @@ pipeunlock(cpipe)
}
}
-static __inline void
+void
pipeselwakeup(cpipe)
struct pipe *cpipe;
{
@@ -632,8 +690,10 @@ pipeselwakeup(cpipe)
if (!SEL_WAITING(&cpipe->pipe_sel))
cpipe->pipe_state &= ~PIPE_SEL;
}
+#ifndef __rtems__
if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
+#endif /* __rtems__ */
KNOTE_LOCKED(&cpipe->pipe_sel.si_note, 0);
}
@@ -641,24 +701,27 @@ pipeselwakeup(cpipe)
* Initialize and allocate VM and memory for pipe. The structure
* will start out zero'd from the ctor, so we just manage the kmem.
*/
-static int
+static void
pipe_create(pipe, backing)
struct pipe *pipe;
int backing;
{
- int error;
if (backing) {
+ /*
+ * Note that these functions can fail if pipe map is exhausted
+ * (as a result of too many pipes created), but we ignore the
+ * error as it is not fatal and could be provoked by
+ * unprivileged users. The only consequence is worse performance
+ * with given pipe.
+ */
if (amountpipekva > maxpipekva / 2)
- error = pipespace_new(pipe, SMALL_PIPE_SIZE);
+ (void)pipespace_new(pipe, SMALL_PIPE_SIZE);
else
- error = pipespace_new(pipe, PIPE_SIZE);
- } else {
- /* If we're not backing this pipe, no need to do anything. */
- error = 0;
+ (void)pipespace_new(pipe, PIPE_SIZE);
}
+
pipe->pipe_ino = -1;
- return (error);
}
/* ARGSUSED */
@@ -670,11 +733,12 @@ pipe_read(fp, uio, active_cred, flags, td)
struct thread *td;
int flags;
{
- struct pipe *rpipe = fp->f_data;
+ struct pipe *rpipe;
int error;
int nread = 0;
int size;
+ rpipe = fp->f_data;
PIPE_LOCK(rpipe);
++rpipe->pipe_busy;
error = pipelock(rpipe, 1);
@@ -751,7 +815,7 @@ pipe_read(fp, uio, active_cred, flags, td)
rpipe->pipe_map.pos += size;
rpipe->pipe_map.cnt -= size;
if (rpipe->pipe_map.cnt == 0) {
- rpipe->pipe_state &= ~PIPE_DIRECTW;
+ rpipe->pipe_state &= ~(PIPE_DIRECTW|PIPE_WANTW);
wakeup(rpipe);
}
#endif
@@ -993,9 +1057,10 @@ pipe_direct_write(wpipe, uio)
retry:
PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
error = pipelock(wpipe, 1);
- if (wpipe->pipe_state & PIPE_EOF)
+ if (error != 0)
+ goto error1;
+ if ((wpipe->pipe_state & PIPE_EOF) != 0) {
error = EPIPE;
- if (error) {
pipeunlock(wpipe);
goto error1;
}
@@ -1056,6 +1121,7 @@ retry:
wakeup(wpipe);
}
pipeselwakeup(wpipe);
+ wpipe->pipe_state |= PIPE_WANTW;
pipeunlock(wpipe);
error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
"pipdwt", 0);
@@ -1096,8 +1162,7 @@ pipe_write(fp, uio, active_cred, flags, td)
struct pipe *wpipe, *rpipe;
rpipe = fp->f_data;
- wpipe = rpipe->pipe_peer;
-
+ wpipe = PIPE_PEER(rpipe);
PIPE_LOCK(rpipe);
error = pipelock(wpipe, 1);
if (error) {
@@ -1346,13 +1411,13 @@ pipe_write(fp, uio, active_cred, flags, td)
}
/*
- * Don't return EPIPE if I/O was successful
+ * Don't return EPIPE if any byte was written.
+ * EINTR and other interrupts are handled by generic I/O layer.
+ * Do not pretend that I/O succeeded for obvious user error
+ * like EFAULT.
*/
- if ((wpipe->pipe_buffer.cnt == 0) &&
- (uio->uio_resid == 0) &&
- (error == EPIPE)) {
+ if (uio->uio_resid != orig_resid && error == EPIPE)
error = 0;
- }
if (error == 0)
#ifndef __rtems__
@@ -1416,8 +1481,15 @@ pipe_truncate(fp, length, active_cred, td)
struct ucred *active_cred;
struct thread *td;
{
+ struct pipe *cpipe;
+ int error;
- return (EINVAL);
+ cpipe = fp->f_data;
+ if (cpipe->pipe_state & PIPE_NAMED)
+ error = vnops.fo_truncate(fp, length, active_cred, td);
+ else
+ error = invfo_truncate(fp, length, active_cred, td);
+ return (error);
}
#endif /* __rtems__ */
@@ -1460,6 +1532,15 @@ pipe_ioctl(fp, cmd, data, active_cred, td)
break;
case FIONREAD:
+#ifndef __rtems__
+ if (!(fp->f_flag & FREAD)) {
+#else /* __rtems__ */
+ if (!(rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD)) {
+#endif /* __rtems__ */
+ *(int *)data = 0;
+ PIPE_UNLOCK(mpipe);
+ return (0);
+ }
if (mpipe->pipe_state & PIPE_DIRECTW)
*(int *)data = mpipe->pipe_map.cnt;
else
@@ -1519,26 +1600,36 @@ pipe_poll(fp, events, active_cred, td)
struct ucred *active_cred;
struct thread *td;
{
- struct pipe *rpipe = fp->f_data;
+ struct pipe *rpipe;
struct pipe *wpipe;
- int revents = 0;
+ int levents, revents;
#ifdef MAC
int error;
#endif
- wpipe = rpipe->pipe_peer;
+ revents = 0;
+ rpipe = fp->f_data;
+ wpipe = PIPE_PEER(rpipe);
PIPE_LOCK(rpipe);
#ifdef MAC
error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair);
if (error)
goto locked_error;
#endif
- if (events & (POLLIN | POLLRDNORM))
+#ifndef __rtems__
+ if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM))
+#else /* __rtems__ */
+ if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD && events & (POLLIN | POLLRDNORM))
+#endif /* __rtems__ */
if ((rpipe->pipe_state & PIPE_DIRECTW) ||
(rpipe->pipe_buffer.cnt > 0))
revents |= events & (POLLIN | POLLRDNORM);
- if (events & (POLLOUT | POLLWRNORM))
+#ifndef __rtems__
+ if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM))
+#else /* __rtems__ */
+ if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FWRITE && events & (POLLOUT | POLLWRNORM))
+#endif /* __rtems__ */
if (wpipe->pipe_present != PIPE_ACTIVE ||
(wpipe->pipe_state & PIPE_EOF) ||
(((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
@@ -1546,6 +1637,16 @@ pipe_poll(fp, events, active_cred, td)
wpipe->pipe_buffer.size == 0)))
revents |= events & (POLLOUT | POLLWRNORM);
+ levents = events &
+ (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND);
+#ifndef __rtems__
+ if (rpipe->pipe_state & PIPE_NAMED && fp->f_flag & FREAD && levents &&
+ fp->f_seqcount == rpipe->pipe_wgen)
+#else /* __rtems__ */
+ if (rpipe->pipe_state & PIPE_NAMED && rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD && levents)
+#endif /* __rtems__ */
+ events |= POLLINIGNEOF;
+
if ((events & POLLINIGNEOF) == 0) {
if (rpipe->pipe_state & PIPE_EOF) {
revents |= (events & (POLLIN | POLLRDNORM));
@@ -1556,13 +1657,21 @@ pipe_poll(fp, events, active_cred, td)
}
if (revents == 0) {
- if (events & (POLLIN | POLLRDNORM)) {
+#ifndef __rtems__
+ if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM)) {
+#else /* __rtems__ */
+ if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD && events & (POLLIN | POLLRDNORM)) {
+#endif /* __rtems__ */
selrecord(td, &rpipe->pipe_sel);
if (SEL_WAITING(&rpipe->pipe_sel))
rpipe->pipe_state |= PIPE_SEL;
}
- if (events & (POLLOUT | POLLWRNORM)) {
+#ifndef __rtems__
+ if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM)) {
+#else /* __rtems__ */
+ if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FWRITE && events & (POLLOUT | POLLWRNORM)) {
+#endif /* __rtems__ */
selrecord(td, &wpipe->pipe_sel);
if (SEL_WAITING(&wpipe->pipe_sel))
wpipe->pipe_state |= PIPE_SEL;
@@ -1627,6 +1736,17 @@ pipe_stat(struct pipe *pipe, struct stat *ub)
return (error);
}
#endif
+
+ /* For named pipes ask the underlying filesystem. */
+ if (pipe->pipe_state & PIPE_NAMED) {
+ PIPE_UNLOCK(pipe);
+#ifndef __rtems__
+ return (vnops.fo_stat(fp, ub, active_cred, td));
+#else /* __rtems__ */
+ return (ENXIO);
+#endif /* __rtems__ */
+ }
+
/*
* Lazily allocate an inode number for the pipe. Most pipe
* users do not call fstat(2) on the pipe, which means that
@@ -1655,7 +1775,7 @@ pipe_stat(struct pipe *pipe, struct stat *ub)
ub->st_size = pipe->pipe_map.cnt;
else
ub->st_size = pipe->pipe_buffer.cnt;
- ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
+ ub->st_blocks = howmany(ub->st_size, ub->st_blksize);
ub->st_atim = pipe->pipe_atime;
ub->st_mtim = pipe->pipe_mtime;
ub->st_ctim = pipe->pipe_ctime;
@@ -1695,19 +1815,69 @@ pipe_close(fp, td)
struct file *fp;
struct thread *td;
{
- struct pipe *cpipe = fp->f_data;
#ifndef __rtems__
+ if (fp->f_vnode != NULL)
+ return vnops.fo_close(fp, td);
fp->f_ops = &badfileops;
#else /* __rtems__ */
fp->f_io.pathinfo.handlers = &rtems_filesystem_handlers_default;
#endif /* __rtems__ */
+ pipe_dtor(fp->f_data);
fp->f_data = NULL;
- funsetown(&cpipe->pipe_sigio);
- pipeclose(cpipe);
return (0);
}
+#ifndef __rtems__
+static int
+pipe_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td)
+{
+ struct pipe *cpipe;
+ int error;
+
+ cpipe = fp->f_data;
+ if (cpipe->pipe_state & PIPE_NAMED)
+ error = vn_chmod(fp, mode, active_cred, td);
+ else
+ error = invfo_chmod(fp, mode, active_cred, td);
+ return (error);
+}
+
+static int
+pipe_chown(fp, uid, gid, active_cred, td)
+ struct file *fp;
+ uid_t uid;
+ gid_t gid;
+ struct ucred *active_cred;
+ struct thread *td;
+{
+ struct pipe *cpipe;
+ int error;
+
+ cpipe = fp->f_data;
+ if (cpipe->pipe_state & PIPE_NAMED)
+ error = vn_chown(fp, uid, gid, active_cred, td);
+ else
+ error = invfo_chown(fp, uid, gid, active_cred, td);
+ return (error);
+}
+
+static int
+pipe_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
+{
+ struct pipe *pi;
+
+ if (fp->f_type == DTYPE_FIFO)
+ return (vn_fill_kinfo(fp, kif, fdp));
+ kif->kf_type = KF_TYPE_PIPE;
+ pi = fp->f_data;
+ kif->kf_un.kf_pipe.kf_pipe_addr = (uintptr_t)pi;
+ kif->kf_un.kf_pipe.kf_pipe_peer = (uintptr_t)pi->pipe_peer;
+ kif->kf_un.kf_pipe.kf_pipe_buffer_cnt = pi->pipe_buffer.cnt;
+ return (0);
+}
+#endif /* __rtems__ */
+
static void
pipe_free_kmem(cpipe)
struct pipe *cpipe;
@@ -1745,7 +1915,6 @@ pipeclose(cpipe)
{
struct pipepair *pp;
struct pipe *ppipe;
- ino_t ino;
KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL"));
@@ -1804,12 +1973,6 @@ pipeclose(cpipe)
knlist_destroy(&cpipe->pipe_sel.si_note);
/*
- * Postpone the destroy of the fake inode number allocated for
- * our end, until pipe mtx is unlocked.
- */
- ino = cpipe->pipe_ino;
-
- /*
* If both endpoints are now closed, release the memory for the
* pipe pair. If not, unlock.
*/
@@ -1821,9 +1984,6 @@ pipeclose(cpipe)
uma_zfree(pipe_zone, cpipe->pipe_pair);
} else
PIPE_UNLOCK(cpipe);
-
- if (ino != 0 && ino != (ino_t)-1)
- free_unr(pipeino_unr, ino);
}
/*ARGSUSED*/
@@ -1832,7 +1992,28 @@ pipe_kqfilter(struct file *fp, struct knote *kn)
{
struct pipe *cpipe;
- cpipe = kn->kn_fp->f_data;
+ /*
+ * If a filter is requested that is not supported by this file
+ * descriptor, don't return an error, but also don't ever generate an
+ * event.
+ */
+#ifndef __rtems__
+ if ((kn->kn_filter == EVFILT_READ) && !(fp->f_flag & FREAD)) {
+#else /* __rtems__ */
+ if ((kn->kn_filter == EVFILT_READ) && !(rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD)) {
+#endif /* __rtems__ */
+ kn->kn_fop = &pipe_nfiltops;
+ return (0);
+ }
+#ifndef __rtems__
+ if ((kn->kn_filter == EVFILT_WRITE) && !(fp->f_flag & FWRITE)) {
+#else /* __rtems__ */
+ if ((kn->kn_filter == EVFILT_WRITE) && !(rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FWRITE)) {
+#endif /* __rtems__ */
+ kn->kn_fop = &pipe_nfiltops;
+ return (0);
+ }
+ cpipe = fp->f_data;
PIPE_LOCK(cpipe);
switch (kn->kn_filter) {
case EVFILT_READ:
@@ -1845,13 +2026,14 @@ pipe_kqfilter(struct file *fp, struct knote *kn)
PIPE_UNLOCK(cpipe);
return (EPIPE);
}
- cpipe = cpipe->pipe_peer;
+ cpipe = PIPE_PEER(cpipe);
break;
default:
PIPE_UNLOCK(cpipe);
return (EINVAL);
}
+ kn->kn_hook = cpipe;
knlist_add(&cpipe->pipe_sel.si_note, kn, 1);
PIPE_UNLOCK(cpipe);
return (0);
@@ -1869,11 +2051,9 @@ rtems_bsd_pipe_kqfilter(rtems_libio_t *iop, struct knote *kn)
static void
filt_pipedetach(struct knote *kn)
{
- struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
+ struct pipe *cpipe = kn->kn_hook;
PIPE_LOCK(cpipe);
- if (kn->kn_filter == EVFILT_WRITE)
- cpipe = cpipe->pipe_peer;
knlist_remove(&cpipe->pipe_sel.si_note, kn, 1);
PIPE_UNLOCK(cpipe);
}
@@ -1882,11 +2062,11 @@ filt_pipedetach(struct knote *kn)
static int
filt_piperead(struct knote *kn, long hint)
{
- struct pipe *rpipe = kn->kn_fp->f_data;
+ struct pipe *rpipe = kn->kn_hook;
struct pipe *wpipe = rpipe->pipe_peer;
int ret;
- PIPE_LOCK(rpipe);
+ PIPE_LOCK_ASSERT(rpipe, MA_OWNED);
kn->kn_data = rpipe->pipe_buffer.cnt;
if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
kn->kn_data = rpipe->pipe_map.cnt;
@@ -1895,11 +2075,9 @@ filt_piperead(struct knote *kn, long hint)
wpipe->pipe_present != PIPE_ACTIVE ||
(wpipe->pipe_state & PIPE_EOF)) {
kn->kn_flags |= EV_EOF;
- PIPE_UNLOCK(rpipe);
return (1);
}
ret = kn->kn_data > 0;
- PIPE_UNLOCK(rpipe);
return ret;
}
@@ -1907,15 +2085,14 @@ filt_piperead(struct knote *kn, long hint)
static int
filt_pipewrite(struct knote *kn, long hint)
{
- struct pipe *rpipe = kn->kn_fp->f_data;
- struct pipe *wpipe = rpipe->pipe_peer;
-
- PIPE_LOCK(rpipe);
+ struct pipe *wpipe;
+
+ wpipe = kn->kn_hook;
+ PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
if (wpipe->pipe_present != PIPE_ACTIVE ||
(wpipe->pipe_state & PIPE_EOF)) {
kn->kn_data = 0;
kn->kn_flags |= EV_EOF;
- PIPE_UNLOCK(rpipe);
return (1);
}
kn->kn_data = (wpipe->pipe_buffer.size > 0) ?
@@ -1923,9 +2100,21 @@ filt_pipewrite(struct knote *kn, long hint)
if (wpipe->pipe_state & PIPE_DIRECTW)
kn->kn_data = 0;
- PIPE_UNLOCK(rpipe);
return (kn->kn_data >= PIPE_BUF);
}
+
+static void
+filt_pipedetach_notsup(struct knote *kn)
+{
+
+}
+
+static int
+filt_pipenotsup(struct knote *kn, long hint)
+{
+
+ return (0);
+}
#ifdef __rtems__
static int
rtems_bsd_pipe_open(rtems_libio_t *iop, const char *path, int oflag,
diff --git a/freebsd/sys/kern/sys_socket.c b/freebsd/sys/kern/sys_socket.c
index babbcd2c..f312c8f4 100644
--- a/freebsd/sys/kern/sys_socket.c
+++ b/freebsd/sys/kern/sys_socket.c
@@ -36,8 +36,13 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/aio.h>
+#include <sys/domain.h>
#include <sys/file.h>
#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/sigio.h>
@@ -48,20 +53,58 @@ __FBSDID("$FreeBSD$");
#include <sys/filio.h> /* XXX */
#include <sys/sockio.h>
#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/sysproto.h>
+#include <sys/taskqueue.h>
#include <sys/uio.h>
#include <sys/ucred.h>
+#include <sys/un.h>
+#include <sys/unpcb.h>
+#include <sys/user.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+
#include <security/mac/mac_framework.h>
#ifndef __rtems__
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+
+static SYSCTL_NODE(_kern_ipc, OID_AUTO, aio, CTLFLAG_RD, NULL,
+ "socket AIO stats");
+
+static int empty_results;
+SYSCTL_INT(_kern_ipc_aio, OID_AUTO, empty_results, CTLFLAG_RD, &empty_results,
+ 0, "socket operation returned EAGAIN");
+
+static int empty_retries;
+SYSCTL_INT(_kern_ipc_aio, OID_AUTO, empty_retries, CTLFLAG_RD, &empty_retries,
+ 0, "socket operation retries");
+
+static fo_rdwr_t soo_read;
+static fo_rdwr_t soo_write;
+static fo_ioctl_t soo_ioctl;
+static fo_poll_t soo_poll;
+extern fo_kqfilter_t soo_kqfilter;
+static fo_stat_t soo_stat;
+static fo_close_t soo_close;
+static fo_fill_kinfo_t soo_fill_kinfo;
+static fo_aio_queue_t soo_aio_queue;
+
+static void soo_aio_cancel(struct kaiocb *job);
+
struct fileops socketops = {
.fo_read = soo_read,
.fo_write = soo_write,
- .fo_truncate = soo_truncate,
+ .fo_truncate = invfo_truncate,
.fo_ioctl = soo_ioctl,
.fo_poll = soo_poll,
.fo_kqfilter = soo_kqfilter,
@@ -69,15 +112,18 @@ struct fileops socketops = {
.fo_close = soo_close,
.fo_chmod = invfo_chmod,
.fo_chown = invfo_chown,
+ .fo_sendfile = invfo_sendfile,
+ .fo_fill_kinfo = soo_fill_kinfo,
+ .fo_aio_queue = soo_aio_queue,
.fo_flags = DFLAG_PASSABLE
};
#endif /* __rtems__ */
-/* ARGSUSED */
#ifdef __rtems__
-static
-#endif /* __rtems__ */
int
+#else /* __rtems__ */
+static int
+#endif /* __rtems__ */
soo_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
int flags, struct thread *td)
{
@@ -127,11 +173,11 @@ rtems_bsd_soo_read(rtems_libio_t *iop, void *buffer, size_t count)
}
#endif /* __rtems__ */
-/* ARGSUSED */
#ifdef __rtems__
-static
-#endif /* __rtems__ */
int
+#else /* __rtems__ */
+static int
+#endif /* __rtems__ */
soo_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
int flags, struct thread *td)
{
@@ -190,20 +236,11 @@ rtems_bsd_soo_write(rtems_libio_t *iop, const void *buffer, size_t count)
}
#endif /* __rtems__ */
-#ifndef __rtems__
-int
-soo_truncate(struct file *fp, off_t length, struct ucred *active_cred,
- struct thread *td)
-{
-
- return (EINVAL);
-}
-#endif /* __rtems__ */
-
#ifdef __rtems__
-static
-#endif /* __rtems__ */
int
+#else /* __rtems__ */
+static int
+#endif /* __rtems__ */
soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
struct thread *td)
{
@@ -252,16 +289,17 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
case FIONREAD:
/* Unlocked read. */
- *(int *)data = so->so_rcv.sb_cc;
+ *(int *)data = sbavail(&so->so_rcv);
break;
case FIONWRITE:
/* Unlocked read. */
- *(int *)data = so->so_snd.sb_cc;
+ *(int *)data = sbavail(&so->so_snd);
break;
case FIONSPACE:
- if ((so->so_snd.sb_hiwat < so->so_snd.sb_cc) ||
+ /* Unlocked read. */
+ if ((so->so_snd.sb_hiwat < sbused(&so->so_snd)) ||
(so->so_snd.sb_mbmax < so->so_snd.sb_mbcnt))
*(int *)data = 0;
else
@@ -329,9 +367,10 @@ rtems_bsd_soo_ioctl(rtems_libio_t *iop, ioctl_command_t request, void *buffer)
#endif /* __rtems__ */
#ifdef __rtems__
-static
-#endif /* __rtems__ */
int
+#else /* __rtems__ */
+static int
+#endif /* __rtems__ */
soo_poll(struct file *fp, int events, struct ucred *active_cred,
struct thread *td)
{
@@ -363,7 +402,7 @@ rtems_bsd_soo_poll(rtems_libio_t *iop, int events)
error = ENOMEM;
}
- return error;
+ return (error);
}
#endif /* __rtems__ */
@@ -378,6 +417,7 @@ static int
soo_stat(struct socket *so, struct stat *ub)
{
#endif /* __rtems__ */
+ struct sockbuf *sb;
#ifdef MAC
int error;
#endif
@@ -395,15 +435,18 @@ soo_stat(struct socket *so, struct stat *ub)
* If SBS_CANTRCVMORE is set, but there's still data left in the
* receive buffer, the socket is still readable.
*/
- SOCKBUF_LOCK(&so->so_rcv);
- if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0 ||
- so->so_rcv.sb_cc != 0)
+ sb = &so->so_rcv;
+ SOCKBUF_LOCK(sb);
+ if ((sb->sb_state & SBS_CANTRCVMORE) == 0 || sbavail(sb))
ub->st_mode |= S_IRUSR | S_IRGRP | S_IROTH;
- ub->st_size = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
- SOCKBUF_UNLOCK(&so->so_rcv);
- /* Unlocked read. */
- if ((so->so_snd.sb_state & SBS_CANTSENDMORE) == 0)
+ ub->st_size = sbavail(sb) - sb->sb_ctl;
+ SOCKBUF_UNLOCK(sb);
+
+ sb = &so->so_snd;
+ SOCKBUF_LOCK(sb);
+ if ((sb->sb_state & SBS_CANTSENDMORE) == 0)
ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
+ SOCKBUF_UNLOCK(sb);
#ifndef __rtems__
ub->st_uid = so->so_cred->cr_uid;
ub->st_gid = so->so_cred->cr_gid;
@@ -433,11 +476,11 @@ rtems_bsd_soo_stat(
* file reference but the actual socket will not go away until the socket's
* ref count hits 0.
*/
-/* ARGSUSED */
#ifdef __rtems__
-static
-#endif /* __rtems__ */
int
+#else /* __rtems__ */
+static int
+#endif /* __rtems__ */
soo_close(struct file *fp, struct thread *td)
{
int error = 0;
@@ -460,6 +503,474 @@ soo_close(struct file *fp, struct thread *td)
error = soclose(so);
return (error);
}
+
+#ifndef __rtems__
+static int
+soo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
+{
+ struct sockaddr *sa;
+ struct inpcb *inpcb;
+ struct unpcb *unpcb;
+ struct socket *so;
+ int error;
+
+ kif->kf_type = KF_TYPE_SOCKET;
+ so = fp->f_data;
+ kif->kf_sock_domain = so->so_proto->pr_domain->dom_family;
+ kif->kf_sock_type = so->so_type;
+ kif->kf_sock_protocol = so->so_proto->pr_protocol;
+ kif->kf_un.kf_sock.kf_sock_pcb = (uintptr_t)so->so_pcb;
+ switch (kif->kf_sock_domain) {
+ case AF_INET:
+ case AF_INET6:
+ if (kif->kf_sock_protocol == IPPROTO_TCP) {
+ if (so->so_pcb != NULL) {
+ inpcb = (struct inpcb *)(so->so_pcb);
+ kif->kf_un.kf_sock.kf_sock_inpcb =
+ (uintptr_t)inpcb->inp_ppcb;
+ }
+ }
+ break;
+ case AF_UNIX:
+ if (so->so_pcb != NULL) {
+ unpcb = (struct unpcb *)(so->so_pcb);
+ if (unpcb->unp_conn) {
+ kif->kf_un.kf_sock.kf_sock_unpconn =
+ (uintptr_t)unpcb->unp_conn;
+ kif->kf_un.kf_sock.kf_sock_rcv_sb_state =
+ so->so_rcv.sb_state;
+ kif->kf_un.kf_sock.kf_sock_snd_sb_state =
+ so->so_snd.sb_state;
+ }
+ }
+ break;
+ }
+ error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
+ if (error == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
+ bcopy(sa, &kif->kf_sa_local, sa->sa_len);
+ free(sa, M_SONAME);
+ }
+ error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
+ if (error == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
+ bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
+ free(sa, M_SONAME);
+ }
+ strncpy(kif->kf_path, so->so_proto->pr_domain->dom_name,
+ sizeof(kif->kf_path));
+ return (0);
+}
+
+/*
+ * Use the 'backend3' field in AIO jobs to store the amount of data
+ * completed by the AIO job so far.
+ */
+#define aio_done backend3
+
+static STAILQ_HEAD(, task) soaio_jobs;
+static struct mtx soaio_jobs_lock;
+static struct task soaio_kproc_task;
+static int soaio_starting, soaio_idle, soaio_queued;
+static struct unrhdr *soaio_kproc_unr;
+
+static int soaio_max_procs = MAX_AIO_PROCS;
+SYSCTL_INT(_kern_ipc_aio, OID_AUTO, max_procs, CTLFLAG_RW, &soaio_max_procs, 0,
+ "Maximum number of kernel processes to use for async socket IO");
+
+static int soaio_num_procs;
+SYSCTL_INT(_kern_ipc_aio, OID_AUTO, num_procs, CTLFLAG_RD, &soaio_num_procs, 0,
+ "Number of active kernel processes for async socket IO");
+
+static int soaio_target_procs = TARGET_AIO_PROCS;
+SYSCTL_INT(_kern_ipc_aio, OID_AUTO, target_procs, CTLFLAG_RD,
+ &soaio_target_procs, 0,
+ "Preferred number of ready kernel processes for async socket IO");
+
+static int soaio_lifetime;
+SYSCTL_INT(_kern_ipc_aio, OID_AUTO, lifetime, CTLFLAG_RW, &soaio_lifetime, 0,
+ "Maximum lifetime for idle aiod");
+
+static void
+soaio_kproc_loop(void *arg)
+{
+ struct proc *p;
+ struct vmspace *myvm;
+ struct task *task;
+ int error, id, pending;
+
+ id = (intptr_t)arg;
+
+ /*
+ * Grab an extra reference on the daemon's vmspace so that it
+ * doesn't get freed by jobs that switch to a different
+ * vmspace.
+ */
+ p = curproc;
+ myvm = vmspace_acquire_ref(p);
+
+ mtx_lock(&soaio_jobs_lock);
+ MPASS(soaio_starting > 0);
+ soaio_starting--;
+ for (;;) {
+ while (!STAILQ_EMPTY(&soaio_jobs)) {
+ task = STAILQ_FIRST(&soaio_jobs);
+ STAILQ_REMOVE_HEAD(&soaio_jobs, ta_link);
+ soaio_queued--;
+ pending = task->ta_pending;
+ task->ta_pending = 0;
+ mtx_unlock(&soaio_jobs_lock);
+
+ task->ta_func(task->ta_context, pending);
+
+ mtx_lock(&soaio_jobs_lock);
+ }
+ MPASS(soaio_queued == 0);
+
+ if (p->p_vmspace != myvm) {
+ mtx_unlock(&soaio_jobs_lock);
+ vmspace_switch_aio(myvm);
+ mtx_lock(&soaio_jobs_lock);
+ continue;
+ }
+
+ soaio_idle++;
+ error = mtx_sleep(&soaio_idle, &soaio_jobs_lock, 0, "-",
+ soaio_lifetime);
+ soaio_idle--;
+ if (error == EWOULDBLOCK && STAILQ_EMPTY(&soaio_jobs) &&
+ soaio_num_procs > soaio_target_procs)
+ break;
+ }
+ soaio_num_procs--;
+ mtx_unlock(&soaio_jobs_lock);
+ free_unr(soaio_kproc_unr, id);
+ kproc_exit(0);
+}
+
+static void
+soaio_kproc_create(void *context, int pending)
+{
+ struct proc *p;
+ int error, id;
+
+ mtx_lock(&soaio_jobs_lock);
+ for (;;) {
+ if (soaio_num_procs < soaio_target_procs) {
+ /* Must create */
+ } else if (soaio_num_procs >= soaio_max_procs) {
+ /*
+ * Hit the limit on kernel processes, don't
+ * create another one.
+ */
+ break;
+ } else if (soaio_queued <= soaio_idle + soaio_starting) {
+ /*
+ * No more AIO jobs waiting for a process to be
+ * created, so stop.
+ */
+ break;
+ }
+ soaio_starting++;
+ mtx_unlock(&soaio_jobs_lock);
+
+ id = alloc_unr(soaio_kproc_unr);
+ error = kproc_create(soaio_kproc_loop, (void *)(intptr_t)id,
+ &p, 0, 0, "soaiod%d", id);
+ if (error != 0) {
+ free_unr(soaio_kproc_unr, id);
+ mtx_lock(&soaio_jobs_lock);
+ soaio_starting--;
+ break;
+ }
+
+ mtx_lock(&soaio_jobs_lock);
+ soaio_num_procs++;
+ }
+ mtx_unlock(&soaio_jobs_lock);
+}
+
+void
+soaio_enqueue(struct task *task)
+{
+
+ mtx_lock(&soaio_jobs_lock);
+ MPASS(task->ta_pending == 0);
+ task->ta_pending++;
+ STAILQ_INSERT_TAIL(&soaio_jobs, task, ta_link);
+ soaio_queued++;
+ if (soaio_queued <= soaio_idle)
+ wakeup_one(&soaio_idle);
+ else if (soaio_num_procs < soaio_max_procs)
+ taskqueue_enqueue(taskqueue_thread, &soaio_kproc_task);
+ mtx_unlock(&soaio_jobs_lock);
+}
+
+static void
+soaio_init(void)
+{
+
+ soaio_lifetime = AIOD_LIFETIME_DEFAULT;
+ STAILQ_INIT(&soaio_jobs);
+ mtx_init(&soaio_jobs_lock, "soaio jobs", NULL, MTX_DEF);
+ soaio_kproc_unr = new_unrhdr(1, INT_MAX, NULL);
+ TASK_INIT(&soaio_kproc_task, 0, soaio_kproc_create, NULL);
+ if (soaio_target_procs > 0)
+ taskqueue_enqueue(taskqueue_thread, &soaio_kproc_task);
+}
+SYSINIT(soaio, SI_SUB_VFS, SI_ORDER_ANY, soaio_init, NULL);
+
+static __inline int
+soaio_ready(struct socket *so, struct sockbuf *sb)
+{
+ return (sb == &so->so_rcv ? soreadable(so) : sowriteable(so));
+}
+
+static void
+soaio_process_job(struct socket *so, struct sockbuf *sb, struct kaiocb *job)
+{
+ struct ucred *td_savedcred;
+ struct thread *td;
+ struct file *fp;
+ struct uio uio;
+ struct iovec iov;
+ size_t cnt, done;
+ long ru_before;
+ int error, flags;
+
+ SOCKBUF_UNLOCK(sb);
+ aio_switch_vmspace(job);
+ td = curthread;
+ fp = job->fd_file;
+retry:
+ td_savedcred = td->td_ucred;
+ td->td_ucred = job->cred;
+
+ done = job->aio_done;
+ cnt = job->uaiocb.aio_nbytes - done;
+ iov.iov_base = (void *)((uintptr_t)job->uaiocb.aio_buf + done);
+ iov.iov_len = cnt;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_offset = 0;
+ uio.uio_resid = cnt;
+ uio.uio_segflg = UIO_USERSPACE;
+ uio.uio_td = td;
+ flags = MSG_NBIO;
+
+ /*
+ * For resource usage accounting, only count a completed request
+ * as a single message to avoid counting multiple calls to
+ * sosend/soreceive on a blocking socket.
+ */
+
+ if (sb == &so->so_rcv) {
+ uio.uio_rw = UIO_READ;
+ ru_before = td->td_ru.ru_msgrcv;
+#ifdef MAC
+ error = mac_socket_check_receive(fp->f_cred, so);
+ if (error == 0)
+
+#endif
+ error = soreceive(so, NULL, &uio, NULL, NULL, &flags);
+ if (td->td_ru.ru_msgrcv != ru_before)
+ job->msgrcv = 1;
+ } else {
+ uio.uio_rw = UIO_WRITE;
+ ru_before = td->td_ru.ru_msgsnd;
+#ifdef MAC
+ error = mac_socket_check_send(fp->f_cred, so);
+ if (error == 0)
+#endif
+ error = sosend(so, NULL, &uio, NULL, NULL, flags, td);
+ if (td->td_ru.ru_msgsnd != ru_before)
+ job->msgsnd = 1;
+ if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) {
+ PROC_LOCK(job->userproc);
+ kern_psignal(job->userproc, SIGPIPE);
+ PROC_UNLOCK(job->userproc);
+ }
+ }
+
+ done += cnt - uio.uio_resid;
+ job->aio_done = done;
+ td->td_ucred = td_savedcred;
+
+ if (error == EWOULDBLOCK) {
+ /*
+ * The request was either partially completed or not
+ * completed at all due to racing with a read() or
+ * write() on the socket. If the socket is
+ * non-blocking, return with any partial completion.
+ * If the socket is blocking or if no progress has
+ * been made, requeue this request at the head of the
+ * queue to try again when the socket is ready.
+ */
+ MPASS(done != job->uaiocb.aio_nbytes);
+ SOCKBUF_LOCK(sb);
+ if (done == 0 || !(so->so_state & SS_NBIO)) {
+ empty_results++;
+ if (soaio_ready(so, sb)) {
+ empty_retries++;
+ SOCKBUF_UNLOCK(sb);
+ goto retry;
+ }
+
+ if (!aio_set_cancel_function(job, soo_aio_cancel)) {
+ SOCKBUF_UNLOCK(sb);
+ if (done != 0)
+ aio_complete(job, done, 0);
+ else
+ aio_cancel(job);
+ SOCKBUF_LOCK(sb);
+ } else {
+ TAILQ_INSERT_HEAD(&sb->sb_aiojobq, job, list);
+ }
+ return;
+ }
+ SOCKBUF_UNLOCK(sb);
+ }
+ if (done != 0 && (error == ERESTART || error == EINTR ||
+ error == EWOULDBLOCK))
+ error = 0;
+ if (error)
+ aio_complete(job, -1, error);
+ else
+ aio_complete(job, done, 0);
+ SOCKBUF_LOCK(sb);
+}
+
+static void
+soaio_process_sb(struct socket *so, struct sockbuf *sb)
+{
+ struct kaiocb *job;
+
+ SOCKBUF_LOCK(sb);
+ while (!TAILQ_EMPTY(&sb->sb_aiojobq) && soaio_ready(so, sb)) {
+ job = TAILQ_FIRST(&sb->sb_aiojobq);
+ TAILQ_REMOVE(&sb->sb_aiojobq, job, list);
+ if (!aio_clear_cancel_function(job))
+ continue;
+
+ soaio_process_job(so, sb, job);
+ }
+
+ /*
+ * If there are still pending requests, the socket must not be
+ * ready so set SB_AIO to request a wakeup when the socket
+ * becomes ready.
+ */
+ if (!TAILQ_EMPTY(&sb->sb_aiojobq))
+ sb->sb_flags |= SB_AIO;
+ sb->sb_flags &= ~SB_AIO_RUNNING;
+ SOCKBUF_UNLOCK(sb);
+
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ sorele(so);
+}
+
+void
+soaio_rcv(void *context, int pending)
+{
+ struct socket *so;
+
+ so = context;
+ soaio_process_sb(so, &so->so_rcv);
+}
+
+void
+soaio_snd(void *context, int pending)
+{
+ struct socket *so;
+
+ so = context;
+ soaio_process_sb(so, &so->so_snd);
+}
+
+void
+sowakeup_aio(struct socket *so, struct sockbuf *sb)
+{
+
+ SOCKBUF_LOCK_ASSERT(sb);
+ sb->sb_flags &= ~SB_AIO;
+ if (sb->sb_flags & SB_AIO_RUNNING)
+ return;
+ sb->sb_flags |= SB_AIO_RUNNING;
+ if (sb == &so->so_snd)
+ SOCK_LOCK(so);
+ soref(so);
+ if (sb == &so->so_snd)
+ SOCK_UNLOCK(so);
+ soaio_enqueue(&sb->sb_aiotask);
+}
+
+static void
+soo_aio_cancel(struct kaiocb *job)
+{
+ struct socket *so;
+ struct sockbuf *sb;
+ long done;
+ int opcode;
+
+ so = job->fd_file->f_data;
+ opcode = job->uaiocb.aio_lio_opcode;
+ if (opcode == LIO_READ)
+ sb = &so->so_rcv;
+ else {
+ MPASS(opcode == LIO_WRITE);
+ sb = &so->so_snd;
+ }
+
+ SOCKBUF_LOCK(sb);
+ if (!aio_cancel_cleared(job))
+ TAILQ_REMOVE(&sb->sb_aiojobq, job, list);
+ if (TAILQ_EMPTY(&sb->sb_aiojobq))
+ sb->sb_flags &= ~SB_AIO;
+ SOCKBUF_UNLOCK(sb);
+
+ done = job->aio_done;
+ if (done != 0)
+ aio_complete(job, done, 0);
+ else
+ aio_cancel(job);
+}
+
+static int
+soo_aio_queue(struct file *fp, struct kaiocb *job)
+{
+ struct socket *so;
+ struct sockbuf *sb;
+ int error;
+
+ so = fp->f_data;
+ error = (*so->so_proto->pr_usrreqs->pru_aio_queue)(so, job);
+ if (error == 0)
+ return (0);
+
+ switch (job->uaiocb.aio_lio_opcode) {
+ case LIO_READ:
+ sb = &so->so_rcv;
+ break;
+ case LIO_WRITE:
+ sb = &so->so_snd;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ SOCKBUF_LOCK(sb);
+ if (!aio_set_cancel_function(job, soo_aio_cancel))
+ panic("new job was cancelled");
+ TAILQ_INSERT_TAIL(&sb->sb_aiojobq, job, list);
+ if (!(sb->sb_flags & SB_AIO_RUNNING)) {
+ if (soaio_ready(so, sb))
+ sowakeup_aio(so, sb);
+ else
+ sb->sb_flags |= SB_AIO;
+ }
+ SOCKBUF_UNLOCK(sb);
+ return (0);
+}
+#endif /* __rtems__ */
#ifdef __rtems__
static int
rtems_bsd_soo_open(rtems_libio_t *iop, const char *path, int oflag,
diff --git a/freebsd/sys/kern/uipc_accf.c b/freebsd/sys/kern/uipc_accf.c
index 396e9078..6ecea0eb 100644
--- a/freebsd/sys/kern/uipc_accf.c
+++ b/freebsd/sys/kern/uipc_accf.c
@@ -62,9 +62,8 @@ MALLOC_DEFINE(M_ACCF, "accf", "accept filter data");
static int unloadable = 0;
-SYSCTL_DECL(_net_inet); /* XXX: some header should do this for me */
-SYSCTL_NODE(_net_inet, OID_AUTO, accf, CTLFLAG_RW, 0, "Accept filters");
-SYSCTL_INT(_net_inet_accf, OID_AUTO, unloadable, CTLFLAG_RW, &unloadable, 0,
+SYSCTL_NODE(_net, OID_AUTO, accf, CTLFLAG_RW, 0, "Accept filters");
+SYSCTL_INT(_net_accf, OID_AUTO, unloadable, CTLFLAG_RW, &unloadable, 0,
"Allow unload of accept filters (not recommended)");
/*
diff --git a/freebsd/sys/kern/uipc_domain.c b/freebsd/sys/kern/uipc_domain.c
index 3af4448c..7c0e7085 100644
--- a/freebsd/sys/kern/uipc_domain.c
+++ b/freebsd/sys/kern/uipc_domain.c
@@ -48,8 +48,6 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
-#include <vm/uma.h>
-
/*
* System initialization
*
@@ -139,9 +137,12 @@ protosw_init(struct protosw *pr)
#define DEFAULT(foo, bar) if ((foo) == NULL) (foo) = (bar)
DEFAULT(pu->pru_accept, pru_accept_notsupp);
+ DEFAULT(pu->pru_aio_queue, pru_aio_queue_notsupp);
DEFAULT(pu->pru_bind, pru_bind_notsupp);
+ DEFAULT(pu->pru_bindat, pru_bindat_notsupp);
DEFAULT(pu->pru_connect, pru_connect_notsupp);
DEFAULT(pu->pru_connect2, pru_connect2_notsupp);
+ DEFAULT(pu->pru_connectat, pru_connectat_notsupp);
DEFAULT(pu->pru_control, pru_control_notsupp);
DEFAULT(pu->pru_disconnect, pru_disconnect_notsupp);
DEFAULT(pu->pru_listen, pru_listen_notsupp);
@@ -154,6 +155,7 @@ protosw_init(struct protosw *pr)
DEFAULT(pu->pru_sosend, sosend_generic);
DEFAULT(pu->pru_soreceive, soreceive_generic);
DEFAULT(pu->pru_sopoll, sopoll_generic);
+ DEFAULT(pu->pru_ready, pru_ready_notsupp);
#undef DEFAULT
if (pr->pr_init)
(*pr->pr_init)();
@@ -196,11 +198,7 @@ void
vnet_domain_uninit(void *arg)
{
struct domain *dp = arg;
- struct protosw *pr;
- for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
- if (pr->pr_destroy)
- (*pr->pr_destroy)();
if (dp->dom_destroy)
(*dp->dom_destroy)();
}
@@ -249,8 +247,8 @@ domaininit(void *dummy)
if (max_linkhdr < 16) /* XXX */
max_linkhdr = 16;
- callout_init(&pffast_callout, CALLOUT_MPSAFE);
- callout_init(&pfslow_callout, CALLOUT_MPSAFE);
+ callout_init(&pffast_callout, 1);
+ callout_init(&pfslow_callout, 1);
mtx_lock(&dom_mtx);
KASSERT(domain_init_status == 0, ("domaininit called too late!"));
@@ -272,21 +270,31 @@ domainfinalize(void *dummy)
callout_reset(&pfslow_callout, 1, pfslowtimo, NULL);
}
+struct domain *
+pffinddomain(int family)
+{
+ struct domain *dp;
+
+ for (dp = domains; dp != NULL; dp = dp->dom_next)
+ if (dp->dom_family == family)
+ return (dp);
+ return (NULL);
+}
+
struct protosw *
pffindtype(int family, int type)
{
struct domain *dp;
struct protosw *pr;
- for (dp = domains; dp; dp = dp->dom_next)
- if (dp->dom_family == family)
- goto found;
- return (0);
-found:
+ dp = pffinddomain(family);
+ if (dp == NULL)
+ return (NULL);
+
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
if (pr->pr_type && pr->pr_type == type)
return (pr);
- return (0);
+ return (NULL);
}
struct protosw *
@@ -294,21 +302,22 @@ pffindproto(int family, int protocol, int type)
{
struct domain *dp;
struct protosw *pr;
- struct protosw *maybe = 0;
+ struct protosw *maybe;
+ maybe = NULL;
if (family == 0)
- return (0);
- for (dp = domains; dp; dp = dp->dom_next)
- if (dp->dom_family == family)
- goto found;
- return (0);
-found:
+ return (NULL);
+
+ dp = pffinddomain(family);
+ if (dp == NULL)
+ return (NULL);
+
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
return (pr);
if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
- pr->pr_protocol == 0 && maybe == (struct protosw *)0)
+ pr->pr_protocol == 0 && maybe == NULL)
maybe = pr;
}
return (maybe);
@@ -336,12 +345,10 @@ pf_proto_register(int family, struct protosw *npr)
return (ENXIO);
/* Try to find the specified domain based on the family. */
- for (dp = domains; dp; dp = dp->dom_next)
- if (dp->dom_family == family)
- goto found;
- return (EPFNOSUPPORT);
+ dp = pffinddomain(family);
+ if (dp == NULL)
+ return (EPFNOSUPPORT);
-found:
/* Initialize backpointer to struct domain. */
npr->pr_domain = dp;
fpr = NULL;
@@ -407,12 +414,10 @@ pf_proto_unregister(int family, int protocol, int type)
return (EPROTOTYPE);
/* Try to find the specified domain based on the family type. */
- for (dp = domains; dp; dp = dp->dom_next)
- if (dp->dom_family == family)
- goto found;
- return (EPFNOSUPPORT);
+ dp = pffinddomain(family);
+ if (dp == NULL)
+ return (EPFNOSUPPORT);
-found:
dpr = NULL;
/* Lock out everyone else while we are manipulating the protosw. */
diff --git a/freebsd/sys/kern/uipc_mbuf.c b/freebsd/sys/kern/uipc_mbuf.c
index abf4dd3e..db4975ca 100644
--- a/freebsd/sys/kern/uipc_mbuf.c
+++ b/freebsd/sys/kern/uipc_mbuf.c
@@ -49,6 +49,51 @@ __FBSDID("$FreeBSD$");
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/uio.h>
+#include <sys/sdt.h>
+
+SDT_PROBE_DEFINE5_XLATE(sdt, , , m__init,
+ "struct mbuf *", "mbufinfo_t *",
+ "uint32_t", "uint32_t",
+ "uint16_t", "uint16_t",
+ "uint32_t", "uint32_t",
+ "uint32_t", "uint32_t");
+
+SDT_PROBE_DEFINE3_XLATE(sdt, , , m__gethdr,
+ "uint32_t", "uint32_t",
+ "uint16_t", "uint16_t",
+ "struct mbuf *", "mbufinfo_t *");
+
+SDT_PROBE_DEFINE3_XLATE(sdt, , , m__get,
+ "uint32_t", "uint32_t",
+ "uint16_t", "uint16_t",
+ "struct mbuf *", "mbufinfo_t *");
+
+SDT_PROBE_DEFINE4_XLATE(sdt, , , m__getcl,
+ "uint32_t", "uint32_t",
+ "uint16_t", "uint16_t",
+ "uint32_t", "uint32_t",
+ "struct mbuf *", "mbufinfo_t *");
+
+SDT_PROBE_DEFINE3_XLATE(sdt, , , m__clget,
+ "struct mbuf *", "mbufinfo_t *",
+ "uint32_t", "uint32_t",
+ "uint32_t", "uint32_t");
+
+SDT_PROBE_DEFINE4_XLATE(sdt, , , m__cljget,
+ "struct mbuf *", "mbufinfo_t *",
+ "uint32_t", "uint32_t",
+ "uint32_t", "uint32_t",
+ "void*", "void*");
+
+SDT_PROBE_DEFINE(sdt, , , m__cljset);
+
+SDT_PROBE_DEFINE1_XLATE(sdt, , , m__free,
+ "struct mbuf *", "mbufinfo_t *");
+
+SDT_PROBE_DEFINE1_XLATE(sdt, , , m__freem,
+ "struct mbuf *", "mbufinfo_t *");
+
+#include <security/mac/mac_framework.h>
int max_linkhdr;
int max_protohdr;
@@ -87,224 +132,93 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
#endif
/*
- * Allocate a given length worth of mbufs and/or clusters (whatever fits
- * best) and return a pointer to the top of the allocated chain. If an
- * existing mbuf chain is provided, then we will append the new chain
- * to the existing one but still return the top of the newly allocated
- * chain.
+ * Ensure the correct size of various mbuf parameters. It could be off due
+ * to compiler-induced padding and alignment artifacts.
*/
-struct mbuf *
-m_getm2(struct mbuf *m, int len, int how, short type, int flags)
-{
- struct mbuf *mb, *nm = NULL, *mtail = NULL;
-
- KASSERT(len >= 0, ("%s: len is < 0", __func__));
-
- /* Validate flags. */
- flags &= (M_PKTHDR | M_EOR);
-
- /* Packet header mbuf must be first in chain. */
- if ((flags & M_PKTHDR) && m != NULL)
- flags &= ~M_PKTHDR;
-
- /* Loop and append maximum sized mbufs to the chain tail. */
- while (len > 0) {
- if (len > MCLBYTES)
- mb = m_getjcl(how, type, (flags & M_PKTHDR),
- MJUMPAGESIZE);
- else if (len >= MINCLSIZE)
- mb = m_getcl(how, type, (flags & M_PKTHDR));
- else if (flags & M_PKTHDR)
- mb = m_gethdr(how, type);
- else
- mb = m_get(how, type);
-
- /* Fail the whole operation if one mbuf can't be allocated. */
- if (mb == NULL) {
- if (nm != NULL)
- m_freem(nm);
- return (NULL);
- }
-
- /* Book keeping. */
- len -= (mb->m_flags & M_EXT) ? mb->m_ext.ext_size :
- ((mb->m_flags & M_PKTHDR) ? MHLEN : MLEN);
- if (mtail != NULL)
- mtail->m_next = mb;
- else
- nm = mb;
- mtail = mb;
- flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */
- }
- if (flags & M_EOR)
- mtail->m_flags |= M_EOR; /* Only valid on the last mbuf. */
-
- /* If mbuf was supplied, append new chain to the end of it. */
- if (m != NULL) {
- for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next)
- ;
- mtail->m_next = nm;
- mtail->m_flags &= ~M_EOR;
- } else
- m = nm;
-
- return (m);
-}
+CTASSERT(MSIZE - offsetof(struct mbuf, m_dat) == MLEN);
+CTASSERT(MSIZE - offsetof(struct mbuf, m_pktdat) == MHLEN);
/*
- * Free an entire chain of mbufs and associated external buffers, if
- * applicable.
+ * mbuf data storage should be 64-bit aligned regardless of architectural
+ * pointer size; check this is the case with and without a packet header.
*/
-void
-m_freem(struct mbuf *mb)
-{
+CTASSERT(offsetof(struct mbuf, m_dat) % 8 == 0);
+CTASSERT(offsetof(struct mbuf, m_pktdat) % 8 == 0);
- while (mb != NULL)
- mb = m_free(mb);
-}
-
-/*-
- * Configure a provided mbuf to refer to the provided external storage
- * buffer and setup a reference count for said buffer. If the setting
- * up of the reference count fails, the M_EXT bit will not be set. If
- * successfull, the M_EXT bit is set in the mbuf's flags.
- *
- * Arguments:
- * mb The existing mbuf to which to attach the provided buffer.
- * buf The address of the provided external storage buffer.
- * size The size of the provided buffer.
- * freef A pointer to a routine that is responsible for freeing the
- * provided external storage buffer.
- * args A pointer to an argument structure (of any type) to be passed
- * to the provided freef routine (may be NULL).
- * flags Any other flags to be passed to the provided mbuf.
- * type The type that the external storage buffer should be
- * labeled with.
+/*
+ * While the specific values here don't matter too much (i.e., +/- a few
+ * words), we do want to ensure that changes to these values are carefully
+ * reasoned about and properly documented. This is especially the case as
+ * network-protocol and device-driver modules encode these layouts, and must
+ * be recompiled if the structures change. Check these values at compile time
+ * against the ones documented in comments in mbuf.h.
*
- * Returns:
- * Nothing.
+ * NB: Possibly they should be documented there via #define's and not just
+ * comments.
*/
-void
-m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
- void (*freef)(void *, void *), void *arg1, void *arg2, int flags, int type)
-{
- KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));
-
- if (type != EXT_EXTREF)
- mb->m_ext.ref_cnt = (u_int *)uma_zalloc(zone_ext_refcnt, M_NOWAIT);
- if (mb->m_ext.ref_cnt != NULL) {
- *(mb->m_ext.ref_cnt) = 1;
- mb->m_flags |= (M_EXT | flags);
- mb->m_ext.ext_buf = buf;
- mb->m_data = mb->m_ext.ext_buf;
- mb->m_ext.ext_size = size;
- mb->m_ext.ext_free = freef;
- mb->m_ext.ext_arg1 = arg1;
- mb->m_ext.ext_arg2 = arg2;
- mb->m_ext.ext_type = type;
- }
-}
+#if defined(__LP64__)
+CTASSERT(offsetof(struct mbuf, m_dat) == 32);
+CTASSERT(sizeof(struct pkthdr) == 56);
+CTASSERT(sizeof(struct m_ext) == 48);
+#else
+CTASSERT(offsetof(struct mbuf, m_dat) == 24);
+CTASSERT(sizeof(struct pkthdr) == 48);
+CTASSERT(sizeof(struct m_ext) == 28);
+#endif
/*
- * Non-directly-exported function to clean up after mbufs with M_EXT
- * storage attached to them if the reference count hits 1.
+ * Assert that the queue(3) macros produce code of the same size as an old
+ * plain pointer does.
*/
-void
-mb_free_ext(struct mbuf *m)
-{
- int skipmbuf;
-
- KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
- KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
-
-
- /*
- * check if the header is embedded in the cluster
- */
- skipmbuf = (m->m_flags & M_NOFREE);
-
- /* Free attached storage if this mbuf is the only reference to it. */
- if (*(m->m_ext.ref_cnt) == 1 ||
- atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) {
- switch (m->m_ext.ext_type) {
- case EXT_PACKET: /* The packet zone is special. */
- if (*(m->m_ext.ref_cnt) == 0)
- *(m->m_ext.ref_cnt) = 1;
- uma_zfree(zone_pack, m);
- return; /* Job done. */
- case EXT_CLUSTER:
- uma_zfree(zone_clust, m->m_ext.ext_buf);
- break;
- case EXT_JUMBOP:
- uma_zfree(zone_jumbop, m->m_ext.ext_buf);
- break;
- case EXT_JUMBO9:
- uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
- break;
- case EXT_JUMBO16:
- uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
- break;
- case EXT_SFBUF:
- case EXT_NET_DRV:
- case EXT_MOD_TYPE:
- case EXT_DISPOSABLE:
- *(m->m_ext.ref_cnt) = 0;
- uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
- m->m_ext.ref_cnt));
- /* FALLTHROUGH */
- case EXT_EXTREF:
- KASSERT(m->m_ext.ext_free != NULL,
- ("%s: ext_free not set", __func__));
- (*(m->m_ext.ext_free))(m->m_ext.ext_arg1,
- m->m_ext.ext_arg2);
- break;
- default:
- KASSERT(m->m_ext.ext_type == 0,
- ("%s: unknown ext_type", __func__));
- }
- }
- if (skipmbuf)
- return;
-
- /*
- * Free this mbuf back to the mbuf zone with all m_ext
- * information purged.
- */
- m->m_ext.ext_buf = NULL;
- m->m_ext.ext_free = NULL;
- m->m_ext.ext_arg1 = NULL;
- m->m_ext.ext_arg2 = NULL;
- m->m_ext.ref_cnt = NULL;
- m->m_ext.ext_size = 0;
- m->m_ext.ext_type = 0;
- m->m_flags &= ~M_EXT;
- uma_zfree(zone_mbuf, m);
-}
+#ifdef INVARIANTS
+static struct mbuf m_assertbuf;
+CTASSERT(sizeof(m_assertbuf.m_slist) == sizeof(m_assertbuf.m_next));
+CTASSERT(sizeof(m_assertbuf.m_stailq) == sizeof(m_assertbuf.m_next));
+CTASSERT(sizeof(m_assertbuf.m_slistpkt) == sizeof(m_assertbuf.m_nextpkt));
+CTASSERT(sizeof(m_assertbuf.m_stailqpkt) == sizeof(m_assertbuf.m_nextpkt));
+#endif
/*
* Attach the cluster from *m to *n, set up m_ext in *n
* and bump the refcount of the cluster.
*/
-static void
+void
mb_dupcl(struct mbuf *n, struct mbuf *m)
{
- KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
- KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__));
- KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
+ volatile u_int *refcnt;
- if (*(m->m_ext.ref_cnt) == 1)
- *(m->m_ext.ref_cnt) += 1;
- else
- atomic_add_int(m->m_ext.ref_cnt, 1);
- n->m_ext.ext_buf = m->m_ext.ext_buf;
- n->m_ext.ext_free = m->m_ext.ext_free;
- n->m_ext.ext_arg1 = m->m_ext.ext_arg1;
- n->m_ext.ext_arg2 = m->m_ext.ext_arg2;
- n->m_ext.ext_size = m->m_ext.ext_size;
- n->m_ext.ref_cnt = m->m_ext.ref_cnt;
- n->m_ext.ext_type = m->m_ext.ext_type;
+ KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
+ KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n));
+
+ n->m_ext = m->m_ext;
n->m_flags |= M_EXT;
n->m_flags |= m->m_flags & M_RDONLY;
+
+ /* See if this is the mbuf that holds the embedded refcount. */
+ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
+ refcnt = n->m_ext.ext_cnt = &m->m_ext.ext_count;
+ n->m_ext.ext_flags &= ~EXT_FLAG_EMBREF;
+ } else {
+ KASSERT(m->m_ext.ext_cnt != NULL,
+ ("%s: no refcounting pointer on %p", __func__, m));
+ refcnt = m->m_ext.ext_cnt;
+ }
+
+ if (*refcnt == 1)
+ *refcnt += 1;
+ else
+ atomic_add_int(refcnt, 1);
+}
+
+void
+m_demote_pkthdr(struct mbuf *m)
+{
+
+ M_ASSERTPKTHDR(m);
+
+ m_tag_delete_chain(m, NULL);
+ m->m_flags &= ~M_PKTHDR;
+ bzero(&m->m_pkthdr, sizeof(struct pkthdr));
}
/*
@@ -313,23 +227,16 @@ mb_dupcl(struct mbuf *n, struct mbuf *m)
* cleaned too.
*/
void
-m_demote(struct mbuf *m0, int all)
+m_demote(struct mbuf *m0, int all, int flags)
{
struct mbuf *m;
for (m = all ? m0 : m0->m_next; m != NULL; m = m->m_next) {
- if (m->m_flags & M_PKTHDR) {
- m_tag_delete_chain(m, NULL);
- m->m_flags &= ~M_PKTHDR;
- bzero(&m->m_pkthdr, sizeof(struct pkthdr));
- }
- if (m != m0 && m->m_nextpkt != NULL) {
- KASSERT(m->m_nextpkt == NULL,
- ("%s: m_nextpkt not NULL", __func__));
- m_freem(m->m_nextpkt);
- m->m_nextpkt = NULL;
- }
- m->m_flags = m->m_flags & (M_EXT|M_RDONLY|M_FREELIST|M_NOFREE);
+ KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt in m %p, m0 %p",
+ __func__, m, m0));
+ if (m->m_flags & M_PKTHDR)
+ m_demote_pkthdr(m);
+ m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | flags);
}
}
@@ -349,7 +256,7 @@ m_sanity(struct mbuf *m0, int sanitize)
#ifdef INVARIANTS
#define M_SANITY_ACTION(s) panic("mbuf %p: " s, m)
-#else
+#else
#define M_SANITY_ACTION(s) printf("mbuf %p: " s, m)
#endif
@@ -359,22 +266,14 @@ m_sanity(struct mbuf *m0, int sanitize)
* unrelated kernel memory before or after us is trashed.
* No way to recover from that.
*/
- a = ((m->m_flags & M_EXT) ? m->m_ext.ext_buf :
- ((m->m_flags & M_PKTHDR) ? (caddr_t)(&m->m_pktdat) :
- (caddr_t)(&m->m_dat)) );
- b = (caddr_t)(a + (m->m_flags & M_EXT ? m->m_ext.ext_size :
- ((m->m_flags & M_PKTHDR) ? MHLEN : MLEN)));
+ a = M_START(m);
+ b = a + M_SIZE(m);
if ((caddr_t)m->m_data < a)
M_SANITY_ACTION("m_data outside mbuf data range left");
if ((caddr_t)m->m_data > b)
M_SANITY_ACTION("m_data outside mbuf data range right");
if ((caddr_t)m->m_data + m->m_len > b)
M_SANITY_ACTION("m_data + m_len exeeds mbuf space");
- if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.header) {
- if ((caddr_t)m->m_pkthdr.header < a ||
- (caddr_t)m->m_pkthdr.header > b)
- M_SANITY_ACTION("m_pkthdr.header outside mbuf data range");
- }
/* m->m_nextpkt may only be set on first mbuf in chain. */
if (m != m0 && m->m_nextpkt != NULL) {
@@ -421,6 +320,26 @@ m_sanity(struct mbuf *m0, int sanitize)
#undef M_SANITY_ACTION
}
+/*
+ * Non-inlined part of m_init().
+ */
+int
+m_pkthdr_init(struct mbuf *m, int how)
+{
+#ifdef MAC
+ int error;
+#endif
+ m->m_data = m->m_pktdat;
+ bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
+#ifdef MAC
+ /* If the label init fails, fail the alloc */
+ error = mac_mbuf_init(m, how);
+ if (error)
+ return (error);
+#endif
+
+ return (0);
+}
/*
* "Move" mbuf pkthdr from "from" to "to".
@@ -458,14 +377,14 @@ m_move_pkthdr(struct mbuf *to, struct mbuf *from)
* In particular, this does a deep copy of the packet tags.
*/
int
-m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
+m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how)
{
#if 0
/*
* The mbuf allocator only initializes the pkthdr
- * when the mbuf is allocated with MGETHDR. Many users
- * (e.g. m_copy*, m_prepend) use MGET and then
+ * when the mbuf is allocated with m_gethdr(). Many users
+ * (e.g. m_copy*, m_prepend) use m_get() and then
* smash the pkthdr as needed causing these
* assertions to trip. For now just disable them.
*/
@@ -483,7 +402,7 @@ m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
to->m_data = to->m_pktdat;
to->m_pkthdr = from->m_pkthdr;
SLIST_INIT(&to->m_pkthdr.tags);
- return (m_tag_copy_chain(to, from, MBTOM(how)));
+ return (m_tag_copy_chain(to, from, how));
}
/*
@@ -497,24 +416,19 @@ m_prepend(struct mbuf *m, int len, int how)
struct mbuf *mn;
if (m->m_flags & M_PKTHDR)
- MGETHDR(mn, how, m->m_type);
+ mn = m_gethdr(how, m->m_type);
else
- MGET(mn, how, m->m_type);
+ mn = m_get(how, m->m_type);
if (mn == NULL) {
m_freem(m);
return (NULL);
}
if (m->m_flags & M_PKTHDR)
- M_MOVE_PKTHDR(mn, m);
+ m_move_pkthdr(mn, m);
mn->m_next = m;
m = mn;
- if(m->m_flags & M_PKTHDR) {
- if (len < MHLEN)
- MH_ALIGN(m, len);
- } else {
- if (len < MLEN)
- M_ALIGN(m, len);
- }
+ if (len < M_SIZE(m))
+ M_ALIGN(m, len);
m->m_len = len;
return (m);
}
@@ -522,7 +436,7 @@ m_prepend(struct mbuf *m, int len, int how)
/*
* Make a copy of an mbuf chain starting "off0" bytes from the beginning,
* continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf.
- * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
+ * The wait parameter is a choice of M_WAITOK/M_NOWAIT from caller.
* Note that the copy is read-only, because clusters are not copied,
* only their reference counts are incremented.
*/
@@ -547,17 +461,17 @@ m_copym(struct mbuf *m, int off0, int len, int wait)
m = m->m_next;
}
np = &top;
- top = 0;
+ top = NULL;
while (len > 0) {
if (m == NULL) {
- KASSERT(len == M_COPYALL,
+ KASSERT(len == M_COPYALL,
("m_copym, length > size of mbuf chain"));
break;
}
if (copyhdr)
- MGETHDR(n, wait, m->m_type);
+ n = m_gethdr(wait, m->m_type);
else
- MGET(n, wait, m->m_type);
+ n = m_get(wait, m->m_type);
*np = n;
if (n == NULL)
goto nospace;
@@ -583,165 +497,14 @@ m_copym(struct mbuf *m, int off0, int len, int wait)
m = m->m_next;
np = &n->m_next;
}
- if (top == NULL)
- mbstat.m_mcfail++; /* XXX: No consistency. */
return (top);
nospace:
m_freem(top);
- mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
/*
- * Returns mbuf chain with new head for the prepending case.
- * Copies from mbuf (chain) n from off for len to mbuf (chain) m
- * either prepending or appending the data.
- * The resulting mbuf (chain) m is fully writeable.
- * m is destination (is made writeable)
- * n is source, off is offset in source, len is len from offset
- * dir, 0 append, 1 prepend
- * how, wait or nowait
- */
-
-static int
-m_bcopyxxx(void *s, void *t, u_int len)
-{
- bcopy(s, t, (size_t)len);
- return 0;
-}
-
-struct mbuf *
-m_copymdata(struct mbuf *m, struct mbuf *n, int off, int len,
- int prep, int how)
-{
- struct mbuf *mm, *x, *z, *prev = NULL;
- caddr_t p;
- int i, nlen = 0;
- caddr_t buf[MLEN];
-
- KASSERT(m != NULL && n != NULL, ("m_copymdata, no target or source"));
- KASSERT(off >= 0, ("m_copymdata, negative off %d", off));
- KASSERT(len >= 0, ("m_copymdata, negative len %d", len));
- KASSERT(prep == 0 || prep == 1, ("m_copymdata, unknown direction %d", prep));
-
- mm = m;
- if (!prep) {
- while(mm->m_next) {
- prev = mm;
- mm = mm->m_next;
- }
- }
- for (z = n; z != NULL; z = z->m_next)
- nlen += z->m_len;
- if (len == M_COPYALL)
- len = nlen - off;
- if (off + len > nlen || len < 1)
- return NULL;
-
- if (!M_WRITABLE(mm)) {
- /* XXX: Use proper m_xxx function instead. */
- x = m_getcl(how, MT_DATA, mm->m_flags);
- if (x == NULL)
- return NULL;
- bcopy(mm->m_ext.ext_buf, x->m_ext.ext_buf, x->m_ext.ext_size);
- p = x->m_ext.ext_buf + (mm->m_data - mm->m_ext.ext_buf);
- x->m_data = p;
- mm->m_next = NULL;
- if (mm != m)
- prev->m_next = x;
- m_free(mm);
- mm = x;
- }
-
- /*
- * Append/prepend the data. Allocating mbufs as necessary.
- */
- /* Shortcut if enough free space in first/last mbuf. */
- if (!prep && M_TRAILINGSPACE(mm) >= len) {
- m_apply(n, off, len, m_bcopyxxx, mtod(mm, caddr_t) +
- mm->m_len);
- mm->m_len += len;
- mm->m_pkthdr.len += len;
- return m;
- }
- if (prep && M_LEADINGSPACE(mm) >= len) {
- mm->m_data = mtod(mm, caddr_t) - len;
- m_apply(n, off, len, m_bcopyxxx, mtod(mm, caddr_t));
- mm->m_len += len;
- mm->m_pkthdr.len += len;
- return mm;
- }
-
- /* Expand first/last mbuf to cluster if possible. */
- if (!prep && !(mm->m_flags & M_EXT) && len > M_TRAILINGSPACE(mm)) {
- bcopy(mm->m_data, &buf, mm->m_len);
- m_clget(mm, how);
- if (!(mm->m_flags & M_EXT))
- return NULL;
- bcopy(&buf, mm->m_ext.ext_buf, mm->m_len);
- mm->m_data = mm->m_ext.ext_buf;
- mm->m_pkthdr.header = NULL;
- }
- if (prep && !(mm->m_flags & M_EXT) && len > M_LEADINGSPACE(mm)) {
- bcopy(mm->m_data, &buf, mm->m_len);
- m_clget(mm, how);
- if (!(mm->m_flags & M_EXT))
- return NULL;
- bcopy(&buf, (caddr_t *)mm->m_ext.ext_buf +
- mm->m_ext.ext_size - mm->m_len, mm->m_len);
- mm->m_data = (caddr_t)mm->m_ext.ext_buf +
- mm->m_ext.ext_size - mm->m_len;
- mm->m_pkthdr.header = NULL;
- }
-
- /* Append/prepend as many mbuf (clusters) as necessary to fit len. */
- if (!prep && len > M_TRAILINGSPACE(mm)) {
- if (!m_getm(mm, len - M_TRAILINGSPACE(mm), how, MT_DATA))
- return NULL;
- }
- if (prep && len > M_LEADINGSPACE(mm)) {
- if (!(z = m_getm(NULL, len - M_LEADINGSPACE(mm), how, MT_DATA)))
- return NULL;
- i = 0;
- for (x = z; x != NULL; x = x->m_next) {
- i += x->m_flags & M_EXT ? x->m_ext.ext_size :
- (x->m_flags & M_PKTHDR ? MHLEN : MLEN);
- if (!x->m_next)
- break;
- }
- z->m_data += i - len;
- m_move_pkthdr(mm, z);
- x->m_next = mm;
- mm = z;
- }
-
- /* Seek to start position in source mbuf. Optimization for long chains. */
- while (off > 0) {
- if (off < n->m_len)
- break;
- off -= n->m_len;
- n = n->m_next;
- }
-
- /* Copy data into target mbuf. */
- z = mm;
- while (len > 0) {
- KASSERT(z != NULL, ("m_copymdata, falling off target edge"));
- i = M_TRAILINGSPACE(z);
- m_apply(n, off, i, m_bcopyxxx, mtod(z, caddr_t) + z->m_len);
- z->m_len += i;
- /* fixup pkthdr.len if necessary */
- if ((prep ? mm : m)->m_flags & M_PKTHDR)
- (prep ? mm : m)->m_pkthdr.len += i;
- off += i;
- len -= i;
- z = z->m_next;
- }
- return (prep ? mm : m);
-}
-
-/*
* Copy an entire packet, including header (which must be present).
* An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
* Note that the copy is read-only, because clusters are not copied,
@@ -756,7 +519,7 @@ m_copypacket(struct mbuf *m, int how)
struct mbuf *top, *n, *o;
MBUF_CHECKSLEEP(how);
- MGET(n, how, m->m_type);
+ n = m_get(how, m->m_type);
top = n;
if (n == NULL)
goto nospace;
@@ -774,7 +537,7 @@ m_copypacket(struct mbuf *m, int how)
m = m->m_next;
while (m) {
- MGET(o, how, m->m_type);
+ o = m_get(how, m->m_type);
if (o == NULL)
goto nospace;
@@ -794,7 +557,6 @@ m_copypacket(struct mbuf *m, int how)
return top;
nospace:
m_freem(top);
- mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -833,7 +595,7 @@ m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
* you need a writable copy of an mbuf chain.
*/
struct mbuf *
-m_dup(struct mbuf *m, int how)
+m_dup(const struct mbuf *m, int how)
{
struct mbuf **p, *top = NULL;
int remain, moff, nsize;
@@ -869,6 +631,7 @@ m_dup(struct mbuf *m, int how)
}
if ((n->m_flags & M_EXT) == 0)
nsize = MHLEN;
+ n->m_flags &= ~M_RDONLY;
}
n->m_len = 0;
@@ -898,7 +661,6 @@ m_dup(struct mbuf *m, int how)
nospace:
m_freem(top);
- mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -927,6 +689,22 @@ m_cat(struct mbuf *m, struct mbuf *n)
}
}
+/*
+ * Concatenate two pkthdr mbuf chains.
+ */
+void
+m_catpkt(struct mbuf *m, struct mbuf *n)
+{
+
+ M_ASSERTPKTHDR(m);
+ M_ASSERTPKTHDR(n);
+
+ m->m_pkthdr.len += n->m_pkthdr.len;
+ m_demote(n, 1, 0);
+
+ m_cat(m, n);
+}
+
void
m_adj(struct mbuf *mp, int req_len)
{
@@ -1002,8 +780,8 @@ m_adj(struct mbuf *mp, int req_len)
/*
* Rearange an mbuf chain so that len bytes are contiguous
- * and in the data area of an mbuf (so that mtod and dtom
- * will work for a structure of size len). Returns the resulting
+ * and in the data area of an mbuf (so that mtod will work
+ * for a structure of size len). Returns the resulting
* mbuf chain on success, frees it and returns null on failure.
* If there is room, it will add up to max_protohdr-len extra bytes to the
* contiguous region in an attempt to avoid being called next time.
@@ -1030,12 +808,11 @@ m_pullup(struct mbuf *n, int len)
} else {
if (len > MHLEN)
goto bad;
- MGET(m, M_DONTWAIT, n->m_type);
+ m = m_get(M_NOWAIT, n->m_type);
if (m == NULL)
goto bad;
- m->m_len = 0;
if (n->m_flags & M_PKTHDR)
- M_MOVE_PKTHDR(m, n);
+ m_move_pkthdr(m, n);
}
space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
do {
@@ -1059,7 +836,6 @@ m_pullup(struct mbuf *n, int len)
return (m);
bad:
m_freem(n);
- mbstat.m_mpfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -1068,8 +844,6 @@ bad:
* the amount of empty space before the data in the new mbuf to be specified
* (in the event that the caller expects to prepend later).
*/
-int MSFail;
-
struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
@@ -1078,12 +852,11 @@ m_copyup(struct mbuf *n, int len, int dstoff)
if (len > (MHLEN - dstoff))
goto bad;
- MGET(m, M_DONTWAIT, n->m_type);
+ m = m_get(M_NOWAIT, n->m_type);
if (m == NULL)
goto bad;
- m->m_len = 0;
if (n->m_flags & M_PKTHDR)
- M_MOVE_PKTHDR(m, n);
+ m_move_pkthdr(m, n);
m->m_data += dstoff;
space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
do {
@@ -1107,7 +880,6 @@ m_copyup(struct mbuf *n, int len, int dstoff)
return (m);
bad:
m_freem(n);
- MSFail++;
return (NULL);
}
@@ -1133,8 +905,18 @@ m_split(struct mbuf *m0, int len0, int wait)
if (m == NULL)
return (NULL);
remain = m->m_len - len;
- if (m0->m_flags & M_PKTHDR) {
- MGETHDR(n, wait, m0->m_type);
+ if (m0->m_flags & M_PKTHDR && remain == 0) {
+ n = m_gethdr(wait, m0->m_type);
+ if (n == NULL)
+ return (NULL);
+ n->m_next = m->m_next;
+ m->m_next = NULL;
+ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+ n->m_pkthdr.len = m0->m_pkthdr.len - len0;
+ m0->m_pkthdr.len = len0;
+ return (n);
+ } else if (m0->m_flags & M_PKTHDR) {
+ n = m_gethdr(wait, m0->m_type);
if (n == NULL)
return (NULL);
n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
@@ -1144,7 +926,7 @@ m_split(struct mbuf *m0, int len0, int wait)
goto extpacket;
if (remain > MHLEN) {
/* m can't be the lead packet */
- MH_ALIGN(n, 0);
+ M_ALIGN(n, 0);
n->m_next = m_split(m, len, wait);
if (n->m_next == NULL) {
(void) m_free(n);
@@ -1154,13 +936,13 @@ m_split(struct mbuf *m0, int len0, int wait)
return (n);
}
} else
- MH_ALIGN(n, remain);
+ M_ALIGN(n, remain);
} else if (remain == 0) {
n = m->m_next;
m->m_next = NULL;
return (n);
} else {
- MGET(n, wait, m->m_type);
+ n = m_get(wait, m->m_type);
if (n == NULL)
return (NULL);
M_ALIGN(n, remain);
@@ -1197,10 +979,10 @@ m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
while (totlen > 0) {
if (top == NULL) { /* First one, must be PKTHDR */
if (totlen + off >= MINCLSIZE) {
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
len = MCLBYTES;
} else {
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
len = MHLEN;
/* Place initial small packet/header at end of mbuf */
@@ -1215,10 +997,10 @@ m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
m->m_pkthdr.len = totlen;
} else {
if (totlen + off >= MINCLSIZE) {
- m = m_getcl(M_DONTWAIT, MT_DATA, 0);
+ m = m_getcl(M_NOWAIT, MT_DATA, 0);
len = MCLBYTES;
} else {
- m = m_get(M_DONTWAIT, MT_DATA);
+ m = m_get(M_NOWAIT, MT_DATA);
len = MLEN;
}
if (m == NULL) {
@@ -1262,7 +1044,7 @@ m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp)
off -= mlen;
totlen += mlen;
if (m->m_next == NULL) {
- n = m_get(M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n == NULL)
goto out;
bzero(mtod(n, caddr_t), MLEN);
@@ -1286,7 +1068,7 @@ m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp)
if (len == 0)
break;
if (m->m_next == NULL) {
- n = m_get(M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n == NULL)
break;
n->m_len = min(MLEN, len);
@@ -1330,7 +1112,7 @@ m_append(struct mbuf *m0, int len, c_caddr_t cp)
* Allocate a new mbuf; could check space
* and allocate a cluster instead.
*/
- n = m_get(M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n == NULL)
break;
n->m_len = min(MLEN, remainder);
@@ -1498,7 +1280,7 @@ m_defrag(struct mbuf *m0, int how)
goto nospace;
}
#endif
-
+
if (m0->m_pkthdr.len > MHLEN)
m_final = m_getcl(how, MT_DATA, M_PKTHDR);
else
@@ -1664,7 +1446,7 @@ m_fragment(struct mbuf *m0, int how, int length)
if (!(m0->m_flags & M_PKTHDR))
return (m0);
-
+
if ((length == 0) || (length < -2))
return (m0);
@@ -1806,25 +1588,6 @@ m_mbuftouio(struct uio *uio, struct mbuf *m, int len)
}
/*
- * Set the m_data pointer of a newly-allocated mbuf
- * to place an object of the specified size at the
- * end of the mbuf, longword aligned.
- */
-void
-m_align(struct mbuf *m, int len)
-{
- int adjust;
-
- if (m->m_flags & M_EXT)
- adjust = m->m_ext.ext_size - len;
- else if (m->m_flags & M_PKTHDR)
- adjust = MHLEN - len;
- else
- adjust = MLEN - len;
- m->m_data += adjust &~ (sizeof(long)-1);
-}
-
-/*
* Create a writable copy of the mbuf chain. While doing this
* we compact the chain with a goal of producing a chain with
* at most two mbufs. The second mbuf in this chain is likely
@@ -1859,7 +1622,7 @@ m_unshare(struct mbuf *m0, int how)
m->m_len <= M_TRAILINGSPACE(mprev)) {
/* XXX: this ignores mbuf types */
memcpy(mtod(mprev, caddr_t) + mprev->m_len,
- mtod(m, caddr_t), m->m_len);
+ mtod(m, caddr_t), m->m_len);
mprev->m_len += m->m_len;
mprev->m_next = m->m_next; /* unlink from chain */
m_free(m); /* reclaim mbuf */
@@ -1891,7 +1654,7 @@ m_unshare(struct mbuf *m0, int how)
m->m_len <= M_TRAILINGSPACE(mprev)) {
/* XXX: this ignores mbuf types */
memcpy(mtod(mprev, caddr_t) + mprev->m_len,
- mtod(m, caddr_t), m->m_len);
+ mtod(m, caddr_t), m->m_len);
mprev->m_len += m->m_len;
mprev->m_next = m->m_next; /* unlink from chain */
m_free(m); /* reclaim mbuf */
@@ -1902,43 +1665,23 @@ m_unshare(struct mbuf *m0, int how)
}
/*
- * Allocate new space to hold the copy...
- */
- /* XXX why can M_PKTHDR be set past the first mbuf? */
- if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
- /*
- * NB: if a packet header is present we must
- * allocate the mbuf separately from any cluster
- * because M_MOVE_PKTHDR will smash the data
- * pointer and drop the M_EXT marker.
- */
- MGETHDR(n, how, m->m_type);
- if (n == NULL) {
- m_freem(m0);
- return (NULL);
- }
- M_MOVE_PKTHDR(n, m);
- MCLGET(n, how);
- if ((n->m_flags & M_EXT) == 0) {
- m_free(n);
- m_freem(m0);
- return (NULL);
- }
- } else {
- n = m_getcl(how, m->m_type, m->m_flags);
- if (n == NULL) {
- m_freem(m0);
- return (NULL);
- }
- }
- /*
- * ... and copy the data. We deal with jumbo mbufs
- * (i.e. m_len > MCLBYTES) by splitting them into
- * clusters. We could just malloc a buffer and make
- * it external but too many device drivers don't know
- * how to break up the non-contiguous memory when
+ * Allocate new space to hold the copy and copy the data.
+ * We deal with jumbo mbufs (i.e. m_len > MCLBYTES) by
+ * splitting them into clusters. We could just malloc a
+ * buffer and make it external but too many device drivers
+ * don't know how to break up the non-contiguous memory when
* doing DMA.
*/
+ n = m_getcl(how, m->m_type, m->m_flags & M_COPYFLAGS);
+ if (n == NULL) {
+ m_freem(m0);
+ return (NULL);
+ }
+ if (m->m_flags & M_PKTHDR) {
+ KASSERT(mprev == NULL, ("%s: m0 %p, m %p has M_PKTHDR",
+ __func__, m0, m));
+ m_move_pkthdr(n, m);
+ }
len = m->m_len;
off = 0;
mfirst = n;
@@ -1949,7 +1692,7 @@ m_unshare(struct mbuf *m0, int how)
n->m_len = cc;
if (mlast != NULL)
mlast->m_next = n;
- mlast = n;
+ mlast = n;
#if 0
newipsecstat.ips_clcopied++;
#endif
@@ -1959,14 +1702,14 @@ m_unshare(struct mbuf *m0, int how)
break;
off += cc;
- n = m_getcl(how, m->m_type, m->m_flags);
+ n = m_getcl(how, m->m_type, m->m_flags & M_COPYFLAGS);
if (n == NULL) {
m_freem(mfirst);
m_freem(m0);
return (NULL);
}
}
- n->m_next = m->m_next;
+ n->m_next = m->m_next;
if (mprev == NULL)
m0 = mfirst; /* new head of chain */
else
@@ -2002,7 +1745,7 @@ m_profile(struct mbuf *m)
int segments = 0;
int used = 0;
int wasted = 0;
-
+
while (m) {
segments++;
used += m->m_len;
@@ -2037,11 +1780,10 @@ mbprof_textify(void)
int offset;
char *c;
uint64_t *p;
-
p = &mbprof.wasted[0];
c = mbprofbuf;
- offset = snprintf(c, MP_MAXLINE + 10,
+ offset = snprintf(c, MP_MAXLINE + 10,
"wasted:\n"
"%ju %ju %ju %ju %ju %ju %ju %ju "
"%ju %ju %ju %ju %ju %ju %ju %ju\n",
@@ -2050,7 +1792,7 @@ mbprof_textify(void)
#ifdef BIG_ARRAY
p = &mbprof.wasted[16];
c += offset;
- offset = snprintf(c, MP_MAXLINE,
+ offset = snprintf(c, MP_MAXLINE,
"%ju %ju %ju %ju %ju %ju %ju %ju "
"%ju %ju %ju %ju %ju %ju %ju %ju\n",
p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
@@ -2058,7 +1800,7 @@ mbprof_textify(void)
#endif
p = &mbprof.used[0];
c += offset;
- offset = snprintf(c, MP_MAXLINE + 10,
+ offset = snprintf(c, MP_MAXLINE + 10,
"used:\n"
"%ju %ju %ju %ju %ju %ju %ju %ju "
"%ju %ju %ju %ju %ju %ju %ju %ju\n",
@@ -2067,7 +1809,7 @@ mbprof_textify(void)
#ifdef BIG_ARRAY
p = &mbprof.used[16];
c += offset;
- offset = snprintf(c, MP_MAXLINE,
+ offset = snprintf(c, MP_MAXLINE,
"%ju %ju %ju %ju %ju %ju %ju %ju "
"%ju %ju %ju %ju %ju %ju %ju %ju\n",
p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
@@ -2075,7 +1817,7 @@ mbprof_textify(void)
#endif
p = &mbprof.segments[0];
c += offset;
- offset = snprintf(c, MP_MAXLINE + 10,
+ offset = snprintf(c, MP_MAXLINE + 10,
"segments:\n"
"%ju %ju %ju %ju %ju %ju %ju %ju "
"%ju %ju %ju %ju %ju %ju %ju %ju\n",
@@ -2084,7 +1826,7 @@ mbprof_textify(void)
#ifdef BIG_ARRAY
p = &mbprof.segments[16];
c += offset;
- offset = snprintf(c, MP_MAXLINE,
+ offset = snprintf(c, MP_MAXLINE,
"%ju %ju %ju %ju %ju %ju %ju %ju "
"%ju %ju %ju %ju %ju %ju %ju %jju",
p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
@@ -2106,16 +1848,16 @@ static int
mbprof_clr_handler(SYSCTL_HANDLER_ARGS)
{
int clear, error;
-
+
clear = 0;
error = sysctl_handle_int(oidp, &clear, 0, req);
if (error || !req->newptr)
return (error);
-
+
if (clear) {
bzero(&mbprof, sizeof(mbprof));
}
-
+
return (error);
}
diff --git a/freebsd/sys/kern/uipc_mbuf2.c b/freebsd/sys/kern/uipc_mbuf2.c
index a44ddc27..fef1c514 100644
--- a/freebsd/sys/kern/uipc_mbuf2.c
+++ b/freebsd/sys/kern/uipc_mbuf2.c
@@ -133,6 +133,8 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
}
/*
+ * The following comment is dated but still partially applies:
+ *
* XXX: This code is flawed because it considers a "writable" mbuf
* data region to require all of the following:
* (i) mbuf _has_ to have M_EXT set; if it is just a regular
@@ -143,17 +145,13 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
* Ideally, the requirement should only be (iii).
*
* If we're writable, we're sure we're writable, because the ref. count
- * cannot increase from 1, as that would require posession of mbuf
+ * cannot increase from 1, as that would require possession of mbuf
* n by someone else (which is impossible). However, if we're _not_
* writable, we may eventually become writable )if the ref. count drops
* to 1), but we'll fail to notice it unless we re-evaluate
* M_WRITABLE(). For now, we only evaluate once at the beginning and
* live with this.
*/
- /*
- * XXX: This is dumb. If we're just a regular mbuf with no M_EXT,
- * then we're not "writable," according to this code.
- */
writable = 0;
if ((n->m_flags & M_EXT) == 0 ||
(n->m_ext.ext_type == EXT_CLUSTER && M_WRITABLE(n)))
@@ -173,7 +171,7 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
* chop the current mbuf into two pieces, set off to 0.
*/
if (len <= n->m_len - off) {
- o = m_dup1(n, off, n->m_len - off, M_DONTWAIT);
+ o = m_dup1(n, off, n->m_len - off, M_NOWAIT);
if (o == NULL) {
m_freem(m);
return NULL; /* ENOBUFS */
@@ -233,9 +231,9 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
* on both end.
*/
if (len > MLEN)
- o = m_getcl(M_DONTWAIT, m->m_type, 0);
+ o = m_getcl(M_NOWAIT, m->m_type, 0);
else
- o = m_get(M_DONTWAIT, m->m_type);
+ o = m_get(M_NOWAIT, m->m_type);
if (!o) {
m_freem(m);
return NULL; /* ENOBUFS */
@@ -431,7 +429,7 @@ m_tag_copy(struct m_tag *t, int how)
* destination mbuf.
*/
int
-m_tag_copy_chain(struct mbuf *to, struct mbuf *from, int how)
+m_tag_copy_chain(struct mbuf *to, const struct mbuf *from, int how)
{
struct m_tag *p, *t, *tprev = NULL;
diff --git a/freebsd/sys/kern/uipc_mbufhash.c b/freebsd/sys/kern/uipc_mbufhash.c
new file mode 100644
index 00000000..804510e0
--- /dev/null
+++ b/freebsd/sys/kern/uipc_mbufhash.c
@@ -0,0 +1,176 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $ */
+
+/*
+ * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
+ * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/fnv_hash.h>
+
+#include <net/ethernet.h>
+
+#if defined(INET) || defined(INET6)
+#include <netinet/in.h>
+#endif
+
+#ifdef INET
+#include <netinet/ip.h>
+#endif
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+static const void *
+m_ether_tcpip_hash_gethdr(const struct mbuf *m, const u_int off,
+ const u_int len, void *buf)
+{
+
+ if (m->m_pkthdr.len < (off + len)) {
+ return (NULL);
+ } else if (m->m_len < (off + len)) {
+ m_copydata(m, off, len, buf);
+ return (buf);
+ }
+ return (mtod(m, char *) + off);
+}
+
+uint32_t
+m_ether_tcpip_hash_init(void)
+{
+ uint32_t seed;
+
+ seed = arc4random();
+ return (fnv_32_buf(&seed, sizeof(seed), FNV1_32_INIT));
+}
+
+uint32_t
+m_ether_tcpip_hash(const uint32_t flags, const struct mbuf *m,
+ const uint32_t key)
+{
+ union {
+#ifdef INET
+ struct ip ip;
+#endif
+#ifdef INET6
+ struct ip6_hdr ip6;
+#endif
+ struct ether_vlan_header vlan;
+ uint32_t port;
+ } buf;
+ struct ether_header *eh;
+ const struct ether_vlan_header *vlan;
+#ifdef INET
+ const struct ip *ip;
+#endif
+#ifdef INET6
+ const struct ip6_hdr *ip6;
+#endif
+ uint32_t p;
+ int off;
+ uint16_t etype;
+
+ p = key;
+ off = sizeof(*eh);
+ if (m->m_len < off)
+ goto done;
+ eh = mtod(m, struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ if (flags & MBUF_HASHFLAG_L2) {
+ p = fnv_32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
+ p = fnv_32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+ }
+ /* Special handling for encapsulating VLAN frames */
+ if ((m->m_flags & M_VLANTAG) && (flags & MBUF_HASHFLAG_L2)) {
+ p = fnv_32_buf(&m->m_pkthdr.ether_vtag,
+ sizeof(m->m_pkthdr.ether_vtag), p);
+ } else if (etype == ETHERTYPE_VLAN) {
+ vlan = m_ether_tcpip_hash_gethdr(m, off, sizeof(*vlan), &buf);
+ if (vlan == NULL)
+ goto done;
+
+ if (flags & MBUF_HASHFLAG_L2)
+ p = fnv_32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
+ etype = ntohs(vlan->evl_proto);
+ off += sizeof(*vlan) - sizeof(*eh);
+ }
+ switch (etype) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ ip = m_ether_tcpip_hash_gethdr(m, off, sizeof(*ip), &buf);
+ if (ip == NULL)
+ break;
+ if (flags & MBUF_HASHFLAG_L3) {
+ p = fnv_32_buf(&ip->ip_src, sizeof(struct in_addr), p);
+ p = fnv_32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
+ }
+ if (flags & MBUF_HASHFLAG_L4) {
+ const uint32_t *ports;
+ int iphlen;
+
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_SCTP:
+ iphlen = ip->ip_hl << 2;
+ if (iphlen < sizeof(*ip))
+ break;
+ off += iphlen;
+ ports = m_ether_tcpip_hash_gethdr(m,
+ off, sizeof(*ports), &buf);
+ if (ports == NULL)
+ break;
+ p = fnv_32_buf(ports, sizeof(*ports), p);
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+#endif
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ ip6 = m_ether_tcpip_hash_gethdr(m, off, sizeof(*ip6), &buf);
+ if (ip6 == NULL)
+ break;
+ if (flags & MBUF_HASHFLAG_L3) {
+ p = fnv_32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
+ p = fnv_32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
+ }
+ if (flags & MBUF_HASHFLAG_L4) {
+ uint32_t flow;
+
+ /* IPv6 flow label */
+ flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
+ p = fnv_32_buf(&flow, sizeof(flow), p);
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+done:
+ return (p);
+}
diff --git a/freebsd/sys/kern/uipc_sockbuf.c b/freebsd/sys/kern/uipc_sockbuf.c
index bcbd0d9a..f62014bc 100644
--- a/freebsd/sys/kern/uipc_sockbuf.c
+++ b/freebsd/sys/kern/uipc_sockbuf.c
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/aio.h> /* for aio_swake proto */
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
@@ -67,10 +68,144 @@ u_long sb_max_adj =
static u_long sb_efficiency = 8; /* parameter for sbreserve() */
-static void sbdrop_internal(struct sockbuf *sb, int len);
+static struct mbuf *sbcut_internal(struct sockbuf *sb, int len);
static void sbflush_internal(struct sockbuf *sb);
/*
+ * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY.
+ */
+static void
+sbm_clrprotoflags(struct mbuf *m, int flags)
+{
+ int mask;
+
+ mask = ~M_PROTOFLAGS;
+ if (flags & PRUS_NOTREADY)
+ mask |= M_NOTREADY;
+ while (m) {
+ m->m_flags &= mask;
+ m = m->m_next;
+ }
+}
+
+/*
+ * Mark ready "count" mbufs starting with "m".
+ */
+int
+sbready(struct sockbuf *sb, struct mbuf *m, int count)
+{
+ u_int blocker;
+
+ SOCKBUF_LOCK_ASSERT(sb);
+ KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb));
+
+ blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0;
+
+ for (int i = 0; i < count; i++, m = m->m_next) {
+ KASSERT(m->m_flags & M_NOTREADY,
+ ("%s: m %p !M_NOTREADY", __func__, m));
+ m->m_flags &= ~(M_NOTREADY | blocker);
+ if (blocker)
+ sb->sb_acc += m->m_len;
+ }
+
+ if (!blocker)
+ return (EINPROGRESS);
+
+ /* This one was blocking all the queue. */
+ for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) {
+ KASSERT(m->m_flags & M_BLOCKED,
+ ("%s: m %p !M_BLOCKED", __func__, m));
+ m->m_flags &= ~M_BLOCKED;
+ sb->sb_acc += m->m_len;
+ }
+
+ sb->sb_fnrdy = m;
+
+ return (0);
+}
+
+/*
+ * Adjust sockbuf state reflecting allocation of m.
+ */
+void
+sballoc(struct sockbuf *sb, struct mbuf *m)
+{
+
+ SOCKBUF_LOCK_ASSERT(sb);
+
+ sb->sb_ccc += m->m_len;
+
+ if (sb->sb_fnrdy == NULL) {
+ if (m->m_flags & M_NOTREADY)
+ sb->sb_fnrdy = m;
+ else
+ sb->sb_acc += m->m_len;
+ } else
+ m->m_flags |= M_BLOCKED;
+
+ if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
+ sb->sb_ctl += m->m_len;
+
+ sb->sb_mbcnt += MSIZE;
+ sb->sb_mcnt += 1;
+
+ if (m->m_flags & M_EXT) {
+ sb->sb_mbcnt += m->m_ext.ext_size;
+ sb->sb_ccnt += 1;
+ }
+}
+
+/*
+ * Adjust sockbuf state reflecting freeing of m.
+ */
+void
+sbfree(struct sockbuf *sb, struct mbuf *m)
+{
+
+#if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */
+ SOCKBUF_LOCK_ASSERT(sb);
+#endif
+
+ sb->sb_ccc -= m->m_len;
+
+ if (!(m->m_flags & M_NOTAVAIL))
+ sb->sb_acc -= m->m_len;
+
+ if (m == sb->sb_fnrdy) {
+ struct mbuf *n;
+
+ KASSERT(m->m_flags & M_NOTREADY,
+ ("%s: m %p !M_NOTREADY", __func__, m));
+
+ n = m->m_next;
+ while (n != NULL && !(n->m_flags & M_NOTREADY)) {
+ n->m_flags &= ~M_BLOCKED;
+ sb->sb_acc += n->m_len;
+ n = n->m_next;
+ }
+ sb->sb_fnrdy = n;
+ }
+
+ if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
+ sb->sb_ctl -= m->m_len;
+
+ sb->sb_mbcnt -= MSIZE;
+ sb->sb_mcnt -= 1;
+ if (m->m_flags & M_EXT) {
+ sb->sb_mbcnt -= m->m_ext.ext_size;
+ sb->sb_ccnt -= 1;
+ }
+
+ if (sb->sb_sndptr == m) {
+ sb->sb_sndptr = NULL;
+ sb->sb_sndptroff = 0;
+ }
+ if (sb->sb_sndptroff != 0)
+ sb->sb_sndptroff -= m->m_len;
+}
+
+/*
* Socantsendmore indicates that no more data will be sent on the socket; it
* would normally be applied to a socket when the user informs the system
* that no more data is to be sent, by the protocol code (in case
@@ -129,9 +264,9 @@ sbwait(struct sockbuf *sb)
SOCKBUF_LOCK_ASSERT(sb);
sb->sb_flags |= SB_WAIT;
- return (msleep(&sb->sb_cc, &sb->sb_mtx,
+ return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx,
(sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
- sb->sb_timeo));
+ sb->sb_timeo, 0, 0));
}
int
@@ -186,11 +321,11 @@ sowakeup(struct socket *so, struct sockbuf *sb)
sb->sb_flags &= ~SB_SEL;
if (sb->sb_flags & SB_WAIT) {
sb->sb_flags &= ~SB_WAIT;
- wakeup(&sb->sb_cc);
+ wakeup(&sb->sb_acc);
}
KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
if (sb->sb_upcall != NULL) {
- ret = sb->sb_upcall(so, sb->sb_upcallarg, M_DONTWAIT);
+ ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
if (ret == SU_ISCONNECTED) {
KASSERT(sb == &so->so_rcv,
("SO_SND upcall returned SU_ISCONNECTED"));
@@ -199,7 +334,7 @@ sowakeup(struct socket *so, struct sockbuf *sb)
} else
ret = SU_OK;
if (sb->sb_flags & SB_AIO)
- aio_swake(so, sb);
+ sowakeup_aio(so, sb);
SOCKBUF_UNLOCK(sb);
if (ret == SU_ISCONNECTED)
soisconnected(so);
@@ -306,9 +441,7 @@ sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
return (0);
if (td != NULL) {
#ifndef __rtems__
- PROC_LOCK(td->td_proc);
- sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE);
- PROC_UNLOCK(td->td_proc);
+ sbsize_limit = lim_cur(td, RLIMIT_SBSIZE);
#else /* __rtems__ */
sbsize_limit = RLIM_INFINITY;
#endif /* __rtems__ */
@@ -460,15 +593,15 @@ sblastmbufchk(struct sockbuf *sb, const char *file, int line)
* are discarded and mbufs are compacted where possible.
*/
void
-sbappend_locked(struct sockbuf *sb, struct mbuf *m)
+sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags)
{
struct mbuf *n;
SOCKBUF_LOCK_ASSERT(sb);
- if (m == 0)
+ if (m == NULL)
return;
-
+ sbm_clrprotoflags(m, flags);
SBLASTRECORDCHK(sb);
n = sb->sb_mb;
if (n) {
@@ -511,11 +644,11 @@ sbappend_locked(struct sockbuf *sb, struct mbuf *m)
* are discarded and mbufs are compacted where possible.
*/
void
-sbappend(struct sockbuf *sb, struct mbuf *m)
+sbappend(struct sockbuf *sb, struct mbuf *m, int flags)
{
SOCKBUF_LOCK(sb);
- sbappend_locked(sb, m);
+ sbappend_locked(sb, m, flags);
SOCKBUF_UNLOCK(sb);
}
@@ -525,7 +658,7 @@ sbappend(struct sockbuf *sb, struct mbuf *m)
* that is, a stream protocol (such as TCP).
*/
void
-sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
+sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags)
{
SOCKBUF_LOCK_ASSERT(sb);
@@ -534,6 +667,9 @@ sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
SBLASTMBUFCHK(sb);
+ /* Remove all packet headers and mbuf tags to get a pure data chain. */
+ m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0);
+
sbcompress(sb, m, sb->sb_mbtail);
sb->sb_lastrecord = sb->sb_mb;
@@ -546,38 +682,63 @@ sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
* that is, a stream protocol (such as TCP).
*/
void
-sbappendstream(struct sockbuf *sb, struct mbuf *m)
+sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags)
{
SOCKBUF_LOCK(sb);
- sbappendstream_locked(sb, m);
+ sbappendstream_locked(sb, m, flags);
SOCKBUF_UNLOCK(sb);
}
#ifdef SOCKBUF_DEBUG
void
-sbcheck(struct sockbuf *sb)
+sbcheck(struct sockbuf *sb, const char *file, int line)
{
- struct mbuf *m;
- struct mbuf *n = 0;
- u_long len = 0, mbcnt = 0;
+ struct mbuf *m, *n, *fnrdy;
+ u_long acc, ccc, mbcnt;
SOCKBUF_LOCK_ASSERT(sb);
+ acc = ccc = mbcnt = 0;
+ fnrdy = NULL;
+
for (m = sb->sb_mb; m; m = n) {
n = m->m_nextpkt;
for (; m; m = m->m_next) {
- len += m->m_len;
+ if (m->m_len == 0) {
+ printf("sb %p empty mbuf %p\n", sb, m);
+ goto fail;
+ }
+ if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) {
+ if (m != sb->sb_fnrdy) {
+ printf("sb %p: fnrdy %p != m %p\n",
+ sb, sb->sb_fnrdy, m);
+ goto fail;
+ }
+ fnrdy = m;
+ }
+ if (fnrdy) {
+ if (!(m->m_flags & M_NOTAVAIL)) {
+ printf("sb %p: fnrdy %p, m %p is avail\n",
+ sb, sb->sb_fnrdy, m);
+ goto fail;
+ }
+ } else
+ acc += m->m_len;
+ ccc += m->m_len;
mbcnt += MSIZE;
if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
mbcnt += m->m_ext.ext_size;
}
}
- if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
- printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
- mbcnt, sb->sb_mbcnt);
- panic("sbcheck");
+ if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
+ printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
+ acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
+ goto fail;
}
+ return;
+fail:
+ panic("%s from %s:%u", __func__, file, line);
}
#endif
@@ -591,8 +752,9 @@ sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
SOCKBUF_LOCK_ASSERT(sb);
- if (m0 == 0)
+ if (m0 == NULL)
return;
+ m_clrprotoflags(m0);
/*
* Put the first mbuf on the queue. Note this permits zero length
* records.
@@ -633,11 +795,13 @@ sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa,
if (asa->sa_len > MLEN)
return (0);
#endif
- MGET(m, M_DONTWAIT, MT_SONAME);
- if (m == 0)
+ m = m_get(M_NOWAIT, MT_SONAME);
+ if (m == NULL)
return (0);
m->m_len = asa->sa_len;
bcopy(asa, mtod(m, caddr_t), asa->sa_len);
+ if (m0)
+ m_clrprotoflags(m0);
if (ctrl_last)
ctrl_last->m_next = m0; /* concatenate data to control */
else
@@ -727,12 +891,13 @@ sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
SOCKBUF_LOCK_ASSERT(sb);
- if (control == 0)
+ if (control == NULL)
panic("sbappendcontrol_locked");
space = m_length(control, &n) + m_length(m0, NULL);
if (space > sbspace(sb))
return (0);
+ m_clrprotoflags(m0);
n->m_next = m0; /* concatenate data to control */
SBLASTRECORDCHK(sb);
@@ -773,8 +938,8 @@ sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
*
* (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
* an mbuf already in the socket buffer. This can occur if an
- * appropriate mbuf exists, there is room, and no merging of data types
- * will occur.
+ * appropriate mbuf exists, there is room, both mbufs are not marked as
+ * not ready, and no merging of data types will occur.
*
* (3) The mbuf may be appended to the end of the existing mbuf chain.
*
@@ -803,13 +968,17 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
if (n && (n->m_flags & M_EOR) == 0 &&
M_WRITABLE(n) &&
((sb->sb_flags & SB_NOCOALESCE) == 0) &&
+ !(m->m_flags & M_NOTREADY) &&
+ !(n->m_flags & M_NOTREADY) &&
m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
m->m_len <= M_TRAILINGSPACE(n) &&
n->m_type == m->m_type) {
bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
(unsigned)m->m_len);
n->m_len += m->m_len;
- sb->sb_cc += m->m_len;
+ sb->sb_ccc += m->m_len;
+ if (sb->sb_fnrdy == NULL)
+ sb->sb_acc += m->m_len;
if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
/* XXX: Probably don't need.*/
sb->sb_ctl += m->m_len;
@@ -843,16 +1012,16 @@ sbflush_internal(struct sockbuf *sb)
while (sb->sb_mbcnt) {
/*
- * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
+ * Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
* we would loop forever. Panic instead.
*/
- if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
+ if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len))
break;
- sbdrop_internal(sb, (int)sb->sb_cc);
+ m_freem(sbcut_internal(sb, (int)sb->sb_ccc));
}
- if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
- panic("sbflush_internal: cc %u || mb %p || mbcnt %u",
- sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
+ KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
+ ("%s: ccc %u mb %p mbcnt %u", __func__,
+ sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
}
void
@@ -873,27 +1042,29 @@ sbflush(struct sockbuf *sb)
}
/*
- * Drop data from (the front of) a sockbuf.
+ * Cut data from (the front of) a sockbuf.
*/
-static void
-sbdrop_internal(struct sockbuf *sb, int len)
+static struct mbuf *
+sbcut_internal(struct sockbuf *sb, int len)
{
- struct mbuf *m;
- struct mbuf *next;
+ struct mbuf *m, *next, *mfree;
next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
+ mfree = NULL;
+
while (len > 0) {
- if (m == 0) {
- if (next == 0)
- panic("sbdrop");
+ if (m == NULL) {
+ KASSERT(next, ("%s: no next, len %d", __func__, len));
m = next;
next = m->m_nextpkt;
- continue;
}
if (m->m_len > len) {
+ KASSERT(!(m->m_flags & M_NOTAVAIL),
+ ("%s: m %p M_NOTAVAIL", __func__, m));
m->m_len -= len;
m->m_data += len;
- sb->sb_cc -= len;
+ sb->sb_ccc -= len;
+ sb->sb_acc -= len;
if (sb->sb_sndptroff != 0)
sb->sb_sndptroff -= len;
if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
@@ -902,11 +1073,35 @@ sbdrop_internal(struct sockbuf *sb, int len)
}
len -= m->m_len;
sbfree(sb, m);
- m = m_free(m);
+ /*
+ * Do not put M_NOTREADY buffers to the free list, they
+ * are referenced from outside.
+ */
+ if (m->m_flags & M_NOTREADY)
+ m = m->m_next;
+ else {
+ struct mbuf *n;
+
+ n = m->m_next;
+ m->m_next = mfree;
+ mfree = m;
+ m = n;
+ }
}
+ /*
+ * Free any zero-length mbufs from the buffer.
+ * For SOCK_DGRAM sockets such mbufs represent empty records.
+ * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer,
+ * when sosend_generic() needs to send only control data.
+ */
while (m && m->m_len == 0) {
+ struct mbuf *n;
+
sbfree(sb, m);
- m = m_free(m);
+ n = m->m_next;
+ m->m_next = mfree;
+ mfree = m;
+ m = n;
}
if (m) {
sb->sb_mb = m;
@@ -924,6 +1119,8 @@ sbdrop_internal(struct sockbuf *sb, int len)
} else if (m->m_nextpkt == NULL) {
sb->sb_lastrecord = m;
}
+
+ return (mfree);
}
/*
@@ -934,17 +1131,31 @@ sbdrop_locked(struct sockbuf *sb, int len)
{
SOCKBUF_LOCK_ASSERT(sb);
+ m_freem(sbcut_internal(sb, len));
+}
- sbdrop_internal(sb, len);
+/*
+ * Drop data from (the front of) a sockbuf,
+ * and return it to caller.
+ */
+struct mbuf *
+sbcut_locked(struct sockbuf *sb, int len)
+{
+
+ SOCKBUF_LOCK_ASSERT(sb);
+ return (sbcut_internal(sb, len));
}
void
sbdrop(struct sockbuf *sb, int len)
{
+ struct mbuf *mfree;
SOCKBUF_LOCK(sb);
- sbdrop_locked(sb, len);
+ mfree = sbcut_internal(sb, len);
SOCKBUF_UNLOCK(sb);
+
+ m_freem(mfree);
}
/*
@@ -957,8 +1168,8 @@ sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff)
struct mbuf *m, *ret;
KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
- KASSERT(off + len <= sb->sb_cc, ("%s: beyond sb", __func__));
- KASSERT(sb->sb_sndptroff <= sb->sb_cc, ("%s: sndptroff broken", __func__));
+ KASSERT(off + len <= sb->sb_acc, ("%s: beyond sb", __func__));
+ KASSERT(sb->sb_sndptroff <= sb->sb_acc, ("%s: sndptroff broken", __func__));
/*
* Is off below stored offset? Happens on retransmits.
@@ -1073,9 +1284,9 @@ sbcreatecontrol(caddr_t p, int size, int type, int level)
if (CMSG_SPACE((u_int)size) > MCLBYTES)
return ((struct mbuf *) NULL);
if (CMSG_SPACE((u_int)size) > MLEN)
- m = m_getcl(M_DONTWAIT, MT_CONTROL, 0);
+ m = m_getcl(M_NOWAIT, MT_CONTROL, 0);
else
- m = m_get(M_DONTWAIT, MT_CONTROL);
+ m = m_get(M_NOWAIT, MT_CONTROL);
if (m == NULL)
return ((struct mbuf *) NULL);
cp = mtod(m, struct cmsghdr *);
@@ -1107,7 +1318,7 @@ void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
- xsb->sb_cc = sb->sb_cc;
+ xsb->sb_cc = sb->sb_ccc;
xsb->sb_hiwat = sb->sb_hiwat;
xsb->sb_mbcnt = sb->sb_mbcnt;
xsb->sb_mcnt = sb->sb_mcnt;
@@ -1124,4 +1335,4 @@ SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW,
&sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
- &sb_efficiency, 0, "");
+ &sb_efficiency, 0, "Socket buffer size waste factor");
diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c
index a37518f2..3f2dba72 100644
--- a/freebsd/sys/kern/uipc_socket.c
+++ b/freebsd/sys/kern/uipc_socket.c
@@ -94,7 +94,7 @@
* from a listen queue to a file descriptor, in order to prevent garbage
* collection of the socket at an untimely moment. For a number of reasons,
* these interfaces are not preferred, and should be avoided.
- *
+ *
* NOTE: With regard to VNETs the general rule is that callers do not set
* curvnet. Exceptions to this rule include soabort(), sodisconnect(),
* sofree() (and with that sorele(), sotryfree()), as well as sonewconn()
@@ -107,7 +107,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_zero.h>
#include <rtems/bsd/local/opt_compat.h>
#include <rtems/bsd/sys/param.h>
@@ -121,7 +120,9 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/domain.h>
#include <sys/file.h> /* for struct knote */
+#include <sys/hhook.h>
#include <sys/kernel.h>
+#include <sys/khelp.h>
#include <sys/event.h>
#include <sys/eventhandler.h>
#include <sys/poll.h>
@@ -135,6 +136,7 @@ __FBSDID("$FreeBSD$");
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
#include <sys/uio.h>
#include <sys/jail.h>
#include <sys/syslog.h>
@@ -164,6 +166,11 @@ static int filt_soread(struct knote *kn, long hint);
static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
static int filt_solisten(struct knote *kn, long hint);
+static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
+#ifdef __rtems__
+static
+#endif /* __rtems__ */
+fo_kqfilter_t soo_kqfilter;
static struct filterops solisten_filtops = {
.f_isfd = 1,
@@ -190,11 +197,16 @@ MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
VNET_ASSERT(curvnet != NULL, \
("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so)));
+VNET_DEFINE(struct hhook_head *, socket_hhh[HHOOK_SOCKET_LAST + 1]);
+#define V_socket_hhh VNET(socket_hhh)
+
/*
* Limit on the number of connections in the listen queue waiting
* for accept(2).
+ * NB: The original sysctl somaxconn is still available but hidden
+ * to prevent confusion about the actual purpose of this number.
*/
-static int somaxconn = SOMAXCONN;
+static u_int somaxconn = SOMAXCONN;
static int
sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
@@ -207,32 +219,30 @@ sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
if (error || !req->newptr )
return (error);
- if (val < 1 || val > USHRT_MAX)
+ /*
+ * The purpose of the UINT_MAX / 3 limit, is so that the formula
+ * 3 * so_qlimit / 2
+ * below, will not overflow.
+ */
+
+ if (val < 1 || val > UINT_MAX / 3)
return (EINVAL);
somaxconn = val;
return (0);
}
-SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW,
+SYSCTL_PROC(_kern_ipc, OID_AUTO, soacceptqueue, CTLTYPE_UINT | CTLFLAG_RW,
0, sizeof(int), sysctl_somaxconn, "I",
"Maximum listen socket pending connection accept queue size");
+SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn,
+ CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_SKIP,
+ 0, sizeof(int), sysctl_somaxconn, "I",
+ "Maximum listen socket pending connection accept queue size (compat)");
static int numopensockets;
SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
&numopensockets, 0, "Number of open sockets");
-#ifdef ZERO_COPY_SOCKETS
-/* These aren't static because they're used in other files. */
-int so_zero_copy_send = 1;
-int so_zero_copy_receive = 1;
-SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0,
- "Zero copy controls");
-SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW,
- &so_zero_copy_receive, 0, "Enable zero copy receive");
-SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW,
- &so_zero_copy_send, 0, "Enable zero copy send");
-#endif /* ZERO_COPY_SOCKETS */
-
/*
* accept_mtx locks down per-socket fields relating to accept queues. See
* socketvar.h for an annotation of the protected fields of struct socket.
@@ -257,7 +267,7 @@ SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
* Initialize the socket subsystem and set up the socket
* memory allocator.
*/
-uma_zone_t socket_zone;
+static uma_zone_t socket_zone;
int maxsockets;
#ifndef __rtems__
@@ -265,7 +275,25 @@ static void
socket_zone_change(void *tag)
{
- uma_zone_set_max(socket_zone, maxsockets);
+ maxsockets = uma_zone_set_max(socket_zone, maxsockets);
+}
+
+static void
+socket_hhook_register(int subtype)
+{
+
+ if (hhook_head_register(HHOOK_TYPE_SOCKET, subtype,
+ &V_socket_hhh[subtype],
+ HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
+ printf("%s: WARNING: unable to register hook\n", __func__);
+}
+
+static void
+socket_hhook_deregister(int subtype)
+{
+
+ if (hhook_head_deregister(V_socket_hhh[subtype]) != 0)
+ printf("%s: WARNING: unable to deregister hook\n", __func__);
}
#endif /* __rtems__ */
@@ -273,16 +301,42 @@ static void
socket_init(void *tag)
{
- socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(socket_zone, maxsockets);
+ socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ maxsockets = uma_zone_set_max(socket_zone, maxsockets);
+ uma_zone_set_warning(socket_zone, "kern.ipc.maxsockets limit reached");
#ifndef __rtems__
- EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
- EVENTHANDLER_PRI_FIRST);
+ EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
+ EVENTHANDLER_PRI_FIRST);
#endif /* __rtems__ */
}
SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL);
+#ifndef __rtems__
+static void
+socket_vnet_init(const void *unused __unused)
+{
+ int i;
+
+ /* We expect a contiguous range */
+ for (i = 0; i <= HHOOK_SOCKET_LAST; i++)
+ socket_hhook_register(i);
+}
+VNET_SYSINIT(socket_vnet_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY,
+ socket_vnet_init, NULL);
+
+static void
+socket_vnet_uninit(const void *unused __unused)
+{
+ int i;
+
+ for (i = 0; i <= HHOOK_SOCKET_LAST; i++)
+ socket_hhook_deregister(i);
+}
+VNET_SYSUNINIT(socket_vnet_uninit, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY,
+ socket_vnet_uninit, NULL);
+#endif /* __rtems__ */
+
/*
* Initialise maxsockets. This SYSINIT must be run after
* tunable_mbinit().
@@ -321,7 +375,7 @@ sysctl_maxsockets(SYSCTL_HANDLER_ARGS)
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW,
&maxsockets, 0, sysctl_maxsockets, "IU",
- "Maximum number of sockets avaliable");
+ "Maximum number of sockets available");
/*
* Socket operation routines. These routines are called by the routines in
@@ -351,21 +405,40 @@ soalloc(struct vnet *vnet)
return (NULL);
}
#endif
+ if (khelp_init_osd(HELPER_CLASS_SOCKET, &so->osd)) {
+ uma_zfree(socket_zone, so);
+ return (NULL);
+ }
+
SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
sx_init(&so->so_snd.sb_sx, "so_snd_sx");
sx_init(&so->so_rcv.sb_sx, "so_rcv_sx");
- TAILQ_INIT(&so->so_aiojobq);
+#ifndef __rtems__
+ TAILQ_INIT(&so->so_snd.sb_aiojobq);
+ TAILQ_INIT(&so->so_rcv.sb_aiojobq);
+ TASK_INIT(&so->so_snd.sb_aiotask, 0, soaio_snd, so);
+ TASK_INIT(&so->so_rcv.sb_aiotask, 0, soaio_rcv, so);
+#endif /* __rtems__ */
+#ifdef VIMAGE
+ VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p",
+ __func__, __LINE__, so));
+ so->so_vnet = vnet;
+#endif
+ /* We shouldn't need the so_global_mtx */
+ if (hhook_run_socket(so, NULL, HHOOK_SOCKET_CREATE)) {
+ /* Do we need more comprehensive error returns? */
+ uma_zfree(socket_zone, so);
+ return (NULL);
+ }
mtx_lock(&so_global_mtx);
so->so_gencnt = ++so_gencnt;
++numopensockets;
#ifdef VIMAGE
- VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p",
- __func__, __LINE__, so));
vnet->vnet_sockcnt++;
- so->so_vnet = vnet;
#endif
mtx_unlock(&so_global_mtx);
+
return (so);
}
@@ -396,15 +469,16 @@ sodealloc(struct socket *so)
if (so->so_snd.sb_hiwat)
(void)chgsbsize(so->so_cred->cr_uidinfo,
&so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
-#ifdef INET
- /* remove acccept filter if one is present. */
+ /* remove accept filter if one is present. */
if (so->so_accf != NULL)
do_setopt_accept_filter(so, NULL);
-#endif
#ifdef MAC
mac_socket_destroy(so);
#endif
+ hhook_run_socket(so, NULL, HHOOK_SOCKET_CLOSE);
+
crfree(so->so_cred);
+ khelp_destroy_osd(&so->osd);
sx_destroy(&so->so_snd.sb_sx);
sx_destroy(&so->so_rcv.sb_sx);
SOCKBUF_LOCK_DESTROY(&so->so_snd);
@@ -429,7 +503,16 @@ socreate(int dom, struct socket **aso, int type, int proto,
else
prp = pffindtype(dom, type);
- if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL ||
+ if (prp == NULL) {
+ /* No support for domain. */
+ if (pffinddomain(dom) == NULL)
+ return (EAFNOSUPPORT);
+ /* No support for socket type. */
+ if (proto == 0 && type != 0)
+ return (EPROTOTYPE);
+ return (EPROTONOSUPPORT);
+ }
+ if (prp->pr_usrreqs->pru_attach == NULL ||
prp->pr_usrreqs->pru_attach == pru_attach_notsupp)
return (EPROTONOSUPPORT);
@@ -490,9 +573,9 @@ SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW,
/*
* When an attempt at a new connection is noted on a socket which accepts
* connections, sonewconn is called. If the connection is possible (subject
- * to space constraints, etc.) then we allocate a new structure, propoerly
+ * to space constraints, etc.) then we allocate a new structure, properly
* linked into the data structure of the original socket, and return this.
- * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
+ * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
*
* Note: the ref count on the socket is 0 on return.
*/
@@ -629,7 +712,18 @@ sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td);
CURVNET_RESTORE();
- return error;
+ return (error);
+}
+
+int
+sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ int error;
+
+ CURVNET_SET(so->so_vnet);
+ error = (*so->so_proto->pr_usrreqs->pru_bindat)(fd, so, nam, td);
+ CURVNET_RESTORE();
+ return (error);
}
/*
@@ -652,7 +746,7 @@ solisten(struct socket *so, int backlog, struct thread *td)
CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td);
CURVNET_RESTORE();
- return error;
+ return (error);
}
int
@@ -734,15 +828,17 @@ sofree(struct socket *so)
("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)",
so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
if (so->so_options & SO_ACCEPTCONN) {
- KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated"));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_incomp populated"));
+ KASSERT((TAILQ_EMPTY(&so->so_comp)),
+ ("sofree: so_comp populated"));
+ KASSERT((TAILQ_EMPTY(&so->so_incomp)),
+ ("sofree: so_incomp populated"));
}
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
VNET_SO_ASSERT(so);
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
- (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
+ (*pr->pr_domain->dom_dispose)(so);
if (pr->pr_usrreqs->pru_detach != NULL)
(*pr->pr_usrreqs->pru_detach)(so);
@@ -801,7 +897,8 @@ soclose(struct socket *so)
goto drop;
while (so->so_state & SS_ISCONNECTED) {
error = tsleep(&so->so_timeo,
- PSOCK | PCATCH, "soclos", so->so_linger * hz);
+ PSOCK | PCATCH, "soclos",
+ so->so_linger * hz);
if (error)
break;
}
@@ -907,6 +1004,13 @@ soaccept(struct socket *so, struct sockaddr **nam)
int
soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
+
+ return (soconnectat(AT_FDCWD, so, nam, td));
+}
+
+int
+soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
+{
int error;
if (so->so_options & SO_ACCEPTCONN)
@@ -928,7 +1032,13 @@ soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
* biting us.
*/
so->so_error = 0;
- error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td);
+ if (fd == AT_FDCWD) {
+ error = (*so->so_proto->pr_usrreqs->pru_connect)(so,
+ nam, td);
+ } else {
+ error = (*so->so_proto->pr_usrreqs->pru_connectat)(fd,
+ so, nam, td);
+ }
}
CURVNET_RESTORE();
@@ -960,113 +1070,6 @@ sodisconnect(struct socket *so)
return (error);
}
-#ifdef ZERO_COPY_SOCKETS
-struct so_zerocopy_stats{
- int size_ok;
- int align_ok;
- int found_ifp;
-};
-struct so_zerocopy_stats so_zerocp_stats = {0,0,0};
-
-/*
- * sosend_copyin() is only used if zero copy sockets are enabled. Otherwise
- * sosend_dgram() and sosend_generic() use m_uiotombuf().
- *
- * sosend_copyin() accepts a uio and prepares an mbuf chain holding part or
- * all of the data referenced by the uio. If desired, it uses zero-copy.
- * *space will be updated to reflect data copied in.
- *
- * NB: If atomic I/O is requested, the caller must already have checked that
- * space can hold resid bytes.
- *
- * NB: In the event of an error, the caller may need to free the partial
- * chain pointed to by *mpp. The contents of both *uio and *space may be
- * modified even in the case of an error.
- */
-static int
-sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
- int flags)
-{
- struct mbuf *m, **mp, *top;
- long len;
- ssize_t resid;
- int error;
- int cow_send;
-
- *retmp = top = NULL;
- mp = &top;
- len = 0;
- resid = uio->uio_resid;
- error = 0;
- do {
- cow_send = 0;
- if (resid >= MINCLSIZE) {
- if (top == NULL) {
- m = m_gethdr(M_WAITOK, MT_DATA);
- m->m_pkthdr.len = 0;
- m->m_pkthdr.rcvif = NULL;
- } else
- m = m_get(M_WAITOK, MT_DATA);
- if (so_zero_copy_send &&
- resid >= PAGE_SIZE &&
- *space >= PAGE_SIZE &&
- uio->uio_iov->iov_len >= PAGE_SIZE) {
- so_zerocp_stats.size_ok++;
- so_zerocp_stats.align_ok++;
- cow_send = socow_setup(m, uio);
- len = cow_send;
- }
- if (!cow_send) {
- m_clget(m, M_WAITOK);
- len = min(min(MCLBYTES, resid), *space);
- }
- } else {
- if (top == NULL) {
- m = m_gethdr(M_WAIT, MT_DATA);
- m->m_pkthdr.len = 0;
- m->m_pkthdr.rcvif = NULL;
-
- len = min(min(MHLEN, resid), *space);
- /*
- * For datagram protocols, leave room
- * for protocol headers in first mbuf.
- */
- if (atomic && m && len < MHLEN)
- MH_ALIGN(m, len);
- } else {
- m = m_get(M_WAIT, MT_DATA);
- len = min(min(MLEN, resid), *space);
- }
- }
- if (m == NULL) {
- error = ENOBUFS;
- goto out;
- }
-
- *space -= len;
- if (cow_send)
- error = 0;
- else
- error = uiomove(mtod(m, void *), (int)len, uio);
- resid = uio->uio_resid;
- m->m_len = len;
- *mp = m;
- top->m_pkthdr.len += len;
- if (error)
- goto out;
- mp = &m->m_next;
- if (resid <= 0) {
- if (flags & MSG_EOR)
- top->m_flags |= M_EOR;
- break;
- }
- } while (*space > 0 && atomic);
-out:
- *retmp = top;
- return (error);
-}
-#endif /* ZERO_COPY_SOCKETS */
-
#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
int
@@ -1076,13 +1079,10 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
long space;
ssize_t resid;
int clen = 0, error, dontroute;
-#ifdef ZERO_COPY_SOCKETS
- int atomic = sosendallatonce(so) || top;
-#endif
- KASSERT(so->so_type == SOCK_DGRAM, ("sodgram_send: !SOCK_DGRAM"));
+ KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM"));
KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
- ("sodgram_send: !PR_ATOMIC"));
+ ("sosend_dgram: !PR_ATOMIC"));
if (uio != NULL)
resid = uio->uio_resid;
@@ -1163,11 +1163,6 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
if (flags & MSG_EOR)
top->m_flags |= M_EOR;
} else {
-#ifdef ZERO_COPY_SOCKETS
- error = sosend_copyin(uio, &top, atomic, &space, flags);
- if (error)
- goto out;
-#else
/*
* Copy the data from userland into a mbuf chain.
* If no data is to be copied in, a single empty mbuf
@@ -1180,7 +1175,6 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
goto out;
}
space -= resid - uio->uio_resid;
-#endif
resid = uio->uio_resid;
}
KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
@@ -1195,7 +1189,7 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
}
/*
* XXX all the SBS_CANTSENDMORE checks previously done could be out
- * of date. We could have recieved a reset packet in an interrupt or
+ * of date. We could have received a reset packet in an interrupt or
* maybe we slept while doing page faults in uiomove() etc. We could
* probably recheck again inside the locking protection here, but
* there are probably other places that this also happens. We must
@@ -1354,16 +1348,13 @@ restart:
if (flags & MSG_EOR)
top->m_flags |= M_EOR;
} else {
-#ifdef ZERO_COPY_SOCKETS
- error = sosend_copyin(uio, &top, atomic,
- &space, flags);
- if (error != 0)
- goto release;
-#else
/*
* Copy the data from userland into a mbuf
- * chain. If no data is to be copied in,
- * a single empty mbuf is returned.
+ * chain. If resid is 0, which can happen
+ * only if we have control to send, then
+ * a single empty mbuf is returned. This
+ * is a workaround to prevent protocol send
+ * methods to panic.
*/
top = m_uiotombuf(uio, M_WAITOK, space,
(atomic ? max_hdr : 0),
@@ -1374,7 +1365,6 @@ restart:
goto release;
}
space -= resid - uio->uio_resid;
-#endif
resid = uio->uio_resid;
}
if (dontroute) {
@@ -1384,7 +1374,7 @@ restart:
}
/*
* XXX all the SBS_CANTSENDMORE checks previously
- * done could be out of date. We could have recieved
+ * done could be out of date. We could have received
* a reset packet in an interrupt or maybe we slept
* while doing page faults in uiomove() etc. We
* could probably recheck again inside the locking
@@ -1461,26 +1451,11 @@ soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
VNET_SO_ASSERT(so);
- m = m_get(M_WAIT, MT_DATA);
+ m = m_get(M_WAITOK, MT_DATA);
error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
if (error)
goto bad;
do {
-#ifdef ZERO_COPY_SOCKETS
- if (so_zero_copy_receive) {
- int disposable;
-
- if ((m->m_flags & M_EXT)
- && (m->m_ext.ext_type == EXT_DISPOSABLE))
- disposable = 1;
- else
- disposable = 0;
-
- error = uiomoveco(mtod(m, void *),
- min(uio->uio_resid, m->m_len),
- uio, disposable);
- } else
-#endif /* ZERO_COPY_SOCKETS */
error = uiomove(mtod(m, void *),
(int) min(uio->uio_resid, m->m_len), uio);
m = m_free(m);
@@ -1513,20 +1488,19 @@ sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
else
sb->sb_mb = nextrecord;
- /*
- * Now update any dependent socket buffer fields to reflect the new
- * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the
+ /*
+ * Now update any dependent socket buffer fields to reflect the new
+ * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the
* addition of a second clause that takes care of the case where
* sb_mb has been updated, but remains the last record.
- */
- if (sb->sb_mb == NULL) {
- sb->sb_mbtail = NULL;
- sb->sb_lastrecord = NULL;
- } else if (sb->sb_mb->m_nextpkt == NULL)
- sb->sb_lastrecord = sb->sb_mb;
+ */
+ if (sb->sb_mb == NULL) {
+ sb->sb_mbtail = NULL;
+ sb->sb_lastrecord = NULL;
+ } else if (sb->sb_mb->m_nextpkt == NULL)
+ sb->sb_lastrecord = sb->sb_mb;
}
-
/*
* Implement receive operations on a socket. We depend on the way that
* records are added to the sockbuf by sbappend. In particular, each record
@@ -1588,12 +1562,12 @@ restart:
* 2. MSG_DONTWAIT is not set
*/
if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
- so->so_rcv.sb_cc < uio->uio_resid) &&
- so->so_rcv.sb_cc < so->so_rcv.sb_lowat &&
+ sbavail(&so->so_rcv) < uio->uio_resid) &&
+ sbavail(&so->so_rcv) < so->so_rcv.sb_lowat &&
m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
- KASSERT(m != NULL || !so->so_rcv.sb_cc,
- ("receive: m == %p so->so_rcv.sb_cc == %u",
- m, so->so_rcv.sb_cc));
+ KASSERT(m != NULL || !sbavail(&so->so_rcv),
+ ("receive: m == %p sbavail == %u",
+ m, sbavail(&so->so_rcv)));
if (so->so_error) {
if (m != NULL)
goto dontblock;
@@ -1717,7 +1691,7 @@ dontblock:
SOCKBUF_UNLOCK(&so->so_rcv);
VNET_SO_ASSERT(so);
error = (*pr->pr_domain->dom_externalize)
- (cm, controlp);
+ (cm, controlp, flags);
SOCKBUF_LOCK(&so->so_rcv);
} else if (controlp != NULL)
*controlp = cm;
@@ -1774,11 +1748,12 @@ dontblock:
*/
moff = 0;
offset = 0;
- while (m != NULL && uio->uio_resid > 0 && error == 0) {
+ while (m != NULL && !(m->m_flags & M_NOTAVAIL) && uio->uio_resid > 0
+ && error == 0) {
/*
* If the type of mbuf has changed since the last mbuf
* examined ('type'), end the receive operation.
- */
+ */
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) {
if (type != m->m_type)
@@ -1806,21 +1781,6 @@ dontblock:
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
SOCKBUF_UNLOCK(&so->so_rcv);
-#ifdef ZERO_COPY_SOCKETS
- if (so_zero_copy_receive) {
- int disposable;
-
- if ((m->m_flags & M_EXT)
- && (m->m_ext.ext_type == EXT_DISPOSABLE))
- disposable = 1;
- else
- disposable = 0;
-
- error = uiomoveco(mtod(m, char *) + moff,
- (int)len, uio,
- disposable);
- } else
-#endif /* ZERO_COPY_SOCKETS */
error = uiomove(mtod(m, char *) + moff, (int)len, uio);
SOCKBUF_LOCK(&so->so_rcv);
if (error) {
@@ -1851,6 +1811,7 @@ dontblock:
nextrecord = m->m_nextpkt;
sbfree(&so->so_rcv, m);
if (mp != NULL) {
+ m->m_nextpkt = NULL;
*mp = m;
mp = &m->m_next;
so->so_rcv.sb_mb = m = m->m_next;
@@ -1868,33 +1829,30 @@ dontblock:
moff += len;
else {
if (mp != NULL) {
- int copy_flag;
-
- if (flags & MSG_DONTWAIT)
- copy_flag = M_DONTWAIT;
- else
- copy_flag = M_WAIT;
- if (copy_flag == M_WAIT)
+ if (flags & MSG_DONTWAIT) {
+ *mp = m_copym(m, 0, len,
+ M_NOWAIT);
+ if (*mp == NULL) {
+ /*
+ * m_copym() couldn't
+ * allocate an mbuf.
+ * Adjust uio_resid back
+ * (it was adjusted
+ * down by len bytes,
+ * which we didn't end
+ * up "copying" over).
+ */
+ uio->uio_resid += len;
+ break;
+ }
+ } else {
SOCKBUF_UNLOCK(&so->so_rcv);
- *mp = m_copym(m, 0, len, copy_flag);
- if (copy_flag == M_WAIT)
+ *mp = m_copym(m, 0, len,
+ M_WAITOK);
SOCKBUF_LOCK(&so->so_rcv);
- if (*mp == NULL) {
- /*
- * m_copym() couldn't
- * allocate an mbuf. Adjust
- * uio_resid back (it was
- * adjusted down by len
- * bytes, which we didn't end
- * up "copying" over).
- */
- uio->uio_resid += len;
- break;
- }
+ }
}
- m->m_data += len;
- m->m_len -= len;
- so->so_rcv.sb_cc -= len;
+ sbcut_locked(&so->so_rcv, len);
}
}
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
@@ -1923,7 +1881,8 @@ dontblock:
while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
!sosendallatonce(so) && nextrecord == NULL) {
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
- if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE)
+ if (so->so_error ||
+ so->so_rcv.sb_state & SBS_CANTRCVMORE)
break;
/*
* Notify the protocol that some data has been
@@ -2058,7 +2017,7 @@ restart:
/* Abort if socket has reported problems. */
if (so->so_error) {
- if (sb->sb_cc > 0)
+ if (sbavail(sb) > 0)
goto deliver;
if (oresid > uio->uio_resid)
goto out;
@@ -2070,32 +2029,32 @@ restart:
/* Door is closed. Deliver what is left, if any. */
if (sb->sb_state & SBS_CANTRCVMORE) {
- if (sb->sb_cc > 0)
+ if (sbavail(sb) > 0)
goto deliver;
else
goto out;
}
/* Socket buffer is empty and we shall not block. */
- if (sb->sb_cc == 0 &&
+ if (sbavail(sb) == 0 &&
((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
error = EAGAIN;
goto out;
}
/* Socket buffer got some data that we shall deliver now. */
- if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) &&
+ if (sbavail(sb) > 0 && !(flags & MSG_WAITALL) &&
((so->so_state & SS_NBIO) ||
(flags & (MSG_DONTWAIT|MSG_NBIO)) ||
- sb->sb_cc >= sb->sb_lowat ||
- sb->sb_cc >= uio->uio_resid ||
- sb->sb_cc >= sb->sb_hiwat) ) {
+ sbavail(sb) >= sb->sb_lowat ||
+ sbavail(sb) >= uio->uio_resid ||
+ sbavail(sb) >= sb->sb_hiwat) ) {
goto deliver;
}
/* On MSG_WAITALL we must wait until all data or error arrives. */
if ((flags & MSG_WAITALL) &&
- (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_hiwat))
+ (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat))
goto deliver;
/*
@@ -2109,7 +2068,7 @@ restart:
deliver:
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
- KASSERT(sb->sb_cc > 0, ("%s: sockbuf empty", __func__));
+ KASSERT(sbavail(sb) > 0, ("%s: sockbuf empty", __func__));
KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
/* Statistics. */
@@ -2119,7 +2078,7 @@ deliver:
#endif /* __rtems__ */
/* Fill uio until full or current end of socket buffer is reached. */
- len = min(uio->uio_resid, sb->sb_cc);
+ len = min(uio->uio_resid, sbavail(sb));
if (mp0 != NULL) {
/* Dequeue as many mbufs as possible. */
if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
@@ -2130,6 +2089,8 @@ deliver:
for (m = sb->sb_mb;
m != NULL && m->m_len <= len;
m = m->m_next) {
+ KASSERT(!(m->m_flags & M_NOTAVAIL),
+ ("%s: m %p not available", __func__, m));
len -= m->m_len;
uio->uio_resid -= m->m_len;
sbfree(sb, m);
@@ -2146,7 +2107,7 @@ deliver:
KASSERT(sb->sb_mb != NULL,
("%s: len > 0 && sb->sb_mb empty", __func__));
- m = m_copym(sb->sb_mb, 0, len, M_DONTWAIT);
+ m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
if (m == NULL)
len = 0; /* Don't flush data from sockbuf. */
else
@@ -2254,9 +2215,9 @@ soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
*/
SOCKBUF_LOCK(&so->so_rcv);
while ((m = so->so_rcv.sb_mb) == NULL) {
- KASSERT(so->so_rcv.sb_cc == 0,
- ("soreceive_dgram: sb_mb NULL but sb_cc %u",
- so->so_rcv.sb_cc));
+ KASSERT(sbavail(&so->so_rcv) == 0,
+ ("soreceive_dgram: sb_mb NULL but sbavail %u",
+ sbavail(&so->so_rcv)));
if (so->so_error) {
error = so->so_error;
so->so_error = 0;
@@ -2356,7 +2317,7 @@ soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
cm->m_next = NULL;
if (pr->pr_domain->dom_externalize != NULL) {
error = (*pr->pr_domain->dom_externalize)
- (cm, controlp);
+ (cm, controlp, flags);
} else if (controlp != NULL)
*controlp = cm;
else
@@ -2416,11 +2377,13 @@ soshutdown(struct socket *so, int how)
if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
return (EINVAL);
+ if ((so->so_state &
+ (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0)
+ return (ENOTCONN);
CURVNET_SET(so->so_vnet);
- if (pr->pr_usrreqs->pru_flush != NULL) {
- (*pr->pr_usrreqs->pru_flush)(so, how);
- }
+ if (pr->pr_usrreqs->pru_flush != NULL)
+ (*pr->pr_usrreqs->pru_flush)(so, how);
if (how != SHUT_WR)
sorflush(so);
if (how != SHUT_RD) {
@@ -2439,7 +2402,7 @@ sorflush(struct socket *so)
{
struct sockbuf *sb = &so->so_rcv;
struct protosw *pr = so->so_proto;
- struct sockbuf asb;
+ struct socket aso;
VNET_SO_ASSERT(so);
@@ -2464,8 +2427,9 @@ sorflush(struct socket *so)
* and mutex data unchanged.
*/
SOCKBUF_LOCK(sb);
- bzero(&asb, offsetof(struct sockbuf, sb_startzero));
- bcopy(&sb->sb_startzero, &asb.sb_startzero,
+ bzero(&aso, sizeof(aso));
+ aso.so_pcb = so->so_pcb;
+ bcopy(&sb->sb_startzero, &aso.so_rcv.sb_startzero,
sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
bzero(&sb->sb_startzero,
sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
@@ -2473,12 +2437,34 @@ sorflush(struct socket *so)
sbunlock(sb);
/*
- * Dispose of special rights and flush the socket buffer. Don't call
- * any unsafe routines (that rely on locks being initialized) on asb.
+ * Dispose of special rights and flush the copied socket. Don't call
+ * any unsafe routines (that rely on locks being initialized) on aso.
*/
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
- (*pr->pr_domain->dom_dispose)(asb.sb_mb);
- sbrelease_internal(&asb, so);
+ (*pr->pr_domain->dom_dispose)(&aso);
+ sbrelease_internal(&aso.so_rcv, so);
+}
+
+/*
+ * Wrapper for Socket established helper hook.
+ * Parameters: socket, context of the hook point, hook id.
+ */
+static int inline
+hhook_run_socket(struct socket *so, void *hctx, int32_t h_id)
+{
+ struct socket_hhook_data hhook_data = {
+ .so = so,
+ .hctx = hctx,
+ .m = NULL,
+ .status = 0
+ };
+
+ CURVNET_SET(so->so_vnet);
+ HHOOKS_RUN_IF(V_socket_hhh[h_id], &hhook_data, &so->osd);
+ CURVNET_RESTORE();
+
+ /* Ugly but needed, since hhooks return void for now */
+ return (hhook_data.status);
}
/*
@@ -2537,7 +2523,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
int error, optval;
struct linger l;
struct timeval tv;
- u_long val;
+ sbintime_t val;
uint32_t val32;
#ifdef MAC
struct mac extmac;
@@ -2554,13 +2540,12 @@ sosetopt(struct socket *so, struct sockopt *sopt)
error = ENOPROTOOPT;
} else {
switch (sopt->sopt_name) {
-#ifdef INET
case SO_ACCEPTFILTER:
error = do_setopt_accept_filter(so, sopt);
if (error)
goto bad;
break;
-#endif
+
case SO_LINGER:
error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
if (error)
@@ -2589,7 +2574,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_NO_DDP:
case SO_NO_OFFLOAD:
error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ sizeof optval);
if (error)
goto bad;
SOCK_LOCK(so);
@@ -2602,7 +2587,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_SETFIB:
error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ sizeof optval);
if (error)
goto bad;
@@ -2620,7 +2605,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_USER_COOKIE:
error = sooptcopyin(sopt, &val32, sizeof val32,
- sizeof val32);
+ sizeof val32);
if (error)
goto bad;
so->so_user_cookie = val32;
@@ -2631,7 +2616,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_SNDLOWAT:
case SO_RCVLOWAT:
error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ sizeof optval);
if (error)
goto bad;
@@ -2699,8 +2684,10 @@ sosetopt(struct socket *so, struct sockopt *sopt)
error = EDOM;
goto bad;
}
- val = tvtohz(&tv);
-
+ if (tv.tv_sec > INT32_MAX)
+ val = SBT_MAX;
+ else
+ val = tvtosbt(tv);
switch (sopt->sopt_name) {
case SO_SNDTIMEO:
so->so_snd.sb_timeo = val;
@@ -2725,7 +2712,11 @@ sosetopt(struct socket *so, struct sockopt *sopt)
break;
default:
- error = ENOPROTOOPT;
+ if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
+ error = hhook_run_socket(so, sopt,
+ HHOOK_SOCKET_OPT);
+ else
+ error = ENOPROTOOPT;
break;
}
if (error == 0 && so->so_proto->pr_ctloutput != NULL)
@@ -2753,7 +2744,7 @@ sooptcopyout(struct sockopt *sopt, const void *buf, size_t len)
* that we always tell the user precisely how much we copied, rather
* than something useful like the total amount we had available for
* her. Note that this interface is not idempotent; the entire
- * answer must generated ahead of time.
+ * answer must be generated ahead of time.
*/
valsize = min(len, sopt->sopt_valsize);
sopt->sopt_valsize = valsize;
@@ -2787,11 +2778,10 @@ sogetopt(struct socket *so, struct sockopt *sopt)
return (error);
} else {
switch (sopt->sopt_name) {
-#ifdef INET
case SO_ACCEPTFILTER:
error = do_getopt_accept_filter(so, sopt);
break;
-#endif
+
case SO_LINGER:
SOCK_LOCK(so);
l.l_onoff = so->so_options & SO_LINGER;
@@ -2850,11 +2840,8 @@ integer:
case SO_SNDTIMEO:
case SO_RCVTIMEO:
- optval = (sopt->sopt_name == SO_SNDTIMEO ?
- so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
-
- tv.tv_sec = optval / hz;
- tv.tv_usec = (optval % hz) * tick;
+ tv = sbttotv(sopt->sopt_name == SO_SNDTIMEO ?
+ so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
#ifdef COMPAT_FREEBSD32
if (SV_CURPROC_FLAG(SV_ILP32)) {
struct timeval32 tv32;
@@ -2912,7 +2899,11 @@ integer:
goto integer;
default:
- error = ENOPROTOOPT;
+ if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
+ error = hhook_run_socket(so, sopt,
+ HHOOK_SOCKET_OPT);
+ else
+ error = ENOPROTOOPT;
break;
}
}
@@ -2923,18 +2914,17 @@ bad:
return (error);
}
-/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
struct mbuf *m, *m_prev;
int sopt_size = sopt->sopt_valsize;
- MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
+ MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
if (m == NULL)
return ENOBUFS;
if (sopt_size > MLEN) {
- MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
+ MCLGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT);
if ((m->m_flags & M_EXT) == 0) {
m_free(m);
return ENOBUFS;
@@ -2948,14 +2938,14 @@ soopt_getm(struct sockopt *sopt, struct mbuf **mp)
m_prev = m;
while (sopt_size) {
- MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
+ MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
if (m == NULL) {
m_freem(*mp);
return ENOBUFS;
}
if (sopt_size > MLEN) {
- MCLGET(m, sopt->sopt_td != NULL ? M_WAIT :
- M_DONTWAIT);
+ MCLGET(m, sopt->sopt_td != NULL ? M_WAITOK :
+ M_NOWAIT);
if ((m->m_flags & M_EXT) == 0) {
m_freem(m);
m_freem(*mp);
@@ -2972,7 +2962,6 @@ soopt_getm(struct sockopt *sopt, struct mbuf **mp)
return (0);
}
-/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
@@ -2985,7 +2974,7 @@ soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
int error;
error = copyin(sopt->sopt_val, mtod(m, char *),
- m->m_len);
+ m->m_len);
if (error != 0) {
m_freem(m0);
return(error);
@@ -3001,7 +2990,6 @@ soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
return (0);
}
-/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
@@ -3015,17 +3003,17 @@ soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
int error;
error = copyout(mtod(m, char *), sopt->sopt_val,
- m->m_len);
+ m->m_len);
if (error != 0) {
m_freem(m0);
return(error);
}
} else
bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
- sopt->sopt_valsize -= m->m_len;
- sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
- valsize += m->m_len;
- m = m->m_next;
+ sopt->sopt_valsize -= m->m_len;
+ sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
+ valsize += m->m_len;
+ m = m->m_next;
}
if (m != NULL) {
/* enough soopt buffer should be given from user-land */
@@ -3109,9 +3097,6 @@ sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
return (revents);
}
-#ifdef __rtems__
-static
-#endif /* __rtems__ */
int
soo_kqfilter(struct file *fp, struct knote *kn)
{
@@ -3162,6 +3147,13 @@ pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
}
int
+pru_aio_queue_notsupp(struct socket *so, struct kaiocb *job)
+{
+
+ return EOPNOTSUPP;
+}
+
+int
pru_attach_notsupp(struct socket *so, int proto, struct thread *td)
{
@@ -3176,6 +3168,14 @@ pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
}
int
+pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td)
+{
+
+ return EOPNOTSUPP;
+}
+
+int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
{
@@ -3183,6 +3183,14 @@ pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
}
int
+pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td)
+{
+
+ return EOPNOTSUPP;
+}
+
+int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
@@ -3240,6 +3248,13 @@ pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
return EOPNOTSUPP;
}
+int
+pru_ready_notsupp(struct socket *so, struct mbuf *m, int count)
+{
+
+ return (EOPNOTSUPP);
+}
+
/*
* This isn't really a ``null'' operation, but it's the default one and
* doesn't do anything destructive.
@@ -3311,17 +3326,24 @@ filt_soread(struct knote *kn, long hint)
so = kn->kn_fp->f_data;
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
- kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
+ kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl;
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
kn->kn_flags |= EV_EOF;
kn->kn_fflags = so->so_error;
return (1);
} else if (so->so_error) /* temporary udp error */
return (1);
- else if (kn->kn_sfflags & NOTE_LOWAT)
- return (kn->kn_data >= kn->kn_sdata);
- else
- return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat);
+
+ if (kn->kn_sfflags & NOTE_LOWAT) {
+ if (kn->kn_data >= kn->kn_sdata)
+ return 1;
+ } else {
+ if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat)
+ return 1;
+ }
+
+ /* This hook returning non-zero indicates an event, not error */
+ return (hhook_run_socket(so, NULL, HHOOK_FILT_SOREAD));
}
static void
@@ -3345,6 +3367,9 @@ filt_sowrite(struct knote *kn, long hint)
so = kn->kn_fp->f_data;
SOCKBUF_LOCK_ASSERT(&so->so_snd);
kn->kn_data = sbspace(&so->so_snd);
+
+ hhook_run_socket(so, kn, HHOOK_FILT_SOWRITE);
+
if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
kn->kn_flags |= EV_EOF;
kn->kn_fflags = so->so_error;
@@ -3367,7 +3392,7 @@ filt_solisten(struct knote *kn, long hint)
struct socket *so = kn->kn_fp->f_data;
kn->kn_data = so->so_qlen;
- return (! TAILQ_EMPTY(&so->so_comp));
+ return (!TAILQ_EMPTY(&so->so_comp));
}
#ifndef __rtems__
@@ -3428,7 +3453,7 @@ soisconnecting(struct socket *so)
void
soisconnected(struct socket *so)
{
- struct socket *head;
+ struct socket *head;
int ret;
restart:
@@ -3456,7 +3481,7 @@ restart:
head->so_accf->so_accept_filter_arg);
so->so_options &= ~SO_ACCEPTFILTER;
ret = head->so_accf->so_accept_filter->accf_callback(so,
- head->so_accf->so_accept_filter_arg, M_DONTWAIT);
+ head->so_accf->so_accept_filter_arg, M_NOWAIT);
if (ret == SU_ISCONNECTED)
soupcall_clear(so, SO_RCV);
SOCK_UNLOCK(so);
@@ -3483,11 +3508,9 @@ soisdisconnecting(struct socket *so)
SOCKBUF_LOCK(&so->so_rcv);
so->so_state &= ~SS_ISCONNECTING;
so->so_state |= SS_ISDISCONNECTING;
- so->so_rcv.sb_state |= SBS_CANTRCVMORE;
- sorwakeup_locked(so);
+ socantrcvmore_locked(so);
SOCKBUF_LOCK(&so->so_snd);
- so->so_snd.sb_state |= SBS_CANTSENDMORE;
- sowwakeup_locked(so);
+ socantsendmore_locked(so);
wakeup(&so->so_timeo);
}
@@ -3502,12 +3525,10 @@ soisdisconnected(struct socket *so)
SOCKBUF_LOCK(&so->so_rcv);
so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
so->so_state |= SS_ISDISCONNECTED;
- so->so_rcv.sb_state |= SBS_CANTRCVMORE;
- sorwakeup_locked(so);
+ socantrcvmore_locked(so);
SOCKBUF_LOCK(&so->so_snd);
- so->so_snd.sb_state |= SBS_CANTSENDMORE;
- sbdrop_locked(&so->so_snd, so->so_snd.sb_cc);
- sowwakeup_locked(so);
+ sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
+ socantsendmore_locked(so);
wakeup(&so->so_timeo);
}
@@ -3533,7 +3554,7 @@ soupcall_set(struct socket *so, int which,
int (*func)(struct socket *, void *, int), void *arg)
{
struct sockbuf *sb;
-
+
switch (which) {
case SO_RCV:
sb = &so->so_rcv;
@@ -3621,9 +3642,10 @@ sotoxsocket(struct socket *so, struct xsocket *xso)
*/
void
-so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg)
+so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *),
+ void *arg)
{
-
+
TAILQ_FOREACH(so, &so->so_comp, so_list)
func(so, arg);
}
@@ -3743,11 +3765,13 @@ so_sowwakeup_locked(struct socket *so)
void
so_lock(struct socket *so)
{
+
SOCK_LOCK(so);
}
void
so_unlock(struct socket *so)
{
+
SOCK_UNLOCK(so);
}
diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c
index c7a3dbe9..99ae6392 100644
--- a/freebsd/sys/kern/uipc_syscalls.c
+++ b/freebsd/sys/kern/uipc_syscalls.c
@@ -4,9 +4,6 @@
* Copyright (c) 1982, 1986, 1989, 1990, 1993
* The Regents of the University of California. All rights reserved.
*
- * sendfile(2) and related extensions:
- * Copyright (c) 1998, David Greenman. All rights reserved.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -40,37 +37,27 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_capsicum.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_sctp.h>
#include <rtems/bsd/local/opt_compat.h>
#include <rtems/bsd/local/opt_ktrace.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
-#include <sys/capability.h>
+#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/filedesc.h>
-#include <sys/event.h>
#include <sys/proc.h>
-#include <sys/fcntl.h>
-#include <sys/file.h>
#include <sys/filio.h>
#include <sys/jail.h>
-#include <sys/mount.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
-#include <sys/sf_buf.h>
-#include <sys/sysent.h>
+#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
-#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
-#include <sys/sysctl.h>
-#include <sys/uio.h>
-#include <sys/vnode.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
@@ -83,103 +70,44 @@ __FBSDID("$FreeBSD$");
#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_object.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pageout.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_extern.h>
-
-#if defined(INET) || defined(INET6)
-#ifdef SCTP
-#include <netinet/sctp.h>
-#include <netinet/sctp_peeloff.h>
-#endif /* SCTP */
-#endif /* INET || INET6 */
-#ifdef __rtems__
-#include <machine/rtems-bsd-syscall-api.h>
-#include <rtems/bsd/zerocopy.h>
-
-static int kern_bind(struct thread *, int, struct sockaddr *);
-
-static int kern_connect(struct thread *, int, struct sockaddr *);
-
-static int kern_setsockopt( struct thread *td, int s, int level, int name,
- void *val, enum uio_seg valseg, socklen_t valsize);
-
-static int kern_getsockopt( struct thread *td, int s, int level, int name,
- void *val, enum uio_seg valseg, socklen_t *valsize);
-#endif /* __rtems__ */
-
/*
- * Creation flags, OR'ed into socket() and socketpair() type argument.
- * For stable/9, these are supported but not exposed in the header file.
+ * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC
+ * and SOCK_NONBLOCK.
*/
-#define SOCK_CLOEXEC 0x10000000
-#define SOCK_NONBLOCK 0x20000000
+#define ACCEPT4_INHERIT 0x1
+#define ACCEPT4_COMPAT 0x2
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
#ifndef __rtems__
-static int accept1(struct thread *td, struct accept_args *uap, int compat);
-static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
+static int accept1(struct thread *td, int s, struct sockaddr *uname,
+ socklen_t *anamelen, int flags);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
int compat);
+#endif /* __rtems__ */
+static int sockargs(struct mbuf **, char *, socklen_t, int);
+#ifndef __rtems__
/*
- * NSFBUFS-related variables and associated sysctls
- */
-int nsfbufs;
-int nsfbufspeak;
-int nsfbufsused;
-
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
- "Maximum number of sendfile(2) sf_bufs available");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
- "Number of sendfile(2) sf_bufs at peak usage");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
- "Number of sendfile(2) sf_bufs in use");
-
-/*
- * Convert a user file descriptor to a kernel file entry and check that, if
- * it is a capability, the right rights are present. A reference on the file
- * entry is held upon returning.
+ * Convert a user file descriptor to a kernel file entry and check if required
+ * capability rights are present.
+ * A reference on the file entry is held upon returning.
*/
-static int
-getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights,
+int
+getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp,
struct file **fpp, u_int *fflagp)
{
struct file *fp;
-#ifdef CAPABILITIES
- struct file *fp_fromcap;
int error;
-#endif
- fp = NULL;
- if ((fdp == NULL) || ((fp = fget_unlocked(fdp, fd)) == NULL))
- return (EBADF);
-#ifdef CAPABILITIES
- /*
- * If the file descriptor is for a capability, test rights and use
- * the file descriptor referenced by the capability.
- */
- error = cap_funwrap(fp, rights, &fp_fromcap);
- if (error) {
- fdrop(fp, curthread);
+ error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL);
+ if (error != 0)
return (error);
- }
- if (fp != fp_fromcap) {
- fhold(fp_fromcap);
- fdrop(fp, curthread);
- fp = fp_fromcap;
- }
-#endif /* CAPABILITIES */
if (fp->f_type != DTYPE_SOCKET) {
- fdrop(fp, curthread);
+ fdrop(fp, td);
return (ENOTSOCK);
}
if (fflagp != NULL)
@@ -220,7 +148,7 @@ rtems_bsd_getsock(int fd, struct file **fpp, u_int *fflagp)
return (error);
}
-#define getsock_cap(fdp, fd, rights, fpp, fflagp) rtems_bsd_getsock(fd, fpp, fflagp)
+#define getsock_cap(td, fd, rights, fpp, fflagp) rtems_bsd_getsock(fd, fpp, fflagp)
#endif /* __rtems__ */
/*
@@ -242,9 +170,6 @@ sys_socket(td, uap)
int protocol;
} */ *uap;
{
-#ifndef __rtems__
- struct filedesc *fdp;
-#endif /* __rtems__ */
struct socket *so;
struct file *fp;
int fd, error, type, oflag, fflag;
@@ -268,20 +193,17 @@ sys_socket(td, uap)
#ifdef MAC
error = mac_socket_check_create(td->td_ucred, uap->domain, type,
uap->protocol);
- if (error)
+ if (error != 0)
return (error);
#endif
-#ifndef __rtems__
- fdp = td->td_proc->p_fd;
-#endif /* __rtems__ */
error = falloc(td, &fp, &fd, oflag);
- if (error)
+ if (error != 0)
return (error);
/* An extra reference on `fp' has been held for us by falloc(). */
error = socreate(uap->domain, &so, type, uap->protocol,
td->td_ucred, td);
- if (error) {
- fdclose(fdp, fp, fd, td);
+ if (error != 0) {
+ fdclose(td, fp, fd);
} else {
finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops);
if ((fflag & FNONBLOCK) != 0)
@@ -319,6 +241,9 @@ socket(int domain, int type, int protocol)
/* ARGSUSED */
#ifdef __rtems__
+static int kern_bindat(struct thread *td, int dirfd, int fd,
+ struct sockaddr *sa);
+
static
#endif /* __rtems__ */
int
@@ -333,11 +258,11 @@ sys_bind(td, uap)
struct sockaddr *sa;
int error;
- if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
- return (error);
-
- error = kern_bind(td, uap->s, sa);
- free(sa, M_SONAME);
+ error = getsockaddr(&sa, uap->name, uap->namelen);
+ if (error == 0) {
+ error = kern_bindat(td, AT_FDCWD, uap->s, sa);
+ free(sa, M_SONAME);
+ }
return (error);
}
#ifdef __rtems__
@@ -363,18 +288,18 @@ bind(int socket, const struct sockaddr *address, socklen_t address_len)
#endif /* __rtems__ */
int
-kern_bind(td, fd, sa)
- struct thread *td;
- int fd;
- struct sockaddr *sa;
+kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
{
struct socket *so;
struct file *fp;
+ cap_rights_t rights;
int error;
AUDIT_ARG_FD(fd);
- error = getsock_cap(td->td_proc->p_fd, fd, CAP_BIND, &fp, NULL);
- if (error)
+ AUDIT_ARG_SOCKADDR(td, dirfd, sa);
+ error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_BIND),
+ &fp, NULL);
+ if (error != 0)
return (error);
so = fp->f_data;
#ifdef KTRACE
@@ -383,17 +308,45 @@ kern_bind(td, fd, sa)
#endif
#ifdef MAC
error = mac_socket_check_bind(td->td_ucred, so, sa);
- if (error == 0)
+ if (error == 0) {
+#endif
+ if (dirfd == AT_FDCWD)
+ error = sobind(so, sa, td);
+ else
+ error = sobindat(dirfd, so, sa, td);
+#ifdef MAC
+ }
#endif
- error = sobind(so, sa, td);
fdrop(fp, td);
return (error);
}
/* ARGSUSED */
-#ifdef __rtems__
+#ifndef __rtems__
static
+int
+sys_bindat(td, uap)
+ struct thread *td;
+ struct bindat_args /* {
+ int fd;
+ int s;
+ caddr_t name;
+ int namelen;
+ } */ *uap;
+{
+ struct sockaddr *sa;
+ int error;
+
+ error = getsockaddr(&sa, uap->name, uap->namelen);
+ if (error == 0) {
+ error = kern_bindat(td, uap->fd, uap->s, sa);
+ free(sa, M_SONAME);
+ }
+ return (error);
+}
#endif /* __rtems__ */
+
+/* ARGSUSED */
int
sys_listen(td, uap)
struct thread *td;
@@ -404,10 +357,12 @@ sys_listen(td, uap)
{
struct socket *so;
struct file *fp;
+ cap_rights_t rights;
int error;
AUDIT_ARG_FD(uap->s);
- error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_LISTEN, &fp, NULL);
+ error = getsock_cap(td, uap->s, cap_rights_init(&rights, CAP_LISTEN),
+ &fp, NULL);
if (error == 0) {
so = fp->f_data;
#ifdef MAC
@@ -441,60 +396,50 @@ listen(int socket, int backlog)
#endif /* __rtems__ */
#ifdef __rtems__
-static int
-kern_accept(struct thread *td, int s, struct sockaddr **name,
- socklen_t *namelen, struct file **fp);
+static int kern_accept4(struct thread *td, int s, struct sockaddr **name,
+ socklen_t *namelen, int flags, struct file **fp);
#endif /* __rtems__ */
/*
* accept1()
*/
static int
-accept1(td, uap, compat)
+accept1(td, s, uname, anamelen, flags)
struct thread *td;
- struct accept_args /* {
- int s;
- struct sockaddr * __restrict name;
- socklen_t * __restrict anamelen;
- } */ *uap;
- int compat;
+ int s;
+ struct sockaddr *uname;
+ socklen_t *anamelen;
+ int flags;
{
struct sockaddr *name;
socklen_t namelen;
struct file *fp;
int error;
- if (uap->name == NULL)
- return (kern_accept(td, uap->s, NULL, NULL, NULL));
+ if (uname == NULL)
+ return (kern_accept4(td, s, NULL, NULL, flags, NULL));
- error = copyin(uap->anamelen, &namelen, sizeof (namelen));
- if (error)
+ error = copyin(anamelen, &namelen, sizeof (namelen));
+ if (error != 0)
return (error);
- error = kern_accept(td, uap->s, &name, &namelen, &fp);
+ error = kern_accept4(td, s, &name, &namelen, flags, &fp);
- /*
- * return a namelen of zero for older code which might
- * ignore the return value from accept.
- */
- if (error) {
- (void) copyout(&namelen,
- uap->anamelen, sizeof(*uap->anamelen));
+ if (error != 0)
return (error);
- }
- if (error == 0 && name != NULL) {
+ if (error == 0 && uname != NULL) {
#ifdef COMPAT_OLDSOCK
- if (compat)
+ if (flags & ACCEPT4_COMPAT)
((struct osockaddr *)name)->sa_family =
name->sa_family;
#endif
- error = copyout(name, uap->name, namelen);
+ error = copyout(name, uname, namelen);
}
if (error == 0)
- error = copyout(&namelen, uap->anamelen,
+ error = copyout(&namelen, anamelen,
sizeof(namelen));
- if (error)
- fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
+ if (error != 0)
+ fdclose(td, fp, td->td_retval[0]);
fdrop(fp, td);
free(name, M_SONAME);
return (error);
@@ -505,15 +450,11 @@ accept(int socket, struct sockaddr *__restrict address,
socklen_t *__restrict address_len)
{
struct thread *td = rtems_bsd_get_curthread_or_null();
- struct accept_args ua = {
- .s = socket,
- .name = address,
- .anamelen = address_len
- };
int error;
if (td != NULL) {
- error = accept1(td, &ua);
+ error = accept1(td, socket, address, address_len,
+ ACCEPT4_INHERIT);
} else {
error = ENOMEM;
}
@@ -526,34 +467,34 @@ accept(int socket, struct sockaddr *__restrict address,
}
#endif /* __rtems__ */
+#ifndef __rtems__
int
kern_accept(struct thread *td, int s, struct sockaddr **name,
socklen_t *namelen, struct file **fp)
{
-#ifndef __rtems__
- struct filedesc *fdp;
+ return (kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp));
+}
#endif /* __rtems__ */
+
+int
+kern_accept4(struct thread *td, int s, struct sockaddr **name,
+ socklen_t *namelen, int flags, struct file **fp)
+{
struct file *headfp, *nfp = NULL;
struct sockaddr *sa = NULL;
- int error;
struct socket *head, *so;
- int fd;
+ cap_rights_t rights;
u_int fflag;
pid_t pgid;
- int tmp;
+ int error, fd, tmp;
- if (name) {
+ if (name != NULL)
*name = NULL;
- if (*namelen < 0)
- return (EINVAL);
- }
AUDIT_ARG_FD(s);
-#ifndef __rtems__
- fdp = td->td_proc->p_fd;
-#endif /* __rtems__ */
- error = getsock_cap(fdp, s, CAP_ACCEPT, &headfp, &fflag);
- if (error)
+ error = getsock_cap(td, s, cap_rights_init(&rights, CAP_ACCEPT),
+ &headfp, &fflag);
+ if (error != 0)
return (error);
head = headfp->f_data;
if ((head->so_options & SO_ACCEPTCONN) == 0) {
@@ -565,8 +506,8 @@ kern_accept(struct thread *td, int s, struct sockaddr **name,
if (error != 0)
goto done;
#endif
- error = falloc(td, &nfp, &fd, 0);
- if (error)
+ error = falloc(td, &nfp, &fd, (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0);
+ if (error != 0)
goto done;
ACCEPT_LOCK();
if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
@@ -581,7 +522,7 @@ kern_accept(struct thread *td, int s, struct sockaddr **name,
}
error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
"accept", 0);
- if (error) {
+ if (error != 0) {
ACCEPT_UNLOCK();
goto noconnection;
}
@@ -606,7 +547,10 @@ kern_accept(struct thread *td, int s, struct sockaddr **name,
TAILQ_REMOVE(&head->so_comp, so, so_list);
head->so_qlen--;
- so->so_state |= (head->so_state & SS_NBIO);
+ if (flags & ACCEPT4_INHERIT)
+ so->so_state |= (head->so_state & SS_NBIO);
+ else
+ so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
so->so_qstate &= ~SQ_COMP;
so->so_head = NULL;
@@ -619,9 +563,15 @@ kern_accept(struct thread *td, int s, struct sockaddr **name,
/* connection has been removed from the listen queue */
KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
- pgid = fgetown(&head->so_sigio);
- if (pgid != 0)
- fsetown(pgid, &so->so_sigio);
+ if (flags & ACCEPT4_INHERIT) {
+ pgid = fgetown(&head->so_sigio);
+ if (pgid != 0)
+ fsetown(pgid, &so->so_sigio);
+ } else {
+ fflag &= ~(FNONBLOCK | FASYNC);
+ if (flags & SOCK_NONBLOCK)
+ fflag |= FNONBLOCK;
+ }
finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
/* Sync socket nonblocking/async state with file flags */
@@ -629,22 +579,16 @@ kern_accept(struct thread *td, int s, struct sockaddr **name,
(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
tmp = fflag & FASYNC;
(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
- sa = 0;
+ sa = NULL;
error = soaccept(so, &sa);
- if (error) {
- /*
- * return a namelen of zero for older code which might
- * ignore the return value from accept.
- */
- if (name)
- *namelen = 0;
+ if (error != 0)
goto noconnection;
- }
if (sa == NULL) {
if (name)
*namelen = 0;
goto done;
}
+ AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa);
if (name) {
/* check sa_len before it is destroyed */
if (*namelen > sa->sa_len)
@@ -657,15 +601,14 @@ kern_accept(struct thread *td, int s, struct sockaddr **name,
sa = NULL;
}
noconnection:
- if (sa)
- free(sa, M_SONAME);
+ free(sa, M_SONAME);
/*
* close the new descriptor, assuming someone hasn't ripped it
* out from under us.
*/
- if (error)
- fdclose(fdp, nfp, fd, td);
+ if (error != 0)
+ fdclose(td, nfp, fd);
/*
* Release explicitly held references before returning. We return
@@ -692,7 +635,19 @@ sys_accept(td, uap)
struct accept_args *uap;
{
- return (accept1(td, uap, 0));
+ return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT));
+}
+
+int
+sys_accept4(td, uap)
+ struct thread *td;
+ struct accept4_args *uap;
+{
+
+ if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return (EINVAL);
+
+ return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags));
}
#ifdef COMPAT_OLDSOCK
@@ -702,13 +657,17 @@ oaccept(td, uap)
struct accept_args *uap;
{
- return (accept1(td, uap, 1));
+ return (accept1(td, uap->s, uap->name, uap->anamelen,
+ ACCEPT4_INHERIT | ACCEPT4_COMPAT));
}
#endif /* COMPAT_OLDSOCK */
#endif /* __rtems__ */
/* ARGSUSED */
#ifdef __rtems__
+static int kern_connectat(struct thread *td, int dirfd, int fd,
+ struct sockaddr *sa);
+
static
#endif /* __rtems__ */
int
@@ -724,11 +683,10 @@ sys_connect(td, uap)
int error;
error = getsockaddr(&sa, uap->name, uap->namelen);
- if (error)
- return (error);
-
- error = kern_connect(td, uap->s, sa);
- free(sa, M_SONAME);
+ if (error == 0) {
+ error = kern_connectat(td, AT_FDCWD, uap->s, sa);
+ free(sa, M_SONAME);
+ }
return (error);
}
#ifdef __rtems__
@@ -753,21 +711,19 @@ connect(int socket, const struct sockaddr *address, socklen_t address_len)
}
#endif /* __rtems__ */
-
int
-kern_connect(td, fd, sa)
- struct thread *td;
- int fd;
- struct sockaddr *sa;
+kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
{
struct socket *so;
struct file *fp;
- int error;
- int interrupted = 0;
+ cap_rights_t rights;
+ int error, interrupted = 0;
AUDIT_ARG_FD(fd);
- error = getsock_cap(td->td_proc->p_fd, fd, CAP_CONNECT, &fp, NULL);
- if (error)
+ AUDIT_ARG_SOCKADDR(td, dirfd, sa);
+ error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_CONNECT),
+ &fp, NULL);
+ if (error != 0)
return (error);
so = fp->f_data;
if (so->so_state & SS_ISCONNECTING) {
@@ -780,11 +736,14 @@ kern_connect(td, fd, sa)
#endif
#ifdef MAC
error = mac_socket_check_connect(td->td_ucred, so, sa);
- if (error)
+ if (error != 0)
goto bad;
#endif
- error = soconnect(so, sa, td);
- if (error)
+ if (dirfd == AT_FDCWD)
+ error = soconnect(so, sa, td);
+ else
+ error = soconnectat(dirfd, so, sa, td);
+ if (error != 0)
goto bad;
if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
error = EINPROGRESS;
@@ -794,7 +753,7 @@ kern_connect(td, fd, sa)
while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
"connec", 0);
- if (error) {
+ if (error != 0) {
if (error == EINTR || error == ERESTART)
interrupted = 1;
break;
@@ -815,13 +774,34 @@ done1:
return (error);
}
+#ifndef __rtems__
+/* ARGSUSED */
+int
+sys_connectat(td, uap)
+ struct thread *td;
+ struct connectat_args /* {
+ int fd;
+ int s;
+ caddr_t name;
+ int namelen;
+ } */ *uap;
+{
+ struct sockaddr *sa;
+ int error;
+
+ error = getsockaddr(&sa, uap->name, uap->namelen);
+ if (error == 0) {
+ error = kern_connectat(td, uap->fd, uap->s, sa);
+ free(sa, M_SONAME);
+ }
+ return (error);
+}
+#endif /* __rtems__ */
+
int
kern_socketpair(struct thread *td, int domain, int type, int protocol,
int *rsv)
{
-#ifndef __rtems__
- struct filedesc *fdp = td->td_proc->p_fd;
-#endif /* __rtems__ */
struct file *fp1, *fp2;
struct socket *so1, *so2;
int fd, error, oflag, fflag;
@@ -844,35 +824,35 @@ kern_socketpair(struct thread *td, int domain, int type, int protocol,
/* We might want to have a separate check for socket pairs. */
error = mac_socket_check_create(td->td_ucred, domain, type,
protocol);
- if (error)
+ if (error != 0)
return (error);
#endif
error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
- if (error)
+ if (error != 0)
return (error);
error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
- if (error)
+ if (error != 0)
goto free1;
/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
error = falloc(td, &fp1, &fd, oflag);
- if (error)
+ if (error != 0)
goto free2;
rsv[0] = fd;
fp1->f_data = so1; /* so1 already has ref count */
error = falloc(td, &fp2, &fd, oflag);
- if (error)
+ if (error != 0)
goto free3;
fp2->f_data = so2; /* so2 already has ref count */
rsv[1] = fd;
error = soconnect2(so1, so2);
- if (error)
+ if (error != 0)
goto free4;
if (type == SOCK_DGRAM) {
/*
* Datagram socket connection is asymmetric.
*/
error = soconnect2(so2, so1);
- if (error)
+ if (error != 0)
goto free4;
}
finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data,
@@ -887,10 +867,10 @@ kern_socketpair(struct thread *td, int domain, int type, int protocol,
fdrop(fp2, td);
return (0);
free4:
- fdclose(fdp, fp2, rsv[1], td);
+ fdclose(td, fp2, rsv[1]);
fdrop(fp2, td);
free3:
- fdclose(fdp, fp1, rsv[0], td);
+ fdclose(td, fp1, rsv[0]);
fdrop(fp1, td);
free2:
if (so2 != NULL)
@@ -916,11 +896,11 @@ sys_socketpair(struct thread *td, struct socketpair_args *uap)
error = kern_socketpair(td, uap->domain, uap->type,
uap->protocol, sv);
- if (error)
+ if (error != 0)
return (error);
#ifndef __rtems__
error = copyout(sv, uap->rsv, 2 * sizeof(int));
- if (error) {
+ if (error != 0) {
(void)kern_close(td, sv[0]);
(void)kern_close(td, sv[1]);
}
@@ -973,7 +953,7 @@ sendit(td, s, mp, flags)
if (mp->msg_name != NULL) {
error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
- if (error) {
+ if (error != 0) {
to = NULL;
goto bad;
}
@@ -993,13 +973,13 @@ sendit(td, s, mp, flags)
}
error = sockargs(&control, mp->msg_control,
mp->msg_controllen, MT_CONTROL);
- if (error)
+ if (error != 0)
goto bad;
#ifdef COMPAT_OLDSOCK
if (mp->msg_flags == MSG_COMPAT) {
struct cmsghdr *cm;
- M_PREPEND(control, sizeof(*cm), M_WAIT);
+ M_PREPEND(control, sizeof(*cm), M_WAITOK);
cm = mtod(control, struct cmsghdr *);
cm->cmsg_len = control->m_len;
cm->cmsg_level = SOL_SOCKET;
@@ -1013,8 +993,7 @@ sendit(td, s, mp, flags)
error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
bad:
- if (to)
- free(to, M_SONAME);
+ free(to, M_SONAME);
return (error);
}
@@ -1031,20 +1010,24 @@ kern_sendit(td, s, mp, flags, control, segflg)
struct uio auio;
struct iovec *iov;
struct socket *so;
- int i, error;
- ssize_t len;
#ifndef __rtems__
cap_rights_t rights;
#endif /* __rtems__ */
#ifdef KTRACE
struct uio *ktruio = NULL;
#endif
+ ssize_t len;
+ int i, error;
AUDIT_ARG_FD(s);
#ifndef __rtems__
- rights = CAP_WRITE;
- if (mp->msg_name != NULL)
- rights |= CAP_CONNECT;
+ cap_rights_init(&rights, CAP_SEND);
+ if (mp->msg_name != NULL) {
+ AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name);
+ cap_rights_set(&rights, CAP_CONNECT);
+ }
+ error = getsock_cap(td, s, &rights, &fp, NULL);
+ if (error != 0)
#endif /* __rtems__ */
error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL);
if (error)
@@ -1059,11 +1042,11 @@ kern_sendit(td, s, mp, flags, control, segflg)
if (mp->msg_name != NULL) {
error = mac_socket_check_connect(td->td_ucred, so,
mp->msg_name);
- if (error)
+ if (error != 0)
goto bad;
}
error = mac_socket_check_send(td->td_ucred, so);
- if (error)
+ if (error != 0)
goto bad;
#endif
@@ -1087,7 +1070,7 @@ kern_sendit(td, s, mp, flags, control, segflg)
#endif
len = auio.uio_resid;
error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
- if (error) {
+ if (error != 0) {
if (auio.uio_resid != len && (error == ERESTART ||
error == EINTR || error == EWOULDBLOCK))
error = 0;
@@ -1133,7 +1116,6 @@ sys_sendto(td, uap)
{
struct msghdr msg;
struct iovec aiov;
- int error;
msg.msg_name = uap->to;
msg.msg_namelen = uap->tolen;
@@ -1145,8 +1127,7 @@ sys_sendto(td, uap)
#endif
aiov.iov_base = uap->buf;
aiov.iov_len = uap->len;
- error = sendit(td, uap->s, &msg, uap->flags);
- return (error);
+ return (sendit(td, uap->s, &msg, uap->flags));
}
#ifdef __rtems__
ssize_t
@@ -1216,7 +1197,6 @@ osend(td, uap)
{
struct msghdr msg;
struct iovec aiov;
- int error;
msg.msg_name = 0;
msg.msg_namelen = 0;
@@ -1226,8 +1206,7 @@ osend(td, uap)
aiov.iov_len = uap->len;
msg.msg_control = 0;
msg.msg_flags = 0;
- error = sendit(td, uap->s, &msg, uap->flags);
- return (error);
+ return (sendit(td, uap->s, &msg, uap->flags));
}
int
@@ -1244,10 +1223,10 @@ osendmsg(td, uap)
int error;
error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
- if (error)
+ if (error != 0)
return (error);
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
- if (error)
+ if (error != 0)
return (error);
msg.msg_iov = iov;
msg.msg_flags = MSG_COMPAT;
@@ -1275,10 +1254,10 @@ sys_sendmsg(td, uap)
int error;
error = copyin(uap->msg, &msg, sizeof (msg));
- if (error)
+ if (error != 0)
return (error);
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
- if (error)
+ if (error != 0)
return (error);
msg.msg_iov = iov;
#ifdef COMPAT_OLDSOCK
@@ -1327,30 +1306,31 @@ kern_recvit(td, s, mp, fromseg, controlp)
{
struct uio auio;
struct iovec *iov;
- int i;
- ssize_t len;
- int error;
- struct mbuf *m, *control = 0;
+ struct mbuf *m, *control = NULL;
caddr_t ctlbuf;
struct file *fp;
struct socket *so;
- struct sockaddr *fromsa = 0;
+ struct sockaddr *fromsa = NULL;
+ cap_rights_t rights;
#ifdef KTRACE
struct uio *ktruio = NULL;
#endif
+ ssize_t len;
+ int error, i;
if (controlp != NULL)
*controlp = NULL;
AUDIT_ARG_FD(s);
- error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL);
- if (error)
+ error = getsock_cap(td, s, cap_rights_init(&rights, CAP_RECV),
+ &fp, NULL);
+ if (error != 0)
return (error);
so = fp->f_data;
#ifdef MAC
error = mac_socket_check_receive(td->td_ucred, so);
- if (error) {
+ if (error != 0) {
fdrop(fp, td);
return (error);
}
@@ -1375,26 +1355,28 @@ kern_recvit(td, s, mp, fromseg, controlp)
ktruio = cloneuio(&auio);
#endif
len = auio.uio_resid;
- error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
- (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
+ error = soreceive(so, &fromsa, &auio, NULL,
+ (mp->msg_control || controlp) ? &control : NULL,
&mp->msg_flags);
- if (error) {
+ if (error != 0) {
if (auio.uio_resid != len && (error == ERESTART ||
error == EINTR || error == EWOULDBLOCK))
error = 0;
}
+ if (fromsa != NULL)
+ AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa);
#ifdef KTRACE
if (ktruio != NULL) {
ktruio->uio_resid = len - auio.uio_resid;
ktrgenio(s, UIO_READ, ktruio, error);
}
#endif
- if (error)
+ if (error != 0)
goto out;
td->td_retval[0] = len - auio.uio_resid;
if (mp->msg_name) {
len = mp->msg_namelen;
- if (len <= 0 || fromsa == 0)
+ if (len <= 0 || fromsa == NULL)
len = 0;
else {
/* save sa_len before it is destroyed by MSG_COMPAT */
@@ -1407,7 +1389,7 @@ kern_recvit(td, s, mp, fromseg, controlp)
if (fromseg == UIO_USERSPACE) {
error = copyout(fromsa, mp->msg_name,
(unsigned)len);
- if (error)
+ if (error != 0)
goto out;
} else
bcopy(fromsa, mp->msg_name, len);
@@ -1466,10 +1448,9 @@ out:
if (fromsa && KTRPOINT(td, KTR_STRUCT))
ktrsockaddr(fromsa);
#endif
- if (fromsa)
- free(fromsa, M_SONAME);
+ free(fromsa, M_SONAME);
- if (error == 0 && controlp != NULL)
+ if (error == 0 && controlp != NULL)
*controlp = control;
else if (control)
m_freem(control);
@@ -1487,9 +1468,9 @@ recvit(td, s, mp, namelenp)
int error;
error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
- if (error)
+ if (error != 0)
return (error);
- if (namelenp) {
+ if (namelenp != NULL) {
error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
#ifdef COMPAT_OLDSOCK
if (mp->msg_flags & MSG_COMPAT)
@@ -1521,7 +1502,7 @@ sys_recvfrom(td, uap)
if (uap->fromlenaddr) {
error = copyin(uap->fromlenaddr,
&msg.msg_namelen, sizeof (msg.msg_namelen));
- if (error)
+ if (error != 0)
goto done2;
} else {
msg.msg_namelen = 0;
@@ -1535,7 +1516,7 @@ sys_recvfrom(td, uap)
msg.msg_flags = uap->flags;
error = recvit(td, uap->s, &msg, uap->fromlenaddr);
done2:
- return(error);
+ return (error);
}
#ifdef __rtems__
ssize_t
@@ -1593,7 +1574,6 @@ orecv(td, uap)
{
struct msghdr msg;
struct iovec aiov;
- int error;
msg.msg_name = 0;
msg.msg_namelen = 0;
@@ -1603,8 +1583,7 @@ orecv(td, uap)
aiov.iov_len = uap->len;
msg.msg_control = 0;
msg.msg_flags = uap->flags;
- error = recvit(td, uap->s, &msg, NULL);
- return (error);
+ return (recvit(td, uap->s, &msg, NULL));
}
/*
@@ -1626,10 +1605,10 @@ orecvmsg(td, uap)
int error;
error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
- if (error)
+ if (error != 0)
return (error);
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
- if (error)
+ if (error != 0)
return (error);
msg.msg_flags = uap->flags | MSG_COMPAT;
msg.msg_iov = iov;
@@ -1660,10 +1639,10 @@ sys_recvmsg(td, uap)
int error;
error = copyin(uap->msg, &msg, sizeof (msg));
- if (error)
+ if (error != 0)
return (error);
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
- if (error)
+ if (error != 0)
return (error);
msg.msg_flags = uap->flags;
#ifdef COMPAT_OLDSOCK
@@ -1719,14 +1698,26 @@ sys_shutdown(td, uap)
{
struct socket *so;
struct file *fp;
+ cap_rights_t rights;
int error;
AUDIT_ARG_FD(uap->s);
- error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SHUTDOWN, &fp,
- NULL);
+ error = getsock_cap(td, uap->s, cap_rights_init(&rights, CAP_SHUTDOWN),
+ &fp, NULL);
if (error == 0) {
so = fp->f_data;
error = soshutdown(so, uap->how);
+#ifndef __rtems__
+ /*
+ * Previous versions did not return ENOTCONN, but 0 in
+ * case the socket was not connected. Some important
+ * programs like syslogd up to r279016, 2015-02-19,
+ * still depend on this behavior.
+ */
+ if (error == ENOTCONN &&
+ td->td_proc->p_osrel < P_OSREL_SHUTDOWN_ENOTCONN)
+ error = 0;
+#endif /* __rtems__ */
fdrop(fp, td);
}
return (error);
@@ -1747,6 +1738,9 @@ shutdown(int socket, int how)
/* ARGSUSED */
#ifdef __rtems__
+static int kern_setsockopt(struct thread *td, int s, int level, int name,
+ void *val, enum uio_seg valseg, socklen_t valsize);
+
static
#endif /* __rtems__ */
int
@@ -1799,10 +1793,11 @@ kern_setsockopt(td, s, level, name, val, valseg, valsize)
enum uio_seg valseg;
socklen_t valsize;
{
- int error;
struct socket *so;
struct file *fp;
struct sockopt sopt;
+ cap_rights_t rights;
+ int error;
if (val == NULL && valsize != 0)
return (EFAULT);
@@ -1826,7 +1821,8 @@ kern_setsockopt(td, s, level, name, val, valseg, valsize)
}
AUDIT_ARG_FD(s);
- error = getsock_cap(td->td_proc->p_fd, s, CAP_SETSOCKOPT, &fp, NULL);
+ error = getsock_cap(td, s, cap_rights_init(&rights, CAP_SETSOCKOPT),
+ &fp, NULL);
if (error == 0) {
so = fp->f_data;
error = sosetopt(so, &sopt);
@@ -1837,6 +1833,9 @@ kern_setsockopt(td, s, level, name, val, valseg, valsize)
/* ARGSUSED */
#ifdef __rtems__
+static int kern_getsockopt(struct thread *td, int s, int level, int name,
+ void *val, enum uio_seg valseg, socklen_t *valsize);
+
static
#endif /* __rtems__ */
int
@@ -1851,11 +1850,11 @@ sys_getsockopt(td, uap)
} */ *uap;
{
socklen_t valsize;
- int error;
+ int error;
if (uap->val) {
error = copyin(uap->avalsize, &valsize, sizeof (valsize));
- if (error)
+ if (error != 0)
return (error);
}
@@ -1905,10 +1904,11 @@ kern_getsockopt(td, s, level, name, val, valseg, valsize)
enum uio_seg valseg;
socklen_t *valsize;
{
- int error;
- struct socket *so;
+ struct socket *so;
struct file *fp;
- struct sockopt sopt;
+ struct sockopt sopt;
+ cap_rights_t rights;
+ int error;
if (val == NULL)
*valsize = 0;
@@ -1932,7 +1932,8 @@ kern_getsockopt(td, s, level, name, val, valseg, valsize)
}
AUDIT_ARG_FD(s);
- error = getsock_cap(td->td_proc->p_fd, s, CAP_GETSOCKOPT, &fp, NULL);
+ error = getsock_cap(td, s, cap_rights_init(&rights, CAP_GETSOCKOPT),
+ &fp, NULL);
if (error == 0) {
so = fp->f_data;
error = sogetopt(so, &sopt);
@@ -1966,11 +1967,11 @@ getsockname1(td, uap, compat)
int error;
error = copyin(uap->alen, &len, sizeof(len));
- if (error)
+ if (error != 0)
return (error);
error = kern_getsockname(td, uap->fdes, &sa, &len);
- if (error)
+ if (error != 0)
return (error);
if (len != 0) {
@@ -2014,22 +2015,21 @@ kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
{
struct socket *so;
struct file *fp;
+ cap_rights_t rights;
socklen_t len;
int error;
- if (*alen < 0)
- return (EINVAL);
-
AUDIT_ARG_FD(fd);
- error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETSOCKNAME, &fp, NULL);
- if (error)
+ error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_GETSOCKNAME),
+ &fp, NULL);
+ if (error != 0)
return (error);
so = fp->f_data;
*sa = NULL;
CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
CURVNET_RESTORE();
- if (error)
+ if (error != 0)
goto bad;
if (*sa == NULL)
len = 0;
@@ -2042,7 +2042,7 @@ kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
#endif
bad:
fdrop(fp, td);
- if (error && *sa) {
+ if (error != 0 && *sa != NULL) {
free(*sa, M_SONAME);
*sa = NULL;
}
@@ -2095,11 +2095,11 @@ getpeername1(td, uap, compat)
int error;
error = copyin(uap->alen, &len, sizeof (len));
- if (error)
+ if (error != 0)
return (error);
error = kern_getpeername(td, uap->fdes, &sa, &len);
- if (error)
+ if (error != 0)
return (error);
if (len != 0) {
@@ -2143,15 +2143,14 @@ kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
{
struct socket *so;
struct file *fp;
+ cap_rights_t rights;
socklen_t len;
int error;
- if (*alen < 0)
- return (EINVAL);
-
AUDIT_ARG_FD(fd);
- error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETPEERNAME, &fp, NULL);
- if (error)
+ error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_GETPEERNAME),
+ &fp, NULL);
+ if (error != 0)
return (error);
so = fp->f_data;
if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
@@ -2162,7 +2161,7 @@ kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
CURVNET_RESTORE();
- if (error)
+ if (error != 0)
goto bad;
if (*sa == NULL)
len = 0;
@@ -2174,7 +2173,7 @@ kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
ktrsockaddr(*sa);
#endif
bad:
- if (error && *sa) {
+ if (error != 0 && *sa != NULL) {
free(*sa, M_SONAME);
*sa = NULL;
}
@@ -2206,31 +2205,26 @@ ogetpeername(td, uap)
#endif /* COMPAT_OLDSOCK */
#endif /* __rtems__ */
-int
-sockargs(mp, buf, buflen, type)
- struct mbuf **mp;
- caddr_t buf;
- int buflen, type;
+static int
+sockargs(struct mbuf **mp, char *buf, socklen_t buflen, int type)
{
struct sockaddr *sa;
struct mbuf *m;
int error;
- if ((u_int)buflen > MLEN) {
+ if (buflen > MLEN) {
#ifdef COMPAT_OLDSOCK
- if (type == MT_SONAME && (u_int)buflen <= 112)
+ if (type == MT_SONAME && buflen <= 112)
buflen = MLEN; /* unix domain compat. hack */
else
#endif
- if ((u_int)buflen > MCLBYTES)
+ if (buflen > MCLBYTES)
return (EINVAL);
}
- m = m_get(M_WAIT, type);
- if ((u_int)buflen > MLEN)
- MCLGET(m, M_WAIT);
+ m = m_get2(buflen, M_WAITOK, type, 0);
m->m_len = buflen;
- error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
- if (error)
+ error = copyin(buf, mtod(m, void *), buflen);
+ if (error != 0)
(void) m_free(m);
else {
*mp = m;
@@ -2262,7 +2256,7 @@ getsockaddr(namp, uaddr, len)
return (EINVAL);
sa = malloc(len, M_SONAME, M_WAITOK);
error = copyin(uaddr, sa, len);
- if (error) {
+ if (error != 0) {
free(sa, M_SONAME);
} else {
#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
@@ -2274,1100 +2268,3 @@ getsockaddr(namp, uaddr, len)
}
return (error);
}
-
-#ifndef __rtems__
-#include <sys/condvar.h>
-
-struct sendfile_sync {
- struct mtx mtx;
- struct cv cv;
- unsigned count;
-};
-
-/*
- * Detach mapped page and release resources back to the system.
- */
-void
-sf_buf_mext(void *addr, void *args)
-{
- vm_page_t m;
- struct sendfile_sync *sfs;
-
- m = sf_buf_page(args);
- sf_buf_free(args);
- vm_page_lock(m);
- vm_page_unwire(m, 0);
- /*
- * Check for the object going away on us. This can
- * happen since we don't hold a reference to it.
- * If so, we're responsible for freeing the page.
- */
- if (m->wire_count == 0 && m->object == NULL)
- vm_page_free(m);
- vm_page_unlock(m);
- if (addr == NULL)
- return;
- sfs = addr;
- mtx_lock(&sfs->mtx);
- KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
- if (--sfs->count == 0)
- cv_signal(&sfs->cv);
- mtx_unlock(&sfs->mtx);
-}
-
-/*
- * sendfile(2)
- *
- * int sendfile(int fd, int s, off_t offset, size_t nbytes,
- * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
- *
- * Send a file specified by 'fd' and starting at 'offset' to a socket
- * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes ==
- * 0. Optionally add a header and/or trailer to the socket output. If
- * specified, write the total number of bytes sent into *sbytes.
- */
-int
-sys_sendfile(struct thread *td, struct sendfile_args *uap)
-{
-
- return (do_sendfile(td, uap, 0));
-}
-
-static int
-do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
-{
- struct sf_hdtr hdtr;
- struct uio *hdr_uio, *trl_uio;
- int error;
-
- hdr_uio = trl_uio = NULL;
-
- if (uap->hdtr != NULL) {
- error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
- if (error)
- goto out;
- if (hdtr.headers != NULL) {
- error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
- if (error)
- goto out;
- }
- if (hdtr.trailers != NULL) {
- error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
- if (error)
- goto out;
-
- }
- }
-
- error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
-out:
- if (hdr_uio)
- free(hdr_uio, M_IOV);
- if (trl_uio)
- free(trl_uio, M_IOV);
- return (error);
-}
-
-#ifdef COMPAT_FREEBSD4
-int
-freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
-{
- struct sendfile_args args;
-
- args.fd = uap->fd;
- args.s = uap->s;
- args.offset = uap->offset;
- args.nbytes = uap->nbytes;
- args.hdtr = uap->hdtr;
- args.sbytes = uap->sbytes;
- args.flags = uap->flags;
-
- return (do_sendfile(td, &args, 1));
-}
-#endif /* COMPAT_FREEBSD4 */
-
-int
-kern_sendfile(struct thread *td, struct sendfile_args *uap,
- struct uio *hdr_uio, struct uio *trl_uio, int compat)
-{
- struct file *sock_fp;
- struct vnode *vp;
- struct vm_object *obj = NULL;
- struct socket *so = NULL;
- struct mbuf *m = NULL;
- struct sf_buf *sf;
- struct vm_page *pg;
- struct vattr va;
- off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
- int error, hdrlen = 0, mnw = 0;
- int vfslocked;
- int bsize;
- struct sendfile_sync *sfs = NULL;
-
- /*
- * The file descriptor must be a regular file and have a
- * backing VM object.
- * File offset must be positive. If it goes beyond EOF
- * we send only the header/trailer and no payload data.
- */
- AUDIT_ARG_FD(uap->fd);
- if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0)
- goto out;
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- vn_lock(vp, LK_SHARED | LK_RETRY);
- if (vp->v_type == VREG) {
- bsize = vp->v_mount->mnt_stat.f_iosize;
- if (uap->nbytes == 0) {
- error = VOP_GETATTR(vp, &va, td->td_ucred);
- if (error != 0) {
- VOP_UNLOCK(vp, 0);
- VFS_UNLOCK_GIANT(vfslocked);
- obj = NULL;
- goto out;
- }
- rem = va.va_size;
- } else
- rem = uap->nbytes;
- obj = vp->v_object;
- if (obj != NULL) {
- /*
- * Temporarily increase the backing VM
- * object's reference count so that a forced
- * reclamation of its vnode does not
- * immediately destroy it.
- */
- VM_OBJECT_LOCK(obj);
- if ((obj->flags & OBJ_DEAD) == 0) {
- vm_object_reference_locked(obj);
- VM_OBJECT_UNLOCK(obj);
- } else {
- VM_OBJECT_UNLOCK(obj);
- obj = NULL;
- }
- }
- } else
- bsize = 0; /* silence gcc */
- VOP_UNLOCK(vp, 0);
- VFS_UNLOCK_GIANT(vfslocked);
- if (obj == NULL) {
- error = EINVAL;
- goto out;
- }
- if (uap->offset < 0) {
- error = EINVAL;
- goto out;
- }
-
- /*
- * The socket must be a stream socket and connected.
- * Remember if it a blocking or non-blocking socket.
- */
- if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE,
- &sock_fp, NULL)) != 0)
- goto out;
- so = sock_fp->f_data;
- if (so->so_type != SOCK_STREAM) {
- error = EINVAL;
- goto out;
- }
- if ((so->so_state & SS_ISCONNECTED) == 0) {
- error = ENOTCONN;
- goto out;
- }
- /*
- * Do not wait on memory allocations but return ENOMEM for
- * caller to retry later.
- * XXX: Experimental.
- */
- if (uap->flags & SF_MNOWAIT)
- mnw = 1;
-
- if (uap->flags & SF_SYNC) {
- sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
- mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
- cv_init(&sfs->cv, "sendfile");
- }
-
-#ifdef MAC
- error = mac_socket_check_send(td->td_ucred, so);
- if (error)
- goto out;
-#endif
-
- /* If headers are specified copy them into mbufs. */
- if (hdr_uio != NULL) {
- hdr_uio->uio_td = td;
- hdr_uio->uio_rw = UIO_WRITE;
- if (hdr_uio->uio_resid > 0) {
- /*
- * In FBSD < 5.0 the nbytes to send also included
- * the header. If compat is specified subtract the
- * header size from nbytes.
- */
- if (compat) {
- if (uap->nbytes > hdr_uio->uio_resid)
- uap->nbytes -= hdr_uio->uio_resid;
- else
- uap->nbytes = 0;
- }
- m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
- 0, 0, 0);
- if (m == NULL) {
- error = mnw ? EAGAIN : ENOBUFS;
- goto out;
- }
- hdrlen = m_length(m, NULL);
- }
- }
-
- /*
- * Protect against multiple writers to the socket.
- *
- * XXXRW: Historically this has assumed non-interruptibility, so now
- * we implement that, but possibly shouldn't.
- */
- (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
-
- /*
- * Loop through the pages of the file, starting with the requested
- * offset. Get a file page (do I/O if necessary), map the file page
- * into an sf_buf, attach an mbuf header to the sf_buf, and queue
- * it on the socket.
- * This is done in two loops. The inner loop turns as many pages
- * as it can, up to available socket buffer space, without blocking
- * into mbufs to have it bulk delivered into the socket send buffer.
- * The outer loop checks the state and available space of the socket
- * and takes care of the overall progress.
- */
- for (off = uap->offset; ; ) {
- struct mbuf *mtail;
- int loopbytes;
- int space;
- int done;
-
- if ((uap->nbytes != 0 && uap->nbytes == fsbytes) ||
- (uap->nbytes == 0 && va.va_size == fsbytes))
- break;
-
- mtail = NULL;
- loopbytes = 0;
- space = 0;
- done = 0;
-
- /*
- * Check the socket state for ongoing connection,
- * no errors and space in socket buffer.
- * If space is low allow for the remainder of the
- * file to be processed if it fits the socket buffer.
- * Otherwise block in waiting for sufficient space
- * to proceed, or if the socket is nonblocking, return
- * to userland with EAGAIN while reporting how far
- * we've come.
- * We wait until the socket buffer has significant free
- * space to do bulk sends. This makes good use of file
- * system read ahead and allows packet segmentation
- * offloading hardware to take over lots of work. If
- * we were not careful here we would send off only one
- * sfbuf at a time.
- */
- SOCKBUF_LOCK(&so->so_snd);
- if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
- so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
-retry_space:
- if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
- error = EPIPE;
- SOCKBUF_UNLOCK(&so->so_snd);
- goto done;
- } else if (so->so_error) {
- error = so->so_error;
- so->so_error = 0;
- SOCKBUF_UNLOCK(&so->so_snd);
- goto done;
- }
- space = sbspace(&so->so_snd);
- if (space < rem &&
- (space <= 0 ||
- space < so->so_snd.sb_lowat)) {
- if (so->so_state & SS_NBIO) {
- SOCKBUF_UNLOCK(&so->so_snd);
- error = EAGAIN;
- goto done;
- }
- /*
- * sbwait drops the lock while sleeping.
- * When we loop back to retry_space the
- * state may have changed and we retest
- * for it.
- */
- error = sbwait(&so->so_snd);
- /*
- * An error from sbwait usually indicates that we've
- * been interrupted by a signal. If we've sent anything
- * then return bytes sent, otherwise return the error.
- */
- if (error) {
- SOCKBUF_UNLOCK(&so->so_snd);
- goto done;
- }
- goto retry_space;
- }
- SOCKBUF_UNLOCK(&so->so_snd);
-
- /*
- * Reduce space in the socket buffer by the size of
- * the header mbuf chain.
- * hdrlen is set to 0 after the first loop.
- */
- space -= hdrlen;
-
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- error = vn_lock(vp, LK_SHARED);
- if (error != 0) {
- VFS_UNLOCK_GIANT(vfslocked);
- goto done;
- }
- error = VOP_GETATTR(vp, &va, td->td_ucred);
- if (error != 0 || off >= va.va_size) {
- VOP_UNLOCK(vp, 0);
- VFS_UNLOCK_GIANT(vfslocked);
- goto done;
- }
- VFS_UNLOCK_GIANT(vfslocked);
-
- /*
- * Loop and construct maximum sized mbuf chain to be bulk
- * dumped into socket buffer.
- */
- while (space > loopbytes) {
- vm_pindex_t pindex;
- vm_offset_t pgoff;
- struct mbuf *m0;
-
- /*
- * Calculate the amount to transfer.
- * Not to exceed a page, the EOF,
- * or the passed in nbytes.
- */
- pgoff = (vm_offset_t)(off & PAGE_MASK);
- rem = va.va_size - uap->offset;
- if (uap->nbytes != 0)
- rem = omin(rem, uap->nbytes);
- rem -= fsbytes + loopbytes;
- xfsize = omin(PAGE_SIZE - pgoff, rem);
- xfsize = omin(space - loopbytes, xfsize);
- if (xfsize <= 0) {
- done = 1; /* all data sent */
- break;
- }
-
- /*
- * Attempt to look up the page. Allocate
- * if not found or wait and loop if busy.
- */
- pindex = OFF_TO_IDX(off);
- VM_OBJECT_LOCK(obj);
- pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
- VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
-
- /*
- * Check if page is valid for what we need,
- * otherwise initiate I/O.
- * If we already turned some pages into mbufs,
- * send them off before we come here again and
- * block.
- */
- if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
- VM_OBJECT_UNLOCK(obj);
- else if (m != NULL)
- error = EAGAIN; /* send what we already got */
- else if (uap->flags & SF_NODISKIO)
- error = EBUSY;
- else {
- ssize_t resid;
-
- VM_OBJECT_UNLOCK(obj);
-
- /*
- * Get the page from backing store.
- * XXXMAC: Because we don't have fp->f_cred
- * here, we pass in NOCRED. This is probably
- * wrong, but is consistent with our original
- * implementation.
- */
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
- trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
- IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
- td->td_ucred, NOCRED, &resid, td);
- VFS_UNLOCK_GIANT(vfslocked);
- if (error)
- VM_OBJECT_LOCK(obj);
- mbstat.sf_iocnt++;
- }
- if (error) {
- vm_page_lock(pg);
- vm_page_unwire(pg, 0);
- /*
- * See if anyone else might know about
- * this page. If not and it is not valid,
- * then free it.
- */
- if (pg->wire_count == 0 && pg->valid == 0 &&
- pg->busy == 0 && !(pg->oflags & VPO_BUSY))
- vm_page_free(pg);
- vm_page_unlock(pg);
- VM_OBJECT_UNLOCK(obj);
- if (error == EAGAIN)
- error = 0; /* not a real error */
- break;
- }
-
- /*
- * Get a sendfile buf. When allocating the
- * first buffer for mbuf chain, we usually
- * wait as long as necessary, but this wait
- * can be interrupted. For consequent
- * buffers, do not sleep, since several
- * threads might exhaust the buffers and then
- * deadlock.
- */
- sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
- SFB_CATCH);
- if (sf == NULL) {
- mbstat.sf_allocfail++;
- vm_page_lock(pg);
- vm_page_unwire(pg, 0);
- KASSERT(pg->object != NULL,
- ("kern_sendfile: object disappeared"));
- vm_page_unlock(pg);
- if (m == NULL)
- error = (mnw ? EAGAIN : EINTR);
- break;
- }
-
- /*
- * Get an mbuf and set it up as having
- * external storage.
- */
- m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
- if (m0 == NULL) {
- error = (mnw ? EAGAIN : ENOBUFS);
- sf_buf_mext(NULL, sf);
- break;
- }
- MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
- sfs, sf, M_RDONLY, EXT_SFBUF);
- m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
- m0->m_len = xfsize;
-
- /* Append to mbuf chain. */
- if (mtail != NULL)
- mtail->m_next = m0;
- else if (m != NULL)
- m_last(m)->m_next = m0;
- else
- m = m0;
- mtail = m0;
-
- /* Keep track of bits processed. */
- loopbytes += xfsize;
- off += xfsize;
-
- if (sfs != NULL) {
- mtx_lock(&sfs->mtx);
- sfs->count++;
- mtx_unlock(&sfs->mtx);
- }
- }
-
- VOP_UNLOCK(vp, 0);
-
- /* Add the buffer chain to the socket buffer. */
- if (m != NULL) {
- int mlen, err;
-
- mlen = m_length(m, NULL);
- SOCKBUF_LOCK(&so->so_snd);
- if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
- error = EPIPE;
- SOCKBUF_UNLOCK(&so->so_snd);
- goto done;
- }
- SOCKBUF_UNLOCK(&so->so_snd);
- CURVNET_SET(so->so_vnet);
- /* Avoid error aliasing. */
- err = (*so->so_proto->pr_usrreqs->pru_send)
- (so, 0, m, NULL, NULL, td);
- CURVNET_RESTORE();
- if (err == 0) {
- /*
- * We need two counters to get the
- * file offset and nbytes to send
- * right:
- * - sbytes contains the total amount
- * of bytes sent, including headers.
- * - fsbytes contains the total amount
- * of bytes sent from the file.
- */
- sbytes += mlen;
- fsbytes += mlen;
- if (hdrlen) {
- fsbytes -= hdrlen;
- hdrlen = 0;
- }
- } else if (error == 0)
- error = err;
- m = NULL; /* pru_send always consumes */
- }
-
- /* Quit outer loop on error or when we're done. */
- if (done)
- break;
- if (error)
- goto done;
- }
-
- /*
- * Send trailers. Wimp out and use writev(2).
- */
- if (trl_uio != NULL) {
- sbunlock(&so->so_snd);
- error = kern_writev(td, uap->s, trl_uio);
- if (error == 0)
- sbytes += td->td_retval[0];
- goto out;
- }
-
-done:
- sbunlock(&so->so_snd);
-out:
- /*
- * If there was no error we have to clear td->td_retval[0]
- * because it may have been set by writev.
- */
- if (error == 0) {
- td->td_retval[0] = 0;
- }
- if (uap->sbytes != NULL) {
- copyout(&sbytes, uap->sbytes, sizeof(off_t));
- }
- if (obj != NULL)
- vm_object_deallocate(obj);
- if (vp != NULL) {
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
- }
- if (so)
- fdrop(sock_fp, td);
- if (m)
- m_freem(m);
-
- if (sfs != NULL) {
- mtx_lock(&sfs->mtx);
- if (sfs->count != 0)
- cv_wait(&sfs->cv, &sfs->mtx);
- KASSERT(sfs->count == 0, ("sendfile sync still busy"));
- cv_destroy(&sfs->cv);
- mtx_destroy(&sfs->mtx);
- free(sfs, M_TEMP);
- }
-
- if (error == ERESTART)
- error = EINTR;
-
- return (error);
-}
-
-/*
- * SCTP syscalls.
- * Functionality only compiled in if SCTP is defined in the kernel Makefile,
- * otherwise all return EOPNOTSUPP.
- * XXX: We should make this loadable one day.
- */
-int
-sys_sctp_peeloff(td, uap)
- struct thread *td;
- struct sctp_peeloff_args /* {
- int sd;
- caddr_t name;
- } */ *uap;
-{
-#if (defined(INET) || defined(INET6)) && defined(SCTP)
- struct filedesc *fdp;
- struct file *nfp = NULL;
- int error;
- struct socket *head, *so;
- int fd;
- u_int fflag;
-
- fdp = td->td_proc->p_fd;
- AUDIT_ARG_FD(uap->sd);
- error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag);
- if (error)
- goto done2;
- if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
- error = EOPNOTSUPP;
- goto done;
- }
- error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
- if (error)
- goto done;
- /*
- * At this point we know we do have a assoc to pull
- * we proceed to get the fd setup. This may block
- * but that is ok.
- */
-
- error = falloc(td, &nfp, &fd, 0);
- if (error)
- goto done;
- td->td_retval[0] = fd;
-
- CURVNET_SET(head->so_vnet);
- so = sonewconn(head, SS_ISCONNECTED);
- if (so == NULL) {
- error = ENOMEM;
- goto noconnection;
- }
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so);
- soref(so); /* file descriptor reference */
- SOCK_UNLOCK(so);
-
- ACCEPT_LOCK();
-
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_state |= (head->so_state & SS_NBIO);
- so->so_state &= ~SS_NOFDREF;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- ACCEPT_UNLOCK();
- finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
- error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
- if (error)
- goto noconnection;
- if (head->so_sigio != NULL)
- fsetown(fgetown(&head->so_sigio), &so->so_sigio);
-
-noconnection:
- /*
- * close the new descriptor, assuming someone hasn't ripped it
- * out from under us.
- */
- if (error)
- fdclose(fdp, nfp, fd, td);
-
- /*
- * Release explicitly held references before returning.
- */
- CURVNET_RESTORE();
-done:
- if (nfp != NULL)
- fdrop(nfp, td);
- fputsock(head);
-done2:
- return (error);
-#else /* SCTP */
- return (EOPNOTSUPP);
-#endif /* SCTP */
-}
-
-int
-sys_sctp_generic_sendmsg (td, uap)
- struct thread *td;
- struct sctp_generic_sendmsg_args /* {
- int sd,
- caddr_t msg,
- int mlen,
- caddr_t to,
- __socklen_t tolen,
- struct sctp_sndrcvinfo *sinfo,
- int flags
- } */ *uap;
-{
-#if (defined(INET) || defined(INET6)) && defined(SCTP)
- struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
- struct socket *so;
- struct file *fp = NULL;
- int error = 0, len;
- struct sockaddr *to = NULL;
-#ifdef KTRACE
- struct uio *ktruio = NULL;
-#endif
- struct uio auio;
- struct iovec iov[1];
- cap_rights_t rights;
-
- if (uap->sinfo) {
- error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
- if (error)
- return (error);
- u_sinfo = &sinfo;
- }
-
- rights = CAP_WRITE;
- if (uap->tolen) {
- error = getsockaddr(&to, uap->to, uap->tolen);
- if (error) {
- to = NULL;
- goto sctp_bad2;
- }
- rights |= CAP_CONNECT;
- }
-
- AUDIT_ARG_FD(uap->sd);
- error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
- if (error)
- goto sctp_bad;
-#ifdef KTRACE
- if (to && (KTRPOINT(td, KTR_STRUCT)))
- ktrsockaddr(to);
-#endif
-
- iov[0].iov_base = uap->msg;
- iov[0].iov_len = uap->mlen;
-
- so = (struct socket *)fp->f_data;
- if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
- error = EOPNOTSUPP;
- goto sctp_bad;
- }
-#ifdef MAC
- error = mac_socket_check_send(td->td_ucred, so);
- if (error)
- goto sctp_bad;
-#endif /* MAC */
-
- auio.uio_iov = iov;
- auio.uio_iovcnt = 1;
- auio.uio_segflg = UIO_USERSPACE;
- auio.uio_rw = UIO_WRITE;
- auio.uio_td = td;
- auio.uio_offset = 0; /* XXX */
- auio.uio_resid = 0;
- len = auio.uio_resid = uap->mlen;
- CURVNET_SET(so->so_vnet);
- error = sctp_lower_sosend(so, to, &auio,
- (struct mbuf *)NULL, (struct mbuf *)NULL,
- uap->flags, u_sinfo, td);
- CURVNET_RESTORE();
- if (error) {
- if (auio.uio_resid != len && (error == ERESTART ||
- error == EINTR || error == EWOULDBLOCK))
- error = 0;
- /* Generation of SIGPIPE can be controlled per socket. */
- if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
- !(uap->flags & MSG_NOSIGNAL)) {
- PROC_LOCK(td->td_proc);
- tdsignal(td, SIGPIPE);
- PROC_UNLOCK(td->td_proc);
- }
- }
- if (error == 0)
- td->td_retval[0] = len - auio.uio_resid;
-#ifdef KTRACE
- if (ktruio != NULL) {
- ktruio->uio_resid = td->td_retval[0];
- ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
- }
-#endif /* KTRACE */
-sctp_bad:
- if (fp)
- fdrop(fp, td);
-sctp_bad2:
- if (to)
- free(to, M_SONAME);
- return (error);
-#else /* SCTP */
- return (EOPNOTSUPP);
-#endif /* SCTP */
-}
-
-int
-sys_sctp_generic_sendmsg_iov(td, uap)
- struct thread *td;
- struct sctp_generic_sendmsg_iov_args /* {
- int sd,
- struct iovec *iov,
- int iovlen,
- caddr_t to,
- __socklen_t tolen,
- struct sctp_sndrcvinfo *sinfo,
- int flags
- } */ *uap;
-{
-#if (defined(INET) || defined(INET6)) && defined(SCTP)
- struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
- struct socket *so;
- struct file *fp = NULL;
- int error=0, i;
- ssize_t len;
- struct sockaddr *to = NULL;
-#ifdef KTRACE
- struct uio *ktruio = NULL;
-#endif
- struct uio auio;
- struct iovec *iov, *tiov;
- cap_rights_t rights;
-
- if (uap->sinfo) {
- error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
- if (error)
- return (error);
- u_sinfo = &sinfo;
- }
- rights = CAP_WRITE;
- if (uap->tolen) {
- error = getsockaddr(&to, uap->to, uap->tolen);
- if (error) {
- to = NULL;
- goto sctp_bad2;
- }
- rights |= CAP_CONNECT;
- }
-
- AUDIT_ARG_FD(uap->sd);
- error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
- if (error)
- goto sctp_bad1;
-
-#ifdef COMPAT_FREEBSD32
- if (SV_CURPROC_FLAG(SV_ILP32))
- error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
- uap->iovlen, &iov, EMSGSIZE);
- else
-#endif
- error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
- if (error)
- goto sctp_bad1;
-#ifdef KTRACE
- if (to && (KTRPOINT(td, KTR_STRUCT)))
- ktrsockaddr(to);
-#endif
-
- so = (struct socket *)fp->f_data;
- if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
- error = EOPNOTSUPP;
- goto sctp_bad;
- }
-#ifdef MAC
- error = mac_socket_check_send(td->td_ucred, so);
- if (error)
- goto sctp_bad;
-#endif /* MAC */
-
- auio.uio_iov = iov;
- auio.uio_iovcnt = uap->iovlen;
- auio.uio_segflg = UIO_USERSPACE;
- auio.uio_rw = UIO_WRITE;
- auio.uio_td = td;
- auio.uio_offset = 0; /* XXX */
- auio.uio_resid = 0;
- tiov = iov;
- for (i = 0; i <uap->iovlen; i++, tiov++) {
- if ((auio.uio_resid += tiov->iov_len) < 0) {
- error = EINVAL;
- goto sctp_bad;
- }
- }
- len = auio.uio_resid;
- CURVNET_SET(so->so_vnet);
- error = sctp_lower_sosend(so, to, &auio,
- (struct mbuf *)NULL, (struct mbuf *)NULL,
- uap->flags, u_sinfo, td);
- CURVNET_RESTORE();
- if (error) {
- if (auio.uio_resid != len && (error == ERESTART ||
- error == EINTR || error == EWOULDBLOCK))
- error = 0;
- /* Generation of SIGPIPE can be controlled per socket */
- if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
- !(uap->flags & MSG_NOSIGNAL)) {
- PROC_LOCK(td->td_proc);
- tdsignal(td, SIGPIPE);
- PROC_UNLOCK(td->td_proc);
- }
- }
- if (error == 0)
- td->td_retval[0] = len - auio.uio_resid;
-#ifdef KTRACE
- if (ktruio != NULL) {
- ktruio->uio_resid = td->td_retval[0];
- ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
- }
-#endif /* KTRACE */
-sctp_bad:
- free(iov, M_IOV);
-sctp_bad1:
- if (fp)
- fdrop(fp, td);
-sctp_bad2:
- if (to)
- free(to, M_SONAME);
- return (error);
-#else /* SCTP */
- return (EOPNOTSUPP);
-#endif /* SCTP */
-}
-
-int
-sys_sctp_generic_recvmsg(td, uap)
- struct thread *td;
- struct sctp_generic_recvmsg_args /* {
- int sd,
- struct iovec *iov,
- int iovlen,
- struct sockaddr *from,
- __socklen_t *fromlenaddr,
- struct sctp_sndrcvinfo *sinfo,
- int *msg_flags
- } */ *uap;
-{
-#if (defined(INET) || defined(INET6)) && defined(SCTP)
- uint8_t sockbufstore[256];
- struct uio auio;
- struct iovec *iov, *tiov;
- struct sctp_sndrcvinfo sinfo;
- struct socket *so;
- struct file *fp = NULL;
- struct sockaddr *fromsa;
- int fromlen;
- ssize_t len;
- int i, msg_flags;
- int error = 0;
-#ifdef KTRACE
- struct uio *ktruio = NULL;
-#endif
-
- AUDIT_ARG_FD(uap->sd);
- error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL);
- if (error) {
- return (error);
- }
-#ifdef COMPAT_FREEBSD32
- if (SV_CURPROC_FLAG(SV_ILP32))
- error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
- uap->iovlen, &iov, EMSGSIZE);
- else
-#endif
- error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
- if (error)
- goto out1;
-
- so = fp->f_data;
- if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
- error = EOPNOTSUPP;
- goto out;
- }
-#ifdef MAC
- error = mac_socket_check_receive(td->td_ucred, so);
- if (error) {
- goto out;
- }
-#endif /* MAC */
-
- if (uap->fromlenaddr) {
- error = copyin(uap->fromlenaddr,
- &fromlen, sizeof (fromlen));
- if (error) {
- goto out;
- }
- } else {
- fromlen = 0;
- }
- if (uap->msg_flags) {
- error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
- if (error) {
- goto out;
- }
- } else {
- msg_flags = 0;
- }
- auio.uio_iov = iov;
- auio.uio_iovcnt = uap->iovlen;
- auio.uio_segflg = UIO_USERSPACE;
- auio.uio_rw = UIO_READ;
- auio.uio_td = td;
- auio.uio_offset = 0; /* XXX */
- auio.uio_resid = 0;
- tiov = iov;
- for (i = 0; i <uap->iovlen; i++, tiov++) {
- if ((auio.uio_resid += tiov->iov_len) < 0) {
- error = EINVAL;
- goto out;
- }
- }
- len = auio.uio_resid;
- fromsa = (struct sockaddr *)sockbufstore;
-
-#ifdef KTRACE
- if (KTRPOINT(td, KTR_GENIO))
- ktruio = cloneuio(&auio);
-#endif /* KTRACE */
- memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
- CURVNET_SET(so->so_vnet);
- error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
- fromsa, fromlen, &msg_flags,
- (struct sctp_sndrcvinfo *)&sinfo, 1);
- CURVNET_RESTORE();
- if (error) {
- if (auio.uio_resid != len && (error == ERESTART ||
- error == EINTR || error == EWOULDBLOCK))
- error = 0;
- } else {
- if (uap->sinfo)
- error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
- }
-#ifdef KTRACE
- if (ktruio != NULL) {
- ktruio->uio_resid = len - auio.uio_resid;
- ktrgenio(uap->sd, UIO_READ, ktruio, error);
- }
-#endif /* KTRACE */
- if (error)
- goto out;
- td->td_retval[0] = len - auio.uio_resid;
-
- if (fromlen && uap->from) {
- len = fromlen;
- if (len <= 0 || fromsa == 0)
- len = 0;
- else {
- len = MIN(len, fromsa->sa_len);
- error = copyout(fromsa, uap->from, (size_t)len);
- if (error)
- goto out;
- }
- error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
- if (error) {
- goto out;
- }
- }
-#ifdef KTRACE
- if (KTRPOINT(td, KTR_STRUCT))
- ktrsockaddr(fromsa);
-#endif
- if (uap->msg_flags) {
- error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
- if (error) {
- goto out;
- }
- }
-out:
- free(iov, M_IOV);
-out1:
- if (fp)
- fdrop(fp, td);
-
- return (error);
-#else /* SCTP */
- return (EOPNOTSUPP);
-#endif /* SCTP */
-}
-#endif /* __rtems__ */
diff --git a/freebsd/sys/kern/uipc_usrreq.c b/freebsd/sys/kern/uipc_usrreq.c
index da4b624a..7b2d1bc4 100644
--- a/freebsd/sys/kern/uipc_usrreq.c
+++ b/freebsd/sys/kern/uipc_usrreq.c
@@ -53,7 +53,6 @@
*
* TODO:
* RDM
- * distinguish datagram size limits from flow control limits in SEQPACKET
* rethink name space problems
* need a proper out-of-band
*/
@@ -64,6 +63,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/sys/param.h>
+#include <sys/capsicum.h>
#include <sys/domain.h>
#include <sys/fcntl.h>
#include <sys/malloc.h> /* XXX must be before <sys/file.h> */
@@ -106,6 +106,8 @@ __FBSDID("$FreeBSD$");
#include <rtems/imfs.h>
#endif /* __rtems__ */
+MALLOC_DECLARE(M_FILECAPS);
+
/*
* Locking key:
* (l) Locked using list lock
@@ -284,24 +286,27 @@ static int uipc_connect2(struct socket *, struct socket *);
static int uipc_ctloutput(struct socket *, struct sockopt *);
static int unp_connect(struct socket *, struct sockaddr *,
struct thread *);
+static int unp_connectat(int, struct socket *, struct sockaddr *,
+ struct thread *);
static int unp_connect2(struct socket *so, struct socket *so2, int);
static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
#ifndef __rtems__
static void unp_dispose(struct mbuf *);
+static void unp_dispose_so(struct socket *so);
#endif /* __rtems__ */
static void unp_shutdown(struct unpcb *);
-static void unp_drop(struct unpcb *, int);
+static void unp_drop(struct unpcb *);
#ifndef __rtems__
static void unp_gc(__unused void *, int);
-static void unp_scan(struct mbuf *, void (*)(struct file *));
+static void unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
static void unp_discard(struct file *);
-static void unp_freerights(struct file **, int);
+static void unp_freerights(struct filedescent **, int);
#endif /* __rtems__ */
static void unp_init(void);
#ifndef __rtems__
static int unp_internalize(struct mbuf **, struct thread *);
static void unp_internalize_fp(struct file *);
-static int unp_externalize(struct mbuf *, struct mbuf **);
+static int unp_externalize(struct mbuf *, struct mbuf **, int);
static int unp_externalize_fp(struct file *);
static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *);
static void unp_process_defers(void * __unused, int);
@@ -350,10 +355,10 @@ static struct domain localdomain = {
.dom_init = unp_init,
#ifndef __rtems__
.dom_externalize = unp_externalize,
- .dom_dispose = unp_dispose,
+ .dom_dispose = unp_dispose_so,
#endif /* __rtems__ */
.dom_protosw = localsw,
- .dom_protoswNPROTOSW = &localsw[sizeof(localsw)/sizeof(localsw[0])]
+ .dom_protoswNPROTOSW = &localsw[nitems(localsw)]
};
DOMAIN_SET(local);
@@ -370,7 +375,7 @@ uipc_abort(struct socket *so)
unp2 = unp->unp_conn;
if (unp2 != NULL) {
UNP_PCB_LOCK(unp2);
- unp_drop(unp2, ECONNABORTED);
+ unp_drop(unp2);
UNP_PCB_UNLOCK(unp2);
}
UNP_PCB_UNLOCK(unp);
@@ -446,6 +451,8 @@ uipc_attach(struct socket *so, int proto, struct thread *td)
unp->unp_socket = so;
so->so_pcb = unp;
unp->unp_refcount = 1;
+ if (so->so_head != NULL)
+ unp->unp_flags |= UNP_NASCENT;
UNP_LIST_LOCK();
unp->unp_gencnt = ++unp_gencnt;
@@ -511,16 +518,17 @@ static const IMFS_node_control rtems_uipc_imfs_zombi_control =
IMFS_node_destroy_default);
#endif /* __rtems__ */
static int
-uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct sockaddr_un *soun = (struct sockaddr_un *)nam;
#ifndef __rtems__
struct vattr vattr;
- int error, namelen, vfslocked;
+ int error, namelen;
struct nameidata nd;
struct unpcb *unp;
struct vnode *vp;
struct mount *mp;
+ cap_rights_t rights;
char *buf;
#else /* __rtems__ */
int rv;
@@ -528,6 +536,9 @@ uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
struct unpcb *unp;
#endif /* __rtems__ */
+ if (nam->sa_family != AF_UNIX)
+ return (EAFNOSUPPORT);
+
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
@@ -564,15 +575,13 @@ uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
buf[namelen] = 0;
restart:
- vfslocked = 0;
- NDINIT(&nd, CREATE, MPSAFE | NOFOLLOW | LOCKPARENT | SAVENAME,
- UIO_SYSSPACE, buf, td);
+ NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE,
+ UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_BINDAT), td);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
error = namei(&nd);
if (error)
goto error;
vp = nd.ni_vp;
- vfslocked = NDHASGIANT(&nd);
if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_dvp == vp)
@@ -587,7 +596,6 @@ restart:
error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
if (error)
goto error;
- VFS_UNLOCK_GIANT(vfslocked);
goto restart;
}
VATTR_NULL(&vattr);
@@ -632,15 +640,11 @@ restart:
#ifndef __rtems__
VOP_UNLOCK(vp, 0);
vn_finished_write(mp);
- VFS_UNLOCK_GIANT(vfslocked);
free(buf, M_TEMP);
#endif /* __rtems__ */
return (0);
error:
-#ifndef __rtems__
- VFS_UNLOCK_GIANT(vfslocked);
-#endif /* __rtems__ */
UNP_PCB_LOCK(unp);
unp->unp_flags &= ~UNP_BINDING;
UNP_PCB_UNLOCK(unp);
@@ -651,6 +655,13 @@ error:
}
static int
+uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+
+ return (uipc_bindat(AT_FDCWD, so, nam, td));
+}
+
+static int
uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
int error;
@@ -662,6 +673,19 @@ uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
return (error);
}
+static int
+uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td)
+{
+ int error;
+
+ KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
+ UNP_LINK_WLOCK();
+ error = unp_connectat(fd, so, nam, td);
+ UNP_LINK_WUNLOCK();
+ return (error);
+}
+
static void
uipc_close(struct socket *so)
{
@@ -718,14 +742,24 @@ uipc_detach(struct socket *so)
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
- UNP_LINK_WLOCK();
+ vp = NULL;
+#ifndef __rtems__
+ local_unp_rights = 0;
+#endif /* __rtems__ */
+
UNP_LIST_LOCK();
- UNP_PCB_LOCK(unp);
LIST_REMOVE(unp, unp_link);
unp->unp_gencnt = ++unp_gencnt;
--unp_count;
UNP_LIST_UNLOCK();
+ if ((unp->unp_flags & UNP_NASCENT) != 0) {
+ UNP_PCB_LOCK(unp);
+ goto teardown;
+ }
+ UNP_LINK_WLOCK();
+ UNP_PCB_LOCK(unp);
+
/*
* XXXRW: Should assert vp->v_socket == so.
*/
@@ -753,13 +787,14 @@ uipc_detach(struct socket *so)
struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
UNP_PCB_LOCK(ref);
- unp_drop(ref, ECONNRESET);
+ unp_drop(ref);
UNP_PCB_UNLOCK(ref);
}
#ifndef __rtems__
local_unp_rights = unp_rights;
#endif /* __rtems__ */
UNP_LINK_WUNLOCK();
+teardown:
unp->unp_socket->so_pcb = NULL;
saved_unp_addr = unp->unp_addr;
unp->unp_addr = NULL;
@@ -773,13 +808,8 @@ uipc_detach(struct socket *so)
} else
UNP_PCB_UNLOCK(unp);
#ifndef __rtems__
- if (vp) {
- int vfslocked;
-
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ if (vp)
vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
- }
if (local_unp_rights)
taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
#endif /* __rtems__ */
@@ -817,8 +847,10 @@ uipc_listen(struct socket *so, int backlog, struct thread *td)
UNP_PCB_LOCK(unp);
if (unp->unp_vnode == NULL) {
+ /* Already connected or not bound to an address. */
+ error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ;
UNP_PCB_UNLOCK(unp);
- return (EINVAL);
+ return (error);
}
SOCK_LOCK(so);
@@ -872,13 +904,11 @@ uipc_rcvd(struct socket *so, int flags)
struct unpcb *unp, *unp2;
struct socket *so2;
u_int mbcnt, sbcc;
- u_long newhiwat;
unp = sotounpcb(so);
- KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL"));
-
- if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET)
- panic("uipc_rcvd socktype %d", so->so_type);
+ KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
+ KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
+ ("%s: socktype %d", __func__, so->so_type));
/*
* Adjust backpressure on sender and wakeup any waiting to write.
@@ -892,8 +922,17 @@ uipc_rcvd(struct socket *so, int flags)
*/
SOCKBUF_LOCK(&so->so_rcv);
mbcnt = so->so_rcv.sb_mbcnt;
- sbcc = so->so_rcv.sb_cc;
+ sbcc = sbavail(&so->so_rcv);
SOCKBUF_UNLOCK(&so->so_rcv);
+ /*
+ * There is a benign race condition at this point. If we're planning to
+ * clear SB_STOP, but uipc_send is called on the connected socket at
+ * this instant, it might add data to the sockbuf and set SB_STOP. Then
+ * we would erroneously clear SB_STOP below, even though the sockbuf is
+ * full. The race is benign because the only ill effect is to allow the
+ * sockbuf to exceed its size limit, and the size limits are not
+ * strictly guaranteed anyway.
+ */
UNP_PCB_LOCK(unp);
unp2 = unp->unp_conn;
if (unp2 == NULL) {
@@ -902,13 +941,9 @@ uipc_rcvd(struct socket *so, int flags)
}
so2 = unp2->unp_socket;
SOCKBUF_LOCK(&so2->so_snd);
- so2->so_snd.sb_mbmax += unp->unp_mbcnt - mbcnt;
- newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - sbcc;
- (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
- newhiwat, RLIM_INFINITY);
+ if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
+ so2->so_snd.sb_flags &= ~SB_STOP;
sowwakeup_locked(so2);
- unp->unp_mbcnt = mbcnt;
- unp->unp_cc = sbcc;
UNP_PCB_UNLOCK(unp);
return (0);
}
@@ -919,12 +954,14 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
{
struct unpcb *unp, *unp2;
struct socket *so2;
- u_int mbcnt_delta, sbcc;
- u_int newhiwat;
+ u_int mbcnt, sbcc;
int error = 0;
unp = sotounpcb(so);
- KASSERT(unp != NULL, ("uipc_send: unp == NULL"));
+ KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
+ KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM ||
+ so->so_type == SOCK_SEQPACKET,
+ ("%s: socktype %d", __func__, so->so_type));
if (flags & PRUS_OOB) {
error = EOPNOTSUPP;
@@ -1067,7 +1104,7 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
control))
control = NULL;
} else
- sbappend_locked(&so2->so_rcv, m);
+ sbappend_locked(&so2->so_rcv, m, flags);
break;
case SOCK_SEQPACKET: {
@@ -1087,33 +1124,27 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
}
}
+ mbcnt = so2->so_rcv.sb_mbcnt;
+ sbcc = sbavail(&so2->so_rcv);
+ if (sbcc)
+ sorwakeup_locked(so2);
+ else
+ SOCKBUF_UNLOCK(&so2->so_rcv);
+
/*
- * XXXRW: While fine for SOCK_STREAM, this conflates maximum
- * datagram size and back-pressure for SOCK_SEQPACKET, which
- * can lead to undesired return of EMSGSIZE on send instead
- * of more desirable blocking.
+ * The PCB lock on unp2 protects the SB_STOP flag. Without it,
+ * it would be possible for uipc_rcvd to be called at this
+ * point, drain the receiving sockbuf, clear SB_STOP, and then
+ * we would set SB_STOP below. That could lead to an empty
+ * sockbuf having SB_STOP set
*/
- mbcnt_delta = so2->so_rcv.sb_mbcnt - unp2->unp_mbcnt;
- unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt;
- sbcc = so2->so_rcv.sb_cc;
- sorwakeup_locked(so2);
-
SOCKBUF_LOCK(&so->so_snd);
- if ((int)so->so_snd.sb_hiwat >= (int)(sbcc - unp2->unp_cc))
- newhiwat = so->so_snd.sb_hiwat - (sbcc - unp2->unp_cc);
- else
- newhiwat = 0;
- (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
- newhiwat, RLIM_INFINITY);
- so->so_snd.sb_mbmax -= mbcnt_delta;
+ if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
+ so->so_snd.sb_flags |= SB_STOP;
SOCKBUF_UNLOCK(&so->so_snd);
- unp2->unp_cc = sbcc;
UNP_PCB_UNLOCK(unp2);
m = NULL;
break;
-
- default:
- panic("uipc_send unknown socktype");
}
/*
@@ -1147,29 +1178,46 @@ release:
}
static int
-uipc_sense(struct socket *so, struct stat *sb)
+uipc_ready(struct socket *so, struct mbuf *m, int count)
{
struct unpcb *unp, *unp2;
struct socket *so2;
+ int error;
+
+ unp = sotounpcb(so);
+
+ UNP_LINK_RLOCK();
+ unp2 = unp->unp_conn;
+ UNP_PCB_LOCK(unp2);
+ so2 = unp2->unp_socket;
+
+ SOCKBUF_LOCK(&so2->so_rcv);
+ if ((error = sbready(&so2->so_rcv, m, count)) == 0)
+ sorwakeup_locked(so2);
+ else
+ SOCKBUF_UNLOCK(&so2->so_rcv);
+
+ UNP_PCB_UNLOCK(unp2);
+ UNP_LINK_RUNLOCK();
+
+ return (error);
+}
+
+static int
+uipc_sense(struct socket *so, struct stat *sb)
+{
+ struct unpcb *unp;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));
sb->st_blksize = so->so_snd.sb_hiwat;
- UNP_LINK_RLOCK();
UNP_PCB_LOCK(unp);
- unp2 = unp->unp_conn;
- if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) &&
- unp2 != NULL) {
- so2 = unp2->unp_socket;
- sb->st_blksize += so2->so_rcv.sb_cc;
- }
sb->st_dev = NODEV;
if (unp->unp_ino == 0)
unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
sb->st_ino = unp->unp_ino;
UNP_PCB_UNLOCK(unp);
- UNP_LINK_RUNLOCK();
return (0);
}
@@ -1215,7 +1263,9 @@ static struct pr_usrreqs uipc_usrreqs_dgram = {
.pru_accept = uipc_accept,
.pru_attach = uipc_attach,
.pru_bind = uipc_bind,
+ .pru_bindat = uipc_bindat,
.pru_connect = uipc_connect,
+ .pru_connectat = uipc_connectat,
.pru_connect2 = uipc_connect2,
.pru_detach = uipc_detach,
.pru_disconnect = uipc_disconnect,
@@ -1235,7 +1285,9 @@ static struct pr_usrreqs uipc_usrreqs_seqpacket = {
.pru_accept = uipc_accept,
.pru_attach = uipc_attach,
.pru_bind = uipc_bind,
+ .pru_bindat = uipc_bindat,
.pru_connect = uipc_connect,
+ .pru_connectat = uipc_connectat,
.pru_connect2 = uipc_connect2,
.pru_detach = uipc_detach,
.pru_disconnect = uipc_disconnect,
@@ -1255,7 +1307,9 @@ static struct pr_usrreqs uipc_usrreqs_stream = {
.pru_accept = uipc_accept,
.pru_attach = uipc_attach,
.pru_bind = uipc_bind,
+ .pru_bindat = uipc_bindat,
.pru_connect = uipc_connect,
+ .pru_connectat = uipc_connectat,
.pru_connect2 = uipc_connect2,
.pru_detach = uipc_detach,
.pru_disconnect = uipc_disconnect,
@@ -1263,6 +1317,7 @@ static struct pr_usrreqs uipc_usrreqs_stream = {
.pru_peeraddr = uipc_peeraddr,
.pru_rcvd = uipc_rcvd,
.pru_send = uipc_send,
+ .pru_ready = uipc_ready,
.pru_sense = uipc_sense,
.pru_shutdown = uipc_shutdown,
.pru_sockaddr = uipc_sockaddr,
@@ -1367,6 +1422,14 @@ uipc_ctloutput(struct socket *so, struct sockopt *sopt)
static int
unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
+
+ return (unp_connectat(AT_FDCWD, so, nam, td));
+}
+
+static int
+unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td)
+{
struct sockaddr_un *soun = (struct sockaddr_un *)nam;
#ifndef __rtems__
struct vnode *vp;
@@ -1376,16 +1439,19 @@ unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
struct socket *so2, *so3;
struct unpcb *unp, *unp2, *unp3;
#ifndef __rtems__
- int error, len, vfslocked;
struct nameidata nd;
char buf[SOCK_MAXADDRLEN];
#else /* __rtems__ */
- int error, len;
rtems_filesystem_eval_path_context_t ctx;
int eval_flags;
const rtems_filesystem_location_info_t *currentloc;
#endif /* __rtems__ */
struct sockaddr *sa;
+ cap_rights_t rights;
+ int error, len;
+
+ if (nam->sa_family != AF_UNIX)
+ return (EAFNOSUPPORT);
UNP_LINK_WLOCK_ASSERT();
@@ -1413,15 +1479,14 @@ unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
#ifndef __rtems__
- NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKSHARED | LOCKLEAF,
- UIO_SYSSPACE, buf, td);
+ NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
+ UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_CONNECTAT), td);
error = namei(&nd);
if (error)
vp = NULL;
else
vp = nd.ni_vp;
ASSERT_VOP_LOCKED(vp, "unp_connect");
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
if (error)
goto bad;
@@ -1438,7 +1503,6 @@ unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
if (error)
goto bad;
- VFS_UNLOCK_GIANT(vfslocked);
#else /* __rtems__ */
eval_flags = RTEMS_FS_FOLLOW_LINK;
currentloc = rtems_filesystem_eval_path_start_with_root_and_current(
@@ -1544,19 +1608,10 @@ unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
UNP_PCB_UNLOCK(unp);
bad2:
UNP_LINK_WUNLOCK();
-#ifndef __rtems__
- if (vfslocked)
- /*
- * Giant has been previously acquired. This means filesystem
- * isn't MPSAFE. Do it once again.
- */
- mtx_lock(&Giant);
-#endif /* __rtems__ */
bad:
#ifndef __rtems__
if (vp != NULL)
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
#endif /* __rtems__ */
free(sa, M_SONAME);
UNP_LINK_WLOCK();
@@ -1583,6 +1638,7 @@ unp_connect2(struct socket *so, struct socket *so2, int req)
if (so2->so_type != so->so_type)
return (EPROTOTYPE);
+ unp2->unp_flags &= ~UNP_NASCENT;
unp->unp_conn = unp2;
switch (so->so_type) {
@@ -1808,11 +1864,7 @@ unp_shutdown(struct unpcb *unp)
}
static void
-#ifndef __rtems__
-unp_drop(struct unpcb *unp, int errno)
-#else /* __rtems__ */
-unp_drop(struct unpcb *unp, int error)
-#endif /* __rtems__ */
+unp_drop(struct unpcb *unp)
{
struct socket *so = unp->unp_socket;
struct unpcb *unp2;
@@ -1820,11 +1872,12 @@ unp_drop(struct unpcb *unp, int error)
UNP_LINK_WLOCK_ASSERT();
UNP_PCB_LOCK_ASSERT(unp);
-#ifndef __rtems__
- so->so_error = errno;
-#else /* __rtems__ */
- so->so_error = error;
-#endif /* __rtems__ */
+ /*
+ * Regardless of whether the socket's peer dropped the connection
+ * with this socket by aborting or disconnecting, POSIX requires
+ * that ECONNRESET is returned.
+ */
+ so->so_error = ECONNRESET;
unp2 = unp->unp_conn;
if (unp2 == NULL)
return;
@@ -1835,27 +1888,30 @@ unp_drop(struct unpcb *unp, int error)
#ifndef __rtems__
static void
-unp_freerights(struct file **rp, int fdcount)
+unp_freerights(struct filedescent **fdep, int fdcount)
{
- int i;
struct file *fp;
+ int i;
+
+ KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
for (i = 0; i < fdcount; i++) {
- fp = *rp;
- *rp++ = NULL;
+ fp = fdep[i]->fde_file;
+ filecaps_free(&fdep[i]->fde_caps);
unp_discard(fp);
}
+ free(fdep[0], M_FILECAPS);
}
static int
-unp_externalize(struct mbuf *control, struct mbuf **controlp)
+unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
{
struct thread *td = curthread; /* XXX */
struct cmsghdr *cm = mtod(control, struct cmsghdr *);
int i;
int *fdp;
- struct file **rp;
- struct file *fp;
+ struct filedesc *fdesc = td->td_proc->p_fd;
+ struct filedescent **fdep;
void *data;
socklen_t clen = control->m_len, datalen;
int error, newfds;
@@ -1875,15 +1931,18 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp)
datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
if (cm->cmsg_level == SOL_SOCKET
&& cm->cmsg_type == SCM_RIGHTS) {
- newfds = datalen / sizeof(struct file *);
- rp = data;
+ newfds = datalen / sizeof(*fdep);
+ if (newfds == 0)
+ goto next;
+ fdep = data;
/* If we're not outputting the descriptors free them. */
if (error || controlp == NULL) {
- unp_freerights(rp, newfds);
+ unp_freerights(fdep, newfds);
goto next;
}
- FILEDESC_XLOCK(td->td_proc->p_fd);
+ FILEDESC_XLOCK(fdesc);
+
/*
* Now change each pointer to an fd in the global
* table to an integer that is the index to the local
@@ -1894,28 +1953,30 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp)
*controlp = sbcreatecontrol(NULL, newlen,
SCM_RIGHTS, SOL_SOCKET);
if (*controlp == NULL) {
- FILEDESC_XUNLOCK(td->td_proc->p_fd);
+ FILEDESC_XUNLOCK(fdesc);
error = E2BIG;
- unp_freerights(rp, newfds);
+ unp_freerights(fdep, newfds);
goto next;
}
fdp = (int *)
CMSG_DATA(mtod(*controlp, struct cmsghdr *));
if (fdallocn(td, 0, fdp, newfds) != 0) {
- FILEDESC_XUNLOCK(td->td_proc->p_fd);
+ FILEDESC_XUNLOCK(fdesc);
error = EMSGSIZE;
- unp_freerights(rp, newfds);
+ unp_freerights(fdep, newfds);
m_freem(*controlp);
*controlp = NULL;
goto next;
}
- for (i = 0; i < newfds; i++) {
- fp = *rp++;
- td->td_proc->p_fd->fd_ofiles[fdp[i]] = fp;
- unp_externalize_fp(fp);
+ for (i = 0; i < newfds; i++, fdp++) {
+ _finstall(fdesc, fdep[i]->fde_file, *fdp,
+ (flags & MSG_CMSG_CLOEXEC) != 0 ? UF_EXCLOSE : 0,
+ &fdep[i]->fde_caps);
+ unp_externalize_fp(fdep[i]->fde_file);
}
- FILEDESC_XUNLOCK(td->td_proc->p_fd);
+ FILEDESC_XUNLOCK(fdesc);
+ free(fdep[0], M_FILECAPS);
} else {
/* We can just copy anything else across. */
if (error || controlp == NULL)
@@ -1968,6 +2029,7 @@ unp_init(void)
if (unp_zone == NULL)
panic("unp_init");
uma_zone_set_max(unp_zone, maxsockets);
+ uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
NULL, EVENTHANDLER_PRI_ANY);
LIST_INIT(&unp_dhead);
@@ -1989,14 +2051,14 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
{
struct mbuf *control = *controlp;
struct proc *p = td->td_proc;
- struct filedesc *fdescp = p->p_fd;
+ struct filedesc *fdesc = p->p_fd;
struct bintime *bt;
struct cmsghdr *cm = mtod(control, struct cmsghdr *);
struct cmsgcred *cmcred;
- struct file **rp;
+ struct filedescent *fde, **fdep, *fdev;
struct file *fp;
struct timeval *tv;
- int i, fd, *fdp;
+ int i, *fdp;
void *data;
socklen_t clen = control->m_len, datalen;
int error, oldfds;
@@ -2008,7 +2070,7 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
*controlp = NULL;
while (cm != NULL) {
if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
- || cm->cmsg_len > clen) {
+ || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
error = EINVAL;
goto out;
}
@@ -2041,23 +2103,23 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
case SCM_RIGHTS:
oldfds = datalen / sizeof (int);
+ if (oldfds == 0)
+ break;
/*
* Check that all the FDs passed in refer to legal
* files. If not, reject the entire operation.
*/
fdp = data;
- FILEDESC_SLOCK(fdescp);
- for (i = 0; i < oldfds; i++) {
- fd = *fdp++;
- if ((unsigned)fd >= fdescp->fd_nfiles ||
- fdescp->fd_ofiles[fd] == NULL) {
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SLOCK(fdesc);
+ for (i = 0; i < oldfds; i++, fdp++) {
+ fp = fget_locked(fdesc, *fdp);
+ if (fp == NULL) {
+ FILEDESC_SUNLOCK(fdesc);
error = EBADF;
goto out;
}
- fp = fdescp->fd_ofiles[fd];
if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SUNLOCK(fdesc);
error = EOPNOTSUPP;
goto out;
}
@@ -2066,25 +2128,30 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
/*
* Now replace the integer FDs with pointers to the
- * associated global file table entry..
+ * file structure and capability rights.
*/
- newlen = oldfds * sizeof(struct file *);
+ newlen = oldfds * sizeof(fdep[0]);
*controlp = sbcreatecontrol(NULL, newlen,
SCM_RIGHTS, SOL_SOCKET);
if (*controlp == NULL) {
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SUNLOCK(fdesc);
error = E2BIG;
goto out;
}
fdp = data;
- rp = (struct file **)
+ fdep = (struct filedescent **)
CMSG_DATA(mtod(*controlp, struct cmsghdr *));
- for (i = 0; i < oldfds; i++) {
- fp = fdescp->fd_ofiles[*fdp++];
- *rp++ = fp;
- unp_internalize_fp(fp);
+ fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
+ M_WAITOK);
+ for (i = 0; i < oldfds; i++, fdev++, fdp++) {
+ fde = &fdesc->fd_ofiles[*fdp];
+ fdep[i] = fdev;
+ fdep[i]->fde_file = fde->fde_file;
+ filecaps_copy(&fde->fde_caps,
+ &fdep[i]->fde_caps, true);
+ unp_internalize_fp(fdep[i]->fde_file);
}
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SUNLOCK(fdesc);
break;
case SCM_TIMESTAMP:
@@ -2280,17 +2347,22 @@ static int unp_marked;
static int unp_unreachable;
static void
-unp_accessable(struct file *fp)
+unp_accessable(struct filedescent **fdep, int fdcount)
{
struct unpcb *unp;
+ struct file *fp;
+ int i;
- if ((unp = fptounp(fp)) == NULL)
- return;
- if (unp->unp_gcflag & UNPGC_REF)
- return;
- unp->unp_gcflag &= ~UNPGC_DEAD;
- unp->unp_gcflag |= UNPGC_REF;
- unp_marked++;
+ for (i = 0; i < fdcount; i++) {
+ fp = fdep[i]->fde_file;
+ if ((unp = fptounp(fp)) == NULL)
+ continue;
+ if (unp->unp_gcflag & UNPGC_REF)
+ continue;
+ unp->unp_gcflag &= ~UNPGC_DEAD;
+ unp->unp_gcflag |= UNPGC_REF;
+ unp_marked++;
+ }
}
static void
@@ -2321,15 +2393,19 @@ unp_gc_process(struct unpcb *unp)
* Mark all sockets we reference with RIGHTS.
*/
so = unp->unp_socket;
- SOCKBUF_LOCK(&so->so_rcv);
- unp_scan(so->so_rcv.sb_mb, unp_accessable);
- SOCKBUF_UNLOCK(&so->so_rcv);
+ if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ unp_scan(so->so_rcv.sb_mb, unp_accessable);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ }
/*
* Mark all sockets in our accept queue.
*/
ACCEPT_LOCK();
TAILQ_FOREACH(soa, &so->so_comp, so_list) {
+ if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0)
+ continue;
SOCKBUF_LOCK(&soa->so_rcv);
unp_scan(soa->so_rcv.sb_mb, unp_accessable);
SOCKBUF_UNLOCK(&soa->so_rcv);
@@ -2359,11 +2435,13 @@ unp_gc(__unused void *arg, int pending)
unp_taskcount++;
UNP_LIST_LOCK();
/*
- * First clear all gc flags from previous runs.
+ * First clear all gc flags from previous runs, apart from
+ * UNPGC_IGNORE_RIGHTS.
*/
for (head = heads; *head != NULL; head++)
LIST_FOREACH(unp, *head, unp_link)
- unp->unp_gcflag = 0;
+ unp->unp_gcflag =
+ (unp->unp_gcflag & UNPGC_IGNORE_RIGHTS);
/*
* Scan marking all reachable sockets with UNPGC_REF. Once a socket
@@ -2437,19 +2515,31 @@ unp_dispose(struct mbuf *m)
{
if (m)
- unp_scan(m, unp_discard);
+ unp_scan(m, unp_freerights);
}
+/*
+ * Synchronize against unp_gc, which can trip over data as we are freeing it.
+ */
static void
-unp_scan(struct mbuf *m0, void (*op)(struct file *))
+unp_dispose_so(struct socket *so)
+{
+ struct unpcb *unp;
+
+ unp = sotounpcb(so);
+ UNP_LIST_LOCK();
+ unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
+ UNP_LIST_UNLOCK();
+ unp_dispose(so->so_rcv.sb_mb);
+}
+
+static void
+unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
{
struct mbuf *m;
- struct file **rp;
struct cmsghdr *cm;
void *data;
- int i;
socklen_t clen, datalen;
- int qfds;
while (m0 != NULL) {
for (m = m0; m; m = m->m_next) {
@@ -2469,10 +2559,8 @@ unp_scan(struct mbuf *m0, void (*op)(struct file *))
if (cm->cmsg_level == SOL_SOCKET &&
cm->cmsg_type == SCM_RIGHTS) {
- qfds = datalen / sizeof (struct file *);
- rp = data;
- for (i = 0; i < qfds; i++)
- (*op)(*rp++);
+ (*op)(data, datalen /
+ sizeof(struct filedescent *));
}
if (CMSG_SPACE(datalen) < clen) {
@@ -2485,7 +2573,7 @@ unp_scan(struct mbuf *m0, void (*op)(struct file *))
}
}
}
- m0 = m0->m_act;
+ m0 = m0->m_nextpkt;
}
}
@@ -2619,7 +2707,7 @@ DB_SHOW_COMMAND(unpcb, db_show_unpcb)
db_printf("unp_socket: %p unp_vnode: %p\n", unp->unp_socket,
unp->unp_vnode);
- db_printf("unp_ino: %d unp_conn: %p\n", unp->unp_ino,
+ db_printf("unp_ino: %ju unp_conn: %p\n", (uintmax_t)unp->unp_ino,
unp->unp_conn);
db_printf("unp_refs:\n");
@@ -2628,8 +2716,7 @@ DB_SHOW_COMMAND(unpcb, db_show_unpcb)
/* XXXRW: Would be nice to print the full address, if any. */
db_printf("unp_addr: %p\n", unp->unp_addr);
- db_printf("unp_cc: %d unp_mbcnt: %d unp_gencnt: %llu\n",
- unp->unp_cc, unp->unp_mbcnt,
+ db_printf("unp_gencnt: %llu\n",
(unsigned long long)unp->unp_gencnt);
db_printf("unp_flags: %x (", unp->unp_flags);
diff --git a/freebsd/sys/libkern/arc4random.c b/freebsd/sys/libkern/arc4random.c
deleted file mode 100644
index 89c89eea..00000000
--- a/freebsd/sys/libkern/arc4random.c
+++ /dev/null
@@ -1,158 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*-
- * THE BEER-WARE LICENSE
- *
- * <dan@FreeBSD.ORG> wrote this file. As long as you retain this notice you
- * can do whatever you want with this stuff. If we meet some day, and you
- * think this stuff is worth it, you can buy me a beer in return.
- *
- * Dan Moschuk
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/types.h>
-#include <rtems/bsd/sys/param.h>
-#include <sys/kernel.h>
-#include <sys/random.h>
-#include <sys/libkern.h>
-#include <rtems/bsd/sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/time.h>
-
-#define ARC4_RESEED_BYTES 65536
-#define ARC4_RESEED_SECONDS 300
-#define ARC4_KEYBYTES (256 / 8)
-
-int arc4rand_iniseed_state = ARC4_ENTR_NONE;
-
-static u_int8_t arc4_i, arc4_j;
-static int arc4_numruns = 0;
-static u_int8_t arc4_sbox[256];
-static time_t arc4_t_reseed;
-static struct mtx arc4_mtx;
-
-static u_int8_t arc4_randbyte(void);
-
-static __inline void
-arc4_swap(u_int8_t *a, u_int8_t *b)
-{
- u_int8_t c;
-
- c = *a;
- *a = *b;
- *b = c;
-}
-
-/*
- * Stir our S-box.
- */
-static void
-arc4_randomstir (void)
-{
- u_int8_t key[256];
- int r, n;
- struct timeval tv_now;
-
- /*
- * XXX read_random() returns unsafe numbers if the entropy
- * device is not loaded -- MarkM.
- */
- r = read_random(key, ARC4_KEYBYTES);
- getmicrouptime(&tv_now);
- mtx_lock(&arc4_mtx);
- /* If r == 0 || -1, just use what was on the stack. */
- if (r > 0) {
- for (n = r; n < sizeof(key); n++)
- key[n] = key[n % r];
- }
-
- for (n = 0; n < 256; n++) {
- arc4_j = (arc4_j + arc4_sbox[n] + key[n]) % 256;
- arc4_swap(&arc4_sbox[n], &arc4_sbox[arc4_j]);
- }
- arc4_i = arc4_j = 0;
-
- /* Reset for next reseed cycle. */
- arc4_t_reseed = tv_now.tv_sec + ARC4_RESEED_SECONDS;
- arc4_numruns = 0;
-
- /*
- * Throw away the first N words of output, as suggested in the
- * paper "Weaknesses in the Key Scheduling Algorithm of RC4"
- * by Fluher, Mantin, and Shamir. (N = 256 in our case.)
- */
- for (n = 0; n < 256*4; n++)
- arc4_randbyte();
- mtx_unlock(&arc4_mtx);
-}
-
-/*
- * Initialize our S-box to its beginning defaults.
- */
-static void
-arc4_init(void)
-{
- int n;
-
- mtx_init(&arc4_mtx, "arc4_mtx", NULL, MTX_DEF);
- arc4_i = arc4_j = 0;
- for (n = 0; n < 256; n++)
- arc4_sbox[n] = (u_int8_t) n;
-
- arc4_t_reseed = 0;
-}
-
-SYSINIT(arc4_init, SI_SUB_LOCK, SI_ORDER_ANY, arc4_init, NULL);
-
-/*
- * Generate a random byte.
- */
-static u_int8_t
-arc4_randbyte(void)
-{
- u_int8_t arc4_t;
-
- arc4_i = (arc4_i + 1) % 256;
- arc4_j = (arc4_j + arc4_sbox[arc4_i]) % 256;
-
- arc4_swap(&arc4_sbox[arc4_i], &arc4_sbox[arc4_j]);
-
- arc4_t = (arc4_sbox[arc4_i] + arc4_sbox[arc4_j]) % 256;
- return arc4_sbox[arc4_t];
-}
-
-/*
- * MPSAFE
- */
-void
-arc4rand(void *ptr, u_int len, int reseed)
-{
- u_char *p;
- struct timeval tv;
-
- getmicrouptime(&tv);
- if (atomic_cmpset_int(&arc4rand_iniseed_state, ARC4_ENTR_HAVE,
- ARC4_ENTR_SEED) || reseed ||
- (arc4_numruns > ARC4_RESEED_BYTES) ||
- (tv.tv_sec > arc4_t_reseed))
- arc4_randomstir();
-
- mtx_lock(&arc4_mtx);
- arc4_numruns += len;
- p = ptr;
- while (len--)
- *p++ = arc4_randbyte();
- mtx_unlock(&arc4_mtx);
-}
-
-uint32_t
-arc4random(void)
-{
- uint32_t ret;
-
- arc4rand(&ret, sizeof ret, 0);
- return ret;
-}
diff --git a/freebsd/sys/libkern/jenkins_hash.c b/freebsd/sys/libkern/jenkins_hash.c
new file mode 100644
index 00000000..9ecdb82b
--- /dev/null
+++ b/freebsd/sys/libkern/jenkins_hash.c
@@ -0,0 +1,465 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Taken from http://burtleburtle.net/bob/c/lookup3.c
+ * $FreeBSD$
+ */
+
+#include <sys/hash.h>
+#include <machine/endian.h>
+
+/*
+-------------------------------------------------------------------------------
+lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+
+These are functions for producing 32-bit hashes for hash table lookup.
+hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+are externally useful functions. Routines to test the hash are included
+if SELF_TEST is defined. You can use this free for any purpose. It's in
+the public domain. It has no warranty.
+
+You probably want to use hashlittle(). hashlittle() and hashbig()
+hash byte arrays. hashlittle() is faster than hashbig() on
+little-endian machines. Intel and AMD are little-endian machines.
+On second thought, you probably want hashlittle2(), which is identical to
+hashlittle() except it returns two 32-bit hashes for the price of one.
+You could implement hashbig2() if you wanted but I haven't bothered here.
+
+If you want to find a hash of, say, exactly 7 integers, do
+ a = i1; b = i2; c = i3;
+ mix(a,b,c);
+ a += i4; b += i5; c += i6;
+ mix(a,b,c);
+ a += i7;
+ final(a,b,c);
+then use c as the hash value. If you have a variable length array of
+4-byte integers to hash, use hashword(). If you have a byte array (like
+a character string), use hashlittle(). If you have several byte arrays, or
+a mix of things, see the comments above hashlittle().
+
+Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
+then mix those integers. This is fast (you can do a lot more thorough
+mixing with 12*3 instructions on 3 integers than you can with 3 instructions
+on 1 byte), but shoehorning those bytes into integers efficiently is messy.
+-------------------------------------------------------------------------------
+*/
+
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+/*
+-------------------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+
+This is reversible, so any information in (a,b,c) before mix() is
+still in (a,b,c) after mix().
+
+If four pairs of (a,b,c) inputs are run through mix(), or through
+mix() in reverse, there are at least 32 bits of the output that
+are sometimes the same for one pair and different for another pair.
+This was tested for:
+* pairs that differed by one bit, by two bits, in any combination
+ of top bits of (a,b,c), or in any combination of bottom bits of
+ (a,b,c).
+* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ is commonly produced by subtraction) look like a single 1-bit
+ difference.
+* the base values were pseudorandom, all zero but one bit set, or
+ all zero plus a counter that starts at zero.
+
+Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
+satisfy this are
+ 4 6 8 16 19 4
+ 9 15 3 18 27 15
+ 14 9 3 7 17 3
+Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
+for "differ" defined as + with a one-bit base and a two-bit delta. I
+used http://burtleburtle.net/bob/hash/avalanche.html to choose
+the operations, constants, and arrangements of the variables.
+
+This does not achieve avalanche. There are input bits of (a,b,c)
+that fail to affect some output bits of (a,b,c), especially of a. The
+most thoroughly mixed value is c, but it doesn't really even achieve
+avalanche in c.
+
+This allows some parallelism. Read-after-writes are good at doubling
+the number of bits affected, so the goal of mixing pulls in the opposite
+direction as the goal of parallelism. I did what I could. Rotates
+seem to cost as much as shifts on every machine I could lay my hands
+on, and rotates are much kinder to the top and bottom bits, so I used
+rotates.
+-------------------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c,16); c += b; \
+ b -= a; b ^= rot(a,19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+}
+
+/*
+-------------------------------------------------------------------------------
+final -- final mixing of 3 32-bit values (a,b,c) into c
+
+Pairs of (a,b,c) values differing in only a few bits will usually
+produce values of c that look totally different. This was tested for
+* pairs that differed by one bit, by two bits, in any combination
+ of top bits of (a,b,c), or in any combination of bottom bits of
+ (a,b,c).
+* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ is commonly produced by subtraction) look like a single 1-bit
+ difference.
+* the base values were pseudorandom, all zero but one bit set, or
+ all zero plus a counter that starts at zero.
+
+These constants passed:
+ 14 11 25 16 4 14 24
+ 12 14 25 16 4 14 24
+and these came close:
+ 4 8 15 26 3 22 24
+ 10 8 15 26 3 22 24
+ 11 8 15 26 3 22 24
+-------------------------------------------------------------------------------
+*/
+#define final(a,b,c) \
+{ \
+ c ^= b; c -= rot(b,14); \
+ a ^= c; a -= rot(c,11); \
+ b ^= a; b -= rot(a,25); \
+ c ^= b; c -= rot(b,16); \
+ a ^= c; a -= rot(c,4); \
+ b ^= a; b -= rot(a,14); \
+ c ^= b; c -= rot(b,24); \
+}
+
+/*
+--------------------------------------------------------------------
+ This works on all machines. To be useful, it requires
+ -- that the key be an array of uint32_t's, and
+ -- that the length be the number of uint32_t's in the key
+
+ The function hashword() is identical to hashlittle() on little-endian
+ machines, and identical to hashbig() on big-endian machines,
+ except that the length has to be measured in uint32_ts rather than in
+ bytes. hashlittle() is more complicated than hashword() only because
+ hashlittle() has to dance around fitting the key bytes into registers.
+--------------------------------------------------------------------
+*/
+uint32_t jenkins_hash32(
+const uint32_t *k, /* the key, an array of uint32_t values */
+size_t length, /* the length of the key, in uint32_ts */
+uint32_t initval) /* the previous hash, or an arbitrary value */
+{
+ uint32_t a,b,c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
+
+ /*------------------------------------------------- handle most of the key */
+ while (length > 3)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*------------------------------------------- handle the last 3 uint32_t's */
+ switch(length) /* all the case statements fall through */
+ {
+ case 3 : c+=k[2];
+ case 2 : b+=k[1];
+ case 1 : a+=k[0];
+ final(a,b,c);
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*------------------------------------------------------ report the result */
+ return c;
+}
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+/*
+-------------------------------------------------------------------------------
+hashlittle() -- hash a variable-length key into a 32-bit value
+ k : the key (the unaligned variable-length array of bytes)
+ length : the length of the key, counting by bytes
+ initval : can be any 4-byte value
+Returns a 32-bit value. Every bit of the key affects every bit of
+the return value. Two keys differing by one or two bits will have
+totally different hash values.
+
+The best hash table sizes are powers of 2. There is no need to do
+mod a prime (mod is sooo slow!). If you need less than 32 bits,
+use a bitmask. For example, if you need only 10 bits, do
+ h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (uint8_t **)k, do it like this:
+ for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
+
+By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
+code any way you wish, private, educational, or commercial. It's free.
+
+Use for hash table lookup, or anything where one collision in 2^^32 is
+acceptable. Do NOT use for cryptographic purposes.
+-------------------------------------------------------------------------------
+*/
+
+uint32_t jenkins_hash( const void *key, size_t length, uint32_t initval)
+{
+ uint32_t a,b,c; /* internal state */
+ union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+ u.ptr = key;
+ if ((u.i & 0x3) == 0) {
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
+
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 12;
+ k += 3;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ /*
+ * "k[2]&0xffffff" actually reads beyond the end of the string, but
+ * then masks off the part it's not allowed to read. Because the
+ * string is aligned, the masked-off tail is in the same word as the
+ * rest of the string. Every machine with memory protection I've seen
+ * does it on word boundaries, so is OK with this. But VALGRIND will
+ * still catch it and complain. The masking trick does make the hash
+ * noticably faster for short strings (like English words).
+ */
+
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+ case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+ case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+ case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+ case 5 : b+=k[1]&0xff; a+=k[0]; break;
+ case 4 : a+=k[0]; break;
+ case 3 : a+=k[0]&0xffffff; break;
+ case 2 : a+=k[0]&0xffff; break;
+ case 1 : a+=k[0]&0xff; break;
+ case 0 : return c; /* zero length strings require no mixing */
+ }
+
+ } else if ((u.i & 0x1) == 0) {
+ const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
+ const uint8_t *k8;
+
+ /*--------------- all but last block: aligned reads and different mixing */
+ while (length > 12)
+ {
+ a += k[0] + (((uint32_t)k[1])<<16);
+ b += k[2] + (((uint32_t)k[3])<<16);
+ c += k[4] + (((uint32_t)k[5])<<16);
+ mix(a,b,c);
+ length -= 12;
+ k += 6;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ k8 = (const uint8_t *)k;
+ switch(length)
+ {
+ case 12: c+=k[4]+(((uint32_t)k[5])<<16);
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
+ case 10: c+=k[4];
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 9 : c+=k8[8]; /* fall through */
+ case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
+ case 6 : b+=k[2];
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 5 : b+=k8[4]; /* fall through */
+ case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
+ case 2 : a+=k[0];
+ break;
+ case 1 : a+=k8[0];
+ break;
+ case 0 : return c; /* zero length requires no mixing */
+ }
+
+ } else { /* need to read the key one byte at a time */
+ const uint8_t *k = (const uint8_t *)key;
+
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ a += ((uint32_t)k[1])<<8;
+ a += ((uint32_t)k[2])<<16;
+ a += ((uint32_t)k[3])<<24;
+ b += k[4];
+ b += ((uint32_t)k[5])<<8;
+ b += ((uint32_t)k[6])<<16;
+ b += ((uint32_t)k[7])<<24;
+ c += k[8];
+ c += ((uint32_t)k[9])<<8;
+ c += ((uint32_t)k[10])<<16;
+ c += ((uint32_t)k[11])<<24;
+ mix(a,b,c);
+ length -= 12;
+ k += 12;
+ }
+
+ /*-------------------------------- last block: affect all 32 bits of (c) */
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=((uint32_t)k[11])<<24;
+ case 11: c+=((uint32_t)k[10])<<16;
+ case 10: c+=((uint32_t)k[9])<<8;
+ case 9 : c+=k[8];
+ case 8 : b+=((uint32_t)k[7])<<24;
+ case 7 : b+=((uint32_t)k[6])<<16;
+ case 6 : b+=((uint32_t)k[5])<<8;
+ case 5 : b+=k[4];
+ case 4 : a+=((uint32_t)k[3])<<24;
+ case 3 : a+=((uint32_t)k[2])<<16;
+ case 2 : a+=((uint32_t)k[1])<<8;
+ case 1 : a+=k[0];
+ break;
+ case 0 : return c;
+ }
+ }
+
+ final(a,b,c);
+ return c;
+}
+
+#else /* !(BYTE_ORDER == LITTLE_ENDIAN) */
+
+/*
+ * hashbig():
+ * This is the same as hashword() on big-endian machines. It is different
+ * from hashlittle() on all machines. hashbig() takes advantage of
+ * big-endian byte ordering.
+ */
+uint32_t jenkins_hash( const void *key, size_t length, uint32_t initval)
+{
+ uint32_t a,b,c;
+ union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+ u.ptr = key;
+ if ((u.i & 0x3) == 0) {
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
+
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 12;
+ k += 3;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ /*
+ * "k[2]<<8" actually reads beyond the end of the string, but
+ * then shifts out the part it's not allowed to read. Because the
+ * string is aligned, the illegal read is in the same word as the
+ * rest of the string. Every machine with memory protection I've seen
+ * does it on word boundaries, so is OK with this. But VALGRIND will
+ * still catch it and complain. The masking trick does make the hash
+ * noticably faster for short strings (like English words).
+ */
+
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
+ case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
+ case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
+ case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
+ case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
+ case 4 : a+=k[0]; break;
+ case 3 : a+=k[0]&0xffffff00; break;
+ case 2 : a+=k[0]&0xffff0000; break;
+ case 1 : a+=k[0]&0xff000000; break;
+ case 0 : return c; /* zero length strings require no mixing */
+ }
+
+ } else { /* need to read the key one byte at a time */
+ const uint8_t *k = (const uint8_t *)key;
+
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += ((uint32_t)k[0])<<24;
+ a += ((uint32_t)k[1])<<16;
+ a += ((uint32_t)k[2])<<8;
+ a += ((uint32_t)k[3]);
+ b += ((uint32_t)k[4])<<24;
+ b += ((uint32_t)k[5])<<16;
+ b += ((uint32_t)k[6])<<8;
+ b += ((uint32_t)k[7]);
+ c += ((uint32_t)k[8])<<24;
+ c += ((uint32_t)k[9])<<16;
+ c += ((uint32_t)k[10])<<8;
+ c += ((uint32_t)k[11]);
+ mix(a,b,c);
+ length -= 12;
+ k += 12;
+ }
+
+ /*-------------------------------- last block: affect all 32 bits of (c) */
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=k[11];
+ case 11: c+=((uint32_t)k[10])<<8;
+ case 10: c+=((uint32_t)k[9])<<16;
+ case 9 : c+=((uint32_t)k[8])<<24;
+ case 8 : b+=k[7];
+ case 7 : b+=((uint32_t)k[6])<<8;
+ case 6 : b+=((uint32_t)k[5])<<16;
+ case 5 : b+=((uint32_t)k[4])<<24;
+ case 4 : a+=k[3];
+ case 3 : a+=((uint32_t)k[2])<<8;
+ case 2 : a+=((uint32_t)k[1])<<16;
+ case 1 : a+=((uint32_t)k[0])<<24;
+ break;
+ case 0 : return c;
+ }
+ }
+
+ final(a,b,c);
+ return c;
+}
+#endif
diff --git a/freebsd/sys/libkern/murmur3_32.c b/freebsd/sys/libkern/murmur3_32.c
new file mode 100644
index 00000000..63ed07a8
--- /dev/null
+++ b/freebsd/sys/libkern/murmur3_32.c
@@ -0,0 +1,134 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2014 Dag-Erling Smørgrav
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/hash.h>
+#include <sys/endian.h>
+#include <sys/stdint.h>
+#include <sys/types.h>
+
+#define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n)))
+
+/*
+ * Simple implementation of the Murmur3-32 hash function.
+ *
+ * This implementation is slow but safe. It can be made significantly
+ * faster if the caller guarantees that the input is correctly aligned for
+ * 32-bit reads, and slightly faster yet if the caller guarantees that the
+ * length of the input is always a multiple of 4 bytes.
+ */
+uint32_t
+murmur3_32_hash(const void *data, size_t len, uint32_t seed)
+{
+ const uint8_t *bytes;
+ uint32_t hash, k;
+ size_t res;
+
+ /* initialization */
+ bytes = data;
+ res = len;
+ hash = seed;
+
+ /* main loop */
+ while (res >= 4) {
+ /* replace with le32toh() if input is aligned */
+ k = le32dec(bytes);
+ bytes += 4;
+ res -= 4;
+ k *= 0xcc9e2d51;
+ k = rol32(k, 15);
+ k *= 0x1b873593;
+ hash ^= k;
+ hash = rol32(hash, 13);
+ hash *= 5;
+ hash += 0xe6546b64;
+ }
+
+ /* remainder */
+ /* remove if input length is a multiple of 4 */
+ if (res > 0) {
+ k = 0;
+ switch (res) {
+ case 3:
+ k |= bytes[2] << 16;
+ case 2:
+ k |= bytes[1] << 8;
+ case 1:
+ k |= bytes[0];
+ k *= 0xcc9e2d51;
+ k = rol32(k, 15);
+ k *= 0x1b873593;
+ hash ^= k;
+ break;
+ }
+ }
+
+ /* finalize */
+ hash ^= (uint32_t)len;
+ hash ^= hash >> 16;
+ hash *= 0x85ebca6b;
+ hash ^= hash >> 13;
+ hash *= 0xc2b2ae35;
+ hash ^= hash >> 16;
+ return (hash);
+}
+
+/*
+ * Simplified version of the above optimized for aligned sequences of
+ * 32-bit words. The count argument is the number of words, not the
+ * length in bytes.
+ */
+uint32_t
+murmur3_32_hash32(const uint32_t *data, size_t count, uint32_t seed)
+{
+ uint32_t hash, k;
+ size_t res;
+
+ /* iterate */
+ for (res = count, hash = seed; res > 0; res--, data++) {
+ k = le32toh(*data);
+ k *= 0xcc9e2d51;
+ k = rol32(k, 15);
+ k *= 0x1b873593;
+ hash ^= k;
+ hash = rol32(hash, 13);
+ hash *= 5;
+ hash += 0xe6546b64;
+ }
+
+ /* finalize */
+ hash ^= (uint32_t)count;
+ hash ^= hash >> 16;
+ hash *= 0x85ebca6b;
+ hash ^= hash >> 13;
+ hash *= 0xc2b2ae35;
+ hash ^= hash >> 16;
+ return (hash);
+}
+
diff --git a/freebsd/sys/libkern/random.c b/freebsd/sys/libkern/random.c
index 6d6755a3..5b780670 100644
--- a/freebsd/sys/libkern/random.c
+++ b/freebsd/sys/libkern/random.c
@@ -52,7 +52,7 @@ srandom(seed)
}
/*
- * Pseudo-random number generator for randomizing the profiling clock,
+ * Pseudo-random number generator for perturbing the profiling clock,
* and whatever else we might use it for. The result is uniform on
* [0, 2^31 - 1].
*/
diff --git a/freebsd/sys/lm32/include/machine/in_cksum.h b/freebsd/sys/lm32/include/machine/in_cksum.h
index 633efa1f..72edfba9 100644
--- a/freebsd/sys/lm32/include/machine/in_cksum.h
+++ b/freebsd/sys/lm32/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/m32c/include/machine/in_cksum.h b/freebsd/sys/m32c/include/machine/in_cksum.h
index 633efa1f..72edfba9 100644
--- a/freebsd/sys/m32c/include/machine/in_cksum.h
+++ b/freebsd/sys/m32c/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/m32r/include/machine/in_cksum.h b/freebsd/sys/m32r/include/machine/in_cksum.h
index 633efa1f..72edfba9 100644
--- a/freebsd/sys/m32r/include/machine/in_cksum.h
+++ b/freebsd/sys/m32r/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/m68k/include/machine/in_cksum.h b/freebsd/sys/m68k/include/machine/in_cksum.h
index 633efa1f..72edfba9 100644
--- a/freebsd/sys/m68k/include/machine/in_cksum.h
+++ b/freebsd/sys/m68k/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/mips/include/machine/cpufunc.h b/freebsd/sys/mips/include/machine/cpufunc.h
index 7945dd38..427aba74 100644
--- a/freebsd/sys/mips/include/machine/cpufunc.h
+++ b/freebsd/sys/mips/include/machine/cpufunc.h
@@ -69,7 +69,7 @@
static __inline void
mips_barrier(void)
{
-#ifdef CPU_CNMIPS
+#if defined(CPU_CNMIPS) || defined(CPU_RMI) || defined(CPU_NLM)
__compiler_membar();
#else
__asm __volatile (".set noreorder\n\t"
@@ -106,18 +106,6 @@ mips_wbflush(void)
#endif
}
-static __inline void
-mips_read_membar(void)
-{
- /* Nil */
-}
-
-static __inline void
-mips_write_membar(void)
-{
- mips_wbflush();
-}
-
#ifdef _KERNEL
/*
* XXX
@@ -171,6 +159,7 @@ mips_wr_ ## n(uint64_t a0) \
MIPS_RW64_COP0(excpc, MIPS_COP_0_EXC_PC);
MIPS_RW64_COP0(entryhi, MIPS_COP_0_TLB_HI);
MIPS_RW64_COP0(pagemask, MIPS_COP_0_TLB_PG_MASK);
+MIPS_RW64_COP0_SEL(userlocal, MIPS_COP_0_USERLOCAL, 2);
#ifdef CPU_CNMIPS
MIPS_RW64_COP0_SEL(cvmcount, MIPS_COP_0_COUNT, 6);
MIPS_RW64_COP0_SEL(cvmctl, MIPS_COP_0_COUNT, 7);
@@ -254,8 +243,13 @@ MIPS_RW32_COP0_SEL(config3, MIPS_COP_0_CONFIG, 3);
#ifdef CPU_CNMIPS
MIPS_RW32_COP0_SEL(config4, MIPS_COP_0_CONFIG, 4);
#endif
-#ifdef CPU_NLM
+#ifdef BERI_LARGE_TLB
+MIPS_RW32_COP0_SEL(config5, MIPS_COP_0_CONFIG, 5);
+#endif
+#if defined(CPU_NLM) || defined(BERI_LARGE_TLB)
MIPS_RW32_COP0_SEL(config6, MIPS_COP_0_CONFIG, 6);
+#endif
+#if defined(CPU_NLM) || defined(CPU_MIPS1004K)
MIPS_RW32_COP0_SEL(config7, MIPS_COP_0_CONFIG, 7);
#endif
MIPS_RW32_COP0(count, MIPS_COP_0_COUNT);
@@ -266,11 +260,16 @@ MIPS_RW32_COP0(cause, MIPS_COP_0_CAUSE);
MIPS_RW32_COP0(excpc, MIPS_COP_0_EXC_PC);
#endif
MIPS_RW32_COP0(status, MIPS_COP_0_STATUS);
+MIPS_RW32_COP0_SEL(cmgcrbase, 15, 3);
/* XXX: Some of these registers are specific to MIPS32. */
#if !defined(__mips_n64)
MIPS_RW32_COP0(entryhi, MIPS_COP_0_TLB_HI);
MIPS_RW32_COP0(pagemask, MIPS_COP_0_TLB_PG_MASK);
+MIPS_RW32_COP0_SEL(userlocal, MIPS_COP_0_USERLOCAL, 2);
+#endif
+#ifdef CPU_NLM
+MIPS_RW32_COP0_SEL(pagegrain, MIPS_COP_0_TLB_PG_MASK, 1);
#endif
#if !defined(__mips_n64) && !defined(__mips_n32) /* !PHYSADDR_64_BIT */
MIPS_RW32_COP0(entrylo0, MIPS_COP_0_TLB_LO0);
@@ -292,6 +291,7 @@ MIPS_RW32_COP0_SEL(perfcnt0, MIPS_COP_0_PERFCNT, 0);
MIPS_RW32_COP0_SEL(perfcnt1, MIPS_COP_0_PERFCNT, 1);
MIPS_RW32_COP0_SEL(perfcnt2, MIPS_COP_0_PERFCNT, 2);
MIPS_RW32_COP0_SEL(perfcnt3, MIPS_COP_0_PERFCNT, 3);
+MIPS_RW32_COP0(hwrena, MIPS_COP_0_HWRENA);
#undef MIPS_RW32_COP0
#undef MIPS_RW32_COP0_SEL
@@ -351,29 +351,8 @@ breakpoint(void)
}
#if defined(__GNUC__) && !defined(__mips_o32)
-static inline uint64_t
-mips3_ld(const volatile uint64_t *va)
-{
- uint64_t rv;
-
-#if defined(_LP64)
- rv = *va;
-#else
- __asm volatile("ld %0,0(%1)" : "=d"(rv) : "r"(va));
-#endif
-
- return (rv);
-}
-
-static inline void
-mips3_sd(volatile uint64_t *va, uint64_t v)
-{
-#if defined(_LP64)
- *va = v;
-#else
- __asm volatile("sd %0,0(%1)" :: "r"(v), "r"(va));
-#endif
-}
+#define mips3_ld(a) (*(const volatile uint64_t *)(a))
+#define mips3_sd(a, v) (*(volatile uint64_t *)(a) = (v))
#else
uint64_t mips3_ld(volatile uint64_t *va);
void mips3_sd(volatile uint64_t *, uint64_t);
@@ -384,87 +363,15 @@ void mips3_sd(volatile uint64_t *, uint64_t);
#define readb(va) (*(volatile uint8_t *) (va))
#define readw(va) (*(volatile uint16_t *) (va))
#define readl(va) (*(volatile uint32_t *) (va))
+#if defined(__GNUC__) && !defined(__mips_o32)
+#define readq(a) (*(volatile uint64_t *)(a))
+#endif
#define writeb(va, d) (*(volatile uint8_t *) (va) = (d))
#define writew(va, d) (*(volatile uint16_t *) (va) = (d))
#define writel(va, d) (*(volatile uint32_t *) (va) = (d))
-
-/*
- * I/O macros.
- */
-
-#define outb(a,v) (*(volatile unsigned char*)(a) = (v))
-#define out8(a,v) (*(volatile unsigned char*)(a) = (v))
-#define outw(a,v) (*(volatile unsigned short*)(a) = (v))
-#define out16(a,v) outw(a,v)
-#define outl(a,v) (*(volatile unsigned int*)(a) = (v))
-#define out32(a,v) outl(a,v)
-#define inb(a) (*(volatile unsigned char*)(a))
-#define in8(a) (*(volatile unsigned char*)(a))
-#define inw(a) (*(volatile unsigned short*)(a))
-#define in16(a) inw(a)
-#define inl(a) (*(volatile unsigned int*)(a))
-#define in32(a) inl(a)
-
-#define out8rb(a,v) (*(volatile unsigned char*)(a) = (v))
-#define out16rb(a,v) (__out16rb((volatile uint16_t *)(a), v))
-#define out32rb(a,v) (__out32rb((volatile uint32_t *)(a), v))
-#define in8rb(a) (*(volatile unsigned char*)(a))
-#define in16rb(a) (__in16rb((volatile uint16_t *)(a)))
-#define in32rb(a) (__in32rb((volatile uint32_t *)(a)))
-
-#define _swap_(x) (((x) >> 24) | ((x) << 24) | \
- (((x) >> 8) & 0xff00) | (((x) & 0xff00) << 8))
-
-static __inline void __out32rb(volatile uint32_t *, uint32_t);
-static __inline void __out16rb(volatile uint16_t *, uint16_t);
-static __inline uint32_t __in32rb(volatile uint32_t *);
-static __inline uint16_t __in16rb(volatile uint16_t *);
-
-static __inline void
-__out32rb(volatile uint32_t *a, uint32_t v)
-{
- uint32_t _v_ = v;
-
- _v_ = _swap_(_v_);
- out32(a, _v_);
-}
-
-static __inline void
-__out16rb(volatile uint16_t *a, uint16_t v)
-{
- uint16_t _v_;
-
- _v_ = ((v >> 8) & 0xff) | (v << 8);
- out16(a, _v_);
-}
-
-static __inline uint32_t
-__in32rb(volatile uint32_t *a)
-{
- uint32_t _v_;
-
- _v_ = in32(a);
- _v_ = _swap_(_v_);
- return _v_;
-}
-
-static __inline uint16_t
-__in16rb(volatile uint16_t *a)
-{
- uint16_t _v_;
-
- _v_ = in16(a);
- _v_ = ((_v_ >> 8) & 0xff) | (_v_ << 8);
- return _v_;
-}
-
-void insb(uint8_t *, uint8_t *,int);
-void insw(uint16_t *, uint16_t *,int);
-void insl(uint32_t *, uint32_t *,int);
-void outsb(uint8_t *, const uint8_t *,int);
-void outsw(uint16_t *, const uint16_t *,int);
-void outsl(uint32_t *, const uint32_t *,int);
-u_int loadandclear(volatile u_int *addr);
+#if defined(__GNUC__) && !defined(__mips_o32)
+#define writeq(va, d) (*(volatile uint64_t *) (va) = (d))
+#endif
#endif /* !_MACHINE_CPUFUNC_H_ */
diff --git a/freebsd/sys/mips/include/machine/cpuregs.h b/freebsd/sys/mips/include/machine/cpuregs.h
index 01d710d2..4ce7e1b4 100644
--- a/freebsd/sys/mips/include/machine/cpuregs.h
+++ b/freebsd/sys/mips/include/machine/cpuregs.h
@@ -110,6 +110,7 @@
* C: Cacheable, coherency unspecified.
* CNC: Cacheable non-coherent.
* CC: Cacheable coherent.
+ * CCS: Cacheable coherent, shared read.
* CCE: Cacheable coherent, exclusive read.
* CCEW: Cacheable coherent, exclusive write.
* CCUOW: Cacheable coherent, update on write.
@@ -149,6 +150,27 @@
#define MIPS_CCA_CC 0x05 /* Cacheable Coherent. */
#endif
+#if defined(CPU_MIPS74K)
+#define MIPS_CCA_UNCACHED 0x02
+#define MIPS_CCA_CACHED 0x03
+#endif
+
+/*
+ * 1004K and 1074K cores, as well as interAptiv and proAptiv cores, support
+ * Cacheable Coherent CCAs 0x04 and 0x05, as well as Cacheable non-Coherent
+ * CCA 0x03 and Uncached Accelerated CCA 0x07
+ */
+#if defined(CPU_MIPS1004K) || defined(CPU_MIPS1074K) || \
+ defined(CPU_INTERAPTIV) || defined(CPU_PROAPTIV)
+#define MIPS_CCA_CNC 0x03
+#define MIPS_CCA_CCE 0x04
+#define MIPS_CCA_CCS 0x05
+#define MIPS_CCA_UA 0x07
+
+/* We use shared read CCA for CACHED CCA */
+#define MIPS_CCA_CACHED MIPS_CCA_CCS
+#endif
+
#ifndef MIPS_CCA_UNCACHED
#define MIPS_CCA_UNCACHED MIPS_CCA_UC
#endif
@@ -200,14 +222,28 @@
/* CPU dependent mtc0 hazard hook */
#if defined(CPU_CNMIPS) || defined(CPU_RMI)
#define COP0_SYNC
+#elif defined(CPU_NLM)
+#define COP0_SYNC .word 0xc0 /* ehb */
#elif defined(CPU_SB1)
#define COP0_SYNC ssnop; ssnop; ssnop; ssnop; ssnop; ssnop; ssnop; ssnop; ssnop
+#elif defined(CPU_MIPS24K) || defined(CPU_MIPS34K) || \
+ defined(CPU_MIPS74K) || defined(CPU_MIPS1004K) || \
+ defined(CPU_MIPS1074K) || defined(CPU_INTERAPTIV) || \
+ defined(CPU_PROAPTIV)
+/*
+ * According to MIPS32tm Architecture for Programmers, Vol.II, rev. 2.00:
+ * "As EHB becomes standard in MIPS implementations, the previous SSNOPs can be
+ * removed, leaving only the EHB".
+ * Also, all MIPS32 Release 2 implementations have the EHB instruction, which
+ * resolves all execution hazards. The same goes for MIPS32 Release 3.
+ */
+#define COP0_SYNC .word 0xc0 /* ehb */
#else
/*
* Pick a reasonable default based on the "typical" spacing described in the
* "CP0 Hazards" chapter of MIPS Architecture Book Vol III.
*/
-#define COP0_SYNC ssnop; ssnop; ssnop; ssnop; ssnop
+#define COP0_SYNC ssnop; ssnop; ssnop; ssnop; .word 0xc0;
#endif
#define COP0_HAZARD_FPUENABLE nop; nop; nop; nop;
@@ -227,10 +263,10 @@
*/
#define MIPS_CR_BR_DELAY 0x80000000
#define MIPS_CR_COP_ERR 0x30000000
-#define MIPS1_CR_EXC_CODE 0x0000003C /* four bits */
-#define MIPS3_CR_EXC_CODE 0x0000007C /* five bits */
+#define MIPS_CR_EXC_CODE 0x0000007C /* five bits */
#define MIPS_CR_IP 0x0000FF00
#define MIPS_CR_EXC_CODE_SHIFT 2
+#define MIPS_CR_COP_ERR_SHIFT 28
/*
* The bits in the status register. All bits are active when set to 1.
@@ -265,94 +301,31 @@
#define MIPS_SR_INT_MASK 0x0000ff00
/*
- * The R2000/R3000-specific status register bit definitions.
- * all bits are active when set to 1.
- *
- * MIPS_SR_PARITY_ERR Parity error.
- * MIPS_SR_CACHE_MISS Most recent D-cache load resulted in a miss.
- * MIPS_SR_PARITY_ZERO Zero replaces outgoing parity bits.
- * MIPS_SR_SWAP_CACHES Swap I-cache and D-cache.
- * MIPS_SR_ISOL_CACHES Isolate D-cache from main memory.
- * Interrupt enable bits defined below.
- * MIPS_SR_KU_OLD Old kernel/user mode bit. 1 => user mode.
- * MIPS_SR_INT_ENA_OLD Old interrupt enable bit.
- * MIPS_SR_KU_PREV Previous kernel/user mode bit. 1 => user mode.
- * MIPS_SR_INT_ENA_PREV Previous interrupt enable bit.
- * MIPS_SR_KU_CUR Current kernel/user mode bit. 1 => user mode.
- */
-
-#define MIPS1_PARITY_ERR 0x00100000
-#define MIPS1_CACHE_MISS 0x00080000
-#define MIPS1_PARITY_ZERO 0x00040000
-#define MIPS1_SWAP_CACHES 0x00020000
-#define MIPS1_ISOL_CACHES 0x00010000
-
-#define MIPS1_SR_KU_OLD 0x00000020 /* 2nd stacked KU/IE*/
-#define MIPS1_SR_INT_ENA_OLD 0x00000010 /* 2nd stacked KU/IE*/
-#define MIPS1_SR_KU_PREV 0x00000008 /* 1st stacked KU/IE*/
-#define MIPS1_SR_INT_ENA_PREV 0x00000004 /* 1st stacked KU/IE*/
-#define MIPS1_SR_KU_CUR 0x00000002 /* current KU */
-
-/* backwards compatibility */
-#define MIPS_SR_PARITY_ERR MIPS1_PARITY_ERR
-#define MIPS_SR_CACHE_MISS MIPS1_CACHE_MISS
-#define MIPS_SR_PARITY_ZERO MIPS1_PARITY_ZERO
-#define MIPS_SR_SWAP_CACHES MIPS1_SWAP_CACHES
-#define MIPS_SR_ISOL_CACHES MIPS1_ISOL_CACHES
-
-#define MIPS_SR_KU_OLD MIPS1_SR_KU_OLD
-#define MIPS_SR_INT_ENA_OLD MIPS1_SR_INT_ENA_OLD
-#define MIPS_SR_KU_PREV MIPS1_SR_KU_PREV
-#define MIPS_SR_KU_CUR MIPS1_SR_KU_CUR
-#define MIPS_SR_INT_ENA_PREV MIPS1_SR_INT_ENA_PREV
-
-/*
* R4000 status register bit definitons,
* where different from r2000/r3000.
*/
-#define MIPS3_SR_XX 0x80000000
-#define MIPS3_SR_RP 0x08000000
-#define MIPS3_SR_FR 0x04000000
-#define MIPS3_SR_RE 0x02000000
-
-#define MIPS3_SR_DIAG_DL 0x01000000 /* QED 52xx */
-#define MIPS3_SR_DIAG_IL 0x00800000 /* QED 52xx */
-#define MIPS3_SR_SR 0x00100000
-#define MIPS3_SR_NMI 0x00080000 /* MIPS32/64 */
-#define MIPS3_SR_DIAG_CH 0x00040000
-#define MIPS3_SR_DIAG_CE 0x00020000
-#define MIPS3_SR_DIAG_PE 0x00010000
-#define MIPS3_SR_EIE 0x00010000 /* TX79/R5900 */
-#define MIPS3_SR_KX 0x00000080
-#define MIPS3_SR_SX 0x00000040
-#define MIPS3_SR_UX 0x00000020
-#define MIPS3_SR_KSU_MASK 0x00000018
-#define MIPS3_SR_KSU_USER 0x00000010
-#define MIPS3_SR_KSU_SUPER 0x00000008
-#define MIPS3_SR_KSU_KERNEL 0x00000000
-#define MIPS3_SR_ERL 0x00000004
-#define MIPS3_SR_EXL 0x00000002
-
-#ifdef MIPS3_5900
-#undef MIPS_SR_INT_IE
-#define MIPS_SR_INT_IE 0x00010001 /* XXX */
-#endif
-
-#define MIPS_SR_SOFT_RESET MIPS3_SR_SR
-#define MIPS_SR_DIAG_CH MIPS3_SR_DIAG_CH
-#define MIPS_SR_DIAG_CE MIPS3_SR_DIAG_CE
-#define MIPS_SR_DIAG_PE MIPS3_SR_DIAG_PE
-#define MIPS_SR_KX MIPS3_SR_KX
-#define MIPS_SR_SX MIPS3_SR_SX
-#define MIPS_SR_UX MIPS3_SR_UX
-
-#define MIPS_SR_KSU_MASK MIPS3_SR_KSU_MASK
-#define MIPS_SR_KSU_USER MIPS3_SR_KSU_USER
-#define MIPS_SR_KSU_SUPER MIPS3_SR_KSU_SUPER
-#define MIPS_SR_KSU_KERNEL MIPS3_SR_KSU_KERNEL
-#define MIPS_SR_ERL MIPS3_SR_ERL
-#define MIPS_SR_EXL MIPS3_SR_EXL
-
+#define MIPS_SR_XX 0x80000000
+#define MIPS_SR_RP 0x08000000
+#define MIPS_SR_FR 0x04000000
+#define MIPS_SR_RE 0x02000000
+
+#define MIPS_SR_DIAG_DL 0x01000000 /* QED 52xx */
+#define MIPS_SR_DIAG_IL 0x00800000 /* QED 52xx */
+#define MIPS_SR_SR 0x00100000
+#define MIPS_SR_NMI 0x00080000 /* MIPS32/64 */
+#define MIPS_SR_DIAG_CH 0x00040000
+#define MIPS_SR_DIAG_CE 0x00020000
+#define MIPS_SR_DIAG_PE 0x00010000
+#define MIPS_SR_EIE 0x00010000 /* TX79/R5900 */
+#define MIPS_SR_KX 0x00000080
+#define MIPS_SR_SX 0x00000040
+#define MIPS_SR_UX 0x00000020
+#define MIPS_SR_KSU_MASK 0x00000018
+#define MIPS_SR_KSU_USER 0x00000010
+#define MIPS_SR_KSU_SUPER 0x00000008
+#define MIPS_SR_KSU_KERNEL 0x00000000
+#define MIPS_SR_ERL 0x00000004
+#define MIPS_SR_EXL 0x00000002
/*
* The interrupt masks.
@@ -370,149 +343,87 @@
#define MIPS_SOFT_INT_MASK_0 0x0100
/*
- * mips3 CPUs have on-chip timer at INT_MASK_5. Each platform can
- * choose to enable this interrupt.
- */
-#if defined(MIPS3_ENABLE_CLOCK_INTR)
-#define MIPS3_INT_MASK MIPS_INT_MASK
-#define MIPS3_HARD_INT_MASK MIPS_HARD_INT_MASK
-#else
-#define MIPS3_INT_MASK (MIPS_INT_MASK & ~MIPS_INT_MASK_5)
-#define MIPS3_HARD_INT_MASK (MIPS_HARD_INT_MASK & ~MIPS_INT_MASK_5)
-#endif
-
-/*
- * The bits in the context register.
- */
-#define MIPS1_CNTXT_PTE_BASE 0xFFE00000
-#define MIPS1_CNTXT_BAD_VPN 0x001FFFFC
-
-#define MIPS3_CNTXT_PTE_BASE 0xFF800000
-#define MIPS3_CNTXT_BAD_VPN2 0x007FFFF0
-
-/*
- * Location of MIPS32 exception vectors. Most are multiplexed in
- * the sense that further decoding is necessary (e.g. reading the
- * CAUSE register or NMI bits in STATUS).
- * Most interrupts go via the
- * The INT vector is dedicated for hardware interrupts; it is
- * only referenced if the IV bit in CAUSE is set to 1.
- */
-#define MIPS_VEC_RESET 0xBFC00000 /* Hard, soft, or NMI */
-#define MIPS_VEC_EJTAG 0xBFC00480
-#define MIPS_VEC_TLB 0x80000000
-#define MIPS_VEC_XTLB 0x80000080
-#define MIPS_VEC_CACHE 0x80000100
-#define MIPS_VEC_GENERIC 0x80000180 /* Most exceptions */
-#define MIPS_VEC_INTERRUPT 0x80000200
-
-/*
* The bits in the MIPS3 config register.
*
* bit 0..5: R/W, Bit 6..31: R/O
*/
/* kseg0 coherency algorithm - see MIPS3_TLB_ATTR values */
-#define MIPS3_CONFIG_K0_MASK 0x00000007
+#define MIPS_CONFIG_K0_MASK 0x00000007
/*
* R/W Update on Store Conditional
* 0: Store Conditional uses coherency algorithm specified by TLB
* 1: Store Conditional uses cacheable coherent update on write
*/
-#define MIPS3_CONFIG_CU 0x00000008
+#define MIPS_CONFIG_CU 0x00000008
-#define MIPS3_CONFIG_DB 0x00000010 /* Primary D-cache line size */
-#define MIPS3_CONFIG_IB 0x00000020 /* Primary I-cache line size */
-#define MIPS3_CONFIG_CACHE_L1_LSIZE(config, bit) \
+#define MIPS_CONFIG_DB 0x00000010 /* Primary D-cache line size */
+#define MIPS_CONFIG_IB 0x00000020 /* Primary I-cache line size */
+#define MIPS_CONFIG_CACHE_L1_LSIZE(config, bit) \
(((config) & (bit)) ? 32 : 16)
-#define MIPS3_CONFIG_DC_MASK 0x000001c0 /* Primary D-cache size */
-#define MIPS3_CONFIG_DC_SHIFT 6
-#define MIPS3_CONFIG_IC_MASK 0x00000e00 /* Primary I-cache size */
-#define MIPS3_CONFIG_IC_SHIFT 9
-#define MIPS3_CONFIG_C_DEFBASE 0x1000 /* default base 2^12 */
+#define MIPS_CONFIG_DC_MASK 0x000001c0 /* Primary D-cache size */
+#define MIPS_CONFIG_DC_SHIFT 6
+#define MIPS_CONFIG_IC_MASK 0x00000e00 /* Primary I-cache size */
+#define MIPS_CONFIG_IC_SHIFT 9
+#define MIPS_CONFIG_C_DEFBASE 0x1000 /* default base 2^12 */
/* Cache size mode indication: available only on Vr41xx CPUs */
-#define MIPS3_CONFIG_CS 0x00001000
-#define MIPS3_CONFIG_C_4100BASE 0x0400 /* base is 2^10 if CS=1 */
-#define MIPS3_CONFIG_CACHE_SIZE(config, mask, base, shift) \
+#define MIPS_CONFIG_CS 0x00001000
+#define MIPS_CONFIG_C_4100BASE 0x0400 /* base is 2^10 if CS=1 */
+#define MIPS_CONFIG_CACHE_SIZE(config, mask, base, shift) \
((base) << (((config) & (mask)) >> (shift)))
/* External cache enable: Controls L2 for R5000/Rm527x and L3 for Rm7000 */
-#define MIPS3_CONFIG_SE 0x00001000
+#define MIPS_CONFIG_SE 0x00001000
/* Block ordering: 0: sequential, 1: sub-block */
-#define MIPS3_CONFIG_EB 0x00002000
+#define MIPS_CONFIG_EB 0x00002000
/* ECC mode - 0: ECC mode, 1: parity mode */
-#define MIPS3_CONFIG_EM 0x00004000
+#define MIPS_CONFIG_EM 0x00004000
/* BigEndianMem - 0: kernel and memory are little endian, 1: big endian */
-#define MIPS3_CONFIG_BE 0x00008000
+#define MIPS_CONFIG_BE 0x00008000
/* Dirty Shared coherency state - 0: enabled, 1: disabled */
-#define MIPS3_CONFIG_SM 0x00010000
+#define MIPS_CONFIG_SM 0x00010000
/* Secondary Cache - 0: present, 1: not present */
-#define MIPS3_CONFIG_SC 0x00020000
+#define MIPS_CONFIG_SC 0x00020000
/* System Port width - 0: 64-bit, 1: 32-bit (QED RM523x), 2,3: reserved */
-#define MIPS3_CONFIG_EW_MASK 0x000c0000
-#define MIPS3_CONFIG_EW_SHIFT 18
+#define MIPS_CONFIG_EW_MASK 0x000c0000
+#define MIPS_CONFIG_EW_SHIFT 18
/* Secondary Cache port width - 0: 128-bit data path to S-cache, 1: reserved */
-#define MIPS3_CONFIG_SW 0x00100000
+#define MIPS_CONFIG_SW 0x00100000
/* Split Secondary Cache Mode - 0: I/D mixed, 1: I/D separated by SCAddr(17) */
-#define MIPS3_CONFIG_SS 0x00200000
+#define MIPS_CONFIG_SS 0x00200000
/* Secondary Cache line size */
-#define MIPS3_CONFIG_SB_MASK 0x00c00000
-#define MIPS3_CONFIG_SB_SHIFT 22
-#define MIPS3_CONFIG_CACHE_L2_LSIZE(config) \
- (0x10 << (((config) & MIPS3_CONFIG_SB_MASK) >> MIPS3_CONFIG_SB_SHIFT))
+#define MIPS_CONFIG_SB_MASK 0x00c00000
+#define MIPS_CONFIG_SB_SHIFT 22
+#define MIPS_CONFIG_CACHE_L2_LSIZE(config) \
+ (0x10 << (((config) & MIPS_CONFIG_SB_MASK) >> MIPS_CONFIG_SB_SHIFT))
/* Write back data rate */
-#define MIPS3_CONFIG_EP_MASK 0x0f000000
-#define MIPS3_CONFIG_EP_SHIFT 24
+#define MIPS_CONFIG_EP_MASK 0x0f000000
+#define MIPS_CONFIG_EP_SHIFT 24
/* System clock ratio - this value is CPU dependent */
-#define MIPS3_CONFIG_EC_MASK 0x70000000
-#define MIPS3_CONFIG_EC_SHIFT 28
+#define MIPS_CONFIG_EC_MASK 0x70000000
+#define MIPS_CONFIG_EC_SHIFT 28
/* Master-Checker Mode - 1: enabled */
-#define MIPS3_CONFIG_CM 0x80000000
+#define MIPS_CONFIG_CM 0x80000000
/*
* The bits in the MIPS4 config register.
*/
-/* kseg0 coherency algorithm - see MIPS3_TLB_ATTR values */
-#define MIPS4_CONFIG_K0_MASK MIPS3_CONFIG_K0_MASK
-#define MIPS4_CONFIG_DN_MASK 0x00000018 /* Device number */
-#define MIPS4_CONFIG_CT 0x00000020 /* CohPrcReqTar */
-#define MIPS4_CONFIG_PE 0x00000040 /* PreElmReq */
-#define MIPS4_CONFIG_PM_MASK 0x00000180 /* PreReqMax */
-#define MIPS4_CONFIG_EC_MASK 0x00001e00 /* SysClkDiv */
-#define MIPS4_CONFIG_SB 0x00002000 /* SCBlkSize */
-#define MIPS4_CONFIG_SK 0x00004000 /* SCColEn */
-#define MIPS4_CONFIG_BE 0x00008000 /* MemEnd */
-#define MIPS4_CONFIG_SS_MASK 0x00070000 /* SCSize */
-#define MIPS4_CONFIG_SC_MASK 0x00380000 /* SCClkDiv */
-#define MIPS4_CONFIG_RESERVED 0x03c00000 /* Reserved wired 0 */
-#define MIPS4_CONFIG_DC_MASK 0x1c000000 /* Primary D-Cache size */
-#define MIPS4_CONFIG_IC_MASK 0xe0000000 /* Primary I-Cache size */
-
-#define MIPS4_CONFIG_DC_SHIFT 26
-#define MIPS4_CONFIG_IC_SHIFT 29
-
-#define MIPS4_CONFIG_CACHE_SIZE(config, mask, base, shift) \
- ((base) << (((config) & (mask)) >> (shift)))
-
-#define MIPS4_CONFIG_CACHE_L2_LSIZE(config) \
- (((config) & MIPS4_CONFIG_SB) ? 128 : 64)
-
/*
* Location of exception vectors.
*
@@ -522,27 +433,16 @@
#define MIPS_UTLB_MISS_EXC_VEC ((intptr_t)(int32_t)0x80000000)
/*
- * MIPS-1 general exception vector (everything else)
- */
-#define MIPS1_GEN_EXC_VEC ((intptr_t)(int32_t)0x80000080)
-
-/*
* MIPS-III exception vectors
*/
-#define MIPS3_XTLB_MISS_EXC_VEC ((intptr_t)(int32_t)0x80000080)
-#define MIPS3_CACHE_ERR_EXC_VEC ((intptr_t)(int32_t)0x80000100)
-#define MIPS3_GEN_EXC_VEC ((intptr_t)(int32_t)0x80000180)
-
-/*
- * TX79 (R5900) exception vectors
- */
-#define MIPS_R5900_COUNTER_EXC_VEC 0x80000080
-#define MIPS_R5900_DEBUG_EXC_VEC 0x80000100
+#define MIPS_XTLB_MISS_EXC_VEC ((intptr_t)(int32_t)0x80000080)
+#define MIPS_CACHE_ERR_EXC_VEC ((intptr_t)(int32_t)0x80000100)
+#define MIPS_GEN_EXC_VEC ((intptr_t)(int32_t)0x80000180)
/*
* MIPS32/MIPS64 (and some MIPS3) dedicated interrupt vector.
*/
-#define MIPS3_INTR_EXC_VEC 0x80000200
+#define MIPS_INTR_EXC_VEC 0x80000200
/*
* Coprocessor 0 registers:
@@ -554,9 +454,10 @@
* 2 MIPS_COP_0_TLB_LO0 .636 r4k TLB entry low.
* 3 MIPS_COP_0_TLB_LO1 .636 r4k TLB entry low, extended.
* 4 MIPS_COP_0_TLB_CONTEXT 3636 TLB Context.
+ * 4/2 MIPS_COP_0_USERLOCAL ..36 UserLocal.
* 5 MIPS_COP_0_TLB_PG_MASK .333 TLB Page Mask register.
* 6 MIPS_COP_0_TLB_WIRED .333 Wired TLB number.
- * 7 MIPS_COP_0_INFO ..33 Info registers
+ * 7 MIPS_COP_0_HWRENA ..33 rdHWR Enable.
* 8 MIPS_COP_0_BAD_VADDR 3636 Bad virtual address.
* 9 MIPS_COP_0_COUNT .333 Count register.
* 10 MIPS_COP_0_TLB_HI 3636 TLB entry high.
@@ -634,7 +535,8 @@
#define MIPS_COP_0_ERROR_PC _(30)
/* MIPS32/64 */
-#define MIPS_COP_0_INFO _(7)
+#define MIPS_COP_0_USERLOCAL _(4) /* sel 2 is userlevel register */
+#define MIPS_COP_0_HWRENA _(7)
#define MIPS_COP_0_DEBUG _(23)
#define MIPS_COP_0_DEPC _(24)
#define MIPS_COP_0_PERFCNT _(25)
@@ -648,14 +550,23 @@
#define MIPS_MMU_BAT 0x02 /* Standard BAT */
#define MIPS_MMU_FIXED 0x03 /* Standard fixed mapping */
-#define MIPS_CONFIG0_MT_MASK 0x00000380 /* bits 9..7 MMU Type */
-#define MIPS_CONFIG0_MT_SHIFT 7
-#define MIPS_CONFIG0_BE 0x00008000 /* data is big-endian */
-#define MIPS_CONFIG0_VI 0x00000004 /* instruction cache is virtual */
-
+/*
+ * Config Register Fields
+ * (See "MIPS Architecture for Programmers Volume III", MD00091, Table 9.39)
+ */
+#define MIPS_CONFIG0_M 0x80000000 /* Flag: Config1 is present. */
+#define MIPS_CONFIG0_MT_MASK 0x00000380 /* bits 9..7 MMU Type */
+#define MIPS_CONFIG0_MT_SHIFT 7
+#define MIPS_CONFIG0_BE 0x00008000 /* data is big-endian */
+#define MIPS_CONFIG0_VI 0x00000008 /* inst cache is virtual */
+
+/*
+ * Config1 Register Fields
+ * (See "MIPS Architecture for Programmers Volume III", MD00091, Table 9-1)
+ */
+#define MIPS_CONFIG1_M 0x80000000 /* Flag: Config2 is present. */
#define MIPS_CONFIG1_TLBSZ_MASK 0x7E000000 /* bits 30..25 # tlb entries minus one */
#define MIPS_CONFIG1_TLBSZ_SHIFT 25
-#define MIPS_MAX_TLB_ENTRIES 128
#define MIPS_CONFIG1_IS_MASK 0x01C00000 /* bits 24..22 icache sets per way */
#define MIPS_CONFIG1_IS_SHIFT 22
@@ -678,6 +589,28 @@
#define MIPS_CONFIG1_EP 0x00000002 /* EJTAG implemented */
#define MIPS_CONFIG1_FP 0x00000001 /* FPU implemented */
+#define MIPS_CONFIG2_SA_SHIFT 0 /* Secondary cache associativity */
+#define MIPS_CONFIG2_SA_MASK 0xf
+#define MIPS_CONFIG2_SL_SHIFT 4 /* Secondary cache line size */
+#define MIPS_CONFIG2_SL_MASK 0xf
+#define MIPS_CONFIG2_SS_SHIFT 8 /* Secondary cache sets per way */
+#define MIPS_CONFIG2_SS_MASK 0xf
+
+#define MIPS_CONFIG3_CMGCR_MASK (1 << 29) /* Coherence manager present */
+
+/*
+ * Config2 Register Fields
+ * (See "MIPS Architecture for Programmers Volume III", MD00091, Table 9.40)
+ */
+#define MIPS_CONFIG2_M 0x80000000 /* Flag: Config3 is present. */
+
+/*
+ * Config3 Register Fields
+ * (See "MIPS Architecture for Programmers Volume III", MD00091, Table 9.41)
+ */
+#define MIPS_CONFIG3_M 0x80000000 /* Flag: Config4 is present */
+#define MIPS_CONFIG3_ULR 0x00002000 /* UserLocal reg implemented */
+
#define MIPS_CONFIG4_MMUSIZEEXT 0x000000FF /* bits 7.. 0 MMU Size Extension */
#define MIPS_CONFIG4_MMUEXTDEF 0x0000C000 /* bits 15.14 MMU Extension Definition */
#define MIPS_CONFIG4_MMUEXTDEF_MMUSIZEEXT 0x00004000 /* This values denotes CONFIG4 bits */
@@ -709,7 +642,7 @@
*/
#define MIPS_MIN_CACHE_SIZE (16 * 1024)
#define MIPS_MAX_CACHE_SIZE (256 * 1024)
-#define MIPS3_MAX_PCACHE_SIZE (32 * 1024) /* max. primary cache size */
+#define MIPS_MAX_PCACHE_SIZE (32 * 1024) /* max. primary cache size */
/*
* The floating point version and status registers.
@@ -746,8 +679,7 @@
#define MIPS_FPU_EXCEPTION_UNIMPL 0x00020000
#define MIPS_FPU_COND_BIT 0x00800000
#define MIPS_FPU_FLUSH_BIT 0x01000000 /* r4k, MBZ on r3k */
-#define MIPS1_FPC_MBZ_BITS 0xff7c0000
-#define MIPS3_FPC_MBZ_BITS 0xfe7c0000
+#define MIPS_FPC_MBZ_BITS 0xfe7c0000
/*
@@ -756,235 +688,19 @@
#define MIPS_OPCODE_SHIFT 26
#define MIPS_OPCODE_C1 0x11
+/* Coherence manager constants */
+#define MIPS_CMGCRB_BASE 11
+#define MIPS_CMGCRF_BASE (~((1 << MIPS_CMGCRB_BASE) - 1))
/*
- * The low part of the TLB entry.
+ * Bits defined for for the HWREna (CP0 register 7, select 0).
*/
-#define MIPS1_TLB_PFN 0xfffff000
-#define MIPS1_TLB_NON_CACHEABLE_BIT 0x00000800
-#define MIPS1_TLB_DIRTY_BIT 0x00000400
-#define MIPS1_TLB_VALID_BIT 0x00000200
-#define MIPS1_TLB_GLOBAL_BIT 0x00000100
-
-#define MIPS3_TLB_PFN 0x3fffffc0
-#define MIPS3_TLB_ATTR_MASK 0x00000038
-#define MIPS3_TLB_ATTR_SHIFT 3
-#define MIPS3_TLB_DIRTY_BIT 0x00000004
-#define MIPS3_TLB_VALID_BIT 0x00000002
-#define MIPS3_TLB_GLOBAL_BIT 0x00000001
-
-#define MIPS1_TLB_PHYS_PAGE_SHIFT 12
-#define MIPS3_TLB_PHYS_PAGE_SHIFT 6
-#define MIPS1_TLB_PF_NUM MIPS1_TLB_PFN
-#define MIPS3_TLB_PF_NUM MIPS3_TLB_PFN
-#define MIPS1_TLB_MOD_BIT MIPS1_TLB_DIRTY_BIT
-#define MIPS3_TLB_MOD_BIT MIPS3_TLB_DIRTY_BIT
-
-/*
- * MIPS3_TLB_ATTR values - coherency algorithm:
- * 0: cacheable, noncoherent, write-through, no write allocate
- * 1: cacheable, noncoherent, write-through, write allocate
- * 2: uncached
- * 3: cacheable, noncoherent, write-back (noncoherent)
- * 4: cacheable, coherent, write-back, exclusive (exclusive)
- * 5: cacheable, coherent, write-back, exclusive on write (sharable)
- * 6: cacheable, coherent, write-back, update on write (update)
- * 7: uncached, accelerated (gather STORE operations)
- */
-#define MIPS3_TLB_ATTR_WT 0 /* IDT */
-#define MIPS3_TLB_ATTR_WT_WRITEALLOCATE 1 /* IDT */
-#define MIPS3_TLB_ATTR_UNCACHED 2 /* R4000/R4400, IDT */
-#define MIPS3_TLB_ATTR_WB_NONCOHERENT 3 /* R4000/R4400, IDT */
-#define MIPS3_TLB_ATTR_WB_EXCLUSIVE 4 /* R4000/R4400 */
-#define MIPS3_TLB_ATTR_WB_SHARABLE 5 /* R4000/R4400 */
-#define MIPS3_TLB_ATTR_WB_UPDATE 6 /* R4000/R4400 */
-#define MIPS4_TLB_ATTR_UNCACHED_ACCELERATED 7 /* R10000 */
-
-
-/*
- * The high part of the TLB entry.
- */
-#define MIPS1_TLB_VPN 0xfffff000
-#define MIPS1_TLB_PID 0x00000fc0
-#define MIPS1_TLB_PID_SHIFT 6
-
-#define MIPS3_TLB_VPN2 0xffffe000
-#define MIPS3_TLB_ASID 0x000000ff
-
-#define MIPS1_TLB_VIRT_PAGE_NUM MIPS1_TLB_VPN
-#define MIPS3_TLB_VIRT_PAGE_NUM MIPS3_TLB_VPN2
-#define MIPS3_TLB_PID MIPS3_TLB_ASID
-#define MIPS_TLB_VIRT_PAGE_SHIFT 12
-
-/*
- * r3000: shift count to put the index in the right spot.
- */
-#define MIPS1_TLB_INDEX_SHIFT 8
-
-/*
- * The first TLB that write random hits.
- */
-#define MIPS1_TLB_FIRST_RAND_ENTRY 8
-#define MIPS3_TLB_WIRED_UPAGES 1
-
-/*
- * The number of process id entries.
- */
-#define MIPS1_TLB_NUM_PIDS 64
-#define MIPS3_TLB_NUM_ASIDS 256
-
-/*
- * Patch codes to hide CPU design differences between MIPS1 and MIPS3.
- */
-
-/* XXX simonb: this is before MIPS3_PLUS is defined (and is ugly!) */
-
-#if !(defined(MIPS3) || defined(MIPS4) || defined(MIPS32) || defined(MIPS64)) \
- && defined(MIPS1) /* XXX simonb must be neater! */
-#define MIPS_TLB_PID_SHIFT MIPS1_TLB_PID_SHIFT
-#define MIPS_TLB_NUM_PIDS MIPS1_TLB_NUM_PIDS
-#endif
-
-#if (defined(MIPS3) || defined(MIPS4) || defined(MIPS32) || defined(MIPS64)) \
- && !defined(MIPS1) /* XXX simonb must be neater! */
-#define MIPS_TLB_PID_SHIFT 0
-#define MIPS_TLB_NUM_PIDS MIPS3_TLB_NUM_ASIDS
-#endif
-
-
-#if !defined(MIPS_TLB_PID_SHIFT)
-#define MIPS_TLB_PID_SHIFT \
- ((MIPS_HAS_R4K_MMU) ? 0 : MIPS1_TLB_PID_SHIFT)
-
-#define MIPS_TLB_NUM_PIDS \
- ((MIPS_HAS_R4K_MMU) ? MIPS3_TLB_NUM_ASIDS : MIPS1_TLB_NUM_PIDS)
-#endif
-
-/*
- * CPU processor revision IDs for company ID == 0 (non mips32/64 chips)
- */
-#define MIPS_R2000 0x01 /* MIPS R2000 ISA I */
-#define MIPS_R3000 0x02 /* MIPS R3000 ISA I */
-#define MIPS_R6000 0x03 /* MIPS R6000 ISA II */
-#define MIPS_R4000 0x04 /* MIPS R4000/R4400 ISA III */
-#define MIPS_R3LSI 0x05 /* LSI Logic R3000 derivative ISA I */
-#define MIPS_R6000A 0x06 /* MIPS R6000A ISA II */
-#define MIPS_R3IDT 0x07 /* IDT R3041 or RC36100 ISA I */
-#define MIPS_R10000 0x09 /* MIPS R10000 ISA IV */
-#define MIPS_R4200 0x0a /* NEC VR4200 ISA III */
-#define MIPS_R4300 0x0b /* NEC VR4300 ISA III */
-#define MIPS_R4100 0x0c /* NEC VR4100 ISA III */
-#define MIPS_R12000 0x0e /* MIPS R12000 ISA IV */
-#define MIPS_R14000 0x0f /* MIPS R14000 ISA IV */
-#define MIPS_R8000 0x10 /* MIPS R8000 Blackbird/TFP ISA IV */
-#define MIPS_RC32300 0x18 /* IDT RC32334,332,355 ISA 32 */
-#define MIPS_R4600 0x20 /* QED R4600 Orion ISA III */
-#define MIPS_R4700 0x21 /* QED R4700 Orion ISA III */
-#define MIPS_R3SONY 0x21 /* Sony R3000 based ISA I */
-#define MIPS_R4650 0x22 /* QED R4650 ISA III */
-#define MIPS_TX3900 0x22 /* Toshiba TX39 family ISA I */
-#define MIPS_R5000 0x23 /* MIPS R5000 ISA IV */
-#define MIPS_R3NKK 0x23 /* NKK R3000 based ISA I */
-#define MIPS_RC32364 0x26 /* IDT RC32364 ISA 32 */
-#define MIPS_RM7000 0x27 /* QED RM7000 ISA IV */
-#define MIPS_RM5200 0x28 /* QED RM5200s ISA IV */
-#define MIPS_TX4900 0x2d /* Toshiba TX49 family ISA III */
-#define MIPS_R5900 0x2e /* Toshiba R5900 (EECore) ISA --- */
-#define MIPS_RC64470 0x30 /* IDT RC64474/RC64475 ISA III */
-#define MIPS_TX7900 0x38 /* Toshiba TX79 ISA III+*/
-#define MIPS_R5400 0x54 /* NEC VR5400 ISA IV */
-#define MIPS_R5500 0x55 /* NEC VR5500 ISA IV */
-
-/*
- * CPU revision IDs for some prehistoric processors.
- */
-
-/* For MIPS_R3000 */
-#define MIPS_REV_R3000 0x20
-#define MIPS_REV_R3000A 0x30
-
-/* For MIPS_TX3900 */
-#define MIPS_REV_TX3912 0x10
-#define MIPS_REV_TX3922 0x30
-#define MIPS_REV_TX3927 0x40
-
-/* For MIPS_R4000 */
-#define MIPS_REV_R4000_A 0x00
-#define MIPS_REV_R4000_B 0x22
-#define MIPS_REV_R4000_C 0x30
-#define MIPS_REV_R4400_A 0x40
-#define MIPS_REV_R4400_B 0x50
-#define MIPS_REV_R4400_C 0x60
-
-/* For MIPS_TX4900 */
-#define MIPS_REV_TX4927 0x22
-
-/*
- * CPU processor revision IDs for company ID == 1 (MIPS)
- */
-#define MIPS_4Kc 0x80 /* MIPS 4Kc ISA 32 */
-#define MIPS_5Kc 0x81 /* MIPS 5Kc ISA 64 */
-#define MIPS_20Kc 0x82 /* MIPS 20Kc ISA 64 */
-#define MIPS_4Kmp 0x83 /* MIPS 4Km/4Kp ISA 32 */
-#define MIPS_4KEc 0x84 /* MIPS 4KEc ISA 32 */
-#define MIPS_4KEmp 0x85 /* MIPS 4KEm/4KEp ISA 32 */
-#define MIPS_4KSc 0x86 /* MIPS 4KSc ISA 32 */
-#define MIPS_M4K 0x87 /* MIPS M4K ISA 32 Rel 2 */
-#define MIPS_25Kf 0x88 /* MIPS 25Kf ISA 64 */
-#define MIPS_5KE 0x89 /* MIPS 5KE ISA 64 Rel 2 */
-#define MIPS_4KEc_R2 0x90 /* MIPS 4KEc_R2 ISA 32 Rel 2 */
-#define MIPS_4KEmp_R2 0x91 /* MIPS 4KEm/4KEp_R2 ISA 32 Rel 2 */
-#define MIPS_4KSd 0x92 /* MIPS 4KSd ISA 32 Rel 2 */
-#define MIPS_24K 0x93 /* MIPS 24Kc/24Kf ISA 32 Rel 2 */
-#define MIPS_34K 0x95 /* MIPS 34K ISA 32 R2 MT */
-#define MIPS_24KE 0x96 /* MIPS 24KEc ISA 32 Rel 2 */
-#define MIPS_74K 0x97 /* MIPS 74Kc/74Kf ISA 32 Rel 2 */
-
-/*
- * AMD (company ID 3) use the processor ID field to donote the CPU core
- * revision and the company options field do donate the SOC chip type.
- */
-
-/* CPU processor revision IDs */
-#define MIPS_AU_REV1 0x01 /* Alchemy Au1000 (Rev 1) ISA 32 */
-#define MIPS_AU_REV2 0x02 /* Alchemy Au1000 (Rev 2) ISA 32 */
-
-/* CPU company options IDs */
-#define MIPS_AU1000 0x00
-#define MIPS_AU1500 0x01
-#define MIPS_AU1100 0x02
-#define MIPS_AU1550 0x03
-
-/*
- * CPU processor revision IDs for company ID == 4 (Broadcom)
- */
-#define MIPS_SB1 0x01 /* SiByte SB1 ISA 64 */
-
-/*
- * CPU processor revision IDs for company ID == 5 (SandCraft)
- */
-#define MIPS_SR7100 0x04 /* SandCraft SR7100 ISA 64 */
-
-/*
- * FPU processor revision ID
- */
-#define MIPS_SOFT 0x00 /* Software emulation ISA I */
-#define MIPS_R2360 0x01 /* MIPS R2360 FPC ISA I */
-#define MIPS_R2010 0x02 /* MIPS R2010 FPC ISA I */
-#define MIPS_R3010 0x03 /* MIPS R3010 FPC ISA I */
-#define MIPS_R6010 0x04 /* MIPS R6010 FPC ISA II */
-#define MIPS_R4010 0x05 /* MIPS R4010 FPC ISA II */
-#define MIPS_R31LSI 0x06 /* LSI Logic derivate ISA I */
-#define MIPS_R3TOSH 0x22 /* Toshiba R3000 based FPU ISA I */
-
-#ifdef ENABLE_MIPS_TX3900
-#include <mips/r3900regs.h>
-#endif
-#ifdef MIPS3_5900
-#include <mips/r5900regs.h>
-#endif
-#ifdef MIPS64_SB1
-#include <mips/sb1regs.h>
-#endif
+#define MIPS_HWRENA_CPUNUM (1<<0) /* CPU number program is running on */
+#define MIPS_HWRENA_SYNCI_STEP (1<<1) /* Address step sized used with SYNCI */
+#define MIPS_HWRENA_CC (1<<2) /* Hi Res cycle counter */
+#define MIPS_HWRENA_CCRES (1<<3) /* Cycle counter resolution */
+#define MIPS_HWRENA_UL (1<<29) /* UserLocal Register */
+#define MIPS_HWRENA_IMPL30 (1<<30) /* Implementation-dependent 30 */
+#define MIPS_HWRENA_IMPL31 (1<<31) /* Implementation-dependent 31 */
#endif /* _MIPS_CPUREGS_H_ */
diff --git a/freebsd/sys/mips/include/machine/in_cksum.h b/freebsd/sys/mips/include/machine/in_cksum.h
index 633efa1f..72edfba9 100644
--- a/freebsd/sys/mips/include/machine/in_cksum.h
+++ b/freebsd/sys/mips/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/contrib/altq/altq/altq.h b/freebsd/sys/net/altq/altq.h
index 9319e44c..5d7eab8a 100644
--- a/freebsd/sys/contrib/altq/altq/altq.h
+++ b/freebsd/sys/net/altq/altq.h
@@ -1,7 +1,4 @@
-/* $FreeBSD$ */
-/* $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $ */
-
-/*
+/*-
* Copyright (C) 1998-2003
* Sony Computer Science Laboratories Inc. All rights reserved.
*
@@ -25,6 +22,9 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $
+ * $FreeBSD$
*/
#ifndef _ALTQ_ALTQ_H_
#define _ALTQ_ALTQ_H_
@@ -63,7 +63,9 @@
#define ALTQT_BLUE 10 /* blue */
#define ALTQT_PRIQ 11 /* priority queue */
#define ALTQT_JOBS 12 /* JoBS */
-#define ALTQT_MAX 13 /* should be max discipline type + 1 */
+#define ALTQT_FAIRQ 13 /* fairq */
+#define ALTQT_CODEL 14 /* CoDel */
+#define ALTQT_MAX 15 /* should be max discipline type + 1 */
#ifdef ALTQ3_COMPAT
struct altqreq {
@@ -198,7 +200,7 @@ struct pktcntr {
#endif /* ALTQ3_COMPAT */
#ifdef _KERNEL
-#include <altq/altq_var.h>
+#include <net/altq/altq_var.h>
#endif
#endif /* _ALTQ_ALTQ_H_ */
diff --git a/freebsd/sys/contrib/altq/altq/altq_cbq.c b/freebsd/sys/net/altq/altq_cbq.c
index 25d04b72..b8593fd6 100644
--- a/freebsd/sys/contrib/altq/altq/altq_cbq.c
+++ b/freebsd/sys/net/altq/altq_cbq.c
@@ -1,9 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $FreeBSD$ */
-/* $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $ */
-
-/*
+/*-
* Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,15 +28,14 @@
* provided "as is" without express or implied warranty of any kind.
*
* These notices must be retained in any copies of any part of this software.
+ *
+ * $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $
+ * $FreeBSD$
*/
-#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
#include <rtems/bsd/local/opt_inet.h>
-#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
#include <rtems/bsd/sys/param.h>
@@ -56,13 +52,16 @@
#endif
#include <net/if.h>
+#include <net/if_var.h>
#include <netinet/in.h>
-#include <net/pfvar.h>
-#include <altq/altq.h>
-#include <altq/altq_cbq.h>
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_cbq.h>
#ifdef ALTQ3_COMPAT
-#include <altq/altq_conf.h>
+#include <net/altq/altq_conf.h>
#endif
#ifdef ALTQ3_COMPAT
@@ -240,6 +239,10 @@ get_class_stats(class_stats_t *statsp, struct rm_class *cl)
if (q_is_rio(cl->q_))
rio_getstats((rio_t *)cl->red_, &statsp->red[0]);
#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->q_))
+ codel_getstats(cl->codel_, &statsp->codel);
+#endif
}
int
@@ -250,11 +253,7 @@ cbq_pfattach(struct pf_altq *a)
if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
return (EINVAL);
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
error = altq_attach(&ifp->if_snd, ALTQT_CBQ, a->altq_disc,
cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL);
splx(s);
@@ -273,10 +272,9 @@ cbq_add_altq(struct pf_altq *a)
return (ENODEV);
/* allocate and initialize cbq_state_t */
- cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK);
+ cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_NOWAIT | M_ZERO);
if (cbqp == NULL)
return (ENOMEM);
- bzero(cbqp, sizeof(cbq_state_t));
CALLOUT_INIT(&cbqp->cbq_callout);
cbqp->cbq_qlen = 0;
cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */
@@ -705,8 +703,8 @@ cbq_modify_class(acp)
* struct rm_class *parent, struct rm_class *borrow)
*
* This function create a new traffic class in the CBQ class hierarchy of
- * given paramters. The class that created is either the root, default,
- * or a new dynamic class. If CBQ is not initilaized, the the root class
+ * given parameters. The class that is created is either the root, default,
+ * or a new dynamic class. If CBQ is not initialized, the root class
* will be created.
*/
static int
diff --git a/freebsd/sys/contrib/altq/altq/altq_cbq.h b/freebsd/sys/net/altq/altq_cbq.h
index 30a15c73..51e7cf9a 100644
--- a/freebsd/sys/contrib/altq/altq/altq_cbq.h
+++ b/freebsd/sys/net/altq/altq_cbq.h
@@ -1,6 +1,4 @@
-/* $KAME: altq_cbq.h,v 1.12 2003/10/03 05:05:15 kjc Exp $ */
-
-/*
+/*-
* Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,15 +26,19 @@
* provided "as is" without express or implied warranty of any kind.
*
* These notices must be retained in any copies of any part of this software.
+ *
+ * $KAME: altq_cbq.h,v 1.12 2003/10/03 05:05:15 kjc Exp $
+ * $FreeBSD$
*/
#ifndef _ALTQ_ALTQ_CBQ_H_
#define _ALTQ_ALTQ_CBQ_H_
-#include <altq/altq.h>
-#include <altq/altq_rmclass.h>
-#include <altq/altq_red.h>
-#include <altq/altq_rio.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_rmclass.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
#ifdef __cplusplus
extern "C" {
@@ -51,6 +53,7 @@ extern "C" {
#define CBQCLF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */
#define CBQCLF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
#define CBQCLF_BORROW 0x0020 /* borrow from parent */
+#define CBQCLF_CODEL 0x0040 /* use CoDel */
/* class flags only for root class */
#define CBQCLF_WRR 0x0100 /* weighted-round robin */
@@ -90,9 +93,10 @@ typedef struct _cbq_class_stats_ {
int qcnt; /* # packets in queue */
int avgidle;
- /* red and rio related info */
+ /* codel, red and rio related info */
int qtype;
struct redstats red[3];
+ struct codel_stats codel;
} class_stats_t;
#ifdef ALTQ3_COMPAT
diff --git a/freebsd/sys/contrib/altq/altq/altq_cdnr.c b/freebsd/sys/net/altq/altq_cdnr.c
index ce517318..f456ce83 100644
--- a/freebsd/sys/contrib/altq/altq/altq_cdnr.c
+++ b/freebsd/sys/net/altq/altq_cdnr.c
@@ -1,9 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $FreeBSD$ */
-/* $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $ */
-
-/*
+/*-
* Copyright (C) 1999-2002
* Sony Computer Science Laboratories Inc. All rights reserved.
*
@@ -27,15 +24,14 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $
+ * $FreeBSD$
*/
-#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
#include <rtems/bsd/local/opt_inet.h>
-#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
#include <rtems/bsd/sys/param.h>
#include <sys/malloc.h>
@@ -57,11 +53,12 @@
#include <netinet/ip6.h>
#endif
-#include <altq/altq.h>
+#include <net/altq/if_altq.h>
+#include <net/altq/altq.h>
#ifdef ALTQ3_COMPAT
-#include <altq/altq_conf.h>
+#include <net/altq/altq_conf.h>
#endif
-#include <altq/altq_cdnr.h>
+#include <net/altq/altq_cdnr.h>
#ifdef ALTQ3_COMPAT
/*
@@ -1273,11 +1270,7 @@ cdnrioctl(dev, cmd, addr, flag, p)
break;
}
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
switch (cmd) {
case CDNR_IF_ATTACH:
diff --git a/freebsd/sys/contrib/altq/altq/altq_cdnr.h b/freebsd/sys/net/altq/altq_cdnr.h
index d55402f4..06fa9c98 100644
--- a/freebsd/sys/contrib/altq/altq/altq_cdnr.h
+++ b/freebsd/sys/net/altq/altq_cdnr.h
@@ -1,6 +1,4 @@
-/* $KAME: altq_cdnr.h,v 1.9 2003/07/10 12:07:48 kjc Exp $ */
-
-/*
+/*-
* Copyright (C) 1999-2002
* Sony Computer Science Laboratories Inc. All rights reserved.
*
@@ -24,12 +22,15 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: altq_cdnr.h,v 1.9 2003/07/10 12:07:48 kjc Exp $
+ * $FreeBSD$
*/
#ifndef _ALTQ_ALTQ_CDNR_H_
#define _ALTQ_ALTQ_CDNR_H_
-#include <altq/altq.h>
+#include <net/altq/altq.h>
/*
* traffic conditioner element types
diff --git a/freebsd/sys/contrib/altq/altq/altq_classq.h b/freebsd/sys/net/altq/altq_classq.h
index dc5c646f..dc465a0b 100644
--- a/freebsd/sys/contrib/altq/altq/altq_classq.h
+++ b/freebsd/sys/net/altq/altq_classq.h
@@ -1,6 +1,4 @@
-/* $KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $ */
-
-/*
+/*-
* Copyright (c) 1991-1997 Regents of the University of California.
* All rights reserved.
*
@@ -31,6 +29,9 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $
+ * $FreeBSD$
*/
/*
* class queue definitions extracted from rm_class.h.
@@ -49,6 +50,7 @@ extern "C" {
#define Q_RED 0x01
#define Q_RIO 0x02
#define Q_DROPTAIL 0x03
+#define Q_CODEL 0x04
#ifdef _KERNEL
@@ -59,6 +61,7 @@ struct _class_queue_ {
struct mbuf *tail_; /* Tail of packet queue */
int qlen_; /* Queue length (in number of packets) */
int qlim_; /* Queue limit (in number of packets*) */
+ int qsize_; /* Queue size (in number of bytes*) */
int qtype_; /* Queue type */
};
@@ -67,10 +70,12 @@ typedef struct _class_queue_ class_queue_t;
#define qtype(q) (q)->qtype_ /* Get queue type */
#define qlimit(q) (q)->qlim_ /* Max packets to be queued */
#define qlen(q) (q)->qlen_ /* Current queue length. */
+#define qsize(q) (q)->qsize_ /* Current queue size. */
#define qtail(q) (q)->tail_ /* Tail of the queue */
#define qhead(q) ((q)->tail_ ? (q)->tail_->m_nextpkt : NULL)
#define qempty(q) ((q)->qlen_ == 0) /* Is the queue empty?? */
+#define q_is_codel(q) ((q)->qtype_ == Q_CODEL) /* Is the queue a codel queue */
#define q_is_red(q) ((q)->qtype_ == Q_RED) /* Is the queue a red queue */
#define q_is_rio(q) ((q)->qtype_ == Q_RIO) /* Is the queue a rio queue */
#define q_is_red_or_rio(q) ((q)->qtype_ == Q_RED || (q)->qtype_ == Q_RIO)
@@ -100,6 +105,7 @@ _addq(class_queue_t *q, struct mbuf *m)
m0->m_nextpkt = m;
qtail(q) = m;
qlen(q)++;
+ qsize(q) += m_pktlen(m);
}
static __inline struct mbuf *
@@ -114,6 +120,7 @@ _getq(class_queue_t *q)
else
qtail(q) = NULL;
qlen(q)--;
+ qsize(q) -= m_pktlen(m0);
m0->m_nextpkt = NULL;
return (m0);
}
diff --git a/freebsd/sys/net/altq/altq_codel.c b/freebsd/sys/net/altq/altq_codel.c
new file mode 100644
index 00000000..438120f5
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_codel.c
@@ -0,0 +1,479 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * CoDel - The Controlled-Delay Active Queue Management algorithm
+ *
+ * Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org>
+ * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
+ * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
+ * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
+ * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#include <rtems/bsd/local/opt_altq.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#ifdef ALTQ_CODEL /* CoDel is enabled by ALTQ_CODEL option in opt_altq.h */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <net/altq/if_altq.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_codel.h>
+
+static int codel_should_drop(struct codel *, class_queue_t *,
+ struct mbuf *, u_int64_t);
+static void codel_Newton_step(struct codel_vars *);
+static u_int64_t codel_control_law(u_int64_t t, u_int64_t, u_int32_t);
+
+#define codel_time_after(a, b) ((int64_t)(a) - (int64_t)(b) > 0)
+#define codel_time_after_eq(a, b) ((int64_t)(a) - (int64_t)(b) >= 0)
+#define codel_time_before(a, b) ((int64_t)(a) - (int64_t)(b) < 0)
+#define codel_time_before_eq(a, b) ((int64_t)(a) - (int64_t)(b) <= 0)
+
+static int codel_request(struct ifaltq *, int, void *);
+
+static int codel_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *codel_dequeue(struct ifaltq *, int);
+
+int
+codel_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+
+ return (altq_attach(&ifp->if_snd, ALTQT_CODEL, a->altq_disc,
+ codel_enqueue, codel_dequeue, codel_request, NULL, NULL));
+}
+
+int
+codel_add_altq(struct pf_altq *a)
+{
+ struct codel_if *cif;
+ struct ifnet *ifp;
+ struct codel_opts *opts;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+ opts = &a->pq_u.codel_opts;
+
+ cif = malloc(sizeof(struct codel_if), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cif == NULL)
+ return (ENOMEM);
+ cif->cif_bandwidth = a->ifbandwidth;
+ cif->cif_ifq = &ifp->if_snd;
+
+ cif->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cif->cl_q == NULL) {
+ free(cif, M_DEVBUF);
+ return (ENOMEM);
+ }
+
+ if (a->qlimit == 0)
+ a->qlimit = 50; /* use default. */
+ qlimit(cif->cl_q) = a->qlimit;
+ qtype(cif->cl_q) = Q_CODEL;
+ qlen(cif->cl_q) = 0;
+ qsize(cif->cl_q) = 0;
+
+ if (opts->target == 0)
+ opts->target = 5;
+ if (opts->interval == 0)
+ opts->interval = 100;
+ cif->codel.params.target = machclk_freq * opts->target / 1000;
+ cif->codel.params.interval = machclk_freq * opts->interval / 1000;
+ cif->codel.params.ecn = opts->ecn;
+ cif->codel.stats.maxpacket = 256;
+
+ cif->cl_stats.qlength = qlen(cif->cl_q);
+ cif->cl_stats.qlimit = qlimit(cif->cl_q);
+
+ /* keep the state in pf_altq */
+ a->altq_disc = cif;
+
+ return (0);
+}
+
+int
+codel_remove_altq(struct pf_altq *a)
+{
+ struct codel_if *cif;
+
+ if ((cif = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ if (cif->cl_q)
+ free(cif->cl_q, M_DEVBUF);
+ free(cif, M_DEVBUF);
+
+ return (0);
+}
+
+int
+codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ struct codel_if *cif;
+ struct codel_ifstats stats;
+ int error = 0;
+
+ if ((cif = altq_lookup(a->ifname, ALTQT_CODEL)) == NULL)
+ return (EBADF);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ stats = cif->cl_stats;
+ stats.stats = cif->codel.stats;
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+
+ return (0);
+}
+
+static int
+codel_request(struct ifaltq *ifq, int req, void *arg)
+{
+ struct codel_if *cif = (struct codel_if *)ifq->altq_disc;
+ struct mbuf *m;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ if (!ALTQ_IS_ENABLED(cif->cif_ifq))
+ break;
+
+ if (qempty(cif->cl_q))
+ break;
+
+ while ((m = _getq(cif->cl_q)) != NULL) {
+ PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
+ m_freem(m);
+ IFQ_DEC_LEN(cif->cif_ifq);
+ }
+ cif->cif_ifq->ifq_len = 0;
+ break;
+ }
+
+ return (0);
+}
+
+static int
+codel_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+
+ struct codel_if *cif = (struct codel_if *) ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+ m_freem(m);
+ PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
+ return (ENOBUFS);
+ }
+
+ if (codel_addq(&cif->codel, cif->cl_q, m)) {
+ PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+
+ return (0);
+}
+
+static struct mbuf *
+codel_dequeue(struct ifaltq *ifq, int op)
+{
+ struct codel_if *cif = (struct codel_if *)ifq->altq_disc;
+ struct mbuf *m;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (IFQ_IS_EMPTY(ifq))
+ return (NULL);
+
+ if (op == ALTDQ_POLL)
+ return (qhead(cif->cl_q));
+
+
+ m = codel_getq(&cif->codel, cif->cl_q);
+ if (m != NULL) {
+ IFQ_DEC_LEN(ifq);
+ PKTCNTR_ADD(&cif->cl_stats.cl_xmitcnt, m_pktlen(m));
+ return (m);
+ }
+
+ return (NULL);
+}
+
+struct codel *
+codel_alloc(int target, int interval, int ecn)
+{
+ struct codel *c;
+
+ c = malloc(sizeof(*c), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (c != NULL) {
+ c->params.target = machclk_freq * target / 1000;
+ c->params.interval = machclk_freq * interval / 1000;
+ c->params.ecn = ecn;
+ c->stats.maxpacket = 256;
+ }
+
+ return (c);
+}
+
+void
+codel_destroy(struct codel *c)
+{
+
+ free(c, M_DEVBUF);
+}
+
+#define MTAG_CODEL 1438031249
+int
+codel_addq(struct codel *c, class_queue_t *q, struct mbuf *m)
+{
+ struct m_tag *mtag;
+ uint64_t *enqueue_time;
+
+ if (qlen(q) < qlimit(q)) {
+ mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL);
+ if (mtag == NULL)
+ mtag = m_tag_alloc(MTAG_CODEL, 0, sizeof(uint64_t),
+ M_NOWAIT);
+ if (mtag == NULL) {
+ m_freem(m);
+ return (-1);
+ }
+ enqueue_time = (uint64_t *)(mtag + 1);
+ *enqueue_time = read_machclk();
+ m_tag_prepend(m, mtag);
+ _addq(q, m);
+ return (0);
+ }
+ c->drop_overlimit++;
+ m_freem(m);
+
+ return (-1);
+}
+
+static int
+codel_should_drop(struct codel *c, class_queue_t *q, struct mbuf *m,
+ u_int64_t now)
+{
+ struct m_tag *mtag;
+ uint64_t *enqueue_time;
+
+ if (m == NULL) {
+ c->vars.first_above_time = 0;
+ return (0);
+ }
+
+ mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL);
+ if (mtag == NULL) {
+ /* Only one warning per second. */
+ if (ppsratecheck(&c->last_log, &c->last_pps, 1))
+ printf("%s: could not found the packet mtag!\n",
+ __func__);
+ c->vars.first_above_time = 0;
+ return (0);
+ }
+ enqueue_time = (uint64_t *)(mtag + 1);
+ c->vars.ldelay = now - *enqueue_time;
+ c->stats.maxpacket = MAX(c->stats.maxpacket, m_pktlen(m));
+
+ if (codel_time_before(c->vars.ldelay, c->params.target) ||
+ qsize(q) <= c->stats.maxpacket) {
+ /* went below - stay below for at least interval */
+ c->vars.first_above_time = 0;
+ return (0);
+ }
+ if (c->vars.first_above_time == 0) {
+ /* just went above from below. If we stay above
+ * for at least interval we'll say it's ok to drop
+ */
+ c->vars.first_above_time = now + c->params.interval;
+ return (0);
+ }
+ if (codel_time_after(now, c->vars.first_above_time))
+ return (1);
+
+ return (0);
+}
+
+/*
+ * Run a Newton method step:
+ * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
+ *
+ * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
+ */
+static void
+codel_Newton_step(struct codel_vars *vars)
+{
+ uint32_t invsqrt, invsqrt2;
+ uint64_t val;
+
+/* sizeof_in_bits(rec_inv_sqrt) */
+#define REC_INV_SQRT_BITS (8 * sizeof(u_int16_t))
+/* needed shift to get a Q0.32 number from rec_inv_sqrt */
+#define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS)
+
+ invsqrt = ((u_int32_t)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT;
+ invsqrt2 = ((u_int64_t)invsqrt * invsqrt) >> 32;
+ val = (3LL << 32) - ((u_int64_t)vars->count * invsqrt2);
+ val >>= 2; /* avoid overflow in following multiply */
+ val = (val * invsqrt) >> (32 - 2 + 1);
+
+ vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT;
+}
+
+static u_int64_t
+codel_control_law(u_int64_t t, u_int64_t interval, u_int32_t rec_inv_sqrt)
+{
+
+ return (t + (u_int32_t)(((u_int64_t)interval *
+ (rec_inv_sqrt << REC_INV_SQRT_SHIFT)) >> 32));
+}
+
+struct mbuf *
+codel_getq(struct codel *c, class_queue_t *q)
+{
+ struct mbuf *m;
+ u_int64_t now;
+ int drop;
+
+ if ((m = _getq(q)) == NULL) {
+ c->vars.dropping = 0;
+ return (m);
+ }
+
+ now = read_machclk();
+ drop = codel_should_drop(c, q, m, now);
+ if (c->vars.dropping) {
+ if (!drop) {
+ /* sojourn time below target - leave dropping state */
+ c->vars.dropping = 0;
+ } else if (codel_time_after_eq(now, c->vars.drop_next)) {
+ /* It's time for the next drop. Drop the current
+ * packet and dequeue the next. The dequeue might
+ * take us out of dropping state.
+ * If not, schedule the next drop.
+ * A large backlog might result in drop rates so high
+ * that the next drop should happen now,
+ * hence the while loop.
+ */
+ while (c->vars.dropping &&
+ codel_time_after_eq(now, c->vars.drop_next)) {
+ c->vars.count++; /* don't care of possible wrap
+ * since there is no more
+ * divide */
+ codel_Newton_step(&c->vars);
+ /* TODO ECN */
+ PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m));
+ m_freem(m);
+ m = _getq(q);
+ if (!codel_should_drop(c, q, m, now))
+ /* leave dropping state */
+ c->vars.dropping = 0;
+ else
+ /* and schedule the next drop */
+ c->vars.drop_next =
+ codel_control_law(c->vars.drop_next,
+ c->params.interval,
+ c->vars.rec_inv_sqrt);
+ }
+ }
+ } else if (drop) {
+ /* TODO ECN */
+ PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m));
+ m_freem(m);
+
+ m = _getq(q);
+ drop = codel_should_drop(c, q, m, now);
+
+ c->vars.dropping = 1;
+ /* if min went above target close to when we last went below it
+ * assume that the drop rate that controlled the queue on the
+ * last cycle is a good starting point to control it now.
+ */
+ if (codel_time_before(now - c->vars.drop_next,
+ 16 * c->params.interval)) {
+ c->vars.count = (c->vars.count - c->vars.lastcount) | 1;
+ /* we dont care if rec_inv_sqrt approximation
+ * is not very precise :
+ * Next Newton steps will correct it quadratically.
+ */
+ codel_Newton_step(&c->vars);
+ } else {
+ c->vars.count = 1;
+ c->vars.rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
+ }
+ c->vars.lastcount = c->vars.count;
+ c->vars.drop_next = codel_control_law(now, c->params.interval,
+ c->vars.rec_inv_sqrt);
+ }
+
+ return (m);
+}
+
+void
+codel_getstats(struct codel *c, struct codel_stats *s)
+{
+ *s = c->stats;
+}
+
+#endif /* ALTQ_CODEL */
diff --git a/freebsd/sys/net/altq/altq_codel.h b/freebsd/sys/net/altq/altq_codel.h
new file mode 100644
index 00000000..8d7178b4
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_codel.h
@@ -0,0 +1,129 @@
+/*
+ * CoDel - The Controlled-Delay Active Queue Management algorithm
+ *
+ * Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org>
+ * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
+ * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
+ * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
+ * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_CODEL_H_
+#define _ALTQ_ALTQ_CODEL_H_
+
+struct codel_stats {
+ u_int32_t maxpacket;
+ struct pktcntr drop_cnt;
+ u_int marked_packets;
+};
+
+struct codel_ifstats {
+ u_int qlength;
+ u_int qlimit;
+ struct codel_stats stats;
+ struct pktcntr cl_xmitcnt; /* transmitted packet counter */
+ struct pktcntr cl_dropcnt; /* dropped packet counter */
+};
+
+#ifdef _KERNEL
+#include <net/altq/altq_classq.h>
+
+/**
+ * struct codel_params - contains codel parameters
+ * <at> target: target queue size (in time units)
+ * <at> interval: width of moving time window
+ * <at> ecn: is Explicit Congestion Notification enabled
+ */
+struct codel_params {
+ u_int64_t target;
+ u_int64_t interval;
+ int ecn;
+};
+
+/**
+ * struct codel_vars - contains codel variables
+ * <at> count: how many drops we've done since the last time we
+ * entered dropping state
+ * <at> lastcount: count at entry to dropping state
+ * <at> dropping: set to true if in dropping state
+ * <at> rec_inv_sqrt: reciprocal value of sqrt(count) >> 1
+ * <at> first_above_time: when we went (or will go) continuously above
+ * target for interval
+ * <at> drop_next: time to drop next packet, or when we dropped last
+ * <at> ldelay: sojourn time of last dequeued packet
+ */
+struct codel_vars {
+ u_int32_t count;
+ u_int32_t lastcount;
+ int dropping;
+ u_int16_t rec_inv_sqrt;
+ u_int64_t first_above_time;
+ u_int64_t drop_next;
+ u_int64_t ldelay;
+};
+
+struct codel {
+ int last_pps;
+ struct codel_params params;
+ struct codel_vars vars;
+ struct codel_stats stats;
+ struct timeval last_log;
+ u_int32_t drop_overlimit;
+};
+
+/*
+ * codel interface state
+ */
+struct codel_if {
+ struct codel_if *cif_next; /* interface state list */
+ struct ifaltq *cif_ifq; /* backpointer to ifaltq */
+ u_int cif_bandwidth; /* link bandwidth in bps */
+
+ class_queue_t *cl_q; /* class queue structure */
+ struct codel codel;
+
+ /* statistics */
+ struct codel_ifstats cl_stats;
+};
+
+struct codel *codel_alloc(int, int, int);
+void codel_destroy(struct codel *);
+int codel_addq(struct codel *, class_queue_t *, struct mbuf *);
+struct mbuf *codel_getq(struct codel *, class_queue_t *);
+void codel_getstats(struct codel *, struct codel_stats *);
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_CODEL_H_ */
diff --git a/freebsd/sys/net/altq/altq_fairq.c b/freebsd/sys/net/altq/altq_fairq.c
new file mode 100644
index 00000000..efb58d3f
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_fairq.c
@@ -0,0 +1,911 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * Copyright (c) 2008 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.1 2008/04/06 18:58:15 dillon Exp $
+ * $FreeBSD$
+ */
+/*
+ * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
+ * fairq. The fairq algorithm is completely different than priq, of course,
+ * but because I used priq's skeleton I believe I should include priq's
+ * copyright.
+ *
+ * Copyright (C) 2000-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * FAIRQ - take traffic classified by keep state (hashed into
+ * mbuf->m_pkthdr.altq_state_hash) and bucketize it. Fairly extract
+ * the first packet from each bucket in a round-robin fashion.
+ *
+ * TODO - better overall qlimit support (right now it is per-bucket).
+ * - NOTE: red etc is per bucket, not overall.
+ * - better service curve support.
+ *
+ * EXAMPLE:
+ *
+ * altq on em0 fairq bandwidth 650Kb queue { std, bulk }
+ * queue std priority 3 bandwidth 400Kb \
+ * fairq (buckets 64, default, hogs 1Kb) qlimit 50
+ * queue bulk priority 2 bandwidth 100Kb \
+ * fairq (buckets 64, hogs 1Kb) qlimit 50
+ *
+ * pass out on em0 from any to any keep state queue std
+ * pass out on em0 inet proto tcp ..... port ... keep state queue bulk
+ */
+#include <rtems/bsd/local/opt_altq.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#ifdef ALTQ_FAIRQ /* fairq is enabled in the kernel conf */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_fairq.h>
+
+/*
+ * function prototypes
+ */
+static int fairq_clear_interface(struct fairq_if *);
+static int fairq_request(struct ifaltq *, int, void *);
+static void fairq_purge(struct fairq_if *);
+static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int);
+static int fairq_class_destroy(struct fairq_class *);
+static int fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *fairq_dequeue(struct ifaltq *, int);
+
+static int fairq_addq(struct fairq_class *, struct mbuf *, u_int32_t);
+static struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
+static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
+static fairq_bucket_t *fairq_selectq(struct fairq_class *, int);
+static void fairq_purgeq(struct fairq_class *);
+
+static void get_class_stats(struct fairq_classstats *, struct fairq_class *);
+static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t);
+
+int
+fairq_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int error;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+
+ error = altq_attach(&ifp->if_snd, ALTQT_FAIRQ, a->altq_disc,
+ fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL);
+
+ return (error);
+}
+
+int
+fairq_add_altq(struct pf_altq *a)
+{
+ struct fairq_if *pif;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+
+ pif = malloc(sizeof(struct fairq_if),
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ pif->pif_bandwidth = a->ifbandwidth;
+ pif->pif_maxpri = -1;
+ pif->pif_ifq = &ifp->if_snd;
+
+ /* keep the state in pf_altq */
+ a->altq_disc = pif;
+
+ return (0);
+}
+
+int
+fairq_remove_altq(struct pf_altq *a)
+{
+ struct fairq_if *pif;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ fairq_clear_interface(pif);
+
+ free(pif, M_DEVBUF);
+ return (0);
+}
+
+int
+fairq_add_queue(struct pf_altq *a)
+{
+ struct fairq_if *pif;
+ struct fairq_class *cl;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ /* check parameters */
+ if (a->priority >= FAIRQ_MAXPRI)
+ return (EINVAL);
+ if (a->qid == 0)
+ return (EINVAL);
+ if (pif->pif_classes[a->priority] != NULL)
+ return (EBUSY);
+ if (clh_to_clp(pif, a->qid) != NULL)
+ return (EBUSY);
+
+ cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth,
+ &a->pq_u.fairq_opts, a->qid);
+ if (cl == NULL)
+ return (ENOMEM);
+
+ return (0);
+}
+
+int
+fairq_remove_queue(struct pf_altq *a)
+{
+ struct fairq_if *pif;
+ struct fairq_class *cl;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(pif, a->qid)) == NULL)
+ return (EINVAL);
+
+ return (fairq_class_destroy(cl));
+}
+
+int
+fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ struct fairq_if *pif;
+ struct fairq_class *cl;
+ struct fairq_classstats stats;
+ int error = 0;
+
+ if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, a->qid)) == NULL)
+ return (EINVAL);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+ return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes.
+ */
+static int
+fairq_clear_interface(struct fairq_if *pif)
+{
+ struct fairq_class *cl;
+ int pri;
+
+ /* clear out the classes */
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ if ((cl = pif->pif_classes[pri]) != NULL)
+ fairq_class_destroy(cl);
+ }
+
+ return (0);
+}
+
+static int
+fairq_request(struct ifaltq *ifq, int req, void *arg)
+{
+ struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ fairq_purge(pif);
+ break;
+ }
+ return (0);
+}
+
+/* discard all the queued packets on the interface */
+static void
+fairq_purge(struct fairq_if *pif)
+{
+ struct fairq_class *cl;
+ int pri;
+
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
+ fairq_purgeq(cl);
+ }
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ pif->pif_ifq->ifq_len = 0;
+}
+
+static struct fairq_class *
+fairq_class_create(struct fairq_if *pif, int pri, int qlimit,
+ u_int bandwidth, struct fairq_opts *opts, int qid)
+{
+ struct fairq_class *cl;
+ int flags = opts->flags;
+ u_int nbuckets = opts->nbuckets;
+ int i;
+
+#ifndef ALTQ_RED
+ if (flags & FARF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("fairq_class_create: RED not configured for FAIRQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+#ifndef ALTQ_CODEL
+ if (flags & FARF_CODEL) {
+#ifdef ALTQ_DEBUG
+ printf("fairq_class_create: CODEL not configured for FAIRQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+ if (nbuckets == 0)
+ nbuckets = 256;
+ if (nbuckets > FAIRQ_MAX_BUCKETS)
+ nbuckets = FAIRQ_MAX_BUCKETS;
+ /* enforce power-of-2 size */
+ while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
+ ++nbuckets;
+
+ if ((cl = pif->pif_classes[pri]) != NULL) {
+ /* modify the class instead of creating a new one */
+ IFQ_LOCK(cl->cl_pif->pif_ifq);
+ if (cl->cl_head)
+ fairq_purgeq(cl);
+ IFQ_UNLOCK(cl->cl_pif->pif_ifq);
+#ifdef ALTQ_RIO
+ if (cl->cl_qtype == Q_RIO)
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (cl->cl_qtype == Q_RED)
+ red_destroy(cl->cl_red);
+#endif
+#ifdef ALTQ_CODEL
+ if (cl->cl_qtype == Q_CODEL)
+ codel_destroy(cl->cl_codel);
+#endif
+ } else {
+ cl = malloc(sizeof(struct fairq_class),
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ cl->cl_nbuckets = nbuckets;
+ cl->cl_nbucket_mask = nbuckets - 1;
+
+ cl->cl_buckets = malloc(
+ sizeof(struct fairq_bucket) * cl->cl_nbuckets,
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ cl->cl_head = NULL;
+ }
+
+ pif->pif_classes[pri] = cl;
+ if (flags & FARF_DEFAULTCLASS)
+ pif->pif_default = cl;
+ if (qlimit == 0)
+ qlimit = 50; /* use default */
+ cl->cl_qlimit = qlimit;
+ for (i = 0; i < cl->cl_nbuckets; ++i) {
+ qlimit(&cl->cl_buckets[i].queue) = qlimit;
+ }
+ cl->cl_bandwidth = bandwidth / 8;
+ cl->cl_qtype = Q_DROPTAIL;
+ cl->cl_flags = flags & FARF_USERFLAGS;
+ cl->cl_pri = pri;
+ if (pri > pif->pif_maxpri)
+ pif->pif_maxpri = pri;
+ cl->cl_pif = pif;
+ cl->cl_handle = qid;
+ cl->cl_hogs_m1 = opts->hogs_m1 / 8;
+ cl->cl_lssc_m1 = opts->lssc_m1 / 8; /* NOT YET USED */
+
+#ifdef ALTQ_RED
+ if (flags & (FARF_RED|FARF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & FARF_ECN)
+ red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+ if (flags & FARF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ if (pif->pif_bandwidth < 8)
+ red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+ else
+ red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
+ * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
+#ifdef ALTQ_RIO
+ if (flags & FARF_RIO) {
+ cl->cl_red = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ cl->cl_qtype = Q_RIO;
+ } else
+#endif
+ if (flags & FARF_RED) {
+ cl->cl_red = red_alloc(0, 0,
+ cl->cl_qlimit * 10/100,
+ cl->cl_qlimit * 30/100,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ cl->cl_qtype = Q_RED;
+ }
+ }
+#endif /* ALTQ_RED */
+#ifdef ALTQ_CODEL
+ if (flags & FARF_CODEL) {
+ cl->cl_codel = codel_alloc(5, 100, 0);
+ if (cl->cl_codel != NULL)
+ cl->cl_qtype = Q_CODEL;
+ }
+#endif
+
+ return (cl);
+}
+
+static int
+fairq_class_destroy(struct fairq_class *cl)
+{
+ struct fairq_if *pif;
+ int pri;
+
+ IFQ_LOCK(cl->cl_pif->pif_ifq);
+
+ if (cl->cl_head)
+ fairq_purgeq(cl);
+
+ pif = cl->cl_pif;
+ pif->pif_classes[cl->cl_pri] = NULL;
+ if (pif->pif_poll_cache == cl)
+ pif->pif_poll_cache = NULL;
+ if (pif->pif_maxpri == cl->cl_pri) {
+ for (pri = cl->cl_pri; pri >= 0; pri--)
+ if (pif->pif_classes[pri] != NULL) {
+ pif->pif_maxpri = pri;
+ break;
+ }
+ if (pri < 0)
+ pif->pif_maxpri = -1;
+ }
+ IFQ_UNLOCK(cl->cl_pif->pif_ifq);
+
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (cl->cl_qtype == Q_RIO)
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (cl->cl_qtype == Q_RED)
+ red_destroy(cl->cl_red);
+#endif
+#ifdef ALTQ_CODEL
+ if (cl->cl_qtype == Q_CODEL)
+ codel_destroy(cl->cl_codel);
+#endif
+ }
+ free(cl->cl_buckets, M_DEVBUF);
+ free(cl, M_DEVBUF);
+
+ return (0);
+}
+
+/*
+ * fairq_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
+ struct fairq_class *cl = NULL; /* Make compiler happy */
+ struct pf_mtag *t;
+ u_int32_t qid_hash = 0;
+ int len;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+ m_freem(m);
+ return (ENOBUFS);
+ }
+
+ if ((t = pf_find_mtag(m)) != NULL) {
+ cl = clh_to_clp(pif, t->qid);
+ qid_hash = t->qid_hash;
+ }
+ if (cl == NULL) {
+ cl = pif->pif_default;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+ cl->cl_flags |= FARF_HAS_PACKETS;
+ cl->cl_pktattr = NULL;
+ len = m_pktlen(m);
+ if (fairq_addq(cl, m, qid_hash) != 0) {
+ /* drop occurred. mbuf was freed in fairq_addq. */
+ PKTCNTR_ADD(&cl->cl_dropcnt, len);
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+
+ return (0);
+}
+
+/*
+ * fairq_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ * from the queue. ALTDQ_REMOVE is a normal dequeue operation.
+ * ALTDQ_REMOVE must return the same packet if called immediately
+ * after ALTDQ_POLL.
+ */
+static struct mbuf *
+fairq_dequeue(struct ifaltq *ifq, int op)
+{
+ struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
+ struct fairq_class *cl;
+ struct fairq_class *best_cl;
+ struct mbuf *best_m;
+ struct mbuf *m = NULL;
+ uint64_t cur_time = read_machclk();
+ int pri;
+ int hit_limit;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (IFQ_IS_EMPTY(ifq)) {
+ return (NULL);
+ }
+
+ if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
+ best_cl = pif->pif_poll_cache;
+ m = fairq_getq(best_cl, cur_time);
+ pif->pif_poll_cache = NULL;
+ if (m) {
+ IFQ_DEC_LEN(ifq);
+ PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
+ return (m);
+ }
+ } else {
+ best_cl = NULL;
+ best_m = NULL;
+
+ for (pri = pif->pif_maxpri; pri >= 0; pri--) {
+ if ((cl = pif->pif_classes[pri]) == NULL)
+ continue;
+ if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
+ continue;
+ m = fairq_pollq(cl, cur_time, &hit_limit);
+ if (m == NULL) {
+ cl->cl_flags &= ~FARF_HAS_PACKETS;
+ continue;
+ }
+
+ /*
+ * Only override the best choice if we are under
+ * the BW limit.
+ */
+ if (hit_limit == 0 || best_cl == NULL) {
+ best_cl = cl;
+ best_m = m;
+ }
+
+ /*
+ * Remember the highest priority mbuf in case we
+ * do not find any lower priority mbufs.
+ */
+ if (hit_limit)
+ continue;
+ break;
+ }
+ if (op == ALTDQ_POLL) {
+ pif->pif_poll_cache = best_cl;
+ m = best_m;
+ } else if (best_cl) {
+ m = fairq_getq(best_cl, cur_time);
+ if (m != NULL) {
+ IFQ_DEC_LEN(ifq);
+ PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
+ }
+ }
+ return (m);
+ }
+ return (NULL);
+}
+
+static int
+fairq_addq(struct fairq_class *cl, struct mbuf *m, u_int32_t bucketid)
+{
+ fairq_bucket_t *b;
+ u_int hindex;
+ uint64_t bw;
+
+ /*
+ * If the packet doesn't have any keep state put it on the end of
+ * our queue. XXX this can result in out of order delivery.
+ */
+ if (bucketid == 0) {
+ if (cl->cl_head)
+ b = cl->cl_head->prev;
+ else
+ b = &cl->cl_buckets[0];
+ } else {
+ hindex = bucketid & cl->cl_nbucket_mask;
+ b = &cl->cl_buckets[hindex];
+ }
+
+ /*
+ * Add the bucket to the end of the circular list of active buckets.
+ *
+ * As a special case we add the bucket to the beginning of the list
+ * instead of the end if it was not previously on the list and if
+ * its traffic is less than the hog level.
+ */
+ if (b->in_use == 0) {
+ b->in_use = 1;
+ if (cl->cl_head == NULL) {
+ cl->cl_head = b;
+ b->next = b;
+ b->prev = b;
+ } else {
+ b->next = cl->cl_head;
+ b->prev = cl->cl_head->prev;
+ b->prev->next = b;
+ b->next->prev = b;
+
+ if (b->bw_delta && cl->cl_hogs_m1) {
+ bw = b->bw_bytes * machclk_freq / b->bw_delta;
+ if (bw < cl->cl_hogs_m1)
+ cl->cl_head = b;
+ }
+ }
+ }
+
+#ifdef ALTQ_RIO
+ if (cl->cl_qtype == Q_RIO)
+ return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
+#endif
+#ifdef ALTQ_RED
+ if (cl->cl_qtype == Q_RED)
+ return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
+#endif
+#ifdef ALTQ_CODEL
+ if (cl->cl_qtype == Q_CODEL)
+ return codel_addq(cl->cl_codel, &b->queue, m);
+#endif
+ if (qlen(&b->queue) >= qlimit(&b->queue)) {
+ m_freem(m);
+ return (-1);
+ }
+
+ if (cl->cl_flags & FARF_CLEARDSCP)
+ write_dsfield(m, cl->cl_pktattr, 0);
+
+ _addq(&b->queue, m);
+
+ return (0);
+}
+
+static struct mbuf *
+fairq_getq(struct fairq_class *cl, uint64_t cur_time)
+{
+ fairq_bucket_t *b;
+ struct mbuf *m;
+
+ b = fairq_selectq(cl, 0);
+ if (b == NULL)
+ m = NULL;
+#ifdef ALTQ_RIO
+ else if (cl->cl_qtype == Q_RIO)
+ m = rio_getq((rio_t *)cl->cl_red, &b->queue);
+#endif
+#ifdef ALTQ_RED
+ else if (cl->cl_qtype == Q_RED)
+ m = red_getq(cl->cl_red, &b->queue);
+#endif
+#ifdef ALTQ_CODEL
+ else if (cl->cl_qtype == Q_CODEL)
+ m = codel_getq(cl->cl_codel, &b->queue);
+#endif
+ else
+ m = _getq(&b->queue);
+
+ /*
+ * Calculate the BW change
+ */
+ if (m != NULL) {
+ uint64_t delta;
+
+ /*
+ * Per-class bandwidth calculation
+ */
+ delta = (cur_time - cl->cl_last_time);
+ if (delta > machclk_freq * 8)
+ delta = machclk_freq * 8;
+ cl->cl_bw_delta += delta;
+ cl->cl_bw_bytes += m->m_pkthdr.len;
+ cl->cl_last_time = cur_time;
+ cl->cl_bw_delta -= cl->cl_bw_delta >> 3;
+ cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3;
+
+ /*
+ * Per-bucket bandwidth calculation
+ */
+ delta = (cur_time - b->last_time);
+ if (delta > machclk_freq * 8)
+ delta = machclk_freq * 8;
+ b->bw_delta += delta;
+ b->bw_bytes += m->m_pkthdr.len;
+ b->last_time = cur_time;
+ b->bw_delta -= b->bw_delta >> 3;
+ b->bw_bytes -= b->bw_bytes >> 3;
+ }
+ return(m);
+}
+
+/*
+ * Figure out what the next packet would be if there were no limits. If
+ * this class hits its bandwidth limit *hit_limit is set to non-zero, otherwise
+ * it is set to 0. A non-NULL mbuf is returned either way.
+ */
+static struct mbuf *
+fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
+{
+ fairq_bucket_t *b;
+ struct mbuf *m;
+ uint64_t delta;
+ uint64_t bw;
+
+ *hit_limit = 0;
+ b = fairq_selectq(cl, 1);
+ if (b == NULL)
+ return(NULL);
+ m = qhead(&b->queue);
+
+ /*
+ * Did this packet exceed the class bandwidth? Calculate the
+ * bandwidth component of the packet.
+ *
+ * - Calculate bytes per second
+ */
+ delta = cur_time - cl->cl_last_time;
+ if (delta > machclk_freq * 8)
+ delta = machclk_freq * 8;
+ cl->cl_bw_delta += delta;
+ cl->cl_last_time = cur_time;
+ if (cl->cl_bw_delta) {
+ bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta;
+
+ if (bw > cl->cl_bandwidth)
+ *hit_limit = 1;
+#ifdef ALTQ_DEBUG
+ printf("BW %6ju relative to %6u %d queue %p\n",
+ (uintmax_t)bw, cl->cl_bandwidth, *hit_limit, b);
+#endif
+ }
+ return(m);
+}
+
+/*
+ * Locate the next queue we want to pull a packet out of. This code
+ * is also responsible for removing empty buckets from the circular list.
+ */
+static
+fairq_bucket_t *
+fairq_selectq(struct fairq_class *cl, int ispoll)
+{
+ fairq_bucket_t *b;
+ uint64_t bw;
+
+ if (ispoll == 0 && cl->cl_polled) {
+ b = cl->cl_polled;
+ cl->cl_polled = NULL;
+ return(b);
+ }
+
+ while ((b = cl->cl_head) != NULL) {
+ /*
+ * Remove empty queues from consideration
+ */
+ if (qempty(&b->queue)) {
+ b->in_use = 0;
+ cl->cl_head = b->next;
+ if (cl->cl_head == b) {
+ cl->cl_head = NULL;
+ } else {
+ b->next->prev = b->prev;
+ b->prev->next = b->next;
+ }
+ continue;
+ }
+
+ /*
+ * Advance the round robin. Queues with bandwidths less
+ * than the hog bandwidth are allowed to burst.
+ */
+ if (cl->cl_hogs_m1 == 0) {
+ cl->cl_head = b->next;
+ } else if (b->bw_delta) {
+ bw = b->bw_bytes * machclk_freq / b->bw_delta;
+ if (bw >= cl->cl_hogs_m1) {
+ cl->cl_head = b->next;
+ }
+ /*
+ * XXX TODO -
+ */
+ }
+
+ /*
+ * Return bucket b.
+ */
+ break;
+ }
+ if (ispoll)
+ cl->cl_polled = b;
+ return(b);
+}
+
+static void
+fairq_purgeq(struct fairq_class *cl)
+{
+ fairq_bucket_t *b;
+ struct mbuf *m;
+
+ while ((b = fairq_selectq(cl, 0)) != NULL) {
+ while ((m = _getq(&b->queue)) != NULL) {
+ PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
+ m_freem(m);
+ }
+ ASSERT(qlen(&b->queue) == 0);
+ }
+}
+
+static void
+get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
+{
+ fairq_bucket_t *b;
+
+ sp->class_handle = cl->cl_handle;
+ sp->qlimit = cl->cl_qlimit;
+ sp->xmit_cnt = cl->cl_xmitcnt;
+ sp->drop_cnt = cl->cl_dropcnt;
+ sp->qtype = cl->cl_qtype;
+ sp->qlength = 0;
+
+ if (cl->cl_head) {
+ b = cl->cl_head;
+ do {
+ sp->qlength += qlen(&b->queue);
+ b = b->next;
+ } while (b != cl->cl_head);
+ }
+
+#ifdef ALTQ_RED
+ if (cl->cl_qtype == Q_RED)
+ red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (cl->cl_qtype == Q_RIO)
+ rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_CODEL
+ if (cl->cl_qtype == Q_CODEL)
+ codel_getstats(cl->cl_codel, &sp->codel);
+#endif
+}
+
+/* convert a class handle to the corresponding class pointer */
+static struct fairq_class *
+clh_to_clp(struct fairq_if *pif, uint32_t chandle)
+{
+ struct fairq_class *cl;
+ int idx;
+
+ if (chandle == 0)
+ return (NULL);
+
+ for (idx = pif->pif_maxpri; idx >= 0; idx--)
+ if ((cl = pif->pif_classes[idx]) != NULL &&
+ cl->cl_handle == chandle)
+ return (cl);
+
+ return (NULL);
+}
+
+#endif /* ALTQ_FAIRQ */
diff --git a/freebsd/sys/net/altq/altq_fairq.h b/freebsd/sys/net/altq/altq_fairq.h
new file mode 100644
index 00000000..1a4b97dd
--- /dev/null
+++ b/freebsd/sys/net/altq/altq_fairq.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2008 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $DragonFly: src/sys/net/altq/altq_fairq.h,v 1.1 2008/04/06 18:58:15 dillon Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_FAIRQ_H_
+#define _ALTQ_ALTQ_FAIRQ_H_
+
+#include <net/altq/altq.h>
+#include <net/altq/altq_classq.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+#include <net/altq/altq_rmclass.h>
+
+#define FAIRQ_MAX_BUCKETS 2048 /* maximum number of sorting buckets */
+#define FAIRQ_MAXPRI RM_MAXPRIO
+#define FAIRQ_BITMAP_WIDTH (sizeof(fairq_bitmap_t)*8)
+#define FAIRQ_BITMAP_MASK (FAIRQ_BITMAP_WIDTH - 1)
+
+/* fairq class flags */
+#define FARF_RED 0x0001 /* use RED */
+#define FARF_ECN 0x0002 /* use RED/ECN */
+#define FARF_RIO 0x0004 /* use RIO */
+#define FARF_CODEL 0x0008 /* use CoDel */
+#define FARF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define FARF_DEFAULTCLASS 0x1000 /* default class */
+
+#define FARF_HAS_PACKETS 0x2000 /* might have queued packets */
+
+#define FARF_USERFLAGS (FARF_RED|FARF_ECN|FARF_RIO|FARF_CLEARDSCP| \
+ FARF_DEFAULTCLASS)
+
+/* special class handles */
+#define FAIRQ_NULLCLASS_HANDLE 0
+
+typedef u_int fairq_bitmap_t;
+
+struct fairq_classstats {
+ uint32_t class_handle;
+
+ u_int qlength;
+ u_int qlimit;
+ struct pktcntr xmit_cnt; /* transmitted packet counter */
+ struct pktcntr drop_cnt; /* dropped packet counter */
+
+ /* codel, red and rio related info */
+ int qtype;
+ struct redstats red[3]; /* rio has 3 red stats */
+ struct codel_stats codel;
+};
+
+#ifdef _KERNEL
+
+typedef struct fairq_bucket {
+ struct fairq_bucket *next; /* circular list */
+ struct fairq_bucket *prev; /* circular list */
+ class_queue_t queue; /* the actual queue */
+ uint64_t bw_bytes; /* statistics used to calculate bw */
+ uint64_t bw_delta; /* statistics used to calculate bw */
+ uint64_t last_time;
+ int in_use;
+} fairq_bucket_t;
+
+struct fairq_class {
+ uint32_t cl_handle; /* class handle */
+ u_int cl_nbuckets; /* (power of 2) */
+ u_int cl_nbucket_mask; /* bucket mask */
+ fairq_bucket_t *cl_buckets;
+ fairq_bucket_t *cl_head; /* head of circular bucket list */
+ fairq_bucket_t *cl_polled;
+ union {
+ struct red *cl_red; /* RED state */
+ struct codel *cl_codel; /* CoDel state */
+ } cl_aqm;
+#define cl_red cl_aqm.cl_red
+#define cl_codel cl_aqm.cl_codel
+ u_int cl_hogs_m1;
+ u_int cl_lssc_m1;
+ u_int cl_bandwidth;
+ uint64_t cl_bw_bytes;
+ uint64_t cl_bw_delta;
+ uint64_t cl_last_time;
+ int cl_qtype; /* rollup */
+ int cl_qlimit;
+ int cl_pri; /* priority */
+ int cl_flags; /* class flags */
+ struct fairq_if *cl_pif; /* back pointer to pif */
+ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+ /* round robin index */
+
+ /* statistics */
+ struct pktcntr cl_xmitcnt; /* transmitted packet counter */
+ struct pktcntr cl_dropcnt; /* dropped packet counter */
+};
+
+/*
+ * fairq interface state
+ */
+struct fairq_if {
+ struct fairq_if *pif_next; /* interface state list */
+ struct ifaltq *pif_ifq; /* backpointer to ifaltq */
+ u_int pif_bandwidth; /* link bandwidth in bps */
+ int pif_maxpri; /* max priority in use */
+ struct fairq_class *pif_poll_cache;/* cached poll */
+ struct fairq_class *pif_default; /* default class */
+ struct fairq_class *pif_classes[FAIRQ_MAXPRI]; /* classes */
+};
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_FAIRQ_H_ */
diff --git a/freebsd/sys/contrib/altq/altq/altq_hfsc.c b/freebsd/sys/net/altq/altq_hfsc.c
index 64c9d17c..f7a18296 100644
--- a/freebsd/sys/contrib/altq/altq/altq_hfsc.c
+++ b/freebsd/sys/net/altq/altq_hfsc.c
@@ -1,9 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $FreeBSD$ */
-/* $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $ */
-
-/*
+/*-
* Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
*
* Permission to use, copy, modify, and distribute this software and
@@ -31,6 +28,9 @@
* software to return any improvements or extensions that they make,
* and to grant Carnegie Mellon the rights to redistribute these
* changes without encumbrance.
+ *
+ * $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $
+ * $FreeBSD$
*/
/*
* H-FSC is described in Proceedings of SIGCOMM'97,
@@ -44,13 +44,9 @@
* a class whose fit-time exceeds the current time.
*/
-#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
#include <rtems/bsd/local/opt_inet.h>
-#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */
@@ -68,13 +64,16 @@
#endif /* ALTQ3_COMPAT */
#include <net/if.h>
+#include <net/if_var.h>
#include <netinet/in.h>
-#include <net/pfvar.h>
-#include <altq/altq.h>
-#include <altq/altq_hfsc.h>
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_hfsc.h>
#ifdef ALTQ3_COMPAT
-#include <altq/altq_conf.h>
+#include <net/altq/altq_conf.h>
#endif
/*
@@ -106,14 +105,10 @@ static void update_ed(struct hfsc_class *, int);
static void update_d(struct hfsc_class *, int);
static void init_vf(struct hfsc_class *, int);
static void update_vf(struct hfsc_class *, int, u_int64_t);
-static ellist_t *ellist_alloc(void);
-static void ellist_destroy(ellist_t *);
static void ellist_insert(struct hfsc_class *);
static void ellist_remove(struct hfsc_class *);
static void ellist_update(struct hfsc_class *);
-struct hfsc_class *ellist_get_mindl(ellist_t *, u_int64_t);
-static actlist_t *actlist_alloc(void);
-static void actlist_destroy(actlist_t *);
+struct hfsc_class *hfsc_get_mindl(struct hfsc_if *, u_int64_t);
static void actlist_insert(struct hfsc_class *);
static void actlist_remove(struct hfsc_class *);
static void actlist_update(struct hfsc_class *);
@@ -180,11 +175,7 @@ hfsc_pfattach(struct pf_altq *a)
if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
return (EINVAL);
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc,
hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL);
splx(s);
@@ -202,17 +193,11 @@ hfsc_add_altq(struct pf_altq *a)
if (!ALTQ_IS_READY(&ifp->if_snd))
return (ENODEV);
- hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK);
+ hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_NOWAIT | M_ZERO);
if (hif == NULL)
return (ENOMEM);
- bzero(hif, sizeof(struct hfsc_if));
-
- hif->hif_eligible = ellist_alloc();
- if (hif->hif_eligible == NULL) {
- free(hif, M_DEVBUF);
- return (ENOMEM);
- }
+ TAILQ_INIT(&hif->hif_eligible);
hif->hif_ifq = &ifp->if_snd;
/* keep the state in pf_altq */
@@ -233,8 +218,6 @@ hfsc_remove_altq(struct pf_altq *a)
(void)hfsc_clear_interface(hif);
(void)hfsc_class_destroy(hif->hif_rootclass);
- ellist_destroy(hif->hif_eligible);
-
free(hif, M_DEVBUF);
return (0);
@@ -402,26 +385,31 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
return (NULL);
}
#endif
+#ifndef ALTQ_CODEL
+ if (flags & HFCF_CODEL) {
+#ifdef ALTQ_DEBUG
+ printf("hfsc_class_create: CODEL not configured for HFSC!\n");
+#endif
+ return (NULL);
+ }
+#endif
- cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_WAITOK);
+ cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO);
if (cl == NULL)
return (NULL);
- bzero(cl, sizeof(struct hfsc_class));
- cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_WAITOK);
+ cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
if (cl->cl_q == NULL)
goto err_ret;
- bzero(cl->cl_q, sizeof(class_queue_t));
- cl->cl_actc = actlist_alloc();
- if (cl->cl_actc == NULL)
- goto err_ret;
+ TAILQ_INIT(&cl->cl_actc);
if (qlimit == 0)
qlimit = 50; /* use default */
qlimit(cl->cl_q) = qlimit;
qtype(cl->cl_q) = Q_DROPTAIL;
qlen(cl->cl_q) = 0;
+ qsize(cl->cl_q) = 0;
cl->cl_flags = flags;
#ifdef ALTQ_RED
if (flags & (HFCF_RED|HFCF_RIO)) {
@@ -466,10 +454,17 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
#endif
}
#endif /* ALTQ_RED */
+#ifdef ALTQ_CODEL
+ if (flags & HFCF_CODEL) {
+ cl->cl_codel = codel_alloc(5, 100, 0);
+ if (cl->cl_codel != NULL)
+ qtype(cl->cl_q) = Q_CODEL;
+ }
+#endif
if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
cl->cl_rsc = malloc(sizeof(struct internal_sc),
- M_DEVBUF, M_WAITOK);
+ M_DEVBUF, M_NOWAIT);
if (cl->cl_rsc == NULL)
goto err_ret;
sc2isc(rsc, cl->cl_rsc);
@@ -478,7 +473,7 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
}
if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
cl->cl_fsc = malloc(sizeof(struct internal_sc),
- M_DEVBUF, M_WAITOK);
+ M_DEVBUF, M_NOWAIT);
if (cl->cl_fsc == NULL)
goto err_ret;
sc2isc(fsc, cl->cl_fsc);
@@ -486,7 +481,7 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
}
if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
cl->cl_usc = malloc(sizeof(struct internal_sc),
- M_DEVBUF, M_WAITOK);
+ M_DEVBUF, M_NOWAIT);
if (cl->cl_usc == NULL)
goto err_ret;
sc2isc(usc, cl->cl_usc);
@@ -498,11 +493,7 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
cl->cl_hif = hif;
cl->cl_parent = parent;
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
IFQ_LOCK(hif->hif_ifq);
hif->hif_classes++;
@@ -549,8 +540,6 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
return (cl);
err_ret:
- if (cl->cl_actc != NULL)
- actlist_destroy(cl->cl_actc);
if (cl->cl_red != NULL) {
#ifdef ALTQ_RIO
if (q_is_rio(cl->cl_q))
@@ -560,6 +549,10 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
if (q_is_red(cl->cl_q))
red_destroy(cl->cl_red);
#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_destroy(cl->cl_codel);
+#endif
}
if (cl->cl_fsc != NULL)
free(cl->cl_fsc, M_DEVBUF);
@@ -584,11 +577,7 @@ hfsc_class_destroy(struct hfsc_class *cl)
if (is_a_parent_class(cl))
return (EBUSY);
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
IFQ_LOCK(cl->cl_hif->hif_ifq);
#ifdef ALTQ3_COMPAT
@@ -625,8 +614,6 @@ hfsc_class_destroy(struct hfsc_class *cl)
IFQ_UNLOCK(cl->cl_hif->hif_ifq);
splx(s);
- actlist_destroy(cl->cl_actc);
-
if (cl->cl_red != NULL) {
#ifdef ALTQ_RIO
if (q_is_rio(cl->cl_q))
@@ -636,6 +623,10 @@ hfsc_class_destroy(struct hfsc_class *cl)
if (q_is_red(cl->cl_q))
red_destroy(cl->cl_red);
#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_destroy(cl->cl_codel);
+#endif
}
IFQ_LOCK(cl->cl_hif->hif_ifq);
@@ -779,7 +770,7 @@ hfsc_dequeue(struct ifaltq *ifq, int op)
* find the class with the minimum deadline among
* the eligible classes.
*/
- if ((cl = ellist_get_mindl(hif->hif_eligible, cur_time))
+ if ((cl = hfsc_get_mindl(hif, cur_time))
!= NULL) {
realtime = 1;
} else {
@@ -863,6 +854,10 @@ hfsc_addq(struct hfsc_class *cl, struct mbuf *m)
if (q_is_red(cl->cl_q))
return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ return codel_addq(cl->cl_codel, cl->cl_q, m);
+#endif
if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
m_freem(m);
return (-1);
@@ -887,6 +882,10 @@ hfsc_getq(struct hfsc_class *cl)
if (q_is_red(cl->cl_q))
return red_getq(cl->cl_red, cl->cl_q);
#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ return codel_getq(cl->cl_codel, cl->cl_q);
+#endif
return _getq(cl->cl_q);
}
@@ -999,7 +998,7 @@ init_vf(struct hfsc_class *cl, int len)
go_active = 0;
if (go_active) {
- max_cl = actlist_last(cl->cl_parent->cl_actc);
+ max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
if (max_cl != NULL) {
/*
* set vt to the average of the min and max
@@ -1164,12 +1163,12 @@ update_cfmin(struct hfsc_class *cl)
struct hfsc_class *p;
u_int64_t cfmin;
- if (TAILQ_EMPTY(cl->cl_actc)) {
+ if (TAILQ_EMPTY(&cl->cl_actc)) {
cl->cl_cfmin = 0;
return;
}
cfmin = HT_INFINITY;
- TAILQ_FOREACH(p, cl->cl_actc, cl_actlist) {
+ TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
if (p->cl_f == 0) {
cl->cl_cfmin = 0;
return;
@@ -1189,22 +1188,6 @@ update_cfmin(struct hfsc_class *cl)
* there is one eligible list per interface.
*/
-static ellist_t *
-ellist_alloc(void)
-{
- ellist_t *head;
-
- head = malloc(sizeof(ellist_t), M_DEVBUF, M_WAITOK);
- TAILQ_INIT(head);
- return (head);
-}
-
-static void
-ellist_destroy(ellist_t *head)
-{
- free(head, M_DEVBUF);
-}
-
static void
ellist_insert(struct hfsc_class *cl)
{
@@ -1212,13 +1195,13 @@ ellist_insert(struct hfsc_class *cl)
struct hfsc_class *p;
/* check the last entry first */
- if ((p = TAILQ_LAST(hif->hif_eligible, _eligible)) == NULL ||
+ if ((p = TAILQ_LAST(&hif->hif_eligible, elighead)) == NULL ||
p->cl_e <= cl->cl_e) {
- TAILQ_INSERT_TAIL(hif->hif_eligible, cl, cl_ellist);
+ TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
return;
}
- TAILQ_FOREACH(p, hif->hif_eligible, cl_ellist) {
+ TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
if (cl->cl_e < p->cl_e) {
TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
return;
@@ -1232,7 +1215,7 @@ ellist_remove(struct hfsc_class *cl)
{
struct hfsc_if *hif = cl->cl_hif;
- TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
+ TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
}
static void
@@ -1250,11 +1233,11 @@ ellist_update(struct hfsc_class *cl)
return;
/* check the last entry */
- last = TAILQ_LAST(hif->hif_eligible, _eligible);
+ last = TAILQ_LAST(&hif->hif_eligible, elighead);
ASSERT(last != NULL);
if (last->cl_e <= cl->cl_e) {
- TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
- TAILQ_INSERT_TAIL(hif->hif_eligible, cl, cl_ellist);
+ TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
+ TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
return;
}
@@ -1264,7 +1247,7 @@ ellist_update(struct hfsc_class *cl)
*/
while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
if (cl->cl_e < p->cl_e) {
- TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
+ TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
return;
}
@@ -1274,11 +1257,11 @@ ellist_update(struct hfsc_class *cl)
/* find the class with the minimum deadline among the eligible classes */
struct hfsc_class *
-ellist_get_mindl(ellist_t *head, u_int64_t cur_time)
+hfsc_get_mindl(struct hfsc_if *hif, u_int64_t cur_time)
{
struct hfsc_class *p, *cl = NULL;
- TAILQ_FOREACH(p, head, cl_ellist) {
+ TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
if (p->cl_e > cur_time)
break;
if (cl == NULL || p->cl_d < cl->cl_d)
@@ -1292,34 +1275,20 @@ ellist_get_mindl(ellist_t *head, u_int64_t cur_time)
* by their virtual time.
* each intermediate class has one active children list.
*/
-static actlist_t *
-actlist_alloc(void)
-{
- actlist_t *head;
-
- head = malloc(sizeof(actlist_t), M_DEVBUF, M_WAITOK);
- TAILQ_INIT(head);
- return (head);
-}
static void
-actlist_destroy(actlist_t *head)
-{
- free(head, M_DEVBUF);
-}
-static void
actlist_insert(struct hfsc_class *cl)
{
struct hfsc_class *p;
/* check the last entry first */
- if ((p = TAILQ_LAST(cl->cl_parent->cl_actc, _active)) == NULL
+ if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead)) == NULL
|| p->cl_vt <= cl->cl_vt) {
- TAILQ_INSERT_TAIL(cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
return;
}
- TAILQ_FOREACH(p, cl->cl_parent->cl_actc, cl_actlist) {
+ TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) {
if (cl->cl_vt < p->cl_vt) {
TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
return;
@@ -1331,7 +1300,7 @@ actlist_insert(struct hfsc_class *cl)
static void
actlist_remove(struct hfsc_class *cl)
{
- TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
}
static void
@@ -1349,11 +1318,11 @@ actlist_update(struct hfsc_class *cl)
return;
/* check the last entry */
- last = TAILQ_LAST(cl->cl_parent->cl_actc, _active);
+ last = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
ASSERT(last != NULL);
if (last->cl_vt <= cl->cl_vt) {
- TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
- TAILQ_INSERT_TAIL(cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
return;
}
@@ -1363,7 +1332,7 @@ actlist_update(struct hfsc_class *cl)
*/
while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
if (cl->cl_vt < p->cl_vt) {
- TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
return;
}
@@ -1376,7 +1345,7 @@ actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
{
struct hfsc_class *p;
- TAILQ_FOREACH(p, cl->cl_actc, cl_actlist) {
+ TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
if (p->cl_f <= cur_time)
return (p);
}
@@ -1701,6 +1670,10 @@ get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl)
if (q_is_rio(cl->cl_q))
rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_getstats(cl->cl_codel, &sp->codel);
+#endif
}
/* convert a class handle to the corresponding class pointer */
@@ -1820,11 +1793,7 @@ hfsc_class_modify(cl, rsc, fsc, usc)
}
cur_time = read_machclk();
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
IFQ_LOCK(cl->cl_hif->hif_ifq);
if (rsc != NULL) {
diff --git a/freebsd/sys/contrib/altq/altq/altq_hfsc.h b/freebsd/sys/net/altq/altq_hfsc.h
index 91ba3d18..de5e89b8 100644
--- a/freebsd/sys/contrib/altq/altq/altq_hfsc.h
+++ b/freebsd/sys/net/altq/altq_hfsc.h
@@ -1,6 +1,4 @@
-/* $KAME: altq_hfsc.h,v 1.12 2003/12/05 05:40:46 kjc Exp $ */
-
-/*
+/*-
* Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
*
* Permission to use, copy, modify, and distribute this software and
@@ -28,14 +26,18 @@
* software to return any improvements or extensions that they make,
* and to grant Carnegie Mellon the rights to redistribute these
* changes without encumbrance.
+ *
+ * $KAME: altq_hfsc.h,v 1.12 2003/12/05 05:40:46 kjc Exp $
+ * $FreeBSD$
*/
#ifndef _ALTQ_ALTQ_HFSC_H_
#define _ALTQ_ALTQ_HFSC_H_
-#include <altq/altq.h>
-#include <altq/altq_classq.h>
-#include <altq/altq_red.h>
-#include <altq/altq_rio.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_classq.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
#ifdef __cplusplus
extern "C" {
@@ -55,6 +57,7 @@ struct service_curve {
#define HFCF_RED 0x0001 /* use RED */
#define HFCF_ECN 0x0002 /* use RED/ECN */
#define HFCF_RIO 0x0004 /* use RIO */
+#define HFCF_CODEL 0x0008 /* use CoDel */
#define HFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
#define HFCF_DEFAULTCLASS 0x1000 /* default class */
@@ -101,9 +104,10 @@ struct hfsc_classstats {
u_int parentperiod; /* parent's vt period seqno */
int nactive; /* number of active children */
- /* red and rio related info */
+ /* codel, red and rio related info */
int qtype;
struct redstats red[3];
+ struct codel_stats codel;
};
#ifdef ALTQ3_COMPAT
@@ -218,16 +222,6 @@ struct runtime_sc {
u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */
};
-/* for TAILQ based ellist and actlist implementation */
-struct hfsc_class;
-typedef TAILQ_HEAD(_eligible, hfsc_class) ellist_t;
-typedef TAILQ_ENTRY(hfsc_class) elentry_t;
-typedef TAILQ_HEAD(_active, hfsc_class) actlist_t;
-typedef TAILQ_ENTRY(hfsc_class) actentry_t;
-#define ellist_first(s) TAILQ_FIRST(s)
-#define actlist_first(s) TAILQ_FIRST(s)
-#define actlist_last(s) TAILQ_LAST(s, _active)
-
struct hfsc_class {
u_int cl_id; /* class id (just for debug) */
u_int32_t cl_handle; /* class handle */
@@ -239,7 +233,12 @@ struct hfsc_class {
struct hfsc_class *cl_children; /* child classes */
class_queue_t *cl_q; /* class queue structure */
- struct red *cl_red; /* RED state */
+ union {
+ struct red *cl_red; /* RED state */
+ struct codel *cl_codel; /* CoDel state */
+ } cl_aqm;
+#define cl_red cl_aqm.cl_red
+#define cl_codel cl_aqm.cl_codel
struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
u_int64_t cl_total; /* total work in bytes */
@@ -277,10 +276,10 @@ struct hfsc_class {
u_int cl_vtperiod; /* vt period sequence no */
u_int cl_parentperiod; /* parent's vt period seqno */
int cl_nactive; /* number of active children */
- actlist_t *cl_actc; /* active children list */
- actentry_t cl_actlist; /* active children list entry */
- elentry_t cl_ellist; /* eligible list entry */
+ TAILQ_HEAD(acthead, hfsc_class) cl_actc; /* active children list */
+ TAILQ_ENTRY(hfsc_class) cl_actlist; /* active children list entry */
+ TAILQ_ENTRY(hfsc_class) cl_ellist; /* eligible list entry */
struct {
struct pktcntr xmit_cnt;
@@ -304,7 +303,7 @@ struct hfsc_if {
u_int hif_packets; /* # of packets in the tree */
u_int hif_classid; /* class id sequence number */
- ellist_t *hif_eligible; /* eligible list */
+ TAILQ_HEAD(elighead, hfsc_class) hif_eligible; /* eligible list */
#ifdef ALTQ3_CLFIER_COMPAT
struct acc_classifier hif_classifier;
diff --git a/freebsd/sys/contrib/altq/altq/altq_priq.c b/freebsd/sys/net/altq/altq_priq.c
index 0cb47576..d257ae3c 100644
--- a/freebsd/sys/contrib/altq/altq/altq_priq.c
+++ b/freebsd/sys/net/altq/altq_priq.c
@@ -1,8 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $FreeBSD$ */
-/* $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $ */
-/*
+/*-
* Copyright (C) 2000-2003
* Sony Computer Science Laboratories Inc. All rights reserved.
*
@@ -26,18 +24,17 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $
+ * $FreeBSD$
*/
/*
* priority queue
*/
-#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
#include <rtems/bsd/local/opt_inet.h>
-#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */
@@ -53,14 +50,17 @@
#include <sys/queue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <netinet/in.h>
-#include <net/pfvar.h>
-#include <altq/altq.h>
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
#ifdef ALTQ3_COMPAT
-#include <altq/altq_conf.h>
+#include <net/altq/altq_conf.h>
#endif
-#include <altq/altq_priq.h>
+#include <net/altq/altq_priq.h>
/*
* function prototypes
@@ -112,11 +112,7 @@ priq_pfattach(struct pf_altq *a)
if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
return (EINVAL);
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, a->altq_disc,
priq_enqueue, priq_dequeue, priq_request, NULL, NULL);
splx(s);
@@ -134,11 +130,9 @@ priq_add_altq(struct pf_altq *a)
if (!ALTQ_IS_READY(&ifp->if_snd))
return (ENODEV);
- pif = malloc(sizeof(struct priq_if),
- M_DEVBUF, M_WAITOK);
+ pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_NOWAIT | M_ZERO);
if (pif == NULL)
return (ENOMEM);
- bzero(pif, sizeof(struct priq_if));
pif->pif_bandwidth = a->ifbandwidth;
pif->pif_maxpri = -1;
pif->pif_ifq = &ifp->if_snd;
@@ -298,14 +292,18 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid)
return (NULL);
}
#endif
+#ifndef ALTQ_CODEL
+ if (flags & PRCF_CODEL) {
+#ifdef ALTQ_DEBUG
+ printf("priq_class_create: CODEL not configured for PRIQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
if ((cl = pif->pif_classes[pri]) != NULL) {
/* modify the class instead of creating a new one */
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
IFQ_LOCK(cl->cl_pif->pif_ifq);
if (!qempty(cl->cl_q))
priq_purgeq(cl);
@@ -319,18 +317,20 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid)
if (q_is_red(cl->cl_q))
red_destroy(cl->cl_red);
#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_destroy(cl->cl_codel);
+#endif
} else {
- cl = malloc(sizeof(struct priq_class),
- M_DEVBUF, M_WAITOK);
+ cl = malloc(sizeof(struct priq_class), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
if (cl == NULL)
return (NULL);
- bzero(cl, sizeof(struct priq_class));
- cl->cl_q = malloc(sizeof(class_queue_t),
- M_DEVBUF, M_WAITOK);
+ cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
if (cl->cl_q == NULL)
goto err_ret;
- bzero(cl->cl_q, sizeof(class_queue_t));
}
pif->pif_classes[pri] = cl;
@@ -341,6 +341,7 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid)
qlimit(cl->cl_q) = qlimit;
qtype(cl->cl_q) = Q_DROPTAIL;
qlen(cl->cl_q) = 0;
+ qsize(cl->cl_q) = 0;
cl->cl_flags = flags;
cl->cl_pri = pri;
if (pri > pif->pif_maxpri)
@@ -368,8 +369,9 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid)
if (flags & PRCF_RIO) {
cl->cl_red = (red_t *)rio_alloc(0, NULL,
red_flags, red_pkttime);
- if (cl->cl_red != NULL)
- qtype(cl->cl_q) = Q_RIO;
+ if (cl->cl_red == NULL)
+ goto err_ret;
+ qtype(cl->cl_q) = Q_RIO;
} else
#endif
if (flags & PRCF_RED) {
@@ -377,11 +379,19 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid)
qlimit(cl->cl_q) * 10/100,
qlimit(cl->cl_q) * 30/100,
red_flags, red_pkttime);
- if (cl->cl_red != NULL)
- qtype(cl->cl_q) = Q_RED;
+ if (cl->cl_red == NULL)
+ goto err_ret;
+ qtype(cl->cl_q) = Q_RED;
}
}
#endif /* ALTQ_RED */
+#ifdef ALTQ_CODEL
+ if (flags & PRCF_CODEL) {
+ cl->cl_codel = codel_alloc(5, 100, 0);
+ if (cl->cl_codel != NULL)
+ qtype(cl->cl_q) = Q_CODEL;
+ }
+#endif
return (cl);
@@ -395,6 +405,10 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid)
if (q_is_red(cl->cl_q))
red_destroy(cl->cl_red);
#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_destroy(cl->cl_codel);
+#endif
}
if (cl->cl_q != NULL)
free(cl->cl_q, M_DEVBUF);
@@ -408,11 +422,7 @@ priq_class_destroy(struct priq_class *cl)
struct priq_if *pif;
int s, pri;
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
IFQ_LOCK(cl->cl_pif->pif_ifq);
#ifdef ALTQ3_CLFIER_COMPAT
@@ -446,6 +456,10 @@ priq_class_destroy(struct priq_class *cl)
if (q_is_red(cl->cl_q))
red_destroy(cl->cl_red);
#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_destroy(cl->cl_codel);
+#endif
}
free(cl->cl_q, M_DEVBUF);
free(cl, M_DEVBUF);
@@ -561,6 +575,10 @@ priq_addq(struct priq_class *cl, struct mbuf *m)
if (q_is_red(cl->cl_q))
return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ return codel_addq(cl->cl_codel, cl->cl_q, m);
+#endif
if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
m_freem(m);
return (-1);
@@ -585,6 +603,10 @@ priq_getq(struct priq_class *cl)
if (q_is_red(cl->cl_q))
return red_getq(cl->cl_red, cl->cl_q);
#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ return codel_getq(cl->cl_codel, cl->cl_q);
+#endif
return _getq(cl->cl_q);
}
@@ -629,7 +651,10 @@ get_class_stats(struct priq_classstats *sp, struct priq_class *cl)
if (q_is_rio(cl->cl_q))
rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
#endif
-
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->cl_q))
+ codel_getstats(cl->cl_codel, &sp->codel);
+#endif
}
/* convert a class handle to the corresponding class pointer */
diff --git a/freebsd/sys/contrib/altq/altq/altq_priq.h b/freebsd/sys/net/altq/altq_priq.h
index 481d31b8..fcbfee98 100644
--- a/freebsd/sys/contrib/altq/altq/altq_priq.h
+++ b/freebsd/sys/net/altq/altq_priq.h
@@ -1,5 +1,4 @@
-/* $KAME: altq_priq.h,v 1.7 2003/10/03 05:05:15 kjc Exp $ */
-/*
+/*-
* Copyright (C) 2000-2003
* Sony Computer Science Laboratories Inc. All rights reserved.
*
@@ -23,15 +22,19 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: altq_priq.h,v 1.7 2003/10/03 05:05:15 kjc Exp $
+ * $FreeBSD$
*/
#ifndef _ALTQ_ALTQ_PRIQ_H_
#define _ALTQ_ALTQ_PRIQ_H_
-#include <altq/altq.h>
-#include <altq/altq_classq.h>
-#include <altq/altq_red.h>
-#include <altq/altq_rio.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_classq.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
#ifdef __cplusplus
extern "C" {
@@ -59,6 +62,7 @@ struct priq_add_class {
#define PRCF_RED 0x0001 /* use RED */
#define PRCF_ECN 0x0002 /* use RED/ECN */
#define PRCF_RIO 0x0004 /* use RIO */
+#define PRCF_CODEL 0x0008 /* use CoDel */
#define PRCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
#define PRCF_DEFAULTCLASS 0x1000 /* default class */
@@ -102,9 +106,10 @@ struct priq_classstats {
struct pktcntr xmitcnt; /* transmitted packet counter */
struct pktcntr dropcnt; /* dropped packet counter */
- /* red and rio related info */
+ /* codel, red and rio related info */
int qtype;
struct redstats red[3]; /* rio has 3 red stats */
+ struct codel_stats codel;
};
#ifdef ALTQ3_COMPAT
@@ -134,7 +139,12 @@ struct priq_class_stats {
struct priq_class {
u_int32_t cl_handle; /* class handle */
class_queue_t *cl_q; /* class queue structure */
- struct red *cl_red; /* RED state */
+ union {
+ struct red *cl_red; /* RED state */
+ struct codel *cl_codel; /* CoDel state */
+ } cl_aqm;
+#define cl_red cl_aqm.cl_red
+#define cl_codel cl_aqm.cl_codel
int cl_pri; /* priority */
int cl_flags; /* class flags */
struct priq_if *cl_pif; /* back pointer to pif */
diff --git a/freebsd/sys/contrib/altq/altq/altq_red.c b/freebsd/sys/net/altq/altq_red.c
index 0d9ab935..f83b7b50 100644
--- a/freebsd/sys/contrib/altq/altq/altq_red.c
+++ b/freebsd/sys/net/altq/altq_red.c
@@ -1,9 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $FreeBSD$ */
-/* $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ */
-
-/*
+/*-
* Copyright (C) 1997-2003
* Sony Computer Science Laboratories Inc. All rights reserved.
*
@@ -29,7 +26,7 @@
* SUCH DAMAGE.
*
*/
-/*
+/*-
* Copyright (c) 1990-1994 Regents of the University of California.
* All rights reserved.
*
@@ -60,15 +57,14 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $
+ * $FreeBSD$
*/
-#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
#include <rtems/bsd/local/opt_inet.h>
-#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
#include <rtems/bsd/sys/param.h>
@@ -88,6 +84,7 @@
#endif /* ALTQ3_COMPAT */
#include <net/if.h>
+#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
@@ -96,13 +93,15 @@
#include <netinet/ip6.h>
#endif
-#include <net/pfvar.h>
-#include <altq/altq.h>
-#include <altq/altq_red.h>
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_red.h>
#ifdef ALTQ3_COMPAT
-#include <altq/altq_conf.h>
+#include <net/altq/altq_conf.h>
#ifdef ALTQ_FLOWVALVE
-#include <altq/altq_flowvalve.h>
+#include <net/altq/altq_flowvalve.h>
#endif
#endif
@@ -161,7 +160,7 @@
#define TH_MIN 5 /* min threshold */
#define TH_MAX 15 /* max threshold */
-#define RED_LIMIT 60 /* default max queue lenght */
+#define RED_LIMIT 60 /* default max queue length */
#define RED_STATS /* collect statistics */
/*
@@ -174,7 +173,7 @@
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
/*
- * flow-valve is an extention to protect red from unresponsive flows
+ * flow-valve is an extension to protect red from unresponsive flows
* and to promote end-to-end congestion control.
* flow-valve observes the average drop rates of the flows that have
* experienced packet drops in the recent past.
@@ -233,18 +232,25 @@ red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
int w, i;
int npkts_per_sec;
- rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK);
+ rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
if (rp == NULL)
return (NULL);
- bzero(rp, sizeof(red_t));
-
- rp->red_avg = 0;
- rp->red_idle = 1;
if (weight == 0)
rp->red_weight = W_WEIGHT;
else
rp->red_weight = weight;
+
+ /* allocate weight table */
+ rp->red_wtab = wtab_alloc(rp->red_weight);
+ if (rp->red_wtab == NULL) {
+ free(rp, M_DEVBUF);
+ return (NULL);
+ }
+
+ rp->red_avg = 0;
+ rp->red_idle = 1;
+
if (inv_pmax == 0)
rp->red_inv_pmax = default_inv_pmax;
else
@@ -304,9 +310,6 @@ red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
* rp->red_inv_pmax) << FP_SHIFT;
- /* allocate weight table */
- rp->red_wtab = wtab_alloc(rp->red_weight);
-
microtime(&rp->red_last);
return (rp);
}
@@ -641,10 +644,9 @@ wtab_alloc(int weight)
return (w);
}
- w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK);
+ w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
if (w == NULL)
- panic("wtab_alloc: malloc failed!");
- bzero(w, sizeof(struct wtab));
+ return (NULL);
w->w_weight = weight;
w->w_refcount = 1;
w->w_next = wtab_list;
@@ -940,11 +942,7 @@ redioctl(dev, cmd, addr, flag, p)
break;
}
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
red_purgeq(rqp);
limit = fc->red_limit;
if (limit < fc->red_thmax)
diff --git a/freebsd/sys/contrib/altq/altq/altq_red.h b/freebsd/sys/net/altq/altq_red.h
index dc8ea0ac..8ae8d291 100644
--- a/freebsd/sys/contrib/altq/altq/altq_red.h
+++ b/freebsd/sys/net/altq/altq_red.h
@@ -1,6 +1,4 @@
-/* $KAME: altq_red.h,v 1.8 2003/07/10 12:07:49 kjc Exp $ */
-
-/*
+/*-
* Copyright (C) 1997-2003
* Sony Computer Science Laboratories Inc. All rights reserved.
*
@@ -24,12 +22,15 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: altq_red.h,v 1.8 2003/07/10 12:07:49 kjc Exp $
+ * $FreeBSD$
*/
#ifndef _ALTQ_ALTQ_RED_H_
#define _ALTQ_ALTQ_RED_H_
-#include <altq/altq_classq.h>
+#include <net/altq/altq_classq.h>
#ifdef ALTQ3_COMPAT
struct red_interface {
diff --git a/freebsd/sys/contrib/altq/altq/altq_rio.c b/freebsd/sys/net/altq/altq_rio.c
index c5fb097d..bad0257c 100644
--- a/freebsd/sys/contrib/altq/altq/altq_rio.c
+++ b/freebsd/sys/net/altq/altq_rio.c
@@ -1,9 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $FreeBSD$ */
-/* $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ */
-
-/*
+/*-
* Copyright (C) 1998-2003
* Sony Computer Science Laboratories Inc. All rights reserved.
*
@@ -28,7 +25,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
-/*
+/*-
* Copyright (c) 1990-1994 Regents of the University of California.
* All rights reserved.
*
@@ -59,15 +56,14 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $
+ * $FreeBSD$
*/
-#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
#include <rtems/bsd/local/opt_inet.h>
-#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */
#include <rtems/bsd/sys/param.h>
@@ -83,6 +79,7 @@
#endif
#include <net/if.h>
+#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
@@ -91,13 +88,14 @@
#include <netinet/ip6.h>
#endif
-#include <net/pfvar.h>
-#include <altq/altq.h>
-#include <altq/altq_cdnr.h>
-#include <altq/altq_red.h>
-#include <altq/altq_rio.h>
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_cdnr.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
#ifdef ALTQ3_COMPAT
-#include <altq/altq_conf.h>
+#include <net/altq/altq_conf.h>
#endif
/*
@@ -150,7 +148,7 @@
#define TH_MIN 5 /* min threshold */
#define TH_MAX 15 /* max threshold */
-#define RIO_LIMIT 60 /* default max queue lenght */
+#define RIO_LIMIT 60 /* default max queue length */
#define RIO_STATS /* collect statistics */
#define TV_DELTA(a, b, delta) { \
@@ -206,10 +204,9 @@ rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
int w, i;
int npkts_per_sec;
- rp = malloc(sizeof(rio_t), M_DEVBUF, M_WAITOK);
+ rp = malloc(sizeof(rio_t), M_DEVBUF, M_NOWAIT | M_ZERO);
if (rp == NULL)
return (NULL);
- bzero(rp, sizeof(rio_t));
rp->rio_flags = flags;
if (pkttime == 0)
@@ -685,11 +682,7 @@ rioioctl(dev, cmd, addr, flag, p)
break;
}
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
_flushq(rqp->rq_q);
limit = fc->rio_limit;
if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
diff --git a/freebsd/sys/contrib/altq/altq/altq_rio.h b/freebsd/sys/net/altq/altq_rio.h
index 83210f23..ce9dc0e0 100644
--- a/freebsd/sys/contrib/altq/altq/altq_rio.h
+++ b/freebsd/sys/net/altq/altq_rio.h
@@ -1,6 +1,4 @@
-/* $KAME: altq_rio.h,v 1.9 2003/07/10 12:07:49 kjc Exp $ */
-
-/*
+/*-
* Copyright (C) 1998-2003
* Sony Computer Science Laboratories Inc. All rights reserved.
*
@@ -24,12 +22,15 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: altq_rio.h,v 1.9 2003/07/10 12:07:49 kjc Exp $
+ * $FreeBSD$
*/
#ifndef _ALTQ_ALTQ_RIO_H_
#define _ALTQ_ALTQ_RIO_H_
-#include <altq/altq_classq.h>
+#include <net/altq/altq_classq.h>
/*
* RIO: RED with IN/OUT bit
diff --git a/freebsd/sys/contrib/altq/altq/altq_rmclass.c b/freebsd/sys/net/altq/altq_rmclass.c
index 1fa47cc5..160884e2 100644
--- a/freebsd/sys/contrib/altq/altq/altq_rmclass.c
+++ b/freebsd/sys/net/altq/altq_rmclass.c
@@ -1,9 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $FreeBSD$ */
-/* $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ */
-
-/*
+/*-
* Copyright (c) 1991-1997 Regents of the University of California.
* All rights reserved.
*
@@ -37,17 +34,14 @@
*
* LBL code modified by speer@eng.sun.com, May 1977.
* For questions and/or comments, please send mail to cbq@ee.lbl.gov
+ *
+ * @(#)rm_class.c 1.48 97/12/05 SMI
+ * $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $
+ * $FreeBSD$
*/
-
-#ident "@(#)rm_class.c 1.48 97/12/05 SMI"
-
-#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
#include <rtems/bsd/local/opt_inet.h>
-#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
#include <rtems/bsd/sys/param.h>
@@ -62,17 +56,20 @@
#endif
#include <net/if.h>
+#include <net/if_var.h>
#ifdef ALTQ3_COMPAT
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#endif
-#include <altq/altq.h>
-#include <altq/altq_rmclass.h>
-#include <altq/altq_rmclass_debug.h>
-#include <altq/altq_red.h>
-#include <altq/altq_rio.h>
+#include <net/altq/if_altq.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_codel.h>
+#include <net/altq/altq_rmclass.h>
+#include <net/altq/altq_rmclass_debug.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
/*
* Local Macros
@@ -219,20 +216,24 @@ rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
return (NULL);
}
#endif
+#ifndef ALTQ_CODEL
+ if (flags & RMCF_CODEL) {
+#ifdef ALTQ_DEBUG
+ printf("rmc_newclass: CODEL not configured for CBQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
- cl = malloc(sizeof(struct rm_class),
- M_DEVBUF, M_WAITOK);
+ cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO);
if (cl == NULL)
return (NULL);
- bzero(cl, sizeof(struct rm_class));
CALLOUT_INIT(&cl->callout_);
- cl->q_ = malloc(sizeof(class_queue_t),
- M_DEVBUF, M_WAITOK);
+ cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
if (cl->q_ == NULL) {
free(cl, M_DEVBUF);
return (NULL);
}
- bzero(cl->q_, sizeof(class_queue_t));
/*
* Class initialization.
@@ -307,15 +308,18 @@ rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
#endif
}
#endif /* ALTQ_RED */
+#ifdef ALTQ_CODEL
+ if (flags & RMCF_CODEL) {
+ cl->codel_ = codel_alloc(5, 100, 0);
+ if (cl->codel_ != NULL)
+ qtype(cl->q_) = Q_CODEL;
+ }
+#endif
/*
* put the class into the class tree
*/
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
IFQ_LOCK(ifd->ifq_);
if ((peer = ifd->active_[pri]) != NULL) {
/* find the last class at this pri */
@@ -364,11 +368,7 @@ rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
ifd = cl->ifdat_;
old_allotment = cl->allotment_;
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
IFQ_LOCK(ifd->ifq_);
cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
cl->qthresh_ = 0;
@@ -564,11 +564,7 @@ rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
if (cl->sleeping_)
CALLOUT_STOP(&cl->callout_);
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
IFQ_LOCK(ifd->ifq_);
/*
* Free packets in the packet queue.
@@ -657,6 +653,10 @@ rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
if (q_is_red(cl->q_))
red_destroy(cl->red_);
#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->q_))
+ codel_destroy(cl->codel_);
+#endif
}
free(cl->q_, M_DEVBUF);
free(cl, M_DEVBUF);
@@ -1536,13 +1536,8 @@ rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
* a 'backstop' to restart this class.
*/
if (delay > tick * 2) {
-#ifdef __FreeBSD__
/* FreeBSD rounds up the tick */
t = hzto(&cl->undertime_);
-#else
- /* other BSDs round down the tick */
- t = hzto(&cl->undertime_) + 1;
-#endif
} else
t = 2;
CALLOUT_RESET(&cl->callout_, t,
@@ -1573,11 +1568,7 @@ rmc_restart(struct rm_class *cl)
struct rm_ifdat *ifd = cl->ifdat_;
int s;
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
IFQ_LOCK(ifd->ifq_);
if (cl->sleeping_) {
cl->sleeping_ = 0;
@@ -1623,6 +1614,10 @@ _rmc_addq(rm_class_t *cl, mbuf_t *m)
if (q_is_red(cl->q_))
return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
#endif /* ALTQ_RED */
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->q_))
+ return codel_addq(cl->codel_, cl->q_, m);
+#endif
if (cl->flags_ & RMCF_CLEARDSCP)
write_dsfield(m, cl->pktattr_, 0);
@@ -1652,6 +1647,10 @@ _rmc_getq(rm_class_t *cl)
if (q_is_red(cl->q_))
return red_getq(cl->red_, cl->q_);
#endif
+#ifdef ALTQ_CODEL
+ if (q_is_codel(cl->q_))
+ return codel_getq(cl->codel_, cl->q_);
+#endif
return _getq(cl->q_);
}
@@ -1722,7 +1721,8 @@ void cbqtrace_dump(int counter)
#endif /* CBQ_TRACE */
#endif /* ALTQ_CBQ */
-#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
+#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \
+ defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) || defined(ALTQ_CODEL)
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
void
diff --git a/freebsd/sys/contrib/altq/altq/altq_rmclass.h b/freebsd/sys/net/altq/altq_rmclass.h
index cf0ddf48..6130c4ff 100644
--- a/freebsd/sys/contrib/altq/altq/altq_rmclass.h
+++ b/freebsd/sys/net/altq/altq_rmclass.h
@@ -1,6 +1,4 @@
-/* $KAME: altq_rmclass.h,v 1.10 2003/08/20 23:30:23 itojun Exp $ */
-
-/*
+/*-
* Copyright (c) 1991-1997 Regents of the University of California.
* All rights reserved.
*
@@ -31,12 +29,15 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: altq_rmclass.h,v 1.10 2003/08/20 23:30:23 itojun Exp $
+ * $FreeBSD$
*/
#ifndef _ALTQ_ALTQ_RMCLASS_H_
#define _ALTQ_ALTQ_RMCLASS_H_
-#include <altq/altq_classq.h>
+#include <net/altq/altq_classq.h>
/* #pragma ident "@(#)rm_class.h 1.20 97/10/23 SMI" */
@@ -164,7 +165,12 @@ struct rm_class {
void (*overlimit)(struct rm_class *, struct rm_class *);
void (*drop)(struct rm_class *); /* Class drop action. */
- struct red *red_; /* RED state pointer */
+ union {
+ struct red *red_; /* RED state pointer */
+ struct codel *codel_; /* codel state pointer */
+ } cl_aqm_;
+#define red_ cl_aqm_.red_
+#define codel_ cl_aqm_.codel_
struct altq_pktattr *pktattr_; /* saved hdr used by RED/ECN */
int flags_;
@@ -182,7 +188,7 @@ struct rm_class {
*/
struct rm_ifdat {
int queued_; /* # pkts queued downstream */
- int efficient_; /* Link Efficency bit */
+ int efficient_; /* Link Efficiency bit */
int wrr_; /* Enable Weighted Round-Robin */
u_long ns_per_byte_; /* Link byte speed. */
int maxqueued_; /* Max packets to queue */
@@ -233,6 +239,7 @@ struct rm_ifdat {
#define RMCF_RIO 0x0004
#define RMCF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */
#define RMCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define RMCF_CODEL 0x0020
/* flags for rmc_init */
#define RMCF_WRR 0x0100
diff --git a/freebsd/sys/contrib/altq/altq/altq_rmclass_debug.h b/freebsd/sys/net/altq/altq_rmclass_debug.h
index 8f471b2f..7adbaec4 100644
--- a/freebsd/sys/contrib/altq/altq/altq_rmclass_debug.h
+++ b/freebsd/sys/net/altq/altq_rmclass_debug.h
@@ -1,6 +1,4 @@
-/* $KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $ */
-
-/*
+/*-
* Copyright (c) Sun Microsystems, Inc. 1998 All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,6 +26,9 @@
* provided "as is" without express or implied warranty of any kind.
*
* These notices must be retained in any copies of any part of this software.
+ *
+ * $KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $
+ * $FreeBSD$
*/
#ifndef _ALTQ_ALTQ_RMCLASS_DEBUG_H_
diff --git a/freebsd/sys/contrib/altq/altq/altq_subr.c b/freebsd/sys/net/altq/altq_subr.c
index fbbf5896..66ff441d 100644
--- a/freebsd/sys/contrib/altq/altq/altq_subr.c
+++ b/freebsd/sys/net/altq/altq_subr.c
@@ -1,9 +1,6 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $FreeBSD$ */
-/* $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ */
-
-/*
+/*-
* Copyright (C) 1997-2003
* Sony Computer Science Laboratories Inc. All rights reserved.
*
@@ -27,15 +24,14 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $
+ * $FreeBSD$
*/
-#if defined(__FreeBSD__) || defined(__NetBSD__)
#include <rtems/bsd/local/opt_altq.h>
#include <rtems/bsd/local/opt_inet.h>
-#ifdef __FreeBSD__
#include <rtems/bsd/local/opt_inet6.h>
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
#include <rtems/bsd/sys/param.h>
#include <sys/malloc.h>
@@ -51,11 +47,10 @@
#include <sys/queue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
-#ifdef __FreeBSD__
#include <net/vnet.h>
-#endif
#include <netinet/in.h>
#include <netinet/in_systm.h>
@@ -66,27 +61,22 @@
#include <netinet/tcp.h>
#include <netinet/udp.h>
-#include <net/pfvar.h>
-#include <altq/altq.h>
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <net/altq/altq.h>
#ifdef ALTQ3_COMPAT
-#include <altq/altq_conf.h>
+#include <net/altq/altq_conf.h>
#endif
/* machine dependent clock related includes */
-#ifdef __FreeBSD__
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/eventhandler.h>
#include <machine/clock.h>
-#endif
#if defined(__amd64__) || defined(__i386__)
#include <machine/cpufunc.h> /* for pentium tsc */
#include <machine/specialreg.h> /* for CPUID_TSC */
-#ifdef __FreeBSD__
#include <machine/md_var.h> /* for cpu_feature */
-#elif defined(__NetBSD__) || defined(__OpenBSD__)
-#include <machine/cpu.h> /* for cpu_feature */
-#endif
#endif /* __amd64 || __i386__ */
/*
@@ -254,11 +244,7 @@ altq_enable(ifq)
return 0;
}
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
IFQ_PURGE_NOLOCK(ifq);
ASSERT(ifq->ifq_len == 0);
ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */
@@ -283,11 +269,7 @@ altq_disable(ifq)
return 0;
}
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
IFQ_PURGE_NOLOCK(ifq);
ASSERT(ifq->ifq_len == 0);
ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
@@ -403,14 +385,11 @@ tbr_set(ifq, profile)
return (0);
}
- IFQ_UNLOCK(ifq);
- tbr = malloc(sizeof(struct tb_regulator),
- M_DEVBUF, M_WAITOK);
- if (tbr == NULL) { /* can not happen */
+ tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (tbr == NULL) {
IFQ_UNLOCK(ifq);
return (ENOMEM);
}
- bzero(tbr, sizeof(struct tb_regulator));
tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
tbr->tbr_depth = TBR_SCALE(profile->depth);
@@ -422,7 +401,6 @@ tbr_set(ifq, profile)
tbr->tbr_last = read_machclk();
tbr->tbr_lastop = ALTDQ_REMOVE;
- IFQ_LOCK(ifq);
otbr = ifq->altq_tbr;
ifq->altq_tbr = tbr; /* set the new tbr */
@@ -448,24 +426,16 @@ static void
tbr_timeout(arg)
void *arg;
{
-#ifdef __FreeBSD__
VNET_ITERATOR_DECL(vnet_iter);
-#endif
struct ifnet *ifp;
int active, s;
active = 0;
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
-#ifdef __FreeBSD__
IFNET_RLOCK_NOSLEEP();
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
-#endif
for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
ifp = TAILQ_NEXT(ifp, if_list)) {
/* read from if_snd unlocked */
@@ -476,12 +446,10 @@ tbr_timeout(arg)
ifp->if_start != NULL)
(*ifp->if_start)(ifp);
}
-#ifdef __FreeBSD__
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
IFNET_RUNLOCK_NOSLEEP();
-#endif
splx(s);
if (active > 0)
CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
@@ -541,6 +509,16 @@ altq_pfattach(struct pf_altq *a)
error = hfsc_pfattach(a);
break;
#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_pfattach(a);
+ break;
+#endif
+#ifdef ALTQ_CODEL
+ case ALTQT_CODEL:
+ error = codel_pfattach(a);
+ break;
+#endif
default:
error = ENXIO;
}
@@ -567,11 +545,7 @@ altq_pfdetach(struct pf_altq *a)
if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
return (0);
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
/* read unlocked from if_snd, _disable and _detach take care */
if (ALTQ_IS_ENABLED(&ifp->if_snd))
error = altq_disable(&ifp->if_snd);
@@ -616,6 +590,16 @@ altq_add(struct pf_altq *a)
error = hfsc_add_altq(a);
break;
#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_add_altq(a);
+ break;
+#endif
+#ifdef ALTQ_CODEL
+ case ALTQT_CODEL:
+ error = codel_add_altq(a);
+ break;
+#endif
default:
error = ENXIO;
}
@@ -652,6 +636,16 @@ altq_remove(struct pf_altq *a)
error = hfsc_remove_altq(a);
break;
#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_remove_altq(a);
+ break;
+#endif
+#ifdef ALTQ_CODEL
+ case ALTQT_CODEL:
+ error = codel_remove_altq(a);
+ break;
+#endif
default:
error = ENXIO;
}
@@ -685,6 +679,11 @@ altq_add_queue(struct pf_altq *a)
error = hfsc_add_queue(a);
break;
#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_add_queue(a);
+ break;
+#endif
default:
error = ENXIO;
}
@@ -718,6 +717,11 @@ altq_remove_queue(struct pf_altq *a)
error = hfsc_remove_queue(a);
break;
#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_remove_queue(a);
+ break;
+#endif
default:
error = ENXIO;
}
@@ -751,6 +755,16 @@ altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
error = hfsc_getqstats(a, ubuf, nbytes);
break;
#endif
+#ifdef ALTQ_FAIRQ
+ case ALTQT_FAIRQ:
+ error = fairq_getqstats(a, ubuf, nbytes);
+ break;
+#endif
+#ifdef ALTQ_CODEL
+ case ALTQT_CODEL:
+ error = codel_getqstats(a, ubuf, nbytes);
+ break;
+#endif
default:
error = ENXIO;
}
@@ -888,7 +902,6 @@ u_int32_t machclk_per_tick;
extern u_int64_t cpu_tsc_freq;
#endif
-#ifndef __rtems__
#if (__FreeBSD_version >= 700035)
/* Update TSC freq with the value indicated by the caller. */
static void
@@ -910,7 +923,6 @@ tsc_freq_changed(void *arg, const struct cf_level *level, int status)
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
EVENTHANDLER_PRI_LAST);
#endif /* __FreeBSD_version >= 700035 */
-#endif /* __rtems__ */
static void
init_machclk_setup(void)
@@ -930,16 +942,10 @@ init_machclk_setup(void)
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
machclk_usepcc = 0;
#endif
-#if !defined(__rtems__)
#if defined(__amd64__) || defined(__i386__)
/* check if TSC is available */
-#ifdef __FreeBSD__
if ((cpu_feature & CPUID_TSC) == 0 ||
atomic_load_acq_64(&tsc_freq) == 0)
-#else
- if ((cpu_feature & CPUID_TSC) == 0)
-#endif
-#endif /* __rtems__ */
machclk_usepcc = 0;
#endif
}
@@ -970,15 +976,7 @@ init_machclk(void)
* accessible, just use it.
*/
#if defined(__amd64__) || defined(__i386__)
-#ifndef __rtems__
-#ifdef __FreeBSD__
machclk_freq = atomic_load_acq_64(&tsc_freq);
-#elif defined(__NetBSD__)
- machclk_freq = (u_int32_t)cpu_tsc_freq;
-#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
- machclk_freq = pentium_mhz * 1000000;
-#endif
-#endif /* __rtems */
#endif
/*
@@ -1031,9 +1029,10 @@ read_machclk(void)
panic("read_machclk");
#endif
} else {
- struct timeval tv;
+ struct timeval tv, boottime;
microtime(&tv);
+ getboottime(&boottime);
val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
+ tv.tv_usec) << MACHCLK_SHIFT);
}
@@ -1459,11 +1458,7 @@ acc_add_filter(classifier, filter, class, phandle)
* add this filter to the filter list.
* filters are ordered from the highest rule number.
*/
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
prev = NULL;
LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
@@ -1492,11 +1487,7 @@ acc_delete_filter(classifier, handle)
if ((afp = filth_to_filtp(classifier, handle)) == NULL)
return (EINVAL);
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
LIST_REMOVE(afp, f_chain);
splx(s);
@@ -1520,11 +1511,7 @@ acc_discard_filters(classifier, class, all)
struct acc_filter *afp;
int i, s;
-#ifdef __NetBSD__
s = splnet();
-#else
- s = splimp();
-#endif
for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
do {
LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
diff --git a/freebsd/sys/contrib/altq/altq/altq_var.h b/freebsd/sys/net/altq/altq_var.h
index b956002a..2ddcb211 100644
--- a/freebsd/sys/contrib/altq/altq/altq_var.h
+++ b/freebsd/sys/net/altq/altq_var.h
@@ -1,7 +1,4 @@
-/* $FreeBSD$ */
-/* $KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $ */
-
-/*
+/*-
* Copyright (C) 1998-2003
* Sony Computer Science Laboratories Inc. All rights reserved.
*
@@ -25,6 +22,9 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $
+ * $FreeBSD$
*/
#ifndef _ALTQ_ALTQ_VAR_H_
#define _ALTQ_ALTQ_VAR_H_
@@ -161,7 +161,6 @@ typedef u_long ioctlcmd_t;
#endif
/* macro for timeout/untimeout */
-#if (__FreeBSD_version > 300000) || defined(__NetBSD__)
/* use callout */
#include <sys/callout.h>
@@ -175,35 +174,6 @@ typedef u_long ioctlcmd_t;
#if !defined(CALLOUT_INITIALIZER) && (__FreeBSD_version < 600000)
#define CALLOUT_INITIALIZER { { { NULL } }, 0, NULL, NULL, 0 }
#endif
-#elif defined(__OpenBSD__)
-#include <sys/timeout.h>
-/* callout structure as a wrapper of struct timeout */
-struct callout {
- struct timeout c_to;
-};
-#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0)
-#define CALLOUT_RESET(c,t,f,a) do { if (!timeout_initialized(&(c)->c_to)) \
- timeout_set(&(c)->c_to, (f), (a)); \
- timeout_add(&(c)->c_to, (t)); } while (/*CONSTCOND*/ 0)
-#define CALLOUT_STOP(c) timeout_del(&(c)->c_to)
-#define CALLOUT_INITIALIZER { { { NULL }, NULL, NULL, 0, 0 } }
-#else
-/* use old-style timeout/untimeout */
-/* dummy callout structure */
-struct callout {
- void *c_arg; /* function argument */
- void (*c_func)(void *); /* functiuon to call */
-};
-#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0)
-#define CALLOUT_RESET(c,t,f,a) do { (c)->c_arg = (a); \
- (c)->c_func = (f); \
- timeout((f),(a),(t)); } while (/*CONSTCOND*/ 0)
-#define CALLOUT_STOP(c) untimeout((c)->c_func,(c)->c_arg)
-#define CALLOUT_INITIALIZER { NULL, NULL }
-#endif
-#if !defined(__FreeBSD__)
-typedef void (timeout_t)(void *);
-#endif
#define m_pktlen(m) ((m)->m_pkthdr.len)
@@ -243,6 +213,11 @@ int cbq_add_queue(struct pf_altq *);
int cbq_remove_queue(struct pf_altq *);
int cbq_getqstats(struct pf_altq *, void *, int *);
+int codel_pfattach(struct pf_altq *);
+int codel_add_altq(struct pf_altq *);
+int codel_remove_altq(struct pf_altq *);
+int codel_getqstats(struct pf_altq *, void *, int *);
+
int priq_pfattach(struct pf_altq *);
int priq_add_altq(struct pf_altq *);
int priq_remove_altq(struct pf_altq *);
@@ -257,5 +232,12 @@ int hfsc_add_queue(struct pf_altq *);
int hfsc_remove_queue(struct pf_altq *);
int hfsc_getqstats(struct pf_altq *, void *, int *);
+int fairq_pfattach(struct pf_altq *);
+int fairq_add_altq(struct pf_altq *);
+int fairq_remove_altq(struct pf_altq *);
+int fairq_add_queue(struct pf_altq *);
+int fairq_remove_queue(struct pf_altq *);
+int fairq_getqstats(struct pf_altq *, void *, int *);
+
#endif /* _KERNEL */
#endif /* _ALTQ_ALTQ_VAR_H_ */
diff --git a/freebsd/sys/contrib/altq/altq/if_altq.h b/freebsd/sys/net/altq/if_altq.h
index 2d983e9a..c5ad2875 100644
--- a/freebsd/sys/contrib/altq/altq/if_altq.h
+++ b/freebsd/sys/net/altq/if_altq.h
@@ -1,7 +1,4 @@
-/* $FreeBSD$ */
-/* $KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $ */
-
-/*
+/*-
* Copyright (C) 1997-2003
* Sony Computer Science Laboratories Inc. All rights reserved.
*
@@ -25,19 +22,16 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $
+ * $FreeBSD$
*/
#ifndef _ALTQ_IF_ALTQ_H_
#define _ALTQ_IF_ALTQ_H_
-#ifdef __FreeBSD__
#include <rtems/bsd/sys/lock.h> /* XXX */
#include <sys/mutex.h> /* XXX */
#include <sys/event.h> /* XXX */
-#endif
-
-#ifdef _KERNEL_OPT
-#include <altq/altqconf.h>
-#endif
struct altq_pktattr; struct tb_regulator; struct top_cdnr;
@@ -50,10 +44,7 @@ struct ifaltq {
struct mbuf *ifq_tail;
int ifq_len;
int ifq_maxlen;
- int ifq_drops;
-#ifdef __FreeBSD__
struct mtx ifq_mtx;
-#endif
/* driver owned queue (used for bulk dequeue and prepend) UNLOCKED */
struct mbuf *ifq_drv_head;
diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c
index f74ac9a1..e7822586 100644
--- a/freebsd/sys/net/bpf.c
+++ b/freebsd/sys/net/bpf.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_bpf.h>
#include <rtems/bsd/local/opt_compat.h>
+#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/local/opt_netgraph.h>
#include <sys/types.h>
@@ -69,8 +70,13 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
#include <net/if.h>
-#define BPF_INTERNAL
+#include <net/if_var.h>
+#include <net/if_dl.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
@@ -78,6 +84,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
+#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -96,6 +103,20 @@ __FBSDID("$FreeBSD$");
MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
+struct bpf_if {
+#define bif_next bif_ext.bif_next
+#define bif_dlist bif_ext.bif_dlist
+ struct bpf_if_ext bif_ext; /* public members */
+ u_int bif_dlt; /* link layer type */
+ u_int bif_hdrlen; /* length of link header */
+ struct ifnet *bif_ifp; /* corresponding interface */
+ struct rwlock bif_lock; /* interface lock */
+ LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */
+ int bif_flags; /* Interface flags */
+};
+
+CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);
+
#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
#define PRINET 26 /* interruptible */
@@ -107,7 +128,7 @@ MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
#define BPF_ALIGNMENT32 sizeof(int32_t)
-#define BPF_WORDALIGN32(x) (((x)+(BPF_ALIGNMENT32-1))&~(BPF_ALIGNMENT32-1))
+#define BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32)
#ifndef BURN_BRIDGES
/*
@@ -148,7 +169,7 @@ struct bpf_dltlist32 {
* structures registered by different layers in the stack (i.e., 802.11
* frames, ethernet frames, etc).
*/
-static LIST_HEAD(, bpf_if) bpf_iflist;
+static LIST_HEAD(, bpf_if) bpf_iflist, bpf_freelist;
static struct mtx bpf_mtx; /* bpf global lock */
static int bpf_bpfd_cnt;
@@ -157,7 +178,7 @@ static void bpf_detachd(struct bpf_d *);
static void bpf_detachd_locked(struct bpf_d *);
static void bpf_freed(struct bpf_d *);
static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
- struct sockaddr *, int *, struct bpf_insn *);
+ struct sockaddr *, int *, struct bpf_d *);
static int bpf_setif(struct bpf_d *, struct ifreq *);
static void bpf_timed_out(void *);
static __inline void
@@ -188,8 +209,8 @@ static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
static VNET_DEFINE(int, bpf_optimize_writers) = 0;
#define V_bpf_optimize_writers VNET(bpf_optimize_writers)
-SYSCTL_VNET_INT(_net_bpf, OID_AUTO, optimize_writers,
- CTLFLAG_RW, &VNET_NAME(bpf_optimize_writers), 0,
+SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(bpf_optimize_writers), 0,
"Do not send packets until BPF program is set");
#ifndef __rtems__
@@ -479,7 +500,7 @@ bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
*/
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
- struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
+ struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
{
const struct ieee80211_bpf_params *p;
struct ether_header *eh;
@@ -561,37 +582,20 @@ bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
}
len = uio->uio_resid;
-
- if (len - hlen > ifp->if_mtu)
+ if (len < hlen || len - hlen > ifp->if_mtu)
return (EMSGSIZE);
- if ((unsigned)len > MJUM16BYTES)
+ m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR);
+ if (m == NULL)
return (EIO);
-
- if (len <= MHLEN)
- MGETHDR(m, M_WAIT, MT_DATA);
- else if (len <= MCLBYTES)
- m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
- else
- m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR,
-#if (MJUMPAGESIZE > MCLBYTES)
- len <= MJUMPAGESIZE ? MJUMPAGESIZE :
-#endif
- (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
m->m_pkthdr.len = m->m_len = len;
- m->m_pkthdr.rcvif = NULL;
*mp = m;
- if (m->m_len < hlen) {
- error = EPERM;
- goto bad;
- }
-
error = uiomove(mtod(m, u_char *), len, uio);
if (error)
goto bad;
- slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
+ slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
if (slen == 0) {
error = EPERM;
goto bad;
@@ -608,6 +612,10 @@ bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
else
m->m_flags |= M_MCAST;
}
+ if (d->bd_hdrcmplt == 0) {
+ memcpy(eh->ether_shost, IF_LLADDR(ifp),
+ sizeof(eh->ether_shost));
+ }
break;
}
@@ -632,7 +640,7 @@ bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
goto bad;
}
}
- bcopy(m->m_data, sockp->sa_data, hlen);
+ bcopy(mtod(m, const void *), sockp->sa_data, hlen);
}
*hdrlen = hlen;
@@ -656,13 +664,13 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
* Save sysctl value to protect from sysctl change
* between reads
*/
- op_w = V_bpf_optimize_writers;
+ op_w = V_bpf_optimize_writers || d->bd_writer;
if (d->bd_bif != NULL)
bpf_detachd_locked(d);
/*
* Point d at bp, and add d to the interface's list.
- * Since there are many applicaiotns using BPF for
+ * Since there are many applications using BPF for
* sending raw packets only (dhcpd, cdpd are good examples)
* we can delay adding d to the list of active listeners until
* some filter is configured.
@@ -760,7 +768,7 @@ bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen)
/*
* Add d to the list of active bp filters.
- * Reuqires bpf_attachd() to be called before
+ * Requires bpf_attachd() to be called before.
*/
static void
bpf_upgraded(struct bpf_d *d)
@@ -909,7 +917,7 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
struct bpf_d *d;
#ifndef __rtems__
- int error, size;
+ int error;
d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
error = devfs_set_cdevpriv(d, bpf_dtor);
@@ -932,6 +940,8 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
* particular buffer method.
*/
bpf_buffer_init(d);
+ if ((flags & FREAD) == 0)
+ d->bd_writer = 2;
d->bd_hbuf_in_use = 0;
d->bd_bufmode = BPF_BUFMODE_BUFFER;
d->bd_sig = SIGIO;
@@ -945,10 +955,6 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
- /* Allocate default buffers */
- size = d->bd_bufsize;
- bpf_buffer_ioctl_sblen(d, &size);
-
#ifndef __rtems__
return (0);
#else /* __rtems__ */
@@ -1163,6 +1169,7 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
struct ifnet *ifp;
struct mbuf *m, *mc;
struct sockaddr dst;
+ struct route ro;
int error, hlen;
error = devfs_get_cdevpriv((void **)&d);
@@ -1194,7 +1201,7 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
hlen = 0;
/* XXX: bpf_movein() can sleep */
error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
- &m, &dst, &hlen, d->bd_wfilter);
+ &m, &dst, &hlen, d);
if (error) {
d->bd_wdcount++;
return (error);
@@ -1204,7 +1211,7 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
dst.sa_family = pseudo_AF_HDRCMPLT;
if (d->bd_feedback) {
- mc = m_dup(m, M_DONTWAIT);
+ mc = m_dup(m, M_NOWAIT);
if (mc != NULL)
mc->m_pkthdr.rcvif = ifp;
/* Set M_PROMISC for outgoing packets to be discarded. */
@@ -1226,7 +1233,14 @@ bpfwrite(struct bpf_d *d, struct uio *uio, int ioflag)
BPFD_UNLOCK(d);
#endif
- error = (*ifp->if_output)(ifp, m, &dst, NULL);
+ bzero(&ro, sizeof(ro));
+ if (hlen != 0) {
+ ro.ro_prepend = (u_char *)&dst.sa_data;
+ ro.ro_plen = hlen;
+ ro.ro_flags = RT_HAS_HEADER;
+ }
+
+ error = (*ifp->if_output)(ifp, m, &dst, &ro);
if (error)
d->bd_wdcount++;
@@ -1278,7 +1292,6 @@ reset_d(struct bpf_d *d)
/*
* FIONREAD Check for read packet available.
- * SIOCGIFADDR Get interface address - convenient hook to driver.
* BIOCGBLEN Get buffer len [for read()].
* BIOCSETF Set read filter.
* BIOCSETFNR Set read filter without resetting descriptor.
@@ -1347,7 +1360,7 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
#endif
case BIOCGETIF:
case BIOCGRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
case BIOCGRTIMEOUT32:
#endif
case BIOCGSTATS:
@@ -1359,7 +1372,7 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
case FIONREAD:
case BIOCLOCK:
case BIOCSRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
case BIOCSRTIMEOUT32:
#endif
case BIOCIMMEDIATE:
@@ -1415,19 +1428,6 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
break;
}
- case SIOCGIFADDR:
- {
- struct ifnet *ifp;
-
- if (d->bd_bif == NULL)
- error = EINVAL;
- else {
- ifp = d->bd_bif->bif_ifp;
- error = (*ifp->if_ioctl)(ifp, cmd, addr);
- }
- break;
- }
-
/*
* Get buffer len [for read()].
*/
@@ -1564,21 +1564,44 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
* Set interface.
*/
case BIOCSETIF:
- BPF_LOCK();
- error = bpf_setif(d, (struct ifreq *)addr);
- BPF_UNLOCK();
- break;
+ {
+ int alloc_buf, size;
+
+ /*
+ * Behavior here depends on the buffering model. If
+ * we're using kernel memory buffers, then we can
+ * allocate them here. If we're using zero-copy,
+ * then the user process must have registered buffers
+ * by the time we get here.
+ */
+ alloc_buf = 0;
+ BPFD_LOCK(d);
+ if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
+ d->bd_sbuf == NULL)
+ alloc_buf = 1;
+ BPFD_UNLOCK(d);
+ if (alloc_buf) {
+ size = d->bd_bufsize;
+ error = bpf_buffer_ioctl_sblen(d, &size);
+ if (error != 0)
+ break;
+ }
+ BPF_LOCK();
+ error = bpf_setif(d, (struct ifreq *)addr);
+ BPF_UNLOCK();
+ break;
+ }
/*
* Set read timeout.
*/
case BIOCSRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
case BIOCSRTIMEOUT32:
#endif
{
struct timeval *tv = (struct timeval *)addr;
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
struct timeval32 *tv32;
struct timeval tv64;
@@ -1604,12 +1627,12 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
* Get read timeout.
*/
case BIOCGRTIMEOUT:
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
case BIOCGRTIMEOUT32:
#endif
{
struct timeval *tv;
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
struct timeval32 *tv32;
struct timeval tv64;
@@ -1621,7 +1644,7 @@ bpfioctl(struct bpf_d *d, u_long cmd, caddr_t addr, int flags,
tv->tv_sec = d->bd_rtout / hz;
tv->tv_usec = (d->bd_rtout % hz) * tick;
-#ifdef COMPAT_FREEBSD32
+#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
if (cmd == BIOCGRTIMEOUT32) {
tv32 = (struct timeval32 *)addr;
tv32->tv_sec = tv->tv_sec;
@@ -2001,17 +2024,15 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
/* Check if interface is not being detached from BPF */
BPFIF_RLOCK(bp);
- if (bp->flags & BPFIF_FLAG_DYING) {
+ if (bp->bif_flags & BPFIF_FLAG_DYING) {
BPFIF_RUNLOCK(bp);
return (ENXIO);
}
BPFIF_RUNLOCK(bp);
/*
- * Behavior here depends on the buffering model. If we're using
- * kernel memory buffers, then we can allocate them here. If we're
- * using zero-copy, then the user process must have registered
- * buffers by the time we get here. If not, return an error.
+ * At this point, we expect the buffer is already allocated. If not,
+ * return an error.
*/
switch (d->bd_bufmode) {
case BPF_BUFMODE_BUFFER:
@@ -2131,10 +2152,10 @@ filt_bpfread(struct knote *kn, long hint)
ready = bpf_ready(d);
if (ready) {
kn->kn_data = d->bd_slen;
- while (d->bd_hbuf_in_use)
- mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
- PRINET, "bd_hbuf", 0);
- if (d->bd_hbuf)
+ /*
+ * Ignore the hold buffer if it is being copied to user space.
+ */
+ if (!d->bd_hbuf_in_use && d->bd_hbuf)
kn->kn_data += d->bd_hlen;
} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
callout_reset(&d->bd_callout, d->bd_rtout,
@@ -2405,12 +2426,19 @@ bpf_hdrlen(struct bpf_d *d)
static void
bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
{
+#ifndef __rtems__
+ struct bintime bt2, boottimebin;
+#else /* __rtems__ */
struct bintime bt2;
+#endif /* __rtems__ */
struct timeval tsm;
struct timespec tsn;
if ((tstype & BPF_T_MONOTONIC) == 0) {
bt2 = *bt;
+#ifndef __rtems__
+ getboottimebin(&boottimebin);
+#endif /* __rtems__ */
bintime_add(&bt2, &boottimebin);
bt = &bt2;
}
@@ -2466,9 +2494,6 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
* spot to do it.
*/
if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
- while (d->bd_hbuf_in_use)
- mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
- PRINET, "bd_hbuf", 0);
d->bd_fbuf = d->bd_hbuf;
d->bd_hbuf = NULL;
d->bd_hlen = 0;
@@ -2511,9 +2536,7 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
++d->bd_dcount;
return;
}
- while (d->bd_hbuf_in_use)
- mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
- PRINET, "bd_hbuf", 0);
+ KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use"));
ROTATE_BUFFERS(d);
do_wakeup = 1;
curlen = 0;
@@ -2652,10 +2675,36 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
bp->bif_hdrlen = hdrlen;
- if (bootverbose)
+ if (bootverbose && IS_DEFAULT_VNET(curvnet))
if_printf(ifp, "bpf attached\n");
}
+#ifdef VIMAGE
+/*
+ * When moving interfaces between vnet instances we need a way to
 + * query the dlt and hdrlen before detach so we can re-attach the if_bpf
+ * after the vmove. We unfortunately have no device driver infrastructure
+ * to query the interface for these values after creation/attach, thus
+ * add this as a workaround.
+ */
+int
+bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen)
+{
+
+ if (bp == NULL)
+ return (ENXIO);
+ if (bif_dlt == NULL && bif_hdrlen == NULL)
+ return (0);
+
+ if (bif_dlt != NULL)
+ *bif_dlt = bp->bif_dlt;
+ if (bif_hdrlen != NULL)
+ *bif_hdrlen = bp->bif_hdrlen;
+
+ return (0);
+}
+#endif
+
/*
* Detach bpf from an interface. This involves detaching each descriptor
* associated with the interface. Notify each descriptor as it's detached
@@ -2664,52 +2713,51 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
void
bpfdetach(struct ifnet *ifp)
{
- struct bpf_if *bp;
+ struct bpf_if *bp, *bp_temp;
struct bpf_d *d;
-#ifdef INVARIANTS
int ndetached;
ndetached = 0;
-#endif
BPF_LOCK();
/* Find all bpf_if struct's which reference ifp and detach them. */
- do {
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
- if (ifp == bp->bif_ifp)
- break;
- }
- if (bp != NULL)
- LIST_REMOVE(bp, bif_next);
+ LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
+ if (ifp != bp->bif_ifp)
+ continue;
- if (bp != NULL) {
-#ifdef INVARIANTS
- ndetached++;
-#endif
- while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
- bpf_detachd_locked(d);
- BPFD_LOCK(d);
- bpf_wakeup(d);
- BPFD_UNLOCK(d);
- }
- /* Free writer-only descriptors */
- while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
- bpf_detachd_locked(d);
- BPFD_LOCK(d);
- bpf_wakeup(d);
- BPFD_UNLOCK(d);
- }
+ LIST_REMOVE(bp, bif_next);
+ /* Add to to-be-freed list */
+ LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);
- /*
- * Delay freing bp till interface is detached
- * and all routes through this interface are removed.
- * Mark bp as detached to restrict new consumers.
- */
- BPFIF_WLOCK(bp);
- bp->flags |= BPFIF_FLAG_DYING;
- BPFIF_WUNLOCK(bp);
+ ndetached++;
+ /*
+ * Delay freeing bp till interface is detached
+ * and all routes through this interface are removed.
+ * Mark bp as detached to restrict new consumers.
+ */
+ BPFIF_WLOCK(bp);
+ bp->bif_flags |= BPFIF_FLAG_DYING;
+ BPFIF_WUNLOCK(bp);
+
+ CTR4(KTR_NET, "%s: sheduling free for encap %d (%p) for if %p",
+ __func__, bp->bif_dlt, bp, ifp);
+
+ /* Free common descriptors */
+ while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
+ bpf_detachd_locked(d);
+ BPFD_LOCK(d);
+ bpf_wakeup(d);
+ BPFD_UNLOCK(d);
}
- } while (bp != NULL);
+
+ /* Free writer-only descriptors */
+ while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
+ bpf_detachd_locked(d);
+ BPFD_LOCK(d);
+ bpf_wakeup(d);
+ BPFD_UNLOCK(d);
+ }
+ }
BPF_UNLOCK();
#ifdef INVARIANTS
@@ -2721,32 +2769,46 @@ bpfdetach(struct ifnet *ifp)
/*
* Interface departure handler.
* Note departure event does not guarantee interface is going down.
+ * Interface renaming is currently done via departure/arrival event set.
+ *
 + * Departure handler is called after all routes pointing to
+ * given interface are removed and interface is in down state
+ * restricting any packets to be sent/received. We assume it is now safe
+ * to free data allocated by BPF.
*/
static void
bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
{
- struct bpf_if *bp;
+ struct bpf_if *bp, *bp_temp;
+ int nmatched = 0;
BPF_LOCK();
- if ((bp = ifp->if_bpf) == NULL) {
- BPF_UNLOCK();
- return;
- }
+ /*
+ * Find matching entries in free list.
+ * Nothing should be found if bpfdetach() was not called.
+ */
+ LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
+ if (ifp != bp->bif_ifp)
+ continue;
- /* Check if bpfdetach() was called previously */
- if ((bp->flags & BPFIF_FLAG_DYING) == 0) {
- BPF_UNLOCK();
- return;
- }
+ CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
+ __func__, bp, ifp);
- CTR3(KTR_NET, "%s: freing BPF instance %p for interface %p",
- __func__, bp, ifp);
+ LIST_REMOVE(bp, bif_next);
- ifp->if_bpf = NULL;
+ rw_destroy(&bp->bif_lock);
+ free(bp, M_BPF);
+
+ nmatched++;
+ }
BPF_UNLOCK();
- rw_destroy(&bp->bif_lock);
- free(bp, M_BPF);
+ /*
+ * Note that we cannot zero other pointers to
+ * custom DLTs possibly used by given interface.
+ */
+ if (nmatched != 0)
+ ifp->if_bpf = NULL;
}
/*
@@ -2755,26 +2817,44 @@ bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
- int n, error;
struct ifnet *ifp;
struct bpf_if *bp;
+ u_int *lst;
+ int error, n, n1;
BPF_LOCK_ASSERT();
ifp = d->bd_bif->bif_ifp;
+again:
+ n1 = 0;
+ LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ if (bp->bif_ifp == ifp)
+ n1++;
+ }
+ if (bfl->bfl_list == NULL) {
+ bfl->bfl_len = n1;
+ return (0);
+ }
+ if (n1 > bfl->bfl_len)
+ return (ENOMEM);
+ BPF_UNLOCK();
+ lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
n = 0;
- error = 0;
+ BPF_LOCK();
LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp != ifp)
continue;
- if (bfl->bfl_list != NULL) {
- if (n >= bfl->bfl_len)
- return (ENOMEM);
- error = copyout(&bp->bif_dlt,
- bfl->bfl_list + n, sizeof(u_int));
+ if (n >= n1) {
+ free(lst, M_TEMP);
+ goto again;
}
+ lst[n] = bp->bif_dlt;
n++;
}
+ BPF_UNLOCK();
+ error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
+ free(lst, M_TEMP);
+ BPF_LOCK();
bfl->bfl_len = n;
return (error);
}
@@ -2999,6 +3079,7 @@ bpf_drvinit(void *unused)
mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
LIST_INIT(&bpf_iflist);
+ LIST_INIT(&bpf_freelist);
#ifndef __rtems__
dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
@@ -3214,3 +3295,34 @@ bpf_validate(const struct bpf_insn *f, int len)
}
#endif /* !DEV_BPF && !NETGRAPH_BPF */
+
+#ifdef DDB
+static void
+bpf_show_bpf_if(struct bpf_if *bpf_if)
+{
+
+ if (bpf_if == NULL)
+ return;
+ db_printf("%p:\n", bpf_if);
+#define BPF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, bpf_if->e);
+ /* bif_ext.bif_next */
+ /* bif_ext.bif_dlist */
+ BPF_DB_PRINTF("%#x", bif_dlt);
+ BPF_DB_PRINTF("%u", bif_hdrlen);
+ BPF_DB_PRINTF("%p", bif_ifp);
+ /* bif_lock */
+ /* bif_wlist */
+ BPF_DB_PRINTF("%#x", bif_flags);
+}
+
+DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
+{
+
+ if (!have_addr) {
+ db_printf("usage: show bpf_if <struct bpf_if *>\n");
+ return;
+ }
+
+ bpf_show_bpf_if((struct bpf_if *)addr);
+}
+#endif
diff --git a/freebsd/sys/net/bpf.h b/freebsd/sys/net/bpf.h
index bfe8cfe0..f707f436 100644
--- a/freebsd/sys/net/bpf.h
+++ b/freebsd/sys/net/bpf.h
@@ -582,7 +582,7 @@ struct bpf_zbuf_header {
* input packets such as port scans, packets from old lost connections,
* etc. to force the connection to stay up).
*
- * The first byte of the PPP header (0xff03) is modified to accomodate
+ * The first byte of the PPP header (0xff03) is modified to accommodate
* the direction - 0x00 = IN, 0x01 = OUT.
*/
#define DLT_PPP_PPPD 166
@@ -1096,7 +1096,7 @@ struct bpf_zbuf_header {
#define DLT_NETANALYZER_TRANSPARENT 241
/*
- * IP-over-Infiniband, as specified by RFC 4391.
+ * IP-over-InfiniBand, as specified by RFC 4391.
*
* Requested by Petr Sumbera <petr.sumbera@oracle.com>.
*/
@@ -1138,7 +1138,145 @@ struct bpf_zbuf_header {
#define DLT_PFSYNC 246
#endif
-#define DLT_MATCHING_MAX 246 /* highest value in the "matching" range */
+/*
+ * Raw InfiniBand packets, starting with the Local Routing Header.
+ *
+ * Requested by Oren Kladnitsky <orenk@mellanox.com>.
+ */
+#define DLT_INFINIBAND 247
+
+/*
+ * SCTP, with no lower-level protocols (i.e., no IPv4 or IPv6).
+ *
+ * Requested by Michael Tuexen <Michael.Tuexen@lurchi.franken.de>.
+ */
+#define DLT_SCTP 248
+
+/*
+ * USB packets, beginning with a USBPcap header.
+ *
+ * Requested by Tomasz Mon <desowin@gmail.com>
+ */
+#define DLT_USBPCAP 249
+
+/*
+ * Schweitzer Engineering Laboratories "RTAC" product serial-line
+ * packets.
+ *
+ * Requested by Chris Bontje <chris_bontje@selinc.com>.
+ */
+#define DLT_RTAC_SERIAL 250
+
+/*
+ * Bluetooth Low Energy air interface link-layer packets.
+ *
+ * Requested by Mike Kershaw <dragorn@kismetwireless.net>.
+ */
+#define DLT_BLUETOOTH_LE_LL 251
+
+/*
+ * DLT type for upper-protocol layer PDU saves from wireshark.
+ *
+ * the actual contents are determined by two TAGs stored with each
+ * packet:
+ * EXP_PDU_TAG_LINKTYPE the link type (LINKTYPE_ value) of the
+ * original packet.
+ *
+ * EXP_PDU_TAG_PROTO_NAME the name of the wireshark dissector
+ * that can make sense of the data stored.
+ */
+#define DLT_WIRESHARK_UPPER_PDU 252
+
+/*
+ * DLT type for the netlink protocol (nlmon devices).
+ */
+#define DLT_NETLINK 253
+
+/*
+ * Bluetooth Linux Monitor headers for the BlueZ stack.
+ */
+#define DLT_BLUETOOTH_LINUX_MONITOR 254
+
+/*
+ * Bluetooth Basic Rate/Enhanced Data Rate baseband packets, as
+ * captured by Ubertooth.
+ */
+#define DLT_BLUETOOTH_BREDR_BB 255
+
+/*
+ * Bluetooth Low Energy link layer packets, as captured by Ubertooth.
+ */
+#define DLT_BLUETOOTH_LE_LL_WITH_PHDR 256
+
+/*
+ * PROFIBUS data link layer.
+ */
+#define DLT_PROFIBUS_DL 257
+
+/*
+ * Apple's DLT_PKTAP headers.
+ *
+ * Sadly, the folks at Apple either had no clue that the DLT_USERn values
+ * are for internal use within an organization and partners only, and
+ * didn't know that the right way to get a link-layer header type is to
+ * ask tcpdump.org for one, or knew and didn't care, so they just
+ * used DLT_USER2, which causes problems for everything except for
+ * their version of tcpdump.
+ *
+ * So I'll just give them one; hopefully this will show up in a
+ * libpcap release in time for them to get this into 10.10 Big Sur
+ * or whatever Mavericks' successor is called. LINKTYPE_PKTAP
+ * will be 258 *even on OS X*; that is *intentional*, so that
+ * PKTAP files look the same on *all* OSes (different OSes can have
+ * different numerical values for a given DLT_, but *MUST NOT* have
+ * different values for what goes in a file, as files can be moved
+ * between OSes!).
+ *
+ * When capturing, on a system with a Darwin-based OS, on a device
+ * that returns 149 (DLT_USER2 and Apple's DLT_PKTAP) with this
+ * version of libpcap, the DLT_ value for the pcap_t will be DLT_PKTAP,
+ * and that will continue to be DLT_USER2 on Darwin-based OSes. That way,
+ * binary compatibility with Mavericks is preserved for programs using
+ * this version of libpcap. This does mean that if you were using
+ * DLT_USER2 for some capture device on OS X, you can't do so with
+ * this version of libpcap, just as you can't with Apple's libpcap -
+ * on OS X, they define DLT_PKTAP to be DLT_USER2, so programs won't
+ * be able to distinguish between PKTAP and whatever you were using
+ * DLT_USER2 for.
+ *
+ * If the program saves the capture to a file using this version of
+ * libpcap's pcap_dump code, the LINKTYPE_ value in the file will be
+ * LINKTYPE_PKTAP, which will be 258, even on Darwin-based OSes.
+ * That way, the file will *not* be a DLT_USER2 file. That means
+ * that the latest version of tcpdump, when built with this version
+ * of libpcap, and sufficiently recent versions of Wireshark will
+ * be able to read those files and interpret them correctly; however,
+ * Apple's version of tcpdump in OS X 10.9 won't be able to handle
+ * them. (Hopefully, Apple will pick up this version of libpcap,
+ * and the corresponding version of tcpdump, so that tcpdump will
+ * be able to handle the old LINKTYPE_USER2 captures *and* the new
+ * LINKTYPE_PKTAP captures.)
+ */
+#ifdef __APPLE__
+#define DLT_PKTAP DLT_USER2
+#else
+#define DLT_PKTAP 258
+#endif
+
+/*
+ * Ethernet packets preceded by a header giving the last 6 octets
+ * of the preamble specified by 802.3-2012 Clause 65, section
+ * 65.1.3.2 "Transmit".
+ */
+#define DLT_EPON 259
+
+/*
+ * IPMI trace packets, as specified by Table 3-20 "Trace Data Block Format"
+ * in the PICMG HPM.2 specification.
+ */
+#define DLT_IPMI_HPM_2 260
+
+#define DLT_MATCHING_MAX 260 /* highest value in the "matching" range */
/*
* DLT and savefile link type values are split into a class and
@@ -1149,7 +1287,17 @@ struct bpf_zbuf_header {
/*
* The instruction encodings.
+ *
+ * Please inform tcpdump-workers@lists.tcpdump.org if you use any
+ * of the reserved values, so that we can note that they're used
+ * (and perhaps implement it in the reference BPF implementation
+ * and encourage its implementation elsewhere).
*/
+
+/*
+ * The upper 8 bits of the opcode aren't used. BSD/OS used 0x8000.
+ */
+
/* instruction classes */
#define BPF_CLASS(code) ((code) & 0x07)
#define BPF_LD 0x00
@@ -1166,6 +1314,7 @@ struct bpf_zbuf_header {
#define BPF_W 0x00
#define BPF_H 0x08
#define BPF_B 0x10
+/* 0x18 reserved; used by BSD/OS */
#define BPF_MODE(code) ((code) & 0xe0)
#define BPF_IMM 0x00
#define BPF_ABS 0x20
@@ -1173,6 +1322,8 @@ struct bpf_zbuf_header {
#define BPF_MEM 0x60
#define BPF_LEN 0x80
#define BPF_MSH 0xa0
+/* 0xc0 reserved; used by BSD/OS */
+/* 0xe0 reserved; used by BSD/OS */
/* alu/jmp fields */
#define BPF_OP(code) ((code) & 0xf0)
@@ -1185,11 +1336,30 @@ struct bpf_zbuf_header {
#define BPF_LSH 0x60
#define BPF_RSH 0x70
#define BPF_NEG 0x80
+#define BPF_MOD 0x90
+#define BPF_XOR 0xa0
+/* 0xb0 reserved */
+/* 0xc0 reserved */
+/* 0xd0 reserved */
+/* 0xe0 reserved */
+/* 0xf0 reserved */
+
#define BPF_JA 0x00
#define BPF_JEQ 0x10
#define BPF_JGT 0x20
#define BPF_JGE 0x30
#define BPF_JSET 0x40
+/* 0x50 reserved; used on BSD/OS */
+/* 0x60 reserved */
+/* 0x70 reserved */
+/* 0x80 reserved */
+/* 0x90 reserved */
+/* 0xa0 reserved */
+/* 0xb0 reserved */
+/* 0xc0 reserved */
+/* 0xd0 reserved */
+/* 0xe0 reserved */
+/* 0xf0 reserved */
#define BPF_SRC(code) ((code) & 0x08)
#define BPF_K 0x00
#define BPF_X 0x08
@@ -1197,11 +1367,43 @@ struct bpf_zbuf_header {
/* ret - BPF_K and BPF_X also apply */
#define BPF_RVAL(code) ((code) & 0x18)
#define BPF_A 0x10
+/* 0x18 reserved */
/* misc */
#define BPF_MISCOP(code) ((code) & 0xf8)
#define BPF_TAX 0x00
+/* 0x08 reserved */
+/* 0x10 reserved */
+/* 0x18 reserved */
+/* #define BPF_COP 0x20 NetBSD "coprocessor" extensions */
+/* 0x28 reserved */
+/* 0x30 reserved */
+/* 0x38 reserved */
+/* #define BPF_COPX 0x40 NetBSD "coprocessor" extensions */
+/* also used on BSD/OS */
+/* 0x48 reserved */
+/* 0x50 reserved */
+/* 0x58 reserved */
+/* 0x60 reserved */
+/* 0x68 reserved */
+/* 0x70 reserved */
+/* 0x78 reserved */
#define BPF_TXA 0x80
+/* 0x88 reserved */
+/* 0x90 reserved */
+/* 0x98 reserved */
+/* 0xa0 reserved */
+/* 0xa8 reserved */
+/* 0xb0 reserved */
+/* 0xb8 reserved */
+/* 0xc0 reserved; used on BSD/OS */
+/* 0xc8 reserved */
+/* 0xd0 reserved */
+/* 0xd8 reserved */
+/* 0xe0 reserved */
+/* 0xe8 reserved */
+/* 0xf0 reserved */
+/* 0xf8 reserved */
/*
* The instruction data structure.
@@ -1237,9 +1439,9 @@ SYSCTL_DECL(_net_bpf);
/*
* Rotate the packet buffers in descriptor d. Move the store buffer into the
- * hold slot, and the free buffer ino the store slot. Zero the length of the
- * new store buffer. Descriptor lock should be held. Hold buffer must
- * not be marked "in use".
+ * hold slot, and the free buffer into the store slot. Zero the length of the
+ * new store buffer. Descriptor lock should be held. One must be careful to
+ * not rotate the buffers twice, i.e. if fbuf != NULL.
*/
#define ROTATE_BUFFERS(d) do { \
(d)->bd_hbuf = (d)->bd_sbuf; \
@@ -1252,21 +1454,14 @@ SYSCTL_DECL(_net_bpf);
/*
* Descriptor associated with each attached hardware interface.
- * FIXME: this structure is exposed to external callers to speed up
- * bpf_peers_present() call. However we cover all fields not needed by
- * this function via BPF_INTERNAL define
+ * Part of this structure is exposed to external callers to speed up
+ * bpf_peers_present() calls.
*/
-struct bpf_if {
+struct bpf_if;
+
+struct bpf_if_ext {
LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
LIST_HEAD(, bpf_d) bif_dlist; /* descriptor list */
-#ifdef BPF_INTERNAL
- u_int bif_dlt; /* link layer type */
- u_int bif_hdrlen; /* length of link header */
- struct ifnet *bif_ifp; /* corresponding interface */
- struct rwlock bif_lock; /* interface lock */
- LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */
- int flags; /* Interface flags */
-#endif
};
void bpf_bufheld(struct bpf_d *d);
@@ -1277,6 +1472,9 @@ void bpf_mtap2(struct bpf_if *, void *, u_int, struct mbuf *);
void bpfattach(struct ifnet *, u_int, u_int);
void bpfattach2(struct ifnet *, u_int, u_int, struct bpf_if **);
void bpfdetach(struct ifnet *);
+#ifdef VIMAGE
+int bpf_get_bp_params(struct bpf_if *, u_int *, u_int *);
+#endif
void bpfilterattach(int);
u_int bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int);
@@ -1284,8 +1482,10 @@ u_int bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int);
static __inline int
bpf_peers_present(struct bpf_if *bpf)
{
+ struct bpf_if_ext *ext;
- if (!LIST_EMPTY(&bpf->bif_dlist))
+ ext = (struct bpf_if_ext *)bpf;
+ if (!LIST_EMPTY(&ext->bif_dlist))
return (1);
return (0);
}
@@ -1313,4 +1513,12 @@ bpf_peers_present(struct bpf_if *bpf)
*/
#define BPF_MEMWORDS 16
+#ifdef _SYS_EVENTHANDLER_H_
+/* BPF attach/detach events */
+struct ifnet;
+typedef void (*bpf_track_fn)(void *, struct ifnet *, int /* dlt */,
+ int /* 1 =>'s attach */);
+EVENTHANDLER_DECLARE(bpf_track, bpf_track_fn);
+#endif /* _SYS_EVENTHANDLER_H_ */
+
#endif /* _NET_BPF_H_ */
diff --git a/freebsd/sys/net/bpf_buffer.c b/freebsd/sys/net/bpf_buffer.c
index ec6aed74..d42df1b0 100644
--- a/freebsd/sys/net/bpf_buffer.c
+++ b/freebsd/sys/net/bpf_buffer.c
@@ -81,8 +81,6 @@ __FBSDID("$FreeBSD$");
#include <net/bpf_buffer.h>
#include <net/bpfdesc.h>
-#define PRINET 26 /* interruptible */
-
/*
* Implement historical kernel memory buffering model for BPF: two malloc(9)
* kernel buffers are hung off of the descriptor. The size is fixed prior to
@@ -193,9 +191,6 @@ bpf_buffer_ioctl_sblen(struct bpf_d *d, u_int *i)
return (EINVAL);
}
- while (d->bd_hbuf_in_use)
- mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
- PRINET, "bd_hbuf", 0);
/* Free old buffers if set */
if (d->bd_fbuf != NULL)
free(d->bd_fbuf, M_BPF);
diff --git a/freebsd/sys/net/bpf_filter.c b/freebsd/sys/net/bpf_filter.c
index a313f4bd..941fa290 100644
--- a/freebsd/sys/net/bpf_filter.c
+++ b/freebsd/sys/net/bpf_filter.c
@@ -41,6 +41,9 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
+#if !defined(_KERNEL)
+#include <strings.h>
+#endif
#if !defined(_KERNEL) || defined(sun)
#include <netinet/in.h>
#endif
@@ -98,7 +101,7 @@ m_xword(struct mbuf *m, bpf_u_int32 k, int *err)
while (k >= len) {
k -= len;
m = m->m_next;
- if (m == 0)
+ if (m == NULL)
goto bad;
len = m->m_len;
}
@@ -108,7 +111,7 @@ m_xword(struct mbuf *m, bpf_u_int32 k, int *err)
return (EXTRACT_LONG(cp));
}
m0 = m->m_next;
- if (m0 == 0 || m0->m_len + len - k < 4)
+ if (m0 == NULL || m0->m_len + len - k < 4)
goto bad;
*err = 0;
np = mtod(m0, u_char *);
@@ -147,7 +150,7 @@ m_xhalf(struct mbuf *m, bpf_u_int32 k, int *err)
while (k >= len) {
k -= len;
m = m->m_next;
- if (m == 0)
+ if (m == NULL)
goto bad;
len = m->m_len;
}
@@ -157,7 +160,7 @@ m_xhalf(struct mbuf *m, bpf_u_int32 k, int *err)
return (EXTRACT_SHORT(cp));
}
m0 = m->m_next;
- if (m0 == 0)
+ if (m0 == NULL)
goto bad;
*err = 0;
return ((cp[0] << 8) | mtod(m0, u_char *)[0]);
diff --git a/freebsd/sys/net/bridgestp.c b/freebsd/sys/net/bridgestp.c
index 167bc59f..5fea7ae7 100644
--- a/freebsd/sys/net/bridgestp.c
+++ b/freebsd/sys/net/bridgestp.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/kernel.h>
+#include <sys/malloc.h>
#include <sys/callout.h>
#include <sys/module.h>
#include <sys/proc.h>
@@ -53,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <sys/taskqueue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_llc.h>
@@ -236,7 +238,7 @@ bstp_transmit_tcn(struct bstp_state *bs, struct bstp_port *bp)
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
return;
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
@@ -350,7 +352,7 @@ bstp_send_bpdu(struct bstp_state *bs, struct bstp_port *bp,
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
return;
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
@@ -789,7 +791,7 @@ bstp_assign_roles(struct bstp_state *bs)
bs->bs_root_htime = bs->bs_bridge_htime;
bs->bs_root_port = NULL;
- /* check if any recieved info supersedes us */
+ /* check if any received info supersedes us */
LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
if (bp->bp_infois != BSTP_INFO_RECEIVED)
continue;
diff --git a/freebsd/sys/net/ethernet.h b/freebsd/sys/net/ethernet.h
index ae7341ee..bc5fa9cb 100644
--- a/freebsd/sys/net/ethernet.h
+++ b/freebsd/sys/net/ethernet.h
@@ -71,6 +71,28 @@ struct ether_addr {
} __packed;
#define ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */
+#define ETHER_IS_BROADCAST(addr) \
+ (((addr)[0] & (addr)[1] & (addr)[2] & \
+ (addr)[3] & (addr)[4] & (addr)[5]) == 0xff)
+
+/*
+ * 802.1q Virtual LAN header.
+ */
+struct ether_vlan_header {
+ uint8_t evl_dhost[ETHER_ADDR_LEN];
+ uint8_t evl_shost[ETHER_ADDR_LEN];
+ uint16_t evl_encap_proto;
+ uint16_t evl_tag;
+ uint16_t evl_proto;
+} __packed;
+
+#define EVL_VLID_MASK 0x0FFF
+#define EVL_PRI_MASK 0xE000
+#define EVL_VLANOFTAG(tag) ((tag) & EVL_VLID_MASK)
+#define EVL_PRIOFTAG(tag) (((tag) >> 13) & 7)
+#define EVL_CFIOFTAG(tag) (((tag) >> 12) & 1)
+#define EVL_MAKETAG(vlid, pri, cfi) \
+ ((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & EVL_VLID_MASK))
/*
* NOTE: 0x0000-0x05DC (0..1500) are generally IEEE 802.3 length fields.
@@ -314,6 +336,7 @@ struct ether_addr {
#define ETHERTYPE_SLOW 0x8809 /* 802.3ad link aggregation (LACP) */
#define ETHERTYPE_PPP 0x880B /* PPP (obsolete by PPPoE) */
#define ETHERTYPE_HITACHI 0x8820 /* Hitachi Cable (Optoelectronic Systems Laboratory) */
+#define ETHERTYPE_TEST 0x8822 /* Network Conformance Testing */
#define ETHERTYPE_MPLS 0x8847 /* MPLS Unicast */
#define ETHERTYPE_MPLS_MCAST 0x8848 /* MPLS Multicast */
#define ETHERTYPE_AXIS 0x8856 /* Axis Communications AB proprietary bootstrap/config */
@@ -375,8 +398,8 @@ extern void ether_demux(struct ifnet *, struct mbuf *);
extern void ether_ifattach(struct ifnet *, const u_int8_t *);
extern void ether_ifdetach(struct ifnet *);
extern int ether_ioctl(struct ifnet *, u_long, caddr_t);
-extern int ether_output(struct ifnet *,
- struct mbuf *, struct sockaddr *, struct route *);
+extern int ether_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
extern int ether_output_frame(struct ifnet *, struct mbuf *);
extern char *ether_sprintf(const u_int8_t *);
void ether_vlan_mtap(struct bpf_if *, struct mbuf *,
diff --git a/freebsd/sys/net/flowtable.h b/freebsd/sys/net/flowtable.h
index d810fa33..5a1d9273 100644
--- a/freebsd/sys/net/flowtable.h
+++ b/freebsd/sys/net/flowtable.h
@@ -1,83 +1,56 @@
-/**************************************************************************
-
-Copyright (c) 2008-2010, BitGravity Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the BitGravity Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-$FreeBSD$
-
-***************************************************************************/
+/*-
+ * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
+ * Copyright (c) 2008-2010, BitGravity Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of the BitGravity Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
#ifndef _NET_FLOWTABLE_H_
#define _NET_FLOWTABLE_H_
-#ifdef _KERNEL
-
-#define FL_HASH_ALL (1<<0) /* hash 4-tuple + protocol */
-#define FL_PCPU (1<<1) /* pcpu cache */
-#define FL_NOAUTO (1<<2) /* don't automatically add flentry on miss */
-#define FL_IPV6 (1<<9)
-
-#define FL_TCP (1<<11)
-#define FL_SCTP (1<<12)
-#define FL_UDP (1<<13)
-#define FL_DEBUG (1<<14)
-#define FL_DEBUG_ALL (1<<15)
-
-struct flowtable;
-struct flentry;
-struct route;
-struct route_in6;
+struct flowtable_stat {
+ uint64_t ft_collisions;
+ uint64_t ft_misses;
+ uint64_t ft_free_checks;
+ uint64_t ft_frees;
+ uint64_t ft_hits;
+ uint64_t ft_lookups;
+ uint64_t ft_fail_lle_invalid;
+ uint64_t ft_inserts;
+};
-VNET_DECLARE(struct flowtable *, ip_ft);
-#define V_ip_ft VNET(ip_ft)
-
-VNET_DECLARE(struct flowtable *, ip6_ft);
-#define V_ip6_ft VNET(ip6_ft)
-
-struct flowtable *flowtable_alloc(char *name, int nentry, int flags);
+#ifdef _KERNEL
/*
- * Given a flow table, look up the L3 and L2 information and
- * return it in the route.
- *
+ * Given a flow table, look up the L3 and L2 information
+ * and return it in the route.
*/
-struct flentry *flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af);
-
-struct flentry *flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
- struct sockaddr_storage *dsa, uint32_t fibnum, int flags);
-
-int kern_flowtable_insert(struct flowtable *ft, struct sockaddr_storage *ssa,
- struct sockaddr_storage *dsa, struct route *ro, uint32_t fibnum, int flags);
-
-void flow_invalidate(struct flentry *fl);
-void flowtable_route_flush(struct flowtable *ft, struct rtentry *rt);
-
-void flow_to_route(struct flentry *fl, struct route *ro);
-
-void flow_to_route_in6(struct flentry *fl, struct route_in6 *ro);
-
+int flowtable_lookup(sa_family_t, struct mbuf *, struct route *);
+void flowtable_route_flush(sa_family_t, struct rtentry *);
#endif /* _KERNEL */
-#endif
+#endif /* !_NET_FLOWTABLE_H_ */
diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c
index 5172ad54..619db8af 100644
--- a/freebsd/sys/net/ieee8023ad_lacp.c
+++ b/freebsd/sys/net/ieee8023ad_lacp.c
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/callout.h>
+#include <sys/eventhandler.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/malloc.h>
@@ -44,8 +45,10 @@ __FBSDID("$FreeBSD$");
#include <machine/stdarg.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/rwlock.h>
+#include <sys/taskqueue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/ethernet.h>
#include <net/if_media.h>
@@ -189,30 +192,37 @@ static const char *lacp_format_portid(const struct lacp_portid *, char *,
static void lacp_dprintf(const struct lacp_port *, const char *, ...)
__attribute__((__format__(__printf__, 2, 3)));
-static int lacp_debug = 0;
-SYSCTL_INT(_net, OID_AUTO, lacp_debug, CTLFLAG_RW | CTLFLAG_TUN,
- &lacp_debug, 0, "Enable LACP debug logging (1=debug, 2=trace)");
-TUNABLE_INT("net.lacp_debug", &lacp_debug);
+static VNET_DEFINE(int, lacp_debug);
+#define V_lacp_debug VNET(lacp_debug)
+SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD, 0, "ieee802.3ad");
+SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET,
+ &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)");
-#define LACP_DPRINTF(a) if (lacp_debug > 0) { lacp_dprintf a ; }
-#define LACP_TRACE(a) if (lacp_debug > 1) { lacp_dprintf(a,"%s\n",__func__); }
+static VNET_DEFINE(int, lacp_default_strict_mode) = 1;
+SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, default_strict_mode, CTLFLAG_RWTUN,
+ &VNET_NAME(lacp_default_strict_mode), 0,
+ "LACP strict protocol compliance default");
+
+#define LACP_DPRINTF(a) if (V_lacp_debug & 0x01) { lacp_dprintf a ; }
+#define LACP_TRACE(a) if (V_lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); }
+#define LACP_TPRINTF(a) if (V_lacp_debug & 0x04) { lacp_dprintf a ; }
/*
* partner administration variables.
* XXX should be configurable.
*/
-static const struct lacp_peerinfo lacp_partner_admin = {
+static const struct lacp_peerinfo lacp_partner_admin_optimistic = {
.lip_systemid = { .lsi_prio = 0xffff },
.lip_portid = { .lpi_prio = 0xffff },
-#if 1
- /* optimistic */
.lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
-#else
- /* pessimistic */
+};
+
+static const struct lacp_peerinfo lacp_partner_admin_strict = {
+ .lip_systemid = { .lsi_prio = 0xffff },
+ .lip_portid = { .lpi_prio = 0xffff },
.lip_state = 0,
-#endif
};
static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
@@ -298,11 +308,16 @@ lacp_pdu_input(struct lacp_port *lp, struct mbuf *m)
goto bad;
}
- if (lacp_debug > 0) {
+ if (V_lacp_debug > 0) {
lacp_dprintf(lp, "lacpdu receive\n");
lacp_dump_lacpdu(du);
}
+ if ((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_rx_test) {
+ LACP_TPRINTF((lp, "Dropping RX PDU\n"));
+ goto bad;
+ }
+
LACP_LOCK(lsc);
lacp_sm_rx(lp, du);
LACP_UNLOCK(lsc);
@@ -350,7 +365,7 @@ lacp_xmit_lacpdu(struct lacp_port *lp)
LACP_LOCK_ASSERT(lp->lp_lsc);
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
return (ENOMEM);
}
@@ -378,7 +393,7 @@ lacp_xmit_lacpdu(struct lacp_port *lp)
sizeof(du->ldu_collector));
du->ldu_collector.lci_maxdelay = 0;
- if (lacp_debug > 0) {
+ if (V_lacp_debug > 0) {
lacp_dprintf(lp, "lacpdu transmit\n");
lacp_dump_lacpdu(du);
}
@@ -404,7 +419,7 @@ lacp_xmit_marker(struct lacp_port *lp)
LACP_LOCK_ASSERT(lp->lp_lsc);
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
return (ENOMEM);
}
@@ -490,12 +505,14 @@ lacp_tick(void *arg)
if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
continue;
+ CURVNET_SET(lp->lp_ifp->if_vnet);
lacp_run_timers(lp);
lacp_select(lp);
lacp_sm_mux(lp);
lacp_sm_tx(lp);
lacp_sm_ptx_tx_schedule(lp);
+ CURVNET_RESTORE();
}
callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
}
@@ -512,20 +529,17 @@ lacp_port_create(struct lagg_port *lgp)
int error;
boolean_t active = TRUE; /* XXX should be configurable */
- boolean_t fast = FALSE; /* XXX should be configurable */
+ boolean_t fast = FALSE; /* Configurable via ioctl */
- bzero((char *)&sdl, sizeof(sdl));
- sdl.sdl_len = sizeof(sdl);
- sdl.sdl_family = AF_LINK;
- sdl.sdl_index = ifp->if_index;
- sdl.sdl_type = IFT_ETHER;
+ link_init_sdl(ifp, (struct sockaddr *)&sdl, IFT_ETHER);
sdl.sdl_alen = ETHER_ADDR_LEN;
bcopy(&ethermulticastaddr_slowprotocols,
LLADDR(&sdl), ETHER_ADDR_LEN);
error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
if (error) {
- printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifname);
+ printf("%s: ADDMULTI failed on %s\n", __func__,
+ lgp->lp_ifp->if_xname);
return (error);
}
@@ -535,7 +549,7 @@ lacp_port_create(struct lagg_port *lgp)
return (ENOMEM);
LACP_LOCK(lsc);
- lgp->lp_psc = (caddr_t)lp;
+ lgp->lp_psc = lp;
lp->lp_ifp = ifp;
lp->lp_lagg = lgp;
lp->lp_lsc = lsc;
@@ -572,17 +586,18 @@ lacp_port_destroy(struct lagg_port *lgp)
lacp_disable_distributing(lp);
lacp_unselect(lp);
+ LIST_REMOVE(lp, lp_next);
+ LACP_UNLOCK(lsc);
+
/* The address may have already been removed by if_purgemaddrs() */
if (!lgp->lp_detaching)
if_delmulti_ifma(lp->lp_ifma);
- LIST_REMOVE(lp, lp_next);
- LACP_UNLOCK(lsc);
free(lp, M_DEVBUF);
}
void
-lacp_req(struct lagg_softc *sc, caddr_t data)
+lacp_req(struct lagg_softc *sc, void *data)
{
struct lacp_opreq *req = (struct lacp_opreq *)data;
struct lacp_softc *lsc = LACP_SOFTC(sc);
@@ -590,7 +605,7 @@ lacp_req(struct lagg_softc *sc, caddr_t data)
bzero(req, sizeof(struct lacp_opreq));
- /*
+ /*
* If the LACP softc is NULL, return with the opreq structure full of
* zeros. It is normal for the softc to be NULL while the lagg is
* being destroyed.
@@ -621,7 +636,7 @@ lacp_req(struct lagg_softc *sc, caddr_t data)
}
void
-lacp_portreq(struct lagg_port *lgp, caddr_t data)
+lacp_portreq(struct lagg_port *lgp, void *data)
{
struct lacp_opreq *req = (struct lacp_opreq *)data;
struct lacp_port *lp = LACP_PORT(lgp);
@@ -665,6 +680,7 @@ lacp_disable_distributing(struct lacp_port *lp)
{
struct lacp_aggregator *la = lp->lp_aggregator;
struct lacp_softc *lsc = lp->lp_lsc;
+ struct lagg_softc *sc = lsc->lsc_softc;
char buf[LACP_LAGIDSTR_MAX+1];
LACP_LOCK_ASSERT(lsc);
@@ -684,6 +700,7 @@ lacp_disable_distributing(struct lacp_port *lp)
TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
la->la_nports--;
+ sc->sc_active = la->la_nports;
if (lsc->lsc_active_aggregator == la) {
lacp_suppress_distributing(lsc, la);
@@ -700,6 +717,7 @@ lacp_enable_distributing(struct lacp_port *lp)
{
struct lacp_aggregator *la = lp->lp_aggregator;
struct lacp_softc *lsc = lp->lp_lsc;
+ struct lagg_softc *sc = lsc->lsc_softc;
char buf[LACP_LAGIDSTR_MAX+1];
LACP_LOCK_ASSERT(lsc);
@@ -716,6 +734,7 @@ lacp_enable_distributing(struct lacp_port *lp)
KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
la->la_nports++;
+ sc->sc_active = la->la_nports;
lp->lp_state |= LACP_STATE_DISTRIBUTING;
@@ -734,26 +753,26 @@ lacp_transit_expire(void *vp)
LACP_LOCK_ASSERT(lsc);
+ CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet);
LACP_TRACE(NULL);
+ CURVNET_RESTORE();
lsc->lsc_suppress_distributing = FALSE;
}
-int
+void
lacp_attach(struct lagg_softc *sc)
{
struct lacp_softc *lsc;
- lsc = malloc(sizeof(struct lacp_softc),
- M_DEVBUF, M_NOWAIT|M_ZERO);
- if (lsc == NULL)
- return (ENOMEM);
+ lsc = malloc(sizeof(struct lacp_softc), M_DEVBUF, M_WAITOK | M_ZERO);
- sc->sc_psc = (caddr_t)lsc;
+ sc->sc_psc = lsc;
lsc->lsc_softc = sc;
- lsc->lsc_hashkey = arc4random();
+ lsc->lsc_hashkey = m_ether_tcpip_hash_init();
lsc->lsc_active_aggregator = NULL;
+ lsc->lsc_strict_mode = VNET(lacp_default_strict_mode);
LACP_LOCK_INIT(lsc);
TAILQ_INIT(&lsc->lsc_aggregators);
LIST_INIT(&lsc->lsc_ports);
@@ -764,27 +783,23 @@ lacp_attach(struct lagg_softc *sc)
/* if the lagg is already up then do the same */
if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
lacp_init(sc);
-
- return (0);
}
-int
-lacp_detach(struct lagg_softc *sc)
+void
+lacp_detach(void *psc)
{
- struct lacp_softc *lsc = LACP_SOFTC(sc);
+ struct lacp_softc *lsc = (struct lacp_softc *)psc;
KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
("aggregators still active"));
KASSERT(lsc->lsc_active_aggregator == NULL,
("aggregator still attached"));
- sc->sc_psc = NULL;
callout_drain(&lsc->lsc_transit_callout);
callout_drain(&lsc->lsc_callout);
LACP_LOCK_DESTROY(lsc);
free(lsc, M_DEVBUF);
- return (0);
}
void
@@ -827,10 +842,11 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
return (NULL);
}
- if (sc->use_flowid && (m->m_flags & M_FLOWID))
- hash = m->m_pkthdr.flowid;
+ if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
+ M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ hash = m->m_pkthdr.flowid >> sc->flowid_shift;
else
- hash = lagg_hashmbuf(sc, m, lsc->lsc_hashkey);
+ hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
hash %= pm->pm_count;
lp = pm->pm_map[hash];
@@ -920,7 +936,6 @@ lacp_aggregator_bandwidth(struct lacp_aggregator *la)
static void
lacp_select_active_aggregator(struct lacp_softc *lsc)
{
- struct lagg_softc *sc = lsc->lsc_softc;
struct lacp_aggregator *la;
struct lacp_aggregator *best_la = NULL;
uint64_t best_speed = 0;
@@ -940,13 +955,13 @@ lacp_select_active_aggregator(struct lacp_softc *lsc)
lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
speed, la->la_nports));
- /* This aggregator is chosen if
- * the partner has a better system priority
- * or, the total aggregated speed is higher
- * or, it is already the chosen aggregator
+ /*
+ * This aggregator is chosen if the partner has a better
+ * system priority or, the total aggregated speed is higher
+ * or, it is already the chosen aggregator
*/
if ((best_la != NULL && LACP_SYS_PRI(la->la_partner) <
- LACP_SYS_PRI(best_la->la_partner)) ||
+ LACP_SYS_PRI(best_la->la_partner)) ||
speed > best_speed ||
(speed == best_speed &&
la == lsc->lsc_active_aggregator)) {
@@ -972,7 +987,6 @@ lacp_select_active_aggregator(struct lacp_softc *lsc)
lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
if (lsc->lsc_active_aggregator != best_la) {
- sc->sc_ifp->if_baudrate = best_speed;
lsc->lsc_active_aggregator = best_la;
lacp_update_portmap(lsc);
if (best_la) {
@@ -988,15 +1002,18 @@ lacp_select_active_aggregator(struct lacp_softc *lsc)
static void
lacp_update_portmap(struct lacp_softc *lsc)
{
+ struct lagg_softc *sc = lsc->lsc_softc;
struct lacp_aggregator *la;
struct lacp_portmap *p;
struct lacp_port *lp;
+ uint64_t speed;
u_int newmap;
int i;
newmap = lsc->lsc_activemap == 0 ? 1 : 0;
p = &lsc->lsc_pmap[newmap];
la = lsc->lsc_active_aggregator;
+ speed = 0;
bzero(p, sizeof(struct lacp_portmap));
if (la != NULL && la->la_nports > 0) {
@@ -1005,7 +1022,9 @@ lacp_update_portmap(struct lacp_softc *lsc)
TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q)
p->pm_map[i++] = lp;
KASSERT(i == p->pm_count, ("Invalid port count"));
+ speed = lacp_aggregator_bandwidth(la);
}
+ sc->sc_ifp->if_baudrate = speed;
/* switch the active portmap over */
atomic_store_rel_int(&lsc->lsc_activemap, newmap);
@@ -1054,12 +1073,16 @@ lacp_compose_key(struct lacp_port *lp)
case IFM_100_T4:
case IFM_100_VG:
case IFM_100_T2:
+ case IFM_100_T:
key = IFM_100_TX;
break;
case IFM_1000_SX:
case IFM_1000_LX:
case IFM_1000_CX:
case IFM_1000_T:
+ case IFM_1000_KX:
+ case IFM_1000_SGMII:
+ case IFM_1000_CX_SGMII:
key = IFM_1000_SX;
break;
case IFM_10G_LR:
@@ -1069,15 +1092,53 @@ lacp_compose_key(struct lacp_port *lp)
case IFM_10G_TWINAX_LONG:
case IFM_10G_LRM:
case IFM_10G_T:
+ case IFM_10G_KX4:
+ case IFM_10G_KR:
+ case IFM_10G_CR1:
+ case IFM_10G_ER:
+ case IFM_10G_SFI:
key = IFM_10G_LR;
break;
+ case IFM_20G_KR2:
+ key = IFM_20G_KR2;
+ break;
+ case IFM_2500_KX:
+ case IFM_2500_T:
+ key = IFM_2500_KX;
+ break;
+ case IFM_5000_T:
+ key = IFM_5000_T;
+ break;
+ case IFM_50G_PCIE:
+ case IFM_50G_CR2:
+ case IFM_50G_KR2:
+ key = IFM_50G_PCIE;
+ break;
+ case IFM_56G_R4:
+ key = IFM_56G_R4;
+ break;
+ case IFM_25G_PCIE:
+ case IFM_25G_CR:
+ case IFM_25G_KR:
+ case IFM_25G_SR:
+ key = IFM_25G_PCIE;
+ break;
case IFM_40G_CR4:
case IFM_40G_SR4:
case IFM_40G_LR4:
+ case IFM_40G_XLPPI:
+ case IFM_40G_KR4:
key = IFM_40G_CR4;
break;
+ case IFM_100G_CR4:
+ case IFM_100G_SR4:
+ case IFM_100G_KR4:
+ case IFM_100G_LR4:
+ key = IFM_100G_CR4;
+ break;
default:
key = subtype;
+ break;
}
/* bit 5..14: (some bits of) if_index of lagg device */
key |= 0x7fe0 & ((sc->sc_ifp->if_index) << 5);
@@ -1313,6 +1374,8 @@ lacp_unselect(struct lacp_port *lp)
static void
lacp_sm_mux(struct lacp_port *lp)
{
+ struct lagg_port *lgp = lp->lp_lagg;
+ struct lagg_softc *sc = lgp->lp_softc;
enum lacp_mux_state new_state;
boolean_t p_sync =
(lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
@@ -1321,8 +1384,10 @@ lacp_sm_mux(struct lacp_port *lp)
enum lacp_selected selected = lp->lp_selected;
struct lacp_aggregator *la;
- if (lacp_debug > 1)
- lacp_dprintf(lp, "%s: state %d\n", __func__, lp->lp_mux_state);
+ if (V_lacp_debug > 1)
+ lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, "
+ "p_sync= 0x%x, p_collecting= 0x%x\n", __func__,
+ lp->lp_mux_state, selected, p_sync, p_collecting);
re_eval:
la = lp->lp_aggregator;
@@ -1362,6 +1427,8 @@ re_eval:
case LACP_MUX_DISTRIBUTING:
if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
new_state = LACP_MUX_COLLECTING;
+ lacp_dprintf(lp, "Interface stopped DISTRIBUTING, possible flapping\n");
+ sc->sc_flapping++;
}
break;
default:
@@ -1610,6 +1677,10 @@ lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
sizeof(buf))));
}
+ /* XXX Hack, still need to implement 5.4.9 para 2,3,4 */
+ if (lp->lp_lsc->lsc_strict_mode)
+ lp->lp_partner.lip_state |= LACP_STATE_SYNC;
+
lacp_sm_ptx_update_timeout(lp, oldpstate);
}
@@ -1635,7 +1706,10 @@ lacp_sm_rx_record_default(struct lacp_port *lp)
LACP_TRACE(lp);
oldpstate = lp->lp_partner.lip_state;
- lp->lp_partner = lacp_partner_admin;
+ if (lp->lp_lsc->lsc_strict_mode)
+ lp->lp_partner = lacp_partner_admin_strict;
+ else
+ lp->lp_partner = lacp_partner_admin_optimistic;
lp->lp_state |= LACP_STATE_DEFAULTED;
lacp_sm_ptx_update_timeout(lp, oldpstate);
}
@@ -1670,7 +1744,12 @@ lacp_sm_rx_update_default_selected(struct lacp_port *lp)
LACP_TRACE(lp);
- lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin);
+ if (lp->lp_lsc->lsc_strict_mode)
+ lacp_sm_rx_update_selected_from_peerinfo(lp,
+ &lacp_partner_admin_strict);
+ else
+ lacp_sm_rx_update_selected_from_peerinfo(lp,
+ &lacp_partner_admin_optimistic);
}
/* transmit machine */
@@ -1678,7 +1757,7 @@ lacp_sm_rx_update_default_selected(struct lacp_port *lp)
static void
lacp_sm_tx(struct lacp_port *lp)
{
- int error;
+ int error = 0;
if (!(lp->lp_state & LACP_STATE_AGGREGATION)
#if 1
@@ -1700,7 +1779,11 @@ lacp_sm_tx(struct lacp_port *lp)
return;
}
- error = lacp_xmit_lacpdu(lp);
+ if (((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_tx_test) == 0) {
+ error = lacp_xmit_lacpdu(lp);
+ } else {
+ LACP_TPRINTF((lp, "Dropping TX PDU\n"));
+ }
if (error == 0) {
lp->lp_flags &= ~LACP_PORT_NTT;
diff --git a/freebsd/sys/net/ieee8023ad_lacp.h b/freebsd/sys/net/ieee8023ad_lacp.h
index 9cebc591..8f0f51a7 100644
--- a/freebsd/sys/net/ieee8023ad_lacp.h
+++ b/freebsd/sys/net/ieee8023ad_lacp.h
@@ -75,6 +75,7 @@
"\007DEFAULTED" \
"\010EXPIRED"
+#ifdef _KERNEL
/*
* IEEE802.3 slow protocols
*
@@ -245,6 +246,12 @@ struct lacp_softc {
struct lacp_portmap lsc_pmap[2];
volatile u_int lsc_activemap;
u_int32_t lsc_hashkey;
+ struct {
+ u_int32_t lsc_rx_test;
+ u_int32_t lsc_tx_test;
+ } lsc_debug;
+ u_int32_t lsc_strict_mode;
+ boolean_t lsc_fast_timeout; /* if set, fast timeout */
};
#define LACP_TYPE_ACTORINFO 1
@@ -277,15 +284,15 @@ struct lacp_softc {
struct mbuf *lacp_input(struct lagg_port *, struct mbuf *);
struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
-int lacp_attach(struct lagg_softc *);
-int lacp_detach(struct lagg_softc *);
+void lacp_attach(struct lagg_softc *);
+void lacp_detach(void *);
void lacp_init(struct lagg_softc *);
void lacp_stop(struct lagg_softc *);
int lacp_port_create(struct lagg_port *);
void lacp_port_destroy(struct lagg_port *);
void lacp_linkstate(struct lagg_port *);
-void lacp_req(struct lagg_softc *, caddr_t);
-void lacp_portreq(struct lagg_port *, caddr_t);
+void lacp_req(struct lagg_softc *, void *);
+void lacp_portreq(struct lagg_port *, void *);
static __inline int
lacp_isactive(struct lagg_port *lgp)
@@ -331,3 +338,4 @@ lacp_isdistributing(struct lagg_port *lgp)
#define LACP_LAGIDSTR_MAX \
(1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1)
#define LACP_STATESTR_MAX (255) /* XXX */
+#endif /* _KERNEL */
diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c
index 2c638a37..8bfa9e21 100644
--- a/freebsd/sys/net/if.c
+++ b/freebsd/sys/net/if.c
@@ -65,12 +65,16 @@
#include <machine/stdarg.h>
#include <vm/uma.h>
+#include <net/bpf.h>
+#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
+#include <net/if_media.h>
+#include <net/if_vlan_var.h>
#include <net/radix.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -97,14 +101,9 @@
#include <compat/freebsd32/freebsd32.h>
#endif
-struct ifindex_entry {
- struct ifnet *ife_ifnet;
-};
-
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
-TUNABLE_INT("net.link.ifqmaxlen", &ifqmaxlen);
SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN,
&ifqmaxlen, 0, "max send queue size");
@@ -115,6 +114,13 @@ SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
&log_link_state_change, 0,
"log interface link state change events");
+/* Log promiscuous mode change events */
+static int log_promisc_mode_change = 1;
+
+SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
+ &log_promisc_mode_change, 1,
+ "log promiscuous mode change events");
+
/* Interface description */
static unsigned int ifdescr_maxlen = 1024;
SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
@@ -132,18 +138,22 @@ void (*ng_ether_link_state_p)(struct ifnet *ifp, int state);
void (*lagg_linkstate_p)(struct ifnet *ifp, int state);
/* These are external hooks for CARP. */
void (*carp_linkstate_p)(struct ifnet *ifp);
+void (*carp_demote_adj_p)(int, char *);
+int (*carp_master_p)(struct ifaddr *);
#if defined(INET) || defined(INET6)
-struct ifnet *(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
+int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *sa, struct rtentry *rt);
+ const struct sockaddr *sa);
+int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
+int (*carp_attach_p)(struct ifaddr *, int);
+void (*carp_detach_p)(struct ifaddr *);
#endif
#ifdef INET
-int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *, struct in_addr *,
- u_int8_t **);
+int (*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
-caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
+caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
const struct in6_addr *taddr);
#endif
@@ -158,23 +168,25 @@ static void if_attachdomain(void *);
static void if_attachdomain1(struct ifnet *);
static int ifconf(u_long, caddr_t);
static void if_freemulti(struct ifmultiaddr *);
-static void if_init(void *);
static void if_grow(void);
static void if_input_default(struct ifnet *, struct mbuf *);
+static int if_requestencap_default(struct ifnet *, struct if_encap_req *);
static void if_route(struct ifnet *, int flag, int fam);
static int if_setflag(struct ifnet *, int, int, int *, int);
static int if_transmit(struct ifnet *ifp, struct mbuf *m);
static void if_unroute(struct ifnet *, int flag, int fam);
static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
-static int if_rtdel(struct radix_node *, void *);
static int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
static void do_link_state_change(void *, int);
static int if_getgroup(struct ifgroupreq *, struct ifnet *);
static int if_getgroupmembers(struct ifgroupreq *);
static void if_delgroups(struct ifnet *);
-static void if_attach_internal(struct ifnet *, int);
-static void if_detach_internal(struct ifnet *, int);
+static void if_attach_internal(struct ifnet *, int, struct if_clone *);
+static int if_detach_internal(struct ifnet *, int, struct if_clone **);
+#ifdef VIMAGE
+static void if_vmove(struct ifnet *, struct vnet *);
+#endif
#ifdef INET6
/*
@@ -184,6 +196,10 @@ static void if_detach_internal(struct ifnet *, int);
extern void nd6_setmtu(struct ifnet *);
#endif
+/* ipsec helper hooks */
+VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
+VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);
+
VNET_DEFINE(int, if_index);
int ifqmaxlen = IFQ_MAXLEN;
VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */
@@ -192,7 +208,7 @@ VNET_DEFINE(struct ifgrouphead, ifg_head);
static VNET_DEFINE(int, if_indexlim) = 8;
/* Table of ifnet by index. */
-VNET_DEFINE(struct ifindex_entry *, ifindex_table);
+VNET_DEFINE(struct ifnet **, ifindex_table);
#define V_if_indexlim VNET(if_indexlim)
#define V_ifindex_table VNET(ifindex_table)
@@ -207,7 +223,9 @@ VNET_DEFINE(struct ifindex_entry *, ifindex_table);
* inversions and deadlocks.
*/
struct rwlock ifnet_rwlock;
+RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE);
struct sx ifnet_sxlock;
+SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);
/*
* The allocation of network interfaces is a rather non-atomic affair; we
@@ -229,9 +247,9 @@ ifnet_byindex_locked(u_short idx)
if (idx > V_if_index)
return (NULL);
- if (V_ifindex_table[idx].ife_ifnet == IFNET_HOLD)
+ if (V_ifindex_table[idx] == IFNET_HOLD)
return (NULL);
- return (V_ifindex_table[idx].ife_ifnet);
+ return (V_ifindex_table[idx]);
}
struct ifnet *
@@ -265,34 +283,30 @@ ifnet_byindex_ref(u_short idx)
* Allocate an ifindex array entry; return 0 on success or an error on
* failure.
*/
-static int
-ifindex_alloc_locked(u_short *idxp)
+static u_short
+ifindex_alloc(void)
{
u_short idx;
IFNET_WLOCK_ASSERT();
-
retry:
/*
* Try to find an empty slot below V_if_index. If we fail, take the
* next slot.
*/
for (idx = 1; idx <= V_if_index; idx++) {
- if (V_ifindex_table[idx].ife_ifnet == NULL)
+ if (V_ifindex_table[idx] == NULL)
break;
}
/* Catch if_index overflow. */
- if (idx < 1)
- return (ENOSPC);
if (idx >= V_if_indexlim) {
if_grow();
goto retry;
}
if (idx > V_if_index)
V_if_index = idx;
- *idxp = idx;
- return (0);
+ return (idx);
}
static void
@@ -301,9 +315,9 @@ ifindex_free_locked(u_short idx)
IFNET_WLOCK_ASSERT();
- V_ifindex_table[idx].ife_ifnet = NULL;
+ V_ifindex_table[idx] = NULL;
while (V_if_index > 0 &&
- V_ifindex_table[V_if_index].ife_ifnet == NULL)
+ V_ifindex_table[V_if_index] == NULL)
V_if_index--;
}
@@ -322,7 +336,7 @@ ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp)
IFNET_WLOCK_ASSERT();
- V_ifindex_table[idx].ife_ifnet = ifp;
+ V_ifindex_table[idx] = ifp;
}
static void
@@ -337,11 +351,12 @@ ifnet_setbyindex(u_short idx, struct ifnet *ifp)
struct ifaddr *
ifaddr_byindex(u_short idx)
{
- struct ifaddr *ifa;
+ struct ifnet *ifp;
+ struct ifaddr *ifa = NULL;
IFNET_RLOCK_NOSLEEP();
- ifa = ifnet_byindex_locked(idx)->if_addr;
- if (ifa != NULL)
+ ifp = ifnet_byindex_locked(idx);
+ if (ifp != NULL && (ifa = ifp->if_addr) != NULL)
ifa_ref(ifa);
IFNET_RUNLOCK_NOSLEEP();
return (ifa);
@@ -368,17 +383,6 @@ vnet_if_init(const void *unused __unused)
VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
NULL);
-/* ARGSUSED*/
-static void
-if_init(void *dummy __unused)
-{
-
- IFNET_LOCK_INIT();
- if_clone_init();
-}
-SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL);
-
-
#ifdef VIMAGE
static void
vnet_if_uninit(const void *unused __unused)
@@ -393,6 +397,20 @@ vnet_if_uninit(const void *unused __unused)
}
VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
vnet_if_uninit, NULL);
+
+static void
+vnet_if_return(const void *unused __unused)
+{
+ struct ifnet *ifp, *nifp;
+
+ /* Return all inherited interfaces to their parent vnets. */
+ TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
+ if (ifp->if_home_vnet != ifp->if_vnet)
+ if_vmove(ifp, ifp->if_home_vnet);
+ }
+}
+VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
+ vnet_if_return, NULL);
#endif
static void
@@ -400,7 +418,7 @@ if_grow(void)
{
int oldlim;
u_int n;
- struct ifindex_entry *e;
+ struct ifnet **e;
IFNET_WLOCK_ASSERT();
oldlim = V_if_indexlim;
@@ -433,16 +451,15 @@ if_alloc(u_char type)
ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
IFNET_WLOCK();
- if (ifindex_alloc_locked(&idx) != 0) {
- IFNET_WUNLOCK();
- free(ifp, M_IFNET);
- return (NULL);
- }
+ idx = ifindex_alloc();
ifnet_setbyindex_locked(idx, IFNET_HOLD);
IFNET_WUNLOCK();
ifp->if_index = idx;
ifp->if_type = type;
ifp->if_alloctype = type;
+#ifdef VIMAGE
+ ifp->if_vnet = curvnet;
+#endif
if (if_com_alloc[type] != NULL) {
ifp->if_l2com = if_com_alloc[type](type, ifp);
if (ifp->if_l2com == NULL) {
@@ -457,7 +474,6 @@ if_alloc(u_char type)
ifp->if_afdata_initialized = 0;
IF_AFDATA_LOCK_INIT(ifp);
TAILQ_INIT(&ifp->if_addrhead);
- TAILQ_INIT(&ifp->if_prefixhead);
TAILQ_INIT(&ifp->if_multiaddrs);
TAILQ_INIT(&ifp->if_groups);
#ifdef MAC
@@ -466,6 +482,9 @@ if_alloc(u_char type)
ifq_init(&ifp->if_snd, ifp);
refcount_init(&ifp->if_refcount, 1); /* Index reference. */
+ for (int i = 0; i < IFCOUNTERS; i++)
+ ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
+ ifp->if_get_counter = if_get_counter_default;
ifnet_setbyindex(ifp->if_index, ifp);
return (ifp);
}
@@ -494,23 +513,20 @@ if_free_internal(struct ifnet *ifp)
IF_AFDATA_DESTROY(ifp);
IF_ADDR_LOCK_DESTROY(ifp);
ifq_delete(&ifp->if_snd);
+
+ for (int i = 0; i < IFCOUNTERS; i++)
+ counter_u64_free(ifp->if_counters[i]);
+
free(ifp, M_IFNET);
}
/*
- * This version should only be called by intefaces that switch their type
- * after calling if_alloc(). if_free_type() will go away again now that we
- * have if_alloctype to cache the original allocation type. For now, assert
- * that they match, since we require that in practice.
+ * Deregister an interface and free the associated storage.
*/
void
-if_free_type(struct ifnet *ifp, u_char type)
+if_free(struct ifnet *ifp)
{
- KASSERT(ifp->if_alloctype == type,
- ("if_free_type: type (%d) != alloctype (%d)", type,
- ifp->if_alloctype));
-
ifp->if_flags |= IFF_DYING; /* XXX: Locking */
CURVNET_SET_QUIET(ifp->if_vnet);
@@ -527,18 +543,6 @@ if_free_type(struct ifnet *ifp, u_char type)
}
/*
- * This is the normal version of if_free(), used by device drivers to free a
- * detached network interface. The contents of if_free_type() will move into
- * here when if_free_type() goes away.
- */
-void
-if_free(struct ifnet *ifp)
-{
-
- if_free_type(ifp, ifp->if_alloctype);
-}
-
-/*
* Interfaces to keep an ifnet type-stable despite the possibility of the
* driver calling if_free(). If there are additional references, we defer
* freeing the underlying data structure.
@@ -583,12 +587,21 @@ ifq_delete(struct ifaltq *ifq)
}
/*
- * Perform generic interface initalization tasks and attach the interface
+ * Perform generic interface initialization tasks and attach the interface
* to the list of "active" interfaces. If vmove flag is set on entry
* to if_attach_internal(), perform only a limited subset of initialization
* tasks, given that we are moving from one vnet to another an ifnet which
* has already been fully initialized.
*
+ * Note that if_detach_internal() removes group membership unconditionally
+ * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
+ * Thus, when if_vmove() is applied to a cloned interface, group membership
+ * is lost while a cloned one always joins a group whose name is
+ * ifc->ifc_name. To recover this after if_detach_internal() and
+ * if_attach_internal(), the cloner should be specified to
+ * if_attach_internal() via ifc. If it is non-NULL, if_attach_internal()
+ * attempts to join a group whose name is ifc->ifc_name.
+ *
* XXX:
* - The decision to return void and thus require this function to
* succeed is questionable.
@@ -599,14 +612,14 @@ void
if_attach(struct ifnet *ifp)
{
- if_attach_internal(ifp, 0);
+ if_attach_internal(ifp, 0, NULL);
}
/*
* Compute the least common TSO limit.
*/
void
-if_hw_tsomax_common(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax)
+if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax)
{
/*
* 1) If there is no limit currently, take the limit from
@@ -635,7 +648,7 @@ if_hw_tsomax_common(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax)
* Returns zero if no change. Else non-zero.
*/
int
-if_hw_tsomax_update(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax)
+if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
{
int retval = 0;
if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
@@ -654,7 +667,7 @@ if_hw_tsomax_update(struct ifnet *ifp, struct ifnet_hw_tsomax *pmax)
}
static void
-if_attach_internal(struct ifnet *ifp, int vmove)
+if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
{
unsigned socksize, ifasize;
int namelen, masklen;
@@ -673,9 +686,12 @@ if_attach_internal(struct ifnet *ifp, int vmove)
if_addgroup(ifp, IFG_ALL);
+ /* Restore group membership for cloned interfaces. */
+ if (vmove && ifc != NULL)
+ if_clone_addgroup(ifp, ifc);
+
getmicrotime(&ifp->if_lastchange);
- ifp->if_data.ifi_epoch = time_uptime;
- ifp->if_data.ifi_datalen = sizeof(struct if_data);
+ ifp->if_epoch = time_uptime;
KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
(ifp->if_transmit != NULL && ifp->if_qflush != NULL),
@@ -687,6 +703,9 @@ if_attach_internal(struct ifnet *ifp, int vmove)
if (ifp->if_input == NULL)
ifp->if_input = if_input_default;
+ if (ifp->if_requestencap == NULL)
+ ifp->if_requestencap = if_requestencap_default;
+
if (!vmove) {
#ifdef MAC
mac_ifnet_create(ifp);
@@ -706,8 +725,7 @@ if_attach_internal(struct ifnet *ifp, int vmove)
socksize = sizeof(*sdl);
socksize = roundup2(socksize, sizeof(long));
ifasize = sizeof(*ifa) + 2 * socksize;
- ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
- ifa_init(ifa);
+ ifa = ifa_alloc(ifasize, M_WAITOK);
sdl = (struct sockaddr_dl *)(ifa + 1);
sdl->sdl_len = socksize;
sdl->sdl_family = AF_LINK;
@@ -792,12 +810,9 @@ static void
if_attachdomain(void *dummy)
{
struct ifnet *ifp;
- int s;
- s = splnet();
TAILQ_FOREACH(ifp, &V_ifnet, if_link)
if_attachdomain1(ifp);
- splx(s);
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
if_attachdomain, NULL);
@@ -806,23 +821,16 @@ static void
if_attachdomain1(struct ifnet *ifp)
{
struct domain *dp;
- int s;
-
- s = splnet();
/*
* Since dp->dom_ifattach calls malloc() with M_WAITOK, we
* cannot lock ifp->if_afdata initialization, entirely.
*/
- if (IF_AFDATA_TRYLOCK(ifp) == 0) {
- splx(s);
- return;
- }
+ IF_AFDATA_LOCK(ifp);
if (ifp->if_afdata_initialized >= domain_init_status) {
IF_AFDATA_UNLOCK(ifp);
- splx(s);
- printf("if_attachdomain called more than once on %s\n",
- ifp->if_xname);
+ log(LOG_WARNING, "%s called more than once on %s\n",
+ __func__, ifp->if_xname);
return;
}
ifp->if_afdata_initialized = domain_init_status;
@@ -835,8 +843,6 @@ if_attachdomain1(struct ifnet *ifp)
ifp->if_afdata[dp->dom_family] =
(*dp->dom_ifattach)(ifp);
}
-
- splx(s);
}
/*
@@ -847,6 +853,7 @@ if_purgeaddrs(struct ifnet *ifp)
{
struct ifaddr *ifa, *next;
+ /* XXX cannot hold IF_ADDR_WLOCK over called functions. */
TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
if (ifa->ifa_addr->sa_family == AF_LINK)
continue;
@@ -871,7 +878,9 @@ if_purgeaddrs(struct ifnet *ifp)
continue;
}
#endif /* INET6 */
+ IF_ADDR_WLOCK(ifp);
TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
+ IF_ADDR_WUNLOCK(ifp);
ifa_free(ifa);
}
}
@@ -906,20 +915,34 @@ if_detach(struct ifnet *ifp)
{
CURVNET_SET_QUIET(ifp->if_vnet);
- if_detach_internal(ifp, 0);
+ if_detach_internal(ifp, 0, NULL);
CURVNET_RESTORE();
}
-static void
-if_detach_internal(struct ifnet *ifp, int vmove)
+/*
+ * The vmove flag, if set, indicates that we are called from a callpath
+ * that is moving an interface to a different vnet instance.
+ *
+ * The shutdown flag, if set, indicates that we are called in the
+ * process of shutting down a vnet instance. Currently only the
+ * vnet_if_return SYSUNINIT function sets it. Note: we can be called
+ * on a vnet instance shutdown without this flag being set, e.g., when
+ * the cloned interfaces are destoyed as first thing of teardown.
+ */
+static int
+if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
{
struct ifaddr *ifa;
- struct radix_node_head *rnh;
- int i, j;
+ int i;
struct domain *dp;
struct ifnet *iter;
int found = 0;
+#ifdef VIMAGE
+ int shutdown;
+ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
+ ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
+#endif
IFNET_WLOCK();
TAILQ_FOREACH(iter, &V_ifnet, if_link)
if (iter == ifp) {
@@ -927,28 +950,77 @@ if_detach_internal(struct ifnet *ifp, int vmove)
found = 1;
break;
}
-#ifdef VIMAGE
- if (found)
- curvnet->vnet_ifcnt--;
-#endif
IFNET_WUNLOCK();
if (!found) {
+ /*
+ * While we would want to panic here, we cannot
+ * guarantee that the interface is indeed still on
+ * the list given we don't hold locks all the way.
+ */
+ return (ENOENT);
+#if 0
if (vmove)
panic("%s: ifp=%p not on the ifnet tailq %p",
__func__, ifp, &V_ifnet);
else
return; /* XXX this should panic as well? */
+#endif
}
/*
- * Remove/wait for pending events.
+ * At this point we know the interface still was on the ifnet list
+ * and we removed it so we are in a stable state.
*/
+#ifdef VIMAGE
+ curvnet->vnet_ifcnt--;
+#endif
+
+ /*
+ * In any case (destroy or vmove) detach us from the groups
+ * and remove/wait for pending events on the taskq.
+ * XXX-BZ in theory an interface could still enqueue a taskq change?
+ */
+ if_delgroups(ifp);
+
taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
/*
- * Remove routes and flush queues.
+ * Check if this is a cloned interface or not. Must do even if
+ * shutting down as a if_vmove_reclaim() would move the ifp and
+ * the if_clone_addgroup() will have a corrupted string overwise
+ * from a gibberish pointer.
*/
+ if (vmove && ifcp != NULL)
+ *ifcp = if_clone_findifc(ifp);
+
if_down(ifp);
+
+#ifdef VIMAGE
+ /*
+ * On VNET shutdown abort here as the stack teardown will do all
+ * the work top-down for us.
+ */
+ if (shutdown) {
+ /*
+ * In case of a vmove we are done here without error.
+ * If we would signal an error it would lead to the same
+ * abort as if we did not find the ifnet anymore.
+ * if_detach() calls us in void context and does not care
+ * about an early abort notification, so life is splendid :)
+ */
+ goto finish_vnet_shutdown;
+ }
+#endif
+
+ /*
+ * At this point we are not tearing down a VNET and are either
+ * going to destroy or vmove the interface and have to cleanup
+ * accordingly.
+ */
+
+ /*
+ * Remove routes and flush queues.
+ */
#ifdef ALTQ
if (ALTQ_IS_ENABLED(&ifp->if_snd))
altq_disable(&ifp->if_snd);
@@ -973,6 +1045,12 @@ if_detach_internal(struct ifnet *ifp, int vmove)
#endif
if_purgemaddrs(ifp);
+ /* Announce that the interface is gone. */
+ rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
+ EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
+ if (IS_DEFAULT_VNET(curvnet))
+ devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
+
if (!vmove) {
/*
* Prevent further calls into the device driver via ifnet.
@@ -986,37 +1064,21 @@ if_detach_internal(struct ifnet *ifp, int vmove)
ifp->if_addr = NULL;
/* We can now free link ifaddr. */
+ IF_ADDR_WLOCK(ifp);
if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
ifa = TAILQ_FIRST(&ifp->if_addrhead);
TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
+ IF_ADDR_WUNLOCK(ifp);
ifa_free(ifa);
- }
- }
-
- /*
- * Delete all remaining routes using this interface
- * Unfortuneatly the only way to do this is to slog through
- * the entire routing table looking for routes which point
- * to this interface...oh well...
- */
- for (i = 1; i <= AF_MAX; i++) {
- for (j = 0; j < rt_numfibs; j++) {
- rnh = rt_tables_get_rnh(j, i);
- if (rnh == NULL)
- continue;
- RADIX_NODE_HEAD_LOCK(rnh);
- (void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
- RADIX_NODE_HEAD_UNLOCK(rnh);
- }
+ } else
+ IF_ADDR_WUNLOCK(ifp);
}
- /* Announce that the interface is gone. */
- rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
- EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
- if (IS_DEFAULT_VNET(curvnet))
- devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
- if_delgroups(ifp);
+ rt_flushifroutes(ifp);
+#ifdef VIMAGE
+finish_vnet_shutdown:
+#endif
/*
* We cannot hold the lock over dom_ifdetach calls as they might
* sleep, for example trying to drain a callout, thus open up the
@@ -1027,10 +1089,14 @@ if_detach_internal(struct ifnet *ifp, int vmove)
ifp->if_afdata_initialized = 0;
IF_AFDATA_UNLOCK(ifp);
for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
- if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
+ if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
(*dp->dom_ifdetach)(ifp,
ifp->if_afdata[dp->dom_family]);
+ ifp->if_afdata[dp->dom_family] = NULL;
+ }
}
+
+ return (0);
}
#ifdef VIMAGE
@@ -1041,16 +1107,28 @@ if_detach_internal(struct ifnet *ifp, int vmove)
* unused if_index in target vnet and calls if_grow() if necessary,
* and finally find an unused if_xname for the target vnet.
*/
-void
+static void
if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
{
- u_short idx;
+ struct if_clone *ifc;
+ u_int bif_dlt, bif_hdrlen;
+ int rc;
+
+ /*
+ * if_detach_internal() will call the eventhandler to notify
+ * interface departure. That will detach if_bpf. We need to
+ * safe the dlt and hdrlen so we can re-attach it later.
+ */
+ bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);
/*
* Detach from current vnet, but preserve LLADDR info, do not
* mark as dead etc. so that the ifnet can be reattached later.
+ * If we cannot find it, we lost the race to someone else.
*/
- if_detach_internal(ifp, 1);
+ rc = if_detach_internal(ifp, 1, &ifc);
+ if (rc != 0)
+ return;
/*
* Unlink the ifnet from ifindex_table[] in current vnet, and shrink
@@ -1076,15 +1154,14 @@ if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
CURVNET_SET_QUIET(new_vnet);
IFNET_WLOCK();
- if (ifindex_alloc_locked(&idx) != 0) {
- IFNET_WUNLOCK();
- panic("if_index overflow");
- }
- ifp->if_index = idx;
+ ifp->if_index = ifindex_alloc();
ifnet_setbyindex_locked(ifp->if_index, ifp);
IFNET_WUNLOCK();
- if_attach_internal(ifp, 1);
+ if_attach_internal(ifp, 1, ifc);
+
+ if (ifp->if_bpf == NULL)
+ bpfattach(ifp, bif_dlt, bif_hdrlen);
CURVNET_RESTORE();
}
@@ -1097,6 +1174,7 @@ if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
{
struct prison *pr;
struct ifnet *difp;
+ int shutdown;
/* Try to find the prison within our visibility. */
sx_slock(&allprison_lock);
@@ -1117,12 +1195,22 @@ if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
/* XXX Lock interfaces to avoid races. */
CURVNET_SET_QUIET(pr->pr_vnet);
difp = ifunit(ifname);
- CURVNET_RESTORE();
if (difp != NULL) {
+ CURVNET_RESTORE();
prison_free(pr);
return (EEXIST);
}
+ /* Make sure the VNET is stable. */
+ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
+ ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
+ if (shutdown) {
+ CURVNET_RESTORE();
+ prison_free(pr);
+ return (EBUSY);
+ }
+ CURVNET_RESTORE();
+
/* Move the interface into the child jail/vnet. */
if_vmove(ifp, pr->pr_vnet);
@@ -1139,6 +1227,7 @@ if_vmove_reclaim(struct thread *td, char *ifname, int jid)
struct prison *pr;
struct vnet *vnet_dst;
struct ifnet *ifp;
+ int shutdown;
/* Try to find the prison within our visibility. */
sx_slock(&allprison_lock);
@@ -1166,6 +1255,15 @@ if_vmove_reclaim(struct thread *td, char *ifname, int jid)
return (EEXIST);
}
+ /* Make sure the VNET is stable. */
+ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
+ ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
+ if (shutdown) {
+ CURVNET_RESTORE();
+ prison_free(pr);
+ return (EBUSY);
+ }
+
/* Get interface back from child jail/vnet. */
if_vmove(ifp, vnet_dst);
CURVNET_RESTORE();
@@ -1187,6 +1285,7 @@ if_addgroup(struct ifnet *ifp, const char *groupname)
struct ifg_list *ifgl;
struct ifg_group *ifg = NULL;
struct ifg_member *ifgm;
+ int new = 0;
if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
groupname[strlen(groupname) - 1] <= '9')
@@ -1227,8 +1326,8 @@ if_addgroup(struct ifnet *ifp, const char *groupname)
strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
ifg->ifg_refcnt = 0;
TAILQ_INIT(&ifg->ifg_members);
- EVENTHANDLER_INVOKE(group_attach_event, ifg);
TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
+ new = 1;
}
ifg->ifg_refcnt++;
@@ -1242,6 +1341,8 @@ if_addgroup(struct ifnet *ifp, const char *groupname)
IFNET_WUNLOCK();
+ if (new)
+ EVENTHANDLER_INVOKE(group_attach_event, ifg);
EVENTHANDLER_INVOKE(group_change_event, groupname);
return (0);
@@ -1280,10 +1381,11 @@ if_delgroup(struct ifnet *ifp, const char *groupname)
if (--ifgl->ifgl_group->ifg_refcnt == 0) {
TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
+ IFNET_WUNLOCK();
EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
free(ifgl->ifgl_group, M_TEMP);
- }
- IFNET_WUNLOCK();
+ } else
+ IFNET_WUNLOCK();
free(ifgl, M_TEMP);
@@ -1324,11 +1426,12 @@ if_delgroups(struct ifnet *ifp)
if (--ifgl->ifgl_group->ifg_refcnt == 0) {
TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
+ IFNET_WUNLOCK();
EVENTHANDLER_INVOKE(group_detach_event,
ifgl->ifgl_group);
free(ifgl->ifgl_group, M_TEMP);
- }
- IFNET_WUNLOCK();
+ } else
+ IFNET_WUNLOCK();
free(ifgl, M_TEMP);
@@ -1434,46 +1537,63 @@ if_getgroupmembers(struct ifgroupreq *data)
}
/*
- * Delete Routes for a Network Interface
- *
- * Called for each routing entry via the rnh->rnh_walktree() call above
- * to delete all route entries referencing a detaching network interface.
- *
- * Arguments:
- * rn pointer to node in the routing table
- * arg argument passed to rnh->rnh_walktree() - detaching interface
- *
- * Returns:
- * 0 successful
- * errno failed - reason indicated
- *
+ * Return counter values from counter(9)s stored in ifnet.
*/
-static int
-if_rtdel(struct radix_node *rn, void *arg)
+uint64_t
+if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
{
- struct rtentry *rt = (struct rtentry *)rn;
- struct ifnet *ifp = arg;
- int err;
- if (rt->rt_ifp == ifp) {
+ KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
- /*
- * Protect (sorta) against walktree recursion problems
- * with cloned routes
- */
- if ((rt->rt_flags & RTF_UP) == 0)
- return (0);
+ return (counter_u64_fetch(ifp->if_counters[cnt]));
+}
- err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway,
- rt_mask(rt),
- rt->rt_flags|RTF_RNH_LOCKED|RTF_PINNED,
- (struct rtentry **) NULL, rt->rt_fibnum);
- if (err) {
- log(LOG_WARNING, "if_rtdel: error %d\n", err);
- }
- }
+/*
+ * Increase an ifnet counter. Usually used for counters shared
+ * between the stack and a driver, but function supports them all.
+ */
+void
+if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
+{
- return (0);
+ KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
+
+ counter_u64_add(ifp->if_counters[cnt], inc);
+}
+
+/*
+ * Copy data from ifnet to userland API structure if_data.
+ */
+void
+if_data_copy(struct ifnet *ifp, struct if_data *ifd)
+{
+
+ ifd->ifi_type = ifp->if_type;
+ ifd->ifi_physical = 0;
+ ifd->ifi_addrlen = ifp->if_addrlen;
+ ifd->ifi_hdrlen = ifp->if_hdrlen;
+ ifd->ifi_link_state = ifp->if_link_state;
+ ifd->ifi_vhid = 0;
+ ifd->ifi_datalen = sizeof(struct if_data);
+ ifd->ifi_mtu = ifp->if_mtu;
+ ifd->ifi_metric = ifp->if_metric;
+ ifd->ifi_baudrate = ifp->if_baudrate;
+ ifd->ifi_hwassist = ifp->if_hwassist;
+ ifd->ifi_epoch = ifp->if_epoch;
+ ifd->ifi_lastchange = ifp->if_lastchange;
+
+ ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
+ ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
+ ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
+ ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
+ ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
+ ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
+ ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
+ ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
+ ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
+ ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
+ ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
+ ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
}
/*
@@ -1497,28 +1617,56 @@ if_addr_runlock(struct ifnet *ifp)
}
void
-if_maddr_rlock(struct ifnet *ifp)
+if_maddr_rlock(if_t ifp)
{
- IF_ADDR_RLOCK(ifp);
+ IF_ADDR_RLOCK((struct ifnet *)ifp);
}
void
-if_maddr_runlock(struct ifnet *ifp)
+if_maddr_runlock(if_t ifp)
{
- IF_ADDR_RUNLOCK(ifp);
+ IF_ADDR_RUNLOCK((struct ifnet *)ifp);
}
/*
- * Reference count functions for ifaddrs.
+ * Initialization, destruction and refcounting functions for ifaddrs.
*/
-void
-ifa_init(struct ifaddr *ifa)
+struct ifaddr *
+ifa_alloc(size_t size, int flags)
{
+ struct ifaddr *ifa;
+
+ KASSERT(size >= sizeof(struct ifaddr),
+ ("%s: invalid size %zu", __func__, size));
+
+ ifa = malloc(size, M_IFADDR, M_ZERO | flags);
+ if (ifa == NULL)
+ return (NULL);
+
+ if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
+ goto fail;
+ if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
+ goto fail;
+ if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
+ goto fail;
+ if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
+ goto fail;
- mtx_init(&ifa->ifa_mtx, "ifaddr", NULL, MTX_DEF);
refcount_init(&ifa->ifa_refcnt, 1);
+
+ return (ifa);
+
+fail:
+ /* free(NULL) is okay */
+ counter_u64_free(ifa->ifa_opackets);
+ counter_u64_free(ifa->ifa_ipackets);
+ counter_u64_free(ifa->ifa_obytes);
+ counter_u64_free(ifa->ifa_ibytes);
+ free(ifa, M_IFADDR);
+
+ return (NULL);
}
void
@@ -1533,62 +1681,61 @@ ifa_free(struct ifaddr *ifa)
{
if (refcount_release(&ifa->ifa_refcnt)) {
- mtx_destroy(&ifa->ifa_mtx);
+ counter_u64_free(ifa->ifa_opackets);
+ counter_u64_free(ifa->ifa_ipackets);
+ counter_u64_free(ifa->ifa_obytes);
+ counter_u64_free(ifa->ifa_ibytes);
free(ifa, M_IFADDR);
}
}
-int
-ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
+static int
+ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
+ struct sockaddr *ia)
{
- int error = 0;
- struct rtentry *rt = NULL;
+ int error;
struct rt_addrinfo info;
- static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
+ struct sockaddr_dl null_sdl;
+ struct ifnet *ifp;
+
+ ifp = ifa->ifa_ifp;
bzero(&info, sizeof(info));
- info.rti_ifp = V_loif;
+ if (cmd != RTM_DELETE)
+ info.rti_ifp = V_loif;
info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
info.rti_info[RTAX_DST] = ia;
info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
- error = rtrequest1_fib(RTM_ADD, &info, &rt, ifa->ifa_ifp->if_fib);
-
- if (error == 0 && rt != NULL) {
- RT_LOCK(rt);
- ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type =
- ifa->ifa_ifp->if_type;
- ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
- ifa->ifa_ifp->if_index;
- RT_REMREF(rt);
- RT_UNLOCK(rt);
- } else if (error != 0)
- log(LOG_INFO, "ifa_add_loopback_route: insertion failed\n");
+ link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type);
+
+ error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib);
+
+ if (error != 0)
+ log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n",
+ __func__, otype, if_name(ifp), error);
return (error);
}
int
+ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
+{
+
+ return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia));
+}
+
+int
ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
{
- int error = 0;
- struct rt_addrinfo info;
- struct sockaddr_dl null_sdl;
- bzero(&null_sdl, sizeof(null_sdl));
- null_sdl.sdl_len = sizeof(null_sdl);
- null_sdl.sdl_family = AF_LINK;
- null_sdl.sdl_type = ifa->ifa_ifp->if_type;
- null_sdl.sdl_index = ifa->ifa_ifp->if_index;
- bzero(&info, sizeof(info));
- info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
- info.rti_info[RTAX_DST] = ia;
- info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
- error = rtrequest1_fib(RTM_DELETE, &info, NULL, ifa->ifa_ifp->if_fib);
+ return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia));
+}
- if (error != 0)
- log(LOG_INFO, "ifa_del_loopback_route: deletion failed\n");
+int
+ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
+{
- return (error);
+ return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia));
}
/*
@@ -1597,22 +1744,19 @@ ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
* to perform a different comparison.
*/
-#define sa_equal(a1, a2) \
- (bcmp((a1), (a2), ((a1))->sa_len) == 0)
-
#define sa_dl_equal(a1, a2) \
- ((((struct sockaddr_dl *)(a1))->sdl_len == \
- ((struct sockaddr_dl *)(a2))->sdl_len) && \
- (bcmp(LLADDR((struct sockaddr_dl *)(a1)), \
- LLADDR((struct sockaddr_dl *)(a2)), \
- ((struct sockaddr_dl *)(a1))->sdl_alen) == 0))
+ ((((const struct sockaddr_dl *)(a1))->sdl_len == \
+ ((const struct sockaddr_dl *)(a2))->sdl_len) && \
+ (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)), \
+ CLLADDR((const struct sockaddr_dl *)(a2)), \
+ ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0))
/*
* Locate an interface based on a complete address.
*/
/*ARGSUSED*/
static struct ifaddr *
-ifa_ifwithaddr_internal(struct sockaddr *addr, int getref)
+ifa_ifwithaddr_internal(const struct sockaddr *addr, int getref)
{
struct ifnet *ifp;
struct ifaddr *ifa;
@@ -1649,14 +1793,14 @@ done:
}
struct ifaddr *
-ifa_ifwithaddr(struct sockaddr *addr)
+ifa_ifwithaddr(const struct sockaddr *addr)
{
return (ifa_ifwithaddr_internal(addr, 1));
}
int
-ifa_ifwithaddr_check(struct sockaddr *addr)
+ifa_ifwithaddr_check(const struct sockaddr *addr)
{
return (ifa_ifwithaddr_internal(addr, 0) != NULL);
@@ -1667,13 +1811,15 @@ ifa_ifwithaddr_check(struct sockaddr *addr)
*/
/* ARGSUSED */
struct ifaddr *
-ifa_ifwithbroadaddr(struct sockaddr *addr)
+ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum)
{
struct ifnet *ifp;
struct ifaddr *ifa;
IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
+ continue;
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != addr->sa_family)
@@ -1700,7 +1846,7 @@ done:
*/
/*ARGSUSED*/
struct ifaddr *
-ifa_ifwithdstaddr_fib(struct sockaddr *addr, int fibnum)
+ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
{
struct ifnet *ifp;
struct ifaddr *ifa;
@@ -1730,32 +1876,25 @@ done:
return (ifa);
}
-struct ifaddr *
-ifa_ifwithdstaddr(struct sockaddr *addr)
-{
-
- return (ifa_ifwithdstaddr_fib(addr, RT_ALL_FIBS));
-}
-
/*
* Find an interface on a specific network. If many, choice
* is most specific found.
*/
struct ifaddr *
-ifa_ifwithnet_fib(struct sockaddr *addr, int ignore_ptp, int fibnum)
+ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
{
struct ifnet *ifp;
struct ifaddr *ifa;
struct ifaddr *ifa_maybe = NULL;
u_int af = addr->sa_family;
- char *addr_data = addr->sa_data, *cplim;
+ const char *addr_data = addr->sa_data, *cplim;
/*
* AF_LINK addresses can be looked up directly by their index number,
* so do that if we can.
*/
if (af == AF_LINK) {
- struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
+ const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr;
if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
return (ifaddr_byindex(sdl->sdl_index));
}
@@ -1772,7 +1911,7 @@ ifa_ifwithnet_fib(struct sockaddr *addr, int ignore_ptp, int fibnum)
continue;
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- char *cp, *cp2, *cp3;
+ const char *cp, *cp2, *cp3;
if (ifa->ifa_addr->sa_family != af)
next: continue;
@@ -1794,19 +1933,6 @@ next: continue;
}
} else {
/*
- * if we have a special address handler,
- * then use it instead of the generic one.
- */
- if (ifa->ifa_claim_addr) {
- if ((*ifa->ifa_claim_addr)(ifa, addr)) {
- ifa_ref(ifa);
- IF_ADDR_RUNLOCK(ifp);
- goto done;
- }
- continue;
- }
-
- /*
* Scan all the bits in the ifa's address.
* If a bit dissagrees with what we are
* looking for, mask it with the netmask
@@ -1826,11 +1952,13 @@ next: continue;
/*
* If the netmask of what we just found
* is more specific than what we had before
- * (if we had one) then remember the new one
- * before continuing to search
- * for an even better one.
+ * (if we had one), or if the virtual status
+ * of new prefix is better than of the old one,
+ * then remember the new one before continuing
+ * to search for an even better one.
*/
if (ifa_maybe == NULL ||
+ ifa_preferred(ifa_maybe, ifa) ||
rn_refines((caddr_t)ifa->ifa_netmask,
(caddr_t)ifa_maybe->ifa_netmask)) {
if (ifa_maybe != NULL)
@@ -1851,22 +1979,15 @@ done:
return (ifa);
}
-struct ifaddr *
-ifa_ifwithnet(struct sockaddr *addr, int ignore_ptp)
-{
-
- return (ifa_ifwithnet_fib(addr, ignore_ptp, RT_ALL_FIBS));
-}
-
/*
* Find an interface address specific to an interface best matching
* a given address.
*/
struct ifaddr *
-ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
+ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
{
struct ifaddr *ifa;
- char *cp, *cp2, *cp3;
+ const char *cp, *cp2, *cp3;
char *cplim;
struct ifaddr *ifa_maybe = NULL;
u_int af = addr->sa_family;
@@ -1909,6 +2030,21 @@ done:
return (ifa);
}
+/*
+ * See whether new ifa is better than current one:
+ * 1) A non-virtual one is preferred over virtual.
+ * 2) A virtual in master state preferred over any other state.
+ *
+ * Used in several address selecting functions.
+ */
+int
+ifa_preferred(struct ifaddr *cur, struct ifaddr *next)
+{
+
+ return (cur->ifa_carp && (!next->ifa_carp ||
+ ((*carp_master_p)(next) && !(*carp_master_p)(cur))));
+}
+
#include <net/if_llatbl.h>
/*
@@ -1923,10 +2059,8 @@ link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
struct sockaddr *dst;
struct ifnet *ifp;
- RT_LOCK_ASSERT(rt);
-
- if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
- ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
+ if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) ||
+ ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL))
return;
ifa = ifaof_ifpforaddr(dst, ifp);
if (ifa) {
@@ -1938,10 +2072,41 @@ link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
}
}
+struct sockaddr_dl *
+link_alloc_sdl(size_t size, int flags)
+{
+
+ return (malloc(size, M_TEMP, flags));
+}
+
+void
+link_free_sdl(struct sockaddr *sa)
+{
+ free(sa, M_TEMP);
+}
+
+/*
+ * Fills in given sdl with interface basic info.
+ * Returns pointer to filled sdl.
+ */
+struct sockaddr_dl *
+link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype)
+{
+ struct sockaddr_dl *sdl;
+
+ sdl = (struct sockaddr_dl *)paddr;
+ memset(sdl, 0, sizeof(struct sockaddr_dl));
+ sdl->sdl_len = sizeof(struct sockaddr_dl);
+ sdl->sdl_family = AF_LINK;
+ sdl->sdl_index = ifp->if_index;
+ sdl->sdl_type = iftype;
+
+ return (sdl);
+}
+
/*
* Mark an interface down and notify protocols of
* the transition.
- * NOTE: must be called at splnet or eqivalent.
*/
static void
if_unroute(struct ifnet *ifp, int flag, int fam)
@@ -1965,7 +2130,6 @@ if_unroute(struct ifnet *ifp, int flag, int fam)
/*
* Mark an interface up and notify protocols of
* the transition.
- * NOTE: must be called at splnet or eqivalent.
*/
static void
if_route(struct ifnet *ifp, int flag, int fam)
@@ -2026,7 +2190,7 @@ do_link_state_change(void *arg, int pending)
(*vlan_link_state_p)(ifp);
if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
- IFP2AC(ifp)->ac_netgraph != NULL)
+ ifp->if_l2com != NULL)
(*ng_ether_link_state_p)(ifp, link_state);
if (ifp->if_carp)
(*carp_linkstate_p)(ifp);
@@ -2051,7 +2215,6 @@ do_link_state_change(void *arg, int pending)
/*
* Mark an interface down and notify protocols of
* the transition.
- * NOTE: must be called at splnet or eqivalent.
*/
void
if_down(struct ifnet *ifp)
@@ -2063,7 +2226,6 @@ if_down(struct ifnet *ifp)
/*
* Mark an interface up and notify protocols of
* the transition.
- * NOTE: must be called at splnet or eqivalent.
*/
void
if_up(struct ifnet *ifp)
@@ -2088,8 +2250,8 @@ if_qflush(struct ifnet *ifp)
ALTQ_PURGE(ifq);
#endif
n = ifq->ifq_head;
- while ((m = n) != 0) {
- n = m->m_act;
+ while ((m = n) != NULL) {
+ n = m->m_nextpkt;
m_freem(m);
}
ifq->ifq_head = 0;
@@ -2140,7 +2302,6 @@ static int
ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
struct ifreq *ifr;
- struct ifstat *ifs;
int error = 0;
int new_flags, temp_flags;
size_t namelen, onamelen;
@@ -2182,7 +2343,8 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
break;
case SIOCGIFPHYS:
- ifr->ifr_phys = ifp->if_physical;
+ /* XXXGL: did this ever worked? */
+ ifr->ifr_phys = 0;
break;
case SIOCGIFDESCR:
@@ -2262,18 +2424,12 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
*/
new_flags = (ifr->ifr_flags & 0xffff) |
(ifr->ifr_flagshigh << 16);
- if (ifp->if_flags & IFF_SMART) {
- /* Smart drivers twiddle their own routes */
- } else if (ifp->if_flags & IFF_UP &&
+ if (ifp->if_flags & IFF_UP &&
(new_flags & IFF_UP) == 0) {
- int s = splimp();
if_down(ifp);
- splx(s);
} else if (new_flags & IFF_UP &&
(ifp->if_flags & IFF_UP) == 0) {
- int s = splimp();
if_up(ifp);
- splx(s);
}
/* See if permanently promiscuous mode bit is about to flip */
if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
@@ -2281,9 +2437,11 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
ifp->if_flags |= IFF_PROMISC;
else if (ifp->if_pcount == 0)
ifp->if_flags &= ~IFF_PROMISC;
- log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
- ifp->if_xname,
- (new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
+ if (log_promisc_mode_change)
+ log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
+ ifp->if_xname,
+ ((new_flags & IFF_PPROMISC) ?
+ "enabled" : "disabled"));
}
ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
(new_flags &~ IFF_CANTCHANGE);
@@ -2321,6 +2479,11 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
return (error);
if (new_name[0] == '\0')
return (EINVAL);
+ if (new_name[IFNAMSIZ-1] != '\0') {
+ new_name[IFNAMSIZ-1] = '\0';
+ if (strlen(new_name) == IFNAMSIZ-1)
+ return (EINVAL);
+ }
if (ifunit(new_name) != NULL)
return (EEXIST);
@@ -2339,9 +2502,9 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
log(LOG_INFO, "%s: changing name to '%s'\n",
ifp->if_xname, new_name);
+ IF_ADDR_WLOCK(ifp);
strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
ifa = ifp->if_addr;
- IFA_LOCK(ifa);
sdl = (struct sockaddr_dl *)ifa->ifa_addr;
namelen = strlen(new_name);
onamelen = sdl->sdl_nlen;
@@ -2360,7 +2523,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
bzero(sdl->sdl_data, onamelen);
while (namelen != 0)
sdl->sdl_data[--namelen] = 0xff;
- IFA_UNLOCK(ifa);
+ IF_ADDR_WUNLOCK(ifp);
EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
/* Announce the return of the interface. */
@@ -2420,6 +2583,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
#ifdef INET6
nd6_setmtu(ifp);
#endif
+ rt_updatemtu(ifp);
}
break;
}
@@ -2470,7 +2634,6 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
#ifdef INET6
case SIOCSIFPHYADDR_IN6:
#endif
- case SIOCSLIFPHYADDR:
case SIOCSIFMEDIA:
case SIOCSIFGENERIC:
error = priv_check(td, PRIV_NET_HWIOCTL);
@@ -2484,13 +2647,10 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
break;
case SIOCGIFSTATUS:
- ifs = (struct ifstat *)data;
- ifs->ascii[0] = '\0';
-
case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
- case SIOCGLIFPHYADDR:
case SIOCGIFMEDIA:
+ case SIOCGIFXMEDIA:
case SIOCGIFGENERIC:
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
@@ -2503,7 +2663,6 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
return (error);
error = if_setlladdr(ifp,
ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
- EVENTHANDLER_INVOKE(iflladdr_event, ifp);
break;
case SIOCAIFGROUP:
@@ -2542,6 +2701,9 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
return (error);
}
+/* COMPAT_SVR4 */
+#define OSIOCGIFCONF _IOWR('i', 20, struct ifconf)
+
#ifdef COMPAT_FREEBSD32
struct ifconf32 {
int32_t ifc_len;
@@ -2563,11 +2725,25 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
struct ifreq *ifr;
int error;
int oif_flags;
+#ifdef VIMAGE
+ int shutdown;
+#endif
CURVNET_SET(so->so_vnet);
+#ifdef VIMAGE
+ /* Make sure the VNET is stable. */
+ shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET &&
+ so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
+ if (shutdown) {
+ CURVNET_RESTORE();
+ return (EBUSY);
+ }
+#endif
+
+
switch (cmd) {
case SIOCGIFCONF:
- case OSIOCGIFCONF:
+ case OSIOCGIFCONF: /* COMPAT_SVR4 */
error = ifconf(cmd, data);
CURVNET_RESTORE();
return (error);
@@ -2626,6 +2802,16 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
error = if_getgroupmembers((struct ifgroupreq *)data);
CURVNET_RESTORE();
return (error);
+#if defined(INET) || defined(INET6)
+ case SIOCSVH:
+ case SIOCGVH:
+ if (carp_ioctl_p == NULL)
+ error = EPROTONOSUPPORT;
+ else
+ error = (*carp_ioctl_p)(ifr, cmd, td);
+ CURVNET_RESTORE();
+ return (error);
+#endif
}
ifp = ifunit_ref(ifr->ifr_name);
@@ -2657,79 +2843,17 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
* layer, and do not perform any credentials checks or input
* validation.
*/
-#ifndef COMPAT_43
- error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
- data,
- ifp, td));
+ error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data,
+ ifp, td));
if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
error = (*ifp->if_ioctl)(ifp, cmd, data);
-#else
- {
- u_long ocmd = cmd;
-
- switch (cmd) {
-
- case SIOCSIFDSTADDR:
- case SIOCSIFADDR:
- case SIOCSIFBRDADDR:
- case SIOCSIFNETMASK:
-#if BYTE_ORDER != BIG_ENDIAN
- if (ifr->ifr_addr.sa_family == 0 &&
- ifr->ifr_addr.sa_len < 16) {
- ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
- ifr->ifr_addr.sa_len = 16;
- }
-#else
- if (ifr->ifr_addr.sa_len == 0)
- ifr->ifr_addr.sa_len = 16;
-#endif
- break;
-
- case OSIOCGIFADDR:
- cmd = SIOCGIFADDR;
- break;
-
- case OSIOCGIFDSTADDR:
- cmd = SIOCGIFDSTADDR;
- break;
-
- case OSIOCGIFBRDADDR:
- cmd = SIOCGIFBRDADDR;
- break;
-
- case OSIOCGIFNETMASK:
- cmd = SIOCGIFNETMASK;
- }
- error = ((*so->so_proto->pr_usrreqs->pru_control)(so,
- cmd,
- data,
- ifp, td));
- if (error == EOPNOTSUPP && ifp != NULL &&
- ifp->if_ioctl != NULL &&
- cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
- cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
- error = (*ifp->if_ioctl)(ifp, cmd, data);
- switch (ocmd) {
-
- case OSIOCGIFADDR:
- case OSIOCGIFDSTADDR:
- case OSIOCGIFBRDADDR:
- case OSIOCGIFNETMASK:
- *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
-
- }
- }
-#endif /* COMPAT_43 */
if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
#ifdef INET6
- if (ifp->if_flags & IFF_UP) {
- int s = splimp();
+ if (ifp->if_flags & IFF_UP)
in6_if_up(ifp);
- splx(s);
- }
#endif
}
if_rele(ifp);
@@ -2825,7 +2949,8 @@ ifpromisc(struct ifnet *ifp, int pswitch)
error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
&ifp->if_pcount, pswitch);
/* If promiscuous mode status has changed, log a message */
- if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC))
+ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
+ log_promisc_mode_change)
log(LOG_INFO, "%s: promiscuous mode %s\n",
ifp->if_xname,
(ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
@@ -2890,16 +3015,15 @@ again:
if (prison_if(curthread->td_ucred, sa) != 0)
continue;
addrs++;
-#ifdef COMPAT_43
+ /* COMPAT_SVR4 */
if (cmd == OSIOCGIFCONF) {
struct osockaddr *osa =
- (struct osockaddr *)&ifr.ifr_addr;
+ (struct osockaddr *)&ifr.ifr_addr;
ifr.ifr_addr = *sa;
osa->sa_family = sa->sa_family;
sbuf_bcat(sb, &ifr, sizeof(ifr));
max_len += sizeof(ifr);
} else
-#endif
if (sa->sa_len <= sizeof(*sa)) {
ifr.ifr_addr = *sa;
sbuf_bcat(sb, &ifr, sizeof(ifr));
@@ -2955,7 +3079,7 @@ if_allmulti(struct ifnet *ifp, int onswitch)
}
struct ifmultiaddr *
-if_findmulti(struct ifnet *ifp, struct sockaddr *sa)
+if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
{
struct ifmultiaddr *ifma;
@@ -3034,8 +3158,6 @@ if_freemulti(struct ifmultiaddr *ifma)
KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
ifma->ifma_refcount));
- KASSERT(ifma->ifma_protospec == NULL,
- ("if_freemulti: protospec not NULL"));
if (ifma->ifma_lladdr != NULL)
free(ifma->ifma_lladdr, M_IFMADDR);
@@ -3067,6 +3189,7 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
{
struct ifmultiaddr *ifma, *ll_ifma;
struct sockaddr *llsa;
+ struct sockaddr_dl sdl;
int error;
/*
@@ -3086,12 +3209,18 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
/*
* The address isn't already present; resolve the protocol address
* into a link layer address, and then look that up, bump its
- * refcount or allocate an ifma for that also. If 'llsa' was
- * returned, we will need to free it later.
+ * refcount or allocate an ifma for that also.
+ * Most link layer resolving functions returns address data which
+ * fits inside default sockaddr_dl structure. However callback
+ * can allocate another sockaddr structure, in that case we need to
+ * free it later.
*/
llsa = NULL;
ll_ifma = NULL;
if (ifp->if_resolvemulti != NULL) {
+ /* Provide called function with buffer size information */
+ sdl.sdl_len = sizeof(sdl);
+ llsa = (struct sockaddr *)&sdl;
error = ifp->if_resolvemulti(ifp, &llsa, sa);
if (error)
goto unlock_out;
@@ -3155,14 +3284,14 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
}
- if (llsa != NULL)
- free(llsa, M_IFMADDR);
+ if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
+ link_free_sdl(llsa);
return (0);
free_llsa_out:
- if (llsa != NULL)
- free(llsa, M_IFMADDR);
+ if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
+ link_free_sdl(llsa);
unlock_out:
IF_ADDR_WUNLOCK(ifp);
@@ -3363,8 +3492,10 @@ if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
*
* At this time we only support certain types of interfaces,
* and we don't allow the length of the address to change.
+ *
+ * Set noinline to be dtrace-friendly
*/
-int
+__noinline int
if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
{
struct sockaddr_dl *sdl;
@@ -3422,17 +3553,45 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
ifr.ifr_flagshigh = ifp->if_flags >> 16;
(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
}
-#ifdef INET
- /*
- * Also send gratuitous ARPs to notify other nodes about
- * the address change.
- */
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family == AF_INET)
- arp_ifinit(ifp, ifa);
- }
-#endif
}
+ EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+ return (0);
+}
+
+/*
+ * Compat function for handling basic encapsulation requests.
+ * Stacks that have not been converted (FDDI, IB, ..) support the
+ * traditional output model: ARP (and similar L2 protocols) are handled
+ * inside the output routine, and arpresolve()/nd6_resolve() return a MAC
+ * address instead of a full prepended header.
+ *
+ * This function produces the calculated header (the MAC address itself)
+ * for IPv4/IPv6 and returns EAFNOSUPPORT (which is then handled in the
+ * ARP code) for other address families.
+ */
+static int
+if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
+{
+
+ if (req->rtype != IFENCAP_LL)
+ return (EOPNOTSUPP);
+
+ if (req->bufsize < req->lladdr_len)
+ return (ENOMEM);
+
+ switch (req->family) {
+ case AF_INET:
+ case AF_INET6:
+ break;
+ default:
+ return (EAFNOSUPPORT);
+ }
+
+ /* Copy lladdr to storage as is */
+ memmove(req->buf, req->lladdr, req->lladdr_len);
+ req->bufsize = req->lladdr_len;
+ req->lladdr_off = 0;
+
return (0);
}
@@ -3500,15 +3659,15 @@ if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
IF_LOCK(ifq);
if (_IF_QFULL(ifq)) {
- _IF_DROP(ifq);
IF_UNLOCK(ifq);
+ if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
m_freem(m);
return (0);
}
if (ifp != NULL) {
- ifp->if_obytes += m->m_pkthdr.len + adjust;
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust);
if (m->m_flags & (M_BCAST|M_MCAST))
- ifp->if_omcasts++;
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
active = ifp->if_drv_flags & IFF_DRV_OACTIVE;
}
_IF_ENQUEUE(ifq, m);
@@ -3543,3 +3702,465 @@ if_deregister_com_alloc(u_char type)
if_com_alloc[type] = NULL;
if_com_free[type] = NULL;
}
+
+/* API for driver access to network stack owned ifnet. */
+uint64_t
+if_setbaudrate(struct ifnet *ifp, uint64_t baudrate)
+{
+ uint64_t oldbrate;
+
+ oldbrate = ifp->if_baudrate;
+ ifp->if_baudrate = baudrate;
+ return (oldbrate);
+}
+
+uint64_t
+if_getbaudrate(if_t ifp)
+{
+
+ return (((struct ifnet *)ifp)->if_baudrate);
+}
+
+int
+if_setcapabilities(if_t ifp, int capabilities)
+{
+ ((struct ifnet *)ifp)->if_capabilities = capabilities;
+ return (0);
+}
+
+int
+if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit)
+{
+ ((struct ifnet *)ifp)->if_capabilities |= setbit;
+ ((struct ifnet *)ifp)->if_capabilities &= ~clearbit;
+
+ return (0);
+}
+
+int
+if_getcapabilities(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_capabilities;
+}
+
+int
+if_setcapenable(if_t ifp, int capabilities)
+{
+ ((struct ifnet *)ifp)->if_capenable = capabilities;
+ return (0);
+}
+
+int
+if_setcapenablebit(if_t ifp, int setcap, int clearcap)
+{
+ if(setcap)
+ ((struct ifnet *)ifp)->if_capenable |= setcap;
+ if(clearcap)
+ ((struct ifnet *)ifp)->if_capenable &= ~clearcap;
+
+ return (0);
+}
+
+const char *
+if_getdname(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_dname;
+}
+
+int
+if_togglecapenable(if_t ifp, int togglecap)
+{
+ ((struct ifnet *)ifp)->if_capenable ^= togglecap;
+ return (0);
+}
+
+int
+if_getcapenable(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_capenable;
+}
+
+/*
+ * This is largely undesirable because it ties ifnet to a device, but does
+ * provide flexibility for an embedded product vendor. Should be used with
+ * the understanding that it violates the interface boundaries, and should be
+ * a last resort only.
+ */
+int
+if_setdev(if_t ifp, void *dev)
+{
+ return (0);
+}
+
+int
+if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
+{
+ ((struct ifnet *)ifp)->if_drv_flags |= set_flags;
+ ((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags;
+
+ return (0);
+}
+
+int
+if_getdrvflags(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_drv_flags;
+}
+
+int
+if_setdrvflags(if_t ifp, int flags)
+{
+ ((struct ifnet *)ifp)->if_drv_flags = flags;
+ return (0);
+}
+
+
+int
+if_setflags(if_t ifp, int flags)
+{
+ ((struct ifnet *)ifp)->if_flags = flags;
+ return (0);
+}
+
+int
+if_setflagbits(if_t ifp, int set, int clear)
+{
+ ((struct ifnet *)ifp)->if_flags |= set;
+ ((struct ifnet *)ifp)->if_flags &= ~clear;
+
+ return (0);
+}
+
+int
+if_getflags(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_flags;
+}
+
+int
+if_clearhwassist(if_t ifp)
+{
+ ((struct ifnet *)ifp)->if_hwassist = 0;
+ return (0);
+}
+
+int
+if_sethwassistbits(if_t ifp, int toset, int toclear)
+{
+ ((struct ifnet *)ifp)->if_hwassist |= toset;
+ ((struct ifnet *)ifp)->if_hwassist &= ~toclear;
+
+ return (0);
+}
+
+int
+if_sethwassist(if_t ifp, int hwassist_bit)
+{
+ ((struct ifnet *)ifp)->if_hwassist = hwassist_bit;
+ return (0);
+}
+
+int
+if_gethwassist(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_hwassist;
+}
+
+int
+if_setmtu(if_t ifp, int mtu)
+{
+ ((struct ifnet *)ifp)->if_mtu = mtu;
+ return (0);
+}
+
+int
+if_getmtu(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_mtu;
+}
+
+int
+if_getmtu_family(if_t ifp, int family)
+{
+ struct domain *dp;
+
+ for (dp = domains; dp; dp = dp->dom_next) {
+ if (dp->dom_family == family && dp->dom_ifmtu != NULL)
+ return (dp->dom_ifmtu((struct ifnet *)ifp));
+ }
+
+ return (((struct ifnet *)ifp)->if_mtu);
+}
+
+int
+if_setsoftc(if_t ifp, void *softc)
+{
+ ((struct ifnet *)ifp)->if_softc = softc;
+ return (0);
+}
+
+void *
+if_getsoftc(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_softc;
+}
+
+void
+if_setrcvif(struct mbuf *m, if_t ifp)
+{
+ m->m_pkthdr.rcvif = (struct ifnet *)ifp;
+}
+
+void
+if_setvtag(struct mbuf *m, uint16_t tag)
+{
+ m->m_pkthdr.ether_vtag = tag;
+}
+
+uint16_t
+if_getvtag(struct mbuf *m)
+{
+
+ return (m->m_pkthdr.ether_vtag);
+}
+
+int
+if_sendq_empty(if_t ifp)
+{
+ return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd);
+}
+
+struct ifaddr *
+if_getifaddr(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_addr;
+}
+
+int
+if_getamcount(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_amcount;
+}
+
+
+int
+if_setsendqready(if_t ifp)
+{
+ IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd);
+ return (0);
+}
+
+int
+if_setsendqlen(if_t ifp, int tx_desc_count)
+{
+ IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count);
+ ((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count;
+
+ return (0);
+}
+
+int
+if_vlantrunkinuse(if_t ifp)
+{
+ return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0;
+}
+
+int
+if_input(if_t ifp, struct mbuf* sendmp)
+{
+ (*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp);
+ return (0);
+
+}
+
+/* XXX */
+#ifndef ETH_ADDR_LEN
+#define ETH_ADDR_LEN 6
+#endif
+
+int
+if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max)
+{
+ struct ifmultiaddr *ifma;
+ uint8_t *lmta = (uint8_t *)mta;
+ int mcnt = 0;
+
+ TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_LINK)
+ continue;
+
+ if (mcnt == max)
+ break;
+
+ bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
+ &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
+ mcnt++;
+ }
+ *cnt = mcnt;
+
+ return (0);
+}
+
+int
+if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max)
+{
+ int error;
+
+ if_maddr_rlock(ifp);
+ error = if_setupmultiaddr(ifp, mta, cnt, max);
+ if_maddr_runlock(ifp);
+ return (error);
+}
+
+int
+if_multiaddr_count(if_t ifp, int max)
+{
+ struct ifmultiaddr *ifma;
+ int count;
+
+ count = 0;
+ if_maddr_rlock(ifp);
+ TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_LINK)
+ continue;
+ count++;
+ if (count == max)
+ break;
+ }
+ if_maddr_runlock(ifp);
+ return (count);
+}
+
+int
+if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg)
+{
+ struct ifmultiaddr *ifma;
+ int cnt = 0;
+
+ if_maddr_rlock(ifp);
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
+ cnt += filter(arg, ifma, cnt);
+ if_maddr_runlock(ifp);
+ return (cnt);
+}
+
+struct mbuf *
+if_dequeue(if_t ifp)
+{
+ struct mbuf *m;
+ IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m);
+
+ return (m);
+}
+
+int
+if_sendq_prepend(if_t ifp, struct mbuf *m)
+{
+ IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m);
+ return (0);
+}
+
+int
+if_setifheaderlen(if_t ifp, int len)
+{
+ ((struct ifnet *)ifp)->if_hdrlen = len;
+ return (0);
+}
+
+caddr_t
+if_getlladdr(if_t ifp)
+{
+ return (IF_LLADDR((struct ifnet *)ifp));
+}
+
+void *
+if_gethandle(u_char type)
+{
+ return (if_alloc(type));
+}
+
+void
+if_bpfmtap(if_t ifh, struct mbuf *m)
+{
+ struct ifnet *ifp = (struct ifnet *)ifh;
+
+ BPF_MTAP(ifp, m);
+}
+
+void
+if_etherbpfmtap(if_t ifh, struct mbuf *m)
+{
+ struct ifnet *ifp = (struct ifnet *)ifh;
+
+ ETHER_BPF_MTAP(ifp, m);
+}
+
+void
+if_vlancap(if_t ifh)
+{
+ struct ifnet *ifp = (struct ifnet *)ifh;
+ VLAN_CAPABILITIES(ifp);
+}
+
+void
+if_setinitfn(if_t ifp, void (*init_fn)(void *))
+{
+ ((struct ifnet *)ifp)->if_init = init_fn;
+}
+
+void
+if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t))
+{
+ ((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn;
+}
+
+void
+if_setstartfn(if_t ifp, void (*start_fn)(if_t))
+{
+ ((struct ifnet *)ifp)->if_start = (void *)start_fn;
+}
+
+void
+if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
+{
+ ((struct ifnet *)ifp)->if_transmit = start_fn;
+}
+
+void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
+{
+ ((struct ifnet *)ifp)->if_qflush = flush_fn;
+
+}
+
+void
+if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
+{
+
+ ifp->if_get_counter = fn;
+}
+
+/* Revisit these - These are inline functions originally. */
+int
+drbr_inuse_drv(if_t ifh, struct buf_ring *br)
+{
+ return drbr_inuse(ifh, br);
+}
+
+struct mbuf*
+drbr_dequeue_drv(if_t ifh, struct buf_ring *br)
+{
+ return drbr_dequeue(ifh, br);
+}
+
+int
+drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br)
+{
+ return drbr_needs_enqueue(ifh, br);
+}
+
+int
+drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m)
+{
+ return drbr_enqueue(ifh, br, m);
+
+}
diff --git a/freebsd/sys/net/if.h b/freebsd/sys/net/if.h
index e182db54..98ae0a82 100644
--- a/freebsd/sys/net/if.h
+++ b/freebsd/sys/net/if.h
@@ -35,10 +35,6 @@
#include <sys/cdefs.h>
-#ifdef _KERNEL
-#include <sys/queue.h>
-#endif
-
#if __BSD_VISIBLE
/*
* <net/if.h> does not depend on <sys/time.h> on most other systems. This
@@ -49,8 +45,6 @@
#include <sys/time.h>
#include <sys/socket.h>
#endif
-
-struct ifnet;
#endif
/*
@@ -80,32 +74,45 @@ struct if_clonereq {
*/
struct if_data {
/* generic interface information */
- u_char ifi_type; /* ethernet, tokenring, etc */
- u_char ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */
- u_char ifi_addrlen; /* media address length */
- u_char ifi_hdrlen; /* media header length */
- u_char ifi_link_state; /* current link state */
- u_char ifi_spare_char1; /* spare byte */
- u_char ifi_spare_char2; /* spare byte */
- u_char ifi_datalen; /* length of this data struct */
- u_long ifi_mtu; /* maximum transmission unit */
- u_long ifi_metric; /* routing metric (external only) */
- u_long ifi_baudrate; /* linespeed */
+ uint8_t ifi_type; /* ethernet, tokenring, etc */
+ uint8_t ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */
+ uint8_t ifi_addrlen; /* media address length */
+ uint8_t ifi_hdrlen; /* media header length */
+ uint8_t ifi_link_state; /* current link state */
+ uint8_t ifi_vhid; /* carp vhid */
+ uint16_t ifi_datalen; /* length of this data struct */
+ uint32_t ifi_mtu; /* maximum transmission unit */
+ uint32_t ifi_metric; /* routing metric (external only) */
+ uint64_t ifi_baudrate; /* linespeed */
/* volatile statistics */
- u_long ifi_ipackets; /* packets received on interface */
- u_long ifi_ierrors; /* input errors on interface */
- u_long ifi_opackets; /* packets sent on interface */
- u_long ifi_oerrors; /* output errors on interface */
- u_long ifi_collisions; /* collisions on csma interfaces */
- u_long ifi_ibytes; /* total number of octets received */
- u_long ifi_obytes; /* total number of octets sent */
- u_long ifi_imcasts; /* packets received via multicast */
- u_long ifi_omcasts; /* packets sent via multicast */
- u_long ifi_iqdrops; /* dropped on input, this interface */
- u_long ifi_noproto; /* destined for unsupported protocol */
- u_long ifi_hwassist; /* HW offload capabilities, see IFCAP */
- time_t ifi_epoch; /* uptime at attach or stat reset */
- struct timeval ifi_lastchange; /* time of last administrative change */
+ uint64_t ifi_ipackets; /* packets received on interface */
+ uint64_t ifi_ierrors; /* input errors on interface */
+ uint64_t ifi_opackets; /* packets sent on interface */
+ uint64_t ifi_oerrors; /* output errors on interface */
+ uint64_t ifi_collisions; /* collisions on csma interfaces */
+ uint64_t ifi_ibytes; /* total number of octets received */
+ uint64_t ifi_obytes; /* total number of octets sent */
+ uint64_t ifi_imcasts; /* packets received via multicast */
+ uint64_t ifi_omcasts; /* packets sent via multicast */
+ uint64_t ifi_iqdrops; /* dropped on input */
+ uint64_t ifi_oqdrops; /* dropped on output */
+ uint64_t ifi_noproto; /* destined for unsupported protocol */
+ uint64_t ifi_hwassist; /* HW offload capabilities, see IFCAP */
+
+ /* Unions are here to make sizes MI. */
+ union { /* uptime at attach or stat reset */
+ time_t tt;
+ uint64_t ph;
+ } __ifi_epoch;
+#define ifi_epoch __ifi_epoch.tt
+ union { /* time of last administrative change */
+ struct timeval tv;
+ struct {
+ uint64_t ph1;
+ uint64_t ph2;
+ } ph;
+ } __ifi_lastchange;
+#define ifi_lastchange __ifi_lastchange.tv
};
/*-
@@ -135,7 +142,7 @@ struct if_data {
#define IFF_DEBUG 0x4 /* (n) turn on debugging */
#define IFF_LOOPBACK 0x8 /* (i) is a loopback net */
#define IFF_POINTOPOINT 0x10 /* (i) is a point-to-point link */
-#define IFF_SMART 0x20 /* (i) interface manages own routes */
+/* 0x20 was IFF_SMART */
#define IFF_DRV_RUNNING 0x40 /* (d) resources allocated */
#define IFF_NOARP 0x80 /* (n) no address resolution protocol */
#define IFF_PROMISC 0x100 /* (n) receive all packets */
@@ -153,7 +160,6 @@ struct if_data {
#define IFF_STATICARP 0x80000 /* (n) static ARP */
#define IFF_DYING 0x200000 /* (n) interface is winding down */
#define IFF_RENAMING 0x400000 /* (n) interface is being renamed */
-
/*
* Old names for driver flags so that user space tools can continue to use
* the old (portable) names.
@@ -166,7 +172,7 @@ struct if_data {
/* flags set internally only: */
#define IFF_CANTCHANGE \
(IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\
- IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_SMART|IFF_PROMISC|\
+ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_PROMISC|\
IFF_DYING|IFF_CANTCONFIG)
/*
@@ -180,7 +186,7 @@ struct if_data {
* Some convenience macros used for setting ifi_baudrate.
* XXX 1000 vs. 1024? --thorpej@netbsd.org
*/
-#define IF_Kbps(x) ((x) * 1000) /* kilobits/sec. */
+#define IF_Kbps(x) ((uintmax_t)(x) * 1000) /* kilobits/sec. */
#define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */
#define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */
@@ -232,6 +238,7 @@ struct if_data {
#define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */
#define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */
#define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */
+#define IFCAP_HWSTATS 0x800000 /* manages counters internally */
#define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6)
@@ -297,7 +304,7 @@ struct ifa_msghdr {
int ifam_addrs; /* like rtm_addrs */
int ifam_flags; /* value of ifa_flags */
u_short ifam_index; /* index for associated ifp */
- int ifam_metric; /* value of ifa_metric */
+ int ifam_metric; /* value of ifa_ifp->if_metric */
};
/*
@@ -322,7 +329,7 @@ struct ifa_msghdrl {
u_short _ifam_spare1; /* spare space to grow if_index, see if_var.h */
u_short ifam_len; /* length of ifa_msghdrl incl. if_data */
u_short ifam_data_off; /* offset of if_data from beginning */
- int ifam_metric; /* value of ifa_metric */
+ int ifam_metric; /* value of ifa_ifp->if_metric */
struct if_data ifam_data;/* statistics and other data about if or
* address */
};
@@ -386,6 +393,7 @@ struct ifreq {
caddr_t ifru_data;
int ifru_cap[2];
u_int ifru_fib;
+ u_char ifru_vlan_pcp;
} ifr_ifru;
#define ifr_addr ifr_ifru.ifru_addr /* address */
#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */
@@ -403,6 +411,7 @@ struct ifreq {
#define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */
#define ifr_index ifr_ifru.ifru_index /* interface index */
#define ifr_fib ifr_ifru.ifru_fib /* interface fib */
+#define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */
};
#define _SIZEOF_ADDR_IFREQ(ifr) \
@@ -415,6 +424,15 @@ struct ifaliasreq {
struct sockaddr ifra_addr;
struct sockaddr ifra_broadaddr;
struct sockaddr ifra_mask;
+ int ifra_vhid;
+};
+
+/* 9.x compat */
+struct oifaliasreq {
+ char ifra_name[IFNAMSIZ];
+ struct sockaddr ifra_addr;
+ struct sockaddr ifra_broadaddr;
+ struct sockaddr ifra_mask;
};
struct ifmediareq {
@@ -495,16 +513,17 @@ struct ifgroupreq {
};
/*
- * Structure for SIOC[AGD]LIFADDR
+ * Structure used to request i2c data
+ * from interface transceivers.
*/
-struct if_laddrreq {
- char iflr_name[IFNAMSIZ];
- u_int flags;
-#define IFLR_PREFIX 0x8000 /* in: prefix given out: kernel fills id */
- u_int prefixlen; /* in/out */
- struct sockaddr_storage addr; /* in/out */
- struct sockaddr_storage dstaddr; /* out */
-};
+struct ifi2creq {
+ uint8_t dev_addr; /* i2c address (0xA0, 0xA2) */
+ uint8_t offset; /* read offset */
+ uint8_t len; /* read length */
+ uint8_t spare0;
+ uint32_t spare1;
+ uint8_t data[8]; /* read buffer */
+};
#endif /* __BSD_VISIBLE */
@@ -528,10 +547,4 @@ struct if_nameindex *if_nameindex(void);
unsigned int if_nametoindex(const char *);
__END_DECLS
#endif
-
-#ifdef _KERNEL
-/* XXX - this should go away soon. */
-#include <net/if_var.h>
-#endif
-
#endif /* !_NET_IF_H_ */
diff --git a/freebsd/sys/net/if_arc.h b/freebsd/sys/net/if_arc.h
index 88a72403..23139aa6 100644
--- a/freebsd/sys/net/if_arc.h
+++ b/freebsd/sys/net/if_arc.h
@@ -133,7 +133,7 @@ void arc_storelladdr(struct ifnet *, u_int8_t);
int arc_isphds(u_int8_t);
void arc_input(struct ifnet *, struct mbuf *);
int arc_output(struct ifnet *, struct mbuf *,
- struct sockaddr *, struct route *);
+ const struct sockaddr *, struct route *);
int arc_ioctl(struct ifnet *, u_long, caddr_t);
void arc_frag_init(struct ifnet *);
diff --git a/freebsd/sys/net/if_arcsubr.c b/freebsd/sys/net/if_arcsubr.c
index fae432ad..1954e262 100644
--- a/freebsd/sys/net/if_arcsubr.c
+++ b/freebsd/sys/net/if_arcsubr.c
@@ -42,7 +42,6 @@
*/
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -59,6 +58,7 @@
#include <machine/cpu.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/if_dl.h>
@@ -78,11 +78,6 @@
#include <netinet6/nd6.h>
#endif
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
#define ARCNET_ALLOW_BROKEN_ARP
static struct mbuf *arc_defrag(struct ifnet *, struct mbuf *);
@@ -94,8 +89,7 @@ u_int8_t arcbroadcastaddr = 0;
#define ARC_LLADDR(ifp) (*(u_int8_t *)IF_LLADDR(ifp))
#define senderr(e) { error = (e); goto bad;}
-#define SIN(s) ((struct sockaddr_in *)s)
-#define SIPX(s) ((struct sockaddr_ipx *)s)
+#define SIN(s) ((const struct sockaddr_in *)(s))
/*
* ARCnet output routine.
@@ -103,7 +97,7 @@ u_int8_t arcbroadcastaddr = 0;
* Assumes that ifp is actually pointer to arccom structure.
*/
int
-arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
struct arc_header *ah;
@@ -112,7 +106,7 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
int loop_copy = 0;
int isphds;
#if defined(INET) || defined(INET6)
- struct llentry *lle;
+ int is_gw = 0;
#endif
if (!((ifp->if_flags & IFF_UP) &&
@@ -120,6 +114,10 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
return(ENETDOWN); /* m, m1 aren't initialized yet */
error = 0;
+#if defined(INET) || defined(INET6)
+ if (ro != NULL)
+ is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
+#endif
switch (dst->sa_family) {
#ifdef INET
@@ -133,8 +131,8 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
else if (ifp->if_flags & IFF_NOARP)
adst = ntohl(SIN(dst)->sin_addr.s_addr) & 0xFF;
else {
- error = arpresolve(ifp, ro ? ro->ro_rt : NULL,
- m, dst, &adst, &lle);
+ error = arpresolve(ifp, is_gw, m, dst, &adst, NULL,
+ NULL);
if (error)
return (error == EWOULDBLOCK ? 0 : error);
}
@@ -172,24 +170,23 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
#endif
#ifdef INET6
case AF_INET6:
- error = nd6_storelladdr(ifp, m, dst, (u_char *)&adst, &lle);
- if (error)
- return (error);
+ if ((m->m_flags & M_MCAST) != 0)
+ adst = arcbroadcastaddr; /* ARCnet broadcast address */
+ else {
+ error = nd6_resolve(ifp, is_gw, m, dst, &adst, NULL,
+ NULL);
+ if (error != 0)
+ return (error == EWOULDBLOCK ? 0 : error);
+ }
atype = ARCTYPE_INET6;
break;
#endif
-#ifdef IPX
- case AF_IPX:
- adst = SIPX(dst)->sipx_addr.x_host.c_host[5];
- atype = ARCTYPE_IPX;
- if (adst == 0xff)
- adst = arcbroadcastaddr;
- break;
-#endif
-
case AF_UNSPEC:
+ {
+ const struct arc_header *ah;
+
loop_copy = -1;
- ah = (struct arc_header *)dst->sa_data;
+ ah = (const struct arc_header *)dst->sa_data;
adst = ah->arc_dhost;
atype = ah->arc_type;
@@ -209,15 +206,15 @@ arc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
#endif
}
break;
-
+ }
default:
if_printf(ifp, "can't handle af%d\n", dst->sa_family);
senderr(EAFNOSUPPORT);
}
isphds = arc_isphds(atype);
- M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_NOWAIT);
+ if (m == NULL)
senderr(ENOBUFS);
ah = mtod(m, struct arc_header *);
ah->arc_type = atype;
@@ -268,12 +265,12 @@ arc_frag_next(struct ifnet *ifp)
struct arc_header *ah;
ac = (struct arccom *)ifp->if_l2com;
- if ((m = ac->curr_frag) == 0) {
+ if ((m = ac->curr_frag) == NULL) {
int tfrags;
/* dequeue new packet */
IF_DEQUEUE(&ifp->if_snd, m);
- if (m == 0)
+ if (m == NULL)
return 0;
ah = mtod(m, struct arc_header *);
@@ -281,7 +278,7 @@ arc_frag_next(struct ifnet *ifp)
return m;
++ac->ac_seqid; /* make the seqid unique */
- tfrags = (m->m_pkthdr.len + ARC_MAX_DATA - 1) / ARC_MAX_DATA;
+ tfrags = howmany(m->m_pkthdr.len, ARC_MAX_DATA);
ac->fsflag = 2 * tfrags - 3;
ac->sflag = 0;
ac->rsflag = ac->fsflag;
@@ -296,14 +293,14 @@ arc_frag_next(struct ifnet *ifp)
/* split out next fragment and return it */
if (ac->sflag < ac->fsflag) {
/* we CAN'T have short packets here */
- ac->curr_frag = m_split(m, ARC_MAX_DATA, M_DONTWAIT);
+ ac->curr_frag = m_split(m, ARC_MAX_DATA, M_NOWAIT);
if (ac->curr_frag == 0) {
m_freem(m);
return 0;
}
- M_PREPEND(m, ARC_HDRNEWLEN, M_DONTWAIT);
- if (m == 0) {
+ M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT);
+ if (m == NULL) {
m_freem(ac->curr_frag);
ac->curr_frag = 0;
return 0;
@@ -321,8 +318,8 @@ arc_frag_next(struct ifnet *ifp)
ARC_MAX_FORBID_LEN - ARC_HDRNEWLEN + 2)) {
ac->curr_frag = 0;
- M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_NOWAIT);
+ if (m == NULL)
return 0;
ah = mtod(m, struct arc_header *);
@@ -334,8 +331,8 @@ arc_frag_next(struct ifnet *ifp)
} else {
ac->curr_frag = 0;
- M_PREPEND(m, ARC_HDRNEWLEN, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT);
+ if (m == NULL)
return 0;
ah = mtod(m, struct arc_header *);
@@ -352,7 +349,7 @@ arc_frag_next(struct ifnet *ifp)
/*
* Defragmenter. Returns mbuf if last packet found, else
- * NULL. frees imcoming mbuf as necessary.
+ * NULL. frees incoming mbuf as necessary.
*/
static __inline struct mbuf *
@@ -371,7 +368,7 @@ arc_defrag(struct ifnet *ifp, struct mbuf *m)
if (m->m_len < ARC_HDRNEWLEN) {
m = m_pullup(m, ARC_HDRNEWLEN);
if (m == NULL) {
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
return NULL;
}
}
@@ -391,7 +388,7 @@ arc_defrag(struct ifnet *ifp, struct mbuf *m)
if (m->m_len < ARC_HDRNEWLEN) {
m = m_pullup(m, ARC_HDRNEWLEN);
if (m == NULL) {
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
return NULL;
}
}
@@ -544,11 +541,11 @@ arc_input(struct ifnet *ifp, struct mbuf *m)
return;
}
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
if (ah->arc_dhost == arcbroadcastaddr) {
m->m_flags |= M_BCAST|M_MCAST;
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
}
atype = ah->arc_type;
@@ -556,15 +553,11 @@ arc_input(struct ifnet *ifp, struct mbuf *m)
#ifdef INET
case ARCTYPE_IP:
m_adj(m, ARC_HDRNEWLEN);
- if ((m = ip_fastforward(m)) == NULL)
- return;
isr = NETISR_IP;
break;
case ARCTYPE_IP_OLD:
m_adj(m, ARC_HDRLEN);
- if ((m = ip_fastforward(m)) == NULL)
- return;
isr = NETISR_IP;
break;
@@ -600,12 +593,6 @@ arc_input(struct ifnet *ifp, struct mbuf *m)
isr = NETISR_IPV6;
break;
#endif
-#ifdef IPX
- case ARCTYPE_IPX:
- m_adj(m, ARC_HDRNEWLEN);
- isr = NETISR_IPX;
- break;
-#endif
default:
m_freem(m);
return;
@@ -640,11 +627,7 @@ arc_ifattach(struct ifnet *ifp, u_int8_t lla)
ifp->if_resolvemulti = arc_resolvemulti;
if (ifp->if_baudrate == 0)
ifp->if_baudrate = 2500000;
-#if __FreeBSD_version < 500000
- ifa = ifnet_addrs[ifp->if_index - 1];
-#else
ifa = ifp->if_addr;
-#endif
KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
sdl = (struct sockaddr_dl *)ifa->ifa_addr;
sdl->sdl_type = IFT_ARCNET;
@@ -691,26 +674,6 @@ arc_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
arp_ifinit(ifp, ifa);
break;
#endif
-#ifdef IPX
- /*
- * XXX This code is probably wrong
- */
- case AF_IPX:
- {
- struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr);
-
- if (ipx_nullhost(*ina))
- ina->x_host.c_host[5] = ARC_LLADDR(ifp);
- else
- arc_storelladdr(ifp, ina->x_host.c_host[5]);
-
- /*
- * Set new address
- */
- ifp->if_init(ifp->if_softc);
- break;
- }
-#endif
default:
ifp->if_init(ifp->if_softc);
break;
@@ -781,21 +744,14 @@ arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
sdl = (struct sockaddr_dl *)sa;
if (*LLADDR(sdl) != arcbroadcastaddr)
return EADDRNOTAVAIL;
- *llsa = 0;
+ *llsa = NULL;
return 0;
#ifdef INET
case AF_INET:
sin = (struct sockaddr_in *)sa;
if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
return EADDRNOTAVAIL;
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT | M_ZERO);
- if (sdl == NULL)
- return ENOMEM;
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_ARCNET;
+ sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
sdl->sdl_alen = ARC_ADDR_LEN;
*LLADDR(sdl) = 0;
*llsa = (struct sockaddr *)sdl;
@@ -811,19 +767,12 @@ arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
* (This is used for multicast routers.)
*/
ifp->if_flags |= IFF_ALLMULTI;
- *llsa = 0;
+ *llsa = NULL;
return 0;
}
if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
return EADDRNOTAVAIL;
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT | M_ZERO);
- if (sdl == NULL)
- return ENOMEM;
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_ARCNET;
+ sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
sdl->sdl_alen = ARC_ADDR_LEN;
*LLADDR(sdl) = 0;
*llsa = (struct sockaddr *)sdl;
diff --git a/freebsd/sys/net/if_arp.h b/freebsd/sys/net/if_arp.h
index 38c64020..7d141f37 100644
--- a/freebsd/sys/net/if_arp.h
+++ b/freebsd/sys/net/if_arp.h
@@ -97,43 +97,37 @@ struct arpreq {
#define ATF_PUBL 0x08 /* publish entry (respond for other host) */
#define ATF_USETRAILERS 0x10 /* has requested trailers */
-#ifdef _KERNEL
-/*
- * Structure shared between the ethernet driver modules and
- * the address resolution code.
- */
-struct arpcom {
- struct ifnet *ac_ifp; /* network-visible interface */
- void *ac_netgraph; /* ng_ether(4) netgraph node info */
-};
-#define IFP2AC(ifp) ((struct arpcom *)(ifp->if_l2com))
-#define AC2IFP(ac) ((ac)->ac_ifp)
-
-#endif /* _KERNEL */
-
struct arpstat {
/* Normal things that happen: */
- u_long txrequests; /* # of ARP requests sent by this host. */
- u_long txreplies; /* # of ARP replies sent by this host. */
- u_long rxrequests; /* # of ARP requests received by this host. */
- u_long rxreplies; /* # of ARP replies received by this host. */
- u_long received; /* # of ARP packets received by this host. */
+ uint64_t txrequests; /* # of ARP requests sent by this host. */
+ uint64_t txreplies; /* # of ARP replies sent by this host. */
+ uint64_t rxrequests; /* # of ARP requests received by this host. */
+ uint64_t rxreplies; /* # of ARP replies received by this host. */
+ uint64_t received; /* # of ARP packets received by this host. */
- u_long arp_spares[4]; /* For either the upper or lower half. */
+ uint64_t arp_spares[4]; /* For either the upper or lower half. */
/* Abnormal event and error counting: */
- u_long dropped; /* # of packets dropped waiting for a reply. */
- u_long timeouts; /* # of times with entries removed */
+ uint64_t dropped; /* # of packets dropped waiting for a reply. */
+ uint64_t timeouts; /* # of times with entries removed */
/* due to timeout. */
- u_long dupips; /* # of duplicate IPs detected. */
+ uint64_t dupips; /* # of duplicate IPs detected. */
};
+#ifdef _KERNEL
+#include <sys/counter.h>
+#include <net/vnet.h>
+
+VNET_PCPUSTAT_DECLARE(struct arpstat, arpstat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
-#define ARPSTAT_ADD(name, val) V_arpstat.name += (val)
-#define ARPSTAT_SUB(name, val) V_arpstat.name -= (val)
+#define ARPSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct arpstat, arpstat, name, (val))
+#define ARPSTAT_SUB(name, val) ARPSTAT_ADD(name, -(val))
#define ARPSTAT_INC(name) ARPSTAT_ADD(name, 1)
#define ARPSTAT_DEC(name) ARPSTAT_SUB(name, 1)
+#endif /* _KERNEL */
+
#endif /* !_NET_IF_ARP_H_ */
diff --git a/freebsd/sys/net/if_atm.h b/freebsd/sys/net/if_atm.h
index e8f69da0..a0900eee 100644
--- a/freebsd/sys/net/if_atm.h
+++ b/freebsd/sys/net/if_atm.h
@@ -96,7 +96,7 @@ struct ifatm_mib {
/*
* Traffic parameters for ATM connections. This contains all parameters
- * to accomodate UBR, UBR+MCR, CBR, VBR and ABR connections.
+ * to accommodate UBR, UBR+MCR, CBR, VBR and ABR connections.
*
* Keep in sync with ng_atm.h
*/
@@ -292,7 +292,7 @@ void atm_ifattach(struct ifnet *);
void atm_ifdetach(struct ifnet *);
void atm_input(struct ifnet *, struct atm_pseudohdr *,
struct mbuf *, void *);
-int atm_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+int atm_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
struct atmio_vcctable *atm_getvccs(struct atmio_vcc **, u_int, u_int,
struct mtx *, int);
diff --git a/freebsd/sys/net/if_atmsubr.c b/freebsd/sys/net/if_atmsubr.c
index a4cbeb09..fff233c4 100644
--- a/freebsd/sys/net/if_atmsubr.c
+++ b/freebsd/sys/net/if_atmsubr.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/if_dl.h>
@@ -123,7 +124,7 @@ static MALLOC_DEFINE(M_IFATM, "ifatm", "atm interface internals");
* ro->ro_rt must also be NULL.
*/
int
-atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
+atm_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
struct route *ro)
{
u_int16_t etype = 0; /* if using LLC/SNAP */
@@ -131,7 +132,7 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
struct atm_pseudohdr atmdst, *ad;
struct mbuf *m = m0;
struct atmllc *atmllc;
- struct atmllc *llc_hdr = NULL;
+ const struct atmllc *llc_hdr = NULL;
u_int32_t atm_flags;
#ifdef MAC
@@ -175,7 +176,7 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
* (atm pseudo header (4) + LLC/SNAP (8))
*/
bcopy(dst->sa_data, &atmdst, sizeof(atmdst));
- llc_hdr = (struct atmllc *)(dst->sa_data +
+ llc_hdr = (const struct atmllc *)(dst->sa_data +
sizeof(atmdst));
break;
@@ -192,8 +193,8 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
atm_flags = ATM_PH_FLAGS(&atmdst);
if (atm_flags & ATM_PH_LLCSNAP)
sz += 8; /* sizeof snap == 8 */
- M_PREPEND(m, sz, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, sz, M_NOWAIT);
+ if (m == NULL)
senderr(ENOBUFS);
ad = mtod(m, struct atm_pseudohdr *);
*ad = atmdst;
@@ -253,7 +254,7 @@ atm_input(struct ifnet *ifp, struct atm_pseudohdr *ah, struct mbuf *m,
#ifdef MAC
mac_ifnet_create_mbuf(ifp, m);
#endif
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
if (ng_atm_input_p != NULL) {
(*ng_atm_input_p)(ifp, &m, ah, rxhand);
@@ -296,7 +297,7 @@ atm_input(struct ifnet *ifp, struct atm_pseudohdr *ah, struct mbuf *m,
struct atmllc *alc;
if (m->m_len < sizeof(*alc) &&
- (m = m_pullup(m, sizeof(*alc))) == 0)
+ (m = m_pullup(m, sizeof(*alc))) == NULL)
return; /* failed */
alc = mtod(m, struct atmllc *);
if (bcmp(alc, ATMLLC_HDR, 6)) {
diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c
index 65553092..77b376b9 100644
--- a/freebsd/sys/net/if_bridge.c
+++ b/freebsd/sys/net/if_bridge.c
@@ -73,7 +73,7 @@
* - Currently only supports Ethernet-like interfaces (Ethernet,
* 802.11, VLANs on Ethernet, etc.) Figure out a nice way
* to bridge other types of interfaces (FDDI-FDDI, and maybe
- * consider heterogenous bridges).
+ * consider heterogeneous bridges).
*/
#include <sys/cdefs.h>
@@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
+#include <sys/eventhandler.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/protosw.h>
@@ -102,7 +103,6 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mutex.h>
-#include <sys/rwlock.h>
#include <net/bpf.h>
#include <net/if.h>
@@ -113,7 +113,7 @@ __FBSDID("$FreeBSD$");
#include <net/pfil.h>
#include <net/vnet.h>
-#include <netinet/in.h> /* for struct arpcom */
+#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
@@ -127,15 +127,13 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_carp.h>
#endif
#include <machine/in_cksum.h>
-#include <netinet/if_ether.h> /* for struct arpcom */
+#include <netinet/if_ether.h>
#include <net/bridgestp.h>
#include <net/if_bridgevar.h>
#include <net/if_llc.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
-#include <netinet/ip_fw.h>
-#include <netpfil/ipfw/ip_fw_private.h>
/*
* Size of the route hash table. Must be a power of two.
@@ -170,7 +168,8 @@ __FBSDID("$FreeBSD$");
/*
* List of capabilities to possibly mask on the member interface.
*/
-#define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM)
+#define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM|\
+ IFCAP_TXCSUM_IPV6)
/*
* List of capabilities to strip
@@ -230,8 +229,9 @@ struct bridge_softc {
u_char sc_defaddr[6]; /* Default MAC address */
};
-static struct mtx bridge_list_mtx;
-eventhandler_tag bridge_detach_cookie = NULL;
+static VNET_DEFINE(struct mtx, bridge_list_mtx);
+#define V_bridge_list_mtx VNET(bridge_list_mtx)
+static eventhandler_tag bridge_detach_cookie;
int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
@@ -248,11 +248,12 @@ static void bridge_ifdetach(void *arg __unused, struct ifnet *);
static void bridge_init(void *);
static void bridge_dummynet(struct mbuf *, struct ifnet *);
static void bridge_stop(struct ifnet *, int);
-static void bridge_start(struct ifnet *);
+static int bridge_transmit(struct ifnet *, struct mbuf *);
+static void bridge_qflush(struct ifnet *);
static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
-static void bridge_enqueue(struct bridge_softc *, struct ifnet *,
+static int bridge_enqueue(struct bridge_softc *, struct ifnet *,
struct mbuf *);
static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
@@ -275,7 +276,7 @@ static void bridge_rtflush(struct bridge_softc *, int);
static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
uint16_t);
-static int bridge_rtable_init(struct bridge_softc *);
+static void bridge_rtable_init(struct bridge_softc *);
static void bridge_rtable_fini(struct bridge_softc *);
static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
@@ -353,43 +354,64 @@ static struct bstp_cb_ops bridge_ops = {
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
-static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
-static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
-static int pfil_member = 1; /* run pfil hooks on the member interface */
-static int pfil_ipfw = 0; /* layer2 filter with ipfw */
-static int pfil_ipfw_arp = 0; /* layer2 filter with ipfw */
-static int pfil_local_phys = 0; /* run pfil hooks on the physical interface for
- locally destined packets */
-static int log_stp = 0; /* log STP state changes */
-static int bridge_inherit_mac = 0; /* share MAC with first bridge member */
-TUNABLE_INT("net.link.bridge.pfil_onlyip", &pfil_onlyip);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
- &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
-TUNABLE_INT("net.link.bridge.ipfw_arp", &pfil_ipfw_arp);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RW,
- &pfil_ipfw_arp, 0, "Filter ARP packets through IPFW layer2");
-TUNABLE_INT("net.link.bridge.pfil_bridge", &pfil_bridge);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
- &pfil_bridge, 0, "Packet filter on the bridge interface");
-TUNABLE_INT("net.link.bridge.pfil_member", &pfil_member);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
- &pfil_member, 0, "Packet filter on the member interface");
-TUNABLE_INT("net.link.bridge.pfil_local_phys", &pfil_local_phys);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RW,
- &pfil_local_phys, 0,
+/* only pass IP[46] packets when pfil is enabled */
+static VNET_DEFINE(int, pfil_onlyip) = 1;
+#define V_pfil_onlyip VNET(pfil_onlyip)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_onlyip), 0,
+ "Only pass IP packets when pfil is enabled");
+
+/* run pfil hooks on the bridge interface */
+static VNET_DEFINE(int, pfil_bridge) = 1;
+#define V_pfil_bridge VNET(pfil_bridge)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_bridge), 0,
+ "Packet filter on the bridge interface");
+
+/* layer2 filter with ipfw */
+static VNET_DEFINE(int, pfil_ipfw);
+#define V_pfil_ipfw VNET(pfil_ipfw)
+
+/* layer2 ARP filter with ipfw */
+static VNET_DEFINE(int, pfil_ipfw_arp);
+#define V_pfil_ipfw_arp VNET(pfil_ipfw_arp)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw_arp), 0,
+ "Filter ARP packets through IPFW layer2");
+
+/* run pfil hooks on the member interface */
+static VNET_DEFINE(int, pfil_member) = 1;
+#define V_pfil_member VNET(pfil_member)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_member), 0,
+ "Packet filter on the member interface");
+
+/* run pfil hooks on the physical interface for locally destined packets */
+static VNET_DEFINE(int, pfil_local_phys);
+#define V_pfil_local_phys VNET(pfil_local_phys)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_local_phys), 0,
"Packet filter on the physical interface for locally destined packets");
-TUNABLE_INT("net.link.bridge.log_stp", &log_stp);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
- &log_stp, 0, "Log STP state changes");
-TUNABLE_INT("net.link.bridge.inherit_mac", &bridge_inherit_mac);
-SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RW,
- &bridge_inherit_mac, 0,
+
+/* log STP state changes */
+static VNET_DEFINE(int, log_stp);
+#define V_log_stp VNET(log_stp)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(log_stp), 0,
+ "Log STP state changes");
+
+/* share MAC with first bridge member */
+static VNET_DEFINE(int, bridge_inherit_mac);
+#define V_bridge_inherit_mac VNET(bridge_inherit_mac)
+SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
+ CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(bridge_inherit_mac), 0,
"Inherit MAC address from the first bridge member");
static VNET_DEFINE(int, allow_llz_overlap) = 0;
#define V_allow_llz_overlap VNET(allow_llz_overlap)
-SYSCTL_VNET_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, CTLFLAG_RW,
- &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope "
+SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap,
+    CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0,
+ "Allow overlap of link-local scope "
"zones of a bridge interface and the member interfaces");
struct bridge_control {
@@ -487,12 +509,43 @@ const struct bridge_control bridge_control_table[] = {
BC_F_COPYIN|BC_F_SUSER },
};
-const int bridge_control_table_size =
- sizeof(bridge_control_table) / sizeof(bridge_control_table[0]);
+const int bridge_control_table_size = nitems(bridge_control_table);
+
+static VNET_DEFINE(LIST_HEAD(, bridge_softc), bridge_list);
+#define V_bridge_list VNET(bridge_list)
+#define BRIDGE_LIST_LOCK_INIT(x) mtx_init(&V_bridge_list_mtx, \
+ "if_bridge list", NULL, MTX_DEF)
+#define BRIDGE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_bridge_list_mtx)
+#define BRIDGE_LIST_LOCK(x) mtx_lock(&V_bridge_list_mtx)
+#define BRIDGE_LIST_UNLOCK(x) mtx_unlock(&V_bridge_list_mtx)
+
+static VNET_DEFINE(struct if_clone *, bridge_cloner);
+#define V_bridge_cloner VNET(bridge_cloner)
-LIST_HEAD(, bridge_softc) bridge_list;
+static const char bridge_name[] = "bridge";
+
+static void
+vnet_bridge_init(const void *unused __unused)
+{
+
+ BRIDGE_LIST_LOCK_INIT();
+ LIST_INIT(&V_bridge_list);
+ V_bridge_cloner = if_clone_simple(bridge_name,
+ bridge_clone_create, bridge_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_bridge_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_bridge_init, NULL);
+
+static void
+vnet_bridge_uninit(const void *unused __unused)
+{
-IFC_SIMPLE_DECLARE(bridge, 0);
+ if_clone_detach(V_bridge_cloner);
+ V_bridge_cloner = NULL;
+ BRIDGE_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
+ vnet_bridge_uninit, NULL);
static int
bridge_modevent(module_t mod, int type, void *data)
@@ -500,12 +553,9 @@ bridge_modevent(module_t mod, int type, void *data)
switch (type) {
case MOD_LOAD:
- mtx_init(&bridge_list_mtx, "if_bridge list", NULL, MTX_DEF);
- if_clone_attach(&bridge_cloner);
bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
- LIST_INIT(&bridge_list);
bridge_input_p = bridge_input;
bridge_output_p = bridge_output;
bridge_dn_p = bridge_dummynet;
@@ -517,13 +567,11 @@ bridge_modevent(module_t mod, int type, void *data)
case MOD_UNLOAD:
EVENTHANDLER_DEREGISTER(ifnet_departure_event,
bridge_detach_cookie);
- if_clone_detach(&bridge_cloner);
uma_zdestroy(bridge_rtnode_zone);
bridge_input_p = NULL;
bridge_output_p = NULL;
bridge_dn_p = NULL;
bridge_linkstate_p = NULL;
- mtx_destroy(&bridge_list_mtx);
break;
default:
return (EOPNOTSUPP);
@@ -541,19 +589,19 @@ DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1);
/*
- * handler for net.link.bridge.pfil_ipfw
+ * handler for net.link.bridge.ipfw
*/
static int
sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS)
{
- int enable = pfil_ipfw;
+ int enable = V_pfil_ipfw;
int error;
error = sysctl_handle_int(oidp, &enable, 0, req);
- enable = (enable) ? 1 : 0;
+ enable &= 1;
- if (enable != pfil_ipfw) {
- pfil_ipfw = enable;
+ if (enable != V_pfil_ipfw) {
+ V_pfil_ipfw = enable;
/*
* Disable pfil so that ipfw doesnt run twice, if the user
@@ -561,17 +609,19 @@ sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS)
* pfil_member. Also allow non-ip packets as ipfw can filter by
* layer2 type.
*/
- if (pfil_ipfw) {
- pfil_onlyip = 0;
- pfil_bridge = 0;
- pfil_member = 0;
+ if (V_pfil_ipfw) {
+ V_pfil_onlyip = 0;
+ V_pfil_bridge = 0;
+ V_pfil_member = 0;
}
}
return (error);
}
-SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw, CTLTYPE_INT|CTLFLAG_RW,
- &pfil_ipfw, 0, &sysctl_pfil_ipfw, "I", "Layer2 filter with IPFW");
+SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(pfil_ipfw), 0, &sysctl_pfil_ipfw, "I",
+ "Layer2 filter with IPFW");
/*
* bridge_clone_create:
@@ -606,15 +656,13 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
LIST_INIT(&sc->sc_spanlist);
ifp->if_softc = sc;
- if_initname(ifp, ifc->ifc_name, unit);
+ if_initname(ifp, bridge_name, unit);
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = bridge_ioctl;
- ifp->if_start = bridge_start;
+ ifp->if_transmit = bridge_transmit;
+ ifp->if_qflush = bridge_qflush;
ifp->if_init = bridge_init;
ifp->if_type = IFT_BRIDGE;
- IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
- ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
- IFQ_SET_READY(&ifp->if_snd);
/*
* Generate an ethernet address with a locally administered address.
@@ -626,7 +674,7 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
*/
fb = 0;
getcredhostid(curthread->td_ucred, &hostid);
- for (retry = 1; retry != 0;) {
+ do {
if (fb || hostid == 0) {
arc4rand(sc->sc_defaddr, ETHER_ADDR_LEN, 1);
sc->sc_defaddr[0] &= ~1;/* clear multicast bit */
@@ -642,15 +690,17 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
fb = 1;
retry = 0;
- mtx_lock(&bridge_list_mtx);
- LIST_FOREACH(sc2, &bridge_list, sc_list) {
+ BRIDGE_LIST_LOCK();
+ LIST_FOREACH(sc2, &V_bridge_list, sc_list) {
bifp = sc2->sc_ifp;
if (memcmp(sc->sc_defaddr,
- IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0)
+ IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
retry = 1;
+ break;
+ }
}
- mtx_unlock(&bridge_list_mtx);
- }
+ BRIDGE_LIST_UNLOCK();
+ } while (retry == 1);
bstp_attach(&sc->sc_stp, &bridge_ops);
ether_ifattach(ifp, sc->sc_defaddr);
@@ -658,9 +708,9 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params)
ifp->if_baudrate = 0;
ifp->if_type = IFT_BRIDGE;
- mtx_lock(&bridge_list_mtx);
- LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
- mtx_unlock(&bridge_list_mtx);
+ BRIDGE_LIST_LOCK();
+ LIST_INSERT_HEAD(&V_bridge_list, sc, sc_list);
+ BRIDGE_LIST_UNLOCK();
return (0);
}
@@ -692,13 +742,13 @@ bridge_clone_destroy(struct ifnet *ifp)
callout_drain(&sc->sc_brcallout);
- mtx_lock(&bridge_list_mtx);
+ BRIDGE_LIST_LOCK();
LIST_REMOVE(sc, sc_list);
- mtx_unlock(&bridge_list_mtx);
+ BRIDGE_LIST_UNLOCK();
bstp_detach(&sc->sc_stp);
ether_ifdetach(ifp);
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
/* Tear down the routing table. */
bridge_rtable_fini(sc);
@@ -818,7 +868,7 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
BRIDGE_LOCK(sc);
LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
if (bif->bif_ifp->if_mtu != ifr->ifr_mtu) {
- log(LOG_NOTICE, "%s: invalid MTU: %lu(%s)"
+ log(LOG_NOTICE, "%s: invalid MTU: %u(%s)"
" != %d\n", sc->sc_ifp->if_xname,
bif->bif_ifp->if_mtu,
bif->bif_ifp->if_xname, ifr->ifr_mtu);
@@ -960,7 +1010,7 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
* the mac address of the bridge to the address of the next member, or
* to its default address if no members are left.
*/
- if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
+ if (V_bridge_inherit_mac && sc->sc_ifaddr == ifs) {
if (LIST_EMPTY(&sc->sc_iflist)) {
bcopy(sc->sc_defaddr,
IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
@@ -986,9 +1036,12 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
case IFT_ETHER:
case IFT_L2VLAN:
/*
- * Take the interface out of promiscuous mode.
+ * Take the interface out of promiscuous mode, but only
+ * if it was promiscuous in the first place. It might
+ * not be if we're in the bridge_ioctl_add() error path.
*/
- (void) ifpromisc(ifs, 0);
+ if (ifs->if_flags & IFF_PROMISC)
+ (void) ifpromisc(ifs, 0);
break;
case IFT_GIF:
@@ -1108,7 +1161,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
if (LIST_EMPTY(&sc->sc_iflist))
sc->sc_ifp->if_mtu = ifs->if_mtu;
else if (sc->sc_ifp->if_mtu != ifs->if_mtu) {
- if_printf(sc->sc_ifp, "invalid MTU: %lu(%s) != %lu\n",
+ if_printf(sc->sc_ifp, "invalid MTU: %u(%s) != %u\n",
ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu);
return (EINVAL);
}
@@ -1126,7 +1179,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
* member and the MAC address of the bridge has not been changed from
* the default randomly generated one.
*/
- if (bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) &&
+ if (V_bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) &&
!memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr, ETHER_ADDR_LEN)) {
bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
sc->sc_ifaddr = ifs;
@@ -1156,10 +1209,8 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
break;
}
- if (error) {
+ if (error)
bridge_delete_member(sc, bif, 0);
- free(bif, M_DEVBUF);
- }
return (error);
}
@@ -1751,7 +1802,13 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
if (ifp->if_flags & IFF_RENAMING)
return;
-
+ if (V_bridge_cloner == NULL) {
+ /*
+ * This detach handler can be called after
+ * vnet_bridge_uninit(). Just return in that case.
+ */
+ return;
+ }
/* Check if the interface is a bridge member */
if (sc != NULL) {
BRIDGE_LOCK(sc);
@@ -1765,8 +1822,8 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
}
/* Check if the interface is a span port */
- mtx_lock(&bridge_list_mtx);
- LIST_FOREACH(sc, &bridge_list, sc_list) {
+ BRIDGE_LIST_LOCK();
+ LIST_FOREACH(sc, &V_bridge_list, sc_list) {
BRIDGE_LOCK(sc);
LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
if (ifp == bif->bif_ifp) {
@@ -1776,7 +1833,7 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
BRIDGE_UNLOCK(sc);
}
- mtx_unlock(&bridge_list_mtx);
+ BRIDGE_LIST_UNLOCK();
}
/*
@@ -1832,20 +1889,19 @@ bridge_stop(struct ifnet *ifp, int disable)
* Enqueue a packet on a bridge member interface.
*
*/
-static void
+static int
bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
{
int len, err = 0;
short mflags;
struct mbuf *m0;
- len = m->m_pkthdr.len;
- mflags = m->m_flags;
-
/* We may be sending a fragment so traverse the mbuf */
for (; m; m = m0) {
m0 = m->m_nextpkt;
m->m_nextpkt = NULL;
+ len = m->m_pkthdr.len;
+ mflags = m->m_flags;
/*
* If underlying interface can not do VLAN tag insertion itself
@@ -1857,7 +1913,7 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
if (m == NULL) {
if_printf(dst_ifp,
"unable to prepend VLAN header\n");
- dst_ifp->if_oerrors++;
+ if_inc_counter(dst_ifp, IFCOUNTER_OERRORS, 1);
continue;
}
m->m_flags &= ~M_VLANTAG;
@@ -1865,16 +1921,17 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
if ((err = dst_ifp->if_transmit(dst_ifp, m))) {
m_freem(m0);
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
break;
}
- }
- if (err == 0) {
- sc->sc_ifp->if_opackets++;
- sc->sc_ifp->if_obytes += len;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len);
if (mflags & M_MCAST)
- sc->sc_ifp->if_omcasts++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OMCASTS, 1);
}
+
+ return (err);
}
/*
@@ -2000,9 +2057,9 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
used = 1;
mc = m;
} else {
- mc = m_copypacket(m, M_DONTWAIT);
+ mc = m_copypacket(m, M_NOWAIT);
if (mc == NULL) {
- sc->sc_ifp->if_oerrors++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
continue;
}
}
@@ -2033,44 +2090,42 @@ sendunicast:
}
/*
- * bridge_start:
+ * bridge_transmit:
*
- * Start output on a bridge.
+ * Do output on a bridge.
*
*/
-static void
-bridge_start(struct ifnet *ifp)
+static int
+bridge_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct bridge_softc *sc;
- struct mbuf *m;
struct ether_header *eh;
struct ifnet *dst_if;
+ int error = 0;
sc = ifp->if_softc;
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- for (;;) {
- IFQ_DEQUEUE(&ifp->if_snd, m);
- if (m == 0)
- break;
- ETHER_BPF_MTAP(ifp, m);
+ ETHER_BPF_MTAP(ifp, m);
- eh = mtod(m, struct ether_header *);
- dst_if = NULL;
+ eh = mtod(m, struct ether_header *);
- BRIDGE_LOCK(sc);
- if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
- dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1);
- }
+ BRIDGE_LOCK(sc);
+ if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) &&
+ (dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1)) != NULL) {
+ BRIDGE_UNLOCK(sc);
+ error = bridge_enqueue(sc, dst_if, m);
+ } else
+ bridge_broadcast(sc, ifp, m, 0);
- if (dst_if == NULL)
- bridge_broadcast(sc, ifp, m, 0);
- else {
- BRIDGE_UNLOCK(sc);
- bridge_enqueue(sc, dst_if, m);
- }
- }
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ return (error);
+}
+
+/*
+ * The ifp->if_qflush entry point for if_bridge(4) is no-op.
+ */
+static void
+bridge_qflush(struct ifnet *ifp __unused)
+{
}
/*
@@ -2094,8 +2149,8 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
src_if = m->m_pkthdr.rcvif;
ifp = sc->sc_ifp;
- ifp->if_ipackets++;
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
vlan = VLANTAGOF(m);
if ((sbif->bif_flags & IFBIF_STP) &&
@@ -2147,7 +2202,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
goto drop;
/* ...forward it to all interfaces. */
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
dst_if = NULL;
}
@@ -2255,8 +2310,8 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
if ((bifp->if_flags & IFF_MONITOR) != 0) {
m->m_pkthdr.rcvif = bifp;
ETHER_BPF_MTAP(bifp, m);
- bifp->if_ipackets++;
- bifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
m_freem(m);
return (NULL);
}
@@ -2291,7 +2346,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
* for bridge processing; return the original packet for
* local processing.
*/
- mc = m_dup(m, M_DONTWAIT);
+ mc = m_dup(m, M_NOWAIT);
if (mc == NULL) {
BRIDGE_UNLOCK(sc);
return (m);
@@ -2308,7 +2363,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
*/
KASSERT(bifp->if_bridge == NULL,
("loop created in bridge_input"));
- mc2 = m_dup(m, M_DONTWAIT);
+ mc2 = m_dup(m, M_NOWAIT);
if (mc2 != NULL) {
/* Keep the layer3 header aligned */
int i = min(mc2->m_pkthdr.len, max_protohdr);
@@ -2357,9 +2412,10 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
) { \
if ((iface)->if_type == IFT_BRIDGE) { \
ETHER_BPF_MTAP(iface, m); \
- iface->if_ipackets++; \
+ if_inc_counter(iface, IFCOUNTER_IPACKETS, 1); \
+ if_inc_counter(iface, IFCOUNTER_IBYTES, m->m_pkthdr.len); \
/* Filter on the physical interface. */ \
- if (pfil_local_phys && \
+ if (V_pfil_local_phys && \
(PFIL_HOOKED(&V_inet_pfil_hook) \
OR_PFIL_HOOKED_INET6)) { \
if (bridge_pfil(&m, NULL, ifp, \
@@ -2485,9 +2541,9 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
mc = m;
used = 1;
} else {
- mc = m_dup(m, M_DONTWAIT);
+ mc = m_dup(m, M_NOWAIT);
if (mc == NULL) {
- sc->sc_ifp->if_oerrors++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
continue;
}
}
@@ -2507,7 +2563,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
i = min(mc->m_pkthdr.len, max_protohdr);
mc = m_copyup(mc, i, ETHER_ALIGN);
if (mc == NULL) {
- sc->sc_ifp->if_oerrors++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
continue;
}
}
@@ -2548,9 +2604,9 @@ bridge_span(struct bridge_softc *sc, struct mbuf *m)
if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
continue;
- mc = m_copypacket(m, M_DONTWAIT);
+ mc = m_copypacket(m, M_NOWAIT);
if (mc == NULL) {
- sc->sc_ifp->if_oerrors++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
continue;
}
@@ -2793,24 +2849,19 @@ bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
*
* Initialize the route table for this bridge.
*/
-static int
+static void
bridge_rtable_init(struct bridge_softc *sc)
{
int i;
sc->sc_rthash = malloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
- M_DEVBUF, M_NOWAIT);
- if (sc->sc_rthash == NULL)
- return (ENOMEM);
+ M_DEVBUF, M_WAITOK);
for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
LIST_INIT(&sc->sc_rthash[i]);
sc->sc_rthash_key = arc4random();
-
LIST_INIT(&sc->sc_rtlist);
-
- return (0);
}
/*
@@ -3018,9 +3069,11 @@ bridge_state_change(struct ifnet *ifp, int state)
"discarding"
};
- if (log_stp)
+ CURVNET_SET(ifp->if_vnet);
+ if (V_log_stp)
log(LOG_NOTICE, "%s: state changed to %s on %s\n",
sc->sc_ifp->if_xname, stpstates[state], ifp->if_xname);
+ CURVNET_RESTORE();
}
/*
@@ -3034,7 +3087,6 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
{
int snap, error, i, hlen;
struct ether_header *eh1, eh2;
- struct ip_fw_args args;
struct ip *ip;
struct llc llc1;
u_int16_t ether_type;
@@ -3047,7 +3099,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__));
#endif
- if (pfil_bridge == 0 && pfil_member == 0 && pfil_ipfw == 0)
+ if (V_pfil_bridge == 0 && V_pfil_member == 0 && V_pfil_ipfw == 0)
return (0); /* filtering is disabled */
i = min((*mp)->m_pkthdr.len, max_protohdr);
@@ -3089,7 +3141,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
switch (ether_type) {
case ETHERTYPE_ARP:
case ETHERTYPE_REVARP:
- if (pfil_ipfw_arp == 0)
+ if (V_pfil_ipfw_arp == 0)
return (0); /* Automatically pass */
break;
@@ -3104,10 +3156,20 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
* packets, these will not be checked by pfil(9) and
* passed unconditionally so the default is to drop.
*/
- if (pfil_onlyip)
+ if (V_pfil_onlyip)
goto bad;
}
+ /* Run the packet through pfil before stripping link headers */
+ if (PFIL_HOOKED(&V_link_pfil_hook) && V_pfil_ipfw != 0 &&
+ dir == PFIL_OUT && ifp != NULL) {
+
+ error = pfil_run_hooks(&V_link_pfil_hook, mp, ifp, dir, NULL);
+
+ if (*mp == NULL || error != 0) /* packet consumed by filter */
+ return (error);
+ }
+
/* Strip off the Ethernet header and keep a copy. */
m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
m_adj(*mp, ETHER_HDR_LEN);
@@ -3138,63 +3200,6 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
goto bad;
}
- /* XXX this section is also in if_ethersubr.c */
- // XXX PFIL_OUT or DIR_OUT ?
- if (V_ip_fw_chk_ptr && pfil_ipfw != 0 &&
- dir == PFIL_OUT && ifp != NULL) {
- struct m_tag *mtag;
-
- error = -1;
- /* fetch the start point from existing tags, if any */
- mtag = m_tag_locate(*mp, MTAG_IPFW_RULE, 0, NULL);
- if (mtag == NULL) {
- args.rule.slot = 0;
- } else {
- struct ipfw_rule_ref *r;
-
- /* XXX can we free the tag after use ? */
- mtag->m_tag_id = PACKET_TAG_NONE;
- r = (struct ipfw_rule_ref *)(mtag + 1);
- /* packet already partially processed ? */
- if (r->info & IPFW_ONEPASS)
- goto ipfwpass;
- args.rule = *r;
- }
-
- args.m = *mp;
- args.oif = ifp;
- args.next_hop = NULL;
- args.next_hop6 = NULL;
- args.eh = &eh2;
- args.inp = NULL; /* used by ipfw uid/gid/jail rules */
- i = V_ip_fw_chk_ptr(&args);
- *mp = args.m;
-
- if (*mp == NULL)
- return (error);
-
- if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) {
-
- /* put the Ethernet header back on */
- M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
- if (*mp == NULL)
- return (error);
- bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
-
- /*
- * Pass the pkt to dummynet, which consumes it. The
- * packet will return to us via bridge_dummynet().
- */
- args.oif = ifp;
- ip_dn_io_ptr(mp, DIR_FWD | PROTO_IFB, &args);
- return (error);
- }
-
- if (i != IP_FW_PASS) /* drop */
- goto bad;
- }
-
-ipfwpass:
error = 0;
/*
@@ -3203,36 +3208,27 @@ ipfwpass:
switch (ether_type) {
case ETHERTYPE_IP:
/*
- * before calling the firewall, swap fields the same as
- * IP does. here we assume the header is contiguous
- */
- ip = mtod(*mp, struct ip *);
-
- ip->ip_len = ntohs(ip->ip_len);
- ip->ip_off = ntohs(ip->ip_off);
-
- /*
* Run pfil on the member interface and the bridge, both can
* be skipped by clearing pfil_member or pfil_bridge.
*
* Keep the order:
* in_if -> bridge_if -> out_if
*/
- if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
+ if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL)
error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp,
dir, NULL);
if (*mp == NULL || error != 0) /* filter may consume */
break;
- if (pfil_member && ifp != NULL)
+ if (V_pfil_member && ifp != NULL)
error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp,
dir, NULL);
if (*mp == NULL || error != 0) /* filter may consume */
break;
- if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
+ if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL)
error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp,
dir, NULL);
@@ -3240,7 +3236,7 @@ ipfwpass:
break;
/* check if we need to fragment the packet */
- if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
+ if (V_pfil_member && ifp != NULL && dir == PFIL_OUT) {
i = (*mp)->m_pkthdr.len;
if (i > ifp->if_mtu) {
error = bridge_fragment(ifp, *mp, &eh2, snap,
@@ -3249,20 +3245,18 @@ ipfwpass:
}
}
- /* Recalculate the ip checksum and restore byte ordering */
+ /* Recalculate the ip checksum. */
ip = mtod(*mp, struct ip *);
hlen = ip->ip_hl << 2;
if (hlen < sizeof(struct ip))
goto bad;
if (hlen > (*mp)->m_len) {
- if ((*mp = m_pullup(*mp, hlen)) == 0)
+ if ((*mp = m_pullup(*mp, hlen)) == NULL)
goto bad;
ip = mtod(*mp, struct ip *);
if (ip == NULL)
goto bad;
}
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
if (hlen == sizeof(struct ip))
ip->ip_sum = in_cksum_hdr(ip);
@@ -3272,21 +3266,21 @@ ipfwpass:
break;
#ifdef INET6
case ETHERTYPE_IPV6:
- if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
+ if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL)
error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp,
dir, NULL);
if (*mp == NULL || error != 0) /* filter may consume */
break;
- if (pfil_member && ifp != NULL)
+ if (V_pfil_member && ifp != NULL)
error = pfil_run_hooks(&V_inet6_pfil_hook, mp, ifp,
dir, NULL);
if (*mp == NULL || error != 0) /* filter may consume */
break;
- if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
+ if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL)
error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp,
dir, NULL);
break;
@@ -3307,13 +3301,13 @@ ipfwpass:
* Finally, put everything back the way it was and return
*/
if (snap) {
- M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
+ M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT);
if (*mp == NULL)
return (error);
bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
}
- M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
+ M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT);
if (*mp == NULL)
return (error);
bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
@@ -3375,7 +3369,7 @@ bridge_ip_checkbasic(struct mbuf **mp)
goto bad;
}
if (hlen > m->m_len) {
- if ((m = m_pullup(m, hlen)) == 0) {
+ if ((m = m_pullup(m, hlen)) == NULL) {
KMOD_IPSTAT_INC(ips_badhlen);
goto bad;
}
@@ -3499,8 +3493,8 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
goto out;
ip = mtod(m, struct ip *);
- error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
- CSUM_DELAY_IP);
+ m->m_pkthdr.csum_flags |= CSUM_IP;
+ error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist);
if (error)
goto out;
@@ -3508,7 +3502,7 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
for (m0 = m; m0; m0 = m0->m_nextpkt) {
if (error == 0) {
if (snap) {
- M_PREPEND(m0, sizeof(struct llc), M_DONTWAIT);
+ M_PREPEND(m0, sizeof(struct llc), M_NOWAIT);
if (m0 == NULL) {
error = ENOBUFS;
continue;
@@ -3516,7 +3510,7 @@ bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
bcopy(llc, mtod(m0, caddr_t),
sizeof(struct llc));
}
- M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT);
+ M_PREPEND(m0, ETHER_HDR_LEN, M_NOWAIT);
if (m0 == NULL) {
error = ENOBUFS;
continue;
diff --git a/freebsd/sys/net/if_clone.c b/freebsd/sys/net/if_clone.c
index 0b752139..61ba9c6c 100644
--- a/freebsd/sys/net/if_clone.c
+++ b/freebsd/sys/net/if_clone.c
@@ -1,6 +1,7 @@
#include <machine/rtems-bsd-kernel-space.h>
/*-
+ * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 1980, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
@@ -33,6 +34,7 @@
*/
#include <rtems/bsd/sys/param.h>
+#include <sys/eventhandler.h>
#include <sys/malloc.h>
#include <sys/limits.h>
#include <rtems/bsd/sys/lock.h>
@@ -43,29 +45,74 @@
#include <sys/socket.h>
#include <net/if.h>
-#include <net/if_clone.h>
-#if 0
-#include <net/if_dl.h>
-#endif
-#include <net/if_types.h>
#include <net/if_var.h>
+#include <net/if_clone.h>
#include <net/radix.h>
#include <net/route.h>
#include <net/vnet.h>
+/* Current IF_MAXUNIT expands maximum to 5 characters. */
+#define IFCLOSIZ (IFNAMSIZ - 5)
+
+/*
+ * Structure describing a `cloning' interface.
+ *
+ * List of locks
+ * (c) const until freeing
+ * (d) driver specific data, may need external protection.
+ * (e) locked by if_cloners_mtx
+ * (i) locked by ifc_mtx mtx
+ */
+struct if_clone {
+ char ifc_name[IFCLOSIZ]; /* (c) Name of device, e.g. `gif' */
+ struct unrhdr *ifc_unrhdr; /* (c) alloc_unr(9) header */
+ int ifc_maxunit; /* (c) maximum unit number */
+ long ifc_refcnt; /* (i) Reference count. */
+ LIST_HEAD(, ifnet) ifc_iflist; /* (i) List of cloned interfaces */
+ struct mtx ifc_mtx; /* Mutex to protect members. */
+
+ enum { SIMPLE, ADVANCED } ifc_type; /* (c) */
+
+ /* (c) Driver specific cloning functions. Called with no locks held. */
+ union {
+ struct { /* advanced cloner */
+ ifc_match_t *_ifc_match;
+ ifc_create_t *_ifc_create;
+ ifc_destroy_t *_ifc_destroy;
+ } A;
+ struct { /* simple cloner */
+ ifcs_create_t *_ifcs_create;
+ ifcs_destroy_t *_ifcs_destroy;
+ int _ifcs_minifs; /* minimum ifs */
+
+ } S;
+ } U;
+#define ifc_match U.A._ifc_match
+#define ifc_create U.A._ifc_create
+#define ifc_destroy U.A._ifc_destroy
+#define ifcs_create U.S._ifcs_create
+#define ifcs_destroy U.S._ifcs_destroy
+#define ifcs_minifs U.S._ifcs_minifs
+
+ LIST_ENTRY(if_clone) ifc_list; /* (e) On list of cloners */
+};
+
static void if_clone_free(struct if_clone *ifc);
static int if_clone_createif(struct if_clone *ifc, char *name, size_t len,
caddr_t params);
-static struct mtx if_cloners_mtx;
+static int ifc_simple_match(struct if_clone *, const char *);
+static int ifc_simple_create(struct if_clone *, char *, size_t, caddr_t);
+static int ifc_simple_destroy(struct if_clone *, struct ifnet *);
+
+static struct mtx if_cloners_mtx;
+MTX_SYSINIT(if_cloners_lock, &if_cloners_mtx, "if_cloners lock", MTX_DEF);
static VNET_DEFINE(int, if_cloners_count);
VNET_DEFINE(LIST_HEAD(, if_clone), if_cloners);
#define V_if_cloners_count VNET(if_cloners_count)
#define V_if_cloners VNET(if_cloners)
-#define IF_CLONERS_LOCK_INIT() \
- mtx_init(&if_cloners_mtx, "if_cloners lock", NULL, MTX_DEF)
#define IF_CLONERS_LOCK_ASSERT() mtx_assert(&if_cloners_mtx, MA_OWNED)
#define IF_CLONERS_LOCK() mtx_lock(&if_cloners_mtx)
#define IF_CLONERS_UNLOCK() mtx_unlock(&if_cloners_mtx)
@@ -123,13 +170,6 @@ vnet_if_clone_init(void)
LIST_INIT(&V_if_cloners);
}
-void
-if_clone_init(void)
-{
-
- IF_CLONERS_LOCK_INIT();
-}
-
/*
* Lookup and create a clone network interface.
*/
@@ -140,18 +180,25 @@ if_clone_create(char *name, size_t len, caddr_t params)
/* Try to find an applicable cloner for this request */
IF_CLONERS_LOCK();
- LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
- if (ifc->ifc_match(ifc, name)) {
- break;
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list)
+ if (ifc->ifc_type == SIMPLE) {
+ if (ifc_simple_match(ifc, name))
+ break;
+ } else {
+ if (ifc->ifc_match(ifc, name))
+ break;
}
- }
#ifdef VIMAGE
if (ifc == NULL && !IS_DEFAULT_VNET(curvnet)) {
CURVNET_SET_QUIET(vnet0);
- LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
- if (ifc->ifc_match(ifc, name))
- break;
- }
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list)
+ if (ifc->ifc_type == SIMPLE) {
+ if (ifc_simple_match(ifc, name))
+ break;
+ } else {
+ if (ifc->ifc_match(ifc, name))
+ break;
+ }
CURVNET_RESTORE();
}
#endif
@@ -175,7 +222,10 @@ if_clone_createif(struct if_clone *ifc, char *name, size_t len, caddr_t params)
if (ifunit(name) != NULL)
return (EEXIST);
- err = (*ifc->ifc_create)(ifc, name, len, params);
+ if (ifc->ifc_type == SIMPLE)
+ err = ifc_simple_create(ifc, name, len, params);
+ else
+ err = (*ifc->ifc_create)(ifc, name, len, params);
if (!err) {
ifp = ifunit(name);
@@ -216,10 +266,14 @@ if_clone_destroy(const char *name)
#ifdef VIMAGE
if (ifc == NULL && !IS_DEFAULT_VNET(curvnet)) {
CURVNET_SET_QUIET(vnet0);
- LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
- if (ifc->ifc_match(ifc, name))
- break;
- }
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list)
+ if (ifc->ifc_type == SIMPLE) {
+ if (ifc_simple_match(ifc, name))
+ break;
+ } else {
+ if (ifc->ifc_match(ifc, name))
+ break;
+ }
CURVNET_RESTORE();
}
#endif
@@ -243,7 +297,7 @@ if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp)
int err;
struct ifnet *ifcifp;
- if (ifc->ifc_destroy == NULL)
+ if (ifc->ifc_type == ADVANCED && ifc->ifc_destroy == NULL)
return(EOPNOTSUPP);
/*
@@ -268,7 +322,10 @@ if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp)
if_delgroup(ifp, ifc->ifc_name);
- err = (*ifc->ifc_destroy)(ifc, ifp);
+ if (ifc->ifc_type == SIMPLE)
+ err = ifc_simple_destroy(ifc, ifp);
+ else
+ err = (*ifc->ifc_destroy)(ifc, ifp);
if (err != 0) {
if_addgroup(ifp, ifc->ifc_name);
@@ -281,36 +338,97 @@ if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp)
return (err);
}
-/*
- * Register a network interface cloner.
- */
-void
-if_clone_attach(struct if_clone *ifc)
+static struct if_clone *
+if_clone_alloc(const char *name, int maxunit)
{
- int len, maxclone;
+ struct if_clone *ifc;
- /*
- * Compute bitmap size and allocate it.
- */
- maxclone = ifc->ifc_maxunit + 1;
- len = maxclone >> 3;
- if ((len << 3) < maxclone)
- len++;
- ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO);
- ifc->ifc_bmlen = len;
+ KASSERT(name != NULL, ("%s: no name\n", __func__));
+
+ ifc = malloc(sizeof(struct if_clone), M_CLONE, M_WAITOK | M_ZERO);
+ strncpy(ifc->ifc_name, name, IFCLOSIZ-1);
IF_CLONE_LOCK_INIT(ifc);
IF_CLONE_ADDREF(ifc);
+ ifc->ifc_maxunit = maxunit ? maxunit : IF_MAXUNIT;
+ ifc->ifc_unrhdr = new_unrhdr(0, ifc->ifc_maxunit, &ifc->ifc_mtx);
+ LIST_INIT(&ifc->ifc_iflist);
+
+ return (ifc);
+}
+
+static int
+if_clone_attach(struct if_clone *ifc)
+{
+ struct if_clone *ifc1;
IF_CLONERS_LOCK();
+ LIST_FOREACH(ifc1, &V_if_cloners, ifc_list)
+ if (strcmp(ifc->ifc_name, ifc1->ifc_name) == 0) {
+ IF_CLONERS_UNLOCK();
+ IF_CLONE_REMREF(ifc);
+ return (EEXIST);
+ }
LIST_INSERT_HEAD(&V_if_cloners, ifc, ifc_list);
V_if_cloners_count++;
IF_CLONERS_UNLOCK();
- LIST_INIT(&ifc->ifc_iflist);
+ return (0);
+}
+
+struct if_clone *
+if_clone_advanced(const char *name, u_int maxunit, ifc_match_t match,
+ ifc_create_t create, ifc_destroy_t destroy)
+{
+ struct if_clone *ifc;
+
+ ifc = if_clone_alloc(name, maxunit);
+ ifc->ifc_type = ADVANCED;
+ ifc->ifc_match = match;
+ ifc->ifc_create = create;
+ ifc->ifc_destroy = destroy;
+
+ if (if_clone_attach(ifc) != 0) {
+ if_clone_free(ifc);
+ return (NULL);
+ }
- if (ifc->ifc_attach != NULL)
- (*ifc->ifc_attach)(ifc);
EVENTHANDLER_INVOKE(if_clone_event, ifc);
+
+ return (ifc);
+}
+
+struct if_clone *
+if_clone_simple(const char *name, ifcs_create_t create, ifcs_destroy_t destroy,
+ u_int minifs)
+{
+ struct if_clone *ifc;
+ u_int unit;
+
+ ifc = if_clone_alloc(name, 0);
+ ifc->ifc_type = SIMPLE;
+ ifc->ifcs_create = create;
+ ifc->ifcs_destroy = destroy;
+ ifc->ifcs_minifs = minifs;
+
+ if (if_clone_attach(ifc) != 0) {
+ if_clone_free(ifc);
+ return (NULL);
+ }
+
+ for (unit = 0; unit < minifs; unit++) {
+ char name[IFNAMSIZ];
+ int error;
+
+ snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, unit);
+ error = if_clone_createif(ifc, name, IFNAMSIZ, NULL);
+ KASSERT(error == 0,
+ ("%s: failed to create required interface %s",
+ __func__, name));
+ }
+
+ EVENTHANDLER_INVOKE(if_clone_event, ifc);
+
+ return (ifc);
}
/*
@@ -319,7 +437,6 @@ if_clone_attach(struct if_clone *ifc)
void
if_clone_detach(struct if_clone *ifc)
{
- struct ifc_simple_data *ifcs = ifc->ifc_data;
IF_CLONERS_LOCK();
LIST_REMOVE(ifc, ifc_list);
@@ -327,8 +444,8 @@ if_clone_detach(struct if_clone *ifc)
IF_CLONERS_UNLOCK();
/* Allow all simples to be destroyed */
- if (ifc->ifc_attach == ifc_simple_attach)
- ifcs->ifcs_minifs = 0;
+ if (ifc->ifc_type == SIMPLE)
+ ifc->ifcs_minifs = 0;
/* destroy all interfaces for this cloner */
while (!LIST_EMPTY(&ifc->ifc_iflist))
@@ -340,16 +457,13 @@ if_clone_detach(struct if_clone *ifc)
static void
if_clone_free(struct if_clone *ifc)
{
- for (int bytoff = 0; bytoff < ifc->ifc_bmlen; bytoff++) {
- KASSERT(ifc->ifc_units[bytoff] == 0x00,
- ("ifc_units[%d] is not empty", bytoff));
- }
KASSERT(LIST_EMPTY(&ifc->ifc_iflist),
("%s: ifc_iflist not empty", __func__));
IF_CLONE_LOCK_DESTROY(ifc);
- free(ifc->ifc_units, M_CLONE);
+ delete_unrhdr(ifc->ifc_unrhdr);
+ free(ifc, M_CLONE);
}
/*
@@ -372,7 +486,7 @@ if_clone_list(struct if_clonereq *ifcr)
* below, but that's not a major problem. Not caping our
* allocation to the number of cloners actually in the system
* could be because that would let arbitrary users cause us to
- * allocate abritrary amounts of kernel memory.
+ * allocate arbitrary amounts of kernel memory.
*/
buf_count = (V_if_cloners_count < ifcr->ifcr_count) ?
V_if_cloners_count : ifcr->ifcr_count;
@@ -406,6 +520,49 @@ done:
}
/*
+ * if_clone_findifc() looks up ifnet from the current
+ * cloner list, and returns ifc if found. Note that ifc_refcnt
+ * is incremented.
+ */
+struct if_clone *
+if_clone_findifc(struct ifnet *ifp)
+{
+ struct if_clone *ifc, *ifc0;
+ struct ifnet *ifcifp;
+
+ ifc0 = NULL;
+ IF_CLONERS_LOCK();
+ LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
+ IF_CLONE_LOCK(ifc);
+ LIST_FOREACH(ifcifp, &ifc->ifc_iflist, if_clones) {
+ if (ifp == ifcifp) {
+ ifc0 = ifc;
+ IF_CLONE_ADDREF_LOCKED(ifc);
+ break;
+ }
+ }
+ IF_CLONE_UNLOCK(ifc);
+ if (ifc0 != NULL)
+ break;
+ }
+ IF_CLONERS_UNLOCK();
+
+ return (ifc0);
+}
+
+/*
+ * if_clone_addgroup() decrements ifc_refcnt because it is called after
+ * if_clone_findifc().
+ */
+void
+if_clone_addgroup(struct ifnet *ifp, struct if_clone *ifc)
+{
+
+ if_addgroup(ifp, ifc->ifc_name);
+ IF_CLONE_REMREF(ifc);
+}
+
+/*
* A utility function to extract unit numbers from interface names of
* the form name###.
*
@@ -443,98 +600,52 @@ ifc_name2unit(const char *name, int *unit)
int
ifc_alloc_unit(struct if_clone *ifc, int *unit)
{
- int wildcard, bytoff, bitoff;
- int err = 0;
-
- IF_CLONE_LOCK(ifc);
+ char name[IFNAMSIZ];
+ int wildcard;
- bytoff = bitoff = 0;
wildcard = (*unit < 0);
- /*
- * Find a free unit if none was given.
- */
- if (wildcard) {
- while ((bytoff < ifc->ifc_bmlen)
- && (ifc->ifc_units[bytoff] == 0xff))
- bytoff++;
- if (bytoff >= ifc->ifc_bmlen) {
- err = ENOSPC;
- goto done;
+retry:
+ if (*unit > ifc->ifc_maxunit)
+ return (ENOSPC);
+ if (*unit < 0) {
+ *unit = alloc_unr(ifc->ifc_unrhdr);
+ if (*unit == -1)
+ return (ENOSPC);
+ } else {
+ *unit = alloc_unr_specific(ifc->ifc_unrhdr, *unit);
+ if (*unit == -1) {
+ if (wildcard) {
+ (*unit)++;
+ goto retry;
+ } else
+ return (EEXIST);
}
- while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0)
- bitoff++;
- *unit = (bytoff << 3) + bitoff;
}
- if (*unit > ifc->ifc_maxunit) {
- err = ENOSPC;
- goto done;
+ snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, *unit);
+ if (ifunit(name) != NULL) {
+ free_unr(ifc->ifc_unrhdr, *unit);
+ if (wildcard) {
+ (*unit)++;
+ goto retry;
+ } else
+ return (EEXIST);
}
- if (!wildcard) {
- bytoff = *unit >> 3;
- bitoff = *unit - (bytoff << 3);
- }
-
- if((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0) {
- err = EEXIST;
- goto done;
- }
- /*
- * Allocate the unit in the bitmap.
- */
- KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0,
- ("%s: bit is already set", __func__));
- ifc->ifc_units[bytoff] |= (1 << bitoff);
- IF_CLONE_ADDREF_LOCKED(ifc);
+ IF_CLONE_ADDREF(ifc);
-done:
- IF_CLONE_UNLOCK(ifc);
- return (err);
+ return (0);
}
void
ifc_free_unit(struct if_clone *ifc, int unit)
{
- int bytoff, bitoff;
-
-
- /*
- * Compute offset in the bitmap and deallocate the unit.
- */
- bytoff = unit >> 3;
- bitoff = unit - (bytoff << 3);
- IF_CLONE_LOCK(ifc);
- KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0,
- ("%s: bit is already cleared", __func__));
- ifc->ifc_units[bytoff] &= ~(1 << bitoff);
- IF_CLONE_REMREF_LOCKED(ifc); /* releases lock */
-}
-
-void
-ifc_simple_attach(struct if_clone *ifc)
-{
- int err;
- int unit;
- char name[IFNAMSIZ];
- struct ifc_simple_data *ifcs = ifc->ifc_data;
-
- KASSERT(ifcs->ifcs_minifs - 1 <= ifc->ifc_maxunit,
- ("%s: %s requested more units than allowed (%d > %d)",
- __func__, ifc->ifc_name, ifcs->ifcs_minifs,
- ifc->ifc_maxunit + 1));
-
- for (unit = 0; unit < ifcs->ifcs_minifs; unit++) {
- snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, unit);
- err = if_clone_createif(ifc, name, IFNAMSIZ, NULL);
- KASSERT(err == 0,
- ("%s: failed to create required interface %s",
- __func__, name));
- }
+ free_unr(ifc->ifc_unrhdr, unit);
+ IF_CLONE_REMREF(ifc);
}
-int
+static int
ifc_simple_match(struct if_clone *ifc, const char *name)
{
const char *cp;
@@ -555,14 +666,13 @@ ifc_simple_match(struct if_clone *ifc, const char *name)
return (1);
}
-int
+static int
ifc_simple_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
{
char *dp;
int wildcard;
int unit;
int err;
- struct ifc_simple_data *ifcs = ifc->ifc_data;
err = ifc_name2unit(name, &unit);
if (err != 0)
@@ -574,7 +684,7 @@ ifc_simple_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
if (err != 0)
return (err);
- err = ifcs->ifcs_create(ifc, unit, params);
+ err = ifc->ifcs_create(ifc, unit, params);
if (err != 0) {
ifc_free_unit(ifc, unit);
return (err);
@@ -598,18 +708,17 @@ ifc_simple_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
return (0);
}
-int
+static int
ifc_simple_destroy(struct if_clone *ifc, struct ifnet *ifp)
{
int unit;
- struct ifc_simple_data *ifcs = ifc->ifc_data;
unit = ifp->if_dunit;
- if (unit < ifcs->ifcs_minifs)
+ if (unit < ifc->ifcs_minifs)
return (EINVAL);
- ifcs->ifcs_destroy(ifp);
+ ifc->ifcs_destroy(ifp);
ifc_free_unit(ifc, unit);
diff --git a/freebsd/sys/net/if_clone.h b/freebsd/sys/net/if_clone.h
index f125f8b5..3a60b0a1 100644
--- a/freebsd/sys/net/if_clone.h
+++ b/freebsd/sys/net/if_clone.h
@@ -35,82 +35,45 @@
#ifdef _KERNEL
-#define IFC_CLONE_INITIALIZER(name, data, maxunit, \
- attach, match, create, destroy) \
- { { 0 }, name, maxunit, NULL, 0, data, attach, match, create, destroy }
-
-/*
- * Structure describing a `cloning' interface.
- *
- * List of locks
- * (c) const until freeing
- * (d) driver specific data, may need external protection.
- * (e) locked by if_cloners_mtx
- * (i) locked by ifc_mtx mtx
- */
-struct if_clone {
- LIST_ENTRY(if_clone) ifc_list; /* (e) On list of cloners */
- const char *ifc_name; /* (c) Name of device, e.g. `gif' */
- int ifc_maxunit; /* (c) Maximum unit number */
- unsigned char *ifc_units; /* (i) Bitmap to handle units. */
- /* Considered private, access */
- /* via ifc_(alloc|free)_unit(). */
- int ifc_bmlen; /* (c) Bitmap length. */
- void *ifc_data; /* (*) Data for ifc_* functions. */
-
- /* (c) Driver specific cloning functions. Called with no locks held. */
- void (*ifc_attach)(struct if_clone *);
- int (*ifc_match)(struct if_clone *, const char *);
- int (*ifc_create)(struct if_clone *, char *, size_t, caddr_t);
- int (*ifc_destroy)(struct if_clone *, struct ifnet *);
-
- long ifc_refcnt; /* (i) Refrence count. */
- struct mtx ifc_mtx; /* Muted to protect members. */
- LIST_HEAD(, ifnet) ifc_iflist; /* (i) List of cloned interfaces */
-};
-
-void if_clone_init(void);
-void if_clone_attach(struct if_clone *);
+struct if_clone;
+
+/* Methods. */
+typedef int ifc_match_t(struct if_clone *, const char *);
+typedef int ifc_create_t(struct if_clone *, char *, size_t, caddr_t);
+typedef int ifc_destroy_t(struct if_clone *, struct ifnet *);
+
+typedef int ifcs_create_t(struct if_clone *, int, caddr_t);
+typedef void ifcs_destroy_t(struct ifnet *);
+
+/* Interface cloner (de)allocating functions. */
+struct if_clone *
+ if_clone_advanced(const char *, u_int, ifc_match_t, ifc_create_t,
+ ifc_destroy_t);
+struct if_clone *
+ if_clone_simple(const char *, ifcs_create_t, ifcs_destroy_t, u_int);
void if_clone_detach(struct if_clone *);
-void vnet_if_clone_init(void);
-
-int if_clone_create(char *, size_t, caddr_t);
-int if_clone_destroy(const char *);
-int if_clone_destroyif(struct if_clone *, struct ifnet *);
-int if_clone_list(struct if_clonereq *);
+/* Unit (de)allocating functions. */
int ifc_name2unit(const char *name, int *unit);
int ifc_alloc_unit(struct if_clone *, int *);
void ifc_free_unit(struct if_clone *, int);
-/*
- * The ifc_simple functions, structures, and macros implement basic
- * cloning as in 5.[012].
- */
-
-struct ifc_simple_data {
- int ifcs_minifs; /* minimum number of interfaces */
-
- int (*ifcs_create)(struct if_clone *, int, caddr_t);
- void (*ifcs_destroy)(struct ifnet *);
-};
-
-/* interface clone event */
+#ifdef _SYS_EVENTHANDLER_H_
+/* Interface clone event. */
typedef void (*if_clone_event_handler_t)(void *, struct if_clone *);
EVENTHANDLER_DECLARE(if_clone_event, if_clone_event_handler_t);
+#endif
-#define IFC_SIMPLE_DECLARE(name, minifs) \
-struct ifc_simple_data name##_cloner_data = \
- {minifs, name##_clone_create, name##_clone_destroy}; \
-struct if_clone name##_cloner = \
- IFC_CLONE_INITIALIZER(#name, &name##_cloner_data, IF_MAXUNIT, \
- ifc_simple_attach, ifc_simple_match, ifc_simple_create, ifc_simple_destroy)
+/* The below interfaces used only by net/if.c. */
+void vnet_if_clone_init(void);
+int if_clone_create(char *, size_t, caddr_t);
+int if_clone_destroy(const char *);
+int if_clone_list(struct if_clonereq *);
+struct if_clone *if_clone_findifc(struct ifnet *);
+void if_clone_addgroup(struct ifnet *, struct if_clone *);
-void ifc_simple_attach(struct if_clone *);
-int ifc_simple_match(struct if_clone *, const char *);
-int ifc_simple_create(struct if_clone *, char *, size_t, caddr_t);
-int ifc_simple_destroy(struct if_clone *, struct ifnet *);
+/* The below interface used only by epair(4). */
+int if_clone_destroyif(struct if_clone *, struct ifnet *);
#endif /* _KERNEL */
-
#endif /* !_NET_IF_CLONE_H_ */
diff --git a/freebsd/sys/net/if_dead.c b/freebsd/sys/net/if_dead.c
index b85793f8..e290823c 100644
--- a/freebsd/sys/net/if_dead.c
+++ b/freebsd/sys/net/if_dead.c
@@ -44,7 +44,7 @@ __FBSDID("$FreeBSD$");
#include <net/if_var.h>
static int
-ifdead_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
+ifdead_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
struct route *ro)
{
@@ -95,6 +95,13 @@ ifdead_transmit(struct ifnet *ifp, struct mbuf *m)
return (ENXIO);
}
+static uint64_t
+ifdead_get_counter(struct ifnet *ifp, ift_counter cnt)
+{
+
+ return (0);
+}
+
void
if_dead(struct ifnet *ifp)
{
@@ -106,4 +113,5 @@ if_dead(struct ifnet *ifp)
ifp->if_resolvemulti = ifdead_resolvemulti;
ifp->if_qflush = ifdead_qflush;
ifp->if_transmit = ifdead_transmit;
+ ifp->if_get_counter = ifdead_get_counter;
}
diff --git a/freebsd/sys/net/if_disc.c b/freebsd/sys/net/if_disc.c
index 3d4f3159..a2e5a7e8 100644
--- a/freebsd/sys/net/if_disc.c
+++ b/freebsd/sys/net/if_disc.c
@@ -47,10 +47,12 @@
#include <sys/sockio.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/bpf.h>
+#include <net/vnet.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
@@ -61,22 +63,21 @@
#define DSMTU 65532
#endif
-#define DISCNAME "disc"
-
struct disc_softc {
struct ifnet *sc_ifp;
};
static int discoutput(struct ifnet *, struct mbuf *,
- struct sockaddr *, struct route *);
-static void discrtrequest(int, struct rtentry *, struct rt_addrinfo *);
+ const struct sockaddr *, struct route *);
static int discioctl(struct ifnet *, u_long, caddr_t);
static int disc_clone_create(struct if_clone *, int, caddr_t);
static void disc_clone_destroy(struct ifnet *);
-static MALLOC_DEFINE(M_DISC, DISCNAME, "Discard interface");
+static const char discname[] = "disc";
+static MALLOC_DEFINE(M_DISC, discname, "Discard interface");
-IFC_SIMPLE_DECLARE(disc, 0);
+static VNET_DEFINE(struct if_clone *, disc_cloner);
+#define V_disc_cloner VNET(disc_cloner)
static int
disc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
@@ -92,7 +93,7 @@ disc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
}
ifp->if_softc = sc;
- if_initname(ifp, ifc->ifc_name, unit);
+ if_initname(ifp, discname, unit);
ifp->if_mtu = DSMTU;
/*
* IFF_LOOPBACK should not be removed from disc's flags because
@@ -131,16 +132,32 @@ disc_clone_destroy(struct ifnet *ifp)
free(sc, M_DISC);
}
+static void
+vnet_disc_init(const void *unused __unused)
+{
+
+ V_disc_cloner = if_clone_simple(discname, disc_clone_create,
+ disc_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_disc_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
+ vnet_disc_init, NULL);
+
+static void
+vnet_disc_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(V_disc_cloner);
+}
+VNET_SYSUNINIT(vnet_disc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
+ vnet_disc_uninit, NULL);
+
static int
disc_modevent(module_t mod, int type, void *data)
{
switch (type) {
case MOD_LOAD:
- if_clone_attach(&disc_cloner);
- break;
case MOD_UNLOAD:
- if_clone_detach(&disc_cloner);
break;
default:
return (EOPNOTSUPP);
@@ -157,7 +174,7 @@ static moduledata_t disc_mod = {
DECLARE_MODULE(if_disc, disc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
static int
-discoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+discoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
u_int32_t af;
@@ -165,62 +182,47 @@ discoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
M_ASSERTPKTHDR(m);
/* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
+ if (dst->sa_family == AF_UNSPEC)
bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
+ else
+ af = dst->sa_family;
- if (bpf_peers_present(ifp->if_bpf)) {
- u_int af = dst->sa_family;
+ if (bpf_peers_present(ifp->if_bpf))
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
- }
+
m->m_pkthdr.rcvif = ifp;
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
m_freem(m);
return (0);
}
-/* ARGSUSED */
-static void
-discrtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
-{
- RT_LOCK_ASSERT(rt);
- rt->rt_rmx.rmx_mtu = DSMTU;
-}
-
/*
* Process an ioctl request.
*/
static int
discioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- struct ifaddr *ifa;
struct ifreq *ifr = (struct ifreq *)data;
int error = 0;
switch (cmd) {
-
case SIOCSIFADDR:
ifp->if_flags |= IFF_UP;
- ifa = (struct ifaddr *)data;
- if (ifa != 0)
- ifa->ifa_rtrequest = discrtrequest;
+
/*
* Everything else is done at a higher level.
*/
break;
-
case SIOCADDMULTI:
case SIOCDELMULTI:
- if (ifr == 0) {
+ if (ifr == NULL) {
error = EAFNOSUPPORT; /* XXX */
break;
}
switch (ifr->ifr_addr.sa_family) {
-
#ifdef INET
case AF_INET:
break;
@@ -229,17 +231,14 @@ discioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case AF_INET6:
break;
#endif
-
default:
error = EAFNOSUPPORT;
break;
}
break;
-
case SIOCSIFMTU:
ifp->if_mtu = ifr->ifr_mtu;
break;
-
default:
error = EINVAL;
}
diff --git a/freebsd/sys/net/if_dl.h b/freebsd/sys/net/if_dl.h
index 8d88623d..f53bc5e4 100644
--- a/freebsd/sys/net/if_dl.h
+++ b/freebsd/sys/net/if_dl.h
@@ -67,6 +67,14 @@ struct sockaddr_dl {
};
#define LLADDR(s) ((caddr_t)((s)->sdl_data + (s)->sdl_nlen))
+#define CLLADDR(s) ((c_caddr_t)((s)->sdl_data + (s)->sdl_nlen))
+#define LLINDEX(s) ((s)->sdl_index)
+
+
+struct ifnet;
+struct sockaddr_dl *link_alloc_sdl(size_t, int);
+void link_free_sdl(struct sockaddr *sa);
+struct sockaddr_dl *link_init_sdl(struct ifnet *, struct sockaddr *, u_char);
#ifndef _KERNEL
diff --git a/freebsd/sys/net/if_edsc.c b/freebsd/sys/net/if_edsc.c
index 6bb80fdb..d90f072a 100644
--- a/freebsd/sys/net/if_edsc.c
+++ b/freebsd/sys/net/if_edsc.c
@@ -48,10 +48,14 @@
#include <net/bpf.h> /* bpf(9) */
#include <net/ethernet.h> /* Ethernet related constants and types */
-#include <net/if.h> /* basic part of ifnet(9) */
+#include <net/if.h>
+#include <net/if_var.h> /* basic part of ifnet(9) */
#include <net/if_clone.h> /* network interface cloning */
#include <net/if_types.h> /* IFT_ETHER and friends */
#include <net/if_var.h> /* kernel-only part of ifnet(9) */
+#include <net/vnet.h>
+
+static const char edscname[] = "edsc";
/*
* Software configuration of an interface specific to this device type.
@@ -66,9 +70,10 @@ struct edsc_softc {
};
/*
- * Simple cloning methods.
- * IFC_SIMPLE_DECLARE() expects precisely these names.
+ * Attach to the interface cloning framework.
*/
+static VNET_DEFINE(struct if_clone *, edsc_cloner);
+#define V_edsc_cloner VNET(edsc_cloner)
static int edsc_clone_create(struct if_clone *, int, caddr_t);
static void edsc_clone_destroy(struct ifnet *);
@@ -83,15 +88,7 @@ static void edsc_start(struct ifnet *ifp);
/*
* We'll allocate softc instances from this.
*/
-static MALLOC_DEFINE(M_EDSC, "edsc", "Ethernet discard interface");
-
-/*
- * Attach to the interface cloning framework under the name of "edsc".
- * The second argument is the number of units to be created from
- * the outset. It's also the minimum number of units allowed.
- * We don't want any units created as soon as the driver is loaded.
- */
-IFC_SIMPLE_DECLARE(edsc, 0);
+static MALLOC_DEFINE(M_EDSC, edscname, "Ethernet discard interface");
/*
* Create an interface instance.
@@ -118,7 +115,7 @@ edsc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
/*
* Get a name for this particular interface in its ifnet structure.
*/
- if_initname(ifp, ifc->ifc_name, unit);
+ if_initname(ifp, edscname, unit);
/*
* Typical Ethernet interface flags: we can do broadcast and
@@ -298,8 +295,8 @@ edsc_start(struct ifnet *ifp)
/*
* Update the interface counters.
*/
- ifp->if_obytes += m->m_pkthdr.len;
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
/*
* Finally, just drop the packet.
@@ -314,6 +311,36 @@ edsc_start(struct ifnet *ifp)
*/
}
+static void
+vnet_edsc_init(const void *unused __unused)
+{
+
+ /*
+ * Connect to the network interface cloning framework.
+ * The last argument is the number of units to be created
+ * from the outset. It's also the minimum number of units
+ * allowed. We don't want any units created as soon as the
+ * driver is loaded.
+ */
+ V_edsc_cloner = if_clone_simple(edscname, edsc_clone_create,
+ edsc_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_edsc_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_edsc_init, NULL);
+
+static void
+vnet_edsc_uninit(const void *unused __unused)
+{
+
+ /*
+ * Disconnect from the cloning framework.
+ * Existing interfaces will be disposed of properly.
+ */
+ if_clone_detach(V_edsc_cloner);
+}
+VNET_SYSUNINIT(vnet_edsc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
+ vnet_edsc_uninit, NULL);
+
/*
* This function provides handlers for module events, namely load and unload.
*/
@@ -323,20 +350,8 @@ edsc_modevent(module_t mod, int type, void *data)
switch (type) {
case MOD_LOAD:
- /*
- * Connect to the network interface cloning framework.
- */
- if_clone_attach(&edsc_cloner);
- break;
-
case MOD_UNLOAD:
- /*
- * Disconnect from the cloning framework.
- * Existing interfaces will be disposed of properly.
- */
- if_clone_detach(&edsc_cloner);
break;
-
default:
/*
* There are other event types, but we don't handle them.
diff --git a/freebsd/sys/net/if_ef.c b/freebsd/sys/net/if_ef.c
deleted file mode 100644
index 4aa76712..00000000
--- a/freebsd/sys/net/if_ef.c
+++ /dev/null
@@ -1,610 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*-
- * Copyright (c) 1999, 2000 Boris Popov
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#include <rtems/bsd/local/opt_inet.h>
-#include <rtems/bsd/local/opt_ipx.h>
-#include <rtems/bsd/local/opt_ef.h>
-
-#include <rtems/bsd/sys/param.h>
-#include <sys/systm.h>
-#include <sys/sockio.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/socket.h>
-#include <sys/syslog.h>
-#include <sys/kernel.h>
-#include <sys/module.h>
-
-#include <net/ethernet.h>
-#include <net/if_llc.h>
-#include <net/if.h>
-#include <net/if_arp.h>
-#include <net/if_dl.h>
-#include <net/if_types.h>
-#include <net/netisr.h>
-#include <net/bpf.h>
-#include <net/vnet.h>
-
-#ifdef INET
-#include <netinet/in.h>
-#include <netinet/in_var.h>
-#include <netinet/if_ether.h>
-#endif
-
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
-/* If none of the supported layers is enabled explicitly enable them all */
-#if !defined(ETHER_II) && !defined(ETHER_8023) && !defined(ETHER_8022) && \
- !defined(ETHER_SNAP)
-#define ETHER_II 1
-#define ETHER_8023 1
-#define ETHER_8022 1
-#define ETHER_SNAP 1
-#endif
-
-/* internal frame types */
-#define ETHER_FT_EII 0 /* Ethernet_II - default */
-#define ETHER_FT_8023 1 /* 802.3 (Novell) */
-#define ETHER_FT_8022 2 /* 802.2 */
-#define ETHER_FT_SNAP 3 /* SNAP */
-#define EF_NFT 4 /* total number of frame types */
-
-#ifdef EF_DEBUG
-#define EFDEBUG(format, args...) printf("%s: "format, __func__ ,## args)
-#else
-#define EFDEBUG(format, args...)
-#endif
-
-#define EFERROR(format, args...) printf("%s: "format, __func__ ,## args)
-
-struct efnet {
- struct ifnet *ef_ifp;
- struct ifnet *ef_pifp;
- int ef_frametype;
-};
-
-struct ef_link {
- SLIST_ENTRY(ef_link) el_next;
- struct ifnet *el_ifp; /* raw device for this clones */
- struct efnet *el_units[EF_NFT]; /* our clones */
-};
-
-static SLIST_HEAD(ef_link_head, ef_link) efdev = {NULL};
-static int efcount;
-
-extern int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m);
-extern int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp,
- struct sockaddr *dst, short *tp, int *hlen);
-
-/*
-static void ef_reset (struct ifnet *);
-*/
-static int ef_attach(struct efnet *sc);
-static int ef_detach(struct efnet *sc);
-static void ef_init(void *);
-static int ef_ioctl(struct ifnet *, u_long, caddr_t);
-static void ef_start(struct ifnet *);
-static int ef_input(struct ifnet*, struct ether_header *, struct mbuf *);
-static int ef_output(struct ifnet *ifp, struct mbuf **mp,
- struct sockaddr *dst, short *tp, int *hlen);
-
-static int ef_load(void);
-static int ef_unload(void);
-
-/*
- * Install the interface, most of structure initialization done in ef_clone()
- */
-static int
-ef_attach(struct efnet *sc)
-{
- struct ifnet *ifp = sc->ef_ifp;
-
- ifp->if_start = ef_start;
- ifp->if_init = ef_init;
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
- ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
- /*
- * Attach the interface
- */
- ether_ifattach(ifp, IF_LLADDR(sc->ef_pifp));
-
- ifp->if_resolvemulti = 0;
- ifp->if_type = IFT_XETHER;
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
-
- EFDEBUG("%s: attached\n", ifp->if_xname);
- return 1;
-}
-
-/*
- * This is for _testing_only_, just removes interface from interfaces list
- */
-static int
-ef_detach(struct efnet *sc)
-{
- struct ifnet *ifp = sc->ef_ifp;
- int s;
-
- s = splimp();
-
- ether_ifdetach(ifp);
- if_free(ifp);
-
- splx(s);
- return 0;
-}
-
-static void
-ef_init(void *foo) {
- return;
-}
-
-static int
-ef_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct efnet *sc = ifp->if_softc;
- struct ifaddr *ifa = (struct ifaddr*)data;
- int s, error;
-
- EFDEBUG("IOCTL %ld for %s\n", cmd, ifp->if_xname);
- error = 0;
- s = splimp();
- switch (cmd) {
- case SIOCSIFFLAGS:
- error = 0;
- break;
- case SIOCSIFADDR:
- if (sc->ef_frametype == ETHER_FT_8023 &&
- ifa->ifa_addr->sa_family != AF_IPX) {
- error = EAFNOSUPPORT;
- break;
- }
- ifp->if_flags |= IFF_UP;
- /* FALL THROUGH */
- default:
- error = ether_ioctl(ifp, cmd, data);
- break;
- }
- splx(s);
- return error;
-}
-
-/*
- * Currently packet prepared in the ether_output(), but this can be a better
- * place.
- */
-static void
-ef_start(struct ifnet *ifp)
-{
- struct efnet *sc = (struct efnet*)ifp->if_softc;
- struct ifnet *p;
- struct mbuf *m;
- int error;
-
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- p = sc->ef_pifp;
-
- EFDEBUG("\n");
- for (;;) {
- IF_DEQUEUE(&ifp->if_snd, m);
- if (m == 0)
- break;
- BPF_MTAP(ifp, m);
- error = p->if_transmit(p, m);
- if (error) {
- ifp->if_oerrors++;
- continue;
- }
- ifp->if_opackets++;
- }
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- return;
-}
-
-/*
- * Inline functions do not put additional overhead to procedure call or
- * parameter passing but simplify the code
- */
-static int __inline
-ef_inputEII(struct mbuf *m, struct ether_header *eh, u_short ether_type)
-{
- int isr;
-
- switch(ether_type) {
-#ifdef IPX
- case ETHERTYPE_IPX:
- isr = NETISR_IPX;
- break;
-#endif
-#ifdef INET
- case ETHERTYPE_IP:
- if ((m = ip_fastforward(m)) == NULL)
- return (0);
- isr = NETISR_IP;
- break;
-
- case ETHERTYPE_ARP:
- isr = NETISR_ARP;
- break;
-#endif
- default:
- return (EPROTONOSUPPORT);
- }
- netisr_dispatch(isr, m);
- return (0);
-}
-
-static int __inline
-ef_inputSNAP(struct mbuf *m, struct ether_header *eh, struct llc* l,
- u_short ether_type)
-{
- int isr;
-
- switch(ether_type) {
-#ifdef IPX
- case ETHERTYPE_IPX:
- m_adj(m, 8);
- isr = NETISR_IPX;
- break;
-#endif
- default:
- return (EPROTONOSUPPORT);
- }
- netisr_dispatch(isr, m);
- return (0);
-}
-
-static int __inline
-ef_input8022(struct mbuf *m, struct ether_header *eh, struct llc* l,
- u_short ether_type)
-{
- int isr;
-
- switch(ether_type) {
-#ifdef IPX
- case 0xe0:
- m_adj(m, 3);
- isr = NETISR_IPX;
- break;
-#endif
- default:
- return (EPROTONOSUPPORT);
- }
- netisr_dispatch(isr, m);
- return (0);
-}
-
-/*
- * Called from ether_input()
- */
-static int
-ef_input(struct ifnet *ifp, struct ether_header *eh, struct mbuf *m)
-{
- u_short ether_type;
- int ft = -1;
- struct efnet *efp;
- struct ifnet *eifp;
- struct llc *l;
- struct ef_link *efl;
- int isr;
-
- ether_type = ntohs(eh->ether_type);
- l = NULL;
- if (ether_type < ETHERMTU) {
- l = mtod(m, struct llc*);
- if (l->llc_dsap == 0xff && l->llc_ssap == 0xff) {
- /*
- * Novell's "802.3" frame
- */
- ft = ETHER_FT_8023;
- } else if (l->llc_dsap == 0xaa && l->llc_ssap == 0xaa) {
- /*
- * 802.2/SNAP
- */
- ft = ETHER_FT_SNAP;
- ether_type = ntohs(l->llc_un.type_snap.ether_type);
- } else if (l->llc_dsap == l->llc_ssap) {
- /*
- * 802.3/802.2
- */
- ft = ETHER_FT_8022;
- ether_type = l->llc_ssap;
- }
- } else
- ft = ETHER_FT_EII;
-
- if (ft == -1) {
- EFDEBUG("Unrecognised ether_type %x\n", ether_type);
- return EPROTONOSUPPORT;
- }
-
- /*
- * Check if interface configured for the given frame
- */
- efp = NULL;
- SLIST_FOREACH(efl, &efdev, el_next) {
- if (efl->el_ifp == ifp) {
- efp = efl->el_units[ft];
- break;
- }
- }
- if (efp == NULL) {
- EFDEBUG("Can't find if for %d\n", ft);
- return EPROTONOSUPPORT;
- }
- eifp = efp->ef_ifp;
- if ((eifp->if_flags & IFF_UP) == 0)
- return EPROTONOSUPPORT;
- eifp->if_ibytes += m->m_pkthdr.len + sizeof (*eh);
- m->m_pkthdr.rcvif = eifp;
-
- BPF_MTAP2(eifp, eh, ETHER_HDR_LEN, m);
- /*
- * Now we ready to adjust mbufs and pass them to protocol intr's
- */
- switch(ft) {
- case ETHER_FT_EII:
- return (ef_inputEII(m, eh, ether_type));
-#ifdef IPX
- case ETHER_FT_8023: /* only IPX can be here */
- isr = NETISR_IPX;
- break;
-#endif
- case ETHER_FT_SNAP:
- return (ef_inputSNAP(m, eh, l, ether_type));
- case ETHER_FT_8022:
- return (ef_input8022(m, eh, l, ether_type));
- default:
- EFDEBUG("No support for frame %d and proto %04x\n",
- ft, ether_type);
- return (EPROTONOSUPPORT);
- }
- netisr_dispatch(isr, m);
- return (0);
-}
-
-static int
-ef_output(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst, short *tp,
- int *hlen)
-{
- struct efnet *sc = (struct efnet*)ifp->if_softc;
- struct mbuf *m = *mp;
- u_char *cp;
- short type;
-
- if (ifp->if_type != IFT_XETHER)
- return ENETDOWN;
- switch (sc->ef_frametype) {
- case ETHER_FT_EII:
-#ifdef IPX
- type = htons(ETHERTYPE_IPX);
-#else
- return EPFNOSUPPORT;
-#endif
- break;
- case ETHER_FT_8023:
- type = htons(m->m_pkthdr.len);
- break;
- case ETHER_FT_8022:
- M_PREPEND(m, ETHER_HDR_LEN + 3, M_WAIT);
- /*
- * Ensure that ethernet header and next three bytes
- * will fit into single mbuf
- */
- m = m_pullup(m, ETHER_HDR_LEN + 3);
- if (m == NULL) {
- *mp = NULL;
- return ENOBUFS;
- }
- m_adj(m, ETHER_HDR_LEN);
- type = htons(m->m_pkthdr.len);
- cp = mtod(m, u_char *);
- *cp++ = 0xE0;
- *cp++ = 0xE0;
- *cp++ = 0x03;
- *hlen += 3;
- break;
- case ETHER_FT_SNAP:
- M_PREPEND(m, 8, M_WAIT);
- type = htons(m->m_pkthdr.len);
- cp = mtod(m, u_char *);
- bcopy("\xAA\xAA\x03\x00\x00\x00\x81\x37", cp, 8);
- *hlen += 8;
- break;
- default:
- return EPFNOSUPPORT;
- }
- *mp = m;
- *tp = type;
- return 0;
-}
-
-/*
- * Create clone from the given interface
- */
-static int
-ef_clone(struct ef_link *efl, int ft)
-{
- struct efnet *efp;
- struct ifnet *eifp;
- struct ifnet *ifp = efl->el_ifp;
-
- efp = (struct efnet*)malloc(sizeof(struct efnet), M_IFADDR,
- M_WAITOK | M_ZERO);
- if (efp == NULL)
- return ENOMEM;
- efp->ef_pifp = ifp;
- efp->ef_frametype = ft;
- eifp = efp->ef_ifp = if_alloc(IFT_ETHER);
- if (eifp == NULL) {
- free(efp, M_IFADDR);
- return (ENOSPC);
- }
- snprintf(eifp->if_xname, IFNAMSIZ,
- "%sf%d", ifp->if_xname, efp->ef_frametype);
- eifp->if_dname = "ef";
- eifp->if_dunit = IF_DUNIT_NONE;
- eifp->if_softc = efp;
- if (ifp->if_ioctl)
- eifp->if_ioctl = ef_ioctl;
- efl->el_units[ft] = efp;
- return 0;
-}
-
-static int
-ef_load(void)
-{
- VNET_ITERATOR_DECL(vnet_iter);
- struct ifnet *ifp;
- struct efnet *efp;
- struct ef_link *efl = NULL, *efl_temp;
- int error = 0, d;
-
- VNET_LIST_RLOCK();
- VNET_FOREACH(vnet_iter) {
- CURVNET_SET(vnet_iter);
-
- /*
- * XXXRW: The following loop walks the ifnet list while
- * modifying it, something not well-supported by ifnet
- * locking. To avoid lock upgrade/recursion issues, manually
- * acquire a write lock of ifnet_sxlock here, rather than a
- * read lock, so that when if_alloc() recurses the lock, we
- * don't panic. This structure, in which if_ef automatically
- * attaches to all ethernet interfaces, should be replaced
- * with a model like that found in if_vlan, in which
- * interfaces are explicitly configured, which would avoid
- * this (and other) problems.
- */
- sx_xlock(&ifnet_sxlock);
- TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
- if (ifp->if_type != IFT_ETHER) continue;
- EFDEBUG("Found interface %s\n", ifp->if_xname);
- efl = (struct ef_link*)malloc(sizeof(struct ef_link),
- M_IFADDR, M_WAITOK | M_ZERO);
- if (efl == NULL) {
- error = ENOMEM;
- break;
- }
-
- efl->el_ifp = ifp;
-#ifdef ETHER_II
- error = ef_clone(efl, ETHER_FT_EII);
- if (error) break;
-#endif
-#ifdef ETHER_8023
- error = ef_clone(efl, ETHER_FT_8023);
- if (error) break;
-#endif
-#ifdef ETHER_8022
- error = ef_clone(efl, ETHER_FT_8022);
- if (error) break;
-#endif
-#ifdef ETHER_SNAP
- error = ef_clone(efl, ETHER_FT_SNAP);
- if (error) break;
-#endif
- efcount++;
- SLIST_INSERT_HEAD(&efdev, efl, el_next);
- }
- sx_xunlock(&ifnet_sxlock);
- CURVNET_RESTORE();
- }
- VNET_LIST_RUNLOCK();
- if (error) {
- if (efl)
- SLIST_INSERT_HEAD(&efdev, efl, el_next);
- SLIST_FOREACH_SAFE(efl, &efdev, el_next, efl_temp) {
- for (d = 0; d < EF_NFT; d++)
- if (efl->el_units[d]) {
- if (efl->el_units[d]->ef_pifp != NULL)
- if_free(efl->el_units[d]->ef_pifp);
- free(efl->el_units[d], M_IFADDR);
- }
- free(efl, M_IFADDR);
- }
- return error;
- }
- SLIST_FOREACH(efl, &efdev, el_next) {
- for (d = 0; d < EF_NFT; d++) {
- efp = efl->el_units[d];
- if (efp)
- ef_attach(efp);
- }
- }
- ef_inputp = ef_input;
- ef_outputp = ef_output;
- EFDEBUG("Loaded\n");
- return 0;
-}
-
-static int
-ef_unload(void)
-{
- struct efnet *efp;
- struct ef_link *efl;
- int d;
-
- ef_inputp = NULL;
- ef_outputp = NULL;
- SLIST_FOREACH(efl, &efdev, el_next) {
- for (d = 0; d < EF_NFT; d++) {
- efp = efl->el_units[d];
- if (efp) {
- ef_detach(efp);
- }
- }
- }
- EFDEBUG("Unloaded\n");
- return 0;
-}
-
-static int
-if_ef_modevent(module_t mod, int type, void *data)
-{
- switch ((modeventtype_t)type) {
- case MOD_LOAD:
- return ef_load();
- case MOD_UNLOAD:
- return ef_unload();
- default:
- return EOPNOTSUPP;
- }
- return 0;
-}
-
-static moduledata_t if_ef_mod = {
- "if_ef", if_ef_modevent, NULL
-};
-
-DECLARE_MODULE(if_ef, if_ef_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
diff --git a/freebsd/sys/net/if_enc.c b/freebsd/sys/net/if_enc.c
index 91d34722..d0d065b8 100644
--- a/freebsd/sys/net/if_enc.c
+++ b/freebsd/sys/net/if_enc.c
@@ -2,6 +2,7 @@
/*-
* Copyright (c) 2006 The FreeBSD Project.
+ * Copyright (c) 2015 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,10 +32,10 @@
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_enc.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/hhook.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
@@ -46,6 +47,8 @@
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_enc.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
#include <net/pfil.h>
@@ -80,56 +83,66 @@ struct enchdr {
u_int32_t spi;
u_int32_t flags;
};
-
-struct ifnet *encif;
-static struct mtx enc_mtx;
-
struct enc_softc {
struct ifnet *sc_ifp;
};
+static VNET_DEFINE(struct enc_softc *, enc_sc);
+#define V_enc_sc VNET(enc_sc)
+static VNET_DEFINE(struct if_clone *, enc_cloner);
+#define V_enc_cloner VNET(enc_cloner)
static int enc_ioctl(struct ifnet *, u_long, caddr_t);
-static int enc_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro);
+static int enc_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
static int enc_clone_create(struct if_clone *, int, caddr_t);
static void enc_clone_destroy(struct ifnet *);
+static int enc_add_hhooks(struct enc_softc *);
+static void enc_remove_hhooks(struct enc_softc *);
-IFC_SIMPLE_DECLARE(enc, 1);
-
-/*
- * Sysctls.
- */
+static const char encname[] = "enc";
/*
* Before and after are relative to when we are stripping the
* outer IP header.
*/
-static SYSCTL_NODE(_net, OID_AUTO, enc, CTLFLAG_RW, 0, "enc sysctl");
+static VNET_DEFINE(int, filter_mask_in) = IPSEC_ENC_BEFORE;
+static VNET_DEFINE(int, bpf_mask_in) = IPSEC_ENC_BEFORE;
+static VNET_DEFINE(int, filter_mask_out) = IPSEC_ENC_BEFORE;
+static VNET_DEFINE(int, bpf_mask_out) = IPSEC_ENC_BEFORE | IPSEC_ENC_AFTER;
+#define V_filter_mask_in VNET(filter_mask_in)
+#define V_bpf_mask_in VNET(bpf_mask_in)
+#define V_filter_mask_out VNET(filter_mask_out)
+#define V_bpf_mask_out VNET(bpf_mask_out)
+static SYSCTL_NODE(_net, OID_AUTO, enc, CTLFLAG_RW, 0, "enc sysctl");
static SYSCTL_NODE(_net_enc, OID_AUTO, in, CTLFLAG_RW, 0, "enc input sysctl");
-static int ipsec_filter_mask_in = ENC_BEFORE;
-SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW,
- &ipsec_filter_mask_in, 0, "IPsec input firewall filter mask");
-static int ipsec_bpf_mask_in = ENC_BEFORE;
-SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW,
- &ipsec_bpf_mask_in, 0, "IPsec input bpf mask");
-
static SYSCTL_NODE(_net_enc, OID_AUTO, out, CTLFLAG_RW, 0, "enc output sysctl");
-static int ipsec_filter_mask_out = ENC_BEFORE;
-SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_filter_mask, CTLFLAG_RW,
- &ipsec_filter_mask_out, 0, "IPsec output firewall filter mask");
-static int ipsec_bpf_mask_out = ENC_BEFORE|ENC_AFTER;
-SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_bpf_mask, CTLFLAG_RW,
- &ipsec_bpf_mask_out, 0, "IPsec output bpf mask");
+SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_filter_mask,
+ CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(filter_mask_in), 0,
+ "IPsec input firewall filter mask");
+SYSCTL_INT(_net_enc_in, OID_AUTO, ipsec_bpf_mask,
+ CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(bpf_mask_in), 0,
+ "IPsec input bpf mask");
+SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_filter_mask,
+ CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(filter_mask_out), 0,
+ "IPsec output firewall filter mask");
+SYSCTL_INT(_net_enc_out, OID_AUTO, ipsec_bpf_mask,
+ CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(bpf_mask_out), 0,
+ "IPsec output bpf mask");
static void
enc_clone_destroy(struct ifnet *ifp)
{
- KASSERT(ifp != encif, ("%s: destroying encif", __func__));
+ struct enc_softc *sc;
+
+ sc = ifp->if_softc;
+ KASSERT(sc == V_enc_sc, ("sc != ifp->if_softc"));
bpfdetach(ifp);
if_detach(ifp);
if_free(ifp);
+ free(sc, M_DEVBUF);
+ V_enc_sc = NULL;
}
static int
@@ -138,244 +151,277 @@ enc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
struct ifnet *ifp;
struct enc_softc *sc;
- sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+ sc = malloc(sizeof(struct enc_softc), M_DEVBUF,
+ M_WAITOK | M_ZERO);
ifp = sc->sc_ifp = if_alloc(IFT_ENC);
if (ifp == NULL) {
free(sc, M_DEVBUF);
return (ENOSPC);
}
-
- if_initname(ifp, ifc->ifc_name, unit);
+ if (V_enc_sc != NULL) {
+ if_free(ifp);
+ free(sc, M_DEVBUF);
+ return (EEXIST);
+ }
+ V_enc_sc = sc;
+ if_initname(ifp, encname, unit);
ifp->if_mtu = ENCMTU;
ifp->if_ioctl = enc_ioctl;
ifp->if_output = enc_output;
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
ifp->if_softc = sc;
if_attach(ifp);
bpfattach(ifp, DLT_ENC, sizeof(struct enchdr));
-
- mtx_lock(&enc_mtx);
- /* grab a pointer to enc0, ignore the rest */
- if (encif == NULL)
- encif = ifp;
- mtx_unlock(&enc_mtx);
-
return (0);
}
static int
-enc_modevent(module_t mod, int type, void *data)
+enc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
- switch (type) {
- case MOD_LOAD:
- mtx_init(&enc_mtx, "enc mtx", NULL, MTX_DEF);
- if_clone_attach(&enc_cloner);
- break;
- case MOD_UNLOAD:
- printf("enc module unload - not possible for this module\n");
- return (EINVAL);
- default:
- return (EOPNOTSUPP);
- }
+
+ m_freem(m);
return (0);
}
-static moduledata_t enc_mod = {
- "if_enc",
- enc_modevent,
- 0
-};
-
-DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
-
static int
-enc_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
- struct route *ro)
+enc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- m_freem(m);
+
+ if (cmd != SIOCSIFFLAGS)
+ return (EINVAL);
+ if (ifp->if_flags & IFF_UP)
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ else
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
return (0);
}
/*
- * Process an ioctl request.
+ * One helper hook function is used by any hook points.
+ * + from hhook_type we can determine the packet direction:
+ * HHOOK_TYPE_IPSEC_IN or HHOOK_TYPE_IPSEC_OUT;
+ * + from hhook_id we can determine address family: AF_INET or AF_INET6;
+ * + udata contains pointer to enc_softc;
+ * + ctx_data contains pointer to struct ipsec_ctx_data.
*/
-/* ARGSUSED */
static int
-enc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+enc_hhook(int32_t hhook_type, int32_t hhook_id, void *udata, void *ctx_data,
+ void *hdata, struct osd *hosd)
{
- int error = 0;
-
- mtx_lock(&enc_mtx);
+ struct enchdr hdr;
+ struct ipsec_ctx_data *ctx;
+ struct enc_softc *sc;
+ struct ifnet *ifp, *rcvif;
+ struct pfil_head *ph;
+ int pdir;
- switch (cmd) {
+ sc = (struct enc_softc *)udata;
+ ifp = sc->sc_ifp;
+ if ((ifp->if_flags & IFF_UP) == 0)
+ return (0);
- case SIOCSIFFLAGS:
- if (ifp->if_flags & IFF_UP)
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- else
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ ctx = (struct ipsec_ctx_data *)ctx_data;
+ /* XXX: wrong hook point was used by caller? */
+ if (ctx->af != hhook_id)
+ return (EPFNOSUPPORT);
+
+ if (((hhook_type == HHOOK_TYPE_IPSEC_IN &&
+ (ctx->enc & V_bpf_mask_in) != 0) ||
+ (hhook_type == HHOOK_TYPE_IPSEC_OUT &&
+ (ctx->enc & V_bpf_mask_out) != 0)) &&
+ bpf_peers_present(ifp->if_bpf) != 0) {
+ hdr.af = ctx->af;
+ hdr.spi = ctx->sav->spi;
+ hdr.flags = 0;
+ if (ctx->sav->alg_enc != SADB_EALG_NONE)
+ hdr.flags |= M_CONF;
+ if (ctx->sav->alg_auth != SADB_AALG_NONE)
+ hdr.flags |= M_AUTH;
+ bpf_mtap2(ifp->if_bpf, &hdr, sizeof(hdr), *ctx->mp);
+ }
+ switch (hhook_type) {
+ case HHOOK_TYPE_IPSEC_IN:
+ if (ctx->enc == IPSEC_ENC_BEFORE) {
+ /* Do accounting only once */
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES,
+ (*ctx->mp)->m_pkthdr.len);
+ }
+ if ((ctx->enc & V_filter_mask_in) == 0)
+ return (0); /* skip pfil processing */
+ pdir = PFIL_IN;
+ break;
+ case HHOOK_TYPE_IPSEC_OUT:
+ if (ctx->enc == IPSEC_ENC_BEFORE) {
+ /* Do accounting only once */
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES,
+ (*ctx->mp)->m_pkthdr.len);
+ }
+ if ((ctx->enc & V_filter_mask_out) == 0)
+ return (0); /* skip pfil processing */
+ pdir = PFIL_OUT;
break;
-
default:
- error = EINVAL;
+ return (EINVAL);
}
- mtx_unlock(&enc_mtx);
- return (error);
+ switch (hhook_id) {
+#ifdef INET
+ case AF_INET:
+ ph = &V_inet_pfil_hook;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ ph = &V_inet6_pfil_hook;
+ break;
+#endif
+ default:
+ ph = NULL;
+ }
+ if (ph == NULL || !PFIL_HOOKED(ph))
+ return (0);
+ /* Make a packet looks like it was received on enc(4) */
+ rcvif = (*ctx->mp)->m_pkthdr.rcvif;
+ (*ctx->mp)->m_pkthdr.rcvif = ifp;
+ if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, NULL) != 0 ||
+ *ctx->mp == NULL) {
+ *ctx->mp = NULL; /* consumed by filter */
+ return (EACCES);
+ }
+ (*ctx->mp)->m_pkthdr.rcvif = rcvif;
+ return (0);
}
-int
-ipsec_filter(struct mbuf **mp, int dir, int flags)
+static int
+enc_add_hhooks(struct enc_softc *sc)
{
- int error, i;
- struct ip *ip;
-
- KASSERT(encif != NULL, ("%s: encif is null", __func__));
- KASSERT(flags & (ENC_IN|ENC_OUT),
- ("%s: invalid flags: %04x", __func__, flags));
-
- if ((encif->if_drv_flags & IFF_DRV_RUNNING) == 0)
- return (0);
+ struct hookinfo hki;
+ int error;
- if (flags & ENC_IN) {
- if ((flags & ipsec_filter_mask_in) == 0)
- return (0);
- } else {
- if ((flags & ipsec_filter_mask_out) == 0)
- return (0);
- }
-
- /* Skip pfil(9) if no filters are loaded */
- if (1
+ error = EPFNOSUPPORT;
+ hki.hook_func = enc_hhook;
+ hki.hook_helper = NULL;
+ hki.hook_udata = sc;
#ifdef INET
- && !PFIL_HOOKED(&V_inet_pfil_hook)
+ hki.hook_id = AF_INET;
+ hki.hook_type = HHOOK_TYPE_IPSEC_IN;
+ error = hhook_add_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET],
+ &hki, HHOOK_WAITOK);
+ if (error != 0)
+ return (error);
+ hki.hook_type = HHOOK_TYPE_IPSEC_OUT;
+ error = hhook_add_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET],
+ &hki, HHOOK_WAITOK);
+ if (error != 0)
+ return (error);
#endif
#ifdef INET6
- && !PFIL_HOOKED(&V_inet6_pfil_hook)
+ hki.hook_id = AF_INET6;
+ hki.hook_type = HHOOK_TYPE_IPSEC_IN;
+ error = hhook_add_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET6],
+ &hki, HHOOK_WAITOK);
+ if (error != 0)
+ return (error);
+ hki.hook_type = HHOOK_TYPE_IPSEC_OUT;
+ error = hhook_add_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET6],
+ &hki, HHOOK_WAITOK);
+ if (error != 0)
+ return (error);
#endif
- ) {
- return (0);
- }
+ return (error);
+}
- i = min((*mp)->m_pkthdr.len, max_protohdr);
- if ((*mp)->m_len < i) {
- *mp = m_pullup(*mp, i);
- if (*mp == NULL) {
- printf("%s: m_pullup failed\n", __func__);
- return (-1);
- }
- }
+static void
+enc_remove_hhooks(struct enc_softc *sc)
+{
+ struct hookinfo hki;
- error = 0;
- ip = mtod(*mp, struct ip *);
- switch (ip->ip_v) {
+ hki.hook_func = enc_hhook;
+ hki.hook_helper = NULL;
+ hki.hook_udata = sc;
#ifdef INET
- case 4:
- /*
- * before calling the firewall, swap fields the same as
- * IP does. here we assume the header is contiguous
- */
- ip->ip_len = ntohs(ip->ip_len);
- ip->ip_off = ntohs(ip->ip_off);
-
- error = pfil_run_hooks(&V_inet_pfil_hook, mp,
- encif, dir, NULL);
-
- if (*mp == NULL || error != 0)
- break;
-
- /* restore byte ordering */
- ip = mtod(*mp, struct ip *);
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
- break;
+ hki.hook_id = AF_INET;
+ hki.hook_type = HHOOK_TYPE_IPSEC_IN;
+ hhook_remove_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET], &hki);
+ hki.hook_type = HHOOK_TYPE_IPSEC_OUT;
+ hhook_remove_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET], &hki);
#endif
#ifdef INET6
- case 6:
- error = pfil_run_hooks(&V_inet6_pfil_hook, mp,
- encif, dir, NULL);
- break;
+ hki.hook_id = AF_INET6;
+ hki.hook_type = HHOOK_TYPE_IPSEC_IN;
+ hhook_remove_hook(V_ipsec_hhh_in[HHOOK_IPSEC_INET6], &hki);
+ hki.hook_type = HHOOK_TYPE_IPSEC_OUT;
+ hhook_remove_hook(V_ipsec_hhh_out[HHOOK_IPSEC_INET6], &hki);
#endif
- default:
- printf("%s: unknown IP version\n", __func__);
- }
+}
- /*
- * If the mbuf was consumed by the filter for requeueing (dummynet, etc)
- * then error will be zero but we still want to return an error to our
- * caller so the null mbuf isn't forwarded further.
- */
- if (*mp == NULL && error == 0)
- return (-1); /* Consumed by the filter */
- if (*mp == NULL)
- return (error);
- if (error != 0)
- goto bad;
+static void
+vnet_enc_init(const void *unused __unused)
+{
- return (error);
+ V_enc_sc = NULL;
+ V_enc_cloner = if_clone_simple(encname, enc_clone_create,
+ enc_clone_destroy, 1);
+}
+VNET_SYSINIT(vnet_enc_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
+ vnet_enc_init, NULL);
-bad:
- m_freem(*mp);
- *mp = NULL;
- return (error);
+static void
+vnet_enc_init_proto(void *unused __unused)
+{
+ KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc));
+
+ if (enc_add_hhooks(V_enc_sc) != 0)
+ enc_clone_destroy(V_enc_sc->sc_ifp);
}
+VNET_SYSINIT(vnet_enc_init_proto, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_enc_init_proto, NULL);
-void
-ipsec_bpf(struct mbuf *m, struct secasvar *sav, int af, int flags)
+static void
+vnet_enc_uninit(const void *unused __unused)
{
- int mflags;
- struct enchdr hdr;
+ KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc));
- KASSERT(encif != NULL, ("%s: encif is null", __func__));
- KASSERT(flags & (ENC_IN|ENC_OUT),
- ("%s: invalid flags: %04x", __func__, flags));
+ if_clone_detach(V_enc_cloner);
+}
+VNET_SYSUNINIT(vnet_enc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
+ vnet_enc_uninit, NULL);
- if ((encif->if_drv_flags & IFF_DRV_RUNNING) == 0)
- return;
+/*
+ * The hhook consumer needs to go before ip[6]_destroy are called on
+ * SI_ORDER_THIRD.
+ */
+static void
+vnet_enc_uninit_hhook(const void *unused __unused)
+{
+ KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc));
- if (flags & ENC_IN) {
- if ((flags & ipsec_bpf_mask_in) == 0)
- return;
- } else {
- if ((flags & ipsec_bpf_mask_out) == 0)
- return;
- }
+ enc_remove_hhooks(V_enc_sc);
+}
+VNET_SYSUNINIT(vnet_enc_uninit_hhook, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
+ vnet_enc_uninit_hhook, NULL);
- if (bpf_peers_present(encif->if_bpf)) {
- mflags = 0;
- hdr.spi = 0;
- if (!sav) {
- struct m_tag *mtag;
- mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
- if (mtag != NULL) {
- struct tdb_ident *tdbi;
- tdbi = (struct tdb_ident *) (mtag + 1);
- if (tdbi->alg_enc != SADB_EALG_NONE)
- mflags |= M_CONF;
- if (tdbi->alg_auth != SADB_AALG_NONE)
- mflags |= M_AUTH;
- hdr.spi = tdbi->spi;
- }
- } else {
- if (sav->alg_enc != SADB_EALG_NONE)
- mflags |= M_CONF;
- if (sav->alg_auth != SADB_AALG_NONE)
- mflags |= M_AUTH;
- hdr.spi = sav->spi;
- }
+static int
+enc_modevent(module_t mod, int type, void *data)
+{
- /*
- * We need to prepend the address family as a four byte
- * field. Cons up a dummy header to pacify bpf. This
- * is safe because bpf will only read from the mbuf
- * (i.e., it won't try to free it or keep a pointer a
- * to it).
- */
- hdr.af = af;
- /* hdr.spi already set above */
- hdr.flags = mflags;
-
- bpf_mtap2(encif->if_bpf, &hdr, sizeof(hdr), m);
+ switch (type) {
+ case MOD_LOAD:
+ case MOD_UNLOAD:
+ break;
+ default:
+ return (EOPNOTSUPP);
}
+ return (0);
}
+
+static moduledata_t enc_mod = {
+ "if_enc",
+ enc_modevent,
+ 0
+};
+
+DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
diff --git a/freebsd/sys/net/if_enc.h b/freebsd/sys/net/if_enc.h
index 59a55fcf..941ed12a 100644
--- a/freebsd/sys/net/if_enc.h
+++ b/freebsd/sys/net/if_enc.h
@@ -30,6 +30,13 @@
#ifndef _NET_IF_ENC_H
#define _NET_IF_ENC_H
-extern struct ifnet *encif;
+struct ipsec_ctx_data {
+ struct mbuf **mp;
+ struct secasvar *sav;
+ uint8_t af;
+#define IPSEC_ENC_BEFORE 0x01
+#define IPSEC_ENC_AFTER 0x02
+ uint8_t enc;
+};
#endif /* _NET_IF_ENC_H */
diff --git a/freebsd/sys/net/if_epair.c b/freebsd/sys/net/if_epair.c
index 755e608a..b4f73d68 100644
--- a/freebsd/sys/net/if_epair.c
+++ b/freebsd/sys/net/if_epair.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/kernel.h>
+#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/refcount.h>
@@ -67,6 +68,7 @@ __FBSDID("$FreeBSD$");
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_media.h>
#include <net/if_var.h>
@@ -74,8 +76,6 @@ __FBSDID("$FreeBSD$");
#include <net/netisr.h>
#include <net/vnet.h>
-#define EPAIRNAME "epair"
-
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl");
@@ -102,9 +102,11 @@ static int epair_clone_match(struct if_clone *, const char *);
static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int epair_clone_destroy(struct if_clone *, struct ifnet *);
-/* Netisr realted definitions and sysctl. */
+static const char epairname[] = "epair";
+
+/* Netisr related definitions and sysctl. */
static struct netisr_handler epair_nh = {
- .nh_name = EPAIRNAME,
+ .nh_name = epairname,
.nh_proto = NETISR_EPAIR,
.nh_policy = NETISR_POLICY_CPU,
.nh_handler = epair_nh_sintr,
@@ -170,12 +172,11 @@ STAILQ_HEAD(eid_list, epair_ifp_drain);
#define EPAIR_REFCOUNT_ASSERT(a, p)
#endif
-static MALLOC_DEFINE(M_EPAIR, EPAIRNAME,
+static MALLOC_DEFINE(M_EPAIR, epairname,
"Pair of virtual cross-over connected Ethernet-like interfaces");
-static struct if_clone epair_cloner = IFC_CLONE_INITIALIZER(
- EPAIRNAME, NULL, IF_MAXUNIT,
- NULL, epair_clone_match, epair_clone_create, epair_clone_destroy);
+static VNET_DEFINE(struct if_clone *, epair_cloner);
+#define V_epair_cloner VNET(epair_cloner)
/*
* DPCPU area and functions.
@@ -421,7 +422,7 @@ epair_start_locked(struct ifnet *ifp)
*/
if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
(oifp->if_flags & IFF_UP) ==0) {
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
m_freem(m);
continue;
}
@@ -437,15 +438,15 @@ epair_start_locked(struct ifnet *ifp)
error = netisr_queue(NETISR_EPAIR, m);
CURVNET_RESTORE();
if (!error) {
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
/* Someone else received the packet. */
- oifp->if_ipackets++;
+ if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1);
} else {
/* The packet was freed already. */
epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
(void) epair_add_ifp_for_draining(ifp);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
EPAIR_REFCOUNT_RELEASE(&sc->refcount);
EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
("%s: ifp=%p sc->refcount not >= 1: %d",
@@ -506,7 +507,7 @@ epair_transmit_locked(struct ifnet *ifp, struct mbuf *m)
oifp = sc->oifp;
if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
(oifp->if_flags & IFF_UP) ==0) {
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
m_freem(m);
return (0);
}
@@ -515,17 +516,17 @@ epair_transmit_locked(struct ifnet *ifp, struct mbuf *m)
DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname);
#ifdef ALTQ
- /* Support ALTQ via the clasic if_start() path. */
+ /* Support ALTQ via the classic if_start() path. */
IF_LOCK(&ifp->if_snd);
if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error);
if (error)
- ifp->if_snd.ifq_drops++;
+ if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
IF_UNLOCK(&ifp->if_snd);
if (!error) {
- ifp->if_obytes += len;
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
if (mflags & (M_BCAST|M_MCAST))
- ifp->if_omcasts++;
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0)
epair_start_locked(ifp);
@@ -559,22 +560,22 @@ epair_transmit_locked(struct ifnet *ifp, struct mbuf *m)
error = netisr_queue(NETISR_EPAIR, m);
CURVNET_RESTORE();
if (!error) {
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
/*
* IFQ_HANDOFF_ADJ/ip_handoff() update statistics,
* but as we bypass all this we have to duplicate
* the logic another time.
*/
- ifp->if_obytes += len;
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
if (mflags & (M_BCAST|M_MCAST))
- ifp->if_omcasts++;
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
/* Someone else received the packet. */
- oifp->if_ipackets++;
+ if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1);
} else {
/* The packet was freed already. */
epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE;
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
EPAIR_REFCOUNT_RELEASE(&sc->refcount);
EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1,
("%s: ifp=%p sc->refcount not >= 1: %d",
@@ -694,10 +695,10 @@ epair_clone_match(struct if_clone *ifc, const char *name)
* - epair<n>
* but not the epair<n>[ab] versions.
*/
- if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0)
+ if (strncmp(epairname, name, sizeof(epairname)-1) != 0)
return (0);
- for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) {
+ for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) {
if (*cp < '0' || *cp > '9')
return (0);
}
@@ -716,7 +717,7 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
/*
* We are abusing params to create our second interface.
- * Actually we already created it and called if_clone_createif()
+ * Actually we already created it and called if_clone_create()
* for it to do the official insertion procedure the moment we knew
* it cannot fail anymore. So just do attach it here.
*/
@@ -763,10 +764,17 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
ifc_free_unit(ifc, unit);
return (ENOSPC);
}
- *dp = 'a';
+ *dp = 'b';
/* Must not change dp so we can replace 'a' by 'b' later. */
*(dp+1) = '\0';
+ /* Check if 'a' and 'b' interfaces already exist. */
+ if (ifunit(name) != NULL)
+ return (EEXIST);
+ *dp = 'a';
+ if (ifunit(name) != NULL)
+ return (EEXIST);
+
/* Allocate memory for both [ab] interfaces */
sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
EPAIR_REFCOUNT_INIT(&sca->refcount, 1);
@@ -801,15 +809,23 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
* cache locality but we can at least allow parallelism.
*/
sca->cpuid =
- netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount());
+ netisr_get_cpuid(sca->ifp->if_index);
scb->cpuid =
- netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount());
+ netisr_get_cpuid(scb->ifp->if_index);
+
+ /* Initialise pseudo media types. */
+ ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
+ ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL);
+ ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T);
+ ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status);
+ ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL);
+ ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T);
/* Finish initialization of interface <n>a. */
ifp = sca->ifp;
ifp->if_softc = sca;
strlcpy(ifp->if_xname, name, IFNAMSIZ);
- ifp->if_dname = ifc->ifc_name;
+ ifp->if_dname = epairname;
ifp->if_dunit = unit;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_capabilities = IFCAP_VLAN_MTU;
@@ -827,7 +843,7 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
sca->if_qflush = ifp->if_qflush;
ifp->if_qflush = epair_qflush;
ifp->if_transmit = epair_transmit;
- ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */
+ ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */
/* Swap the name and finish initialization of interface <n>b. */
*dp = 'b';
@@ -835,7 +851,7 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
ifp = scb->ifp;
ifp->if_softc = scb;
strlcpy(ifp->if_xname, name, IFNAMSIZ);
- ifp->if_dname = ifc->ifc_name;
+ ifp->if_dname = epairname;
ifp->if_dunit = unit;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_capabilities = IFCAP_VLAN_MTU;
@@ -845,15 +861,15 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
ifp->if_init = epair_init;
ifp->if_snd.ifq_maxlen = ifqmaxlen;
/* We need to play some tricks here for the second interface. */
- strlcpy(name, EPAIRNAME, len);
+ strlcpy(name, epairname, len);
error = if_clone_create(name, len, (caddr_t)scb);
if (error)
- panic("%s: if_clone_createif() for our 2nd iface failed: %d",
+ panic("%s: if_clone_create() for our 2nd iface failed: %d",
__func__, error);
scb->if_qflush = ifp->if_qflush;
ifp->if_qflush = epair_qflush;
ifp->if_transmit = epair_transmit;
- ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */
+ ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */
/*
* Restore name to <n>a as the ifp for this will go into the
@@ -862,14 +878,6 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
strlcpy(name, sca->ifp->if_xname, len);
DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb);
- /* Initialise pseudo media types. */
- ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
- ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL);
- ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T);
- ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status);
- ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL);
- ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T);
-
/* Tell the world, that we are ready to rock. */
sca->ifp->if_drv_flags |= IFF_DRV_RUNNING;
scb->ifp->if_drv_flags |= IFF_DRV_RUNNING;
@@ -947,6 +955,31 @@ epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
return (0);
}
+static void
+vnet_epair_init(const void *unused __unused)
+{
+
+ V_epair_cloner = if_clone_advanced(epairname, 0,
+ epair_clone_match, epair_clone_create, epair_clone_destroy);
+#ifdef VIMAGE
+ netisr_register_vnet(&epair_nh);
+#endif
+}
+VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
+ vnet_epair_init, NULL);
+
+static void
+vnet_epair_uninit(const void *unused __unused)
+{
+
+#ifdef VIMAGE
+ netisr_unregister_vnet(&epair_nh);
+#endif
+ if_clone_detach(V_epair_cloner);
+}
+VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
+ vnet_epair_uninit, NULL);
+
static int
epair_modevent(module_t mod, int type, void *data)
{
@@ -962,16 +995,14 @@ epair_modevent(module_t mod, int type, void *data)
epair_nh.nh_qlimit = qlimit;
#endif /* __rtems__ */
netisr_register(&epair_nh);
- if_clone_attach(&epair_cloner);
if (bootverbose)
- printf("%s initialized.\n", EPAIRNAME);
+ printf("%s initialized.\n", epairname);
break;
case MOD_UNLOAD:
- if_clone_detach(&epair_cloner);
netisr_unregister(&epair_nh);
epair_dpcpu_detach();
if (bootverbose)
- printf("%s unloaded.\n", EPAIRNAME);
+ printf("%s unloaded.\n", epairname);
break;
default:
return (EOPNOTSUPP);
@@ -985,5 +1016,5 @@ static moduledata_t epair_mod = {
0
};
-DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
MODULE_VERSION(if_epair, 1);
diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c
index 5ee2606e..1d22c0a6 100644
--- a/freebsd/sys/net/if_ethersubr.c
+++ b/freebsd/sys/net/if_ethersubr.c
@@ -32,12 +32,11 @@
* $FreeBSD$
*/
-#include <rtems/bsd/local/opt_atalk.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/local/opt_netgraph.h>
#include <rtems/bsd/local/opt_mbuf_profiling.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -47,12 +46,13 @@
#include <sys/module.h>
#include <sys/mbuf.h>
#include <sys/random.h>
-#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
+#include <sys/uuid.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/netisr.h>
#include <net/route.h>
@@ -64,43 +64,22 @@
#include <net/if_bridgevar.h>
#include <net/if_vlan_var.h>
#include <net/if_llatbl.h>
-#include <net/pf_mtag.h>
+#include <net/pfil.h>
+#include <net/rss_config.h>
#include <net/vnet.h>
+#include <netpfil/pf/pf_mtag.h>
+
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#include <netinet/ip_carp.h>
#include <netinet/ip_var.h>
-#include <netinet/ip_fw.h>
-#include <netpfil/ipfw/ip_fw_private.h>
#endif
#ifdef INET6
#include <netinet6/nd6.h>
#endif
-
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
-int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m);
-int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp,
- struct sockaddr *dst, short *tp, int *hlen);
-
-#ifdef NETATALK
-#include <netatalk/at.h>
-#include <netatalk/at_var.h>
-#include <netatalk/at_extern.h>
-
-#define llc_snap_org_code llc_un.type_snap.org_code
-#define llc_snap_ether_type llc_un.type_snap.ether_type
-
-extern u_char at_org_code[3];
-extern u_char aarp_org_code[3];
-#endif /* NETATALK */
-
#include <security/mac/mac_framework.h>
#ifdef CTASSERT
@@ -108,6 +87,8 @@ CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
#endif
+VNET_DEFINE(struct pfil_head, link_pfil_hook); /* Packet filter hooks */
+
/* netgraph node hooks for ng_ether(4) */
void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
@@ -134,22 +115,160 @@ static int ether_resolvemulti(struct ifnet *, struct sockaddr **,
#ifdef VIMAGE
static void ether_reassign(struct ifnet *, struct vnet *, char *);
#endif
+static int ether_requestencap(struct ifnet *, struct if_encap_req *);
-/* XXX: should be in an arp support file, not here */
-static MALLOC_DEFINE(M_ARPCOM, "arpcom", "802.* interface internals");
-
-#define ETHER_IS_BROADCAST(addr) \
- (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
#define senderr(e) do { error = (e); goto bad;} while (0)
+static void
+update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
+{
+ int csum_flags = 0;
+
+ if (src->m_pkthdr.csum_flags & CSUM_IP)
+ csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
+ if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
+ csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
+ if (src->m_pkthdr.csum_flags & CSUM_SCTP)
+ csum_flags |= CSUM_SCTP_VALID;
+ dst->m_pkthdr.csum_flags |= csum_flags;
+ if (csum_flags & CSUM_DATA_VALID)
+ dst->m_pkthdr.csum_data = 0xffff;
+}
+
+/*
+ * Handle link-layer encapsulation requests.
+ */
+static int
+ether_requestencap(struct ifnet *ifp, struct if_encap_req *req)
+{
+ struct ether_header *eh;
+ struct arphdr *ah;
+ uint16_t etype;
+ const u_char *lladdr;
+
+ if (req->rtype != IFENCAP_LL)
+ return (EOPNOTSUPP);
+
+ if (req->bufsize < ETHER_HDR_LEN)
+ return (ENOMEM);
+
+ eh = (struct ether_header *)req->buf;
+ lladdr = req->lladdr;
+ req->lladdr_off = 0;
+
+ switch (req->family) {
+ case AF_INET:
+ etype = htons(ETHERTYPE_IP);
+ break;
+ case AF_INET6:
+ etype = htons(ETHERTYPE_IPV6);
+ break;
+ case AF_ARP:
+ ah = (struct arphdr *)req->hdata;
+ ah->ar_hrd = htons(ARPHRD_ETHER);
+
+ switch(ntohs(ah->ar_op)) {
+ case ARPOP_REVREQUEST:
+ case ARPOP_REVREPLY:
+ etype = htons(ETHERTYPE_REVARP);
+ break;
+ case ARPOP_REQUEST:
+ case ARPOP_REPLY:
+ default:
+ etype = htons(ETHERTYPE_ARP);
+ break;
+ }
+
+ if (req->flags & IFENCAP_FLAG_BROADCAST)
+ lladdr = ifp->if_broadcastaddr;
+ break;
+ default:
+ return (EAFNOSUPPORT);
+ }
+
+ memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type));
+ memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN);
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ req->bufsize = sizeof(struct ether_header);
+
+ return (0);
+}
+
+
+static int
+ether_resolve_addr(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro, u_char *phdr,
+ uint32_t *pflags, struct llentry **plle)
+{
+ struct ether_header *eh;
+ uint32_t lleflags = 0;
+ int error = 0;
#if defined(INET) || defined(INET6)
-int
-ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared);
-static VNET_DEFINE(int, ether_ipfw);
-#define V_ether_ipfw VNET(ether_ipfw)
+ uint16_t etype;
+#endif
+
+ if (plle)
+ *plle = NULL;
+ eh = (struct ether_header *)phdr;
+
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+ if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
+ error = arpresolve(ifp, 0, m, dst, phdr, &lleflags,
+ plle);
+ else {
+ if (m->m_flags & M_BCAST)
+ memcpy(eh->ether_dhost, ifp->if_broadcastaddr,
+ ETHER_ADDR_LEN);
+ else {
+ const struct in_addr *a;
+ a = &(((const struct sockaddr_in *)dst)->sin_addr);
+ ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost);
+ }
+ etype = htons(ETHERTYPE_IP);
+ memcpy(&eh->ether_type, &etype, sizeof(etype));
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ }
+ break;
#endif
+#ifdef INET6
+ case AF_INET6:
+ if ((m->m_flags & M_MCAST) == 0)
+ error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags,
+ plle);
+ else {
+ const struct in6_addr *a6;
+ a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
+ ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost);
+ etype = htons(ETHERTYPE_IPV6);
+ memcpy(&eh->ether_type, &etype, sizeof(etype));
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ }
+ break;
+#endif
+ default:
+ if_printf(ifp, "can't handle af%d\n", dst->sa_family);
+ if (m != NULL)
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+
+ if (error == EHOSTDOWN) {
+ if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
+ error = EHOSTUNREACH;
+ }
+ if (error != 0)
+ return (error);
+
+ *pflags = RT_MAY_LOOP;
+ if (lleflags & LLE_IFADDR)
+ *pflags |= RT_L2_ME;
+
+ return (0);
+}
/*
* Ethernet output routine.
@@ -159,23 +278,49 @@ static VNET_DEFINE(int, ether_ipfw);
*/
int
ether_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro)
+ const struct sockaddr *dst, struct route *ro)
{
- short type;
- int error = 0, hdrcmplt = 0;
- u_char esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN];
- struct llentry *lle = NULL;
- struct rtentry *rt0 = NULL;
+ int error = 0;
+ char linkhdr[ETHER_HDR_LEN], *phdr;
struct ether_header *eh;
struct pf_mtag *t;
int loop_copy = 1;
int hlen; /* link layer header length */
+ uint32_t pflags;
+ struct llentry *lle = NULL;
+ struct rtentry *rt0 = NULL;
+ int addref = 0;
+ phdr = NULL;
+ pflags = 0;
if (ro != NULL) {
- if (!(m->m_flags & (M_BCAST | M_MCAST)))
- lle = ro->ro_lle;
+ /* XXX BPF uses ro_prepend */
+ if (ro->ro_prepend != NULL) {
+ phdr = ro->ro_prepend;
+ hlen = ro->ro_plen;
+ } else if (!(m->m_flags & (M_BCAST | M_MCAST))) {
+ if ((ro->ro_flags & RT_LLE_CACHE) != 0) {
+ lle = ro->ro_lle;
+ if (lle != NULL &&
+ (lle->la_flags & LLE_VALID) == 0) {
+ LLE_FREE(lle);
+ lle = NULL; /* redundant */
+ ro->ro_lle = NULL;
+ }
+ if (lle == NULL) {
+ /* if we lookup, keep cache */
+ addref = 1;
+ }
+ }
+ if (lle != NULL) {
+ phdr = lle->r_linkdata;
+ hlen = lle->r_hdrlen;
+ pflags = lle->r_flags;
+ }
+ }
rt0 = ro->ro_rt;
}
+
#ifdef MAC
error = mac_ifnet_check_transmit(ifp, m);
if (error)
@@ -189,153 +334,39 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
(ifp->if_drv_flags & IFF_DRV_RUNNING)))
senderr(ENETDOWN);
- hlen = ETHER_HDR_LEN;
- switch (dst->sa_family) {
-#ifdef INET
- case AF_INET:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
- else
- error = arpresolve(ifp, rt0, m, dst, edst, &lle);
- if (error)
+ if (phdr == NULL) {
+ /* No prepend data supplied. Try to calculate ourselves. */
+ phdr = linkhdr;
+ hlen = ETHER_HDR_LEN;
+ error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags,
+ addref ? &lle : NULL);
+ if (addref && lle != NULL)
+ ro->ro_lle = lle;
+ if (error != 0)
return (error == EWOULDBLOCK ? 0 : error);
- type = htons(ETHERTYPE_IP);
- break;
- case AF_ARP:
- {
- struct arphdr *ah;
- ah = mtod(m, struct arphdr *);
- ah->ar_hrd = htons(ARPHRD_ETHER);
-
- loop_copy = 0; /* if this is for us, don't do it */
-
- switch(ntohs(ah->ar_op)) {
- case ARPOP_REVREQUEST:
- case ARPOP_REVREPLY:
- type = htons(ETHERTYPE_REVARP);
- break;
- case ARPOP_REQUEST:
- case ARPOP_REPLY:
- default:
- type = htons(ETHERTYPE_ARP);
- break;
- }
-
- if (m->m_flags & M_BCAST)
- bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN);
- else
- bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN);
-
- }
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
- else
- error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle);
- if (error)
- return error;
- type = htons(ETHERTYPE_IPV6);
- break;
-#endif
-#ifdef IPX
- case AF_IPX:
- if (ef_outputp) {
- error = ef_outputp(ifp, &m, dst, &type, &hlen);
- if (error)
- goto bad;
- } else
- type = htons(ETHERTYPE_IPX);
- bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
- (caddr_t)edst, sizeof (edst));
- break;
-#endif
-#ifdef NETATALK
- case AF_APPLETALK:
- {
- struct at_ifaddr *aa;
-
- if ((aa = at_ifawithnet((struct sockaddr_at *)dst)) == NULL)
- senderr(EHOSTUNREACH); /* XXX */
- if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst)) {
- ifa_free(&aa->aa_ifa);
- return (0);
- }
- /*
- * In the phase 2 case, need to prepend an mbuf for the llc header.
- */
- if ( aa->aa_flags & AFA_PHASE2 ) {
- struct llc llc;
-
- ifa_free(&aa->aa_ifa);
- M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
- if (m == NULL)
- senderr(ENOBUFS);
- llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
- llc.llc_control = LLC_UI;
- bcopy(at_org_code, llc.llc_snap_org_code, sizeof(at_org_code));
- llc.llc_snap_ether_type = htons( ETHERTYPE_AT );
- bcopy(&llc, mtod(m, caddr_t), LLC_SNAPFRAMELEN);
- type = htons(m->m_pkthdr.len);
- hlen = LLC_SNAPFRAMELEN + ETHER_HDR_LEN;
- } else {
- ifa_free(&aa->aa_ifa);
- type = htons(ETHERTYPE_AT);
- }
- break;
- }
-#endif /* NETATALK */
-
- case pseudo_AF_HDRCMPLT:
- hdrcmplt = 1;
- eh = (struct ether_header *)dst->sa_data;
- (void)memcpy(esrc, eh->ether_shost, sizeof (esrc));
- /* FALLTHROUGH */
-
- case AF_UNSPEC:
- loop_copy = 0; /* if this is for us, don't do it */
- eh = (struct ether_header *)dst->sa_data;
- (void)memcpy(edst, eh->ether_dhost, sizeof (edst));
- type = eh->ether_type;
- break;
-
- default:
- if_printf(ifp, "can't handle af%d\n", dst->sa_family);
- senderr(EAFNOSUPPORT);
}
- if (lle != NULL && (lle->la_flags & LLE_IFADDR)) {
- int csum_flags = 0;
- if (m->m_pkthdr.csum_flags & CSUM_IP)
- csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
- csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
- if (m->m_pkthdr.csum_flags & CSUM_SCTP)
- csum_flags |= CSUM_SCTP_VALID;
- m->m_pkthdr.csum_flags |= csum_flags;
- m->m_pkthdr.csum_data = 0xffff;
+ if ((pflags & RT_L2_ME) != 0) {
+ update_mbuf_csumflags(m, m);
return (if_simloop(ifp, m, dst->sa_family, 0));
}
+ loop_copy = pflags & RT_MAY_LOOP;
/*
* Add local net header. If no space in first mbuf,
* allocate another.
+ *
+ * Note that we do prepend regardless of RT_HAS_HEADER flag.
+ * This is done because BPF code shifts m_data pointer
+ * to the end of ethernet header prior to calling if_output().
*/
- M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
+ M_PREPEND(m, hlen, M_NOWAIT);
if (m == NULL)
senderr(ENOBUFS);
- eh = mtod(m, struct ether_header *);
- (void)memcpy(&eh->ether_type, &type,
- sizeof(eh->ether_type));
- (void)memcpy(eh->ether_dhost, edst, sizeof (edst));
- if (hdrcmplt)
- (void)memcpy(eh->ether_shost, esrc,
- sizeof(eh->ether_shost));
- else
- (void)memcpy(eh->ether_shost, IF_LLADDR(ifp),
- sizeof(eh->ether_shost));
+ if ((pflags & RT_HAS_HEADER) == 0) {
+ eh = mtod(m, struct ether_header *);
+ memcpy(eh, phdr, hlen);
+ }
/*
* If a simplex interface, and the packet is being sent to our
@@ -346,47 +377,27 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
* on the wire). However, we don't do that here for security
* reasons and compatibility with the original behavior.
*/
- if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy &&
+ if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) &&
((t = pf_find_mtag(m)) == NULL || !t->routed)) {
- int csum_flags = 0;
+ struct mbuf *n;
- if (m->m_pkthdr.csum_flags & CSUM_IP)
- csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
- csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
- if (m->m_pkthdr.csum_flags & CSUM_SCTP)
- csum_flags |= CSUM_SCTP_VALID;
-
- if (m->m_flags & M_BCAST) {
- struct mbuf *n;
-
- /*
- * Because if_simloop() modifies the packet, we need a
- * writable copy through m_dup() instead of a readonly
- * one as m_copy[m] would give us. The alternative would
- * be to modify if_simloop() to handle the readonly mbuf,
- * but performancewise it is mostly equivalent (trading
- * extra data copying vs. extra locking).
- *
- * XXX This is a local workaround. A number of less
- * often used kernel parts suffer from the same bug.
- * See PR kern/105943 for a proposed general solution.
- */
- if ((n = m_dup(m, M_DONTWAIT)) != NULL) {
- n->m_pkthdr.csum_flags |= csum_flags;
- if (csum_flags & CSUM_DATA_VALID)
- n->m_pkthdr.csum_data = 0xffff;
- (void)if_simloop(ifp, n, dst->sa_family, hlen);
- } else
- ifp->if_iqdrops++;
- } else if (bcmp(eh->ether_dhost, eh->ether_shost,
- ETHER_ADDR_LEN) == 0) {
- m->m_pkthdr.csum_flags |= csum_flags;
- if (csum_flags & CSUM_DATA_VALID)
- m->m_pkthdr.csum_data = 0xffff;
- (void) if_simloop(ifp, m, dst->sa_family, hlen);
- return (0); /* XXX */
- }
+ /*
+ * Because if_simloop() modifies the packet, we need a
+ * writable copy through m_dup() instead of a readonly
+ * one as m_copy[m] would give us. The alternative would
+ * be to modify if_simloop() to handle the readonly mbuf,
+ * but performancewise it is mostly equivalent (trading
+ * extra data copying vs. extra locking).
+ *
+ * XXX This is a local workaround. A number of less
+ * often used kernel parts suffer from the same bug.
+ * See PR kern/105943 for a proposed general solution.
+ */
+ if ((n = m_dup(m, M_NOWAIT)) != NULL) {
+ update_mbuf_csumflags(m, n);
+ (void)if_simloop(ifp, n, dst->sa_family, hlen);
+ } else
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
}
/*
@@ -399,12 +410,12 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
#if defined(INET) || defined(INET6)
if (ifp->if_carp &&
- (error = (*carp_output_p)(ifp, m, dst, NULL)))
+ (error = (*carp_output_p)(ifp, m, dst)))
goto bad;
#endif
/* Handle ng_ether(4) processing, if any */
- if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ if (ifp->if_l2com != NULL) {
KASSERT(ng_ether_output_p != NULL,
("ng_ether_output_p is NULL"));
if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
@@ -429,18 +440,17 @@ bad: if (m != NULL)
int
ether_output_frame(struct ifnet *ifp, struct mbuf *m)
{
-#if defined(INET) || defined(INET6)
+ int i;
- if (V_ip_fw_chk_ptr && V_ether_ipfw != 0) {
- if (ether_ipfw_chk(&m, ifp, 0) == 0) {
- if (m) {
- m_freem(m);
- return EACCES; /* pkt dropped */
- } else
- return 0; /* consumed e.g. in a pipe */
- }
+ if (PFIL_HOOKED(&V_link_pfil_hook)) {
+ i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_OUT, NULL);
+
+ if (i != 0)
+ return (EACCES);
+
+ if (m == NULL)
+ return (0);
}
-#endif
/*
* Queue message on interface, update output statistics if
@@ -449,116 +459,6 @@ ether_output_frame(struct ifnet *ifp, struct mbuf *m)
return ((ifp->if_transmit)(ifp, m));
}
-#if defined(INET) || defined(INET6)
-/*
- * ipfw processing for ethernet packets (in and out).
- * The second parameter is NULL from ether_demux, and ifp from
- * ether_output_frame.
- */
-int
-ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared)
-{
- struct ether_header *eh;
- struct ether_header save_eh;
- struct mbuf *m;
- int i;
- struct ip_fw_args args;
- struct m_tag *mtag;
-
- /* fetch start point from rule, if any */
- mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
- if (mtag == NULL) {
- args.rule.slot = 0;
- } else {
- /* dummynet packet, already partially processed */
- struct ipfw_rule_ref *r;
-
- /* XXX can we free it after use ? */
- mtag->m_tag_id = PACKET_TAG_NONE;
- r = (struct ipfw_rule_ref *)(mtag + 1);
- if (r->info & IPFW_ONEPASS)
- return (1);
- args.rule = *r;
- }
-
- /*
- * I need some amt of data to be contiguous, and in case others need
- * the packet (shared==1) also better be in the first mbuf.
- */
- m = *m0;
- i = min( m->m_pkthdr.len, max_protohdr);
- if ( shared || m->m_len < i) {
- m = m_pullup(m, i);
- if (m == NULL) {
- *m0 = m;
- return 0;
- }
- }
- eh = mtod(m, struct ether_header *);
- save_eh = *eh; /* save copy for restore below */
- m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */
-
- args.m = m; /* the packet we are looking at */
- args.oif = dst; /* destination, if any */
- args.next_hop = NULL; /* we do not support forward yet */
- args.next_hop6 = NULL; /* we do not support forward yet */
- args.eh = &save_eh; /* MAC header for bridged/MAC packets */
- args.inp = NULL; /* used by ipfw uid/gid/jail rules */
- i = V_ip_fw_chk_ptr(&args);
- m = args.m;
- if (m != NULL) {
- /*
- * Restore Ethernet header, as needed, in case the
- * mbuf chain was replaced by ipfw.
- */
- M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
- if (m == NULL) {
- *m0 = m;
- return 0;
- }
- if (eh != mtod(m, struct ether_header *))
- bcopy(&save_eh, mtod(m, struct ether_header *),
- ETHER_HDR_LEN);
- }
- *m0 = m;
-
- if (i == IP_FW_DENY) /* drop */
- return 0;
-
- KASSERT(m != NULL, ("ether_ipfw_chk: m is NULL"));
-
- if (i == IP_FW_PASS) /* a PASS rule. */
- return 1;
-
- if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) {
- int dir;
- /*
- * Pass the pkt to dummynet, which consumes it.
- * If shared, make a copy and keep the original.
- */
- if (shared) {
- m = m_copypacket(m, M_DONTWAIT);
- if (m == NULL)
- return 0;
- } else {
- /*
- * Pass the original to dummynet and
- * nothing back to the caller
- */
- *m0 = NULL ;
- }
- dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN);
- ip_dn_io_ptr(&m, dir, &args);
- return 0;
- }
- /*
- * XXX at some point add support for divert/forward actions.
- * If none of the above matches, we have to drop the pkt.
- */
- return 0;
-}
-#endif
-
/*
* Process a received Ethernet packet; the packet is in the
* mbuf chain m with the ethernet header at the front.
@@ -580,39 +480,18 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
return;
}
#endif
- /*
- * Do consistency checks to verify assumptions
- * made by code past this point.
- */
- if ((m->m_flags & M_PKTHDR) == 0) {
- if_printf(ifp, "discard frame w/o packet header\n");
- ifp->if_ierrors++;
- m_freem(m);
- return;
- }
if (m->m_len < ETHER_HDR_LEN) {
/* XXX maybe should pullup? */
if_printf(ifp, "discard frame w/o leading ethernet "
"header (len %u pkt len %u)\n",
m->m_len, m->m_pkthdr.len);
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
m_freem(m);
return;
}
eh = mtod(m, struct ether_header *);
etype = ntohs(eh->ether_type);
- if (m->m_pkthdr.rcvif == NULL) {
- if_printf(ifp, "discard frame w/o interface pointer\n");
- ifp->if_ierrors++;
- m_freem(m);
- return;
- }
-#ifdef DIAGNOSTIC
- if (m->m_pkthdr.rcvif != ifp) {
- if_printf(ifp, "Warning, frame marked as received on %s\n",
- m->m_pkthdr.rcvif->if_xname);
- }
-#endif
+ random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_ETHER);
CURVNET_SET_QUIET(ifp->if_vnet);
@@ -621,7 +500,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
m->m_flags |= M_BCAST;
else
m->m_flags |= M_MCAST;
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
}
#ifdef MAC
@@ -647,7 +526,8 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
m->m_flags &= ~M_HASFCS;
}
- ifp->if_ibytes += m->m_pkthdr.len;
+ if (!(ifp->if_capenable & IFCAP_HWSTATS))
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
/* Allow monitor mode to claim this frame, after stats are updated. */
if (ifp->if_flags & IFF_MONITOR) {
@@ -683,8 +563,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
#ifdef DIAGNOSTIC
if_printf(ifp, "cannot pullup VLAN header\n");
#endif
- ifp->if_ierrors++;
- m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
CURVNET_RESTORE();
return;
}
@@ -702,7 +581,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
M_SETFIB(m, ifp->if_fib);
/* Allow ng_ether(4) to claim this frame. */
- if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ if (ifp->if_l2com != NULL) {
KASSERT(ng_ether_input_p != NULL,
("%s: ng_ether_input_p is NULL", __func__));
m->m_flags &= ~M_PROMISC;
@@ -757,22 +636,36 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
m->m_flags |= M_PROMISC;
}
- /* First chunk of an mbuf contains good entropy */
- if (harvest.ethernet)
- random_harvest(m, 16, 3, 0, RANDOM_NET);
-
ether_demux(ifp, m);
CURVNET_RESTORE();
}
/*
* Ethernet input dispatch; by default, direct dispatch here regardless of
- * global configuration.
+ * global configuration. However, if RSS is enabled, hook up RSS affinity
+ * so that when deferred or hybrid dispatch is enabled, we can redistribute
+ * load based on RSS.
+ *
+ * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or
+ * not it had already done work distribution via multi-queue. Then we could
+ * direct dispatch in the event load balancing was already complete and
+ * handle the case of interfaces with different capabilities better.
+ *
+ * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions
+ * at multiple layers?
+ *
+ * XXXRW: For now, enable all this only if RSS is compiled in, although it
+ * works fine without RSS. Need to characterise the performance overhead
+ * of the detour through the netisr code in the event the result is always
+ * direct dispatch.
*/
static void
ether_nh_input(struct mbuf *m)
{
+ M_ASSERTPKTHDR(m);
+ KASSERT(m->m_pkthdr.rcvif != NULL,
+ ("%s: NULL interface pointer", __func__));
ether_input_internal(m->m_pkthdr.rcvif, m);
}
@@ -780,8 +673,14 @@ static struct netisr_handler ether_nh = {
.nh_name = "ether",
.nh_handler = ether_nh_input,
.nh_proto = NETISR_ETHER,
+#ifdef RSS
+ .nh_policy = NETISR_POLICY_CPU,
+ .nh_dispatch = NETISR_DISPATCH_DIRECT,
+ .nh_m2cpuid = rss_m2cpuid,
+#else
.nh_policy = NETISR_POLICY_SOURCE,
.nh_dispatch = NETISR_DISPATCH_DIRECT,
+#endif
};
static void
@@ -793,16 +692,74 @@ ether_init(__unused void *arg)
SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
static void
+vnet_ether_init(__unused void *arg)
+{
+ int i;
+
+ /* Initialize packet filter hooks. */
+ V_link_pfil_hook.ph_type = PFIL_TYPE_AF;
+ V_link_pfil_hook.ph_af = AF_LINK;
+ if ((i = pfil_head_register(&V_link_pfil_hook)) != 0)
+ printf("%s: WARNING: unable to register pfil link hook, "
+ "error %d\n", __func__, i);
+#ifdef VIMAGE
+ netisr_register_vnet(&ether_nh);
+#endif
+}
+VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+ vnet_ether_init, NULL);
+
+#ifdef VIMAGE
+static void
+vnet_ether_pfil_destroy(__unused void *arg)
+{
+ int i;
+
+ if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0)
+ printf("%s: WARNING: unable to unregister pfil link hook, "
+ "error %d\n", __func__, i);
+}
+VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY,
+ vnet_ether_pfil_destroy, NULL);
+
+static void
+vnet_ether_destroy(__unused void *arg)
+{
+
+ netisr_unregister_vnet(&ether_nh);
+}
+VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+ vnet_ether_destroy, NULL);
+#endif
+
+
+
+static void
ether_input(struct ifnet *ifp, struct mbuf *m)
{
+ struct mbuf *mn;
+
/*
- * We will rely on rcvif being set properly in the deferred context,
- * so assert it is correct here.
+ * The drivers are allowed to pass in a chain of packets linked with
+ * m_nextpkt. We split them up into separate packets here and pass
+ * them up. This allows the drivers to amortize the receive lock.
*/
- KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__));
+ while (m) {
+ mn = m->m_nextpkt;
+ m->m_nextpkt = NULL;
- netisr_dispatch(NETISR_ETHER, m);
+ /*
+ * We will rely on rcvif being set properly in the deferred context,
+ * so assert it is correct here.
+ */
+ KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p "
+ "rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp));
+ CURVNET_SET_QUIET(ifp->if_vnet);
+ netisr_dispatch(NETISR_ETHER, m);
+ CURVNET_RESTORE();
+ m = mn;
+ }
}
/*
@@ -812,27 +769,19 @@ void
ether_demux(struct ifnet *ifp, struct mbuf *m)
{
struct ether_header *eh;
- int isr;
+ int i, isr;
u_short ether_type;
-#if defined(NETATALK)
- struct llc *l;
-#endif
KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
-#if defined(INET) || defined(INET6)
- /*
- * Allow dummynet and/or ipfw to claim the frame.
- * Do not do this for PROMISC frames in case we are re-entered.
- */
- if (V_ip_fw_chk_ptr && V_ether_ipfw != 0 && !(m->m_flags & M_PROMISC)) {
- if (ether_ipfw_chk(&m, NULL, 0) == 0) {
- if (m)
- m_freem(m); /* dropped; free mbuf chain */
- return; /* consumed */
- }
+ /* Do not grab PROMISC frames in case we are re-entered. */
+ if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) {
+ i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, NULL);
+
+ if (i != 0 || m == NULL)
+ return;
}
-#endif
+
eh = mtod(m, struct ether_header *);
ether_type = ntohs(eh->ether_type);
@@ -843,7 +792,7 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
if ((m->m_flags & M_VLANTAG) &&
EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) {
if (ifp->if_vlantrunk == NULL) {
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
m_freem(m);
return;
}
@@ -869,7 +818,7 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
* Strip off Ethernet header.
*/
m->m_flags &= ~M_VLANTAG;
- m->m_flags &= ~(M_PROTOFLAGS);
+ m_clrprotoflags(m);
m_adj(m, ETHER_HDR_LEN);
/*
@@ -878,8 +827,6 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
switch (ether_type) {
#ifdef INET
case ETHERTYPE_IP:
- if ((m = ip_fastforward(m)) == NULL)
- return;
isr = NETISR_IP;
break;
@@ -892,54 +839,12 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
isr = NETISR_ARP;
break;
#endif
-#ifdef IPX
- case ETHERTYPE_IPX:
- if (ef_inputp && ef_inputp(ifp, eh, m) == 0)
- return;
- isr = NETISR_IPX;
- break;
-#endif
#ifdef INET6
case ETHERTYPE_IPV6:
isr = NETISR_IPV6;
break;
#endif
-#ifdef NETATALK
- case ETHERTYPE_AT:
- isr = NETISR_ATALK1;
- break;
- case ETHERTYPE_AARP:
- isr = NETISR_AARP;
- break;
-#endif /* NETATALK */
default:
-#ifdef IPX
- if (ef_inputp && ef_inputp(ifp, eh, m) == 0)
- return;
-#endif /* IPX */
-#if defined(NETATALK)
- if (ether_type > ETHERMTU)
- goto discard;
- l = mtod(m, struct llc *);
- if (l->llc_dsap == LLC_SNAP_LSAP &&
- l->llc_ssap == LLC_SNAP_LSAP &&
- l->llc_control == LLC_UI) {
- if (bcmp(&(l->llc_snap_org_code)[0], at_org_code,
- sizeof(at_org_code)) == 0 &&
- ntohs(l->llc_snap_ether_type) == ETHERTYPE_AT) {
- m_adj(m, LLC_SNAPFRAMELEN);
- isr = NETISR_ATALK2;
- break;
- }
- if (bcmp(&(l->llc_snap_org_code)[0], aarp_org_code,
- sizeof(aarp_org_code)) == 0 &&
- ntohs(l->llc_snap_ether_type) == ETHERTYPE_AARP) {
- m_adj(m, LLC_SNAPFRAMELEN);
- isr = NETISR_AARP;
- break;
- }
- }
-#endif /* NETATALK */
goto discard;
}
netisr_dispatch(isr, m);
@@ -951,14 +856,14 @@ discard:
* hand the packet to it for last chance processing;
* otherwise dispose of it.
*/
- if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ if (ifp->if_l2com != NULL) {
KASSERT(ng_ether_input_orphan_p != NULL,
("ng_ether_input_orphan_p is NULL"));
/*
* Put back the ethernet header so netgraph has a
* consistent view of inbound packets.
*/
- M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
+ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
(*ng_ether_input_orphan_p)(ifp, m);
return;
}
@@ -998,6 +903,7 @@ ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
ifp->if_output = ether_output;
ifp->if_input = ether_input;
ifp->if_resolvemulti = ether_resolvemulti;
+ ifp->if_requestencap = ether_requestencap;
#ifdef VIMAGE
ifp->if_reassign = ether_reassign;
#endif
@@ -1022,6 +928,8 @@ ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
break;
if (i != ifp->if_addrlen)
if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
+
+ uuid_ether_add(LLADDR(sdl));
}
/*
@@ -1030,7 +938,12 @@ ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
void
ether_ifdetach(struct ifnet *ifp)
{
- if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ struct sockaddr_dl *sdl;
+
+ sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr);
+ uuid_ether_del(LLADDR(sdl));
+
+ if (ifp->if_l2com != NULL) {
KASSERT(ng_ether_detach_p != NULL,
("ng_ether_detach_p is NULL"));
(*ng_ether_detach_p)(ifp);
@@ -1045,7 +958,7 @@ void
ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused)
{
- if (IFP2AC(ifp)->ac_netgraph != NULL) {
+ if (ifp->if_l2com != NULL) {
KASSERT(ng_ether_detach_p != NULL,
("ng_ether_detach_p is NULL"));
(*ng_ether_detach_p)(ifp);
@@ -1061,10 +974,6 @@ ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused)
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
-#if defined(INET) || defined(INET6)
-SYSCTL_VNET_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW,
- &VNET_NAME(ether_ipfw), 0, "Pass ether pkts through firewall");
-#endif
#if 0
/*
@@ -1158,31 +1067,6 @@ ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
arp_ifinit(ifp, ifa);
break;
#endif
-#ifdef IPX
- /*
- * XXX - This code is probably wrong
- */
- case AF_IPX:
- {
- struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr);
-
- if (ipx_nullhost(*ina))
- ina->x_host =
- *(union ipx_host *)
- IF_LLADDR(ifp);
- else {
- bcopy((caddr_t) ina->x_host.c_host,
- (caddr_t) IF_LLADDR(ifp),
- ETHER_ADDR_LEN);
- }
-
- /*
- * Set new address
- */
- ifp->if_init(ifp->if_softc);
- break;
- }
-#endif
default:
ifp->if_init(ifp->if_softc);
break;
@@ -1238,7 +1122,7 @@ ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
e_addr = LLADDR(sdl);
if (!ETHER_IS_MULTICAST(e_addr))
return EADDRNOTAVAIL;
- *llsa = 0;
+ *llsa = NULL;
return 0;
#ifdef INET
@@ -1246,14 +1130,7 @@ ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
sin = (struct sockaddr_in *)sa;
if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
return EADDRNOTAVAIL;
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT|M_ZERO);
- if (sdl == NULL)
- return ENOMEM;
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_ETHER;
+ sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
sdl->sdl_alen = ETHER_ADDR_LEN;
e_addr = LLADDR(sdl);
ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
@@ -1270,19 +1147,12 @@ ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
* (This is used for multicast routers.)
*/
ifp->if_flags |= IFF_ALLMULTI;
- *llsa = 0;
+ *llsa = NULL;
return 0;
}
if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
return EADDRNOTAVAIL;
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT|M_ZERO);
- if (sdl == NULL)
- return (ENOMEM);
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_ETHER;
+ sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
sdl->sdl_alen = ETHER_ADDR_LEN;
e_addr = LLADDR(sdl);
ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
@@ -1299,46 +1169,8 @@ ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
}
}
-static void*
-ether_alloc(u_char type, struct ifnet *ifp)
-{
- struct arpcom *ac;
-
- ac = malloc(sizeof(struct arpcom), M_ARPCOM, M_WAITOK | M_ZERO);
- ac->ac_ifp = ifp;
-
- return (ac);
-}
-
-static void
-ether_free(void *com, u_char type)
-{
-
- free(com, M_ARPCOM);
-}
-
-static int
-ether_modevent(module_t mod, int type, void *data)
-{
-
- switch (type) {
- case MOD_LOAD:
- if_register_com_alloc(IFT_ETHER, ether_alloc, ether_free);
- break;
- case MOD_UNLOAD:
- if_deregister_com_alloc(IFT_ETHER);
- break;
- default:
- return EOPNOTSUPP;
- }
-
- return (0);
-}
-
static moduledata_t ether_mod = {
- "ether",
- ether_modevent,
- 0
+ .name = "ether",
};
void
@@ -1386,7 +1218,7 @@ ether_vlanencap(struct mbuf *m, uint16_t tag)
{
struct ether_vlan_header *evl;
- M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
+ M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
if (m == NULL)
return (NULL);
/* M_PREPEND takes care of m_len, m_pkthdr.len for us */
diff --git a/freebsd/sys/net/if_faith.c b/freebsd/sys/net/if_faith.c
deleted file mode 100644
index cf4a7fba..00000000
--- a/freebsd/sys/net/if_faith.c
+++ /dev/null
@@ -1,353 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/* $KAME: if_faith.c,v 1.23 2001/12/17 13:55:29 sumikawa Exp $ */
-
-/*-
- * Copyright (c) 1982, 1986, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-/*
- * derived from
- * @(#)if_loop.c 8.1 (Berkeley) 6/10/93
- * Id: if_loop.c,v 1.22 1996/06/19 16:24:10 wollman Exp
- */
-
-/*
- * Loopback interface driver for protocol testing and timing.
- */
-#include <rtems/bsd/local/opt_inet.h>
-#include <rtems/bsd/local/opt_inet6.h>
-
-#include <rtems/bsd/sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <sys/socket.h>
-#include <rtems/bsd/sys/errno.h>
-#include <sys/sockio.h>
-#include <sys/time.h>
-#include <sys/queue.h>
-#include <sys/types.h>
-#include <sys/malloc.h>
-
-#include <net/if.h>
-#include <net/if_clone.h>
-#include <net/if_types.h>
-#include <net/netisr.h>
-#include <net/route.h>
-#include <net/bpf.h>
-#include <net/vnet.h>
-
-#ifdef INET
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-#include <netinet/ip.h>
-#endif
-
-#ifdef INET6
-#ifndef INET
-#include <netinet/in.h>
-#endif
-#include <netinet6/in6_var.h>
-#include <netinet/ip6.h>
-#include <netinet6/ip6_var.h>
-#endif
-
-#define FAITHNAME "faith"
-
-struct faith_softc {
- struct ifnet *sc_ifp;
-};
-
-static int faithioctl(struct ifnet *, u_long, caddr_t);
-int faithoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *);
-static void faithrtrequest(int, struct rtentry *, struct rt_addrinfo *);
-#ifdef INET6
-static int faithprefix(struct in6_addr *);
-#endif
-
-static int faithmodevent(module_t, int, void *);
-
-static MALLOC_DEFINE(M_FAITH, FAITHNAME, "Firewall Assisted Tunnel Interface");
-
-static int faith_clone_create(struct if_clone *, int, caddr_t);
-static void faith_clone_destroy(struct ifnet *);
-
-IFC_SIMPLE_DECLARE(faith, 0);
-
-#define FAITHMTU 1500
-
-static int
-faithmodevent(mod, type, data)
- module_t mod;
- int type;
- void *data;
-{
-
- switch (type) {
- case MOD_LOAD:
- if_clone_attach(&faith_cloner);
-
-#ifdef INET6
- faithprefix_p = faithprefix;
-#endif
-
- break;
- case MOD_UNLOAD:
-#ifdef INET6
- faithprefix_p = NULL;
-#endif
-
- if_clone_detach(&faith_cloner);
- break;
- default:
- return EOPNOTSUPP;
- }
- return 0;
-}
-
-static moduledata_t faith_mod = {
- "if_faith",
- faithmodevent,
- 0
-};
-
-DECLARE_MODULE(if_faith, faith_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
-MODULE_VERSION(if_faith, 1);
-
-static int
-faith_clone_create(ifc, unit, params)
- struct if_clone *ifc;
- int unit;
- caddr_t params;
-{
- struct ifnet *ifp;
- struct faith_softc *sc;
-
- sc = malloc(sizeof(struct faith_softc), M_FAITH, M_WAITOK | M_ZERO);
- ifp = sc->sc_ifp = if_alloc(IFT_FAITH);
- if (ifp == NULL) {
- free(sc, M_FAITH);
- return (ENOSPC);
- }
-
- ifp->if_softc = sc;
- if_initname(sc->sc_ifp, ifc->ifc_name, unit);
-
- ifp->if_mtu = FAITHMTU;
- /* Change to BROADCAST experimentaly to announce its prefix. */
- ifp->if_flags = /* IFF_LOOPBACK */ IFF_BROADCAST | IFF_MULTICAST;
- ifp->if_ioctl = faithioctl;
- ifp->if_output = faithoutput;
- ifp->if_hdrlen = 0;
- ifp->if_addrlen = 0;
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
- if_attach(ifp);
- bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
- return (0);
-}
-
-static void
-faith_clone_destroy(ifp)
- struct ifnet *ifp;
-{
- struct faith_softc *sc = ifp->if_softc;
-
- bpfdetach(ifp);
- if_detach(ifp);
- if_free(ifp);
- free(sc, M_FAITH);
-}
-
-int
-faithoutput(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
-{
- int isr;
- u_int32_t af;
- struct rtentry *rt = NULL;
-
- M_ASSERTPKTHDR(m);
-
- if (ro != NULL)
- rt = ro->ro_rt;
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
- bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
-
- if (bpf_peers_present(ifp->if_bpf)) {
- af = dst->sa_family;
- bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
- }
-
- if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
- m_freem(m);
- return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
- rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
- }
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
- switch (dst->sa_family) {
-#ifdef INET
- case AF_INET:
- isr = NETISR_IP;
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- isr = NETISR_IPV6;
- break;
-#endif
- default:
- m_freem(m);
- return EAFNOSUPPORT;
- }
-
- /* XXX do we need more sanity checks? */
-
- m->m_pkthdr.rcvif = ifp;
- ifp->if_ipackets++;
- ifp->if_ibytes += m->m_pkthdr.len;
- netisr_dispatch(isr, m);
- return (0);
-}
-
-/* ARGSUSED */
-static void
-faithrtrequest(cmd, rt, info)
- int cmd;
- struct rtentry *rt;
- struct rt_addrinfo *info;
-{
- RT_LOCK_ASSERT(rt);
- rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
-}
-
-/*
- * Process an ioctl request.
- */
-/* ARGSUSED */
-static int
-faithioctl(ifp, cmd, data)
- struct ifnet *ifp;
- u_long cmd;
- caddr_t data;
-{
- struct ifaddr *ifa;
- struct ifreq *ifr = (struct ifreq *)data;
- int error = 0;
-
- switch (cmd) {
-
- case SIOCSIFADDR:
- ifp->if_flags |= IFF_UP;
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifa = (struct ifaddr *)data;
- ifa->ifa_rtrequest = faithrtrequest;
- /*
- * Everything else is done at a higher level.
- */
- break;
-
- case SIOCADDMULTI:
- case SIOCDELMULTI:
- if (ifr == 0) {
- error = EAFNOSUPPORT; /* XXX */
- break;
- }
- switch (ifr->ifr_addr.sa_family) {
-#ifdef INET
- case AF_INET:
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- break;
-#endif
-
- default:
- error = EAFNOSUPPORT;
- break;
- }
- break;
-
-#ifdef SIOCSIFMTU
- case SIOCSIFMTU:
- ifp->if_mtu = ifr->ifr_mtu;
- break;
-#endif
-
- case SIOCSIFFLAGS:
- break;
-
- default:
- error = EINVAL;
- }
- return (error);
-}
-
-#ifdef INET6
-/*
- * XXX could be slow
- * XXX could be layer violation to call sys/net from sys/netinet6
- */
-static int
-faithprefix(in6)
- struct in6_addr *in6;
-{
- struct rtentry *rt;
- struct sockaddr_in6 sin6;
- int ret;
-
- if (V_ip6_keepfaith == 0)
- return 0;
-
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- sin6.sin6_addr = *in6;
- rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB);
- if (rt && rt->rt_ifp && rt->rt_ifp->if_type == IFT_FAITH &&
- (rt->rt_ifp->if_flags & IFF_UP) != 0)
- ret = 1;
- else
- ret = 0;
- if (rt)
- RTFREE_LOCKED(rt);
- return ret;
-}
-#endif
diff --git a/freebsd/sys/net/if_fddisubr.c b/freebsd/sys/net/if_fddisubr.c
index 7a7fb471..9df882ec 100644
--- a/freebsd/sys/net/if_fddisubr.c
+++ b/freebsd/sys/net/if_fddisubr.c
@@ -38,10 +38,8 @@
* $FreeBSD$
*/
-#include <rtems/bsd/local/opt_atalk.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -53,6 +51,7 @@
#include <sys/sockio.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_llc.h>
#include <net/if_types.h>
@@ -73,24 +72,10 @@
#include <netinet6/nd6.h>
#endif
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
#ifdef DECNET
#include <netdnet/dn.h>
#endif
-#ifdef NETATALK
-#include <netatalk/at.h>
-#include <netatalk/at_var.h>
-#include <netatalk/at_extern.h>
-
-extern u_char at_org_code[ 3 ];
-extern u_char aarp_org_code[ 3 ];
-#endif /* NETATALK */
-
#include <security/mac/mac_framework.h>
static const u_char fddibroadcastaddr[FDDI_ADDR_LEN] =
@@ -98,7 +83,7 @@ static const u_char fddibroadcastaddr[FDDI_ADDR_LEN] =
static int fddi_resolvemulti(struct ifnet *, struct sockaddr **,
struct sockaddr *);
-static int fddi_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+static int fddi_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
static void fddi_input(struct ifnet *ifp, struct mbuf *m);
@@ -109,21 +94,17 @@ static void fddi_input(struct ifnet *ifp, struct mbuf *m);
* Encapsulate a packet of type family for the local net.
* Use trailer local net encapsulation if enough data in first
* packet leaves a multiple of 512 bytes of data in remainder.
- * Assumes that ifp is actually pointer to arpcom structure.
*/
static int
-fddi_output(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
+fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
u_int16_t type;
int loop_copy = 0, error = 0, hdrcmplt = 0;
u_char esrc[FDDI_ADDR_LEN], edst[FDDI_ADDR_LEN];
struct fddi_header *fh;
#if defined(INET) || defined(INET6)
- struct llentry *lle;
+ int is_gw = 0;
#endif
#ifdef MAC
@@ -139,14 +120,15 @@ fddi_output(ifp, m, dst, ro)
senderr(ENETDOWN);
getmicrotime(&ifp->if_lastchange);
+#if defined(INET) || defined(INET6)
+ if (ro != NULL)
+ is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
+#endif
+
switch (dst->sa_family) {
#ifdef INET
case AF_INET: {
- struct rtentry *rt0 = NULL;
-
- if (ro != NULL)
- rt0 = ro->ro_rt;
- error = arpresolve(ifp, rt0, m, dst, edst, &lle);
+ error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
if (error)
return (error == EWOULDBLOCK ? 0 : error);
type = htons(ETHERTYPE_IP);
@@ -182,68 +164,29 @@ fddi_output(ifp, m, dst, ro)
#endif /* INET */
#ifdef INET6
case AF_INET6:
- error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle);
+ error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
if (error)
- return (error); /* Something bad happened */
+ return (error == EWOULDBLOCK ? 0 : error);
type = htons(ETHERTYPE_IPV6);
break;
#endif /* INET6 */
-#ifdef IPX
- case AF_IPX:
- type = htons(ETHERTYPE_IPX);
- bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
- (caddr_t)edst, FDDI_ADDR_LEN);
- break;
-#endif /* IPX */
-#ifdef NETATALK
- case AF_APPLETALK: {
- struct at_ifaddr *aa;
- if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst))
- return (0);
- /*
- * ifaddr is the first thing in at_ifaddr
- */
- if ((aa = at_ifawithnet( (struct sockaddr_at *)dst)) == 0)
- goto bad;
-
- /*
- * In the phase 2 case, we need to prepend an mbuf for the llc header.
- * Since we must preserve the value of m, which is passed to us by
- * value, we m_copy() the first mbuf, and use it for our llc header.
- */
- if (aa->aa_flags & AFA_PHASE2) {
- struct llc llc;
-
- M_PREPEND(m, LLC_SNAPFRAMELEN, M_WAIT);
- llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
- llc.llc_control = LLC_UI;
- bcopy(at_org_code, llc.llc_snap.org_code, sizeof(at_org_code));
- llc.llc_snap.ether_type = htons(ETHERTYPE_AT);
- bcopy(&llc, mtod(m, caddr_t), LLC_SNAPFRAMELEN);
- type = 0;
- } else {
- type = htons(ETHERTYPE_AT);
- }
- ifa_free(&aa->aa_ifa);
- break;
- }
-#endif /* NETATALK */
-
case pseudo_AF_HDRCMPLT:
{
- struct ether_header *eh;
+ const struct ether_header *eh;
+
hdrcmplt = 1;
- eh = (struct ether_header *)dst->sa_data;
- bcopy((caddr_t)eh->ether_shost, (caddr_t)esrc, FDDI_ADDR_LEN);
+ eh = (const struct ether_header *)dst->sa_data;
+ bcopy(eh->ether_shost, esrc, FDDI_ADDR_LEN);
/* FALLTHROUGH */
}
case AF_UNSPEC:
{
- struct ether_header *eh;
+ const struct ether_header *eh;
+
loop_copy = -1;
- eh = (struct ether_header *)dst->sa_data;
- bcopy((caddr_t)eh->ether_dhost, (caddr_t)edst, FDDI_ADDR_LEN);
+ eh = (const struct ether_header *)dst->sa_data;
+ bcopy(eh->ether_dhost, edst, FDDI_ADDR_LEN);
if (*edst & 1)
m->m_flags |= (M_BCAST|M_MCAST);
type = eh->ether_type;
@@ -293,8 +236,8 @@ fddi_output(ifp, m, dst, ro)
*/
if (type != 0) {
struct llc *l;
- M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
+ if (m == NULL)
senderr(ENOBUFS);
l = mtod(m, struct llc *);
l->llc_control = LLC_UI;
@@ -309,8 +252,8 @@ fddi_output(ifp, m, dst, ro)
* Add local net header. If no space in first mbuf,
* allocate another.
*/
- M_PREPEND(m, FDDI_HDR_LEN, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, FDDI_HDR_LEN, M_NOWAIT);
+ if (m == NULL)
senderr(ENOBUFS);
fh = mtod(m, struct fddi_header *);
fh->fddi_fc = FDDIFC_LLC_ASYNC|FDDIFC_LLC_PRIO4;
@@ -347,12 +290,12 @@ fddi_output(ifp, m, dst, ro)
error = (ifp->if_transmit)(ifp, m);
if (error)
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (error);
bad:
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
if (m)
m_freem(m);
return (error);
@@ -376,24 +319,23 @@ fddi_input(ifp, m)
*/
if ((m->m_flags & M_PKTHDR) == 0) {
if_printf(ifp, "discard frame w/o packet header\n");
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
m_freem(m);
return;
}
if (m->m_pkthdr.rcvif == NULL) {
if_printf(ifp, "discard frame w/o interface pointer\n");
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
m_freem(m);
return;
}
m = m_pullup(m, FDDI_HDR_LEN);
if (m == NULL) {
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
goto dropanyway;
}
fh = mtod(m, struct fddi_header *);
- m->m_pkthdr.header = (void *)fh;
/*
* Discard packet if interface is not up.
@@ -422,7 +364,7 @@ fddi_input(ifp, m)
/*
* Update interface statistics.
*/
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
getmicrotime(&ifp->if_lastchange);
/*
@@ -443,7 +385,7 @@ fddi_input(ifp, m)
m->m_flags |= M_BCAST;
else
m->m_flags |= M_MCAST;
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
}
#ifdef M_LINK0
@@ -461,7 +403,7 @@ fddi_input(ifp, m)
m = m_pullup(m, LLC_SNAPFRAMELEN);
if (m == 0) {
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
goto dropanyway;
}
l = mtod(m, struct llc *);
@@ -472,30 +414,13 @@ fddi_input(ifp, m)
u_int16_t type;
if ((l->llc_control != LLC_UI) ||
(l->llc_ssap != LLC_SNAP_LSAP)) {
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
-#ifdef NETATALK
- if (bcmp(&(l->llc_snap.org_code)[0], at_org_code,
- sizeof(at_org_code)) == 0 &&
- ntohs(l->llc_snap.ether_type) == ETHERTYPE_AT) {
- isr = NETISR_ATALK2;
- m_adj(m, LLC_SNAPFRAMELEN);
- break;
- }
-
- if (bcmp(&(l->llc_snap.org_code)[0], aarp_org_code,
- sizeof(aarp_org_code)) == 0 &&
- ntohs(l->llc_snap.ether_type) == ETHERTYPE_AARP) {
- m_adj(m, LLC_SNAPFRAMELEN);
- isr = NETISR_AARP;
- break;
- }
-#endif /* NETATALK */
if (l->llc_snap.org_code[0] != 0 ||
l->llc_snap.org_code[1] != 0 ||
l->llc_snap.org_code[2] != 0) {
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
@@ -505,8 +430,6 @@ fddi_input(ifp, m)
switch (type) {
#ifdef INET
case ETHERTYPE_IP:
- if ((m = ip_fastforward(m)) == NULL)
- return;
isr = NETISR_IP;
break;
@@ -521,27 +444,14 @@ fddi_input(ifp, m)
isr = NETISR_IPV6;
break;
#endif
-#ifdef IPX
- case ETHERTYPE_IPX:
- isr = NETISR_IPX;
- break;
-#endif
#ifdef DECNET
case ETHERTYPE_DECNET:
isr = NETISR_DECNET;
break;
#endif
-#ifdef NETATALK
- case ETHERTYPE_AT:
- isr = NETISR_ATALK1;
- break;
- case ETHERTYPE_AARP:
- isr = NETISR_AARP;
- break;
-#endif /* NETATALK */
default:
/* printf("fddi_input: unknown protocol 0x%x\n", type); */
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
break;
@@ -549,7 +459,7 @@ fddi_input(ifp, m)
default:
/* printf("fddi_input: unknown dsap 0x%x\n", l->llc_dsap); */
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
M_SETFIB(m, ifp->if_fib);
@@ -557,7 +467,7 @@ fddi_input(ifp, m)
return;
dropanyway:
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
if (m)
m_freem(m);
return;
@@ -643,31 +553,6 @@ fddi_ioctl (ifp, command, data)
arp_ifinit(ifp, ifa);
break;
#endif
-#ifdef IPX
- /*
- * XXX - This code is probably wrong
- */
- case AF_IPX: {
- struct ipx_addr *ina;
-
- ina = &(IA_SIPX(ifa)->sipx_addr);
-
- if (ipx_nullhost(*ina)) {
- ina->x_host = *(union ipx_host *)
- IF_LLADDR(ifp);
- } else {
- bcopy((caddr_t) ina->x_host.c_host,
- (caddr_t) IF_LLADDR(ifp),
- ETHER_ADDR_LEN);
- }
-
- /*
- * Set new address
- */
- ifp->if_init(ifp->if_softc);
- }
- break;
-#endif
default:
ifp->if_init(ifp->if_softc);
break;
@@ -724,7 +609,7 @@ fddi_resolvemulti(ifp, llsa, sa)
e_addr = LLADDR(sdl);
if ((e_addr[0] & 1) != 1)
return (EADDRNOTAVAIL);
- *llsa = 0;
+ *llsa = NULL;
return (0);
#ifdef INET
@@ -732,14 +617,7 @@ fddi_resolvemulti(ifp, llsa, sa)
sin = (struct sockaddr_in *)sa;
if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
return (EADDRNOTAVAIL);
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT | M_ZERO);
- if (sdl == NULL)
- return (ENOMEM);
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_FDDI;
+ sdl = link_init_sdl(ifp, *llsa, IFT_FDDI);
sdl->sdl_nlen = 0;
sdl->sdl_alen = FDDI_ADDR_LEN;
sdl->sdl_slen = 0;
@@ -758,19 +636,12 @@ fddi_resolvemulti(ifp, llsa, sa)
* (This is used for multicast routers.)
*/
ifp->if_flags |= IFF_ALLMULTI;
- *llsa = 0;
+ *llsa = NULL;
return (0);
}
if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
return (EADDRNOTAVAIL);
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT | M_ZERO);
- if (sdl == NULL)
- return (ENOMEM);
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_FDDI;
+ sdl = link_init_sdl(ifp, *llsa, IFT_FDDI);
sdl->sdl_nlen = 0;
sdl->sdl_alen = FDDI_ADDR_LEN;
sdl->sdl_slen = 0;
diff --git a/freebsd/sys/net/if_fwsubr.c b/freebsd/sys/net/if_fwsubr.c
index b022ecae..df4c38cf 100644
--- a/freebsd/sys/net/if_fwsubr.c
+++ b/freebsd/sys/net/if_fwsubr.c
@@ -45,6 +45,7 @@
#include <sys/sockio.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/if_llc.h>
@@ -77,7 +78,7 @@ struct fw_hwaddr firewire_broadcastaddr = {
};
static int
-firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
struct fw_com *fc = IFP2FWC(ifp);
@@ -91,7 +92,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
int unicast, dgl, foff;
static int next_dgl;
#if defined(INET) || defined(INET6)
- struct llentry *lle;
+ int is_gw = 0;
#endif
#ifdef MAC
@@ -106,6 +107,10 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
goto bad;
}
+#if defined(INET) || defined(INET6)
+ if (ro != NULL)
+ is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
+#endif
/*
* For unicast, we make a tag to store the lladdr of the
* destination. This might not be the first time we have seen
@@ -129,7 +134,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
}
destfw = (struct fw_hwaddr *)(mtag + 1);
} else {
- destfw = 0;
+ destfw = NULL;
}
switch (dst->sa_family) {
@@ -141,7 +146,8 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
* doesn't fit into the arp model.
*/
if (unicast) {
- error = arpresolve(ifp, ro ? ro->ro_rt : NULL, m, dst, (u_char *) destfw, &lle);
+ error = arpresolve(ifp, is_gw, m, dst,
+ (u_char *) destfw, NULL, NULL);
if (error)
return (error == EWOULDBLOCK ? 0 : error);
}
@@ -170,10 +176,10 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
#ifdef INET6
case AF_INET6:
if (unicast) {
- error = nd6_storelladdr(fc->fc_ifp, m, dst,
- (u_char *) destfw, &lle);
+ error = nd6_resolve(fc->fc_ifp, is_gw, m, dst,
+ (u_char *) destfw, NULL, NULL);
if (error)
- return (error);
+ return (error == EWOULDBLOCK ? 0 : error);
}
type = ETHERTYPE_IPV6;
break;
@@ -231,7 +237,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
/*
* No fragmentation is necessary.
*/
- M_PREPEND(m, sizeof(uint32_t), M_DONTWAIT);
+ M_PREPEND(m, sizeof(uint32_t), M_NOWAIT);
if (!m) {
error = ENOBUFS;
goto bad;
@@ -263,17 +269,17 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
* Split off the tail segment from the
* datagram, copying our tags over.
*/
- mtail = m_split(m, fsize, M_DONTWAIT);
+ mtail = m_split(m, fsize, M_NOWAIT);
m_tag_copy_chain(mtail, m, M_NOWAIT);
} else {
- mtail = 0;
+ mtail = NULL;
}
/*
* Add our encapsulation header to this
* fragment and hand it off to the link.
*/
- M_PREPEND(m, 2*sizeof(uint32_t), M_DONTWAIT);
+ M_PREPEND(m, 2*sizeof(uint32_t), M_NOWAIT);
if (!m) {
error = ENOBUFS;
goto bad;
@@ -538,7 +544,7 @@ firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src)
if (m->m_pkthdr.rcvif == NULL) {
if_printf(ifp, "discard frame w/o interface pointer\n");
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
m_freem(m);
return;
}
@@ -583,7 +589,7 @@ firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src)
return;
}
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
/* Discard packet if interface is not up */
if ((ifp->if_flags & IFF_UP) == 0) {
@@ -592,13 +598,11 @@ firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src)
}
if (m->m_flags & (M_BCAST|M_MCAST))
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
switch (type) {
#ifdef INET
case ETHERTYPE_IP:
- if ((m = ip_fastforward(m)) == NULL)
- return;
isr = NETISR_IP;
break;
@@ -700,7 +704,7 @@ firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
/*
* No mapping needed.
*/
- *llsa = 0;
+ *llsa = NULL;
return 0;
#ifdef INET
@@ -708,7 +712,7 @@ firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
sin = (struct sockaddr_in *)sa;
if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
return EADDRNOTAVAIL;
- *llsa = 0;
+ *llsa = NULL;
return 0;
#endif
#ifdef INET6
@@ -721,12 +725,12 @@ firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
* (This is used for multicast routers.)
*/
ifp->if_flags |= IFF_ALLMULTI;
- *llsa = 0;
+ *llsa = NULL;
return 0;
}
if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
return EADDRNOTAVAIL;
- *llsa = 0;
+ *llsa = NULL;
return 0;
#endif
diff --git a/freebsd/sys/net/if_gif.c b/freebsd/sys/net/if_gif.c
index 27cbbdda..e07a2da0 100644
--- a/freebsd/sys/net/if_gif.c
+++ b/freebsd/sys/net/if_gif.c
@@ -1,8 +1,5 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $FreeBSD$ */
-/* $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */
-
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
@@ -30,8 +27,13 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
@@ -39,11 +41,14 @@
#include <sys/systm.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
+#include <sys/sx.h>
#include <rtems/bsd/sys/errno.h>
#include <sys/time.h>
#include <sys/sysctl.h>
@@ -55,6 +60,7 @@
#include <machine/cpu.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
#include <net/netisr.h>
@@ -65,9 +71,9 @@
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
+#include <netinet/ip_ecn.h>
#ifdef INET
#include <netinet/in_var.h>
-#include <netinet/in_gif.h>
#include <netinet/ip_var.h>
#endif /* INET */
@@ -77,9 +83,9 @@
#endif
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
+#include <netinet6/ip6_ecn.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
-#include <netinet6/in6_gif.h>
#include <netinet6/ip6protosw.h>
#endif /* INET6 */
@@ -90,26 +96,41 @@
#include <security/mac/mac_framework.h>
-#define GIFNAME "gif"
+static const char gifname[] = "gif";
/*
- * gif_mtx protects the global gif_softc_list.
+ * gif_mtx protects a per-vnet gif_softc_list.
*/
-static struct mtx gif_mtx;
+static VNET_DEFINE(struct mtx, gif_mtx);
+#define V_gif_mtx VNET(gif_mtx)
static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
#define V_gif_softc_list VNET(gif_softc_list)
+static struct sx gif_ioctl_sx;
+SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
+
+#define GIF_LIST_LOCK_INIT(x) mtx_init(&V_gif_mtx, "gif_mtx", \
+ NULL, MTX_DEF)
+#define GIF_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gif_mtx)
+#define GIF_LIST_LOCK(x) mtx_lock(&V_gif_mtx)
+#define GIF_LIST_UNLOCK(x) mtx_unlock(&V_gif_mtx)
void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
void (*ng_gif_attach_p)(struct ifnet *ifp);
void (*ng_gif_detach_p)(struct ifnet *ifp);
-static void gif_start(struct ifnet *);
+static int gif_check_nesting(struct ifnet *, struct mbuf *);
+static int gif_set_tunnel(struct ifnet *, struct sockaddr *,
+ struct sockaddr *);
+static void gif_delete_tunnel(struct ifnet *);
+static int gif_ioctl(struct ifnet *, u_long, caddr_t);
+static int gif_transmit(struct ifnet *, struct mbuf *);
+static void gif_qflush(struct ifnet *);
static int gif_clone_create(struct if_clone *, int, caddr_t);
static void gif_clone_destroy(struct ifnet *);
-
-IFC_SIMPLE_DECLARE(gif, 0);
+static VNET_DEFINE(struct if_clone *, gif_cloner);
+#define V_gif_cloner VNET(gif_cloner)
static int gifmodevent(module_t, int, void *);
@@ -129,7 +150,7 @@ static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
#endif
static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
#define V_max_gif_nesting VNET(max_gif_nesting)
-SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_RW,
+SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
/*
@@ -143,22 +164,12 @@ static VNET_DEFINE(int, parallel_tunnels) = 1;
static VNET_DEFINE(int, parallel_tunnels) = 0;
#endif
#define V_parallel_tunnels VNET(parallel_tunnels)
-SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_RW,
- &VNET_NAME(parallel_tunnels), 0, "Allow parallel tunnels?");
-
-/* copy from src/sys/net/if_ethersubr.c */
-static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
- { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-#ifndef ETHER_IS_BROADCAST
-#define ETHER_IS_BROADCAST(addr) \
- (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
-#endif
+SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0,
+ "Allow parallel tunnels?");
static int
-gif_clone_create(ifc, unit, params)
- struct if_clone *ifc;
- int unit;
- caddr_t params;
+gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
struct gif_softc *sc;
@@ -169,18 +180,9 @@ gif_clone_create(ifc, unit, params)
sc->gif_fibnum = BSD_DEFAULT_FIB;
#endif /* __rtems__ */
GIF2IFP(sc) = if_alloc(IFT_GIF);
- if (GIF2IFP(sc) == NULL) {
- free(sc, M_GIF);
- return (ENOSPC);
- }
-
GIF_LOCK_INIT(sc);
-
GIF2IFP(sc)->if_softc = sc;
- if_initname(GIF2IFP(sc), ifc->ifc_name, unit);
-
- sc->encap_cookie4 = sc->encap_cookie6 = NULL;
- sc->gif_options = GIF_ACCEPT_REVETHIP;
+ if_initname(GIF2IFP(sc), gifname, unit);
GIF2IFP(sc)->if_addrlen = 0;
GIF2IFP(sc)->if_mtu = GIF_MTU;
@@ -190,56 +192,42 @@ gif_clone_create(ifc, unit, params)
GIF2IFP(sc)->if_flags |= IFF_LINK2;
#endif
GIF2IFP(sc)->if_ioctl = gif_ioctl;
- GIF2IFP(sc)->if_start = gif_start;
+ GIF2IFP(sc)->if_transmit = gif_transmit;
+ GIF2IFP(sc)->if_qflush = gif_qflush;
GIF2IFP(sc)->if_output = gif_output;
- GIF2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
+ GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
+ GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
if_attach(GIF2IFP(sc));
bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
if (ng_gif_attach_p != NULL)
(*ng_gif_attach_p)(GIF2IFP(sc));
- mtx_lock(&gif_mtx);
+ GIF_LIST_LOCK();
LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
- mtx_unlock(&gif_mtx);
-
+ GIF_LIST_UNLOCK();
return (0);
}
static void
-gif_clone_destroy(ifp)
- struct ifnet *ifp;
+gif_clone_destroy(struct ifnet *ifp)
{
-#if defined(INET) || defined(INET6)
- int err;
-#endif
- struct gif_softc *sc = ifp->if_softc;
-
- mtx_lock(&gif_mtx);
- LIST_REMOVE(sc, gif_list);
- mtx_unlock(&gif_mtx);
+ struct gif_softc *sc;
+ sx_xlock(&gif_ioctl_sx);
+ sc = ifp->if_softc;
gif_delete_tunnel(ifp);
-#ifdef INET6
- if (sc->encap_cookie6 != NULL) {
- err = encap_detach(sc->encap_cookie6);
- KASSERT(err == 0, ("Unexpected error detaching encap_cookie6"));
- }
-#endif
-#ifdef INET
- if (sc->encap_cookie4 != NULL) {
- err = encap_detach(sc->encap_cookie4);
- KASSERT(err == 0, ("Unexpected error detaching encap_cookie4"));
- }
-#endif
-
+ GIF_LIST_LOCK();
+ LIST_REMOVE(sc, gif_list);
+ GIF_LIST_UNLOCK();
if (ng_gif_detach_p != NULL)
(*ng_gif_detach_p)(ifp);
bpfdetach(ifp);
if_detach(ifp);
- if_free(ifp);
+ ifp->if_softc = NULL;
+ sx_xunlock(&gif_ioctl_sx);
+ if_free(ifp);
GIF_LOCK_DESTROY(sc);
-
free(sc, M_GIF);
}
@@ -248,31 +236,35 @@ vnet_gif_init(const void *unused __unused)
{
LIST_INIT(&V_gif_softc_list);
+ GIF_LIST_LOCK_INIT();
+ V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
+ gif_clone_destroy, 0);
}
-VNET_SYSINIT(vnet_gif_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_gif_init,
- NULL);
+VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_gif_init, NULL);
+
+static void
+vnet_gif_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(V_gif_cloner);
+ GIF_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_gif_uninit, NULL);
static int
-gifmodevent(mod, type, data)
- module_t mod;
- int type;
- void *data;
+gifmodevent(module_t mod, int type, void *data)
{
switch (type) {
case MOD_LOAD:
- mtx_init(&gif_mtx, "gif_mtx", NULL, MTX_DEF);
- if_clone_attach(&gif_cloner);
- break;
-
case MOD_UNLOAD:
- if_clone_detach(&gif_cloner);
- mtx_destroy(&gif_mtx);
break;
default:
- return EOPNOTSUPP;
+ return (EOPNOTSUPP);
}
- return 0;
+ return (0);
}
static moduledata_t gif_mod = {
@@ -285,219 +277,257 @@ DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_gif, 1);
int
-gif_encapcheck(m, off, proto, arg)
- const struct mbuf *m;
- int off;
- int proto;
- void *arg;
+gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
{
- struct ip ip;
+ GIF_RLOCK_TRACKER;
+ const struct ip *ip;
struct gif_softc *sc;
+ int ret;
sc = (struct gif_softc *)arg;
- if (sc == NULL)
- return 0;
+ if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
+ return (0);
- if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
- return 0;
+ ret = 0;
+ GIF_RLOCK(sc);
/* no physical address */
- if (!sc->gif_psrc || !sc->gif_pdst)
- return 0;
+ if (sc->gif_family == 0)
+ goto done;
switch (proto) {
#ifdef INET
case IPPROTO_IPV4:
- break;
#endif
#ifdef INET6
case IPPROTO_IPV6:
- break;
#endif
case IPPROTO_ETHERIP:
break;
-
default:
- return 0;
+ goto done;
}
/* Bail on short packets */
- if (m->m_pkthdr.len < sizeof(ip))
- return 0;
+ M_ASSERTPKTHDR(m);
+ if (m->m_pkthdr.len < sizeof(struct ip))
+ goto done;
- m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
-
- switch (ip.ip_v) {
+ ip = mtod(m, const struct ip *);
+ switch (ip->ip_v) {
#ifdef INET
case 4:
- if (sc->gif_psrc->sa_family != AF_INET ||
- sc->gif_pdst->sa_family != AF_INET)
- return 0;
- return gif_encapcheck4(m, off, proto, arg);
+ if (sc->gif_family != AF_INET)
+ goto done;
+ ret = in_gif_encapcheck(m, off, proto, arg);
+ break;
#endif
#ifdef INET6
case 6:
if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
- return 0;
- if (sc->gif_psrc->sa_family != AF_INET6 ||
- sc->gif_pdst->sa_family != AF_INET6)
- return 0;
- return gif_encapcheck6(m, off, proto, arg);
+ goto done;
+ if (sc->gif_family != AF_INET6)
+ goto done;
+ ret = in6_gif_encapcheck(m, off, proto, arg);
+ break;
#endif
- default:
- return 0;
}
+done:
+ GIF_RUNLOCK(sc);
+ return (ret);
}
-static void
-gif_start(struct ifnet *ifp)
+static int
+gif_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct gif_softc *sc;
- struct mbuf *m;
-
- sc = ifp->if_softc;
-
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- for (;;) {
- IFQ_DEQUEUE(&ifp->if_snd, m);
- if (m == 0)
- break;
-
- gif_output(ifp, m, sc->gif_pdst, NULL);
-
- }
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-
- return;
-}
-
-int
-gif_output(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
-{
- struct gif_softc *sc = ifp->if_softc;
- struct m_tag *mtag;
- int error = 0;
- int gif_called;
- u_int32_t af;
+ struct etherip_header *eth;
+#ifdef INET
+ struct ip *ip;
+#endif
+#ifdef INET6
+ struct ip6_hdr *ip6;
+ uint32_t t;
+#endif
+ uint32_t af;
+ uint8_t proto, ecn;
+ int error;
#ifdef MAC
error = mac_ifnet_check_transmit(ifp, m);
if (error) {
m_freem(m);
- goto end;
+ goto err;
}
#endif
-
- /*
- * gif may cause infinite recursion calls when misconfigured.
- * We'll prevent this by detecting loops.
- *
- * High nesting level may cause stack exhaustion.
- * We'll prevent this by introducing upper limit.
- */
- gif_called = 1;
- mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
- while (mtag != NULL) {
- if (*(struct ifnet **)(mtag + 1) == ifp) {
- log(LOG_NOTICE,
- "gif_output: loop detected on %s\n",
- (*(struct ifnet **)(mtag + 1))->if_xname);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
- mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
- gif_called++;
- }
- if (gif_called > V_max_gif_nesting) {
- log(LOG_NOTICE,
- "gif_output: recursively called too many times(%d)\n",
- gif_called);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
- mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
- M_NOWAIT);
- if (mtag == NULL) {
- m_freem(m);
- error = ENOMEM;
- goto end;
- }
- *(struct ifnet **)(mtag + 1) = ifp;
- m_tag_prepend(m, mtag);
-
- m->m_flags &= ~(M_BCAST|M_MCAST);
-
- GIF_LOCK(sc);
-
- if (!(ifp->if_flags & IFF_UP) ||
- sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
- GIF_UNLOCK(sc);
+ error = ENETDOWN;
+ sc = ifp->if_softc;
+ if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+ (ifp->if_flags & IFF_UP) == 0 ||
+ sc->gif_family == 0 ||
+ (error = gif_check_nesting(ifp, m)) != 0) {
m_freem(m);
- error = ENETDOWN;
- goto end;
- }
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
- bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
+ goto err;
}
-
- af = dst->sa_family;
- BPF_MTAP2(ifp, &af, sizeof(af), m);
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
-
- /* override to IPPROTO_ETHERIP for bridged traffic */
+ /* Now pull back the af that we stashed in the csum_data. */
if (ifp->if_bridge)
af = AF_LINK;
-
+ else
+ af = m->m_pkthdr.csum_data;
+ m->m_flags &= ~(M_BCAST|M_MCAST);
M_SETFIB(m, sc->gif_fibnum);
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
/* inner AF-specific encapsulation */
-
+ ecn = 0;
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ proto = IPPROTO_IPV4;
+ if (m->m_len < sizeof(struct ip))
+ m = m_pullup(m, sizeof(struct ip));
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto err;
+ }
+ ip = mtod(m, struct ip *);
+ ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+ ECN_NOCARE, &ecn, &ip->ip_tos);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ proto = IPPROTO_IPV6;
+ if (m->m_len < sizeof(struct ip6_hdr))
+ m = m_pullup(m, sizeof(struct ip6_hdr));
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto err;
+ }
+ t = 0;
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+ ECN_NOCARE, &t, &ip6->ip6_flow);
+ ecn = (ntohl(t) >> 20) & 0xff;
+ break;
+#endif
+ case AF_LINK:
+ proto = IPPROTO_ETHERIP;
+ M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto err;
+ }
+ eth = mtod(m, struct etherip_header *);
+ eth->eip_resvh = 0;
+ eth->eip_ver = ETHERIP_VERSION;
+ eth->eip_resvl = 0;
+ break;
+ default:
+ error = EAFNOSUPPORT;
+ m_freem(m);
+ goto err;
+ }
/* XXX should we check if our outer source is legal? */
-
/* dispatch to output logic based on outer AF */
- switch (sc->gif_psrc->sa_family) {
+ switch (sc->gif_family) {
#ifdef INET
case AF_INET:
- error = in_gif_output(ifp, af, m);
+ error = in_gif_output(ifp, m, proto, ecn);
break;
#endif
#ifdef INET6
case AF_INET6:
- error = in6_gif_output(ifp, af, m);
+ error = in6_gif_output(ifp, m, proto, ecn);
break;
#endif
default:
- m_freem(m);
- error = ENETDOWN;
+ m_freem(m);
}
-
- GIF_UNLOCK(sc);
- end:
+err:
if (error)
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (error);
}
+static void
+gif_qflush(struct ifnet *ifp __unused)
+{
+
+}
+
+#define MTAG_GIF 1080679712
+static int
+gif_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+ struct m_tag *mtag;
+ int count;
+
+ /*
+ * gif may cause infinite recursion calls when misconfigured.
+ * We'll prevent this by detecting loops.
+ *
+ * High nesting level may cause stack exhaustion.
+ * We'll prevent this by introducing upper limit.
+ */
+ count = 1;
+ mtag = NULL;
+ while ((mtag = m_tag_locate(m, MTAG_GIF, 0, mtag)) != NULL) {
+ if (*(struct ifnet **)(mtag + 1) == ifp) {
+ log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp));
+ return (EIO);
+ }
+ count++;
+ }
+ if (count > V_max_gif_nesting) {
+ log(LOG_NOTICE,
+ "%s: if_output recursively called too many times(%d)\n",
+ if_name(ifp), count);
+ return (EIO);
+ }
+ mtag = m_tag_alloc(MTAG_GIF, 0, sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL)
+ return (ENOMEM);
+ *(struct ifnet **)(mtag + 1) = ifp;
+ m_tag_prepend(m, mtag);
+ return (0);
+}
+
+int
+gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ uint32_t af;
+
+ if (dst->sa_family == AF_UNSPEC)
+ bcopy(dst->sa_data, &af, sizeof(af));
+ else
+ af = dst->sa_family;
+ /*
+ * Now save the af in the inbound pkt csum data, this is a cheat since
+ * we are using the inbound csum_data field to carry the af over to
+ * the gif_transmit() routine, avoiding using yet another mtag.
+ */
+ m->m_pkthdr.csum_data = af;
+ return (ifp->if_transmit(ifp, m));
+}
+
void
-gif_input(m, af, ifp)
- struct mbuf *m;
- int af;
- struct ifnet *ifp;
+gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
{
- int isr, n;
- struct gif_softc *sc;
struct etherip_header *eip;
+#ifdef INET
+ struct ip *ip;
+#endif
+#ifdef INET6
+ struct ip6_hdr *ip6;
+ uint32_t t;
+#endif
+ struct gif_softc *sc;
struct ether_header *eh;
struct ifnet *oldifp;
+ int isr, n, af;
if (ifp == NULL) {
/* just in case */
@@ -506,20 +536,67 @@ gif_input(m, af, ifp)
}
sc = ifp->if_softc;
m->m_pkthdr.rcvif = ifp;
+ m_clrprotoflags(m);
+ switch (proto) {
+#ifdef INET
+ case IPPROTO_IPV4:
+ af = AF_INET;
+ if (m->m_len < sizeof(struct ip))
+ m = m_pullup(m, sizeof(struct ip));
+ if (m == NULL)
+ goto drop;
+ ip = mtod(m, struct ip *);
+ if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+ ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
+ m_freem(m);
+ goto drop;
+ }
+ break;
+#endif
+#ifdef INET6
+ case IPPROTO_IPV6:
+ af = AF_INET6;
+ if (m->m_len < sizeof(struct ip6_hdr))
+ m = m_pullup(m, sizeof(struct ip6_hdr));
+ if (m == NULL)
+ goto drop;
+ t = htonl((uint32_t)ecn << 20);
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
+ ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
+ m_freem(m);
+ goto drop;
+ }
+ break;
+#endif
+ case IPPROTO_ETHERIP:
+ af = AF_LINK;
+ break;
+ default:
+ m_freem(m);
+ goto drop;
+ }
#ifdef MAC
mac_ifnet_create_mbuf(ifp, m);
#endif
if (bpf_peers_present(ifp->if_bpf)) {
- u_int32_t af1 = af;
+ uint32_t af1 = af;
bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
}
+ if ((ifp->if_flags & IFF_MONITOR) != 0) {
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ m_freem(m);
+ return;
+ }
+
if (ng_gif_input_p != NULL) {
(*ng_gif_input_p)(ifp, &m, af);
if (m == NULL)
- return;
+ goto drop;
}
/*
@@ -546,34 +623,15 @@ gif_input(m, af, ifp)
#endif
case AF_LINK:
n = sizeof(struct etherip_header) + sizeof(struct ether_header);
- if (n > m->m_len) {
+ if (n > m->m_len)
m = m_pullup(m, n);
- if (m == NULL) {
- ifp->if_ierrors++;
- return;
- }
- }
-
+ if (m == NULL)
+ goto drop;
eip = mtod(m, struct etherip_header *);
- /*
- * GIF_ACCEPT_REVETHIP (enabled by default) intentionally
- * accepts an EtherIP packet with revered version field in
- * the header. This is a knob for backward compatibility
- * with FreeBSD 7.2R or prior.
- */
- if (sc->gif_options & GIF_ACCEPT_REVETHIP) {
- if (eip->eip_resvl != ETHERIP_VERSION
- && eip->eip_ver != ETHERIP_VERSION) {
- /* discard unknown versions */
- m_freem(m);
- return;
- }
- } else {
- if (eip->eip_ver != ETHERIP_VERSION) {
- /* discard unknown versions */
- m_freem(m);
- return;
- }
+ if (eip->eip_ver != ETHERIP_VERSION) {
+ /* discard unknown versions */
+ m_freem(m);
+ goto drop;
}
m_adj(m, sizeof(struct etherip_header));
@@ -588,7 +646,7 @@ gif_input(m, af, ifp)
m->m_flags |= M_BCAST;
else
m->m_flags |= M_MCAST;
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
}
BRIDGE_INPUT(ifp, m);
@@ -613,59 +671,61 @@ gif_input(m, af, ifp)
return;
}
- ifp->if_ipackets++;
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
M_SETFIB(m, ifp->if_fib);
netisr_dispatch(isr, m);
+ return;
+drop:
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
int
-gif_ioctl(ifp, cmd, data)
- struct ifnet *ifp;
- u_long cmd;
- caddr_t data;
+gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- struct gif_softc *sc = ifp->if_softc;
- struct ifreq *ifr = (struct ifreq*)data;
- int error = 0, size;
- u_int options;
+ GIF_RLOCK_TRACKER;
+ struct ifreq *ifr = (struct ifreq*)data;
struct sockaddr *dst, *src;
-#ifdef SIOCSIFMTU /* xxx */
- u_long mtu;
+ struct gif_softc *sc;
+#ifdef INET
+ struct sockaddr_in *sin = NULL;
#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6 = NULL;
+#endif
+ u_int options;
+ int error;
switch (cmd) {
case SIOCSIFADDR:
ifp->if_flags |= IFF_UP;
- break;
-
- case SIOCSIFDSTADDR:
- break;
-
case SIOCADDMULTI:
case SIOCDELMULTI:
- break;
-
-#ifdef SIOCSIFMTU /* xxx */
case SIOCGIFMTU:
- break;
-
+ case SIOCSIFFLAGS:
+ return (0);
case SIOCSIFMTU:
- mtu = ifr->ifr_mtu;
- if (mtu < GIF_MTU_MIN || mtu > GIF_MTU_MAX)
+ if (ifr->ifr_mtu < GIF_MTU_MIN ||
+ ifr->ifr_mtu > GIF_MTU_MAX)
return (EINVAL);
- ifp->if_mtu = mtu;
- break;
-#endif /* SIOCSIFMTU */
-
-#ifdef INET
+ else
+ ifp->if_mtu = ifr->ifr_mtu;
+ return (0);
+ }
+ sx_xlock(&gif_ioctl_sx);
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENXIO;
+ goto bad;
+ }
+ error = 0;
+ switch (cmd) {
case SIOCSIFPHYADDR:
-#endif
#ifdef INET6
case SIOCSIFPHYADDR_IN6:
-#endif /* INET6 */
- case SIOCSLIFPHYADDR:
+#endif
+ error = EINVAL;
switch (cmd) {
#ifdef INET
case SIOCSIFPHYADDR:
@@ -683,199 +743,169 @@ gif_ioctl(ifp, cmd, data)
&(((struct in6_aliasreq *)data)->ifra_dstaddr);
break;
#endif
- case SIOCSLIFPHYADDR:
- src = (struct sockaddr *)
- &(((struct if_laddrreq *)data)->addr);
- dst = (struct sockaddr *)
- &(((struct if_laddrreq *)data)->dstaddr);
- break;
default:
- return EINVAL;
+ goto bad;
}
-
/* sa_family must be equal */
- if (src->sa_family != dst->sa_family)
- return EINVAL;
+ if (src->sa_family != dst->sa_family ||
+ src->sa_len != dst->sa_len)
+ goto bad;
/* validate sa_len */
+ /* check sa_family looks sane for the cmd */
switch (src->sa_family) {
#ifdef INET
case AF_INET:
if (src->sa_len != sizeof(struct sockaddr_in))
- return EINVAL;
+ goto bad;
+ if (cmd != SIOCSIFPHYADDR) {
+ error = EAFNOSUPPORT;
+ goto bad;
+ }
+ if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
+ satosin(dst)->sin_addr.s_addr == INADDR_ANY) {
+ error = EADDRNOTAVAIL;
+ goto bad;
+ }
break;
#endif
#ifdef INET6
case AF_INET6:
if (src->sa_len != sizeof(struct sockaddr_in6))
- return EINVAL;
- break;
-#endif
- default:
- return EAFNOSUPPORT;
- }
- switch (dst->sa_family) {
-#ifdef INET
- case AF_INET:
- if (dst->sa_len != sizeof(struct sockaddr_in))
- return EINVAL;
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- if (dst->sa_len != sizeof(struct sockaddr_in6))
- return EINVAL;
+ goto bad;
+ if (cmd != SIOCSIFPHYADDR_IN6) {
+ error = EAFNOSUPPORT;
+ goto bad;
+ }
+ error = EADDRNOTAVAIL;
+ if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
+ ||
+ IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
+ goto bad;
+ /*
+ * Check validity of the scope zone ID of the
+ * addresses, and convert it into the kernel
+ * internal form if necessary.
+ */
+ error = sa6_embedscope(satosin6(src), 0);
+ if (error != 0)
+ goto bad;
+ error = sa6_embedscope(satosin6(dst), 0);
+ if (error != 0)
+ goto bad;
break;
#endif
default:
- return EAFNOSUPPORT;
- }
-
- /* check sa_family looks sane for the cmd */
- switch (cmd) {
- case SIOCSIFPHYADDR:
- if (src->sa_family == AF_INET)
- break;
- return EAFNOSUPPORT;
-#ifdef INET6
- case SIOCSIFPHYADDR_IN6:
- if (src->sa_family == AF_INET6)
- break;
- return EAFNOSUPPORT;
-#endif /* INET6 */
- case SIOCSLIFPHYADDR:
- /* checks done in the above */
- break;
+ error = EAFNOSUPPORT;
+ goto bad;
}
-
- error = gif_set_tunnel(GIF2IFP(sc), src, dst);
+ error = gif_set_tunnel(ifp, src, dst);
break;
-
-#ifdef SIOCDIFPHYADDR
case SIOCDIFPHYADDR:
- gif_delete_tunnel(GIF2IFP(sc));
+ gif_delete_tunnel(ifp);
break;
-#endif
-
case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
#ifdef INET6
case SIOCGIFPSRCADDR_IN6:
-#endif /* INET6 */
- if (sc->gif_psrc == NULL) {
+ case SIOCGIFPDSTADDR_IN6:
+#endif
+ if (sc->gif_family == 0) {
error = EADDRNOTAVAIL;
- goto bad;
+ break;
}
- src = sc->gif_psrc;
+ GIF_RLOCK(sc);
switch (cmd) {
#ifdef INET
case SIOCGIFPSRCADDR:
- dst = &ifr->ifr_addr;
- size = sizeof(ifr->ifr_addr);
+ case SIOCGIFPDSTADDR:
+ if (sc->gif_family != AF_INET) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
break;
-#endif /* INET */
+#endif
#ifdef INET6
case SIOCGIFPSRCADDR_IN6:
- dst = (struct sockaddr *)
+ case SIOCGIFPDSTADDR_IN6:
+ if (sc->gif_family != AF_INET6) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin6 = (struct sockaddr_in6 *)
&(((struct in6_ifreq *)data)->ifr_addr);
- size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
+ memset(sin6, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
break;
-#endif /* INET6 */
+#endif
default:
- error = EADDRNOTAVAIL;
- goto bad;
- }
- if (src->sa_len > size)
- return EINVAL;
- bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
-#ifdef INET6
- if (dst->sa_family == AF_INET6) {
- error = sa6_recoverscope((struct sockaddr_in6 *)dst);
- if (error != 0)
- return (error);
+ error = EAFNOSUPPORT;
}
+ if (error == 0) {
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ sin->sin_addr = sc->gif_iphdr->ip_src;
+ break;
+ case SIOCGIFPDSTADDR:
+ sin->sin_addr = sc->gif_iphdr->ip_dst;
+ break;
#endif
- break;
-
- case SIOCGIFPDSTADDR:
#ifdef INET6
- case SIOCGIFPDSTADDR_IN6:
-#endif /* INET6 */
- if (sc->gif_pdst == NULL) {
- error = EADDRNOTAVAIL;
- goto bad;
+ case SIOCGIFPSRCADDR_IN6:
+ sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
+ break;
+ case SIOCGIFPDSTADDR_IN6:
+ sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
+ break;
+#endif
+ }
}
- src = sc->gif_pdst;
+ GIF_RUNLOCK(sc);
+ if (error != 0)
+ break;
switch (cmd) {
#ifdef INET
+ case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
- dst = &ifr->ifr_addr;
- size = sizeof(ifr->ifr_addr);
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin);
+ if (error != 0)
+ memset(sin, 0, sizeof(*sin));
break;
-#endif /* INET */
+#endif
#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
case SIOCGIFPDSTADDR_IN6:
- dst = (struct sockaddr *)
- &(((struct in6_ifreq *)data)->ifr_addr);
- size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
- break;
-#endif /* INET6 */
- default:
- error = EADDRNOTAVAIL;
- goto bad;
- }
- if (src->sa_len > size)
- return EINVAL;
- error = prison_if(curthread->td_ucred, src);
- if (error != 0)
- return (error);
- error = prison_if(curthread->td_ucred, dst);
- if (error != 0)
- return (error);
- bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
-#ifdef INET6
- if (dst->sa_family == AF_INET6) {
- error = sa6_recoverscope((struct sockaddr_in6 *)dst);
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin6);
+ if (error == 0)
+ error = sa6_recoverscope(sin6);
if (error != 0)
- return (error);
- }
+ memset(sin6, 0, sizeof(*sin6));
#endif
- break;
-
- case SIOCGLIFPHYADDR:
- if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
- error = EADDRNOTAVAIL;
- goto bad;
}
-
- /* copy src */
- src = sc->gif_psrc;
- dst = (struct sockaddr *)
- &(((struct if_laddrreq *)data)->addr);
- size = sizeof(((struct if_laddrreq *)data)->addr);
- if (src->sa_len > size)
- return EINVAL;
- bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
-
- /* copy dst */
- src = sc->gif_pdst;
- dst = (struct sockaddr *)
- &(((struct if_laddrreq *)data)->dstaddr);
- size = sizeof(((struct if_laddrreq *)data)->dstaddr);
- if (src->sa_len > size)
- return EINVAL;
- bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
break;
-
- case SIOCSIFFLAGS:
- /* if_ioctl() takes care of it */
+ case SIOCGTUNFIB:
+ ifr->ifr_fib = sc->gif_fibnum;
+ break;
+ case SIOCSTUNFIB:
+ if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
+ break;
+ if (ifr->ifr_fib >= rt_numfibs)
+ error = EINVAL;
+ else
+ sc->gif_fibnum = ifr->ifr_fib;
break;
-
case GIFGOPTS:
options = sc->gif_options;
- error = copyout(&options, ifr->ifr_data,
- sizeof(options));
+ error = copyout(&options, ifr->ifr_data, sizeof(options));
break;
-
case GIFSOPTS:
if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
break;
@@ -887,151 +917,154 @@ gif_ioctl(ifp, cmd, data)
else
sc->gif_options = options;
break;
-
default:
error = EINVAL;
break;
}
- bad:
- return error;
+bad:
+ sx_xunlock(&gif_ioctl_sx);
+ return (error);
}
-/*
- * XXXRW: There's a general event-ordering issue here: the code to check
- * if a given tunnel is already present happens before we perform a
- * potentially blocking setup of the tunnel. This code needs to be
- * re-ordered so that the check and replacement can be atomic using
- * a mutex.
- */
-int
-gif_set_tunnel(ifp, src, dst)
- struct ifnet *ifp;
- struct sockaddr *src;
- struct sockaddr *dst;
+static void
+gif_detach(struct gif_softc *sc)
{
- struct gif_softc *sc = ifp->if_softc;
- struct gif_softc *sc2;
- struct sockaddr *osrc, *odst, *sa;
- int error = 0;
-
- mtx_lock(&gif_mtx);
- LIST_FOREACH(sc2, &V_gif_softc_list, gif_list) {
- if (sc2 == sc)
- continue;
- if (!sc2->gif_pdst || !sc2->gif_psrc)
- continue;
- if (sc2->gif_pdst->sa_family != dst->sa_family ||
- sc2->gif_pdst->sa_len != dst->sa_len ||
- sc2->gif_psrc->sa_family != src->sa_family ||
- sc2->gif_psrc->sa_len != src->sa_len)
- continue;
-
- /*
- * Disallow parallel tunnels unless instructed
- * otherwise.
- */
- if (!V_parallel_tunnels &&
- bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 &&
- bcmp(sc2->gif_psrc, src, src->sa_len) == 0) {
- error = EADDRNOTAVAIL;
- mtx_unlock(&gif_mtx);
- goto bad;
- }
- /* XXX both end must be valid? (I mean, not 0.0.0.0) */
- }
- mtx_unlock(&gif_mtx);
+ sx_assert(&gif_ioctl_sx, SA_XLOCKED);
+ if (sc->gif_ecookie != NULL)
+ encap_detach(sc->gif_ecookie);
+ sc->gif_ecookie = NULL;
+}
+
+static int
+gif_attach(struct gif_softc *sc, int af)
+{
- /* XXX we can detach from both, but be polite just in case */
- if (sc->gif_psrc)
- switch (sc->gif_psrc->sa_family) {
+ sx_assert(&gif_ioctl_sx, SA_XLOCKED);
+ switch (af) {
#ifdef INET
- case AF_INET:
- (void)in_gif_detach(sc);
- break;
+ case AF_INET:
+ return (in_gif_attach(sc));
#endif
#ifdef INET6
- case AF_INET6:
- (void)in6_gif_detach(sc);
- break;
+ case AF_INET6:
+ return (in6_gif_attach(sc));
#endif
- }
-
- osrc = sc->gif_psrc;
- sa = (struct sockaddr *)malloc(src->sa_len, M_IFADDR, M_WAITOK);
- bcopy((caddr_t)src, (caddr_t)sa, src->sa_len);
- sc->gif_psrc = sa;
+ }
+ return (EAFNOSUPPORT);
+}
- odst = sc->gif_pdst;
- sa = (struct sockaddr *)malloc(dst->sa_len, M_IFADDR, M_WAITOK);
- bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len);
- sc->gif_pdst = sa;
+static int
+gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
+{
+ struct gif_softc *sc = ifp->if_softc;
+ struct gif_softc *tsc;
+#ifdef INET
+ struct ip *ip;
+#endif
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+ void *hdr;
+ int error = 0;
- switch (sc->gif_psrc->sa_family) {
+ if (sc == NULL)
+ return (ENXIO);
+ /* Disallow parallel tunnels unless instructed otherwise. */
+ if (V_parallel_tunnels == 0) {
+ GIF_LIST_LOCK();
+ LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
+ if (tsc == sc || tsc->gif_family != src->sa_family)
+ continue;
+#ifdef INET
+ if (tsc->gif_family == AF_INET &&
+ tsc->gif_iphdr->ip_src.s_addr ==
+ satosin(src)->sin_addr.s_addr &&
+ tsc->gif_iphdr->ip_dst.s_addr ==
+ satosin(dst)->sin_addr.s_addr) {
+ error = EADDRNOTAVAIL;
+ GIF_LIST_UNLOCK();
+ goto bad;
+ }
+#endif
+#ifdef INET6
+ if (tsc->gif_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
+ &satosin6(src)->sin6_addr) &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
+ &satosin6(dst)->sin6_addr)) {
+ error = EADDRNOTAVAIL;
+ GIF_LIST_UNLOCK();
+ goto bad;
+ }
+#endif
+ }
+ GIF_LIST_UNLOCK();
+ }
+ switch (src->sa_family) {
#ifdef INET
case AF_INET:
- error = in_gif_attach(sc);
+ hdr = ip = malloc(sizeof(struct ip), M_GIF,
+ M_WAITOK | M_ZERO);
+ ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
+ ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
break;
#endif
#ifdef INET6
case AF_INET6:
- /*
- * Check validity of the scope zone ID of the addresses, and
- * convert it into the kernel internal form if necessary.
- */
- error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_psrc, 0);
- if (error != 0)
- break;
- error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_pdst, 0);
- if (error != 0)
- break;
- error = in6_gif_attach(sc);
+ hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
+ M_WAITOK | M_ZERO);
+ ip6->ip6_src = satosin6(src)->sin6_addr;
+ ip6->ip6_dst = satosin6(dst)->sin6_addr;
+ ip6->ip6_vfc = IPV6_VERSION;
break;
#endif
+ default:
+ return (EAFNOSUPPORT);
}
- if (error) {
- /* rollback */
- free((caddr_t)sc->gif_psrc, M_IFADDR);
- free((caddr_t)sc->gif_pdst, M_IFADDR);
- sc->gif_psrc = osrc;
- sc->gif_pdst = odst;
- goto bad;
- }
-
- if (osrc)
- free((caddr_t)osrc, M_IFADDR);
- if (odst)
- free((caddr_t)odst, M_IFADDR);
- bad:
- if (sc->gif_psrc && sc->gif_pdst)
+ if (sc->gif_family != src->sa_family)
+ gif_detach(sc);
+ if (sc->gif_family == 0 ||
+ sc->gif_family != src->sa_family)
+ error = gif_attach(sc, src->sa_family);
+
+ GIF_WLOCK(sc);
+ if (sc->gif_family != 0)
+ free(sc->gif_hdr, M_GIF);
+ sc->gif_family = src->sa_family;
+ sc->gif_hdr = hdr;
+ GIF_WUNLOCK(sc);
+#if defined(INET) || defined(INET6)
+bad:
+#endif
+ if (error == 0 && sc->gif_family != 0) {
ifp->if_drv_flags |= IFF_DRV_RUNNING;
- else
+ if_link_state_change(ifp, LINK_STATE_UP);
+ } else {
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-
- return error;
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+ }
+ return (error);
}
-void
-gif_delete_tunnel(ifp)
- struct ifnet *ifp;
+static void
+gif_delete_tunnel(struct ifnet *ifp)
{
struct gif_softc *sc = ifp->if_softc;
+ int family;
- if (sc->gif_psrc) {
- free((caddr_t)sc->gif_psrc, M_IFADDR);
- sc->gif_psrc = NULL;
- }
- if (sc->gif_pdst) {
- free((caddr_t)sc->gif_pdst, M_IFADDR);
- sc->gif_pdst = NULL;
+ if (sc == NULL)
+ return;
+
+ GIF_WLOCK(sc);
+ family = sc->gif_family;
+ sc->gif_family = 0;
+ GIF_WUNLOCK(sc);
+ if (family != 0) {
+ gif_detach(sc);
+ free(sc->gif_hdr, M_GIF);
}
- /* it is safe to detach from both */
-#ifdef INET
- (void)in_gif_detach(sc);
-#endif
-#ifdef INET6
- (void)in6_gif_detach(sc);
-#endif
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_DOWN);
}
diff --git a/freebsd/sys/net/if_gif.h b/freebsd/sys/net/if_gif.h
index a2f214c5..28da85bd 100644
--- a/freebsd/sys/net/if_gif.h
+++ b/freebsd/sys/net/if_gif.h
@@ -30,21 +30,17 @@
* SUCH DAMAGE.
*/
-/*
- * if_gif.h
- */
-
#ifndef _NET_IF_GIF_H_
#define _NET_IF_GIF_H_
-
#ifdef _KERNEL
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <netinet/in.h>
-/* xxx sigh, why route have struct route instead of pointer? */
+struct ip;
+struct ip6_hdr;
struct encaptab;
extern void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp,
@@ -56,44 +52,44 @@ extern void (*ng_gif_attach_p)(struct ifnet *ifp);
extern void (*ng_gif_detach_p)(struct ifnet *ifp);
struct gif_softc {
- struct ifnet *gif_ifp;
- struct mtx gif_mtx;
- struct sockaddr *gif_psrc; /* Physical src addr */
- struct sockaddr *gif_pdst; /* Physical dst addr */
+ struct ifnet *gif_ifp;
+ struct rmlock gif_lock;
+ const struct encaptab *gif_ecookie;
+ int gif_family;
+ int gif_flags;
+ u_int gif_fibnum;
+ u_int gif_options;
+ void *gif_netgraph; /* netgraph node info */
union {
- struct route gifscr_ro; /* xxx */
+ void *hdr;
+ struct ip *iphdr;
#ifdef INET6
- struct route_in6 gifscr_ro6; /* xxx */
+ struct ip6_hdr *ip6hdr;
#endif
- } gifsc_gifscr;
- int gif_flags;
- u_int gif_fibnum;
- const struct encaptab *encap_cookie4;
- const struct encaptab *encap_cookie6;
- void *gif_netgraph; /* ng_gif(4) netgraph node info */
- u_int gif_options;
- LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */
+ } gif_uhdr;
+ LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */
};
#define GIF2IFP(sc) ((sc)->gif_ifp)
-#define GIF_LOCK_INIT(sc) mtx_init(&(sc)->gif_mtx, "gif softc", \
- NULL, MTX_DEF)
-#define GIF_LOCK_DESTROY(sc) mtx_destroy(&(sc)->gif_mtx)
-#define GIF_LOCK(sc) mtx_lock(&(sc)->gif_mtx)
-#define GIF_UNLOCK(sc) mtx_unlock(&(sc)->gif_mtx)
-#define GIF_LOCK_ASSERT(sc) mtx_assert(&(sc)->gif_mtx, MA_OWNED)
-
-#define gif_ro gifsc_gifscr.gifscr_ro
+#define GIF_LOCK_INIT(sc) rm_init(&(sc)->gif_lock, "gif softc")
+#define GIF_LOCK_DESTROY(sc) rm_destroy(&(sc)->gif_lock)
+#define GIF_RLOCK_TRACKER struct rm_priotracker gif_tracker
+#define GIF_RLOCK(sc) rm_rlock(&(sc)->gif_lock, &gif_tracker)
+#define GIF_RUNLOCK(sc) rm_runlock(&(sc)->gif_lock, &gif_tracker)
+#define GIF_RLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_RLOCKED)
+#define GIF_WLOCK(sc) rm_wlock(&(sc)->gif_lock)
+#define GIF_WUNLOCK(sc) rm_wunlock(&(sc)->gif_lock)
+#define GIF_WLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_WLOCKED)
+
+#define gif_iphdr gif_uhdr.iphdr
+#define gif_hdr gif_uhdr.hdr
#ifdef INET6
-#define gif_ro6 gifsc_gifscr.gifscr_ro6
+#define gif_ip6hdr gif_uhdr.ip6hdr
#endif
#define GIF_MTU (1280) /* Default MTU */
#define GIF_MTU_MIN (1280) /* Minimum MTU */
#define GIF_MTU_MAX (8192) /* Maximum MTU */
-#define MTAG_GIF 1080679712
-#define MTAG_GIF_CALLED 0
-
struct etherip_header {
#if BYTE_ORDER == LITTLE_ENDIAN
u_int eip_resvl:4, /* reserved */
@@ -111,20 +107,26 @@ struct etherip_header {
#define ETHERIP_ALIGN 2
/* Prototypes */
-void gif_input(struct mbuf *, int, struct ifnet *);
-int gif_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+void gif_input(struct mbuf *, struct ifnet *, int, uint8_t);
+int gif_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
-int gif_ioctl(struct ifnet *, u_long, caddr_t);
-int gif_set_tunnel(struct ifnet *, struct sockaddr *, struct sockaddr *);
-void gif_delete_tunnel(struct ifnet *);
int gif_encapcheck(const struct mbuf *, int, int, void *);
+#ifdef INET
+int in_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
+int in_gif_encapcheck(const struct mbuf *, int, int, void *);
+int in_gif_attach(struct gif_softc *);
+#endif
+#ifdef INET6
+int in6_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
+int in6_gif_encapcheck(const struct mbuf *, int, int, void *);
+int in6_gif_attach(struct gif_softc *);
+#endif
#endif /* _KERNEL */
#define GIFGOPTS _IOWR('i', 150, struct ifreq)
#define GIFSOPTS _IOW('i', 151, struct ifreq)
-#define GIF_ACCEPT_REVETHIP 0x0001
-#define GIF_SEND_REVETHIP 0x0010
-#define GIF_OPTMASK (GIF_ACCEPT_REVETHIP|GIF_SEND_REVETHIP)
+#define GIF_IGNORE_SOURCE 0x0002
+#define GIF_OPTMASK (GIF_IGNORE_SOURCE)
#endif /* _NET_IF_GIF_H_ */
diff --git a/freebsd/sys/net/if_gre.c b/freebsd/sys/net/if_gre.c
index b7e0bd15..68b515ea 100644
--- a/freebsd/sys/net/if_gre.c
+++ b/freebsd/sys/net/if_gre.c
@@ -1,10 +1,8 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */
-/* $FreeBSD$ */
-
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -32,24 +30,20 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $
*/
-/*
- * Encapsulate L3 protocols into IP
- * See RFC 2784 (successor of RFC 1701 and 1702) for more details.
- * If_gre is compatible with Cisco GRE tunnels, so you can
- * have a NetBSD box as the other end of a tunnel interface of a Cisco
- * router. See gre(4) for more details.
- * Also supported: IP in IP encaps (proto 55) as of RFC 2004
- */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
-#include <rtems/bsd/local/opt_atalk.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/module.h>
@@ -57,97 +51,76 @@
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
+#include <sys/sx.h>
#include <sys/sysctl.h>
+#include <sys/syslog.h>
#include <sys/systm.h>
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
-#include <net/route.h>
+#include <net/netisr.h>
#include <net/vnet.h>
+#include <net/route.h>
-#ifdef INET
#include <netinet/in.h>
+#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
-#include <netinet/ip_gre.h>
#include <netinet/ip_var.h>
-#include <netinet/ip_encap.h>
-#else
-#error "Huh? if_gre without inet?"
#endif
-#include <net/bpf.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/scope6_var.h>
+#endif
+#include <netinet/ip_encap.h>
+#include <net/bpf.h>
#include <net/if_gre.h>
-/*
- * It is not easy to calculate the right value for a GRE MTU.
- * We leave this task to the admin and use the same default that
- * other vendors use.
- */
-#define GREMTU 1476
-
-#define GRENAME "gre"
-
-#define MTAG_COOKIE_GRE 1307983903
-#define MTAG_GRE_NESTING 1
-struct mtag_gre_nesting {
- uint16_t count;
- uint16_t max;
- struct ifnet *ifp[];
-};
-
-/*
- * gre_mtx protects all global variables in if_gre.c.
- * XXX: gre_softc data not protected yet.
- */
-struct mtx gre_mtx;
-static MALLOC_DEFINE(M_GRE, GRENAME, "Generic Routing Encapsulation");
-
-struct gre_softc_head gre_softc_list;
+#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
+#define GREMTU 1500
+static const char grename[] = "gre";
+static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
+static VNET_DEFINE(struct mtx, gre_mtx);
+#define V_gre_mtx VNET(gre_mtx)
+#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \
+ MTX_DEF)
+#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx)
+#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx)
+#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx)
+
+static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list);
+#define V_gre_softc_list VNET(gre_softc_list)
+static struct sx gre_ioctl_sx;
+SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl");
static int gre_clone_create(struct if_clone *, int, caddr_t);
static void gre_clone_destroy(struct ifnet *);
-static int gre_ioctl(struct ifnet *, u_long, caddr_t);
-static int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *ro);
-
-IFC_SIMPLE_DECLARE(gre, 0);
+static VNET_DEFINE(struct if_clone *, gre_cloner);
+#define V_gre_cloner VNET(gre_cloner)
-static int gre_compute_route(struct gre_softc *sc);
-
-static void greattach(void);
+static void gre_qflush(struct ifnet *);
+static int gre_transmit(struct ifnet *, struct mbuf *);
+static int gre_ioctl(struct ifnet *, u_long, caddr_t);
+static int gre_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
-#ifdef INET
-extern struct domain inetdomain;
-static const struct protosw in_gre_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_GRE,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = gre_input,
- .pr_output = (pr_output_t *)rip_output,
- .pr_ctlinput = rip_ctlinput,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-static const struct protosw in_mobile_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_MOBILE,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = gre_mobile_input,
- .pr_output = (pr_output_t *)rip_output,
- .pr_ctlinput = rip_ctlinput,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-#endif
+static void gre_updatehdr(struct gre_softc *);
+static int gre_set_tunnel(struct ifnet *, struct sockaddr *,
+ struct sockaddr *);
+static void gre_delete_tunnel(struct ifnet *);
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
@@ -163,805 +136,851 @@ static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
*/
#define MAX_GRE_NEST 1
#endif
-static int max_gre_nesting = MAX_GRE_NEST;
-SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW,
- &max_gre_nesting, 0, "Max nested tunnels");
-/* ARGSUSED */
+static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST;
+#define V_max_gre_nesting VNET(max_gre_nesting)
+SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels");
+
+static void
+vnet_gre_init(const void *unused __unused)
+{
+ LIST_INIT(&V_gre_softc_list);
+ GRE_LIST_LOCK_INIT();
+ V_gre_cloner = if_clone_simple(grename, gre_clone_create,
+ gre_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_gre_init, NULL);
+
static void
-greattach(void)
+vnet_gre_uninit(const void *unused __unused)
{
- mtx_init(&gre_mtx, "gre_mtx", NULL, MTX_DEF);
- LIST_INIT(&gre_softc_list);
- if_clone_attach(&gre_cloner);
+ if_clone_detach(V_gre_cloner);
+ GRE_LIST_LOCK_DESTROY();
}
+VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_gre_uninit, NULL);
static int
-gre_clone_create(ifc, unit, params)
- struct if_clone *ifc;
- int unit;
- caddr_t params;
+gre_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
struct gre_softc *sc;
sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
-
+#ifndef __rtems__
+ sc->gre_fibnum = curthread->td_proc->p_fibnum;
+#else /* __rtems__ */
+ sc->gre_fibnum = BSD_DEFAULT_FIB;
+#endif /* __rtems__ */
GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
- if (GRE2IFP(sc) == NULL) {
- free(sc, M_GRE);
- return (ENOSPC);
- }
-
+ GRE_LOCK_INIT(sc);
GRE2IFP(sc)->if_softc = sc;
- if_initname(GRE2IFP(sc), ifc->ifc_name, unit);
+ if_initname(GRE2IFP(sc), grename, unit);
- GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen;
- GRE2IFP(sc)->if_addrlen = 0;
- GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */
- GRE2IFP(sc)->if_mtu = GREMTU;
+ GRE2IFP(sc)->if_mtu = sc->gre_mtu = GREMTU;
GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
GRE2IFP(sc)->if_output = gre_output;
GRE2IFP(sc)->if_ioctl = gre_ioctl;
- sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
- sc->g_proto = IPPROTO_GRE;
- GRE2IFP(sc)->if_flags |= IFF_LINK0;
- sc->encap = NULL;
-#ifndef __rtems__
- sc->gre_fibnum = curthread->td_proc->p_fibnum;
-#else /* __rtems__ */
- sc->gre_fibnum = BSD_DEFAULT_FIB;
-#endif /* __rtems__ */
- sc->wccp_ver = WCCP_V1;
- sc->key = 0;
+ GRE2IFP(sc)->if_transmit = gre_transmit;
+ GRE2IFP(sc)->if_qflush = gre_qflush;
+ GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
+ GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
if_attach(GRE2IFP(sc));
bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
- mtx_lock(&gre_mtx);
- LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
- mtx_unlock(&gre_mtx);
+ GRE_LIST_LOCK();
+ LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list);
+ GRE_LIST_UNLOCK();
return (0);
}
static void
-gre_clone_destroy(ifp)
- struct ifnet *ifp;
+gre_clone_destroy(struct ifnet *ifp)
{
- struct gre_softc *sc = ifp->if_softc;
-
- mtx_lock(&gre_mtx);
- LIST_REMOVE(sc, sc_list);
- mtx_unlock(&gre_mtx);
+ struct gre_softc *sc;
-#ifdef INET
- if (sc->encap != NULL)
- encap_detach(sc->encap);
-#endif
+ sx_xlock(&gre_ioctl_sx);
+ sc = ifp->if_softc;
+ gre_delete_tunnel(ifp);
+ GRE_LIST_LOCK();
+ LIST_REMOVE(sc, gre_list);
+ GRE_LIST_UNLOCK();
bpfdetach(ifp);
if_detach(ifp);
+ ifp->if_softc = NULL;
+ sx_xunlock(&gre_ioctl_sx);
+
if_free(ifp);
+ GRE_LOCK_DESTROY(sc);
free(sc, M_GRE);
}
-/*
- * The output routine. Takes a packet and encapsulates it in the protocol
- * given by sc->g_proto. See also RFC 1701 and RFC 2004
- */
static int
-gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
- struct route *ro)
+gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- int error = 0;
- struct gre_softc *sc = ifp->if_softc;
- struct greip *gh;
- struct ip *ip;
- struct m_tag *mtag;
- struct mtag_gre_nesting *gt;
- size_t len;
- u_short gre_ip_id = 0;
- uint8_t gre_ip_tos = 0;
- u_int16_t etype = 0;
- struct mobile_h mob_h;
- u_int32_t af;
- int extra = 0, max;
-
- /*
- * gre may cause infinite recursion calls when misconfigured. High
- * nesting level may cause stack exhaustion. We'll prevent this by
- * detecting loops and by introducing upper limit.
- */
- mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL);
- if (mtag != NULL) {
- struct ifnet **ifp2;
-
- gt = (struct mtag_gre_nesting *)(mtag + 1);
- gt->count++;
- if (gt->count > min(gt->max,max_gre_nesting)) {
- printf("%s: hit maximum recursion limit %u on %s\n",
- __func__, gt->count - 1, ifp->if_xname);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
-
- ifp2 = gt->ifp;
- for (max = gt->count - 1; max > 0; max--) {
- if (*ifp2 == ifp)
- break;
- ifp2++;
- }
- if (*ifp2 == ifp) {
- printf("%s: detected loop with nexting %u on %s\n",
- __func__, gt->count-1, ifp->if_xname);
- m_freem(m);
- error = EIO; /* is there better errno? */
- goto end;
- }
- *ifp2 = ifp;
+ GRE_RLOCK_TRACKER;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct sockaddr *src, *dst;
+ struct gre_softc *sc;
+#ifdef INET
+ struct sockaddr_in *sin = NULL;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6 = NULL;
+#endif
+ uint32_t opt;
+ int error;
- } else {
- /*
- * Given that people should NOT increase max_gre_nesting beyond
- * their real needs, we allocate once per packet rather than
- * allocating an mtag once per passing through gre.
- *
- * Note: the sysctl does not actually check for saneness, so we
- * limit the maximum numbers of possible recursions here.
- */
- max = imin(max_gre_nesting, 256);
- /* If someone sets the sysctl <= 0, we want at least 1. */
- max = imax(max, 1);
- len = sizeof(struct mtag_gre_nesting) +
- max * sizeof(struct ifnet *);
- mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len,
- M_NOWAIT);
- if (mtag == NULL) {
- m_freem(m);
- error = ENOMEM;
- goto end;
- }
- gt = (struct mtag_gre_nesting *)(mtag + 1);
- bzero(gt, len);
- gt->count = 1;
- gt->max = max;
- *gt->ifp = ifp;
- m_tag_prepend(m, mtag);
+ switch (cmd) {
+ case SIOCSIFMTU:
+ /* XXX: */
+ if (ifr->ifr_mtu < 576)
+ return (EINVAL);
+ break;
+ case SIOCSIFADDR:
+ ifp->if_flags |= IFF_UP;
+ case SIOCSIFFLAGS:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ return (0);
+ case GRESADDRS:
+ case GRESADDRD:
+ case GREGADDRS:
+ case GREGADDRD:
+ case GRESPROTO:
+ case GREGPROTO:
+ return (EOPNOTSUPP);
}
-
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
- sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
- m_freem(m);
- error = ENETDOWN;
+ src = dst = NULL;
+ sx_xlock(&gre_ioctl_sx);
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENXIO;
goto end;
}
-
- gh = NULL;
- ip = NULL;
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
- bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
-
- if (bpf_peers_present(ifp->if_bpf)) {
- af = dst->sa_family;
- bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
- }
-
- if ((ifp->if_flags & IFF_MONITOR) != 0) {
- m_freem(m);
- error = ENETDOWN;
+ error = 0;
+ switch (cmd) {
+ case SIOCSIFMTU:
+ GRE_WLOCK(sc);
+ sc->gre_mtu = ifr->ifr_mtu;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
goto end;
- }
-
- m->m_flags &= ~(M_BCAST|M_MCAST);
-
- if (sc->g_proto == IPPROTO_MOBILE) {
- if (dst->sa_family == AF_INET) {
- struct mbuf *m0;
- int msiz;
-
- ip = mtod(m, struct ip *);
-
- /*
- * RFC2004 specifies that fragmented diagrams shouldn't
- * be encapsulated.
- */
- if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
- error = EINVAL; /* is there better errno? */
- goto end;
- }
- memset(&mob_h, 0, MOB_H_SIZ_L);
- mob_h.proto = (ip->ip_p) << 8;
- mob_h.odst = ip->ip_dst.s_addr;
- ip->ip_dst.s_addr = sc->g_dst.s_addr;
-
- /*
- * If the packet comes from our host, we only change
- * the destination address in the IP header.
- * Else we also need to save and change the source
- */
- if (in_hosteq(ip->ip_src, sc->g_src)) {
- msiz = MOB_H_SIZ_S;
- } else {
- mob_h.proto |= MOB_H_SBIT;
- mob_h.osrc = ip->ip_src.s_addr;
- ip->ip_src.s_addr = sc->g_src.s_addr;
- msiz = MOB_H_SIZ_L;
- }
- mob_h.proto = htons(mob_h.proto);
- mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
-
- if ((m->m_data - msiz) < m->m_pktdat) {
- /* need new mbuf */
- MGETHDR(m0, M_DONTWAIT, MT_DATA);
- if (m0 == NULL) {
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
- error = ENOBUFS;
- goto end;
- }
- m0->m_next = m;
- m->m_data += sizeof(struct ip);
- m->m_len -= sizeof(struct ip);
- m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
- m0->m_len = msiz + sizeof(struct ip);
- m0->m_data += max_linkhdr;
- memcpy(mtod(m0, caddr_t), (caddr_t)ip,
- sizeof(struct ip));
- m = m0;
- } else { /* we have some space left in the old one */
- m->m_data -= msiz;
- m->m_len += msiz;
- m->m_pkthdr.len += msiz;
- bcopy(ip, mtod(m, caddr_t),
- sizeof(struct ip));
- }
- ip = mtod(m, struct ip *);
- memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
- ip->ip_len = ntohs(ip->ip_len) + msiz;
- } else { /* AF_INET */
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
- error = EINVAL;
- goto end;
- }
- } else if (sc->g_proto == IPPROTO_GRE) {
- switch (dst->sa_family) {
- case AF_INET:
- ip = mtod(m, struct ip *);
- gre_ip_tos = ip->ip_tos;
- gre_ip_id = ip->ip_id;
- if (sc->wccp_ver == WCCP_V2) {
- extra = sizeof(uint32_t);
- etype = WCCP_PROTOCOL_TYPE;
- } else {
- etype = ETHERTYPE_IP;
- }
- break;
+ case SIOCSIFPHYADDR:
#ifdef INET6
- case AF_INET6:
- gre_ip_id = ip_newid();
- etype = ETHERTYPE_IPV6;
+ case SIOCSIFPHYADDR_IN6:
+#endif
+ error = EINVAL;
+ switch (cmd) {
+#ifdef INET
+ case SIOCSIFPHYADDR:
+ src = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr *)
+ &(((struct in_aliasreq *)data)->ifra_dstaddr);
break;
#endif
-#ifdef NETATALK
- case AF_APPLETALK:
- etype = ETHERTYPE_ATALK;
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+ src = (struct sockaddr *)
+ &(((struct in6_aliasreq *)data)->ifra_addr);
+ dst = (struct sockaddr *)
+ &(((struct in6_aliasreq *)data)->ifra_dstaddr);
break;
#endif
default:
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
error = EAFNOSUPPORT;
goto end;
}
-
- /* Reserve space for GRE header + optional GRE key */
- int hdrlen = sizeof(struct greip) + extra;
- if (sc->key)
- hdrlen += sizeof(uint32_t);
- M_PREPEND(m, hdrlen, M_DONTWAIT);
- } else {
- _IF_DROP(&ifp->if_snd);
- m_freem(m);
- error = EINVAL;
- goto end;
- }
-
- if (m == NULL) { /* mbuf allocation failed */
- _IF_DROP(&ifp->if_snd);
- error = ENOBUFS;
- goto end;
- }
-
- M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */
-
- gh = mtod(m, struct greip *);
- if (sc->g_proto == IPPROTO_GRE) {
- uint32_t *options = gh->gi_options;
-
- memset((void *)gh, 0, sizeof(struct greip) + extra);
- gh->gi_ptype = htons(etype);
- gh->gi_flags = 0;
-
- /* Add key option */
- if (sc->key)
- {
- gh->gi_flags |= htons(GRE_KP);
- *(options++) = htonl(sc->key);
- }
- }
-
- gh->gi_pr = sc->g_proto;
- if (sc->g_proto != IPPROTO_MOBILE) {
- gh->gi_src = sc->g_src;
- gh->gi_dst = sc->g_dst;
- ((struct ip*)gh)->ip_v = IPPROTO_IPV4;
- ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
- ((struct ip*)gh)->ip_ttl = GRE_TTL;
- ((struct ip*)gh)->ip_tos = gre_ip_tos;
- ((struct ip*)gh)->ip_id = gre_ip_id;
- gh->gi_len = m->m_pkthdr.len;
- }
-
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
- /*
- * Send it off and with IP_FORWARD flag to prevent it from
- * overwriting the ip_id again. ip_id is already set to the
- * ip_id of the encapsulated packet.
- */
- error = ip_output(m, NULL, &sc->route, IP_FORWARDING,
- (struct ip_moptions *)NULL, (struct inpcb *)NULL);
- end:
- if (error)
- ifp->if_oerrors++;
- return (error);
-}
-
-static int
-gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct ifreq *ifr = (struct ifreq *)data;
- struct if_laddrreq *lifr = (struct if_laddrreq *)data;
- struct in_aliasreq *aifr = (struct in_aliasreq *)data;
- struct gre_softc *sc = ifp->if_softc;
- int s;
- struct sockaddr_in si;
- struct sockaddr *sa = NULL;
- int error, adj;
- struct sockaddr_in sp, sm, dp, dm;
- uint32_t key;
-
- error = 0;
- adj = 0;
+ /* sa_family must be equal */
+ if (src->sa_family != dst->sa_family ||
+ src->sa_len != dst->sa_len)
+ goto end;
- s = splnet();
- switch (cmd) {
- case SIOCSIFADDR:
- ifp->if_flags |= IFF_UP;
- break;
- case SIOCSIFDSTADDR:
- break;
- case SIOCSIFFLAGS:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0)
- break;
- if ((ifr->ifr_flags & IFF_LINK0) != 0)
- sc->g_proto = IPPROTO_GRE;
- else
- sc->g_proto = IPPROTO_MOBILE;
- if ((ifr->ifr_flags & IFF_LINK2) != 0)
- sc->wccp_ver = WCCP_V2;
- else
- sc->wccp_ver = WCCP_V1;
- goto recompute;
- case SIOCSIFMTU:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0)
- break;
- if (ifr->ifr_mtu < 576) {
- error = EINVAL;
- break;
- }
- ifp->if_mtu = ifr->ifr_mtu;
- break;
- case SIOCGIFMTU:
- ifr->ifr_mtu = GRE2IFP(sc)->if_mtu;
- break;
- case SIOCADDMULTI:
- /*
- * XXXRW: Isn't this priv_checkr() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0)
- break;
- if (ifr == 0) {
- error = EAFNOSUPPORT;
- break;
- }
- switch (ifr->ifr_addr.sa_family) {
+ /* validate sa_len */
+ switch (src->sa_family) {
#ifdef INET
case AF_INET:
+ if (src->sa_len != sizeof(struct sockaddr_in))
+ goto end;
break;
#endif
#ifdef INET6
case AF_INET6:
+ if (src->sa_len != sizeof(struct sockaddr_in6))
+ goto end;
break;
#endif
default:
error = EAFNOSUPPORT;
- break;
+ goto end;
}
- break;
- case SIOCDELMULTI:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0)
- break;
- if (ifr == 0) {
- error = EAFNOSUPPORT;
- break;
+ /* check sa_family looks sane for the cmd */
+ error = EAFNOSUPPORT;
+ switch (cmd) {
+#ifdef INET
+ case SIOCSIFPHYADDR:
+ if (src->sa_family == AF_INET)
+ break;
+ goto end;
+#endif
+#ifdef INET6
+ case SIOCSIFPHYADDR_IN6:
+ if (src->sa_family == AF_INET6)
+ break;
+ goto end;
+#endif
}
- switch (ifr->ifr_addr.sa_family) {
+ error = EADDRNOTAVAIL;
+ switch (src->sa_family) {
#ifdef INET
case AF_INET:
+ if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
+ satosin(dst)->sin_addr.s_addr == INADDR_ANY)
+ goto end;
break;
#endif
#ifdef INET6
case AF_INET6:
- break;
+ if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
+ ||
+ IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
+ goto end;
+ /*
+ * Check validity of the scope zone ID of the
+ * addresses, and convert it into the kernel
+ * internal form if necessary.
+ */
+ error = sa6_embedscope(satosin6(src), 0);
+ if (error != 0)
+ goto end;
+ error = sa6_embedscope(satosin6(dst), 0);
+ if (error != 0)
+ goto end;
#endif
- default:
- error = EAFNOSUPPORT;
- break;
}
+ error = gre_set_tunnel(ifp, src, dst);
break;
- case GRESPROTO:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
- break;
- sc->g_proto = ifr->ifr_flags;
- switch (sc->g_proto) {
- case IPPROTO_GRE:
- ifp->if_flags |= IFF_LINK0;
- break;
- case IPPROTO_MOBILE:
- ifp->if_flags &= ~IFF_LINK0;
- break;
- default:
- error = EPROTONOSUPPORT;
- break;
- }
- goto recompute;
- case GREGPROTO:
- ifr->ifr_flags = sc->g_proto;
+ case SIOCDIFPHYADDR:
+ gre_delete_tunnel(ifp);
break;
- case GRESADDRS:
- case GRESADDRD:
- error = priv_check(curthread, PRIV_NET_GRE);
- if (error)
- return (error);
- /*
- * set tunnel endpoints, compute a less specific route
- * to the remote end and mark if as up
- */
- sa = &ifr->ifr_addr;
- if (cmd == GRESADDRS)
- sc->g_src = (satosin(sa))->sin_addr;
- if (cmd == GRESADDRD)
- sc->g_dst = (satosin(sa))->sin_addr;
- recompute:
-#ifdef INET
- if (sc->encap != NULL) {
- encap_detach(sc->encap);
- sc->encap = NULL;
- }
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
#endif
- if ((sc->g_src.s_addr != INADDR_ANY) &&
- (sc->g_dst.s_addr != INADDR_ANY)) {
- bzero(&sp, sizeof(sp));
- bzero(&sm, sizeof(sm));
- bzero(&dp, sizeof(dp));
- bzero(&dm, sizeof(dm));
- sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len =
- sizeof(struct sockaddr_in);
- sp.sin_family = sm.sin_family = dp.sin_family =
- dm.sin_family = AF_INET;
- sp.sin_addr = sc->g_src;
- dp.sin_addr = sc->g_dst;
- sm.sin_addr.s_addr = dm.sin_addr.s_addr =
- INADDR_BROADCAST;
-#ifdef INET
- sc->encap = encap_attach(AF_INET, sc->g_proto,
- sintosa(&sp), sintosa(&sm), sintosa(&dp),
- sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ?
- &in_gre_protosw : &in_mobile_protosw, sc);
- if (sc->encap == NULL)
- printf("%s: unable to attach encap\n",
- if_name(GRE2IFP(sc)));
-#endif
- if (sc->route.ro_rt != 0) /* free old route */
- RTFREE(sc->route.ro_rt);
- if (gre_compute_route(sc) == 0)
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- else
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- }
- break;
- case GREGADDRS:
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- sa = sintosa(&si);
- error = prison_if(curthread->td_ucred, sa);
- if (error != 0)
- break;
- ifr->ifr_addr = *sa;
- break;
- case GREGADDRD:
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- sa = sintosa(&si);
- error = prison_if(curthread->td_ucred, sa);
- if (error != 0)
- break;
- ifr->ifr_addr = *sa;
- break;
- case SIOCSIFPHYADDR:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
- break;
- if (aifr->ifra_addr.sin_family != AF_INET ||
- aifr->ifra_dstaddr.sin_family != AF_INET) {
- error = EAFNOSUPPORT;
- break;
- }
- if (aifr->ifra_addr.sin_len != sizeof(si) ||
- aifr->ifra_dstaddr.sin_len != sizeof(si)) {
- error = EINVAL;
- break;
- }
- sc->g_src = aifr->ifra_addr.sin_addr;
- sc->g_dst = aifr->ifra_dstaddr.sin_addr;
- goto recompute;
- case SIOCSLIFPHYADDR:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
- break;
- if (lifr->addr.ss_family != AF_INET ||
- lifr->dstaddr.ss_family != AF_INET) {
- error = EAFNOSUPPORT;
- break;
- }
- if (lifr->addr.ss_len != sizeof(si) ||
- lifr->dstaddr.ss_len != sizeof(si)) {
- error = EINVAL;
- break;
- }
- sc->g_src = (satosin(&lifr->addr))->sin_addr;
- sc->g_dst =
- (satosin(&lifr->dstaddr))->sin_addr;
- goto recompute;
- case SIOCDIFPHYADDR:
- /*
- * XXXRW: Isn't this priv_check() redundant to the ifnet
- * layer check?
- */
- if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
- break;
- sc->g_src.s_addr = INADDR_ANY;
- sc->g_dst.s_addr = INADDR_ANY;
- goto recompute;
- case SIOCGLIFPHYADDR:
- if (sc->g_src.s_addr == INADDR_ANY ||
- sc->g_dst.s_addr == INADDR_ANY) {
+ if (sc->gre_family == 0) {
error = EADDRNOTAVAIL;
break;
}
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
- if (error != 0)
+ GRE_RLOCK(sc);
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ if (sc->gre_family != AF_INET) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin = (struct sockaddr_in *)&ifr->ifr_addr;
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
break;
- memcpy(&lifr->addr, &si, sizeof(si));
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
- if (error != 0)
+#endif
+#ifdef INET6
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
+ if (sc->gre_family != AF_INET6) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ sin6 = (struct sockaddr_in6 *)
+ &(((struct in6_ifreq *)data)->ifr_addr);
+ memset(sin6, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
break;
- memcpy(&lifr->dstaddr, &si, sizeof(si));
- break;
- case SIOCGIFPSRCADDR:
+#endif
+ }
+ if (error == 0) {
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ sin->sin_addr = sc->gre_oip.ip_src;
+ break;
+ case SIOCGIFPDSTADDR:
+ sin->sin_addr = sc->gre_oip.ip_dst;
+ break;
+#endif
#ifdef INET6
- case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPSRCADDR_IN6:
+ sin6->sin6_addr = sc->gre_oip6.ip6_src;
+ break;
+ case SIOCGIFPDSTADDR_IN6:
+ sin6->sin6_addr = sc->gre_oip6.ip6_dst;
+ break;
#endif
- if (sc->g_src.s_addr == INADDR_ANY) {
- error = EADDRNOTAVAIL;
- break;
+ }
}
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_src.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
+ GRE_RUNLOCK(sc);
if (error != 0)
break;
- bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
- break;
- case SIOCGIFPDSTADDR:
+ switch (cmd) {
+#ifdef INET
+ case SIOCGIFPSRCADDR:
+ case SIOCGIFPDSTADDR:
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin);
+ if (error != 0)
+ memset(sin, 0, sizeof(*sin));
+ break;
+#endif
#ifdef INET6
- case SIOCGIFPDSTADDR_IN6:
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
+ error = prison_if(curthread->td_ucred,
+ (struct sockaddr *)sin6);
+ if (error == 0)
+ error = sa6_recoverscope(sin6);
+ if (error != 0)
+ memset(sin6, 0, sizeof(*sin6));
#endif
- if (sc->g_dst.s_addr == INADDR_ANY) {
- error = EADDRNOTAVAIL;
- break;
}
- memset(&si, 0, sizeof(si));
- si.sin_family = AF_INET;
- si.sin_len = sizeof(struct sockaddr_in);
- si.sin_addr.s_addr = sc->g_dst.s_addr;
- error = prison_if(curthread->td_ucred, (struct sockaddr *)&si);
- if (error != 0)
+ break;
+ case SIOCGTUNFIB:
+ ifr->ifr_fib = sc->gre_fibnum;
+ break;
+ case SIOCSTUNFIB:
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
break;
- bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
+ if (ifr->ifr_fib >= rt_numfibs)
+ error = EINVAL;
+ else
+ sc->gre_fibnum = ifr->ifr_fib;
break;
case GRESKEY:
- error = priv_check(curthread, PRIV_NET_GRE);
- if (error)
- break;
- error = copyin(ifr->ifr_data, &key, sizeof(key));
- if (error)
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
break;
- /* adjust MTU for option header */
- if (key == 0 && sc->key != 0) /* clear */
- adj += sizeof(key);
- else if (key != 0 && sc->key == 0) /* set */
- adj -= sizeof(key);
-
- if (ifp->if_mtu + adj < 576) {
- error = EINVAL;
+ if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
break;
+ if (sc->gre_key != opt) {
+ GRE_WLOCK(sc);
+ sc->gre_key = opt;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
}
- ifp->if_mtu += adj;
- sc->key = key;
break;
case GREGKEY:
- error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key));
+ error = copyout(&sc->gre_key, ifr->ifr_data,
+ sizeof(sc->gre_key));
+ break;
+ case GRESOPTS:
+ if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
+ break;
+ if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
+ break;
+ if (opt & ~GRE_OPTMASK)
+ error = EINVAL;
+ else {
+ if (sc->gre_options != opt) {
+ GRE_WLOCK(sc);
+ sc->gre_options = opt;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
+ }
+ }
break;
+ case GREGOPTS:
+ error = copyout(&sc->gre_options, ifr->ifr_data,
+ sizeof(sc->gre_options));
+ break;
default:
error = EINVAL;
break;
}
-
- splx(s);
+end:
+ sx_xunlock(&gre_ioctl_sx);
return (error);
}
-/*
- * computes a route to our destination that is not the one
- * which would be taken by ip_output(), as this one will loop back to
- * us. If the interface is p2p as a--->b, then a routing entry exists
- * If we now send a packet to b (e.g. ping b), this will come down here
- * gets src=a, dst=b tacked on and would from ip_output() sent back to
- * if_gre.
- * Goal here is to compute a route to b that is less specific than
- * a-->b. We know that this one exists as in normal operation we have
- * at least a default route which matches.
- */
+static void
+gre_updatehdr(struct gre_softc *sc)
+{
+ struct grehdr *gh = NULL;
+ uint32_t *opts;
+ uint16_t flags;
+
+ GRE_WLOCK_ASSERT(sc);
+ switch (sc->gre_family) {
+#ifdef INET
+ case AF_INET:
+ sc->gre_hlen = sizeof(struct greip);
+ sc->gre_oip.ip_v = IPPROTO_IPV4;
+ sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
+ sc->gre_oip.ip_p = IPPROTO_GRE;
+ gh = &sc->gre_gihdr->gi_gre;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sc->gre_hlen = sizeof(struct greip6);
+ sc->gre_oip6.ip6_vfc = IPV6_VERSION;
+ sc->gre_oip6.ip6_nxt = IPPROTO_GRE;
+ gh = &sc->gre_gi6hdr->gi6_gre;
+ break;
+#endif
+ default:
+ return;
+ }
+ flags = 0;
+ opts = gh->gre_opts;
+ if (sc->gre_options & GRE_ENABLE_CSUM) {
+ flags |= GRE_FLAGS_CP;
+ sc->gre_hlen += 2 * sizeof(uint16_t);
+ *opts++ = 0;
+ }
+ if (sc->gre_key != 0) {
+ flags |= GRE_FLAGS_KP;
+ sc->gre_hlen += sizeof(uint32_t);
+ *opts++ = htonl(sc->gre_key);
+ }
+ if (sc->gre_options & GRE_ENABLE_SEQ) {
+ flags |= GRE_FLAGS_SP;
+ sc->gre_hlen += sizeof(uint32_t);
+ *opts++ = 0;
+ } else
+ sc->gre_oseq = 0;
+ gh->gre_flags = htons(flags);
+ GRE2IFP(sc)->if_mtu = sc->gre_mtu - sc->gre_hlen;
+}
+
+static void
+gre_detach(struct gre_softc *sc)
+{
+
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+ if (sc->gre_ecookie != NULL)
+ encap_detach(sc->gre_ecookie);
+ sc->gre_ecookie = NULL;
+}
+
static int
-gre_compute_route(struct gre_softc *sc)
+gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src,
+ struct sockaddr *dst)
{
- struct route *ro;
-
- ro = &sc->route;
-
- memset(ro, 0, sizeof(struct route));
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
- ro->ro_dst.sa_family = AF_INET;
- ro->ro_dst.sa_len = sizeof(ro->ro_dst);
-
- /*
- * toggle last bit, so our interface is not found, but a less
- * specific route. I'd rather like to specify a shorter mask,
- * but this is not possible. Should work though. XXX
- * XXX MRT Use a different FIB for the tunnel to solve this problem.
- */
- if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) {
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^=
- htonl(0x01);
+ struct gre_softc *sc, *tsc;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+#ifdef INET
+ struct ip *ip;
+#endif
+ void *hdr;
+ int error;
+
+ sx_assert(&gre_ioctl_sx, SA_XLOCKED);
+ GRE_LIST_LOCK();
+ sc = ifp->if_softc;
+ LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) {
+ if (tsc == sc || tsc->gre_family != src->sa_family)
+ continue;
+#ifdef INET
+ if (tsc->gre_family == AF_INET &&
+ tsc->gre_oip.ip_src.s_addr ==
+ satosin(src)->sin_addr.s_addr &&
+ tsc->gre_oip.ip_dst.s_addr ==
+ satosin(dst)->sin_addr.s_addr) {
+ GRE_LIST_UNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+#endif
+#ifdef INET6
+ if (tsc->gre_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src,
+ &satosin6(src)->sin6_addr) &&
+ IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst,
+ &satosin6(dst)->sin6_addr)) {
+ GRE_LIST_UNLOCK();
+ return (EADDRNOTAVAIL);
+ }
+#endif
}
+ GRE_LIST_UNLOCK();
-#ifdef DIAGNOSTIC
- printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)),
- inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
+ switch (src->sa_family) {
+#ifdef INET
+ case AF_INET:
+ hdr = ip = malloc(sizeof(struct greip) +
+ 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
+ ip->ip_src = satosin(src)->sin_addr;
+ ip->ip_dst = satosin(dst)->sin_addr;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ hdr = ip6 = malloc(sizeof(struct greip6) +
+ 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
+ ip6->ip6_src = satosin6(src)->sin6_addr;
+ ip6->ip6_dst = satosin6(dst)->sin6_addr;
+ break;
+#endif
+ default:
+ return (EAFNOSUPPORT);
+ }
+ if (sc->gre_family != 0)
+ gre_detach(sc);
+ GRE_WLOCK(sc);
+ if (sc->gre_family != 0)
+ free(sc->gre_hdr, M_GRE);
+ sc->gre_family = src->sa_family;
+ sc->gre_hdr = hdr;
+ sc->gre_oseq = 0;
+ sc->gre_iseq = UINT32_MAX;
+ gre_updatehdr(sc);
+ GRE_WUNLOCK(sc);
+
+ error = 0;
+ switch (src->sa_family) {
+#ifdef INET
+ case AF_INET:
+ error = in_gre_attach(sc);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = in6_gre_attach(sc);
+ break;
#endif
+ }
+ if (error == 0) {
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_UP);
+ }
+ return (error);
+}
- rtalloc_fib(ro, sc->gre_fibnum);
+static void
+gre_delete_tunnel(struct ifnet *ifp)
+{
+ struct gre_softc *sc = ifp->if_softc;
+ int family;
+
+ GRE_WLOCK(sc);
+ family = sc->gre_family;
+ sc->gre_family = 0;
+ GRE_WUNLOCK(sc);
+ if (family != 0) {
+ gre_detach(sc);
+ free(sc->gre_hdr, M_GRE);
+ }
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+}
- /*
- * check if this returned a route at all and this route is no
- * recursion to ourself
- */
- if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
-#ifdef DIAGNOSTIC
- if (ro->ro_rt == NULL)
- printf(" - no route found!\n");
- else
- printf(" - route loops back to ourself!\n");
+int
+gre_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct gre_softc *sc;
+ struct grehdr *gh;
+ struct ifnet *ifp;
+ struct mbuf *m;
+ uint32_t *opts;
+#ifdef notyet
+ uint32_t key;
+#endif
+ uint16_t flags;
+ int hlen, isr, af;
+
+ m = *mp;
+ sc = encap_getarg(m);
+ KASSERT(sc != NULL, ("encap_getarg returned NULL"));
+
+ ifp = GRE2IFP(sc);
+ hlen = *offp + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
+ if (m->m_pkthdr.len < hlen)
+ goto drop;
+ if (m->m_len < hlen) {
+ m = m_pullup(m, hlen);
+ if (m == NULL)
+ goto drop;
+ }
+ gh = (struct grehdr *)mtodo(m, *offp);
+ flags = ntohs(gh->gre_flags);
+ if (flags & ~GRE_FLAGS_MASK)
+ goto drop;
+ opts = gh->gre_opts;
+ hlen = 2 * sizeof(uint16_t);
+ if (flags & GRE_FLAGS_CP) {
+ /* reserved1 field must be zero */
+ if (((uint16_t *)opts)[1] != 0)
+ goto drop;
+ if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0)
+ goto drop;
+ hlen += 2 * sizeof(uint16_t);
+ opts++;
+ }
+ if (flags & GRE_FLAGS_KP) {
+#ifdef notyet
+ /*
+ * XXX: The current implementation uses the key only for outgoing
+ * packets. But we can check the key value here, or even in the
+ * encapcheck function.
+ */
+ key = ntohl(*opts);
+#endif
+ hlen += sizeof(uint32_t);
+ opts++;
+ }
+#ifdef notyet
+ } else
+ key = 0;
+
+ if (sc->gre_key != 0 && (key != sc->gre_key || key != 0))
+ goto drop;
+#endif
+ if (flags & GRE_FLAGS_SP) {
+#ifdef notyet
+ seq = ntohl(*opts);
+#endif
+ hlen += sizeof(uint32_t);
+ }
+ switch (ntohs(gh->gre_proto)) {
+ case ETHERTYPE_WCCP:
+ /*
+ * For WCCP skip an additional 4 bytes if after GRE header
+ * doesn't follow an IP header.
+ */
+ if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40)
+ hlen += sizeof(uint32_t);
+ /* FALLTHROUGH */
+ case ETHERTYPE_IP:
+ isr = NETISR_IP;
+ af = AF_INET;
+ break;
+ case ETHERTYPE_IPV6:
+ isr = NETISR_IPV6;
+ af = AF_INET6;
+ break;
+ default:
+ goto drop;
+ }
+ m_adj(m, *offp + hlen);
+ m_clrprotoflags(m);
+ m->m_pkthdr.rcvif = ifp;
+ M_SETFIB(m, ifp->if_fib);
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
#endif
- return EADDRNOTAVAIL;
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ if ((ifp->if_flags & IFF_MONITOR) != 0)
+ m_freem(m);
+ else
+ netisr_dispatch(isr, m);
+ return (IPPROTO_DONE);
+drop:
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ m_freem(m);
+ return (IPPROTO_DONE);
+}
+
+#define MTAG_GRE 1307983903
+static int
+gre_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+ struct m_tag *mtag;
+ int count;
+
+ count = 1;
+ mtag = NULL;
+ while ((mtag = m_tag_locate(m, MTAG_GRE, 0, mtag)) != NULL) {
+ if (*(struct ifnet **)(mtag + 1) == ifp) {
+ log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
+ return (EIO);
+ }
+ count++;
+ }
+ if (count > V_max_gre_nesting) {
+ log(LOG_NOTICE,
+ "%s: if_output recursively called too many times(%d)\n",
+ ifp->if_xname, count);
+ return (EIO);
}
+ mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL)
+ return (ENOMEM);
+ *(struct ifnet **)(mtag + 1) = ifp;
+ m_tag_prepend(m, mtag);
+ return (0);
+}
- /*
- * now change it back - else ip_output will just drop
- * the route and search one to this interface ...
- */
- if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0)
- ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
+static int
+gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ uint32_t af;
+ int error;
-#ifdef DIAGNOSTIC
- printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp),
- inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
- printf("\n");
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error != 0)
+ goto drop;
#endif
+ if ((ifp->if_flags & IFF_MONITOR) != 0 ||
+ (ifp->if_flags & IFF_UP) == 0) {
+ error = ENETDOWN;
+ goto drop;
+ }
+
+ error = gre_check_nesting(ifp, m);
+ if (error != 0)
+ goto drop;
- return 0;
+ m->m_flags &= ~(M_BCAST|M_MCAST);
+ if (dst->sa_family == AF_UNSPEC)
+ bcopy(dst->sa_data, &af, sizeof(af));
+ else
+ af = dst->sa_family;
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
+ m->m_pkthdr.csum_data = af; /* save af for if_transmit */
+ return (ifp->if_transmit(ifp, m));
+drop:
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (error);
}
-/*
- * do a checksum of a buffer - much like in_cksum, which operates on
- * mbufs.
- */
-u_int16_t
-gre_in_cksum(u_int16_t *p, u_int len)
+static void
+gre_setseqn(struct grehdr *gh, uint32_t seq)
{
- u_int32_t sum = 0;
- int nwords = len >> 1;
-
- while (nwords-- != 0)
- sum += *p++;
-
- if (len & 1) {
- union {
- u_short w;
- u_char c[2];
- } u;
- u.c[0] = *(u_char *)p;
- u.c[1] = 0;
- sum += u.w;
+ uint32_t *opts;
+ uint16_t flags;
+
+ opts = gh->gre_opts;
+ flags = ntohs(gh->gre_flags);
+ KASSERT((flags & GRE_FLAGS_SP) != 0,
+ ("gre_setseqn called, but GRE_FLAGS_SP isn't set "));
+ if (flags & GRE_FLAGS_CP)
+ opts++;
+ if (flags & GRE_FLAGS_KP)
+ opts++;
+ *opts = htonl(seq);
+}
+
+static int
+gre_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+ GRE_RLOCK_TRACKER;
+ struct gre_softc *sc;
+ struct grehdr *gh;
+ uint32_t iaf, oaf, oseq;
+ int error, hlen, olen, plen;
+ int want_seq, want_csum;
+
+ plen = 0;
+ sc = ifp->if_softc;
+ if (sc == NULL) {
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
+ }
+ GRE_RLOCK(sc);
+ if (sc->gre_family == 0) {
+ GRE_RUNLOCK(sc);
+ error = ENETDOWN;
+ m_freem(m);
+ goto drop;
+ }
+ iaf = m->m_pkthdr.csum_data;
+ oaf = sc->gre_family;
+ hlen = sc->gre_hlen;
+ want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0;
+ if (want_seq)
+ oseq = sc->gre_oseq++; /* XXX */
+ else
+ oseq = 0; /* Make compiler happy. */
+ want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0;
+ M_SETFIB(m, sc->gre_fibnum);
+ M_PREPEND(m, hlen, M_NOWAIT);
+ if (m == NULL) {
+ GRE_RUNLOCK(sc);
+ error = ENOBUFS;
+ goto drop;
+ }
+ bcopy(sc->gre_hdr, mtod(m, void *), hlen);
+ GRE_RUNLOCK(sc);
+ switch (oaf) {
+#ifdef INET
+ case AF_INET:
+ olen = sizeof(struct ip);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ olen = sizeof(struct ip6_hdr);
+ break;
+#endif
+ default:
+ error = ENETDOWN;
+ goto drop;
}
+ gh = (struct grehdr *)mtodo(m, olen);
+ switch (iaf) {
+#ifdef INET
+ case AF_INET:
+ gh->gre_proto = htons(ETHERTYPE_IP);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ gh->gre_proto = htons(ETHERTYPE_IPV6);
+ break;
+#endif
+ default:
+ error = ENETDOWN;
+ goto drop;
+ }
+ if (want_seq)
+ gre_setseqn(gh, oseq);
+ if (want_csum) {
+ *(uint16_t *)gh->gre_opts = in_cksum_skip(m,
+ m->m_pkthdr.len, olen);
+ }
+ plen = m->m_pkthdr.len - hlen;
+ switch (oaf) {
+#ifdef INET
+ case AF_INET:
+ error = in_gre_output(m, iaf, hlen);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ error = in6_gre_output(m, iaf, hlen);
+ break;
+#endif
+ default:
+ m_freem(m);
+ error = ENETDOWN;
+ }
+drop:
+ if (error)
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ else {
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
+ }
+ return (error);
+}
+
+static void
+gre_qflush(struct ifnet *ifp __unused)
+{
- /* end-around-carry */
- sum = (sum >> 16) + (sum & 0xffff);
- sum += (sum >> 16);
- return (~sum);
}
static int
@@ -970,16 +989,12 @@ gremodevent(module_t mod, int type, void *data)
switch (type) {
case MOD_LOAD:
- greattach();
- break;
case MOD_UNLOAD:
- if_clone_detach(&gre_cloner);
- mtx_destroy(&gre_mtx);
break;
default:
- return EOPNOTSUPP;
+ return (EOPNOTSUPP);
}
- return 0;
+ return (0);
}
static moduledata_t gre_mod = {
diff --git a/freebsd/sys/net/if_gre.h b/freebsd/sys/net/if_gre.h
index 74d16b1c..806b0cb8 100644
--- a/freebsd/sys/net/if_gre.h
+++ b/freebsd/sys/net/if_gre.h
@@ -1,8 +1,6 @@
-/* $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ */
-/* $FreeBSD$ */
-
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -28,158 +26,111 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $
+ * $FreeBSD$
*/
-#ifndef _NET_IF_GRE_H
-#define _NET_IF_GRE_H
+#ifndef _NET_IF_GRE_H_
+#define _NET_IF_GRE_H_
-#include <sys/ioccom.h>
#ifdef _KERNEL
-#include <sys/queue.h>
-
-/*
- * Version of the WCCP, need to be configured manually since
- * header for version 2 is the same but IP payload is prepended
- * with additional 4-bytes field.
- */
-typedef enum {
- WCCP_V1 = 0,
- WCCP_V2
-} wccp_ver_t;
-
-struct gre_softc {
- struct ifnet *sc_ifp;
- LIST_ENTRY(gre_softc) sc_list;
- int gre_unit;
- int gre_flags;
- u_int gre_fibnum; /* use this fib for envelopes */
- struct in_addr g_src; /* source address of gre packets */
- struct in_addr g_dst; /* destination address of gre packets */
- struct route route; /* routing entry that determines, where a
- encapsulated packet should go */
- u_char g_proto; /* protocol of encapsulator */
-
- const struct encaptab *encap; /* encapsulation cookie */
-
- uint32_t key; /* key included in outgoing GRE packets */
- /* zero means none */
-
- wccp_ver_t wccp_ver; /* version of the WCCP */
-};
-#define GRE2IFP(sc) ((sc)->sc_ifp)
-
-
-struct gre_h {
- u_int16_t flags; /* GRE flags */
- u_int16_t ptype; /* protocol type of payload typically
- Ether protocol type*/
- uint32_t options[0]; /* optional options */
-/*
- * from here on: fields are optional, presence indicated by flags
- *
- u_int_16 checksum checksum (one-complements of GRE header
- and payload
- Present if (ck_pres | rt_pres == 1).
- Valid if (ck_pres == 1).
- u_int_16 offset offset from start of routing filed to
- first octet of active SRE (see below).
- Present if (ck_pres | rt_pres == 1).
- Valid if (rt_pres == 1).
- u_int_32 key inserted by encapsulator e.g. for
- authentication
- Present if (key_pres ==1 ).
- u_int_32 seq_num Sequence number to allow for packet order
- Present if (seq_pres ==1 ).
- struct gre_sre[] routing Routing fileds (see below)
- Present if (rt_pres == 1)
- */
+/* GRE header according to RFC 2784 and RFC 2890 */
+struct grehdr {
+ uint16_t gre_flags; /* GRE flags */
+#define GRE_FLAGS_CP 0x8000 /* checksum present */
+#define GRE_FLAGS_KP 0x2000 /* key present */
+#define GRE_FLAGS_SP 0x1000 /* sequence present */
+#define GRE_FLAGS_MASK (GRE_FLAGS_CP|GRE_FLAGS_KP|GRE_FLAGS_SP)
+ uint16_t gre_proto; /* protocol type */
+ uint32_t gre_opts[0]; /* optional fields */
} __packed;
+#ifdef INET
struct greip {
- struct ip gi_i;
- struct gre_h gi_g;
+ struct ip gi_ip;
+ struct grehdr gi_gre;
} __packed;
+#endif
-#define gi_pr gi_i.ip_p
-#define gi_len gi_i.ip_len
-#define gi_src gi_i.ip_src
-#define gi_dst gi_i.ip_dst
-#define gi_ptype gi_g.ptype
-#define gi_flags gi_g.flags
-#define gi_options gi_g.options
-
-#define GRE_CP 0x8000 /* Checksum Present */
-#define GRE_RP 0x4000 /* Routing Present */
-#define GRE_KP 0x2000 /* Key Present */
-#define GRE_SP 0x1000 /* Sequence Present */
-#define GRE_SS 0x0800 /* Strict Source Route */
+#ifdef INET6
+struct greip6 {
+ struct ip6_hdr gi6_ip6;
+ struct grehdr gi6_gre;
+} __packed;
+#endif
+struct gre_softc {
+ struct ifnet *gre_ifp;
+ LIST_ENTRY(gre_softc) gre_list;
+ struct rmlock gre_lock;
+ int gre_family; /* AF of delivery header */
+ uint32_t gre_iseq;
+ uint32_t gre_oseq;
+ uint32_t gre_key;
+ uint32_t gre_options;
+ uint32_t gre_mtu;
+ u_int gre_fibnum;
+ u_int gre_hlen; /* header size */
+ union {
+ void *hdr;
+#ifdef INET
+ struct greip *gihdr;
+#endif
+#ifdef INET6
+ struct greip6 *gi6hdr;
+#endif
+ } gre_uhdr;
+ const struct encaptab *gre_ecookie;
+};
+#define GRE2IFP(sc) ((sc)->gre_ifp)
+#define GRE_LOCK_INIT(sc) rm_init(&(sc)->gre_lock, "gre softc")
+#define GRE_LOCK_DESTROY(sc) rm_destroy(&(sc)->gre_lock)
+#define GRE_RLOCK_TRACKER struct rm_priotracker gre_tracker
+#define GRE_RLOCK(sc) rm_rlock(&(sc)->gre_lock, &gre_tracker)
+#define GRE_RUNLOCK(sc) rm_runlock(&(sc)->gre_lock, &gre_tracker)
+#define GRE_RLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_RLOCKED)
+#define GRE_WLOCK(sc) rm_wlock(&(sc)->gre_lock)
+#define GRE_WUNLOCK(sc) rm_wunlock(&(sc)->gre_lock)
+#define GRE_WLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_WLOCKED)
+
+#define gre_hdr gre_uhdr.hdr
+#define gre_gihdr gre_uhdr.gihdr
+#define gre_gi6hdr gre_uhdr.gi6hdr
+#define gre_oip gre_gihdr->gi_ip
+#define gre_oip6 gre_gi6hdr->gi6_ip6
+
+int gre_input(struct mbuf **, int *, int);
+#ifdef INET
+int in_gre_attach(struct gre_softc *);
+int in_gre_output(struct mbuf *, int, int);
+#endif
+#ifdef INET6
+int in6_gre_attach(struct gre_softc *);
+int in6_gre_output(struct mbuf *, int, int);
+#endif
/*
* CISCO uses special type for GRE tunnel created as part of WCCP
* connection, while in fact those packets are just IPv4 encapsulated
* into GRE.
*/
-#define WCCP_PROTOCOL_TYPE 0x883E
-
-/*
- * gre_sre defines a Source route Entry. These are needed if packets
- * should be routed over more than one tunnel hop by hop
- */
-struct gre_sre {
- u_int16_t sre_family; /* address family */
- u_char sre_offset; /* offset to first octet of active entry */
- u_char sre_length; /* number of octets in the SRE.
- sre_lengthl==0 -> last entry. */
- u_char *sre_rtinfo; /* the routing information */
-};
-
-struct greioctl {
- int unit;
- struct in_addr addr;
-};
-
-/* for mobile encaps */
-
-struct mobile_h {
- u_int16_t proto; /* protocol and S-bit */
- u_int16_t hcrc; /* header checksum */
- u_int32_t odst; /* original destination address */
- u_int32_t osrc; /* original source addr, if S-bit set */
-} __packed;
-
-struct mobip_h {
- struct ip mi;
- struct mobile_h mh;
-} __packed;
-
-
-#define MOB_H_SIZ_S (sizeof(struct mobile_h) - sizeof(u_int32_t))
-#define MOB_H_SIZ_L (sizeof(struct mobile_h))
-#define MOB_H_SBIT 0x0080
-
-#define GRE_TTL 30
-
+#define ETHERTYPE_WCCP 0x883E
#endif /* _KERNEL */
-/*
- * ioctls needed to manipulate the interface
- */
-
#define GRESADDRS _IOW('i', 101, struct ifreq)
#define GRESADDRD _IOW('i', 102, struct ifreq)
#define GREGADDRS _IOWR('i', 103, struct ifreq)
#define GREGADDRD _IOWR('i', 104, struct ifreq)
#define GRESPROTO _IOW('i' , 105, struct ifreq)
#define GREGPROTO _IOWR('i', 106, struct ifreq)
-#define GREGKEY _IOWR('i', 107, struct ifreq)
-#define GRESKEY _IOW('i', 108, struct ifreq)
-#ifdef _KERNEL
-LIST_HEAD(gre_softc_head, gre_softc);
-extern struct mtx gre_mtx;
-extern struct gre_softc_head gre_softc_list;
+#define GREGKEY _IOWR('i', 107, struct ifreq)
+#define GRESKEY _IOW('i', 108, struct ifreq)
+#define GREGOPTS _IOWR('i', 109, struct ifreq)
+#define GRESOPTS _IOW('i', 110, struct ifreq)
-u_int16_t gre_in_cksum(u_int16_t *, u_int);
-#endif /* _KERNEL */
+#define GRE_ENABLE_CSUM 0x0001
+#define GRE_ENABLE_SEQ 0x0002
+#define GRE_OPTMASK (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ)
-#endif
+#endif /* _NET_IF_GRE_H_ */
diff --git a/freebsd/sys/net/if_iso88025subr.c b/freebsd/sys/net/if_iso88025subr.c
index 660dc7dd..d26d0ebd 100644
--- a/freebsd/sys/net/if_iso88025subr.c
+++ b/freebsd/sys/net/if_iso88025subr.c
@@ -44,7 +44,6 @@
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -56,6 +55,7 @@
#include <sys/sockio.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_llc.h>
@@ -77,11 +77,6 @@
#include <netinet6/nd6.h>
#endif
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
#include <security/mac/mac_framework.h>
static const u_char iso88025_broadcastaddr[ISO88025_ADDR_LEN] =
@@ -172,30 +167,6 @@ iso88025_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
arp_ifinit(ifp, ifa);
break;
#endif /* INET */
-#ifdef IPX
- /*
- * XXX - This code is probably wrong
- */
- case AF_IPX: {
- struct ipx_addr *ina;
-
- ina = &(IA_SIPX(ifa)->sipx_addr);
-
- if (ipx_nullhost(*ina))
- ina->x_host = *(union ipx_host *)
- IF_LLADDR(ifp);
- else
- bcopy((caddr_t) ina->x_host.c_host,
- (caddr_t) IF_LLADDR(ifp),
- ISO88025_ADDR_LEN);
-
- /*
- * Set new address
- */
- ifp->if_init(ifp->if_softc);
- }
- break;
-#endif /* IPX */
default:
ifp->if_init(ifp->if_softc);
break;
@@ -233,11 +204,8 @@ iso88025_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
* ISO88025 encapsulation
*/
int
-iso88025_output(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
+iso88025_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
u_int16_t snap_type = 0;
int loop_copy = 0, error = 0, rif_len = 0;
@@ -246,13 +214,10 @@ iso88025_output(ifp, m, dst, ro)
struct iso88025_header gen_th;
struct sockaddr_dl *sdl = NULL;
struct rtentry *rt0 = NULL;
-#if defined(INET) || defined(INET6)
- struct llentry *lle;
-#endif
+ int is_gw = 0;
if (ro != NULL)
- rt0 = ro->ro_rt;
-
+ is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
#ifdef MAC
error = mac_ifnet_check_transmit(ifp, m);
if (error)
@@ -291,7 +256,7 @@ iso88025_output(ifp, m, dst, ro)
switch (dst->sa_family) {
#ifdef INET
case AF_INET:
- error = arpresolve(ifp, rt0, m, dst, edst, &lle);
+ error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
if (error)
return (error == EWOULDBLOCK ? 0 : error);
snap_type = ETHERTYPE_IP;
@@ -326,34 +291,15 @@ iso88025_output(ifp, m, dst, ro)
#endif /* INET */
#ifdef INET6
case AF_INET6:
- error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle);
+ error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
if (error)
- return (error);
+ return (error == EWOULDBLOCK ? 0 : error);
snap_type = ETHERTYPE_IPV6;
break;
#endif /* INET6 */
-#ifdef IPX
- case AF_IPX:
- {
- u_int8_t *cp;
-
- bcopy((caddr_t)&(satoipx_addr(dst).x_host), (caddr_t)edst,
- ISO88025_ADDR_LEN);
-
- M_PREPEND(m, 3, M_WAIT);
- m = m_pullup(m, 3);
- if (m == 0)
- senderr(ENOBUFS);
- cp = mtod(m, u_int8_t *);
- *cp++ = ETHERTYPE_IPX_8022;
- *cp++ = ETHERTYPE_IPX_8022;
- *cp++ = LLC_UI;
- }
- break;
-#endif /* IPX */
case AF_UNSPEC:
{
- struct iso88025_sockaddr_data *sd;
+ const struct iso88025_sockaddr_data *sd;
/*
* For AF_UNSPEC sockaddr.sa_data must contain all of the
* mac information needed to send the packet. This allows
@@ -363,13 +309,12 @@ iso88025_output(ifp, m, dst, ro)
* should be an iso88025_sockaddr_data structure see iso88025.h
*/
loop_copy = -1;
- sd = (struct iso88025_sockaddr_data *)dst->sa_data;
+ sd = (const struct iso88025_sockaddr_data *)dst->sa_data;
gen_th.ac = sd->ac;
gen_th.fc = sd->fc;
- (void)memcpy((caddr_t)edst, (caddr_t)sd->ether_dhost,
- ISO88025_ADDR_LEN);
- (void)memcpy((caddr_t)gen_th.iso88025_shost,
- (caddr_t)sd->ether_shost, ISO88025_ADDR_LEN);
+ (void)memcpy(edst, sd->ether_dhost, ISO88025_ADDR_LEN);
+ (void)memcpy(gen_th.iso88025_shost, sd->ether_shost,
+ ISO88025_ADDR_LEN);
rif_len = 0;
break;
}
@@ -384,8 +329,8 @@ iso88025_output(ifp, m, dst, ro)
*/
if (snap_type != 0) {
struct llc *l;
- M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
+ if (m == NULL)
senderr(ENOBUFS);
l = mtod(m, struct llc *);
l->llc_control = LLC_UI;
@@ -400,8 +345,8 @@ iso88025_output(ifp, m, dst, ro)
* Add local net header. If no space in first mbuf,
* allocate another.
*/
- M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_DONTWAIT);
- if (m == 0)
+ M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_NOWAIT);
+ if (m == NULL)
senderr(ENOBUFS);
th = mtod(m, struct iso88025_header *);
bcopy((caddr_t)edst, (caddr_t)&gen_th.iso88025_dhost, ISO88025_ADDR_LEN);
@@ -435,12 +380,12 @@ iso88025_output(ifp, m, dst, ro)
IFQ_HANDOFF_ADJ(ifp, m, ISO88025_HDR_LEN + LLC_SNAPFRAMELEN, error);
if (error) {
printf("iso88025_output: packet dropped QFULL.\n");
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
return (error);
bad:
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
if (m)
m_freem(m);
return (error);
@@ -465,24 +410,23 @@ iso88025_input(ifp, m)
*/
if ((m->m_flags & M_PKTHDR) == 0) {
if_printf(ifp, "discard frame w/o packet header\n");
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
m_freem(m);
return;
}
if (m->m_pkthdr.rcvif == NULL) {
if_printf(ifp, "discard frame w/o interface pointer\n");
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
m_freem(m);
return;
}
m = m_pullup(m, ISO88025_HDR_LEN);
if (m == NULL) {
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
goto dropanyway;
}
th = mtod(m, struct iso88025_header *);
- m->m_pkthdr.header = (void *)th;
/*
* Discard packet if interface is not up.
@@ -511,7 +455,7 @@ iso88025_input(ifp, m)
/*
* Update interface statistics.
*/
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
getmicrotime(&ifp->if_lastchange);
/*
@@ -533,7 +477,7 @@ iso88025_input(ifp, m)
m->m_flags |= M_BCAST;
else
m->m_flags |= M_MCAST;
- ifp->if_imcasts++;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
}
mac_hdr_len = ISO88025_HDR_LEN;
@@ -546,37 +490,24 @@ iso88025_input(ifp, m)
m = m_pullup(m, LLC_SNAPFRAMELEN);
if (m == 0) {
- ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
goto dropanyway;
}
l = mtod(m, struct llc *);
switch (l->llc_dsap) {
-#ifdef IPX
- case ETHERTYPE_IPX_8022: /* Thanks a bunch Novell */
- if ((l->llc_control != LLC_UI) ||
- (l->llc_ssap != ETHERTYPE_IPX_8022)) {
- ifp->if_noproto++;
- goto dropanyway;
- }
-
- th->iso88025_shost[0] &= ~(TR_RII);
- m_adj(m, 3);
- isr = NETISR_IPX;
- break;
-#endif /* IPX */
case LLC_SNAP_LSAP: {
u_int16_t type;
if ((l->llc_control != LLC_UI) ||
(l->llc_ssap != LLC_SNAP_LSAP)) {
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
if (l->llc_snap.org_code[0] != 0 ||
l->llc_snap.org_code[1] != 0 ||
l->llc_snap.org_code[2] != 0) {
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
@@ -586,8 +517,6 @@ iso88025_input(ifp, m)
#ifdef INET
case ETHERTYPE_IP:
th->iso88025_shost[0] &= ~(TR_RII);
- if ((m = ip_fastforward(m)) == NULL)
- return;
isr = NETISR_IP;
break;
@@ -597,12 +526,6 @@ iso88025_input(ifp, m)
isr = NETISR_ARP;
break;
#endif /* INET */
-#ifdef IPX_SNAP /* XXX: Not supported! */
- case ETHERTYPE_IPX:
- th->iso88025_shost[0] &= ~(TR_RII);
- isr = NETISR_IPX;
- break;
-#endif /* IPX_SNAP */
#ifdef INET6
case ETHERTYPE_IPV6:
th->iso88025_shost[0] &= ~(TR_RII);
@@ -611,7 +534,7 @@ iso88025_input(ifp, m)
#endif /* INET6 */
default:
printf("iso88025_input: unexpected llc_snap ether_type 0x%02x\n", type);
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
}
break;
@@ -620,7 +543,7 @@ iso88025_input(ifp, m)
case LLC_ISO_LSAP:
switch (l->llc_control) {
case LLC_UI:
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
break;
case LLC_XID:
@@ -636,7 +559,6 @@ iso88025_input(ifp, m)
case LLC_TEST_P:
{
struct sockaddr sa;
- struct arpcom *ac;
struct iso88025_sockaddr_data *th2;
int i;
u_char c;
@@ -669,7 +591,7 @@ iso88025_input(ifp, m)
}
default:
printf("iso88025_input: unexpected llc control 0x%02x\n", l->llc_control);
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
break;
}
@@ -677,7 +599,7 @@ iso88025_input(ifp, m)
#endif /* ISO */
default:
printf("iso88025_input: unknown dsap 0x%x\n", l->llc_dsap);
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto dropanyway;
break;
}
@@ -687,7 +609,7 @@ iso88025_input(ifp, m)
return;
dropanyway:
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
if (m)
m_freem(m);
return;
@@ -718,7 +640,7 @@ iso88025_resolvemulti (ifp, llsa, sa)
if ((e_addr[0] & 1) != 1) {
return (EADDRNOTAVAIL);
}
- *llsa = 0;
+ *llsa = NULL;
return (0);
#ifdef INET
@@ -727,14 +649,7 @@ iso88025_resolvemulti (ifp, llsa, sa)
if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
return (EADDRNOTAVAIL);
}
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT|M_ZERO);
- if (sdl == NULL)
- return (ENOMEM);
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_ISO88025;
+ sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025);
sdl->sdl_alen = ISO88025_ADDR_LEN;
e_addr = LLADDR(sdl);
ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
@@ -751,20 +666,13 @@ iso88025_resolvemulti (ifp, llsa, sa)
* (This is used for multicast routers.)
*/
ifp->if_flags |= IFF_ALLMULTI;
- *llsa = 0;
+ *llsa = NULL;
return (0);
}
if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
return (EADDRNOTAVAIL);
}
- sdl = malloc(sizeof *sdl, M_IFMADDR,
- M_NOWAIT|M_ZERO);
- if (sdl == NULL)
- return (ENOMEM);
- sdl->sdl_len = sizeof *sdl;
- sdl->sdl_family = AF_LINK;
- sdl->sdl_index = ifp->if_index;
- sdl->sdl_type = IFT_ISO88025;
+ sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025);
sdl->sdl_alen = ISO88025_ADDR_LEN;
e_addr = LLADDR(sdl);
ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
@@ -783,49 +691,8 @@ iso88025_resolvemulti (ifp, llsa, sa)
return (0);
}
-static MALLOC_DEFINE(M_ISO88025, "arpcom", "802.5 interface internals");
-
-static void*
-iso88025_alloc(u_char type, struct ifnet *ifp)
-{
- struct arpcom *ac;
-
- ac = malloc(sizeof(struct arpcom), M_ISO88025, M_WAITOK | M_ZERO);
- ac->ac_ifp = ifp;
-
- return (ac);
-}
-
-static void
-iso88025_free(void *com, u_char type)
-{
-
- free(com, M_ISO88025);
-}
-
-static int
-iso88025_modevent(module_t mod, int type, void *data)
-{
-
- switch (type) {
- case MOD_LOAD:
- if_register_com_alloc(IFT_ISO88025, iso88025_alloc,
- iso88025_free);
- break;
- case MOD_UNLOAD:
- if_deregister_com_alloc(IFT_ISO88025);
- break;
- default:
- return EOPNOTSUPP;
- }
-
- return (0);
-}
-
static moduledata_t iso88025_mod = {
- "iso88025",
- iso88025_modevent,
- 0
+ .name = "iso88025",
};
DECLARE_MODULE(iso88025, iso88025_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c
index 46f3f46c..9cfb7b8b 100644
--- a/freebsd/sys/net/if_lagg.c
+++ b/freebsd/sys/net/if_lagg.c
@@ -5,6 +5,7 @@
/*
* Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
* Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
+ * Copyright (c) 2014, 2016 Marcelo Araujo <araujo@FreeBSD.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -37,9 +38,8 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/proc.h>
-#include <sys/hash.h>
#include <rtems/bsd/sys/lock.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
@@ -48,11 +48,11 @@ __FBSDID("$FreeBSD$");
#include <net/if_clone.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
-#include <net/if_llc.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>
+#include <net/vnet.h>
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
@@ -83,15 +83,26 @@ static struct {
{0, NULL}
};
-SLIST_HEAD(__trhead, lagg_softc) lagg_list; /* list of laggs */
-static struct mtx lagg_list_mtx;
+VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
+#define V_lagg_list VNET(lagg_list)
+static VNET_DEFINE(struct mtx, lagg_list_mtx);
+#define V_lagg_list_mtx VNET(lagg_list_mtx)
+#define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \
+ "if_lagg list", NULL, MTX_DEF)
+#define LAGG_LIST_LOCK_DESTROY(x) mtx_destroy(&V_lagg_list_mtx)
+#define LAGG_LIST_LOCK(x) mtx_lock(&V_lagg_list_mtx)
+#define LAGG_LIST_UNLOCK(x) mtx_unlock(&V_lagg_list_mtx)
eventhandler_tag lagg_detach_cookie = NULL;
static int lagg_clone_create(struct if_clone *, int, caddr_t);
static void lagg_clone_destroy(struct ifnet *);
+static VNET_DEFINE(struct if_clone *, lagg_cloner);
+#define V_lagg_cloner VNET(lagg_cloner)
+static const char laggname[] = "lagg";
+
static void lagg_lladdr(struct lagg_softc *, uint8_t *);
static void lagg_capabilities(struct lagg_softc *);
-static void lagg_port_lladdr(struct lagg_port *, uint8_t *);
+static void lagg_port_lladdr(struct lagg_port *, uint8_t *, lagg_llqtype);
static void lagg_port_setlladdr(void *, int);
static int lagg_port_create(struct lagg_softc *, struct ifnet *);
static int lagg_port_destroy(struct lagg_port *, int);
@@ -100,7 +111,7 @@ static void lagg_linkstate(struct lagg_softc *);
static void lagg_port_state(struct ifnet *, int);
static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
static int lagg_port_output(struct ifnet *, struct mbuf *,
- struct sockaddr *, struct route *);
+ const struct sockaddr *, struct route *);
static void lagg_port_ifdetach(void *arg __unused, struct ifnet *);
#ifdef LAGG_PORT_STACKING
static int lagg_port_checkstacking(struct lagg_softc *);
@@ -114,33 +125,28 @@ static int lagg_ether_cmdmulti(struct lagg_port *, int);
static int lagg_setflag(struct lagg_port *, int, int,
int (*func)(struct ifnet *, int));
static int lagg_setflags(struct lagg_port *, int status);
+static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
static int lagg_transmit(struct ifnet *, struct mbuf *);
static void lagg_qflush(struct ifnet *);
static int lagg_media_change(struct ifnet *);
static void lagg_media_status(struct ifnet *, struct ifmediareq *);
static struct lagg_port *lagg_link_active(struct lagg_softc *,
struct lagg_port *);
-static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
-
-IFC_SIMPLE_DECLARE(lagg, 0);
/* Simple round robin */
-static int lagg_rr_attach(struct lagg_softc *);
-static int lagg_rr_detach(struct lagg_softc *);
+static void lagg_rr_attach(struct lagg_softc *);
static int lagg_rr_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
struct mbuf *);
/* Active failover */
-static int lagg_fail_attach(struct lagg_softc *);
-static int lagg_fail_detach(struct lagg_softc *);
static int lagg_fail_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
struct mbuf *);
/* Loadbalancing */
-static int lagg_lb_attach(struct lagg_softc *);
-static int lagg_lb_detach(struct lagg_softc *);
+static void lagg_lb_attach(struct lagg_softc *);
+static void lagg_lb_detach(struct lagg_softc *);
static int lagg_lb_port_create(struct lagg_port *);
static void lagg_lb_port_destroy(struct lagg_port *);
static int lagg_lb_start(struct lagg_softc *, struct mbuf *);
@@ -148,50 +154,134 @@ static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
struct mbuf *);
static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
+/* Broadcast */
+static int lagg_bcast_start(struct lagg_softc *, struct mbuf *);
+static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *,
+ struct mbuf *);
+
/* 802.3ad LACP */
-static int lagg_lacp_attach(struct lagg_softc *);
-static int lagg_lacp_detach(struct lagg_softc *);
+static void lagg_lacp_attach(struct lagg_softc *);
+static void lagg_lacp_detach(struct lagg_softc *);
static int lagg_lacp_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
struct mbuf *);
static void lagg_lacp_lladdr(struct lagg_softc *);
/* lagg protocol table */
-static const struct {
- int ti_proto;
- int (*ti_attach)(struct lagg_softc *);
+static const struct lagg_proto {
+ lagg_proto pr_num;
+ void (*pr_attach)(struct lagg_softc *);
+ void (*pr_detach)(struct lagg_softc *);
+ int (*pr_start)(struct lagg_softc *, struct mbuf *);
+ struct mbuf * (*pr_input)(struct lagg_softc *, struct lagg_port *,
+ struct mbuf *);
+ int (*pr_addport)(struct lagg_port *);
+ void (*pr_delport)(struct lagg_port *);
+ void (*pr_linkstate)(struct lagg_port *);
+ void (*pr_init)(struct lagg_softc *);
+ void (*pr_stop)(struct lagg_softc *);
+ void (*pr_lladdr)(struct lagg_softc *);
+ void (*pr_request)(struct lagg_softc *, void *);
+ void (*pr_portreq)(struct lagg_port *, void *);
} lagg_protos[] = {
- { LAGG_PROTO_ROUNDROBIN, lagg_rr_attach },
- { LAGG_PROTO_FAILOVER, lagg_fail_attach },
- { LAGG_PROTO_LOADBALANCE, lagg_lb_attach },
- { LAGG_PROTO_ETHERCHANNEL, lagg_lb_attach },
- { LAGG_PROTO_LACP, lagg_lacp_attach },
- { LAGG_PROTO_NONE, NULL }
+ {
+ .pr_num = LAGG_PROTO_NONE
+ },
+ {
+ .pr_num = LAGG_PROTO_ROUNDROBIN,
+ .pr_attach = lagg_rr_attach,
+ .pr_start = lagg_rr_start,
+ .pr_input = lagg_rr_input,
+ },
+ {
+ .pr_num = LAGG_PROTO_FAILOVER,
+ .pr_start = lagg_fail_start,
+ .pr_input = lagg_fail_input,
+ },
+ {
+ .pr_num = LAGG_PROTO_LOADBALANCE,
+ .pr_attach = lagg_lb_attach,
+ .pr_detach = lagg_lb_detach,
+ .pr_start = lagg_lb_start,
+ .pr_input = lagg_lb_input,
+ .pr_addport = lagg_lb_port_create,
+ .pr_delport = lagg_lb_port_destroy,
+ },
+ {
+ .pr_num = LAGG_PROTO_LACP,
+ .pr_attach = lagg_lacp_attach,
+ .pr_detach = lagg_lacp_detach,
+ .pr_start = lagg_lacp_start,
+ .pr_input = lagg_lacp_input,
+ .pr_addport = lacp_port_create,
+ .pr_delport = lacp_port_destroy,
+ .pr_linkstate = lacp_linkstate,
+ .pr_init = lacp_init,
+ .pr_stop = lacp_stop,
+ .pr_lladdr = lagg_lacp_lladdr,
+ .pr_request = lacp_req,
+ .pr_portreq = lacp_portreq,
+ },
+ {
+ .pr_num = LAGG_PROTO_BROADCAST,
+ .pr_start = lagg_bcast_start,
+ .pr_input = lagg_bcast_input,
+ },
};
SYSCTL_DECL(_net_link);
-static SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
+SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
"Link Aggregation");
-static int lagg_failover_rx_all = 0; /* Allow input on any failover links */
-SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
- &lagg_failover_rx_all, 0,
+/* Allow input on any failover links */
+static VNET_DEFINE(int, lagg_failover_rx_all);
+#define V_lagg_failover_rx_all VNET(lagg_failover_rx_all)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(lagg_failover_rx_all), 0,
"Accept input from any interface in a failover lagg");
-static int def_use_flowid = 1; /* Default value for using M_FLOWID */
-TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid);
-SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW,
- &def_use_flowid, 0,
+
+/* Default value for using flowid */
+static VNET_DEFINE(int, def_use_flowid) = 1;
+#define V_def_use_flowid VNET(def_use_flowid)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
+ &VNET_NAME(def_use_flowid), 0,
"Default setting for using flow id for load sharing");
+/* Default value for flowid shift */
+static VNET_DEFINE(int, def_flowid_shift) = 16;
+#define V_def_flowid_shift VNET(def_flowid_shift)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
+ &VNET_NAME(def_flowid_shift), 0,
+ "Default setting for flowid shift for load sharing");
+
+static void
+vnet_lagg_init(const void *unused __unused)
+{
+
+ LAGG_LIST_LOCK_INIT();
+ SLIST_INIT(&V_lagg_list);
+ V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
+ lagg_clone_destroy, 0);
+}
+VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ vnet_lagg_init, NULL);
+
+static void
+vnet_lagg_uninit(const void *unused __unused)
+{
+
+ if_clone_detach(V_lagg_cloner);
+ LAGG_LIST_LOCK_DESTROY();
+}
+VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
+ vnet_lagg_uninit, NULL);
+
static int
lagg_modevent(module_t mod, int type, void *data)
{
switch (type) {
case MOD_LOAD:
- mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
- SLIST_INIT(&lagg_list);
- if_clone_attach(&lagg_cloner);
lagg_input_p = lagg_input;
lagg_linkstate_p = lagg_port_state;
lagg_detach_cookie = EVENTHANDLER_REGISTER(
@@ -201,10 +291,8 @@ lagg_modevent(module_t mod, int type, void *data)
case MOD_UNLOAD:
EVENTHANDLER_DEREGISTER(ifnet_departure_event,
lagg_detach_cookie);
- if_clone_detach(&lagg_cloner);
lagg_input_p = NULL;
lagg_linkstate_p = NULL;
- mtx_destroy(&lagg_list_mtx);
break;
default:
return (EOPNOTSUPP);
@@ -221,7 +309,117 @@ static moduledata_t lagg_mod = {
DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_lagg, 1);
-#if __FreeBSD_version >= 800000
+static void
+lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
+{
+
+ KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto",
+ __func__, sc));
+
+ if (sc->sc_ifflags & IFF_DEBUG)
+ if_printf(sc->sc_ifp, "using proto %u\n", pr);
+
+ if (lagg_protos[pr].pr_attach != NULL)
+ lagg_protos[pr].pr_attach(sc);
+ sc->sc_proto = pr;
+}
+
+static void
+lagg_proto_detach(struct lagg_softc *sc)
+{
+ lagg_proto pr;
+
+ LAGG_WLOCK_ASSERT(sc);
+
+ pr = sc->sc_proto;
+ sc->sc_proto = LAGG_PROTO_NONE;
+
+ if (lagg_protos[pr].pr_detach != NULL)
+ lagg_protos[pr].pr_detach(sc);
+ else
+ LAGG_WUNLOCK(sc);
+}
+
+static int
+lagg_proto_start(struct lagg_softc *sc, struct mbuf *m)
+{
+
+ return (lagg_protos[sc->sc_proto].pr_start(sc, m));
+}
+
+static struct mbuf *
+lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
+{
+
+ return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m));
+}
+
+static int
+lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_addport == NULL)
+ return (0);
+ else
+ return (lagg_protos[sc->sc_proto].pr_addport(lp));
+}
+
+static void
+lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_delport != NULL)
+ lagg_protos[sc->sc_proto].pr_delport(lp);
+}
+
+static void
+lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_linkstate != NULL)
+ lagg_protos[sc->sc_proto].pr_linkstate(lp);
+}
+
+static void
+lagg_proto_init(struct lagg_softc *sc)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_init != NULL)
+ lagg_protos[sc->sc_proto].pr_init(sc);
+}
+
+static void
+lagg_proto_stop(struct lagg_softc *sc)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_stop != NULL)
+ lagg_protos[sc->sc_proto].pr_stop(sc);
+}
+
+static void
+lagg_proto_lladdr(struct lagg_softc *sc)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_lladdr != NULL)
+ lagg_protos[sc->sc_proto].pr_lladdr(sc);
+}
+
+static void
+lagg_proto_request(struct lagg_softc *sc, void *v)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_request != NULL)
+ lagg_protos[sc->sc_proto].pr_request(sc, v);
+}
+
+static void
+lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v)
+{
+
+ if (lagg_protos[sc->sc_proto].pr_portreq != NULL)
+ lagg_protos[sc->sc_proto].pr_portreq(lp, v);
+}
+
/*
* This routine is run via an vlan
* config EVENT
@@ -229,18 +427,19 @@ MODULE_VERSION(if_lagg, 1);
static void
lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
- struct lagg_softc *sc = ifp->if_softc;
- struct lagg_port *lp;
+ struct lagg_softc *sc = ifp->if_softc;
+ struct lagg_port *lp;
+ struct rm_priotracker tracker;
- if (ifp->if_softc != arg) /* Not our event */
- return;
+ if (ifp->if_softc != arg) /* Not our event */
+ return;
- LAGG_RLOCK(sc);
- if (!SLIST_EMPTY(&sc->sc_ports)) {
- SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
- EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
- }
- LAGG_RUNLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
+ if (!SLIST_EMPTY(&sc->sc_ports)) {
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
+ }
+ LAGG_RUNLOCK(sc, &tracker);
}
/*
@@ -250,30 +449,27 @@ lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
static void
lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
- struct lagg_softc *sc = ifp->if_softc;
- struct lagg_port *lp;
+ struct lagg_softc *sc = ifp->if_softc;
+ struct lagg_port *lp;
+ struct rm_priotracker tracker;
- if (ifp->if_softc != arg) /* Not our event */
- return;
+ if (ifp->if_softc != arg) /* Not our event */
+ return;
- LAGG_RLOCK(sc);
- if (!SLIST_EMPTY(&sc->sc_ports)) {
- SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
- EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
- }
- LAGG_RUNLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
+ if (!SLIST_EMPTY(&sc->sc_ports)) {
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
+ }
+ LAGG_RUNLOCK(sc, &tracker);
}
-#endif
static int
lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
struct lagg_softc *sc;
struct ifnet *ifp;
- int i, error = 0;
static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
- struct sysctl_oid *oid;
- char num[14]; /* sufficient for 32 bits */
sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
@@ -282,32 +478,15 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
return (ENOSPC);
}
- sysctl_ctx_init(&sc->ctx);
- snprintf(num, sizeof(num), "%u", unit);
- sc->use_flowid = def_use_flowid;
- oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg),
- OID_AUTO, num, CTLFLAG_RD, NULL, "");
- SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
- "use_flowid", CTLFLAG_RW, &sc->use_flowid, sc->use_flowid,
- "Use flow id for load sharing");
- SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
- "count", CTLFLAG_RD, &sc->sc_count, sc->sc_count,
- "Total number of ports");
+ if (V_def_use_flowid)
+ sc->sc_opts |= LAGG_OPT_USE_FLOWID;
+ sc->flowid_shift = V_def_flowid_shift;
+
/* Hash all layers by default */
- sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;
+ sc->sc_flags = MBUF_HASHFLAG_L2|MBUF_HASHFLAG_L3|MBUF_HASHFLAG_L4;
+
+ lagg_proto_attach(sc, LAGG_PROTO_DEFAULT);
- sc->sc_proto = LAGG_PROTO_NONE;
- for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
- if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
- sc->sc_proto = lagg_protos[i].ti_proto;
- if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
- if_free_type(ifp, IFT_ETHER);
- free(sc, M_DEVBUF);
- return (error);
- }
- break;
- }
- }
LAGG_LOCK_INIT(sc);
SLIST_INIT(&sc->sc_ports);
TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
@@ -318,32 +497,31 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
- if_initname(ifp, ifc->ifc_name, unit);
- ifp->if_type = IFT_ETHER;
+ if_initname(ifp, laggname, unit);
ifp->if_softc = sc;
ifp->if_transmit = lagg_transmit;
ifp->if_qflush = lagg_qflush;
ifp->if_init = lagg_init;
ifp->if_ioctl = lagg_ioctl;
+ ifp->if_get_counter = lagg_get_counter;
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
+ ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
/*
- * Attach as an ordinary ethernet device, childs will be attached
+ * Attach as an ordinary ethernet device, children will be attached
* as special device IFT_IEEE8023ADLAG.
*/
ether_ifattach(ifp, eaddr);
-#if __FreeBSD_version >= 800000
sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
-#endif
/* Insert into the global list of laggs */
- mtx_lock(&lagg_list_mtx);
- SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_LOCK();
+ SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries);
+ LAGG_LIST_UNLOCK();
return (0);
}
@@ -359,47 +537,64 @@ lagg_clone_destroy(struct ifnet *ifp)
lagg_stop(sc);
ifp->if_flags &= ~IFF_UP;
-#if __FreeBSD_version >= 800000
EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
-#endif
/* Shutdown and remove lagg ports */
while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
lagg_port_destroy(lp, 1);
/* Unhook the aggregation protocol */
- if (sc->sc_detach != NULL)
- (*sc->sc_detach)(sc);
+ lagg_proto_detach(sc);
+ LAGG_UNLOCK_ASSERT(sc);
- LAGG_WUNLOCK(sc);
-
- sysctl_ctx_free(&sc->ctx);
ifmedia_removeall(&sc->sc_media);
ether_ifdetach(ifp);
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
- mtx_lock(&lagg_list_mtx);
- SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_LOCK();
+ SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
+ LAGG_LIST_UNLOCK();
taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
LAGG_LOCK_DESTROY(sc);
free(sc, M_DEVBUF);
}
-static void
+/*
+ * Set link-layer address on the lagg interface itself.
+ *
+ * Set noinline to be dtrace-friendly
+ */
+static __noinline void
lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
{
struct ifnet *ifp = sc->sc_ifp;
+ struct lagg_port lp;
if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
return;
+ LAGG_WLOCK_ASSERT(sc);
+ /*
+ * Set the link layer address on the lagg interface.
+ * lagg_proto_lladdr() notifies the MAC change to
+ * the aggregation protocol. iflladdr_event handler which
+ * may trigger gratuitous ARPs for INET will be handled in
+ * a taskqueue.
+ */
bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
- /* Let the protocol know the MAC has changed */
- if (sc->sc_lladdr != NULL)
- (*sc->sc_lladdr)(sc);
- EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+ lagg_proto_lladdr(sc);
+
+ /*
+ * Send notification request for lagg interface
+ * itself. Note that new lladdr is already set.
+ */
+ bzero(&lp, sizeof(lp));
+ lp.lp_ifp = sc->sc_ifp;
+ lp.lp_softc = sc;
+
+ /* Do not request lladdr change */
+ lagg_port_lladdr(&lp, lladdr, LAGG_LLQTYPE_VIRT);
}
static void
@@ -440,54 +635,63 @@ lagg_capabilities(struct lagg_softc *sc)
}
}
-static void
-lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
+/*
+ * Enqueue interface lladdr notification.
+ * If request is already queued, it is updated.
+ * If setting lladdr is also desired, @do_change has to be set to 1.
+ *
+ * Set noinline to be dtrace-friendly
+ */
+static __noinline void
+lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr, lagg_llqtype llq_type)
{
struct lagg_softc *sc = lp->lp_softc;
struct ifnet *ifp = lp->lp_ifp;
struct lagg_llq *llq;
- int pending = 0;
LAGG_WLOCK_ASSERT(sc);
- if (lp->lp_detaching ||
- memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
+ /*
+ * Do not enqueue requests where lladdr is the same for
+ * "physical" interfaces (e.g. ports in lagg)
+ */
+ if (llq_type == LAGG_LLQTYPE_PHYS &&
+ memcmp(IF_LLADDR(ifp), lladdr, ETHER_ADDR_LEN) == 0)
return;
/* Check to make sure its not already queued to be changed */
SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
if (llq->llq_ifp == ifp) {
- pending = 1;
- break;
+ /* Update lladdr, it may have changed */
+ bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
+ return;
}
}
- if (!pending) {
- llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
- if (llq == NULL) /* XXX what to do */
- return;
- }
+ llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (llq == NULL) /* XXX what to do */
+ return;
- /* Update the lladdr even if pending, it may have changed */
llq->llq_ifp = ifp;
+ llq->llq_type = llq_type;
bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
-
- if (!pending)
- SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);
+ /* XXX: We should insert to tail */
+ SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);
taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
}
/*
* Set the interface MAC address from a taskqueue to avoid a LOR.
+ *
+ * Set noinline to be dtrace-friendly
*/
-static void
+static __noinline void
lagg_port_setlladdr(void *arg, int pending)
{
struct lagg_softc *sc = (struct lagg_softc *)arg;
struct lagg_llq *llq, *head;
struct ifnet *ifp;
- int error;
/* Grab a local reference of the queue and remove it from the softc */
LAGG_WLOCK(sc);
@@ -502,14 +706,19 @@ lagg_port_setlladdr(void *arg, int pending)
for (llq = head; llq != NULL; llq = head) {
ifp = llq->llq_ifp;
- /* Set the link layer address */
CURVNET_SET(ifp->if_vnet);
- error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
- CURVNET_RESTORE();
- if (error)
- printf("%s: setlladdr failed on %s\n", __func__,
- ifp->if_xname);
+ /*
+ * Set the link layer address on the laggport interface.
+ * Note that if_setlladdr() or iflladdr_event handler
+ * may result in arp transmission / lltable updates.
+ */
+ if (llq->llq_type == LAGG_LLQTYPE_PHYS)
+ if_setlladdr(ifp, llq->llq_lladdr,
+ ETHER_ADDR_LEN);
+ else
+ EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+ CURVNET_RESTORE();
head = SLIST_NEXT(llq, llq_entries);
free(llq, M_DEVBUF);
}
@@ -520,7 +729,8 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
{
struct lagg_softc *sc_ptr;
struct lagg_port *lp, *tlp;
- int error = 0;
+ int error, i;
+ uint64_t *pval;
LAGG_WLOCK_ASSERT(sc);
@@ -538,37 +748,9 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
}
/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
- if (ifp->if_type != IFT_ETHER)
+ if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
return (EPROTONOSUPPORT);
-#ifdef INET6
- /*
- * The member interface should not have inet6 address because
- * two interfaces with a valid link-local scope zone must not be
- * merged in any form. This restriction is needed to
- * prevent violation of link-local scope zone. Attempts to
- * add a member interface which has inet6 addresses triggers
- * removal of all inet6 addresses on the member interface.
- */
- SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
- if (in6ifa_llaonifp(lp->lp_ifp)) {
- in6_ifdetach(lp->lp_ifp);
- if_printf(sc->sc_ifp,
- "IPv6 addresses on %s have been removed "
- "before adding it as a member to prevent "
- "IPv6 address scope violation.\n",
- lp->lp_ifp->if_xname);
- }
- }
- if (in6ifa_llaonifp(ifp)) {
- in6_ifdetach(ifp);
- if_printf(sc->sc_ifp,
- "IPv6 addresses on %s have been removed "
- "before adding it as a member to prevent "
- "IPv6 address scope violation.\n",
- ifp->if_xname);
- }
-#endif
/* Allow the first Ethernet member to define the MTU */
if (SLIST_EMPTY(&sc->sc_ports))
sc->sc_ifp->if_mtu = ifp->if_mtu;
@@ -583,10 +765,10 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
return (ENOMEM);
/* Check if port is a stacked lagg */
- mtx_lock(&lagg_list_mtx);
- SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
+ LAGG_LIST_LOCK();
+ SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
if (ifp == sc_ptr->sc_ifp) {
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_UNLOCK();
free(lp, M_DEVBUF);
return (EINVAL);
/* XXX disable stacking for the moment, its untested */
@@ -594,14 +776,14 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
lp->lp_flags |= LAGG_PORT_STACK;
if (lagg_port_checkstacking(sc_ptr) >=
LAGG_MAX_STACKING) {
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_UNLOCK();
free(lp, M_DEVBUF);
return (E2BIG);
}
#endif
}
}
- mtx_unlock(&lagg_list_mtx);
+ LAGG_LIST_UNLOCK();
/* Change the interface type */
lp->lp_iftype = ifp->if_type;
@@ -620,10 +802,15 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
if (SLIST_EMPTY(&sc->sc_ports)) {
sc->sc_primary = lp;
+ /* First port in lagg. Update/notify lagg lladdress */
lagg_lladdr(sc, IF_LLADDR(ifp));
} else {
- /* Update link layer address for this port */
- lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
+
+ /*
+ * Update link layer address for this port and
+ * send notifications to other subsystems.
+ */
+ lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp), LAGG_LLQTYPE_PHYS);
}
/*
@@ -649,19 +836,21 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
lagg_capabilities(sc);
lagg_linkstate(sc);
+ /* Read port counters */
+ pval = lp->port_counters.val;
+ for (i = 0; i < IFCOUNTERS; i++, pval++)
+ *pval = ifp->if_get_counter(ifp, i);
/* Add multicast addresses and interface flags to this port */
lagg_ether_cmdmulti(lp, 1);
lagg_setflags(lp, 1);
- if (sc->sc_port_create != NULL)
- error = (*sc->sc_port_create)(lp);
- if (error) {
- /* remove the port again, without calling sc_port_destroy */
+ if ((error = lagg_proto_addport(sc, lp)) != 0) {
+ /* Remove the port, without calling pr_delport. */
lagg_port_destroy(lp, 0);
return (error);
}
- return (error);
+ return (0);
}
#ifdef LAGG_PORT_STACKING
@@ -686,17 +875,19 @@ lagg_port_checkstacking(struct lagg_softc *sc)
#endif
static int
-lagg_port_destroy(struct lagg_port *lp, int runpd)
+lagg_port_destroy(struct lagg_port *lp, int rundelport)
{
struct lagg_softc *sc = lp->lp_softc;
- struct lagg_port *lp_ptr;
+ struct lagg_port *lp_ptr, *lp0;
struct lagg_llq *llq;
struct ifnet *ifp = lp->lp_ifp;
+ uint64_t *pval, vdiff;
+ int i;
LAGG_WLOCK_ASSERT(sc);
- if (runpd && sc->sc_port_destroy != NULL)
- (*sc->sc_port_destroy)(lp);
+ if (rundelport)
+ lagg_proto_delport(sc, lp);
/*
* Remove multicast addresses and interface flags from this port and
@@ -705,7 +896,7 @@ lagg_port_destroy(struct lagg_port *lp, int runpd)
if (!lp->lp_detaching) {
lagg_ether_cmdmulti(lp, 0);
lagg_setflags(lp, 0);
- lagg_port_lladdr(lp, lp->lp_lladdr);
+ lagg_port_lladdr(lp, lp->lp_lladdr, LAGG_LLQTYPE_PHYS);
}
/* Restore interface */
@@ -714,6 +905,13 @@ lagg_port_destroy(struct lagg_port *lp, int runpd)
ifp->if_output = lp->lp_output;
ifp->if_lagg = NULL;
+ /* Update detached port counters */
+ pval = lp->port_counters.val;
+ for (i = 0; i < IFCOUNTERS; i++, pval++) {
+ vdiff = ifp->if_get_counter(ifp, i) - *pval;
+ sc->detached_counters.val[i] += vdiff;
+ }
+
/* Finally, remove the port from the lagg */
SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
sc->sc_count--;
@@ -722,18 +920,24 @@ lagg_port_destroy(struct lagg_port *lp, int runpd)
if (lp == sc->sc_primary) {
uint8_t lladdr[ETHER_ADDR_LEN];
- if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
+ if ((lp0 = SLIST_FIRST(&sc->sc_ports)) == NULL) {
bzero(&lladdr, ETHER_ADDR_LEN);
} else {
- bcopy(lp_ptr->lp_lladdr,
+ bcopy(lp0->lp_lladdr,
lladdr, ETHER_ADDR_LEN);
}
lagg_lladdr(sc, lladdr);
- sc->sc_primary = lp_ptr;
- /* Update link layer address for each port */
+ /* Mark lp0 as new primary */
+ sc->sc_primary = lp0;
+
+ /*
+ * Enqueue lladdr update/notification for each port
+ * (new primary needs update as well, to switch from
+ * old lladdr to its 'real' one).
+ */
SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
- lagg_port_lladdr(lp_ptr, lladdr);
+ lagg_port_lladdr(lp_ptr, lladdr, LAGG_LLQTYPE_PHYS);
}
/* Remove any pending lladdr changes from the queue */
@@ -767,6 +971,7 @@ lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct lagg_softc *sc;
struct lagg_port *lp = NULL;
int error = 0;
+ struct rm_priotracker tracker;
/* Should be checked by the caller */
if (ifp->if_type != IFT_IEEE8023ADLAG ||
@@ -781,15 +986,15 @@ lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
error = ENOENT;
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
break;
}
lagg_port2req(lp, rp);
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
break;
case SIOCSIFCAP:
@@ -826,11 +1031,66 @@ fallback:
}
/*
+ * Requests counter @cnt data.
+ *
+ * Counter value is calculated the following way:
+ * 1) for each port, sum difference between current and "initial" measurements.
+ * 2) add lagg logical interface counters.
+ * 3) add data from detached_counters array.
+ *
+ * We also do the following things on ports attach/detach:
+ * 1) On port attach we store all counters it has into port_counter array.
+ * 2) On port detach we add the different between "initial" and
+ * current counters data to detached_counters array.
+ */
+static uint64_t
+lagg_get_counter(struct ifnet *ifp, ift_counter cnt)
+{
+ struct lagg_softc *sc;
+ struct lagg_port *lp;
+ struct ifnet *lpifp;
+ struct rm_priotracker tracker;
+ uint64_t newval, oldval, vsum;
+
+ /* Revise this when we've got non-generic counters. */
+ KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
+
+ sc = (struct lagg_softc *)ifp->if_softc;
+ LAGG_RLOCK(sc, &tracker);
+
+ vsum = 0;
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+ /* Saved attached value */
+ oldval = lp->port_counters.val[cnt];
+ /* current value */
+ lpifp = lp->lp_ifp;
+ newval = lpifp->if_get_counter(lpifp, cnt);
+ /* Calculate diff and save new */
+ vsum += newval - oldval;
+ }
+
+ /*
+ * Add counter data which might be added by upper
+ * layer protocols operating on logical interface.
+ */
+ vsum += if_get_counter_default(ifp, cnt);
+
+ /*
+ * Add counter data from detached ports counters
+ */
+ vsum += sc->detached_counters.val[cnt];
+
+ LAGG_RUNLOCK(sc, &tracker);
+
+ return (vsum);
+}
+
+/*
* For direct output to child ports.
*/
static int
lagg_port_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro)
+ const struct sockaddr *dst, struct route *ro)
{
struct lagg_port *lp = ifp->if_lagg;
@@ -874,8 +1134,7 @@ lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
rp->rp_prio = lp->lp_prio;
rp->rp_flags = lp->lp_flags;
- if (sc->sc_portreq != NULL)
- (*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc);
+ lagg_proto_portreq(sc, lp, &rp->rp_psc);
/* Add protocol specific flags */
switch (sc->sc_proto) {
@@ -888,7 +1147,7 @@ lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
case LAGG_PROTO_ROUNDROBIN:
case LAGG_PROTO_LOADBALANCE:
- case LAGG_PROTO_ETHERCHANNEL:
+ case LAGG_PROTO_BROADCAST:
if (LAGG_PORTACTIVE(lp))
rp->rp_flags |= LAGG_PORT_ACTIVE;
break;
@@ -910,8 +1169,8 @@ static void
lagg_init(void *xsc)
{
struct lagg_softc *sc = (struct lagg_softc *)xsc;
- struct lagg_port *lp;
struct ifnet *ifp = sc->sc_ifp;
+ struct lagg_port *lp;
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
return;
@@ -919,12 +1178,16 @@ lagg_init(void *xsc)
LAGG_WLOCK(sc);
ifp->if_drv_flags |= IFF_DRV_RUNNING;
- /* Update the port lladdrs */
+
+ /*
+ * Update the port lladdrs if needed.
+ * This might be if_setlladdr() notification
+ * that lladdr has been changed.
+ */
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
- lagg_port_lladdr(lp, IF_LLADDR(ifp));
+ lagg_port_lladdr(lp, IF_LLADDR(ifp), LAGG_LLQTYPE_PHYS);
- if (sc->sc_init != NULL)
- (*sc->sc_init)(sc);
+ lagg_proto_init(sc);
LAGG_WUNLOCK(sc);
}
@@ -941,8 +1204,7 @@ lagg_stop(struct lagg_softc *sc)
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- if (sc->sc_stop != NULL)
- (*sc->sc_stop)(sc);
+ lagg_proto_stop(sc);
}
static int
@@ -950,6 +1212,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
struct lagg_reqall *ra = (struct lagg_reqall *)data;
+ struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
struct ifreq *ifr = (struct ifreq *)data;
@@ -958,25 +1221,24 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct thread *td = curthread;
char *buf, *outbuf;
int count, buflen, len, error = 0;
+ struct rm_priotracker tracker;
bzero(&rpbuf, sizeof(rpbuf));
switch (cmd) {
case SIOCGLAGG:
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
count = 0;
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
count++;
buflen = count * sizeof(struct lagg_reqport);
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
ra->ra_proto = sc->sc_proto;
- if (sc->sc_req != NULL)
- (*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);
-
+ lagg_proto_request(sc, &ra->ra_psc);
count = 0;
buf = outbuf;
len = min(ra->ra_size, buflen);
@@ -990,7 +1252,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
buf += sizeof(rpbuf);
len -= sizeof(rpbuf);
}
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
ra->ra_ports = count;
ra->ra_size = count * sizeof(rpbuf);
error = copyout(outbuf, ra->ra_port, ra->ra_size);
@@ -1004,49 +1266,150 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = EPROTONOSUPPORT;
break;
}
+
LAGG_WLOCK(sc);
- if (sc->sc_proto != LAGG_PROTO_NONE) {
- /* Reset protocol first in case detach unlocks */
- sc->sc_proto = LAGG_PROTO_NONE;
- error = sc->sc_detach(sc);
- sc->sc_detach = NULL;
- sc->sc_start = NULL;
- sc->sc_input = NULL;
- sc->sc_port_create = NULL;
- sc->sc_port_destroy = NULL;
- sc->sc_linkstate = NULL;
- sc->sc_init = NULL;
- sc->sc_stop = NULL;
- sc->sc_lladdr = NULL;
- sc->sc_req = NULL;
- sc->sc_portreq = NULL;
- } else if (sc->sc_input != NULL) {
- /* Still detaching */
- error = EBUSY;
+ lagg_proto_detach(sc);
+ LAGG_UNLOCK_ASSERT(sc);
+ lagg_proto_attach(sc, ra->ra_proto);
+ break;
+ case SIOCGLAGGOPTS:
+ ro->ro_opts = sc->sc_opts;
+ if (sc->sc_proto == LAGG_PROTO_LACP) {
+ struct lacp_softc *lsc;
+
+ lsc = (struct lacp_softc *)sc->sc_psc;
+ if (lsc->lsc_debug.lsc_tx_test != 0)
+ ro->ro_opts |= LAGG_OPT_LACP_TXTEST;
+ if (lsc->lsc_debug.lsc_rx_test != 0)
+ ro->ro_opts |= LAGG_OPT_LACP_RXTEST;
+ if (lsc->lsc_strict_mode != 0)
+ ro->ro_opts |= LAGG_OPT_LACP_STRICT;
+ if (lsc->lsc_fast_timeout != 0)
+ ro->ro_opts |= LAGG_OPT_LACP_TIMEOUT;
+
+ ro->ro_active = sc->sc_active;
+ } else {
+ ro->ro_active = 0;
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
+ ro->ro_active += LAGG_PORTACTIVE(lp);
}
- if (error != 0) {
- LAGG_WUNLOCK(sc);
+ ro->ro_bkt = sc->sc_bkt;
+ ro->ro_flapping = sc->sc_flapping;
+ ro->ro_flowid_shift = sc->flowid_shift;
+ break;
+ case SIOCSLAGGOPTS:
+ if (sc->sc_proto == LAGG_PROTO_ROUNDROBIN) {
+ if (ro->ro_bkt == 0)
+ sc->sc_bkt = 1; // Minimum 1 packet per iface.
+ else
+ sc->sc_bkt = ro->ro_bkt;
+ }
+ error = priv_check(td, PRIV_NET_LAGG);
+ if (error)
+ break;
+ if (ro->ro_opts == 0)
+ break;
+ /*
+ * Set options. LACP options are stored in sc->sc_psc,
+ * not in sc_opts.
+ */
+ int valid, lacp;
+
+ switch (ro->ro_opts) {
+ case LAGG_OPT_USE_FLOWID:
+ case -LAGG_OPT_USE_FLOWID:
+ case LAGG_OPT_FLOWIDSHIFT:
+ valid = 1;
+ lacp = 0;
+ break;
+ case LAGG_OPT_LACP_TXTEST:
+ case -LAGG_OPT_LACP_TXTEST:
+ case LAGG_OPT_LACP_RXTEST:
+ case -LAGG_OPT_LACP_RXTEST:
+ case LAGG_OPT_LACP_STRICT:
+ case -LAGG_OPT_LACP_STRICT:
+ case LAGG_OPT_LACP_TIMEOUT:
+ case -LAGG_OPT_LACP_TIMEOUT:
+ valid = lacp = 1;
+ break;
+ default:
+ valid = lacp = 0;
break;
}
- for (int i = 0; i < (sizeof(lagg_protos) /
- sizeof(lagg_protos[0])); i++) {
- if (lagg_protos[i].ti_proto == ra->ra_proto) {
- if (sc->sc_ifflags & IFF_DEBUG)
- printf("%s: using proto %u\n",
- sc->sc_ifname,
- lagg_protos[i].ti_proto);
- sc->sc_proto = lagg_protos[i].ti_proto;
- if (sc->sc_proto != LAGG_PROTO_NONE)
- error = lagg_protos[i].ti_attach(sc);
- LAGG_WUNLOCK(sc);
- return (error);
+
+ LAGG_WLOCK(sc);
+
+ if (valid == 0 ||
+ (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) {
+ /* Invalid combination of options specified. */
+ error = EINVAL;
+ LAGG_WUNLOCK(sc);
+ break; /* Return from SIOCSLAGGOPTS. */
+ }
+ /*
+ * Store new options into sc->sc_opts except for
+ * FLOWIDSHIFT and LACP options.
+ */
+ if (lacp == 0) {
+ if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT)
+ sc->flowid_shift = ro->ro_flowid_shift;
+ else if (ro->ro_opts > 0)
+ sc->sc_opts |= ro->ro_opts;
+ else
+ sc->sc_opts &= ~ro->ro_opts;
+ } else {
+ struct lacp_softc *lsc;
+ struct lacp_port *lp;
+
+ lsc = (struct lacp_softc *)sc->sc_psc;
+
+ switch (ro->ro_opts) {
+ case LAGG_OPT_LACP_TXTEST:
+ lsc->lsc_debug.lsc_tx_test = 1;
+ break;
+ case -LAGG_OPT_LACP_TXTEST:
+ lsc->lsc_debug.lsc_tx_test = 0;
+ break;
+ case LAGG_OPT_LACP_RXTEST:
+ lsc->lsc_debug.lsc_rx_test = 1;
+ break;
+ case -LAGG_OPT_LACP_RXTEST:
+ lsc->lsc_debug.lsc_rx_test = 0;
+ break;
+ case LAGG_OPT_LACP_STRICT:
+ lsc->lsc_strict_mode = 1;
+ break;
+ case -LAGG_OPT_LACP_STRICT:
+ lsc->lsc_strict_mode = 0;
+ break;
+ case LAGG_OPT_LACP_TIMEOUT:
+ LACP_LOCK(lsc);
+ LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
+ lp->lp_state |= LACP_STATE_TIMEOUT;
+ LACP_UNLOCK(lsc);
+ lsc->lsc_fast_timeout = 1;
+ break;
+ case -LAGG_OPT_LACP_TIMEOUT:
+ LACP_LOCK(lsc);
+ LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
+ lp->lp_state &= ~LACP_STATE_TIMEOUT;
+ LACP_UNLOCK(lsc);
+ lsc->lsc_fast_timeout = 0;
+ break;
}
}
LAGG_WUNLOCK(sc);
- error = EPROTONOSUPPORT;
break;
case SIOCGLAGGFLAGS:
- rf->rf_flags = sc->sc_flags;
+ rf->rf_flags = 0;
+ LAGG_RLOCK(sc, &tracker);
+ if (sc->sc_flags & MBUF_HASHFLAG_L2)
+ rf->rf_flags |= LAGG_F_HASHL2;
+ if (sc->sc_flags & MBUF_HASHFLAG_L3)
+ rf->rf_flags |= LAGG_F_HASHL3;
+ if (sc->sc_flags & MBUF_HASHFLAG_L4)
+ rf->rf_flags |= LAGG_F_HASHL4;
+ LAGG_RUNLOCK(sc, &tracker);
break;
case SIOCSLAGGHASH:
error = priv_check(td, PRIV_NET_LAGG);
@@ -1057,8 +1420,13 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
LAGG_WLOCK(sc);
- sc->sc_flags &= ~LAGG_F_HASHMASK;
- sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK;
+ sc->sc_flags = 0;
+ if (rf->rf_flags & LAGG_F_HASHL2)
+ sc->sc_flags |= MBUF_HASHFLAG_L2;
+ if (rf->rf_flags & LAGG_F_HASHL3)
+ sc->sc_flags |= MBUF_HASHFLAG_L3;
+ if (rf->rf_flags & LAGG_F_HASHL4)
+ sc->sc_flags |= MBUF_HASHFLAG_L4;
LAGG_WUNLOCK(sc);
break;
case SIOCGLAGGPORT:
@@ -1068,16 +1436,16 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
lp->lp_softc != sc) {
error = ENOENT;
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
break;
}
lagg_port2req(lp, rp);
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
break;
case SIOCSLAGGPORT:
error = priv_check(td, PRIV_NET_LAGG);
@@ -1088,6 +1456,26 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = EINVAL;
break;
}
+#ifdef INET6
+ /*
+ * A laggport interface should not have inet6 address
+ * because two interfaces with a valid link-local
+ * scope zone must not be merged in any form. This
+ * restriction is needed to prevent violation of
+ * link-local scope zone. Attempts to add a laggport
+ * interface which has inet6 addresses triggers
+ * removal of all inet6 addresses on the member
+ * interface.
+ */
+ if (in6ifa_llaonifp(tpif)) {
+ in6_ifdetach(tpif);
+ if_printf(sc->sc_ifp,
+ "IPv6 addresses on %s have been removed "
+ "before adding it as a member to prevent "
+ "IPv6 address scope violation.\n",
+ tpif->if_xname);
+ }
+#endif
LAGG_WLOCK(sc);
error = lagg_port_create(sc, tpif);
LAGG_WUNLOCK(sc);
@@ -1186,39 +1574,39 @@ lagg_ether_cmdmulti(struct lagg_port *lp, int set)
struct ifnet *ifp = lp->lp_ifp;
struct ifnet *scifp = sc->sc_ifp;
struct lagg_mc *mc;
- struct ifmultiaddr *ifma, *rifma = NULL;
- struct sockaddr_dl sdl;
+ struct ifmultiaddr *ifma;
int error;
LAGG_WLOCK_ASSERT(sc);
- bzero((char *)&sdl, sizeof(sdl));
- sdl.sdl_len = sizeof(sdl);
- sdl.sdl_family = AF_LINK;
- sdl.sdl_type = IFT_ETHER;
- sdl.sdl_alen = ETHER_ADDR_LEN;
- sdl.sdl_index = ifp->if_index;
-
if (set) {
+ IF_ADDR_WLOCK(scifp);
TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
- bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
- LLADDR(&sdl), ETHER_ADDR_LEN);
-
- error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
- if (error)
- return (error);
mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
- if (mc == NULL)
+ if (mc == NULL) {
+ IF_ADDR_WUNLOCK(scifp);
return (ENOMEM);
- mc->mc_ifma = rifma;
+ }
+ bcopy(ifma->ifma_addr, &mc->mc_addr,
+ ifma->ifma_addr->sa_len);
+ mc->mc_addr.sdl_index = ifp->if_index;
+ mc->mc_ifma = NULL;
SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
}
+ IF_ADDR_WUNLOCK(scifp);
+ SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
+ error = if_addmulti(ifp,
+ (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
+ if (error)
+ return (error);
+ }
} else {
while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
- if_delmulti_ifma(mc->mc_ifma);
+ if (mc->mc_ifma && !lp->lp_detaching)
+ if_delmulti_ifma(mc->mc_ifma);
free(mc, M_DEVBUF);
}
}
@@ -1228,7 +1616,7 @@ lagg_ether_cmdmulti(struct lagg_port *lp, int set)
/* Handle a ref counted flag that should be set on the lagg port as well */
static int
lagg_setflag(struct lagg_port *lp, int flag, int status,
- int (*func)(struct ifnet *, int))
+ int (*func)(struct ifnet *, int))
{
struct lagg_softc *sc = lp->lp_softc;
struct ifnet *scifp = sc->sc_ifp;
@@ -1283,30 +1671,27 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
int error, len, mcast;
+ struct rm_priotracker tracker;
len = m->m_pkthdr.len;
mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
/* We need a Tx algorithm and at least one port */
if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
m_freem(m);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (ENXIO);
}
ETHER_BPF_MTAP(ifp, m);
- error = (*sc->sc_start)(sc, m);
- LAGG_RUNLOCK(sc);
+ error = lagg_proto_start(sc, m);
+ LAGG_RUNLOCK(sc, &tracker);
- if (error == 0) {
- ifp->if_opackets++;
- ifp->if_omcasts += mcast;
- ifp->if_obytes += len;
- } else
- ifp->if_oerrors++;
+ if (error != 0)
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (error);
}
@@ -1325,31 +1710,33 @@ lagg_input(struct ifnet *ifp, struct mbuf *m)
struct lagg_port *lp = ifp->if_lagg;
struct lagg_softc *sc = lp->lp_softc;
struct ifnet *scifp = sc->sc_ifp;
+ struct rm_priotracker tracker;
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
(lp->lp_flags & LAGG_PORT_DISABLED) ||
sc->sc_proto == LAGG_PROTO_NONE) {
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
m_freem(m);
return (NULL);
}
ETHER_BPF_MTAP(scifp, m);
- m = (*sc->sc_input)(sc, lp, m);
+ if (lp->lp_detaching != 0) {
+ m_freem(m);
+ m = NULL;
+ } else
+ m = lagg_proto_input(sc, lp, m);
if (m != NULL) {
- scifp->if_ipackets++;
- scifp->if_ibytes += m->m_pkthdr.len;
-
if (scifp->if_flags & IFF_MONITOR) {
m_freem(m);
m = NULL;
}
}
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
return (m);
}
@@ -1370,16 +1757,17 @@ lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
struct lagg_port *lp;
+ struct rm_priotracker tracker;
imr->ifm_status = IFM_AVALID;
imr->ifm_active = IFM_ETHER | IFM_AUTO;
- LAGG_RLOCK(sc);
+ LAGG_RLOCK(sc, &tracker);
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
if (LAGG_PORTACTIVE(lp))
imr->ifm_status |= IFM_ACTIVE;
}
- LAGG_RUNLOCK(sc);
+ LAGG_RUNLOCK(sc, &tracker);
}
static void
@@ -1391,7 +1779,7 @@ lagg_linkstate(struct lagg_softc *sc)
/* Our link is considered up if at least one of our ports is active */
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
- if (lp->lp_link_state == LINK_STATE_UP) {
+ if (lp->lp_ifp->if_link_state == LINK_STATE_UP) {
new_link = LINK_STATE_UP;
break;
}
@@ -1406,7 +1794,7 @@ lagg_linkstate(struct lagg_softc *sc)
break;
case LAGG_PROTO_ROUNDROBIN:
case LAGG_PROTO_LOADBALANCE:
- case LAGG_PROTO_ETHERCHANNEL:
+ case LAGG_PROTO_BROADCAST:
speed = 0;
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
speed += lp->lp_ifp->if_baudrate;
@@ -1431,8 +1819,7 @@ lagg_port_state(struct ifnet *ifp, int state)
LAGG_WLOCK(sc);
lagg_linkstate(sc);
- if (sc->sc_linkstate != NULL)
- (*sc->sc_linkstate)(lp);
+ lagg_proto_linkstate(sc, lp);
LAGG_WUNLOCK(sc);
}
@@ -1487,120 +1874,6 @@ found:
return (rval);
}
-static const void *
-lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
-{
- if (m->m_pkthdr.len < (off + len)) {
- return (NULL);
- } else if (m->m_len < (off + len)) {
- m_copydata(m, off, len, buf);
- return (buf);
- }
- return (mtod(m, char *) + off);
-}
-
-uint32_t
-lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key)
-{
- uint16_t etype;
- uint32_t p = key;
- int off;
- struct ether_header *eh;
- const struct ether_vlan_header *vlan;
-#ifdef INET
- const struct ip *ip;
- const uint32_t *ports;
- int iphlen;
-#endif
-#ifdef INET6
- const struct ip6_hdr *ip6;
- uint32_t flow;
-#endif
- union {
-#ifdef INET
- struct ip ip;
-#endif
-#ifdef INET6
- struct ip6_hdr ip6;
-#endif
- struct ether_vlan_header vlan;
- uint32_t port;
- } buf;
-
-
- off = sizeof(*eh);
- if (m->m_len < off)
- goto out;
- eh = mtod(m, struct ether_header *);
- etype = ntohs(eh->ether_type);
- if (sc->sc_flags & LAGG_F_HASHL2) {
- p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
- p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
- }
-
- /* Special handling for encapsulating VLAN frames */
- if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) {
- p = hash32_buf(&m->m_pkthdr.ether_vtag,
- sizeof(m->m_pkthdr.ether_vtag), p);
- } else if (etype == ETHERTYPE_VLAN) {
- vlan = lagg_gethdr(m, off, sizeof(*vlan), &buf);
- if (vlan == NULL)
- goto out;
-
- if (sc->sc_flags & LAGG_F_HASHL2)
- p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
- etype = ntohs(vlan->evl_proto);
- off += sizeof(*vlan) - sizeof(*eh);
- }
-
- switch (etype) {
-#ifdef INET
- case ETHERTYPE_IP:
- ip = lagg_gethdr(m, off, sizeof(*ip), &buf);
- if (ip == NULL)
- goto out;
-
- if (sc->sc_flags & LAGG_F_HASHL3) {
- p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
- p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
- }
- if (!(sc->sc_flags & LAGG_F_HASHL4))
- break;
- switch (ip->ip_p) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_SCTP:
- iphlen = ip->ip_hl << 2;
- if (iphlen < sizeof(*ip))
- break;
- off += iphlen;
- ports = lagg_gethdr(m, off, sizeof(*ports), &buf);
- if (ports == NULL)
- break;
- p = hash32_buf(ports, sizeof(*ports), p);
- break;
- }
- break;
-#endif
-#ifdef INET6
- case ETHERTYPE_IPV6:
- if (!(sc->sc_flags & LAGG_F_HASHL3))
- break;
- ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf);
- if (ip6 == NULL)
- goto out;
-
- p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
- p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
- flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
- p = hash32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */
- break;
-#endif
- }
-out:
- return (p);
-}
-
int
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{
@@ -1611,24 +1884,12 @@ lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
/*
* Simple round robin aggregation
*/
-
-static int
+static void
lagg_rr_attach(struct lagg_softc *sc)
{
- sc->sc_detach = lagg_rr_detach;
- sc->sc_start = lagg_rr_start;
- sc->sc_input = lagg_rr_input;
- sc->sc_port_create = NULL;
sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
sc->sc_seq = 0;
-
- return (0);
-}
-
-static int
-lagg_rr_detach(struct lagg_softc *sc)
-{
- return (0);
+ sc->sc_bkt_count = sc->sc_bkt;
}
static int
@@ -1637,9 +1898,21 @@ lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
struct lagg_port *lp;
uint32_t p;
- p = atomic_fetchadd_32(&sc->sc_seq, 1);
+ if (sc->sc_bkt_count == 0 && sc->sc_bkt > 0)
+ sc->sc_bkt_count = sc->sc_bkt;
+
+ if (sc->sc_bkt > 0) {
+ atomic_subtract_int(&sc->sc_bkt_count, 1);
+ if (atomic_cmpset_int(&sc->sc_bkt_count, 0, sc->sc_bkt))
+ p = atomic_fetchadd_32(&sc->sc_seq, 1);
+ else
+ p = sc->sc_seq;
+ } else
+ p = atomic_fetchadd_32(&sc->sc_seq, 1);
+
p %= sc->sc_count;
lp = SLIST_FIRST(&sc->sc_ports);
+
while (p--)
lp = SLIST_NEXT(lp, lp_entries);
@@ -1668,27 +1941,69 @@ lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
}
/*
- * Active failover
+ * Broadcast mode
*/
-
static int
-lagg_fail_attach(struct lagg_softc *sc)
+lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
{
- sc->sc_detach = lagg_fail_detach;
- sc->sc_start = lagg_fail_start;
- sc->sc_input = lagg_fail_input;
- sc->sc_port_create = NULL;
- sc->sc_port_destroy = NULL;
+ int active_ports = 0;
+ int errors = 0;
+ int ret;
+ struct lagg_port *lp, *last = NULL;
+ struct mbuf *m0;
+
+ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
+ if (!LAGG_PORTACTIVE(lp))
+ continue;
+
+ active_ports++;
+
+ if (last != NULL) {
+ m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT);
+ if (m0 == NULL) {
+ ret = ENOBUFS;
+ errors++;
+ break;
+ }
+
+ ret = lagg_enqueue(last->lp_ifp, m0);
+ if (ret != 0)
+ errors++;
+ }
+ last = lp;
+ }
+ if (last == NULL) {
+ m_freem(m);
+ return (ENOENT);
+ }
+ if ((last = lagg_link_active(sc, last)) == NULL) {
+ m_freem(m);
+ return (ENETDOWN);
+ }
+
+ ret = lagg_enqueue(last->lp_ifp, m);
+ if (ret != 0)
+ errors++;
+
+ if (errors == 0)
+ return (ret);
return (0);
}
-static int
-lagg_fail_detach(struct lagg_softc *sc)
+static struct mbuf*
+lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
- return (0);
+ struct ifnet *ifp = sc->sc_ifp;
+
+ /* Just pass in the packet to our lagg device */
+ m->m_pkthdr.rcvif = ifp;
+ return (m);
}
+/*
+ * Active failover
+ */
static int
lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
{
@@ -1710,7 +2025,7 @@ lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
struct ifnet *ifp = sc->sc_ifp;
struct lagg_port *tmp_tp;
- if (lp == sc->sc_primary || lagg_failover_rx_all) {
+ if (lp == sc->sc_primary || V_lagg_failover_rx_all) {
m->m_pkthdr.rcvif = ifp;
return (m);
}
@@ -1718,7 +2033,7 @@ lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
if (!LAGG_PORTACTIVE(sc->sc_primary)) {
tmp_tp = lagg_link_active(sc, sc->sc_primary);
/*
- * If tmp_tp is null, we've recieved a packet when all
+ * If tmp_tp is null, we've received a packet when all
* our links are down. Weird, but process it anyways.
*/
if ((tmp_tp == NULL || tmp_tp == lp)) {
@@ -1734,40 +2049,32 @@ lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
/*
* Loadbalancing
*/
-
-static int
+static void
lagg_lb_attach(struct lagg_softc *sc)
{
struct lagg_port *lp;
struct lagg_lb *lb;
- if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
- M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
- return (ENOMEM);
+ lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO);
- sc->sc_detach = lagg_lb_detach;
- sc->sc_start = lagg_lb_start;
- sc->sc_input = lagg_lb_input;
- sc->sc_port_create = lagg_lb_port_create;
- sc->sc_port_destroy = lagg_lb_port_destroy;
sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
- lb->lb_key = arc4random();
- sc->sc_psc = (caddr_t)lb;
+ lb->lb_key = m_ether_tcpip_hash_init();
+ sc->sc_psc = lb;
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
lagg_lb_port_create(lp);
-
- return (0);
}
-static int
+static void
lagg_lb_detach(struct lagg_softc *sc)
{
- struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
+ struct lagg_lb *lb;
+
+ lb = (struct lagg_lb *)sc->sc_psc;
+ LAGG_WUNLOCK(sc);
if (lb != NULL)
free(lb, M_DEVBUF);
- return (0);
}
static int
@@ -1785,7 +2092,7 @@ lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
return (EINVAL);
if (sc->sc_ifflags & IFF_DEBUG)
printf("%s: port %s at index %d\n",
- sc->sc_ifname, lp_next->lp_ifname, i);
+ sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
lb->lb_ports[i++] = lp_next;
}
@@ -1813,10 +2120,11 @@ lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
struct lagg_port *lp = NULL;
uint32_t p = 0;
- if (sc->use_flowid && (m->m_flags & M_FLOWID))
- p = m->m_pkthdr.flowid;
+ if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
+ M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ p = m->m_pkthdr.flowid >> sc->flowid_shift;
else
- p = lagg_hashmbuf(sc, m, lb->lb_key);
+ p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key);
p %= sc->sc_count;
lp = lb->lb_ports[p];
@@ -1847,50 +2155,30 @@ lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
/*
* 802.3ad LACP
*/
-
-static int
+static void
lagg_lacp_attach(struct lagg_softc *sc)
{
struct lagg_port *lp;
- int error;
-
- sc->sc_detach = lagg_lacp_detach;
- sc->sc_port_create = lacp_port_create;
- sc->sc_port_destroy = lacp_port_destroy;
- sc->sc_linkstate = lacp_linkstate;
- sc->sc_start = lagg_lacp_start;
- sc->sc_input = lagg_lacp_input;
- sc->sc_init = lacp_init;
- sc->sc_stop = lacp_stop;
- sc->sc_lladdr = lagg_lacp_lladdr;
- sc->sc_req = lacp_req;
- sc->sc_portreq = lacp_portreq;
-
- error = lacp_attach(sc);
- if (error)
- return (error);
+ lacp_attach(sc);
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
lacp_port_create(lp);
-
- return (error);
}
-static int
+static void
lagg_lacp_detach(struct lagg_softc *sc)
{
struct lagg_port *lp;
- int error;
+ void *psc;
SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
lacp_port_destroy(lp);
- /* unlocking is safe here */
+ psc = sc->sc_psc;
+ sc->sc_psc = NULL;
LAGG_WUNLOCK(sc);
- error = lacp_detach(sc);
- LAGG_WLOCK(sc);
- return (error);
+ lacp_detach(psc);
}
static void
@@ -1951,3 +2239,4 @@ lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
m->m_pkthdr.rcvif = ifp;
return (m);
}
+
diff --git a/freebsd/sys/net/if_lagg.h b/freebsd/sys/net/if_lagg.h
index 27ab46f2..334995e5 100644
--- a/freebsd/sys/net/if_lagg.h
+++ b/freebsd/sys/net/if_lagg.h
@@ -21,8 +21,6 @@
#ifndef _NET_LAGG_H
#define _NET_LAGG_H
-#include <sys/sysctl.h>
-
/*
* Global definitions
*/
@@ -49,26 +47,28 @@
"\05DISTRIBUTING\06DISABLED"
/* Supported lagg PROTOs */
-#define LAGG_PROTO_NONE 0 /* no lagg protocol defined */
-#define LAGG_PROTO_ROUNDROBIN 1 /* simple round robin */
-#define LAGG_PROTO_FAILOVER 2 /* active failover */
-#define LAGG_PROTO_LOADBALANCE 3 /* loadbalance */
-#define LAGG_PROTO_LACP 4 /* 802.3ad lacp */
-#define LAGG_PROTO_ETHERCHANNEL 5 /* Cisco FEC */
-#define LAGG_PROTO_MAX 6
+typedef enum {
+ LAGG_PROTO_NONE = 0, /* no lagg protocol defined */
+ LAGG_PROTO_ROUNDROBIN, /* simple round robin */
+ LAGG_PROTO_FAILOVER, /* active failover */
+ LAGG_PROTO_LOADBALANCE, /* loadbalance */
+ LAGG_PROTO_LACP, /* 802.3ad lacp */
+ LAGG_PROTO_BROADCAST, /* broadcast */
+ LAGG_PROTO_MAX,
+} lagg_proto;
struct lagg_protos {
const char *lpr_name;
- int lpr_proto;
+ lagg_proto lpr_proto;
};
#define LAGG_PROTO_DEFAULT LAGG_PROTO_FAILOVER
#define LAGG_PROTOS { \
- { "failover", LAGG_PROTO_FAILOVER }, \
- { "fec", LAGG_PROTO_ETHERCHANNEL }, \
+ { "failover", LAGG_PROTO_FAILOVER }, \
{ "lacp", LAGG_PROTO_LACP }, \
{ "loadbalance", LAGG_PROTO_LOADBALANCE }, \
- { "roundrobin", LAGG_PROTO_ROUNDROBIN }, \
+ { "roundrobin", LAGG_PROTO_ROUNDROBIN }, \
+ { "broadcast", LAGG_PROTO_BROADCAST }, \
{ "none", LAGG_PROTO_NONE }, \
{ "default", LAGG_PROTO_DEFAULT } \
}
@@ -136,16 +136,40 @@ struct lagg_reqflags {
#define SIOCGLAGGFLAGS _IOWR('i', 145, struct lagg_reqflags)
#define SIOCSLAGGHASH _IOW('i', 146, struct lagg_reqflags)
+struct lagg_reqopts {
+ char ro_ifname[IFNAMSIZ]; /* name of the lagg */
+
+ int ro_opts; /* Option bitmap */
+#define LAGG_OPT_NONE 0x00
+#define LAGG_OPT_USE_FLOWID 0x01 /* enable use of flowid */
+/* Pseudo flags which are used in ro_opts but not stored into sc_opts. */
+#define LAGG_OPT_FLOWIDSHIFT 0x02 /* set flowid shift */
+#define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */
+#define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */
+#define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */
+#define LAGG_OPT_LACP_RXTEST 0x40 /* LACP debug: rxtest */
+#define LAGG_OPT_LACP_TIMEOUT 0x80 /* LACP timeout */
+ u_int ro_count; /* number of ports */
+ u_int ro_active; /* active port count */
+ u_int ro_flapping; /* number of flapping */
+ int ro_flowid_shift; /* shift the flowid */
+ uint32_t ro_bkt; /* packet bucket for roundrobin */
+};
+
+#define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts)
+#define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts)
+
+#define LAGG_OPT_BITS "\020\001USE_FLOWID\005LACP_STRICT" \
+ "\006LACP_TXTEST\007LACP_RXTEST"
+
#ifdef _KERNEL
+
/*
* Internal kernel part
*/
-#define lp_ifname lp_ifp->if_xname /* interface name */
-#define lp_link_state lp_ifp->if_link_state /* link state */
-
#define LAGG_PORTACTIVE(_tp) ( \
- ((_tp)->lp_link_state == LINK_STATE_UP) && \
+ ((_tp)->lp_ifp->if_link_state == LINK_STATE_UP) && \
((_tp)->lp_ifp->if_flags & IFF_UP) \
)
@@ -173,25 +197,39 @@ struct lagg_lb {
};
struct lagg_mc {
+ struct sockaddr_dl mc_addr;
struct ifmultiaddr *mc_ifma;
SLIST_ENTRY(lagg_mc) mc_entries;
};
+typedef enum {
+ LAGG_LLQTYPE_PHYS = 0, /* Task related to physical (underlying) port */
+ LAGG_LLQTYPE_VIRT, /* Task related to lagg interface itself */
+} lagg_llqtype;
+
/* List of interfaces to have the MAC address modified */
struct lagg_llq {
struct ifnet *llq_ifp;
uint8_t llq_lladdr[ETHER_ADDR_LEN];
+ lagg_llqtype llq_type;
SLIST_ENTRY(lagg_llq) llq_entries;
};
+struct lagg_counters {
+ uint64_t val[IFCOUNTERS];
+};
+
struct lagg_softc {
struct ifnet *sc_ifp; /* virtual interface */
- struct rwlock sc_mtx;
+ struct rmlock sc_mtx;
int sc_proto; /* lagg protocol */
u_int sc_count; /* number of ports */
+ u_int sc_active; /* active port count */
+ u_int sc_flapping; /* number of flapping
+ * events */
struct lagg_port *sc_primary; /* primary port */
struct ifmedia sc_media; /* media config */
- caddr_t sc_psc; /* protocol data */
+ void *sc_psc; /* protocol data */
uint32_t sc_seq; /* sequence counter */
uint32_t sc_flags;
@@ -201,26 +239,14 @@ struct lagg_softc {
struct task sc_lladdr_task;
SLIST_HEAD(__llqhd, lagg_llq) sc_llq_head; /* interfaces to program
the lladdr on */
-
- /* lagg protocol callbacks */
- int (*sc_detach)(struct lagg_softc *);
- int (*sc_start)(struct lagg_softc *, struct mbuf *);
- struct mbuf *(*sc_input)(struct lagg_softc *, struct lagg_port *,
- struct mbuf *);
- int (*sc_port_create)(struct lagg_port *);
- void (*sc_port_destroy)(struct lagg_port *);
- void (*sc_linkstate)(struct lagg_port *);
- void (*sc_init)(struct lagg_softc *);
- void (*sc_stop)(struct lagg_softc *);
- void (*sc_lladdr)(struct lagg_softc *);
- void (*sc_req)(struct lagg_softc *, caddr_t);
- void (*sc_portreq)(struct lagg_port *, caddr_t);
-#if __FreeBSD_version >= 800000
eventhandler_tag vlan_attach;
eventhandler_tag vlan_detach;
-#endif
- struct sysctl_ctx_list ctx; /* sysctl variables */
- int use_flowid; /* use M_FLOWID */
+ struct callout sc_callout;
+ u_int sc_opts;
+ int flowid_shift; /* shift the flowid */
+ uint32_t sc_bkt; /* packates bucket for roundrobin */
+ uint32_t sc_bkt_count; /* packates bucket count for roundrobin */
+ struct lagg_counters detached_counters; /* detached ports sum */
};
struct lagg_port {
@@ -233,33 +259,36 @@ struct lagg_port {
uint32_t lp_flags; /* port flags */
int lp_ifflags; /* saved ifp flags */
void *lh_cookie; /* if state hook */
- caddr_t lp_psc; /* protocol data */
+ void *lp_psc; /* protocol data */
int lp_detaching; /* ifnet is detaching */
SLIST_HEAD(__mclhd, lagg_mc) lp_mc_head; /* multicast addresses */
/* Redirected callbacks */
int (*lp_ioctl)(struct ifnet *, u_long, caddr_t);
- int (*lp_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *);
+ int (*lp_output)(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
+ struct lagg_counters port_counters; /* ifp counters copy */
SLIST_ENTRY(lagg_port) lp_entries;
};
-#define LAGG_LOCK_INIT(_sc) rw_init(&(_sc)->sc_mtx, "if_lagg rwlock")
-#define LAGG_LOCK_DESTROY(_sc) rw_destroy(&(_sc)->sc_mtx)
-#define LAGG_RLOCK(_sc) rw_rlock(&(_sc)->sc_mtx)
-#define LAGG_WLOCK(_sc) rw_wlock(&(_sc)->sc_mtx)
-#define LAGG_RUNLOCK(_sc) rw_runlock(&(_sc)->sc_mtx)
-#define LAGG_WUNLOCK(_sc) rw_wunlock(&(_sc)->sc_mtx)
-#define LAGG_RLOCK_ASSERT(_sc) rw_assert(&(_sc)->sc_mtx, RA_RLOCKED)
-#define LAGG_WLOCK_ASSERT(_sc) rw_assert(&(_sc)->sc_mtx, RA_WLOCKED)
+#define LAGG_LOCK_INIT(_sc) rm_init(&(_sc)->sc_mtx, "if_lagg rmlock")
+#define LAGG_LOCK_DESTROY(_sc) rm_destroy(&(_sc)->sc_mtx)
+#define LAGG_RLOCK(_sc, _p) rm_rlock(&(_sc)->sc_mtx, (_p))
+#define LAGG_WLOCK(_sc) rm_wlock(&(_sc)->sc_mtx)
+#define LAGG_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->sc_mtx, (_p))
+#define LAGG_WUNLOCK(_sc) rm_wunlock(&(_sc)->sc_mtx)
+#define LAGG_RLOCK_ASSERT(_sc) rm_assert(&(_sc)->sc_mtx, RA_RLOCKED)
+#define LAGG_WLOCK_ASSERT(_sc) rm_assert(&(_sc)->sc_mtx, RA_WLOCKED)
+#define LAGG_UNLOCK_ASSERT(_sc) rm_assert(&(_sc)->sc_mtx, RA_UNLOCKED)
extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
extern void (*lagg_linkstate_p)(struct ifnet *, int );
int lagg_enqueue(struct ifnet *, struct mbuf *);
-uint32_t lagg_hashmbuf(struct lagg_softc *, struct mbuf *, uint32_t);
+
+SYSCTL_DECL(_net_link_lagg);
#endif /* _KERNEL */
diff --git a/freebsd/sys/net/if_llatbl.c b/freebsd/sys/net/if_llatbl.c
index 55b816a7..20c0b9d2 100644
--- a/freebsd/sys/net/if_llatbl.c
+++ b/freebsd/sys/net/if_llatbl.c
@@ -64,17 +64,43 @@ __FBSDID("$FreeBSD$");
MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables");
-static VNET_DEFINE(SLIST_HEAD(, lltable), lltables);
+static VNET_DEFINE(SLIST_HEAD(, lltable), lltables) =
+ SLIST_HEAD_INITIALIZER(lltables);
#define V_lltables VNET(lltables)
-extern void arprequest(struct ifnet *, struct in_addr *, struct in_addr *,
- u_char *);
-
-static void vnet_lltable_init(void);
-
struct rwlock lltable_rwlock;
RW_SYSINIT(lltable_rwlock, &lltable_rwlock, "lltable_rwlock");
+static void lltable_unlink(struct lltable *llt);
+static void llentries_unlink(struct lltable *llt, struct llentries *head);
+
+static void htable_unlink_entry(struct llentry *lle);
+static void htable_link_entry(struct lltable *llt, struct llentry *lle);
+static int htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f,
+ void *farg);
+
+/*
+ * Dump lle state for a specific address family.
+ */
+static int
+lltable_dump_af(struct lltable *llt, struct sysctl_req *wr)
+{
+ int error;
+
+ LLTABLE_LOCK_ASSERT();
+
+ if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
+ return (0);
+ error = 0;
+
+ IF_AFDATA_RLOCK(llt->llt_ifp);
+ error = lltable_foreach_lle(llt,
+ (llt_foreach_cb_t *)llt->llt_dump_entry, wr);
+ IF_AFDATA_RUNLOCK(llt->llt_ifp);
+
+ return (error);
+}
+
/*
* Dump arp state for a specific address family.
*/
@@ -87,7 +113,7 @@ lltable_sysctl_dumparp(int af, struct sysctl_req *wr)
LLTABLE_RLOCK();
SLIST_FOREACH(llt, &V_lltables, llt_link) {
if (llt->llt_af == af) {
- error = llt->llt_dump(llt, wr);
+ error = lltable_dump_af(llt, wr);
if (error != 0)
goto done;
}
@@ -98,25 +124,144 @@ done:
}
/*
- * Deletes an address from the address table.
- * This function is called by the timer functions
- * such as arptimer() and nd6_llinfo_timer(), and
- * the caller does the locking.
+ * Common function helpers for chained hash table.
+ */
+
+/*
+ * Runs specified callback for each entry in @llt.
+ * Caller does the locking.
+ *
+ */
+static int
+htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg)
+{
+ struct llentry *lle, *next;
+ int i, error;
+
+ error = 0;
+
+ for (i = 0; i < llt->llt_hsize; i++) {
+ LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
+ error = f(llt, lle, farg);
+ if (error != 0)
+ break;
+ }
+ }
+
+ return (error);
+}
+
+static void
+htable_link_entry(struct lltable *llt, struct llentry *lle)
+{
+ struct llentries *lleh;
+ uint32_t hashidx;
+
+ if ((lle->la_flags & LLE_LINKED) != 0)
+ return;
+
+ IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp);
+
+ hashidx = llt->llt_hash(lle, llt->llt_hsize);
+ lleh = &llt->lle_head[hashidx];
+
+ lle->lle_tbl = llt;
+ lle->lle_head = lleh;
+ lle->la_flags |= LLE_LINKED;
+ LIST_INSERT_HEAD(lleh, lle, lle_next);
+}
+
+static void
+htable_unlink_entry(struct llentry *lle)
+{
+
+ if ((lle->la_flags & LLE_LINKED) != 0) {
+ IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp);
+ LIST_REMOVE(lle, lle_next);
+ lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
+#if 0
+ lle->lle_tbl = NULL;
+ lle->lle_head = NULL;
+#endif
+ }
+}
+
+struct prefix_match_data {
+ const struct sockaddr *addr;
+ const struct sockaddr *mask;
+ struct llentries dchain;
+ u_int flags;
+};
+
+static int
+htable_prefix_free_cb(struct lltable *llt, struct llentry *lle, void *farg)
+{
+ struct prefix_match_data *pmd;
+
+ pmd = (struct prefix_match_data *)farg;
+
+ if (llt->llt_match_prefix(pmd->addr, pmd->mask, pmd->flags, lle)) {
+ LLE_WLOCK(lle);
+ LIST_INSERT_HEAD(&pmd->dchain, lle, lle_chain);
+ }
+
+ return (0);
+}
+
+static void
+htable_prefix_free(struct lltable *llt, const struct sockaddr *addr,
+ const struct sockaddr *mask, u_int flags)
+{
+ struct llentry *lle, *next;
+ struct prefix_match_data pmd;
+
+ bzero(&pmd, sizeof(pmd));
+ pmd.addr = addr;
+ pmd.mask = mask;
+ pmd.flags = flags;
+ LIST_INIT(&pmd.dchain);
+
+ IF_AFDATA_WLOCK(llt->llt_ifp);
+ /* Push matching lles to chain */
+ lltable_foreach_lle(llt, htable_prefix_free_cb, &pmd);
+
+ llentries_unlink(llt, &pmd.dchain);
+ IF_AFDATA_WUNLOCK(llt->llt_ifp);
+
+ LIST_FOREACH_SAFE(lle, &pmd.dchain, lle_chain, next)
+ lltable_free_entry(llt, lle);
+}
+
+static void
+htable_free_tbl(struct lltable *llt)
+{
+
+ free(llt->lle_head, M_LLTABLE);
+ free(llt, M_LLTABLE);
+}
+
+static void
+llentries_unlink(struct lltable *llt, struct llentries *head)
+{
+ struct llentry *lle, *next;
+
+ LIST_FOREACH_SAFE(lle, head, lle_chain, next)
+ llt->llt_unlink_entry(lle);
+}
+
+/*
+ * Helper function used to drop all mbufs in hold queue.
*
* Returns the number of held packets, if any, that were dropped.
*/
size_t
-llentry_free(struct llentry *lle)
+lltable_drop_entry_queue(struct llentry *lle)
{
size_t pkts_dropped;
struct mbuf *next;
- IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp);
LLE_WLOCK_ASSERT(lle);
- LIST_REMOVE(lle, lle_next);
- lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
-
pkts_dropped = 0;
while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) {
next = lle->la_hold->m_nextpkt;
@@ -130,6 +275,162 @@ llentry_free(struct llentry *lle)
("%s: la_numheld %d > 0, pkts_droped %zd", __func__,
lle->la_numheld, pkts_dropped));
+ return (pkts_dropped);
+}
+
+void
+lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
+ const char *linkhdr, size_t linkhdrsize, int lladdr_off)
+{
+
+ memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
+ lle->r_hdrlen = linkhdrsize;
+ lle->ll_addr = &lle->r_linkdata[lladdr_off];
+ lle->la_flags |= LLE_VALID;
+ lle->r_flags |= RLLE_VALID;
+}
+
+/*
+ * Tries to update @lle link-level address.
+ * Since update requires AFDATA WLOCK, function
+ * drops @lle lock, acquires AFDATA lock and then acquires
+ * @lle lock to maintain lock order.
+ *
+ * Returns 1 on success.
+ */
+int
+lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
+ const char *linkhdr, size_t linkhdrsize, int lladdr_off)
+{
+
+ /* Perform real LLE update */
+ /* use afdata WLOCK to update fields */
+ LLE_WLOCK_ASSERT(lle);
+ LLE_ADDREF(lle);
+ LLE_WUNLOCK(lle);
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(lle);
+
+ /*
+ * Since we droppped LLE lock, other thread might have deleted
+ * this lle. Check and return
+ */
+ if ((lle->la_flags & LLE_DELETED) != 0) {
+ IF_AFDATA_WUNLOCK(ifp);
+ LLE_FREE_LOCKED(lle);
+ return (0);
+ }
+
+ /* Update data */
+ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, lladdr_off);
+
+ IF_AFDATA_WUNLOCK(ifp);
+
+ LLE_REMREF(lle);
+
+ return (1);
+}
+
+ /*
+ * Helper function used to pre-compute full/partial link-layer
+ * header data suitable for feeding into if_output().
+ */
+int
+lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
+ char *buf, size_t *bufsize, int *lladdr_off)
+{
+ struct if_encap_req ereq;
+ int error;
+
+ bzero(buf, *bufsize);
+ bzero(&ereq, sizeof(ereq));
+ ereq.buf = buf;
+ ereq.bufsize = *bufsize;
+ ereq.rtype = IFENCAP_LL;
+ ereq.family = family;
+ ereq.lladdr = lladdr;
+ ereq.lladdr_len = ifp->if_addrlen;
+ error = ifp->if_requestencap(ifp, &ereq);
+ if (error == 0) {
+ *bufsize = ereq.bufsize;
+ *lladdr_off = ereq.lladdr_off;
+ }
+
+ return (error);
+}
+
+/*
+ * Update link-layer header for given @lle after
+ * interface lladdr was changed.
+ */
+static int
+llentry_update_ifaddr(struct lltable *llt, struct llentry *lle, void *farg)
+{
+ struct ifnet *ifp;
+ u_char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ u_char *lladdr;
+ int lladdr_off;
+
+ ifp = (struct ifnet *)farg;
+
+ lladdr = lle->ll_addr;
+
+ LLE_WLOCK(lle);
+ if ((lle->la_flags & LLE_VALID) == 0) {
+ LLE_WUNLOCK(lle);
+ return (0);
+ }
+
+ if ((lle->la_flags & LLE_IFADDR) != 0)
+ lladdr = IF_LLADDR(ifp);
+
+ linkhdrsize = sizeof(linkhdr);
+ lltable_calc_llheader(ifp, llt->llt_af, lladdr, linkhdr, &linkhdrsize,
+ &lladdr_off);
+ memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
+ LLE_WUNLOCK(lle);
+
+ return (0);
+}
+
+/*
+ * Update all calculated headers for given @llt
+ */
+void
+lltable_update_ifaddr(struct lltable *llt)
+{
+
+ if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
+ return;
+
+ IF_AFDATA_WLOCK(llt->llt_ifp);
+ lltable_foreach_lle(llt, llentry_update_ifaddr, llt->llt_ifp);
+ IF_AFDATA_WUNLOCK(llt->llt_ifp);
+}
+
+/*
+ *
+ * Performs generic cleanup routines and frees lle.
+ *
+ * Called for non-linked entries, with callouts and
+ * other AF-specific cleanups performed.
+ *
+ * @lle must be passed WLOCK'ed
+ *
+ * Returns the number of held packets, if any, that were dropped.
+ */
+size_t
+llentry_free(struct llentry *lle)
+{
+ size_t pkts_dropped;
+
+ LLE_WLOCK_ASSERT(lle);
+
+ KASSERT((lle->la_flags & LLE_LINKED) == 0, ("freeing linked lle"));
+
+ pkts_dropped = lltable_drop_entry_queue(lle);
+
LLE_FREE_LOCKED(lle);
return (pkts_dropped);
@@ -144,22 +445,35 @@ struct llentry *
llentry_alloc(struct ifnet *ifp, struct lltable *lt,
struct sockaddr_storage *dst)
{
- struct llentry *la;
+ struct llentry *la, *la_tmp;
IF_AFDATA_RLOCK(ifp);
la = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst);
IF_AFDATA_RUNLOCK(ifp);
- if ((la == NULL) &&
- (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) {
- IF_AFDATA_WLOCK(ifp);
- la = lla_lookup(lt, (LLE_CREATE | LLE_EXCLUSIVE),
- (struct sockaddr *)dst);
- IF_AFDATA_WUNLOCK(ifp);
- }
if (la != NULL) {
LLE_ADDREF(la);
LLE_WUNLOCK(la);
+ return (la);
+ }
+
+ if ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) {
+ la = lltable_alloc_entry(lt, 0, (struct sockaddr *)dst);
+ if (la == NULL)
+ return (NULL);
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(la);
+ /* Prefer any existing LLE over newly-created one */
+ la_tmp = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst);
+ if (la_tmp == NULL)
+ lltable_link_entry(lt, la);
+ IF_AFDATA_WUNLOCK(ifp);
+ if (la_tmp != NULL) {
+ lltable_free_entry(lt, la);
+ la = la_tmp;
+ }
+ LLE_ADDREF(la);
+ LLE_WUNLOCK(la);
}
return (la);
@@ -168,30 +482,47 @@ llentry_alloc(struct ifnet *ifp, struct lltable *lt,
/*
* Free all entries from given table and free itself.
*/
+
+static int
+lltable_free_cb(struct lltable *llt, struct llentry *lle, void *farg)
+{
+ struct llentries *dchain;
+
+ dchain = (struct llentries *)farg;
+
+ LLE_WLOCK(lle);
+ LIST_INSERT_HEAD(dchain, lle, lle_chain);
+
+ return (0);
+}
+
+/*
+ * Free all entries from given table and free itself.
+ */
void
lltable_free(struct lltable *llt)
{
struct llentry *lle, *next;
- int i;
+ struct llentries dchain;
KASSERT(llt != NULL, ("%s: llt is NULL", __func__));
- LLTABLE_WLOCK();
- SLIST_REMOVE(&V_lltables, llt, lltable, llt_link);
- LLTABLE_WUNLOCK();
+ lltable_unlink(llt);
+ LIST_INIT(&dchain);
IF_AFDATA_WLOCK(llt->llt_ifp);
- for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
- LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
- LLE_WLOCK(lle);
- if (callout_stop(&lle->la_timer))
- LLE_REMREF(lle);
- llentry_free(lle);
- }
- }
+ /* Push all lles to @dchain */
+ lltable_foreach_lle(llt, lltable_free_cb, &dchain);
+ llentries_unlink(llt, &dchain);
IF_AFDATA_WUNLOCK(llt->llt_ifp);
- free(llt, M_LLTABLE);
+ LIST_FOREACH_SAFE(lle, &dchain, lle_chain, next) {
+ if (callout_stop(&lle->lle_timer) > 0)
+ LLE_REMREF(lle);
+ llentry_free(lle);
+ }
+
+ llt->llt_free_tbl(llt);
}
#if 0
@@ -207,7 +538,7 @@ lltable_drain(int af)
if (llt->llt_af != af)
continue;
- for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
+ for (i=0; i < llt->llt_hsize; i++) {
LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
LLE_WLOCK(lle);
if (lle->la_hold) {
@@ -222,8 +553,42 @@ lltable_drain(int af)
}
#endif
+/*
+ * Deletes an address from given lltable.
+ * Used for userland interaction to remove
+ * individual entries. Skips entries added by OS.
+ */
+int
+lltable_delete_addr(struct lltable *llt, u_int flags,
+ const struct sockaddr *l3addr)
+{
+ struct llentry *lle;
+ struct ifnet *ifp;
+
+ ifp = llt->llt_ifp;
+ IF_AFDATA_WLOCK(ifp);
+ lle = lla_lookup(llt, LLE_EXCLUSIVE, l3addr);
+
+ if (lle == NULL) {
+ IF_AFDATA_WUNLOCK(ifp);
+ return (ENOENT);
+ }
+ if ((lle->la_flags & LLE_IFADDR) != 0 && (flags & LLE_IFADDR) == 0) {
+ IF_AFDATA_WUNLOCK(ifp);
+ LLE_WUNLOCK(lle);
+ return (EPERM);
+ }
+
+ lltable_unlink_entry(llt, lle);
+ IF_AFDATA_WUNLOCK(ifp);
+
+ llt->llt_delete_entry(llt, lle);
+
+ return (0);
+}
+
void
-lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask,
+lltable_prefix_free(int af, struct sockaddr *addr, struct sockaddr *mask,
u_int flags)
{
struct lltable *llt;
@@ -233,38 +598,122 @@ lltable_prefix_free(int af, struct sockaddr *prefix, struct sockaddr *mask,
if (llt->llt_af != af)
continue;
- llt->llt_prefix_free(llt, prefix, mask, flags);
+ llt->llt_prefix_free(llt, addr, mask, flags);
}
LLTABLE_RUNLOCK();
}
-
-
-/*
- * Create a new lltable.
- */
struct lltable *
-lltable_init(struct ifnet *ifp, int af)
+lltable_allocate_htbl(uint32_t hsize)
{
struct lltable *llt;
- register int i;
+ int i;
- llt = malloc(sizeof(struct lltable), M_LLTABLE, M_WAITOK);
+ llt = malloc(sizeof(struct lltable), M_LLTABLE, M_WAITOK | M_ZERO);
+ llt->llt_hsize = hsize;
+ llt->lle_head = malloc(sizeof(struct llentries) * hsize,
+ M_LLTABLE, M_WAITOK | M_ZERO);
- llt->llt_af = af;
- llt->llt_ifp = ifp;
- for (i = 0; i < LLTBL_HASHTBL_SIZE; i++)
+ for (i = 0; i < llt->llt_hsize; i++)
LIST_INIT(&llt->lle_head[i]);
+ /* Set some default callbacks */
+ llt->llt_link_entry = htable_link_entry;
+ llt->llt_unlink_entry = htable_unlink_entry;
+ llt->llt_prefix_free = htable_prefix_free;
+ llt->llt_foreach_entry = htable_foreach_lle;
+ llt->llt_free_tbl = htable_free_tbl;
+
+ return (llt);
+}
+
+/*
+ * Links lltable to global llt list.
+ */
+void
+lltable_link(struct lltable *llt)
+{
+
LLTABLE_WLOCK();
SLIST_INSERT_HEAD(&V_lltables, llt, llt_link);
LLTABLE_WUNLOCK();
+}
- return (llt);
+static void
+lltable_unlink(struct lltable *llt)
+{
+
+ LLTABLE_WLOCK();
+ SLIST_REMOVE(&V_lltables, llt, lltable, llt_link);
+ LLTABLE_WUNLOCK();
+
+}
+
+/*
+ * External methods used by lltable consumers
+ */
+
+int
+lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg)
+{
+
+ return (llt->llt_foreach_entry(llt, f, farg));
+}
+
+struct llentry *
+lltable_alloc_entry(struct lltable *llt, u_int flags,
+ const struct sockaddr *l3addr)
+{
+
+ return (llt->llt_alloc_entry(llt, flags, l3addr));
+}
+
+void
+lltable_free_entry(struct lltable *llt, struct llentry *lle)
+{
+
+ llt->llt_free_entry(llt, lle);
+}
+
+void
+lltable_link_entry(struct lltable *llt, struct llentry *lle)
+{
+
+ llt->llt_link_entry(llt, lle);
+}
+
+void
+lltable_unlink_entry(struct lltable *llt, struct llentry *lle)
+{
+
+ llt->llt_unlink_entry(lle);
+}
+
+void
+lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
+{
+ struct lltable *llt;
+
+ llt = lle->lle_tbl;
+ llt->llt_fill_sa_entry(lle, sa);
+}
+
+struct ifnet *
+lltable_get_ifp(const struct lltable *llt)
+{
+
+ return (llt->llt_ifp);
+}
+
+int
+lltable_get_af(const struct lltable *llt)
+{
+
+ return (llt->llt_af);
}
/*
- * Called in route_output when adding/deleting a route to an interface.
+ * Called in route_output when rtm_flags contains RTF_LLDATA.
*/
int
lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
@@ -274,14 +723,16 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
struct sockaddr *dst = (struct sockaddr *)info->rti_info[RTAX_DST];
struct ifnet *ifp;
struct lltable *llt;
- struct llentry *lle;
- u_int laflags = 0, flags = 0;
- int error = 0;
+ struct llentry *lle, *lle_tmp;
+ uint8_t linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
+ u_int laflags = 0;
+ int error;
+
+ KASSERT(dl != NULL && dl->sdl_family == AF_LINK,
+ ("%s: invalid dl\n", __func__));
- if (dl == NULL || dl->sdl_family != AF_LINK) {
- log(LOG_INFO, "%s: invalid dl\n", __func__);
- return EINVAL;
- }
ifp = ifnet_byindex(dl->sdl_index);
if (ifp == NULL) {
log(LOG_INFO, "%s: invalid ifp (sdl_index %d)\n",
@@ -289,44 +740,6 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
return EINVAL;
}
- switch (rtm->rtm_type) {
- case RTM_ADD:
- if (rtm->rtm_flags & RTF_ANNOUNCE) {
- flags |= LLE_PUB;
-#ifdef INET
- if (dst->sa_family == AF_INET &&
- ((struct sockaddr_inarp *)dst)->sin_other != 0) {
- struct rtentry *rt;
- ((struct sockaddr_inarp *)dst)->sin_other = 0;
- rt = rtalloc1(dst, 0, 0);
- if (rt == NULL || !(rt->rt_flags & RTF_HOST)) {
- log(LOG_INFO, "%s: RTM_ADD publish "
- "(proxy only) is invalid\n",
- __func__);
- if (rt)
- RTFREE_LOCKED(rt);
- return EINVAL;
- }
- RTFREE_LOCKED(rt);
-
- flags |= LLE_PROXY;
- }
-#endif
- }
- flags |= LLE_CREATE;
- break;
-
- case RTM_DELETE:
- flags |= LLE_DELETE;
- break;
-
- case RTM_CHANGE:
- break;
-
- default:
- return EINVAL; /* XXX not implemented yet */
- }
-
/* XXX linked list may be too expensive */
LLTABLE_RLOCK();
SLIST_FOREACH(llt, &V_lltables, llt_link) {
@@ -337,73 +750,82 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
LLTABLE_RUNLOCK();
KASSERT(llt != NULL, ("Yep, ugly hacks are bad\n"));
- if (flags & LLE_CREATE)
- flags |= LLE_EXCLUSIVE;
-
- IF_AFDATA_LOCK(ifp);
- lle = lla_lookup(llt, flags, dst);
- IF_AFDATA_UNLOCK(ifp);
- if (LLE_IS_VALID(lle)) {
- if (flags & LLE_CREATE) {
- /*
- * If we delay the delete, then a subsequent
- * "arp add" should look up this entry, reset the
- * LLE_DELETED flag, and reset the expiration timer
- */
- bcopy(LLADDR(dl), &lle->ll_addr, ifp->if_addrlen);
- lle->la_flags |= (flags & (LLE_PUB | LLE_PROXY));
- lle->la_flags |= LLE_VALID;
- lle->la_flags &= ~LLE_DELETED;
-#ifdef INET6
- /*
- * ND6
- */
- if (dst->sa_family == AF_INET6)
- lle->ln_state = ND6_LLINFO_REACHABLE;
-#endif
- /*
- * NB: arp and ndp always set (RTF_STATIC | RTF_HOST)
- */
-
- if (rtm->rtm_rmx.rmx_expire == 0) {
- lle->la_flags |= LLE_STATIC;
- lle->la_expire = 0;
- } else
- lle->la_expire = rtm->rtm_rmx.rmx_expire;
- laflags = lle->la_flags;
- LLE_WUNLOCK(lle);
-#ifdef INET
- /* gratuitous ARP */
- if ((laflags & LLE_PUB) && dst->sa_family == AF_INET) {
- arprequest(ifp,
- &((struct sockaddr_in *)dst)->sin_addr,
- &((struct sockaddr_in *)dst)->sin_addr,
- ((laflags & LLE_PROXY) ?
- (u_char *)IF_LLADDR(ifp) :
- (u_char *)LLADDR(dl)));
+ error = 0;
+
+ switch (rtm->rtm_type) {
+ case RTM_ADD:
+ /* Add static LLE */
+ laflags = 0;
+ if (rtm->rtm_rmx.rmx_expire == 0)
+ laflags = LLE_STATIC;
+ lle = lltable_alloc_entry(llt, laflags, dst);
+ if (lle == NULL)
+ return (ENOMEM);
+
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, dst->sa_family, LLADDR(dl),
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return (EINVAL);
+ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
+ lladdr_off);
+ if ((rtm->rtm_flags & RTF_ANNOUNCE))
+ lle->la_flags |= LLE_PUB;
+ lle->la_expire = rtm->rtm_rmx.rmx_expire;
+
+ laflags = lle->la_flags;
+
+ /* Try to link new entry */
+ lle_tmp = NULL;
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(lle);
+ lle_tmp = lla_lookup(llt, LLE_EXCLUSIVE, dst);
+ if (lle_tmp != NULL) {
+ /* Check if we are trying to replace immutable entry */
+ if ((lle_tmp->la_flags & LLE_IFADDR) != 0) {
+ IF_AFDATA_WUNLOCK(ifp);
+ LLE_WUNLOCK(lle_tmp);
+ lltable_free_entry(llt, lle);
+ return (EPERM);
}
-#endif
- } else {
- if (flags & LLE_EXCLUSIVE)
- LLE_WUNLOCK(lle);
- else
- LLE_RUNLOCK(lle);
+ /* Unlink existing entry from table */
+ lltable_unlink_entry(llt, lle_tmp);
}
- } else if ((lle == NULL) && (flags & LLE_DELETE))
- error = EINVAL;
+ lltable_link_entry(llt, lle);
+ IF_AFDATA_WUNLOCK(ifp);
+ if (lle_tmp != NULL) {
+ EVENTHANDLER_INVOKE(lle_event, lle_tmp,LLENTRY_EXPIRED);
+ lltable_free_entry(llt, lle_tmp);
+ }
- return (error);
-}
+ /*
+ * By invoking LLE handler here we might get
+ * two events on static LLE entry insertion
+ * in routing socket. However, since we might have
+ * other subscribers we need to generate this event.
+ */
+ EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED);
+ LLE_WUNLOCK(lle);
+#ifdef INET
+ /* gratuitous ARP */
+ if ((laflags & LLE_PUB) && dst->sa_family == AF_INET)
+ arprequest(ifp,
+ &((struct sockaddr_in *)dst)->sin_addr,
+ &((struct sockaddr_in *)dst)->sin_addr,
+ (u_char *)LLADDR(dl));
+#endif
-static void
-vnet_lltable_init()
-{
+ break;
- SLIST_INIT(&V_lltables);
+ case RTM_DELETE:
+ return (lltable_delete_addr(llt, 0, dst));
+
+ default:
+ error = EINVAL;
+ }
+
+ return (error);
}
-VNET_SYSINIT(vnet_lltable_init, SI_SUB_PSEUDO, SI_ORDER_FIRST,
- vnet_lltable_init, NULL);
#ifdef DDB
struct llentry_sa {
@@ -429,15 +851,14 @@ llatbl_lle_show(struct llentry_sa *la)
db_printf(" la_flags=0x%04x\n", lle->la_flags);
db_printf(" la_asked=%u\n", lle->la_asked);
db_printf(" la_preempt=%u\n", lle->la_preempt);
- db_printf(" ln_byhint=%u\n", lle->ln_byhint);
db_printf(" ln_state=%d\n", lle->ln_state);
db_printf(" ln_router=%u\n", lle->ln_router);
db_printf(" ln_ntick=%ju\n", (uintmax_t)lle->ln_ntick);
db_printf(" lle_refcnt=%d\n", lle->lle_refcnt);
- bcopy(&lle->ll_addr.mac16, octet, sizeof(octet));
+ bcopy(lle->ll_addr, octet, sizeof(octet));
db_printf(" ll_addr=%02x:%02x:%02x:%02x:%02x:%02x\n",
octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);
- db_printf(" la_timer=%p\n", &lle->la_timer);
+ db_printf(" lle_timer=%p\n", &lle->lle_timer);
switch (la->l3_addr.sa_family) {
#ifdef INET
@@ -490,7 +911,7 @@ llatbl_llt_show(struct lltable *llt)
db_printf("llt=%p llt_af=%d llt_ifp=%p\n",
llt, llt->llt_af, llt->llt_ifp);
- for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
+ for (i = 0; i < llt->llt_hsize; i++) {
LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
llatbl_lle_show((struct llentry_sa *)lle);
diff --git a/freebsd/sys/net/if_llatbl.h b/freebsd/sys/net/if_llatbl.h
index 8ac72c4f..51de726a 100644
--- a/freebsd/sys/net/if_llatbl.h
+++ b/freebsd/sys/net/if_llatbl.h
@@ -30,8 +30,6 @@ __FBSDID("$FreeBSD$");
#ifndef _NET_IF_LLATBL_H_
#define _NET_IF_LLATBL_H_
-#include <rtems/bsd/local/opt_ofed.h>
-
#include <sys/_rwlock.h>
#include <netinet/in.h>
@@ -50,42 +48,44 @@ extern struct rwlock lltable_rwlock;
#define LLTABLE_WUNLOCK() rw_wunlock(&lltable_rwlock)
#define LLTABLE_LOCK_ASSERT() rw_assert(&lltable_rwlock, RA_LOCKED)
+#define LLE_MAX_LINKHDR 24 /* Full IB header */
/*
* Code referencing llentry must at least hold
* a shared lock
*/
struct llentry {
LIST_ENTRY(llentry) lle_next;
- struct rwlock lle_lock;
+ union {
+ struct in_addr addr4;
+ struct in6_addr addr6;
+ } r_l3addr;
+ char r_linkdata[LLE_MAX_LINKHDR]; /* L2 data */
+ uint8_t r_hdrlen; /* length for LL header */
+ uint8_t spare0[3];
+ uint16_t r_flags; /* LLE runtime flags */
+ uint16_t r_skip_req; /* feedback from fast path */
+
struct lltable *lle_tbl;
struct llentries *lle_head;
- void (*lle_free)(struct lltable *, struct llentry *);
+ void (*lle_free)(struct llentry *);
struct mbuf *la_hold;
int la_numheld; /* # of packets currently held */
time_t la_expire;
uint16_t la_flags;
uint16_t la_asked;
uint16_t la_preempt;
- uint16_t ln_byhint;
int16_t ln_state; /* IPv6 has ND6_LLINFO_NOSTATE == -2 */
uint16_t ln_router;
time_t ln_ntick;
+ time_t lle_remtime; /* Real time remaining */
+ time_t lle_hittime; /* Time when r_skip_req was unset */
int lle_refcnt;
+ char *ll_addr; /* link-layer address */
- union {
- uint64_t mac_aligned;
- uint16_t mac16[3];
-#ifdef OFED
- uint8_t mac8[20]; /* IB needs 20 bytes. */
-#endif
- } ll_addr;
-
- /* XXX af-private? */
- union {
- struct callout ln_timer_ch;
- struct callout la_timer;
- } lle_timer;
- /* NB: struct sockaddr must immediately follow */
+ LIST_ENTRY(llentry) lle_chain; /* chain of deleted items */
+ struct callout lle_timer;
+ struct rwlock lle_lock;
+ struct mtx req_mtx;
};
#define LLE_WLOCK(lle) rw_wlock(&(lle)->lle_lock)
@@ -98,6 +98,12 @@ struct llentry {
#define LLE_LOCK_DESTROY(lle) rw_destroy(&(lle)->lle_lock)
#define LLE_WLOCK_ASSERT(lle) rw_assert(&(lle)->lle_lock, RA_WLOCKED)
+#define LLE_REQ_INIT(lle) mtx_init(&(lle)->req_mtx, "lle req", \
+ NULL, MTX_DEF)
+#define LLE_REQ_DESTROY(lle) mtx_destroy(&(lle)->req_mtx)
+#define LLE_REQ_LOCK(lle) mtx_lock(&(lle)->req_mtx)
+#define LLE_REQ_UNLOCK(lle) mtx_unlock(&(lle)->req_mtx)
+
#define LLE_IS_VALID(lle) (((lle) != NULL) && ((lle) != (void *)-1))
#define LLE_ADDREF(lle) do { \
@@ -118,7 +124,7 @@ struct llentry {
#define LLE_FREE_LOCKED(lle) do { \
if ((lle)->lle_refcnt == 1) \
- (lle)->lle_free((lle)->lle_tbl, (lle)); \
+ (lle)->lle_free(lle); \
else { \
LLE_REMREF(lle); \
LLE_WUNLOCK(lle); \
@@ -132,58 +138,77 @@ struct llentry {
LLE_FREE_LOCKED(lle); \
} while (0)
+typedef struct llentry *(llt_lookup_t)(struct lltable *, u_int flags,
+ const struct sockaddr *l3addr);
+typedef struct llentry *(llt_alloc_t)(struct lltable *, u_int flags,
+ const struct sockaddr *l3addr);
+typedef void (llt_delete_t)(struct lltable *, struct llentry *);
+typedef void (llt_prefix_free_t)(struct lltable *,
+ const struct sockaddr *addr, const struct sockaddr *mask, u_int flags);
+typedef int (llt_dump_entry_t)(struct lltable *, struct llentry *,
+ struct sysctl_req *);
+typedef uint32_t (llt_hash_t)(const struct llentry *, uint32_t);
+typedef int (llt_match_prefix_t)(const struct sockaddr *,
+ const struct sockaddr *, u_int, struct llentry *);
+typedef void (llt_free_entry_t)(struct lltable *, struct llentry *);
+typedef void (llt_fill_sa_entry_t)(const struct llentry *, struct sockaddr *);
+typedef void (llt_free_tbl_t)(struct lltable *);
+typedef void (llt_link_entry_t)(struct lltable *, struct llentry *);
+typedef void (llt_unlink_entry_t)(struct llentry *);
-#define ln_timer_ch lle_timer.ln_timer_ch
-#define la_timer lle_timer.la_timer
-
-/* XXX bad name */
-#define L3_ADDR(lle) ((struct sockaddr *)(&lle[1]))
-#define L3_ADDR_LEN(lle) (((struct sockaddr *)(&lle[1]))->sa_len)
-
-#ifndef LLTBL_HASHTBL_SIZE
-#define LLTBL_HASHTBL_SIZE 32 /* default 32 ? */
-#endif
-
-#ifndef LLTBL_HASHMASK
-#define LLTBL_HASHMASK (LLTBL_HASHTBL_SIZE - 1)
-#endif
+typedef int (llt_foreach_cb_t)(struct lltable *, struct llentry *, void *);
+typedef int (llt_foreach_entry_t)(struct lltable *, llt_foreach_cb_t *, void *);
struct lltable {
SLIST_ENTRY(lltable) llt_link;
- struct llentries lle_head[LLTBL_HASHTBL_SIZE];
int llt_af;
+ int llt_hsize;
+ struct llentries *lle_head;
struct ifnet *llt_ifp;
- void (*llt_prefix_free)(struct lltable *,
- const struct sockaddr *prefix,
- const struct sockaddr *mask,
- u_int flags);
- struct llentry * (*llt_lookup)(struct lltable *, u_int flags,
- const struct sockaddr *l3addr);
- int (*llt_dump)(struct lltable *,
- struct sysctl_req *);
+ llt_lookup_t *llt_lookup;
+ llt_alloc_t *llt_alloc_entry;
+ llt_delete_t *llt_delete_entry;
+ llt_prefix_free_t *llt_prefix_free;
+ llt_dump_entry_t *llt_dump_entry;
+ llt_hash_t *llt_hash;
+ llt_match_prefix_t *llt_match_prefix;
+ llt_free_entry_t *llt_free_entry;
+ llt_foreach_entry_t *llt_foreach_entry;
+ llt_link_entry_t *llt_link_entry;
+ llt_unlink_entry_t *llt_unlink_entry;
+ llt_fill_sa_entry_t *llt_fill_sa_entry;
+ llt_free_tbl_t *llt_free_tbl;
};
+
MALLOC_DECLARE(M_LLTABLE);
/*
- * flags to be passed to arplookup.
+ * LLentry flags
*/
#define LLE_DELETED 0x0001 /* entry must be deleted */
#define LLE_STATIC 0x0002 /* entry is static */
#define LLE_IFADDR 0x0004 /* entry is interface addr */
#define LLE_VALID 0x0008 /* ll_addr is valid */
-#define LLE_PROXY 0x0010 /* proxy entry ??? */
+#define LLE_REDIRECT 0x0010 /* installed by redirect; has host rtentry */
#define LLE_PUB 0x0020 /* publish entry ??? */
#define LLE_LINKED 0x0040 /* linked to lookup structure */
+/* LLE request flags */
#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */
-#define LLE_DELETE 0x4000 /* delete on a lookup - match LLE_IFADDR */
-#define LLE_CREATE 0x8000 /* create on a lookup miss */
+#define LLE_UNLOCKED 0x4000 /* return lle unlocked */
+#define LLE_ADDRONLY 0x4000 /* return lladdr instead of full header */
+#define LLE_CREATE 0x8000 /* hint to avoid lle lookup */
+
+/* LLE flags used by fastpath code */
+#define RLLE_VALID 0x0001 /* entry is valid */
+#define RLLE_IFADDR LLE_IFADDR /* entry is ifaddr */
#define LLATBL_HASH(key, mask) \
(((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & mask)
-struct lltable *lltable_init(struct ifnet *, int);
+struct lltable *lltable_allocate_htbl(uint32_t hsize);
void lltable_free(struct lltable *);
+void lltable_link(struct lltable *llt);
void lltable_prefix_free(int, struct sockaddr *,
struct sockaddr *, u_int);
#if 0
@@ -195,13 +220,37 @@ size_t llentry_free(struct llentry *);
struct llentry *llentry_alloc(struct ifnet *, struct lltable *,
struct sockaddr_storage *);
+/* helper functions */
+size_t lltable_drop_entry_queue(struct llentry *);
+void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
+ const char *linkhdr, size_t linkhdrsize, int lladdr_off);
+int lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
+ const char *linkhdr, size_t linkhdrsize, int lladdr_off);
+
+int lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
+ char *buf, size_t *bufsize, int *lladdr_off);
+void lltable_update_ifaddr(struct lltable *llt);
+struct llentry *lltable_alloc_entry(struct lltable *llt, u_int flags,
+ const struct sockaddr *l4addr);
+void lltable_free_entry(struct lltable *llt, struct llentry *lle);
+int lltable_delete_addr(struct lltable *llt, u_int flags,
+ const struct sockaddr *l3addr);
+void lltable_link_entry(struct lltable *llt, struct llentry *lle);
+void lltable_unlink_entry(struct lltable *llt, struct llentry *lle);
+void lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa);
+struct ifnet *lltable_get_ifp(const struct lltable *llt);
+int lltable_get_af(const struct lltable *llt);
+
+int lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f,
+ void *farg);
/*
* Generic link layer address lookup function.
*/
static __inline struct llentry *
lla_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
{
- return llt->llt_lookup(llt, flags, l3addr);
+
+ return (llt->llt_lookup(llt, flags, l3addr));
}
int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
diff --git a/freebsd/sys/net/if_loop.c b/freebsd/sys/net/if_loop.c
index b40dec8e..aa5109eb 100644
--- a/freebsd/sys/net/if_loop.c
+++ b/freebsd/sys/net/if_loop.c
@@ -36,10 +36,8 @@
* Loopback interface driver for protocol testing and timing.
*/
-#include <rtems/bsd/local/opt_atalk.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -53,6 +51,7 @@
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
#include <net/netisr.h>
@@ -65,11 +64,6 @@
#include <netinet/in_var.h>
#endif
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
#ifdef INET6
#ifndef INET
#include <netinet/in.h>
@@ -78,11 +72,6 @@
#include <netinet/ip6.h>
#endif
-#ifdef NETATALK
-#include <netatalk/at.h>
-#include <netatalk/at_var.h>
-#endif
-
#include <security/mac/mac_framework.h>
#ifdef TINY_LOMTU
@@ -101,22 +90,20 @@
CSUM_SCTP_VALID)
int loioctl(struct ifnet *, u_long, caddr_t);
-static void lortrequest(int, struct rtentry *, struct rt_addrinfo *);
int looutput(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro);
+ const struct sockaddr *dst, struct route *ro);
static int lo_clone_create(struct if_clone *, int, caddr_t);
static void lo_clone_destroy(struct ifnet *);
VNET_DEFINE(struct ifnet *, loif); /* Used externally */
#ifdef VIMAGE
-static VNET_DEFINE(struct ifc_simple_data, lo_cloner_data);
-static VNET_DEFINE(struct if_clone, lo_cloner);
-#define V_lo_cloner_data VNET(lo_cloner_data)
+static VNET_DEFINE(struct if_clone *, lo_cloner);
#define V_lo_cloner VNET(lo_cloner)
#endif
-IFC_SIMPLE_DECLARE(lo, 1);
+static struct if_clone *lo_cloner;
+static const char loname[] = "lo";
static void
lo_clone_destroy(struct ifnet *ifp)
@@ -141,7 +128,7 @@ lo_clone_create(struct if_clone *ifc, int unit, caddr_t params)
if (ifp == NULL)
return (ENOSPC);
- if_initname(ifp, ifc->ifc_name, unit);
+ if_initname(ifp, loname, unit);
ifp->if_mtu = LOMTU;
ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST;
ifp->if_ioctl = loioctl;
@@ -163,15 +150,15 @@ vnet_loif_init(const void *unused __unused)
{
#ifdef VIMAGE
+ lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy,
+ 1);
V_lo_cloner = lo_cloner;
- V_lo_cloner_data = lo_cloner_data;
- V_lo_cloner.ifc_data = &V_lo_cloner_data;
- if_clone_attach(&V_lo_cloner);
#else
- if_clone_attach(&lo_cloner);
+ lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy,
+ 1);
#endif
}
-VNET_SYSINIT(vnet_loif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+VNET_SYSINIT(vnet_loif_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
vnet_loif_init, NULL);
#ifdef VIMAGE
@@ -179,10 +166,10 @@ static void
vnet_loif_uninit(const void *unused __unused)
{
- if_clone_detach(&V_lo_cloner);
+ if_clone_detach(V_lo_cloner);
V_loif = NULL;
}
-VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND,
vnet_loif_uninit, NULL);
#endif
@@ -213,19 +200,16 @@ static moduledata_t loop_mod = {
DECLARE_MODULE(if_lo, loop_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
int
-looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+looutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
{
u_int32_t af;
- struct rtentry *rt = NULL;
#ifdef MAC
int error;
#endif
M_ASSERTPKTHDR(m); /* check if we have the packet header */
- if (ro != NULL)
- rt = ro->ro_rt;
#ifdef MAC
error = mac_ifnet_check_transmit(ifp, m);
if (error) {
@@ -234,23 +218,22 @@ looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
}
#endif
- if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
+ if (ro != NULL && ro->ro_flags & (RT_REJECT|RT_BLACKHOLE)) {
m_freem(m);
- return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
- rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+ return (ro->ro_flags & RT_BLACKHOLE ? 0 : EHOSTUNREACH);
}
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
/* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
+ if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT)
bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
+ else
+ af = dst->sa_family;
#if 1 /* XXX */
- switch (dst->sa_family) {
+ switch (af) {
case AF_INET:
if (ifp->if_capenable & IFCAP_RXCSUM) {
m->m_pkthdr.csum_data = 0xffff;
@@ -275,16 +258,13 @@ looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
#endif
m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES6;
break;
- case AF_IPX:
- case AF_APPLETALK:
- break;
default:
- printf("looutput: af=%d unexpected\n", dst->sa_family);
+ printf("looutput: af=%d unexpected\n", af);
m_freem(m);
return (EAFNOSUPPORT);
}
#endif
- return (if_simloop(ifp, m, dst->sa_family, 0));
+ return (if_simloop(ifp, m, af, 0));
}
/*
@@ -370,36 +350,17 @@ if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen)
isr = NETISR_IPV6;
break;
#endif
-#ifdef IPX
- case AF_IPX:
- isr = NETISR_IPX;
- break;
-#endif
-#ifdef NETATALK
- case AF_APPLETALK:
- isr = NETISR_ATALK2;
- break;
-#endif
default:
printf("if_simloop: can't handle af=%d\n", af);
m_freem(m);
return (EAFNOSUPPORT);
}
- ifp->if_ipackets++;
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
netisr_queue(isr, m); /* mbuf is free'd on failure. */
return (0);
}
-/* ARGSUSED */
-static void
-lortrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
-{
-
- RT_LOCK_ASSERT(rt);
- rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
-}
-
/*
* Process an ioctl request.
*/
@@ -407,7 +368,6 @@ lortrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
int
loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- struct ifaddr *ifa;
struct ifreq *ifr = (struct ifreq *)data;
int error = 0, mask;
@@ -415,8 +375,6 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCSIFADDR:
ifp->if_flags |= IFF_UP;
ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifa = (struct ifaddr *)data;
- ifa->ifa_rtrequest = lortrequest;
/*
* Everything else is done at a higher level.
*/
@@ -424,7 +382,7 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCADDMULTI:
case SIOCDELMULTI:
- if (ifr == 0) {
+ if (ifr == NULL) {
error = EAFNOSUPPORT; /* XXX */
break;
}
diff --git a/freebsd/sys/net/if_media.c b/freebsd/sys/net/if_media.c
index 264d3535..66b13568 100644
--- a/freebsd/sys/net/if_media.c
+++ b/freebsd/sys/net/if_media.c
@@ -48,6 +48,8 @@
* to implement this interface.
*/
+#include <rtems/bsd/local/opt_ifmedia.h>
+
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
@@ -70,6 +72,7 @@ static struct ifmedia_entry *ifmedia_match(struct ifmedia *ifm,
int flags, int mask);
#ifdef IFMEDIA_DEBUG
+#include <net/if_var.h>
int ifmedia_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, ifmedia, CTLFLAG_RW, &ifmedia_debug,
0, "if_media debugging msgs");
@@ -195,6 +198,21 @@ ifmedia_set(ifm, target)
}
/*
+ * Given a media word, return one suitable for an application
+ * using the original encoding.
+ */
+static int
+compat_media(int media)
+{
+
+ if (IFM_TYPE(media) == IFM_ETHER && IFM_SUBTYPE(media) > IFM_OTHER) {
+ media &= ~(IFM_ETH_XTYPE|IFM_TMASK);
+ media |= IFM_OTHER;
+ }
+ return (media);
+}
+
+/*
* Device-independent media ioctl support function.
*/
int
@@ -206,7 +224,7 @@ ifmedia_ioctl(ifp, ifr, ifm, cmd)
{
struct ifmedia_entry *match;
struct ifmediareq *ifmr = (struct ifmediareq *) ifr;
- int error = 0, sticky;
+ int error = 0;
if (ifp == NULL || ifr == NULL || ifm == NULL)
return(EINVAL);
@@ -273,80 +291,42 @@ ifmedia_ioctl(ifp, ifr, ifm, cmd)
* Get list of available media and current media on interface.
*/
case SIOCGIFMEDIA:
+ case SIOCGIFXMEDIA:
{
struct ifmedia_entry *ep;
- int *kptr, count;
- int usermax; /* user requested max */
+ int i;
- kptr = NULL; /* XXX gcc */
+ if (ifmr->ifm_count < 0)
+ return (EINVAL);
- ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ?
- ifm->ifm_cur->ifm_media : IFM_NONE;
+ if (cmd == SIOCGIFMEDIA) {
+ ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ?
+ compat_media(ifm->ifm_cur->ifm_media) : IFM_NONE;
+ } else {
+ ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ?
+ ifm->ifm_cur->ifm_media : IFM_NONE;
+ }
ifmr->ifm_mask = ifm->ifm_mask;
ifmr->ifm_status = 0;
(*ifm->ifm_status)(ifp, ifmr);
- count = 0;
- usermax = 0;
-
/*
* If there are more interfaces on the list, count
* them. This allows the caller to set ifmr->ifm_count
* to 0 on the first call to know how much space to
* allocate.
*/
+ i = 0;
LIST_FOREACH(ep, &ifm->ifm_list, ifm_list)
- usermax++;
-
- /*
- * Don't allow the user to ask for too many
- * or a negative number.
- */
- if (ifmr->ifm_count > usermax)
- ifmr->ifm_count = usermax;
- else if (ifmr->ifm_count < 0)
- return (EINVAL);
-
- if (ifmr->ifm_count != 0) {
- kptr = (int *)malloc(ifmr->ifm_count * sizeof(int),
- M_TEMP, M_NOWAIT);
-
- if (kptr == NULL)
- return (ENOMEM);
- /*
- * Get the media words from the interface's list.
- */
- ep = LIST_FIRST(&ifm->ifm_list);
- for (; ep != NULL && count < ifmr->ifm_count;
- ep = LIST_NEXT(ep, ifm_list), count++)
- kptr[count] = ep->ifm_media;
-
- if (ep != NULL)
- error = E2BIG; /* oops! */
- } else {
- count = usermax;
- }
-
- /*
- * We do the copyout on E2BIG, because that's
- * just our way of telling userland that there
- * are more. This is the behavior I've observed
- * under BSD/OS 3.0
- */
- sticky = error;
- if ((error == 0 || error == E2BIG) && ifmr->ifm_count != 0) {
- error = copyout((caddr_t)kptr,
- (caddr_t)ifmr->ifm_ulist,
- ifmr->ifm_count * sizeof(int));
- }
-
- if (error == 0)
- error = sticky;
-
- if (ifmr->ifm_count != 0)
- free(kptr, M_TEMP);
-
- ifmr->ifm_count = count;
+ if (i++ < ifmr->ifm_count) {
+ error = copyout(&ep->ifm_media,
+ ifmr->ifm_ulist + i - 1, sizeof(int));
+ if (error)
+ break;
+ }
+ if (error == 0 && i > ifmr->ifm_count)
+ error = ifmr->ifm_count ? E2BIG : 0;
+ ifmr->ifm_count = i;
break;
}
@@ -400,8 +380,7 @@ ifmedia_baudrate(int mword)
int i;
for (i = 0; ifmedia_baudrate_descriptions[i].ifmb_word != 0; i++) {
- if ((mword & (IFM_NMASK|IFM_TMASK)) ==
- ifmedia_baudrate_descriptions[i].ifmb_word)
+ if (IFM_TYPE_MATCH(mword, ifmedia_baudrate_descriptions[i].ifmb_word))
return (ifmedia_baudrate_descriptions[i].ifmb_baudrate);
}
@@ -507,7 +486,7 @@ ifmedia_printword(ifmw)
printf("<unknown type>\n");
return;
}
- printf(desc->ifmt_string);
+ printf("%s", desc->ifmt_string);
/* Any mode. */
for (desc = ttos->modes; desc && desc->ifmt_string != NULL; desc++)
diff --git a/freebsd/sys/net/if_media.h b/freebsd/sys/net/if_media.h
index 12585095..86439950 100644
--- a/freebsd/sys/net/if_media.h
+++ b/freebsd/sys/net/if_media.h
@@ -54,11 +54,13 @@
#include <sys/queue.h>
+struct ifnet;
+
/*
* Driver callbacks for media status and change requests.
*/
-typedef int (*ifm_change_cb_t)(struct ifnet *ifp);
-typedef void (*ifm_stat_cb_t)(struct ifnet *ifp, struct ifmediareq *req);
+typedef int (*ifm_change_cb_t)(struct ifnet *);
+typedef void (*ifm_stat_cb_t)(struct ifnet *, struct ifmediareq *req);
/*
* In-kernel representation of a single supported media type.
@@ -104,6 +106,7 @@ void ifmedia_set(struct ifmedia *ifm, int mword);
int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr,
struct ifmedia *ifm, u_long cmd);
+
/* Compute baudrate for a given media. */
uint64_t ifmedia_baudrate(int);
@@ -115,7 +118,7 @@ uint64_t ifmedia_baudrate(int);
* ---- -------
* 0-4 Media variant
* 5-7 Media type
- * 8-15 Type specific options
+ * 8-15 Type specific options (includes added variant bits on Ethernet)
* 16-18 Mode (for multi-mode devices)
* 19 RFU
* 20-27 Shared (global) options
@@ -124,8 +127,18 @@ uint64_t ifmedia_baudrate(int);
/*
* Ethernet
+ * In order to use more than 31 subtypes, Ethernet uses some of the option
+ * bits as part of the subtype field. See the options section below for
+ * relevant definitions
*/
#define IFM_ETHER 0x00000020
+#define IFM_ETHER_SUBTYPE(x) (((x) & IFM_TMASK) | \
+ (((x) & (IFM_ETH_XTYPE >> IFM_ETH_XSHIFT)) << IFM_ETH_XSHIFT))
+#define IFM_X(x) IFM_ETHER_SUBTYPE(x) /* internal shorthand */
+#define IFM_ETHER_SUBTYPE_SET(x) (IFM_ETHER_SUBTYPE(x) | IFM_ETHER)
+#define IFM_ETHER_SUBTYPE_GET(x) ((x) & (IFM_TMASK|IFM_ETH_XTYPE))
+#define IFM_ETHER_IS_EXTENDED(x) ((x) & IFM_ETH_XTYPE)
+
#define IFM_10_T 3 /* 10BaseT - RJ45 */
#define IFM_10_2 4 /* 10Base2 - Thinnet */
#define IFM_10_5 5 /* 10Base5 - AUI */
@@ -153,15 +166,49 @@ uint64_t ifmedia_baudrate(int);
#define IFM_40G_CR4 27 /* 40GBase-CR4 */
#define IFM_40G_SR4 28 /* 40GBase-SR4 */
#define IFM_40G_LR4 29 /* 40GBase-LR4 */
+#define IFM_1000_KX 30 /* 1000Base-KX backplane */
+#define IFM_OTHER 31 /* Other: one of the following */
+
+/* following types are not visible to old binaries using only IFM_TMASK */
+#define IFM_10G_KX4 IFM_X(32) /* 10GBase-KX4 backplane */
+#define IFM_10G_KR IFM_X(33) /* 10GBase-KR backplane */
+#define IFM_10G_CR1 IFM_X(34) /* 10GBase-CR1 Twinax splitter */
+#define IFM_20G_KR2 IFM_X(35) /* 20GBase-KR2 backplane */
+#define IFM_2500_KX IFM_X(36) /* 2500Base-KX backplane */
+#define IFM_2500_T IFM_X(37) /* 2500Base-T - RJ45 (NBaseT) */
+#define IFM_5000_T IFM_X(38) /* 5000Base-T - RJ45 (NBaseT) */
+#define IFM_50G_PCIE IFM_X(39) /* 50G Ethernet over PCIE */
+#define IFM_25G_PCIE IFM_X(40) /* 25G Ethernet over PCIE */
+#define IFM_1000_SGMII IFM_X(41) /* 1G media interface */
+#define IFM_10G_SFI IFM_X(42) /* 10G media interface */
+#define IFM_40G_XLPPI IFM_X(43) /* 40G media interface */
+#define IFM_1000_CX_SGMII IFM_X(44) /* 1000Base-CX-SGMII */
+#define IFM_40G_KR4 IFM_X(45) /* 40GBase-KR4 */
+#define IFM_10G_ER IFM_X(46) /* 10GBase-ER */
+#define IFM_100G_CR4 IFM_X(47) /* 100GBase-CR4 */
+#define IFM_100G_SR4 IFM_X(48) /* 100GBase-SR4 */
+#define IFM_100G_KR4 IFM_X(49) /* 100GBase-KR4 */
+#define IFM_100G_LR4 IFM_X(50) /* 100GBase-LR4 */
+#define IFM_56G_R4 IFM_X(51) /* 56GBase-R4 */
+#define IFM_100_T IFM_X(52) /* 100BaseT - RJ45 */
+#define IFM_25G_CR IFM_X(53) /* 25GBase-CR */
+#define IFM_25G_KR IFM_X(54) /* 25GBase-KR */
+#define IFM_25G_SR IFM_X(55) /* 25GBase-SR */
+#define IFM_50G_CR2 IFM_X(56) /* 50GBase-CR2 */
+#define IFM_50G_KR2 IFM_X(57) /* 50GBase-KR2 */
+
/*
* Please update ieee8023ad_lacp.c:lacp_compose_key()
* after adding new Ethernet media types.
*/
-/* note 31 is the max! */
+/* Note IFM_X(511) is the max! */
+/* Ethernet option values; includes bits used for extended variant field */
#define IFM_ETH_MASTER 0x00000100 /* master mode (1000baseT) */
#define IFM_ETH_RXPAUSE 0x00000200 /* receive PAUSE frames */
#define IFM_ETH_TXPAUSE 0x00000400 /* transmit PAUSE frames */
+#define IFM_ETH_XTYPE 0x00007800 /* extended media variants */
+#define IFM_ETH_XSHIFT 6 /* shift XTYPE next to TMASK */
/*
* Token ring
@@ -253,11 +300,6 @@ uint64_t ifmedia_baudrate(int);
#define IFM_ATM_UNASSIGNED 0x00000400 /* unassigned cells */
/*
- * CARP Common Address Redundancy Protocol
- */
-#define IFM_CARP 0x000000c0
-
-/*
* Shared media sub-types
*/
#define IFM_AUTO 0 /* Autoselect best media */
@@ -309,7 +351,10 @@ uint64_t ifmedia_baudrate(int);
* Macros to extract various bits of information from the media word.
*/
#define IFM_TYPE(x) ((x) & IFM_NMASK)
-#define IFM_SUBTYPE(x) ((x) & IFM_TMASK)
+#define IFM_SUBTYPE(x) \
+ (IFM_TYPE(x) == IFM_ETHER ? IFM_ETHER_SUBTYPE_GET(x) : ((x) & IFM_TMASK))
+#define IFM_TYPE_MATCH(x,y) \
+ (IFM_TYPE(x) == IFM_TYPE(y) && IFM_SUBTYPE(x) == IFM_SUBTYPE(y))
#define IFM_TYPE_OPTIONS(x) ((x) & IFM_OMASK)
#define IFM_INST(x) (((x) & IFM_IMASK) >> IFM_ISHIFT)
#define IFM_OPTIONS(x) ((x) & (IFM_OMASK | IFM_GMASK))
@@ -343,7 +388,6 @@ struct ifmedia_description {
{ IFM_FDDI, "FDDI" }, \
{ IFM_IEEE80211, "IEEE 802.11 Wireless Ethernet" }, \
{ IFM_ATM, "ATM" }, \
- { IFM_CARP, "Common Address Redundancy Protocol" }, \
{ 0, NULL }, \
}
@@ -375,6 +419,34 @@ struct ifmedia_description {
{ IFM_40G_CR4, "40Gbase-CR4" }, \
{ IFM_40G_SR4, "40Gbase-SR4" }, \
{ IFM_40G_LR4, "40Gbase-LR4" }, \
+ { IFM_1000_KX, "1000Base-KX" }, \
+ { IFM_OTHER, "Other" }, \
+ { IFM_10G_KX4, "10GBase-KX4" }, \
+ { IFM_10G_KR, "10GBase-KR" }, \
+ { IFM_10G_CR1, "10GBase-CR1" }, \
+ { IFM_20G_KR2, "20GBase-KR2" }, \
+ { IFM_2500_KX, "2500Base-KX" }, \
+ { IFM_2500_T, "2500Base-T" }, \
+ { IFM_5000_T, "5000Base-T" }, \
+ { IFM_50G_PCIE, "PCIExpress-50G" }, \
+ { IFM_25G_PCIE, "PCIExpress-25G" }, \
+ { IFM_1000_SGMII, "1000Base-SGMII" }, \
+ { IFM_10G_SFI, "10GBase-SFI" }, \
+ { IFM_40G_XLPPI, "40GBase-XLPPI" }, \
+ { IFM_1000_CX_SGMII, "1000Base-CX-SGMII" }, \
+ { IFM_40G_KR4, "40GBase-KR4" }, \
+ { IFM_10G_ER, "10GBase-ER" }, \
+ { IFM_100G_CR4, "100GBase-CR4" }, \
+ { IFM_100G_SR4, "100GBase-SR4" }, \
+ { IFM_100G_KR4, "100GBase-KR4" }, \
+ { IFM_100G_LR4, "100GBase-LR4" }, \
+ { IFM_56G_R4, "56GBase-R4" }, \
+ { IFM_100_T, "100BaseT" }, \
+ { IFM_25G_CR, "25GBase-CR" }, \
+ { IFM_25G_KR, "25GBase-KR" }, \
+ { IFM_25G_SR, "25GBase-SR" }, \
+ { IFM_50G_CR2, "50GBase-CR2" }, \
+ { IFM_50G_KR2, "50GBase-KR2" }, \
{ 0, NULL }, \
}
@@ -676,6 +748,33 @@ struct ifmedia_baudrate {
{ IFM_ETHER | IFM_40G_CR4, IF_Gbps(40ULL) }, \
{ IFM_ETHER | IFM_40G_SR4, IF_Gbps(40ULL) }, \
{ IFM_ETHER | IFM_40G_LR4, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_1000_KX, IF_Mbps(1000) }, \
+ { IFM_ETHER | IFM_10G_KX4, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_10G_KR, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_10G_CR1, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_20G_KR2, IF_Gbps(20ULL) }, \
+ { IFM_ETHER | IFM_2500_KX, IF_Mbps(2500) }, \
+ { IFM_ETHER | IFM_2500_T, IF_Mbps(2500) }, \
+ { IFM_ETHER | IFM_5000_T, IF_Mbps(5000) }, \
+ { IFM_ETHER | IFM_50G_PCIE, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_25G_PCIE, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_1000_SGMII, IF_Mbps(1000) }, \
+ { IFM_ETHER | IFM_10G_SFI, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_40G_XLPPI, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_1000_CX_SGMII, IF_Mbps(1000) }, \
+ { IFM_ETHER | IFM_40G_KR4, IF_Gbps(40ULL) }, \
+ { IFM_ETHER | IFM_10G_ER, IF_Gbps(10ULL) }, \
+ { IFM_ETHER | IFM_100G_CR4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_SR4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_KR4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_100G_LR4, IF_Gbps(100ULL) }, \
+ { IFM_ETHER | IFM_56G_R4, IF_Gbps(56ULL) }, \
+ { IFM_ETHER | IFM_100_T, IF_Mbps(100ULL) }, \
+ { IFM_ETHER | IFM_25G_CR, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_25G_KR, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_25G_SR, IF_Gbps(25ULL) }, \
+ { IFM_ETHER | IFM_50G_CR2, IF_Gbps(50ULL) }, \
+ { IFM_ETHER | IFM_50G_KR2, IF_Gbps(50ULL) }, \
\
{ IFM_TOKEN | IFM_TOK_STP4, IF_Mbps(4) }, \
{ IFM_TOKEN | IFM_TOK_STP16, IF_Mbps(16) }, \
@@ -730,8 +829,6 @@ struct ifmedia_status_description {
{ "no network", "active" } }, \
{ IFM_ATM, IFM_AVALID, IFM_ACTIVE, \
{ "no network", "active" } }, \
- { IFM_CARP, IFM_AVALID, IFM_ACTIVE, \
- { "backup", "master" } }, \
{ 0, 0, 0, \
{ NULL, NULL } } \
}
diff --git a/freebsd/sys/net/if_mib.c b/freebsd/sys/net/if_mib.c
index ec7a6984..d91c94ab 100644
--- a/freebsd/sys/net/if_mib.c
+++ b/freebsd/sys/net/if_mib.c
@@ -34,10 +34,12 @@
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_mib.h>
#include <net/vnet.h>
@@ -68,9 +70,9 @@ SYSCTL_DECL(_net_link_generic);
static SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW, 0,
"Variables global to all interfaces");
-SYSCTL_VNET_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, CTLFLAG_RD,
- &VNET_NAME(if_index), 0,
- "Number of configured interfaces");
+SYSCTL_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount,
+ CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(if_index), 0,
+ "Number of configured interfaces");
static int
sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */
@@ -100,37 +102,18 @@ sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */
bzero(&ifmd, sizeof(ifmd));
strlcpy(ifmd.ifmd_name, ifp->if_xname, sizeof(ifmd.ifmd_name));
-#define COPY(fld) ifmd.ifmd_##fld = ifp->if_##fld
- COPY(pcount);
- COPY(data);
-#undef COPY
+ ifmd.ifmd_pcount = ifp->if_pcount;
+ if_data_copy(ifp, &ifmd.ifmd_data);
+
ifmd.ifmd_flags = ifp->if_flags | ifp->if_drv_flags;
ifmd.ifmd_snd_len = ifp->if_snd.ifq_len;
ifmd.ifmd_snd_maxlen = ifp->if_snd.ifq_maxlen;
- ifmd.ifmd_snd_drops = ifp->if_snd.ifq_drops;
+ ifmd.ifmd_snd_drops =
+ ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
error = SYSCTL_OUT(req, &ifmd, sizeof ifmd);
- if (error || !req->newptr)
- goto out;
-
- error = SYSCTL_IN(req, &ifmd, sizeof ifmd);
if (error)
goto out;
-
-#define DONTCOPY(fld) ifmd.ifmd_data.ifi_##fld = ifp->if_data.ifi_##fld
- DONTCOPY(type);
- DONTCOPY(physical);
- DONTCOPY(addrlen);
- DONTCOPY(hdrlen);
- DONTCOPY(mtu);
- DONTCOPY(metric);
- DONTCOPY(baudrate);
-#undef DONTCOPY
-#define COPY(fld) ifp->if_##fld = ifmd.ifmd_##fld
- COPY(data);
- ifp->if_snd.ifq_maxlen = ifmd.ifmd_snd_maxlen;
- ifp->if_snd.ifq_drops = ifmd.ifmd_snd_drops;
-#undef COPY
break;
case IFDATA_LINKSPECIFIC:
diff --git a/freebsd/sys/net/if_pflog.h b/freebsd/sys/net/if_pflog.h
new file mode 100644
index 00000000..0faeb7d4
--- /dev/null
+++ b/freebsd/sys/net/if_pflog.h
@@ -0,0 +1,66 @@
+/* $OpenBSD: if_pflog.h,v 1.13 2006/10/23 12:46:09 henning Exp $ */
+/*
+ * Copyright 2001 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_PFLOG_H_
+#define _NET_IF_PFLOG_H_
+
+#define PFLOGIFS_MAX 16
+
+#define PFLOG_RULESET_NAME_SIZE 16
+
+struct pfloghdr {
+ u_int8_t length;
+ sa_family_t af;
+ u_int8_t action;
+ u_int8_t reason;
+ char ifname[IFNAMSIZ];
+ char ruleset[PFLOG_RULESET_NAME_SIZE];
+ u_int32_t rulenr;
+ u_int32_t subrulenr;
+ uid_t uid;
+ pid_t pid;
+ uid_t rule_uid;
+ pid_t rule_pid;
+ u_int8_t dir;
+ u_int8_t pad[3];
+};
+
+#define PFLOG_HDRLEN sizeof(struct pfloghdr)
+/* minus pad, also used as a signature */
+#define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad)
+
+#ifdef _KERNEL
+struct pf_rule;
+struct pf_ruleset;
+struct pfi_kif;
+struct pf_pdesc;
+
+#define PFLOG_PACKET(i,a,b,c,d,e,f,g,h,di) do { \
+ if (pflog_packet_ptr != NULL) \
+ pflog_packet_ptr(i,a,b,c,d,e,f,g,h,di); \
+} while (0)
+#endif /* _KERNEL */
+#endif /* _NET_IF_PFLOG_H_ */
diff --git a/freebsd/sys/net/if_pfsync.h b/freebsd/sys/net/if_pfsync.h
new file mode 100644
index 00000000..5c4ba631
--- /dev/null
+++ b/freebsd/sys/net/if_pfsync.h
@@ -0,0 +1,265 @@
+/*-
+ * Copyright (c) 2001 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 2008 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $OpenBSD: if_pfsync.h,v 1.35 2008/06/29 08:42:15 mcbride Exp $
+ * $FreeBSD$
+ */
+
+
+#ifndef _NET_IF_PFSYNC_H_
+#define _NET_IF_PFSYNC_H_
+
+#define PFSYNC_VERSION 5
+#define PFSYNC_DFLTTL 255
+
+#define PFSYNC_ACT_CLR 0 /* clear all states */
+#define PFSYNC_ACT_INS 1 /* insert state */
+#define PFSYNC_ACT_INS_ACK 2 /* ack of insterted state */
+#define PFSYNC_ACT_UPD 3 /* update state */
+#define PFSYNC_ACT_UPD_C 4 /* "compressed" update state */
+#define PFSYNC_ACT_UPD_REQ 5 /* request "uncompressed" state */
+#define PFSYNC_ACT_DEL 6 /* delete state */
+#define PFSYNC_ACT_DEL_C 7 /* "compressed" delete state */
+#define PFSYNC_ACT_INS_F 8 /* insert fragment */
+#define PFSYNC_ACT_DEL_F 9 /* delete fragments */
+#define PFSYNC_ACT_BUS 10 /* bulk update status */
+#define PFSYNC_ACT_TDB 11 /* TDB replay counter update */
+#define PFSYNC_ACT_EOF 12 /* end of frame */
+#define PFSYNC_ACT_MAX 13
+
+/*
+ * A pfsync frame is built from a header followed by several sections which
+ * are all prefixed with their own subheaders. Frames must be terminated with
+ * an EOF subheader.
+ *
+ * | ... |
+ * | IP header |
+ * +============================+
+ * | pfsync_header |
+ * +----------------------------+
+ * | pfsync_subheader |
+ * +----------------------------+
+ * | first action fields |
+ * | ... |
+ * +----------------------------+
+ * | pfsync_subheader |
+ * +----------------------------+
+ * | second action fields |
+ * | ... |
+ * +----------------------------+
+ * | EOF pfsync_subheader |
+ * +----------------------------+
+ * | HMAC |
+ * +============================+
+ */
+
+/*
+ * Frame header
+ */
+
+struct pfsync_header {
+ u_int8_t version;
+ u_int8_t _pad;
+ u_int16_t len;
+ u_int8_t pfcksum[PF_MD5_DIGEST_LENGTH];
+} __packed;
+
+/*
+ * Frame region subheader
+ */
+
+struct pfsync_subheader {
+ u_int8_t action;
+ u_int8_t _pad;
+ u_int16_t count;
+} __packed;
+
+/*
+ * CLR
+ */
+
+struct pfsync_clr {
+ char ifname[IFNAMSIZ];
+ u_int32_t creatorid;
+} __packed;
+
+/*
+ * INS, UPD, DEL
+ */
+
+/* these use struct pfsync_state in pfvar.h */
+
+/*
+ * INS_ACK
+ */
+
+struct pfsync_ins_ack {
+ u_int64_t id;
+ u_int32_t creatorid;
+} __packed;
+
+/*
+ * UPD_C
+ */
+
+struct pfsync_upd_c {
+ u_int64_t id;
+ struct pfsync_state_peer src;
+ struct pfsync_state_peer dst;
+ u_int32_t creatorid;
+ u_int32_t expire;
+ u_int8_t timeout;
+ u_int8_t _pad[3];
+} __packed;
+
+/*
+ * UPD_REQ
+ */
+
+struct pfsync_upd_req {
+ u_int64_t id;
+ u_int32_t creatorid;
+} __packed;
+
+/*
+ * DEL_C
+ */
+
+struct pfsync_del_c {
+ u_int64_t id;
+ u_int32_t creatorid;
+} __packed;
+
+/*
+ * INS_F, DEL_F
+ */
+
+/* not implemented (yet) */
+
+/*
+ * BUS
+ */
+
+struct pfsync_bus {
+ u_int32_t creatorid;
+ u_int32_t endtime;
+ u_int8_t status;
+#define PFSYNC_BUS_START 1
+#define PFSYNC_BUS_END 2
+ u_int8_t _pad[3];
+} __packed;
+
+/*
+ * TDB
+ */
+
+struct pfsync_tdb {
+ u_int32_t spi;
+ union sockaddr_union dst;
+ u_int32_t rpl;
+ u_int64_t cur_bytes;
+ u_int8_t sproto;
+ u_int8_t updates;
+ u_int8_t _pad[2];
+} __packed;
+
+#define PFSYNC_HDRLEN sizeof(struct pfsync_header)
+
+struct pfsyncstats {
+ u_int64_t pfsyncs_ipackets; /* total input packets, IPv4 */
+ u_int64_t pfsyncs_ipackets6; /* total input packets, IPv6 */
+ u_int64_t pfsyncs_badif; /* not the right interface */
+ u_int64_t pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */
+ u_int64_t pfsyncs_hdrops; /* packets shorter than hdr */
+ u_int64_t pfsyncs_badver; /* bad (incl unsupp) version */
+ u_int64_t pfsyncs_badact; /* bad action */
+ u_int64_t pfsyncs_badlen; /* data length does not match */
+ u_int64_t pfsyncs_badauth; /* bad authentication */
+ u_int64_t pfsyncs_stale; /* stale state */
+ u_int64_t pfsyncs_badval; /* bad values */
+ u_int64_t pfsyncs_badstate; /* insert/lookup failed */
+
+ u_int64_t pfsyncs_opackets; /* total output packets, IPv4 */
+ u_int64_t pfsyncs_opackets6; /* total output packets, IPv6 */
+ u_int64_t pfsyncs_onomem; /* no memory for an mbuf */
+ u_int64_t pfsyncs_oerrors; /* ip output error */
+
+ u_int64_t pfsyncs_iacts[PFSYNC_ACT_MAX];
+ u_int64_t pfsyncs_oacts[PFSYNC_ACT_MAX];
+};
+
+/*
+ * Configuration structure for SIOCSETPFSYNC SIOCGETPFSYNC
+ */
+struct pfsyncreq {
+ char pfsyncr_syncdev[IFNAMSIZ];
+ struct in_addr pfsyncr_syncpeer;
+ int pfsyncr_maxupdates;
+ int pfsyncr_defer;
+};
+
+#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq)
+#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq)
+
+#ifdef _KERNEL
+
+/*
+ * this shows where a pf state is with respect to the syncing.
+ */
+#define PFSYNC_S_INS 0x00
+#define PFSYNC_S_IACK 0x01
+#define PFSYNC_S_UPD 0x02
+#define PFSYNC_S_UPD_C 0x03
+#define PFSYNC_S_DEL 0x04
+#define PFSYNC_S_COUNT 0x05
+
+#define PFSYNC_S_DEFER 0xfe
+#define PFSYNC_S_NONE 0xff
+
+#define PFSYNC_SI_IOCTL 0x01
+#define PFSYNC_SI_CKSUM 0x02
+#define PFSYNC_SI_ACK 0x04
+
+#endif /* _KERNEL */
+
+#endif /* _NET_IF_PFSYNC_H_ */
diff --git a/freebsd/sys/net/if_sppp.h b/freebsd/sys/net/if_sppp.h
index 97f94b39..23a08e77 100644
--- a/freebsd/sys/net/if_sppp.h
+++ b/freebsd/sys/net/if_sppp.h
@@ -78,7 +78,7 @@ struct sauth {
/*
* Don't change the order of this. Ordering the phases this way allows
- * for a comparision of ``pp_phase >= PHASE_AUTHENTICATE'' in order to
+ * for a comparison of ``pp_phase >= PHASE_AUTHENTICATE'' in order to
* know whether LCP is up.
*/
enum ppp_phase {
diff --git a/freebsd/sys/net/if_spppfr.c b/freebsd/sys/net/if_spppfr.c
index 93bbaeba..d30509d5 100644
--- a/freebsd/sys/net/if_spppfr.c
+++ b/freebsd/sys/net/if_spppfr.c
@@ -27,10 +27,9 @@
#include <rtems/bsd/sys/param.h>
-#if defined(__FreeBSD__) && __FreeBSD__ >= 3
+#if defined(__FreeBSD__)
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#endif
#ifdef NetBSD1_3
@@ -47,7 +46,7 @@
#include <sys/sockio.h>
#include <sys/socket.h>
#include <sys/syslog.h>
-#if defined(__FreeBSD__) && __FreeBSD__ >= 3
+#if defined(__FreeBSD__)
#include <sys/random.h>
#endif
#include <sys/malloc.h>
@@ -60,6 +59,7 @@
#endif
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/if_types.h>
#include <net/route.h>
@@ -86,11 +86,6 @@
# include <net/ethertypes.h>
#endif
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
#include <net/if_sppp.h>
/*
@@ -151,7 +146,7 @@ struct arp_req {
unsigned short ptarget2;
} __packed;
-#if defined(__FreeBSD__) && __FreeBSD__ >= 3 && __FreeBSD_version < 501113
+#if defined(__FreeBSD__) && __FreeBSD_version < 501113
#define SPP_FMT "%s%d: "
#define SPP_ARGS(ifp) (ifp)->if_name, (ifp)->if_unit
#else
@@ -257,9 +252,9 @@ bad: m_freem (m);
switch (proto) {
default:
- ++ifp->if_noproto;
-drop: ++ifp->if_ierrors;
- ++ifp->if_iqdrops;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
+drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
m_freem (m);
return;
#ifdef INET
@@ -267,16 +262,6 @@ drop: ++ifp->if_ierrors;
isr = NETISR_IP;
break;
#endif
-#ifdef IPX
- case ETHERTYPE_IPX:
- isr = NETISR_IPX;
- break;
-#endif
-#ifdef NETATALK
- case ETHERTYPE_AT:
- isr = NETISR_ATALK;
- break;
-#endif
}
if (! (ifp->if_flags & IFF_UP))
@@ -306,7 +291,7 @@ struct mbuf *sppp_fr_header (struct sppp *sp, struct mbuf *m,
/* Prepend the space for Frame Relay header. */
hlen = (family == AF_INET) ? 4 : 10;
- M_PREPEND (m, hlen, M_DONTWAIT);
+ M_PREPEND (m, hlen, M_NOWAIT);
if (! m)
return 0;
h = mtod (m, u_char*);
@@ -346,21 +331,11 @@ struct mbuf *sppp_fr_header (struct sppp *sp, struct mbuf *m,
h[3] = FR_IP;
return m;
#endif
-#ifdef IPX
- case AF_IPX:
- type = ETHERTYPE_IPX;
- break;
-#endif
#ifdef NS
case AF_NS:
type = 0x8137;
break;
#endif
-#ifdef NETATALK
- case AF_APPLETALK:
- type = ETHERTYPE_AT;
- break;
-#endif
}
h[3] = FR_PADDING;
h[4] = FR_SNAP;
@@ -383,7 +358,7 @@ void sppp_fr_keepalive (struct sppp *sp)
unsigned char *h, *p;
struct mbuf *m;
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.rcvif = 0;
@@ -421,7 +396,7 @@ void sppp_fr_keepalive (struct sppp *sp)
(u_char) sp->pp_rseq[IDX_LCP]);
if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
- ++ifp->if_oerrors;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
/*
@@ -503,7 +478,7 @@ static void sppp_fr_arp (struct sppp *sp, struct arp_req *req,
(unsigned char) his_ip_address);
/* Send the Inverse ARP reply. */
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.len = m->m_len = 10 + sizeof (*reply);
@@ -535,7 +510,7 @@ static void sppp_fr_arp (struct sppp *sp, struct arp_req *req,
reply->ptarget2 = htonl (his_ip_address) >> 16;
if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
- ++ifp->if_oerrors;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
/*
diff --git a/freebsd/sys/net/if_spppsubr.c b/freebsd/sys/net/if_spppsubr.c
index fa6a7c1b..e7a62277 100644
--- a/freebsd/sys/net/if_spppsubr.c
+++ b/freebsd/sys/net/if_spppsubr.c
@@ -27,11 +27,12 @@
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/module.h>
+#include <sys/rmlock.h>
#include <sys/sockio.h>
#include <sys/socket.h>
#include <sys/syslog.h>
@@ -42,6 +43,7 @@
#include <sys/md5.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/if_types.h>
#include <net/route.h>
@@ -66,11 +68,6 @@
#include <netinet/if_ether.h>
-#ifdef IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
#include <net/if_sppp.h>
#define IOCTL_CMD_T u_long
@@ -264,7 +261,7 @@ static const u_short interactive_ports[8] = {
int debug = ifp->if_flags & IFF_DEBUG
static int sppp_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro);
+ const struct sockaddr *dst, struct route *ro);
static void sppp_cisco_send(struct sppp *sp, int type, long par1, long par2);
static void sppp_cisco_input(struct sppp *sp, struct mbuf *m);
@@ -525,7 +522,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
if (ifp->if_flags & IFF_UP)
/* Count received bytes, add FCS and one flag */
- ifp->if_ibytes += m->m_pkthdr.len + 3;
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len + 3);
if (m->m_pkthdr.len <= PPP_HEADER_LEN) {
/* Too small packet, drop it. */
@@ -537,8 +534,8 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
m_freem (m);
SPPP_UNLOCK(sp);
drop2:
- ++ifp->if_ierrors;
- ++ifp->if_iqdrops;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
return;
}
@@ -577,7 +574,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
sppp_cp_send (sp, PPP_LCP, PROTO_REJ,
++sp->pp_seq[IDX_LCP], m->m_pkthdr.len + 2,
&h->protocol);
- ++ifp->if_noproto;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto drop;
case PPP_LCP:
sppp_cp_input(&lcp, sp, m);
@@ -631,7 +628,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
* enough leading space in the existing mbuf).
*/
m_adj(m, vjlen);
- M_PREPEND(m, hlen, M_DONTWAIT);
+ M_PREPEND(m, hlen, M_NOWAIT);
if (m == NULL) {
SPPP_UNLOCK(sp);
goto drop2;
@@ -673,14 +670,6 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
do_account++;
break;
#endif
-#ifdef IPX
- case PPP_IPX:
- /* IPX IPXCP not implemented yet */
- if (sp->pp_phase == PHASE_NETWORK)
- isr = NETISR_IPX;
- do_account++;
- break;
-#endif
}
break;
case CISCO_MULTICAST:
@@ -697,7 +686,7 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
}
switch (ntohs (h->protocol)) {
default:
- ++ifp->if_noproto;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
goto invalid;
case CISCO_KEEPALIVE:
sppp_cisco_input (sp, m);
@@ -716,12 +705,6 @@ sppp_input(struct ifnet *ifp, struct mbuf *m)
do_account++;
break;
#endif
-#ifdef IPX
- case ETHERTYPE_IPX:
- isr = NETISR_IPX;
- do_account++;
- break;
-#endif
}
break;
default: /* Invalid PPP packet. */
@@ -787,19 +770,18 @@ sppp_ifstart(struct ifnet *ifp)
* Enqueue transmit packet.
*/
static int
-sppp_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro)
+sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
struct sppp *sp = IFP2SP(ifp);
struct ppp_header *h;
struct ifqueue *ifq = NULL;
- int s, error, rv = 0;
+ int error, rv = 0;
#ifdef INET
int ipproto = PPP_IP;
#endif
int debug = ifp->if_flags & IFF_DEBUG;
- s = splimp();
SPPP_LOCK(sp);
if (!(ifp->if_flags & IFF_UP) ||
@@ -810,7 +792,6 @@ sppp_output(struct ifnet *ifp, struct mbuf *m,
#endif
m_freem (m);
SPPP_UNLOCK(sp);
- splx (s);
return (ENETDOWN);
}
@@ -834,9 +815,7 @@ sppp_output(struct ifnet *ifp, struct mbuf *m,
* to start LCP for it.
*/
ifp->if_drv_flags |= IFF_DRV_RUNNING;
- splx(s);
lcp.Open(sp);
- s = splimp();
}
#ifdef INET
@@ -860,7 +839,6 @@ sppp_output(struct ifnet *ifp, struct mbuf *m,
{
m_freem(m);
SPPP_UNLOCK(sp);
- splx(s);
if(ip->ip_p == IPPROTO_TCP)
return(EADDRNOTAVAIL);
else
@@ -905,7 +883,6 @@ sppp_output(struct ifnet *ifp, struct mbuf *m,
default:
m_freem(m);
SPPP_UNLOCK(sp);
- splx(s);
return (EINVAL);
}
}
@@ -928,14 +905,13 @@ sppp_output(struct ifnet *ifp, struct mbuf *m,
/*
* Prepend general data packet PPP header. For now, IP only.
*/
- M_PREPEND (m, PPP_HEADER_LEN, M_DONTWAIT);
+ M_PREPEND (m, PPP_HEADER_LEN, M_NOWAIT);
if (! m) {
nobufs: if (debug)
log(LOG_DEBUG, SPP_FMT "no memory for transmit header\n",
SPP_ARGS(ifp));
- ++ifp->if_oerrors;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
SPPP_UNLOCK(sp);
- splx (s);
return (ENOBUFS);
}
/*
@@ -992,17 +968,10 @@ nobufs: if (debug)
}
break;
#endif
-#ifdef IPX
- case AF_IPX: /* Novell IPX Protocol */
- h->protocol = htons (sp->pp_mode == IFF_CISCO ?
- ETHERTYPE_IPX : PPP_IPX);
- break;
-#endif
default:
m_freem (m);
- ++ifp->if_oerrors;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
SPPP_UNLOCK(sp);
- splx (s);
return (EAFNOSUPPORT);
}
@@ -1016,13 +985,11 @@ out:
else
IFQ_HANDOFF_ADJ(ifp, m, 3, error);
if (error) {
- ++ifp->if_oerrors;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
SPPP_UNLOCK(sp);
- splx (s);
return (rv? rv: ENOBUFS);
}
SPPP_UNLOCK(sp);
- splx (s);
/*
* Unlike in sppp_input(), we can always bump the timestamp
* here since sppp_output() is only called on behalf of
@@ -1042,7 +1009,7 @@ sppp_attach(struct ifnet *ifp)
mtx_init(&sp->mtx, "sppp", MTX_NETWORK_LOCK, MTX_DEF | MTX_RECURSE);
/* Initialize keepalive handler. */
- callout_init(&sp->keepalive_callout, CALLOUT_MPSAFE);
+ callout_init(&sp->keepalive_callout, 1);
callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive,
(void *)sp);
@@ -1074,7 +1041,7 @@ sppp_attach(struct ifnet *ifp)
#ifdef INET6
sp->confflags |= CONF_ENABLE_IPV6;
#endif
- callout_init(&sp->ifstart_callout, CALLOUT_MPSAFE);
+ callout_init(&sp->ifstart_callout, 1);
sp->if_start = ifp->if_start;
ifp->if_start = sppp_ifstart;
sp->pp_comp = malloc(sizeof(struct slcompress), M_TEMP, M_WAITOK);
@@ -1139,14 +1106,12 @@ int
sppp_isempty(struct ifnet *ifp)
{
struct sppp *sp = IFP2SP(ifp);
- int empty, s;
+ int empty;
- s = splimp();
SPPP_LOCK(sp);
empty = !sp->pp_fastq.ifq_head && !sp->pp_cpq.ifq_head &&
!SP2IFP(sp)->if_snd.ifq_head;
SPPP_UNLOCK(sp);
- splx(s);
return (empty);
}
@@ -1158,9 +1123,7 @@ sppp_dequeue(struct ifnet *ifp)
{
struct sppp *sp = IFP2SP(ifp);
struct mbuf *m;
- int s;
- s = splimp();
SPPP_LOCK(sp);
/*
* Process only the control protocol queue until we have at
@@ -1177,7 +1140,6 @@ sppp_dequeue(struct ifnet *ifp)
IF_DEQUEUE (&SP2IFP(sp)->if_snd, m);
}
SPPP_UNLOCK(sp);
- splx(s);
return m;
}
@@ -1189,9 +1151,7 @@ sppp_pick(struct ifnet *ifp)
{
struct sppp *sp = IFP2SP(ifp);
struct mbuf *m;
- int s;
- s = splimp ();
SPPP_LOCK(sp);
m = sp->pp_cpq.ifq_head;
@@ -1202,7 +1162,6 @@ sppp_pick(struct ifnet *ifp)
if ((m = sp->pp_fastq.ifq_head) == NULL)
m = SP2IFP(sp)->if_snd.ifq_head;
SPPP_UNLOCK(sp);
- splx (s);
return (m);
}
@@ -1214,14 +1173,12 @@ sppp_ioctl(struct ifnet *ifp, IOCTL_CMD_T cmd, void *data)
{
struct ifreq *ifr = (struct ifreq*) data;
struct sppp *sp = IFP2SP(ifp);
- int s, rv, going_up, going_down, newmode;
+ int rv, going_up, going_down, newmode;
- s = splimp();
SPPP_LOCK(sp);
rv = 0;
switch (cmd) {
case SIOCAIFADDR:
- case SIOCSIFDSTADDR:
break;
case SIOCSIFADDR:
@@ -1322,7 +1279,6 @@ sppp_ioctl(struct ifnet *ifp, IOCTL_CMD_T cmd, void *data)
rv = ENOTTY;
}
SPPP_UNLOCK(sp);
- splx(s);
return rv;
}
@@ -1414,7 +1370,7 @@ sppp_cisco_send(struct sppp *sp, int type, long par1, long par2)
getmicrouptime(&tv);
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + CISCO_PACKET_LEN;
@@ -1441,7 +1397,7 @@ sppp_cisco_send(struct sppp *sp, int type, long par1, long par2)
(u_long)ch->par2, (u_int)ch->rel, (u_int)ch->time0, (u_int)ch->time1);
if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
/*
@@ -1462,7 +1418,7 @@ sppp_cp_send(struct sppp *sp, u_short proto, u_char type,
if (len > MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN)
len = MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN;
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + LCP_HEADER_LEN + len;
@@ -1490,7 +1446,7 @@ sppp_cp_send(struct sppp *sp, u_short proto, u_char type,
log(-1, ">\n");
}
if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
/*
@@ -1532,7 +1488,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
log(-1, SPP_FMT "%s invalid conf-req length %d\n",
SPP_ARGS(ifp), cp->name,
len);
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
break;
}
/* handle states where RCR doesn't get a SCA/SCN */
@@ -1588,7 +1544,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
case CONF_ACK:
@@ -1597,7 +1553,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n",
SPP_ARGS(ifp), cp->name,
h->ident, sp->confid[cp->protoidx]);
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
break;
}
switch (sp->state[cp->protoidx]) {
@@ -1632,7 +1588,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
case CONF_NAK:
@@ -1642,7 +1598,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n",
SPP_ARGS(ifp), cp->name,
h->ident, sp->confid[cp->protoidx]);
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
break;
}
if (h->type == CONF_NAK)
@@ -1682,7 +1638,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
@@ -1715,7 +1671,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
case TERM_ACK:
@@ -1746,7 +1702,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
case CODE_REJ:
@@ -1773,7 +1729,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
case PROTO_REJ:
@@ -1832,7 +1788,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name,
sppp_cp_type_name(h->type),
sppp_state_name(sp->state[cp->protoidx]));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
break;
}
@@ -1848,7 +1804,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
if (debug)
log(-1, SPP_FMT "lcp echo req but lcp closed\n",
SPP_ARGS(ifp));
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
break;
}
if (len < 8) {
@@ -1882,7 +1838,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
if (cp->proto != PPP_LCP)
goto illegal;
if (h->ident != sp->lcp.echoid) {
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
break;
}
if (len < 8) {
@@ -1907,7 +1863,7 @@ sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
SPP_ARGS(ifp), cp->name, h->type);
sppp_cp_send(sp, cp->proto, CODE_REJ,
++sp->pp_seq[cp->protoidx], m->m_pkthdr.len, h);
- ++ifp->if_ierrors;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
}
@@ -2072,9 +2028,7 @@ static void
sppp_to_event(const struct cp *cp, struct sppp *sp)
{
STDDCL;
- int s;
- s = splimp();
SPPP_LOCK(sp);
if (debug)
log(LOG_DEBUG, SPP_FMT "%s TO(%s) rst_counter = %d\n",
@@ -2124,7 +2078,6 @@ sppp_to_event(const struct cp *cp, struct sppp *sp)
}
SPPP_UNLOCK(sp);
- splx(s);
}
/*
@@ -2196,7 +2149,7 @@ sppp_lcp_init(struct sppp *sp)
sp->lcp.max_terminate = 2;
sp->lcp.max_configure = 10;
sp->lcp.max_failure = 10;
- callout_init(&sp->ch[IDX_LCP], CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_LCP], 1);
}
static void
@@ -2887,7 +2840,7 @@ sppp_ipcp_init(struct sppp *sp)
sp->fail_counter[IDX_IPCP] = 0;
sp->pp_seq[IDX_IPCP] = 0;
sp->pp_rseq[IDX_IPCP] = 0;
- callout_init(&sp->ch[IDX_IPCP], CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_IPCP], 1);
}
static void
@@ -3011,7 +2964,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len)
* since our algorithm always uses the
* original option to NAK it with new values,
* things would become more complicated. In
- * pratice, the only commonly implemented IP
+ * practice, the only commonly implemented IP
* compression option is VJ anyway, so the
* difference is negligible.
*/
@@ -3446,7 +3399,7 @@ sppp_ipv6cp_init(struct sppp *sp)
sp->fail_counter[IDX_IPV6CP] = 0;
sp->pp_seq[IDX_IPV6CP] = 0;
sp->pp_rseq[IDX_IPV6CP] = 0;
- callout_init(&sp->ch[IDX_IPV6CP], CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_IPV6CP], 1);
}
static void
@@ -4027,7 +3980,7 @@ sppp_chap_input(struct sppp *sp, struct mbuf *m)
{
STDDCL;
struct lcp_header *h;
- int len, x;
+ int len;
u_char *value, *name, digest[AUTHKEYLEN], dsize;
int value_len, name_len;
MD5_CTX ctx;
@@ -4104,7 +4057,6 @@ sppp_chap_input(struct sppp *sp, struct mbuf *m)
}
log(-1, "\n");
}
- x = splimp();
SPPP_LOCK(sp);
sp->pp_flags &= ~PP_NEEDAUTH;
if (sp->myauth.proto == PPP_CHAP &&
@@ -4116,11 +4068,9 @@ sppp_chap_input(struct sppp *sp, struct mbuf *m)
* to network phase.
*/
SPPP_UNLOCK(sp);
- splx(x);
break;
}
SPPP_UNLOCK(sp);
- splx(x);
sppp_phase_network(sp);
break;
@@ -4254,7 +4204,7 @@ sppp_chap_init(struct sppp *sp)
sp->fail_counter[IDX_CHAP] = 0;
sp->pp_seq[IDX_CHAP] = 0;
sp->pp_rseq[IDX_CHAP] = 0;
- callout_init(&sp->ch[IDX_CHAP], CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_CHAP], 1);
}
static void
@@ -4282,9 +4232,7 @@ sppp_chap_TO(void *cookie)
{
struct sppp *sp = (struct sppp *)cookie;
STDDCL;
- int s;
- s = splimp();
SPPP_LOCK(sp);
if (debug)
log(LOG_DEBUG, SPP_FMT "chap TO(%s) rst_counter = %d\n",
@@ -4315,14 +4263,13 @@ sppp_chap_TO(void *cookie)
}
SPPP_UNLOCK(sp);
- splx(s);
}
static void
sppp_chap_tlu(struct sppp *sp)
{
STDDCL;
- int i, x;
+ int i;
i = 0;
sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure;
@@ -4350,10 +4297,9 @@ sppp_chap_tlu(struct sppp *sp)
if ((sp->hisauth.flags & AUTHFLAG_NORECHALLENGE) == 0)
log(-1, "next re-challenge in %d seconds\n", i);
else
- log(-1, "re-challenging supressed\n");
+ log(-1, "re-challenging suppressed\n");
}
- x = splimp();
SPPP_LOCK(sp);
/* indicate to LCP that we need to be closed down */
sp->lcp.protos |= (1 << IDX_CHAP);
@@ -4365,11 +4311,9 @@ sppp_chap_tlu(struct sppp *sp)
* phase.
*/
SPPP_UNLOCK(sp);
- splx(x);
return;
}
SPPP_UNLOCK(sp);
- splx(x);
/*
* If we are already in phase network, we are done here. This
@@ -4438,7 +4382,7 @@ sppp_pap_input(struct sppp *sp, struct mbuf *m)
{
STDDCL;
struct lcp_header *h;
- int len, x;
+ int len;
u_char *name, *passwd, mlen;
int name_len, passwd_len;
@@ -4525,7 +4469,6 @@ sppp_pap_input(struct sppp *sp, struct mbuf *m)
}
log(-1, "\n");
}
- x = splimp();
SPPP_LOCK(sp);
sp->pp_flags &= ~PP_NEEDAUTH;
if (sp->myauth.proto == PPP_PAP &&
@@ -4537,11 +4480,9 @@ sppp_pap_input(struct sppp *sp, struct mbuf *m)
* to network phase.
*/
SPPP_UNLOCK(sp);
- splx(x);
break;
}
SPPP_UNLOCK(sp);
- splx(x);
sppp_phase_network(sp);
break;
@@ -4585,8 +4526,8 @@ sppp_pap_init(struct sppp *sp)
sp->fail_counter[IDX_PAP] = 0;
sp->pp_seq[IDX_PAP] = 0;
sp->pp_rseq[IDX_PAP] = 0;
- callout_init(&sp->ch[IDX_PAP], CALLOUT_MPSAFE);
- callout_init(&sp->pap_my_to_ch, CALLOUT_MPSAFE);
+ callout_init(&sp->ch[IDX_PAP], 1);
+ callout_init(&sp->pap_my_to_ch, 1);
}
static void
@@ -4622,9 +4563,7 @@ sppp_pap_TO(void *cookie)
{
struct sppp *sp = (struct sppp *)cookie;
STDDCL;
- int s;
- s = splimp();
SPPP_LOCK(sp);
if (debug)
log(LOG_DEBUG, SPP_FMT "pap TO(%s) rst_counter = %d\n",
@@ -4650,7 +4589,6 @@ sppp_pap_TO(void *cookie)
}
SPPP_UNLOCK(sp);
- splx(s);
}
/*
@@ -4677,7 +4615,6 @@ static void
sppp_pap_tlu(struct sppp *sp)
{
STDDCL;
- int x;
sp->rst_counter[IDX_PAP] = sp->lcp.max_configure;
@@ -4685,7 +4622,6 @@ sppp_pap_tlu(struct sppp *sp)
log(LOG_DEBUG, SPP_FMT "%s tlu\n",
SPP_ARGS(ifp), pap.name);
- x = splimp();
SPPP_LOCK(sp);
/* indicate to LCP that we need to be closed down */
sp->lcp.protos |= (1 << IDX_PAP);
@@ -4697,11 +4633,9 @@ sppp_pap_tlu(struct sppp *sp)
* phase.
*/
SPPP_UNLOCK(sp);
- splx(x);
return;
}
SPPP_UNLOCK(sp);
- splx(x);
sppp_phase_network(sp);
}
@@ -4766,7 +4700,7 @@ sppp_auth_send(const struct cp *cp, struct sppp *sp,
const char *msg;
va_list ap;
- MGETHDR (m, M_DONTWAIT, MT_DATA);
+ MGETHDR (m, M_NOWAIT, MT_DATA);
if (! m)
return;
m->m_pkthdr.rcvif = 0;
@@ -4810,7 +4744,7 @@ sppp_auth_send(const struct cp *cp, struct sppp *sp,
log(-1, ">\n");
}
if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}
/*
@@ -4823,7 +4757,7 @@ sppp_qflush(struct ifqueue *ifq)
n = ifq->ifq_head;
while ((m = n)) {
- n = m->m_act;
+ n = m->m_nextpkt;
m_freem (m);
}
ifq->ifq_head = 0;
@@ -4839,9 +4773,7 @@ sppp_keepalive(void *dummy)
{
struct sppp *sp = (struct sppp*)dummy;
struct ifnet *ifp = SP2IFP(sp);
- int s;
- s = splimp();
SPPP_LOCK(sp);
/* Keepalive mode disabled or channel down? */
if (! (sp->pp_flags & PP_KEEPALIVE) ||
@@ -4884,7 +4816,6 @@ sppp_keepalive(void *dummy)
}
out:
SPPP_UNLOCK(sp);
- splx(s);
callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive,
(void *)sp);
}
@@ -4906,7 +4837,7 @@ sppp_get_ip_addrs(struct sppp *sp, u_long *src, u_long *dst, u_long *srcmask)
* Pick the first AF_INET address from the list,
* aliases don't make any sense on a p2p link anyway.
*/
- si = 0;
+ si = NULL;
if_addr_rlock(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
if (ifa->ifa_addr->sa_family == AF_INET) {
@@ -4934,7 +4865,7 @@ sppp_get_ip_addrs(struct sppp *sp, u_long *src, u_long *dst, u_long *srcmask)
#ifdef INET
/*
- * Set my IP address. Must be called at splimp.
+ * Set my IP address.
*/
static void
sppp_set_ip_addr(struct sppp *sp, u_long src)
@@ -4948,7 +4879,7 @@ sppp_set_ip_addr(struct sppp *sp, u_long src)
* Pick the first AF_INET address from the list,
* aliases don't make any sense on a p2p link anyway.
*/
- si = 0;
+ si = NULL;
if_addr_rlock(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family == AF_INET) {
@@ -5051,7 +4982,7 @@ sppp_gen_ip6_addr(struct sppp *sp, struct in6_addr *addr)
}
/*
- * Set my IPv6 address. Must be called at splimp.
+ * Set my IPv6 address.
*/
static void
sppp_set_ip6_addr(struct sppp *sp, const struct in6_addr *src)
@@ -5126,14 +5057,15 @@ sppp_params(struct sppp *sp, u_long cmd, void *data)
struct spppreq *spr;
int rv = 0;
- if ((spr = malloc(sizeof(struct spppreq), M_TEMP, M_NOWAIT)) == 0)
+ if ((spr = malloc(sizeof(struct spppreq), M_TEMP, M_NOWAIT)) == NULL)
return (EAGAIN);
/*
* ifr->ifr_data is supposed to point to a struct spppreq.
* Check the cmd word first before attempting to fetch all the
* data.
*/
- if ((subcmd = fuword(ifr->ifr_data)) == -1) {
+ rv = fueword(ifr->ifr_data, &subcmd);
+ if (rv == -1) {
rv = EFAULT;
goto quit;
}
diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c
index e88fd34d..7c1b7075 100644
--- a/freebsd/sys/net/if_stf.c
+++ b/freebsd/sys/net/if_stf.c
@@ -76,9 +76,6 @@
* Note that there is no way to be 100% secure.
*/
-#include <rtems/bsd/local/opt_inet.h>
-#include <rtems/bsd/local/opt_inet6.h>
-
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
@@ -86,24 +83,27 @@
#include <sys/mbuf.h>
#include <rtems/bsd/sys/errno.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/proc.h>
#include <sys/queue.h>
+#include <sys/rmlock.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#include <sys/malloc.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/route.h>
#include <net/netisr.h>
#include <net/if_types.h>
-#include <net/if_stf.h>
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_fib.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
@@ -125,16 +125,10 @@
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW, 0, "6to4 Interface");
-static int stf_route_cache = 1;
-SYSCTL_INT(_net_link_stf, OID_AUTO, route_cache, CTLFLAG_RW,
- &stf_route_cache, 0, "Caching of IPv4 routes for 6to4 Output");
-
static int stf_permit_rfc1918 = 0;
-TUNABLE_INT("net.link.stf.permit_rfc1918", &stf_permit_rfc1918);
-SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RW | CTLFLAG_TUN,
+SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RWTUN,
&stf_permit_rfc1918, 0, "Permit the use of private IPv4 addresses");
-#define STFNAME "stf"
#define STFUNIT 0
#define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002)
@@ -143,36 +137,34 @@ SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RW | CTLFLAG_TUN,
* XXX: Return a pointer with 16-bit aligned. Don't cast it to
* struct in_addr *; use bcopy() instead.
*/
-#define GET_V4(x) ((caddr_t)(&(x)->s6_addr16[1]))
+#define GET_V4(x) (&(x)->s6_addr16[1])
struct stf_softc {
struct ifnet *sc_ifp;
- union {
- struct route __sc_ro4;
- struct route_in6 __sc_ro6; /* just for safety */
- } __sc_ro46;
-#define sc_ro __sc_ro46.__sc_ro4
struct mtx sc_ro_mtx;
u_int sc_fibnum;
const struct encaptab *encap_cookie;
};
#define STF2IFP(sc) ((sc)->sc_ifp)
+static const char stfname[] = "stf";
+
/*
* Note that mutable fields in the softc are not currently locked.
* We do lock sc_ro in stf_output though.
*/
-static MALLOC_DEFINE(M_STF, STFNAME, "6to4 Tunnel Interface");
+static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface");
static const int ip_stf_ttl = 40;
extern struct domain inetdomain;
-struct protosw in_stf_protosw = {
+static int in_stf_input(struct mbuf **, int *, int);
+static struct protosw in_stf_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_IPV6,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = in_stf_input,
- .pr_output = (pr_output_t *)rip_output,
+ .pr_output = rip_output,
.pr_ctloutput = rip_ctloutput,
.pr_usrreqs = &rip_usrreqs
};
@@ -181,22 +173,20 @@ static char *stfnames[] = {"stf0", "stf", "6to4", NULL};
static int stfmodevent(module_t, int, void *);
static int stf_encapcheck(const struct mbuf *, int, int, void *);
-static struct in6_ifaddr *stf_getsrcifa6(struct ifnet *);
-static int stf_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+static int stf_getsrcifa6(struct ifnet *, struct in6_addr *, struct in6_addr *);
+static int stf_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
static int isrfc1918addr(struct in_addr *);
static int stf_checkaddr4(struct stf_softc *, struct in_addr *,
struct ifnet *);
static int stf_checkaddr6(struct stf_softc *, struct in6_addr *,
struct ifnet *);
-static void stf_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int stf_ioctl(struct ifnet *, u_long, caddr_t);
static int stf_clone_match(struct if_clone *, const char *);
static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int stf_clone_destroy(struct if_clone *, struct ifnet *);
-struct if_clone stf_cloner = IFC_CLONE_INITIALIZER(STFNAME, NULL, 0,
- NULL, stf_clone_match, stf_clone_create, stf_clone_destroy);
+static struct if_clone *stf_cloner;
static int
stf_clone_match(struct if_clone *ifc, const char *name)
@@ -247,7 +237,7 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
* we don't conform to the default naming convention for interfaces.
*/
strlcpy(ifp->if_xname, name, IFNAMSIZ);
- ifp->if_dname = ifc->ifc_name;
+ ifp->if_dname = stfname;
ifp->if_dunit = IF_DUNIT_NONE;
mtx_init(&(sc)->sc_ro_mtx, "stf ro", NULL, MTX_DEF);
@@ -289,18 +279,16 @@ stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
}
static int
-stfmodevent(mod, type, data)
- module_t mod;
- int type;
- void *data;
+stfmodevent(module_t mod, int type, void *data)
{
switch (type) {
case MOD_LOAD:
- if_clone_attach(&stf_cloner);
+ stf_cloner = if_clone_advanced(stfname, 0, stf_clone_match,
+ stf_clone_create, stf_clone_destroy);
break;
case MOD_UNLOAD:
- if_clone_detach(&stf_cloner);
+ if_clone_detach(stf_cloner);
break;
default:
return (EOPNOTSUPP);
@@ -318,16 +306,12 @@ static moduledata_t stf_mod = {
DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
static int
-stf_encapcheck(m, off, proto, arg)
- const struct mbuf *m;
- int off;
- int proto;
- void *arg;
+stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
{
struct ip ip;
- struct in6_ifaddr *ia6;
struct stf_softc *sc;
struct in_addr a, b, mask;
+ struct in6_addr addr6, mask6;
sc = (struct stf_softc *)arg;
if (sc == NULL)
@@ -349,20 +333,16 @@ stf_encapcheck(m, off, proto, arg)
if (ip.ip_v != 4)
return 0;
- ia6 = stf_getsrcifa6(STF2IFP(sc));
- if (ia6 == NULL)
- return 0;
+ if (stf_getsrcifa6(STF2IFP(sc), &addr6, &mask6) != 0)
+ return (0);
/*
* check if IPv4 dst matches the IPv4 address derived from the
* local 6to4 address.
* success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:...
*/
- if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst,
- sizeof(ip.ip_dst)) != 0) {
- ifa_free(&ia6->ia_ifa);
+ if (bcmp(GET_V4(&addr6), &ip.ip_dst, sizeof(ip.ip_dst)) != 0)
return 0;
- }
/*
* check if IPv4 src matches the IPv4 address derived from the
@@ -371,9 +351,8 @@ stf_encapcheck(m, off, proto, arg)
* fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24
*/
bzero(&a, sizeof(a));
- bcopy(GET_V4(&ia6->ia_addr.sin6_addr), &a, sizeof(a));
- bcopy(GET_V4(&ia6->ia_prefixmask.sin6_addr), &mask, sizeof(mask));
- ifa_free(&ia6->ia_ifa);
+ bcopy(GET_V4(&addr6), &a, sizeof(a));
+ bcopy(GET_V4(&mask6), &mask, sizeof(mask));
a.s_addr &= mask.s_addr;
b = ip.ip_src;
b.s_addr &= mask.s_addr;
@@ -384,12 +363,12 @@ stf_encapcheck(m, off, proto, arg)
return 32;
}
-static struct in6_ifaddr *
-stf_getsrcifa6(ifp)
- struct ifnet *ifp;
+static int
+stf_getsrcifa6(struct ifnet *ifp, struct in6_addr *addr, struct in6_addr *mask)
{
struct ifaddr *ia;
struct in_ifaddr *ia4;
+ struct in6_ifaddr *ia6;
struct sockaddr_in6 *sin6;
struct in_addr in;
@@ -408,33 +387,30 @@ stf_getsrcifa6(ifp)
if (ia4 == NULL)
continue;
- ifa_ref(ia);
+ ia6 = (struct in6_ifaddr *)ia;
+
+ *addr = sin6->sin6_addr;
+ *mask = ia6->ia_prefixmask.sin6_addr;
if_addr_runlock(ifp);
- return (struct in6_ifaddr *)ia;
+ return (0);
}
if_addr_runlock(ifp);
- return NULL;
+ return (ENOENT);
}
static int
-stf_output(ifp, m, dst, ro)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr *dst;
- struct route *ro;
+stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
{
struct stf_softc *sc;
- struct sockaddr_in6 *dst6;
- struct route *cached_route;
+ const struct sockaddr_in6 *dst6;
struct in_addr in4;
- caddr_t ptr;
- struct sockaddr_in *dst4;
+ const void *ptr;
u_int8_t tos;
struct ip *ip;
struct ip6_hdr *ip6;
- struct in6_ifaddr *ia6;
- u_int32_t af;
+ struct in6_addr addr6, mask6;
int error;
#ifdef MAC
@@ -446,12 +422,12 @@ stf_output(ifp, m, dst, ro)
#endif
sc = ifp->if_softc;
- dst6 = (struct sockaddr_in6 *)dst;
+ dst6 = (const struct sockaddr_in6 *)dst;
/* just in case */
if ((ifp->if_flags & IFF_UP) == 0) {
m_freem(m);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return ENETDOWN;
}
@@ -460,18 +436,16 @@ stf_output(ifp, m, dst, ro)
* we shouldn't generate output. Without this check, we'll end up
* using wrong IPv4 source.
*/
- ia6 = stf_getsrcifa6(ifp);
- if (ia6 == NULL) {
+ if (stf_getsrcifa6(ifp, &addr6, &mask6) != 0) {
m_freem(m);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return ENETDOWN;
}
if (m->m_len < sizeof(*ip6)) {
m = m_pullup(m, sizeof(*ip6));
if (!m) {
- ifa_free(&ia6->ia_ifa);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return ENOBUFS;
}
}
@@ -479,15 +453,6 @@ stf_output(ifp, m, dst, ro)
tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
/*
- * BPF writes need to be handled specially.
- * This is a null operation, nothing here checks dst->sa_family.
- */
- if (dst->sa_family == AF_UNSPEC) {
- bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
-
- /*
* Pickup the right outer dst addr from the list of candidates.
* ip6_dst has priority as it may be able to give us shorter IPv4 hops.
*/
@@ -497,9 +462,8 @@ stf_output(ifp, m, dst, ro)
else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr))
ptr = GET_V4(&dst6->sin6_addr);
else {
- ifa_free(&ia6->ia_ifa);
m_freem(m);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return ENETUNREACH;
}
bcopy(ptr, &in4, sizeof(in4));
@@ -512,78 +476,38 @@ stf_output(ifp, m, dst, ro)
* will only read from the mbuf (i.e., it won't
* try to free it or keep a pointer a to it).
*/
- af = AF_INET6;
+ u_int af = AF_INET6;
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
}
- M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
+ M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
if (m == NULL) {
- ifa_free(&ia6->ia_ifa);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return ENOBUFS;
}
ip = mtod(m, struct ip *);
bzero(ip, sizeof(*ip));
- bcopy(GET_V4(&((struct sockaddr_in6 *)&ia6->ia_addr)->sin6_addr),
- &ip->ip_src, sizeof(ip->ip_src));
- ifa_free(&ia6->ia_ifa);
+ bcopy(GET_V4(&addr6), &ip->ip_src, sizeof(ip->ip_src));
bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst));
ip->ip_p = IPPROTO_IPV6;
ip->ip_ttl = ip_stf_ttl;
- ip->ip_len = m->m_pkthdr.len; /*host order*/
+ ip->ip_len = htons(m->m_pkthdr.len);
if (ifp->if_flags & IFF_LINK1)
ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
else
ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
- if (!stf_route_cache) {
- cached_route = NULL;
- goto sendit;
- }
-
- /*
- * Do we have a cached route?
- */
- mtx_lock(&(sc)->sc_ro_mtx);
- dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst;
- if (dst4->sin_family != AF_INET ||
- bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) {
- /* cache route doesn't match */
- dst4->sin_family = AF_INET;
- dst4->sin_len = sizeof(struct sockaddr_in);
- bcopy(&ip->ip_dst, &dst4->sin_addr, sizeof(dst4->sin_addr));
- if (sc->sc_ro.ro_rt) {
- RTFREE(sc->sc_ro.ro_rt);
- sc->sc_ro.ro_rt = NULL;
- }
- }
-
- if (sc->sc_ro.ro_rt == NULL) {
- rtalloc_fib(&sc->sc_ro, sc->sc_fibnum);
- if (sc->sc_ro.ro_rt == NULL) {
- m_freem(m);
- mtx_unlock(&(sc)->sc_ro_mtx);
- ifp->if_oerrors++;
- return ENETUNREACH;
- }
- }
- cached_route = &sc->sc_ro;
-
-sendit:
M_SETFIB(m, sc->sc_fibnum);
- ifp->if_opackets++;
- error = ip_output(m, NULL, cached_route, 0, NULL, NULL);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ error = ip_output(m, NULL, NULL, 0, NULL, NULL);
- if (cached_route != NULL)
- mtx_unlock(&(sc)->sc_ro_mtx);
return error;
}
static int
-isrfc1918addr(in)
- struct in_addr *in;
+isrfc1918addr(struct in_addr *in)
{
/*
* returns 1 if private address range:
@@ -599,11 +523,9 @@ isrfc1918addr(in)
}
static int
-stf_checkaddr4(sc, in, inifp)
- struct stf_softc *sc;
- struct in_addr *in;
- struct ifnet *inifp; /* incoming interface */
+stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp)
{
+ struct rm_priotracker in_ifa_tracker;
struct in_ifaddr *ia4;
/*
@@ -627,54 +549,35 @@ stf_checkaddr4(sc, in, inifp)
/*
* reject packets with broadcast
*/
- IN_IFADDR_RLOCK();
- for (ia4 = TAILQ_FIRST(&V_in_ifaddrhead);
- ia4;
- ia4 = TAILQ_NEXT(ia4, ia_link))
- {
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
+ TAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) {
if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
continue;
if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
return -1;
}
}
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
/*
* perform ingress filter
*/
if (sc && (STF2IFP(sc)->if_flags & IFF_LINK2) == 0 && inifp) {
- struct sockaddr_in sin;
- struct rtentry *rt;
-
- bzero(&sin, sizeof(sin));
- sin.sin_family = AF_INET;
- sin.sin_len = sizeof(struct sockaddr_in);
- sin.sin_addr = *in;
- rt = rtalloc1_fib((struct sockaddr *)&sin, 0,
- 0UL, sc->sc_fibnum);
- if (!rt || rt->rt_ifp != inifp) {
-#if 0
- log(LOG_WARNING, "%s: packet from 0x%x dropped "
- "due to ingress filter\n", if_name(STF2IFP(sc)),
- (u_int32_t)ntohl(sin.sin_addr.s_addr));
-#endif
- if (rt)
- RTFREE_LOCKED(rt);
- return -1;
- }
- RTFREE_LOCKED(rt);
+ struct nhop4_basic nh4;
+
+ if (fib4_lookup_nh_basic(sc->sc_fibnum, *in, 0, 0, &nh4) != 0)
+ return (-1);
+
+ if (nh4.nh_ifp != inifp)
+ return (-1);
}
return 0;
}
static int
-stf_checkaddr6(sc, in6, inifp)
- struct stf_softc *sc;
- struct in6_addr *in6;
- struct ifnet *inifp; /* incoming interface */
+stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp)
{
/*
* check 6to4 addresses
@@ -697,23 +600,23 @@ stf_checkaddr6(sc, in6, inifp)
return 0;
}
-void
-in_stf_input(m, off)
- struct mbuf *m;
- int off;
+static int
+in_stf_input(struct mbuf **mp, int *offp, int proto)
{
- int proto;
struct stf_softc *sc;
struct ip *ip;
struct ip6_hdr *ip6;
+ struct mbuf *m;
u_int8_t otos, itos;
struct ifnet *ifp;
+ int off;
- proto = mtod(m, struct ip *)->ip_p;
+ m = *mp;
+ off = *offp;
if (proto != IPPROTO_IPV6) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
ip = mtod(m, struct ip *);
@@ -722,7 +625,7 @@ in_stf_input(m, off)
if (sc == NULL || (STF2IFP(sc)->if_flags & IFF_UP) == 0) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
ifp = STF2IFP(sc);
@@ -738,7 +641,7 @@ in_stf_input(m, off)
if (stf_checkaddr4(sc, &ip->ip_dst, NULL) < 0 ||
stf_checkaddr4(sc, &ip->ip_src, m->m_pkthdr.rcvif) < 0) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
otos = ip->ip_tos;
@@ -747,7 +650,7 @@ in_stf_input(m, off)
if (m->m_len < sizeof(*ip6)) {
m = m_pullup(m, sizeof(*ip6));
if (!m)
- return;
+ return (IPPROTO_DONE);
}
ip6 = mtod(m, struct ip6_hdr *);
@@ -758,7 +661,7 @@ in_stf_input(m, off)
if (stf_checkaddr6(sc, &ip6->ip6_dst, NULL) < 0 ||
stf_checkaddr6(sc, &ip6->ip6_src, m->m_pkthdr.rcvif) < 0) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
@@ -789,28 +692,15 @@ in_stf_input(m, off)
* See net/if_gif.c for possible issues with packet processing
* reorder due to extra queueing.
*/
- ifp->if_ipackets++;
- ifp->if_ibytes += m->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
M_SETFIB(m, ifp->if_fib);
netisr_dispatch(NETISR_IPV6, m);
-}
-
-/* ARGSUSED */
-static void
-stf_rtrequest(cmd, rt, info)
- int cmd;
- struct rtentry *rt;
- struct rt_addrinfo *info;
-{
- RT_LOCK_ASSERT(rt);
- rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+ return (IPPROTO_DONE);
}
static int
-stf_ioctl(ifp, cmd, data)
- struct ifnet *ifp;
- u_long cmd;
- caddr_t data;
+stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct ifaddr *ifa;
struct ifreq *ifr;
@@ -837,7 +727,6 @@ stf_ioctl(ifp, cmd, data)
break;
}
- ifa->ifa_rtrequest = stf_rtrequest;
ifp->if_flags |= IFF_UP;
break;
diff --git a/freebsd/sys/net/if_tap.c b/freebsd/sys/net/if_tap.c
index 599905e8..24ae0092 100644
--- a/freebsd/sys/net/if_tap.c
+++ b/freebsd/sys/net/if_tap.c
@@ -65,6 +65,7 @@
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
#include <net/if_media.h>
@@ -81,8 +82,8 @@
#define CDEV_NAME "tap"
#define TAPDEBUG if (tapdebug) printf
-#define TAP "tap"
-#define VMNET "vmnet"
+static const char tapname[] = "tap";
+static const char vmnetname[] = "vmnet";
#define TAPMAXUNIT 0x7fff
#define VMNET_DEV_MASK CLONE_FLAG0
@@ -101,11 +102,10 @@ static void tapifinit(void *);
static int tap_clone_create(struct if_clone *, int, caddr_t);
static void tap_clone_destroy(struct ifnet *);
+static struct if_clone *tap_cloner;
static int vmnet_clone_create(struct if_clone *, int, caddr_t);
static void vmnet_clone_destroy(struct ifnet *);
-
-IFC_SIMPLE_DECLARE(tap, 0);
-IFC_SIMPLE_DECLARE(vmnet, 0);
+static struct if_clone *vmnet_cloner;
/* character device */
static d_open_t tapopen;
@@ -137,7 +137,7 @@ static struct filterops tap_write_filterops = {
static struct cdevsw tap_cdevsw = {
.d_version = D_VERSION,
- .d_flags = D_PSEUDO | D_NEEDMINOR,
+ .d_flags = D_NEEDMINOR,
.d_open = tapopen,
.d_close = tapclose,
.d_read = tapread,
@@ -172,12 +172,10 @@ SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
"Allow user to open /dev/tap (based on node permissions)");
SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
"Bring interface up when /dev/tap is opened");
-SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tapdclone, 0,
- "Enably legacy devfs interface creation");
+SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0,
+ "Enable legacy devfs interface creation");
SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");
-TUNABLE_INT("net.link.tap.devfs_cloning", &tapdclone);
-
DEV_MODULE(if_tap, tapmodevent, NULL);
static int
@@ -185,18 +183,12 @@ tap_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
struct cdev *dev;
int i;
- int extra;
- if (strcmp(ifc->ifc_name, VMNET) == 0)
- extra = VMNET_DEV_MASK;
- else
- extra = 0;
-
- /* find any existing device, or allocate new unit number */
- i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, extra);
+ /* Find any existing device, or allocate new unit number. */
+ i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, 0);
if (i) {
- dev = make_dev(&tap_cdevsw, unit | extra,
- UID_ROOT, GID_WHEEL, 0600, "%s%d", ifc->ifc_name, unit);
+ dev = make_dev(&tap_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600,
+ "%s%d", tapname, unit);
}
tapcreate(dev);
@@ -207,7 +199,18 @@ tap_clone_create(struct if_clone *ifc, int unit, caddr_t params)
static int
vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
- return tap_clone_create(ifc, unit, params);
+ struct cdev *dev;
+ int i;
+
+ /* Find any existing device, or allocate new unit number. */
+ i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, VMNET_DEV_MASK);
+ if (i) {
+ dev = make_dev(&tap_cdevsw, unit | VMNET_DEV_MASK, UID_ROOT,
+ GID_WHEEL, 0600, "%s%d", vmnetname, unit);
+ }
+
+ tapcreate(dev);
+ return (0);
}
static void
@@ -218,9 +221,10 @@ tap_destroy(struct tap_softc *tp)
CURVNET_SET(ifp->if_vnet);
destroy_dev(tp->tap_dev);
seldrain(&tp->tap_rsel);
+ knlist_clear(&tp->tap_rsel.si_note, 0);
knlist_destroy(&tp->tap_rsel.si_note);
ether_ifdetach(ifp);
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
mtx_destroy(&tp->tap_mtx);
free(tp, M_TAP);
@@ -272,8 +276,10 @@ tapmodevent(module_t mod, int type, void *data)
mtx_destroy(&tapmtx);
return (ENOMEM);
}
- if_clone_attach(&tap_cloner);
- if_clone_attach(&vmnet_cloner);
+ tap_cloner = if_clone_simple(tapname, tap_clone_create,
+ tap_clone_destroy, 0);
+ vmnet_cloner = if_clone_simple(vmnetname, vmnet_clone_create,
+ vmnet_clone_destroy, 0);
return (0);
case MOD_UNLOAD:
@@ -295,8 +301,8 @@ tapmodevent(module_t mod, int type, void *data)
mtx_unlock(&tapmtx);
EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);
- if_clone_detach(&tap_cloner);
- if_clone_detach(&vmnet_cloner);
+ if_clone_detach(tap_cloner);
+ if_clone_detach(vmnet_cloner);
drain_dev_clone_events();
mtx_lock(&tapmtx);
@@ -350,13 +356,13 @@ tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **d
extra = 0;
/* We're interested in only tap/vmnet devices. */
- if (strcmp(name, TAP) == 0) {
+ if (strcmp(name, tapname) == 0) {
unit = -1;
- } else if (strcmp(name, VMNET) == 0) {
+ } else if (strcmp(name, vmnetname) == 0) {
unit = -1;
extra = VMNET_DEV_MASK;
- } else if (dev_stdclone(name, NULL, TAP, &unit) != 1) {
- if (dev_stdclone(name, NULL, VMNET, &unit) != 1) {
+ } else if (dev_stdclone(name, NULL, tapname, &unit) != 1) {
+ if (dev_stdclone(name, NULL, vmnetname, &unit) != 1) {
return;
} else {
extra = VMNET_DEV_MASK;
@@ -402,11 +408,9 @@ tapcreate(struct cdev *dev)
unsigned short macaddr_hi;
uint32_t macaddr_mid;
int unit;
- char *name = NULL;
+ const char *name = NULL;
u_char eaddr[6];
- dev->si_flags &= ~SI_CHEAPCLONE;
-
/* allocate driver storage and create device */
tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
@@ -418,10 +422,10 @@ tapcreate(struct cdev *dev)
/* select device: tap or vmnet */
if (unit & VMNET_DEV_MASK) {
- name = VMNET;
+ name = vmnetname;
tp->tap_flags |= TAP_VMNET;
} else
- name = TAP;
+ name = tapname;
unit &= TAPMAXUNIT;
@@ -534,11 +538,11 @@ tapclose(struct cdev *dev, int foo, int bar, struct thread *td)
IF_DRAIN(&ifp->if_snd);
/*
- * do not bring the interface down, and do not anything with
- * interface, if we are in VMnet mode. just close the device.
+ * Do not bring the interface down, and do not anything with
+ * interface, if we are in VMnet mode. Just close the device.
*/
-
- if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
+ if (((tp->tap_flags & TAP_VMNET) == 0) &&
+ (ifp->if_flags & (IFF_UP | IFF_LINK0)) == IFF_UP) {
mtx_unlock(&tp->tap_mtx);
if_down(ifp);
mtx_lock(&tp->tap_mtx);
@@ -636,12 +640,12 @@ tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCGIFSTATUS:
ifs = (struct ifstat *)data;
- dummy = strlen(ifs->ascii);
mtx_lock(&tp->tap_mtx);
- if (tp->tap_pid != 0 && dummy < sizeof(ifs->ascii))
- snprintf(ifs->ascii + dummy,
- sizeof(ifs->ascii) - dummy,
+ if (tp->tap_pid != 0)
+ snprintf(ifs->ascii, sizeof(ifs->ascii),
"\tOpened by PID %d\n", tp->tap_pid);
+ else
+ ifs->ascii[0] = '\0';
mtx_unlock(&tp->tap_mtx);
break;
@@ -684,7 +688,7 @@ tapifstart(struct ifnet *ifp)
IF_DEQUEUE(&ifp->if_snd, m);
if (m != NULL) {
m_freem(m);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
} else
break;
}
@@ -709,7 +713,7 @@ tapifstart(struct ifnet *ifp)
selwakeuppri(&tp->tap_rsel, PZERO+1);
KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
- ifp->if_opackets ++; /* obytes are counted in ether_output */
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */
}
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
@@ -829,8 +833,7 @@ tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td
mtx_unlock(&tp->tap_mtx);
break;
- case OSIOCGIFADDR: /* get MAC address of the remote side */
- case SIOCGIFADDR:
+ case SIOCGIFADDR: /* get MAC address of the remote side */
mtx_lock(&tp->tap_mtx);
bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
mtx_unlock(&tp->tap_mtx);
@@ -948,9 +951,9 @@ tapwrite(struct cdev *dev, struct uio *uio, int flag)
return (EIO);
}
- if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, ETHER_ALIGN,
+ if ((m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN,
M_PKTHDR)) == NULL) {
- ifp->if_ierrors ++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
return (ENOBUFS);
}
@@ -977,7 +980,7 @@ tapwrite(struct cdev *dev, struct uio *uio, int flag)
CURVNET_SET(ifp->if_vnet);
(*ifp->if_input)(ifp, m);
CURVNET_RESTORE();
- ifp->if_ipackets ++; /* ibytes are counted in parent */
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* ibytes are counted in parent */
return (0);
} /* tapwrite */
diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c
index 556a4860..edb30d04 100644
--- a/freebsd/sys/net/if_tun.c
+++ b/freebsd/sys/net/if_tun.c
@@ -18,10 +18,8 @@
* $FreeBSD$
*/
-#include <rtems/bsd/local/opt_atalk.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/sys/param.h>
#include <sys/priv.h>
@@ -47,6 +45,7 @@
#include <sys/random.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
#include <net/netisr.h>
@@ -101,7 +100,6 @@ struct tun_softc {
#define TUN2IFP(sc) ((sc)->tun_ifp)
#define TUNDEBUG if (tundebug) if_printf
-#define TUNNAME "tun"
/*
* All mutable global variables in if_tun are locked using tunmtx, with
@@ -109,7 +107,8 @@ struct tun_softc {
* which is static after setup.
*/
static struct mtx tunmtx;
-static MALLOC_DEFINE(M_TUN, TUNNAME, "Tunnel Interface");
+static const char tunname[] = "tun";
+static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
static int tundebug = 0;
static int tundclone = 1;
static struct clonedevs *tunclones;
@@ -119,25 +118,22 @@ SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
"IP tunnel software network interface.");
-SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tundclone, 0,
+SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
"Enable legacy devfs interface creation.");
-TUNABLE_INT("net.link.tun.devfs_cloning", &tundclone);
-
static void tunclone(void *arg, struct ucred *cred, char *name,
int namelen, struct cdev **dev);
static void tuncreate(const char *name, struct cdev *dev);
static int tunifioctl(struct ifnet *, u_long, caddr_t);
static void tuninit(struct ifnet *);
static int tunmodevent(module_t, int, void *);
-static int tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *ro);
+static int tunoutput(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *ro);
static void tunstart(struct ifnet *);
static int tun_clone_create(struct if_clone *, int, caddr_t);
static void tun_clone_destroy(struct ifnet *);
-
-IFC_SIMPLE_DECLARE(tun, 0);
+static struct if_clone *tun_cloner;
static d_open_t tunopen;
static d_close_t tunclose;
@@ -167,7 +163,7 @@ static struct filterops tun_write_filterops = {
static struct cdevsw tun_cdevsw = {
.d_version = D_VERSION,
- .d_flags = D_PSEUDO | D_NEEDMINOR,
+ .d_flags = D_NEEDMINOR,
.d_open = tunopen,
.d_close = tunclose,
.d_read = tunread,
@@ -175,7 +171,7 @@ static struct cdevsw tun_cdevsw = {
.d_ioctl = tunioctl,
.d_poll = tunpoll,
.d_kqfilter = tunkqfilter,
- .d_name = TUNNAME,
+ .d_name = tunname,
};
static int
@@ -189,9 +185,9 @@ tun_clone_create(struct if_clone *ifc, int unit, caddr_t params)
if (i) {
/* No preexisting struct cdev *, create one */
dev = make_dev(&tun_cdevsw, unit,
- UID_UUCP, GID_DIALER, 0600, "%s%d", ifc->ifc_name, unit);
+ UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit);
}
- tuncreate(ifc->ifc_name, dev);
+ tuncreate(tunname, dev);
return (0);
}
@@ -213,9 +209,9 @@ tunclone(void *arg, struct ucred *cred, char *name, int namelen,
if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)
return;
- if (strcmp(name, TUNNAME) == 0) {
+ if (strcmp(name, tunname) == 0) {
u = -1;
- } else if (dev_stdclone(name, NULL, TUNNAME, &u) != 1)
+ } else if (dev_stdclone(name, NULL, tunname, &u) != 1)
return; /* Don't recognise the name */
if (u != -1 && u > IF_MAXUNIT)
return; /* Unit number too high */
@@ -248,7 +244,6 @@ tun_destroy(struct tun_softc *tp)
{
struct cdev *dev;
- /* Unlocked read. */
mtx_lock(&tp->tun_mtx);
if ((tp->tun_flags & TUN_OPEN) != 0)
cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
@@ -262,6 +257,7 @@ tun_destroy(struct tun_softc *tp)
if_free(TUN2IFP(tp));
destroy_dev(dev);
seldrain(&tp->tun_rsel);
+ knlist_clear(&tp->tun_rsel.si_note, 0);
knlist_destroy(&tp->tun_rsel.si_note);
mtx_destroy(&tp->tun_mtx);
cv_destroy(&tp->tun_cv);
@@ -293,10 +289,11 @@ tunmodevent(module_t mod, int type, void *data)
tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
if (tag == NULL)
return (ENOMEM);
- if_clone_attach(&tun_cloner);
+ tun_cloner = if_clone_simple(tunname, tun_clone_create,
+ tun_clone_destroy, 0);
break;
case MOD_UNLOAD:
- if_clone_detach(&tun_cloner);
+ if_clone_detach(tun_cloner);
EVENTHANDLER_DEREGISTER(dev_clone, tag);
drain_dev_clone_events();
@@ -364,8 +361,6 @@ tuncreate(const char *name, struct cdev *dev)
struct tun_softc *sc;
struct ifnet *ifp;
- dev->si_flags &= ~SI_CHEAPCLONE;
-
sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
cv_init(&sc->tun_cv, "tun_condvar");
@@ -412,7 +407,7 @@ tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
*/
tp = dev->si_drv1;
if (!tp) {
- tuncreate(TUNNAME, dev);
+ tuncreate(tunname, dev);
tp = dev->si_drv1;
}
@@ -557,18 +552,16 @@ tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
ifs = (struct ifstat *)data;
mtx_lock(&tp->tun_mtx);
if (tp->tun_pid)
- sprintf(ifs->ascii + strlen(ifs->ascii),
+ snprintf(ifs->ascii, sizeof(ifs->ascii),
"\tOpened by PID %d\n", tp->tun_pid);
+ else
+ ifs->ascii[0] = '\0';
mtx_unlock(&tp->tun_mtx);
break;
case SIOCSIFADDR:
tuninit(ifp);
TUNDEBUG(ifp, "address set\n");
break;
- case SIOCSIFDSTADDR:
- tuninit(ifp);
- TUNDEBUG(ifp, "destination address set\n");
- break;
case SIOCSIFMTU:
ifp->if_mtu = ifr->ifr_mtu;
TUNDEBUG(ifp, "mtu set\n");
@@ -587,7 +580,7 @@ tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
* tunoutput - queue packets from higher level ready to put out.
*/
static int
-tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
+tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
struct route *ro)
{
struct tun_softc *tp = ifp->if_softc;
@@ -621,25 +614,23 @@ tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
}
/* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
+ if (dst->sa_family == AF_UNSPEC)
bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
-
- if (bpf_peers_present(ifp->if_bpf)) {
+ else
af = dst->sa_family;
+
+ if (bpf_peers_present(ifp->if_bpf))
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
- }
/* prepend sockaddr? this may abort if the mbuf allocation fails */
if (cached_tun_flags & TUN_LMODE) {
/* allocate space for sockaddr */
- M_PREPEND(m0, dst->sa_len, M_DONTWAIT);
+ M_PREPEND(m0, dst->sa_len, M_NOWAIT);
/* if allocation failed drop packet */
if (m0 == NULL) {
- ifp->if_iqdrops++;
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (ENOBUFS);
} else {
bcopy(dst, m0->m_data, dst->sa_len);
@@ -648,18 +639,18 @@ tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
if (cached_tun_flags & TUN_IFHEAD) {
/* Prepend the address family */
- M_PREPEND(m0, 4, M_DONTWAIT);
+ M_PREPEND(m0, 4, M_NOWAIT);
/* if allocation failed drop packet */
if (m0 == NULL) {
- ifp->if_iqdrops++;
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (ENOBUFS);
} else
- *(u_int32_t *)m0->m_data = htonl(dst->sa_family);
+ *(u_int32_t *)m0->m_data = htonl(af);
} else {
#ifdef INET
- if (dst->sa_family != AF_INET)
+ if (af != AF_INET)
#endif
{
m_freem(m0);
@@ -670,7 +661,7 @@ tunoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
error = (ifp->if_transmit)(ifp, m0);
if (error)
return (ENOBUFS);
- ifp->if_opackets++;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
return (0);
}
@@ -871,7 +862,7 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag)
struct tun_softc *tp = dev->si_drv1;
struct ifnet *ifp = TUN2IFP(tp);
struct mbuf *m;
- uint32_t family;
+ uint32_t family, mru;
int isr;
TUNDEBUG(ifp, "tunwrite\n");
@@ -883,13 +874,16 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag)
if (uio->uio_resid == 0)
return (0);
- if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) {
+ mru = TUNMRU;
+ if (tp->tun_flags & TUN_IFHEAD)
+ mru += sizeof(family);
+ if (uio->uio_resid < 0 || uio->uio_resid > mru) {
TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
return (EIO);
}
- if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0, M_PKTHDR)) == NULL) {
- ifp->if_ierrors++;
+ if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) {
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
return (ENOBUFS);
}
@@ -925,25 +919,13 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag)
isr = NETISR_IPV6;
break;
#endif
-#ifdef IPX
- case AF_IPX:
- isr = NETISR_IPX;
- break;
-#endif
-#ifdef NETATALK
- case AF_APPLETALK:
- isr = NETISR_ATALK2;
- break;
-#endif
default:
m_freem(m);
return (EAFNOSUPPORT);
}
- /* First chunk of an mbuf contains good junk */
- if (harvest.point_to_point)
- random_harvest(m, 16, 3, 0, RANDOM_NET);
- ifp->if_ibytes += m->m_pkthdr.len;
- ifp->if_ipackets++;
+ random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_TUN);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
CURVNET_SET(ifp->if_vnet);
M_SETFIB(m, ifp->if_fib);
netisr_dispatch(isr, m);
diff --git a/freebsd/sys/net/if_tun.h b/freebsd/sys/net/if_tun.h
index 382881cb..1ea375f7 100644
--- a/freebsd/sys/net/if_tun.h
+++ b/freebsd/sys/net/if_tun.h
@@ -25,11 +25,11 @@
#define TUNMTU 1500
/* Maximum receive packet size (hard limit) */
-#define TUNMRU 16384
+#define TUNMRU 65535
struct tuninfo {
int baudrate; /* linespeed */
- short mtu; /* maximum transmission unit */
+ unsigned short mtu; /* maximum transmission unit */
u_char type; /* ethernet, tokenring, etc. */
u_char dummy; /* place holder */
};
diff --git a/freebsd/sys/net/if_types.h b/freebsd/sys/net/if_types.h
index c2effacd..92e101ac 100644
--- a/freebsd/sys/net/if_types.h
+++ b/freebsd/sys/net/if_types.h
@@ -42,214 +42,232 @@
* http://www.iana.org/assignments/smi-numbers
*/
-#define IFT_OTHER 0x1 /* none of the following */
-#define IFT_1822 0x2 /* old-style arpanet imp */
-#define IFT_HDH1822 0x3 /* HDH arpanet imp */
-#define IFT_X25DDN 0x4 /* x25 to imp */
-#define IFT_X25 0x5 /* PDN X25 interface (RFC877) */
-#define IFT_ETHER 0x6 /* Ethernet CSMA/CD */
-#define IFT_ISO88023 0x7 /* CMSA/CD */
-#define IFT_ISO88024 0x8 /* Token Bus */
-#define IFT_ISO88025 0x9 /* Token Ring */
-#define IFT_ISO88026 0xa /* MAN */
-#define IFT_STARLAN 0xb
-#define IFT_P10 0xc /* Proteon 10MBit ring */
-#define IFT_P80 0xd /* Proteon 80MBit ring */
-#define IFT_HY 0xe /* Hyperchannel */
-#define IFT_FDDI 0xf
-#define IFT_LAPB 0x10
-#define IFT_SDLC 0x11
-#define IFT_T1 0x12
-#define IFT_CEPT 0x13 /* E1 - european T1 */
-#define IFT_ISDNBASIC 0x14
-#define IFT_ISDNPRIMARY 0x15
-#define IFT_PTPSERIAL 0x16 /* Proprietary PTP serial */
-#define IFT_PPP 0x17 /* RFC 1331 */
-#define IFT_LOOP 0x18 /* loopback */
-#define IFT_EON 0x19 /* ISO over IP */
-#define IFT_XETHER 0x1a /* obsolete 3MB experimental ethernet */
-#define IFT_NSIP 0x1b /* XNS over IP */
-#define IFT_SLIP 0x1c /* IP over generic TTY */
-#define IFT_ULTRA 0x1d /* Ultra Technologies */
-#define IFT_DS3 0x1e /* Generic T3 */
-#define IFT_SIP 0x1f /* SMDS */
-#define IFT_FRELAY 0x20 /* Frame Relay DTE only */
-#define IFT_RS232 0x21
-#define IFT_PARA 0x22 /* parallel-port */
-#define IFT_ARCNET 0x23
-#define IFT_ARCNETPLUS 0x24
-#define IFT_ATM 0x25 /* ATM cells */
-#define IFT_MIOX25 0x26
-#define IFT_SONET 0x27 /* SONET or SDH */
-#define IFT_X25PLE 0x28
-#define IFT_ISO88022LLC 0x29
-#define IFT_LOCALTALK 0x2a
-#define IFT_SMDSDXI 0x2b
-#define IFT_FRELAYDCE 0x2c /* Frame Relay DCE */
-#define IFT_V35 0x2d
-#define IFT_HSSI 0x2e
-#define IFT_HIPPI 0x2f
-#define IFT_MODEM 0x30 /* Generic Modem */
-#define IFT_AAL5 0x31 /* AAL5 over ATM */
-#define IFT_SONETPATH 0x32
-#define IFT_SONETVT 0x33
-#define IFT_SMDSICIP 0x34 /* SMDS InterCarrier Interface */
-#define IFT_PROPVIRTUAL 0x35 /* Proprietary Virtual/internal */
-#define IFT_PROPMUX 0x36 /* Proprietary Multiplexing */
-#define IFT_IEEE80212 0x37 /* 100BaseVG */
-#define IFT_FIBRECHANNEL 0x38 /* Fibre Channel */
-#define IFT_HIPPIINTERFACE 0x39 /* HIPPI interfaces */
-#define IFT_FRAMERELAYINTERCONNECT 0x3a /* Obsolete, use either 0x20 or 0x2c */
-#define IFT_AFLANE8023 0x3b /* ATM Emulated LAN for 802.3 */
-#define IFT_AFLANE8025 0x3c /* ATM Emulated LAN for 802.5 */
-#define IFT_CCTEMUL 0x3d /* ATM Emulated circuit */
-#define IFT_FASTETHER 0x3e /* Fast Ethernet (100BaseT) */
-#define IFT_ISDN 0x3f /* ISDN and X.25 */
-#define IFT_V11 0x40 /* CCITT V.11/X.21 */
-#define IFT_V36 0x41 /* CCITT V.36 */
-#define IFT_G703AT64K 0x42 /* CCITT G703 at 64Kbps */
-#define IFT_G703AT2MB 0x43 /* Obsolete see DS1-MIB */
-#define IFT_QLLC 0x44 /* SNA QLLC */
-#define IFT_FASTETHERFX 0x45 /* Fast Ethernet (100BaseFX) */
-#define IFT_CHANNEL 0x46 /* channel */
-#define IFT_IEEE80211 0x47 /* radio spread spectrum */
-#define IFT_IBM370PARCHAN 0x48 /* IBM System 360/370 OEMI Channel */
-#define IFT_ESCON 0x49 /* IBM Enterprise Systems Connection */
-#define IFT_DLSW 0x4a /* Data Link Switching */
-#define IFT_ISDNS 0x4b /* ISDN S/T interface */
-#define IFT_ISDNU 0x4c /* ISDN U interface */
-#define IFT_LAPD 0x4d /* Link Access Protocol D */
-#define IFT_IPSWITCH 0x4e /* IP Switching Objects */
-#define IFT_RSRB 0x4f /* Remote Source Route Bridging */
-#define IFT_ATMLOGICAL 0x50 /* ATM Logical Port */
-#define IFT_DS0 0x51 /* Digital Signal Level 0 */
-#define IFT_DS0BUNDLE 0x52 /* group of ds0s on the same ds1 */
-#define IFT_BSC 0x53 /* Bisynchronous Protocol */
-#define IFT_ASYNC 0x54 /* Asynchronous Protocol */
-#define IFT_CNR 0x55 /* Combat Net Radio */
-#define IFT_ISO88025DTR 0x56 /* ISO 802.5r DTR */
-#define IFT_EPLRS 0x57 /* Ext Pos Loc Report Sys */
-#define IFT_ARAP 0x58 /* Appletalk Remote Access Protocol */
-#define IFT_PROPCNLS 0x59 /* Proprietary Connectionless Protocol*/
-#define IFT_HOSTPAD 0x5a /* CCITT-ITU X.29 PAD Protocol */
-#define IFT_TERMPAD 0x5b /* CCITT-ITU X.3 PAD Facility */
-#define IFT_FRAMERELAYMPI 0x5c /* Multiproto Interconnect over FR */
-#define IFT_X213 0x5d /* CCITT-ITU X213 */
-#define IFT_ADSL 0x5e /* Asymmetric Digital Subscriber Loop */
-#define IFT_RADSL 0x5f /* Rate-Adapt. Digital Subscriber Loop*/
-#define IFT_SDSL 0x60 /* Symmetric Digital Subscriber Loop */
-#define IFT_VDSL 0x61 /* Very H-Speed Digital Subscrib. Loop*/
-#define IFT_ISO88025CRFPINT 0x62 /* ISO 802.5 CRFP */
-#define IFT_MYRINET 0x63 /* Myricom Myrinet */
-#define IFT_VOICEEM 0x64 /* voice recEive and transMit */
-#define IFT_VOICEFXO 0x65 /* voice Foreign Exchange Office */
-#define IFT_VOICEFXS 0x66 /* voice Foreign Exchange Station */
-#define IFT_VOICEENCAP 0x67 /* voice encapsulation */
-#define IFT_VOICEOVERIP 0x68 /* voice over IP encapsulation */
-#define IFT_ATMDXI 0x69 /* ATM DXI */
-#define IFT_ATMFUNI 0x6a /* ATM FUNI */
-#define IFT_ATMIMA 0x6b /* ATM IMA */
-#define IFT_PPPMULTILINKBUNDLE 0x6c /* PPP Multilink Bundle */
-#define IFT_IPOVERCDLC 0x6d /* IBM ipOverCdlc */
-#define IFT_IPOVERCLAW 0x6e /* IBM Common Link Access to Workstn */
-#define IFT_STACKTOSTACK 0x6f /* IBM stackToStack */
-#define IFT_VIRTUALIPADDRESS 0x70 /* IBM VIPA */
-#define IFT_MPC 0x71 /* IBM multi-protocol channel support */
-#define IFT_IPOVERATM 0x72 /* IBM ipOverAtm */
-#define IFT_ISO88025FIBER 0x73 /* ISO 802.5j Fiber Token Ring */
-#define IFT_TDLC 0x74 /* IBM twinaxial data link control */
-#define IFT_GIGABITETHERNET 0x75 /* Gigabit Ethernet */
-#define IFT_HDLC 0x76 /* HDLC */
-#define IFT_LAPF 0x77 /* LAP F */
-#define IFT_V37 0x78 /* V.37 */
-#define IFT_X25MLP 0x79 /* Multi-Link Protocol */
-#define IFT_X25HUNTGROUP 0x7a /* X25 Hunt Group */
-#define IFT_TRANSPHDLC 0x7b /* Transp HDLC */
-#define IFT_INTERLEAVE 0x7c /* Interleave channel */
-#define IFT_FAST 0x7d /* Fast channel */
-#define IFT_IP 0x7e /* IP (for APPN HPR in IP networks) */
-#define IFT_DOCSCABLEMACLAYER 0x7f /* CATV Mac Layer */
-#define IFT_DOCSCABLEDOWNSTREAM 0x80 /* CATV Downstream interface */
-#define IFT_DOCSCABLEUPSTREAM 0x81 /* CATV Upstream interface */
-#define IFT_A12MPPSWITCH 0x82 /* Avalon Parallel Processor */
-#define IFT_TUNNEL 0x83 /* Encapsulation interface */
-#define IFT_COFFEE 0x84 /* coffee pot */
-#define IFT_CES 0x85 /* Circiut Emulation Service */
-#define IFT_ATMSUBINTERFACE 0x86 /* (x) ATM Sub Interface */
-#define IFT_L2VLAN 0x87 /* Layer 2 Virtual LAN using 802.1Q */
-#define IFT_L3IPVLAN 0x88 /* Layer 3 Virtual LAN - IP Protocol */
-#define IFT_L3IPXVLAN 0x89 /* Layer 3 Virtual LAN - IPX Prot. */
-#define IFT_DIGITALPOWERLINE 0x8a /* IP over Power Lines */
-#define IFT_MEDIAMAILOVERIP 0x8b /* (xxx) Multimedia Mail over IP */
-#define IFT_DTM 0x8c /* Dynamic synchronous Transfer Mode */
-#define IFT_DCN 0x8d /* Data Communications Network */
-#define IFT_IPFORWARD 0x8e /* IP Forwarding Interface */
-#define IFT_MSDSL 0x8f /* Multi-rate Symmetric DSL */
-#define IFT_IEEE1394 0x90 /* IEEE1394 High Performance SerialBus*/
-#define IFT_IFGSN 0x91 /* HIPPI-6400 */
-#define IFT_DVBRCCMACLAYER 0x92 /* DVB-RCC MAC Layer */
-#define IFT_DVBRCCDOWNSTREAM 0x93 /* DVB-RCC Downstream Channel */
-#define IFT_DVBRCCUPSTREAM 0x94 /* DVB-RCC Upstream Channel */
-#define IFT_ATMVIRTUAL 0x95 /* ATM Virtual Interface */
-#define IFT_MPLSTUNNEL 0x96 /* MPLS Tunnel Virtual Interface */
-#define IFT_SRP 0x97 /* Spatial Reuse Protocol */
-#define IFT_VOICEOVERATM 0x98 /* Voice over ATM */
-#define IFT_VOICEOVERFRAMERELAY 0x99 /* Voice Over Frame Relay */
-#define IFT_IDSL 0x9a /* Digital Subscriber Loop over ISDN */
-#define IFT_COMPOSITELINK 0x9b /* Avici Composite Link Interface */
-#define IFT_SS7SIGLINK 0x9c /* SS7 Signaling Link */
-#define IFT_PROPWIRELESSP2P 0x9d /* Prop. P2P wireless interface */
-#define IFT_FRFORWARD 0x9e /* Frame forward Interface */
-#define IFT_RFC1483 0x9f /* Multiprotocol over ATM AAL5 */
-#define IFT_USB 0xa0 /* USB Interface */
-#define IFT_IEEE8023ADLAG 0xa1 /* IEEE 802.3ad Link Aggregate*/
-#define IFT_BGPPOLICYACCOUNTING 0xa2 /* BGP Policy Accounting */
-#define IFT_FRF16MFRBUNDLE 0xa3 /* FRF.16 Multilik Frame Relay*/
-#define IFT_H323GATEKEEPER 0xa4 /* H323 Gatekeeper */
-#define IFT_H323PROXY 0xa5 /* H323 Voice and Video Proxy */
-#define IFT_MPLS 0xa6 /* MPLS */
-#define IFT_MFSIGLINK 0xa7 /* Multi-frequency signaling link */
-#define IFT_HDSL2 0xa8 /* High Bit-Rate DSL, 2nd gen. */
-#define IFT_SHDSL 0xa9 /* Multirate HDSL2 */
-#define IFT_DS1FDL 0xaa /* Facility Data Link (4Kbps) on a DS1*/
-#define IFT_POS 0xab /* Packet over SONET/SDH Interface */
-#define IFT_DVBASILN 0xac /* DVB-ASI Input */
-#define IFT_DVBASIOUT 0xad /* DVB-ASI Output */
-#define IFT_PLC 0xae /* Power Line Communications */
-#define IFT_NFAS 0xaf /* Non-Facility Associated Signaling */
-#define IFT_TR008 0xb0 /* TROO8 */
-#define IFT_GR303RDT 0xb1 /* Remote Digital Terminal */
-#define IFT_GR303IDT 0xb2 /* Integrated Digital Terminal */
-#define IFT_ISUP 0xb3 /* ISUP */
-#define IFT_PROPDOCSWIRELESSMACLAYER 0xb4 /* prop/Wireless MAC Layer */
-#define IFT_PROPDOCSWIRELESSDOWNSTREAM 0xb5 /* prop/Wireless Downstream */
-#define IFT_PROPDOCSWIRELESSUPSTREAM 0xb6 /* prop/Wireless Upstream */
-#define IFT_HIPERLAN2 0xb7 /* HIPERLAN Type 2 Radio Interface */
-#define IFT_PROPBWAP2MP 0xb8 /* PropBroadbandWirelessAccess P2MP*/
-#define IFT_SONETOVERHEADCHANNEL 0xb9 /* SONET Overhead Channel */
-#define IFT_DIGITALWRAPPEROVERHEADCHANNEL 0xba /* Digital Wrapper Overhead */
-#define IFT_AAL2 0xbb /* ATM adaptation layer 2 */
-#define IFT_RADIOMAC 0xbc /* MAC layer over radio links */
-#define IFT_ATMRADIO 0xbd /* ATM over radio links */
-#define IFT_IMT 0xbe /* Inter-Machine Trunks */
-#define IFT_MVL 0xbf /* Multiple Virtual Lines DSL */
-#define IFT_REACHDSL 0xc0 /* Long Reach DSL */
-#define IFT_FRDLCIENDPT 0xc1 /* Frame Relay DLCI End Point */
-#define IFT_ATMVCIENDPT 0xc2 /* ATM VCI End Point */
-#define IFT_OPTICALCHANNEL 0xc3 /* Optical Channel */
-#define IFT_OPTICALTRANSPORT 0xc4 /* Optical Transport */
-#define IFT_INFINIBAND 0xc7 /* Infiniband */
-#define IFT_BRIDGE 0xd1 /* Transparent bridge interface */
+typedef enum {
+ IFT_OTHER = 0x1, /* none of the following */
+ IFT_1822 = 0x2, /* old-style arpanet imp */
+ IFT_HDH1822 = 0x3, /* HDH arpanet imp */
+ IFT_X25DDN = 0x4, /* x25 to imp */
+ IFT_X25 = 0x5, /* PDN X25 interface (RFC877) */
+ IFT_ETHER = 0x6, /* Ethernet CSMA/CD */
+ IFT_ISO88023 = 0x7, /* CMSA/CD */
+ IFT_ISO88024 = 0x8, /* Token Bus */
+ IFT_ISO88025 = 0x9, /* Token Ring */
+ IFT_ISO88026 = 0xa, /* MAN */
+ IFT_STARLAN = 0xb,
+ IFT_P10 = 0xc, /* Proteon 10MBit ring */
+ IFT_P80 = 0xd, /* Proteon 80MBit ring */
+ IFT_HY = 0xe, /* Hyperchannel */
+ IFT_FDDI = 0xf,
+ IFT_LAPB = 0x10,
+ IFT_SDLC = 0x11,
+ IFT_T1 = 0x12,
+ IFT_CEPT = 0x13, /* E1 - european T1 */
+ IFT_ISDNBASIC = 0x14,
+ IFT_ISDNPRIMARY = 0x15,
+ IFT_PTPSERIAL = 0x16, /* Proprietary PTP serial */
+ IFT_PPP = 0x17, /* RFC 1331 */
+ IFT_LOOP = 0x18, /* loopback */
+ IFT_EON = 0x19, /* ISO over IP */
+ IFT_XETHER = 0x1a, /* obsolete 3MB experimental ethernet */
+ IFT_NSIP = 0x1b, /* XNS over IP */
+ IFT_SLIP = 0x1c, /* IP over generic TTY */
+ IFT_ULTRA = 0x1d, /* Ultra Technologies */
+ IFT_DS3 = 0x1e, /* Generic T3 */
+ IFT_SIP = 0x1f, /* SMDS */
+ IFT_FRELAY = 0x20, /* Frame Relay DTE only */
+ IFT_RS232 = 0x21,
+ IFT_PARA = 0x22, /* parallel-port */
+ IFT_ARCNET = 0x23,
+ IFT_ARCNETPLUS = 0x24,
+ IFT_ATM = 0x25, /* ATM cells */
+ IFT_MIOX25 = 0x26,
+ IFT_SONET = 0x27, /* SONET or SDH */
+ IFT_X25PLE = 0x28,
+ IFT_ISO88022LLC = 0x29,
+ IFT_LOCALTALK = 0x2a,
+ IFT_SMDSDXI = 0x2b,
+ IFT_FRELAYDCE = 0x2c, /* Frame Relay DCE */
+ IFT_V35 = 0x2d,
+ IFT_HSSI = 0x2e,
+ IFT_HIPPI = 0x2f,
+ IFT_MODEM = 0x30, /* Generic Modem */
+ IFT_AAL5 = 0x31, /* AAL5 over ATM */
+ IFT_SONETPATH = 0x32,
+ IFT_SONETVT = 0x33,
+ IFT_SMDSICIP = 0x34, /* SMDS InterCarrier Interface */
+ IFT_PROPVIRTUAL = 0x35, /* Proprietary Virtual/internal */
+ IFT_PROPMUX = 0x36, /* Proprietary Multiplexing */
+ IFT_IEEE80212 = 0x37, /* 100BaseVG */
+ IFT_FIBRECHANNEL = 0x38, /* Fibre Channel */
+ IFT_HIPPIINTERFACE = 0x39, /* HIPPI interfaces */
+	IFT_FRAMERELAYINTERCONNECT = 0x3a, /* Obsolete, use either 0x20 or 0x2c */
+ IFT_AFLANE8023 = 0x3b, /* ATM Emulated LAN for 802.3 */
+ IFT_AFLANE8025 = 0x3c, /* ATM Emulated LAN for 802.5 */
+ IFT_CCTEMUL = 0x3d, /* ATM Emulated circuit */
+ IFT_FASTETHER = 0x3e, /* Fast Ethernet (100BaseT) */
+ IFT_ISDN = 0x3f, /* ISDN and X.25 */
+ IFT_V11 = 0x40, /* CCITT V.11/X.21 */
+ IFT_V36 = 0x41, /* CCITT V.36 */
+ IFT_G703AT64K = 0x42, /* CCITT G703 at 64Kbps */
+ IFT_G703AT2MB = 0x43, /* Obsolete see DS1-MIB */
+ IFT_QLLC = 0x44, /* SNA QLLC */
+ IFT_FASTETHERFX = 0x45, /* Fast Ethernet (100BaseFX) */
+ IFT_CHANNEL = 0x46, /* channel */
+ IFT_IEEE80211 = 0x47, /* radio spread spectrum */
+ IFT_IBM370PARCHAN = 0x48, /* IBM System 360/370 OEMI Channel */
+ IFT_ESCON = 0x49, /* IBM Enterprise Systems Connection */
+ IFT_DLSW = 0x4a, /* Data Link Switching */
+ IFT_ISDNS = 0x4b, /* ISDN S/T interface */
+ IFT_ISDNU = 0x4c, /* ISDN U interface */
+ IFT_LAPD = 0x4d, /* Link Access Protocol D */
+ IFT_IPSWITCH = 0x4e, /* IP Switching Objects */
+ IFT_RSRB = 0x4f, /* Remote Source Route Bridging */
+ IFT_ATMLOGICAL = 0x50, /* ATM Logical Port */
+ IFT_DS0 = 0x51, /* Digital Signal Level 0 */
+ IFT_DS0BUNDLE = 0x52, /* group of ds0s on the same ds1 */
+ IFT_BSC = 0x53, /* Bisynchronous Protocol */
+ IFT_ASYNC = 0x54, /* Asynchronous Protocol */
+ IFT_CNR = 0x55, /* Combat Net Radio */
+ IFT_ISO88025DTR = 0x56, /* ISO 802.5r DTR */
+ IFT_EPLRS = 0x57, /* Ext Pos Loc Report Sys */
+ IFT_ARAP = 0x58, /* Appletalk Remote Access Protocol */
+ IFT_PROPCNLS = 0x59, /* Proprietary Connectionless Protocol*/
+ IFT_HOSTPAD = 0x5a, /* CCITT-ITU X.29 PAD Protocol */
+ IFT_TERMPAD = 0x5b, /* CCITT-ITU X.3 PAD Facility */
+ IFT_FRAMERELAYMPI = 0x5c, /* Multiproto Interconnect over FR */
+ IFT_X213 = 0x5d, /* CCITT-ITU X213 */
+ IFT_ADSL = 0x5e, /* Asymmetric Digital Subscriber Loop */
+ IFT_RADSL = 0x5f, /* Rate-Adapt. Digital Subscriber Loop*/
+ IFT_SDSL = 0x60, /* Symmetric Digital Subscriber Loop */
+ IFT_VDSL = 0x61, /* Very H-Speed Digital Subscrib. Loop*/
+ IFT_ISO88025CRFPINT = 0x62, /* ISO 802.5 CRFP */
+ IFT_MYRINET = 0x63, /* Myricom Myrinet */
+ IFT_VOICEEM = 0x64, /* voice recEive and transMit */
+ IFT_VOICEFXO = 0x65, /* voice Foreign Exchange Office */
+ IFT_VOICEFXS = 0x66, /* voice Foreign Exchange Station */
+ IFT_VOICEENCAP = 0x67, /* voice encapsulation */
+ IFT_VOICEOVERIP = 0x68, /* voice over IP encapsulation */
+ IFT_ATMDXI = 0x69, /* ATM DXI */
+ IFT_ATMFUNI = 0x6a, /* ATM FUNI */
+ IFT_ATMIMA = 0x6b, /* ATM IMA */
+ IFT_PPPMULTILINKBUNDLE = 0x6c, /* PPP Multilink Bundle */
+ IFT_IPOVERCDLC = 0x6d, /* IBM ipOverCdlc */
+ IFT_IPOVERCLAW = 0x6e, /* IBM Common Link Access to Workstn */
+ IFT_STACKTOSTACK = 0x6f, /* IBM stackToStack */
+ IFT_VIRTUALIPADDRESS = 0x70, /* IBM VIPA */
+ IFT_MPC = 0x71, /* IBM multi-protocol channel support */
+ IFT_IPOVERATM = 0x72, /* IBM ipOverAtm */
+ IFT_ISO88025FIBER = 0x73, /* ISO 802.5j Fiber Token Ring */
+ IFT_TDLC = 0x74, /* IBM twinaxial data link control */
+ IFT_GIGABITETHERNET = 0x75, /* Gigabit Ethernet */
+ IFT_HDLC = 0x76, /* HDLC */
+ IFT_LAPF = 0x77, /* LAP F */
+ IFT_V37 = 0x78, /* V.37 */
+ IFT_X25MLP = 0x79, /* Multi-Link Protocol */
+ IFT_X25HUNTGROUP = 0x7a, /* X25 Hunt Group */
+ IFT_TRANSPHDLC = 0x7b, /* Transp HDLC */
+ IFT_INTERLEAVE = 0x7c, /* Interleave channel */
+ IFT_FAST = 0x7d, /* Fast channel */
+ IFT_IP = 0x7e, /* IP (for APPN HPR in IP networks) */
+ IFT_DOCSCABLEMACLAYER = 0x7f, /* CATV Mac Layer */
+ IFT_DOCSCABLEDOWNSTREAM = 0x80, /* CATV Downstream interface */
+ IFT_DOCSCABLEUPSTREAM = 0x81, /* CATV Upstream interface */
+ IFT_A12MPPSWITCH = 0x82, /* Avalon Parallel Processor */
+ IFT_TUNNEL = 0x83, /* Encapsulation interface */
+ IFT_COFFEE = 0x84, /* coffee pot */
+ IFT_CES = 0x85, /* Circiut Emulation Service */
+ IFT_ATMSUBINTERFACE = 0x86, /* (x) ATM Sub Interface */
+ IFT_L2VLAN = 0x87, /* Layer 2 Virtual LAN using 802.1Q */
+ IFT_L3IPVLAN = 0x88, /* Layer 3 Virtual LAN - IP Protocol */
+ IFT_L3IPXVLAN = 0x89, /* Layer 3 Virtual LAN - IPX Prot. */
+ IFT_DIGITALPOWERLINE = 0x8a, /* IP over Power Lines */
+ IFT_MEDIAMAILOVERIP = 0x8b, /* (xxx) Multimedia Mail over IP */
+ IFT_DTM = 0x8c, /* Dynamic synchronous Transfer Mode */
+ IFT_DCN = 0x8d, /* Data Communications Network */
+ IFT_IPFORWARD = 0x8e, /* IP Forwarding Interface */
+ IFT_MSDSL = 0x8f, /* Multi-rate Symmetric DSL */
+ IFT_IEEE1394 = 0x90, /* IEEE1394 High Performance SerialBus*/
+ IFT_IFGSN = 0x91, /* HIPPI-6400 */
+ IFT_DVBRCCMACLAYER = 0x92, /* DVB-RCC MAC Layer */
+ IFT_DVBRCCDOWNSTREAM = 0x93, /* DVB-RCC Downstream Channel */
+ IFT_DVBRCCUPSTREAM = 0x94, /* DVB-RCC Upstream Channel */
+ IFT_ATMVIRTUAL = 0x95, /* ATM Virtual Interface */
+ IFT_MPLSTUNNEL = 0x96, /* MPLS Tunnel Virtual Interface */
+ IFT_SRP = 0x97, /* Spatial Reuse Protocol */
+ IFT_VOICEOVERATM = 0x98, /* Voice over ATM */
+ IFT_VOICEOVERFRAMERELAY = 0x99, /* Voice Over Frame Relay */
+ IFT_IDSL = 0x9a, /* Digital Subscriber Loop over ISDN */
+ IFT_COMPOSITELINK = 0x9b, /* Avici Composite Link Interface */
+ IFT_SS7SIGLINK = 0x9c, /* SS7 Signaling Link */
+ IFT_PROPWIRELESSP2P = 0x9d, /* Prop. P2P wireless interface */
+ IFT_FRFORWARD = 0x9e, /* Frame forward Interface */
+ IFT_RFC1483 = 0x9f, /* Multiprotocol over ATM AAL5 */
+ IFT_USB = 0xa0, /* USB Interface */
+ IFT_IEEE8023ADLAG = 0xa1, /* IEEE 802.3ad Link Aggregate*/
+ IFT_BGPPOLICYACCOUNTING = 0xa2, /* BGP Policy Accounting */
+ IFT_FRF16MFRBUNDLE = 0xa3, /* FRF.16 Multilik Frame Relay*/
+ IFT_H323GATEKEEPER = 0xa4, /* H323 Gatekeeper */
+ IFT_H323PROXY = 0xa5, /* H323 Voice and Video Proxy */
+ IFT_MPLS = 0xa6, /* MPLS */
+ IFT_MFSIGLINK = 0xa7, /* Multi-frequency signaling link */
+ IFT_HDSL2 = 0xa8, /* High Bit-Rate DSL, 2nd gen. */
+ IFT_SHDSL = 0xa9, /* Multirate HDSL2 */
+ IFT_DS1FDL = 0xaa, /* Facility Data Link (4Kbps) on a DS1*/
+ IFT_POS = 0xab, /* Packet over SONET/SDH Interface */
+ IFT_DVBASILN = 0xac, /* DVB-ASI Input */
+ IFT_DVBASIOUT = 0xad, /* DVB-ASI Output */
+ IFT_PLC = 0xae, /* Power Line Communications */
+ IFT_NFAS = 0xaf, /* Non-Facility Associated Signaling */
+ IFT_TR008 = 0xb0, /* TROO8 */
+ IFT_GR303RDT = 0xb1, /* Remote Digital Terminal */
+ IFT_GR303IDT = 0xb2, /* Integrated Digital Terminal */
+ IFT_ISUP = 0xb3, /* ISUP */
+ IFT_PROPDOCSWIRELESSMACLAYER = 0xb4, /* prop/Wireless MAC Layer */
+ IFT_PROPDOCSWIRELESSDOWNSTREAM = 0xb5, /* prop/Wireless Downstream */
+ IFT_PROPDOCSWIRELESSUPSTREAM = 0xb6, /* prop/Wireless Upstream */
+ IFT_HIPERLAN2 = 0xb7, /* HIPERLAN Type 2 Radio Interface */
+ IFT_PROPBWAP2MP = 0xb8, /* PropBroadbandWirelessAccess P2MP*/
+ IFT_SONETOVERHEADCHANNEL = 0xb9, /* SONET Overhead Channel */
+ IFT_DIGITALWRAPPEROVERHEADCHANNEL = 0xba, /* Digital Wrapper Overhead */
+ IFT_AAL2 = 0xbb, /* ATM adaptation layer 2 */
+ IFT_RADIOMAC = 0xbc, /* MAC layer over radio links */
+ IFT_ATMRADIO = 0xbd, /* ATM over radio links */
+ IFT_IMT = 0xbe, /* Inter-Machine Trunks */
+ IFT_MVL = 0xbf, /* Multiple Virtual Lines DSL */
+ IFT_REACHDSL = 0xc0, /* Long Reach DSL */
+ IFT_FRDLCIENDPT = 0xc1, /* Frame Relay DLCI End Point */
+ IFT_ATMVCIENDPT = 0xc2, /* ATM VCI End Point */
+ IFT_OPTICALCHANNEL = 0xc3, /* Optical Channel */
+ IFT_OPTICALTRANSPORT = 0xc4, /* Optical Transport */
+ IFT_INFINIBAND = 0xc7, /* Infiniband */
+ IFT_BRIDGE = 0xd1, /* Transparent bridge interface */
+ IFT_STF = 0xd7, /* 6to4 interface */
-#define IFT_STF 0xd7 /* 6to4 interface */
+ /*
+ * Not based on IANA assignments. Conflicting with IANA assignments.
+ * We should make them negative probably.
+ * This requires changes to struct if_data.
+ */
+ IFT_GIF = 0xf0, /* Generic tunnel interface */
+ IFT_PVC = 0xf1, /* Unused */
+ IFT_ENC = 0xf4, /* Encapsulating interface */
+ IFT_PFLOG = 0xf6, /* PF packet filter logging */
+ IFT_PFSYNC = 0xf7, /* PF packet filter synchronization */
+} ifType;
+
+/*
+ * Some (broken) software uses #ifdef IFT_TYPE to check whether
+ * an operating systems supports certain interface type. Lack of
+ * ifdef leads to a piece of functionality compiled out.
+ */
+#ifndef BURN_BRIDGES
+#define IFT_BRIDGE IFT_BRIDGE
+#define IFT_PPP IFT_PPP
+#define IFT_PROPVIRTUAL IFT_PROPVIRTUAL
+#define IFT_L2VLAN IFT_L2VLAN
+#define IFT_L3IPVLAN IFT_L3IPVLAN
+#define IFT_IEEE1394 IFT_IEEE1394
+#define IFT_INFINIBAND IFT_INFINIBAND
+#endif
-/* not based on IANA assignments */
-#define IFT_GIF 0xf0
-#define IFT_PVC 0xf1
-#define IFT_FAITH 0xf2
-#define IFT_ENC 0xf4
-#define IFT_PFLOG 0xf6
-#define IFT_PFSYNC 0xf7
-#define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */
-#define IFT_IPXIP 0xf9 /* IPX over IP tunneling; no longer used. */
#endif /* !_NET_IF_TYPES_H_ */
diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h
index ee4db195..ec3719d4 100644
--- a/freebsd/sys/net/if_var.h
+++ b/freebsd/sys/net/if_var.h
@@ -58,58 +58,75 @@
* interfaces. These routines live in the files if.c and route.c
*/
-#ifdef __STDC__
-/*
- * Forward structure declarations for function prototypes [sic].
- */
-struct mbuf;
-struct thread;
-struct rtentry;
-struct rt_addrinfo;
+struct rtentry; /* ifa_rtrequest */
+struct rt_addrinfo; /* ifa_rtrequest */
struct socket;
-struct ether_header;
struct carp_if;
+struct carp_softc;
struct ifvlantrunk;
-struct route;
+struct route; /* if_output */
struct vnet;
-#endif
-
-#include <sys/queue.h> /* get TAILQ macros */
+struct ifmedia;
+struct netmap_adapter;
#ifdef _KERNEL
-#include <sys/mbuf.h>
-#include <sys/eventhandler.h>
+#include <sys/mbuf.h> /* ifqueue only? */
#include <sys/buf_ring.h>
#include <net/vnet.h>
#endif /* _KERNEL */
+#include <sys/counter.h>
#include <rtems/bsd/sys/lock.h> /* XXX */
-#include <sys/mutex.h> /* XXX */
+#include <sys/mutex.h> /* struct ifqueue */
#include <sys/rwlock.h> /* XXX */
#include <sys/sx.h> /* XXX */
-#include <sys/event.h> /* XXX */
-#include <sys/_task.h>
+#include <sys/_task.h> /* if_link_task */
#define IF_DUNIT_NONE -1
-#include <altq/if_altq.h>
+#include <net/altq/if_altq.h>
TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */
TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */
-TAILQ_HEAD(ifprefixhead, ifprefix);
TAILQ_HEAD(ifmultihead, ifmultiaddr);
TAILQ_HEAD(ifgrouphead, ifg_group);
-/*
- * Structure defining a queue for a network interface.
- */
-struct ifqueue {
- struct mbuf *ifq_head;
- struct mbuf *ifq_tail;
- int ifq_len;
- int ifq_maxlen;
- int ifq_drops;
- struct mtx ifq_mtx;
-};
+#ifdef _KERNEL
+VNET_DECLARE(struct pfil_head, link_pfil_hook); /* packet filter hooks */
+#define V_link_pfil_hook VNET(link_pfil_hook)
+
+#define HHOOK_IPSEC_INET 0
+#define HHOOK_IPSEC_INET6 1
+#define HHOOK_IPSEC_COUNT 2
+VNET_DECLARE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
+VNET_DECLARE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);
+#define V_ipsec_hhh_in VNET(ipsec_hhh_in)
+#define V_ipsec_hhh_out VNET(ipsec_hhh_out)
+#endif /* _KERNEL */
+
+typedef enum {
+ IFCOUNTER_IPACKETS = 0,
+ IFCOUNTER_IERRORS,
+ IFCOUNTER_OPACKETS,
+ IFCOUNTER_OERRORS,
+ IFCOUNTER_COLLISIONS,
+ IFCOUNTER_IBYTES,
+ IFCOUNTER_OBYTES,
+ IFCOUNTER_IMCASTS,
+ IFCOUNTER_OMCASTS,
+ IFCOUNTER_IQDROPS,
+ IFCOUNTER_OQDROPS,
+ IFCOUNTER_NOPROTO,
+ IFCOUNTERS /* Array size. */
+} ift_counter;
+
+typedef struct ifnet * if_t;
+
+typedef void (*if_start_fn_t)(if_t);
+typedef int (*if_ioctl_fn_t)(if_t, u_long, caddr_t);
+typedef void (*if_init_fn_t)(void *);
+typedef void (*if_qflush_fn_t)(if_t);
+typedef int (*if_transmit_fn_t)(if_t, struct mbuf *);
+typedef uint64_t (*if_get_counter_t)(if_t, ift_counter);
struct ifnet_hw_tsomax {
u_int tsomaxbytes; /* TSO total burst length limit in bytes */
@@ -117,22 +134,99 @@ struct ifnet_hw_tsomax {
u_int tsomaxsegsize; /* TSO maximum segment size in bytes */
};
+/* Interface encap request types */
+typedef enum {
+ IFENCAP_LL = 1 /* pre-calculate link-layer header */
+} ife_type;
+
/*
- * Structure defining a network interface.
+ * The structure below allows to request various pre-calculated L2/L3 headers
+ * for different media. Requests varies by type (rtype field).
+ *
+ * IFENCAP_LL type: pre-calculates link header based on address family
+ * and destination lladdr.
*
- * (Would like to call this struct ``if'', but C isn't PL/1.)
+ * Input data fields:
+ * buf: pointer to destination buffer
+ * bufsize: buffer size
+ * flags: IFENCAP_FLAG_BROADCAST if destination is broadcast
+ * family: address family defined by AF_ constant.
+ * lladdr: pointer to link-layer address
+ * lladdr_len: length of link-layer address
+ * hdata: pointer to L3 header (optional, used for ARP requests).
+ * Output data fields:
+ * buf: encap data is stored here
+ * bufsize: resulting encap length is stored here
+ * lladdr_off: offset of link-layer address from encap hdr start
+ * hdata: L3 header may be altered if necessary
*/
+struct if_encap_req {
+ u_char *buf; /* Destination buffer (w) */
+ size_t bufsize; /* size of provided buffer (r) */
+ ife_type rtype; /* request type (r) */
+ uint32_t flags; /* Request flags (r) */
+ int family; /* Address family AF_* (r) */
+ int lladdr_off; /* offset from header start (w) */
+ int lladdr_len; /* lladdr length (r) */
+ char *lladdr; /* link-level address pointer (r) */
+ char *hdata; /* Upper layer header data (rw) */
+};
+
+#define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */
+
+
+/*
+ * Structure defining a network interface.
+ *
+ * Size ILP32: 592 (approx)
+ * LP64: 1048 (approx)
+ */
struct ifnet {
+ /* General book keeping of interface lists. */
+ TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */
+ LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */
+ TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
+ /* protected by if_addr_lock */
+ u_char if_alloctype; /* if_type at time of allocation */
+
+ /* Driver and protocol specific information that remains stable. */
void *if_softc; /* pointer to driver state */
+ void *if_llsoftc; /* link layer softc */
void *if_l2com; /* pointer to protocol bits */
- struct vnet *if_vnet; /* pointer to network stack instance */
- TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */
- char if_xname[IFNAMSIZ]; /* external name (name + unit) */
const char *if_dname; /* driver name */
int if_dunit; /* unit or IF_DUNIT_NONE */
+ u_short if_index; /* numeric abbreviation for this if */
+ short if_index_reserved; /* spare space to grow if_index */
+ char if_xname[IFNAMSIZ]; /* external name (name + unit) */
+ char *if_description; /* interface description */
+
+ /* Variable fields that are touched by the stack and drivers. */
+ int if_flags; /* up/down, broadcast, etc. */
+ int if_drv_flags; /* driver-managed status flags */
+ int if_capabilities; /* interface features & capabilities */
+ int if_capenable; /* enabled features & capabilities */
+ void *if_linkmib; /* link-type-specific MIB data */
+ size_t if_linkmiblen; /* length of above data */
u_int if_refcount; /* reference count */
- struct ifaddrhead if_addrhead; /* linked list of addresses per if */
+
+ /* These fields are shared with struct if_data. */
+ uint8_t if_type; /* ethernet, tokenring, etc */
+ uint8_t if_addrlen; /* media address length */
+ uint8_t if_hdrlen; /* media header length */
+ uint8_t if_link_state; /* current link state */
+ uint32_t if_mtu; /* maximum transmission unit */
+ uint32_t if_metric; /* routing metric (external only) */
+ uint64_t if_baudrate; /* linespeed */
+ uint64_t if_hwassist; /* HW offload capabilities, see IFCAP */
+ time_t if_epoch; /* uptime at attach or stat reset */
+ struct timeval if_lastchange; /* time of last administrative change */
+
+ struct ifaltq if_snd; /* output queue (includes altq) */
+ struct task if_linktask; /* task for link change events */
+
+ /* Addresses of different protocol families assigned to this if. */
+ struct rwlock if_addr_lock; /* lock to protect address lists */
/*
* if_addrhead is the list of all addresses associated to
* an interface.
@@ -143,74 +237,53 @@ struct ifnet {
* However, access to the AF_LINK address through this
* field is deprecated. Use if_addr or ifaddr_byindex() instead.
*/
- int if_pcount; /* number of promiscuous listeners */
- struct carp_if *if_carp; /* carp interface structure */
- struct bpf_if *if_bpf; /* packet filter structure */
- u_short if_index; /* numeric abbreviation for this if */
- short if_index_reserved; /* spare space to grow if_index */
- struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */
- int if_flags; /* up/down, broadcast, etc. */
- int if_capabilities; /* interface features & capabilities */
- int if_capenable; /* enabled features & capabilities */
- void *if_linkmib; /* link-type-specific MIB data */
- size_t if_linkmiblen; /* length of above data */
- struct if_data if_data;
+ struct ifaddrhead if_addrhead; /* linked list of addresses per if */
struct ifmultihead if_multiaddrs; /* multicast addresses configured */
int if_amcount; /* number of all-multicast requests */
-/* procedure handles */
+ struct ifaddr *if_addr; /* pointer to link-level address */
+ const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */
+ struct rwlock if_afdata_lock;
+ void *if_afdata[AF_MAX];
+ int if_afdata_initialized;
+
+ /* Additional features hung off the interface. */
+ u_int if_fib; /* interface FIB */
+ struct vnet *if_vnet; /* pointer to network stack instance */
+ struct vnet *if_home_vnet; /* where this ifnet originates from */
+ struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */
+ struct bpf_if *if_bpf; /* packet filter structure */
+ int if_pcount; /* number of promiscuous listeners */
+ void *if_bridge; /* bridge glue */
+ void *if_lagg; /* lagg glue */
+ void *if_pf_kif; /* pf glue */
+ struct carp_if *if_carp; /* carp interface structure */
+ struct label *if_label; /* interface MAC label */
+ struct netmap_adapter *if_netmap; /* netmap(4) softc */
+
+ /* Various procedures of the layer2 encapsulation and drivers. */
int (*if_output) /* output routine (enqueue) */
- (struct ifnet *, struct mbuf *, struct sockaddr *,
+ (struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
void (*if_input) /* input routine (from h/w driver) */
(struct ifnet *, struct mbuf *);
- void (*if_start) /* initiate output routine */
- (struct ifnet *);
- int (*if_ioctl) /* ioctl routine */
- (struct ifnet *, u_long, caddr_t);
- void (*if_init) /* Init routine */
- (void *);
+ if_start_fn_t if_start; /* initiate output routine */
+ if_ioctl_fn_t if_ioctl; /* ioctl routine */
+ if_init_fn_t if_init; /* Init routine */
int (*if_resolvemulti) /* validate/resolve multicast */
(struct ifnet *, struct sockaddr **, struct sockaddr *);
- void (*if_qflush) /* flush any queues */
- (struct ifnet *);
- int (*if_transmit) /* initiate output routine */
- (struct ifnet *, struct mbuf *);
+ if_qflush_fn_t if_qflush; /* flush any queue */
+ if_transmit_fn_t if_transmit; /* initiate output routine */
+
void (*if_reassign) /* reassign to vnet routine */
(struct ifnet *, struct vnet *, char *);
- struct vnet *if_home_vnet; /* where this ifnet originates from */
- struct ifaddr *if_addr; /* pointer to link-level address */
- void *if_llsoftc; /* link layer softc */
- int if_drv_flags; /* driver-managed status flags */
- struct ifaltq if_snd; /* output queue (includes altq) */
- const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */
+ if_get_counter_t if_get_counter; /* get counter values */
+ int (*if_requestencap) /* make link header from request */
+ (struct ifnet *, struct if_encap_req *);
- void *if_bridge; /* bridge glue */
+ /* Statistics. */
+ counter_u64_t if_counters[IFCOUNTERS];
- struct label *if_label; /* interface MAC label */
-
- /* these are only used by IPv6 */
- struct ifprefixhead if_prefixhead; /* list of prefixes per if */
- void *if_afdata[AF_MAX];
- int if_afdata_initialized;
- struct rwlock if_afdata_lock;
- struct task if_linktask; /* task for link change events */
- struct mtx if_addr_mtx; /* mutex to protect address lists */
-
- LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */
- TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
- /* protected by if_addr_mtx */
- void *if_pf_kif;
- void *if_lagg; /* lagg glue */
- char *if_description; /* interface description */
- u_int if_fib; /* interface FIB */
- u_char if_alloctype; /* if_type at time of allocation */
-
- /*
- * Spare fields are added so that we can modify sensitive data
- * structures without changing the kernel binary interface, and must
- * be used with care where binary compatibility is required.
- */
- char if_cspare[3];
+ /* Stuff that's only temporary and doesn't belong here. */
/*
* Network adapter TSO limits:
@@ -222,50 +295,25 @@ struct ifnet {
* count limit does not apply. If all three fields are zero,
* there is no TSO limit.
*
- * NOTE: The TSO limits only apply to the data payload part of
- * a TCP/IP packet. That means there is no need to subtract
- * space for ethernet-, vlan-, IP- or TCP- headers from the
- * TSO limits unless the hardware driver in question requires
- * so.
- */
- u_int if_hw_tsomax;
- int if_ispare[1];
- /*
- * TSO fields for segment limits. If a field is zero below,
- * there is no limit:
+ * NOTE: The TSO limits should reflect the values used in the
+ * BUSDMA tag a network adapter is using to load a mbuf chain
+ * for transmission. The TCP/IP network stack will subtract
+ * space for all linklevel and protocol level headers and
+ * ensure that the full mbuf chain passed to the network
+ * adapter fits within the given limits.
*/
+ u_int if_hw_tsomax; /* TSO maximum size in bytes */
u_int if_hw_tsomaxsegcount; /* TSO maximum segment count */
u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */
- void *if_pspare[8]; /* 1 netmap, 7 TDB */
-};
-
-typedef void if_init_f_t(void *);
-/*
- * XXX These aliases are terribly dangerous because they could apply
- * to anything.
- */
-#define if_mtu if_data.ifi_mtu
-#define if_type if_data.ifi_type
-#define if_physical if_data.ifi_physical
-#define if_addrlen if_data.ifi_addrlen
-#define if_hdrlen if_data.ifi_hdrlen
-#define if_metric if_data.ifi_metric
-#define if_link_state if_data.ifi_link_state
-#define if_baudrate if_data.ifi_baudrate
-#define if_hwassist if_data.ifi_hwassist
-#define if_ipackets if_data.ifi_ipackets
-#define if_ierrors if_data.ifi_ierrors
-#define if_opackets if_data.ifi_opackets
-#define if_oerrors if_data.ifi_oerrors
-#define if_collisions if_data.ifi_collisions
-#define if_ibytes if_data.ifi_ibytes
-#define if_obytes if_data.ifi_obytes
-#define if_imcasts if_data.ifi_imcasts
-#define if_omcasts if_data.ifi_omcasts
-#define if_iqdrops if_data.ifi_iqdrops
-#define if_noproto if_data.ifi_noproto
-#define if_lastchange if_data.ifi_lastchange
+ /*
+ * Spare fields to be added before branching a stable branch, so
+ * that structure can be enhanced without changing the kernel
+ * binary interface.
+ */
+ void *if_pspare[4]; /* packet pacing / general use */
+ int if_ispare[4]; /* packet pacing / general use */
+};
/* for compatibility with other BSDs */
#define if_addrlist if_addrhead
@@ -275,18 +323,14 @@ typedef void if_init_f_t(void *);
/*
* Locks for address lists on the network interface.
*/
-#define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_mtx, \
- "if_addr_mtx", NULL, MTX_DEF)
-#define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_mtx)
-#define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_mtx)
-#define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_mtx)
-#define IF_ADDR_RLOCK(if) mtx_lock(&(if)->if_addr_mtx)
-#define IF_ADDR_RUNLOCK(if) mtx_unlock(&(if)->if_addr_mtx)
-#define IF_ADDR_LOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
-#define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
-/* XXX: Compat. */
-#define IF_ADDR_LOCK(if) IF_ADDR_WLOCK(if)
-#define IF_ADDR_UNLOCK(if) IF_ADDR_WUNLOCK(if)
+#define IF_ADDR_LOCK_INIT(if) rw_init(&(if)->if_addr_lock, "if_addr_lock")
+#define IF_ADDR_LOCK_DESTROY(if) rw_destroy(&(if)->if_addr_lock)
+#define IF_ADDR_WLOCK(if) rw_wlock(&(if)->if_addr_lock)
+#define IF_ADDR_WUNLOCK(if) rw_wunlock(&(if)->if_addr_lock)
+#define IF_ADDR_RLOCK(if) rw_rlock(&(if)->if_addr_lock)
+#define IF_ADDR_RUNLOCK(if) rw_runlock(&(if)->if_addr_lock)
+#define IF_ADDR_LOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_LOCKED)
+#define IF_ADDR_WLOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_WLOCKED)
/*
* Function variations on locking macros intended to be used by loadable
@@ -295,100 +339,11 @@ typedef void if_init_f_t(void *);
*/
void if_addr_rlock(struct ifnet *ifp); /* if_addrhead */
void if_addr_runlock(struct ifnet *ifp); /* if_addrhead */
-void if_maddr_rlock(struct ifnet *ifp); /* if_multiaddrs */
-void if_maddr_runlock(struct ifnet *ifp); /* if_multiaddrs */
-
-/*
- * Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq)
- * are queues of messages stored on ifqueue structures
- * (defined above). Entries are added to and deleted from these structures
- * by these macros, which should be called with ipl raised to splimp().
- */
-#define IF_LOCK(ifq) mtx_lock(&(ifq)->ifq_mtx)
-#define IF_UNLOCK(ifq) mtx_unlock(&(ifq)->ifq_mtx)
-#define IF_LOCK_ASSERT(ifq) mtx_assert(&(ifq)->ifq_mtx, MA_OWNED)
-#define _IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen)
-#define _IF_DROP(ifq) ((ifq)->ifq_drops++)
-#define _IF_QLEN(ifq) ((ifq)->ifq_len)
-
-#define _IF_ENQUEUE(ifq, m) do { \
- (m)->m_nextpkt = NULL; \
- if ((ifq)->ifq_tail == NULL) \
- (ifq)->ifq_head = m; \
- else \
- (ifq)->ifq_tail->m_nextpkt = m; \
- (ifq)->ifq_tail = m; \
- (ifq)->ifq_len++; \
-} while (0)
-
-#define IF_ENQUEUE(ifq, m) do { \
- IF_LOCK(ifq); \
- _IF_ENQUEUE(ifq, m); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define _IF_PREPEND(ifq, m) do { \
- (m)->m_nextpkt = (ifq)->ifq_head; \
- if ((ifq)->ifq_tail == NULL) \
- (ifq)->ifq_tail = (m); \
- (ifq)->ifq_head = (m); \
- (ifq)->ifq_len++; \
-} while (0)
-
-#define IF_PREPEND(ifq, m) do { \
- IF_LOCK(ifq); \
- _IF_PREPEND(ifq, m); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define _IF_DEQUEUE(ifq, m) do { \
- (m) = (ifq)->ifq_head; \
- if (m) { \
- if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL) \
- (ifq)->ifq_tail = NULL; \
- (m)->m_nextpkt = NULL; \
- (ifq)->ifq_len--; \
- } \
-} while (0)
-
-#define IF_DEQUEUE(ifq, m) do { \
- IF_LOCK(ifq); \
- _IF_DEQUEUE(ifq, m); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define _IF_DEQUEUE_ALL(ifq, m) do { \
- (m) = (ifq)->ifq_head; \
- (ifq)->ifq_head = (ifq)->ifq_tail = NULL; \
- (ifq)->ifq_len = 0; \
-} while (0)
-
-#define IF_DEQUEUE_ALL(ifq, m) do { \
- IF_LOCK(ifq); \
- _IF_DEQUEUE_ALL(ifq, m); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head)
-#define IF_POLL(ifq, m) _IF_POLL(ifq, m)
-
-#define _IF_DRAIN(ifq) do { \
- struct mbuf *m; \
- for (;;) { \
- _IF_DEQUEUE(ifq, m); \
- if (m == NULL) \
- break; \
- m_freem(m); \
- } \
-} while (0)
-
-#define IF_DRAIN(ifq) do { \
- IF_LOCK(ifq); \
- _IF_DRAIN(ifq); \
- IF_UNLOCK(ifq); \
-} while(0)
+void if_maddr_rlock(if_t ifp); /* if_multiaddrs */
+void if_maddr_runlock(if_t ifp); /* if_multiaddrs */
#ifdef _KERNEL
+#ifdef _SYS_EVENTHANDLER_H_
/* interface link layer address change event */
typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t);
@@ -404,6 +359,7 @@ EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t);
/* Interface link state change event */
typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int);
EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t);
+#endif /* _SYS_EVENTHANDLER_H_ */
/*
* interface groups
@@ -426,6 +382,7 @@ struct ifg_list {
TAILQ_ENTRY(ifg_list) ifgl_next;
};
+#ifdef _SYS_EVENTHANDLER_H_
/* group attach event */
typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *);
EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t);
@@ -435,6 +392,7 @@ EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t);
/* group change event */
typedef void (*group_change_event_handler_t)(void *, const char *);
EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
+#endif /* _SYS_EVENTHANDLER_H_ */
#define IF_AFDATA_LOCK_INIT(ifp) \
rw_init(&(ifp)->if_afdata_lock, "if_afdata")
@@ -453,331 +411,6 @@ EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
#define IF_AFDATA_WLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_WLOCKED)
#define IF_AFDATA_UNLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_UNLOCKED)
-int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp,
- int adjust);
-#define IF_HANDOFF(ifq, m, ifp) \
- if_handoff((struct ifqueue *)ifq, m, ifp, 0)
-#define IF_HANDOFF_ADJ(ifq, m, ifp, adj) \
- if_handoff((struct ifqueue *)ifq, m, ifp, adj)
-
-void if_start(struct ifnet *);
-
-#define IFQ_ENQUEUE(ifq, m, err) \
-do { \
- IF_LOCK(ifq); \
- if (ALTQ_IS_ENABLED(ifq)) \
- ALTQ_ENQUEUE(ifq, m, NULL, err); \
- else { \
- if (_IF_QFULL(ifq)) { \
- m_freem(m); \
- (err) = ENOBUFS; \
- } else { \
- _IF_ENQUEUE(ifq, m); \
- (err) = 0; \
- } \
- } \
- if (err) \
- (ifq)->ifq_drops++; \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define IFQ_DEQUEUE_NOLOCK(ifq, m) \
-do { \
- if (TBR_IS_ENABLED(ifq)) \
- (m) = tbr_dequeue_ptr(ifq, ALTDQ_REMOVE); \
- else if (ALTQ_IS_ENABLED(ifq)) \
- ALTQ_DEQUEUE(ifq, m); \
- else \
- _IF_DEQUEUE(ifq, m); \
-} while (0)
-
-#define IFQ_DEQUEUE(ifq, m) \
-do { \
- IF_LOCK(ifq); \
- IFQ_DEQUEUE_NOLOCK(ifq, m); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define IFQ_POLL_NOLOCK(ifq, m) \
-do { \
- if (TBR_IS_ENABLED(ifq)) \
- (m) = tbr_dequeue_ptr(ifq, ALTDQ_POLL); \
- else if (ALTQ_IS_ENABLED(ifq)) \
- ALTQ_POLL(ifq, m); \
- else \
- _IF_POLL(ifq, m); \
-} while (0)
-
-#define IFQ_POLL(ifq, m) \
-do { \
- IF_LOCK(ifq); \
- IFQ_POLL_NOLOCK(ifq, m); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define IFQ_PURGE_NOLOCK(ifq) \
-do { \
- if (ALTQ_IS_ENABLED(ifq)) { \
- ALTQ_PURGE(ifq); \
- } else \
- _IF_DRAIN(ifq); \
-} while (0)
-
-#define IFQ_PURGE(ifq) \
-do { \
- IF_LOCK(ifq); \
- IFQ_PURGE_NOLOCK(ifq); \
- IF_UNLOCK(ifq); \
-} while (0)
-
-#define IFQ_SET_READY(ifq) \
- do { ((ifq)->altq_flags |= ALTQF_READY); } while (0)
-
-#define IFQ_LOCK(ifq) IF_LOCK(ifq)
-#define IFQ_UNLOCK(ifq) IF_UNLOCK(ifq)
-#define IFQ_LOCK_ASSERT(ifq) IF_LOCK_ASSERT(ifq)
-#define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
-#define IFQ_INC_LEN(ifq) ((ifq)->ifq_len++)
-#define IFQ_DEC_LEN(ifq) (--(ifq)->ifq_len)
-#define IFQ_INC_DROPS(ifq) ((ifq)->ifq_drops++)
-#define IFQ_SET_MAXLEN(ifq, len) ((ifq)->ifq_maxlen = (len))
-
-/*
- * The IFF_DRV_OACTIVE test should really occur in the device driver, not in
- * the handoff logic, as that flag is locked by the device driver.
- */
-#define IFQ_HANDOFF_ADJ(ifp, m, adj, err) \
-do { \
- int len; \
- short mflags; \
- \
- len = (m)->m_pkthdr.len; \
- mflags = (m)->m_flags; \
- IFQ_ENQUEUE(&(ifp)->if_snd, m, err); \
- if ((err) == 0) { \
- (ifp)->if_obytes += len + (adj); \
- if (mflags & M_MCAST) \
- (ifp)->if_omcasts++; \
- if (((ifp)->if_drv_flags & IFF_DRV_OACTIVE) == 0) \
- if_start(ifp); \
- } \
-} while (0)
-
-#define IFQ_HANDOFF(ifp, m, err) \
- IFQ_HANDOFF_ADJ(ifp, m, 0, err)
-
-#define IFQ_DRV_DEQUEUE(ifq, m) \
-do { \
- (m) = (ifq)->ifq_drv_head; \
- if (m) { \
- if (((ifq)->ifq_drv_head = (m)->m_nextpkt) == NULL) \
- (ifq)->ifq_drv_tail = NULL; \
- (m)->m_nextpkt = NULL; \
- (ifq)->ifq_drv_len--; \
- } else { \
- IFQ_LOCK(ifq); \
- IFQ_DEQUEUE_NOLOCK(ifq, m); \
- while ((ifq)->ifq_drv_len < (ifq)->ifq_drv_maxlen) { \
- struct mbuf *m0; \
- IFQ_DEQUEUE_NOLOCK(ifq, m0); \
- if (m0 == NULL) \
- break; \
- m0->m_nextpkt = NULL; \
- if ((ifq)->ifq_drv_tail == NULL) \
- (ifq)->ifq_drv_head = m0; \
- else \
- (ifq)->ifq_drv_tail->m_nextpkt = m0; \
- (ifq)->ifq_drv_tail = m0; \
- (ifq)->ifq_drv_len++; \
- } \
- IFQ_UNLOCK(ifq); \
- } \
-} while (0)
-
-#define IFQ_DRV_PREPEND(ifq, m) \
-do { \
- (m)->m_nextpkt = (ifq)->ifq_drv_head; \
- if ((ifq)->ifq_drv_tail == NULL) \
- (ifq)->ifq_drv_tail = (m); \
- (ifq)->ifq_drv_head = (m); \
- (ifq)->ifq_drv_len++; \
-} while (0)
-
-#define IFQ_DRV_IS_EMPTY(ifq) \
- (((ifq)->ifq_drv_len == 0) && ((ifq)->ifq_len == 0))
-
-#define IFQ_DRV_PURGE(ifq) \
-do { \
- struct mbuf *m, *n = (ifq)->ifq_drv_head; \
- while((m = n) != NULL) { \
- n = m->m_nextpkt; \
- m_freem(m); \
- } \
- (ifq)->ifq_drv_head = (ifq)->ifq_drv_tail = NULL; \
- (ifq)->ifq_drv_len = 0; \
- IFQ_PURGE(ifq); \
-} while (0)
-
-#ifdef _KERNEL
-static __inline int
-drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
-{
- int error = 0;
-
-#ifdef ALTQ
- if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
- IFQ_ENQUEUE(&ifp->if_snd, m, error);
- return (error);
- }
-#endif
- error = buf_ring_enqueue(br, m);
- if (error)
- m_freem(m);
-
- return (error);
-}
-
-static __inline void
-drbr_putback(struct ifnet *ifp, struct buf_ring *br, struct mbuf *new_mbuf)
-{
- /*
- * The top of the list needs to be swapped
- * for this one.
- */
-#ifdef ALTQ
- if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
- /*
- * Peek in altq case dequeued it
- * so put it back.
- */
- IFQ_DRV_PREPEND(&ifp->if_snd, new_mbuf);
- return;
- }
-#endif
- buf_ring_putback_sc(br, new_mbuf);
-}
-
-static __inline struct mbuf *
-drbr_peek(struct ifnet *ifp, struct buf_ring *br)
-{
-#ifdef ALTQ
- struct mbuf *m;
- if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
- /*
- * Pull it off like a dequeue
- * since drbr_advance() does nothing
- * for altq and drbr_putback() will
- * use the old prepend function.
- */
- IFQ_DEQUEUE(&ifp->if_snd, m);
- return (m);
- }
-#endif
- return ((struct mbuf *)buf_ring_peek(br));
-}
-
-static __inline void
-drbr_flush(struct ifnet *ifp, struct buf_ring *br)
-{
- struct mbuf *m;
-
-#ifdef ALTQ
- if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
- IFQ_PURGE(&ifp->if_snd);
-#endif
- while ((m = (struct mbuf *)buf_ring_dequeue_sc(br)) != NULL)
- m_freem(m);
-}
-
-static __inline void
-drbr_free(struct buf_ring *br, struct malloc_type *type)
-{
-
- drbr_flush(NULL, br);
- buf_ring_free(br, type);
-}
-
-static __inline struct mbuf *
-drbr_dequeue(struct ifnet *ifp, struct buf_ring *br)
-{
-#ifdef ALTQ
- struct mbuf *m;
-
- if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
- IFQ_DEQUEUE(&ifp->if_snd, m);
- return (m);
- }
-#endif
- return ((struct mbuf *)buf_ring_dequeue_sc(br));
-}
-
-static __inline void
-drbr_advance(struct ifnet *ifp, struct buf_ring *br)
-{
-#ifdef ALTQ
- /* Nothing to do here since peek dequeues in altq case */
- if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
- return;
-#endif
- return (buf_ring_advance_sc(br));
-}
-
-
-static __inline struct mbuf *
-drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br,
- int (*func) (struct mbuf *, void *), void *arg)
-{
- struct mbuf *m;
-#ifdef ALTQ
- if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
- IFQ_LOCK(&ifp->if_snd);
- IFQ_POLL_NOLOCK(&ifp->if_snd, m);
- if (m != NULL && func(m, arg) == 0) {
- IFQ_UNLOCK(&ifp->if_snd);
- return (NULL);
- }
- IFQ_DEQUEUE_NOLOCK(&ifp->if_snd, m);
- IFQ_UNLOCK(&ifp->if_snd);
- return (m);
- }
-#endif
- m = (struct mbuf *)buf_ring_peek(br);
- if (m == NULL || func(m, arg) == 0)
- return (NULL);
-
- return ((struct mbuf *)buf_ring_dequeue_sc(br));
-}
-
-static __inline int
-drbr_empty(struct ifnet *ifp, struct buf_ring *br)
-{
-#ifdef ALTQ
- if (ALTQ_IS_ENABLED(&ifp->if_snd))
- return (IFQ_IS_EMPTY(&ifp->if_snd));
-#endif
- return (buf_ring_empty(br));
-}
-
-static __inline int
-drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br)
-{
-#ifdef ALTQ
- if (ALTQ_IS_ENABLED(&ifp->if_snd))
- return (1);
-#endif
- return (!buf_ring_empty(br));
-}
-
-static __inline int
-drbr_inuse(struct ifnet *ifp, struct buf_ring *br)
-{
-#ifdef ALTQ
- if (ALTQ_IS_ENABLED(&ifp->if_snd))
- return (ifp->if_snd.ifq_len);
-#endif
- return (buf_ring_count(br));
-}
-#endif
/*
* 72 was chosen below because it is the size of a TCP/IP
* header (40) + the minimum mss (32).
@@ -787,8 +420,6 @@ drbr_inuse(struct ifnet *ifp, struct buf_ring *br)
#define TOEDEV(ifp) ((ifp)->if_llsoftc)
-#endif /* _KERNEL */
-
/*
* The ifaddr structure contains information about one address
* of an interface. They are maintained by the different address families,
@@ -804,46 +435,28 @@ struct ifaddr {
struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */
#define ifa_broadaddr ifa_dstaddr /* broadcast address interface */
struct sockaddr *ifa_netmask; /* used to determine subnet */
- struct if_data if_data; /* not all members are meaningful */
struct ifnet *ifa_ifp; /* back-pointer to interface */
+ struct carp_softc *ifa_carp; /* pointer to CARP data */
TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */
void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */
(int, struct rtentry *, struct rt_addrinfo *);
u_short ifa_flags; /* mostly rt_flags for cloning */
+#define IFA_ROUTE RTF_UP /* route installed */
+#define IFA_RTSELF RTF_HOST /* loopback route to self installed */
u_int ifa_refcnt; /* references to this structure */
- int ifa_metric; /* cost of going out this interface */
- int (*ifa_claim_addr) /* check if an addr goes to this if */
- (struct ifaddr *, struct sockaddr *);
- struct mtx ifa_mtx;
+
+ counter_u64_t ifa_ipackets;
+ counter_u64_t ifa_opackets;
+ counter_u64_t ifa_ibytes;
+ counter_u64_t ifa_obytes;
};
-#define IFA_ROUTE RTF_UP /* route installed */
-#define IFA_RTSELF RTF_HOST /* loopback route to self installed */
-/* for compatibility with other BSDs */
+/* For compatibility with other BSDs. SCTP uses it. */
#define ifa_list ifa_link
-#ifdef _KERNEL
-#define IFA_LOCK(ifa) mtx_lock(&(ifa)->ifa_mtx)
-#define IFA_UNLOCK(ifa) mtx_unlock(&(ifa)->ifa_mtx)
-
+struct ifaddr * ifa_alloc(size_t size, int flags);
void ifa_free(struct ifaddr *ifa);
-void ifa_init(struct ifaddr *ifa);
void ifa_ref(struct ifaddr *ifa);
-#endif
-
-/*
- * The prefix structure contains information about one prefix
- * of an interface. They are maintained by the different address families,
- * are allocated and attached when a prefix or an address is set,
- * and are linked together so all prefixes for an interface can be located.
- */
-struct ifprefix {
- struct sockaddr *ifpr_prefix; /* prefix of interface */
- struct ifnet *ifpr_ifp; /* back-pointer to interface */
- TAILQ_ENTRY(ifprefix) ifpr_list; /* queue macro glue */
- u_char ifpr_plen; /* prefix length in bits */
- u_char ifpr_type; /* protocol dependent prefix type */
-};
/*
* Multicast address structure. This is analogous to the ifaddr
@@ -859,16 +472,9 @@ struct ifmultiaddr {
struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */
};
-#ifdef _KERNEL
-
extern struct rwlock ifnet_rwlock;
extern struct sx ifnet_sxlock;
-#define IFNET_LOCK_INIT() do { \
- rw_init_flags(&ifnet_rwlock, "ifnet_rw", RW_RECURSE); \
- sx_init_flags(&ifnet_sxlock, "ifnet_sx", SX_RECURSE); \
-} while(0)
-
#define IFNET_WLOCK() do { \
sx_xlock(&ifnet_sxlock); \
rw_wlock(&ifnet_rwlock); \
@@ -915,15 +521,11 @@ VNET_DECLARE(struct ifnethead, ifnet);
VNET_DECLARE(struct ifgrouphead, ifg_head);
VNET_DECLARE(int, if_index);
VNET_DECLARE(struct ifnet *, loif); /* first loopback interface */
-VNET_DECLARE(int, useloopback);
#define V_ifnet VNET(ifnet)
#define V_ifg_head VNET(ifg_head)
#define V_if_index VNET(if_index)
#define V_loif VNET(loif)
-#define V_useloopback VNET(useloopback)
-
-extern int ifqmaxlen;
int if_addgroup(struct ifnet *, const char *);
int if_delgroup(struct ifnet *, const char *);
@@ -935,18 +537,15 @@ void if_dead(struct ifnet *);
int if_delmulti(struct ifnet *, struct sockaddr *);
void if_delmulti_ifma(struct ifmultiaddr *);
void if_detach(struct ifnet *);
-void if_vmove(struct ifnet *, struct vnet *);
void if_purgeaddrs(struct ifnet *);
void if_delallmulti(struct ifnet *);
void if_down(struct ifnet *);
struct ifmultiaddr *
- if_findmulti(struct ifnet *, struct sockaddr *);
+ if_findmulti(struct ifnet *, const struct sockaddr *);
void if_free(struct ifnet *);
-void if_free_type(struct ifnet *, u_char);
void if_initname(struct ifnet *, const char *, int);
void if_link_state_change(struct ifnet *, int);
int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3);
-void if_qflush(struct ifnet *);
void if_ref(struct ifnet *);
void if_rele(struct ifnet *);
int if_setlladdr(struct ifnet *, const u_char *, int);
@@ -956,23 +555,19 @@ int ifpromisc(struct ifnet *, int);
struct ifnet *ifunit(const char *);
struct ifnet *ifunit_ref(const char *);
-void ifq_init(struct ifaltq *, struct ifnet *ifp);
-void ifq_delete(struct ifaltq *);
-
int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *);
int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *);
-
-struct ifaddr *ifa_ifwithaddr(struct sockaddr *);
-int ifa_ifwithaddr_check(struct sockaddr *);
-struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *);
-struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *);
-struct ifaddr *ifa_ifwithdstaddr_fib(struct sockaddr *, int);
-struct ifaddr *ifa_ifwithnet(struct sockaddr *, int);
-struct ifaddr *ifa_ifwithnet_fib(struct sockaddr *, int, int);
-struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *);
-struct ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int);
-
-struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *);
+int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *);
+
+struct ifaddr *ifa_ifwithaddr(const struct sockaddr *);
+int ifa_ifwithaddr_check(const struct sockaddr *);
+struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int);
+struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int);
+struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int);
+struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, struct sockaddr *,
+ u_int);
+struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *);
+int ifa_preferred(struct ifaddr *, struct ifaddr *);
int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen);
@@ -980,22 +575,92 @@ typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp);
typedef void if_com_free_t(void *com, u_char type);
void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f);
void if_deregister_com_alloc(u_char type);
+void if_data_copy(struct ifnet *, struct if_data *);
+uint64_t if_get_counter_default(struct ifnet *, ift_counter);
+void if_inc_counter(struct ifnet *, ift_counter, int64_t);
#define IF_LLADDR(ifp) \
LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr))
+uint64_t if_setbaudrate(if_t ifp, uint64_t baudrate);
+uint64_t if_getbaudrate(if_t ifp);
+int if_setcapabilities(if_t ifp, int capabilities);
+int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit);
+int if_getcapabilities(if_t ifp);
+int if_togglecapenable(if_t ifp, int togglecap);
+int if_setcapenable(if_t ifp, int capenable);
+int if_setcapenablebit(if_t ifp, int setcap, int clearcap);
+int if_getcapenable(if_t ifp);
+const char *if_getdname(if_t ifp);
+int if_setdev(if_t ifp, void *dev);
+int if_setdrvflagbits(if_t ifp, int if_setflags, int clear_flags);
+int if_getdrvflags(if_t ifp);
+int if_setdrvflags(if_t ifp, int flags);
+int if_clearhwassist(if_t ifp);
+int if_sethwassistbits(if_t ifp, int toset, int toclear);
+int if_sethwassist(if_t ifp, int hwassist_bit);
+int if_gethwassist(if_t ifp);
+int if_setsoftc(if_t ifp, void *softc);
+void *if_getsoftc(if_t ifp);
+int if_setflags(if_t ifp, int flags);
+int if_setmtu(if_t ifp, int mtu);
+int if_getmtu(if_t ifp);
+int if_getmtu_family(if_t ifp, int family);
+int if_setflagbits(if_t ifp, int set, int clear);
+int if_getflags(if_t ifp);
+int if_sendq_empty(if_t ifp);
+int if_setsendqready(if_t ifp);
+int if_setsendqlen(if_t ifp, int tx_desc_count);
+int if_input(if_t ifp, struct mbuf* sendmp);
+int if_sendq_prepend(if_t ifp, struct mbuf *m);
+struct mbuf *if_dequeue(if_t ifp);
+int if_setifheaderlen(if_t ifp, int len);
+void if_setrcvif(struct mbuf *m, if_t ifp);
+void if_setvtag(struct mbuf *m, u_int16_t tag);
+u_int16_t if_getvtag(struct mbuf *m);
+int if_vlantrunkinuse(if_t ifp);
+caddr_t if_getlladdr(if_t ifp);
+void *if_gethandle(u_char);
+void if_bpfmtap(if_t ifp, struct mbuf *m);
+void if_etherbpfmtap(if_t ifp, struct mbuf *m);
+void if_vlancap(if_t ifp);
+
+int if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max);
+int if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max);
+int if_multiaddr_count(if_t ifp, int max);
+
+int if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg);
+int if_getamcount(if_t ifp);
+struct ifaddr * if_getifaddr(if_t ifp);
+
+/* Functions */
+void if_setinitfn(if_t ifp, void (*)(void *));
+void if_setioctlfn(if_t ifp, int (*)(if_t, u_long, caddr_t));
+void if_setstartfn(if_t ifp, void (*)(if_t));
+void if_settransmitfn(if_t ifp, if_transmit_fn_t);
+void if_setqflushfn(if_t ifp, if_qflush_fn_t);
+void if_setgetcounterfn(if_t ifp, if_get_counter_t);
+
+/* Revisit the below. These are inline functions originally */
+int drbr_inuse_drv(if_t ifp, struct buf_ring *br);
+struct mbuf* drbr_dequeue_drv(if_t ifp, struct buf_ring *br);
+int drbr_needs_enqueue_drv(if_t ifp, struct buf_ring *br);
+int drbr_enqueue_drv(if_t ifp, struct buf_ring *br, struct mbuf *m);
+
+/* TSO */
+void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *);
+int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *);
+
#ifdef DEVICE_POLLING
-enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS };
+enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS };
-typedef int poll_handler_t(struct ifnet *ifp, enum poll_cmd cmd, int count);
-int ether_poll_register(poll_handler_t *h, struct ifnet *ifp);
-int ether_poll_deregister(struct ifnet *ifp);
+typedef int poll_handler_t(if_t ifp, enum poll_cmd cmd, int count);
+int ether_poll_register(poll_handler_t *h, if_t ifp);
+int ether_poll_deregister(if_t ifp);
#endif /* DEVICE_POLLING */
-/* TSO */
-void if_hw_tsomax_common(struct ifnet *, struct ifnet_hw_tsomax *);
-int if_hw_tsomax_update(struct ifnet *, struct ifnet_hw_tsomax *);
-
#endif /* _KERNEL */
+#include <net/ifq.h> /* XXXAO: temporary unconditional include */
+
#endif /* !_NET_IF_VAR_H_ */
diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c
index 7d08e298..8a93565b 100644
--- a/freebsd/sys/net/if_vlan.c
+++ b/freebsd/sys/net/if_vlan.c
@@ -2,6 +2,10 @@
/*-
* Copyright 1998 Massachusetts Institute of Technology
+ * Copyright 2012 ADARA Networks, Inc.
+ *
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to ADARA Networks, Inc.
*
* Permission to use, copy, modify, and distribute this software and
* its documentation for any purpose and without fee is hereby
@@ -31,8 +35,7 @@
/*
* if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs.
- * Might be extended some day to also handle IEEE 802.1p priority
- * tagging. This is sort of sneaky in the implementation, since
+ * This is sort of sneaky in the implementation, since
* we need to pretend to be enough of an Ethernet implementation
* to make arp work. The way we do this is by telling everyone
* that we are an Ethernet, and then catch the packets that
@@ -47,12 +50,14 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_vlan.h>
#include <rtems/bsd/sys/param.h>
+#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
+#include <sys/priv.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/sockio.h>
@@ -63,6 +68,7 @@ __FBSDID("$FreeBSD$");
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
#include <net/if_types.h>
@@ -74,7 +80,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/if_ether.h>
#endif
-#define VLANNAME "vlan"
#define VLAN_DEF_HWIDTH 4
#define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST)
@@ -85,7 +90,7 @@ LIST_HEAD(ifvlanhead, ifvlan);
struct ifvlantrunk {
struct ifnet *parent; /* parent interface of this trunk */
- struct rwlock rw;
+ struct rmlock lock;
#ifdef VLAN_ARRAY
#define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1)
struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */
@@ -105,9 +110,9 @@ struct vlan_mc_entry {
struct ifvlan {
struct ifvlantrunk *ifv_trunk;
struct ifnet *ifv_ifp;
- void *ifv_cookie;
#define TRUNK(ifv) ((ifv)->ifv_trunk)
#define PARENT(ifv) ((ifv)->ifv_trunk->parent)
+ void *ifv_cookie;
int ifv_pflags; /* special flags we have set on parent */
struct ifv_linkmib {
int ifvm_encaplen; /* encapsulation length */
@@ -115,6 +120,8 @@ struct ifvlan {
int ifvm_mintu; /* min transmission unit */
uint16_t ifvm_proto; /* encapsulation ethertype */
uint16_t ifvm_tag; /* tag to apply on packets leaving if */
+ uint16_t ifvm_vid; /* VLAN ID */
+ uint8_t ifvm_pcp; /* Priority Code Point (PCP). */
} ifv_mib;
SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;
#ifndef VLAN_ARRAY
@@ -123,6 +130,8 @@ struct ifvlan {
};
#define ifv_proto ifv_mib.ifvm_proto
#define ifv_tag ifv_mib.ifvm_tag
+#define ifv_vid ifv_mib.ifvm_vid
+#define ifv_pcp ifv_mib.ifvm_pcp
#define ifv_encaplen ifv_mib.ifvm_encaplen
#define ifv_mtufudge ifv_mib.ifvm_mtufudge
#define ifv_mintu ifv_mib.ifvm_mintu
@@ -143,11 +152,22 @@ static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0,
static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0,
"for consistency");
-static int soft_pad = 0;
-SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0,
- "pad short frames before tagging");
+static VNET_DEFINE(int, soft_pad);
+#define V_soft_pad VNET(soft_pad)
+SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(soft_pad), 0, "pad short frames before tagging");
+
+/*
+ * For now, make preserving PCP via an mbuf tag optional, as it increases
+ * per-packet memory allocations and frees. In the future, it would be
+ * preferable to reuse ether_vtag for this, or similar.
+ */
+static int vlan_mtag_pcp = 0;
+SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW, &vlan_mtag_pcp, 0,
+ "Retain VLAN PCP information as packets are passed up the stack");
-static MALLOC_DEFINE(M_VLAN, VLANNAME, "802.1Q Virtual LAN Interface");
+static const char vlanname[] = "vlan";
+static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface");
static eventhandler_tag ifdetach_tag;
static eventhandler_tag iflladdr_tag;
@@ -156,7 +176,7 @@ static eventhandler_tag iflladdr_tag;
* We have a global mutex, that is used to serialize configuration
* changes and isn't used in normal packet delivery.
*
- * We also have a per-trunk rwlock, that is locked shared on packet
+ * We also have a per-trunk rmlock(9), that is locked shared on packet
* processing and exclusive when configuration is changed.
*
* The VLAN_ARRAY substitutes the dynamic hash with a static array
@@ -170,14 +190,15 @@ static struct sx ifv_lock;
#define VLAN_LOCK_ASSERT() sx_assert(&ifv_lock, SA_LOCKED)
#define VLAN_LOCK() sx_xlock(&ifv_lock)
#define VLAN_UNLOCK() sx_xunlock(&ifv_lock)
-#define TRUNK_LOCK_INIT(trunk) rw_init(&(trunk)->rw, VLANNAME)
-#define TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw)
-#define TRUNK_LOCK(trunk) rw_wlock(&(trunk)->rw)
-#define TRUNK_UNLOCK(trunk) rw_wunlock(&(trunk)->rw)
-#define TRUNK_LOCK_ASSERT(trunk) rw_assert(&(trunk)->rw, RA_WLOCKED)
-#define TRUNK_RLOCK(trunk) rw_rlock(&(trunk)->rw)
-#define TRUNK_RUNLOCK(trunk) rw_runlock(&(trunk)->rw)
-#define TRUNK_LOCK_RASSERT(trunk) rw_assert(&(trunk)->rw, RA_RLOCKED)
+#define TRUNK_LOCK_INIT(trunk) rm_init(&(trunk)->lock, vlanname)
+#define TRUNK_LOCK_DESTROY(trunk) rm_destroy(&(trunk)->lock)
+#define TRUNK_LOCK(trunk) rm_wlock(&(trunk)->lock)
+#define TRUNK_UNLOCK(trunk) rm_wunlock(&(trunk)->lock)
+#define TRUNK_LOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_WLOCKED)
+#define TRUNK_RLOCK(trunk) rm_rlock(&(trunk)->lock, &tracker)
+#define TRUNK_RUNLOCK(trunk) rm_runlock(&(trunk)->lock, &tracker)
+#define TRUNK_LOCK_RASSERT(trunk) rm_assert(&(trunk)->lock, RA_RLOCKED)
+#define TRUNK_LOCK_READER struct rm_priotracker tracker
#ifndef VLAN_ARRAY
static void vlan_inithash(struct ifvlantrunk *trunk);
@@ -186,7 +207,7 @@ static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch);
static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
- uint16_t tag);
+ uint16_t vid);
#endif
static void trunk_destroy(struct ifvlantrunk *trunk);
@@ -206,8 +227,7 @@ static void vlan_link_state(struct ifnet *ifp);
static void vlan_capabilities(struct ifvlan *ifv);
static void vlan_trunk_capabilities(struct ifnet *ifp);
-static struct ifnet *vlan_clone_match_ethertag(struct if_clone *,
- const char *, int *);
+static struct ifnet *vlan_clone_match_ethervid(const char *, int *);
static int vlan_clone_match(struct if_clone *, const char *);
static int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int vlan_clone_destroy(struct if_clone *, struct ifnet *);
@@ -215,11 +235,10 @@ static int vlan_clone_destroy(struct if_clone *, struct ifnet *);
static void vlan_ifdetach(void *arg, struct ifnet *ifp);
static void vlan_iflladdr(void *arg, struct ifnet *ifp);
-static struct if_clone vlan_cloner = IFC_CLONE_INITIALIZER(VLANNAME, NULL,
- IF_MAXUNIT, NULL, vlan_clone_match, vlan_clone_create, vlan_clone_destroy);
+static struct if_clone *vlan_cloner;
#ifdef VIMAGE
-static VNET_DEFINE(struct if_clone, vlan_cloner);
+static VNET_DEFINE(struct if_clone *, vlan_cloner);
#define V_vlan_cloner VNET(vlan_cloner)
#endif
@@ -274,9 +293,9 @@ vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
b = 1 << trunk->hwidth;
- i = HASH(ifv->ifv_tag, trunk->hmask);
+ i = HASH(ifv->ifv_vid, trunk->hmask);
LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
- if (ifv->ifv_tag == ifv2->ifv_tag)
+ if (ifv->ifv_vid == ifv2->ifv_vid)
return (EEXIST);
/*
@@ -286,7 +305,7 @@ vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
*/
if (trunk->refcnt > (b * b) / 2) {
vlan_growhash(trunk, 1);
- i = HASH(ifv->ifv_tag, trunk->hmask);
+ i = HASH(ifv->ifv_vid, trunk->hmask);
}
LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
trunk->refcnt++;
@@ -304,7 +323,7 @@ vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
b = 1 << trunk->hwidth;
- i = HASH(ifv->ifv_tag, trunk->hmask);
+ i = HASH(ifv->ifv_vid, trunk->hmask);
LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
if (ifv2 == ifv) {
trunk->refcnt--;
@@ -356,7 +375,7 @@ vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
for (i = 0; i < n; i++)
while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) {
LIST_REMOVE(ifv, ifv_list);
- j = HASH(ifv->ifv_tag, n2 - 1);
+ j = HASH(ifv->ifv_vid, n2 - 1);
LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
}
free(trunk->hash, M_VLAN);
@@ -370,14 +389,14 @@ vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
}
static __inline struct ifvlan *
-vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag)
+vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
{
struct ifvlan *ifv;
TRUNK_LOCK_RASSERT(trunk);
- LIST_FOREACH(ifv, &trunk->hash[HASH(tag, trunk->hmask)], ifv_list)
- if (ifv->ifv_tag == tag)
+ LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
+ if (ifv->ifv_vid == vid)
return (ifv);
return (NULL);
}
@@ -401,19 +420,19 @@ vlan_dumphash(struct ifvlantrunk *trunk)
#else
static __inline struct ifvlan *
-vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag)
+vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
{
- return trunk->vlans[tag];
+ return trunk->vlans[vid];
}
static __inline int
vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
{
- if (trunk->vlans[ifv->ifv_tag] != NULL)
+ if (trunk->vlans[ifv->ifv_vid] != NULL)
return EEXIST;
- trunk->vlans[ifv->ifv_tag] = ifv;
+ trunk->vlans[ifv->ifv_vid] = ifv;
trunk->refcnt++;
return (0);
@@ -423,7 +442,7 @@ static __inline int
vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
{
- trunk->vlans[ifv->ifv_tag] = NULL;
+ trunk->vlans[ifv->ifv_vid] = NULL;
trunk->refcnt--;
return (0);
@@ -461,48 +480,48 @@ trunk_destroy(struct ifvlantrunk *trunk)
* traffic that it doesn't really want, which ends up being discarded
* later by the upper protocol layers. Unfortunately, there's no way
* to avoid this: there really is only one physical interface.
- *
- * XXX: There is a possible race here if more than one thread is
- * modifying the multicast state of the vlan interface at the same time.
*/
static int
vlan_setmulti(struct ifnet *ifp)
{
struct ifnet *ifp_p;
- struct ifmultiaddr *ifma, *rifma = NULL;
+ struct ifmultiaddr *ifma;
struct ifvlan *sc;
struct vlan_mc_entry *mc;
int error;
- /*VLAN_LOCK_ASSERT();*/
-
/* Find the parent. */
sc = ifp->if_softc;
+ TRUNK_LOCK_ASSERT(TRUNK(sc));
ifp_p = PARENT(sc);
CURVNET_SET_QUIET(ifp_p->if_vnet);
/* First, remove any existing filter entries. */
while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
- error = if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
- if (error)
- return (error);
SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
+ (void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
free(mc, M_VLAN);
}
/* Now program new ones. */
+ IF_ADDR_WLOCK(ifp);
TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
- if (mc == NULL)
+ if (mc == NULL) {
+ IF_ADDR_WUNLOCK(ifp);
return (ENOMEM);
+ }
bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len);
mc->mc_addr.sdl_index = ifp_p->if_index;
SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
+ }
+ IF_ADDR_WUNLOCK(ifp);
+ SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) {
error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr,
- &rifma);
+ NULL);
if (error)
return (error);
}
@@ -625,17 +644,21 @@ vlan_trunkdev(struct ifnet *ifp)
}
/*
- * Return the 16bit vlan tag for this interface.
+ * Return the 12-bit VLAN VID for this interface, for use by external
+ * components such as Infiniband.
+ *
+ * XXXRW: Note that the function name here is historical; it should be named
+ * vlan_vid().
*/
static int
-vlan_tag(struct ifnet *ifp, uint16_t *tagp)
+vlan_tag(struct ifnet *ifp, uint16_t *vidp)
{
struct ifvlan *ifv;
if (ifp->if_type != IFT_L2VLAN)
return (EINVAL);
ifv = ifp->if_softc;
- *tagp = ifv->ifv_tag;
+ *vidp = ifv->ifv_vid;
return (0);
}
@@ -671,20 +694,21 @@ vlan_setcookie(struct ifnet *ifp, void *cookie)
}
/*
- * Return the vlan device present at the specific tag.
+ * Return the vlan device present at the specific VID.
*/
static struct ifnet *
-vlan_devat(struct ifnet *ifp, uint16_t tag)
+vlan_devat(struct ifnet *ifp, uint16_t vid)
{
struct ifvlantrunk *trunk;
struct ifvlan *ifv;
+ TRUNK_LOCK_READER;
trunk = ifp->if_vlantrunk;
if (trunk == NULL)
return (NULL);
ifp = NULL;
TRUNK_RLOCK(trunk);
- ifv = vlan_gethash(trunk, tag);
+ ifv = vlan_gethash(trunk, vid);
if (ifv)
ifp = ifv->ifv_ifp;
TRUNK_RUNLOCK(trunk);
@@ -692,10 +716,20 @@ vlan_devat(struct ifnet *ifp, uint16_t tag)
}
/*
+ * Recalculate the cached VLAN tag exposed via the MIB.
+ */
+static void
+vlan_tag_recalculate(struct ifvlan *ifv)
+{
+
+ ifv->ifv_tag = EVL_MAKETAG(ifv->ifv_vid, ifv->ifv_pcp, 0);
+}
+
+/*
* VLAN support can be loaded as a module. The only place in the
* system that's intimately aware of this is ether_input. We hook
* into this code through vlan_input_p which is defined there and
- * set here. Noone else in the system should be aware of this so
+ * set here. No one else in the system should be aware of this so
* we use an explicit reference here.
*/
extern void (*vlan_input_p)(struct ifnet *, struct mbuf *);
@@ -727,7 +761,8 @@ vlan_modevent(module_t mod, int type, void *data)
vlan_tag_p = vlan_tag;
vlan_devat_p = vlan_devat;
#ifndef VIMAGE
- if_clone_attach(&vlan_cloner);
+ vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
+ vlan_clone_create, vlan_clone_destroy);
#endif
if (bootverbose)
printf("vlan: initialized, using "
@@ -741,7 +776,7 @@ vlan_modevent(module_t mod, int type, void *data)
break;
case MOD_UNLOAD:
#ifndef VIMAGE
- if_clone_detach(&vlan_cloner);
+ if_clone_detach(vlan_cloner);
#endif
EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag);
@@ -777,8 +812,9 @@ static void
vnet_vlan_init(const void *unused __unused)
{
+ vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
+ vlan_clone_create, vlan_clone_destroy);
V_vlan_cloner = vlan_cloner;
- if_clone_attach(&V_vlan_cloner);
}
VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_vlan_init, NULL);
@@ -787,46 +823,39 @@ static void
vnet_vlan_uninit(const void *unused __unused)
{
- if_clone_detach(&V_vlan_cloner);
+ if_clone_detach(V_vlan_cloner);
}
-VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
+VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
vnet_vlan_uninit, NULL);
#endif
+/*
+ * Check for <etherif>.<vlan> style interface names.
+ */
static struct ifnet *
-vlan_clone_match_ethertag(struct if_clone *ifc, const char *name, int *tag)
+vlan_clone_match_ethervid(const char *name, int *vidp)
{
- const char *cp;
+ char ifname[IFNAMSIZ];
+ char *cp;
struct ifnet *ifp;
- int t;
+ int vid;
- /* Check for <etherif>.<vlan> style interface names. */
- IFNET_RLOCK_NOSLEEP();
- TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
- /*
- * We can handle non-ethernet hardware types as long as
- * they handle the tagging and headers themselves.
- */
- if (ifp->if_type != IFT_ETHER &&
- (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
- continue;
- if (strncmp(ifp->if_xname, name, strlen(ifp->if_xname)) != 0)
- continue;
- cp = name + strlen(ifp->if_xname);
- if (*cp++ != '.')
- continue;
- if (*cp == '\0')
- continue;
- t = 0;
- for(; *cp >= '0' && *cp <= '9'; cp++)
- t = (t * 10) + (*cp - '0');
- if (*cp != '\0')
- continue;
- if (tag != NULL)
- *tag = t;
- break;
- }
- IFNET_RUNLOCK_NOSLEEP();
+ strlcpy(ifname, name, IFNAMSIZ);
+ if ((cp = strchr(ifname, '.')) == NULL)
+ return (NULL);
+ *cp = '\0';
+ if ((ifp = ifunit(ifname)) == NULL)
+ return (NULL);
+ /* Parse VID. */
+ if (*++cp == '\0')
+ return (NULL);
+ vid = 0;
+ for(; *cp >= '0' && *cp <= '9'; cp++)
+ vid = (vid * 10) + (*cp - '0');
+ if (*cp != '\0')
+ return (NULL);
+ if (vidp != NULL)
+ *vidp = vid;
return (ifp);
}
@@ -836,10 +865,10 @@ vlan_clone_match(struct if_clone *ifc, const char *name)
{
const char *cp;
- if (vlan_clone_match_ethertag(ifc, name, NULL) != NULL)
+ if (vlan_clone_match_ethervid(name, NULL) != NULL)
return (1);
- if (strncmp(VLANNAME, name, strlen(VLANNAME)) != 0)
+ if (strncmp(vlanname, name, strlen(vlanname)) != 0)
return (0);
for (cp = name + 4; *cp != '\0'; cp++) {
if (*cp < '0' || *cp > '9')
@@ -856,7 +885,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
int wildcard;
int unit;
int error;
- int tag;
+ int vid;
int ethertag;
struct ifvlan *ifv;
struct ifnet *ifp;
@@ -873,7 +902,10 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
* o specify no parameters and get an unattached device that
* must be configured separately.
* The first technique is preferred; the latter two are
- * supported for backwards compatibilty.
+ * supported for backwards compatibility.
+ *
+ * XXXRW: Note historic use of the word "tag" here. New ioctls may be
+ * called for.
*/
if (params) {
error = copyin(params, &vlr, sizeof(vlr));
@@ -881,31 +913,18 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
return error;
p = ifunit(vlr.vlr_parent);
if (p == NULL)
- return ENXIO;
- /*
- * Don't let the caller set up a VLAN tag with
- * anything except VLID bits.
- */
- if (vlr.vlr_tag & ~EVL_VLID_MASK)
- return (EINVAL);
+ return (ENXIO);
error = ifc_name2unit(name, &unit);
if (error != 0)
return (error);
ethertag = 1;
- tag = vlr.vlr_tag;
+ vid = vlr.vlr_tag;
wildcard = (unit < 0);
- } else if ((p = vlan_clone_match_ethertag(ifc, name, &tag)) != NULL) {
+ } else if ((p = vlan_clone_match_ethervid(name, &vid)) != NULL) {
ethertag = 1;
unit = -1;
wildcard = 0;
-
- /*
- * Don't let the caller set up a VLAN tag with
- * anything except VLID bits.
- */
- if (tag & ~EVL_VLID_MASK)
- return (EINVAL);
} else {
ethertag = 0;
@@ -937,14 +956,13 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
return (ENOSPC);
}
SLIST_INIT(&ifv->vlan_mc_listhead);
-
ifp->if_softc = ifv;
/*
* Set the name manually rather than using if_initname because
* we don't conform to the default naming convention for interfaces.
*/
strlcpy(ifp->if_xname, name, IFNAMSIZ);
- ifp->if_dname = ifc->ifc_name;
+ ifp->if_dname = vlanname;
ifp->if_dunit = unit;
/* NB: flags are not set here */
ifp->if_linkmib = &ifv->ifv_mib;
@@ -966,7 +984,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
sdl->sdl_type = IFT_L2VLAN;
if (ethertag) {
- error = vlan_config(ifv, p, tag);
+ error = vlan_config(ifv, p, vid);
if (error != 0) {
/*
* Since we've partially failed, we need to back
@@ -975,7 +993,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
*/
ether_ifdetach(ifp);
vlan_unconfig(ifp);
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
ifc_free_unit(ifc, unit);
free(ifv, M_VLAN);
@@ -997,7 +1015,7 @@ vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
ether_ifdetach(ifp); /* first, remove it from system-wide lists */
vlan_unconfig(ifp); /* now it can be unconfigured and freed */
- if_free_type(ifp, IFT_ETHER);
+ if_free(ifp);
free(ifv, M_VLAN);
ifc_free_unit(ifc, unit);
@@ -1020,6 +1038,8 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct ifvlan *ifv;
struct ifnet *p;
+ struct m_tag *mtag;
+ uint16_t tag;
int error, len, mcast;
ifv = ifp->if_softc;
@@ -1035,7 +1055,7 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
*/
if (!UP_AND_RUNNING(p)) {
m_freem(m);
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (ENETDOWN);
}
@@ -1051,7 +1071,7 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
* devices that just discard such runts instead or mishandle
* them somehow.
*/
- if (soft_pad && p->if_type == IFT_ETHER) {
+ if (V_soft_pad && p->if_type == IFT_ETHER) {
static char pad[8]; /* just zeros */
int n;
@@ -1062,7 +1082,7 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
if (n > 0) {
if_printf(ifp, "cannot pad short frame\n");
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
m_freem(m);
return (0);
}
@@ -1075,14 +1095,19 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
* knows how to find the VLAN tag to use, so we attach a
* packet tag that holds it.
*/
+ if (vlan_mtag_pcp && (mtag = m_tag_locate(m, MTAG_8021Q,
+ MTAG_8021Q_PCP_OUT, NULL)) != NULL)
+ tag = EVL_MAKETAG(ifv->ifv_vid, *(uint8_t *)(mtag + 1), 0);
+ else
+ tag = ifv->ifv_tag;
if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
- m->m_pkthdr.ether_vtag = ifv->ifv_tag;
+ m->m_pkthdr.ether_vtag = tag;
m->m_flags |= M_VLANTAG;
} else {
- m = ether_vlanencap(m, ifv->ifv_tag);
+ m = ether_vlanencap(m, tag);
if (m == NULL) {
if_printf(ifp, "unable to prepend VLAN header\n");
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (0);
}
}
@@ -1091,12 +1116,12 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
* Send it, precisely as ether_output() would have.
*/
error = (p->if_transmit)(p, m);
- if (!error) {
- ifp->if_opackets++;
- ifp->if_omcasts += mcast;
- ifp->if_obytes += len;
+ if (error == 0) {
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast);
} else
- ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (error);
}
@@ -1113,7 +1138,9 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
{
struct ifvlantrunk *trunk = ifp->if_vlantrunk;
struct ifvlan *ifv;
- uint16_t tag;
+ TRUNK_LOCK_READER;
+ struct m_tag *mtag;
+ uint16_t vid, tag;
KASSERT(trunk != NULL, ("%s: no trunk", __func__));
@@ -1122,7 +1149,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
* Packet is tagged, but m contains a normal
* Ethernet frame; the tag is stored out-of-band.
*/
- tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
+ tag = m->m_pkthdr.ether_vtag;
m->m_flags &= ~M_VLANTAG;
} else {
struct ether_vlan_header *evl;
@@ -1138,7 +1165,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
return;
}
evl = mtod(m, struct ether_vlan_header *);
- tag = EVL_VLANOFTAG(ntohs(evl->evl_tag));
+ tag = ntohs(evl->evl_tag);
/*
* Remove the 802.1q header by copying the Ethernet
@@ -1157,43 +1184,75 @@ vlan_input(struct ifnet *ifp, struct mbuf *m)
__func__, ifp->if_xname, ifp->if_type);
#endif
m_freem(m);
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
return;
}
}
+ vid = EVL_VLANOFTAG(tag);
+
TRUNK_RLOCK(trunk);
- ifv = vlan_gethash(trunk, tag);
+ ifv = vlan_gethash(trunk, vid);
if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
TRUNK_RUNLOCK(trunk);
m_freem(m);
- ifp->if_noproto++;
+ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
return;
}
TRUNK_RUNLOCK(trunk);
+ if (vlan_mtag_pcp) {
+ /*
+ * While uncommon, it is possible that we will find a 802.1q
+ * packet encapsulated inside another packet that also had an
+ * 802.1q header. For example, ethernet tunneled over IPSEC
+ * arriving over ethernet. In that case, we replace the
+ * existing 802.1q PCP m_tag value.
+ */
+ mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL);
+ if (mtag == NULL) {
+ mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_IN,
+ sizeof(uint8_t), M_NOWAIT);
+ if (mtag == NULL) {
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ return;
+ }
+ m_tag_prepend(m, mtag);
+ }
+ *(uint8_t *)(mtag + 1) = EVL_PRIOFTAG(tag);
+ }
+
m->m_pkthdr.rcvif = ifv->ifv_ifp;
- ifv->ifv_ifp->if_ipackets++;
+ if_inc_counter(ifv->ifv_ifp, IFCOUNTER_IPACKETS, 1);
/* Pass it back through the parent's input routine. */
(*ifp->if_input)(ifv->ifv_ifp, m);
}
static int
-vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag)
+vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
{
struct ifvlantrunk *trunk;
struct ifnet *ifp;
int error = 0;
- /* VID numbers 0x0 and 0xFFF are reserved */
- if (tag == 0 || tag == 0xFFF)
- return (EINVAL);
+ /*
+ * We can handle non-ethernet hardware types as long as
+ * they handle the tagging and headers themselves.
+ */
if (p->if_type != IFT_ETHER &&
(p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
return (EPROTONOSUPPORT);
if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS)
return (EPROTONOSUPPORT);
+ /*
+ * Don't let the caller set up a VLAN VID with
+ * anything except VLID bits.
+ * VID numbers 0x0 and 0xFFF are reserved.
+ */
+ if (vid == 0 || vid == 0xFFF || (vid & ~EVL_VLID_MASK))
+ return (EINVAL);
if (ifv->ifv_trunk)
return (EBUSY);
@@ -1203,7 +1262,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag)
vlan_inithash(trunk);
VLAN_LOCK();
if (p->if_vlantrunk != NULL) {
- /* A race that that is very unlikely to be hit. */
+ /* A race that is very unlikely to be hit. */
vlan_freehash(trunk);
free(trunk, M_VLAN);
goto exists;
@@ -1219,7 +1278,9 @@ exists:
TRUNK_LOCK(trunk);
}
- ifv->ifv_tag = tag; /* must set this before vlan_inshash() */
+ ifv->ifv_vid = vid; /* must set this before vlan_inshash() */
+ ifv->ifv_pcp = 0; /* Default: best effort delivery. */
+ vlan_tag_recalculate(ifv);
error = vlan_inshash(trunk, ifv);
if (error)
goto done;
@@ -1297,7 +1358,7 @@ exists:
done:
TRUNK_UNLOCK(trunk);
if (error == 0)
- EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_tag);
+ EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid);
VLAN_UNLOCK();
return (error);
@@ -1366,7 +1427,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing)
* Check if we were the last.
*/
if (trunk->refcnt == 0) {
- trunk->parent->if_vlantrunk = NULL;
+ parent->if_vlantrunk = NULL;
/*
* XXXGL: If some ithread has already entered
* vlan_input() and is now blocked on the trunk
@@ -1393,7 +1454,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing)
* to cleanup anyway.
*/
if (parent != NULL)
- EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_tag);
+ EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid);
}
/* Handle a reference counted flag that should be set on the parent as well */
@@ -1494,7 +1555,7 @@ vlan_capabilities(struct ifvlan *ifv)
p->if_capenable & IFCAP_VLAN_HWTAGGING) {
ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM;
ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP |
- CSUM_UDP | CSUM_SCTP | CSUM_FRAGMENT);
+ CSUM_UDP | CSUM_SCTP);
} else {
ifp->if_capenable = 0;
ifp->if_hwassist = 0;
@@ -1562,6 +1623,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct ifreq *ifr;
struct ifaddr *ifa;
struct ifvlan *ifv;
+ struct ifvlantrunk *trunk;
struct vlanreq vlr;
int error = 0;
@@ -1633,6 +1695,13 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCSETVLAN:
#ifdef VIMAGE
+ /*
+ * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN
+ * interface to be delegated to a jail without allowing the
+ * jail to change what underlying interface/VID it is
+ * associated with. We are not entirely convinced that this
+ * is the right way to accomplish that policy goal.
+ */
if (ifp->if_vnet != ifp->if_home_vnet) {
error = EPERM;
break;
@@ -1650,14 +1719,6 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = ENOENT;
break;
}
- /*
- * Don't let the caller set up a VLAN tag with
- * anything except VLID bits.
- */
- if (vlr.vlr_tag & ~EVL_VLID_MASK) {
- error = EINVAL;
- break;
- }
error = vlan_config(ifv, p, vlr.vlr_tag);
if (error)
break;
@@ -1678,7 +1739,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
if (TRUNK(ifv) != NULL) {
strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname,
sizeof(vlr.vlr_parent));
- vlr.vlr_tag = ifv->ifv_tag;
+ vlr.vlr_tag = ifv->ifv_vid;
}
VLAN_UNLOCK();
error = copyout(&vlr, ifr->ifr_data, sizeof(vlr));
@@ -1699,8 +1760,40 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
* If we don't have a parent, just remember the membership for
* when we do.
*/
- if (TRUNK(ifv) != NULL)
+ trunk = TRUNK(ifv);
+ if (trunk != NULL) {
+ TRUNK_LOCK(trunk);
error = vlan_setmulti(ifp);
+ TRUNK_UNLOCK(trunk);
+ }
+ break;
+
+ case SIOCGVLANPCP:
+#ifdef VIMAGE
+ if (ifp->if_vnet != ifp->if_home_vnet) {
+ error = EPERM;
+ break;
+ }
+#endif
+ ifr->ifr_vlan_pcp = ifv->ifv_pcp;
+ break;
+
+ case SIOCSVLANPCP:
+#ifdef VIMAGE
+ if (ifp->if_vnet != ifp->if_home_vnet) {
+ error = EPERM;
+ break;
+ }
+#endif
+ error = priv_check(curthread, PRIV_NET_SETVLANPCP);
+ if (error)
+ break;
+ if (ifr->ifr_vlan_pcp > 7) {
+ error = EINVAL;
+ break;
+ }
+ ifv->ifv_pcp = ifr->ifr_vlan_pcp;
+ vlan_tag_recalculate(ifv);
break;
default:
diff --git a/freebsd/sys/net/if_vlan_var.h b/freebsd/sys/net/if_vlan_var.h
index fd3fc4f3..6b20d142 100644
--- a/freebsd/sys/net/if_vlan_var.h
+++ b/freebsd/sys/net/if_vlan_var.h
@@ -32,22 +32,6 @@
#ifndef _NET_IF_VLAN_VAR_H_
#define _NET_IF_VLAN_VAR_H_ 1
-struct ether_vlan_header {
- u_char evl_dhost[ETHER_ADDR_LEN];
- u_char evl_shost[ETHER_ADDR_LEN];
- u_int16_t evl_encap_proto;
- u_int16_t evl_tag;
- u_int16_t evl_proto;
-};
-
-#define EVL_VLID_MASK 0x0FFF
-#define EVL_PRI_MASK 0xE000
-#define EVL_VLANOFTAG(tag) ((tag) & EVL_VLID_MASK)
-#define EVL_PRIOFTAG(tag) (((tag) >> 13) & 7)
-#define EVL_CFIOFTAG(tag) (((tag) >> 12) & 1)
-#define EVL_MAKETAG(vlid, pri, cfi) \
- ((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & EVL_VLID_MASK))
-
/* Set the VLAN ID in an mbuf packet header non-destructively. */
#define EVL_APPLY_VLID(m, vlid) \
do { \
@@ -89,6 +73,23 @@ struct vlanreq {
#define SIOCSETVLAN SIOCSIFGENERIC
#define SIOCGETVLAN SIOCGIFGENERIC
+#define SIOCGVLANPCP _IOWR('i', 152, struct ifreq) /* Get VLAN PCP */
+#define SIOCSVLANPCP _IOW('i', 153, struct ifreq) /* Set VLAN PCP */
+
+/*
+ * Names for 802.1q priorities ("802.1p"). Notice that in this scheme,
+ * (0 < 1), allowing default 0-tagged traffic to take priority over background
+ * tagged traffic.
+ */
+#define IEEE8021Q_PCP_BK 1 /* Background (lowest) */
+#define IEEE8021Q_PCP_BE 0 /* Best effort (default) */
+#define IEEE8021Q_PCP_EE 2 /* Excellent effort */
+#define IEEE8021Q_PCP_CA 3 /* Critical applications */
+#define IEEE8021Q_PCP_VI 4 /* Video, < 100ms latency */
+#define IEEE8021Q_PCP_VO 5 /* Voice, < 10ms latency */
+#define IEEE8021Q_PCP_IC 6 /* Internetwork control */
+#define IEEE8021Q_PCP_NC 7 /* Network control (highest) */
+
#ifdef _KERNEL
/*
* Drivers that are capable of adding and removing the VLAN header
@@ -108,7 +109,7 @@ struct vlanreq {
* received VLAN tag (containing both vlan and priority information)
* into the ether_vtag mbuf packet header field:
*
- * m->m_pkthdr.ether_vtag = vlan_id; // ntohs()?
+ * m->m_pkthdr.ether_vtag = vtag; // ntohs()?
* m->m_flags |= M_VLANTAG;
*
* to mark the packet m with the specified VLAN tag.
@@ -126,6 +127,16 @@ struct vlanreq {
* if_capabilities.
*/
+/*
+ * The 802.1q code may also tag mbufs with the PCP (priority) field for use in
+ * other layers of the stack, in which case an m_tag will be used. This is
+ * semantically quite different from use of the ether_vtag field, which is
+ * defined only between the device driver and VLAN layer.
+ */
+#define MTAG_8021Q 1326104895
+#define MTAG_8021Q_PCP_IN 0 /* Input priority. */
+#define MTAG_8021Q_PCP_OUT 1 /* Output priority. */
+
#define VLAN_CAPABILITIES(_ifp) do { \
if ((_ifp)->if_vlantrunk != NULL) \
(*vlan_trunk_cap_p)(_ifp); \
@@ -133,15 +144,15 @@ struct vlanreq {
#define VLAN_TRUNKDEV(_ifp) \
(_ifp)->if_type == IFT_L2VLAN ? (*vlan_trunkdev_p)((_ifp)) : NULL
-#define VLAN_TAG(_ifp, _tag) \
- (_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_tag)) : EINVAL
+#define VLAN_TAG(_ifp, _vid) \
+ (_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_vid)) : EINVAL
#define VLAN_COOKIE(_ifp) \
(_ifp)->if_type == IFT_L2VLAN ? (*vlan_cookie_p)((_ifp)) : NULL
#define VLAN_SETCOOKIE(_ifp, _cookie) \
(_ifp)->if_type == IFT_L2VLAN ? \
(*vlan_setcookie_p)((_ifp), (_cookie)) : EINVAL
-#define VLAN_DEVAT(_ifp, _tag) \
- (_ifp)->if_vlantrunk != NULL ? (*vlan_devat_p)((_ifp), (_tag)) : NULL
+#define VLAN_DEVAT(_ifp, _vid) \
+ (_ifp)->if_vlantrunk != NULL ? (*vlan_devat_p)((_ifp), (_vid)) : NULL
extern void (*vlan_trunk_cap_p)(struct ifnet *);
extern struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
@@ -150,6 +161,14 @@ extern int (*vlan_tag_p)(struct ifnet *, uint16_t *);
extern int (*vlan_setcookie_p)(struct ifnet *, void *);
extern void *(*vlan_cookie_p)(struct ifnet *);
+#ifdef _SYS_EVENTHANDLER_H_
+/* VLAN state change events */
+typedef void (*vlan_config_fn)(void *, struct ifnet *, uint16_t);
+typedef void (*vlan_unconfig_fn)(void *, struct ifnet *, uint16_t);
+EVENTHANDLER_DECLARE(vlan_config, vlan_config_fn);
+EVENTHANDLER_DECLARE(vlan_unconfig, vlan_unconfig_fn);
+#endif /* _SYS_EVENTHANDLER_H_ */
+
#endif /* _KERNEL */
#endif /* _NET_IF_VLAN_VAR_H_ */
diff --git a/freebsd/sys/net/ifq.h b/freebsd/sys/net/ifq.h
new file mode 100644
index 00000000..f0d206d8
--- /dev/null
+++ b/freebsd/sys/net/ifq.h
@@ -0,0 +1,484 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From: @(#)if.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NET_IFQ_H_
+#define _NET_IFQ_H_
+
+#ifdef _KERNEL
+#include <sys/mbuf.h> /* ifqueue only? */
+#include <sys/buf_ring.h>
+#include <net/vnet.h>
+#endif /* _KERNEL */
+#include <rtems/bsd/sys/lock.h> /* XXX */
+#include <sys/mutex.h> /* struct ifqueue */
+
+/*
+ * Couple of ugly extra definitions that are required since ifq.h
+ * is split from if_var.h.
+ */
+#define IF_DUNIT_NONE -1
+
+#include <net/altq/if_altq.h>
+
+/*
+ * Structure defining a queue for a network interface.
+ */
+struct ifqueue {
+ struct mbuf *ifq_head;
+ struct mbuf *ifq_tail;
+ int ifq_len;
+ int ifq_maxlen;
+ struct mtx ifq_mtx;
+};
+
+#ifdef _KERNEL
+/*
+ * Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq)
+ * are queues of messages stored on ifqueue structures
+ * (defined above). Entries are added to and deleted from these structures
+ * by these macros.
+ */
+#define IF_LOCK(ifq) mtx_lock(&(ifq)->ifq_mtx)
+#define IF_UNLOCK(ifq) mtx_unlock(&(ifq)->ifq_mtx)
+#define IF_LOCK_ASSERT(ifq) mtx_assert(&(ifq)->ifq_mtx, MA_OWNED)
+#define _IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen)
+#define _IF_QLEN(ifq) ((ifq)->ifq_len)
+
+#define _IF_ENQUEUE(ifq, m) do { \
+ (m)->m_nextpkt = NULL; \
+ if ((ifq)->ifq_tail == NULL) \
+ (ifq)->ifq_head = m; \
+ else \
+ (ifq)->ifq_tail->m_nextpkt = m; \
+ (ifq)->ifq_tail = m; \
+ (ifq)->ifq_len++; \
+} while (0)
+
+#define IF_ENQUEUE(ifq, m) do { \
+ IF_LOCK(ifq); \
+ _IF_ENQUEUE(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define _IF_PREPEND(ifq, m) do { \
+ (m)->m_nextpkt = (ifq)->ifq_head; \
+ if ((ifq)->ifq_tail == NULL) \
+ (ifq)->ifq_tail = (m); \
+ (ifq)->ifq_head = (m); \
+ (ifq)->ifq_len++; \
+} while (0)
+
+#define IF_PREPEND(ifq, m) do { \
+ IF_LOCK(ifq); \
+ _IF_PREPEND(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define _IF_DEQUEUE(ifq, m) do { \
+ (m) = (ifq)->ifq_head; \
+ if (m) { \
+ if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL) \
+ (ifq)->ifq_tail = NULL; \
+ (m)->m_nextpkt = NULL; \
+ (ifq)->ifq_len--; \
+ } \
+} while (0)
+
+#define IF_DEQUEUE(ifq, m) do { \
+ IF_LOCK(ifq); \
+ _IF_DEQUEUE(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define _IF_DEQUEUE_ALL(ifq, m) do { \
+ (m) = (ifq)->ifq_head; \
+ (ifq)->ifq_head = (ifq)->ifq_tail = NULL; \
+ (ifq)->ifq_len = 0; \
+} while (0)
+
+#define IF_DEQUEUE_ALL(ifq, m) do { \
+ IF_LOCK(ifq); \
+ _IF_DEQUEUE_ALL(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head)
+#define IF_POLL(ifq, m) _IF_POLL(ifq, m)
+
+#define _IF_DRAIN(ifq) do { \
+ struct mbuf *m; \
+ for (;;) { \
+ _IF_DEQUEUE(ifq, m); \
+ if (m == NULL) \
+ break; \
+ m_freem(m); \
+ } \
+} while (0)
+
+#define IF_DRAIN(ifq) do { \
+ IF_LOCK(ifq); \
+ _IF_DRAIN(ifq); \
+ IF_UNLOCK(ifq); \
+} while(0)
+
+int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp,
+ int adjust);
+#define IF_HANDOFF(ifq, m, ifp) \
+ if_handoff((struct ifqueue *)ifq, m, ifp, 0)
+#define IF_HANDOFF_ADJ(ifq, m, ifp, adj) \
+ if_handoff((struct ifqueue *)ifq, m, ifp, adj)
+
+void if_start(struct ifnet *);
+
+#define IFQ_ENQUEUE(ifq, m, err) \
+do { \
+ IF_LOCK(ifq); \
+ if (ALTQ_IS_ENABLED(ifq)) \
+ ALTQ_ENQUEUE(ifq, m, NULL, err); \
+ else { \
+ if (_IF_QFULL(ifq)) { \
+ m_freem(m); \
+ (err) = ENOBUFS; \
+ } else { \
+ _IF_ENQUEUE(ifq, m); \
+ (err) = 0; \
+ } \
+ } \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define IFQ_DEQUEUE_NOLOCK(ifq, m) \
+do { \
+ if (TBR_IS_ENABLED(ifq)) \
+ (m) = tbr_dequeue_ptr(ifq, ALTDQ_REMOVE); \
+ else if (ALTQ_IS_ENABLED(ifq)) \
+ ALTQ_DEQUEUE(ifq, m); \
+ else \
+ _IF_DEQUEUE(ifq, m); \
+} while (0)
+
+#define IFQ_DEQUEUE(ifq, m) \
+do { \
+ IF_LOCK(ifq); \
+ IFQ_DEQUEUE_NOLOCK(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define IFQ_POLL_NOLOCK(ifq, m) \
+do { \
+ if (TBR_IS_ENABLED(ifq)) \
+ (m) = tbr_dequeue_ptr(ifq, ALTDQ_POLL); \
+ else if (ALTQ_IS_ENABLED(ifq)) \
+ ALTQ_POLL(ifq, m); \
+ else \
+ _IF_POLL(ifq, m); \
+} while (0)
+
+#define IFQ_POLL(ifq, m) \
+do { \
+ IF_LOCK(ifq); \
+ IFQ_POLL_NOLOCK(ifq, m); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define IFQ_PURGE_NOLOCK(ifq) \
+do { \
+ if (ALTQ_IS_ENABLED(ifq)) { \
+ ALTQ_PURGE(ifq); \
+ } else \
+ _IF_DRAIN(ifq); \
+} while (0)
+
+#define IFQ_PURGE(ifq) \
+do { \
+ IF_LOCK(ifq); \
+ IFQ_PURGE_NOLOCK(ifq); \
+ IF_UNLOCK(ifq); \
+} while (0)
+
+#define IFQ_SET_READY(ifq) \
+ do { ((ifq)->altq_flags |= ALTQF_READY); } while (0)
+
+#define IFQ_LOCK(ifq) IF_LOCK(ifq)
+#define IFQ_UNLOCK(ifq) IF_UNLOCK(ifq)
+#define IFQ_LOCK_ASSERT(ifq) IF_LOCK_ASSERT(ifq)
+#define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
+#define IFQ_INC_LEN(ifq) ((ifq)->ifq_len++)
+#define IFQ_DEC_LEN(ifq) (--(ifq)->ifq_len)
+#define IFQ_SET_MAXLEN(ifq, len) ((ifq)->ifq_maxlen = (len))
+
+/*
+ * The IFF_DRV_OACTIVE test should really occur in the device driver, not in
+ * the handoff logic, as that flag is locked by the device driver.
+ */
+#define IFQ_HANDOFF_ADJ(ifp, m, adj, err) \
+do { \
+ int len; \
+ short mflags; \
+ \
+ len = (m)->m_pkthdr.len; \
+ mflags = (m)->m_flags; \
+ IFQ_ENQUEUE(&(ifp)->if_snd, m, err); \
+ if ((err) == 0) { \
+ if_inc_counter((ifp), IFCOUNTER_OBYTES, len + (adj)); \
+ if (mflags & M_MCAST) \
+ if_inc_counter((ifp), IFCOUNTER_OMCASTS, 1); \
+ if (((ifp)->if_drv_flags & IFF_DRV_OACTIVE) == 0) \
+ if_start(ifp); \
+ } else \
+ if_inc_counter((ifp), IFCOUNTER_OQDROPS, 1); \
+} while (0)
+
+#define IFQ_HANDOFF(ifp, m, err) \
+ IFQ_HANDOFF_ADJ(ifp, m, 0, err)
+
+#define IFQ_DRV_DEQUEUE(ifq, m) \
+do { \
+ (m) = (ifq)->ifq_drv_head; \
+ if (m) { \
+ if (((ifq)->ifq_drv_head = (m)->m_nextpkt) == NULL) \
+ (ifq)->ifq_drv_tail = NULL; \
+ (m)->m_nextpkt = NULL; \
+ (ifq)->ifq_drv_len--; \
+ } else { \
+ IFQ_LOCK(ifq); \
+ IFQ_DEQUEUE_NOLOCK(ifq, m); \
+ while ((ifq)->ifq_drv_len < (ifq)->ifq_drv_maxlen) { \
+ struct mbuf *m0; \
+ IFQ_DEQUEUE_NOLOCK(ifq, m0); \
+ if (m0 == NULL) \
+ break; \
+ m0->m_nextpkt = NULL; \
+ if ((ifq)->ifq_drv_tail == NULL) \
+ (ifq)->ifq_drv_head = m0; \
+ else \
+ (ifq)->ifq_drv_tail->m_nextpkt = m0; \
+ (ifq)->ifq_drv_tail = m0; \
+ (ifq)->ifq_drv_len++; \
+ } \
+ IFQ_UNLOCK(ifq); \
+ } \
+} while (0)
+
+#define IFQ_DRV_PREPEND(ifq, m) \
+do { \
+ (m)->m_nextpkt = (ifq)->ifq_drv_head; \
+ if ((ifq)->ifq_drv_tail == NULL) \
+ (ifq)->ifq_drv_tail = (m); \
+ (ifq)->ifq_drv_head = (m); \
+ (ifq)->ifq_drv_len++; \
+} while (0)
+
+#define IFQ_DRV_IS_EMPTY(ifq) \
+ (((ifq)->ifq_drv_len == 0) && ((ifq)->ifq_len == 0))
+
+#define IFQ_DRV_PURGE(ifq) \
+do { \
+ struct mbuf *m, *n = (ifq)->ifq_drv_head; \
+ while((m = n) != NULL) { \
+ n = m->m_nextpkt; \
+ m_freem(m); \
+ } \
+ (ifq)->ifq_drv_head = (ifq)->ifq_drv_tail = NULL; \
+ (ifq)->ifq_drv_len = 0; \
+ IFQ_PURGE(ifq); \
+} while (0)
+
+static __inline int
+drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
+{
+ int error = 0;
+
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_ENQUEUE(&ifp->if_snd, m, error);
+ if (error)
+ if_inc_counter((ifp), IFCOUNTER_OQDROPS, 1);
+ return (error);
+ }
+#endif
+ error = buf_ring_enqueue(br, m);
+ if (error)
+ m_freem(m);
+
+ return (error);
+}
+
+static __inline void
+drbr_putback(struct ifnet *ifp, struct buf_ring *br, struct mbuf *new)
+{
+ /*
+ * The top of the list needs to be swapped
+ * for this one.
+ */
+#ifdef ALTQ
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ /*
+ * Peek in altq case dequeued it
+ * so put it back.
+ */
+ IFQ_DRV_PREPEND(&ifp->if_snd, new);
+ return;
+ }
+#endif
+ buf_ring_putback_sc(br, new);
+}
+
+static __inline struct mbuf *
+drbr_peek(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ struct mbuf *m;
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ /*
+ * Pull it off like a dequeue
+ * since drbr_advance() does nothing
+ * for altq and drbr_putback() will
+ * use the old prepend function.
+ */
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ return (m);
+ }
+#endif
+ return(buf_ring_peek_clear_sc(br));
+}
+
+static __inline void
+drbr_flush(struct ifnet *ifp, struct buf_ring *br)
+{
+ struct mbuf *m;
+
+#ifdef ALTQ
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
+ IFQ_PURGE(&ifp->if_snd);
+#endif
+ while ((m = buf_ring_dequeue_sc(br)) != NULL)
+ m_freem(m);
+}
+
+static __inline void
+drbr_free(struct buf_ring *br, struct malloc_type *type)
+{
+
+ drbr_flush(NULL, br);
+ buf_ring_free(br, type);
+}
+
+static __inline struct mbuf *
+drbr_dequeue(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ struct mbuf *m;
+
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ return (m);
+ }
+#endif
+ return (buf_ring_dequeue_sc(br));
+}
+
+static __inline void
+drbr_advance(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ /* Nothing to do here since peek dequeues in altq case */
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
+ return;
+#endif
+ return (buf_ring_advance_sc(br));
+}
+
+
+static __inline struct mbuf *
+drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br,
+ int (*func) (struct mbuf *, void *), void *arg)
+{
+ struct mbuf *m;
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_LOCK(&ifp->if_snd);
+ IFQ_POLL_NOLOCK(&ifp->if_snd, m);
+ if (m != NULL && func(m, arg) == 0) {
+ IFQ_UNLOCK(&ifp->if_snd);
+ return (NULL);
+ }
+ IFQ_DEQUEUE_NOLOCK(&ifp->if_snd, m);
+ IFQ_UNLOCK(&ifp->if_snd);
+ return (m);
+ }
+#endif
+ m = buf_ring_peek(br);
+ if (m == NULL || func(m, arg) == 0)
+ return (NULL);
+
+ return (buf_ring_dequeue_sc(br));
+}
+
+static __inline int
+drbr_empty(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ return (IFQ_IS_EMPTY(&ifp->if_snd));
+#endif
+ return (buf_ring_empty(br));
+}
+
+static __inline int
+drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ return (1);
+#endif
+ return (!buf_ring_empty(br));
+}
+
+static __inline int
+drbr_inuse(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ return (ifp->if_snd.ifq_len);
+#endif
+ return (buf_ring_count(br));
+}
+
+extern int ifqmaxlen;
+
+void if_qflush(struct ifnet *);
+void ifq_init(struct ifaltq *, struct ifnet *ifp);
+void ifq_delete(struct ifaltq *);
+
+#endif /* _KERNEL */
+#endif /* !_NET_IFQ_H_ */
diff --git a/freebsd/sys/net/iso88025.h b/freebsd/sys/net/iso88025.h
index 6edd2e0b..11bd6ec4 100644
--- a/freebsd/sys/net/iso88025.h
+++ b/freebsd/sys/net/iso88025.h
@@ -162,11 +162,13 @@ struct iso88025_addr {
#define ISO88025_BPF_UNSUPPORTED 0
#define ISO88025_BPF_SUPPORTED 1
+#ifdef _KERNEL
void iso88025_ifattach (struct ifnet *, const u_int8_t *, int);
void iso88025_ifdetach (struct ifnet *, int);
int iso88025_ioctl (struct ifnet *, u_long, caddr_t );
-int iso88025_output (struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *);
+int iso88025_output (struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
void iso88025_input (struct ifnet *, struct mbuf *);
+#endif /* _KERNEL */
-#endif
+#endif /* !_NET_ISO88025_H_ */
diff --git a/freebsd/sys/net/netisr.c b/freebsd/sys/net/netisr.c
index f43cffa1..f14b2e95 100644
--- a/freebsd/sys/net/netisr.c
+++ b/freebsd/sys/net/netisr.c
@@ -72,6 +72,7 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
+#include <sys/malloc.h>
#include <sys/interrupt.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mbuf.h>
@@ -131,7 +132,7 @@ static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");
/*-
* Three global direct dispatch policies are supported:
*
- * NETISR_DISPATCH_QUEUED: All work is deferred for a netisr, regardless of
+ * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of
* context (may be overriden by protocols).
*
* NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
@@ -151,37 +152,25 @@ static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");
#define NETISR_DISPATCH_POLICY_MAXSTR 20 /* Used for temporary buffers. */
static u_int netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
static int sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
-SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RW |
- CTLFLAG_TUN, 0, 0, sysctl_netisr_dispatch_policy, "A",
+SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RWTUN,
+ 0, 0, sysctl_netisr_dispatch_policy, "A",
"netisr dispatch policy");
/*
- * These sysctls were used in previous versions to control and export
- * dispatch policy state. Now, we provide read-only export via them so that
- * older netstat binaries work. At some point they can be garbage collected.
- */
-static int netisr_direct_force;
-SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RD,
- &netisr_direct_force, 0, "compat: force direct dispatch");
-
-static int netisr_direct;
-SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RD, &netisr_direct, 0,
- "compat: enable direct dispatch");
-
-/*
* Allow the administrator to limit the number of threads (CPUs) to use for
* netisr. We don't check netisr_maxthreads before creating the thread for
- * CPU 0, so in practice we ignore values <= 1. This must be set at boot.
- * We will create at most one thread per CPU.
+ * CPU 0. This must be set at boot. We will create at most one thread per CPU.
+ * By default we initialize this to 1 which would assign just 1 cpu (cpu0) and
+ * therefore only 1 workstream. If set to -1, netisr would use all cpus
+ * (mp_ncpus) and therefore would have those many workstreams. One workstream
+ * per thread (CPU).
*/
-static int netisr_maxthreads = -1; /* Max number of threads. */
-TUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads);
+static int netisr_maxthreads = 1; /* Max number of threads. */
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
&netisr_maxthreads, 0,
"Use at most this many CPUs for netisr processing");
static int netisr_bindthreads = 0; /* Bind threads to CPUs. */
-TUNABLE_INT("net.isr.bindthreads", &netisr_bindthreads);
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
&netisr_bindthreads, 0, "Bind netisr threads to CPUs.");
@@ -192,7 +181,6 @@ SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
*/
#define NETISR_DEFAULT_MAXQLIMIT 10240
static u_int netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
-TUNABLE_INT("net.isr.maxqlimit", &netisr_maxqlimit);
SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
&netisr_maxqlimit, 0,
"Maximum netisr per-protocol, per-CPU queue depth.");
@@ -204,7 +192,6 @@ SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
*/
#define NETISR_DEFAULT_DEFAULTQLIMIT 256
static u_int netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
-TUNABLE_INT("net.isr.defaultqlimit", &netisr_defaultqlimit);
SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
&netisr_defaultqlimit, 0,
"Default netisr per-protocol, per-CPU queue limit if not set by protocol");
@@ -225,6 +212,23 @@ SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
*/
static struct netisr_proto netisr_proto[NETISR_MAXPROT];
+#ifdef VIMAGE
+/*
+ * The netisr_enable array describes a per-VNET flag for registered
+ * protocols on whether this netisr is active in this VNET or not.
+ * netisr_register() will automatically enable the netisr for the
+ * default VNET and all currently active instances.
+ * netisr_unregister() will disable all active VNETs, including vnet0.
+ * Individual network stack instances can be enabled/disabled by the
+ * netisr_(un)register_vnet() functions.
+ * With this we keep the one netisr_proto per protocol but add a
+ * mechanism to stop netisr processing for vnet teardown.
+ * Apart from that we expect a VNET to always be enabled.
+ */
+static VNET_DEFINE(u_int, netisr_enable[NETISR_MAXPROT]);
+#define V_netisr_enable VNET(netisr_enable)
+#endif
+
#ifndef __rtems__
/*
* Per-CPU workstream data. See netisr_internal.h for more details.
@@ -275,10 +279,7 @@ u_int
netisr_get_cpuid(u_int cpunumber)
{
- KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber,
- nws_count));
-
- return (nws_array[cpunumber]);
+ return (nws_array[cpunumber % nws_count]);
}
/*
@@ -308,8 +309,6 @@ static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = {
{ NETISR_DISPATCH_HYBRID, "hybrid" },
{ NETISR_DISPATCH_DIRECT, "direct" },
};
-static const u_int netisr_dispatch_table_len =
- (sizeof(netisr_dispatch_table) / sizeof(netisr_dispatch_table[0]));
static void
netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer,
@@ -320,7 +319,7 @@ netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer,
u_int i;
str = "unknown";
- for (i = 0; i < netisr_dispatch_table_len; i++) {
+ for (i = 0; i < nitems(netisr_dispatch_table); i++) {
ndtep = &netisr_dispatch_table[i];
if (ndtep->ndte_policy == dispatch_policy) {
str = ndtep->ndte_policy_str;
@@ -336,7 +335,7 @@ netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp)
const struct netisr_dispatch_table_entry *ndtep;
u_int i;
- for (i = 0; i < netisr_dispatch_table_len; i++) {
+ for (i = 0; i < nitems(netisr_dispatch_table); i++) {
ndtep = &netisr_dispatch_table[i];
if (strcmp(ndtep->ndte_policy_str, str) == 0) {
*dispatch_policyp = ndtep->ndte_policy;
@@ -346,32 +345,6 @@ netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp)
return (EINVAL);
}
-static void
-netisr_dispatch_policy_compat(void)
-{
-
- switch (netisr_dispatch_policy) {
- case NETISR_DISPATCH_DEFERRED:
- netisr_direct_force = 0;
- netisr_direct = 0;
- break;
-
- case NETISR_DISPATCH_HYBRID:
- netisr_direct_force = 0;
- netisr_direct = 1;
- break;
-
- case NETISR_DISPATCH_DIRECT:
- netisr_direct_force = 1;
- netisr_direct = 1;
- break;
-
- default:
- panic("%s: unknown policy %u", __func__,
- netisr_dispatch_policy);
- }
-}
-
static int
sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
{
@@ -387,10 +360,8 @@ sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
&dispatch_policy);
if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
error = EINVAL;
- if (error == 0) {
+ if (error == 0)
netisr_dispatch_policy = dispatch_policy;
- netisr_dispatch_policy_compat();
- }
}
return (error);
}
@@ -403,6 +374,7 @@ sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
void
netisr_register(const struct netisr_handler *nhp)
{
+ VNET_ITERATOR_DECL(vnet_iter);
struct netisr_work *npwp;
const char *name;
u_int i, proto;
@@ -475,6 +447,22 @@ netisr_register(const struct netisr_handler *nhp)
bzero(npwp, sizeof(*npwp));
npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
}
+
+#ifdef VIMAGE
+ /*
+ * Test that we are in vnet0 and have a curvnet set.
+ */
+ KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+ KASSERT(IS_DEFAULT_VNET(curvnet), ("%s: curvnet %p is not vnet0 %p",
+ __func__, curvnet, vnet0));
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_netisr_enable[proto] = 1;
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+#endif
NETISR_WUNLOCK();
}
@@ -651,6 +639,7 @@ netisr_drain_proto(struct netisr_work *npwp)
void
netisr_unregister(const struct netisr_handler *nhp)
{
+ VNET_ITERATOR_DECL(vnet_iter);
struct netisr_work *npwp;
#ifdef INVARIANTS
const char *name;
@@ -669,6 +658,16 @@ netisr_unregister(const struct netisr_handler *nhp)
("%s(%u): protocol not registered for %s", __func__, proto,
name));
+#ifdef VIMAGE
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_netisr_enable[proto] = 0;
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+#endif
+
netisr_proto[proto].np_name = NULL;
netisr_proto[proto].np_handler = NULL;
netisr_proto[proto].np_m2flow = NULL;
@@ -687,6 +686,97 @@ netisr_unregister(const struct netisr_handler *nhp)
NETISR_WUNLOCK();
}
+#ifdef VIMAGE
+void
+netisr_register_vnet(const struct netisr_handler *nhp)
+{
+ u_int proto;
+
+ proto = nhp->nh_proto;
+
+ KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
+ NETISR_WLOCK();
+ KASSERT(netisr_proto[proto].np_handler != NULL,
+ ("%s(%u): protocol not registered for %s", __func__, proto,
+ nhp->nh_name));
+
+ V_netisr_enable[proto] = 1;
+ NETISR_WUNLOCK();
+}
+
+static void
+netisr_drain_proto_vnet(struct vnet *vnet, u_int proto)
+{
+ struct netisr_workstream *nwsp;
+ struct netisr_work *npwp;
+ struct mbuf *m, *mp, *n, *ne;
+ u_int i;
+
+ KASSERT(vnet != NULL, ("%s: vnet is NULL", __func__));
+ NETISR_LOCK_ASSERT();
+
+ CPU_FOREACH(i) {
+ nwsp = DPCPU_ID_PTR(i, nws);
+ if (nwsp->nws_intr_event == NULL)
+ continue;
+ npwp = &nwsp->nws_work[proto];
+ NWS_LOCK(nwsp);
+
+ /*
+ * Rather than dissecting and removing mbufs from the middle
+ * of the chain, we build a new chain if the packet stays and
+ * update the head and tail pointers at the end. All packets
+ * matching the given vnet are freed.
+ */
+ m = npwp->nw_head;
+ n = ne = NULL;
+ while (m != NULL) {
+ mp = m;
+ m = m->m_nextpkt;
+ mp->m_nextpkt = NULL;
+ if (mp->m_pkthdr.rcvif->if_vnet != vnet) {
+ if (n == NULL) {
+ n = ne = mp;
+ } else {
+ ne->m_nextpkt = mp;
+ ne = mp;
+ }
+ continue;
+ }
+ /* This is a packet in the selected vnet. Free it. */
+ npwp->nw_len--;
+ m_freem(mp);
+ }
+ npwp->nw_head = n;
+ npwp->nw_tail = ne;
+ NWS_UNLOCK(nwsp);
+ }
+}
+
+void
+netisr_unregister_vnet(const struct netisr_handler *nhp)
+{
+ u_int proto;
+
+ proto = nhp->nh_proto;
+
+ KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+ KASSERT(proto < NETISR_MAXPROT,
+ ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
+ NETISR_WLOCK();
+ KASSERT(netisr_proto[proto].np_handler != NULL,
+ ("%s(%u): protocol not registered for %s", __func__, proto,
+ nhp->nh_name));
+
+ V_netisr_enable[proto] = 0;
+
+ netisr_drain_proto_vnet(curvnet, proto);
+ NETISR_WUNLOCK();
+}
+#endif
+
/*
* Compose the global and per-protocol policies on dispatch, and return the
* dispatch policy to use.
@@ -746,22 +836,25 @@ netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
* dispatch. In the queued case, fall back on the SOURCE
* policy.
*/
- if (*cpuidp != NETISR_CPUID_NONE)
+ if (*cpuidp != NETISR_CPUID_NONE) {
+ *cpuidp = netisr_get_cpuid(*cpuidp);
return (m);
+ }
if (dispatch_policy == NETISR_DISPATCH_HYBRID) {
- *cpuidp = curcpu;
+ *cpuidp = netisr_get_cpuid(curcpu);
return (m);
}
policy = NETISR_POLICY_SOURCE;
}
if (policy == NETISR_POLICY_FLOW) {
- if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) {
+ if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE &&
+ npp->np_m2flow != NULL) {
m = npp->np_m2flow(m, source);
if (m == NULL)
return (NULL);
}
- if (m->m_flags & M_FLOWID) {
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
*cpuidp =
netisr_default_flow2cpu(m->m_pkthdr.flowid);
return (m);
@@ -984,6 +1077,13 @@ netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
KASSERT(netisr_proto[proto].np_handler != NULL,
("%s: invalid proto %u", __func__, proto));
+#ifdef VIMAGE
+ if (V_netisr_enable[proto] == 0) {
+ m_freem(m);
+ return (ENOPROTOOPT);
+ }
+#endif
+
m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
source, m, &cpuid);
if (m != NULL) {
@@ -1030,6 +1130,13 @@ netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__,
proto));
+#ifdef VIMAGE
+ if (V_netisr_enable[proto] == 0) {
+ m_freem(m);
+ return (ENOPROTOOPT);
+ }
+#endif
+
dispatch_policy = netisr_get_dispatch(npp);
if (dispatch_policy == NETISR_DISPATCH_DEFERRED)
return (netisr_queue_src(proto, source, m));
@@ -1215,15 +1322,15 @@ netisr_start_swi(u_int cpuid, struct pcpu *pc)
static void
netisr_init(void *arg)
{
- char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
- u_int dispatch_policy;
- int error;
-
- KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));
+#ifdef EARLY_AP_STARTUP
+ struct pcpu *pc;
+#endif
NETISR_LOCK_INIT();
- if (netisr_maxthreads < 1)
- netisr_maxthreads = 1;
+ if (netisr_maxthreads == 0 || netisr_maxthreads < -1 )
+ netisr_maxthreads = 1; /* default behavior */
+ else if (netisr_maxthreads == -1)
+ netisr_maxthreads = mp_ncpus; /* use max cpus */
if (netisr_maxthreads > mp_ncpus) {
printf("netisr_init: forcing maxthreads from %d to %d\n",
netisr_maxthreads, mp_ncpus);
@@ -1248,31 +1355,24 @@ netisr_init(void *arg)
}
#endif
-#ifndef __rtems__
- if (TUNABLE_STR_FETCH("net.isr.dispatch", tmp, sizeof(tmp))) {
- error = netisr_dispatch_policy_from_str(tmp,
- &dispatch_policy);
- if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
- error = EINVAL;
- if (error == 0) {
- netisr_dispatch_policy = dispatch_policy;
- netisr_dispatch_policy_compat();
- } else
- printf(
- "%s: invalid dispatch policy %s, using default\n",
- __func__, tmp);
+#ifdef EARLY_AP_STARTUP
+ STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
+ if (nws_count >= netisr_maxthreads)
+ break;
+ netisr_start_swi(pc->pc_cpuid, pc);
}
-#endif /* __rtems__ */
-
+#else
#ifndef __rtems__
netisr_start_swi(curcpu, pcpu_find(curcpu));
#else /* __rtems__ */
netisr_start_swi(0, NULL);
#endif /* __rtems__ */
+#endif
}
SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);
#ifndef __rtems__
+#ifndef EARLY_AP_STARTUP
/*
* Start worker threads for additional CPUs. No attempt to gracefully handle
* work reassignment, we don't yet support dynamic reconfiguration.
@@ -1285,9 +1385,6 @@ netisr_start(void *arg)
STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
if (nws_count >= netisr_maxthreads)
break;
- /* XXXRW: Is skipping absent CPUs still required here? */
- if (CPU_ABSENT(pc->pc_cpuid))
- continue;
/* Worker will already be present for boot CPU. */
if (pc->pc_netisr != NULL)
continue;
@@ -1295,6 +1392,7 @@ netisr_start(void *arg)
}
}
SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);
+#endif
#endif /* __rtems__ */
/*
diff --git a/freebsd/sys/net/netisr.h b/freebsd/sys/net/netisr.h
index 83bf9ce5..63764a74 100644
--- a/freebsd/sys/net/netisr.h
+++ b/freebsd/sys/net/netisr.h
@@ -52,15 +52,13 @@
#define NETISR_IP 1
#define NETISR_IGMP 2 /* IGMPv3 output queue */
#define NETISR_ROUTE 3 /* routing socket */
-#define NETISR_AARP 4 /* Appletalk ARP */
-#define NETISR_ATALK2 5 /* Appletalk phase 2 */
-#define NETISR_ATALK1 6 /* Appletalk phase 1 */
-#define NETISR_ARP 7 /* same as AF_LINK */
-#define NETISR_IPX 8 /* same as AF_IPX */
-#define NETISR_ETHER 9 /* ethernet input */
-#define NETISR_IPV6 10
-#define NETISR_NATM 11
-#define NETISR_EPAIR 12 /* if_epair(4) */
+#define NETISR_ARP 4 /* same as AF_LINK */
+#define NETISR_ETHER 5 /* ethernet input */
+#define NETISR_IPV6 6
+#define NETISR_NATM 7
+#define NETISR_EPAIR 8 /* if_epair(4) */
+#define NETISR_IP_DIRECT 9 /* direct-dispatch IPv4 */
+#define NETISR_IPV6_DIRECT 10 /* direct-dispatch IPv6 */
/*
* Protocol ordering and affinity policy constants. See the detailed
@@ -212,6 +210,10 @@ void netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp);
void netisr_register(const struct netisr_handler *nhp);
int netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit);
void netisr_unregister(const struct netisr_handler *nhp);
+#ifdef VIMAGE
+void netisr_register_vnet(const struct netisr_handler *nhp);
+void netisr_unregister_vnet(const struct netisr_handler *nhp);
+#endif
/*
* Process a packet destined for a protocol, and attempt direct dispatch.
diff --git a/freebsd/sys/net/pfil.c b/freebsd/sys/net/pfil.c
index 123d03c4..7fcecc88 100644
--- a/freebsd/sys/net/pfil.c
+++ b/freebsd/sys/net/pfil.c
@@ -47,6 +47,7 @@
#include <sys/queue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/pfil.h>
static struct mtx pfil_global_lock;
@@ -54,18 +55,18 @@ static struct mtx pfil_global_lock;
MTX_SYSINIT(pfil_heads_lock, &pfil_global_lock, "pfil_head_list lock",
MTX_DEF);
-static int pfil_list_add(pfil_list_t *, struct packet_filter_hook *, int);
-
-static int pfil_list_remove(pfil_list_t *,
- int (*)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
- void *);
+static struct packet_filter_hook *pfil_chain_get(int, struct pfil_head *);
+static int pfil_chain_add(pfil_chain_t *, struct packet_filter_hook *, int);
+static int pfil_chain_remove(pfil_chain_t *, pfil_func_t, void *);
LIST_HEAD(pfilheadhead, pfil_head);
VNET_DEFINE(struct pfilheadhead, pfil_head_list);
#define V_pfil_head_list VNET(pfil_head_list)
+VNET_DEFINE(struct rmlock, pfil_lock);
+#define V_pfil_lock VNET(pfil_lock)
/*
- * pfil_run_hooks() runs the specified packet filter hooks.
+ * pfil_run_hooks() runs the specified packet filter hook chain.
*/
int
pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
@@ -78,8 +79,8 @@ pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
PFIL_RLOCK(ph, &rmpt);
KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0"));
- for (pfh = pfil_hook_get(dir, ph); pfh != NULL;
- pfh = TAILQ_NEXT(pfh, pfil_link)) {
+ for (pfh = pfil_chain_get(dir, ph); pfh != NULL;
+ pfh = TAILQ_NEXT(pfh, pfil_chain)) {
if (pfh->pfil_func != NULL) {
rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir,
inp);
@@ -92,6 +93,80 @@ pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
return (rv);
}
+static struct packet_filter_hook *
+pfil_chain_get(int dir, struct pfil_head *ph)
+{
+
+ if (dir == PFIL_IN)
+ return (TAILQ_FIRST(&ph->ph_in));
+ else if (dir == PFIL_OUT)
+ return (TAILQ_FIRST(&ph->ph_out));
+ else
+ return (NULL);
+}
+
+/*
+ * pfil_try_rlock() acquires rm reader lock for specified head
+ * if this is immediately possible.
+ */
+int
+pfil_try_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+{
+
+ return (PFIL_TRY_RLOCK(ph, tracker));
+}
+
+/*
+ * pfil_rlock() acquires rm reader lock for specified head.
+ */
+void
+pfil_rlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+{
+
+ PFIL_RLOCK(ph, tracker);
+}
+
+/*
+ * pfil_runlock() releases reader lock for specified head.
+ */
+void
+pfil_runlock(struct pfil_head *ph, struct rm_priotracker *tracker)
+{
+
+ PFIL_RUNLOCK(ph, tracker);
+}
+
+/*
+ * pfil_wlock() acquires writer lock for specified head.
+ */
+void
+pfil_wlock(struct pfil_head *ph)
+{
+
+ PFIL_WLOCK(ph);
+}
+
+/*
+ * pfil_wunlock() releases writer lock for specified head.
+ */
+void
+pfil_wunlock(struct pfil_head *ph)
+{
+
+ PFIL_WUNLOCK(ph);
+}
+
+/*
+ * pfil_wowned() returns a non-zero value if the current thread owns
+ * an exclusive lock.
+ */
+int
+pfil_wowned(struct pfil_head *ph)
+{
+
+ return (PFIL_WOWNED(ph));
+}
+
/*
* pfil_head_register() registers a pfil_head with the packet filter hook
* mechanism.
@@ -101,11 +176,11 @@ pfil_head_register(struct pfil_head *ph)
{
struct pfil_head *lph;
- PFIL_LIST_LOCK();
+ PFIL_HEADLIST_LOCK();
LIST_FOREACH(lph, &V_pfil_head_list, ph_list) {
if (ph->ph_type == lph->ph_type &&
ph->ph_un.phu_val == lph->ph_un.phu_val) {
- PFIL_LIST_UNLOCK();
+ PFIL_HEADLIST_UNLOCK();
return (EEXIST);
}
}
@@ -114,7 +189,7 @@ pfil_head_register(struct pfil_head *ph)
TAILQ_INIT(&ph->ph_in);
TAILQ_INIT(&ph->ph_out);
LIST_INSERT_HEAD(&V_pfil_head_list, ph, ph_list);
- PFIL_LIST_UNLOCK();
+ PFIL_HEADLIST_UNLOCK();
return (0);
}
@@ -128,12 +203,12 @@ pfil_head_unregister(struct pfil_head *ph)
{
struct packet_filter_hook *pfh, *pfnext;
- PFIL_LIST_LOCK();
+ PFIL_HEADLIST_LOCK();
LIST_REMOVE(ph, ph_list);
- PFIL_LIST_UNLOCK();
- TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_link, pfnext)
+ PFIL_HEADLIST_UNLOCK();
+ TAILQ_FOREACH_SAFE(pfh, &ph->ph_in, pfil_chain, pfnext)
free(pfh, M_IFADDR);
- TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_link, pfnext)
+ TAILQ_FOREACH_SAFE(pfh, &ph->ph_out, pfil_chain, pfnext)
free(pfh, M_IFADDR);
PFIL_LOCK_DESTROY(ph);
return (0);
@@ -147,11 +222,11 @@ pfil_head_get(int type, u_long val)
{
struct pfil_head *ph;
- PFIL_LIST_LOCK();
+ PFIL_HEADLIST_LOCK();
LIST_FOREACH(ph, &V_pfil_head_list, ph_list)
if (ph->ph_type == type && ph->ph_un.phu_val == val)
break;
- PFIL_LIST_UNLOCK();
+ PFIL_HEADLIST_UNLOCK();
return (ph);
}
@@ -164,8 +239,7 @@ pfil_head_get(int type, u_long val)
* PFIL_WAITOK OK to call malloc with M_WAITOK.
*/
int
-pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
- struct inpcb *), void *arg, int flags, struct pfil_head *ph)
+pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
{
struct packet_filter_hook *pfh1 = NULL;
struct packet_filter_hook *pfh2 = NULL;
@@ -191,7 +265,7 @@ pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
if (flags & PFIL_IN) {
pfh1->pfil_func = func;
pfh1->pfil_arg = arg;
- err = pfil_list_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
+ err = pfil_chain_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT);
if (err)
goto locked_error;
ph->ph_nhooks++;
@@ -199,10 +273,10 @@ pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
if (flags & PFIL_OUT) {
pfh2->pfil_func = func;
pfh2->pfil_arg = arg;
- err = pfil_list_add(&ph->ph_out, pfh2, flags & ~PFIL_IN);
+ err = pfil_chain_add(&ph->ph_out, pfh2, flags & ~PFIL_IN);
if (err) {
if (flags & PFIL_IN)
- pfil_list_remove(&ph->ph_in, func, arg);
+ pfil_chain_remove(&ph->ph_in, func, arg);
goto locked_error;
}
ph->ph_nhooks++;
@@ -221,22 +295,21 @@ error:
/*
* pfil_remove_hook removes a specific function from the packet filter hook
- * list.
+ * chain.
*/
int
-pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
- struct inpcb *), void *arg, int flags, struct pfil_head *ph)
+pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
{
int err = 0;
PFIL_WLOCK(ph);
if (flags & PFIL_IN) {
- err = pfil_list_remove(&ph->ph_in, func, arg);
+ err = pfil_chain_remove(&ph->ph_in, func, arg);
if (err == 0)
ph->ph_nhooks--;
}
if ((err == 0) && (flags & PFIL_OUT)) {
- err = pfil_list_remove(&ph->ph_out, func, arg);
+ err = pfil_chain_remove(&ph->ph_out, func, arg);
if (err == 0)
ph->ph_nhooks--;
}
@@ -244,15 +317,18 @@ pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
return (err);
}
+/*
+ * Internal: Add a new pfil hook into a hook chain.
+ */
static int
-pfil_list_add(pfil_list_t *list, struct packet_filter_hook *pfh1, int flags)
+pfil_chain_add(pfil_chain_t *chain, struct packet_filter_hook *pfh1, int flags)
{
struct packet_filter_hook *pfh;
/*
* First make sure the hook is not already there.
*/
- TAILQ_FOREACH(pfh, list, pfil_link)
+ TAILQ_FOREACH(pfh, chain, pfil_chain)
if (pfh->pfil_func == pfh1->pfil_func &&
pfh->pfil_arg == pfh1->pfil_arg)
return (EEXIST);
@@ -262,26 +338,23 @@ pfil_list_add(pfil_list_t *list, struct packet_filter_hook *pfh1, int flags)
* the same path is followed in or out of the kernel.
*/
if (flags & PFIL_IN)
- TAILQ_INSERT_HEAD(list, pfh1, pfil_link);
+ TAILQ_INSERT_HEAD(chain, pfh1, pfil_chain);
else
- TAILQ_INSERT_TAIL(list, pfh1, pfil_link);
+ TAILQ_INSERT_TAIL(chain, pfh1, pfil_chain);
return (0);
}
/*
- * pfil_list_remove is an internal function that takes a function off the
- * specified list.
+ * Internal: Remove a pfil hook from a hook chain.
*/
static int
-pfil_list_remove(pfil_list_t *list,
- int (*func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
- void *arg)
+pfil_chain_remove(pfil_chain_t *chain, pfil_func_t func, void *arg)
{
struct packet_filter_hook *pfh;
- TAILQ_FOREACH(pfh, list, pfil_link)
+ TAILQ_FOREACH(pfh, chain, pfil_chain)
if (pfh->pfil_func == func && pfh->pfil_arg == arg) {
- TAILQ_REMOVE(list, pfh, pfil_link);
+ TAILQ_REMOVE(chain, pfh, pfil_chain);
free(pfh, M_IFADDR);
return (0);
}
@@ -292,36 +365,34 @@ pfil_list_remove(pfil_list_t *list,
* Stuff that must be initialized for every instance (including the first of
* course).
*/
-static int
-vnet_pfil_init(const void *unused)
+static void
+vnet_pfil_init(const void *unused __unused)
{
LIST_INIT(&V_pfil_head_list);
- return (0);
+ PFIL_LOCK_INIT_REAL(&V_pfil_lock, "shared");
}
/*
* Called for the removal of each instance.
*/
-static int
-vnet_pfil_uninit(const void *unused)
+static void
+vnet_pfil_uninit(const void *unused __unused)
{
- /* XXX should panic if list is not empty */
- return (0);
+ KASSERT(LIST_EMPTY(&V_pfil_head_list),
+ ("%s: pfil_head_list %p not empty", __func__, &V_pfil_head_list));
+ PFIL_LOCK_DESTROY_REAL(&V_pfil_lock);
}
-/* Define startup order. */
-#define PFIL_SYSINIT_ORDER SI_SUB_PROTO_BEGIN
-#define PFIL_MODEVENT_ORDER (SI_ORDER_FIRST) /* On boot slot in here. */
-#define PFIL_VNET_ORDER (PFIL_MODEVENT_ORDER + 2) /* Later still. */
-
/*
* Starting up.
*
* VNET_SYSINIT is called for each existing vnet and each new vnet.
+ * Make sure the pfil bits are first before any possible subsystem which
+ * might piggyback on the SI_SUB_PROTO_PFIL.
*/
-VNET_SYSINIT(vnet_pfil_init, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER,
+VNET_SYSINIT(vnet_pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST,
vnet_pfil_init, NULL);
/*
@@ -329,5 +400,5 @@ VNET_SYSINIT(vnet_pfil_init, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER,
*
* VNET_SYSUNINIT is called for each exiting vnet as it exits.
*/
-VNET_SYSUNINIT(vnet_pfil_uninit, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER,
+VNET_SYSUNINIT(vnet_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST,
vnet_pfil_uninit, NULL);
diff --git a/freebsd/sys/net/pfil.h b/freebsd/sys/net/pfil.h
index da06dedf..b78023b7 100644
--- a/freebsd/sys/net/pfil.h
+++ b/freebsd/sys/net/pfil.h
@@ -43,15 +43,18 @@ struct mbuf;
struct ifnet;
struct inpcb;
+typedef int (*pfil_func_t)(void *, struct mbuf **, struct ifnet *, int,
+ struct inpcb *);
+
/*
* The packet filter hooks are designed for anything to call them to
- * possibly intercept the packet.
- * possibly intercept the packet.
+ * possibly intercept the packet. Multiple filter hooks are chained
+ * together and executed one after another in the specified order.
*/
struct packet_filter_hook {
- TAILQ_ENTRY(packet_filter_hook) pfil_link;
- int (*pfil_func)(void *, struct mbuf **, struct ifnet *, int,
- struct inpcb *);
- void *pfil_arg;
+ TAILQ_ENTRY(packet_filter_hook) pfil_chain;
+ pfil_func_t pfil_func;
+ void *pfil_arg;
};
#define PFIL_IN 0x00000001
@@ -59,63 +62,87 @@ struct packet_filter_hook {
#define PFIL_WAITOK 0x00000004
#define PFIL_ALL (PFIL_IN|PFIL_OUT)
-typedef TAILQ_HEAD(pfil_list, packet_filter_hook) pfil_list_t;
+typedef TAILQ_HEAD(pfil_chain, packet_filter_hook) pfil_chain_t;
#define PFIL_TYPE_AF 1 /* key is AF_* type */
#define PFIL_TYPE_IFNET 2 /* key is ifnet pointer */
+#define PFIL_FLAG_PRIVATE_LOCK 0x01 /* Personal lock instead of global */
+
+/*
+ * A pfil head is created by each protocol or packet intercept point.
+ * Each packet is then run through the hook chain for inspection.
+ */
struct pfil_head {
- pfil_list_t ph_in;
- pfil_list_t ph_out;
- int ph_type;
- int ph_nhooks;
+ pfil_chain_t ph_in;
+ pfil_chain_t ph_out;
+ int ph_type;
+ int ph_nhooks;
#if defined( __linux__ ) || defined( _WIN32 )
- rwlock_t ph_mtx;
+ rwlock_t ph_mtx;
#else
- struct rmlock ph_lock;
+ struct rmlock *ph_plock; /* Pointer to the used lock */
+ struct rmlock ph_lock; /* Private lock storage */
+ int flags;
#endif
union {
- u_long phu_val;
- void *phu_ptr;
+ u_long phu_val;
+ void *phu_ptr;
} ph_un;
-#define ph_af ph_un.phu_val
-#define ph_ifnet ph_un.phu_ptr
+#define ph_af ph_un.phu_val
+#define ph_ifnet ph_un.phu_ptr
LIST_ENTRY(pfil_head) ph_list;
};
-int pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
- int, struct inpcb *), void *, int, struct pfil_head *);
-int pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
- int, struct inpcb *), void *, int, struct pfil_head *);
+/* Public functions for pfil hook management by packet filters. */
+struct pfil_head *pfil_head_get(int, u_long);
+int pfil_add_hook(pfil_func_t, void *, int, struct pfil_head *);
+int pfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *);
+#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
+
+/* Public functions to run the packet inspection by protocols. */
int pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *,
int, struct inpcb *inp);
+/* Public functions for pfil head management by protocols. */
int pfil_head_register(struct pfil_head *);
int pfil_head_unregister(struct pfil_head *);
-struct pfil_head *pfil_head_get(int, u_long);
-
-#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
-#define PFIL_LOCK_INIT(p) \
- rm_init_flags(&(p)->ph_lock, "PFil hook read/write mutex", RM_RECURSE)
-#define PFIL_LOCK_DESTROY(p) rm_destroy(&(p)->ph_lock)
-#define PFIL_RLOCK(p, t) rm_rlock(&(p)->ph_lock, (t))
-#define PFIL_WLOCK(p) rm_wlock(&(p)->ph_lock)
-#define PFIL_RUNLOCK(p, t) rm_runlock(&(p)->ph_lock, (t))
-#define PFIL_WUNLOCK(p) rm_wunlock(&(p)->ph_lock)
-#define PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock)
-#define PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock)
-
-static __inline struct packet_filter_hook *
-pfil_hook_get(int dir, struct pfil_head *ph)
-{
-
- if (dir == PFIL_IN)
- return (TAILQ_FIRST(&ph->ph_in));
- else if (dir == PFIL_OUT)
- return (TAILQ_FIRST(&ph->ph_out));
- else
- return (NULL);
-}
+/* Public pfil locking functions for self managed locks by packet filters. */
+struct rm_priotracker; /* Do not require including rmlock header */
+int pfil_try_rlock(struct pfil_head *, struct rm_priotracker *);
+void pfil_rlock(struct pfil_head *, struct rm_priotracker *);
+void pfil_runlock(struct pfil_head *, struct rm_priotracker *);
+void pfil_wlock(struct pfil_head *);
+void pfil_wunlock(struct pfil_head *);
+int pfil_wowned(struct pfil_head *ph);
+
+/* Internal pfil locking functions. */
+#define PFIL_LOCK_INIT_REAL(l, t) \
+ rm_init_flags(l, "PFil " t " rmlock", RM_RECURSE)
+#define PFIL_LOCK_DESTROY_REAL(l) \
+ rm_destroy(l)
+#define PFIL_LOCK_INIT(p) do { \
+ if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) { \
+ PFIL_LOCK_INIT_REAL(&(p)->ph_lock, "private"); \
+ (p)->ph_plock = &(p)->ph_lock; \
+ } else \
+ (p)->ph_plock = &V_pfil_lock; \
+} while (0)
+#define PFIL_LOCK_DESTROY(p) do { \
+ if ((p)->flags & PFIL_FLAG_PRIVATE_LOCK) \
+ PFIL_LOCK_DESTROY_REAL((p)->ph_plock); \
+} while (0)
+
+#define PFIL_TRY_RLOCK(p, t) rm_try_rlock((p)->ph_plock, (t))
+#define PFIL_RLOCK(p, t) rm_rlock((p)->ph_plock, (t))
+#define PFIL_WLOCK(p) rm_wlock((p)->ph_plock)
+#define PFIL_RUNLOCK(p, t) rm_runlock((p)->ph_plock, (t))
+#define PFIL_WUNLOCK(p) rm_wunlock((p)->ph_plock)
+#define PFIL_WOWNED(p) rm_wowned((p)->ph_plock)
+
+/* Internal locking macros for global/vnet pfil_head_list. */
+#define PFIL_HEADLIST_LOCK() mtx_lock(&pfil_global_lock)
+#define PFIL_HEADLIST_UNLOCK() mtx_unlock(&pfil_global_lock)
#endif /* _NET_PFIL_H_ */
diff --git a/freebsd/sys/net/pfkeyv2.h b/freebsd/sys/net/pfkeyv2.h
index c45f8b05..c9b27695 100644
--- a/freebsd/sys/net/pfkeyv2.h
+++ b/freebsd/sys/net/pfkeyv2.h
@@ -218,7 +218,6 @@ struct sadb_x_sa2 {
};
/* XXX Policy Extension */
-/* sizeof(struct sadb_x_policy) == 16 */
struct sadb_x_policy {
u_int16_t sadb_x_policy_len;
u_int16_t sadb_x_policy_exttype;
@@ -226,8 +225,10 @@ struct sadb_x_policy {
u_int8_t sadb_x_policy_dir; /* direction, see ipsec.h */
u_int8_t sadb_x_policy_reserved;
u_int32_t sadb_x_policy_id;
- u_int32_t sadb_x_policy_reserved2;
+ u_int32_t sadb_x_policy_priority;
};
+_Static_assert(sizeof(struct sadb_x_policy) == 16, "struct size mismatch");
+
/*
* When policy_type == IPSEC, it is followed by some of
* the ipsec policy request.
@@ -256,31 +257,31 @@ struct sadb_x_ipsecrequest {
};
/* NAT-Traversal type, see RFC 3948 (and drafts). */
-/* sizeof(struct sadb_x_nat_t_type) == 8 */
struct sadb_x_nat_t_type {
u_int16_t sadb_x_nat_t_type_len;
u_int16_t sadb_x_nat_t_type_exttype;
u_int8_t sadb_x_nat_t_type_type;
u_int8_t sadb_x_nat_t_type_reserved[3];
};
+_Static_assert(sizeof(struct sadb_x_nat_t_type) == 8, "struct size mismatch");
/* NAT-Traversal source or destination port. */
-/* sizeof(struct sadb_x_nat_t_port) == 8 */
struct sadb_x_nat_t_port {
u_int16_t sadb_x_nat_t_port_len;
u_int16_t sadb_x_nat_t_port_exttype;
u_int16_t sadb_x_nat_t_port_port;
u_int16_t sadb_x_nat_t_port_reserved;
};
+_Static_assert(sizeof(struct sadb_x_nat_t_port) == 8, "struct size mismatch");
/* ESP fragmentation size. */
-/* sizeof(struct sadb_x_nat_t_frag) == 8 */
struct sadb_x_nat_t_frag {
u_int16_t sadb_x_nat_t_frag_len;
u_int16_t sadb_x_nat_t_frag_exttype;
u_int16_t sadb_x_nat_t_frag_fraglen;
u_int16_t sadb_x_nat_t_frag_reserved;
};
+_Static_assert(sizeof(struct sadb_x_nat_t_frag) == 8, "struct size mismatch");
#define SADB_EXT_RESERVED 0
@@ -332,39 +333,47 @@ struct sadb_x_nat_t_frag {
#define SADB_SAFLAGS_PFS 1
-/* RFC2367 numbers - meets RFC2407 */
+/*
+ * Though some of these numbers (both _AALG and _EALG) appear to be
+ * IKEv2 numbers and others original IKE numbers, they have no meaning.
+ * These are constants that the various IKE daemons use to tell the kernel
+ * what cipher to use.
+ *
+ * Do not use these constants directly to decide which Transformation ID
+ * to send. You are responsible for mapping them yourself.
+ */
#define SADB_AALG_NONE 0
#define SADB_AALG_MD5HMAC 2
#define SADB_AALG_SHA1HMAC 3
#define SADB_AALG_MAX 252
-/* private allocations - based on RFC2407/IANA assignment */
#define SADB_X_AALG_SHA2_256 5
#define SADB_X_AALG_SHA2_384 6
#define SADB_X_AALG_SHA2_512 7
#define SADB_X_AALG_RIPEMD160HMAC 8
-#define SADB_X_AALG_AES_XCBC_MAC 9 /* draft-ietf-ipsec-ciph-aes-xcbc-mac-04 */
-/* private allocations should use 249-255 (RFC2407) */
+#define SADB_X_AALG_AES_XCBC_MAC 9 /* RFC3566 */
+#define SADB_X_AALG_AES128GMAC 11 /* RFC4543 + Errata1821 */
+#define SADB_X_AALG_AES192GMAC 12
+#define SADB_X_AALG_AES256GMAC 13
#define SADB_X_AALG_MD5 249 /* Keyed MD5 */
#define SADB_X_AALG_SHA 250 /* Keyed SHA */
#define SADB_X_AALG_NULL 251 /* null authentication */
#define SADB_X_AALG_TCP_MD5 252 /* Keyed TCP-MD5 (RFC2385) */
-/* RFC2367 numbers - meets RFC2407 */
#define SADB_EALG_NONE 0
#define SADB_EALG_DESCBC 2
#define SADB_EALG_3DESCBC 3
-#define SADB_EALG_NULL 11
-#define SADB_EALG_MAX 250
-/* private allocations - based on RFC2407/IANA assignment */
#define SADB_X_EALG_CAST128CBC 6
#define SADB_X_EALG_BLOWFISHCBC 7
+#define SADB_EALG_NULL 11
#define SADB_X_EALG_RIJNDAELCBC 12
#define SADB_X_EALG_AES 12
-/* private allocations - based on RFC4312/IANA assignment */
-#define SADB_X_EALG_CAMELLIACBC 22
-/* private allocations should use 249-255 (RFC2407) */
-#define SADB_X_EALG_SKIPJACK 249 /*250*/ /* for IPSEC */
-#define SADB_X_EALG_AESCTR 250 /*249*/ /* draft-ietf-ipsec-ciph-aes-ctr-03 */
+#define SADB_X_EALG_AESCTR 13
+#define SADB_X_EALG_AESGCM8 18 /* RFC4106 */
+#define SADB_X_EALG_AESGCM12 19
+#define SADB_X_EALG_AESGCM16 20
+#define SADB_X_EALG_CAMELLIACBC 22
+#define SADB_X_EALG_AESGMAC 23 /* RFC4543 + Errata1821 */
+#define SADB_EALG_MAX 23 /* !!! keep updated !!! */
/* private allocations - based on RFC2407/IANA assignment */
#define SADB_X_CALG_NONE 0
diff --git a/freebsd/sys/net/pfvar.h b/freebsd/sys/net/pfvar.h
new file mode 100644
index 00000000..17768e96
--- /dev/null
+++ b/freebsd/sys/net/pfvar.h
@@ -0,0 +1,1757 @@
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _NET_PFVAR_H_
+#define _NET_PFVAR_H_
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/queue.h>
+#include <sys/counter.h>
+#include <sys/malloc.h>
+#include <sys/refcount.h>
+#include <sys/tree.h>
+#include <vm/uma.h>
+
+#include <net/radix.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+
+struct pf_addr {
+ union {
+ struct in_addr v4;
+ struct in6_addr v6;
+ u_int8_t addr8[16];
+ u_int16_t addr16[8];
+ u_int32_t addr32[4];
+ } pfa; /* 128-bit address */
+#define v4 pfa.v4
+#define v6 pfa.v6
+#define addr8 pfa.addr8
+#define addr16 pfa.addr16
+#define addr32 pfa.addr32
+};
+
+#define PFI_AFLAG_NETWORK 0x01
+#define PFI_AFLAG_BROADCAST 0x02
+#define PFI_AFLAG_PEER 0x04
+#define PFI_AFLAG_MODEMASK 0x07
+#define PFI_AFLAG_NOALIAS 0x08
+
+struct pf_addr_wrap {
+ union {
+ struct {
+ struct pf_addr addr;
+ struct pf_addr mask;
+ } a;
+ char ifname[IFNAMSIZ];
+ char tblname[PF_TABLE_NAME_SIZE];
+ } v;
+ union {
+ struct pfi_dynaddr *dyn;
+ struct pfr_ktable *tbl;
+ int dyncnt;
+ int tblcnt;
+ } p;
+ u_int8_t type; /* PF_ADDR_* */
+ u_int8_t iflags; /* PFI_AFLAG_* */
+};
+
+#ifdef _KERNEL
+
+struct pfi_dynaddr {
+ TAILQ_ENTRY(pfi_dynaddr) entry;
+ struct pf_addr pfid_addr4;
+ struct pf_addr pfid_mask4;
+ struct pf_addr pfid_addr6;
+ struct pf_addr pfid_mask6;
+ struct pfr_ktable *pfid_kt;
+ struct pfi_kif *pfid_kif;
+ int pfid_net; /* mask or 128 */
+ int pfid_acnt4; /* address count IPv4 */
+ int pfid_acnt6; /* address count IPv6 */
+ sa_family_t pfid_af; /* rule af */
+ u_int8_t pfid_iflags; /* PFI_AFLAG_* */
+};
+
+/*
+ * Address manipulation macros
+ */
+#define HTONL(x) (x) = htonl((__uint32_t)(x))
+#define HTONS(x) (x) = htons((__uint16_t)(x))
+#define NTOHL(x) (x) = ntohl((__uint32_t)(x))
+#define NTOHS(x) (x) = ntohs((__uint16_t)(x))
+
+#define PF_NAME "pf"
+
+#define PF_HASHROW_ASSERT(h) mtx_assert(&(h)->lock, MA_OWNED)
+#define PF_HASHROW_LOCK(h) mtx_lock(&(h)->lock)
+#define PF_HASHROW_UNLOCK(h) mtx_unlock(&(h)->lock)
+
+#define PF_STATE_LOCK(s) \
+ do { \
+ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \
+ PF_HASHROW_LOCK(_ih); \
+ } while (0)
+
+#define PF_STATE_UNLOCK(s) \
+ do { \
+ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH((s))]; \
+ PF_HASHROW_UNLOCK(_ih); \
+ } while (0)
+
+#ifdef INVARIANTS
+#define PF_STATE_LOCK_ASSERT(s) \
+ do { \
+ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \
+ PF_HASHROW_ASSERT(_ih); \
+ } while (0)
+#else /* !INVARIANTS */
+#define PF_STATE_LOCK_ASSERT(s) do {} while (0)
+#endif /* INVARIANTS */
+
+extern struct mtx pf_unlnkdrules_mtx;
+#define PF_UNLNKDRULES_LOCK() mtx_lock(&pf_unlnkdrules_mtx)
+#define PF_UNLNKDRULES_UNLOCK() mtx_unlock(&pf_unlnkdrules_mtx)
+
+extern struct rwlock pf_rules_lock;
+#define PF_RULES_RLOCK() rw_rlock(&pf_rules_lock)
+#define PF_RULES_RUNLOCK() rw_runlock(&pf_rules_lock)
+#define PF_RULES_WLOCK() rw_wlock(&pf_rules_lock)
+#define PF_RULES_WUNLOCK() rw_wunlock(&pf_rules_lock)
+#define PF_RULES_ASSERT() rw_assert(&pf_rules_lock, RA_LOCKED)
+#define PF_RULES_RASSERT() rw_assert(&pf_rules_lock, RA_RLOCKED)
+#define PF_RULES_WASSERT() rw_assert(&pf_rules_lock, RA_WLOCKED)
+
+#define PF_MODVER 1
+#define PFLOG_MODVER 1
+#define PFSYNC_MODVER 1
+
+#define PFLOG_MINVER 1
+#define PFLOG_PREFVER PFLOG_MODVER
+#define PFLOG_MAXVER 1
+#define PFSYNC_MINVER 1
+#define PFSYNC_PREFVER PFSYNC_MODVER
+#define PFSYNC_MAXVER 1
+
+#ifdef INET
+#ifndef INET6
+#define PF_INET_ONLY
+#endif /* ! INET6 */
+#endif /* INET */
+
+#ifdef INET6
+#ifndef INET
+#define PF_INET6_ONLY
+#endif /* ! INET */
+#endif /* INET6 */
+
+#ifdef INET
+#ifdef INET6
+#define PF_INET_INET6
+#endif /* INET6 */
+#endif /* INET */
+
+#else
+
+#define PF_INET_INET6
+
+#endif /* _KERNEL */
+
+/* Both IPv4 and IPv6 */
+#ifdef PF_INET_INET6
+
+#define PF_AEQ(a, b, c) \
+ ((c == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \
+ (c == AF_INET6 && (a)->addr32[3] == (b)->addr32[3] && \
+ (a)->addr32[2] == (b)->addr32[2] && \
+ (a)->addr32[1] == (b)->addr32[1] && \
+ (a)->addr32[0] == (b)->addr32[0])) \
+
+#define PF_ANEQ(a, b, c) \
+ ((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \
+ (c == AF_INET6 && ((a)->addr32[0] != (b)->addr32[0] || \
+ (a)->addr32[1] != (b)->addr32[1] || \
+ (a)->addr32[2] != (b)->addr32[2] || \
+ (a)->addr32[3] != (b)->addr32[3]))) \
+
+#define PF_AZERO(a, c) \
+ ((c == AF_INET && !(a)->addr32[0]) || \
+ (c == AF_INET6 && !(a)->addr32[0] && !(a)->addr32[1] && \
+ !(a)->addr32[2] && !(a)->addr32[3] )) \
+
+#define PF_MATCHA(n, a, m, b, f) \
+ pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+ pf_addrcpy(a, b, f)
+
+#define PF_AINC(a, f) \
+ pf_addr_inc(a, f)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+ pf_poolmask(a, b, c, d, f)
+
+#else
+
+/* Just IPv6 */
+
+#ifdef PF_INET6_ONLY
+
+#define PF_AEQ(a, b, c) \
+ ((a)->addr32[3] == (b)->addr32[3] && \
+ (a)->addr32[2] == (b)->addr32[2] && \
+ (a)->addr32[1] == (b)->addr32[1] && \
+ (a)->addr32[0] == (b)->addr32[0]) \
+
+#define PF_ANEQ(a, b, c) \
+ ((a)->addr32[3] != (b)->addr32[3] || \
+ (a)->addr32[2] != (b)->addr32[2] || \
+ (a)->addr32[1] != (b)->addr32[1] || \
+ (a)->addr32[0] != (b)->addr32[0]) \
+
+#define PF_AZERO(a, c) \
+ (!(a)->addr32[0] && \
+ !(a)->addr32[1] && \
+ !(a)->addr32[2] && \
+ !(a)->addr32[3] ) \
+
+#define PF_MATCHA(n, a, m, b, f) \
+ pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+ pf_addrcpy(a, b, f)
+
+#define PF_AINC(a, f) \
+ pf_addr_inc(a, f)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+ pf_poolmask(a, b, c, d, f)
+
+#else
+
+/* Just IPv4 */
+#ifdef PF_INET_ONLY
+
+#define PF_AEQ(a, b, c) \
+ ((a)->addr32[0] == (b)->addr32[0])
+
+#define PF_ANEQ(a, b, c) \
+ ((a)->addr32[0] != (b)->addr32[0])
+
+#define PF_AZERO(a, c) \
+ (!(a)->addr32[0])
+
+#define PF_MATCHA(n, a, m, b, f) \
+ pf_match_addr(n, a, m, b, f)
+
+#define PF_ACPY(a, b, f) \
+ (a)->v4.s_addr = (b)->v4.s_addr
+
+#define PF_AINC(a, f) \
+ do { \
+ (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \
+ } while (0)
+
+#define PF_POOLMASK(a, b, c, d, f) \
+ do { \
+ (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \
+ (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \
+ } while (0)
+
+#endif /* PF_INET_ONLY */
+#endif /* PF_INET6_ONLY */
+#endif /* PF_INET_INET6 */
+
+/*
+ * XXX callers not FIB-aware in our version of pf yet.
+ * OpenBSD fixed it later it seems, 2010/05/07 13:33:16 claudio.
+ */
+#define PF_MISMATCHAW(aw, x, af, neg, ifp, rtid) \
+ ( \
+ (((aw)->type == PF_ADDR_NOROUTE && \
+ pf_routable((x), (af), NULL, (rtid))) || \
+ (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \
+ pf_routable((x), (af), (ifp), (rtid))) || \
+ ((aw)->type == PF_ADDR_TABLE && \
+ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \
+ ((aw)->type == PF_ADDR_DYNIFTL && \
+ !pfi_match_addr((aw)->p.dyn, (x), (af))) || \
+ ((aw)->type == PF_ADDR_RANGE && \
+ !pf_match_addr_range(&(aw)->v.a.addr, \
+ &(aw)->v.a.mask, (x), (af))) || \
+ ((aw)->type == PF_ADDR_ADDRMASK && \
+ !PF_AZERO(&(aw)->v.a.mask, (af)) && \
+ !PF_MATCHA(0, &(aw)->v.a.addr, \
+ &(aw)->v.a.mask, (x), (af))))) != \
+ (neg) \
+ )
+
+
+struct pf_rule_uid {
+ uid_t uid[2];
+ u_int8_t op;
+};
+
+struct pf_rule_gid {
+ uid_t gid[2];
+ u_int8_t op;
+};
+
+struct pf_rule_addr {
+ struct pf_addr_wrap addr;
+ u_int16_t port[2];
+ u_int8_t neg;
+ u_int8_t port_op;
+};
+
+struct pf_pooladdr {
+ struct pf_addr_wrap addr;
+ TAILQ_ENTRY(pf_pooladdr) entries;
+ char ifname[IFNAMSIZ];
+ struct pfi_kif *kif;
+};
+
+TAILQ_HEAD(pf_palist, pf_pooladdr);
+
+struct pf_poolhashkey {
+ union {
+ u_int8_t key8[16];
+ u_int16_t key16[8];
+ u_int32_t key32[4];
+ } pfk; /* 128-bit hash key */
+#define key8 pfk.key8
+#define key16 pfk.key16
+#define key32 pfk.key32
+};
+
+struct pf_pool {
+ struct pf_palist list;
+ struct pf_pooladdr *cur;
+ struct pf_poolhashkey key;
+ struct pf_addr counter;
+ int tblidx;
+ u_int16_t proxy_port[2];
+ u_int8_t opts;
+};
+
+
+/* A packed Operating System description for fingerprinting */
+typedef u_int32_t pf_osfp_t;
+#define PF_OSFP_ANY ((pf_osfp_t)0)
+#define PF_OSFP_UNKNOWN ((pf_osfp_t)-1)
+#define PF_OSFP_NOMATCH ((pf_osfp_t)-2)
+
+struct pf_osfp_entry {
+ SLIST_ENTRY(pf_osfp_entry) fp_entry;
+ pf_osfp_t fp_os;
+ int fp_enflags;
+#define PF_OSFP_EXPANDED 0x001 /* expanded entry */
+#define PF_OSFP_GENERIC 0x002 /* generic signature */
+#define PF_OSFP_NODETAIL 0x004 /* no p0f details */
+#define PF_OSFP_LEN 32
+ char fp_class_nm[PF_OSFP_LEN];
+ char fp_version_nm[PF_OSFP_LEN];
+ char fp_subtype_nm[PF_OSFP_LEN];
+};
+#define PF_OSFP_ENTRY_EQ(a, b) \
+ ((a)->fp_os == (b)->fp_os && \
+ memcmp((a)->fp_class_nm, (b)->fp_class_nm, PF_OSFP_LEN) == 0 && \
+ memcmp((a)->fp_version_nm, (b)->fp_version_nm, PF_OSFP_LEN) == 0 && \
+ memcmp((a)->fp_subtype_nm, (b)->fp_subtype_nm, PF_OSFP_LEN) == 0)
+
+/* handle pf_osfp_t packing */
+#define _FP_RESERVED_BIT 1 /* For the special negative #defines */
+#define _FP_UNUSED_BITS 1
+#define _FP_CLASS_BITS 10 /* OS Class (Windows, Linux) */
+#define _FP_VERSION_BITS 10 /* OS version (95, 98, NT, 2.4.54, 3.2) */
+#define _FP_SUBTYPE_BITS 10 /* patch level (NT SP4, SP3, ECN patch) */
+#define PF_OSFP_UNPACK(osfp, class, version, subtype) do { \
+ (class) = ((osfp) >> (_FP_VERSION_BITS+_FP_SUBTYPE_BITS)) & \
+ ((1 << _FP_CLASS_BITS) - 1); \
+ (version) = ((osfp) >> _FP_SUBTYPE_BITS) & \
+ ((1 << _FP_VERSION_BITS) - 1);\
+ (subtype) = (osfp) & ((1 << _FP_SUBTYPE_BITS) - 1); \
+} while(0)
+#define PF_OSFP_PACK(osfp, class, version, subtype) do { \
+ (osfp) = ((class) & ((1 << _FP_CLASS_BITS) - 1)) << (_FP_VERSION_BITS \
+ + _FP_SUBTYPE_BITS); \
+ (osfp) |= ((version) & ((1 << _FP_VERSION_BITS) - 1)) << \
+ _FP_SUBTYPE_BITS; \
+ (osfp) |= (subtype) & ((1 << _FP_SUBTYPE_BITS) - 1); \
+} while(0)
+
+/* the fingerprint of an OSes TCP SYN packet */
+typedef u_int64_t pf_tcpopts_t;
+struct pf_os_fingerprint {
+ SLIST_HEAD(pf_osfp_enlist, pf_osfp_entry) fp_oses; /* list of matches */
+ pf_tcpopts_t fp_tcpopts; /* packed TCP options */
+ u_int16_t fp_wsize; /* TCP window size */
+ u_int16_t fp_psize; /* ip->ip_len */
+ u_int16_t fp_mss; /* TCP MSS */
+ u_int16_t fp_flags;
+#define PF_OSFP_WSIZE_MOD 0x0001 /* Window modulus */
+#define PF_OSFP_WSIZE_DC 0x0002 /* Window don't care */
+#define PF_OSFP_WSIZE_MSS 0x0004 /* Window multiple of MSS */
+#define PF_OSFP_WSIZE_MTU 0x0008 /* Window multiple of MTU */
+#define PF_OSFP_PSIZE_MOD 0x0010 /* packet size modulus */
+#define PF_OSFP_PSIZE_DC 0x0020 /* packet size don't care */
+#define PF_OSFP_WSCALE 0x0040 /* TCP window scaling */
+#define PF_OSFP_WSCALE_MOD 0x0080 /* TCP window scale modulus */
+#define PF_OSFP_WSCALE_DC 0x0100 /* TCP window scale dont-care */
+#define PF_OSFP_MSS 0x0200 /* TCP MSS */
+#define PF_OSFP_MSS_MOD 0x0400 /* TCP MSS modulus */
+#define PF_OSFP_MSS_DC 0x0800 /* TCP MSS dont-care */
+#define PF_OSFP_DF 0x1000 /* IPv4 don't fragment bit */
+#define PF_OSFP_TS0 0x2000 /* Zero timestamp */
+#define PF_OSFP_INET6 0x4000 /* IPv6 */
+ u_int8_t fp_optcnt; /* TCP option count */
+ u_int8_t fp_wscale; /* TCP window scaling */
+ u_int8_t fp_ttl; /* IPv4 TTL */
+#define PF_OSFP_MAXTTL_OFFSET 40
+/* TCP options packing */
+#define PF_OSFP_TCPOPT_NOP 0x0 /* TCP NOP option */
+#define PF_OSFP_TCPOPT_WSCALE 0x1 /* TCP window scaling option */
+#define PF_OSFP_TCPOPT_MSS 0x2 /* TCP max segment size opt */
+#define PF_OSFP_TCPOPT_SACK 0x3 /* TCP SACK OK option */
+#define PF_OSFP_TCPOPT_TS 0x4 /* TCP timestamp option */
+#define PF_OSFP_TCPOPT_BITS 3 /* bits used by each option */
+#define PF_OSFP_MAX_OPTS \
+ (sizeof(((struct pf_os_fingerprint *)0)->fp_tcpopts) * 8) \
+ / PF_OSFP_TCPOPT_BITS
+
+ SLIST_ENTRY(pf_os_fingerprint) fp_next;
+};
+
+struct pf_osfp_ioctl {
+ struct pf_osfp_entry fp_os;
+ pf_tcpopts_t fp_tcpopts; /* packed TCP options */
+ u_int16_t fp_wsize; /* TCP window size */
+ u_int16_t fp_psize; /* ip->ip_len */
+ u_int16_t fp_mss; /* TCP MSS */
+ u_int16_t fp_flags;
+ u_int8_t fp_optcnt; /* TCP option count */
+ u_int8_t fp_wscale; /* TCP window scaling */
+ u_int8_t fp_ttl; /* IPv4 TTL */
+
+ int fp_getnum; /* DIOCOSFPGET number */
+};
+
+
+union pf_rule_ptr {
+ struct pf_rule *ptr;
+ u_int32_t nr;
+};
+
+#define PF_ANCHOR_NAME_SIZE 64
+
+struct pf_rule {
+ struct pf_rule_addr src;
+ struct pf_rule_addr dst;
+#define PF_SKIP_IFP 0
+#define PF_SKIP_DIR 1
+#define PF_SKIP_AF 2
+#define PF_SKIP_PROTO 3
+#define PF_SKIP_SRC_ADDR 4
+#define PF_SKIP_SRC_PORT 5
+#define PF_SKIP_DST_ADDR 6
+#define PF_SKIP_DST_PORT 7
+#define PF_SKIP_COUNT 8
+ union pf_rule_ptr skip[PF_SKIP_COUNT];
+#define PF_RULE_LABEL_SIZE 64
+ char label[PF_RULE_LABEL_SIZE];
+ char ifname[IFNAMSIZ];
+ char qname[PF_QNAME_SIZE];
+ char pqname[PF_QNAME_SIZE];
+#define PF_TAG_NAME_SIZE 64
+ char tagname[PF_TAG_NAME_SIZE];
+ char match_tagname[PF_TAG_NAME_SIZE];
+
+ char overload_tblname[PF_TABLE_NAME_SIZE];
+
+ TAILQ_ENTRY(pf_rule) entries;
+ struct pf_pool rpool;
+
+ u_int64_t evaluations;
+ u_int64_t packets[2];
+ u_int64_t bytes[2];
+
+ struct pfi_kif *kif;
+ struct pf_anchor *anchor;
+ struct pfr_ktable *overload_tbl;
+
+ pf_osfp_t os_fingerprint;
+
+ int rtableid;
+ u_int32_t timeout[PFTM_MAX];
+ u_int32_t max_states;
+ u_int32_t max_src_nodes;
+ u_int32_t max_src_states;
+ u_int32_t max_src_conn;
+ struct {
+ u_int32_t limit;
+ u_int32_t seconds;
+ } max_src_conn_rate;
+ u_int32_t qid;
+ u_int32_t pqid;
+ u_int32_t rt_listid;
+ u_int32_t nr;
+ u_int32_t prob;
+ uid_t cuid;
+ pid_t cpid;
+
+ counter_u64_t states_cur;
+ counter_u64_t states_tot;
+ counter_u64_t src_nodes;
+
+ u_int16_t return_icmp;
+ u_int16_t return_icmp6;
+ u_int16_t max_mss;
+ u_int16_t tag;
+ u_int16_t match_tag;
+ u_int16_t scrub_flags;
+
+ struct pf_rule_uid uid;
+ struct pf_rule_gid gid;
+
+ u_int32_t rule_flag;
+ u_int8_t action;
+ u_int8_t direction;
+ u_int8_t log;
+ u_int8_t logif;
+ u_int8_t quick;
+ u_int8_t ifnot;
+ u_int8_t match_tag_not;
+ u_int8_t natpass;
+
+#define PF_STATE_NORMAL 0x1
+#define PF_STATE_MODULATE 0x2
+#define PF_STATE_SYNPROXY 0x3
+ u_int8_t keep_state;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t type;
+ u_int8_t code;
+ u_int8_t flags;
+ u_int8_t flagset;
+ u_int8_t min_ttl;
+ u_int8_t allow_opts;
+ u_int8_t rt;
+ u_int8_t return_ttl;
+ u_int8_t tos;
+ u_int8_t set_tos;
+ u_int8_t anchor_relative;
+ u_int8_t anchor_wildcard;
+
+#define PF_FLUSH 0x01
+#define PF_FLUSH_GLOBAL 0x02
+ u_int8_t flush;
+#define PF_PRIO_ZERO 0xff /* match "prio 0" packets */
+#define PF_PRIO_MAX 7
+ u_int8_t prio;
+ u_int8_t set_prio[2];
+
+ struct {
+ struct pf_addr addr;
+ u_int16_t port;
+ } divert;
+
+ uint64_t u_states_cur;
+ uint64_t u_states_tot;
+ uint64_t u_src_nodes;
+};
+
+/* rule flags */
+#define PFRULE_DROP 0x0000
+#define PFRULE_RETURNRST 0x0001
+#define PFRULE_FRAGMENT 0x0002
+#define PFRULE_RETURNICMP 0x0004
+#define PFRULE_RETURN 0x0008
+#define PFRULE_NOSYNC 0x0010
+#define PFRULE_SRCTRACK 0x0020 /* track source states */
+#define PFRULE_RULESRCTRACK 0x0040 /* per rule */
+#define PFRULE_REFS 0x0080 /* rule has references */
+
+/* scrub flags */
+#define PFRULE_NODF 0x0100
+#define PFRULE_RANDOMID 0x0800
+#define PFRULE_REASSEMBLE_TCP 0x1000
+#define PFRULE_SET_TOS 0x2000
+
+/* rule flags again */
+#define PFRULE_IFBOUND 0x00010000 /* if-bound */
+#define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */
+
+#define PFSTATE_HIWAT 10000 /* default state table size */
+#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */
+#define PFSTATE_ADAPT_END 12000 /* default adaptive timeout end */
+
+
+struct pf_threshold {
+ u_int32_t limit;
+#define PF_THRESHOLD_MULT 1000
+#define PF_THRESHOLD_MAX 0xffffffff / PF_THRESHOLD_MULT
+ u_int32_t seconds;
+ u_int32_t count;
+ u_int32_t last;
+};
+
+struct pf_src_node {
+ LIST_ENTRY(pf_src_node) entry;
+ struct pf_addr addr;
+ struct pf_addr raddr;
+ union pf_rule_ptr rule;
+ struct pfi_kif *kif;
+ u_int64_t bytes[2];
+ u_int64_t packets[2];
+ u_int32_t states;
+ u_int32_t conn;
+ struct pf_threshold conn_rate;
+ u_int32_t creation;
+ u_int32_t expire;
+ sa_family_t af;
+ u_int8_t ruletype;
+};
+
+#define PFSNODE_HIWAT 10000 /* default source node table size */
+
+struct pf_state_scrub {
+ struct timeval pfss_last; /* time received last packet */
+ u_int32_t pfss_tsecr; /* last echoed timestamp */
+ u_int32_t pfss_tsval; /* largest timestamp */
+ u_int32_t pfss_tsval0; /* original timestamp */
+ u_int16_t pfss_flags;
+#define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */
+#define PFSS_PAWS 0x0010 /* stricter PAWS checks */
+#define PFSS_PAWS_IDLED 0x0020 /* was idle too long. no PAWS */
+#define PFSS_DATA_TS 0x0040 /* timestamp on data packets */
+#define PFSS_DATA_NOTS 0x0080 /* no timestamp on data packets */
+ u_int8_t pfss_ttl; /* stashed TTL */
+ u_int8_t pad;
+ u_int32_t pfss_ts_mod; /* timestamp modulation */
+};
+
+struct pf_state_host {
+ struct pf_addr addr;
+ u_int16_t port;
+ u_int16_t pad;
+};
+
+struct pf_state_peer {
+ struct pf_state_scrub *scrub; /* state is scrubbed */
+ u_int32_t seqlo; /* Max sequence number sent */
+ u_int32_t seqhi; /* Max the other end ACKd + win */
+ u_int32_t seqdiff; /* Sequence number modulator */
+ u_int16_t max_win; /* largest window (pre scaling) */
+ u_int16_t mss; /* Maximum segment size option */
+ u_int8_t state; /* active state level */
+ u_int8_t wscale; /* window scaling factor */
+ u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */
+ u_int8_t pad[1];
+};
+
+/* Keep synced with struct pf_state_key. */
+struct pf_state_key_cmp {
+ struct pf_addr addr[2];
+ u_int16_t port[2];
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t pad[2];
+};
+
+struct pf_state_key {
+ struct pf_addr addr[2];
+ u_int16_t port[2];
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t pad[2];
+
+ LIST_ENTRY(pf_state_key) entry;
+ TAILQ_HEAD(, pf_state) states[2];
+};
+
+/* Keep synced with struct pf_state. */
+struct pf_state_cmp {
+ u_int64_t id;
+ u_int32_t creatorid;
+ u_int8_t direction;
+ u_int8_t pad[3];
+};
+
+struct pf_state {
+ u_int64_t id;
+ u_int32_t creatorid;
+ u_int8_t direction;
+ u_int8_t pad[3];
+
+ u_int refs;
+ TAILQ_ENTRY(pf_state) sync_list;
+ TAILQ_ENTRY(pf_state) key_list[2];
+ LIST_ENTRY(pf_state) entry;
+ struct pf_state_peer src;
+ struct pf_state_peer dst;
+ union pf_rule_ptr rule;
+ union pf_rule_ptr anchor;
+ union pf_rule_ptr nat_rule;
+ struct pf_addr rt_addr;
+ struct pf_state_key *key[2]; /* addresses stack and wire */
+ struct pfi_kif *kif;
+ struct pfi_kif *rt_kif;
+ struct pf_src_node *src_node;
+ struct pf_src_node *nat_src_node;
+ u_int64_t packets[2];
+ u_int64_t bytes[2];
+ u_int32_t creation;
+ u_int32_t expire;
+ u_int32_t pfsync_time;
+ u_int16_t tag;
+ u_int8_t log;
+ u_int8_t state_flags;
+#define PFSTATE_ALLOWOPTS 0x01
+#define PFSTATE_SLOPPY 0x02
+/* was PFSTATE_PFLOW 0x04 */
+#define PFSTATE_NOSYNC 0x08
+#define PFSTATE_ACK 0x10
+#define PFSTATE_SETPRIO 0x0200
+#define PFSTATE_SETMASK (PFSTATE_SETPRIO)
+ u_int8_t timeout;
+ u_int8_t sync_state; /* PFSYNC_S_x */
+
+ /* XXX */
+ u_int8_t sync_updates;
+ u_int8_t _tail[3];
+};
+
+/*
+ * Unified state structures for pulling states out of the kernel
+ * used by pfsync(4) and the pf(4) ioctl.
+ */
+struct pfsync_state_scrub {
+ u_int16_t pfss_flags;
+ u_int8_t pfss_ttl; /* stashed TTL */
+#define PFSYNC_SCRUB_FLAG_VALID 0x01
+ u_int8_t scrub_flag;
+ u_int32_t pfss_ts_mod; /* timestamp modulation */
+} __packed;
+
+struct pfsync_state_peer {
+ struct pfsync_state_scrub scrub; /* state is scrubbed */
+ u_int32_t seqlo; /* Max sequence number sent */
+ u_int32_t seqhi; /* Max the other end ACKd + win */
+ u_int32_t seqdiff; /* Sequence number modulator */
+ u_int16_t max_win; /* largest window (pre scaling) */
+ u_int16_t mss; /* Maximum segment size option */
+ u_int8_t state; /* active state level */
+ u_int8_t wscale; /* window scaling factor */
+ u_int8_t pad[6];
+} __packed;
+
+struct pfsync_state_key {
+ struct pf_addr addr[2];
+ u_int16_t port[2];
+};
+
+struct pfsync_state {
+ u_int64_t id;
+ char ifname[IFNAMSIZ];
+ struct pfsync_state_key key[2];
+ struct pfsync_state_peer src;
+ struct pfsync_state_peer dst;
+ struct pf_addr rt_addr;
+ u_int32_t rule;
+ u_int32_t anchor;
+ u_int32_t nat_rule;
+ u_int32_t creation;
+ u_int32_t expire;
+ u_int32_t packets[2][2];
+ u_int32_t bytes[2][2];
+ u_int32_t creatorid;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t direction;
+ u_int8_t __spare[2];
+ u_int8_t log;
+ u_int8_t state_flags;
+ u_int8_t timeout;
+ u_int8_t sync_flags;
+ u_int8_t updates;
+} __packed;
+
+#ifdef _KERNEL
+/* pfsync */
+typedef int pfsync_state_import_t(struct pfsync_state *, u_int8_t);
+typedef void pfsync_insert_state_t(struct pf_state *);
+typedef void pfsync_update_state_t(struct pf_state *);
+typedef void pfsync_delete_state_t(struct pf_state *);
+typedef void pfsync_clear_states_t(u_int32_t, const char *);
+typedef int pfsync_defer_t(struct pf_state *, struct mbuf *);
+
+extern pfsync_state_import_t *pfsync_state_import_ptr;
+extern pfsync_insert_state_t *pfsync_insert_state_ptr;
+extern pfsync_update_state_t *pfsync_update_state_ptr;
+extern pfsync_delete_state_t *pfsync_delete_state_ptr;
+extern pfsync_clear_states_t *pfsync_clear_states_ptr;
+extern pfsync_defer_t *pfsync_defer_ptr;
+
+void pfsync_state_export(struct pfsync_state *,
+ struct pf_state *);
+
+/* pflog */
+struct pf_ruleset;
+struct pf_pdesc;
+typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t,
+ u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *,
+ struct pf_ruleset *, struct pf_pdesc *, int);
+extern pflog_packet_t *pflog_packet_ptr;
+
+#endif /* _KERNEL */
+
+#define PFSYNC_FLAG_SRCNODE 0x04
+#define PFSYNC_FLAG_NATSRCNODE 0x08
+
+/* for copies to/from network byte order */
+/* ioctl interface also uses network byte order */
+#define pf_state_peer_hton(s,d) do { \
+ (d)->seqlo = htonl((s)->seqlo); \
+ (d)->seqhi = htonl((s)->seqhi); \
+ (d)->seqdiff = htonl((s)->seqdiff); \
+ (d)->max_win = htons((s)->max_win); \
+ (d)->mss = htons((s)->mss); \
+ (d)->state = (s)->state; \
+ (d)->wscale = (s)->wscale; \
+ if ((s)->scrub) { \
+ (d)->scrub.pfss_flags = \
+ htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \
+ (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \
+ (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\
+ (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \
+ } \
+} while (0)
+
+#define pf_state_peer_ntoh(s,d) do { \
+ (d)->seqlo = ntohl((s)->seqlo); \
+ (d)->seqhi = ntohl((s)->seqhi); \
+ (d)->seqdiff = ntohl((s)->seqdiff); \
+ (d)->max_win = ntohs((s)->max_win); \
+ (d)->mss = ntohs((s)->mss); \
+ (d)->state = (s)->state; \
+ (d)->wscale = (s)->wscale; \
+ if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \
+ (d)->scrub != NULL) { \
+ (d)->scrub->pfss_flags = \
+ ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \
+ (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \
+ (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\
+ } \
+} while (0)
+
+#define pf_state_counter_hton(s,d) do { \
+ d[0] = htonl((s>>32)&0xffffffff); \
+ d[1] = htonl(s&0xffffffff); \
+} while (0)
+
+#define pf_state_counter_from_pfsync(s) \
+ (((u_int64_t)(s[0])<<32) | (u_int64_t)(s[1]))
+
+#define pf_state_counter_ntoh(s,d) do { \
+ d = ntohl(s[0]); \
+ d = d<<32; \
+ d += ntohl(s[1]); \
+} while (0)
+
+TAILQ_HEAD(pf_rulequeue, pf_rule);
+
+struct pf_anchor;
+
+struct pf_ruleset {
+ struct {
+ struct pf_rulequeue queues[2];
+ struct {
+ struct pf_rulequeue *ptr;
+ struct pf_rule **ptr_array;
+ u_int32_t rcount;
+ u_int32_t ticket;
+ int open;
+ } active, inactive;
+ } rules[PF_RULESET_MAX];
+ struct pf_anchor *anchor;
+ u_int32_t tticket;
+ int tables;
+ int topen;
+};
+
+RB_HEAD(pf_anchor_global, pf_anchor);
+RB_HEAD(pf_anchor_node, pf_anchor);
+struct pf_anchor {
+ RB_ENTRY(pf_anchor) entry_global;
+ RB_ENTRY(pf_anchor) entry_node;
+ struct pf_anchor *parent;
+ struct pf_anchor_node children;
+ char name[PF_ANCHOR_NAME_SIZE];
+ char path[MAXPATHLEN];
+ struct pf_ruleset ruleset;
+ int refcnt; /* anchor rules */
+ int match; /* XXX: used for pfctl black magic */
+};
+RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare);
+RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare);
+
+#define PF_RESERVED_ANCHOR "_pf"
+
+#define PFR_TFLAG_PERSIST 0x00000001
+#define PFR_TFLAG_CONST 0x00000002
+#define PFR_TFLAG_ACTIVE 0x00000004
+#define PFR_TFLAG_INACTIVE 0x00000008
+#define PFR_TFLAG_REFERENCED 0x00000010
+#define PFR_TFLAG_REFDANCHOR 0x00000020
+#define PFR_TFLAG_COUNTERS 0x00000040
+/* Adjust masks below when adding flags. */
+#define PFR_TFLAG_USRMASK (PFR_TFLAG_PERSIST | \
+ PFR_TFLAG_CONST | \
+ PFR_TFLAG_COUNTERS)
+#define PFR_TFLAG_SETMASK (PFR_TFLAG_ACTIVE | \
+ PFR_TFLAG_INACTIVE | \
+ PFR_TFLAG_REFERENCED | \
+ PFR_TFLAG_REFDANCHOR)
+#define PFR_TFLAG_ALLMASK (PFR_TFLAG_PERSIST | \
+ PFR_TFLAG_CONST | \
+ PFR_TFLAG_ACTIVE | \
+ PFR_TFLAG_INACTIVE | \
+ PFR_TFLAG_REFERENCED | \
+ PFR_TFLAG_REFDANCHOR | \
+ PFR_TFLAG_COUNTERS)
+
+struct pf_anchor_stackframe;
+
+struct pfr_table {
+ char pfrt_anchor[MAXPATHLEN];
+ char pfrt_name[PF_TABLE_NAME_SIZE];
+ u_int32_t pfrt_flags;
+ u_int8_t pfrt_fback;
+};
+
+enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED,
+ PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE,
+ PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_NOCOUNT, PFR_FB_MAX };
+
+struct pfr_addr {
+ union {
+ struct in_addr _pfra_ip4addr;
+ struct in6_addr _pfra_ip6addr;
+ } pfra_u;
+ u_int8_t pfra_af;
+ u_int8_t pfra_net;
+ u_int8_t pfra_not;
+ u_int8_t pfra_fback;
+};
+#define pfra_ip4addr pfra_u._pfra_ip4addr
+#define pfra_ip6addr pfra_u._pfra_ip6addr
+
+enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX };
+enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX };
+#define PFR_OP_XPASS PFR_OP_ADDR_MAX
+
+struct pfr_astats {
+ struct pfr_addr pfras_a;
+ u_int64_t pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ u_int64_t pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ long pfras_tzero;
+};
+
+enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX };
+
+struct pfr_tstats {
+ struct pfr_table pfrts_t;
+ u_int64_t pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+ u_int64_t pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+ u_int64_t pfrts_match;
+ u_int64_t pfrts_nomatch;
+ long pfrts_tzero;
+ int pfrts_cnt;
+ int pfrts_refcnt[PFR_REFCNT_MAX];
+};
+#define pfrts_name pfrts_t.pfrt_name
+#define pfrts_flags pfrts_t.pfrt_flags
+
+#ifndef _SOCKADDR_UNION_DEFINED
+#define _SOCKADDR_UNION_DEFINED
+union sockaddr_union {
+ struct sockaddr sa;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+};
+#endif /* _SOCKADDR_UNION_DEFINED */
+
+struct pfr_kcounters {
+ u_int64_t pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ u_int64_t pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+};
+
+SLIST_HEAD(pfr_kentryworkq, pfr_kentry);
+struct pfr_kentry {
+ struct radix_node pfrke_node[2];
+ union sockaddr_union pfrke_sa;
+ SLIST_ENTRY(pfr_kentry) pfrke_workq;
+ struct pfr_kcounters *pfrke_counters;
+ long pfrke_tzero;
+ u_int8_t pfrke_af;
+ u_int8_t pfrke_net;
+ u_int8_t pfrke_not;
+ u_int8_t pfrke_mark;
+};
+
+SLIST_HEAD(pfr_ktableworkq, pfr_ktable);
+RB_HEAD(pfr_ktablehead, pfr_ktable);
+struct pfr_ktable {
+ struct pfr_tstats pfrkt_ts;
+ RB_ENTRY(pfr_ktable) pfrkt_tree;
+ SLIST_ENTRY(pfr_ktable) pfrkt_workq;
+ struct radix_node_head *pfrkt_ip4;
+ struct radix_node_head *pfrkt_ip6;
+ struct pfr_ktable *pfrkt_shadow;
+ struct pfr_ktable *pfrkt_root;
+ struct pf_ruleset *pfrkt_rs;
+ long pfrkt_larg;
+ int pfrkt_nflags;
+};
+#define pfrkt_t pfrkt_ts.pfrts_t
+#define pfrkt_name pfrkt_t.pfrt_name
+#define pfrkt_anchor pfrkt_t.pfrt_anchor
+#define pfrkt_ruleset pfrkt_t.pfrt_ruleset
+#define pfrkt_flags pfrkt_t.pfrt_flags
+#define pfrkt_cnt pfrkt_ts.pfrts_cnt
+#define pfrkt_refcnt pfrkt_ts.pfrts_refcnt
+#define pfrkt_packets pfrkt_ts.pfrts_packets
+#define pfrkt_bytes pfrkt_ts.pfrts_bytes
+#define pfrkt_match pfrkt_ts.pfrts_match
+#define pfrkt_nomatch pfrkt_ts.pfrts_nomatch
+#define pfrkt_tzero pfrkt_ts.pfrts_tzero
+
+/* keep synced with pfi_kif, used in RB_FIND */
+struct pfi_kif_cmp {
+ char pfik_name[IFNAMSIZ];
+};
+
+struct pfi_kif {
+ char pfik_name[IFNAMSIZ];
+ union {
+ RB_ENTRY(pfi_kif) _pfik_tree;
+ LIST_ENTRY(pfi_kif) _pfik_list;
+ } _pfik_glue;
+#define pfik_tree _pfik_glue._pfik_tree
+#define pfik_list _pfik_glue._pfik_list
+ u_int64_t pfik_packets[2][2][2];
+ u_int64_t pfik_bytes[2][2][2];
+ u_int32_t pfik_tzero;
+ u_int pfik_flags;
+ struct ifnet *pfik_ifp;
+ struct ifg_group *pfik_group;
+ u_int pfik_rulerefs;
+ TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs;
+};
+
+#define PFI_IFLAG_REFS 0x0001 /* has state references */
+#define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */
+
+struct pf_pdesc {
+ struct {
+ int done;
+ uid_t uid;
+ gid_t gid;
+ } lookup;
+ u_int64_t tot_len; /* Make Mickey money */
+ union {
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ struct icmp *icmp;
+#ifdef INET6
+ struct icmp6_hdr *icmp6;
+#endif /* INET6 */
+ void *any;
+ } hdr;
+
+ struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */
+ struct pf_addr *src; /* src address */
+ struct pf_addr *dst; /* dst address */
+ u_int16_t *sport;
+ u_int16_t *dport;
+ struct pf_mtag *pf_mtag;
+
+ u_int32_t p_len; /* total length of payload */
+
+ u_int16_t *ip_sum;
+ u_int16_t *proto_sum;
+ u_int16_t flags; /* Let SCRUB trigger behavior in
+ * state code. Easier than tags */
+#define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */
+#define PFDESC_IP_REAS 0x0002 /* IP frags would've been reassembled */
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t tos;
+ u_int8_t dir; /* direction */
+ u_int8_t sidx; /* key index for source */
+ u_int8_t didx; /* key index for destination */
+};
+
+/* flags for RDR options */
+#define PF_DPORT_RANGE 0x01 /* Dest port uses range */
+#define PF_RPORT_RANGE 0x02 /* RDR'ed port uses range */
+
+/* UDP state enumeration */
+#define PFUDPS_NO_TRAFFIC 0
+#define PFUDPS_SINGLE 1
+#define PFUDPS_MULTIPLE 2
+
+#define PFUDPS_NSTATES 3 /* number of state levels */
+
+#define PFUDPS_NAMES { \
+ "NO_TRAFFIC", \
+ "SINGLE", \
+ "MULTIPLE", \
+ NULL \
+}
+
+/* Other protocol state enumeration */
+#define PFOTHERS_NO_TRAFFIC 0
+#define PFOTHERS_SINGLE 1
+#define PFOTHERS_MULTIPLE 2
+
+#define PFOTHERS_NSTATES 3 /* number of state levels */
+
+#define PFOTHERS_NAMES { \
+ "NO_TRAFFIC", \
+ "SINGLE", \
+ "MULTIPLE", \
+ NULL \
+}
+
+#define ACTION_SET(a, x) \
+ do { \
+ if ((a) != NULL) \
+ *(a) = (x); \
+ } while (0)
+
+#define REASON_SET(a, x) \
+ do { \
+ if ((a) != NULL) \
+ *(a) = (x); \
+ if (x < PFRES_MAX) \
+ counter_u64_add(V_pf_status.counters[x], 1); \
+ } while (0)
+
+struct pf_kstatus {
+ counter_u64_t counters[PFRES_MAX]; /* reason for passing/dropping */
+ counter_u64_t lcounters[LCNT_MAX]; /* limit counters */
+ counter_u64_t fcounters[FCNT_MAX]; /* state operation counters */
+ counter_u64_t scounters[SCNT_MAX]; /* src_node operation counters */
+ uint32_t states;
+ uint32_t src_nodes;
+ uint32_t running;
+ uint32_t since;
+ uint32_t debug;
+ uint32_t hostid;
+ char ifname[IFNAMSIZ];
+ uint8_t pf_chksum[PF_MD5_DIGEST_LENGTH];
+};
+
+struct pf_divert {
+ union {
+ struct in_addr ipv4;
+ struct in6_addr ipv6;
+ } addr;
+ u_int16_t port;
+};
+
+#define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */
+#define PFR_KENTRY_HIWAT 200000 /* Number of table entries */
+
+/*
+ * ioctl parameter structures
+ */
+
+struct pfioc_pooladdr {
+ u_int32_t action;
+ u_int32_t ticket;
+ u_int32_t nr;
+ u_int32_t r_num;
+ u_int8_t r_action;
+ u_int8_t r_last;
+ u_int8_t af;
+ char anchor[MAXPATHLEN];
+ struct pf_pooladdr addr;
+};
+
+struct pfioc_rule {
+ u_int32_t action;
+ u_int32_t ticket;
+ u_int32_t pool_ticket;
+ u_int32_t nr;
+ char anchor[MAXPATHLEN];
+ char anchor_call[MAXPATHLEN];
+ struct pf_rule rule;
+};
+
+struct pfioc_natlook {
+ struct pf_addr saddr;
+ struct pf_addr daddr;
+ struct pf_addr rsaddr;
+ struct pf_addr rdaddr;
+ u_int16_t sport;
+ u_int16_t dport;
+ u_int16_t rsport;
+ u_int16_t rdport;
+ sa_family_t af;
+ u_int8_t proto;
+ u_int8_t direction;
+};
+
+struct pfioc_state {
+ struct pfsync_state state;
+};
+
+struct pfioc_src_node_kill {
+ sa_family_t psnk_af;
+ struct pf_rule_addr psnk_src;
+ struct pf_rule_addr psnk_dst;
+ u_int psnk_killed;
+};
+
+struct pfioc_state_kill {
+ struct pf_state_cmp psk_pfcmp;
+ sa_family_t psk_af;
+ int psk_proto;
+ struct pf_rule_addr psk_src;
+ struct pf_rule_addr psk_dst;
+ char psk_ifname[IFNAMSIZ];
+ char psk_label[PF_RULE_LABEL_SIZE];
+ u_int psk_killed;
+};
+
+struct pfioc_states {
+ int ps_len;
+ union {
+ caddr_t psu_buf;
+ struct pfsync_state *psu_states;
+ } ps_u;
+#define ps_buf ps_u.psu_buf
+#define ps_states ps_u.psu_states
+};
+
+struct pfioc_src_nodes {
+ int psn_len;
+ union {
+ caddr_t psu_buf;
+ struct pf_src_node *psu_src_nodes;
+ } psn_u;
+#define psn_buf psn_u.psu_buf
+#define psn_src_nodes psn_u.psu_src_nodes
+};
+
+struct pfioc_if {
+ char ifname[IFNAMSIZ];
+};
+
+struct pfioc_tm {
+ int timeout;
+ int seconds;
+};
+
+struct pfioc_limit {
+ int index;
+ unsigned limit;
+};
+
+struct pfioc_altq {
+ u_int32_t action;
+ u_int32_t ticket;
+ u_int32_t nr;
+ struct pf_altq altq;
+};
+
+struct pfioc_qstats {
+ u_int32_t ticket;
+ u_int32_t nr;
+ void *buf;
+ int nbytes;
+ u_int8_t scheduler;
+};
+
+struct pfioc_ruleset {
+ u_int32_t nr;
+ char path[MAXPATHLEN];
+ char name[PF_ANCHOR_NAME_SIZE];
+};
+
+#define PF_RULESET_ALTQ (PF_RULESET_MAX)
+#define PF_RULESET_TABLE (PF_RULESET_MAX+1)
+struct pfioc_trans {
+ int size; /* number of elements */
+ int esize; /* size of each element in bytes */
+ struct pfioc_trans_e {
+ int rs_num;
+ char anchor[MAXPATHLEN];
+ u_int32_t ticket;
+ } *array;
+};
+
+#define PFR_FLAG_ATOMIC 0x00000001 /* unused */
+#define PFR_FLAG_DUMMY 0x00000002
+#define PFR_FLAG_FEEDBACK 0x00000004
+#define PFR_FLAG_CLSTATS 0x00000008
+#define PFR_FLAG_ADDRSTOO 0x00000010
+#define PFR_FLAG_REPLACE 0x00000020
+#define PFR_FLAG_ALLRSETS 0x00000040
+#define PFR_FLAG_ALLMASK 0x0000007F
+#ifdef _KERNEL
+#define PFR_FLAG_USERIOCTL 0x10000000
+#endif
+
+struct pfioc_table {
+ struct pfr_table pfrio_table;
+ void *pfrio_buffer;
+ int pfrio_esize;
+ int pfrio_size;
+ int pfrio_size2;
+ int pfrio_nadd;
+ int pfrio_ndel;
+ int pfrio_nchange;
+ int pfrio_flags;
+ u_int32_t pfrio_ticket;
+};
+#define pfrio_exists pfrio_nadd
+#define pfrio_nzero pfrio_nadd
+#define pfrio_nmatch pfrio_nadd
+#define pfrio_naddr pfrio_size2
+#define pfrio_setflag pfrio_size2
+#define pfrio_clrflag pfrio_nadd
+
+struct pfioc_iface {
+ char pfiio_name[IFNAMSIZ];
+ void *pfiio_buffer;
+ int pfiio_esize;
+ int pfiio_size;
+ int pfiio_nzero;
+ int pfiio_flags;
+};
+
+
+/*
+ * ioctl operations
+ */
+
+#define DIOCSTART _IO ('D', 1)
+#define DIOCSTOP _IO ('D', 2)
+#define DIOCADDRULE _IOWR('D', 4, struct pfioc_rule)
+#define DIOCGETRULES _IOWR('D', 6, struct pfioc_rule)
+#define DIOCGETRULE _IOWR('D', 7, struct pfioc_rule)
+/* XXX cut 8 - 17 */
+#define DIOCCLRSTATES _IOWR('D', 18, struct pfioc_state_kill)
+#define DIOCGETSTATE _IOWR('D', 19, struct pfioc_state)
+#define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if)
+#define DIOCGETSTATUS _IOWR('D', 21, struct pf_status)
+#define DIOCCLRSTATUS _IO ('D', 22)
+#define DIOCNATLOOK _IOWR('D', 23, struct pfioc_natlook)
+#define DIOCSETDEBUG _IOWR('D', 24, u_int32_t)
+#define DIOCGETSTATES _IOWR('D', 25, struct pfioc_states)
+#define DIOCCHANGERULE _IOWR('D', 26, struct pfioc_rule)
+/* XXX cut 26 - 28 */
+#define DIOCSETTIMEOUT _IOWR('D', 29, struct pfioc_tm)
+#define DIOCGETTIMEOUT _IOWR('D', 30, struct pfioc_tm)
+#define DIOCADDSTATE _IOWR('D', 37, struct pfioc_state)
+#define DIOCCLRRULECTRS _IO ('D', 38)
+#define DIOCGETLIMIT _IOWR('D', 39, struct pfioc_limit)
+#define DIOCSETLIMIT _IOWR('D', 40, struct pfioc_limit)
+#define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill)
+#define DIOCSTARTALTQ _IO ('D', 42)
+#define DIOCSTOPALTQ _IO ('D', 43)
+#define DIOCADDALTQ _IOWR('D', 45, struct pfioc_altq)
+#define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq)
+#define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq)
+#define DIOCCHANGEALTQ _IOWR('D', 49, struct pfioc_altq)
+#define DIOCGETQSTATS _IOWR('D', 50, struct pfioc_qstats)
+#define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr)
+#define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr)
+#define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr)
+#define DIOCGETADDR _IOWR('D', 54, struct pfioc_pooladdr)
+#define DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr)
+/* XXX cut 55 - 57 */
+#define DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset)
+#define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset)
+#define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table)
+#define DIOCRADDTABLES _IOWR('D', 61, struct pfioc_table)
+#define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table)
+#define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table)
+#define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table)
+#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table)
+#define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table)
+#define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table)
+#define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table)
+#define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table)
+#define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table)
+#define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table)
+#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table)
+#define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table)
+#define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table)
+#define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table)
+#define DIOCOSFPFLUSH _IO('D', 78)
+#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl)
+#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl)
+#define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans)
+#define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans)
+#define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans)
+#define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes)
+#define DIOCCLRSRCNODES _IO('D', 85)
+#define DIOCSETHOSTID _IOWR('D', 86, u_int32_t)
+#define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface)
+#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface)
+#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface)
+#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill)
+struct pf_ifspeed {
+ char ifname[IFNAMSIZ];
+ u_int32_t baudrate;
+};
+#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed)
+
+#ifdef _KERNEL
+LIST_HEAD(pf_src_node_list, pf_src_node);
+struct pf_srchash {
+ struct pf_src_node_list nodes;
+ struct mtx lock;
+};
+
+struct pf_keyhash {
+ LIST_HEAD(, pf_state_key) keys;
+ struct mtx lock;
+};
+
+struct pf_idhash {
+ LIST_HEAD(, pf_state) states;
+ struct mtx lock;
+};
+
+extern u_long pf_hashmask;
+extern u_long pf_srchashmask;
+#define PF_HASHSIZ (32768)
+VNET_DECLARE(struct pf_keyhash *, pf_keyhash);
+VNET_DECLARE(struct pf_idhash *, pf_idhash);
+#define V_pf_keyhash VNET(pf_keyhash)
+#define V_pf_idhash VNET(pf_idhash)
+VNET_DECLARE(struct pf_srchash *, pf_srchash);
+#define V_pf_srchash VNET(pf_srchash)
+
+#define PF_IDHASH(s) (be64toh((s)->id) % (pf_hashmask + 1))
+
+VNET_DECLARE(void *, pf_swi_cookie);
+#define V_pf_swi_cookie VNET(pf_swi_cookie)
+
+VNET_DECLARE(uint64_t, pf_stateid[MAXCPU]);
+#define V_pf_stateid VNET(pf_stateid)
+
+TAILQ_HEAD(pf_altqqueue, pf_altq);
+VNET_DECLARE(struct pf_altqqueue, pf_altqs[2]);
+#define V_pf_altqs VNET(pf_altqs)
+VNET_DECLARE(struct pf_palist, pf_pabuf);
+#define V_pf_pabuf VNET(pf_pabuf)
+
+VNET_DECLARE(u_int32_t, ticket_altqs_active);
+#define V_ticket_altqs_active VNET(ticket_altqs_active)
+VNET_DECLARE(u_int32_t, ticket_altqs_inactive);
+#define V_ticket_altqs_inactive VNET(ticket_altqs_inactive)
+VNET_DECLARE(int, altqs_inactive_open);
+#define V_altqs_inactive_open VNET(altqs_inactive_open)
+VNET_DECLARE(u_int32_t, ticket_pabuf);
+#define V_ticket_pabuf VNET(ticket_pabuf)
+VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active);
+#define V_pf_altqs_active VNET(pf_altqs_active)
+VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive);
+#define V_pf_altqs_inactive VNET(pf_altqs_inactive)
+
+VNET_DECLARE(struct pf_rulequeue, pf_unlinked_rules);
+#define V_pf_unlinked_rules VNET(pf_unlinked_rules)
+
+void pf_initialize(void);
+void pf_mtag_initialize(void);
+void pf_mtag_cleanup(void);
+void pf_cleanup(void);
+
+struct pf_mtag *pf_get_mtag(struct mbuf *);
+
+extern void pf_calc_skip_steps(struct pf_rulequeue *);
+#ifdef ALTQ
+extern void pf_altq_ifnet_event(struct ifnet *, int);
+#endif
+VNET_DECLARE(uma_zone_t, pf_state_z);
+#define V_pf_state_z VNET(pf_state_z)
+VNET_DECLARE(uma_zone_t, pf_state_key_z);
+#define V_pf_state_key_z VNET(pf_state_key_z)
+VNET_DECLARE(uma_zone_t, pf_state_scrub_z);
+#define V_pf_state_scrub_z VNET(pf_state_scrub_z)
+
+extern void pf_purge_thread(void *);
+extern void pf_unload_vnet_purge(void);
+extern void pf_intr(void *);
+extern void pf_purge_expired_src_nodes(void);
+
+extern int pf_unlink_state(struct pf_state *, u_int);
+#define PF_ENTER_LOCKED 0x00000001
+#define PF_RETURN_LOCKED 0x00000002
+extern int pf_state_insert(struct pfi_kif *,
+ struct pf_state_key *,
+ struct pf_state_key *,
+ struct pf_state *);
+extern void pf_free_state(struct pf_state *);
+
+static __inline void
+pf_ref_state(struct pf_state *s)
+{
+
+ refcount_acquire(&s->refs);
+}
+
+static __inline int
+pf_release_state(struct pf_state *s)
+{
+
+ if (refcount_release(&s->refs)) {
+ pf_free_state(s);
+ return (1);
+ } else
+ return (0);
+}
+
+extern struct pf_state *pf_find_state_byid(uint64_t, uint32_t);
+extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *,
+ u_int, int *);
+extern struct pf_src_node *pf_find_src_node(struct pf_addr *,
+ struct pf_rule *, sa_family_t, int);
+extern void pf_unlink_src_node(struct pf_src_node *);
+extern u_int pf_free_src_nodes(struct pf_src_node_list *);
+extern void pf_print_state(struct pf_state *);
+extern void pf_print_flags(u_int8_t);
+extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
+ u_int8_t);
+extern u_int16_t pf_proto_cksum_fixup(struct mbuf *, u_int16_t,
+ u_int16_t, u_int16_t, u_int8_t);
+
+VNET_DECLARE(struct ifnet *, sync_ifp);
+#define V_sync_ifp VNET(sync_ifp);
+VNET_DECLARE(struct pf_rule, pf_default_rule);
+#define V_pf_default_rule VNET(pf_default_rule)
+extern void pf_addrcpy(struct pf_addr *, struct pf_addr *,
+ u_int8_t);
+void pf_free_rule(struct pf_rule *);
+
+#ifdef INET
+int pf_test(int, struct ifnet *, struct mbuf **, struct inpcb *);
+int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *,
+ struct pf_pdesc *);
+#endif /* INET */
+
+#ifdef INET6
+int pf_test6(int, struct ifnet *, struct mbuf **, struct inpcb *);
+int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *,
+ struct pf_pdesc *);
+void pf_poolmask(struct pf_addr *, struct pf_addr*,
+ struct pf_addr *, struct pf_addr *, u_int8_t);
+void pf_addr_inc(struct pf_addr *, sa_family_t);
+int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *);
+#endif /* INET6 */
+
+u_int32_t pf_new_isn(struct pf_state *);
+void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *,
+ sa_family_t);
+void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t);
+void pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t,
+ u_int8_t);
+void pf_change_tcp_a(struct mbuf *, void *, u_int16_t *, u_int32_t);
+void pf_send_deferred_syn(struct pf_state *);
+int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *,
+ struct pf_addr *, sa_family_t);
+int pf_match_addr_range(struct pf_addr *, struct pf_addr *,
+ struct pf_addr *, sa_family_t);
+int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t);
+
+void pf_normalize_init(void);
+void pf_normalize_cleanup(void);
+int pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *,
+ struct pf_pdesc *);
+void pf_normalize_tcp_cleanup(struct pf_state *);
+int pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *,
+ struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *);
+int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *,
+ u_short *, struct tcphdr *, struct pf_state *,
+ struct pf_state_peer *, struct pf_state_peer *, int *);
+u_int32_t
+ pf_state_expires(const struct pf_state *);
+void pf_purge_expired_fragments(void);
+int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *,
+ int);
+int pf_socket_lookup(int, struct pf_pdesc *, struct mbuf *);
+struct pf_state_key *pf_alloc_state_key(int);
+void pfr_initialize(void);
+void pfr_cleanup(void);
+int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t);
+void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t,
+ u_int64_t, int, int, int);
+int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t);
+void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *);
+struct pfr_ktable *
+ pfr_attach_table(struct pf_ruleset *, char *);
+void pfr_detach_table(struct pfr_ktable *);
+int pfr_clr_tables(struct pfr_table *, int *, int);
+int pfr_add_tables(struct pfr_table *, int, int *, int);
+int pfr_del_tables(struct pfr_table *, int, int *, int);
+int pfr_get_tables(struct pfr_table *, struct pfr_table *, int *, int);
+int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int);
+int pfr_clr_tstats(struct pfr_table *, int, int *, int);
+int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int);
+int pfr_clr_addrs(struct pfr_table *, int *, int);
+int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, long);
+int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int *, int *, int *, int, u_int32_t);
+int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int);
+int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int);
+int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *,
+ int);
+int pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, int);
+int pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int);
+int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int);
+int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *,
+ int *, u_int32_t, int);
+
+MALLOC_DECLARE(PFI_MTYPE);
+VNET_DECLARE(struct pfi_kif *, pfi_all);
+#define V_pfi_all VNET(pfi_all)
+
+void pfi_initialize(void);
+void pfi_initialize_vnet(void);
+void pfi_cleanup(void);
+void pfi_cleanup_vnet(void);
+void pfi_kif_ref(struct pfi_kif *);
+void pfi_kif_unref(struct pfi_kif *);
+struct pfi_kif *pfi_kif_find(const char *);
+struct pfi_kif *pfi_kif_attach(struct pfi_kif *, const char *);
+int pfi_kif_match(struct pfi_kif *, struct pfi_kif *);
+void pfi_kif_purge(void);
+int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *,
+ sa_family_t);
+int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t);
+void pfi_dynaddr_remove(struct pfi_dynaddr *);
+void pfi_dynaddr_copyout(struct pf_addr_wrap *);
+void pfi_update_status(const char *, struct pf_status *);
+void pfi_get_ifaces(const char *, struct pfi_kif *, int *);
+int pfi_set_flags(const char *, int);
+int pfi_clear_flags(const char *, int);
+
+int pf_match_tag(struct mbuf *, struct pf_rule *, int *, int);
+int pf_tag_packet(struct mbuf *, struct pf_pdesc *, int);
+int pf_addr_cmp(struct pf_addr *, struct pf_addr *,
+ sa_family_t);
+void pf_qid2qname(u_int32_t, char *);
+
+VNET_DECLARE(struct pf_kstatus, pf_status);
+#define V_pf_status VNET(pf_status)
+
+struct pf_limit {
+ uma_zone_t zone;
+ u_int limit;
+};
+VNET_DECLARE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
+#define V_pf_limits VNET(pf_limits)
+
+#endif /* _KERNEL */
+
+#ifdef _KERNEL
+VNET_DECLARE(struct pf_anchor_global, pf_anchors);
+#define V_pf_anchors VNET(pf_anchors)
+VNET_DECLARE(struct pf_anchor, pf_main_anchor);
+#define V_pf_main_anchor VNET(pf_main_anchor)
+#define pf_main_ruleset V_pf_main_anchor.ruleset
+#endif
+
+/* these ruleset functions can be linked into userland programs (pfctl) */
+int pf_get_ruleset_number(u_int8_t);
+void pf_init_ruleset(struct pf_ruleset *);
+int pf_anchor_setup(struct pf_rule *,
+ const struct pf_ruleset *, const char *);
+int pf_anchor_copyout(const struct pf_ruleset *,
+ const struct pf_rule *, struct pfioc_rule *);
+void pf_anchor_remove(struct pf_rule *);
+void pf_remove_if_empty_ruleset(struct pf_ruleset *);
+struct pf_ruleset *pf_find_ruleset(const char *);
+struct pf_ruleset *pf_find_or_create_ruleset(const char *);
+void pf_rs_initialize(void);
+
+/* The fingerprint functions can be linked into userland programs (tcpdump) */
+int pf_osfp_add(struct pf_osfp_ioctl *);
+#ifdef _KERNEL
+struct pf_osfp_enlist *
+ pf_osfp_fingerprint(struct pf_pdesc *, struct mbuf *, int,
+ const struct tcphdr *);
+#endif /* _KERNEL */
+void pf_osfp_flush(void);
+int pf_osfp_get(struct pf_osfp_ioctl *);
+int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t);
+
+#ifdef _KERNEL
+void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
+
+void pf_step_into_anchor(struct pf_anchor_stackframe *, int *,
+ struct pf_ruleset **, int, struct pf_rule **,
+ struct pf_rule **, int *);
+int pf_step_out_of_anchor(struct pf_anchor_stackframe *, int *,
+ struct pf_ruleset **, int, struct pf_rule **,
+ struct pf_rule **, int *);
+
+int pf_map_addr(u_int8_t, struct pf_rule *,
+ struct pf_addr *, struct pf_addr *,
+ struct pf_addr *, struct pf_src_node **);
+struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *,
+ int, int, struct pfi_kif *, struct pf_src_node **,
+ struct pf_state_key **, struct pf_state_key **,
+ struct pf_addr *, struct pf_addr *,
+ uint16_t, uint16_t, struct pf_anchor_stackframe *);
+
+struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct pf_addr *,
+ struct pf_addr *, u_int16_t, u_int16_t);
+struct pf_state_key *pf_state_key_clone(struct pf_state_key *);
+#endif /* _KERNEL */
+
+#endif /* _NET_PFVAR_H_ */
diff --git a/freebsd/sys/net/ppp_defs.h b/freebsd/sys/net/ppp_defs.h
index 386a1763..5f6d4106 100644
--- a/freebsd/sys/net/ppp_defs.h
+++ b/freebsd/sys/net/ppp_defs.h
@@ -31,6 +31,8 @@
#ifndef _PPP_DEFS_H_
#define _PPP_DEFS_H_
+#include <sys/_types.h>
+
/*
* The basic PPP frame.
*/
@@ -83,7 +85,7 @@
/*
* Extended asyncmap - allows any character to be escaped.
*/
-typedef u_int32_t ext_accm[8];
+typedef __uint32_t ext_accm[8];
/*
* What to do with network protocol (NP) packets.
@@ -143,8 +145,8 @@ struct ppp_comp_stats {
* the last NP packet was sent or received.
*/
struct ppp_idle {
- time_t xmit_idle; /* time since last NP packet sent */
- time_t recv_idle; /* time since last NP packet received */
+ __time_t xmit_idle; /* time since last NP packet sent */
+ __time_t recv_idle; /* time since last NP packet received */
};
#ifndef __P
diff --git a/freebsd/sys/net/radix.c b/freebsd/sys/net/radix.c
index ba15eb51..2615de65 100644
--- a/freebsd/sys/net/radix.c
+++ b/freebsd/sys/net/radix.c
@@ -58,18 +58,15 @@
#include <net/radix.h>
#endif /* !_KERNEL */
-static int rn_walktree_from(struct radix_node_head *h, void *a, void *m,
- walktree_f_t *f, void *w);
-static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
static struct radix_node
- *rn_insert(void *, struct radix_node_head *, int *,
+ *rn_insert(void *, struct radix_head *, int *,
struct radix_node [2]),
*rn_newpair(void *, int, struct radix_node[2]),
*rn_search(void *, struct radix_node *),
*rn_search_m(void *, struct radix_node *, void *);
+static struct radix_node *rn_addmask(void *, struct radix_mask_head *, int,int);
-static void rn_detachhead_internal(void **head);
-static int rn_inithead_internal(void **head, int off);
+static void rn_detachhead_internal(struct radix_head *);
#define RADIX_MAX_KEY_LEN 32
@@ -81,14 +78,6 @@ static char rn_ones[RADIX_MAX_KEY_LEN] = {
-1, -1, -1, -1, -1, -1, -1, -1,
};
-/*
- * XXX: Compat stuff for old rn_addmask() users
- */
-static struct radix_node_head *mask_rnhead_compat;
-#ifdef _KERNEL
-static struct mtx mask_mtx;
-#endif
-
static int rn_lexobetter(void *m_arg, void *n_arg);
static struct radix_mask *
@@ -225,7 +214,7 @@ rn_refines(void *m_arg, void *n_arg)
* from host routes.
*/
struct radix_node *
-rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
+rn_lookup(void *v_arg, void *m_arg, struct radix_head *head)
{
struct radix_node *x;
caddr_t netmask;
@@ -234,7 +223,7 @@ rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
/*
* Most common case: search exact prefix/mask
*/
- x = rn_addmask_r(m_arg, head->rnh_masks, 1,
+ x = rn_addmask(m_arg, head->rnh_masks, 1,
head->rnh_treetop->rn_offset);
if (x == NULL)
return (NULL);
@@ -287,7 +276,7 @@ rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip)
* Search for longest-prefix match in given @head
*/
struct radix_node *
-rn_match(void *v_arg, struct radix_node_head *head)
+rn_match(void *v_arg, struct radix_head *head)
{
caddr_t v = v_arg;
struct radix_node *t = head->rnh_treetop, *x;
@@ -436,7 +425,7 @@ rn_newpair(void *v, int b, struct radix_node nodes[2])
}
static struct radix_node *
-rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry,
+rn_insert(void *v_arg, struct radix_head *head, int *dupentry,
struct radix_node nodes[2])
{
caddr_t v = v_arg;
@@ -500,9 +489,9 @@ on1:
}
struct radix_node *
-rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip)
+rn_addmask(void *n_arg, struct radix_mask_head *maskhead, int search, int skip)
{
- unsigned char *netmask = arg;
+ unsigned char *netmask = n_arg;
unsigned char *cp, *cplim;
struct radix_node *x;
int b = 0, mlen, j;
@@ -515,7 +504,7 @@ rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip)
if (skip == 0)
skip = 1;
if (mlen <= skip)
- return (maskhead->rnh_nodes);
+ return (maskhead->mask_nodes);
bzero(addmask_key, RADIX_MAX_KEY_LEN);
if (skip > 1)
@@ -528,22 +517,22 @@ rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip)
cp--;
mlen = cp - addmask_key;
if (mlen <= skip)
- return (maskhead->rnh_nodes);
+ return (maskhead->mask_nodes);
*addmask_key = mlen;
- x = rn_search(addmask_key, maskhead->rnh_treetop);
+ x = rn_search(addmask_key, maskhead->head.rnh_treetop);
if (bcmp(addmask_key, x->rn_key, mlen) != 0)
- x = 0;
+ x = NULL;
if (x || search)
return (x);
R_Zalloc(x, struct radix_node *, RADIX_MAX_KEY_LEN + 2 * sizeof (*x));
- if ((saved_x = x) == 0)
+ if ((saved_x = x) == NULL)
return (0);
- netmask = cp = (caddr_t)(x + 2);
+ netmask = cp = (unsigned char *)(x + 2);
bcopy(addmask_key, cp, mlen);
- x = rn_insert(cp, maskhead, &maskduplicated, x);
+ x = rn_insert(cp, &maskhead->head, &maskduplicated, x);
if (maskduplicated) {
log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
- Free(saved_x);
+ R_Free(saved_x);
return (x);
}
/*
@@ -571,23 +560,6 @@ rn_addmask_r(void *arg, struct radix_node_head *maskhead, int search, int skip)
return (x);
}
-struct radix_node *
-rn_addmask(void *n_arg, int search, int skip)
-{
- struct radix_node *tt;
-
-#ifdef _KERNEL
- mtx_lock(&mask_mtx);
-#endif
- tt = rn_addmask_r(&mask_rnhead_compat, n_arg, search, skip);
-
-#ifdef _KERNEL
- mtx_unlock(&mask_mtx);
-#endif
-
- return (tt);
-}
-
static int /* XXX: arbitrary ordering for non-contiguous masks */
rn_lexobetter(void *m_arg, void *n_arg)
{
@@ -625,11 +597,11 @@ rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
}
struct radix_node *
-rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
+rn_addroute(void *v_arg, void *n_arg, struct radix_head *head,
struct radix_node treenodes[2])
{
caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
- struct radix_node *t, *x = 0, *tt;
+ struct radix_node *t, *x = NULL, *tt;
struct radix_node *saved_tt, *top = head->rnh_treetop;
short b = 0, b_leaf = 0;
int keyduplicated;
@@ -644,7 +616,7 @@ rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
* nodes and possibly save time in calculating indices.
*/
if (netmask) {
- x = rn_addmask_r(netmask, head->rnh_masks, 0, top->rn_offset);
+ x = rn_addmask(netmask, head->rnh_masks, 0, top->rn_offset);
if (x == NULL)
return (0);
b_leaf = x->rn_bit;
@@ -752,7 +724,7 @@ rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
if (m->rm_bit >= b_leaf)
break;
- t->rn_mklist = m; *mp = 0;
+ t->rn_mklist = m; *mp = NULL;
}
on2:
/* Add new route to highest possible ancestor's list */
@@ -799,7 +771,7 @@ on2:
}
struct radix_node *
-rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
+rn_delete(void *v_arg, void *netmask_arg, struct radix_head *head)
{
struct radix_node *t, *p, *x, *tt;
struct radix_mask *m, *saved_m, **mp;
@@ -815,22 +787,22 @@ rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
vlen = LEN(v);
saved_tt = tt;
top = x;
- if (tt == 0 ||
+ if (tt == NULL ||
bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
return (0);
/*
* Delete our route from mask lists.
*/
if (netmask) {
- x = rn_addmask_r(netmask, head->rnh_masks, 1, head_off);
+ x = rn_addmask(netmask, head->rnh_masks, 1, head_off);
if (x == NULL)
return (0);
netmask = x->rn_key;
while (tt->rn_mask != netmask)
- if ((tt = tt->rn_dupedkey) == 0)
+ if ((tt = tt->rn_dupedkey) == NULL)
return (0);
}
- if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
+ if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == NULL)
goto on1;
if (tt->rn_flags & RNF_NORMAL) {
if (m->rm_leaf != tt || m->rm_refs > 0) {
@@ -856,10 +828,10 @@ rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
if (m == saved_m) {
*mp = m->rm_mklist;
- Free(m);
+ R_Free(m);
break;
}
- if (m == 0) {
+ if (m == NULL) {
log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
if (tt->rn_flags & RNF_NORMAL)
return (0); /* Dangling ref to us */
@@ -947,7 +919,7 @@ on1:
struct radix_mask *mm = m->rm_mklist;
x->rn_mklist = 0;
if (--(m->rm_refs) < 0)
- Free(m);
+ R_Free(m);
m = mm;
}
if (m)
@@ -986,8 +958,8 @@ out:
* This is the same as rn_walktree() except for the parameters and the
* exit.
*/
-static int
-rn_walktree_from(struct radix_node_head *h, void *a, void *m,
+int
+rn_walktree_from(struct radix_head *h, void *a, void *m,
walktree_f_t *f, void *w)
{
int error;
@@ -998,6 +970,8 @@ rn_walktree_from(struct radix_node_head *h, void *a, void *m,
int stopping = 0;
int lastb;
+ KASSERT(m != NULL, ("%s: mask needs to be specified", __func__));
+
/*
* rn_search_m is sort-of-open-coded here. We cannot use the
* function because we need to keep track of the last node seen.
@@ -1021,11 +995,11 @@ rn_walktree_from(struct radix_node_head *h, void *a, void *m,
/*
* Two cases: either we stepped off the end of our mask,
* in which case last == rn, or we reached a leaf, in which
- * case we want to start from the last node we looked at.
- * Either way, last is the node we want to start from.
+ * case we want to start from the leaf.
*/
- rn = last;
- lastb = rn->rn_bit;
+ if (rn->rn_bit >= 0)
+ rn = last;
+ lastb = last->rn_bit;
/* printf("rn %p, lastb %d\n", rn, lastb);*/
@@ -1072,7 +1046,7 @@ rn_walktree_from(struct radix_node_head *h, void *a, void *m,
rn = rn->rn_left;
next = rn;
/* Process leaves */
- while ((rn = base) != 0) {
+ while ((rn = base) != NULL) {
base = rn->rn_dupedkey;
/* printf("leaf %p\n", rn); */
if (!(rn->rn_flags & RNF_ROOT)
@@ -1090,8 +1064,8 @@ rn_walktree_from(struct radix_node_head *h, void *a, void *m,
return (0);
}
-static int
-rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
+int
+rn_walktree(struct radix_head *h, walktree_f_t *f, void *w)
{
int error;
struct radix_node *base, *next;
@@ -1130,82 +1104,94 @@ rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
}
/*
- * Allocate and initialize an empty tree. This has 3 nodes, which are
- * part of the radix_node_head (in the order <left,root,right>) and are
+ * Initialize an empty tree. This has 3 nodes, which are passed
+ * via base_nodes (in the order <left,root,right>) and are
* marked RNF_ROOT so they cannot be freed.
* The leaves have all-zero and all-one keys, with significant
* bits starting at 'off'.
- * Return 1 on success, 0 on error.
*/
-static int
-rn_inithead_internal(void **head, int off)
+void
+rn_inithead_internal(struct radix_head *rh, struct radix_node *base_nodes, int off)
{
- struct radix_node_head *rnh;
struct radix_node *t, *tt, *ttt;
- if (*head)
- return (1);
- R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh));
- if (rnh == 0)
- return (0);
-#ifdef _KERNEL
- RADIX_NODE_HEAD_LOCK_INIT(rnh);
-#endif
- *head = rnh;
- t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
- ttt = rnh->rnh_nodes + 2;
+
+ t = rn_newpair(rn_zeros, off, base_nodes);
+ ttt = base_nodes + 2;
t->rn_right = ttt;
t->rn_parent = t;
- tt = t->rn_left; /* ... which in turn is rnh->rnh_nodes */
+ tt = t->rn_left; /* ... which in turn is base_nodes */
tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
tt->rn_bit = -1 - off;
*ttt = *tt;
ttt->rn_key = rn_ones;
- rnh->rnh_addaddr = rn_addroute;
- rnh->rnh_deladdr = rn_delete;
- rnh->rnh_matchaddr = rn_match;
- rnh->rnh_lookup = rn_lookup;
- rnh->rnh_walktree = rn_walktree;
- rnh->rnh_walktree_from = rn_walktree_from;
- rnh->rnh_treetop = t;
- return (1);
+
+ rh->rnh_treetop = t;
}
static void
-rn_detachhead_internal(void **head)
+rn_detachhead_internal(struct radix_head *head)
{
- struct radix_node_head *rnh;
- KASSERT((head != NULL && *head != NULL),
+ KASSERT((head != NULL),
("%s: head already freed", __func__));
- rnh = *head;
/* Free <left,root,right> nodes. */
- Free(rnh);
-
- *head = NULL;
+ R_Free(head);
}
+/* Functions used by 'struct radix_node_head' users */
+
int
rn_inithead(void **head, int off)
{
struct radix_node_head *rnh;
+ struct radix_mask_head *rmh;
+
+ rnh = *head;
+ rmh = NULL;
if (*head != NULL)
return (1);
- if (rn_inithead_internal(head, off) == 0)
+ R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh));
+ R_Zalloc(rmh, struct radix_mask_head *, sizeof (*rmh));
+ if (rnh == NULL || rmh == NULL) {
+ if (rnh != NULL)
+ R_Free(rnh);
+ if (rmh != NULL)
+ R_Free(rmh);
return (0);
+ }
- rnh = (struct radix_node_head *)(*head);
+ /* Init trees */
+ rn_inithead_internal(&rnh->rh, rnh->rnh_nodes, off);
+ rn_inithead_internal(&rmh->head, rmh->mask_nodes, 0);
+ *head = rnh;
+ rnh->rh.rnh_masks = rmh;
- if (rn_inithead_internal((void **)&rnh->rnh_masks, 0) == 0) {
- rn_detachhead_internal(head);
- return (0);
- }
+ /* Finally, set base callbacks */
+ rnh->rnh_addaddr = rn_addroute;
+ rnh->rnh_deladdr = rn_delete;
+ rnh->rnh_matchaddr = rn_match;
+ rnh->rnh_lookup = rn_lookup;
+ rnh->rnh_walktree = rn_walktree;
+ rnh->rnh_walktree_from = rn_walktree_from;
return (1);
}
+static int
+rn_freeentry(struct radix_node *rn, void *arg)
+{
+ struct radix_head * const rnh = arg;
+ struct radix_node *x;
+
+ x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh);
+ if (x != NULL)
+ R_Free(x);
+ return (0);
+}
+
int
rn_detachhead(void **head)
{
@@ -1214,29 +1200,14 @@ rn_detachhead(void **head)
KASSERT((head != NULL && *head != NULL),
("%s: head already freed", __func__));
- rnh = *head;
+ rnh = (struct radix_node_head *)(*head);
- rn_detachhead_internal((void **)&rnh->rnh_masks);
- rn_detachhead_internal(head);
- return (1);
-}
+ rn_walktree(&rnh->rh.rnh_masks->head, rn_freeentry, rnh->rh.rnh_masks);
+ rn_detachhead_internal(&rnh->rh.rnh_masks->head);
+ rn_detachhead_internal(&rnh->rh);
-void
-rn_init(int maxk)
-{
- if ((maxk <= 0) || (maxk > RADIX_MAX_KEY_LEN)) {
- log(LOG_ERR,
- "rn_init: max_keylen must be within 1..%d\n",
- RADIX_MAX_KEY_LEN);
- return;
- }
+ *head = NULL;
- /*
- * XXX: Compat for old rn_addmask() users
- */
- if (rn_inithead((void **)(void *)&mask_rnhead_compat, 0) == 0)
- panic("rn_init 2");
-#ifdef _KERNEL
- mtx_init(&mask_mtx, "radix_mask", NULL, MTX_DEF);
-#endif
+ return (1);
}
+
diff --git a/freebsd/sys/net/radix.h b/freebsd/sys/net/radix.h
index 3554c77c..69aad831 100644
--- a/freebsd/sys/net/radix.h
+++ b/freebsd/sys/net/radix.h
@@ -101,52 +101,61 @@ struct radix_mask {
#define rm_mask rm_rmu.rmu_mask
#define rm_leaf rm_rmu.rmu_leaf /* extra field would make 32 bytes */
+struct radix_head;
+
typedef int walktree_f_t(struct radix_node *, void *);
+typedef struct radix_node *rn_matchaddr_f_t(void *v,
+ struct radix_head *head);
+typedef struct radix_node *rn_addaddr_f_t(void *v, void *mask,
+ struct radix_head *head, struct radix_node nodes[]);
+typedef struct radix_node *rn_deladdr_f_t(void *v, void *mask,
+ struct radix_head *head);
+typedef struct radix_node *rn_lookup_f_t(void *v, void *mask,
+ struct radix_head *head);
+typedef int rn_walktree_t(struct radix_head *head, walktree_f_t *f,
+ void *w);
+typedef int rn_walktree_from_t(struct radix_head *head,
+ void *a, void *m, walktree_f_t *f, void *w);
+typedef void rn_close_t(struct radix_node *rn, struct radix_head *head);
+
+struct radix_mask_head;
+
+struct radix_head {
+ struct radix_node *rnh_treetop;
+ struct radix_mask_head *rnh_masks; /* Storage for our masks */
+};
struct radix_node_head {
- struct radix_node *rnh_treetop;
- u_int rnh_gen; /* generation counter */
- int rnh_multipath; /* multipath capable ? */
- int rnh_addrsize; /* permit, but not require fixed keys */
- int rnh_pktsize; /* permit, but not require fixed keys */
- struct radix_node *(*rnh_addaddr) /* add based on sockaddr */
- (void *v, void *mask,
- struct radix_node_head *head, struct radix_node nodes[]);
- struct radix_node *(*rnh_addpkt) /* add based on packet hdr */
- (void *v, void *mask,
- struct radix_node_head *head, struct radix_node nodes[]);
- struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */
- (void *v, void *mask, struct radix_node_head *head);
- struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */
- (void *v, void *mask, struct radix_node_head *head);
- struct radix_node *(*rnh_matchaddr) /* longest match for sockaddr */
- (void *v, struct radix_node_head *head);
- struct radix_node *(*rnh_lookup) /*exact match for sockaddr*/
- (void *v, void *mask, struct radix_node_head *head);
- struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */
- (void *v, struct radix_node_head *head);
- int (*rnh_walktree) /* traverse tree */
- (struct radix_node_head *head, walktree_f_t *f, void *w);
- int (*rnh_walktree_from) /* traverse tree below a */
- (struct radix_node_head *head, void *a, void *m,
- walktree_f_t *f, void *w);
- void (*rnh_close) /* do something when the last ref drops */
- (struct radix_node *rn, struct radix_node_head *head);
+ struct radix_head rh;
+ rn_matchaddr_f_t *rnh_matchaddr; /* longest match for sockaddr */
+ rn_addaddr_f_t *rnh_addaddr; /* add based on sockaddr*/
+ rn_deladdr_f_t *rnh_deladdr; /* remove based on sockaddr */
+ rn_lookup_f_t *rnh_lookup; /* exact match for sockaddr */
+ rn_walktree_t *rnh_walktree; /* traverse tree */
+ rn_walktree_from_t *rnh_walktree_from; /* traverse tree below a */
+ rn_close_t *rnh_close; /*do something when the last ref drops*/
struct radix_node rnh_nodes[3]; /* empty tree for common case */
#ifdef _KERNEL
struct rwlock rnh_lock; /* locks entire radix tree */
#endif
- struct radix_node_head *rnh_masks; /* Storage for our masks */
};
+struct radix_mask_head {
+ struct radix_head head;
+ struct radix_node mask_nodes[3];
+};
+
+void rn_inithead_internal(struct radix_head *rh, struct radix_node *base_nodes,
+ int off);
+
#ifndef _KERNEL
#define R_Malloc(p, t, n) (p = (t) malloc((unsigned int)(n)))
#define R_Zalloc(p, t, n) (p = (t) calloc(1,(unsigned int)(n)))
-#define Free(p) free((char *)p);
+#define R_Free(p) free((char *)p);
#else
#define R_Malloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT))
#define R_Zalloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT | M_ZERO))
-#define Free(p) free((caddr_t)p, M_RTABLE);
+#define R_Free(p) free((caddr_t)p, M_RTABLE);
#define RADIX_NODE_HEAD_LOCK_INIT(rnh) \
rw_init_flags(&(rnh)->rnh_lock, "radix node head", 0)
@@ -162,18 +171,17 @@ struct radix_node_head {
#define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_WLOCKED)
#endif /* _KERNEL */
-void rn_init(int);
int rn_inithead(void **, int);
int rn_detachhead(void **);
int rn_refines(void *, void *);
-struct radix_node
- *rn_addmask(void *, int, int),
- *rn_addmask_r(void *, struct radix_node_head *, int, int),
- *rn_addroute (void *, void *, struct radix_node_head *,
- struct radix_node [2]),
- *rn_delete(void *, void *, struct radix_node_head *),
- *rn_lookup (void *v_arg, void *m_arg,
- struct radix_node_head *head),
- *rn_match(void *, struct radix_node_head *);
+struct radix_node *rn_addroute(void *, void *, struct radix_head *,
+ struct radix_node[2]);
+struct radix_node *rn_delete(void *, void *, struct radix_head *);
+struct radix_node *rn_lookup (void *v_arg, void *m_arg,
+ struct radix_head *head);
+struct radix_node *rn_match(void *, struct radix_head *);
+int rn_walktree_from(struct radix_head *h, void *a, void *m,
+ walktree_f_t *f, void *w);
+int rn_walktree(struct radix_head *, walktree_f_t *, void *);
#endif /* _RADIX_H_ */
diff --git a/freebsd/sys/net/radix_mpath.c b/freebsd/sys/net/radix_mpath.c
index 1bce388e..f5215205 100644
--- a/freebsd/sys/net/radix_mpath.c
+++ b/freebsd/sys/net/radix_mpath.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <net/radix.h>
#include <net/radix_mpath.h>
#include <net/route.h>
+#include <net/route_var.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -59,12 +60,19 @@ __FBSDID("$FreeBSD$");
static uint32_t hashjitter;
int
-rn_mpath_capable(struct radix_node_head *rnh)
+rt_mpath_capable(struct rib_head *rnh)
{
return rnh->rnh_multipath;
}
+int
+rn_mpath_capable(struct radix_head *rh)
+{
+
+ return (rt_mpath_capable((struct rib_head *)rh));
+}
+
struct radix_node *
rn_mpath_next(struct radix_node *rn)
{
@@ -91,7 +99,7 @@ rn_mpath_count(struct radix_node *rn)
while (rn != NULL) {
rt = (struct rtentry *)rn;
- i += rt->rt_rmx.rmx_weight;
+ i += rt->rt_weight;
rn = rn_mpath_next(rn);
}
return (i);
@@ -165,14 +173,14 @@ rt_mpath_deldup(struct rtentry *headrt, struct rtentry *rt)
* Assume @rt rt_key host bits are cleared according to @netmask
*/
int
-rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
+rt_mpath_conflict(struct rib_head *rnh, struct rtentry *rt,
struct sockaddr *netmask)
{
struct radix_node *rn, *rn1;
struct rtentry *rt1;
rn = (struct radix_node *)rt;
- rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
+ rn1 = rnh->rnh_lookup(rt_key(rt), netmask, &rnh->head);
if (!rn1 || rn1->rn_flags & RNF_ROOT)
return (0);
@@ -203,18 +211,50 @@ rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
return (0);
}
-void
-#ifndef __rtems__
-rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
-#else /* __rtems__ */
-rtalloc_mpath_fib(struct route *ro, u_int32_t hash, u_int fibnum)
-#endif /* __rtems__ */
+static struct rtentry *
+rt_mpath_selectrte(struct rtentry *rte, uint32_t hash)
{
struct radix_node *rn0, *rn;
- u_int32_t n;
+ uint32_t total_weight;
struct rtentry *rt;
int64_t weight;
+ /* beyond here, we use rn as the master copy */
+ rn0 = rn = (struct radix_node *)rte;
+ rt = rte;
+
+ /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
+ total_weight = rn_mpath_count(rn0);
+ hash += hashjitter;
+ hash %= total_weight;
+ for (weight = abs((int32_t)hash);
+ rt != NULL && weight >= rt->rt_weight;
+ weight -= (rt == NULL) ? 0 : rt->rt_weight) {
+
+ /* stay within the multipath routes */
+ if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
+ break;
+ rn = rn->rn_dupedkey;
+ rt = (struct rtentry *)rn;
+ }
+
+ return (rt);
+}
+
+struct rtentry *
+rt_mpath_select(struct rtentry *rte, uint32_t hash)
+{
+ if (rn_mpath_next((struct radix_node *)rte) == NULL)
+ return (rte);
+
+ return (rt_mpath_selectrte(rte, hash));
+}
+
+void
+rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
+{
+ struct rtentry *rt;
+
/*
* XXX we don't attempt to lookup cached route again; what should
* be done for sendto(3) case?
@@ -232,34 +272,18 @@ rtalloc_mpath_fib(struct route *ro, u_int32_t hash, u_int fibnum)
return;
}
- /* beyond here, we use rn as the master copy */
- rn0 = rn = (struct radix_node *)ro->ro_rt;
- n = rn_mpath_count(rn0);
-
- /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
- hash += hashjitter;
- hash %= n;
- for (weight = abs((int32_t)hash), rt = ro->ro_rt;
- weight >= rt->rt_rmx.rmx_weight && rn;
- weight -= rt->rt_rmx.rmx_weight) {
-
- /* stay within the multipath routes */
- if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
- break;
- rn = rn->rn_dupedkey;
- rt = (struct rtentry *)rn;
- }
+ rt = rt_mpath_selectrte(ro->ro_rt, hash);
/* XXX try filling rt_gwroute and avoid unreachable gw */
/* gw selection has failed - there must be only zero weight routes */
- if (!rn) {
+ if (!rt) {
RT_UNLOCK(ro->ro_rt);
ro->ro_rt = NULL;
return;
}
if (ro->ro_rt != rt) {
RTFREE_LOCKED(ro->ro_rt);
- ro->ro_rt = (struct rtentry *)rn;
+ ro->ro_rt = rt;
RT_LOCK(ro->ro_rt);
RT_ADDREF(ro->ro_rt);
@@ -274,11 +298,11 @@ extern int in_inithead(void **head, int off);
int
rn4_mpath_inithead(void **head, int off)
{
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
hashjitter = arc4random();
if (in_inithead(head, off) == 1) {
- rnh = (struct radix_node_head *)*head;
+ rnh = (struct rib_head *)*head;
rnh->rnh_multipath = 1;
return 1;
} else
@@ -290,11 +314,11 @@ rn4_mpath_inithead(void **head, int off)
int
rn6_mpath_inithead(void **head, int off)
{
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
hashjitter = arc4random();
if (in6_inithead(head, off) == 1) {
- rnh = (struct radix_node_head *)*head;
+ rnh = (struct rib_head *)*head;
rnh->rnh_multipath = 1;
return 1;
} else
diff --git a/freebsd/sys/net/radix_mpath.h b/freebsd/sys/net/radix_mpath.h
index bcb210e3..2b0d442e 100644
--- a/freebsd/sys/net/radix_mpath.h
+++ b/freebsd/sys/net/radix_mpath.h
@@ -44,16 +44,16 @@
struct route;
struct rtentry;
struct sockaddr;
-int rn_mpath_capable(struct radix_node_head *);
+struct rib_head;
+int rt_mpath_capable(struct rib_head *);
+int rn_mpath_capable(struct radix_head *);
struct radix_node *rn_mpath_next(struct radix_node *);
u_int32_t rn_mpath_count(struct radix_node *);
struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *);
-int rt_mpath_conflict(struct radix_node_head *, struct rtentry *,
+int rt_mpath_conflict(struct rib_head *, struct rtentry *,
struct sockaddr *);
void rtalloc_mpath_fib(struct route *, u_int32_t, u_int);
-#define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0)
-struct radix_node *rn_mpath_lookup(void *, void *,
- struct radix_node_head *);
+struct rtentry *rt_mpath_select(struct rtentry *, uint32_t);
int rt_mpath_deldup(struct rtentry *, struct rtentry *);
int rn4_mpath_inithead(void **, int);
int rn6_mpath_inithead(void **, int);
diff --git a/freebsd/sys/net/raw_cb.c b/freebsd/sys/net/raw_cb.c
index 10db8bba..00a199f3 100644
--- a/freebsd/sys/net/raw_cb.c
+++ b/freebsd/sys/net/raw_cb.c
@@ -46,8 +46,8 @@
#include <sys/systm.h>
#include <net/if.h>
-#include <net/raw_cb.h>
#include <net/vnet.h>
+#include <net/raw_cb.h>
/*
* Routines to manage the raw protocol control blocks.
diff --git a/freebsd/sys/net/raw_usrreq.c b/freebsd/sys/net/raw_usrreq.c
index 1030526f..e170ad74 100644
--- a/freebsd/sys/net/raw_usrreq.c
+++ b/freebsd/sys/net/raw_usrreq.c
@@ -48,8 +48,8 @@
#include <sys/systm.h>
#include <net/if.h>
-#include <net/raw_cb.h>
#include <net/vnet.h>
+#include <net/raw_cb.h>
MTX_SYSINIT(rawcb_mtx, &rawcb_mtx, "rawcb", MTX_DEF);
@@ -85,7 +85,7 @@ raw_input_ext(struct mbuf *m0, struct sockproto *proto, struct sockaddr *src,
struct mbuf *m = m0;
struct socket *last;
- last = 0;
+ last = NULL;
mtx_lock(&rawcb_mtx);
LIST_FOREACH(rp, &V_rawcb_list, list) {
if (rp->rcb_proto.sp_family != proto->sp_family)
diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c
index 781d8bb9..3eb05b94 100644
--- a/freebsd/sys/net/route.c
+++ b/freebsd/sys/net/route.c
@@ -45,7 +45,6 @@
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
-#include <sys/syslog.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
@@ -57,8 +56,10 @@
#include <sys/kernel.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
+#include <net/route_var.h>
#include <net/vnet.h>
#include <net/flowtable.h>
@@ -75,8 +76,7 @@
#include <sys/file.h>
#endif /* __rtems__ */
-/* We use 4 bits in the mbuf flags, thus we are limited to 16 FIBS. */
-#define RT_MAXFIBS 16
+#define RT_MAXFIBS UINT16_MAX
/* Kernel config default option. */
#ifdef ROUTETABLES
@@ -102,17 +102,7 @@ extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
/* This is read-only.. */
u_int rt_numfibs = RT_NUMFIBS;
-SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
-/*
- * Allow the boot code to allow LESS than RT_MAXFIBS to be used.
- * We can't do more because storage is statically allocated for now.
- * (for compatibility reasons.. this will change. When this changes, code should
- * be refactored to protocol independent parts and protocol dependent parts,
- * probably hanging of domain(9) specific storage to not need the full
- * fib * af RNH allocation etc. but allow tuning the number of tables per
- * address family).
- */
-TUNABLE_INT("net.fibs", &rt_numfibs);
+SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, "");
/*
* By default add routes to all fibs for new interfaces.
@@ -124,25 +114,20 @@ TUNABLE_INT("net.fibs", &rt_numfibs);
* always work given the fib can be overridden and prefixes can be added
* from the network stack context.
*/
-u_int rt_add_addr_allfibs = 1;
-SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW,
- &rt_add_addr_allfibs, 0, "");
-TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs);
+VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1;
+SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET,
+ &VNET_NAME(rt_add_addr_allfibs), 0, "");
VNET_DEFINE(struct rtstat, rtstat);
#define V_rtstat VNET(rtstat)
-VNET_DEFINE(struct radix_node_head *, rt_tables);
+VNET_DEFINE(struct rib_head *, rt_tables);
#define V_rt_tables VNET(rt_tables)
VNET_DEFINE(int, rttrash); /* routes not in table but not freed */
#define V_rttrash VNET(rttrash)
-/* compare two sockaddr structures */
-#define sa_equal(a1, a2) (((a1)->sa_len == (a2)->sa_len) && \
- (bcmp((a1), (a2), (a1)->sa_len) == 0))
-
/*
* Convert a 'struct radix_node *' to a 'struct rtentry *'.
* The operation can be done safely (in this code) because a
@@ -158,6 +143,28 @@ VNET_DEFINE(int, rttrash); /* routes not in table but not freed */
static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */
#define V_rtzone VNET(rtzone)
+static int rtrequest1_fib_change(struct rib_head *, struct rt_addrinfo *,
+ struct rtentry **, u_int);
+static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *);
+static int rt_ifdelroute(const struct rtentry *rt, void *arg);
+static struct rtentry *rt_unlinkrte(struct rib_head *rnh,
+ struct rt_addrinfo *info, int *perror);
+static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info);
+#ifdef RADIX_MPATH
+static struct radix_node *rt_mpath_unlink(struct rib_head *rnh,
+ struct rt_addrinfo *info, struct rtentry *rto, int *perror);
+#endif
+static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info,
+ int flags);
+
+struct if_mtuinfo
+{
+ struct ifnet *ifp;
+ int mtu;
+};
+
+static int if_updatemtu_cb(struct radix_node *, void *);
+
/*
* handler for net.my_fibnum
*/
@@ -179,10 +186,10 @@ sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");
-static __inline struct radix_node_head **
+static __inline struct rib_head **
rt_tables_get_rnh_ptr(int table, int fam)
{
- struct radix_node_head **rnh;
+ struct rib_head **rnh;
KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.",
__func__));
@@ -190,20 +197,32 @@ rt_tables_get_rnh_ptr(int table, int fam)
__func__));
/* rnh is [fib=0][af=0]. */
- rnh = (struct radix_node_head **)V_rt_tables;
+ rnh = (struct rib_head **)V_rt_tables;
/* Get the offset to the requested table and fam. */
rnh += table * (AF_MAX+1) + fam;
return (rnh);
}
-struct radix_node_head *
+struct rib_head *
rt_tables_get_rnh(int table, int fam)
{
return (*rt_tables_get_rnh_ptr(table, fam));
}
+u_int
+rt_tables_get_gen(int table, int fam)
+{
+ struct rib_head *rnh;
+
+ rnh = *rt_tables_get_rnh_ptr(table, fam);
+ KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d fam %d",
+ __func__, table, fam));
+ return (rnh->rnh_gen);
+}
+
+
/*
* route initialization must occur before ip6_init2(), which happenas at
* SI_ORDER_MIDDLE.
@@ -211,36 +230,72 @@ rt_tables_get_rnh(int table, int fam)
static void
route_init(void)
{
- struct domain *dom;
- int max_keylen = 0;
/* whack the tunable ints into line. */
if (rt_numfibs > RT_MAXFIBS)
rt_numfibs = RT_MAXFIBS;
if (rt_numfibs == 0)
rt_numfibs = 1;
+}
+SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
- for (dom = domains; dom; dom = dom->dom_next)
- if (dom->dom_maxrtkey > max_keylen)
- max_keylen = dom->dom_maxrtkey;
+static int
+rtentry_zinit(void *mem, int size, int how)
+{
+ struct rtentry *rt = mem;
+
+ rt->rt_pksent = counter_u64_alloc(how);
+ if (rt->rt_pksent == NULL)
+ return (ENOMEM);
- rn_init(max_keylen); /* init all zeroes, all ones, mask table */
+ RT_LOCK_INIT(rt);
+
+ return (0);
+}
+
+static void
+rtentry_zfini(void *mem, int size)
+{
+ struct rtentry *rt = mem;
+
+ RT_LOCK_DESTROY(rt);
+ counter_u64_free(rt->rt_pksent);
+}
+
+static int
+rtentry_ctor(void *mem, int size, void *arg, int how)
+{
+ struct rtentry *rt = mem;
+
+ bzero(rt, offsetof(struct rtentry, rt_endzero));
+ counter_u64_zero(rt->rt_pksent);
+ rt->rt_chain = NULL;
+
+ return (0);
+}
+
+static void
+rtentry_dtor(void *mem, int size, void *arg)
+{
+ struct rtentry *rt = mem;
+
+ RT_UNLOCK_COND(rt);
}
-SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
static void
vnet_route_init(const void *unused __unused)
{
struct domain *dom;
- struct radix_node_head **rnh;
+ struct rib_head **rnh;
int table;
int fam;
V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
- sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO);
+ sizeof(struct rib_head *), M_RTABLE, M_WAITOK|M_ZERO);
- V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, 0);
+ V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
+ rtentry_ctor, rtentry_dtor,
+ rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0);
for (dom = domains; dom; dom = dom->dom_next) {
if (dom->dom_rtattach == NULL)
continue;
@@ -250,15 +305,10 @@ vnet_route_init(const void *unused __unused)
if (table != 0 && fam != AF_INET6 && fam != AF_INET)
break;
- /*
- * XXX MRT rtattach will be also called from
- * vfs_export.c but the offset will be 0 (only for
- * AF_INET and AF_INET6 which don't need it anyhow).
- */
rnh = rt_tables_get_rnh_ptr(table, fam);
if (rnh == NULL)
panic("%s: rnh NULL", __func__);
- dom->dom_rtattach((void **)rnh, dom->dom_rtoffset);
+ dom->dom_rtattach((void **)rnh, 0);
}
}
}
@@ -272,7 +322,7 @@ vnet_route_uninit(const void *unused __unused)
int table;
int fam;
struct domain *dom;
- struct radix_node_head **rnh;
+ struct rib_head **rnh;
for (dom = domains; dom; dom = dom->dom_next) {
if (dom->dom_rtdetach == NULL)
@@ -287,14 +337,68 @@ vnet_route_uninit(const void *unused __unused)
rnh = rt_tables_get_rnh_ptr(table, fam);
if (rnh == NULL)
panic("%s: rnh NULL", __func__);
- dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset);
+ dom->dom_rtdetach((void **)rnh, 0);
}
}
+
+ free(V_rt_tables, M_RTABLE);
+ uma_zdestroy(V_rtzone);
}
-VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
vnet_route_uninit, 0);
#endif
+struct rib_head *
+rt_table_init(int offset)
+{
+ struct rib_head *rh;
+
+ rh = malloc(sizeof(struct rib_head), M_RTABLE, M_WAITOK | M_ZERO);
+
+	/* TODO: These details should be hidden inside radix.c */
+ /* Init masks tree */
+ rn_inithead_internal(&rh->head, rh->rnh_nodes, offset);
+ rn_inithead_internal(&rh->rmhead.head, rh->rmhead.mask_nodes, 0);
+ rh->head.rnh_masks = &rh->rmhead;
+
+ /* Init locks */
+ rw_init(&rh->rib_lock, "rib head lock");
+
+ /* Finally, set base callbacks */
+ rh->rnh_addaddr = rn_addroute;
+ rh->rnh_deladdr = rn_delete;
+ rh->rnh_matchaddr = rn_match;
+ rh->rnh_lookup = rn_lookup;
+ rh->rnh_walktree = rn_walktree;
+ rh->rnh_walktree_from = rn_walktree_from;
+
+ return (rh);
+}
+
+static int
+rt_freeentry(struct radix_node *rn, void *arg)
+{
+ struct radix_head * const rnh = arg;
+ struct radix_node *x;
+
+ x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh);
+ if (x != NULL)
+ R_Free(x);
+ return (0);
+}
+
+void
+rt_table_destroy(struct rib_head *rh)
+{
+
+ rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head);
+
+ /* Assume table is already empty */
+ rw_destroy(&rh->rib_lock);
+ free(rh, M_RTABLE);
+}
+
+
#ifndef _SYS_SYSPROTO_H_
struct setfib_args {
int fibnum;
@@ -335,35 +439,6 @@ setfib(int fibnum)
* Packet routing routines.
*/
void
-rtalloc(struct route *ro)
-{
-
- rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB);
-}
-
-void
-rtalloc_fib(struct route *ro, u_int fibnum)
-{
- rtalloc_ign_fib(ro, 0UL, fibnum);
-}
-
-void
-rtalloc_ign(struct route *ro, u_long ignore)
-{
- struct rtentry *rt;
-
- if ((rt = ro->ro_rt) != NULL) {
- if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
- return;
- RTFREE(rt);
- ro->ro_rt = NULL;
- }
- ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB);
- if (ro->ro_rt)
- RT_UNLOCK(ro->ro_rt);
-}
-
-void
rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
{
struct rtentry *rt;
@@ -396,49 +471,32 @@ struct rtentry *
rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
u_int fibnum)
{
- struct radix_node_head *rnh;
+ struct rib_head *rh;
struct radix_node *rn;
struct rtentry *newrt;
struct rt_addrinfo info;
int err = 0, msgtype = RTM_MISS;
- int needlock;
KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
- switch (dst->sa_family) {
- case AF_INET6:
- case AF_INET:
- /* We support multiple FIBs. */
- break;
- default:
- fibnum = RT_DEFAULT_FIB;
- break;
- }
- rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
+ rh = rt_tables_get_rnh(fibnum, dst->sa_family);
newrt = NULL;
- if (rnh == NULL)
+ if (rh == NULL)
goto miss;
/*
* Look up the address in the table for that Address Family
*/
- needlock = !(ignflags & RTF_RNH_LOCKED);
- if (needlock)
- RADIX_NODE_HEAD_RLOCK(rnh);
-#ifdef INVARIANTS
- else
- RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
-#endif
- rn = rnh->rnh_matchaddr(dst, rnh);
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr(dst, &rh->head);
if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
newrt = RNTORT(rn);
RT_LOCK(newrt);
RT_ADDREF(newrt);
- if (needlock)
- RADIX_NODE_HEAD_RUNLOCK(rnh);
- goto done;
+ RIB_RUNLOCK(rh);
+ return (newrt);
- } else if (needlock)
- RADIX_NODE_HEAD_RUNLOCK(rnh);
+ } else
+ RIB_RUNLOCK(rh);
/*
* Either we hit the root or couldn't find any match,
@@ -457,10 +515,7 @@ miss:
bzero(&info, sizeof(info));
info.rti_info[RTAX_DST] = dst;
rt_missmsg_fib(msgtype, &info, 0, err, fibnum);
- }
-done:
- if (newrt)
- RT_LOCK_ASSERT(newrt);
+ }
return (newrt);
}
@@ -471,7 +526,7 @@ done:
void
rtfree(struct rtentry *rt)
{
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
KASSERT(rt != NULL,("%s: NULL rt", __func__));
rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
@@ -499,7 +554,7 @@ rtfree(struct rtentry *rt)
* on the entry so that the code below reclaims the storage.
*/
if (rt->rt_refcnt == 0 && rnh->rnh_close)
- rnh->rnh_close((struct radix_node *)rt, rnh);
+ rnh->rnh_close((struct radix_node *)rt, &rnh->head);
/*
* If we are no longer "up" (and ref == 0)
@@ -531,12 +586,11 @@ rtfree(struct rtentry *rt)
* This also frees the gateway, as they are always malloc'd
* together.
*/
- Free(rt_key(rt));
+ R_Free(rt_key(rt));
/*
* and the rtentry itself of course
*/
- RT_LOCK_DESTROY(rt);
uma_zfree(V_rtzone, rt);
return;
}
@@ -552,17 +606,6 @@ done:
* message from the network layer.
*/
void
-rtredirect(struct sockaddr *dst,
- struct sockaddr *gateway,
- struct sockaddr *netmask,
- int flags,
- struct sockaddr *src)
-{
-
- rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB);
-}
-
-void
rtredirect_fib(struct sockaddr *dst,
struct sockaddr *gateway,
struct sockaddr *netmask,
@@ -570,12 +613,12 @@ rtredirect_fib(struct sockaddr *dst,
struct sockaddr *src,
u_int fibnum)
{
- struct rtentry *rt, *rt0 = NULL;
+ struct rtentry *rt;
int error = 0;
short *stat = NULL;
struct rt_addrinfo info;
struct ifaddr *ifa;
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
ifa = NULL;
rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
@@ -585,7 +628,7 @@ rtredirect_fib(struct sockaddr *dst,
}
/* verify the gateway is directly reachable */
- if ((ifa = ifa_ifwithnet_fib(gateway, 0, fibnum)) == NULL) {
+ if ((ifa = ifa_ifwithnet(gateway, 0, fibnum)) == NULL) {
error = ENETUNREACH;
goto out;
}
@@ -596,13 +639,20 @@ rtredirect_fib(struct sockaddr *dst,
* we have a routing loop, perhaps as a result of an interface
* going down recently.
*/
- if (!(flags & RTF_DONE) && rt &&
- (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
- error = EINVAL;
- else if (ifa_ifwithaddr_check(gateway))
+ if (!(flags & RTF_DONE) && rt) {
+ if (!sa_equal(src, rt->rt_gateway)) {
+ error = EINVAL;
+ goto done;
+ }
+ if (rt->rt_ifa != ifa && ifa->ifa_addr->sa_family != AF_LINK) {
+ error = EINVAL;
+ goto done;
+ }
+ }
+ if ((flags & RTF_GATEWAY) && ifa_ifwithaddr_check(gateway)) {
error = EHOSTUNREACH;
- if (error)
goto done;
+ }
/*
* Create a new entry if we just got back a wildcard entry
* or the lookup failed. This is necessary for hosts
@@ -622,36 +672,31 @@ rtredirect_fib(struct sockaddr *dst,
* Create new route, rather than smashing route to net.
*/
create:
- rt0 = rt;
- rt = NULL;
+ if (rt != NULL)
+ RTFREE_LOCKED(rt);
- flags |= RTF_GATEWAY | RTF_DYNAMIC;
+ flags |= RTF_DYNAMIC;
bzero((caddr_t)&info, sizeof(info));
info.rti_info[RTAX_DST] = dst;
info.rti_info[RTAX_GATEWAY] = gateway;
info.rti_info[RTAX_NETMASK] = netmask;
info.rti_ifa = ifa;
info.rti_flags = flags;
- if (rt0 != NULL)
- RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */
error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
if (rt != NULL) {
RT_LOCK(rt);
- if (rt0 != NULL)
- EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst);
flags = rt->rt_flags;
}
- if (rt0 != NULL)
- RTFREE(rt0);
stat = &V_rtstat.rts_dynamic;
} else {
- struct rtentry *gwrt;
/*
* Smash the current notion of the gateway to
* this destination. Should check about netmask!!!
*/
+ if ((flags & RTF_GATEWAY) == 0)
+ rt->rt_flags &= ~RTF_GATEWAY;
rt->rt_flags |= RTF_MODIFIED;
flags |= RTF_MODIFIED;
stat = &V_rtstat.rts_newgateway;
@@ -659,13 +704,10 @@ rtredirect_fib(struct sockaddr *dst,
* add the key and gateway (in one malloc'd chunk).
*/
RT_UNLOCK(rt);
- RADIX_NODE_HEAD_LOCK(rnh);
+ RIB_WLOCK(rnh);
RT_LOCK(rt);
rt_setgate(rt, rt_key(rt), gateway);
- gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED);
- RADIX_NODE_HEAD_UNLOCK(rnh);
- EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst);
- RTFREE_LOCKED(gwrt);
+ RIB_WUNLOCK(rnh);
}
} else
error = EHOSTUNREACH;
@@ -687,13 +729,6 @@ out:
ifa_free(ifa);
}
-int
-rtioctl(u_long req, caddr_t data)
-{
-
- return (rtioctl_fib(req, data, RT_DEFAULT_FIB));
-}
-
/*
* Routing table ioctl interface.
*/
@@ -715,21 +750,11 @@ rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
#endif /* INET */
}
-/*
- * For both ifa_ifwithroute() routines, 'ifa' is returned referenced.
- */
struct ifaddr *
-ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
-{
-
- return (ifa_ifwithroute_fib(flags, dst, gateway, RT_DEFAULT_FIB));
-}
-
-struct ifaddr *
-ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
+ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway,
u_int fibnum)
{
- register struct ifaddr *ifa;
+ struct ifaddr *ifa;
int not_found = 0;
if ((flags & RTF_GATEWAY) == 0) {
@@ -742,7 +767,7 @@ ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
*/
ifa = NULL;
if (flags & RTF_HOST)
- ifa = ifa_ifwithdstaddr_fib(dst, fibnum);
+ ifa = ifa_ifwithdstaddr(dst, fibnum);
if (ifa == NULL)
ifa = ifa_ifwithaddr(gateway);
} else {
@@ -751,12 +776,12 @@ ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
* or host, the gateway may still be on the
* other end of a pt to pt link.
*/
- ifa = ifa_ifwithdstaddr_fib(gateway, fibnum);
+ ifa = ifa_ifwithdstaddr(gateway, fibnum);
}
if (ifa == NULL)
- ifa = ifa_ifwithnet_fib(gateway, 0, fibnum);
+ ifa = ifa_ifwithnet(gateway, 0, fibnum);
if (ifa == NULL) {
- struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum);
+ struct rtentry *rt = rtalloc1_fib(gateway, 0, 0, fibnum);
if (rt == NULL)
return (NULL);
/*
@@ -800,19 +825,6 @@ ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
* all the bits of info needed
*/
int
-rtrequest(int req,
- struct sockaddr *dst,
- struct sockaddr *gateway,
- struct sockaddr *netmask,
- int flags,
- struct rtentry **ret_nrt)
-{
-
- return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt,
- RT_DEFAULT_FIB));
-}
-
-int
rtrequest_fib(int req,
struct sockaddr *dst,
struct sockaddr *gateway,
@@ -834,6 +846,443 @@ rtrequest_fib(int req,
return rtrequest1_fib(req, &info, ret_nrt, fibnum);
}
+
+/*
+ * Copy most of @rt data into @info.
+ *
+ * If @flags contains NHR_COPY, copies dst,netmask and gw to the
+ * pointers specified by @info structure. Assume such pointers
+ * are zeroed sockaddr-like structures with sa_len field initialized
+ * to reflect size of the provided buffer. if no NHR_COPY is specified,
+ * point dst,netmask and gw @info fields to appropriate @rt values.
+ *
+ * if @flags contains NHR_REF, do refcounting on rt_ifp.
+ *
+ * Returns 0 on success.
+ */
+int
+rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags)
+{
+ struct rt_metrics *rmx;
+ struct sockaddr *src, *dst;
+ int sa_len;
+
+ if (flags & NHR_COPY) {
+ /* Copy destination if dst is non-zero */
+ src = rt_key(rt);
+ dst = info->rti_info[RTAX_DST];
+ sa_len = src->sa_len;
+ if (dst != NULL) {
+ if (src->sa_len > dst->sa_len)
+ return (ENOMEM);
+ memcpy(dst, src, src->sa_len);
+ info->rti_addrs |= RTA_DST;
+ }
+
+ /* Copy mask if set && dst is non-zero */
+ src = rt_mask(rt);
+ dst = info->rti_info[RTAX_NETMASK];
+ if (src != NULL && dst != NULL) {
+
+ /*
+ * Radix stores different value in sa_len,
+ * assume rt_mask() to have the same length
+ * as rt_key()
+ */
+ if (sa_len > dst->sa_len)
+ return (ENOMEM);
+ memcpy(dst, src, src->sa_len);
+ info->rti_addrs |= RTA_NETMASK;
+ }
+
+		/* Copy gateway if set && dst is non-zero */
+ src = rt->rt_gateway;
+ dst = info->rti_info[RTAX_GATEWAY];
+ if ((rt->rt_flags & RTF_GATEWAY) && src != NULL && dst != NULL){
+ if (src->sa_len > dst->sa_len)
+ return (ENOMEM);
+ memcpy(dst, src, src->sa_len);
+ info->rti_addrs |= RTA_GATEWAY;
+ }
+ } else {
+ info->rti_info[RTAX_DST] = rt_key(rt);
+ info->rti_addrs |= RTA_DST;
+ if (rt_mask(rt) != NULL) {
+ info->rti_info[RTAX_NETMASK] = rt_mask(rt);
+ info->rti_addrs |= RTA_NETMASK;
+ }
+ if (rt->rt_flags & RTF_GATEWAY) {
+ info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ info->rti_addrs |= RTA_GATEWAY;
+ }
+ }
+
+ rmx = info->rti_rmx;
+ if (rmx != NULL) {
+ info->rti_mflags |= RTV_MTU;
+ rmx->rmx_mtu = rt->rt_mtu;
+ }
+
+ info->rti_flags = rt->rt_flags;
+ info->rti_ifp = rt->rt_ifp;
+ info->rti_ifa = rt->rt_ifa;
+
+ if (flags & NHR_REF) {
+ /* Do 'traditional' refcouting */
+ if_ref(info->rti_ifp);
+ }
+
+ return (0);
+}
+
+/*
+ * Looks up route entry for @dst in RIB database for fib @fibnum.
+ * Exports entry data to @info using rt_exportinfo().
+ *
+ * if @flags contains NHR_REF, refcounting is performed on rt_ifp.
+ * All references can be released later by calling rib_free_info()
+ *
+ * Returns 0 on success.
+ * Returns ENOENT for lookup failure, ENOMEM for export failure.
+ */
+int
+rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags,
+ uint32_t flowid, struct rt_addrinfo *info)
+{
+ struct rib_head *rh;
+ struct radix_node *rn;
+ struct rtentry *rt;
+ int error;
+
+ KASSERT((fibnum < rt_numfibs), ("rib_lookup_rte: bad fibnum"));
+ rh = rt_tables_get_rnh(fibnum, dst->sa_family);
+ if (rh == NULL)
+ return (ENOENT);
+
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr(__DECONST(void *, dst), &rh->head);
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ rt = RNTORT(rn);
+ /* Ensure route & ifp is UP */
+ if (RT_LINK_IS_UP(rt->rt_ifp)) {
+ flags = (flags & NHR_REF) | NHR_COPY;
+ error = rt_exportinfo(rt, info, flags);
+ RIB_RUNLOCK(rh);
+
+ return (error);
+ }
+ }
+ RIB_RUNLOCK(rh);
+
+ return (ENOENT);
+}
+
+/*
+ * Releases all references acquired by rib_lookup_info() when
+ * called with NHR_REF flags.
+ */
+void
+rib_free_info(struct rt_addrinfo *info)
+{
+
+ if_rele(info->rti_ifp);
+}
+
+/*
+ * Iterates over all existing fibs in system calling
+ * @setwa_f function prior to traversing each fib.
+ * Calls @wa_f function for each element in current fib.
+ * If af is not AF_UNSPEC, iterates over fibs in particular
+ * address family.
+ */
+void
+rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f,
+ void *arg)
+{
+ struct rib_head *rnh;
+ uint32_t fibnum;
+ int i;
+
+ for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
+ /* Do we want some specific family? */
+ if (af != AF_UNSPEC) {
+ rnh = rt_tables_get_rnh(fibnum, af);
+ if (rnh == NULL)
+ continue;
+ if (setwa_f != NULL)
+ setwa_f(rnh, fibnum, af, arg);
+
+ RIB_WLOCK(rnh);
+ rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg);
+ RIB_WUNLOCK(rnh);
+ continue;
+ }
+
+ for (i = 1; i <= AF_MAX; i++) {
+ rnh = rt_tables_get_rnh(fibnum, i);
+ if (rnh == NULL)
+ continue;
+ if (setwa_f != NULL)
+ setwa_f(rnh, fibnum, i, arg);
+
+ RIB_WLOCK(rnh);
+ rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg);
+ RIB_WUNLOCK(rnh);
+ }
+ }
+}
+
+struct rt_delinfo
+{
+ struct rt_addrinfo info;
+ struct rib_head *rnh;
+ struct rtentry *head;
+};
+
+/*
+ * Conditionally unlinks @rn from radix tree based
+ * on info data passed in @arg.
+ */
+static int
+rt_checkdelroute(struct radix_node *rn, void *arg)
+{
+ struct rt_delinfo *di;
+ struct rt_addrinfo *info;
+ struct rtentry *rt;
+ int error;
+
+ di = (struct rt_delinfo *)arg;
+ rt = (struct rtentry *)rn;
+ info = &di->info;
+ error = 0;
+
+ info->rti_info[RTAX_DST] = rt_key(rt);
+ info->rti_info[RTAX_NETMASK] = rt_mask(rt);
+ info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+
+ rt = rt_unlinkrte(di->rnh, info, &error);
+ if (rt == NULL) {
+ /* Either not allowed or not matched. Skip entry */
+ return (0);
+ }
+
+ /* Entry was unlinked. Add to the list and return */
+ rt->rt_chain = di->head;
+ di->head = rt;
+
+ return (0);
+}
+
+/*
+ * Iterates over all existing fibs in system.
+ * Deletes each element for which @filter_f function returned
+ * non-zero value.
+ * If @af is not AF_UNSPEC, iterates over fibs in particular
+ * address family.
+ */
+void
+rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg)
+{
+ struct rib_head *rnh;
+ struct rt_delinfo di;
+ struct rtentry *rt;
+ uint32_t fibnum;
+ int i, start, end;
+
+ bzero(&di, sizeof(di));
+ di.info.rti_filter = filter_f;
+ di.info.rti_filterdata = arg;
+
+ for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
+ /* Do we want some specific family? */
+ if (af != AF_UNSPEC) {
+ start = af;
+ end = af;
+ } else {
+ start = 1;
+ end = AF_MAX;
+ }
+
+ for (i = start; i <= end; i++) {
+ rnh = rt_tables_get_rnh(fibnum, i);
+ if (rnh == NULL)
+ continue;
+ di.rnh = rnh;
+
+ RIB_WLOCK(rnh);
+ rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
+ RIB_WUNLOCK(rnh);
+
+ if (di.head == NULL)
+ continue;
+
+ /* We might have something to reclaim */
+ while (di.head != NULL) {
+ rt = di.head;
+ di.head = rt->rt_chain;
+ rt->rt_chain = NULL;
+
+ /* TODO std rt -> rt_addrinfo export */
+ di.info.rti_info[RTAX_DST] = rt_key(rt);
+ di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+
+ rt_notifydelete(rt, &di.info);
+ RTFREE_LOCKED(rt);
+ }
+
+ }
+ }
+}
+
+/*
+ * Delete Routes for a Network Interface
+ *
+ * Called for each routing entry via the rnh->rnh_walktree() call above
+ * to delete all route entries referencing a detaching network interface.
+ *
+ * Arguments:
+ * rt pointer to rtentry
+ * arg argument passed to rnh->rnh_walktree() - detaching interface
+ *
+ * Returns:
+ * 0 successful
+ * errno failed - reason indicated
+ */
+static int
+rt_ifdelroute(const struct rtentry *rt, void *arg)
+{
+ struct ifnet *ifp = arg;
+
+ if (rt->rt_ifp != ifp)
+ return (0);
+
+ /*
+ * Protect (sorta) against walktree recursion problems
+ * with cloned routes
+ */
+ if ((rt->rt_flags & RTF_UP) == 0)
+ return (0);
+
+ return (1);
+}
+
+/*
+ * Delete all remaining routes using this interface
+ * Unfortunately the only way to do this is to slog through
+ * the entire routing table looking for routes which point
+ * to this interface...oh well...
+ */
+void
+rt_flushifroutes_af(struct ifnet *ifp, int af)
+{
+ KASSERT((af >= 1 && af <= AF_MAX), ("%s: af %d not >= 1 and <= %d",
+ __func__, af, AF_MAX));
+
+ rt_foreach_fib_walk_del(af, rt_ifdelroute, ifp);
+}
+
+void
+rt_flushifroutes(struct ifnet *ifp)
+{
+
+ rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp);
+}
+
+/*
+ * Conditionally unlinks rtentry matching data inside @info from @rnh.
+ * Returns unlinked, locked and referenced @rtentry on success,
+ * Returns NULL and sets @perror to:
+ * ESRCH - if prefix was not found,
+ * EADDRINUSE - if trying to delete PINNED route without appropriate flag.
+ * ENOENT - if supplied filter function returned 0 (not matched).
+ */
+static struct rtentry *
+rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror)
+{
+ struct sockaddr *dst, *netmask;
+ struct rtentry *rt;
+ struct radix_node *rn;
+
+ dst = info->rti_info[RTAX_DST];
+ netmask = info->rti_info[RTAX_NETMASK];
+
+ rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
+ if (rt == NULL) {
+ *perror = ESRCH;
+ return (NULL);
+ }
+
+ if ((info->rti_flags & RTF_PINNED) == 0) {
+ /* Check if target route can be deleted */
+ if (rt->rt_flags & RTF_PINNED) {
+ *perror = EADDRINUSE;
+ return (NULL);
+ }
+ }
+
+ if (info->rti_filter != NULL) {
+ if (info->rti_filter(rt, info->rti_filterdata) == 0) {
+ /* Not matched */
+ *perror = ENOENT;
+ return (NULL);
+ }
+
+ /*
+ * Filter function requested rte deletion.
+ * Ease the caller work by filling in remaining info
+ * from that particular entry.
+ */
+ info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ }
+
+ /*
+ * Remove the item from the tree and return it.
+ * Complain if it is not there and do no more processing.
+ */
+ *perror = ESRCH;
+#ifdef RADIX_MPATH
+ if (rt_mpath_capable(rnh))
+ rn = rt_mpath_unlink(rnh, info, rt, perror);
+ else
+#endif
+ rn = rnh->rnh_deladdr(dst, netmask, &rnh->head);
+ if (rn == NULL)
+ return (NULL);
+
+ if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
+ panic ("rtrequest delete");
+
+ rt = RNTORT(rn);
+ RT_LOCK(rt);
+ RT_ADDREF(rt);
+ rt->rt_flags &= ~RTF_UP;
+
+ *perror = 0;
+
+ return (rt);
+}
+
+static void
+rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info)
+{
+ struct ifaddr *ifa;
+
+ /*
+ * give the protocol a chance to keep things in sync.
+ */
+ ifa = rt->rt_ifa;
+ if (ifa != NULL && ifa->ifa_rtrequest != NULL)
+ ifa->ifa_rtrequest(RTM_DELETE, rt, info);
+
+ /*
+ * One more rtentry floating around that is not
+ * linked to the routing table. rttrash will be decremented
+ * when RTFREE(rt) is eventually called.
+ */
+ V_rttrash++;
+}
+
+
/*
* These (questionable) definitions of apparent local variables apply
* to the next two functions. XXXXXX!!!
@@ -845,13 +1294,6 @@ rtrequest_fib(int req,
#define ifpaddr info->rti_info[RTAX_IFP]
#define flags info->rti_flags
-int
-rt_getifa(struct rt_addrinfo *info)
-{
-
- return (rt_getifa_fib(info, RT_DEFAULT_FIB));
-}
-
/*
* Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined,
* it will be referenced so the caller must free it.
@@ -868,7 +1310,7 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
*/
if (info->rti_ifp == NULL && ifpaddr != NULL &&
ifpaddr->sa_family == AF_LINK &&
- (ifa = ifa_ifwithnet_fib(ifpaddr, 0, fibnum)) != NULL) {
+ (ifa = ifa_ifwithnet(ifpaddr, 0, fibnum)) != NULL) {
info->rti_ifp = ifa->ifa_ifp;
ifa_free(ifa);
}
@@ -882,10 +1324,10 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
if (sa != NULL && info->rti_ifp != NULL)
info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
else if (dst != NULL && gateway != NULL)
- info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway,
+ info->rti_ifa = ifa_ifwithroute(flags, dst, gateway,
fibnum);
else if (sa != NULL)
- info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa,
+ info->rti_ifa = ifa_ifwithroute(flags, sa, sa,
fibnum);
}
if ((ifa = info->rti_ifa) != NULL) {
@@ -896,94 +1338,70 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
return (error);
}
-/*
- * Expunges references to a route that's about to be reclaimed.
- * The route must be locked.
- */
-int
-rtexpunge(struct rtentry *rt)
+static int
+if_updatemtu_cb(struct radix_node *rn, void *arg)
{
-#if !defined(RADIX_MPATH)
- struct radix_node *rn;
-#else
- struct rt_addrinfo info;
- int fib;
- struct rtentry *rt0;
-#endif
- struct radix_node_head *rnh;
- struct ifaddr *ifa;
- int error = 0;
-
- /*
- * Find the correct routing tree to use for this Address Family
- */
- rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
- RT_LOCK_ASSERT(rt);
- if (rnh == NULL)
- return (EAFNOSUPPORT);
- RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
+ struct rtentry *rt;
+ struct if_mtuinfo *ifmtu;
-#ifdef RADIX_MPATH
- fib = rt->rt_fibnum;
- bzero(&info, sizeof(info));
- info.rti_ifp = rt->rt_ifp;
- info.rti_flags = RTF_RNH_LOCKED;
- info.rti_info[RTAX_DST] = rt_key(rt);
- info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr;
+ rt = (struct rtentry *)rn;
+ ifmtu = (struct if_mtuinfo *)arg;
- RT_UNLOCK(rt);
- error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib);
+ if (rt->rt_ifp != ifmtu->ifp)
+ return (0);
- if (error == 0 && rt0 != NULL) {
- rt = rt0;
- RT_LOCK(rt);
- } else if (error != 0) {
- RT_LOCK(rt);
- return (error);
+ if (rt->rt_mtu >= ifmtu->mtu) {
+ /* We have to decrease mtu regardless of flags */
+ rt->rt_mtu = ifmtu->mtu;
+ return (0);
}
-#else
+
/*
- * Remove the item from the tree; it should be there,
- * but when callers invoke us blindly it may not (sigh).
+ * New MTU is bigger. Check if are allowed to alter it
*/
- rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
- if (rn == NULL) {
- error = ESRCH;
- goto bad;
+ if ((rt->rt_flags & (RTF_FIXEDMTU | RTF_GATEWAY | RTF_HOST)) != 0) {
+
+ /*
+ * Skip routes with user-supplied MTU and
+ * non-interface routes
+ */
+ return (0);
}
- KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0,
- ("unexpected flags 0x%x", rn->rn_flags));
- KASSERT(rt == RNTORT(rn),
- ("lookup mismatch, rt %p rn %p", rt, rn));
-#endif /* RADIX_MPATH */
- rt->rt_flags &= ~RTF_UP;
+ /* We are safe to update route MTU */
+ rt->rt_mtu = ifmtu->mtu;
- /*
- * Give the protocol a chance to keep things in sync.
- */
- if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) {
- struct rt_addrinfo info;
+ return (0);
+}
- bzero((caddr_t)&info, sizeof(info));
- info.rti_flags = rt->rt_flags;
- info.rti_info[RTAX_DST] = rt_key(rt);
- info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
- info.rti_info[RTAX_NETMASK] = rt_mask(rt);
- ifa->ifa_rtrequest(RTM_DELETE, rt, &info);
- }
+void
+rt_updatemtu(struct ifnet *ifp)
+{
+ struct if_mtuinfo ifmtu;
+ struct rib_head *rnh;
+ int i, j;
+
+ ifmtu.ifp = ifp;
/*
- * one more rtentry floating around that is not
- * linked to the routing table.
+ * Try to update rt_mtu for all routes using this interface
+ * Unfortunately the only way to do this is to traverse all
+ * routing tables in all fibs/domains.
*/
- V_rttrash++;
-#if !defined(RADIX_MPATH)
-bad:
-#endif
- return (error);
+ for (i = 1; i <= AF_MAX; i++) {
+ ifmtu.mtu = if_getmtu_family(ifp, i);
+ for (j = 0; j < rt_numfibs; j++) {
+ rnh = rt_tables_get_rnh(j, i);
+ if (rnh == NULL)
+ continue;
+ RIB_WLOCK(rnh);
+ rnh->rnh_walktree(&rnh->head, if_updatemtu_cb, &ifmtu);
+ RIB_WUNLOCK(rnh);
+ }
+ }
}
+
#if 0
int p_sockaddr(char *buf, int buflen, struct sockaddr *s);
int rt_print(char *buf, int buflen, struct rtentry *rt);
@@ -1036,26 +1454,32 @@ rt_print(char *buf, int buflen, struct rtentry *rt)
#endif
#ifdef RADIX_MPATH
-static int
-rn_mpath_update(int req, struct rt_addrinfo *info,
- struct radix_node_head *rnh, struct rtentry **ret_nrt)
+/*
+ * Deletes key for single-path routes, unlinks rtentry with
+ * gateway specified in @info from multi-path routes.
+ *
+ * Returns unlinked entry. In case of failure, returns NULL
+ * and sets @perror to ESRCH.
+ */
+static struct radix_node *
+rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info,
+ struct rtentry *rto, int *perror)
{
/*
* if we got multipath routes, we require users to specify
* a matching RTAX_GATEWAY.
*/
- struct rtentry *rt, *rto = NULL;
- register struct radix_node *rn;
- int error = 0;
+ struct rtentry *rt; // *rto = NULL;
+ struct radix_node *rn;
+ struct sockaddr *gw;
- rn = rnh->rnh_lookup(dst, netmask, rnh);
- if (rn == NULL)
- return (ESRCH);
- rto = rt = RNTORT(rn);
+ gw = info->rti_info[RTAX_GATEWAY];
+ rt = rt_mpath_matchgate(rto, gw);
+ if (rt == NULL) {
+ *perror = ESRCH;
+ return (NULL);
+ }
- rt = rt_mpath_matchgate(rt, gateway);
- if (rt == NULL)
- return (ESRCH);
/*
* this is the first entry in the chain
*/
@@ -1078,67 +1502,95 @@ rn_mpath_update(int req, struct rt_addrinfo *info,
* check the case when there is only
* one route in the chain.
*/
- if (gateway &&
- (rt->rt_gateway->sa_len != gateway->sa_len ||
- memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
- error = ESRCH;
- else {
- /*
- * remove from tree before returning it
- * to the caller
- */
- rn = rnh->rnh_deladdr(dst, netmask, rnh);
- KASSERT(rt == RNTORT(rn), ("radix node disappeared"));
- goto gwdelete;
+ if (gw &&
+ (rt->rt_gateway->sa_len != gw->sa_len ||
+ memcmp(rt->rt_gateway, gw, gw->sa_len))) {
+ *perror = ESRCH;
+ return (NULL);
}
-
}
+
/*
* use the normal delete code to remove
* the first entry
*/
- if (req != RTM_DELETE)
- goto nondelete;
-
- error = ENOENT;
- goto done;
+ rn = rnh->rnh_deladdr(dst, netmask, &rnh->head);
+ *perror = 0;
+ return (rn);
}
/*
* if the entry is 2nd and on up
*/
- if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
+ if (rt_mpath_deldup(rto, rt) == 0)
panic ("rtrequest1: rt_mpath_deldup");
-gwdelete:
- RT_LOCK(rt);
- RT_ADDREF(rt);
- if (req == RTM_DELETE) {
- rt->rt_flags &= ~RTF_UP;
- /*
- * One more rtentry floating around that is not
- * linked to the routing table. rttrash will be decremented
- * when RTFREE(rt) is eventually called.
- */
- V_rttrash++;
+ *perror = 0;
+ rn = (struct radix_node *)rt;
+ return (rn);
+}
+#endif
+
+#ifdef FLOWTABLE
+static struct rtentry *
+rt_flowtable_check_route(struct rib_head *rnh, struct rt_addrinfo *info)
+{
+#if defined(INET6) || defined(INET)
+ struct radix_node *rn;
+#endif
+ struct rtentry *rt0;
+
+ rt0 = NULL;
+ /* "flow-table" only supports IPv6 and IPv4 at the moment. */
+ switch (dst->sa_family) {
+#ifdef INET6
+ case AF_INET6:
+#endif
+#ifdef INET
+ case AF_INET:
+#endif
+#if defined(INET6) || defined(INET)
+ rn = rnh->rnh_matchaddr(dst, &rnh->head);
+ if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ struct sockaddr *mask;
+ u_char *m, *n;
+ int len;
+
+ /*
+ * compare mask to see if the new route is
+ * more specific than the existing one
+ */
+ rt0 = RNTORT(rn);
+ RT_LOCK(rt0);
+ RT_ADDREF(rt0);
+ RT_UNLOCK(rt0);
+ /*
+ * A host route is already present, so
+ * leave the flow-table entries as is.
+ */
+ if (rt0->rt_flags & RTF_HOST) {
+ RTFREE(rt0);
+ rt0 = NULL;
+ } else if (!(flags & RTF_HOST) && netmask) {
+ mask = rt_mask(rt0);
+ len = mask->sa_len;
+ m = (u_char *)mask;
+ n = (u_char *)netmask;
+ while (len-- > 0) {
+ if (*n != *m)
+ break;
+ n++;
+ m++;
+ }
+ if (len == 0 || (*n < *m)) {
+ RTFREE(rt0);
+ rt0 = NULL;
+ }
+ }
+ }
+#endif/* INET6 || INET */
}
-
-nondelete:
- if (req != RTM_DELETE)
- panic("unrecognized request %d", req);
-
- /*
- * If the caller wants it, then it can have it,
- * but it's up to it to free the rtentry as we won't be
- * doing it.
- */
- if (ret_nrt) {
- *ret_nrt = rt;
- RT_UNLOCK(rt);
- } else
- RTFREE_LOCKED(rt);
-done:
- return (error);
+ return (rt0);
}
#endif
@@ -1146,19 +1598,19 @@ int
rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
u_int fibnum)
{
- int error = 0, needlock = 0;
- register struct rtentry *rt;
+ int error = 0;
+ struct rtentry *rt, *rt_old;
#ifdef FLOWTABLE
- register struct rtentry *rt0;
+ struct rtentry *rt0;
#endif
- register struct radix_node *rn;
- register struct radix_node_head *rnh;
+ struct radix_node *rn;
+ struct rib_head *rnh;
struct ifaddr *ifa;
struct sockaddr *ndst;
struct sockaddr_storage mdst;
-#define senderr(x) { error = x ; goto bad; }
KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
+ KASSERT((flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked"));
switch (dst->sa_family) {
case AF_INET6:
case AF_INET:
@@ -1175,12 +1627,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
if (rnh == NULL)
return (EAFNOSUPPORT);
- needlock = ((flags & RTF_RNH_LOCKED) == 0);
- flags &= ~RTF_RNH_LOCKED;
- if (needlock)
- RADIX_NODE_HEAD_LOCK(rnh);
- else
- RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
+
/*
* If we are adding a host route then we don't want to put
* a netmask in the tree, nor do we want to clone it.
@@ -1194,52 +1641,14 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
dst = (struct sockaddr *)&mdst;
}
-#ifdef RADIX_MPATH
- if (rn_mpath_capable(rnh)) {
- error = rn_mpath_update(req, info, rnh, ret_nrt);
- /*
- * "bad" holds true for the success case
- * as well
- */
- if (error != ENOENT)
- goto bad;
- error = 0;
- }
-#endif
- if ((flags & RTF_PINNED) == 0) {
- /* Check if target route can be deleted */
- rt = (struct rtentry *)rnh->rnh_lookup(dst,
- netmask, rnh);
- if ((rt != NULL) && (rt->rt_flags & RTF_PINNED))
- senderr(EADDRINUSE);
- }
- /*
- * Remove the item from the tree and return it.
- * Complain if it is not there and do no more processing.
- */
- rn = rnh->rnh_deladdr(dst, netmask, rnh);
- if (rn == NULL)
- senderr(ESRCH);
- if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
- panic ("rtrequest delete");
- rt = RNTORT(rn);
- RT_LOCK(rt);
- RT_ADDREF(rt);
- rt->rt_flags &= ~RTF_UP;
+ RIB_WLOCK(rnh);
+ rt = rt_unlinkrte(rnh, info, &error);
+ RIB_WUNLOCK(rnh);
+ if (error != 0)
+ return (error);
- /*
- * give the protocol a chance to keep things in sync.
- */
- if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
- ifa->ifa_rtrequest(RTM_DELETE, rt, info);
-
- /*
- * One more rtentry floating around that is not
- * linked to the routing table. rttrash will be decremented
- * when RTFREE(rt) is eventually called.
- */
- V_rttrash++;
+ rt_notifydelete(rt, info);
/*
* If the caller wants it, then it can have it,
@@ -1260,37 +1669,32 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
break;
case RTM_ADD:
if ((flags & RTF_GATEWAY) && !gateway)
- senderr(EINVAL);
+ return (EINVAL);
if (dst && gateway && (dst->sa_family != gateway->sa_family) &&
(gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
- senderr(EINVAL);
+ return (EINVAL);
if (info->rti_ifa == NULL) {
error = rt_getifa_fib(info, fibnum);
if (error)
- senderr(error);
+ return (error);
} else
ifa_ref(info->rti_ifa);
ifa = info->rti_ifa;
- rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
+ rt = uma_zalloc(V_rtzone, M_NOWAIT);
if (rt == NULL) {
- if (ifa != NULL)
- ifa_free(ifa);
- senderr(ENOBUFS);
+ ifa_free(ifa);
+ return (ENOBUFS);
}
- RT_LOCK_INIT(rt);
rt->rt_flags = RTF_UP | flags;
rt->rt_fibnum = fibnum;
/*
* Add the gateway. Possibly re-malloc-ing the storage for it.
*/
- RT_LOCK(rt);
if ((error = rt_setgate(rt, dst, gateway)) != 0) {
- RT_LOCK_DESTROY(rt);
- if (ifa != NULL)
- ifa_free(ifa);
+ ifa_free(ifa);
uma_zfree(V_rtzone, rt);
- senderr(error);
+ return (error);
}
/*
@@ -1313,111 +1717,81 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
*/
rt->rt_ifa = ifa;
rt->rt_ifp = ifa->ifa_ifp;
- rt->rt_rmx.rmx_weight = 1;
+ rt->rt_weight = 1;
+ rt_setmetrics(info, rt);
+
+ RIB_WLOCK(rnh);
+ RT_LOCK(rt);
#ifdef RADIX_MPATH
/* do not permit exactly the same dst/mask/gw pair */
- if (rn_mpath_capable(rnh) &&
+ if (rt_mpath_capable(rnh) &&
rt_mpath_conflict(rnh, rt, netmask)) {
- if (rt->rt_ifa) {
- ifa_free(rt->rt_ifa);
- }
- Free(rt_key(rt));
- RT_LOCK_DESTROY(rt);
+ RIB_WUNLOCK(rnh);
+
+ ifa_free(rt->rt_ifa);
+ R_Free(rt_key(rt));
uma_zfree(V_rtzone, rt);
- senderr(EEXIST);
+ return (EEXIST);
}
#endif
#ifdef FLOWTABLE
- rt0 = NULL;
- /* "flow-table" only supports IPv6 and IPv4 at the moment. */
- switch (dst->sa_family) {
-#ifdef INET6
- case AF_INET6:
-#endif
-#ifdef INET
- case AF_INET:
-#endif
-#if defined(INET6) || defined(INET)
- rn = rnh->rnh_matchaddr(dst, rnh);
- if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
- struct sockaddr *mask;
- u_char *m, *n;
- int len;
-
- /*
- * compare mask to see if the new route is
- * more specific than the existing one
- */
- rt0 = RNTORT(rn);
- RT_LOCK(rt0);
- RT_ADDREF(rt0);
- RT_UNLOCK(rt0);
- /*
- * A host route is already present, so
- * leave the flow-table entries as is.
- */
- if (rt0->rt_flags & RTF_HOST) {
- RTFREE(rt0);
- rt0 = NULL;
- } else if (!(flags & RTF_HOST) && netmask) {
- mask = rt_mask(rt0);
- len = mask->sa_len;
- m = (u_char *)mask;
- n = (u_char *)netmask;
- while (len-- > 0) {
- if (*n != *m)
- break;
- n++;
- m++;
- }
- if (len == 0 || (*n < *m)) {
- RTFREE(rt0);
- rt0 = NULL;
- }
- }
- }
-#endif/* INET6 || INET */
- }
+ rt0 = rt_flowtable_check_route(rnh, info);
#endif /* FLOWTABLE */
/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
- rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
+ rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);
+
+ rt_old = NULL;
+ if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) {
+
+ /*
+ * Force removal and re-try addition
+ * TODO: better multipath&pinned support
+ */
+ struct sockaddr *info_dst = info->rti_info[RTAX_DST];
+ info->rti_info[RTAX_DST] = ndst;
+ /* Do not delete existing PINNED(interface) routes */
+ info->rti_flags &= ~RTF_PINNED;
+ rt_old = rt_unlinkrte(rnh, info, &error);
+ info->rti_flags |= RTF_PINNED;
+ info->rti_info[RTAX_DST] = info_dst;
+ if (rt_old != NULL)
+ rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head,
+ rt->rt_nodes);
+ }
+ RIB_WUNLOCK(rnh);
+
+ if (rt_old != NULL)
+ RT_UNLOCK(rt_old);
+
/*
* If it still failed to go into the tree,
* then un-make it (this should be a function)
*/
if (rn == NULL) {
- if (rt->rt_ifa)
- ifa_free(rt->rt_ifa);
- Free(rt_key(rt));
- RT_LOCK_DESTROY(rt);
+ ifa_free(rt->rt_ifa);
+ R_Free(rt_key(rt));
uma_zfree(V_rtzone, rt);
#ifdef FLOWTABLE
if (rt0 != NULL)
RTFREE(rt0);
#endif
- senderr(EEXIST);
+ return (EEXIST);
}
#ifdef FLOWTABLE
else if (rt0 != NULL) {
- switch (dst->sa_family) {
-#ifdef INET6
- case AF_INET6:
- flowtable_route_flush(V_ip6_ft, rt0);
- break;
-#endif
-#ifdef INET
- case AF_INET:
- flowtable_route_flush(V_ip_ft, rt0);
- break;
-#endif
- }
+ flowtable_route_flush(dst->sa_family, rt0);
RTFREE(rt0);
}
#endif
+ if (rt_old != NULL) {
+ rt_notifydelete(rt_old, info);
+ RTFREE(rt_old);
+ }
+
/*
* If this protocol has something to add to this then
* allow it to do that as well.
@@ -1433,16 +1807,19 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
*ret_nrt = rt;
RT_ADDREF(rt);
}
+ rnh->rnh_gen++; /* Routing table updated */
RT_UNLOCK(rt);
break;
+ case RTM_CHANGE:
+ RIB_WLOCK(rnh);
+ error = rtrequest1_fib_change(rnh, info, ret_nrt, fibnum);
+ RIB_WUNLOCK(rnh);
+ break;
default:
error = EOPNOTSUPP;
}
-bad:
- if (needlock)
- RADIX_NODE_HEAD_UNLOCK(rnh);
+
return (error);
-#undef senderr
}
#undef dst
@@ -1452,20 +1829,147 @@ bad:
#undef ifpaddr
#undef flags
+static int
+rtrequest1_fib_change(struct rib_head *rnh, struct rt_addrinfo *info,
+ struct rtentry **ret_nrt, u_int fibnum)
+{
+ struct rtentry *rt = NULL;
+ int error = 0;
+ int free_ifa = 0;
+ int family, mtu;
+ struct if_mtuinfo ifmtu;
+
+ rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
+ info->rti_info[RTAX_NETMASK], &rnh->head);
+
+ if (rt == NULL)
+ return (ESRCH);
+
+#ifdef RADIX_MPATH
+ /*
+ * If we got multipath routes,
+ * we require users to specify a matching RTAX_GATEWAY.
+ */
+ if (rt_mpath_capable(rnh)) {
+ rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]);
+ if (rt == NULL)
+ return (ESRCH);
+ }
+#endif
+
+ RT_LOCK(rt);
+
+ rt_setmetrics(info, rt);
+
+ /*
+ * New gateway could require new ifaddr, ifp;
+ * flags may also be different; ifp may be specified
+ * by ll sockaddr when protocol address is ambiguous
+ */
+ if (((rt->rt_flags & RTF_GATEWAY) &&
+ info->rti_info[RTAX_GATEWAY] != NULL) ||
+ info->rti_info[RTAX_IFP] != NULL ||
+ (info->rti_info[RTAX_IFA] != NULL &&
+ !sa_equal(info->rti_info[RTAX_IFA], rt->rt_ifa->ifa_addr))) {
+
+ error = rt_getifa_fib(info, fibnum);
+ if (info->rti_ifa != NULL)
+ free_ifa = 1;
+
+ if (error != 0)
+ goto bad;
+ }
+
+ /* Check if outgoing interface has changed */
+ if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa &&
+ rt->rt_ifa != NULL && rt->rt_ifa->ifa_rtrequest != NULL) {
+ rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, info);
+ ifa_free(rt->rt_ifa);
+ }
+ /* Update gateway address */
+ if (info->rti_info[RTAX_GATEWAY] != NULL) {
+ error = rt_setgate(rt, rt_key(rt), info->rti_info[RTAX_GATEWAY]);
+ if (error != 0)
+ goto bad;
+
+ rt->rt_flags &= ~RTF_GATEWAY;
+ rt->rt_flags |= (RTF_GATEWAY & info->rti_flags);
+ }
+
+ if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa) {
+ ifa_ref(info->rti_ifa);
+ rt->rt_ifa = info->rti_ifa;
+ rt->rt_ifp = info->rti_ifp;
+ }
+ /* Allow some flags to be toggled on change. */
+ rt->rt_flags &= ~RTF_FMASK;
+ rt->rt_flags |= info->rti_flags & RTF_FMASK;
+
+ if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest != NULL)
+ rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info);
+
+ /* Alter route MTU if necessary */
+ if (rt->rt_ifp != NULL) {
+ family = info->rti_info[RTAX_DST]->sa_family;
+ mtu = if_getmtu_family(rt->rt_ifp, family);
+ /* Set default MTU */
+ if (rt->rt_mtu == 0)
+ rt->rt_mtu = mtu;
+ if (rt->rt_mtu != mtu) {
+ /* Check if we really need to update */
+ ifmtu.ifp = rt->rt_ifp;
+ ifmtu.mtu = mtu;
+ if_updatemtu_cb(rt->rt_nodes, &ifmtu);
+ }
+ }
+
+ if (ret_nrt) {
+ *ret_nrt = rt;
+ RT_ADDREF(rt);
+ }
+bad:
+ RT_UNLOCK(rt);
+ if (free_ifa != 0)
+ ifa_free(info->rti_ifa);
+ return (error);
+}
+
+static void
+rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt)
+{
+
+ if (info->rti_mflags & RTV_MTU) {
+ if (info->rti_rmx->rmx_mtu != 0) {
+
+ /*
+ * MTU was explicitly provided by user.
+ * Keep it.
+ */
+ rt->rt_flags |= RTF_FIXEDMTU;
+ } else {
+
+ /*
+ * User explicitly sets MTU to 0.
+ * Assume rollback to default.
+ */
+ rt->rt_flags &= ~RTF_FIXEDMTU;
+ }
+ rt->rt_mtu = info->rti_rmx->rmx_mtu;
+ }
+ if (info->rti_mflags & RTV_WEIGHT)
+ rt->rt_weight = info->rti_rmx->rmx_weight;
+ /* Kernel -> userland timebase conversion. */
+ if (info->rti_mflags & RTV_EXPIRE)
+ rt->rt_expire = info->rti_rmx->rmx_expire ?
+ info->rti_rmx->rmx_expire - time_second + time_uptime : 0;
+}
+
int
rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
{
/* XXX dst may be overwritten, can we move this to below */
int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
-#ifdef INVARIANTS
- struct radix_node_head *rnh;
- rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family);
-#endif
-
- RT_LOCK_ASSERT(rt);
- RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
-
/*
* Prepare to store the gateway in rt->rt_gateway.
* Both dst and gateway are stored one after the other in the same
@@ -1487,7 +1991,7 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
* Free()/free() handle a NULL argument just fine.
*/
bcopy(dst, new, dlen);
- Free(rt_key(rt)); /* free old block, if any */
+ R_Free(rt_key(rt)); /* free old block, if any */
rt_key(rt) = (struct sockaddr *)new;
rt->rt_gateway = (struct sockaddr *)(new + dlen);
}
@@ -1503,9 +2007,9 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
{
- register u_char *cp1 = (u_char *)src;
- register u_char *cp2 = (u_char *)dst;
- register u_char *cp3 = (u_char *)netmask;
+ u_char *cp1 = (u_char *)src;
+ u_char *cp2 = (u_char *)dst;
+ u_char *cp3 = (u_char *)netmask;
u_char *cplim = cp2 + *cp3;
u_char *cplim2 = cp2 + *cp1;
@@ -1537,7 +2041,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
int didwork = 0;
int a_failure = 0;
static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
if (flags & RTF_HOST) {
dst = ifa->ifa_dstaddr;
@@ -1558,13 +2062,13 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
break;
}
if (fibnum == RT_ALL_FIBS) {
- if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) {
+ if (V_rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD)
#ifndef __rtems__
startfib = endfib = ifa->ifa_ifp->if_fib;
#else /* __rtems__ */
startfib = endfib = BSD_DEFAULT_FIB;
#endif /* __rtems__ */
- } else {
+ else {
startfib = 0;
endfib = rt_numfibs - 1;
}
@@ -1609,10 +2113,10 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
if (rnh == NULL)
/* this table doesn't exist but others might */
continue;
- RADIX_NODE_HEAD_RLOCK(rnh);
- rn = rnh->rnh_lookup(dst, netmask, rnh);
+ RIB_RLOCK(rnh);
+ rn = rnh->rnh_lookup(dst, netmask, &rnh->head);
#ifdef RADIX_MPATH
- if (rn_mpath_capable(rnh)) {
+ if (rt_mpath_capable(rnh)) {
if (rn == NULL)
error = ESRCH;
@@ -1635,7 +2139,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
error = (rn == NULL ||
(rn->rn_flags & RNF_ROOT) ||
RNTORT(rn)->rt_ifa != ifa);
- RADIX_NODE_HEAD_RUNLOCK(rnh);
+ RIB_RUNLOCK(rnh);
if (error) {
/* this is only an error if bad on ALL tables */
continue;
@@ -1660,32 +2164,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
info.rti_info[RTAX_NETMASK] = netmask;
error = rtrequest1_fib(cmd, &info, &rt, fibnum);
- if ((error == EEXIST) && (cmd == RTM_ADD)) {
- /*
- * Interface route addition failed.
- * Atomically delete current prefix generating
- * RTM_DELETE message, and retry adding
- * interface prefix.
- */
- rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
- RADIX_NODE_HEAD_LOCK(rnh);
-
- /* Delete old prefix */
- info.rti_ifa = NULL;
- info.rti_flags = RTF_RNH_LOCKED;
-
- error = rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum);
- if (error == 0) {
- info.rti_ifa = ifa;
- info.rti_flags = flags | RTF_RNH_LOCKED |
- (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED;
- error = rtrequest1_fib(cmd, &info, &rt, fibnum);
- }
-
- RADIX_NODE_HEAD_UNLOCK(rnh);
- }
-
-
if (error == 0 && rt != NULL) {
/*
* notify any listening routing agents of the change
@@ -1760,15 +2238,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
return (error);
}
-#ifndef BURN_BRIDGES
-/* special one for inet internal use. may not use. */
-int
-rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
-{
- return (rtinit1(ifa, cmd, flags, RT_ALL_FIBS));
-}
-#endif
-
/*
* Set up a routing table entry, normally
* for an interface.
diff --git a/freebsd/sys/net/route.h b/freebsd/sys/net/route.h
index 0baa9a4c..d44dc9d5 100644
--- a/freebsd/sys/net/route.h
+++ b/freebsd/sys/net/route.h
@@ -33,6 +33,9 @@
#ifndef _NET_ROUTE_H_
#define _NET_ROUTE_H_
+#include <sys/counter.h>
+#include <net/vnet.h>
+
/*
* Kernel resident routing tables.
*
@@ -41,32 +44,39 @@
*/
/*
- * A route consists of a destination address, a reference
- * to a routing entry, and a reference to an llentry.
- * These are often held by protocols in their control
- * blocks, e.g. inpcb.
+ * Struct route consiste of a destination address,
+ * a route entry pointer, link-layer prepend data pointer along
+ * with its length.
*/
struct route {
struct rtentry *ro_rt;
struct llentry *ro_lle;
- struct in_ifaddr *ro_ia;
- int ro_flags;
+ /*
+ * ro_prepend and ro_plen are only used for bpf to pass in a
+ * preformed header. They are not cacheable.
+ */
+ char *ro_prepend;
+ uint16_t ro_plen;
+ uint16_t ro_flags;
+ uint16_t ro_mtu; /* saved ro_rt mtu */
+ uint16_t spare;
struct sockaddr ro_dst;
};
+#define RT_L2_ME_BIT 2 /* dst L2 addr is our address */
+#define RT_MAY_LOOP_BIT 3 /* dst may require loop copy */
+#define RT_HAS_HEADER_BIT 4 /* mbuf already have its header prepended */
+
#define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */
#define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */
+#define RT_L2_ME (1 << RT_L2_ME_BIT) /* 0x0004 */
+#define RT_MAY_LOOP (1 << RT_MAY_LOOP_BIT) /* 0x0008 */
+#define RT_HAS_HEADER (1 << RT_HAS_HEADER_BIT) /* 0x0010 */
-/*
- * These numbers are used by reliable protocols for determining
- * retransmission behavior and are included in the routing structure.
- */
-struct rt_metrics_lite {
- u_long rmx_mtu; /* MTU for this path */
- u_long rmx_expire; /* lifetime for route, e.g. redirect */
- u_long rmx_pksent; /* packets sent using this route */
- u_long rmx_weight; /* absolute weight */
-};
+#define RT_REJECT 0x0020 /* Destination is reject */
+#define RT_BLACKHOLE 0x0040 /* Destination is blackhole */
+#define RT_HAS_GW 0x0080 /* Destination has GW */
+#define RT_LLE_CACHE 0x0100 /* Cache link layer */
struct rt_metrics {
u_long rmx_locks; /* Kernel must leave these values alone */
@@ -91,14 +101,24 @@ struct rt_metrics {
#define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */
#define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ))
+/* lle state is exported in rmx_state rt_metrics field */
+#define rmx_state rmx_weight
+
+/*
+ * Keep a generation count of routing table, incremented on route addition,
+ * so we can invalidate caches. This is accessed without a lock, as precision
+ * is not required.
+ */
+typedef volatile u_int rt_gen_t; /* tree generation (for adds) */
+#define RT_GEN(fibnum, af) rt_tables_get_gen(fibnum, af)
+
#define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */
#define RT_ALL_FIBS -1 /* Announce event for every fib */
+#ifdef _KERNEL
extern u_int rt_numfibs; /* number of usable routing tables */
-extern u_int rt_add_addr_allfibs; /* Announce interfaces to all fibs */
-/*
- * XXX kernel function pointer `rt_output' is visible to applications.
- */
-struct mbuf;
+VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */
+#define V_rt_add_addr_allfibs VNET(rt_add_addr_allfibs)
+#endif
/*
* We distinguish between routes to hosts and routes to networks,
@@ -114,6 +134,8 @@ struct mbuf;
#include <net/radix_mpath.h>
#endif
#endif
+
+#if defined(_KERNEL) || defined(_WANT_RTENTRY)
struct rtentry {
struct radix_node rt_nodes[2]; /* tree glue, and other values */
/*
@@ -124,33 +146,20 @@ struct rtentry {
#define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key)))
#define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask)))
struct sockaddr *rt_gateway; /* value */
- int rt_flags; /* up/down?, host/net */
- int rt_refcnt; /* # held references */
struct ifnet *rt_ifp; /* the answer: interface to use */
struct ifaddr *rt_ifa; /* the answer: interface address to use */
- struct rt_metrics_lite rt_rmx; /* metrics used by rx'ing protocols */
- u_int rt_fibnum; /* which FIB */
-#ifdef _KERNEL
- /* XXX ugly, user apps use this definition but don't have a mtx def */
- struct mtx rt_mtx; /* mutex for routing entry */
-#endif
+ int rt_flags; /* up/down?, host/net */
+ int rt_refcnt; /* # held references */
+ u_int rt_fibnum; /* which FIB */
+ u_long rt_mtu; /* MTU for this path */
+ u_long rt_weight; /* absolute weight */
+ u_long rt_expire; /* lifetime for route, e.g. redirect */
+#define rt_endzero rt_pksent
+ counter_u64_t rt_pksent; /* packets sent using this route */
+ struct mtx rt_mtx; /* mutex for routing entry */
+ struct rtentry *rt_chain; /* pointer to next rtentry to delete */
};
-
-/*
- * Following structure necessary for 4.3 compatibility;
- * We should eventually move it to a compat file.
- */
-struct ortentry {
- u_long rt_hash; /* to speed lookups */
- struct sockaddr rt_dst; /* key */
- struct sockaddr rt_gateway; /* value */
- short rt_flags; /* up/down?, host/net */
- short rt_refcnt; /* # held references */
- u_long rt_use; /* raw # packets forwarded */
- struct ifnet *rt_ifp; /* the answer: interface to use */
-};
-
-#define rt_use rt_rmx.rmx_pksent
+#endif /* _KERNEL || _WANT_RTENTRY */
#define RTF_UP 0x1 /* route usable */
#define RTF_GATEWAY 0x2 /* destination is a gateway */
@@ -169,15 +178,10 @@ struct ortentry {
#define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */
#define RTF_PROTO2 0x4000 /* protocol specific routing flag */
#define RTF_PROTO1 0x8000 /* protocol specific routing flag */
-
-/* XXX: temporary to stay API/ABI compatible with userland */
-#ifndef _KERNEL
-#define RTF_PRCLONING 0x10000 /* unused, for compatibility */
-#endif
-
+/* 0x10000 unused, was RTF_PRCLONING */
/* 0x20000 unused, was RTF_WASCLONED */
#define RTF_PROTO3 0x40000 /* protocol specific routing flag */
-/* 0x80000 unused */
+#define RTF_FIXEDMTU 0x80000 /* MTU was explicitly specified */
#define RTF_PINNED 0x100000 /* route is immutable */
#define RTF_LOCAL 0x200000 /* route represents a local address */
#define RTF_BROADCAST 0x400000 /* route represents a bcast address */
@@ -185,7 +189,10 @@ struct ortentry {
/* 0x8000000 and up unassigned */
#define RTF_STICKY 0x10000000 /* always route dst->src */
-#define RTF_RNH_LOCKED 0x40000000 /* radix node head is locked */
+#define RTF_RNH_LOCKED 0x40000000 /* unused */
+
+#define RTF_GWFLAG_COMPAT 0x80000000 /* a compatibility bit for interacting
+ with existing routing apps */
/* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */
#define RTF_FMASK \
@@ -193,6 +200,40 @@ struct ortentry {
RTF_REJECT | RTF_STATIC | RTF_STICKY)
/*
+ * fib_ nexthop API flags.
+ */
+
+/* Consumer-visible nexthop info flags */
+#define NHF_REJECT 0x0010 /* RTF_REJECT */
+#define NHF_BLACKHOLE 0x0020 /* RTF_BLACKHOLE */
+#define NHF_REDIRECT 0x0040 /* RTF_DYNAMIC|RTF_MODIFIED */
+#define NHF_DEFAULT 0x0080 /* Default route */
+#define NHF_BROADCAST 0x0100 /* RTF_BROADCAST */
+#define NHF_GATEWAY 0x0200 /* RTF_GATEWAY */
+
+/* Nexthop request flags */
+#define NHR_IFAIF 0x01 /* Return ifa_ifp interface */
+#define NHR_REF 0x02 /* For future use */
+
+/* Control plane route request flags */
+#define NHR_COPY 0x100 /* Copy rte data */
+
+#ifdef _KERNEL
+/* rte<>ro_flags translation */
+static inline void
+rt_update_ro_flags(struct route *ro)
+{
+ int rt_flags = ro->ro_rt->rt_flags;
+
+ ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW);
+
+ ro->ro_flags |= (rt_flags & RTF_REJECT) ? RT_REJECT : 0;
+ ro->ro_flags |= (rt_flags & RTF_BLACKHOLE) ? RT_BLACKHOLE : 0;
+ ro->ro_flags |= (rt_flags & RTF_GATEWAY) ? RT_HAS_GW : 0;
+}
+#endif
+
+/*
* Routing statistics.
*/
struct rtstat {
@@ -233,8 +274,8 @@ struct rt_msghdr {
#define RTM_REDIRECT 0x6 /* Told to use different route */
#define RTM_MISS 0x7 /* Lookup failed on this address */
#define RTM_LOCK 0x8 /* fix specified metrics */
-#define RTM_OLDADD 0x9 /* caused by SIOCADDRT */
-#define RTM_OLDDEL 0xa /* caused by SIOCDELRT */
+ /* 0x9 */
+ /* 0xa */
#define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */
#define RTM_NEWADDR 0xc /* address being added to iface */
#define RTM_DELADDR 0xd /* address being removed from iface */
@@ -282,12 +323,19 @@ struct rt_msghdr {
#define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */
#define RTAX_MAX 8 /* size of array to allocate */
+typedef int rt_filter_f_t(const struct rtentry *, void *);
+
struct rt_addrinfo {
- int rti_addrs;
- struct sockaddr *rti_info[RTAX_MAX];
- int rti_flags;
- struct ifaddr *rti_ifa;
- struct ifnet *rti_ifp;
+ int rti_addrs; /* Route RTF_ flags */
+ int rti_flags; /* Route RTF_ flags */
+ struct sockaddr *rti_info[RTAX_MAX]; /* Sockaddr data */
+ struct ifaddr *rti_ifa; /* value of rt_ifa addr */
+ struct ifnet *rti_ifp; /* route interface */
+ rt_filter_f_t *rti_filter; /* filter function */
+ void *rti_filterdata; /* filter paramenters */
+ u_long rti_mflags; /* metrics RTV_ flags */
+ u_long rti_spare; /* Will be used for fib */
+ struct rt_metrics *rti_rmx; /* Pointer to route metrics */
};
/*
@@ -302,17 +350,25 @@ struct rt_addrinfo {
sizeof(long) : \
1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(long) - 1) ) )
+#define sa_equal(a, b) ( \
+ (((const struct sockaddr *)(a))->sa_len == ((const struct sockaddr *)(b))->sa_len) && \
+ (bcmp((a), (b), ((const struct sockaddr *)(b))->sa_len) == 0))
+
#ifdef _KERNEL
#define RT_LINK_IS_UP(ifp) (!((ifp)->if_capabilities & IFCAP_LINKSTATE) \
|| (ifp)->if_link_state == LINK_STATE_UP)
#define RT_LOCK_INIT(_rt) \
- mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK)
+ mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW)
#define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx)
#define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx)
#define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx)
#define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED)
+#define RT_UNLOCK_COND(_rt) do { \
+ if (mtx_owned(&(_rt)->rt_mtx)) \
+ mtx_unlock(&(_rt)->rt_mtx); \
+} while (0)
#define RT_ADDREF(_rt) do { \
RT_LOCK_ASSERT(_rt); \
@@ -349,6 +405,7 @@ struct rt_addrinfo {
if ((_ro)->ro_flags & RT_NORTREF) { \
(_ro)->ro_flags &= ~RT_NORTREF; \
(_ro)->ro_rt = NULL; \
+ (_ro)->ro_lle = NULL; \
} else { \
RT_LOCK((_ro)->ro_rt); \
RTFREE_LOCKED((_ro)->ro_rt); \
@@ -356,9 +413,24 @@ struct rt_addrinfo {
} \
} while (0)
-struct radix_node_head *rt_tables_get_rnh(int, int);
+/*
+ * Validate a cached route based on a supplied cookie. If there is an
+ * out-of-date cache, simply free it. Update the generation number
+ * for the new allocation
+ */
+#define RT_VALIDATE(ro, cookiep, fibnum) do { \
+ rt_gen_t cookie = RT_GEN(fibnum, (ro)->ro_dst.sa_family); \
+ if (*(cookiep) != cookie) { \
+ if ((ro)->ro_rt != NULL) { \
+ RTFREE((ro)->ro_rt); \
+ (ro)->ro_rt = NULL; \
+ } \
+ *(cookiep) = cookie; \
+ } \
+} while (0)
struct ifmultiaddr;
+struct rib_head;
void rt_ieee80211msg(struct ifnet *, int, void *, size_t);
void rt_ifannouncemsg(struct ifnet *, int);
@@ -372,6 +444,9 @@ int rt_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
void rt_newmaddrmsg(int, struct ifmultiaddr *);
int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *);
+struct rib_head *rt_table_init(int);
+void rt_table_destroy(struct rib_head *);
+u_int rt_tables_get_gen(int table, int fam);
int rtsock_addrmsg(int, struct ifaddr *, int);
int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
@@ -379,8 +454,6 @@ int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
/*
* Note the following locking behavior:
*
- * rtalloc_ign() and rtalloc() return ro->ro_rt unlocked
- *
* rtalloc1() returns a locked rtentry
*
* rtfree() and RTFREE_LOCKED() require a locked rtentry
@@ -388,27 +461,20 @@ int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
* RTFREE() uses an unlocked entry.
*/
-int rtexpunge(struct rtentry *);
void rtfree(struct rtentry *);
-int rt_check(struct rtentry **, struct rtentry **, struct sockaddr *);
+void rt_updatemtu(struct ifnet *);
+
+typedef int rt_walktree_f_t(struct rtentry *, void *);
+typedef void rt_setwarg_t(struct rib_head *, uint32_t, int, void *);
+void rt_foreach_fib_walk(int af, rt_setwarg_t *, rt_walktree_f_t *, void *);
+void rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg);
+void rt_flushifroutes_af(struct ifnet *, int);
+void rt_flushifroutes(struct ifnet *ifp);
/* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */
/* Thes are used by old code not yet converted to use multiple FIBS */
-int rt_getifa(struct rt_addrinfo *);
-void rtalloc_ign(struct route *ro, u_long ignflags);
-void rtalloc(struct route *ro); /* XXX deprecated, use rtalloc_ign(ro, 0) */
struct rtentry *rtalloc1(struct sockaddr *, int, u_long);
int rtinit(struct ifaddr *, int, int);
-int rtioctl(u_long, caddr_t);
-void rtredirect(struct sockaddr *, struct sockaddr *,
- struct sockaddr *, int, struct sockaddr *);
-int rtrequest(int, struct sockaddr *,
- struct sockaddr *, struct sockaddr *, int, struct rtentry **);
-
-#ifndef BURN_BRIDGES
-/* defaults to "all" FIBs */
-int rtinit_fib(struct ifaddr *, int, int);
-#endif
/* XXX MRT NEW VERSIONS THAT USE FIBs
* For now the protocol indepedent versions are the same as the AF_INET ones
@@ -416,7 +482,6 @@ int rtinit_fib(struct ifaddr *, int, int);
*/
int rt_getifa_fib(struct rt_addrinfo *, u_int fibnum);
void rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum);
-void rtalloc_fib(struct route *ro, u_int fibnum);
struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int);
int rtioctl_fib(u_long, caddr_t, u_int);
void rtredirect_fib(struct sockaddr *, struct sockaddr *,
@@ -424,13 +489,10 @@ void rtredirect_fib(struct sockaddr *, struct sockaddr *,
int rtrequest_fib(int, struct sockaddr *,
struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int);
int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int);
+int rib_lookup_info(uint32_t, const struct sockaddr *, uint32_t, uint32_t,
+ struct rt_addrinfo *);
+void rib_free_info(struct rt_addrinfo *info);
-#include <sys/eventhandler.h>
-typedef void (*rtevent_arp_update_fn)(void *, struct rtentry *, uint8_t *, struct sockaddr *);
-typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *);
-/* route_arp_update_event is no longer generated; see arp_update_event */
-EVENTHANDLER_DECLARE(route_arp_update_event, rtevent_arp_update_fn);
-EVENTHANDLER_DECLARE(route_redirect_event, rtevent_redirect_fn);
#endif
#endif
diff --git a/freebsd/sys/net/route_var.h b/freebsd/sys/net/route_var.h
new file mode 100644
index 00000000..a8ef56a5
--- /dev/null
+++ b/freebsd/sys/net/route_var.h
@@ -0,0 +1,76 @@
+/*-
+ * Copyright (c) 2015-2016
+ * Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_ROUTE_VAR_H_
+#define _NET_ROUTE_VAR_H_
+
+struct rib_head {
+ struct radix_head head;
+ rn_matchaddr_f_t *rnh_matchaddr; /* longest match for sockaddr */
+ rn_addaddr_f_t *rnh_addaddr; /* add based on sockaddr*/
+ rn_deladdr_f_t *rnh_deladdr; /* remove based on sockaddr */
+ rn_lookup_f_t *rnh_lookup; /* exact match for sockaddr */
+ rn_walktree_t *rnh_walktree; /* traverse tree */
+ rn_walktree_from_t *rnh_walktree_from; /* traverse tree below a */
+ rn_close_t *rnh_close; /*do something when the last ref drops*/
+ rt_gen_t rnh_gen; /* generation counter */
+ int rnh_multipath; /* multipath capable ? */
+ struct radix_node rnh_nodes[3]; /* empty tree for common case */
+ struct rwlock rib_lock; /* config/data path lock */
+ struct radix_mask_head rmhead; /* masks radix head */
+};
+
+#define RIB_RLOCK(rh) rw_rlock(&(rh)->rib_lock)
+#define RIB_RUNLOCK(rh) rw_runlock(&(rh)->rib_lock)
+#define RIB_WLOCK(rh) rw_wlock(&(rh)->rib_lock)
+#define RIB_WUNLOCK(rh) rw_wunlock(&(rh)->rib_lock)
+#define RIB_LOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_LOCKED)
+#define RIB_WLOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_WLOCKED)
+
+struct rib_head *rt_tables_get_rnh(int fib, int family);
+
+/* rte<>nhop translation */
+static inline uint16_t
+fib_rte_to_nh_flags(int rt_flags)
+{
+ uint16_t res;
+
+ res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0;
+ res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0;
+ res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0;
+ res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0;
+ res |= (rt_flags & RTF_GATEWAY) ? NHF_GATEWAY : 0;
+
+ return (res);
+}
+
+
+#endif
diff --git a/freebsd/sys/net/rss_config.h b/freebsd/sys/net/rss_config.h
new file mode 100644
index 00000000..2ab32a43
--- /dev/null
+++ b/freebsd/sys/net/rss_config.h
@@ -0,0 +1,138 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_RSS_CONFIG_H_
+#define _NET_RSS_CONFIG_H_
+
+#include <netinet/in.h> /* in_addr_t */
+
+/*
+ * Supported RSS hash functions.
+ */
+#define RSS_HASH_NAIVE 0x00000001 /* Poor but fast hash. */
+#define RSS_HASH_TOEPLITZ 0x00000002 /* Required by RSS. */
+#define RSS_HASH_CRC32 0x00000004 /* Future; some NICs do it. */
+
+#define RSS_HASH_MASK (RSS_HASH_NAIVE | RSS_HASH_TOEPLITZ)
+
+/*
+ * Instances of struct inpcbinfo declare an RSS hash type indicating what
+ * header fields are covered.
+ */
+#define RSS_HASHFIELDS_NONE 0
+#define RSS_HASHFIELDS_4TUPLE 1
+#define RSS_HASHFIELDS_2TUPLE 2
+
+/*
+ * Define RSS representations of the M_HASHTYPE_* values, representing
+ * which particular bits are supported. The NICs can then use this to
+ * calculate which hash types to enable and which not to enable.
+ *
+ * The fact that these line up with M_HASHTYPE_* is not to be relied
+ * upon.
+ */
+#define RSS_HASHTYPE_RSS_IPV4 (1 << 1) /* IPv4 2-tuple */
+#define RSS_HASHTYPE_RSS_TCP_IPV4 (1 << 2) /* TCPv4 4-tuple */
+#define RSS_HASHTYPE_RSS_IPV6 (1 << 3) /* IPv6 2-tuple */
+#define RSS_HASHTYPE_RSS_TCP_IPV6 (1 << 4) /* TCPv6 4-tuple */
+#define RSS_HASHTYPE_RSS_IPV6_EX (1 << 5) /* IPv6 2-tuple + ext hdrs */
+#define RSS_HASHTYPE_RSS_TCP_IPV6_EX (1 << 6) /* TCPv6 4-tuple + ext hdrs */
+#define RSS_HASHTYPE_RSS_UDP_IPV4 (1 << 7) /* IPv4 UDP 4-tuple */
+#define RSS_HASHTYPE_RSS_UDP_IPV4_EX (1 << 8) /* IPv4 UDP 4-tuple + ext hdrs */
+#define RSS_HASHTYPE_RSS_UDP_IPV6 (1 << 9) /* IPv6 UDP 4-tuple */
+#define RSS_HASHTYPE_RSS_UDP_IPV6_EX (1 << 10) /* IPv6 UDP 4-tuple + ext hdrs */
+
+/*
+ * Compile-time limits on the size of the indirection table.
+ */
+#define RSS_MAXBITS 7
+#define RSS_TABLE_MAXLEN (1 << RSS_MAXBITS)
+
+/*
+ * Maximum key size used throughout. It's OK for hardware to use only the
+ * first 16 bytes, which is all that's required for IPv4.
+ */
+#define RSS_KEYSIZE 40
+
+/*
+ * For RSS hash methods that do a software hash on an mbuf, the packet
+ * direction (ingress / egress) is required.
+ *
+ * The default direction (INGRESS) is the "receive into the NIC" - ie,
+ * what the hardware is hashing on.
+ */
+#define RSS_HASH_PKT_INGRESS 0
+#define RSS_HASH_PKT_EGRESS 1
+
+/*
+ * Rate limited debugging routines.
+ */
+#define RSS_DEBUG(format, ...) do { \
+ if (rss_debug) { \
+ static struct timeval lastfail; \
+ static int curfail; \
+ if (ppsratecheck(&lastfail, &curfail, 5)) \
+ printf("RSS (%s:%u): " format, __func__, __LINE__,\
+ ##__VA_ARGS__); \
+ } \
+} while (0)
+
+extern int rss_debug;
+
+/*
+ * Device driver interfaces to query RSS properties that must be programmed
+ * into hardware.
+ */
+u_int rss_getbits(void);
+u_int rss_getbucket(u_int hash);
+u_int rss_get_indirection_to_bucket(u_int index);
+u_int rss_getcpu(u_int bucket);
+void rss_getkey(uint8_t *key);
+u_int rss_gethashalgo(void);
+u_int rss_getnumbuckets(void);
+u_int rss_getnumcpus(void);
+u_int rss_gethashconfig(void);
+
+/*
+ * Hash calculation functions.
+ */
+uint32_t rss_hash(u_int datalen, const uint8_t *data);
+
+/*
+ * Network stack interface to query desired CPU affinity of a packet.
+ */
+struct mbuf * rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid);
+u_int rss_hash2cpuid(uint32_t hash_val, uint32_t hash_type);
+int rss_hash2bucket(uint32_t hash_val, uint32_t hash_type,
+ uint32_t *bucket_id);
+int rss_m2bucket(struct mbuf *m, uint32_t *bucket_id);
+
+#endif /* !_NET_RSS_CONFIG_H_ */
diff --git a/freebsd/sys/net/rtsock.c b/freebsd/sys/net/rtsock.c
index e768e17b..1e69bcdf 100644
--- a/freebsd/sys/net/rtsock.c
+++ b/freebsd/sys/net/rtsock.c
@@ -54,17 +54,21 @@
#include <sys/systm.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/if_types.h>
#include <net/netisr.h>
#include <net/raw_cb.h>
#include <net/route.h>
+#include <net/route_var.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
+#include <netinet/ip_carp.h>
#ifdef INET6
+#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#endif
@@ -72,34 +76,6 @@
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
-struct if_data32 {
- uint8_t ifi_type;
- uint8_t ifi_physical;
- uint8_t ifi_addrlen;
- uint8_t ifi_hdrlen;
- uint8_t ifi_link_state;
- uint8_t ifi_spare_char1;
- uint8_t ifi_spare_char2;
- uint8_t ifi_datalen;
- uint32_t ifi_mtu;
- uint32_t ifi_metric;
- uint32_t ifi_baudrate;
- uint32_t ifi_ipackets;
- uint32_t ifi_ierrors;
- uint32_t ifi_opackets;
- uint32_t ifi_oerrors;
- uint32_t ifi_collisions;
- uint32_t ifi_ibytes;
- uint32_t ifi_obytes;
- uint32_t ifi_imcasts;
- uint32_t ifi_omcasts;
- uint32_t ifi_iqdrops;
- uint32_t ifi_noproto;
- uint32_t ifi_hwassist;
- int32_t ifi_epoch;
- struct timeval32 ifi_lastchange;
-};
-
struct if_msghdr32 {
uint16_t ifm_msglen;
uint8_t ifm_version;
@@ -107,7 +83,7 @@ struct if_msghdr32 {
int32_t ifm_addrs;
int32_t ifm_flags;
uint16_t ifm_index;
- struct if_data32 ifm_data;
+ struct if_data ifm_data;
};
struct if_msghdrl32 {
@@ -120,7 +96,7 @@ struct if_msghdrl32 {
uint16_t _ifm_spare1;
uint16_t ifm_len;
uint16_t ifm_data_off;
- struct if_data32 ifm_data;
+ struct if_data ifm_data;
};
struct ifa_msghdrl32 {
@@ -134,7 +110,7 @@ struct ifa_msghdrl32 {
uint16_t ifam_len;
uint16_t ifam_data_off;
int32_t ifam_metric;
- struct if_data32 ifam_data;
+ struct if_data ifam_data;
};
#endif /* COMPAT_FREEBSD32 */
@@ -144,18 +120,22 @@ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
static struct sockaddr route_src = { 2, PF_ROUTE, };
static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
+/* These are external hooks for CARP. */
+int (*carp_get_vhid_p)(struct ifaddr *);
+
/*
* Used by rtsock/raw_input callback code to decide whether to filter the update
* notification to a socket bound to a particular FIB.
*/
#define RTS_FILTER_FIB M_PROTO8
-static struct {
+typedef struct {
int ip_count; /* attached w/ AF_INET */
int ip6_count; /* attached w/ AF_INET6 */
- int ipx_count; /* attached w/ AF_IPX */
int any_count; /* total attached */
-} route_cb;
+} route_cb_t;
+static VNET_DEFINE(route_cb_t, route_cb);
+#define V_route_cb VNET(route_cb)
struct mtx rtsock_mtx;
MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
@@ -174,20 +154,19 @@ struct walkarg {
};
static void rts_input(struct mbuf *m);
-static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
-static int rt_msg2(int type, struct rt_addrinfo *rtinfo,
- caddr_t cp, struct walkarg *w);
+static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo);
+static int rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo,
+ struct walkarg *w, int *plen);
static int rt_xaddrs(caddr_t cp, caddr_t cplim,
struct rt_addrinfo *rtinfo);
static int sysctl_dumpentry(struct radix_node *rn, void *vw);
static int sysctl_iflist(int af, struct walkarg *w);
static int sysctl_ifmalist(int af, struct walkarg *w);
-static int route_output(struct mbuf *m, struct socket *so);
-static void rt_setmetrics(u_long which, const struct rt_metrics *in,
- struct rt_metrics_lite *out);
-static void rt_getmetrics(const struct rt_metrics_lite *in,
- struct rt_metrics *out);
+static int route_output(struct mbuf *m, struct socket *so, ...);
+static void rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out);
static void rt_dispatch(struct mbuf *, sa_family_t);
+static struct sockaddr *rtsock_fix_netmask(struct sockaddr *dst,
+ struct sockaddr *smask, struct sockaddr_storage *dmask);
static struct netisr_handler rtsock_nh = {
.nh_name = "rtsock",
@@ -214,17 +193,35 @@ SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
"maximum routing socket dispatch queue length");
static void
-rts_init(void)
+vnet_rts_init(void)
{
int tmp;
+ if (IS_DEFAULT_VNET(curvnet)) {
#ifndef __rtems__
- if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
- rtsock_nh.nh_qlimit = tmp;
+ if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
+ rtsock_nh.nh_qlimit = tmp;
+#endif /* __rtems__ */
+ netisr_register(&rtsock_nh);
+ }
+#ifdef VIMAGE
+ else
+ netisr_register_vnet(&rtsock_nh);
#endif
- netisr_register(&rtsock_nh);
}
-SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
+VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+ vnet_rts_init, 0);
+
+#ifdef VIMAGE
+static void
+vnet_rts_uninit(void)
+{
+
+ netisr_unregister_vnet(&rtsock_nh);
+}
+VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+ vnet_rts_uninit, 0);
+#endif
static int
raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src,
@@ -294,23 +291,13 @@ static int
rts_attach(struct socket *so, int proto, struct thread *td)
{
struct rawcb *rp;
- int s, error;
+ int error;
KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
/* XXX */
rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
- if (rp == NULL)
- return ENOBUFS;
- /*
- * The splnet() is necessary to block protocols from sending
- * error notifications (like RTM_REDIRECT or RTM_LOSING) while
- * this PCB is extant but incompletely initialized.
- * Probably we should try to do more of this work beforehand and
- * eliminate the spl.
- */
- s = splnet();
so->so_pcb = (caddr_t)rp;
#ifndef __rtems__
so->so_fibnum = td->td_proc->p_fibnum;
@@ -320,7 +307,6 @@ rts_attach(struct socket *so, int proto, struct thread *td)
error = raw_attach(so, proto);
rp = sotorawcb(so);
if (error) {
- splx(s);
so->so_pcb = NULL;
free(rp, M_PCB);
return error;
@@ -328,20 +314,16 @@ rts_attach(struct socket *so, int proto, struct thread *td)
RTSOCK_LOCK();
switch(rp->rcb_proto.sp_protocol) {
case AF_INET:
- route_cb.ip_count++;
+ V_route_cb.ip_count++;
break;
case AF_INET6:
- route_cb.ip6_count++;
- break;
- case AF_IPX:
- route_cb.ipx_count++;
+ V_route_cb.ip6_count++;
break;
}
- route_cb.any_count++;
+ V_route_cb.any_count++;
RTSOCK_UNLOCK();
soisconnected(so);
so->so_options |= SO_USELOOPBACK;
- splx(s);
return 0;
}
@@ -372,16 +354,13 @@ rts_detach(struct socket *so)
RTSOCK_LOCK();
switch(rp->rcb_proto.sp_protocol) {
case AF_INET:
- route_cb.ip_count--;
+ V_route_cb.ip_count--;
break;
case AF_INET6:
- route_cb.ip6_count--;
- break;
- case AF_IPX:
- route_cb.ipx_count--;
+ V_route_cb.ip6_count--;
break;
}
- route_cb.any_count--;
+ V_route_cb.any_count--;
RTSOCK_UNLOCK();
raw_usrreqs.pru_detach(so);
}
@@ -562,17 +541,25 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
/*ARGSUSED*/
static int
-route_output(struct mbuf *m, struct socket *so)
+route_output(struct mbuf *m, struct socket *so, ...)
{
-#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
struct rt_msghdr *rtm = NULL;
struct rtentry *rt = NULL;
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
struct rt_addrinfo info;
- int len, error = 0;
+ struct sockaddr_storage ss;
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ int i, rti_need_deembed = 0;
+#endif
+ int alloc_len = 0, len, error = 0, fibnum;
struct ifnet *ifp = NULL;
union sockaddr_union saun;
sa_family_t saf = AF_UNSPEC;
+ struct rawcb *rp = NULL;
+ struct walkarg w;
+
+ fibnum = so->so_fibnum;
#define senderr(e) { error = e; goto flush;}
if (m == NULL || ((m->m_len < sizeof(long)) &&
@@ -582,31 +569,53 @@ route_output(struct mbuf *m, struct socket *so)
panic("route_output");
len = m->m_pkthdr.len;
if (len < sizeof(*rtm) ||
- len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
- info.rti_info[RTAX_DST] = NULL;
+ len != mtod(m, struct rt_msghdr *)->rtm_msglen)
senderr(EINVAL);
- }
- R_Malloc(rtm, struct rt_msghdr *, len);
- if (rtm == NULL) {
- info.rti_info[RTAX_DST] = NULL;
+
+ /*
+ * Most of current messages are in range 200-240 bytes,
+ * minimize possible re-allocation on reply using larger size
+ * buffer aligned on 1k boundary.
+ */
+ alloc_len = roundup2(len, 1024);
+ if ((rtm = malloc(alloc_len, M_TEMP, M_NOWAIT)) == NULL)
senderr(ENOBUFS);
- }
+
m_copydata(m, 0, len, (caddr_t)rtm);
+ bzero(&info, sizeof(info));
+ bzero(&w, sizeof(w));
+
if (rtm->rtm_version != RTM_VERSION) {
- info.rti_info[RTAX_DST] = NULL;
+ /* Do not touch message since format is unknown */
+ free(rtm, M_TEMP);
+ rtm = NULL;
senderr(EPROTONOSUPPORT);
}
+
+ /*
+ * Starting from here, it is possible
+ * to alter original message and insert
+ * caller PID and error value.
+ */
+
#ifndef __rtems__
rtm->rtm_pid = curproc->p_pid;
#else /* __rtems__ */
rtm->rtm_pid = BSD_DEFAULT_PID;
#endif /* __rtems__ */
- bzero(&info, sizeof(info));
info.rti_addrs = rtm->rtm_addrs;
- if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
- info.rti_info[RTAX_DST] = NULL;
+
+ info.rti_mflags = rtm->rtm_inits;
+ info.rti_rmx = &rtm->rtm_rmx;
+
+ /*
+ * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6
+ * link-local address because rtrequest requires addresses with
+ * embedded scope id.
+ */
+ if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info))
senderr(EINVAL);
- }
+
info.rti_flags = rtm->rtm_flags;
if (info.rti_info[RTAX_DST] == NULL ||
info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
@@ -634,11 +643,16 @@ route_output(struct mbuf *m, struct socket *so)
*/
if (info.rti_info[RTAX_GATEWAY] != NULL &&
info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) {
- struct route gw_ro;
+ struct rt_addrinfo ginfo;
+ struct sockaddr *gdst;
+
+ bzero(&ginfo, sizeof(ginfo));
+ bzero(&ss, sizeof(ss));
+ ss.ss_len = sizeof(ss);
+
+ ginfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&ss;
+ gdst = info.rti_info[RTAX_GATEWAY];
- bzero(&gw_ro, sizeof(gw_ro));
- gw_ro.ro_dst = *info.rti_info[RTAX_GATEWAY];
- rtalloc_ign_fib(&gw_ro, 0, so->so_fibnum);
/*
* A host route through the loopback interface is
* installed for each interface adddress. In pre 8.0
@@ -649,18 +663,21 @@ route_output(struct mbuf *m, struct socket *so)
* AF_LINK sa_family type of the rt_gateway, and the
* rt_ifp has the IFF_LOOPBACK flag set.
*/
- if (gw_ro.ro_rt != NULL &&
- gw_ro.ro_rt->rt_gateway->sa_family == AF_LINK &&
- gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)
- info.rti_flags &= ~RTF_GATEWAY;
- if (gw_ro.ro_rt != NULL)
- RTFREE(gw_ro.ro_rt);
+ if (rib_lookup_info(fibnum, gdst, NHR_REF, 0, &ginfo) == 0) {
+ if (ss.ss_family == AF_LINK &&
+ ginfo.rti_ifp->if_flags & IFF_LOOPBACK) {
+ info.rti_flags &= ~RTF_GATEWAY;
+ info.rti_flags |= RTF_GWFLAG_COMPAT;
+ }
+ rib_free_info(&ginfo);
+ }
}
switch (rtm->rtm_type) {
struct rtentry *saved_nrt;
case RTM_ADD:
+ case RTM_CHANGE:
if (info.rti_info[RTAX_GATEWAY] == NULL)
senderr(EINVAL);
saved_nrt = NULL;
@@ -669,14 +686,19 @@ route_output(struct mbuf *m, struct socket *so)
if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
(rtm->rtm_flags & RTF_LLDATA) != 0) {
error = lla_rt_output(rtm, &info);
+#ifdef INET6
+ if (error == 0)
+ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
break;
}
- error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
- so->so_fibnum);
- if (error == 0 && saved_nrt) {
+ error = rtrequest1_fib(rtm->rtm_type, &info, &saved_nrt,
+ fibnum);
+ if (error == 0 && saved_nrt != NULL) {
+#ifdef INET6
+ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
RT_LOCK(saved_nrt);
- rt_setmetrics(rtm->rtm_inits,
- &rtm->rtm_rmx, &saved_nrt->rt_rmx);
rtm->rtm_index = saved_nrt->rt_ifp->if_index;
RT_REMREF(saved_nrt);
RT_UNLOCK(saved_nrt);
@@ -690,26 +712,30 @@ route_output(struct mbuf *m, struct socket *so)
(info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) &&
(rtm->rtm_flags & RTF_LLDATA) != 0) {
error = lla_rt_output(rtm, &info);
+#ifdef INET6
+ if (error == 0)
+ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
break;
}
- error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
- so->so_fibnum);
+ error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, fibnum);
if (error == 0) {
RT_LOCK(saved_nrt);
rt = saved_nrt;
goto report;
}
+#ifdef INET6
+ /* rt_msg2() will not be used when RTM_DELETE fails. */
+ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
+#endif
break;
case RTM_GET:
- case RTM_CHANGE:
- case RTM_LOCK:
- rnh = rt_tables_get_rnh(so->so_fibnum,
- info.rti_info[RTAX_DST]->sa_family);
+ rnh = rt_tables_get_rnh(fibnum, saf);
if (rnh == NULL)
senderr(EAFNOSUPPORT);
- RADIX_NODE_HEAD_RLOCK(rnh);
+ RIB_RLOCK(rnh);
if (info.rti_info[RTAX_NETMASK] == NULL &&
rtm->rtm_type == RTM_GET) {
@@ -719,14 +745,14 @@ route_output(struct mbuf *m, struct socket *so)
* 'route -n get addr'
*/
rt = (struct rtentry *) rnh->rnh_matchaddr(
- info.rti_info[RTAX_DST], rnh);
+ info.rti_info[RTAX_DST], &rnh->head);
} else
rt = (struct rtentry *) rnh->rnh_lookup(
info.rti_info[RTAX_DST],
- info.rti_info[RTAX_NETMASK], rnh);
+ info.rti_info[RTAX_NETMASK], &rnh->head);
if (rt == NULL) {
- RADIX_NODE_HEAD_RUNLOCK(rnh);
+ RIB_RUNLOCK(rnh);
senderr(ESRCH);
}
#ifdef RADIX_MPATH
@@ -738,11 +764,11 @@ route_output(struct mbuf *m, struct socket *so)
* if gate == NULL the first match is returned.
* (no need to call rt_mpath_matchgate if gate == NULL)
*/
- if (rn_mpath_capable(rnh) &&
+ if (rt_mpath_capable(rnh) &&
(rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
if (!rt) {
- RADIX_NODE_HEAD_RUNLOCK(rnh);
+ RIB_RUNLOCK(rnh);
senderr(ESRCH);
}
}
@@ -760,7 +786,8 @@ route_output(struct mbuf *m, struct socket *so)
rt->rt_ifp->if_type == IFT_PROPVIRTUAL) {
struct ifaddr *ifa;
- ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1);
+ ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1,
+ RT_ALL_FIBS);
if (ifa != NULL)
rt_maskedcopy(ifa->ifa_addr,
&laddr,
@@ -772,139 +799,81 @@ route_output(struct mbuf *m, struct socket *so)
/*
* refactor rt and no lock operation necessary
*/
- rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr, rnh);
+ rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr,
+ &rnh->head);
if (rt == NULL) {
- RADIX_NODE_HEAD_RUNLOCK(rnh);
+ RIB_RUNLOCK(rnh);
senderr(ESRCH);
}
}
RT_LOCK(rt);
RT_ADDREF(rt);
- RADIX_NODE_HEAD_RUNLOCK(rnh);
-
- switch(rtm->rtm_type) {
-
- case RTM_GET:
- report:
- RT_LOCK_ASSERT(rt);
- if ((rt->rt_flags & RTF_HOST) == 0
- ? jailed_without_vnet(curthread->td_ucred)
- : prison_if(curthread->td_ucred,
- rt_key(rt)) != 0) {
- RT_UNLOCK(rt);
- senderr(ESRCH);
- }
- info.rti_info[RTAX_DST] = rt_key(rt);
- info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
- info.rti_info[RTAX_NETMASK] = rt_mask(rt);
- info.rti_info[RTAX_GENMASK] = 0;
- if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
- ifp = rt->rt_ifp;
- if (ifp) {
- info.rti_info[RTAX_IFP] =
- ifp->if_addr->ifa_addr;
- error = rtm_get_jailed(&info, ifp, rt,
- &saun, curthread->td_ucred);
- if (error != 0) {
- RT_UNLOCK(rt);
- senderr(error);
- }
- if (ifp->if_flags & IFF_POINTOPOINT)
- info.rti_info[RTAX_BRD] =
- rt->rt_ifa->ifa_dstaddr;
- rtm->rtm_index = ifp->if_index;
- } else {
- info.rti_info[RTAX_IFP] = NULL;
- info.rti_info[RTAX_IFA] = NULL;
- }
- } else if ((ifp = rt->rt_ifp) != NULL) {
- rtm->rtm_index = ifp->if_index;
- }
- len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
- if (len > rtm->rtm_msglen) {
- struct rt_msghdr *new_rtm;
- R_Malloc(new_rtm, struct rt_msghdr *, len);
- if (new_rtm == NULL) {
- RT_UNLOCK(rt);
- senderr(ENOBUFS);
- }
- bcopy(rtm, new_rtm, rtm->rtm_msglen);
- Free(rtm); rtm = new_rtm;
- }
- (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
- rtm->rtm_flags = rt->rt_flags;
- rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
- rtm->rtm_addrs = info.rti_addrs;
- break;
-
- case RTM_CHANGE:
- /*
- * New gateway could require new ifaddr, ifp;
- * flags may also be different; ifp may be specified
- * by ll sockaddr when protocol address is ambiguous
- */
- if (((rt->rt_flags & RTF_GATEWAY) &&
- info.rti_info[RTAX_GATEWAY] != NULL) ||
- info.rti_info[RTAX_IFP] != NULL ||
- (info.rti_info[RTAX_IFA] != NULL &&
- !sa_equal(info.rti_info[RTAX_IFA],
- rt->rt_ifa->ifa_addr))) {
- RT_UNLOCK(rt);
- RADIX_NODE_HEAD_LOCK(rnh);
- error = rt_getifa_fib(&info, rt->rt_fibnum);
- /*
- * XXXRW: Really we should release this
- * reference later, but this maintains
- * historical behavior.
- */
- if (info.rti_ifa != NULL)
- ifa_free(info.rti_ifa);
- RADIX_NODE_HEAD_UNLOCK(rnh);
- if (error != 0)
- senderr(error);
- RT_LOCK(rt);
- }
- if (info.rti_ifa != NULL &&
- info.rti_ifa != rt->rt_ifa &&
- rt->rt_ifa != NULL &&
- rt->rt_ifa->ifa_rtrequest != NULL) {
- rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
- &info);
- ifa_free(rt->rt_ifa);
- }
- if (info.rti_info[RTAX_GATEWAY] != NULL) {
- RT_UNLOCK(rt);
- RADIX_NODE_HEAD_LOCK(rnh);
- RT_LOCK(rt);
-
- error = rt_setgate(rt, rt_key(rt),
- info.rti_info[RTAX_GATEWAY]);
- RADIX_NODE_HEAD_UNLOCK(rnh);
+ RIB_RUNLOCK(rnh);
+
+report:
+ RT_LOCK_ASSERT(rt);
+ if ((rt->rt_flags & RTF_HOST) == 0
+ ? jailed_without_vnet(curthread->td_ucred)
+ : prison_if(curthread->td_ucred,
+ rt_key(rt)) != 0) {
+ RT_UNLOCK(rt);
+ senderr(ESRCH);
+ }
+ info.rti_info[RTAX_DST] = rt_key(rt);
+ info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
+ rt_mask(rt), &ss);
+ info.rti_info[RTAX_GENMASK] = 0;
+ if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
+ ifp = rt->rt_ifp;
+ if (ifp) {
+ info.rti_info[RTAX_IFP] =
+ ifp->if_addr->ifa_addr;
+ error = rtm_get_jailed(&info, ifp, rt,
+ &saun, curthread->td_ucred);
if (error != 0) {
RT_UNLOCK(rt);
senderr(error);
}
- rt->rt_flags |= (RTF_GATEWAY & info.rti_flags);
+ if (ifp->if_flags & IFF_POINTOPOINT)
+ info.rti_info[RTAX_BRD] =
+ rt->rt_ifa->ifa_dstaddr;
+ rtm->rtm_index = ifp->if_index;
+ } else {
+ info.rti_info[RTAX_IFP] = NULL;
+ info.rti_info[RTAX_IFA] = NULL;
}
- if (info.rti_ifa != NULL &&
- info.rti_ifa != rt->rt_ifa) {
- ifa_ref(info.rti_ifa);
- rt->rt_ifa = info.rti_ifa;
- rt->rt_ifp = info.rti_ifp;
+ } else if ((ifp = rt->rt_ifp) != NULL) {
+ rtm->rtm_index = ifp->if_index;
+ }
+
+ /* Check if we need to realloc storage */
+ rtsock_msg_buffer(rtm->rtm_type, &info, NULL, &len);
+ if (len > alloc_len) {
+ struct rt_msghdr *new_rtm;
+ new_rtm = malloc(len, M_TEMP, M_NOWAIT);
+ if (new_rtm == NULL) {
+ RT_UNLOCK(rt);
+ senderr(ENOBUFS);
}
- /* Allow some flags to be toggled on change. */
- rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) |
- (rtm->rtm_flags & RTF_FMASK);
- rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
- &rt->rt_rmx);
- rtm->rtm_index = rt->rt_ifp->if_index;
- if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
- rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
- /* FALLTHROUGH */
- case RTM_LOCK:
- /* We don't support locks anymore */
- break;
+ bcopy(rtm, new_rtm, rtm->rtm_msglen);
+ free(rtm, M_TEMP);
+ rtm = new_rtm;
+ alloc_len = len;
}
+
+ w.w_tmem = (caddr_t)rtm;
+ w.w_tmemsize = alloc_len;
+ rtsock_msg_buffer(rtm->rtm_type, &info, &w, &len);
+
+ if (rt->rt_flags & RTF_GWFLAG_COMPAT)
+ rtm->rtm_flags = RTF_GATEWAY |
+ (rt->rt_flags & ~RTF_GWFLAG_COMPAT);
+ else
+ rtm->rtm_flags = rt->rt_flags;
+ rt_getmetrics(rt, &rtm->rtm_rmx);
+ rtm->rtm_addrs = info.rti_addrs;
+
RT_UNLOCK(rt);
break;
@@ -913,39 +882,55 @@ route_output(struct mbuf *m, struct socket *so)
}
flush:
- if (rtm) {
- if (error)
- rtm->rtm_errno = error;
- else
- rtm->rtm_flags |= RTF_DONE;
- }
- if (rt) /* XXX can this be true? */
+ if (rt != NULL)
RTFREE(rt);
- {
- struct rawcb *rp = NULL;
/*
* Check to see if we don't want our own messages.
*/
if ((so->so_options & SO_USELOOPBACK) == 0) {
- if (route_cb.any_count <= 1) {
- if (rtm)
- Free(rtm);
+ if (V_route_cb.any_count <= 1) {
+ if (rtm != NULL)
+ free(rtm, M_TEMP);
m_freem(m);
return (error);
}
/* There is another listener, so construct message */
rp = sotorawcb(so);
}
- if (rtm) {
+
+ if (rtm != NULL) {
+#ifdef INET6
+ if (rti_need_deembed) {
+ /* sin6_scope_id is recovered before sending rtm. */
+ sin6 = (struct sockaddr_in6 *)&ss;
+ for (i = 0; i < RTAX_MAX; i++) {
+ if (info.rti_info[i] == NULL)
+ continue;
+ if (info.rti_info[i]->sa_family != AF_INET6)
+ continue;
+ bcopy(info.rti_info[i], sin6, sizeof(*sin6));
+ if (sa6_recoverscope(sin6) == 0)
+ bcopy(sin6, info.rti_info[i],
+ sizeof(*sin6));
+ }
+ }
+#endif
+ if (error != 0)
+ rtm->rtm_errno = error;
+ else
+ rtm->rtm_flags |= RTF_DONE;
+
m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
if (m->m_pkthdr.len < rtm->rtm_msglen) {
m_freem(m);
m = NULL;
} else if (m->m_pkthdr.len > rtm->rtm_msglen)
m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
+
+ free(rtm, M_TEMP);
}
- if (m) {
- M_SETFIB(m, so->so_fibnum);
+ if (m != NULL) {
+ M_SETFIB(m, fibnum);
m->m_flags |= RTS_FILTER_FIB;
if (rp) {
/*
@@ -959,43 +944,21 @@ flush:
} else
rt_dispatch(m, saf);
}
- /* info.rti_info[RTAX_DST] (used above) can point inside of rtm */
- if (rtm)
- Free(rtm);
- }
+
return (error);
-#undef sa_equal
}
static void
-rt_setmetrics(u_long which, const struct rt_metrics *in,
- struct rt_metrics_lite *out)
+rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
{
-#define metric(f, e) if (which & (f)) out->e = in->e;
- /*
- * Only these are stored in the routing entry since introduction
- * of tcp hostcache. The rest is ignored.
- */
- metric(RTV_MTU, rmx_mtu);
- metric(RTV_WEIGHT, rmx_weight);
- /* Userland -> kernel timebase conversion. */
- if (which & RTV_EXPIRE)
- out->rmx_expire = in->rmx_expire ?
- in->rmx_expire - time_second + time_uptime : 0;
-#undef metric
-}
-static void
-rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
-{
-#define metric(e) out->e = in->e;
bzero(out, sizeof(*out));
- metric(rmx_mtu);
- metric(rmx_weight);
+ out->rmx_mtu = rt->rt_mtu;
+ out->rmx_weight = rt->rt_weight;
+ out->rmx_pksent = counter_u64_fetch(rt->rt_pksent);
/* Kernel -> userland timebase conversion. */
- out->rmx_expire = in->rmx_expire ?
- in->rmx_expire - time_uptime + time_second : 0;
-#undef metric
+ out->rmx_expire = rt->rt_expire ?
+ rt->rt_expire - time_uptime + time_second : 0;
}
/*
@@ -1030,6 +993,11 @@ rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
return (0); /* should be EINVAL but for compat */
}
/* accept it */
+#ifdef INET6
+ if (sa->sa_family == AF_INET6)
+ sa6_embedscope((struct sockaddr_in6 *)sa,
+ V_ip6_use_defzone);
+#endif
rtinfo->rti_info[i] = sa;
cp += SA_SIZE(sa);
}
@@ -1037,15 +1005,42 @@ rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
}
/*
- * Used by the routing socket.
+ * Fill in @dmask with valid netmask leaving original @smask
+ * intact. Mostly used with radix netmasks.
+ */
+static struct sockaddr *
+rtsock_fix_netmask(struct sockaddr *dst, struct sockaddr *smask,
+ struct sockaddr_storage *dmask)
+{
+ if (dst == NULL || smask == NULL)
+ return (NULL);
+
+ memset(dmask, 0, dst->sa_len);
+ memcpy(dmask, smask, smask->sa_len);
+ dmask->ss_len = dst->sa_len;
+ dmask->ss_family = dst->sa_family;
+
+ return ((struct sockaddr *)dmask);
+}
+
+/*
+ * Writes information related to @rtinfo object to newly-allocated mbuf.
+ * Assumes MCLBYTES is enough to construct any message.
+ * Used for OS notifications of various events (if/ifa announces, etc.)
+ *
+ * Returns allocated mbuf or NULL on failure.
*/
static struct mbuf *
-rt_msg1(int type, struct rt_addrinfo *rtinfo)
+rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo)
{
struct rt_msghdr *rtm;
struct mbuf *m;
int i;
struct sockaddr *sa;
+#ifdef INET6
+ struct sockaddr_storage ss;
+ struct sockaddr_in6 *sin6;
+#endif
int len, dlen;
switch (type) {
@@ -1072,20 +1067,17 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo)
default:
len = sizeof(struct rt_msghdr);
}
- if (len > MCLBYTES)
- panic("rt_msg1");
- m = m_gethdr(M_DONTWAIT, MT_DATA);
- if (m && len > MHLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- m_free(m);
- m = NULL;
- }
- }
+
+ /* XXXGL: can we use MJUMPAGESIZE cluster here? */
+ KASSERT(len <= MCLBYTES, ("%s: message too big", __func__));
+ if (len > MHLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (m);
+
m->m_pkthdr.len = m->m_len = len;
- m->m_pkthdr.rcvif = NULL;
rtm = mtod(m, struct rt_msghdr *);
bzero((caddr_t)rtm, len);
for (i = 0; i < RTAX_MAX; i++) {
@@ -1093,6 +1085,14 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo)
continue;
rtinfo->rti_addrs |= (1 << i);
dlen = SA_SIZE(sa);
+#ifdef INET6
+ if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)&ss;
+ bcopy(sa, sin6, sizeof(*sin6));
+ if (sa6_recoverscope(sin6) == 0)
+ sa = (struct sockaddr *)sin6;
+ }
+#endif
m_copyback(m, len, dlen, (caddr_t)sa);
len += dlen;
}
@@ -1107,17 +1107,26 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo)
}
/*
- * Used by the sysctl code and routing socket.
+ * Writes information related to @rtinfo object to preallocated buffer.
+ * Stores needed size in @plen. If @w is NULL, calculates size without
+ * writing.
+ * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation.
+ *
+ * Returns 0 on success.
+ *
*/
static int
-rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
+rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen)
{
int i;
- int len, dlen, second_time = 0;
- caddr_t cp0;
+ int len, buflen = 0, dlen;
+ caddr_t cp = NULL;
+ struct rt_msghdr *rtm = NULL;
+#ifdef INET6
+ struct sockaddr_storage ss;
+ struct sockaddr_in6 *sin6;
+#endif
- rtinfo->rti_addrs = 0;
-again:
switch (type) {
case RTM_DELADDR:
@@ -1156,9 +1165,14 @@ again:
default:
len = sizeof(struct rt_msghdr);
}
- cp0 = cp;
- if (cp0)
- cp += len;
+
+ if (w != NULL) {
+ rtm = (struct rt_msghdr *)w->w_tmem;
+ buflen = w->w_tmemsize - len;
+ cp = (caddr_t)w->w_tmem + len;
+ }
+
+ rtinfo->rti_addrs = 0;
for (i = 0; i < RTAX_MAX; i++) {
struct sockaddr *sa;
@@ -1166,45 +1180,56 @@ again:
continue;
rtinfo->rti_addrs |= (1 << i);
dlen = SA_SIZE(sa);
- if (cp) {
+ if (cp != NULL && buflen >= dlen) {
+#ifdef INET6
+ if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)&ss;
+ bcopy(sa, sin6, sizeof(*sin6));
+ if (sa6_recoverscope(sin6) == 0)
+ sa = (struct sockaddr *)sin6;
+ }
+#endif
bcopy((caddr_t)sa, cp, (unsigned)dlen);
cp += dlen;
+ buflen -= dlen;
+ } else if (cp != NULL) {
+ /*
+ * Buffer too small. Count needed size
+ * and return with error.
+ */
+ cp = NULL;
}
+
len += dlen;
}
- len = ALIGN(len);
- if (cp == NULL && w != NULL && !second_time) {
- struct walkarg *rw = w;
-
- if (rw->w_req) {
- if (rw->w_tmemsize < len) {
- if (rw->w_tmem)
- free(rw->w_tmem, M_RTABLE);
- rw->w_tmem = (caddr_t)
- malloc(len, M_RTABLE, M_NOWAIT);
- if (rw->w_tmem)
- rw->w_tmemsize = len;
- }
- if (rw->w_tmem) {
- cp = rw->w_tmem;
- second_time = 1;
- goto again;
- }
- }
+
+ if (cp != NULL) {
+ dlen = ALIGN(len) - len;
+ if (buflen < dlen)
+ cp = NULL;
+ else
+ buflen -= dlen;
}
- if (cp) {
- struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
+ len = ALIGN(len);
+ if (cp != NULL) {
+ /* fill header iff buffer is large enough */
rtm->rtm_version = RTM_VERSION;
rtm->rtm_type = type;
rtm->rtm_msglen = len;
}
- return (len);
+
+ *plen = len;
+
+ if (w != NULL && cp == NULL)
+ return (ENOBUFS);
+
+ return (0);
}
/*
* This routine is called to generate a message from the routing
- * socket indicating that a redirect has occured, a routing lookup
+ * socket indicating that a redirect has occurred, a routing lookup
* has failed, or that a protocol has detected timeouts to a particular
* destination.
*/
@@ -1216,9 +1241,9 @@ rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error,
struct mbuf *m;
struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
- if (route_cb.any_count == 0)
+ if (V_route_cb.any_count == 0)
return;
- m = rt_msg1(type, rtinfo);
+ m = rtsock_msg_mbuf(type, rtinfo);
if (m == NULL)
return;
@@ -1254,16 +1279,16 @@ rt_ifmsg(struct ifnet *ifp)
struct mbuf *m;
struct rt_addrinfo info;
- if (route_cb.any_count == 0)
+ if (V_route_cb.any_count == 0)
return;
bzero((caddr_t)&info, sizeof(info));
- m = rt_msg1(RTM_IFINFO, &info);
+ m = rtsock_msg_mbuf(RTM_IFINFO, &info);
if (m == NULL)
return;
ifm = mtod(m, struct if_msghdr *);
ifm->ifm_index = ifp->if_index;
ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
- ifm->ifm_data = ifp->if_data;
+ if_data_copy(ifp, &ifm->ifm_data);
ifm->ifm_addrs = 0;
rt_dispatch(m, AF_UNSPEC);
}
@@ -1283,8 +1308,9 @@ rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
struct mbuf *m;
struct ifa_msghdr *ifam;
struct ifnet *ifp = ifa->ifa_ifp;
+ struct sockaddr_storage ss;
- if (route_cb.any_count == 0)
+ if (V_route_cb.any_count == 0)
return (0);
ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
@@ -1292,13 +1318,14 @@ rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
bzero((caddr_t)&info, sizeof(info));
info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
- info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
+ info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
+ info.rti_info[RTAX_IFP], ifa->ifa_netmask, &ss);
info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
- if ((m = rt_msg1(ncmd, &info)) == NULL)
+ if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL)
return (ENOBUFS);
ifam = mtod(m, struct ifa_msghdr *);
ifam->ifam_index = ifp->if_index;
- ifam->ifam_metric = ifa->ifa_metric;
+ ifam->ifam_metric = ifa->ifa_ifp->if_metric;
ifam->ifam_flags = ifa->ifa_flags;
ifam->ifam_addrs = info.rti_addrs;
@@ -1331,15 +1358,16 @@ rtsock_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt,
struct sockaddr *sa;
struct mbuf *m;
struct rt_msghdr *rtm;
+ struct sockaddr_storage ss;
- if (route_cb.any_count == 0)
+ if (V_route_cb.any_count == 0)
return (0);
bzero((caddr_t)&info, sizeof(info));
- info.rti_info[RTAX_NETMASK] = rt_mask(rt);
info.rti_info[RTAX_DST] = sa = rt_key(rt);
+ info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(sa, rt_mask(rt), &ss);
info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
- if ((m = rt_msg1(cmd, &info)) == NULL)
+ if ((m = rtsock_msg_mbuf(cmd, &info)) == NULL)
return (ENOBUFS);
rtm = mtod(m, struct rt_msghdr *);
rtm->rtm_index = ifp->if_index;
@@ -1370,7 +1398,7 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
struct ifnet *ifp = ifma->ifma_ifp;
struct ifma_msghdr *ifmam;
- if (route_cb.any_count == 0)
+ if (V_route_cb.any_count == 0)
return;
bzero((caddr_t)&info, sizeof(info));
@@ -1381,7 +1409,7 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
* (similarly to how ARP entries, e.g., are presented).
*/
info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
- m = rt_msg1(cmd, &info);
+ m = rtsock_msg_mbuf(cmd, &info);
if (m == NULL)
return;
ifmam = mtod(m, struct ifma_msghdr *);
@@ -1399,10 +1427,10 @@ rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
struct if_announcemsghdr *ifan;
struct mbuf *m;
- if (route_cb.any_count == 0)
+ if (V_route_cb.any_count == 0)
return NULL;
bzero((caddr_t)info, sizeof(*info));
- m = rt_msg1(type, info);
+ m = rtsock_msg_mbuf(type, info);
if (m != NULL) {
ifan = mtod(m, struct if_announcemsghdr *);
ifan->ifan_index = ifp->if_index;
@@ -1509,6 +1537,7 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
struct rtentry *rt = (struct rtentry *)rn;
int error = 0, size;
struct rt_addrinfo info;
+ struct sockaddr_storage ss;
if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
return 0;
@@ -1519,7 +1548,8 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
bzero((caddr_t)&info, sizeof(info));
info.rti_info[RTAX_DST] = rt_key(rt);
info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
- info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+ info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
+ rt_mask(rt), &ss);
info.rti_info[RTAX_GENMASK] = 0;
if (rt->rt_ifp) {
info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
@@ -1527,16 +1557,17 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
}
- size = rt_msg2(RTM_GET, &info, NULL, w);
+ if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0)
+ return (error);
if (w->w_req && w->w_tmem) {
struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
- rtm->rtm_flags = rt->rt_flags;
- /*
- * let's be honest about this being a retarded hack
- */
- rtm->rtm_fmask = rt->rt_rmx.rmx_pksent;
- rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+ if (rt->rt_flags & RTF_GWFLAG_COMPAT)
+ rtm->rtm_flags = RTF_GATEWAY |
+ (rt->rt_flags & ~RTF_GWFLAG_COMPAT);
+ else
+ rtm->rtm_flags = rt->rt_flags;
+ rt_getmetrics(rt, &rtm->rtm_rmx);
rtm->rtm_index = rt->rt_ifp->if_index;
rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
rtm->rtm_addrs = info.rti_addrs;
@@ -1546,70 +1577,40 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
return (error);
}
-#ifdef COMPAT_FREEBSD32
-static void
-copy_ifdata32(struct if_data *src, struct if_data32 *dst)
-{
-
- bzero(dst, sizeof(*dst));
- CP(*src, *dst, ifi_type);
- CP(*src, *dst, ifi_physical);
- CP(*src, *dst, ifi_addrlen);
- CP(*src, *dst, ifi_hdrlen);
- CP(*src, *dst, ifi_link_state);
- dst->ifi_datalen = sizeof(struct if_data32);
- CP(*src, *dst, ifi_mtu);
- CP(*src, *dst, ifi_metric);
- CP(*src, *dst, ifi_baudrate);
- CP(*src, *dst, ifi_ipackets);
- CP(*src, *dst, ifi_ierrors);
- CP(*src, *dst, ifi_opackets);
- CP(*src, *dst, ifi_oerrors);
- CP(*src, *dst, ifi_collisions);
- CP(*src, *dst, ifi_ibytes);
- CP(*src, *dst, ifi_obytes);
- CP(*src, *dst, ifi_imcasts);
- CP(*src, *dst, ifi_omcasts);
- CP(*src, *dst, ifi_iqdrops);
- CP(*src, *dst, ifi_noproto);
- CP(*src, *dst, ifi_hwassist);
- CP(*src, *dst, ifi_epoch);
- TV_CP(*src, *dst, ifi_lastchange);
-}
-#endif
-
static int
sysctl_iflist_ifml(struct ifnet *ifp, struct rt_addrinfo *info,
struct walkarg *w, int len)
{
struct if_msghdrl *ifm;
+ struct if_data *ifd;
+
+ ifm = (struct if_msghdrl *)w->w_tmem;
#ifdef COMPAT_FREEBSD32
if (w->w_req->flags & SCTL_MASK32) {
struct if_msghdrl32 *ifm32;
- ifm32 = (struct if_msghdrl32 *)w->w_tmem;
+ ifm32 = (struct if_msghdrl32 *)ifm;
ifm32->ifm_addrs = info->rti_addrs;
ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
ifm32->ifm_index = ifp->if_index;
ifm32->_ifm_spare1 = 0;
ifm32->ifm_len = sizeof(*ifm32);
ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
-
- copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
-
- return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
- }
+ ifd = &ifm32->ifm_data;
+ } else
#endif
- ifm = (struct if_msghdrl *)w->w_tmem;
- ifm->ifm_addrs = info->rti_addrs;
- ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
- ifm->ifm_index = ifp->if_index;
- ifm->_ifm_spare1 = 0;
- ifm->ifm_len = sizeof(*ifm);
- ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
+ {
+ ifm->ifm_addrs = info->rti_addrs;
+ ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifm->ifm_index = ifp->if_index;
+ ifm->_ifm_spare1 = 0;
+ ifm->ifm_len = sizeof(*ifm);
+ ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
+ ifd = &ifm->ifm_data;
+ }
- ifm->ifm_data = ifp->if_data;
+ if_data_copy(ifp, ifd);
return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
}
@@ -1619,27 +1620,29 @@ sysctl_iflist_ifm(struct ifnet *ifp, struct rt_addrinfo *info,
struct walkarg *w, int len)
{
struct if_msghdr *ifm;
+ struct if_data *ifd;
+
+ ifm = (struct if_msghdr *)w->w_tmem;
#ifdef COMPAT_FREEBSD32
if (w->w_req->flags & SCTL_MASK32) {
struct if_msghdr32 *ifm32;
- ifm32 = (struct if_msghdr32 *)w->w_tmem;
+ ifm32 = (struct if_msghdr32 *)ifm;
ifm32->ifm_addrs = info->rti_addrs;
ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
ifm32->ifm_index = ifp->if_index;
-
- copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
-
- return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
- }
+ ifd = &ifm32->ifm_data;
+ } else
#endif
- ifm = (struct if_msghdr *)w->w_tmem;
- ifm->ifm_addrs = info->rti_addrs;
- ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
- ifm->ifm_index = ifp->if_index;
+ {
+ ifm->ifm_addrs = info->rti_addrs;
+ ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifm->ifm_index = ifp->if_index;
+ ifd = &ifm->ifm_data;
+ }
- ifm->ifm_data = ifp->if_data;
+ if_data_copy(ifp, ifd);
return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
}
@@ -1649,12 +1652,15 @@ sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
struct walkarg *w, int len)
{
struct ifa_msghdrl *ifam;
+ struct if_data *ifd;
+
+ ifam = (struct ifa_msghdrl *)w->w_tmem;
#ifdef COMPAT_FREEBSD32
if (w->w_req->flags & SCTL_MASK32) {
struct ifa_msghdrl32 *ifam32;
- ifam32 = (struct ifa_msghdrl32 *)w->w_tmem;
+ ifam32 = (struct ifa_msghdrl32 *)ifam;
ifam32->ifam_addrs = info->rti_addrs;
ifam32->ifam_flags = ifa->ifa_flags;
ifam32->ifam_index = ifa->ifa_ifp->if_index;
@@ -1662,24 +1668,31 @@ sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
ifam32->ifam_len = sizeof(*ifam32);
ifam32->ifam_data_off =
offsetof(struct ifa_msghdrl32, ifam_data);
- ifam32->ifam_metric = ifa->ifa_metric;
-
- copy_ifdata32(&ifa->ifa_ifp->if_data, &ifam32->ifam_data);
-
- return (SYSCTL_OUT(w->w_req, (caddr_t)ifam32, len));
- }
+ ifam32->ifam_metric = ifa->ifa_ifp->if_metric;
+ ifd = &ifam32->ifam_data;
+ } else
#endif
+ {
+ ifam->ifam_addrs = info->rti_addrs;
+ ifam->ifam_flags = ifa->ifa_flags;
+ ifam->ifam_index = ifa->ifa_ifp->if_index;
+ ifam->_ifam_spare1 = 0;
+ ifam->ifam_len = sizeof(*ifam);
+ ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
+ ifam->ifam_metric = ifa->ifa_ifp->if_metric;
+ ifd = &ifam->ifam_data;
+ }
- ifam = (struct ifa_msghdrl *)w->w_tmem;
- ifam->ifam_addrs = info->rti_addrs;
- ifam->ifam_flags = ifa->ifa_flags;
- ifam->ifam_index = ifa->ifa_ifp->if_index;
- ifam->_ifam_spare1 = 0;
- ifam->ifam_len = sizeof(*ifam);
- ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
- ifam->ifam_metric = ifa->ifa_metric;
+ bzero(ifd, sizeof(*ifd));
+ ifd->ifi_datalen = sizeof(struct if_data);
+ ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets);
+ ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets);
+ ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes);
+ ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes);
- ifam->ifam_data = ifa->if_data;
+ /* Fixup if_data carp(4) vhid. */
+ if (carp_get_vhid_p != NULL)
+ ifd->ifi_vhid = (*carp_get_vhid_p)(ifa);
return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
}
@@ -1694,7 +1707,7 @@ sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info,
ifam->ifam_addrs = info->rti_addrs;
ifam->ifam_flags = ifa->ifa_flags;
ifam->ifam_index = ifa->ifa_ifp->if_index;
- ifam->ifam_metric = ifa->ifa_metric;
+ ifam->ifam_metric = ifa->ifa_ifp->if_metric;
return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
}
@@ -1706,16 +1719,19 @@ sysctl_iflist(int af, struct walkarg *w)
struct ifaddr *ifa;
struct rt_addrinfo info;
int len, error = 0;
+ struct sockaddr_storage ss;
bzero((caddr_t)&info, sizeof(info));
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (w->w_arg && w->w_arg != ifp->if_index)
continue;
IF_ADDR_RLOCK(ifp);
ifa = ifp->if_addr;
info.rti_info[RTAX_IFP] = ifa->ifa_addr;
- len = rt_msg2(RTM_IFINFO, &info, NULL, w);
+ error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len);
+ if (error != 0)
+ goto done;
info.rti_info[RTAX_IFP] = NULL;
if (w->w_req && w->w_tmem) {
if (w->w_op == NET_RT_IFLISTL)
@@ -1732,9 +1748,12 @@ sysctl_iflist(int af, struct walkarg *w)
ifa->ifa_addr) != 0)
continue;
info.rti_info[RTAX_IFA] = ifa->ifa_addr;
- info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
+ info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
+ ifa->ifa_addr, ifa->ifa_netmask, &ss);
info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
- len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
+ error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len);
+ if (error != 0)
+ goto done;
if (w->w_req && w->w_tmem) {
if (w->w_op == NET_RT_IFLISTL)
error = sysctl_iflist_ifaml(ifa, &info,
@@ -1747,13 +1766,14 @@ sysctl_iflist(int af, struct walkarg *w)
}
}
IF_ADDR_RUNLOCK(ifp);
- info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
- info.rti_info[RTAX_BRD] = NULL;
+ info.rti_info[RTAX_IFA] = NULL;
+ info.rti_info[RTAX_NETMASK] = NULL;
+ info.rti_info[RTAX_BRD] = NULL;
}
done:
if (ifp != NULL)
IF_ADDR_RUNLOCK(ifp);
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (error);
}
@@ -1767,7 +1787,7 @@ sysctl_ifmalist(int af, struct walkarg *w)
struct ifaddr *ifa;
bzero((caddr_t)&info, sizeof(info));
- IFNET_RLOCK();
+ IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (w->w_arg && w->w_arg != ifp->if_index)
continue;
@@ -1784,7 +1804,9 @@ sysctl_ifmalist(int af, struct walkarg *w)
info.rti_info[RTAX_GATEWAY] =
(ifma->ifma_addr->sa_family != AF_LINK) ?
ifma->ifma_lladdr : NULL;
- len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
+ error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len);
+ if (error != 0)
+ goto done;
if (w->w_req && w->w_tmem) {
struct ifma_msghdr *ifmam;
@@ -1802,7 +1824,7 @@ sysctl_ifmalist(int af, struct walkarg *w)
IF_ADDR_RUNLOCK(ifp);
}
done:
- IFNET_RUNLOCK();
+ IFNET_RUNLOCK_NOSLEEP();
return (error);
}
@@ -1811,7 +1833,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
{
int *name = (int *)arg1;
u_int namelen = arg2;
- struct radix_node_head *rnh = NULL; /* silence compiler. */
+ struct rib_head *rnh = NULL; /* silence compiler. */
int i, lim, error = EINVAL;
int fib = 0;
u_char af;
@@ -1852,6 +1874,14 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
error = sysctl_wire_old_buffer(req, 0);
if (error)
return (error);
+
+ /*
+ * Allocate reply buffer in advance.
+ * All rtsock messages has maximum length of u_short.
+ */
+ w.w_tmemsize = 65536;
+ w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK);
+
switch (w.w_op) {
case NET_RT_DUMP:
@@ -1880,10 +1910,10 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
for (error = 0; error == 0 && i <= lim; i++) {
rnh = rt_tables_get_rnh(fib, i);
if (rnh != NULL) {
- RADIX_NODE_HEAD_RLOCK(rnh);
- error = rnh->rnh_walktree(rnh,
+ RIB_RLOCK(rnh);
+ error = rnh->rnh_walktree(&rnh->head,
sysctl_dumpentry, &w);
- RADIX_NODE_HEAD_RUNLOCK(rnh);
+ RIB_RUNLOCK(rnh);
} else if (af != 0)
error = EAFNOSUPPORT;
}
@@ -1898,8 +1928,8 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
error = sysctl_ifmalist(af, &w);
break;
}
- if (w.w_tmem)
- free(w.w_tmem, M_RTABLE);
+
+ free(w.w_tmem, M_TEMP);
return (error);
}
@@ -1927,7 +1957,7 @@ static struct domain routedomain = {
.dom_family = PF_ROUTE,
.dom_name = "route",
.dom_protosw = routesw,
- .dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])]
+ .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
};
VNET_DOMAIN_SET(route);
diff --git a/freebsd/sys/net/sff8436.h b/freebsd/sys/net/sff8436.h
new file mode 100644
index 00000000..3399cce5
--- /dev/null
+++ b/freebsd/sys/net/sff8436.h
@@ -0,0 +1,213 @@
+/*-
+ * Copyright (c) 2014 Yandex LLC.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * The following set of constants are from Document SFF-8436
+ * "QSFP+ 10 Gbs 4X PLUGGABLE TRANSCEIVER" revision 4.8 dated October 31, 2013
+ *
+ * This SFF standard defines the following QSFP+ memory address module:
+ *
+ * 1) 256-byte addressable block and 128-byte pages
+ * 2) Lower 128-bytes addresses always refer to the same page
+ * 3) Upper address space may refer to different pages depending on
+ * "page select" byte value.
+ *
+ * Map description:
+ *
+ * Serial address 0xA02:
+ *
+ * Lower bits
+ * 0-127 Monitoring data & page select byte
+ * 128-255:
+ *
+ * Page 00:
+ * 128-191 Base ID Fields
+ * 191-223 Extended ID
+ * 223-255 Vendor Specific ID
+ *
+ * Page 01 (optional):
+ * 128-255 App-specific data
+ *
+ * Page 02 (optional):
+ * 128-255 User EEPROM Data
+ *
+ * Page 03 (optional for Cable Assemblies)
+ * 128-223 Thresholds
+ * 225-237 Vendor Specific
+ * 238-253 Channel Controls/Monitor
+ * 254-255 Reserved
+ *
+ * All these values are read across an I2C (i squared C) bus.
+ */
+
+#define SFF_8436_BASE 0xA0 /* Base address for all requests */
+
+/* Table 17 - Lower Memory Map */
+enum {
+ SFF_8436_MID = 0, /* Copy of SFF_8436_ID field */
+ SFF_8436_STATUS = 1, /* 2-bytes status (Table 18) */
+ SFF_8436_INTR_START = 3, /* Interrupt flags (Tables 19-21) */
+ SFF_8436_INTR_END = 21,
+ SFF_8436_MODMON_START = 22, /* Module monitors (Table 22) */
+ SFF_8436_TEMP = 22, /* Internally measured module temp */
+ SFF_8436_VCC = 26, /* Internally measured module
+ * supplied voltage */
+ SFF_8436_MODMON_END = 33,
+ SFF_8436_CHMON_START = 34, /* Channel monitors (Table 23) */
+ SFF_8436_RX_CH1_MSB = 34, /* Internally measured RX input power */
+ SFF_8436_RX_CH1_LSB = 35, /* for channel 1 */
+ SFF_8436_RX_CH2_MSB = 36, /* Internally measured RX input power */
+ SFF_8436_RX_CH2_LSB = 37, /* for channel 2 */
+ SFF_8436_RX_CH3_MSB = 38, /* Internally measured RX input power */
+ SFF_8436_RX_CH3_LSB = 39, /* for channel 3 */
+ SFF_8436_RX_CH4_MSB = 40, /* Internally measured RX input power */
+ SFF_8436_RX_CH4_LSB = 41, /* for channel 4 */
+ SFF_8436_TX_CH1_MSB = 42, /* Internally measured TX bias */
+ SFF_8436_TX_CH1_LSB = 43, /* for channel 1 */
+ SFF_8436_TX_CH2_MSB = 44, /* Internally measured TX bias */
+ SFF_8436_TX_CH2_LSB = 45, /* for channel 2 */
+ SFF_8436_TX_CH3_MSB = 46, /* Internally measured TX bias */
+ SFF_8436_TX_CH3_LSB = 47, /* for channel 3 */
+ SFF_8436_TX_CH4_MSB = 48, /* Internally measured TX bias */
+ SFF_8436_TX_CH4_LSB = 49, /* for channel 4 */
+ SFF_8436_CHANMON_END = 81,
+ SFF_8436_CONTROL_START = 86, /* Control (Table 24) */
+ SFF_8436_CONTROL_END = 97,
+ SFF_8436_MASKS_START = 100, /* Module/channel masks (Table 25) */
+ SFF_8436_MASKS_END = 106,
+ SFF_8436_CHPASSWORD = 119, /* Password change entry (4 bytes) */
+ SFF_8436_PASSWORD = 123, /* Password entry area (4 bytes) */
+ SFF_8436_PAGESEL = 127, /* Page select byte */
+};
+
+/* Table 18 - Status Indicators bits */
+/* Byte 1: all bits reserved */
+
+/* Byte 2 bits */
+#define SFF_8436_STATUS_FLATMEM (1 << 2) /* Upper memory flat or paged
+ * 0 = paging, 1=Page 0 only */
+#define SFF_8436_STATUS_INTL (1 << 1) /* Digital state of the intL
+ * Interrupt output pin */
+#define SFF_8436_STATUS_NOTREADY 1 /* Module has not yet achieved
+ * power up and memory data is not
+ * ready. 0=data is ready */
+/*
+ * Upper page 0 definitions:
+ * Table 29 - Serial ID: Data fields.
+ *
+ * Note that this table is mostly the same as used in SFF-8472.
+ * The only difference is address shift: +128 bytes.
+ */
+enum {
+ SFF_8436_ID = 128, /* Module Type (defined in sff8472.h) */
+ SFF_8436_EXT_ID = 129, /* Extended transceiver type
+ * (Table 31) */
+ SFF_8436_CONNECTOR = 130, /* Connector type (Table 32) */
+ SFF_8436_TRANS_START = 131, /* Electric or Optical Compatibility
+ * (Table 33) */
+ SFF_8436_CODE_E1040100G = 131, /* 10/40/100G Ethernet Compliance Code */
+ SFF_8436_CODE_SONET = 132, /* SONET Compliance codes */
+ SFF_8436_CODE_SATA = 133, /* SAS/SATA compliance codes */
+ SFF_8436_CODE_E1G = 134, /* Gigabit Ethernet Compliant codes */
+ SFF_8436_CODE_FC_START = 135, /* FC link/media/speed */
+ SFF_8436_CODE_FC_END = 138,
+ SFF_8436_TRANS_END = 138,
+ SFF_8436_ENCODING = 139, /* Encoding Code for high speed
+ * serial encoding algorithm (see
+ * Table 34) */
+ SFF_8436_BITRATE = 140, /* Nominal signaling rate, units
+ * of 100MBd. */
+ SFF_8436_RATEID = 141, /* Extended RateSelect Compliance
+ * (see Table 35) */
+ SFF_8436_LEN_SMF_KM = 142, /* Link length supported for single
+ * mode fiber, units of km */
+ SFF_8436_LEN_OM3 = 143, /* Link length supported for 850nm
+ * 50um multimode fiber, units of 2 m */
+ SFF_8436_LEN_OM2 = 144, /* Link length supported for 50 um
+ * OM2 fiber, units of 1 m */
+ SFF_8436_LEN_OM1 = 145, /* Link length supported for 1310 nm
+ * 50um multi-mode fiber, units of 1m*/
+ SFF_8436_LEN_ASM = 144, /* Link length of passive cable assembly
+ * Length is specified as in the INF
+ * 8074, units of 1m. 0 means this is
+ * not a valid assembly. Value of 255
+ * means that the Module supports length
+ * greater than 254 m. */
+ SFF_8436_DEV_TECH = 147, /* Device/transmitter technology,
+ * see Table 36/37 */
+ SFF_8436_VENDOR_START = 148, /* Vendor name, 16 bytes, padded
+ * right with 0x20 */
+ SFF_8436_VENDOR_END = 163,
+ SFF_8436_EXTMODCODE = 164, /* Extended module code, Table 164 */
+ SFF_8436_VENDOR_OUI_START = 165 , /* Vendor OUI SFP vendor IEEE
+ * company ID */
+ SFF_8436_VENDOR_OUI_END = 167,
+ SFF_8436_PN_START = 168, /* Vendor PN, padded right with 0x20 */
+ SFF_8436_PN_END = 183,
+ SFF_8436_REV_START = 184, /* Vendor Revision, padded right 0x20 */
+ SFF_8436_REV_END = 185,
+ SFF_8436_WAVELEN_START = 186, /* Wavelength Laser wavelength
+ * (Passive/Active Cable
+ * Specification Compliance) */
+ SFF_8436_WAVELEN_END = 189,
+ SFF_8436_MAX_CASE_TEMP = 190, /* Allows to specify maximum temp
+ * above 70C. Maximum case temperature is
+ * an 8-bit value in Degrees C. A value
+ * of 0 implies the standard 70C rating. */
+ SFF_8436_CC_BASE = 191, /* CC_BASE Check code for Base ID
+ * Fields (first 63 bytes) */
+ /* Extended ID fields */
+ SFF_8436_OPTIONS_START = 192, /* Options Indicates which optional
+ * transceiver signals are
+ * implemented (see Table 39) */
+ SFF_8436_OPTIONS_END = 195,
+ SFF_8436_SN_START = 196, /* Vendor SN, right padded with 0x20 */
+ SFF_8436_SN_END = 211,
+ SFF_8436_DATE_START = 212, /* Vendor’s manufacturing date code
+ * (see Table 40) */
+ SFF_8436_DATE_END = 219,
+ SFF_8436_DIAG_TYPE = 220, /* Diagnostic Monitoring Type
+ * Indicates which type of
+ * diagnostic monitoring is
+ * implemented (if any) in the
+ * transceiver (see Table 41) */
+
+ SFF_8436_ENHANCED = 221, /* Enhanced Options Indicates which
+ * optional features are implemented
+ * (if any) in the transceiver
+ * (see Table 42) */
+ SFF_8636_BITRATE = 222, /* Nominal bit rate per channel, units
+ * of 250 Mbps */
+ SFF_8436_CC_EXT = 223, /* Check code for the Extended ID
+ * Fields (bytes 192-222 incl) */
+ SFF_8436_VENDOR_RSRVD_START = 224,
+ SFF_8436_VENDOR_RSRVD_END = 255,
+};
+
+
diff --git a/freebsd/sys/net/sff8472.h b/freebsd/sys/net/sff8472.h
new file mode 100644
index 00000000..5c50ea46
--- /dev/null
+++ b/freebsd/sys/net/sff8472.h
@@ -0,0 +1,508 @@
+/*-
+ * Copyright (c) 2013 George V. Neville-Neil
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * The following set of constants are from Document SFF-8472
+ * "Diagnostic Monitoring Interface for Optical Transceivers" revision
+ * 11.3 published by the SFF Committee on June 11, 2013
+ *
+ * The SFF standard defines two ranges of addresses, each 255 bytes
+ * long for the storage of data and diagnostics on cables, such as
+ * SFP+ optics and TwinAx cables. The ranges are defined in the
+ * following way:
+ *
+ * Base Address 0xa0 (Identification Data)
+ * 0-95 Serial ID Defined by SFP MSA
+ * 96-127 Vendor Specific Data
+ * 128-255 Reserved
+ *
+ * Base Address 0xa2 (Diagnostic Data)
+ * 0-55 Alarm and Warning Thresholds
+ * 56-95 Cal Constants
+ * 96-119 Real Time Diagnostic Interface
+ * 120-127 Vendor Specific
+ * 128-247 User Writable EEPROM
+ * 248-255 Vendor Specific
+ *
+ * Note that not all addresses are supported. Where support is
+ * optional this is noted and instructions for checking for the
+ * support are supplied.
+ *
+ * All these values are read across an I2C (i squared C) bus. Any
+ * device wishing to read these addresses must first have support for
+ * i2c calls. The Chelsio T4/T5 driver (dev/cxgbe) is one such
+ * driver.
+ */
+
+
+/* Table 3.1 Two-wire interface ID: Data Fields */
+
+enum {
+ SFF_8472_BASE = 0xa0, /* Base address for all our queries. */
+ SFF_8472_ID = 0, /* Transceiver Type (Table 3.2) */
+ SFF_8472_EXT_ID = 1, /* Extended transceiver type (Table 3.3) */
+ SFF_8472_CONNECTOR = 2, /* Connector type (Table 3.4) */
+ SFF_8472_TRANS_START = 3, /* Elec or Optical Compatibility
+ * (Table 3.5) */
+ SFF_8472_TRANS_END = 10,
+ SFF_8472_ENCODING = 11, /* Encoding Code for high speed
+ * serial encoding algorithm (see
+ * Table 3.6) */
+ SFF_8472_BITRATE = 12, /* Nominal signaling rate, units
+ * of 100MBd. (see details for
+ * rates > 25.0Gb/s) */
+ SFF_8472_RATEID = 13, /* Type of rate select
+ * functionality (see Table
+ * 3.6a) */
+ SFF_8472_LEN_SMF_KM = 14, /* Link length supported for single
+ * mode fiber, units of km */
+ SFF_8472_LEN_SMF = 15, /* Link length supported for single
+ * mode fiber, units of 100 m */
+ SFF_8472_LEN_50UM = 16, /* Link length supported for 50 um
+ * OM2 fiber, units of 10 m */
+ SFF_8472_LEN_625UM = 17, /* Link length supported for 62.5
+ * um OM1 fiber, units of 10 m */
+ SFF_8472_LEN_OM4 = 18, /* Link length supported for 50um
+ * OM4 fiber, units of 10m.
+ * Alternatively copper or direct
+ * attach cable, units of m */
+ SFF_8472_LEN_OM3 = 19, /* Link length supported for 50 um OM3 fiber, units of 10 m */
+ SFF_8472_VENDOR_START = 20, /* Vendor name [Address A0h, Bytes
+ * 20-35] */
+ SFF_8472_VENDOR_END = 35,
+ SFF_8472_TRANS = 36, /* Transceiver Code for electronic
+ * or optical compatibility (see
+ * Table 3.5) */
+ SFF_8472_VENDOR_OUI_START = 37, /* Vendor OUI SFP vendor IEEE
+ * company ID */
+ SFF_8472_VENDOR_OUI_END = 39,
+ SFF_8472_PN_START = 40, /* Vendor PN */
+ SFF_8472_PN_END = 55,
+ SFF_8472_REV_START = 56, /* Vendor Revision */
+ SFF_8472_REV_END = 59,
+ SFF_8472_WAVELEN_START = 60, /* Wavelength Laser wavelength
+ * (Passive/Active Cable
+ * Specification Compliance) */
+ SFF_8472_WAVELEN_END = 61,
+ SFF_8472_CC_BASE = 63, /* CC_BASE Check code for Base ID
+ * Fields (addresses 0 to 62) */
+
+/*
+ * Extension Fields (optional) check the options before reading other
+ * addresses.
+ */
+ SFF_8472_OPTIONS_MSB = 64, /* Options Indicates which optional
+ * transceiver signals are
+ * implemented */
+ SFF_8472_OPTIONS_LSB = 65, /* (see Table 3.7) */
+ SFF_8472_BR_MAX = 66, /* BR max Upper bit rate margin,
+ * units of % (see details for
+ * rates > 25.0Gb/s) */
+ SFF_8472_BR_MIN = 67, /* Lower bit rate margin, units of
+ * % (see details for rates >
+ * 25.0Gb/s) */
+ SFF_8472_SN_START = 68, /* Vendor SN [Address A0h, Bytes 68-83] */
+ SFF_8472_SN_END = 83,
+	SFF_8472_DATE_START	= 84,	/* Date code Vendor's manufacturing
+ * date code (see Table 3.8) */
+ SFF_8472_DATE_END = 91,
+ SFF_8472_DIAG_TYPE = 92, /* Diagnostic Monitoring Type
+ * Indicates which type of
+ * diagnostic monitoring is
+ * implemented (if any) in the
+ * transceiver (see Table 3.9)
+ */
+
+ SFF_8472_ENHANCED = 93, /* Enhanced Options Indicates which
+ * optional enhanced features are
+ * implemented (if any) in the
+ * transceiver (see Table 3.10) */
+ SFF_8472_COMPLIANCE = 94, /* SFF-8472 Compliance Indicates
+ * which revision of SFF-8472 the
+ * transceiver complies with. (see
+ * Table 3.12)*/
+ SFF_8472_CC_EXT = 95, /* Check code for the Extended ID
+ * Fields (addresses 64 to 94)
+ */
+
+ SFF_8472_VENDOR_RSRVD_START = 96,
+ SFF_8472_VENDOR_RSRVD_END = 127,
+
+ SFF_8472_RESERVED_START = 128,
+ SFF_8472_RESERVED_END = 255
+};
+
+#define SFF_8472_DIAG_IMPL (1 << 6) /* Required to be 1 */
+#define SFF_8472_DIAG_INTERNAL (1 << 5) /* Internal measurements. */
+#define SFF_8472_DIAG_EXTERNAL (1 << 4) /* External measurements. */
+#define SFF_8472_DIAG_POWER (1 << 3) /* Power measurement type */
+#define SFF_8472_DIAG_ADDR_CHG (1 << 2) /* Address change required.
+ * See SFF-8472 doc. */
+
+ /*
+ * Diagnostics are available at the two wire address 0xa2. All
+ * diagnostics are OPTIONAL so you should check 0xa0 registers 92 to
+ * see which, if any are supported.
+ */
+
+enum {SFF_8472_DIAG = 0xa2}; /* Base address for diagnostics. */
+
+ /*
+ * Table 3.15 Alarm and Warning Thresholds All values are 2 bytes
+ * and MUST be read in a single read operation starting at the MSB
+ */
+
+enum {
+ SFF_8472_TEMP_HIGH_ALM = 0, /* Temp High Alarm */
+ SFF_8472_TEMP_LOW_ALM = 2, /* Temp Low Alarm */
+ SFF_8472_TEMP_HIGH_WARN = 4, /* Temp High Warning */
+ SFF_8472_TEMP_LOW_WARN = 6, /* Temp Low Warning */
+ SFF_8472_VOLTAGE_HIGH_ALM = 8, /* Voltage High Alarm */
+ SFF_8472_VOLTAGE_LOW_ALM = 10, /* Voltage Low Alarm */
+ SFF_8472_VOLTAGE_HIGH_WARN = 12, /* Voltage High Warning */
+ SFF_8472_VOLTAGE_LOW_WARN = 14, /* Voltage Low Warning */
+ SFF_8472_BIAS_HIGH_ALM = 16, /* Bias High Alarm */
+ SFF_8472_BIAS_LOW_ALM = 18, /* Bias Low Alarm */
+ SFF_8472_BIAS_HIGH_WARN = 20, /* Bias High Warning */
+ SFF_8472_BIAS_LOW_WARN = 22, /* Bias Low Warning */
+ SFF_8472_TX_POWER_HIGH_ALM = 24, /* TX Power High Alarm */
+ SFF_8472_TX_POWER_LOW_ALM = 26, /* TX Power Low Alarm */
+ SFF_8472_TX_POWER_HIGH_WARN = 28, /* TX Power High Warning */
+ SFF_8472_TX_POWER_LOW_WARN = 30, /* TX Power Low Warning */
+ SFF_8472_RX_POWER_HIGH_ALM = 32, /* RX Power High Alarm */
+ SFF_8472_RX_POWER_LOW_ALM = 34, /* RX Power Low Alarm */
+ SFF_8472_RX_POWER_HIGH_WARN = 36, /* RX Power High Warning */
+ SFF_8472_RX_POWER_LOW_WARN = 38, /* RX Power Low Warning */
+
+ SFF_8472_RX_POWER4 = 56, /* Rx_PWR(4) Single precision
+ * floating point calibration data
+ * - Rx optical power. Bit 7 of
+ * byte 56 is MSB. Bit 0 of byte
+ * 59 is LSB. Rx_PWR(4) should be
+					 * set to zero for "internally
+					 * calibrated" devices. */
+ SFF_8472_RX_POWER3 = 60, /* Rx_PWR(3) Single precision
+ * floating point calibration data
+ * - Rx optical power. Bit 7 of
+ * byte 60 is MSB. Bit 0 of byte 63
+ * is LSB. Rx_PWR(3) should be set
+					 * to zero for "internally
+					 * calibrated" devices.*/
+ SFF_8472_RX_POWER2 = 64, /* Rx_PWR(2) Single precision
+ * floating point calibration data,
+ * Rx optical power. Bit 7 of byte
+ * 64 is MSB, bit 0 of byte 67 is
+ * LSB. Rx_PWR(2) should be set to
+					 * zero for "internally calibrated"
+ * devices. */
+ SFF_8472_RX_POWER1 = 68, /* Rx_PWR(1) Single precision
+ * floating point calibration data,
+ * Rx optical power. Bit 7 of byte
+ * 68 is MSB, bit 0 of byte 71 is
+ * LSB. Rx_PWR(1) should be set to
+					 * 1 for "internally calibrated"
+ * devices. */
+ SFF_8472_RX_POWER0 = 72, /* Rx_PWR(0) Single precision
+ * floating point calibration data,
+ * Rx optical power. Bit 7 of byte
+ * 72 is MSB, bit 0 of byte 75 is
+ * LSB. Rx_PWR(0) should be set to
+					 * zero for "internally calibrated"
+ * devices. */
+ SFF_8472_TX_I_SLOPE = 76, /* Tx_I(Slope) Fixed decimal
+ * (unsigned) calibration data,
+ * laser bias current. Bit 7 of
+ * byte 76 is MSB, bit 0 of byte 77
+ * is LSB. Tx_I(Slope) should be
+					 * set to 1 for "internally
+					 * calibrated" devices. */
+ SFF_8472_TX_I_OFFSET = 78, /* Tx_I(Offset) Fixed decimal
+					 * (signed two's complement)
+ * calibration data, laser bias
+ * current. Bit 7 of byte 78 is
+ * MSB, bit 0 of byte 79 is
+ * LSB. Tx_I(Offset) should be set
+					 * to zero for "internally
+					 * calibrated" devices. */
+ SFF_8472_TX_POWER_SLOPE = 80, /* Tx_PWR(Slope) Fixed decimal
+ * (unsigned) calibration data,
+ * transmitter coupled output
+ * power. Bit 7 of byte 80 is MSB,
+ * bit 0 of byte 81 is LSB.
+ * Tx_PWR(Slope) should be set to 1
+					 * for "internally calibrated"
+ * devices. */
+ SFF_8472_TX_POWER_OFFSET = 82, /* Tx_PWR(Offset) Fixed decimal
+					 * (signed two's complement)
+ * calibration data, transmitter
+ * coupled output power. Bit 7 of
+ * byte 82 is MSB, bit 0 of byte 83
+ * is LSB. Tx_PWR(Offset) should be
+					 * set to zero for "internally
+					 * calibrated" devices. */
+ SFF_8472_T_SLOPE = 84, /* T (Slope) Fixed decimal
+ * (unsigned) calibration data,
+ * internal module temperature. Bit
+ * 7 of byte 84 is MSB, bit 0 of
+ * byte 85 is LSB. T(Slope) should
+					 * be set to 1 for "internally
+					 * calibrated" devices. */
+ SFF_8472_T_OFFSET = 86, /* T (Offset) Fixed decimal (signed
+					 * two's complement) calibration
+ * data, internal module
+ * temperature. Bit 7 of byte 86 is
+ * MSB, bit 0 of byte 87 is LSB.
+ * T(Offset) should be set to zero
+					 * for "internally calibrated"
+ * devices. */
+ SFF_8472_V_SLOPE = 88, /* V (Slope) Fixed decimal
+ * (unsigned) calibration data,
+ * internal module supply
+ * voltage. Bit 7 of byte 88 is
+ * MSB, bit 0 of byte 89 is
+ * LSB. V(Slope) should be set to 1
+					 * for "internally calibrated"
+ * devices. */
+ SFF_8472_V_OFFSET = 90, /* V (Offset) Fixed decimal (signed
+					 * two's complement) calibration
+ * data, internal module supply
+ * voltage. Bit 7 of byte 90 is
+ * MSB. Bit 0 of byte 91 is
+ * LSB. V(Offset) should be set to
+					 * zero for "internally calibrated"
+ * devices. */
+ SFF_8472_CHECKSUM = 95, /* Checksum Byte 95 contains the
+ * low order 8 bits of the sum of
+					 * bytes 0 - 94. */
+ /* Internal measurements. */
+
+ SFF_8472_TEMP = 96, /* Internally measured module temperature. */
+ SFF_8472_VCC = 98, /* Internally measured supply
+ * voltage in transceiver.
+ */
+ SFF_8472_TX_BIAS = 100, /* Internally measured TX Bias Current. */
+ SFF_8472_TX_POWER = 102, /* Measured TX output power. */
+ SFF_8472_RX_POWER = 104, /* Measured RX input power. */
+
+ SFF_8472_STATUS = 110 /* See below */
+};
+ /* Status Bits Described */
+
+/*
+ * TX Disable State Digital state of the TX Disable Input Pin. Updated
+ * within 100ms of change on pin.
+ */
+#define SFF_8472_STATUS_TX_DISABLE (1 << 7)
+
+/*
+ * Select Read/write bit that allows software disable of
+ * laser. Writing '1' disables laser. See Table 3.11 for
+ * enable/disable timing requirements. This bit is "OR"d with the hard
+ * TX_DISABLE pin value. Note, per SFP MSA TX_DISABLE pin is default
+ * enabled unless pulled low by hardware. If Soft TX Disable is not
+ * implemented, the transceiver ignores the value of this bit. Default
+ * power up value is zero/low.
+ */
+#define SFF_8472_STATUS_SOFT_TX_DISABLE (1 << 6)
+
+/*
+ * RS(1) State Digital state of SFP input pin AS(1) per SFF-8079 or
+ * RS(1) per SFF-8431. Updated within 100ms of change on pin. See A2h
+ * Byte 118, Bit 3 for Soft RS(1) Select control information.
+ */
+#define SFF_8472_RS_STATE (1 << 5)
+
+/*
+ * Rate_Select State [aka. "RS(0)"] Digital state of the SFP
+ * Rate_Select Input Pin. Updated within 100ms of change on pin. Note:
+ * This pin is also known as AS(0) in SFF-8079 and RS(0) in SFF-8431.
+ */
+#define SFF_8472_STATUS_SELECT_STATE (1 << 4)
+
+/*
+ * Read/write bit that allows software rate select control. Writing
+ * '1' selects full bandwidth operation. This bit is "OR'd with the
+ * hard Rate_Select, AS(0) or RS(0) pin value. See Table 3.11 for
+ * timing requirements. Default at power up is logic zero/low. If Soft
+ * Rate Select is not implemented, the transceiver ignores the value
+ * of this bit. Note: Specific transceiver behaviors of this bit are
+ * identified in Table 3.6a and referenced documents. See Table 3.18a,
+ * byte 118, bit 3 for Soft RS(1) Select.
+ */
+#define SFF_8472_STATUS_SOFT_RATE_SELECT (1 << 3)
+
+/*
+ * TX Fault State Digital state of the TX Fault Output Pin. Updated
+ * within 100ms of change on pin.
+ */
+#define SFF_8472_STATUS_TX_FAULT_STATE (1 << 2)
+
+/*
+ * Digital state of the RX_LOS Output Pin. Updated within 100ms of
+ * change on pin.
+ */
+#define SFF_8472_STATUS_RX_LOS (1 << 1)
+
+/*
+ * Indicates transceiver has achieved power up and data is ready. Bit
+ * remains high until data is ready to be read at which time the
+ * device sets the bit low.
+ */
+#define SFF_8472_STATUS_DATA_READY (1 << 0)
+
+/*
+ * Table 3.2 Identifier values.
+ * Identifier constants are taken from SFF-8024 rev 2.9 table 4.1
+ * (as referenced by table 3.2 footer)
+ * */
+enum {
+ SFF_8024_ID_UNKNOWN = 0x0, /* Unknown or unspecified */
+ SFF_8024_ID_GBIC = 0x1, /* GBIC */
+ SFF_8024_ID_SFF = 0x2, /* Module soldered to motherboard (ex: SFF)*/
+	SFF_8024_ID_SFP		= 0x3,	/* SFP or SFP "Plus" */
+ SFF_8024_ID_XBI = 0x4, /* 300 pin XBI */
+ SFF_8024_ID_XENPAK = 0x5, /* Xenpak */
+ SFF_8024_ID_XFP = 0x6, /* XFP */
+ SFF_8024_ID_XFF = 0x7, /* XFF */
+ SFF_8024_ID_XFPE = 0x8, /* XFP-E */
+	SFF_8024_ID_XPAK	= 0x9,	/* XPAK */
+ SFF_8024_ID_X2 = 0xA, /* X2 */
+ SFF_8024_ID_DWDM_SFP = 0xB, /* DWDM-SFP */
+ SFF_8024_ID_QSFP = 0xC, /* QSFP */
+ SFF_8024_ID_QSFPPLUS = 0xD, /* QSFP+ */
+ SFF_8024_ID_CXP = 0xE, /* CXP */
+ SFF_8024_ID_HD4X = 0xF, /* Shielded Mini Multilane HD 4X */
+ SFF_8024_ID_HD8X = 0x10, /* Shielded Mini Multilane HD 8X */
+ SFF_8024_ID_QSFP28 = 0x11, /* QSFP28 */
+ SFF_8024_ID_CXP2 = 0x12, /* CXP2 (aka CXP28) */
+ SFF_8024_ID_CDFP = 0x13, /* CDFP (Style 1/Style 2) */
+	SFF_8024_ID_SMM4	= 0x14, /* Shielded Mini Multilane HD 4X Fanout */
+	SFF_8024_ID_SMM8	= 0x15, /* Shielded Mini Multilane HD 8X Fanout */
+ SFF_8024_ID_CDFP3 = 0x16, /* CDFP (Style3) */
+ SFF_8024_ID_LAST = SFF_8024_ID_CDFP3
+ };
+
+static const char *sff_8024_id[SFF_8024_ID_LAST + 1] = {"Unknown",
+ "GBIC",
+ "SFF",
+ "SFP/SFP+/SFP28",
+ "XBI",
+ "Xenpak",
+ "XFP",
+ "XFF",
+ "XFP-E",
+ "XPAK",
+ "X2",
+ "DWDM-SFP/SFP+",
+ "QSFP",
+ "QSFP+",
+ "CXP",
+ "HD4X",
+ "HD8X",
+ "QSFP28",
+ "CXP2",
+ "CDFP",
+ "SMM4",
+ "SMM8",
+ "CDFP3"};
+
+/* Keep compatibility with old definitions */
+#define SFF_8472_ID_UNKNOWN SFF_8024_ID_UNKNOWN
+#define SFF_8472_ID_GBIC SFF_8024_ID_GBIC
+#define SFF_8472_ID_SFF SFF_8024_ID_SFF
+#define SFF_8472_ID_SFP SFF_8024_ID_SFP
+#define SFF_8472_ID_XBI SFF_8024_ID_XBI
+#define SFF_8472_ID_XENPAK SFF_8024_ID_XENPAK
+#define SFF_8472_ID_XFP SFF_8024_ID_XFP
+#define SFF_8472_ID_XFF SFF_8024_ID_XFF
+#define SFF_8472_ID_XFPE SFF_8024_ID_XFPE
+#define SFF_8472_ID_XPAK SFF_8024_ID_XPAK
+#define SFF_8472_ID_X2 SFF_8024_ID_X2
+#define SFF_8472_ID_DWDM_SFP SFF_8024_ID_DWDM_SFP
+#define SFF_8472_ID_QSFP SFF_8024_ID_QSFP
+#define SFF_8472_ID_LAST SFF_8024_ID_LAST
+
+#define sff_8472_id sff_8024_id
+
+/*
+ * Table 3.9 Diagnostic Monitoring Type (byte 92)
+ * bits described.
+ */
+
+/*
+ * Digital diagnostic monitoring implemented.
+ * Set to 1 for transceivers implementing DDM.
+ */
+#define SFF_8472_DDM_DONE (1 << 6)
+
+/*
+ * Measurements are internally calibrated.
+ */
+#define SFF_8472_DDM_INTERNAL (1 << 5)
+
+/*
+ * Measurements are externally calibrated.
+ */
+#define SFF_8472_DDM_EXTERNAL (1 << 4)
+
+/*
+ * Received power measurement type
+ * 0 = OMA, 1 = average power
+ */
+#define SFF_8472_DDM_PMTYPE (1 << 3)
+
+/* Table 3.13 and 3.14 Temperature Conversion Values */
+#define SFF_8472_TEMP_SIGN (1 << 15)
+#define SFF_8472_TEMP_SHIFT 8
+#define SFF_8472_TEMP_MSK 0xEF00
+#define SFF_8472_TEMP_FRAC 0x00FF
+
+/* Internal Calibration Conversion factors */
+
+/*
+ * Represented as a 16 bit unsigned integer with the voltage defined
+ * as the full 16 bit value (0 - 65535) with LSB equal to 100 uVolt,
+ * yielding a total range of 0 to +6.55 Volts.
+ */
+#define SFF_8472_VCC_FACTOR 10000.0
+
+/*
+ * Represented as a 16 bit unsigned integer with the current defined
+ * as the full 16 bit value (0 - 65535) with LSB equal to 2 uA,
+ * yielding a total range of 0 to 131 mA.
+ */
+
+#define SFF_8472_BIAS_FACTOR 2000.0
+
+/*
+ * Represented as a 16 bit unsigned integer with the power defined as
+ * the full 16 bit value (0 - 65535) with LSB equal to 0.1 uW,
+ * yielding a total range of 0 to 6.5535 mW (~ -40 to +8.2 dBm).
+ */
+
+#define SFF_8472_POWER_FACTOR 10000.0
diff --git a/freebsd/sys/net/vnet.h b/freebsd/sys/net/vnet.h
index 01e26cdb..3e186c12 100644
--- a/freebsd/sys/net/vnet.h
+++ b/freebsd/sys/net/vnet.h
@@ -70,6 +70,7 @@ struct vnet {
u_int vnet_magic_n;
u_int vnet_ifcnt;
u_int vnet_sockcnt;
+ u_int vnet_state; /* SI_SUB_* */
void *vnet_data_mem;
uintptr_t vnet_data_base;
};
@@ -85,6 +86,61 @@ struct vnet {
#ifdef _KERNEL
+#define VNET_PCPUSTAT_DECLARE(type, name) \
+ VNET_DECLARE(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)])
+
+#define VNET_PCPUSTAT_DEFINE(type, name) \
+ VNET_DEFINE(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)])
+
+#define VNET_PCPUSTAT_ALLOC(name, wait) \
+ COUNTER_ARRAY_ALLOC(VNET(name), \
+ sizeof(VNET(name)) / sizeof(counter_u64_t), (wait))
+
+#define VNET_PCPUSTAT_FREE(name) \
+ COUNTER_ARRAY_FREE(VNET(name), sizeof(VNET(name)) / sizeof(counter_u64_t))
+
+#define VNET_PCPUSTAT_ADD(type, name, f, v) \
+ counter_u64_add(VNET(name)[offsetof(type, f) / sizeof(uint64_t)], (v))
+
+#define VNET_PCPUSTAT_FETCH(type, name, f) \
+ counter_u64_fetch(VNET(name)[offsetof(type, f) / sizeof(uint64_t)])
+
+#define VNET_PCPUSTAT_SYSINIT(name) \
+static void \
+vnet_##name##_init(const void *unused) \
+{ \
+ VNET_PCPUSTAT_ALLOC(name, M_WAITOK); \
+} \
+VNET_SYSINIT(vnet_ ## name ## _init, SI_SUB_INIT_IF, \
+ SI_ORDER_FIRST, vnet_ ## name ## _init, NULL)
+
+#define VNET_PCPUSTAT_SYSUNINIT(name) \
+static void \
+vnet_##name##_uninit(const void *unused) \
+{ \
+ VNET_PCPUSTAT_FREE(name); \
+} \
+VNET_SYSUNINIT(vnet_ ## name ## _uninit, SI_SUB_INIT_IF, \
+ SI_ORDER_FIRST, vnet_ ## name ## _uninit, NULL)
+
+#ifdef SYSCTL_OID
+#define SYSCTL_VNET_PCPUSTAT(parent, nbr, name, type, array, desc) \
+static int \
+array##_sysctl(SYSCTL_HANDLER_ARGS) \
+{ \
+ type s; \
+ CTASSERT((sizeof(type) / sizeof(uint64_t)) == \
+ (sizeof(VNET(array)) / sizeof(counter_u64_t))); \
+ COUNTER_ARRAY_COPY(VNET(array), &s, sizeof(type) / sizeof(uint64_t));\
+ if (req->newptr) \
+ COUNTER_ARRAY_ZERO(VNET(array), \
+ sizeof(type) / sizeof(uint64_t)); \
+ return (SYSCTL_OUT(req, &s, sizeof(type))); \
+} \
+SYSCTL_PROC(parent, nbr, name, CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_RW, \
+ NULL, 0, array ## _sysctl, "I", desc)
+#endif /* SYSCTL_OID */
+
#ifdef VIMAGE
#include <rtems/bsd/sys/lock.h>
#include <sys/proc.h> /* for struct thread */
@@ -233,53 +289,6 @@ void vnet_data_copy(void *start, int size);
void vnet_data_free(void *start_arg, int size);
/*
- * Sysctl variants for vnet-virtualized global variables. Include
- * <sys/sysctl.h> to expose these definitions.
- *
- * Note: SYSCTL_PROC() handler functions will need to resolve pointer
- * arguments themselves, if required.
- */
-#ifdef SYSCTL_OID
-int vnet_sysctl_handle_int(SYSCTL_HANDLER_ARGS);
-int vnet_sysctl_handle_opaque(SYSCTL_HANDLER_ARGS);
-int vnet_sysctl_handle_string(SYSCTL_HANDLER_ARGS);
-int vnet_sysctl_handle_uint(SYSCTL_HANDLER_ARGS);
-
-#define SYSCTL_VNET_INT(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_OID(parent, nbr, name, \
- CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_VNET|(access), \
- ptr, val, vnet_sysctl_handle_int, "I", descr)
-#define SYSCTL_VNET_PROC(parent, nbr, name, access, ptr, arg, handler, \
- fmt, descr) \
- CTASSERT(((access) & CTLTYPE) != 0); \
- SYSCTL_OID(parent, nbr, name, CTLFLAG_VNET|(access), ptr, arg, \
- handler, fmt, descr)
-#define SYSCTL_VNET_OPAQUE(parent, nbr, name, access, ptr, len, fmt, \
- descr) \
- SYSCTL_OID(parent, nbr, name, \
- CTLTYPE_OPAQUE|CTLFLAG_VNET|(access), ptr, len, \
- vnet_sysctl_handle_opaque, fmt, descr)
-#define SYSCTL_VNET_STRING(parent, nbr, name, access, arg, len, descr) \
- SYSCTL_OID(parent, nbr, name, \
- CTLTYPE_STRING|CTLFLAG_VNET|(access), \
- arg, len, vnet_sysctl_handle_string, "A", descr)
-#define SYSCTL_VNET_STRUCT(parent, nbr, name, access, ptr, type, descr) \
- SYSCTL_OID(parent, nbr, name, \
- CTLTYPE_OPAQUE|CTLFLAG_VNET|(access), ptr, \
- sizeof(struct type), vnet_sysctl_handle_opaque, "S," #type, \
- descr)
-#define SYSCTL_VNET_UINT(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_OID(parent, nbr, name, \
- CTLTYPE_UINT|CTLFLAG_MPSAFE|CTLFLAG_VNET|(access), \
- ptr, val, vnet_sysctl_handle_uint, "IU", descr)
-#define VNET_SYSCTL_ARG(req, arg1) do { \
- if (arg1 != NULL) \
- arg1 = (void *)(TD_TO_VNET((req)->td)->vnet_data_base + \
- (uintptr_t)(arg1)); \
-} while (0)
-#endif /* SYSCTL_OID */
-
-/*
* Virtual sysinit mechanism, allowing network stack components to declare
* startup and shutdown methods to be run when virtual network stack
* instances are created and destroyed.
@@ -402,29 +411,6 @@ do { \
#define VNET(n) (n)
/*
- * When VIMAGE isn't compiled into the kernel, virtaulized SYSCTLs simply
- * become normal SYSCTLs.
- */
-#ifdef SYSCTL_OID
-#define SYSCTL_VNET_INT(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_INT(parent, nbr, name, access, ptr, val, descr)
-#define SYSCTL_VNET_PROC(parent, nbr, name, access, ptr, arg, handler, \
- fmt, descr) \
- SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, \
- descr)
-#define SYSCTL_VNET_OPAQUE(parent, nbr, name, access, ptr, len, fmt, \
- descr) \
- SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr)
-#define SYSCTL_VNET_STRING(parent, nbr, name, access, arg, len, descr) \
- SYSCTL_STRING(parent, nbr, name, access, arg, len, descr)
-#define SYSCTL_VNET_STRUCT(parent, nbr, name, access, ptr, type, descr) \
- SYSCTL_STRUCT(parent, nbr, name, access, ptr, type, descr)
-#define SYSCTL_VNET_UINT(parent, nbr, name, access, ptr, val, descr) \
- SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr)
-#define VNET_SYSCTL_ARG(req, arg1)
-#endif /* SYSCTL_OID */
-
-/*
* When VIMAGE isn't compiled into the kernel, VNET_SYSINIT/VNET_SYSUNINIT
* map into normal sysinits, which have the same ordering properties.
*/
diff --git a/freebsd/sys/netgraph/ng_socket.h b/freebsd/sys/netgraph/ng_socket.h
new file mode 100644
index 00000000..b7e83101
--- /dev/null
+++ b/freebsd/sys/netgraph/ng_socket.h
@@ -0,0 +1,69 @@
+/*
+ * ng_socket.h
+ */
+
+/*-
+ * Copyright (c) 1996-1999 Whistle Communications, Inc.
+ * All rights reserved.
+ *
+ * Subject to the following obligations and disclaimer of warranty, use and
+ * redistribution of this software, in source or object code forms, with or
+ * without modifications are expressly permitted by Whistle Communications;
+ * provided, however, that:
+ * 1. Any and all reproductions of the source or object code must include the
+ * copyright notice above and the following disclaimer of warranties; and
+ * 2. No rights are granted, in any manner or form, to use Whistle
+ * Communications, Inc. trademarks, including the mark "WHISTLE
+ * COMMUNICATIONS" on advertising, endorsements, or otherwise except as
+ * such appears in the above copyright notice or in the software.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
+ * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
+ * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
+ * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
+ * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY
+ * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS
+ * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE.
+ * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES
+ * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING
+ * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ *
+ * Author: Julian Elischer <julian@freebsd.org>
+ *
+ * $FreeBSD$
+ * $Whistle: ng_socket.h,v 1.5 1999/01/20 00:22:14 archie Exp $
+ */
+
+#ifndef _NETGRAPH_NG_SOCKET_H_
+#define _NETGRAPH_NG_SOCKET_H_
+
+/* Netgraph node type name and cookie */
+#define NG_SOCKET_NODE_TYPE "socket"
+#define NGM_SOCKET_COOKIE 851601233
+
+/* Netgraph socket(2) constants */
+#define NG_DATA 1
+#define NG_CONTROL 2
+
+/* Commands */
+enum {
+ NGM_SOCK_CMD_NOLINGER = 1, /* close the socket with last hook */
+ NGM_SOCK_CMD_LINGER /* Keep socket even if 0 hooks */
+};
+
+/* Netgraph version of struct sockaddr */
+struct sockaddr_ng {
+ unsigned char sg_len; /* total length */
+ sa_family_t sg_family; /* address family */
+ char sg_data[14]; /* actually longer; address value */
+};
+
+#endif /* _NETGRAPH_NG_SOCKET_H_ */
+
diff --git a/freebsd/sys/netinet/accf_dns.c b/freebsd/sys/netinet/accf_dns.c
index 9858db4e..b6d2ff63 100644
--- a/freebsd/sys/netinet/accf_dns.c
+++ b/freebsd/sys/netinet/accf_dns.c
@@ -77,7 +77,7 @@ sohasdns(struct socket *so, void *arg, int waitflag)
struct sockbuf *sb = &so->so_rcv;
/* If the socket is full, we're ready. */
- if (sb->sb_cc >= sb->sb_hiwat || sb->sb_mbcnt >= sb->sb_mbmax)
+ if (sbused(sb) >= sb->sb_hiwat || sb->sb_mbcnt >= sb->sb_mbmax)
goto ready;
/* Check to see if we have a request. */
@@ -117,14 +117,14 @@ skippacket(struct sockbuf *sb) {
unsigned long packlen;
struct packet q, *p = &q;
- if (sb->sb_cc < 2)
+ if (sbavail(sb) < 2)
return DNS_WAIT;
q.m = sb->sb_mb;
q.n = q.m->m_nextpkt;
q.moff = 0;
q.offset = 0;
- q.len = sb->sb_cc;
+ q.len = sbavail(sb);
GET16(p, packlen);
if (packlen + 2 > q.len)
diff --git a/freebsd/sys/netinet/accf_http.c b/freebsd/sys/netinet/accf_http.c
index 3af867b0..83093db3 100644
--- a/freebsd/sys/netinet/accf_http.c
+++ b/freebsd/sys/netinet/accf_http.c
@@ -94,7 +94,7 @@ sbfull(struct sockbuf *sb)
"mbcnt(%ld) >= mbmax(%ld): %d",
sb->sb_cc, sb->sb_hiwat, sb->sb_cc >= sb->sb_hiwat,
sb->sb_mbcnt, sb->sb_mbmax, sb->sb_mbcnt >= sb->sb_mbmax);
- return (sb->sb_cc >= sb->sb_hiwat || sb->sb_mbcnt >= sb->sb_mbmax);
+ return (sbused(sb) >= sb->sb_hiwat || sb->sb_mbcnt >= sb->sb_mbmax);
}
/*
@@ -164,13 +164,14 @@ static int
sohashttpget(struct socket *so, void *arg, int waitflag)
{
- if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0 && !sbfull(&so->so_rcv)) {
+ if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0 &&
+ !sbfull(&so->so_rcv)) {
struct mbuf *m;
char *cmp;
int cmplen, cc;
m = so->so_rcv.sb_mb;
- cc = so->so_rcv.sb_cc - 1;
+ cc = sbavail(&so->so_rcv) - 1;
if (cc < 1)
return (SU_OK);
switch (*mtod(m, char *)) {
@@ -217,7 +218,7 @@ soparsehttpvers(struct socket *so, void *arg, int waitflag)
goto fallout;
m = so->so_rcv.sb_mb;
- cc = so->so_rcv.sb_cc;
+ cc = sbavail(&so->so_rcv);
inspaces = spaces = 0;
for (m = so->so_rcv.sb_mb; m; m = n) {
n = m->m_nextpkt;
@@ -306,7 +307,7 @@ soishttpconnected(struct socket *so, void *arg, int waitflag)
* have NCHRS left
*/
copied = 0;
- ccleft = so->so_rcv.sb_cc;
+ ccleft = sbavail(&so->so_rcv);
if (ccleft < NCHRS)
goto readmore;
a = b = c = '\0';
diff --git a/freebsd/sys/netinet/cc/cc.c b/freebsd/sys/netinet/cc/cc.c
index 4be9a63b..ab3e831e 100644
--- a/freebsd/sys/netinet/cc/cc.c
+++ b/freebsd/sys/netinet/cc/cc.c
@@ -65,13 +65,13 @@ __FBSDID("$FreeBSD$");
#include <sys/socketvar.h>
#include <sys/sysctl.h>
-#include <net/if.h>
-#include <net/if_var.h>
+#include <net/vnet.h>
-#include <netinet/cc.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
+#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
+#include <netinet/cc/cc.h>
#include <netinet/cc/cc_module.h>
@@ -320,13 +320,14 @@ SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL);
/* Declare sysctl tree and populate it. */
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL,
- "congestion control related settings");
+ "Congestion control related settings");
-SYSCTL_VNET_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW,
- NULL, 0, cc_default_algo, "A", "default congestion control algorithm");
+SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm,
+ CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW,
+ NULL, 0, cc_default_algo, "A", "Default congestion control algorithm");
#ifndef __rtems__
SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD,
NULL, 0, cc_list_available, "A",
- "list available congestion control algorithms");
+ "List available congestion control algorithms");
#endif /* __rtems__ */
diff --git a/freebsd/sys/netinet/cc.h b/freebsd/sys/netinet/cc/cc.h
index 14b4a9de..1da6f620 100644
--- a/freebsd/sys/netinet/cc.h
+++ b/freebsd/sys/netinet/cc/cc.h
@@ -48,11 +48,12 @@
* http://caia.swin.edu.au/urp/newtcp/
*/
-#ifndef _NETINET_CC_H_
-#define _NETINET_CC_H_
+#ifndef _NETINET_CC_CC_H_
+#define _NETINET_CC_CC_H_
-/* XXX: TCP_CA_NAME_MAX define lives in tcp.h for compat reasons. */
-#include <netinet/tcp.h>
+#if !defined(_KERNEL)
+#error "no user-servicable parts inside"
+#endif
/* Global CC vars. */
extern STAILQ_HEAD(cc_head, cc_algo) cc_list;
@@ -90,6 +91,10 @@ struct cc_var {
/* cc_var flags. */
#define CCF_ABC_SENTAWND 0x0001 /* ABC counted cwnd worth of bytes? */
#define CCF_CWND_LIMITED 0x0002 /* Are we currently cwnd limited? */
+#define CCF_DELACK 0x0004 /* Is this ack delayed? */
+#define CCF_ACKNOW 0x0008 /* Will this ack be sent now? */
+#define CCF_IPHDR_CE 0x0010 /* Does this packet set CE bit? */
+#define CCF_TCPHDR_CWR 0x0020 /* Does this packet set CWR bit? */
/* ACK types passed to the ack_received() hook. */
#define CC_ACK 0x0001 /* Regular in sequence ACK. */
@@ -143,6 +148,12 @@ struct cc_algo {
/* Called when data transfer resumes after an idle period. */
void (*after_idle)(struct cc_var *ccv);
+ /* Called for an additional ECN processing apart from RFC3168. */
+ void (*ecnpkt_handler)(struct cc_var *ccv);
+
+ /* Called for {get|set}sockopt() on a TCP socket with TCP_CCALGOOPT. */
+ int (*ctl_output)(struct cc_var *, struct sockopt *, void *);
+
STAILQ_ENTRY (cc_algo) entries;
};
@@ -164,4 +175,4 @@ extern struct rwlock cc_list_lock;
#define CC_LIST_WUNLOCK() rw_wunlock(&cc_list_lock)
#define CC_LIST_LOCK_ASSERT() rw_assert(&cc_list_lock, RA_LOCKED)
-#endif /* _NETINET_CC_H_ */
+#endif /* _NETINET_CC_CC_H_ */
diff --git a/freebsd/sys/netinet/cc/cc_newreno.c b/freebsd/sys/netinet/cc/cc_newreno.c
index c0f0cfc5..8077bb22 100644
--- a/freebsd/sys/netinet/cc/cc_newreno.c
+++ b/freebsd/sys/netinet/cc/cc_newreno.c
@@ -64,10 +64,10 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
-#include <netinet/cc.h>
+#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
-
+#include <netinet/cc/cc.h>
#include <netinet/cc/cc_module.h>
static void newreno_ack_received(struct cc_var *ccv, uint16_t type);
@@ -216,6 +216,9 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type)
static void
newreno_post_recovery(struct cc_var *ccv)
{
+ int pipe;
+ pipe = 0;
+
if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
/*
* Fast recovery will conclude after returning from this
@@ -226,10 +229,13 @@ newreno_post_recovery(struct cc_var *ccv)
*
* XXXLAS: Find a way to do this without needing curack
*/
- if (SEQ_GT(ccv->curack + CCV(ccv, snd_ssthresh),
- CCV(ccv, snd_max)))
- CCV(ccv, snd_cwnd) = CCV(ccv, snd_max) -
- ccv->curack + CCV(ccv, t_maxseg);
+ if (V_tcp_do_rfc6675_pipe)
+ pipe = tcp_compute_pipe(ccv->ccvc.tcp);
+ else
+ pipe = CCV(ccv, snd_max) - ccv->curack;
+
+ if (pipe < CCV(ccv, snd_ssthresh))
+ CCV(ccv, snd_cwnd) = pipe + CCV(ccv, t_maxseg);
else
CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
}
diff --git a/freebsd/sys/netinet/icmp6.h b/freebsd/sys/netinet/icmp6.h
index 5483721d..af35c847 100644
--- a/freebsd/sys/netinet/icmp6.h
+++ b/freebsd/sys/netinet/icmp6.h
@@ -144,6 +144,9 @@ struct icmp6_hdr {
#define ICMP6_DST_UNREACH_BEYONDSCOPE 2 /* beyond scope of source address */
#define ICMP6_DST_UNREACH_ADDR 3 /* address unreachable */
#define ICMP6_DST_UNREACH_NOPORT 4 /* port unreachable */
+#define ICMP6_DST_UNREACH_POLICY 5 /* failed ingress/egress policy */
+#define ICMP6_DST_UNREACH_REJECT 6 /* Reject route to destination */
+#define ICMP6_DST_UNREACH_SRCROUTE 7 /* Error in source routing header */
#define ICMP6_TIME_EXCEED_TRANSIT 0 /* ttl==0 in transit */
#define ICMP6_TIME_EXCEED_REASSEMBLY 1 /* ttl==0 in reass */
@@ -297,9 +300,11 @@ struct nd_opt_hdr { /* Neighbor discovery option header */
#define ND_OPT_PREFIX_INFORMATION 3
#define ND_OPT_REDIRECTED_HEADER 4
#define ND_OPT_MTU 5
+#define ND_OPT_NONCE 14 /* RFC 3971 */
#define ND_OPT_ROUTE_INFO 24 /* RFC 4191 */
#define ND_OPT_RDNSS 25 /* RFC 6106 */
#define ND_OPT_DNSSL 31 /* RFC 6106 */
+#define ND_OPT_MAX 31
struct nd_opt_prefix_info { /* prefix information */
u_int8_t nd_opt_pi_type;
@@ -330,6 +335,16 @@ struct nd_opt_mtu { /* MTU option */
u_int32_t nd_opt_mtu_mtu;
} __packed;
+#define ND_OPT_NONCE_LEN ((1 * 8) - 2)
+#if ((ND_OPT_NONCE_LEN + 2) % 8) != 0
+#error "(ND_OPT_NONCE_LEN + 2) must be a multiple of 8."
+#endif
+struct nd_opt_nonce { /* nonce option */
+ u_int8_t nd_opt_nonce_type;
+ u_int8_t nd_opt_nonce_len;
+ u_int8_t nd_opt_nonce[ND_OPT_NONCE_LEN];
+} __packed;
+
struct nd_opt_route_info { /* route info */
u_int8_t nd_opt_rti_type;
u_int8_t nd_opt_rti_len;
@@ -555,39 +570,39 @@ do { \
* of the internet control message protocol version 6.
*/
struct icmp6errstat {
- u_quad_t icp6errs_dst_unreach_noroute;
- u_quad_t icp6errs_dst_unreach_admin;
- u_quad_t icp6errs_dst_unreach_beyondscope;
- u_quad_t icp6errs_dst_unreach_addr;
- u_quad_t icp6errs_dst_unreach_noport;
- u_quad_t icp6errs_packet_too_big;
- u_quad_t icp6errs_time_exceed_transit;
- u_quad_t icp6errs_time_exceed_reassembly;
- u_quad_t icp6errs_paramprob_header;
- u_quad_t icp6errs_paramprob_nextheader;
- u_quad_t icp6errs_paramprob_option;
- u_quad_t icp6errs_redirect; /* we regard redirect as an error here */
- u_quad_t icp6errs_unknown;
+ uint64_t icp6errs_dst_unreach_noroute;
+ uint64_t icp6errs_dst_unreach_admin;
+ uint64_t icp6errs_dst_unreach_beyondscope;
+ uint64_t icp6errs_dst_unreach_addr;
+ uint64_t icp6errs_dst_unreach_noport;
+ uint64_t icp6errs_packet_too_big;
+ uint64_t icp6errs_time_exceed_transit;
+ uint64_t icp6errs_time_exceed_reassembly;
+ uint64_t icp6errs_paramprob_header;
+ uint64_t icp6errs_paramprob_nextheader;
+ uint64_t icp6errs_paramprob_option;
+ uint64_t icp6errs_redirect; /* we regard redirect as an error here */
+ uint64_t icp6errs_unknown;
};
struct icmp6stat {
/* statistics related to icmp6 packets generated */
- u_quad_t icp6s_error; /* # of calls to icmp6_error */
- u_quad_t icp6s_canterror; /* no error 'cuz old was icmp */
- u_quad_t icp6s_toofreq; /* no error 'cuz rate limitation */
- u_quad_t icp6s_outhist[256];
+ uint64_t icp6s_error; /* # of calls to icmp6_error */
+ uint64_t icp6s_canterror; /* no error 'cuz old was icmp */
+ uint64_t icp6s_toofreq; /* no error 'cuz rate limitation */
+ uint64_t icp6s_outhist[256];
/* statistics related to input message processed */
- u_quad_t icp6s_badcode; /* icmp6_code out of range */
- u_quad_t icp6s_tooshort; /* packet < sizeof(struct icmp6_hdr) */
- u_quad_t icp6s_checksum; /* bad checksum */
- u_quad_t icp6s_badlen; /* calculated bound mismatch */
+ uint64_t icp6s_badcode; /* icmp6_code out of range */
+ uint64_t icp6s_tooshort; /* packet < sizeof(struct icmp6_hdr) */
+ uint64_t icp6s_checksum; /* bad checksum */
+ uint64_t icp6s_badlen; /* calculated bound mismatch */
/*
* number of responses: this member is inherited from netinet code, but
* for netinet6 code, it is already available in icp6s_outhist[].
*/
- u_quad_t icp6s_reflect;
- u_quad_t icp6s_inhist[256];
- u_quad_t icp6s_nd_toomanyopt; /* too many ND options */
+ uint64_t icp6s_reflect;
+ uint64_t icp6s_inhist[256];
+ uint64_t icp6s_nd_toomanyopt; /* too many ND options */
struct icmp6errstat icp6s_outerrhist;
#define icp6s_odst_unreach_noroute \
icp6s_outerrhist.icp6errs_dst_unreach_noroute
@@ -607,29 +622,33 @@ struct icmp6stat {
#define icp6s_oparamprob_option icp6s_outerrhist.icp6errs_paramprob_option
#define icp6s_oredirect icp6s_outerrhist.icp6errs_redirect
#define icp6s_ounknown icp6s_outerrhist.icp6errs_unknown
- u_quad_t icp6s_pmtuchg; /* path MTU changes */
- u_quad_t icp6s_nd_badopt; /* bad ND options */
- u_quad_t icp6s_badns; /* bad neighbor solicitation */
- u_quad_t icp6s_badna; /* bad neighbor advertisement */
- u_quad_t icp6s_badrs; /* bad router advertisement */
- u_quad_t icp6s_badra; /* bad router advertisement */
- u_quad_t icp6s_badredirect; /* bad redirect message */
+ uint64_t icp6s_pmtuchg; /* path MTU changes */
+ uint64_t icp6s_nd_badopt; /* bad ND options */
+ uint64_t icp6s_badns; /* bad neighbor solicitation */
+ uint64_t icp6s_badna; /* bad neighbor advertisement */
+ uint64_t icp6s_badrs; /* bad router advertisement */
+ uint64_t icp6s_badra; /* bad router advertisement */
+ uint64_t icp6s_badredirect; /* bad redirect message */
};
#ifdef _KERNEL
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct icmp6stat, icmp6stat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
-#define ICMP6STAT_ADD(name, val) V_icmp6stat.name += (val)
+#define ICMP6STAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct icmp6stat, icmp6stat, name, (val))
#define ICMP6STAT_INC(name) ICMP6STAT_ADD(name, 1)
/*
* Kernel module consumers must use this accessor macro.
*/
void kmod_icmp6stat_inc(int statnum);
-#define KMOD_ICMP6STAT_INC(name) \
- kmod_icmp6stat_inc(offsetof(struct icmp6stat, name) / sizeof(u_quad_t))
+#define KMOD_ICMP6STAT_INC(name) \
+ kmod_icmp6stat_inc(offsetof(struct icmp6stat, name) / sizeof(uint64_t))
#endif
/*
@@ -688,7 +707,9 @@ void icmp6_mtudisc_update(struct ip6ctlparam *, int);
#define icmp6_ifstat_inc(ifp, tag) \
do { \
if (ifp) \
- ((struct in6_ifextra *)((ifp)->if_afdata[AF_INET6]))->icmp6_ifstat->tag++; \
+ counter_u64_add(((struct in6_ifextra *) \
+ ((ifp)->if_afdata[AF_INET6]))->icmp6_ifstat[\
+ offsetof(struct icmp6_ifstat, tag) / sizeof(uint64_t)], 1);\
} while (/*CONSTCOND*/ 0)
#define icmp6_ifoutstat_inc(ifp, type, code) \
diff --git a/freebsd/sys/netinet/icmp_var.h b/freebsd/sys/netinet/icmp_var.h
index d939cc2e..d3e72bc2 100644
--- a/freebsd/sys/netinet/icmp_var.h
+++ b/freebsd/sys/netinet/icmp_var.h
@@ -58,11 +58,15 @@ struct icmpstat {
};
#ifdef _KERNEL
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct icmpstat, icmpstat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
-#define ICMPSTAT_ADD(name, val) V_icmpstat.name += (val)
+#define ICMPSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct icmpstat, icmpstat, name, (val))
#define ICMPSTAT_INC(name) ICMPSTAT_ADD(name, 1)
/*
@@ -70,30 +74,19 @@ struct icmpstat {
*/
void kmod_icmpstat_inc(int statnum);
#define KMOD_ICMPSTAT_INC(name) \
- kmod_icmpstat_inc(offsetof(struct icmpstat, name) / sizeof(u_long))
+ kmod_icmpstat_inc(offsetof(struct icmpstat, name) / sizeof(uint64_t))
#endif
/*
- * Names for ICMP sysctl objects
+ * Identifiers for ICMP sysctl nodes
*/
#define ICMPCTL_MASKREPL 1 /* allow replies to netmask requests */
#define ICMPCTL_STATS 2 /* statistics (read-only) */
#define ICMPCTL_ICMPLIM 3
-#define ICMPCTL_MAXID 4
-
-#define ICMPCTL_NAMES { \
- { 0, 0 }, \
- { "maskrepl", CTLTYPE_INT }, \
- { "stats", CTLTYPE_STRUCT }, \
- { "icmplim", CTLTYPE_INT }, \
-}
#ifdef _KERNEL
SYSCTL_DECL(_net_inet_icmp);
-VNET_DECLARE(struct icmpstat, icmpstat); /* icmp statistics. */
-#define V_icmpstat VNET(icmpstat)
-
extern int badport_bandlim(int);
#define BANDLIM_UNLIMITED -1
#define BANDLIM_ICMP_UNREACH 0
diff --git a/freebsd/sys/netinet/if_atm.c b/freebsd/sys/netinet/if_atm.c
index e26d0c7c..cb0317fb 100644
--- a/freebsd/sys/netinet/if_atm.c
+++ b/freebsd/sys/netinet/if_atm.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/if_atm.h>
@@ -319,7 +320,7 @@ failed:
* but this is enough for PVCs entered via the "route" command.
*/
int
-atmresolve(struct rtentry *rt, struct mbuf *m, struct sockaddr *dst,
+atmresolve(struct rtentry *rt, struct mbuf *m, const struct sockaddr *dst,
struct atm_pseudohdr *desten)
{
struct sockaddr_dl *sdl;
@@ -331,7 +332,8 @@ atmresolve(struct rtentry *rt, struct mbuf *m, struct sockaddr *dst,
}
if (rt == NULL) {
- rt = RTALLOC1(dst, 0); /* link level on table 0 XXX MRT */
+ /* link level on table 0 XXX MRT */
+ rt = RTALLOC1(__DECONST(struct sockaddr *, dst), 0);
if (rt == NULL)
goto bad; /* failed */
RT_REMREF(rt); /* don't keep LL references */
diff --git a/freebsd/sys/netinet/if_atm.h b/freebsd/sys/netinet/if_atm.h
index bd8b5143..04ad218d 100644
--- a/freebsd/sys/netinet/if_atm.h
+++ b/freebsd/sys/netinet/if_atm.h
@@ -43,5 +43,5 @@ struct rtentry;
struct sockaddr;
void atm_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
-int atmresolve(struct rtentry *, struct mbuf *, struct sockaddr *,
+int atmresolve(struct rtentry *, struct mbuf *, const struct sockaddr *,
struct atm_pseudohdr *);
diff --git a/freebsd/sys/netinet/if_ether.c b/freebsd/sys/netinet/if_ether.c
index eec06dd8..0a8b101e 100644
--- a/freebsd/sys/netinet/if_ether.c
+++ b/freebsd/sys/netinet/if_ether.c
@@ -44,39 +44,50 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/proc.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/netisr.h>
-#include <net/if_llc.h>
#include <net/ethernet.h>
#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_fib.h>
#include <netinet/in_var.h>
#include <net/if_llatbl.h>
#include <netinet/if_ether.h>
-#if defined(INET)
+#ifdef INET
#include <netinet/ip_carp.h>
#endif
-#include <net/if_arc.h>
-#include <net/iso88025.h>
-
#include <security/mac/mac_framework.h>
-#define SIN(s) ((struct sockaddr_in *)s)
-#define SDL(s) ((struct sockaddr_dl *)s)
+#define SIN(s) ((const struct sockaddr_in *)(s))
+
+static struct timeval arp_lastlog;
+static int arp_curpps;
+static int arp_maxpps = 1;
+
+/* Simple ARP state machine */
+enum arp_llinfo_state {
+ ARP_LLINFO_INCOMPLETE = 0, /* No LLE data */
+ ARP_LLINFO_REACHABLE, /* LLE is valid */
+ ARP_LLINFO_VERIFY, /* LLE is valid, need refresh */
+ ARP_LLINFO_DELETED, /* LLE is deleted */
+};
SYSCTL_DECL(_net_link_ether);
static SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
@@ -86,53 +97,67 @@ static SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, "");
static VNET_DEFINE(int, arpt_keep) = (20*60); /* once resolved, good for 20
* minutes */
static VNET_DEFINE(int, arp_maxtries) = 5;
-VNET_DEFINE(int, useloopback) = 1; /* use loopback interface for
- * local traffic */
static VNET_DEFINE(int, arp_proxyall) = 0;
static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for
* 20 seconds */
-VNET_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */
+static VNET_DEFINE(int, arpt_rexmit) = 1; /* retransmit arp entries, sec*/
+VNET_PCPUSTAT_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */
+VNET_PCPUSTAT_SYSINIT(arpstat);
+
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(arpstat);
+#endif /* VIMAGE */
static VNET_DEFINE(int, arp_maxhold) = 1;
#define V_arpt_keep VNET(arpt_keep)
#define V_arpt_down VNET(arpt_down)
+#define V_arpt_rexmit VNET(arpt_rexmit)
#define V_arp_maxtries VNET(arp_maxtries)
#define V_arp_proxyall VNET(arp_proxyall)
-#define V_arpstat VNET(arpstat)
#define V_arp_maxhold VNET(arp_maxhold)
-SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW,
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(arpt_keep), 0,
"ARP entry lifetime in seconds");
-SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW,
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(arp_maxtries), 0,
"ARP resolution attempts before returning error");
-SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW,
- &VNET_NAME(useloopback), 0,
- "Use the loopback interface for local traffic");
-SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW,
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(arp_proxyall), 0,
"Enable proxy ARP for all suitable requests");
-SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_RW,
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(arpt_down), 0,
"Incomplete ARP entry lifetime in seconds");
-SYSCTL_VNET_STRUCT(_net_link_ether_arp, OID_AUTO, stats, CTLFLAG_RW,
- &VNET_NAME(arpstat), arpstat,
- "ARP statistics (struct arpstat, net/if_arp.h)");
-SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_RW,
+SYSCTL_VNET_PCPUSTAT(_net_link_ether_arp, OID_AUTO, stats, struct arpstat,
+ arpstat, "ARP statistics (struct arpstat, net/if_arp.h)");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(arp_maxhold), 0,
"Number of packets to hold per ARP entry");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_log_per_second,
+ CTLFLAG_RW, &arp_maxpps, 0,
+ "Maximum number of remotely triggered ARP messages that can be "
+ "logged per second");
+
+#define ARP_LOG(pri, ...) do { \
+ if (ppsratecheck(&arp_lastlog, &arp_curpps, arp_maxpps)) \
+ log((pri), "arp: " __VA_ARGS__); \
+} while (0)
+
-static void arp_init(void);
-void arprequest(struct ifnet *,
- struct in_addr *, struct in_addr *, u_char *);
static void arpintr(struct mbuf *);
static void arptimer(void *);
#ifdef INET
static void in_arpinput(struct mbuf *);
#endif
+static void arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr,
+ struct ifnet *ifp, int bridged, struct llentry *la);
+static void arp_mark_lle_reachable(struct llentry *la);
+static void arp_iflladdr(void *arg __unused, struct ifnet *ifp);
+
+static eventhandler_tag iflladdr_tag;
+
static const struct netisr_handler arp_nh = {
.nh_name = "arp",
.nh_handler = arpintr,
@@ -140,29 +165,6 @@ static const struct netisr_handler arp_nh = {
.nh_policy = NETISR_POLICY_SOURCE,
};
-#ifdef AF_INET
-void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
-
-/*
- * called by in_ifscrub to remove entry from the table when
- * the interface goes away
- */
-void
-arp_ifscrub(struct ifnet *ifp, uint32_t addr)
-{
- struct sockaddr_in addr4;
-
- bzero((void *)&addr4, sizeof(addr4));
- addr4.sin_len = sizeof(addr4);
- addr4.sin_family = AF_INET;
- addr4.sin_addr.s_addr = addr;
- IF_AFDATA_WLOCK(ifp);
- lla_lookup(LLTABLE(ifp), (LLE_DELETE | LLE_IFADDR),
- (struct sockaddr *)&addr4);
- IF_AFDATA_WUNLOCK(ifp);
-}
-#endif
-
/*
* Timeout routine. Age arp_tab entries periodically.
*/
@@ -171,15 +173,83 @@ arptimer(void *arg)
{
struct llentry *lle = (struct llentry *)arg;
struct ifnet *ifp;
+ int r_skip_req;
if (lle->la_flags & LLE_STATIC) {
- LLE_WUNLOCK(lle);
return;
}
-
+ LLE_WLOCK(lle);
+ if (callout_pending(&lle->lle_timer)) {
+ /*
+ * Here we are a bit odd here in the treatment of
+ * active/pending. If the pending bit is set, it got
+ * rescheduled before I ran. The active
+ * bit we ignore, since if it was stopped
+ * in ll_tablefree() and was currently running
+ * it would have return 0 so the code would
+ * not have deleted it since the callout could
+ * not be stopped so we want to go through
+ * with the delete here now. If the callout
+ * was restarted, the pending bit will be back on and
+ * we just want to bail since the callout_reset would
+ * return 1 and our reference would have been removed
+ * by arpresolve() below.
+ */
+ LLE_WUNLOCK(lle);
+ return;
+ }
ifp = lle->lle_tbl->llt_ifp;
CURVNET_SET(ifp->if_vnet);
+ switch (lle->ln_state) {
+ case ARP_LLINFO_REACHABLE:
+
+ /*
+ * Expiration time is approaching.
+ * Let's try to refresh entry if it is still
+ * in use.
+ *
+ * Set r_skip_req to get feedback from
+ * fast path. Change state and re-schedule
+ * ourselves.
+ */
+ LLE_REQ_LOCK(lle);
+ lle->r_skip_req = 1;
+ LLE_REQ_UNLOCK(lle);
+ lle->ln_state = ARP_LLINFO_VERIFY;
+ callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
+ LLE_WUNLOCK(lle);
+ CURVNET_RESTORE();
+ return;
+ case ARP_LLINFO_VERIFY:
+ LLE_REQ_LOCK(lle);
+ r_skip_req = lle->r_skip_req;
+ LLE_REQ_UNLOCK(lle);
+
+ if (r_skip_req == 0 && lle->la_preempt > 0) {
+ /* Entry was used, issue refresh request */
+ struct in_addr dst;
+ dst = lle->r_l3addr.addr4;
+ lle->la_preempt--;
+ callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
+ LLE_WUNLOCK(lle);
+ arprequest(ifp, NULL, &dst, NULL);
+ CURVNET_RESTORE();
+ return;
+ }
+ /* Nothing happened. Reschedule if not too late */
+ if (lle->la_expire > time_uptime) {
+ callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
+ LLE_WUNLOCK(lle);
+ CURVNET_RESTORE();
+ return;
+ }
+ break;
+ case ARP_LLINFO_INCOMPLETE:
+ case ARP_LLINFO_DELETED:
+ break;
+ }
+
if ((lle->la_flags & LLE_DELETED) == 0) {
int evt;
@@ -190,7 +260,7 @@ arptimer(void *arg)
EVENTHANDLER_INVOKE(lle_event, lle, evt);
}
- callout_stop(&lle->la_timer);
+ callout_stop(&lle->lle_timer);
/* XXX: LOR avoidance. We still have ref on lle. */
LLE_WUNLOCK(lle);
@@ -199,64 +269,109 @@ arptimer(void *arg)
/* Guard against race with other llentry_free(). */
if (lle->la_flags & LLE_LINKED) {
- size_t pkts_dropped;
-
LLE_REMREF(lle);
- pkts_dropped = llentry_free(lle);
- ARPSTAT_ADD(dropped, pkts_dropped);
- } else
- LLE_FREE_LOCKED(lle);
-
+ lltable_unlink_entry(lle->lle_tbl, lle);
+ }
IF_AFDATA_UNLOCK(ifp);
+ size_t pkts_dropped = llentry_free(lle);
+
+ ARPSTAT_ADD(dropped, pkts_dropped);
ARPSTAT_INC(timeouts);
CURVNET_RESTORE();
}
/*
+ * Stores link-layer header for @ifp in format suitable for if_output()
+ * into buffer @buf. Resulting header length is stored in @bufsize.
+ *
+ * Returns 0 on success.
+ */
+static int
+arp_fillheader(struct ifnet *ifp, struct arphdr *ah, int bcast, u_char *buf,
+ size_t *bufsize)
+{
+ struct if_encap_req ereq;
+ int error;
+
+ bzero(buf, *bufsize);
+ bzero(&ereq, sizeof(ereq));
+ ereq.buf = buf;
+ ereq.bufsize = *bufsize;
+ ereq.rtype = IFENCAP_LL;
+ ereq.family = AF_ARP;
+ ereq.lladdr = ar_tha(ah);
+ ereq.hdata = (u_char *)ah;
+ if (bcast)
+ ereq.flags = IFENCAP_FLAG_BROADCAST;
+ error = ifp->if_requestencap(ifp, &ereq);
+ if (error == 0)
+ *bufsize = ereq.bufsize;
+
+ return (error);
+}
+
+
+/*
* Broadcast an ARP request. Caller specifies:
* - arp header source ip address
* - arp header target ip address
* - arp header source ethernet address
*/
void
-arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip,
- u_char *enaddr)
+arprequest(struct ifnet *ifp, const struct in_addr *sip,
+ const struct in_addr *tip, u_char *enaddr)
{
struct mbuf *m;
struct arphdr *ah;
struct sockaddr sa;
+ u_char *carpaddr = NULL;
+ uint8_t linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ struct route ro;
+ int error;
if (sip == NULL) {
- /* XXX don't believe this can happen (or explain why) */
/*
* The caller did not supply a source address, try to find
* a compatible one among those assigned to this interface.
*/
struct ifaddr *ifa;
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (!ifa->ifa_addr ||
- ifa->ifa_addr->sa_family != AF_INET)
+ if (ifa->ifa_addr->sa_family != AF_INET)
continue;
- sip = &SIN(ifa->ifa_addr)->sin_addr;
+
+ if (ifa->ifa_carp) {
+ if ((*carp_iamatch_p)(ifa, &carpaddr) == 0)
+ continue;
+ sip = &IA_SIN(ifa)->sin_addr;
+ } else {
+ carpaddr = NULL;
+ sip = &IA_SIN(ifa)->sin_addr;
+ }
+
if (0 == ((sip->s_addr ^ tip->s_addr) &
- SIN(ifa->ifa_netmask)->sin_addr.s_addr) )
+ IA_MASKSIN(ifa)->sin_addr.s_addr))
break; /* found it. */
}
+ IF_ADDR_RUNLOCK(ifp);
if (sip == NULL) {
printf("%s: cannot find matching address\n", __func__);
return;
}
}
+ if (enaddr == NULL)
+ enaddr = carpaddr ? carpaddr : (u_char *)IF_LLADDR(ifp);
- if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
+ if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
return;
- m->m_len = sizeof(*ah) + 2*sizeof(struct in_addr) +
- 2*ifp->if_data.ifi_addrlen;
+ m->m_len = sizeof(*ah) + 2 * sizeof(struct in_addr) +
+ 2 * ifp->if_addrlen;
m->m_pkthdr.len = m->m_len;
- MH_ALIGN(m, m->m_len);
+ M_ALIGN(m, m->m_len);
ah = mtod(m, struct arphdr *);
bzero((caddr_t)ah, m->m_len);
#ifdef MAC
@@ -266,109 +381,121 @@ arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip,
ah->ar_hln = ifp->if_addrlen; /* hardware address length */
ah->ar_pln = sizeof(struct in_addr); /* protocol address length */
ah->ar_op = htons(ARPOP_REQUEST);
- bcopy((caddr_t)enaddr, (caddr_t)ar_sha(ah), ah->ar_hln);
- bcopy((caddr_t)sip, (caddr_t)ar_spa(ah), ah->ar_pln);
- bcopy((caddr_t)tip, (caddr_t)ar_tpa(ah), ah->ar_pln);
+ bcopy(enaddr, ar_sha(ah), ah->ar_hln);
+ bcopy(sip, ar_spa(ah), ah->ar_pln);
+ bcopy(tip, ar_tpa(ah), ah->ar_pln);
sa.sa_family = AF_ARP;
sa.sa_len = 2;
+
+ /* Calculate link header for sending frame */
+ bzero(&ro, sizeof(ro));
+ linkhdrsize = sizeof(linkhdr);
+ error = arp_fillheader(ifp, ah, 1, linkhdr, &linkhdrsize);
+ if (error != 0 && error != EAFNOSUPPORT) {
+ ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n",
+ if_name(ifp), error);
+ return;
+ }
+
+ ro.ro_prepend = linkhdr;
+ ro.ro_plen = linkhdrsize;
+ ro.ro_flags = 0;
+
m->m_flags |= M_BCAST;
- (*ifp->if_output)(ifp, m, &sa, NULL);
+ m_clrprotoflags(m); /* Avoid confusing lower layers. */
+ (*ifp->if_output)(ifp, m, &sa, &ro);
ARPSTAT_INC(txrequests);
}
+
/*
- * Resolve an IP address into an ethernet address.
- * On input:
- * ifp is the interface we use
- * rt0 is the route to the final destination (possibly useless)
- * m is the mbuf. May be NULL if we don't have a packet.
- * dst is the next hop,
- * desten is where we want the address.
+ * Resolve an IP address into an ethernet address - heavy version.
+ * Used internally by arpresolve().
+ * We have already checked than we can't use existing lle without
+ * modification so we have to acquire LLE_EXCLUSIVE lle lock.
*
- * On success, desten is filled in and the function returns 0;
+ * On success, desten and flags are filled in and the function returns 0;
* If the packet must be held pending resolution, we return EWOULDBLOCK
* On other errors, we return the corresponding error code.
* Note that m_freem() handles NULL.
*/
-int
-arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
- struct sockaddr *dst, u_char *desten, struct llentry **lle)
+static int
+arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m,
+ const struct sockaddr *dst, u_char *desten, uint32_t *pflags,
+ struct llentry **plle)
{
- struct llentry *la = 0;
- u_int flags = 0;
+ struct llentry *la = NULL, *la_tmp;
struct mbuf *curr = NULL;
struct mbuf *next = NULL;
int error, renew;
+ char *lladdr;
+ int ll_len;
- *lle = NULL;
- if (m != NULL) {
- if (m->m_flags & M_BCAST) {
- /* broadcast */
- (void)memcpy(desten,
- ifp->if_broadcastaddr, ifp->if_addrlen);
- return (0);
- }
- if (m->m_flags & M_MCAST && ifp->if_type != IFT_ARCNET) {
- /* multicast */
- ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
- return (0);
- }
+ if (pflags != NULL)
+ *pflags = 0;
+ if (plle != NULL)
+ *plle = NULL;
+
+ if ((flags & LLE_CREATE) == 0) {
+ IF_AFDATA_RLOCK(ifp);
+ la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
+ IF_AFDATA_RUNLOCK(ifp);
}
-retry:
- IF_AFDATA_RLOCK(ifp);
- la = lla_lookup(LLTABLE(ifp), flags, dst);
- IF_AFDATA_RUNLOCK(ifp);
- if ((la == NULL) && ((flags & LLE_EXCLUSIVE) == 0)
- && ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0)) {
- flags |= (LLE_CREATE | LLE_EXCLUSIVE);
+ if (la == NULL && (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) {
+ la = lltable_alloc_entry(LLTABLE(ifp), 0, dst);
+ if (la == NULL) {
+ log(LOG_DEBUG,
+ "arpresolve: can't allocate llinfo for %s on %s\n",
+ inet_ntoa(SIN(dst)->sin_addr), if_name(ifp));
+ m_freem(m);
+ return (EINVAL);
+ }
+
IF_AFDATA_WLOCK(ifp);
- la = lla_lookup(LLTABLE(ifp), flags, dst);
+ LLE_WLOCK(la);
+ la_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
+ /* Prefer ANY existing lle over newly-created one */
+ if (la_tmp == NULL)
+ lltable_link_entry(LLTABLE(ifp), la);
IF_AFDATA_WUNLOCK(ifp);
+ if (la_tmp != NULL) {
+ lltable_free_entry(LLTABLE(ifp), la);
+ la = la_tmp;
+ }
}
if (la == NULL) {
- if (flags & LLE_CREATE)
- log(LOG_DEBUG,
- "arpresolve: can't allocate llinfo for %s on %s\n",
- inet_ntoa(SIN(dst)->sin_addr), ifp->if_xname);
m_freem(m);
return (EINVAL);
}
if ((la->la_flags & LLE_VALID) &&
((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
- bcopy(&la->ll_addr, desten, ifp->if_addrlen);
- /*
- * If entry has an expiry time and it is approaching,
- * see if we need to send an ARP request within this
- * arpt_down interval.
- */
- if (!(la->la_flags & LLE_STATIC) &&
- time_uptime + la->la_preempt > la->la_expire) {
- arprequest(ifp, NULL,
- &SIN(dst)->sin_addr, IF_LLADDR(ifp));
-
- la->la_preempt--;
+ if (flags & LLE_ADDRONLY) {
+ lladdr = la->ll_addr;
+ ll_len = ifp->if_addrlen;
+ } else {
+ lladdr = la->r_linkdata;
+ ll_len = la->r_hdrlen;
}
+ bcopy(lladdr, desten, ll_len);
- *lle = la;
- error = 0;
- goto done;
- }
-
- if (la->la_flags & LLE_STATIC) { /* should not happen! */
- log(LOG_DEBUG, "arpresolve: ouch, empty static llinfo for %s\n",
- inet_ntoa(SIN(dst)->sin_addr));
- m_freem(m);
- error = EINVAL;
- goto done;
+ /* Check if we have feedback request from arptimer() */
+ if (la->r_skip_req != 0) {
+ LLE_REQ_LOCK(la);
+ la->r_skip_req = 0; /* Notify that entry was used */
+ LLE_REQ_UNLOCK(la);
+ }
+ if (pflags != NULL)
+ *pflags = la->la_flags & (LLE_VALID|LLE_IFADDR);
+ if (plle) {
+ LLE_ADDREF(la);
+ *plle = la;
+ }
+ LLE_WUNLOCK(la);
+ return (0);
}
renew = (la->la_asked == 0 || la->la_expire != time_uptime);
- if ((renew || m != NULL) && (flags & LLE_EXCLUSIVE) == 0) {
- flags |= LLE_EXCLUSIVE;
- LLE_RUNLOCK(la);
- goto retry;
- }
/*
* There is an arptab entry, but no ethernet address
* response yet. Add the mbuf to the list, dropping
@@ -393,11 +520,6 @@ retry:
} else
la->la_hold = m;
la->la_numheld++;
- if (renew == 0 && (flags & LLE_EXCLUSIVE)) {
- flags &= ~LLE_EXCLUSIVE;
- LLE_DOWNGRADE(la);
- }
-
}
/*
* Return EWOULDBLOCK if we have tried less than arp_maxtries. It
@@ -408,32 +530,113 @@ retry:
if (la->la_asked < V_arp_maxtries)
error = EWOULDBLOCK; /* First request. */
else
- error = rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) ?
- EHOSTUNREACH : EHOSTDOWN;
+ error = is_gw != 0 ? EHOSTUNREACH : EHOSTDOWN;
if (renew) {
int canceled;
LLE_ADDREF(la);
la->la_expire = time_uptime;
- canceled = callout_reset(&la->la_timer, hz * V_arpt_down,
+ canceled = callout_reset(&la->lle_timer, hz * V_arpt_down,
arptimer, la);
if (canceled)
LLE_REMREF(la);
la->la_asked++;
LLE_WUNLOCK(la);
- arprequest(ifp, NULL, &SIN(dst)->sin_addr,
- IF_LLADDR(ifp));
+ arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL);
return (error);
}
-done:
- if (flags & LLE_EXCLUSIVE)
- LLE_WUNLOCK(la);
- else
- LLE_RUNLOCK(la);
+
+ LLE_WUNLOCK(la);
+ return (error);
+}
+
+/*
+ * Resolve an IP address into an ethernet address.
+ */
+int
+arpresolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
+ char *desten, uint32_t *pflags, struct llentry **plle)
+{
+ int error;
+
+ flags |= LLE_ADDRONLY;
+ error = arpresolve_full(ifp, 0, flags, NULL, dst, desten, pflags, plle);
return (error);
}
+
+/*
+ * Lookups link header based on an IP address.
+ * On input:
+ * ifp is the interface we use
+ * is_gw != 0 if @dst represents gateway to some destination
+ * m is the mbuf. May be NULL if we don't have a packet.
+ * dst is the next hop,
+ * desten is the storage to put LL header.
+ * flags returns subset of lle flags: LLE_VALID | LLE_IFADDR
+ *
+ * On success, full/partial link header and flags are filled in and
+ * the function returns 0.
+ * If the packet must be held pending resolution, we return EWOULDBLOCK
+ * On other errors, we return the corresponding error code.
+ * Note that m_freem() handles NULL.
+ */
+int
+arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
+ const struct sockaddr *dst, u_char *desten, uint32_t *pflags,
+ struct llentry **plle)
+{
+ struct llentry *la = NULL;
+
+ if (pflags != NULL)
+ *pflags = 0;
+ if (plle != NULL)
+ *plle = NULL;
+
+ if (m != NULL) {
+ if (m->m_flags & M_BCAST) {
+ /* broadcast */
+ (void)memcpy(desten,
+ ifp->if_broadcastaddr, ifp->if_addrlen);
+ return (0);
+ }
+ if (m->m_flags & M_MCAST) {
+ /* multicast */
+ ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
+ return (0);
+ }
+ }
+
+ IF_AFDATA_RLOCK(ifp);
+ la = lla_lookup(LLTABLE(ifp), plle ? LLE_EXCLUSIVE : LLE_UNLOCKED, dst);
+ if (la != NULL && (la->r_flags & RLLE_VALID) != 0) {
+ /* Entry found, let's copy lle info */
+ bcopy(la->r_linkdata, desten, la->r_hdrlen);
+ if (pflags != NULL)
+ *pflags = LLE_VALID | (la->r_flags & RLLE_IFADDR);
+ /* Check if we have feedback request from arptimer() */
+ if (la->r_skip_req != 0) {
+ LLE_REQ_LOCK(la);
+ la->r_skip_req = 0; /* Notify that entry was used */
+ LLE_REQ_UNLOCK(la);
+ }
+ if (plle) {
+ LLE_ADDREF(la);
+ *plle = la;
+ LLE_WUNLOCK(la);
+ }
+ IF_AFDATA_RUNLOCK(ifp);
+ return (0);
+ }
+ if (plle && la)
+ LLE_WUNLOCK(la);
+ IF_AFDATA_RUNLOCK(ifp);
+
+ return (arpresolve_full(ifp, is_gw, la == NULL ? LLE_CREATE : 0, m, dst,
+ desten, pflags, plle));
+}
+
/*
* Common length and type checks are done here,
* then the protocol-specific routine is called.
@@ -442,34 +645,76 @@ static void
arpintr(struct mbuf *m)
{
struct arphdr *ar;
+ struct ifnet *ifp;
+ char *layer;
+ int hlen;
+
+ ifp = m->m_pkthdr.rcvif;
if (m->m_len < sizeof(struct arphdr) &&
((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) {
- log(LOG_NOTICE, "arp: runt packet -- m_pullup failed\n");
+ ARP_LOG(LOG_NOTICE, "packet with short header received on %s\n",
+ if_name(ifp));
return;
}
ar = mtod(m, struct arphdr *);
- if (ntohs(ar->ar_hrd) != ARPHRD_ETHER &&
- ntohs(ar->ar_hrd) != ARPHRD_IEEE802 &&
- ntohs(ar->ar_hrd) != ARPHRD_ARCNET &&
- ntohs(ar->ar_hrd) != ARPHRD_IEEE1394 &&
- ntohs(ar->ar_hrd) != ARPHRD_INFINIBAND) {
- log(LOG_NOTICE, "arp: unknown hardware address format (0x%2D)"
- " (from %*D to %*D)\n", (unsigned char *)&ar->ar_hrd, "",
- ETHER_ADDR_LEN, (u_char *)ar_sha(ar), ":",
- ETHER_ADDR_LEN, (u_char *)ar_tha(ar), ":");
+ /* Check if length is sufficient */
+ if (m->m_len < arphdr_len(ar)) {
+ m = m_pullup(m, arphdr_len(ar));
+ if (m == NULL) {
+ ARP_LOG(LOG_NOTICE, "short packet received on %s\n",
+ if_name(ifp));
+ return;
+ }
+ ar = mtod(m, struct arphdr *);
+ }
+
+ hlen = 0;
+ layer = "";
+ switch (ntohs(ar->ar_hrd)) {
+ case ARPHRD_ETHER:
+ hlen = ETHER_ADDR_LEN; /* RFC 826 */
+ layer = "ethernet";
+ break;
+ case ARPHRD_IEEE802:
+ hlen = 6; /* RFC 1390, FDDI_ADDR_LEN */
+ layer = "fddi";
+ break;
+ case ARPHRD_ARCNET:
+ hlen = 1; /* RFC 1201, ARC_ADDR_LEN */
+ layer = "arcnet";
+ break;
+ case ARPHRD_INFINIBAND:
+ hlen = 20; /* RFC 4391, INFINIBAND_ALEN */
+ layer = "infiniband";
+ break;
+ case ARPHRD_IEEE1394:
+ hlen = 0; /* SHALL be 16 */ /* RFC 2734 */
+ layer = "firewire";
+
+ /*
+ * Restrict too long hardware addresses.
+ * Currently we are capable of handling 20-byte
+ * addresses ( sizeof(lle->ll_addr) )
+ */
+ if (ar->ar_hln >= 20)
+ hlen = 16;
+ break;
+ default:
+ ARP_LOG(LOG_NOTICE,
+ "packet with unknown hardware format 0x%02d received on "
+ "%s\n", ntohs(ar->ar_hrd), if_name(ifp));
m_freem(m);
return;
}
- if (m->m_len < arphdr_len(ar)) {
- if ((m = m_pullup(m, arphdr_len(ar))) == NULL) {
- log(LOG_NOTICE, "arp: runt packet\n");
- m_freem(m);
- return;
- }
- ar = mtod(m, struct arphdr *);
+ if (hlen != 0 && hlen != ar->ar_hln) {
+ ARP_LOG(LOG_NOTICE,
+ "packet with invalid %s address length %d received on %s\n",
+ layer, ar->ar_hln, if_name(ifp));
+ m_freem(m);
+ return;
}
ARPSTAT_INC(received);
@@ -518,20 +763,27 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, allow_multicast, CTLFLAG_RW,
static void
in_arpinput(struct mbuf *m)
{
+ struct rm_priotracker in_ifa_tracker;
struct arphdr *ah;
struct ifnet *ifp = m->m_pkthdr.rcvif;
- struct llentry *la = NULL;
- struct rtentry *rt;
+ struct llentry *la = NULL, *la_tmp;
struct ifaddr *ifa;
struct in_ifaddr *ia;
struct sockaddr sa;
struct in_addr isaddr, itaddr, myaddr;
u_int8_t *enaddr = NULL;
- int op, flags;
- int req_len;
+ int op;
int bridged = 0, is_bridge = 0;
- int carp_match = 0;
+ int carped;
struct sockaddr_in sin;
+ struct sockaddr *dst;
+ struct nhop4_basic nh4;
+ uint8_t linkhdr[LLE_MAX_LINKHDR];
+ struct route ro;
+ size_t linkhdrsize;
+ int lladdr_off;
+ int error;
+
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = 0;
@@ -541,25 +793,24 @@ in_arpinput(struct mbuf *m)
if (ifp->if_type == IFT_BRIDGE)
is_bridge = 1;
- req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
- if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) {
- log(LOG_NOTICE, "in_arp: runt packet -- m_pullup failed\n");
- return;
- }
-
+ /*
+ * We already have checked that mbuf contains enough contiguous data
+ * to hold entire arp message according to the arp header.
+ */
ah = mtod(m, struct arphdr *);
+
/*
* ARP is only for IPv4 so we can reject packets with
* a protocol length not equal to an IPv4 address.
*/
if (ah->ar_pln != sizeof(struct in_addr)) {
- log(LOG_NOTICE, "in_arp: requested protocol length != %zu\n",
+ ARP_LOG(LOG_NOTICE, "requested protocol length != %zu\n",
sizeof(struct in_addr));
goto drop;
}
if (allow_multicast == 0 && ETHER_IS_MULTICAST(ar_sha(ah))) {
- log(LOG_NOTICE, "arp: %*D is multicast\n",
+ ARP_LOG(LOG_NOTICE, "%*D is multicast\n",
ifp->if_addrlen, (u_char *)ar_sha(ah), ":");
goto drop;
}
@@ -575,26 +826,16 @@ in_arpinput(struct mbuf *m)
* For a bridge, we want to check the address irrespective
* of the receive interface. (This will change slightly
* when we have clusters of interfaces).
- * If the interface does not match, but the recieving interface
- * is part of carp, we call carp_iamatch to see if this is a
- * request for the virtual host ip.
- * XXX: This is really ugly!
*/
- IN_IFADDR_RLOCK();
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) ||
ia->ia_ifp == ifp) &&
- itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
+ itaddr.s_addr == ia->ia_addr.sin_addr.s_addr &&
+ (ia->ia_ifa.ifa_carp == NULL ||
+ (*carp_iamatch_p)(&ia->ia_ifa, &enaddr))) {
ifa_ref(&ia->ia_ifa);
- IN_IFADDR_RUNLOCK();
- goto match;
- }
- if (ifp->if_carp != NULL &&
- (*carp_iamatch_p)(ifp, ia, &isaddr, &enaddr) &&
- itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
- carp_match = 1;
- ifa_ref(&ia->ia_ifa);
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
goto match;
}
}
@@ -603,7 +844,7 @@ in_arpinput(struct mbuf *m)
ia->ia_ifp == ifp) &&
isaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
ifa_ref(&ia->ia_ifa);
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
goto match;
}
@@ -622,13 +863,13 @@ in_arpinput(struct mbuf *m)
if (BDG_MEMBER_MATCHES_ARP(itaddr.s_addr, ifp, ia)) {
ifa_ref(&ia->ia_ifa);
ifp = ia->ia_ifp;
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
goto match;
}
}
}
#undef BDG_MEMBER_MATCHES_ARP
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
/*
* No match, use the first inet address on the receive interface
@@ -636,7 +877,9 @@ in_arpinput(struct mbuf *m)
*/
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
- if (ifa->ifa_addr->sa_family == AF_INET) {
+ if (ifa->ifa_addr->sa_family == AF_INET &&
+ (ifa->ifa_carp == NULL ||
+ (*carp_iamatch_p)(ifa, &enaddr))) {
ia = ifatoia(ifa);
ifa_ref(ifa);
IF_ADDR_RUNLOCK(ifp);
@@ -647,35 +890,44 @@ in_arpinput(struct mbuf *m)
/*
* If bridging, fall back to using any inet address.
*/
- IN_IFADDR_RLOCK();
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
if (!bridged || (ia = TAILQ_FIRST(&V_in_ifaddrhead)) == NULL) {
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
goto drop;
}
ifa_ref(&ia->ia_ifa);
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
match:
if (!enaddr)
enaddr = (u_int8_t *)IF_LLADDR(ifp);
+ carped = (ia->ia_ifa.ifa_carp != NULL);
myaddr = ia->ia_addr.sin_addr;
ifa_free(&ia->ia_ifa);
if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
goto drop; /* it's from me, ignore it. */
if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
- log(LOG_NOTICE,
- "arp: link address is broadcast for IP address %s!\n",
- inet_ntoa(isaddr));
+ ARP_LOG(LOG_NOTICE, "link address is broadcast for IP address "
+ "%s!\n", inet_ntoa(isaddr));
+ goto drop;
+ }
+
+ if (ifp->if_addrlen != ah->ar_hln) {
+ ARP_LOG(LOG_WARNING, "from %*D: addr len: new %d, "
+ "i/f %d (ignored)\n", ifp->if_addrlen,
+ (u_char *) ar_sha(ah), ":", ah->ar_hln,
+ ifp->if_addrlen);
goto drop;
}
+
/*
* Warn if another host is using the same IP address, but only if the
* IP address isn't 0.0.0.0, which is used for DHCP only, in which
* case we suppress the warning to avoid false positive complaints of
* potential misconfiguration.
*/
- if (!bridged && isaddr.s_addr == myaddr.s_addr && myaddr.s_addr != 0) {
- log(LOG_ERR,
- "arp: %*D is using my IP address %s on %s!\n",
+ if (!bridged && !carped && isaddr.s_addr == myaddr.s_addr &&
+ myaddr.s_addr != 0) {
+ ARP_LOG(LOG_ERR, "%*D is using my IP address %s on %s!\n",
ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
inet_ntoa(isaddr), ifp->if_xname);
itaddr = myaddr;
@@ -689,95 +941,73 @@ match:
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_family = AF_INET;
sin.sin_addr = isaddr;
- flags = (itaddr.s_addr == myaddr.s_addr) ? LLE_CREATE : 0;
- flags |= LLE_EXCLUSIVE;
- IF_AFDATA_LOCK(ifp);
- la = lla_lookup(LLTABLE(ifp), flags, (struct sockaddr *)&sin);
- IF_AFDATA_UNLOCK(ifp);
- if (la != NULL) {
- /* the following is not an error when doing bridging */
- if (!bridged && la->lle_tbl->llt_ifp != ifp && !carp_match) {
- if (log_arp_wrong_iface)
- log(LOG_WARNING, "arp: %s is on %s "
- "but got reply from %*D on %s\n",
- inet_ntoa(isaddr),
- la->lle_tbl->llt_ifp->if_xname,
- ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
- ifp->if_xname);
- LLE_WUNLOCK(la);
+ dst = (struct sockaddr *)&sin;
+ IF_AFDATA_RLOCK(ifp);
+ la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
+ IF_AFDATA_RUNLOCK(ifp);
+ if (la != NULL)
+ arp_check_update_lle(ah, isaddr, ifp, bridged, la);
+ else if (itaddr.s_addr == myaddr.s_addr) {
+ /*
+ * Request/reply to our address, but no lle exists yet.
+ * Calculate full link prepend to use in lle.
+ */
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
+ &linkhdrsize, &lladdr_off) != 0)
goto reply;
- }
- if ((la->la_flags & LLE_VALID) &&
- bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) {
- if (la->la_flags & LLE_STATIC) {
- LLE_WUNLOCK(la);
- if (log_arp_permanent_modify)
- log(LOG_ERR,
- "arp: %*D attempts to modify "
- "permanent entry for %s on %s\n",
- ifp->if_addrlen,
- (u_char *)ar_sha(ah), ":",
- inet_ntoa(isaddr), ifp->if_xname);
- goto reply;
- }
- if (log_arp_movements) {
- log(LOG_INFO, "arp: %s moved from %*D "
- "to %*D on %s\n",
- inet_ntoa(isaddr),
- ifp->if_addrlen,
- (u_char *)&la->ll_addr, ":",
- ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
- ifp->if_xname);
- }
- }
- if (ifp->if_addrlen != ah->ar_hln) {
- LLE_WUNLOCK(la);
- log(LOG_WARNING, "arp from %*D: addr len: new %d, "
- "i/f %d (ignored)\n", ifp->if_addrlen,
- (u_char *) ar_sha(ah), ":", ah->ar_hln,
- ifp->if_addrlen);
- goto drop;
- }
- (void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
- la->la_flags |= LLE_VALID;
+ /* Allocate new entry */
+ la = lltable_alloc_entry(LLTABLE(ifp), 0, dst);
+ if (la == NULL) {
- EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
+ /*
+ * lle creation may fail if source address belongs
+ * to non-directly connected subnet. However, we
+ * will try to answer the request instead of dropping
+ * frame.
+ */
+ goto reply;
+ }
+ lltable_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
+ lladdr_off);
- if (!(la->la_flags & LLE_STATIC)) {
- int canceled;
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(la);
+ la_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
- LLE_ADDREF(la);
- la->la_expire = time_uptime + V_arpt_keep;
- canceled = callout_reset(&la->la_timer,
- hz * V_arpt_keep, arptimer, la);
- if (canceled)
- LLE_REMREF(la);
- }
- la->la_asked = 0;
- la->la_preempt = V_arp_maxtries;
/*
- * The packets are all freed within the call to the output
- * routine.
+	 * Check if the lle still does not exist.
+ * If it does, that means that we either
+ * 1) have configured it explicitly, via
+ * 1a) 'arp -s' static entry or
+ * 1b) interface address static record
+ * or
+ * 2) it was the result of sending first packet to-host
+ * or
+ * 3) it was another arp reply packet we handled in
+ * different thread.
*
- * NB: The lock MUST be released before the call to the
- * output routine.
+ * In all cases except 3) we definitely need to prefer
+ * existing lle. For the sake of simplicity, prefer any
+	 * existing lle over the newly-created one.
*/
- if (la->la_hold != NULL) {
- struct mbuf *m_hold, *m_hold_next;
+ if (la_tmp == NULL)
+ lltable_link_entry(LLTABLE(ifp), la);
+ IF_AFDATA_WUNLOCK(ifp);
- m_hold = la->la_hold;
- la->la_hold = NULL;
- la->la_numheld = 0;
- memcpy(&sa, L3_ADDR(la), sizeof(sa));
- LLE_WUNLOCK(la);
- for (; m_hold != NULL; m_hold = m_hold_next) {
- m_hold_next = m_hold->m_nextpkt;
- m_hold->m_nextpkt = NULL;
- (*ifp->if_output)(ifp, m_hold, &sa, NULL);
- }
- } else
+ if (la_tmp == NULL) {
+ arp_mark_lle_reachable(la);
LLE_WUNLOCK(la);
+ } else {
+		/* Free newly-created entry and handle packet */
+ lltable_free_entry(LLTABLE(ifp), la);
+ la = la_tmp;
+ la_tmp = NULL;
+ arp_check_update_lle(ah, isaddr, ifp, bridged, la);
+ /* arp_check_update_lle() returns @la unlocked */
+ }
+ la = NULL;
}
reply:
if (op != ARPOP_REQUEST)
@@ -798,7 +1028,7 @@ reply:
if ((lle != NULL) && (lle->la_flags & LLE_PUB)) {
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
- (void)memcpy(ar_sha(ah), &lle->ll_addr, ah->ar_hln);
+ (void)memcpy(ar_sha(ah), lle->ll_addr, ah->ar_hln);
LLE_RUNLOCK(lle);
} else {
@@ -808,10 +1038,8 @@ reply:
if (!V_arp_proxyall)
goto drop;
- sin.sin_addr = itaddr;
/* XXX MRT use table 0 for arp reply */
- rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
- if (!rt)
+ if (fib4_lookup_nh_basic(0, itaddr, 0, 0, &nh4) != 0)
goto drop;
/*
@@ -819,11 +1047,8 @@ reply:
* as this one came out of, or we'll get into a fight
* over who claims what Ether address.
*/
- if (!rt->rt_ifp || rt->rt_ifp == ifp) {
- RTFREE_LOCKED(rt);
+ if (nh4.nh_ifp == ifp)
goto drop;
- }
- RTFREE_LOCKED(rt);
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
@@ -834,21 +1059,16 @@ reply:
* avoids ARP chaos if an interface is connected to the
* wrong network.
*/
- sin.sin_addr = isaddr;
/* XXX MRT use table 0 for arp checks */
- rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
- if (!rt)
+ if (fib4_lookup_nh_basic(0, isaddr, 0, 0, &nh4) != 0)
goto drop;
- if (rt->rt_ifp != ifp) {
- log(LOG_INFO, "arp_proxy: ignoring request"
- " from %s via %s, expecting %s\n",
- inet_ntoa(isaddr), ifp->if_xname,
- rt->rt_ifp->if_xname);
- RTFREE_LOCKED(rt);
+ if (nh4.nh_ifp != ifp) {
+ ARP_LOG(LOG_INFO, "proxy: ignoring request"
+ " from %s via %s\n",
+ inet_ntoa(isaddr), ifp->if_xname);
goto drop;
}
- RTFREE_LOCKED(rt);
#ifdef DEBUG_PROXY
printf("arp: proxying for %s\n", inet_ntoa(itaddr));
@@ -878,7 +1098,29 @@ reply:
m->m_pkthdr.rcvif = NULL;
sa.sa_family = AF_ARP;
sa.sa_len = 2;
- (*ifp->if_output)(ifp, m, &sa, NULL);
+
+ /* Calculate link header for sending frame */
+ bzero(&ro, sizeof(ro));
+ linkhdrsize = sizeof(linkhdr);
+ error = arp_fillheader(ifp, ah, 0, linkhdr, &linkhdrsize);
+
+ /*
+ * arp_fillheader() may fail due to lack of support inside encap request
+ * routing. This is not necessary an error, AF_ARP can/should be handled
+ * by if_output().
+ */
+ if (error != 0 && error != EAFNOSUPPORT) {
+ ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n",
+ if_name(ifp), error);
+ return;
+ }
+
+ ro.ro_prepend = linkhdr;
+ ro.ro_plen = linkhdrsize;
+ ro.ro_flags = 0;
+
+ m_clrprotoflags(m); /* Avoid confusing lower layers. */
+ (*ifp->if_output)(ifp, m, &sa, &ro);
ARPSTAT_INC(txreplies);
return;
@@ -887,45 +1129,249 @@ drop:
}
#endif
+/*
+ * Checks received arp data against existing @la.
+ * Updates lle state/performs notification if necessary.
+ */
+static void
+arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr, struct ifnet *ifp,
+ int bridged, struct llentry *la)
+{
+ struct sockaddr sa;
+ struct mbuf *m_hold, *m_hold_next;
+ uint8_t linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
+
+ LLE_WLOCK_ASSERT(la);
+
+ /* the following is not an error when doing bridging */
+ if (!bridged && la->lle_tbl->llt_ifp != ifp) {
+ if (log_arp_wrong_iface)
+ ARP_LOG(LOG_WARNING, "%s is on %s "
+ "but got reply from %*D on %s\n",
+ inet_ntoa(isaddr),
+ la->lle_tbl->llt_ifp->if_xname,
+ ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
+ ifp->if_xname);
+ LLE_WUNLOCK(la);
+ return;
+ }
+ if ((la->la_flags & LLE_VALID) &&
+ bcmp(ar_sha(ah), la->ll_addr, ifp->if_addrlen)) {
+ if (la->la_flags & LLE_STATIC) {
+ LLE_WUNLOCK(la);
+ if (log_arp_permanent_modify)
+ ARP_LOG(LOG_ERR,
+ "%*D attempts to modify "
+ "permanent entry for %s on %s\n",
+ ifp->if_addrlen,
+ (u_char *)ar_sha(ah), ":",
+ inet_ntoa(isaddr), ifp->if_xname);
+ return;
+ }
+ if (log_arp_movements) {
+ ARP_LOG(LOG_INFO, "%s moved from %*D "
+ "to %*D on %s\n",
+ inet_ntoa(isaddr),
+ ifp->if_addrlen,
+ (u_char *)&la->ll_addr, ":",
+ ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
+ ifp->if_xname);
+ }
+ }
+
+ /* Calculate full link prepend to use in lle */
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
+ &linkhdrsize, &lladdr_off) != 0)
+ return;
+
+ /* Check if something has changed */
+ if (memcmp(la->r_linkdata, linkhdr, linkhdrsize) != 0 ||
+ (la->la_flags & LLE_VALID) == 0) {
+ /* Try to perform LLE update */
+ if (lltable_try_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
+ lladdr_off) == 0)
+ return;
+
+ /* Clear fast path feedback request if set */
+ la->r_skip_req = 0;
+ }
+
+ arp_mark_lle_reachable(la);
+
+ /*
+ * The packets are all freed within the call to the output
+ * routine.
+ *
+ * NB: The lock MUST be released before the call to the
+ * output routine.
+ */
+ if (la->la_hold != NULL) {
+ m_hold = la->la_hold;
+ la->la_hold = NULL;
+ la->la_numheld = 0;
+ lltable_fill_sa_entry(la, &sa);
+ LLE_WUNLOCK(la);
+ for (; m_hold != NULL; m_hold = m_hold_next) {
+ m_hold_next = m_hold->m_nextpkt;
+ m_hold->m_nextpkt = NULL;
+ /* Avoid confusing lower layers. */
+ m_clrprotoflags(m_hold);
+ (*ifp->if_output)(ifp, m_hold, &sa, NULL);
+ }
+ } else
+ LLE_WUNLOCK(la);
+}
+
+static void
+arp_mark_lle_reachable(struct llentry *la)
+{
+ int canceled, wtime;
+
+ LLE_WLOCK_ASSERT(la);
+
+ la->ln_state = ARP_LLINFO_REACHABLE;
+ EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
+
+ if (!(la->la_flags & LLE_STATIC)) {
+ LLE_ADDREF(la);
+ la->la_expire = time_uptime + V_arpt_keep;
+ wtime = V_arpt_keep - V_arp_maxtries * V_arpt_rexmit;
+ if (wtime < 0)
+ wtime = V_arpt_keep;
+ canceled = callout_reset(&la->lle_timer,
+ hz * wtime, arptimer, la);
+ if (canceled)
+ LLE_REMREF(la);
+ }
+ la->la_asked = 0;
+ la->la_preempt = V_arp_maxtries;
+}
+
+/*
+ * Add permanent link-layer record for a given interface address.
+ */
+static __noinline void
+arp_add_ifa_lle(struct ifnet *ifp, const struct sockaddr *dst)
+{
+ struct llentry *lle, *lle_tmp;
+
+ /*
+ * Interface address LLE record is considered static
+ * because kernel code relies on LLE_STATIC flag to check
+	 * if these entries can be rewritten by arp updates.
+ */
+ lle = lltable_alloc_entry(LLTABLE(ifp), LLE_IFADDR | LLE_STATIC, dst);
+ if (lle == NULL) {
+ log(LOG_INFO, "arp_ifinit: cannot create arp "
+ "entry for interface address\n");
+ return;
+ }
+
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(lle);
+ /* Unlink any entry if exists */
+ lle_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
+ if (lle_tmp != NULL)
+ lltable_unlink_entry(LLTABLE(ifp), lle_tmp);
+
+ lltable_link_entry(LLTABLE(ifp), lle);
+ IF_AFDATA_WUNLOCK(ifp);
+
+ if (lle_tmp != NULL)
+ EVENTHANDLER_INVOKE(lle_event, lle_tmp, LLENTRY_EXPIRED);
+
+ EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED);
+ LLE_WUNLOCK(lle);
+ if (lle_tmp != NULL)
+ lltable_free_entry(LLTABLE(ifp), lle_tmp);
+}
+
void
arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
{
- struct llentry *lle;
+ const struct sockaddr_in *dst_in;
+ const struct sockaddr *dst;
- if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) {
- arprequest(ifp, &IA_SIN(ifa)->sin_addr,
- &IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
- /*
- * interface address is considered static entry
- * because the output of the arp utility shows
- * that L2 entry as permanent
- */
- IF_AFDATA_LOCK(ifp);
- lle = lla_lookup(LLTABLE(ifp), (LLE_CREATE | LLE_IFADDR | LLE_STATIC),
- (struct sockaddr *)IA_SIN(ifa));
- IF_AFDATA_UNLOCK(ifp);
- if (lle == NULL)
- log(LOG_INFO, "arp_ifinit: cannot create arp "
- "entry for interface address\n");
- else
- LLE_RUNLOCK(lle);
- }
- ifa->ifa_rtrequest = NULL;
+ if (ifa->ifa_carp != NULL)
+ return;
+
+ dst = ifa->ifa_addr;
+ dst_in = (const struct sockaddr_in *)dst;
+
+ if (ntohl(dst_in->sin_addr.s_addr) == INADDR_ANY)
+ return;
+ arp_announce_ifaddr(ifp, dst_in->sin_addr, IF_LLADDR(ifp));
+
+ arp_add_ifa_lle(ifp, dst);
}
void
-arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr)
+arp_announce_ifaddr(struct ifnet *ifp, struct in_addr addr, u_char *enaddr)
+{
+
+ if (ntohl(addr.s_addr) != INADDR_ANY)
+ arprequest(ifp, &addr, &addr, enaddr);
+}
+
+/*
+ * Sends gratuitous ARPs for each ifaddr to notify other
+ * nodes about the address change.
+ */
+static __noinline void
+arp_handle_ifllchange(struct ifnet *ifp)
+{
+ struct ifaddr *ifa;
+
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family == AF_INET)
+ arp_ifinit(ifp, ifa);
+ }
+}
+
+/*
+ * A handler for interface link layer address change event.
+ */
+static void
+arp_iflladdr(void *arg __unused, struct ifnet *ifp)
+{
+
+ lltable_update_ifaddr(LLTABLE(ifp));
+
+ if ((ifp->if_flags & IFF_UP) != 0)
+ arp_handle_ifllchange(ifp);
+}
+
+static void
+vnet_arp_init(void)
{
- if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY)
- arprequest(ifp, &IA_SIN(ifa)->sin_addr,
- &IA_SIN(ifa)->sin_addr, enaddr);
- ifa->ifa_rtrequest = NULL;
+
+ if (IS_DEFAULT_VNET(curvnet)) {
+ netisr_register(&arp_nh);
+ iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
+ arp_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
+ }
+#ifdef VIMAGE
+ else
+ netisr_register_vnet(&arp_nh);
+#endif
}
+VNET_SYSINIT(vnet_arp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND,
+ vnet_arp_init, 0);
+#ifdef VIMAGE
+/*
+ * We have to unregister ARP along with IP otherwise we risk doing INADDR_HASH
+ * lookups after destroying the hash. Ideally this would go on SI_ORDER_3.5.
+ */
static void
-arp_init(void)
+vnet_arp_destroy(__unused void *arg)
{
- netisr_register(&arp_nh);
+ netisr_unregister_vnet(&arp_nh);
}
-SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
+VNET_SYSUNINIT(vnet_arp_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+ vnet_arp_destroy, NULL);
+#endif
diff --git a/freebsd/sys/netinet/if_ether.h b/freebsd/sys/netinet/if_ether.h
index ce63d8db..27e51f78 100644
--- a/freebsd/sys/netinet/if_ether.h
+++ b/freebsd/sys/netinet/if_ether.h
@@ -48,9 +48,9 @@
(enaddr)[0] = 0x01; \
(enaddr)[1] = 0x00; \
(enaddr)[2] = 0x5e; \
- (enaddr)[3] = ((u_char *)ipaddr)[1] & 0x7f; \
- (enaddr)[4] = ((u_char *)ipaddr)[2]; \
- (enaddr)[5] = ((u_char *)ipaddr)[3]; \
+ (enaddr)[3] = ((const u_char *)ipaddr)[1] & 0x7f; \
+ (enaddr)[4] = ((const u_char *)ipaddr)[2]; \
+ (enaddr)[5] = ((const u_char *)ipaddr)[3]; \
}
/*
* Macro to map an IP6 multicast address to an Ethernet multicast address.
@@ -63,10 +63,10 @@
{ \
(enaddr)[0] = 0x33; \
(enaddr)[1] = 0x33; \
- (enaddr)[2] = ((u_char *)ip6addr)[12]; \
- (enaddr)[3] = ((u_char *)ip6addr)[13]; \
- (enaddr)[4] = ((u_char *)ip6addr)[14]; \
- (enaddr)[5] = ((u_char *)ip6addr)[15]; \
+ (enaddr)[2] = ((const u_char *)ip6addr)[12]; \
+ (enaddr)[3] = ((const u_char *)ip6addr)[13]; \
+ (enaddr)[4] = ((const u_char *)ip6addr)[14]; \
+ (enaddr)[5] = ((const u_char *)ip6addr)[15]; \
}
/*
@@ -89,6 +89,7 @@ struct ether_arp {
#define arp_pln ea_hdr.ar_pln
#define arp_op ea_hdr.ar_op
+#ifndef BURN_BRIDGES /* Can be used by third party software. */
struct sockaddr_inarp {
u_char sin_len;
u_char sin_family;
@@ -99,6 +100,8 @@ struct sockaddr_inarp {
u_short sin_other;
#define SIN_PROXY 1
};
+#endif /* !BURN_BRIDGES */
+
/*
* IP and ethernet specific routing flags
*/
@@ -109,14 +112,19 @@ struct sockaddr_inarp {
extern u_char ether_ipmulticast_min[ETHER_ADDR_LEN];
extern u_char ether_ipmulticast_max[ETHER_ADDR_LEN];
-struct llentry;
struct ifaddr;
+struct llentry;
-int arpresolve(struct ifnet *ifp, struct rtentry *rt,
- struct mbuf *m, struct sockaddr *dst, u_char *desten,
- struct llentry **lle);
+int arpresolve_addr(struct ifnet *ifp, int flags,
+ const struct sockaddr *dst, char *desten, uint32_t *pflags,
+ struct llentry **plle);
+int arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
+ const struct sockaddr *dst, u_char *desten, uint32_t *pflags,
+ struct llentry **plle);
+void arprequest(struct ifnet *, const struct in_addr *,
+ const struct in_addr *, u_char *);
void arp_ifinit(struct ifnet *, struct ifaddr *);
-void arp_ifinit2(struct ifnet *, struct ifaddr *, u_char *);
+void arp_announce_ifaddr(struct ifnet *, struct in_addr addr, u_char *);
#endif
#endif
diff --git a/freebsd/sys/netinet/igmp.c b/freebsd/sys/netinet/igmp.c
index 78d9685b..cd57e426 100644
--- a/freebsd/sys/netinet/igmp.c
+++ b/freebsd/sys/netinet/igmp.c
@@ -52,6 +52,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_ddb.h>
+
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/module.h>
@@ -60,11 +62,18 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rmlock.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/vnet.h>
@@ -85,15 +94,15 @@ __FBSDID("$FreeBSD$");
#define KTR_IGMPV3 KTR_INET
#endif
-static struct igmp_ifinfo *
+static struct igmp_ifsoftc *
igi_alloc_locked(struct ifnet *);
static void igi_delete_locked(const struct ifnet *);
-static void igmp_dispatch_queue(struct ifqueue *, int, const int);
+static void igmp_dispatch_queue(struct mbufq *, int, const int);
static void igmp_fasttimo_vnet(void);
-static void igmp_final_leave(struct in_multi *, struct igmp_ifinfo *);
+static void igmp_final_leave(struct in_multi *, struct igmp_ifsoftc *);
static int igmp_handle_state_change(struct in_multi *,
- struct igmp_ifinfo *);
-static int igmp_initial_join(struct in_multi *, struct igmp_ifinfo *);
+ struct igmp_ifsoftc *);
+static int igmp_initial_join(struct in_multi *, struct igmp_ifsoftc *);
static int igmp_input_v1_query(struct ifnet *, const struct ip *,
const struct igmp *);
static int igmp_input_v2_query(struct ifnet *, const struct ip *,
@@ -101,7 +110,7 @@ static int igmp_input_v2_query(struct ifnet *, const struct ip *,
static int igmp_input_v3_query(struct ifnet *, const struct ip *,
/*const*/ struct igmpv3 *);
static int igmp_input_v3_group_query(struct in_multi *,
- struct igmp_ifinfo *, int, /*const*/ struct igmpv3 *);
+ struct igmp_ifsoftc *, int, /*const*/ struct igmpv3 *);
static int igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *,
/*const*/ struct igmp *);
static int igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *,
@@ -113,25 +122,25 @@ static struct mbuf *
#ifdef KTR
static char * igmp_rec_type_to_str(const int);
#endif
-static void igmp_set_version(struct igmp_ifinfo *, const int);
+static void igmp_set_version(struct igmp_ifsoftc *, const int);
static void igmp_slowtimo_vnet(void);
static int igmp_v1v2_queue_report(struct in_multi *, const int);
static void igmp_v1v2_process_group_timer(struct in_multi *, const int);
-static void igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
+static void igmp_v1v2_process_querier_timers(struct igmp_ifsoftc *);
static void igmp_v2_update_group(struct in_multi *, const int);
-static void igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
-static void igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
+static void igmp_v3_cancel_link_timers(struct igmp_ifsoftc *);
+static void igmp_v3_dispatch_general_query(struct igmp_ifsoftc *);
static struct mbuf *
igmp_v3_encap_report(struct ifnet *, struct mbuf *);
-static int igmp_v3_enqueue_group_record(struct ifqueue *,
+static int igmp_v3_enqueue_group_record(struct mbufq *,
struct in_multi *, const int, const int, const int);
-static int igmp_v3_enqueue_filter_change(struct ifqueue *,
+static int igmp_v3_enqueue_filter_change(struct mbufq *,
struct in_multi *);
-static void igmp_v3_process_group_timers(struct igmp_ifinfo *,
- struct ifqueue *, struct ifqueue *, struct in_multi *,
+static void igmp_v3_process_group_timers(struct igmp_ifsoftc *,
+ struct mbufq *, struct mbufq *, struct in_multi *,
const int);
static int igmp_v3_merge_state_changes(struct in_multi *,
- struct ifqueue *);
+ struct mbufq *);
static void igmp_v3_suppress_group_record(struct in_multi *);
static int sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS);
static int sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS);
@@ -159,13 +168,13 @@ static const struct netisr_handler igmp_nh = {
* * All output is delegated to the netisr.
* Now that Giant has been eliminated, the netisr may be inlined.
* * IN_MULTI_LOCK covers in_multi.
- * * IGMP_LOCK covers igmp_ifinfo and any global variables in this file,
+ * * IGMP_LOCK covers igmp_ifsoftc and any global variables in this file,
* including the output queue.
* * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
* per-link state iterators.
- * * igmp_ifinfo is valid as long as PF_INET is attached to the interface,
+ * * igmp_ifsoftc is valid as long as PF_INET is attached to the interface,
* therefore it is not refcounted.
- * We allow unlocked reads of igmp_ifinfo when accessed via in_multi.
+ * We allow unlocked reads of igmp_ifsoftc when accessed via in_multi.
*
* Reference counting
* * IGMP acquires its own reference every time an in_multi is passed to
@@ -220,7 +229,8 @@ static VNET_DEFINE(int, current_state_timers_running); /* IGMPv1/v2 host
#define V_state_change_timers_running VNET(state_change_timers_running)
#define V_current_state_timers_running VNET(current_state_timers_running)
-static VNET_DEFINE(LIST_HEAD(, igmp_ifinfo), igi_head);
+static VNET_DEFINE(LIST_HEAD(, igmp_ifsoftc), igi_head) =
+ LIST_HEAD_INITIALIZER(igi_head);
static VNET_DEFINE(struct igmpstat, igmpstat) = {
.igps_version = IGPS_VERSION_3,
.igps_len = sizeof(struct igmpstat),
@@ -250,32 +260,32 @@ static VNET_DEFINE(int, igmp_default_version) = IGMP_VERSION_3;
/*
* Virtualized sysctls.
*/
-SYSCTL_VNET_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RW,
+SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(igmpstat), igmpstat, "");
-SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(igmp_recvifkludge), 0,
"Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
-SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(igmp_sendra), 0,
"Send IP Router Alert option in IGMPv2/v3 messages");
-SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(igmp_sendlocal), 0,
"Send IGMP membership reports for 224.0.0.0/24 groups");
-SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(igmp_v1enable), 0,
"Enable backwards compatibility with IGMPv1");
-SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(igmp_v2enable), 0,
"Enable backwards compatibility with IGMPv2");
-SYSCTL_VNET_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(igmp_legacysupp), 0,
"Allow v1/v2 reports to suppress v3 group responses");
-SYSCTL_VNET_PROC(_net_inet_igmp, OID_AUTO, default_version,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
&VNET_NAME(igmp_default_version), 0, sysctl_igmp_default_version, "I",
"Default version of IGMP to run on each interface");
-SYSCTL_VNET_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
&VNET_NAME(igmp_gsrdelay.tv_sec), 0, sysctl_igmp_gsr, "I",
"Rate limit for IGMPv3 Group-and-Source queries in seconds");
@@ -291,7 +301,7 @@ igmp_save_context(struct mbuf *m, struct ifnet *ifp)
{
#ifdef VIMAGE
- m->m_pkthdr.header = ifp->if_vnet;
+ m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
#endif /* VIMAGE */
m->m_pkthdr.flowid = ifp->if_index;
}
@@ -300,7 +310,7 @@ static __inline void
igmp_scrub_context(struct mbuf *m)
{
- m->m_pkthdr.header = NULL;
+ m->m_pkthdr.PH_loc.ptr = NULL;
m->m_pkthdr.flowid = 0;
}
@@ -328,7 +338,7 @@ igmp_restore_context(struct mbuf *m)
#ifdef notyet
#if defined(VIMAGE) && defined(INVARIANTS)
- KASSERT(curvnet == (m->m_pkthdr.header),
+ KASSERT(curvnet == (m->m_pkthdr.PH_loc.ptr),
("%s: called when curvnet was not restored", __func__));
#endif
#endif
@@ -413,7 +423,7 @@ out_locked:
}
/*
- * Expose struct igmp_ifinfo to userland, keyed by ifindex.
+ * Expose struct igmp_ifsoftc to userland, keyed by ifindex.
* For use by ifmcstat(8).
*
* SMPng: NOTE: Does an unlocked ifindex space read.
@@ -427,7 +437,7 @@ sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS)
int error;
u_int namelen;
struct ifnet *ifp;
- struct igmp_ifinfo *igi;
+ struct igmp_ifsoftc *igi;
name = (int *)arg1;
namelen = arg2;
@@ -458,8 +468,18 @@ sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS)
LIST_FOREACH(igi, &V_igi_head, igi_link) {
if (ifp == igi->igi_ifp) {
- error = SYSCTL_OUT(req, igi,
- sizeof(struct igmp_ifinfo));
+ struct igmp_ifinfo info;
+
+ info.igi_version = igi->igi_version;
+ info.igi_v1_timer = igi->igi_v1_timer;
+ info.igi_v2_timer = igi->igi_v2_timer;
+ info.igi_v3_timer = igi->igi_v3_timer;
+ info.igi_flags = igi->igi_flags;
+ info.igi_rv = igi->igi_rv;
+ info.igi_qi = igi->igi_qi;
+ info.igi_qri = igi->igi_qri;
+ info.igi_uri = igi->igi_uri;
+ error = SYSCTL_OUT(req, &info, sizeof(info));
break;
}
}
@@ -476,15 +496,12 @@ out_locked:
* VIMAGE: Assumes the vnet pointer has been set.
*/
static void
-igmp_dispatch_queue(struct ifqueue *ifq, int limit, const int loop)
+igmp_dispatch_queue(struct mbufq *mq, int limit, const int loop)
{
struct mbuf *m;
- for (;;) {
- _IF_DEQUEUE(ifq, m);
- if (m == NULL)
- break;
- CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, ifq, m);
+ while ((m = mbufq_dequeue(mq)) != NULL) {
+ CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, mq, m);
if (loop)
m->m_flags |= M_IGMP_LOOP;
netisr_dispatch(NETISR_IGMP, m);
@@ -525,7 +542,7 @@ igmp_ra_alloc(void)
struct mbuf *m;
struct ipoption *p;
- MGET(m, M_DONTWAIT, MT_DATA);
+ m = m_get(M_WAITOK, MT_DATA);
p = mtod(m, struct ipoption *);
p->ipopt_dst.s_addr = INADDR_ANY;
p->ipopt_list[0] = IPOPT_RA; /* Router Alert Option */
@@ -540,10 +557,10 @@ igmp_ra_alloc(void)
/*
* Attach IGMP when PF_INET is attached to an interface.
*/
-struct igmp_ifinfo *
+struct igmp_ifsoftc *
igmp_domifattach(struct ifnet *ifp)
{
- struct igmp_ifinfo *igi;
+ struct igmp_ifsoftc *igi;
CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
__func__, ifp, ifp->if_xname);
@@ -562,14 +579,14 @@ igmp_domifattach(struct ifnet *ifp)
/*
* VIMAGE: assume curvnet set by caller.
*/
-static struct igmp_ifinfo *
+static struct igmp_ifsoftc *
igi_alloc_locked(/*const*/ struct ifnet *ifp)
{
- struct igmp_ifinfo *igi;
+ struct igmp_ifsoftc *igi;
IGMP_LOCK_ASSERT();
- igi = malloc(sizeof(struct igmp_ifinfo), M_IGMP, M_NOWAIT|M_ZERO);
+ igi = malloc(sizeof(struct igmp_ifsoftc), M_IGMP, M_NOWAIT|M_ZERO);
if (igi == NULL)
goto out;
@@ -580,17 +597,12 @@ igi_alloc_locked(/*const*/ struct ifnet *ifp)
igi->igi_qi = IGMP_QI_INIT;
igi->igi_qri = IGMP_QRI_INIT;
igi->igi_uri = IGMP_URI_INIT;
-
SLIST_INIT(&igi->igi_relinmhead);
-
- /*
- * Responses to general queries are subject to bounds.
- */
- IFQ_SET_MAXLEN(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS);
+ mbufq_init(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS);
LIST_INSERT_HEAD(&V_igi_head, igi, igi_link);
- CTR2(KTR_IGMPV3, "allocate igmp_ifinfo for ifp %p(%s)",
+ CTR2(KTR_IGMPV3, "allocate igmp_ifsoftc for ifp %p(%s)",
ifp, ifp->if_xname);
out:
@@ -609,7 +621,7 @@ out:
void
igmp_ifdetach(struct ifnet *ifp)
{
- struct igmp_ifinfo *igi;
+ struct igmp_ifsoftc *igi;
struct ifmultiaddr *ifma;
struct in_multi *inm, *tinm;
@@ -656,25 +668,21 @@ igmp_ifdetach(struct ifnet *ifp)
void
igmp_domifdetach(struct ifnet *ifp)
{
- struct igmp_ifinfo *igi;
CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
__func__, ifp, ifp->if_xname);
IGMP_LOCK();
-
- igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
igi_delete_locked(ifp);
-
IGMP_UNLOCK();
}
static void
igi_delete_locked(const struct ifnet *ifp)
{
- struct igmp_ifinfo *igi, *tigi;
+ struct igmp_ifsoftc *igi, *tigi;
- CTR3(KTR_IGMPV3, "%s: freeing igmp_ifinfo for ifp %p(%s)",
+ CTR3(KTR_IGMPV3, "%s: freeing igmp_ifsoftc for ifp %p(%s)",
__func__, ifp, ifp->if_xname);
IGMP_LOCK_ASSERT();
@@ -684,7 +692,7 @@ igi_delete_locked(const struct ifnet *ifp)
/*
* Free deferred General Query responses.
*/
- _IF_DRAIN(&igi->igi_gq);
+ mbufq_drain(&igi->igi_gq);
LIST_REMOVE(igi, igi_link);
@@ -696,10 +704,6 @@ igi_delete_locked(const struct ifnet *ifp)
return;
}
}
-
-#ifdef INVARIANTS
- panic("%s: igmp_ifinfo not found for ifp %p\n", __func__, ifp);
-#endif
}
/*
@@ -713,7 +717,7 @@ igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
const struct igmp *igmp)
{
struct ifmultiaddr *ifma;
- struct igmp_ifinfo *igi;
+ struct igmp_ifsoftc *igi;
struct in_multi *inm;
/*
@@ -733,7 +737,7 @@ igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
IGMP_LOCK();
igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
- KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
+ KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp));
if (igi->igi_flags & IGIF_LOOPBACK) {
CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)",
@@ -798,7 +802,7 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
const struct igmp *igmp)
{
struct ifmultiaddr *ifma;
- struct igmp_ifinfo *igi;
+ struct igmp_ifsoftc *igi;
struct in_multi *inm;
int is_general_query;
uint16_t timer;
@@ -827,7 +831,7 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
IGMP_LOCK();
igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
- KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
+ KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp));
if (igi->igi_flags & IGIF_LOOPBACK) {
CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)",
@@ -948,7 +952,7 @@ static int
igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
/*const*/ struct igmpv3 *igmpv3)
{
- struct igmp_ifinfo *igi;
+ struct igmp_ifsoftc *igi;
struct in_multi *inm;
int is_general_query;
uint32_t maxresp, nsrc, qqi;
@@ -1021,7 +1025,7 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
IGMP_LOCK();
igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
- KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
+ KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp));
if (igi->igi_flags & IGIF_LOOPBACK) {
CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)",
@@ -1104,12 +1108,12 @@ out_locked:
}
/*
- * Process a recieved IGMPv3 group-specific or group-and-source-specific
+ * Process a received IGMPv3 group-specific or group-and-source-specific
* query.
- * Return <0 if any error occured. Currently this is ignored.
+ * Return <0 if any error occurred. Currently this is ignored.
*/
static int
-igmp_input_v3_group_query(struct in_multi *inm, struct igmp_ifinfo *igi,
+igmp_input_v3_group_query(struct in_multi *inm, struct igmp_ifsoftc *igi,
int timer, /*const*/ struct igmpv3 *igmpv3)
{
int retval;
@@ -1214,6 +1218,7 @@ static int
igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip,
/*const*/ struct igmp *igmp)
{
+ struct rm_priotracker in_ifa_tracker;
struct in_ifaddr *ia;
struct in_multi *inm;
@@ -1236,7 +1241,7 @@ igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip,
* Replace 0.0.0.0 with the subnet address if told to do so.
*/
if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
- IFP_TO_IA(ifp, ia);
+ IFP_TO_IA(ifp, ia, &in_ifa_tracker);
if (ia != NULL) {
ip->ip_src.s_addr = htonl(ia->ia_subnet);
ifa_free(&ia->ia_ifa);
@@ -1254,7 +1259,7 @@ igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip,
IN_MULTI_LOCK();
inm = inm_lookup(ifp, igmp->igmp_group);
if (inm != NULL) {
- struct igmp_ifinfo *igi;
+ struct igmp_ifsoftc *igi;
igi = inm->inm_igi;
if (igi == NULL) {
@@ -1322,6 +1327,7 @@ static int
igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
/*const*/ struct igmp *igmp)
{
+ struct rm_priotracker in_ifa_tracker;
struct in_ifaddr *ia;
struct in_multi *inm;
@@ -1330,7 +1336,7 @@ igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
* leave requires knowing that we are the only member of a
* group.
*/
- IFP_TO_IA(ifp, ia);
+ IFP_TO_IA(ifp, ia, &in_ifa_tracker);
if (ia != NULL && in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
ifa_free(&ia->ia_ifa);
return (0);
@@ -1378,7 +1384,7 @@ igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
IN_MULTI_LOCK();
inm = inm_lookup(ifp, igmp->igmp_group);
if (inm != NULL) {
- struct igmp_ifinfo *igi;
+ struct igmp_ifsoftc *igi;
igi = inm->inm_igi;
KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp));
@@ -1425,26 +1431,29 @@ out_locked:
return (0);
}
-void
-igmp_input(struct mbuf *m, int off)
+int
+igmp_input(struct mbuf **mp, int *offp, int proto)
{
int iphlen;
struct ifnet *ifp;
struct igmp *igmp;
struct ip *ip;
+ struct mbuf *m;
int igmplen;
int minlen;
int queryver;
- CTR3(KTR_IGMPV3, "%s: called w/mbuf (%p,%d)", __func__, m, off);
+ CTR3(KTR_IGMPV3, "%s: called w/mbuf (%p,%d)", __func__, *mp, *offp);
+ m = *mp;
ifp = m->m_pkthdr.rcvif;
+ *mp = NULL;
IGMPSTAT_INC(igps_rcv_total);
ip = mtod(m, struct ip *);
- iphlen = off;
- igmplen = ip->ip_len;
+ iphlen = *offp;
+ igmplen = ntohs(ip->ip_len) - iphlen;
/*
* Validate lengths.
@@ -1452,7 +1461,7 @@ igmp_input(struct mbuf *m, int off)
if (igmplen < IGMP_MINLEN) {
IGMPSTAT_INC(igps_rcv_tooshort);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
/*
@@ -1464,10 +1473,10 @@ igmp_input(struct mbuf *m, int off)
minlen += IGMP_V3_QUERY_MINLEN;
else
minlen += IGMP_MINLEN;
- if ((m->m_flags & M_EXT || m->m_len < minlen) &&
- (m = m_pullup(m, minlen)) == 0) {
+ if ((!M_WRITABLE(m) || m->m_len < minlen) &&
+ (m = m_pullup(m, minlen)) == NULL) {
IGMPSTAT_INC(igps_rcv_tooshort);
- return;
+ return (IPPROTO_DONE);
}
ip = mtod(m, struct ip *);
@@ -1480,7 +1489,7 @@ igmp_input(struct mbuf *m, int off)
if (in_cksum(m, igmplen)) {
IGMPSTAT_INC(igps_rcv_badsum);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
m->m_data -= iphlen;
m->m_len += iphlen;
@@ -1493,7 +1502,7 @@ igmp_input(struct mbuf *m, int off)
if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
IGMPSTAT_INC(igps_rcv_badttl);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
switch (igmp->igmp_type) {
@@ -1508,7 +1517,7 @@ igmp_input(struct mbuf *m, int off)
} else {
IGMPSTAT_INC(igps_rcv_tooshort);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
switch (queryver) {
@@ -1518,7 +1527,7 @@ igmp_input(struct mbuf *m, int off)
break;
if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
break;
@@ -1528,7 +1537,7 @@ igmp_input(struct mbuf *m, int off)
break;
if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
break;
@@ -1546,25 +1555,25 @@ igmp_input(struct mbuf *m, int off)
if (nsrc * sizeof(in_addr_t) >
UINT16_MAX - iphlen - IGMP_V3_QUERY_MINLEN) {
IGMPSTAT_INC(igps_rcv_tooshort);
- return;
+ return (IPPROTO_DONE);
}
/*
* m_pullup() may modify m, so pullup in
* this scope.
*/
igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN +
- sizeof(struct in_addr) * nsrc;
- if ((m->m_flags & M_EXT ||
+ sizeof(struct in_addr) * nsrc;
+ if ((!M_WRITABLE(m) ||
m->m_len < igmpv3len) &&
(m = m_pullup(m, igmpv3len)) == NULL) {
IGMPSTAT_INC(igps_rcv_tooshort);
- return;
+ return (IPPROTO_DONE);
}
igmpv3 = (struct igmpv3 *)(mtod(m, uint8_t *)
+ iphlen);
if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
}
break;
@@ -1576,7 +1585,7 @@ igmp_input(struct mbuf *m, int off)
break;
if (igmp_input_v1_report(ifp, ip, igmp) != 0) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
break;
@@ -1587,7 +1596,7 @@ igmp_input(struct mbuf *m, int off)
IGMPSTAT_INC(igps_rcv_nora);
if (igmp_input_v2_report(ifp, ip, igmp) != 0) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
break;
@@ -1608,7 +1617,8 @@ igmp_input(struct mbuf *m, int off)
* Pass all valid IGMP packets up to any process(es) listening on a
* raw IGMP socket.
*/
- rip_input(m, off);
+ *mp = m;
+ return (rip_input(mp, offp, proto));
}
@@ -1639,10 +1649,10 @@ igmp_fasttimo(void)
static void
igmp_fasttimo_vnet(void)
{
- struct ifqueue scq; /* State-change packets */
- struct ifqueue qrq; /* Query response packets */
+ struct mbufq scq; /* State-change packets */
+ struct mbufq qrq; /* Query response packets */
struct ifnet *ifp;
- struct igmp_ifinfo *igi;
+ struct igmp_ifsoftc *igi;
struct ifmultiaddr *ifma;
struct in_multi *inm;
int loop, uri_fasthz;
@@ -1701,12 +1711,8 @@ igmp_fasttimo_vnet(void)
loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri *
PR_FASTHZ);
-
- memset(&qrq, 0, sizeof(struct ifqueue));
- IFQ_SET_MAXLEN(&qrq, IGMP_MAX_G_GS_PACKETS);
-
- memset(&scq, 0, sizeof(struct ifqueue));
- IFQ_SET_MAXLEN(&scq, IGMP_MAX_STATE_CHANGE_PACKETS);
+ mbufq_init(&qrq, IGMP_MAX_G_GS_PACKETS);
+ mbufq_init(&scq, IGMP_MAX_STATE_CHANGE_PACKETS);
}
IF_ADDR_RLOCK(ifp);
@@ -1804,8 +1810,8 @@ igmp_v1v2_process_group_timer(struct in_multi *inm, const int version)
* Note: Unlocked read from igi.
*/
static void
-igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
- struct ifqueue *qrq, struct ifqueue *scq,
+igmp_v3_process_group_timers(struct igmp_ifsoftc *igi,
+ struct mbufq *qrq, struct mbufq *scq,
struct in_multi *inm, const int uri_fasthz)
{
int query_response_timer_expired;
@@ -1951,7 +1957,7 @@ igmp_v3_suppress_group_record(struct in_multi *inm)
* as per Section 7.2.1.
*/
static void
-igmp_set_version(struct igmp_ifinfo *igi, const int version)
+igmp_set_version(struct igmp_ifsoftc *igi, const int version)
{
int old_version_timer;
@@ -2000,7 +2006,7 @@ igmp_set_version(struct igmp_ifinfo *igi, const int version)
* query processing.
*/
static void
-igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
+igmp_v3_cancel_link_timers(struct igmp_ifsoftc *igi)
{
struct ifmultiaddr *ifma;
struct ifnet *ifp;
@@ -2067,7 +2073,7 @@ igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
*/
inm->inm_sctimer = 0;
inm->inm_timer = 0;
- _IF_DRAIN(&inm->inm_scq);
+ mbufq_drain(&inm->inm_scq);
}
IF_ADDR_RUNLOCK(ifp);
SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele, tinm) {
@@ -2081,7 +2087,7 @@ igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
* See Section 7.2.1 of RFC 3376.
*/
static void
-igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
+igmp_v1v2_process_querier_timers(struct igmp_ifsoftc *igi)
{
IGMP_LOCK_ASSERT();
@@ -2122,6 +2128,7 @@ igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
__func__, igi->igi_version, IGMP_VERSION_2,
igi->igi_ifp, igi->igi_ifp->if_xname);
igi->igi_version = IGMP_VERSION_2;
+ igmp_v3_cancel_link_timers(igi);
}
}
} else if (igi->igi_v1_timer > 0) {
@@ -2176,7 +2183,7 @@ igmp_slowtimo(void)
static void
igmp_slowtimo_vnet(void)
{
- struct igmp_ifinfo *igi;
+ struct igmp_ifsoftc *igi;
IGMP_LOCK();
@@ -2204,10 +2211,10 @@ igmp_v1v2_queue_report(struct in_multi *inm, const int type)
ifp = inm->inm_ifp;
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (ENOMEM);
- MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
+ M_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
@@ -2226,7 +2233,7 @@ igmp_v1v2_queue_report(struct in_multi *inm, const int type)
ip = mtod(m, struct ip *);
ip->ip_tos = 0;
- ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
+ ip->ip_len = htons(sizeof(struct ip) + sizeof(struct igmp));
ip->ip_off = 0;
ip->ip_p = IPPROTO_IGMP;
ip->ip_src.s_addr = INADDR_ANY;
@@ -2272,7 +2279,7 @@ igmp_v1v2_queue_report(struct in_multi *inm, const int type)
int
igmp_change_state(struct in_multi *inm)
{
- struct igmp_ifinfo *igi;
+ struct igmp_ifsoftc *igi;
struct ifnet *ifp;
int error;
@@ -2295,7 +2302,7 @@ igmp_change_state(struct in_multi *inm)
IGMP_LOCK();
igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
- KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
+ KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp));
/*
* If we detect a state transition to or from MCAST_UNDEFINED
@@ -2336,10 +2343,10 @@ out_locked:
* initial state of the membership.
*/
static int
-igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi)
+igmp_initial_join(struct in_multi *inm, struct igmp_ifsoftc *igi)
{
struct ifnet *ifp;
- struct ifqueue *ifq;
+ struct mbufq *mq;
int error, retval, syncstates;
CTR4(KTR_IGMPV3, "%s: initial join %s on ifp %p(%s)",
@@ -2413,9 +2420,9 @@ igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi)
* Don't kick the timers if there is nothing to do,
* or if an error occurred.
*/
- ifq = &inm->inm_scq;
- _IF_DRAIN(ifq);
- retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
+ mq = &inm->inm_scq;
+ mbufq_drain(mq);
+ retval = igmp_v3_enqueue_group_record(mq, inm, 1,
0, 0);
CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
__func__, retval);
@@ -2464,7 +2471,7 @@ igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi)
* Issue an intermediate state change during the IGMP life-cycle.
*/
static int
-igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi)
+igmp_handle_state_change(struct in_multi *inm, struct igmp_ifsoftc *igi)
{
struct ifnet *ifp;
int retval;
@@ -2495,7 +2502,7 @@ igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi)
return (0);
}
- _IF_DRAIN(&inm->inm_scq);
+ mbufq_drain(&inm->inm_scq);
retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval);
@@ -2523,7 +2530,7 @@ igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi)
* to INCLUDE {} for immediate transmission.
*/
static void
-igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi)
+igmp_final_leave(struct in_multi *inm, struct igmp_ifsoftc *igi)
{
int syncstates;
@@ -2564,7 +2571,7 @@ igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi)
* TO_IN {} to be sent on the next fast timeout,
* giving us an opportunity to merge reports.
*/
- _IF_DRAIN(&inm->inm_scq);
+ mbufq_drain(&inm->inm_scq);
inm->inm_timer = 0;
if (igi->igi_flags & IGIF_LOOPBACK) {
inm->inm_scrv = 1;
@@ -2642,7 +2649,7 @@ igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi)
* no record(s) were appended.
*/
static int
-igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
+igmp_v3_enqueue_group_record(struct mbufq *mq, struct in_multi *inm,
const int is_state_change, const int is_group_query,
const int is_source_query)
{
@@ -2732,7 +2739,7 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
* Generate the filter list changes using a separate function.
*/
if (is_filter_list_change)
- return (igmp_v3_enqueue_filter_change(ifq, inm));
+ return (igmp_v3_enqueue_filter_change(mq, inm));
if (type == IGMP_DO_NOTHING) {
CTR3(KTR_IGMPV3, "%s: nothing to do for %s/%s",
@@ -2762,7 +2769,7 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
* Note: Group records for G/GSR query responses MUST be sent
* in their own packet.
*/
- m0 = ifq->ifq_tail;
+ m0 = mbufq_last(mq);
if (!is_group_query &&
m0 != NULL &&
(m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
@@ -2773,7 +2780,7 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
m = m0;
CTR1(KTR_IGMPV3, "%s: use existing packet", __func__);
} else {
- if (_IF_QFULL(ifq)) {
+ if (mbufq_full(mq)) {
CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
return (-ENOMEM);
}
@@ -2781,14 +2788,14 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
if (!is_state_change && !is_group_query) {
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m)
m->m_data += IGMP_LEADINGSPACE;
}
if (m == NULL) {
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m)
- MH_ALIGN(m, IGMP_LEADINGSPACE);
+ M_ALIGN(m, IGMP_LEADINGSPACE);
}
if (m == NULL)
return (-ENOMEM);
@@ -2886,7 +2893,7 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
if (m != m0) {
CTR1(KTR_IGMPV3, "%s: enqueueing first packet", __func__);
m->m_pkthdr.PH_vt.vt_nrecs = 1;
- _IF_ENQUEUE(ifq, m);
+ mbufq_enqueue(mq, m);
} else
m->m_pkthdr.PH_vt.vt_nrecs++;
@@ -2902,17 +2909,17 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
* Always try for a cluster first.
*/
while (nims != NULL) {
- if (_IF_QFULL(ifq)) {
+ if (mbufq_full(mq)) {
CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
return (-ENOMEM);
}
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m)
m->m_data += IGMP_LEADINGSPACE;
if (m == NULL) {
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m)
- MH_ALIGN(m, IGMP_LEADINGSPACE);
+ M_ALIGN(m, IGMP_LEADINGSPACE);
}
if (m == NULL)
return (-ENOMEM);
@@ -2965,7 +2972,7 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
nbytes += (msrcs * sizeof(in_addr_t));
CTR1(KTR_IGMPV3, "%s: enqueueing next packet", __func__);
- _IF_ENQUEUE(ifq, m);
+ mbufq_enqueue(mq, m);
}
return (nbytes);
@@ -3005,7 +3012,7 @@ typedef enum {
* no record(s) were appended.
*/
static int
-igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
+igmp_v3_enqueue_filter_change(struct mbufq *mq, struct in_multi *inm)
{
static const int MINRECLEN =
sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
@@ -3049,7 +3056,7 @@ igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
*/
while (drt != REC_FULL) {
do {
- m0 = ifq->ifq_tail;
+ m0 = mbufq_last(mq);
if (m0 != NULL &&
(m0->m_pkthdr.PH_vt.vt_nrecs + 1 <=
IGMP_V3_REPORT_MAXRECS) &&
@@ -3062,13 +3069,13 @@ igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
CTR1(KTR_IGMPV3,
"%s: use previous packet", __func__);
} else {
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m)
m->m_data += IGMP_LEADINGSPACE;
if (m == NULL) {
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m)
- MH_ALIGN(m, IGMP_LEADINGSPACE);
+ M_ALIGN(m, IGMP_LEADINGSPACE);
}
if (m == NULL) {
CTR1(KTR_IGMPV3,
@@ -3196,7 +3203,7 @@ igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
*/
m->m_pkthdr.PH_vt.vt_nrecs++;
if (m != m0)
- _IF_ENQUEUE(ifq, m);
+ mbufq_enqueue(mq, m);
nbytes += npbytes;
} while (nims != NULL);
drt |= crt;
@@ -3210,9 +3217,9 @@ igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
}
static int
-igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
+igmp_v3_merge_state_changes(struct in_multi *inm, struct mbufq *scq)
{
- struct ifqueue *gq;
+ struct mbufq *gq;
struct mbuf *m; /* pending state-change */
struct mbuf *m0; /* copy of pending state-change */
struct mbuf *mt; /* last state-change in packet */
@@ -3235,13 +3242,13 @@ igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
gq = &inm->inm_scq;
#ifdef KTR
- if (gq->ifq_head == NULL) {
+ if (mbufq_first(gq) == NULL) {
CTR2(KTR_IGMPV3, "%s: WARNING: queue for inm %p is empty",
__func__, inm);
}
#endif
- m = gq->ifq_head;
+ m = mbufq_first(gq);
while (m != NULL) {
/*
* Only merge the report into the current packet if
@@ -3252,7 +3259,7 @@ igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
* allocated clusters.
*/
domerge = 0;
- mt = ifscq->ifq_tail;
+ mt = mbufq_last(scq);
if (mt != NULL) {
recslen = m_length(m, NULL);
@@ -3264,7 +3271,7 @@ igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
domerge = 1;
}
- if (!domerge && _IF_QFULL(gq)) {
+ if (!domerge && mbufq_full(gq)) {
CTR2(KTR_IGMPV3,
"%s: outbound queue full, skipping whole packet %p",
__func__, m);
@@ -3277,7 +3284,7 @@ igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
if (!docopy) {
CTR2(KTR_IGMPV3, "%s: dequeueing %p", __func__, m);
- _IF_DEQUEUE(gq, m0);
+ m0 = mbufq_dequeue(gq);
m = m0->m_nextpkt;
} else {
CTR2(KTR_IGMPV3, "%s: copying %p", __func__, m);
@@ -3289,13 +3296,13 @@ igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
}
if (!domerge) {
- CTR3(KTR_IGMPV3, "%s: queueing %p to ifscq %p)",
- __func__, m0, ifscq);
- _IF_ENQUEUE(ifscq, m0);
+ CTR3(KTR_IGMPV3, "%s: queueing %p to scq %p)",
+ __func__, m0, scq);
+ mbufq_enqueue(scq, m0);
} else {
struct mbuf *mtl; /* last mbuf of packet mt */
- CTR3(KTR_IGMPV3, "%s: merging %p with ifscq tail %p)",
+ CTR3(KTR_IGMPV3, "%s: merging %p with scq tail %p)",
__func__, m0, mt);
mtl = m_last(mt);
@@ -3315,7 +3322,7 @@ igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
* Respond to a pending IGMPv3 General Query.
*/
static void
-igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
+igmp_v3_dispatch_general_query(struct igmp_ifsoftc *igi)
{
struct ifmultiaddr *ifma;
struct ifnet *ifp;
@@ -3328,6 +3335,15 @@ igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
KASSERT(igi->igi_version == IGMP_VERSION_3,
("%s: called when version %d", __func__, igi->igi_version));
+	/*
+	 * Check whether some packets are already queued. If so, send them
+	 * first. For a large number of groups the reply to a general query
+	 * can take many packets; finish sending them before queuing the
+	 * new reply.
+	 */
+ if (mbufq_len(&igi->igi_gq) != 0)
+ goto send;
+
ifp = igi->igi_ifp;
IF_ADDR_RLOCK(ifp);
@@ -3363,13 +3379,14 @@ igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
}
IF_ADDR_RUNLOCK(ifp);
+send:
loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST, loop);
/*
* Slew transmission of bursts over 500ms intervals.
*/
- if (igi->igi_gq.ifq_head != NULL) {
+ if (mbufq_first(&igi->igi_gq) != NULL) {
igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
IGMP_RESPONSE_BURST_INTERVAL);
V_interface_timers_running = 1;
@@ -3403,7 +3420,7 @@ igmp_intr(struct mbuf *m)
* indexes to guard against interface detach, they are
* unique to each VIMAGE and must be retrieved.
*/
- CURVNET_SET((struct vnet *)(m->m_pkthdr.header));
+ CURVNET_SET((struct vnet *)(m->m_pkthdr.PH_loc.ptr));
ifindex = igmp_restore_context(m);
/*
@@ -3450,7 +3467,7 @@ igmp_intr(struct mbuf *m)
}
igmp_scrub_context(m0);
- m->m_flags &= ~(M_PROTOFLAGS);
+ m_clrprotoflags(m);
m0->m_pkthdr.rcvif = V_loif;
#ifdef MAC
mac_netinet_igmp_send(ifp, m0);
@@ -3485,6 +3502,7 @@ out:
static struct mbuf *
igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
{
+ struct rm_priotracker in_ifa_tracker;
struct igmp_report *igmp;
struct ip *ip;
int hdrlen, igmpreclen;
@@ -3498,7 +3516,7 @@ igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
if (m->m_flags & M_IGMPV3_HDR) {
igmpreclen -= hdrlen;
} else {
- M_PREPEND(m, hdrlen, M_DONTWAIT);
+ M_PREPEND(m, hdrlen, M_NOWAIT);
if (m == NULL)
return (NULL);
m->m_flags |= M_IGMPV3_HDR;
@@ -3523,8 +3541,8 @@ igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
ip = mtod(m, struct ip *);
ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
- ip->ip_len = hdrlen + igmpreclen;
- ip->ip_off = IP_DF;
+ ip->ip_len = htons(hdrlen + igmpreclen);
+ ip->ip_off = htons(IP_DF);
ip->ip_p = IPPROTO_IGMP;
ip->ip_sum = 0;
@@ -3533,7 +3551,7 @@ igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
if (m->m_flags & M_IGMP_LOOP) {
struct in_ifaddr *ia;
- IFP_TO_IA(ifp, ia);
+ IFP_TO_IA(ifp, ia, &in_ifa_tracker);
if (ia != NULL) {
ip->ip_src = ia->ia_addr.sin_addr;
ifa_free(&ia->ia_ifa);
@@ -3576,70 +3594,82 @@ igmp_rec_type_to_str(const int type)
}
#endif
+#ifdef VIMAGE
static void
-igmp_init(void *unused __unused)
+vnet_igmp_init(const void *unused __unused)
{
- CTR1(KTR_IGMPV3, "%s: initializing", __func__);
-
- IGMP_LOCK_INIT();
-
- m_raopt = igmp_ra_alloc();
-
- netisr_register(&igmp_nh);
+ netisr_register_vnet(&igmp_nh);
}
-SYSINIT(igmp_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_init, NULL);
+VNET_SYSINIT(vnet_igmp_init, SI_SUB_PROTO_MC, SI_ORDER_ANY,
+ vnet_igmp_init, NULL);
static void
-igmp_uninit(void *unused __unused)
+vnet_igmp_uninit(const void *unused __unused)
{
+ /* This can happen when we shutdown the entire network stack. */
CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
- netisr_unregister(&igmp_nh);
-
- m_free(m_raopt);
- m_raopt = NULL;
-
- IGMP_LOCK_DESTROY();
+ netisr_unregister_vnet(&igmp_nh);
}
-SYSUNINIT(igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_uninit, NULL);
+VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY,
+ vnet_igmp_uninit, NULL);
+#endif
-static void
-vnet_igmp_init(const void *unused __unused)
+#ifdef DDB
+DB_SHOW_COMMAND(igi_list, db_show_igi_list)
{
+ struct igmp_ifsoftc *igi, *tigi;
+ LIST_HEAD(_igi_list, igmp_ifsoftc) *igi_head;
- CTR1(KTR_IGMPV3, "%s: initializing", __func__);
-
- LIST_INIT(&V_igi_head);
-}
-VNET_SYSINIT(vnet_igmp_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_igmp_init,
- NULL);
-
-static void
-vnet_igmp_uninit(const void *unused __unused)
-{
-
- CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
-
- KASSERT(LIST_EMPTY(&V_igi_head),
- ("%s: igi list not empty; ifnets not detached?", __func__));
+ if (!have_addr) {
+ db_printf("usage: show igi_list <addr>\n");
+ return;
+ }
+ igi_head = (struct _igi_list *)addr;
+
+ LIST_FOREACH_SAFE(igi, igi_head, igi_link, tigi) {
+ db_printf("igmp_ifsoftc %p:\n", igi);
+ db_printf(" ifp %p\n", igi->igi_ifp);
+ db_printf(" version %u\n", igi->igi_version);
+ db_printf(" v1_timer %u\n", igi->igi_v1_timer);
+ db_printf(" v2_timer %u\n", igi->igi_v2_timer);
+ db_printf(" v3_timer %u\n", igi->igi_v3_timer);
+ db_printf(" flags %#x\n", igi->igi_flags);
+ db_printf(" rv %u\n", igi->igi_rv);
+ db_printf(" qi %u\n", igi->igi_qi);
+ db_printf(" qri %u\n", igi->igi_qri);
+ db_printf(" uri %u\n", igi->igi_uri);
+ /* SLIST_HEAD(,in_multi) igi_relinmhead */
+ /* struct mbufq igi_gq; */
+ db_printf("\n");
+ }
}
-VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
- vnet_igmp_uninit, NULL);
+#endif
static int
igmp_modevent(module_t mod, int type, void *unused __unused)
{
- switch (type) {
- case MOD_LOAD:
- case MOD_UNLOAD:
- break;
- default:
- return (EOPNOTSUPP);
- }
- return (0);
+ switch (type) {
+ case MOD_LOAD:
+ CTR1(KTR_IGMPV3, "%s: initializing", __func__);
+ IGMP_LOCK_INIT();
+ m_raopt = igmp_ra_alloc();
+ netisr_register(&igmp_nh);
+ break;
+ case MOD_UNLOAD:
+ CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
+ netisr_unregister(&igmp_nh);
+ m_free(m_raopt);
+ m_raopt = NULL;
+ IGMP_LOCK_DESTROY();
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
}
static moduledata_t igmp_mod = {
@@ -3647,4 +3677,4 @@ static moduledata_t igmp_mod = {
igmp_modevent,
0
};
-DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE);
diff --git a/freebsd/sys/netinet/igmp_var.h b/freebsd/sys/netinet/igmp_var.h
index ca17158f..5242d07d 100644
--- a/freebsd/sys/netinet/igmp_var.h
+++ b/freebsd/sys/netinet/igmp_var.h
@@ -46,24 +46,6 @@
* MULTICAST Revision: 3.5.1.3
*/
-#ifndef BURN_BRIDGES
-/*
- * Pre-IGMPV3 igmpstat structure.
- */
-struct oigmpstat {
- u_int igps_rcv_total; /* total IGMP messages received */
- u_int igps_rcv_tooshort; /* received with too few bytes */
- u_int igps_rcv_badsum; /* received with bad checksum */
- u_int igps_rcv_queries; /* received membership queries */
- u_int igps_rcv_badqueries; /* received invalid queries */
- u_int igps_rcv_reports; /* received membership reports */
- u_int igps_rcv_badreports; /* received invalid reports */
- u_int igps_rcv_ourreports; /* received reports for our groups */
- u_int igps_snd_reports; /* sent membership reports */
- u_int igps_rcv_toolong; /* received with too many bytes */
-};
-#endif
-
/*
* IGMPv3 protocol statistics.
*/
@@ -105,19 +87,16 @@ struct igmpstat {
};
#define IGPS_VERSION_3 3 /* as of FreeBSD 8.x */
#define IGPS_VERSION3_LEN 168
-
-#ifdef _KERNEL
-#define IGMPSTAT_ADD(name, val) V_igmpstat.name += (val)
-#define IGMPSTAT_INC(name) IGMPSTAT_ADD(name, 1)
-#endif
-
#ifdef CTASSERT
-CTASSERT(sizeof(struct igmpstat) == 168);
+CTASSERT(sizeof(struct igmpstat) == IGPS_VERSION3_LEN);
#endif
-#ifdef _KERNEL
-#define IGMP_RANDOM_DELAY(X) (random() % (X) + 1)
+/*
+ * Identifiers for IGMP sysctl nodes
+ */
+#define IGMPCTL_STATS 1 /* statistics (read-only) */
+#define IGMP_RANDOM_DELAY(X) (random() % (X) + 1)
#define IGMP_MAX_STATE_CHANGES 24 /* Max pending changes per group */
/*
@@ -186,6 +165,27 @@ CTASSERT(sizeof(struct igmpstat) == 168);
(sizeof(struct ip) + RAOPT_LEN + sizeof(struct igmp_report))
/*
+ * Structure returned by net.inet.igmp.ifinfo sysctl.
+ */
+struct igmp_ifinfo {
+ uint32_t igi_version; /* IGMPv3 Host Compatibility Mode */
+ uint32_t igi_v1_timer; /* IGMPv1 Querier Present timer (s) */
+ uint32_t igi_v2_timer; /* IGMPv2 Querier Present timer (s) */
+ uint32_t igi_v3_timer; /* IGMPv3 General Query (interface) timer (s)*/
+ uint32_t igi_flags; /* IGMP per-interface flags */
+#define IGIF_SILENT 0x00000001 /* Do not use IGMP on this ifp */
+#define IGIF_LOOPBACK 0x00000002 /* Send IGMP reports to loopback */
+ uint32_t igi_rv; /* IGMPv3 Robustness Variable */
+ uint32_t igi_qi; /* IGMPv3 Query Interval (s) */
+ uint32_t igi_qri; /* IGMPv3 Query Response Interval (s) */
+ uint32_t igi_uri; /* IGMPv3 Unsolicited Report Interval (s) */
+};
+
+#ifdef _KERNEL
+#define IGMPSTAT_ADD(name, val) V_igmpstat.name += (val)
+#define IGMPSTAT_INC(name) IGMPSTAT_ADD(name, 1)
+
+/*
* Subsystem lock macros.
* The IGMP lock is only taken with IGMP. Currently it is system-wide.
* VIMAGE: The lock could be pushed to per-VIMAGE granularity in future.
@@ -197,29 +197,35 @@ CTASSERT(sizeof(struct igmpstat) == 168);
#define IGMP_UNLOCK() mtx_unlock(&igmp_mtx)
#define IGMP_UNLOCK_ASSERT() mtx_assert(&igmp_mtx, MA_NOTOWNED)
-struct igmp_ifinfo;
+/*
+ * Per-interface IGMP router version information.
+ */
+struct igmp_ifsoftc {
+ LIST_ENTRY(igmp_ifsoftc) igi_link;
+ struct ifnet *igi_ifp; /* pointer back to interface */
+ uint32_t igi_version; /* IGMPv3 Host Compatibility Mode */
+ uint32_t igi_v1_timer; /* IGMPv1 Querier Present timer (s) */
+ uint32_t igi_v2_timer; /* IGMPv2 Querier Present timer (s) */
+ uint32_t igi_v3_timer; /* IGMPv3 General Query (interface) timer (s)*/
+ uint32_t igi_flags; /* IGMP per-interface flags */
+ uint32_t igi_rv; /* IGMPv3 Robustness Variable */
+ uint32_t igi_qi; /* IGMPv3 Query Interval (s) */
+ uint32_t igi_qri; /* IGMPv3 Query Response Interval (s) */
+ uint32_t igi_uri; /* IGMPv3 Unsolicited Report Interval (s) */
+ SLIST_HEAD(,in_multi) igi_relinmhead; /* released groups */
+ struct mbufq igi_gq; /* general query responses queue */
+};
int igmp_change_state(struct in_multi *);
void igmp_fasttimo(void);
-struct igmp_ifinfo *
+struct igmp_ifsoftc *
igmp_domifattach(struct ifnet *);
void igmp_domifdetach(struct ifnet *);
void igmp_ifdetach(struct ifnet *);
-void igmp_input(struct mbuf *, int);
+int igmp_input(struct mbuf **, int *, int);
void igmp_slowtimo(void);
SYSCTL_DECL(_net_inet_igmp);
#endif /* _KERNEL */
-
-/*
- * Names for IGMP sysctl objects
- */
-#define IGMPCTL_STATS 1 /* statistics (read-only) */
-#define IGMPCTL_MAXID 2
-
-#define IGMPCTL_NAMES { \
- { 0, 0 }, \
- { "stats", CTLTYPE_STRUCT } \
-}
#endif
diff --git a/freebsd/sys/netinet/in.c b/freebsd/sys/netinet/in.c
index 653580c7..06b23973 100644
--- a/freebsd/sys/netinet/in.c
+++ b/freebsd/sys/netinet/in.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_mpath.h>
#include <rtems/bsd/sys/param.h>
+#include <sys/eventhandler.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/malloc.h>
@@ -45,9 +46,12 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/proc.h>
+#include <sys/rmlock.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
+#include <sys/sx.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -58,37 +62,33 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
#include <net/vnet.h>
+#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_carp.h>
#include <netinet/igmp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
-static int in_mask2len(struct in_addr *);
-static void in_len2mask(struct in_addr *, int);
-static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
- struct ifnet *, struct thread *);
+static int in_aifaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *);
+static int in_difaddr_ioctl(caddr_t, struct ifnet *, struct thread *);
-static int in_addprefix(struct in_ifaddr *, int);
-static int in_scrubprefix(struct in_ifaddr *, u_int);
static void in_socktrim(struct sockaddr_in *);
-static int in_ifinit(struct ifnet *,
- struct in_ifaddr *, struct sockaddr_in *, int);
static void in_purgemaddrs(struct ifnet *);
-static VNET_DEFINE(int, sameprefixcarponly);
-#define V_sameprefixcarponly VNET(sameprefixcarponly)
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW,
- &VNET_NAME(sameprefixcarponly), 0,
+static VNET_DEFINE(int, nosameprefix);
+#define V_nosameprefix VNET(nosameprefix)
+SYSCTL_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(nosameprefix), 0,
"Refuse to create same prefixes on different interfaces");
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
#define V_ripcbinfo VNET(ripcbinfo)
-VNET_DECLARE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */
-#define V_arpstat VNET(arpstat)
+static struct sx in_control_sx;
+SX_SYSINIT(in_control_sx, &in_control_sx, "in_control");
/*
* Return 1 if an internet address is for a ``local'' host
@@ -97,17 +97,18 @@ VNET_DECLARE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */
int
in_localaddr(struct in_addr in)
{
+ struct rm_priotracker in_ifa_tracker;
register u_long i = ntohl(in.s_addr);
register struct in_ifaddr *ia;
- IN_IFADDR_RLOCK();
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
return (1);
}
}
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
return (0);
}
@@ -118,20 +119,69 @@ in_localaddr(struct in_addr in)
int
in_localip(struct in_addr in)
{
+ struct rm_priotracker in_ifa_tracker;
struct in_ifaddr *ia;
- IN_IFADDR_RLOCK();
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) {
if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) {
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
return (1);
}
}
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
return (0);
}
/*
+ * Return 1 if an internet address is configured on an interface.
+ */
+int
+in_ifhasaddr(struct ifnet *ifp, struct in_addr in)
+{
+ struct ifaddr *ifa;
+ struct in_ifaddr *ia;
+
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ ia = (struct in_ifaddr *)ifa;
+ if (ia->ia_addr.sin_addr.s_addr == in.s_addr) {
+ IF_ADDR_RUNLOCK(ifp);
+ return (1);
+ }
+ }
+ IF_ADDR_RUNLOCK(ifp);
+
+ return (0);
+}
+
+/*
+ * Return a reference to the interface address which is different to
+ * the supplied one but with same IP address value.
+ */
+static struct in_ifaddr *
+in_localip_more(struct in_ifaddr *ia)
+{
+ struct rm_priotracker in_ifa_tracker;
+ in_addr_t in = IA_SIN(ia)->sin_addr.s_addr;
+ struct in_ifaddr *it;
+
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
+ LIST_FOREACH(it, INADDR_HASH(in), ia_hash) {
+ if (it != ia && IA_SIN(it)->sin_addr.s_addr == in) {
+ ifa_ref(&it->ia_ifa);
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
+ return (it);
+ }
+ }
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
+
+ return (NULL);
+}
+
+/*
* Determine whether an IP address is in a reserved set of addresses
* that may not be forwarded, or whether datagrams to that destination
* may be forwarded.
@@ -169,793 +219,430 @@ in_socktrim(struct sockaddr_in *ap)
}
}
-static int
-in_mask2len(mask)
- struct in_addr *mask;
-{
- int x, y;
- u_char *p;
-
- p = (u_char *)mask;
- for (x = 0; x < sizeof(*mask); x++) {
- if (p[x] != 0xff)
- break;
- }
- y = 0;
- if (x < sizeof(*mask)) {
- for (y = 0; y < 8; y++) {
- if ((p[x] & (0x80 >> y)) == 0)
- break;
- }
- }
- return (x * 8 + y);
-}
-
-static void
-in_len2mask(struct in_addr *mask, int len)
-{
- int i;
- u_char *p;
-
- p = (u_char *)mask;
- bzero(mask, sizeof(*mask));
- for (i = 0; i < len / 8; i++)
- p[i] = 0xff;
- if (len % 8)
- p[i] = (0xff00 >> (len % 8)) & 0xff;
-}
-
/*
* Generic internet control operations (ioctl's).
- *
- * ifp is NULL if not an interface-specific ioctl.
*/
-/* ARGSUSED */
int
in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
struct thread *td)
{
- register struct ifreq *ifr = (struct ifreq *)data;
- register struct in_ifaddr *ia, *iap;
- register struct ifaddr *ifa;
- struct in_addr allhosts_addr;
- struct in_addr dst;
- struct in_ifinfo *ii;
- struct in_aliasreq *ifra = (struct in_aliasreq *)data;
- struct sockaddr_in oldaddr;
- int error, hostIsNew, iaIsNew, maskIsNew;
- int iaIsFirst;
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct sockaddr_in *addr = (struct sockaddr_in *)&ifr->ifr_addr;
+ struct ifaddr *ifa;
+ struct in_ifaddr *ia;
+ int error;
- ia = NULL;
- iaIsFirst = 0;
- iaIsNew = 0;
- allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
+ if (ifp == NULL)
+ return (EADDRNOTAVAIL);
/*
- * Filter out ioctls we implement directly; forward the rest on to
- * in_lifaddr_ioctl() and ifp->if_ioctl().
+ * Filter out 4 ioctls we implement directly. Forward the rest
+ * to specific functions and ifp->if_ioctl().
*/
switch (cmd) {
- case SIOCAIFADDR:
- case SIOCDIFADDR:
case SIOCGIFADDR:
case SIOCGIFBRDADDR:
case SIOCGIFDSTADDR:
case SIOCGIFNETMASK:
+ break;
+ case SIOCDIFADDR:
+ sx_xlock(&in_control_sx);
+ error = in_difaddr_ioctl(data, ifp, td);
+ sx_xunlock(&in_control_sx);
+ return (error);
+#ifndef __rtems__
+ case OSIOCAIFADDR: /* 9.x compat */
+#endif /* __rtems__ */
+ case SIOCAIFADDR:
+ sx_xlock(&in_control_sx);
+ error = in_aifaddr_ioctl(cmd, data, ifp, td);
+ sx_xunlock(&in_control_sx);
+ return (error);
case SIOCSIFADDR:
case SIOCSIFBRDADDR:
case SIOCSIFDSTADDR:
case SIOCSIFNETMASK:
- break;
-
- case SIOCALIFADDR:
- if (td != NULL) {
- error = priv_check(td, PRIV_NET_ADDIFADDR);
- if (error)
- return (error);
- }
- if (ifp == NULL)
- return (EINVAL);
- return in_lifaddr_ioctl(so, cmd, data, ifp, td);
-
- case SIOCDLIFADDR:
- if (td != NULL) {
- error = priv_check(td, PRIV_NET_DELIFADDR);
- if (error)
- return (error);
- }
- if (ifp == NULL)
- return (EINVAL);
- return in_lifaddr_ioctl(so, cmd, data, ifp, td);
-
- case SIOCGLIFADDR:
- if (ifp == NULL)
- return (EINVAL);
- return in_lifaddr_ioctl(so, cmd, data, ifp, td);
-
+ /* We no longer support that old commands. */
+ return (EINVAL);
default:
- if (ifp == NULL || ifp->if_ioctl == NULL)
+ if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
return ((*ifp->if_ioctl)(ifp, cmd, data));
}
- if (ifp == NULL)
+ if (addr->sin_addr.s_addr != INADDR_ANY &&
+ prison_check_ip4(td->td_ucred, &addr->sin_addr) != 0)
return (EADDRNOTAVAIL);
/*
- * Security checks before we get involved in any work.
- */
- switch (cmd) {
- case SIOCAIFADDR:
- case SIOCSIFADDR:
- case SIOCSIFBRDADDR:
- case SIOCSIFNETMASK:
- case SIOCSIFDSTADDR:
- if (td != NULL) {
- error = priv_check(td, PRIV_NET_ADDIFADDR);
- if (error)
- return (error);
- }
- break;
-
- case SIOCDIFADDR:
- if (td != NULL) {
- error = priv_check(td, PRIV_NET_DELIFADDR);
- if (error)
- return (error);
- }
- break;
- }
-
- /*
- * Find address for this interface, if it exists.
- *
- * If an alias address was specified, find that one instead of the
+ * Find address for this interface, if it exists. If an
+ * address was specified, find that one instead of the
* first one on the interface, if possible.
*/
- dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr;
- IN_IFADDR_RLOCK();
- LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash) {
- if (iap->ia_ifp == ifp &&
- iap->ia_addr.sin_addr.s_addr == dst.s_addr) {
- if (td == NULL || prison_check_ip4(td->td_ucred,
- &dst) == 0)
- ia = iap;
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ ia = (struct in_ifaddr *)ifa;
+ if (ia->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr)
break;
- }
}
- if (ia != NULL)
- ifa_ref(&ia->ia_ifa);
- IN_IFADDR_RUNLOCK();
- if (ia == NULL) {
- IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- iap = ifatoia(ifa);
- if (iap->ia_addr.sin_family == AF_INET) {
- if (td != NULL &&
- prison_check_ip4(td->td_ucred,
- &iap->ia_addr.sin_addr) != 0)
- continue;
- ia = iap;
- break;
+ if (ifa == NULL)
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
+ if (ifa->ifa_addr->sa_family == AF_INET) {
+ ia = (struct in_ifaddr *)ifa;
+ if (prison_check_ip4(td->td_ucred,
+ &ia->ia_addr.sin_addr) == 0)
+ break;
}
- }
- if (ia != NULL)
- ifa_ref(&ia->ia_ifa);
+
+ if (ifa == NULL) {
IF_ADDR_RUNLOCK(ifp);
+ return (EADDRNOTAVAIL);
}
- if (ia == NULL)
- iaIsFirst = 1;
error = 0;
switch (cmd) {
- case SIOCAIFADDR:
- case SIOCDIFADDR:
- if (ifra->ifra_addr.sin_family == AF_INET) {
- struct in_ifaddr *oia;
-
- IN_IFADDR_RLOCK();
- for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) {
- if (ia->ia_ifp == ifp &&
- ia->ia_addr.sin_addr.s_addr ==
- ifra->ifra_addr.sin_addr.s_addr)
- break;
- }
- if (ia != NULL && ia != oia)
- ifa_ref(&ia->ia_ifa);
- if (oia != NULL && ia != oia)
- ifa_free(&oia->ia_ifa);
- IN_IFADDR_RUNLOCK();
- if ((ifp->if_flags & IFF_POINTOPOINT)
- && (cmd == SIOCAIFADDR)
- && (ifra->ifra_dstaddr.sin_addr.s_addr
- == INADDR_ANY)) {
- error = EDESTADDRREQ;
- goto out;
- }
- }
- if (cmd == SIOCDIFADDR && ia == NULL) {
- error = EADDRNOTAVAIL;
- goto out;
- }
- /* FALLTHROUGH */
- case SIOCSIFADDR:
- case SIOCSIFNETMASK:
- case SIOCSIFDSTADDR:
- if (ia == NULL) {
- ia = (struct in_ifaddr *)
- malloc(sizeof *ia, M_IFADDR, M_NOWAIT |
- M_ZERO);
- if (ia == NULL) {
- error = ENOBUFS;
- goto out;
- }
-
- ifa = &ia->ia_ifa;
- ifa_init(ifa);
- ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
- ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
- ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
-
- ia->ia_sockmask.sin_len = 8;
- ia->ia_sockmask.sin_family = AF_INET;
- if (ifp->if_flags & IFF_BROADCAST) {
- ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
- ia->ia_broadaddr.sin_family = AF_INET;
- }
- ia->ia_ifp = ifp;
-
- ifa_ref(ifa); /* if_addrhead */
- IF_ADDR_WLOCK(ifp);
- TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
- IF_ADDR_WUNLOCK(ifp);
- ifa_ref(ifa); /* in_ifaddrhead */
- IN_IFADDR_WLOCK();
- TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
- IN_IFADDR_WUNLOCK();
- iaIsNew = 1;
- }
- break;
-
- case SIOCSIFBRDADDR:
case SIOCGIFADDR:
- case SIOCGIFNETMASK:
- case SIOCGIFDSTADDR:
- case SIOCGIFBRDADDR:
- if (ia == NULL) {
- error = EADDRNOTAVAIL;
- goto out;
- }
+ *addr = ia->ia_addr;
break;
- }
-
- /*
- * Most paths in this switch return directly or via out. Only paths
- * that remove the address break in order to hit common removal code.
- */
- switch (cmd) {
- case SIOCGIFADDR:
- *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
- goto out;
case SIOCGIFBRDADDR:
if ((ifp->if_flags & IFF_BROADCAST) == 0) {
error = EINVAL;
- goto out;
+ break;
}
- *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
- goto out;
+ *addr = ia->ia_broadaddr;
+ break;
case SIOCGIFDSTADDR:
if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
error = EINVAL;
- goto out;
+ break;
}
- *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
- goto out;
+ *addr = ia->ia_dstaddr;
+ break;
case SIOCGIFNETMASK:
- *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
- goto out;
-
- case SIOCSIFDSTADDR:
- if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
- error = EINVAL;
- goto out;
- }
- oldaddr = ia->ia_dstaddr;
- ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
- if (ifp->if_ioctl != NULL) {
- error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR,
- (caddr_t)ia);
- if (error) {
- ia->ia_dstaddr = oldaddr;
- goto out;
- }
- }
- if (ia->ia_flags & IFA_ROUTE) {
- ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
- rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
- ia->ia_ifa.ifa_dstaddr =
- (struct sockaddr *)&ia->ia_dstaddr;
- rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
- }
- goto out;
+ *addr = ia->ia_sockmask;
+ break;
+ }
- case SIOCSIFBRDADDR:
- if ((ifp->if_flags & IFF_BROADCAST) == 0) {
- error = EINVAL;
- goto out;
- }
- ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
- goto out;
+ IF_ADDR_RUNLOCK(ifp);
- case SIOCSIFADDR:
- error = in_ifinit(ifp, ia,
- (struct sockaddr_in *) &ifr->ifr_addr, 1);
- if (error != 0 && iaIsNew)
- break;
- if (error == 0) {
- ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
- if (iaIsFirst &&
- (ifp->if_flags & IFF_MULTICAST) != 0) {
- error = in_joingroup(ifp, &allhosts_addr,
- NULL, &ii->ii_allhosts);
- }
- EVENTHANDLER_INVOKE(ifaddr_event, ifp);
- }
- error = 0;
- goto out;
+ return (error);
+}
- case SIOCSIFNETMASK:
- ia->ia_sockmask.sin_addr = ifra->ifra_addr.sin_addr;
- ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
- goto out;
+static int
+in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
+{
+ const struct in_aliasreq *ifra = (struct in_aliasreq *)data;
+ const struct sockaddr_in *addr = &ifra->ifra_addr;
+ const struct sockaddr_in *broadaddr = &ifra->ifra_broadaddr;
+ const struct sockaddr_in *mask = &ifra->ifra_mask;
+ const struct sockaddr_in *dstaddr = &ifra->ifra_dstaddr;
+ const int vhid = (cmd == SIOCAIFADDR) ? ifra->ifra_vhid : 0;
+ struct ifaddr *ifa;
+ struct in_ifaddr *ia;
+ bool iaIsFirst;
+ int error = 0;
- case SIOCAIFADDR:
- maskIsNew = 0;
- hostIsNew = 1;
- error = 0;
- if (ia->ia_addr.sin_family == AF_INET) {
- if (ifra->ifra_addr.sin_len == 0) {
- ifra->ifra_addr = ia->ia_addr;
- hostIsNew = 0;
- } else if (ifra->ifra_addr.sin_addr.s_addr ==
- ia->ia_addr.sin_addr.s_addr)
- hostIsNew = 0;
- }
- if (ifra->ifra_mask.sin_len) {
- /*
- * QL: XXX
- * Need to scrub the prefix here in case
- * the issued command is SIOCAIFADDR with
- * the same address, but with a different
- * prefix length. And if the prefix length
- * is the same as before, then the call is
- * un-necessarily executed here.
- */
- in_ifscrub(ifp, ia, LLE_STATIC);
- ia->ia_sockmask = ifra->ifra_mask;
- ia->ia_sockmask.sin_family = AF_INET;
- ia->ia_subnetmask =
- ntohl(ia->ia_sockmask.sin_addr.s_addr);
- maskIsNew = 1;
- }
- if ((ifp->if_flags & IFF_POINTOPOINT) &&
- (ifra->ifra_dstaddr.sin_family == AF_INET)) {
- in_ifscrub(ifp, ia, LLE_STATIC);
- ia->ia_dstaddr = ifra->ifra_dstaddr;
- maskIsNew = 1; /* We lie; but the effect's the same */
- }
- if (ifra->ifra_addr.sin_family == AF_INET &&
- (hostIsNew || maskIsNew))
- error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
- if (error != 0 && iaIsNew)
- break;
+ error = priv_check(td, PRIV_NET_ADDIFADDR);
+ if (error)
+ return (error);
- if ((ifp->if_flags & IFF_BROADCAST) &&
- (ifra->ifra_broadaddr.sin_family == AF_INET))
- ia->ia_broadaddr = ifra->ifra_broadaddr;
- if (error == 0) {
- ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
- if (iaIsFirst &&
- (ifp->if_flags & IFF_MULTICAST) != 0) {
- error = in_joingroup(ifp, &allhosts_addr,
- NULL, &ii->ii_allhosts);
- }
- EVENTHANDLER_INVOKE(ifaddr_event, ifp);
- }
- goto out;
+ /*
+ * ifra_addr must be present and be of INET family.
+ * ifra_broadaddr/ifra_dstaddr and ifra_mask are optional.
+ */
+ if (addr->sin_len != sizeof(struct sockaddr_in) ||
+ addr->sin_family != AF_INET)
+ return (EINVAL);
+ if (broadaddr->sin_len != 0 &&
+ (broadaddr->sin_len != sizeof(struct sockaddr_in) ||
+ broadaddr->sin_family != AF_INET))
+ return (EINVAL);
+ if (mask->sin_len != 0 &&
+ (mask->sin_len != sizeof(struct sockaddr_in) ||
+ mask->sin_family != AF_INET))
+ return (EINVAL);
+ if ((ifp->if_flags & IFF_POINTOPOINT) &&
+ (dstaddr->sin_len != sizeof(struct sockaddr_in) ||
+ dstaddr->sin_addr.s_addr == INADDR_ANY))
+ return (EDESTADDRREQ);
+ if (vhid > 0 && carp_attach_p == NULL)
+ return (EPROTONOSUPPORT);
- case SIOCDIFADDR:
- /*
- * in_ifscrub kills the interface route.
- */
- in_ifscrub(ifp, ia, LLE_STATIC);
+ /*
+ * See whether address already exist.
+ */
+ iaIsFirst = true;
+ ia = NULL;
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ struct in_ifaddr *it;
- /*
- * in_ifadown gets rid of all the rest of
- * the routes. This is not quite the right
- * thing to do, but at least if we are running
- * a routing process they will come back.
- */
- in_ifadown(&ia->ia_ifa, 1);
- EVENTHANDLER_INVOKE(ifaddr_event, ifp);
- error = 0;
- break;
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
- default:
- panic("in_control: unsupported ioctl");
+ it = (struct in_ifaddr *)ifa;
+ iaIsFirst = false;
+ if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr &&
+ prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0)
+ ia = it;
}
+ IF_ADDR_RUNLOCK(ifp);
+
+ if (ia != NULL)
+ (void )in_difaddr_ioctl(data, ifp, td);
+
+ ifa = ifa_alloc(sizeof(struct in_ifaddr), M_WAITOK);
+ ia = (struct in_ifaddr *)ifa;
+ ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
+ ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
+ ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
+
+ ia->ia_ifp = ifp;
+ ia->ia_addr = *addr;
+ if (mask->sin_len != 0) {
+ ia->ia_sockmask = *mask;
+ ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
+ } else {
+ in_addr_t i = ntohl(addr->sin_addr.s_addr);
- IF_ADDR_WLOCK(ifp);
- /* Re-check that ia is still part of the list. */
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa == &ia->ia_ifa)
- break;
- }
- if (ifa == NULL) {
/*
- * If we lost the race with another thread, there is no need to
- * try it again for the next loop as there is no other exit
- * path between here and out.
- */
- IF_ADDR_WUNLOCK(ifp);
- error = EADDRNOTAVAIL;
- goto out;
+ * Be compatible with network classes, if netmask isn't
+ * supplied, guess it based on classes.
+ */
+ if (IN_CLASSA(i))
+ ia->ia_subnetmask = IN_CLASSA_NET;
+ else if (IN_CLASSB(i))
+ ia->ia_subnetmask = IN_CLASSB_NET;
+ else
+ ia->ia_subnetmask = IN_CLASSC_NET;
+ ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
}
- TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
- IF_ADDR_WUNLOCK(ifp);
- ifa_free(&ia->ia_ifa); /* if_addrhead */
+ ia->ia_subnet = ntohl(addr->sin_addr.s_addr) & ia->ia_subnetmask;
+ in_socktrim(&ia->ia_sockmask);
- IN_IFADDR_WLOCK();
- TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
- if (ia->ia_addr.sin_family == AF_INET) {
- struct in_ifaddr *if_ia;
+ if (ifp->if_flags & IFF_BROADCAST) {
+ if (broadaddr->sin_len != 0) {
+ ia->ia_broadaddr = *broadaddr;
+ } else if (ia->ia_subnetmask == IN_RFC3021_MASK) {
+ ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
+ ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in);
+ ia->ia_broadaddr.sin_family = AF_INET;
+ } else {
+ ia->ia_broadaddr.sin_addr.s_addr =
+ htonl(ia->ia_subnet | ~ia->ia_subnetmask);
+ ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in);
+ ia->ia_broadaddr.sin_family = AF_INET;
+ }
+ }
- LIST_REMOVE(ia, ia_hash);
- IN_IFADDR_WUNLOCK();
- /*
- * If this is the last IPv4 address configured on this
- * interface, leave the all-hosts group.
- * No state-change report need be transmitted.
- */
- if_ia = NULL;
- IFP_TO_IA(ifp, if_ia);
- if (if_ia == NULL) {
- ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
- IN_MULTI_LOCK();
- if (ii->ii_allhosts) {
- (void)in_leavegroup_locked(ii->ii_allhosts,
- NULL);
- ii->ii_allhosts = NULL;
- }
- IN_MULTI_UNLOCK();
- } else
- ifa_free(&if_ia->ia_ifa);
- } else
- IN_IFADDR_WUNLOCK();
- ifa_free(&ia->ia_ifa); /* in_ifaddrhead */
-out:
- if (ia != NULL)
- ifa_free(&ia->ia_ifa);
- return (error);
-}
+ if (ifp->if_flags & IFF_POINTOPOINT)
+ ia->ia_dstaddr = *dstaddr;
-/*
- * SIOC[GAD]LIFADDR.
- * SIOCGLIFADDR: get first address. (?!?)
- * SIOCGLIFADDR with IFLR_PREFIX:
- * get first address that matches the specified prefix.
- * SIOCALIFADDR: add the specified address.
- * SIOCALIFADDR with IFLR_PREFIX:
- * EINVAL since we can't deduce hostid part of the address.
- * SIOCDLIFADDR: delete the specified address.
- * SIOCDLIFADDR with IFLR_PREFIX:
- * delete the first address that matches the specified prefix.
- * return values:
- * EINVAL on invalid parameters
- * EADDRNOTAVAIL on prefix match failed/specified address not found
- * other values may be returned from in_ioctl()
- */
-static int
-in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
- struct ifnet *ifp, struct thread *td)
-{
- struct if_laddrreq *iflr = (struct if_laddrreq *)data;
- struct ifaddr *ifa;
+ /* XXXGL: rtinit() needs this strange assignment. */
+ if (ifp->if_flags & IFF_LOOPBACK)
+ ia->ia_dstaddr = ia->ia_addr;
- /* sanity checks */
- if (data == NULL || ifp == NULL) {
- panic("invalid argument to in_lifaddr_ioctl");
- /*NOTRECHED*/
+ if (vhid != 0) {
+ error = (*carp_attach_p)(&ia->ia_ifa, vhid);
+ if (error)
+ return (error);
}
- switch (cmd) {
- case SIOCGLIFADDR:
- /* address must be specified on GET with IFLR_PREFIX */
- if ((iflr->flags & IFLR_PREFIX) == 0)
- break;
- /*FALLTHROUGH*/
- case SIOCALIFADDR:
- case SIOCDLIFADDR:
- /* address must be specified on ADD and DELETE */
- if (iflr->addr.ss_family != AF_INET)
- return (EINVAL);
- if (iflr->addr.ss_len != sizeof(struct sockaddr_in))
- return (EINVAL);
- /* XXX need improvement */
- if (iflr->dstaddr.ss_family
- && iflr->dstaddr.ss_family != AF_INET)
- return (EINVAL);
- if (iflr->dstaddr.ss_family
- && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in))
- return (EINVAL);
- break;
- default: /*shouldn't happen*/
- return (EOPNOTSUPP);
- }
- if (sizeof(struct in_addr) * 8 < iflr->prefixlen)
- return (EINVAL);
+ /* if_addrhead is already referenced by ifa_alloc() */
+ IF_ADDR_WLOCK(ifp);
+ TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
+ IF_ADDR_WUNLOCK(ifp);
- switch (cmd) {
- case SIOCALIFADDR:
- {
- struct in_aliasreq ifra;
+ ifa_ref(ifa); /* in_ifaddrhead */
+ IN_IFADDR_WLOCK();
+ TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
+ LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
+ IN_IFADDR_WUNLOCK();
- if (iflr->flags & IFLR_PREFIX)
- return (EINVAL);
+ /*
+ * Give the interface a chance to initialize
+ * if this is its first address,
+ * and to validate the address if necessary.
+ */
+ if (ifp->if_ioctl != NULL) {
+ error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
+ if (error)
+ goto fail1;
+ }
- /* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). */
- bzero(&ifra, sizeof(ifra));
- bcopy(iflr->iflr_name, ifra.ifra_name,
- sizeof(ifra.ifra_name));
+ /*
+ * Add route for the network.
+ */
+ if (vhid == 0) {
+ int flags = RTF_UP;
- bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len);
+ if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
+ flags |= RTF_HOST;
- if (iflr->dstaddr.ss_family) { /*XXX*/
- bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
- iflr->dstaddr.ss_len);
- }
+ error = in_addprefix(ia, flags);
+ if (error)
+ goto fail1;
+ }
- ifra.ifra_mask.sin_family = AF_INET;
- ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
- in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);
-
- return (in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, td));
- }
- case SIOCGLIFADDR:
- case SIOCDLIFADDR:
- {
- struct in_ifaddr *ia;
- struct in_addr mask, candidate, match;
- struct sockaddr_in *sin;
-
- bzero(&mask, sizeof(mask));
- bzero(&match, sizeof(match));
- if (iflr->flags & IFLR_PREFIX) {
- /* lookup a prefix rather than address. */
- in_len2mask(&mask, iflr->prefixlen);
-
- sin = (struct sockaddr_in *)&iflr->addr;
- match.s_addr = sin->sin_addr.s_addr;
- match.s_addr &= mask.s_addr;
-
- /* if you set extra bits, that's wrong */
- if (match.s_addr != sin->sin_addr.s_addr)
- return (EINVAL);
+ /*
+ * Add a loopback route to self.
+ */
+ if (vhid == 0 && (ifp->if_flags & IFF_LOOPBACK) == 0 &&
+ ia->ia_addr.sin_addr.s_addr != INADDR_ANY &&
+ !((ifp->if_flags & IFF_POINTOPOINT) &&
+ ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)) {
+ struct in_ifaddr *eia;
- } else {
- /* on getting an address, take the 1st match */
- /* on deleting an address, do exact match */
- if (cmd != SIOCGLIFADDR) {
- in_len2mask(&mask, 32);
- sin = (struct sockaddr_in *)&iflr->addr;
- match.s_addr = sin->sin_addr.s_addr;
- }
- }
+ eia = in_localip_more(ia);
- IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family != AF_INET)
- continue;
- if (match.s_addr == 0)
- break;
- candidate.s_addr = ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr;
- candidate.s_addr &= mask.s_addr;
- if (candidate.s_addr == match.s_addr)
- break;
- }
- if (ifa != NULL)
- ifa_ref(ifa);
- IF_ADDR_RUNLOCK(ifp);
- if (ifa == NULL)
- return (EADDRNOTAVAIL);
- ia = (struct in_ifaddr *)ifa;
+ if (eia == NULL) {
+ error = ifa_add_loopback_route((struct ifaddr *)ia,
+ (struct sockaddr *)&ia->ia_addr);
+ if (error)
+ goto fail2;
+ } else
+ ifa_free(&eia->ia_ifa);
+ }
- if (cmd == SIOCGLIFADDR) {
- /* fill in the if_laddrreq structure */
- bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len);
+ if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST)) {
+ struct in_addr allhosts_addr;
+ struct in_ifinfo *ii;
- if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
- bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
- ia->ia_dstaddr.sin_len);
- } else
- bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
+ ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
+ allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
- iflr->prefixlen =
- in_mask2len(&ia->ia_sockmask.sin_addr);
+ error = in_joingroup(ifp, &allhosts_addr, NULL,
+ &ii->ii_allhosts);
+ }
- iflr->flags = 0; /*XXX*/
- ifa_free(ifa);
+ EVENTHANDLER_INVOKE(ifaddr_event, ifp);
- return (0);
- } else {
- struct in_aliasreq ifra;
-
- /* fill in_aliasreq and do ioctl(SIOCDIFADDR) */
- bzero(&ifra, sizeof(ifra));
- bcopy(iflr->iflr_name, ifra.ifra_name,
- sizeof(ifra.ifra_name));
-
- bcopy(&ia->ia_addr, &ifra.ifra_addr,
- ia->ia_addr.sin_len);
- if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
- bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
- ia->ia_dstaddr.sin_len);
- }
- bcopy(&ia->ia_sockmask, &ifra.ifra_dstaddr,
- ia->ia_sockmask.sin_len);
- ifa_free(ifa);
+ return (error);
- return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra,
- ifp, td));
- }
- }
- }
+fail2:
+ if (vhid == 0)
+ (void )in_scrubprefix(ia, LLE_STATIC);
- return (EOPNOTSUPP); /*just for safety*/
-}
+fail1:
+ if (ia->ia_ifa.ifa_carp)
+ (*carp_detach_p)(&ia->ia_ifa);
-/*
- * Delete any existing route for an interface.
- */
-void
-in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, u_int flags)
-{
+ IF_ADDR_WLOCK(ifp);
+ TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
+ IF_ADDR_WUNLOCK(ifp);
+ ifa_free(&ia->ia_ifa); /* if_addrhead */
+
+ IN_IFADDR_WLOCK();
+ TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
+ LIST_REMOVE(ia, ia_hash);
+ IN_IFADDR_WUNLOCK();
+ ifa_free(&ia->ia_ifa); /* in_ifaddrhead */
- in_scrubprefix(ia, flags);
+ return (error);
}
-/*
- * Initialize an interface's internet address
- * and routing table entry.
- */
static int
-in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
- int scrub)
+in_difaddr_ioctl(caddr_t data, struct ifnet *ifp, struct thread *td)
{
- register u_long i = ntohl(sin->sin_addr.s_addr);
- struct sockaddr_in oldaddr;
- int s = splimp(), flags = RTF_UP, error = 0;
-
- oldaddr = ia->ia_addr;
- if (oldaddr.sin_family == AF_INET)
- LIST_REMOVE(ia, ia_hash);
- ia->ia_addr = *sin;
- if (ia->ia_addr.sin_family == AF_INET) {
- IN_IFADDR_WLOCK();
- LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
- ia, ia_hash);
- IN_IFADDR_WUNLOCK();
- }
- /*
- * Give the interface a chance to initialize
- * if this is its first address,
- * and to validate the address if necessary.
- */
- if (ifp->if_ioctl != NULL) {
- error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
- if (error) {
- splx(s);
- /* LIST_REMOVE(ia, ia_hash) is done in in_control */
- ia->ia_addr = oldaddr;
- IN_IFADDR_WLOCK();
- if (ia->ia_addr.sin_family == AF_INET)
- LIST_INSERT_HEAD(INADDR_HASH(
- ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
- else
- /*
- * If oldaddr family is not AF_INET (e.g.
- * interface has been just created) in_control
- * does not call LIST_REMOVE, and we end up
- * with bogus ia entries in hash
- */
- LIST_REMOVE(ia, ia_hash);
- IN_IFADDR_WUNLOCK();
+ const struct ifreq *ifr = (struct ifreq *)data;
+ const struct sockaddr_in *addr = (const struct sockaddr_in *)
+ &ifr->ifr_addr;
+ struct ifaddr *ifa;
+ struct in_ifaddr *ia;
+ bool deleteAny, iaIsLast;
+ int error;
+
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_DELIFADDR);
+ if (error)
return (error);
- }
}
- splx(s);
- if (scrub) {
- ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
- in_ifscrub(ifp, ia, LLE_STATIC);
- ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+
+ if (addr->sin_len != sizeof(struct sockaddr_in) ||
+ addr->sin_family != AF_INET)
+ deleteAny = true;
+ else
+ deleteAny = false;
+
+ iaIsLast = true;
+ ia = NULL;
+ IF_ADDR_WLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ struct in_ifaddr *it;
+
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+
+ it = (struct in_ifaddr *)ifa;
+ if (deleteAny && ia == NULL && (td == NULL ||
+ prison_check_ip4(td->td_ucred, &it->ia_addr.sin_addr) == 0))
+ ia = it;
+
+ if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr &&
+ (td == NULL || prison_check_ip4(td->td_ucred,
+ &addr->sin_addr) == 0))
+ ia = it;
+
+ if (it != ia)
+ iaIsLast = false;
}
- /*
- * Be compatible with network classes, if netmask isn't supplied,
- * guess it based on classes.
- */
- if (ia->ia_subnetmask == 0) {
- if (IN_CLASSA(i))
- ia->ia_subnetmask = IN_CLASSA_NET;
- else if (IN_CLASSB(i))
- ia->ia_subnetmask = IN_CLASSB_NET;
- else
- ia->ia_subnetmask = IN_CLASSC_NET;
- ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
+
+ if (ia == NULL) {
+ IF_ADDR_WUNLOCK(ifp);
+ return (EADDRNOTAVAIL);
}
- ia->ia_subnet = i & ia->ia_subnetmask;
- in_socktrim(&ia->ia_sockmask);
+
+ TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
+ IF_ADDR_WUNLOCK(ifp);
+ ifa_free(&ia->ia_ifa); /* if_addrhead */
+
+ IN_IFADDR_WLOCK();
+ TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
+ LIST_REMOVE(ia, ia_hash);
+ IN_IFADDR_WUNLOCK();
+
/*
- * XXX: carp(4) does not have interface route
+ * in_scrubprefix() kills the interface route.
*/
- if (ifp->if_type == IFT_CARP)
- return (0);
+ in_scrubprefix(ia, LLE_STATIC);
+
/*
- * Add route for the network.
+ * in_ifadown gets rid of all the rest of
+ * the routes. This is not quite the right
+ * thing to do, but at least if we are running
+ * a routing process they will come back.
*/
- ia->ia_ifa.ifa_metric = ifp->if_metric;
- if (ifp->if_flags & IFF_BROADCAST) {
- if (ia->ia_subnetmask == IN_RFC3021_MASK)
- ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
- else
- ia->ia_broadaddr.sin_addr.s_addr =
- htonl(ia->ia_subnet | ~ia->ia_subnetmask);
- } else if (ifp->if_flags & IFF_LOOPBACK) {
- ia->ia_dstaddr = ia->ia_addr;
- flags |= RTF_HOST;
- } else if (ifp->if_flags & IFF_POINTOPOINT) {
- if (ia->ia_dstaddr.sin_family != AF_INET)
- return (0);
- flags |= RTF_HOST;
- }
- if ((error = in_addprefix(ia, flags)) != 0)
- return (error);
-
- if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
- return (0);
-
- if (ifp->if_flags & IFF_POINTOPOINT) {
- if (ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
- return (0);
- }
+ in_ifadown(&ia->ia_ifa, 1);
+ if (ia->ia_ifa.ifa_carp)
+ (*carp_detach_p)(&ia->ia_ifa);
/*
- * add a loopback route to self
+ * If this is the last IPv4 address configured on this
+ * interface, leave the all-hosts group.
+ * No state-change report need be transmitted.
*/
- if (V_useloopback && !(ifp->if_flags & IFF_LOOPBACK)) {
- struct route ia_ro;
-
- bzero(&ia_ro, sizeof(ia_ro));
- *((struct sockaddr_in *)(&ia_ro.ro_dst)) = ia->ia_addr;
- rtalloc_ign_fib(&ia_ro, 0, RT_DEFAULT_FIB);
- if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
- (ia_ro.ro_rt->rt_ifp == V_loif)) {
- RT_LOCK(ia_ro.ro_rt);
- RT_ADDREF(ia_ro.ro_rt);
- RTFREE_LOCKED(ia_ro.ro_rt);
- } else
- error = ifa_add_loopback_route((struct ifaddr *)ia,
- (struct sockaddr *)&ia->ia_addr);
- if (error == 0)
- ia->ia_flags |= IFA_RTSELF;
- if (ia_ro.ro_rt != NULL)
- RTFREE(ia_ro.ro_rt);
+ if (iaIsLast && (ifp->if_flags & IFF_MULTICAST)) {
+ struct in_ifinfo *ii;
+
+ ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
+ IN_MULTI_LOCK();
+ if (ii->ii_allhosts) {
+ (void)in_leavegroup_locked(ii->ii_allhosts, NULL);
+ ii->ii_allhosts = NULL;
+ }
+ IN_MULTI_UNLOCK();
}
- return (error);
+ EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+ ifa_free(&ia->ia_ifa); /* in_ifaddrhead */
+
+ return (0);
}
#define rtinitflags(x) \
@@ -965,9 +652,10 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
/*
* Check if we have a route for the given prefix already or add one accordingly.
*/
-static int
+int
in_addprefix(struct in_ifaddr *target, int flags)
{
+ struct rm_priotracker in_ifa_tracker;
struct in_ifaddr *ia;
struct in_addr prefix, mask, p, m;
int error;
@@ -981,7 +669,7 @@ in_addprefix(struct in_ifaddr *target, int flags)
prefix.s_addr &= mask.s_addr;
}
- IN_IFADDR_RLOCK();
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
/* Look for an existing address with the same prefix, mask, and fib */
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
if (rtinitflags(ia)) {
@@ -1009,28 +697,26 @@ in_addprefix(struct in_ifaddr *target, int flags)
#ifdef RADIX_MPATH
if (ia->ia_addr.sin_addr.s_addr ==
target->ia_addr.sin_addr.s_addr) {
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
return (EEXIST);
} else
break;
#endif
- if (V_sameprefixcarponly &&
- target->ia_ifp->if_type != IFT_CARP &&
- ia->ia_ifp->if_type != IFT_CARP) {
- IN_IFADDR_RUNLOCK();
+ if (V_nosameprefix) {
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
return (EEXIST);
} else {
int fibnum;
- fibnum = rt_add_addr_allfibs ? RT_ALL_FIBS :
+ fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS :
target->ia_ifp->if_fib;
rt_addrmsg(RTM_ADD, &target->ia_ifa, fibnum);
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
return (0);
}
}
}
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
/*
* No-one seem to have this prefix route, so we try to insert it.
@@ -1041,68 +727,87 @@ in_addprefix(struct in_ifaddr *target, int flags)
return (error);
}
-extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
+/*
+ * Removes either all lle entries for given @ia, or lle
+ * corresponding to @ia address.
+ */
+static void
+in_scrubprefixlle(struct in_ifaddr *ia, int all, u_int flags)
+{
+ struct sockaddr_in addr, mask;
+ struct sockaddr *saddr, *smask;
+ struct ifnet *ifp;
+
+ saddr = (struct sockaddr *)&addr;
+ bzero(&addr, sizeof(addr));
+ addr.sin_len = sizeof(addr);
+ addr.sin_family = AF_INET;
+ smask = (struct sockaddr *)&mask;
+ bzero(&mask, sizeof(mask));
+ mask.sin_len = sizeof(mask);
+ mask.sin_family = AF_INET;
+ mask.sin_addr.s_addr = ia->ia_subnetmask;
+ ifp = ia->ia_ifp;
+
+ if (all) {
+
+ /*
+ * Remove all L2 entries matching given prefix.
+ * Convert address to host representation to avoid
+ * doing this on every callback. ia_subnetmask is already
+ * stored in host representation.
+ */
+ addr.sin_addr.s_addr = ntohl(ia->ia_addr.sin_addr.s_addr);
+ lltable_prefix_free(AF_INET, saddr, smask, flags);
+ } else {
+ /* Remove interface address only */
+ addr.sin_addr.s_addr = ia->ia_addr.sin_addr.s_addr;
+ lltable_delete_addr(LLTABLE(ifp), LLE_IFADDR, saddr);
+ }
+}
/*
* If there is no other address in the system that can serve a route to the
* same prefix, remove the route. Hand over the route to the new address
* otherwise.
*/
-static int
+int
in_scrubprefix(struct in_ifaddr *target, u_int flags)
{
+ struct rm_priotracker in_ifa_tracker;
struct in_ifaddr *ia;
- struct in_addr prefix, mask, p;
+ struct in_addr prefix, mask, p, m;
int error = 0;
- struct sockaddr_in prefix0, mask0;
/*
* Remove the loopback route to the interface address.
- * The "useloopback" setting is not consulted because if the
- * user configures an interface address, turns off this
- * setting, and then tries to delete that interface address,
- * checking the current setting of "useloopback" would leave
- * that interface address loopback route untouched, which
- * would be wrong. Therefore the interface address loopback route
- * deletion is unconditional.
*/
if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) &&
!(target->ia_ifp->if_flags & IFF_LOOPBACK) &&
- (target->ia_flags & IFA_RTSELF)) {
- struct route ia_ro;
- int freeit = 0;
- int fibnum;
+ (flags & LLE_STATIC)) {
+ struct in_ifaddr *eia;
- bzero(&ia_ro, sizeof(ia_ro));
- *((struct sockaddr_in *)(&ia_ro.ro_dst)) = target->ia_addr;
- fibnum = target->ia_ifp->if_fib;
- rtalloc_ign_fib(&ia_ro, 0, fibnum);
- if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
- (ia_ro.ro_rt->rt_ifp == V_loif)) {
- RT_LOCK(ia_ro.ro_rt);
- if (ia_ro.ro_rt->rt_refcnt <= 1)
- freeit = 1;
- else if (flags & LLE_STATIC) {
- RT_REMREF(ia_ro.ro_rt);
- target->ia_flags &= ~IFA_RTSELF;
- }
- RTFREE_LOCKED(ia_ro.ro_rt);
- }
- if (freeit && (flags & LLE_STATIC)) {
+ /*
+ * XXXME: add fib-aware in_localip.
+ * We definitely don't want to switch between
+ * prefixes in different fibs.
+ */
+ eia = in_localip_more(target);
+
+ if (eia != NULL) {
+ error = ifa_switch_loopback_route((struct ifaddr *)eia,
+ (struct sockaddr *)&target->ia_addr);
+ ifa_free(&eia->ia_ifa);
+ } else {
error = ifa_del_loopback_route((struct ifaddr *)target,
(struct sockaddr *)&target->ia_addr);
- if (error == 0)
- target->ia_flags &= ~IFA_RTSELF;
}
- if ((flags & LLE_STATIC) &&
- !(target->ia_ifp->if_flags & IFF_NOARP))
- /* remove arp cache */
- arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr);
}
- if (rtinitflags(target))
+ if (rtinitflags(target)) {
prefix = target->ia_dstaddr.sin_addr;
- else {
+ mask.s_addr = 0;
+ } else {
prefix = target->ia_addr.sin_addr;
mask = target->ia_sockmask.sin_addr;
prefix.s_addr &= mask.s_addr;
@@ -1111,38 +816,48 @@ in_scrubprefix(struct in_ifaddr *target, u_int flags)
if ((target->ia_flags & IFA_ROUTE) == 0) {
int fibnum;
- fibnum = rt_add_addr_allfibs ? RT_ALL_FIBS :
+ fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS :
target->ia_ifp->if_fib;
rt_addrmsg(RTM_DELETE, &target->ia_ifa, fibnum);
+
+ /*
+ * Removing address from !IFF_UP interface or
+ * prefix which exists on other interface (along with route).
+ * No entries should exist here except target addr.
+ * Given that, delete this entry only.
+ */
+ in_scrubprefixlle(target, 0, flags);
return (0);
}
- IN_IFADDR_RLOCK();
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
- if (rtinitflags(ia))
+ if (rtinitflags(ia)) {
p = ia->ia_dstaddr.sin_addr;
- else {
+
+ if (prefix.s_addr != p.s_addr)
+ continue;
+ } else {
p = ia->ia_addr.sin_addr;
- p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
+ m = ia->ia_sockmask.sin_addr;
+ p.s_addr &= m.s_addr;
+
+ if (prefix.s_addr != p.s_addr ||
+ mask.s_addr != m.s_addr)
+ continue;
}
- if ((prefix.s_addr != p.s_addr) ||
- !(ia->ia_ifp->if_flags & IFF_UP))
+ if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
continue;
/*
* If we got a matching prefix address, move IFA_ROUTE and
* the route itself to it. Make sure that routing daemons
* get a heads-up.
- *
- * XXX: a special case for carp(4) interface - this should
- * be more generally specified as an interface that
- * doesn't support such action.
*/
- if ((ia->ia_flags & IFA_ROUTE) == 0
- && (ia->ia_ifp->if_type != IFT_CARP)) {
+ if ((ia->ia_flags & IFA_ROUTE) == 0) {
ifa_ref(&ia->ia_ifa);
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
rtinitflags(target));
if (error == 0)
@@ -1150,6 +865,9 @@ in_scrubprefix(struct in_ifaddr *target, u_int flags)
else
log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n",
error);
+ /* Scrub all entries IFF interface is different */
+ in_scrubprefixlle(target, target->ia_ifp != ia->ia_ifp,
+ flags);
error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
rtinitflags(ia) | RTF_UP);
if (error == 0)
@@ -1161,21 +879,12 @@ in_scrubprefix(struct in_ifaddr *target, u_int flags)
return (error);
}
}
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
/*
* remove all L2 entries on the given prefix
*/
- bzero(&prefix0, sizeof(prefix0));
- prefix0.sin_len = sizeof(prefix0);
- prefix0.sin_family = AF_INET;
- prefix0.sin_addr.s_addr = target->ia_subnet;
- bzero(&mask0, sizeof(mask0));
- mask0.sin_len = sizeof(mask0);
- mask0.sin_family = AF_INET;
- mask0.sin_addr.s_addr = target->ia_subnetmask;
- lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0,
- (struct sockaddr *)&mask0, flags);
+ in_scrubprefixlle(target, 1, flags);
/*
* As no-one seem to have this prefix, we can remove the route.
@@ -1190,6 +899,58 @@ in_scrubprefix(struct in_ifaddr *target, u_int flags)
#undef rtinitflags
+void
+in_ifscrub_all(void)
+{
+ struct ifnet *ifp;
+ struct ifaddr *ifa, *nifa;
+ struct ifaliasreq ifr;
+
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ /* Cannot lock here - lock recursion. */
+ /* IF_ADDR_RLOCK(ifp); */
+ TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) {
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+
+ /*
+ * This is ugly but the only way for legacy IP to
+ * cleanly remove addresses and everything attached.
+ */
+ bzero(&ifr, sizeof(ifr));
+ ifr.ifra_addr = *ifa->ifa_addr;
+ if (ifa->ifa_dstaddr)
+ ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
+ (void)in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr,
+ ifp, NULL);
+ }
+ /* IF_ADDR_RUNLOCK(ifp); */
+ in_purgemaddrs(ifp);
+ igmp_domifdetach(ifp);
+ }
+ IFNET_RUNLOCK();
+}
+
+int
+in_ifaddr_broadcast(struct in_addr in, struct in_ifaddr *ia)
+{
+
+ return ((in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
+ /*
+ * Check for old-style (host 0) broadcast, but
+ * taking into account that RFC 3021 obsoletes it.
+ */
+ (ia->ia_subnetmask != IN_RFC3021_MASK &&
+ ntohl(in.s_addr) == ia->ia_subnet)) &&
+ /*
+ * Check for an all one subnetmask. These
+ * only exist when an interface gets a secondary
+ * address.
+ */
+ ia->ia_subnetmask != (u_long)0xffffffff);
+}
+
/*
* Return 1 if the address might be a local broadcast address.
*/
@@ -1197,37 +958,27 @@ int
in_broadcast(struct in_addr in, struct ifnet *ifp)
{
register struct ifaddr *ifa;
- u_long t;
+ int found;
if (in.s_addr == INADDR_BROADCAST ||
in.s_addr == INADDR_ANY)
return (1);
if ((ifp->if_flags & IFF_BROADCAST) == 0)
return (0);
- t = ntohl(in.s_addr);
+ found = 0;
/*
* Look through the list of addresses for a match
* with a broadcast address.
*/
-#define ia ((struct in_ifaddr *)ifa)
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
if (ifa->ifa_addr->sa_family == AF_INET &&
- (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
- /*
- * Check for old-style (host 0) broadcast, but
- * taking into account that RFC 3021 obsoletes it.
- */
- (ia->ia_subnetmask != IN_RFC3021_MASK &&
- t == ia->ia_subnet)) &&
- /*
- * Check for an all one subnetmask. These
- * only exist when an interface gets a secondary
- * address.
- */
- ia->ia_subnetmask != (u_long)0xffffffff)
- return (1);
- return (0);
-#undef ia
+ in_ifaddr_broadcast(in, (struct in_ifaddr *)ifa)) {
+ found = 1;
+ break;
+ }
+ IF_ADDR_RUNLOCK(ifp);
+ return (found);
}
/*
@@ -1239,6 +990,7 @@ in_ifdetach(struct ifnet *ifp)
in_pcbpurgeif0(&V_ripcbinfo, ifp);
in_pcbpurgeif0(&V_udbinfo, ifp);
+ in_pcbpurgeif0(&V_ulitecbinfo, ifp);
in_purgemaddrs(ifp);
}
@@ -1288,34 +1040,44 @@ in_purgemaddrs(struct ifnet *ifp)
IN_MULTI_UNLOCK();
}
-#include <net/if_dl.h>
-#include <netinet/if_ether.h>
-
struct in_llentry {
struct llentry base;
- struct sockaddr_in l3_addr4;
};
+#define IN_LLTBL_DEFAULT_HSIZE 32
+#define IN_LLTBL_HASH(k, h) \
+ (((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1))
+
/*
- * Deletes an address from the address table.
- * This function is called by the timer functions
- * such as arptimer() and nd6_llinfo_timer(), and
- * the caller does the locking.
+ * Do actual deallocation of @lle.
*/
static void
-in_lltable_free(struct lltable *llt, struct llentry *lle)
+in_lltable_destroy_lle_unlocked(struct llentry *lle)
{
- LLE_WUNLOCK(lle);
+
LLE_LOCK_DESTROY(lle);
+ LLE_REQ_DESTROY(lle);
free(lle, M_LLTABLE);
}
+/*
+ * Called by LLE_FREE_LOCKED when number of references
+ * drops to zero.
+ */
+static void
+in_lltable_destroy_lle(struct llentry *lle)
+{
+
+ LLE_WUNLOCK(lle);
+ in_lltable_destroy_lle_unlocked(lle);
+}
+
static struct llentry *
-in_lltable_new(const struct sockaddr *l3addr, u_int flags)
+in_lltable_new(struct in_addr addr4, u_int flags)
{
struct in_llentry *lle;
- lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO);
+ lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_NOWAIT | M_ZERO);
if (lle == NULL) /* NB: caller generates msg */
return NULL;
@@ -1324,82 +1086,123 @@ in_lltable_new(const struct sockaddr *l3addr, u_int flags)
* an ARP request.
*/
lle->base.la_expire = time_uptime; /* mark expired */
- lle->l3_addr4 = *(const struct sockaddr_in *)l3addr;
+ lle->base.r_l3addr.addr4 = addr4;
lle->base.lle_refcnt = 1;
- lle->base.lle_free = in_lltable_free;
+ lle->base.lle_free = in_lltable_destroy_lle;
LLE_LOCK_INIT(&lle->base);
- callout_init_rw(&lle->base.la_timer, &lle->base.lle_lock,
- CALLOUT_RETURNUNLOCKED);
+ LLE_REQ_INIT(&lle->base);
+ callout_init(&lle->base.lle_timer, 1);
return (&lle->base);
}
-#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \
- (((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) & (m)->sin_addr.s_addr)) == 0 )
+#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \
+ ((((d).s_addr ^ (a).s_addr) & (m).s_addr)) == 0 )
+
+static int
+in_lltable_match_prefix(const struct sockaddr *saddr,
+ const struct sockaddr *smask, u_int flags, struct llentry *lle)
+{
+ struct in_addr addr, mask, lle_addr;
+
+ addr = ((const struct sockaddr_in *)saddr)->sin_addr;
+ mask = ((const struct sockaddr_in *)smask)->sin_addr;
+ lle_addr.s_addr = ntohl(lle->r_l3addr.addr4.s_addr);
+
+ if (IN_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask) == 0)
+ return (0);
+
+ if (lle->la_flags & LLE_IFADDR) {
+
+ /*
+ * Delete LLE_IFADDR records IFF address & flag matches.
+ * Note that addr is the interface address within prefix
+ * being matched.
+ * Note also we should handle 'ifdown' cases without removing
+ * ifaddr macs.
+ */
+ if (addr.s_addr == lle_addr.s_addr && (flags & LLE_STATIC) != 0)
+ return (1);
+ return (0);
+ }
+
+ /* flags & LLE_STATIC means deleting both dynamic and static entries */
+ if ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))
+ return (1);
+
+ return (0);
+}
static void
-in_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix,
- const struct sockaddr *mask, u_int flags)
+in_lltable_free_entry(struct lltable *llt, struct llentry *lle)
{
- const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix;
- const struct sockaddr_in *msk = (const struct sockaddr_in *)mask;
- struct llentry *lle, *next;
- int i;
+ struct ifnet *ifp;
size_t pkts_dropped;
- IF_AFDATA_WLOCK(llt->llt_ifp);
- for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
- LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
- /*
- * (flags & LLE_STATIC) means deleting all entries
- * including static ARP entries.
- */
- if (IN_ARE_MASKED_ADDR_EQUAL(satosin(L3_ADDR(lle)),
- pfx, msk) && ((flags & LLE_STATIC) ||
- !(lle->la_flags & LLE_STATIC))) {
- LLE_WLOCK(lle);
- if (callout_stop(&lle->la_timer))
- LLE_REMREF(lle);
- pkts_dropped = llentry_free(lle);
- ARPSTAT_ADD(dropped, pkts_dropped);
- }
- }
+ LLE_WLOCK_ASSERT(lle);
+ KASSERT(llt != NULL, ("lltable is NULL"));
+
+ /* Unlink entry from table if not already */
+ if ((lle->la_flags & LLE_LINKED) != 0) {
+ ifp = llt->llt_ifp;
+ IF_AFDATA_WLOCK_ASSERT(ifp);
+ lltable_unlink_entry(llt, lle);
}
- IF_AFDATA_WUNLOCK(llt->llt_ifp);
-}
+ /* cancel timer */
+ if (callout_stop(&lle->lle_timer) > 0)
+ LLE_REMREF(lle);
+
+ /* Drop hold queue */
+ pkts_dropped = llentry_free(lle);
+ ARPSTAT_ADD(dropped, pkts_dropped);
+}
static int
in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
{
- struct rtentry *rt;
+ struct rt_addrinfo info;
+ struct sockaddr_in rt_key, rt_mask;
+ struct sockaddr rt_gateway;
+ int rt_flags;
KASSERT(l3addr->sa_family == AF_INET,
("sin_family %d", l3addr->sa_family));
- /* XXX rtalloc1_fib should take a const param */
- rt = rtalloc1_fib(__DECONST(struct sockaddr *, l3addr), 0, 0,
- ifp->if_fib);
+ bzero(&rt_key, sizeof(rt_key));
+ rt_key.sin_len = sizeof(rt_key);
+ bzero(&rt_mask, sizeof(rt_mask));
+ rt_mask.sin_len = sizeof(rt_mask);
+ bzero(&rt_gateway, sizeof(rt_gateway));
+ rt_gateway.sa_len = sizeof(rt_gateway);
+
+ bzero(&info, sizeof(info));
+ info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key;
+ info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&rt_mask;
+ info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway;
- if (rt == NULL)
+ if (rib_lookup_info(ifp->if_fib, l3addr, NHR_REF, 0, &info) != 0)
return (EINVAL);
+ rt_flags = info.rti_flags;
+
/*
* If the gateway for an existing host route matches the target L3
* address, which is a special route inserted by some implementation
* such as MANET, and the interface is of the correct type, then
* allow for ARP to proceed.
*/
- if (rt->rt_flags & RTF_GATEWAY) {
- if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp ||
- rt->rt_ifp->if_type != IFT_ETHER ||
- (rt->rt_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 ||
- memcmp(rt->rt_gateway->sa_data, l3addr->sa_data,
+ if (rt_flags & RTF_GATEWAY) {
+ if (!(rt_flags & RTF_HOST) || !info.rti_ifp ||
+ info.rti_ifp->if_type != IFT_ETHER ||
+ (info.rti_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 ||
+ memcmp(rt_gateway.sa_data, l3addr->sa_data,
sizeof(in_addr_t)) != 0) {
- RTFREE_LOCKED(rt);
+ rib_free_info(&info);
return (EINVAL);
}
}
+ rib_free_info(&info);
/*
* Make sure that at least the destination address is covered
@@ -1408,21 +1211,19 @@ in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr
* on one interface and the corresponding outgoing packet leaves
* another interface.
*/
- if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) {
+ if (!(rt_flags & RTF_HOST) && info.rti_ifp != ifp) {
const char *sa, *mask, *addr, *lim;
int len;
- mask = (const char *)rt_mask(rt);
+ mask = (const char *)&rt_mask;
/*
* Just being extra cautious to avoid some custom
* code getting into trouble.
*/
- if (mask == NULL) {
- RTFREE_LOCKED(rt);
+ if ((info.rti_addrs & RTA_NETMASK) == 0)
return (EINVAL);
- }
- sa = (const char *)rt_key(rt);
+ sa = (const char *)&rt_key;
addr = (const char *)l3addr;
len = ((const struct sockaddr_in *)l3addr)->sin_len;
lim = addr + len;
@@ -1433,151 +1234,188 @@ in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr
log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
#endif
- RTFREE_LOCKED(rt);
return (EINVAL);
}
}
}
- RTFREE_LOCKED(rt);
return (0);
}
-/*
- * Return NULL if not found or marked for deletion.
- * If found return lle read locked.
- */
-static struct llentry *
-in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
+static inline uint32_t
+in_lltable_hash_dst(const struct in_addr dst, uint32_t hsize)
+{
+
+ return (IN_LLTBL_HASH(dst.s_addr, hsize));
+}
+
+static uint32_t
+in_lltable_hash(const struct llentry *lle, uint32_t hsize)
+{
+
+ return (in_lltable_hash_dst(lle->r_l3addr.addr4, hsize));
+}
+
+static void
+in_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
+{
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)sa;
+ bzero(sin, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = lle->r_l3addr.addr4;
+}
+
+static inline struct llentry *
+in_lltable_find_dst(struct lltable *llt, struct in_addr dst)
{
- const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
- struct ifnet *ifp = llt->llt_ifp;
struct llentry *lle;
struct llentries *lleh;
- u_int hashkey;
-
- IF_AFDATA_LOCK_ASSERT(ifp);
- KASSERT(l3addr->sa_family == AF_INET,
- ("sin_family %d", l3addr->sa_family));
+ u_int hashidx;
- hashkey = sin->sin_addr.s_addr;
- lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
+ hashidx = in_lltable_hash_dst(dst, llt->llt_hsize);
+ lleh = &llt->lle_head[hashidx];
LIST_FOREACH(lle, lleh, lle_next) {
- struct sockaddr_in *sa2 = satosin(L3_ADDR(lle));
if (lle->la_flags & LLE_DELETED)
continue;
- if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr)
+ if (lle->r_l3addr.addr4.s_addr == dst.s_addr)
break;
}
- if (lle == NULL) {
-#ifdef DIAGNOSTIC
- if (flags & LLE_DELETE)
- log(LOG_INFO, "interface address is missing from cache = %p in delete\n", lle);
-#endif
- if (!(flags & LLE_CREATE))
- return (NULL);
- IF_AFDATA_WLOCK_ASSERT(ifp);
- /*
- * A route that covers the given address must have
- * been installed 1st because we are doing a resolution,
- * verify this.
- */
- if (!(flags & LLE_IFADDR) &&
- in_lltable_rtcheck(ifp, flags, l3addr) != 0)
- goto done;
-
- lle = in_lltable_new(l3addr, flags);
- if (lle == NULL) {
- log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
- goto done;
- }
- lle->la_flags = flags & ~LLE_CREATE;
- if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
- bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
- lle->la_flags |= (LLE_VALID | LLE_STATIC);
- }
- lle->lle_tbl = llt;
- lle->lle_head = lleh;
- lle->la_flags |= LLE_LINKED;
- LIST_INSERT_HEAD(lleh, lle, lle_next);
- } else if (flags & LLE_DELETE) {
- if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
- LLE_WLOCK(lle);
- lle->la_flags |= LLE_DELETED;
- EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
+ return (lle);
+}
+
+static void
+in_lltable_delete_entry(struct lltable *llt, struct llentry *lle)
+{
+
+ lle->la_flags |= LLE_DELETED;
+ EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
#ifdef DIAGNOSTIC
- log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
+ log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
#endif
- if ((lle->la_flags &
- (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC)
- llentry_free(lle);
- else
- LLE_WUNLOCK(lle);
- }
- lle = (void *)-1;
+ llentry_free(lle);
+}
+
+static struct llentry *
+in_lltable_alloc(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
+{
+ const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
+ struct ifnet *ifp = llt->llt_ifp;
+ struct llentry *lle;
+ char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
+
+ KASSERT(l3addr->sa_family == AF_INET,
+ ("sin_family %d", l3addr->sa_family));
+ /*
+ * A route that covers the given address must have
+ * been installed 1st because we are doing a resolution,
+ * verify this.
+ */
+ if (!(flags & LLE_IFADDR) &&
+ in_lltable_rtcheck(ifp, flags, l3addr) != 0)
+ return (NULL);
+
+ lle = in_lltable_new(sin->sin_addr, flags);
+ if (lle == NULL) {
+ log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
+ return (NULL);
}
- if (LLE_IS_VALID(lle)) {
- if (flags & LLE_EXCLUSIVE)
- LLE_WLOCK(lle);
- else
- LLE_RLOCK(lle);
+ lle->la_flags = flags;
+ if (flags & LLE_STATIC)
+ lle->r_flags |= RLLE_VALID;
+ if ((flags & LLE_IFADDR) == LLE_IFADDR) {
+ linkhdrsize = LLE_MAX_LINKHDR;
+ if (lltable_calc_llheader(ifp, AF_INET, IF_LLADDR(ifp),
+ linkhdr, &linkhdrsize, &lladdr_off) != 0) {
+ in_lltable_destroy_lle_unlocked(lle);
+ return (NULL);
+ }
+ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
+ lladdr_off);
+ lle->la_flags |= LLE_STATIC;
+ lle->r_flags |= (RLLE_VALID | RLLE_IFADDR);
}
-done:
+
+ return (lle);
+}
+
+/*
+ * Return NULL if not found or marked for deletion.
+ * If found return lle read locked.
+ */
+static struct llentry *
+in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
+{
+ const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
+ struct llentry *lle;
+
+ IF_AFDATA_LOCK_ASSERT(llt->llt_ifp);
+ KASSERT(l3addr->sa_family == AF_INET,
+ ("sin_family %d", l3addr->sa_family));
+ lle = in_lltable_find_dst(llt, sin->sin_addr);
+
+ if (lle == NULL)
+ return (NULL);
+
+ KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) !=
+ (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X",
+ flags));
+
+ if (flags & LLE_UNLOCKED)
+ return (lle);
+
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WLOCK(lle);
+ else
+ LLE_RLOCK(lle);
+
return (lle);
}
static int
-in_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
+in_lltable_dump_entry(struct lltable *llt, struct llentry *lle,
+ struct sysctl_req *wr)
{
-#define SIN(lle) ((struct sockaddr_in *) L3_ADDR(lle))
struct ifnet *ifp = llt->llt_ifp;
- struct llentry *lle;
/* XXX stack use */
struct {
struct rt_msghdr rtm;
- struct sockaddr_inarp sin;
+ struct sockaddr_in sin;
struct sockaddr_dl sdl;
} arpc;
- int error, i;
-
- LLTABLE_LOCK_ASSERT();
-
- error = 0;
- for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
- LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
- struct sockaddr_dl *sdl;
+ struct sockaddr_dl *sdl;
+ int error;
+ bzero(&arpc, sizeof(arpc));
/* skip deleted entries */
if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
- continue;
+ return (0);
/* Skip if jailed and not a valid IP of the prison. */
- if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
- continue;
+ lltable_fill_sa_entry(lle,(struct sockaddr *)&arpc.sin);
+ if (prison_if(wr->td->td_ucred,
+ (struct sockaddr *)&arpc.sin) != 0)
+ return (0);
/*
* produce a msg made of:
* struct rt_msghdr;
- * struct sockaddr_inarp; (IPv4)
+ * struct sockaddr_in; (IPv4)
* struct sockaddr_dl;
*/
- bzero(&arpc, sizeof(arpc));
arpc.rtm.rtm_msglen = sizeof(arpc);
arpc.rtm.rtm_version = RTM_VERSION;
arpc.rtm.rtm_type = RTM_GET;
arpc.rtm.rtm_flags = RTF_UP;
arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
- arpc.sin.sin_family = AF_INET;
- arpc.sin.sin_len = sizeof(arpc.sin);
- arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;
/* publish */
- if (lle->la_flags & LLE_PUB) {
+ if (lle->la_flags & LLE_PUB)
arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
- /* proxy only */
- if (lle->la_flags & LLE_PROXY)
- arpc.sin.sin_other = SIN_PROXY;
- }
sdl = &arpc.sdl;
sdl->sdl_family = AF_LINK;
@@ -1586,7 +1424,7 @@ in_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
sdl->sdl_type = ifp->if_type;
if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
sdl->sdl_alen = ifp->if_addrlen;
- bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
+ bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
} else {
sdl->sdl_alen = 0;
bzero(LLADDR(sdl), ifp->if_addrlen);
@@ -1597,35 +1435,47 @@ in_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
if (lle->la_flags & LLE_STATIC)
arpc.rtm.rtm_flags |= RTF_STATIC;
+ if (lle->la_flags & LLE_IFADDR)
+ arpc.rtm.rtm_flags |= RTF_PINNED;
arpc.rtm.rtm_index = ifp->if_index;
error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
- if (error)
- break;
- }
- }
- return error;
-#undef SIN
+
+ return (error);
+}
+
+static struct lltable *
+in_lltattach(struct ifnet *ifp)
+{
+ struct lltable *llt;
+
+ llt = lltable_allocate_htbl(IN_LLTBL_DEFAULT_HSIZE);
+ llt->llt_af = AF_INET;
+ llt->llt_ifp = ifp;
+
+ llt->llt_lookup = in_lltable_lookup;
+ llt->llt_alloc_entry = in_lltable_alloc;
+ llt->llt_delete_entry = in_lltable_delete_entry;
+ llt->llt_dump_entry = in_lltable_dump_entry;
+ llt->llt_hash = in_lltable_hash;
+ llt->llt_fill_sa_entry = in_lltable_fill_sa_entry;
+ llt->llt_free_entry = in_lltable_free_entry;
+ llt->llt_match_prefix = in_lltable_match_prefix;
+ lltable_link(llt);
+
+ return (llt);
}
void *
in_domifattach(struct ifnet *ifp)
{
struct in_ifinfo *ii;
- struct lltable *llt;
ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO);
- llt = lltable_init(ifp, AF_INET);
- if (llt != NULL) {
- llt->llt_prefix_free = in_lltable_prefix_free;
- llt->llt_lookup = in_lltable_lookup;
- llt->llt_dump = in_lltable_dump;
- }
- ii->ii_llt = llt;
-
+ ii->ii_llt = in_lltattach(ifp);
ii->ii_igmp = igmp_domifattach(ifp);
- return ii;
+ return (ii);
}
void
diff --git a/freebsd/sys/netinet/in.h b/freebsd/sys/netinet/in.h
index 06f9b793..b06e3334 100644
--- a/freebsd/sys/netinet/in.h
+++ b/freebsd/sys/netinet/in.h
@@ -47,8 +47,8 @@
#define IPPROTO_TCP 6 /* tcp */
#define IPPROTO_UDP 17 /* user datagram protocol */
-#define INADDR_ANY (u_int32_t)0x00000000
-#define INADDR_BROADCAST (u_int32_t)0xffffffff /* must be masked */
+#define INADDR_ANY ((in_addr_t)0x00000000)
+#define INADDR_BROADCAST ((in_addr_t)0xffffffff) /* must be masked */
#ifndef _UINT8_T_DECLARED
typedef __uint8_t uint8_t;
@@ -104,7 +104,7 @@ struct sockaddr_in {
char sin_zero[8];
};
-#if !defined(_KERNEL) && __BSD_VISIBLE
+#if !defined(_KERNEL) && __POSIX_VISIBLE >= 200112
#ifndef _BYTEORDER_PROTOTYPED
#define _BYTEORDER_PROTOTYPED
@@ -124,7 +124,7 @@ __END_DECLS
#define ntohs(x) __ntohs(x)
#endif
-#endif /* !_KERNEL && __BSD_VISIBLE */
+#endif /* !_KERNEL && __POSIX_VISIBLE >= 200112 */
#if __POSIX_VISIBLE >= 200112
#define IPPROTO_IPV6 41 /* IP6 header */
@@ -241,12 +241,17 @@ __END_DECLS
#define IPPROTO_IPCOMP 108 /* payload compression (IPComp) */
#define IPPROTO_SCTP 132 /* SCTP */
#define IPPROTO_MH 135 /* IPv6 Mobility Header */
+#define IPPROTO_UDPLITE 136 /* UDP-Lite */
+#define IPPROTO_HIP 139 /* IP6 Host Identity Protocol */
+#define IPPROTO_SHIM6 140 /* IP6 Shim6 Protocol */
/* 101-254: Partly Unassigned */
#define IPPROTO_PIM 103 /* Protocol Independent Mcast */
#define IPPROTO_CARP 112 /* CARP */
#define IPPROTO_PGM 113 /* PGM */
#define IPPROTO_MPLS 137 /* MPLS-in-IP */
#define IPPROTO_PFSYNC 240 /* PFSYNC */
+#define IPPROTO_RESERVED_253 253 /* Reserved */
+#define IPPROTO_RESERVED_254 254 /* Reserved */
/* 255: Reserved */
/* BSD Private, local use, namespace incursion, no longer used */
#define IPPROTO_OLD_DIVERT 254 /* OLD divert pseudo-proto */
@@ -343,61 +348,61 @@ __END_DECLS
* On subnets, the decomposition of addresses to host and net parts
* is done according to subnet mask, not the masks here.
*/
-#define IN_CLASSA(i) (((u_int32_t)(i) & 0x80000000) == 0)
+#define IN_CLASSA(i) (((in_addr_t)(i) & 0x80000000) == 0)
#define IN_CLASSA_NET 0xff000000
#define IN_CLASSA_NSHIFT 24
#define IN_CLASSA_HOST 0x00ffffff
#define IN_CLASSA_MAX 128
-#define IN_CLASSB(i) (((u_int32_t)(i) & 0xc0000000) == 0x80000000)
+#define IN_CLASSB(i) (((in_addr_t)(i) & 0xc0000000) == 0x80000000)
#define IN_CLASSB_NET 0xffff0000
#define IN_CLASSB_NSHIFT 16
#define IN_CLASSB_HOST 0x0000ffff
#define IN_CLASSB_MAX 65536
-#define IN_CLASSC(i) (((u_int32_t)(i) & 0xe0000000) == 0xc0000000)
+#define IN_CLASSC(i) (((in_addr_t)(i) & 0xe0000000) == 0xc0000000)
#define IN_CLASSC_NET 0xffffff00
#define IN_CLASSC_NSHIFT 8
#define IN_CLASSC_HOST 0x000000ff
-#define IN_CLASSD(i) (((u_int32_t)(i) & 0xf0000000) == 0xe0000000)
+#define IN_CLASSD(i) (((in_addr_t)(i) & 0xf0000000) == 0xe0000000)
#define IN_CLASSD_NET 0xf0000000 /* These ones aren't really */
#define IN_CLASSD_NSHIFT 28 /* net and host fields, but */
#define IN_CLASSD_HOST 0x0fffffff /* routing needn't know. */
#define IN_MULTICAST(i) IN_CLASSD(i)
-#define IN_EXPERIMENTAL(i) (((u_int32_t)(i) & 0xf0000000) == 0xf0000000)
-#define IN_BADCLASS(i) (((u_int32_t)(i) & 0xf0000000) == 0xf0000000)
+#define IN_EXPERIMENTAL(i) (((in_addr_t)(i) & 0xf0000000) == 0xf0000000)
+#define IN_BADCLASS(i) (((in_addr_t)(i) & 0xf0000000) == 0xf0000000)
-#define IN_LINKLOCAL(i) (((u_int32_t)(i) & 0xffff0000) == 0xa9fe0000)
-#define IN_LOOPBACK(i) (((u_int32_t)(i) & 0xff000000) == 0x7f000000)
-#define IN_ZERONET(i) (((u_int32_t)(i) & 0xff000000) == 0)
+#define IN_LINKLOCAL(i) (((in_addr_t)(i) & 0xffff0000) == 0xa9fe0000)
+#define IN_LOOPBACK(i) (((in_addr_t)(i) & 0xff000000) == 0x7f000000)
+#define IN_ZERONET(i) (((in_addr_t)(i) & 0xff000000) == 0)
-#define IN_PRIVATE(i) ((((u_int32_t)(i) & 0xff000000) == 0x0a000000) || \
- (((u_int32_t)(i) & 0xfff00000) == 0xac100000) || \
- (((u_int32_t)(i) & 0xffff0000) == 0xc0a80000))
+#define IN_PRIVATE(i) ((((in_addr_t)(i) & 0xff000000) == 0x0a000000) || \
+ (((in_addr_t)(i) & 0xfff00000) == 0xac100000) || \
+ (((in_addr_t)(i) & 0xffff0000) == 0xc0a80000))
-#define IN_LOCAL_GROUP(i) (((u_int32_t)(i) & 0xffffff00) == 0xe0000000)
+#define IN_LOCAL_GROUP(i) (((in_addr_t)(i) & 0xffffff00) == 0xe0000000)
#define IN_ANY_LOCAL(i) (IN_LINKLOCAL(i) || IN_LOCAL_GROUP(i))
-#define INADDR_LOOPBACK (u_int32_t)0x7f000001
+#define INADDR_LOOPBACK ((in_addr_t)0x7f000001)
#ifndef _KERNEL
-#define INADDR_NONE 0xffffffff /* -1 return */
+#define INADDR_NONE ((in_addr_t)0xffffffff) /* -1 return */
#endif
-#define INADDR_UNSPEC_GROUP (u_int32_t)0xe0000000 /* 224.0.0.0 */
-#define INADDR_ALLHOSTS_GROUP (u_int32_t)0xe0000001 /* 224.0.0.1 */
-#define INADDR_ALLRTRS_GROUP (u_int32_t)0xe0000002 /* 224.0.0.2 */
-#define INADDR_ALLRPTS_GROUP (u_int32_t)0xe0000016 /* 224.0.0.22, IGMPv3 */
-#define INADDR_CARP_GROUP (u_int32_t)0xe0000012 /* 224.0.0.18 */
-#define INADDR_PFSYNC_GROUP (u_int32_t)0xe00000f0 /* 224.0.0.240 */
-#define INADDR_ALLMDNS_GROUP (u_int32_t)0xe00000fb /* 224.0.0.251 */
-#define INADDR_MAX_LOCAL_GROUP (u_int32_t)0xe00000ff /* 224.0.0.255 */
+#define INADDR_UNSPEC_GROUP ((in_addr_t)0xe0000000) /* 224.0.0.0 */
+#define INADDR_ALLHOSTS_GROUP ((in_addr_t)0xe0000001) /* 224.0.0.1 */
+#define INADDR_ALLRTRS_GROUP ((in_addr_t)0xe0000002) /* 224.0.0.2 */
+#define INADDR_ALLRPTS_GROUP ((in_addr_t)0xe0000016) /* 224.0.0.22, IGMPv3 */
+#define INADDR_CARP_GROUP ((in_addr_t)0xe0000012) /* 224.0.0.18 */
+#define INADDR_PFSYNC_GROUP ((in_addr_t)0xe00000f0) /* 224.0.0.240 */
+#define INADDR_ALLMDNS_GROUP ((in_addr_t)0xe00000fb) /* 224.0.0.251 */
+#define INADDR_MAX_LOCAL_GROUP ((in_addr_t)0xe00000ff) /* 224.0.0.255 */
#define IN_LOOPBACKNET 127 /* official! */
-#define IN_RFC3021_MASK (u_int32_t)0xfffffffe
+#define IN_RFC3021_MASK ((in_addr_t)0xfffffffe)
/*
* Options for use with [gs]etsockopt at the IP level.
@@ -427,10 +432,11 @@ __END_DECLS
#define IP_RECVIF 20 /* bool; receive reception if w/dgram */
/* for IPSEC */
#define IP_IPSEC_POLICY 21 /* int; set/get security policy */
-#define IP_FAITH 22 /* bool; accept FAITH'ed connections */
-
+ /* unused; was IP_FAITH */
#define IP_ONESBCAST 23 /* bool: send all-ones broadcast */
#define IP_BINDANY 24 /* bool: allow bind to any address */
+#define IP_BINDMULTI 25 /* bool: allow multiple listeners on a tuple */
+#define IP_RSS_LISTEN_BUCKET 26 /* int; set RSS listen bucket */
/*
* Options for controlling the firewall and dummynet.
@@ -485,6 +491,13 @@ __END_DECLS
#define MCAST_BLOCK_SOURCE 84 /* block a source */
#define MCAST_UNBLOCK_SOURCE 85 /* unblock a source */
+/* Flow and RSS definitions */
+#define IP_FLOWID 90 /* get flow id for the given socket/inp */
+#define IP_FLOWTYPE 91 /* get flow type (M_HASHTYPE) */
+#define IP_RSSBUCKETID 92 /* get RSS flowid -> bucket mapping */
+#define IP_RECVFLOWID 93 /* bool; receive IP flowid/flowtype w/ datagram */
+#define IP_RECVRSSBUCKETID 94 /* bool; receive IP RSS bucket id w/ datagram */
+
/*
* Defaults and limits for options
*/
@@ -602,86 +615,7 @@ int getsourcefilter(int, uint32_t, struct sockaddr *, socklen_t,
#define IP_PORTRANGE_LOW 2 /* "low" - vouchsafe security */
/*
- * Definitions for inet sysctl operations.
- *
- * Third level is protocol number.
- * Fourth level is desired variable within that protocol.
- */
-#define IPPROTO_MAXID (IPPROTO_AH + 1) /* don't list to IPPROTO_MAX */
-
-#define CTL_IPPROTO_NAMES { \
- { "ip", CTLTYPE_NODE }, \
- { "icmp", CTLTYPE_NODE }, \
- { "igmp", CTLTYPE_NODE }, \
- { "ggp", CTLTYPE_NODE }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { "tcp", CTLTYPE_NODE }, \
- { 0, 0 }, \
- { "egp", CTLTYPE_NODE }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { "pup", CTLTYPE_NODE }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { "udp", CTLTYPE_NODE }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { "idp", CTLTYPE_NODE }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { "ipsec", CTLTYPE_NODE }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
- { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
- { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
- { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
- { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
- { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
- { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
- { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
- { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { 0, 0 }, \
- { "pim", CTLTYPE_NODE }, \
-}
-
-/*
- * Names for IP sysctl objects
+ * Identifiers for IP sysctl nodes
*/
#define IPCTL_FORWARDING 1 /* act as router */
#define IPCTL_SENDREDIRECTS 2 /* may send redirects when forwarding */
@@ -689,9 +623,9 @@ int getsourcefilter(int, uint32_t, struct sockaddr *, socklen_t,
#ifdef notyet
#define IPCTL_DEFMTU 4 /* default MTU */
#endif
-#define IPCTL_RTEXPIRE 5 /* cloned route expiration time */
-#define IPCTL_RTMINEXPIRE 6 /* min value for expiration time */
-#define IPCTL_RTMAXCACHE 7 /* trigger level for dynamic expire */
+/* IPCTL_RTEXPIRE 5 deprecated */
+/* IPCTL_RTMINEXPIRE 6 deprecated */
+/* IPCTL_RTMAXCACHE 7 deprecated */
#define IPCTL_SOURCEROUTE 8 /* may perform source routes */
#define IPCTL_DIRECTEDBROADCAST 9 /* may re-broadcast received packets */
#define IPCTL_INTRQMAXLEN 10 /* max length of netisr queue */
@@ -699,38 +633,22 @@ int getsourcefilter(int, uint32_t, struct sockaddr *, socklen_t,
#define IPCTL_STATS 12 /* ipstat structure */
#define IPCTL_ACCEPTSOURCEROUTE 13 /* may accept source routed packets */
#define IPCTL_FASTFORWARDING 14 /* use fast IP forwarding code */
-#define IPCTL_KEEPFAITH 15 /* FAITH IPv4->IPv6 translater ctl */
+ /* 15, unused, was: IPCTL_KEEPFAITH */
#define IPCTL_GIF_TTL 16 /* default TTL for gif encap packet */
-#define IPCTL_MAXID 17
-
-#define IPCTL_NAMES { \
- { 0, 0 }, \
- { "forwarding", CTLTYPE_INT }, \
- { "redirect", CTLTYPE_INT }, \
- { "ttl", CTLTYPE_INT }, \
- { "mtu", CTLTYPE_INT }, \
- { "rtexpire", CTLTYPE_INT }, \
- { "rtminexpire", CTLTYPE_INT }, \
- { "rtmaxcache", CTLTYPE_INT }, \
- { "sourceroute", CTLTYPE_INT }, \
- { "directed-broadcast", CTLTYPE_INT }, \
- { "intr-queue-maxlen", CTLTYPE_INT }, \
- { "intr-queue-drops", CTLTYPE_INT }, \
- { "stats", CTLTYPE_STRUCT }, \
- { "accept_sourceroute", CTLTYPE_INT }, \
- { "fastforwarding", CTLTYPE_INT }, \
-}
#endif /* __BSD_VISIBLE */
#ifdef _KERNEL
struct ifnet; struct mbuf; /* forward declarations for Standard C */
+struct in_ifaddr;
int in_broadcast(struct in_addr, struct ifnet *);
+int in_ifaddr_broadcast(struct in_addr, struct in_ifaddr *);
int in_canforward(struct in_addr);
int in_localaddr(struct in_addr);
int in_localip(struct in_addr);
+int in_ifhasaddr(struct ifnet *, struct in_addr);
int inet_aton(const char *, struct in_addr *); /* in libkern */
char *inet_ntoa(struct in_addr); /* in libkern */
char *inet_ntoa_r(struct in_addr ina, char *buf); /* in libkern */
@@ -745,33 +663,6 @@ void in_ifdetach(struct ifnet *);
#define satosin(sa) ((struct sockaddr_in *)(sa))
#define sintosa(sin) ((struct sockaddr *)(sin))
#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
-
-/*
- * Historically, BSD keeps ip_len and ip_off in host format
- * when doing layer 3 processing, and this often requires
- * to translate the format back and forth.
- * To make the process explicit, we define a couple of macros
- * that also take into account the fact that at some point
- * we may want to keep those fields always in net format.
- */
-
-#if (BYTE_ORDER == BIG_ENDIAN) || defined(HAVE_NET_IPLEN)
-#define SET_NET_IPLEN(p) do {} while (0)
-#define SET_HOST_IPLEN(p) do {} while (0)
-#else
-#define SET_NET_IPLEN(p) do { \
- struct ip *h_ip = (p); \
- h_ip->ip_len = htons(h_ip->ip_len); \
- h_ip->ip_off = htons(h_ip->ip_off); \
- } while (0)
-
-#define SET_HOST_IPLEN(p) do { \
- struct ip *h_ip = (p); \
- h_ip->ip_len = ntohs(h_ip->ip_len); \
- h_ip->ip_off = ntohs(h_ip->ip_off); \
- } while (0)
-#endif /* !HAVE_NET_IPLEN */
-
#endif /* _KERNEL */
/* INET6 stuff */
diff --git a/freebsd/sys/netinet/in_fib.c b/freebsd/sys/netinet/in_fib.c
new file mode 100644
index 00000000..f1edf976
--- /dev/null
+++ b/freebsd/sys/netinet/in_fib.c
@@ -0,0 +1,235 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2015
+ * Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_route.h>
+#include <rtems/bsd/local/opt_mpath.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/route_var.h>
+#include <net/vnet.h>
+
+#ifdef RADIX_MPATH
+#include <net/radix_mpath.h>
+#endif
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_fib.h>
+
+#ifdef INET
+static void fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst,
+ uint32_t flags, struct nhop4_basic *pnh4);
+static void fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst,
+ uint32_t flags, struct nhop4_extended *pnh4);
+
+#define RNTORT(p) ((struct rtentry *)(p))
+
+static void
+fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst,
+ uint32_t flags, struct nhop4_basic *pnh4)
+{
+ struct sockaddr_in *gw;
+
+ if ((flags & NHR_IFAIF) != 0)
+ pnh4->nh_ifp = rte->rt_ifa->ifa_ifp;
+ else
+ pnh4->nh_ifp = rte->rt_ifp;
+ pnh4->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu);
+ if (rte->rt_flags & RTF_GATEWAY) {
+ gw = (struct sockaddr_in *)rte->rt_gateway;
+ pnh4->nh_addr = gw->sin_addr;
+ } else
+ pnh4->nh_addr = dst;
+ /* Set flags */
+ pnh4->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
+ gw = (struct sockaddr_in *)rt_key(rte);
+ if (gw->sin_addr.s_addr == 0)
+ pnh4->nh_flags |= NHF_DEFAULT;
+ /* TODO: Handle RTF_BROADCAST here */
+}
+
+static void
+fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst,
+ uint32_t flags, struct nhop4_extended *pnh4)
+{
+ struct sockaddr_in *gw;
+ struct in_ifaddr *ia;
+
+ if ((flags & NHR_IFAIF) != 0)
+ pnh4->nh_ifp = rte->rt_ifa->ifa_ifp;
+ else
+ pnh4->nh_ifp = rte->rt_ifp;
+ pnh4->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu);
+ if (rte->rt_flags & RTF_GATEWAY) {
+ gw = (struct sockaddr_in *)rte->rt_gateway;
+ pnh4->nh_addr = gw->sin_addr;
+ } else
+ pnh4->nh_addr = dst;
+ /* Set flags */
+ pnh4->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
+ gw = (struct sockaddr_in *)rt_key(rte);
+ if (gw->sin_addr.s_addr == 0)
+ pnh4->nh_flags |= NHF_DEFAULT;
+ /* XXX: Set RTF_BROADCAST if GW address is broadcast */
+
+ ia = ifatoia(rte->rt_ifa);
+ pnh4->nh_src = IA_SIN(ia)->sin_addr;
+}
+
+/*
+ * Performs IPv4 route table lookup on @dst. Returns 0 on success.
+ * Stores nexthop info provided @pnh4 structure.
+ * Note that
+ * - nh_ifp cannot be safely dereferenced
+ * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if
+ * looking up address on interface "ix0" pointer to "lo0" interface
+ * will be returned instead of "ix0")
+ * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
+ * - howewer mtu from "transmit" interface will be returned.
+ */
+int
+fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,
+ uint32_t flowid, struct nhop4_basic *pnh4)
+{
+ struct rib_head *rh;
+ struct radix_node *rn;
+ struct sockaddr_in sin;
+ struct rtentry *rte;
+
+ KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_basic: bad fibnum"));
+ rh = rt_tables_get_rnh(fibnum, AF_INET);
+ if (rh == NULL)
+ return (ENOENT);
+
+ /* Prepare lookup key */
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_addr = dst;
+
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr((void *)&sin, &rh->head);
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ rte = RNTORT(rn);
+ /* Ensure route & ifp is UP */
+ if (RT_LINK_IS_UP(rte->rt_ifp)) {
+ fib4_rte_to_nh_basic(rte, dst, flags, pnh4);
+ RIB_RUNLOCK(rh);
+
+ return (0);
+ }
+ }
+ RIB_RUNLOCK(rh);
+
+ return (ENOENT);
+}
+
+/*
+ * Performs IPv4 route table lookup on @dst. Returns 0 on success.
+ * Stores extende nexthop info provided @pnh4 structure.
+ * Note that
+ * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified.
+ * - in that case you need to call fib4_free_nh_ext()
+ * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if
+ * looking up address of interface "ix0" pointer to "lo0" interface
+ * will be returned instead of "ix0")
+ * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
+ * - howewer mtu from "transmit" interface will be returned.
+ */
+int
+fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags,
+ uint32_t flowid, struct nhop4_extended *pnh4)
+{
+ struct rib_head *rh;
+ struct radix_node *rn;
+ struct sockaddr_in sin;
+ struct rtentry *rte;
+
+ KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_ext: bad fibnum"));
+ rh = rt_tables_get_rnh(fibnum, AF_INET);
+ if (rh == NULL)
+ return (ENOENT);
+
+ /* Prepare lookup key */
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_addr = dst;
+
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr((void *)&sin, &rh->head);
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ rte = RNTORT(rn);
+#ifdef RADIX_MPATH
+ rte = rt_mpath_select(rte, flowid);
+ if (rte == NULL) {
+ RIB_RUNLOCK(rh);
+ return (ENOENT);
+ }
+#endif
+ /* Ensure route & ifp is UP */
+ if (RT_LINK_IS_UP(rte->rt_ifp)) {
+ fib4_rte_to_nh_extended(rte, dst, flags, pnh4);
+ if ((flags & NHR_REF) != 0) {
+ /* TODO: lwref on egress ifp's ? */
+ }
+ RIB_RUNLOCK(rh);
+
+ return (0);
+ }
+ }
+ RIB_RUNLOCK(rh);
+
+ return (ENOENT);
+}
+
+void
+fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4)
+{
+
+}
+
+#endif
diff --git a/freebsd/sys/netinet/in_fib.h b/freebsd/sys/netinet/in_fib.h
new file mode 100644
index 00000000..754a2e3c
--- /dev/null
+++ b/freebsd/sys/netinet/in_fib.h
@@ -0,0 +1,61 @@
+/*-
+ * Copyright (c) 2015
+ * Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IN_FIB_H_
+#define _NETINET_IN_FIB_H_
+
+/* Basic nexthop info used for uRPF/mtu checks */
+struct nhop4_basic {
+ struct ifnet *nh_ifp; /* Logical egress interface */
+ uint16_t nh_mtu; /* nexthop mtu */
+ uint16_t nh_flags; /* nhop flags */
+ struct in_addr nh_addr; /* GW/DST IPv4 address */
+};
+
+/* Extended nexthop info used for control protocols */
+struct nhop4_extended {
+ struct ifnet *nh_ifp; /* Logical egress interface */
+ uint16_t nh_mtu; /* nexthop mtu */
+ uint16_t nh_flags; /* nhop flags */
+ uint8_t spare[4];
+ struct in_addr nh_addr; /* GW/DST IPv4 address */
+ struct in_addr nh_src; /* default source IPv4 address */
+ uint64_t spare2[2];
+};
+
+int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,
+ uint32_t flowid, struct nhop4_basic *pnh4);
+int fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags,
+ uint32_t flowid, struct nhop4_extended *pnh4);
+void fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4);
+
+#endif
+
diff --git a/freebsd/sys/netinet/in_gif.c b/freebsd/sys/netinet/in_gif.c
index 332d7ff4..02e2efd8 100644
--- a/freebsd/sys/netinet/in_gif.c
+++ b/freebsd/sys/netinet/in_gif.c
@@ -1,7 +1,5 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $ */
-
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
@@ -29,16 +27,19 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <rtems/bsd/local/opt_mrouting.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rmlock.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/sockio.h>
@@ -50,6 +51,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -57,162 +59,56 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
-#include <netinet/in_gif.h>
#include <netinet/in_var.h>
#include <netinet/ip_encap.h>
#include <netinet/ip_ecn.h>
+#include <netinet/in_fib.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
-#ifdef MROUTING
-#include <netinet/ip_mroute.h>
-#endif /* MROUTING */
-
-#include <net/if_gif.h>
+#include <net/if_gif.h>
-static int gif_validate4(const struct ip *, struct gif_softc *,
- struct ifnet *);
+static int in_gif_input(struct mbuf **, int *, int);
extern struct domain inetdomain;
-struct protosw in_gif_protosw = {
+static struct protosw in_gif_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = 0/* IPPROTO_IPV[46] */,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = in_gif_input,
- .pr_output = (pr_output_t*)rip_output,
+ .pr_output = rip_output,
.pr_ctloutput = rip_ctloutput,
.pr_usrreqs = &rip_usrreqs
};
-VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL;
+#define GIF_TTL 30
+static VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL;
#define V_ip_gif_ttl VNET(ip_gif_ttl)
-SYSCTL_VNET_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip_gif_ttl), 0, "");
int
-in_gif_output(struct ifnet *ifp, int family, struct mbuf *m)
+in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
{
+ GIF_RLOCK_TRACKER;
struct gif_softc *sc = ifp->if_softc;
- struct sockaddr_in *dst = (struct sockaddr_in *)&sc->gif_ro.ro_dst;
- struct sockaddr_in *sin_src = (struct sockaddr_in *)sc->gif_psrc;
- struct sockaddr_in *sin_dst = (struct sockaddr_in *)sc->gif_pdst;
- struct ip iphdr; /* capsule IP header, host byte ordered */
- struct etherip_header eiphdr;
- int error, len, proto;
- u_int8_t tos;
-
- GIF_LOCK_ASSERT(sc);
-
- if (sin_src == NULL || sin_dst == NULL ||
- sin_src->sin_family != AF_INET ||
- sin_dst->sin_family != AF_INET) {
- m_freem(m);
- return EAFNOSUPPORT;
- }
-
- switch (family) {
-#ifdef INET
- case AF_INET:
- {
- struct ip *ip;
-
- proto = IPPROTO_IPV4;
- if (m->m_len < sizeof(*ip)) {
- m = m_pullup(m, sizeof(*ip));
- if (!m)
- return ENOBUFS;
- }
- ip = mtod(m, struct ip *);
- tos = ip->ip_tos;
- break;
- }
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- {
- struct ip6_hdr *ip6;
- proto = IPPROTO_IPV6;
- if (m->m_len < sizeof(*ip6)) {
- m = m_pullup(m, sizeof(*ip6));
- if (!m)
- return ENOBUFS;
- }
- ip6 = mtod(m, struct ip6_hdr *);
- tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
- break;
- }
-#endif /* INET6 */
- case AF_LINK:
- proto = IPPROTO_ETHERIP;
-
- /*
- * GIF_SEND_REVETHIP (disabled by default) intentionally
- * sends an EtherIP packet with revered version field in
- * the header. This is a knob for backward compatibility
- * with FreeBSD 7.2R or prior.
- */
- if ((sc->gif_options & GIF_SEND_REVETHIP)) {
- eiphdr.eip_ver = 0;
- eiphdr.eip_resvl = ETHERIP_VERSION;
- eiphdr.eip_resvh = 0;
- } else {
- eiphdr.eip_ver = ETHERIP_VERSION;
- eiphdr.eip_resvl = 0;
- eiphdr.eip_resvh = 0;
- }
- /* prepend Ethernet-in-IP header */
- M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);
- if (m && m->m_len < sizeof(struct etherip_header))
- m = m_pullup(m, sizeof(struct etherip_header));
- if (m == NULL)
- return ENOBUFS;
- bcopy(&eiphdr, mtod(m, struct etherip_header *),
- sizeof(struct etherip_header));
- break;
-
- default:
-#ifdef DEBUG
- printf("in_gif_output: warning: unknown family %d passed\n",
- family);
-#endif
- m_freem(m);
- return EAFNOSUPPORT;
- }
-
- bzero(&iphdr, sizeof(iphdr));
- iphdr.ip_src = sin_src->sin_addr;
- /* bidirectional configured tunnel mode */
- if (sin_dst->sin_addr.s_addr != INADDR_ANY)
- iphdr.ip_dst = sin_dst->sin_addr;
- else {
- m_freem(m);
- return ENETUNREACH;
- }
- iphdr.ip_p = proto;
- /* version will be set in ip_output() */
- iphdr.ip_ttl = V_ip_gif_ttl;
- iphdr.ip_len = m->m_pkthdr.len + sizeof(struct ip);
- ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
- &iphdr.ip_tos, &tos);
+ struct ip *ip;
+ int len;
/* prepend new IP header */
len = sizeof(struct ip);
#ifndef __NO_STRICT_ALIGNMENT
- if (family == AF_LINK)
+ if (proto == IPPROTO_ETHERIP)
len += ETHERIP_ALIGN;
#endif
- M_PREPEND(m, len, M_DONTWAIT);
- if (m != NULL && m->m_len < len)
- m = m_pullup(m, len);
- if (m == NULL) {
- printf("ENOBUFS in in_gif_output %d\n", __LINE__);
- return ENOBUFS;
- }
+ M_PREPEND(m, len, M_NOWAIT);
+ if (m == NULL)
+ return (ENOBUFS);
#ifndef __NO_STRICT_ALIGNMENT
- if (family == AF_LINK) {
+ if (proto == IPPROTO_ETHERIP) {
len = mtod(m, vm_offset_t) & 3;
KASSERT(len == 0 || len == ETHERIP_ALIGN,
("in_gif_output: unexpected misalignment"));
@@ -220,212 +116,51 @@ in_gif_output(struct ifnet *ifp, int family, struct mbuf *m)
m->m_len -= ETHERIP_ALIGN;
}
#endif
- bcopy(&iphdr, mtod(m, struct ip *), sizeof(struct ip));
-
- M_SETFIB(m, sc->gif_fibnum);
-
- if (dst->sin_family != sin_dst->sin_family ||
- dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr) {
- /* cache route doesn't match */
- bzero(dst, sizeof(*dst));
- dst->sin_family = sin_dst->sin_family;
- dst->sin_len = sizeof(struct sockaddr_in);
- dst->sin_addr = sin_dst->sin_addr;
- if (sc->gif_ro.ro_rt) {
- RTFREE(sc->gif_ro.ro_rt);
- sc->gif_ro.ro_rt = NULL;
- }
-#if 0
- GIF2IFP(sc)->if_mtu = GIF_MTU;
-#endif
- }
-
- if (sc->gif_ro.ro_rt == NULL) {
- in_rtalloc_ign(&sc->gif_ro, 0, sc->gif_fibnum);
- if (sc->gif_ro.ro_rt == NULL) {
- m_freem(m);
- return ENETUNREACH;
- }
-
- /* if it constitutes infinite encapsulation, punt. */
- if (sc->gif_ro.ro_rt->rt_ifp == ifp) {
- m_freem(m);
- return ENETUNREACH; /* XXX */
- }
-#if 0
- ifp->if_mtu = sc->gif_ro.ro_rt->rt_ifp->if_mtu
- - sizeof(struct ip);
-#endif
+ ip = mtod(m, struct ip *);
+ GIF_RLOCK(sc);
+ if (sc->gif_family != AF_INET) {
+ m_freem(m);
+ GIF_RUNLOCK(sc);
+ return (ENETDOWN);
}
+ bcopy(sc->gif_iphdr, ip, sizeof(struct ip));
+ GIF_RUNLOCK(sc);
- m_addr_changed(m);
-
- error = ip_output(m, NULL, &sc->gif_ro, 0, NULL, NULL);
-
- if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) &&
- sc->gif_ro.ro_rt != NULL) {
- RTFREE(sc->gif_ro.ro_rt);
- sc->gif_ro.ro_rt = NULL;
- }
+ ip->ip_p = proto;
+ /* version will be set in ip_output() */
+ ip->ip_ttl = V_ip_gif_ttl;
+ ip->ip_len = htons(m->m_pkthdr.len);
+ ip->ip_tos = ecn;
- return (error);
+ return (ip_output(m, NULL, NULL, 0, NULL, NULL));
}
-void
-in_gif_input(struct mbuf *m, int off)
+static int
+in_gif_input(struct mbuf **mp, int *offp, int proto)
{
- struct ifnet *gifp = NULL;
+ struct mbuf *m = *mp;
struct gif_softc *sc;
+ struct ifnet *gifp;
struct ip *ip;
- int af;
- u_int8_t otos;
- int proto;
+ uint8_t ecn;
- ip = mtod(m, struct ip *);
- proto = ip->ip_p;
-
- sc = (struct gif_softc *)encap_getarg(m);
+ sc = encap_getarg(m);
if (sc == NULL) {
m_freem(m);
KMOD_IPSTAT_INC(ips_nogif);
- return;
+ return (IPPROTO_DONE);
}
-
gifp = GIF2IFP(sc);
- if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) {
- m_freem(m);
- KMOD_IPSTAT_INC(ips_nogif);
- return;
- }
-
- otos = ip->ip_tos;
- m_adj(m, off);
-
- switch (proto) {
-#ifdef INET
- case IPPROTO_IPV4:
- {
- struct ip *ip;
- af = AF_INET;
- if (m->m_len < sizeof(*ip)) {
- m = m_pullup(m, sizeof(*ip));
- if (!m)
- return;
- }
+ if ((gifp->if_flags & IFF_UP) != 0) {
ip = mtod(m, struct ip *);
- if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
- ECN_ALLOWED : ECN_NOCARE,
- &otos, &ip->ip_tos) == 0) {
- m_freem(m);
- return;
- }
- break;
- }
-#endif
-#ifdef INET6
- case IPPROTO_IPV6:
- {
- struct ip6_hdr *ip6;
- u_int8_t itos, oitos;
-
- af = AF_INET6;
- if (m->m_len < sizeof(*ip6)) {
- m = m_pullup(m, sizeof(*ip6));
- if (!m)
- return;
- }
- ip6 = mtod(m, struct ip6_hdr *);
- itos = oitos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
- if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
- ECN_ALLOWED : ECN_NOCARE,
- &otos, &itos) == 0) {
- m_freem(m);
- return;
- }
- if (itos != oitos) {
- ip6->ip6_flow &= ~htonl(0xff << 20);
- ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
- }
- break;
- }
-#endif /* INET6 */
- case IPPROTO_ETHERIP:
- af = AF_LINK;
- break;
-
- default:
- KMOD_IPSTAT_INC(ips_nogif);
+ ecn = ip->ip_tos;
+ m_adj(m, *offp);
+ gif_input(m, gifp, proto, ecn);
+ } else {
m_freem(m);
- return;
- }
- gif_input(m, af, gifp);
- return;
-}
-
-/*
- * validate outer address.
- */
-static int
-gif_validate4(const struct ip *ip, struct gif_softc *sc, struct ifnet *ifp)
-{
- struct sockaddr_in *src, *dst;
- struct in_ifaddr *ia4;
-
- src = (struct sockaddr_in *)sc->gif_psrc;
- dst = (struct sockaddr_in *)sc->gif_pdst;
-
- /* check for address match */
- if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
- dst->sin_addr.s_addr != ip->ip_src.s_addr)
- return 0;
-
- /* martian filters on outer source - NOT done in ip_input! */
- if (IN_MULTICAST(ntohl(ip->ip_src.s_addr)))
- return 0;
- switch ((ntohl(ip->ip_src.s_addr) & 0xff000000) >> 24) {
- case 0: case 127: case 255:
- return 0;
- }
-
- /* reject packets with broadcast on source */
- /* XXXRW: should use hash lists? */
- IN_IFADDR_RLOCK();
- TAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) {
- if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
- continue;
- if (ip->ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
- IN_IFADDR_RUNLOCK();
- return 0;
- }
- }
- IN_IFADDR_RUNLOCK();
-
- /* ingress filters on outer source */
- if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0 && ifp) {
- struct sockaddr_in sin;
- struct rtentry *rt;
-
- bzero(&sin, sizeof(sin));
- sin.sin_family = AF_INET;
- sin.sin_len = sizeof(struct sockaddr_in);
- sin.sin_addr = ip->ip_src;
- /* XXX MRT check for the interface we would use on output */
- rt = in_rtalloc1((struct sockaddr *)&sin, 0,
- 0UL, sc->gif_fibnum);
- if (!rt || rt->rt_ifp != ifp) {
-#if 0
- log(LOG_WARNING, "%s: packet from 0x%x dropped "
- "due to ingress filter\n", if_name(GIF2IFP(sc)),
- (u_int32_t)ntohl(sin.sin_addr.s_addr));
-#endif
- if (rt)
- RTFREE_LOCKED(rt);
- return 0;
- }
- RTFREE_LOCKED(rt);
+ KMOD_IPSTAT_INC(ips_nogif);
}
-
- return 32 * 2;
+ return (IPPROTO_DONE);
}
/*
@@ -433,39 +168,51 @@ gif_validate4(const struct ip *ip, struct gif_softc *sc, struct ifnet *ifp)
* matched the physical addr family. see gif_encapcheck().
*/
int
-gif_encapcheck4(const struct mbuf *m, int off, int proto, void *arg)
+in_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
{
- struct ip ip;
+ const struct ip *ip;
struct gif_softc *sc;
- struct ifnet *ifp;
+ int ret;
/* sanity check done in caller */
sc = (struct gif_softc *)arg;
+ GIF_RLOCK_ASSERT(sc);
- /* LINTED const cast */
- m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
- ifp = ((m->m_flags & M_PKTHDR) != 0) ? m->m_pkthdr.rcvif : NULL;
+ /* check for address match */
+ ip = mtod(m, const struct ip *);
+ if (sc->gif_iphdr->ip_src.s_addr != ip->ip_dst.s_addr)
+ return (0);
+ ret = 32;
+ if (sc->gif_iphdr->ip_dst.s_addr != ip->ip_src.s_addr) {
+ if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
+ return (0);
+ } else
+ ret += 32;
- return gif_validate4(&ip, sc, ifp);
-}
+ /* ingress filters on outer source */
+ if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) {
+ struct nhop4_basic nh4;
+ struct in_addr dst;
-int
-in_gif_attach(struct gif_softc *sc)
-{
- sc->encap_cookie4 = encap_attach_func(AF_INET, -1, gif_encapcheck,
- &in_gif_protosw, sc);
- if (sc->encap_cookie4 == NULL)
- return EEXIST;
- return 0;
+ dst = ip->ip_src;
+
+ if (fib4_lookup_nh_basic(sc->gif_fibnum, dst, 0, 0, &nh4) != 0)
+ return (0);
+
+ if (nh4.nh_ifp != m->m_pkthdr.rcvif)
+ return (0);
+ }
+ return (ret);
}
int
-in_gif_detach(struct gif_softc *sc)
+in_gif_attach(struct gif_softc *sc)
{
- int error;
- error = encap_detach(sc->encap_cookie4);
- if (error == 0)
- sc->encap_cookie4 = NULL;
- return error;
+ KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
+ sc->gif_ecookie = encap_attach_func(AF_INET, -1, gif_encapcheck,
+ &in_gif_protosw, sc);
+ if (sc->gif_ecookie == NULL)
+ return (EEXIST);
+ return (0);
}
diff --git a/freebsd/sys/netinet/in_kdtrace.h b/freebsd/sys/netinet/in_kdtrace.h
new file mode 100644
index 00000000..a36991ef
--- /dev/null
+++ b/freebsd/sys/netinet/in_kdtrace.h
@@ -0,0 +1,72 @@
+/*-
+ * Copyright (c) 2013 Mark Johnston <markj@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_IN_KDTRACE_H_
+#define _SYS_IN_KDTRACE_H_
+
+#define IP_PROBE(probe, arg0, arg1, arg2, arg3, arg4, arg5) \
+ SDT_PROBE6(ip, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
+#define UDP_PROBE(probe, arg0, arg1, arg2, arg3, arg4) \
+ SDT_PROBE5(udp, , , probe, arg0, arg1, arg2, arg3, arg4)
+#define TCP_PROBE1(probe, arg0) \
+ SDT_PROBE1(tcp, , , probe, arg0)
+#define TCP_PROBE2(probe, arg0, arg1) \
+ SDT_PROBE2(tcp, , , probe, arg0, arg1)
+#define TCP_PROBE3(probe, arg0, arg1, arg2) \
+ SDT_PROBE3(tcp, , , probe, arg0, arg1, arg2)
+#define TCP_PROBE4(probe, arg0, arg1, arg2, arg3) \
+ SDT_PROBE4(tcp, , , probe, arg0, arg1, arg2, arg3)
+#define TCP_PROBE5(probe, arg0, arg1, arg2, arg3, arg4) \
+ SDT_PROBE5(tcp, , , probe, arg0, arg1, arg2, arg3, arg4)
+#define TCP_PROBE6(probe, arg0, arg1, arg2, arg3, arg4, arg5) \
+ SDT_PROBE6(tcp, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
+
+SDT_PROVIDER_DECLARE(ip);
+SDT_PROVIDER_DECLARE(tcp);
+SDT_PROVIDER_DECLARE(udp);
+
+SDT_PROBE_DECLARE(ip, , , receive);
+SDT_PROBE_DECLARE(ip, , , send);
+
+SDT_PROBE_DECLARE(tcp, , , accept__established);
+SDT_PROBE_DECLARE(tcp, , , accept__refused);
+SDT_PROBE_DECLARE(tcp, , , connect__established);
+SDT_PROBE_DECLARE(tcp, , , connect__refused);
+SDT_PROBE_DECLARE(tcp, , , connect__request);
+SDT_PROBE_DECLARE(tcp, , , receive);
+SDT_PROBE_DECLARE(tcp, , , send);
+SDT_PROBE_DECLARE(tcp, , , siftr);
+SDT_PROBE_DECLARE(tcp, , , state__change);
+SDT_PROBE_DECLARE(tcp, , , debug__input);
+SDT_PROBE_DECLARE(tcp, , , debug__output);
+SDT_PROBE_DECLARE(tcp, , , debug__user);
+SDT_PROBE_DECLARE(tcp, , , debug__drop);
+
+SDT_PROBE_DECLARE(udp, , , receive);
+SDT_PROBE_DECLARE(udp, , , send);
+
+#endif
diff --git a/freebsd/sys/netinet/in_mcast.c b/freebsd/sys/netinet/in_mcast.c
index 4112046c..3d68718e 100644
--- a/freebsd/sys/netinet/in_mcast.c
+++ b/freebsd/sys/netinet/in_mcast.c
@@ -40,23 +40,28 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
+#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
+#include <netinet/in_fib.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
@@ -148,6 +153,8 @@ static void inm_purge(struct in_multi *);
static void inm_reap(struct in_multi *);
static struct ip_moptions *
inp_findmoptions(struct inpcb *);
+static void inp_freemoptions_internal(struct ip_moptions *);
+static void inp_gcmoptions(void *, int);
static int inp_get_source_filters(struct inpcb *, struct sockopt *);
static int inp_join_group(struct inpcb *, struct sockopt *);
static int inp_leave_group(struct inpcb *, struct sockopt *);
@@ -164,25 +171,26 @@ static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0,
static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
- CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxgrpsrc, 0,
+ CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0,
"Max source filters per group");
-TUNABLE_ULONG("net.inet.ip.mcast.maxgrpsrc", &in_mcast_maxgrpsrc);
static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
- CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxsocksrc, 0,
+ CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0,
"Max source filters per socket");
-TUNABLE_ULONG("net.inet.ip.mcast.maxsocksrc", &in_mcast_maxsocksrc);
int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
-SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
+SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN,
&in_mcast_loop, 0, "Loopback multicast datagrams by default");
-TUNABLE_INT("net.inet.ip.mcast.loop", &in_mcast_loop);
static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
"Per-interface stack-wide source filters");
+static STAILQ_HEAD(, ip_moptions) imo_gc_list =
+ STAILQ_HEAD_INITIALIZER(imo_gc_list);
+static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);
+
#ifdef KTR
/*
* Inline function which wraps assertions for a valid ifp.
@@ -222,6 +230,49 @@ imf_init(struct in_mfilter *imf, const int st0, const int st1)
}
/*
+ * Function for looking up an in_multi record for an IPv4 multicast address
+ * on a given interface. ifp must be valid. If no record found, return NULL.
+ * The IN_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held.
+ */
+struct in_multi *
+inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina)
+{
+ struct ifmultiaddr *ifma;
+ struct in_multi *inm;
+
+ IN_MULTI_LOCK_ASSERT();
+ IF_ADDR_LOCK_ASSERT(ifp);
+
+ inm = NULL;
+ TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) {
+ if (ifma->ifma_addr->sa_family == AF_INET) {
+ inm = (struct in_multi *)ifma->ifma_protospec;
+ if (inm->inm_addr.s_addr == ina.s_addr)
+ break;
+ inm = NULL;
+ }
+ }
+ return (inm);
+}
+
+/*
+ * Wrapper for inm_lookup_locked().
+ * The IF_ADDR_LOCK will be taken on ifp and released on return.
+ */
+struct in_multi *
+inm_lookup(struct ifnet *ifp, const struct in_addr ina)
+{
+ struct in_multi *inm;
+
+ IN_MULTI_LOCK_ASSERT();
+ IF_ADDR_RLOCK(ifp);
+ inm = inm_lookup_locked(ifp, ina);
+ IF_ADDR_RUNLOCK(ifp);
+
+ return (inm);
+}
+
+/*
* Resize the ip_moptions vector to the next power-of-two minus 1.
* May be called with locks held; do not sleep.
*/
@@ -467,8 +518,8 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *group,
*/
inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
if (inm == NULL) {
- if_delmulti_ifma(ifma);
IF_ADDR_WUNLOCK(ifp);
+ if_delmulti_ifma(ifma);
return (ENOMEM);
}
inm->inm_addr = *group;
@@ -477,12 +528,7 @@ in_getmulti(struct ifnet *ifp, const struct in_addr *group,
inm->inm_ifma = ifma;
inm->inm_refcount = 1;
inm->inm_state = IGMP_NOT_MEMBER;
-
- /*
- * Pending state-changes per group are subject to a bounds check.
- */
- IFQ_SET_MAXLEN(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
-
+ mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
RB_INIT(&inm->inm_srcs);
@@ -575,7 +621,7 @@ inm_clear_recorded(struct in_multi *inm)
*
* Return 0 if the source didn't exist or was already marked as recorded.
* Return 1 if the source was marked as recorded by this function.
- * Return <0 if any error occured (negated errno code).
+ * Return <0 if any error occurred (negated errno code).
*/
int
inm_record_source(struct in_multi *inm, const in_addr_t naddr)
@@ -1177,11 +1223,8 @@ out_inm_release:
int
in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
- struct ifnet *ifp;
int error;
- ifp = inm->inm_ifp;
-
IN_MULTI_LOCK();
error = in_leavegroup_locked(inm, imf);
IN_MULTI_UNLOCK();
@@ -1238,7 +1281,9 @@ in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ CURVNET_SET(inm->inm_ifp->if_vnet);
error = igmp_change_state(inm);
+ CURVNET_RESTORE();
if (error)
CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
@@ -1526,17 +1571,29 @@ inp_findmoptions(struct inpcb *inp)
}
/*
- * Discard the IP multicast options (and source filters).
+ * Discard the IP multicast options (and source filters). To minimize
+ * the amount of work done while holding locks such as the INP's
+ * pcbinfo lock (which is used in the receive path), the free
+ * operation is performed asynchronously in a separate task.
*
* SMPng: NOTE: assumes INP write lock is held.
*/
void
inp_freemoptions(struct ip_moptions *imo)
{
- struct in_mfilter *imf;
- size_t idx, nmships;
KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
+ IN_MULTI_LOCK();
+ STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
+ IN_MULTI_UNLOCK();
+ taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
+}
+
+static void
+inp_freemoptions_internal(struct ip_moptions *imo)
+{
+ struct in_mfilter *imf;
+ size_t idx, nmships;
nmships = imo->imo_num_memberships;
for (idx = 0; idx < nmships; ++idx) {
@@ -1554,6 +1611,22 @@ inp_freemoptions(struct ip_moptions *imo)
free(imo, M_IPMOPTS);
}
+static void
+inp_gcmoptions(void *context, int pending)
+{
+ struct ip_moptions *imo;
+
+ IN_MULTI_LOCK();
+ while (!STAILQ_EMPTY(&imo_gc_list)) {
+ imo = STAILQ_FIRST(&imo_gc_list);
+ STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
+ IN_MULTI_UNLOCK();
+ inp_freemoptions_internal(imo);
+ IN_MULTI_LOCK();
+ }
+ IN_MULTI_UNLOCK();
+}
+
/*
* Atomically get source filters on a socket for an IPv4 multicast group.
* Called with INP lock held; returns with lock released.
@@ -1680,6 +1753,7 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
int
inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
{
+ struct rm_priotracker in_ifa_tracker;
struct ip_mreqn mreqn;
struct ip_moptions *imo;
struct ifnet *ifp;
@@ -1719,7 +1793,7 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
mreqn.imr_address = imo->imo_multicast_addr;
} else if (ifp != NULL) {
mreqn.imr_ifindex = ifp->if_index;
- IFP_TO_IA(ifp, ia);
+ IFP_TO_IA(ifp, ia, &in_ifa_tracker);
if (ia != NULL) {
mreqn.imr_address =
IA_SIN(ia)->sin_addr;
@@ -1738,7 +1812,7 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
break;
case IP_MULTICAST_TTL:
- if (imo == 0)
+ if (imo == NULL)
optval = coptval = IP_DEFAULT_MULTICAST_TTL;
else
optval = coptval = imo->imo_multicast_ttl;
@@ -1750,7 +1824,7 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
break;
case IP_MULTICAST_LOOP:
- if (imo == 0)
+ if (imo == NULL)
optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
else
optval = coptval = imo->imo_multicast_loop;
@@ -1810,7 +1884,10 @@ static struct ifnet *
inp_lookup_mcast_ifp(const struct inpcb *inp,
const struct sockaddr_in *gsin, const struct in_addr ina)
{
+ struct rm_priotracker in_ifa_tracker;
struct ifnet *ifp;
+ struct nhop4_basic nh4;
+ uint32_t fibnum;
KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
@@ -1820,21 +1897,15 @@ inp_lookup_mcast_ifp(const struct inpcb *inp,
if (!in_nullhost(ina)) {
INADDR_TO_IFP(ina, ifp);
} else {
- struct route ro;
-
- ro.ro_rt = NULL;
- memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in));
- in_rtalloc_ign(&ro, 0, inp ? inp->inp_inc.inc_fibnum : 0);
- if (ro.ro_rt != NULL) {
- ifp = ro.ro_rt->rt_ifp;
- KASSERT(ifp != NULL, ("%s: null ifp", __func__));
- RTFREE(ro.ro_rt);
- } else {
+ fibnum = inp ? inp->inp_inc.inc_fibnum : 0;
+ if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0)
+ ifp = nh4.nh_ifp;
+ else {
struct in_ifaddr *ia;
struct ifnet *mifp;
mifp = NULL;
- IN_IFADDR_RLOCK();
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
mifp = ia->ia_ifp;
if (!(mifp->if_flags & IFF_LOOPBACK) &&
@@ -1843,7 +1914,7 @@ inp_lookup_mcast_ifp(const struct inpcb *inp,
break;
}
}
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
}
}
@@ -2855,7 +2926,7 @@ sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
return (retval);
}
-#ifdef KTR
+#if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3)
static const char *inm_modestrs[] = { "un", "in", "ex" };
@@ -2910,7 +2981,7 @@ inm_print(const struct in_multi *inm)
inm->inm_timer,
inm_state_str(inm->inm_state),
inm->inm_refcount,
- inm->inm_scq.ifq_len);
+ inm->inm_scq.mq_len);
printf("igi %p nsrc %lu sctimer %u scrv %u\n",
inm->inm_igi,
inm->inm_nsrc,
@@ -2927,7 +2998,7 @@ inm_print(const struct in_multi *inm)
printf("%s: --- end inm %p ---\n", __func__, inm);
}
-#else /* !KTR */
+#else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */
void
inm_print(const struct in_multi *inm)
@@ -2935,6 +3006,6 @@ inm_print(const struct in_multi *inm)
}
-#endif /* KTR */
+#endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */
RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
diff --git a/freebsd/sys/netinet/in_pcb.c b/freebsd/sys/netinet/in_pcb.c
index b93abadf..f8790938 100644
--- a/freebsd/sys/netinet/in_pcb.c
+++ b/freebsd/sys/netinet/in_pcb.c
@@ -49,14 +49,18 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_pcbgroup.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/callout.h>
+#include <sys/eventhandler.h>
#include <sys/domain.h>
#include <sys/protosw.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/priv.h>
@@ -73,8 +77,11 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_types.h>
+#include <net/if_llatbl.h>
#include <net/route.h>
+#include <net/rss_config.h>
#include <net/vnet.h>
#if defined(INET) || defined(INET6)
@@ -150,11 +157,7 @@ sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
{
int error;
-#ifdef VIMAGE
- error = vnet_sysctl_handle_int(oidp, arg1, arg2, req);
-#else
error = sysctl_handle_int(oidp, arg1, arg2, req);
-#endif
if (error == 0) {
RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
@@ -171,38 +174,42 @@ sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
static SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0,
"IP Ports");
-SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowfirstauto), 0,
- &sysctl_net_ipport_check, "I", "");
-SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowlastauto), 0,
- &sysctl_net_ipport_check, "I", "");
-SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, first,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_firstauto), 0,
- &sysctl_net_ipport_check, "I", "");
-SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, last,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lastauto), 0,
- &sysctl_net_ipport_check, "I", "");
-SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hifirstauto), 0,
- &sysctl_net_ipport_check, "I", "");
-SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hilastauto), 0,
- &sysctl_net_ipport_check, "I", "");
-SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
- CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedhigh), 0, "");
-SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
+ &VNET_NAME(ipport_lowfirstauto), 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
+ &VNET_NAME(ipport_lowlastauto), 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
+ &VNET_NAME(ipport_firstauto), 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
+ &VNET_NAME(ipport_lastauto), 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
+ &VNET_NAME(ipport_hifirstauto), 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
+ &VNET_NAME(ipport_hilastauto), 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
+ CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
+ &VNET_NAME(ipport_reservedhigh), 0, "");
+SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedlow), 0, "");
-SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized,
+ CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ipport_randomized), 0, "Enable random port allocation");
-SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps,
+ CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ipport_randomcps), 0, "Maximum number of random port "
"allocations before switching to a sequental one");
-SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime,
+ CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ipport_randomtime), 0,
"Minimum time to keep sequental port "
"allocation before switching to a random one");
-#endif
+#endif /* INET */
/*
* in_pcb.c: manage the Protocol Control Blocks.
@@ -225,6 +232,7 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
INP_INFO_LOCK_INIT(pcbinfo, name);
INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash"); /* XXXRW: argument? */
+ INP_LIST_LOCK_INIT(pcbinfo, "pcbinfolist");
#ifdef VIMAGE
pcbinfo->ipi_vnet = curvnet;
#endif
@@ -242,6 +250,8 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
inpcbzone_flags);
uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
+ uma_zone_set_warning(pcbinfo->ipi_zone,
+ "kern.ipc.maxsockets limit reached");
}
/*
@@ -261,6 +271,7 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
in_pcbgroup_destroy(pcbinfo);
#endif
uma_zdestroy(pcbinfo->ipi_zone);
+ INP_LIST_LOCK_DESTROY(pcbinfo);
INP_HASH_LOCK_DESTROY(pcbinfo);
INP_INFO_LOCK_DESTROY(pcbinfo);
}
@@ -275,7 +286,14 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
struct inpcb *inp;
int error;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+ if (pcbinfo == &V_tcbinfo) {
+ INP_INFO_RLOCK_ASSERT(pcbinfo);
+ } else {
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ }
+#endif
+
error = 0;
inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
if (inp == NULL)
@@ -307,6 +325,8 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
inp->inp_flags |= IN6P_IPV6_V6ONLY;
}
#endif
+ INP_WLOCK(inp);
+ INP_LIST_WLOCK(pcbinfo);
LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
pcbinfo->ipi_count++;
so->so_pcb = (caddr_t)inp;
@@ -314,9 +334,9 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
if (V_ip6_auto_flowlabel)
inp->inp_flags |= IN6P_AUTOFLOWLABEL;
#endif
- INP_WLOCK(inp);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
refcount_init(&inp->inp_refcount, 1); /* Reference from inpcbinfo */
+ INP_LIST_WUNLOCK(pcbinfo);
#if defined(IPSEC) || defined(MAC)
out:
if (error != 0) {
@@ -338,8 +358,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
return (EINVAL);
- anonport = inp->inp_lport == 0 && (nam == NULL ||
- ((struct sockaddr_in *)nam)->sin_port == 0);
+ anonport = nam == NULL || ((struct sockaddr_in *)nam)->sin_port == 0;
error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
&inp->inp_lport, cred);
if (error)
@@ -355,6 +374,9 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
}
#endif
+/*
+ * Select a local port (number) to use.
+ */
#if defined(INET) || defined(INET6)
int
in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
@@ -395,13 +417,14 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
lastport = &pcbinfo->ipi_lastport;
}
/*
- * For UDP, use random port allocation as long as the user
+ * For UDP(-Lite), use random port allocation as long as the user
* allows it. For TCP (and as of yet unknown) connections,
* use random port allocation only if the user allows it AND
* ipport_tick() allows it.
*/
if (V_ipport_randomized &&
- (!V_ipport_stoprandom || pcbinfo == &V_udbinfo))
+ (!V_ipport_stoprandom || pcbinfo == &V_udbinfo ||
+ pcbinfo == &V_ulitecbinfo))
dorandom = 1;
else
dorandom = 0;
@@ -411,8 +434,8 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
*/
if (first == last)
dorandom = 0;
- /* Make sure to not include UDP packets in the count. */
- if (pcbinfo != &V_udbinfo)
+ /* Make sure to not include UDP(-Lite) packets in the count. */
+ if (pcbinfo != &V_udbinfo || pcbinfo != &V_ulitecbinfo)
V_ipport_tcpallocs++;
/*
* Instead of having two loops further down counting up or down
@@ -467,7 +490,7 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
#ifdef INET
if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4)
laddrp->s_addr = laddr.s_addr;
-#endif
+#endif
*lportp = lport;
return (0);
@@ -491,6 +514,38 @@ inp_so_options(const struct inpcb *inp)
}
#endif /* INET || INET6 */
+/*
+ * Check if a new BINDMULTI socket is allowed to be created.
+ *
+ * ni points to the new inp.
+ * oi points to the exisitng inp.
+ *
+ * This checks whether the existing inp also has BINDMULTI and
+ * whether the credentials match.
+ */
+int
+in_pcbbind_check_bindmulti(const struct inpcb *ni, const struct inpcb *oi)
+{
+ /* Check permissions match */
+#ifndef __rtems__
+ if ((ni->inp_flags2 & INP_BINDMULTI) &&
+ (ni->inp_cred->cr_uid !=
+ oi->inp_cred->cr_uid))
+ return (0);
+#endif /* __rtems__ */
+
+ /* Check the existing inp has BINDMULTI set */
+ if ((ni->inp_flags2 & INP_BINDMULTI) &&
+ ((oi->inp_flags2 & INP_BINDMULTI) == 0))
+ return (0);
+
+ /*
+ * We're okay - either INP_BINDMULTI isn't set on ni, or
+ * it is and it matches the checks.
+ */
+ return (1);
+}
+
#ifdef INET
/*
* Set up a bind operation on a PCB, performing port allocation
@@ -594,6 +649,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
* This entire block sorely needs a rewrite.
*/
if (t &&
+ ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
((t->inp_flags & INP_TIMEWAIT) == 0) &&
(so->so_type != SOCK_STREAM ||
ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
@@ -607,6 +663,15 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
0)
#endif /* __rtems__ */
return (EADDRINUSE);
+
+ /*
+ * If the socket is a BINDMULTI socket, then
+ * the credentials need to match and the
+ * original socket also has to have been bound
+ * with BINDMULTI.
+ */
+ if (t && (! in_pcbbind_check_bindmulti(inp, t)))
+ return (EADDRINUSE);
}
t = in_pcblookup_local(pcbinfo, sin->sin_addr,
lport, lookupflags, cred);
@@ -621,7 +686,9 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
if (tw == NULL ||
(reuseport & tw->tw_so_options) == 0)
return (EADDRINUSE);
- } else if (t && (reuseport & inp_so_options(t)) == 0) {
+ } else if (t &&
+ ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
+ (reuseport & inp_so_options(t)) == 0) {
#ifdef INET6
if (ntohl(sin->sin_addr.s_addr) !=
INADDR_ANY ||
@@ -631,6 +698,8 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
(t->inp_vflag & INP_IPV6PROTO) == 0)
#endif
return (EADDRINUSE);
+ if (t && (! in_pcbbind_check_bindmulti(inp, t)))
+ return (EADDRINUSE);
}
}
}
@@ -706,7 +775,7 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
* Do proper source address selection on an unbound socket in case
* of connect. Take jails into account as well.
*/
-static int
+int
in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
struct ucred *cred)
{
@@ -754,9 +823,11 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
struct in_ifaddr *ia;
struct ifnet *ifp;
- ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin));
+ ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin,
+ inp->inp_socket->so_fibnum));
if (ia == NULL)
- ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0));
+ ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0,
+ inp->inp_socket->so_fibnum));
if (ia == NULL) {
error = ENETUNREACH;
goto done;
@@ -871,9 +942,11 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
sain.sin_len = sizeof(struct sockaddr_in);
sain.sin_addr.s_addr = faddr->s_addr;
- ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain)));
+ ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain),
+ inp->inp_socket->so_fibnum));
if (ia == NULL)
- ia = ifatoia(ifa_ifwithnet(sintosa(&sain), 0));
+ ia = ifatoia(ifa_ifwithnet(sintosa(&sain), 0,
+ inp->inp_socket->so_fibnum));
if (ia == NULL)
ia = ifatoia(ifa_ifwithaddr(sintosa(&sain)));
@@ -946,6 +1019,7 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp,
struct inpcb **oinpp, struct ucred *cred)
{
+ struct rm_priotracker in_ifa_tracker;
struct sockaddr_in *sin = (struct sockaddr_in *)nam;
struct in_ifaddr *ia;
struct inpcb *oinp;
@@ -982,20 +1056,20 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
* choose the broadcast address for that interface.
*/
if (faddr.s_addr == INADDR_ANY) {
- IN_IFADDR_RLOCK();
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
faddr =
IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr;
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
if (cred != NULL &&
(error = prison_get_ip4(cred, &faddr)) != 0)
return (error);
} else if (faddr.s_addr == (u_long)INADDR_BROADCAST) {
- IN_IFADDR_RLOCK();
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
if (TAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags &
IFF_BROADCAST)
faddr = satosin(&TAILQ_FIRST(
&V_in_ifaddrhead)->ia_broadaddr)->sin_addr;
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
}
}
if (laddr.s_addr == INADDR_ANY) {
@@ -1013,7 +1087,7 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
imo = inp->inp_moptions;
if (imo->imo_multicast_ifp != NULL) {
ifp = imo->imo_multicast_ifp;
- IN_IFADDR_RLOCK();
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
if ((ia->ia_ifp == ifp) &&
(cred == NULL ||
@@ -1027,7 +1101,7 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
laddr = ia->ia_addr.sin_addr;
error = 0;
}
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
}
}
if (error)
@@ -1064,7 +1138,7 @@ in_pcbdisconnect(struct inpcb *inp)
inp->inp_fport = 0;
in_pcbrehash(inp);
}
-#endif
+#endif /* INET */
/*
* in_pcbdetach() is responsibe for disassociating a socket from an inpcb.
@@ -1160,8 +1234,17 @@ in_pcbrele_wlocked(struct inpcb *inp)
INP_WLOCK_ASSERT(inp);
- if (refcount_release(&inp->inp_refcount) == 0)
+ if (refcount_release(&inp->inp_refcount) == 0) {
+ /*
+ * If the inpcb has been freed, let the caller know, even if
+ * this isn't the last reference.
+ */
+ if (inp->inp_flags2 & INP_FREED) {
+ INP_WUNLOCK(inp);
+ return (1);
+ }
return (0);
+ }
KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
@@ -1197,16 +1280,24 @@ in_pcbfree(struct inpcb *inp)
KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
- INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+ if (pcbinfo == &V_tcbinfo) {
+ INP_INFO_LOCK_ASSERT(pcbinfo);
+ } else {
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ }
+#endif
INP_WLOCK_ASSERT(inp);
/* XXXRW: Do as much as possible here. */
#ifdef IPSEC
if (inp->inp_sp != NULL)
ipsec_delete_pcbpolicy(inp);
-#endif /* IPSEC */
+#endif
+ INP_LIST_WLOCK(pcbinfo);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
in_pcbremlists(inp);
+ INP_LIST_WUNLOCK(pcbinfo);
#ifdef INET6
if (inp->inp_vflag & INP_IPV6PROTO) {
ip6_freepcbopts(inp->in6p_outputopts);
@@ -1220,6 +1311,13 @@ in_pcbfree(struct inpcb *inp)
if (inp->inp_moptions != NULL)
inp_freemoptions(inp->inp_moptions);
#endif
+ if (inp->inp_route.ro_rt) {
+ RTFREE(inp->inp_route.ro_rt);
+ inp->inp_route.ro_rt = (struct rtentry *)NULL;
+ }
+ if (inp->inp_route.ro_lle)
+ LLE_FREE(inp->inp_route.ro_lle); /* zeros ro_lle */
+
inp->inp_vflag = 0;
inp->inp_flags2 |= INP_FREED;
crfree(inp->inp_cred);
@@ -1363,7 +1461,7 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
struct ip_moptions *imo;
int i, gap;
- INP_INFO_RLOCK(pcbinfo);
+ INP_INFO_WLOCK(pcbinfo);
LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
INP_WLOCK(inp);
imo = inp->inp_moptions;
@@ -1393,7 +1491,7 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(pcbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
}
/*
@@ -1565,6 +1663,83 @@ in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
goto found;
}
+#ifdef RSS
+ /*
+ * For incoming connections, we may wish to do a wildcard
+ * match for an RSS-local socket.
+ */
+ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+ struct inpcb *local_wild = NULL, *local_exact = NULL;
+#ifdef INET6
+ struct inpcb *local_wild_mapped = NULL;
+#endif
+ struct inpcb *jail_wild = NULL;
+ struct inpcbhead *head;
+ int injail;
+
+ /*
+ * Order of socket selection - we always prefer jails.
+ * 1. jailed, non-wild.
+ * 2. jailed, wild.
+ * 3. non-jailed, non-wild.
+ * 4. non-jailed, wild.
+ */
+
+ head = &pcbgroup->ipg_hashbase[INP_PCBHASH(INADDR_ANY,
+ lport, 0, pcbgroup->ipg_hashmask)];
+ LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr != INADDR_ANY ||
+ inp->inp_lport != lport)
+ continue;
+
+ injail = prison_flag(inp->inp_cred, PR_IP4);
+ if (injail) {
+ if (prison_check_ip4(inp->inp_cred,
+ &laddr) != 0)
+ continue;
+ } else {
+ if (local_exact != NULL)
+ continue;
+ }
+
+ if (inp->inp_laddr.s_addr == laddr.s_addr) {
+ if (injail)
+ goto found;
+ else
+ local_exact = inp;
+ } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
+#ifdef INET6
+ /* XXX inp locking, NULL check */
+ if (inp->inp_vflag & INP_IPV6PROTO)
+ local_wild_mapped = inp;
+ else
+#endif
+ if (injail)
+ jail_wild = inp;
+ else
+ local_wild = inp;
+ }
+ } /* LIST_FOREACH */
+
+ inp = jail_wild;
+ if (inp == NULL)
+ inp = local_exact;
+ if (inp == NULL)
+ inp = local_wild;
+#ifdef INET6
+ if (inp == NULL)
+ inp = local_wild_mapped;
+#endif
+ if (inp != NULL)
+ goto found;
+ }
+#endif
+
/*
* Then look for a wildcard match, if requested.
*/
@@ -1596,11 +1771,6 @@ in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
inp->inp_lport != lport)
continue;
- /* XXX inp locking */
- if (ifp && ifp->if_type == IFT_FAITH &&
- (inp->inp_flags & INP_FAITH) == 0)
- continue;
-
injail = prison_flag(inp->inp_cred, PR_IP4);
if (injail) {
if (prison_check_ip4(inp->inp_cred,
@@ -1622,7 +1792,7 @@ in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
if (inp->inp_vflag & INP_IPV6PROTO)
local_wild_mapped = inp;
else
-#endif /* INET6 */
+#endif
if (injail)
jail_wild = inp;
else
@@ -1637,7 +1807,7 @@ in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
#ifdef INET6
if (inp == NULL)
inp = local_wild_mapped;
-#endif /* defined(INET6) */
+#endif
if (inp != NULL)
goto found;
} /* if (lookupflags & INPLOOKUP_WILDCARD) */
@@ -1741,11 +1911,6 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
inp->inp_lport != lport)
continue;
- /* XXX inp locking */
- if (ifp && ifp->if_type == IFT_FAITH &&
- (inp->inp_flags & INP_FAITH) == 0)
- continue;
-
injail = prison_flag(inp->inp_cred, PR_IP4);
if (injail) {
if (prison_check_ip4(inp->inp_cred,
@@ -1767,7 +1932,7 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
if (inp->inp_vflag & INP_IPV6PROTO)
local_wild_mapped = inp;
else
-#endif /* INET6 */
+#endif
if (injail)
jail_wild = inp;
else
@@ -1783,7 +1948,7 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
#ifdef INET6
if (local_wild_mapped != NULL)
return (local_wild_mapped);
-#endif /* defined(INET6) */
+#endif
} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
return (NULL);
@@ -1832,7 +1997,7 @@ struct inpcb *
in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp)
{
-#if defined(PCBGROUP)
+#if defined(PCBGROUP) && !defined(RSS)
struct inpcbgroup *pcbgroup;
#endif
@@ -1841,7 +2006,17 @@ in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
-#if defined(PCBGROUP)
+ /*
+ * When not using RSS, use connection groups in preference to the
+ * reservation table when looking up 4-tuples. When using RSS, just
+ * use the reservation table, due to the cost of the Toeplitz hash
+ * in software.
+ *
+ * XXXRW: This policy belongs in the pcbgroup code, as in principle
+ * we could be doing RSS with a non-Toeplitz hash that is affordable
+ * in software.
+ */
+#if defined(PCBGROUP) && !defined(RSS)
if (in_pcbgroup_enabled(pcbinfo)) {
pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
fport);
@@ -1868,16 +2043,27 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
("%s: LOCKPCB not set", __func__));
#ifdef PCBGROUP
- if (in_pcbgroup_enabled(pcbinfo)) {
+ /*
+ * If we can use a hardware-generated hash to look up the connection
+ * group, use that connection group to find the inpcb. Otherwise
+ * fall back on a software hash -- or the reservation table if we're
+ * using RSS.
+ *
+ * XXXRW: As above, that policy belongs in the pcbgroup code.
+ */
+ if (in_pcbgroup_enabled(pcbinfo) &&
+ !(M_HASHTYPE_TEST(m, M_HASHTYPE_NONE))) {
pcbgroup = in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
m->m_pkthdr.flowid);
if (pcbgroup != NULL)
return (in_pcblookup_group(pcbinfo, pcbgroup, faddr,
fport, laddr, lport, lookupflags, ifp));
+#ifndef RSS
pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
fport);
return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
laddr, lport, lookupflags, ifp));
+#endif
}
#endif
return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
@@ -1905,9 +2091,9 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
#ifdef INET6
if (inp->inp_vflag & INP_IPV6)
- hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
+ hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr);
else
-#endif /* INET6 */
+#endif
hashkey_faddr = inp->inp_faddr.s_addr;
pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
@@ -1992,9 +2178,9 @@ in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m)
#ifdef INET6
if (inp->inp_vflag & INP_IPV6)
- hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
+ hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr);
else
-#endif /* INET6 */
+#endif
hashkey_faddr = inp->inp_faddr.s_addr;
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
@@ -2026,8 +2212,16 @@ in_pcbremlists(struct inpcb *inp)
{
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+ if (pcbinfo == &V_tcbinfo) {
+ INP_INFO_RLOCK_ASSERT(pcbinfo);
+ } else {
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ }
+#endif
+
INP_WLOCK_ASSERT(inp);
+ INP_LIST_WLOCK_ASSERT(pcbinfo);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
if (inp->inp_flags & INP_INHASHLIST) {
@@ -2051,6 +2245,25 @@ in_pcbremlists(struct inpcb *inp)
}
/*
+ * Check for alternatives when higher level complains
+ * about service problems. For now, invalidate cached
+ * routing information. If the route was created dynamically
+ * (by a redirect), time to try a default gateway again.
+ */
+void
+in_losing(struct inpcb *inp)
+{
+
+ if (inp->inp_route.ro_rt) {
+ RTFREE(inp->inp_route.ro_rt);
+ inp->inp_route.ro_rt = (struct rtentry *)NULL;
+ }
+ if (inp->inp_route.ro_lle)
+ LLE_FREE(inp->inp_route.ro_lle); /* zeros ro_lle */
+ return;
+}
+
+/*
* A set label operation has occurred at the socket layer, propagate the
* label change into the in_pcb for the socket.
*/
@@ -2115,7 +2328,7 @@ ipport_tick_init(const void *unused __unused)
{
/* Start ipport_tick. */
- callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
+ callout_init(&ipport_tick_callout, 1);
callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL);
EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
SHUTDOWN_PRI_DEFAULT);
@@ -2172,13 +2385,13 @@ inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
{
struct inpcb *inp;
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
INP_WLOCK(inp);
func(inp, arg);
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
}
struct socket *
@@ -2262,14 +2475,13 @@ db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
/* IPv6. */
ip6_sprintf(laddr_str, &inc->inc6_laddr);
ip6_sprintf(faddr_str, &inc->inc6_faddr);
- } else {
+ } else
#endif
+ {
/* IPv4. */
inet_ntoa_r(inc->inc_laddr, laddr_str);
inet_ntoa_r(inc->inc_faddr, faddr_str);
-#ifdef INET6
}
-#endif
db_print_indent(indent);
db_printf("inc_laddr %s inc_lport %u\n", laddr_str,
ntohs(inc->inc_lport));
@@ -2320,10 +2532,6 @@ db_print_inpflags(int inp_flags)
db_printf("%sINP_MTUDISC", comma ? ", " : "");
comma = 1;
}
- if (inp_flags & INP_FAITH) {
- db_printf("%sINP_FAITH", comma ? ", " : "");
- comma = 1;
- }
if (inp_flags & INP_RECVTTL) {
db_printf("%sINP_RECVTTL", comma ? ", " : "");
comma = 1;
@@ -2486,4 +2694,4 @@ DB_SHOW_COMMAND(inpcb, db_show_inpcb)
db_print_inpcb(inp, "inpcb", 0);
}
-#endif
+#endif /* DDB */
diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h
index a78c6ab6..ea47d6b2 100644
--- a/freebsd/sys/netinet/in_pcb.h
+++ b/freebsd/sys/netinet/in_pcb.h
@@ -42,6 +42,7 @@
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_rwlock.h>
+#include <net/route.h>
#ifdef _KERNEL
#include <rtems/bsd/sys/lock.h>
@@ -79,6 +80,8 @@ struct in_addr_4in6 {
/*
* NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has
* some extra padding to accomplish this.
+ * NOTE 2: tcp_syncache.c uses first 5 32-bit words, which identify fport,
+ * lport, faddr to generate hash, so these fields shouldn't be moved.
*/
struct in_endpoints {
u_int16_t ie_fport; /* foreign port */
@@ -94,6 +97,7 @@ struct in_endpoints {
struct in_addr_4in6 ie46_local;
struct in6_addr ie6_local;
} ie_dependladdr;
+ u_int32_t ie6_zoneid; /* scope zone id */
};
#define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4
#define ie_laddr ie_dependladdr.ie46_local.ia46_addr4
@@ -117,34 +121,47 @@ struct in_conninfo {
*/
#define INC_ISIPV6 0x01
-#define inc_isipv6 inc_flags /* temp compatability */
+#define inc_isipv6 inc_flags /* temp compatibility */
#define inc_fport inc_ie.ie_fport
#define inc_lport inc_ie.ie_lport
#define inc_faddr inc_ie.ie_faddr
#define inc_laddr inc_ie.ie_laddr
#define inc6_faddr inc_ie.ie6_faddr
#define inc6_laddr inc_ie.ie6_laddr
+#define inc6_zoneid inc_ie.ie6_zoneid
struct icmp6_filter;
/*-
- * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4
- * and IPv6 sockets. In the case of TCP, further per-connection state is
+ * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
+ * IPv6 sockets. In the case of TCP and UDP, further per-connection state is
* hung off of inp_ppcb most of the time. Almost all fields of struct inpcb
* are static after creation or protected by a per-inpcb rwlock, inp_lock. A
- * few fields also require the global pcbinfo lock for the inpcb to be held,
- * when modified, such as the global connection lists and hashes, as well as
- * binding information (which affects which hash a connection is on). This
- * model means that connections can be looked up without holding the
- * per-connection lock, which is important for performance when attempting to
- * find the connection for a packet given its IP and port tuple. Writing to
- * these fields that write locks be held on both the inpcb and global locks.
+ * few fields are protected by multiple locks as indicated in the locking notes
+ * below. For these fields, all of the listed locks must be write-locked for
+ * any modifications. However, these fields can be safely read while any one of
+ * the listed locks are read-locked. This model can permit greater concurrency
+ * for read operations. For example, connections can be looked up while only
+ * holding a read lock on the global pcblist lock. This is important for
+ * performance when attempting to find the connection for a packet given its IP
+ * and port tuple.
+ *
+ * One noteworthy exception is that the global pcbinfo lock follows a different
+ * set of rules in relation to the inp_list field. Rather than being
+ * write-locked for modifications and read-locked for list iterations, it must
+ * be read-locked during modifications and write-locked during list iterations.
+ * This ensures that the relatively rare global list iterations safely walk a
+ * stable snapshot of connections while allowing more common list modifications
+ * to safely grab the pcblist lock just while adding or removing a connection
+ * from the global list.
*
* Key:
* (c) - Constant after initialization
* (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
+ * (l) - Protected by the pcblist lock for the inpcb
+ * (h) - Protected by the pcbhash lock for the inpcb
* (s) - Protected by another subsystem's locks
* (x) - Undefined locking
*
@@ -159,15 +176,21 @@ struct icmp6_filter;
* socket has been freed), or there may be close(2)-related races.
*
* The inp_vflag field is overloaded, and would otherwise ideally be (c).
+ *
+ * TODO: Currently only the TCP stack is leveraging the global pcbinfo lock
+ * read-lock usage during modification, this model can be applied to other
+ * protocols (especially SCTP).
*/
struct inpcb {
- LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */
+ LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */
LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
- LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */
+ LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
+ /* (p[w]) for list iteration */
+ /* (p[r]/l) for addition/removal */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
- LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */
+ LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
struct socket *inp_socket; /* (i) back pointer to socket */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
@@ -179,12 +202,14 @@ struct inpcb {
u_char inp_ip_minttl; /* (i) minimum TTL or drop */
uint32_t inp_flowid; /* (x) flow id / queue id */
u_int inp_refcount; /* (i) refcount */
- void *inp_pspare[5]; /* (x) route caching / general use */
- u_int inp_ispare[6]; /* (x) route caching / user cookie /
+ void *inp_pspare[5]; /* (x) packet pacing / general use */
+ uint32_t inp_flowtype; /* (x) M_HASHTYPE value */
+ uint32_t inp_rss_listen_bucket; /* (x) overridden RSS listen bucket */
+ u_int inp_ispare[4]; /* (x) packet pacing / user cookie /
* general use */
/* Local and foreign ports, local and foreign addr. */
- struct in_conninfo inp_inc; /* (i/p) list for PCB's local port */
+ struct in_conninfo inp_inc; /* (i) list for PCB's local port */
/* MAC and IPSEC policy information. */
struct label *inp_label; /* (i) MAC label */
@@ -209,13 +234,19 @@ struct inpcb {
int inp6_cksum;
short inp6_hops;
} inp_depend6;
- LIST_ENTRY(inpcb) inp_portlist; /* (i/p) */
- struct inpcbport *inp_phd; /* (i/p) head of this list */
+ LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */
+ struct inpcbport *inp_phd; /* (i/h) head of this list */
#define inp_zero_size offsetof(struct inpcb, inp_gencnt)
inp_gen_t inp_gencnt; /* (c) generation count */
struct llentry *inp_lle; /* cached L2 information */
- struct rtentry *inp_rt; /* cached L3 information */
struct rwlock inp_lock;
+ rt_gen_t inp_rt_cookie; /* generation for route entry */
+ union { /* cached L3 information */
+ struct route inpu_route;
+ struct route_in6 inpu_route6;
+ } inp_rtu;
+#define inp_route inp_rtu.inpu_route
+#define inp_route6 inp_rtu.inpu_route6
};
#define inp_fport inp_inc.inc_fport
#define inp_lport inp_inc.inc_lport
@@ -227,6 +258,7 @@ struct inpcb {
#define in6p_faddr inp_inc.inc6_faddr
#define in6p_laddr inp_inc.inc6_laddr
+#define in6p_zoneid inp_inc.inc6_zoneid
#define in6p_hops inp_depend6.inp6_hops /* default hop limit */
#define in6p_flowinfo inp_flow
#define in6p_options inp_depend6.inp6_options
@@ -274,37 +306,46 @@ struct inpcbport {
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
*
- * Each pcbinfo is protected by two locks: ipi_lock and ipi_hash_lock,
- * the former covering mutable global fields (such as the global pcb list),
- * and the latter covering the hashed lookup tables. The lock order is:
+ * Each pcbinfo is protected by three locks: ipi_lock, ipi_hash_lock and
+ * ipi_list_lock:
+ * - ipi_lock covering the global pcb list stability during loop iteration,
+ * - ipi_hash_lock covering the hashed lookup tables,
+ * - ipi_list_lock covering mutable global fields (such as the global
+ * pcb list)
+ *
+ * The lock order is:
*
- * ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks}
+ * ipi_lock (before)
+ * inpcb locks (before)
+ * ipi_list locks (before)
+ * {ipi_hash_lock, pcbgroup locks}
*
* Locking key:
*
* (c) Constant or nearly constant after initialisation
* (g) Locked by ipi_lock
+ * (l) Locked by ipi_list_lock
* (h) Read using either ipi_hash_lock or inpcb lock; write requires both
* (p) Protected by one or more pcbgroup locks
* (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
/*
- * Global lock protecting global inpcb list, inpcb count, etc.
+ * Global lock protecting full inpcb list traversal
*/
struct rwlock ipi_lock;
/*
* Global list of inpcbs on the protocol.
*/
- struct inpcbhead *ipi_listhead; /* (g) */
- u_int ipi_count; /* (g) */
+ struct inpcbhead *ipi_listhead; /* (g/l) */
+ u_int ipi_count; /* (l) */
/*
* Generation count -- incremented each time a connection is allocated
* or freed.
*/
- u_quad_t ipi_gencnt; /* (g) */
+ u_quad_t ipi_gencnt; /* (l) */
/*
* Fields associated with port lookup and allocation.
@@ -362,6 +403,11 @@ struct inpcbinfo {
* general use 2
*/
void *ipi_pspare[2];
+
+ /*
+ * Global lock protecting global inpcb list, inpcb count, etc.
+ */
+ struct rwlock ipi_list_lock;
};
#ifdef _KERNEL
@@ -454,6 +500,7 @@ short inp_so_options(const struct inpcb *inp);
#define INP_INFO_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_lock)
#define INP_INFO_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_lock)
#define INP_INFO_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_lock)
+#define INP_INFO_WLOCKED(ipi) rw_wowned(&(ipi)->ipi_lock)
#define INP_INFO_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_lock)
#define INP_INFO_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_lock)
#define INP_INFO_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_LOCKED)
@@ -461,6 +508,25 @@ short inp_so_options(const struct inpcb *inp);
#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
+#define INP_LIST_LOCK_INIT(ipi, d) \
+ rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
+#define INP_LIST_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_list_lock)
+#define INP_LIST_RLOCK(ipi) rw_rlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_WLOCK(ipi) rw_wlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_list_lock)
+#define INP_LIST_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_LOCK_ASSERT(ipi) \
+ rw_assert(&(ipi)->ipi_list_lock, RA_LOCKED)
+#define INP_LIST_RLOCK_ASSERT(ipi) \
+ rw_assert(&(ipi)->ipi_list_lock, RA_RLOCKED)
+#define INP_LIST_WLOCK_ASSERT(ipi) \
+ rw_assert(&(ipi)->ipi_list_lock, RA_WLOCKED)
+#define INP_LIST_UNLOCK_ASSERT(ipi) \
+ rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED)
+
#define INP_HASH_LOCK_INIT(ipi, d) \
rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock)
@@ -485,6 +551,7 @@ short inp_so_options(const struct inpcb *inp);
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
#define INP_PCBPORTHASH(lport, mask) \
(ntohs((lport)) & (mask))
+#define INP6_PCBHASHKEY(faddr) ((faddr)->s6_addr32[3])
/*
* Flags for inp_vflags -- historically version flags only
@@ -505,7 +572,7 @@ short inp_so_options(const struct inpcb *inp);
#define INP_ANONPORT 0x00000040 /* port chosen for user */
#define INP_RECVIF 0x00000080 /* receive incoming interface */
#define INP_MTUDISC 0x00000100 /* user can do MTU discovery */
-#define INP_FAITH 0x00000200 /* accept FAITH'ed connections */
+ /* 0x000200 unused: was INP_FAITH */
#define INP_RECVTTL 0x00000400 /* receive incoming IP TTL */
#define INP_DONTFRAG 0x00000800 /* don't fragment packet */
#define INP_BINDANY 0x00001000 /* allow bind to any address */
@@ -524,8 +591,8 @@ short inp_so_options(const struct inpcb *inp);
#define INP_ONESBCAST 0x02000000 /* send all-ones broadcast */
#define INP_DROPPED 0x04000000 /* protocol drop flag */
#define INP_SOCKREF 0x08000000 /* strong socket reference */
-#define INP_SW_FLOWID 0x10000000 /* software generated flow id */
-#define INP_HW_FLOWID 0x20000000 /* hardware generated flow id */
+#define INP_RESERVED_0 0x10000000 /* reserved field */
+#define INP_RESERVED_1 0x20000000 /* reserved field */
#define IN6P_RFC2292 0x40000000 /* used RFC2292 API on the socket */
#define IN6P_MTU 0x80000000 /* receive path MTU */
@@ -545,6 +612,10 @@ short inp_so_options(const struct inpcb *inp);
#define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */
#define INP_FREED 0x00000010 /* inp itself is not valid */
#define INP_REUSEADDR 0x00000020 /* SO_REUSEADDR option is set */
+#define INP_BINDMULTI 0x00000040 /* IP_BINDMULTI option is set */
+#define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */
+#define INP_RECVFLOWID 0x00000100 /* populate recv datagram with flow info */
+#define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */
/*
* Flags passed to in_pcblookup*() functions.
@@ -603,6 +674,9 @@ void in_pcbinfo_destroy(struct inpcbinfo *);
void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *,
int, int, char *, uma_init, uma_fini, uint32_t, u_int);
+int in_pcbbind_check_bindmulti(const struct inpcb *ni,
+ const struct inpcb *oi);
+
struct inpcbgroup *
in_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t);
struct inpcbgroup *
@@ -636,6 +710,8 @@ void in_pcbdrop(struct inpcb *);
void in_pcbfree(struct inpcb *);
int in_pcbinshash(struct inpcb *);
int in_pcbinshash_nopcbgroup(struct inpcb *);
+int in_pcbladdr(struct inpcb *, struct in_addr *, struct in_addr *,
+ struct ucred *);
struct inpcb *
in_pcblookup_local(struct inpcbinfo *,
struct in_addr, u_short, int, struct ucred *);
@@ -653,6 +729,7 @@ void in_pcbrehash_mbuf(struct inpcb *, struct mbuf *);
int in_pcbrele(struct inpcb *);
int in_pcbrele_rlocked(struct inpcb *);
int in_pcbrele_wlocked(struct inpcb *);
+void in_losing(struct inpcb *);
void in_pcbsetsolabel(struct socket *so);
int in_getpeeraddr(struct socket *so, struct sockaddr **nam);
int in_getsockaddr(struct socket *so, struct sockaddr **nam);
diff --git a/freebsd/sys/netinet/in_proto.c b/freebsd/sys/netinet/in_proto.c
index 1eef2c72..8c3efa4d 100644
--- a/freebsd/sys/netinet/in_proto.c
+++ b/freebsd/sys/netinet/in_proto.c
@@ -34,7 +34,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <rtems/bsd/local/opt_ipx.h>
#include <rtems/bsd/local/opt_mrouting.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_inet.h>
@@ -45,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/proc.h>
@@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
*/
#ifdef INET
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
@@ -120,9 +121,6 @@ struct protosw inetsw[] = {
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_IP,
.pr_init = ip_init,
-#ifdef VIMAGE
- .pr_destroy = ip_destroy,
-#endif
.pr_slowtimo = ip_slowtimo,
.pr_drain = ip_drain,
.pr_usrreqs = &nousrreqs
@@ -136,9 +134,6 @@ struct protosw inetsw[] = {
.pr_ctlinput = udp_ctlinput,
.pr_ctloutput = udp_ctloutput,
.pr_init = udp_init,
-#ifdef VIMAGE
- .pr_destroy = udp_destroy,
-#endif
.pr_usrreqs = &udp_usrreqs
},
{
@@ -150,9 +145,6 @@ struct protosw inetsw[] = {
.pr_ctlinput = tcp_ctlinput,
.pr_ctloutput = tcp_ctloutput,
.pr_init = tcp_init,
-#ifdef VIMAGE
- .pr_destroy = tcp_destroy,
-#endif
.pr_slowtimo = tcp_slowtimo,
.pr_drain = tcp_drain,
.pr_usrreqs = &tcp_usrreqs
@@ -167,9 +159,6 @@ struct protosw inetsw[] = {
.pr_ctlinput = sctp_ctlinput,
.pr_ctloutput = sctp_ctloutput,
.pr_init = sctp_init,
-#ifdef VIMAGE
- .pr_destroy = sctp_finish,
-#endif
.pr_drain = sctp_drain,
.pr_usrreqs = &sctp_usrreqs
},
@@ -177,7 +166,7 @@ struct protosw inetsw[] = {
.pr_type = SOCK_STREAM,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_SCTP,
- .pr_flags = PR_WANTRCVD,
+ .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD,
.pr_input = sctp_input,
.pr_ctlinput = sctp_ctlinput,
.pr_ctloutput = sctp_ctloutput,
@@ -186,6 +175,17 @@ struct protosw inetsw[] = {
},
#endif /* SCTP */
{
+ .pr_type = SOCK_DGRAM,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_UDPLITE,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = udp_input,
+ .pr_ctlinput = udplite_ctlinput,
+ .pr_ctloutput = udp_ctloutput,
+ .pr_init = udplite_init,
+ .pr_usrreqs = &udp_usrreqs
+},
+{
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_RAW,
@@ -330,9 +330,6 @@ IPPROTOSPACER,
.pr_input = rip_input,
.pr_ctloutput = rip_ctloutput,
.pr_init = rip_init,
-#ifdef VIMAGE
- .pr_destroy = rip_destroy,
-#endif
.pr_usrreqs = &rip_usrreqs
},
};
@@ -344,7 +341,7 @@ struct domain inetdomain = {
.dom_family = AF_INET,
.dom_name = "internet",
.dom_protosw = inetsw,
- .dom_protoswNPROTOSW = &inetsw[sizeof(inetsw)/sizeof(inetsw[0])],
+ .dom_protoswNPROTOSW = &inetsw[nitems(inetsw)],
#ifdef RADIX_MPATH
.dom_rtattach = rn4_mpath_inithead,
#else
@@ -353,8 +350,6 @@ struct domain inetdomain = {
#ifdef VIMAGE
.dom_rtdetach = in_detachhead,
#endif
- .dom_rtoffset = 32,
- .dom_maxrtkey = sizeof(struct sockaddr_in),
.dom_ifattach = in_domifattach,
.dom_ifdetach = in_domifdetach
};
@@ -382,3 +377,5 @@ SYSCTL_NODE(_net_inet, IPPROTO_IPCOMP, ipcomp, CTLFLAG_RW, 0, "IPCOMP");
SYSCTL_NODE(_net_inet, IPPROTO_IPIP, ipip, CTLFLAG_RW, 0, "IPIP");
#endif /* IPSEC */
SYSCTL_NODE(_net_inet, IPPROTO_RAW, raw, CTLFLAG_RW, 0, "RAW");
+SYSCTL_NODE(_net_inet, OID_AUTO, accf, CTLFLAG_RW, 0,
+ "Accept filters");
diff --git a/freebsd/sys/netinet/in_rmx.c b/freebsd/sys/netinet/in_rmx.c
index 939193f6..2062d1d1 100644
--- a/freebsd/sys/netinet/in_rmx.c
+++ b/freebsd/sys/netinet/in_rmx.c
@@ -38,11 +38,11 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
-#include <sys/syslog.h>
-#include <sys/callout.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
+#include <net/route_var.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -56,19 +56,16 @@ extern int in_inithead(void **head, int off);
extern int in_detachhead(void **head, int off);
#endif
-#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */
-
/*
* Do what we need to do when inserting a route.
*/
static struct radix_node *
-in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
+in_addroute(void *v_arg, void *n_arg, struct radix_head *head,
struct radix_node *treenodes)
{
struct rtentry *rt = (struct rtentry *)treenodes;
struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
- RADIX_NODE_HEAD_WLOCK_ASSERT(head);
/*
* A little bit of help for both IP output and input:
* For host routes, we make sure that RTF_BROADCAST
@@ -95,247 +92,20 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
rt->rt_flags |= RTF_MULTICAST;
- if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
- rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
-
- return (rn_addroute(v_arg, n_arg, head, treenodes));
-}
-
-/*
- * This code is the inverse of in_clsroute: on first reference, if we
- * were managing the route, stop doing so and set the expiration timer
- * back off again.
- */
-static struct radix_node *
-in_matroute(void *v_arg, struct radix_node_head *head)
-{
- struct radix_node *rn = rn_match(v_arg, head);
- struct rtentry *rt = (struct rtentry *)rn;
-
- if (rt) {
- RT_LOCK(rt);
- if (rt->rt_flags & RTPRF_OURS) {
- rt->rt_flags &= ~RTPRF_OURS;
- rt->rt_rmx.rmx_expire = 0;
- }
- RT_UNLOCK(rt);
- }
- return rn;
-}
-
-static VNET_DEFINE(int, rtq_reallyold) = 60*60; /* one hour is "really old" */
-#define V_rtq_reallyold VNET(rtq_reallyold)
-SYSCTL_VNET_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW,
- &VNET_NAME(rtq_reallyold), 0,
- "Default expiration time on dynamically learned routes");
-
-/* never automatically crank down to less */
-static VNET_DEFINE(int, rtq_minreallyold) = 10;
-#define V_rtq_minreallyold VNET(rtq_minreallyold)
-SYSCTL_VNET_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW,
- &VNET_NAME(rtq_minreallyold), 0,
- "Minimum time to attempt to hold onto dynamically learned routes");
-
-/* 128 cached routes is "too many" */
-static VNET_DEFINE(int, rtq_toomany) = 128;
-#define V_rtq_toomany VNET(rtq_toomany)
-SYSCTL_VNET_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW,
- &VNET_NAME(rtq_toomany), 0,
- "Upper limit on dynamically learned routes");
-
-/*
- * On last reference drop, mark the route as belong to us so that it can be
- * timed out.
- */
-static void
-in_clsroute(struct radix_node *rn, struct radix_node_head *head)
-{
- struct rtentry *rt = (struct rtentry *)rn;
-
- RT_LOCK_ASSERT(rt);
-
- if (!(rt->rt_flags & RTF_UP))
- return; /* prophylactic measures */
-
- if (rt->rt_flags & RTPRF_OURS)
- return;
-
- if (!(rt->rt_flags & RTF_DYNAMIC))
- return;
-
- /*
- * If rtq_reallyold is 0, just delete the route without
- * waiting for a timeout cycle to kill it.
- */
- if (V_rtq_reallyold != 0) {
- rt->rt_flags |= RTPRF_OURS;
- rt->rt_rmx.rmx_expire = time_uptime + V_rtq_reallyold;
- } else {
- rtexpunge(rt);
- }
-}
-
-struct rtqk_arg {
- struct radix_node_head *rnh;
- int draining;
- int killed;
- int found;
- int updating;
- time_t nextstop;
-};
-
-/*
- * Get rid of old routes. When draining, this deletes everything, even when
- * the timeout is not expired yet. When updating, this makes sure that
- * nothing has a timeout longer than the current value of rtq_reallyold.
- */
-static int
-in_rtqkill(struct radix_node *rn, void *rock)
-{
- struct rtqk_arg *ap = rock;
- struct rtentry *rt = (struct rtentry *)rn;
- int err;
-
- RADIX_NODE_HEAD_WLOCK_ASSERT(ap->rnh);
-
- if (rt->rt_flags & RTPRF_OURS) {
- ap->found++;
-
- if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
- if (rt->rt_refcnt > 0)
- panic("rtqkill route really not free");
-
- err = in_rtrequest(RTM_DELETE,
- (struct sockaddr *)rt_key(rt),
- rt->rt_gateway, rt_mask(rt),
- rt->rt_flags | RTF_RNH_LOCKED, 0,
- rt->rt_fibnum);
- if (err) {
- log(LOG_WARNING, "in_rtqkill: error %d\n", err);
- } else {
- ap->killed++;
- }
- } else {
- if (ap->updating &&
- (rt->rt_rmx.rmx_expire - time_uptime >
- V_rtq_reallyold)) {
- rt->rt_rmx.rmx_expire =
- time_uptime + V_rtq_reallyold;
- }
- ap->nextstop = lmin(ap->nextstop,
- rt->rt_rmx.rmx_expire);
- }
- }
-
- return 0;
-}
-
-#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */
-static VNET_DEFINE(int, rtq_timeout) = RTQ_TIMEOUT;
-static VNET_DEFINE(struct callout, rtq_timer);
-
-#define V_rtq_timeout VNET(rtq_timeout)
-#define V_rtq_timer VNET(rtq_timer)
-
-static void in_rtqtimo_one(void *rock);
-
-static void
-in_rtqtimo(void *rock)
-{
- CURVNET_SET((struct vnet *) rock);
- int fibnum;
- void *newrock;
- struct timeval atv;
-
- for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
- newrock = rt_tables_get_rnh(fibnum, AF_INET);
- if (newrock != NULL)
- in_rtqtimo_one(newrock);
- }
- atv.tv_usec = 0;
- atv.tv_sec = V_rtq_timeout;
- callout_reset(&V_rtq_timer, tvtohz(&atv), in_rtqtimo, rock);
- CURVNET_RESTORE();
-}
+ if (rt->rt_ifp != NULL) {
-static void
-in_rtqtimo_one(void *rock)
-{
- struct radix_node_head *rnh = rock;
- struct rtqk_arg arg;
- static time_t last_adjusted_timeout = 0;
-
- arg.found = arg.killed = 0;
- arg.rnh = rnh;
- arg.nextstop = time_uptime + V_rtq_timeout;
- arg.draining = arg.updating = 0;
- RADIX_NODE_HEAD_LOCK(rnh);
- rnh->rnh_walktree(rnh, in_rtqkill, &arg);
- RADIX_NODE_HEAD_UNLOCK(rnh);
-
- /*
- * Attempt to be somewhat dynamic about this:
- * If there are ``too many'' routes sitting around taking up space,
- * then crank down the timeout, and see if we can't make some more
- * go away. However, we make sure that we will never adjust more
- * than once in rtq_timeout seconds, to keep from cranking down too
- * hard.
- */
- if ((arg.found - arg.killed > V_rtq_toomany) &&
- (time_uptime - last_adjusted_timeout >= V_rtq_timeout) &&
- V_rtq_reallyold > V_rtq_minreallyold) {
- V_rtq_reallyold = 2 * V_rtq_reallyold / 3;
- if (V_rtq_reallyold < V_rtq_minreallyold) {
- V_rtq_reallyold = V_rtq_minreallyold;
- }
-
- last_adjusted_timeout = time_uptime;
-#ifdef DIAGNOSTIC
- log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
- V_rtq_reallyold);
-#endif
- arg.found = arg.killed = 0;
- arg.updating = 1;
- RADIX_NODE_HEAD_LOCK(rnh);
- rnh->rnh_walktree(rnh, in_rtqkill, &arg);
- RADIX_NODE_HEAD_UNLOCK(rnh);
- }
-
-}
-
-void
-in_rtqdrain(void)
-{
- VNET_ITERATOR_DECL(vnet_iter);
- struct radix_node_head *rnh;
- struct rtqk_arg arg;
- int fibnum;
-
- VNET_LIST_RLOCK_NOSLEEP();
- VNET_FOREACH(vnet_iter) {
- CURVNET_SET(vnet_iter);
-
- for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {
- rnh = rt_tables_get_rnh(fibnum, AF_INET);
- arg.found = arg.killed = 0;
- arg.rnh = rnh;
- arg.nextstop = 0;
- arg.draining = 1;
- arg.updating = 0;
- RADIX_NODE_HEAD_LOCK(rnh);
- rnh->rnh_walktree(rnh, in_rtqkill, &arg);
- RADIX_NODE_HEAD_UNLOCK(rnh);
- }
- CURVNET_RESTORE();
+ /*
+ * Check route MTU:
+ * inherit interface MTU if not set or
+ * check if MTU is too large.
+ */
+ if (rt->rt_mtu == 0) {
+ rt->rt_mtu = rt->rt_ifp->if_mtu;
+ } else if (rt->rt_mtu > rt->rt_ifp->if_mtu)
+ rt->rt_mtu = rt->rt_ifp->if_mtu;
}
- VNET_LIST_RUNLOCK_NOSLEEP();
-}
-void
-in_setmatchfunc(struct radix_node_head *rnh, int val)
-{
-
- rnh->rnh_matchaddr = (val != 0) ? rn_match : in_matroute;
+ return (rn_addroute(v_arg, n_arg, head, treenodes));
}
static int _in_rt_was_here;
@@ -345,29 +115,16 @@ static int _in_rt_was_here;
int
in_inithead(void **head, int off)
{
- struct radix_node_head *rnh;
+ struct rib_head *rh;
- /* XXX MRT
- * This can be called from vfs_export.c too in which case 'off'
- * will be 0. We know the correct value so just use that and
- * return directly if it was 0.
- * This is a hack that replaces an even worse hack on a bad hack
- * on a bad design. After RELENG_7 this should be fixed but that
- * will change the ABI, so for now do it this way.
- */
- if (!rn_inithead(head, 32))
- return 0;
+ rh = rt_table_init(32);
+ if (rh == NULL)
+ return (0);
- if (off == 0) /* XXX MRT see above */
- return 1; /* only do the rest for a real routing table */
+ rh->rnh_addaddr = in_addroute;
+ *head = (void *)rh;
- rnh = *head;
- rnh->rnh_addaddr = in_addroute;
- in_setmatchfunc(rnh, V_drop_redirect);
- rnh->rnh_close = in_clsroute;
if (_in_rt_was_here == 0 ) {
- callout_init(&V_rtq_timer, CALLOUT_MPSAFE);
- callout_reset(&V_rtq_timer, 1, in_rtqtimo, curvnet);
_in_rt_was_here = 1;
}
return 1;
@@ -378,7 +135,7 @@ int
in_detachhead(void **head, int off)
{
- callout_drain(&V_rtq_timer);
+ rt_table_destroy((struct rib_head *)(*head));
return (1);
}
#endif
@@ -398,62 +155,32 @@ struct in_ifadown_arg {
};
static int
-in_ifadownkill(struct radix_node *rn, void *xap)
+in_ifadownkill(const struct rtentry *rt, void *xap)
{
struct in_ifadown_arg *ap = xap;
- struct rtentry *rt = (struct rtentry *)rn;
- RT_LOCK(rt);
- if (rt->rt_ifa == ap->ifa &&
- (ap->del || !(rt->rt_flags & RTF_STATIC))) {
- /*
- * Aquire a reference so that it can later be freed
- * as the refcount would be 0 here in case of at least
- * ap->del.
- */
- RT_ADDREF(rt);
- /*
- * Disconnect it from the tree and permit protocols
- * to cleanup.
- */
- rtexpunge(rt);
- /*
- * At this point it is an rttrash node, and in case
- * the above is the only reference we must free it.
- * If we do not noone will have a pointer and the
- * rtentry will be leaked forever.
- * In case someone else holds a reference, we are
- * fine as we only decrement the refcount. In that
- * case if the other entity calls RT_REMREF, we
- * will still be leaking but at least we tried.
- */
- RTFREE_LOCKED(rt);
+ if (rt->rt_ifa != ap->ifa)
return (0);
- }
- RT_UNLOCK(rt);
- return 0;
+
+ if ((rt->rt_flags & RTF_STATIC) != 0 && ap->del == 0)
+ return (0);
+
+ return (1);
}
-int
+void
in_ifadown(struct ifaddr *ifa, int delete)
{
struct in_ifadown_arg arg;
- struct radix_node_head *rnh;
- int fibnum;
- if (ifa->ifa_addr->sa_family != AF_INET)
- return 1;
+ KASSERT(ifa->ifa_addr->sa_family == AF_INET,
+ ("%s: wrong family", __func__));
- for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {
- rnh = rt_tables_get_rnh(fibnum, AF_INET);
- arg.ifa = ifa;
- arg.del = delete;
- RADIX_NODE_HEAD_LOCK(rnh);
- rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
- RADIX_NODE_HEAD_UNLOCK(rnh);
- ifa->ifa_flags &= ~IFA_ROUTE; /* XXXlocking? */
- }
- return 0;
+ arg.ifa = ifa;
+ arg.del = delete;
+
+ rt_foreach_fib_walk_del(AF_INET, in_ifadownkill, &arg);
+ ifa->ifa_flags &= ~IFA_ROUTE; /* XXXlocking? */
}
/*
@@ -467,25 +194,6 @@ in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum)
rtalloc_ign_fib(ro, ignflags, fibnum);
}
-int
-in_rtrequest( int req,
- struct sockaddr *dst,
- struct sockaddr *gateway,
- struct sockaddr *netmask,
- int flags,
- struct rtentry **ret_nrt,
- u_int fibnum)
-{
- return (rtrequest_fib(req, dst, gateway, netmask,
- flags, ret_nrt, fibnum));
-}
-
-struct rtentry *
-in_rtalloc1(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum)
-{
- return (rtalloc1_fib(dst, report, ignflags, fibnum));
-}
-
void
in_rtredirect(struct sockaddr *dst,
struct sockaddr *gateway,
@@ -497,16 +205,3 @@ in_rtredirect(struct sockaddr *dst,
rtredirect_fib(dst, gateway, netmask, flags, src, fibnum);
}
-void
-in_rtalloc(struct route *ro, u_int fibnum)
-{
- rtalloc_ign_fib(ro, 0UL, fibnum);
-}
-
-#if 0
-int in_rt_getifa(struct rt_addrinfo *, u_int fibnum);
-int in_rtioctl(u_long, caddr_t, u_int);
-int in_rtrequest1(int, struct rt_addrinfo *, struct rtentry **, u_int);
-#endif
-
-
diff --git a/freebsd/sys/netinet/in_rss.h b/freebsd/sys/netinet/in_rss.h
new file mode 100644
index 00000000..fd300ac5
--- /dev/null
+++ b/freebsd/sys/netinet/in_rss.h
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IN_RSS_H_
+#define _NETINET_IN_RSS_H_
+
+#include <netinet/in.h> /* in_addr_t */
+
+/*
+ * Network stack interface to generate a hash for a protocol tuple.
+ */
+uint32_t rss_hash_ip4_4tuple(struct in_addr src, u_short srcport,
+ struct in_addr dst, u_short dstport);
+uint32_t rss_hash_ip4_2tuple(struct in_addr src, struct in_addr dst);
+
+/*
+ * Functions to calculate a software RSS hash for a given mbuf or
+ * packet detail.
+ */
+int rss_mbuf_software_hash_v4(const struct mbuf *m, int dir,
+ uint32_t *hashval, uint32_t *hashtype);
+int rss_proto_software_hash_v4(struct in_addr src,
+ struct in_addr dst, u_short src_port, u_short dst_port,
+ int proto, uint32_t *hashval,
+ uint32_t *hashtype);
+struct mbuf * rss_soft_m2cpuid_v4(struct mbuf *m, uintptr_t source,
+ u_int *cpuid);
+
+#endif /* !_NETINET_IN_RSS_H_ */
diff --git a/freebsd/sys/netinet/in_systm.h b/freebsd/sys/netinet/in_systm.h
index 4b34aa00..a4a56833 100644
--- a/freebsd/sys/netinet/in_systm.h
+++ b/freebsd/sys/netinet/in_systm.h
@@ -44,14 +44,26 @@
* Internally the system keeps counters in the headers with the bytes
* swapped so that VAX instructions will work on them. It reverses
* the bytes before transmission at each protocol level. The n_ types
- * represent the types with the bytes in ``high-ender'' order.
+ * represent the types with the bytes in ``high-ender'' order. Network
+ * byte order is usually referered to as big-endian these days rather
+ * than high-ender, which sadly invokes an Orson Scott Card novel, or
+ * worse, the movie.
*/
typedef u_int16_t n_short; /* short as received from the net */
typedef u_int32_t n_long; /* long as received from the net */
-typedef u_int32_t n_time; /* ms since 00:00 GMT, byte rev */
+typedef u_int32_t n_time; /* ms since 00:00 UTC, byte rev */
#ifdef _KERNEL
+struct inpcb;
+struct ucred;
+
+#ifndef __rtems__
+int cr_canseeinpcb(struct ucred *cred, struct inpcb *inp);
+#else /* __rtems__ */
+#define cr_canseeinpcb(cred, inp) 0
+#endif /* __rtems__ */
+
uint32_t iptime(void);
#endif
diff --git a/freebsd/sys/netinet/in_var.h b/freebsd/sys/netinet/in_var.h
index b8477309..af83e9a1 100644
--- a/freebsd/sys/netinet/in_var.h
+++ b/freebsd/sys/netinet/in_var.h
@@ -33,11 +33,24 @@
#ifndef _NETINET_IN_VAR_H_
#define _NETINET_IN_VAR_H_
+/*
+ * Argument structure for SIOCAIFADDR.
+ */
+struct in_aliasreq {
+ char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct sockaddr_in ifra_addr;
+ struct sockaddr_in ifra_broadaddr;
+#define ifra_dstaddr ifra_broadaddr
+ struct sockaddr_in ifra_mask;
+ int ifra_vhid;
+};
+
+#ifdef _KERNEL
#include <sys/queue.h>
#include <sys/fnv_hash.h>
#include <sys/tree.h>
-struct igmp_ifinfo;
+struct igmp_ifsoftc;
struct in_multi;
struct lltable;
@@ -46,7 +59,7 @@ struct lltable;
*/
struct in_ifinfo {
struct lltable *ii_llt; /* ARP state */
- struct igmp_ifinfo *ii_igmp; /* IGMP state */
+ struct igmp_ifsoftc *ii_igmp; /* IGMP state */
struct in_multi *ii_allhosts; /* 224.0.0.1 membership */
};
@@ -71,25 +84,17 @@ struct in_ifaddr {
struct sockaddr_in ia_sockmask; /* reserve space for general netmask */
};
-struct in_aliasreq {
- char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */
- struct sockaddr_in ifra_addr;
- struct sockaddr_in ifra_broadaddr;
-#define ifra_dstaddr ifra_broadaddr
- struct sockaddr_in ifra_mask;
-};
/*
* Given a pointer to an in_ifaddr (ifaddr),
* return a pointer to the addr as a sockaddr_in.
*/
#define IA_SIN(ia) (&(((struct in_ifaddr *)(ia))->ia_addr))
#define IA_DSTSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_dstaddr))
+#define IA_MASKSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_sockmask))
#define IN_LNAOF(in, ifa) \
((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa)->ia_subnetmask))
-
-#ifdef _KERNEL
extern u_char inetctlerrmap[];
#define LLTABLE(ifp) \
@@ -114,15 +119,15 @@ VNET_DECLARE(u_long, in_ifaddrhmask); /* mask for hash table */
#define INADDR_HASH(x) \
(&V_in_ifaddrhashtbl[INADDR_HASHVAL(x) & V_in_ifaddrhmask])
-extern struct rwlock in_ifaddr_lock;
+extern struct rmlock in_ifaddr_lock;
-#define IN_IFADDR_LOCK_ASSERT() rw_assert(&in_ifaddr_lock, RA_LOCKED)
-#define IN_IFADDR_RLOCK() rw_rlock(&in_ifaddr_lock)
-#define IN_IFADDR_RLOCK_ASSERT() rw_assert(&in_ifaddr_lock, RA_RLOCKED)
-#define IN_IFADDR_RUNLOCK() rw_runlock(&in_ifaddr_lock)
-#define IN_IFADDR_WLOCK() rw_wlock(&in_ifaddr_lock)
-#define IN_IFADDR_WLOCK_ASSERT() rw_assert(&in_ifaddr_lock, RA_WLOCKED)
-#define IN_IFADDR_WUNLOCK() rw_wunlock(&in_ifaddr_lock)
+#define IN_IFADDR_LOCK_ASSERT() rm_assert(&in_ifaddr_lock, RA_LOCKED)
+#define IN_IFADDR_RLOCK(t) rm_rlock(&in_ifaddr_lock, (t))
+#define IN_IFADDR_RLOCK_ASSERT() rm_assert(&in_ifaddr_lock, RA_RLOCKED)
+#define IN_IFADDR_RUNLOCK(t) rm_runlock(&in_ifaddr_lock, (t))
+#define IN_IFADDR_WLOCK() rm_wlock(&in_ifaddr_lock)
+#define IN_IFADDR_WLOCK_ASSERT() rm_assert(&in_ifaddr_lock, RA_WLOCKED)
+#define IN_IFADDR_WUNLOCK() rm_wunlock(&in_ifaddr_lock)
/*
* Macro for finding the internet address structure (in_ifaddr)
@@ -156,29 +161,20 @@ do { \
* Macro for finding the internet address structure (in_ifaddr) corresponding
* to a given interface (ifnet structure).
*/
-#define IFP_TO_IA(ifp, ia) \
+#define IFP_TO_IA(ifp, ia, t) \
/* struct ifnet *ifp; */ \
/* struct in_ifaddr *ia; */ \
+ /* struct rm_priotracker *t; */ \
do { \
- IN_IFADDR_RLOCK(); \
+ IN_IFADDR_RLOCK((t)); \
for ((ia) = TAILQ_FIRST(&V_in_ifaddrhead); \
(ia) != NULL && (ia)->ia_ifp != (ifp); \
(ia) = TAILQ_NEXT((ia), ia_link)) \
continue; \
if ((ia) != NULL) \
ifa_ref(&(ia)->ia_ifa); \
- IN_IFADDR_RUNLOCK(); \
+ IN_IFADDR_RUNLOCK((t)); \
} while (0)
-#endif
-
-/*
- * IP datagram reassembly.
- */
-#define IPREASS_NHASH_LOG2 6
-#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
-#define IPREASS_HMASK (IPREASS_NHASH - 1)
-#define IPREASS_HASH(x,y) \
- (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
/*
* Legacy IPv4 IGMP per-link structure.
@@ -191,28 +187,6 @@ struct router_info {
};
/*
- * Per-interface IGMP router version information.
- */
-struct igmp_ifinfo {
- LIST_ENTRY(igmp_ifinfo) igi_link;
- struct ifnet *igi_ifp; /* interface this instance belongs to */
- uint32_t igi_version; /* IGMPv3 Host Compatibility Mode */
- uint32_t igi_v1_timer; /* IGMPv1 Querier Present timer (s) */
- uint32_t igi_v2_timer; /* IGMPv2 Querier Present timer (s) */
- uint32_t igi_v3_timer; /* IGMPv3 General Query (interface) timer (s)*/
- uint32_t igi_flags; /* IGMP per-interface flags */
- uint32_t igi_rv; /* IGMPv3 Robustness Variable */
- uint32_t igi_qi; /* IGMPv3 Query Interval (s) */
- uint32_t igi_qri; /* IGMPv3 Query Response Interval (s) */
- uint32_t igi_uri; /* IGMPv3 Unsolicited Report Interval (s) */
- SLIST_HEAD(,in_multi) igi_relinmhead; /* released groups */
- struct ifqueue igi_gq; /* queue of general query responses */
-};
-
-#define IGIF_SILENT 0x00000001 /* Do not use IGMP on this ifp */
-#define IGIF_LOOPBACK 0x00000002 /* Send IGMP reports to loopback */
-
-/*
* IPv4 multicast IGMP-layer source entry.
*/
struct ip_msource {
@@ -290,12 +264,12 @@ struct in_multi {
u_int inm_refcount; /* reference count */
/* New fields for IGMPv3 follow. */
- struct igmp_ifinfo *inm_igi; /* IGMP info */
+ struct igmp_ifsoftc *inm_igi; /* IGMP info */
SLIST_ENTRY(in_multi) inm_nrele; /* to-be-released by IGMP */
struct ip_msource_tree inm_srcs; /* tree of sources */
u_long inm_nsrc; /* # of tree entries */
- struct ifqueue inm_scq; /* queue of pending
+ struct mbufq inm_scq; /* queue of pending
* state-change packets */
struct timeval inm_lastgsrtv; /* Time of last G-S-R query */
uint16_t inm_sctimer; /* state-change timer */
@@ -339,8 +313,6 @@ ims_get_mode(const struct in_multi *inm, const struct ip_msource *ims,
return (MCAST_UNDEFINED);
}
-#ifdef _KERNEL
-
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet);
SYSCTL_DECL(_net_inet_ip);
@@ -359,49 +331,6 @@ extern struct mtx in_multi_mtx;
#define IN_MULTI_LOCK_ASSERT() mtx_assert(&in_multi_mtx, MA_OWNED)
#define IN_MULTI_UNLOCK_ASSERT() mtx_assert(&in_multi_mtx, MA_NOTOWNED)
-/*
- * Function for looking up an in_multi record for an IPv4 multicast address
- * on a given interface. ifp must be valid. If no record found, return NULL.
- * The IN_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held.
- */
-static __inline struct in_multi *
-inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina)
-{
- struct ifmultiaddr *ifma;
- struct in_multi *inm;
-
- IN_MULTI_LOCK_ASSERT();
- IF_ADDR_LOCK_ASSERT(ifp);
-
- inm = NULL;
- TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) {
- if (ifma->ifma_addr->sa_family == AF_INET) {
- inm = (struct in_multi *)ifma->ifma_protospec;
- if (inm->inm_addr.s_addr == ina.s_addr)
- break;
- inm = NULL;
- }
- }
- return (inm);
-}
-
-/*
- * Wrapper for inm_lookup_locked().
- * The IF_ADDR_LOCK will be taken on ifp and released on return.
- */
-static __inline struct in_multi *
-inm_lookup(struct ifnet *ifp, const struct in_addr ina)
-{
- struct in_multi *inm;
-
- IN_MULTI_LOCK_ASSERT();
- IF_ADDR_RLOCK(ifp);
- inm = inm_lookup_locked(ifp, ina);
- IF_ADDR_RUNLOCK(ifp);
-
- return (inm);
-}
-
/* Acquire an in_multi record. */
static __inline void
inm_acquire_locked(struct in_multi *inm)
@@ -422,8 +351,9 @@ inm_acquire_locked(struct in_multi *inm)
struct rtentry;
struct route;
struct ip_moptions;
-struct radix_node_head;
+struct in_multi *inm_lookup_locked(struct ifnet *, const struct in_addr);
+struct in_multi *inm_lookup(struct ifnet *, const struct in_addr);
int imo_multi_filter(const struct ip_moptions *, const struct ifnet *,
const struct sockaddr *, const struct sockaddr *);
void inm_commit(struct in_multi *);
@@ -444,30 +374,21 @@ int in_leavegroup_locked(struct in_multi *,
/*const*/ struct in_mfilter *);
int in_control(struct socket *, u_long, caddr_t, struct ifnet *,
struct thread *);
-void in_rtqdrain(void);
+int in_addprefix(struct in_ifaddr *, int);
+int in_scrubprefix(struct in_ifaddr *, u_int);
+void in_ifscrub_all(void);
void ip_input(struct mbuf *);
-int in_ifadown(struct ifaddr *ifa, int);
-void in_ifscrub(struct ifnet *, struct in_ifaddr *, u_int);
-struct mbuf *ip_fastforward(struct mbuf *);
+void ip_direct_input(struct mbuf *);
+void in_ifadown(struct ifaddr *ifa, int);
+struct mbuf *ip_tryforward(struct mbuf *);
void *in_domifattach(struct ifnet *);
void in_domifdetach(struct ifnet *, void *);
/* XXX */
void in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum);
-void in_rtalloc(struct route *ro, u_int fibnum);
-struct rtentry *in_rtalloc1(struct sockaddr *, int, u_long, u_int);
void in_rtredirect(struct sockaddr *, struct sockaddr *,
struct sockaddr *, int, struct sockaddr *, u_int);
-int in_rtrequest(int, struct sockaddr *,
- struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int);
-void in_setmatchfunc(struct radix_node_head *, int);
-
-#if 0
-int in_rt_getifa(struct rt_addrinfo *, u_int fibnum);
-int in_rtioctl(u_long, caddr_t, u_int);
-int in_rtrequest1(int, struct rt_addrinfo *, struct rtentry **, u_int);
-#endif
#endif /* _KERNEL */
/* INET6 stuff */
diff --git a/freebsd/sys/netinet/ip.h b/freebsd/sys/netinet/ip.h
index 79afeb8f..98bd1e99 100644
--- a/freebsd/sys/netinet/ip.h
+++ b/freebsd/sys/netinet/ip.h
@@ -67,7 +67,7 @@ struct ip {
u_char ip_p; /* protocol */
u_short ip_sum; /* checksum */
struct in_addr ip_src,ip_dst; /* source and dest address */
-} __packed __aligned(4);
+} __packed __aligned(2);
#define IP_MAXPACKET 65535 /* maximum packet size */
@@ -80,19 +80,19 @@ struct ip {
#define IPTOS_MINCOST 0x02
/*
- * Definitions for IP precedence (also in ip_tos) (hopefully unused).
+ * Definitions for IP precedence (also in ip_tos) (deprecated).
*/
-#define IPTOS_PREC_NETCONTROL 0xe0
-#define IPTOS_PREC_INTERNETCONTROL 0xc0
-#define IPTOS_PREC_CRITIC_ECP 0xa0
-#define IPTOS_PREC_FLASHOVERRIDE 0x80
-#define IPTOS_PREC_FLASH 0x60
-#define IPTOS_PREC_IMMEDIATE 0x40
-#define IPTOS_PREC_PRIORITY 0x20
-#define IPTOS_PREC_ROUTINE 0x00
+#define IPTOS_PREC_NETCONTROL IPTOS_DSCP_CS7
+#define IPTOS_PREC_INTERNETCONTROL IPTOS_DSCP_CS6
+#define IPTOS_PREC_CRITIC_ECP IPTOS_DSCP_CS5
+#define IPTOS_PREC_FLASHOVERRIDE IPTOS_DSCP_CS4
+#define IPTOS_PREC_FLASH IPTOS_DSCP_CS3
+#define IPTOS_PREC_IMMEDIATE IPTOS_DSCP_CS2
+#define IPTOS_PREC_PRIORITY IPTOS_DSCP_CS1
+#define IPTOS_PREC_ROUTINE IPTOS_DSCP_CS0
/*
- * Definitions for DiffServ Codepoints as per RFC2474
+ * Definitions for DiffServ Codepoints as per RFC2474 and RFC5865.
*/
#define IPTOS_DSCP_CS0 0x00
#define IPTOS_DSCP_CS1 0x20
@@ -112,6 +112,7 @@ struct ip {
#define IPTOS_DSCP_AF42 0x90
#define IPTOS_DSCP_AF43 0x98
#define IPTOS_DSCP_CS5 0xa0
+#define IPTOS_DSCP_VA 0xb0
#define IPTOS_DSCP_EF 0xb8
#define IPTOS_DSCP_CS6 0xc0
#define IPTOS_DSCP_CS7 0xe0
@@ -146,7 +147,7 @@ struct ip {
#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */
#define IPOPT_LSRR 131 /* loose source route */
#define IPOPT_ESO 133 /* extended security */
-#define IPOPT_CIPSO 134 /* commerical security */
+#define IPOPT_CIPSO 134 /* commercial security */
#define IPOPT_SATID 136 /* satnet id */
#define IPOPT_SSRR 137 /* strict source route */
#define IPOPT_RA 148 /* router alert */
diff --git a/freebsd/sys/netinet/ip6.h b/freebsd/sys/netinet/ip6.h
index 8f498410..ff870579 100644
--- a/freebsd/sys/netinet/ip6.h
+++ b/freebsd/sys/netinet/ip6.h
@@ -277,12 +277,6 @@ do { \
(((m) = m_pullup((m), (off) + (hlen))) == NULL)) { \
IP6STAT_INC(ip6s_exthdrtoolong); \
return ret; \
- } else if ((m)->m_flags & M_EXT) { \
- if ((m)->m_len < (off) + (hlen)) { \
- IP6STAT_INC(ip6s_exthdrtoolong); \
- m_freem(m); \
- return ret; \
- } \
} else { \
if ((m)->m_len < (off) + (hlen)) { \
IP6STAT_INC(ip6s_exthdrtoolong); \
diff --git a/freebsd/sys/netinet/ip_carp.c b/freebsd/sys/netinet/ip_carp.c
index 330023b1..6b683f45 100644
--- a/freebsd/sys/netinet/ip_carp.c
+++ b/freebsd/sys/netinet/ip_carp.c
@@ -1,8 +1,10 @@
#include <machine/rtems-bsd-kernel-space.h>
-/*
- * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
- * Copyright (c) 2003 Ryan McBride. All rights reserved.
+/*-
+ * Copyright (c) 2002 Michael Shalayeff.
+ * Copyright (c) 2003 Ryan McBride.
+ * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org>
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -33,38 +35,33 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <sys/types.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
-#include <sys/conf.h>
+#include <sys/bus.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
-#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
-#include <sys/signalvar.h>
-#include <sys/filio.h>
-#include <sys/sockio.h>
-
-#include <sys/socket.h>
-#include <sys/vnode.h>
-
-#include <machine/stdarg.h>
+#include <sys/taskqueue.h>
+#include <sys/counter.h>
-#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/fddi.h>
-#include <net/iso88025.h>
#include <net/if.h>
-#include <net/if_clone.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
+#include <net/if_llatbl.h>
#include <net/if_types.h>
+#include <net/iso88025.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -73,12 +70,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_var.h>
#include <netinet/ip_carp.h>
#include <netinet/ip.h>
-
#include <machine/in_cksum.h>
#endif
-
#ifdef INET
-#include <netinet/in_systm.h>
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#endif
@@ -86,182 +80,254 @@ __FBSDID("$FreeBSD$");
#ifdef INET6
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
-#include <netinet6/ip6protosw.h>
+#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
-#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#endif
#include <crypto/sha1.h>
-#define CARP_IFNAME "carp"
-static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
-SYSCTL_DECL(_net_inet_carp);
+static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
struct carp_softc {
- struct ifnet *sc_ifp; /* Interface clue */
- struct ifnet *sc_carpdev; /* Pointer to parent interface */
- struct in_ifaddr *sc_ia; /* primary iface address */
+ struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */
+ struct ifaddr **sc_ifas; /* Our ifaddrs. */
+ struct sockaddr_dl sc_addr; /* Our link level address. */
+ struct callout sc_ad_tmo; /* Advertising timeout. */
#ifdef INET
- struct ip_moptions sc_imo;
+ struct callout sc_md_tmo; /* Master down timeout. */
#endif
#ifdef INET6
- struct in6_ifaddr *sc_ia6; /* primary iface address v6 */
- struct ip6_moptions sc_im6o;
-#endif /* INET6 */
- TAILQ_ENTRY(carp_softc) sc_list;
-
- enum { INIT = 0, BACKUP, MASTER } sc_state;
+ struct callout sc_md6_tmo; /* XXX: Master down timeout. */
+#endif
+ struct mtx sc_mtx;
- int sc_flags_backup;
- int sc_suppress;
+ int sc_vhid;
+ int sc_advskew;
+ int sc_advbase;
- int sc_sendad_errors;
+ int sc_naddrs;
+ int sc_naddrs6;
+ int sc_ifasiz;
+ enum { INIT = 0, BACKUP, MASTER } sc_state;
+ int sc_suppress;
+ int sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS 3
- int sc_sendad_success;
+ int sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS 3
- int sc_vhid;
- int sc_advskew;
- int sc_naddrs;
- int sc_naddrs6;
- int sc_advbase; /* seconds */
- int sc_init_counter;
- u_int64_t sc_counter;
+ int sc_init_counter;
+ uint64_t sc_counter;
/* authentication */
-#define CARP_HMAC_PAD 64
+#define CARP_HMAC_PAD 64
unsigned char sc_key[CARP_KEY_LEN];
unsigned char sc_pad[CARP_HMAC_PAD];
SHA1_CTX sc_sha1;
- struct callout sc_ad_tmo; /* advertisement timeout */
- struct callout sc_md_tmo; /* master down timeout */
- struct callout sc_md6_tmo; /* master down timeout */
-
- LIST_ENTRY(carp_softc) sc_next; /* Interface clue */
+ TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */
+ LIST_ENTRY(carp_softc) sc_next; /* On the global list. */
};
-#define SC2IFP(sc) ((sc)->sc_ifp)
-
-int carp_suppress_preempt = 0;
-int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
-SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
-SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
- &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
-SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
- &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
-SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
- &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
-SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
- &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
-SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
- &carp_suppress_preempt, 0, "Preemption is suppressed");
-
-struct carpstats carpstats;
-SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
- &carpstats, carpstats,
- "CARP statistics (struct carpstats, netinet/ip_carp.h)");
struct carp_if {
- TAILQ_HEAD(, carp_softc) vhif_vrs;
- int vhif_nvrs;
-
- struct ifnet *vhif_ifp;
- struct mtx vhif_mtx;
+#ifdef INET
+ int cif_naddrs;
+#endif
+#ifdef INET6
+ int cif_naddrs6;
+#endif
+ TAILQ_HEAD(, carp_softc) cif_vrs;
+#ifdef INET
+ struct ip_moptions cif_imo;
+#endif
+#ifdef INET6
+ struct ip6_moptions cif_im6o;
+#endif
+ struct ifnet *cif_ifp;
+ struct mtx cif_mtx;
+ uint32_t cif_flags;
+#define CIF_PROMISC 0x00000001
};
#define CARP_INET 0
#define CARP_INET6 1
static int proto_reg[] = {-1, -1};
-/* Get carp_if from softc. Valid after carp_set_addr{,6}. */
-#define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp)
+/*
+ * Brief design of carp(4).
+ *
+ * Any carp-capable ifnet may have a list of carp softcs hanging off
+ * its ifp->if_carp pointer. Each softc represents one unique virtual
+ * host id, or vhid. The softc has a back pointer to the ifnet. All
+ * softcs are joined in a global list, which has quite limited use.
+ *
+ * Any interface address that takes part in CARP negotiation has a
+ * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
+ * AF_INET or AF_INET6 address.
+ *
+ * Although, one can get the softc's backpointer to ifnet and traverse
+ * through its ifp->if_addrhead queue to find all interface addresses
+ * involved in CARP, we keep a growable array of ifaddr pointers. This
+ * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
+ * do calls into the network stack, thus avoiding LORs.
+ *
+ * Locking:
+ *
+ * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
+ * callout-driven events and ioctl()s.
+ *
+ * To traverse the list of softcs on an ifnet we use CIF_LOCK(), to
+ * traverse the global list we use the mutex carp_mtx.
+ *
+ * Known issues with locking:
+ *
+ * - Sending ad, we put the pointer to the softc in an mtag, and no reference
+ * counting is done on the softc.
+ * - On module unload we may race (?) with packet processing thread
+ * dereferencing our function pointers.
+ */
+
+/* Accept incoming CARP packets. */
+static VNET_DEFINE(int, carp_allow) = 1;
+#define V_carp_allow VNET(carp_allow)
-/* lock per carp_if queue */
-#define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \
- NULL, MTX_DEF)
-#define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx)
-#define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
-#define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx)
-#define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx)
+/* Preempt slower nodes. */
+static VNET_DEFINE(int, carp_preempt) = 0;
+#define V_carp_preempt VNET(carp_preempt)
-#define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx)
-#define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx)
-#define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED)
+/* Log level. */
+static VNET_DEFINE(int, carp_log) = 1;
+#define V_carp_log VNET(carp_log)
+
+/* Global advskew demotion. */
+static VNET_DEFINE(int, carp_demotion) = 0;
+#define V_carp_demotion VNET(carp_demotion)
+
+/* Send error demotion factor. */
+static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW;
+#define V_carp_senderr_adj VNET(carp_senderr_adj)
+
+/* Iface down demotion factor. */
+static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW;
+#define V_carp_ifdown_adj VNET(carp_ifdown_adj)
+
+static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
+
+SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(carp_allow), 0, "Accept incoming CARP packets");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(carp_log), 0, "CARP log level");
+SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
+ 0, 0, carp_demote_adj_sysctl, "I",
+ "Adjust demotion factor (skew of advskew)");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor,
+ CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor,
+ CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(carp_ifdown_adj), 0,
+ "Interface down demotion factor adjustment");
+
+VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
+VNET_PCPUSTAT_SYSINIT(carpstats);
+VNET_PCPUSTAT_SYSUNINIT(carpstats);
+
+#define CARPSTATS_ADD(name, val) \
+ counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
+ sizeof(uint64_t)], (val))
+#define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1)
+
+SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
+ carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
+
+#define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \
+ NULL, MTX_DEF)
+#define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx)
+#define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED)
+#define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx)
+#define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx)
+#define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \
+ NULL, MTX_DEF)
+#define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx)
+#define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED)
+#define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx)
+#define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx)
+#define CIF_FREE(cif) do { \
+ CIF_LOCK(cif); \
+ if (TAILQ_EMPTY(&(cif)->cif_vrs)) \
+ carp_free_if(cif); \
+ else \
+ CIF_UNLOCK(cif); \
+} while (0)
#define CARP_LOG(...) do { \
- if (carp_opts[CARPCTL_LOG] > 0) \
- log(LOG_INFO, __VA_ARGS__); \
+ if (V_carp_log > 0) \
+ log(LOG_INFO, "carp: " __VA_ARGS__); \
} while (0)
#define CARP_DEBUG(...) do { \
- if (carp_opts[CARPCTL_LOG] > 1) \
+ if (V_carp_log > 1) \
log(LOG_DEBUG, __VA_ARGS__); \
} while (0)
-static void carp_hmac_prepare(struct carp_softc *);
-static void carp_hmac_generate(struct carp_softc *, u_int32_t *,
- unsigned char *);
-static int carp_hmac_verify(struct carp_softc *, u_int32_t *,
- unsigned char *);
-static void carp_setroute(struct carp_softc *, int);
-static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
-static int carp_clone_create(struct if_clone *, int, caddr_t);
-static void carp_clone_destroy(struct ifnet *);
-static void carpdetach(struct carp_softc *, int);
-static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
- struct carp_header *);
-static void carp_send_ad_all(void);
-static void carp_send_ad(void *);
-static void carp_send_ad_locked(struct carp_softc *);
-#ifdef INET
-static void carp_send_arp(struct carp_softc *);
-#endif
-static void carp_master_down(void *);
-static void carp_master_down_locked(struct carp_softc *);
-static int carp_ioctl(struct ifnet *, u_long, caddr_t);
-static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *);
-static void carp_start(struct ifnet *);
-static void carp_setrun(struct carp_softc *, sa_family_t);
-static void carp_set_state(struct carp_softc *, int);
-#ifdef INET
-static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
-#endif
-enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
+#define IFNET_FOREACH_IFA(ifp, ifa) \
+ IF_ADDR_LOCK_ASSERT(ifp); \
+ TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \
+ if ((ifa)->ifa_carp != NULL)
-#ifdef INET
-static void carp_multicast_cleanup(struct carp_softc *, int dofree);
-static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
-static int carp_del_addr(struct carp_softc *, struct sockaddr_in *);
-#endif
-static void carp_carpdev_state_locked(struct carp_if *);
-static void carp_sc_state_locked(struct carp_softc *);
-#ifdef INET6
-static void carp_send_na(struct carp_softc *);
-static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
-static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
-static void carp_multicast6_cleanup(struct carp_softc *, int dofree);
-#endif
+#define CARP_FOREACH_IFA(sc, ifa) \
+ CARP_LOCK_ASSERT(sc); \
+ for (int _i = 0; \
+ _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \
+ ((ifa) = sc->sc_ifas[_i]) != NULL; \
+ ++_i)
-static LIST_HEAD(, carp_softc) carpif_list;
-static struct mtx carp_mtx;
-IFC_SIMPLE_DECLARE(carp, 0);
+#define IFNET_FOREACH_CARP(ifp, sc) \
+ CIF_LOCK_ASSERT(ifp->if_carp); \
+ TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)
-static eventhandler_tag if_detach_event_tag;
+#define DEMOTE_ADVSKEW(sc) \
+ (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \
+ CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion))
-static __inline u_int16_t
-carp_cksum(struct mbuf *m, int len)
-{
- return (in_cksum(m, len));
-}
+static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
+static struct carp_softc
+ *carp_alloc(struct ifnet *);
+static void carp_destroy(struct carp_softc *);
+static struct carp_if
+ *carp_alloc_if(struct ifnet *);
+static void carp_free_if(struct carp_if *);
+static void carp_set_state(struct carp_softc *, int, const char* reason);
+static void carp_sc_state(struct carp_softc *);
+static void carp_setrun(struct carp_softc *, sa_family_t);
+static void carp_master_down(void *);
+static void carp_master_down_locked(struct carp_softc *,
+ const char* reason);
+static void carp_send_ad(void *);
+static void carp_send_ad_locked(struct carp_softc *);
+static void carp_addroute(struct carp_softc *);
+static void carp_ifa_addroute(struct ifaddr *);
+static void carp_delroute(struct carp_softc *);
+static void carp_ifa_delroute(struct ifaddr *);
+static void carp_send_ad_all(void *, int);
+static void carp_demote_adj(int, char *);
+
+static LIST_HEAD(, carp_softc) carp_list;
+static struct mtx carp_mtx;
+static struct sx carp_sx;
+static struct task carp_sendall_task =
+ TASK_INITIALIZER(0, carp_send_ad_all, NULL);
static void
carp_hmac_prepare(struct carp_softc *sc)
{
- u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
- u_int8_t vhid = sc->sc_vhid & 0xff;
+ uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
+ uint8_t vhid = sc->sc_vhid & 0xff;
struct ifaddr *ifa;
int i, found;
#ifdef INET
@@ -271,18 +337,15 @@ carp_hmac_prepare(struct carp_softc *sc)
struct in6_addr last6, cur6, in6;
#endif
- if (sc->sc_carpdev)
- CARP_SCLOCK(sc);
+ CARP_LOCK_ASSERT(sc);
- /* XXX: possible race here */
-
- /* compute ipad from key */
+ /* Compute ipad from key. */
bzero(sc->sc_pad, sizeof(sc->sc_pad));
bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
for (i = 0; i < sizeof(sc->sc_pad); i++)
sc->sc_pad[i] ^= 0x36;
- /* precompute first part of inner hash */
+ /* Precompute first part of inner hash. */
SHA1Init(&sc->sc_sha1);
SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
@@ -294,8 +357,7 @@ carp_hmac_prepare(struct carp_softc *sc)
found = 0;
last = cur;
cur.s_addr = 0xffffffff;
- IF_ADDR_RLOCK(SC2IFP(sc));
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+ CARP_FOREACH_IFA(sc, ifa) {
in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
if (ifa->ifa_addr->sa_family == AF_INET &&
ntohl(in.s_addr) > ntohl(last.s_addr) &&
@@ -304,7 +366,6 @@ carp_hmac_prepare(struct carp_softc *sc)
found++;
}
}
- IF_ADDR_RUNLOCK(SC2IFP(sc));
if (found)
SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
} while (found);
@@ -315,8 +376,7 @@ carp_hmac_prepare(struct carp_softc *sc)
found = 0;
last6 = cur6;
memset(&cur6, 0xff, sizeof(cur6));
- IF_ADDR_RLOCK(SC2IFP(sc));
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+ CARP_FOREACH_IFA(sc, ifa) {
in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
if (IN6_IS_SCOPE_EMBED(&in6))
in6.s6_addr16[1] = 0;
@@ -327,7 +387,6 @@ carp_hmac_prepare(struct carp_softc *sc)
found++;
}
}
- IF_ADDR_RUNLOCK(SC2IFP(sc));
if (found)
SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
} while (found);
@@ -336,17 +395,16 @@ carp_hmac_prepare(struct carp_softc *sc)
/* convert ipad to opad */
for (i = 0; i < sizeof(sc->sc_pad); i++)
sc->sc_pad[i] ^= 0x36 ^ 0x5c;
-
- if (sc->sc_carpdev)
- CARP_SCUNLOCK(sc);
}
static void
-carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
+carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
unsigned char md[20])
{
SHA1_CTX sha1ctx;
+ CARP_LOCK_ASSERT(sc);
+
/* fetch first half of inner hash */
bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
@@ -361,260 +419,68 @@ carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
}
static int
-carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
+carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
unsigned char md[20])
{
unsigned char md2[20];
- CARP_SCLOCK_ASSERT(sc);
+ CARP_LOCK_ASSERT(sc);
carp_hmac_generate(sc, counter, md2);
return (bcmp(md, md2, sizeof(md2)));
}
-static void
-carp_setroute(struct carp_softc *sc, int cmd)
-{
- struct ifaddr *ifa;
- int s;
-
- if (sc->sc_carpdev)
- CARP_SCLOCK_ASSERT(sc);
-
- s = splnet();
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
-#ifdef INET
- if (ifa->ifa_addr->sa_family == AF_INET &&
- sc->sc_carpdev != NULL) {
- int count = carp_addrcount(
- (struct carp_if *)sc->sc_carpdev->if_carp,
- ifatoia(ifa), CARP_COUNT_MASTER);
-
- if ((cmd == RTM_ADD && count == 1) ||
- (cmd == RTM_DELETE && count == 0))
- rtinit(ifa, cmd, RTF_UP | RTF_HOST);
- }
-#endif
- }
- splx(s);
-}
-
-static int
-carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
-{
-
- struct carp_softc *sc;
- struct ifnet *ifp;
-
- sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
- ifp = SC2IFP(sc) = if_alloc(IFT_ETHER);
- if (ifp == NULL) {
- free(sc, M_CARP);
- return (ENOSPC);
- }
-
- sc->sc_flags_backup = 0;
- sc->sc_suppress = 0;
- sc->sc_advbase = CARP_DFLTINTV;
- sc->sc_vhid = -1; /* required setting */
- sc->sc_advskew = 0;
- sc->sc_init_counter = 1;
- sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
-#ifdef INET
- sc->sc_imo.imo_membership = (struct in_multi **)malloc(
- (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
- M_WAITOK);
- sc->sc_imo.imo_mfilters = NULL;
- sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
- sc->sc_imo.imo_multicast_vif = -1;
-#endif
-#ifdef INET6
- sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc(
- (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
- M_WAITOK);
- sc->sc_im6o.im6o_mfilters = NULL;
- sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
- sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
-#endif
-
- callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
- callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
- callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE);
-
- ifp->if_softc = sc;
- if_initname(ifp, CARP_IFNAME, unit);
- ifp->if_mtu = ETHERMTU;
- ifp->if_flags = IFF_LOOPBACK;
- ifp->if_ioctl = carp_ioctl;
- ifp->if_output = carp_looutput;
- ifp->if_start = carp_start;
- ifp->if_type = IFT_CARP;
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
- ifp->if_hdrlen = 0;
- if_attach(ifp);
- bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t));
- mtx_lock(&carp_mtx);
- LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
- mtx_unlock(&carp_mtx);
- return (0);
-}
-
-static void
-carp_clone_destroy(struct ifnet *ifp)
-{
- struct carp_softc *sc = ifp->if_softc;
-
- if (sc->sc_carpdev)
- CARP_SCLOCK(sc);
- carpdetach(sc, 1); /* Returns unlocked. */
-
- mtx_lock(&carp_mtx);
- LIST_REMOVE(sc, sc_next);
- mtx_unlock(&carp_mtx);
- bpfdetach(ifp);
- if_detach(ifp);
- if_free_type(ifp, IFT_ETHER);
-#ifdef INET
- free(sc->sc_imo.imo_membership, M_CARP);
-#endif
-#ifdef INET6
- free(sc->sc_im6o.im6o_membership, M_CARP);
-#endif
- free(sc, M_CARP);
-}
-
-/*
- * This function can be called on CARP interface destroy path,
- * and in case of the removal of the underlying interface as
- * well. We differentiate these two cases: in case of destruction
- * of the underlying interface, we do not cleanup our multicast
- * memberships, since they are already freed. But we purge pointers
- * to multicast structures, since they are no longer valid, to
- * avoid panic in future calls to carpdetach(). Also, we do not
- * release the lock on return, because the function will be
- * called once more, for another CARP instance on the same
- * interface.
- */
-static void
-carpdetach(struct carp_softc *sc, int unlock)
-{
- struct carp_if *cif;
-
- callout_stop(&sc->sc_ad_tmo);
- callout_stop(&sc->sc_md_tmo);
- callout_stop(&sc->sc_md6_tmo);
-
- if (sc->sc_suppress)
- carp_suppress_preempt--;
- sc->sc_suppress = 0;
-
- if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
- carp_suppress_preempt--;
- sc->sc_sendad_errors = 0;
-
- carp_set_state(sc, INIT);
- SC2IFP(sc)->if_flags &= ~IFF_UP;
- carp_setrun(sc, 0);
-#ifdef INET
- carp_multicast_cleanup(sc, unlock);
-#endif
-#ifdef INET6
- carp_multicast6_cleanup(sc, unlock);
-#endif
-
- if (sc->sc_carpdev != NULL) {
- cif = (struct carp_if *)sc->sc_carpdev->if_carp;
- CARP_LOCK_ASSERT(cif);
- TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
- if (!--cif->vhif_nvrs) {
- ifpromisc(sc->sc_carpdev, 0);
- sc->sc_carpdev->if_carp = NULL;
- CARP_LOCK_DESTROY(cif);
- free(cif, M_CARP);
- } else if (unlock)
- CARP_UNLOCK(cif);
- sc->sc_carpdev = NULL;
- }
-}
-
-/* Detach an interface from the carp. */
-static void
-carp_ifdetach(void *arg __unused, struct ifnet *ifp)
-{
- struct carp_if *cif = (struct carp_if *)ifp->if_carp;
- struct carp_softc *sc, *nextsc;
-
- if (cif == NULL)
- return;
-
- /*
- * XXX: At the end of for() cycle the lock will be destroyed.
- */
- CARP_LOCK(cif);
- for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
- nextsc = TAILQ_NEXT(sc, sc_list);
- carpdetach(sc, 0);
- }
-}
-
/*
* process input packet.
* we have rearranged checks order compared to the rfc,
* but it seems more efficient this way or not possible otherwise.
*/
#ifdef INET
-void
-carp_input(struct mbuf *m, int hlen)
+int
+carp_input(struct mbuf **mp, int *offp, int proto)
{
+ struct mbuf *m = *mp;
struct ip *ip = mtod(m, struct ip *);
struct carp_header *ch;
int iplen, len;
- CARPSTATS_INC(carps_ipackets);
+ iplen = *offp;
+ *mp = NULL;
- if (!carp_opts[CARPCTL_ALLOW]) {
- m_freem(m);
- return;
- }
+ CARPSTATS_INC(carps_ipackets);
- /* check if received on a valid carp interface */
- if (m->m_pkthdr.rcvif->if_carp == NULL) {
- CARPSTATS_INC(carps_badif);
- CARP_DEBUG("carp_input: packet received on non-carp "
- "interface: %s\n",
- m->m_pkthdr.rcvif->if_xname);
+ if (!V_carp_allow) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
/* verify that the IP TTL is 255. */
if (ip->ip_ttl != CARP_DFLTTL) {
CARPSTATS_INC(carps_badttl);
- CARP_DEBUG("carp_input: received ttl %d != 255 on %s\n",
+ CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
ip->ip_ttl,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
iplen = ip->ip_hl << 2;
if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
CARPSTATS_INC(carps_badlen);
- CARP_DEBUG("carp_input: received len %zd < "
- "sizeof(struct carp_header) on %s\n",
- m->m_len - sizeof(struct ip),
+ CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
+ "on %s\n", __func__, m->m_len - sizeof(struct ip),
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
if (iplen + sizeof(*ch) < m->m_len) {
if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
CARPSTATS_INC(carps_hdrops);
- CARP_DEBUG("carp_input: pullup failed\n");
- return;
+ CARP_DEBUG("%s: pullup failed\n", __func__);
+ return (IPPROTO_DONE);
}
ip = mtod(m, struct ip *);
}
@@ -627,32 +493,33 @@ carp_input(struct mbuf *m, int hlen)
len = iplen + sizeof(*ch);
if (len > m->m_pkthdr.len) {
CARPSTATS_INC(carps_badlen);
- CARP_DEBUG("carp_input: packet too short %d on %s\n",
+ CARP_DEBUG("%s: packet too short %d on %s\n", __func__,
m->m_pkthdr.len,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
if ((m = m_pullup(m, len)) == NULL) {
CARPSTATS_INC(carps_hdrops);
- return;
+ return (IPPROTO_DONE);
}
ip = mtod(m, struct ip *);
ch = (struct carp_header *)((char *)ip + iplen);
/* verify the CARP checksum */
m->m_data += iplen;
- if (carp_cksum(m, len - iplen)) {
+ if (in_cksum(m, len - iplen)) {
CARPSTATS_INC(carps_badsum);
- CARP_DEBUG("carp_input: checksum failed on %s\n",
+ CARP_DEBUG("%s: checksum failed on %s\n", __func__,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
m->m_data -= iplen;
carp_input_c(m, ch, AF_INET);
+ return (IPPROTO_DONE);
}
#endif
@@ -667,7 +534,7 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
CARPSTATS_INC(carps_ipackets6);
- if (!carp_opts[CARPCTL_ALLOW]) {
+ if (!V_carp_allow) {
m_freem(m);
return (IPPROTO_DONE);
}
@@ -675,9 +542,8 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
/* check if received on a valid carp interface */
if (m->m_pkthdr.rcvif->if_carp == NULL) {
CARPSTATS_INC(carps_badif);
- CARP_DEBUG("carp6_input: packet received on non-carp "
- "interface: %s\n",
- m->m_pkthdr.rcvif->if_xname);
+ CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
+ __func__, m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return (IPPROTO_DONE);
}
@@ -685,9 +551,8 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
/* verify that the IP TTL is 255 */
if (ip6->ip6_hlim != CARP_DFLTTL) {
CARPSTATS_INC(carps_badttl);
- CARP_DEBUG("carp6_input: received ttl %d != 255 on %s\n",
- ip6->ip6_hlim,
- m->m_pkthdr.rcvif->if_xname);
+ CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
+ ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return (IPPROTO_DONE);
}
@@ -697,16 +562,16 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
if (ch == NULL) {
CARPSTATS_INC(carps_badlen);
- CARP_DEBUG("carp6_input: packet size %u too small\n", len);
+ CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
return (IPPROTO_DONE);
}
/* verify the CARP checksum */
m->m_data += *offp;
- if (carp_cksum(m, sizeof(*ch))) {
+ if (in_cksum(m, sizeof(*ch))) {
CARPSTATS_INC(carps_badsum);
- CARP_DEBUG("carp6_input: checksum failed, on %s\n",
+ CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return (IPPROTO_DONE);
@@ -722,62 +587,46 @@ static void
carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
{
struct ifnet *ifp = m->m_pkthdr.rcvif;
+ struct ifaddr *ifa;
struct carp_softc *sc;
- u_int64_t tmp_counter;
+ uint64_t tmp_counter;
struct timeval sc_tv, ch_tv;
/* verify that the VHID is valid on the receiving interface */
- CARP_LOCK(ifp->if_carp);
- TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
- if (sc->sc_vhid == ch->carp_vhid)
+ IF_ADDR_RLOCK(ifp);
+ IFNET_FOREACH_IFA(ifp, ifa)
+ if (ifa->ifa_addr->sa_family == af &&
+ ifa->ifa_carp->sc_vhid == ch->carp_vhid) {
+ ifa_ref(ifa);
break;
+ }
+ IF_ADDR_RUNLOCK(ifp);
- if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) &&
- (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
+ if (ifa == NULL) {
CARPSTATS_INC(carps_badvhid);
- CARP_UNLOCK(ifp->if_carp);
m_freem(m);
return;
}
- getmicrotime(&SC2IFP(sc)->if_lastchange);
- SC2IFP(sc)->if_ipackets++;
- SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
-
- if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
- uint32_t af1 = af;
-#ifdef INET
- struct ip *ip = mtod(m, struct ip *);
-
- /* BPF wants net byte order */
- if (af == AF_INET) {
- ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
- ip->ip_off = htons(ip->ip_off);
- }
-#endif
- bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m);
- }
-
/* verify the CARP version. */
if (ch->carp_version != CARP_VERSION) {
CARPSTATS_INC(carps_badver);
- SC2IFP(sc)->if_ierrors++;
- CARP_UNLOCK(ifp->if_carp);
- CARP_DEBUG("%s; invalid version %d\n",
- SC2IFP(sc)->if_xname,
+ CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname,
ch->carp_version);
+ ifa_free(ifa);
m_freem(m);
return;
}
- /* verify the hash */
+ sc = ifa->ifa_carp;
+ CARP_LOCK(sc);
+ ifa_free(ifa);
+
if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
CARPSTATS_INC(carps_badauth);
- SC2IFP(sc)->if_ierrors++;
- CARP_UNLOCK(ifp->if_carp);
- CARP_DEBUG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
- m_freem(m);
- return;
+ CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
+ sc->sc_vhid, ifp->if_xname);
+ goto out;
}
tmp_counter = ntohl(ch->carp_counter[0]);
@@ -790,10 +639,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
sc->sc_counter = tmp_counter;
sc_tv.tv_sec = sc->sc_advbase;
- if (carp_suppress_preempt && sc->sc_advskew < 240)
- sc_tv.tv_usec = 240 * 1000000 / 256;
- else
- sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+ sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
ch_tv.tv_sec = ch->carp_advbase;
ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
@@ -808,12 +654,10 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
if (timevalcmp(&sc_tv, &ch_tv, >) ||
timevalcmp(&sc_tv, &ch_tv, ==)) {
callout_stop(&sc->sc_ad_tmo);
- CARP_LOG("%s: MASTER -> BACKUP "
- "(more frequent advertisement received)\n",
- SC2IFP(sc)->if_xname);
- carp_set_state(sc, BACKUP);
+ carp_set_state(sc, BACKUP,
+ "more frequent advertisement received");
carp_setrun(sc, 0);
- carp_setroute(sc, RTM_DELETE);
+ carp_delroute(sc);
}
break;
case BACKUP:
@@ -821,12 +665,9 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
* If we're pre-empting masters who advertise slower than us,
* and this one claims to be slower, treat him as down.
*/
- if (carp_opts[CARPCTL_PREEMPT] &&
- timevalcmp(&sc_tv, &ch_tv, <)) {
- CARP_LOG("%s: BACKUP -> MASTER "
- "(preempting a slower master)\n",
- SC2IFP(sc)->if_xname);
- carp_master_down_locked(sc);
+ if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
+ carp_master_down_locked(sc,
+ "preempting a slower master");
break;
}
@@ -837,10 +678,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
*/
sc_tv.tv_sec = sc->sc_advbase * 3;
if (timevalcmp(&sc_tv, &ch_tv, <)) {
- CARP_LOG("%s: BACKUP -> MASTER "
- "(master timed out)\n",
- SC2IFP(sc)->if_xname);
- carp_master_down_locked(sc);
+ carp_master_down_locked(sc, "master will time out");
break;
}
@@ -852,17 +690,15 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
break;
}
- CARP_UNLOCK(ifp->if_carp);
-
+out:
+ CARP_UNLOCK(sc);
m_freem(m);
- return;
}
static int
carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
{
struct m_tag *mtag;
- struct ifnet *ifp = SC2IFP(sc);
if (sc->sc_init_counter) {
/* this could also be seconds since unix epoch */
@@ -878,45 +714,79 @@ carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
/* Tag packet for carp_output */
- mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
- if (mtag == NULL) {
+ if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
+ M_NOWAIT)) == NULL) {
m_freem(m);
- SC2IFP(sc)->if_oerrors++;
+ CARPSTATS_INC(carps_onomem);
return (ENOMEM);
}
- bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
+ bcopy(&sc, mtag + 1, sizeof(sc));
m_tag_prepend(m, mtag);
return (0);
}
+/*
+ * To avoid LORs and possible recursions this function shouldn't
+ * be called directly, but scheduled via taskqueue.
+ */
static void
-carp_send_ad_all(void)
+carp_send_ad_all(void *ctx __unused, int pending __unused)
{
struct carp_softc *sc;
mtx_lock(&carp_mtx);
- LIST_FOREACH(sc, &carpif_list, sc_next) {
- if (sc->sc_carpdev == NULL)
- continue;
- CARP_SCLOCK(sc);
- if ((SC2IFP(sc)->if_flags & IFF_UP) &&
- (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) &&
- sc->sc_state == MASTER)
+ LIST_FOREACH(sc, &carp_list, sc_next)
+ if (sc->sc_state == MASTER) {
+ CARP_LOCK(sc);
+ CURVNET_SET(sc->sc_carpdev->if_vnet);
carp_send_ad_locked(sc);
- CARP_SCUNLOCK(sc);
- }
+ CURVNET_RESTORE();
+ CARP_UNLOCK(sc);
+ }
mtx_unlock(&carp_mtx);
}
+/* Send a periodic advertisement, executed in callout context. */
static void
carp_send_ad(void *v)
{
struct carp_softc *sc = v;
- CARP_SCLOCK(sc);
+ CARP_LOCK_ASSERT(sc);
+ CURVNET_SET(sc->sc_carpdev->if_vnet);
carp_send_ad_locked(sc);
- CARP_SCUNLOCK(sc);
+ CURVNET_RESTORE();
+ CARP_UNLOCK(sc);
+}
+
+static void
+carp_send_ad_error(struct carp_softc *sc, int error)
+{
+
+ if (error) {
+ if (sc->sc_sendad_errors < INT_MAX)
+ sc->sc_sendad_errors++;
+ if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
+ static const char fmt[] = "send error %d on %s";
+ char msg[sizeof(fmt) + IFNAMSIZ];
+
+ sprintf(msg, fmt, error, sc->sc_carpdev->if_xname);
+ carp_demote_adj(V_carp_senderr_adj, msg);
+ }
+ sc->sc_sendad_success = 0;
+ } else {
+ if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS &&
+ ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) {
+ static const char fmt[] = "send ok on %s";
+ char msg[sizeof(fmt) + IFNAMSIZ];
+
+ sprintf(msg, fmt, sc->sc_carpdev->if_xname);
+ carp_demote_adj(-V_carp_senderr_adj, msg);
+ sc->sc_sendad_errors = 0;
+ } else
+ sc->sc_sendad_errors = 0;
+ }
}
static void
@@ -924,190 +794,211 @@ carp_send_ad_locked(struct carp_softc *sc)
{
struct carp_header ch;
struct timeval tv;
+ struct sockaddr sa;
+ struct ifaddr *ifa;
struct carp_header *ch_ptr;
struct mbuf *m;
- int len, advbase, advskew;
+ int len, advskew;
- CARP_SCLOCK_ASSERT(sc);
+ CARP_LOCK_ASSERT(sc);
- /* bow out if we've lost our UPness or RUNNINGuiness */
- if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
- (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
- advbase = 255;
- advskew = 255;
- } else {
- advbase = sc->sc_advbase;
- if (!carp_suppress_preempt || sc->sc_advskew > 240)
- advskew = sc->sc_advskew;
- else
- advskew = 240;
- tv.tv_sec = advbase;
- tv.tv_usec = advskew * 1000000 / 256;
- }
+ advskew = DEMOTE_ADVSKEW(sc);
+ tv.tv_sec = sc->sc_advbase;
+ tv.tv_usec = advskew * 1000000 / 256;
ch.carp_version = CARP_VERSION;
ch.carp_type = CARP_ADVERTISEMENT;
ch.carp_vhid = sc->sc_vhid;
- ch.carp_advbase = advbase;
+ ch.carp_advbase = sc->sc_advbase;
ch.carp_advskew = advskew;
ch.carp_authlen = 7; /* XXX DEFINE */
ch.carp_pad1 = 0; /* must be zero */
ch.carp_cksum = 0;
+ /* XXXGL: OpenBSD picks first ifaddr with needed family. */
+
#ifdef INET
- if (sc->sc_ia) {
+ if (sc->sc_naddrs) {
struct ip *ip;
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
- SC2IFP(sc)->if_oerrors++;
CARPSTATS_INC(carps_onomem);
- /* XXX maybe less ? */
- if (advbase != 255 || advskew != 255)
- callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
- carp_send_ad, sc);
- return;
+ goto resched;
}
len = sizeof(*ip) + sizeof(ch);
m->m_pkthdr.len = len;
m->m_pkthdr.rcvif = NULL;
m->m_len = len;
- MH_ALIGN(m, m->m_len);
+ M_ALIGN(m, m->m_len);
m->m_flags |= M_MCAST;
ip = mtod(m, struct ip *);
ip->ip_v = IPVERSION;
ip->ip_hl = sizeof(*ip) >> 2;
ip->ip_tos = IPTOS_LOWDELAY;
- ip->ip_len = len;
- ip->ip_id = ip_newid();
- ip->ip_off = IP_DF;
+ ip->ip_len = htons(len);
+ ip->ip_off = htons(IP_DF);
ip->ip_ttl = CARP_DFLTTL;
ip->ip_p = IPPROTO_CARP;
ip->ip_sum = 0;
- ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
+ ip_fillid(ip);
+
+ bzero(&sa, sizeof(sa));
+ sa.sa_family = AF_INET;
+ ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
+ if (ifa != NULL) {
+ ip->ip_src.s_addr =
+ ifatoia(ifa)->ia_addr.sin_addr.s_addr;
+ ifa_free(ifa);
+ } else
+ ip->ip_src.s_addr = 0;
ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
ch_ptr = (struct carp_header *)(&ip[1]);
bcopy(&ch, ch_ptr, sizeof(ch));
if (carp_prepare_ad(m, sc, ch_ptr))
- return;
+ goto resched;
m->m_data += sizeof(*ip);
- ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
+ ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
m->m_data -= sizeof(*ip);
- getmicrotime(&SC2IFP(sc)->if_lastchange);
- SC2IFP(sc)->if_opackets++;
- SC2IFP(sc)->if_obytes += len;
CARPSTATS_INC(carps_opackets);
- if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
- SC2IFP(sc)->if_oerrors++;
- if (sc->sc_sendad_errors < INT_MAX)
- sc->sc_sendad_errors++;
- if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
- carp_suppress_preempt++;
- if (carp_suppress_preempt == 1) {
- CARP_SCUNLOCK(sc);
- carp_send_ad_all();
- CARP_SCLOCK(sc);
- }
- }
- sc->sc_sendad_success = 0;
- } else {
- if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
- if (++sc->sc_sendad_success >=
- CARP_SENDAD_MIN_SUCCESS) {
- carp_suppress_preempt--;
- sc->sc_sendad_errors = 0;
- }
- } else
- sc->sc_sendad_errors = 0;
- }
+ carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
+ &sc->sc_carpdev->if_carp->cif_imo, NULL));
}
#endif /* INET */
#ifdef INET6
- if (sc->sc_ia6) {
+ if (sc->sc_naddrs6) {
struct ip6_hdr *ip6;
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
- SC2IFP(sc)->if_oerrors++;
CARPSTATS_INC(carps_onomem);
- /* XXX maybe less ? */
- if (advbase != 255 || advskew != 255)
- callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
- carp_send_ad, sc);
- return;
+ goto resched;
}
len = sizeof(*ip6) + sizeof(ch);
m->m_pkthdr.len = len;
m->m_pkthdr.rcvif = NULL;
m->m_len = len;
- MH_ALIGN(m, m->m_len);
+ M_ALIGN(m, m->m_len);
m->m_flags |= M_MCAST;
ip6 = mtod(m, struct ip6_hdr *);
bzero(ip6, sizeof(*ip6));
ip6->ip6_vfc |= IPV6_VERSION;
ip6->ip6_hlim = CARP_DFLTTL;
ip6->ip6_nxt = IPPROTO_CARP;
- bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
- sizeof(struct in6_addr));
- /* set the multicast destination */
+ bzero(&sa, sizeof(sa));
+
+ /* set the source address */
+ sa.sa_family = AF_INET6;
+ ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
+ if (ifa != NULL) {
+ bcopy(IFA_IN6(ifa), &ip6->ip6_src,
+ sizeof(struct in6_addr));
+ ifa_free(ifa);
+ } else
+ /* This should never happen with IPv6. */
+ bzero(&ip6->ip6_src, sizeof(struct in6_addr));
+ /* Set the multicast destination. */
ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
ip6->ip6_dst.s6_addr8[15] = 0x12;
if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
- SC2IFP(sc)->if_oerrors++;
m_freem(m);
CARP_DEBUG("%s: in6_setscope failed\n", __func__);
- return;
+ goto resched;
}
ch_ptr = (struct carp_header *)(&ip6[1]);
bcopy(&ch, ch_ptr, sizeof(ch));
if (carp_prepare_ad(m, sc, ch_ptr))
- return;
+ goto resched;
m->m_data += sizeof(*ip6);
- ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
+ ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
m->m_data -= sizeof(*ip6);
- getmicrotime(&SC2IFP(sc)->if_lastchange);
- SC2IFP(sc)->if_opackets++;
- SC2IFP(sc)->if_obytes += len;
CARPSTATS_INC(carps_opackets6);
- if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
- SC2IFP(sc)->if_oerrors++;
- if (sc->sc_sendad_errors < INT_MAX)
- sc->sc_sendad_errors++;
- if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
- carp_suppress_preempt++;
- if (carp_suppress_preempt == 1) {
- CARP_SCUNLOCK(sc);
- carp_send_ad_all();
- CARP_SCLOCK(sc);
- }
- }
- sc->sc_sendad_success = 0;
- } else {
- if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
- if (++sc->sc_sendad_success >=
- CARP_SENDAD_MIN_SUCCESS) {
- carp_suppress_preempt--;
- sc->sc_sendad_errors = 0;
- }
- } else
- sc->sc_sendad_errors = 0;
- }
+ carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
+ &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
}
#endif /* INET6 */
- if (advbase != 255 || advskew != 255)
- callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
- carp_send_ad, sc);
+resched:
+ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
+}
+
+static void
+carp_addroute(struct carp_softc *sc)
+{
+ struct ifaddr *ifa;
+
+ CARP_FOREACH_IFA(sc, ifa)
+ carp_ifa_addroute(ifa);
+}
+
+static void
+carp_ifa_addroute(struct ifaddr *ifa)
+{
+
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ in_addprefix(ifatoia(ifa), RTF_UP);
+ ifa_add_loopback_route(ifa,
+ (struct sockaddr *)&ifatoia(ifa)->ia_addr);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ ifa_add_loopback_route(ifa,
+ (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
+ nd6_add_ifa_lle(ifatoia6(ifa));
+ break;
+#endif
+ }
+}
+static void
+carp_delroute(struct carp_softc *sc)
+{
+ struct ifaddr *ifa;
+
+ CARP_FOREACH_IFA(sc, ifa)
+ carp_ifa_delroute(ifa);
+}
+
+static void
+carp_ifa_delroute(struct ifaddr *ifa)
+{
+
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ ifa_del_loopback_route(ifa,
+ (struct sockaddr *)&ifatoia(ifa)->ia_addr);
+ in_scrubprefix(ifatoia(ifa), LLE_STATIC);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ ifa_del_loopback_route(ifa,
+ (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
+ nd6_rem_ifa_lle(ifatoia6(ifa), 1);
+ break;
+#endif
+ }
+}
+
+int
+carp_master(struct ifaddr *ifa)
+{
+ struct carp_softc *sc = ifa->ifa_carp;
+
+ return (sc->sc_state == MASTER);
}
#ifdef INET
@@ -1120,17 +1011,27 @@ static void
carp_send_arp(struct carp_softc *sc)
{
struct ifaddr *ifa;
+ struct in_addr addr;
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
-
+ CARP_FOREACH_IFA(sc, ifa) {
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
+ addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
+ arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr));
+ }
+}
-/* arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */
- arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));
+int
+carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
+{
+ struct carp_softc *sc = ifa->ifa_carp;
- DELAY(1000); /* XXX */
+ if (sc->sc_state == MASTER) {
+ *enaddr = LLADDR(&sc->sc_addr);
+ return (1);
}
+
+ return (0);
}
#endif
@@ -1138,262 +1039,148 @@ carp_send_arp(struct carp_softc *sc)
static void
carp_send_na(struct carp_softc *sc)
{
+ static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
struct ifaddr *ifa;
struct in6_addr *in6;
- static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
-
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+ CARP_FOREACH_IFA(sc, ifa) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
- in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
+ in6 = IFA_IN6(ifa);
nd6_na_output(sc->sc_carpdev, &mcast, in6,
ND_NA_FLAG_OVERRIDE, 1, NULL);
DELAY(1000); /* XXX */
}
}
-#endif /* INET6 */
-
-#ifdef INET
-static int
-carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
-{
- struct carp_softc *vh;
- struct ifaddr *ifa;
- int count = 0;
-
- CARP_LOCK_ASSERT(cif);
-
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
- if ((type == CARP_COUNT_RUNNING &&
- (SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) ||
- (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
- IF_ADDR_RLOCK(SC2IFP(vh));
- TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
- ifa_list) {
- if (ifa->ifa_addr->sa_family == AF_INET &&
- ia->ia_addr.sin_addr.s_addr ==
- ifatoia(ifa)->ia_addr.sin_addr.s_addr)
- count++;
- }
- IF_ADDR_RUNLOCK(SC2IFP(vh));
- }
- }
- return (count);
-}
-
-int
-carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia,
- struct in_addr *isaddr, u_int8_t **enaddr)
-{
- struct carp_if *cif;
- struct carp_softc *vh;
- int index, count = 0;
- struct ifaddr *ifa;
-
- cif = ifp->if_carp;
- CARP_LOCK(cif);
-
- if (carp_opts[CARPCTL_ARPBALANCE]) {
- /*
- * XXX proof of concept implementation.
- * We use the source ip to decide which virtual host should
- * handle the request. If we're master of that virtual host,
- * then we respond, otherwise, just drop the arp packet on
- * the floor.
- */
- count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
- if (count == 0) {
- /* should never reach this */
- CARP_UNLOCK(cif);
- return (0);
- }
-
- /* this should be a hash, like pf_hash() */
- index = ntohl(isaddr->s_addr) % count;
- count = 0;
-
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
- if ((SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) {
- IF_ADDR_RLOCK(SC2IFP(vh));
- TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
- ifa_list) {
- if (ifa->ifa_addr->sa_family ==
- AF_INET &&
- ia->ia_addr.sin_addr.s_addr ==
- ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
- if (count == index) {
- if (vh->sc_state ==
- MASTER) {
- *enaddr = IF_LLADDR(vh->sc_ifp);
- IF_ADDR_RUNLOCK(SC2IFP(vh));
- CARP_UNLOCK(cif);
- return (1);
- } else {
- IF_ADDR_RUNLOCK(SC2IFP(vh));
- CARP_UNLOCK(cif);
- return (0);
- }
- }
- count++;
- }
- }
- IF_ADDR_RUNLOCK(SC2IFP(vh));
- }
- }
- } else {
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
- if ((SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
- ia->ia_ifp == SC2IFP(vh) &&
- vh->sc_state == MASTER) {
- *enaddr = IF_LLADDR(vh->sc_ifp);
- CARP_UNLOCK(cif);
- return (1);
- }
- }
- }
- CARP_UNLOCK(cif);
- return (0);
-}
-#endif
-#ifdef INET6
+/*
+ * Returns ifa in case it's a carp address and it is MASTER, or if the address
+ * matches and is not a carp address. Returns NULL otherwise.
+ */
struct ifaddr *
carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
{
- struct carp_if *cif;
- struct carp_softc *vh;
struct ifaddr *ifa;
- cif = ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
- IF_ADDR_RLOCK(SC2IFP(vh));
- TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) {
- if (IN6_ARE_ADDR_EQUAL(taddr,
- &ifatoia6(ifa)->ia_addr.sin6_addr) &&
- (SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
- vh->sc_state == MASTER) {
- ifa_ref(ifa);
- IF_ADDR_RUNLOCK(SC2IFP(vh));
- CARP_UNLOCK(cif);
- return (ifa);
- }
- }
- IF_ADDR_RUNLOCK(SC2IFP(vh));
+ ifa = NULL;
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
+ continue;
+ if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
+ ifa = NULL;
+ else
+ ifa_ref(ifa);
+ break;
}
- CARP_UNLOCK(cif);
-
- return (NULL);
+ IF_ADDR_RUNLOCK(ifp);
+
+ return (ifa);
}
caddr_t
carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
{
- struct m_tag *mtag;
- struct carp_if *cif;
- struct carp_softc *sc;
struct ifaddr *ifa;
- cif = ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
- IF_ADDR_RLOCK(SC2IFP(sc));
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
- if (IN6_ARE_ADDR_EQUAL(taddr,
- &ifatoia6(ifa)->ia_addr.sin6_addr) &&
- (SC2IFP(sc)->if_flags & IFF_UP) &&
- (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) {
- struct ifnet *ifp = SC2IFP(sc);
- mtag = m_tag_get(PACKET_TAG_CARP,
- sizeof(struct ifnet *), M_NOWAIT);
- if (mtag == NULL) {
- /* better a bit than nothing */
- IF_ADDR_RUNLOCK(SC2IFP(sc));
- CARP_UNLOCK(cif);
- return (IF_LLADDR(sc->sc_ifp));
- }
- bcopy(&ifp, (caddr_t)(mtag + 1),
- sizeof(struct ifnet *));
- m_tag_prepend(m, mtag);
+ IF_ADDR_RLOCK(ifp);
+ IFNET_FOREACH_IFA(ifp, ifa)
+ if (ifa->ifa_addr->sa_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
+ struct carp_softc *sc = ifa->ifa_carp;
+ struct m_tag *mtag;
- IF_ADDR_RUNLOCK(SC2IFP(sc));
- CARP_UNLOCK(cif);
- return (IF_LLADDR(sc->sc_ifp));
- }
+ IF_ADDR_RUNLOCK(ifp);
+
+ mtag = m_tag_get(PACKET_TAG_CARP,
+ sizeof(struct carp_softc *), M_NOWAIT);
+ if (mtag == NULL)
+ /* Better a bit than nothing. */
+ return (LLADDR(&sc->sc_addr));
+
+ bcopy(&sc, mtag + 1, sizeof(sc));
+ m_tag_prepend(m, mtag);
+
+ return (LLADDR(&sc->sc_addr));
}
- IF_ADDR_RUNLOCK(SC2IFP(sc));
- }
- CARP_UNLOCK(cif);
+ IF_ADDR_RUNLOCK(ifp);
return (NULL);
}
-#endif
+#endif /* INET6 */
-struct ifnet *
+int
carp_forus(struct ifnet *ifp, u_char *dhost)
{
- struct carp_if *cif;
- struct carp_softc *vh;
- u_int8_t *ena = dhost;
+ struct carp_softc *sc;
+ uint8_t *ena = dhost;
if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
- return (NULL);
-
- cif = ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
- if ((SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
- vh->sc_state == MASTER &&
- !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
- CARP_UNLOCK(cif);
- return (SC2IFP(vh));
+ return (0);
+
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc) {
+ CARP_LOCK(sc);
+ if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
+ ETHER_ADDR_LEN)) {
+ CARP_UNLOCK(sc);
+ CIF_UNLOCK(ifp->if_carp);
+ return (1);
}
+ CARP_UNLOCK(sc);
+ }
+ CIF_UNLOCK(ifp->if_carp);
- CARP_UNLOCK(cif);
- return (NULL);
+ return (0);
}
+/* Master down timeout event, executed in callout context. */
static void
carp_master_down(void *v)
{
struct carp_softc *sc = v;
- CARP_SCLOCK(sc);
- carp_master_down_locked(sc);
- CARP_SCUNLOCK(sc);
+ CARP_LOCK_ASSERT(sc);
+
+ CURVNET_SET(sc->sc_carpdev->if_vnet);
+ if (sc->sc_state == BACKUP) {
+ carp_master_down_locked(sc, "master timed out");
+ }
+ CURVNET_RESTORE();
+
+ CARP_UNLOCK(sc);
}
static void
-carp_master_down_locked(struct carp_softc *sc)
+carp_master_down_locked(struct carp_softc *sc, const char *reason)
{
- if (sc->sc_carpdev)
- CARP_SCLOCK_ASSERT(sc);
+
+ CARP_LOCK_ASSERT(sc);
switch (sc->sc_state) {
- case INIT:
- printf("%s: master_down event in INIT state\n",
- SC2IFP(sc)->if_xname);
- break;
- case MASTER:
- break;
case BACKUP:
- carp_set_state(sc, MASTER);
+ carp_set_state(sc, MASTER, reason);
carp_send_ad_locked(sc);
#ifdef INET
carp_send_arp(sc);
#endif
#ifdef INET6
carp_send_na(sc);
-#endif /* INET6 */
+#endif
carp_setrun(sc, 0);
- carp_setroute(sc, RTM_ADD);
+ carp_addroute(sc);
+ break;
+ case INIT:
+ case MASTER:
+#ifdef INVARIANTS
+ panic("carp: VHID %u@%s: master_down event in %s state\n",
+ sc->sc_vhid,
+ sc->sc_carpdev->if_xname,
+ sc->sc_state ? "MASTER" : "INIT");
+#endif
break;
}
}
@@ -1407,28 +1194,16 @@ carp_setrun(struct carp_softc *sc, sa_family_t af)
{
struct timeval tv;
- if (sc->sc_carpdev == NULL) {
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
- carp_set_state(sc, INIT);
- return;
- } else
- CARP_SCLOCK_ASSERT(sc);
-
- if (SC2IFP(sc)->if_flags & IFF_UP &&
- sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6) &&
- sc->sc_carpdev->if_link_state == LINK_STATE_UP)
- SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
- else {
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
- carp_setroute(sc, RTM_DELETE);
+ CARP_LOCK_ASSERT(sc);
+
+ if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
+ sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
+ (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0))
return;
- }
switch (sc->sc_state) {
case INIT:
- CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname);
- carp_set_state(sc, BACKUP);
- carp_setroute(sc, RTM_DELETE);
+ carp_set_state(sc, BACKUP, "initialization complete");
carp_setrun(sc, 0);
break;
case BACKUP:
@@ -1441,20 +1216,24 @@ carp_setrun(struct carp_softc *sc, sa_family_t af)
callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
carp_master_down, sc);
break;
-#endif /* INET */
+#endif
#ifdef INET6
case AF_INET6:
callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
carp_master_down, sc);
break;
-#endif /* INET6 */
+#endif
default:
+#ifdef INET
if (sc->sc_naddrs)
callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
carp_master_down, sc);
+#endif
+#ifdef INET6
if (sc->sc_naddrs6)
callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
carp_master_down, sc);
+#endif
break;
}
break;
@@ -1467,842 +1246,779 @@ carp_setrun(struct carp_softc *sc, sa_family_t af)
}
}
-#ifdef INET
-static void
-carp_multicast_cleanup(struct carp_softc *sc, int dofree)
-{
- struct ip_moptions *imo = &sc->sc_imo;
- u_int16_t n = imo->imo_num_memberships;
-
- /* Clean up our own multicast memberships */
- while (n-- > 0) {
- if (imo->imo_membership[n] != NULL) {
- if (dofree)
- in_delmulti(imo->imo_membership[n]);
- imo->imo_membership[n] = NULL;
- }
- }
- KASSERT(imo->imo_mfilters == NULL,
- ("%s: imo_mfilters != NULL", __func__));
- imo->imo_num_memberships = 0;
- imo->imo_multicast_ifp = NULL;
-}
-#endif
-
-#ifdef INET6
-static void
-carp_multicast6_cleanup(struct carp_softc *sc, int dofree)
-{
- struct ip6_moptions *im6o = &sc->sc_im6o;
- u_int16_t n = im6o->im6o_num_memberships;
-
- while (n-- > 0) {
- if (im6o->im6o_membership[n] != NULL) {
- if (dofree)
- in6_mc_leave(im6o->im6o_membership[n], NULL);
- im6o->im6o_membership[n] = NULL;
- }
- }
- KASSERT(im6o->im6o_mfilters == NULL,
- ("%s: im6o_mfilters != NULL", __func__));
- im6o->im6o_num_memberships = 0;
- im6o->im6o_multicast_ifp = NULL;
-}
-#endif
-
-#ifdef INET
+/*
+ * Setup multicast structures.
+ */
static int
-carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
{
- struct ifnet *ifp;
- struct carp_if *cif;
- struct in_ifaddr *ia, *ia_if;
- struct ip_moptions *imo = &sc->sc_imo;
- struct in_addr addr;
- u_long iaddr = htonl(sin->sin_addr.s_addr);
- int own, error;
-
- if (sin->sin_addr.s_addr == 0) {
- if (!(SC2IFP(sc)->if_flags & IFF_UP))
- carp_set_state(sc, INIT);
- if (sc->sc_naddrs)
- SC2IFP(sc)->if_flags |= IFF_UP;
- if (sc->sc_carpdev)
- CARP_SCLOCK(sc);
- carp_setrun(sc, 0);
- if (sc->sc_carpdev)
- CARP_SCUNLOCK(sc);
- return (0);
- }
-
- /* we have to do it by hands to check we won't match on us */
- ia_if = NULL; own = 0;
- IN_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
- /* and, yeah, we need a multicast-capable iface too */
- if (ia->ia_ifp != SC2IFP(sc) &&
- (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
- (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
- if (!ia_if)
- ia_if = ia;
- if (sin->sin_addr.s_addr ==
- ia->ia_addr.sin_addr.s_addr)
- own++;
- }
- }
-
- if (!ia_if) {
- IN_IFADDR_RUNLOCK();
- return (EADDRNOTAVAIL);
- }
+ struct ifnet *ifp = cif->cif_ifp;
+ int error = 0;
- ia = ia_if;
- ifa_ref(&ia->ia_ifa);
- IN_IFADDR_RUNLOCK();
+ switch (sa) {
+#ifdef INET
+ case AF_INET:
+ {
+ struct ip_moptions *imo = &cif->cif_imo;
+ struct in_addr addr;
- ifp = ia->ia_ifp;
+ if (imo->imo_membership)
+ return (0);
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
- (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) {
- ifa_free(&ia->ia_ifa);
- return (EADDRNOTAVAIL);
- }
+ imo->imo_membership = (struct in_multi **)malloc(
+ (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
+ M_WAITOK);
+ imo->imo_mfilters = NULL;
+ imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ imo->imo_multicast_vif = -1;
- if (imo->imo_num_memberships == 0) {
addr.s_addr = htonl(INADDR_CARP_GROUP);
- if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) ==
- NULL) {
- ifa_free(&ia->ia_ifa);
- return (ENOBUFS);
+ if ((error = in_joingroup(ifp, &addr, NULL,
+ &imo->imo_membership[0])) != 0) {
+ free(imo->imo_membership, M_CARP);
+ break;
}
imo->imo_num_memberships++;
imo->imo_multicast_ifp = ifp;
imo->imo_multicast_ttl = CARP_DFLTTL;
imo->imo_multicast_loop = 0;
- }
+ break;
+ }
+#endif
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct ip6_moptions *im6o = &cif->cif_im6o;
+ struct in6_addr in6;
+ struct in6_multi *in6m;
+
+ if (im6o->im6o_membership)
+ return (0);
- if (!ifp->if_carp) {
+ im6o->im6o_membership = (struct in6_multi **)malloc(
+ (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
+ M_ZERO | M_WAITOK);
+ im6o->im6o_mfilters = NULL;
+ im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
+ im6o->im6o_multicast_hlim = CARP_DFLTTL;
+ im6o->im6o_multicast_ifp = ifp;
- cif = malloc(sizeof(*cif), M_CARP,
- M_WAITOK|M_ZERO);
- if (!cif) {
- error = ENOBUFS;
- goto cleanup;
+ /* Join IPv6 CARP multicast group. */
+ bzero(&in6, sizeof(in6));
+ in6.s6_addr16[0] = htons(0xff02);
+ in6.s6_addr8[15] = 0x12;
+ if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
+ free(im6o->im6o_membership, M_CARP);
+ break;
}
- if ((error = ifpromisc(ifp, 1))) {
- free(cif, M_CARP);
- goto cleanup;
+ in6m = NULL;
+ if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
+ free(im6o->im6o_membership, M_CARP);
+ break;
}
-
- CARP_LOCK_INIT(cif);
- CARP_LOCK(cif);
- cif->vhif_ifp = ifp;
- TAILQ_INIT(&cif->vhif_vrs);
- ifp->if_carp = cif;
+ im6o->im6o_membership[0] = in6m;
+ im6o->im6o_num_memberships++;
- } else {
- struct carp_softc *vr;
-
- cif = (struct carp_if *)ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
- if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
- CARP_UNLOCK(cif);
- error = EEXIST;
- goto cleanup;
- }
+ /* Join solicited multicast address. */
+ bzero(&in6, sizeof(in6));
+ in6.s6_addr16[0] = htons(0xff02);
+ in6.s6_addr32[1] = 0;
+ in6.s6_addr32[2] = htonl(1);
+ in6.s6_addr32[3] = 0;
+ in6.s6_addr8[12] = 0xff;
+ if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
+ in6_mc_leave(im6o->im6o_membership[0], NULL);
+ free(im6o->im6o_membership, M_CARP);
+ break;
+ }
+ in6m = NULL;
+ if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
+ in6_mc_leave(im6o->im6o_membership[0], NULL);
+ free(im6o->im6o_membership, M_CARP);
+ break;
+ }
+ im6o->im6o_membership[1] = in6m;
+ im6o->im6o_num_memberships++;
+ break;
+ }
+#endif
}
- sc->sc_ia = ia;
- sc->sc_carpdev = ifp;
- { /* XXX prevent endless loop if already in queue */
- struct carp_softc *vr, *after = NULL;
- int myself = 0;
- cif = (struct carp_if *)ifp->if_carp;
+ return (error);
+}
- /* XXX: cif should not change, right? So we still hold the lock */
- CARP_LOCK_ASSERT(cif);
+/*
+ * Free multicast structures.
+ */
+static void
+carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
+{
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
- if (vr == sc)
- myself = 1;
- if (vr->sc_vhid < sc->sc_vhid)
- after = vr;
- }
+ sx_assert(&carp_sx, SA_XLOCKED);
+
+ switch (sa) {
+#ifdef INET
+ case AF_INET:
+ if (cif->cif_naddrs == 0) {
+ struct ip_moptions *imo = &cif->cif_imo;
+
+ in_leavegroup(imo->imo_membership[0], NULL);
+ KASSERT(imo->imo_mfilters == NULL,
+ ("%s: imo_mfilters != NULL", __func__));
+ free(imo->imo_membership, M_CARP);
+ imo->imo_membership = NULL;
- if (!myself) {
- /* We're trying to keep things in order */
- if (after == NULL) {
- TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
- } else {
- TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
}
- cif->vhif_nvrs++;
- }
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ if (cif->cif_naddrs6 == 0) {
+ struct ip6_moptions *im6o = &cif->cif_im6o;
+
+ in6_mc_leave(im6o->im6o_membership[0], NULL);
+ in6_mc_leave(im6o->im6o_membership[1], NULL);
+ KASSERT(im6o->im6o_mfilters == NULL,
+ ("%s: im6o_mfilters != NULL", __func__));
+ free(im6o->im6o_membership, M_CARP);
+ im6o->im6o_membership = NULL;
+ }
+ break;
+#endif
}
+}
+
+int
+carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
+{
+ struct m_tag *mtag;
+ struct carp_softc *sc;
- sc->sc_naddrs++;
- SC2IFP(sc)->if_flags |= IFF_UP;
- if (own)
- sc->sc_advskew = 0;
- carp_sc_state_locked(sc);
- carp_setrun(sc, 0);
+ if (!sa)
+ return (0);
- CARP_UNLOCK(cif);
- ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */
+ switch (sa->sa_family) {
+#ifdef INET
+ case AF_INET:
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ break;
+#endif
+ default:
+ return (0);
+ }
- return (0);
+ mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
+ if (mtag == NULL)
+ return (0);
-cleanup:
- in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
- ifa_free(&ia->ia_ifa);
- return (error);
-}
+ bcopy(mtag + 1, &sc, sizeof(sc));
-static int
-carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
-{
- int error = 0;
+ /* Set the source MAC address to the Virtual Router MAC Address. */
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_BRIDGE:
+ case IFT_L2VLAN: {
+ struct ether_header *eh;
- if (!--sc->sc_naddrs) {
- struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
- struct ip_moptions *imo = &sc->sc_imo;
+ eh = mtod(m, struct ether_header *);
+ eh->ether_shost[0] = 0;
+ eh->ether_shost[1] = 0;
+ eh->ether_shost[2] = 0x5e;
+ eh->ether_shost[3] = 0;
+ eh->ether_shost[4] = 1;
+ eh->ether_shost[5] = sc->sc_vhid;
+ }
+ break;
+ case IFT_FDDI: {
+ struct fddi_header *fh;
- CARP_LOCK(cif);
- callout_stop(&sc->sc_ad_tmo);
- SC2IFP(sc)->if_flags &= ~IFF_UP;
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
- sc->sc_vhid = -1;
- in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
- imo->imo_multicast_ifp = NULL;
- TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
- if (!--cif->vhif_nvrs) {
- sc->sc_carpdev->if_carp = NULL;
- CARP_LOCK_DESTROY(cif);
- free(cif, M_CARP);
- } else {
- CARP_UNLOCK(cif);
+ fh = mtod(m, struct fddi_header *);
+ fh->fddi_shost[0] = 0;
+ fh->fddi_shost[1] = 0;
+ fh->fddi_shost[2] = 0x5e;
+ fh->fddi_shost[3] = 0;
+ fh->fddi_shost[4] = 1;
+ fh->fddi_shost[5] = sc->sc_vhid;
+ }
+ break;
+ case IFT_ISO88025: {
+ struct iso88025_header *th;
+ th = mtod(m, struct iso88025_header *);
+ th->iso88025_shost[0] = 3;
+ th->iso88025_shost[1] = 0;
+ th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
+ th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
+ th->iso88025_shost[4] = 0;
+ th->iso88025_shost[5] = 0;
}
+ break;
+ default:
+ printf("%s: carp is not supported for the %d interface type\n",
+ ifp->if_xname, ifp->if_type);
+ return (EOPNOTSUPP);
}
- return (error);
+ return (0);
}
-#endif
-#ifdef INET6
-static int
-carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+static struct carp_softc*
+carp_alloc(struct ifnet *ifp)
{
- struct ifnet *ifp;
+ struct carp_softc *sc;
struct carp_if *cif;
- struct in6_ifaddr *ia, *ia_if;
- struct ip6_moptions *im6o = &sc->sc_im6o;
- struct in6_addr in6;
- int own, error;
-
- error = 0;
-
- if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
- if (!(SC2IFP(sc)->if_flags & IFF_UP))
- carp_set_state(sc, INIT);
- if (sc->sc_naddrs6)
- SC2IFP(sc)->if_flags |= IFF_UP;
- if (sc->sc_carpdev)
- CARP_SCLOCK(sc);
- carp_setrun(sc, 0);
- if (sc->sc_carpdev)
- CARP_SCUNLOCK(sc);
- return (0);
- }
- /* we have to do it by hands to check we won't match on us */
- ia_if = NULL; own = 0;
- IN6_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
- int i;
-
- for (i = 0; i < 4; i++) {
- if ((sin6->sin6_addr.s6_addr32[i] &
- ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
- (ia->ia_addr.sin6_addr.s6_addr32[i] &
- ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
- break;
- }
- /* and, yeah, we need a multicast-capable iface too */
- if (ia->ia_ifp != SC2IFP(sc) &&
- (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
- (i == 4)) {
- if (!ia_if)
- ia_if = ia;
- if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
- &ia->ia_addr.sin6_addr))
- own++;
- }
- }
+ if ((cif = ifp->if_carp) == NULL)
+ cif = carp_alloc_if(ifp);
- if (!ia_if) {
- IN6_IFADDR_RUNLOCK();
- return (EADDRNOTAVAIL);
- }
- ia = ia_if;
- ifa_ref(&ia->ia_ifa);
- IN6_IFADDR_RUNLOCK();
- ifp = ia->ia_ifp;
-
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
- (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) {
- ifa_free(&ia->ia_ifa);
- return (EADDRNOTAVAIL);
- }
+ sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
- if (!sc->sc_naddrs6) {
- struct in6_multi *in6m;
+ sc->sc_advbase = CARP_DFLTINTV;
+ sc->sc_vhid = -1; /* required setting */
+ sc->sc_init_counter = 1;
+ sc->sc_state = INIT;
- im6o->im6o_multicast_ifp = ifp;
+ sc->sc_ifasiz = sizeof(struct ifaddr *);
+ sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
+ sc->sc_carpdev = ifp;
- /* join CARP multicast address */
- bzero(&in6, sizeof(in6));
- in6.s6_addr16[0] = htons(0xff02);
- in6.s6_addr8[15] = 0x12;
- if (in6_setscope(&in6, ifp, NULL) != 0)
- goto cleanup;
- in6m = NULL;
- error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
- if (error)
- goto cleanup;
- im6o->im6o_membership[0] = in6m;
- im6o->im6o_num_memberships++;
+ CARP_LOCK_INIT(sc);
+#ifdef INET
+ callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
+#endif
+#ifdef INET6
+ callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
+#endif
+ callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
- /* join solicited multicast address */
- bzero(&in6, sizeof(in6));
- in6.s6_addr16[0] = htons(0xff02);
- in6.s6_addr32[1] = 0;
- in6.s6_addr32[2] = htonl(1);
- in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
- in6.s6_addr8[12] = 0xff;
- if (in6_setscope(&in6, ifp, NULL) != 0)
- goto cleanup;
- in6m = NULL;
- error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
- if (error)
- goto cleanup;
- im6o->im6o_membership[1] = in6m;
- im6o->im6o_num_memberships++;
- }
+ CIF_LOCK(cif);
+ TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
+ CIF_UNLOCK(cif);
- if (!ifp->if_carp) {
- cif = malloc(sizeof(*cif), M_CARP,
- M_WAITOK|M_ZERO);
- if (!cif) {
- error = ENOBUFS;
- goto cleanup;
- }
- if ((error = ifpromisc(ifp, 1))) {
- free(cif, M_CARP);
- goto cleanup;
- }
+ mtx_lock(&carp_mtx);
+ LIST_INSERT_HEAD(&carp_list, sc, sc_next);
+ mtx_unlock(&carp_mtx);
- CARP_LOCK_INIT(cif);
- CARP_LOCK(cif);
- cif->vhif_ifp = ifp;
- TAILQ_INIT(&cif->vhif_vrs);
- ifp->if_carp = cif;
+ return (sc);
+}
- } else {
- struct carp_softc *vr;
+static void
+carp_grow_ifas(struct carp_softc *sc)
+{
+ struct ifaddr **new;
+
+ new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO);
+ CARP_LOCK(sc);
+ bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
+ free(sc->sc_ifas, M_CARP);
+ sc->sc_ifas = new;
+ sc->sc_ifasiz *= 2;
+ CARP_UNLOCK(sc);
+}
- cif = (struct carp_if *)ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
- if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
- CARP_UNLOCK(cif);
- error = EINVAL;
- goto cleanup;
- }
- }
- sc->sc_ia6 = ia;
- sc->sc_carpdev = ifp;
+static void
+carp_destroy(struct carp_softc *sc)
+{
+ struct ifnet *ifp = sc->sc_carpdev;
+ struct carp_if *cif = ifp->if_carp;
- { /* XXX prevent endless loop if already in queue */
- struct carp_softc *vr, *after = NULL;
- int myself = 0;
- cif = (struct carp_if *)ifp->if_carp;
- CARP_LOCK_ASSERT(cif);
-
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
- if (vr == sc)
- myself = 1;
- if (vr->sc_vhid < sc->sc_vhid)
- after = vr;
- }
+ sx_assert(&carp_sx, SA_XLOCKED);
- if (!myself) {
- /* We're trying to keep things in order */
- if (after == NULL) {
- TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
- } else {
- TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
- }
- cif->vhif_nvrs++;
- }
- }
+ if (sc->sc_suppress)
+ carp_demote_adj(-V_carp_ifdown_adj, "vhid removed");
+ CARP_UNLOCK(sc);
- sc->sc_naddrs6++;
- SC2IFP(sc)->if_flags |= IFF_UP;
- if (own)
- sc->sc_advskew = 0;
- carp_sc_state_locked(sc);
- carp_setrun(sc, 0);
+ CIF_LOCK(cif);
+ TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
+ CIF_UNLOCK(cif);
- CARP_UNLOCK(cif);
- ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */
+ mtx_lock(&carp_mtx);
+ LIST_REMOVE(sc, sc_next);
+ mtx_unlock(&carp_mtx);
- return (0);
+ callout_drain(&sc->sc_ad_tmo);
+#ifdef INET
+ callout_drain(&sc->sc_md_tmo);
+#endif
+#ifdef INET6
+ callout_drain(&sc->sc_md6_tmo);
+#endif
+ CARP_LOCK_DESTROY(sc);
-cleanup:
- if (!sc->sc_naddrs6)
- carp_multicast6_cleanup(sc, 1);
- ifa_free(&ia->ia_ifa);
- return (error);
+ free(sc->sc_ifas, M_CARP);
+ free(sc, M_CARP);
}
-static int
-carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+static struct carp_if*
+carp_alloc_if(struct ifnet *ifp)
{
- int error = 0;
+ struct carp_if *cif;
+ int error;
- if (!--sc->sc_naddrs6) {
- struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
+ cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
- CARP_LOCK(cif);
- callout_stop(&sc->sc_ad_tmo);
- SC2IFP(sc)->if_flags &= ~IFF_UP;
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
- sc->sc_vhid = -1;
- carp_multicast6_cleanup(sc, 1);
- TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
- if (!--cif->vhif_nvrs) {
- CARP_LOCK_DESTROY(cif);
- sc->sc_carpdev->if_carp = NULL;
- free(cif, M_CARP);
- } else
- CARP_UNLOCK(cif);
- }
+ if ((error = ifpromisc(ifp, 1)) != 0)
+ printf("%s: ifpromisc(%s) failed: %d\n",
+ __func__, ifp->if_xname, error);
+ else
+ cif->cif_flags |= CIF_PROMISC;
- return (error);
+ CIF_LOCK_INIT(cif);
+ cif->cif_ifp = ifp;
+ TAILQ_INIT(&cif->cif_vrs);
+
+ IF_ADDR_WLOCK(ifp);
+ ifp->if_carp = cif;
+ if_ref(ifp);
+ IF_ADDR_WUNLOCK(ifp);
+
+ return (cif);
}
-#endif /* INET6 */
-static int
-carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
+static void
+carp_free_if(struct carp_if *cif)
+{
+ struct ifnet *ifp = cif->cif_ifp;
+
+ CIF_LOCK_ASSERT(cif);
+ KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
+ __func__));
+
+ IF_ADDR_WLOCK(ifp);
+ ifp->if_carp = NULL;
+ IF_ADDR_WUNLOCK(ifp);
+
+ CIF_LOCK_DESTROY(cif);
+
+ if (cif->cif_flags & CIF_PROMISC)
+ ifpromisc(ifp, 0);
+ if_rele(ifp);
+
+ free(cif, M_CARP);
+}
+
+static void
+carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv)
+{
+
+ CARP_LOCK(sc);
+ carpr->carpr_state = sc->sc_state;
+ carpr->carpr_vhid = sc->sc_vhid;
+ carpr->carpr_advbase = sc->sc_advbase;
+ carpr->carpr_advskew = sc->sc_advskew;
+ if (priv)
+ bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
+ else
+ bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
+ CARP_UNLOCK(sc);
+}
+
+int
+carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
{
- struct carp_softc *sc = ifp->if_softc, *vr;
struct carpreq carpr;
- struct ifaddr *ifa;
- struct ifreq *ifr;
- struct ifaliasreq *ifra;
- int locked = 0, error = 0;
+ struct ifnet *ifp;
+ struct carp_softc *sc = NULL;
+ int error = 0, locked = 0;
- ifa = (struct ifaddr *)addr;
- ifra = (struct ifaliasreq *)addr;
- ifr = (struct ifreq *)addr;
+ if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
+ return (error);
- switch (cmd) {
- case SIOCSIFADDR:
- switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
- case AF_INET:
- SC2IFP(sc)->if_flags |= IFF_UP;
- bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
- sizeof(struct sockaddr));
- error = carp_set_addr(sc, satosin(ifa->ifa_addr));
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- SC2IFP(sc)->if_flags |= IFF_UP;
- error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
- break;
-#endif /* INET6 */
- default:
- error = EAFNOSUPPORT;
- break;
- }
- break;
+ ifp = ifunit_ref(ifr->ifr_name);
+ if (ifp == NULL)
+ return (ENXIO);
- case SIOCAIFADDR:
- switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
- case AF_INET:
- SC2IFP(sc)->if_flags |= IFF_UP;
- bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
- sizeof(struct sockaddr));
- error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- SC2IFP(sc)->if_flags |= IFF_UP;
- error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
- break;
-#endif /* INET6 */
- default:
- error = EAFNOSUPPORT;
- break;
- }
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_L2VLAN:
+ case IFT_BRIDGE:
+ case IFT_FDDI:
+ case IFT_ISO88025:
break;
+ default:
+ error = EOPNOTSUPP;
+ goto out;
+ }
- case SIOCDIFADDR:
- switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
- case AF_INET:
- error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
+ if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+ error = EADDRNOTAVAIL;
+ goto out;
+ }
+
+ sx_xlock(&carp_sx);
+ switch (cmd) {
+ case SIOCSVH:
+ if ((error = priv_check(td, PRIV_NETINET_CARP)))
break;
-#endif /* INET6 */
- default:
- error = EAFNOSUPPORT;
+ if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID ||
+ carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) {
+ error = EINVAL;
break;
}
- break;
- case SIOCSIFFLAGS:
- if (sc->sc_carpdev) {
- locked = 1;
- CARP_SCLOCK(sc);
+ if (ifp->if_carp) {
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc)
+ if (sc->sc_vhid == carpr.carpr_vhid)
+ break;
+ CIF_UNLOCK(ifp->if_carp);
}
- if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
- callout_stop(&sc->sc_ad_tmo);
- callout_stop(&sc->sc_md_tmo);
- callout_stop(&sc->sc_md6_tmo);
- if (sc->sc_state == MASTER)
- carp_send_ad_locked(sc);
- carp_set_state(sc, INIT);
- carp_setrun(sc, 0);
- } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
- SC2IFP(sc)->if_flags |= IFF_UP;
- carp_setrun(sc, 0);
+ if (sc == NULL) {
+ sc = carp_alloc(ifp);
+ CARP_LOCK(sc);
+ sc->sc_vhid = carpr.carpr_vhid;
+ LLADDR(&sc->sc_addr)[0] = 0;
+ LLADDR(&sc->sc_addr)[1] = 0;
+ LLADDR(&sc->sc_addr)[2] = 0x5e;
+ LLADDR(&sc->sc_addr)[3] = 0;
+ LLADDR(&sc->sc_addr)[4] = 1;
+ LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
+ } else
+ CARP_LOCK(sc);
+ locked = 1;
+ if (carpr.carpr_advbase > 0) {
+ if (carpr.carpr_advbase > 255 ||
+ carpr.carpr_advbase < CARP_DFLTINTV) {
+ error = EINVAL;
+ break;
+ }
+ sc->sc_advbase = carpr.carpr_advbase;
}
- break;
-
- case SIOCSVH:
- error = priv_check(curthread, PRIV_NETINET_CARP);
- if (error)
- break;
- if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
+ if (carpr.carpr_advskew >= 255) {
+ error = EINVAL;
break;
- error = 1;
- if (sc->sc_carpdev) {
- locked = 1;
- CARP_SCLOCK(sc);
}
- if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
+ sc->sc_advskew = carpr.carpr_advskew;
+ if (carpr.carpr_key[0] != '\0') {
+ bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
+ carp_hmac_prepare(sc);
+ }
+ if (sc->sc_state != INIT &&
+ carpr.carpr_state != sc->sc_state) {
switch (carpr.carpr_state) {
case BACKUP:
callout_stop(&sc->sc_ad_tmo);
- carp_set_state(sc, BACKUP);
+ carp_set_state(sc, BACKUP,
+ "user requested via ifconfig");
carp_setrun(sc, 0);
- carp_setroute(sc, RTM_DELETE);
+ carp_delroute(sc);
break;
case MASTER:
- carp_master_down_locked(sc);
+ carp_master_down_locked(sc,
+ "user requested via ifconfig");
break;
default:
break;
}
}
- if (carpr.carpr_vhid > 0) {
- if (carpr.carpr_vhid > 255) {
- error = EINVAL;
- break;
- }
- if (sc->sc_carpdev) {
- struct carp_if *cif;
- cif = (struct carp_if *)sc->sc_carpdev->if_carp;
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
- if (vr != sc &&
- vr->sc_vhid == carpr.carpr_vhid) {
- error = EEXIST;
- break;
- }
- if (error == EEXIST)
- break;
- }
- sc->sc_vhid = carpr.carpr_vhid;
- IF_LLADDR(sc->sc_ifp)[0] = 0;
- IF_LLADDR(sc->sc_ifp)[1] = 0;
- IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
- IF_LLADDR(sc->sc_ifp)[3] = 0;
- IF_LLADDR(sc->sc_ifp)[4] = 1;
- IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
- error--;
+ break;
+
+ case SIOCGVH:
+ {
+ int priveleged;
+
+ if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) {
+ error = EINVAL;
+ break;
}
- if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
- if (carpr.carpr_advskew >= 255) {
- error = EINVAL;
+ if (carpr.carpr_count < 1) {
+ error = EMSGSIZE;
+ break;
+ }
+ if (ifp->if_carp == NULL) {
+ error = ENOENT;
+ break;
+ }
+
+ priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0);
+ if (carpr.carpr_vhid != 0) {
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc)
+ if (sc->sc_vhid == carpr.carpr_vhid)
+ break;
+ CIF_UNLOCK(ifp->if_carp);
+ if (sc == NULL) {
+ error = ENOENT;
break;
}
- if (carpr.carpr_advbase > 255) {
- error = EINVAL;
+ carp_carprcp(&carpr, sc, priveleged);
+ error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
+ } else {
+ int i, count;
+
+ count = 0;
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc)
+ count++;
+
+ if (count > carpr.carpr_count) {
+ CIF_UNLOCK(ifp->if_carp);
+ error = EMSGSIZE;
break;
}
- sc->sc_advbase = carpr.carpr_advbase;
- sc->sc_advskew = carpr.carpr_advskew;
- error--;
- }
- bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
- if (error > 0)
- error = EINVAL;
- else {
- error = 0;
- carp_setrun(sc, 0);
- }
- break;
- case SIOCGVH:
- /* XXX: lockless read */
- bzero(&carpr, sizeof(carpr));
- carpr.carpr_state = sc->sc_state;
- carpr.carpr_vhid = sc->sc_vhid;
- carpr.carpr_advbase = sc->sc_advbase;
- carpr.carpr_advskew = sc->sc_advskew;
- error = priv_check(curthread, PRIV_NETINET_CARP);
- if (error == 0)
- bcopy(sc->sc_key, carpr.carpr_key,
- sizeof(carpr.carpr_key));
- error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
+ i = 0;
+ IFNET_FOREACH_CARP(ifp, sc) {
+ carp_carprcp(&carpr, sc, priveleged);
+ carpr.carpr_count = count;
+ error = copyout(&carpr, ifr->ifr_data +
+ (i * sizeof(carpr)), sizeof(carpr));
+ if (error) {
+ CIF_UNLOCK(ifp->if_carp);
+ break;
+ }
+ i++;
+ }
+ CIF_UNLOCK(ifp->if_carp);
+ }
break;
-
+ }
default:
error = EINVAL;
}
+ sx_xunlock(&carp_sx);
+out:
if (locked)
- CARP_SCUNLOCK(sc);
-
- carp_hmac_prepare(sc);
+ CARP_UNLOCK(sc);
+ if_rele(ifp);
return (error);
}
-/*
- * XXX: this is looutput. We should eventually use it from there.
- */
static int
-carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
- struct route *ro)
+carp_get_vhid(struct ifaddr *ifa)
{
- u_int32_t af;
- struct rtentry *rt = NULL;
-
- M_ASSERTPKTHDR(m); /* check if we have the packet header */
-
- if (ro != NULL)
- rt = ro->ro_rt;
- if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
- m_freem(m);
- return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
- rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
- }
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
- bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
-
-#if 1 /* XXX */
- switch (dst->sa_family) {
- case AF_INET:
- case AF_INET6:
- case AF_IPX:
- case AF_APPLETALK:
- break;
- default:
- printf("carp_looutput: af=%d unexpected\n", dst->sa_family);
- m_freem(m);
- return (EAFNOSUPPORT);
- }
-#endif
- return(if_simloop(ifp, m, dst->sa_family, 0));
-}
+ if (ifa == NULL || ifa->ifa_carp == NULL)
+ return (0);
-/*
- * Start output on carp interface. This function should never be called.
- */
-static void
-carp_start(struct ifnet *ifp)
-{
-#ifdef DEBUG
- printf("%s: start called\n", ifp->if_xname);
-#endif
+ return (ifa->ifa_carp->sc_vhid);
}
int
-carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
- struct rtentry *rt)
+carp_attach(struct ifaddr *ifa, int vhid)
{
- struct m_tag *mtag;
+ struct ifnet *ifp = ifa->ifa_ifp;
+ struct carp_if *cif = ifp->if_carp;
struct carp_softc *sc;
- struct ifnet *carp_ifp;
+ int index, error;
- if (!sa)
- return (0);
+ KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa));
- switch (sa->sa_family) {
+ switch (ifa->ifa_addr->sa_family) {
#ifdef INET
case AF_INET:
- break;
-#endif /* INET */
+#endif
#ifdef INET6
case AF_INET6:
+#endif
break;
-#endif /* INET6 */
default:
- return (0);
+ return (EPROTOTYPE);
}
- mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
- if (mtag == NULL)
- return (0);
+ sx_xlock(&carp_sx);
+ if (ifp->if_carp == NULL) {
+ sx_xunlock(&carp_sx);
+ return (ENOPROTOOPT);
+ }
- bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
- sc = carp_ifp->if_softc;
+ CIF_LOCK(cif);
+ IFNET_FOREACH_CARP(ifp, sc)
+ if (sc->sc_vhid == vhid)
+ break;
+ CIF_UNLOCK(cif);
+ if (sc == NULL) {
+ sx_xunlock(&carp_sx);
+ return (ENOENT);
+ }
- /* Set the source MAC address to Virtual Router MAC Address */
- switch (ifp->if_type) {
- case IFT_ETHER:
- case IFT_L2VLAN: {
- struct ether_header *eh;
+ error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
+ if (error) {
+ CIF_FREE(cif);
+ sx_xunlock(&carp_sx);
+ return (error);
+ }
- eh = mtod(m, struct ether_header *);
- eh->ether_shost[0] = 0;
- eh->ether_shost[1] = 0;
- eh->ether_shost[2] = 0x5e;
- eh->ether_shost[3] = 0;
- eh->ether_shost[4] = 1;
- eh->ether_shost[5] = sc->sc_vhid;
- }
- break;
- case IFT_FDDI: {
- struct fddi_header *fh;
+ index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
+ if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
+ carp_grow_ifas(sc);
- fh = mtod(m, struct fddi_header *);
- fh->fddi_shost[0] = 0;
- fh->fddi_shost[1] = 0;
- fh->fddi_shost[2] = 0x5e;
- fh->fddi_shost[3] = 0;
- fh->fddi_shost[4] = 1;
- fh->fddi_shost[5] = sc->sc_vhid;
- }
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ cif->cif_naddrs++;
+ sc->sc_naddrs++;
break;
- case IFT_ISO88025: {
- struct iso88025_header *th;
- th = mtod(m, struct iso88025_header *);
- th->iso88025_shost[0] = 3;
- th->iso88025_shost[1] = 0;
- th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
- th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
- th->iso88025_shost[4] = 0;
- th->iso88025_shost[5] = 0;
- }
+#endif
+#ifdef INET6
+ case AF_INET6:
+ cif->cif_naddrs6++;
+ sc->sc_naddrs6++;
break;
- default:
- printf("%s: carp is not supported for this interface type\n",
- ifp->if_xname);
- return (EOPNOTSUPP);
+#endif
}
+ ifa_ref(ifa);
+
+ CARP_LOCK(sc);
+ sc->sc_ifas[index - 1] = ifa;
+ ifa->ifa_carp = sc;
+ carp_hmac_prepare(sc);
+ carp_sc_state(sc);
+ CARP_UNLOCK(sc);
+
+ sx_xunlock(&carp_sx);
+
return (0);
}
-static void
-carp_set_state(struct carp_softc *sc, int state)
+void
+carp_detach(struct ifaddr *ifa)
{
- int link_state;
+ struct ifnet *ifp = ifa->ifa_ifp;
+ struct carp_if *cif = ifp->if_carp;
+ struct carp_softc *sc = ifa->ifa_carp;
+ int i, index;
- if (sc->sc_carpdev)
- CARP_SCLOCK_ASSERT(sc);
+ KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
- if (sc->sc_state == state)
- return;
+ sx_xlock(&carp_sx);
- sc->sc_state = state;
- switch (state) {
- case BACKUP:
- link_state = LINK_STATE_DOWN;
- break;
- case MASTER:
- link_state = LINK_STATE_UP;
+ CARP_LOCK(sc);
+ /* Shift array. */
+ index = sc->sc_naddrs + sc->sc_naddrs6;
+ for (i = 0; i < index; i++)
+ if (sc->sc_ifas[i] == ifa)
+ break;
+ KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
+ for (; i < index - 1; i++)
+ sc->sc_ifas[i] = sc->sc_ifas[i+1];
+ sc->sc_ifas[index - 1] = NULL;
+
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ cif->cif_naddrs--;
+ sc->sc_naddrs--;
break;
- default:
- link_state = LINK_STATE_UNKNOWN;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ cif->cif_naddrs6--;
+ sc->sc_naddrs6--;
break;
+#endif
}
- if_link_state_change(SC2IFP(sc), link_state);
+
+ carp_ifa_delroute(ifa);
+ carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
+
+ ifa->ifa_carp = NULL;
+ ifa_free(ifa);
+
+ carp_hmac_prepare(sc);
+ carp_sc_state(sc);
+
+ if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)
+ carp_destroy(sc);
+ else
+ CARP_UNLOCK(sc);
+
+ CIF_FREE(cif);
+
+ sx_xunlock(&carp_sx);
}
-void
-carp_carpdev_state(struct ifnet *ifp)
+static void
+carp_set_state(struct carp_softc *sc, int state, const char *reason)
{
- struct carp_if *cif;
- cif = ifp->if_carp;
- CARP_LOCK(cif);
- carp_carpdev_state_locked(cif);
- CARP_UNLOCK(cif);
+ CARP_LOCK_ASSERT(sc);
+
+ if (sc->sc_state != state) {
+ const char *carp_states[] = { CARP_STATES };
+ char subsys[IFNAMSIZ+5];
+
+ snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
+ sc->sc_carpdev->if_xname);
+
+ CARP_LOG("%s: %s -> %s (%s)\n", subsys,
+ carp_states[sc->sc_state], carp_states[state], reason);
+
+ sc->sc_state = state;
+
+ devctl_notify("CARP", subsys, carp_states[state], NULL);
+ }
}
static void
-carp_carpdev_state_locked(struct carp_if *cif)
+carp_linkstate(struct ifnet *ifp)
{
struct carp_softc *sc;
- TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
- carp_sc_state_locked(sc);
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc) {
+ CARP_LOCK(sc);
+ carp_sc_state(sc);
+ CARP_UNLOCK(sc);
+ }
+ CIF_UNLOCK(ifp->if_carp);
}
static void
-carp_sc_state_locked(struct carp_softc *sc)
+carp_sc_state(struct carp_softc *sc)
{
- CARP_SCLOCK_ASSERT(sc);
+
+ CARP_LOCK_ASSERT(sc);
if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
!(sc->sc_carpdev->if_flags & IFF_UP)) {
- sc->sc_flags_backup = SC2IFP(sc)->if_flags;
- SC2IFP(sc)->if_flags &= ~IFF_UP;
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
callout_stop(&sc->sc_ad_tmo);
+#ifdef INET
callout_stop(&sc->sc_md_tmo);
+#endif
+#ifdef INET6
callout_stop(&sc->sc_md6_tmo);
- carp_set_state(sc, INIT);
+#endif
+ carp_set_state(sc, INIT, "hardware interface down");
carp_setrun(sc, 0);
- if (!sc->sc_suppress) {
- carp_suppress_preempt++;
- if (carp_suppress_preempt == 1) {
- CARP_SCUNLOCK(sc);
- carp_send_ad_all();
- CARP_SCLOCK(sc);
- }
- }
+ if (!sc->sc_suppress)
+ carp_demote_adj(V_carp_ifdown_adj, "interface down");
sc->sc_suppress = 1;
} else {
- SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
- carp_set_state(sc, INIT);
+ carp_set_state(sc, INIT, "hardware interface up");
carp_setrun(sc, 0);
if (sc->sc_suppress)
- carp_suppress_preempt--;
+ carp_demote_adj(-V_carp_ifdown_adj, "interface up");
sc->sc_suppress = 0;
}
+}
+
+static void
+carp_demote_adj(int adj, char *reason)
+{
+ atomic_add_int(&V_carp_demotion, adj);
+ CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason);
+ taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
+}
+
+static int
+carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ int new, error;
- return;
+ new = V_carp_demotion;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error || !req->newptr)
+ return (error);
+
+ carp_demote_adj(new, "sysctl");
+
+ return (0);
}
#ifdef INET
@@ -2313,7 +2029,7 @@ static struct protosw in_carp_protosw = {
.pr_protocol = IPPROTO_CARP,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = carp_input,
- .pr_output = (pr_output_t *)rip_output,
+ .pr_output = rip_output,
.pr_ctloutput = rip_ctloutput,
.pr_usrreqs = &rip_usrreqs
};
@@ -2321,7 +2037,7 @@ static struct protosw in_carp_protosw = {
#ifdef INET6
extern struct domain inet6domain;
-static struct ip6protosw in6_carp_protosw = {
+static struct protosw in6_carp_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_CARP,
@@ -2337,10 +2053,6 @@ static void
carp_mod_cleanup(void)
{
- if (if_detach_event_tag == NULL)
- return;
- EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
- if_clone_detach(&carp_cloner);
#ifdef INET
if (proto_reg[CARP_INET] == 0) {
(void)ipproto_unregister(IPPROTO_CARP);
@@ -2358,10 +2070,19 @@ carp_mod_cleanup(void)
carp_iamatch6_p = NULL;
carp_macmatch6_p = NULL;
#endif
+ carp_ioctl_p = NULL;
+ carp_attach_p = NULL;
+ carp_detach_p = NULL;
+ carp_get_vhid_p = NULL;
carp_linkstate_p = NULL;
carp_forus_p = NULL;
carp_output_p = NULL;
+ carp_demote_adj_p = NULL;
+ carp_master_p = NULL;
+ mtx_unlock(&carp_mtx);
+ taskqueue_drain(taskqueue_swi, &carp_sendall_task);
mtx_destroy(&carp_mtx);
+ sx_destroy(&carp_sx);
}
static int
@@ -2369,22 +2090,24 @@ carp_mod_load(void)
{
int err;
- if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
- carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
- if (if_detach_event_tag == NULL)
- return (ENOMEM);
mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
- LIST_INIT(&carpif_list);
- if_clone_attach(&carp_cloner);
- carp_linkstate_p = carp_carpdev_state;
+ sx_init(&carp_sx, "carp_sx");
+ LIST_INIT(&carp_list);
+ carp_get_vhid_p = carp_get_vhid;
carp_forus_p = carp_forus;
carp_output_p = carp_output;
+ carp_linkstate_p = carp_linkstate;
+ carp_ioctl_p = carp_ioctl;
+ carp_attach_p = carp_attach;
+ carp_detach_p = carp_detach;
+ carp_demote_adj_p = carp_demote_adj;
+ carp_master_p = carp_master;
#ifdef INET6
carp_iamatch6_p = carp_iamatch6;
carp_macmatch6_p = carp_macmatch6;
proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
(struct protosw *)&in6_carp_protosw);
- if (proto_reg[CARP_INET6] != 0) {
+ if (proto_reg[CARP_INET6]) {
printf("carp: error %d attaching to PF_INET6\n",
proto_reg[CARP_INET6]);
carp_mod_cleanup();
@@ -2400,7 +2123,7 @@ carp_mod_load(void)
#ifdef INET
carp_iamatch_p = carp_iamatch;
proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
- if (proto_reg[CARP_INET] != 0) {
+ if (proto_reg[CARP_INET]) {
printf("carp: error %d attaching to PF_INET\n",
proto_reg[CARP_INET]);
carp_mod_cleanup();
@@ -2413,7 +2136,7 @@ carp_mod_load(void)
return (err);
}
#endif
- return 0;
+ return (0);
}
static int
@@ -2424,17 +2147,13 @@ carp_modevent(module_t mod, int type, void *data)
return carp_mod_load();
/* NOTREACHED */
case MOD_UNLOAD:
- /*
- * XXX: For now, disallow module unloading by default due to
- * a race condition where a thread may dereference one of the
- * function pointer hooks after the module has been
- * unloaded, during processing of a packet, causing a panic.
- */
-#ifdef CARPMOD_CAN_UNLOAD
- carp_mod_cleanup();
-#else
- return (EBUSY);
-#endif
+ mtx_lock(&carp_mtx);
+ if (LIST_EMPTY(&carp_list))
+ carp_mod_cleanup();
+ else {
+ mtx_unlock(&carp_mtx);
+ return (EBUSY);
+ }
break;
default:
diff --git a/freebsd/sys/netinet/ip_carp.h b/freebsd/sys/netinet/ip_carp.h
index 2f2b4f28..5b7e5064 100644
--- a/freebsd/sys/netinet/ip_carp.h
+++ b/freebsd/sys/netinet/ip_carp.h
@@ -117,69 +117,57 @@ struct carpstats {
uint64_t carps_preempt; /* if enabled, preemptions */
};
-#ifdef _KERNEL
-#define CARPSTATS_ADD(name, val) carpstats.name += (val)
-#define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1)
-#endif
-
/*
* Configuration structure for SIOCSVH SIOCGVH
*/
struct carpreq {
+ int carpr_count;
+ int carpr_vhid;
+#define CARP_MAXVHID 255
int carpr_state;
#define CARP_STATES "INIT", "BACKUP", "MASTER"
#define CARP_MAXSTATE 2
- int carpr_vhid;
int carpr_advskew;
+#define CARP_MAXSKEW 240
int carpr_advbase;
unsigned char carpr_key[CARP_KEY_LEN];
};
#define SIOCSVH _IOWR('i', 245, struct ifreq)
#define SIOCGVH _IOWR('i', 246, struct ifreq)
-/*
- * Names for CARP sysctl objects
- */
-#define CARPCTL_ALLOW 1 /* accept incoming CARP packets */
-#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */
-#define CARPCTL_LOG 3 /* log bad packets */
-#define CARPCTL_STATS 4 /* statistics (read-only) */
-#define CARPCTL_ARPBALANCE 5 /* balance arp responses */
-#define CARPCTL_MAXID 6
-
-#define CARPCTL_NAMES { \
- { 0, 0 }, \
- { "allow", CTLTYPE_INT }, \
- { "preempt", CTLTYPE_INT }, \
- { "log", CTLTYPE_INT }, \
- { "stats", CTLTYPE_STRUCT }, \
- { "arpbalance", CTLTYPE_INT }, \
-}
-
#ifdef _KERNEL
-void carp_carpdev_state(struct ifnet *);
-void carp_input (struct mbuf *, int);
-int carp6_input (struct mbuf **, int *, int);
-int carp_output (struct ifnet *, struct mbuf *, struct sockaddr *,
- struct rtentry *);
-int carp_iamatch (struct ifnet *, struct in_ifaddr *, struct in_addr *,
- u_int8_t **);
+int carp_ioctl(struct ifreq *, u_long, struct thread *);
+int carp_attach(struct ifaddr *, int);
+void carp_detach(struct ifaddr *);
+void carp_carpdev_state(struct ifnet *);
+int carp_input(struct mbuf **, int *, int);
+int carp6_input (struct mbuf **, int *, int);
+int carp_output (struct ifnet *, struct mbuf *,
+ const struct sockaddr *);
+int carp_master(struct ifaddr *);
+int carp_iamatch(struct ifaddr *, uint8_t **);
struct ifaddr *carp_iamatch6(struct ifnet *, struct in6_addr *);
caddr_t carp_macmatch6(struct ifnet *, struct mbuf *, const struct in6_addr *);
-struct ifnet *carp_forus (struct ifnet *, u_char *);
+int carp_forus(struct ifnet *, u_char *);
/* These are external networking stack hooks for CARP */
/* net/if.c */
+extern int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
+extern int (*carp_attach_p)(struct ifaddr *, int);
+extern void (*carp_detach_p)(struct ifaddr *);
extern void (*carp_linkstate_p)(struct ifnet *);
+extern void (*carp_demote_adj_p)(int, char *);
+extern int (*carp_master_p)(struct ifaddr *);
/* net/if_bridge.c net/if_ethersubr.c */
-extern struct ifnet *(*carp_forus_p)(struct ifnet *, u_char *);
+extern int (*carp_forus_p)(struct ifnet *, u_char *);
/* net/if_ethersubr.c */
extern int (*carp_output_p)(struct ifnet *, struct mbuf *,
- struct sockaddr *, struct rtentry *);
+ const struct sockaddr *);
+/* net/rtsock.c */
+extern int (*carp_get_vhid_p)(struct ifaddr *);
#ifdef INET
/* netinet/if_ether.c */
-extern int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *,
- struct in_addr *, u_int8_t **);
+extern int (*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
/* netinet6/nd6_nbr.c */
diff --git a/freebsd/sys/netinet/ip_divert.c b/freebsd/sys/netinet/ip_divert.c
index 879f411f..b43ebb7c 100644
--- a/freebsd/sys/netinet/ip_divert.c
+++ b/freebsd/sys/netinet/ip_divert.c
@@ -32,16 +32,15 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#if !defined(KLD_MODULE)
#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_sctp.h>
#ifndef INET
-#error "IPDIVERT requires INET."
-#endif
+#error "IPDIVERT requires INET"
#endif
-#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
+#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
@@ -57,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <netinet/in.h>
@@ -160,27 +160,30 @@ div_init(void)
* place for hashbase == NULL.
*/
in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb",
- div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE,
- IPI_HASHFIELDS_NONE);
+ div_inpcb_init, div_inpcb_fini, 0, IPI_HASHFIELDS_NONE);
}
static void
-div_destroy(void)
+div_destroy(void *unused __unused)
{
in_pcbinfo_destroy(&V_divcbinfo);
}
+VNET_SYSUNINIT(divert, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY,
+ div_destroy, NULL);
/*
* IPPROTO_DIVERT is not in the real IP protocol number space; this
* function should never be called. Just in case, drop any packets.
*/
-static void
-div_input(struct mbuf *m, int off)
+static int
+div_input(struct mbuf **mp, int *offp, int proto)
{
+ struct mbuf *m = *mp;
KMOD_IPSTAT_INC(ips_noproto);
m_freem(m);
+ return (IPPROTO_DONE);
}
/*
@@ -206,23 +209,19 @@ divert_packet(struct mbuf *m, int incoming)
}
/* Assure header */
if (m->m_len < sizeof(struct ip) &&
- (m = m_pullup(m, sizeof(struct ip))) == 0)
+ (m = m_pullup(m, sizeof(struct ip))) == NULL)
return;
ip = mtod(m, struct ip *);
/* Delayed checksums are currently not compatible with divert. */
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
- ip->ip_len = ntohs(ip->ip_len);
in_delayed_cksum(m);
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
- ip->ip_len = htons(ip->ip_len);
}
#ifdef SCTP
if (m->m_pkthdr.csum_flags & CSUM_SCTP) {
- ip->ip_len = ntohs(ip->ip_len);
sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
- ip->ip_len = htons(ip->ip_len);
}
#endif
bzero(&divsrc, sizeof(divsrc));
@@ -394,10 +393,6 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
INP_RUNLOCK(inp);
goto cantsend;
}
-
- /* Convert fields to host order for ip_output() */
- ip->ip_len = ntohs(ip->ip_len);
- ip->ip_off = ntohs(ip->ip_off);
break;
#ifdef INET6
case IPV6_VERSION >> 4:
@@ -410,8 +405,6 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
INP_RUNLOCK(inp);
goto cantsend;
}
-
- ip6->ip6_plen = ntohs(ip6->ip6_plen);
break;
}
#endif
@@ -611,7 +604,7 @@ div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
/* Packet must have a header (but that's about it) */
if (m->m_len < sizeof (struct ip) &&
- (m = m_pullup(m, sizeof (struct ip))) == 0) {
+ (m = m_pullup(m, sizeof (struct ip))) == NULL) {
KMOD_IPSTAT_INC(ips_toosmall);
m_freem(m);
return EINVAL;
@@ -677,7 +670,7 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
return error;
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
- if (inp_list == 0)
+ if (inp_list == NULL)
return ENOMEM;
INP_INFO_RLOCK(&V_divcbinfo);
@@ -766,9 +759,6 @@ struct protosw div_protosw = {
.pr_ctlinput = div_ctlinput,
.pr_ctloutput = ip_ctloutput,
.pr_init = div_init,
-#ifdef VIMAGE
- .pr_destroy = div_destroy,
-#endif
.pr_usrreqs = &div_usrreqs
};
@@ -776,9 +766,6 @@ static int
div_modevent(module_t mod, int type, void *unused)
{
int err = 0;
-#ifndef VIMAGE
- int n;
-#endif
switch (type) {
case MOD_LOAD:
@@ -803,10 +790,6 @@ div_modevent(module_t mod, int type, void *unused)
err = EPERM;
break;
case MOD_UNLOAD:
-#ifdef VIMAGE
- err = EPERM;
- break;
-#else
/*
* Forced unload.
*
@@ -819,8 +802,7 @@ div_modevent(module_t mod, int type, void *unused)
* we destroy the lock.
*/
INP_INFO_WLOCK(&V_divcbinfo);
- n = V_divcbinfo.ipi_count;
- if (n != 0) {
+ if (V_divcbinfo.ipi_count != 0) {
err = EBUSY;
INP_INFO_WUNLOCK(&V_divcbinfo);
break;
@@ -828,10 +810,11 @@ div_modevent(module_t mod, int type, void *unused)
ip_divert_ptr = NULL;
err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW);
INP_INFO_WUNLOCK(&V_divcbinfo);
- div_destroy();
+#ifndef VIMAGE
+ div_destroy(NULL);
+#endif
EVENTHANDLER_DEREGISTER(maxsockets_change, ip_divert_event_tag);
break;
-#endif /* !VIMAGE */
default:
err = EOPNOTSUPP;
break;
@@ -845,6 +828,6 @@ static moduledata_t ipdivertmod = {
0
};
-DECLARE_MODULE(ipdivert, ipdivertmod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
-MODULE_DEPEND(ipdivert, ipfw, 2, 2, 2);
+DECLARE_MODULE(ipdivert, ipdivertmod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
+MODULE_DEPEND(ipdivert, ipfw, 3, 3, 3);
MODULE_VERSION(ipdivert, 1);
diff --git a/freebsd/sys/netinet/ip_dummynet.h b/freebsd/sys/netinet/ip_dummynet.h
index dc2c3412..377b5b09 100644
--- a/freebsd/sys/netinet/ip_dummynet.h
+++ b/freebsd/sys/netinet/ip_dummynet.h
@@ -29,7 +29,7 @@
#ifndef _IP_DUMMYNET_H
#define _IP_DUMMYNET_H
-
+#define NEW_AQM
/*
* Definition of the kernel-userland API for dummynet.
*
@@ -85,7 +85,13 @@ enum {
/* special commands for emulation of sysctl variables */
DN_SYSCTL_GET,
DN_SYSCTL_SET,
-
+#ifdef NEW_AQM
+ /* subtypes used for setting/getting extra parameters.
+ * these subtypes used with IP_DUMMYNET3 command (get)
+ * and DN_TEXT (set). */
+ DN_AQM_PARAMS, /* AQM extra params */
+ DN_SCH_PARAMS, /* scheduler extra params */
+#endif
DN_LAST,
};
@@ -104,6 +110,10 @@ enum { /* user flags */
DN_HAS_PROFILE = 0x0010, /* a link has a profile */
DN_IS_RED = 0x0020,
DN_IS_GENTLE_RED= 0x0040,
+ DN_IS_ECN = 0x0080,
+ #ifdef NEW_AQM
+ DN_IS_AQM = 0x0100, /* AQMs: e.g Codel & PIE */
+ #endif
DN_PIPE_CMD = 0x1000, /* pipe config... */
};
@@ -171,8 +181,8 @@ struct dn_flow {
struct ipfw_flow_id fid;
uint64_t tot_pkts; /* statistics counters */
uint64_t tot_bytes;
- uint32_t length; /* Queue lenght, in packets */
- uint32_t len_bytes; /* Queue lenght, in bytes */
+ uint32_t length; /* Queue length, in packets */
+ uint32_t len_bytes; /* Queue length, in bytes */
uint32_t drops;
};
@@ -209,7 +219,19 @@ struct dn_profile {
int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */
};
-
+#ifdef NEW_AQM
+/* Extra parameters for AQM and scheduler.
+ * This struct is used to pass and retrieve parameters (configurations)
+ * to/from AQM and Scheduler.
+ */
+struct dn_extra_parms {
+ struct dn_id oid;
+ char name[16];
+ uint32_t nr;
+#define DN_MAX_EXTRA_PARM 10
+ int64_t par[DN_MAX_EXTRA_PARM];
+};
+#endif
/*
* Overall structure of dummynet
diff --git a/freebsd/sys/netinet/ip_ecn.h b/freebsd/sys/netinet/ip_ecn.h
index 6a814160..c5c1c4eb 100644
--- a/freebsd/sys/netinet/ip_ecn.h
+++ b/freebsd/sys/netinet/ip_ecn.h
@@ -38,10 +38,6 @@
#ifndef _NETINET_IP_ECN_H_
#define _NETINET_IP_ECN_H_
-#if defined(_KERNEL) && !defined(_LKM)
-#include <rtems/bsd/local/opt_inet.h>
-#endif
-
#define ECN_ALLOWED 1 /* ECN allowed */
#define ECN_FORBIDDEN 0 /* ECN forbidden */
#define ECN_NOCARE (-1) /* no consideration to ECN */
diff --git a/freebsd/sys/netinet/ip_encap.c b/freebsd/sys/netinet/ip_encap.c
index 14f8cd51..19ff1a09 100644
--- a/freebsd/sys/netinet/ip_encap.c
+++ b/freebsd/sys/netinet/ip_encap.c
@@ -67,6 +67,8 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mutex.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
@@ -86,7 +88,6 @@ __FBSDID("$FreeBSD$");
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
-#include <netinet6/ip6protosw.h>
#endif
#include <machine/stdarg.h>
@@ -98,14 +99,14 @@ static MALLOC_DEFINE(M_NETADDR, "encap_export_host", "Export host address struct
static void encap_add(struct encaptab *);
static int mask_match(const struct encaptab *, const struct sockaddr *,
const struct sockaddr *);
-static void encap_fillarg(struct mbuf *, const struct encaptab *);
+static void encap_fillarg(struct mbuf *, void *);
/*
* All global variables in ip_encap.c are locked using encapmtx.
*/
static struct mtx encapmtx;
MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF);
-LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(encaptab);
+static LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(encaptab);
/*
* We currently keey encap_init() for source code compatibility reasons --
@@ -117,18 +118,20 @@ encap_init(void)
}
#ifdef INET
-void
-encap4_input(struct mbuf *m, int off)
+int
+encap4_input(struct mbuf **mp, int *offp, int proto)
{
struct ip *ip;
- int proto;
+ struct mbuf *m;
struct sockaddr_in s, d;
const struct protosw *psw;
struct encaptab *ep, *match;
- int prio, matchprio;
+ void *arg;
+ int matchprio, off, prio;
+ m = *mp;
+ off = *offp;
ip = mtod(m, struct ip *);
- proto = ip->ip_p;
bzero(&s, sizeof(s));
s.sin_family = AF_INET;
@@ -139,6 +142,8 @@ encap4_input(struct mbuf *m, int off)
d.sin_len = sizeof(struct sockaddr_in);
d.sin_addr = ip->ip_dst;
+ arg = NULL;
+ psw = NULL;
match = NULL;
matchprio = 0;
mtx_lock(&encapmtx);
@@ -183,21 +188,24 @@ encap4_input(struct mbuf *m, int off)
match = ep;
}
}
+ if (match != NULL) {
+ psw = match->psw;
+ arg = match->arg;
+ }
mtx_unlock(&encapmtx);
- if (match) {
+ if (match != NULL) {
/* found a match, "match" has the best one */
- psw = match->psw;
- if (psw && psw->pr_input) {
- encap_fillarg(m, match);
- (*psw->pr_input)(m, off);
+ if (psw != NULL && psw->pr_input != NULL) {
+ encap_fillarg(m, arg);
+ (*psw->pr_input)(mp, offp, proto);
} else
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
/* last resort: inject to raw socket */
- rip_input(m, off);
+ return (rip_input(mp, offp, proto));
}
#endif
@@ -208,8 +216,9 @@ encap6_input(struct mbuf **mp, int *offp, int proto)
struct mbuf *m = *mp;
struct ip6_hdr *ip6;
struct sockaddr_in6 s, d;
- const struct ip6protosw *psw;
+ const struct protosw *psw;
struct encaptab *ep, *match;
+ void *arg;
int prio, matchprio;
ip6 = mtod(m, struct ip6_hdr *);
@@ -223,6 +232,8 @@ encap6_input(struct mbuf **mp, int *offp, int proto)
d.sin6_len = sizeof(struct sockaddr_in6);
d.sin6_addr = ip6->ip6_dst;
+ arg = NULL;
+ psw = NULL;
match = NULL;
matchprio = 0;
mtx_lock(&encapmtx);
@@ -250,17 +261,20 @@ encap6_input(struct mbuf **mp, int *offp, int proto)
match = ep;
}
}
+ if (match != NULL) {
+ psw = match->psw;
+ arg = match->arg;
+ }
mtx_unlock(&encapmtx);
- if (match) {
+ if (match != NULL) {
/* found a match */
- psw = (const struct ip6protosw *)match->psw;
- if (psw && psw->pr_input) {
- encap_fillarg(m, match);
+ if (psw != NULL && psw->pr_input != NULL) {
+ encap_fillarg(m, arg);
return (*psw->pr_input)(mp, offp, proto);
} else {
m_freem(m);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
}
@@ -439,14 +453,16 @@ mask_match(const struct encaptab *ep, const struct sockaddr *sp,
}
static void
-encap_fillarg(struct mbuf *m, const struct encaptab *ep)
+encap_fillarg(struct mbuf *m, void *arg)
{
struct m_tag *tag;
- tag = m_tag_get(PACKET_TAG_ENCAP, sizeof (void*), M_NOWAIT);
- if (tag) {
- *(void**)(tag+1) = ep->arg;
- m_tag_prepend(m, tag);
+ if (arg != NULL) {
+ tag = m_tag_get(PACKET_TAG_ENCAP, sizeof(void *), M_NOWAIT);
+ if (tag != NULL) {
+ *(void**)(tag+1) = arg;
+ m_tag_prepend(m, tag);
+ }
}
}
diff --git a/freebsd/sys/netinet/ip_encap.h b/freebsd/sys/netinet/ip_encap.h
index 3b1a5aee..0b8dbd6f 100644
--- a/freebsd/sys/netinet/ip_encap.h
+++ b/freebsd/sys/netinet/ip_encap.h
@@ -49,7 +49,7 @@ struct encaptab {
};
void encap_init(void);
-void encap4_input(struct mbuf *, int);
+int encap4_input(struct mbuf **, int *, int);
int encap6_input(struct mbuf **, int *, int);
const struct encaptab *encap_attach(int, int, const struct sockaddr *,
const struct sockaddr *, const struct sockaddr *,
diff --git a/freebsd/sys/netinet/ip_fastfwd.c b/freebsd/sys/netinet/ip_fastfwd.c
index 863b9a16..19dfb1ab 100644
--- a/freebsd/sys/netinet/ip_fastfwd.c
+++ b/freebsd/sys/netinet/ip_fastfwd.c
@@ -78,7 +78,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipstealth.h>
#include <rtems/bsd/sys/param.h>
@@ -87,6 +86,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
@@ -99,6 +99,7 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
@@ -108,12 +109,6 @@ __FBSDID("$FreeBSD$");
#include <machine/in_cksum.h>
-static VNET_DEFINE(int, ipfastforward_active);
-#define V_ipfastforward_active VNET(ipfastforward_active)
-
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_RW,
- &VNET_NAME(ipfastforward_active), 0, "Enable fast IP forwarding");
-
static struct sockaddr_in *
ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m)
{
@@ -158,7 +153,7 @@ ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m)
* to ip_input for full processing.
*/
struct mbuf *
-ip_fastforward(struct mbuf *m)
+ip_tryforward(struct mbuf *m)
{
struct ip *ip;
struct mbuf *m0 = NULL;
@@ -166,119 +161,20 @@ ip_fastforward(struct mbuf *m)
struct sockaddr_in *dst = NULL;
struct ifnet *ifp;
struct in_addr odest, dest;
- u_short sum, ip_len;
+ uint16_t ip_len, ip_off;
int error = 0;
- int hlen, mtu;
+ int mtu;
struct m_tag *fwd_tag = NULL;
/*
* Are we active and forwarding packets?
*/
- if (!V_ipfastforward_active || !V_ipforwarding)
- return m;
M_ASSERTVALID(m);
M_ASSERTPKTHDR(m);
bzero(&ro, sizeof(ro));
- /*
- * Step 1: check for packet drop conditions (and sanity checks)
- */
-
- /*
- * Is entire packet big enough?
- */
- if (m->m_pkthdr.len < sizeof(struct ip)) {
- IPSTAT_INC(ips_tooshort);
- goto drop;
- }
-
- /*
- * Is first mbuf large enough for ip header and is header present?
- */
- if (m->m_len < sizeof (struct ip) &&
- (m = m_pullup(m, sizeof (struct ip))) == NULL) {
- IPSTAT_INC(ips_toosmall);
- return NULL; /* mbuf already free'd */
- }
-
- ip = mtod(m, struct ip *);
-
- /*
- * Is it IPv4?
- */
- if (ip->ip_v != IPVERSION) {
- IPSTAT_INC(ips_badvers);
- goto drop;
- }
-
- /*
- * Is IP header length correct and is it in first mbuf?
- */
- hlen = ip->ip_hl << 2;
- if (hlen < sizeof(struct ip)) { /* minimum header length */
- IPSTAT_INC(ips_badhlen);
- goto drop;
- }
- if (hlen > m->m_len) {
- if ((m = m_pullup(m, hlen)) == NULL) {
- IPSTAT_INC(ips_badhlen);
- return NULL; /* mbuf already free'd */
- }
- ip = mtod(m, struct ip *);
- }
-
- /*
- * Checksum correct?
- */
- if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED)
- sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
- else {
- if (hlen == sizeof(struct ip))
- sum = in_cksum_hdr(ip);
- else
- sum = in_cksum(m, hlen);
- }
- if (sum) {
- IPSTAT_INC(ips_badsum);
- goto drop;
- }
-
- /*
- * Remember that we have checked the IP header and found it valid.
- */
- m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
-
- ip_len = ntohs(ip->ip_len);
-
- /*
- * Is IP length longer than packet we have got?
- */
- if (m->m_pkthdr.len < ip_len) {
- IPSTAT_INC(ips_tooshort);
- goto drop;
- }
-
- /*
- * Is packet longer than IP header tells us? If yes, truncate packet.
- */
- if (m->m_pkthdr.len > ip_len) {
- if (m->m_len == m->m_pkthdr.len) {
- m->m_len = ip_len;
- m->m_pkthdr.len = ip_len;
- } else
- m_adj(m, ip_len - m->m_pkthdr.len);
- }
-
- /*
- * Is packet from or to 127/8?
- */
- if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
- (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
- IPSTAT_INC(ips_badaddr);
- goto drop;
- }
#ifdef ALTQ
/*
@@ -289,16 +185,14 @@ ip_fastforward(struct mbuf *m)
#endif
/*
- * Step 2: fallback conditions to normal ip_input path processing
- */
-
- /*
* Only IP packets without options
*/
+ ip = mtod(m, struct ip *);
+
if (ip->ip_hl != (sizeof(struct ip) >> 2)) {
- if (ip_doopts == 1)
+ if (V_ip_doopts == 1)
return m;
- else if (ip_doopts == 2) {
+ else if (V_ip_doopts == 2) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB,
0, 0);
return NULL; /* mbuf already free'd */
@@ -312,7 +206,7 @@ ip_fastforward(struct mbuf *m)
*
* XXX: Probably some of these checks could be direct drop
* conditions. However it is not clear whether there are some
- * hacks or obscure behaviours which make it neccessary to
+ * hacks or obscure behaviours which make it necessary to
* let ip_input handle it. We play safe here and let ip_input
* deal with it until it is proven that we can directly drop it.
*/
@@ -340,12 +234,6 @@ ip_fastforward(struct mbuf *m)
* Step 3: incoming packet firewall processing
*/
- /*
- * Convert to host representation
- */
- ip->ip_len = ntohs(ip->ip_len);
- ip->ip_off = ntohs(ip->ip_off);
-
odest.s_addr = dest.s_addr = ip->ip_dst.s_addr;
/*
@@ -464,8 +352,6 @@ passin:
forwardlocal:
/*
* Return packet for processing by ip_input().
- * Keep host byte order as expected at ip_input's
- * "ours"-label.
*/
m->m_flags |= M_FASTFWD_OURS;
if (ro.ro_rt)
@@ -491,29 +377,18 @@ passout:
/*
* Step 6: send off the packet
*/
+ ip_len = ntohs(ip->ip_len);
+ ip_off = ntohs(ip->ip_off);
/*
* Check if route is dampned (when ARP is unable to resolve)
*/
if ((ro.ro_rt->rt_flags & RTF_REJECT) &&
- (ro.ro_rt->rt_rmx.rmx_expire == 0 ||
- time_uptime < ro.ro_rt->rt_rmx.rmx_expire)) {
+ (ro.ro_rt->rt_expire == 0 || time_uptime < ro.ro_rt->rt_expire)) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
goto consumed;
}
-#ifndef ALTQ
- /*
- * Check if there is enough space in the interface queue
- */
- if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
- ifp->if_snd.ifq_maxlen) {
- IPSTAT_INC(ips_odropped);
- /* would send source quench here but that is depreciated */
- goto drop;
- }
-#endif
-
/*
* Check if media link state of interface is not down
*/
@@ -525,28 +400,27 @@ passout:
/*
* Check if packet fits MTU or if hardware will fragment for us
*/
- if (ro.ro_rt->rt_rmx.rmx_mtu)
- mtu = min(ro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+ if (ro.ro_rt->rt_mtu)
+ mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
else
mtu = ifp->if_mtu;
- if (ip->ip_len <= mtu ||
- (ifp->if_hwassist & CSUM_FRAGMENT && (ip->ip_off & IP_DF) == 0)) {
+ if (ip_len <= mtu) {
/*
- * Restore packet header fields to original values
+ * Avoid confusing lower layers.
*/
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
+ m_clrprotoflags(m);
/*
* Send off the packet via outgoing interface
*/
+ IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
error = (*ifp->if_output)(ifp, m,
(struct sockaddr *)dst, &ro);
} else {
/*
* Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery
*/
- if (ip->ip_off & IP_DF) {
+ if (ip_off & IP_DF) {
IPSTAT_INC(ips_cantfrag);
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
0, mtu);
@@ -556,14 +430,8 @@ passout:
* We have to fragment the packet
*/
m->m_pkthdr.csum_flags |= CSUM_IP;
- /*
- * ip_fragment expects ip_len and ip_off in host byte
- * order but returns all packets in network byte order
- */
- if (ip_fragment(ip, &m, mtu, ifp->if_hwassist,
- (~ifp->if_hwassist & CSUM_DELAY_IP))) {
+ if (ip_fragment(ip, &m, mtu, ifp->if_hwassist))
goto drop;
- }
KASSERT(m != NULL, ("null mbuf and no error"));
/*
* Send off the fragments via outgoing interface
@@ -572,7 +440,12 @@ passout:
do {
m0 = m->m_nextpkt;
m->m_nextpkt = NULL;
+ /*
+ * Avoid confusing lower layers.
+ */
+ m_clrprotoflags(m);
+ IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
error = (*ifp->if_output)(ifp, m,
(struct sockaddr *)dst, &ro);
if (error)
@@ -592,7 +465,7 @@ passout:
if (error != 0)
IPSTAT_INC(ips_odropped);
else {
- ro.ro_rt->rt_rmx.rmx_pksent++;
+ counter_u64_add(ro.ro_rt->rt_pksent, 1);
IPSTAT_INC(ips_forward);
IPSTAT_INC(ips_fastforward);
}
diff --git a/freebsd/sys/netinet/ip_fw.h b/freebsd/sys/netinet/ip_fw.h
index 14b08f5e..d274ab27 100644
--- a/freebsd/sys/netinet/ip_fw.h
+++ b/freebsd/sys/netinet/ip_fw.h
@@ -36,25 +36,31 @@
*/
#define IPFW_DEFAULT_RULE 65535
+#define RESVD_SET 31 /*set for default and persistent rules*/
+#define IPFW_MAX_SETS 32 /* Number of sets supported by ipfw*/
+
/*
- * Default number of ipfw tables.
+ * Compat values for old clients
*/
+#ifndef _KERNEL
#define IPFW_TABLES_MAX 65535
#define IPFW_TABLES_DEFAULT 128
+#endif
/*
* Most commands (queue, pipe, tag, untag, limit...) can have a 16-bit
- * argument between 1 and 65534. The value 0 is unused, the value
- * 65535 (IP_FW_TABLEARG) is used to represent 'tablearg', i.e. the
- * can be 1..65534, or 65535 to indicate the use of a 'tablearg'
+ * argument between 1 and 65534. The value 0 (IP_FW_TARG) is used
+ * to represent 'tablearg' value, e.g. indicate the use of a 'tablearg'
* result of the most recent table() lookup.
* Note that 16bit is only a historical limit, resulting from
* the use of a 16-bit fields for that value. In reality, we can have
- * 2^32 pipes, queues, tag values and so on, and use 0 as a tablearg.
+ * 2^32 pipes, queues, tag values and so on.
*/
#define IPFW_ARG_MIN 1
#define IPFW_ARG_MAX 65534
-#define IP_FW_TABLEARG 65535 /* XXX should use 0 */
+#define IP_FW_TABLEARG 65535 /* Compat value for old clients */
+#define IP_FW_TARG 0 /* Current tablearg value */
+#define IP_FW_NAT44_GLOBAL 65535 /* arg1 value for "nat global" */
/*
* Number of entries in the call stack of the call/return commands.
@@ -65,15 +71,66 @@
/* IP_FW3 header/opcodes */
typedef struct _ip_fw3_opheader {
uint16_t opcode; /* Operation opcode */
- uint16_t reserved[3]; /* Align to 64-bit boundary */
+ uint16_t version; /* Opcode version */
+ uint16_t reserved[2]; /* Align to 64-bit boundary */
} ip_fw3_opheader;
-
-/* IPFW extented tables support */
+/* IP_FW3 opcodes */
#define IP_FW_TABLE_XADD 86 /* add entry */
#define IP_FW_TABLE_XDEL 87 /* delete entry */
-#define IP_FW_TABLE_XGETSIZE 88 /* get table size */
+#define IP_FW_TABLE_XGETSIZE 88 /* get table size (deprecated) */
#define IP_FW_TABLE_XLIST 89 /* list table contents */
+#define IP_FW_TABLE_XDESTROY 90 /* destroy table */
+#define IP_FW_TABLES_XLIST 92 /* list all tables */
+#define IP_FW_TABLE_XINFO 93 /* request info for one table */
+#define IP_FW_TABLE_XFLUSH 94 /* flush table data */
+#define IP_FW_TABLE_XCREATE 95 /* create new table */
+#define IP_FW_TABLE_XMODIFY 96 /* modify existing table */
+#define IP_FW_XGET 97 /* Retrieve configuration */
+#define IP_FW_XADD 98 /* add rule */
+#define IP_FW_XDEL 99 /* del rule */
+#define IP_FW_XMOVE 100 /* move rules to different set */
+#define IP_FW_XZERO 101 /* clear accounting */
+#define IP_FW_XRESETLOG 102 /* zero rules logs */
+#define IP_FW_SET_SWAP 103 /* Swap between 2 sets */
+#define IP_FW_SET_MOVE 104 /* Move one set to another one */
+#define IP_FW_SET_ENABLE 105 /* Enable/disable sets */
+#define IP_FW_TABLE_XFIND 106 /* finds an entry */
+#define IP_FW_XIFLIST 107 /* list tracked interfaces */
+#define IP_FW_TABLES_ALIST 108 /* list table algorithms */
+#define IP_FW_TABLE_XSWAP 109 /* swap two tables */
+#define IP_FW_TABLE_VLIST 110 /* dump table value hash */
+
+#define IP_FW_NAT44_XCONFIG 111 /* Create/modify NAT44 instance */
+#define IP_FW_NAT44_DESTROY 112 /* Destroys NAT44 instance */
+#define IP_FW_NAT44_XGETCONFIG 113 /* Get NAT44 instance config */
+#define IP_FW_NAT44_LIST_NAT 114 /* List all NAT44 instances */
+#define IP_FW_NAT44_XGETLOG 115 /* Get log from NAT44 instance */
+
+#define IP_FW_DUMP_SOPTCODES 116 /* Dump available sopts/versions */
+#define IP_FW_DUMP_SRVOBJECTS 117 /* Dump existing named objects */
+
+#define IP_FW_NAT64STL_CREATE 130 /* Create stateless NAT64 instance */
+#define IP_FW_NAT64STL_DESTROY 131 /* Destroy stateless NAT64 instance */
+#define IP_FW_NAT64STL_CONFIG 132 /* Modify stateless NAT64 instance */
+#define IP_FW_NAT64STL_LIST 133 /* List stateless NAT64 instances */
+#define IP_FW_NAT64STL_STATS 134 /* Get NAT64STL instance statistics */
+#define IP_FW_NAT64STL_RESET_STATS 135 /* Reset NAT64STL instance statistics */
+
+#define IP_FW_NAT64LSN_CREATE 140 /* Create stateful NAT64 instance */
+#define IP_FW_NAT64LSN_DESTROY 141 /* Destroy stateful NAT64 instance */
+#define IP_FW_NAT64LSN_CONFIG 142 /* Modify stateful NAT64 instance */
+#define IP_FW_NAT64LSN_LIST 143 /* List stateful NAT64 instances */
+#define IP_FW_NAT64LSN_STATS 144 /* Get NAT64LSN instance statistics */
+#define IP_FW_NAT64LSN_LIST_STATES 145 /* Get stateful NAT64 states */
+#define IP_FW_NAT64LSN_RESET_STATS 146 /* Reset NAT64LSN instance statistics */
+
+#define IP_FW_NPTV6_CREATE 150 /* Create NPTv6 instance */
+#define IP_FW_NPTV6_DESTROY 151 /* Destroy NPTv6 instance */
+#define IP_FW_NPTV6_CONFIG 152 /* Modify NPTv6 instance */
+#define IP_FW_NPTV6_LIST 153 /* List NPTv6 instances */
+#define IP_FW_NPTV6_STATS 154 /* Get NPTv6 instance statistics */
+#define IP_FW_NPTV6_RESET_STATS 155 /* Reset NPTv6 instance statistics */
/*
* The kernel representation of ipfw rules is made of a list of
@@ -220,11 +277,14 @@ enum ipfw_opcodes { /* arguments (4 byte each) */
O_DSCP, /* 2 u32 = DSCP mask */
O_SETDSCP, /* arg1=DSCP value */
+ O_IP_FLOW_LOOKUP, /* arg1=table number, u32=value */
+
+ O_EXTERNAL_ACTION, /* arg1=id of external action handler */
+ O_EXTERNAL_INSTANCE, /* arg1=id of eaction handler instance */
O_LAST_OPCODE /* not an opcode! */
};
-
/*
* The extension header are filtered only for presence using a bit
* vector with a flag for each header.
@@ -341,6 +401,7 @@ typedef struct _ipfw_insn_if {
union {
struct in_addr ip;
int glob;
+ uint16_t kidx;
} p;
char name[IFNAMSIZ];
} ipfw_insn_if;
@@ -377,6 +438,8 @@ typedef struct _ipfw_insn_log {
u_int32_t log_left; /* how many left to log */
} ipfw_insn_log;
+/* Legacy NAT structures, compat only */
+#ifndef _KERNEL
/*
* Data structures required by both ipfw(8) and ipfw(4) but not part of the
* management API are protected by IPFW_INTERNAL.
@@ -438,6 +501,44 @@ struct cfg_nat {
#define SOF_REDIR sizeof(struct cfg_redir)
#define SOF_SPOOL sizeof(struct cfg_spool)
+#endif /* ifndef _KERNEL */
+
+
+struct nat44_cfg_spool {
+ struct in_addr addr;
+ uint16_t port;
+ uint16_t spare;
+};
+#define NAT44_REDIR_ADDR 0x01
+#define NAT44_REDIR_PORT 0x02
+#define NAT44_REDIR_PROTO 0x04
+
+/* Nat redirect configuration. */
+struct nat44_cfg_redir {
+ struct in_addr laddr; /* local ip address */
+ struct in_addr paddr; /* public ip address */
+ struct in_addr raddr; /* remote ip address */
+ uint16_t lport; /* local port */
+ uint16_t pport; /* public port */
+ uint16_t rport; /* remote port */
+ uint16_t pport_cnt; /* number of public ports */
+ uint16_t rport_cnt; /* number of remote ports */
+ uint16_t mode; /* type of redirect mode */
+ uint16_t spool_cnt; /* num of entry in spool chain */
+ uint16_t spare;
+ uint32_t proto; /* protocol: tcp/udp */
+};
+
+/* Nat configuration data struct. */
+struct nat44_cfg_nat {
+ char name[64]; /* nat name */
+ char if_name[64]; /* interface name */
+ uint32_t size; /* structure size incl. redirs */
+ struct in_addr ip; /* nat IPv4 address */
+ uint32_t mode; /* aliasing mode */
+ uint32_t redir_cnt; /* number of entry in spool chain */
+};
+
/* Nat command. */
typedef struct _ipfw_insn_nat {
ipfw_insn o;
@@ -471,15 +572,17 @@ typedef struct _ipfw_insn_icmp6 {
/*
* Here we have the structure representing an ipfw rule.
*
- * It starts with a general area (with link fields and counters)
- * followed by an array of one or more instructions, which the code
- * accesses as an array of 32-bit values.
- *
- * Given a rule pointer r:
+ * Layout:
+ * struct ip_fw_rule
+ * [ counter block, size = rule->cntr_len ]
+ * [ one or more instructions, size = rule->cmd_len * 4 ]
*
- * r->cmd is the start of the first instruction.
- * ACTION_PTR(r) is the start of the first action (things to do
- * once a rule matched).
+ * It starts with a general area (with link fields).
+ * Counter block may be next (if rule->cntr_len > 0),
+ * followed by an array of one or more instructions, which the code
+ * accesses as an array of 32-bit values. rule->cmd_len represents
+ * the total instructions legth in u32 worrd, while act_ofs represents
+ * rule action offset in u32 words.
*
* When assembling instruction, remember the following:
*
@@ -490,11 +593,41 @@ typedef struct _ipfw_insn_icmp6 {
* + if a rule has an "altq" option, it comes after "log"
* + if a rule has an O_TAG option, it comes after "log" and "altq"
*
- * NOTE: we use a simple linked list of rules because we never need
- * to delete a rule without scanning the list. We do not use
- * queue(3) macros for portability and readability.
+ *
+ * All structures (excluding instructions) are u64-aligned.
+ * Please keep this.
*/
+struct ip_fw_rule {
+ uint16_t act_ofs; /* offset of action in 32-bit units */
+ uint16_t cmd_len; /* # of 32-bit words in cmd */
+ uint16_t spare;
+ uint8_t set; /* rule set (0..31) */
+ uint8_t flags; /* rule flags */
+ uint32_t rulenum; /* rule number */
+ uint32_t id; /* rule id */
+
+ ipfw_insn cmd[1]; /* storage for commands */
+};
+#define IPFW_RULE_NOOPT 0x01 /* Has no options in body */
+
+/* Unaligned version */
+
+/* Base ipfw rule counter block. */
+struct ip_fw_bcounter {
+ uint16_t size; /* Size of counter block, bytes */
+ uint8_t flags; /* flags for given block */
+ uint8_t spare;
+ uint32_t timestamp; /* tv_sec of last match */
+ uint64_t pcnt; /* Packet counter */
+ uint64_t bcnt; /* Byte counter */
+};
+
+
+#ifndef _KERNEL
+/*
+ * Legacy rule format
+ */
struct ip_fw {
struct ip_fw *x_next; /* linked list of rules */
struct ip_fw *next_rule; /* ptr to next [skipto] rule */
@@ -503,8 +636,7 @@ struct ip_fw {
uint16_t act_ofs; /* offset of action in 32-bit units */
uint16_t cmd_len; /* # of 32-bit words in cmd */
uint16_t rulenum; /* rule number */
- uint8_t set; /* rule set (0..31) */
-#define RESVD_SET 31 /* set for default and persistent rules */
+ uint8_t set; /* rule set (0..31) */
uint8_t _pad; /* padding */
uint32_t id; /* rule id */
@@ -515,12 +647,13 @@ struct ip_fw {
ipfw_insn cmd[1]; /* storage for commands */
};
+#endif
#define ACTION_PTR(rule) \
(ipfw_insn *)( (u_int32_t *)((rule)->cmd) + ((rule)->act_ofs) )
-#define RULESIZE(rule) (sizeof(struct ip_fw) + \
- ((struct ip_fw *)(rule))->cmd_len * 4 - 4)
+#define RULESIZE(rule) (sizeof(*(rule)) + (rule)->cmd_len * 4 - 4)
+
#if 1 // should be moved to in.h
/*
@@ -572,7 +705,8 @@ struct _ipfw_dyn_rule {
/* to generate keepalives) */
u_int16_t dyn_type; /* rule type */
u_int16_t count; /* refcount */
-};
+ u_int16_t kidx; /* index of named object */
+} __packed __aligned(8);
/*
* Definitions for IP option names.
@@ -598,9 +732,27 @@ struct _ipfw_dyn_rule {
* These are used for lookup tables.
*/
-#define IPFW_TABLE_CIDR 1 /* Table for holding IPv4/IPv6 prefixes */
+#define IPFW_TABLE_ADDR 1 /* Table for holding IPv4/IPv6 prefixes */
#define IPFW_TABLE_INTERFACE 2 /* Table for holding interface names */
-#define IPFW_TABLE_MAXTYPE 2 /* Maximum valid number */
+#define IPFW_TABLE_NUMBER 3 /* Table for holding ports/uid/gid/etc */
+#define IPFW_TABLE_FLOW 4 /* Table for holding flow data */
+#define IPFW_TABLE_MAXTYPE 4 /* Maximum valid number */
+
+#define IPFW_TABLE_CIDR IPFW_TABLE_ADDR /* compat */
+
+/* Value types */
+#define IPFW_VTYPE_LEGACY 0xFFFFFFFF /* All data is filled in */
+#define IPFW_VTYPE_SKIPTO 0x00000001 /* skipto/call/callreturn */
+#define IPFW_VTYPE_PIPE 0x00000002 /* pipe/queue */
+#define IPFW_VTYPE_FIB 0x00000004 /* setfib */
+#define IPFW_VTYPE_NAT 0x00000008 /* nat */
+#define IPFW_VTYPE_DSCP 0x00000010 /* dscp */
+#define IPFW_VTYPE_TAG 0x00000020 /* tag/untag */
+#define IPFW_VTYPE_DIVERT 0x00000040 /* divert/tee */
+#define IPFW_VTYPE_NETGRAPH 0x00000080 /* netgraph/ngtee */
+#define IPFW_VTYPE_LIMIT 0x00000100 /* limit */
+#define IPFW_VTYPE_NH4 0x00000200 /* IPv4 nexthop */
+#define IPFW_VTYPE_NH6 0x00000400 /* IPv6 nexthop */
typedef struct _ipfw_table_entry {
in_addr_t addr; /* network address */
@@ -614,6 +766,7 @@ typedef struct _ipfw_table_xentry {
uint8_t type; /* entry type */
uint8_t masklen; /* mask length */
uint16_t tbl; /* table number */
+ uint16_t flags; /* record flags */
uint32_t value; /* value */
union {
/* Longest field needs to be aligned by 4-byte boundary */
@@ -621,6 +774,7 @@ typedef struct _ipfw_table_xentry {
char iface[IF_NAMESIZE]; /* interface name */
} k;
} ipfw_table_xentry;
+#define IPFW_TCF_INET 0x01 /* CIDR flags: IPv4 record */
typedef struct _ipfw_table {
u_int32_t size; /* size of entries in bytes */
@@ -630,7 +784,7 @@ typedef struct _ipfw_table {
} ipfw_table;
typedef struct _ipfw_xtable {
- ip_fw3_opheader opheader; /* eXtended tables are controlled via IP_FW3 */
+ ip_fw3_opheader opheader; /* IP_FW3 opcode */
uint32_t size; /* size of entries in bytes */
uint32_t cnt; /* # of entries */
uint16_t tbl; /* table number */
@@ -638,4 +792,259 @@ typedef struct _ipfw_xtable {
ipfw_table_xentry xent[0]; /* entries */
} ipfw_xtable;
+typedef struct _ipfw_obj_tlv {
+ uint16_t type; /* TLV type */
+ uint16_t flags; /* TLV-specific flags */
+ uint32_t length; /* Total length, aligned to u64 */
+} ipfw_obj_tlv;
+#define IPFW_TLV_TBL_NAME 1
+#define IPFW_TLV_TBLNAME_LIST 2
+#define IPFW_TLV_RULE_LIST 3
+#define IPFW_TLV_DYNSTATE_LIST 4
+#define IPFW_TLV_TBL_ENT 5
+#define IPFW_TLV_DYN_ENT 6
+#define IPFW_TLV_RULE_ENT 7
+#define IPFW_TLV_TBLENT_LIST 8
+#define IPFW_TLV_RANGE 9
+#define IPFW_TLV_EACTION 10
+#define IPFW_TLV_COUNTERS 11
+#define IPFW_TLV_OBJDATA 12
+#define IPFW_TLV_STATE_NAME 14
+
+#define IPFW_TLV_EACTION_BASE 1000
+#define IPFW_TLV_EACTION_NAME(arg) (IPFW_TLV_EACTION_BASE + (arg))
+
+typedef struct _ipfw_obj_data {
+ ipfw_obj_tlv head;
+ void *data[0];
+} ipfw_obj_data;
+
+/* Object name TLV */
+typedef struct _ipfw_obj_ntlv {
+ ipfw_obj_tlv head; /* TLV header */
+ uint16_t idx; /* Name index */
+ uint8_t set; /* set, if applicable */
+ uint8_t type; /* object type, if applicable */
+ uint32_t spare; /* unused */
+ char name[64]; /* Null-terminated name */
+} ipfw_obj_ntlv;
+
+/* IPv4/IPv6 L4 flow description */
+struct tflow_entry {
+ uint8_t af;
+ uint8_t proto;
+ uint16_t spare;
+ uint16_t sport;
+ uint16_t dport;
+ union {
+ struct {
+ struct in_addr sip;
+ struct in_addr dip;
+ } a4;
+ struct {
+ struct in6_addr sip6;
+ struct in6_addr dip6;
+ } a6;
+ } a;
+};
+
+typedef struct _ipfw_table_value {
+ uint32_t tag; /* O_TAG/O_TAGGED */
+ uint32_t pipe; /* O_PIPE/O_QUEUE */
+ uint16_t divert; /* O_DIVERT/O_TEE */
+ uint16_t skipto; /* skipto, CALLRET */
+ uint32_t netgraph; /* O_NETGRAPH/O_NGTEE */
+ uint32_t fib; /* O_SETFIB */
+ uint32_t nat; /* O_NAT */
+ uint32_t nh4;
+ uint8_t dscp;
+ uint8_t spare0;
+ uint16_t spare1;
+ struct in6_addr nh6;
+ uint32_t limit; /* O_LIMIT */
+ uint32_t zoneid; /* scope zone id for nh6 */
+ uint64_t reserved;
+} ipfw_table_value;
+
+/* Table entry TLV */
+typedef struct _ipfw_obj_tentry {
+ ipfw_obj_tlv head; /* TLV header */
+ uint8_t subtype; /* subtype (IPv4,IPv6) */
+ uint8_t masklen; /* mask length */
+ uint8_t result; /* request result */
+ uint8_t spare0;
+ uint16_t idx; /* Table name index */
+ uint16_t spare1;
+ union {
+ /* Longest field needs to be aligned by 8-byte boundary */
+ struct in_addr addr; /* IPv4 address */
+ uint32_t key; /* uid/gid/port */
+ struct in6_addr addr6; /* IPv6 address */
+ char iface[IF_NAMESIZE]; /* interface name */
+ struct tflow_entry flow;
+ } k;
+ union {
+ ipfw_table_value value; /* value data */
+ uint32_t kidx; /* value kernel index */
+ } v;
+} ipfw_obj_tentry;
+#define IPFW_TF_UPDATE 0x01 /* Update record if exists */
+/* Container TLV */
+#define IPFW_CTF_ATOMIC 0x01 /* Perform atomic operation */
+/* Operation results */
+#define IPFW_TR_IGNORED 0 /* Entry was ignored (rollback) */
+#define IPFW_TR_ADDED 1 /* Entry was successfully added */
+#define IPFW_TR_UPDATED 2 /* Entry was successfully updated*/
+#define IPFW_TR_DELETED 3 /* Entry was successfully deleted*/
+#define IPFW_TR_LIMIT 4 /* Entry was ignored (limit) */
+#define IPFW_TR_NOTFOUND 5 /* Entry was not found */
+#define IPFW_TR_EXISTS 6 /* Entry already exists */
+#define IPFW_TR_ERROR 7 /* Request has failed (unknown) */
+
+typedef struct _ipfw_obj_dyntlv {
+ ipfw_obj_tlv head;
+ ipfw_dyn_rule state;
+} ipfw_obj_dyntlv;
+#define IPFW_DF_LAST 0x01 /* Last state in chain */
+
+/* Containter TLVs */
+typedef struct _ipfw_obj_ctlv {
+ ipfw_obj_tlv head; /* TLV header */
+ uint32_t count; /* Number of sub-TLVs */
+ uint16_t objsize; /* Single object size */
+ uint8_t version; /* TLV version */
+ uint8_t flags; /* TLV-specific flags */
+} ipfw_obj_ctlv;
+
+/* Range TLV */
+typedef struct _ipfw_range_tlv {
+ ipfw_obj_tlv head; /* TLV header */
+ uint32_t flags; /* Range flags */
+ uint16_t start_rule; /* Range start */
+ uint16_t end_rule; /* Range end */
+ uint32_t set; /* Range set to match */
+ uint32_t new_set; /* New set to move/swap to */
+} ipfw_range_tlv;
+#define IPFW_RCFLAG_RANGE 0x01 /* rule range is set */
+#define IPFW_RCFLAG_ALL 0x02 /* match ALL rules */
+#define IPFW_RCFLAG_SET 0x04 /* match rules in given set */
+/* User-settable flags */
+#define IPFW_RCFLAG_USER (IPFW_RCFLAG_RANGE | IPFW_RCFLAG_ALL | \
+ IPFW_RCFLAG_SET)
+/* Internally used flags */
+#define IPFW_RCFLAG_DEFAULT 0x0100 /* Do not skip defaul rule */
+
+typedef struct _ipfw_ta_tinfo {
+ uint32_t flags; /* Format flags */
+ uint32_t spare;
+ uint8_t taclass4; /* algorithm class */
+ uint8_t spare4;
+ uint16_t itemsize4; /* item size in runtime */
+ uint32_t size4; /* runtime structure size */
+ uint32_t count4; /* number of items in runtime */
+ uint8_t taclass6; /* algorithm class */
+ uint8_t spare6;
+ uint16_t itemsize6; /* item size in runtime */
+ uint32_t size6; /* runtime structure size */
+ uint32_t count6; /* number of items in runtime */
+} ipfw_ta_tinfo;
+#define IPFW_TACLASS_HASH 1 /* algo is based on hash */
+#define IPFW_TACLASS_ARRAY 2 /* algo is based on array */
+#define IPFW_TACLASS_RADIX 3 /* algo is based on radix tree */
+
+#define IPFW_TATFLAGS_DATA 0x0001 /* Has data filled in */
+#define IPFW_TATFLAGS_AFDATA 0x0002 /* Separate data per AF */
+#define IPFW_TATFLAGS_AFITEM 0x0004 /* diff. items per AF */
+
+typedef struct _ipfw_xtable_info {
+ uint8_t type; /* table type (addr,iface,..) */
+ uint8_t tflags; /* type flags */
+ uint16_t mflags; /* modification flags */
+ uint16_t flags; /* generic table flags */
+ uint16_t spare[3];
+ uint32_t vmask; /* bitmask with value types */
+ uint32_t set; /* set table is in */
+ uint32_t kidx; /* kernel index */
+ uint32_t refcnt; /* number of references */
+ uint32_t count; /* Number of records */
+ uint32_t size; /* Total size of records(export)*/
+ uint32_t limit; /* Max number of records */
+ char tablename[64]; /* table name */
+ char algoname[64]; /* algorithm name */
+ ipfw_ta_tinfo ta_info; /* additional algo stats */
+} ipfw_xtable_info;
+/* Generic table flags */
+#define IPFW_TGFLAGS_LOCKED 0x01 /* Tables is locked from changes*/
+/* Table type-specific flags */
+#define IPFW_TFFLAG_SRCIP 0x01
+#define IPFW_TFFLAG_DSTIP 0x02
+#define IPFW_TFFLAG_SRCPORT 0x04
+#define IPFW_TFFLAG_DSTPORT 0x08
+#define IPFW_TFFLAG_PROTO 0x10
+/* Table modification flags */
+#define IPFW_TMFLAGS_LIMIT 0x0002 /* Change limit value */
+#define IPFW_TMFLAGS_LOCK 0x0004 /* Change table lock state */
+
+typedef struct _ipfw_iface_info {
+ char ifname[64]; /* interface name */
+ uint32_t ifindex; /* interface index */
+ uint32_t flags; /* flags */
+ uint32_t refcnt; /* number of references */
+ uint32_t gencnt; /* number of changes */
+ uint64_t spare;
+} ipfw_iface_info;
+#define IPFW_IFFLAG_RESOLVED 0x01 /* Interface exists */
+
+typedef struct _ipfw_ta_info {
+ char algoname[64]; /* algorithm name */
+ uint32_t type; /* lookup type */
+ uint32_t flags;
+ uint32_t refcnt;
+ uint32_t spare0;
+ uint64_t spare1;
+} ipfw_ta_info;
+
+typedef struct _ipfw_obj_header {
+ ip_fw3_opheader opheader; /* IP_FW3 opcode */
+ uint32_t spare;
+ uint16_t idx; /* object name index */
+ uint8_t objtype; /* object type */
+ uint8_t objsubtype; /* object subtype */
+ ipfw_obj_ntlv ntlv; /* object name tlv */
+} ipfw_obj_header;
+
+typedef struct _ipfw_obj_lheader {
+ ip_fw3_opheader opheader; /* IP_FW3 opcode */
+ uint32_t set_mask; /* disabled set mask */
+ uint32_t count; /* Total objects count */
+ uint32_t size; /* Total size (incl. header) */
+ uint32_t objsize; /* Size of one object */
+} ipfw_obj_lheader;
+
+#define IPFW_CFG_GET_STATIC 0x01
+#define IPFW_CFG_GET_STATES 0x02
+#define IPFW_CFG_GET_COUNTERS 0x04
+typedef struct _ipfw_cfg_lheader {
+ ip_fw3_opheader opheader; /* IP_FW3 opcode */
+ uint32_t set_mask; /* enabled set mask */
+ uint32_t spare;
+ uint32_t flags; /* Request flags */
+ uint32_t size; /* neded buffer size */
+ uint32_t start_rule;
+ uint32_t end_rule;
+} ipfw_cfg_lheader;
+
+typedef struct _ipfw_range_header {
+ ip_fw3_opheader opheader; /* IP_FW3 opcode */
+ ipfw_range_tlv range;
+} ipfw_range_header;
+
+typedef struct _ipfw_sopt_info {
+ uint16_t opcode;
+ uint8_t version;
+ uint8_t dir;
+ uint8_t spare;
+ uint64_t refcnt;
+} ipfw_sopt_info;
+
#endif /* _IPFW2_H */
diff --git a/freebsd/sys/netinet/ip_gre.c b/freebsd/sys/netinet/ip_gre.c
index 9289be96..36d3ed69 100644
--- a/freebsd/sys/netinet/ip_gre.c
+++ b/freebsd/sys/netinet/ip_gre.c
@@ -1,9 +1,8 @@
#include <machine/rtems-bsd-kernel-space.h>
-/* $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $ */
-
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -31,19 +30,14 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * deencapsulate tunneled packets and send them on
- * output half is in net/if_gre.[ch]
- * This currently handles IPPROTO_GRE, IPPROTO_MOBILE
+ *
+ * $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
-#include <rtems/bsd/local/opt_atalk.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
@@ -55,285 +49,121 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>
-#include <sys/syslog.h>
-#include <net/bpf.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/sysctl.h>
#include <net/ethernet.h>
#include <net/if.h>
-#include <net/netisr.h>
-#include <net/route.h>
-#include <net/raw_cb.h>
+#include <net/if_var.h>
+#include <net/vnet.h>
-#ifdef INET
#include <netinet/in.h>
#include <netinet/in_var.h>
-#include <netinet/in_systm.h>
#include <netinet/ip.h>
+#include <netinet/ip_encap.h>
#include <netinet/ip_var.h>
-#include <netinet/ip_gre.h>
-#include <machine/in_cksum.h>
-#else
-#error ip_gre input without IP?
-#endif
-#ifdef NETATALK
-#include <netatalk/at.h>
-#include <netatalk/at_var.h>
-#include <netatalk/at_extern.h>
+#ifdef INET6
+#include <netinet/ip6.h>
#endif
-/* Needs IP headers. */
#include <net/if_gre.h>
-#include <machine/stdarg.h>
-
-#if 1
-void gre_inet_ntoa(struct in_addr in); /* XXX */
-#endif
-
-static struct gre_softc *gre_lookup(struct mbuf *, u_int8_t);
-
-static struct mbuf *gre_input2(struct mbuf *, int, u_char);
-
-/*
- * De-encapsulate a packet and feed it back through ip input (this
- * routine is called whenever IP gets a packet with proto type
- * IPPROTO_GRE and a local destination address).
- * This really is simple
- */
-void
-gre_input(struct mbuf *m, int off)
+extern struct domain inetdomain;
+static const struct protosw in_gre_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_GRE,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = gre_input,
+ .pr_output = rip_output,
+ .pr_ctlinput = rip_ctlinput,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
+
+#define GRE_TTL 30
+VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
+#define V_ip_gre_ttl VNET(ip_gre_ttl)
+SYSCTL_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(ip_gre_ttl), 0, "");
+
+static int
+in_gre_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
{
- int proto;
-
- proto = (mtod(m, struct ip *))->ip_p;
+ GRE_RLOCK_TRACKER;
+ struct gre_softc *sc;
+ struct ip *ip;
- m = gre_input2(m, off, proto);
+ sc = (struct gre_softc *)arg;
+ if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0)
+ return (0);
+ M_ASSERTPKTHDR(m);
/*
- * If no matching tunnel that is up is found. We inject
- * the mbuf to raw ip socket to see if anyone picks it up.
+ * We expect that payload contains at least IPv4
+ * or IPv6 packet.
*/
- if (m != NULL)
- rip_input(m, off);
-}
-
-/*
- * Decapsulate. Does the real work and is called from gre_input()
- * (above). Returns an mbuf back if packet is not yet processed,
- * and NULL if it needs no further processing. proto is the protocol
- * number of the "calling" foo_input() routine.
- */
-static struct mbuf *
-gre_input2(struct mbuf *m ,int hlen, u_char proto)
-{
- struct greip *gip;
- int isr;
- struct gre_softc *sc;
- u_int16_t flags;
- u_int32_t af;
-
- if ((sc = gre_lookup(m, proto)) == NULL) {
- /* No matching tunnel or tunnel is down. */
- return (m);
- }
-
- if (m->m_len < sizeof(*gip)) {
- m = m_pullup(m, sizeof(*gip));
- if (m == NULL)
- return (NULL);
- }
- gip = mtod(m, struct greip *);
-
- GRE2IFP(sc)->if_ipackets++;
- GRE2IFP(sc)->if_ibytes += m->m_pkthdr.len;
-
- switch (proto) {
- case IPPROTO_GRE:
- hlen += sizeof(struct gre_h);
-
- /* process GRE flags as packet can be of variable len */
- flags = ntohs(gip->gi_flags);
-
- /* Checksum & Offset are present */
- if ((flags & GRE_CP) | (flags & GRE_RP))
- hlen += 4;
- /* We don't support routing fields (variable length) */
- if (flags & GRE_RP)
- return (m);
- if (flags & GRE_KP)
- hlen += 4;
- if (flags & GRE_SP)
- hlen += 4;
-
- switch (ntohs(gip->gi_ptype)) { /* ethertypes */
- case WCCP_PROTOCOL_TYPE:
- if (sc->wccp_ver == WCCP_V2)
- hlen += 4;
- /* FALLTHROUGH */
- case ETHERTYPE_IP: /* shouldn't need a schednetisr(), */
- isr = NETISR_IP;/* as we are in ip_input */
- af = AF_INET;
- break;
-#ifdef INET6
- case ETHERTYPE_IPV6:
- isr = NETISR_IPV6;
- af = AF_INET6;
- break;
-#endif
-#ifdef NETATALK
- case ETHERTYPE_ATALK:
- isr = NETISR_ATALK1;
- af = AF_APPLETALK;
- break;
-#endif
- default:
- /* Others not yet supported. */
- return (m);
- }
- break;
- default:
- /* Others not yet supported. */
- return (m);
- }
-
- if (hlen > m->m_pkthdr.len) {
- m_freem(m);
- return (NULL);
- }
- /* Unlike NetBSD, in FreeBSD m_adj() adjusts m->m_pkthdr.len as well */
- m_adj(m, hlen);
-
- if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
- bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
- }
+ if (m->m_pkthdr.len < sizeof(struct greip) + sizeof(struct ip))
+ return (0);
- if ((GRE2IFP(sc)->if_flags & IFF_MONITOR) != 0) {
- m_freem(m);
- return(NULL);
- }
-
- m->m_pkthdr.rcvif = GRE2IFP(sc);
+ GRE_RLOCK(sc);
+ if (sc->gre_family == 0)
+ goto bad;
- netisr_queue(isr, m);
+ KASSERT(sc->gre_family == AF_INET,
+ ("wrong gre_family: %d", sc->gre_family));
- /* Packet is done, no further processing needed. */
- return (NULL);
+ ip = mtod(m, struct ip *);
+ if (sc->gre_oip.ip_src.s_addr != ip->ip_dst.s_addr ||
+ sc->gre_oip.ip_dst.s_addr != ip->ip_src.s_addr)
+ goto bad;
+
+ GRE_RUNLOCK(sc);
+ return (32 * 2);
+bad:
+ GRE_RUNLOCK(sc);
+ return (0);
}
-/*
- * input routine for IPPRPOTO_MOBILE
- * This is a little bit diffrent from the other modes, as the
- * encapsulating header was not prepended, but instead inserted
- * between IP header and payload
- */
-
-void
-gre_mobile_input(struct mbuf *m, int hlen)
+int
+in_gre_output(struct mbuf *m, int af, int hlen)
{
- struct ip *ip;
- struct mobip_h *mip;
- struct gre_softc *sc;
- int msiz;
-
- if ((sc = gre_lookup(m, IPPROTO_MOBILE)) == NULL) {
- /* No matching tunnel or tunnel is down. */
- m_freem(m);
- return;
- }
-
- if (m->m_len < sizeof(*mip)) {
- m = m_pullup(m, sizeof(*mip));
- if (m == NULL)
- return;
- }
- ip = mtod(m, struct ip *);
- mip = mtod(m, struct mobip_h *);
-
- GRE2IFP(sc)->if_ipackets++;
- GRE2IFP(sc)->if_ibytes += m->m_pkthdr.len;
-
- if (ntohs(mip->mh.proto) & MOB_H_SBIT) {
- msiz = MOB_H_SIZ_L;
- mip->mi.ip_src.s_addr = mip->mh.osrc;
- } else
- msiz = MOB_H_SIZ_S;
-
- if (m->m_len < (ip->ip_hl << 2) + msiz) {
- m = m_pullup(m, (ip->ip_hl << 2) + msiz);
- if (m == NULL)
- return;
- ip = mtod(m, struct ip *);
- mip = mtod(m, struct mobip_h *);
- }
-
- mip->mi.ip_dst.s_addr = mip->mh.odst;
- mip->mi.ip_p = (ntohs(mip->mh.proto) >> 8);
-
- if (gre_in_cksum((u_int16_t *)&mip->mh, msiz) != 0) {
- m_freem(m);
- return;
- }
-
- bcopy((caddr_t)(ip) + (ip->ip_hl << 2) + msiz, (caddr_t)(ip) +
- (ip->ip_hl << 2), m->m_len - msiz - (ip->ip_hl << 2));
- m->m_len -= msiz;
- m->m_pkthdr.len -= msiz;
-
- /*
- * On FreeBSD, rip_input() supplies us with ip->ip_len
- * already converted into host byteorder and also decreases
- * it by the lengh of IP header, however, ip_input() expects
- * that this field is in the original format (network byteorder
- * and full size of IP packet), so that adjust accordingly.
- */
- ip->ip_len = htons(ip->ip_len + sizeof(struct ip) - msiz);
-
- ip->ip_sum = 0;
- ip->ip_sum = in_cksum(m, (ip->ip_hl << 2));
-
- if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
- u_int32_t af = AF_INET;
- bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
- }
-
- if ((GRE2IFP(sc)->if_flags & IFF_MONITOR) != 0) {
- m_freem(m);
- return;
+ struct greip *gi;
+
+ gi = mtod(m, struct greip *);
+ switch (af) {
+ case AF_INET:
+ /*
+ * gre_transmit() has used M_PREPEND() that doesn't guarantee
+ * m_data is contiguous more than hlen bytes. Use m_copydata()
+ * here to avoid m_pullup().
+ */
+ m_copydata(m, hlen + offsetof(struct ip, ip_tos),
+ sizeof(u_char), &gi->gi_ip.ip_tos);
+ m_copydata(m, hlen + offsetof(struct ip, ip_id),
+ sizeof(u_short), (caddr_t)&gi->gi_ip.ip_id);
+ break;
+#ifdef INET6
+ case AF_INET6:
+ gi->gi_ip.ip_tos = 0; /* XXX */
+ ip_fillid(&gi->gi_ip);
+ break;
+#endif
}
-
- m->m_pkthdr.rcvif = GRE2IFP(sc);
-
- netisr_queue(NETISR_IP, m);
+ gi->gi_ip.ip_ttl = V_ip_gre_ttl;
+ gi->gi_ip.ip_len = htons(m->m_pkthdr.len);
+ return (ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL));
}
-/*
- * Find the gre interface associated with our src/dst/proto set.
- *
- * XXXRW: Need some sort of drain/refcount mechanism so that the softc
- * reference remains valid after it's returned from gre_lookup(). Right
- * now, I'm thinking it should be reference-counted with a gre_dropref()
- * when the caller is done with the softc. This is complicated by how
- * to handle destroying the gre softc; probably using a gre_drain() in
- * in_gre.c during destroy.
- */
-static struct gre_softc *
-gre_lookup(struct mbuf *m, u_int8_t proto)
+int
+in_gre_attach(struct gre_softc *sc)
{
- struct ip *ip = mtod(m, struct ip *);
- struct gre_softc *sc;
-
- mtx_lock(&gre_mtx);
- for (sc = LIST_FIRST(&gre_softc_list); sc != NULL;
- sc = LIST_NEXT(sc, sc_list)) {
- if ((sc->g_dst.s_addr == ip->ip_src.s_addr) &&
- (sc->g_src.s_addr == ip->ip_dst.s_addr) &&
- (sc->g_proto == proto) &&
- ((GRE2IFP(sc)->if_flags & IFF_UP) != 0)) {
- mtx_unlock(&gre_mtx);
- return (sc);
- }
- }
- mtx_unlock(&gre_mtx);
- return (NULL);
+ KASSERT(sc->gre_ecookie == NULL, ("gre_ecookie isn't NULL"));
+ sc->gre_ecookie = encap_attach_func(AF_INET, IPPROTO_GRE,
+ in_gre_encapcheck, &in_gre_protosw, sc);
+ if (sc->gre_ecookie == NULL)
+ return (EEXIST);
+ return (0);
}
diff --git a/freebsd/sys/netinet/ip_icmp.c b/freebsd/sys/netinet/ip_icmp.c
index cd581948..f34cc4bd 100644
--- a/freebsd/sys/netinet/ip_icmp.c
+++ b/freebsd/sys/netinet/ip_icmp.c
@@ -35,7 +35,6 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
-#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -44,15 +43,19 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rmlock.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_fib.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -60,16 +63,13 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_icmp.h>
#include <netinet/ip_var.h>
#include <netinet/ip_options.h>
+#include <netinet/sctp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/icmp_var.h>
#ifdef INET
-#ifdef IPSEC
-#include <netipsec/ipsec.h>
-#include <netipsec/key.h>
-#endif
#include <machine/in_cksum.h>
@@ -83,68 +83,79 @@ __FBSDID("$FreeBSD$");
*/
static VNET_DEFINE(int, icmplim) = 200;
#define V_icmplim VNET(icmplim)
-SYSCTL_VNET_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmplim), 0,
"Maximum number of ICMP responses per second");
static VNET_DEFINE(int, icmplim_output) = 1;
#define V_icmplim_output VNET(icmplim_output)
-SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmplim_output), 0,
- "Enable rate limiting of ICMP responses");
+ "Enable logging of ICMP response rate limiting");
#ifdef INET
-VNET_DEFINE(struct icmpstat, icmpstat);
-SYSCTL_VNET_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(icmpstat), icmpstat, "");
+VNET_PCPUSTAT_DEFINE(struct icmpstat, icmpstat);
+VNET_PCPUSTAT_SYSINIT(icmpstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_icmp, ICMPCTL_STATS, stats, struct icmpstat,
+ icmpstat, "ICMP statistics (struct icmpstat, netinet/icmp_var.h)");
+
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(icmpstat);
+#endif /* VIMAGE */
static VNET_DEFINE(int, icmpmaskrepl) = 0;
#define V_icmpmaskrepl VNET(icmpmaskrepl)
-SYSCTL_VNET_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmpmaskrepl), 0,
- "Reply to ICMP Address Mask Request packets.");
+ "Reply to ICMP Address Mask Request packets");
static VNET_DEFINE(u_int, icmpmaskfake) = 0;
#define V_icmpmaskfake VNET(icmpmaskfake)
-SYSCTL_VNET_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW,
+SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmpmaskfake), 0,
- "Fake reply to ICMP Address Mask Request packets.");
+ "Fake reply to ICMP Address Mask Request packets");
VNET_DEFINE(int, drop_redirect) = 0;
+#define V_drop_redirect VNET(drop_redirect)
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(drop_redirect), 0,
+ "Ignore ICMP redirects");
static VNET_DEFINE(int, log_redirect) = 0;
#define V_log_redirect VNET(log_redirect)
-SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(log_redirect), 0,
"Log ICMP redirects to the console");
static VNET_DEFINE(char, reply_src[IFNAMSIZ]);
#define V_reply_src VNET(reply_src)
-SYSCTL_VNET_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW,
+SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(reply_src), IFNAMSIZ,
- "icmp reply source for non-local packets.");
+ "ICMP reply source for non-local packets");
static VNET_DEFINE(int, icmp_rfi) = 0;
#define V_icmp_rfi VNET(icmp_rfi)
-SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmp_rfi), 0,
"ICMP reply from incoming interface for non-local packets");
static VNET_DEFINE(int, icmp_quotelen) = 8;
#define V_icmp_quotelen VNET(icmp_quotelen)
-SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmp_quotelen), 0,
"Number of bytes from original packet to quote in ICMP reply");
-/*
- * ICMP broadcast echo sysctl
- */
static VNET_DEFINE(int, icmpbmcastecho) = 0;
#define V_icmpbmcastecho VNET(icmpbmcastecho)
-SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmpbmcastecho), 0,
- "");
+ "Reply to multicast ICMP Echo Request and Timestamp packets");
+static VNET_DEFINE(int, icmptstamprepl) = 1;
+#define V_icmptstamprepl VNET(icmptstamprepl)
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, tstamprepl, CTLFLAG_RW,
+ &VNET_NAME(icmptstamprepl), 0,
+ "Respond to ICMP Timestamp packets");
#ifdef ICMPPRINTFS
int icmpprintfs = 0;
@@ -155,39 +166,6 @@ static void icmp_send(struct mbuf *, struct mbuf *);
extern struct protosw inetsw[];
-static int
-sysctl_net_icmp_drop_redir(SYSCTL_HANDLER_ARGS)
-{
- int error, new;
- int i;
- struct radix_node_head *rnh;
-
- new = V_drop_redirect;
- error = sysctl_handle_int(oidp, &new, 0, req);
- if (error == 0 && req->newptr) {
- new = (new != 0) ? 1 : 0;
-
- if (new == V_drop_redirect)
- return (0);
-
- for (i = 0; i < rt_numfibs; i++) {
- if ((rnh = rt_tables_get_rnh(i, AF_INET)) == NULL)
- continue;
- RADIX_NODE_HEAD_LOCK(rnh);
- in_setmatchfunc(rnh, new);
- RADIX_NODE_HEAD_UNLOCK(rnh);
- }
-
- V_drop_redirect = new;
- }
-
- return (error);
-}
-
-SYSCTL_VNET_PROC(_net_inet_icmp, OID_AUTO, drop_redirect,
- CTLTYPE_INT|CTLFLAG_RW, 0, 0,
- sysctl_net_icmp_drop_redir, "I", "Ignore ICMP redirects");
-
/*
* Kernel module interface for updating icmpstat. The argument is an index
* into icmpstat treated as an array of u_long. While this encodes the
@@ -199,7 +177,7 @@ void
kmod_icmpstat_inc(int statnum)
{
- (*((u_long *)&V_icmpstat + statnum))++;
+ counter_u64_add(VNET(icmpstat)[statnum], 1);
}
/*
@@ -231,7 +209,7 @@ icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu)
*/
if (n->m_flags & M_DECRYPTED)
goto freeit;
- if (oip->ip_off & ~(IP_MF|IP_DF))
+ if (oip->ip_off & htons(~(IP_MF|IP_DF)))
goto freeit;
if (n->m_flags & (M_BCAST|M_MCAST))
goto freeit;
@@ -247,7 +225,7 @@ icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu)
/*
* Calculate length to quote from original packet and
* prevent the ICMP mbuf from overflowing.
- * Unfortunatly this is non-trivial since ip_forward()
+ * Unfortunately this is non-trivial since ip_forward()
* sends us truncated packets.
*/
nlen = m_length(n, NULL);
@@ -265,25 +243,54 @@ icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu)
tcphlen = th->th_off << 2;
if (tcphlen < sizeof(struct tcphdr))
goto freeit;
- if (oip->ip_len < oiphlen + tcphlen)
+ if (ntohs(oip->ip_len) < oiphlen + tcphlen)
goto freeit;
if (oiphlen + tcphlen > n->m_len && n->m_next == NULL)
goto stdreply;
if (n->m_len < oiphlen + tcphlen &&
((n = m_pullup(n, oiphlen + tcphlen)) == NULL))
goto freeit;
- icmpelen = max(tcphlen, min(V_icmp_quotelen, oip->ip_len - oiphlen));
+ icmpelen = max(tcphlen, min(V_icmp_quotelen,
+ ntohs(oip->ip_len) - oiphlen));
+ } else if (oip->ip_p == IPPROTO_SCTP) {
+ struct sctphdr *sh;
+ struct sctp_chunkhdr *ch;
+
+ if (ntohs(oip->ip_len) < oiphlen + sizeof(struct sctphdr))
+ goto stdreply;
+ if (oiphlen + sizeof(struct sctphdr) > n->m_len &&
+ n->m_next == NULL)
+ goto stdreply;
+ if (n->m_len < oiphlen + sizeof(struct sctphdr) &&
+ (n = m_pullup(n, oiphlen + sizeof(struct sctphdr))) == NULL)
+ goto freeit;
+ icmpelen = max(sizeof(struct sctphdr),
+ min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen));
+ sh = (struct sctphdr *)((caddr_t)oip + oiphlen);
+ if (ntohl(sh->v_tag) == 0 &&
+ ntohs(oip->ip_len) >= oiphlen + sizeof(struct sctphdr) + 8 &&
+ (n->m_len >= oiphlen + sizeof(struct sctphdr) + 8 ||
+ n->m_next != NULL)) {
+ if (n->m_len < oiphlen + sizeof(struct sctphdr) + 8 &&
+ (n = m_pullup(n, oiphlen + sizeof(struct sctphdr) + 8)) == NULL)
+ goto freeit;
+ ch = (struct sctp_chunkhdr *)(sh + 1);
+ if (ch->chunk_type == SCTP_INITIATION) {
+ icmpelen = max(sizeof(struct sctphdr) + 8,
+ min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen));
+ }
+ }
} else
-stdreply: icmpelen = max(8, min(V_icmp_quotelen, oip->ip_len - oiphlen));
+stdreply: icmpelen = max(8, min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen));
icmplen = min(oiphlen + icmpelen, nlen);
if (icmplen < sizeof(struct ip))
goto freeit;
if (MHLEN > sizeof(struct ip) + ICMP_MINLEN + icmplen)
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
else
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
goto freeit;
#ifdef MAC
@@ -324,8 +331,6 @@ stdreply: icmpelen = max(8, min(V_icmp_quotelen, oip->ip_len - oiphlen));
*/
m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
nip = &icp->icmp_ip;
- nip->ip_len = htons(nip->ip_len);
- nip->ip_off = htons(nip->ip_off);
/*
* Set up ICMP message mbuf and copy old IP header (without options
@@ -340,7 +345,7 @@ stdreply: icmpelen = max(8, min(V_icmp_quotelen, oip->ip_len - oiphlen));
m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
nip = mtod(m, struct ip *);
bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
- nip->ip_len = m->m_len;
+ nip->ip_len = htons(m->m_len);
nip->ip_v = IPVERSION;
nip->ip_hl = 5;
nip->ip_p = IPPROTO_ICMP;
@@ -355,19 +360,22 @@ freeit:
/*
* Process a received ICMP message.
*/
-void
-icmp_input(struct mbuf *m, int off)
+int
+icmp_input(struct mbuf **mp, int *offp, int proto)
{
struct icmp *icp;
struct in_ifaddr *ia;
+ struct mbuf *m = *mp;
struct ip *ip = mtod(m, struct ip *);
struct sockaddr_in icmpsrc, icmpdst, icmpgw;
- int hlen = off;
- int icmplen = ip->ip_len;
+ int hlen = *offp;
+ int icmplen = ntohs(ip->ip_len) - *offp;
int i, code;
void (*ctlfunc)(int, struct sockaddr *, void *);
int fibnum;
+ *mp = NULL;
+
/*
* Locate icmp structure in mbuf, and check
* that not corrupted and of at least minimum length.
@@ -387,7 +395,7 @@ icmp_input(struct mbuf *m, int off)
i = hlen + min(icmplen, ICMP_ADVLENMIN);
if (m->m_len < i && (m = m_pullup(m, i)) == NULL) {
ICMPSTAT_INC(icps_tooshort);
- return;
+ return (IPPROTO_DONE);
}
ip = mtod(m, struct ip *);
m->m_len -= hlen;
@@ -400,19 +408,6 @@ icmp_input(struct mbuf *m, int off)
m->m_len += hlen;
m->m_data -= hlen;
- if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
- /*
- * Deliver very specific ICMP type only.
- */
- switch (icp->icmp_type) {
- case ICMP_UNREACH:
- case ICMP_TIMXCEED:
- break;
- default:
- goto freeit;
- }
- }
-
#ifdef ICMPPRINTFS
if (icmpprintfs)
printf("icmp_input, type %d code %d\n", icp->icmp_type,
@@ -489,12 +484,6 @@ icmp_input(struct mbuf *m, int off)
if (code > 1)
goto badcode;
code = PRC_PARAMPROB;
- goto deliver;
-
- case ICMP_SOURCEQUENCH:
- if (code)
- goto badcode;
- code = PRC_QUENCH;
deliver:
/*
* Problem with datagram; advise higher level routines.
@@ -504,7 +493,6 @@ icmp_input(struct mbuf *m, int off)
ICMPSTAT_INC(icps_badlen);
goto freeit;
}
- icp->icmp_ip.ip_len = ntohs(icp->icmp_ip.ip_len);
/* Discard ICMP's in response to multicast packets */
if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
goto badcode;
@@ -517,6 +505,23 @@ icmp_input(struct mbuf *m, int off)
* XXX if the packet contains [IPv4 AH TCP], we can't make a
* notification to TCP layer.
*/
+ i = sizeof(struct ip) + min(icmplen, ICMP_ADVLENPREF(icp));
+ ip_stripoptions(m);
+ if (m->m_len < i && (m = m_pullup(m, i)) == NULL) {
+ /* This should actually not happen */
+ ICMPSTAT_INC(icps_tooshort);
+ return (IPPROTO_DONE);
+ }
+ ip = mtod(m, struct ip *);
+ icp = (struct icmp *)(ip + 1);
+ /*
+ * The upper layer handler can rely on:
+ * - The outer IP header has no options.
+ * - The outer IP header, the ICMP header, the inner IP header,
+ * and the first n bytes of the inner payload are contiguous.
+ * n is at least 8, but might be larger based on
+ * ICMP_ADVLENPREF. See its definition in ip_icmp.h.
+ */
ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
if (ctlfunc)
(*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
@@ -540,6 +545,8 @@ icmp_input(struct mbuf *m, int off)
goto reflect;
case ICMP_TSTAMP:
+ if (V_icmptstamprepl == 0)
+ break;
if (!V_icmpbmcastecho
&& (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
ICMPSTAT_INC(icps_bmcasttstamp);
@@ -597,11 +604,10 @@ icmp_input(struct mbuf *m, int off)
}
ifa_free(&ia->ia_ifa);
reflect:
- ip->ip_len += hlen; /* since ip_input deducts this */
ICMPSTAT_INC(icps_reflect);
ICMPSTAT_INC(icps_outhist[icp->icmp_type]);
icmp_reflect(m);
- return;
+ return (IPPROTO_DONE);
case ICMP_REDIRECT:
if (V_log_redirect) {
@@ -658,9 +664,6 @@ reflect:
(struct sockaddr *)&icmpgw, fibnum);
}
pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
-#ifdef IPSEC
- key_sa_routechange((struct sockaddr *)&icmpsrc);
-#endif
break;
/*
@@ -673,16 +676,19 @@ reflect:
case ICMP_TSTAMPREPLY:
case ICMP_IREQREPLY:
case ICMP_MASKREPLY:
+ case ICMP_SOURCEQUENCH:
default:
break;
}
raw:
- rip_input(m, off);
- return;
+ *mp = m;
+ rip_input(mp, offp, proto);
+ return (IPPROTO_DONE);
freeit:
m_freem(m);
+ return (IPPROTO_DONE);
}
/*
@@ -691,12 +697,14 @@ freeit:
static void
icmp_reflect(struct mbuf *m)
{
+ struct rm_priotracker in_ifa_tracker;
struct ip *ip = mtod(m, struct ip *);
struct ifaddr *ifa;
struct ifnet *ifp;
struct in_ifaddr *ia;
struct in_addr t;
- struct mbuf *opts = 0;
+ struct nhop4_extended nh_ext;
+ struct mbuf *opts = NULL;
int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
if (IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
@@ -707,8 +715,6 @@ icmp_reflect(struct mbuf *m)
goto done; /* Ip_output() will check for broadcast */
}
- m_addr_changed(m);
-
t = ip->ip_dst;
ip->ip_dst = ip->ip_src;
@@ -718,15 +724,15 @@ icmp_reflect(struct mbuf *m)
* If the incoming packet was addressed directly to one of our
* own addresses, use dst as the src for the reply.
*/
- IN_IFADDR_RLOCK();
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
LIST_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash) {
if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) {
t = IA_SIN(ia)->sin_addr;
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
goto match;
}
}
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
/*
* If the incoming packet was addressed to one of our broadcast
@@ -791,14 +797,12 @@ icmp_reflect(struct mbuf *m)
* When we don't have a route back to the packet source, stop here
* and drop the packet.
*/
- ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
- if (ia == NULL) {
+ if (fib4_lookup_nh_ext(M_GETFIB(m), ip->ip_dst, 0, 0, &nh_ext) != 0) {
m_freem(m);
ICMPSTAT_INC(icps_noroute);
goto done;
}
- t = IA_SIN(ia)->sin_addr;
- ifa_free(&ia->ia_ifa);
+ t = nh_ext.nh_src;
match:
#ifdef MAC
mac_netinet_icmp_replyinplace(m);
@@ -816,8 +820,8 @@ match:
* add on any record-route or timestamp options.
*/
cp = (u_char *) (ip + 1);
- if ((opts = ip_srcroute(m)) == 0 &&
- (opts = m_gethdr(M_DONTWAIT, MT_DATA))) {
+ if ((opts = ip_srcroute(m)) == NULL &&
+ (opts = m_gethdr(M_NOWAIT, MT_DATA))) {
opts->m_len = sizeof(struct in_addr);
mtod(opts, struct in_addr *)->s_addr = 0;
}
@@ -865,19 +869,7 @@ match:
printf("%d\n", opts->m_len);
#endif
}
- /*
- * Now strip out original options by copying rest of first
- * mbuf's data back, and adjust the IP length.
- */
- ip->ip_len -= optlen;
- ip->ip_v = IPVERSION;
- ip->ip_hl = 5;
- m->m_len -= optlen;
- if (m->m_flags & M_PKTHDR)
- m->m_pkthdr.len -= optlen;
- optlen += sizeof(struct ip);
- bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
- (unsigned)(m->m_len - sizeof(struct ip)));
+ ip_stripoptions(m);
}
m_tag_delete_nonpersistent(m);
m->m_flags &= ~(M_BCAST|M_MCAST);
@@ -903,7 +895,7 @@ icmp_send(struct mbuf *m, struct mbuf *opts)
m->m_len -= hlen;
icp = mtod(m, struct icmp *);
icp->icmp_cksum = 0;
- icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
+ icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen);
m->m_data -= hlen;
m->m_len += hlen;
m->m_pkthdr.rcvif = (struct ifnet *)0;
@@ -919,7 +911,7 @@ icmp_send(struct mbuf *m, struct mbuf *opts)
}
/*
- * Return milliseconds since 00:00 GMT in network format.
+ * Return milliseconds since 00:00 UTC in network format.
*/
uint32_t
iptime(void)
diff --git a/freebsd/sys/netinet/ip_icmp.h b/freebsd/sys/netinet/ip_icmp.h
index 9cabdb58..64db0064 100644
--- a/freebsd/sys/netinet/ip_icmp.h
+++ b/freebsd/sys/netinet/ip_icmp.h
@@ -99,7 +99,7 @@ struct icmp {
struct id_ts { /* ICMP Timestamp */
/*
* The next 3 fields are in network format,
- * milliseconds since 00:00 GMT
+ * milliseconds since 00:00 UTC
*/
uint32_t its_otime; /* Originate */
uint32_t its_rtime; /* Receive */
@@ -136,6 +136,14 @@ struct icmp {
#define ICMP_ADVLENMIN (8 + sizeof (struct ip) + 8) /* min */
#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8)
/* N.B.: must separately check that ip_hl >= 5 */
+ /* This is the minimum length required by RFC 792. */
+/*
+ * ICMP_ADVLENPREF is the preferred number of bytes which should be contiguous.
+ * SCTP needs additional 12 bytes to be able to access the initiate tag
+ * in packets containing an INIT chunk. For also supporting SCTP/UDP,
+ * additional 8 bytes are needed.
+ */
+#define ICMP_ADVLENPREF(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8 + 8 + 12)
/*
* Definition of type and code field values.
@@ -207,7 +215,7 @@ struct icmp {
#ifdef _KERNEL
void icmp_error(struct mbuf *, int, int, uint32_t, int);
-void icmp_input(struct mbuf *, int);
+int icmp_input(struct mbuf **, int *, int);
int ip_next_mtu(int, int);
#endif
diff --git a/freebsd/sys/netinet/ip_id.c b/freebsd/sys/netinet/ip_id.c
index a76c7b78..17352cfb 100644
--- a/freebsd/sys/netinet/ip_id.c
+++ b/freebsd/sys/netinet/ip_id.c
@@ -76,119 +76,149 @@ __FBSDID("$FreeBSD$");
* enabled.
*/
-#include <sys/types.h>
-#include <sys/malloc.h>
#include <rtems/bsd/sys/param.h>
-#include <sys/time.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
#include <sys/kernel.h>
-#include <sys/libkern.h>
+#include <sys/malloc.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mutex.h>
#include <sys/random.h>
-#include <sys/systm.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
+#include <sys/bitstring.h>
+
+#include <net/vnet.h>
+
#include <netinet/in.h>
+#include <netinet/ip.h>
#include <netinet/ip_var.h>
-#include <sys/bitstring.h>
+/*
+ * By default we generate IP ID only for non-atomic datagrams, as
+ * suggested by RFC6864. We use per-CPU counter for that, or if
+ * user wants to, we can turn on random ID generation.
+ */
+static VNET_DEFINE(int, ip_rfc6864) = 1;
+static VNET_DEFINE(int, ip_do_randomid) = 0;
+#define V_ip_rfc6864 VNET(ip_rfc6864)
+#define V_ip_do_randomid VNET(ip_do_randomid)
+
+/*
+ * Random ID state engine.
+ */
static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state");
+static VNET_DEFINE(uint16_t *, id_array);
+static VNET_DEFINE(bitstr_t *, id_bits);
+static VNET_DEFINE(int, array_ptr);
+static VNET_DEFINE(int, array_size);
+static VNET_DEFINE(int, random_id_collisions);
+static VNET_DEFINE(int, random_id_total);
+static VNET_DEFINE(struct mtx, ip_id_mtx);
+#define V_id_array VNET(id_array)
+#define V_id_bits VNET(id_bits)
+#define V_array_ptr VNET(array_ptr)
+#define V_array_size VNET(array_size)
+#define V_random_id_collisions VNET(random_id_collisions)
+#define V_random_id_total VNET(random_id_total)
+#define V_ip_id_mtx VNET(ip_id_mtx)
-static u_int16_t *id_array = NULL;
-static bitstr_t *id_bits = NULL;
-static int array_ptr = 0;
-static int array_size = 8192;
-static int random_id_collisions = 0;
-static int random_id_total = 0;
-static struct mtx ip_id_mtx;
+/*
+ * Non-random ID state engine is simply a per-cpu counter.
+ */
+static VNET_DEFINE(counter_u64_t, ip_id);
+#define V_ip_id VNET(ip_id)
-static void ip_initid(void);
+static int sysctl_ip_randomid(SYSCTL_HANDLER_ARGS);
static int sysctl_ip_id_change(SYSCTL_HANDLER_ARGS);
-
-MTX_SYSINIT(ip_id_mtx, &ip_id_mtx, "ip_id_mtx", MTX_DEF);
+static void ip_initid(int);
+static uint16_t ip_randomid(void);
+static void ipid_sysinit(void);
+static void ipid_sysuninit(void);
SYSCTL_DECL(_net_inet_ip);
-SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id_period, CTLTYPE_INT|CTLFLAG_RW,
- &array_size, 0, sysctl_ip_id_change, "IU", "IP ID Array size");
-SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_collisions, CTLFLAG_RD,
- &random_id_collisions, 0, "Count of IP ID collisions");
-SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_total, CTLFLAG_RD,
- &random_id_total, 0, "Count of IP IDs created");
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id,
+ CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(ip_do_randomid), 0, sysctl_ip_randomid, "IU",
+ "Assign random ip_id values");
+SYSCTL_INT(_net_inet_ip, OID_AUTO, rfc6864, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(ip_rfc6864), 0,
+ "Use constant IP ID for atomic datagrams");
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id_period,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET,
+ &VNET_NAME(array_size), 0, sysctl_ip_id_change, "IU", "IP ID Array size");
+SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_collisions,
+ CTLFLAG_RD | CTLFLAG_VNET,
+ &VNET_NAME(random_id_collisions), 0, "Count of IP ID collisions");
+SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_total, CTLFLAG_RD | CTLFLAG_VNET,
+ &VNET_NAME(random_id_total), 0, "Count of IP IDs created");
+
+static int
+sysctl_ip_randomid(SYSCTL_HANDLER_ARGS)
+{
+ int error, new;
+
+ new = V_ip_do_randomid;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error || req->newptr == NULL)
+ return (error);
+ if (new != 0 && new != 1)
+ return (EINVAL);
+ if (new == V_ip_do_randomid)
+ return (0);
+ if (new == 1 && V_ip_do_randomid == 0)
+ ip_initid(8192);
+ /* We don't free memory when turning random ID off, due to race. */
+ V_ip_do_randomid = new;
+ return (0);
+}
static int
sysctl_ip_id_change(SYSCTL_HANDLER_ARGS)
{
int error, new;
- new = array_size;
+ new = V_array_size;
error = sysctl_handle_int(oidp, &new, 0, req);
if (error == 0 && req->newptr) {
- if (new >= 512 && new <= 32768) {
- mtx_lock(&ip_id_mtx);
- array_size = new;
- ip_initid();
- mtx_unlock(&ip_id_mtx);
- } else
+ if (new >= 512 && new <= 32768)
+ ip_initid(new);
+ else
error = EINVAL;
}
return (error);
}
-/*
- * ip_initid() runs with a mutex held and may execute in a network context.
- * As a result, it uses M_NOWAIT. Ideally, we would always do this
- * allocation from the sysctl contact and have it be an invariant that if
- * this random ID allocation mode is selected, the buffers are present. This
- * would also avoid potential network context failures of IP ID generation.
- */
static void
-ip_initid(void)
+ip_initid(int new_size)
{
+ uint16_t *new_array;
+ bitstr_t *new_bits;
- mtx_assert(&ip_id_mtx, MA_OWNED);
+ new_array = malloc(new_size * sizeof(uint16_t), M_IPID,
+ M_WAITOK | M_ZERO);
+ new_bits = malloc(bitstr_size(65536), M_IPID, M_WAITOK | M_ZERO);
- if (id_array != NULL) {
- free(id_array, M_IPID);
- free(id_bits, M_IPID);
- }
- random_id_collisions = 0;
- random_id_total = 0;
- array_ptr = 0;
- id_array = (u_int16_t *) malloc(array_size * sizeof(u_int16_t),
- M_IPID, M_NOWAIT | M_ZERO);
- id_bits = (bitstr_t *) malloc(bitstr_size(65536), M_IPID,
- M_NOWAIT | M_ZERO);
- if (id_array == NULL || id_bits == NULL) {
- /* Neither or both. */
- if (id_array != NULL) {
- free(id_array, M_IPID);
- id_array = NULL;
- }
- if (id_bits != NULL) {
- free(id_bits, M_IPID);
- id_bits = NULL;
- }
+ mtx_lock(&V_ip_id_mtx);
+ if (V_id_array != NULL) {
+ free(V_id_array, M_IPID);
+ free(V_id_bits, M_IPID);
}
+ V_id_array = new_array;
+ V_id_bits = new_bits;
+ V_array_size = new_size;
+ V_array_ptr = 0;
+ V_random_id_collisions = 0;
+ V_random_id_total = 0;
+ mtx_unlock(&V_ip_id_mtx);
}
-u_int16_t
+static uint16_t
ip_randomid(void)
{
- u_int16_t new_id;
-
- mtx_lock(&ip_id_mtx);
- if (id_array == NULL)
- ip_initid();
-
- /*
- * Fail gracefully; return a fixed id if memory allocation failed;
- * ideally we wouldn't do allocation in this context in order to
- * avoid the possibility of this failure mode.
- */
- if (id_array == NULL) {
- mtx_unlock(&ip_id_mtx);
- return (1);
- }
+ uint16_t new_id;
+ mtx_lock(&V_ip_id_mtx);
/*
* To avoid a conflict with the zeros that the array is initially
* filled with, we never hand out an id of zero.
@@ -196,16 +226,76 @@ ip_randomid(void)
new_id = 0;
do {
if (new_id != 0)
- random_id_collisions++;
+ V_random_id_collisions++;
arc4rand(&new_id, sizeof(new_id), 0);
- } while (bit_test(id_bits, new_id) || new_id == 0);
- bit_clear(id_bits, id_array[array_ptr]);
- bit_set(id_bits, new_id);
- id_array[array_ptr] = new_id;
- array_ptr++;
- if (array_ptr == array_size)
- array_ptr = 0;
- random_id_total++;
- mtx_unlock(&ip_id_mtx);
+ } while (bit_test(V_id_bits, new_id) || new_id == 0);
+ bit_clear(V_id_bits, V_id_array[V_array_ptr]);
+ bit_set(V_id_bits, new_id);
+ V_id_array[V_array_ptr] = new_id;
+ V_array_ptr++;
+ if (V_array_ptr == V_array_size)
+ V_array_ptr = 0;
+ V_random_id_total++;
+ mtx_unlock(&V_ip_id_mtx);
return (new_id);
}
+
+void
+ip_fillid(struct ip *ip)
+{
+
+ /*
+ * Per RFC6864 Section 4
+ *
+ * o Atomic datagrams: (DF==1) && (MF==0) && (frag_offset==0)
+ * o Non-atomic datagrams: (DF==0) || (MF==1) || (frag_offset>0)
+ */
+ if (V_ip_rfc6864 && (ip->ip_off & htons(IP_DF)) == htons(IP_DF))
+ ip->ip_id = 0;
+ else if (V_ip_do_randomid)
+ ip->ip_id = ip_randomid();
+ else {
+ counter_u64_add(V_ip_id, 1);
+ /*
+ * There are two issues about this trick, to be kept in mind.
+ * 1) We can migrate between counter_u64_add() and next
+ * line, and grab counter from other CPU, resulting in too
+ * quick ID reuse. This is tolerable in our particular case,
+ * since probability of such event is much lower then reuse
+ * of ID due to legitimate overflow, that at modern Internet
+ * speeds happens all the time.
+ * 2) We are relying on the fact that counter(9) is based on
+ * UMA_ZONE_PCPU uma(9) zone. We also take only last
+ * sixteen bits of a counter, so we don't care about the
+ * fact that machines with 32-bit word update their counters
+ * not atomically.
+ */
+ ip->ip_id = htons((*(uint64_t *)zpcpu_get(V_ip_id)) & 0xffff);
+ }
+}
+
+static void
+ipid_sysinit(void)
+{
+ int i;
+
+ mtx_init(&V_ip_id_mtx, "ip_id_mtx", NULL, MTX_DEF);
+ V_ip_id = counter_u64_alloc(M_WAITOK);
+
+ CPU_FOREACH(i)
+ arc4rand(zpcpu_get_cpu(V_ip_id, i), sizeof(uint64_t), 0);
+}
+VNET_SYSINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysinit, NULL);
+
+static void
+ipid_sysuninit(void)
+{
+
+ if (V_id_array != NULL) {
+ free(V_id_array, M_IPID);
+ free(V_id_bits, M_IPID);
+ }
+ counter_u64_free(V_ip_id);
+ mtx_destroy(&V_ip_id_mtx);
+}
+VNET_SYSUNINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ipid_sysuninit, NULL);
diff --git a/freebsd/sys/netinet/ip_input.c b/freebsd/sys/netinet/ip_input.c
index 24002aac..425dbc1f 100644
--- a/freebsd/sys/netinet/ip_input.c
+++ b/freebsd/sys/netinet/ip_input.c
@@ -35,13 +35,14 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_bootp.h>
-#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipstealth.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_route.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/hhook.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/domain.h>
@@ -50,7 +51,9 @@ __FBSDID("$FreeBSD$");
#include <sys/time.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
+#include <sys/rmlock.h>
#include <sys/rwlock.h>
+#include <sys/sdt.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
@@ -61,10 +64,11 @@ __FBSDID("$FreeBSD$");
#include <net/if_dl.h>
#include <net/route.h>
#include <net/netisr.h>
+#include <net/rss_config.h>
#include <net/vnet.h>
-#include <net/flowtable.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
@@ -77,7 +81,10 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_carp.h>
#ifdef IPSEC
#include <netinet/ip_ipsec.h>
+#include <netipsec/ipsec.h>
+#include <netipsec/key.h>
#endif /* IPSEC */
+#include <netinet/in_rss.h>
#include <sys/socketvar.h>
@@ -87,39 +94,30 @@ __FBSDID("$FreeBSD$");
CTASSERT(sizeof(struct ip) == 20);
#endif
-struct rwlock in_ifaddr_lock;
-RW_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
+/* IP reassembly functions are defined in ip_reass.c. */
+extern void ipreass_init(void);
+extern void ipreass_drain(void);
+extern void ipreass_slowtimo(void);
+#ifdef VIMAGE
+extern void ipreass_destroy(void);
+#endif
+
+struct rmlock in_ifaddr_lock;
+RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
VNET_DEFINE(int, rsvp_on);
VNET_DEFINE(int, ipforwarding);
-SYSCTL_VNET_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ipforwarding), 0,
"Enable IP forwarding between interfaces");
static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */
#define V_ipsendredirects VNET(ipsendredirects)
-SYSCTL_VNET_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ipsendredirects), 0,
"Enable sending IP redirects");
-static VNET_DEFINE(int, ip_keepfaith);
-#define V_ip_keepfaith VNET(ip_keepfaith)
-SYSCTL_VNET_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
- &VNET_NAME(ip_keepfaith), 0,
- "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
-
-static VNET_DEFINE(int, ip_sendsourcequench);
-#define V_ip_sendsourcequench VNET(ip_sendsourcequench)
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
- &VNET_NAME(ip_sendsourcequench), 0,
- "Enable the transmission of source quench packets");
-
-VNET_DEFINE(int, ip_do_randomid);
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW,
- &VNET_NAME(ip_do_randomid), 0,
- "Assign random ip_id values");
-
/*
* XXX - Setting ip_checkinterface mostly implements the receive side of
* the Strong ES model described in RFC 1122, but since the routing table
@@ -135,7 +133,7 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW,
*/
static VNET_DEFINE(int, ip_checkinterface);
#define V_ip_checkinterface VNET(ip_checkinterface)
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip_checkinterface), 0,
"Verify packet arrives on correct interface");
@@ -145,8 +143,32 @@ static struct netisr_handler ip_nh = {
.nh_name = "ip",
.nh_handler = ip_input,
.nh_proto = NETISR_IP,
+#ifdef RSS
+ .nh_m2cpuid = rss_soft_m2cpuid_v4,
+ .nh_policy = NETISR_POLICY_CPU,
+ .nh_dispatch = NETISR_DISPATCH_HYBRID,
+#else
.nh_policy = NETISR_POLICY_FLOW,
+#endif
+};
+
+#ifdef RSS
+/*
+ * Directly dispatched frames are currently assumed
+ * to have a flowid already calculated.
+ *
+ * It should likely have something that assert it
+ * actually has valid flow details.
+ */
+static struct netisr_handler ip_direct_nh = {
+ .nh_name = "ip_direct",
+ .nh_handler = ip_direct_input,
+ .nh_proto = NETISR_IP_DIRECT,
+ .nh_m2cpuid = rss_soft_m2cpuid_v4,
+ .nh_policy = NETISR_POLICY_CPU,
+ .nh_dispatch = NETISR_DISPATCH_HYBRID,
};
+#endif
extern struct domain inetdomain;
extern struct protosw inetsw[];
@@ -155,41 +177,6 @@ VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */
VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */
VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */
-VNET_DEFINE(struct ipstat, ipstat);
-SYSCTL_VNET_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(ipstat), ipstat,
- "IP statistics (struct ipstat, netinet/ip_var.h)");
-
-static VNET_DEFINE(uma_zone_t, ipq_zone);
-static VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]);
-static struct mtx ipqlock;
-
-#define V_ipq_zone VNET(ipq_zone)
-#define V_ipq VNET(ipq)
-
-#define IPQ_LOCK() mtx_lock(&ipqlock)
-#define IPQ_UNLOCK() mtx_unlock(&ipqlock)
-#define IPQ_LOCK_INIT() mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF)
-#define IPQ_LOCK_ASSERT() mtx_assert(&ipqlock, MA_OWNED)
-
-static void maxnipq_update(void);
-static void ipq_zone_change(void *);
-static void ip_drain_locked(void);
-
-static VNET_DEFINE(int, maxnipq); /* Administrative limit on # reass queues. */
-static VNET_DEFINE(int, nipq); /* Total # of reass queues */
-#define V_maxnipq VNET(maxnipq)
-#define V_nipq VNET(nipq)
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD,
- &VNET_NAME(nipq), 0,
- "Current number of IPv4 fragment reassembly queue entries");
-
-static VNET_DEFINE(int, maxfragsperpacket);
-#define V_maxfragsperpacket VNET(maxfragsperpacket)
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
- &VNET_NAME(maxfragsperpacket), 0,
- "Maximum number of IPv4 fragments allowed per packet");
-
#ifdef IPCTL_DEFMTU
SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
&ip_mtu, 0, "Default MTU");
@@ -197,42 +184,39 @@ SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
#ifdef IPSTEALTH
VNET_DEFINE(int, ipstealth);
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ipstealth), 0,
"IP stealth mode, no TTL decrementation on forwarding");
#endif
-#ifdef FLOWTABLE
-static VNET_DEFINE(int, ip_output_flowtable_size) = 2048;
-VNET_DEFINE(struct flowtable *, ip_ft);
-#define V_ip_output_flowtable_size VNET(ip_output_flowtable_size)
-
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
- &VNET_NAME(ip_output_flowtable_size), 2048,
- "number of entries in the per-cpu output flow caches");
-#endif
+/*
+ * IP statistics are stored in the "array" of counter(9)s.
+ */
+VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
+VNET_PCPUSTAT_SYSINIT(ipstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
+ "IP statistics (struct ipstat, netinet/ip_var.h)");
-static void ip_freef(struct ipqhead *, struct ipq *);
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(ipstat);
+#endif /* VIMAGE */
/*
* Kernel module interface for updating ipstat. The argument is an index
- * into ipstat treated as an array of u_long. While this encodes the general
- * layout of ipstat into the caller, it doesn't encode its location, so that
- * future changes to add, for example, per-CPU stats support won't cause
- * binary compatibility problems for kernel modules.
+ * into ipstat treated as an array.
*/
void
kmod_ipstat_inc(int statnum)
{
- (*((u_long *)&V_ipstat + statnum))++;
+ counter_u64_add(VNET(ipstat)[statnum], 1);
}
void
kmod_ipstat_dec(int statnum)
{
- (*((u_long *)&V_ipstat + statnum))--;
+ counter_u64_add(VNET(ipstat)[statnum], -1);
}
static int
@@ -273,6 +257,46 @@ SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I",
"Number of packets dropped from the IP input queue");
+#ifdef RSS
+static int
+sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
+{
+ int error, qlimit;
+
+ netisr_getqlimit(&ip_direct_nh, &qlimit);
+ error = sysctl_handle_int(oidp, &qlimit, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (qlimit < 1)
+ return (EINVAL);
+ return (netisr_setqlimit(&ip_direct_nh, qlimit));
+}
+SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen,
+ CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I",
+ "Maximum size of the IP direct input queue");
+
+static int
+sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
+{
+ u_int64_t qdrops_long;
+ int error, qdrops;
+
+ netisr_getqdrops(&ip_direct_nh, &qdrops_long);
+ qdrops = qdrops_long;
+ error = sysctl_handle_int(oidp, &qdrops, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (qdrops != 0)
+ return (EINVAL);
+ netisr_clearqdrops(&ip_direct_nh);
+ return (0);
+}
+
+SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops,
+ CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I",
+ "Number of packets dropped from the IP direct input queue");
+#endif /* RSS */
+
/*
* IP initialization: fill in IP protocol switch table.
* All protocols not implemented in kernel go to raw IP protocol handler.
@@ -283,19 +307,11 @@ ip_init(void)
struct protosw *pr;
int i;
- V_ip_id = time_second & 0xffff;
-
TAILQ_INIT(&V_in_ifaddrhead);
V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
/* Initialize IP reassembly queue. */
- for (i = 0; i < IPREASS_NHASH; i++)
- TAILQ_INIT(&V_ipq[i]);
- V_maxnipq = nmbclusters / 32;
- V_maxfragsperpacket = 16;
- V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
- NULL, UMA_ALIGN_PTR, 0);
- maxnipq_update();
+ ipreass_init();
/* Initialize packet filter hooks. */
V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
@@ -304,27 +320,27 @@ ip_init(void)
printf("%s: WARNING: unable to register pfil hook, "
"error %d\n", __func__, i);
-#ifdef FLOWTABLE
- if (TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size",
- &V_ip_output_flowtable_size)) {
- if (V_ip_output_flowtable_size < 256)
- V_ip_output_flowtable_size = 256;
- if (!powerof2(V_ip_output_flowtable_size)) {
- printf("flowtable must be power of 2 size\n");
- V_ip_output_flowtable_size = 2048;
- }
- } else {
- /*
- * round up to the next power of 2
- */
- V_ip_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
- }
- V_ip_ft = flowtable_alloc("ipv4", V_ip_output_flowtable_size, FL_PCPU);
-#endif
+ if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
+ &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
+ HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
+ printf("%s: WARNING: unable to register input helper hook\n",
+ __func__);
+ if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
+ &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
+ HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
+ printf("%s: WARNING: unable to register output helper hook\n",
+ __func__);
/* Skip initialization of globals for non-default instances. */
- if (!IS_DEFAULT_VNET(curvnet))
+#ifdef VIMAGE
+ if (!IS_DEFAULT_VNET(curvnet)) {
+ netisr_register_vnet(&ip_nh);
+#ifdef RSS
+ netisr_register_vnet(&ip_direct_nh);
+#endif
return;
+ }
+#endif
pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
if (pr == NULL)
@@ -346,27 +362,79 @@ ip_init(void)
ip_protox[pr->pr_protocol] = pr - inetsw;
}
- EVENTHANDLER_REGISTER(nmbclusters_change, ipq_zone_change,
- NULL, EVENTHANDLER_PRI_ANY);
-
- /* Initialize various other remaining things. */
- IPQ_LOCK_INIT();
netisr_register(&ip_nh);
+#ifdef RSS
+ netisr_register(&ip_direct_nh);
+#endif
}
#ifdef VIMAGE
-void
-ip_destroy(void)
+static void
+ip_destroy(void *unused __unused)
{
+ struct ifnet *ifp;
+ int error;
+
+#ifdef RSS
+ netisr_unregister_vnet(&ip_direct_nh);
+#endif
+ netisr_unregister_vnet(&ip_nh);
+
+ if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0)
+ printf("%s: WARNING: unable to unregister pfil hook, "
+ "error %d\n", __func__, error);
+
+ error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
+ if (error != 0) {
+ printf("%s: WARNING: unable to deregister input helper hook "
+ "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
+ "error %d returned\n", __func__, error);
+ }
+ error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]);
+ if (error != 0) {
+ printf("%s: WARNING: unable to deregister output helper hook "
+ "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
+ "error %d returned\n", __func__, error);
+ }
+
+ /* Remove the IPv4 addresses from all interfaces. */
+ in_ifscrub_all();
+
+ /* Make sure the IPv4 routes are gone as well. */
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link)
+ rt_flushifroutes_af(ifp, AF_INET);
+ IFNET_RUNLOCK();
+
+ /* Destroy IP reassembly queue. */
+ ipreass_destroy();
/* Cleanup in_ifaddr hash table; should be empty. */
hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
+}
- IPQ_LOCK();
- ip_drain_locked();
- IPQ_UNLOCK();
+VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL);
+#endif
- uma_zdestroy(V_ipq_zone);
+#ifdef RSS
+/*
+ * IP direct input routine.
+ *
+ * This is called when reinjecting completed fragments where
+ * all of the previous checking and book-keeping has been done.
+ */
+void
+ip_direct_input(struct mbuf *m)
+{
+ struct ip *ip;
+ int hlen;
+
+ ip = mtod(m, struct ip *);
+ hlen = ip->ip_hl << 2;
+
+ IPSTAT_INC(ips_delivered);
+ (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
+ return;
}
#endif
@@ -382,21 +450,18 @@ ip_input(struct mbuf *m)
struct ifaddr *ifa;
struct ifnet *ifp;
int checkif, hlen = 0;
- u_short sum;
+ uint16_t sum, ip_len;
int dchg = 0; /* dest changed after fw */
struct in_addr odst; /* original dst address */
M_ASSERTPKTHDR(m);
if (m->m_flags & M_FASTFWD_OURS) {
- /*
- * Firewall or NAT changed destination to local.
- * We expect ip_len and ip_off to be in host byte order.
- */
m->m_flags &= ~M_FASTFWD_OURS;
/* Set up some basics that will be used later. */
ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
+ ip_len = ntohs(ip->ip_len);
goto ours;
}
@@ -430,6 +495,8 @@ ip_input(struct mbuf *m)
ip = mtod(m, struct ip *);
}
+ IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
+
/* 127/8 must not appear on wire - RFC1122 */
ifp = m->m_pkthdr.rcvif;
if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
@@ -460,15 +527,11 @@ ip_input(struct mbuf *m)
return;
#endif
- /*
- * Convert fields to host representation.
- */
- ip->ip_len = ntohs(ip->ip_len);
- if (ip->ip_len < hlen) {
+ ip_len = ntohs(ip->ip_len);
+ if (ip_len < hlen) {
IPSTAT_INC(ips_badlen);
goto bad;
}
- ip->ip_off = ntohs(ip->ip_off);
/*
* Check that the amount of data in the buffers
@@ -476,24 +539,35 @@ ip_input(struct mbuf *m)
* Trim mbufs if longer than we expect.
* Drop packet if shorter than we expect.
*/
- if (m->m_pkthdr.len < ip->ip_len) {
+ if (m->m_pkthdr.len < ip_len) {
tooshort:
IPSTAT_INC(ips_tooshort);
goto bad;
}
- if (m->m_pkthdr.len > ip->ip_len) {
+ if (m->m_pkthdr.len > ip_len) {
if (m->m_len == m->m_pkthdr.len) {
- m->m_len = ip->ip_len;
- m->m_pkthdr.len = ip->ip_len;
+ m->m_len = ip_len;
+ m->m_pkthdr.len = ip_len;
} else
- m_adj(m, ip->ip_len - m->m_pkthdr.len);
+ m_adj(m, ip_len - m->m_pkthdr.len);
}
+
+ /* Try to forward the packet, but if we fail continue */
#ifdef IPSEC
+ /* For now we do not handle IPSEC in tryforward. */
+ if (!key_havesp(IPSEC_DIR_INBOUND) && !key_havesp(IPSEC_DIR_OUTBOUND) &&
+ (V_ipforwarding == 1))
+ if (ip_tryforward(m) == NULL)
+ return;
/*
* Bypass packet filtering for packets previously handled by IPsec.
*/
if (ip_ipsec_filtertunnel(m))
goto passin;
+#else
+ if (V_ipforwarding == 1)
+ if (ip_tryforward(m) == NULL)
+ return;
#endif /* IPSEC */
/*
@@ -523,8 +597,7 @@ tooshort:
goto ours;
}
if (m->m_flags & M_IP_NEXTHOP) {
- dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL);
- if (dchg != 0) {
+ if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
/*
* Directly ship the packet on. This allows
* forwarding packets originally destined to us
@@ -535,6 +608,7 @@ tooshort:
}
}
passin:
+
/*
* Process options and, if not destined for us,
* ship it on. ip_dooptions returns 1 when an
@@ -597,7 +671,9 @@ passin:
*/
if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr &&
(!checkif || ia->ia_ifp == ifp)) {
- ifa_ref(&ia->ia_ifa);
+ counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
+ counter_u64_add(ia->ia_ifa.ifa_ibytes,
+ m->m_pkthdr.len);
/* IN_IFADDR_RUNLOCK(); */
goto ours;
}
@@ -620,13 +696,17 @@ passin:
ia = ifatoia(ifa);
if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
ip->ip_dst.s_addr) {
- ifa_ref(ifa);
+ counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
+ counter_u64_add(ia->ia_ifa.ifa_ibytes,
+ m->m_pkthdr.len);
IF_ADDR_RUNLOCK(ifp);
goto ours;
}
#ifdef BOOTP_COMPAT
if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
- ifa_ref(ifa);
+ counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
+ counter_u64_add(ia->ia_ifa.ifa_ibytes,
+ m->m_pkthdr.len);
IF_ADDR_RUNLOCK(ifp);
goto ours;
}
@@ -679,28 +759,12 @@ passin:
goto ours;
/*
- * FAITH(Firewall Aided Internet Translator)
- */
- if (ifp && ifp->if_type == IFT_FAITH) {
- if (V_ip_keepfaith) {
- if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
- goto ours;
- }
- m_freem(m);
- return;
- }
-
- /*
* Not for us; forward if possible and desirable.
*/
if (V_ipforwarding == 0) {
IPSTAT_INC(ips_cantforward);
m_freem(m);
} else {
-#ifdef IPSEC
- if (ip_ipsec_fwd(m))
- goto bad;
-#endif /* IPSEC */
ip_forward(m, dchg);
}
return;
@@ -711,25 +775,16 @@ ours:
* IPSTEALTH: Process non-routing options only
* if the packet is destined for us.
*/
- if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) {
- if (ia != NULL)
- ifa_free(&ia->ia_ifa);
+ if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
return;
- }
#endif /* IPSTEALTH */
- /* Count the packet in the ip address stats */
- if (ia != NULL) {
- ia->ia_ifa.if_ipackets++;
- ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
- ifa_free(&ia->ia_ifa);
- }
-
/*
* Attempt reassembly; if it succeeds, proceed.
* ip_reass() will return a different mbuf.
*/
- if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
+ if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
+ /* XXXGL: shouldn't we save & set m_flags? */
m = ip_reass(m);
if (m == NULL)
return;
@@ -738,19 +793,13 @@ ours:
hlen = ip->ip_hl << 2;
}
- /*
- * Further protocols expect the packet length to be w/o the
- * IP header.
- */
- ip->ip_len -= hlen;
-
#ifdef IPSEC
/*
* enforce IPsec policy checking if we are seeing last header.
* note that we do not visit this with protocols with pcb layer
* code - like udp/tcp/raw ip.
*/
- if (ip_ipsec_input(m))
+ if (ip_ipsec_input(m, ip->ip_p) != 0)
goto bad;
#endif /* IPSEC */
@@ -759,419 +808,13 @@ ours:
*/
IPSTAT_INC(ips_delivered);
- (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
+ (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
return;
bad:
m_freem(m);
}
/*
- * After maxnipq has been updated, propagate the change to UMA. The UMA zone
- * max has slightly different semantics than the sysctl, for historical
- * reasons.
- */
-static void
-maxnipq_update(void)
-{
-
- /*
- * -1 for unlimited allocation.
- */
- if (V_maxnipq < 0)
- uma_zone_set_max(V_ipq_zone, 0);
- /*
- * Positive number for specific bound.
- */
- if (V_maxnipq > 0)
- uma_zone_set_max(V_ipq_zone, V_maxnipq);
- /*
- * Zero specifies no further fragment queue allocation -- set the
- * bound very low, but rely on implementation elsewhere to actually
- * prevent allocation and reclaim current queues.
- */
- if (V_maxnipq == 0)
- uma_zone_set_max(V_ipq_zone, 1);
-}
-
-static void
-ipq_zone_change(void *tag)
-{
-
- if (V_maxnipq > 0 && V_maxnipq < (nmbclusters / 32)) {
- V_maxnipq = nmbclusters / 32;
- maxnipq_update();
- }
-}
-
-static int
-sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
-{
- int error, i;
-
- i = V_maxnipq;
- error = sysctl_handle_int(oidp, &i, 0, req);
- if (error || !req->newptr)
- return (error);
-
- /*
- * XXXRW: Might be a good idea to sanity check the argument and place
- * an extreme upper bound.
- */
- if (i < -1)
- return (EINVAL);
- V_maxnipq = i;
- maxnipq_update();
- return (0);
-}
-
-SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW,
- NULL, 0, sysctl_maxnipq, "I",
- "Maximum number of IPv4 fragment reassembly queue entries");
-
-/*
- * Take incoming datagram fragment and try to reassemble it into
- * whole datagram. If the argument is the first fragment or one
- * in between the function will return NULL and store the mbuf
- * in the fragment chain. If the argument is the last fragment
- * the packet will be reassembled and the pointer to the new
- * mbuf returned for further processing. Only m_tags attached
- * to the first packet/fragment are preserved.
- * The IP header is *NOT* adjusted out of iplen.
- */
-struct mbuf *
-ip_reass(struct mbuf *m)
-{
- struct ip *ip;
- struct mbuf *p, *q, *nq, *t;
- struct ipq *fp = NULL;
- struct ipqhead *head;
- int i, hlen, next;
- u_int8_t ecn, ecn0;
- u_short hash;
-
- /* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
- if (V_maxnipq == 0 || V_maxfragsperpacket == 0) {
- IPSTAT_INC(ips_fragments);
- IPSTAT_INC(ips_fragdropped);
- m_freem(m);
- return (NULL);
- }
-
- ip = mtod(m, struct ip *);
- hlen = ip->ip_hl << 2;
-
- hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
- head = &V_ipq[hash];
- IPQ_LOCK();
-
- /*
- * Look for queue of fragments
- * of this datagram.
- */
- TAILQ_FOREACH(fp, head, ipq_list)
- if (ip->ip_id == fp->ipq_id &&
- ip->ip_src.s_addr == fp->ipq_src.s_addr &&
- ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
-#ifdef MAC
- mac_ipq_match(m, fp) &&
-#endif
- ip->ip_p == fp->ipq_p)
- goto found;
-
- fp = NULL;
-
- /*
- * Attempt to trim the number of allocated fragment queues if it
- * exceeds the administrative limit.
- */
- if ((V_nipq > V_maxnipq) && (V_maxnipq > 0)) {
- /*
- * drop something from the tail of the current queue
- * before proceeding further
- */
- struct ipq *q = TAILQ_LAST(head, ipqhead);
- if (q == NULL) { /* gak */
- for (i = 0; i < IPREASS_NHASH; i++) {
- struct ipq *r = TAILQ_LAST(&V_ipq[i], ipqhead);
- if (r) {
- IPSTAT_ADD(ips_fragtimeout,
- r->ipq_nfrags);
- ip_freef(&V_ipq[i], r);
- break;
- }
- }
- } else {
- IPSTAT_ADD(ips_fragtimeout, q->ipq_nfrags);
- ip_freef(head, q);
- }
- }
-
-found:
- /*
- * Adjust ip_len to not reflect header,
- * convert offset of this to bytes.
- */
- ip->ip_len -= hlen;
- if (ip->ip_off & IP_MF) {
- /*
- * Make sure that fragments have a data length
- * that's a non-zero multiple of 8 bytes.
- */
- if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
- IPSTAT_INC(ips_toosmall); /* XXX */
- goto dropfrag;
- }
- m->m_flags |= M_FRAG;
- } else
- m->m_flags &= ~M_FRAG;
- ip->ip_off <<= 3;
-
-
- /*
- * Attempt reassembly; if it succeeds, proceed.
- * ip_reass() will return a different mbuf.
- */
- IPSTAT_INC(ips_fragments);
- m->m_pkthdr.header = ip;
-
- /* Previous ip_reass() started here. */
- /*
- * Presence of header sizes in mbufs
- * would confuse code below.
- */
- m->m_data += hlen;
- m->m_len -= hlen;
-
- /*
- * If first fragment to arrive, create a reassembly queue.
- */
- if (fp == NULL) {
- fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
- if (fp == NULL)
- goto dropfrag;
-#ifdef MAC
- if (mac_ipq_init(fp, M_NOWAIT) != 0) {
- uma_zfree(V_ipq_zone, fp);
- fp = NULL;
- goto dropfrag;
- }
- mac_ipq_create(m, fp);
-#endif
- TAILQ_INSERT_HEAD(head, fp, ipq_list);
- V_nipq++;
- fp->ipq_nfrags = 1;
- fp->ipq_ttl = IPFRAGTTL;
- fp->ipq_p = ip->ip_p;
- fp->ipq_id = ip->ip_id;
- fp->ipq_src = ip->ip_src;
- fp->ipq_dst = ip->ip_dst;
- fp->ipq_frags = m;
- m->m_nextpkt = NULL;
- goto done;
- } else {
- fp->ipq_nfrags++;
-#ifdef MAC
- mac_ipq_update(m, fp);
-#endif
- }
-
-#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header))
-
- /*
- * Handle ECN by comparing this segment with the first one;
- * if CE is set, do not lose CE.
- * drop if CE and not-ECT are mixed for the same packet.
- */
- ecn = ip->ip_tos & IPTOS_ECN_MASK;
- ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
- if (ecn == IPTOS_ECN_CE) {
- if (ecn0 == IPTOS_ECN_NOTECT)
- goto dropfrag;
- if (ecn0 != IPTOS_ECN_CE)
- GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
- }
- if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
- goto dropfrag;
-
- /*
- * Find a segment which begins after this one does.
- */
- for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
- if (GETIP(q)->ip_off > ip->ip_off)
- break;
-
- /*
- * If there is a preceding segment, it may provide some of
- * our data already. If so, drop the data from the incoming
- * segment. If it provides all of our data, drop us, otherwise
- * stick new segment in the proper place.
- *
- * If some of the data is dropped from the preceding
- * segment, then it's checksum is invalidated.
- */
- if (p) {
- i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
- if (i > 0) {
- if (i >= ip->ip_len)
- goto dropfrag;
- m_adj(m, i);
- m->m_pkthdr.csum_flags = 0;
- ip->ip_off += i;
- ip->ip_len -= i;
- }
- m->m_nextpkt = p->m_nextpkt;
- p->m_nextpkt = m;
- } else {
- m->m_nextpkt = fp->ipq_frags;
- fp->ipq_frags = m;
- }
-
- /*
- * While we overlap succeeding segments trim them or,
- * if they are completely covered, dequeue them.
- */
- for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
- q = nq) {
- i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
- if (i < GETIP(q)->ip_len) {
- GETIP(q)->ip_len -= i;
- GETIP(q)->ip_off += i;
- m_adj(q, i);
- q->m_pkthdr.csum_flags = 0;
- break;
- }
- nq = q->m_nextpkt;
- m->m_nextpkt = nq;
- IPSTAT_INC(ips_fragdropped);
- fp->ipq_nfrags--;
- m_freem(q);
- }
-
- /*
- * Check for complete reassembly and perform frag per packet
- * limiting.
- *
- * Frag limiting is performed here so that the nth frag has
- * a chance to complete the packet before we drop the packet.
- * As a result, n+1 frags are actually allowed per packet, but
- * only n will ever be stored. (n = maxfragsperpacket.)
- *
- */
- next = 0;
- for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
- if (GETIP(q)->ip_off != next) {
- if (fp->ipq_nfrags > V_maxfragsperpacket) {
- IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
- ip_freef(head, fp);
- }
- goto done;
- }
- next += GETIP(q)->ip_len;
- }
- /* Make sure the last packet didn't have the IP_MF flag */
- if (p->m_flags & M_FRAG) {
- if (fp->ipq_nfrags > V_maxfragsperpacket) {
- IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
- ip_freef(head, fp);
- }
- goto done;
- }
-
- /*
- * Reassembly is complete. Make sure the packet is a sane size.
- */
- q = fp->ipq_frags;
- ip = GETIP(q);
- if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
- IPSTAT_INC(ips_toolong);
- IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
- ip_freef(head, fp);
- goto done;
- }
-
- /*
- * Concatenate fragments.
- */
- m = q;
- t = m->m_next;
- m->m_next = NULL;
- m_cat(m, t);
- nq = q->m_nextpkt;
- q->m_nextpkt = NULL;
- for (q = nq; q != NULL; q = nq) {
- nq = q->m_nextpkt;
- q->m_nextpkt = NULL;
- m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
- m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
- m_cat(m, q);
- }
- /*
- * In order to do checksumming faster we do 'end-around carry' here
- * (and not in for{} loop), though it implies we are not going to
- * reassemble more than 64k fragments.
- */
- while (m->m_pkthdr.csum_data & 0xffff0000)
- m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
- (m->m_pkthdr.csum_data >> 16);
-#ifdef MAC
- mac_ipq_reassemble(fp, m);
- mac_ipq_destroy(fp);
-#endif
-
- /*
- * Create header for new ip packet by modifying header of first
- * packet; dequeue and discard fragment reassembly header.
- * Make header visible.
- */
- ip->ip_len = (ip->ip_hl << 2) + next;
- ip->ip_src = fp->ipq_src;
- ip->ip_dst = fp->ipq_dst;
- TAILQ_REMOVE(head, fp, ipq_list);
- V_nipq--;
- uma_zfree(V_ipq_zone, fp);
- m->m_len += (ip->ip_hl << 2);
- m->m_data -= (ip->ip_hl << 2);
- /* some debugging cruft by sklower, below, will go away soon */
- if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */
- m_fixhdr(m);
- IPSTAT_INC(ips_reassembled);
- IPQ_UNLOCK();
- return (m);
-
-dropfrag:
- IPSTAT_INC(ips_fragdropped);
- if (fp != NULL)
- fp->ipq_nfrags--;
- m_freem(m);
-done:
- IPQ_UNLOCK();
- return (NULL);
-
-#undef GETIP
-}
-
-/*
- * Free a fragment reassembly header and all
- * associated datagrams.
- */
-static void
-ip_freef(struct ipqhead *fhp, struct ipq *fp)
-{
- struct mbuf *q;
-
- IPQ_LOCK_ASSERT();
-
- while (fp->ipq_frags) {
- q = fp->ipq_frags;
- fp->ipq_frags = q->m_nextpkt;
- m_freem(q);
- }
- TAILQ_REMOVE(fhp, fp, ipq_list);
- uma_zfree(V_ipq_zone, fp);
- V_nipq--;
-}
-
-/*
* IP timer processing;
* if a timer expires on a reassembly
* queue, discard it.
@@ -1180,82 +823,28 @@ void
ip_slowtimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
- struct ipq *fp;
- int i;
VNET_LIST_RLOCK_NOSLEEP();
- IPQ_LOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- for (i = 0; i < IPREASS_NHASH; i++) {
- for(fp = TAILQ_FIRST(&V_ipq[i]); fp;) {
- struct ipq *fpp;
-
- fpp = fp;
- fp = TAILQ_NEXT(fp, ipq_list);
- if(--fpp->ipq_ttl == 0) {
- IPSTAT_ADD(ips_fragtimeout,
- fpp->ipq_nfrags);
- ip_freef(&V_ipq[i], fpp);
- }
- }
- }
- /*
- * If we are over the maximum number of fragments
- * (due to the limit being lowered), drain off
- * enough to get down to the new limit.
- */
- if (V_maxnipq >= 0 && V_nipq > V_maxnipq) {
- for (i = 0; i < IPREASS_NHASH; i++) {
- while (V_nipq > V_maxnipq &&
- !TAILQ_EMPTY(&V_ipq[i])) {
- IPSTAT_ADD(ips_fragdropped,
- TAILQ_FIRST(&V_ipq[i])->ipq_nfrags);
- ip_freef(&V_ipq[i],
- TAILQ_FIRST(&V_ipq[i]));
- }
- }
- }
+ ipreass_slowtimo();
CURVNET_RESTORE();
}
- IPQ_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
-/*
- * Drain off all datagram fragments.
- */
-static void
-ip_drain_locked(void)
-{
- int i;
-
- IPQ_LOCK_ASSERT();
-
- for (i = 0; i < IPREASS_NHASH; i++) {
- while(!TAILQ_EMPTY(&V_ipq[i])) {
- IPSTAT_ADD(ips_fragdropped,
- TAILQ_FIRST(&V_ipq[i])->ipq_nfrags);
- ip_freef(&V_ipq[i], TAILQ_FIRST(&V_ipq[i]));
- }
- }
-}
-
void
ip_drain(void)
{
VNET_ITERATOR_DECL(vnet_iter);
VNET_LIST_RLOCK_NOSLEEP();
- IPQ_LOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- ip_drain_locked();
+ ipreass_drain();
CURVNET_RESTORE();
}
- IPQ_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
- in_rtqdrain();
}
/*
@@ -1314,33 +903,6 @@ ipproto_unregister(short ipproto)
return (0);
}
-/*
- * Given address of next destination (final or next hop), return (referenced)
- * internet address info of interface to be used to get there.
- */
-struct in_ifaddr *
-ip_rtaddr(struct in_addr dst, u_int fibnum)
-{
- struct route sro;
- struct sockaddr_in *sin;
- struct in_ifaddr *ia;
-
- bzero(&sro, sizeof(sro));
- sin = (struct sockaddr_in *)&sro.ro_dst;
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(*sin);
- sin->sin_addr = dst;
- in_rtalloc_ign(&sro, 0, fibnum);
-
- if (sro.ro_rt == NULL)
- return (NULL);
-
- ia = ifatoia(sro.ro_rt->rt_ifa);
- ifa_ref(&ia->ia_ifa);
- RTFREE(sro.ro_rt);
- return (ia);
-}
-
u_char inetctlerrmap[PRC_NCMDS] = {
0, 0, 0, 0,
0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
@@ -1370,6 +932,7 @@ ip_forward(struct mbuf *m, int srcrt)
struct ip *ip = mtod(m, struct ip *);
struct in_ifaddr *ia;
struct mbuf *mcopy;
+ struct sockaddr_in *sin;
struct in_addr dest;
struct route ro;
int error, type = 0, code = 0, mtu = 0;
@@ -1379,6 +942,13 @@ ip_forward(struct mbuf *m, int srcrt)
m_freem(m);
return;
}
+#ifdef IPSEC
+ if (ip_ipsec_fwd(m) != 0) {
+ IPSTAT_INC(ips_cantforward);
+ m_freem(m);
+ return;
+ }
+#endif /* IPSEC */
#ifdef IPSTEALTH
if (!V_ipstealth) {
#endif
@@ -1391,7 +961,23 @@ ip_forward(struct mbuf *m, int srcrt)
}
#endif
- ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
+ bzero(&ro, sizeof(ro));
+ sin = (struct sockaddr_in *)&ro.ro_dst;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = ip->ip_dst;
+#ifdef RADIX_MPATH
+ rtalloc_mpath_fib(&ro,
+ ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
+ M_GETFIB(m));
+#else
+ in_rtalloc_ign(&ro, 0, M_GETFIB(m));
+#endif
+ if (ro.ro_rt != NULL) {
+ ia = ifatoia(ro.ro_rt->rt_ifa);
+ ifa_ref(&ia->ia_ifa);
+ } else
+ ia = NULL;
#ifndef IPSEC
/*
* 'ia' may be NULL if there is no route for this destination.
@@ -1400,6 +986,7 @@ ip_forward(struct mbuf *m, int srcrt)
*/
if (!srcrt && ia == NULL) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
+ RO_RTFREE(&ro);
return;
}
#endif
@@ -1420,8 +1007,8 @@ ip_forward(struct mbuf *m, int srcrt)
* assume exclusive access to the IP header in `m', so any
* data in a cluster may change before we reach icmp_error().
*/
- MGETHDR(mcopy, M_DONTWAIT, m->m_type);
- if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
+ mcopy = m_gethdr(M_NOWAIT, m->m_type);
+ if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
/*
* It's probably ok if the pkthdr dup fails (because
* the deep copy of the tag chain failed), but for now
@@ -1432,7 +1019,7 @@ ip_forward(struct mbuf *m, int srcrt)
mcopy = NULL;
}
if (mcopy != NULL) {
- mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy));
+ mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
mcopy->m_pkthdr.len = mcopy->m_len;
m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
}
@@ -1456,16 +1043,8 @@ ip_forward(struct mbuf *m, int srcrt)
dest.s_addr = 0;
if (!srcrt && V_ipsendredirects &&
ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
- struct sockaddr_in *sin;
struct rtentry *rt;
- bzero(&ro, sizeof(ro));
- sin = (struct sockaddr_in *)&ro.ro_dst;
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(*sin);
- sin->sin_addr = ip->ip_dst;
- in_rtalloc_ign(&ro, 0, M_GETFIB(m));
-
rt = ro.ro_rt;
if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
@@ -1484,20 +1063,12 @@ ip_forward(struct mbuf *m, int srcrt)
code = ICMP_REDIRECT_HOST;
}
}
- if (rt)
- RTFREE(rt);
}
- /*
- * Try to cache the route MTU from ip_output so we can consider it for
- * the ICMP_UNREACH_NEEDFRAG "Next-Hop MTU" field described in RFC1191.
- */
- bzero(&ro, sizeof(ro));
-
error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
if (error == EMSGSIZE && ro.ro_rt)
- mtu = ro.ro_rt->rt_rmx.rmx_mtu;
+ mtu = ro.ro_rt->rt_mtu;
RO_RTFREE(&ro);
if (error)
@@ -1560,31 +1131,12 @@ ip_forward(struct mbuf *m, int srcrt)
if (ia != NULL)
mtu = ia->ia_ifp->if_mtu;
else
- mtu = ip_next_mtu(ip->ip_len, 0);
+ mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
}
IPSTAT_INC(ips_cantfrag);
break;
case ENOBUFS:
- /*
- * A router should not generate ICMP_SOURCEQUENCH as
- * required in RFC1812 Requirements for IP Version 4 Routers.
- * Source quench could be a big problem under DoS attacks,
- * or if the underlying interface is rate-limited.
- * Those who need source quench packets may re-enable them
- * via the net.inet.ip.sendsourcequench sysctl.
- */
- if (V_ip_sendsourcequench == 0) {
- m_freem(mcopy);
- if (ia != NULL)
- ifa_free(&ia->ia_ifa);
- return;
- } else {
- type = ICMP_SOURCEQUENCH;
- code = 0;
- }
- break;
-
case EACCES: /* ipfw denied packet */
m_freem(mcopy);
if (ia != NULL)
@@ -1606,8 +1158,8 @@ ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
bintime(&bt);
if (inp->inp_socket->so_options & SO_BINTIME) {
- *mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
- SCM_BINTIME, SOL_SOCKET);
+ *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
+ SCM_BINTIME, SOL_SOCKET);
if (*mp)
mp = &(*mp)->m_next;
}
@@ -1615,20 +1167,20 @@ ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
struct timeval tv;
bintime2timeval(&bt, &tv);
- *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
- SCM_TIMESTAMP, SOL_SOCKET);
+ *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
+ SCM_TIMESTAMP, SOL_SOCKET);
if (*mp)
mp = &(*mp)->m_next;
}
}
if (inp->inp_flags & INP_RECVDSTADDR) {
- *mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
+ *mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
}
if (inp->inp_flags & INP_RECVTTL) {
- *mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
+ *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
@@ -1640,14 +1192,14 @@ ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
*/
/* options were tossed already */
if (inp->inp_flags & INP_RECVOPTS) {
- *mp = sbcreatecontrol((caddr_t) opts_deleted_above,
+ *mp = sbcreatecontrol((caddr_t)opts_deleted_above,
sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
}
/* ip_srcroute doesn't do what we want here, need to fix */
if (inp->inp_flags & INP_RECVRETOPTS) {
- *mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
+ *mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
@@ -1662,36 +1214,73 @@ ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
struct sockaddr_dl *sdp;
struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
- if (((ifp = m->m_pkthdr.rcvif))
- && ( ifp->if_index && (ifp->if_index <= V_if_index))) {
+ if ((ifp = m->m_pkthdr.rcvif) &&
+ ifp->if_index && ifp->if_index <= V_if_index) {
sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
/*
* Change our mind and don't try copy.
*/
- if ((sdp->sdl_family != AF_LINK)
- || (sdp->sdl_len > sizeof(sdlbuf))) {
+ if (sdp->sdl_family != AF_LINK ||
+ sdp->sdl_len > sizeof(sdlbuf)) {
goto makedummy;
}
bcopy(sdp, sdl2, sdp->sdl_len);
} else {
makedummy:
- sdl2->sdl_len
- = offsetof(struct sockaddr_dl, sdl_data[0]);
+ sdl2->sdl_len =
+ offsetof(struct sockaddr_dl, sdl_data[0]);
sdl2->sdl_family = AF_LINK;
sdl2->sdl_index = 0;
sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
}
- *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
- IP_RECVIF, IPPROTO_IP);
+ *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
+ IP_RECVIF, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
}
if (inp->inp_flags & INP_RECVTOS) {
- *mp = sbcreatecontrol((caddr_t) &ip->ip_tos,
+ *mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
}
+
+ if (inp->inp_flags2 & INP_RECVFLOWID) {
+ uint32_t flowid, flow_type;
+
+ flowid = m->m_pkthdr.flowid;
+ flow_type = M_HASHTYPE_GET(m);
+
+ /*
+ * XXX should handle the failure of one or the
+ * other - don't populate both?
+ */
+ *mp = sbcreatecontrol((caddr_t) &flowid,
+ sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ *mp = sbcreatecontrol((caddr_t) &flow_type,
+ sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+
+#ifdef RSS
+ if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
+ uint32_t flowid, flow_type;
+ uint32_t rss_bucketid;
+
+ flowid = m->m_pkthdr.flowid;
+ flow_type = M_HASHTYPE_GET(m);
+
+ if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
+ *mp = sbcreatecontrol((caddr_t) &rss_bucketid,
+ sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+ }
+#endif
}
/*
@@ -1745,13 +1334,18 @@ ip_rsvp_done(void)
return 0;
}
-void
-rsvp_input(struct mbuf *m, int off) /* XXX must fixup manually */
+int
+rsvp_input(struct mbuf **mp, int *offp, int proto)
{
+ struct mbuf *m;
+
+ m = *mp;
+ *mp = NULL;
if (rsvp_input_p) { /* call the real one if loaded */
- rsvp_input_p(m, off);
- return;
+ *mp = m;
+ rsvp_input_p(mp, offp, proto);
+ return (IPPROTO_DONE);
}
/* Can still get packets with rsvp_on = 0 if there is a local member
@@ -1761,13 +1355,15 @@ rsvp_input(struct mbuf *m, int off) /* XXX must fixup manually */
if (!V_rsvp_on) {
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
if (V_ip_rsvpd != NULL) {
- rip_input(m, off);
- return;
+ *mp = m;
+ rip_input(mp, offp, proto);
+ return (IPPROTO_DONE);
}
/* Drop the packet */
m_freem(m);
+ return (IPPROTO_DONE);
}
diff --git a/freebsd/sys/netinet/ip_ipsec.h b/freebsd/sys/netinet/ip_ipsec.h
index 2870c114..f499b740 100644
--- a/freebsd/sys/netinet/ip_ipsec.h
+++ b/freebsd/sys/netinet/ip_ipsec.h
@@ -34,7 +34,7 @@
int ip_ipsec_filtertunnel(struct mbuf *);
int ip_ipsec_fwd(struct mbuf *);
-int ip_ipsec_input(struct mbuf *);
+int ip_ipsec_input(struct mbuf *, int);
int ip_ipsec_mtu(struct mbuf *, int);
-int ip_ipsec_output(struct mbuf **, struct inpcb *, int *, int *);
+int ip_ipsec_output(struct mbuf **, struct inpcb *, int *);
#endif
diff --git a/freebsd/sys/netinet/ip_mroute.c b/freebsd/sys/netinet/ip_mroute.c
index f4aeed24..f8b14735 100644
--- a/freebsd/sys/netinet/ip_mroute.c
+++ b/freebsd/sys/netinet/ip_mroute.c
@@ -79,6 +79,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/kernel.h>
#include <sys/stddef.h>
+#include <sys/eventhandler.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/ktr.h>
#include <sys/malloc.h>
@@ -95,8 +96,10 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/time.h>
+#include <sys/counter.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -121,7 +124,6 @@ __FBSDID("$FreeBSD$");
#endif
#define VIFI_INVALID ((vifi_t) -1)
-#define M_HASCL(m) ((m)->m_flags & M_EXT)
static VNET_DEFINE(uint32_t, last_tv_sec); /* last time we processed this */
#define V_last_tv_sec VNET(last_tv_sec)
@@ -147,11 +149,11 @@ static struct mtx mrouter_mtx;
static int ip_mrouter_cnt; /* # of vnets with active mrouters */
static int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */
-static VNET_DEFINE(struct mrtstat, mrtstat);
-#define V_mrtstat VNET(mrtstat)
-SYSCTL_VNET_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW,
- &VNET_NAME(mrtstat), mrtstat,
- "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
+static VNET_PCPUSTAT_DEFINE(struct mrtstat, mrtstat);
+VNET_PCPUSTAT_SYSINIT(mrtstat);
+VNET_PCPUSTAT_SYSUNINIT(mrtstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_ip, OID_AUTO, mrtstat, struct mrtstat,
+ mrtstat, "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
"netinet/ip_mroute.h)");
static VNET_DEFINE(u_long, mfchash);
@@ -179,7 +181,7 @@ static VNET_DEFINE(vifi_t, numvifs);
#define V_numvifs VNET(numvifs)
static VNET_DEFINE(struct vif, viftable[MAXVIFS]);
#define V_viftable VNET(viftable)
-SYSCTL_VNET_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD,
+SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_VNET | CTLFLAG_RD,
&VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]",
"IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)");
@@ -227,13 +229,13 @@ static VNET_DEFINE(struct callout, bw_upcalls_ch);
#define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */
-static VNET_DEFINE(struct pimstat, pimstat);
-#define V_pimstat VNET(pimstat)
+static VNET_PCPUSTAT_DEFINE(struct pimstat, pimstat);
+VNET_PCPUSTAT_SYSINIT(pimstat);
+VNET_PCPUSTAT_SYSUNINIT(pimstat);
SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
-SYSCTL_VNET_STRUCT(_net_inet_pim, PIMCTL_STATS, stats, CTLFLAG_RD,
- &VNET_NAME(pimstat), pimstat,
- "PIM Statistics (struct pimstat, netinet/pim_var.h)");
+SYSCTL_VNET_PCPUSTAT(_net_inet_pim, PIMCTL_STATS, stats, struct pimstat,
+ pimstat, "PIM Statistics (struct pimstat, netinet/pim_var.h)");
static u_long pim_squelch_wholepkt = 0;
SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
@@ -247,7 +249,7 @@ static const struct protosw in_pim_protosw = {
.pr_protocol = IPPROTO_PIM,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = pim_input,
- .pr_output = (pr_output_t*)rip_output,
+ .pr_output = rip_output,
.pr_ctloutput = rip_ctloutput,
.pr_usrreqs = &rip_usrreqs
};
@@ -538,7 +540,7 @@ X_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused)
int error = 0;
/*
- * Currently the only function calling this ioctl routine is rtioctl().
+ * Currently the only function calling this ioctl routine is rtioctl_fib().
* Typically, only root can create the raw socket in order to execute
* this ioctl method, however the request might be coming from a prison
*/
@@ -635,8 +637,8 @@ if_detached_event(void *arg __unused, struct ifnet *ifp)
continue;
for (i = 0; i < mfchashsize; i++) {
struct mfc *rt, *nrt;
- for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) {
- nrt = LIST_NEXT(rt, mfc_hash);
+
+ LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
if (rt->mfc_parent == vifi) {
expire_mfc(rt);
}
@@ -754,8 +756,8 @@ X_ip_mrouter_done(void)
*/
for (i = 0; i < mfchashsize; i++) {
struct mfc *rt, *nrt;
- for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) {
- nrt = LIST_NEXT(rt, mfc_hash);
+
+ LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
expire_mfc(rt);
}
}
@@ -1303,8 +1305,8 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
return ENOBUFS;
}
- mb0 = m_copypacket(m, M_DONTWAIT);
- if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
+ mb0 = m_copypacket(m, M_NOWAIT);
+ if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < hlen))
mb0 = m_pullup(mb0, hlen);
if (mb0 == NULL) {
free(rte, M_MRTABLE);
@@ -1446,9 +1448,7 @@ expire_upcalls(void *arg)
if (V_nexpire[i] == 0)
continue;
- for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) {
- nrt = LIST_NEXT(rt, mfc_hash);
-
+ LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
if (TAILQ_EMPTY(&rt->mfc_stall))
continue;
@@ -1490,7 +1490,7 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
{
struct ip *ip = mtod(m, struct ip *);
vifi_t vifi;
- int plen = ip->ip_len;
+ int plen = ntohs(ip->ip_len);
VIF_LOCK_ASSERT();
@@ -1546,7 +1546,7 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
int hlen = ip->ip_hl << 2;
struct mbuf *mm = m_copy(m, 0, hlen);
- if (mm && (M_HASCL(mm) || mm->m_len < hlen))
+ if (mm && (!M_WRITABLE(mm) || mm->m_len < hlen))
mm = m_pullup(mm, hlen);
if (mm == NULL)
return ENOBUFS;
@@ -1666,8 +1666,8 @@ phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
* the IP header is actually copied, not just referenced,
* so that ip_output() only scribbles on the copy.
*/
- mb_copy = m_copypacket(m, M_DONTWAIT);
- if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
+ mb_copy = m_copypacket(m, M_NOWAIT);
+ if (mb_copy && (!M_WRITABLE(mb_copy) || mb_copy->m_len < hlen))
mb_copy = m_pullup(mb_copy, hlen);
if (mb_copy == NULL)
return;
@@ -1720,12 +1720,16 @@ X_ip_rsvp_force_done(struct socket *so __unused)
}
-static void
-X_rsvp_input(struct mbuf *m, int off __unused)
+static int
+X_rsvp_input(struct mbuf **mp, int *offp, int proto)
{
+ struct mbuf *m;
+ m = *mp;
+ *mp = NULL;
if (!V_rsvp_on)
m_freem(m);
+ return (IPPROTO_DONE);
}
/*
@@ -2080,13 +2084,12 @@ bw_upcalls_send(void)
* Allocate a new mbuf, initialize it with the header and
* the payload for the pending calls.
*/
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n");
return;
}
- m->m_len = m->m_pkthdr.len = 0;
m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg);
m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&V_bw_upcalls[0]);
@@ -2381,7 +2384,7 @@ pim_register_prepare(struct ip *ip, struct mbuf *m)
* Copy the old packet & pullup its IP header into the
* new mbuf so we can modify it.
*/
- mb_copy = m_copypacket(m, M_DONTWAIT);
+ mb_copy = m_copypacket(m, M_NOWAIT);
if (mb_copy == NULL)
return NULL;
mb_copy = m_pullup(mb_copy, ip->ip_hl << 2);
@@ -2395,15 +2398,14 @@ pim_register_prepare(struct ip *ip, struct mbuf *m)
/* Compute the MTU after the PIM Register encapsulation */
mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr);
- if (ip->ip_len <= mtu) {
+ if (ntohs(ip->ip_len) <= mtu) {
/* Turn the IP header into a valid one */
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
} else {
/* Fragment the packet */
- if (ip_fragment(ip, &mb_copy, mtu, 0, CSUM_DELAY_IP) != 0) {
+ mb_copy->m_pkthdr.csum_flags |= CSUM_IP;
+ if (ip_fragment(ip, &mb_copy, mtu, 0) != 0) {
m_freem(mb_copy);
return NULL;
}
@@ -2428,7 +2430,7 @@ pim_register_send_upcall(struct ip *ip, struct vif *vifp,
/*
* Add a new mbuf with an upcall header
*/
- MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
+ mb_first = m_gethdr(M_NOWAIT, MT_DATA);
if (mb_first == NULL) {
m_freem(mb_copy);
return ENOBUFS;
@@ -2486,7 +2488,7 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
/*
* Add a new mbuf with the encapsulating header
*/
- MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
+ mb_first = m_gethdr(M_NOWAIT, MT_DATA);
if (mb_first == NULL) {
m_freem(mb_copy);
return ENOBUFS;
@@ -2502,8 +2504,8 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
*/
ip_outer = mtod(mb_first, struct ip *);
*ip_outer = pim_encap_iphdr;
- ip_outer->ip_id = ip_newid();
- ip_outer->ip_len = len + sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr);
+ ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) +
+ sizeof(pim_encap_pimhdr));
ip_outer->ip_src = V_viftable[vifi].v_lcl_addr;
ip_outer->ip_dst = rt->mfc_rp;
/*
@@ -2511,8 +2513,9 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
* IP_DF bit.
*/
ip_outer->ip_tos = ip->ip_tos;
- if (ntohs(ip->ip_off) & IP_DF)
- ip_outer->ip_off |= IP_DF;
+ if (ip->ip_off & htons(IP_DF))
+ ip_outer->ip_off |= htons(IP_DF);
+ ip_fillid(ip_outer);
pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer
+ sizeof(pim_encap_iphdr));
*pimhdr = pim_encap_pimhdr;
@@ -2559,15 +2562,18 @@ pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
* (used by PIM-SM): the PIM header is stripped off, and the inner packet
* is passed to if_simloop().
*/
-void
-pim_input(struct mbuf *m, int off)
+int
+pim_input(struct mbuf **mp, int *offp, int proto)
{
+ struct mbuf *m = *mp;
struct ip *ip = mtod(m, struct ip *);
struct pim *pim;
+ int iphlen = *offp;
int minlen;
- int datalen = ip->ip_len;
+ int datalen = ntohs(ip->ip_len) - iphlen;
int ip_tos;
- int iphlen = off;
+
+ *mp = NULL;
/* Keep statistics */
PIMSTAT_INC(pims_rcv_total_msgs);
@@ -2581,7 +2587,7 @@ pim_input(struct mbuf *m, int off)
CTR3(KTR_IPMF, "%s: short packet (%d) from %s",
__func__, datalen, inet_ntoa(ip->ip_src));
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
/*
@@ -2597,10 +2603,9 @@ pim_input(struct mbuf *m, int off)
* Get the IP and PIM headers in contiguous memory, and
* possibly the PIM REGISTER header.
*/
- if ((m->m_flags & M_EXT || m->m_len < minlen) &&
- (m = m_pullup(m, minlen)) == 0) {
+ if (m->m_len < minlen && (m = m_pullup(m, minlen)) == NULL) {
CTR1(KTR_IPMF, "%s: m_pullup() failed", __func__);
- return;
+ return (IPPROTO_DONE);
}
/* m_pullup() may have given us a new mbuf so reset ip. */
@@ -2625,7 +2630,7 @@ pim_input(struct mbuf *m, int off)
PIMSTAT_INC(pims_rcv_badsum);
CTR1(KTR_IPMF, "%s: invalid checksum", __func__);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
/* PIM version check */
@@ -2634,7 +2639,7 @@ pim_input(struct mbuf *m, int off)
CTR3(KTR_IPMF, "%s: bad version %d expect %d", __func__,
(int)PIM_VT_V(pim->pim_vt), PIM_VERSION);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
/* restore mbuf back to the outer IP */
@@ -2659,7 +2664,7 @@ pim_input(struct mbuf *m, int off)
CTR2(KTR_IPMF, "%s: register vif not set: %d", __func__,
(int)V_reg_vif_num);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
/* XXX need refcnt? */
vifp = V_viftable[V_reg_vif_num].v_ifp;
@@ -2673,7 +2678,7 @@ pim_input(struct mbuf *m, int off)
PIMSTAT_INC(pims_rcv_badregisters);
CTR1(KTR_IPMF, "%s: register packet size too small", __func__);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
reghdr = (u_int32_t *)(pim + 1);
@@ -2687,7 +2692,7 @@ pim_input(struct mbuf *m, int off)
PIMSTAT_INC(pims_rcv_badregisters);
CTR1(KTR_IPMF, "%s: bad encap ip version", __func__);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
/* verify the inner packet is destined to a mcast group */
@@ -2696,7 +2701,7 @@ pim_input(struct mbuf *m, int off)
CTR2(KTR_IPMF, "%s: bad encap ip dest %s", __func__,
inet_ntoa(encap_ip->ip_dst));
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
/* If a NULL_REGISTER, pass it to the daemon */
@@ -2735,7 +2740,7 @@ pim_input(struct mbuf *m, int off)
if (mcp == NULL) {
CTR1(KTR_IPMF, "%s: m_copy() failed", __func__);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
/* Keep statistics */
@@ -2771,9 +2776,10 @@ pim_input_to_daemon:
* XXX: the outer IP header pkt size of a Register is not adjust to
* reflect the fact that the inner multicast data is truncated.
*/
- rip_input(m, iphlen);
+ *mp = m;
+ rip_input(mp, offp, proto);
- return;
+ return (IPPROTO_DONE);
}
static int
@@ -2813,12 +2819,12 @@ vnet_mroute_init(const void *unused __unused)
MALLOC(V_nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO);
bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
- callout_init(&V_expire_upcalls_ch, CALLOUT_MPSAFE);
- callout_init(&V_bw_upcalls_ch, CALLOUT_MPSAFE);
- callout_init(&V_bw_meter_ch, CALLOUT_MPSAFE);
+ callout_init(&V_expire_upcalls_ch, 1);
+ callout_init(&V_bw_upcalls_ch, 1);
+ callout_init(&V_bw_meter_ch, 1);
}
-VNET_SYSINIT(vnet_mroute_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mroute_init,
+VNET_SYSINIT(vnet_mroute_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mroute_init,
NULL);
static void
@@ -2829,7 +2835,7 @@ vnet_mroute_uninit(const void *unused __unused)
V_nexpire = NULL;
}
-VNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE,
+VNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE,
vnet_mroute_uninit, NULL);
static int
@@ -2944,4 +2950,4 @@ static moduledata_t ip_mroutemod = {
0
};
-DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
+DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE);
diff --git a/freebsd/sys/netinet/ip_mroute.h b/freebsd/sys/netinet/ip_mroute.h
index e945b92c..65f7d83c 100644
--- a/freebsd/sys/netinet/ip_mroute.h
+++ b/freebsd/sys/netinet/ip_mroute.h
@@ -206,23 +206,24 @@ struct bw_upcall {
* The kernel's multicast routing statistics.
*/
struct mrtstat {
- u_long mrts_mfc_lookups; /* # forw. cache hash table hits */
- u_long mrts_mfc_misses; /* # forw. cache hash table misses */
- u_long mrts_upcalls; /* # calls to multicast routing daemon */
- u_long mrts_no_route; /* no route for packet's origin */
- u_long mrts_bad_tunnel; /* malformed tunnel options */
- u_long mrts_cant_tunnel; /* no room for tunnel options */
- u_long mrts_wrong_if; /* arrived on wrong interface */
- u_long mrts_upq_ovflw; /* upcall Q overflow */
- u_long mrts_cache_cleanups; /* # entries with no upcalls */
- u_long mrts_drop_sel; /* pkts dropped selectively */
- u_long mrts_q_overflow; /* pkts dropped - Q overflow */
- u_long mrts_pkt2large; /* pkts dropped - size > BKT SIZE */
- u_long mrts_upq_sockfull; /* upcalls dropped - socket full */
+ uint64_t mrts_mfc_lookups; /* # forw. cache hash table hits */
+ uint64_t mrts_mfc_misses; /* # forw. cache hash table misses */
+ uint64_t mrts_upcalls; /* # calls to multicast routing daemon */
+ uint64_t mrts_no_route; /* no route for packet's origin */
+ uint64_t mrts_bad_tunnel; /* malformed tunnel options */
+ uint64_t mrts_cant_tunnel; /* no room for tunnel options */
+ uint64_t mrts_wrong_if; /* arrived on wrong interface */
+ uint64_t mrts_upq_ovflw; /* upcall Q overflow */
+ uint64_t mrts_cache_cleanups; /* # entries with no upcalls */
+ uint64_t mrts_drop_sel; /* pkts dropped selectively */
+ uint64_t mrts_q_overflow; /* pkts dropped - Q overflow */
+ uint64_t mrts_pkt2large; /* pkts dropped - size > BKT SIZE */
+ uint64_t mrts_upq_sockfull; /* upcalls dropped - socket full */
};
#ifdef _KERNEL
-#define MRTSTAT_ADD(name, val) V_mrtstat.name += (val)
+#define MRTSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct mrtstat, mrtstat, name, (val))
#define MRTSTAT_INC(name) MRTSTAT_ADD(name, 1)
#endif
diff --git a/freebsd/sys/netinet/ip_options.c b/freebsd/sys/netinet/ip_options.c
index 6431aaa1..134479c9 100644
--- a/freebsd/sys/netinet/ip_options.c
+++ b/freebsd/sys/netinet/ip_options.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_fib.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
@@ -67,18 +68,21 @@ __FBSDID("$FreeBSD$");
#include <sys/socketvar.h>
-static int ip_dosourceroute = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
- &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
+static VNET_DEFINE(int, ip_dosourceroute);
+SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_dosourceroute), 0,
+ "Enable forwarding source routed IP packets");
+#define V_ip_dosourceroute VNET(ip_dosourceroute)
-static int ip_acceptsourceroute = 0;
+static VNET_DEFINE(int, ip_acceptsourceroute);
SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
- CTLFLAG_RW, &ip_acceptsourceroute, 0,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_acceptsourceroute), 0,
"Enable accepting source routed IP packets");
+#define V_ip_acceptsourceroute VNET(ip_acceptsourceroute)
-int ip_doopts = 1; /* 0 = ignore, 1 = process, 2 = reject */
-SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW,
- &ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)");
+VNET_DEFINE(int, ip_doopts) = 1; /* 0 = ignore, 1 = process, 2 = reject */
+SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(ip_doopts), 0, "Enable IP options processing ([LS]SRR, RR, TS)");
static void save_rte(struct mbuf *m, u_char *, struct in_addr);
@@ -103,12 +107,13 @@ ip_dooptions(struct mbuf *m, int pass)
int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
struct in_addr *sin, dst;
uint32_t ntime;
+ struct nhop4_extended nh_ext;
struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
/* Ignore or reject packets with IP options. */
- if (ip_doopts == 0)
+ if (V_ip_doopts == 0)
return 0;
- else if (ip_doopts == 2) {
+ else if (V_ip_doopts == 2) {
type = ICMP_UNREACH;
code = ICMP_UNREACH_FILTER_PROHIB;
goto bad;
@@ -169,7 +174,7 @@ ip_dooptions(struct mbuf *m, int pass)
code = ICMP_UNREACH_SRCFAIL;
goto bad;
}
- if (!ip_dosourceroute)
+ if (!V_ip_dosourceroute)
goto nosourcerouting;
/*
* Loose routing, and not at next destination
@@ -182,7 +187,7 @@ ip_dooptions(struct mbuf *m, int pass)
/*
* End of source route. Should be for us.
*/
- if (!ip_acceptsourceroute)
+ if (!V_ip_acceptsourceroute)
goto nosourcerouting;
save_rte(m, cp, ip->ip_src);
break;
@@ -191,7 +196,7 @@ ip_dooptions(struct mbuf *m, int pass)
if (V_ipstealth)
goto dropit;
#endif
- if (!ip_dosourceroute) {
+ if (!V_ip_dosourceroute) {
if (V_ipforwarding) {
char buf[16]; /* aaa.bbb.ccc.ddd\0 */
/*
@@ -226,23 +231,34 @@ dropit:
(void)memcpy(&ipaddr.sin_addr, cp + off,
sizeof(ipaddr.sin_addr));
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_SRCFAIL;
+
if (opt == IPOPT_SSRR) {
#define INA struct in_ifaddr *
#define SA struct sockaddr *
- if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL)
- ia = (INA)ifa_ifwithnet((SA)&ipaddr, 0);
- } else
-/* XXX MRT 0 for routing */
- ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m));
- if (ia == NULL) {
- type = ICMP_UNREACH;
- code = ICMP_UNREACH_SRCFAIL;
- goto bad;
+ ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr,
+ RT_ALL_FIBS);
+ if (ia == NULL)
+ ia = (INA)ifa_ifwithnet((SA)&ipaddr, 0,
+ RT_ALL_FIBS);
+ if (ia == NULL)
+ goto bad;
+
+ memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
+ sizeof(struct in_addr));
+ ifa_free(&ia->ia_ifa);
+ } else {
+ /* XXX MRT 0 for routing */
+ if (fib4_lookup_nh_ext(M_GETFIB(m),
+ ipaddr.sin_addr, 0, 0, &nh_ext) != 0)
+ goto bad;
+
+ memcpy(cp + off, &nh_ext.nh_src,
+ sizeof(struct in_addr));
}
+
ip->ip_dst = ipaddr.sin_addr;
- (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
- sizeof(struct in_addr));
- ifa_free(&ia->ia_ifa);
cp[IPOPT_OFFSET] += sizeof(struct in_addr);
/*
* Let ip_intr's mcast routing check handle mcast pkts
@@ -276,15 +292,19 @@ dropit:
* destination, use the incoming interface (should be
* same).
*/
- if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL &&
- (ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m))) == NULL) {
+ if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) != NULL) {
+ memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
+ sizeof(struct in_addr));
+ ifa_free(&ia->ia_ifa);
+ } else if (fib4_lookup_nh_ext(M_GETFIB(m),
+ ipaddr.sin_addr, 0, 0, &nh_ext) == 0) {
+ memcpy(cp + off, &nh_ext.nh_src,
+ sizeof(struct in_addr));
+ } else {
type = ICMP_UNREACH;
code = ICMP_UNREACH_HOST;
goto bad;
}
- (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
- sizeof(struct in_addr));
- ifa_free(&ia->ia_ifa);
cp[IPOPT_OFFSET] += sizeof(struct in_addr);
break;
@@ -413,7 +433,7 @@ ip_srcroute(struct mbuf *m0)
if (opts->ip_nhops == 0)
return (NULL);
- m = m_get(M_DONTWAIT, MT_DATA);
+ m = m_get(M_NOWAIT, MT_DATA);
if (m == NULL)
return (NULL);
@@ -455,29 +475,23 @@ ip_srcroute(struct mbuf *m0)
}
/*
- * Strip out IP options, at higher level protocol in the kernel. Second
- * argument is buffer to which options will be moved, and return value is
- * their length.
- *
- * XXX should be deleted; last arg currently ignored.
+ * Strip out IP options, at higher level protocol in the kernel.
*/
void
-ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
+ip_stripoptions(struct mbuf *m)
{
- int i;
struct ip *ip = mtod(m, struct ip *);
- caddr_t opts;
int olen;
- olen = (ip->ip_hl << 2) - sizeof (struct ip);
- opts = (caddr_t)(ip + 1);
- i = m->m_len - (sizeof (struct ip) + olen);
- bcopy(opts + olen, opts, (unsigned)i);
+ olen = (ip->ip_hl << 2) - sizeof(struct ip);
m->m_len -= olen;
if (m->m_flags & M_PKTHDR)
m->m_pkthdr.len -= olen;
- ip->ip_v = IPVERSION;
+ ip->ip_len = htons(ntohs(ip->ip_len) - olen);
ip->ip_hl = sizeof(struct ip) >> 2;
+
+ bcopy((char *)ip + sizeof(struct ip) + olen, (ip + 1),
+ (size_t )(m->m_len - sizeof(struct ip)));
}
/*
@@ -496,19 +510,19 @@ ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
unsigned optlen;
optlen = opt->m_len - sizeof(p->ipopt_dst);
- if (optlen + ip->ip_len > IP_MAXPACKET) {
+ if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) {
*phlen = 0;
return (m); /* XXX should fail */
}
if (p->ipopt_dst.s_addr)
ip->ip_dst = p->ipopt_dst;
- if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
- MGETHDR(n, M_DONTWAIT, MT_DATA);
+ if (!M_WRITABLE(m) || M_LEADINGSPACE(m) < optlen) {
+ n = m_gethdr(M_NOWAIT, MT_DATA);
if (n == NULL) {
*phlen = 0;
return (m);
}
- M_MOVE_PKTHDR(n, m);
+ m_move_pkthdr(n, m);
n->m_pkthdr.rcvif = NULL;
n->m_pkthdr.len += optlen;
m->m_len -= sizeof(struct ip);
@@ -529,7 +543,7 @@ ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
*phlen = sizeof(struct ip) + optlen;
ip->ip_v = IPVERSION;
ip->ip_hl = *phlen >> 2;
- ip->ip_len += optlen;
+ ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
return (m);
}
@@ -596,7 +610,7 @@ ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
/* turn off any old options */
if (*pcbopt)
(void)m_free(*pcbopt);
- *pcbopt = 0;
+ *pcbopt = NULL;
if (m == NULL || m->m_len == 0) {
/*
* Only turning off any previous options.
@@ -694,7 +708,7 @@ bad:
* may change in future.
* Router alert options SHOULD be passed if running in IPSTEALTH mode and
* we are not the endpoint.
- * Length checks on individual options should already have been peformed
+ * Length checks on individual options should already have been performed
* by ip_dooptions() therefore they are folded under INVARIANTS here.
*
* Return zero if not present or options are invalid, non-zero if present.
diff --git a/freebsd/sys/netinet/ip_options.h b/freebsd/sys/netinet/ip_options.h
index 7ba5ae64..4a6ea420 100644
--- a/freebsd/sys/netinet/ip_options.h
+++ b/freebsd/sys/netinet/ip_options.h
@@ -47,14 +47,15 @@ struct ipopt_tag {
struct ipoptrt ip_srcrt;
};
-extern int ip_doopts; /* process or ignore IP options */
+VNET_DECLARE(int, ip_doopts); /* process or ignore IP options */
+#define V_ip_doopts VNET(ip_doopts)
int ip_checkrouteralert(struct mbuf *);
int ip_dooptions(struct mbuf *, int);
struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
int ip_optcopy(struct ip *, struct ip *);
int ip_pcbopts(struct inpcb *, int, struct mbuf *);
-void ip_stripoptions(struct mbuf *, struct mbuf *);
+void ip_stripoptions(struct mbuf *);
struct mbuf *ip_srcroute(struct mbuf *);
#endif /* !_NETINET_IP_OPTIONS_H_ */
diff --git a/freebsd/sys/netinet/ip_output.c b/freebsd/sys/netinet/ip_output.c
index a06fed68..81e7b123 100644
--- a/freebsd/sys/netinet/ip_output.c
+++ b/freebsd/sys/netinet/ip_output.c
@@ -34,27 +34,32 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <rtems/bsd/local/opt_ipfw.h>
+#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_ipsec.h>
-#include <rtems/bsd/local/opt_route.h>
#include <rtems/bsd/local/opt_mbuf_stress_test.h>
#include <rtems/bsd/local/opt_mpath.h>
+#include <rtems/bsd/local/opt_route.h>
#include <rtems/bsd/local/opt_sctp.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/rmlock.h>
+#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_llatbl.h>
#include <net/netisr.h>
#include <net/pfil.h>
@@ -63,12 +68,15 @@ __FBSDID("$FreeBSD$");
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif
+#include <net/rss_config.h>
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
+#include <netinet/in_rss.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_options.h>
@@ -86,25 +94,112 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
-VNET_DEFINE(u_short, ip_id);
-
#ifdef MBUF_STRESS_TEST
static int mbuf_frag_size = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
#endif
-static void ip_mloopback
- (struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
+static void ip_mloopback(struct ifnet *, const struct mbuf *, int);
extern int in_mcast_loop;
extern struct protosw inetsw[];
+static inline int
+ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp,
+ struct sockaddr_in *dst, int *fibnum, int *error)
+{
+ struct m_tag *fwd_tag = NULL;
+ struct mbuf *m;
+ struct in_addr odst;
+ struct ip *ip;
+
+ m = *mp;
+ ip = mtod(m, struct ip *);
+
+ /* Run through list of hooks for output packets. */
+ odst.s_addr = ip->ip_dst.s_addr;
+ *error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, PFIL_OUT, inp);
+ m = *mp;
+ if ((*error) != 0 || m == NULL)
+ return 1; /* Finished */
+
+ ip = mtod(m, struct ip *);
+
+ /* See if destination IP address was changed by packet filter. */
+ if (odst.s_addr != ip->ip_dst.s_addr) {
+ m->m_flags |= M_SKIP_FIREWALL;
+ /* If destination is now ourself drop to ip_input(). */
+ if (in_localip(ip->ip_dst)) {
+ m->m_flags |= M_FASTFWD_OURS;
+ if (m->m_pkthdr.rcvif == NULL)
+ m->m_pkthdr.rcvif = V_loif;
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ m->m_pkthdr.csum_flags |=
+ CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xffff;
+ }
+ m->m_pkthdr.csum_flags |=
+ CSUM_IP_CHECKED | CSUM_IP_VALID;
+#ifdef SCTP
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP)
+ m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
+#endif
+ *error = netisr_queue(NETISR_IP, m);
+ return 1; /* Finished */
+ }
+
+ bzero(dst, sizeof(*dst));
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr = ip->ip_dst;
+
+ return -1; /* Reloop */
+ }
+ /* See if fib was changed by packet filter. */
+ if ((*fibnum) != M_GETFIB(m)) {
+ m->m_flags |= M_SKIP_FIREWALL;
+ *fibnum = M_GETFIB(m);
+ return -1; /* Reloop for FIB change */
+ }
+
+ /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
+ if (m->m_flags & M_FASTFWD_OURS) {
+ if (m->m_pkthdr.rcvif == NULL)
+ m->m_pkthdr.rcvif = V_loif;
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ m->m_pkthdr.csum_flags |=
+ CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ m->m_pkthdr.csum_data = 0xffff;
+ }
+#ifdef SCTP
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP)
+ m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
+#endif
+ m->m_pkthdr.csum_flags |=
+ CSUM_IP_CHECKED | CSUM_IP_VALID;
+
+ *error = netisr_queue(NETISR_IP, m);
+ return 1; /* Finished */
+ }
+ /* Or forward to some other address? */
+ if ((m->m_flags & M_IP_NEXTHOP) &&
+ ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) {
+ bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
+ m->m_flags |= M_SKIP_FIREWALL;
+ m->m_flags &= ~M_IP_NEXTHOP;
+ m_tag_delete(m, fwd_tag);
+
+ return -1; /* Reloop for CHANGE of dst */
+ }
+
+ return 0;
+}
+
/*
* IP output. The packet in mbuf chain m contains a skeletal IP
* header (with len, off, ttl, proto, tos, src, dst).
- * ip_len and ip_off are in host format.
* The mbuf chain containing the packet will be freed.
* The mbuf opt, if present, will not be freed.
* If route ro is present and has ro_rt initialized, route lookup would be
@@ -118,20 +213,22 @@ int
ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
struct ip_moptions *imo, struct inpcb *inp)
{
+ struct rm_priotracker in_ifa_tracker;
struct ip *ip;
struct ifnet *ifp = NULL; /* keep compiler happy */
struct mbuf *m0;
int hlen = sizeof (struct ip);
int mtu;
- int n; /* scratchpad */
int error = 0;
struct sockaddr_in *dst;
+ const struct sockaddr_in *gw;
struct in_ifaddr *ia;
- int isbroadcast, sw_csum;
+ int isbroadcast;
+ uint16_t ip_len, ip_off;
struct route iproute;
struct rtentry *rte; /* cache for ro->ro_rt */
- struct in_addr odst;
- struct m_tag *fwd_tag = NULL;
+ uint32_t fibnum;
+ int have_ia_ref;
#ifdef IPSEC
int no_route_but_check_spd = 0;
#endif
@@ -140,31 +237,21 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
if (inp != NULL) {
INP_LOCK_ASSERT(inp);
M_SETFIB(m, inp->inp_inc.inc_fibnum);
- if (inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID)) {
+ if ((flags & IP_NODEFAULTFLOWID) == 0) {
m->m_pkthdr.flowid = inp->inp_flowid;
- m->m_flags |= M_FLOWID;
+ M_HASHTYPE_SET(m, inp->inp_flowtype);
}
}
if (ro == NULL) {
ro = &iproute;
bzero(ro, sizeof (*ro));
- }
+ } else
+ ro->ro_flags |= RT_LLE_CACHE;
#ifdef FLOWTABLE
- if (ro->ro_rt == NULL) {
- struct flentry *fle;
-
- /*
- * The flow table returns route entries valid for up to 30
- * seconds; we rely on the remainder of ip_output() taking no
- * longer than that long for the stability of ro_rt. The
- * flow ID assignment must have happened before this point.
- */
- fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET);
- if (fle != NULL)
- flow_to_route(fle, ro);
- }
+ if (ro->ro_rt == NULL)
+ (void )flowtable_lookup(AF_INET, m, ro);
#endif
if (opt) {
@@ -174,37 +261,49 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
hlen = len; /* ip->ip_hl is updated above */
}
ip = mtod(m, struct ip *);
+ ip_len = ntohs(ip->ip_len);
+ ip_off = ntohs(ip->ip_off);
- /*
- * Fill in IP header. If we are not allowing fragmentation,
- * then the ip_id field is meaningless, but we don't set it
- * to zero. Doing so causes various problems when devices along
- * the path (routers, load balancers, firewalls, etc.) illegally
- * disable DF on our packet. Note that a 16-bit counter
- * will wrap around in less than 10 seconds at 100 Mbit/s on a
- * medium with MTU 1500. See Steven M. Bellovin, "A Technique
- * for Counting NATted Hosts", Proc. IMW'02, available at
- * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
- */
if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
ip->ip_v = IPVERSION;
ip->ip_hl = hlen >> 2;
- ip->ip_id = ip_newid();
+ ip_fillid(ip);
IPSTAT_INC(ips_localout);
} else {
/* Header already set, fetch hlen from there */
hlen = ip->ip_hl << 2;
}
+ /*
+ * dst/gw handling:
+ *
+ * dst can be rewritten but always points to &ro->ro_dst.
+ * gw is readonly but can point either to dst OR rt_gateway,
+ * therefore we need restore gw if we're redoing lookup.
+ */
+ gw = dst = (struct sockaddr_in *)&ro->ro_dst;
+ fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
+ rte = ro->ro_rt;
+ if (rte == NULL) {
+ bzero(dst, sizeof(*dst));
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr = ip->ip_dst;
+ }
again:
- dst = (struct sockaddr_in *)&ro->ro_dst;
- ia = NULL;
+ /*
+ * Validate route against routing table additions;
+ * a better/more specific route might have been added.
+ */
+ if (inp)
+ RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
/*
* If there is a cached route,
* check that it is to the same destination
* and is still up. If not, free it and try again.
* The address family should also be checked in case of sharing the
* cache with IPv6.
+ * Also check whether routing cache needs invalidation.
*/
rte = ro->ro_rt;
if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
@@ -212,16 +311,14 @@ again:
!RT_LINK_IS_UP(rte->rt_ifp) ||
dst->sin_family != AF_INET ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
- RO_RTFREE(ro);
- ro->ro_lle = NULL;
- rte = NULL;
- }
- if (rte == NULL && fwd_tag == NULL) {
- bzero(dst, sizeof(*dst));
- dst->sin_family = AF_INET;
- dst->sin_len = sizeof(*dst);
- dst->sin_addr = ip->ip_dst;
+ RTFREE(rte);
+ rte = ro->ro_rt = (struct rtentry *)NULL;
+ if (ro->ro_lle)
+ LLE_FREE(ro->ro_lle); /* zeros ro_lle */
+ ro->ro_lle = (struct llentry *)NULL;
}
+ ia = NULL;
+ have_ia_ref = 0;
/*
* If routing to interface only, short circuit routing lookup.
* The use of an all-ones broadcast address implies this; an
@@ -229,27 +326,33 @@ again:
* or the destination address of a ptp interface.
*/
if (flags & IP_SENDONES) {
- if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL &&
- (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
+ if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst),
+ M_GETFIB(m)))) == NULL &&
+ (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
+ M_GETFIB(m)))) == NULL) {
IPSTAT_INC(ips_noroute);
error = ENETUNREACH;
goto bad;
}
+ have_ia_ref = 1;
ip->ip_dst.s_addr = INADDR_BROADCAST;
dst->sin_addr = ip->ip_dst;
ifp = ia->ia_ifp;
ip->ip_ttl = 1;
isbroadcast = 1;
} else if (flags & IP_ROUTETOIF) {
- if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
- (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0))) == NULL) {
+ if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
+ M_GETFIB(m)))) == NULL &&
+ (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0,
+ M_GETFIB(m)))) == NULL) {
IPSTAT_INC(ips_noroute);
error = ENETUNREACH;
goto bad;
}
+ have_ia_ref = 1;
ifp = ia->ia_ifp;
ip->ip_ttl = 1;
- isbroadcast = in_broadcast(dst->sin_addr, ifp);
+ isbroadcast = in_ifaddr_broadcast(dst->sin_addr, ia);
} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
imo != NULL && imo->imo_multicast_ifp != NULL) {
/*
@@ -257,7 +360,9 @@ again:
* packets if the interface is specified.
*/
ifp = imo->imo_multicast_ifp;
- IFP_TO_IA(ifp, ia);
+ IFP_TO_IA(ifp, ia, &in_ifa_tracker);
+ if (ia)
+ have_ia_ref = 1;
isbroadcast = 0; /* fool gcc */
} else {
/*
@@ -269,14 +374,14 @@ again:
#ifdef RADIX_MPATH
rtalloc_mpath_fib(ro,
ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
- inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
+ fibnum);
#else
- in_rtalloc_ign(ro, 0,
- inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
+ in_rtalloc_ign(ro, 0, fibnum);
#endif
rte = ro->ro_rt;
}
if (rte == NULL ||
+ (rte->rt_flags & RTF_UP) == 0 ||
rte->rt_ifp == NULL ||
!RT_LINK_IS_UP(rte->rt_ifp)) {
#ifdef IPSEC
@@ -293,45 +398,37 @@ again:
goto bad;
}
ia = ifatoia(rte->rt_ifa);
- ifa_ref(&ia->ia_ifa);
ifp = rte->rt_ifp;
- rte->rt_rmx.rmx_pksent++;
+ counter_u64_add(rte->rt_pksent, 1);
+ rt_update_ro_flags(ro);
if (rte->rt_flags & RTF_GATEWAY)
- dst = (struct sockaddr_in *)rte->rt_gateway;
+ gw = (struct sockaddr_in *)rte->rt_gateway;
if (rte->rt_flags & RTF_HOST)
isbroadcast = (rte->rt_flags & RTF_BROADCAST);
else
- isbroadcast = in_broadcast(dst->sin_addr, ifp);
+ isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
}
+
/*
* Calculate MTU. If we have a route that is up, use that,
* otherwise use the interface's MTU.
*/
- if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST))) {
- /*
- * This case can happen if the user changed the MTU
- * of an interface after enabling IP on it. Because
- * most netifs don't keep track of routes pointing to
- * them, there is no way for one to update all its
- * routes when the MTU is changed.
- */
- if (rte->rt_rmx.rmx_mtu > ifp->if_mtu)
- rte->rt_rmx.rmx_mtu = ifp->if_mtu;
- mtu = rte->rt_rmx.rmx_mtu;
- } else {
+ if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST)))
+ mtu = rte->rt_mtu;
+ else
mtu = ifp->if_mtu;
- }
/* Catch a possible divide by zero later. */
KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p",
__func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp));
+
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
m->m_flags |= M_MCAST;
/*
- * IP destination address is multicast. Make sure "dst"
+ * IP destination address is multicast. Make sure "gw"
* still points to the address in "ro". (It may have been
* changed to point to a gateway address, above.)
*/
- dst = (struct sockaddr_in *)&ro->ro_dst;
+ gw = dst;
/*
* See if the caller provided any multicast options
*/
@@ -373,7 +470,7 @@ again:
* thus deferring a hash lookup and mutex acquisition
* at the expense of a cheap copy using m_copym().
*/
- ip_mloopback(ifp, m, dst, hlen);
+ ip_mloopback(ifp, m, hlen);
} else {
/*
* If we are acting as a multicast router, perform
@@ -433,23 +530,6 @@ again:
}
/*
- * Verify that we have any chance at all of being able to queue the
- * packet or packet fragments, unless ALTQ is enabled on the given
- * interface in which case packetdrop should be done by queueing.
- */
- n = ip->ip_len / mtu + 1; /* how many fragments ? */
- if (
-#ifdef ALTQ
- (!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
-#endif /* ALTQ */
- (ifp->if_snd.ifq_len + n) >= ifp->if_snd.ifq_maxlen ) {
- error = ENOBUFS;
- IPSTAT_INC(ips_odropped);
- ifp->if_snd.ifq_drops += n;
- goto bad;
- }
-
- /*
* Look for broadcast address and
* verify user is allowed to send
* such a packet.
@@ -464,7 +544,7 @@ again:
goto bad;
}
/* don't allow broadcast messages to be fragmented */
- if (ip->ip_len > mtu) {
+ if (ip_len > mtu) {
error = EMSGSIZE;
goto bad;
}
@@ -475,7 +555,7 @@ again:
sendit:
#ifdef IPSEC
- switch(ip_ipsec_output(&m, inp, &flags, &error)) {
+ switch(ip_ipsec_output(&m, inp, &error)) {
case 1:
goto bad;
case -1:
@@ -498,78 +578,29 @@ sendit:
#endif /* IPSEC */
/* Jump over all PFIL processing if hooks are not active. */
- if (!PFIL_HOOKED(&V_inet_pfil_hook))
- goto passout;
-
- /* Run through list of hooks for output packets. */
- odst.s_addr = ip->ip_dst.s_addr;
- error = pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
- if (error != 0 || m == NULL)
- goto done;
+ if (PFIL_HOOKED(&V_inet_pfil_hook)) {
+ switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) {
+ case 1: /* Finished */
+ goto done;
- ip = mtod(m, struct ip *);
+ case 0: /* Continue normally */
+ ip = mtod(m, struct ip *);
+ break;
- /* See if destination IP address was changed by packet filter. */
- if (odst.s_addr != ip->ip_dst.s_addr) {
- m->m_flags |= M_SKIP_FIREWALL;
- /* If destination is now ourself drop to ip_input(). */
- if (in_localip(ip->ip_dst)) {
- m->m_flags |= M_FASTFWD_OURS;
- if (m->m_pkthdr.rcvif == NULL)
- m->m_pkthdr.rcvif = V_loif;
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
- m->m_pkthdr.csum_flags |=
- CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
- m->m_pkthdr.csum_data = 0xffff;
- }
- m->m_pkthdr.csum_flags |=
- CSUM_IP_CHECKED | CSUM_IP_VALID;
-#ifdef SCTP
- if (m->m_pkthdr.csum_flags & CSUM_SCTP)
- m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
-#endif
- error = netisr_queue(NETISR_IP, m);
- goto done;
- } else {
- if (ia != NULL)
+ case -1: /* Need to try again */
+ /* Reset everything for a new round */
+ RO_RTFREE(ro);
+ if (have_ia_ref)
ifa_free(&ia->ia_ifa);
- goto again; /* Redo the routing table lookup. */
- }
- }
+ ro->ro_prepend = NULL;
+ rte = NULL;
+ gw = dst;
+ ip = mtod(m, struct ip *);
+ goto again;
- /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
- if (m->m_flags & M_FASTFWD_OURS) {
- if (m->m_pkthdr.rcvif == NULL)
- m->m_pkthdr.rcvif = V_loif;
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
- m->m_pkthdr.csum_flags |=
- CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
- m->m_pkthdr.csum_data = 0xffff;
}
-#ifdef SCTP
- if (m->m_pkthdr.csum_flags & CSUM_SCTP)
- m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
-#endif
- m->m_pkthdr.csum_flags |=
- CSUM_IP_CHECKED | CSUM_IP_VALID;
-
- error = netisr_queue(NETISR_IP, m);
- goto done;
- }
- /* Or forward to some other address? */
- if ((m->m_flags & M_IP_NEXTHOP) &&
- (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
- dst = (struct sockaddr_in *)&ro->ro_dst;
- bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
- m->m_flags |= M_SKIP_FIREWALL;
- m->m_flags &= ~M_IP_NEXTHOP;
- m_tag_delete(m, fwd_tag);
- if (ia != NULL)
- ifa_free(&ia->ia_ifa);
- goto again;
}
-passout:
/* 127/8 must not appear on wire - RFC1122. */
if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
(ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
@@ -581,31 +612,28 @@ passout:
}
m->m_pkthdr.csum_flags |= CSUM_IP;
- sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
- if (sw_csum & CSUM_DELAY_DATA) {
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
in_delayed_cksum(m);
- sw_csum &= ~CSUM_DELAY_DATA;
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
#ifdef SCTP
- if (sw_csum & CSUM_SCTP) {
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
- sw_csum &= ~CSUM_SCTP;
+ m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
}
#endif
- m->m_pkthdr.csum_flags &= ifp->if_hwassist;
/*
* If small enough for interface, or the interface will take
* care of the fragmentation for us, we can just send directly.
*/
- if (ip->ip_len <= mtu ||
- (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
- ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
+ if (ip_len <= mtu ||
+ (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
ip->ip_sum = 0;
- if (sw_csum & CSUM_DELAY_IP)
+ if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
ip->ip_sum = in_cksum(m, hlen);
+ m->m_pkthdr.csum_flags &= ~CSUM_IP;
+ }
/*
* Record statistics for this interface address.
@@ -615,28 +643,30 @@ passout:
*/
if (!(flags & IP_FORWARDING) && ia) {
if (m->m_pkthdr.csum_flags & CSUM_TSO)
- ia->ia_ifa.if_opackets +=
- m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
+ counter_u64_add(ia->ia_ifa.ifa_opackets,
+ m->m_pkthdr.len / m->m_pkthdr.tso_segsz);
else
- ia->ia_ifa.if_opackets++;
- ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+ counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
+
+ counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len);
}
#ifdef MBUF_STRESS_TEST
if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
- m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
+ m = m_fragment(m, M_NOWAIT, mbuf_frag_size);
#endif
/*
* Reset layer specific mbuf flags
* to avoid confusing lower layers.
*/
- m->m_flags &= ~(M_PROTOFLAGS);
+ m_clrprotoflags(m);
+ IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
error = (*ifp->if_output)(ifp, m,
- (struct sockaddr *)dst, ro);
+ (const struct sockaddr *)gw, ro);
goto done;
}
/* Balk when DF bit is set or the interface didn't support TSO. */
- if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
+ if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
error = EMSGSIZE;
IPSTAT_INC(ips_cantfrag);
goto bad;
@@ -646,7 +676,7 @@ passout:
* Too large for interface; fragment if possible. If successful,
* on return, m will point to a list of packets to be sent.
*/
- error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum);
+ error = ip_fragment(ip, &m, mtu, ifp->if_hwassist);
if (error)
goto bad;
for (; m; m = m0) {
@@ -655,17 +685,19 @@ passout:
if (error == 0) {
/* Record statistics for this interface address. */
if (ia != NULL) {
- ia->ia_ifa.if_opackets++;
- ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+ counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
+ counter_u64_add(ia->ia_ifa.ifa_obytes,
+ m->m_pkthdr.len);
}
/*
* Reset layer specific mbuf flags
* to avoid confusing upper layers.
*/
- m->m_flags &= ~(M_PROTOFLAGS);
+ m_clrprotoflags(m);
+ IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
error = (*ifp->if_output)(ifp, m,
- (struct sockaddr *)dst, ro);
+ (const struct sockaddr *)gw, ro);
} else
m_freem(m);
}
@@ -674,9 +706,20 @@ passout:
IPSTAT_INC(ips_fragmented);
done:
- if (ro == &iproute)
+ /*
+ * Release the route if using our private route, or if
+ * (with flowtable) we don't have our own reference.
+ */
+ if (ro == &iproute || ro->ro_flags & RT_NORTREF)
RO_RTFREE(ro);
- if (ia != NULL)
+ else if (rte == NULL)
+ /*
+ * If the caller supplied a route but somehow the reference
+ * to it has been released need to prevent the caller
+ * calling RTFREE on it again.
+ */
+ ro->ro_rt = NULL;
+ if (have_ia_ref)
ifa_free(&ia->ia_ifa);
return (error);
bad:
@@ -691,11 +734,10 @@ bad:
* chain of fragments that should be freed by the caller.
*
* if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
- * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
*/
int
ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
- u_long if_hwassist_flags, int sw_csum)
+ u_long if_hwassist_flags)
{
int error = 0;
int hlen = ip->ip_hl << 2;
@@ -705,8 +747,12 @@ ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
int firstlen;
struct mbuf **mnext;
int nfrags;
+ uint16_t ip_len, ip_off;
+
+ ip_len = ntohs(ip->ip_len);
+ ip_off = ntohs(ip->ip_off);
- if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */
+ if (ip_off & IP_DF) { /* Fragmentation not allowed */
IPSTAT_INC(ips_cantfrag);
return EMSGSIZE;
}
@@ -732,10 +778,10 @@ ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
}
#endif
if (len > PAGE_SIZE) {
- /*
- * Fragment large datagrams such that each segment
- * contains a multiple of PAGE_SIZE amount of data,
- * plus headers. This enables a receiver to perform
+ /*
+ * Fragment large datagrams such that each segment
+ * contains a multiple of PAGE_SIZE amount of data,
+ * plus headers. This enables a receiver to perform
* page-flipping zero-copy optimizations.
*
* XXX When does this help given that sender and receiver
@@ -747,7 +793,7 @@ ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
off = MIN(mtu, m0->m_pkthdr.len);
/*
- * firstlen (off - hlen) must be aligned on an
+ * firstlen (off - hlen) must be aligned on an
* 8-byte boundary
*/
if (off < hlen)
@@ -776,22 +822,30 @@ smart_frag_failure:
* The fragments are linked off the m_nextpkt of the original
* packet, which after processing serves as the first fragment.
*/
- for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
+ for (nfrags = 1; off < ip_len; off += len, nfrags++) {
struct ip *mhip; /* ip header on the fragment */
struct mbuf *m;
int mhlen = sizeof (struct ip);
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
IPSTAT_INC(ips_odropped);
goto done;
}
- /* copy multicast and flowid flag, if any */
- m->m_flags |= (m0->m_flags & (M_FLOWID | M_MCAST)) | M_FRAG;
- /* make sure the flowid is the same for the fragmented mbufs */
- M_HASHTYPE_SET(m, M_HASHTYPE_GET(m0));
- m->m_pkthdr.flowid = m0->m_pkthdr.flowid;
+ /*
+ * Make sure the complete packet header gets copied
+ * from the originating mbuf to the newly created
+ * mbuf. This also ensures that existing firewall
+ * classification(s), VLAN tags and so on get copied
+ * to the resulting fragmented packet(s):
+ */
+ if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) {
+ m_free(m);
+ error = ENOBUFS;
+ IPSTAT_INC(ips_odropped);
+ goto done;
+ }
/*
* In the first mbuf, leave room for the link header, then
* copy the original IP header including options. The payload
@@ -806,15 +860,14 @@ smart_frag_failure:
mhip->ip_hl = mhlen >> 2;
}
m->m_len = mhlen;
- /* XXX do we need to add ip->ip_off below ? */
- mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
- if (off + len >= ip->ip_len) { /* last fragment */
- len = ip->ip_len - off;
- m->m_flags |= M_LASTFRAG;
- } else
+ /* XXX do we need to add ip_off below ? */
+ mhip->ip_off = ((off - hlen) >> 3) + ip_off;
+ if (off + len >= ip_len)
+ len = ip_len - off;
+ else
mhip->ip_off |= IP_MF;
mhip->ip_len = htons((u_short)(len + mhlen));
- m->m_next = m_copym(m0, off, len, M_DONTWAIT);
+ m->m_next = m_copym(m0, off, len, M_NOWAIT);
if (m->m_next == NULL) { /* copy failed */
m_free(m);
error = ENOBUFS; /* ??? */
@@ -822,36 +875,33 @@ smart_frag_failure:
goto done;
}
m->m_pkthdr.len = mhlen + len;
- m->m_pkthdr.rcvif = NULL;
#ifdef MAC
mac_netinet_fragment(m0, m);
#endif
- m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
mhip->ip_off = htons(mhip->ip_off);
mhip->ip_sum = 0;
- if (sw_csum & CSUM_DELAY_IP)
+ if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
mhip->ip_sum = in_cksum(m, mhlen);
+ m->m_pkthdr.csum_flags &= ~CSUM_IP;
+ }
*mnext = m;
mnext = &m->m_nextpkt;
}
IPSTAT_ADD(ips_ofragments, nfrags);
- /* set first marker for fragment chain */
- m0->m_flags |= M_FIRSTFRAG | M_FRAG;
- m0->m_pkthdr.csum_data = nfrags;
-
/*
* Update first fragment by trimming what's been copied out
* and updating header.
*/
- m_adj(m0, hlen + firstlen - ip->ip_len);
+ m_adj(m0, hlen + firstlen - ip_len);
m0->m_pkthdr.len = hlen + firstlen;
ip->ip_len = htons((u_short)m0->m_pkthdr.len);
- ip->ip_off |= IP_MF;
- ip->ip_off = htons(ip->ip_off);
+ ip->ip_off = htons(ip_off | IP_MF);
ip->ip_sum = 0;
- if (sw_csum & CSUM_DELAY_IP)
+ if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
ip->ip_sum = in_cksum(m0, hlen);
+ m0->m_pkthdr.csum_flags &= ~CSUM_IP;
+ }
done:
*m_frag = m0;
@@ -862,11 +912,12 @@ void
in_delayed_cksum(struct mbuf *m)
{
struct ip *ip;
- u_short csum, offset;
+ uint16_t csum, offset, ip_len;
ip = mtod(m, struct ip *);
offset = ip->ip_hl << 2 ;
- csum = in_cksum_skip(m, ip->ip_len, offset);
+ ip_len = ntohs(ip->ip_len);
+ csum = in_cksum_skip(m, ip_len, offset);
if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
csum = 0xffff;
offset += m->m_pkthdr.csum_data; /* checksum offset */
@@ -889,6 +940,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
{
struct inpcb *inp = sotoinpcb(so);
int error, optval;
+#ifdef RSS
+ uint32_t rss_bucket;
+ int retval;
+#endif
error = optval = 0;
if (sopt->sopt_level != IPPROTO_IP) {
@@ -941,7 +996,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
error = EMSGSIZE;
break;
}
- MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
+ m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
break;
@@ -967,6 +1022,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
break;
}
/* FALLTHROUGH */
+ case IP_BINDMULTI:
+#ifdef RSS
+ case IP_RSS_LISTEN_BUCKET:
+#endif
case IP_TOS:
case IP_TTL:
case IP_MINTTL:
@@ -975,10 +1034,13 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
case IP_RECVDSTADDR:
case IP_RECVTTL:
case IP_RECVIF:
- case IP_FAITH:
case IP_ONESBCAST:
case IP_DONTFRAG:
case IP_RECVTOS:
+ case IP_RECVFLOWID:
+#ifdef RSS
+ case IP_RECVRSSBUCKETID:
+#endif
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
if (error)
@@ -1009,6 +1071,15 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
INP_WUNLOCK(inp); \
} while (0)
+#define OPTSET2(bit, val) do { \
+ INP_WLOCK(inp); \
+ if (val) \
+ inp->inp_flags2 |= bit; \
+ else \
+ inp->inp_flags2 &= ~bit; \
+ INP_WUNLOCK(inp); \
+} while (0)
+
case IP_RECVOPTS:
OPTSET(INP_RECVOPTS);
break;
@@ -1029,10 +1100,6 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
OPTSET(INP_RECVIF);
break;
- case IP_FAITH:
- OPTSET(INP_FAITH);
- break;
-
case IP_ONESBCAST:
OPTSET(INP_ONESBCAST);
break;
@@ -1045,9 +1112,30 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
case IP_RECVTOS:
OPTSET(INP_RECVTOS);
break;
+ case IP_BINDMULTI:
+ OPTSET2(INP_BINDMULTI, optval);
+ break;
+ case IP_RECVFLOWID:
+ OPTSET2(INP_RECVFLOWID, optval);
+ break;
+#ifdef RSS
+ case IP_RSS_LISTEN_BUCKET:
+ if ((optval >= 0) &&
+ (optval < rss_getnumbuckets())) {
+ inp->inp_rss_listen_bucket = optval;
+ OPTSET2(INP_RSS_BUCKET_SET, 1);
+ } else {
+ error = EINVAL;
+ }
+ break;
+ case IP_RECVRSSBUCKETID:
+ OPTSET2(INP_RECVRSSBUCKETID, optval);
+ break;
+#endif
}
break;
#undef OPTSET
+#undef OPTSET2
/*
* Multicast socket options are processed by the in_mcast
@@ -1133,7 +1221,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
case IP_OPTIONS:
case IP_RETOPTS:
if (inp->inp_options)
- error = sooptcopyout(sopt,
+ error = sooptcopyout(sopt,
mtod(inp->inp_options,
char *),
inp->inp_options->m_len);
@@ -1150,11 +1238,18 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
case IP_RECVTTL:
case IP_RECVIF:
case IP_PORTRANGE:
- case IP_FAITH:
case IP_ONESBCAST:
case IP_DONTFRAG:
case IP_BINDANY:
case IP_RECVTOS:
+ case IP_BINDMULTI:
+ case IP_FLOWID:
+ case IP_FLOWTYPE:
+ case IP_RECVFLOWID:
+#ifdef RSS
+ case IP_RSSBUCKETID:
+ case IP_RECVRSSBUCKETID:
+#endif
switch (sopt->sopt_name) {
case IP_TOS:
@@ -1170,6 +1265,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
break;
#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
+#define OPTBIT2(bit) (inp->inp_flags2 & bit ? 1 : 0)
case IP_RECVOPTS:
optval = OPTBIT(INP_RECVOPTS);
@@ -1200,10 +1296,6 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
optval = 0;
break;
- case IP_FAITH:
- optval = OPTBIT(INP_FAITH);
- break;
-
case IP_ONESBCAST:
optval = OPTBIT(INP_ONESBCAST);
break;
@@ -1216,6 +1308,32 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
case IP_RECVTOS:
optval = OPTBIT(INP_RECVTOS);
break;
+ case IP_FLOWID:
+ optval = inp->inp_flowid;
+ break;
+ case IP_FLOWTYPE:
+ optval = inp->inp_flowtype;
+ break;
+ case IP_RECVFLOWID:
+ optval = OPTBIT2(INP_RECVFLOWID);
+ break;
+#ifdef RSS
+ case IP_RSSBUCKETID:
+ retval = rss_hash2bucket(inp->inp_flowid,
+ inp->inp_flowtype,
+ &rss_bucket);
+ if (retval == 0)
+ optval = rss_bucket;
+ else
+ error = EINVAL;
+ break;
+ case IP_RECVRSSBUCKETID:
+ optval = OPTBIT2(INP_RECVRSSBUCKETID);
+ break;
+#endif
+ case IP_BINDMULTI:
+ optval = OPTBIT2(INP_BINDMULTI);
+ break;
}
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
@@ -1239,7 +1357,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
caddr_t req = NULL;
size_t len = 0;
- if (m != 0) {
+ if (m != NULL) {
req = mtod(m, caddr_t);
len = m->m_len;
}
@@ -1269,18 +1387,17 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
* replicating that code here.
*/
static void
-ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
- int hlen)
+ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen)
{
- register struct ip *ip;
+ struct ip *ip;
struct mbuf *copym;
/*
* Make a deep copy of the packet because we're going to
* modify the pack in order to generate checksums.
*/
- copym = m_dup(m, M_DONTWAIT);
- if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
+ copym = m_dup(m, M_NOWAIT);
+ if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen))
copym = m_pullup(copym, hlen);
if (copym != NULL) {
/* If needed, compute the checksum and mark it as valid. */
@@ -1296,17 +1413,8 @@ ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
* than the interface's MTU. Can this possibly matter?
*/
ip = mtod(copym, struct ip *);
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
ip->ip_sum = in_cksum(copym, hlen);
-#if 1 /* XXX */
- if (dst->sin_family != AF_INET) {
- printf("ip_mloopback: bad address family %d\n",
- dst->sin_family);
- dst->sin_family = AF_INET;
- }
-#endif
- if_simloop(ifp, copym, dst->sin_family, 0);
+ if_simloop(ifp, copym, AF_INET, 0);
}
}
diff --git a/freebsd/sys/netinet/ip_reass.c b/freebsd/sys/netinet/ip_reass.c
new file mode 100644
index 00000000..aae24b9d
--- /dev/null
+++ b/freebsd/sys/netinet/ip_reass.c
@@ -0,0 +1,660 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2015 Gleb Smirnoff <glebius@FreeBSD.org>
+ * Copyright (c) 2015 Adrian Chadd <adrian@FreeBSD.org>
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_rss.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/eventhandler.h>
+#include <sys/hash.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+
+#include <net/rss_config.h>
+#include <net/netisr.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/in_rss.h>
+#ifdef MAC
+#include <security/mac/mac_framework.h>
+#endif
+
+SYSCTL_DECL(_net_inet_ip);
+
+/*
+ * Reassembly headers are stored in hash buckets.
+ */
+#define IPREASS_NHASH_LOG2 6
+#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
+#define IPREASS_HMASK (IPREASS_NHASH - 1)
+
+struct ipqbucket {
+ TAILQ_HEAD(ipqhead, ipq) head;
+ struct mtx lock;
+};
+
+static VNET_DEFINE(struct ipqbucket, ipq[IPREASS_NHASH]);
+#define V_ipq VNET(ipq)
+static VNET_DEFINE(uint32_t, ipq_hashseed);
+#define V_ipq_hashseed VNET(ipq_hashseed)
+
+#define IPQ_LOCK(i) mtx_lock(&V_ipq[i].lock)
+#define IPQ_TRYLOCK(i) mtx_trylock(&V_ipq[i].lock)
+#define IPQ_UNLOCK(i) mtx_unlock(&V_ipq[i].lock)
+#define IPQ_LOCK_ASSERT(i) mtx_assert(&V_ipq[i].lock, MA_OWNED)
+
+void ipreass_init(void);
+void ipreass_drain(void);
+void ipreass_slowtimo(void);
+#ifdef VIMAGE
+void ipreass_destroy(void);
+#endif
+static int sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS);
+static void ipreass_zone_change(void *);
+static void ipreass_drain_tomax(void);
+static void ipq_free(struct ipqhead *, struct ipq *);
+static struct ipq * ipq_reuse(int);
+
+static inline void
+ipq_timeout(struct ipqhead *head, struct ipq *fp)
+{
+
+ IPSTAT_ADD(ips_fragtimeout, fp->ipq_nfrags);
+ ipq_free(head, fp);
+}
+
+static inline void
+ipq_drop(struct ipqhead *head, struct ipq *fp)
+{
+
+ IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
+ ipq_free(head, fp);
+}
+
+static VNET_DEFINE(uma_zone_t, ipq_zone);
+#define V_ipq_zone VNET(ipq_zone)
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_VNET |
+ CTLTYPE_INT | CTLFLAG_RW, NULL, 0, sysctl_maxfragpackets, "I",
+ "Maximum number of IPv4 fragment reassembly queue entries");
+SYSCTL_UMA_CUR(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_VNET,
+ &VNET_NAME(ipq_zone),
+ "Current number of IPv4 fragment reassembly queue entries");
+
+static VNET_DEFINE(int, noreass);
+#define V_noreass VNET(noreass)
+
+static VNET_DEFINE(int, maxfragsperpacket);
+#define V_maxfragsperpacket VNET(maxfragsperpacket)
+SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(maxfragsperpacket), 0,
+ "Maximum number of IPv4 fragments allowed per packet");
+
+/*
+ * Take incoming datagram fragment and try to reassemble it into
+ * whole datagram. If the argument is the first fragment or one
+ * in between the function will return NULL and store the mbuf
+ * in the fragment chain. If the argument is the last fragment
+ * the packet will be reassembled and the pointer to the new
+ * mbuf returned for further processing. Only m_tags attached
+ * to the first packet/fragment are preserved.
+ * The IP header is *NOT* adjusted out of iplen.
+ */
+#define M_IP_FRAG M_PROTO9
+struct mbuf *
+ip_reass(struct mbuf *m)
+{
+ struct ip *ip;
+ struct mbuf *p, *q, *nq, *t;
+ struct ipq *fp;
+ struct ipqhead *head;
+ int i, hlen, next;
+ u_int8_t ecn, ecn0;
+ uint32_t hash;
+#ifdef RSS
+ uint32_t rss_hash, rss_type;
+#endif
+
+ /*
+	 * If reassembly is disabled or maxfragsperpacket is 0,
+	 * never accept fragments.
+ */
+ if (V_noreass == 1 || V_maxfragsperpacket == 0) {
+ IPSTAT_INC(ips_fragments);
+ IPSTAT_INC(ips_fragdropped);
+ m_freem(m);
+ return (NULL);
+ }
+
+ ip = mtod(m, struct ip *);
+ hlen = ip->ip_hl << 2;
+
+ /*
+ * Adjust ip_len to not reflect header,
+ * convert offset of this to bytes.
+ */
+ ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
+ if (ip->ip_off & htons(IP_MF)) {
+ /*
+ * Make sure that fragments have a data length
+ * that's a non-zero multiple of 8 bytes.
+ */
+ if (ip->ip_len == htons(0) || (ntohs(ip->ip_len) & 0x7) != 0) {
+ IPSTAT_INC(ips_toosmall); /* XXX */
+ IPSTAT_INC(ips_fragdropped);
+ m_freem(m);
+ return (NULL);
+ }
+ m->m_flags |= M_IP_FRAG;
+ } else
+ m->m_flags &= ~M_IP_FRAG;
+ ip->ip_off = htons(ntohs(ip->ip_off) << 3);
+
+ /*
+ * Attempt reassembly; if it succeeds, proceed.
+ * ip_reass() will return a different mbuf.
+ */
+ IPSTAT_INC(ips_fragments);
+ m->m_pkthdr.PH_loc.ptr = ip;
+
+ /*
+ * Presence of header sizes in mbufs
+ * would confuse code below.
+ */
+ m->m_data += hlen;
+ m->m_len -= hlen;
+
+ hash = ip->ip_src.s_addr ^ ip->ip_id;
+ hash = jenkins_hash32(&hash, 1, V_ipq_hashseed) & IPREASS_HMASK;
+ head = &V_ipq[hash].head;
+ IPQ_LOCK(hash);
+
+ /*
+ * Look for queue of fragments
+ * of this datagram.
+ */
+ TAILQ_FOREACH(fp, head, ipq_list)
+ if (ip->ip_id == fp->ipq_id &&
+ ip->ip_src.s_addr == fp->ipq_src.s_addr &&
+ ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
+#ifdef MAC
+ mac_ipq_match(m, fp) &&
+#endif
+ ip->ip_p == fp->ipq_p)
+ break;
+ /*
+ * If first fragment to arrive, create a reassembly queue.
+ */
+ if (fp == NULL) {
+ fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
+ if (fp == NULL)
+ fp = ipq_reuse(hash);
+#ifdef MAC
+ if (mac_ipq_init(fp, M_NOWAIT) != 0) {
+ uma_zfree(V_ipq_zone, fp);
+ fp = NULL;
+ goto dropfrag;
+ }
+ mac_ipq_create(m, fp);
+#endif
+ TAILQ_INSERT_HEAD(head, fp, ipq_list);
+ fp->ipq_nfrags = 1;
+ fp->ipq_ttl = IPFRAGTTL;
+ fp->ipq_p = ip->ip_p;
+ fp->ipq_id = ip->ip_id;
+ fp->ipq_src = ip->ip_src;
+ fp->ipq_dst = ip->ip_dst;
+ fp->ipq_frags = m;
+ m->m_nextpkt = NULL;
+ goto done;
+ } else {
+ fp->ipq_nfrags++;
+#ifdef MAC
+ mac_ipq_update(m, fp);
+#endif
+ }
+
+#define GETIP(m) ((struct ip*)((m)->m_pkthdr.PH_loc.ptr))
+
+ /*
+ * Handle ECN by comparing this segment with the first one;
+ * if CE is set, do not lose CE.
+ * drop if CE and not-ECT are mixed for the same packet.
+ */
+ ecn = ip->ip_tos & IPTOS_ECN_MASK;
+ ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
+ if (ecn == IPTOS_ECN_CE) {
+ if (ecn0 == IPTOS_ECN_NOTECT)
+ goto dropfrag;
+ if (ecn0 != IPTOS_ECN_CE)
+ GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
+ }
+ if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
+ goto dropfrag;
+
+ /*
+ * Find a segment which begins after this one does.
+ */
+ for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
+ if (ntohs(GETIP(q)->ip_off) > ntohs(ip->ip_off))
+ break;
+
+ /*
+ * If there is a preceding segment, it may provide some of
+ * our data already. If so, drop the data from the incoming
+ * segment. If it provides all of our data, drop us, otherwise
+ * stick new segment in the proper place.
+ *
+	 * If some of the data is dropped from the preceding
+	 * segment, then its checksum is invalidated.
+ */
+ if (p) {
+ i = ntohs(GETIP(p)->ip_off) + ntohs(GETIP(p)->ip_len) -
+ ntohs(ip->ip_off);
+ if (i > 0) {
+ if (i >= ntohs(ip->ip_len))
+ goto dropfrag;
+ m_adj(m, i);
+ m->m_pkthdr.csum_flags = 0;
+ ip->ip_off = htons(ntohs(ip->ip_off) + i);
+ ip->ip_len = htons(ntohs(ip->ip_len) - i);
+ }
+ m->m_nextpkt = p->m_nextpkt;
+ p->m_nextpkt = m;
+ } else {
+ m->m_nextpkt = fp->ipq_frags;
+ fp->ipq_frags = m;
+ }
+
+ /*
+ * While we overlap succeeding segments trim them or,
+ * if they are completely covered, dequeue them.
+ */
+ for (; q != NULL && ntohs(ip->ip_off) + ntohs(ip->ip_len) >
+ ntohs(GETIP(q)->ip_off); q = nq) {
+ i = (ntohs(ip->ip_off) + ntohs(ip->ip_len)) -
+ ntohs(GETIP(q)->ip_off);
+ if (i < ntohs(GETIP(q)->ip_len)) {
+ GETIP(q)->ip_len = htons(ntohs(GETIP(q)->ip_len) - i);
+ GETIP(q)->ip_off = htons(ntohs(GETIP(q)->ip_off) + i);
+ m_adj(q, i);
+ q->m_pkthdr.csum_flags = 0;
+ break;
+ }
+ nq = q->m_nextpkt;
+ m->m_nextpkt = nq;
+ IPSTAT_INC(ips_fragdropped);
+ fp->ipq_nfrags--;
+ m_freem(q);
+ }
+
+ /*
+ * Check for complete reassembly and perform frag per packet
+ * limiting.
+ *
+ * Frag limiting is performed here so that the nth frag has
+ * a chance to complete the packet before we drop the packet.
+ * As a result, n+1 frags are actually allowed per packet, but
+ * only n will ever be stored. (n = maxfragsperpacket.)
+ *
+ */
+ next = 0;
+ for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
+ if (ntohs(GETIP(q)->ip_off) != next) {
+ if (fp->ipq_nfrags > V_maxfragsperpacket)
+ ipq_drop(head, fp);
+ goto done;
+ }
+ next += ntohs(GETIP(q)->ip_len);
+ }
+ /* Make sure the last packet didn't have the IP_MF flag */
+ if (p->m_flags & M_IP_FRAG) {
+ if (fp->ipq_nfrags > V_maxfragsperpacket)
+ ipq_drop(head, fp);
+ goto done;
+ }
+
+ /*
+ * Reassembly is complete. Make sure the packet is a sane size.
+ */
+ q = fp->ipq_frags;
+ ip = GETIP(q);
+ if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
+ IPSTAT_INC(ips_toolong);
+ ipq_drop(head, fp);
+ goto done;
+ }
+
+ /*
+ * Concatenate fragments.
+ */
+ m = q;
+ t = m->m_next;
+ m->m_next = NULL;
+ m_cat(m, t);
+ nq = q->m_nextpkt;
+ q->m_nextpkt = NULL;
+ for (q = nq; q != NULL; q = nq) {
+ nq = q->m_nextpkt;
+ q->m_nextpkt = NULL;
+ m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
+ m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
+ m_cat(m, q);
+ }
+ /*
+ * In order to do checksumming faster we do 'end-around carry' here
+ * (and not in for{} loop), though it implies we are not going to
+ * reassemble more than 64k fragments.
+ */
+ while (m->m_pkthdr.csum_data & 0xffff0000)
+ m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
+ (m->m_pkthdr.csum_data >> 16);
+#ifdef MAC
+ mac_ipq_reassemble(fp, m);
+ mac_ipq_destroy(fp);
+#endif
+
+ /*
+ * Create header for new ip packet by modifying header of first
+ * packet; dequeue and discard fragment reassembly header.
+ * Make header visible.
+ */
+ ip->ip_len = htons((ip->ip_hl << 2) + next);
+ ip->ip_src = fp->ipq_src;
+ ip->ip_dst = fp->ipq_dst;
+ TAILQ_REMOVE(head, fp, ipq_list);
+ uma_zfree(V_ipq_zone, fp);
+ m->m_len += (ip->ip_hl << 2);
+ m->m_data -= (ip->ip_hl << 2);
+ /* some debugging cruft by sklower, below, will go away soon */
+ if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */
+ m_fixhdr(m);
+ IPSTAT_INC(ips_reassembled);
+ IPQ_UNLOCK(hash);
+
+#ifdef RSS
+ /*
+ * Query the RSS layer for the flowid / flowtype for the
+ * mbuf payload.
+ *
+ * For now, just assume we have to calculate a new one.
+ * Later on we should check to see if the assigned flowid matches
+ * what RSS wants for the given IP protocol and if so, just keep it.
+ *
+ * We then queue into the relevant netisr so it can be dispatched
+ * to the correct CPU.
+ *
+ * Note - this may return 1, which means the flowid in the mbuf
+ * is correct for the configured RSS hash types and can be used.
+ */
+ if (rss_mbuf_software_hash_v4(m, 0, &rss_hash, &rss_type) == 0) {
+ m->m_pkthdr.flowid = rss_hash;
+ M_HASHTYPE_SET(m, rss_type);
+ }
+
+ /*
+ * Queue/dispatch for reprocessing.
+ *
+ * Note: this is much slower than just handling the frame in the
+ * current receive context. It's likely worth investigating
+ * why this is.
+ */
+ netisr_dispatch(NETISR_IP_DIRECT, m);
+ return (NULL);
+#endif
+
+ /* Handle in-line */
+ return (m);
+
+dropfrag:
+ IPSTAT_INC(ips_fragdropped);
+ if (fp != NULL)
+ fp->ipq_nfrags--;
+ m_freem(m);
+done:
+ IPQ_UNLOCK(hash);
+ return (NULL);
+
+#undef GETIP
+}
+
+/*
+ * Initialize IP reassembly structures.
+ */
+void
+ipreass_init(void)
+{
+
+ for (int i = 0; i < IPREASS_NHASH; i++) {
+ TAILQ_INIT(&V_ipq[i].head);
+ mtx_init(&V_ipq[i].lock, "IP reassembly", NULL,
+ MTX_DEF | MTX_DUPOK);
+ }
+ V_ipq_hashseed = arc4random();
+ V_maxfragsperpacket = 16;
+ V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
+ NULL, UMA_ALIGN_PTR, 0);
+ uma_zone_set_max(V_ipq_zone, nmbclusters / 32);
+
+ if (IS_DEFAULT_VNET(curvnet))
+ EVENTHANDLER_REGISTER(nmbclusters_change, ipreass_zone_change,
+ NULL, EVENTHANDLER_PRI_ANY);
+}
+
+/*
+ * If a timer expires on a reassembly queue, discard it.
+ */
+void
+ipreass_slowtimo(void)
+{
+ struct ipq *fp, *tmp;
+
+ for (int i = 0; i < IPREASS_NHASH; i++) {
+ IPQ_LOCK(i);
+ TAILQ_FOREACH_SAFE(fp, &V_ipq[i].head, ipq_list, tmp)
+ if (--fp->ipq_ttl == 0)
+ ipq_timeout(&V_ipq[i].head, fp);
+ IPQ_UNLOCK(i);
+ }
+}
+
+/*
+ * Drain off all datagram fragments.
+ */
+void
+ipreass_drain(void)
+{
+
+ for (int i = 0; i < IPREASS_NHASH; i++) {
+ IPQ_LOCK(i);
+ while(!TAILQ_EMPTY(&V_ipq[i].head))
+ ipq_drop(&V_ipq[i].head, TAILQ_FIRST(&V_ipq[i].head));
+ IPQ_UNLOCK(i);
+ }
+}
+
+#ifdef VIMAGE
+/*
+ * Destroy IP reassembly structures.
+ */
+void
+ipreass_destroy(void)
+{
+
+ ipreass_drain();
+ uma_zdestroy(V_ipq_zone);
+ for (int i = 0; i < IPREASS_NHASH; i++)
+ mtx_destroy(&V_ipq[i].lock);
+}
+#endif
+
+/*
+ * After maxnipq has been updated, propagate the change to UMA. The UMA zone
+ * max has slightly different semantics than the sysctl, for historical
+ * reasons.
+ */
+static void
+ipreass_drain_tomax(void)
+{
+ int target;
+
+ /*
+ * If we are over the maximum number of fragments,
+ * drain off enough to get down to the new limit,
+ * stripping off last elements on queues. Every
+ * run we strip the oldest element from each bucket.
+ */
+ target = uma_zone_get_max(V_ipq_zone);
+ while (uma_zone_get_cur(V_ipq_zone) > target) {
+ struct ipq *fp;
+
+ for (int i = 0; i < IPREASS_NHASH; i++) {
+ IPQ_LOCK(i);
+ fp = TAILQ_LAST(&V_ipq[i].head, ipqhead);
+ if (fp != NULL)
+ ipq_timeout(&V_ipq[i].head, fp);
+ IPQ_UNLOCK(i);
+ }
+ }
+}
+
+static void
+ipreass_zone_change(void *tag)
+{
+
+ uma_zone_set_max(V_ipq_zone, nmbclusters / 32);
+ ipreass_drain_tomax();
+}
+
+/*
+ * Change the limit on the UMA zone, or disable the fragment allocation
+ * at all. Since 0 and -1 are special values here, we need our own handler,
+ * instead of sysctl_handle_uma_zone_max().
+ */
+static int
+sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS)
+{
+ int error, max;
+
+ if (V_noreass == 0) {
+ max = uma_zone_get_max(V_ipq_zone);
+ if (max == 0)
+ max = -1;
+ } else
+ max = 0;
+ error = sysctl_handle_int(oidp, &max, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (max > 0) {
+ /*
+ * XXXRW: Might be a good idea to sanity check the argument
+ * and place an extreme upper bound.
+ */
+ max = uma_zone_set_max(V_ipq_zone, max);
+ ipreass_drain_tomax();
+ V_noreass = 0;
+ } else if (max == 0) {
+ V_noreass = 1;
+ ipreass_drain();
+ } else if (max == -1) {
+ V_noreass = 0;
+ uma_zone_set_max(V_ipq_zone, 0);
+ } else
+ return (EINVAL);
+ return (0);
+}
+
+/*
+ * Seek for old fragment queue header that can be reused. Try to
+ * reuse a header from currently locked hash bucket.
+ */
+static struct ipq *
+ipq_reuse(int start)
+{
+ struct ipq *fp;
+ int i;
+
+ IPQ_LOCK_ASSERT(start);
+
+ for (i = start;; i++) {
+ if (i == IPREASS_NHASH)
+ i = 0;
+ if (i != start && IPQ_TRYLOCK(i) == 0)
+ continue;
+ fp = TAILQ_LAST(&V_ipq[i].head, ipqhead);
+ if (fp) {
+ struct mbuf *m;
+
+ IPSTAT_ADD(ips_fragtimeout, fp->ipq_nfrags);
+ while (fp->ipq_frags) {
+ m = fp->ipq_frags;
+ fp->ipq_frags = m->m_nextpkt;
+ m_freem(m);
+ }
+ TAILQ_REMOVE(&V_ipq[i].head, fp, ipq_list);
+ if (i != start)
+ IPQ_UNLOCK(i);
+ IPQ_LOCK_ASSERT(start);
+ return (fp);
+ }
+ if (i != start)
+ IPQ_UNLOCK(i);
+ }
+}
+
+/*
+ * Free a fragment reassembly header and all associated datagrams.
+ */
+static void
+ipq_free(struct ipqhead *fhp, struct ipq *fp)
+{
+ struct mbuf *q;
+
+ while (fp->ipq_frags) {
+ q = fp->ipq_frags;
+ fp->ipq_frags = q->m_nextpkt;
+ m_freem(q);
+ }
+ TAILQ_REMOVE(fhp, fp, ipq_list);
+ uma_zfree(V_ipq_zone, fp);
+}
diff --git a/freebsd/sys/netinet/ip_var.h b/freebsd/sys/netinet/ip_var.h
index b07ef162..847704fd 100644
--- a/freebsd/sys/netinet/ip_var.h
+++ b/freebsd/sys/netinet/ip_var.h
@@ -93,50 +93,54 @@ struct ip_moptions {
u_short imo_max_memberships; /* max memberships this socket */
struct in_multi **imo_membership; /* group memberships */
struct in_mfilter *imo_mfilters; /* source filters */
+ STAILQ_ENTRY(ip_moptions) imo_link;
};
struct ipstat {
- u_long ips_total; /* total packets received */
- u_long ips_badsum; /* checksum bad */
- u_long ips_tooshort; /* packet too short */
- u_long ips_toosmall; /* not enough data */
- u_long ips_badhlen; /* ip header length < data size */
- u_long ips_badlen; /* ip length < ip header length */
- u_long ips_fragments; /* fragments received */
- u_long ips_fragdropped; /* frags dropped (dups, out of space) */
- u_long ips_fragtimeout; /* fragments timed out */
- u_long ips_forward; /* packets forwarded */
- u_long ips_fastforward; /* packets fast forwarded */
- u_long ips_cantforward; /* packets rcvd for unreachable dest */
- u_long ips_redirectsent; /* packets forwarded on same net */
- u_long ips_noproto; /* unknown or unsupported protocol */
- u_long ips_delivered; /* datagrams delivered to upper level*/
- u_long ips_localout; /* total ip packets generated here */
- u_long ips_odropped; /* lost packets due to nobufs, etc. */
- u_long ips_reassembled; /* total packets reassembled ok */
- u_long ips_fragmented; /* datagrams successfully fragmented */
- u_long ips_ofragments; /* output fragments created */
- u_long ips_cantfrag; /* don't fragment flag was set, etc. */
- u_long ips_badoptions; /* error in option processing */
- u_long ips_noroute; /* packets discarded due to no route */
- u_long ips_badvers; /* ip version != 4 */
- u_long ips_rawout; /* total raw ip packets generated */
- u_long ips_toolong; /* ip length > max ip packet size */
- u_long ips_notmember; /* multicasts for unregistered grps */
- u_long ips_nogif; /* no match gif found */
- u_long ips_badaddr; /* invalid address on header */
+ uint64_t ips_total; /* total packets received */
+ uint64_t ips_badsum; /* checksum bad */
+ uint64_t ips_tooshort; /* packet too short */
+ uint64_t ips_toosmall; /* not enough data */
+ uint64_t ips_badhlen; /* ip header length < data size */
+ uint64_t ips_badlen; /* ip length < ip header length */
+ uint64_t ips_fragments; /* fragments received */
+ uint64_t ips_fragdropped; /* frags dropped (dups, out of space) */
+ uint64_t ips_fragtimeout; /* fragments timed out */
+ uint64_t ips_forward; /* packets forwarded */
+ uint64_t ips_fastforward; /* packets fast forwarded */
+ uint64_t ips_cantforward; /* packets rcvd for unreachable dest */
+ uint64_t ips_redirectsent; /* packets forwarded on same net */
+ uint64_t ips_noproto; /* unknown or unsupported protocol */
+ uint64_t ips_delivered; /* datagrams delivered to upper level*/
+ uint64_t ips_localout; /* total ip packets generated here */
+ uint64_t ips_odropped; /* lost packets due to nobufs, etc. */
+ uint64_t ips_reassembled; /* total packets reassembled ok */
+ uint64_t ips_fragmented; /* datagrams successfully fragmented */
+ uint64_t ips_ofragments; /* output fragments created */
+ uint64_t ips_cantfrag; /* don't fragment flag was set, etc. */
+ uint64_t ips_badoptions; /* error in option processing */
+ uint64_t ips_noroute; /* packets discarded due to no route */
+ uint64_t ips_badvers; /* ip version != 4 */
+ uint64_t ips_rawout; /* total raw ip packets generated */
+ uint64_t ips_toolong; /* ip length > max ip packet size */
+ uint64_t ips_notmember; /* multicasts for unregistered grps */
+ uint64_t ips_nogif; /* no match gif found */
+ uint64_t ips_badaddr; /* invalid address on header */
};
#ifdef _KERNEL
+#include <sys/counter.h>
#include <net/vnet.h>
+VNET_PCPUSTAT_DECLARE(struct ipstat, ipstat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
-#define IPSTAT_ADD(name, val) V_ipstat.name += (val)
-#define IPSTAT_SUB(name, val) V_ipstat.name -= (val)
+#define IPSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct ipstat, ipstat, name, (val))
+#define IPSTAT_SUB(name, val) IPSTAT_ADD(name, -(val))
#define IPSTAT_INC(name) IPSTAT_ADD(name, 1)
#define IPSTAT_DEC(name) IPSTAT_SUB(name, 1)
@@ -144,11 +148,11 @@ struct ipstat {
* Kernel module consumers must use this accessor macro.
*/
void kmod_ipstat_inc(int statnum);
-#define KMOD_IPSTAT_INC(name) \
- kmod_ipstat_inc(offsetof(struct ipstat, name) / sizeof(u_long))
+#define KMOD_IPSTAT_INC(name) \
+ kmod_ipstat_inc(offsetof(struct ipstat, name) / sizeof(uint64_t))
void kmod_ipstat_dec(int statnum);
-#define KMOD_IPSTAT_DEC(name) \
- kmod_ipstat_dec(offsetof(struct ipstat, name) / sizeof(u_long))
+#define KMOD_IPSTAT_DEC(name) \
+ kmod_ipstat_dec(offsetof(struct ipstat, name) / sizeof(uint64_t))
/* flags passed to ip_output as last parameter */
#define IP_FORWARDING 0x1 /* most of ip header exists */
@@ -157,12 +161,7 @@ void kmod_ipstat_dec(int statnum);
#define IP_SENDTOIF 0x8 /* send on specific ifnet */
#define IP_ROUTETOIF SO_DONTROUTE /* 0x10 bypass routing tables */
#define IP_ALLOWBROADCAST SO_BROADCAST /* 0x20 can send broadcast packets */
-
-/*
- * mbuf flag used by ip_fastfwd
- */
-#define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */
-#define M_IP_NEXTHOP M_PROTO2 /* explicit ip nexthop */
+#define IP_NODEFAULTFLOWID 0x40 /* Don't set the flowid from inp */
#ifdef __NO_STRICT_ALIGNMENT
#define IP_HDR_ALIGNED_P(ip) 1
@@ -175,8 +174,6 @@ struct inpcb;
struct route;
struct sockopt;
-VNET_DECLARE(struct ipstat, ipstat);
-VNET_DECLARE(u_short, ip_id); /* ip packet ctr, for ids */
VNET_DECLARE(int, ip_defttl); /* default IP ttl */
VNET_DECLARE(int, ipforwarding); /* ip forwarding */
#ifdef IPSTEALTH
@@ -191,7 +188,6 @@ VNET_DECLARE(int, rsvp_on);
VNET_DECLARE(int, drop_redirect);
extern struct pr_usrreqs rip_usrreqs;
-#define V_ipstat VNET(ipstat)
#define V_ip_id VNET(ip_id)
#define V_ip_defttl VNET(ip_defttl)
#define V_ipforwarding VNET(ipforwarding)
@@ -210,12 +206,9 @@ int inp_setmoptions(struct inpcb *, struct sockopt *);
int ip_ctloutput(struct socket *, struct sockopt *sopt);
void ip_drain(void);
int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
- u_long if_hwassist_flags, int sw_csum);
+ u_long if_hwassist_flags);
void ip_forward(struct mbuf *m, int srcrt);
void ip_init(void);
-#ifdef VIMAGE
-void ip_destroy(void);
-#endif
extern int
(*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
struct ip_moptions *);
@@ -226,27 +219,22 @@ int ipproto_register(short);
int ipproto_unregister(short);
struct mbuf *
ip_reass(struct mbuf *);
-struct in_ifaddr *
- ip_rtaddr(struct in_addr, u_int fibnum);
void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
struct mbuf *);
void ip_slowtimo(void);
-u_int16_t ip_randomid(void);
+void ip_fillid(struct ip *);
int rip_ctloutput(struct socket *, struct sockopt *);
void rip_ctlinput(int, struct sockaddr *, void *);
void rip_init(void);
-#ifdef VIMAGE
-void rip_destroy(void);
-#endif
-void rip_input(struct mbuf *, int);
-int rip_output(struct mbuf *, struct socket *, u_long);
-void ipip_input(struct mbuf *, int);
-void rsvp_input(struct mbuf *, int);
+int rip_input(struct mbuf **, int *, int);
+int rip_output(struct mbuf *, struct socket *, ...);
+int ipip_input(struct mbuf **, int *, int);
+int rsvp_input(struct mbuf **, int *, int);
int ip_rsvp_init(struct socket *);
int ip_rsvp_done(void);
extern int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
extern void (*ip_rsvp_force_done)(struct socket *);
-extern void (*rsvp_input_p)(struct mbuf *m, int off);
+extern int (*rsvp_input_p)(struct mbuf **, int *, int);
VNET_DECLARE(struct pfil_head, inet_pfil_hook); /* packet filter hooks */
#define V_inet_pfil_hook VNET(inet_pfil_hook)
@@ -285,7 +273,7 @@ enum {
IPFW_IS_MASK = 0x30000000, /* which source ? */
IPFW_IS_DIVERT = 0x20000000,
IPFW_IS_DUMMYNET =0x10000000,
- IPFW_IS_PIPE = 0x08000000, /* pip1=1, queue = 0 */
+ IPFW_IS_PIPE = 0x08000000, /* pipe=1, queue = 0 */
};
#define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */
#define MTAG_IPFW_RULE 1262273568 /* rule reference */
@@ -294,9 +282,7 @@ enum {
struct ip_fw_args;
typedef int (*ip_fw_chk_ptr_t)(struct ip_fw_args *args);
typedef int (*ip_fw_ctl_ptr_t)(struct sockopt *);
-VNET_DECLARE(ip_fw_chk_ptr_t, ip_fw_chk_ptr);
VNET_DECLARE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr);
-#define V_ip_fw_chk_ptr VNET(ip_fw_chk_ptr)
#define V_ip_fw_ctl_ptr VNET(ip_fw_ctl_ptr)
/* Divert hooks. */
@@ -307,12 +293,6 @@ extern int (*ng_ipfw_input_p)(struct mbuf **, int,
extern int (*ip_dn_ctl_ptr)(struct sockopt *);
extern int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
-
-VNET_DECLARE(int, ip_do_randomid);
-#define V_ip_do_randomid VNET(ip_do_randomid)
-#define ip_newid() ((V_ip_do_randomid != 0) ? ip_randomid() : \
- htons(V_ip_id++))
-
#endif /* _KERNEL */
#endif /* !_NETINET_IP_VAR_H_ */
diff --git a/freebsd/sys/netinet/libalias/alias.c b/freebsd/sys/netinet/libalias/alias.c
index 9e975122..a2cd987c 100644
--- a/freebsd/sys/netinet/libalias/alias.c
+++ b/freebsd/sys/netinet/libalias/alias.c
@@ -1724,7 +1724,7 @@ LibAliasUnLoadAllModule(void)
/* Unload all modules then reload everything. */
while ((p = first_handler()) != NULL) {
- detach_handler(p);
+ LibAliasDetachHandlers(p);
}
while ((t = walk_dll_chain()) != NULL) {
dlclose(t->handle);
@@ -1751,40 +1751,22 @@ LibAliasUnLoadAllModule(void)
struct mbuf *
m_megapullup(struct mbuf *m, int len) {
struct mbuf *mcl;
-
+
if (len > m->m_pkthdr.len)
goto bad;
-
- /* Do not reallocate packet if it is sequentional,
- * writable and has some extra space for expansion.
- * XXX: Constant 100bytes is completely empirical. */
-#define RESERVE 100
- if (m->m_next == NULL && M_WRITABLE(m) && M_TRAILINGSPACE(m) >= RESERVE)
+
+ if (m->m_next == NULL && M_WRITABLE(m))
return (m);
- if (len <= MCLBYTES - RESERVE) {
- mcl = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
- } else if (len < MJUM16BYTES) {
- int size;
- if (len <= MJUMPAGESIZE - RESERVE) {
- size = MJUMPAGESIZE;
- } else if (len <= MJUM9BYTES - RESERVE) {
- size = MJUM9BYTES;
- } else {
- size = MJUM16BYTES;
- };
- mcl = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, size);
- } else {
- goto bad;
- }
+ mcl = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
if (mcl == NULL)
goto bad;
-
+ m_align(mcl, len);
m_move_pkthdr(mcl, m);
m_copydata(m, 0, len, mtod(mcl, caddr_t));
mcl->m_len = mcl->m_pkthdr.len = len;
m_freem(m);
-
+
return (mcl);
bad:
m_freem(m);
diff --git a/freebsd/sys/netinet/libalias/alias_cuseeme.c b/freebsd/sys/netinet/libalias/alias_cuseeme.c
index 1bdb7c4a..d6c9520c 100644
--- a/freebsd/sys/netinet/libalias/alias_cuseeme.c
+++ b/freebsd/sys/netinet/libalias/alias_cuseeme.c
@@ -58,14 +58,14 @@ __FBSDID("$FreeBSD$");
#define CUSEEME_PORT_NUMBER 7648
static void
-AliasHandleCUSeeMeOut(struct libalias *la, struct ip *pip,
+AliasHandleCUSeeMeOut(struct libalias *la, struct ip *pip,
struct alias_link *lnk);
static void
-AliasHandleCUSeeMeIn(struct libalias *la, struct ip *pip,
+AliasHandleCUSeeMeIn(struct libalias *la, struct ip *pip,
struct in_addr original_addr);
-static int
+static int
fingerprint(struct libalias *la, struct alias_data *ah)
{
@@ -76,7 +76,7 @@ fingerprint(struct libalias *la, struct alias_data *ah)
return (-1);
}
-static int
+static int
protohandlerin(struct libalias *la, struct ip *pip, struct alias_data *ah)
{
@@ -84,7 +84,7 @@ protohandlerin(struct libalias *la, struct ip *pip, struct alias_data *ah)
return (0);
}
-static int
+static int
protohandlerout(struct libalias *la, struct ip *pip, struct alias_data *ah)
{
@@ -94,20 +94,20 @@ protohandlerout(struct libalias *la, struct ip *pip, struct alias_data *ah)
/* Kernel module definition. */
struct proto_handler handlers[] = {
- {
- .pri = 120,
- .dir = OUT,
- .proto = UDP,
- .fingerprint = &fingerprint,
+ {
+ .pri = 120,
+ .dir = OUT,
+ .proto = UDP,
+ .fingerprint = &fingerprint,
.protohandler = &protohandlerout
- },
+ },
{
- .pri = 120,
- .dir = IN,
- .proto = UDP,
- .fingerprint = &fingerprint,
+ .pri = 120,
+ .dir = IN,
+ .proto = UDP,
+ .fingerprint = &fingerprint,
.protohandler = &protohandlerin
- },
+ },
{ EOH }
};
@@ -132,9 +132,9 @@ mod_handler(module_t mod, int type, void *data)
}
#ifdef _KERNEL
-static
+static
#endif
-moduledata_t
+moduledata_t
alias_mod = {
"alias_cuseeme", mod_handler, NULL
};
diff --git a/freebsd/sys/netinet/libalias/alias_db.c b/freebsd/sys/netinet/libalias/alias_db.c
index fabe586e..219d5d34 100644
--- a/freebsd/sys/netinet/libalias/alias_db.c
+++ b/freebsd/sys/netinet/libalias/alias_db.c
@@ -148,6 +148,7 @@ __FBSDID("$FreeBSD$");
#include <machine/stdarg.h>
#include <rtems/bsd/sys/param.h>
#include <sys/kernel.h>
+#include <sys/systm.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/module.h>
#include <sys/rwlock.h>
@@ -350,24 +351,16 @@ MODULE_VERSION(libalias, 1);
static int
alias_mod_handler(module_t mod, int type, void *data)
{
- int error;
switch (type) {
- case MOD_LOAD:
- error = 0;
- handler_chain_init();
- break;
case MOD_QUIESCE:
case MOD_UNLOAD:
- handler_chain_destroy();
finishoff();
- error = 0;
- break;
+ case MOD_LOAD:
+ return (0);
default:
- error = EINVAL;
+ return (EINVAL);
}
-
- return (error);
}
static moduledata_t alias_mod = {
@@ -793,9 +786,9 @@ FindNewPortGroup(struct libalias *la,
struct alias_link *search_result;
for (j = 0; j < port_count; j++)
- if (0 != (search_result = FindLinkIn(la, dst_addr, alias_addr,
- dst_port, htons(port_sys + j),
- link_type, 0)))
+ if ((search_result = FindLinkIn(la, dst_addr,
+ alias_addr, dst_port, htons(port_sys + j),
+ link_type, 0)) != NULL)
break;
/* Found a good range, return base */
diff --git a/freebsd/sys/netinet/libalias/alias_dummy.c b/freebsd/sys/netinet/libalias/alias_dummy.c
index eacfac86..b4c00c20 100644
--- a/freebsd/sys/netinet/libalias/alias_dummy.c
+++ b/freebsd/sys/netinet/libalias/alias_dummy.c
@@ -29,7 +29,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-/*
+/*
* Alias_dummy is just an empty skeleton used to demostrate how to write
* a module for libalias, that will run unalterated in userland or in
* kernel land.
@@ -61,19 +61,19 @@ __FBSDID("$FreeBSD$");
static void
AliasHandleDummy(struct libalias *la, struct ip *ip, struct alias_data *ah);
-static int
+static int
fingerprint(struct libalias *la, struct alias_data *ah)
{
- /*
- * Check here all the data that will be used later, if any field
+ /*
+ * Check here all the data that will be used later, if any field
* is empy/NULL, return a -1 value.
*/
- if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
ah->maxpktsize == 0)
return (-1);
- /*
- * Fingerprint the incoming packet, if it matches any conditions
+ /*
+ * Fingerprint the incoming packet, if it matches any conditions
* return an OK value.
*/
if (ntohs(*ah->dport) == 123
@@ -82,12 +82,12 @@ fingerprint(struct libalias *la, struct alias_data *ah)
return (-1); /* I don't recognize this packet. */
}
-/*
- * Wrap in this general purpose function, the real function used to alias the
+/*
+ * Wrap in this general purpose function, the real function used to alias the
* packets.
*/
-static int
+static int
protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
{
@@ -95,22 +95,22 @@ protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
return (0);
}
-/*
- * NOTA BENE: the next variable MUST NOT be renamed in any case if you want
- * your module to work in userland, cause it's used to find and use all
+/*
+ * NOTA BENE: the next variable MUST NOT be renamed in any case if you want
+ * your module to work in userland, cause it's used to find and use all
* the protocol handlers present in every module.
- * So WATCH OUT, your module needs this variables and it needs it with
+ * So WATCH OUT, your module needs this variables and it needs it with
* ITS EXACT NAME: handlers.
*/
struct proto_handler handlers [] = {
- {
- .pri = 666,
- .dir = IN|OUT,
- .proto = UDP|TCP,
- .fingerprint = &fingerprint,
+ {
+ .pri = 666,
+ .dir = IN|OUT,
+ .proto = UDP|TCP,
+ .fingerprint = &fingerprint,
.protohandler = &protohandler
- },
+ },
{ EOH }
};
@@ -119,7 +119,7 @@ mod_handler(module_t mod, int type, void *data)
{
int error;
- switch (type) {
+ switch (type) {
case MOD_LOAD:
error = 0;
LibAliasAttachHandlers(handlers);
diff --git a/freebsd/sys/netinet/libalias/alias_irc.c b/freebsd/sys/netinet/libalias/alias_irc.c
index 880d897e..44ff6d92 100644
--- a/freebsd/sys/netinet/libalias/alias_irc.c
+++ b/freebsd/sys/netinet/libalias/alias_irc.c
@@ -46,7 +46,7 @@ __FBSDID("$FreeBSD$");
Version 2.1: May, 1997 (cjm)
Very minor changes to conform with
local/global/function naming conventions
- withing the packet alising module.
+ within the packet alising module.
*/
/* Includes */
@@ -94,11 +94,11 @@ static void
AliasHandleIrcOut(struct libalias *, struct ip *, struct alias_link *,
int maxpacketsize);
-static int
+static int
fingerprint(struct libalias *la, struct alias_data *ah)
{
- if (ah->dport == NULL || ah->dport == NULL || ah->lnk == NULL ||
+ if (ah->dport == NULL || ah->dport == NULL || ah->lnk == NULL ||
ah->maxpktsize == 0)
return (-1);
if (ntohs(*ah->dport) == IRC_CONTROL_PORT_NUMBER_1
@@ -107,7 +107,7 @@ fingerprint(struct libalias *la, struct alias_data *ah)
return (-1);
}
-static int
+static int
protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
{
@@ -120,13 +120,13 @@ protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
}
struct proto_handler handlers[] = {
- {
- .pri = 90,
- .dir = OUT,
- .proto = TCP,
- .fingerprint = &fingerprint,
+ {
+ .pri = 90,
+ .dir = OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
.protohandler = &protohandler
- },
+ },
{ EOH }
};
@@ -151,7 +151,7 @@ mod_handler(module_t mod, int type, void *data)
}
#ifdef _KERNEL
-static
+static
#endif
moduledata_t alias_mod = {
"alias_irc", mod_handler, NULL
@@ -484,7 +484,7 @@ lPACKET_DONE:
which will generate a type-error on all but 32-bit machines.
[Note 2] This routine really ought to be replaced with one that
- creates a transparent proxy on the aliasing host, to allow arbitary
+ creates a transparent proxy on the aliasing host, to allow arbitrary
changes in the TCP stream. This should not be too difficult given
this base; I (ee) will try to do this some time later.
*/
diff --git a/freebsd/sys/netinet/libalias/alias_local.h b/freebsd/sys/netinet/libalias/alias_local.h
index a7b3fe19..3010be84 100644
--- a/freebsd/sys/netinet/libalias/alias_local.h
+++ b/freebsd/sys/netinet/libalias/alias_local.h
@@ -357,7 +357,7 @@ void PunchFWHole(struct alias_link *_lnk);
/* Housekeeping function */
void HouseKeeping(struct libalias *);
-/* Tcp specfic routines */
+/* Tcp specific routines */
/* lint -save -library Suppress flexelint warnings */
/* Transparent proxy routines */
diff --git a/freebsd/sys/netinet/libalias/alias_mod.c b/freebsd/sys/netinet/libalias/alias_mod.c
index 0e0bd56a..6acbbee6 100644
--- a/freebsd/sys/netinet/libalias/alias_mod.c
+++ b/freebsd/sys/netinet/libalias/alias_mod.c
@@ -54,201 +54,82 @@ __FBSDID("$FreeBSD$");
#endif
/* Protocol and userland module handlers chains. */
-LIST_HEAD(handler_chain, proto_handler) handler_chain = LIST_HEAD_INITIALIZER(handler_chain);
-#ifdef _KERNEL
-struct rwlock handler_rw;
-#endif
-SLIST_HEAD(dll_chain, dll) dll_chain = SLIST_HEAD_INITIALIZER(dll_chain);
-
-#ifdef _KERNEL
-
-#define LIBALIAS_RWLOCK_INIT() \
- rw_init(&handler_rw, "Libalias_modules_rwlock")
-#define LIBALIAS_RWLOCK_DESTROY() rw_destroy(&handler_rw)
-#define LIBALIAS_WLOCK_ASSERT() \
- rw_assert(&handler_rw, RA_WLOCKED)
-
-static __inline void
-LIBALIAS_RLOCK(void)
-{
- rw_rlock(&handler_rw);
-}
-
-static __inline void
-LIBALIAS_RUNLOCK(void)
-{
- rw_runlock(&handler_rw);
-}
-
-static __inline void
-LIBALIAS_WLOCK(void)
-{
- rw_wlock(&handler_rw);
-}
-
-static __inline void
-LIBALIAS_WUNLOCK(void)
-{
- rw_wunlock(&handler_rw);
-}
-
-static void
-_handler_chain_init(void)
-{
-
- if (!rw_initialized(&handler_rw))
- LIBALIAS_RWLOCK_INIT();
-}
-
-static void
-_handler_chain_destroy(void)
-{
-
- if (rw_initialized(&handler_rw))
- LIBALIAS_RWLOCK_DESTROY();
-}
-
-#else
-#define LIBALIAS_RWLOCK_INIT() ;
-#define LIBALIAS_RWLOCK_DESTROY() ;
-#define LIBALIAS_WLOCK_ASSERT() ;
-#define LIBALIAS_RLOCK() ;
-#define LIBALIAS_RUNLOCK() ;
-#define LIBALIAS_WLOCK() ;
-#define LIBALIAS_WUNLOCK() ;
-#define _handler_chain_init() ;
-#define _handler_chain_destroy() ;
-#endif
-
-void
-handler_chain_init(void)
-{
- _handler_chain_init();
-}
-
-void
-handler_chain_destroy(void)
-{
- _handler_chain_destroy();
-}
+static TAILQ_HEAD(handler_chain, proto_handler) handler_chain =
+ TAILQ_HEAD_INITIALIZER(handler_chain);
static int
-_attach_handler(struct proto_handler *p)
+attach_handler(struct proto_handler *p)
{
struct proto_handler *b;
- LIBALIAS_WLOCK_ASSERT();
- b = NULL;
- LIST_FOREACH(b, &handler_chain, entries) {
- if ((b->pri == p->pri) &&
+ TAILQ_FOREACH(b, &handler_chain, link) {
+ if ((b->pri == p->pri) &&
(b->dir == p->dir) &&
(b->proto == p->proto))
- return (EEXIST); /* Priority conflict. */
+ return (EEXIST);
if (b->pri > p->pri) {
- LIST_INSERT_BEFORE(b, p, entries);
+ TAILQ_INSERT_BEFORE(b, p, link);
return (0);
}
}
- /* End of list or found right position, inserts here. */
- if (b)
- LIST_INSERT_AFTER(b, p, entries);
- else
- LIST_INSERT_HEAD(&handler_chain, p, entries);
- return (0);
-}
-static int
-_detach_handler(struct proto_handler *p)
-{
- struct proto_handler *b, *b_tmp;
+ TAILQ_INSERT_TAIL(&handler_chain, p, link);
- LIBALIAS_WLOCK_ASSERT();
- LIST_FOREACH_SAFE(b, &handler_chain, entries, b_tmp) {
- if (b == p) {
- LIST_REMOVE(b, entries);
- return (0);
- }
- }
- return (ENOENT); /* Handler not found. */
+ return (0);
}
int
-LibAliasAttachHandlers(struct proto_handler *_p)
+LibAliasAttachHandlers(struct proto_handler *p)
{
- int i, error;
+ int error;
- LIBALIAS_WLOCK();
- error = -1;
- for (i = 0; 1; i++) {
- if (*((int *)&_p[i]) == EOH)
- break;
- error = _attach_handler(&_p[i]);
- if (error != 0)
- break;
+ while (p->dir != NODIR) {
+ error = attach_handler(p);
+ if (error)
+ return (error);
+ p++;
}
- LIBALIAS_WUNLOCK();
- return (error);
+
+ return (0);
}
+/* XXXGL: should be void, but no good reason to break ABI */
int
-LibAliasDetachHandlers(struct proto_handler *_p)
+LibAliasDetachHandlers(struct proto_handler *p)
{
- int i, error;
- LIBALIAS_WLOCK();
- error = -1;
- for (i = 0; 1; i++) {
- if (*((int *)&_p[i]) == EOH)
- break;
- error = _detach_handler(&_p[i]);
- if (error != 0)
- break;
+ while (p->dir != NODIR) {
+ TAILQ_REMOVE(&handler_chain, p, link);
+ p++;
}
- LIBALIAS_WUNLOCK();
- return (error);
-}
-
-int
-detach_handler(struct proto_handler *_p)
-{
- int error;
- LIBALIAS_WLOCK();
- error = -1;
- error = _detach_handler(_p);
- LIBALIAS_WUNLOCK();
- return (error);
+ return (0);
}
int
-find_handler(int8_t dir, int8_t proto, struct libalias *la, __unused struct ip *pip,
+find_handler(int8_t dir, int8_t proto, struct libalias *la, struct ip *ip,
struct alias_data *ad)
{
struct proto_handler *p;
- int error;
- LIBALIAS_RLOCK();
- error = ENOENT;
- LIST_FOREACH(p, &handler_chain, entries) {
- if ((p->dir & dir) && (p->proto & proto))
- if (p->fingerprint(la, ad) == 0) {
- error = p->protohandler(la, pip, ad);
- break;
- }
- }
- LIBALIAS_RUNLOCK();
- return (error);
+ TAILQ_FOREACH(p, &handler_chain, link)
+ if ((p->dir & dir) && (p->proto & proto) &&
+ p->fingerprint(la, ad) == 0)
+ return (p->protohandler(la, ip, ad));
+
+ return (ENOENT);
}
struct proto_handler *
first_handler(void)
{
-
- return (LIST_FIRST(&handler_chain));
+
+ return (TAILQ_FIRST(&handler_chain));
}
+#ifndef _KERNEL
/* Dll manipulation code - this code is not thread safe... */
-
+SLIST_HEAD(dll_chain, dll) dll_chain = SLIST_HEAD_INITIALIZER(dll_chain);
int
attach_dll(struct dll *p)
{
@@ -272,7 +153,7 @@ detach_dll(char *p)
error = NULL;
SLIST_FOREACH_SAFE(b, &dll_chain, next, b_tmp)
if (!strncmp(b->name, p, DLL_LEN)) {
- SLIST_REMOVE(&dll_chain, b, dll, next);
+ SLIST_REMOVE(&dll_chain, b, dll, next);
error = b;
break;
}
@@ -290,3 +171,4 @@ walk_dll_chain(void)
SLIST_REMOVE_HEAD(&dll_chain, next);
return (t);
}
+#endif /* !_KERNEL */
diff --git a/freebsd/sys/netinet/libalias/alias_mod.h b/freebsd/sys/netinet/libalias/alias_mod.h
index 727df8e6..fd020c46 100644
--- a/freebsd/sys/netinet/libalias/alias_mod.h
+++ b/freebsd/sys/netinet/libalias/alias_mod.h
@@ -54,102 +54,92 @@ MALLOC_DECLARE(M_ALIAS);
#endif
#endif
-/* Protocol handlers struct & function. */
+/* Packet flow direction flags. */
+#define IN 0x0001
+#define OUT 0x0002
+#define NODIR 0x4000
-/* Packet flow direction. */
-#define IN 1
-#define OUT 2
+/* Working protocol flags. */
+#define IP 0x01
+#define TCP 0x02
+#define UDP 0x04
-/* Working protocol. */
-#define IP 1
-#define TCP 2
-#define UDP 4
-
-/*
+/*
* Data passed to protocol handler module, it must be filled
* right before calling find_handler() to determine which
* module is elegible to be called.
*/
+struct alias_data {
+ struct alias_link *lnk;
+ struct in_addr *oaddr; /* Original address. */
+ struct in_addr *aaddr; /* Alias address. */
+ uint16_t *aport; /* Alias port. */
+ uint16_t *sport, *dport; /* Source & destination port */
+ uint16_t maxpktsize; /* Max packet size. */
+};
-struct alias_data {
- struct alias_link *lnk;
- struct in_addr *oaddr; /* Original address. */
- struct in_addr *aaddr; /* Alias address. */
- uint16_t *aport; /* Alias port. */
- uint16_t *sport, *dport; /* Source & destination port */
- uint16_t maxpktsize; /* Max packet size. */
-};
-
-/*
+/*
* This structure contains all the information necessary to make
* a protocol handler correctly work.
*/
-
struct proto_handler {
- u_int pri; /* Handler priority. */
- int16_t dir; /* Flow direction. */
- uint8_t proto; /* Working protocol. */
- int (*fingerprint)(struct libalias *, /* Fingerprint * function. */
- struct alias_data *);
- int (*protohandler)(struct libalias *, /* Aliasing * function. */
- struct ip *, struct alias_data *);
- LIST_ENTRY(proto_handler) entries;
+ u_int pri; /* Handler priority. */
+ int16_t dir; /* Flow direction. */
+ uint8_t proto; /* Working protocol. */
+ /* Fingerprint * function. */
+ int (*fingerprint)(struct libalias *, struct alias_data *);
+ /* Aliasing * function. */
+ int (*protohandler)(struct libalias *, struct ip *,
+ struct alias_data *);
+ TAILQ_ENTRY(proto_handler) link;
};
+/* End of handlers. */
+#define EOH .dir = NODIR
-/*
+/* Functions used with protocol handlers. */
+int LibAliasAttachHandlers(struct proto_handler *);
+int LibAliasDetachHandlers(struct proto_handler *);
+int find_handler(int8_t, int8_t, struct libalias *, struct ip *,
+ struct alias_data *);
+struct proto_handler *first_handler(void);
+
+#ifndef _KERNEL
+/*
* Used only in userland when libalias needs to keep track of all
* module loaded. In kernel land (kld mode) we don't need to care
* care about libalias modules cause it's kld to do it for us.
*/
-
-#define DLL_LEN 32
-struct dll {
- char name[DLL_LEN]; /* Name of module. */
- void *handle; /*
- * Ptr to shared obj obtained through
- * dlopen() - use this ptr to get access
- * to any symbols from a loaded module
- * via dlsym().
- */
- SLIST_ENTRY(dll) next;
+#define DLL_LEN 32
+struct dll {
+ char name[DLL_LEN]; /* Name of module. */
+ void *handle; /*
+ * Ptr to shared obj obtained through
+ * dlopen() - use this ptr to get access
+ * to any symbols from a loaded module
+ * via dlsym().
+ */
+ SLIST_ENTRY(dll) next;
};
-/* Functions used with protocol handlers. */
-
-void handler_chain_init(void);
-void handler_chain_destroy(void);
-int LibAliasAttachHandlers(struct proto_handler *);
-int LibAliasDetachHandlers(struct proto_handler *);
-int detach_handler(struct proto_handler *);
-int find_handler(int8_t, int8_t, struct libalias *,
- struct ip *, struct alias_data *);
-struct proto_handler *first_handler(void);
-
/* Functions used with dll module. */
+void dll_chain_init(void);
+void dll_chain_destroy(void);
+int attach_dll(struct dll *);
+void *detach_dll(char *);
+struct dll *walk_dll_chain(void);
-void dll_chain_init(void);
-void dll_chain_destroy(void);
-int attach_dll(struct dll *);
-void *detach_dll(char *);
-struct dll *walk_dll_chain(void);
-
-/* End of handlers. */
-#define EOH -1
-
-/*
+/*
* Some defines borrowed from sys/module.h used to compile a kld
* in userland as a shared lib.
*/
-
-#ifndef _KERNEL
typedef enum modeventtype {
- MOD_LOAD,
- MOD_UNLOAD,
- MOD_SHUTDOWN,
- MOD_QUIESCE
+ MOD_LOAD,
+ MOD_UNLOAD,
+ MOD_SHUTDOWN,
+ MOD_QUIESCE
} modeventtype_t;
-
+
typedef struct module *module_t;
typedef int (*modeventhand_t)(module_t, int /* modeventtype_t */, void *);
@@ -157,10 +147,10 @@ typedef int (*modeventhand_t)(module_t, int /* modeventtype_t */, void *);
* Struct for registering modules statically via SYSINIT.
*/
typedef struct moduledata {
- const char *name; /* module name */
- modeventhand_t evhand; /* event handler */
- void *priv; /* extra data */
+ const char *name; /* module name */
+ modeventhand_t evhand; /* event handler */
+ void *priv; /* extra data */
} moduledata_t;
-#endif
+#endif /* !_KERNEL */
-#endif /* !_ALIAS_MOD_H_ */
+#endif /* !_ALIAS_MOD_H_ */
diff --git a/freebsd/sys/netinet/libalias/alias_nbt.c b/freebsd/sys/netinet/libalias/alias_nbt.c
index 5a917872..c10f9b48 100644
--- a/freebsd/sys/netinet/libalias/alias_nbt.c
+++ b/freebsd/sys/netinet/libalias/alias_nbt.c
@@ -72,17 +72,17 @@ __FBSDID("$FreeBSD$");
#define NETBIOS_DGM_PORT_NUMBER 138
static int
-AliasHandleUdpNbt(struct libalias *, struct ip *, struct alias_link *,
+AliasHandleUdpNbt(struct libalias *, struct ip *, struct alias_link *,
struct in_addr *, u_short);
static int
AliasHandleUdpNbtNS(struct libalias *, struct ip *, struct alias_link *,
struct in_addr *, u_short *, struct in_addr *, u_short *);
-static int
+static int
fingerprint1(struct libalias *la, struct alias_data *ah)
{
- if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
ah->aaddr == NULL || ah->aport == NULL)
return (-1);
if (ntohs(*ah->dport) == NETBIOS_DGM_PORT_NUMBER
@@ -91,18 +91,18 @@ fingerprint1(struct libalias *la, struct alias_data *ah)
return (-1);
}
-static int
+static int
protohandler1(struct libalias *la, struct ip *pip, struct alias_data *ah)
{
return (AliasHandleUdpNbt(la, pip, ah->lnk, ah->aaddr, *ah->aport));
}
-static int
+static int
fingerprint2(struct libalias *la, struct alias_data *ah)
{
- if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
ah->aaddr == NULL || ah->aport == NULL)
return (-1);
if (ntohs(*ah->dport) == NETBIOS_NS_PORT_NUMBER
@@ -111,7 +111,7 @@ fingerprint2(struct libalias *la, struct alias_data *ah)
return (-1);
}
-static int
+static int
protohandler2in(struct libalias *la, struct ip *pip, struct alias_data *ah)
{
@@ -120,7 +120,7 @@ protohandler2in(struct libalias *la, struct ip *pip, struct alias_data *ah)
return (0);
}
-static int
+static int
protohandler2out(struct libalias *la, struct ip *pip, struct alias_data *ah)
{
@@ -130,27 +130,27 @@ protohandler2out(struct libalias *la, struct ip *pip, struct alias_data *ah)
/* Kernel module definition. */
struct proto_handler handlers[] = {
- {
- .pri = 130,
- .dir = IN|OUT,
- .proto = UDP,
- .fingerprint = &fingerprint1,
+ {
+ .pri = 130,
+ .dir = IN|OUT,
+ .proto = UDP,
+ .fingerprint = &fingerprint1,
.protohandler = &protohandler1
- },
- {
- .pri = 140,
- .dir = IN,
- .proto = UDP,
- .fingerprint = &fingerprint2,
+ },
+ {
+ .pri = 140,
+ .dir = IN,
+ .proto = UDP,
+ .fingerprint = &fingerprint2,
.protohandler = &protohandler2in
- },
- {
- .pri = 140,
- .dir = OUT,
- .proto = UDP,
- .fingerprint = &fingerprint2,
+ },
+ {
+ .pri = 140,
+ .dir = OUT,
+ .proto = UDP,
+ .fingerprint = &fingerprint2,
.protohandler = &protohandler2out
- },
+ },
{ EOH }
};
@@ -175,7 +175,7 @@ mod_handler(module_t mod, int type, void *data)
}
#ifdef _KERNEL
-static
+static
#endif
moduledata_t alias_mod = {
"alias_nbt", mod_handler, NULL
diff --git a/freebsd/sys/netinet/libalias/alias_pptp.c b/freebsd/sys/netinet/libalias/alias_pptp.c
index e8205db0..39861c5c 100644
--- a/freebsd/sys/netinet/libalias/alias_pptp.c
+++ b/freebsd/sys/netinet/libalias/alias_pptp.c
@@ -80,7 +80,7 @@ AliasHandlePptpGreOut(struct libalias *, struct ip *);
static int
AliasHandlePptpGreIn(struct libalias *, struct ip *);
-static int
+static int
fingerprint(struct libalias *la, struct alias_data *ah)
{
@@ -92,14 +92,14 @@ fingerprint(struct libalias *la, struct alias_data *ah)
return (-1);
}
-static int
+static int
fingerprintgre(struct libalias *la, struct alias_data *ah)
{
return (0);
}
-static int
+static int
protohandlerin(struct libalias *la, struct ip *pip, struct alias_data *ah)
{
@@ -107,7 +107,7 @@ protohandlerin(struct libalias *la, struct ip *pip, struct alias_data *ah)
return (0);
}
-static int
+static int
protohandlerout(struct libalias *la, struct ip *pip, struct alias_data *ah)
{
@@ -115,7 +115,7 @@ protohandlerout(struct libalias *la, struct ip *pip, struct alias_data *ah)
return (0);
}
-static int
+static int
protohandlergrein(struct libalias *la, struct ip *pip, struct alias_data *ah)
{
@@ -125,7 +125,7 @@ protohandlergrein(struct libalias *la, struct ip *pip, struct alias_data *ah)
return (-1);
}
-static int
+static int
protohandlergreout(struct libalias *la, struct ip *pip, struct alias_data *ah)
{
@@ -136,39 +136,39 @@ protohandlergreout(struct libalias *la, struct ip *pip, struct alias_data *ah)
/* Kernel module definition. */
struct proto_handler handlers[] = {
- {
- .pri = 200,
- .dir = IN,
- .proto = TCP,
- .fingerprint = &fingerprint,
+ {
+ .pri = 200,
+ .dir = IN,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
.protohandler = &protohandlerin
},
- {
- .pri = 210,
- .dir = OUT,
- .proto = TCP,
- .fingerprint = &fingerprint,
+ {
+ .pri = 210,
+ .dir = OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
.protohandler = &protohandlerout
},
-/*
- * WATCH OUT!!! these 2 handlers NEED a priority of INT_MAX (highest possible)
+/*
+ * WATCH OUT!!! these 2 handlers NEED a priority of INT_MAX (highest possible)
* cause they will ALWAYS process packets, so they must be the last one
* in chain: look fingerprintgre() above.
*/
- {
- .pri = INT_MAX,
- .dir = IN,
- .proto = IP,
- .fingerprint = &fingerprintgre,
+ {
+ .pri = INT_MAX,
+ .dir = IN,
+ .proto = IP,
+ .fingerprint = &fingerprintgre,
.protohandler = &protohandlergrein
},
- {
- .pri = INT_MAX,
- .dir = OUT,
- .proto = IP,
- .fingerprint = &fingerprintgre,
+ {
+ .pri = INT_MAX,
+ .dir = OUT,
+ .proto = IP,
+ .fingerprint = &fingerprintgre,
.protohandler = &protohandlergreout
- },
+ },
{ EOH }
};
static int
@@ -192,7 +192,7 @@ mod_handler(module_t mod, int type, void *data)
}
#ifdef _KERNEL
-static
+static
#endif
moduledata_t alias_mod = {
"alias_pptp", mod_handler, NULL
diff --git a/freebsd/sys/netinet/libalias/alias_sctp.h b/freebsd/sys/netinet/libalias/alias_sctp.h
index 840917ad..99cceee4 100644
--- a/freebsd/sys/netinet/libalias/alias_sctp.h
+++ b/freebsd/sys/netinet/libalias/alias_sctp.h
@@ -92,7 +92,6 @@
#ifndef _KERNEL
#include <stdlib.h>
#include <stdio.h>
-#include <curses.h>
#endif //#ifdef _KERNEL
diff --git a/freebsd/sys/netinet/libalias/alias_skinny.c b/freebsd/sys/netinet/libalias/alias_skinny.c
index 9f292916..b1f8f8c7 100644
--- a/freebsd/sys/netinet/libalias/alias_skinny.c
+++ b/freebsd/sys/netinet/libalias/alias_skinny.c
@@ -58,7 +58,7 @@
static void
AliasHandleSkinny(struct libalias *, struct ip *, struct alias_link *);
-static int
+static int
fingerprint(struct libalias *la, struct alias_data *ah)
{
@@ -70,7 +70,7 @@ fingerprint(struct libalias *la, struct alias_data *ah)
return (-1);
}
-static int
+static int
protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
{
@@ -79,13 +79,13 @@ protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
}
struct proto_handler handlers[] = {
- {
- .pri = 110,
- .dir = IN|OUT,
- .proto = TCP,
- .fingerprint = &fingerprint,
+ {
+ .pri = 110,
+ .dir = IN|OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
.protohandler = &protohandler
- },
+ },
{ EOH }
};
@@ -110,7 +110,7 @@ mod_handler(module_t mod, int type, void *data)
}
#ifdef _KERNEL
-static
+static
#endif
moduledata_t alias_mod = {
"alias_skinny", mod_handler, NULL
@@ -342,7 +342,7 @@ AliasHandleSkinny(struct libalias *la, struct ip *pip, struct alias_link *lnk)
* through the packet using len to determine message boundaries.
* This comes into play big time with port messages being in the
* same packet as register messages. Also, open receive channel
- * acks are usually buried in a pakcet some 400 bytes long.
+ * acks are usually buried in a packet some 400 bytes long.
*/
while (dlen >= skinny_hdr_len) {
len = (sd->len);
diff --git a/freebsd/sys/netinet/libalias/alias_smedia.c b/freebsd/sys/netinet/libalias/alias_smedia.c
index 47ae2748..9578a4af 100644
--- a/freebsd/sys/netinet/libalias/alias_smedia.c
+++ b/freebsd/sys/netinet/libalias/alias_smedia.c
@@ -133,14 +133,14 @@ __FBSDID("$FreeBSD$");
static void
AliasHandleRtspOut(struct libalias *, struct ip *, struct alias_link *,
int maxpacketsize);
-static int
+static int
fingerprint(struct libalias *la, struct alias_data *ah)
{
if (ah->dport != NULL && ah->aport != NULL && ah->sport != NULL &&
ntohs(*ah->dport) == TFTP_PORT_NUMBER)
return (0);
- if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
ah->maxpktsize == 0)
return (-1);
if (ntohs(*ah->dport) == RTSP_CONTROL_PORT_NUMBER_1
@@ -151,7 +151,7 @@ fingerprint(struct libalias *la, struct alias_data *ah)
return (-1);
}
-static int
+static int
protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
{
@@ -163,13 +163,13 @@ protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
}
struct proto_handler handlers[] = {
- {
- .pri = 100,
- .dir = OUT,
+ {
+ .pri = 100,
+ .dir = OUT,
.proto = TCP|UDP,
- .fingerprint = &fingerprint,
+ .fingerprint = &fingerprint,
.protohandler = &protohandler
- },
+ },
{ EOH }
};
@@ -194,7 +194,7 @@ mod_handler(module_t mod, int type, void *data)
}
#ifdef _KERNEL
-static
+static
#endif
moduledata_t alias_mod = {
"alias_smedia", mod_handler, NULL
@@ -408,7 +408,7 @@ alias_rtsp_out(struct libalias *la, struct ip *pip,
SetAckModified(lnk);
tc = (struct tcphdr *)ip_next(pip);
delta = GetDeltaSeqOut(tc->th_seq, lnk);
- AddSeq(lnk, delta + new_dlen - dlen, pip->ip_hl, pip->ip_len,
+ AddSeq(lnk, delta + new_dlen - dlen, pip->ip_hl, pip->ip_len,
tc->th_seq, tc->th_off);
new_len = htons(hlen + new_dlen);
@@ -520,7 +520,7 @@ AliasHandleRtspOut(struct libalias *la, struct ip *pip, struct alias_link *lnk,
/*
* When aliasing a server, check for the 200 reply
- * Accomodate varying number of blanks between 200 & OK
+ * Accommodate varying number of blanks between 200 & OK
*/
if (dlen >= (int)strlen(str200)) {
diff --git a/freebsd/sys/netinet/pim_var.h b/freebsd/sys/netinet/pim_var.h
index 41657b61..ae876c94 100644
--- a/freebsd/sys/netinet/pim_var.h
+++ b/freebsd/sys/netinet/pim_var.h
@@ -46,38 +46,33 @@
* PIM statistics kept in the kernel
*/
struct pimstat {
- u_quad_t pims_rcv_total_msgs; /* total PIM messages received */
- u_quad_t pims_rcv_total_bytes; /* total PIM bytes received */
- u_quad_t pims_rcv_tooshort; /* rcvd with too few bytes */
- u_quad_t pims_rcv_badsum; /* rcvd with bad checksum */
- u_quad_t pims_rcv_badversion; /* rcvd bad PIM version */
- u_quad_t pims_rcv_registers_msgs; /* rcvd regs. msgs (data only) */
- u_quad_t pims_rcv_registers_bytes; /* rcvd regs. bytes (data only) */
- u_quad_t pims_rcv_registers_wrongiif; /* rcvd regs. on wrong iif */
- u_quad_t pims_rcv_badregisters; /* rcvd invalid registers */
- u_quad_t pims_snd_registers_msgs; /* sent regs. msgs (data only) */
- u_quad_t pims_snd_registers_bytes; /* sent regs. bytes (data only) */
+ uint64_t pims_rcv_total_msgs; /* total PIM messages received */
+ uint64_t pims_rcv_total_bytes; /* total PIM bytes received */
+ uint64_t pims_rcv_tooshort; /* rcvd with too few bytes */
+ uint64_t pims_rcv_badsum; /* rcvd with bad checksum */
+ uint64_t pims_rcv_badversion; /* rcvd bad PIM version */
+ uint64_t pims_rcv_registers_msgs; /* rcvd regs. msgs (data only) */
+ uint64_t pims_rcv_registers_bytes; /* rcvd regs. bytes (data only) */
+ uint64_t pims_rcv_registers_wrongiif; /* rcvd regs. on wrong iif */
+ uint64_t pims_rcv_badregisters; /* rcvd invalid registers */
+ uint64_t pims_snd_registers_msgs; /* sent regs. msgs (data only) */
+ uint64_t pims_snd_registers_bytes; /* sent regs. bytes (data only) */
};
#ifdef _KERNEL
-#define PIMSTAT_ADD(name, val) V_pimstat.name += (val)
+#define PIMSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct pimstat, pimstat, name, (val))
#define PIMSTAT_INC(name) PIMSTAT_ADD(name, 1)
#endif
/*
- * Names for PIM sysctl objects
+ * Identifiers for PIM sysctl nodes
*/
#define PIMCTL_STATS 1 /* statistics (read-only) */
-#define PIMCTL_MAXID 2
-
-#define PIMCTL_NAMES { \
- { 0, 0 }, \
- { "stats", CTLTYPE_STRUCT }, \
-}
#ifdef _KERNEL
-void pim_input(struct mbuf *, int);
+int pim_input(struct mbuf **, int *, int);
SYSCTL_DECL(_net_inet_pim);
#endif
diff --git a/freebsd/sys/netinet/raw_ip.c b/freebsd/sys/netinet/raw_ip.c
index 827eca6e..a4679586 100644
--- a/freebsd/sys/netinet/raw_ip.c
+++ b/freebsd/sys/netinet/raw_ip.c
@@ -42,12 +42,14 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/eventhandler.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/rmlock.h>
#include <sys/rwlock.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
@@ -59,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -70,15 +73,17 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_mroute.h>
+#include <netinet/ip_icmp.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
#endif /*IPSEC*/
+#include <machine/stdarg.h>
#include <security/mac/mac_framework.h>
VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
-SYSCTL_VNET_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip_defttl), 0,
"Maximum TTL on IP packets");
@@ -102,9 +107,6 @@ void (*ip_divert_ptr)(struct mbuf *, int);
int (*ng_ipfw_input_p)(struct mbuf **, int,
struct ip_fw_args *, int);
-/* Hook for telling pf that the destination address changed */
-void (*m_addr_chg_pf_p)(struct mbuf *m);
-
#ifdef INET
/*
* Hooks for multicast routing. They all default to NULL, so leave them not
@@ -128,11 +130,13 @@ int (*mrt_ioctl)(u_long, caddr_t, int);
int (*legal_vif_num)(int);
u_long (*ip_mcast_src)(int);
-void (*rsvp_input_p)(struct mbuf *m, int off);
+int (*rsvp_input_p)(struct mbuf **, int *, int);
int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
void (*ip_rsvp_force_done)(struct socket *);
#endif /* INET */
+extern struct protosw inetsw[];
+
u_long rip_sendspace = 9216;
SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
&rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
@@ -210,19 +214,19 @@ rip_init(void)
{
in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
- 1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE,
- IPI_HASHFIELDS_NONE);
+ 1, "ripcb", rip_inpcb_init, NULL, 0, IPI_HASHFIELDS_NONE);
EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
}
#ifdef VIMAGE
-void
-rip_destroy(void)
+static void
+rip_destroy(void *unused __unused)
{
in_pcbinfo_destroy(&V_ripcbinfo);
}
+VNET_SYSUNINIT(raw_ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, rip_destroy, NULL);
#endif
#ifdef INET
@@ -274,16 +278,18 @@ rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
* Setup generic address and protocol structures for raw_input routine, then
* pass them along with mbuf chain.
*/
-void
-rip_input(struct mbuf *m, int off)
+int
+rip_input(struct mbuf **mp, int *offp, int proto)
{
struct ifnet *ifp;
+ struct mbuf *m = *mp;
struct ip *ip = mtod(m, struct ip *);
- int proto = ip->ip_p;
struct inpcb *inp, *last;
struct sockaddr_in ripsrc;
int hash;
+ *mp = NULL;
+
bzero(&ripsrc, sizeof(ripsrc));
ripsrc.sin_len = sizeof(ripsrc);
ripsrc.sin_family = AF_INET;
@@ -411,10 +417,15 @@ rip_input(struct mbuf *m, int off)
IPSTAT_INC(ips_delivered);
INP_RUNLOCK(last);
} else {
- m_freem(m);
- IPSTAT_INC(ips_noproto);
- IPSTAT_DEC(ips_delivered);
+ if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
+ IPSTAT_INC(ips_noproto);
+ IPSTAT_DEC(ips_delivered);
+ icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
+ } else {
+ m_freem(m);
+ }
}
+ return (IPPROTO_DONE);
}
/*
@@ -422,14 +433,20 @@ rip_input(struct mbuf *m, int off)
* have setup with control call.
*/
int
-rip_output(struct mbuf *m, struct socket *so, u_long dst)
+rip_output(struct mbuf *m, struct socket *so, ...)
{
struct ip *ip;
int error;
struct inpcb *inp = sotoinpcb(so);
+ va_list ap;
+ u_long dst;
int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
IP_ALLOWBROADCAST;
+ va_start(ap, so);
+ dst = va_arg(ap, u_long);
+ va_end(ap);
+
/*
* If the user handed us a complete IP packet, use it. Otherwise,
* allocate an mbuf for a header and fill it in.
@@ -439,7 +456,7 @@ rip_output(struct mbuf *m, struct socket *so, u_long dst)
m_freem(m);
return(EMSGSIZE);
}
- M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
+ M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
if (m == NULL)
return(ENOBUFS);
@@ -447,32 +464,32 @@ rip_output(struct mbuf *m, struct socket *so, u_long dst)
ip = mtod(m, struct ip *);
ip->ip_tos = inp->inp_ip_tos;
if (inp->inp_flags & INP_DONTFRAG)
- ip->ip_off = IP_DF;
+ ip->ip_off = htons(IP_DF);
else
- ip->ip_off = 0;
+ ip->ip_off = htons(0);
ip->ip_p = inp->inp_ip_p;
- ip->ip_len = m->m_pkthdr.len;
+ ip->ip_len = htons(m->m_pkthdr.len);
ip->ip_src = inp->inp_laddr;
+ ip->ip_dst.s_addr = dst;
if (jailed(inp->inp_cred)) {
/*
* prison_local_ip4() would be good enough but would
* let a source of INADDR_ANY pass, which we do not
- * want to see from jails. We do not go through the
- * pain of in_pcbladdr() for raw sockets.
+ * want to see from jails.
*/
- if (ip->ip_src.s_addr == INADDR_ANY)
- error = prison_get_ip4(inp->inp_cred,
- &ip->ip_src);
- else
+ if (ip->ip_src.s_addr == INADDR_ANY) {
+ error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src,
+ inp->inp_cred);
+ } else {
error = prison_local_ip4(inp->inp_cred,
&ip->ip_src);
+ }
if (error != 0) {
INP_RUNLOCK(inp);
m_freem(m);
return (error);
}
}
- ip->ip_dst.s_addr = dst;
ip->ip_ttl = inp->inp_ip_ttl;
} else {
if (m->m_pkthdr.len > IP_MAXPACKET) {
@@ -493,14 +510,18 @@ rip_output(struct mbuf *m, struct socket *so, u_long dst)
* and don't allow packet length sizes that will crash.
*/
if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options)
- || (ip->ip_len > m->m_pkthdr.len)
- || (ip->ip_len < (ip->ip_hl << 2))) {
+ || (ntohs(ip->ip_len) > m->m_pkthdr.len)
+ || (ntohs(ip->ip_len) < (ip->ip_hl << 2))) {
INP_RUNLOCK(inp);
m_freem(m);
return (EINVAL);
}
+ /*
+ * This doesn't allow application to specify ID of zero,
+ * but we got this limitation from the beginning of history.
+ */
if (ip->ip_id == 0)
- ip->ip_id = ip_newid();
+ ip_fillid(ip);
/*
* XXX prevent ip_output from overwriting header fields.
@@ -539,6 +560,8 @@ rip_output(struct mbuf *m, struct socket *so, u_long dst)
*
* When adding new socket options here, make sure to add access control
* checks here as necessary.
+ *
+ * XXX-BZ inp locking?
*/
int
rip_ctloutput(struct socket *so, struct sockopt *sopt)
@@ -712,6 +735,7 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt)
void
rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
{
+ struct rm_priotracker in_ifa_tracker;
struct in_ifaddr *ia;
struct ifnet *ifp;
int err;
@@ -719,16 +743,16 @@ rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
switch (cmd) {
case PRC_IFDOWN:
- IN_IFADDR_RLOCK();
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
if (ia->ia_ifa.ifa_addr == sa
&& (ia->ia_flags & IFA_ROUTE)) {
ifa_ref(&ia->ia_ifa);
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
/*
- * in_ifscrub kills the interface route.
+ * in_scrubprefix() kills the interface route.
*/
- in_ifscrub(ia->ia_ifp, ia, 0);
+ in_scrubprefix(ia, 0);
/*
* in_ifadown gets rid of all the rest of the
* routes. This is not quite the right thing
@@ -741,21 +765,21 @@ rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
}
}
if (ia == NULL) /* If ia matched, already unlocked. */
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
break;
case PRC_IFUP:
- IN_IFADDR_RLOCK();
+ IN_IFADDR_RLOCK(&in_ifa_tracker);
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
if (ia->ia_ifa.ifa_addr == sa)
break;
}
if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) {
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
return;
}
ifa_ref(&ia->ia_ifa);
- IN_IFADDR_RUNLOCK();
+ IN_IFADDR_RUNLOCK(&in_ifa_tracker);
flags = RTF_UP;
ifp = ia->ia_ifa.ifa_ifp;
@@ -764,16 +788,12 @@ rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
flags |= RTF_HOST;
err = ifa_del_loopback_route((struct ifaddr *)ia, sa);
- if (err == 0)
- ia->ia_flags &= ~IFA_RTSELF;
err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
if (err == 0)
ia->ia_flags |= IFA_ROUTE;
err = ifa_add_loopback_route((struct ifaddr *)ia, sa);
- if (err == 0)
- ia->ia_flags |= IFA_RTSELF;
ifa_free(&ia->ia_ifa);
break;
@@ -1036,7 +1056,7 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
return (error);
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
- if (inp_list == 0)
+ if (inp_list == NULL)
return (ENOMEM);
INP_INFO_RLOCK(&V_ripcbinfo);
diff --git a/freebsd/sys/netinet/sctp.h b/freebsd/sys/netinet/sctp.h
index 4c5c03dc..ec42cffa 100644
--- a/freebsd/sys/netinet/sctp.h
+++ b/freebsd/sys/netinet/sctp.h
@@ -121,6 +121,14 @@ struct sctp_paramhdr {
#define SCTP_DEFAULT_PRINFO 0x00000022
#define SCTP_PEER_ADDR_THLDS 0x00000023
#define SCTP_REMOTE_UDP_ENCAPS_PORT 0x00000024
+#define SCTP_ECN_SUPPORTED 0x00000025
+#define SCTP_PR_SUPPORTED 0x00000026
+#define SCTP_AUTH_SUPPORTED 0x00000027
+#define SCTP_ASCONF_SUPPORTED 0x00000028
+#define SCTP_RECONFIG_SUPPORTED 0x00000029
+#define SCTP_NRSACK_SUPPORTED 0x00000030
+#define SCTP_PKTDROP_SUPPORTED 0x00000031
+#define SCTP_MAX_CWND 0x00000032
/*
* read-only options
@@ -133,6 +141,8 @@ struct sctp_paramhdr {
#define SCTP_GET_ASSOC_NUMBER 0x00000104 /* ro */
#define SCTP_GET_ASSOC_ID_LIST 0x00000105 /* ro */
#define SCTP_TIMEOUTS 0x00000106
+#define SCTP_PR_STREAM_STATUS 0x00000107
+#define SCTP_PR_ASSOC_STATUS 0x00000108
/*
* user socket options: BSD implementation specific
@@ -186,6 +196,9 @@ struct sctp_paramhdr {
#define SCTP_SS_VALUE 0x00001204
#define SCTP_CC_OPTION 0x00001205 /* Options for CC
* modules */
+/* For I-DATA */
+#define SCTP_INTERLEAVING_SUPPORTED 0x00001206
+
/* read only */
#define SCTP_GET_SNDBUF_USE 0x00001101
#define SCTP_GET_STAT_LOG 0x00001103
@@ -378,33 +391,32 @@ struct sctp_error_cause {
} SCTP_PACKED;
struct sctp_error_invalid_stream {
- struct sctp_error_cause cause; /* code=SCTP_ERROR_INVALID_STREAM */
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_INVALID_STREAM */
uint16_t stream_id; /* stream id of the DATA in error */
uint16_t reserved;
} SCTP_PACKED;
struct sctp_error_missing_param {
- struct sctp_error_cause cause; /* code=SCTP_ERROR_MISSING_PARAM */
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_MISSING_PARAM */
uint32_t num_missing_params; /* number of missing parameters */
- /* uint16_t param_type's follow */
+ uint16_t type[];
} SCTP_PACKED;
struct sctp_error_stale_cookie {
- struct sctp_error_cause cause; /* code=SCTP_ERROR_STALE_COOKIE */
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_STALE_COOKIE */
uint32_t stale_time; /* time in usec of staleness */
} SCTP_PACKED;
struct sctp_error_out_of_resource {
- struct sctp_error_cause cause; /* code=SCTP_ERROR_OUT_OF_RESOURCES */
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_OUT_OF_RESOURCES */
} SCTP_PACKED;
struct sctp_error_unresolv_addr {
- struct sctp_error_cause cause; /* code=SCTP_ERROR_UNRESOLVABLE_ADDR */
-
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_UNRESOLVABLE_ADDR */
} SCTP_PACKED;
struct sctp_error_unrecognized_chunk {
- struct sctp_error_cause cause; /* code=SCTP_ERROR_UNRECOG_CHUNK */
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_UNRECOG_CHUNK */
struct sctp_chunkhdr ch;/* header from chunk in error */
} SCTP_PACKED;
@@ -413,6 +425,11 @@ struct sctp_error_no_user_data {
uint32_t tsn; /* TSN of the empty data chunk */
} SCTP_PACKED;
+struct sctp_error_auth_invalid_hmac {
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_UNSUPPORTED_HMACID */
+ uint16_t hmac_id;
+} SCTP_PACKED;
+
/*
* Main SCTP chunk types we place these here so natd and f/w's in user land
* can find them.
@@ -438,6 +455,7 @@ struct sctp_error_no_user_data {
/* EY nr_sack chunk id*/
#define SCTP_NR_SELECTIVE_ACK 0x10
/************0x40 series ***********/
+#define SCTP_IDATA 0x40
/************0x80 series ***********/
/* RFC5061 */
#define SCTP_ASCONF_ACK 0x80
@@ -453,7 +471,7 @@ struct sctp_error_no_user_data {
#define SCTP_FORWARD_CUM_TSN 0xc0
/* RFC5061 */
#define SCTP_ASCONF 0xc1
-
+#define SCTP_IFORWARD_CUM_TSN 0xc2
/* ABORT and SHUTDOWN COMPLETE FLAG */
#define SCTP_HAD_NO_TCB 0x01
diff --git a/freebsd/sys/netinet/sctp_asconf.c b/freebsd/sys/netinet/sctp_asconf.c
index 551f0690..4256ab51 100644
--- a/freebsd/sys/netinet/sctp_asconf.c
+++ b/freebsd/sys/netinet/sctp_asconf.c
@@ -82,7 +82,7 @@ sctp_asconf_success_response(uint32_t id)
struct sctp_asconf_paramhdr *aph;
m_reply = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_paramhdr),
- 0, M_DONTWAIT, 1, MT_DATA);
+ 0, M_NOWAIT, 1, MT_DATA);
if (m_reply == NULL) {
SCTPDBG(SCTP_DEBUG_ASCONF1,
"asconf_success_response: couldn't get mbuf!\n");
@@ -110,7 +110,7 @@ sctp_asconf_error_response(uint32_t id, uint16_t cause, uint8_t * error_tlv,
m_reply = sctp_get_mbuf_for_msg((sizeof(struct sctp_asconf_paramhdr) +
tlv_length +
sizeof(struct sctp_error_cause)),
- 0, M_DONTWAIT, 1, MT_DATA);
+ 0, M_NOWAIT, 1, MT_DATA);
if (m_reply == NULL) {
SCTPDBG(SCTP_DEBUG_ASCONF1,
"asconf_error_response: couldn't get mbuf!\n");
@@ -150,7 +150,7 @@ sctp_process_asconf_add_ip(struct sockaddr *src, struct sctp_asconf_paramhdr *ap
{
struct sctp_nets *net;
struct mbuf *m_reply = NULL;
- struct sockaddr_storage sa_store;
+ union sctp_sockstore store;
struct sctp_paramhdr *ph;
uint16_t param_type, aparam_length;
@@ -179,7 +179,7 @@ sctp_process_asconf_add_ip(struct sockaddr *src, struct sctp_asconf_paramhdr *ap
#if defined(INET) || defined(INET6)
param_length = ntohs(ph->param_length);
#endif
- sa = (struct sockaddr *)&sa_store;
+ sa = &store.sa;
switch (param_type) {
#ifdef INET
case SCTP_IPV4_ADDRESS:
@@ -188,7 +188,7 @@ sctp_process_asconf_add_ip(struct sockaddr *src, struct sctp_asconf_paramhdr *ap
return (NULL);
}
v4addr = (struct sctp_ipv4addr_param *)ph;
- sin = (struct sockaddr_in *)&sa_store;
+ sin = &store.sin;
bzero(sin, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_len = sizeof(struct sockaddr_in);
@@ -211,7 +211,7 @@ sctp_process_asconf_add_ip(struct sockaddr *src, struct sctp_asconf_paramhdr *ap
return (NULL);
}
v6addr = (struct sctp_ipv6addr_param *)ph;
- sin6 = (struct sockaddr_in6 *)&sa_store;
+ sin6 = &store.sin6;
bzero(sin6, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(struct sockaddr_in6);
@@ -246,7 +246,8 @@ sctp_process_asconf_add_ip(struct sockaddr *src, struct sctp_asconf_paramhdr *ap
m_reply = sctp_asconf_error_response(aph->correlation_id,
SCTP_CAUSE_INVALID_PARAM, (uint8_t *) aph,
aparam_length);
- } else if (sctp_add_remote_addr(stcb, sa, &net, SCTP_DONOT_SETSCOPE,
+ } else if (sctp_add_remote_addr(stcb, sa, &net, stcb->asoc.port,
+ SCTP_DONOT_SETSCOPE,
SCTP_ADDR_DYNAMIC_ADDED) != 0) {
SCTPDBG(SCTP_DEBUG_ASCONF1,
"process_asconf_add_ip: error adding address\n");
@@ -304,7 +305,7 @@ sctp_process_asconf_delete_ip(struct sockaddr *src,
struct sctp_tcb *stcb, int response_required)
{
struct mbuf *m_reply = NULL;
- struct sockaddr_storage sa_store;
+ union sctp_sockstore store;
struct sctp_paramhdr *ph;
uint16_t param_type, aparam_length;
@@ -333,7 +334,7 @@ sctp_process_asconf_delete_ip(struct sockaddr *src,
#if defined(INET) || defined(INET6)
param_length = ntohs(ph->param_length);
#endif
- sa = (struct sockaddr *)&sa_store;
+ sa = &store.sa;
switch (param_type) {
#ifdef INET
case SCTP_IPV4_ADDRESS:
@@ -342,7 +343,7 @@ sctp_process_asconf_delete_ip(struct sockaddr *src,
return (NULL);
}
v4addr = (struct sctp_ipv4addr_param *)ph;
- sin = (struct sockaddr_in *)&sa_store;
+ sin = &store.sin;
bzero(sin, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_len = sizeof(struct sockaddr_in);
@@ -362,7 +363,7 @@ sctp_process_asconf_delete_ip(struct sockaddr *src,
return (NULL);
}
v6addr = (struct sctp_ipv6addr_param *)ph;
- sin6 = (struct sockaddr_in6 *)&sa_store;
+ sin6 = &store.sin6;
bzero(sin6, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(struct sockaddr_in6);
@@ -439,7 +440,7 @@ sctp_process_asconf_set_primary(struct sockaddr *src,
struct sctp_tcb *stcb, int response_required)
{
struct mbuf *m_reply = NULL;
- struct sockaddr_storage sa_store;
+ union sctp_sockstore store;
struct sctp_paramhdr *ph;
uint16_t param_type, aparam_length;
@@ -467,7 +468,7 @@ sctp_process_asconf_set_primary(struct sockaddr *src,
#if defined(INET) || defined(INET6)
param_length = ntohs(ph->param_length);
#endif
- sa = (struct sockaddr *)&sa_store;
+ sa = &store.sa;
switch (param_type) {
#ifdef INET
case SCTP_IPV4_ADDRESS:
@@ -476,7 +477,7 @@ sctp_process_asconf_set_primary(struct sockaddr *src,
return (NULL);
}
v4addr = (struct sctp_ipv4addr_param *)ph;
- sin = (struct sockaddr_in *)&sa_store;
+ sin = &store.sin;
bzero(sin, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_len = sizeof(struct sockaddr_in);
@@ -494,7 +495,7 @@ sctp_process_asconf_set_primary(struct sockaddr *src,
return (NULL);
}
v6addr = (struct sctp_ipv6addr_param *)ph;
- sin6 = (struct sockaddr_in6 *)&sa_store;
+ sin6 = &store.sin6;
bzero(sin6, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(struct sockaddr_in6);
@@ -557,7 +558,9 @@ sctp_process_asconf_set_primary(struct sockaddr *src,
(stcb->asoc.primary_destination->dest_state &
SCTP_ADDR_UNCONFIRMED) == 0) {
- sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_TIMER + SCTP_LOC_7);
+ sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED,
+ stcb->sctp_ep, stcb, NULL,
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_1);
if (sctp_is_mobility_feature_on(stcb->sctp_ep,
SCTP_MOBILITY_FASTHANDOFF)) {
sctp_assoc_immediate_retrans(stcb,
@@ -598,7 +601,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
uint32_t serial_num;
struct mbuf *n, *m_ack, *m_result, *m_tail;
struct sctp_asconf_ack_chunk *ack_cp;
- struct sctp_asconf_paramhdr *aph, *ack_aph;
+ struct sctp_asconf_paramhdr *aph;
struct sctp_ipv6addr_param *p_addr;
unsigned int asconf_limit, cnt;
int error = 0; /* did an error occur? */
@@ -653,7 +656,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
}
}
m_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_ack_chunk), 0,
- M_DONTWAIT, 1, MT_DATA);
+ M_NOWAIT, 1, MT_DATA);
if (m_ack == NULL) {
SCTPDBG(SCTP_DEBUG_ASCONF1,
"handle_asconf: couldn't get mbuf!\n");
@@ -681,13 +684,6 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
}
/* param_length is already validated in process_control... */
offset += ntohs(p_addr->ph.param_length); /* skip lookup addr */
-
- /* get pointer to first asconf param in ASCONF-ACK */
- ack_aph = (struct sctp_asconf_paramhdr *)(mtod(m_ack, caddr_t)+sizeof(struct sctp_asconf_ack_chunk));
- if (ack_aph == NULL) {
- SCTPDBG(SCTP_DEBUG_ASCONF1, "Gak in asconf2\n");
- return;
- }
/* get pointer to first asconf param in ASCONF */
aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, sizeof(struct sctp_asconf_paramhdr), (uint8_t *) & aparam_buf);
if (aph == NULL) {
@@ -726,13 +722,11 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
}
switch (param_type) {
case SCTP_ADD_IP_ADDRESS:
- asoc->peer_supports_asconf = 1;
m_result = sctp_process_asconf_add_ip(src, aph, stcb,
(cnt < SCTP_BASE_SYSCTL(sctp_hb_maxburst)), error);
cnt++;
break;
case SCTP_DEL_IP_ADDRESS:
- asoc->peer_supports_asconf = 1;
m_result = sctp_process_asconf_delete_ip(src, aph, stcb,
error);
break;
@@ -740,7 +734,6 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
/* not valid in an ASCONF chunk */
break;
case SCTP_SET_PRIM_ADDR:
- asoc->peer_supports_asconf = 1;
m_result = sctp_process_asconf_set_primary(src, aph,
stcb, error);
break;
@@ -932,8 +925,6 @@ sctp_addr_match(struct sctp_paramhdr *ph, struct sockaddr *sa)
void
sctp_asconf_cleanup(struct sctp_tcb *stcb, struct sctp_nets *net)
{
- /* mark peer as ASCONF incapable */
- stcb->asoc.peer_supports_asconf = 0;
/*
* clear out any existing asconfs going out
*/
@@ -1005,7 +996,7 @@ sctp_assoc_immediate_retrans(struct sctp_tcb *stcb, struct sctp_nets *dstnet)
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.primary_destination->ro._l_addr.sa);
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb,
stcb->asoc.deleted_primary,
- SCTP_FROM_SCTP_TIMER + SCTP_LOC_8);
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_3);
stcb->asoc.num_send_timers_up--;
if (stcb->asoc.num_send_timers_up < 0) {
stcb->asoc.num_send_timers_up = 0;
@@ -1044,7 +1035,7 @@ sctp_net_immediate_retrans(struct sctp_tcb *stcb, struct sctp_nets *net)
SCTPDBG(SCTP_DEBUG_ASCONF1, "net_immediate_retrans: RTO is %d\n", net->RTO);
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net,
- SCTP_FROM_SCTP_TIMER + SCTP_LOC_5);
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_4);
stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
net->error_count = 0;
TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
@@ -1121,7 +1112,8 @@ sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa)
* not be changed.
*/
SCTP_RTALLOC((sctp_route_t *) & net->ro,
- stcb->sctp_ep->def_vrf_id);
+ stcb->sctp_ep->def_vrf_id,
+ stcb->sctp_ep->fibnum);
if (net->ro.ro_rt == NULL)
continue;
@@ -1275,7 +1267,7 @@ sctp_asconf_queue_mgmt(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
{
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&ifa->address.sa;
+ sin6 = &ifa->address.sin6;
aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS;
aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param));
aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) +
@@ -1290,7 +1282,7 @@ sctp_asconf_queue_mgmt(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
{
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&ifa->address.sa;
+ sin = &ifa->address.sin;
aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS;
aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv4addr_param));
aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) +
@@ -1340,24 +1332,31 @@ sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
{
uint32_t status;
int pending_delete_queued = 0;
+ int last;
/* see if peer supports ASCONF */
- if (stcb->asoc.peer_supports_asconf == 0) {
+ if (stcb->asoc.asconf_supported == 0) {
return (-1);
}
/*
* if this is deleting the last address from the assoc, mark it as
* pending.
*/
- if ((type == SCTP_DEL_IP_ADDRESS) && !stcb->asoc.asconf_del_pending &&
- (sctp_local_addr_count(stcb) < 2)) {
- /* set the pending delete info only */
- stcb->asoc.asconf_del_pending = 1;
- stcb->asoc.asconf_addr_del_pending = ifa;
- atomic_add_int(&ifa->refcount, 1);
- SCTPDBG(SCTP_DEBUG_ASCONF2,
- "asconf_queue_add: mark delete last address pending\n");
- return (-1);
+ if ((type == SCTP_DEL_IP_ADDRESS) && !stcb->asoc.asconf_del_pending) {
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ last = (sctp_local_addr_count(stcb) == 0);
+ } else {
+ last = (sctp_local_addr_count(stcb) == 1);
+ }
+ if (last) {
+ /* set the pending delete info only */
+ stcb->asoc.asconf_del_pending = 1;
+ stcb->asoc.asconf_addr_del_pending = ifa;
+ atomic_add_int(&ifa->refcount, 1);
+ SCTPDBG(SCTP_DEBUG_ASCONF2,
+ "asconf_queue_add: mark delete last address pending\n");
+ return (-1);
+ }
}
/* queue an asconf parameter */
status = sctp_asconf_queue_mgmt(stcb, ifa, type);
@@ -1426,13 +1425,12 @@ sctp_asconf_queue_sa_delete(struct sctp_tcb *stcb, struct sockaddr *sa)
{
struct sctp_ifa *ifa;
struct sctp_asconf_addr *aa, *aa_next;
- uint32_t vrf_id;
if (stcb == NULL) {
return (-1);
}
/* see if peer supports ASCONF */
- if (stcb->asoc.peer_supports_asconf == 0) {
+ if (stcb->asoc.asconf_supported == 0) {
return (-1);
}
/* make sure the request isn't already in the queue */
@@ -1458,12 +1456,7 @@ sctp_asconf_queue_sa_delete(struct sctp_tcb *stcb, struct sockaddr *sa)
} /* for each aa */
/* find any existing ifa-- NOTE ifa CAN be allowed to be NULL */
- if (stcb) {
- vrf_id = stcb->asoc.vrf_id;
- } else {
- vrf_id = SCTP_DEFAULT_VRFID;
- }
- ifa = sctp_find_ifa_by_addr(sa, vrf_id, SCTP_ADDR_NOT_LOCKED);
+ ifa = sctp_find_ifa_by_addr(sa, stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED);
/* adding new request to the queue */
SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
@@ -1552,7 +1545,7 @@ sctp_asconf_find_param(struct sctp_tcb *stcb, uint32_t correlation_id)
* notifications based on the error response
*/
static void
-sctp_asconf_process_error(struct sctp_tcb *stcb,
+sctp_asconf_process_error(struct sctp_tcb *stcb SCTP_UNUSED,
struct sctp_asconf_paramhdr *aph)
{
struct sctp_error_cause *eh;
@@ -1590,10 +1583,7 @@ sctp_asconf_process_error(struct sctp_tcb *stcb,
switch (param_type) {
case SCTP_ADD_IP_ADDRESS:
case SCTP_DEL_IP_ADDRESS:
- stcb->asoc.peer_supports_asconf = 0;
- break;
case SCTP_SET_PRIM_ADDR:
- stcb->asoc.peer_supports_asconf = 0;
break;
default:
break;
@@ -1629,8 +1619,6 @@ sctp_asconf_process_param_ack(struct sctp_tcb *stcb,
SCTPDBG(SCTP_DEBUG_ASCONF1,
"process_param_ack: set primary IP address\n");
/* nothing to do... peer may start using this addr */
- if (flag == 0)
- stcb->asoc.peer_supports_asconf = 0;
break;
default:
/* should NEVER happen */
@@ -1648,11 +1636,11 @@ sctp_asconf_process_param_ack(struct sctp_tcb *stcb,
* cleanup from a bad asconf ack parameter
*/
static void
-sctp_asconf_ack_clear(struct sctp_tcb *stcb)
+sctp_asconf_ack_clear(struct sctp_tcb *stcb SCTP_UNUSED)
{
/* assume peer doesn't really know how to do asconfs */
- stcb->asoc.peer_supports_asconf = 0;
/* XXX we could free the pending queue here */
+
}
void
@@ -1695,8 +1683,14 @@ sctp_handle_asconf_ack(struct mbuf *m, int offset,
* abort the asoc, since someone probably just hijacked us...
*/
if (serial_num == (asoc->asconf_seq_out + 1)) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf_ack: got unexpected next serial number! Aborting asoc!\n");
- sctp_abort_an_association(stcb->sctp_ep, stcb, NULL, SCTP_SO_NOT_LOCKED);
+ snprintf(msg, sizeof(msg), "Never sent serial number %8.8x",
+ serial_num);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_no_unlock = 1;
return;
}
@@ -1709,7 +1703,7 @@ sctp_handle_asconf_ack(struct mbuf *m, int offset,
if (serial_num == asoc->asconf_seq_out - 1) {
/* stop our timer */
sctp_timer_stop(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep, stcb, net,
- SCTP_FROM_SCTP_ASCONF + SCTP_LOC_3);
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_5);
}
/* process the ASCONF-ACK contents */
ack_length = ntohs(cp->ch.chunk_length) -
@@ -1937,7 +1931,7 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
{
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ sin6 = &ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
/* we skip unspecifed addresses */
return;
@@ -1970,7 +1964,7 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
SCTP_IPV6_V6ONLY(inp6))
return;
- sin = (struct sockaddr_in *)&ifa->address.sa;
+ sin = &ifa->address.sin;
if (sin->sin_addr.s_addr == 0) {
/* we skip unspecifed addresses */
return;
@@ -1990,7 +1984,7 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
/* queue an asconf for this address add/delete */
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
/* does the peer do asconf? */
- if (stcb->asoc.peer_supports_asconf) {
+ if (stcb->asoc.asconf_supported) {
/* queue an asconf for this addr */
status = sctp_asconf_queue_add(stcb, ifa, type);
@@ -2000,7 +1994,8 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
* sent when the state goes open.
*/
if (status == 0 &&
- SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED))) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp,
stcb, stcb->asoc.primary_destination);
@@ -2127,7 +2122,7 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
else
continue;
}
- sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ sin6 = &ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
/* we skip unspecifed addresses */
continue;
@@ -2161,7 +2156,7 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
SCTP_IPV6_V6ONLY(inp6))
continue;
- sin = (struct sockaddr_in *)&ifa->address.sa;
+ sin = &ifa->address.sin;
if (sin->sin_addr.s_addr == 0) {
/* we skip unspecifed addresses */
continue;
@@ -2240,7 +2235,7 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
}
/* queue an asconf for this address add/delete */
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF) &&
- stcb->asoc.peer_supports_asconf) {
+ stcb->asoc.asconf_supported == 1) {
/* queue an asconf for this addr */
status = sctp_asconf_queue_add(stcb, ifa, type);
/*
@@ -2248,7 +2243,8 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
* count of queued params. If in the non-open
* state, these get sent when the assoc goes open.
*/
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
if (status >= 0) {
num_queued++;
}
@@ -2308,7 +2304,8 @@ sctp_set_primary_ip_address_sa(struct sctp_tcb *stcb, struct sockaddr *sa)
"set_primary_ip_address_sa: queued on tcb=%p, ",
(void *)stcb);
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
stcb->sctp_ep, stcb,
@@ -2344,7 +2341,8 @@ sctp_set_primary_ip_address(struct sctp_ifa *ifa)
SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address: queued on stcb=%p, ",
(void *)stcb);
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &ifa->address.sa);
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
stcb->sctp_ep, stcb,
@@ -2478,7 +2476,7 @@ sctp_find_valid_localaddr(struct sctp_tcb *stcb, int addr_locked)
if (stcb->asoc.scope.ipv4_addr_legal) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ sin = &sctp_ifa->address.sin;
if (sin->sin_addr.s_addr == 0) {
/* skip unspecifed addresses */
continue;
@@ -2512,7 +2510,7 @@ sctp_find_valid_localaddr(struct sctp_tcb *stcb, int addr_locked)
if (sctp_ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
continue;
}
- sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ sin6 = &sctp_ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
/*
* we skip unspecifed
@@ -2606,14 +2604,14 @@ sctp_compose_asconf(struct sctp_tcb *stcb, int *retlen, int addr_locked)
* it's simpler to fill in the asconf chunk header lookup address on
* the fly
*/
- m_asconf_chk = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_chunk), 0, M_DONTWAIT, 1, MT_DATA);
+ m_asconf_chk = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_chunk), 0, M_NOWAIT, 1, MT_DATA);
if (m_asconf_chk == NULL) {
/* no mbuf's */
SCTPDBG(SCTP_DEBUG_ASCONF1,
"compose_asconf: couldn't get chunk mbuf!\n");
return (NULL);
}
- m_asconf = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ m_asconf = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (m_asconf == NULL) {
/* no mbuf's */
SCTPDBG(SCTP_DEBUG_ASCONF1,
@@ -2784,19 +2782,16 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m,
struct sctp_paramhdr tmp_param, *ph;
uint16_t plen, ptype;
struct sctp_ifa *sctp_ifa;
+ union sctp_sockstore store;
#ifdef INET6
struct sctp_ipv6addr_param addr6_store;
- struct sockaddr_in6 sin6;
#endif
#ifdef INET
struct sctp_ipv4addr_param addr4_store;
- struct sockaddr_in sin;
#endif
- struct sockaddr *sa;
- uint32_t vrf_id;
SCTPDBG(SCTP_DEBUG_ASCONF2, "processing init-ack addresses\n");
if (stcb == NULL) /* Un-needed check for SA */
@@ -2808,21 +2803,6 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m,
if ((offset + sizeof(struct sctp_paramhdr)) > length) {
return;
}
- /* init the addresses */
-#ifdef INET6
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_len = sizeof(sin6);
- sin6.sin6_port = stcb->rport;
-#endif
-
-#ifdef INET
- bzero(&sin, sizeof(sin));
- sin.sin_family = AF_INET;
- sin.sin_len = sizeof(sin);
- sin.sin_port = stcb->rport;
-#endif
-
/* go through the addresses in the init-ack */
ph = (struct sctp_paramhdr *)
sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr),
@@ -2845,9 +2825,11 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m,
a6p == NULL) {
return;
}
- memcpy(&sin6.sin6_addr, a6p->addr,
- sizeof(struct in6_addr));
- sa = (struct sockaddr *)&sin6;
+ memset(&store, 0, sizeof(union sctp_sockstore));
+ store.sin6.sin6_family = AF_INET6;
+ store.sin6.sin6_len = sizeof(struct sockaddr_in6);
+ store.sin6.sin6_port = stcb->rport;
+ memcpy(&store.sin6.sin6_addr, a6p->addr, sizeof(struct in6_addr));
break;
}
#endif
@@ -2864,8 +2846,11 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m,
a4p == NULL) {
return;
}
- sin.sin_addr.s_addr = a4p->addr;
- sa = (struct sockaddr *)&sin;
+ memset(&store, 0, sizeof(union sctp_sockstore));
+ store.sin.sin_family = AF_INET;
+ store.sin.sin_len = sizeof(struct sockaddr_in);
+ store.sin.sin_port = stcb->rport;
+ store.sin.sin_addr.s_addr = a4p->addr;
break;
}
#endif
@@ -2874,12 +2859,7 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m,
}
/* see if this address really (still) exists */
- if (stcb) {
- vrf_id = stcb->asoc.vrf_id;
- } else {
- vrf_id = SCTP_DEFAULT_VRFID;
- }
- sctp_ifa = sctp_find_ifa_by_addr(sa, vrf_id,
+ sctp_ifa = sctp_find_ifa_by_addr(&store.sa, stcb->asoc.vrf_id,
SCTP_ADDR_NOT_LOCKED);
if (sctp_ifa == NULL) {
/* address doesn't exist anymore */
@@ -2888,9 +2868,9 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m,
/* are ASCONFs allowed ? */
if ((sctp_is_feature_on(stcb->sctp_ep,
SCTP_PCB_FLAGS_DO_ASCONF)) &&
- stcb->asoc.peer_supports_asconf) {
+ stcb->asoc.asconf_supported) {
/* queue an ASCONF DEL_IP_ADDRESS */
- status = sctp_asconf_queue_sa_delete(stcb, sa);
+ status = sctp_asconf_queue_sa_delete(stcb, &store.sa);
/*
* if queued ok, and in correct state, send
* out the ASCONF.
@@ -3137,7 +3117,7 @@ sctp_check_address_list_all(struct sctp_tcb *stcb, struct mbuf *m, int offset,
switch (sctp_ifa->address.sa.sa_family) {
#ifdef INET
case AF_INET:
- sin = (struct sockaddr_in *)&sctp_ifa->address.sin;
+ sin = &sctp_ifa->address.sin;
if (prison_check_ip4(stcb->sctp_ep->ip_inp.inp.inp_cred,
&sin->sin_addr) != 0) {
continue;
@@ -3151,7 +3131,7 @@ sctp_check_address_list_all(struct sctp_tcb *stcb, struct mbuf *m, int offset,
#endif
#ifdef INET6
case AF_INET6:
- sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sin6;
+ sin6 = &sctp_ifa->address.sin6;
if (prison_check_ip6(stcb->sctp_ep->ip_inp.inp.inp_cred,
&sin6->sin6_addr) != 0) {
continue;
@@ -3271,6 +3251,7 @@ sctp_addr_mgmt_ep_sa(struct sctp_inpcb *inp, struct sockaddr *sa,
} else {
struct sctp_asconf_iterator *asc;
struct sctp_laddr *wi;
+ int ret;
SCTP_MALLOC(asc, struct sctp_asconf_iterator *,
sizeof(struct sctp_asconf_iterator),
@@ -3292,7 +3273,7 @@ sctp_addr_mgmt_ep_sa(struct sctp_inpcb *inp, struct sockaddr *sa,
wi->action = type;
atomic_add_int(&ifa->refcount, 1);
LIST_INSERT_HEAD(&asc->list_of_work, wi, sctp_nxt_addr);
- (void)sctp_initiate_iterator(sctp_asconf_iterator_ep,
+ ret = sctp_initiate_iterator(sctp_asconf_iterator_ep,
sctp_asconf_iterator_stcb,
sctp_asconf_iterator_ep_end,
SCTP_PCB_ANY_FLAGS,
@@ -3300,6 +3281,12 @@ sctp_addr_mgmt_ep_sa(struct sctp_inpcb *inp, struct sockaddr *sa,
SCTP_ASOC_ANY_STATE,
(void *)asc, 0,
sctp_asconf_iterator_end, inp, 0);
+ if (ret) {
+ SCTP_PRINTF("Failed to initiate iterator for addr_mgmt_ep_sa\n");
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EFAULT);
+ sctp_asconf_iterator_end(asc, 0);
+ return (EFAULT);
+ }
}
return (0);
} else {
@@ -3389,6 +3376,11 @@ sctp_asconf_send_nat_state_update(struct sctp_tcb *stcb,
TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
break;
#endif
+ default:
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "sctp_asconf_send_nat_state_update: unknown address family\n");
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ return;
}
SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
SCTP_M_ASC_ADDR);
@@ -3422,6 +3414,11 @@ sctp_asconf_send_nat_state_update(struct sctp_tcb *stcb,
TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
break;
#endif
+ default:
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "sctp_asconf_send_nat_state_update: unknown address family\n");
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ return;
}
/* Now we must hunt the addresses and add all global addresses */
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
diff --git a/freebsd/sys/netinet/sctp_auth.c b/freebsd/sys/netinet/sctp_auth.c
index fc649032..19e30718 100644
--- a/freebsd/sys/netinet/sctp_auth.c
+++ b/freebsd/sys/netinet/sctp_auth.c
@@ -135,11 +135,6 @@ sctp_auth_delete_chunk(uint8_t chunk, sctp_auth_chklist_t * list)
if (list == NULL)
return (-1);
- /* is chunk restricted? */
- if ((chunk == SCTP_ASCONF) ||
- (chunk == SCTP_ASCONF_ACK)) {
- return (-1);
- }
if (list->chunks[chunk] == 1) {
list->chunks[chunk] = 0;
list->num_chunks--;
@@ -160,16 +155,6 @@ sctp_auth_get_chklist_size(const sctp_auth_chklist_t * list)
}
/*
- * set the default list of chunks requiring AUTH
- */
-void
-sctp_auth_set_default_chunks(sctp_auth_chklist_t * list)
-{
- (void)sctp_auth_add_chunk(SCTP_ASCONF, list);
- (void)sctp_auth_add_chunk(SCTP_ASCONF_ACK, list);
-}
-
-/*
* return the current number and list of required chunks caller must
* guarantee ptr has space for up to 256 bytes
*/
@@ -559,7 +544,7 @@ sctp_insert_sharedkey(struct sctp_keyhead *shared_keys,
}
}
/* shouldn't reach here */
- return (0);
+ return (EINVAL);
}
void
@@ -575,7 +560,7 @@ sctp_auth_key_acquire(struct sctp_tcb *stcb, uint16_t key_id)
atomic_add_int(&skey->refcount, 1);
SCTPDBG(SCTP_DEBUG_AUTH2,
"%s: stcb %p key %u refcount acquire to %d\n",
- __FUNCTION__, (void *)stcb, key_id, skey->refcount);
+ __func__, (void *)stcb, key_id, skey->refcount);
}
}
@@ -593,20 +578,20 @@ sctp_auth_key_release(struct sctp_tcb *stcb, uint16_t key_id, int so_locked
/* decrement the ref count */
if (skey) {
- sctp_free_sharedkey(skey);
SCTPDBG(SCTP_DEBUG_AUTH2,
"%s: stcb %p key %u refcount release to %d\n",
- __FUNCTION__, (void *)stcb, key_id, skey->refcount);
+ __func__, (void *)stcb, key_id, skey->refcount);
/* see if a notification should be generated */
- if ((skey->refcount <= 1) && (skey->deactivated)) {
+ if ((skey->refcount <= 2) && (skey->deactivated)) {
/* notify ULP that key is no longer used */
sctp_ulp_notify(SCTP_NOTIFY_AUTH_FREE_KEY, stcb,
key_id, 0, so_locked);
SCTPDBG(SCTP_DEBUG_AUTH2,
"%s: stcb %p key %u no longer used, %d\n",
- __FUNCTION__, (void *)stcb, key_id, skey->refcount);
+ __func__, (void *)stcb, key_id, skey->refcount);
}
+ sctp_free_sharedkey(skey);
}
}
@@ -639,8 +624,11 @@ sctp_copy_skeylist(const struct sctp_keyhead *src, struct sctp_keyhead *dest)
LIST_FOREACH(skey, src, next) {
new_skey = sctp_copy_sharedkey(skey);
if (new_skey != NULL) {
- (void)sctp_insert_sharedkey(dest, new_skey);
- count++;
+ if (sctp_insert_sharedkey(dest, new_skey)) {
+ sctp_free_sharedkey(new_skey);
+ } else {
+ count++;
+ }
}
}
return (count);
@@ -648,7 +636,7 @@ sctp_copy_skeylist(const struct sctp_keyhead *src, struct sctp_keyhead *dest)
sctp_hmaclist_t *
-sctp_alloc_hmaclist(uint8_t num_hmacs)
+sctp_alloc_hmaclist(uint16_t num_hmacs)
{
sctp_hmaclist_t *new_list;
int alloc_size;
@@ -1455,8 +1443,8 @@ sctp_auth_get_cookie_params(struct sctp_tcb *stcb, struct mbuf *m,
p_random = (struct sctp_auth_random *)phdr;
random_len = plen - sizeof(*p_random);
} else if (ptype == SCTP_HMAC_LIST) {
- int num_hmacs;
- int i;
+ uint16_t num_hmacs;
+ uint16_t i;
if (plen > sizeof(hmacs_store))
break;
@@ -1668,8 +1656,8 @@ sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *auth,
/* is the indicated HMAC supported? */
if (!sctp_auth_is_supported_hmac(stcb->asoc.local_hmacs, hmac_id)) {
- struct mbuf *m_err;
- struct sctp_auth_invalid_hmac *err;
+ struct mbuf *op_err;
+ struct sctp_error_auth_invalid_hmac *cause;
SCTP_STAT_INCR(sctps_recvivalhmacid);
SCTPDBG(SCTP_DEBUG_AUTH1,
@@ -1679,20 +1667,19 @@ sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *auth,
* report this in an Error Chunk: Unsupported HMAC
* Identifier
*/
- m_err = sctp_get_mbuf_for_msg(sizeof(*err), 0, M_DONTWAIT,
- 1, MT_HEADER);
- if (m_err != NULL) {
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_error_auth_invalid_hmac),
+ 0, M_NOWAIT, 1, MT_HEADER);
+ if (op_err != NULL) {
/* pre-reserve some space */
- SCTP_BUF_RESV_UF(m_err, sizeof(struct sctp_chunkhdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
/* fill in the error */
- err = mtod(m_err, struct sctp_auth_invalid_hmac *);
- bzero(err, sizeof(*err));
- err->ph.param_type = htons(SCTP_CAUSE_UNSUPPORTED_HMACID);
- err->ph.param_length = htons(sizeof(*err));
- err->hmac_id = ntohs(hmac_id);
- SCTP_BUF_LEN(m_err) = sizeof(*err);
+ cause = mtod(op_err, struct sctp_error_auth_invalid_hmac *);
+ cause->cause.code = htons(SCTP_CAUSE_UNSUPPORTED_HMACID);
+ cause->cause.length = htons(sizeof(struct sctp_error_auth_invalid_hmac));
+ cause->hmac_id = ntohs(hmac_id);
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_error_auth_invalid_hmac);
/* queue it */
- sctp_queue_op_err(stcb, m_err);
+ sctp_queue_op_err(stcb, op_err);
}
return (-1);
}
@@ -1785,7 +1772,7 @@ sctp_notify_authentication(struct sctp_tcb *stcb, uint32_t indication,
return;
m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_authkey_event),
- 0, M_DONTWAIT, 1, MT_HEADER);
+ 0, M_NOWAIT, 1, MT_HEADER);
if (m_notify == NULL)
/* no space left */
return;
@@ -1951,8 +1938,7 @@ sctp_validate_init_auth_params(struct mbuf *m, int offset, int limit)
"SCTP: peer sent chunk list w/o AUTH\n");
return (-1);
}
- if (!SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) && peer_supports_asconf &&
- !peer_supports_auth) {
+ if (peer_supports_asconf && !peer_supports_auth) {
SCTPDBG(SCTP_DEBUG_AUTH1,
"SCTP: peer supports ASCONF but not AUTH\n");
return (-1);
diff --git a/freebsd/sys/netinet/sctp_auth.h b/freebsd/sys/netinet/sctp_auth.h
index 535c0fc0..b98764e2 100644
--- a/freebsd/sys/netinet/sctp_auth.h
+++ b/freebsd/sys/netinet/sctp_auth.h
@@ -112,7 +112,6 @@ extern sctp_auth_chklist_t *sctp_copy_chunklist(sctp_auth_chklist_t * chklist);
extern int sctp_auth_add_chunk(uint8_t chunk, sctp_auth_chklist_t * list);
extern int sctp_auth_delete_chunk(uint8_t chunk, sctp_auth_chklist_t * list);
extern size_t sctp_auth_get_chklist_size(const sctp_auth_chklist_t * list);
-extern void sctp_auth_set_default_chunks(sctp_auth_chklist_t * list);
extern int
sctp_serialize_auth_chunks(const sctp_auth_chklist_t * list,
uint8_t * ptr);
@@ -155,7 +154,7 @@ sctp_auth_key_release(struct sctp_tcb *stcb, uint16_t keyid,
/* hmac list handling */
-extern sctp_hmaclist_t *sctp_alloc_hmaclist(uint8_t num_hmacs);
+extern sctp_hmaclist_t *sctp_alloc_hmaclist(uint16_t num_hmacs);
extern void sctp_free_hmaclist(sctp_hmaclist_t * list);
extern int sctp_auth_add_hmacid(sctp_hmaclist_t * list, uint16_t hmac_id);
extern sctp_hmaclist_t *sctp_copy_hmaclist(sctp_hmaclist_t * list);
diff --git a/freebsd/sys/netinet/sctp_bsd_addr.c b/freebsd/sys/netinet/sctp_bsd_addr.c
index d558bd82..bfd7f816 100644
--- a/freebsd/sys/netinet/sctp_bsd_addr.c
+++ b/freebsd/sys/netinet/sctp_bsd_addr.c
@@ -295,9 +295,12 @@ sctp_addr_change(struct ifaddr *ifa, int cmd)
{
uint32_t ifa_flags = 0;
+ if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+ return;
+ }
/*
* BSD only has one VRF, if this changes we will need to hook in the
- * right things here to get the id to pass to the address managment
+ * right things here to get the id to pass to the address management
* routine.
*/
if (SCTP_BASE_VAR(first_time) == 0) {
@@ -383,17 +386,7 @@ sctp_get_mbuf_for_msg(unsigned int space_needed, int want_header,
return (m);
}
if (allonebuf) {
- int siz;
-
- if (SCTP_BUF_IS_EXTENDED(m)) {
- siz = SCTP_BUF_EXTEND_SIZE(m);
- } else {
- if (want_header)
- siz = MHLEN;
- else
- siz = MLEN;
- }
- if (siz < space_needed) {
+ if (SCTP_BUF_SIZE(m) < space_needed) {
m_freem(m);
return (NULL);
}
@@ -404,9 +397,7 @@ sctp_get_mbuf_for_msg(unsigned int space_needed, int want_header,
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- if (SCTP_BUF_IS_EXTENDED(m)) {
- sctp_log_mb(m, SCTP_MBUF_IALLOC);
- }
+ sctp_log_mb(m, SCTP_MBUF_IALLOC);
}
#endif
return (m);
diff --git a/freebsd/sys/netinet/sctp_cc_functions.c b/freebsd/sys/netinet/sctp_cc_functions.c
index 9758e011..68dc460a 100644
--- a/freebsd/sys/netinet/sctp_cc_functions.c
+++ b/freebsd/sys/netinet/sctp_cc_functions.c
@@ -55,6 +55,19 @@ __FBSDID("$FreeBSD$");
#define SHIFT_MPTCP_MULTI 8
static void
+sctp_enforce_cwnd_limit(struct sctp_association *assoc, struct sctp_nets *net)
+{
+ if ((assoc->max_cwnd > 0) &&
+ (net->cwnd > assoc->max_cwnd) &&
+ (net->cwnd > (net->mtu - sizeof(struct sctphdr)))) {
+ net->cwnd = assoc->max_cwnd;
+ if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
+ net->cwnd = net->mtu - sizeof(struct sctphdr);
+ }
+ }
+}
+
+static void
sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
{
struct sctp_association *assoc;
@@ -82,8 +95,9 @@ sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
net->cwnd = net->mtu - sizeof(struct sctphdr);
}
}
+ sctp_enforce_cwnd_limit(assoc, net);
net->ssthresh = assoc->peers_rwnd;
- SDT_PROBE(sctp, cwnd, net, init,
+ SDT_PROBE5(sctp, cwnd, net, init,
stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
0, net->cwnd);
if (SCTP_BASE_SYSCTL(sctp_logging_level) &
@@ -180,7 +194,8 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
}
}
net->cwnd = net->ssthresh;
- SDT_PROBE(sctp, cwnd, net, fr,
+ sctp_enforce_cwnd_limit(asoc, net);
+ SDT_PROBE5(sctp, cwnd, net, fr,
stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
old_cwnd, net->cwnd);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
@@ -213,7 +228,8 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
}
sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
- stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_1);
sctp_timer_start(SCTP_TIMER_TYPE_SEND,
stcb->sctp_ep, stcb, net);
}
@@ -228,7 +244,7 @@ sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
}
/* Defines for instantaneous bw decisions */
-#define SCTP_INST_LOOSING 1 /* Loosing to other flows */
+#define SCTP_INST_LOOSING 1 /* Losing to other flows */
#define SCTP_INST_NEUTRAL 2 /* Neutral, no indication */
#define SCTP_INST_GAINING 3 /* Gaining, step down possible */
@@ -247,7 +263,7 @@ cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw,
*/
/* Probe point 5 */
probepoint |= ((5 << 16) | 1);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -268,7 +284,7 @@ cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw,
oth |= net->cc_mod.rtcc.step_cnt;
oth <<= 16;
oth |= net->cc_mod.rtcc.last_step_state;
- SDT_PROBE(sctp, cwnd, net, rttstep,
+ SDT_PROBE5(sctp, cwnd, net, rttstep,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -292,7 +308,7 @@ cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw,
*/
/* Probe point 6 */
probepoint |= ((6 << 16) | 0);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -304,7 +320,7 @@ cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw,
oth |= net->cc_mod.rtcc.step_cnt;
oth <<= 16;
oth |= net->cc_mod.rtcc.last_step_state;
- SDT_PROBE(sctp, cwnd, net, rttstep,
+ SDT_PROBE5(sctp, cwnd, net, rttstep,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -335,7 +351,7 @@ cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw,
*/
/* Probe point 7 */
probepoint |= ((7 << 16) | net->cc_mod.rtcc.ret_from_eq);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -384,7 +400,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6
/* We caused it maybe.. back off? */
/* PROBE POINT 1 */
probepoint |= ((1 << 16) | 1);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -402,7 +418,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6
}
/* Probe point 2 */
probepoint |= ((2 << 16) | 0);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -415,7 +431,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6
oth |= net->cc_mod.rtcc.step_cnt;
oth <<= 16;
oth |= net->cc_mod.rtcc.last_step_state;
- SDT_PROBE(sctp, cwnd, net, rttstep,
+ SDT_PROBE5(sctp, cwnd, net, rttstep,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -428,6 +444,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6
if ((net->cc_mod.rtcc.vol_reduce) &&
(inst_ind != SCTP_INST_GAINING)) {
net->cwnd += net->mtu;
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
net->cc_mod.rtcc.vol_reduce--;
}
net->cc_mod.rtcc.last_step_state = 2;
@@ -438,7 +455,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6
/* bw & rtt decreased */
/* Probe point 3 */
probepoint |= ((3 << 16) | 0);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -450,7 +467,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6
oth |= net->cc_mod.rtcc.step_cnt;
oth <<= 16;
oth |= net->cc_mod.rtcc.last_step_state;
- SDT_PROBE(sctp, cwnd, net, rttstep,
+ SDT_PROBE5(sctp, cwnd, net, rttstep,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -459,6 +476,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6
if ((net->cc_mod.rtcc.vol_reduce) &&
(inst_ind != SCTP_INST_GAINING)) {
net->cwnd += net->mtu;
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
net->cc_mod.rtcc.vol_reduce--;
}
net->cc_mod.rtcc.last_step_state = 3;
@@ -469,7 +487,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6
/* The bw decreased but rtt stayed the same */
/* Probe point 4 */
probepoint |= ((4 << 16) | 0);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -481,7 +499,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6
oth |= net->cc_mod.rtcc.step_cnt;
oth <<= 16;
oth |= net->cc_mod.rtcc.last_step_state;
- SDT_PROBE(sctp, cwnd, net, rttstep,
+ SDT_PROBE5(sctp, cwnd, net, rttstep,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -490,6 +508,7 @@ cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6
if ((net->cc_mod.rtcc.vol_reduce) &&
(inst_ind != SCTP_INST_GAINING)) {
net->cwnd += net->mtu;
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
net->cc_mod.rtcc.vol_reduce--;
}
net->cc_mod.rtcc.last_step_state = 4;
@@ -518,7 +537,7 @@ cc_bw_increase(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6
*/
/* PROBE POINT 0 */
probepoint = (((uint64_t) net->cwnd) << 32);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -530,7 +549,7 @@ cc_bw_increase(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6
oth |= net->cc_mod.rtcc.step_cnt;
oth <<= 16;
oth |= net->cc_mod.rtcc.last_step_state;
- SDT_PROBE(sctp, cwnd, net, rttstep,
+ SDT_PROBE5(sctp, cwnd, net, rttstep,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -546,7 +565,7 @@ cc_bw_increase(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint6
return (0);
}
-/* RTCC Algoritm to limit growth of cwnd, return
+/* RTCC Algorithm to limit growth of cwnd, return
* true if you want to NOT allow cwnd growth
*/
static int
@@ -630,7 +649,7 @@ cc_bw_limit(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw)
/* Can't determine do not change */
probepoint |= ((0xd << 16) | inst_ind);
}
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((nbw << 32) | inst_bw),
((net->cc_mod.rtcc.lbw_rtt << 32) | rtt),
@@ -790,7 +809,7 @@ sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb,
(((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
(stcb->rport);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
nbw,
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -884,11 +903,12 @@ sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb,
break;
}
net->cwnd += incr;
+ sctp_enforce_cwnd_limit(asoc, net);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, incr,
SCTP_CWND_LOG_FROM_SS);
}
- SDT_PROBE(sctp, cwnd, net, ack,
+ SDT_PROBE5(sctp, cwnd, net, ack,
stcb->asoc.my_vtag,
((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
net,
@@ -950,7 +970,8 @@ sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb,
break;
}
net->cwnd += incr;
- SDT_PROBE(sctp, cwnd, net, ack,
+ sctp_enforce_cwnd_limit(asoc, net);
+ SDT_PROBE5(sctp, cwnd, net, ack,
stcb->asoc.my_vtag,
((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
net,
@@ -982,7 +1003,7 @@ sctp_cwnd_update_exit_pf_common(struct sctp_tcb *stcb, struct sctp_nets *net)
old_cwnd = net->cwnd;
net->cwnd = net->mtu;
- SDT_PROBE(sctp, cwnd, net, ack,
+ SDT_PROBE5(sctp, cwnd, net, ack,
stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
old_cwnd, net->cwnd);
SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
@@ -1053,7 +1074,7 @@ sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net)
}
net->cwnd = net->mtu;
net->partial_bytes_acked = 0;
- SDT_PROBE(sctp, cwnd, net, to,
+ SDT_PROBE5(sctp, cwnd, net, to,
stcb->asoc.my_vtag,
((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
net,
@@ -1091,7 +1112,7 @@ sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets *
} else {
/*
* Further tuning down required over the drastic
- * orginal cut
+ * original cut
*/
net->ssthresh -= (net->mtu * num_pkt_lost);
net->cwnd -= (net->mtu * num_pkt_lost);
@@ -1113,7 +1134,7 @@ sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets *
net->RTO <<= 1;
}
net->cwnd = net->ssthresh;
- SDT_PROBE(sctp, cwnd, net, ecn,
+ SDT_PROBE5(sctp, cwnd, net, ecn,
stcb->asoc.my_vtag,
((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
net,
@@ -1132,12 +1153,9 @@ sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
uint32_t * bottle_bw, uint32_t * on_queue)
{
uint32_t bw_avail;
- int rtt;
unsigned int incr;
int old_cwnd = net->cwnd;
- /* need real RTT in msd for this calc */
- rtt = net->rtt / 1000;
/* get bottle neck bw */
*bottle_bw = ntohl(cp->bottle_bw);
/* and whats on queue */
@@ -1146,10 +1164,11 @@ sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
* adjust the on-queue if our flight is more it could be that the
* router has not yet gotten data "in-flight" to it
*/
- if (*on_queue < net->flight_size)
+ if (*on_queue < net->flight_size) {
*on_queue = net->flight_size;
- /* calculate the available space */
- bw_avail = (*bottle_bw * rtt) / 1000;
+ }
+ /* rtt is measured in micro seconds, bottle_bw in bytes per second */
+ bw_avail = (uint32_t) (((uint64_t) (*bottle_bw) * net->rtt) / (uint64_t) 1000000);
if (bw_avail > *bottle_bw) {
/*
* Cap the growth to no more than the bottle neck. This can
@@ -1169,7 +1188,6 @@ sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
int seg_inflight, seg_onqueue, my_portion;
net->partial_bytes_acked = 0;
-
/* how much are we over queue size? */
incr = *on_queue - bw_avail;
if (stcb->asoc.seen_a_sack_this_pkt) {
@@ -1232,9 +1250,10 @@ sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
/* We always have 1 MTU */
net->cwnd = net->mtu;
}
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
if (net->cwnd - old_cwnd != 0) {
/* log only changes */
- SDT_PROBE(sctp, cwnd, net, pd,
+ SDT_PROBE5(sctp, cwnd, net, pd,
stcb->asoc.my_vtag,
((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
net,
@@ -1256,7 +1275,8 @@ sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
net->ssthresh = net->cwnd;
if (burst_limit) {
net->cwnd = (net->flight_size + (burst_limit * net->mtu));
- SDT_PROBE(sctp, cwnd, net, bl,
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
+ SDT_PROBE5(sctp, cwnd, net, bl,
stcb->asoc.my_vtag,
((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
net,
@@ -1272,7 +1292,7 @@ sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
struct sctp_association *asoc,
int accum_moved, int reneged_all, int will_exit)
{
- /* Passing a zero argument in last disables the rtcc algoritm */
+ /* Passing a zero argument in last disables the rtcc algorithm */
sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 0);
}
@@ -1280,13 +1300,13 @@ static void
sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
int in_window, int num_pkt_lost)
{
- /* Passing a zero argument in last disables the rtcc algoritm */
+ /* Passing a zero argument in last disables the rtcc algorithm */
sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 0);
}
/* Here starts the RTCCVAR type CC invented by RRS which
* is a slight mod to RFC2581. We reuse a common routine or
- * two since these algoritms are so close and need to
+ * two since these algorithms are so close and need to
* remain the same.
*/
static void
@@ -1332,7 +1352,7 @@ sctp_cwnd_new_rtcc_transmission_begins(struct sctp_tcb *stcb,
probepoint = (((uint64_t) net->cwnd) << 32);
/* Probe point 8 */
probepoint |= ((8 << 16) | 0);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | 0),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -1395,7 +1415,7 @@ sctp_set_rtcc_initial_cc_param(struct sctp_tcb *stcb,
vtag = (net->rtt << 32) |
(((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
(stcb->rport);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
0,
0,
@@ -1492,7 +1512,7 @@ sctp_cwnd_update_rtcc_after_sack(struct sctp_tcb *stcb,
struct sctp_association *asoc,
int accum_moved, int reneged_all, int will_exit)
{
- /* Passing a one argument at the last enables the rtcc algoritm */
+ /* Passing a one argument at the last enables the rtcc algorithm */
sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 1);
}
@@ -1508,13 +1528,13 @@ sctp_rtt_rtcc_calculated(struct sctp_tcb *stcb SCTP_UNUSED,
struct sctp_hs_raise_drop {
int32_t cwnd;
- int32_t increase;
- int32_t drop_percent;
+ int8_t increase;
+ int8_t drop_percent;
};
#define SCTP_HS_TABLE_SIZE 73
-struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
+static const struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
{38, 1, 50}, /* 0 */
{118, 2, 44}, /* 1 */
{221, 3, 41}, /* 2 */
@@ -1594,6 +1614,7 @@ static void
sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
{
int cur_val, i, indx, incr;
+ int old_cwnd = net->cwnd;
cur_val = net->cwnd >> 10;
indx = SCTP_HS_TABLE_SIZE - 1;
@@ -1602,14 +1623,8 @@ sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
/* normal mode */
if (net->net_ack > net->mtu) {
net->cwnd += net->mtu;
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
- sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS);
- }
} else {
net->cwnd += net->net_ack;
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
- sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS);
- }
}
} else {
for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) {
@@ -1619,11 +1634,12 @@ sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
}
}
net->last_hs_used = indx;
- incr = ((sctp_cwnd_adjust[indx].increase) << 10);
+ incr = (((int32_t) sctp_cwnd_adjust[indx].increase) << 10);
net->cwnd += incr;
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
- sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS);
- }
+ }
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SS);
}
}
@@ -1644,7 +1660,7 @@ sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
} else {
/* drop by the proper amount */
net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
- sctp_cwnd_adjust[net->last_hs_used].drop_percent);
+ (int32_t) sctp_cwnd_adjust[net->last_hs_used].drop_percent);
net->cwnd = net->ssthresh;
/* now where are we */
indx = net->last_hs_used;
@@ -1662,6 +1678,7 @@ sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
net->last_hs_used = indx;
}
}
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
}
@@ -1718,7 +1735,8 @@ sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
}
sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
- stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_2);
sctp_timer_start(SCTP_TIMER_TYPE_SEND,
stcb->sctp_ep, stcb, net);
}
@@ -1793,9 +1811,7 @@ sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
if (net->cwnd <= net->ssthresh) {
/* We are in slow start */
if (net->flight_size + net->net_ack >= net->cwnd) {
-
sctp_hs_cwnd_increase(stcb, net);
-
} else {
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
sctp_log_cwnd(stcb, net, net->net_ack,
@@ -1809,6 +1825,7 @@ sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
(net->partial_bytes_acked >= net->cwnd)) {
net->partial_bytes_acked -= net->cwnd;
net->cwnd += net->mtu;
+ sctp_enforce_cwnd_limit(asoc, net);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, net->mtu,
SCTP_CWND_LOG_FROM_CA);
@@ -2047,6 +2064,7 @@ htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
SCTP_CWND_LOG_FROM_SS);
}
}
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
} else {
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
sctp_log_cwnd(stcb, net, net->net_ack,
@@ -2068,6 +2086,7 @@ htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
*/
net->cwnd += net->mtu;
net->partial_bytes_acked = 0;
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
htcp_alpha_update(&net->cc_mod.htcp_ca);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, net->mtu,
@@ -2114,6 +2133,7 @@ sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
*/
net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
net->ssthresh = stcb->asoc.peers_rwnd;
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
htcp_init(net);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
@@ -2217,6 +2237,7 @@ sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
htcp_reset(&net->cc_mod.htcp_ca);
net->ssthresh = htcp_recalc_ssthresh(net);
net->cwnd = net->ssthresh;
+ sctp_enforce_cwnd_limit(asoc, net);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
SCTP_CWND_LOG_FROM_FR);
@@ -2247,7 +2268,8 @@ sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
}
sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
- stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_3);
sctp_timer_start(SCTP_TIMER_TYPE_SEND,
stcb->sctp_ep, stcb, net);
}
@@ -2296,13 +2318,14 @@ sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
net->RTO <<= 1;
}
net->cwnd = net->ssthresh;
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
}
}
}
-struct sctp_cc_functions sctp_cc_functions[] = {
+const struct sctp_cc_functions sctp_cc_functions[] = {
{
.sctp_set_initial_cc_param = sctp_set_initial_cc_param,
.sctp_cwnd_update_after_sack = sctp_cwnd_update_after_sack,
diff --git a/freebsd/sys/netinet/sctp_constants.h b/freebsd/sys/netinet/sctp_constants.h
index 0ede04ca..ecde4fee 100644
--- a/freebsd/sys/netinet/sctp_constants.h
+++ b/freebsd/sys/netinet/sctp_constants.h
@@ -66,6 +66,10 @@ __FBSDID("$FreeBSD$");
*/
#define SCTP_LARGEST_INIT_ACCEPTED (65535 - 2048)
+/* Largest length of a chunk */
+#define SCTP_MAX_CHUNK_LENGTH 0xffff
+/* Largest length of an error cause */
+#define SCTP_MAX_CAUSE_LENGTH 0xffff
/* Number of addresses where we just skip the counting */
#define SCTP_COUNT_LIMIT 40
@@ -267,20 +271,11 @@ __FBSDID("$FreeBSD$");
/* how many addresses per assoc remote and local */
#define SCTP_SCALE_FOR_ADDR 2
-/* default AUTO_ASCONF mode enable(1)/disable(0) value (sysctl) */
-#define SCTP_DEFAULT_AUTO_ASCONF 1
-
/* default MULTIPLE_ASCONF mode enable(1)/disable(0) value (sysctl) */
#define SCTP_DEFAULT_MULTIPLE_ASCONFS 0
-/* default MOBILITY_BASE mode enable(1)/disable(0) value (sysctl) */
-#define SCTP_DEFAULT_MOBILITY_BASE 0
-
-/* default MOBILITY_FASTHANDOFF mode enable(1)/disable(0) value (sysctl) */
-#define SCTP_DEFAULT_MOBILITY_FASTHANDOFF 0
-
/*
- * Theshold for rwnd updates, we have to read (sb_hiwat >>
+ * Threshold for rwnd updates, we have to read (sb_hiwat >>
* SCTP_RWND_HIWAT_SHIFT) before we will look to see if we need to send a
* window update sack. When we look, we compare the last rwnd we sent vs the
* current rwnd. It too must be greater than this value. Using 3 divdes the
@@ -350,6 +345,7 @@ __FBSDID("$FreeBSD$");
#define SCTP_RTT_FROM_NON_DATA 0
#define SCTP_RTT_FROM_DATA 1
+#define PR_SCTP_UNORDERED_FLAG 0x0001
/* IP hdr (20/40) + 12+2+2 (enet) + sctp common 12 */
#define SCTP_FIRST_MBUF_RESV 68
@@ -391,8 +387,8 @@ __FBSDID("$FreeBSD$");
/* align to 32-bit sizes */
#define SCTP_SIZE32(x) ((((x) + 3) >> 2) << 2)
-#define IS_SCTP_CONTROL(a) ((a)->chunk_type != SCTP_DATA)
-#define IS_SCTP_DATA(a) ((a)->chunk_type == SCTP_DATA)
+#define IS_SCTP_CONTROL(a) (((a)->chunk_type != SCTP_DATA) && ((a)->chunk_type != SCTP_IDATA))
+#define IS_SCTP_DATA(a) (((a)->chunk_type == SCTP_DATA) || ((a)->chunk_type == SCTP_IDATA))
/* SCTP parameter types */
@@ -467,7 +463,7 @@ __FBSDID("$FreeBSD$");
/*
- * SCTP states for internal state machine XXX (should match "user" values)
+ * SCTP states for internal state machine
*/
#define SCTP_STATE_EMPTY 0x0000
#define SCTP_STATE_INUSE 0x0001
@@ -518,7 +514,7 @@ __FBSDID("$FreeBSD$");
/* Maximum the mapping array will grow to (TSN mapping array) */
#define SCTP_MAPPING_ARRAY 512
-/* size of the inital malloc on the mapping array */
+/* size of the initial malloc on the mapping array */
#define SCTP_INITIAL_MAPPING_ARRAY 16
/* how much we grow the mapping array each call */
#define SCTP_MAPPING_ARRAY_INCR 32
@@ -621,10 +617,6 @@ __FBSDID("$FreeBSD$");
/* 30 seconds + RTO (in ms) */
#define SCTP_HB_DEFAULT_MSEC 30000
-/* Max time I will wait for Shutdown to complete */
-#define SCTP_DEF_MAX_SHUTDOWN_SEC 180
-
-
/*
* This is how long a secret lives, NOT how long a cookie lives how many
* ticks the current secret will live.
@@ -647,7 +639,7 @@ __FBSDID("$FreeBSD$");
#define SCTP_DEF_PMTU_RAISE_SEC 600 /* 10 min between raise attempts */
-/* How many streams I request initally by default */
+/* How many streams I request initially by default */
#define SCTP_OSTREAM_INITIAL 10
#define SCTP_ISTREAM_INITIAL 2048
@@ -774,18 +766,19 @@ __FBSDID("$FreeBSD$");
*/
/* File defines */
-#define SCTP_FROM_SCTP_INPUT 0x10000000
-#define SCTP_FROM_SCTP_PCB 0x20000000
-#define SCTP_FROM_SCTP_INDATA 0x30000000
-#define SCTP_FROM_SCTP_TIMER 0x40000000
-#define SCTP_FROM_SCTP_USRREQ 0x50000000
-#define SCTP_FROM_SCTPUTIL 0x60000000
-#define SCTP_FROM_SCTP6_USRREQ 0x70000000
-#define SCTP_FROM_SCTP_ASCONF 0x80000000
-#define SCTP_FROM_SCTP_OUTPUT 0x90000000
-#define SCTP_FROM_SCTP_PEELOFF 0xa0000000
-#define SCTP_FROM_SCTP_PANDA 0xb0000000
-#define SCTP_FROM_SCTP_SYSCTL 0xc0000000
+#define SCTP_FROM_SCTP_INPUT 0x10000000
+#define SCTP_FROM_SCTP_PCB 0x20000000
+#define SCTP_FROM_SCTP_INDATA 0x30000000
+#define SCTP_FROM_SCTP_TIMER 0x40000000
+#define SCTP_FROM_SCTP_USRREQ 0x50000000
+#define SCTP_FROM_SCTPUTIL 0x60000000
+#define SCTP_FROM_SCTP6_USRREQ 0x70000000
+#define SCTP_FROM_SCTP_ASCONF 0x80000000
+#define SCTP_FROM_SCTP_OUTPUT 0x90000000
+#define SCTP_FROM_SCTP_PEELOFF 0xa0000000
+#define SCTP_FROM_SCTP_PANDA 0xb0000000
+#define SCTP_FROM_SCTP_SYSCTL 0xc0000000
+#define SCTP_FROM_SCTP_CC_FUNCTIONS 0xd0000000
/* Location ID's */
#define SCTP_LOC_1 0x00000001
@@ -821,6 +814,8 @@ __FBSDID("$FreeBSD$");
#define SCTP_LOC_31 0x0000001f
#define SCTP_LOC_32 0x00000020
#define SCTP_LOC_33 0x00000021
+#define SCTP_LOC_34 0x00000022
+#define SCTP_LOC_35 0x00000023
/* Free assoc codes */
@@ -892,12 +887,19 @@ __FBSDID("$FreeBSD$");
/* modular comparison */
/* See RFC 1982 for details. */
-#define SCTP_SSN_GT(a, b) (((a < b) && ((uint16_t)(b - a) > (1U<<15))) || \
- ((a > b) && ((uint16_t)(a - b) < (1U<<15))))
-#define SCTP_SSN_GE(a, b) (SCTP_SSN_GT(a, b) || (a == b))
-#define SCTP_TSN_GT(a, b) (((a < b) && ((uint32_t)(b - a) > (1U<<31))) || \
- ((a > b) && ((uint32_t)(a - b) < (1U<<31))))
-#define SCTP_TSN_GE(a, b) (SCTP_TSN_GT(a, b) || (a == b))
+#define SCTP_UINT16_GT(a, b) (((a < b) && ((uint16_t)(b - a) > (1U<<15))) || \
+ ((a > b) && ((uint16_t)(a - b) < (1U<<15))))
+#define SCTP_UINT16_GE(a, b) (SCTP_UINT16_GT(a, b) || (a == b))
+#define SCTP_UINT32_GT(a, b) (((a < b) && ((uint32_t)(b - a) > (1U<<31))) || \
+ ((a > b) && ((uint32_t)(a - b) < (1U<<31))))
+#define SCTP_UINT32_GE(a, b) (SCTP_UINT32_GT(a, b) || (a == b))
+
+#define SCTP_SSN_GT(a, b) SCTP_UINT16_GT(a, b)
+#define SCTP_SSN_GE(a, b) SCTP_UINT16_GE(a, b)
+#define SCTP_TSN_GT(a, b) SCTP_UINT32_GT(a, b)
+#define SCTP_TSN_GE(a, b) SCTP_UINT32_GE(a, b)
+#define SCTP_MSGID_GT(o, a, b) ((o == 1) ? SCTP_UINT16_GT((uint16_t)a, (uint16_t)b) : SCTP_UINT32_GT(a, b))
+#define SCTP_MSGID_GE(o, a, b) ((o == 1) ? SCTP_UINT16_GE((uint16_t)a, (uint16_t)b) : SCTP_UINT32_GE(a, b))
/* Mapping array manipulation routines */
#define SCTP_IS_TSN_PRESENT(arry, gap) ((arry[(gap >> 3)] >> (gap & 0x07)) & 0x01)
@@ -920,7 +922,7 @@ __FBSDID("$FreeBSD$");
* element. Each entry will take 2 4 byte ints (and of course the overhead
* of the next pointer as well). Using 15 as an example will yield * ((8 *
* 15) + 8) or 128 bytes of overhead for each timewait block that gets
- * initialized. Increasing it to 31 would yeild 256 bytes per block.
+ * initialized. Increasing it to 31 would yield 256 bytes per block.
*/
#define SCTP_NUMBER_IN_VTAG_BLOCK 15
/*
@@ -986,10 +988,7 @@ __FBSDID("$FreeBSD$");
(((uint8_t *)&(a)->s_addr)[1] == 168)))
#define IN4_ISLOOPBACK_ADDRESS(a) \
- ((((uint8_t *)&(a)->s_addr)[0] == 127) && \
- (((uint8_t *)&(a)->s_addr)[1] == 0) && \
- (((uint8_t *)&(a)->s_addr)[2] == 0) && \
- (((uint8_t *)&(a)->s_addr)[3] == 1))
+ (((uint8_t *)&(a)->s_addr)[0] == 127)
#define IN4_ISLINKLOCAL_ADDRESS(a) \
((((uint8_t *)&(a)->s_addr)[0] == 169) && \
diff --git a/freebsd/sys/netinet/sctp_dtrace_declare.h b/freebsd/sys/netinet/sctp_dtrace_declare.h
index f6fe48bd..c5c8f9ce 100644
--- a/freebsd/sys/netinet/sctp_dtrace_declare.h
+++ b/freebsd/sys/netinet/sctp_dtrace_declare.h
@@ -35,7 +35,6 @@ __FBSDID("$FreeBSD$");
#ifndef _NETINET_SCTP_DTRACE_DECLARE_H_
#define _NETINET_SCTP_DTRACE_DECLARE_H_
-#include <rtems/bsd/local/opt_kdtrace.h>
#include <sys/kernel.h>
#include <sys/sdt.h>
diff --git a/freebsd/sys/netinet/sctp_dtrace_define.h b/freebsd/sys/netinet/sctp_dtrace_define.h
index 0bfe18c0..19f44da4 100644
--- a/freebsd/sys/netinet/sctp_dtrace_define.h
+++ b/freebsd/sys/netinet/sctp_dtrace_define.h
@@ -35,7 +35,6 @@ __FBSDID("$FreeBSD$");
#ifndef _NETINET_SCTP_DTRACE_DEFINE_H_
#define _NETINET_SCTP_DTRACE_DEFINE_H_
-#include <rtems/bsd/local/opt_kdtrace.h>
#include <sys/kernel.h>
#include <sys/sdt.h>
@@ -46,131 +45,131 @@ SDT_PROVIDER_DEFINE(sctp);
/********************************************************/
/* Initial */
SDT_PROBE_DEFINE5(sctp, cwnd, net, init,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /*
- * The port number of the local side << 16 | port number
- * of remote in network byte order.
- */
- "uintptr_t", /* The pointer to the struct sctp_nets * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /* The port number of the local side << 16 |
+ * port number of remote in network byte
+ * order. */
+ "uintptr_t", /* The pointer to the struct sctp_nets *
+ * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
/* ACK-INCREASE */
SDT_PROBE_DEFINE5(sctp, cwnd, net, ack,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /*
- * The port number of the local side << 16 | port number
- * of remote in network byte order.
- */
- "uintptr_t", /* The pointer to the struct sctp_nets * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /* The port number of the local side << 16 |
+ * port number of remote in network byte
+ * order. */
+ "uintptr_t", /* The pointer to the struct sctp_nets *
+ * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
/* ACK-INCREASE */
SDT_PROBE_DEFINE5(sctp, cwnd, net, rttvar,
- "uint64_t", /* The Vtag << 32 | localport << 16 | remoteport */
- "uint64_t", /* obw | nbw */
- "uint64_t", /* bwrtt | newrtt */
- "uint64_t", /* flight */
- "uint64_t"); /* (cwnd << 32) | point << 16 | retval(0/1) */
+ "uint64_t", /* The Vtag << 32 | localport << 16 |
+ * remoteport */
+ "uint64_t", /* obw | nbw */
+ "uint64_t", /* bwrtt | newrtt */
+ "uint64_t", /* flight */
+ "uint64_t"); /* (cwnd << 32) | point << 16 | retval(0/1) */
SDT_PROBE_DEFINE5(sctp, cwnd, net, rttstep,
- "uint64_t", /* The Vtag << 32 | localport << 16 | remoteport */
- "uint64_t", /* obw | nbw */
- "uint64_t", /* bwrtt | newrtt */
- "uint64_t", /* flight */
- "uint64_t"); /* (cwnd << 32) | point << 16 | retval(0/1) */
+ "uint64_t", /* The Vtag << 32 | localport << 16 |
+ * remoteport */
+ "uint64_t", /* obw | nbw */
+ "uint64_t", /* bwrtt | newrtt */
+ "uint64_t", /* flight */
+ "uint64_t"); /* (cwnd << 32) | point << 16 | retval(0/1) */
/* FastRetransmit-DECREASE */
SDT_PROBE_DEFINE5(sctp, cwnd, net, fr,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /*
- * The port number of the local side << 16 | port number
- * of remote in network byte order.
- */
- "uintptr_t", /* The pointer to the struct sctp_nets * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /* The port number of the local side << 16 |
+ * port number of remote in network byte
+ * order. */
+ "uintptr_t", /* The pointer to the struct sctp_nets *
+ * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
/* TimeOut-DECREASE */
SDT_PROBE_DEFINE5(sctp, cwnd, net, to,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /*
- * The port number of the local side << 16 | port number
- * of remote in network byte order.
- */
- "uintptr_t", /* The pointer to the struct sctp_nets * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /* The port number of the local side << 16 |
+ * port number of remote in network byte
+ * order. */
+ "uintptr_t", /* The pointer to the struct sctp_nets *
+ * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
/* BurstLimit-DECREASE */
SDT_PROBE_DEFINE5(sctp, cwnd, net, bl,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /*
- * The port number of the local side << 16 | port number
- * of remote in network byte order.
- */
- "uintptr_t", /* The pointer to the struct sctp_nets * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /* The port number of the local side << 16 |
+ * port number of remote in network byte
+ * order. */
+ "uintptr_t", /* The pointer to the struct sctp_nets *
+ * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
/* ECN-DECREASE */
SDT_PROBE_DEFINE5(sctp, cwnd, net, ecn,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /*
- * The port number of the local side << 16 | port number
- * of remote in network byte order.
- */
- "uintptr_t", /* The pointer to the struct sctp_nets * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /* The port number of the local side << 16 |
+ * port number of remote in network byte
+ * order. */
+ "uintptr_t", /* The pointer to the struct sctp_nets *
+ * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
/* PacketDrop-DECREASE */
SDT_PROBE_DEFINE5(sctp, cwnd, net, pd,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /*
- * The port number of the local side << 16 | port number
- * of remote in network byte order.
- */
- "uintptr_t", /* The pointer to the struct sctp_nets * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /* The port number of the local side << 16 |
+ * port number of remote in network byte
+ * order. */
+ "uintptr_t", /* The pointer to the struct sctp_nets *
+ * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
/********************************************************/
/* Rwnd probe - tracks changes in the receiver window for an assoc */
/********************************************************/
SDT_PROBE_DEFINE4(sctp, rwnd, assoc, val,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /*
- * The port number of the local side << 16 | port number
- * of remote in network byte order.
- */
- "int", /* The up/down amount */
- "int"); /* The new value of the cwnd */
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /* The port number of the local side << 16 |
+ * port number of remote in network byte
+ * order. */
+ "int", /* The up/down amount */
+ "int"); /* The new value of the cwnd */
/********************************************************/
/* flight probe - tracks changes in the flight size on a net or assoc */
/********************************************************/
SDT_PROBE_DEFINE5(sctp, flightsize, net, val,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /*
- * The port number of the local side << 16 | port number
- * of remote in network byte order.
- */
- "uintptr_t", /* The pointer to the struct sctp_nets * changing */
- "int", /* The up/down amount */
- "int"); /* The new value of the cwnd */
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /* The port number of the local side << 16 |
+ * port number of remote in network byte
+ * order. */
+ "uintptr_t", /* The pointer to the struct sctp_nets *
+ * changing */
+ "int", /* The up/down amount */
+ "int"); /* The new value of the cwnd */
/********************************************************/
/* The total flight version */
/********************************************************/
SDT_PROBE_DEFINE4(sctp, flightsize, assoc, val,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /*
- * The port number of the local side << 16 | port number
- * of remote in network byte order.
- */
- "int", /* The up/down amount */
- "int"); /* The new value of the cwnd */
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /* The port number of the local side << 16 |
+ * port number of remote in network byte
+ * order. */
+ "int", /* The up/down amount */
+ "int"); /* The new value of the cwnd */
#endif
diff --git a/freebsd/sys/netinet/sctp_header.h b/freebsd/sys/netinet/sctp_header.h
index 8f898a4b..3f4948dd 100644
--- a/freebsd/sys/netinet/sctp_header.h
+++ b/freebsd/sys/netinet/sctp_header.h
@@ -82,12 +82,6 @@ struct sctp_supported_addr_param {
uint16_t addr_type[2]; /* array of supported address types */
} SCTP_PACKED;
-/* ECN parameter */
-struct sctp_ecn_supported_param {
- struct sctp_paramhdr ph;/* type=SCTP_ECN_CAPABLE */
-} SCTP_PACKED;
-
-
/* heartbeat info parameter */
struct sctp_heartbeat_info_param {
struct sctp_paramhdr ph;
@@ -158,6 +152,23 @@ struct sctp_data_chunk {
struct sctp_data dp;
} SCTP_PACKED;
+struct sctp_idata {
+ uint32_t tsn;
+ uint16_t stream_id;
+ uint16_t reserved; /* Where does the SSN go? */
+ uint32_t msg_id;
+ union {
+ uint32_t protocol_id;
+ uint32_t fsn; /* Fragment Sequence Number */
+ } ppid_fsn;
+ /* user data follows */
+} SCTP_PACKED;
+
+struct sctp_idata_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_idata dp;
+} SCTP_PACKED;
+
/*
* Structures for the control chunks
*/
@@ -208,34 +219,6 @@ struct sctp_state_cookie { /* this is our definition... */
*/
} SCTP_PACKED;
-
-/* Used for NAT state error cause */
-struct sctp_missing_nat_state {
- uint16_t cause;
- uint16_t length;
- uint8_t data[];
-} SCTP_PACKED;
-
-
-struct sctp_inv_mandatory_param {
- uint16_t cause;
- uint16_t length;
- uint32_t num_param;
- uint16_t param;
- /*
- * We include this to 0 it since only a missing cookie will cause
- * this error.
- */
- uint16_t resv;
-} SCTP_PACKED;
-
-struct sctp_unresolv_addr {
- uint16_t cause;
- uint16_t length;
- uint16_t addr_type;
- uint16_t reserved; /* Only one invalid addr type */
-} SCTP_PACKED;
-
/* state cookie parameter */
struct sctp_state_cookie_param {
struct sctp_paramhdr ph;
@@ -376,28 +359,11 @@ struct sctp_shutdown_complete_chunk {
struct sctp_chunkhdr ch;
} SCTP_PACKED;
-/* Oper error holding a stale cookie */
-struct sctp_stale_cookie_msg {
- struct sctp_paramhdr ph;/* really an error cause */
- uint32_t time_usec;
-} SCTP_PACKED;
-
struct sctp_adaptation_layer_indication {
struct sctp_paramhdr ph;
uint32_t indication;
} SCTP_PACKED;
-struct sctp_cookie_while_shutting_down {
- struct sctphdr sh;
- struct sctp_chunkhdr ch;
- struct sctp_paramhdr ph;/* really an error cause */
-} SCTP_PACKED;
-
-struct sctp_shutdown_complete_msg {
- struct sctphdr sh;
- struct sctp_shutdown_complete_chunk shut_cmp;
-} SCTP_PACKED;
-
/*
* draft-ietf-tsvwg-addip-sctp
*/
@@ -429,6 +395,12 @@ struct sctp_strseq {
uint16_t sequence;
} SCTP_PACKED;
+struct sctp_strseq_mid {
+ uint16_t stream;
+ uint16_t flags;
+ uint32_t msg_id;
+};
+
struct sctp_forward_tsn_msg {
struct sctphdr sh;
struct sctp_forward_tsn_chunk msg;
@@ -456,6 +428,11 @@ struct sctp_pktdrop_chunk {
/**********STREAM RESET STUFF ******************/
+struct sctp_stream_reset_request {
+ struct sctp_paramhdr ph;
+ uint32_t request_seq;
+} SCTP_PACKED;
+
struct sctp_stream_reset_out_request {
struct sctp_paramhdr ph;
uint32_t request_seq; /* monotonically increasing seq no */
@@ -470,7 +447,6 @@ struct sctp_stream_reset_in_request {
uint16_t list_of_streams[]; /* if not all list of streams */
} SCTP_PACKED;
-
struct sctp_stream_reset_tsn_request {
struct sctp_paramhdr ph;
uint32_t request_seq;
@@ -556,12 +532,6 @@ struct sctp_auth_chunk {
uint8_t hmac[];
} SCTP_PACKED;
-struct sctp_auth_invalid_hmac {
- struct sctp_paramhdr ph;
- uint16_t hmac_id;
- uint16_t padding;
-} SCTP_PACKED;
-
/*
* we pre-reserve enough room for a ECNE or CWR AND a SACK with no missing
* pieces. If ENCE is missing we could have a couple of blocks. This way we
diff --git a/freebsd/sys/netinet/sctp_indata.c b/freebsd/sys/netinet/sctp_indata.c
index 07d8fd2b..12c2c80f 100644
--- a/freebsd/sys/netinet/sctp_indata.c
+++ b/freebsd/sys/netinet/sctp_indata.c
@@ -36,18 +36,22 @@
__FBSDID("$FreeBSD$");
#include <netinet/sctp_os.h>
+#include <sys/proc.h>
#include <netinet/sctp_var.h>
#include <netinet/sctp_sysctl.h>
-#include <netinet/sctp_pcb.h>
#include <netinet/sctp_header.h>
+#include <netinet/sctp_pcb.h>
#include <netinet/sctputil.h>
#include <netinet/sctp_output.h>
-#include <netinet/sctp_input.h>
-#include <netinet/sctp_indata.h>
#include <netinet/sctp_uio.h>
+#include <netinet/sctp_auth.h>
#include <netinet/sctp_timer.h>
-
-
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_bsd_addr.h>
+#include <netinet/sctp_input.h>
+#include <netinet/sctp_crc32.h>
+#include <netinet/sctp_lock_bsd.h>
/*
* NOTES: On the outbound side of things I need to check the sack timer to
* see if I should generate a sack into the chunk queue (if I have data to
@@ -57,6 +61,13 @@ __FBSDID("$FreeBSD$");
* This will cause sctp_service_queues() to get called on the top entry in
* the list.
*/
+static void
+sctp_add_chk_to_control(struct sctp_queued_to_read *control,
+ struct sctp_stream_in *strm,
+ struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_tmit_chunk *chk, int lock_held);
+
void
sctp_set_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
@@ -76,9 +87,9 @@ sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
* sctp_soreceive then we will fix this so that ONLY this
* associations data is taken into account.
*/
- if (stcb->sctp_socket == NULL)
+ if (stcb->sctp_socket == NULL) {
return (calc);
-
+ }
if (stcb->asoc.sb_cc == 0 &&
asoc->size_on_reasm_queue == 0 &&
asoc->size_on_all_streams == 0) {
@@ -88,7 +99,6 @@ sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
}
/* get actual space */
calc = (uint32_t) sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv);
-
/*
* take out what has NOT been put on socket queue and we yet hold
* for putting up.
@@ -97,7 +107,6 @@ sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
asoc->cnt_on_reasm_queue * MSIZE));
calc = sctp_sbspace_sub(calc, (uint32_t) (asoc->size_on_all_streams +
asoc->cnt_on_all_streams * MSIZE));
-
if (calc == 0) {
/* out of space */
return (calc);
@@ -124,7 +133,7 @@ sctp_build_readq_entry(struct sctp_tcb *stcb,
struct sctp_nets *net,
uint32_t tsn, uint32_t ppid,
uint32_t context, uint16_t stream_no,
- uint16_t stream_seq, uint8_t flags,
+ uint32_t stream_seq, uint8_t flags,
struct mbuf *dm)
{
struct sctp_queued_to_read *read_queue_e = NULL;
@@ -133,73 +142,26 @@ sctp_build_readq_entry(struct sctp_tcb *stcb,
if (read_queue_e == NULL) {
goto failed_build;
}
+ memset(read_queue_e, 0, sizeof(struct sctp_queued_to_read));
read_queue_e->sinfo_stream = stream_no;
read_queue_e->sinfo_ssn = stream_seq;
read_queue_e->sinfo_flags = (flags << 8);
read_queue_e->sinfo_ppid = ppid;
read_queue_e->sinfo_context = context;
- read_queue_e->sinfo_timetolive = 0;
read_queue_e->sinfo_tsn = tsn;
read_queue_e->sinfo_cumtsn = tsn;
read_queue_e->sinfo_assoc_id = sctp_get_associd(stcb);
+ read_queue_e->top_fsn = read_queue_e->fsn_included = 0xffffffff;
+ TAILQ_INIT(&read_queue_e->reasm);
read_queue_e->whoFrom = net;
- read_queue_e->length = 0;
atomic_add_int(&net->ref_count, 1);
read_queue_e->data = dm;
- read_queue_e->spec_flags = 0;
- read_queue_e->tail_mbuf = NULL;
- read_queue_e->aux_data = NULL;
read_queue_e->stcb = stcb;
read_queue_e->port_from = stcb->rport;
- read_queue_e->do_not_ref_stcb = 0;
- read_queue_e->end_added = 0;
- read_queue_e->some_taken = 0;
- read_queue_e->pdapi_aborted = 0;
failed_build:
return (read_queue_e);
}
-
-/*
- * Build out our readq entry based on the incoming packet.
- */
-static struct sctp_queued_to_read *
-sctp_build_readq_entry_chk(struct sctp_tcb *stcb,
- struct sctp_tmit_chunk *chk)
-{
- struct sctp_queued_to_read *read_queue_e = NULL;
-
- sctp_alloc_a_readq(stcb, read_queue_e);
- if (read_queue_e == NULL) {
- goto failed_build;
- }
- read_queue_e->sinfo_stream = chk->rec.data.stream_number;
- read_queue_e->sinfo_ssn = chk->rec.data.stream_seq;
- read_queue_e->sinfo_flags = (chk->rec.data.rcv_flags << 8);
- read_queue_e->sinfo_ppid = chk->rec.data.payloadtype;
- read_queue_e->sinfo_context = stcb->asoc.context;
- read_queue_e->sinfo_timetolive = 0;
- read_queue_e->sinfo_tsn = chk->rec.data.TSN_seq;
- read_queue_e->sinfo_cumtsn = chk->rec.data.TSN_seq;
- read_queue_e->sinfo_assoc_id = sctp_get_associd(stcb);
- read_queue_e->whoFrom = chk->whoTo;
- read_queue_e->aux_data = NULL;
- read_queue_e->length = 0;
- atomic_add_int(&chk->whoTo->ref_count, 1);
- read_queue_e->data = chk->data;
- read_queue_e->tail_mbuf = NULL;
- read_queue_e->stcb = stcb;
- read_queue_e->port_from = stcb->rport;
- read_queue_e->spec_flags = 0;
- read_queue_e->do_not_ref_stcb = 0;
- read_queue_e->end_added = 0;
- read_queue_e->some_taken = 0;
- read_queue_e->pdapi_aborted = 0;
-failed_build:
- return (read_queue_e);
-}
-
-
struct mbuf *
sctp_build_ctl_nchunk(struct sctp_inpcb *inp, struct sctp_sndrcvinfo *sinfo)
{
@@ -225,9 +187,9 @@ sctp_build_ctl_nchunk(struct sctp_inpcb *inp, struct sctp_sndrcvinfo *sinfo)
}
seinfo = (struct sctp_extrcvinfo *)sinfo;
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO) &&
- (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_AVAIL)) {
+ (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_AVAIL)) {
provide_nxt = 1;
- len += CMSG_SPACE(sizeof(struct sctp_rcvinfo));
+ len += CMSG_SPACE(sizeof(struct sctp_nxtinfo));
} else {
provide_nxt = 0;
}
@@ -243,7 +205,7 @@ sctp_build_ctl_nchunk(struct sctp_inpcb *inp, struct sctp_sndrcvinfo *sinfo)
use_extended = 0;
}
- ret = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA);
+ ret = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
if (ret == NULL) {
/* No space */
return (ret);
@@ -278,20 +240,20 @@ sctp_build_ctl_nchunk(struct sctp_inpcb *inp, struct sctp_sndrcvinfo *sinfo)
cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_nxtinfo));
cmh->cmsg_type = SCTP_NXTINFO;
nxtinfo = (struct sctp_nxtinfo *)CMSG_DATA(cmh);
- nxtinfo->nxt_sid = seinfo->sreinfo_next_stream;
+ nxtinfo->nxt_sid = seinfo->serinfo_next_stream;
nxtinfo->nxt_flags = 0;
- if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_IS_UNORDERED) {
+ if (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_IS_UNORDERED) {
nxtinfo->nxt_flags |= SCTP_UNORDERED;
}
- if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_IS_NOTIFICATION) {
+ if (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_IS_NOTIFICATION) {
nxtinfo->nxt_flags |= SCTP_NOTIFICATION;
}
- if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_ISCOMPLETE) {
+ if (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_ISCOMPLETE) {
nxtinfo->nxt_flags |= SCTP_COMPLETE;
}
- nxtinfo->nxt_ppid = seinfo->sreinfo_next_ppid;
- nxtinfo->nxt_length = seinfo->sreinfo_next_length;
- nxtinfo->nxt_assoc_id = seinfo->sreinfo_next_aid;
+ nxtinfo->nxt_ppid = seinfo->serinfo_next_ppid;
+ nxtinfo->nxt_length = seinfo->serinfo_next_length;
+ nxtinfo->nxt_assoc_id = seinfo->serinfo_next_aid;
cmh = (struct cmsghdr *)((caddr_t)cmh + CMSG_SPACE(sizeof(struct sctp_nxtinfo)));
SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_nxtinfo));
}
@@ -319,6 +281,7 @@ sctp_mark_non_revokable(struct sctp_association *asoc, uint32_t tsn)
{
uint32_t gap, i, cumackp1;
int fnd = 0;
+ int in_r = 0, in_nr = 0;
if (SCTP_BASE_SYSCTL(sctp_do_drain) == 0) {
return;
@@ -332,15 +295,20 @@ sctp_mark_non_revokable(struct sctp_association *asoc, uint32_t tsn)
return;
}
SCTP_CALC_TSN_TO_GAP(gap, tsn, asoc->mapping_array_base_tsn);
- if (!SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) {
+ in_r = SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap);
+ in_nr = SCTP_IS_TSN_PRESENT(asoc->nr_mapping_array, gap);
+ if ((in_r == 0) && (in_nr == 0)) {
+#ifdef INVARIANTS
+ panic("Things are really messed up now");
+#else
SCTP_PRINTF("gap:%x tsn:%x\n", gap, tsn);
sctp_print_mapping_array(asoc);
-#ifdef INVARIANTS
- panic("Things are really messed up now!!");
#endif
}
- SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap);
- SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
+ if (in_nr == 0)
+ SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap);
+ if (in_r)
+ SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) {
asoc->highest_tsn_inside_nr_map = tsn;
}
@@ -360,197 +328,162 @@ sctp_mark_non_revokable(struct sctp_association *asoc, uint32_t tsn)
}
}
-
-/*
- * We are delivering currently from the reassembly queue. We must continue to
- * deliver until we either: 1) run out of space. 2) run out of sequential
- * TSN's 3) hit the SCTP_DATA_LAST_FRAG flag.
- */
-static void
-sctp_service_reassembly(struct sctp_tcb *stcb, struct sctp_association *asoc)
+static int
+sctp_place_control_in_stream(struct sctp_stream_in *strm,
+ struct sctp_association *asoc,
+ struct sctp_queued_to_read *control)
{
- struct sctp_tmit_chunk *chk, *nchk;
- uint16_t nxt_todel;
- uint16_t stream_no;
- int end = 0;
- int cntDel;
- struct sctp_queued_to_read *control, *ctl, *nctl;
-
- if (stcb == NULL)
- return;
-
- cntDel = stream_no = 0;
- if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
- (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) ||
- (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
- /* socket above is long gone or going.. */
-abandon:
- asoc->fragmented_delivery_inprogress = 0;
- TAILQ_FOREACH_SAFE(chk, &asoc->reasmqueue, sctp_next, nchk) {
- TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
- asoc->size_on_reasm_queue -= chk->send_size;
- sctp_ucount_decr(asoc->cnt_on_reasm_queue);
- /*
- * Lose the data pointer, since its in the socket
- * buffer
- */
- if (chk->data) {
- sctp_m_freem(chk->data);
- chk->data = NULL;
+ struct sctp_queued_to_read *at;
+ struct sctp_readhead *q;
+ uint8_t bits, unordered;
+
+ bits = (control->sinfo_flags >> 8);
+ unordered = bits & SCTP_DATA_UNORDERED;
+ if (unordered) {
+ q = &strm->uno_inqueue;
+ if (asoc->idata_supported == 0) {
+ if (!TAILQ_EMPTY(q)) {
+ /*
+ * Only one stream can be here in old style
+ * -- abort
+ */
+ return (-1);
}
- /* Now free the address and data */
- sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
- /* sa_ignore FREED_MEMORY */
+ TAILQ_INSERT_TAIL(q, control, next_instrm);
+ control->on_strm_q = SCTP_ON_UNORDERED;
+ return (0);
}
- return;
+ } else {
+ q = &strm->inqueue;
}
- SCTP_TCB_LOCK_ASSERT(stcb);
- TAILQ_FOREACH_SAFE(chk, &asoc->reasmqueue, sctp_next, nchk) {
- if (chk->rec.data.TSN_seq != (asoc->tsn_last_delivered + 1)) {
- /* Can't deliver more :< */
- return;
- }
- stream_no = chk->rec.data.stream_number;
- nxt_todel = asoc->strmin[stream_no].last_sequence_delivered + 1;
- if (nxt_todel != chk->rec.data.stream_seq &&
- (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) {
- /*
- * Not the next sequence to deliver in its stream OR
- * unordered
- */
- return;
- }
- if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
-
- control = sctp_build_readq_entry_chk(stcb, chk);
- if (control == NULL) {
- /* out of memory? */
- return;
- }
- /* save it off for our future deliveries */
- stcb->asoc.control_pdapi = control;
- if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG)
- end = 1;
- else
- end = 0;
- sctp_mark_non_revokable(asoc, chk->rec.data.TSN_seq);
- sctp_add_to_readq(stcb->sctp_ep,
- stcb, control, &stcb->sctp_socket->so_rcv, end,
- SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
- cntDel++;
+ if ((bits & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG) {
+ control->end_added = control->last_frag_seen = control->first_frag_seen = 1;
+ }
+ if (TAILQ_EMPTY(q)) {
+ /* Empty queue */
+ TAILQ_INSERT_HEAD(q, control, next_instrm);
+ if (unordered) {
+ control->on_strm_q = SCTP_ON_UNORDERED;
} else {
- if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG)
- end = 1;
- else
- end = 0;
- sctp_mark_non_revokable(asoc, chk->rec.data.TSN_seq);
- if (sctp_append_to_readq(stcb->sctp_ep, stcb,
- stcb->asoc.control_pdapi,
- chk->data, end, chk->rec.data.TSN_seq,
- &stcb->sctp_socket->so_rcv)) {
+ control->on_strm_q = SCTP_ON_ORDERED;
+ }
+ return (0);
+ } else {
+ TAILQ_FOREACH(at, q, next_instrm) {
+ if (SCTP_TSN_GT(at->msg_id, control->msg_id)) {
/*
- * something is very wrong, either
- * control_pdapi is NULL, or the tail_mbuf
- * is corrupt, or there is a EOM already on
- * the mbuf chain.
+ * one in queue is bigger than the new one,
+ * insert before this one
*/
- if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
- goto abandon;
+ TAILQ_INSERT_BEFORE(at, control, next_instrm);
+ if (unordered) {
+ control->on_strm_q = SCTP_ON_UNORDERED;
} else {
-#ifdef INVARIANTS
- if ((stcb->asoc.control_pdapi == NULL) || (stcb->asoc.control_pdapi->tail_mbuf == NULL)) {
- panic("This should not happen control_pdapi NULL?");
+ control->on_strm_q = SCTP_ON_ORDERED;
+ }
+ break;
+ } else if (at->msg_id == control->msg_id) {
+ /*
+ * Gak, He sent me a duplicate msg id
+ * number?? return -1 to abort.
+ */
+ return (-1);
+ } else {
+ if (TAILQ_NEXT(at, next_instrm) == NULL) {
+ /*
+ * We are at the end, insert it
+ * after this one
+ */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, at,
+ SCTP_STR_LOG_FROM_INSERT_TL);
}
- /* if we did not panic, it was a EOM */
- panic("Bad chunking ??");
-#else
- if ((stcb->asoc.control_pdapi == NULL) || (stcb->asoc.control_pdapi->tail_mbuf == NULL)) {
- SCTP_PRINTF("This should not happen control_pdapi NULL?\n");
+ TAILQ_INSERT_AFTER(q,
+ at, control, next_instrm);
+ if (unordered) {
+ control->on_strm_q = SCTP_ON_UNORDERED;
+ } else {
+ control->on_strm_q = SCTP_ON_ORDERED;
}
- SCTP_PRINTF("Bad chunking ??\n");
- SCTP_PRINTF("Dumping re-assembly queue this will probably hose the association\n");
-
-#endif
- goto abandon;
+ break;
}
}
- cntDel++;
}
- /* pull it we did it */
- TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
- if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
- asoc->fragmented_delivery_inprogress = 0;
- if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) {
- asoc->strmin[stream_no].last_sequence_delivered++;
- }
- if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0) {
- SCTP_STAT_INCR_COUNTER64(sctps_reasmusrmsgs);
- }
- } else if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
- /*
- * turn the flag back on since we just delivered
- * yet another one.
- */
- asoc->fragmented_delivery_inprogress = 1;
- }
- asoc->tsn_of_pdapi_last_delivered = chk->rec.data.TSN_seq;
- asoc->last_flags_delivered = chk->rec.data.rcv_flags;
- asoc->last_strm_seq_delivered = chk->rec.data.stream_seq;
- asoc->last_strm_no_delivered = chk->rec.data.stream_number;
+ }
+ return (0);
+}
- asoc->tsn_last_delivered = chk->rec.data.TSN_seq;
- asoc->size_on_reasm_queue -= chk->send_size;
- sctp_ucount_decr(asoc->cnt_on_reasm_queue);
- /* free up the chk */
- chk->data = NULL;
- sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
+static void
+sctp_abort_in_reasm(struct sctp_tcb *stcb,
+ struct sctp_queued_to_read *control,
+ struct sctp_tmit_chunk *chk,
+ int *abort_flag, int opspot)
+{
+ char msg[SCTP_DIAG_INFO_LEN];
+ struct mbuf *oper;
+
+ if (stcb->asoc.idata_supported) {
+ snprintf(msg, sizeof(msg),
+ "Reass %x,CF:%x,TSN=%8.8x,SID=%4.4x,FSN=%8.8x,MID:%8.8x",
+ opspot,
+ control->fsn_included,
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.fsn_num, chk->rec.data.stream_seq);
+ } else {
+ snprintf(msg, sizeof(msg),
+ "Reass %x,CI:%x,TSN=%8.8x,SID=%4.4x,FSN=%4.4x,SSN:%4.4x",
+ opspot,
+ control->fsn_included,
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.fsn_num,
+ (uint16_t) chk->rec.data.stream_seq);
+ }
+ oper = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_1;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+}
- if (asoc->fragmented_delivery_inprogress == 0) {
- /*
- * Now lets see if we can deliver the next one on
- * the stream
- */
- struct sctp_stream_in *strm;
+static void
+sctp_clean_up_control(struct sctp_tcb *stcb, struct sctp_queued_to_read *control)
+{
+ /*
+ * The control could not be placed and must be cleaned.
+ */
+ struct sctp_tmit_chunk *chk, *nchk;
- strm = &asoc->strmin[stream_no];
- nxt_todel = strm->last_sequence_delivered + 1;
- TAILQ_FOREACH_SAFE(ctl, &strm->inqueue, next, nctl) {
- /* Deliver more if we can. */
- if (nxt_todel == ctl->sinfo_ssn) {
- TAILQ_REMOVE(&strm->inqueue, ctl, next);
- asoc->size_on_all_streams -= ctl->length;
- sctp_ucount_decr(asoc->cnt_on_all_streams);
- strm->last_sequence_delivered++;
- sctp_mark_non_revokable(asoc, ctl->sinfo_tsn);
- sctp_add_to_readq(stcb->sctp_ep, stcb,
- ctl,
- &stcb->sctp_socket->so_rcv, 1,
- SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
- } else {
- break;
- }
- nxt_todel = strm->last_sequence_delivered + 1;
- }
- break;
- }
+ TAILQ_FOREACH_SAFE(chk, &control->reasm, sctp_next, nchk) {
+ TAILQ_REMOVE(&control->reasm, chk, sctp_next);
+ if (chk->data)
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
}
+ sctp_free_a_readq(stcb, control);
}
/*
* Queue the chunk either right into the socket buffer if it is the next one
* to go OR put it in the correct place in the delivery queue. If we do
- * append to the so_buf, keep doing so until we are out of order. One big
- * question still remains, what to do when the socket buffer is FULL??
+ * append to the so_buf, keep doing so until we are out of order as
+ * long as the control's entered are non-fragmented.
*/
static void
-sctp_queue_data_to_stream(struct sctp_tcb *stcb, struct sctp_association *asoc,
- struct sctp_queued_to_read *control, int *abort_flag)
+sctp_queue_data_to_stream(struct sctp_tcb *stcb,
+ struct sctp_stream_in *strm,
+ struct sctp_association *asoc,
+ struct sctp_queued_to_read *control, int *abort_flag, int *need_reasm)
{
/*
* FIX-ME maybe? What happens when the ssn wraps? If we are getting
* all the data in one stream this could happen quite rapidly. One
* could use the TSN to keep track of things, but this scheme breaks
- * down in the other type of stream useage that could occur. Send a
+ * down in the other type of stream usage that could occur. Send a
* single msg to stream 0, send 4Billion messages to stream 1, now
* send a message to stream 0. You have a situation where the TSN
* has wrapped but not in the stream. Is this worth worrying about
@@ -564,47 +497,57 @@ sctp_queue_data_to_stream(struct sctp_tcb *stcb, struct sctp_association *asoc,
* SSN alone. Maybe a hybred approach is the answer
*
*/
- struct sctp_stream_in *strm;
struct sctp_queued_to_read *at;
int queue_needed;
- uint16_t nxt_todel;
+ uint32_t nxt_todel;
struct mbuf *op_err;
char msg[SCTP_DIAG_INFO_LEN];
- queue_needed = 1;
- asoc->size_on_all_streams += control->length;
- sctp_ucount_incr(asoc->cnt_on_all_streams);
- strm = &asoc->strmin[control->sinfo_stream];
- nxt_todel = strm->last_sequence_delivered + 1;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_INTO_STRD);
}
- SCTPDBG(SCTP_DEBUG_INDATA1,
- "queue to stream called for ssn:%u lastdel:%u nxt:%u\n",
- (uint32_t) control->sinfo_stream,
- (uint32_t) strm->last_sequence_delivered,
- (uint32_t) nxt_todel);
- if (SCTP_SSN_GE(strm->last_sequence_delivered, control->sinfo_ssn)) {
+ if (SCTP_MSGID_GT((!asoc->idata_supported), strm->last_sequence_delivered, control->sinfo_ssn)) {
/* The incoming sseq is behind where we last delivered? */
- SCTPDBG(SCTP_DEBUG_INDATA1, "Duplicate S-SEQ:%d delivered:%d from peer, Abort association\n",
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Duplicate S-SEQ: %u delivered: %u from peer, Abort association\n",
control->sinfo_ssn, strm->last_sequence_delivered);
protocol_error:
/*
* throw it in the stream so it gets cleaned up in
* association destruction
*/
- TAILQ_INSERT_HEAD(&strm->inqueue, control, next);
+ TAILQ_INSERT_HEAD(&strm->inqueue, control, next_instrm);
snprintf(msg, sizeof(msg), "Delivered SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
strm->last_sequence_delivered, control->sinfo_tsn,
control->sinfo_stream, control->sinfo_ssn);
op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_1;
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_2;
sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
+ if ((SCTP_TSN_GE(asoc->cumulative_tsn, control->sinfo_tsn)) && (asoc->idata_supported == 0)) {
+ goto protocol_error;
+ }
+ queue_needed = 1;
+ asoc->size_on_all_streams += control->length;
+ sctp_ucount_incr(asoc->cnt_on_all_streams);
+ nxt_todel = strm->last_sequence_delivered + 1;
if (nxt_todel == control->sinfo_ssn) {
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
/* can be delivered right away? */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_IMMED_DEL);
@@ -614,19 +557,27 @@ protocol_error:
asoc->size_on_all_streams -= control->length;
sctp_ucount_decr(asoc->cnt_on_all_streams);
strm->last_sequence_delivered++;
-
sctp_mark_non_revokable(asoc, control->sinfo_tsn);
sctp_add_to_readq(stcb->sctp_ep, stcb,
control,
&stcb->sctp_socket->so_rcv, 1,
- SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
- TAILQ_FOREACH_SAFE(control, &strm->inqueue, next, at) {
+ SCTP_READ_LOCK_NOT_HELD, SCTP_SO_LOCKED);
+ TAILQ_FOREACH_SAFE(control, &strm->inqueue, next_instrm, at) {
/* all delivered */
nxt_todel = strm->last_sequence_delivered + 1;
- if (nxt_todel == control->sinfo_ssn) {
- TAILQ_REMOVE(&strm->inqueue, control, next);
+ if ((nxt_todel == control->sinfo_ssn) &&
+ (((control->sinfo_flags >> 8) & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG)) {
asoc->size_on_all_streams -= control->length;
sctp_ucount_decr(asoc->cnt_on_all_streams);
+ if (control->on_strm_q == SCTP_ON_ORDERED) {
+ TAILQ_REMOVE(&strm->inqueue, control, next_instrm);
+#ifdef INVARIANTS
+ } else {
+ panic("Huh control: %p is on_strm_q: %d",
+ control, control->on_strm_q);
+#endif
+ }
+ control->on_strm_q = 0;
strm->last_sequence_delivered++;
/*
* We ignore the return of deliver_data here
@@ -643,184 +594,686 @@ protocol_error:
control,
&stcb->sctp_socket->so_rcv, 1,
SCTP_READ_LOCK_NOT_HELD,
- SCTP_SO_NOT_LOCKED);
+ SCTP_SO_LOCKED);
continue;
+ } else if (nxt_todel == control->sinfo_ssn) {
+ *need_reasm = 1;
}
break;
}
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
}
if (queue_needed) {
/*
* Ok, we did not deliver this guy, find the correct place
* to put it on the queue.
*/
- if (SCTP_TSN_GE(asoc->cumulative_tsn, control->sinfo_tsn)) {
- goto protocol_error;
+ if (sctp_place_control_in_stream(strm, asoc, control)) {
+ snprintf(msg, sizeof(msg),
+ "Queue to str msg_id: %u duplicate",
+ control->msg_id);
+ sctp_clean_up_control(stcb, control);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_3;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
}
- if (TAILQ_EMPTY(&strm->inqueue)) {
- /* Empty queue */
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
- sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_INSERT_HD);
+ }
+}
+
+
+static void
+sctp_setup_tail_pointer(struct sctp_queued_to_read *control)
+{
+ struct mbuf *m, *prev = NULL;
+ struct sctp_tcb *stcb;
+
+ stcb = control->stcb;
+ control->held_length = 0;
+ control->length = 0;
+ m = control->data;
+ while (m) {
+ if (SCTP_BUF_LEN(m) == 0) {
+ /* Skip mbufs with NO length */
+ if (prev == NULL) {
+ /* First one */
+ control->data = sctp_m_free(m);
+ m = control->data;
+ } else {
+ SCTP_BUF_NEXT(prev) = sctp_m_free(m);
+ m = SCTP_BUF_NEXT(prev);
}
- TAILQ_INSERT_HEAD(&strm->inqueue, control, next);
- } else {
- TAILQ_FOREACH(at, &strm->inqueue, next) {
- if (SCTP_SSN_GT(at->sinfo_ssn, control->sinfo_ssn)) {
+ if (m == NULL) {
+ control->tail_mbuf = prev;
+ }
+ continue;
+ }
+ prev = m;
+ atomic_add_int(&control->length, SCTP_BUF_LEN(m));
+ if (control->on_read_q) {
+ /*
+ * On read queue so we must increment the SB stuff,
+ * we assume caller has done any locks of SB.
+ */
+ sctp_sballoc(stcb, &stcb->sctp_socket->so_rcv, m);
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ if (prev) {
+ control->tail_mbuf = prev;
+ }
+}
+
+static void
+sctp_add_to_tail_pointer(struct sctp_queued_to_read *control, struct mbuf *m)
+{
+ struct mbuf *prev = NULL;
+ struct sctp_tcb *stcb;
+
+ stcb = control->stcb;
+ if (stcb == NULL) {
+#ifdef INVARIANTS
+ panic("Control broken");
+#else
+ return;
+#endif
+ }
+ if (control->tail_mbuf == NULL) {
+ /* TSNH */
+ control->data = m;
+ sctp_setup_tail_pointer(control);
+ return;
+ }
+ control->tail_mbuf->m_next = m;
+ while (m) {
+ if (SCTP_BUF_LEN(m) == 0) {
+ /* Skip mbufs with NO length */
+ if (prev == NULL) {
+ /* First one */
+ control->tail_mbuf->m_next = sctp_m_free(m);
+ m = control->tail_mbuf->m_next;
+ } else {
+ SCTP_BUF_NEXT(prev) = sctp_m_free(m);
+ m = SCTP_BUF_NEXT(prev);
+ }
+ if (m == NULL) {
+ control->tail_mbuf = prev;
+ }
+ continue;
+ }
+ prev = m;
+ if (control->on_read_q) {
+ /*
+ * On read queue so we must increment the SB stuff,
+ * we assume caller has done any locks of SB.
+ */
+ sctp_sballoc(stcb, &stcb->sctp_socket->so_rcv, m);
+ }
+ atomic_add_int(&control->length, SCTP_BUF_LEN(m));
+ m = SCTP_BUF_NEXT(m);
+ }
+ if (prev) {
+ control->tail_mbuf = prev;
+ }
+}
+
+static void
+sctp_build_readq_entry_from_ctl(struct sctp_queued_to_read *nc, struct sctp_queued_to_read *control)
+{
+ memset(nc, 0, sizeof(struct sctp_queued_to_read));
+ nc->sinfo_stream = control->sinfo_stream;
+ nc->sinfo_ssn = control->sinfo_ssn;
+ TAILQ_INIT(&nc->reasm);
+ nc->top_fsn = control->top_fsn;
+ nc->msg_id = control->msg_id;
+ nc->sinfo_flags = control->sinfo_flags;
+ nc->sinfo_ppid = control->sinfo_ppid;
+ nc->sinfo_context = control->sinfo_context;
+ nc->fsn_included = 0xffffffff;
+ nc->sinfo_tsn = control->sinfo_tsn;
+ nc->sinfo_cumtsn = control->sinfo_cumtsn;
+ nc->sinfo_assoc_id = control->sinfo_assoc_id;
+ nc->whoFrom = control->whoFrom;
+ atomic_add_int(&nc->whoFrom->ref_count, 1);
+ nc->stcb = control->stcb;
+ nc->port_from = control->port_from;
+}
+
+static void
+sctp_reset_a_control(struct sctp_queued_to_read *control,
+ struct sctp_inpcb *inp, uint32_t tsn)
+{
+ control->fsn_included = tsn;
+ if (control->on_read_q) {
+ /*
+ * We have to purge it from there, hopefully this will work
+ * :-)
+ */
+ TAILQ_REMOVE(&inp->read_queue, control, next);
+ control->on_read_q = 0;
+ }
+}
+
+static int
+sctp_handle_old_unordered_data(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_stream_in *strm,
+ struct sctp_queued_to_read *control,
+ uint32_t pd_point,
+ int inp_read_lock_held)
+{
+ /*
+ * Special handling for the old un-ordered data chunk. All the
+ * chunks/TSN's go to msg_id 0. So we have to do the old style
+ * watching to see if we have it all. If you return one, no other
+ * control entries on the un-ordered queue will be looked at. In
+ * theory there should be no others entries in reality, unless the
+ * guy is sending both unordered NDATA and unordered DATA...
+ */
+ struct sctp_tmit_chunk *chk, *lchk, *tchk;
+ uint32_t fsn;
+ struct sctp_queued_to_read *nc;
+ int cnt_added;
+
+ if (control->first_frag_seen == 0) {
+ /* Nothing we can do, we have not seen the first piece yet */
+ return (1);
+ }
+ /* Collapse any we can */
+ cnt_added = 0;
+restart:
+ fsn = control->fsn_included + 1;
+ /* Now what can we add? */
+ TAILQ_FOREACH_SAFE(chk, &control->reasm, sctp_next, lchk) {
+ if (chk->rec.data.fsn_num == fsn) {
+ /* Ok lets add it */
+ sctp_alloc_a_readq(stcb, nc);
+ if (nc == NULL) {
+ break;
+ }
+ memset(nc, 0, sizeof(struct sctp_queued_to_read));
+ TAILQ_REMOVE(&control->reasm, chk, sctp_next);
+ sctp_add_chk_to_control(control, strm, stcb, asoc, chk, SCTP_READ_LOCK_NOT_HELD);
+ fsn++;
+ cnt_added++;
+ chk = NULL;
+ if (control->end_added) {
+ /* We are done */
+ if (!TAILQ_EMPTY(&control->reasm)) {
/*
- * one in queue is bigger than the
- * new one, insert before this one
+ * Ok we have to move anything left
+ * on the control queue to a new
+ * control.
*/
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
- sctp_log_strm_del(control, at,
- SCTP_STR_LOG_FROM_INSERT_MD);
+ sctp_build_readq_entry_from_ctl(nc, control);
+ tchk = TAILQ_FIRST(&control->reasm);
+ if (tchk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+ TAILQ_REMOVE(&control->reasm, tchk, sctp_next);
+ nc->first_frag_seen = 1;
+ nc->fsn_included = tchk->rec.data.fsn_num;
+ nc->data = tchk->data;
+ nc->sinfo_ppid = tchk->rec.data.payloadtype;
+ nc->sinfo_tsn = tchk->rec.data.TSN_seq;
+ sctp_mark_non_revokable(asoc, tchk->rec.data.TSN_seq);
+ tchk->data = NULL;
+ sctp_free_a_chunk(stcb, tchk, SCTP_SO_NOT_LOCKED);
+ sctp_setup_tail_pointer(nc);
+ tchk = TAILQ_FIRST(&control->reasm);
+ }
+ /* Spin the rest onto the queue */
+ while (tchk) {
+ TAILQ_REMOVE(&control->reasm, tchk, sctp_next);
+ TAILQ_INSERT_TAIL(&nc->reasm, tchk, sctp_next);
+ tchk = TAILQ_FIRST(&control->reasm);
}
- TAILQ_INSERT_BEFORE(at, control, next);
- break;
- } else if (at->sinfo_ssn == control->sinfo_ssn) {
/*
- * Gak, He sent me a duplicate str
- * seq number
+ * Now lets add it to the queue
+ * after removing control
*/
+ TAILQ_INSERT_TAIL(&strm->uno_inqueue, nc, next_instrm);
+ nc->on_strm_q = SCTP_ON_UNORDERED;
+ if (control->on_strm_q) {
+ TAILQ_REMOVE(&strm->uno_inqueue, control, next_instrm);
+ control->on_strm_q = 0;
+ }
+ }
+ if (control->pdapi_started) {
+ strm->pd_api_started = 0;
+ control->pdapi_started = 0;
+ }
+ if (control->on_strm_q) {
+ TAILQ_REMOVE(&strm->uno_inqueue, control, next_instrm);
+ control->on_strm_q = 0;
+ SCTP_STAT_INCR_COUNTER64(sctps_reasmusrmsgs);
+ }
+ if (control->on_read_q == 0) {
+ sctp_add_to_readq(stcb->sctp_ep, stcb, control,
+ &stcb->sctp_socket->so_rcv, control->end_added,
+ inp_read_lock_held, SCTP_SO_NOT_LOCKED);
+ }
+ sctp_wakeup_the_read_socket(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
+ if ((nc->first_frag_seen) && !TAILQ_EMPTY(&nc->reasm)) {
/*
- * foo bar, I guess I will just free
- * this new guy, should we abort
- * too? FIX ME MAYBE? Or it COULD be
- * that the SSN's have wrapped.
- * Maybe I should compare to TSN
- * somehow... sigh for now just blow
- * away the chunk!
+ * Switch to the new guy and
+ * continue
*/
-
- if (control->data)
- sctp_m_freem(control->data);
- control->data = NULL;
- asoc->size_on_all_streams -= control->length;
- sctp_ucount_decr(asoc->cnt_on_all_streams);
- if (control->whoFrom) {
- sctp_free_remote_addr(control->whoFrom);
- control->whoFrom = NULL;
- }
- sctp_free_a_readq(stcb, control);
- return;
+ control = nc;
+ goto restart;
} else {
- if (TAILQ_NEXT(at, next) == NULL) {
- /*
- * We are at the end, insert
- * it after this one
- */
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
- sctp_log_strm_del(control, at,
- SCTP_STR_LOG_FROM_INSERT_TL);
- }
- TAILQ_INSERT_AFTER(&strm->inqueue,
- at, control, next);
- break;
+ if (nc->on_strm_q == 0) {
+ sctp_free_a_readq(stcb, nc);
}
}
+ return (1);
+ } else {
+ sctp_free_a_readq(stcb, nc);
}
+ } else {
+ /* Can't add more */
+ break;
}
}
+ if ((control->length > pd_point) && (strm->pd_api_started == 0)) {
+ strm->pd_api_started = 1;
+ control->pdapi_started = 1;
+ sctp_add_to_readq(stcb->sctp_ep, stcb, control,
+ &stcb->sctp_socket->so_rcv, control->end_added,
+ inp_read_lock_held, SCTP_SO_NOT_LOCKED);
+ sctp_wakeup_the_read_socket(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
+ return (0);
+ } else {
+ return (1);
+ }
+}
+
+static void
+sctp_inject_old_unordered_data(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_queued_to_read *control,
+ struct sctp_tmit_chunk *chk,
+ int *abort_flag)
+{
+ struct sctp_tmit_chunk *at;
+ int inserted;
+
+ /*
+ * Here we need to place the chunk into the control structure sorted
+ * in the correct order.
+ */
+ if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+ /* Its the very first one. */
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "chunk is a first fsn: %u becomes fsn_included\n",
+ chk->rec.data.fsn_num);
+ if (control->first_frag_seen) {
+ /*
+ * In old un-ordered we can reassembly on one
+ * control multiple messages. As long as the next
+ * FIRST is greater then the old first (TSN i.e. FSN
+ * wise)
+ */
+ struct mbuf *tdata;
+ uint32_t tmp;
+
+ if (SCTP_TSN_GT(chk->rec.data.fsn_num, control->fsn_included)) {
+ /*
+ * Easy way the start of a new guy beyond
+ * the lowest
+ */
+ goto place_chunk;
+ }
+ if ((chk->rec.data.fsn_num == control->fsn_included) ||
+ (control->pdapi_started)) {
+ /*
+ * Ok this should not happen, if it does we
+ * started the pd-api on the higher TSN
+ * (since the equals part is a TSN failure
+ * it must be that).
+ *
+ * We are completly hosed in that case since I
+ * have no way to recover. This really will
+ * only happen if we can get more TSN's
+ * higher before the pd-api-point.
+ */
+ sctp_abort_in_reasm(stcb, control, chk,
+ abort_flag,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_4);
+
+ return;
+ }
+ /*
+ * Ok we have two firsts and the one we just got is
+ * smaller than the one we previously placed.. yuck!
+ * We must swap them out.
+ */
+ /* swap the mbufs */
+ tdata = control->data;
+ control->data = chk->data;
+ chk->data = tdata;
+ /* Save the lengths */
+ chk->send_size = control->length;
+ /* Recompute length of control and tail pointer */
+ sctp_setup_tail_pointer(control);
+ /* Fix the FSN included */
+ tmp = control->fsn_included;
+ control->fsn_included = chk->rec.data.fsn_num;
+ chk->rec.data.fsn_num = tmp;
+ /* Fix the TSN included */
+ tmp = control->sinfo_tsn;
+ control->sinfo_tsn = chk->rec.data.TSN_seq;
+ chk->rec.data.TSN_seq = tmp;
+ /* Fix the PPID included */
+ tmp = control->sinfo_ppid;
+ control->sinfo_ppid = chk->rec.data.payloadtype;
+ chk->rec.data.payloadtype = tmp;
+ /* Fix tail pointer */
+ goto place_chunk;
+ }
+ control->first_frag_seen = 1;
+ control->top_fsn = control->fsn_included = chk->rec.data.fsn_num;
+ control->sinfo_tsn = chk->rec.data.TSN_seq;
+ control->sinfo_ppid = chk->rec.data.payloadtype;
+ control->data = chk->data;
+ sctp_mark_non_revokable(asoc, chk->rec.data.TSN_seq);
+ chk->data = NULL;
+ sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
+ sctp_setup_tail_pointer(control);
+ return;
+ }
+place_chunk:
+ inserted = 0;
+ TAILQ_FOREACH(at, &control->reasm, sctp_next) {
+ if (SCTP_TSN_GT(at->rec.data.fsn_num, chk->rec.data.fsn_num)) {
+ /*
+ * This one in queue is bigger than the new one,
+ * insert the new one before at.
+ */
+ asoc->size_on_reasm_queue += chk->send_size;
+ sctp_ucount_incr(asoc->cnt_on_reasm_queue);
+ inserted = 1;
+ TAILQ_INSERT_BEFORE(at, chk, sctp_next);
+ break;
+ } else if (at->rec.data.fsn_num == chk->rec.data.fsn_num) {
+ /*
+ * They sent a duplicate fsn number. This really
+ * should not happen since the FSN is a TSN and it
+ * should have been dropped earlier.
+ */
+ sctp_abort_in_reasm(stcb, control, chk,
+ abort_flag,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_5);
+ return;
+ }
+ }
+ if (inserted == 0) {
+ /* Its at the end */
+ asoc->size_on_reasm_queue += chk->send_size;
+ sctp_ucount_incr(asoc->cnt_on_reasm_queue);
+ control->top_fsn = chk->rec.data.fsn_num;
+ TAILQ_INSERT_TAIL(&control->reasm, chk, sctp_next);
+ }
}
-/*
- * Returns two things: You get the total size of the deliverable parts of the
- * first fragmented message on the reassembly queue. And you get a 1 back if
- * all of the message is ready or a 0 back if the message is still incomplete
- */
static int
-sctp_is_all_msg_on_reasm(struct sctp_association *asoc, uint32_t * t_size)
+sctp_deliver_reasm_check(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct sctp_stream_in *strm, int inp_read_lock_held)
{
- struct sctp_tmit_chunk *chk;
- uint32_t tsn;
+ /*
+ * Given a stream, strm, see if any of the SSN's on it that are
+ * fragmented are ready to deliver. If so go ahead and place them on
+ * the read queue. In so placing if we have hit the end, then we
+ * need to remove them from the stream's queue.
+ */
+ struct sctp_queued_to_read *control, *nctl = NULL;
+ uint32_t next_to_del;
+ uint32_t pd_point;
+ int ret = 0;
- *t_size = 0;
- chk = TAILQ_FIRST(&asoc->reasmqueue);
- if (chk == NULL) {
- /* nothing on the queue */
- return (0);
+ if (stcb->sctp_socket) {
+ pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket) >> SCTP_PARTIAL_DELIVERY_SHIFT,
+ stcb->sctp_ep->partial_delivery_point);
+ } else {
+ pd_point = stcb->sctp_ep->partial_delivery_point;
+ }
+ control = TAILQ_FIRST(&strm->uno_inqueue);
+
+ if ((control) &&
+ (asoc->idata_supported == 0)) {
+ /* Special handling needed for "old" data format */
+ if (sctp_handle_old_unordered_data(stcb, asoc, strm, control, pd_point, inp_read_lock_held)) {
+ goto done_un;
+ }
}
- if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0) {
- /* Not a first on the queue */
+ if (strm->pd_api_started) {
+ /* Can't add more */
return (0);
}
- tsn = chk->rec.data.TSN_seq;
- TAILQ_FOREACH(chk, &asoc->reasmqueue, sctp_next) {
- if (tsn != chk->rec.data.TSN_seq) {
- return (0);
+ while (control) {
+ SCTPDBG(SCTP_DEBUG_XXX, "Looking at control: %p e(%d) ssn: %u top_fsn: %u inc_fsn: %u -uo\n",
+ control, control->end_added, control->sinfo_ssn, control->top_fsn, control->fsn_included);
+ nctl = TAILQ_NEXT(control, next_instrm);
+ if (control->end_added) {
+ /* We just put the last bit on */
+ if (control->on_strm_q) {
+#ifdef INVARIANTS
+ if (control->on_strm_q != SCTP_ON_UNORDERED) {
+ panic("Huh control: %p on_q: %d -- not unordered?",
+ control, control->on_strm_q);
+ }
+#endif
+ SCTP_STAT_INCR_COUNTER64(sctps_reasmusrmsgs);
+ TAILQ_REMOVE(&strm->uno_inqueue, control, next_instrm);
+ control->on_strm_q = 0;
+ }
+ if (control->on_read_q == 0) {
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, control->end_added,
+ inp_read_lock_held, SCTP_SO_NOT_LOCKED);
+ }
+ } else {
+ /* Can we do a PD-API for this un-ordered guy? */
+ if ((control->length >= pd_point) && (strm->pd_api_started == 0)) {
+ strm->pd_api_started = 1;
+ control->pdapi_started = 1;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, control->end_added,
+ inp_read_lock_held, SCTP_SO_NOT_LOCKED);
+
+ break;
+ }
}
- *t_size += chk->send_size;
- if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
- return (1);
+ control = nctl;
+ }
+done_un:
+ control = TAILQ_FIRST(&strm->inqueue);
+ if (strm->pd_api_started) {
+ /* Can't add more */
+ return (0);
+ }
+ if (control == NULL) {
+ return (ret);
+ }
+ if (strm->last_sequence_delivered == control->sinfo_ssn) {
+ /*
+ * Ok the guy at the top was being partially delivered
+ * completed, so we remove it. Note the pd_api flag was
+ * taken off when the chunk was merged on in
+ * sctp_queue_data_for_reasm below.
+ */
+ nctl = TAILQ_NEXT(control, next_instrm);
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "Looking at control: %p e(%d) ssn: %u top_fsn: %u inc_fsn: %u (lastdel: %u)- o\n",
+ control, control->end_added, control->sinfo_ssn,
+ control->top_fsn, control->fsn_included,
+ strm->last_sequence_delivered);
+ if (control->end_added) {
+ if (control->on_strm_q) {
+#ifdef INVARIANTS
+ if (control->on_strm_q != SCTP_ON_ORDERED) {
+ panic("Huh control: %p on_q: %d -- not ordered?",
+ control, control->on_strm_q);
+ }
+#endif
+ SCTP_STAT_INCR_COUNTER64(sctps_reasmusrmsgs);
+ TAILQ_REMOVE(&strm->inqueue, control, next_instrm);
+ control->on_strm_q = 0;
+ }
+ if (strm->pd_api_started && control->pdapi_started) {
+ control->pdapi_started = 0;
+ strm->pd_api_started = 0;
+ }
+ if (control->on_read_q == 0) {
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, control->end_added,
+ inp_read_lock_held, SCTP_SO_NOT_LOCKED);
+ }
+ control = nctl;
}
- tsn++;
}
- return (0);
-}
-
-static void
-sctp_deliver_reasm_check(struct sctp_tcb *stcb, struct sctp_association *asoc)
-{
- struct sctp_tmit_chunk *chk;
- uint16_t nxt_todel;
- uint32_t tsize, pd_point;
-
-doit_again:
- chk = TAILQ_FIRST(&asoc->reasmqueue);
- if (chk == NULL) {
- /* Huh? */
- asoc->size_on_reasm_queue = 0;
- asoc->cnt_on_reasm_queue = 0;
- return;
+ if (strm->pd_api_started) {
+ /*
+ * Can't add more must have gotten an un-ordered above being
+ * partially delivered.
+ */
+ return (0);
}
- if (asoc->fragmented_delivery_inprogress == 0) {
- nxt_todel =
- asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered + 1;
- if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) &&
- (nxt_todel == chk->rec.data.stream_seq ||
- (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED))) {
- /*
- * Yep the first one is here and its ok to deliver
- * but should we?
- */
- if (stcb->sctp_socket) {
- pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket) >> SCTP_PARTIAL_DELIVERY_SHIFT,
- stcb->sctp_ep->partial_delivery_point);
- } else {
- pd_point = stcb->sctp_ep->partial_delivery_point;
+deliver_more:
+ next_to_del = strm->last_sequence_delivered + 1;
+ if (control) {
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "Looking at control: %p e(%d) ssn: %u top_fsn: %u inc_fsn: %u (nxtdel: %u)- o\n",
+ control, control->end_added, control->sinfo_ssn, control->top_fsn, control->fsn_included,
+ next_to_del);
+ nctl = TAILQ_NEXT(control, next_instrm);
+ if ((control->sinfo_ssn == next_to_del) &&
+ (control->first_frag_seen)) {
+ int done;
+
+ /* Ok we can deliver it onto the stream. */
+ if (control->end_added) {
+ /* We are done with it afterwards */
+ if (control->on_strm_q) {
+#ifdef INVARIANTS
+ if (control->on_strm_q != SCTP_ON_ORDERED) {
+ panic("Huh control: %p on_q: %d -- not ordered?",
+ control, control->on_strm_q);
+ }
+#endif
+ SCTP_STAT_INCR_COUNTER64(sctps_reasmusrmsgs);
+ TAILQ_REMOVE(&strm->inqueue, control, next_instrm);
+ control->on_strm_q = 0;
+ }
+ ret++;
}
- if (sctp_is_all_msg_on_reasm(asoc, &tsize) || (tsize >= pd_point)) {
+ if (((control->sinfo_flags >> 8) & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG) {
+ /*
+ * A singleton now slipping through - mark
+ * it non-revokable too
+ */
+ sctp_mark_non_revokable(asoc, control->sinfo_tsn);
+ } else if (control->end_added == 0) {
/*
- * Yes, we setup to start reception, by
- * backing down the TSN just in case we
- * can't deliver. If we
+ * Check if we can defer adding until its
+ * all there
*/
- asoc->fragmented_delivery_inprogress = 1;
- asoc->tsn_last_delivered =
- chk->rec.data.TSN_seq - 1;
- asoc->str_of_pdapi =
- chk->rec.data.stream_number;
- asoc->ssn_of_pdapi = chk->rec.data.stream_seq;
- asoc->pdapi_ppid = chk->rec.data.payloadtype;
- asoc->fragment_flags = chk->rec.data.rcv_flags;
- sctp_service_reassembly(stcb, asoc);
+ if ((control->length < pd_point) || (strm->pd_api_started)) {
+ /*
+ * Don't need it or cannot add more
+ * (one being delivered that way)
+ */
+ goto out;
+ }
+ }
+ done = (control->end_added) && (control->last_frag_seen);
+ if (control->on_read_q == 0) {
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, control->end_added,
+ inp_read_lock_held, SCTP_SO_NOT_LOCKED);
+ }
+ strm->last_sequence_delivered = next_to_del;
+ if (done) {
+ control = nctl;
+ goto deliver_more;
+ } else {
+ /* We are now doing PD API */
+ strm->pd_api_started = 1;
+ control->pdapi_started = 1;
}
}
- } else {
+ }
+out:
+ return (ret);
+}
+
+
+void
+sctp_add_chk_to_control(struct sctp_queued_to_read *control,
+ struct sctp_stream_in *strm,
+ struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct sctp_tmit_chunk *chk, int hold_rlock)
+{
+ /*
+ * Given a control and a chunk, merge the data from the chk onto the
+ * control and free up the chunk resources.
+ */
+ int i_locked = 0;
+
+ if (control->on_read_q && (hold_rlock == 0)) {
/*
- * Service re-assembly will deliver stream data queued at
- * the end of fragmented delivery.. but it wont know to go
- * back and call itself again... we do that here with the
- * got doit_again
+ * Its being pd-api'd so we must do some locks.
*/
- sctp_service_reassembly(stcb, asoc);
- if (asoc->fragmented_delivery_inprogress == 0) {
- /*
- * finished our Fragmented delivery, could be more
- * waiting?
- */
- goto doit_again;
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ i_locked = 1;
+ }
+ if (control->data == NULL) {
+ control->data = chk->data;
+ sctp_setup_tail_pointer(control);
+ } else {
+ sctp_add_to_tail_pointer(control, chk->data);
+ }
+ control->fsn_included = chk->rec.data.fsn_num;
+ asoc->size_on_reasm_queue -= chk->send_size;
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ sctp_mark_non_revokable(asoc, chk->rec.data.TSN_seq);
+ chk->data = NULL;
+ if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+ control->first_frag_seen = 1;
+ }
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
+ /* Its complete */
+ if ((control->on_strm_q) && (control->on_read_q)) {
+ if (control->pdapi_started) {
+ control->pdapi_started = 0;
+ strm->pd_api_started = 0;
+ }
+ if (control->on_strm_q == SCTP_ON_UNORDERED) {
+ /* Unordered */
+ TAILQ_REMOVE(&strm->uno_inqueue, control, next_instrm);
+ control->on_strm_q = 0;
+ } else if (control->on_strm_q == SCTP_ON_ORDERED) {
+ /* Ordered */
+ TAILQ_REMOVE(&strm->inqueue, control, next_instrm);
+ control->on_strm_q = 0;
+#ifdef INVARIANTS
+ } else if (control->on_strm_q) {
+ panic("Unknown state on ctrl: %p on_strm_q: %d", control,
+ control->on_strm_q);
+#endif
+ }
}
+ control->end_added = 1;
+ control->last_frag_seen = 1;
+ }
+ if (i_locked) {
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
}
+ sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
}
/*
@@ -831,462 +1284,361 @@ doit_again:
*/
static void
sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc,
- struct sctp_tmit_chunk *chk, int *abort_flag)
+ struct sctp_stream_in *strm,
+ struct sctp_queued_to_read *control,
+ struct sctp_tmit_chunk *chk,
+ int created_control,
+ int *abort_flag, uint32_t tsn)
{
- struct mbuf *op_err;
- char msg[SCTP_DIAG_INFO_LEN];
- uint32_t cum_ackp1, prev_tsn, post_tsn;
- struct sctp_tmit_chunk *at, *prev, *next;
-
- prev = next = NULL;
- cum_ackp1 = asoc->tsn_last_delivered + 1;
- if (TAILQ_EMPTY(&asoc->reasmqueue)) {
- /* This is the first one on the queue */
- TAILQ_INSERT_HEAD(&asoc->reasmqueue, chk, sctp_next);
- /*
- * we do not check for delivery of anything when only one
- * fragment is here
- */
- asoc->size_on_reasm_queue = chk->send_size;
- sctp_ucount_incr(asoc->cnt_on_reasm_queue);
- if (chk->rec.data.TSN_seq == cum_ackp1) {
- if (asoc->fragmented_delivery_inprogress == 0 &&
- (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) !=
- SCTP_DATA_FIRST_FRAG) {
- /*
- * An empty queue, no delivery inprogress,
- * we hit the next one and it does NOT have
- * a FIRST fragment mark.
- */
- SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, its not first, no fragmented delivery in progress\n");
- snprintf(msg, sizeof(msg),
- "Expected B-bit for TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- chk->rec.data.TSN_seq,
- chk->rec.data.stream_number,
- chk->rec.data.stream_seq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_2;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- } else if (asoc->fragmented_delivery_inprogress &&
- (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == SCTP_DATA_FIRST_FRAG) {
- /*
- * We are doing a partial delivery and the
- * NEXT chunk MUST be either the LAST or
- * MIDDLE fragment NOT a FIRST
- */
- SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS a first and fragmented delivery in progress\n");
- snprintf(msg, sizeof(msg),
- "Didn't expect B-bit for TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- chk->rec.data.TSN_seq,
- chk->rec.data.stream_number,
- chk->rec.data.stream_seq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_3;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- } else if (asoc->fragmented_delivery_inprogress) {
- /*
- * Here we are ok with a MIDDLE or LAST
- * piece
- */
- if (chk->rec.data.stream_number !=
- asoc->str_of_pdapi) {
- /* Got to be the right STR No */
- SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS not same stream number %d vs %d\n",
- chk->rec.data.stream_number,
- asoc->str_of_pdapi);
- snprintf(msg, sizeof(msg),
- "Expected SID=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- asoc->str_of_pdapi,
- chk->rec.data.TSN_seq,
- chk->rec.data.stream_number,
- chk->rec.data.stream_seq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_4;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- } else if ((asoc->fragment_flags & SCTP_DATA_UNORDERED) !=
- SCTP_DATA_UNORDERED &&
- chk->rec.data.stream_seq != asoc->ssn_of_pdapi) {
- /* Got to be the right STR Seq */
- SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS not same stream seq %d vs %d\n",
- chk->rec.data.stream_seq,
- asoc->ssn_of_pdapi);
- snprintf(msg, sizeof(msg),
- "Expected SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- asoc->ssn_of_pdapi,
- chk->rec.data.TSN_seq,
- chk->rec.data.stream_number,
- chk->rec.data.stream_seq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_5;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- }
- }
+ uint32_t next_fsn;
+ struct sctp_tmit_chunk *at, *nat;
+ int do_wakeup, unordered;
+
+ /*
+ * For old un-ordered data chunks.
+ */
+ if ((control->sinfo_flags >> 8) & SCTP_DATA_UNORDERED) {
+ unordered = 1;
+ } else {
+ unordered = 0;
+ }
+ /* Must be added to the stream-in queue */
+ if (created_control) {
+ if (sctp_place_control_in_stream(strm, asoc, control)) {
+ /* Duplicate SSN? */
+ sctp_clean_up_control(stcb, control);
+ sctp_abort_in_reasm(stcb, control, chk,
+ abort_flag,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_6);
+ return;
}
- return;
- }
- /* Find its place */
- TAILQ_FOREACH(at, &asoc->reasmqueue, sctp_next) {
- if (SCTP_TSN_GT(at->rec.data.TSN_seq, chk->rec.data.TSN_seq)) {
- /*
- * one in queue is bigger than the new one, insert
- * before this one
- */
- /* A check */
- asoc->size_on_reasm_queue += chk->send_size;
- sctp_ucount_incr(asoc->cnt_on_reasm_queue);
- next = at;
- TAILQ_INSERT_BEFORE(at, chk, sctp_next);
- break;
- } else if (at->rec.data.TSN_seq == chk->rec.data.TSN_seq) {
- /* Gak, He sent me a duplicate str seq number */
+ if ((tsn == (asoc->cumulative_tsn + 1) && (asoc->idata_supported == 0))) {
/*
- * foo bar, I guess I will just free this new guy,
- * should we abort too? FIX ME MAYBE? Or it COULD be
- * that the SSN's have wrapped. Maybe I should
- * compare to TSN somehow... sigh for now just blow
- * away the chunk!
+ * Ok we created this control and now lets validate
+ * that its legal i.e. there is a B bit set, if not
+ * and we have up to the cum-ack then its invalid.
*/
- if (chk->data) {
- sctp_m_freem(chk->data);
- chk->data = NULL;
- }
- sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
- return;
- } else {
- prev = at;
- if (TAILQ_NEXT(at, sctp_next) == NULL) {
- /*
- * We are at the end, insert it after this
- * one
- */
- /* check it first */
- asoc->size_on_reasm_queue += chk->send_size;
- sctp_ucount_incr(asoc->cnt_on_reasm_queue);
- TAILQ_INSERT_AFTER(&asoc->reasmqueue, at, chk, sctp_next);
- break;
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0) {
+ sctp_abort_in_reasm(stcb, control, chk,
+ abort_flag,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_7);
+ return;
}
}
}
- /* Now the audits */
- if (prev) {
- prev_tsn = chk->rec.data.TSN_seq - 1;
- if (prev_tsn == prev->rec.data.TSN_seq) {
+ if ((asoc->idata_supported == 0) && (unordered == 1)) {
+ sctp_inject_old_unordered_data(stcb, asoc, control, chk, abort_flag);
+ return;
+ }
+ /*
+ * Ok we must queue the chunk into the reasembly portion: o if its
+ * the first it goes to the control mbuf. o if its not first but the
+ * next in sequence it goes to the control, and each succeeding one
+ * in order also goes. o if its not in order we place it on the list
+ * in its place.
+ */
+ if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+ /* Its the very first one. */
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "chunk is a first fsn: %u becomes fsn_included\n",
+ chk->rec.data.fsn_num);
+ if (control->first_frag_seen) {
/*
- * Ok the one I am dropping onto the end is the
- * NEXT. A bit of valdiation here.
+ * Error on senders part, they either sent us two
+ * data chunks with FIRST, or they sent two
+ * un-ordered chunks that were fragmented at the
+ * same time in the same stream.
*/
- if ((prev->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
- SCTP_DATA_FIRST_FRAG ||
- (prev->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
- SCTP_DATA_MIDDLE_FRAG) {
+ sctp_abort_in_reasm(stcb, control, chk,
+ abort_flag,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_8);
+ return;
+ }
+ control->first_frag_seen = 1;
+ control->fsn_included = chk->rec.data.fsn_num;
+ control->data = chk->data;
+ sctp_mark_non_revokable(asoc, chk->rec.data.TSN_seq);
+ chk->data = NULL;
+ sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
+ sctp_setup_tail_pointer(control);
+ } else {
+ /* Place the chunk in our list */
+ int inserted = 0;
+
+ if (control->last_frag_seen == 0) {
+ /* Still willing to raise highest FSN seen */
+ if (SCTP_TSN_GT(chk->rec.data.fsn_num, control->top_fsn)) {
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "We have a new top_fsn: %u\n",
+ chk->rec.data.fsn_num);
+ control->top_fsn = chk->rec.data.fsn_num;
+ }
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "The last fsn is now in place fsn: %u\n",
+ chk->rec.data.fsn_num);
+ control->last_frag_seen = 1;
+ }
+ if (asoc->idata_supported || control->first_frag_seen) {
/*
- * Insert chk MUST be a MIDDLE or LAST
- * fragment
+ * For IDATA we always check since we know
+ * that the first fragment is 0. For old
+ * DATA we have to receive the first before
+ * we know the first FSN (which is the TSN).
*/
- if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
- SCTP_DATA_FIRST_FRAG) {
- SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - It can be a midlle or last but not a first\n");
- SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it's a FIRST!\n");
- snprintf(msg, sizeof(msg),
- "Can't handle B-bit, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- chk->rec.data.TSN_seq,
- chk->rec.data.stream_number,
- chk->rec.data.stream_seq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_6;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- return;
- }
- if (chk->rec.data.stream_number !=
- prev->rec.data.stream_number) {
- /*
- * Huh, need the correct STR here,
- * they must be the same.
- */
- SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, Evil plot, sid:%d not the same as at:%d\n",
- chk->rec.data.stream_number,
- prev->rec.data.stream_number);
- snprintf(msg, sizeof(msg),
- "Expect SID=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- prev->rec.data.stream_number,
- chk->rec.data.TSN_seq,
- chk->rec.data.stream_number,
- chk->rec.data.stream_seq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_7;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- return;
- }
- if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) !=
- (prev->rec.data.rcv_flags & SCTP_DATA_UNORDERED)) {
+ if (SCTP_TSN_GE(control->fsn_included, chk->rec.data.fsn_num)) {
/*
- * Huh, need the same ordering here,
- * they must be the same.
+ * We have already delivered up to
+ * this so its a dup
*/
- SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, Evil plot, U-bit not constant\n");
- snprintf(msg, sizeof(msg),
- "Expect U-bit=%d for TSN=%8.8x, got U-bit=%d",
- (prev->rec.data.rcv_flags & SCTP_DATA_UNORDERED) ? 1 : 0,
- chk->rec.data.TSN_seq,
- (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) ? 1 : 0);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_7;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
+ sctp_abort_in_reasm(stcb, control, chk,
+ abort_flag,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_9);
return;
}
- if ((prev->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0 &&
- chk->rec.data.stream_seq !=
- prev->rec.data.stream_seq) {
+ }
+ } else {
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
+ /* Second last? huh? */
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "Duplicate last fsn: %u (top: %u) -- abort\n",
+ chk->rec.data.fsn_num, control->top_fsn);
+ sctp_abort_in_reasm(stcb, control,
+ chk, abort_flag,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_10);
+ return;
+ }
+ if (asoc->idata_supported || control->first_frag_seen) {
+ /*
+ * For IDATA we always check since we know
+ * that the first fragment is 0. For old
+ * DATA we have to receive the first before
+ * we know the first FSN (which is the TSN).
+ */
+
+ if (SCTP_TSN_GE(control->fsn_included, chk->rec.data.fsn_num)) {
/*
- * Huh, need the correct STR here,
- * they must be the same.
+ * We have already delivered up to
+ * this so its a dup
*/
- SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, Evil plot, sseq:%d not the same as at:%d\n",
- chk->rec.data.stream_seq,
- prev->rec.data.stream_seq);
- snprintf(msg, sizeof(msg),
- "Expect SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- prev->rec.data.stream_seq,
- chk->rec.data.TSN_seq,
- chk->rec.data.stream_number,
- chk->rec.data.stream_seq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_8;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- return;
- }
- } else if ((prev->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
- SCTP_DATA_LAST_FRAG) {
- /* Insert chk MUST be a FIRST */
- if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
- SCTP_DATA_FIRST_FRAG) {
- SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, evil plot, its not FIRST and it must be!\n");
- snprintf(msg, sizeof(msg),
- "Expect B-bit, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- chk->rec.data.TSN_seq,
- chk->rec.data.stream_number,
- chk->rec.data.stream_seq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_9;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "New fsn: %u is already seen in included_fsn: %u -- abort\n",
+ chk->rec.data.fsn_num, control->fsn_included);
+ sctp_abort_in_reasm(stcb, control, chk,
+ abort_flag,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_11);
return;
}
}
- }
- }
- if (next) {
- post_tsn = chk->rec.data.TSN_seq + 1;
- if (post_tsn == next->rec.data.TSN_seq) {
/*
- * Ok the one I am inserting ahead of is my NEXT
- * one. A bit of valdiation here.
+ * validate not beyond top FSN if we have seen last
+ * one
*/
- if (next->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
- /* Insert chk MUST be a last fragment */
- if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK)
- != SCTP_DATA_LAST_FRAG) {
- SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Next is FIRST, we must be LAST\n");
- SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, its not a last!\n");
- snprintf(msg, sizeof(msg),
- "Expect only E-bit, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- chk->rec.data.TSN_seq,
- chk->rec.data.stream_number,
- chk->rec.data.stream_seq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_10;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- return;
- }
- } else if ((next->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
- SCTP_DATA_MIDDLE_FRAG ||
- (next->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
- SCTP_DATA_LAST_FRAG) {
+ if (SCTP_TSN_GT(chk->rec.data.fsn_num, control->top_fsn)) {
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "New fsn: %u is beyond or at top_fsn: %u -- abort\n",
+ chk->rec.data.fsn_num,
+ control->top_fsn);
+ sctp_abort_in_reasm(stcb, control, chk,
+ abort_flag,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_12);
+ return;
+ }
+ }
+ /*
+ * If we reach here, we need to place the new chunk in the
+ * reassembly for this control.
+ */
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "chunk is a not first fsn: %u needs to be inserted\n",
+ chk->rec.data.fsn_num);
+ TAILQ_FOREACH(at, &control->reasm, sctp_next) {
+ if (SCTP_TSN_GT(at->rec.data.fsn_num, chk->rec.data.fsn_num)) {
/*
- * Insert chk CAN be MIDDLE or FIRST NOT
- * LAST
+ * This one in queue is bigger than the new
+ * one, insert the new one before at.
*/
- if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
- SCTP_DATA_LAST_FRAG) {
- SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Next is a MIDDLE/LAST\n");
- SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, new prev chunk is a LAST\n");
- snprintf(msg, sizeof(msg),
- "Didn't expect E-bit, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- chk->rec.data.TSN_seq,
- chk->rec.data.stream_number,
- chk->rec.data.stream_seq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_11;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- return;
- }
- if (chk->rec.data.stream_number !=
- next->rec.data.stream_number) {
- /*
- * Huh, need the correct STR here,
- * they must be the same.
- */
- SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Gak, Evil plot, ssn:%d not the same as at:%d\n",
- chk->rec.data.stream_number,
- next->rec.data.stream_number);
- snprintf(msg, sizeof(msg),
- "Required SID %4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- next->rec.data.stream_number,
- chk->rec.data.TSN_seq,
- chk->rec.data.stream_number,
- chk->rec.data.stream_seq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_12;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- return;
- }
- if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) !=
- (next->rec.data.rcv_flags & SCTP_DATA_UNORDERED)) {
- /*
- * Huh, need the same ordering here,
- * they must be the same.
- */
- SCTPDBG(SCTP_DEBUG_INDATA1, "Next check - Gak, Evil plot, U-bit not constant\n");
- snprintf(msg, sizeof(msg),
- "Expect U-bit=%d for TSN=%8.8x, got U-bit=%d",
- (next->rec.data.rcv_flags & SCTP_DATA_UNORDERED) ? 1 : 0,
- chk->rec.data.TSN_seq,
- (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) ? 1 : 0);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_12;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- return;
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "Insert it before fsn: %u\n",
+ at->rec.data.fsn_num);
+ asoc->size_on_reasm_queue += chk->send_size;
+ sctp_ucount_incr(asoc->cnt_on_reasm_queue);
+ TAILQ_INSERT_BEFORE(at, chk, sctp_next);
+ inserted = 1;
+ break;
+ } else if (at->rec.data.fsn_num == chk->rec.data.fsn_num) {
+ /*
+ * Gak, He sent me a duplicate str seq
+ * number
+ */
+ /*
+ * foo bar, I guess I will just free this
+ * new guy, should we abort too? FIX ME
+ * MAYBE? Or it COULD be that the SSN's have
+ * wrapped. Maybe I should compare to TSN
+ * somehow... sigh for now just blow away
+ * the chunk!
+ */
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "Duplicate to fsn: %u -- abort\n",
+ at->rec.data.fsn_num);
+ sctp_abort_in_reasm(stcb, control,
+ chk, abort_flag,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_13);
+ return;
+ }
+ }
+ if (inserted == 0) {
+ /* Goes on the end */
+ SCTPDBG(SCTP_DEBUG_XXX, "Inserting at tail of list fsn: %u\n",
+ chk->rec.data.fsn_num);
+ asoc->size_on_reasm_queue += chk->send_size;
+ sctp_ucount_incr(asoc->cnt_on_reasm_queue);
+ TAILQ_INSERT_TAIL(&control->reasm, chk, sctp_next);
+ }
+ }
+ /*
+ * Ok lets see if we can suck any up into the control structure that
+ * are in seq if it makes sense.
+ */
+ do_wakeup = 0;
+ /*
+ * If the first fragment has not been seen there is no sense in
+ * looking.
+ */
+ if (control->first_frag_seen) {
+ next_fsn = control->fsn_included + 1;
+ TAILQ_FOREACH_SAFE(at, &control->reasm, sctp_next, nat) {
+ if (at->rec.data.fsn_num == next_fsn) {
+ /* We can add this one now to the control */
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "Adding more to control: %p at: %p fsn: %u next_fsn: %u included: %u\n",
+ control, at,
+ at->rec.data.fsn_num,
+ next_fsn, control->fsn_included);
+ TAILQ_REMOVE(&control->reasm, at, sctp_next);
+ sctp_add_chk_to_control(control, strm, stcb, asoc, at, SCTP_READ_LOCK_NOT_HELD);
+ if (control->on_read_q) {
+ do_wakeup = 1;
}
- if ((next->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0 &&
- chk->rec.data.stream_seq !=
- next->rec.data.stream_seq) {
- /*
- * Huh, need the correct STR here,
- * they must be the same.
- */
- SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Gak, Evil plot, sseq:%d not the same as at:%d\n",
- chk->rec.data.stream_seq,
- next->rec.data.stream_seq);
- snprintf(msg, sizeof(msg),
- "Required SSN %4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- next->rec.data.stream_seq,
- chk->rec.data.TSN_seq,
- chk->rec.data.stream_number,
- chk->rec.data.stream_seq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_13;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- return;
+ next_fsn++;
+ if (control->end_added && control->pdapi_started) {
+ if (strm->pd_api_started) {
+ strm->pd_api_started = 0;
+ control->pdapi_started = 0;
+ }
+ if (control->on_read_q == 0) {
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, control->end_added,
+ SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
+ do_wakeup = 1;
+ }
+ break;
}
+ } else {
+ break;
}
}
}
- /* Do we need to do some delivery? check */
- sctp_deliver_reasm_check(stcb, asoc);
+ if (do_wakeup) {
+ /* Need to wakeup the reader */
+ sctp_wakeup_the_read_socket(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
+ }
}
-/*
- * This is an unfortunate routine. It checks to make sure a evil guy is not
- * stuffing us full of bad packet fragments. A broken peer could also do this
- * but this is doubtful. It is to bad I must worry about evil crackers sigh
- * :< more cycles.
- */
-static int
-sctp_does_tsn_belong_to_reasm(struct sctp_association *asoc,
- uint32_t TSN_seq)
+static struct sctp_queued_to_read *
+sctp_find_reasm_entry(struct sctp_stream_in *strm, uint32_t msg_id, int ordered, int old)
{
- struct sctp_tmit_chunk *at;
- uint32_t tsn_est;
-
- TAILQ_FOREACH(at, &asoc->reasmqueue, sctp_next) {
- if (SCTP_TSN_GT(TSN_seq, at->rec.data.TSN_seq)) {
- /* is it one bigger? */
- tsn_est = at->rec.data.TSN_seq + 1;
- if (tsn_est == TSN_seq) {
- /* yep. It better be a last then */
- if ((at->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
- SCTP_DATA_LAST_FRAG) {
- /*
- * Ok this guy belongs next to a guy
- * that is NOT last, it should be a
- * middle/last, not a complete
- * chunk.
- */
- return (1);
- } else {
- /*
- * This guy is ok since its a LAST
- * and the new chunk is a fully
- * self- contained one.
- */
- return (0);
- }
+ struct sctp_queued_to_read *control;
+
+ if (ordered) {
+ TAILQ_FOREACH(control, &strm->inqueue, next_instrm) {
+ if (control->msg_id == msg_id) {
+ break;
}
- } else if (TSN_seq == at->rec.data.TSN_seq) {
- /* Software error since I have a dup? */
- return (1);
- } else {
- /*
- * Ok, 'at' is larger than new chunk but does it
- * need to be right before it.
- */
- tsn_est = TSN_seq + 1;
- if (tsn_est == at->rec.data.TSN_seq) {
- /* Yep, It better be a first */
- if ((at->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
- SCTP_DATA_FIRST_FRAG) {
- return (1);
- } else {
- return (0);
- }
+ }
+ } else {
+ if (old) {
+ control = TAILQ_FIRST(&strm->uno_inqueue);
+ return (control);
+ }
+ TAILQ_FOREACH(control, &strm->uno_inqueue, next_instrm) {
+ if (control->msg_id == msg_id) {
+ break;
}
}
}
- return (0);
+ return (control);
}
static int
sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
- struct mbuf **m, int offset, struct sctp_data_chunk *ch, int chk_length,
+ struct mbuf **m, int offset, int chk_length,
struct sctp_nets *net, uint32_t * high_tsn, int *abort_flag,
- int *break_flag, int last_chunk)
+ int *break_flag, int last_chunk, uint8_t chtype)
{
/* Process a data chunk */
/* struct sctp_tmit_chunk *chk; */
+ struct sctp_data_chunk *ch;
+ struct sctp_idata_chunk *nch, chunk_buf;
struct sctp_tmit_chunk *chk;
- uint32_t tsn, gap;
+ uint32_t tsn, fsn, gap, msg_id;
struct mbuf *dmbuf;
int the_len;
int need_reasm_check = 0;
- uint16_t strmno, strmseq;
+ uint16_t strmno;
struct mbuf *op_err;
char msg[SCTP_DIAG_INFO_LEN];
- struct sctp_queued_to_read *control;
- int ordered;
+ struct sctp_queued_to_read *control = NULL;
uint32_t protocol_id;
uint8_t chunk_flags;
struct sctp_stream_reset_list *liste;
+ struct sctp_stream_in *strm;
+ int ordered;
+ size_t clen;
+ int created_control = 0;
+ uint8_t old_data;
chk = NULL;
- tsn = ntohl(ch->dp.tsn);
+ if (chtype == SCTP_IDATA) {
+ nch = (struct sctp_idata_chunk *)sctp_m_getptr(*m, offset,
+ sizeof(struct sctp_idata_chunk), (uint8_t *) & chunk_buf);
+ ch = (struct sctp_data_chunk *)nch;
+ clen = sizeof(struct sctp_idata_chunk);
+ tsn = ntohl(ch->dp.tsn);
+ msg_id = ntohl(nch->dp.msg_id);
+ protocol_id = nch->dp.ppid_fsn.protocol_id;
+ if (ch->ch.chunk_flags & SCTP_DATA_FIRST_FRAG)
+ fsn = 0;
+ else
+ fsn = ntohl(nch->dp.ppid_fsn.fsn);
+ old_data = 0;
+ } else {
+ ch = (struct sctp_data_chunk *)sctp_m_getptr(*m, offset,
+ sizeof(struct sctp_data_chunk), (uint8_t *) & chunk_buf);
+ tsn = ntohl(ch->dp.tsn);
+ protocol_id = ch->dp.protocol_id;
+ clen = sizeof(struct sctp_data_chunk);
+ fsn = tsn;
+ msg_id = (uint32_t) (ntohs(ch->dp.stream_sequence));
+ nch = NULL;
+ old_data = 1;
+ }
chunk_flags = ch->ch.chunk_flags;
+ if ((size_t)chk_length == clen) {
+ /*
+ * Need to send an abort since we had a empty data chunk.
+ */
+ op_err = sctp_generate_no_user_data_cause(ch->dp.tsn);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_14;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ return (0);
+ }
if ((chunk_flags & SCTP_DATA_SACK_IMMEDIATELY) == SCTP_DATA_SACK_IMMEDIATELY) {
asoc->send_sack = 1;
}
- protocol_id = ch->dp.protocol_id;
ordered = ((chunk_flags & SCTP_DATA_UNORDERED) == 0);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
sctp_log_map(tsn, asoc->cumulative_tsn, asoc->highest_tsn_inside_map, SCTP_MAP_TSN_ENTERS);
@@ -1356,6 +1708,117 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
* for on a partial delivery API.
*/
+ /* Is the stream valid? */
+ strmno = ntohs(ch->dp.stream_id);
+
+ if (strmno >= asoc->streamincnt) {
+ struct sctp_error_invalid_stream *cause;
+
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_error_invalid_stream),
+ 0, M_NOWAIT, 1, MT_DATA);
+ if (op_err != NULL) {
+ /* add some space up front so prepend will work well */
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ cause = mtod(op_err, struct sctp_error_invalid_stream *);
+ /*
+ * Error causes are just param's and this one has
+ * two back to back phdr, one with the error type
+ * and size, the other with the streamid and a rsvd
+ */
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_error_invalid_stream);
+ cause->cause.code = htons(SCTP_CAUSE_INVALID_STREAM);
+ cause->cause.length = htons(sizeof(struct sctp_error_invalid_stream));
+ cause->stream_id = ch->dp.stream_id;
+ cause->reserved = htons(0);
+ sctp_queue_op_err(stcb, op_err);
+ }
+ SCTP_STAT_INCR(sctps_badsid);
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap);
+ if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) {
+ asoc->highest_tsn_inside_nr_map = tsn;
+ }
+ if (tsn == (asoc->cumulative_tsn + 1)) {
+ /* Update cum-ack */
+ asoc->cumulative_tsn = tsn;
+ }
+ return (0);
+ }
+ strm = &asoc->strmin[strmno];
+ /*
+ * If its a fragmented message, lets see if we can find the control
+ * on the reassembly queues.
+ */
+ if ((chtype == SCTP_IDATA) &&
+ ((chunk_flags & SCTP_DATA_FIRST_FRAG) == 0) &&
+ (fsn == 0)) {
+ /*
+ * The first *must* be fsn 0, and other (middle/end) pieces
+ * can *not* be fsn 0. XXX: This can happen in case of a
+ * wrap around. Ignore is for now.
+ */
+ snprintf(msg, sizeof(msg), "FSN zero for MID=%8.8x, but flags=%2.2x",
+ msg_id, chunk_flags);
+ goto err_out;
+ }
+ control = sctp_find_reasm_entry(strm, msg_id, ordered, old_data);
+ SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags:0x%x look for control on queues %p\n",
+ chunk_flags, control);
+ if ((chunk_flags & SCTP_DATA_NOT_FRAG) != SCTP_DATA_NOT_FRAG) {
+ /* See if we can find the re-assembly entity */
+ if (control != NULL) {
+ /* We found something, does it belong? */
+ if (ordered && (msg_id != control->sinfo_ssn)) {
+ snprintf(msg, sizeof(msg), "Reassembly problem (MID=%8.8x)", msg_id);
+ err_out:
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ return (0);
+ }
+ if (ordered && ((control->sinfo_flags >> 8) & SCTP_DATA_UNORDERED)) {
+ /*
+ * We can't have a switched order with an
+ * unordered chunk
+ */
+ snprintf(msg, sizeof(msg), "All fragments of a user message must be ordered or unordered (TSN=%8.8x)",
+ tsn);
+ goto err_out;
+ }
+ if (!ordered && (((control->sinfo_flags >> 8) & SCTP_DATA_UNORDERED) == 0)) {
+ /*
+ * We can't have a switched unordered with a
+ * ordered chunk
+ */
+ snprintf(msg, sizeof(msg), "All fragments of a user message must be ordered or unordered (TSN=%8.8x)",
+ tsn);
+ goto err_out;
+ }
+ }
+ } else {
+ /*
+ * Its a complete segment. Lets validate we don't have a
+ * re-assembly going on with the same Stream/Seq (for
+ * ordered) or in the same Stream for unordered.
+ */
+ if (control != NULL) {
+ if (ordered || (old_data == 0)) {
+ SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags: 0x%x dup detected on msg_id: %u\n",
+ chunk_flags, msg_id);
+ snprintf(msg, sizeof(msg), "Duplicate MID=%8.8x detected.", msg_id);
+ goto err_out;
+ } else {
+ if ((tsn == control->fsn_included + 1) &&
+ (control->end_added == 0)) {
+ snprintf(msg, sizeof(msg), "Illegal message sequence, missing end for MID: %8.8x", control->fsn_included);
+ goto err_out;
+ } else {
+ control = NULL;
+ }
+ }
+ }
+ }
/* now do the tests */
if (((asoc->cnt_on_all_streams +
asoc->cnt_on_reasm_queue +
@@ -1388,68 +1851,31 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
#endif
}
/* now is it in the mapping array of what we have accepted? */
- if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_map) &&
- SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) {
- /* Nope not in the valid range dump it */
- sctp_set_rwnd(stcb, asoc);
- if ((asoc->cnt_on_all_streams +
- asoc->cnt_on_reasm_queue +
- asoc->cnt_msg_on_sb) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue)) {
- SCTP_STAT_INCR(sctps_datadropchklmt);
- } else {
- SCTP_STAT_INCR(sctps_datadroprwnd);
+ if (nch == NULL) {
+ if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_map) &&
+ SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) {
+ /* Nope not in the valid range dump it */
+ dump_packet:
+ sctp_set_rwnd(stcb, asoc);
+ if ((asoc->cnt_on_all_streams +
+ asoc->cnt_on_reasm_queue +
+ asoc->cnt_msg_on_sb) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue)) {
+ SCTP_STAT_INCR(sctps_datadropchklmt);
+ } else {
+ SCTP_STAT_INCR(sctps_datadroprwnd);
+ }
+ *break_flag = 1;
+ return (0);
+ }
+ } else {
+ if (control == NULL) {
+ goto dump_packet;
+ }
+ if (SCTP_TSN_GT(fsn, control->top_fsn)) {
+ goto dump_packet;
}
- *break_flag = 1;
- return (0);
- }
- }
- strmno = ntohs(ch->dp.stream_id);
- if (strmno >= asoc->streamincnt) {
- struct sctp_paramhdr *phdr;
- struct mbuf *mb;
-
- mb = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) * 2),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (mb != NULL) {
- /* add some space up front so prepend will work well */
- SCTP_BUF_RESV_UF(mb, sizeof(struct sctp_chunkhdr));
- phdr = mtod(mb, struct sctp_paramhdr *);
- /*
- * Error causes are just param's and this one has
- * two back to back phdr, one with the error type
- * and size, the other with the streamid and a rsvd
- */
- SCTP_BUF_LEN(mb) = (sizeof(struct sctp_paramhdr) * 2);
- phdr->param_type = htons(SCTP_CAUSE_INVALID_STREAM);
- phdr->param_length =
- htons(sizeof(struct sctp_paramhdr) * 2);
- phdr++;
- /* We insert the stream in the type field */
- phdr->param_type = ch->dp.stream_id;
- /* And set the length to 0 for the rsvd field */
- phdr->param_length = 0;
- sctp_queue_op_err(stcb, mb);
- }
- SCTP_STAT_INCR(sctps_badsid);
- SCTP_TCB_LOCK_ASSERT(stcb);
- SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap);
- if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) {
- asoc->highest_tsn_inside_nr_map = tsn;
- }
- if (tsn == (asoc->cumulative_tsn + 1)) {
- /* Update cum-ack */
- asoc->cumulative_tsn = tsn;
}
- return (0);
}
- /*
- * Before we continue lets validate that we are not being fooled by
- * an evil attacker. We can only have 4k chunks based on our TSN
- * spread allowed by the mapping array 512 * 8 bits, so there is no
- * way our stream sequence numbers could have wrapped. We of course
- * only validate the FIRST fragment so the bit must be set.
- */
- strmseq = ntohs(ch->dp.stream_sequence);
#ifdef SCTP_ASOCLOG_OF_TSNS
SCTP_TCB_LOCK_ASSERT(stcb);
if (asoc->tsn_in_at >= SCTP_TSN_LOG_SIZE) {
@@ -1458,7 +1884,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
}
asoc->in_tsnlog[asoc->tsn_in_at].tsn = tsn;
asoc->in_tsnlog[asoc->tsn_in_at].strm = strmno;
- asoc->in_tsnlog[asoc->tsn_in_at].seq = strmseq;
+ asoc->in_tsnlog[asoc->tsn_in_at].seq = msg_id;
asoc->in_tsnlog[asoc->tsn_in_at].sz = chk_length;
asoc->in_tsnlog[asoc->tsn_in_at].flgs = chunk_flags;
asoc->in_tsnlog[asoc->tsn_in_at].stcb = (void *)stcb;
@@ -1466,18 +1892,26 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
asoc->in_tsnlog[asoc->tsn_in_at].in_out = 1;
asoc->tsn_in_at++;
#endif
+ /*
+ * Before we continue lets validate that we are not being fooled by
+ * an evil attacker. We can only have Nk chunks based on our TSN
+ * spread allowed by the mapping array N * 8 bits, so there is no
+ * way our stream sequence numbers could have wrapped. We of course
+ * only validate the FIRST fragment so the bit must be set.
+ */
if ((chunk_flags & SCTP_DATA_FIRST_FRAG) &&
(TAILQ_EMPTY(&asoc->resetHead)) &&
(chunk_flags & SCTP_DATA_UNORDERED) == 0 &&
- SCTP_SSN_GE(asoc->strmin[strmno].last_sequence_delivered, strmseq)) {
+ SCTP_MSGID_GE(old_data, asoc->strmin[strmno].last_sequence_delivered, msg_id)) {
/* The incoming sseq is behind where we last delivered? */
- SCTPDBG(SCTP_DEBUG_INDATA1, "EVIL/Broken-Dup S-SEQ:%d delivered:%d from peer, Abort!\n",
- strmseq, asoc->strmin[strmno].last_sequence_delivered);
+ SCTPDBG(SCTP_DEBUG_INDATA1, "EVIL/Broken-Dup S-SEQ: %u delivered: %u from peer, Abort!\n",
+ msg_id, asoc->strmin[strmno].last_sequence_delivered);
+
snprintf(msg, sizeof(msg), "Delivered SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
asoc->strmin[strmno].last_sequence_delivered,
- tsn, strmno, strmseq);
+ tsn, strmno, msg_id);
op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_14;
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_16;
sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return (0);
@@ -1486,21 +1920,24 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
* From here down we may find ch-> invalid
* so its a good idea NOT to use it.
*************************************/
-
- the_len = (chk_length - sizeof(struct sctp_data_chunk));
+ if (nch) {
+ the_len = (chk_length - sizeof(struct sctp_idata_chunk));
+ } else {
+ the_len = (chk_length - sizeof(struct sctp_data_chunk));
+ }
if (last_chunk == 0) {
- dmbuf = SCTP_M_COPYM(*m,
- (offset + sizeof(struct sctp_data_chunk)),
- the_len, M_DONTWAIT);
+ if (nch) {
+ dmbuf = SCTP_M_COPYM(*m,
+ (offset + sizeof(struct sctp_idata_chunk)),
+ the_len, M_NOWAIT);
+ } else {
+ dmbuf = SCTP_M_COPYM(*m,
+ (offset + sizeof(struct sctp_data_chunk)),
+ the_len, M_NOWAIT);
+ }
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = dmbuf; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(dmbuf, SCTP_MBUF_ICOPY);
}
#endif
} else {
@@ -1509,7 +1946,11 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
dmbuf = *m;
/* lop off the top part */
- m_adj(dmbuf, (offset + sizeof(struct sctp_data_chunk)));
+ if (nch) {
+ m_adj(dmbuf, (offset + sizeof(struct sctp_idata_chunk)));
+ } else {
+ m_adj(dmbuf, (offset + sizeof(struct sctp_data_chunk)));
+ }
if (SCTP_BUF_NEXT(dmbuf) == NULL) {
l_len = SCTP_BUF_LEN(dmbuf);
} else {
@@ -1533,11 +1974,36 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
SCTP_STAT_INCR(sctps_nomem);
return (0);
}
+ /*
+ * Now no matter what we need a control, get one if we don't have
+ * one (we may have gotten it above when we found the message was
+ * fragmented
+ */
+ if (control == NULL) {
+ sctp_alloc_a_readq(stcb, control);
+ sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
+ protocol_id,
+ strmno, msg_id,
+ chunk_flags,
+ NULL, fsn, msg_id);
+ if (control == NULL) {
+ SCTP_STAT_INCR(sctps_nomem);
+ return (0);
+ }
+ if ((chunk_flags & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG) {
+ control->data = dmbuf;
+ control->tail_mbuf = NULL;
+ control->end_added = control->last_frag_seen = control->first_frag_seen = 1;
+ control->top_fsn = control->fsn_included = fsn;
+ }
+ created_control = 1;
+ }
+ SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags: 0x%x ordered: %d msgid: %u control: %p\n",
+ chunk_flags, ordered, msg_id, control);
if ((chunk_flags & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG &&
- asoc->fragmented_delivery_inprogress == 0 &&
TAILQ_EMPTY(&asoc->resetHead) &&
((ordered == 0) ||
- ((uint16_t) (asoc->strmin[strmno].last_sequence_delivered + 1) == strmseq &&
+ ((uint16_t) (asoc->strmin[strmno].last_sequence_delivered + 1) == msg_id &&
TAILQ_EMPTY(&asoc->strmin[strmno].inqueue)))) {
/* Candidate for express delivery */
/*
@@ -1547,109 +2013,30 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
* And there is room for it in the socket buffer. Lets just
* stuff it up the buffer....
*/
-
- /* It would be nice to avoid this copy if we could :< */
- sctp_alloc_a_readq(stcb, control);
- sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
- protocol_id,
- strmno, strmseq,
- chunk_flags,
- dmbuf);
- if (control == NULL) {
- goto failed_express_del;
- }
SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap);
if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) {
asoc->highest_tsn_inside_nr_map = tsn;
}
+ SCTPDBG(SCTP_DEBUG_XXX, "Injecting control: %p to be read (msg_id: %u)\n",
+ control, msg_id);
+
sctp_add_to_readq(stcb->sctp_ep, stcb,
control, &stcb->sctp_socket->so_rcv,
1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
if ((chunk_flags & SCTP_DATA_UNORDERED) == 0) {
/* for ordered, bump what we delivered */
- asoc->strmin[strmno].last_sequence_delivered++;
+ strm->last_sequence_delivered++;
}
SCTP_STAT_INCR(sctps_recvexpress);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
- sctp_log_strm_del_alt(stcb, tsn, strmseq, strmno,
+ sctp_log_strm_del_alt(stcb, tsn, msg_id, strmno,
SCTP_STR_LOG_FROM_EXPRS_DEL);
}
control = NULL;
-
goto finish_express_del;
}
-failed_express_del:
- /* If we reach here this is a new chunk */
- chk = NULL;
- control = NULL;
- /* Express for fragmented delivery? */
- if ((asoc->fragmented_delivery_inprogress) &&
- (stcb->asoc.control_pdapi) &&
- (asoc->str_of_pdapi == strmno) &&
- (asoc->ssn_of_pdapi == strmseq)
- ) {
- control = stcb->asoc.control_pdapi;
- if ((chunk_flags & SCTP_DATA_FIRST_FRAG) == SCTP_DATA_FIRST_FRAG) {
- /* Can't be another first? */
- goto failed_pdapi_express_del;
- }
- if (tsn == (control->sinfo_tsn + 1)) {
- /* Yep, we can add it on */
- int end = 0;
-
- if (chunk_flags & SCTP_DATA_LAST_FRAG) {
- end = 1;
- }
- if (sctp_append_to_readq(stcb->sctp_ep, stcb, control, dmbuf, end,
- tsn,
- &stcb->sctp_socket->so_rcv)) {
- SCTP_PRINTF("Append fails end:%d\n", end);
- goto failed_pdapi_express_del;
- }
- SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap);
- if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) {
- asoc->highest_tsn_inside_nr_map = tsn;
- }
- SCTP_STAT_INCR(sctps_recvexpressm);
- asoc->tsn_last_delivered = tsn;
- asoc->fragment_flags = chunk_flags;
- asoc->tsn_of_pdapi_last_delivered = tsn;
- asoc->last_flags_delivered = chunk_flags;
- asoc->last_strm_seq_delivered = strmseq;
- asoc->last_strm_no_delivered = strmno;
- if (end) {
- /* clean up the flags and such */
- asoc->fragmented_delivery_inprogress = 0;
- if ((chunk_flags & SCTP_DATA_UNORDERED) == 0) {
- asoc->strmin[strmno].last_sequence_delivered++;
- }
- stcb->asoc.control_pdapi = NULL;
- if (TAILQ_EMPTY(&asoc->reasmqueue) == 0) {
- /*
- * There could be another message
- * ready
- */
- need_reasm_check = 1;
- }
- }
- control = NULL;
- goto finish_express_del;
- }
- }
-failed_pdapi_express_del:
- control = NULL;
- if (SCTP_BASE_SYSCTL(sctp_do_drain) == 0) {
- SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap);
- if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) {
- asoc->highest_tsn_inside_nr_map = tsn;
- }
- } else {
- SCTP_SET_TSN_PRESENT(asoc->mapping_array, gap);
- if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_map)) {
- asoc->highest_tsn_inside_map = tsn;
- }
- }
+ /* Now will we need a chunk too? */
if ((chunk_flags & SCTP_DATA_NOT_FRAG) != SCTP_DATA_NOT_FRAG) {
sctp_alloc_a_chunk(stcb, chk);
if (chk == NULL) {
@@ -1663,7 +2050,8 @@ failed_pdapi_express_del:
}
chk->rec.data.TSN_seq = tsn;
chk->no_fr_allowed = 0;
- chk->rec.data.stream_seq = strmseq;
+ chk->rec.data.fsn_num = fsn;
+ chk->rec.data.stream_seq = msg_id;
chk->rec.data.stream_number = strmno;
chk->rec.data.payloadtype = protocol_id;
chk->rec.data.context = stcb->asoc.context;
@@ -1672,193 +2060,110 @@ failed_pdapi_express_del:
chk->asoc = asoc;
chk->send_size = the_len;
chk->whoTo = net;
+ SCTPDBG(SCTP_DEBUG_XXX, "Building ck: %p for control: %p to be read (msg_id: %u)\n",
+ chk,
+ control, msg_id);
atomic_add_int(&net->ref_count, 1);
chk->data = dmbuf;
+ }
+ /* Set the appropriate TSN mark */
+ if (SCTP_BASE_SYSCTL(sctp_do_drain) == 0) {
+ SCTP_SET_TSN_PRESENT(asoc->nr_mapping_array, gap);
+ if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_nr_map)) {
+ asoc->highest_tsn_inside_nr_map = tsn;
+ }
} else {
- sctp_alloc_a_readq(stcb, control);
- sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
- protocol_id,
- strmno, strmseq,
- chunk_flags,
- dmbuf);
- if (control == NULL) {
- /* No memory so we drop the chunk */
- SCTP_STAT_INCR(sctps_nomem);
- if (last_chunk == 0) {
- /* we copied it, free the copy */
- sctp_m_freem(dmbuf);
- }
- return (0);
+ SCTP_SET_TSN_PRESENT(asoc->mapping_array, gap);
+ if (SCTP_TSN_GT(tsn, asoc->highest_tsn_inside_map)) {
+ asoc->highest_tsn_inside_map = tsn;
}
- control->length = the_len;
}
-
- /* Mark it as received */
- /* Now queue it where it belongs */
- if (control != NULL) {
- /* First a sanity check */
- if (asoc->fragmented_delivery_inprogress) {
+ /* Now is it complete (i.e. not fragmented)? */
+ if ((chunk_flags & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG) {
+ /*
+ * Special check for when streams are resetting. We could be
+ * more smart about this and check the actual stream to see
+ * if it is not being reset.. that way we would not create a
+ * HOLB when amongst streams being reset and those not being
+ * reset.
+ *
+ */
+ if (((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) &&
+ SCTP_TSN_GT(tsn, liste->tsn)) {
/*
- * Ok, we have a fragmented delivery in progress if
- * this chunk is next to deliver OR belongs in our
- * view to the reassembly, the peer is evil or
- * broken.
+ * yep its past where we need to reset... go ahead
+ * and queue it.
*/
- uint32_t estimate_tsn;
-
- estimate_tsn = asoc->tsn_last_delivered + 1;
- if (TAILQ_EMPTY(&asoc->reasmqueue) &&
- (estimate_tsn == control->sinfo_tsn)) {
- /* Evil/Broke peer */
- sctp_m_freem(control->data);
- control->data = NULL;
- if (control->whoFrom) {
- sctp_free_remote_addr(control->whoFrom);
- control->whoFrom = NULL;
- }
- sctp_free_a_readq(stcb, control);
- snprintf(msg, sizeof(msg), "Reas. queue emtpy, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- tsn, strmno, strmseq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- if (last_chunk) {
- *m = NULL;
- }
- return (0);
+ if (TAILQ_EMPTY(&asoc->pending_reply_queue)) {
+ /* first one on */
+ TAILQ_INSERT_TAIL(&asoc->pending_reply_queue, control, next);
} else {
- if (sctp_does_tsn_belong_to_reasm(asoc, control->sinfo_tsn)) {
- sctp_m_freem(control->data);
- control->data = NULL;
- if (control->whoFrom) {
- sctp_free_remote_addr(control->whoFrom);
- control->whoFrom = NULL;
- }
- sctp_free_a_readq(stcb, control);
- snprintf(msg, sizeof(msg), "PD ongoing, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- tsn, strmno, strmseq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_16;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- if (last_chunk) {
- *m = NULL;
+ struct sctp_queued_to_read *ctlOn, *nctlOn;
+ unsigned char inserted = 0;
+
+ TAILQ_FOREACH_SAFE(ctlOn, &asoc->pending_reply_queue, next, nctlOn) {
+ if (SCTP_TSN_GT(control->sinfo_tsn, ctlOn->sinfo_tsn)) {
+
+ continue;
+ } else {
+ /* found it */
+ TAILQ_INSERT_BEFORE(ctlOn, control, next);
+ inserted = 1;
+ break;
}
- return (0);
}
- }
- } else {
- /* No PDAPI running */
- if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
- /*
- * Reassembly queue is NOT empty validate
- * that this tsn does not need to be in
- * reasembly queue. If it does then our peer
- * is broken or evil.
- */
- if (sctp_does_tsn_belong_to_reasm(asoc, control->sinfo_tsn)) {
- sctp_m_freem(control->data);
- control->data = NULL;
- if (control->whoFrom) {
- sctp_free_remote_addr(control->whoFrom);
- control->whoFrom = NULL;
- }
- sctp_free_a_readq(stcb, control);
- snprintf(msg, sizeof(msg), "No PD ongoing, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
- tsn, strmno, strmseq);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_17;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
- if (last_chunk) {
- *m = NULL;
- }
- return (0);
+ if (inserted == 0) {
+ /*
+ * must be put at end, use prevP
+ * (all setup from loop) to setup
+ * nextP.
+ */
+ TAILQ_INSERT_TAIL(&asoc->pending_reply_queue, control, next);
}
}
+ goto finish_express_del;
}
- /* ok, if we reach here we have passed the sanity checks */
if (chunk_flags & SCTP_DATA_UNORDERED) {
/* queue directly into socket buffer */
+ SCTPDBG(SCTP_DEBUG_XXX, "Unordered data to be read control: %p msg_id: %u\n",
+ control, msg_id);
sctp_mark_non_revokable(asoc, control->sinfo_tsn);
sctp_add_to_readq(stcb->sctp_ep, stcb,
control,
- &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
- } else {
- /*
- * Special check for when streams are resetting. We
- * could be more smart about this and check the
- * actual stream to see if it is not being reset..
- * that way we would not create a HOLB when amongst
- * streams being reset and those not being reset.
- *
- * We take complete messages that have a stream reset
- * intervening (aka the TSN is after where our
- * cum-ack needs to be) off and put them on a
- * pending_reply_queue. The reassembly ones we do
- * not have to worry about since they are all sorted
- * and proceessed by TSN order. It is only the
- * singletons I must worry about.
- */
- if (((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) &&
- SCTP_TSN_GT(tsn, liste->tsn)) {
- /*
- * yep its past where we need to reset... go
- * ahead and queue it.
- */
- if (TAILQ_EMPTY(&asoc->pending_reply_queue)) {
- /* first one on */
- TAILQ_INSERT_TAIL(&asoc->pending_reply_queue, control, next);
- } else {
- struct sctp_queued_to_read *ctlOn,
- *nctlOn;
- unsigned char inserted = 0;
+ &stcb->sctp_socket->so_rcv, 1,
+ SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
- TAILQ_FOREACH_SAFE(ctlOn, &asoc->pending_reply_queue, next, nctlOn) {
- if (SCTP_TSN_GT(control->sinfo_tsn, ctlOn->sinfo_tsn)) {
- continue;
- } else {
- /* found it */
- TAILQ_INSERT_BEFORE(ctlOn, control, next);
- inserted = 1;
- break;
- }
- }
- if (inserted == 0) {
- /*
- * must be put at end, use
- * prevP (all setup from
- * loop) to setup nextP.
- */
- TAILQ_INSERT_TAIL(&asoc->pending_reply_queue, control, next);
- }
- }
- } else {
- sctp_queue_data_to_stream(stcb, asoc, control, abort_flag);
- if (*abort_flag) {
- if (last_chunk) {
- *m = NULL;
- }
- return (0);
+ } else {
+ SCTPDBG(SCTP_DEBUG_XXX, "Queue control: %p for reordering msg_id: %u\n", control,
+ msg_id);
+ sctp_queue_data_to_stream(stcb, strm, asoc, control, abort_flag, &need_reasm_check);
+ if (*abort_flag) {
+ if (last_chunk) {
+ *m = NULL;
}
+ return (0);
}
}
- } else {
- /* Into the re-assembly queue */
- sctp_queue_data_for_reasm(stcb, asoc, chk, abort_flag);
- if (*abort_flag) {
- /*
- * the assoc is now gone and chk was put onto the
- * reasm queue, which has all been freed.
- */
- if (last_chunk) {
- *m = NULL;
- }
- return (0);
+ goto finish_express_del;
+ }
+ /* If we reach here its a reassembly */
+ need_reasm_check = 1;
+ SCTPDBG(SCTP_DEBUG_XXX,
+ "Queue data to stream for reasm control: %p msg_id: %u\n",
+ control, msg_id);
+ sctp_queue_data_for_reasm(stcb, asoc, strm, control, chk, created_control, abort_flag, tsn);
+ if (*abort_flag) {
+ /*
+ * the assoc is now gone and chk was put onto the reasm
+ * queue, which has all been freed.
+ */
+ if (last_chunk) {
+ *m = NULL;
}
+ return (0);
}
finish_express_del:
+ /* Here we tidy up things */
if (tsn == (asoc->cumulative_tsn + 1)) {
/* Update cum-ack */
asoc->cumulative_tsn = tsn;
@@ -1874,7 +2179,7 @@ finish_express_del:
SCTP_STAT_INCR(sctps_recvdata);
/* Set it present please */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
- sctp_log_strm_del_alt(stcb, tsn, strmseq, strmno, SCTP_STR_LOG_FROM_MARK_TSN);
+ sctp_log_strm_del_alt(stcb, tsn, msg_id, strmno, SCTP_STR_LOG_FROM_MARK_TSN);
}
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
sctp_log_map(asoc->mapping_array_base_tsn, asoc->cumulative_tsn,
@@ -1893,6 +2198,7 @@ finish_express_del:
sctp_reset_in_stream(stcb, liste->number_entries, liste->list_of_streams);
TAILQ_REMOVE(&asoc->resetHead, liste, next_resp);
+ sctp_send_deferred_reset_response(stcb, liste, SCTP_STREAM_RESET_RESULT_PERFORMED);
SCTP_FREE(liste, SCTP_M_STRESET);
/* sa_ignore FREED_MEMORY */
liste = TAILQ_FIRST(&asoc->resetHead);
@@ -1900,7 +2206,7 @@ finish_express_del:
/* All can be removed */
TAILQ_FOREACH_SAFE(ctl, &asoc->pending_reply_queue, next, nctl) {
TAILQ_REMOVE(&asoc->pending_reply_queue, ctl, next);
- sctp_queue_data_to_stream(stcb, asoc, ctl, abort_flag);
+ sctp_queue_data_to_stream(stcb, strm, asoc, ctl, abort_flag, &need_reasm_check);
if (*abort_flag) {
return (0);
}
@@ -1916,7 +2222,7 @@ finish_express_del:
* ctl->sinfo_tsn > liste->tsn
*/
TAILQ_REMOVE(&asoc->pending_reply_queue, ctl, next);
- sctp_queue_data_to_stream(stcb, asoc, ctl, abort_flag);
+ sctp_queue_data_to_stream(stcb, strm, asoc, ctl, abort_flag, &need_reasm_check);
if (*abort_flag) {
return (0);
}
@@ -1926,17 +2232,17 @@ finish_express_del:
* Now service re-assembly to pick up anything that has been
* held on reassembly queue?
*/
- sctp_deliver_reasm_check(stcb, asoc);
+ (void)sctp_deliver_reasm_check(stcb, asoc, strm, SCTP_READ_LOCK_NOT_HELD);
need_reasm_check = 0;
}
if (need_reasm_check) {
/* Another one waits ? */
- sctp_deliver_reasm_check(stcb, asoc);
+ (void)sctp_deliver_reasm_check(stcb, asoc, strm, SCTP_READ_LOCK_NOT_HELD);
}
return (1);
}
-int8_t sctp_map_lookup_tab[256] = {
+static const int8_t sctp_map_lookup_tab[256] = {
0, 1, 0, 2, 0, 1, 0, 3,
0, 1, 0, 2, 0, 1, 0, 4,
0, 1, 0, 2, 0, 1, 0, 3,
@@ -1980,7 +2286,7 @@ sctp_slide_mapping_arrays(struct sctp_tcb *stcb)
* 1) Did we move the cum-ack point?
*
* When you first glance at this you might think that all entries that
- * make up the postion of the cum-ack would be in the nr-mapping
+ * make up the position of the cum-ack would be in the nr-mapping
* array only.. i.e. things up to the cum-ack are always
* deliverable. Thats true with one exception, when its a fragmented
* message we may not deliver the data until some threshold (or all
@@ -2078,7 +2384,7 @@ sctp_slide_mapping_arrays(struct sctp_tcb *stcb)
#ifdef INVARIANTS
panic("impossible slide");
#else
- SCTP_PRINTF("impossible slide lgap:%x slide_end:%x slide_from:%x? at:%d\n",
+ SCTP_PRINTF("impossible slide lgap: %x slide_end: %x slide_from: %x? at: %d\n",
lgap, slide_end, slide_from, at);
return;
#endif
@@ -2087,7 +2393,7 @@ sctp_slide_mapping_arrays(struct sctp_tcb *stcb)
#ifdef INVARIANTS
panic("would overrun buffer");
#else
- SCTP_PRINTF("Gak, would have overrun map end:%d slide_end:%d\n",
+ SCTP_PRINTF("Gak, would have overrun map end: %d slide_end: %d\n",
asoc->mapping_array_size, slide_end);
slide_end = asoc->mapping_array_size;
#endif
@@ -2166,7 +2472,8 @@ sctp_sack_check(struct sctp_tcb *stcb, int was_a_gap)
*/
if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
- stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_INDATA + SCTP_LOC_18);
+ stcb->sctp_ep, stcb, NULL,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_17);
}
sctp_send_shutdown(stcb,
((stcb->asoc.alternate) ? stcb->asoc.alternate : stcb->asoc.primary_destination));
@@ -2231,76 +2538,12 @@ sctp_sack_check(struct sctp_tcb *stcb, int was_a_gap)
}
}
-void
-sctp_service_queues(struct sctp_tcb *stcb, struct sctp_association *asoc)
-{
- struct sctp_tmit_chunk *chk;
- uint32_t tsize, pd_point;
- uint16_t nxt_todel;
-
- if (asoc->fragmented_delivery_inprogress) {
- sctp_service_reassembly(stcb, asoc);
- }
- /* Can we proceed further, i.e. the PD-API is complete */
- if (asoc->fragmented_delivery_inprogress) {
- /* no */
- return;
- }
- /*
- * Now is there some other chunk I can deliver from the reassembly
- * queue.
- */
-doit_again:
- chk = TAILQ_FIRST(&asoc->reasmqueue);
- if (chk == NULL) {
- asoc->size_on_reasm_queue = 0;
- asoc->cnt_on_reasm_queue = 0;
- return;
- }
- nxt_todel = asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered + 1;
- if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) &&
- ((nxt_todel == chk->rec.data.stream_seq) ||
- (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED))) {
- /*
- * Yep the first one is here. We setup to start reception,
- * by backing down the TSN just in case we can't deliver.
- */
-
- /*
- * Before we start though either all of the message should
- * be here or the socket buffer max or nothing on the
- * delivery queue and something can be delivered.
- */
- if (stcb->sctp_socket) {
- pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket) >> SCTP_PARTIAL_DELIVERY_SHIFT,
- stcb->sctp_ep->partial_delivery_point);
- } else {
- pd_point = stcb->sctp_ep->partial_delivery_point;
- }
- if (sctp_is_all_msg_on_reasm(asoc, &tsize) || (tsize >= pd_point)) {
- asoc->fragmented_delivery_inprogress = 1;
- asoc->tsn_last_delivered = chk->rec.data.TSN_seq - 1;
- asoc->str_of_pdapi = chk->rec.data.stream_number;
- asoc->ssn_of_pdapi = chk->rec.data.stream_seq;
- asoc->pdapi_ppid = chk->rec.data.payloadtype;
- asoc->fragment_flags = chk->rec.data.rcv_flags;
- sctp_service_reassembly(stcb, asoc);
- if (asoc->fragmented_delivery_inprogress == 0) {
- goto doit_again;
- }
- }
- }
-}
-
int
sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
- struct sockaddr *src, struct sockaddr *dst,
- struct sctphdr *sh, struct sctp_inpcb *inp,
- struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t * high_tsn,
- uint8_t use_mflowid, uint32_t mflowid,
- uint32_t vrf_id, uint16_t port)
+ struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, uint32_t * high_tsn)
{
- struct sctp_data_chunk *ch, chunk_buf;
+ struct sctp_chunkhdr *ch, chunk_buf;
struct sctp_association *asoc;
int num_chunks = 0; /* number of control chunks processed */
int stop_proc = 0;
@@ -2338,7 +2581,7 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
*/
if (SCTP_BUF_LEN(m) < (long)MLEN && SCTP_BUF_NEXT(m) == NULL) {
/* we only handle mbufs that are singletons.. not chains */
- m = sctp_get_mbuf_for_msg(SCTP_BUF_LEN(m), 0, M_DONTWAIT, 1, MT_DATA);
+ m = sctp_get_mbuf_for_msg(SCTP_BUF_LEN(m), 0, M_NOWAIT, 1, MT_DATA);
if (m) {
/* ok lets see if we can copy the data up */
caddr_t *from, *to;
@@ -2350,7 +2593,7 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
/* copy the length and free up the old */
SCTP_BUF_LEN(m) = SCTP_BUF_LEN((*mm));
sctp_m_freem(*mm);
- /* sucess, back copy */
+ /* success, back copy */
*mm = m;
} else {
/* We are in trouble in the mbuf world .. yikes */
@@ -2358,8 +2601,8 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
}
}
/* get pointer to the first chunk header */
- ch = (struct sctp_data_chunk *)sctp_m_getptr(m, *offset,
- sizeof(struct sctp_data_chunk), (uint8_t *) & chunk_buf);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_chunkhdr), (uint8_t *) & chunk_buf);
if (ch == NULL) {
return (1);
}
@@ -2371,14 +2614,44 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
asoc->data_pkts_seen++;
while (stop_proc == 0) {
/* validate chunk length */
- chk_length = ntohs(ch->ch.chunk_length);
+ chk_length = ntohs(ch->chunk_length);
if (length - *offset < chk_length) {
/* all done, mutulated chunk */
stop_proc = 1;
continue;
}
- if (ch->ch.chunk_type == SCTP_DATA) {
- if ((size_t)chk_length < sizeof(struct sctp_data_chunk)) {
+ if ((asoc->idata_supported == 1) &&
+ (ch->chunk_type == SCTP_DATA)) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
+ snprintf(msg, sizeof(msg), "%s", "I-DATA chunk received when DATA was negotiated");
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_18;
+ sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ return (2);
+ }
+ if ((asoc->idata_supported == 0) &&
+ (ch->chunk_type == SCTP_IDATA)) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
+ snprintf(msg, sizeof(msg), "%s", "DATA chunk received when I-DATA was negotiated");
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_19;
+ sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ return (2);
+ }
+ if ((ch->chunk_type == SCTP_DATA) ||
+ (ch->chunk_type == SCTP_IDATA)) {
+ int clen;
+
+ if (ch->chunk_type == SCTP_DATA) {
+ clen = sizeof(struct sctp_data_chunk);
+ } else {
+ clen = sizeof(struct sctp_idata_chunk);
+ }
+ if (chk_length < clen) {
/*
* Need to send an abort since we had a
* invalid data chunk.
@@ -2389,26 +2662,8 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
snprintf(msg, sizeof(msg), "DATA chunk of length %d",
chk_length);
op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_19;
- sctp_abort_association(inp, stcb, m, iphlen,
- src, dst, sh, op_err,
- use_mflowid, mflowid,
- vrf_id, port);
- return (2);
- }
- if ((size_t)chk_length == sizeof(struct sctp_data_chunk)) {
- /*
- * Need to send an abort since we had an
- * empty data chunk.
- */
- struct mbuf *op_err;
-
- op_err = sctp_generate_no_user_data_cause(ch->dp.tsn);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_19;
- sctp_abort_association(inp, stcb, m, iphlen,
- src, dst, sh, op_err,
- use_mflowid, mflowid,
- vrf_id, port);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_20;
+ sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
return (2);
}
#ifdef SCTP_AUDITING_ENABLED
@@ -2419,9 +2674,9 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
} else {
last_chunk = 0;
}
- if (sctp_process_a_data_chunk(stcb, asoc, mm, *offset, ch,
+ if (sctp_process_a_data_chunk(stcb, asoc, mm, *offset,
chk_length, net, high_tsn, &abort_flag, &break_flag,
- last_chunk)) {
+ last_chunk, ch->chunk_type)) {
num_chunks++;
}
if (abort_flag)
@@ -2437,7 +2692,7 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
}
} else {
/* not a data chunk in the data region */
- switch (ch->ch.chunk_type) {
+ switch (ch->chunk_type) {
case SCTP_INITIATION:
case SCTP_INITIATION_ACK:
case SCTP_SELECTIVE_ACK:
@@ -2459,64 +2714,50 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
case SCTP_STREAM_RESET:
case SCTP_FORWARD_CUM_TSN:
case SCTP_ASCONF:
- /*
- * Now, what do we do with KNOWN chunks that
- * are NOT in the right place?
- *
- * For now, I do nothing but ignore them. We
- * may later want to add sysctl stuff to
- * switch out and do either an ABORT() or
- * possibly process them.
- */
- if (SCTP_BASE_SYSCTL(sctp_strict_data_order)) {
+ {
+ /*
+ * Now, what do we do with KNOWN
+ * chunks that are NOT in the right
+ * place?
+ *
+ * For now, I do nothing but ignore
+ * them. We may later want to add
+ * sysctl stuff to switch out and do
+ * either an ABORT() or possibly
+ * process them.
+ */
struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, "");
- sctp_abort_association(inp, stcb,
- m, iphlen,
- src, dst,
- sh, op_err,
- use_mflowid, mflowid,
- vrf_id, port);
+ snprintf(msg, sizeof(msg), "DATA chunk followed by chunk of type %2.2x",
+ ch->chunk_type);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
return (2);
}
- break;
default:
/* unknown chunk type, use bit rules */
- if (ch->ch.chunk_type & 0x40) {
+ if (ch->chunk_type & 0x40) {
/* Add a error report to the queue */
- struct mbuf *merr;
- struct sctp_paramhdr *phd;
-
- merr = sctp_get_mbuf_for_msg(sizeof(*phd), 0, M_DONTWAIT, 1, MT_DATA);
- if (merr) {
- phd = mtod(merr, struct sctp_paramhdr *);
- /*
- * We cheat and use param
- * type since we did not
- * bother to define a error
- * cause struct. They are
- * the same basic format
- * with different names.
- */
- phd->param_type =
- htons(SCTP_CAUSE_UNRECOG_CHUNK);
- phd->param_length =
- htons(chk_length + sizeof(*phd));
- SCTP_BUF_LEN(merr) = sizeof(*phd);
- SCTP_BUF_NEXT(merr) = SCTP_M_COPYM(m, *offset, chk_length, M_DONTWAIT);
- if (SCTP_BUF_NEXT(merr)) {
- if (sctp_pad_lastmbuf(SCTP_BUF_NEXT(merr), SCTP_SIZE32(chk_length) - chk_length, NULL)) {
- sctp_m_freem(merr);
- } else {
- sctp_queue_op_err(stcb, merr);
- }
+ struct mbuf *op_err;
+ struct sctp_gen_error_cause *cause;
+
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_gen_error_cause),
+ 0, M_NOWAIT, 1, MT_DATA);
+ if (op_err != NULL) {
+ cause = mtod(op_err, struct sctp_gen_error_cause *);
+ cause->code = htons(SCTP_CAUSE_UNRECOG_CHUNK);
+ cause->length = htons((uint16_t) (chk_length + sizeof(struct sctp_gen_error_cause)));
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_gen_error_cause);
+ SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(m, *offset, chk_length, M_NOWAIT);
+ if (SCTP_BUF_NEXT(op_err) != NULL) {
+ sctp_queue_op_err(stcb, op_err);
} else {
- sctp_m_freem(merr);
+ sctp_m_freem(op_err);
}
}
}
- if ((ch->ch.chunk_type & 0x80) == 0) {
+ if ((ch->chunk_type & 0x80) == 0) {
/* discard the rest of this packet */
stop_proc = 1;
} /* else skip this bad chunk and
@@ -2530,8 +2771,8 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
stop_proc = 1;
continue;
}
- ch = (struct sctp_data_chunk *)sctp_m_getptr(m, *offset,
- sizeof(struct sctp_data_chunk), (uint8_t *) & chunk_buf);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_chunkhdr), (uint8_t *) & chunk_buf);
if (ch == NULL) {
*offset = length;
stop_proc = 1;
@@ -2561,9 +2802,6 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_last_rcvd);
}
/* now service all of the reassm queue if needed */
- if (!(TAILQ_EMPTY(&asoc->reasmqueue)))
- sctp_service_queues(stcb, asoc);
-
if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
/* Assure that we ack right away */
stcb->asoc.send_sack = 1;
@@ -2604,12 +2842,14 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1
* cumack trackers for first transmissions,
* and retransmissions.
*/
- if ((tp1->whoTo->find_pseudo_cumack == 1) && (tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ if ((tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ (tp1->whoTo->find_pseudo_cumack == 1) &&
(tp1->snd_count == 1)) {
tp1->whoTo->pseudo_cumack = tp1->rec.data.TSN_seq;
tp1->whoTo->find_pseudo_cumack = 0;
}
- if ((tp1->whoTo->find_rtx_pseudo_cumack == 1) && (tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ if ((tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ (tp1->whoTo->find_rtx_pseudo_cumack == 1) &&
(tp1->snd_count > 1)) {
tp1->whoTo->rtx_pseudo_cumack = tp1->rec.data.TSN_seq;
tp1->whoTo->find_rtx_pseudo_cumack = 0;
@@ -2697,7 +2937,7 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1
sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_GAP,
tp1->whoTo->flight_size,
tp1->book_size,
- (uintptr_t) tp1->whoTo,
+ (uint32_t) (uintptr_t) tp1->whoTo,
tp1->rec.data.TSN_seq);
}
sctp_flight_size_decrease(tp1);
@@ -2772,6 +3012,11 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1
panic("No chunks on the queues for sid %u.", tp1->rec.data.stream_number);
#endif
}
+ if ((stcb->asoc.strmout[tp1->rec.data.stream_number].chunks_on_queues == 0) &&
+ (stcb->asoc.strmout[tp1->rec.data.stream_number].state == SCTP_STREAM_RESET_PENDING) &&
+ TAILQ_EMPTY(&stcb->asoc.strmout[tp1->rec.data.stream_number].outqueue)) {
+ stcb->asoc.trigger_reset = 1;
+ }
tp1->sent = SCTP_DATAGRAM_NR_ACKED;
if (tp1->data) {
/*
@@ -2901,7 +3146,7 @@ sctp_check_for_revoked(struct sctp_tcb *stcb,
sctp_misc_ints(SCTP_FLIGHT_LOG_UP_REVOKE,
tp1->whoTo->flight_size,
tp1->book_size,
- (uintptr_t) tp1->whoTo,
+ (uint32_t) (uintptr_t) tp1->whoTo,
tp1->rec.data.TSN_seq);
}
sctp_flight_size_increase(tp1);
@@ -2961,7 +3206,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc,
num_dests_sacked++;
}
}
- if (stcb->asoc.peer_supports_prsctp) {
+ if (stcb->asoc.prsctp_supported) {
(void)SCTP_GETTIME_TIMEVAL(&now);
}
TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
@@ -2982,7 +3227,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc,
/* done */
break;
}
- if (stcb->asoc.peer_supports_prsctp) {
+ if (stcb->asoc.prsctp_supported) {
if ((PR_SCTP_TTL_ENABLED(tp1->flags)) && tp1->sent < SCTP_DATAGRAM_ACKED) {
/* Is it expired? */
if (timevalcmp(&now, &tp1->rec.data.timetodrop, >)) {
@@ -3215,7 +3460,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc,
sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_RSND,
(tp1->whoTo ? (tp1->whoTo->flight_size) : 0),
tp1->book_size,
- (uintptr_t) tp1->whoTo,
+ (uint32_t) (uintptr_t) tp1->whoTo,
tp1->rec.data.TSN_seq);
}
if (tp1->whoTo) {
@@ -3236,7 +3481,7 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc,
/* remove from the total flight */
sctp_total_flight_decrease(stcb, tp1);
- if ((stcb->asoc.peer_supports_prsctp) &&
+ if ((stcb->asoc.prsctp_supported) &&
(PR_SCTP_RTX_ENABLED(tp1->flags))) {
/*
* Has it been retransmitted tv_sec times? -
@@ -3381,7 +3626,7 @@ sctp_try_advance_peer_ack_point(struct sctp_tcb *stcb,
struct timeval now;
int now_filled = 0;
- if (asoc->peer_supports_prsctp == 0) {
+ if (asoc->prsctp_supported == 0) {
return (NULL);
}
TAILQ_FOREACH_SAFE(tp1, &asoc->sent_queue, sctp_next, tp2) {
@@ -3467,18 +3712,24 @@ sctp_fs_audit(struct sctp_association *asoc)
{
struct sctp_tmit_chunk *chk;
int inflight = 0, resend = 0, inbetween = 0, acked = 0, above = 0;
- int entry_flight, entry_cnt, ret;
+ int ret;
+
+#ifndef INVARIANTS
+ int entry_flight, entry_cnt;
+#endif
+
+ ret = 0;
+#ifndef INVARIANTS
entry_flight = asoc->total_flight;
entry_cnt = asoc->total_flight_count;
- ret = 0;
-
+#endif
if (asoc->pr_sctp_cnt >= asoc->sent_queue_cnt)
return (0);
TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
if (chk->sent < SCTP_DATAGRAM_RESEND) {
- SCTP_PRINTF("Chk TSN:%u size:%d inflight cnt:%d\n",
+ SCTP_PRINTF("Chk TSN: %u size: %d inflight cnt: %d\n",
chk->rec.data.TSN_seq,
chk->send_size,
chk->snd_count);
@@ -3498,10 +3749,10 @@ sctp_fs_audit(struct sctp_association *asoc)
#ifdef INVARIANTS
panic("Flight size-express incorrect? \n");
#else
- SCTP_PRINTF("asoc->total_flight:%d cnt:%d\n",
+ SCTP_PRINTF("asoc->total_flight: %d cnt: %d\n",
entry_flight, entry_cnt);
- SCTP_PRINTF("Flight size-express incorrect F:%d I:%d R:%d Ab:%d ACK:%d\n",
+ SCTP_PRINTF("Flight size-express incorrect F: %d I: %d R: %d Ab: %d ACK: %d\n",
inflight, inbetween, resend, above, acked);
ret = 1;
#endif
@@ -3519,9 +3770,9 @@ sctp_window_probe_recovery(struct sctp_tcb *stcb,
if ((tp1->sent >= SCTP_DATAGRAM_ACKED) || (tp1->data == NULL)) {
/* TSN's skipped we do NOT move back. */
sctp_misc_ints(SCTP_FLIGHT_LOG_DWN_WP_FWD,
- tp1->whoTo->flight_size,
+ tp1->whoTo ? tp1->whoTo->flight_size : 0,
tp1->book_size,
- (uintptr_t) tp1->whoTo,
+ (uint32_t) (uintptr_t) tp1->whoTo,
tp1->rec.data.TSN_seq);
return;
}
@@ -3540,7 +3791,7 @@ sctp_window_probe_recovery(struct sctp_tcb *stcb,
sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_WP,
tp1->whoTo->flight_size,
tp1->book_size,
- (uintptr_t) tp1->whoTo,
+ (uint32_t) (uintptr_t) tp1->whoTo,
tp1->rec.data.TSN_seq);
}
}
@@ -3557,6 +3808,7 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
int win_probe_recovered = 0;
int j, done_once = 0;
int rto_ok = 1;
+ uint32_t send_s;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_SACK_ARRIVALS_ENABLE) {
sctp_misc_ints(SCTP_SACK_LOG_EXPRESS, cumack,
@@ -3608,36 +3860,25 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
(*stcb->asoc.cc_functions.sctp_cwnd_prepare_net_for_sack) (stcb, net);
}
}
- if (SCTP_BASE_SYSCTL(sctp_strict_sacks)) {
- uint32_t send_s;
-
- if (!TAILQ_EMPTY(&asoc->sent_queue)) {
- tp1 = TAILQ_LAST(&asoc->sent_queue,
- sctpchunk_listhead);
- send_s = tp1->rec.data.TSN_seq + 1;
- } else {
- send_s = asoc->sending_seq;
- }
- if (SCTP_TSN_GE(cumack, send_s)) {
-#ifndef INVARIANTS
- struct mbuf *op_err;
- char msg[SCTP_DIAG_INFO_LEN];
-
-#endif
-#ifdef INVARIANTS
- panic("Impossible sack 1");
-#else
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ tp1 = TAILQ_LAST(&asoc->sent_queue,
+ sctpchunk_listhead);
+ send_s = tp1->rec.data.TSN_seq + 1;
+ } else {
+ send_s = asoc->sending_seq;
+ }
+ if (SCTP_TSN_GE(cumack, send_s)) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
- *abort_now = 1;
- /* XXX */
- snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal then TSN %8.8x",
- cumack, send_s);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- return;
-#endif
- }
+ *abort_now = 1;
+ /* XXX */
+ snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal than TSN %8.8x",
+ cumack, send_s);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_21;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ return;
}
asoc->this_sack_highest_gap = cumack;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
@@ -3666,7 +3907,7 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_CA,
tp1->whoTo->flight_size,
tp1->book_size,
- (uintptr_t) tp1->whoTo,
+ (uint32_t) (uintptr_t) tp1->whoTo,
tp1->rec.data.TSN_seq);
}
sctp_flight_size_decrease(tp1);
@@ -3746,6 +3987,11 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
#endif
}
}
+ if ((asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues == 0) &&
+ (asoc->strmout[tp1->rec.data.stream_number].state == SCTP_STREAM_RESET_PENDING) &&
+ TAILQ_EMPTY(&asoc->strmout[tp1->rec.data.stream_number].outqueue)) {
+ asoc->trigger_reset = 1;
+ }
TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
if (tp1->data) {
/* sa_ignore NO_NULL_CHK */
@@ -3830,7 +4076,9 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
}
if (net->dest_state & SCTP_ADDR_PF) {
net->dest_state &= ~SCTP_ADDR_PF;
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_22);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
asoc->cc_functions.sctp_cwnd_update_exit_pf(stcb, net);
/* Done with this net */
@@ -3916,7 +4164,7 @@ again:
} else if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
stcb, net,
- SCTP_FROM_SCTP_INDATA + SCTP_LOC_22);
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_23);
}
}
}
@@ -3957,28 +4205,8 @@ again:
if ((asoc->stream_queue_cnt == 1) &&
((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
(asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) &&
- (asoc->locked_on_sending)
- ) {
- struct sctp_stream_queue_pending *sp;
-
- /*
- * I may be in a state where we got all across.. but
- * cannot write more due to a shutdown... we abort
- * since the user did not indicate EOR in this case.
- * The sp will be cleaned during free of the asoc.
- */
- sp = TAILQ_LAST(&((asoc->locked_on_sending)->outqueue),
- sctp_streamhead);
- if ((sp) && (sp->length == 0)) {
- /* Let cleanup code purge it */
- if (sp->msg_is_complete) {
- asoc->stream_queue_cnt--;
- } else {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
- asoc->locked_on_sending = NULL;
- asoc->stream_queue_cnt--;
- }
- }
+ ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc))) {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
}
if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) &&
(asoc->stream_queue_cnt == 0)) {
@@ -3992,6 +4220,7 @@ again:
op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_24;
sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ return;
} else {
struct sctp_nets *netp;
@@ -4043,7 +4272,7 @@ again:
asoc->advanced_peer_ack_point = cumack;
}
/* PR-Sctp issues need to be addressed too */
- if ((asoc->peer_supports_prsctp) && (asoc->pr_sctp_cnt > 0)) {
+ if ((asoc->prsctp_supported) && (asoc->pr_sctp_cnt > 0)) {
struct sctp_tmit_chunk *lchk;
uint32_t old_adv_peer_ack_point;
@@ -4173,40 +4402,38 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
sctp_log_fr(*dupdata, 0, 0, SCTP_FR_DUPED);
}
}
- if (SCTP_BASE_SYSCTL(sctp_strict_sacks)) {
- /* reality check */
- if (!TAILQ_EMPTY(&asoc->sent_queue)) {
- tp1 = TAILQ_LAST(&asoc->sent_queue,
- sctpchunk_listhead);
- send_s = tp1->rec.data.TSN_seq + 1;
- } else {
- tp1 = NULL;
- send_s = asoc->sending_seq;
- }
- if (SCTP_TSN_GE(cum_ack, send_s)) {
- struct mbuf *op_err;
- char msg[SCTP_DIAG_INFO_LEN];
+ /* reality check */
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ tp1 = TAILQ_LAST(&asoc->sent_queue,
+ sctpchunk_listhead);
+ send_s = tp1->rec.data.TSN_seq + 1;
+ } else {
+ tp1 = NULL;
+ send_s = asoc->sending_seq;
+ }
+ if (SCTP_TSN_GE(cum_ack, send_s)) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
- /*
- * no way, we have not even sent this TSN out yet.
- * Peer is hopelessly messed up with us.
- */
- SCTP_PRINTF("NEW cum_ack:%x send_s:%x is smaller or equal\n",
- cum_ack, send_s);
- if (tp1) {
- SCTP_PRINTF("Got send_s from tsn:%x + 1 of tp1:%p\n",
- tp1->rec.data.TSN_seq, (void *)tp1);
- }
- hopeless_peer:
- *abort_now = 1;
- /* XXX */
- snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal then TSN %8.8x",
- cum_ack, send_s);
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
- sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
- return;
- }
+ /*
+ * no way, we have not even sent this TSN out yet. Peer is
+ * hopelessly messed up with us.
+ */
+ SCTP_PRINTF("NEW cum_ack:%x send_s:%x is smaller or equal\n",
+ cum_ack, send_s);
+ if (tp1) {
+ SCTP_PRINTF("Got send_s from tsn:%x + 1 of tp1: %p\n",
+ tp1->rec.data.TSN_seq, (void *)tp1);
+ }
+hopeless_peer:
+ *abort_now = 1;
+ /* XXX */
+ snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal than TSN %8.8x",
+ cum_ack, send_s);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ return;
}
/**********************/
/* 1) check the range */
@@ -4299,7 +4526,7 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_CA,
tp1->whoTo->flight_size,
tp1->book_size,
- (uintptr_t) tp1->whoTo,
+ (uint32_t) (uintptr_t) tp1->whoTo,
tp1->rec.data.TSN_seq);
}
sctp_flight_size_decrease(tp1);
@@ -4416,20 +4643,18 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
num_seg, num_nr_seg, &rto_ok)) {
wake_him++;
}
- if (SCTP_BASE_SYSCTL(sctp_strict_sacks)) {
+ /*
+ * validate the biggest_tsn_acked in the gap acks if strict
+ * adherence is wanted.
+ */
+ if (SCTP_TSN_GE(biggest_tsn_acked, send_s)) {
/*
- * validate the biggest_tsn_acked in the gap acks if
- * strict adherence is wanted.
+ * peer is either confused or we are under attack.
+ * We must abort.
*/
- if (SCTP_TSN_GE(biggest_tsn_acked, send_s)) {
- /*
- * peer is either confused or we are under
- * attack. We must abort.
- */
- SCTP_PRINTF("Hopeless peer! biggest_tsn_acked:%x largest seq:%x\n",
- biggest_tsn_acked, send_s);
- goto hopeless_peer;
- }
+ SCTP_PRINTF("Hopeless peer! biggest_tsn_acked:%x largest seq:%x\n",
+ biggest_tsn_acked, send_s);
+ goto hopeless_peer;
}
}
/*******************************************/
@@ -4469,6 +4694,11 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
#endif
}
}
+ if ((asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues == 0) &&
+ (asoc->strmout[tp1->rec.data.stream_number].state == SCTP_STREAM_RESET_PENDING) &&
+ TAILQ_EMPTY(&asoc->strmout[tp1->rec.data.stream_number].outqueue)) {
+ asoc->trigger_reset = 1;
+ }
TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
if (PR_SCTP_ENABLED(tp1->flags)) {
if (asoc->pr_sctp_cnt != 0)
@@ -4480,7 +4710,7 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
sctp_free_bufspace(stcb, asoc, tp1, 1);
sctp_m_freem(tp1->data);
tp1->data = NULL;
- if (asoc->peer_supports_prsctp && PR_SCTP_BUF_ENABLED(tp1->flags)) {
+ if (asoc->prsctp_supported && PR_SCTP_BUF_ENABLED(tp1->flags)) {
asoc->sent_queue_cnt_removeable--;
}
}
@@ -4497,7 +4727,7 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
}
if (TAILQ_EMPTY(&asoc->sent_queue) && (asoc->total_flight > 0)) {
#ifdef INVARIANTS
- panic("Warning flight size is postive and should be 0");
+ panic("Warning flight size is positive and should be 0");
#else
SCTP_PRINTF("Warning flight size incorrect should be 0 is %d\n",
asoc->total_flight);
@@ -4567,7 +4797,7 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
sctp_misc_ints(SCTP_FLIGHT_LOG_UP_REVOKE,
tp1->whoTo->flight_size,
tp1->book_size,
- (uintptr_t) tp1->whoTo,
+ (uint32_t) (uintptr_t) tp1->whoTo,
tp1->rec.data.TSN_seq);
}
sctp_flight_size_increase(tp1);
@@ -4620,7 +4850,9 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
}
if (net->dest_state & SCTP_ADDR_PF) {
net->dest_state &= ~SCTP_ADDR_PF;
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_29);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
asoc->cc_functions.sctp_cwnd_update_exit_pf(stcb, net);
/* Done with this net */
@@ -4643,7 +4875,8 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
/* stop all timers */
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
- stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_30);
+ stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_30);
net->flight_size = 0;
net->partial_bytes_acked = 0;
}
@@ -4668,26 +4901,8 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
if ((asoc->stream_queue_cnt == 1) &&
((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
(asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) &&
- (asoc->locked_on_sending)
- ) {
- struct sctp_stream_queue_pending *sp;
-
- /*
- * I may be in a state where we got all across.. but
- * cannot write more due to a shutdown... we abort
- * since the user did not indicate EOR in this case.
- */
- sp = TAILQ_LAST(&((asoc->locked_on_sending)->outqueue),
- sctp_streamhead);
- if ((sp) && (sp->length == 0)) {
- asoc->locked_on_sending = NULL;
- if (sp->msg_is_complete) {
- asoc->stream_queue_cnt--;
- } else {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
- asoc->stream_queue_cnt--;
- }
- }
+ ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc))) {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
}
if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) &&
(asoc->stream_queue_cnt == 0)) {
@@ -4851,7 +5066,7 @@ again:
} else if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
stcb, net,
- SCTP_FROM_SCTP_INDATA + SCTP_LOC_22);
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
}
}
}
@@ -4892,7 +5107,7 @@ again:
asoc->advanced_peer_ack_point = cum_ack;
}
/* C2. try to further move advancedPeerAckPoint ahead */
- if ((asoc->peer_supports_prsctp) && (asoc->pr_sctp_cnt > 0)) {
+ if ((asoc->prsctp_supported) && (asoc->pr_sctp_cnt > 0)) {
struct sctp_tmit_chunk *lchk;
uint32_t old_adv_peer_ack_point;
@@ -4952,134 +5167,219 @@ sctp_kick_prsctp_reorder_queue(struct sctp_tcb *stcb,
{
struct sctp_queued_to_read *ctl, *nctl;
struct sctp_association *asoc;
- uint16_t tt;
+ uint32_t tt;
+ int need_reasm_check = 0, old;
asoc = &stcb->asoc;
tt = strmin->last_sequence_delivered;
+ if (asoc->idata_supported) {
+ old = 0;
+ } else {
+ old = 1;
+ }
/*
* First deliver anything prior to and including the stream no that
- * came in
+ * came in.
*/
- TAILQ_FOREACH_SAFE(ctl, &strmin->inqueue, next, nctl) {
- if (SCTP_SSN_GE(tt, ctl->sinfo_ssn)) {
+ TAILQ_FOREACH_SAFE(ctl, &strmin->inqueue, next_instrm, nctl) {
+ if (SCTP_MSGID_GE(old, tt, ctl->sinfo_ssn)) {
/* this is deliverable now */
- TAILQ_REMOVE(&strmin->inqueue, ctl, next);
- /* subtract pending on streams */
- asoc->size_on_all_streams -= ctl->length;
- sctp_ucount_decr(asoc->cnt_on_all_streams);
- /* deliver it to at least the delivery-q */
- if (stcb->sctp_socket) {
- sctp_mark_non_revokable(asoc, ctl->sinfo_tsn);
- sctp_add_to_readq(stcb->sctp_ep, stcb,
- ctl,
- &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_HELD, SCTP_SO_NOT_LOCKED);
+ if (((ctl->sinfo_flags >> 8) & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG) {
+ if (ctl->on_strm_q) {
+ if (ctl->on_strm_q == SCTP_ON_ORDERED) {
+ TAILQ_REMOVE(&strmin->inqueue, ctl, next_instrm);
+ } else if (ctl->on_strm_q == SCTP_ON_UNORDERED) {
+ TAILQ_REMOVE(&strmin->uno_inqueue, ctl, next_instrm);
+#ifdef INVARIANTS
+ } else {
+ panic("strmin: %p ctl: %p unknown %d",
+ strmin, ctl, ctl->on_strm_q);
+#endif
+ }
+ ctl->on_strm_q = 0;
+ }
+ /* subtract pending on streams */
+ asoc->size_on_all_streams -= ctl->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ /* deliver it to at least the delivery-q */
+ if (stcb->sctp_socket) {
+ sctp_mark_non_revokable(asoc, ctl->sinfo_tsn);
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ ctl,
+ &stcb->sctp_socket->so_rcv,
+ 1, SCTP_READ_LOCK_HELD,
+ SCTP_SO_NOT_LOCKED);
+ }
+ } else {
+				/* It's a fragmented message */
+ if (ctl->first_frag_seen) {
+ /*
+ * Make it so this is next to
+ * deliver, we restore later
+ */
+ strmin->last_sequence_delivered = ctl->sinfo_ssn - 1;
+ need_reasm_check = 1;
+ break;
+ }
}
} else {
/* no more delivery now. */
break;
}
}
+ if (need_reasm_check) {
+ int ret;
+
+ ret = sctp_deliver_reasm_check(stcb, &stcb->asoc, strmin, SCTP_READ_LOCK_HELD);
+ if (SCTP_MSGID_GT(old, tt, strmin->last_sequence_delivered)) {
+ /* Restore the next to deliver unless we are ahead */
+ strmin->last_sequence_delivered = tt;
+ }
+ if (ret == 0) {
+ /* Left the front Partial one on */
+ return;
+ }
+ need_reasm_check = 0;
+ }
/*
* now we must deliver things in queue the normal way if any are
* now ready.
*/
tt = strmin->last_sequence_delivered + 1;
- TAILQ_FOREACH_SAFE(ctl, &strmin->inqueue, next, nctl) {
+ TAILQ_FOREACH_SAFE(ctl, &strmin->inqueue, next_instrm, nctl) {
if (tt == ctl->sinfo_ssn) {
- /* this is deliverable now */
- TAILQ_REMOVE(&strmin->inqueue, ctl, next);
- /* subtract pending on streams */
- asoc->size_on_all_streams -= ctl->length;
- sctp_ucount_decr(asoc->cnt_on_all_streams);
- /* deliver it to at least the delivery-q */
- strmin->last_sequence_delivered = ctl->sinfo_ssn;
- if (stcb->sctp_socket) {
- sctp_mark_non_revokable(asoc, ctl->sinfo_tsn);
- sctp_add_to_readq(stcb->sctp_ep, stcb,
- ctl,
- &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_HELD, SCTP_SO_NOT_LOCKED);
+ if (((ctl->sinfo_flags >> 8) & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG) {
+ /* this is deliverable now */
+ if (ctl->on_strm_q) {
+ if (ctl->on_strm_q == SCTP_ON_ORDERED) {
+ TAILQ_REMOVE(&strmin->inqueue, ctl, next_instrm);
+ } else if (ctl->on_strm_q == SCTP_ON_UNORDERED) {
+ TAILQ_REMOVE(&strmin->uno_inqueue, ctl, next_instrm);
+#ifdef INVARIANTS
+ } else {
+ panic("strmin: %p ctl: %p unknown %d",
+ strmin, ctl, ctl->on_strm_q);
+#endif
+ }
+ ctl->on_strm_q = 0;
+ }
+ /* subtract pending on streams */
+ asoc->size_on_all_streams -= ctl->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ /* deliver it to at least the delivery-q */
+ strmin->last_sequence_delivered = ctl->sinfo_ssn;
+ if (stcb->sctp_socket) {
+ sctp_mark_non_revokable(asoc, ctl->sinfo_tsn);
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ ctl,
+ &stcb->sctp_socket->so_rcv, 1,
+ SCTP_READ_LOCK_HELD, SCTP_SO_NOT_LOCKED);
+ }
+ tt = strmin->last_sequence_delivered + 1;
+ } else {
+			/* It's a fragmented message */
+ if (ctl->first_frag_seen) {
+ /*
+ * Make it so this is next to
+ * deliver
+ */
+ strmin->last_sequence_delivered = ctl->sinfo_ssn - 1;
+ need_reasm_check = 1;
+ break;
+ }
}
- tt = strmin->last_sequence_delivered + 1;
} else {
break;
}
}
+ if (need_reasm_check) {
+ (void)sctp_deliver_reasm_check(stcb, &stcb->asoc, strmin, SCTP_READ_LOCK_HELD);
+ }
}
+
+
static void
sctp_flush_reassm_for_str_seq(struct sctp_tcb *stcb,
struct sctp_association *asoc,
- uint16_t stream, uint16_t seq)
+ uint16_t stream, uint32_t seq, int ordered, int old, uint32_t cumtsn)
{
+ struct sctp_queued_to_read *control;
+ struct sctp_stream_in *strm;
struct sctp_tmit_chunk *chk, *nchk;
+ int cnt_removed = 0;
- /* For each one on here see if we need to toss it */
/*
- * For now large messages held on the reasmqueue that are complete
+ * For now large messages held on the stream reasm that are complete
* will be tossed too. We could in theory do more work to spin
* through and stop after dumping one msg aka seeing the start of a
* new msg at the head, and call the delivery function... to see if
* it can be delivered... But for now we just dump everything on the
* queue.
*/
- TAILQ_FOREACH_SAFE(chk, &asoc->reasmqueue, sctp_next, nchk) {
- /*
- * Do not toss it if on a different stream or marked for
- * unordered delivery in which case the stream sequence
- * number has no meaning.
- */
- if ((chk->rec.data.stream_number != stream) ||
- ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == SCTP_DATA_UNORDERED)) {
- continue;
- }
- if (chk->rec.data.stream_seq == seq) {
- /* It needs to be tossed */
- TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
- if (SCTP_TSN_GT(chk->rec.data.TSN_seq, asoc->tsn_last_delivered)) {
- asoc->tsn_last_delivered = chk->rec.data.TSN_seq;
- asoc->str_of_pdapi = chk->rec.data.stream_number;
- asoc->ssn_of_pdapi = chk->rec.data.stream_seq;
- asoc->fragment_flags = chk->rec.data.rcv_flags;
- }
- asoc->size_on_reasm_queue -= chk->send_size;
- sctp_ucount_decr(asoc->cnt_on_reasm_queue);
-
- /* Clear up any stream problem */
- if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) != SCTP_DATA_UNORDERED &&
- SCTP_SSN_GT(chk->rec.data.stream_seq, asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered)) {
- /*
- * We must dump forward this streams
- * sequence number if the chunk is not
- * unordered that is being skipped. There is
- * a chance that if the peer does not
- * include the last fragment in its FWD-TSN
- * we WILL have a problem here since you
- * would have a partial chunk in queue that
- * may not be deliverable. Also if a Partial
- * delivery API as started the user may get
- * a partial chunk. The next read returning
- * a new chunk... really ugly but I see no
- * way around it! Maybe a notify??
- */
- asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered = chk->rec.data.stream_seq;
- }
- if (chk->data) {
- sctp_m_freem(chk->data);
- chk->data = NULL;
+ strm = &asoc->strmin[stream];
+ control = sctp_find_reasm_entry(strm, (uint32_t) seq, ordered, old);
+ if (control == NULL) {
+ /* Not found */
+ return;
+ }
+ TAILQ_FOREACH_SAFE(chk, &control->reasm, sctp_next, nchk) {
+ /* Purge hanging chunks */
+ if (old && (ordered == 0)) {
+ if (SCTP_TSN_GT(chk->rec.data.TSN_seq, cumtsn)) {
+ break;
}
- sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
- } else if (SCTP_SSN_GT(chk->rec.data.stream_seq, seq)) {
- /*
- * If the stream_seq is > than the purging one, we
- * are done
- */
- break;
}
+ cnt_removed++;
+ TAILQ_REMOVE(&control->reasm, chk, sctp_next);
+ asoc->size_on_reasm_queue -= chk->send_size;
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
+ }
+ if (!TAILQ_EMPTY(&control->reasm)) {
+ /* This has to be old data, unordered */
+ if (control->data) {
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ }
+ sctp_reset_a_control(control, stcb->sctp_ep, cumtsn);
+ chk = TAILQ_FIRST(&control->reasm);
+ if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+ TAILQ_REMOVE(&control->reasm, chk, sctp_next);
+ sctp_add_chk_to_control(control, strm, stcb, asoc,
+ chk, SCTP_READ_LOCK_HELD);
+ }
+ sctp_deliver_reasm_check(stcb, asoc, strm, SCTP_READ_LOCK_HELD);
+ return;
+ }
+ if (control->on_strm_q == SCTP_ON_ORDERED) {
+ TAILQ_REMOVE(&strm->inqueue, control, next_instrm);
+ control->on_strm_q = 0;
+ } else if (control->on_strm_q == SCTP_ON_UNORDERED) {
+ TAILQ_REMOVE(&strm->uno_inqueue, control, next_instrm);
+ control->on_strm_q = 0;
+#ifdef INVARIANTS
+ } else if (control->on_strm_q) {
+ panic("strm: %p ctl: %p unknown %d",
+ strm, control, control->on_strm_q);
+#endif
+ }
+ control->on_strm_q = 0;
+ if (control->on_read_q == 0) {
+ sctp_free_remote_addr(control->whoFrom);
+ if (control->data) {
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ }
+ sctp_free_a_readq(stcb, control);
}
}
-
void
sctp_handle_forward_tsn(struct sctp_tcb *stcb,
struct sctp_forward_tsn_chunk *fwd,
@@ -5102,7 +5402,6 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb,
unsigned int i, fwd_sz, m_size;
uint32_t str_seq;
struct sctp_stream_in *strm;
- struct sctp_tmit_chunk *chk, *nchk;
struct sctp_queued_to_read *ctl, *sv;
asoc = &stcb->asoc;
@@ -5172,66 +5471,17 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb,
/*************************************************************/
/* 2. Clear up re-assembly queue */
/*************************************************************/
- /*
- * First service it if pd-api is up, just in case we can progress it
- * forward
- */
- if (asoc->fragmented_delivery_inprogress) {
- sctp_service_reassembly(stcb, asoc);
- }
- /* For each one on here see if we need to toss it */
- /*
- * For now large messages held on the reasmqueue that are complete
- * will be tossed too. We could in theory do more work to spin
- * through and stop after dumping one msg aka seeing the start of a
- * new msg at the head, and call the delivery function... to see if
- * it can be delivered... But for now we just dump everything on the
- * queue.
- */
- TAILQ_FOREACH_SAFE(chk, &asoc->reasmqueue, sctp_next, nchk) {
- if (SCTP_TSN_GE(new_cum_tsn, chk->rec.data.TSN_seq)) {
- /* It needs to be tossed */
- TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
- if (SCTP_TSN_GT(chk->rec.data.TSN_seq, asoc->tsn_last_delivered)) {
- asoc->tsn_last_delivered = chk->rec.data.TSN_seq;
- asoc->str_of_pdapi = chk->rec.data.stream_number;
- asoc->ssn_of_pdapi = chk->rec.data.stream_seq;
- asoc->fragment_flags = chk->rec.data.rcv_flags;
- }
- asoc->size_on_reasm_queue -= chk->send_size;
- sctp_ucount_decr(asoc->cnt_on_reasm_queue);
-
- /* Clear up any stream problem */
- if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) != SCTP_DATA_UNORDERED &&
- SCTP_SSN_GT(chk->rec.data.stream_seq, asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered)) {
- /*
- * We must dump forward this streams
- * sequence number if the chunk is not
- * unordered that is being skipped. There is
- * a chance that if the peer does not
- * include the last fragment in its FWD-TSN
- * we WILL have a problem here since you
- * would have a partial chunk in queue that
- * may not be deliverable. Also if a Partial
- * delivery API as started the user may get
- * a partial chunk. The next read returning
- * a new chunk... really ugly but I see no
- * way around it! Maybe a notify??
- */
- asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered = chk->rec.data.stream_seq;
- }
- if (chk->data) {
- sctp_m_freem(chk->data);
- chk->data = NULL;
- }
- sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
- } else {
- /*
- * Ok we have gone beyond the end of the fwd-tsn's
- * mark.
- */
- break;
+
+ /* This is now done as part of clearing up the stream/seq */
+ if (asoc->idata_supported == 0) {
+ uint16_t sid;
+
+ /* Flush all the un-ordered data based on cum-tsn */
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ for (sid = 0; sid < asoc->streamincnt; sid++) {
+ sctp_flush_reassm_for_str_seq(stcb, asoc, sid, 0, 0, 1, new_cum_tsn);
}
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
}
/*******************************************************/
/* 3. Update the PR-stream re-ordering queues and fix */
@@ -5241,27 +5491,53 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb,
if (m && fwd_sz) {
/* New method. */
unsigned int num_str;
+ uint32_t sequence;
+ uint16_t stream;
+ uint16_t ordered, flags;
+ int old;
struct sctp_strseq *stseq, strseqbuf;
+ struct sctp_strseq_mid *stseq_m, strseqbuf_m;
offset += sizeof(*fwd);
SCTP_INP_READ_LOCK(stcb->sctp_ep);
- num_str = fwd_sz / sizeof(struct sctp_strseq);
+ if (asoc->idata_supported) {
+ num_str = fwd_sz / sizeof(struct sctp_strseq_mid);
+ old = 0;
+ } else {
+ num_str = fwd_sz / sizeof(struct sctp_strseq);
+ old = 1;
+ }
for (i = 0; i < num_str; i++) {
- uint16_t st;
-
- stseq = (struct sctp_strseq *)sctp_m_getptr(m, offset,
- sizeof(struct sctp_strseq),
- (uint8_t *) & strseqbuf);
- offset += sizeof(struct sctp_strseq);
- if (stseq == NULL) {
- break;
+ if (asoc->idata_supported) {
+ stseq_m = (struct sctp_strseq_mid *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_strseq_mid),
+ (uint8_t *) & strseqbuf_m);
+ offset += sizeof(struct sctp_strseq_mid);
+ if (stseq_m == NULL) {
+ break;
+ }
+ stream = ntohs(stseq_m->stream);
+ sequence = ntohl(stseq_m->msg_id);
+ flags = ntohs(stseq_m->flags);
+ if (flags & PR_SCTP_UNORDERED_FLAG) {
+ ordered = 0;
+ } else {
+ ordered = 1;
+ }
+ } else {
+ stseq = (struct sctp_strseq *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_strseq),
+ (uint8_t *) & strseqbuf);
+ offset += sizeof(struct sctp_strseq);
+ if (stseq == NULL) {
+ break;
+ }
+ stream = ntohs(stseq->stream);
+ sequence = (uint32_t) ntohs(stseq->sequence);
+ ordered = 1;
}
/* Convert */
- st = ntohs(stseq->stream);
- stseq->stream = st;
- st = ntohs(stseq->sequence);
- stseq->sequence = st;
/* now process */
@@ -5270,12 +5546,12 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb,
* queue where its not all delivered. If we find it
* we transmute the read entry into a PDI_ABORTED.
*/
- if (stseq->stream >= asoc->streamincnt) {
+ if (stream >= asoc->streamincnt) {
/* screwed up streams, stop! */
break;
}
- if ((asoc->str_of_pdapi == stseq->stream) &&
- (asoc->ssn_of_pdapi == stseq->sequence)) {
+ if ((asoc->str_of_pdapi == stream) &&
+ (asoc->ssn_of_pdapi == sequence)) {
/*
* If this is the one we were partially
* delivering now then we no longer are.
@@ -5284,14 +5560,38 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb,
*/
asoc->fragmented_delivery_inprogress = 0;
}
- sctp_flush_reassm_for_str_seq(stcb, asoc, stseq->stream, stseq->sequence);
+ strm = &asoc->strmin[stream];
+ if (asoc->idata_supported == 0) {
+ uint16_t strm_at;
+
+ for (strm_at = strm->last_sequence_delivered; SCTP_MSGID_GE(1, sequence, strm_at); strm_at++) {
+ sctp_flush_reassm_for_str_seq(stcb, asoc, stream, strm_at, ordered, old, new_cum_tsn);
+ }
+ } else {
+ uint32_t strm_at;
+
+ for (strm_at = strm->last_sequence_delivered; SCTP_MSGID_GE(0, sequence, strm_at); strm_at++) {
+ sctp_flush_reassm_for_str_seq(stcb, asoc, stream, strm_at, ordered, old, new_cum_tsn);
+ }
+ }
TAILQ_FOREACH(ctl, &stcb->sctp_ep->read_queue, next) {
- if ((ctl->sinfo_stream == stseq->stream) &&
- (ctl->sinfo_ssn == stseq->sequence)) {
- str_seq = (stseq->stream << 16) | stseq->sequence;
- ctl->end_added = 1;
+ if ((ctl->sinfo_stream == stream) &&
+ (ctl->sinfo_ssn == sequence)) {
+ str_seq = (stream << 16) | (0x0000ffff & sequence);
ctl->pdapi_aborted = 1;
sv = stcb->asoc.control_pdapi;
+ ctl->end_added = 1;
+ if (ctl->on_strm_q == SCTP_ON_ORDERED) {
+ TAILQ_REMOVE(&strm->inqueue, ctl, next_instrm);
+ } else if (ctl->on_strm_q == SCTP_ON_UNORDERED) {
+ TAILQ_REMOVE(&strm->uno_inqueue, ctl, next_instrm);
+#ifdef INVARIANTS
+ } else if (ctl->on_strm_q) {
+ panic("strm: %p ctl: %p unknown %d",
+ strm, ctl, ctl->on_strm_q);
+#endif
+ }
+ ctl->on_strm_q = 0;
stcb->asoc.control_pdapi = ctl;
sctp_ulp_notify(SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION,
stcb,
@@ -5300,16 +5600,15 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb,
SCTP_SO_NOT_LOCKED);
stcb->asoc.control_pdapi = sv;
break;
- } else if ((ctl->sinfo_stream == stseq->stream) &&
- SCTP_SSN_GT(ctl->sinfo_ssn, stseq->sequence)) {
+ } else if ((ctl->sinfo_stream == stream) &&
+ SCTP_MSGID_GT(old, ctl->sinfo_ssn, sequence)) {
/* We are past our victim SSN */
break;
}
}
- strm = &asoc->strmin[stseq->stream];
- if (SCTP_SSN_GT(stseq->sequence, strm->last_sequence_delivered)) {
+ if (SCTP_MSGID_GT(old, sequence, strm->last_sequence_delivered)) {
/* Update the sequence number */
- strm->last_sequence_delivered = stseq->sequence;
+ strm->last_sequence_delivered = sequence;
}
/* now kick the stream the new way */
/* sa_ignore NO_NULL_CHK */
@@ -5321,10 +5620,4 @@ sctp_handle_forward_tsn(struct sctp_tcb *stcb,
* Now slide thing forward.
*/
sctp_slide_mapping_arrays(stcb);
-
- if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
- /* now lets kick out and check for more fragmented delivery */
- /* sa_ignore NO_NULL_CHK */
- sctp_deliver_reasm_check(stcb, &stcb->asoc);
- }
}
diff --git a/freebsd/sys/netinet/sctp_indata.h b/freebsd/sys/netinet/sctp_indata.h
index 79a86e2a..162ca905 100644
--- a/freebsd/sys/netinet/sctp_indata.h
+++ b/freebsd/sys/netinet/sctp_indata.h
@@ -43,35 +43,31 @@ sctp_build_readq_entry(struct sctp_tcb *stcb,
struct sctp_nets *net,
uint32_t tsn, uint32_t ppid,
uint32_t context, uint16_t stream_no,
- uint16_t stream_seq, uint8_t flags,
+ uint32_t stream_seq, uint8_t flags,
struct mbuf *dm);
-#define sctp_build_readq_entry_mac(_ctl, in_it, context, net, tsn, ppid, stream_no, stream_seq, flags, dm) do { \
+#define sctp_build_readq_entry_mac(_ctl, in_it, context, net, tsn, ppid, stream_no, stream_seq, flags, dm, tfsn, msgid) do { \
if (_ctl) { \
atomic_add_int(&((net)->ref_count), 1); \
+ memset(_ctl, 0, sizeof(struct sctp_queued_to_read)); \
(_ctl)->sinfo_stream = stream_no; \
(_ctl)->sinfo_ssn = stream_seq; \
+ TAILQ_INIT(&_ctl->reasm); \
+ (_ctl)->top_fsn = tfsn; \
+ (_ctl)->msg_id = msgid; \
(_ctl)->sinfo_flags = (flags << 8); \
(_ctl)->sinfo_ppid = ppid; \
(_ctl)->sinfo_context = context; \
- (_ctl)->sinfo_timetolive = 0; \
+ (_ctl)->fsn_included = 0xffffffff; \
+ (_ctl)->top_fsn = 0xffffffff; \
(_ctl)->sinfo_tsn = tsn; \
(_ctl)->sinfo_cumtsn = tsn; \
(_ctl)->sinfo_assoc_id = sctp_get_associd((in_it)); \
- (_ctl)->length = 0; \
- (_ctl)->held_length = 0; \
(_ctl)->whoFrom = net; \
(_ctl)->data = dm; \
- (_ctl)->tail_mbuf = NULL; \
- (_ctl)->aux_data = NULL; \
(_ctl)->stcb = (in_it); \
(_ctl)->port_from = (in_it)->rport; \
- (_ctl)->spec_flags = 0; \
- (_ctl)->do_not_ref_stcb = 0; \
- (_ctl)->end_added = 0; \
- (_ctl)->pdapi_aborted = 0; \
- (_ctl)->some_taken = 0; \
} \
} while (0)
@@ -112,12 +108,8 @@ void
int
sctp_process_data(struct mbuf **, int, int *, int,
- struct sockaddr *src, struct sockaddr *dst,
- struct sctphdr *,
struct sctp_inpcb *, struct sctp_tcb *,
- struct sctp_nets *, uint32_t *,
- uint8_t, uint32_t,
- uint32_t, uint16_t);
+ struct sctp_nets *, uint32_t *);
void sctp_slide_mapping_arrays(struct sctp_tcb *stcb);
diff --git a/freebsd/sys/netinet/sctp_input.c b/freebsd/sys/netinet/sctp_input.c
index 9e35c882..621784ea 100644
--- a/freebsd/sys/netinet/sctp_input.c
+++ b/freebsd/sys/netinet/sctp_input.c
@@ -49,7 +49,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp_bsd_addr.h>
#include <netinet/sctp_timer.h>
#include <netinet/sctp_crc32.h>
+#if defined(INET) || defined(INET6)
#include <netinet/udp.h>
+#endif
#include <sys/smp.h>
@@ -85,8 +87,8 @@ static void
sctp_handle_init(struct mbuf *m, int iphlen, int offset,
struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
struct sctp_init_chunk *cp, struct sctp_inpcb *inp,
- struct sctp_tcb *stcb, int *abort_no_unlock,
- uint8_t use_mflowid, uint32_t mflowid,
+ struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_no_unlock,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
struct sctp_init *init;
@@ -101,7 +103,7 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset,
if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_chunk)) {
op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -113,7 +115,7 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset,
/* protocol error... send abort */
op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -123,7 +125,7 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset,
/* invalid parameter... send abort */
op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -133,7 +135,7 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset,
/* protocol error... send abort */
op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -143,7 +145,7 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset,
/* protocol error... send abort */
op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -155,7 +157,7 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset,
op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
"Problem with AUTH parameters");
sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -186,7 +188,7 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset,
op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
"No listener");
sctp_send_abort(m, iphlen, src, dst, sh, 0, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
}
goto outnow;
@@ -198,9 +200,9 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset,
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
} else {
SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending INIT-ACK\n");
- sctp_send_initiate_ack(inp, stcb, m, iphlen, offset, src, dst,
- sh, cp,
- use_mflowid, mflowid,
+ sctp_send_initiate_ack(inp, stcb, net, m, iphlen, offset,
+ src, dst, sh, cp,
+ mflowtype, mflowid,
vrf_id, port,
((stcb == NULL) ? SCTP_HOLDS_LOCK : SCTP_NOT_LOCKED));
}
@@ -221,18 +223,18 @@ sctp_is_there_unsent_data(struct sctp_tcb *stcb, int so_locked
#endif
)
{
- int unsent_data = 0;
+ int unsent_data;
unsigned int i;
struct sctp_stream_queue_pending *sp;
struct sctp_association *asoc;
/*
- * This function returns the number of streams that have true unsent
- * data on them. Note that as it looks through it will clean up any
- * places that have old data that has been sent but left at top of
- * stream queue.
+ * This function returns if any stream has true unsent data on it.
+ * Note that as it looks through it will clean up any places that
+ * have old data that has been sent but left at top of stream queue.
*/
asoc = &stcb->asoc;
+ unsent_data = 0;
SCTP_TCB_SEND_LOCK(stcb);
if (!stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
/* Check to see if some data queued */
@@ -260,6 +262,7 @@ sctp_is_there_unsent_data(struct sctp_tcb *stcb, int so_locked
}
atomic_subtract_int(&stcb->asoc.stream_queue_cnt, 1);
TAILQ_REMOVE(&stcb->asoc.strmout[i].outqueue, sp, next);
+ stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, &asoc->strmout[i], sp, 1);
if (sp->net) {
sctp_free_remote_addr(sp->net);
sp->net = NULL;
@@ -269,8 +272,13 @@ sctp_is_there_unsent_data(struct sctp_tcb *stcb, int so_locked
sp->data = NULL;
}
sctp_free_a_strmoq(stcb, sp, so_locked);
+ if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
+ unsent_data++;
+ }
} else {
unsent_data++;
+ }
+ if (unsent_data > 0) {
break;
}
}
@@ -341,8 +349,9 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb)
for (i = newcnt; i < asoc->pre_open_streams; i++) {
outs = &asoc->strmout[i];
TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) {
+ atomic_subtract_int(&stcb->asoc.stream_queue_cnt, 1);
TAILQ_REMOVE(&outs->outqueue, sp, next);
- asoc->stream_queue_cnt--;
+ stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, outs, sp, 1);
sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL,
stcb, 0, sp, SCTP_SO_NOT_LOCKED);
if (sp->data) {
@@ -357,14 +366,19 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb)
sctp_free_a_strmoq(stcb, sp, SCTP_SO_NOT_LOCKED);
/* sa_ignore FREED_MEMORY */
}
+ outs->state = SCTP_STREAM_CLOSED;
}
}
/* cut back the count */
asoc->pre_open_streams = newcnt;
}
SCTP_TCB_SEND_UNLOCK(stcb);
- asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams;
-
+ asoc->streamoutcnt = asoc->pre_open_streams;
+ if (asoc->strmout) {
+ for (i = 0; i < asoc->streamoutcnt; i++) {
+ asoc->strmout[i].state = SCTP_STREAM_OPEN;
+ }
+ }
/* EY - nr_sack: initialize highest tsn in nr_mapping_array */
asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
@@ -381,17 +395,9 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb)
if (asoc->strmin != NULL) {
/* Free the old ones */
- struct sctp_queued_to_read *ctl, *nctl;
-
for (i = 0; i < asoc->streamincnt; i++) {
- TAILQ_FOREACH_SAFE(ctl, &asoc->strmin[i].inqueue, next, nctl) {
- TAILQ_REMOVE(&asoc->strmin[i].inqueue, ctl, next);
- sctp_free_remote_addr(ctl->whoFrom);
- ctl->whoFrom = NULL;
- sctp_m_freem(ctl->data);
- ctl->data = NULL;
- sctp_free_a_readq(stcb, ctl);
- }
+ sctp_clean_up_stream(stcb, &asoc->strmin[i].inqueue);
+ sctp_clean_up_stream(stcb, &asoc->strmin[i].uno_inqueue);
}
SCTP_FREE(asoc->strmin, SCTP_M_STRMI);
}
@@ -409,8 +415,10 @@ sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb)
}
for (i = 0; i < asoc->streamincnt; i++) {
asoc->strmin[i].stream_no = i;
- asoc->strmin[i].last_sequence_delivered = 0xffff;
+ asoc->strmin[i].last_sequence_delivered = 0xffffffff;
TAILQ_INIT(&asoc->strmin[i].inqueue);
+ TAILQ_INIT(&asoc->strmin[i].uno_inqueue);
+ asoc->strmin[i].pd_api_started = 0;
asoc->strmin[i].delivery_started = 0;
}
/*
@@ -434,7 +442,7 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
struct sctp_nets *net, int *abort_no_unlock,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id)
{
struct sctp_association *asoc;
@@ -466,7 +474,7 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
/* load all addresses */
if ((retval = sctp_load_addresses_from_init(stcb, m,
(offset + sizeof(struct sctp_init_chunk)), initack_limit,
- src, dst, NULL))) {
+ src, dst, NULL, stcb->asoc.port))) {
op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
"Problem with address parameters");
SCTPDBG(SCTP_DEBUG_INPUT1,
@@ -474,13 +482,13 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
retval);
sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
}
/* if the peer doesn't support asconf, flush the asconf queue */
- if (asoc->peer_supports_asconf == 0) {
+ if (asoc->asconf_supported == 0) {
struct sctp_asconf_addr *param, *nparam;
TAILQ_FOREACH_SAFE(param, &asoc->asconf_queue, next, nparam) {
@@ -513,12 +521,11 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
* primary.
*/
sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep, stcb,
- asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_4);
+ asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
/* calculate the RTO */
net->RTO = sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered, sctp_align_safe_nocopy,
SCTP_RTT_FROM_NON_DATA);
-
retval = sctp_send_cookie_echo(m, offset, stcb, net);
if (retval < 0) {
/*
@@ -527,29 +534,25 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
* abandon the peer, its broke.
*/
if (retval == -3) {
+ uint16_t len;
+
+ len = (uint16_t) (sizeof(struct sctp_error_missing_param) + sizeof(uint16_t));
/* We abort with an error of missing mandatory param */
- op_err = sctp_generate_cause(SCTP_CAUSE_MISSING_PARAM, "");
- if (op_err) {
- /*
- * Expand beyond to include the mandatory
- * param cookie
- */
- struct sctp_inv_mandatory_param *mp;
+ op_err = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
+ if (op_err != NULL) {
+ struct sctp_error_missing_param *cause;
- SCTP_BUF_LEN(op_err) =
- sizeof(struct sctp_inv_mandatory_param);
- mp = mtod(op_err,
- struct sctp_inv_mandatory_param *);
+ SCTP_BUF_LEN(op_err) = len;
+ cause = mtod(op_err, struct sctp_error_missing_param *);
/* Subtract the reserved param */
- mp->length =
- htons(sizeof(struct sctp_inv_mandatory_param) - 2);
- mp->num_param = htonl(1);
- mp->param = htons(SCTP_STATE_COOKIE);
- mp->resv = 0;
+ cause->cause.code = htons(SCTP_CAUSE_MISSING_PARAM);
+ cause->cause.length = htons(len);
+ cause->num_missing_params = htonl(1);
+ cause->type[0] = htons(SCTP_STATE_COOKIE);
}
sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
}
@@ -562,21 +565,12 @@ static void
sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
struct sctp_tcb *stcb, struct sctp_nets *net)
{
- struct sockaddr_storage store;
+ union sctp_sockstore store;
struct sctp_nets *r_net, *f_net;
struct timeval tv;
int req_prim = 0;
uint16_t old_error_counter;
-#ifdef INET
- struct sockaddr_in *sin;
-
-#endif
-#ifdef INET6
- struct sockaddr_in6 *sin6;
-
-#endif
-
if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_heartbeat_chunk)) {
/* Invalid length */
return;
@@ -586,12 +580,11 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
#ifdef INET
case AF_INET:
if (cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in)) {
- sin = (struct sockaddr_in *)&store;
- sin->sin_family = cp->heartbeat.hb_info.addr_family;
- sin->sin_len = cp->heartbeat.hb_info.addr_len;
- sin->sin_port = stcb->rport;
- memcpy(&sin->sin_addr, cp->heartbeat.hb_info.address,
- sizeof(sin->sin_addr));
+ store.sin.sin_family = cp->heartbeat.hb_info.addr_family;
+ store.sin.sin_len = cp->heartbeat.hb_info.addr_len;
+ store.sin.sin_port = stcb->rport;
+ memcpy(&store.sin.sin_addr, cp->heartbeat.hb_info.address,
+ sizeof(store.sin.sin_addr));
} else {
return;
}
@@ -600,12 +593,10 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
#ifdef INET6
case AF_INET6:
if (cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in6)) {
- sin6 = (struct sockaddr_in6 *)&store;
- sin6->sin6_family = cp->heartbeat.hb_info.addr_family;
- sin6->sin6_len = cp->heartbeat.hb_info.addr_len;
- sin6->sin6_port = stcb->rport;
- memcpy(&sin6->sin6_addr, cp->heartbeat.hb_info.address,
- sizeof(sin6->sin6_addr));
+ store.sin6.sin6_family = cp->heartbeat.hb_info.addr_family;
+ store.sin6.sin6_len = cp->heartbeat.hb_info.addr_len;
+ store.sin6.sin6_port = stcb->rport;
+ memcpy(&store.sin6.sin6_addr, cp->heartbeat.hb_info.address, sizeof(struct in6_addr));
} else {
return;
}
@@ -614,7 +605,7 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
default:
return;
}
- r_net = sctp_findnet(stcb, (struct sockaddr *)&store);
+ r_net = sctp_findnet(stcb, &store.sa);
if (r_net == NULL) {
SCTPDBG(SCTP_DEBUG_INPUT1, "Huh? I can't find the address I sent it to, discard\n");
return;
@@ -634,7 +625,7 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
if (f_net != r_net) {
/*
* first one on the list is NOT the primary
- * sctp_cmpaddr() is much more efficent if
+ * sctp_cmpaddr() is much more efficient if
* the primary is the first on the list,
* make it so.
*/
@@ -645,7 +636,8 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
}
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
stcb, 0, (void *)r_net, SCTP_SO_NOT_LOCKED);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb,
+ r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_4);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net);
}
old_error_counter = r_net->error_count;
@@ -666,7 +658,8 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
stcb->asoc.cc_functions.sctp_cwnd_update_exit_pf(stcb, net);
}
if (old_error_counter > 0) {
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
+ stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_5);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net);
}
if (r_net == stcb->asoc.primary_destination) {
@@ -685,7 +678,9 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
sctp_is_mobility_feature_on(stcb->sctp_ep,
SCTP_MOBILITY_PRIM_DELETED)) {
- sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_TIMER + SCTP_LOC_7);
+ sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED,
+ stcb->sctp_ep, stcb, NULL,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
if (sctp_is_mobility_feature_on(stcb->sctp_ep,
SCTP_MOBILITY_FASTHANDOFF)) {
sctp_assoc_immediate_retrans(stcb,
@@ -756,7 +751,7 @@ sctp_handle_nat_missing_state(struct sctp_tcb *stcb,
* return 0 means we want you to proceed with the abort non-zero
* means no abort processing
*/
- if (stcb->asoc.peer_supports_auth == 0) {
+ if (stcb->asoc.auth_supported == 0) {
SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_nat_missing_state: Peer does not support AUTH, cannot send an asconf\n");
return (0);
}
@@ -786,10 +781,10 @@ sctp_handle_abort(struct sctp_abort_chunk *abort,
* Need to check the cause codes for our two magic nat
* aborts which don't kill the assoc necessarily.
*/
- struct sctp_missing_nat_state *natc;
+ struct sctp_gen_error_cause *cause;
- natc = (struct sctp_missing_nat_state *)(abort + 1);
- error = ntohs(natc->cause);
+ cause = (struct sctp_gen_error_cause *)(abort + 1);
+ error = ntohs(cause->code);
if (error == SCTP_CAUSE_NAT_COLLIDING_STATE) {
SCTPDBG(SCTP_DEBUG_INPUT2, "Received Colliding state abort flags:%x\n",
abort->ch.chunk_flags);
@@ -807,7 +802,8 @@ sctp_handle_abort(struct sctp_abort_chunk *abort,
error = 0;
}
/* stop any receive timers */
- sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_7);
/* notify user of the abort and clean up... */
sctp_abort_notification(stcb, 1, error, abort, SCTP_SO_NOT_LOCKED);
/* free the tcb */
@@ -829,7 +825,7 @@ sctp_handle_abort(struct sctp_abort_chunk *abort,
#endif
stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
- SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -871,6 +867,7 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
{
struct sctp_association *asoc;
int some_on_streamwheel;
+ int old_state;
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
@@ -889,17 +886,37 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_shutdown_chunk)) {
/* Shutdown NOT the expected size */
return;
- } else {
- sctp_update_acked(stcb, cp, abort_flag);
- if (*abort_flag) {
- return;
- }
+ }
+ old_state = SCTP_GET_STATE(asoc);
+ sctp_update_acked(stcb, cp, abort_flag);
+ if (*abort_flag) {
+ return;
}
if (asoc->control_pdapi) {
/*
* With a normal shutdown we assume the end of last record.
*/
SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ if (asoc->control_pdapi->on_strm_q) {
+ struct sctp_stream_in *strm;
+
+ strm = &asoc->strmin[asoc->control_pdapi->sinfo_stream];
+ if (asoc->control_pdapi->on_strm_q == SCTP_ON_UNORDERED) {
+ /* Unordered */
+ TAILQ_REMOVE(&strm->uno_inqueue, asoc->control_pdapi, next_instrm);
+ asoc->control_pdapi->on_strm_q = 0;
+ } else if (asoc->control_pdapi->on_strm_q == SCTP_ON_ORDERED) {
+ /* Ordered */
+ TAILQ_REMOVE(&strm->inqueue, asoc->control_pdapi, next_instrm);
+ asoc->control_pdapi->on_strm_q = 0;
+#ifdef INVARIANTS
+ } else {
+ panic("Unknown state on ctrl:%p on_strm_q:%d",
+ asoc->control_pdapi,
+ asoc->control_pdapi->on_strm_q);
+#endif
+ }
+ }
asoc->control_pdapi->end_added = 1;
asoc->control_pdapi->pdapi_aborted = 1;
asoc->control_pdapi = NULL;
@@ -917,7 +934,9 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
return;
}
#endif
- sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+ if (stcb->sctp_socket) {
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+ }
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -944,7 +963,8 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
* stop the shutdown timer, since we WILL move to
* SHUTDOWN-ACK-SENT.
*/
- sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
+ net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
}
/* Now is there unsent data on a stream somewhere? */
some_on_streamwheel = sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED);
@@ -962,12 +982,16 @@ sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
(SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
- sctp_stop_timers_for_shutdown(stcb);
- sctp_send_shutdown_ack(stcb, net);
- sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep,
- stcb, net);
+ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown_ack(stcb, net);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
+ stcb->sctp_ep, stcb, net);
+ } else if (old_state == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ sctp_send_shutdown_ack(stcb, net);
+ }
}
}
@@ -1032,12 +1056,13 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp SCTP_UNUSED,
#ifdef INVARIANTS
if (!TAILQ_EMPTY(&asoc->send_queue) ||
!TAILQ_EMPTY(&asoc->sent_queue) ||
- !stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
+ sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED)) {
panic("Queues are not empty when handling SHUTDOWN-ACK");
}
#endif
/* stop the timer */
- sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_10);
/* send SHUTDOWN-COMPLETE */
sctp_send_shutdown_complete(stcb, net, 0);
/* notify upper layer protocol */
@@ -1058,7 +1083,7 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp SCTP_UNUSED,
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
- SCTP_FROM_SCTP_INPUT + SCTP_LOC_10);
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_11);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -1066,7 +1091,7 @@ sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp SCTP_UNUSED,
/*
* Skip past the param header and then we will find the chunk that caused the
- * problem. There are two possiblities ASCONF or FWD-TSN other than that and
+ * problem. There are two possibilities ASCONF or FWD-TSN other than that and
* our peer must be broken.
*/
static void
@@ -1081,8 +1106,9 @@ sctp_process_unrecog_chunk(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr,
case SCTP_ASCONF:
sctp_asconf_cleanup(stcb, net);
break;
+ case SCTP_IFORWARD_CUM_TSN:
case SCTP_FORWARD_CUM_TSN:
- stcb->asoc.peer_supports_prsctp = 0;
+ stcb->asoc.prsctp_supported = 0;
break;
default:
SCTPDBG(SCTP_DEBUG_INPUT2,
@@ -1096,6 +1122,7 @@ sctp_process_unrecog_chunk(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr,
* Skip past the param header and then we will find the param that caused the
* problem. There are a number of param's in a ASCONF OR the prsctp param
* these will turn of specific features.
+ * XXX: Is this the right thing to do?
*/
static void
sctp_process_unrecog_param(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr)
@@ -1106,7 +1133,7 @@ sctp_process_unrecog_param(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr)
switch (ntohs(pbad->param_type)) {
/* pr-sctp draft */
case SCTP_PRSCTP_SUPPORTED:
- stcb->asoc.peer_supports_prsctp = 0;
+ stcb->asoc.prsctp_supported = 0;
break;
case SCTP_SUPPORTED_CHUNK_EXT:
break;
@@ -1117,14 +1144,14 @@ sctp_process_unrecog_param(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr)
case SCTP_ADD_IP_ADDRESS:
case SCTP_DEL_IP_ADDRESS:
case SCTP_SET_PRIM_ADDR:
- stcb->asoc.peer_supports_asconf = 0;
+ stcb->asoc.asconf_supported = 0;
break;
case SCTP_SUCCESS_REPORT:
case SCTP_ERROR_CAUSE_IND:
SCTPDBG(SCTP_DEBUG_INPUT2, "Huh, the peer does not support success? or error cause?\n");
SCTPDBG(SCTP_DEBUG_INPUT2,
"Turning off ASCONF to this strange peer\n");
- stcb->asoc.peer_supports_asconf = 0;
+ stcb->asoc.asconf_supported = 0;
break;
default:
SCTPDBG(SCTP_DEBUG_INPUT2,
@@ -1217,7 +1244,7 @@ sctp_handle_error(struct sctp_chunkhdr *ch,
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
- SCTP_FROM_SCTP_INPUT + SCTP_LOC_11);
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_12);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -1238,7 +1265,7 @@ sctp_handle_error(struct sctp_chunkhdr *ch,
* (or IPv4 for that matter) it does not matter. If
* they don't support that type of address, they can
* NOT possibly get that packet type... i.e. with no
- * IPv6 you can't recieve a IPv6 packet. so we can
+ * IPv6 you can't receive a IPv6 packet. so we can
* safely ignore this one. If we ever added support
* for HOSTNAME Addresses, then we would need to do
* something here.
@@ -1295,7 +1322,7 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
struct sctp_nets *net, int *abort_no_unlock,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id)
{
struct sctp_init_ack *init_ack;
@@ -1314,7 +1341,7 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
@@ -1326,7 +1353,7 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
@@ -1336,7 +1363,7 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
@@ -1346,7 +1373,7 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
@@ -1356,7 +1383,7 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
@@ -1381,7 +1408,7 @@ sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
}
if (sctp_process_init_ack(m, iphlen, offset, src, dst, sh, cp, stcb,
net, abort_no_unlock,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id) < 0) {
/* error in parsing parameters */
return (-1);
@@ -1438,7 +1465,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
struct sctp_inpcb *inp, struct sctp_nets **netp,
struct sockaddr *init_src, int *notification,
int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port);
@@ -1455,7 +1482,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets **netp,
struct sockaddr *init_src, int *notification,
int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
struct sctp_association *asoc;
@@ -1468,6 +1495,11 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
int spec_flag = 0;
uint32_t how_indx;
+#if defined(SCTP_DETAILED_STR_STATS)
+ int j;
+
+#endif
+
net = *netp;
/* I know that the TCB is non-NULL from the caller */
asoc = &stcb->asoc;
@@ -1483,7 +1515,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination);
op_err = sctp_generate_cause(SCTP_CAUSE_COOKIE_IN_SHUTDOWN, "");
sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, net->port);
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 2;
@@ -1564,9 +1596,12 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
return (NULL);
}
/* we have already processed the INIT so no problem */
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb,
- net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_12);
- sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_13);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp,
+ stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_13);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp,
+ stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
/* update current state */
if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
@@ -1646,7 +1681,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
*/
if (sctp_load_addresses_from_init(stcb, m,
init_offset + sizeof(struct sctp_init_chunk),
- initack_offset, src, dst, init_src)) {
+ initack_offset, src, dst, init_src, stcb->asoc.port)) {
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 4;
return (NULL);
@@ -1690,7 +1725,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
*/
op_err = sctp_generate_cause(SCTP_CAUSE_NAT_COLLIDING_STATE, "");
sctp_send_abort(m, iphlen, src, dst, sh, 0, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
return (NULL);
}
@@ -1726,7 +1761,8 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
}
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 8;
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_15);
sctp_stop_all_cookie_timers(stcb);
/*
* since we did not send a HB make sure we don't double
@@ -1772,7 +1808,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
}
if (sctp_load_addresses_from_init(stcb, m,
init_offset + sizeof(struct sctp_init_chunk),
- initack_offset, src, dst, init_src)) {
+ initack_offset, src, dst, init_src, stcb->asoc.port)) {
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 10;
return (NULL);
@@ -1862,7 +1898,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
sh, cookie, cookie_len,
inp, netp, init_src, notification,
auth_skipped, auth_offset, auth_len,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port));
}
/*
@@ -1871,8 +1907,10 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
/* temp code */
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 12;
- sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_15);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_17);
/* notify upper layer */
*notification = SCTP_NOTIFY_ASSOC_RESTART;
@@ -1930,8 +1968,18 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
sctp_report_all_outbound(stcb, 0, 1, SCTP_SO_LOCKED);
for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
stcb->asoc.strmout[i].chunks_on_queues = 0;
+#if defined(SCTP_DETAILED_STR_STATS)
+ for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
+ asoc->strmout[i].abandoned_sent[j] = 0;
+ asoc->strmout[i].abandoned_unsent[j] = 0;
+ }
+#else
+ asoc->strmout[i].abandoned_sent[0] = 0;
+ asoc->strmout[i].abandoned_unsent[0] = 0;
+#endif
stcb->asoc.strmout[i].stream_no = i;
- stcb->asoc.strmout[i].next_sequence_send = 0;
+ stcb->asoc.strmout[i].next_mid_ordered = 0;
+ stcb->asoc.strmout[i].next_mid_unordered = 0;
stcb->asoc.strmout[i].last_msg_incomplete = 0;
}
/* process the INIT-ACK info (my info) */
@@ -1973,7 +2021,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
if (sctp_load_addresses_from_init(stcb, m,
init_offset + sizeof(struct sctp_init_chunk),
- initack_offset, src, dst, init_src)) {
+ initack_offset, src, dst, init_src, stcb->asoc.port)) {
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 14;
@@ -2009,28 +2057,19 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
struct sctp_inpcb *inp, struct sctp_nets **netp,
struct sockaddr *init_src, int *notification,
int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
struct sctp_tcb *stcb;
struct sctp_init_chunk *init_cp, init_buf;
struct sctp_init_ack_chunk *initack_cp, initack_buf;
- struct sockaddr_storage sa_store;
- struct sockaddr *initack_src = (struct sockaddr *)&sa_store;
+ union sctp_sockstore store;
struct sctp_association *asoc;
int init_offset, initack_offset, initack_limit;
int retval;
int error = 0;
uint8_t auth_chunk_buf[SCTP_PARAM_BUFFER_SIZE];
-#ifdef INET
- struct sockaddr_in *sin;
-
-#endif
-#ifdef INET6
- struct sockaddr_in6 *sin6;
-
-#endif
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
@@ -2093,6 +2132,8 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
*/
stcb = sctp_aloc_assoc(inp, init_src, &error,
ntohl(initack_cp->init.initiate_tag), vrf_id,
+ ntohs(initack_cp->init.num_outbound_streams),
+ port,
(struct thread *)NULL
);
if (stcb == NULL) {
@@ -2104,7 +2145,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
return (NULL);
}
@@ -2132,7 +2173,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_UNLOCK(stcb);
@@ -2140,7 +2181,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
SCTP_TCB_LOCK(stcb);
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
- SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_18);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -2171,7 +2212,8 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_19);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -2181,14 +2223,15 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
/* load all addresses */
if (sctp_load_addresses_from_init(stcb, m,
init_offset + sizeof(struct sctp_init_chunk), initack_offset,
- src, dst, init_src)) {
+ src, dst, init_src, port)) {
atomic_add_int(&stcb->asoc.refcnt, 1);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_17);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_20);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -2217,7 +2260,8 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_18);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_21);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -2254,23 +2298,20 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
#ifdef INET
case SCTP_IPV4_ADDRESS:
/* source addr is IPv4 */
- sin = (struct sockaddr_in *)initack_src;
- memset(sin, 0, sizeof(*sin));
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(struct sockaddr_in);
- sin->sin_addr.s_addr = cookie->laddress[0];
+ memset(&store.sin, 0, sizeof(struct sockaddr_in));
+ store.sin.sin_family = AF_INET;
+ store.sin.sin_len = sizeof(struct sockaddr_in);
+ store.sin.sin_addr.s_addr = cookie->laddress[0];
break;
#endif
#ifdef INET6
case SCTP_IPV6_ADDRESS:
/* source addr is IPv6 */
- sin6 = (struct sockaddr_in6 *)initack_src;
- memset(sin6, 0, sizeof(*sin6));
- sin6->sin6_family = AF_INET6;
- sin6->sin6_len = sizeof(struct sockaddr_in6);
- sin6->sin6_scope_id = cookie->scope_id;
- memcpy(&sin6->sin6_addr, cookie->laddress,
- sizeof(sin6->sin6_addr));
+ memset(&store.sin6, 0, sizeof(struct sockaddr_in6));
+ store.sin6.sin6_family = AF_INET6;
+ store.sin6.sin6_len = sizeof(struct sockaddr_in6);
+ store.sin6.sin6_scope_id = cookie->scope_id;
+ memcpy(&store.sin6.sin6_addr, cookie->laddress, sizeof(struct in6_addr));
break;
#endif
default:
@@ -2280,7 +2321,8 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_19);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_22);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -2334,9 +2376,9 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb, NULL);
}
- /* calculate the RTT */
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
- if ((netp) && (*netp)) {
+ if ((netp != NULL) && (*netp != NULL)) {
+ /* calculate the RTT and set the encaps port */
(*netp)->RTO = sctp_calculate_rto(stcb, asoc, *netp,
&cookie->time_entered, sctp_align_unsafe_makecopy,
SCTP_RTT_FROM_NON_DATA);
@@ -2351,7 +2393,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
sctp_check_address_list(stcb, m,
initack_offset + sizeof(struct sctp_init_ack_chunk),
initack_limit - (initack_offset + sizeof(struct sctp_init_ack_chunk)),
- initack_src, cookie->local_scope, cookie->site_scope,
+ &store.sa, cookie->local_scope, cookie->site_scope,
cookie->ipv4_scope, cookie->loopback_scope);
@@ -2382,7 +2424,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
struct sctp_inpcb **inp_p, struct sctp_tcb **stcb, struct sctp_nets **netp,
int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
struct sctp_tcb **locked_tcb,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
struct sctp_state_cookie *cookie;
@@ -2422,8 +2464,8 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
cookie_offset = offset + sizeof(struct sctp_chunkhdr);
cookie_len = ntohs(cp->ch.chunk_length);
- if ((cookie->peerport != sh->src_port) &&
- (cookie->myport != sh->dest_port) &&
+ if ((cookie->peerport != sh->src_port) ||
+ (cookie->myport != sh->dest_port) ||
(cookie->my_vtag != sh->v_tag)) {
/*
* invalid ports or bad tag. Note that we always leave the
@@ -2445,20 +2487,14 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
* calculated in the sctp_hmac_m() call).
*/
sig_offset = offset + cookie_len - SCTP_SIGNATURE_SIZE;
- m_sig = m_split(m, sig_offset, M_DONTWAIT);
+ m_sig = m_split(m, sig_offset, M_NOWAIT);
if (m_sig == NULL) {
/* out of memory or ?? */
return (NULL);
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = m_sig; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_SPLIT);
- }
- }
+ sctp_log_mbc(m_sig, SCTP_MBUF_SPLIT);
}
#endif
@@ -2547,29 +2583,29 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
if (timevalcmp(&now, &time_expires, >)) {
/* cookie is stale! */
struct mbuf *op_err;
- struct sctp_stale_cookie_msg *scm;
+ struct sctp_error_stale_cookie *cause;
uint32_t tim;
- op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_stale_cookie_msg),
- 0, M_DONTWAIT, 1, MT_DATA);
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_error_stale_cookie),
+ 0, M_NOWAIT, 1, MT_DATA);
if (op_err == NULL) {
/* FOOBAR */
return (NULL);
}
/* Set the len */
- SCTP_BUF_LEN(op_err) = sizeof(struct sctp_stale_cookie_msg);
- scm = mtod(op_err, struct sctp_stale_cookie_msg *);
- scm->ph.param_type = htons(SCTP_CAUSE_STALE_COOKIE);
- scm->ph.param_length = htons((sizeof(struct sctp_paramhdr) +
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_error_stale_cookie);
+ cause = mtod(op_err, struct sctp_error_stale_cookie *);
+ cause->cause.code = htons(SCTP_CAUSE_STALE_COOKIE);
+ cause->cause.length = htons((sizeof(struct sctp_paramhdr) +
(sizeof(uint32_t))));
/* seconds to usec */
tim = (now.tv_sec - time_expires.tv_sec) * 1000000;
/* add in usec */
if (tim == 0)
tim = now.tv_usec - cookie->time_entered.tv_usec;
- scm->time_usec = htonl(tim);
+ cause->stale_time = htonl(tim);
sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid, l_inp->fibnum,
vrf_id, port);
return (NULL);
}
@@ -2610,7 +2646,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
/* This should not happen */
return (NULL);
}
- if ((*stcb == NULL) && to) {
+ if (*stcb == NULL) {
/* Yep, lets check */
*stcb = sctp_findassociation_ep_addr(inp_p, to, netp, dst, NULL);
if (*stcb == NULL) {
@@ -2649,9 +2685,6 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
}
}
}
- if (to == NULL) {
- return (NULL);
- }
cookie_len -= SCTP_SIGNATURE_SIZE;
if (*stcb == NULL) {
/* this is the "normal" case... get a new TCB */
@@ -2659,7 +2692,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
cookie, cookie_len, *inp_p,
netp, to, &notification,
auth_skipped, auth_offset, auth_len,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
} else {
/* this is abnormal... cookie-echo on existing TCB */
@@ -2668,7 +2701,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
src, dst, sh,
cookie, cookie_len, *inp_p, *stcb, netp, to,
&notification, auth_skipped, auth_offset, auth_len,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
}
@@ -2676,11 +2709,9 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
/* still no TCB... must be bad cookie-echo */
return (NULL);
}
- if ((*netp != NULL) && (use_mflowid != 0)) {
+ if (*netp != NULL) {
+ (*netp)->flowtype = mflowtype;
(*netp)->flowid = mflowid;
-#ifdef INVARIANTS
- (*netp)->flowidset = 1;
-#endif
}
/*
* Ok, we built an association so confirm the address we sent the
@@ -2692,7 +2723,8 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
*/
if (netl == NULL) {
/* TSNH! Huh, why do I need to add this address here? */
- if (sctp_add_remote_addr(*stcb, to, NULL, SCTP_DONOT_SETSCOPE, SCTP_IN_COOKIE_PROC)) {
+ if (sctp_add_remote_addr(*stcb, to, NULL, port,
+ SCTP_DONOT_SETSCOPE, SCTP_IN_COOKIE_PROC)) {
return (NULL);
}
netl = sctp_findnet(*stcb, to);
@@ -2751,7 +2783,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
sctp_abort_association(*inp_p, NULL, m, iphlen,
src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
pcb_so = SCTP_INP_SO(*inp_p);
@@ -2761,7 +2793,8 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
SCTP_TCB_LOCK((*stcb));
atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(*inp_p, *stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_20);
+ (void)sctp_free_assoc(*inp_p, *stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_23);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(pcb_so, 1);
#endif
@@ -2784,11 +2817,19 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
inp->sctp_mobility_features = (*inp_p)->sctp_mobility_features;
inp->sctp_socket = so;
inp->sctp_frag_point = (*inp_p)->sctp_frag_point;
+ inp->max_cwnd = (*inp_p)->max_cwnd;
inp->sctp_cmt_on_off = (*inp_p)->sctp_cmt_on_off;
- inp->sctp_ecn_enable = (*inp_p)->sctp_ecn_enable;
+ inp->ecn_supported = (*inp_p)->ecn_supported;
+ inp->prsctp_supported = (*inp_p)->prsctp_supported;
+ inp->auth_supported = (*inp_p)->auth_supported;
+ inp->asconf_supported = (*inp_p)->asconf_supported;
+ inp->reconfig_supported = (*inp_p)->reconfig_supported;
+ inp->nrsack_supported = (*inp_p)->nrsack_supported;
+ inp->pktdrop_supported = (*inp_p)->pktdrop_supported;
inp->partial_delivery_point = (*inp_p)->partial_delivery_point;
inp->sctp_context = (*inp_p)->sctp_context;
inp->local_strreset_support = (*inp_p)->local_strreset_support;
+ inp->fibnum = (*inp_p)->fibnum;
inp->inp_starting_point_for_iterator = NULL;
/*
* copy in the authentication parameters from the
@@ -2885,9 +2926,9 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp SCTP_UNUSED,
SCTPDBG(SCTP_DEBUG_INPUT2,
"sctp_handle_cookie_ack: handling COOKIE-ACK\n");
- if (stcb == NULL)
+ if ((stcb == NULL) || (net == NULL)) {
return;
-
+ }
asoc = &stcb->asoc;
sctp_stop_all_cookie_timers(stcb);
@@ -2962,7 +3003,7 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp SCTP_UNUSED,
* in flight)
*/
if ((sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_DO_ASCONF)) &&
- (stcb->asoc.peer_supports_asconf) &&
+ (stcb->asoc.asconf_supported == 1) &&
(!TAILQ_EMPTY(&stcb->asoc.asconf_queue))) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
@@ -3123,7 +3164,6 @@ sctp_handle_ecn_cwr(struct sctp_cwr_chunk *cp, struct sctp_tcb *stcb, struct sct
uint32_t cwr_tsn;
cwr_tsn = ntohl(cp->tsn);
-
override = cp->ch.chunk_flags & SCTP_CWR_REDUCE_OVERRIDE;
TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
if (chk->rec.chunk_id.id != SCTP_ECN_ECHO) {
@@ -3139,10 +3179,8 @@ sctp_handle_ecn_cwr(struct sctp_cwr_chunk *cp, struct sctp_tcb *stcb, struct sct
stcb->asoc.ecn_echo_cnt_onq--;
TAILQ_REMOVE(&stcb->asoc.control_send_queue, chk,
sctp_next);
- if (chk->data) {
- sctp_m_freem(chk->data);
- chk->data = NULL;
- }
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
stcb->asoc.ctrl_queue_cnt--;
sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
if (override == 0) {
@@ -3184,12 +3222,13 @@ sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp SCTP_UNUSE
#ifdef INVARIANTS
if (!TAILQ_EMPTY(&asoc->send_queue) ||
!TAILQ_EMPTY(&asoc->sent_queue) ||
- !stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
+ sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED)) {
panic("Queues are not empty when handling SHUTDOWN-COMPLETE");
}
#endif
/* stop the timer */
- sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_22);
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_24);
SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
/* free the TCB */
SCTPDBG(SCTP_DEBUG_INPUT2,
@@ -3202,7 +3241,8 @@ sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp SCTP_UNUSE
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_23);
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_25);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -3310,7 +3350,8 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc,
/* restart the timer */
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
- stcb, tp1->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_24);
+ stcb, tp1->whoTo,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_26);
sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
stcb, tp1->whoTo);
@@ -3319,7 +3360,7 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc,
sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PDRP,
tp1->whoTo->flight_size,
tp1->book_size,
- (uintptr_t) stcb,
+ (uint32_t) (uintptr_t) stcb,
tp1->rec.data.TSN_seq);
}
if (tp1->sent < SCTP_DATAGRAM_RESEND) {
@@ -3378,7 +3419,8 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc,
* this, otherwise we let the timer fire.
*/
sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep,
- stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_25);
+ stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
}
break;
@@ -3429,6 +3471,7 @@ process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc,
/* resend last asconf ack */
sctp_send_asconf_ack(stcb);
break;
+ case SCTP_IFORWARD_CUM_TSN:
case SCTP_FORWARD_CUM_TSN:
send_forward_tsn(stcb, &stcb->asoc);
break;
@@ -3454,8 +3497,8 @@ sctp_reset_in_stream(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t *
uint16_t temp;
/*
- * We set things to 0xffff since this is the last delivered sequence
- * and we will be sending in 0 after the reset.
+ * We set things to 0xffffffff since this is the last delivered
+ * sequence and we will be sending in 0 after the reset.
*/
if (number_entries) {
@@ -3464,12 +3507,12 @@ sctp_reset_in_stream(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t *
if (temp >= stcb->asoc.streamincnt) {
continue;
}
- stcb->asoc.strmin[temp].last_sequence_delivered = 0xffff;
+ stcb->asoc.strmin[temp].last_sequence_delivered = 0xffffffff;
}
} else {
list = NULL;
for (i = 0; i < stcb->asoc.streamincnt; i++) {
- stcb->asoc.strmin[i].last_sequence_delivered = 0xffff;
+ stcb->asoc.strmin[i].last_sequence_delivered = 0xffffffff;
}
}
sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_RECV, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
@@ -3488,23 +3531,47 @@ sctp_reset_out_streams(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t
/* no such stream */
continue;
}
- stcb->asoc.strmout[temp].next_sequence_send = 0;
+ stcb->asoc.strmout[temp].next_mid_ordered = 0;
+ stcb->asoc.strmout[temp].next_mid_unordered = 0;
}
} else {
for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
- stcb->asoc.strmout[i].next_sequence_send = 0;
+ stcb->asoc.strmout[i].next_mid_ordered = 0;
+ stcb->asoc.strmout[i].next_mid_unordered = 0;
}
}
sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_SEND, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
}
+static void
+sctp_reset_clear_pending(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t * list)
+{
+ uint32_t i;
+ uint16_t temp;
-struct sctp_stream_reset_out_request *
+ if (number_entries > 0) {
+ for (i = 0; i < number_entries; i++) {
+ temp = ntohs(list[i]);
+ if (temp >= stcb->asoc.streamoutcnt) {
+ /* no such stream */
+ continue;
+ }
+ stcb->asoc.strmout[temp].state = SCTP_STREAM_OPEN;
+ }
+ } else {
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ stcb->asoc.strmout[i].state = SCTP_STREAM_OPEN;
+ }
+ }
+}
+
+
+struct sctp_stream_reset_request *
sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq, struct sctp_tmit_chunk **bchk)
{
struct sctp_association *asoc;
struct sctp_chunkhdr *ch;
- struct sctp_stream_reset_out_request *r;
+ struct sctp_stream_reset_request *r;
struct sctp_tmit_chunk *chk;
int len, clen;
@@ -3527,7 +3594,7 @@ sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq, struct sctp_tmit_chu
}
clen = chk->send_size;
ch = mtod(chk->data, struct sctp_chunkhdr *);
- r = (struct sctp_stream_reset_out_request *)(ch + 1);
+ r = (struct sctp_stream_reset_request *)(ch + 1);
if (ntohl(r->request_seq) == seq) {
/* found it */
return (r);
@@ -3535,7 +3602,7 @@ sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq, struct sctp_tmit_chu
len = SCTP_SIZE32(ntohs(r->ph.param_length));
if (clen > (len + (int)sizeof(struct sctp_chunkhdr))) {
/* move to the next one, there can only be a max of two */
- r = (struct sctp_stream_reset_out_request *)((caddr_t)r + len);
+ r = (struct sctp_stream_reset_request *)((caddr_t)r + len);
if (ntohl(r->request_seq) == seq) {
return (r);
}
@@ -3555,7 +3622,8 @@ sctp_clean_up_stream_reset(struct sctp_tcb *stcb)
}
asoc = &stcb->asoc;
- sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_26);
+ sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb,
+ chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_28);
TAILQ_REMOVE(&asoc->control_send_queue,
chk,
sctp_next);
@@ -3579,7 +3647,9 @@ sctp_handle_stream_reset_response(struct sctp_tcb *stcb,
int lparm_len;
struct sctp_association *asoc = &stcb->asoc;
struct sctp_tmit_chunk *chk;
- struct sctp_stream_reset_out_request *srparam;
+ struct sctp_stream_reset_request *req_param;
+ struct sctp_stream_reset_out_request *req_out_param;
+ struct sctp_stream_reset_in_request *req_in_param;
uint32_t number_entries;
if (asoc->stream_reset_outstanding == 0) {
@@ -3587,35 +3657,50 @@ sctp_handle_stream_reset_response(struct sctp_tcb *stcb,
return (0);
}
if (seq == stcb->asoc.str_reset_seq_out) {
- srparam = sctp_find_stream_reset(stcb, seq, &chk);
- if (srparam) {
+ req_param = sctp_find_stream_reset(stcb, seq, &chk);
+ if (req_param != NULL) {
stcb->asoc.str_reset_seq_out++;
- type = ntohs(srparam->ph.param_type);
- lparm_len = ntohs(srparam->ph.param_length);
+ type = ntohs(req_param->ph.param_type);
+ lparm_len = ntohs(req_param->ph.param_length);
if (type == SCTP_STR_RESET_OUT_REQUEST) {
+ int no_clear = 0;
+
+ req_out_param = (struct sctp_stream_reset_out_request *)req_param;
number_entries = (lparm_len - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t);
asoc->stream_reset_out_is_outstanding = 0;
if (asoc->stream_reset_outstanding)
asoc->stream_reset_outstanding--;
if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
/* do it */
- sctp_reset_out_streams(stcb, number_entries, srparam->list_of_streams);
+ sctp_reset_out_streams(stcb, number_entries, req_out_param->list_of_streams);
} else if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
- sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_OUT, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_OUT, stcb, number_entries, req_out_param->list_of_streams, SCTP_SO_NOT_LOCKED);
+ } else if (action == SCTP_STREAM_RESET_RESULT_IN_PROGRESS) {
+ /*
+ * Set it up so we don't stop
+ * retransmitting
+ */
+ asoc->stream_reset_outstanding++;
+ stcb->asoc.str_reset_seq_out--;
+ asoc->stream_reset_out_is_outstanding = 1;
+ no_clear = 1;
} else {
- sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_OUT, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_OUT, stcb, number_entries, req_out_param->list_of_streams, SCTP_SO_NOT_LOCKED);
+ }
+ if (no_clear == 0) {
+ sctp_reset_clear_pending(stcb, number_entries, req_out_param->list_of_streams);
}
} else if (type == SCTP_STR_RESET_IN_REQUEST) {
- /* Answered my request */
+ req_in_param = (struct sctp_stream_reset_in_request *)req_param;
number_entries = (lparm_len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t);
if (asoc->stream_reset_outstanding)
asoc->stream_reset_outstanding--;
if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_IN, stcb,
- number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ number_entries, req_in_param->list_of_streams, SCTP_SO_NOT_LOCKED);
} else if (action != SCTP_STREAM_RESET_RESULT_PERFORMED) {
sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_IN, stcb,
- number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ number_entries, req_in_param->list_of_streams, SCTP_SO_NOT_LOCKED);
}
} else if (type == SCTP_STR_RESET_ADD_OUT_STREAMS) {
/* Ok we now may have more streams */
@@ -3631,7 +3716,12 @@ sctp_handle_stream_reset_response(struct sctp_tcb *stcb,
asoc->stream_reset_outstanding--;
if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
/* Put the new streams into effect */
- stcb->asoc.streamoutcnt += num_stream;
+ int i;
+
+ for (i = asoc->streamoutcnt; i < (asoc->streamoutcnt + num_stream); i++) {
+ asoc->strmout[i].state = SCTP_STREAM_OPEN;
+ }
+ asoc->streamoutcnt += num_stream;
sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, 0);
} else if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt,
@@ -3708,6 +3798,9 @@ sctp_handle_stream_reset_response(struct sctp_tcb *stcb,
}
}
}
+ if (asoc->stream_reset_outstanding == 0) {
+ sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_NOT_LOCKED);
+ }
return (0);
}
@@ -3738,22 +3831,33 @@ sctp_handle_str_reset_request_in(struct sctp_tcb *stcb,
} else if (stcb->asoc.stream_reset_out_is_outstanding == 0) {
len = ntohs(req->ph.param_length);
number_entries = ((len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t));
- for (i = 0; i < number_entries; i++) {
- temp = ntohs(req->list_of_streams[i]);
- req->list_of_streams[i] = temp;
+ if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ temp = ntohs(req->list_of_streams[i]);
+ if (temp >= stcb->asoc.streamoutcnt) {
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
+ goto bad_boy;
+ }
+ req->list_of_streams[i] = temp;
+ }
+ for (i = 0; i < number_entries; i++) {
+ if (stcb->asoc.strmout[req->list_of_streams[i]].state == SCTP_STREAM_OPEN) {
+ stcb->asoc.strmout[req->list_of_streams[i]].state = SCTP_STREAM_RESET_PENDING;
+ }
+ }
+ } else {
+ /* Its all */
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if (stcb->asoc.strmout[i].state == SCTP_STREAM_OPEN)
+ stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_PENDING;
+ }
}
asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
- sctp_add_stream_reset_out(chk, number_entries, req->list_of_streams,
- asoc->str_reset_seq_out,
- seq, (asoc->sending_seq - 1));
- asoc->stream_reset_out_is_outstanding = 1;
- asoc->str_reset = chk;
- sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
- stcb->asoc.stream_reset_outstanding++;
} else {
/* Can't do it, since we have sent one out */
asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_ERR_IN_PROGRESS;
}
+bad_boy:
sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
asoc->str_reset_seq_in++;
} else if (asoc->str_reset_seq_in - 1 == seq) {
@@ -3763,6 +3867,7 @@ sctp_handle_str_reset_request_in(struct sctp_tcb *stcb,
} else {
sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO);
}
+ sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_NOT_LOCKED);
}
static int
@@ -3881,11 +3986,12 @@ sctp_handle_str_reset_request_out(struct sctp_tcb *stcb,
sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
return;
}
+ liste->seq = seq;
liste->tsn = tsn;
liste->number_entries = number_entries;
memcpy(&liste->list_of_streams, req->list_of_streams, number_entries * sizeof(uint16_t));
TAILQ_INSERT_TAIL(&asoc->resetHead, liste, next_resp);
- asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_IN_PROGRESS;
}
sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
asoc->str_reset_seq_in++;
@@ -3949,20 +4055,28 @@ sctp_handle_str_reset_add_strm(struct sctp_tcb *stcb, struct sctp_tmit_chunk *ch
/* copy off the old data */
for (i = 0; i < stcb->asoc.streamincnt; i++) {
TAILQ_INIT(&stcb->asoc.strmin[i].inqueue);
+ TAILQ_INIT(&stcb->asoc.strmin[i].uno_inqueue);
stcb->asoc.strmin[i].stream_no = i;
stcb->asoc.strmin[i].last_sequence_delivered = oldstrm[i].last_sequence_delivered;
stcb->asoc.strmin[i].delivery_started = oldstrm[i].delivery_started;
+ stcb->asoc.strmin[i].pd_api_started = oldstrm[i].pd_api_started;
/* now anything on those queues? */
- TAILQ_FOREACH_SAFE(ctl, &oldstrm[i].inqueue, next, nctl) {
- TAILQ_REMOVE(&oldstrm[i].inqueue, ctl, next);
- TAILQ_INSERT_TAIL(&stcb->asoc.strmin[i].inqueue, ctl, next);
+ TAILQ_FOREACH_SAFE(ctl, &oldstrm[i].inqueue, next_instrm, nctl) {
+ TAILQ_REMOVE(&oldstrm[i].inqueue, ctl, next_instrm);
+ TAILQ_INSERT_TAIL(&stcb->asoc.strmin[i].inqueue, ctl, next_instrm);
+ }
+ TAILQ_FOREACH_SAFE(ctl, &oldstrm[i].uno_inqueue, next_instrm, nctl) {
+ TAILQ_REMOVE(&oldstrm[i].uno_inqueue, ctl, next_instrm);
+ TAILQ_INSERT_TAIL(&stcb->asoc.strmin[i].uno_inqueue, ctl, next_instrm);
}
}
/* Init the new streams */
for (i = stcb->asoc.streamincnt; i < num_stream; i++) {
TAILQ_INIT(&stcb->asoc.strmin[i].inqueue);
+ TAILQ_INIT(&stcb->asoc.strmin[i].uno_inqueue);
stcb->asoc.strmin[i].stream_no = i;
- stcb->asoc.strmin[i].last_sequence_delivered = 0xffff;
+ stcb->asoc.strmin[i].last_sequence_delivered = 0xffffffff;
+ stcb->asoc.strmin[i].pd_api_started = 0;
stcb->asoc.strmin[i].delivery_started = 0;
}
SCTP_FREE(oldstrm, SCTP_M_STRMI);
@@ -4022,7 +4136,7 @@ sctp_handle_str_reset_add_out_strm(struct sctp_tcb *stcb, struct sctp_tmit_chunk
mychk += num_stream;
if (mychk < 0x10000) {
stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
- if (sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 0, 1, num_stream, 0, 1)) {
+ if (sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 1, num_stream, 0, 1)) {
stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
}
} else {
@@ -4075,13 +4189,15 @@ __attribute__((noinline))
if (chk == NULL) {
return (ret_code);
}
+ chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_STREAM_RESET;
chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->no_fr_allowed = 0;
chk->book_size = chk->send_size = sizeof(struct sctp_chunkhdr);
chk->book_size_scale = 0;
- chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (chk->data == NULL) {
strres_nochunk:
if (chk->data) {
@@ -4366,7 +4482,7 @@ sctp_handle_packet_dropped(struct sctp_pktdrop_chunk *cp,
(stcb->asoc.sat_t3_loss_recovery == 0) &&
(stcb->asoc.sat_network)) {
/*
- * This is debateable but for sat networks it makes sense
+ * This is debatable but for sat networks it makes sense
* Note if a T3 timer has went off, we will prohibit any
* changes to cwnd until we exit the t3 loss recovery.
*/
@@ -4392,7 +4508,7 @@ __attribute__((noinline))
struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_chunkhdr *ch, struct sctp_inpcb *inp,
struct sctp_tcb *stcb, struct sctp_nets **netp, int *fwd_tsn_seen,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
struct sctp_association *asoc;
@@ -4461,7 +4577,7 @@ __attribute__((noinline))
*/
if ((ch->chunk_type == SCTP_AUTHENTICATION) &&
(stcb == NULL) &&
- !SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+ (inp->auth_supported == 1)) {
/* save this chunk for later processing */
auth_skipped = 1;
auth_offset = *offset;
@@ -4551,12 +4667,12 @@ __attribute__((noinline))
}
}
if (stcb == NULL) {
- snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s\n", __FILE__, __LINE__, __FUNCTION__);
+ snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
msg);
/* no association, so it's out of the blue... */
sctp_handle_ootb(m, iphlen, *offset, src, dst, sh, inp, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
*offset = length;
if (locked_tcb) {
@@ -4595,12 +4711,12 @@ __attribute__((noinline))
if (locked_tcb) {
SCTP_TCB_UNLOCK(locked_tcb);
}
- snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s\n", __FILE__, __LINE__, __FUNCTION__);
+ snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
msg);
sctp_handle_ootb(m, iphlen, *offset, src, dst,
sh, inp, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
return (NULL);
}
@@ -4728,7 +4844,7 @@ process_control_chunks:
/* check to see if this chunk required auth, but isn't */
if ((stcb != NULL) &&
- !SCTP_BASE_SYSCTL(sctp_auth_disable) &&
+ (stcb->asoc.auth_supported == 1) &&
sctp_auth_is_required_chunk(ch->chunk_type, stcb->asoc.local_auth_chunks) &&
!stcb->asoc.authenticated) {
/* "silently" ignore */
@@ -4741,13 +4857,11 @@ process_control_chunks:
/* The INIT chunk must be the only chunk. */
if ((num_chunks > 1) ||
(length - *offset > (int)SCTP_SIZE32(chk_length))) {
- op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
- "INIT not the only chunk");
- sctp_abort_association(inp, stcb, m, iphlen,
- src, dst, sh, op_err,
- use_mflowid, mflowid,
- vrf_id, port);
+ /* RFC 4960 requires that no ABORT is sent */
*offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
return (NULL);
}
/* Honor our resource limit. */
@@ -4755,15 +4869,15 @@ process_control_chunks:
op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
sctp_abort_association(inp, stcb, m, iphlen,
src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
*offset = length;
return (NULL);
}
sctp_handle_init(m, iphlen, *offset, src, dst, sh,
(struct sctp_init_chunk *)ch, inp,
- stcb, &abort_no_unlock,
- use_mflowid, mflowid,
+ stcb, *netp, &abort_no_unlock,
+ mflowtype, mflowid,
vrf_id, port);
*offset = length;
if ((!abort_no_unlock) && (locked_tcb)) {
@@ -4780,7 +4894,7 @@ process_control_chunks:
if ((stcb) && (stcb->asoc.total_output_queue_size)) {
;
} else {
- if (locked_tcb != stcb) {
+ if ((locked_tcb != NULL) && (locked_tcb != stcb)) {
/* Very unlikely */
SCTP_TCB_UNLOCK(locked_tcb);
}
@@ -4794,7 +4908,8 @@ process_control_chunks:
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_29);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -4817,7 +4932,7 @@ process_control_chunks:
(struct sctp_init_ack_chunk *)ch,
stcb, *netp,
&abort_no_unlock,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id);
} else {
ret = -1;
@@ -4936,8 +5051,7 @@ process_control_chunks:
SCTPDBG(SCTP_DEBUG_INDATA1, "No stcb when processing NR-SACK chunk\n");
break;
}
- if ((stcb->asoc.sctp_nr_sack_on_off == 0) ||
- (stcb->asoc.peer_supports_nr_sack == 0)) {
+ if (stcb->asoc.nrsack_supported == 0) {
goto unknown_chunk;
}
if (chk_length < sizeof(struct sctp_nr_sack_chunk)) {
@@ -5123,7 +5237,7 @@ process_control_chunks:
op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
sctp_abort_association(inp, stcb, m, iphlen,
src, dst, sh, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
}
*offset = length;
@@ -5158,7 +5272,7 @@ process_control_chunks:
auth_offset,
auth_len,
&locked_tcb,
- use_mflowid,
+ mflowtype,
mflowid,
vrf_id,
port);
@@ -5215,7 +5329,8 @@ process_control_chunks:
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_30);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -5248,6 +5363,9 @@ process_control_chunks:
return (NULL);
}
if (stcb) {
+ if (stcb->asoc.ecn_supported == 0) {
+ goto unknown_chunk;
+ }
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
stcb->asoc.overall_error_count,
@@ -5273,6 +5391,9 @@ process_control_chunks:
return (NULL);
}
if (stcb) {
+ if (stcb->asoc.ecn_supported == 0) {
+ goto unknown_chunk;
+ }
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
stcb->asoc.overall_error_count,
@@ -5306,6 +5427,9 @@ process_control_chunks:
SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ASCONF\n");
/* He's alive so give him credit */
if (stcb) {
+ if (stcb->asoc.asconf_supported == 0) {
+ goto unknown_chunk;
+ }
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
stcb->asoc.overall_error_count,
@@ -5330,6 +5454,9 @@ process_control_chunks:
return (NULL);
}
if ((stcb) && netp && *netp) {
+ if (stcb->asoc.asconf_supported == 0) {
+ goto unknown_chunk;
+ }
/* He's alive so give him credit */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
@@ -5346,6 +5473,7 @@ process_control_chunks:
}
break;
case SCTP_FORWARD_CUM_TSN:
+ case SCTP_IFORWARD_CUM_TSN:
SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_FWD-TSN\n");
if (chk_length < sizeof(struct sctp_forward_tsn_chunk)) {
/* Its not ours */
@@ -5359,6 +5487,9 @@ process_control_chunks:
if (stcb) {
int abort_flag = 0;
+ if (stcb->asoc.prsctp_supported == 0) {
+ goto unknown_chunk;
+ }
stcb->asoc.overall_error_count = 0;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
@@ -5378,7 +5509,8 @@ process_control_chunks:
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_29);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_31);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -5413,13 +5545,8 @@ process_control_chunks:
*offset = length;
return (NULL);
}
- if (stcb->asoc.peer_supports_strreset == 0) {
- /*
- * hmm, peer should have announced this, but
- * we will turn it on since he is sending us
- * a stream reset.
- */
- stcb->asoc.peer_supports_strreset = 1;
+ if (stcb->asoc.reconfig_supported == 0) {
+ goto unknown_chunk;
}
if (sctp_handle_stream_reset(stcb, m, *offset, ch)) {
/* stop processing */
@@ -5439,18 +5566,17 @@ process_control_chunks:
return (NULL);
}
if (ch && (stcb) && netp && (*netp)) {
+ if (stcb->asoc.pktdrop_supported == 0) {
+ goto unknown_chunk;
+ }
sctp_handle_packet_dropped((struct sctp_pktdrop_chunk *)ch,
stcb, *netp,
min(chk_length, (sizeof(chunk_buf) - 4)));
}
break;
-
case SCTP_AUTHENTICATION:
SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_AUTHENTICATION\n");
- if (SCTP_BASE_SYSCTL(sctp_auth_disable))
- goto unknown_chunk;
-
if (stcb == NULL) {
/* save the first AUTH for later processing */
if (auth_skipped == 0) {
@@ -5461,6 +5587,9 @@ process_control_chunks:
/* skip this chunk (temporarily) */
goto next_chunk;
}
+ if (stcb->asoc.auth_supported == 0) {
+ goto unknown_chunk;
+ }
if ((chk_length < (sizeof(struct sctp_auth_chunk))) ||
(chk_length > (sizeof(struct sctp_auth_chunk) +
SCTP_AUTH_DIGEST_LEN_MAX))) {
@@ -5491,43 +5620,27 @@ process_control_chunks:
unknown_chunk:
/* it's an unknown chunk! */
if ((ch->chunk_type & 0x40) && (stcb != NULL)) {
- struct mbuf *mm;
- struct sctp_paramhdr *phd;
-
- mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (mm) {
- phd = mtod(mm, struct sctp_paramhdr *);
- /*
- * We cheat and use param type since
- * we did not bother to define a
- * error cause struct. They are the
- * same basic format with different
- * names.
- */
- phd->param_type = htons(SCTP_CAUSE_UNRECOG_CHUNK);
- phd->param_length = htons(chk_length + sizeof(*phd));
- SCTP_BUF_LEN(mm) = sizeof(*phd);
- SCTP_BUF_NEXT(mm) = SCTP_M_COPYM(m, *offset, chk_length, M_DONTWAIT);
- if (SCTP_BUF_NEXT(mm)) {
- if (sctp_pad_lastmbuf(SCTP_BUF_NEXT(mm), SCTP_SIZE32(chk_length) - chk_length, NULL)) {
- sctp_m_freem(mm);
- } else {
+ struct sctp_gen_error_cause *cause;
+ int len;
+
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_gen_error_cause),
+ 0, M_NOWAIT, 1, MT_DATA);
+ if (op_err != NULL) {
+ len = min(SCTP_SIZE32(chk_length), (uint32_t) (length - *offset));
+ cause = mtod(op_err, struct sctp_gen_error_cause *);
+ cause->code = htons(SCTP_CAUSE_UNRECOG_CHUNK);
+ cause->length = htons((uint16_t) (len + sizeof(struct sctp_gen_error_cause)));
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_gen_error_cause);
+ SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(m, *offset, len, M_NOWAIT);
+ if (SCTP_BUF_NEXT(op_err) != NULL) {
#ifdef SCTP_MBUF_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = SCTP_BUF_NEXT(mm); mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
- }
-#endif
- sctp_queue_op_err(stcb, mm);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ sctp_log_mbc(SCTP_BUF_NEXT(op_err), SCTP_MBUF_ICOPY);
}
+#endif
+ sctp_queue_op_err(stcb, op_err);
} else {
- sctp_m_freem(mm);
+ sctp_m_freem(op_err);
}
}
}
@@ -5565,30 +5678,6 @@ next_chunk:
}
-#ifdef INVARIANTS
-#ifdef __GNUC__
-__attribute__((noinline))
-#endif
- void
- sctp_validate_no_locks(struct sctp_inpcb *inp)
-{
- struct sctp_tcb *lstcb;
-
- LIST_FOREACH(lstcb, &inp->sctp_asoc_list, sctp_tcblist) {
- if (mtx_owned(&lstcb->tcb_mtx)) {
- panic("Own lock on stcb at return from input");
- }
- }
- if (mtx_owned(&inp->inp_create_mtx)) {
- panic("Own create lock on inp");
- }
- if (mtx_owned(&inp->inp_mtx)) {
- panic("Own inp lock on inp");
- }
-}
-
-#endif
-
/*
* common input chunk processing (v4 and v6)
*/
@@ -5600,7 +5689,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
uint8_t compute_crc,
#endif
uint8_t ecn_bits,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
uint32_t high_tsn;
@@ -5631,17 +5720,26 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
calc_check, check, (void *)m, length, iphlen);
stcb = sctp_findassociation_addr(m, offset, src, dst,
sh, ch, &inp, &net, vrf_id);
- if ((net != NULL) && (port != 0)) {
+#if defined(INET) || defined(INET6)
+ if ((ch->chunk_type != SCTP_INITIATION) &&
+ (net != NULL) && (net->port != port)) {
if (net->port == 0) {
- sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
+ /* UDP encapsulation turned on. */
+ net->mtu -= sizeof(struct udphdr);
+ if (stcb->asoc.smallest_mtu > net->mtu) {
+ sctp_pathmtu_adjustment(stcb, net->mtu);
+ }
+ } else if (port == 0) {
+ /* UDP encapsulation turned off. */
+ net->mtu += sizeof(struct udphdr);
+ /* XXX Update smallest_mtu */
}
net->port = port;
}
- if ((net != NULL) && (use_mflowid != 0)) {
- net->flowid = mflowid;
-#ifdef INVARIANTS
- net->flowidset = 1;
#endif
+ if (net != NULL) {
+ net->flowtype = mflowtype;
+ net->flowid = mflowid;
}
if ((inp != NULL) && (stcb != NULL)) {
sctp_send_packet_dropped(stcb, net, m, length, iphlen, 1);
@@ -5662,17 +5760,26 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
}
stcb = sctp_findassociation_addr(m, offset, src, dst,
sh, ch, &inp, &net, vrf_id);
- if ((net != NULL) && (port != 0)) {
+#if defined(INET) || defined(INET6)
+ if ((ch->chunk_type != SCTP_INITIATION) &&
+ (net != NULL) && (net->port != port)) {
if (net->port == 0) {
- sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
+ /* UDP encapsulation turned on. */
+ net->mtu -= sizeof(struct udphdr);
+ if (stcb->asoc.smallest_mtu > net->mtu) {
+ sctp_pathmtu_adjustment(stcb, net->mtu);
+ }
+ } else if (port == 0) {
+ /* UDP encapsulation turned off. */
+ net->mtu += sizeof(struct udphdr);
+ /* XXX Update smallest_mtu */
}
net->port = port;
}
- if ((net != NULL) && (use_mflowid != 0)) {
- net->flowid = mflowid;
-#ifdef INVARIANTS
- net->flowidset = 1;
#endif
+ if (net != NULL) {
+ net->flowtype = mflowtype;
+ net->flowid = mflowid;
}
if (inp == NULL) {
SCTP_STAT_INCR(sctps_noport);
@@ -5681,7 +5788,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
}
if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
sctp_send_shutdown_complete2(src, dst, sh,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
goto out;
}
@@ -5696,7 +5803,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
"Out of the blue");
sctp_send_abort(m, iphlen, src, dst,
sh, 0, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
}
}
@@ -5714,7 +5821,6 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
#ifdef INET
case AF_INET:
if (ipsec4_in_reject(m, &inp->ip_inp.inp)) {
- IPSECSTAT_INC(in_polvio);
SCTP_STAT_INCR(sctps_hdrops);
goto out;
}
@@ -5723,7 +5829,6 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
#ifdef INET6
case AF_INET6:
if (ipsec6_in_reject(m, &inp->ip_inp.inp)) {
- IPSEC6STAT_INC(in_polvio);
SCTP_STAT_INCR(sctps_hdrops);
goto out;
}
@@ -5753,11 +5858,11 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
*/
SCTP_TCB_UNLOCK(stcb);
stcb = NULL;
- snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s\n", __FILE__, __LINE__, __FUNCTION__);
+ snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
msg);
sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
goto out;
}
@@ -5768,7 +5873,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
stcb = sctp_process_control(m, iphlen, &offset, length,
src, dst, sh, ch,
inp, stcb, &net, &fwd_tsn_seen,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
if (stcb) {
/*
@@ -5776,12 +5881,23 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
* it changes our INP.
*/
inp = stcb->sctp_ep;
- if ((net) && (port)) {
+#if defined(INET) || defined(INET6)
+ if ((ch->chunk_type != SCTP_INITIATION) &&
+ (net != NULL) && (net->port != port)) {
if (net->port == 0) {
- sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
+ /* UDP encapsulation turned on. */
+ net->mtu -= sizeof(struct udphdr);
+ if (stcb->asoc.smallest_mtu > net->mtu) {
+ sctp_pathmtu_adjustment(stcb, net->mtu);
+ }
+ } else if (port == 0) {
+ /* UDP encapsulation turned off. */
+ net->mtu += sizeof(struct udphdr);
+ /* XXX Update smallest_mtu */
}
net->port = port;
}
+#endif
}
} else {
/*
@@ -5795,7 +5911,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
* chunks
*/
if ((stcb != NULL) &&
- !SCTP_BASE_SYSCTL(sctp_auth_disable) &&
+ (stcb->asoc.auth_supported == 1) &&
sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks)) {
/* "silently" ignore */
SCTP_STAT_INCR(sctps_recvauthmissing);
@@ -5803,11 +5919,11 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
}
if (stcb == NULL) {
/* out of the blue DATA chunk */
- snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s\n", __FILE__, __LINE__, __FUNCTION__);
+ snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
msg);
sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
goto out;
}
@@ -5837,7 +5953,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
*/
if ((length > offset) &&
(stcb != NULL) &&
- !SCTP_BASE_SYSCTL(sctp_auth_disable) &&
+ (stcb->asoc.auth_supported == 1) &&
sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks) &&
!stcb->asoc.authenticated) {
/* "silently" ignore */
@@ -5875,11 +5991,11 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
/*
* We consider OOTB any data sent during asoc setup.
*/
- snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s\n", __FILE__, __LINE__, __FUNCTION__);
+ snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
msg);
sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
goto out;
/* sa_ignore NOTREACHED */
@@ -5898,10 +6014,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
}
/* plow through the data chunks while length > offset */
retval = sctp_process_data(mm, iphlen, &offset, length,
- src, dst, sh,
- inp, stcb, net, &high_tsn,
- use_mflowid, mflowid,
- vrf_id, port);
+ inp, stcb, net, &high_tsn);
if (retval == 2) {
/*
* The association aborted, NO UNLOCK needed since
@@ -5918,7 +6031,7 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
}
/* take care of ecn */
if ((data_processed == 1) &&
- (stcb->asoc.ecn_allowed == 1) &&
+ (stcb->asoc.ecn_supported == 1) &&
((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS)) {
/* Yep, we need to add a ECNE */
sctp_send_ecn_echo(stcb, net, high_tsn);
@@ -5953,7 +6066,9 @@ trigger_send:
if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue)) {
cnt_ctrl_ready = stcb->asoc.ctrl_queue_cnt - stcb->asoc.ecn_echo_cnt_onq;
}
- if (cnt_ctrl_ready ||
+ if (!TAILQ_EMPTY(&stcb->asoc.asconf_send_queue) ||
+ cnt_ctrl_ready ||
+ stcb->asoc.trigger_reset ||
((un_sent) &&
(stcb->asoc.peers_rwnd > 0 ||
(stcb->asoc.peers_rwnd <= 0 && stcb->asoc.total_flight == 0)))) {
@@ -5975,27 +6090,9 @@ out:
SCTP_INP_DECR_REF(inp_decr);
SCTP_INP_WUNLOCK(inp_decr);
}
-#ifdef INVARIANTS
- if (inp != NULL) {
- sctp_validate_no_locks(inp);
- }
-#endif
return;
}
-#if 0
-static void
-sctp_print_mbuf_chain(struct mbuf *m)
-{
- for (; m; m = SCTP_BUF_NEXT(m)) {
- SCTP_PRINTF("%p: m_len = %ld\n", (void *)m, SCTP_BUF_LEN(m));
- if (SCTP_BUF_IS_EXTENDED(m))
- SCTP_PRINTF("%p: extend_size = %d\n", (void *)m, SCTP_BUF_EXTEND_SIZE(m));
- }
-}
-
-#endif
-
#ifdef INET
void
sctp_input_with_port(struct mbuf *i_pak, int off, uint16_t port)
@@ -6015,7 +6112,8 @@ sctp_input_with_port(struct mbuf *i_pak, int off, uint16_t port)
#endif
uint32_t mflowid;
- uint8_t use_mflowid;
+ uint8_t mflowtype;
+ uint16_t fibnum;
iphlen = off;
if (SCTP_GET_PKT_VRFID(i_pak, vrf_id)) {
@@ -6026,13 +6124,7 @@ sctp_input_with_port(struct mbuf *i_pak, int off, uint16_t port)
#ifdef SCTP_MBUF_LOGGING
/* Log in any input mbufs */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_INPUT);
- }
- }
+ sctp_log_mbc(m, SCTP_MBUF_INPUT);
}
#endif
#ifdef SCTP_PACKET_LOGGING
@@ -6041,17 +6133,13 @@ sctp_input_with_port(struct mbuf *i_pak, int off, uint16_t port)
}
#endif
SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
- "sctp_input(): Packet of length %d received on %s with csum_flags 0x%x.\n",
+ "sctp_input(): Packet of length %d received on %s with csum_flags 0x%b.\n",
m->m_pkthdr.len,
if_name(m->m_pkthdr.rcvif),
- m->m_pkthdr.csum_flags);
- if (m->m_flags & M_FLOWID) {
- mflowid = m->m_pkthdr.flowid;
- use_mflowid = 1;
- } else {
- mflowid = 0;
- use_mflowid = 0;
- }
+ (int)m->m_pkthdr.csum_flags, CSUM_BITS);
+ mflowid = m->m_pkthdr.flowid;
+ mflowtype = M_HASHTYPE_GET(m);
+ fibnum = M_GETFIB(m);
SCTP_STAT_INCR(sctps_recvpackets);
SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
/* Get IP, SCTP, and first chunk header together in the first mbuf. */
@@ -6076,7 +6164,7 @@ sctp_input_with_port(struct mbuf *i_pak, int off, uint16_t port)
dst.sin_len = sizeof(struct sockaddr_in);
dst.sin_port = sh->dest_port;
dst.sin_addr = ip->ip_dst;
- length = ip->ip_len + iphlen;
+ length = ntohs(ip->ip_len);
/* Validate mbuf chain length with IP payload length. */
if (SCTP_HEADER_LEN(m) != length) {
SCTPDBG(SCTP_DEBUG_INPUT1,
@@ -6111,7 +6199,7 @@ sctp_input_with_port(struct mbuf *i_pak, int off, uint16_t port)
compute_crc,
#endif
ecn_bits,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
out:
if (m) {
@@ -6125,18 +6213,23 @@ extern int *sctp_cpuarry;
#endif
-void
-sctp_input(struct mbuf *m, int off)
+int
+sctp_input(struct mbuf **mp, int *offp, int proto SCTP_UNUSED)
{
-#if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP)
- struct ip *ip;
- struct sctphdr *sh;
- int offset;
- int cpu_to_use;
- uint32_t flowid, tag;
+ struct mbuf *m;
+ int off;
+ m = *mp;
+ off = *offp;
+#if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP)
if (mp_ncpus > 1) {
- if (m->m_flags & M_FLOWID) {
+ struct ip *ip;
+ struct sctphdr *sh;
+ int offset;
+ int cpu_to_use;
+ uint32_t flowid, tag;
+
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
flowid = m->m_pkthdr.flowid;
} else {
/*
@@ -6147,7 +6240,7 @@ sctp_input(struct mbuf *m, int off)
if (SCTP_BUF_LEN(m) < offset) {
if ((m = m_pullup(m, offset)) == NULL) {
SCTP_STAT_INCR(sctps_hdrops);
- return;
+ return (IPPROTO_DONE);
}
}
ip = mtod(m, struct ip *);
@@ -6155,14 +6248,15 @@ sctp_input(struct mbuf *m, int off)
tag = htonl(sh->v_tag);
flowid = tag ^ ntohs(sh->dest_port) ^ ntohs(sh->src_port);
m->m_pkthdr.flowid = flowid;
- m->m_flags |= M_FLOWID;
+ M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE_HASH);
}
cpu_to_use = sctp_cpuarry[flowid % mp_ncpus];
sctp_queue_to_mcore(m, off, cpu_to_use);
- return;
+ return (IPPROTO_DONE);
}
#endif
sctp_input_with_port(m, off, 0);
+ return (IPPROTO_DONE);
}
#endif
diff --git a/freebsd/sys/netinet/sctp_input.h b/freebsd/sys/netinet/sctp_input.h
index 95208032..148864b6 100644
--- a/freebsd/sys/netinet/sctp_input.h
+++ b/freebsd/sys/netinet/sctp_input.h
@@ -45,10 +45,10 @@ sctp_common_input_processing(struct mbuf **, int, int, int,
uint8_t,
#endif
uint8_t,
- uint8_t, uint32_t,
+ uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
-struct sctp_stream_reset_out_request *
+struct sctp_stream_reset_request *
sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq,
struct sctp_tmit_chunk **bchk);
diff --git a/freebsd/sys/netinet/sctp_lock_bsd.h b/freebsd/sys/netinet/sctp_lock_bsd.h
index 35cdf5f8..96e35214 100644
--- a/freebsd/sys/netinet/sctp_lock_bsd.h
+++ b/freebsd/sys/netinet/sctp_lock_bsd.h
@@ -49,7 +49,7 @@ __FBSDID("$FreeBSD$");
* Most other locks (INP and INFO) attempt to localize the locking i.e. we try
* to contain the lock and unlock within the function that needs to lock it.
* This sometimes mean we do extra locks and unlocks and lose a bit of
- * efficency, but if the performance statements about non-recursive locks are
+ * efficiency, but if the performance statements about non-recursive locks are
* true this should not be a problem. One issue that arises with this only
* lock when needed is that if an implicit association setup is done we have
* a problem. If at the time I lookup an association I have NULL in the tcb
diff --git a/freebsd/sys/netinet/sctp_os_bsd.h b/freebsd/sys/netinet/sctp_os_bsd.h
index d33d1fd3..e87914e5 100644
--- a/freebsd/sys/netinet/sctp_os_bsd.h
+++ b/freebsd/sys/netinet/sctp_os_bsd.h
@@ -95,7 +95,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
-#include <netinet/icmp6.h>
#include <netinet6/ip6protosw.h>
#include <netinet6/nd6.h>
#include <netinet6/scope6_var.h>
@@ -105,7 +104,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_options.h>
#include <crypto/sha1.h>
-#include <crypto/sha2/sha2.h>
+#include <crypto/sha2/sha256.h>
#ifndef in6pcb
#define in6pcb inpcb
@@ -152,33 +151,27 @@ MALLOC_DECLARE(SCTP_M_MCORE);
#define V_system_base_info VNET(system_base_info)
#define SCTP_BASE_INFO(__m) V_system_base_info.sctppcbinfo.__m
#define SCTP_BASE_STATS V_system_base_info.sctpstat
-#define SCTP_BASE_STATS_SYSCTL VNET_NAME(system_base_info.sctpstat)
-#define SCTP_BASE_STAT(__m) V_system_base_info.sctpstat.__m
-#define SCTP_BASE_SYSCTL(__m) VNET_NAME(system_base_info.sctpsysctl.__m)
+#define SCTP_BASE_STAT(__m) V_system_base_info.sctpstat.__m
+#define SCTP_BASE_SYSCTL(__m) V_system_base_info.sctpsysctl.__m
#define SCTP_BASE_VAR(__m) V_system_base_info.__m
-/*
- *
- */
-#define USER_ADDR_NULL (NULL) /* FIX ME: temp */
-
#define SCTP_PRINTF(params...) printf(params)
#if defined(SCTP_DEBUG)
#define SCTPDBG(level, params...) \
{ \
- do { \
- if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \
- SCTP_PRINTF(params); \
- } \
- } while (0); \
+ do { \
+ if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \
+ SCTP_PRINTF(params); \
+ } \
+ } while (0); \
}
#define SCTPDBG_ADDR(level, addr) \
{ \
- do { \
- if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \
- sctp_print_address(addr); \
- } \
- } while (0); \
+ do { \
+ if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \
+ sctp_print_address(addr); \
+ } \
+ } while (0); \
}
#else
#define SCTPDBG(level, params...)
@@ -194,11 +187,11 @@ MALLOC_DECLARE(SCTP_M_MCORE);
#ifdef SCTP_LTRACE_ERRORS
#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) \
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \
- SCTP_PRINTF("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+ SCTP_PRINTF("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
m, inp, stcb, net, file, __LINE__, err);
#define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) \
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \
- SCTP_PRINTF("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+ SCTP_PRINTF("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
inp, stcb, net, file, __LINE__, err);
#else
#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err)
@@ -232,16 +225,16 @@ MALLOC_DECLARE(SCTP_M_MCORE);
* general memory allocation
*/
#define SCTP_MALLOC(var, type, size, name) \
- do { \
- var = (type)malloc(size, name, M_NOWAIT); \
- } while (0)
+ do { \
+ var = (type)malloc(size, name, M_NOWAIT); \
+ } while (0)
#define SCTP_FREE(var, type) free(var, type)
#define SCTP_MALLOC_SONAME(var, type, size) \
- do { \
- var = (type)malloc(size, M_SONAME, M_WAITOK | M_ZERO); \
- } while (0)
+ do { \
+ var = (type)malloc(size, M_SONAME, M_WAITOK | M_ZERO); \
+ } while (0)
#define SCTP_FREE_SONAME(var) free(var, M_SONAME)
@@ -305,16 +298,12 @@ typedef struct callout sctp_os_timer_t;
#define SCTP_BUF_RESV_UF(m, size) m->m_data += size
#define SCTP_BUF_AT(m, size) m->m_data + size
#define SCTP_BUF_IS_EXTENDED(m) (m->m_flags & M_EXT)
-#define SCTP_BUF_EXTEND_SIZE(m) (m->m_ext.ext_size)
+#define SCTP_BUF_SIZE M_SIZE
#define SCTP_BUF_TYPE(m) (m->m_type)
#define SCTP_BUF_RECVIF(m) (m->m_pkthdr.rcvif)
#define SCTP_BUF_PREPEND M_PREPEND
-#define SCTP_ALIGN_TO_END(m, len) if(m->m_flags & M_PKTHDR) { \
- MH_ALIGN(m, len); \
- } else if ((m->m_flags & M_EXT) == 0) { \
- M_ALIGN(m, len); \
- }
+#define SCTP_ALIGN_TO_END(m, len) M_ALIGN(m, len)
/* We make it so if you have up to 4 threads
* writing based on the default size of
@@ -328,11 +317,11 @@ typedef struct callout sctp_os_timer_t;
/* MTU */
/*************************/
#define SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, af) ((struct ifnet *)ifn)->if_mtu
-#define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((rt != NULL) ? rt->rt_rmx.rmx_mtu : 0)
+#define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((uint32_t)((rt != NULL) ? rt->rt_mtu : 0))
#define SCTP_GATHER_MTU_FROM_INTFC(sctp_ifn) ((sctp_ifn->ifn_p != NULL) ? ((struct ifnet *)(sctp_ifn->ifn_p))->if_mtu : 0)
#define SCTP_SET_MTU_OF_ROUTE(sa, rt, mtu) do { \
if (rt != NULL) \
- rt->rt_rmx.rmx_mtu = mtu; \
+ rt->rt_mtu = mtu; \
} while(0)
/* (de-)register interface event notifications */
@@ -346,7 +335,7 @@ typedef struct callout sctp_os_timer_t;
/* return the base ext data pointer */
#define SCTP_BUF_EXTEND_BASE(m) (m->m_ext.ext_buf)
/* return the refcnt of the data pointer */
-#define SCTP_BUF_EXTEND_REFCNT(m) (*m->m_ext.ref_cnt)
+#define SCTP_BUF_EXTEND_REFCNT(m) (*m->m_ext.ext_cnt)
/* return any buffer related flags, this is
* used beyond logging for apple only.
*/
@@ -399,6 +388,11 @@ typedef struct callout sctp_os_timer_t;
#define SCTP_CLEAR_SO_NBIO(so) ((so)->so_state &= ~SS_NBIO)
/* get the socket type */
#define SCTP_SO_TYPE(so) ((so)->so_type)
+/* Use a macro for renaming sb_cc to sb_acc.
+ * Initially sb_ccc was used, but this broke select() when used
+ * with SCTP sockets.
+ */
+#define sb_cc sb_acc
/* reserve sb space for a socket */
#define SCTP_SORESERVE(so, send, recv) soreserve(so, send, recv)
/* wakeup a socket */
@@ -418,13 +412,8 @@ typedef struct callout sctp_os_timer_t;
typedef struct route sctp_route_t;
typedef struct rtentry sctp_rtentry_t;
-/*
- * XXX multi-FIB support was backed out in r179783 and it seems clear that the
- * VRF support as currently in FreeBSD is not ready to support multi-FIB.
- * It might be best to implement multi-FIB support for both v4 and v6 indepedent
- * of VRFs and leave those to a real MPLS stack.
- */
-#define SCTP_RTALLOC(ro, vrf_id) rtalloc_ign((struct route *)ro, 0UL)
+#define SCTP_RTALLOC(ro, vrf_id, fibnum) \
+ rtalloc_ign_fib((struct route *)ro, 0UL, fibnum)
/* Future zero copy wakeup/send function */
#define SCTP_ZERO_COPY_EVENT(inp, so)
@@ -432,6 +421,11 @@ typedef struct rtentry sctp_rtentry_t;
#define SCTP_ZERO_COPY_SENDQ_EVENT(inp, so)
/*
+ * SCTP protocol specific mbuf flags.
+ */
+#define M_NOTIFICATION M_PROTO1 /* SCTP notification */
+
+/*
* IP output routines
*/
#define SCTP_IP_OUTPUT(result, o_pak, ro, stcb, vrf_id) \
@@ -442,12 +436,14 @@ typedef struct rtentry sctp_rtentry_t;
local_stcb->sctp_ep && \
local_stcb->sctp_ep->sctp_socket) \
o_flgs |= local_stcb->sctp_ep->sctp_socket->so_options & SO_DONTROUTE; \
+ m_clrprotoflags(o_pak); \
result = ip_output(o_pak, NULL, ro, o_flgs, 0, NULL); \
}
#define SCTP_IP6_OUTPUT(result, o_pak, ro, ifp, stcb, vrf_id) \
{ \
struct sctp_tcb *local_stcb = stcb; \
+ m_clrprotoflags(o_pak); \
if (local_stcb && local_stcb->sctp_ep) \
result = ip6_output(o_pak, \
((struct in6pcb *)(local_stcb->sctp_ep))->in6p_outputopts, \
diff --git a/freebsd/sys/netinet/sctp_output.c b/freebsd/sys/netinet/sctp_output.c
index cbc25b9c..9e12e775 100644
--- a/freebsd/sys/netinet/sctp_output.c
+++ b/freebsd/sys/netinet/sctp_output.c
@@ -52,7 +52,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp_bsd_addr.h>
#include <netinet/sctp_input.h>
#include <netinet/sctp_crc32.h>
+#if defined(INET) || defined(INET6)
#include <netinet/udp.h>
+#endif
#include <netinet/udp_var.h>
#include <machine/in_cksum.h>
@@ -67,7 +69,7 @@ struct sack_track {
struct sctp_gap_ack_block gaps[SCTP_MAX_GAPS_INARRAY];
};
-struct sack_track sack_array[256] = {
+const struct sack_track sack_array[256] = {
{0, 0, 0, 0, /* 0x00 */
{{0, 0},
{0, 0},
@@ -1881,7 +1883,7 @@ sctp_is_address_in_scope(struct sctp_ifa *ifa,
if (scope->ipv4_addr_legal) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&ifa->address.sin;
+ sin = &ifa->address.sin;
if (sin->sin_addr.s_addr == 0) {
/* not in scope , unspecified */
return (0);
@@ -1912,7 +1914,7 @@ sctp_is_address_in_scope(struct sctp_ifa *ifa,
return (0);
}
/* ok to use deprecated addresses? */
- sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ sin6 = &ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
/* skip unspecifed addresses */
return (0);
@@ -1971,7 +1973,7 @@ sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa, uint16_t * len)
while (SCTP_BUF_NEXT(mret) != NULL) {
mret = SCTP_BUF_NEXT(mret);
}
- SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(plen, 0, M_DONTWAIT, 1, MT_DATA);
+ SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(plen, 0, M_NOWAIT, 1, MT_DATA);
if (SCTP_BUF_NEXT(mret) == NULL) {
/* We are hosed, can't add more addresses */
return (m);
@@ -1987,7 +1989,7 @@ sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa, uint16_t * len)
struct sctp_ipv4addr_param *ipv4p;
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&ifa->address.sin;
+ sin = &ifa->address.sin;
ipv4p = (struct sctp_ipv4addr_param *)parmh;
parmh->param_type = htons(SCTP_IPV4_ADDRESS);
parmh->param_length = htons(plen);
@@ -2002,7 +2004,7 @@ sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa, uint16_t * len)
struct sctp_ipv6addr_param *ipv6p;
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ sin6 = &ifa->address.sin6;
ipv6p = (struct sctp_ipv6addr_param *)parmh;
parmh->param_type = htons(SCTP_IPV6_ADDRESS);
parmh->param_length = htons(plen);
@@ -2417,7 +2419,7 @@ sctp_is_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) {
if (laddr->ifa == NULL) {
SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
- __FUNCTION__);
+ __func__);
continue;
}
if (laddr->ifa == ifa) {
@@ -2439,7 +2441,7 @@ sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa)
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (laddr->ifa == NULL) {
SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
- __FUNCTION__);
+ __func__);
continue;
}
if ((laddr->ifa == ifa) && laddr->action == 0)
@@ -3071,7 +3073,7 @@ bound_all_plan_b:
ifn, num_preferred);
if (num_preferred == 0) {
/* None on this interface. */
- SCTPDBG(SCTP_DEBUG_OUTPUT2, "No prefered -- skipping to next\n");
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "No preferred -- skipping to next\n");
continue;
}
SCTPDBG(SCTP_DEBUG_OUTPUT2,
@@ -3156,12 +3158,10 @@ again_with_private_addresses_allowed:
* It is restricted for some reason..
* probably not yet added.
*/
- SCTPDBG(SCTP_DEBUG_OUTPUT2, "Its resticted\n");
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Its restricted\n");
sifa = NULL;
continue;
}
- } else {
- SCTP_PRINTF("Stcb is null - no print\n");
}
atomic_add_int(&sifa->refcount, 1);
goto out;
@@ -3224,12 +3224,14 @@ plan_d:
}
}
#ifdef INET
- if ((retried == 0) && (stcb->asoc.scope.ipv4_local_scope == 0)) {
- stcb->asoc.scope.ipv4_local_scope = 1;
- retried = 1;
- goto again_with_private_addresses_allowed;
- } else if (retried == 1) {
- stcb->asoc.scope.ipv4_local_scope = 0;
+ if (stcb) {
+ if ((retried == 0) && (stcb->asoc.scope.ipv4_local_scope == 0)) {
+ stcb->asoc.scope.ipv4_local_scope = 1;
+ retried = 1;
+ goto again_with_private_addresses_allowed;
+ } else if (retried == 1) {
+ stcb->asoc.scope.ipv4_local_scope = 0;
+ }
}
#endif
out:
@@ -3326,10 +3328,11 @@ sctp_source_address_selection(struct sctp_inpcb *inp,
#endif
/**
- * Rules: - Find the route if needed, cache if I can. - Look at
- * interface address in route, Is it in the bound list. If so we
- * have the best source. - If not we must rotate amongst the
- * addresses.
+ * Rules:
+ * - Find the route if needed, cache if I can.
+ * - Look at interface address in route, Is it in the bound list. If so we
+ * have the best source.
+ * - If not we must rotate amongst the addresses.
*
* Cavets and issues
*
@@ -3391,7 +3394,7 @@ sctp_source_address_selection(struct sctp_inpcb *inp,
/*
* Need a route to cache.
*/
- SCTP_RTALLOC(ro, vrf_id);
+ SCTP_RTALLOC(ro, vrf_id, inp->fibnum);
}
if (ro->ro_rt == NULL) {
return (NULL);
@@ -3508,7 +3511,7 @@ sctp_find_cmsg(int c_type, void *data, struct mbuf *control, size_t cpsize)
return (found);
}
/* It is exactly what we want. Copy it out. */
- m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), cpsize, (caddr_t)data);
+ m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), (int)cpsize, (caddr_t)data);
return (1);
} else {
struct sctp_sndrcvinfo *sndrcvinfo;
@@ -3618,6 +3621,11 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er
struct sctp_stream_out *tmp_str;
unsigned int i;
+#if defined(SCTP_DETAILED_STR_STATS)
+ int j;
+
+#endif
+
/* Default is NOT correct */
SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, default:%d pre_open:%d\n",
stcb->asoc.streamoutcnt, stcb->asoc.pre_open_streams);
@@ -3637,10 +3645,21 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er
for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
stcb->asoc.strmout[i].chunks_on_queues = 0;
- stcb->asoc.strmout[i].next_sequence_send = 0;
+ stcb->asoc.strmout[i].next_mid_ordered = 0;
+ stcb->asoc.strmout[i].next_mid_unordered = 0;
+#if defined(SCTP_DETAILED_STR_STATS)
+ for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
+ stcb->asoc.strmout[i].abandoned_sent[j] = 0;
+ stcb->asoc.strmout[i].abandoned_unsent[j] = 0;
+ }
+#else
+ stcb->asoc.strmout[i].abandoned_sent[0] = 0;
+ stcb->asoc.strmout[i].abandoned_unsent[0] = 0;
+#endif
stcb->asoc.strmout[i].stream_no = i;
stcb->asoc.strmout[i].last_msg_incomplete = 0;
- stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], NULL);
+ stcb->asoc.strmout[i].state = SCTP_STREAM_OPENING;
+ stcb->asoc.ss_functions.sctp_ss_init_stream(stcb, &stcb->asoc.strmout[i], NULL);
}
}
break;
@@ -3661,7 +3680,7 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er
*error = EINVAL;
return (1);
}
- if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, NULL,
+ if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, NULL, stcb->asoc.port,
SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
*error = ENOBUFS;
return (1);
@@ -3693,14 +3712,14 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er
*error = EINVAL;
return (1);
}
- if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, NULL,
+ if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, NULL, stcb->asoc.port,
SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
*error = ENOBUFS;
return (1);
}
} else
#endif
- if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin6, NULL,
+ if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin6, NULL, stcb->asoc.port,
SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
*error = ENOBUFS;
return (1);
@@ -3821,28 +3840,22 @@ sctp_add_cookie(struct mbuf *init, int init_offset,
mret = sctp_get_mbuf_for_msg((sizeof(struct sctp_state_cookie) +
sizeof(struct sctp_paramhdr)), 0,
- M_DONTWAIT, 1, MT_DATA);
+ M_NOWAIT, 1, MT_DATA);
if (mret == NULL) {
return (NULL);
}
- copy_init = SCTP_M_COPYM(init, init_offset, M_COPYALL, M_DONTWAIT);
+ copy_init = SCTP_M_COPYM(init, init_offset, M_COPYALL, M_NOWAIT);
if (copy_init == NULL) {
sctp_m_freem(mret);
return (NULL);
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = copy_init; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(copy_init, SCTP_MBUF_ICOPY);
}
#endif
copy_initack = SCTP_M_COPYM(initack, initack_offset, M_COPYALL,
- M_DONTWAIT);
+ M_NOWAIT);
if (copy_initack == NULL) {
sctp_m_freem(mret);
sctp_m_freem(copy_init);
@@ -3850,13 +3863,7 @@ sctp_add_cookie(struct mbuf *init, int init_offset,
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = copy_initack; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(copy_initack, SCTP_MBUF_ICOPY);
}
#endif
/* easy side we just drop it on the end */
@@ -3892,7 +3899,7 @@ sctp_add_cookie(struct mbuf *init, int init_offset,
break;
}
}
- sig = sctp_get_mbuf_for_msg(SCTP_SECRET_SIZE, 0, M_DONTWAIT, 1, MT_DATA);
+ sig = sctp_get_mbuf_for_msg(SCTP_SECRET_SIZE, 0, M_NOWAIT, 1, MT_DATA);
if (sig == NULL) {
/* no space, so free the entire chain */
sctp_m_freem(mret);
@@ -3914,7 +3921,7 @@ sctp_add_cookie(struct mbuf *init, int init_offset,
static uint8_t
sctp_get_ect(struct sctp_tcb *stcb)
{
- if ((stcb != NULL) && (stcb->asoc.ecn_allowed == 1)) {
+ if ((stcb != NULL) && (stcb->asoc.ecn_supported == 1)) {
return (SCTP_ECT0_BIT);
} else {
return (0);
@@ -3985,7 +3992,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
uint32_t v_tag,
uint16_t port,
union sctp_sockstore *over_addr,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
int so_locked SCTP_UNUSED
#else
@@ -4061,11 +4068,11 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
sctp_route_t iproute;
int len;
- len = sizeof(struct ip) + sizeof(struct sctphdr);
+ len = SCTP_MIN_V4_OVERHEAD;
if (port) {
len += sizeof(struct udphdr);
}
- newm = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA);
+ newm = sctp_get_mbuf_for_msg(len, 1, M_NOWAIT, 1, MT_DATA);
if (newm == NULL) {
sctp_m_freem(m);
SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -4076,18 +4083,11 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
SCTP_BUF_NEXT(newm) = m;
m = newm;
if (net != NULL) {
-#ifdef INVARIANTS
- if (net->flowidset == 0) {
- panic("Flow ID not set");
- }
-#endif
m->m_pkthdr.flowid = net->flowid;
- m->m_flags |= M_FLOWID;
+ M_HASHTYPE_SET(m, net->flowtype);
} else {
- if (use_mflowid != 0) {
- m->m_pkthdr.flowid = mflowid;
- m->m_flags |= M_FLOWID;
- }
+ m->m_pkthdr.flowid = mflowid;
+ M_HASHTYPE_SET(m, mflowtype);
}
packet_length = sctp_calculate_len(m);
ip = mtod(m, struct ip *);
@@ -4106,15 +4106,15 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
tos_value |= sctp_get_ect(stcb);
}
if ((nofragment_flag) && (port == 0)) {
- ip->ip_off = IP_DF;
+ ip->ip_off = htons(IP_DF);
} else {
- ip->ip_off = 0;
+ ip->ip_off = htons(0);
}
/* FreeBSD has a function for ip_id's */
- ip->ip_id = ip_newid();
+ ip_fillid(ip);
ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl;
- ip->ip_len = packet_length;
+ ip->ip_len = htons(packet_length);
ip->ip_tos = tos_value;
if (port) {
ip->ip_p = IPPROTO_UDP;
@@ -4177,7 +4177,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
sctp_free_ifa(_lsrc);
} else {
ip->ip_src = over_addr->sin.sin_addr;
- SCTP_RTALLOC(ro, vrf_id);
+ SCTP_RTALLOC(ro, vrf_id, inp->fibnum);
}
}
if (port) {
@@ -4190,7 +4190,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
udp = (struct udphdr *)((caddr_t)ip + sizeof(struct ip));
udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
udp->uh_dport = port;
- udp->uh_ulen = htons(packet_length - sizeof(struct ip));
+ udp->uh_ulen = htons((uint16_t) (packet_length - sizeof(struct ip)));
if (V_udp_cksum) {
udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
} else {
@@ -4350,11 +4350,11 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
flowlabel = ntohl(((struct in6pcb *)inp)->in6p_flowinfo);
}
flowlabel &= 0x000fffff;
- len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr);
+ len = SCTP_MIN_OVERHEAD;
if (port) {
len += sizeof(struct udphdr);
}
- newm = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA);
+ newm = sctp_get_mbuf_for_msg(len, 1, M_NOWAIT, 1, MT_DATA);
if (newm == NULL) {
sctp_m_freem(m);
SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -4365,18 +4365,11 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
SCTP_BUF_NEXT(newm) = m;
m = newm;
if (net != NULL) {
-#ifdef INVARIANTS
- if (net->flowidset == 0) {
- panic("Flow ID not set");
- }
-#endif
m->m_pkthdr.flowid = net->flowid;
- m->m_flags |= M_FLOWID;
+ M_HASHTYPE_SET(m, net->flowtype);
} else {
- if (use_mflowid != 0) {
- m->m_pkthdr.flowid = mflowid;
- m->m_flags |= M_FLOWID;
- }
+ m->m_pkthdr.flowid = mflowid;
+ M_HASHTYPE_SET(m, mflowtype);
}
packet_length = sctp_calculate_len(m);
@@ -4425,7 +4418,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
} else {
ip6h->ip6_nxt = IPPROTO_SCTP;
}
- ip6h->ip6_plen = (packet_length - sizeof(struct ip6_hdr));
+ ip6h->ip6_plen = (uint16_t) (packet_length - sizeof(struct ip6_hdr));
ip6h->ip6_dst = sin6->sin6_addr;
/*
@@ -4498,7 +4491,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
sctp_free_ifa(_lsrc);
} else {
lsa6->sin6_addr = over_addr->sin6.sin6_addr;
- SCTP_RTALLOC(ro, vrf_id);
+ SCTP_RTALLOC(ro, vrf_id, inp->fibnum);
}
(void)sa6_recoverscope(sin6);
}
@@ -4544,7 +4537,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
udp = (struct udphdr *)((caddr_t)ip6h + sizeof(struct ip6_hdr));
udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
udp->uh_dport = port;
- udp->uh_ulen = htons(packet_length - sizeof(struct ip6_hdr));
+ udp->uh_ulen = htons((uint16_t) (packet_length - sizeof(struct ip6_hdr)));
udp->uh_sum = 0;
sctphdr = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr));
} else {
@@ -4700,7 +4693,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
#endif
)
{
- struct mbuf *m;
+ struct mbuf *m, *m_last;
struct sctp_nets *net;
struct sctp_init_chunk *init;
struct sctp_supported_addr_param *sup_addr;
@@ -4745,7 +4738,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
/* start the INIT timer */
sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, net);
- m = sctp_get_mbuf_for_msg(MCLBYTES, 1, M_DONTWAIT, 1, MT_DATA);
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 1, M_NOWAIT, 1, MT_DATA);
if (m == NULL) {
/* No memory, INIT timer will re-attempt. */
SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - mbuf?\n");
@@ -4753,12 +4746,6 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
}
chunk_len = (uint16_t) sizeof(struct sctp_init_chunk);
padding_len = 0;
- /*
- * assume peer supports asconf in order to be able to queue local
- * address changes while an INIT is in flight and before the assoc
- * is established.
- */
- stcb->asoc.peer_supports_asconf = 1;
/* Now lets put the chunk header in place */
init = mtod(m, struct sctp_init_chunk *);
/* now the chunk header */
@@ -4775,120 +4762,76 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
init->init.num_inbound_streams = htons(stcb->asoc.max_inbound_streams);
init->init.initial_tsn = htonl(stcb->asoc.init_seq_number);
- if (stcb->asoc.scope.ipv4_addr_legal || stcb->asoc.scope.ipv6_addr_legal) {
- uint8_t i;
-
- parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
- if (stcb->asoc.scope.ipv4_addr_legal) {
- parameter_len += (uint16_t) sizeof(uint16_t);
- }
- if (stcb->asoc.scope.ipv6_addr_legal) {
- parameter_len += (uint16_t) sizeof(uint16_t);
- }
- sup_addr = (struct sctp_supported_addr_param *)(mtod(m, caddr_t)+chunk_len);
- sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE);
- sup_addr->ph.param_length = htons(parameter_len);
- i = 0;
- if (stcb->asoc.scope.ipv4_addr_legal) {
- sup_addr->addr_type[i++] = htons(SCTP_IPV4_ADDRESS);
- }
- if (stcb->asoc.scope.ipv6_addr_legal) {
- sup_addr->addr_type[i++] = htons(SCTP_IPV6_ADDRESS);
- }
- padding_len = 4 - 2 * i;
- chunk_len += parameter_len;
- }
/* Adaptation layer indication parameter */
if (inp->sctp_ep.adaptation_layer_indicator_provided) {
- if (padding_len > 0) {
- memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
- chunk_len += padding_len;
- padding_len = 0;
- }
parameter_len = (uint16_t) sizeof(struct sctp_adaptation_layer_indication);
ali = (struct sctp_adaptation_layer_indication *)(mtod(m, caddr_t)+chunk_len);
ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
ali->ph.param_length = htons(parameter_len);
- ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator);
+ ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
chunk_len += parameter_len;
}
- if (SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly)) {
- /* Add NAT friendly parameter. */
- if (padding_len > 0) {
- memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
- chunk_len += padding_len;
- padding_len = 0;
- }
+ /* ECN parameter */
+ if (stcb->asoc.ecn_supported == 1) {
parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
- ph->param_type = htons(SCTP_HAS_NAT_SUPPORT);
+ ph->param_type = htons(SCTP_ECN_CAPABLE);
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
- /* now any cookie time extensions */
- if (stcb->asoc.cookie_preserve_req) {
- struct sctp_cookie_perserve_param *cookie_preserve;
-
- if (padding_len > 0) {
- memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
- chunk_len += padding_len;
- padding_len = 0;
- }
- parameter_len = (uint16_t) sizeof(struct sctp_cookie_perserve_param);
- cookie_preserve = (struct sctp_cookie_perserve_param *)(mtod(m, caddr_t)+chunk_len);
- cookie_preserve->ph.param_type = htons(SCTP_COOKIE_PRESERVE);
- cookie_preserve->ph.param_length = htons(parameter_len);
- cookie_preserve->time = htonl(stcb->asoc.cookie_preserve_req);
- stcb->asoc.cookie_preserve_req = 0;
+ /* PR-SCTP supported parameter */
+ if (stcb->asoc.prsctp_supported == 1) {
+ parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+ ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
+ ph->param_type = htons(SCTP_PRSCTP_SUPPORTED);
+ ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
- /* ECN parameter */
- if (stcb->asoc.ecn_allowed == 1) {
- if (padding_len > 0) {
- memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
- chunk_len += padding_len;
- padding_len = 0;
- }
+ /* Add NAT friendly parameter. */
+ if (SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly)) {
parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
- ph->param_type = htons(SCTP_ECN_CAPABLE);
+ ph->param_type = htons(SCTP_HAS_NAT_SUPPORT);
ph->param_length = htons(parameter_len);
chunk_len += parameter_len;
}
- /* And now tell the peer we do support PR-SCTP. */
- if (padding_len > 0) {
- memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
- chunk_len += padding_len;
- padding_len = 0;
- }
- parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
- ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
- ph->param_type = htons(SCTP_PRSCTP_SUPPORTED);
- ph->param_length = htons(parameter_len);
- chunk_len += parameter_len;
-
- /* And now tell the peer we do all the extensions */
- pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
- pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+ /* And now tell the peer which extensions we support */
num_ext = 0;
- pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
- pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
- pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
- pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
- pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
- if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+ pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
+ if (stcb->asoc.prsctp_supported == 1) {
+ pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
+ if (stcb->asoc.idata_supported) {
+ pr_supported->chunk_types[num_ext++] = SCTP_IFORWARD_CUM_TSN;
+ }
+ }
+ if (stcb->asoc.auth_supported == 1) {
pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
}
- if (stcb->asoc.sctp_nr_sack_on_off == 1) {
+ if (stcb->asoc.asconf_supported == 1) {
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
+ }
+ if (stcb->asoc.reconfig_supported == 1) {
+ pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
+ }
+ if (stcb->asoc.idata_supported) {
+ pr_supported->chunk_types[num_ext++] = SCTP_IDATA;
+ }
+ if (stcb->asoc.nrsack_supported == 1) {
pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK;
}
- parameter_len = (uint16_t) sizeof(struct sctp_supported_chunk_types_param) + num_ext;
- pr_supported->ph.param_length = htons(parameter_len);
- padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
- chunk_len += parameter_len;
-
+ if (stcb->asoc.pktdrop_supported == 1) {
+ pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
+ }
+ if (num_ext > 0) {
+ parameter_len = (uint16_t) sizeof(struct sctp_supported_chunk_types_param) + num_ext;
+ pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+ pr_supported->ph.param_length = htons(parameter_len);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
+ }
/* add authentication parameters */
- if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+ if (stcb->asoc.auth_supported) {
/* attach RANDOM parameter, if available */
if (stcb->asoc.authinfo.random != NULL) {
struct sctp_auth_random *randp;
@@ -4906,8 +4849,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
chunk_len += parameter_len;
}
/* add HMAC_ALGO parameter */
- if ((stcb->asoc.local_hmacs != NULL) &&
- (stcb->asoc.local_hmacs->num_algo > 0)) {
+ if (stcb->asoc.local_hmacs != NULL) {
struct sctp_auth_hmac_algo *hmacs;
if (padding_len > 0) {
@@ -4925,7 +4867,7 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
chunk_len += parameter_len;
}
/* add CHUNKS parameter */
- if (sctp_auth_get_chklist_size(stcb->asoc.local_auth_chunks) > 0) {
+ if (stcb->asoc.local_auth_chunks != NULL) {
struct sctp_auth_chunk_list *chunks;
if (padding_len > 0) {
@@ -4943,8 +4885,52 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
chunk_len += parameter_len;
}
}
- SCTP_BUF_LEN(m) = chunk_len;
+ /* now any cookie time extensions */
+ if (stcb->asoc.cookie_preserve_req) {
+ struct sctp_cookie_perserve_param *cookie_preserve;
+
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ padding_len = 0;
+ }
+ parameter_len = (uint16_t) sizeof(struct sctp_cookie_perserve_param);
+ cookie_preserve = (struct sctp_cookie_perserve_param *)(mtod(m, caddr_t)+chunk_len);
+ cookie_preserve->ph.param_type = htons(SCTP_COOKIE_PRESERVE);
+ cookie_preserve->ph.param_length = htons(parameter_len);
+ cookie_preserve->time = htonl(stcb->asoc.cookie_preserve_req);
+ stcb->asoc.cookie_preserve_req = 0;
+ chunk_len += parameter_len;
+ }
+ if (stcb->asoc.scope.ipv4_addr_legal || stcb->asoc.scope.ipv6_addr_legal) {
+ uint8_t i;
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ padding_len = 0;
+ }
+ parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+ if (stcb->asoc.scope.ipv4_addr_legal) {
+ parameter_len += (uint16_t) sizeof(uint16_t);
+ }
+ if (stcb->asoc.scope.ipv6_addr_legal) {
+ parameter_len += (uint16_t) sizeof(uint16_t);
+ }
+ sup_addr = (struct sctp_supported_addr_param *)(mtod(m, caddr_t)+chunk_len);
+ sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE);
+ sup_addr->ph.param_length = htons(parameter_len);
+ i = 0;
+ if (stcb->asoc.scope.ipv4_addr_legal) {
+ sup_addr->addr_type[i++] = htons(SCTP_IPV4_ADDRESS);
+ }
+ if (stcb->asoc.scope.ipv6_addr_legal) {
+ sup_addr->addr_type[i++] = htons(SCTP_IPV6_ADDRESS);
+ }
+ padding_len = 4 - 2 * i;
+ chunk_len += parameter_len;
+ }
+ SCTP_BUF_LEN(m) = chunk_len;
/* now the addresses */
/*
* To optimize this we could put the scoping stuff into a structure
@@ -4952,18 +4938,13 @@ sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
* we could just sifa in the address within the stcb. But for now
* this is a quick hack to get the address stuff teased apart.
*/
- sctp_add_addresses_to_i_ia(inp, stcb, &stcb->asoc.scope, m, cnt_inits_to, &padding_len, &chunk_len);
+ m_last = sctp_add_addresses_to_i_ia(inp, stcb, &stcb->asoc.scope,
+ m, cnt_inits_to,
+ &padding_len, &chunk_len);
init->ch.chunk_length = htons(chunk_len);
if (padding_len > 0) {
- struct mbuf *m_at, *mp_last;
-
- mp_last = NULL;
- for (m_at = m; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
- if (SCTP_BUF_NEXT(m_at) == NULL)
- mp_last = m_at;
- }
- if ((mp_last == NULL) || sctp_add_pad_tombuf(mp_last, padding_len)) {
+ if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
sctp_m_freem(m);
return;
}
@@ -5100,7 +5081,6 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
*nat_friendly = 1;
/* fall through */
case SCTP_PRSCTP_SUPPORTED:
-
if (padded_size != sizeof(struct sctp_paramhdr)) {
SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error prsctp/nat support %d\n", plen);
goto invalid_size;
@@ -5108,7 +5088,7 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
at += padded_size;
break;
case SCTP_ECN_CAPABLE:
- if (padded_size != sizeof(struct sctp_ecn_supported_param)) {
+ if (padded_size != sizeof(struct sctp_paramhdr)) {
SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ecn %d\n", plen);
goto invalid_size;
}
@@ -5138,13 +5118,14 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
if (op_err == NULL) {
/* Ok need to try to get a mbuf */
#ifdef INET6
- l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len = SCTP_MIN_OVERHEAD;
#else
- l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len = SCTP_MIN_V4_OVERHEAD;
#endif
+ l_len += sizeof(struct sctp_chunkhdr);
l_len += plen;
l_len += sizeof(struct sctp_paramhdr);
- op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
if (op_err) {
SCTP_BUF_LEN(op_err) = 0;
/*
@@ -5207,13 +5188,14 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
/* Ok need to try to get an mbuf */
#ifdef INET6
- l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len = SCTP_MIN_OVERHEAD;
#else
- l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len = SCTP_MIN_V4_OVERHEAD;
#endif
+ l_len += sizeof(struct sctp_chunkhdr);
l_len += plen;
l_len += sizeof(struct sctp_paramhdr);
- op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
if (op_err) {
SCTP_BUF_LEN(op_err) = 0;
#ifdef INET6
@@ -5282,12 +5264,13 @@ invalid_size:
int l_len;
#ifdef INET6
- l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len = SCTP_MIN_OVERHEAD;
#else
- l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len = SCTP_MIN_V4_OVERHEAD;
#endif
+ l_len += sizeof(struct sctp_chunkhdr);
l_len += (2 * sizeof(struct sctp_paramhdr));
- op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
if (op_err) {
SCTP_BUF_LEN(op_err) = 0;
#ifdef INET6
@@ -5336,6 +5319,7 @@ sctp_are_there_new_addresses(struct sctp_association *asoc,
uint16_t ptype, plen;
uint8_t fnd;
struct sctp_nets *net;
+ int check_src;
#ifdef INET
struct sockaddr_in sin4, *sa4;
@@ -5357,39 +5341,61 @@ sctp_are_there_new_addresses(struct sctp_association *asoc,
sin6.sin6_len = sizeof(sin6);
#endif
/* First what about the src address of the pkt ? */
- fnd = 0;
- TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
- sa = (struct sockaddr *)&net->ro._l_addr;
- if (sa->sa_family == src->sa_family) {
+ check_src = 0;
+ switch (src->sa_family) {
+#ifdef INET
+ case AF_INET:
+ if (asoc->scope.ipv4_addr_legal) {
+ check_src = 1;
+ }
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ if (asoc->scope.ipv6_addr_legal) {
+ check_src = 1;
+ }
+ break;
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+ if (check_src) {
+ fnd = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sa = (struct sockaddr *)&net->ro._l_addr;
+ if (sa->sa_family == src->sa_family) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
- struct sockaddr_in *src4;
+ if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *src4;
- sa4 = (struct sockaddr_in *)sa;
- src4 = (struct sockaddr_in *)src;
- if (sa4->sin_addr.s_addr == src4->sin_addr.s_addr) {
- fnd = 1;
- break;
+ sa4 = (struct sockaddr_in *)sa;
+ src4 = (struct sockaddr_in *)src;
+ if (sa4->sin_addr.s_addr == src4->sin_addr.s_addr) {
+ fnd = 1;
+ break;
+ }
}
- }
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
- struct sockaddr_in6 *src6;
+ if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *src6;
- sa6 = (struct sockaddr_in6 *)sa;
- src6 = (struct sockaddr_in6 *)src;
- if (SCTP6_ARE_ADDR_EQUAL(sa6, src6)) {
- fnd = 1;
- break;
+ sa6 = (struct sockaddr_in6 *)sa;
+ src6 = (struct sockaddr_in6 *)src;
+ if (SCTP6_ARE_ADDR_EQUAL(sa6, src6)) {
+ fnd = 1;
+ break;
+ }
}
- }
#endif
+ }
+ }
+ if (fnd == 0) {
+ /* New address added! no need to look further. */
+ return (1);
}
- }
- if (fnd == 0) {
- /* New address added! no need to look futher. */
- return (1);
}
/* Ok so far lets munge through the rest of the packet */
offset += sizeof(struct sctp_init_chunk);
@@ -5410,9 +5416,11 @@ sctp_are_there_new_addresses(struct sctp_association *asoc,
phdr == NULL) {
return (1);
}
- p4 = (struct sctp_ipv4addr_param *)phdr;
- sin4.sin_addr.s_addr = p4->addr;
- sa_touse = (struct sockaddr *)&sin4;
+ if (asoc->scope.ipv4_addr_legal) {
+ p4 = (struct sctp_ipv4addr_param *)phdr;
+ sin4.sin_addr.s_addr = p4->addr;
+ sa_touse = (struct sockaddr *)&sin4;
+ }
break;
}
#endif
@@ -5427,10 +5435,12 @@ sctp_are_there_new_addresses(struct sctp_association *asoc,
phdr == NULL) {
return (1);
}
- p6 = (struct sctp_ipv6addr_param *)phdr;
- memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
- sizeof(p6->addr));
- sa_touse = (struct sockaddr *)&sin6;
+ if (asoc->scope.ipv6_addr_legal) {
+ p6 = (struct sctp_ipv6addr_param *)phdr;
+ memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
+ sizeof(p6->addr));
+ sa_touse = (struct sockaddr *)&sin6;
+ }
break;
}
#endif
@@ -5486,20 +5496,21 @@ sctp_are_there_new_addresses(struct sctp_association *asoc,
*/
void
sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
- struct mbuf *init_pkt, int iphlen, int offset,
+ struct sctp_nets *src_net, struct mbuf *init_pkt,
+ int iphlen, int offset,
struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_init_chunk *init_chk,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port, int hold_inp_lock)
{
struct sctp_association *asoc;
- struct mbuf *m, *m_at, *m_tmp, *m_cookie, *op_err, *mp_last;
+ struct mbuf *m, *m_tmp, *m_last, *m_cookie, *op_err;
struct sctp_init_ack_chunk *initack;
struct sctp_adaptation_layer_indication *ali;
- struct sctp_ecn_supported_param *ecn;
- struct sctp_prsctp_supported_param *prsctp;
struct sctp_supported_chunk_types_param *pr_supported;
+ struct sctp_paramhdr *ph;
union sctp_sockstore *over_addr;
+ struct sctp_scoping scp;
#ifdef INET
struct sockaddr_in *dst4 = (struct sockaddr_in *)dst;
@@ -5519,33 +5530,50 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
uint8_t *signature = NULL;
int cnt_inits_to = 0;
uint16_t his_limit, i_want;
- int abort_flag, padval;
- int num_ext;
- int p_len;
+ int abort_flag;
int nat_friendly = 0;
struct socket *so;
+ uint16_t num_ext, chunk_len, padding_len, parameter_len;
if (stcb) {
asoc = &stcb->asoc;
} else {
asoc = NULL;
}
- mp_last = NULL;
if ((asoc != NULL) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
- (sctp_are_there_new_addresses(asoc, init_pkt, offset, src))) {
- /* new addresses, out of here in non-cookie-wait states */
- /*
- * Send a ABORT, we don't add the new address error clause
- * though we even set the T bit and copy in the 0 tag.. this
- * looks no different than if no listener was present.
- */
- op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
- "Address added");
- sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err,
- use_mflowid, mflowid,
- vrf_id, port);
- return;
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT)) {
+ if (sctp_are_there_new_addresses(asoc, init_pkt, offset, src)) {
+ /*
+ * new addresses, out of here in non-cookie-wait
+ * states
+ *
+ * Send an ABORT, without the new address error cause.
+ * This looks no different than if no listener was
+ * present.
+ */
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Address added");
+ sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err,
+ mflowtype, mflowid, inp->fibnum,
+ vrf_id, port);
+ return;
+ }
+ if (src_net != NULL && (src_net->port != port)) {
+ /*
+ * change of remote encapsulation port, out of here
+ * in non-cookie-wait states
+ *
+ * Send an ABORT, without a specific error cause. This
+ * looks no different than if no listener was
+ * present.
+ */
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Remote encapsulation port changed");
+ sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err,
+ mflowtype, mflowid, inp->fibnum,
+ vrf_id, port);
+ return;
+ }
}
abort_flag = 0;
op_err = sctp_arethere_unrecognized_parameters(init_pkt,
@@ -5556,24 +5584,25 @@ do_a_abort:
if (op_err == NULL) {
char msg[SCTP_DIAG_INFO_LEN];
- snprintf(msg, sizeof(msg), "%s:%d at %s\n", __FILE__, __LINE__, __FUNCTION__);
+ snprintf(msg, sizeof(msg), "%s:%d at %s", __FILE__, __LINE__, __func__);
op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
msg);
}
sctp_send_abort(init_pkt, iphlen, src, dst, sh,
init_chk->init.initiate_tag, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
return;
}
- m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (m == NULL) {
/* No memory, INIT timer will re-attempt. */
if (op_err)
sctp_m_freem(op_err);
return;
}
- SCTP_BUF_LEN(m) = sizeof(struct sctp_init_chunk);
+ chunk_len = (uint16_t) sizeof(struct sctp_init_ack_chunk);
+ padding_len = 0;
/*
* We might not overwrite the identification[] completely and on
@@ -5605,7 +5634,7 @@ do_a_abort:
stc.peerport = sh->src_port;
/*
- * If we wanted to honor cookie life extentions, we would add to
+ * If we wanted to honor cookie life extensions, we would add to
* stc.cookie_life. For now we should NOT honor any extension
*/
stc.site_scope = stc.local_scope = stc.loopback_scope = 0;
@@ -5620,11 +5649,7 @@ do_a_abort:
stc.ipv6_addr_legal = 0;
stc.ipv4_addr_legal = 1;
}
-#ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
- stc.ipv4_scope = 1;
-#else
stc.ipv4_scope = 0;
-#endif
if (net == NULL) {
to = src;
switch (dst->sa_family) {
@@ -5645,13 +5670,10 @@ do_a_abort:
stc.laddr_type = SCTP_IPV4_ADDRESS;
/* scope_id is only for v6 */
stc.scope_id = 0;
-#ifndef SCTP_DONT_DO_PRIVADDR_SCOPE
- if (IN4_ISPRIVATE_ADDRESS(&src4->sin_addr)) {
+ if ((IN4_ISPRIVATE_ADDRESS(&src4->sin_addr)) ||
+ (IN4_ISPRIVATE_ADDRESS(&dst4->sin_addr))) {
stc.ipv4_scope = 1;
}
-#else
- stc.ipv4_scope = 1;
-#endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */
/* Must use the address in this case */
if (sctp_is_address_on_local_host(src, vrf_id)) {
stc.loopback_scope = 1;
@@ -5667,22 +5689,24 @@ do_a_abort:
{
stc.addr_type = SCTP_IPV6_ADDRESS;
memcpy(&stc.address, &src6->sin6_addr, sizeof(struct in6_addr));
- stc.scope_id = in6_getscope(&src6->sin6_addr);
+ stc.scope_id = ntohs(in6_getscope(&src6->sin6_addr));
if (sctp_is_address_on_local_host(src, vrf_id)) {
stc.loopback_scope = 1;
stc.local_scope = 0;
stc.site_scope = 1;
stc.ipv4_scope = 1;
- } else if (IN6_IS_ADDR_LINKLOCAL(&src6->sin6_addr)) {
+ } else if (IN6_IS_ADDR_LINKLOCAL(&src6->sin6_addr) ||
+ IN6_IS_ADDR_LINKLOCAL(&dst6->sin6_addr)) {
/*
- * If the new destination is a
- * LINK_LOCAL we must have common
- * both site and local scope. Don't
- * set local scope though since we
- * must depend on the source to be
- * added implicitly. We cannot
- * assure just because we share one
- * link that all links are common.
+ * If the new destination or source
+ * is a LINK_LOCAL we must have
+ * common both site and local scope.
+ * Don't set local scope though
+ * since we must depend on the
+ * source to be added implicitly. We
+ * cannot assure just because we
+ * share one link that all links are
+ * common.
*/
stc.local_scope = 0;
stc.site_scope = 1;
@@ -5698,11 +5722,12 @@ do_a_abort:
* pull out the scope_id from
* incoming pkt
*/
- } else if (IN6_IS_ADDR_SITELOCAL(&src6->sin6_addr)) {
+ } else if (IN6_IS_ADDR_SITELOCAL(&src6->sin6_addr) ||
+ IN6_IS_ADDR_SITELOCAL(&dst6->sin6_addr)) {
/*
- * If the new destination is
- * SITE_LOCAL then we must have site
- * scope in common.
+ * If the new destination or source
+ * is SITE_LOCAL then we must have
+ * site scope in common.
*/
stc.site_scope = 1;
}
@@ -5806,7 +5831,7 @@ do_a_abort:
/* Now lets put the SCTP header in place */
initack = mtod(m, struct sctp_init_ack_chunk *);
/* Save it off for quick ref */
- stc.peers_vtag = init_chk->init.initiate_tag;
+ stc.peers_vtag = ntohl(init_chk->init.initiate_tag);
/* who are we */
memcpy(stc.identification, SCTP_VERSION_STRING,
min(strlen(SCTP_VERSION_STRING), sizeof(stc.identification)));
@@ -5876,10 +5901,10 @@ do_a_abort:
his_limit = ntohs(init_chk->init.num_inbound_streams);
/* choose what I want */
if (asoc != NULL) {
- if (asoc->streamoutcnt > inp->sctp_ep.pre_open_stream_count) {
+ if (asoc->streamoutcnt > asoc->pre_open_streams) {
i_want = asoc->streamoutcnt;
} else {
- i_want = inp->sctp_ep.pre_open_stream_count;
+ i_want = asoc->pre_open_streams;
}
} else {
i_want = inp->sctp_ep.pre_open_stream_count;
@@ -5897,161 +5922,182 @@ do_a_abort:
/* adaptation layer indication parameter */
if (inp->sctp_ep.adaptation_layer_indicator_provided) {
- ali = (struct sctp_adaptation_layer_indication *)((caddr_t)initack + sizeof(*initack));
+ parameter_len = (uint16_t) sizeof(struct sctp_adaptation_layer_indication);
+ ali = (struct sctp_adaptation_layer_indication *)(mtod(m, caddr_t)+chunk_len);
ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
- ali->ph.param_length = htons(sizeof(*ali));
- ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator);
- SCTP_BUF_LEN(m) += sizeof(*ali);
- ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali + sizeof(*ali));
- } else {
- ecn = (struct sctp_ecn_supported_param *)((caddr_t)initack + sizeof(*initack));
+ ali->ph.param_length = htons(parameter_len);
+ ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
+ chunk_len += parameter_len;
}
-
/* ECN parameter */
- if (((asoc != NULL) && (asoc->ecn_allowed == 1)) ||
- (inp->sctp_ecn_enable == 1)) {
- ecn->ph.param_type = htons(SCTP_ECN_CAPABLE);
- ecn->ph.param_length = htons(sizeof(*ecn));
- SCTP_BUF_LEN(m) += sizeof(*ecn);
-
- prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn +
- sizeof(*ecn));
- } else {
- prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn);
+ if (((asoc != NULL) && (asoc->ecn_supported == 1)) ||
+ ((asoc == NULL) && (inp->ecn_supported == 1))) {
+ parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+ ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
+ ph->param_type = htons(SCTP_ECN_CAPABLE);
+ ph->param_length = htons(parameter_len);
+ chunk_len += parameter_len;
+ }
+ /* PR-SCTP supported parameter */
+ if (((asoc != NULL) && (asoc->prsctp_supported == 1)) ||
+ ((asoc == NULL) && (inp->prsctp_supported == 1))) {
+ parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+ ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
+ ph->param_type = htons(SCTP_PRSCTP_SUPPORTED);
+ ph->param_length = htons(parameter_len);
+ chunk_len += parameter_len;
}
- /* And now tell the peer we do pr-sctp */
- prsctp->ph.param_type = htons(SCTP_PRSCTP_SUPPORTED);
- prsctp->ph.param_length = htons(sizeof(*prsctp));
- SCTP_BUF_LEN(m) += sizeof(*prsctp);
+ /* Add NAT friendly parameter */
if (nat_friendly) {
- /* Add NAT friendly parameter */
- struct sctp_paramhdr *ph;
-
- ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+ ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
ph->param_type = htons(SCTP_HAS_NAT_SUPPORT);
- ph->param_length = htons(sizeof(struct sctp_paramhdr));
- SCTP_BUF_LEN(m) += sizeof(struct sctp_paramhdr);
+ ph->param_length = htons(parameter_len);
+ chunk_len += parameter_len;
}
- /* And now tell the peer we do all the extensions */
- pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
- pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+ /* And now tell the peer which extensions we support */
num_ext = 0;
- pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
- pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
- pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
- pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
- pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
- if (!SCTP_BASE_SYSCTL(sctp_auth_disable))
+ pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
+ if (((asoc != NULL) && (asoc->prsctp_supported == 1)) ||
+ ((asoc == NULL) && (inp->prsctp_supported == 1))) {
+ pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
+ if (((asoc != NULL) && (asoc->idata_supported == 1)) ||
+ ((asoc == NULL) && (inp->idata_supported == 1))) {
+ pr_supported->chunk_types[num_ext++] = SCTP_IFORWARD_CUM_TSN;
+ }
+ }
+ if (((asoc != NULL) && (asoc->auth_supported == 1)) ||
+ ((asoc == NULL) && (inp->auth_supported == 1))) {
pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
- if (SCTP_BASE_SYSCTL(sctp_nr_sack_on_off))
+ }
+ if (((asoc != NULL) && (asoc->asconf_supported == 1)) ||
+ ((asoc == NULL) && (inp->asconf_supported == 1))) {
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
+ }
+ if (((asoc != NULL) && (asoc->reconfig_supported == 1)) ||
+ ((asoc == NULL) && (inp->reconfig_supported == 1))) {
+ pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
+ }
+ if (((asoc != NULL) && (asoc->idata_supported == 1)) ||
+ ((asoc == NULL) && (inp->idata_supported == 1))) {
+ pr_supported->chunk_types[num_ext++] = SCTP_IDATA;
+ }
+ if (((asoc != NULL) && (asoc->nrsack_supported == 1)) ||
+ ((asoc == NULL) && (inp->nrsack_supported == 1))) {
pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK;
- p_len = sizeof(*pr_supported) + num_ext;
- pr_supported->ph.param_length = htons(p_len);
- bzero((caddr_t)pr_supported + p_len, SCTP_SIZE32(p_len) - p_len);
- SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
-
+ }
+ if (((asoc != NULL) && (asoc->pktdrop_supported == 1)) ||
+ ((asoc == NULL) && (inp->pktdrop_supported == 1))) {
+ pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
+ }
+ if (num_ext > 0) {
+ parameter_len = (uint16_t) sizeof(struct sctp_supported_chunk_types_param) + num_ext;
+ pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+ pr_supported->ph.param_length = htons(parameter_len);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
+ }
/* add authentication parameters */
- if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+ if (((asoc != NULL) && (asoc->auth_supported == 1)) ||
+ ((asoc == NULL) && (inp->auth_supported == 1))) {
struct sctp_auth_random *randp;
struct sctp_auth_hmac_algo *hmacs;
struct sctp_auth_chunk_list *chunks;
- uint16_t random_len;
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ padding_len = 0;
+ }
/* generate and add RANDOM parameter */
- random_len = SCTP_AUTH_RANDOM_SIZE_DEFAULT;
- randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+chunk_len);
+ parameter_len = (uint16_t) sizeof(struct sctp_auth_random) +
+ SCTP_AUTH_RANDOM_SIZE_DEFAULT;
randp->ph.param_type = htons(SCTP_RANDOM);
- p_len = sizeof(*randp) + random_len;
- randp->ph.param_length = htons(p_len);
- SCTP_READ_RANDOM(randp->random_data, random_len);
- /* zero out any padding required */
- bzero((caddr_t)randp + p_len, SCTP_SIZE32(p_len) - p_len);
- SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ randp->ph.param_length = htons(parameter_len);
+ SCTP_READ_RANDOM(randp->random_data, SCTP_AUTH_RANDOM_SIZE_DEFAULT);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ padding_len = 0;
+ }
/* add HMAC_ALGO parameter */
- hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
- p_len = sctp_serialize_hmaclist(inp->sctp_ep.local_hmacs,
+ hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+chunk_len);
+ parameter_len = (uint16_t) sizeof(struct sctp_auth_hmac_algo) +
+ sctp_serialize_hmaclist(inp->sctp_ep.local_hmacs,
(uint8_t *) hmacs->hmac_ids);
- if (p_len > 0) {
- p_len += sizeof(*hmacs);
- hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
- hmacs->ph.param_length = htons(p_len);
- /* zero out any padding required */
- bzero((caddr_t)hmacs + p_len, SCTP_SIZE32(p_len) - p_len);
- SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
+ hmacs->ph.param_length = htons(parameter_len);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
+
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ padding_len = 0;
}
/* add CHUNKS parameter */
- chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
- p_len = sctp_serialize_auth_chunks(inp->sctp_ep.local_auth_chunks,
+ chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+chunk_len);
+ parameter_len = (uint16_t) sizeof(struct sctp_auth_chunk_list) +
+ sctp_serialize_auth_chunks(inp->sctp_ep.local_auth_chunks,
chunks->chunk_types);
- if (p_len > 0) {
- p_len += sizeof(*chunks);
- chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
- chunks->ph.param_length = htons(p_len);
- /* zero out any padding required */
- bzero((caddr_t)chunks + p_len, SCTP_SIZE32(p_len) - p_len);
- SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
- }
+ chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
+ chunks->ph.param_length = htons(parameter_len);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
}
- m_at = m;
+ SCTP_BUF_LEN(m) = chunk_len;
+ m_last = m;
/* now the addresses */
- {
- struct sctp_scoping scp;
-
- /*
- * To optimize this we could put the scoping stuff into a
- * structure and remove the individual uint8's from the stc
- * structure. Then we could just sifa in the address within
- * the stc.. but for now this is a quick hack to get the
- * address stuff teased apart.
- */
- scp.ipv4_addr_legal = stc.ipv4_addr_legal;
- scp.ipv6_addr_legal = stc.ipv6_addr_legal;
- scp.loopback_scope = stc.loopback_scope;
- scp.ipv4_local_scope = stc.ipv4_scope;
- scp.local_scope = stc.local_scope;
- scp.site_scope = stc.site_scope;
- m_at = sctp_add_addresses_to_i_ia(inp, stcb, &scp, m_at, cnt_inits_to, NULL, NULL);
+ /*
+ * To optimize this we could put the scoping stuff into a structure
+ * and remove the individual uint8's from the stc structure. Then we
+ * could just sifa in the address within the stc.. but for now this
+ * is a quick hack to get the address stuff teased apart.
+ */
+ scp.ipv4_addr_legal = stc.ipv4_addr_legal;
+ scp.ipv6_addr_legal = stc.ipv6_addr_legal;
+ scp.loopback_scope = stc.loopback_scope;
+ scp.ipv4_local_scope = stc.ipv4_scope;
+ scp.local_scope = stc.local_scope;
+ scp.site_scope = stc.site_scope;
+ m_last = sctp_add_addresses_to_i_ia(inp, stcb, &scp, m_last,
+ cnt_inits_to,
+ &padding_len, &chunk_len);
+ /* padding_len can only be positive if no addresses have been added */
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ SCTP_BUF_LEN(m) += padding_len;
+ padding_len = 0;
}
-
/* tack on the operational error if present */
if (op_err) {
- struct mbuf *ol;
- int llen;
-
- llen = 0;
- ol = op_err;
-
- while (ol) {
- llen += SCTP_BUF_LEN(ol);
- ol = SCTP_BUF_NEXT(ol);
- }
- if (llen % 4) {
- /* must add a pad to the param */
- uint32_t cpthis = 0;
- int padlen;
-
- padlen = 4 - (llen % 4);
- m_copyback(op_err, llen, padlen, (caddr_t)&cpthis);
+ parameter_len = 0;
+ for (m_tmp = op_err; m_tmp != NULL; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
+ parameter_len += SCTP_BUF_LEN(m_tmp);
}
- while (SCTP_BUF_NEXT(m_at) != NULL) {
- m_at = SCTP_BUF_NEXT(m_at);
- }
- SCTP_BUF_NEXT(m_at) = op_err;
- while (SCTP_BUF_NEXT(m_at) != NULL) {
- m_at = SCTP_BUF_NEXT(m_at);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ SCTP_BUF_NEXT(m_last) = op_err;
+ while (SCTP_BUF_NEXT(m_last) != NULL) {
+ m_last = SCTP_BUF_NEXT(m_last);
}
+ chunk_len += parameter_len;
}
- /* pre-calulate the size and update pkt header and chunk header */
- p_len = 0;
- for (m_tmp = m; m_tmp; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
- p_len += SCTP_BUF_LEN(m_tmp);
- if (SCTP_BUF_NEXT(m_tmp) == NULL) {
- /* m_tmp should now point to last one */
- break;
+ if (padding_len > 0) {
+ m_last = sctp_add_pad_tombuf(m_last, padding_len);
+ if (m_last == NULL) {
+ /* Houston we have a problem, no space */
+ sctp_m_freem(m);
+ return;
}
+ chunk_len += padding_len;
+ padding_len = 0;
}
-
/* Now we must build a cookie */
m_cookie = sctp_add_cookie(init_pkt, offset, m, 0, &stc, &signature);
if (m_cookie == NULL) {
@@ -6060,21 +6106,22 @@ do_a_abort:
return;
}
/* Now append the cookie to the end and update the space/size */
- SCTP_BUF_NEXT(m_tmp) = m_cookie;
-
- for (m_tmp = m_cookie; m_tmp; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
- p_len += SCTP_BUF_LEN(m_tmp);
+ SCTP_BUF_NEXT(m_last) = m_cookie;
+ parameter_len = 0;
+ for (m_tmp = m_cookie; m_tmp != NULL; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
+ parameter_len += SCTP_BUF_LEN(m_tmp);
if (SCTP_BUF_NEXT(m_tmp) == NULL) {
- /* m_tmp should now point to last one */
- mp_last = m_tmp;
- break;
+ m_last = m_tmp;
}
}
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
+
/*
* Place in the size, but we don't include the last pad (if any) in
* the INIT-ACK.
*/
- initack->ch.chunk_length = htons(p_len);
+ initack->ch.chunk_length = htons(chunk_len);
/*
* Time to sign the cookie, we don't sign over the cookie signature
@@ -6088,11 +6135,8 @@ do_a_abort:
* We sifa 0 here to NOT set IP_DF if its IPv4, we ignore the return
* here since the timer will drive a retranmission.
*/
- padval = p_len % 4;
- if ((padval) && (mp_last)) {
- /* see my previous comments on mp_last */
- if (sctp_add_pad_tombuf(mp_last, (4 - padval))) {
- /* Houston we have a problem, no space */
+ if (padding_len > 0) {
+ if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
sctp_m_freem(m);
return;
}
@@ -6107,7 +6151,7 @@ do_a_abort:
0, 0,
inp->sctp_lport, sh->src_port, init_chk->init.initiate_tag,
port, over_addr,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
SCTP_SO_NOT_LOCKED);
SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
}
@@ -6123,7 +6167,7 @@ sctp_prune_prsctp(struct sctp_tcb *stcb,
struct sctp_tmit_chunk *chk, *nchk;
SCTP_TCB_LOCK_ASSERT(stcb);
- if ((asoc->peer_supports_prsctp) &&
+ if ((asoc->prsctp_supported) &&
(asoc->sent_queue_cnt_removeable > 0)) {
TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
/*
@@ -6165,7 +6209,7 @@ sctp_prune_prsctp(struct sctp_tcb *stcb,
return;
}
} /* if chunk was present */
- } /* if of sufficent priority */
+ } /* if of sufficient priority */
} /* if chunk has enabled */
} /* tailqforeach */
@@ -6206,11 +6250,15 @@ sctp_get_frag_point(struct sctp_tcb *stcb,
* we use a larger frag point.
*/
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- ovh = SCTP_MED_OVERHEAD;
+ ovh = SCTP_MIN_OVERHEAD;
} else {
- ovh = SCTP_MED_V4_OVERHEAD;
+ ovh = SCTP_MIN_V4_OVERHEAD;
+ }
+ if (stcb->asoc.idata_supported) {
+ ovh += sizeof(struct sctp_idata_chunk);
+ } else {
+ ovh += sizeof(struct sctp_data_chunk);
}
-
if (stcb->asoc.sctp_frag_point > asoc->smallest_mtu)
siz = asoc->smallest_mtu - ovh;
else
@@ -6335,6 +6383,7 @@ sctp_msg_append(struct sctp_tcb *stcb,
sp->timetolive = srcv->sinfo_timetolive;
sp->ppid = srcv->sinfo_ppid;
sp->context = srcv->sinfo_context;
+ sp->fsn = 0;
if (sp->sinfo_flags & SCTP_ADDR_OVER) {
sp->net = net;
atomic_add_int(&sp->net->ref_count, 1);
@@ -6420,7 +6469,7 @@ error_out:
if (outchain == NULL) {
/* This is the general case */
new_mbuf:
- outchain = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_HEADER);
+ outchain = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_HEADER);
if (outchain == NULL) {
goto error_out;
}
@@ -6453,10 +6502,10 @@ error_out:
}
}
/* get the new end of length */
- len = M_TRAILINGSPACE(*endofchain);
+ len = (int)M_TRAILINGSPACE(*endofchain);
} else {
/* how much is left at the end? */
- len = M_TRAILINGSPACE(*endofchain);
+ len = (int)M_TRAILINGSPACE(*endofchain);
}
/* Find the end of the data, for appending */
cp = (mtod((*endofchain), caddr_t)+SCTP_BUF_LEN((*endofchain)));
@@ -6474,7 +6523,7 @@ error_out:
/* now we need another one */
sizeofcpy -= len;
}
- m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_HEADER);
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_HEADER);
if (m == NULL) {
/* We failed */
goto error_out;
@@ -6488,16 +6537,10 @@ error_out:
return (outchain);
} else {
/* copy the old fashion way */
- appendchain = SCTP_M_COPYM(clonechain, 0, M_COPYALL, M_DONTWAIT);
+ appendchain = SCTP_M_COPYM(clonechain, 0, M_COPYALL, M_NOWAIT);
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = appendchain; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(appendchain, SCTP_MBUF_ICOPY);
}
#endif
}
@@ -6523,7 +6566,7 @@ error_out:
}
}
/*
- * save off the end and update the end-chain postion
+ * save off the end and update the end-chain position
*/
m = appendchain;
while (m) {
@@ -6535,7 +6578,7 @@ error_out:
}
return (outchain);
} else {
- /* save off the end and update the end-chain postion */
+ /* save off the end and update the end-chain position */
m = appendchain;
while (m) {
if (SCTP_BUF_NEXT(m) == NULL) {
@@ -6582,7 +6625,7 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
return;
}
if (ca->sndlen > 0) {
- m = SCTP_M_COPYM(ca->m, 0, M_COPYALL, M_DONTWAIT);
+ m = SCTP_M_COPYM(ca->m, 0, M_COPYALL, M_NOWAIT);
if (m == NULL) {
/* can't copy so we are done */
ca->cnt_failed++;
@@ -6590,13 +6633,7 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(m, SCTP_MBUF_ICOPY);
}
#endif
} else {
@@ -6622,7 +6659,7 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
ph = mtod(m, struct sctp_paramhdr *);
ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
- ph->param_length = htons(sizeof(struct sctp_paramhdr) + ca->sndlen);
+ ph->param_length = htons((uint16_t) (sizeof(struct sctp_paramhdr) + ca->sndlen));
}
/*
* We add one here to keep the assoc from dis-appearing on
@@ -6652,14 +6689,10 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
asoc = &stcb->asoc;
if (ca->sndrcv.sinfo_flags & SCTP_EOF) {
/* shutdown this assoc */
- int cnt;
-
- cnt = sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED);
-
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
- (cnt == 0)) {
- if (asoc->locked_on_sending) {
+ sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED) == 0) {
+ if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
goto abort_anyway;
}
/*
@@ -6701,27 +6734,24 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
(SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
(SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
- if (asoc->locked_on_sending) {
- /*
- * Locked to send out the
- * data
- */
- struct sctp_stream_queue_pending *sp;
-
- sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
- if (sp) {
- if ((sp->length == 0) && (sp->msg_is_complete == 0))
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
- }
+ if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
}
asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
(asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
abort_anyway:
+ snprintf(msg, sizeof(msg),
+ "%s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
atomic_add_int(&stcb->asoc.refcnt, 1);
sctp_abort_an_association(stcb->sctp_ep, stcb,
- NULL, SCTP_SO_NOT_LOCKED);
+ op_err, SCTP_SO_NOT_LOCKED);
atomic_add_int(&stcb->asoc.refcnt, -1);
goto no_chunk_output;
}
@@ -6743,7 +6773,7 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
if (do_chunk_output)
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_NOT_LOCKED);
else if (added_control) {
- int num_out = 0, reason = 0, now_filled = 0;
+ int num_out, reason, now_filled = 0;
struct timeval now;
int frag_point;
@@ -6768,7 +6798,7 @@ sctp_sendall_completes(void *ptr, uint32_t val SCTP_UNUSED)
/*
* Do a notify here? Kacheong suggests that the notify be done at
* the send time.. so you would push up a notification if any send
- * failed. Don't know if this is feasable since the only failures we
+ * failed. Don't know if this is feasible since the only failures we
* have is "memory" related and if you cannot get an mbuf to send
* the data you surely can't get an mbuf to send up to notify the
* user you can't send the data :->
@@ -6779,20 +6809,13 @@ sctp_sendall_completes(void *ptr, uint32_t val SCTP_UNUSED)
SCTP_FREE(ca, SCTP_M_COPYAL);
}
-
-#define MC_ALIGN(m, len) do { \
- SCTP_BUF_RESV_UF(m, ((MCLBYTES - (len)) & ~(sizeof(long) - 1)); \
-} while (0)
-
-
-
static struct mbuf *
sctp_copy_out_all(struct uio *uio, int len)
{
struct mbuf *ret, *at;
int left, willcpy, cancpy, error;
- ret = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_WAIT, 1, MT_DATA);
+ ret = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_WAITOK, 1, MT_DATA);
if (ret == NULL) {
/* TSNH */
return (NULL);
@@ -6800,7 +6823,7 @@ sctp_copy_out_all(struct uio *uio, int len)
left = len;
SCTP_BUF_LEN(ret) = 0;
/* save space for the data chunk header */
- cancpy = M_TRAILINGSPACE(ret);
+ cancpy = (int)M_TRAILINGSPACE(ret);
willcpy = min(cancpy, left);
at = ret;
while (left > 0) {
@@ -6815,13 +6838,13 @@ sctp_copy_out_all(struct uio *uio, int len)
SCTP_BUF_NEXT_PKT(at) = SCTP_BUF_NEXT(at) = 0;
left -= willcpy;
if (left > 0) {
- SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg(left, 0, M_WAIT, 1, MT_DATA);
+ SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg(left, 0, M_WAITOK, 1, MT_DATA);
if (SCTP_BUF_NEXT(at) == NULL) {
goto err_out_now;
}
at = SCTP_BUF_NEXT(at);
SCTP_BUF_LEN(at) = 0;
- cancpy = M_TRAILINGSPACE(at);
+ cancpy = (int)M_TRAILINGSPACE(at);
willcpy = min(cancpy, left);
}
}
@@ -6855,7 +6878,7 @@ sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m,
ca->sndrcv.sinfo_flags &= ~SCTP_SENDALL;
/* get length and mbuf chain */
if (uio) {
- ca->sndlen = uio->uio_resid;
+ ca->sndlen = (int)uio->uio_resid;
ca->m = sctp_copy_out_all(uio, ca->sndlen);
if (ca->m == NULL) {
SCTP_FREE(ca, SCTP_M_COPYAL);
@@ -7005,7 +7028,7 @@ all_done:
sctp_misc_ints(SCTP_FLIGHT_LOG_UP,
data_list[i]->whoTo->flight_size,
data_list[i]->book_size,
- (uintptr_t) data_list[i]->whoTo,
+ (uint32_t) (uintptr_t) data_list[i]->whoTo,
data_list[i]->rec.data.TSN_seq);
}
sctp_flight_size_increase(data_list[i]);
@@ -7135,7 +7158,6 @@ sctp_move_to_outqueue(struct sctp_tcb *stcb,
struct sctp_stream_out *strq,
uint32_t goal_mtu,
uint32_t frag_point,
- int *locked,
int *giveup,
int eeor_mode,
int *bail,
@@ -7149,8 +7171,10 @@ sctp_move_to_outqueue(struct sctp_tcb *stcb,
struct sctp_association *asoc;
struct sctp_stream_queue_pending *sp;
struct sctp_tmit_chunk *chk;
- struct sctp_data_chunk *dchkh;
+ struct sctp_data_chunk *dchkh = NULL;
+ struct sctp_idata_chunk *ndchkh = NULL;
uint32_t to_move, length;
+ int leading;
uint8_t rcv_flags = 0;
uint8_t some_taken;
uint8_t send_lock_up = 0;
@@ -7161,7 +7185,6 @@ one_more_time:
/* sa_ignore FREED_MEMORY */
sp = TAILQ_FIRST(&strq->outqueue);
if (sp == NULL) {
- *locked = 0;
if (send_lock_up == 0) {
SCTP_TCB_SEND_LOCK(stcb);
send_lock_up = 1;
@@ -7170,7 +7193,9 @@ one_more_time:
if (sp) {
goto one_more_time;
}
- if (strq->last_msg_incomplete) {
+ if ((sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_EXPLICIT_EOR) == 0) &&
+ (stcb->asoc.idata_supported == 0) &&
+ (strq->last_msg_incomplete)) {
SCTP_PRINTF("Huh? Stream:%d lm_in_c=%d but queue is NULL\n",
strq->stream_no,
strq->last_msg_incomplete);
@@ -7206,6 +7231,11 @@ one_more_time:
atomic_subtract_int(&asoc->stream_queue_cnt, 1);
TAILQ_REMOVE(&strq->outqueue, sp, next);
stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp, send_lock_up);
+ if ((strq->state == SCTP_STREAM_RESET_PENDING) &&
+ (strq->chunks_on_queues == 0) &&
+ TAILQ_EMPTY(&strq->outqueue)) {
+ stcb->asoc.trigger_reset = 1;
+ }
if (sp->net) {
sctp_free_remote_addr(sp->net);
sp->net = NULL;
@@ -7216,8 +7246,6 @@ one_more_time:
}
sctp_free_a_strmoq(stcb, sp, so_locked);
/* we can't be locked to it */
- *locked = 0;
- stcb->asoc.locked_on_sending = NULL;
if (send_lock_up) {
SCTP_TCB_SEND_UNLOCK(stcb);
send_lock_up = 0;
@@ -7229,7 +7257,6 @@ one_more_time:
* sender just finished this but still holds a
* reference
*/
- *locked = 1;
*giveup = 1;
to_move = 0;
goto out_of;
@@ -7238,7 +7265,6 @@ one_more_time:
/* is there some to get */
if (sp->length == 0) {
/* no */
- *locked = 1;
*giveup = 1;
to_move = 0;
goto out_of;
@@ -7249,7 +7275,7 @@ one_more_time:
}
/* Whack down the size */
atomic_subtract_int(&stcb->asoc.total_output_queue_size, sp->length);
- if ((stcb->sctp_socket != NULL) && \
+ if ((stcb->sctp_socket != NULL) &&
((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
atomic_subtract_int(&stcb->sctp_socket->so_snd.sb_cc, sp->length);
@@ -7261,16 +7287,12 @@ one_more_time:
}
sp->length = 0;
sp->some_taken = 1;
- *locked = 1;
*giveup = 1;
to_move = 0;
goto out_of;
}
}
some_taken = sp->some_taken;
- if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
- sp->msg_is_complete = 1;
- }
re_look:
length = sp->length;
if (sp->msg_is_complete) {
@@ -7280,10 +7302,12 @@ re_look:
/* All of it fits in the MTU */
if (sp->some_taken) {
rcv_flags |= SCTP_DATA_LAST_FRAG;
- sp->put_last_out = 1;
} else {
rcv_flags |= SCTP_DATA_NOT_FRAG;
- sp->put_last_out = 1;
+ }
+ sp->put_last_out = 1;
+ if (sp->sinfo_flags & SCTP_SACK_IMMEDIATELY) {
+ rcv_flags |= SCTP_DATA_SACK_IMMEDIATELY;
}
} else {
/* Not all of it fits, we fragment */
@@ -7326,9 +7350,6 @@ re_look:
}
} else {
/* Nothing to take. */
- if (sp->some_taken) {
- *locked = 1;
- }
*giveup = 1;
to_move = 0;
goto out_of;
@@ -7350,8 +7371,8 @@ re_look:
if (sp->sinfo_flags & SCTP_UNORDERED) {
rcv_flags |= SCTP_DATA_UNORDERED;
}
- if ((SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) && ((sp->sinfo_flags & SCTP_EOF) == SCTP_EOF)) ||
- ((sp->sinfo_flags & SCTP_SACK_IMMEDIATELY) == SCTP_SACK_IMMEDIATELY)) {
+ if (SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) &&
+ (sp->sinfo_flags & SCTP_EOF) == SCTP_EOF) {
rcv_flags |= SCTP_DATA_SACK_IMMEDIATELY;
}
/* clear out the chunk before setting up */
@@ -7376,7 +7397,7 @@ re_look:
struct mbuf *m;
dont_do_it:
- chk->data = SCTP_M_COPYM(sp->data, 0, to_move, M_DONTWAIT);
+ chk->data = SCTP_M_COPYM(sp->data, 0, to_move, M_NOWAIT);
chk->last_mbuf = NULL;
if (chk->data == NULL) {
sp->some_taken = some_taken;
@@ -7387,13 +7408,7 @@ dont_do_it:
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = chk->data; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(chk->data, SCTP_MBUF_ICOPY);
}
#endif
/* Pull off the data */
@@ -7428,7 +7443,7 @@ dont_do_it:
chk->copy_by_ref = 0;
}
/*
- * get last_mbuf and counts of mb useage This is ugly but hopefully
+ * get last_mbuf and counts of mb usage. This is ugly but hopefully
* its only one mbuf.
*/
if (chk->last_mbuf == NULL) {
@@ -7451,11 +7466,16 @@ dont_do_it:
} else {
atomic_subtract_int(&sp->length, to_move);
}
- if (M_LEADINGSPACE(chk->data) < (int)sizeof(struct sctp_data_chunk)) {
+ if (stcb->asoc.idata_supported == 0) {
+ leading = sizeof(struct sctp_data_chunk);
+ } else {
+ leading = sizeof(struct sctp_idata_chunk);
+ }
+ if (M_LEADINGSPACE(chk->data) < leading) {
/* Not enough room for a chunk header, get some */
struct mbuf *m;
- m = sctp_get_mbuf_for_msg(1, 0, M_DONTWAIT, 0, MT_DATA);
+ m = sctp_get_mbuf_for_msg(1, 0, M_NOWAIT, 0, MT_DATA);
if (m == NULL) {
/*
* we're in trouble here. _PREPEND below will free
@@ -7466,7 +7486,7 @@ dont_do_it:
SCTP_TCB_SEND_LOCK(stcb);
send_lock_up = 1;
}
- if (chk->data == NULL) {
+ if (sp->data == NULL) {
/* unsteal the data */
sp->data = chk->data;
sp->tail_mbuf = chk->last_mbuf;
@@ -7492,7 +7512,11 @@ dont_do_it:
M_ALIGN(chk->data, 4);
}
}
- SCTP_BUF_PREPEND(chk->data, sizeof(struct sctp_data_chunk), M_DONTWAIT);
+ if (stcb->asoc.idata_supported == 0) {
+ SCTP_BUF_PREPEND(chk->data, sizeof(struct sctp_data_chunk), M_NOWAIT);
+ } else {
+ SCTP_BUF_PREPEND(chk->data, sizeof(struct sctp_idata_chunk), M_NOWAIT);
+ }
if (chk->data == NULL) {
/* HELP, TSNH since we assured it would not above? */
#ifdef INVARIANTS
@@ -7505,8 +7529,13 @@ dont_do_it:
to_move = 0;
goto out_of;
}
- sctp_snd_sb_alloc(stcb, sizeof(struct sctp_data_chunk));
- chk->book_size = chk->send_size = (to_move + sizeof(struct sctp_data_chunk));
+ if (stcb->asoc.idata_supported == 0) {
+ sctp_snd_sb_alloc(stcb, sizeof(struct sctp_data_chunk));
+ chk->book_size = chk->send_size = (uint16_t) (to_move + sizeof(struct sctp_data_chunk));
+ } else {
+ sctp_snd_sb_alloc(stcb, sizeof(struct sctp_idata_chunk));
+ chk->book_size = chk->send_size = (uint16_t) (to_move + sizeof(struct sctp_idata_chunk));
+ }
chk->book_size_scale = 0;
chk->sent = SCTP_DATAGRAM_UNSENT;
@@ -7514,10 +7543,28 @@ dont_do_it:
chk->asoc = &stcb->asoc;
chk->pad_inplace = 0;
chk->no_fr_allowed = 0;
- chk->rec.data.stream_seq = strq->next_sequence_send;
- if ((rcv_flags & SCTP_DATA_LAST_FRAG) &&
- !(rcv_flags & SCTP_DATA_UNORDERED)) {
- strq->next_sequence_send++;
+ if (stcb->asoc.idata_supported == 0) {
+ if (rcv_flags & SCTP_DATA_UNORDERED) {
+ /* Just use 0. The receiver ignores the values. */
+ chk->rec.data.stream_seq = 0;
+ } else {
+ chk->rec.data.stream_seq = strq->next_mid_ordered;
+ if (rcv_flags & SCTP_DATA_LAST_FRAG) {
+ strq->next_mid_ordered++;
+ }
+ }
+ } else {
+ if (rcv_flags & SCTP_DATA_UNORDERED) {
+ chk->rec.data.stream_seq = strq->next_mid_unordered;
+ if (rcv_flags & SCTP_DATA_LAST_FRAG) {
+ strq->next_mid_unordered++;
+ }
+ } else {
+ chk->rec.data.stream_seq = strq->next_mid_ordered;
+ if (rcv_flags & SCTP_DATA_LAST_FRAG) {
+ strq->next_mid_ordered++;
+ }
+ }
}
chk->rec.data.stream_number = sp->stream;
chk->rec.data.payloadtype = sp->ppid;
@@ -7541,11 +7588,15 @@ dont_do_it:
chk->rec.data.TSN_seq = atomic_fetchadd_int(&asoc->sending_seq, 1);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_AT_SEND_2_OUTQ) {
sctp_misc_ints(SCTP_STRMOUT_LOG_SEND,
- (uintptr_t) stcb, sp->length,
+ (uint32_t) (uintptr_t) stcb, sp->length,
(uint32_t) ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq),
chk->rec.data.TSN_seq);
}
- dchkh = mtod(chk->data, struct sctp_data_chunk *);
+ if (stcb->asoc.idata_supported == 0) {
+ dchkh = mtod(chk->data, struct sctp_data_chunk *);
+ } else {
+ ndchkh = mtod(chk->data, struct sctp_idata_chunk *);
+ }
/*
* Put the rest of the things in place now. Size was done earlier in
* previous loop prior to padding.
@@ -7567,14 +7618,28 @@ dont_do_it:
asoc->out_tsnlog[asoc->tsn_out_at].in_out = 2;
asoc->tsn_out_at++;
#endif
-
- dchkh->ch.chunk_type = SCTP_DATA;
- dchkh->ch.chunk_flags = chk->rec.data.rcv_flags;
- dchkh->dp.tsn = htonl(chk->rec.data.TSN_seq);
- dchkh->dp.stream_id = htons(strq->stream_no);
- dchkh->dp.stream_sequence = htons(chk->rec.data.stream_seq);
- dchkh->dp.protocol_id = chk->rec.data.payloadtype;
- dchkh->ch.chunk_length = htons(chk->send_size);
+ if (stcb->asoc.idata_supported == 0) {
+ dchkh->ch.chunk_type = SCTP_DATA;
+ dchkh->ch.chunk_flags = chk->rec.data.rcv_flags;
+ dchkh->dp.tsn = htonl(chk->rec.data.TSN_seq);
+ dchkh->dp.stream_id = htons((strq->stream_no & 0x0000ffff));
+ dchkh->dp.stream_sequence = htons((uint16_t) chk->rec.data.stream_seq);
+ dchkh->dp.protocol_id = chk->rec.data.payloadtype;
+ dchkh->ch.chunk_length = htons(chk->send_size);
+ } else {
+ ndchkh->ch.chunk_type = SCTP_IDATA;
+ ndchkh->ch.chunk_flags = chk->rec.data.rcv_flags;
+ ndchkh->dp.tsn = htonl(chk->rec.data.TSN_seq);
+ ndchkh->dp.stream_id = htons(strq->stream_no);
+ ndchkh->dp.reserved = htons(0);
+ ndchkh->dp.msg_id = htonl(chk->rec.data.stream_seq);
+ if (sp->fsn == 0)
+ ndchkh->dp.ppid_fsn.protocol_id = chk->rec.data.payloadtype;
+ else
+ ndchkh->dp.ppid_fsn.fsn = htonl(sp->fsn);
+ sp->fsn++;
+ ndchkh->ch.chunk_length = htons(chk->send_size);
+ }
/* Now advance the chk->send_size by the actual pad needed. */
if (chk->send_size < SCTP_SIZE32(chk->book_size)) {
/* need a pad */
@@ -7582,12 +7647,10 @@ dont_do_it:
int pads;
pads = SCTP_SIZE32(chk->book_size) - chk->send_size;
- if (sctp_pad_lastmbuf(chk->data, pads, chk->last_mbuf) == 0) {
- chk->pad_inplace = 1;
- }
- if ((lm = SCTP_BUF_NEXT(chk->last_mbuf)) != NULL) {
- /* pad added an mbuf */
+ lm = sctp_pad_lastmbuf(chk->data, pads, chk->last_mbuf);
+ if (lm != NULL) {
chk->last_mbuf = lm;
+ chk->pad_inplace = 1;
}
chk->send_size += pads;
}
@@ -7596,7 +7659,6 @@ dont_do_it:
}
if (sp->msg_is_complete && (sp->length == 0) && (sp->sender_all_done)) {
/* All done pull and kill the message */
- atomic_subtract_int(&asoc->stream_queue_cnt, 1);
if (sp->put_last_out == 0) {
SCTP_PRINTF("Gak, put out entire msg with NO end!-2\n");
SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d send_lock:%d\n",
@@ -7610,8 +7672,14 @@ dont_do_it:
SCTP_TCB_SEND_LOCK(stcb);
send_lock_up = 1;
}
+ atomic_subtract_int(&asoc->stream_queue_cnt, 1);
TAILQ_REMOVE(&strq->outqueue, sp, next);
stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp, send_lock_up);
+ if ((strq->state == SCTP_STREAM_RESET_PENDING) &&
+ (strq->chunks_on_queues == 0) &&
+ TAILQ_EMPTY(&strq->outqueue)) {
+ stcb->asoc.trigger_reset = 1;
+ }
if (sp->net) {
sctp_free_remote_addr(sp->net);
sp->net = NULL;
@@ -7621,13 +7689,6 @@ dont_do_it:
sp->data = NULL;
}
sctp_free_a_strmoq(stcb, sp, so_locked);
-
- /* we can't be locked to it */
- *locked = 0;
- stcb->asoc.locked_on_sending = NULL;
- } else {
- /* more to go, we are locked */
- *locked = 1;
}
asoc->chunks_on_out_queue++;
strq->chunks_on_queues++;
@@ -7652,7 +7713,7 @@ sctp_fill_outqueue(struct sctp_tcb *stcb,
struct sctp_association *asoc;
struct sctp_stream_out *strq;
int goal_mtu, moved_how_much, total_moved = 0, bail = 0;
- int locked, giveup;
+ int giveup;
SCTP_TCB_LOCK_ASSERT(stcb);
asoc = &stcb->asoc;
@@ -7673,40 +7734,28 @@ sctp_fill_outqueue(struct sctp_tcb *stcb,
break;
}
/* Need an allowance for the data chunk header too */
- goal_mtu -= sizeof(struct sctp_data_chunk);
+ if (stcb->asoc.idata_supported == 0) {
+ goal_mtu -= sizeof(struct sctp_data_chunk);
+ } else {
+ goal_mtu -= sizeof(struct sctp_idata_chunk);
+ }
/* must make even word boundary */
goal_mtu &= 0xfffffffc;
- if (asoc->locked_on_sending) {
- /* We are stuck on one stream until the message completes. */
- strq = asoc->locked_on_sending;
- locked = 1;
- } else {
- strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc);
- locked = 0;
- }
+ strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc);
while ((goal_mtu > 0) && strq) {
giveup = 0;
bail = 0;
- moved_how_much = sctp_move_to_outqueue(stcb, strq, goal_mtu, frag_point, &locked,
+ moved_how_much = sctp_move_to_outqueue(stcb, strq, goal_mtu, frag_point,
&giveup, eeor_mode, &bail, so_locked);
- if (moved_how_much)
- stcb->asoc.ss_functions.sctp_ss_scheduled(stcb, net, asoc, strq, moved_how_much);
+ stcb->asoc.ss_functions.sctp_ss_scheduled(stcb, net, asoc, strq, moved_how_much);
- if (locked) {
- asoc->locked_on_sending = strq;
- if ((moved_how_much == 0) || (giveup) || bail)
- /* no more to move for now */
- break;
- } else {
- asoc->locked_on_sending = NULL;
- if ((giveup) || bail) {
- break;
- }
- strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc);
- if (strq == NULL) {
- break;
- }
+ if ((giveup) || bail) {
+ break;
+ }
+ strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc);
+ if (strq == NULL) {
+ break;
}
total_moved += moved_how_much;
goal_mtu -= (moved_how_much + sizeof(struct sctp_data_chunk));
@@ -7784,12 +7833,15 @@ sctp_med_chunk_output(struct sctp_inpcb *inp,
{
/**
* Ok this is the generic chunk service queue. we must do the
- * following: - Service the stream queue that is next, moving any
- * message (note I must get a complete message i.e. FIRST/MIDDLE and
- * LAST to the out queue in one pass) and assigning TSN's - Check to
- * see if the cwnd/rwnd allows any output, if so we go ahead and
- * fomulate and send the low level chunks. Making sure to combine
- * any control in the control chunk queue also.
+ * following:
+ * - Service the stream queue that is next, moving any
+ * message (note I must get a complete message i.e. FIRST/MIDDLE and
+ * LAST to the out queue in one pass) and assigning TSN's. This
+ * only applies though if the peer does not support NDATA. For NDATA
+ * chunks it's ok to not send the entire message ;-)
+ * - Check to see if the cwnd/rwnd allows any output, if so we go ahead and
+ * formulate and send the low level chunks. Making sure to combine
+ * any control in the control chunk queue also.
*/
struct sctp_nets *net, *start_at, *sack_goes_to = NULL, *old_start_at = NULL;
struct mbuf *outchain, *endoutchain;
@@ -7818,8 +7870,8 @@ sctp_med_chunk_output(struct sctp_inpcb *inp,
int quit_now = 0;
*num_out = 0;
+ *reason_code = 0;
auth_keyid = stcb->asoc.authinfo.active_keyid;
-
if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
(asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED) ||
(sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {
@@ -7838,7 +7890,7 @@ sctp_med_chunk_output(struct sctp_inpcb *inp,
#endif
SCTP_TCB_LOCK_ASSERT(stcb);
hbflag = 0;
- if ((control_only) || (asoc->stream_reset_outstanding))
+ if (control_only)
no_data_chunks = 1;
else
no_data_chunks = 0;
@@ -7848,7 +7900,7 @@ sctp_med_chunk_output(struct sctp_inpcb *inp,
(asoc->ctrl_queue_cnt == stcb->asoc.ecn_echo_cnt_onq)) &&
TAILQ_EMPTY(&asoc->asconf_send_queue) &&
TAILQ_EMPTY(&asoc->send_queue) &&
- stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
+ sctp_is_there_unsent_data(stcb, so_locked) == 0) {
nothing_to_send:
*reason_code = 9;
return (0);
@@ -8006,31 +8058,15 @@ again_one_more_time:
} else {
skip_data_for_this_net = 0;
}
- if ((net->ro.ro_rt) && (net->ro.ro_rt->rt_ifp)) {
- /*
- * if we have a route and an ifp check to see if we
- * have room to send to this guy
- */
- struct ifnet *ifp;
-
- ifp = net->ro.ro_rt->rt_ifp;
- if ((ifp->if_snd.ifq_len + 2) >= ifp->if_snd.ifq_maxlen) {
- SCTP_STAT_INCR(sctps_ifnomemqueued);
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) {
- sctp_log_maxburst(stcb, net, ifp->if_snd.ifq_len, ifp->if_snd.ifq_maxlen, SCTP_MAX_IFP_APPLIED);
- }
- continue;
- }
- }
switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
#ifdef INET
case AF_INET:
- mtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
+ mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
break;
#endif
#ifdef INET6
case AF_INET6:
- mtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
+ mtu = net->mtu - SCTP_MIN_OVERHEAD;
break;
#endif
default:
@@ -8052,6 +8088,7 @@ again_one_more_time:
} else {
r_mtu = mtu;
}
+ error = 0;
/************************/
/* ASCONF transmission */
/************************/
@@ -8175,6 +8212,12 @@ again_one_more_time:
* it is used to do appropriate
* source address selection.
*/
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(now);
+ *now_filled = 1;
+ }
+ net->last_sent_time = *now;
+ hbflag = 0;
if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
(struct sockaddr *)&net->ro._l_addr,
outchain, auth_offset, auth,
@@ -8185,21 +8228,18 @@ again_one_more_time:
net->port, NULL,
0, 0,
so_locked))) {
- if (error == ENOBUFS) {
- asoc->ifp_had_enobuf = 1;
- SCTP_STAT_INCR(sctps_lowlevelerr);
- }
+ /*
+ * error, we could not
+ * output
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
if (from_where == 0) {
SCTP_STAT_INCR(sctps_lowlevelerrusr);
}
- if (*now_filled == 0) {
- (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
- *now_filled = 1;
- *now = net->last_sent_time;
- } else {
- net->last_sent_time = *now;
+ if (error == ENOBUFS) {
+ asoc->ifp_had_enobuf = 1;
+ SCTP_STAT_INCR(sctps_lowlevelerr);
}
- hbflag = 0;
/* error, could not output */
if (error == EHOSTUNREACH) {
/*
@@ -8210,17 +8250,10 @@ again_one_more_time:
sctp_move_chunks_from_net(stcb, net);
}
*reason_code = 7;
- continue;
- } else
- asoc->ifp_had_enobuf = 0;
- if (*now_filled == 0) {
- (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
- *now_filled = 1;
- *now = net->last_sent_time;
+ break;
} else {
- net->last_sent_time = *now;
+ asoc->ifp_had_enobuf = 0;
}
- hbflag = 0;
/*
* increase the number we sent, if a
* cookie is sent we don't tell them
@@ -8253,6 +8286,10 @@ again_one_more_time:
}
}
}
+ if (error != 0) {
+ /* try next net */
+ continue;
+ }
/************************/
/* Control transmission */
/************************/
@@ -8391,7 +8428,8 @@ again_one_more_time:
/* turn off the timer */
if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
- inp, stcb, net, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_1);
+ inp, stcb, net,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_1);
}
}
ctl_cnt++;
@@ -8448,6 +8486,15 @@ again_one_more_time:
sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net);
cookie = 0;
}
+ /* Only HB or ASCONF advances time */
+ if (hbflag) {
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(now);
+ *now_filled = 1;
+ }
+ net->last_sent_time = *now;
+ hbflag = 0;
+ }
if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
(struct sockaddr *)&net->ro._l_addr,
outchain,
@@ -8459,23 +8506,17 @@ again_one_more_time:
net->port, NULL,
0, 0,
so_locked))) {
- if (error == ENOBUFS) {
- asoc->ifp_had_enobuf = 1;
- SCTP_STAT_INCR(sctps_lowlevelerr);
- }
+ /*
+ * error, we could not
+ * output
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
if (from_where == 0) {
SCTP_STAT_INCR(sctps_lowlevelerrusr);
}
- /* error, could not output */
- if (hbflag) {
- if (*now_filled == 0) {
- (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
- *now_filled = 1;
- *now = net->last_sent_time;
- } else {
- net->last_sent_time = *now;
- }
- hbflag = 0;
+ if (error == ENOBUFS) {
+ asoc->ifp_had_enobuf = 1;
+ SCTP_STAT_INCR(sctps_lowlevelerr);
}
if (error == EHOSTUNREACH) {
/*
@@ -8486,19 +8527,9 @@ again_one_more_time:
sctp_move_chunks_from_net(stcb, net);
}
*reason_code = 7;
- continue;
- } else
+ break;
+ } else {
asoc->ifp_had_enobuf = 0;
- /* Only HB or ASCONF advances time */
- if (hbflag) {
- if (*now_filled == 0) {
- (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
- *now_filled = 1;
- *now = net->last_sent_time;
- } else {
- net->last_sent_time = *now;
- }
- hbflag = 0;
}
/*
* increase the number we sent, if a
@@ -8532,6 +8563,10 @@ again_one_more_time:
}
}
}
+ if (error != 0) {
+ /* try next net */
+ continue;
+ }
/* JRI: if dest is in PF state, do not send data to it */
if ((asoc->sctp_cmt_on_off > 0) &&
(net != stcb->asoc.alternate) &&
@@ -8576,16 +8611,16 @@ again_one_more_time:
switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
#ifdef INET
case AF_INET:
- if (net->mtu > (sizeof(struct ip) + sizeof(struct sctphdr)))
- omtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
+ if (net->mtu > SCTP_MIN_V4_OVERHEAD)
+ omtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
else
omtu = 0;
break;
#endif
#ifdef INET6
case AF_INET6:
- if (net->mtu > (sizeof(struct ip6_hdr) + sizeof(struct sctphdr)))
- omtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
+ if (net->mtu > SCTP_MIN_OVERHEAD)
+ omtu = net->mtu - SCTP_MIN_OVERHEAD;
else
omtu = 0;
break;
@@ -8595,7 +8630,8 @@ again_one_more_time:
omtu = 0;
break;
}
- if ((((asoc->state & SCTP_STATE_OPEN) == SCTP_STATE_OPEN) &&
+ if ((((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
(skip_data_for_this_net == 0)) ||
(cookie)) {
TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
@@ -8785,6 +8821,14 @@ no_data_fill:
*/
sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
}
+ if (bundle_at || hbflag) {
+ /* For data/asconf and hb set time */
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(now);
+ *now_filled = 1;
+ }
+ net->last_sent_time = *now;
+ }
/* Now send it, if there is anything to send :> */
if ((error = sctp_lowlevel_chunk_output(inp,
stcb,
@@ -8803,23 +8847,13 @@ no_data_fill:
0, 0,
so_locked))) {
/* error, we could not output */
- if (error == ENOBUFS) {
- SCTP_STAT_INCR(sctps_lowlevelerr);
- asoc->ifp_had_enobuf = 1;
- }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
if (from_where == 0) {
SCTP_STAT_INCR(sctps_lowlevelerrusr);
}
- SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
- if (hbflag) {
- if (*now_filled == 0) {
- (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
- *now_filled = 1;
- *now = net->last_sent_time;
- } else {
- net->last_sent_time = *now;
- }
- hbflag = 0;
+ if (error == ENOBUFS) {
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ asoc->ifp_had_enobuf = 1;
}
if (error == EHOSTUNREACH) {
/*
@@ -8844,16 +8878,6 @@ no_data_fill:
endoutchain = NULL;
auth = NULL;
auth_offset = 0;
- if (bundle_at || hbflag) {
- /* For data/asconf and hb set time */
- if (*now_filled == 0) {
- (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
- *now_filled = 1;
- *now = net->last_sent_time;
- } else {
- net->last_sent_time = *now;
- }
- }
if (!no_out_cnt) {
*num_out += (ctl_cnt + bundle_at);
}
@@ -8914,9 +8938,37 @@ sctp_queue_op_err(struct sctp_tcb *stcb, struct mbuf *op_err)
*/
struct sctp_chunkhdr *hdr;
struct sctp_tmit_chunk *chk;
- struct mbuf *mat;
+ struct mbuf *mat, *last_mbuf;
+ uint32_t chunk_length;
+ uint16_t padding_length;
SCTP_TCB_LOCK_ASSERT(stcb);
+ SCTP_BUF_PREPEND(op_err, sizeof(struct sctp_chunkhdr), M_NOWAIT);
+ if (op_err == NULL) {
+ return;
+ }
+ last_mbuf = NULL;
+ chunk_length = 0;
+ for (mat = op_err; mat != NULL; mat = SCTP_BUF_NEXT(mat)) {
+ chunk_length += SCTP_BUF_LEN(mat);
+ if (SCTP_BUF_NEXT(mat) == NULL) {
+ last_mbuf = mat;
+ }
+ }
+ if (chunk_length > SCTP_MAX_CHUNK_LENGTH) {
+ sctp_m_freem(op_err);
+ return;
+ }
+ padding_length = chunk_length % 4;
+ if (padding_length != 0) {
+ padding_length = 4 - padding_length;
+ }
+ if (padding_length != 0) {
+ if (sctp_add_pad_tombuf(last_mbuf, padding_length) == NULL) {
+ sctp_m_freem(op_err);
+ return;
+ }
+ }
sctp_alloc_a_chunk(stcb, chk);
if (chk == NULL) {
/* no memory */
@@ -8924,32 +8976,19 @@ sctp_queue_op_err(struct sctp_tcb *stcb, struct mbuf *op_err)
return;
}
chk->copy_by_ref = 0;
- SCTP_BUF_PREPEND(op_err, sizeof(struct sctp_chunkhdr), M_DONTWAIT);
- if (op_err == NULL) {
- sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
- return;
- }
- chk->send_size = 0;
- mat = op_err;
- while (mat != NULL) {
- chk->send_size += SCTP_BUF_LEN(mat);
- mat = SCTP_BUF_NEXT(mat);
- }
- chk->rec.chunk_id.id = SCTP_OPERATION_ERROR;
- chk->rec.chunk_id.can_take_data = 1;
+ chk->send_size = (uint16_t) chunk_length;
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->data = op_err;
chk->whoTo = NULL;
+ chk->rec.chunk_id.id = SCTP_OPERATION_ERROR;
+ chk->rec.chunk_id.can_take_data = 0;
hdr = mtod(op_err, struct sctp_chunkhdr *);
hdr->chunk_type = SCTP_OPERATION_ERROR;
hdr->chunk_flags = 0;
hdr->chunk_length = htons(chk->send_size);
- TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue,
- chk,
- sctp_next);
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
chk->asoc->ctrl_queue_cnt++;
}
@@ -8970,12 +9009,11 @@ sctp_send_cookie_echo(struct mbuf *m,
struct sctp_tmit_chunk *chk;
uint16_t ptype, plen;
+ SCTP_TCB_LOCK_ASSERT(stcb);
/* First find the cookie in the param area */
cookie = NULL;
at = offset + sizeof(struct sctp_init_chunk);
-
- SCTP_TCB_LOCK_ASSERT(stcb);
- do {
+ for (;;) {
phdr = sctp_get_next_param(m, at, &parm, sizeof(parm));
if (phdr == NULL) {
return (-3);
@@ -8989,32 +9027,21 @@ sctp_send_cookie_echo(struct mbuf *m,
if ((pad = (plen % 4))) {
plen += 4 - pad;
}
- cookie = SCTP_M_COPYM(m, at, plen, M_DONTWAIT);
+ cookie = SCTP_M_COPYM(m, at, plen, M_NOWAIT);
if (cookie == NULL) {
/* No memory */
return (-2);
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = cookie; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(cookie, SCTP_MBUF_ICOPY);
}
#endif
break;
}
at += SCTP_SIZE32(plen);
- } while (phdr);
- if (cookie == NULL) {
- /* Did not find the cookie */
- return (-3);
}
/* ok, we got the cookie lets change it into a cookie echo chunk */
-
/* first the change from param to cookie */
hdr = mtod(cookie, struct sctp_chunkhdr *);
hdr->chunk_type = SCTP_COOKIE_ECHO;
@@ -9027,12 +9054,12 @@ sctp_send_cookie_echo(struct mbuf *m,
return (-5);
}
chk->copy_by_ref = 0;
- chk->send_size = plen;
chk->rec.chunk_id.id = SCTP_COOKIE_ECHO;
chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
+ chk->send_size = plen;
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
chk->asoc = &stcb->asoc;
chk->data = cookie;
chk->whoTo = net;
@@ -9061,20 +9088,14 @@ sctp_send_heartbeat_ack(struct sctp_tcb *stcb,
/* must have a net pointer */
return;
- outchain = SCTP_M_COPYM(m, offset, chk_length, M_DONTWAIT);
+ outchain = SCTP_M_COPYM(m, offset, chk_length, M_NOWAIT);
if (outchain == NULL) {
/* gak out of memory */
return;
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = outchain; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(outchain, SCTP_MBUF_ICOPY);
}
#endif
chdr = mtod(outchain, struct sctp_chunkhdr *);
@@ -9095,12 +9116,12 @@ sctp_send_heartbeat_ack(struct sctp_tcb *stcb,
return;
}
chk->copy_by_ref = 0;
- chk->send_size = chk_length;
chk->rec.chunk_id.id = SCTP_HEARTBEAT_ACK;
chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
+ chk->send_size = chk_length;
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->data = outchain;
chk->whoTo = net;
@@ -9119,7 +9140,7 @@ sctp_send_cookie_ack(struct sctp_tcb *stcb)
SCTP_TCB_LOCK_ASSERT(stcb);
- cookie_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_DONTWAIT, 1, MT_HEADER);
+ cookie_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_NOWAIT, 1, MT_HEADER);
if (cookie_ack == NULL) {
/* no mbuf's */
return;
@@ -9132,12 +9153,12 @@ sctp_send_cookie_ack(struct sctp_tcb *stcb)
return;
}
chk->copy_by_ref = 0;
- chk->send_size = sizeof(struct sctp_chunkhdr);
chk->rec.chunk_id.id = SCTP_COOKIE_ACK;
chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
+ chk->send_size = sizeof(struct sctp_chunkhdr);
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->data = cookie_ack;
if (chk->asoc->last_control_chunk_from != NULL) {
@@ -9165,7 +9186,7 @@ sctp_send_shutdown_ack(struct sctp_tcb *stcb, struct sctp_nets *net)
struct sctp_shutdown_ack_chunk *ack_cp;
struct sctp_tmit_chunk *chk;
- m_shutdown_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_ack_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ m_shutdown_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_ack_chunk), 0, M_NOWAIT, 1, MT_HEADER);
if (m_shutdown_ack == NULL) {
/* no mbuf's */
return;
@@ -9178,9 +9199,10 @@ sctp_send_shutdown_ack(struct sctp_tcb *stcb, struct sctp_nets *net)
return;
}
chk->copy_by_ref = 0;
- chk->send_size = sizeof(struct sctp_chunkhdr);
chk->rec.chunk_id.id = SCTP_SHUTDOWN_ACK;
chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
+ chk->send_size = sizeof(struct sctp_chunkhdr);
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
chk->flags = 0;
@@ -9208,7 +9230,7 @@ sctp_send_shutdown(struct sctp_tcb *stcb, struct sctp_nets *net)
struct sctp_shutdown_chunk *shutdown_cp;
struct sctp_tmit_chunk *chk;
- m_shutdown = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ m_shutdown = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_chunk), 0, M_NOWAIT, 1, MT_HEADER);
if (m_shutdown == NULL) {
/* no mbuf's */
return;
@@ -9221,9 +9243,10 @@ sctp_send_shutdown(struct sctp_tcb *stcb, struct sctp_nets *net)
return;
}
chk->copy_by_ref = 0;
- chk->send_size = sizeof(struct sctp_shutdown_chunk);
chk->rec.chunk_id.id = SCTP_SHUTDOWN;
chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
+ chk->send_size = sizeof(struct sctp_shutdown_chunk);
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
chk->flags = 0;
@@ -9274,13 +9297,13 @@ sctp_send_asconf(struct sctp_tcb *stcb, struct sctp_nets *net, int addr_locked)
return;
}
chk->copy_by_ref = 0;
- chk->data = m_asconf;
- chk->send_size = len;
chk->rec.chunk_id.id = SCTP_ASCONF;
chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
+ chk->data = m_asconf;
+ chk->send_size = len;
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
chk->asoc = &stcb->asoc;
chk->whoTo = net;
if (chk->whoTo) {
@@ -9344,20 +9367,14 @@ sctp_send_asconf_ack(struct sctp_tcb *stcb)
continue;
}
/* copy the asconf_ack */
- m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_DONTWAIT);
+ m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_NOWAIT);
if (m_ack == NULL) {
/* couldn't copy it */
return;
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = m_ack; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(m_ack, SCTP_MBUF_ICOPY);
}
#endif
@@ -9369,20 +9386,17 @@ sctp_send_asconf_ack(struct sctp_tcb *stcb)
return;
}
chk->copy_by_ref = 0;
-
+ chk->rec.chunk_id.id = SCTP_ASCONF_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
chk->whoTo = net;
if (chk->whoTo) {
atomic_add_int(&chk->whoTo->ref_count, 1);
}
chk->data = m_ack;
- chk->send_size = 0;
- /* Get size */
chk->send_size = ack->len;
- chk->rec.chunk_id.id = SCTP_ASCONF_ACK;
- chk->rec.chunk_id.can_take_data = 1;
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; /* XXX */
chk->asoc = &stcb->asoc;
TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
@@ -9491,7 +9505,7 @@ sctp_chunk_retransmission(struct sctp_inpcb *inp,
cnt_thru = 0;
/* do we have control chunks to retransmit? */
if (m != NULL) {
- /* Start a timer no matter if we suceed or fail */
+ /* Start a timer no matter if we succeed or fail */
if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, chk->whoTo);
} else if (chk->rec.chunk_id.id == SCTP_ASCONF)
@@ -9555,12 +9569,16 @@ sctp_chunk_retransmission(struct sctp_inpcb *inp,
}
if ((SCTP_BASE_SYSCTL(sctp_max_retran_chunk)) &&
(chk->snd_count >= SCTP_BASE_SYSCTL(sctp_max_retran_chunk))) {
- /* Gak, we have exceeded max unlucky retran, abort! */
- SCTP_PRINTF("Gak, chk->snd_count:%d >= max:%d - send abort\n",
- chk->snd_count,
- SCTP_BASE_SYSCTL(sctp_max_retran_chunk));
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
+ snprintf(msg, sizeof(msg), "TSN %8.8x retransmitted %d times, giving up",
+ chk->rec.data.TSN_seq, chk->snd_count);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
atomic_add_int(&stcb->asoc.refcnt, 1);
- sctp_abort_an_association(stcb->sctp_ep, stcb, NULL, so_locked);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err,
+ so_locked);
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
return (SCTP_RETRAN_EXIT);
@@ -9752,7 +9770,7 @@ one_chunk_around:
/* Is there something to send for this destination? */
if (m) {
/*
- * No matter if we fail/or suceed we should start a
+ * No matter if we fail/or succeed we should start a
* timer. A failure is like a lost IP packet :-)
*/
if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
@@ -9850,7 +9868,7 @@ one_chunk_around:
sctp_misc_ints(SCTP_FLIGHT_LOG_UP_RSND,
data_list[i]->whoTo->flight_size,
data_list[i]->book_size,
- (uintptr_t) data_list[i]->whoTo,
+ (uint32_t) (uintptr_t) data_list[i]->whoTo,
data_list[i]->rec.data.TSN_seq);
}
sctp_flight_size_increase(data_list[i]);
@@ -9874,7 +9892,7 @@ one_chunk_around:
* t3-expiring.
*/
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, inp, stcb, net,
- SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_4);
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_2);
sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
}
}
@@ -9954,7 +9972,7 @@ sctp_chunk_output(struct sctp_inpcb *inp,
*/
struct sctp_association *asoc;
struct sctp_nets *net;
- int error = 0, num_out = 0, tot_out = 0, ret = 0, reason_code = 0;
+ int error = 0, num_out, tot_out = 0, ret = 0, reason_code;
unsigned int burst_cnt = 0;
struct timeval now;
int now_filled = 0;
@@ -9965,6 +9983,7 @@ sctp_chunk_output(struct sctp_inpcb *inp,
unsigned int tot_frs = 0;
asoc = &stcb->asoc;
+do_it_again:
/* The Nagle algorithm is only applied when handling a send call. */
if (from_where == SCTP_OUTPUT_FROM_USR_SEND) {
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY)) {
@@ -9982,7 +10001,8 @@ sctp_chunk_output(struct sctp_inpcb *inp,
if ((un_sent <= 0) &&
(TAILQ_EMPTY(&asoc->control_send_queue)) &&
(TAILQ_EMPTY(&asoc->asconf_send_queue)) &&
- (asoc->sent_queue_retran_cnt == 0)) {
+ (asoc->sent_queue_retran_cnt == 0) &&
+ (asoc->trigger_reset == 0)) {
/* Nothing to do unless there is something to be sent left */
return;
}
@@ -10156,15 +10176,14 @@ sctp_chunk_output(struct sctp_inpcb *inp,
un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
(stcb->asoc.stream_queue_cnt * sizeof(struct sctp_data_chunk)));
if ((un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD)) &&
- (stcb->asoc.total_flight > 0) &&
- ((stcb->asoc.locked_on_sending == NULL) ||
- sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {
+ (stcb->asoc.total_flight > 0)) {
+/* && sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {*/
break;
}
}
if (TAILQ_EMPTY(&asoc->control_send_queue) &&
TAILQ_EMPTY(&asoc->send_queue) &&
- stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
+ sctp_is_there_unsent_data(stcb, so_locked) == 0) {
/* Nothing left to send */
break;
}
@@ -10201,6 +10220,12 @@ sctp_chunk_output(struct sctp_inpcb *inp,
*/
if (stcb->asoc.ecn_echo_cnt_onq)
sctp_fix_ecn_echo(asoc);
+
+ if (stcb->asoc.trigger_reset) {
+ if (sctp_send_stream_reset_out_if_possible(stcb, so_locked) == 0) {
+ goto do_it_again;
+ }
+ }
return;
}
@@ -10235,10 +10260,21 @@ void
send_forward_tsn(struct sctp_tcb *stcb,
struct sctp_association *asoc)
{
- struct sctp_tmit_chunk *chk;
+ struct sctp_tmit_chunk *chk, *at, *tp1, *last;
struct sctp_forward_tsn_chunk *fwdtsn;
+ struct sctp_strseq *strseq;
+ struct sctp_strseq_mid *strseq_m;
uint32_t advance_peer_ack_point;
+ unsigned int cnt_of_space, i, ovh;
+ unsigned int space_needed;
+ unsigned int cnt_of_skipped = 0;
+ int old;
+ if (asoc->idata_supported) {
+ old = 0;
+ } else {
+ old = 1;
+ }
SCTP_TCB_LOCK_ASSERT(stcb);
TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) {
@@ -10260,11 +10296,17 @@ send_forward_tsn(struct sctp_tcb *stcb,
}
asoc->fwd_tsn_cnt++;
chk->copy_by_ref = 0;
+ /*
+ * We don't do the old thing here since this is used not for on-wire
+ * but to tell if we are sending a fwd-tsn by the stack during
+	 * output. And if it's an IFORWARD or a FORWARD it is a fwd-tsn.
+ */
chk->rec.chunk_id.id = SCTP_FORWARD_CUM_TSN;
chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = 0;
chk->asoc = asoc;
chk->whoTo = NULL;
- chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (chk->data == NULL) {
sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
return;
@@ -10280,132 +10322,155 @@ sctp_fill_in_rest:
* stream/seq of the ones we skip.
*/
SCTP_BUF_LEN(chk->data) = 0;
- {
- struct sctp_tmit_chunk *at, *tp1, *last;
- struct sctp_strseq *strseq;
- unsigned int cnt_of_space, i, ovh;
- unsigned int space_needed;
- unsigned int cnt_of_skipped = 0;
-
- TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) {
- if ((at->sent != SCTP_FORWARD_TSN_SKIP) &&
- (at->sent != SCTP_DATAGRAM_NR_ACKED)) {
- /* no more to look at */
- break;
- }
- if (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED) {
- /* We don't report these */
- continue;
- }
- cnt_of_skipped++;
+ TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) {
+ if ((at->sent != SCTP_FORWARD_TSN_SKIP) &&
+ (at->sent != SCTP_DATAGRAM_NR_ACKED)) {
+ /* no more to look at */
+ break;
+ }
+ if (old && (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED)) {
+ /* We don't report these */
+ continue;
}
+ cnt_of_skipped++;
+ }
+ if (old) {
space_needed = (sizeof(struct sctp_forward_tsn_chunk) +
(cnt_of_skipped * sizeof(struct sctp_strseq)));
+ } else {
+ space_needed = (sizeof(struct sctp_forward_tsn_chunk) +
+ (cnt_of_skipped * sizeof(struct sctp_strseq_mid)));
+ }
+ cnt_of_space = (unsigned int)M_TRAILINGSPACE(chk->data);
- cnt_of_space = M_TRAILINGSPACE(chk->data);
-
- if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- ovh = SCTP_MIN_OVERHEAD;
- } else {
- ovh = SCTP_MIN_V4_OVERHEAD;
- }
- if (cnt_of_space > (asoc->smallest_mtu - ovh)) {
- /* trim to a mtu size */
- cnt_of_space = asoc->smallest_mtu - ovh;
- }
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MIN_OVERHEAD;
+ } else {
+ ovh = SCTP_MIN_V4_OVERHEAD;
+ }
+ if (cnt_of_space > (asoc->smallest_mtu - ovh)) {
+ /* trim to a mtu size */
+ cnt_of_space = asoc->smallest_mtu - ovh;
+ }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
+ sctp_misc_ints(SCTP_FWD_TSN_CHECK,
+ 0xff, 0, cnt_of_skipped,
+ asoc->advanced_peer_ack_point);
+ }
+ advance_peer_ack_point = asoc->advanced_peer_ack_point;
+ if (cnt_of_space < space_needed) {
+ /*-
+ * ok we must trim down the chunk by lowering the
+ * advance peer ack point.
+ */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
sctp_misc_ints(SCTP_FWD_TSN_CHECK,
- 0xff, 0, cnt_of_skipped,
- asoc->advanced_peer_ack_point);
-
+ 0xff, 0xff, cnt_of_space,
+ space_needed);
}
- advance_peer_ack_point = asoc->advanced_peer_ack_point;
- if (cnt_of_space < space_needed) {
- /*-
- * ok we must trim down the chunk by lowering the
- * advance peer ack point.
- */
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
- sctp_misc_ints(SCTP_FWD_TSN_CHECK,
- 0xff, 0xff, cnt_of_space,
- space_needed);
- }
+ if (old) {
cnt_of_skipped = cnt_of_space - sizeof(struct sctp_forward_tsn_chunk);
cnt_of_skipped /= sizeof(struct sctp_strseq);
- /*-
- * Go through and find the TSN that will be the one
- * we report.
- */
- at = TAILQ_FIRST(&asoc->sent_queue);
- if (at != NULL) {
- for (i = 0; i < cnt_of_skipped; i++) {
- tp1 = TAILQ_NEXT(at, sctp_next);
- if (tp1 == NULL) {
- break;
- }
- at = tp1;
+ } else {
+ cnt_of_skipped = cnt_of_space - sizeof(struct sctp_forward_tsn_chunk);
+ cnt_of_skipped /= sizeof(struct sctp_strseq_mid);
+ }
+ /*-
+ * Go through and find the TSN that will be the one
+ * we report.
+ */
+ at = TAILQ_FIRST(&asoc->sent_queue);
+ if (at != NULL) {
+ for (i = 0; i < cnt_of_skipped; i++) {
+ tp1 = TAILQ_NEXT(at, sctp_next);
+ if (tp1 == NULL) {
+ break;
}
+ at = tp1;
}
- if (at && SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
- sctp_misc_ints(SCTP_FWD_TSN_CHECK,
- 0xff, cnt_of_skipped, at->rec.data.TSN_seq,
- asoc->advanced_peer_ack_point);
- }
- last = at;
- /*-
- * last now points to last one I can report, update
- * peer ack point
- */
- if (last)
- advance_peer_ack_point = last->rec.data.TSN_seq;
+ }
+ if (at && SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
+ sctp_misc_ints(SCTP_FWD_TSN_CHECK,
+ 0xff, cnt_of_skipped, at->rec.data.TSN_seq,
+ asoc->advanced_peer_ack_point);
+ }
+ last = at;
+ /*-
+ * last now points to last one I can report, update
+ * peer ack point
+ */
+ if (last) {
+ advance_peer_ack_point = last->rec.data.TSN_seq;
+ }
+ if (old) {
space_needed = sizeof(struct sctp_forward_tsn_chunk) +
cnt_of_skipped * sizeof(struct sctp_strseq);
+ } else {
+ space_needed = sizeof(struct sctp_forward_tsn_chunk) +
+ cnt_of_skipped * sizeof(struct sctp_strseq_mid);
}
- chk->send_size = space_needed;
- /* Setup the chunk */
- fwdtsn = mtod(chk->data, struct sctp_forward_tsn_chunk *);
- fwdtsn->ch.chunk_length = htons(chk->send_size);
- fwdtsn->ch.chunk_flags = 0;
+ }
+ chk->send_size = space_needed;
+ /* Setup the chunk */
+ fwdtsn = mtod(chk->data, struct sctp_forward_tsn_chunk *);
+ fwdtsn->ch.chunk_length = htons(chk->send_size);
+ fwdtsn->ch.chunk_flags = 0;
+ if (old) {
fwdtsn->ch.chunk_type = SCTP_FORWARD_CUM_TSN;
- fwdtsn->new_cumulative_tsn = htonl(advance_peer_ack_point);
- SCTP_BUF_LEN(chk->data) = chk->send_size;
- fwdtsn++;
- /*-
- * Move pointer to after the fwdtsn and transfer to the
- * strseq pointer.
- */
+ } else {
+ fwdtsn->ch.chunk_type = SCTP_IFORWARD_CUM_TSN;
+ }
+ fwdtsn->new_cumulative_tsn = htonl(advance_peer_ack_point);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ fwdtsn++;
+ /*-
+ * Move pointer to after the fwdtsn and transfer to the
+ * strseq pointer.
+ */
+ if (old) {
strseq = (struct sctp_strseq *)fwdtsn;
- /*-
- * Now populate the strseq list. This is done blindly
- * without pulling out duplicate stream info. This is
- * inefficent but won't harm the process since the peer will
- * look at these in sequence and will thus release anything.
- * It could mean we exceed the PMTU and chop off some that
- * we could have included.. but this is unlikely (aka 1432/4
- * would mean 300+ stream seq's would have to be reported in
- * one FWD-TSN. With a bit of work we can later FIX this to
- * optimize and pull out duplcates.. but it does add more
- * overhead. So for now... not!
- */
- at = TAILQ_FIRST(&asoc->sent_queue);
- for (i = 0; i < cnt_of_skipped; i++) {
- tp1 = TAILQ_NEXT(at, sctp_next);
- if (tp1 == NULL)
- break;
+ } else {
+ strseq_m = (struct sctp_strseq_mid *)fwdtsn;
+ }
+ /*-
+ * Now populate the strseq list. This is done blindly
+ * without pulling out duplicate stream info. This is
+	 * inefficent but won't harm the process since the peer will
+	 * look at these in sequence and will thus release anything.
+ * look at these in sequence and will thus release anything.
+ * It could mean we exceed the PMTU and chop off some that
+ * we could have included.. but this is unlikely (aka 1432/4
+ * would mean 300+ stream seq's would have to be reported in
+ * one FWD-TSN. With a bit of work we can later FIX this to
+ * optimize and pull out duplicates.. but it does add more
+ * overhead. So for now... not!
+ */
+ i = 0;
+ TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) {
+ if (i >= cnt_of_skipped) {
+ break;
+ }
+ if (old && (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED)) {
+ /* We don't report these */
+ continue;
+ }
+ if (at->rec.data.TSN_seq == advance_peer_ack_point) {
+ at->rec.data.fwd_tsn_cnt = 0;
+ }
+ if (old) {
+ strseq->stream = htons(at->rec.data.stream_number);
+ strseq->sequence = htons((uint16_t) at->rec.data.stream_seq);
+ strseq++;
+ } else {
+ strseq_m->stream = htons(at->rec.data.stream_number);
if (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED) {
- /* We don't report these */
- i--;
- at = tp1;
- continue;
- }
- if (at->rec.data.TSN_seq == advance_peer_ack_point) {
- at->rec.data.fwd_tsn_cnt = 0;
+ strseq_m->flags = htons(PR_SCTP_UNORDERED_FLAG);
+ } else {
+ strseq_m->flags = 0;
}
- strseq->stream = ntohs(at->rec.data.stream_number);
- strseq->sequence = ntohs(at->rec.data.stream_seq);
- strseq++;
- at = tp1;
+ strseq_m->msg_id = htonl(at->rec.data.stream_seq);
+ strseq_m++;
}
+ i++;
}
return;
}
@@ -10428,7 +10493,7 @@ sctp_send_sack(struct sctp_tcb *stcb, int so_locked
struct sctp_sack_chunk *sack;
struct sctp_nr_sack_chunk *nr_sack;
struct sctp_gap_ack_block *gap_descriptor;
- struct sack_track *selector;
+ const struct sack_track *selector;
int mergeable = 0;
int offset;
caddr_t limit;
@@ -10443,8 +10508,7 @@ sctp_send_sack(struct sctp_tcb *stcb, int so_locked
uint8_t type;
uint8_t tsn_map;
- if ((stcb->asoc.sctp_nr_sack_on_off == 1) &&
- (stcb->asoc.peer_supports_nr_sack == 1)) {
+ if (stcb->asoc.nrsack_supported == 1) {
type = SCTP_NR_SELECTIVE_ACK;
} else {
type = SCTP_SELECTIVE_ACK;
@@ -10481,7 +10545,8 @@ sctp_send_sack(struct sctp_tcb *stcb, int so_locked
/* No memory so we drop the idea, and set a timer */
if (stcb->asoc.delayed_ack) {
sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
- stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5);
+ stcb->sctp_ep, stcb, NULL,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_3);
sctp_timer_start(SCTP_TIMER_TYPE_RECV,
stcb->sctp_ep, stcb, NULL);
} else {
@@ -10496,38 +10561,24 @@ sctp_send_sack(struct sctp_tcb *stcb, int so_locked
/* Clear our pkt counts */
asoc->data_pkts_seen = 0;
+ a_chk->flags = 0;
a_chk->asoc = asoc;
a_chk->snd_count = 0;
a_chk->send_size = 0; /* fill in later */
a_chk->sent = SCTP_DATAGRAM_UNSENT;
a_chk->whoTo = NULL;
- if ((asoc->numduptsns) ||
- (!(asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE))) {
+ if (!(asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE)) {
/*-
- * Ok, we have some duplicates or the destination for the
- * sack is unreachable, lets see if we can select an
- * alternate than asoc->last_data_chunk_from
+ * Ok, the destination for the SACK is unreachable, lets see if
+ * we can select an alternate to asoc->last_data_chunk_from
*/
- if ((asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE) &&
- (asoc->used_alt_onsack > asoc->numnets)) {
- /* We used an alt last time, don't this time */
- a_chk->whoTo = NULL;
- } else {
- asoc->used_alt_onsack++;
- a_chk->whoTo = sctp_find_alternate_net(stcb, asoc->last_data_chunk_from, 0);
- }
+ a_chk->whoTo = sctp_find_alternate_net(stcb, asoc->last_data_chunk_from, 0);
if (a_chk->whoTo == NULL) {
/* Nope, no alternate */
a_chk->whoTo = asoc->last_data_chunk_from;
- asoc->used_alt_onsack = 0;
}
} else {
- /*
- * No duplicates so we use the last place we received data
- * from.
- */
- asoc->used_alt_onsack = 0;
a_chk->whoTo = asoc->last_data_chunk_from;
}
if (a_chk->whoTo) {
@@ -10550,7 +10601,7 @@ sctp_send_sack(struct sctp_tcb *stcb, int so_locked
space_req = MCLBYTES;
}
/* Ok now lets formulate a MBUF with our sack */
- a_chk->data = sctp_get_mbuf_for_msg(space_req, 0, M_DONTWAIT, 1, MT_DATA);
+ a_chk->data = sctp_get_mbuf_for_msg(space_req, 0, M_NOWAIT, 1, MT_DATA);
if ((a_chk->data == NULL) ||
(a_chk->whoTo == NULL)) {
/* rats, no mbuf memory */
@@ -10563,7 +10614,8 @@ sctp_send_sack(struct sctp_tcb *stcb, int so_locked
/* sa_ignore NO_NULL_CHK */
if (stcb->asoc.delayed_ack) {
sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
- stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_6);
+ stcb->sctp_ep, stcb, NULL,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_4);
sctp_timer_start(SCTP_TIMER_TYPE_RECV,
stcb->sctp_ep, stcb, NULL);
} else {
@@ -10573,7 +10625,7 @@ sctp_send_sack(struct sctp_tcb *stcb, int so_locked
}
/* ok, lets go through and fill it in */
SCTP_BUF_RESV_UF(a_chk->data, SCTP_MIN_OVERHEAD);
- space = M_TRAILINGSPACE(a_chk->data);
+ space = (unsigned int)M_TRAILINGSPACE(a_chk->data);
if (space > (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD)) {
space = (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD);
}
@@ -10642,7 +10694,7 @@ sctp_send_sack(struct sctp_tcb *stcb, int so_locked
* Clear all bits corresponding to TSNs
* smaller or equal to the cumulative TSN.
*/
- tsn_map &= (~0 << (1 - offset));
+ tsn_map &= (~0U << (1 - offset));
}
selector = &sack_array[tsn_map];
if (mergeable && selector->right_edge) {
@@ -10717,7 +10769,7 @@ sctp_send_sack(struct sctp_tcb *stcb, int so_locked
* TSNs smaller or equal to the
* cumulative TSN.
*/
- tsn_map &= (~0 << (1 - offset));
+ tsn_map &= (~0U << (1 - offset));
}
selector = &sack_array[tsn_map];
if (mergeable && selector->right_edge) {
@@ -10787,9 +10839,9 @@ sctp_send_sack(struct sctp_tcb *stcb, int so_locked
* queue.
*/
if (type == SCTP_SELECTIVE_ACK) {
- a_chk->send_size = sizeof(struct sctp_sack_chunk) +
+ a_chk->send_size = (uint16_t) (sizeof(struct sctp_sack_chunk) +
(num_gap_blocks + num_nr_gap_blocks) * sizeof(struct sctp_gap_ack_block) +
- num_dups * sizeof(int32_t);
+ num_dups * sizeof(int32_t));
SCTP_BUF_LEN(a_chk->data) = a_chk->send_size;
sack->sack.cum_tsn_ack = htonl(asoc->cumulative_tsn);
sack->sack.a_rwnd = htonl(asoc->my_rwnd);
@@ -10799,9 +10851,9 @@ sctp_send_sack(struct sctp_tcb *stcb, int so_locked
sack->ch.chunk_flags = flags;
sack->ch.chunk_length = htons(a_chk->send_size);
} else {
- a_chk->send_size = sizeof(struct sctp_nr_sack_chunk) +
+ a_chk->send_size = (uint16_t) (sizeof(struct sctp_nr_sack_chunk) +
(num_gap_blocks + num_nr_gap_blocks) * sizeof(struct sctp_gap_ack_block) +
- num_dups * sizeof(int32_t);
+ num_dups * sizeof(int32_t));
SCTP_BUF_LEN(a_chk->data) = a_chk->send_size;
nr_sack->nr_sack.cum_tsn_ack = htonl(asoc->cumulative_tsn);
nr_sack->nr_sack.a_rwnd = htonl(asoc->my_rwnd);
@@ -10850,7 +10902,7 @@ sctp_send_abort_tcb(struct sctp_tcb *stcb, struct mbuf *operr, int so_locked
} else {
m_out = NULL;
}
- m_abort = sctp_get_mbuf_for_msg(sizeof(struct sctp_abort_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ m_abort = sctp_get_mbuf_for_msg(sizeof(struct sctp_abort_chunk), 0, M_NOWAIT, 1, MT_HEADER);
if (m_abort == NULL) {
if (m_out) {
sctp_m_freem(m_out);
@@ -10900,7 +10952,8 @@ sctp_send_abort_tcb(struct sctp_tcb *stcb, struct mbuf *operr, int so_locked
abort->ch.chunk_length = htons(chunk_len);
/* Add padding, if necessary. */
if (padding_len > 0) {
- if ((m_last == NULL) || sctp_add_pad_tombuf(m_last, padding_len)) {
+ if ((m_last == NULL) ||
+ (sctp_add_pad_tombuf(m_last, padding_len) == NULL)) {
sctp_m_freem(m_out);
return;
}
@@ -10926,7 +10979,7 @@ sctp_send_shutdown_complete(struct sctp_tcb *stcb,
uint32_t vtag;
uint8_t flags;
- m_shutdown_comp = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_DONTWAIT, 1, MT_HEADER);
+ m_shutdown_comp = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_NOWAIT, 1, MT_HEADER);
if (m_shutdown_comp == NULL) {
/* no mbuf's */
return;
@@ -10959,20 +11012,21 @@ static void
sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, uint32_t vtag,
uint8_t type, struct mbuf *cause,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
struct mbuf *o_pak;
struct mbuf *mout;
struct sctphdr *shout;
struct sctp_chunkhdr *ch;
- struct udphdr *udp;
- int len, cause_len, padding_len;
#if defined(INET) || defined(INET6)
+ struct udphdr *udp;
int ret;
#endif
+ int len, cause_len, padding_len;
+
#ifdef INET
struct sockaddr_in *src_sin, *dst_sin;
struct ip *ip;
@@ -10999,7 +11053,7 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
padding_len = 4 - padding_len;
}
if (padding_len != 0) {
- if (sctp_add_pad_tombuf(m_last, padding_len)) {
+ if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
sctp_m_freem(cause);
return;
}
@@ -11023,10 +11077,12 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
default:
break;
}
+#if defined(INET) || defined(INET6)
if (port) {
len += sizeof(struct udphdr);
}
- mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_DONTWAIT, 1, MT_DATA);
+#endif
+ mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_NOWAIT, 1, MT_DATA);
if (mout == NULL) {
if (cause) {
sctp_m_freem(cause);
@@ -11036,10 +11092,9 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
SCTP_BUF_RESV_UF(mout, max_linkhdr);
SCTP_BUF_LEN(mout) = len;
SCTP_BUF_NEXT(mout) = cause;
- if (use_mflowid != 0) {
- mout->m_pkthdr.flowid = mflowid;
- mout->m_flags |= M_FLOWID;
- }
+ M_SETFIB(mout, fibnum);
+ mout->m_pkthdr.flowid = mflowid;
+ M_HASHTYPE_SET(mout, mflowtype);
#ifdef INET
ip = NULL;
#endif
@@ -11055,8 +11110,8 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
ip->ip_v = IPVERSION;
ip->ip_hl = (sizeof(struct ip) >> 2);
ip->ip_tos = 0;
- ip->ip_id = ip_newid();
ip->ip_off = 0;
+ ip_fillid(ip);
ip->ip_ttl = MODULE_GLOBAL(ip_defttl);
if (port) {
ip->ip_p = IPPROTO_UDP;
@@ -11096,6 +11151,7 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
shout = mtod(mout, struct sctphdr *);
break;
}
+#if defined(INET) || defined(INET6)
if (port) {
if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) {
sctp_m_freem(mout);
@@ -11105,15 +11161,16 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port));
udp->uh_dport = port;
udp->uh_sum = 0;
- udp->uh_ulen = htons(sizeof(struct udphdr) +
+ udp->uh_ulen = htons((uint16_t) (sizeof(struct udphdr) +
sizeof(struct sctphdr) +
sizeof(struct sctp_chunkhdr) +
- cause_len + padding_len);
+ cause_len + padding_len));
len += sizeof(struct udphdr);
shout = (struct sctphdr *)((caddr_t)shout + sizeof(struct udphdr));
} else {
udp = NULL;
}
+#endif
shout->src_port = sh->dest_port;
shout->dest_port = sh->src_port;
shout->checksum = 0;
@@ -11130,7 +11187,7 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
} else {
ch->chunk_flags = SCTP_HAD_NO_TCB;
}
- ch->chunk_length = htons(sizeof(struct sctp_chunkhdr) + cause_len);
+ ch->chunk_length = htons((uint16_t) (sizeof(struct sctp_chunkhdr) + cause_len));
len += sizeof(struct sctp_chunkhdr);
len += cause_len + padding_len;
@@ -11149,7 +11206,7 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
udp->uh_sum = 0;
}
}
- ip->ip_len = len;
+ ip->ip_len = htons(len);
if (port) {
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_sendnocrc);
@@ -11179,7 +11236,7 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
#endif
#ifdef INET6
case AF_INET6:
- ip6->ip6_plen = len - sizeof(struct ip6_hdr);
+ ip6->ip6_plen = (uint16_t) (len - sizeof(struct ip6_hdr));
if (port) {
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_sendnocrc);
@@ -11223,11 +11280,11 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
void
sctp_send_shutdown_complete2(struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
sctp_send_resp_msg(src, dst, sh, 0, SCTP_SHUTDOWN_COMPLETE, NULL,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
}
@@ -11267,10 +11324,11 @@ sctp_send_hb(struct sctp_tcb *stcb, struct sctp_nets *net, int so_locked
chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_HEARTBEAT_REQUEST;
chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->send_size = sizeof(struct sctp_heartbeat_chunk);
- chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
if (chk->data == NULL) {
sctp_free_a_chunk(stcb, chk, so_locked);
return;
@@ -11294,7 +11352,7 @@ sctp_send_hb(struct sctp_tcb *stcb, struct sctp_nets *net, int so_locked
hb->heartbeat.hb_info.time_value_1 = now.tv_sec;
hb->heartbeat.hb_info.time_value_2 = now.tv_usec;
/* Did our user request this one, put it in */
- hb->heartbeat.hb_info.addr_family = net->ro._l_addr.sa.sa_family;
+ hb->heartbeat.hb_info.addr_family = (uint8_t) net->ro._l_addr.sa.sa_family;
hb->heartbeat.hb_info.addr_len = net->ro._l_addr.sa.sa_len;
if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
/*
@@ -11323,6 +11381,11 @@ sctp_send_hb(struct sctp_tcb *stcb, struct sctp_nets *net, int so_locked
break;
#endif
default:
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk, so_locked);
return;
break;
}
@@ -11368,13 +11431,14 @@ sctp_send_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
if (chk == NULL) {
return;
}
- chk->copy_by_ref = 0;
SCTP_STAT_INCR(sctps_queue_upd_ecne);
+ chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_ECN_ECHO;
chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->send_size = sizeof(struct sctp_ecne_chunk);
- chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
if (chk->data == NULL) {
sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
return;
@@ -11417,7 +11481,7 @@ sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net,
}
asoc = &stcb->asoc;
SCTP_TCB_LOCK_ASSERT(stcb);
- if (asoc->peer_supports_pktdrop == 0) {
+ if (asoc->pktdrop_supported == 0) {
/*-
* peer must declare support before I send one.
*/
@@ -11431,6 +11495,9 @@ sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net,
return;
}
chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_PACKET_DROPPED;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
len -= iphlen;
chk->send_size = len;
/* Validate that we do not have an ABORT in here. */
@@ -11473,7 +11540,7 @@ sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net,
was_trunc = 1;
}
chk->asoc = &stcb->asoc;
- chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (chk->data == NULL) {
jump_out:
sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
@@ -11496,7 +11563,7 @@ jump_out:
* Len is already adjusted to size minus overhead above take
* out the pkt_drop chunk itself from it.
*/
- chk->send_size = len - sizeof(struct sctp_pktdrop_chunk);
+ chk->send_size = (uint16_t) (len - sizeof(struct sctp_pktdrop_chunk));
len = chk->send_size;
} else {
/* no truncation needed */
@@ -11517,8 +11584,6 @@ jump_out:
} else {
chk->whoTo = NULL;
}
- chk->rec.chunk_id.id = SCTP_PACKET_DROPPED;
- chk->rec.chunk_id.can_take_data = 1;
drp->ch.chunk_type = SCTP_PACKET_DROPPED;
drp->ch.chunk_length = htons(chk->send_size);
spc = SCTP_SB_LIMIT_RCV(stcb->sctp_socket);
@@ -11584,9 +11649,10 @@ sctp_send_cwr(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t high_tsn, u
chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_ECN_CWR;
chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->send_size = sizeof(struct sctp_cwr_chunk);
- chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
if (chk->data == NULL) {
sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
return;
@@ -11606,30 +11672,60 @@ sctp_send_cwr(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t high_tsn, u
asoc->ctrl_queue_cnt++;
}
-void
-sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk,
- int number_entries, uint16_t * list,
+static int
+sctp_add_stream_reset_out(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk,
uint32_t seq, uint32_t resp_seq, uint32_t last_sent)
{
uint16_t len, old_len, i;
struct sctp_stream_reset_out_request *req_out;
struct sctp_chunkhdr *ch;
+ int at;
+ int number_entries = 0;
ch = mtod(chk->data, struct sctp_chunkhdr *);
old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
-
/* get to new offset for the param. */
req_out = (struct sctp_stream_reset_out_request *)((caddr_t)ch + len);
/* now how long will this param be? */
- len = (sizeof(struct sctp_stream_reset_out_request) + (sizeof(uint16_t) * number_entries));
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if ((stcb->asoc.strmout[i].state == SCTP_STREAM_RESET_PENDING) &&
+ (stcb->asoc.strmout[i].chunks_on_queues == 0) &&
+ TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
+ number_entries++;
+ }
+ }
+ if (number_entries == 0) {
+ return (0);
+ }
+ if (number_entries == stcb->asoc.streamoutcnt) {
+ number_entries = 0;
+ }
+ if (number_entries > SCTP_MAX_STREAMS_AT_ONCE_RESET) {
+ number_entries = SCTP_MAX_STREAMS_AT_ONCE_RESET;
+ }
+ len = (uint16_t) (sizeof(struct sctp_stream_reset_out_request) + (sizeof(uint16_t) * number_entries));
req_out->ph.param_type = htons(SCTP_STR_RESET_OUT_REQUEST);
req_out->ph.param_length = htons(len);
req_out->request_seq = htonl(seq);
req_out->response_seq = htonl(resp_seq);
req_out->send_reset_at_tsn = htonl(last_sent);
+ at = 0;
if (number_entries) {
- for (i = 0; i < number_entries; i++) {
- req_out->list_of_streams[i] = htons(list[i]);
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if ((stcb->asoc.strmout[i].state == SCTP_STREAM_RESET_PENDING) &&
+ (stcb->asoc.strmout[i].chunks_on_queues == 0) &&
+ TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
+ req_out->list_of_streams[at] = htons(i);
+ at++;
+ stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_IN_FLIGHT;
+ if (at >= number_entries) {
+ break;
+ }
+ }
+ }
+ } else {
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_IN_FLIGHT;
}
}
if (SCTP_SIZE32(len) > len) {
@@ -11646,7 +11742,7 @@ sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk,
chk->book_size_scale = 0;
chk->send_size = SCTP_SIZE32(chk->book_size);
SCTP_BUF_LEN(chk->data) = chk->send_size;
- return;
+ return (1);
}
static void
@@ -11664,7 +11760,7 @@ sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk,
/* get to new offset for the param. */
req_in = (struct sctp_stream_reset_in_request *)((caddr_t)ch + len);
/* now how long will this param be? */
- len = (sizeof(struct sctp_stream_reset_in_request) + (sizeof(uint16_t) * number_entries));
+ len = (uint16_t) (sizeof(struct sctp_stream_reset_in_request) + (sizeof(uint16_t) * number_entries));
req_in->ph.param_type = htons(SCTP_STR_RESET_IN_REQUEST);
req_in->ph.param_length = htons(len);
req_in->request_seq = htonl(seq);
@@ -11748,6 +11844,68 @@ sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk,
}
void
+sctp_send_deferred_reset_response(struct sctp_tcb *stcb,
+ struct sctp_stream_reset_list *ent,
+ int response)
+{
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_chunkhdr *ch;
+
+ asoc = &stcb->asoc;
+
+ /*
+ * Reset our last reset action to the new one IP -> response
+ * (PERFORMED probably). This assures that if we fail to send, a
+ * retran from the peer will get the new response.
+ */
+ asoc->last_reset_action[0] = response;
+ if (asoc->stream_reset_outstanding) {
+ return;
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_STREAM_RESET;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->book_size = sizeof(struct sctp_chunkhdr);
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ chk->book_size_scale = 0;
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED);
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ /* setup chunk parameters */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ if (stcb->asoc.alternate) {
+ chk->whoTo = stcb->asoc.alternate;
+ } else {
+ chk->whoTo = stcb->asoc.primary_destination;
+ }
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+ ch->chunk_type = SCTP_STREAM_RESET;
+ ch->chunk_flags = 0;
+ ch->chunk_length = htons(chk->book_size);
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ sctp_add_stream_reset_result(chk, ent->seq, response);
+ /* insert the chunk for sending */
+ TAILQ_INSERT_TAIL(&asoc->control_send_queue,
+ chk,
+ sctp_next);
+ asoc->ctrl_queue_cnt++;
+}
+
+void
sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk,
uint32_t resp_seq, uint32_t result,
uint32_t send_una, uint32_t recv_next)
@@ -11845,19 +12003,90 @@ sctp_add_an_in_stream(struct sctp_tmit_chunk *chk,
}
int
+sctp_send_stream_reset_out_if_possible(struct sctp_tcb *stcb, int so_locked)
+{
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_chunkhdr *ch;
+ uint32_t seq;
+
+ asoc = &stcb->asoc;
+ asoc->trigger_reset = 0;
+ if (asoc->stream_reset_outstanding) {
+ return (EALREADY);
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_STREAM_RESET;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->book_size = sizeof(struct sctp_chunkhdr);
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ chk->book_size_scale = 0;
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk, so_locked);
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+
+ /* setup chunk parameters */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ if (stcb->asoc.alternate) {
+ chk->whoTo = stcb->asoc.alternate;
+ } else {
+ chk->whoTo = stcb->asoc.primary_destination;
+ }
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+ ch->chunk_type = SCTP_STREAM_RESET;
+ ch->chunk_flags = 0;
+ ch->chunk_length = htons(chk->book_size);
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ seq = stcb->asoc.str_reset_seq_out;
+ if (sctp_add_stream_reset_out(stcb, chk, seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1))) {
+ seq++;
+ asoc->stream_reset_outstanding++;
+ } else {
+ m_freem(chk->data);
+ chk->data = NULL;
+ sctp_free_a_chunk(stcb, chk, so_locked);
+ return (ENOENT);
+ }
+ asoc->str_reset = chk;
+ /* insert the chunk for sending */
+ TAILQ_INSERT_TAIL(&asoc->control_send_queue,
+ chk,
+ sctp_next);
+ asoc->ctrl_queue_cnt++;
+
+ if (stcb->asoc.send_sack) {
+ sctp_send_sack(stcb, so_locked);
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
+ return (0);
+}
+
+int
sctp_send_str_reset_req(struct sctp_tcb *stcb,
- int number_entries, uint16_t * list,
- uint8_t send_out_req,
+ uint16_t number_entries, uint16_t * list,
uint8_t send_in_req,
uint8_t send_tsn_req,
uint8_t add_stream,
uint16_t adding_o,
uint16_t adding_i, uint8_t peer_asked)
{
-
struct sctp_association *asoc;
struct sctp_tmit_chunk *chk;
struct sctp_chunkhdr *ch;
+ int can_send_out_req = 0;
uint32_t seq;
asoc = &stcb->asoc;
@@ -11868,16 +12097,26 @@ sctp_send_str_reset_req(struct sctp_tcb *stcb,
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EBUSY);
return (EBUSY);
}
- if ((send_out_req == 0) && (send_in_req == 0) && (send_tsn_req == 0) &&
+ if ((send_in_req == 0) && (send_tsn_req == 0) &&
(add_stream == 0)) {
/* nothing to do */
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
return (EINVAL);
}
- if (send_tsn_req && (send_out_req || send_in_req)) {
+ if (send_tsn_req && send_in_req) {
/* error, can't do that */
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
return (EINVAL);
+ } else if (send_in_req) {
+ can_send_out_req = 1;
+ }
+ if (number_entries > (MCLBYTES -
+ SCTP_MIN_OVERHEAD -
+ sizeof(struct sctp_chunkhdr) -
+ sizeof(struct sctp_stream_reset_out_request)) /
+ sizeof(uint16_t)) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
}
sctp_alloc_a_chunk(stcb, chk);
if (chk == NULL) {
@@ -11887,12 +12126,13 @@ sctp_send_str_reset_req(struct sctp_tcb *stcb,
chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_STREAM_RESET;
chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->book_size = sizeof(struct sctp_chunkhdr);
chk->send_size = SCTP_SIZE32(chk->book_size);
chk->book_size_scale = 0;
- chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (chk->data == NULL) {
sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED);
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -11916,12 +12156,14 @@ sctp_send_str_reset_req(struct sctp_tcb *stcb,
SCTP_BUF_LEN(chk->data) = chk->send_size;
seq = stcb->asoc.str_reset_seq_out;
- if (send_out_req) {
- sctp_add_stream_reset_out(chk, number_entries, list,
- seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1));
- asoc->stream_reset_out_is_outstanding = 1;
- seq++;
- asoc->stream_reset_outstanding++;
+ if (can_send_out_req) {
+ int ret;
+
+ ret = sctp_add_stream_reset_out(stcb, chk, seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1));
+ if (ret) {
+ seq++;
+ asoc->stream_reset_outstanding++;
+ }
}
if ((add_stream & 1) &&
((stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt) < adding_o)) {
@@ -11930,10 +12172,15 @@ sctp_send_str_reset_req(struct sctp_tcb *stcb,
struct sctp_stream_queue_pending *sp, *nsp;
int i;
+#if defined(SCTP_DETAILED_STR_STATS)
+ int j;
+
+#endif
+
oldstream = stcb->asoc.strmout;
/* get some more */
SCTP_MALLOC(stcb->asoc.strmout, struct sctp_stream_out *,
- ((stcb->asoc.streamoutcnt + adding_o) * sizeof(struct sctp_stream_out)),
+ (stcb->asoc.streamoutcnt + adding_o) * sizeof(struct sctp_stream_out),
SCTP_M_STRMO);
if (stcb->asoc.strmout == NULL) {
uint8_t x;
@@ -11953,32 +12200,44 @@ sctp_send_str_reset_req(struct sctp_tcb *stcb,
for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
stcb->asoc.strmout[i].chunks_on_queues = oldstream[i].chunks_on_queues;
- stcb->asoc.strmout[i].next_sequence_send = oldstream[i].next_sequence_send;
+ stcb->asoc.strmout[i].next_mid_ordered = oldstream[i].next_mid_ordered;
+ stcb->asoc.strmout[i].next_mid_unordered = oldstream[i].next_mid_unordered;
stcb->asoc.strmout[i].last_msg_incomplete = oldstream[i].last_msg_incomplete;
stcb->asoc.strmout[i].stream_no = i;
- stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], &oldstream[i]);
+ stcb->asoc.strmout[i].state = oldstream[i].state;
+ /* FIX ME FIX ME */
+ /*
+ * This should be a SS_COPY operation FIX ME STREAM
+ * SCHEDULER EXPERT
+ */
+ stcb->asoc.ss_functions.sctp_ss_init_stream(stcb, &stcb->asoc.strmout[i], &oldstream[i]);
/* now anything on those queues? */
TAILQ_FOREACH_SAFE(sp, &oldstream[i].outqueue, next, nsp) {
TAILQ_REMOVE(&oldstream[i].outqueue, sp, next);
TAILQ_INSERT_TAIL(&stcb->asoc.strmout[i].outqueue, sp, next);
}
- /* Now move assoc pointers too */
- if (stcb->asoc.last_out_stream == &oldstream[i]) {
- stcb->asoc.last_out_stream = &stcb->asoc.strmout[i];
- }
- if (stcb->asoc.locked_on_sending == &oldstream[i]) {
- stcb->asoc.locked_on_sending = &stcb->asoc.strmout[i];
- }
+
}
/* now the new streams */
stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1);
for (i = stcb->asoc.streamoutcnt; i < (stcb->asoc.streamoutcnt + adding_o); i++) {
TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
stcb->asoc.strmout[i].chunks_on_queues = 0;
- stcb->asoc.strmout[i].next_sequence_send = 0x0;
+#if defined(SCTP_DETAILED_STR_STATS)
+ for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
+ stcb->asoc.strmout[i].abandoned_sent[j] = 0;
+ stcb->asoc.strmout[i].abandoned_unsent[j] = 0;
+ }
+#else
+ stcb->asoc.strmout[i].abandoned_sent[0] = 0;
+ stcb->asoc.strmout[i].abandoned_unsent[0] = 0;
+#endif
+ stcb->asoc.strmout[i].next_mid_ordered = 0;
+ stcb->asoc.strmout[i].next_mid_unordered = 0;
stcb->asoc.strmout[i].stream_no = i;
stcb->asoc.strmout[i].last_msg_incomplete = 0;
- stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], NULL);
+ stcb->asoc.ss_functions.sctp_ss_init_stream(stcb, &stcb->asoc.strmout[i], NULL);
+ stcb->asoc.strmout[i].state = SCTP_STREAM_CLOSED;
}
stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt + adding_o;
SCTP_FREE(oldstream, SCTP_M_STRMO);
@@ -12012,6 +12271,9 @@ skip_stuff:
chk,
sctp_next);
asoc->ctrl_queue_cnt++;
+ if (stcb->asoc.send_sack) {
+ sctp_send_sack(stcb, SCTP_SO_LOCKED);
+ }
sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
return (0);
}
@@ -12019,7 +12281,7 @@ skip_stuff:
void
sctp_send_abort(struct mbuf *m, int iphlen, struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, uint32_t vtag, struct mbuf *cause,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
/* Don't respond to an ABORT with an ABORT. */
@@ -12029,7 +12291,7 @@ sctp_send_abort(struct mbuf *m, int iphlen, struct sockaddr *src, struct sockadd
return;
}
sctp_send_resp_msg(src, dst, sh, vtag, SCTP_ABORT_ASSOCIATION, cause,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
return;
}
@@ -12037,11 +12299,11 @@ sctp_send_abort(struct mbuf *m, int iphlen, struct sockaddr *src, struct sockadd
void
sctp_send_operr_to(struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, uint32_t vtag, struct mbuf *cause,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
sctp_send_resp_msg(src, dst, sh, vtag, SCTP_OPERATION_ERROR, cause,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
return;
}
@@ -12073,9 +12335,6 @@ sctp_copy_one(struct sctp_stream_queue_pending *sp,
struct uio *uio,
int resv_upfront)
{
- int left;
-
- left = sp->length;
sp->data = m_uiotombuf(uio, M_WAITOK, sp->length,
resv_upfront, 0);
if (sp->data == NULL) {
@@ -12131,10 +12390,11 @@ sctp_copy_it_in(struct sctp_tcb *stcb,
sp->timetolive = srcv->sinfo_timetolive;
sp->ppid = srcv->sinfo_ppid;
sp->context = srcv->sinfo_context;
+ sp->fsn = 0;
(void)SCTP_GETTIME_TIMEVAL(&sp->ts);
sp->stream = srcv->sinfo_stream;
- sp->length = min(uio->uio_resid, max_send_len);
+ sp->length = (uint32_t) min(uio->uio_resid, max_send_len);
if ((sp->length == (uint32_t) uio->uio_resid) &&
((user_marks_eor == 0) ||
(srcv->sinfo_flags & SCTP_EOF) ||
@@ -12293,7 +12553,7 @@ sctp_lower_sosend(struct socket *so,
SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
return (EINVAL);
}
- sndlen = uio->uio_resid;
+ sndlen = (unsigned int)uio->uio_resid;
} else {
top = SCTP_HEADER_TO_CHAIN(i_pak);
sndlen = SCTP_HEADER_LEN(i_pak);
@@ -12381,7 +12641,10 @@ sctp_lower_sosend(struct socket *so,
}
SCTP_INP_RUNLOCK(inp);
} else if (sinfo_assoc_id) {
- stcb = sctp_findassociation_ep_asocid(inp, sinfo_assoc_id, 0);
+ stcb = sctp_findassociation_ep_asocid(inp, sinfo_assoc_id, 1);
+ if (stcb != NULL) {
+ hold_tcblock = 1;
+ }
} else if (addr) {
/*-
* Since we did not use findep we must
@@ -12469,8 +12732,9 @@ sctp_lower_sosend(struct socket *so,
}
#endif
stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
- p
- );
+ inp->sctp_ep.pre_open_stream_count,
+ inp->sctp_ep.port,
+ p);
if (stcb == NULL) {
/* Error is setup for us in the call */
goto out_unlocked;
@@ -12504,7 +12768,8 @@ sctp_lower_sosend(struct socket *so,
if (control) {
if (sctp_process_cmsgs_for_init(stcb, control, &error)) {
- sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_7);
+ sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5);
hold_tcblock = 0;
stcb = NULL;
goto out_unlocked;
@@ -12590,12 +12855,24 @@ sctp_lower_sosend(struct socket *so,
SCTP_ASOC_CREATE_UNLOCK(inp);
create_lock_applied = 0;
}
- if (asoc->stream_reset_outstanding) {
+ /* Is the stream no. valid? */
+ if (srcv->sinfo_stream >= asoc->streamoutcnt) {
+ /* Invalid stream number */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ if ((asoc->strmout[srcv->sinfo_stream].state != SCTP_STREAM_OPEN) &&
+ (asoc->strmout[srcv->sinfo_stream].state != SCTP_STREAM_OPENING)) {
/*
* Can't queue any data while stream reset is underway.
*/
- SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EAGAIN);
- error = EAGAIN;
+ if (asoc->strmout[srcv->sinfo_stream].state > SCTP_STREAM_OPEN) {
+ error = EAGAIN;
+ } else {
+ error = EINVAL;
+ }
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, error);
goto out_unlocked;
}
if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
@@ -12646,7 +12923,7 @@ sctp_lower_sosend(struct socket *so,
if (top) {
struct mbuf *cntm = NULL;
- mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_WAIT, 1, MT_DATA);
+ mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_WAITOK, 1, MT_DATA);
if (sndlen != 0) {
for (cntm = top; cntm; cntm = SCTP_BUF_NEXT(cntm)) {
tot_out += SCTP_BUF_LEN(cntm);
@@ -12662,7 +12939,7 @@ sctp_lower_sosend(struct socket *so,
error = EMSGSIZE;
goto out;
}
- mm = sctp_get_mbuf_for_msg(tot_demand, 0, M_WAIT, 1, MT_DATA);
+ mm = sctp_get_mbuf_for_msg(tot_demand, 0, M_WAITOK, 1, MT_DATA);
}
if (mm == NULL) {
SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -12680,7 +12957,7 @@ sctp_lower_sosend(struct socket *so,
/* now move forward the data pointer */
ph = mtod(mm, struct sctp_paramhdr *);
ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
- ph->param_length = htons(sizeof(struct sctp_paramhdr) + tot_out);
+ ph->param_length = htons((uint16_t) (sizeof(struct sctp_paramhdr) + tot_out));
ph++;
SCTP_BUF_LEN(mm) = tot_out + sizeof(struct sctp_paramhdr);
if (top == NULL) {
@@ -12736,13 +13013,6 @@ sctp_lower_sosend(struct socket *so,
SCTP_TCB_UNLOCK(stcb);
hold_tcblock = 0;
}
- /* Is the stream no. valid? */
- if (srcv->sinfo_stream >= asoc->streamoutcnt) {
- /* Invalid stream number */
- SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
- error = EINVAL;
- goto out_unlocked;
- }
if (asoc->strmout == NULL) {
/* huh? software error */
SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
@@ -12818,6 +13088,7 @@ sctp_lower_sosend(struct socket *so,
asoc, stcb->asoc.total_output_queue_size);
}
if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SOCKBUF_UNLOCK(&so->so_snd);
goto out_unlocked;
}
inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
@@ -12879,8 +13150,10 @@ skip_preblock:
* interrupt.
*/
strm->last_msg_incomplete = 1;
- asoc->stream_locked = 1;
- asoc->stream_locked_on = srcv->sinfo_stream;
+ if (stcb->asoc.idata_supported == 0) {
+ asoc->stream_locked = 1;
+ asoc->stream_locked_on = srcv->sinfo_stream;
+ }
sp->sender_all_done = 0;
}
sctp_snd_sb_alloc(stcb, sp->length);
@@ -12959,7 +13232,9 @@ skip_preblock:
sctp_snd_sb_alloc(stcb, sndout);
atomic_add_int(&sp->length, sndout);
len += sndout;
-
+ if (srcv->sinfo_flags & SCTP_SACK_IMMEDIATELY) {
+ sp->sinfo_flags |= SCTP_SACK_IMMEDIATELY;
+ }
/* Did we reach EOR? */
if ((uio->uio_resid == 0) &&
((user_marks_eor == 0) ||
@@ -12976,7 +13251,7 @@ skip_preblock:
continue;
}
/* PR-SCTP? */
- if ((asoc->peer_supports_prsctp) && (asoc->sent_queue_cnt_removeable > 0)) {
+ if ((asoc->prsctp_supported) && (asoc->sent_queue_cnt_removeable > 0)) {
/*
* This is ugly but we must assure locking
* order
@@ -13038,7 +13313,7 @@ skip_preblock:
/*-
* Ok, Nagle is set on and we have data outstanding.
* Don't send anything and let SACKs drive out the
- * data unless wen have a "full" segment to send.
+ * data unless we have a "full" segment to send.
*/
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED);
@@ -13107,7 +13382,7 @@ skip_preblock:
min(SCTP_BASE_SYSCTL(sctp_add_more_threshold), SCTP_SB_LIMIT_SND(so)))) {
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) {
sctp_log_block(SCTP_BLOCK_LOG_INTO_BLK,
- asoc, uio->uio_resid);
+ asoc, (size_t)uio->uio_resid);
}
be.error = 0;
stcb->block_entry = &be;
@@ -13136,11 +13411,17 @@ skip_preblock:
}
}
SCTP_TCB_SEND_LOCK(stcb);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ goto out_unlocked;
+ }
if (sp) {
if (sp->msg_is_complete == 0) {
strm->last_msg_incomplete = 1;
- asoc->stream_locked = 1;
- asoc->stream_locked_on = srcv->sinfo_stream;
+ if (stcb->asoc.idata_supported == 0) {
+ asoc->stream_locked = 1;
+ asoc->stream_locked_on = srcv->sinfo_stream;
+ }
} else {
sp->sender_all_done = 1;
strm->last_msg_incomplete = 0;
@@ -13176,19 +13457,16 @@ dataless_eof:
/* EOF thing ? */
if ((srcv->sinfo_flags & SCTP_EOF) &&
(got_all_of_the_send == 1)) {
- int cnt;
-
SCTP_STAT_INCR(sctps_sends_with_eof);
error = 0;
if (hold_tcblock == 0) {
SCTP_TCB_LOCK(stcb);
hold_tcblock = 1;
}
- cnt = sctp_is_there_unsent_data(stcb, SCTP_SO_LOCKED);
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
- (cnt == 0)) {
- if (asoc->locked_on_sending) {
+ sctp_is_there_unsent_data(stcb, SCTP_SO_LOCKED) == 0) {
+ if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
goto abort_anyway;
}
/* there is nothing queued to send, so I'm done... */
@@ -13233,27 +13511,27 @@ dataless_eof:
SCTP_TCB_LOCK(stcb);
hold_tcblock = 1;
}
- if (asoc->locked_on_sending) {
- /* Locked to send out the data */
- struct sctp_stream_queue_pending *sp;
-
- sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
- if (sp) {
- if ((sp->length == 0) && (sp->msg_is_complete == 0))
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
- }
+ if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
}
asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
(asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
abort_anyway:
if (free_cnt_applied) {
atomic_add_int(&stcb->asoc.refcnt, -1);
free_cnt_applied = 0;
}
+ snprintf(msg, sizeof(msg),
+ "%s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
sctp_abort_an_association(stcb->sctp_ep, stcb,
- NULL, SCTP_SO_LOCKED);
+ op_err, SCTP_SO_LOCKED);
/*
* now relock the stcb so everything
* is sane
@@ -13393,13 +13671,6 @@ out_unlocked:
}
}
#endif
-#ifdef INVARIANTS
- if (inp) {
- sctp_validate_no_locks(inp);
- } else {
- SCTP_PRINTF("Warning - inp is NULL so cant validate locks\n");
- }
-#endif
if (top) {
sctp_m_freem(top);
}
@@ -13427,19 +13698,14 @@ sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end,
(stcb == NULL))
return (m);
- /* sysctl disabled auth? */
- if (SCTP_BASE_SYSCTL(sctp_auth_disable))
- return (m);
-
- /* peer doesn't do auth... */
- if (!stcb->asoc.peer_supports_auth) {
+ if (stcb->asoc.auth_supported == 0) {
return (m);
}
/* does the requested chunk require auth? */
if (!sctp_auth_is_required_chunk(chunk, stcb->asoc.peer_auth_chunks)) {
return (m);
}
- m_auth = sctp_get_mbuf_for_msg(sizeof(*auth), 0, M_DONTWAIT, 1, MT_HEADER);
+ m_auth = sctp_get_mbuf_for_msg(sizeof(*auth), 0, M_NOWAIT, 1, MT_HEADER);
if (m_auth == NULL) {
/* no mbuf's */
return (m);
@@ -13538,7 +13804,7 @@ sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t * ro)
}
ifa = (struct ifaddr *)sifa->ifa;
mask = (struct sockaddr_in *)(ifa->ifa_netmask);
- sin = (struct sockaddr_in *)&sifa->address.sin;
+ sin = &sifa->address.sin;
srcnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr);
SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: src address is ");
SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);
diff --git a/freebsd/sys/netinet/sctp_output.h b/freebsd/sys/netinet/sctp_output.h
index 59af5af2..b2441a6f 100644
--- a/freebsd/sys/netinet/sctp_output.h
+++ b/freebsd/sys/netinet/sctp_output.h
@@ -80,7 +80,8 @@ sctp_send_initiate(struct sctp_inpcb *, struct sctp_tcb *, int
);
void
-sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *, struct mbuf *,
+sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, struct mbuf *,
int, int,
struct sockaddr *, struct sockaddr *,
struct sctphdr *, struct sctp_init_chunk *,
@@ -117,7 +118,7 @@ void sctp_send_shutdown_complete(struct sctp_tcb *, struct sctp_nets *, int);
void
sctp_send_shutdown_complete2(struct sockaddr *, struct sockaddr *,
struct sctphdr *,
- uint8_t, uint32_t,
+ uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
void sctp_send_asconf(struct sctp_tcb *, struct sctp_nets *, int addr_locked);
@@ -170,30 +171,33 @@ void sctp_send_cwr(struct sctp_tcb *, struct sctp_nets *, uint32_t, uint8_t);
void
-sctp_add_stream_reset_out(struct sctp_tmit_chunk *,
- int, uint16_t *, uint32_t, uint32_t, uint32_t);
+ sctp_add_stream_reset_result(struct sctp_tmit_chunk *, uint32_t, uint32_t);
void
- sctp_add_stream_reset_result(struct sctp_tmit_chunk *, uint32_t, uint32_t);
+sctp_send_deferred_reset_response(struct sctp_tcb *,
+ struct sctp_stream_reset_list *,
+ int);
void
sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *,
uint32_t, uint32_t, uint32_t, uint32_t);
+int
+ sctp_send_stream_reset_out_if_possible(struct sctp_tcb *, int);
int
-sctp_send_str_reset_req(struct sctp_tcb *, int, uint16_t *, uint8_t, uint8_t,
- uint8_t, uint8_t, uint16_t, uint16_t, uint8_t);
+sctp_send_str_reset_req(struct sctp_tcb *, uint16_t, uint16_t *,
+ uint8_t, uint8_t, uint8_t, uint16_t, uint16_t, uint8_t);
void
sctp_send_abort(struct mbuf *, int, struct sockaddr *, struct sockaddr *,
struct sctphdr *, uint32_t, struct mbuf *,
- uint8_t, uint32_t,
+ uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
void
sctp_send_operr_to(struct sockaddr *, struct sockaddr *,
struct sctphdr *, uint32_t, struct mbuf *,
- uint8_t, uint32_t,
+ uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
#endif /* _KERNEL || __Userspace__ */
diff --git a/freebsd/sys/netinet/sctp_pcb.c b/freebsd/sys/netinet/sctp_pcb.c
index 16dc231f..62ef1e3d 100644
--- a/freebsd/sys/netinet/sctp_pcb.c
+++ b/freebsd/sys/netinet/sctp_pcb.c
@@ -48,7 +48,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp_timer.h>
#include <netinet/sctp_bsd_addr.h>
#include <netinet/sctp_dtrace_define.h>
+#if defined(INET) || defined(INET6)
#include <netinet/udp.h>
+#endif
#ifdef INET6
#include <netinet6/ip6_var.h>
#endif
@@ -330,7 +332,7 @@ sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr,
goto out;
}
if (sctp_ifap->ifn_p == NULL) {
- SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unuseable\n");
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unusable\n");
goto out;
}
if (if_name) {
@@ -374,7 +376,7 @@ sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr,
goto out;
}
if (sctp_ifap->ifn_p == NULL) {
- SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unuseable\n");
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unusable\n");
goto out;
}
if (if_name) {
@@ -625,7 +627,7 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index,
{
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&sctp_ifap->address.sin;
+ sin = &sctp_ifap->address.sin;
if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
(IN4_ISLOOPBACK_ADDRESS(&sin->sin_addr))) {
sctp_ifap->src_is_loop = 1;
@@ -645,7 +647,7 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index,
/* ok to use deprecated addresses? */
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&sctp_ifap->address.sin6;
+ sin6 = &sctp_ifap->address.sin6;
if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
(IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))) {
sctp_ifap->src_is_loop = 1;
@@ -974,7 +976,7 @@ sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to)
{
struct sockaddr_in *sin, *rsin;
- sin = (struct sockaddr_in *)&laddr->ifa->address.sin;
+ sin = &laddr->ifa->address.sin;
rsin = (struct sockaddr_in *)to;
if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) {
SCTP_IPI_ADDR_RUNLOCK();
@@ -988,7 +990,7 @@ sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to)
{
struct sockaddr_in6 *sin6, *rsin6;
- sin6 = (struct sockaddr_in6 *)&laddr->ifa->address.sin6;
+ sin6 = &laddr->ifa->address.sin6;
rsin6 = (struct sockaddr_in6 *)to;
if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) {
SCTP_IPI_ADDR_RUNLOCK();
@@ -1115,7 +1117,7 @@ sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from,
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (laddr->ifa == NULL) {
- SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __FUNCTION__);
+ SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __func__);
continue;
}
if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
@@ -1441,9 +1443,6 @@ sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote,
}
head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(rport,
inp->sctp_hashmark)];
- if (head == NULL) {
- goto null_return;
- }
LIST_FOREACH(stcb, head, sctp_tcbhash) {
if (stcb->rport != rport) {
/* remote port does not match */
@@ -1776,7 +1775,7 @@ sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head,
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (laddr->ifa == NULL) {
SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
- __FUNCTION__);
+ __func__);
continue;
}
SCTPDBG(SCTP_DEBUG_PCB1, "Ok laddr->ifa:%p is possible, ",
@@ -1870,7 +1869,7 @@ sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp)
{
/* For 1-2-1 with port reuse */
struct sctppcbhead *head;
- struct sctp_inpcb *tinp;
+ struct sctp_inpcb *tinp, *ninp;
if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE)) {
/* only works with port reuse on */
@@ -1880,10 +1879,11 @@ sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp)
return (0);
}
SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_WLOCK();
head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(inp->sctp_lport,
SCTP_BASE_INFO(hashmark))];
/* Kick out all non-listeners to the TCP hash */
- LIST_FOREACH(tinp, head, sctp_hash) {
+ LIST_FOREACH_SAFE(tinp, head, sctp_hash, ninp) {
if (tinp->sctp_lport != inp->sctp_lport) {
continue;
}
@@ -1911,6 +1911,7 @@ sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp)
LIST_INSERT_HEAD(head, inp, sctp_hash);
SCTP_INP_WUNLOCK(inp);
SCTP_INP_RLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
return (0);
}
@@ -2166,11 +2167,6 @@ sctp_findassoc_by_vtag(struct sockaddr *from, struct sockaddr *to, uint32_t vtag
SCTP_INP_INFO_RLOCK();
head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(vtag,
SCTP_BASE_INFO(hashasocmark))];
- if (head == NULL) {
- /* invalid vtag */
- SCTP_INP_INFO_RUNLOCK();
- return (NULL);
- }
LIST_FOREACH(stcb, head, sctp_asocs) {
SCTP_INP_RLOCK(stcb->sctp_ep);
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
@@ -2262,7 +2258,6 @@ sctp_findassociation_addr(struct mbuf *m, int offset,
struct sctphdr *sh, struct sctp_chunkhdr *ch,
struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
{
- int find_tcp_pool;
struct sctp_tcb *stcb;
struct sctp_inpcb *inp;
@@ -2274,21 +2269,13 @@ sctp_findassociation_addr(struct mbuf *m, int offset,
return (stcb);
}
}
- find_tcp_pool = 0;
- if ((ch->chunk_type != SCTP_INITIATION) &&
- (ch->chunk_type != SCTP_INITIATION_ACK) &&
- (ch->chunk_type != SCTP_COOKIE_ACK) &&
- (ch->chunk_type != SCTP_COOKIE_ECHO)) {
- /* Other chunk types go to the tcp pool. */
- find_tcp_pool = 1;
- }
if (inp_p) {
stcb = sctp_findassociation_addr_sa(src, dst, inp_p, netp,
- find_tcp_pool, vrf_id);
+ 1, vrf_id);
inp = *inp_p;
} else {
stcb = sctp_findassociation_addr_sa(src, dst, &inp, netp,
- find_tcp_pool, vrf_id);
+ 1, vrf_id);
}
SCTPDBG(SCTP_DEBUG_PCB1, "stcb:%p inp:%p\n", (void *)stcb, (void *)inp);
if (stcb == NULL && inp) {
@@ -2330,7 +2317,7 @@ sctp_findassociation_ep_asconf(struct mbuf *m, int offset,
struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
{
struct sctp_tcb *stcb;
- struct sockaddr_storage remote_store;
+ union sctp_sockstore remote_store;
struct sctp_paramhdr parm_buf, *phdr;
int ptype;
int zero_address = 0;
@@ -2349,7 +2336,7 @@ sctp_findassociation_ep_asconf(struct mbuf *m, int offset,
&parm_buf, sizeof(struct sctp_paramhdr));
if (phdr == NULL) {
SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf lookup addr\n",
- __FUNCTION__);
+ __func__);
return NULL;
}
ptype = (int)((uint32_t) ntohs(phdr->param_type));
@@ -2369,10 +2356,10 @@ sctp_findassociation_ep_asconf(struct mbuf *m, int offset,
&p6_buf.ph, sizeof(*p6));
if (p6 == NULL) {
SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v6 lookup addr\n",
- __FUNCTION__);
+ __func__);
return (NULL);
}
- sin6 = (struct sockaddr_in6 *)&remote_store;
+ sin6 = &remote_store.sin6;
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(*sin6);
sin6->sin6_port = sh->src_port;
@@ -2396,10 +2383,10 @@ sctp_findassociation_ep_asconf(struct mbuf *m, int offset,
&p4_buf.ph, sizeof(*p4));
if (p4 == NULL) {
SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v4 lookup addr\n",
- __FUNCTION__);
+ __func__);
return (NULL);
}
- sin = (struct sockaddr_in *)&remote_store;
+ sin = &remote_store.sin;
sin->sin_family = AF_INET;
sin->sin_len = sizeof(*sin);
sin->sin_port = sh->src_port;
@@ -2422,7 +2409,7 @@ sctp_findassociation_ep_asconf(struct mbuf *m, int offset,
}
} else {
stcb = sctp_findassociation_ep_addr(inp_p,
- (struct sockaddr *)&remote_store, netp,
+ &remote_store.sa, netp,
dst, NULL);
}
return (stcb);
@@ -2482,8 +2469,18 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id)
inp->sctp_associd_counter = 1;
inp->partial_delivery_point = SCTP_SB_LIMIT_RCV(so) >> SCTP_PARTIAL_DELIVERY_SHIFT;
inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+ inp->max_cwnd = 0;
inp->sctp_cmt_on_off = SCTP_BASE_SYSCTL(sctp_cmt_on_off);
- inp->sctp_ecn_enable = SCTP_BASE_SYSCTL(sctp_ecn_enable);
+ inp->ecn_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_ecn_enable);
+ inp->prsctp_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_pr_enable);
+ inp->auth_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_auth_enable);
+ inp->asconf_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_asconf_enable);
+ inp->reconfig_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_reconfig_enable);
+ inp->nrsack_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_nrsack_enable);
+ inp->pktdrop_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_pktdrop_enable);
+ inp->idata_supported = 0;
+
+ inp->fibnum = so->so_fibnum;
/* init the small hash table we use to track asocid <-> tcb */
inp->sctp_asocidhash = SCTP_HASH_INIT(SCTP_STACK_VTAG_HASH_SIZE, &inp->hashasocidmark);
if (inp->sctp_asocidhash == NULL) {
@@ -2493,14 +2490,7 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id)
return (ENOBUFS);
}
#ifdef IPSEC
- {
- struct inpcbpolicy *pcb_sp = NULL;
-
- error = ipsec_init_policy(so, &pcb_sp);
- /* Arrange to share the policy */
- inp->ip_inp.inp.inp_sp = pcb_sp;
- ((struct in6pcb *)(&inp->ip_inp.inp))->in6p_sp = pcb_sp;
- }
+ error = ipsec_init_policy(so, &inp->ip_inp.inp.inp_sp);
if (error != 0) {
crfree(inp->ip_inp.inp.inp_cred);
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
@@ -2534,6 +2524,9 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id)
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EOPNOTSUPP);
so->so_pcb = NULL;
crfree(inp->ip_inp.inp.inp_cred);
+#ifdef IPSEC
+ ipsec_delete_pcbpolicy(&inp->ip_inp.inp);
+#endif
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
return (EOPNOTSUPP);
}
@@ -2554,6 +2547,9 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id)
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
so->so_pcb = NULL;
crfree(inp->ip_inp.inp.inp_cred);
+#ifdef IPSEC
+ ipsec_delete_pcbpolicy(&inp->ip_inp.inp);
+#endif
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
return (ENOBUFS);
}
@@ -2647,12 +2643,15 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id)
*/
m->local_hmacs = sctp_default_supported_hmaclist();
m->local_auth_chunks = sctp_alloc_chunklist();
+ if (inp->asconf_supported) {
+ sctp_auth_add_chunk(SCTP_ASCONF, m->local_auth_chunks);
+ sctp_auth_add_chunk(SCTP_ASCONF_ACK, m->local_auth_chunks);
+ }
m->default_dscp = 0;
#ifdef INET6
m->default_flowlabel = 0;
#endif
m->port = 0; /* encapsulation disabled by default */
- sctp_auth_set_default_chunks(m->local_auth_chunks);
LIST_INIT(&m->shared_keys);
/* add default NULL key as key id 0 */
null_key = sctp_alloc_sharedkey();
@@ -2786,6 +2785,45 @@ sctp_move_pcb_and_assoc(struct sctp_inpcb *old_inp, struct sctp_inpcb *new_inp,
SCTP_INP_WUNLOCK(old_inp);
}
+/*
+ * insert an laddr entry with the given ifa for the desired list
+ */
+static int
+sctp_insert_laddr(struct sctpladdr *list, struct sctp_ifa *ifa, uint32_t act)
+{
+ struct sctp_laddr *laddr;
+
+ laddr = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
+ if (laddr == NULL) {
+ /* out of memory? */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(laddr, sizeof(*laddr));
+ (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time);
+ laddr->ifa = ifa;
+ laddr->action = act;
+ atomic_add_int(&ifa->refcount, 1);
+ /* insert it */
+ LIST_INSERT_HEAD(list, laddr, sctp_nxt_addr);
+
+ return (0);
+}
+
+/*
+ * Remove an laddr entry from the local address list (on an assoc)
+ */
+static void
+sctp_remove_laddr(struct sctp_laddr *laddr)
+{
+
+ /* remove from the list */
+ LIST_REMOVE(laddr, sctp_nxt_addr);
+ sctp_free_ifa(laddr->ifa);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), laddr);
+ SCTP_DECR_LADDR_COUNT();
+}
@@ -3117,31 +3155,21 @@ continue_anyway:
* too (before adding).
*/
struct sctp_ifa *ifa;
- struct sockaddr_storage store_sa;
+ union sctp_sockstore store;
- memset(&store_sa, 0, sizeof(store_sa));
+ memset(&store, 0, sizeof(store));
switch (addr->sa_family) {
#ifdef INET
case AF_INET:
- {
- struct sockaddr_in *sin;
-
- sin = (struct sockaddr_in *)&store_sa;
- memcpy(sin, addr, sizeof(struct sockaddr_in));
- sin->sin_port = 0;
- break;
- }
+ memcpy(&store.sin, addr, sizeof(struct sockaddr_in));
+ store.sin.sin_port = 0;
+ break;
#endif
#ifdef INET6
case AF_INET6:
- {
- struct sockaddr_in6 *sin6;
-
- sin6 = (struct sockaddr_in6 *)&store_sa;
- memcpy(sin6, addr, sizeof(struct sockaddr_in6));
- sin6->sin6_port = 0;
- break;
- }
+ memcpy(&store.sin6, addr, sizeof(struct sockaddr_in6));
+ store.sin6.sin6_port = 0;
+ break;
#endif
default:
break;
@@ -3159,7 +3187,7 @@ continue_anyway:
* pass things in via the sctp_ifap argument
* (Panda).
*/
- ifa = sctp_find_ifa_by_addr((struct sockaddr *)&store_sa,
+ ifa = sctp_find_ifa_by_addr(&store.sa,
vrf_id, SCTP_ADDR_NOT_LOCKED);
}
if (ifa == NULL) {
@@ -3418,7 +3446,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
} else if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
(asoc->asoc.stream_queue_cnt == 0)) {
- if (asoc->asoc.locked_on_sending) {
+ if ((*asoc->asoc.ss_functions.sctp_ss_is_user_msgs_incomplete) (asoc, &asoc->asoc)) {
goto abort_anyway;
}
if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
@@ -3450,22 +3478,11 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
}
} else {
/* mark into shutdown pending */
- struct sctp_stream_queue_pending *sp;
-
asoc->asoc.state |= SCTP_STATE_SHUTDOWN_PENDING;
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc,
asoc->asoc.primary_destination);
- if (asoc->asoc.locked_on_sending) {
- sp = TAILQ_LAST(&((asoc->asoc.locked_on_sending)->outqueue),
- sctp_streamhead);
- if (sp == NULL) {
- SCTP_PRINTF("Error, sp is NULL, locked on sending is %p strm:%d\n",
- (void *)asoc->asoc.locked_on_sending,
- asoc->asoc.locked_on_sending->stream_no);
- } else {
- if ((sp->length == 0) && (sp->msg_is_complete == 0))
- asoc->asoc.state |= SCTP_STATE_PARTIAL_MSG_LEFT;
- }
+ if ((*asoc->asoc.ss_functions.sctp_ss_is_user_msgs_incomplete) (asoc, &asoc->asoc)) {
+ asoc->asoc.state |= SCTP_STATE_PARTIAL_MSG_LEFT;
}
if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
@@ -3550,7 +3567,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
(SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_8) == 0) {
+ if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE,
+ SCTP_FROM_SCTP_PCB + SCTP_LOC_8) == 0) {
cnt++;
}
}
@@ -3637,8 +3655,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
* no need to free the net count, since at this point all
* assoc's are gone.
*/
- SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), sq);
- SCTP_DECR_READQ_COUNT();
+ sctp_free_a_readq(NULL, sq);
}
/* Now the sctp_pcb things */
/*
@@ -3646,13 +3663,9 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
* macro here since le_next will get freed as part of the
* sctp_free_assoc() call.
*/
- if (so) {
#ifdef IPSEC
- ipsec_delete_pcbpolicy(ip_pcb);
-#endif /* IPSEC */
-
- /* Unlocks not needed since the socket is gone now */
- }
+ ipsec_delete_pcbpolicy(ip_pcb);
+#endif
if (ip_pcb->inp_options) {
(void)sctp_m_free(ip_pcb->inp_options);
ip_pcb->inp_options = 0;
@@ -3746,7 +3759,7 @@ sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id)
*/
int
sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
- struct sctp_nets **netp, int set_scope, int from)
+ struct sctp_nets **netp, uint16_t port, int set_scope, int from)
{
/*
* The following is redundant to the same lines in the
@@ -3799,13 +3812,9 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
/* assure len is set */
sin->sin_len = sizeof(struct sockaddr_in);
if (set_scope) {
-#ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
- stcb->asoc.scope.ipv4_local_scope = 1;
-#else
if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
stcb->asoc.scope.ipv4_local_scope = 1;
}
-#endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */
} else {
/* Validate the address is in scope */
if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) &&
@@ -3928,7 +3937,7 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
stcb->asoc.numnets++;
net->ref_count = 1;
net->cwr_window_tsn = net->last_cwr_tsn = stcb->asoc.sending_seq - 1;
- net->port = stcb->asoc.port;
+ net->port = port;
net->dscp = stcb->asoc.default_dscp;
#ifdef INET6
net->flowlabel = stcb->asoc.default_flowlabel;
@@ -3960,7 +3969,9 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
sin6->sin6_scope_id = 0;
}
#endif
- SCTP_RTALLOC((sctp_route_t *) & net->ro, stcb->asoc.vrf_id);
+ SCTP_RTALLOC((sctp_route_t *) & net->ro,
+ stcb->asoc.vrf_id,
+ stcb->sctp_ep->fibnum);
if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro)) {
/* Get source address */
@@ -3970,9 +3981,14 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
net,
0,
stcb->asoc.vrf_id);
- /* Now get the interface MTU */
- if (net->ro._s_addr && net->ro._s_addr->ifn_p) {
- net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p);
+ if (net->ro._s_addr != NULL) {
+ net->src_addr_selected = 1;
+ /* Now get the interface MTU */
+ if (net->ro._s_addr->ifn_p != NULL) {
+ net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p);
+ }
+ } else {
+ net->src_addr_selected = 0;
}
if (net->mtu > 0) {
uint32_t rmtu;
@@ -3994,6 +4010,8 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
net->mtu = rmtu;
}
}
+ } else {
+ net->src_addr_selected = 0;
}
if (net->mtu == 0) {
switch (newaddr->sa_family) {
@@ -4011,14 +4029,16 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
break;
}
}
+#if defined(INET) || defined(INET6)
if (net->port) {
net->mtu -= (uint32_t) sizeof(struct udphdr);
}
+#endif
if (from == SCTP_ALLOC_ASOC) {
stcb->asoc.smallest_mtu = net->mtu;
}
if (stcb->asoc.smallest_mtu > net->mtu) {
- stcb->asoc.smallest_mtu = net->mtu;
+ sctp_pathmtu_adjustment(stcb, net->mtu);
}
#ifdef INET6
if (newaddr->sa_family == AF_INET6) {
@@ -4039,14 +4059,11 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
*/
net->find_pseudo_cumack = 1;
net->find_rtx_pseudo_cumack = 1;
- net->src_addr_selected = 0;
/* Choose an initial flowid. */
net->flowid = stcb->asoc.my_vtag ^
ntohs(stcb->rport) ^
ntohs(stcb->sctp_ep->sctp_lport);
-#ifdef INVARIANTS
- net->flowidset = 1;
-#endif
+ net->flowtype = M_HASHTYPE_OPAQUE_HASH;
if (netp) {
*netp = net;
}
@@ -4167,6 +4184,7 @@ try_again:
struct sctp_tcb *
sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
int *error, uint32_t override_tag, uint32_t vrf_id,
+ uint16_t o_streams, uint16_t port,
struct thread *p
)
{
@@ -4325,7 +4343,7 @@ sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
/* setup back pointer's */
stcb->sctp_ep = inp;
stcb->sctp_socket = inp->sctp_socket;
- if ((err = sctp_init_asoc(inp, stcb, override_tag, vrf_id))) {
+ if ((err = sctp_init_asoc(inp, stcb, override_tag, vrf_id, o_streams))) {
/* failed */
SCTP_TCB_LOCK_DESTROY(stcb);
SCTP_TCB_SEND_LOCK_DESTROY(stcb);
@@ -4359,7 +4377,7 @@ sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
LIST_INSERT_HEAD(head, stcb, sctp_asocs);
SCTP_INP_INFO_WUNLOCK();
- if ((err = sctp_add_remote_addr(stcb, firstaddr, NULL, SCTP_DO_SETSCOPE, SCTP_ALLOC_ASOC))) {
+ if ((err = sctp_add_remote_addr(stcb, firstaddr, NULL, port, SCTP_DO_SETSCOPE, SCTP_ALLOC_ASOC))) {
/* failure.. memory error? */
if (asoc->strmout) {
SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
@@ -4625,6 +4643,45 @@ sctp_add_vtag_to_timewait(uint32_t tag, uint32_t time, uint16_t lport, uint16_t
}
}
+void
+sctp_clean_up_stream(struct sctp_tcb *stcb, struct sctp_readhead *rh)
+{
+ struct sctp_tmit_chunk *chk, *nchk;
+ struct sctp_queued_to_read *ctl, *nctl;
+
+ TAILQ_FOREACH_SAFE(ctl, rh, next_instrm, nctl) {
+ TAILQ_REMOVE(rh, ctl, next_instrm);
+ ctl->on_strm_q = 0;
+ if (ctl->on_read_q == 0) {
+ sctp_free_remote_addr(ctl->whoFrom);
+ if (ctl->data) {
+ sctp_m_freem(ctl->data);
+ ctl->data = NULL;
+ }
+ }
+ /* Reassembly free? */
+ TAILQ_FOREACH_SAFE(chk, &ctl->reasm, sctp_next, nchk) {
+ TAILQ_REMOVE(&ctl->reasm, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ if (chk->holds_key_ref)
+ sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED);
+ sctp_free_remote_addr(chk->whoTo);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ }
+ /*
+ * We don't free the address here since all the nets were
+ * freed above.
+ */
+ if (ctl->on_read_q == 0) {
+ sctp_free_a_readq(stcb, ctl);
+ }
+ }
+}
/*-
@@ -4925,7 +4982,9 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
outs = &asoc->strmout[i];
/* now clean up any chunks here */
TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) {
+ atomic_subtract_int(&asoc->stream_queue_cnt, 1);
TAILQ_REMOVE(&outs->outqueue, sp, next);
+ stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, outs, sp, 0);
sctp_free_spbufspace(stcb, asoc, sp);
if (sp->data) {
if (so) {
@@ -4962,8 +5021,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
sq->whoFrom = NULL;
sq->stcb = NULL;
/* Free the ctl entry */
- SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), sq);
- SCTP_DECR_READQ_COUNT();
+ sctp_free_a_readq(stcb, sq);
/* sa_ignore FREED_MEMORY */
}
TAILQ_FOREACH_SAFE(chk, &asoc->free_chunks, sctp_next, nchk) {
@@ -5076,20 +5134,6 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
SCTP_DECR_CHK_COUNT();
/* sa_ignore FREED_MEMORY */
}
- TAILQ_FOREACH_SAFE(chk, &asoc->reasmqueue, sctp_next, nchk) {
- TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
- if (chk->data) {
- sctp_m_freem(chk->data);
- chk->data = NULL;
- }
- if (chk->holds_key_ref)
- sctp_auth_key_release(stcb, chk->auth_keyid, SCTP_SO_LOCKED);
- sctp_free_remote_addr(chk->whoTo);
- SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk);
- SCTP_DECR_CHK_COUNT();
- /* sa_ignore FREED_MEMORY */
- }
-
if (asoc->mapping_array) {
SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
asoc->mapping_array = NULL;
@@ -5105,23 +5149,9 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre
}
asoc->strm_realoutsize = asoc->streamoutcnt = 0;
if (asoc->strmin) {
- struct sctp_queued_to_read *ctl, *nctl;
-
for (i = 0; i < asoc->streamincnt; i++) {
- TAILQ_FOREACH_SAFE(ctl, &asoc->strmin[i].inqueue, next, nctl) {
- TAILQ_REMOVE(&asoc->strmin[i].inqueue, ctl, next);
- sctp_free_remote_addr(ctl->whoFrom);
- if (ctl->data) {
- sctp_m_freem(ctl->data);
- ctl->data = NULL;
- }
- /*
- * We don't free the address here since all
- * the net's were freed above.
- */
- SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), ctl);
- SCTP_DECR_READQ_COUNT();
- }
+ sctp_clean_up_stream(stcb, &asoc->strmin[i].inqueue);
+ sctp_clean_up_stream(stcb, &asoc->strmin[i].uno_inqueue);
}
SCTP_FREE(asoc->strmin, SCTP_M_STRMI);
asoc->strmin = NULL;
@@ -5302,7 +5332,7 @@ sctp_update_ep_vflag(struct sctp_inpcb *inp)
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (laddr->ifa == NULL) {
SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
- __FUNCTION__);
+ __func__);
continue;
}
if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
@@ -5333,6 +5363,7 @@ void
sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa, uint32_t action)
{
struct sctp_laddr *laddr;
+ struct sctp_tcb *stcb;
int fnd, error = 0;
fnd = 0;
@@ -5378,6 +5409,9 @@ sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa, uint32_t ac
default:
break;
}
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ sctp_add_local_addr_restricted(stcb, ifa);
+ }
}
return;
}
@@ -5408,7 +5442,7 @@ sctp_select_primary_destination(struct sctp_tcb *stcb)
/*
- * Delete the address from the endpoint local address list There is nothing
+ * Delete the address from the endpoint local address list. There is nothing
* to be done if we are bound to all addresses
*/
void
@@ -5459,8 +5493,7 @@ sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa)
* to laddr
*/
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
- if (net->ro._s_addr &&
- (net->ro._s_addr->ifa == laddr->ifa)) {
+ if (net->ro._s_addr == laddr->ifa) {
/* Yep, purge src address selected */
sctp_rtentry_t *rt;
@@ -5524,46 +5557,6 @@ sctp_add_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
}
/*
- * insert an laddr entry with the given ifa for the desired list
- */
-int
-sctp_insert_laddr(struct sctpladdr *list, struct sctp_ifa *ifa, uint32_t act)
-{
- struct sctp_laddr *laddr;
-
- laddr = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
- if (laddr == NULL) {
- /* out of memory? */
- SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
- return (EINVAL);
- }
- SCTP_INCR_LADDR_COUNT();
- bzero(laddr, sizeof(*laddr));
- (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time);
- laddr->ifa = ifa;
- laddr->action = act;
- atomic_add_int(&ifa->refcount, 1);
- /* insert it */
- LIST_INSERT_HEAD(list, laddr, sctp_nxt_addr);
-
- return (0);
-}
-
-/*
- * Remove an laddr entry from the local address list (on an assoc)
- */
-void
-sctp_remove_laddr(struct sctp_laddr *laddr)
-{
-
- /* remove from the list */
- LIST_REMOVE(laddr, sctp_nxt_addr);
- sctp_free_ifa(laddr->ifa);
- SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), laddr);
- SCTP_DECR_LADDR_COUNT();
-}
-
-/*
* Remove a local address from the TCB local address restricted list
*/
void
@@ -5774,7 +5767,7 @@ sctp_pcb_init()
{
/*
* SCTP initialization for the PCB structures should be called by
- * the sctp_init() funciton.
+ * the sctp_init() function.
*/
int i;
struct timeval tv;
@@ -5933,12 +5926,32 @@ sctp_pcb_finish(void)
int i;
struct sctp_iterator *it, *nit;
+ if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+ SCTP_PRINTF("%s: race condition on teardown.\n", __func__);
+ return;
+ }
+ SCTP_BASE_VAR(sctp_pcb_initialized) = 0;
/*
* In FreeBSD the iterator thread never exits but we do clean up.
* The only way FreeBSD reaches here is if we have VRF's but we
* still add the ifdef to make it compile on old versions.
*/
+retry:
SCTP_IPI_ITERATOR_WQ_LOCK();
+ /*
+ * sctp_iterator_worker() might be working on an it entry without
+ * holding the lock. We won't find it on the list either and
+ * continue and free/destroy it. While holding the lock, spin, to
+ * avoid the race condition as sctp_iterator_worker() will have to
+ * wait to re-acquire the lock.
+ */
+ if (sctp_it_ctl.iterator_running != 0 || sctp_it_ctl.cur_it != NULL) {
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ SCTP_PRINTF("%s: Iterator running while we held the lock. Retry. "
+ "cur_it=%p\n", __func__, sctp_it_ctl.cur_it);
+ DELAY(10);
+ goto retry;
+ }
TAILQ_FOREACH_SAFE(it, &sctp_it_ctl.iteratorhead, sctp_nxt_itr, nit) {
if (it->vn != curvnet) {
continue;
@@ -5956,11 +5969,14 @@ sctp_pcb_finish(void)
sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT;
}
SCTP_ITERATOR_UNLOCK();
- SCTP_OS_TIMER_STOP(&SCTP_BASE_INFO(addr_wq_timer.timer));
+ SCTP_OS_TIMER_STOP_DRAIN(&SCTP_BASE_INFO(addr_wq_timer.timer));
SCTP_WQ_ADDR_LOCK();
LIST_FOREACH_SAFE(wi, &SCTP_BASE_INFO(addr_wq), sctp_nxt_addr, nwi) {
LIST_REMOVE(wi, sctp_nxt_addr);
SCTP_DECR_LADDR_COUNT();
+ if (wi->action == SCTP_DEL_IP_ADDRESS) {
+ SCTP_FREE(wi->ifa, SCTP_M_IFA);
+ }
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), wi);
}
SCTP_WQ_ADDR_UNLOCK();
@@ -6020,6 +6036,14 @@ sctp_pcb_finish(void)
SCTP_WQ_ADDR_DESTROY();
+ /* Get rid of other stuff too. */
+ if (SCTP_BASE_INFO(sctp_asochash) != NULL)
+ SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_asochash), SCTP_BASE_INFO(hashasocmark));
+ if (SCTP_BASE_INFO(sctp_ephash) != NULL)
+ SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_ephash), SCTP_BASE_INFO(hashmark));
+ if (SCTP_BASE_INFO(sctp_tcpephash) != NULL)
+ SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_tcpephash), SCTP_BASE_INFO(hashtcpmark));
+
SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_ep));
SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asoc));
SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_laddr));
@@ -6029,13 +6053,6 @@ sctp_pcb_finish(void)
SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_strmoq));
SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf));
SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf_ack));
- /* Get rid of other stuff to */
- if (SCTP_BASE_INFO(sctp_asochash) != NULL)
- SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_asochash), SCTP_BASE_INFO(hashasocmark));
- if (SCTP_BASE_INFO(sctp_ephash) != NULL)
- SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_ephash), SCTP_BASE_INFO(hashmark));
- if (SCTP_BASE_INFO(sctp_tcpephash) != NULL)
- SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_tcpephash), SCTP_BASE_INFO(hashtcpmark));
#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
SCTP_FREE(SCTP_BASE_STATS, SCTP_M_MCORE);
#endif
@@ -6046,7 +6063,7 @@ int
sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
int offset, int limit,
struct sockaddr *src, struct sockaddr *dst,
- struct sockaddr *altsa)
+ struct sockaddr *altsa, uint16_t port)
{
/*
* grub through the INIT pulling addresses and loading them to the
@@ -6075,7 +6092,15 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
sctp_key_t *new_key;
uint32_t keylen;
int got_random = 0, got_hmacs = 0, got_chklist = 0;
- uint8_t ecn_allowed;
+ uint8_t peer_supports_ecn;
+ uint8_t peer_supports_prsctp;
+ uint8_t peer_supports_auth;
+ uint8_t peer_supports_asconf;
+ uint8_t peer_supports_asconf_ack;
+ uint8_t peer_supports_reconfig;
+ uint8_t peer_supports_nrsack;
+ uint8_t peer_supports_pktdrop;
+ uint8_t peer_supports_idata;
#ifdef INET
struct sockaddr_in sin;
@@ -6104,8 +6129,14 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
} else {
sa = src;
}
- /* Turn off ECN until we get through all params */
- ecn_allowed = 0;
+ peer_supports_idata = 0;
+ peer_supports_ecn = 0;
+ peer_supports_prsctp = 0;
+ peer_supports_auth = 0;
+ peer_supports_asconf = 0;
+ peer_supports_reconfig = 0;
+ peer_supports_nrsack = 0;
+ peer_supports_pktdrop = 0;
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
/* mark all addresses that we have currently on the list */
net->dest_state |= SCTP_ADDR_NOT_IN_ASSOC;
@@ -6123,7 +6154,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
#ifdef INET
case AF_INET:
if (stcb->asoc.scope.ipv4_addr_legal) {
- if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_2)) {
+ if (sctp_add_remote_addr(stcb, sa, NULL, port, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_2)) {
return (-1);
}
}
@@ -6132,7 +6163,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
#ifdef INET6
case AF_INET6:
if (stcb->asoc.scope.ipv6_addr_legal) {
- if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_3)) {
+ if (sctp_add_remote_addr(stcb, sa, NULL, port, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_3)) {
return (-2);
}
}
@@ -6155,12 +6186,6 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
/* the assoc was freed? */
return (-4);
}
- /*
- * peer must explicitly turn this on. This may have been initialized
- * to be "on" in order to allow local addr changes while INIT's are
- * in flight.
- */
- stcb->asoc.peer_supports_asconf = 0;
/* now we must go through each of the params. */
phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
while (phdr) {
@@ -6223,7 +6248,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
/* the assoc was freed? */
return (-7);
}
- if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_4)) {
+ if (sctp_add_remote_addr(stcb, sa, NULL, port, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_4)) {
return (-8);
}
} else if (stcb_tmp == stcb) {
@@ -6243,12 +6268,20 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
*/
if (stcb_tmp) {
if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
/*
* in setup state we
* abort this guy
*/
+ snprintf(msg, sizeof(msg),
+ "%s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
sctp_abort_an_association(stcb_tmp->sctp_ep,
- stcb_tmp, NULL, SCTP_SO_NOT_LOCKED);
+ stcb_tmp, op_err,
+ SCTP_SO_NOT_LOCKED);
goto add_it_now;
}
SCTP_TCB_UNLOCK(stcb_tmp);
@@ -6310,7 +6343,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
* we must add the address, no scope
* set
*/
- if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_5)) {
+ if (sctp_add_remote_addr(stcb, sa, NULL, port, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_5)) {
return (-17);
}
} else if (stcb_tmp == stcb) {
@@ -6332,18 +6365,26 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
* strange, address is in another
* assoc? straighten out locks.
*/
- if (stcb_tmp)
+ if (stcb_tmp) {
if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
/*
* in setup state we
* abort this guy
*/
+ snprintf(msg, sizeof(msg),
+ "%s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
sctp_abort_an_association(stcb_tmp->sctp_ep,
- stcb_tmp, NULL, SCTP_SO_NOT_LOCKED);
+ stcb_tmp, op_err,
+ SCTP_SO_NOT_LOCKED);
goto add_it_now6;
}
- SCTP_TCB_UNLOCK(stcb_tmp);
-
+ SCTP_TCB_UNLOCK(stcb_tmp);
+ }
if (stcb->asoc.state == 0) {
/* the assoc was freed? */
return (-21);
@@ -6354,7 +6395,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
} else
#endif
if (ptype == SCTP_ECN_CAPABLE) {
- ecn_allowed = 1;
+ peer_supports_ecn = 1;
} else if (ptype == SCTP_ULP_ADAPTATION) {
if (stcb->asoc.state != SCTP_STATE_OPEN) {
struct sctp_adaptation_layer_indication ai,
@@ -6378,7 +6419,9 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
#endif
- stcb->asoc.peer_supports_asconf = 1;
+ if (stcb->asoc.asconf_supported == 0) {
+ return (-100);
+ }
if (plen > sizeof(lstore)) {
return (-23);
}
@@ -6430,7 +6473,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
stcb->asoc.peer_supports_nat = 1;
} else if (ptype == SCTP_PRSCTP_SUPPORTED) {
/* Peer supports pr-sctp */
- stcb->asoc.peer_supports_prsctp = 1;
+ peer_supports_prsctp = 1;
} else if (ptype == SCTP_SUPPORTED_CHUNK_EXT) {
/* A supported extension chunk */
struct sctp_supported_chunk_types_param *pr_supported;
@@ -6442,34 +6485,33 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
if (phdr == NULL) {
return (-25);
}
- stcb->asoc.peer_supports_asconf = 0;
- stcb->asoc.peer_supports_prsctp = 0;
- stcb->asoc.peer_supports_pktdrop = 0;
- stcb->asoc.peer_supports_strreset = 0;
- stcb->asoc.peer_supports_nr_sack = 0;
- stcb->asoc.peer_supports_auth = 0;
pr_supported = (struct sctp_supported_chunk_types_param *)phdr;
num_ent = plen - sizeof(struct sctp_paramhdr);
for (i = 0; i < num_ent; i++) {
switch (pr_supported->chunk_types[i]) {
case SCTP_ASCONF:
+ peer_supports_asconf = 1;
+ break;
case SCTP_ASCONF_ACK:
- stcb->asoc.peer_supports_asconf = 1;
+ peer_supports_asconf_ack = 1;
break;
case SCTP_FORWARD_CUM_TSN:
- stcb->asoc.peer_supports_prsctp = 1;
+ peer_supports_prsctp = 1;
break;
case SCTP_PACKET_DROPPED:
- stcb->asoc.peer_supports_pktdrop = 1;
+ peer_supports_pktdrop = 1;
break;
case SCTP_NR_SELECTIVE_ACK:
- stcb->asoc.peer_supports_nr_sack = 1;
+ peer_supports_nrsack = 1;
break;
case SCTP_STREAM_RESET:
- stcb->asoc.peer_supports_strreset = 1;
+ peer_supports_reconfig = 1;
break;
case SCTP_AUTHENTICATION:
- stcb->asoc.peer_supports_auth = 1;
+ peer_supports_auth = 1;
+ break;
+ case SCTP_IDATA:
+ peer_supports_idata = 1;
break;
default:
/* one I have not learned yet */
@@ -6498,8 +6540,8 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
}
got_random = 1;
} else if (ptype == SCTP_HMAC_LIST) {
- int num_hmacs;
- int i;
+ uint16_t num_hmacs;
+ uint16_t i;
if (plen > sizeof(hmacs_store))
break;
@@ -6606,24 +6648,51 @@ next_param:
}
}
}
- if (ecn_allowed == 0) {
- stcb->asoc.ecn_allowed = 0;
+ if ((stcb->asoc.ecn_supported == 1) &&
+ (peer_supports_ecn == 0)) {
+ stcb->asoc.ecn_supported = 0;
}
- /* validate authentication required parameters */
- if (got_random && got_hmacs) {
- stcb->asoc.peer_supports_auth = 1;
- } else {
- stcb->asoc.peer_supports_auth = 0;
+ if ((stcb->asoc.prsctp_supported == 1) &&
+ (peer_supports_prsctp == 0)) {
+ stcb->asoc.prsctp_supported = 0;
+ }
+ if ((stcb->asoc.auth_supported == 1) &&
+ ((peer_supports_auth == 0) ||
+ (got_random == 0) || (got_hmacs == 0))) {
+ stcb->asoc.auth_supported = 0;
+ }
+ if ((stcb->asoc.asconf_supported == 1) &&
+ ((peer_supports_asconf == 0) || (peer_supports_asconf_ack == 0) ||
+ (stcb->asoc.auth_supported == 0) ||
+ (saw_asconf == 0) || (saw_asconf_ack == 0))) {
+ stcb->asoc.asconf_supported = 0;
+ }
+ if ((stcb->asoc.reconfig_supported == 1) &&
+ (peer_supports_reconfig == 0)) {
+ stcb->asoc.reconfig_supported = 0;
}
- if (!stcb->asoc.peer_supports_auth && got_chklist) {
+ if ((stcb->asoc.idata_supported == 1) &&
+ (peer_supports_idata == 0)) {
+ stcb->asoc.idata_supported = 0;
+ }
+ if ((stcb->asoc.nrsack_supported == 1) &&
+ (peer_supports_nrsack == 0)) {
+ stcb->asoc.nrsack_supported = 0;
+ }
+ if ((stcb->asoc.pktdrop_supported == 1) &&
+ (peer_supports_pktdrop == 0)) {
+ stcb->asoc.pktdrop_supported = 0;
+ }
+ /* validate authentication required parameters */
+ if ((peer_supports_auth == 0) && (got_chklist == 1)) {
/* peer does not support auth but sent a chunks list? */
return (-31);
}
- if (!SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) && stcb->asoc.peer_supports_asconf &&
- !stcb->asoc.peer_supports_auth) {
+ if ((peer_supports_asconf == 1) && (peer_supports_auth == 0)) {
/* peer supports asconf but not auth? */
return (-32);
- } else if ((stcb->asoc.peer_supports_asconf) && (stcb->asoc.peer_supports_auth) &&
+ } else if ((peer_supports_asconf == 1) &&
+ (peer_supports_auth == 1) &&
((saw_asconf == 0) || (saw_asconf_ack == 0))) {
return (-33);
}
@@ -6718,10 +6787,6 @@ sctp_is_vtag_good(uint32_t tag, uint16_t lport, uint16_t rport, struct timeval *
SCTP_INP_INFO_RLOCK();
head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(tag,
SCTP_BASE_INFO(hashasocmark))];
- if (head == NULL) {
- /* invalid vtag */
- goto skip_vtag_check;
- }
LIST_FOREACH(stcb, head, sctp_asocs) {
/*
* We choose not to lock anything here. TCB's can't be
@@ -6745,8 +6810,6 @@ sctp_is_vtag_good(uint32_t tag, uint16_t lport, uint16_t rport, struct timeval *
return (0);
}
}
-skip_vtag_check:
-
chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
/* Now what about timed wait ? */
LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
@@ -6803,26 +6866,15 @@ sctp_drain_mbufs(struct sctp_tcb *stcb)
SCTP_STAT_INCR(sctps_protocol_drains_done);
cumulative_tsn_p1 = asoc->cumulative_tsn + 1;
cnt = 0;
- /* First look in the re-assembly queue */
- TAILQ_FOREACH_SAFE(chk, &asoc->reasmqueue, sctp_next, nchk) {
- if (SCTP_TSN_GT(chk->rec.data.TSN_seq, cumulative_tsn_p1)) {
- /* Yep it is above cum-ack */
- cnt++;
- SCTP_CALC_TSN_TO_GAP(gap, chk->rec.data.TSN_seq, asoc->mapping_array_base_tsn);
- asoc->size_on_reasm_queue = sctp_sbspace_sub(asoc->size_on_reasm_queue, chk->send_size);
- sctp_ucount_decr(asoc->cnt_on_reasm_queue);
- SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
- TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
- if (chk->data) {
- sctp_m_freem(chk->data);
- chk->data = NULL;
- }
- sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
- }
- }
/* Ok that was fun, now we will drain all the inbound streams? */
for (strmat = 0; strmat < asoc->streamincnt; strmat++) {
- TAILQ_FOREACH_SAFE(ctl, &asoc->strmin[strmat].inqueue, next, nctl) {
+ TAILQ_FOREACH_SAFE(ctl, &asoc->strmin[strmat].inqueue, next_instrm, nctl) {
+#ifdef INVARIANTS
+ if (ctl->on_strm_q != SCTP_ON_ORDERED) {
+ panic("Huh control: %p on_q: %d -- not ordered?",
+ ctl, ctl->on_strm_q);
+ }
+#endif
if (SCTP_TSN_GT(ctl->sinfo_tsn, cumulative_tsn_p1)) {
/* Yep it is above cum-ack */
cnt++;
@@ -6830,14 +6882,74 @@ sctp_drain_mbufs(struct sctp_tcb *stcb)
asoc->size_on_all_streams = sctp_sbspace_sub(asoc->size_on_all_streams, ctl->length);
sctp_ucount_decr(asoc->cnt_on_all_streams);
SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
- TAILQ_REMOVE(&asoc->strmin[strmat].inqueue, ctl, next);
+ if (ctl->on_read_q) {
+ TAILQ_REMOVE(&stcb->sctp_ep->read_queue, ctl, next);
+ ctl->on_read_q = 0;
+ }
+ TAILQ_REMOVE(&asoc->strmin[strmat].inqueue, ctl, next_instrm);
+ ctl->on_strm_q = 0;
if (ctl->data) {
sctp_m_freem(ctl->data);
ctl->data = NULL;
}
sctp_free_remote_addr(ctl->whoFrom);
- SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), ctl);
- SCTP_DECR_READQ_COUNT();
+ /* Now its reasm? */
+ TAILQ_FOREACH_SAFE(chk, &ctl->reasm, sctp_next, nchk) {
+ cnt++;
+ SCTP_CALC_TSN_TO_GAP(gap, chk->rec.data.TSN_seq, asoc->mapping_array_base_tsn);
+ asoc->size_on_reasm_queue = sctp_sbspace_sub(asoc->size_on_reasm_queue, chk->send_size);
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
+ TAILQ_REMOVE(&ctl->reasm, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
+ }
+ sctp_free_a_readq(stcb, ctl);
+ }
+ }
+ TAILQ_FOREACH_SAFE(ctl, &asoc->strmin[strmat].uno_inqueue, next_instrm, nctl) {
+#ifdef INVARIANTS
+ if (ctl->on_strm_q != SCTP_ON_UNORDERED) {
+ panic("Huh control: %p on_q: %d -- not unordered?",
+ ctl, ctl->on_strm_q);
+ }
+#endif
+ if (SCTP_TSN_GT(ctl->sinfo_tsn, cumulative_tsn_p1)) {
+ /* Yep it is above cum-ack */
+ cnt++;
+ SCTP_CALC_TSN_TO_GAP(gap, ctl->sinfo_tsn, asoc->mapping_array_base_tsn);
+ asoc->size_on_all_streams = sctp_sbspace_sub(asoc->size_on_all_streams, ctl->length);
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
+ if (ctl->on_read_q) {
+ TAILQ_REMOVE(&stcb->sctp_ep->read_queue, ctl, next);
+ ctl->on_read_q = 0;
+ }
+ TAILQ_REMOVE(&asoc->strmin[strmat].uno_inqueue, ctl, next_instrm);
+ ctl->on_strm_q = 0;
+ if (ctl->data) {
+ sctp_m_freem(ctl->data);
+ ctl->data = NULL;
+ }
+ sctp_free_remote_addr(ctl->whoFrom);
+ /* Now its reasm? */
+ TAILQ_FOREACH_SAFE(chk, &ctl->reasm, sctp_next, nchk) {
+ cnt++;
+ SCTP_CALC_TSN_TO_GAP(gap, chk->rec.data.TSN_seq, asoc->mapping_array_base_tsn);
+ asoc->size_on_reasm_queue = sctp_sbspace_sub(asoc->size_on_reasm_queue, chk->send_size);
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
+ TAILQ_REMOVE(&ctl->reasm, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
+ }
+ sctp_free_a_readq(stcb, ctl);
}
}
}
@@ -6962,6 +7074,11 @@ sctp_initiate_iterator(inp_func inpf,
if (af == NULL) {
return (-1);
}
+ if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+ SCTP_PRINTF("%s: abort on initialize being %d\n", __func__,
+ SCTP_BASE_VAR(sctp_pcb_initialized));
+ return (-1);
+ }
SCTP_MALLOC(it, struct sctp_iterator *, sizeof(struct sctp_iterator),
SCTP_M_ITER);
if (it == NULL) {
@@ -7000,7 +7117,13 @@ sctp_initiate_iterator(inp_func inpf,
}
SCTP_IPI_ITERATOR_WQ_LOCK();
-
+ if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ SCTP_PRINTF("%s: rollback on initialize being %d it=%p\n", __func__,
+ SCTP_BASE_VAR(sctp_pcb_initialized), it);
+ SCTP_FREE(it, SCTP_M_ITER);
+ return (-1);
+ }
TAILQ_INSERT_TAIL(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
if (sctp_it_ctl.iterator_running == 0) {
sctp_wakeup_iterator();
diff --git a/freebsd/sys/netinet/sctp_pcb.h b/freebsd/sys/netinet/sctp_pcb.h
index 8045765c..98204096 100644
--- a/freebsd/sys/netinet/sctp_pcb.h
+++ b/freebsd/sys/netinet/sctp_pcb.h
@@ -107,7 +107,7 @@ struct sctp_ifa {
* that we MUST lock appropriate locks. This
* is for V6. */
union sctp_sockstore address;
- uint32_t refcount; /* number of folks refering to this */
+ uint32_t refcount; /* number of folks referring to this */
uint32_t flags;
uint32_t localifa_flags;
uint32_t vrf_id; /* vrf_id of this addr (for deleting) */
@@ -360,7 +360,7 @@ struct sctp_pcbtsn_rlog {
struct sctp_inpcb {
/*-
* put an inpcb in front of it all, kind of a waste but we need to
- * for compatability with all the other stuff.
+ * for compatibility with all the other stuff.
*/
union {
struct inpcb inp;
@@ -404,9 +404,17 @@ struct sctp_inpcb {
uint32_t sctp_frag_point;
uint32_t partial_delivery_point;
uint32_t sctp_context;
+ uint32_t max_cwnd;
uint8_t local_strreset_support;
uint32_t sctp_cmt_on_off;
- uint32_t sctp_ecn_enable;
+ uint8_t ecn_supported;
+ uint8_t prsctp_supported;
+ uint8_t auth_supported;
+ uint8_t idata_supported;
+ uint8_t asconf_supported;
+ uint8_t reconfig_supported;
+ uint8_t nrsack_supported;
+ uint8_t pktdrop_supported;
struct sctp_nonpad_sndrcvinfo def_send;
/*-
* These three are here for the sosend_dgram
@@ -423,6 +431,7 @@ struct sctp_inpcb {
struct mtx inp_rdata_mtx;
int32_t refcount;
uint32_t def_vrf_id;
+ uint16_t fibnum;
uint32_t total_sends;
uint32_t total_recvs;
uint32_t last_abort_code;
@@ -576,7 +585,7 @@ void sctp_inpcb_free(struct sctp_inpcb *, int, int);
struct sctp_tcb *
sctp_aloc_assoc(struct sctp_inpcb *, struct sockaddr *,
- int *, uint32_t, uint32_t, struct thread *);
+ int *, uint32_t, uint32_t, uint16_t, uint16_t, struct thread *);
int sctp_free_assoc(struct sctp_inpcb *, struct sctp_tcb *, int, int);
@@ -590,13 +599,9 @@ void
void sctp_add_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *, uint32_t);
-int sctp_insert_laddr(struct sctpladdr *, struct sctp_ifa *, uint32_t);
-
-void sctp_remove_laddr(struct sctp_laddr *);
-
void sctp_del_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *);
-int sctp_add_remote_addr(struct sctp_tcb *, struct sockaddr *, struct sctp_nets **, int, int);
+int sctp_add_remote_addr(struct sctp_tcb *, struct sockaddr *, struct sctp_nets **, uint16_t, int, int);
void sctp_remove_net(struct sctp_tcb *, struct sctp_nets *);
@@ -611,7 +616,7 @@ void sctp_del_local_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
int
sctp_load_addresses_from_init(struct sctp_tcb *, struct mbuf *, int, int,
- struct sockaddr *, struct sockaddr *, struct sockaddr *);
+ struct sockaddr *, struct sockaddr *, struct sockaddr *, uint16_t);
int
sctp_set_primary_addr(struct sctp_tcb *, struct sockaddr *,
@@ -625,6 +630,8 @@ int sctp_destination_is_reachable(struct sctp_tcb *, struct sockaddr *);
int sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp);
+void sctp_clean_up_stream(struct sctp_tcb *stcb, struct sctp_readhead *rh);
+
/*-
* Null in last arg inpcb indicate run on ALL ep's. Specific inp in last arg
* indicates run on ONLY assoc's of the specified endpoint.
@@ -646,11 +653,5 @@ void
#endif
-#ifdef INVARIANTS
-void
- sctp_validate_no_locks(struct sctp_inpcb *inp);
-
-#endif
-
#endif /* _KERNEL */
#endif /* !__sctp_pcb_h__ */
diff --git a/freebsd/sys/netinet/sctp_peeloff.c b/freebsd/sys/netinet/sctp_peeloff.c
index e8bb0444..3603e41a 100644
--- a/freebsd/sys/netinet/sctp_peeloff.c
+++ b/freebsd/sys/netinet/sctp_peeloff.c
@@ -120,9 +120,16 @@ sctp_do_peeloff(struct socket *head, struct socket *so, sctp_assoc_t assoc_id)
n_inp->sctp_mobility_features = inp->sctp_mobility_features;
n_inp->sctp_frag_point = inp->sctp_frag_point;
n_inp->sctp_cmt_on_off = inp->sctp_cmt_on_off;
- n_inp->sctp_ecn_enable = inp->sctp_ecn_enable;
+ n_inp->ecn_supported = inp->ecn_supported;
+ n_inp->prsctp_supported = inp->prsctp_supported;
+ n_inp->auth_supported = inp->auth_supported;
+ n_inp->asconf_supported = inp->asconf_supported;
+ n_inp->reconfig_supported = inp->reconfig_supported;
+ n_inp->nrsack_supported = inp->nrsack_supported;
+ n_inp->pktdrop_supported = inp->pktdrop_supported;
n_inp->partial_delivery_point = inp->partial_delivery_point;
n_inp->sctp_context = inp->sctp_context;
+ n_inp->max_cwnd = inp->max_cwnd;
n_inp->local_strreset_support = inp->local_strreset_support;
n_inp->inp_starting_point_for_iterator = NULL;
/* copy in the authentication parameters from the original endpoint */
diff --git a/freebsd/sys/netinet/sctp_structs.h b/freebsd/sys/netinet/sctp_structs.h
index a8b86c62..280100bb 100644
--- a/freebsd/sys/netinet/sctp_structs.h
+++ b/freebsd/sys/netinet/sctp_structs.h
@@ -76,6 +76,7 @@ TAILQ_HEAD(sctpnetlisthead, sctp_nets);
struct sctp_stream_reset_list {
TAILQ_ENTRY(sctp_stream_reset_list) next_resp;
+ uint32_t seq;
uint32_t tsn;
uint32_t number_entries;
uint16_t list_of_streams[];
@@ -188,9 +189,12 @@ struct iterator_control {
struct sctp_net_route {
sctp_rtentry_t *ro_rt;
- void *ro_lle;
- void *ro_ia;
- int ro_flags;
+ struct llentry *ro_lle;
+ char *ro_prepend;
+ uint16_t ro_plen;
+ uint16_t ro_flags;
+ uint16_t ro_mtu;
+ uint16_t spare;
union sctp_sockstore _l_addr; /* remote peer addr */
struct sctp_ifa *_s_addr; /* our selected src addr */
};
@@ -380,15 +384,13 @@ struct sctp_nets {
uint8_t lan_type;
uint8_t rto_needed;
uint32_t flowid;
-#ifdef INVARIANTS
- uint8_t flowidset;
-#endif
+ uint8_t flowtype;
};
struct sctp_data_chunkrec {
uint32_t TSN_seq; /* the TSN of this transmit */
- uint16_t stream_seq; /* the stream sequence number of this transmit */
+ uint32_t stream_seq; /* the stream sequence number of this transmit */
uint16_t stream_number; /* the stream number of this guy */
uint32_t payloadtype;
uint32_t context; /* from send */
@@ -399,6 +401,7 @@ struct sctp_data_chunkrec {
*/
uint32_t fast_retran_tsn; /* sending_seq at the time of FR */
struct timeval timetodrop; /* time we drop it from queue */
+ uint32_t fsn_num; /* Fragment Sequence Number */
uint8_t doing_fast_retransmit;
uint8_t rcv_flags; /* flags pulled from data chunk on inbound for
* outbound holds sending flags for PR-SCTP. */
@@ -418,8 +421,8 @@ TAILQ_HEAD(sctpchunk_listhead, sctp_tmit_chunk);
#define CHUNK_FLAGS_FRAGMENT_OK 0x0100
struct chk_id {
- uint16_t id;
- uint16_t can_take_data;
+ uint8_t id;
+ uint8_t can_take_data;
};
@@ -450,14 +453,9 @@ struct sctp_tmit_chunk {
uint8_t window_probe;
};
-/*
- * The first part of this structure MUST be the entire sinfo structure. Maybe
- * I should have made it a sub structure... we can circle back later and do
- * that if we want.
- */
struct sctp_queued_to_read { /* sinfo structure Pluse more */
uint16_t sinfo_stream; /* off the wire */
- uint16_t sinfo_ssn; /* off the wire */
+ uint32_t sinfo_ssn; /* off the wire */
uint16_t sinfo_flags; /* SCTP_UNORDERED from wire use SCTP_EOF for
* EOR */
uint32_t sinfo_ppid; /* off the wire */
@@ -467,8 +465,11 @@ struct sctp_queued_to_read { /* sinfo structure Pluse more */
uint32_t sinfo_cumtsn; /* Use this in reassembly as last TSN */
sctp_assoc_t sinfo_assoc_id; /* our assoc id */
/* Non sinfo stuff */
+ uint32_t msg_id; /* Fragment Index */
uint32_t length; /* length of data */
uint32_t held_length; /* length held in sb */
+ uint32_t top_fsn; /* Highest FSN in queue */
+ uint32_t fsn_included; /* Highest FSN in *data portion */
struct sctp_nets *whoFrom; /* where it came from */
struct mbuf *data; /* front of the mbuf chain of data with
* PKT_HDR */
@@ -477,14 +478,24 @@ struct sctp_queued_to_read { /* sinfo structure Pluse more */
* take it from us */
struct sctp_tcb *stcb; /* assoc, used for window update */
TAILQ_ENTRY(sctp_queued_to_read) next;
+ TAILQ_ENTRY(sctp_queued_to_read) next_instrm;
+ struct sctpchunk_listhead reasm;
uint16_t port_from;
uint16_t spec_flags; /* Flags to hold the notification field */
uint8_t do_not_ref_stcb;
uint8_t end_added;
uint8_t pdapi_aborted;
+ uint8_t pdapi_started;
uint8_t some_taken;
+ uint8_t last_frag_seen;
+ uint8_t first_frag_seen;
+ uint8_t on_read_q;
+ uint8_t on_strm_q;
};
+#define SCTP_ON_ORDERED 1
+#define SCTP_ON_UNORDERED 2
+
/* This data structure will be on the outbound
* stream queues. Data will be pulled off from
* the front of the mbuf data and chunk-ified
@@ -510,6 +521,7 @@ struct sctp_stream_queue_pending {
struct sctp_nets *net;
TAILQ_ENTRY(sctp_stream_queue_pending) next;
TAILQ_ENTRY(sctp_stream_queue_pending) ss_next;
+ uint32_t fsn;
uint32_t length;
uint32_t timetolive;
uint32_t ppid;
@@ -533,14 +545,17 @@ struct sctp_stream_queue_pending {
TAILQ_HEAD(sctpwheelunrel_listhead, sctp_stream_in);
struct sctp_stream_in {
struct sctp_readhead inqueue;
+ struct sctp_readhead uno_inqueue;
+ uint32_t last_sequence_delivered; /* used for re-order */
uint16_t stream_no;
- uint16_t last_sequence_delivered; /* used for re-order */
uint8_t delivery_started;
+ uint8_t pd_api_started;
};
TAILQ_HEAD(sctpwheel_listhead, sctp_stream_out);
TAILQ_HEAD(sctplist_listhead, sctp_stream_queue_pending);
+
/* Round-robin schedulers */
struct ss_rr {
/* next link in wheel */
@@ -567,9 +582,14 @@ struct ss_fb {
* This union holds all data necessary for
* different stream schedulers.
*/
-union scheduling_data {
- struct sctpwheel_listhead out_wheel;
- struct sctplist_listhead out_list;
+struct scheduling_data {
+ struct sctp_stream_out *locked_on_sending;
+ /* circular looking for output selection */
+ struct sctp_stream_out *last_out_stream;
+ union {
+ struct sctpwheel_listhead wheel;
+ struct sctplist_listhead list;
+ } out;
};
/*
@@ -582,14 +602,37 @@ union scheduling_parameters {
struct ss_fb fb;
};
+/* States for outgoing streams */
+#define SCTP_STREAM_CLOSED 0x00
+#define SCTP_STREAM_OPENING 0x01
+#define SCTP_STREAM_OPEN 0x02
+#define SCTP_STREAM_RESET_PENDING 0x03
+#define SCTP_STREAM_RESET_IN_FLIGHT 0x04
+
+#define SCTP_MAX_STREAMS_AT_ONCE_RESET 200
+
/* This struct is used to track the traffic on outbound streams */
struct sctp_stream_out {
struct sctp_streamhead outqueue;
union scheduling_parameters ss_params;
- uint32_t chunks_on_queues;
+ uint32_t chunks_on_queues; /* send queue and sent queue */
+#if defined(SCTP_DETAILED_STR_STATS)
+ uint32_t abandoned_unsent[SCTP_PR_SCTP_MAX + 1];
+ uint32_t abandoned_sent[SCTP_PR_SCTP_MAX + 1];
+#else
+ /* Only the aggregation */
+ uint32_t abandoned_unsent[1];
+ uint32_t abandoned_sent[1];
+#endif
+ /*
+ * For associations using DATA chunks, the lower 16-bit of
+ * next_mid_ordered are used as the next SSN.
+ */
+ uint32_t next_mid_ordered;
+ uint32_t next_mid_unordered;
uint16_t stream_no;
- uint16_t next_sequence_send; /* next one I expect to send out */
uint8_t last_msg_incomplete;
+ uint8_t state;
};
/* used to keep track of the addresses yet to try to add/delete */
@@ -616,12 +659,13 @@ struct sctp_scoping {
struct sctp_tsn_log {
void *stcb;
uint32_t tsn;
+ uint32_t seq;
uint16_t strm;
- uint16_t seq;
uint16_t sz;
uint16_t flgs;
uint16_t in_pos;
uint16_t in_out;
+ uint16_t resv;
};
#define SCTP_FS_SPEC_LOG_SIZE 200
@@ -697,7 +741,7 @@ struct sctp_ss_functions {
int holds_lock);
void (*sctp_ss_clear) (struct sctp_tcb *stcb, struct sctp_association *asoc,
int clear_values, int holds_lock);
- void (*sctp_ss_init_stream) (struct sctp_stream_out *strq, struct sctp_stream_out *with_strq);
+ void (*sctp_ss_init_stream) (struct sctp_tcb *stcb, struct sctp_stream_out *strq, struct sctp_stream_out *with_strq);
void (*sctp_ss_add_to_stream) (struct sctp_tcb *stcb, struct sctp_association *asoc,
struct sctp_stream_out *strq, struct sctp_stream_queue_pending *sp, int holds_lock);
int (*sctp_ss_is_empty) (struct sctp_tcb *stcb, struct sctp_association *asoc);
@@ -713,6 +757,7 @@ struct sctp_ss_functions {
struct sctp_stream_out *strq, uint16_t * value);
int (*sctp_ss_set_value) (struct sctp_tcb *stcb, struct sctp_association *asoc,
struct sctp_stream_out *strq, uint16_t value);
+ int (*sctp_ss_is_user_msgs_incomplete) (struct sctp_tcb *stcb, struct sctp_association *asoc);
};
/* used to save ASCONF chunks for retransmission */
@@ -792,19 +837,8 @@ struct sctp_association {
struct sctpchunk_listhead sent_queue;
struct sctpchunk_listhead send_queue;
- /* re-assembly queue for fragmented chunks on the inbound path */
- struct sctpchunk_listhead reasmqueue;
-
/* Scheduling queues */
- union scheduling_data ss_data;
-
- /*
- * This pointer will be set to NULL most of the time. But when we
- * have a fragmented message, where we could not get out all of the
- * message at the last send then this will point to the stream to go
- * get data from.
- */
- struct sctp_stream_out *locked_on_sending;
+ struct scheduling_data ss_data;
/* If an iterator is looking at me, this is it */
struct sctp_iterator *stcb_starting_point_for_iterator;
@@ -837,8 +871,6 @@ struct sctp_association {
/* last place I got a control from */
struct sctp_nets *last_control_chunk_from;
- /* circular looking for output selection */
- struct sctp_stream_out *last_out_stream;
/*
* wait to the point the cum-ack passes req->send_reset_at_tsn for
@@ -862,7 +894,6 @@ struct sctp_association {
uint32_t stream_scheduling_module;
uint32_t vrf_id;
-
uint32_t cookie_preserve_req;
/* ASCONF next seq I am sending out, inits at init-tsn */
uint32_t asconf_seq_out;
@@ -936,7 +967,7 @@ struct sctp_association {
uint32_t sat_t3_recovery_tsn;
uint32_t tsn_last_delivered;
/*
- * For the pd-api we should re-write this a bit more efficent. We
+ * For the pd-api we should re-write this a bit more efficient. We
* could have multiple sctp_queued_to_read's that we are building at
* once. Now we only do this when we get ready to deliver to the
* socket buffer. Note that we depend on the fact that the struct is
@@ -1142,7 +1173,7 @@ struct sctp_association {
uint8_t hb_random_idx;
uint8_t default_dscp;
uint8_t asconf_del_pending; /* asconf delete last addr pending */
-
+ uint8_t trigger_reset;
/*
* This value, plus all other ack'd but above cum-ack is added
* together to cross check against the bit that we have yet to
@@ -1150,34 +1181,24 @@ struct sctp_association {
* sum is updated as well.
*/
- /* Flag to tell if ECN is allowed */
- uint8_t ecn_allowed;
+ /* Flags whether an extension is supported or not */
+ uint8_t ecn_supported;
+ uint8_t prsctp_supported;
+ uint8_t auth_supported;
+ uint8_t asconf_supported;
+ uint8_t reconfig_supported;
+ uint8_t nrsack_supported;
+ uint8_t pktdrop_supported;
+ uint8_t idata_supported;
/* Did the peer make the stream config (add out) request */
uint8_t peer_req_out;
- /* flag to indicate if peer can do asconf */
- uint8_t peer_supports_asconf;
- /* EY - flag to indicate if peer can do nr_sack */
- uint8_t peer_supports_nr_sack;
- /* pr-sctp support flag */
- uint8_t peer_supports_prsctp;
- /* peer authentication support flag */
- uint8_t peer_supports_auth;
- /* stream resets are supported by the peer */
- uint8_t peer_supports_strreset;
uint8_t local_strreset_support;
-
uint8_t peer_supports_nat;
- /*
- * packet drop's are supported by the peer, we don't really care
- * about this but we bookkeep it anyway.
- */
- uint8_t peer_supports_pktdrop;
struct sctp_scoping scope;
/* flags to handle send alternate net tracking */
- uint8_t used_alt_onsack;
uint8_t used_alt_asconfack;
uint8_t fast_retran_loss_recovery;
uint8_t sat_t3_loss_recovery;
@@ -1198,12 +1219,11 @@ struct sctp_association {
uint8_t sctp_cmt_on_off;
uint8_t iam_blocking;
uint8_t cookie_how[8];
- /* EY 05/05/08 - NR_SACK variable */
- uint8_t sctp_nr_sack_on_off;
/* JRS 5/21/07 - CMT PF variable */
uint8_t sctp_cmt_pf;
uint8_t use_precise_time;
uint64_t sctp_features;
+ uint32_t max_cwnd;
uint16_t port; /* remote UDP encapsulation port */
/*
* The mapping array is used to track out of order sequences above
@@ -1222,6 +1242,8 @@ struct sctp_association {
uint32_t timoshutdownack;
struct timeval start_time;
struct timeval discontinuity_time;
+ uint64_t abandoned_unsent[SCTP_PR_SCTP_MAX + 1];
+ uint64_t abandoned_sent[SCTP_PR_SCTP_MAX + 1];
};
#endif
diff --git a/freebsd/sys/netinet/sctp_sysctl.c b/freebsd/sys/netinet/sctp_sysctl.c
index d0da7a6f..8715c69b 100644
--- a/freebsd/sys/netinet/sctp_sysctl.c
+++ b/freebsd/sys/netinet/sctp_sysctl.c
@@ -43,6 +43,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctputil.h>
#include <netinet/sctp_output.h>
#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+FEATURE(sctp, "Stream Control Transmission Protocol");
/*
* sysctl tunable variables
@@ -56,7 +59,12 @@ sctp_init_sysctls()
SCTP_BASE_SYSCTL(sctp_auto_asconf) = SCTPCTL_AUTOASCONF_DEFAULT;
SCTP_BASE_SYSCTL(sctp_multiple_asconfs) = SCTPCTL_MULTIPLEASCONFS_DEFAULT;
SCTP_BASE_SYSCTL(sctp_ecn_enable) = SCTPCTL_ECN_ENABLE_DEFAULT;
- SCTP_BASE_SYSCTL(sctp_strict_sacks) = SCTPCTL_STRICT_SACKS_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_pr_enable) = SCTPCTL_PR_ENABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_auth_enable) = SCTPCTL_AUTH_ENABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_asconf_enable) = SCTPCTL_ASCONF_ENABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_reconfig_enable) = SCTPCTL_RECONFIG_ENABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_nrsack_enable) = SCTPCTL_NRSACK_ENABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_pktdrop_enable) = SCTPCTL_PKTDROP_ENABLE_DEFAULT;
SCTP_BASE_SYSCTL(sctp_peer_chunk_oh) = SCTPCTL_PEER_CHKOH_DEFAULT;
SCTP_BASE_SYSCTL(sctp_max_burst_default) = SCTPCTL_MAXBURST_DEFAULT;
SCTP_BASE_SYSCTL(sctp_fr_max_burst_default) = SCTPCTL_FRMAXBURST_DEFAULT;
@@ -86,25 +94,18 @@ sctp_init_sysctls()
SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default) = SCTPCTL_INCOMING_STREAMS_DEFAULT;
SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default) = SCTPCTL_OUTGOING_STREAMS_DEFAULT;
SCTP_BASE_SYSCTL(sctp_cmt_on_off) = SCTPCTL_CMT_ON_OFF_DEFAULT;
- /* EY */
- SCTP_BASE_SYSCTL(sctp_nr_sack_on_off) = SCTPCTL_NR_SACK_ON_OFF_DEFAULT;
SCTP_BASE_SYSCTL(sctp_cmt_use_dac) = SCTPCTL_CMT_USE_DAC_DEFAULT;
SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) = SCTPCTL_CWND_MAXBURST_DEFAULT;
- SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) = SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT;
- SCTP_BASE_SYSCTL(sctp_auth_disable) = SCTPCTL_AUTH_DISABLE_DEFAULT;
SCTP_BASE_SYSCTL(sctp_nat_friendly) = SCTPCTL_NAT_FRIENDLY_DEFAULT;
SCTP_BASE_SYSCTL(sctp_L2_abc_variable) = SCTPCTL_ABC_L_VAR_DEFAULT;
SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count) = SCTPCTL_MAX_CHAINED_MBUFS_DEFAULT;
SCTP_BASE_SYSCTL(sctp_do_drain) = SCTPCTL_DO_SCTP_DRAIN_DEFAULT;
SCTP_BASE_SYSCTL(sctp_hb_maxburst) = SCTPCTL_HB_MAX_BURST_DEFAULT;
SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit) = SCTPCTL_ABORT_AT_LIMIT_DEFAULT;
- SCTP_BASE_SYSCTL(sctp_strict_data_order) = SCTPCTL_STRICT_DATA_ORDER_DEFAULT;
SCTP_BASE_SYSCTL(sctp_min_residual) = SCTPCTL_MIN_RESIDUAL_DEFAULT;
SCTP_BASE_SYSCTL(sctp_max_retran_chunk) = SCTPCTL_MAX_RETRAN_CHUNK_DEFAULT;
SCTP_BASE_SYSCTL(sctp_logging_level) = SCTPCTL_LOGGING_LEVEL_DEFAULT;
- /* JRS - Variable for default congestion control module */
SCTP_BASE_SYSCTL(sctp_default_cc_module) = SCTPCTL_DEFAULT_CC_MODULE_DEFAULT;
- /* RS - Variable for default stream scheduling module */
SCTP_BASE_SYSCTL(sctp_default_ss_module) = SCTPCTL_DEFAULT_SS_MODULE_DEFAULT;
SCTP_BASE_SYSCTL(sctp_default_frag_interleave) = SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DEFAULT;
SCTP_BASE_SYSCTL(sctp_mobility_base) = SCTPCTL_MOBILITY_BASE_DEFAULT;
@@ -136,7 +137,7 @@ sctp_init_sysctls()
/* It returns an upper limit. No filtering is done here */
static unsigned int
-number_of_addresses(struct sctp_inpcb *inp)
+sctp_sysctl_number_of_addresses(struct sctp_inpcb *inp)
{
unsigned int cnt;
struct sctp_vrf *vrf;
@@ -186,7 +187,7 @@ number_of_addresses(struct sctp_inpcb *inp)
}
static int
-copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sysctl_req *req)
+sctp_sysctl_copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sysctl_req *req)
{
struct sctp_ifn *sctp_ifn;
struct sctp_ifa *sctp_ifa;
@@ -251,7 +252,7 @@ copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct s
if (ipv4_addr_legal) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ sin = &sctp_ifa->address.sin;
if (sin->sin_addr.s_addr == 0)
continue;
if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
@@ -270,7 +271,7 @@ copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct s
if (ipv6_addr_legal) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ sin6 = &sctp_ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
continue;
if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
@@ -280,15 +281,6 @@ copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct s
if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
if (local_scope == 0)
continue;
- if (sin6->sin6_scope_id == 0) {
- /*
- * bad link
- * local
- * address
- */
- if (sa6_recoverscope(sin6) != 0)
- continue;
- }
}
if ((site_scope == 0) && (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)))
continue;
@@ -352,7 +344,7 @@ copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct s
* sysctl functions
*/
static int
-sctp_assoclist(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_assoclist(SYSCTL_HANDLER_ARGS)
{
unsigned int number_of_endpoints;
unsigned int number_of_local_addresses;
@@ -374,14 +366,14 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS)
number_of_remote_addresses = 0;
SCTP_INP_INFO_RLOCK();
- if (req->oldptr == USER_ADDR_NULL) {
+ if (req->oldptr == NULL) {
LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) {
SCTP_INP_RLOCK(inp);
number_of_endpoints++;
- number_of_local_addresses += number_of_addresses(inp);
+ number_of_local_addresses += sctp_sysctl_number_of_addresses(inp);
LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
number_of_associations++;
- number_of_local_addresses += number_of_addresses(inp);
+ number_of_local_addresses += sctp_sysctl_number_of_addresses(inp);
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
number_of_remote_addresses++;
}
@@ -398,7 +390,7 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS)
req->oldidx = (n + n / 8);
return (0);
}
- if (req->newptr != USER_ADDR_NULL) {
+ if (req->newptr != NULL) {
SCTP_INP_INFO_RUNLOCK();
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_SYSCTL, EPERM);
return (EPERM);
@@ -412,11 +404,12 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS)
xinpcb.last = 0;
xinpcb.local_port = ntohs(inp->sctp_lport);
xinpcb.flags = inp->sctp_flags;
- xinpcb.features = (uint32_t) inp->sctp_features;
+ xinpcb.features = inp->sctp_features;
xinpcb.total_sends = inp->total_sends;
xinpcb.total_recvs = inp->total_recvs;
xinpcb.total_nospaces = inp->total_nospaces;
xinpcb.fragmentation_point = inp->sctp_frag_point;
+ xinpcb.socket = inp->sctp_socket;
so = inp->sctp_socket;
if ((so == NULL) ||
(inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
@@ -424,7 +417,11 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS)
xinpcb.maxqlen = 0;
} else {
xinpcb.qlen = so->so_qlen;
+ xinpcb.qlen_old = so->so_qlen > USHRT_MAX ?
+ USHRT_MAX : (uint16_t) so->so_qlen;
xinpcb.maxqlen = so->so_qlimit;
+ xinpcb.maxqlen_old = so->so_qlimit > USHRT_MAX ?
+ USHRT_MAX : (uint16_t) so->so_qlimit;
}
SCTP_INP_INCR_REF(inp);
SCTP_INP_RUNLOCK(inp);
@@ -436,7 +433,7 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS)
}
SCTP_INP_INFO_RLOCK();
SCTP_INP_RLOCK(inp);
- error = copy_out_local_addresses(inp, NULL, req);
+ error = sctp_sysctl_copy_out_local_addresses(inp, NULL, req);
if (error) {
SCTP_INP_DECR_REF(inp);
return (error);
@@ -451,7 +448,7 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS)
if (stcb->asoc.primary_destination != NULL)
xstcb.primary_addr = stcb->asoc.primary_destination->ro._l_addr;
xstcb.heartbeat_interval = stcb->asoc.heart_beat_delay;
- xstcb.state = SCTP_GET_STATE(&stcb->asoc); /* FIXME */
+ xstcb.state = (uint32_t) sctp_map_assoc_state(stcb->asoc.state);
/* 7.0 does not support these */
xstcb.assoc_id = sctp_get_associd(stcb);
xstcb.peers_rwnd = stcb->asoc.peers_rwnd;
@@ -487,7 +484,7 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS)
}
SCTP_INP_INFO_RLOCK();
SCTP_INP_RLOCK(inp);
- error = copy_out_local_addresses(inp, stcb, req);
+ error = sctp_sysctl_copy_out_local_addresses(inp, stcb, req);
if (error) {
SCTP_INP_DECR_REF(inp);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -509,6 +506,7 @@ sctp_assoclist(SYSCTL_HANDLER_ARGS)
xraddr.mtu = net->mtu;
xraddr.rtt = net->rtt / 1000;
xraddr.heartbeat_interval = net->heart_beat_delay;
+ xraddr.ssthresh = net->ssthresh;
xraddr.start_time.tv_sec = (uint32_t) net->start_time.tv_sec;
xraddr.start_time.tv_usec = (uint32_t) net->start_time.tv_usec;
SCTP_INP_RUNLOCK(inp);
@@ -555,153 +553,120 @@ skip:
return (error);
}
-
-#define RANGECHK(var, min, max) \
- if ((var) < (min)) { (var) = (min); } \
- else if ((var) > (max)) { (var) = (max); }
-
static int
-sysctl_sctp_udp_tunneling_check(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_udp_tunneling(SYSCTL_HANDLER_ARGS)
{
int error;
- uint32_t old_sctp_udp_tunneling_port;
+ uint32_t old, new;
SCTP_INP_INFO_RLOCK();
- old_sctp_udp_tunneling_port = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port);
+ old = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port);
SCTP_INP_INFO_RUNLOCK();
- error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
- if (error == 0) {
- RANGECHK(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port), SCTPCTL_UDP_TUNNELING_PORT_MIN, SCTPCTL_UDP_TUNNELING_PORT_MAX);
- if (old_sctp_udp_tunneling_port == SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) {
- error = 0;
- goto out;
- }
- SCTP_INP_INFO_WLOCK();
- if (old_sctp_udp_tunneling_port) {
- sctp_over_udp_stop();
- }
- if (SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) {
- if (sctp_over_udp_start()) {
- SCTP_BASE_SYSCTL(sctp_udp_tunneling_port) = 0;
+ new = old;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if ((error == 0) &&
+ (req->newptr != NULL)) {
+#if (SCTPCTL_UDP_TUNNELING_PORT_MIN == 0)
+ if (new > SCTPCTL_UDP_TUNNELING_PORT_MAX) {
+#else
+ if ((new < SCTPCTL_UDP_TUNNELING_PORT_MIN) ||
+ (new > SCTPCTL_UDP_TUNNELING_PORT_MAX)) {
+#endif
+ error = EINVAL;
+ } else {
+ SCTP_INP_INFO_WLOCK();
+ SCTP_BASE_SYSCTL(sctp_udp_tunneling_port) = new;
+ if (old != 0) {
+ sctp_over_udp_stop();
}
+ if (new != 0) {
+ error = sctp_over_udp_start();
+ }
+ SCTP_INP_INFO_WUNLOCK();
}
- SCTP_INP_INFO_WUNLOCK();
}
-out:
return (error);
}
static int
-sysctl_sctp_check(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_auth(SYSCTL_HANDLER_ARGS)
{
int error;
-
-#ifdef VIMAGE
- error = vnet_sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+ uint32_t new;
+
+ new = SCTP_BASE_SYSCTL(sctp_auth_enable);
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if ((error == 0) &&
+ (req->newptr != NULL)) {
+#if (SCTPCTL_AUTH_ENABLE_MIN == 0)
+ if ((new > SCTPCTL_AUTH_ENABLE_MAX) ||
+ ((new == 0) && (SCTP_BASE_SYSCTL(sctp_asconf_enable) == 1))) {
#else
- error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+ if ((new < SCTPCTL_AUTH_ENABLE_MIN) ||
+ (new > SCTPCTL_AUTH_ENABLE_MAX) ||
+ ((new == 0) && (SCTP_BASE_SYSCTL(sctp_asconf_enable) == 1))) {
#endif
- if (error == 0) {
- RANGECHK(SCTP_BASE_SYSCTL(sctp_sendspace), SCTPCTL_MAXDGRAM_MIN, SCTPCTL_MAXDGRAM_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_recvspace), SCTPCTL_RECVSPACE_MIN, SCTPCTL_RECVSPACE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_auto_asconf), SCTPCTL_AUTOASCONF_MIN, SCTPCTL_AUTOASCONF_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_ecn_enable), SCTPCTL_ECN_ENABLE_MIN, SCTPCTL_ECN_ENABLE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_strict_sacks), SCTPCTL_STRICT_SACKS_MIN, SCTPCTL_STRICT_SACKS_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_peer_chunk_oh), SCTPCTL_PEER_CHKOH_MIN, SCTPCTL_PEER_CHKOH_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_max_burst_default), SCTPCTL_MAXBURST_MIN, SCTPCTL_MAXBURST_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_fr_max_burst_default), SCTPCTL_FRMAXBURST_MIN, SCTPCTL_FRMAXBURST_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue), SCTPCTL_MAXCHUNKS_MIN, SCTPCTL_MAXCHUNKS_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_hashtblsize), SCTPCTL_TCBHASHSIZE_MIN, SCTPCTL_TCBHASHSIZE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_pcbtblsize), SCTPCTL_PCBHASHSIZE_MIN, SCTPCTL_PCBHASHSIZE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_min_split_point), SCTPCTL_MIN_SPLIT_POINT_MIN, SCTPCTL_MIN_SPLIT_POINT_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_chunkscale), SCTPCTL_CHUNKSCALE_MIN, SCTPCTL_CHUNKSCALE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default), SCTPCTL_DELAYED_SACK_TIME_MIN, SCTPCTL_DELAYED_SACK_TIME_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_sack_freq_default), SCTPCTL_SACK_FREQ_MIN, SCTPCTL_SACK_FREQ_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_system_free_resc_limit), SCTPCTL_SYS_RESOURCE_MIN, SCTPCTL_SYS_RESOURCE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit), SCTPCTL_ASOC_RESOURCE_MIN, SCTPCTL_ASOC_RESOURCE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default), SCTPCTL_HEARTBEAT_INTERVAL_MIN, SCTPCTL_HEARTBEAT_INTERVAL_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default), SCTPCTL_PMTU_RAISE_TIME_MIN, SCTPCTL_PMTU_RAISE_TIME_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default), SCTPCTL_SHUTDOWN_GUARD_TIME_MIN, SCTPCTL_SHUTDOWN_GUARD_TIME_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_secret_lifetime_default), SCTPCTL_SECRET_LIFETIME_MIN, SCTPCTL_SECRET_LIFETIME_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_rto_max_default), SCTPCTL_RTO_MAX_MIN, SCTPCTL_RTO_MAX_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_rto_min_default), SCTPCTL_RTO_MIN_MIN, SCTPCTL_RTO_MIN_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_rto_initial_default), SCTPCTL_RTO_INITIAL_MIN, SCTPCTL_RTO_INITIAL_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_init_rto_max_default), SCTPCTL_INIT_RTO_MAX_MIN, SCTPCTL_INIT_RTO_MAX_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default), SCTPCTL_VALID_COOKIE_LIFE_MIN, SCTPCTL_VALID_COOKIE_LIFE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_init_rtx_max_default), SCTPCTL_INIT_RTX_MAX_MIN, SCTPCTL_INIT_RTX_MAX_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default), SCTPCTL_ASSOC_RTX_MAX_MIN, SCTPCTL_ASSOC_RTX_MAX_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_path_rtx_max_default), SCTPCTL_PATH_RTX_MAX_MIN, SCTPCTL_PATH_RTX_MAX_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_path_pf_threshold), SCTPCTL_PATH_PF_THRESHOLD_MIN, SCTPCTL_PATH_PF_THRESHOLD_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_add_more_threshold), SCTPCTL_ADD_MORE_ON_OUTPUT_MIN, SCTPCTL_ADD_MORE_ON_OUTPUT_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default), SCTPCTL_INCOMING_STREAMS_MIN, SCTPCTL_INCOMING_STREAMS_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default), SCTPCTL_OUTGOING_STREAMS_MIN, SCTPCTL_OUTGOING_STREAMS_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_cmt_on_off), SCTPCTL_CMT_ON_OFF_MIN, SCTPCTL_CMT_ON_OFF_MAX);
- /* EY */
- RANGECHK(SCTP_BASE_SYSCTL(sctp_nr_sack_on_off), SCTPCTL_NR_SACK_ON_OFF_MIN, SCTPCTL_NR_SACK_ON_OFF_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_cmt_use_dac), SCTPCTL_CMT_USE_DAC_MIN, SCTPCTL_CMT_USE_DAC_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst), SCTPCTL_CWND_MAXBURST_MIN, SCTPCTL_CWND_MAXBURST_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk), SCTPCTL_ASCONF_AUTH_NOCHK_MIN, SCTPCTL_ASCONF_AUTH_NOCHK_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_auth_disable), SCTPCTL_AUTH_DISABLE_MIN, SCTPCTL_AUTH_DISABLE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_nat_friendly), SCTPCTL_NAT_FRIENDLY_MIN, SCTPCTL_NAT_FRIENDLY_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_L2_abc_variable), SCTPCTL_ABC_L_VAR_MIN, SCTPCTL_ABC_L_VAR_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count), SCTPCTL_MAX_CHAINED_MBUFS_MIN, SCTPCTL_MAX_CHAINED_MBUFS_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_do_drain), SCTPCTL_DO_SCTP_DRAIN_MIN, SCTPCTL_DO_SCTP_DRAIN_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_hb_maxburst), SCTPCTL_HB_MAX_BURST_MIN, SCTPCTL_HB_MAX_BURST_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit), SCTPCTL_ABORT_AT_LIMIT_MIN, SCTPCTL_ABORT_AT_LIMIT_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_strict_data_order), SCTPCTL_STRICT_DATA_ORDER_MIN, SCTPCTL_STRICT_DATA_ORDER_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_min_residual), SCTPCTL_MIN_RESIDUAL_MIN, SCTPCTL_MIN_RESIDUAL_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_max_retran_chunk), SCTPCTL_MAX_RETRAN_CHUNK_MIN, SCTPCTL_MAX_RETRAN_CHUNK_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_logging_level), SCTPCTL_LOGGING_LEVEL_MIN, SCTPCTL_LOGGING_LEVEL_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_default_cc_module), SCTPCTL_DEFAULT_CC_MODULE_MIN, SCTPCTL_DEFAULT_CC_MODULE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_default_ss_module), SCTPCTL_DEFAULT_SS_MODULE_MIN, SCTPCTL_DEFAULT_SS_MODULE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_default_frag_interleave), SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MIN, SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_vtag_time_wait), SCTPCTL_TIME_WAIT_MIN, SCTPCTL_TIME_WAIT_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_buffer_splitting), SCTPCTL_BUFFER_SPLITTING_MIN, SCTPCTL_BUFFER_SPLITTING_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_initial_cwnd), SCTPCTL_INITIAL_CWND_MIN, SCTPCTL_INITIAL_CWND_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_rttvar_bw), SCTPCTL_RTTVAR_BW_MIN, SCTPCTL_RTTVAR_BW_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_rttvar_rtt), SCTPCTL_RTTVAR_RTT_MIN, SCTPCTL_RTTVAR_RTT_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_rttvar_eqret), SCTPCTL_RTTVAR_EQRET_MIN, SCTPCTL_RTTVAR_EQRET_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_steady_step), SCTPCTL_RTTVAR_STEADYS_MIN, SCTPCTL_RTTVAR_STEADYS_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_use_dccc_ecn), SCTPCTL_RTTVAR_DCCCECN_MIN, SCTPCTL_RTTVAR_DCCCECN_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_mobility_base), SCTPCTL_MOBILITY_BASE_MIN, SCTPCTL_MOBILITY_BASE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff), SCTPCTL_MOBILITY_FASTHANDOFF_MIN, SCTPCTL_MOBILITY_FASTHANDOFF_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_enable_sack_immediately), SCTPCTL_SACK_IMMEDIATELY_ENABLE_MIN, SCTPCTL_SACK_IMMEDIATELY_ENABLE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly), SCTPCTL_NAT_FRIENDLY_INITS_MIN, SCTPCTL_NAT_FRIENDLY_INITS_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_blackhole), SCTPCTL_BLACKHOLE_MIN, SCTPCTL_BLACKHOLE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_diag_info_code), SCTPCTL_DIAG_INFO_CODE_MIN, SCTPCTL_DIAG_INFO_CODE_MAX);
+ error = EINVAL;
+ } else {
+ SCTP_BASE_SYSCTL(sctp_auth_enable) = new;
+ }
+ }
+ return (error);
+}
-#ifdef SCTP_DEBUG
- RANGECHK(SCTP_BASE_SYSCTL(sctp_debug_on), SCTPCTL_DEBUG_MIN, SCTPCTL_DEBUG_MAX);
-#endif
-#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- RANGECHK(SCTP_BASE_SYSCTL(sctp_output_unlocked), SCTPCTL_OUTPUT_UNLOCKED_MIN, SCTPCTL_OUTPUT_UNLOCKED_MAX);
+static int
+sctp_sysctl_handle_asconf(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ uint32_t new;
+
+ new = SCTP_BASE_SYSCTL(sctp_asconf_enable);
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if ((error == 0) &&
+ (req->newptr != NULL)) {
+#if (SCTPCTL_ASCONF_ENABLE_MIN == 0)
+ if ((new > SCTPCTL_ASCONF_ENABLE_MAX) ||
+ ((new == 1) && (SCTP_BASE_SYSCTL(sctp_auth_enable) == 0))) {
+#else
+ if ((new < SCTPCTL_ASCONF_ENABLE_MIN) ||
+ (new > SCTPCTL_ASCONF_ENABLE_MAX) ||
+ ((new == 1) && (SCTP_BASE_SYSCTL(sctp_auth_enable) == 0))) {
#endif
+ error = EINVAL;
+ } else {
+ SCTP_BASE_SYSCTL(sctp_asconf_enable) = new;
+ }
}
return (error);
}
-#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
static int
-sysctl_stat_get(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_stats(SYSCTL_HANDLER_ARGS)
{
- int cpu, error;
- struct sctpstat sb, sb_temp, *sarry, *cpin = NULL;
+ int error;
- if ((req->newptr) && (req->newlen == sizeof(struct sctpstat))) {
- /*
- * User wants us to clear or at least reset the counters to
- * the specified values.
- */
- cpin = &sb_temp;
- memset(&sb_temp, 0, sizeof(sb_temp));
- error = SYSCTL_IN(req, &sb_temp, sizeof(sb_temp));
- if (error != 0)
- return (error);
- } else if (req->newptr) {
- /* Must be a stat structure */
+#if defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
+ struct sctpstat *sarry;
+ struct sctpstat sb;
+ int cpu;
+
+#endif
+ struct sctpstat sb_temp;
+
+ if ((req->newptr != NULL) &&
+ (req->newlen != sizeof(struct sctpstat))) {
return (EINVAL);
}
+ memset(&sb_temp, 0, sizeof(struct sctpstat));
+
+ if (req->newptr != NULL) {
+ error = SYSCTL_IN(req, &sb_temp, sizeof(struct sctpstat));
+ if (error != 0) {
+ return (error);
+ }
+ }
+#if defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
memset(&sb, 0, sizeof(sb));
for (cpu = 0; cpu < mp_maxid; cpu++) {
sarry = &SCTP_BASE_STATS[cpu];
@@ -830,19 +795,35 @@ sysctl_stat_get(SYSCTL_HANDLER_ARGS)
sb.sctps_send_burst_avoid += sarry->sctps_send_burst_avoid;
sb.sctps_send_cwnd_avoid += sarry->sctps_send_cwnd_avoid;
sb.sctps_fwdtsn_map_over += sarry->sctps_fwdtsn_map_over;
- if (cpin) {
- memcpy(sarry, cpin, sizeof(struct sctpstat));
+ if (req->newptr != NULL) {
+ memcpy(sarry, &sb_temp, sizeof(struct sctpstat));
}
}
- error = SYSCTL_OUT(req, &sb, sizeof(sb));
+ error = SYSCTL_OUT(req, &sb, sizeof(struct sctpstat));
+#else
+ error = SYSCTL_OUT(req, &SCTP_BASE_STATS, sizeof(struct sctpstat));
+ if (error != 0) {
+ return (error);
+ }
+ if (req->newptr != NULL) {
+ memcpy(&SCTP_BASE_STATS, &sb_temp, sizeof(struct sctpstat));
+ }
+#endif
return (error);
}
-#endif
-
#if defined(SCTP_LOCAL_TRACE_BUF)
static int
-sysctl_sctp_cleartrace(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_trace_log(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+
+ error = SYSCTL_OUT(req, &SCTP_BASE_SYSCTL(sctp_log), sizeof(struct sctp_log));
+ return (error);
+}
+
+static int
+sctp_sysctl_handle_trace_log_clear(SYSCTL_HANDLER_ARGS)
{
int error = 0;
@@ -852,314 +833,117 @@ sysctl_sctp_cleartrace(SYSCTL_HANDLER_ARGS)
#endif
+#define SCTP_UINT_SYSCTL(mib_name, var_name, prefix) \
+ static int \
+ sctp_sysctl_handle_##mib_name(SYSCTL_HANDLER_ARGS) \
+ { \
+ int error; \
+ uint32_t new; \
+ \
+ new = SCTP_BASE_SYSCTL(var_name); \
+ error = sysctl_handle_int(oidp, &new, 0, req); \
+ if ((error == 0) && (req->newptr != NULL)) { \
+ if ((new < prefix##_MIN) || \
+ (new > prefix##_MAX)) { \
+ error = EINVAL; \
+ } else { \
+ SCTP_BASE_SYSCTL(var_name) = new; \
+ } \
+ } \
+ return (error); \
+ } \
+ SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mib_name, \
+ CTLFLAG_VNET|CTLTYPE_UINT|CTLFLAG_RW, NULL, 0, \
+ sctp_sysctl_handle_##mib_name, "UI", prefix##_DESC);
/*
* sysctl definitions
*/
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, sendspace, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_sendspace), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MAXDGRAM_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, recvspace, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_recvspace), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RECVSPACE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, auto_asconf, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_auto_asconf), 0, sysctl_sctp_check, "IU",
- SCTPCTL_AUTOASCONF_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, ecn_enable, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_ecn_enable), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ECN_ENABLE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, strict_sacks, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_strict_sacks), 0, sysctl_sctp_check, "IU",
- SCTPCTL_STRICT_SACKS_DESC);
-
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, peer_chkoh, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_peer_chunk_oh), 0, sysctl_sctp_check, "IU",
- SCTPCTL_PEER_CHKOH_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, maxburst, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_max_burst_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MAXBURST_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, fr_maxburst, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_fr_max_burst_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_FRMAXBURST_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, maxchunks, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MAXCHUNKS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, tcbhashsize, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_hashtblsize), 0, sysctl_sctp_check, "IU",
- SCTPCTL_TCBHASHSIZE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, pcbhashsize, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_pcbtblsize), 0, sysctl_sctp_check, "IU",
- SCTPCTL_PCBHASHSIZE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, min_split_point, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_min_split_point), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MIN_SPLIT_POINT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, chunkscale, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_chunkscale), 0, sysctl_sctp_check, "IU",
- SCTPCTL_CHUNKSCALE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, delayed_sack_time, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_DELAYED_SACK_TIME_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, sack_freq, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_sack_freq_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_SACK_FREQ_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, sys_resource, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_system_free_resc_limit), 0, sysctl_sctp_check, "IU",
- SCTPCTL_SYS_RESOURCE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, asoc_resource, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ASOC_RESOURCE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, heartbeat_interval, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_HEARTBEAT_INTERVAL_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, pmtu_raise_time, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_PMTU_RAISE_TIME_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, shutdown_guard_time, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_SHUTDOWN_GUARD_TIME_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, secret_lifetime, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_secret_lifetime_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_SECRET_LIFETIME_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rto_max, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_rto_max_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTO_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rto_min, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_rto_min_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTO_MIN_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rto_initial, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_rto_initial_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTO_INITIAL_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, init_rto_max, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_init_rto_max_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_INIT_RTO_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, valid_cookie_life, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_VALID_COOKIE_LIFE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, init_rtx_max, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_init_rtx_max_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_INIT_RTX_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, assoc_rtx_max, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ASSOC_RTX_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, path_rtx_max, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_path_rtx_max_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_PATH_RTX_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, path_pf_threshold, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_path_pf_threshold), 0, sysctl_sctp_check, "IU",
- SCTPCTL_PATH_PF_THRESHOLD_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, add_more_on_output, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_add_more_threshold), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ADD_MORE_ON_OUTPUT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, incoming_streams, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_INCOMING_STREAMS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, outgoing_streams, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_OUTGOING_STREAMS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, cmt_on_off, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_cmt_on_off), 0, sysctl_sctp_check, "IU",
- SCTPCTL_CMT_ON_OFF_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, nr_sack_on_off, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_nr_sack_on_off), 0, sysctl_sctp_check, "IU",
- SCTPCTL_NR_SACK_ON_OFF_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, cmt_use_dac, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_cmt_use_dac), 0, sysctl_sctp_check, "IU",
- SCTPCTL_CMT_USE_DAC_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, cwnd_maxburst, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst), 0, sysctl_sctp_check, "IU",
- SCTPCTL_CWND_MAXBURST_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, asconf_auth_nochk, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ASCONF_AUTH_NOCHK_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, auth_disable, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_auth_disable), 0, sysctl_sctp_check, "IU",
- SCTPCTL_AUTH_DISABLE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, nat_friendly, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_nat_friendly), 0, sysctl_sctp_check, "IU",
- SCTPCTL_NAT_FRIENDLY_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, abc_l_var, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_L2_abc_variable), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ABC_L_VAR_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, max_chained_mbufs, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MAX_CHAINED_MBUFS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, do_sctp_drain, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_do_drain), 0, sysctl_sctp_check, "IU",
- SCTPCTL_DO_SCTP_DRAIN_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, hb_max_burst, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_hb_maxburst), 0, sysctl_sctp_check, "IU",
- SCTPCTL_HB_MAX_BURST_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, abort_at_limit, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ABORT_AT_LIMIT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, strict_data_order, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_strict_data_order), 0, sysctl_sctp_check, "IU",
- SCTPCTL_STRICT_DATA_ORDER_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, min_residual, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_min_residual), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MIN_RESIDUAL_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, max_retran_chunk, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_max_retran_chunk), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MAX_RETRAN_CHUNK_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, log_level, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_logging_level), 0, sysctl_sctp_check, "IU",
- SCTPCTL_LOGGING_LEVEL_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, default_cc_module, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_default_cc_module), 0, sysctl_sctp_check, "IU",
- SCTPCTL_DEFAULT_CC_MODULE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, default_ss_module, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_default_ss_module), 0, sysctl_sctp_check, "IU",
- SCTPCTL_DEFAULT_SS_MODULE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, default_frag_interleave, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_default_frag_interleave), 0, sysctl_sctp_check, "IU",
- SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, mobility_base, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_mobility_base), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MOBILITY_BASE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, mobility_fasthandoff, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MOBILITY_FASTHANDOFF_DESC);
-
+SCTP_UINT_SYSCTL(sendspace, sctp_sendspace, SCTPCTL_MAXDGRAM)
+SCTP_UINT_SYSCTL(recvspace, sctp_recvspace, SCTPCTL_RECVSPACE)
+SCTP_UINT_SYSCTL(auto_asconf, sctp_auto_asconf, SCTPCTL_AUTOASCONF)
+SCTP_UINT_SYSCTL(ecn_enable, sctp_ecn_enable, SCTPCTL_ECN_ENABLE)
+SCTP_UINT_SYSCTL(pr_enable, sctp_pr_enable, SCTPCTL_PR_ENABLE)
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, auth_enable, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+ NULL, 0, sctp_sysctl_handle_auth, "IU", SCTPCTL_AUTH_ENABLE_DESC);
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, asconf_enable, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+ NULL, 0, sctp_sysctl_handle_asconf, "IU", SCTPCTL_ASCONF_ENABLE_DESC);
+SCTP_UINT_SYSCTL(reconfig_enable, sctp_reconfig_enable, SCTPCTL_RECONFIG_ENABLE)
+SCTP_UINT_SYSCTL(nrsack_enable, sctp_nrsack_enable, SCTPCTL_NRSACK_ENABLE)
+SCTP_UINT_SYSCTL(pktdrop_enable, sctp_pktdrop_enable, SCTPCTL_PKTDROP_ENABLE)
+SCTP_UINT_SYSCTL(peer_chkoh, sctp_peer_chunk_oh, SCTPCTL_PEER_CHKOH)
+SCTP_UINT_SYSCTL(maxburst, sctp_max_burst_default, SCTPCTL_MAXBURST)
+SCTP_UINT_SYSCTL(fr_maxburst, sctp_fr_max_burst_default, SCTPCTL_FRMAXBURST)
+SCTP_UINT_SYSCTL(maxchunks, sctp_max_chunks_on_queue, SCTPCTL_MAXCHUNKS)
+SCTP_UINT_SYSCTL(tcbhashsize, sctp_hashtblsize, SCTPCTL_TCBHASHSIZE)
+SCTP_UINT_SYSCTL(pcbhashsize, sctp_pcbtblsize, SCTPCTL_PCBHASHSIZE)
+SCTP_UINT_SYSCTL(min_split_point, sctp_min_split_point, SCTPCTL_MIN_SPLIT_POINT)
+SCTP_UINT_SYSCTL(chunkscale, sctp_chunkscale, SCTPCTL_CHUNKSCALE)
+SCTP_UINT_SYSCTL(delayed_sack_time, sctp_delayed_sack_time_default, SCTPCTL_DELAYED_SACK_TIME)
+SCTP_UINT_SYSCTL(sack_freq, sctp_sack_freq_default, SCTPCTL_SACK_FREQ)
+SCTP_UINT_SYSCTL(sys_resource, sctp_system_free_resc_limit, SCTPCTL_SYS_RESOURCE)
+SCTP_UINT_SYSCTL(asoc_resource, sctp_asoc_free_resc_limit, SCTPCTL_ASOC_RESOURCE)
+SCTP_UINT_SYSCTL(heartbeat_interval, sctp_heartbeat_interval_default, SCTPCTL_HEARTBEAT_INTERVAL)
+SCTP_UINT_SYSCTL(pmtu_raise_time, sctp_pmtu_raise_time_default, SCTPCTL_PMTU_RAISE_TIME)
+SCTP_UINT_SYSCTL(shutdown_guard_time, sctp_shutdown_guard_time_default, SCTPCTL_SHUTDOWN_GUARD_TIME)
+SCTP_UINT_SYSCTL(secret_lifetime, sctp_secret_lifetime_default, SCTPCTL_SECRET_LIFETIME)
+SCTP_UINT_SYSCTL(rto_max, sctp_rto_max_default, SCTPCTL_RTO_MAX)
+SCTP_UINT_SYSCTL(rto_min, sctp_rto_min_default, SCTPCTL_RTO_MIN)
+SCTP_UINT_SYSCTL(rto_initial, sctp_rto_initial_default, SCTPCTL_RTO_INITIAL)
+SCTP_UINT_SYSCTL(init_rto_max, sctp_init_rto_max_default, SCTPCTL_INIT_RTO_MAX)
+SCTP_UINT_SYSCTL(valid_cookie_life, sctp_valid_cookie_life_default, SCTPCTL_VALID_COOKIE_LIFE)
+SCTP_UINT_SYSCTL(init_rtx_max, sctp_init_rtx_max_default, SCTPCTL_INIT_RTX_MAX)
+SCTP_UINT_SYSCTL(assoc_rtx_max, sctp_assoc_rtx_max_default, SCTPCTL_ASSOC_RTX_MAX)
+SCTP_UINT_SYSCTL(path_rtx_max, sctp_path_rtx_max_default, SCTPCTL_PATH_RTX_MAX)
+SCTP_UINT_SYSCTL(path_pf_threshold, sctp_path_pf_threshold, SCTPCTL_PATH_PF_THRESHOLD)
+SCTP_UINT_SYSCTL(add_more_on_output, sctp_add_more_threshold, SCTPCTL_ADD_MORE_ON_OUTPUT)
+SCTP_UINT_SYSCTL(incoming_streams, sctp_nr_incoming_streams_default, SCTPCTL_INCOMING_STREAMS)
+SCTP_UINT_SYSCTL(outgoing_streams, sctp_nr_outgoing_streams_default, SCTPCTL_OUTGOING_STREAMS)
+SCTP_UINT_SYSCTL(cmt_on_off, sctp_cmt_on_off, SCTPCTL_CMT_ON_OFF)
+SCTP_UINT_SYSCTL(cmt_use_dac, sctp_cmt_use_dac, SCTPCTL_CMT_USE_DAC)
+SCTP_UINT_SYSCTL(cwnd_maxburst, sctp_use_cwnd_based_maxburst, SCTPCTL_CWND_MAXBURST)
+SCTP_UINT_SYSCTL(nat_friendly, sctp_nat_friendly, SCTPCTL_NAT_FRIENDLY)
+SCTP_UINT_SYSCTL(abc_l_var, sctp_L2_abc_variable, SCTPCTL_ABC_L_VAR)
+SCTP_UINT_SYSCTL(max_chained_mbufs, sctp_mbuf_threshold_count, SCTPCTL_MAX_CHAINED_MBUFS)
+SCTP_UINT_SYSCTL(do_sctp_drain, sctp_do_drain, SCTPCTL_DO_SCTP_DRAIN)
+SCTP_UINT_SYSCTL(hb_max_burst, sctp_hb_maxburst, SCTPCTL_HB_MAX_BURST)
+SCTP_UINT_SYSCTL(abort_at_limit, sctp_abort_if_one_2_one_hits_limit, SCTPCTL_ABORT_AT_LIMIT)
+SCTP_UINT_SYSCTL(min_residual, sctp_min_residual, SCTPCTL_MIN_RESIDUAL)
+SCTP_UINT_SYSCTL(max_retran_chunk, sctp_max_retran_chunk, SCTPCTL_MAX_RETRAN_CHUNK)
+SCTP_UINT_SYSCTL(log_level, sctp_logging_level, SCTPCTL_LOGGING_LEVEL)
+SCTP_UINT_SYSCTL(default_cc_module, sctp_default_cc_module, SCTPCTL_DEFAULT_CC_MODULE)
+SCTP_UINT_SYSCTL(default_ss_module, sctp_default_ss_module, SCTPCTL_DEFAULT_SS_MODULE)
+SCTP_UINT_SYSCTL(default_frag_interleave, sctp_default_frag_interleave, SCTPCTL_DEFAULT_FRAG_INTERLEAVE)
+SCTP_UINT_SYSCTL(mobility_base, sctp_mobility_base, SCTPCTL_MOBILITY_BASE)
+SCTP_UINT_SYSCTL(mobility_fasthandoff, sctp_mobility_fasthandoff, SCTPCTL_MOBILITY_FASTHANDOFF)
#if defined(SCTP_LOCAL_TRACE_BUF)
-SYSCTL_VNET_STRUCT(_net_inet_sctp, OID_AUTO, log, CTLFLAG_RD,
- &SCTP_BASE_SYSCTL(sctp_log), sctp_log,
- "SCTP logging (struct sctp_log)");
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, clear_trace, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_log), 0, sysctl_sctp_cleartrace, "IU",
- "Clear SCTP Logging buffer");
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, log, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RD,
+ NULL, 0, sctp_sysctl_handle_trace_log, "S,sctplog", "SCTP logging (struct sctp_log)");
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, clear_trace, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+ NULL, 0, sctp_sysctl_handle_trace_log_clear, "IU", "Clear SCTP Logging buffer");
#endif
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, udp_tunneling_port, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_udp_tunneling_port), 0, sysctl_sctp_udp_tunneling_check, "IU",
- SCTPCTL_UDP_TUNNELING_PORT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, enable_sack_immediately, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_enable_sack_immediately), 0, sysctl_sctp_check, "IU",
- SCTPCTL_SACK_IMMEDIATELY_ENABLE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, nat_friendly_init, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly), 0, sysctl_sctp_check, "IU",
- SCTPCTL_NAT_FRIENDLY_INITS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, vtag_time_wait, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_vtag_time_wait), 0, sysctl_sctp_check, "IU",
- SCTPCTL_TIME_WAIT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, buffer_splitting, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_buffer_splitting), 0, sysctl_sctp_check, "IU",
- SCTPCTL_BUFFER_SPLITTING_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, initial_cwnd, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_initial_cwnd), 0, sysctl_sctp_check, "IU",
- SCTPCTL_INITIAL_CWND_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_bw, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_rttvar_bw), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTTVAR_BW_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_rtt, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_rttvar_rtt), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTTVAR_RTT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_eqret, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_rttvar_eqret), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTTVAR_EQRET_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_steady_step, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_steady_step), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTTVAR_STEADYS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, use_dcccecn, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_use_dccc_ecn), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTTVAR_DCCCECN_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, blackhole, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_blackhole), 0, sysctl_sctp_check, "IU",
- SCTPCTL_BLACKHOLE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, diag_info_code, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_diag_info_code), 0, sysctl_sctp_check, "IU",
- SCTPCTL_DIAG_INFO_CODE_DESC);
-
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, udp_tunneling_port, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+ NULL, 0, sctp_sysctl_handle_udp_tunneling, "IU", SCTPCTL_UDP_TUNNELING_PORT_DESC);
+SCTP_UINT_SYSCTL(enable_sack_immediately, sctp_enable_sack_immediately, SCTPCTL_SACK_IMMEDIATELY_ENABLE)
+SCTP_UINT_SYSCTL(nat_friendly_init, sctp_inits_include_nat_friendly, SCTPCTL_NAT_FRIENDLY_INITS)
+SCTP_UINT_SYSCTL(vtag_time_wait, sctp_vtag_time_wait, SCTPCTL_TIME_WAIT)
+SCTP_UINT_SYSCTL(buffer_splitting, sctp_buffer_splitting, SCTPCTL_BUFFER_SPLITTING)
+SCTP_UINT_SYSCTL(initial_cwnd, sctp_initial_cwnd, SCTPCTL_INITIAL_CWND)
+SCTP_UINT_SYSCTL(rttvar_bw, sctp_rttvar_bw, SCTPCTL_RTTVAR_BW)
+SCTP_UINT_SYSCTL(rttvar_rtt, sctp_rttvar_rtt, SCTPCTL_RTTVAR_RTT)
+SCTP_UINT_SYSCTL(rttvar_eqret, sctp_rttvar_eqret, SCTPCTL_RTTVAR_EQRET)
+SCTP_UINT_SYSCTL(rttvar_steady_step, sctp_steady_step, SCTPCTL_RTTVAR_STEADYS)
+SCTP_UINT_SYSCTL(use_dcccecn, sctp_use_dccc_ecn, SCTPCTL_RTTVAR_DCCCECN)
+SCTP_UINT_SYSCTL(blackhole, sctp_blackhole, SCTPCTL_BLACKHOLE)
+SCTP_UINT_SYSCTL(diag_info_code, sctp_diag_info_code, SCTPCTL_DIAG_INFO_CODE)
#ifdef SCTP_DEBUG
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, debug, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_debug_on), 0, sysctl_sctp_check, "IU",
- SCTPCTL_DEBUG_DESC);
+SCTP_UINT_SYSCTL(debug, sctp_debug_on, SCTPCTL_DEBUG)
#endif
-
-
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, output_unlocked, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_output_unlocked), 0, sysctl_sctp_check, "IU",
- SCTPCTL_OUTPUT_UNLOCKED_DESC);
+SCTP_UINT_SYSCTL(output_unlocked, sctp_output_unlocked, SCTPCTL_OUTPUT_UNLOCKED)
#endif
-
-#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, stats,
- CTLTYPE_STRUCT | CTLFLAG_RW,
- 0, 0, sysctl_stat_get, "S,sctpstat",
- "SCTP statistics (struct sctp_stat)");
-#else
-SYSCTL_VNET_STRUCT(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_RW,
- &SCTP_BASE_STATS_SYSCTL, sctpstat,
- "SCTP statistics (struct sctp_stat)");
-#endif
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLTYPE_OPAQUE | CTLFLAG_RD,
- 0, 0, sctp_assoclist,
- "S,xassoc", "List of active SCTP associations");
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RW,
+ NULL, 0, sctp_sysctl_handle_stats, "S,sctpstat", "SCTP statistics (struct sctp_stat)");
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_RD,
+ NULL, 0, sctp_sysctl_handle_assoclist, "S,xassoc", "List of active SCTP associations");
diff --git a/freebsd/sys/netinet/sctp_sysctl.h b/freebsd/sys/netinet/sctp_sysctl.h
index 432d36a4..959bd1e4 100644
--- a/freebsd/sys/netinet/sctp_sysctl.h
+++ b/freebsd/sys/netinet/sctp_sysctl.h
@@ -45,8 +45,13 @@ struct sctp_sysctl {
uint32_t sctp_auto_asconf;
uint32_t sctp_multiple_asconfs;
uint32_t sctp_ecn_enable;
+ uint32_t sctp_pr_enable;
+ uint32_t sctp_auth_enable;
+ uint32_t sctp_asconf_enable;
+ uint32_t sctp_reconfig_enable;
+ uint32_t sctp_nrsack_enable;
+ uint32_t sctp_pktdrop_enable;
uint32_t sctp_fr_max_burst_default;
- uint32_t sctp_strict_sacks;
uint32_t sctp_peer_chunk_oh;
uint32_t sctp_max_burst_default;
uint32_t sctp_max_chunks_on_queue;
@@ -76,18 +81,13 @@ struct sctp_sysctl {
uint32_t sctp_nr_outgoing_streams_default;
uint32_t sctp_cmt_on_off;
uint32_t sctp_cmt_use_dac;
- /* EY 5/5/08 - nr_sack flag variable */
- uint32_t sctp_nr_sack_on_off;
uint32_t sctp_use_cwnd_based_maxburst;
- uint32_t sctp_asconf_auth_nochk;
- uint32_t sctp_auth_disable;
uint32_t sctp_nat_friendly;
uint32_t sctp_L2_abc_variable;
uint32_t sctp_mbuf_threshold_count;
uint32_t sctp_do_drain;
uint32_t sctp_hb_maxburst;
uint32_t sctp_abort_if_one_2_one_hits_limit;
- uint32_t sctp_strict_data_order;
uint32_t sctp_min_residual;
uint32_t sctp_max_retran_chunk;
uint32_t sctp_logging_level;
@@ -141,7 +141,7 @@ struct sctp_sysctl {
#define SCTPCTL_AUTOASCONF_DESC "Enable SCTP Auto-ASCONF"
#define SCTPCTL_AUTOASCONF_MIN 0
#define SCTPCTL_AUTOASCONF_MAX 1
-#define SCTPCTL_AUTOASCONF_DEFAULT SCTP_DEFAULT_AUTO_ASCONF
+#define SCTPCTL_AUTOASCONF_DEFAULT 1
/* autoasconf: Enable SCTP Auto-ASCONF */
#define SCTPCTL_MULTIPLEASCONFS_DESC "Enable SCTP Muliple-ASCONFs"
@@ -155,11 +155,41 @@ struct sctp_sysctl {
#define SCTPCTL_ECN_ENABLE_MAX 1
#define SCTPCTL_ECN_ENABLE_DEFAULT 1
-/* strict_sacks: Enable SCTP Strict SACK checking */
-#define SCTPCTL_STRICT_SACKS_DESC "Enable SCTP Strict SACK checking"
-#define SCTPCTL_STRICT_SACKS_MIN 0
-#define SCTPCTL_STRICT_SACKS_MAX 1
-#define SCTPCTL_STRICT_SACKS_DEFAULT 1
+/* pr_enable: Enable PR-SCTP */
+#define SCTPCTL_PR_ENABLE_DESC "Enable PR-SCTP"
+#define SCTPCTL_PR_ENABLE_MIN 0
+#define SCTPCTL_PR_ENABLE_MAX 1
+#define SCTPCTL_PR_ENABLE_DEFAULT 1
+
+/* auth_enable: Enable SCTP AUTH function */
+#define SCTPCTL_AUTH_ENABLE_DESC "Enable SCTP AUTH function"
+#define SCTPCTL_AUTH_ENABLE_MIN 0
+#define SCTPCTL_AUTH_ENABLE_MAX 1
+#define SCTPCTL_AUTH_ENABLE_DEFAULT 1
+
+/* asconf_enable: Enable SCTP ASCONF */
+#define SCTPCTL_ASCONF_ENABLE_DESC "Enable SCTP ASCONF"
+#define SCTPCTL_ASCONF_ENABLE_MIN 0
+#define SCTPCTL_ASCONF_ENABLE_MAX 1
+#define SCTPCTL_ASCONF_ENABLE_DEFAULT 1
+
+/* reconfig_enable: Enable SCTP RE-CONFIG */
+#define SCTPCTL_RECONFIG_ENABLE_DESC "Enable SCTP RE-CONFIG"
+#define SCTPCTL_RECONFIG_ENABLE_MIN 0
+#define SCTPCTL_RECONFIG_ENABLE_MAX 1
+#define SCTPCTL_RECONFIG_ENABLE_DEFAULT 1
+
+/* nrsack_enable: Enable NR_SACK */
+#define SCTPCTL_NRSACK_ENABLE_DESC "Enable SCTP NR-SACK"
+#define SCTPCTL_NRSACK_ENABLE_MIN 0
+#define SCTPCTL_NRSACK_ENABLE_MAX 1
+#define SCTPCTL_NRSACK_ENABLE_DEFAULT 0
+
+/* pktdrop_enable: Enable SCTP Packet Drop Reports */
+#define SCTPCTL_PKTDROP_ENABLE_DESC "Enable SCTP PKTDROP"
+#define SCTPCTL_PKTDROP_ENABLE_MIN 0
+#define SCTPCTL_PKTDROP_ENABLE_MAX 1
+#define SCTPCTL_PKTDROP_ENABLE_DEFAULT 0
/* loopback_nocsum: Enable NO Csum on packets sent on loopback */
#define SCTPCTL_LOOPBACK_NOCSUM_DESC "Enable NO Csum on packets sent on loopback"
@@ -253,10 +283,10 @@ struct sctp_sysctl {
#define SCTPCTL_PMTU_RAISE_TIME_DEFAULT SCTP_DEF_PMTU_RAISE_SEC
/* shutdown_guard_time: Default shutdown guard timer in seconds */
-#define SCTPCTL_SHUTDOWN_GUARD_TIME_DESC "Default shutdown guard timer in seconds"
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_DESC "Shutdown guard timer in seconds (0 means 5 times RTO.Max)"
#define SCTPCTL_SHUTDOWN_GUARD_TIME_MIN 0
#define SCTPCTL_SHUTDOWN_GUARD_TIME_MAX 0xFFFFFFFF
-#define SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT SCTP_DEF_MAX_SHUTDOWN_SEC
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT 0
/* secret_lifetime: Default secret lifetime in seconds */
#define SCTPCTL_SECRET_LIFETIME_DESC "Default secret lifetime in seconds"
@@ -342,12 +372,6 @@ struct sctp_sysctl {
#define SCTPCTL_CMT_ON_OFF_MAX SCTP_CMT_MAX
#define SCTPCTL_CMT_ON_OFF_DEFAULT SCTP_CMT_OFF
-/* EY - nr_sack_on_off: NR_SACK on/off flag */
-#define SCTPCTL_NR_SACK_ON_OFF_DESC "NR_SACK on/off flag"
-#define SCTPCTL_NR_SACK_ON_OFF_MIN 0
-#define SCTPCTL_NR_SACK_ON_OFF_MAX 1
-#define SCTPCTL_NR_SACK_ON_OFF_DEFAULT 0
-
/* cmt_use_dac: CMT DAC on/off flag */
#define SCTPCTL_CMT_USE_DAC_DESC "CMT DAC on/off flag"
#define SCTPCTL_CMT_USE_DAC_MIN 0
@@ -360,18 +384,6 @@ struct sctp_sysctl {
#define SCTPCTL_CWND_MAXBURST_MAX 1
#define SCTPCTL_CWND_MAXBURST_DEFAULT 1
-/* asconf_auth_nochk: Disable SCTP ASCONF AUTH requirement */
-#define SCTPCTL_ASCONF_AUTH_NOCHK_DESC "Disable SCTP ASCONF AUTH requirement"
-#define SCTPCTL_ASCONF_AUTH_NOCHK_MIN 0
-#define SCTPCTL_ASCONF_AUTH_NOCHK_MAX 1
-#define SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT 0
-
-/* auth_disable: Disable SCTP AUTH function */
-#define SCTPCTL_AUTH_DISABLE_DESC "Disable SCTP AUTH function"
-#define SCTPCTL_AUTH_DISABLE_MIN 0
-#define SCTPCTL_AUTH_DISABLE_MAX 1
-#define SCTPCTL_AUTH_DISABLE_DEFAULT 0
-
/* nat_friendly: SCTP NAT friendly operation */
#define SCTPCTL_NAT_FRIENDLY_DESC "SCTP NAT friendly operation"
#define SCTPCTL_NAT_FRIENDLY_MIN 0
@@ -408,12 +420,6 @@ struct sctp_sysctl {
#define SCTPCTL_ABORT_AT_LIMIT_MAX 1
#define SCTPCTL_ABORT_AT_LIMIT_DEFAULT 0
-/* strict_data_order: Enforce strict data ordering, abort if control inside data */
-#define SCTPCTL_STRICT_DATA_ORDER_DESC "Enforce strict data ordering, abort if control inside data"
-#define SCTPCTL_STRICT_DATA_ORDER_MIN 0
-#define SCTPCTL_STRICT_DATA_ORDER_MAX 1
-#define SCTPCTL_STRICT_DATA_ORDER_DEFAULT 0
-
/* min_residual: min residual in a data fragment leftover */
#define SCTPCTL_MIN_RESIDUAL_DESC "Minimum residual data chunk in second part of split"
#define SCTPCTL_MIN_RESIDUAL_MIN 20
@@ -454,13 +460,13 @@ struct sctp_sysctl {
#define SCTPCTL_MOBILITY_BASE_DESC "Enable SCTP base mobility"
#define SCTPCTL_MOBILITY_BASE_MIN 0
#define SCTPCTL_MOBILITY_BASE_MAX 1
-#define SCTPCTL_MOBILITY_BASE_DEFAULT SCTP_DEFAULT_MOBILITY_BASE
+#define SCTPCTL_MOBILITY_BASE_DEFAULT 0
/* mobility_fasthandoff: Enable SCTP fast handoff support */
#define SCTPCTL_MOBILITY_FASTHANDOFF_DESC "Enable SCTP fast handoff"
#define SCTPCTL_MOBILITY_FASTHANDOFF_MIN 0
#define SCTPCTL_MOBILITY_FASTHANDOFF_MAX 1
-#define SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT SCTP_DEFAULT_MOBILITY_FASTHANDOFF
+#define SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT 0
/* Enable SCTP/UDP tunneling port */
#define SCTPCTL_UDP_TUNNELING_PORT_DESC "Set the SCTP/UDP tunneling port"
@@ -472,7 +478,7 @@ struct sctp_sysctl {
#define SCTPCTL_SACK_IMMEDIATELY_ENABLE_DESC "Enable sending of the SACK-IMMEDIATELY-bit."
#define SCTPCTL_SACK_IMMEDIATELY_ENABLE_MIN 0
#define SCTPCTL_SACK_IMMEDIATELY_ENABLE_MAX 1
-#define SCTPCTL_SACK_IMMEDIATELY_ENABLE_DEFAULT SCTPCTL_SACK_IMMEDIATELY_ENABLE_MIN
+#define SCTPCTL_SACK_IMMEDIATELY_ENABLE_DEFAULT SCTPCTL_SACK_IMMEDIATELY_ENABLE_MAX
/* Enable sending of the NAT-FRIENDLY message */
#define SCTPCTL_NAT_FRIENDLY_INITS_DESC "Enable sending of the nat-friendly SCTP option on INITs."
@@ -525,7 +531,7 @@ struct sctp_sysctl {
#define SCTPCTL_RTTVAR_DCCCECN_MAX 1
#define SCTPCTL_RTTVAR_DCCCECN_DEFAULT 1 /* 0 means disable feature */
-#define SCTPCTL_BLACKHOLE_DESC "Enable SCTP blackholing"
+#define SCTPCTL_BLACKHOLE_DESC "Enable SCTP blackholing. See blackhole(4) for more details."
#define SCTPCTL_BLACKHOLE_MIN 0
#define SCTPCTL_BLACKHOLE_MAX 2
#define SCTPCTL_BLACKHOLE_DEFAULT SCTPCTL_BLACKHOLE_MIN
diff --git a/freebsd/sys/netinet/sctp_timer.c b/freebsd/sys/netinet/sctp_timer.c
index 7d010c7b..c851317b 100644
--- a/freebsd/sys/netinet/sctp_timer.c
+++ b/freebsd/sys/netinet/sctp_timer.c
@@ -51,7 +51,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp_input.h>
#include <netinet/sctp.h>
#include <netinet/sctp_uio.h>
+#if defined(INET) || defined(INET6)
#include <netinet/udp.h>
+#endif
void
@@ -85,7 +87,7 @@ sctp_audit_retranmission_queue(struct sctp_association *asoc)
asoc->sent_queue_cnt);
}
-int
+static int
sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
struct sctp_nets *net, uint16_t threshold)
{
@@ -110,8 +112,10 @@ sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
net->dest_state |= SCTP_ADDR_PF;
net->last_active = sctp_get_tick_count();
sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
- sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ inp, stcb, net,
+ SCTP_FROM_SCTP_TIMER + SCTP_LOC_1);
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
}
}
}
@@ -151,9 +155,9 @@ sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
/* Abort notification sends a ULP notify */
struct mbuf *op_err;
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION,
- "Association error couter exceeded");
- inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_1;
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Association error counter exceeded");
+ inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_2;
sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
return (1);
}
@@ -337,7 +341,7 @@ sctp_find_alternate_net(struct sctp_tcb *stcb,
return (NULL);
}
}
- do {
+ for (;;) {
alt = TAILQ_NEXT(mnet, sctp_next);
if (alt == NULL) {
once++;
@@ -356,7 +360,6 @@ sctp_find_alternate_net(struct sctp_tcb *stcb,
}
alt->src_addr_selected = 0;
}
- /* sa_ignore NO_NULL_CHK */
if (((alt->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE) &&
(alt->ro.ro_rt != NULL) &&
(!(alt->dest_state & SCTP_ADDR_UNCONFIRMED))) {
@@ -364,14 +367,14 @@ sctp_find_alternate_net(struct sctp_tcb *stcb,
break;
}
mnet = alt;
- } while (alt != NULL);
+ }
if (alt == NULL) {
/* Case where NO insv network exists (dormant state) */
/* we rotate destinations */
once = 0;
mnet = net;
- do {
+ for (;;) {
if (mnet == NULL) {
return (TAILQ_FIRST(&stcb->asoc.nets));
}
@@ -382,15 +385,17 @@ sctp_find_alternate_net(struct sctp_tcb *stcb,
break;
}
alt = TAILQ_FIRST(&stcb->asoc.nets);
+ if (alt == NULL) {
+ break;
+ }
}
- /* sa_ignore NO_NULL_CHK */
if ((!(alt->dest_state & SCTP_ADDR_UNCONFIRMED)) &&
(alt != net)) {
/* Found an alternate address */
break;
}
mnet = alt;
- } while (alt != NULL);
+ }
}
if (alt == NULL) {
return (net);
@@ -405,7 +410,11 @@ sctp_backoff_on_timeout(struct sctp_tcb *stcb,
int num_marked, int num_abandoned)
{
if (net->RTO == 0) {
- net->RTO = stcb->asoc.minrto;
+ if (net->RTO_measured) {
+ net->RTO = stcb->asoc.minrto;
+ } else {
+ net->RTO = stcb->asoc.initial_rto;
+ }
}
net->RTO <<= 1;
if (net->RTO > stcb->asoc.maxrto) {
@@ -435,6 +444,11 @@ sctp_recover_sent_list(struct sctp_tcb *stcb)
asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--;
}
}
+ if ((asoc->strmout[chk->rec.data.stream_number].chunks_on_queues == 0) &&
+ (asoc->strmout[chk->rec.data.stream_number].state == SCTP_STREAM_RESET_PENDING) &&
+ TAILQ_EMPTY(&asoc->strmout[chk->rec.data.stream_number].outqueue)) {
+ asoc->trigger_reset = 1;
+ }
TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
if (PR_SCTP_ENABLED(chk->flags)) {
if (asoc->pr_sctp_cnt != 0)
@@ -445,7 +459,7 @@ sctp_recover_sent_list(struct sctp_tcb *stcb)
sctp_free_bufspace(stcb, asoc, chk, 1);
sctp_m_freem(chk->data);
chk->data = NULL;
- if (asoc->peer_supports_prsctp && PR_SCTP_BUF_ENABLED(chk->flags)) {
+ if (asoc->prsctp_supported && PR_SCTP_BUF_ENABLED(chk->flags)) {
asoc->sent_queue_cnt_removeable--;
}
}
@@ -600,7 +614,7 @@ start_again:
continue;
}
}
- if (stcb->asoc.peer_supports_prsctp && PR_SCTP_TTL_ENABLED(chk->flags)) {
+ if (stcb->asoc.prsctp_supported && PR_SCTP_TTL_ENABLED(chk->flags)) {
/* Is it expired? */
if (timevalcmp(&now, &chk->rec.data.timetodrop, >)) {
/* Yes so drop it */
@@ -614,7 +628,7 @@ start_again:
continue;
}
}
- if (stcb->asoc.peer_supports_prsctp && PR_SCTP_RTX_ENABLED(chk->flags)) {
+ if (stcb->asoc.prsctp_supported && PR_SCTP_RTX_ENABLED(chk->flags)) {
/* Has it been retransmitted tv_sec times? */
if (chk->snd_count > chk->rec.data.timetodrop.tv_sec) {
if (chk->data) {
@@ -650,7 +664,7 @@ start_again:
sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_RSND_TO,
chk->whoTo->flight_size,
chk->book_size,
- (uintptr_t) chk->whoTo,
+ (uint32_t) (uintptr_t) chk->whoTo,
chk->rec.data.TSN_seq);
}
sctp_flight_size_decrease(chk);
@@ -778,7 +792,7 @@ start_again:
sctp_misc_ints(SCTP_FLIGHT_LOG_UP,
chk->whoTo->flight_size,
chk->book_size,
- (uintptr_t) chk->whoTo,
+ (uint32_t) (uintptr_t) chk->whoTo,
chk->rec.data.TSN_seq);
}
sctp_flight_size_increase(chk);
@@ -957,7 +971,7 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp,
sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
return (0);
}
- if (stcb->asoc.peer_supports_prsctp) {
+ if (stcb->asoc.prsctp_supported) {
struct sctp_tmit_chunk *lchk;
lchk = sctp_try_advance_peer_ack_point(stcb, &stcb->asoc);
@@ -1043,9 +1057,9 @@ sctp_cookie_timer(struct sctp_inpcb *inp,
/* FOOBAR! */
struct mbuf *op_err;
- op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION,
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
"Cookie timer expired, but no cookie");
- inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_4;
+ inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_3;
sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
} else {
#ifdef INVARIANTS
@@ -1064,8 +1078,8 @@ sctp_cookie_timer(struct sctp_inpcb *inp,
return (1);
}
/*
- * cleared theshold management now lets backoff the address & select
- * an alternate
+ * Cleared threshold management, now lets backoff the address and
+ * select an alternate
*/
stcb->asoc.dropped_special_cnt = 0;
sctp_backoff_on_timeout(stcb, cookie->whoTo, 1, 0, 0);
@@ -1110,8 +1124,8 @@ sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
return (1);
}
/*
- * cleared theshold management now lets backoff the address & select
- * an alternate
+ * Cleared threshold management, now lets backoff the address and
+ * select an alternate
*/
sctp_backoff_on_timeout(stcb, strrst->whoTo, 1, 0, 0);
alt = sctp_find_alternate_net(stcb, strrst->whoTo, 0);
@@ -1270,7 +1284,7 @@ sctp_shutdown_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
{
struct sctp_nets *alt;
- /* first threshold managment */
+ /* first threshold management */
if (sctp_threshold_management(inp, stcb, net, stcb->asoc.max_send_times)) {
/* Assoc is over */
return (1);
@@ -1293,7 +1307,7 @@ sctp_shutdownack_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
{
struct sctp_nets *alt;
- /* first threshold managment */
+ /* first threshold management */
if (sctp_threshold_management(inp, stcb, net, stcb->asoc.max_send_times)) {
/* Assoc is over */
return (1);
@@ -1482,11 +1496,15 @@ sctp_pathmtu_timer(struct sctp_inpcb *inp,
}
if (net->ro._s_addr) {
mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._s_addr.sa, net->ro.ro_rt);
+#if defined(INET) || defined(INET6)
if (net->port) {
mtu -= sizeof(struct udphdr);
}
+#endif
if (mtu > next_mtu) {
net->mtu = next_mtu;
+ } else {
+ net->mtu = mtu;
}
}
}
diff --git a/freebsd/sys/netinet/sctp_timer.h b/freebsd/sys/netinet/sctp_timer.h
index fd9df804..6d409cdc 100644
--- a/freebsd/sys/netinet/sctp_timer.h
+++ b/freebsd/sys/netinet/sctp_timer.h
@@ -46,10 +46,6 @@ sctp_find_alternate_net(struct sctp_tcb *,
struct sctp_nets *, int mode);
int
-sctp_threshold_management(struct sctp_inpcb *, struct sctp_tcb *,
- struct sctp_nets *, uint16_t);
-
-int
sctp_t3rxt_timer(struct sctp_inpcb *, struct sctp_tcb *,
struct sctp_nets *);
int
diff --git a/freebsd/sys/netinet/sctp_uio.h b/freebsd/sys/netinet/sctp_uio.h
index 968fc980..e65b7b5e 100644
--- a/freebsd/sys/netinet/sctp_uio.h
+++ b/freebsd/sys/netinet/sctp_uio.h
@@ -134,20 +134,27 @@ struct sctp_extrcvinfo {
uint16_t sinfo_flags;
uint32_t sinfo_ppid;
uint32_t sinfo_context;
- uint32_t sinfo_timetolive;
+ uint32_t sinfo_timetolive; /* should have been sinfo_pr_value */
uint32_t sinfo_tsn;
uint32_t sinfo_cumtsn;
sctp_assoc_t sinfo_assoc_id;
- uint16_t sreinfo_next_flags;
- uint16_t sreinfo_next_stream;
- uint32_t sreinfo_next_aid;
- uint32_t sreinfo_next_length;
- uint32_t sreinfo_next_ppid;
+ uint16_t serinfo_next_flags;
+ uint16_t serinfo_next_stream;
+ uint32_t serinfo_next_aid;
+ uint32_t serinfo_next_length;
+ uint32_t serinfo_next_ppid;
uint16_t sinfo_keynumber;
uint16_t sinfo_keynumber_valid;
uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD_SHORT];
};
+#define sinfo_pr_value sinfo_timetolive
+#define sreinfo_next_flags serinfo_next_flags
+#define sreinfo_next_stream serinfo_next_stream
+#define sreinfo_next_aid serinfo_next_aid
+#define sreinfo_next_length serinfo_next_length
+#define sreinfo_next_ppid serinfo_next_ppid
+
struct sctp_sndinfo {
uint16_t snd_sid;
uint16_t snd_flags;
@@ -249,18 +256,24 @@ struct sctp_snd_all_completes {
SCTP_SACK_IMMEDIATELY)) != 0)
/* for the endpoint */
-/* The lower byte is an enumeration of PR-SCTP policies */
+/* The lower four bits is an enumeration of PR-SCTP policies */
#define SCTP_PR_SCTP_NONE 0x0000/* Reliable transfer */
#define SCTP_PR_SCTP_TTL 0x0001/* Time based PR-SCTP */
-#define SCTP_PR_SCTP_BUF 0x0002/* Buffer based PR-SCTP */
+#define SCTP_PR_SCTP_PRIO 0x0002/* Buffer based PR-SCTP */
+#define SCTP_PR_SCTP_BUF SCTP_PR_SCTP_PRIO /* For backwards compatibility */
#define SCTP_PR_SCTP_RTX 0x0003/* Number of retransmissions based PR-SCTP */
+#define SCTP_PR_SCTP_MAX SCTP_PR_SCTP_RTX
+#define SCTP_PR_SCTP_ALL 0x000f/* Used for aggregated stats */
#define PR_SCTP_POLICY(x) ((x) & 0x0f)
-#define PR_SCTP_ENABLED(x) (PR_SCTP_POLICY(x) != SCTP_PR_SCTP_NONE)
+#define PR_SCTP_ENABLED(x) ((PR_SCTP_POLICY(x) != SCTP_PR_SCTP_NONE) && \
+ (PR_SCTP_POLICY(x) != SCTP_PR_SCTP_ALL))
#define PR_SCTP_TTL_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_TTL)
#define PR_SCTP_BUF_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_BUF)
#define PR_SCTP_RTX_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_RTX)
-#define PR_SCTP_INVALID_POLICY(x) (PR_SCTP_POLICY(x) > SCTP_PR_SCTP_RTX)
+#define PR_SCTP_INVALID_POLICY(x) (PR_SCTP_POLICY(x) > SCTP_PR_SCTP_MAX)
+#define PR_SCTP_VALID_POLICY(x) (PR_SCTP_POLICY(x) <= SCTP_PR_SCTP_MAX)
+
/* Stat's */
struct sctp_pcbinfo {
uint32_t ep_count;
@@ -306,12 +319,13 @@ struct sctp_assoc_change {
#define SCTP_CANT_STR_ASSOC 0x0005
/* sac_info values */
-#define SCTP_ASSOC_SUPPORTS_PR 0x01
-#define SCTP_ASSOC_SUPPORTS_AUTH 0x02
-#define SCTP_ASSOC_SUPPORTS_ASCONF 0x03
-#define SCTP_ASSOC_SUPPORTS_MULTIBUF 0x04
-#define SCTP_ASSOC_SUPPORTS_RE_CONFIG 0x05
-#define SCTP_ASSOC_SUPPORTS_MAX 0x05
+#define SCTP_ASSOC_SUPPORTS_PR 0x01
+#define SCTP_ASSOC_SUPPORTS_AUTH 0x02
+#define SCTP_ASSOC_SUPPORTS_ASCONF 0x03
+#define SCTP_ASSOC_SUPPORTS_MULTIBUF 0x04
+#define SCTP_ASSOC_SUPPORTS_RE_CONFIG 0x05
+#define SCTP_ASSOC_SUPPORTS_INTERLEAVING 0x06
+#define SCTP_ASSOC_SUPPORTS_MAX 0x06
/*
* Address event
*/
@@ -323,7 +337,6 @@ struct sctp_paddr_change {
uint32_t spc_state;
uint32_t spc_error;
sctp_assoc_t spc_assoc_id;
- uint8_t spc_padding[4];
};
/* paddr state values */
@@ -346,7 +359,7 @@ struct sctp_remote_error {
uint32_t sre_length;
uint16_t sre_error;
sctp_assoc_t sre_assoc_id;
- uint8_t sre_data[4];
+ uint8_t sre_data[];
};
/* data send failure event (deprecated) */
@@ -578,6 +591,7 @@ struct sctp_paddrthlds {
sctp_assoc_t spt_assoc_id;
uint16_t spt_pathmaxrxt;
uint16_t spt_pathpfthld;
+ uint16_t spt_pathcpthld;
};
struct sctp_paddrinfo {
@@ -720,6 +734,14 @@ struct sctp_udpencaps {
uint16_t sue_port;
};
+struct sctp_prstatus {
+ sctp_assoc_t sprstat_assoc_id;
+ uint16_t sprstat_sid;
+ uint16_t sprstat_policy;
+ uint64_t sprstat_abandoned_unsent;
+ uint64_t sprstat_abandoned_sent;
+};
+
struct sctp_cwnd_args {
struct sctp_nets *net; /* network to *//* FIXME: LP64 issue */
uint32_t cwnd_new_value;/* cwnd in k */
@@ -1145,15 +1167,22 @@ union sctp_sockstore {
struct xsctp_inpcb {
uint32_t last;
uint32_t flags;
- uint32_t features;
+ uint64_t features;
uint32_t total_sends;
uint32_t total_recvs;
uint32_t total_nospaces;
uint32_t fragmentation_point;
uint16_t local_port;
- uint16_t qlen;
- uint16_t maxqlen;
- uint32_t extra_padding[32]; /* future */
+ uint16_t qlen_old;
+ uint16_t maxqlen_old;
+ void *socket;
+ uint32_t qlen;
+ uint32_t maxqlen;
+#if defined(__LP64__)
+ uint32_t extra_padding[27]; /* future */
+#else
+ uint32_t extra_padding[28]; /* future */
+#endif
};
struct xsctp_tcb {
@@ -1211,7 +1240,8 @@ struct xsctp_raddr {
struct sctp_timeval start_time; /* sctpAssocLocalRemEntry 8 */
uint32_t rtt;
uint32_t heartbeat_interval;
- uint32_t extra_padding[31]; /* future */
+ uint32_t ssthresh;
+ uint32_t extra_padding[30]; /* future */
};
#define SCTP_MAX_LOGGING_SIZE 30000
diff --git a/freebsd/sys/netinet/sctp_usrreq.c b/freebsd/sys/netinet/sctp_usrreq.c
index b19a7499..1cbb7076 100644
--- a/freebsd/sys/netinet/sctp_usrreq.c
+++ b/freebsd/sys/netinet/sctp_usrreq.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp_header.h>
#include <netinet/sctp_var.h>
#ifdef INET6
+#include <netinet6/sctp6_var.h>
#endif
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_output.h>
@@ -55,8 +56,8 @@ __FBSDID("$FreeBSD$");
-extern struct sctp_cc_functions sctp_cc_functions[];
-extern struct sctp_ss_functions sctp_ss_functions[];
+extern const struct sctp_cc_functions sctp_cc_functions[];
+extern const struct sctp_ss_functions sctp_ss_functions[];
void
sctp_init(void)
@@ -90,13 +91,15 @@ sctp_init(void)
#endif
}
-void
-sctp_finish(void)
+#ifdef VIMAGE
+static void
+sctp_finish(void *unused __unused)
{
sctp_pcb_finish();
}
-
+VNET_SYSUNINIT(sctp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, sctp_finish, NULL);
+#endif
void
sctp_pathmtu_adjustment(struct sctp_tcb *stcb, uint16_t nxtsz)
@@ -126,148 +129,55 @@ sctp_pathmtu_adjustment(struct sctp_tcb *stcb, uint16_t nxtsz)
if (chk->sent < SCTP_DATAGRAM_RESEND) {
sctp_flight_size_decrease(chk);
sctp_total_flight_decrease(stcb, chk);
- }
- if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ chk->rec.data.doing_fast_retransmit = 0;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PMTU,
+ chk->whoTo->flight_size,
+ chk->book_size,
+ (uint32_t) (uintptr_t) chk->whoTo,
+ chk->rec.data.TSN_seq);
+ }
+ /* Clear any time so NO RTT is being done */
+ chk->do_rtt = 0;
}
- chk->sent = SCTP_DATAGRAM_RESEND;
- chk->rec.data.doing_fast_retransmit = 0;
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
- sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PMTU,
- chk->whoTo->flight_size,
- chk->book_size,
- (uintptr_t) chk->whoTo,
- chk->rec.data.TSN_seq);
- }
- /* Clear any time so NO RTT is being done */
- chk->do_rtt = 0;
}
}
}
#ifdef INET
-static void
-sctp_notify_mbuf(struct sctp_inpcb *inp,
- struct sctp_tcb *stcb,
- struct sctp_nets *net,
- struct ip *ip,
- struct sctphdr *sh)
-{
- struct icmp *icmph;
- int totsz, tmr_stopped = 0;
- uint16_t nxtsz;
-
- /* protection */
- if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
- (ip == NULL) || (sh == NULL)) {
- if (stcb != NULL) {
- SCTP_TCB_UNLOCK(stcb);
- }
- return;
- }
- /* First job is to verify the vtag matches what I would send */
- if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) {
- SCTP_TCB_UNLOCK(stcb);
- return;
- }
- icmph = (struct icmp *)((caddr_t)ip - (sizeof(struct icmp) -
- sizeof(struct ip)));
- if (icmph->icmp_type != ICMP_UNREACH) {
- /* We only care about unreachable */
- SCTP_TCB_UNLOCK(stcb);
- return;
- }
- if (icmph->icmp_code != ICMP_UNREACH_NEEDFRAG) {
- /* not a unreachable message due to frag. */
- SCTP_TCB_UNLOCK(stcb);
- return;
- }
- totsz = ip->ip_len;
-
- nxtsz = ntohs(icmph->icmp_nextmtu);
- if (nxtsz == 0) {
- /*
- * old type router that does not tell us what the next size
- * mtu is. Rats we will have to guess (in a educated fashion
- * of course)
- */
- nxtsz = sctp_get_prev_mtu(totsz);
- }
- /* Stop any PMTU timer */
- if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
- tmr_stopped = 1;
- sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
- SCTP_FROM_SCTP_USRREQ + SCTP_LOC_1);
- }
- /* Adjust destination size limit */
- if (net->mtu > nxtsz) {
- net->mtu = nxtsz;
- if (net->port) {
- net->mtu -= sizeof(struct udphdr);
- }
- }
- /* now what about the ep? */
- if (stcb->asoc.smallest_mtu > nxtsz) {
- sctp_pathmtu_adjustment(stcb, nxtsz);
- }
- if (tmr_stopped)
- sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
-
- SCTP_TCB_UNLOCK(stcb);
-}
-
-#endif
-
void
sctp_notify(struct sctp_inpcb *inp,
- struct ip *ip,
- struct sctphdr *sh,
- struct sockaddr *to,
struct sctp_tcb *stcb,
- struct sctp_nets *net)
+ struct sctp_nets *net,
+ uint8_t icmp_type,
+ uint8_t icmp_code,
+ uint16_t ip_len,
+ uint16_t next_mtu)
{
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
- struct icmp *icmph;
+ int timer_stopped;
- /* protection */
- if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
- (sh == NULL) || (to == NULL)) {
- if (stcb)
- SCTP_TCB_UNLOCK(stcb);
- return;
- }
- /* First job is to verify the vtag matches what I would send */
- if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) {
- SCTP_TCB_UNLOCK(stcb);
- return;
- }
- icmph = (struct icmp *)((caddr_t)ip - (sizeof(struct icmp) -
- sizeof(struct ip)));
- if (icmph->icmp_type != ICMP_UNREACH) {
+ if (icmp_type != ICMP_UNREACH) {
/* We only care about unreachable */
SCTP_TCB_UNLOCK(stcb);
return;
}
- if ((icmph->icmp_code == ICMP_UNREACH_NET) ||
- (icmph->icmp_code == ICMP_UNREACH_HOST) ||
- (icmph->icmp_code == ICMP_UNREACH_NET_UNKNOWN) ||
- (icmph->icmp_code == ICMP_UNREACH_HOST_UNKNOWN) ||
- (icmph->icmp_code == ICMP_UNREACH_ISOLATED) ||
- (icmph->icmp_code == ICMP_UNREACH_NET_PROHIB) ||
- (icmph->icmp_code == ICMP_UNREACH_HOST_PROHIB) ||
- (icmph->icmp_code == ICMP_UNREACH_FILTER_PROHIB)) {
-
- /*
- * Hmm reachablity problems we must examine closely. If its
- * not reachable, we may have lost a network. Or if there is
- * NO protocol at the other end named SCTP. well we consider
- * it a OOTB abort.
- */
+ if ((icmp_code == ICMP_UNREACH_NET) ||
+ (icmp_code == ICMP_UNREACH_HOST) ||
+ (icmp_code == ICMP_UNREACH_NET_UNKNOWN) ||
+ (icmp_code == ICMP_UNREACH_HOST_UNKNOWN) ||
+ (icmp_code == ICMP_UNREACH_ISOLATED) ||
+ (icmp_code == ICMP_UNREACH_NET_PROHIB) ||
+ (icmp_code == ICMP_UNREACH_HOST_PROHIB) ||
+ (icmp_code == ICMP_UNREACH_FILTER_PROHIB)) {
+ /* Mark the net unreachable. */
if (net->dest_state & SCTP_ADDR_REACHABLE) {
- /* Ok that destination is NOT reachable */
+ /* OK, that destination is NOT reachable. */
net->dest_state &= ~SCTP_ADDR_REACHABLE;
net->dest_state &= ~SCTP_ADDR_PF;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
@@ -275,15 +185,9 @@ sctp_notify(struct sctp_inpcb *inp,
(void *)net, SCTP_SO_NOT_LOCKED);
}
SCTP_TCB_UNLOCK(stcb);
- } else if ((icmph->icmp_code == ICMP_UNREACH_PROTOCOL) ||
- (icmph->icmp_code == ICMP_UNREACH_PORT)) {
- /*
- * Here the peer is either playing tricks on us, including
- * an address that belongs to someone who does not support
- * SCTP OR was a userland implementation that shutdown and
- * now is dead. In either case treat it like a OOTB abort
- * with no TCB
- */
+ } else if ((icmp_code == ICMP_UNREACH_PROTOCOL) ||
+ (icmp_code == ICMP_UNREACH_PORT)) {
+ /* Treat it like an ABORT. */
sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
@@ -293,72 +197,141 @@ sctp_notify(struct sctp_inpcb *inp,
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
/* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */
#endif
/* no need to unlock here, since the TCB is gone */
+ } else if (icmp_code == ICMP_UNREACH_NEEDFRAG) {
+ /* Find the next (smaller) MTU */
+ if (next_mtu == 0) {
+ /*
+ * Old type router that does not tell us what the
+ * next MTU is. Rats we will have to guess (in a
+ * educated fashion of course).
+ */
+ next_mtu = sctp_get_prev_mtu(ip_len);
+ }
+ /* Stop the PMTU timer. */
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ timer_stopped = 1;
+ sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_1);
+ } else {
+ timer_stopped = 0;
+ }
+ /* Update the path MTU. */
+ if (net->mtu > next_mtu) {
+ net->mtu = next_mtu;
+ if (net->port) {
+ net->mtu -= sizeof(struct udphdr);
+ }
+ }
+ /* Update the association MTU */
+ if (stcb->asoc.smallest_mtu > next_mtu) {
+ sctp_pathmtu_adjustment(stcb, next_mtu);
+ }
+ /* Finally, start the PMTU timer if it was running before. */
+ if (timer_stopped) {
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+ }
+ SCTP_TCB_UNLOCK(stcb);
} else {
SCTP_TCB_UNLOCK(stcb);
}
}
-#ifdef INET
void
-sctp_ctlinput(cmd, sa, vip)
- int cmd;
- struct sockaddr *sa;
- void *vip;
+sctp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
{
- struct ip *ip = vip;
+ struct ip *outer_ip;
+ struct ip *inner_ip;
struct sctphdr *sh;
- uint32_t vrf_id;
+ struct icmp *icmp;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+ struct sctp_init_chunk *ch;
+ struct sockaddr_in src, dst;
- /* FIX, for non-bsd is this right? */
- vrf_id = SCTP_DEFAULT_VRFID;
if (sa->sa_family != AF_INET ||
((struct sockaddr_in *)sa)->sin_addr.s_addr == INADDR_ANY) {
return;
}
if (PRC_IS_REDIRECT(cmd)) {
- ip = 0;
+ vip = NULL;
} else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) {
return;
}
- if (ip) {
- struct sctp_inpcb *inp = NULL;
- struct sctp_tcb *stcb = NULL;
- struct sctp_nets *net = NULL;
- struct sockaddr_in to, from;
-
- sh = (struct sctphdr *)((caddr_t)ip + (ip->ip_hl << 2));
- bzero(&to, sizeof(to));
- bzero(&from, sizeof(from));
- from.sin_family = to.sin_family = AF_INET;
- from.sin_len = to.sin_len = sizeof(to);
- from.sin_port = sh->src_port;
- from.sin_addr = ip->ip_src;
- to.sin_port = sh->dest_port;
- to.sin_addr = ip->ip_dst;
-
+ if (vip != NULL) {
+ inner_ip = (struct ip *)vip;
+ icmp = (struct icmp *)((caddr_t)inner_ip -
+ (sizeof(struct icmp) - sizeof(struct ip)));
+ outer_ip = (struct ip *)((caddr_t)icmp - sizeof(struct ip));
+ sh = (struct sctphdr *)((caddr_t)inner_ip + (inner_ip->ip_hl << 2));
+ memset(&src, 0, sizeof(struct sockaddr_in));
+ src.sin_family = AF_INET;
+ src.sin_len = sizeof(struct sockaddr_in);
+ src.sin_port = sh->src_port;
+ src.sin_addr = inner_ip->ip_src;
+ memset(&dst, 0, sizeof(struct sockaddr_in));
+ dst.sin_family = AF_INET;
+ dst.sin_len = sizeof(struct sockaddr_in);
+ dst.sin_port = sh->dest_port;
+ dst.sin_addr = inner_ip->ip_dst;
/*
- * 'to' holds the dest of the packet that failed to be sent.
- * 'from' holds our local endpoint address. Thus we reverse
- * the to and the from in the lookup.
+ * 'dst' holds the dest of the packet that failed to be
+ * sent. 'src' holds our local endpoint address. Thus we
+ * reverse the dst and the src in the lookup.
*/
- stcb = sctp_findassociation_addr_sa((struct sockaddr *)&to,
- (struct sockaddr *)&from,
- &inp, &net, 1, vrf_id);
- if (stcb != NULL && inp && (inp->sctp_socket != NULL)) {
- if (cmd != PRC_MSGSIZE) {
- sctp_notify(inp, ip, sh,
- (struct sockaddr *)&to, stcb,
- net);
+ inp = NULL;
+ net = NULL;
+ stcb = sctp_findassociation_addr_sa((struct sockaddr *)&dst,
+ (struct sockaddr *)&src,
+ &inp, &net, 1,
+ SCTP_DEFAULT_VRFID);
+ if ((stcb != NULL) &&
+ (net != NULL) &&
+ (inp != NULL)) {
+ /* Check the verification tag */
+ if (ntohl(sh->v_tag) != 0) {
+ /*
+ * This must be the verification tag used
+ * for sending out packets. We don't
+ * consider packets reflecting the
+ * verification tag.
+ */
+ if (ntohl(sh->v_tag) != stcb->asoc.peer_vtag) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
} else {
- /* handle possible ICMP size messages */
- sctp_notify_mbuf(inp, stcb, net, ip, sh);
+ if (ntohs(outer_ip->ip_len) >=
+ sizeof(struct ip) +
+ 8 + (inner_ip->ip_hl << 2) + 20) {
+ /*
+ * In this case we can check if we
+ * got an INIT chunk and if the
+ * initiate tag matches.
+ */
+ ch = (struct sctp_init_chunk *)(sh + 1);
+ if ((ch->ch.chunk_type != SCTP_INITIATION) ||
+ (ntohl(ch->init.initiate_tag) != stcb->asoc.my_vtag)) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ } else {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
}
+ sctp_notify(inp, stcb, net,
+ icmp->icmp_type,
+ icmp->icmp_code,
+ ntohs(inner_ip->ip_len),
+ ntohs(icmp->icmp_nextmtu));
} else {
if ((stcb == NULL) && (inp != NULL)) {
/* reduce ref-count */
@@ -489,13 +462,8 @@ sctp_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNUS
int error;
uint32_t vrf_id = SCTP_DEFAULT_VRFID;
-#ifdef IPSEC
- uint32_t flags;
-
-#endif
-
inp = (struct sctp_inpcb *)so->so_pcb;
- if (inp != 0) {
+ if (inp != NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
@@ -515,33 +483,6 @@ sctp_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNUS
ip_inp = &inp->ip_inp.inp;
ip_inp->inp_vflag |= INP_IPV4;
ip_inp->inp_ip_ttl = MODULE_GLOBAL(ip_defttl);
-#ifdef IPSEC
- error = ipsec_init_policy(so, &ip_inp->inp_sp);
-#ifdef SCTP_LOG_CLOSING
- sctp_log_closing(inp, NULL, 17);
-#endif
- if (error != 0) {
-try_again:
- flags = inp->sctp_flags;
- if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
- (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
-#ifdef SCTP_LOG_CLOSING
- sctp_log_closing(inp, NULL, 15);
-#endif
- SCTP_INP_WUNLOCK(inp);
- sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
- SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
- } else {
- flags = inp->sctp_flags;
- if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
- goto try_again;
- } else {
- SCTP_INP_WUNLOCK(inp);
- }
- }
- return (error);
- }
-#endif /* IPSEC */
SCTP_INP_WUNLOCK(inp);
return (0);
}
@@ -759,7 +700,7 @@ sctp_disconnect(struct socket *so)
/* Left with Data unread */
struct mbuf *err;
- err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA);
+ err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_NOWAIT, 1, MT_DATA);
if (err) {
/*
* Fill in the user
@@ -780,7 +721,8 @@ sctp_disconnect(struct socket *so)
(SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_3);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_3);
/* No unlock tcb assoc is gone */
return (0);
}
@@ -788,7 +730,7 @@ sctp_disconnect(struct socket *so)
TAILQ_EMPTY(&asoc->sent_queue) &&
(asoc->stream_queue_cnt == 0)) {
/* there is nothing queued to send, so done */
- if (asoc->locked_on_sending) {
+ if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
goto abort_anyway;
}
if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
@@ -837,18 +779,8 @@ sctp_disconnect(struct socket *so)
asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
netp);
- if (asoc->locked_on_sending) {
- /* Locked to send out the data */
- struct sctp_stream_queue_pending *sp;
-
- sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
- if (sp == NULL) {
- SCTP_PRINTF("Error, sp is NULL, locked on sending is non-null strm:%d\n",
- asoc->locked_on_sending->stream_no);
- } else {
- if ((sp->length == 0) && (sp->msg_is_complete == 0))
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
- }
+ if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
}
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
@@ -865,7 +797,8 @@ sctp_disconnect(struct socket *so)
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
SCTP_INP_RUNLOCK(inp);
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_5);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_5);
return (0);
} else {
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
@@ -957,14 +890,15 @@ sctp_shutdown(struct socket *so)
SCTP_INP_RUNLOCK(inp);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
return (EOPNOTSUPP);
- }
- /*
- * Ok if we reach here its the TCP model and it is either a SHUT_WR
- * or SHUT_RDWR. This means we put the shutdown flag against it.
- */
- {
+ } else {
+ /*
+ * Ok, if we reach here its the TCP model and it is either a
+ * SHUT_WR or SHUT_RDWR. This means we put the shutdown flag
+ * against it.
+ */
struct sctp_tcb *stcb;
struct sctp_association *asoc;
+ struct sctp_nets *netp;
if ((so->so_state &
(SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
@@ -976,7 +910,7 @@ sctp_shutdown(struct socket *so)
stcb = LIST_FIRST(&inp->sctp_asoc_list);
if (stcb == NULL) {
/*
- * Ok we hit the case that the shutdown call was
+ * Ok, we hit the case that the shutdown call was
* made after an abort or something. Nothing to do
* now.
*/
@@ -985,66 +919,50 @@ sctp_shutdown(struct socket *so)
}
SCTP_TCB_LOCK(stcb);
asoc = &stcb->asoc;
- if (TAILQ_EMPTY(&asoc->send_queue) &&
+ if (asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_ECHOED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_OPEN)) {
+ /*
+ * If we are not in or before ESTABLISHED, there is
+ * no protocol action required.
+ */
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ if (stcb->asoc.alternate) {
+ netp = stcb->asoc.alternate;
+ } else {
+ netp = stcb->asoc.primary_destination;
+ }
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) &&
+ TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
(asoc->stream_queue_cnt == 0)) {
- if (asoc->locked_on_sending) {
+ if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
goto abort_anyway;
}
/* there is nothing queued to send, so I'm done... */
- if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
- /* only send SHUTDOWN the first time through */
- struct sctp_nets *netp;
-
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
- SCTP_STAT_DECR_GAUGE32(sctps_currestab);
- }
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
- sctp_stop_timers_for_shutdown(stcb);
- if (stcb->asoc.alternate) {
- netp = stcb->asoc.alternate;
- } else {
- netp = stcb->asoc.primary_destination;
- }
- sctp_send_shutdown(stcb, netp);
- sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
- stcb->sctp_ep, stcb, netp);
- sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
- stcb->sctp_ep, stcb, netp);
- sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
- }
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb, netp);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb, netp);
} else {
/*
- * we still got (or just got) data to send, so set
- * SHUTDOWN_PENDING
+ * We still got (or just got) data to send, so set
+ * SHUTDOWN_PENDING.
*/
- struct sctp_nets *netp;
-
- if (stcb->asoc.alternate) {
- netp = stcb->asoc.alternate;
- } else {
- netp = stcb->asoc.primary_destination;
- }
-
- asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
- sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
- netp);
-
- if (asoc->locked_on_sending) {
- /* Locked to send out the data */
- struct sctp_stream_queue_pending *sp;
-
- sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
- if (sp == NULL) {
- SCTP_PRINTF("Error, sp is NULL, locked on sending is non-null strm:%d\n",
- asoc->locked_on_sending->stream_no);
- } else {
- if ((sp->length == 0) && (sp->msg_is_complete == 0)) {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
- }
- }
+ SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
+ SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_PARTIAL_MSG_LEFT);
}
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
@@ -1056,16 +974,20 @@ sctp_shutdown(struct socket *so)
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6;
sctp_abort_an_association(stcb->sctp_ep, stcb,
op_err, SCTP_SO_LOCKED);
- goto skip_unlock;
- } else {
- sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
}
}
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, netp);
+ /*
+ * XXX: Why do this in the case where we have still data
+ * queued?
+ */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
}
-skip_unlock:
- SCTP_INP_RUNLOCK(inp);
- return (0);
}
/*
@@ -1190,7 +1112,7 @@ sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp,
if (ipv4_addr_legal) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ sin = &sctp_ifa->address.sin;
if (sin->sin_addr.s_addr == 0) {
/*
* we skip
@@ -1235,7 +1157,7 @@ sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp,
if (ipv6_addr_legal) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ sin6 = &sctp_ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
/*
* we skip
@@ -1375,10 +1297,14 @@ sctp_count_max_addresses_vrf(struct sctp_inpcb *inp, uint32_t vrf_id)
switch (sctp_ifa->address.sa.sa_family) {
#ifdef INET
case AF_INET:
+#ifdef INET6
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4))
cnt += sizeof(struct sockaddr_in6);
else
cnt += sizeof(struct sockaddr_in);
+#else
+ cnt += sizeof(struct sockaddr_in);
+#endif
break;
#endif
#ifdef INET6
@@ -1398,10 +1324,14 @@ sctp_count_max_addresses_vrf(struct sctp_inpcb *inp, uint32_t vrf_id)
switch (laddr->ifa->address.sa.sa_family) {
#ifdef INET
case AF_INET:
+#ifdef INET6
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4))
cnt += sizeof(struct sockaddr_in6);
else
cnt += sizeof(struct sockaddr_in);
+#else
+ cnt += sizeof(struct sockaddr_in);
+#endif
break;
#endif
#ifdef INET6
@@ -1437,7 +1367,7 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
int creat_lock_on = 0;
struct sctp_tcb *stcb = NULL;
struct sockaddr *sa;
- int num_v6 = 0, num_v4 = 0, *totaddrp, totaddr;
+ unsigned int num_v6 = 0, num_v4 = 0, *totaddrp, totaddr;
uint32_t vrf_id;
int bad_addresses = 0;
sctp_assoc_t *a_id;
@@ -1473,10 +1403,10 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
error = EFAULT;
goto out_now;
}
- totaddrp = (int *)optval;
+ totaddrp = (unsigned int *)optval;
totaddr = *totaddrp;
sa = (struct sockaddr *)(totaddrp + 1);
- stcb = sctp_connectx_helper_find(inp, sa, &totaddr, &num_v4, &num_v6, &error, (optsize - sizeof(int)), &bad_addresses);
+ stcb = sctp_connectx_helper_find(inp, sa, &totaddr, &num_v4, &num_v6, &error, (unsigned int)(optsize - sizeof(int)), &bad_addresses);
if ((stcb != NULL) || bad_addresses) {
/* Already have or am bring up an association */
SCTP_ASOC_CREATE_UNLOCK(inp);
@@ -1525,6 +1455,8 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
/* We are GOOD to go */
stcb = sctp_aloc_assoc(inp, sa, &error, 0, vrf_id,
+ inp->sctp_ep.pre_open_stream_count,
+ inp->sctp_ep.port,
(struct thread *)p
);
if (stcb == NULL) {
@@ -1557,7 +1489,8 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error);
/* Fill in the return id */
if (error) {
- (void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6);
+ (void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
goto out_now;
}
a_id = (sctp_assoc_t *) optval;
@@ -1575,11 +1508,6 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
}
SCTP_TCB_UNLOCK(stcb);
- if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
- stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
- /* Set the connected flag so we can queue data */
- soisconnecting(so);
- }
out_now:
if (creat_lock_on) {
SCTP_ASOC_CREATE_UNLOCK(inp);
@@ -1752,6 +1680,37 @@ flags_out:
*optsize = sizeof(uint32_t);
break;
}
+ case SCTP_INTERLEAVING_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.idata_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->idata_supported) {
+ av->assoc_value = 1;
+ } else {
+ av->assoc_value = 0;
+ }
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
case SCTP_CMT_ON_OFF:
{
struct sctp_assoc_value *av;
@@ -1905,8 +1864,15 @@ flags_out:
uint32_t *value, cnt;
SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
- cnt = 0;
SCTP_INP_RLOCK(inp);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /* Can't do this for a 1-1 socket */
+ error = EINVAL;
+ SCTP_INP_RUNLOCK(inp);
+ break;
+ }
+ cnt = 0;
LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
cnt++;
}
@@ -1918,15 +1884,28 @@ flags_out:
case SCTP_GET_ASSOC_ID_LIST:
{
struct sctp_assoc_ids *ids;
- unsigned int at, limit;
+ uint32_t at;
+ size_t limit;
SCTP_CHECK_AND_CAST(ids, optval, struct sctp_assoc_ids, *optsize);
+ SCTP_INP_RLOCK(inp);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /* Can't do this for a 1-1 socket */
+ error = EINVAL;
+ SCTP_INP_RUNLOCK(inp);
+ break;
+ }
at = 0;
limit = (*optsize - sizeof(uint32_t)) / sizeof(sctp_assoc_t);
- SCTP_INP_RLOCK(inp);
LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
if (at < limit) {
ids->gaids_assoc_id[at++] = sctp_get_associd(stcb);
+ if (at == 0) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
} else {
error = EINVAL;
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
@@ -2219,23 +2198,27 @@ flags_out:
size = 0;
/* Count the sizes */
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
- if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
- size += sizeof(struct sockaddr_in6);
- } else {
- switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
+ switch (net->ro._l_addr.sa.sa_family) {
#ifdef INET
- case AF_INET:
+ case AF_INET:
+#ifdef INET6
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+ size += sizeof(struct sockaddr_in6);
+ } else {
size += sizeof(struct sockaddr_in);
- break;
+ }
+#else
+ size += sizeof(struct sockaddr_in);
+#endif
+ break;
#endif
#ifdef INET6
- case AF_INET6:
- size += sizeof(struct sockaddr_in6);
- break;
+ case AF_INET6:
+ size += sizeof(struct sockaddr_in6);
+ break;
#endif
- default:
- break;
- }
+ default:
+ break;
}
}
SCTP_TCB_UNLOCK(stcb);
@@ -2267,24 +2250,28 @@ flags_out:
sas = (struct sockaddr_storage *)&saddr->addr[0];
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
- if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
- cpsz = sizeof(struct sockaddr_in6);
- } else {
- switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
+ switch (net->ro._l_addr.sa.sa_family) {
#ifdef INET
- case AF_INET:
+ case AF_INET:
+#ifdef INET6
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+ cpsz = sizeof(struct sockaddr_in6);
+ } else {
cpsz = sizeof(struct sockaddr_in);
- break;
+ }
+#else
+ cpsz = sizeof(struct sockaddr_in);
+#endif
+ break;
#endif
#ifdef INET6
- case AF_INET6:
- cpsz = sizeof(struct sockaddr_in6);
- break;
+ case AF_INET6:
+ cpsz = sizeof(struct sockaddr_in6);
+ break;
#endif
- default:
- cpsz = 0;
- break;
- }
+ default:
+ cpsz = 0;
+ break;
}
if (cpsz == 0) {
break;
@@ -2295,15 +2282,15 @@ flags_out:
}
#if defined(INET) && defined(INET6)
if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) &&
- (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET)) {
+ (net->ro._l_addr.sa.sa_family == AF_INET)) {
/* Must map the address */
- in6_sin_2_v4mapsin6((struct sockaddr_in *)&net->ro._l_addr,
+ in6_sin_2_v4mapsin6(&net->ro._l_addr.sin,
(struct sockaddr_in6 *)sas);
} else {
-#endif
memcpy(sas, &net->ro._l_addr, cpsz);
-#if defined(INET) && defined(INET6)
}
+#else
+ memcpy(sas, &net->ro._l_addr, cpsz);
#endif
((struct sockaddr_in *)sas)->sin_port = stcb->rport;
@@ -2340,13 +2327,35 @@ flags_out:
{
struct sctp_paddrparams *paddrp;
struct sctp_nets *net;
+ struct sockaddr *addr;
+
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, *optsize);
SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
- net = NULL;
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&paddrp->spp_address);
+#if defined(INET) && defined(INET6)
+ if (paddrp->spp_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&paddrp->spp_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&paddrp->spp_address;
+ }
+ } else {
+ addr = (struct sockaddr *)&paddrp->spp_address;
+ }
+#else
+ addr = (struct sockaddr *)&paddrp->spp_address;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
} else {
/*
* We increment here since
@@ -2355,22 +2364,20 @@ flags_out:
* the locked tcb (last argument) is NOT a
* TCB.. aka NULL.
*/
+ net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&paddrp->spp_address, &net, NULL, NULL);
+ stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if (stcb && (net == NULL)) {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *)&paddrp->spp_address;
+ if ((stcb != NULL) && (net == NULL)) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
+ if (addr->sa_family == AF_INET) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)sa;
- if (sin->sin_addr.s_addr) {
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
@@ -2379,10 +2386,10 @@ flags_out:
} else
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
+ if (addr->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)sa;
+ sin6 = (struct sockaddr_in6 *)addr;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
@@ -2398,21 +2405,27 @@ flags_out:
break;
}
}
- if (stcb) {
+ if (stcb != NULL) {
/* Applies to the specific association */
paddrp->spp_flags = 0;
- if (net) {
- int ovh;
-
- if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- ovh = SCTP_MED_OVERHEAD;
- } else {
- ovh = SCTP_MED_V4_OVERHEAD;
- }
-
+ if (net != NULL) {
paddrp->spp_hbinterval = net->heart_beat_delay;
paddrp->spp_pathmaxrxt = net->failure_threshold;
- paddrp->spp_pathmtu = net->mtu - ovh;
+ paddrp->spp_pathmtu = net->mtu;
+ switch (net->ro._l_addr.sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+ paddrp->spp_pathmtu -= SCTP_MIN_V4_OVERHEAD;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ paddrp->spp_pathmtu -= SCTP_MIN_V4_OVERHEAD;
+ break;
+#endif
+ default:
+ break;
+ }
/* get flags for HB */
if (net->dest_state & SCTP_ADDR_NOHB) {
paddrp->spp_flags |= SPP_HB_DISABLE;
@@ -2421,9 +2434,9 @@ flags_out:
}
/* get flags for PMTU */
if (net->dest_state & SCTP_ADDR_NO_PMTUD) {
- paddrp->spp_flags |= SPP_PMTUD_ENABLE;
- } else {
paddrp->spp_flags |= SPP_PMTUD_DISABLE;
+ } else {
+ paddrp->spp_flags |= SPP_PMTUD_ENABLE;
}
if (net->dscp & 0x01) {
paddrp->spp_dscp = net->dscp & 0xfc;
@@ -2442,7 +2455,7 @@ flags_out:
* value
*/
paddrp->spp_pathmaxrxt = stcb->asoc.def_net_failure;
- paddrp->spp_pathmtu = sctp_get_frag_point(stcb, &stcb->asoc);
+ paddrp->spp_pathmtu = 0;
if (stcb->asoc.default_dscp & 0x01) {
paddrp->spp_dscp = stcb->asoc.default_dscp & 0xfc;
paddrp->spp_flags |= SPP_DSCP;
@@ -2517,13 +2530,35 @@ flags_out:
{
struct sctp_paddrinfo *paddri;
struct sctp_nets *net;
+ struct sockaddr *addr;
+
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
SCTP_CHECK_AND_CAST(paddri, optval, struct sctp_paddrinfo, *optsize);
SCTP_FIND_STCB(inp, stcb, paddri->spinfo_assoc_id);
- net = NULL;
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&paddri->spinfo_address);
+#if defined(INET) && defined(INET6)
+ if (paddri->spinfo_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&paddri->spinfo_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&paddri->spinfo_address;
+ }
+ } else {
+ addr = (struct sockaddr *)&paddri->spinfo_address;
+ }
+#else
+ addr = (struct sockaddr *)&paddri->spinfo_address;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
} else {
/*
* We increment here since
@@ -2532,14 +2567,15 @@ flags_out:
* the locked tcb (last argument) is NOT a
* TCB.. aka NULL.
*/
+ net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&paddri->spinfo_address, &net, NULL, NULL);
+ stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if ((stcb) && (net)) {
+ if ((stcb != NULL) && (net != NULL)) {
if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
/* It's unconfirmed */
paddri->spinfo_state = SCTP_UNCONFIRMED;
@@ -2555,10 +2591,24 @@ flags_out:
paddri->spinfo_rto = net->RTO;
paddri->spinfo_assoc_id = sctp_get_associd(stcb);
paddri->spinfo_mtu = net->mtu;
+ switch (addr->sa_family) {
+#if defined(INET)
+ case AF_INET:
+ paddri->spinfo_mtu -= SCTP_MIN_V4_OVERHEAD;
+ break;
+#endif
+#if defined(INET6)
+ case AF_INET6:
+ paddri->spinfo_mtu -= SCTP_MIN_OVERHEAD;
+ break;
+#endif
+ default:
+ break;
+ }
SCTP_TCB_UNLOCK(stcb);
*optsize = sizeof(struct sctp_paddrinfo);
} else {
- if (stcb) {
+ if (stcb != NULL) {
SCTP_TCB_UNLOCK(stcb);
}
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
@@ -2588,12 +2638,7 @@ flags_out:
error = EINVAL;
break;
}
- /*
- * I think passing the state is fine since
- * sctp_constants.h will be available to the user
- * land.
- */
- sstat->sstat_state = stcb->asoc.state;
+ sstat->sstat_state = sctp_map_assoc_state(stcb->asoc.state);
sstat->sstat_assoc_id = sctp_get_associd(stcb);
sstat->sstat_rwnd = stcb->asoc.peers_rwnd;
sstat->sstat_unackdata = stcb->asoc.sent_queue_cnt;
@@ -2631,6 +2676,20 @@ flags_out:
sstat->sstat_primary.spinfo_srtt = net->lastsa >> SCTP_RTT_SHIFT;
sstat->sstat_primary.spinfo_rto = net->RTO;
sstat->sstat_primary.spinfo_mtu = net->mtu;
+ switch (stcb->asoc.primary_destination->ro._l_addr.sa.sa_family) {
+#if defined(INET)
+ case AF_INET:
+ sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_V4_OVERHEAD;
+ break;
+#endif
+#if defined(INET6)
+ case AF_INET6:
+ sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_OVERHEAD;
+ break;
+#endif
+ default:
+ break;
+ }
sstat->sstat_primary.spinfo_assoc_id = sctp_get_associd(stcb);
SCTP_TCB_UNLOCK(stcb);
*optsize = sizeof(struct sctp_status);
@@ -2775,16 +2834,32 @@ flags_out:
SCTP_FIND_STCB(inp, stcb, ssp->ssp_assoc_id);
if (stcb) {
- /* simply copy out the sockaddr_storage... */
- size_t len;
+ union sctp_sockstore *addr;
- len = *optsize;
- if (len > stcb->asoc.primary_destination->ro._l_addr.sa.sa_len)
- len = stcb->asoc.primary_destination->ro._l_addr.sa.sa_len;
-
- memcpy(&ssp->ssp_addr,
- &stcb->asoc.primary_destination->ro._l_addr,
- len);
+ addr = &stcb->asoc.primary_destination->ro._l_addr;
+ switch (addr->sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+#ifdef INET6
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+ in6_sin_2_v4mapsin6(&addr->sin,
+ (struct sockaddr_in6 *)&ssp->ssp_addr);
+ } else {
+ memcpy(&ssp->ssp_addr, &addr->sin, sizeof(struct sockaddr_in));
+ }
+#else
+ memcpy(&ssp->ssp_addr, &addr->sin, sizeof(struct sockaddr_in));
+#endif
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ memcpy(&ssp->ssp_addr, &addr->sin6, sizeof(struct sockaddr_in6));
+ break;
+#endif
+ default:
+ break;
+ }
SCTP_TCB_UNLOCK(stcb);
*optsize = sizeof(struct sctp_setprim);
} else {
@@ -3124,13 +3199,35 @@ flags_out:
{
struct sctp_paddrthlds *thlds;
struct sctp_nets *net;
+ struct sockaddr *addr;
+
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
SCTP_CHECK_AND_CAST(thlds, optval, struct sctp_paddrthlds, *optsize);
SCTP_FIND_STCB(inp, stcb, thlds->spt_assoc_id);
- net = NULL;
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&thlds->spt_address);
+#if defined(INET) && defined(INET6)
+ if (thlds->spt_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&thlds->spt_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&thlds->spt_address;
+ }
+ } else {
+ addr = (struct sockaddr *)&thlds->spt_address;
+ }
+#else
+ addr = (struct sockaddr *)&thlds->spt_address;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
} else {
/*
* We increment here since
@@ -3139,22 +3236,20 @@ flags_out:
* the locked tcb (last argument) is NOT a
* TCB.. aka NULL.
*/
+ net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&thlds->spt_address, &net, NULL, NULL);
+ stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if (stcb && (net == NULL)) {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *)&thlds->spt_address;
+ if ((stcb != NULL) && (net == NULL)) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
+ if (addr->sa_family == AF_INET) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)sa;
- if (sin->sin_addr.s_addr) {
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
@@ -3163,10 +3258,10 @@ flags_out:
} else
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
+ if (addr->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)sa;
+ sin6 = (struct sockaddr_in6 *)addr;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
@@ -3182,13 +3277,15 @@ flags_out:
break;
}
}
- if (stcb) {
- if (net) {
+ if (stcb != NULL) {
+ if (net != NULL) {
thlds->spt_pathmaxrxt = net->failure_threshold;
thlds->spt_pathpfthld = net->pf_threshold;
+ thlds->spt_pathcpthld = 0xffff;
} else {
thlds->spt_pathmaxrxt = stcb->asoc.def_net_failure;
thlds->spt_pathpfthld = stcb->asoc.def_net_pf_threshold;
+ thlds->spt_pathcpthld = 0xffff;
}
thlds->spt_assoc_id = sctp_get_associd(stcb);
SCTP_TCB_UNLOCK(stcb);
@@ -3200,6 +3297,7 @@ flags_out:
SCTP_INP_RLOCK(inp);
thlds->spt_pathmaxrxt = inp->sctp_ep.def_net_failure;
thlds->spt_pathpfthld = inp->sctp_ep.def_net_pf_threshold;
+ thlds->spt_pathcpthld = 0xffff;
SCTP_INP_RUNLOCK(inp);
} else {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
@@ -3215,12 +3313,35 @@ flags_out:
{
struct sctp_udpencaps *encaps;
struct sctp_nets *net;
+ struct sockaddr *addr;
+
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
SCTP_CHECK_AND_CAST(encaps, optval, struct sctp_udpencaps, *optsize);
SCTP_FIND_STCB(inp, stcb, encaps->sue_assoc_id);
+#if defined(INET) && defined(INET6)
+ if (encaps->sue_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&encaps->sue_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&encaps->sue_address;
+ }
+ } else {
+ addr = (struct sockaddr *)&encaps->sue_address;
+ }
+#else
+ addr = (struct sockaddr *)&encaps->sue_address;
+#endif
if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&encaps->sue_address);
+ net = sctp_findnet(stcb, addr);
} else {
/*
* We increment here since
@@ -3231,21 +3352,18 @@ flags_out:
*/
net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&encaps->sue_address, &net, NULL, NULL);
+ stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if (stcb && (net == NULL)) {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *)&encaps->sue_address;
+ if ((stcb != NULL) && (net == NULL)) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
+ if (addr->sa_family == AF_INET) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)sa;
- if (sin->sin_addr.s_addr) {
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
@@ -3254,10 +3372,10 @@ flags_out:
} else
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
+ if (addr->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)sa;
+ sin6 = (struct sockaddr_in6 *)addr;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
@@ -3273,7 +3391,7 @@ flags_out:
break;
}
}
- if (stcb) {
+ if (stcb != NULL) {
if (net) {
encaps->sue_port = net->port;
} else {
@@ -3297,6 +3415,195 @@ flags_out:
}
break;
}
+ case SCTP_ECN_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.ecn_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->ecn_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
+ case SCTP_PR_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.prsctp_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->prsctp_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
+ case SCTP_AUTH_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.auth_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->auth_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
+ case SCTP_ASCONF_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.asconf_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->asconf_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
+ case SCTP_RECONFIG_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.reconfig_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->reconfig_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
+ case SCTP_NRSACK_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.nrsack_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->nrsack_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
+ case SCTP_PKTDROP_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.pktdrop_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->pktdrop_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
case SCTP_ENABLE_STREAM_RESET:
{
struct sctp_assoc_value *av;
@@ -3324,6 +3631,100 @@ flags_out:
}
break;
}
+ case SCTP_PR_STREAM_STATUS:
+ {
+ struct sctp_prstatus *sprstat;
+ uint16_t sid;
+ uint16_t policy;
+
+ SCTP_CHECK_AND_CAST(sprstat, optval, struct sctp_prstatus, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sprstat->sprstat_assoc_id);
+
+ sid = sprstat->sprstat_sid;
+ policy = sprstat->sprstat_policy;
+#if defined(SCTP_DETAILED_STR_STATS)
+ if ((stcb != NULL) &&
+ (sid < stcb->asoc.streamoutcnt) &&
+ (policy != SCTP_PR_SCTP_NONE) &&
+ ((policy <= SCTP_PR_SCTP_MAX) ||
+ (policy == SCTP_PR_SCTP_ALL))) {
+ if (policy == SCTP_PR_SCTP_ALL) {
+ sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[0];
+ sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[0];
+ } else {
+ sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[policy];
+ sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[policy];
+ }
+#else
+ if ((stcb != NULL) &&
+ (sid < stcb->asoc.streamoutcnt) &&
+ (policy == SCTP_PR_SCTP_ALL)) {
+ sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[0];
+ sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[0];
+#endif
+ SCTP_TCB_UNLOCK(stcb);
+ *optsize = sizeof(struct sctp_prstatus);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ break;
+ }
+ case SCTP_PR_ASSOC_STATUS:
+ {
+ struct sctp_prstatus *sprstat;
+ uint16_t policy;
+
+ SCTP_CHECK_AND_CAST(sprstat, optval, struct sctp_prstatus, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sprstat->sprstat_assoc_id);
+
+ policy = sprstat->sprstat_policy;
+ if ((stcb != NULL) &&
+ (policy != SCTP_PR_SCTP_NONE) &&
+ ((policy <= SCTP_PR_SCTP_MAX) ||
+ (policy == SCTP_PR_SCTP_ALL))) {
+ if (policy == SCTP_PR_SCTP_ALL) {
+ sprstat->sprstat_abandoned_unsent = stcb->asoc.abandoned_unsent[0];
+ sprstat->sprstat_abandoned_sent = stcb->asoc.abandoned_sent[0];
+ } else {
+ sprstat->sprstat_abandoned_unsent = stcb->asoc.abandoned_unsent[policy];
+ sprstat->sprstat_abandoned_sent = stcb->asoc.abandoned_sent[policy];
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ *optsize = sizeof(struct sctp_prstatus);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ break;
+ }
+ case SCTP_MAX_CWND:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.max_cwnd;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->max_cwnd;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
default:
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
error = ENOPROTOOPT;
@@ -3487,6 +3888,47 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
break;
}
+ case SCTP_INTERLEAVING_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->idata_supported = 0;
+ } else {
+ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE)) &&
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS))) {
+ inp->idata_supported = 1;
+ } else {
+ /*
+ * Must have Frag
+ * interleave and
+ * stream interleave
+ * on
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
case SCTP_CMT_ON_OFF:
if (SCTP_BASE_SYSCTL(sctp_cmt_on_off)) {
struct sctp_assoc_value *av;
@@ -3684,7 +4126,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_TCB_UNLOCK(stcb);
}
SCTP_INP_RUNLOCK(inp);
-
} else {
/*
* Can't set stream value without
@@ -3959,12 +4400,13 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
uint32_t i;
SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, optsize);
- if (optsize < sizeof(struct sctp_hmacalgo) + shmac->shmac_number_of_idents * sizeof(uint16_t)) {
+ if ((optsize < sizeof(struct sctp_hmacalgo) + shmac->shmac_number_of_idents * sizeof(uint16_t)) ||
+ (shmac->shmac_number_of_idents > 0xffff)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
break;
}
- hmaclist = sctp_alloc_hmaclist(shmac->shmac_number_of_idents);
+ hmaclist = sctp_alloc_hmaclist((uint16_t) shmac->shmac_number_of_idents);
if (hmaclist == NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
error = ENOMEM;
@@ -4172,7 +4614,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
error = ENOENT;
break;
}
- if (stcb->asoc.peer_supports_strreset == 0) {
+ if (stcb->asoc.reconfig_supported == 0) {
/*
* Peer does not support the chunk type.
*/
@@ -4181,18 +4623,30 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_TCB_UNLOCK(stcb);
break;
}
- if (stcb->asoc.stream_reset_outstanding) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
- error = EALREADY;
+ if (sizeof(struct sctp_reset_streams) +
+ strrst->srs_number_streams * sizeof(uint16_t) > optsize) {
+ error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
break;
}
if (strrst->srs_flags & SCTP_STREAM_RESET_INCOMING) {
send_in = 1;
+ if (stcb->asoc.stream_reset_outstanding) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
}
if (strrst->srs_flags & SCTP_STREAM_RESET_OUTGOING) {
send_out = 1;
}
+ if ((strrst->srs_number_streams > SCTP_MAX_STREAMS_AT_ONCE_RESET) && send_in) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
if ((send_in == 0) && (send_out == 0)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
@@ -4217,11 +4671,46 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_TCB_UNLOCK(stcb);
break;
}
- error = sctp_send_str_reset_req(stcb, strrst->srs_number_streams,
- strrst->srs_stream_list,
- send_out, send_in, 0, 0, 0, 0, 0);
-
- sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
+ if (send_out) {
+ int cnt;
+ uint16_t strm;
+
+ if (strrst->srs_number_streams) {
+ for (i = 0, cnt = 0; i < strrst->srs_number_streams; i++) {
+ strm = strrst->srs_stream_list[i];
+ if (stcb->asoc.strmout[strm].state == SCTP_STREAM_OPEN) {
+ stcb->asoc.strmout[strm].state = SCTP_STREAM_RESET_PENDING;
+ cnt++;
+ }
+ }
+ } else {
+ /* Its all */
+ for (i = 0, cnt = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if (stcb->asoc.strmout[i].state == SCTP_STREAM_OPEN) {
+ stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_PENDING;
+ cnt++;
+ }
+ }
+ }
+ }
+ if (send_in) {
+ error = sctp_send_str_reset_req(stcb, strrst->srs_number_streams,
+ strrst->srs_stream_list,
+ send_in, 0, 0, 0, 0, 0);
+ } else {
+ error = sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_LOCKED);
+ }
+ if (error == 0) {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
+ } else {
+ /*
+ * For outgoing streams don't report any
+ * problems in sending the request to the
+ * application. XXX: Double check resetting
+ * incoming streams.
+ */
+ error = 0;
+ }
SCTP_TCB_UNLOCK(stcb);
break;
}
@@ -4239,7 +4728,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
error = ENOENT;
break;
}
- if (stcb->asoc.peer_supports_strreset == 0) {
+ if (stcb->asoc.reconfig_supported == 0) {
/*
* Peer does not support the chunk type.
*/
@@ -4291,7 +4780,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
goto skip_stuff;
}
}
- error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 0, addstream, add_o_strmcnt, add_i_strmcnt, 0);
+ error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, addstream, add_o_strmcnt, add_i_strmcnt, 0);
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
skip_stuff:
SCTP_TCB_UNLOCK(stcb);
@@ -4299,6 +4788,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
case SCTP_RESET_ASSOC:
{
+ int i;
uint32_t *value;
SCTP_CHECK_AND_CAST(value, optval, uint32_t, optsize);
@@ -4308,7 +4798,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
error = ENOENT;
break;
}
- if (stcb->asoc.peer_supports_strreset == 0) {
+ if (stcb->asoc.reconfig_supported == 0) {
/*
* Peer does not support the chunk type.
*/
@@ -4323,7 +4813,25 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_TCB_UNLOCK(stcb);
break;
}
- error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 1, 0, 0, 0, 0);
+ /*
+ * Is there any data pending in the send or sent
+ * queues?
+ */
+ if (!TAILQ_EMPTY(&stcb->asoc.send_queue) ||
+ !TAILQ_EMPTY(&stcb->asoc.sent_queue)) {
+ busy_out:
+ error = EBUSY;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ /* Do any streams have data queued? */
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
+ goto busy_out;
+ }
+ }
+ error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 1, 0, 0, 0, 0);
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
SCTP_TCB_UNLOCK(stcb);
break;
@@ -4347,7 +4855,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
case SCTP_CONNECT_X_COMPLETE:
{
struct sockaddr *sa;
- struct sctp_nets *net;
/* FIXME MT: check correct? */
SCTP_CHECK_AND_CAST(sa, optval, struct sockaddr, optsize);
@@ -4358,7 +4865,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
stcb = LIST_FIRST(&inp->sctp_asoc_list);
if (stcb) {
SCTP_TCB_LOCK(stcb);
- net = sctp_findnet(stcb, sa);
}
SCTP_INP_RUNLOCK(inp);
} else {
@@ -4370,7 +4876,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
* TCB.. aka NULL.
*/
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp, sa, &net, NULL, NULL);
+ stcb = sctp_findassociation_ep_addr(&inp, sa, NULL, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
@@ -4386,7 +4892,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb,
stcb->asoc.primary_destination,
- SCTP_FROM_SCTP_USRREQ + SCTP_LOC_9);
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8);
sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
} else {
/*
@@ -4690,12 +5196,35 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
{
struct sctp_paddrparams *paddrp;
struct sctp_nets *net;
+ struct sockaddr *addr;
+
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, optsize);
SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
- net = NULL;
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&paddrp->spp_address);
+
+#if defined(INET) && defined(INET6)
+ if (paddrp->spp_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&paddrp->spp_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&paddrp->spp_address;
+ }
+ } else {
+ addr = (struct sockaddr *)&paddrp->spp_address;
+ }
+#else
+ addr = (struct sockaddr *)&paddrp->spp_address;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
} else {
/*
* We increment here since
@@ -4704,25 +5233,22 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
* the locked tcb (last argument) is NOT a
* TCB.. aka NULL.
*/
+ net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp,
- (struct sockaddr *)&paddrp->spp_address,
+ stcb = sctp_findassociation_ep_addr(&inp, addr,
&net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if (stcb && (net == NULL)) {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *)&paddrp->spp_address;
+ if ((stcb != NULL) && (net == NULL)) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
+ if (addr->sa_family == AF_INET) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)sa;
- if (sin->sin_addr.s_addr) {
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
SCTP_TCB_UNLOCK(stcb);
error = EINVAL;
@@ -4731,10 +5257,10 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
} else
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
+ if (addr->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)sa;
+ sin6 = (struct sockaddr_in6 *)addr;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
SCTP_TCB_UNLOCK(stcb);
@@ -4763,28 +5289,15 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
- if (stcb) {
+ if (stcb != NULL) {
/************************TCB SPECIFIC SET ******************/
- /*
- * do we change the timer for HB, we run
- * only one?
- */
- int ovh = 0;
-
- if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- ovh = SCTP_MED_OVERHEAD;
- } else {
- ovh = SCTP_MED_V4_OVERHEAD;
- }
-
- /* network sets ? */
- if (net) {
+ if (net != NULL) {
/************************NET SPECIFIC SET ******************/
if (paddrp->spp_flags & SPP_HB_DISABLE) {
if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED) &&
!(net->dest_state & SCTP_ADDR_NOHB)) {
sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
- SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_9);
}
net->dest_state |= SCTP_ADDR_NOHB;
}
@@ -4808,10 +5321,24 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) {
if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
- SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_11);
}
net->dest_state |= SCTP_ADDR_NO_PMTUD;
- net->mtu = paddrp->spp_pathmtu + ovh;
+ net->mtu = paddrp->spp_pathmtu;
+ switch (net->ro._l_addr.sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+ net->mtu += SCTP_MIN_V4_OVERHEAD;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ net->mtu += SCTP_MIN_OVERHEAD;
+ break;
+#endif
+ default:
+ break;
+ }
if (net->mtu < stcb->asoc.smallest_mtu) {
sctp_pathmtu_adjustment(stcb, net->mtu);
}
@@ -4832,7 +5359,9 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
(net->error_count > net->pf_threshold)) {
net->dest_state |= SCTP_ADDR_PF;
sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_12);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
}
}
@@ -4863,7 +5392,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
#endif
} else {
/************************ASSOC ONLY -- NO NET SPECIFIC SET ******************/
- if (paddrp->spp_pathmaxrxt) {
+ if (paddrp->spp_pathmaxrxt != 0) {
stcb->asoc.def_net_failure = paddrp->spp_pathmaxrxt;
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
if (net->dest_state & SCTP_ADDR_PF) {
@@ -4875,7 +5404,9 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
(net->error_count > net->pf_threshold)) {
net->dest_state |= SCTP_ADDR_PF;
sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_13);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
}
}
@@ -4894,14 +5425,14 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
}
if (paddrp->spp_flags & SPP_HB_ENABLE) {
- if (paddrp->spp_hbinterval) {
+ if (paddrp->spp_hbinterval != 0) {
stcb->asoc.heart_beat_delay = paddrp->spp_hbinterval;
} else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) {
stcb->asoc.heart_beat_delay = 0;
}
/* Turn back on the timer */
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
- if (paddrp->spp_hbinterval) {
+ if (paddrp->spp_hbinterval != 0) {
net->heart_beat_delay = paddrp->spp_hbinterval;
} else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) {
net->heart_beat_delay = 0;
@@ -4910,7 +5441,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
net->dest_state &= ~SCTP_ADDR_NOHB;
}
sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
- SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_14);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
}
sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
@@ -4920,7 +5451,9 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
if (!(net->dest_state & SCTP_ADDR_NOHB)) {
net->dest_state |= SCTP_ADDR_NOHB;
if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED)) {
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_15);
}
}
}
@@ -4930,10 +5463,24 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
- SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_16);
}
net->dest_state |= SCTP_ADDR_NO_PMTUD;
- net->mtu = paddrp->spp_pathmtu + ovh;
+ net->mtu = paddrp->spp_pathmtu;
+ switch (net->ro._l_addr.sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+ net->mtu += SCTP_MIN_V4_OVERHEAD;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ net->mtu += SCTP_MIN_OVERHEAD;
+ break;
+#endif
+ default:
+ break;
+ }
if (net->mtu < stcb->asoc.smallest_mtu) {
sctp_pathmtu_adjustment(stcb, net->mtu);
}
@@ -4982,12 +5529,12 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
* set it with the options on the
* socket
*/
- if (paddrp->spp_pathmaxrxt) {
+ if (paddrp->spp_pathmaxrxt != 0) {
inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt;
}
if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
- else if (paddrp->spp_hbinterval) {
+ else if (paddrp->spp_hbinterval != 0) {
if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL)
paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL;
inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval);
@@ -5153,13 +5700,35 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
{
struct sctp_setprim *spa;
struct sctp_nets *net;
+ struct sockaddr *addr;
+
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
SCTP_CHECK_AND_CAST(spa, optval, struct sctp_setprim, optsize);
SCTP_FIND_STCB(inp, stcb, spa->ssp_assoc_id);
- net = NULL;
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&spa->ssp_addr);
+#if defined(INET) && defined(INET6)
+ if (spa->ssp_addr.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&spa->ssp_addr;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&spa->ssp_addr;
+ }
+ } else {
+ addr = (struct sockaddr *)&spa->ssp_addr;
+ }
+#else
+ addr = (struct sockaddr *)&spa->ssp_addr;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
} else {
/*
* We increment here since
@@ -5168,33 +5737,40 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
* the locked tcb (last argument) is NOT a
* TCB.. aka NULL.
*/
+ net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp,
- (struct sockaddr *)&spa->ssp_addr,
+ stcb = sctp_findassociation_ep_addr(&inp, addr,
&net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if ((stcb) && (net)) {
- if ((net != stcb->asoc.primary_destination) &&
- (!(net->dest_state & SCTP_ADDR_UNCONFIRMED))) {
- /* Ok we need to set it */
- if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) {
- if ((stcb->asoc.alternate) &&
- (!(net->dest_state & SCTP_ADDR_PF)) &&
- (net->dest_state & SCTP_ADDR_REACHABLE)) {
- sctp_free_remote_addr(stcb->asoc.alternate);
- stcb->asoc.alternate = NULL;
+ if ((stcb != NULL) && (net != NULL)) {
+ if (net != stcb->asoc.primary_destination) {
+ if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED)) {
+ /* Ok we need to set it */
+ if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) {
+ if ((stcb->asoc.alternate) &&
+ (!(net->dest_state & SCTP_ADDR_PF)) &&
+ (net->dest_state & SCTP_ADDR_REACHABLE)) {
+ sctp_free_remote_addr(stcb->asoc.alternate);
+ stcb->asoc.alternate = NULL;
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
}
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
}
}
} else {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
- if (stcb) {
+ if (stcb != NULL) {
SCTP_TCB_UNLOCK(stcb);
}
break;
@@ -5216,14 +5792,36 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
case SCTP_SET_PEER_PRIMARY_ADDR:
{
struct sctp_setpeerprim *sspp;
+ struct sockaddr *addr;
+
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
SCTP_CHECK_AND_CAST(sspp, optval, struct sctp_setpeerprim, optsize);
SCTP_FIND_STCB(inp, stcb, sspp->sspp_assoc_id);
if (stcb != NULL) {
struct sctp_ifa *ifa;
- ifa = sctp_find_ifa_by_addr((struct sockaddr *)&sspp->sspp_addr,
- stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED);
+#if defined(INET) && defined(INET6)
+ if (sspp->sspp_addr.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sspp->sspp_addr;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&sspp->sspp_addr;
+ }
+ } else {
+ addr = (struct sockaddr *)&sspp->sspp_addr;
+ }
+#else
+ addr = (struct sockaddr *)&sspp->sspp_addr;
+#endif
+ ifa = sctp_find_ifa_by_addr(addr, stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED);
if (ifa == NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
@@ -5240,7 +5838,11 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (laddr->ifa == NULL) {
SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
- __FUNCTION__);
+ __func__);
+ continue;
+ }
+ if ((sctp_is_addr_restricted(stcb, laddr->ifa)) &&
+ (!sctp_is_addr_pending(stcb, laddr->ifa))) {
continue;
}
if (laddr->ifa == ifa) {
@@ -5254,13 +5856,13 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
goto out_of_it;
}
} else {
- switch (sspp->sspp_addr.ss_family) {
+ switch (addr->sa_family) {
#ifdef INET
case AF_INET:
{
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&sspp->sspp_addr;
+ sin = (struct sockaddr_in *)addr;
if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
&sin->sin_addr) != 0) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
@@ -5275,7 +5877,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
{
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&sspp->sspp_addr;
+ sin6 = (struct sockaddr_in6 *)addr;
if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
&sin6->sin6_addr) != 0) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
@@ -5291,11 +5893,11 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
goto out_of_it;
}
}
- if (sctp_set_primary_ip_address_sa(stcb,
- (struct sockaddr *)&sspp->sspp_addr) != 0) {
+ if (sctp_set_primary_ip_address_sa(stcb, addr) != 0) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SOCKOPT, SCTP_SO_LOCKED);
out_of_it:
SCTP_TCB_UNLOCK(stcb);
} else {
@@ -5602,7 +6204,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
SCTP_CHECK_AND_CAST(info, optval, struct sctp_default_prinfo, optsize);
SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id);
- if (PR_SCTP_INVALID_POLICY(info->pr_policy)) {
+ if (info->pr_policy > SCTP_PR_SCTP_MAX) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
break;
@@ -5643,12 +6245,35 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
{
struct sctp_paddrthlds *thlds;
struct sctp_nets *net;
+ struct sockaddr *addr;
+
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
SCTP_CHECK_AND_CAST(thlds, optval, struct sctp_paddrthlds, optsize);
SCTP_FIND_STCB(inp, stcb, thlds->spt_assoc_id);
- net = NULL;
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&thlds->spt_address);
+
+#if defined(INET) && defined(INET6)
+ if (thlds->spt_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&thlds->spt_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&thlds->spt_address;
+ }
+ } else {
+ addr = (struct sockaddr *)&thlds->spt_address;
+ }
+#else
+ addr = (struct sockaddr *)&thlds->spt_address;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
} else {
/*
* We increment here since
@@ -5657,25 +6282,22 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
* the locked tcb (last argument) is NOT a
* TCB.. aka NULL.
*/
+ net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp,
- (struct sockaddr *)&thlds->spt_address,
+ stcb = sctp_findassociation_ep_addr(&inp, addr,
&net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if (stcb && (net == NULL)) {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *)&thlds->spt_address;
+ if ((stcb != NULL) && (net == NULL)) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
+ if (addr->sa_family == AF_INET) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)sa;
- if (sin->sin_addr.s_addr) {
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
SCTP_TCB_UNLOCK(stcb);
error = EINVAL;
@@ -5684,10 +6306,10 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
} else
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
+ if (addr->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)sa;
+ sin6 = (struct sockaddr_in6 *)addr;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
SCTP_TCB_UNLOCK(stcb);
@@ -5703,68 +6325,78 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
break;
}
}
- if (stcb) {
- if (net) {
+ if (thlds->spt_pathcpthld != 0xffff) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ if (stcb != NULL) {
+ if (net != NULL) {
+ net->failure_threshold = thlds->spt_pathmaxrxt;
+ net->pf_threshold = thlds->spt_pathpfthld;
if (net->dest_state & SCTP_ADDR_PF) {
- if ((net->failure_threshold > thlds->spt_pathmaxrxt) ||
- (net->failure_threshold <= thlds->spt_pathpfthld)) {
+ if ((net->error_count > net->failure_threshold) ||
+ (net->error_count <= net->pf_threshold)) {
net->dest_state &= ~SCTP_ADDR_PF;
}
} else {
- if ((net->failure_threshold > thlds->spt_pathpfthld) &&
- (net->failure_threshold <= thlds->spt_pathmaxrxt)) {
+ if ((net->error_count > net->pf_threshold) &&
+ (net->error_count <= net->failure_threshold)) {
net->dest_state |= SCTP_ADDR_PF;
sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_17);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
}
}
if (net->dest_state & SCTP_ADDR_REACHABLE) {
- if (net->failure_threshold > thlds->spt_pathmaxrxt) {
+ if (net->error_count > net->failure_threshold) {
net->dest_state &= ~SCTP_ADDR_REACHABLE;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
}
} else {
- if (net->failure_threshold <= thlds->spt_pathmaxrxt) {
+ if (net->error_count <= net->failure_threshold) {
net->dest_state |= SCTP_ADDR_REACHABLE;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
}
}
- net->failure_threshold = thlds->spt_pathmaxrxt;
- net->pf_threshold = thlds->spt_pathpfthld;
} else {
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ net->failure_threshold = thlds->spt_pathmaxrxt;
+ net->pf_threshold = thlds->spt_pathpfthld;
if (net->dest_state & SCTP_ADDR_PF) {
- if ((net->failure_threshold > thlds->spt_pathmaxrxt) ||
- (net->failure_threshold <= thlds->spt_pathpfthld)) {
+ if ((net->error_count > net->failure_threshold) ||
+ (net->error_count <= net->pf_threshold)) {
net->dest_state &= ~SCTP_ADDR_PF;
}
} else {
- if ((net->failure_threshold > thlds->spt_pathpfthld) &&
- (net->failure_threshold <= thlds->spt_pathmaxrxt)) {
+ if ((net->error_count > net->pf_threshold) &&
+ (net->error_count <= net->failure_threshold)) {
net->dest_state |= SCTP_ADDR_PF;
sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_18);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
}
}
if (net->dest_state & SCTP_ADDR_REACHABLE) {
- if (net->failure_threshold > thlds->spt_pathmaxrxt) {
+ if (net->error_count > net->failure_threshold) {
net->dest_state &= ~SCTP_ADDR_REACHABLE;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
}
} else {
- if (net->failure_threshold <= thlds->spt_pathmaxrxt) {
+ if (net->error_count <= net->failure_threshold) {
net->dest_state |= SCTP_ADDR_REACHABLE;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
}
}
- net->failure_threshold = thlds->spt_pathmaxrxt;
- net->pf_threshold = thlds->spt_pathpfthld;
}
stcb->asoc.def_net_failure = thlds->spt_pathmaxrxt;
stcb->asoc.def_net_pf_threshold = thlds->spt_pathpfthld;
}
+ SCTP_TCB_UNLOCK(stcb);
} else {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
@@ -5784,11 +6416,35 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
{
struct sctp_udpencaps *encaps;
struct sctp_nets *net;
+ struct sockaddr *addr;
+
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
SCTP_CHECK_AND_CAST(encaps, optval, struct sctp_udpencaps, optsize);
SCTP_FIND_STCB(inp, stcb, encaps->sue_assoc_id);
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&encaps->sue_address);
+
+#if defined(INET) && defined(INET6)
+ if (encaps->sue_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&encaps->sue_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&encaps->sue_address;
+ }
+ } else {
+ addr = (struct sockaddr *)&encaps->sue_address;
+ }
+#else
+ addr = (struct sockaddr *)&encaps->sue_address;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
} else {
/*
* We increment here since
@@ -5799,22 +6455,19 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
*/
net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&encaps->sue_address, &net, NULL, NULL);
+ stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if (stcb && (net == NULL)) {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *)&encaps->sue_address;
+ if ((stcb != NULL) && (net == NULL)) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
+ if (addr->sa_family == AF_INET) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)sa;
- if (sin->sin_addr.s_addr) {
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
SCTP_TCB_UNLOCK(stcb);
error = EINVAL;
@@ -5823,10 +6476,10 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
} else
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
+ if (addr->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)sa;
+ sin6 = (struct sockaddr_in6 *)addr;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
SCTP_TCB_UNLOCK(stcb);
@@ -5842,8 +6495,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
break;
}
}
- if (stcb) {
- if (net) {
+ if (stcb != NULL) {
+ if (net != NULL) {
net->port = encaps->sue_port;
} else {
stcb->asoc.port = encaps->sue_port;
@@ -5863,6 +6516,273 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
}
break;
}
+ case SCTP_ECN_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->ecn_supported = 0;
+ } else {
+ inp->ecn_supported = 1;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_PR_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->prsctp_supported = 0;
+ } else {
+ inp->prsctp_supported = 1;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_AUTH_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ if ((av->assoc_value == 0) &&
+ (inp->asconf_supported == 1)) {
+ /*
+ * AUTH is required for
+ * ASCONF
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->auth_supported = 0;
+ } else {
+ inp->auth_supported = 1;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_ASCONF_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ if ((av->assoc_value != 0) &&
+ (inp->auth_supported == 0)) {
+ /*
+ * AUTH is required for
+ * ASCONF
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->asconf_supported = 0;
+ sctp_auth_delete_chunk(SCTP_ASCONF,
+ inp->sctp_ep.local_auth_chunks);
+ sctp_auth_delete_chunk(SCTP_ASCONF_ACK,
+ inp->sctp_ep.local_auth_chunks);
+ } else {
+ inp->asconf_supported = 1;
+ sctp_auth_add_chunk(SCTP_ASCONF,
+ inp->sctp_ep.local_auth_chunks);
+ sctp_auth_add_chunk(SCTP_ASCONF_ACK,
+ inp->sctp_ep.local_auth_chunks);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_RECONFIG_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->reconfig_supported = 0;
+ } else {
+ inp->reconfig_supported = 1;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_NRSACK_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->nrsack_supported = 0;
+ } else {
+ inp->nrsack_supported = 1;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_PKTDROP_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->pktdrop_supported = 0;
+ } else {
+ inp->pktdrop_supported = 1;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_MAX_CWND:
+ {
+ struct sctp_assoc_value *av;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ stcb->asoc.max_cwnd = av->assoc_value;
+ if (stcb->asoc.max_cwnd > 0) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if ((net->cwnd > stcb->asoc.max_cwnd) &&
+ (net->cwnd > (net->mtu - sizeof(struct sctphdr)))) {
+ net->cwnd = stcb->asoc.max_cwnd;
+ if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
+ net->cwnd = net->mtu - sizeof(struct sctphdr);
+ }
+ }
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ inp->max_cwnd = av->assoc_value;
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
default:
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
error = ENOPROTOOPT;
@@ -5878,7 +6798,20 @@ sctp_ctloutput(struct socket *so, struct sockopt *sopt)
size_t optsize = 0;
void *p;
int error = 0;
+ struct sctp_inpcb *inp;
+ if ((sopt->sopt_level == SOL_SOCKET) &&
+ (sopt->sopt_name == SO_SETFIB)) {
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS);
+ return (EINVAL);
+ }
+ SCTP_INP_WLOCK(inp);
+ inp->fibnum = so->so_fibnum;
+ SCTP_INP_WUNLOCK(inp);
+ return (0);
+ }
if (sopt->sopt_level != IPPROTO_SCTP) {
/* wrong proto level... send back up to IP */
#ifdef INET6
@@ -6052,7 +6985,9 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
}
vrf_id = inp->def_vrf_id;
/* We are GOOD to go */
- stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, p);
+ stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
+ inp->sctp_ep.pre_open_stream_count,
+ inp->sctp_ep.port, p);
if (stcb == NULL) {
/* Gak! no memory */
goto out_now;
@@ -6182,7 +7117,7 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
SCTP_INP_DECR_REF(tinp);
return (EADDRINUSE);
} else if (tinp) {
- SCTP_INP_DECR_REF(inp);
+ SCTP_INP_DECR_REF(tinp);
}
}
}
@@ -6194,8 +7129,8 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
#endif
SOCK_LOCK(so);
error = solisten_proto_check(so);
+ SOCK_UNLOCK(so);
if (error) {
- SOCK_UNLOCK(so);
SCTP_INP_RUNLOCK(inp);
return (error);
}
@@ -6208,28 +7143,27 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
* move the guy that was listener to the TCP Pool.
*/
if (sctp_swap_inpcb_for_listen(inp)) {
- goto in_use;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
+ return (EADDRINUSE);
}
}
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
(inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
/* We are already connected AND the TCP model */
-in_use:
SCTP_INP_RUNLOCK(inp);
- SOCK_UNLOCK(so);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
return (EADDRINUSE);
}
SCTP_INP_RUNLOCK(inp);
if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
/* We must do a bind. */
- SOCK_UNLOCK(so);
if ((error = sctp_inpcb_bind(so, NULL, NULL, p))) {
/* bind error, probably perm */
return (error);
}
- SOCK_LOCK(so);
}
+ SOCK_LOCK(so);
/* It appears for 7.0 and on, we must always call this. */
solisten_proto(so, backlog);
if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
@@ -6357,7 +7291,8 @@ sctp_accept(struct socket *so, struct sockaddr **addr)
}
if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
SCTP_TCB_LOCK(stcb);
- sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
+ sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_19);
}
return (0);
}
@@ -6442,7 +7377,7 @@ sctp_ingetaddr(struct socket *so, struct sockaddr **addr)
if (laddr->ifa->address.sa.sa_family == AF_INET) {
struct sockaddr_in *sin_a;
- sin_a = (struct sockaddr_in *)&laddr->ifa->address.sa;
+ sin_a = &laddr->ifa->address.sin;
sin->sin_addr = sin_a->sin_addr;
fnd = 1;
break;
diff --git a/freebsd/sys/netinet/sctp_var.h b/freebsd/sys/netinet/sctp_var.h
index d88a2376..a4d2b998 100644
--- a/freebsd/sys/netinet/sctp_var.h
+++ b/freebsd/sys/netinet/sctp_var.h
@@ -72,7 +72,7 @@ extern struct pr_usrreqs sctp_usrreqs;
((stcb->asoc.sctp_features & feature) == 0)) || \
((stcb == NULL) && (inp != NULL) && \
((inp->sctp_features & feature) == 0)) || \
- ((stcb == NULL) && (inp == NULL)))
+ ((stcb == NULL) && (inp == NULL)))
/* managing mobility_feature in inpcb (by micchie) */
#define sctp_mobility_feature_on(inp, feature) (inp->sctp_mobility_features |= feature)
@@ -86,7 +86,7 @@ extern struct pr_usrreqs sctp_usrreqs;
#define sctp_sbspace_failedmsgs(sb) ((long) ((sctp_maxspace(sb) > (sb)->sb_cc) ? (sctp_maxspace(sb) - (sb)->sb_cc) : 0))
-#define sctp_sbspace_sub(a,b) ((a > b) ? (a - b) : 0)
+#define sctp_sbspace_sub(a,b) (((a) > (b)) ? ((a) - (b)) : 0)
/*
* I tried to cache the readq entries at one point. But the reality
@@ -97,16 +97,24 @@ extern struct pr_usrreqs sctp_usrreqs;
* an mbuf cache as well so it is not really worth doing, at least
* right now :-D
*/
-
+#ifdef INVARIANTS
#define sctp_free_a_readq(_stcb, _readq) { \
+ if ((_readq)->on_strm_q) \
+ panic("On strm q stcb:%p readq:%p", (_stcb), (_readq)); \
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), (_readq)); \
SCTP_DECR_READQ_COUNT(); \
}
+#else
+#define sctp_free_a_readq(_stcb, _readq) { \
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), (_readq)); \
+ SCTP_DECR_READQ_COUNT(); \
+}
+#endif
#define sctp_alloc_a_readq(_stcb, _readq) { \
(_readq) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_readq), struct sctp_queued_to_read); \
if ((_readq)) { \
- SCTP_INCR_READQ_COUNT(); \
+ SCTP_INCR_READQ_COUNT(); \
} \
}
@@ -121,11 +129,11 @@ extern struct pr_usrreqs sctp_usrreqs;
#define sctp_alloc_a_strmoq(_stcb, _strmoq) { \
(_strmoq) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_strmoq), struct sctp_stream_queue_pending); \
- if ((_strmoq)) { \
+ if ((_strmoq)) { \
memset(_strmoq, 0, sizeof(struct sctp_stream_queue_pending)); \
SCTP_INCR_STRMOQ_COUNT(); \
(_strmoq)->holds_key_ref = 0; \
- } \
+ } \
}
#define sctp_free_a_chunk(_stcb, _chk, _so_locked) { \
@@ -133,22 +141,22 @@ extern struct pr_usrreqs sctp_usrreqs;
sctp_auth_key_release((_stcb), (_chk)->auth_keyid, _so_locked); \
(_chk)->holds_key_ref = 0; \
} \
- if (_stcb) { \
- SCTP_TCB_LOCK_ASSERT((_stcb)); \
- if ((_chk)->whoTo) { \
- sctp_free_remote_addr((_chk)->whoTo); \
- (_chk)->whoTo = NULL; \
- } \
- if (((_stcb)->asoc.free_chunk_cnt > SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit)) || \
- (SCTP_BASE_INFO(ipi_free_chunks) > SCTP_BASE_SYSCTL(sctp_system_free_resc_limit))) { \
- SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), (_chk)); \
- SCTP_DECR_CHK_COUNT(); \
- } else { \
- TAILQ_INSERT_TAIL(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
- (_stcb)->asoc.free_chunk_cnt++; \
- atomic_add_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); \
- } \
- } else { \
+ if (_stcb) { \
+ SCTP_TCB_LOCK_ASSERT((_stcb)); \
+ if ((_chk)->whoTo) { \
+ sctp_free_remote_addr((_chk)->whoTo); \
+ (_chk)->whoTo = NULL; \
+ } \
+ if (((_stcb)->asoc.free_chunk_cnt > SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit)) || \
+ (SCTP_BASE_INFO(ipi_free_chunks) > SCTP_BASE_SYSCTL(sctp_system_free_resc_limit))) { \
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), (_chk)); \
+ SCTP_DECR_CHK_COUNT(); \
+ } else { \
+ TAILQ_INSERT_TAIL(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
+ (_stcb)->asoc.free_chunk_cnt++; \
+ atomic_add_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); \
+ } \
+ } else { \
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), (_chk)); \
SCTP_DECR_CHK_COUNT(); \
} \
@@ -159,7 +167,7 @@ extern struct pr_usrreqs sctp_usrreqs;
(_chk) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_chunk), struct sctp_tmit_chunk); \
if ((_chk)) { \
SCTP_INCR_CHK_COUNT(); \
- (_chk)->whoTo = NULL; \
+ (_chk)->whoTo = NULL; \
(_chk)->holds_key_ref = 0; \
} \
} else { \
@@ -167,7 +175,7 @@ extern struct pr_usrreqs sctp_usrreqs;
TAILQ_REMOVE(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
atomic_subtract_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); \
(_chk)->holds_key_ref = 0; \
- SCTP_STAT_INCR(sctps_cached_chk); \
+ SCTP_STAT_INCR(sctps_cached_chk); \
(_stcb)->asoc.free_chunk_cnt--; \
} \
}
@@ -178,15 +186,16 @@ extern struct pr_usrreqs sctp_usrreqs;
if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&(__net)->ref_count)) { \
(void)SCTP_OS_TIMER_STOP(&(__net)->rxt_timer.timer); \
(void)SCTP_OS_TIMER_STOP(&(__net)->pmtu_timer.timer); \
- if ((__net)->ro.ro_rt) { \
+ (void)SCTP_OS_TIMER_STOP(&(__net)->hb_timer.timer); \
+ if ((__net)->ro.ro_rt) { \
RTFREE((__net)->ro.ro_rt); \
(__net)->ro.ro_rt = NULL; \
- } \
+ } \
if ((__net)->src_addr_selected) { \
sctp_free_ifa((__net)->ro._s_addr); \
(__net)->ro._s_addr = NULL; \
} \
- (__net)->src_addr_selected = 0; \
+ (__net)->src_addr_selected = 0; \
(__net)->dest_state &= ~SCTP_ADDR_REACHABLE; \
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_net), (__net)); \
SCTP_DECR_RADDR_COUNT(); \
@@ -210,7 +219,7 @@ extern struct pr_usrreqs sctp_usrreqs;
atomic_add_int(&(sb)->sb_cc,SCTP_BUF_LEN((m))); \
atomic_add_int(&(sb)->sb_mbcnt, MSIZE); \
if (stcb) { \
- atomic_add_int(&(stcb)->asoc.sb_cc,SCTP_BUF_LEN((m))); \
+ atomic_add_int(&(stcb)->asoc.sb_cc, SCTP_BUF_LEN((m))); \
atomic_add_int(&(stcb)->asoc.my_rwnd_control_len, MSIZE); \
} \
if (SCTP_BUF_TYPE(m) != MT_DATA && SCTP_BUF_TYPE(m) != MT_HEADER && \
@@ -250,12 +259,12 @@ extern struct pr_usrreqs sctp_usrreqs;
} while (0)
#define sctp_flight_size_increase(tp1) do { \
- (tp1)->whoTo->flight_size += (tp1)->book_size; \
+ (tp1)->whoTo->flight_size += (tp1)->book_size; \
} while (0)
#ifdef SCTP_FS_SPEC_LOG
#define sctp_total_flight_decrease(stcb, tp1) do { \
- if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
+ if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
stcb->asoc.fs_index = 0;\
stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \
stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \
@@ -264,7 +273,7 @@ extern struct pr_usrreqs sctp_usrreqs;
stcb->asoc.fslog[stcb->asoc.fs_index].incr = 0; \
stcb->asoc.fslog[stcb->asoc.fs_index].decr = 1; \
stcb->asoc.fs_index++; \
- tp1->window_probe = 0; \
+ tp1->window_probe = 0; \
if (stcb->asoc.total_flight >= tp1->book_size) { \
stcb->asoc.total_flight -= tp1->book_size; \
if (stcb->asoc.total_flight_count > 0) \
@@ -276,7 +285,7 @@ extern struct pr_usrreqs sctp_usrreqs;
} while (0)
#define sctp_total_flight_increase(stcb, tp1) do { \
- if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
+ if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
stcb->asoc.fs_index = 0;\
stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \
stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \
@@ -285,14 +294,14 @@ extern struct pr_usrreqs sctp_usrreqs;
stcb->asoc.fslog[stcb->asoc.fs_index].incr = 1; \
stcb->asoc.fslog[stcb->asoc.fs_index].decr = 0; \
stcb->asoc.fs_index++; \
- (stcb)->asoc.total_flight_count++; \
- (stcb)->asoc.total_flight += (tp1)->book_size; \
+ (stcb)->asoc.total_flight_count++; \
+ (stcb)->asoc.total_flight += (tp1)->book_size; \
} while (0)
#else
#define sctp_total_flight_decrease(stcb, tp1) do { \
- tp1->window_probe = 0; \
+ tp1->window_probe = 0; \
if (stcb->asoc.total_flight >= tp1->book_size) { \
stcb->asoc.total_flight -= tp1->book_size; \
if (stcb->asoc.total_flight_count > 0) \
@@ -304,8 +313,8 @@ extern struct pr_usrreqs sctp_usrreqs;
} while (0)
#define sctp_total_flight_increase(stcb, tp1) do { \
- (stcb)->asoc.total_flight_count++; \
- (stcb)->asoc.total_flight += (tp1)->book_size; \
+ (stcb)->asoc.total_flight_count++; \
+ (stcb)->asoc.total_flight += (tp1)->book_size; \
} while (0)
#endif
@@ -326,19 +335,17 @@ int sctp_ctloutput(struct socket *, struct sockopt *);
#ifdef INET
void sctp_input_with_port(struct mbuf *, int, uint16_t);
-void sctp_input(struct mbuf *, int);
+int sctp_input(struct mbuf **, int *, int);
#endif
void sctp_pathmtu_adjustment(struct sctp_tcb *, uint16_t);
void sctp_drain(void);
void sctp_init(void);
-void sctp_finish(void);
+void
+sctp_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *,
+ uint8_t, uint8_t, uint16_t, uint16_t);
int sctp_flush(struct socket *, int);
int sctp_shutdown(struct socket *);
-void
-sctp_notify(struct sctp_inpcb *, struct ip *ip, struct sctphdr *,
- struct sockaddr *, struct sctp_tcb *,
- struct sctp_nets *);
int
sctp_bindx(struct socket *, int, struct sockaddr_storage *,
int, int, struct proc *);
diff --git a/freebsd/sys/netinet/sctputil.c b/freebsd/sys/netinet/sctputil.c
index 6cd82739..36a9c2ce 100644
--- a/freebsd/sys/netinet/sctputil.c
+++ b/freebsd/sys/netinet/sctputil.c
@@ -54,14 +54,17 @@ __FBSDID("$FreeBSD$");
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <sys/proc.h>
+#ifdef INET6
+#include <netinet/icmp6.h>
+#endif
#ifndef KTR_SCTP
#define KTR_SCTP KTR_SUBSYS
#endif
-extern struct sctp_cc_functions sctp_cc_functions[];
-extern struct sctp_ss_functions sctp_ss_functions[];
+extern const struct sctp_cc_functions sctp_cc_functions[];
+extern const struct sctp_ss_functions sctp_ss_functions[];
void
sctp_sblog(struct sockbuf *sb, struct sctp_tcb *stcb, int from, int incr)
@@ -219,6 +222,7 @@ sctp_log_fr(uint32_t biggest_tsn, uint32_t biggest_new_tsn, uint32_t tsn, int fr
sctp_clog.x.misc.log4);
}
+#ifdef SCTP_MBUF_LOGGING
void
sctp_log_mb(struct mbuf *m, int from)
{
@@ -245,6 +249,18 @@ sctp_log_mb(struct mbuf *m, int from)
}
void
+sctp_log_mbc(struct mbuf *m, int from)
+{
+ struct mbuf *mat;
+
+ for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
+ sctp_log_mb(mat, from);
+ }
+}
+
+#endif
+
+void
sctp_log_strm_del(struct sctp_queued_to_read *control, struct sctp_queued_to_read *poschk, int from)
{
struct sctp_cwnd_log sctp_clog;
@@ -415,7 +431,8 @@ sctp_log_rwnd_set(uint8_t from, uint32_t peers_rwnd, uint32_t flight_size, uint3
sctp_clog.x.misc.log4);
}
-void
+#ifdef SCTP_MBCNT_LOGGING
+static void
sctp_log_mbcnt(uint8_t from, uint32_t total_oq, uint32_t book, uint32_t total_mbcnt_q, uint32_t mbcnt)
{
struct sctp_cwnd_log sctp_clog;
@@ -433,6 +450,8 @@ sctp_log_mbcnt(uint8_t from, uint32_t total_oq, uint32_t book, uint32_t total_mb
sctp_clog.x.misc.log4);
}
+#endif
+
void
sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d)
{
@@ -489,7 +508,7 @@ sctp_wakeup_log(struct sctp_tcb *stcb, uint32_t wake_cnt, int from)
}
void
-sctp_log_block(uint8_t from, struct sctp_association *asoc, int sendlen)
+sctp_log_block(uint8_t from, struct sctp_association *asoc, size_t sendlen)
{
struct sctp_cwnd_log sctp_clog;
@@ -499,7 +518,7 @@ sctp_log_block(uint8_t from, struct sctp_association *asoc, int sendlen)
sctp_clog.x.blk.stream_qcnt = (uint16_t) asoc->stream_queue_cnt;
sctp_clog.x.blk.chunks_on_oque = (uint16_t) asoc->chunks_on_out_queue;
sctp_clog.x.blk.flight_size = (uint16_t) (asoc->total_flight / 1024);
- sctp_clog.x.blk.sndlen = sendlen;
+ sctp_clog.x.blk.sndlen = (uint32_t) sendlen;
SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
SCTP_LOG_EVENT_BLOCK,
from,
@@ -879,9 +898,52 @@ sctp_select_a_tag(struct sctp_inpcb *inp, uint16_t lport, uint16_t rport, int ch
return (x);
}
+int32_t
+sctp_map_assoc_state(int kernel_state)
+{
+ int32_t user_state;
+
+ if (kernel_state & SCTP_STATE_WAS_ABORTED) {
+ user_state = SCTP_CLOSED;
+ } else if (kernel_state & SCTP_STATE_SHUTDOWN_PENDING) {
+ user_state = SCTP_SHUTDOWN_PENDING;
+ } else {
+ switch (kernel_state & SCTP_STATE_MASK) {
+ case SCTP_STATE_EMPTY:
+ user_state = SCTP_CLOSED;
+ break;
+ case SCTP_STATE_INUSE:
+ user_state = SCTP_CLOSED;
+ break;
+ case SCTP_STATE_COOKIE_WAIT:
+ user_state = SCTP_COOKIE_WAIT;
+ break;
+ case SCTP_STATE_COOKIE_ECHOED:
+ user_state = SCTP_COOKIE_ECHOED;
+ break;
+ case SCTP_STATE_OPEN:
+ user_state = SCTP_ESTABLISHED;
+ break;
+ case SCTP_STATE_SHUTDOWN_SENT:
+ user_state = SCTP_SHUTDOWN_SENT;
+ break;
+ case SCTP_STATE_SHUTDOWN_RECEIVED:
+ user_state = SCTP_SHUTDOWN_RECEIVED;
+ break;
+ case SCTP_STATE_SHUTDOWN_ACK_SENT:
+ user_state = SCTP_SHUTDOWN_ACK_SENT;
+ break;
+ default:
+ user_state = SCTP_CLOSED;
+ break;
+ }
+ }
+ return (user_state);
+}
+
int
sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
- uint32_t override_tag, uint32_t vrf_id)
+ uint32_t override_tag, uint32_t vrf_id, uint16_t o_strms)
{
struct sctp_association *asoc;
@@ -898,6 +960,11 @@ sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
*/
int i;
+#if defined(SCTP_DETAILED_STR_STATS)
+ int j;
+
+#endif
+
asoc = &stcb->asoc;
/* init all variables to a known value. */
SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_INUSE);
@@ -906,12 +973,20 @@ sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
asoc->heart_beat_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
asoc->cookie_life = inp->sctp_ep.def_cookie_life;
asoc->sctp_cmt_on_off = inp->sctp_cmt_on_off;
- asoc->ecn_allowed = inp->sctp_ecn_enable;
- asoc->sctp_nr_sack_on_off = (uint8_t) SCTP_BASE_SYSCTL(sctp_nr_sack_on_off);
+ asoc->ecn_supported = inp->ecn_supported;
+ asoc->prsctp_supported = inp->prsctp_supported;
+ asoc->idata_supported = inp->idata_supported;
+ asoc->auth_supported = inp->auth_supported;
+ asoc->asconf_supported = inp->asconf_supported;
+ asoc->reconfig_supported = inp->reconfig_supported;
+ asoc->nrsack_supported = inp->nrsack_supported;
+ asoc->pktdrop_supported = inp->pktdrop_supported;
+ asoc->idata_supported = inp->idata_supported;
asoc->sctp_cmt_pf = (uint8_t) 0;
asoc->sctp_frag_point = inp->sctp_frag_point;
asoc->sctp_features = inp->sctp_features;
asoc->default_dscp = inp->sctp_ep.default_dscp;
+ asoc->max_cwnd = inp->max_cwnd;
#ifdef INET6
if (inp->sctp_ep.default_flowlabel) {
asoc->default_flowlabel = inp->sctp_ep.default_flowlabel;
@@ -953,7 +1028,6 @@ sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
sctp_select_initial_TSN(&inp->sctp_ep);
asoc->asconf_seq_out_acked = asoc->asconf_seq_out - 1;
/* we are optimisitic here */
- asoc->peer_supports_pktdrop = 1;
asoc->peer_supports_nat = 0;
asoc->sent_queue_retran_cnt = 0;
@@ -1005,7 +1079,6 @@ sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
asoc->minrto = inp->sctp_ep.sctp_minrto;
asoc->maxrto = inp->sctp_ep.sctp_maxrto;
- asoc->locked_on_sending = NULL;
asoc->stream_locked_on = 0;
asoc->ecn_echo_cnt_onq = 0;
asoc->stream_locked = 0;
@@ -1033,7 +1106,7 @@ sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
* that we request by default.
*/
asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams =
- inp->sctp_ep.pre_open_stream_count;
+ o_strms;
SCTP_MALLOC(asoc->strmout, struct sctp_stream_out *,
asoc->streamoutcnt * sizeof(struct sctp_stream_out),
SCTP_M_STRMO);
@@ -1051,12 +1124,23 @@ sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
* that were dropped must be notified to the upper layer as
* failed to send.
*/
- asoc->strmout[i].next_sequence_send = 0x0;
+ asoc->strmout[i].next_mid_ordered = 0;
+ asoc->strmout[i].next_mid_unordered = 0;
TAILQ_INIT(&asoc->strmout[i].outqueue);
asoc->strmout[i].chunks_on_queues = 0;
+#if defined(SCTP_DETAILED_STR_STATS)
+ for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
+ asoc->strmout[i].abandoned_sent[j] = 0;
+ asoc->strmout[i].abandoned_unsent[j] = 0;
+ }
+#else
+ asoc->strmout[i].abandoned_sent[0] = 0;
+ asoc->strmout[i].abandoned_unsent[0] = 0;
+#endif
asoc->strmout[i].stream_no = i;
asoc->strmout[i].last_msg_incomplete = 0;
- asoc->ss_functions.sctp_ss_init_stream(&asoc->strmout[i], NULL);
+ asoc->strmout[i].state = SCTP_STREAM_OPENING;
+ asoc->ss_functions.sctp_ss_init_stream(stcb, &asoc->strmout[i], NULL);
}
asoc->ss_functions.sctp_ss_init(stcb, asoc, 0);
@@ -1086,7 +1170,6 @@ sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
TAILQ_INIT(&asoc->asconf_send_queue);
TAILQ_INIT(&asoc->send_queue);
TAILQ_INIT(&asoc->sent_queue);
- TAILQ_INIT(&asoc->reasmqueue);
TAILQ_INIT(&asoc->resetHead);
asoc->max_inbound_streams = inp->sctp_ep.max_open_streams_intome;
TAILQ_INIT(&asoc->asconf_queue);
@@ -1109,6 +1192,10 @@ sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
asoc->timoshutdownack = 0;
(void)SCTP_GETTIME_TIMEVAL(&asoc->start_time);
asoc->discontinuity_time = asoc->start_time;
+ for (i = 0; i < SCTP_PR_SCTP_MAX + 1; i++) {
+ asoc->abandoned_unsent[i] = 0;
+ asoc->abandoned_sent[i] = 0;
+ }
/*
* sa_ignore MEMLEAK {memory is put in the assoc mapping array and
* freed later when the association is freed.
@@ -1195,6 +1282,7 @@ sctp_iterator_work(struct sctp_iterator *it)
SCTP_INP_INFO_RLOCK();
SCTP_ITERATOR_LOCK();
+ sctp_it_ctl.cur_it = it;
if (it->inp) {
SCTP_INP_RLOCK(it->inp);
SCTP_INP_DECR_REF(it->inp);
@@ -1202,6 +1290,7 @@ sctp_iterator_work(struct sctp_iterator *it)
if (it->inp == NULL) {
/* iterator is complete */
done_with_iterator:
+ sctp_it_ctl.cur_it = NULL;
SCTP_ITERATOR_UNLOCK();
SCTP_INP_INFO_RUNLOCK();
if (it->function_atend != NULL) {
@@ -1342,13 +1431,11 @@ sctp_iterator_worker(void)
sctp_it_ctl.iterator_running = 1;
TAILQ_FOREACH_SAFE(it, &sctp_it_ctl.iteratorhead, sctp_nxt_itr, nit) {
- sctp_it_ctl.cur_it = it;
/* now lets work on this one */
TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
SCTP_IPI_ITERATOR_WQ_UNLOCK();
CURVNET_SET(it->vn);
sctp_iterator_work(it);
- sctp_it_ctl.cur_it = NULL;
CURVNET_RESTORE();
SCTP_IPI_ITERATOR_WQ_LOCK();
/* sa_ignore FREED_MEMORY */
@@ -1389,7 +1476,9 @@ sctp_handle_addr_wq(void)
if (asc->cnt == 0) {
SCTP_FREE(asc, SCTP_M_ASC_IT);
} else {
- (void)sctp_initiate_iterator(sctp_asconf_iterator_ep,
+ int ret;
+
+ ret = sctp_initiate_iterator(sctp_asconf_iterator_ep,
sctp_asconf_iterator_stcb,
NULL, /* No ep end for boundall */
SCTP_PCB_FLAGS_BOUNDALL,
@@ -1397,6 +1486,23 @@ sctp_handle_addr_wq(void)
SCTP_ASOC_ANY_STATE,
(void *)asc, 0,
sctp_asconf_iterator_end, NULL, 0);
+ if (ret) {
+ SCTP_PRINTF("Failed to initiate iterator for handle_addr_wq\n");
+ /*
+ * Freeing if we are stopping or put back on the
+ * addr_wq.
+ */
+ if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+ sctp_asconf_iterator_end(asc, 0);
+ } else {
+ SCTP_WQ_ADDR_LOCK();
+ LIST_FOREACH(wi, &asc->list_of_work, sctp_nxt_addr) {
+ LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr);
+ }
+ SCTP_WQ_ADDR_UNLOCK();
+ SCTP_FREE(asc, SCTP_M_ASC_IT);
+ }
+ }
}
}
@@ -1407,12 +1513,14 @@ sctp_timeout_handler(void *t)
struct sctp_tcb *stcb;
struct sctp_nets *net;
struct sctp_timer *tmr;
+ struct mbuf *op_err;
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
- int did_output, type;
+ int did_output;
+ int type;
tmr = (struct sctp_timer *)t;
inp = (struct sctp_inpcb *)tmr->ep;
@@ -1451,7 +1559,6 @@ sctp_timeout_handler(void *t)
}
/* if this is an iterator timeout, get the struct and clear inp */
tmr->stopped_from = 0xa003;
- type = tmr->type;
if (inp) {
SCTP_INP_INCR_REF(inp);
if ((inp->sctp_socket == NULL) &&
@@ -1482,8 +1589,9 @@ sctp_timeout_handler(void *t)
return;
}
}
+ type = tmr->type;
tmr->stopped_from = 0xa005;
- SCTPDBG(SCTP_DEBUG_TIMER1, "Timer type %d goes off\n", tmr->type);
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Timer type %d goes off\n", type);
if (!SCTP_OS_TIMER_ACTIVE(&tmr->timer)) {
if (inp) {
SCTP_INP_DECR_REF(inp);
@@ -1499,7 +1607,7 @@ sctp_timeout_handler(void *t)
if (stcb) {
SCTP_TCB_LOCK(stcb);
atomic_add_int(&stcb->asoc.refcnt, -1);
- if ((tmr->type != SCTP_TIMER_TYPE_ASOCKILL) &&
+ if ((type != SCTP_TIMER_TYPE_ASOCKILL) &&
((stcb->asoc.state == 0) ||
(stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED))) {
SCTP_TCB_UNLOCK(stcb);
@@ -1510,8 +1618,8 @@ sctp_timeout_handler(void *t)
return;
}
}
- /* record in stopped what t-o occured */
- tmr->stopped_from = tmr->type;
+ /* record in stopped what t-o occurred */
+ tmr->stopped_from = type;
/* mark as being serviced now */
if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
@@ -1529,7 +1637,7 @@ sctp_timeout_handler(void *t)
SCTP_OS_TIMER_DEACTIVATE(&tmr->timer);
/* call the handler for the appropriate timer type */
- switch (tmr->type) {
+ switch (type) {
case SCTP_TIMER_TYPE_ZERO_COPY:
if (inp == NULL) {
break;
@@ -1719,7 +1827,9 @@ sctp_timeout_handler(void *t)
break;
}
SCTP_STAT_INCR(sctps_timoshutdownguard);
- sctp_abort_an_association(inp, stcb, NULL, SCTP_SO_NOT_LOCKED);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Shutdown guard timer expired");
+ sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
/* no need to unlock on tcb its gone */
goto out_decr;
@@ -1772,7 +1882,8 @@ sctp_timeout_handler(void *t)
SCTP_STAT_INCR(sctps_timoassockill);
/* Can we free it yet? */
SCTP_INP_DECR_REF(inp);
- sctp_timer_stop(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_1);
+ sctp_timer_stop(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_1);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
@@ -1781,7 +1892,8 @@ sctp_timeout_handler(void *t)
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_2);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_2);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -1801,18 +1913,19 @@ sctp_timeout_handler(void *t)
* killer
*/
SCTP_INP_DECR_REF(inp);
- sctp_timer_stop(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_3);
sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
SCTP_CALLED_FROM_INPKILL_TIMER);
inp = NULL;
goto out_no_decr;
default:
SCTPDBG(SCTP_DEBUG_TIMER1, "sctp_timeout_handler:unknown timer %d\n",
- tmr->type);
+ type);
break;
}
#ifdef SCTP_AUDITING_ENABLED
- sctp_audit_log(0xF1, (uint8_t) tmr->type);
+ sctp_audit_log(0xF1, (uint8_t) type);
if (inp)
sctp_auditing(5, inp, stcb, net);
#endif
@@ -1835,8 +1948,7 @@ out_decr:
SCTP_INP_DECR_REF(inp);
}
out_no_decr:
- SCTPDBG(SCTP_DEBUG_TIMER1, "Timer now complete (type %d)\n",
- type);
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Timer now complete (type = %d)\n", type);
CURVNET_RESTORE();
}
@@ -1929,7 +2041,7 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
* though we use a different timer. We also add the HB timer
* PLUS a random jitter.
*/
- if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
+ if ((stcb == NULL) || (net == NULL)) {
return;
} else {
uint32_t rndval;
@@ -1984,9 +2096,6 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
* nothing needed but the endpoint here ususually about 60
* minutes.
*/
- if (inp == NULL) {
- return;
- }
tmr = &inp->sctp_ep.signature_change;
to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_SIGNATURE];
break;
@@ -2003,9 +2112,6 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
* timer since that has stopped and we are in the GONE
* state.
*/
- if (inp == NULL) {
- return;
- }
tmr = &inp->sctp_ep.signature_change;
to_ticks = MSEC_TO_TICKS(SCTP_INP_KILL_TIMEOUT);
break;
@@ -2014,10 +2120,7 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
* Here we use the value found in the EP for PMTU ususually
* about 10 minutes.
*/
- if ((stcb == NULL) || (inp == NULL)) {
- return;
- }
- if (net == NULL) {
+ if ((stcb == NULL) || (net == NULL)) {
return;
}
if (net->dest_state & SCTP_ADDR_NO_PMTUD) {
@@ -2043,10 +2146,14 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
* Here we use the endpoints shutdown guard timer usually
* about 3 minutes.
*/
- if ((inp == NULL) || (stcb == NULL)) {
+ if (stcb == NULL) {
return;
}
- to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN];
+ if (inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN] == 0) {
+ to_ticks = 5 * MSEC_TO_TICKS(stcb->asoc.maxrto);
+ } else {
+ to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN];
+ }
tmr = &stcb->asoc.shut_guard_timer;
break;
case SCTP_TIMER_TYPE_STRRESET:
@@ -2102,13 +2209,13 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
break;
default:
SCTPDBG(SCTP_DEBUG_TIMER1, "%s: Unknown timer type %d\n",
- __FUNCTION__, t_type);
+ __func__, t_type);
return;
break;
}
if ((to_ticks <= 0) || (tmr == NULL)) {
SCTPDBG(SCTP_DEBUG_TIMER1, "%s: %d:software error to_ticks:%d tmr:%p not set ??\n",
- __FUNCTION__, t_type, to_ticks, (void *)tmr);
+ __func__, t_type, to_ticks, (void *)tmr);
return;
}
if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
@@ -2264,7 +2371,7 @@ sctp_timer_stop(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
break;
default:
SCTPDBG(SCTP_DEBUG_TIMER1, "%s: Unknown timer type %d\n",
- __FUNCTION__, t_type);
+ __func__, t_type);
break;
}
if (tmr == NULL) {
@@ -2383,8 +2490,8 @@ sctp_calculate_rto(struct sctp_tcb *stcb,
net->rtt = (uint64_t) 1000000 *(uint64_t) now.tv_sec +
(uint64_t) now.tv_usec;
- /* computer rtt in ms */
- rtt = net->rtt / 1000;
+ /* compute rtt in ms */
+ rtt = (int32_t) (net->rtt / 1000);
if ((asoc->cc_functions.sctp_rtt_calculated) && (rtt_from_sack == SCTP_RTT_FROM_DATA)) {
/*
* Tell the CC module that a new update has just occurred
@@ -2518,58 +2625,44 @@ sctp_get_next_param(struct mbuf *m,
}
-int
+struct mbuf *
sctp_add_pad_tombuf(struct mbuf *m, int padlen)
{
- /*
- * add padlen bytes of 0 filled padding to the end of the mbuf. If
- * padlen is > 3 this routine will fail.
- */
- uint8_t *dp;
- int i;
+ struct mbuf *m_last;
+ caddr_t dp;
if (padlen > 3) {
- SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
- return (ENOBUFS);
+ return (NULL);
}
if (padlen <= M_TRAILINGSPACE(m)) {
/*
* The easy way. We hope the majority of the time we hit
* here :)
*/
- dp = (uint8_t *) (mtod(m, caddr_t)+SCTP_BUF_LEN(m));
- SCTP_BUF_LEN(m) += padlen;
+ m_last = m;
} else {
- /* Hard way we must grow the mbuf */
- struct mbuf *tmp;
-
- tmp = sctp_get_mbuf_for_msg(padlen, 0, M_DONTWAIT, 1, MT_DATA);
- if (tmp == NULL) {
- /* Out of space GAK! we are in big trouble. */
- SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
- return (ENOBUFS);
- }
- /* setup and insert in middle */
- SCTP_BUF_LEN(tmp) = padlen;
- SCTP_BUF_NEXT(tmp) = NULL;
- SCTP_BUF_NEXT(m) = tmp;
- dp = mtod(tmp, uint8_t *);
- }
- /* zero out the pad */
- for (i = 0; i < padlen; i++) {
- *dp = 0;
- dp++;
+ /* Hard way we must grow the mbuf chain */
+ m_last = sctp_get_mbuf_for_msg(padlen, 0, M_NOWAIT, 1, MT_DATA);
+ if (m_last == NULL) {
+ return (NULL);
+ }
+ SCTP_BUF_LEN(m_last) = 0;
+ SCTP_BUF_NEXT(m_last) = NULL;
+ SCTP_BUF_NEXT(m) = m_last;
}
- return (0);
+ dp = mtod(m_last, caddr_t)+SCTP_BUF_LEN(m_last);
+ SCTP_BUF_LEN(m_last) += padlen;
+ memset(dp, 0, padlen);
+ return (m_last);
}
-int
+struct mbuf *
sctp_pad_lastmbuf(struct mbuf *m, int padval, struct mbuf *last_mbuf)
{
/* find the last mbuf in chain and pad it */
struct mbuf *m_at;
- if (last_mbuf) {
+ if (last_mbuf != NULL) {
return (sctp_add_pad_tombuf(last_mbuf, padval));
} else {
for (m_at = m; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
@@ -2578,8 +2671,7 @@ sctp_pad_lastmbuf(struct mbuf *m, int padval, struct mbuf *last_mbuf)
}
}
}
- SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EFAULT);
- return (EFAULT);
+ return (NULL);
}
static void
@@ -2593,7 +2685,8 @@ sctp_notify_assoc_change(uint16_t state, struct sctp_tcb *stcb,
struct mbuf *m_notify;
struct sctp_assoc_change *sac;
struct sctp_queued_to_read *control;
- size_t notif_len, abort_len;
+ unsigned int notif_len;
+ uint16_t abort_len;
unsigned int i;
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
@@ -2601,8 +2694,11 @@ sctp_notify_assoc_change(uint16_t state, struct sctp_tcb *stcb,
#endif
+ if (stcb == NULL) {
+ return;
+ }
if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT)) {
- notif_len = sizeof(struct sctp_assoc_change);
+ notif_len = (unsigned int)sizeof(struct sctp_assoc_change);
if (abort != NULL) {
abort_len = ntohs(abort->ch.chunk_length);
} else {
@@ -2613,11 +2709,11 @@ sctp_notify_assoc_change(uint16_t state, struct sctp_tcb *stcb,
} else if ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC)) {
notif_len += abort_len;
}
- m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
/* Retry with smaller value. */
- notif_len = sizeof(struct sctp_assoc_change);
- m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+ notif_len = (unsigned int)sizeof(struct sctp_assoc_change);
+ m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
goto set_error;
}
@@ -2637,17 +2733,20 @@ sctp_notify_assoc_change(uint16_t state, struct sctp_tcb *stcb,
if (notif_len > sizeof(struct sctp_assoc_change)) {
if ((state == SCTP_COMM_UP) || (state == SCTP_RESTART)) {
i = 0;
- if (stcb->asoc.peer_supports_prsctp) {
+ if (stcb->asoc.prsctp_supported == 1) {
sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_PR;
}
- if (stcb->asoc.peer_supports_auth) {
+ if (stcb->asoc.auth_supported == 1) {
sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_AUTH;
}
- if (stcb->asoc.peer_supports_asconf) {
+ if (stcb->asoc.asconf_supported == 1) {
sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_ASCONF;
}
+ if (stcb->asoc.idata_supported == 1) {
+ sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_INTERLEAVING;
+ }
sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_MULTIBUF;
- if (stcb->asoc.peer_supports_strreset) {
+ if (stcb->asoc.reconfig_supported == 1) {
sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_RE_CONFIG;
}
sac->sac_length += i;
@@ -2732,7 +2831,11 @@ set_error:
static void
sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state,
- struct sockaddr *sa, uint32_t error)
+ struct sockaddr *sa, uint32_t error, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
{
struct mbuf *m_notify;
struct sctp_paddr_change *spc;
@@ -2743,18 +2846,28 @@ sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state,
/* event not enabled */
return;
}
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_paddr_change), 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_paddr_change), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
return;
SCTP_BUF_LEN(m_notify) = 0;
spc = mtod(m_notify, struct sctp_paddr_change *);
+ memset(spc, 0, sizeof(struct sctp_paddr_change));
spc->spc_type = SCTP_PEER_ADDR_CHANGE;
spc->spc_flags = 0;
spc->spc_length = sizeof(struct sctp_paddr_change);
switch (sa->sa_family) {
#ifdef INET
case AF_INET:
+#ifdef INET6
+ if (sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+ in6_sin_2_v4mapsin6((struct sockaddr_in *)sa,
+ (struct sockaddr_in6 *)&spc->spc_aaddr);
+ } else {
+ memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in));
+ }
+#else
memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in));
+#endif
break;
#endif
#ifdef INET6
@@ -2805,7 +2918,7 @@ sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state,
control,
&stcb->sctp_socket->so_rcv, 1,
SCTP_READ_LOCK_NOT_HELD,
- SCTP_SO_NOT_LOCKED);
+ so_locked);
}
@@ -2821,7 +2934,8 @@ sctp_notify_send_failed(struct sctp_tcb *stcb, uint8_t sent, uint32_t error,
struct sctp_send_failed *ssf;
struct sctp_send_failed_event *ssfe;
struct sctp_queued_to_read *control;
- int length;
+ struct sctp_chunkhdr *chkhdr;
+ int notifhdr_len, chk_len, chkhdr_len, padding_len, payload_len;
if ((stcb == NULL) ||
(sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT) &&
@@ -2830,27 +2944,49 @@ sctp_notify_send_failed(struct sctp_tcb *stcb, uint8_t sent, uint32_t error,
return;
}
if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
- length = sizeof(struct sctp_send_failed_event);
+ notifhdr_len = sizeof(struct sctp_send_failed_event);
} else {
- length = sizeof(struct sctp_send_failed);
+ notifhdr_len = sizeof(struct sctp_send_failed);
}
- m_notify = sctp_get_mbuf_for_msg(length, 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(notifhdr_len, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
- SCTP_BUF_LEN(m_notify) = 0;
+ SCTP_BUF_LEN(m_notify) = notifhdr_len;
+ if (stcb->asoc.idata_supported) {
+ chkhdr_len = sizeof(struct sctp_idata_chunk);
+ } else {
+ chkhdr_len = sizeof(struct sctp_data_chunk);
+ }
+ /* Use some defaults in case we can't access the chunk header */
+ if (chk->send_size >= chkhdr_len) {
+ payload_len = chk->send_size - chkhdr_len;
+ } else {
+ payload_len = 0;
+ }
+ padding_len = 0;
+ if (chk->data != NULL) {
+ chkhdr = mtod(chk->data, struct sctp_chunkhdr *);
+ if (chkhdr != NULL) {
+ chk_len = ntohs(chkhdr->chunk_length);
+ if ((chk_len >= chkhdr_len) &&
+ (chk->send_size >= chk_len) &&
+ (chk->send_size - chk_len < 4)) {
+ padding_len = chk->send_size - chk_len;
+ payload_len = chk->send_size - chkhdr_len - padding_len;
+ }
+ }
+ }
if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
ssfe = mtod(m_notify, struct sctp_send_failed_event *);
- memset(ssfe, 0, length);
+ memset(ssfe, 0, notifhdr_len);
ssfe->ssfe_type = SCTP_SEND_FAILED_EVENT;
if (sent) {
ssfe->ssfe_flags = SCTP_DATA_SENT;
} else {
ssfe->ssfe_flags = SCTP_DATA_UNSENT;
}
- length += chk->send_size;
- length -= sizeof(struct sctp_data_chunk);
- ssfe->ssfe_length = length;
+ ssfe->ssfe_length = (uint32_t) (notifhdr_len + payload_len);
ssfe->ssfe_error = error;
/* not exactly what the user sent in, but should be close :) */
ssfe->ssfe_info.snd_sid = chk->rec.data.stream_number;
@@ -2859,39 +2995,33 @@ sctp_notify_send_failed(struct sctp_tcb *stcb, uint8_t sent, uint32_t error,
ssfe->ssfe_info.snd_context = chk->rec.data.context;
ssfe->ssfe_info.snd_assoc_id = sctp_get_associd(stcb);
ssfe->ssfe_assoc_id = sctp_get_associd(stcb);
- SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed_event);
} else {
ssf = mtod(m_notify, struct sctp_send_failed *);
- memset(ssf, 0, length);
+ memset(ssf, 0, notifhdr_len);
ssf->ssf_type = SCTP_SEND_FAILED;
if (sent) {
ssf->ssf_flags = SCTP_DATA_SENT;
} else {
ssf->ssf_flags = SCTP_DATA_UNSENT;
}
- length += chk->send_size;
- length -= sizeof(struct sctp_data_chunk);
- ssf->ssf_length = length;
+ ssf->ssf_length = (uint32_t) (notifhdr_len + payload_len);
ssf->ssf_error = error;
/* not exactly what the user sent in, but should be close :) */
- bzero(&ssf->ssf_info, sizeof(ssf->ssf_info));
ssf->ssf_info.sinfo_stream = chk->rec.data.stream_number;
- ssf->ssf_info.sinfo_ssn = chk->rec.data.stream_seq;
+ ssf->ssf_info.sinfo_ssn = (uint16_t) chk->rec.data.stream_seq;
ssf->ssf_info.sinfo_flags = chk->rec.data.rcv_flags;
ssf->ssf_info.sinfo_ppid = chk->rec.data.payloadtype;
ssf->ssf_info.sinfo_context = chk->rec.data.context;
ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
ssf->ssf_assoc_id = sctp_get_associd(stcb);
- SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed);
}
- if (chk->data) {
- /*
- * trim off the sctp chunk header(it should be there)
- */
- if (chk->send_size >= sizeof(struct sctp_data_chunk)) {
- m_adj(chk->data, sizeof(struct sctp_data_chunk));
+ if (chk->data != NULL) {
+ /* Trim off the sctp chunk header (it should be there) */
+ if (chk->send_size == chkhdr_len + payload_len + padding_len) {
+ m_adj(chk->data, chkhdr_len);
+ m_adj(chk->data, -padding_len);
sctp_mbuf_crush(chk->data);
- chk->send_size -= sizeof(struct sctp_data_chunk);
+ chk->send_size -= (chkhdr_len + padding_len);
}
}
SCTP_BUF_NEXT(m_notify) = chk->data;
@@ -2936,7 +3066,7 @@ sctp_notify_send_failed2(struct sctp_tcb *stcb, uint32_t error,
struct sctp_send_failed *ssf;
struct sctp_send_failed_event *ssfe;
struct sctp_queued_to_read *control;
- int length;
+ int notifhdr_len;
if ((stcb == NULL) ||
(sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT) &&
@@ -2945,23 +3075,22 @@ sctp_notify_send_failed2(struct sctp_tcb *stcb, uint32_t error,
return;
}
if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
- length = sizeof(struct sctp_send_failed_event);
+ notifhdr_len = sizeof(struct sctp_send_failed_event);
} else {
- length = sizeof(struct sctp_send_failed);
+ notifhdr_len = sizeof(struct sctp_send_failed);
}
- m_notify = sctp_get_mbuf_for_msg(length, 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(notifhdr_len, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
/* no space left */
return;
}
- SCTP_BUF_LEN(m_notify) = 0;
+ SCTP_BUF_LEN(m_notify) = notifhdr_len;
if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
ssfe = mtod(m_notify, struct sctp_send_failed_event *);
- memset(ssfe, 0, length);
+ memset(ssfe, 0, notifhdr_len);
ssfe->ssfe_type = SCTP_SEND_FAILED_EVENT;
ssfe->ssfe_flags = SCTP_DATA_UNSENT;
- length += sp->length;
- ssfe->ssfe_length = length;
+ ssfe->ssfe_length = (uint32_t) (notifhdr_len + sp->length);
ssfe->ssfe_error = error;
/* not exactly what the user sent in, but should be close :) */
ssfe->ssfe_info.snd_sid = sp->stream;
@@ -2974,14 +3103,12 @@ sctp_notify_send_failed2(struct sctp_tcb *stcb, uint32_t error,
ssfe->ssfe_info.snd_context = sp->context;
ssfe->ssfe_info.snd_assoc_id = sctp_get_associd(stcb);
ssfe->ssfe_assoc_id = sctp_get_associd(stcb);
- SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed_event);
} else {
ssf = mtod(m_notify, struct sctp_send_failed *);
- memset(ssf, 0, length);
+ memset(ssf, 0, notifhdr_len);
ssf->ssf_type = SCTP_SEND_FAILED;
ssf->ssf_flags = SCTP_DATA_UNSENT;
- length += sp->length;
- ssf->ssf_length = length;
+ ssf->ssf_length = (uint32_t) (notifhdr_len + sp->length);
ssf->ssf_error = error;
/* not exactly what the user sent in, but should be close :) */
ssf->ssf_info.sinfo_stream = sp->stream;
@@ -2995,7 +3122,6 @@ sctp_notify_send_failed2(struct sctp_tcb *stcb, uint32_t error,
ssf->ssf_info.sinfo_context = sp->context;
ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
ssf->ssf_assoc_id = sctp_get_associd(stcb);
- SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed);
}
SCTP_BUF_NEXT(m_notify) = sp->data;
@@ -3039,7 +3165,7 @@ sctp_notify_adaptation_layer(struct sctp_tcb *stcb)
/* event not enabled */
return;
}
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_adaption_event), 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_adaption_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
@@ -3095,7 +3221,7 @@ sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb, uint32_t error,
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_CANT_READ) {
return;
}
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_pdapi_event), 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_pdapi_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
@@ -3206,7 +3332,7 @@ sctp_notify_shutdown_event(struct sctp_tcb *stcb)
/* event not enabled */
return;
}
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_event), 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
@@ -3255,7 +3381,7 @@ sctp_notify_sender_dry_event(struct sctp_tcb *stcb,
/* event not enabled */
return;
}
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_sender_dry_event), 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_sender_dry_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
/* no space left */
return;
@@ -3307,7 +3433,7 @@ sctp_notify_stream_reset_add(struct sctp_tcb *stcb, uint16_t numberin, uint16_t
return;
}
stcb->asoc.peer_req_out = 0;
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_stream_change_event), 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_stream_change_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
@@ -3357,7 +3483,7 @@ sctp_notify_stream_reset_tsn(struct sctp_tcb *stcb, uint32_t sending_tsn, uint32
/* event not enabled */
return;
}
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_assoc_reset_event), 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_assoc_reset_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
@@ -3411,7 +3537,7 @@ sctp_notify_stream_reset(struct sctp_tcb *stcb,
/* event not enabled */
return;
}
- m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
@@ -3467,7 +3593,8 @@ sctp_notify_remote_error(struct sctp_tcb *stcb, uint16_t error, struct sctp_erro
struct mbuf *m_notify;
struct sctp_remote_error *sre;
struct sctp_queued_to_read *control;
- size_t notif_len, chunk_len;
+ unsigned int notif_len;
+ uint16_t chunk_len;
if ((stcb == NULL) ||
sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVPEERERR)) {
@@ -3478,18 +3605,19 @@ sctp_notify_remote_error(struct sctp_tcb *stcb, uint16_t error, struct sctp_erro
} else {
chunk_len = 0;
}
- notif_len = sizeof(struct sctp_remote_error) + chunk_len;
- m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+ notif_len = (unsigned int)(sizeof(struct sctp_remote_error) + chunk_len);
+ m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
/* Retry with smaller value. */
- notif_len = sizeof(struct sctp_remote_error);
- m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+ notif_len = (unsigned int)sizeof(struct sctp_remote_error);
+ m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
return;
}
}
SCTP_BUF_NEXT(m_notify) = NULL;
sre = mtod(m_notify, struct sctp_remote_error *);
+ memset(sre, 0, notif_len);
sre->sre_type = SCTP_REMOTE_ERROR;
sre->sre_flags = 0;
sre->sre_length = sizeof(struct sctp_remote_error);
@@ -3554,7 +3682,7 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
if (stcb->asoc.adaptation_needed && (stcb->asoc.adaptation_sent == 0)) {
sctp_notify_adaptation_layer(stcb);
}
- if (stcb->asoc.peer_supports_auth == 0) {
+ if (stcb->asoc.auth_supported == 0) {
sctp_ulp_notify(SCTP_NOTIFY_NO_PEER_AUTH, stcb, 0,
NULL, so_locked);
}
@@ -3568,7 +3696,7 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
net = (struct sctp_nets *)data;
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_UNREACHABLE,
- (struct sockaddr *)&net->ro._l_addr, error);
+ (struct sockaddr *)&net->ro._l_addr, error, so_locked);
break;
}
case SCTP_NOTIFY_INTERFACE_UP:
@@ -3577,7 +3705,7 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
net = (struct sctp_nets *)data;
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_AVAILABLE,
- (struct sockaddr *)&net->ro._l_addr, error);
+ (struct sockaddr *)&net->ro._l_addr, error, so_locked);
break;
}
case SCTP_NOTIFY_INTERFACE_CONFIRMED:
@@ -3586,7 +3714,7 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
net = (struct sctp_nets *)data;
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_CONFIRMED,
- (struct sockaddr *)&net->ro._l_addr, error);
+ (struct sockaddr *)&net->ro._l_addr, error, so_locked);
break;
}
case SCTP_NOTIFY_SPECIAL_SP_FAIL:
@@ -3628,7 +3756,7 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
break;
case SCTP_NOTIFY_ASSOC_RESTART:
sctp_notify_assoc_change(SCTP_RESTART, stcb, error, NULL, 0, so_locked);
- if (stcb->asoc.peer_supports_auth == 0) {
+ if (stcb->asoc.auth_supported == 0) {
sctp_ulp_notify(SCTP_NOTIFY_NO_PEER_AUTH, stcb, 0,
NULL, so_locked);
}
@@ -3657,15 +3785,15 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
break;
case SCTP_NOTIFY_ASCONF_ADD_IP:
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_ADDED, data,
- error);
+ error, so_locked);
break;
case SCTP_NOTIFY_ASCONF_DELETE_IP:
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_REMOVED, data,
- error);
+ error, so_locked);
break;
case SCTP_NOTIFY_ASCONF_SET_PRIMARY:
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_MADE_PRIM, data,
- error);
+ error, so_locked);
break;
case SCTP_NOTIFY_PEER_SHUTDOWN:
sctp_notify_shutdown_event(stcb);
@@ -3693,7 +3821,7 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
break;
default:
SCTPDBG(SCTP_DEBUG_UTIL1, "%s: unknown notification %xh (%u)\n",
- __FUNCTION__, notification, notification);
+ __func__, notification, notification);
break;
} /* end switch */
}
@@ -3780,10 +3908,10 @@ sctp_report_all_outbound(struct sctp_tcb *stcb, uint16_t error, int holds_lock,
/* For each stream */
outs = &asoc->strmout[i];
/* clean up any sends there */
- asoc->locked_on_sending = NULL;
TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) {
- asoc->stream_queue_cnt--;
+ atomic_subtract_int(&asoc->stream_queue_cnt, 1);
TAILQ_REMOVE(&outs->outqueue, sp, next);
+ stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, outs, sp, holds_lock);
sctp_free_spbufspace(stcb, asoc, sp);
if (sp->data) {
sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb,
@@ -3845,7 +3973,7 @@ sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
struct mbuf *m, int iphlen,
struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct mbuf *op_err,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
uint32_t vtag;
@@ -3865,7 +3993,7 @@ sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
}
sctp_send_abort(m, iphlen, src, dst, sh, vtag, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
if (stcb != NULL) {
/* Ok, now lets free it */
@@ -3882,7 +4010,8 @@ sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
(SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_4);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_4);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
@@ -4006,7 +4135,8 @@ sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
atomic_subtract_int(&stcb->asoc.refcnt, 1);
}
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_5);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_5);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if (!so_locked) {
SCTP_SOCKET_UNLOCK(so, 1);
@@ -4019,7 +4149,7 @@ sctp_handle_ootb(struct mbuf *m, int iphlen, int offset,
struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_inpcb *inp,
struct mbuf *cause,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
struct sctp_chunkhdr *ch, chunk_buf;
@@ -4061,7 +4191,7 @@ sctp_handle_ootb(struct mbuf *m, int iphlen, int offset,
return;
case SCTP_SHUTDOWN_ACK:
sctp_send_shutdown_complete2(src, dst, sh,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
return;
default:
@@ -4075,7 +4205,7 @@ sctp_handle_ootb(struct mbuf *m, int iphlen, int offset,
((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
(contains_init_chunk == 0))) {
sctp_send_abort(m, iphlen, src, dst, sh, 0, cause,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
}
}
@@ -4342,6 +4472,49 @@ sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp,
}
void
+sctp_wakeup_the_read_socket(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ if ((inp != NULL) && (inp->sctp_socket != NULL)) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
+ SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
+ } else {
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(inp);
+ if (!so_locked) {
+ if (stcb) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_SOCKET_LOCK(so, 1);
+ if (stcb) {
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+ }
+#endif
+ sctp_sorwakeup(inp, inp->sctp_socket);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ }
+ }
+}
+
+void
sctp_add_to_readq(struct sctp_inpcb *inp,
struct sctp_tcb *stcb,
struct sctp_queued_to_read *control,
@@ -4376,7 +4549,7 @@ sctp_add_to_readq(struct sctp_inpcb *inp,
sctp_m_freem(control->data);
control->data = NULL;
}
- SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), control);
+ sctp_free_a_readq(stcb, control);
if (inp_read_lock_held == 0)
SCTP_INP_READ_UNLOCK(inp);
return;
@@ -4422,7 +4595,7 @@ sctp_add_to_readq(struct sctp_inpcb *inp,
} else {
/* Everything got collapsed out?? */
sctp_free_remote_addr(control->whoFrom);
- SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), control);
+ sctp_free_a_readq(stcb, control);
if (inp_read_lock_held == 0)
SCTP_INP_READ_UNLOCK(inp);
return;
@@ -4431,195 +4604,14 @@ sctp_add_to_readq(struct sctp_inpcb *inp,
control->end_added = 1;
}
TAILQ_INSERT_TAIL(&inp->read_queue, control, next);
+ control->on_read_q = 1;
if (inp_read_lock_held == 0)
SCTP_INP_READ_UNLOCK(inp);
if (inp && inp->sctp_socket) {
- if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
- SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
- } else {
-#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- struct socket *so;
-
- so = SCTP_INP_SO(inp);
- if (!so_locked) {
- if (stcb) {
- atomic_add_int(&stcb->asoc.refcnt, 1);
- SCTP_TCB_UNLOCK(stcb);
- }
- SCTP_SOCKET_LOCK(so, 1);
- if (stcb) {
- SCTP_TCB_LOCK(stcb);
- atomic_subtract_int(&stcb->asoc.refcnt, 1);
- }
- if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
- SCTP_SOCKET_UNLOCK(so, 1);
- return;
- }
- }
-#endif
- sctp_sorwakeup(inp, inp->sctp_socket);
-#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- if (!so_locked) {
- SCTP_SOCKET_UNLOCK(so, 1);
- }
-#endif
- }
+ sctp_wakeup_the_read_socket(inp, stcb, so_locked);
}
}
-
-int
-sctp_append_to_readq(struct sctp_inpcb *inp,
- struct sctp_tcb *stcb,
- struct sctp_queued_to_read *control,
- struct mbuf *m,
- int end,
- int ctls_cumack,
- struct sockbuf *sb)
-{
- /*
- * A partial delivery API event is underway. OR we are appending on
- * the reassembly queue.
- *
- * If PDAPI this means we need to add m to the end of the data.
- * Increase the length in the control AND increment the sb_cc.
- * Otherwise sb is NULL and all we need to do is put it at the end
- * of the mbuf chain.
- */
- int len = 0;
- struct mbuf *mm, *tail = NULL, *prev = NULL;
-
- if (inp) {
- SCTP_INP_READ_LOCK(inp);
- }
- if (control == NULL) {
-get_out:
- if (inp) {
- SCTP_INP_READ_UNLOCK(inp);
- }
- return (-1);
- }
- if (inp && (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_CANT_READ)) {
- SCTP_INP_READ_UNLOCK(inp);
- return (0);
- }
- if (control->end_added) {
- /* huh this one is complete? */
- goto get_out;
- }
- mm = m;
- if (mm == NULL) {
- goto get_out;
- }
- while (mm) {
- if (SCTP_BUF_LEN(mm) == 0) {
- /* Skip mbufs with NO lenght */
- if (prev == NULL) {
- /* First one */
- m = sctp_m_free(mm);
- mm = m;
- } else {
- SCTP_BUF_NEXT(prev) = sctp_m_free(mm);
- mm = SCTP_BUF_NEXT(prev);
- }
- continue;
- }
- prev = mm;
- len += SCTP_BUF_LEN(mm);
- if (sb) {
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
- sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(mm));
- }
- sctp_sballoc(stcb, sb, mm);
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SB_LOGGING_ENABLE) {
- sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
- }
- }
- mm = SCTP_BUF_NEXT(mm);
- }
- if (prev) {
- tail = prev;
- } else {
- /* Really there should always be a prev */
- if (m == NULL) {
- /* Huh nothing left? */
-#ifdef INVARIANTS
- panic("Nothing left to add?");
-#else
- goto get_out;
-#endif
- }
- tail = m;
- }
- if (control->tail_mbuf) {
- /* append */
- SCTP_BUF_NEXT(control->tail_mbuf) = m;
- control->tail_mbuf = tail;
- } else {
- /* nothing there */
-#ifdef INVARIANTS
- if (control->data != NULL) {
- panic("This should NOT happen");
- }
-#endif
- control->data = m;
- control->tail_mbuf = tail;
- }
- atomic_add_int(&control->length, len);
- if (end) {
- /* message is complete */
- if (stcb && (control == stcb->asoc.control_pdapi)) {
- stcb->asoc.control_pdapi = NULL;
- }
- control->held_length = 0;
- control->end_added = 1;
- }
- if (stcb == NULL) {
- control->do_not_ref_stcb = 1;
- }
- /*
- * When we are appending in partial delivery, the cum-ack is used
- * for the actual pd-api highest tsn on this mbuf. The true cum-ack
- * is populated in the outbound sinfo structure from the true cumack
- * if the association exists...
- */
- control->sinfo_tsn = control->sinfo_cumtsn = ctls_cumack;
- if (inp) {
- SCTP_INP_READ_UNLOCK(inp);
- }
- if (inp && inp->sctp_socket) {
- if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
- SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
- } else {
-#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- struct socket *so;
-
- so = SCTP_INP_SO(inp);
- if (stcb) {
- atomic_add_int(&stcb->asoc.refcnt, 1);
- SCTP_TCB_UNLOCK(stcb);
- }
- SCTP_SOCKET_LOCK(so, 1);
- if (stcb) {
- SCTP_TCB_LOCK(stcb);
- atomic_subtract_int(&stcb->asoc.refcnt, 1);
- }
- if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
- SCTP_SOCKET_UNLOCK(so, 1);
- return (0);
- }
-#endif
- sctp_sorwakeup(inp, inp->sctp_socket);
-#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- SCTP_SOCKET_UNLOCK(so, 1);
-#endif
- }
- }
- return (0);
-}
-
-
-
/*************HOLD THIS COMMENT FOR PATCH FILE OF
*************ALTERNATE ROUTING CODE
*/
@@ -4633,19 +4625,23 @@ sctp_generate_cause(uint16_t code, char *info)
{
struct mbuf *m;
struct sctp_gen_error_cause *cause;
- size_t info_len, len;
+ size_t info_len;
+ uint16_t len;
if ((code == 0) || (info == NULL)) {
return (NULL);
}
info_len = strlen(info);
- len = sizeof(struct sctp_paramhdr) + info_len;
+ if (info_len > (SCTP_MAX_CAUSE_LENGTH - sizeof(struct sctp_paramhdr))) {
+ return (NULL);
+ }
+ len = (uint16_t) (sizeof(struct sctp_paramhdr) + info_len);
m = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
if (m != NULL) {
SCTP_BUF_LEN(m) = len;
cause = mtod(m, struct sctp_gen_error_cause *);
cause->code = htons(code);
- cause->length = htons((uint16_t) len);
+ cause->length = htons(len);
memcpy(cause->info, info, info_len);
}
return (m);
@@ -4656,15 +4652,15 @@ sctp_generate_no_user_data_cause(uint32_t tsn)
{
struct mbuf *m;
struct sctp_error_no_user_data *no_user_data_cause;
- size_t len;
+ uint16_t len;
- len = sizeof(struct sctp_error_no_user_data);
+ len = (uint16_t) sizeof(struct sctp_error_no_user_data);
m = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
if (m != NULL) {
SCTP_BUF_LEN(m) = len;
no_user_data_cause = mtod(m, struct sctp_error_no_user_data *);
no_user_data_cause->cause.code = htons(SCTP_CAUSE_NO_USER_DATA);
- no_user_data_cause->cause.length = htons((uint16_t) len);
+ no_user_data_cause->cause.length = htons(len);
no_user_data_cause->tsn = tsn; /* tsn is passed in as NBO */
}
return (m);
@@ -4724,6 +4720,21 @@ sctp_release_pr_sctp_chunk(struct sctp_tcb *stcb, struct sctp_tmit_chunk *tp1,
stream = tp1->rec.data.stream_number;
seq = tp1->rec.data.stream_seq;
+ if (sent || !(tp1->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG)) {
+ stcb->asoc.abandoned_sent[0]++;
+ stcb->asoc.abandoned_sent[PR_SCTP_POLICY(tp1->flags)]++;
+ stcb->asoc.strmout[stream].abandoned_sent[0]++;
+#if defined(SCTP_DETAILED_STR_STATS)
+ stcb->asoc.strmout[stream].abandoned_sent[PR_SCTP_POLICY(tp1->flags)]++;
+#endif
+ } else {
+ stcb->asoc.abandoned_unsent[0]++;
+ stcb->asoc.abandoned_unsent[PR_SCTP_POLICY(tp1->flags)]++;
+ stcb->asoc.strmout[stream].abandoned_unsent[0]++;
+#if defined(SCTP_DETAILED_STR_STATS)
+ stcb->asoc.strmout[stream].abandoned_unsent[PR_SCTP_POLICY(tp1->flags)]++;
+#endif
+ }
do {
ret_sz += tp1->book_size;
if (tp1->data != NULL) {
@@ -4840,28 +4851,48 @@ sctp_release_pr_sctp_chunk(struct sctp_tcb *stcb, struct sctp_tmit_chunk *tp1,
goto oh_well;
}
memset(chk, 0, sizeof(*chk));
- chk->rec.data.rcv_flags = SCTP_DATA_LAST_FRAG;
+ chk->rec.data.rcv_flags = 0;
chk->sent = SCTP_FORWARD_TSN_SKIP;
chk->asoc = &stcb->asoc;
- chk->rec.data.stream_seq = strq->next_sequence_send;
+ if (stcb->asoc.idata_supported == 0) {
+ if (sp->sinfo_flags & SCTP_UNORDERED) {
+ chk->rec.data.stream_seq = 0;
+ } else {
+ chk->rec.data.stream_seq = strq->next_mid_ordered;
+ }
+ } else {
+ if (sp->sinfo_flags & SCTP_UNORDERED) {
+ chk->rec.data.stream_seq = strq->next_mid_unordered;
+ } else {
+ chk->rec.data.stream_seq = strq->next_mid_ordered;
+ }
+ }
chk->rec.data.stream_number = sp->stream;
chk->rec.data.payloadtype = sp->ppid;
chk->rec.data.context = sp->context;
chk->flags = sp->act_flags;
- if (sp->net)
- chk->whoTo = sp->net;
- else
- chk->whoTo = stcb->asoc.primary_destination;
- atomic_add_int(&chk->whoTo->ref_count, 1);
+ chk->whoTo = NULL;
chk->rec.data.TSN_seq = atomic_fetchadd_int(&stcb->asoc.sending_seq, 1);
- stcb->asoc.pr_sctp_cnt++;
+ strq->chunks_on_queues++;
TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, chk, sctp_next);
stcb->asoc.sent_queue_cnt++;
stcb->asoc.pr_sctp_cnt++;
+ }
+ chk->rec.data.rcv_flags |= SCTP_DATA_LAST_FRAG;
+ if (sp->sinfo_flags & SCTP_UNORDERED) {
+ chk->rec.data.rcv_flags |= SCTP_DATA_UNORDERED;
+ }
+ if (stcb->asoc.idata_supported == 0) {
+ if ((sp->sinfo_flags & SCTP_UNORDERED) == 0) {
+ strq->next_mid_ordered++;
+ }
} else {
- chk->rec.data.rcv_flags |= SCTP_DATA_LAST_FRAG;
+ if (sp->sinfo_flags & SCTP_UNORDERED) {
+ strq->next_mid_unordered++;
+ } else {
+ strq->next_mid_ordered++;
+ }
}
- strq->next_sequence_send++;
oh_well:
if (sp->data) {
/*
@@ -5009,7 +5040,6 @@ sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock)
vrf = sctp_find_vrf(vrf_id);
if (vrf == NULL) {
-stage_right:
if (holds_lock == 0)
SCTP_IPI_ADDR_RUNLOCK();
return (NULL);
@@ -5029,15 +5059,6 @@ stage_right:
return (NULL);
}
LIST_FOREACH(sctp_ifap, hash_head, next_bucket) {
- if (sctp_ifap == NULL) {
-#ifdef INVARIANTS
- panic("Huh LIST_FOREACH corrupt");
- goto stage_right;
-#else
- SCTP_PRINTF("LIST corrupt of sctp_ifap's?\n");
- goto stage_right;
-#endif
- }
if (addr->sa_family != sctp_ifap->address.sa.sa_family)
continue;
#ifdef INET
@@ -5136,7 +5157,8 @@ sctp_user_rcvd(struct sctp_tcb *stcb, uint32_t * freed_so_far, int hold_rlock,
sctp_chunk_output(stcb->sctp_ep, stcb,
SCTP_OUTPUT_FROM_USR_RCVD, SCTP_SO_LOCKED);
/* make sure no timer is running */
- sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_6);
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_6);
SCTP_TCB_UNLOCK(stcb);
} else {
/* Update how much we have pending */
@@ -5187,7 +5209,7 @@ sctp_sorecvmsg(struct socket *so,
uint32_t rwnd_req = 0;
int hold_sblock = 0;
int hold_rlock = 0;
- int slen = 0;
+ ssize_t slen = 0;
uint32_t held_length = 0;
int sockbuf_lock = 0;
@@ -5232,11 +5254,11 @@ sctp_sorecvmsg(struct socket *so,
in_eeor_mode = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RECV_RWND_LOGGING_ENABLE) {
sctp_misc_ints(SCTP_SORECV_ENTER,
- rwnd_req, in_eeor_mode, so->so_rcv.sb_cc, uio->uio_resid);
+ rwnd_req, in_eeor_mode, so->so_rcv.sb_cc, (uint32_t) uio->uio_resid);
}
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RECV_RWND_LOGGING_ENABLE) {
sctp_misc_ints(SCTP_SORECV_ENTERPL,
- rwnd_req, block_allowed, so->so_rcv.sb_cc, uio->uio_resid);
+ rwnd_req, block_allowed, so->so_rcv.sb_cc, (uint32_t) uio->uio_resid);
}
error = sblock(&so->so_rcv, (block_allowed ? SBL_WAIT : 0));
if (error) {
@@ -5269,8 +5291,14 @@ restart_nosblocks:
}
}
}
- if ((so->so_rcv.sb_cc <= held_length) && block_allowed) {
- /* we need to wait for data */
+ if (so->so_rcv.sb_cc <= held_length) {
+ if (so->so_error) {
+ error = so->so_error;
+ if ((in_flags & MSG_PEEK) == 0) {
+ so->so_error = 0;
+ }
+ goto out;
+ }
if ((so->so_rcv.sb_cc == 0) &&
((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
@@ -5301,51 +5329,18 @@ restart_nosblocks:
goto out;
}
}
- error = sbwait(&so->so_rcv);
- if (error) {
- goto out;
- }
- held_length = 0;
- goto restart_nosblocks;
- } else if (so->so_rcv.sb_cc == 0) {
- if (so->so_error) {
- error = so->so_error;
- if ((in_flags & MSG_PEEK) == 0)
- so->so_error = 0;
- } else {
- if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
- (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
- if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0) {
- /*
- * For active open side clear flags
- * for re-use passive open is
- * blocked by connect.
- */
- if (inp->sctp_flags & SCTP_PCB_FLAGS_WAS_ABORTED) {
- /*
- * You were aborted, passive
- * side always hits here
- */
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
- error = ECONNRESET;
- }
- so->so_state &= ~(SS_ISCONNECTING |
- SS_ISDISCONNECTING |
- SS_ISCONFIRMING |
- SS_ISCONNECTED);
- if (error == 0) {
- if ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) == 0) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN);
- error = ENOTCONN;
- }
- }
- goto out;
- }
+ if (block_allowed) {
+ error = sbwait(&so->so_rcv);
+ if (error) {
+ goto out;
}
+ held_length = 0;
+ goto restart_nosblocks;
+ } else {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EWOULDBLOCK);
error = EWOULDBLOCK;
+ goto out;
}
- goto out;
}
if (hold_sblock == 1) {
SOCKBUF_UNLOCK(&so->so_rcv);
@@ -5438,6 +5433,12 @@ restart_nosblocks:
sctp_m_free(control->aux_data);
control->aux_data = NULL;
}
+#ifdef INVARIANTS
+ if (control->on_strm_q) {
+ panic("About to free ctl:%p so:%p and its in %d",
+ control, so, control->on_strm_q);
+ }
+#endif
sctp_free_remote_addr(control->whoFrom);
sctp_free_a_readq(stcb, control);
if (hold_rlock) {
@@ -5498,20 +5499,16 @@ restart_nosblocks:
}
/* Clear the held length since there is something to read */
control->held_length = 0;
- if (hold_rlock) {
- SCTP_INP_READ_UNLOCK(inp);
- hold_rlock = 0;
- }
found_one:
/*
* If we reach here, control has a some data for us to read off.
* Note that stcb COULD be NULL.
*/
- control->some_taken++;
- if (hold_sblock) {
- SOCKBUF_UNLOCK(&so->so_rcv);
- hold_sblock = 0;
+ if (hold_rlock == 0) {
+ hold_rlock = 1;
+ SCTP_INP_READ_LOCK(inp);
}
+ control->some_taken++;
stcb = control->stcb;
if (stcb) {
if ((control->do_not_ref_stcb == 0) &&
@@ -5556,8 +5553,16 @@ found_one:
stcb->asoc.strmin[control->sinfo_stream].delivery_started = 1;
}
/* First lets get off the sinfo and sockaddr info */
- if ((sinfo) && filling_sinfo) {
- memcpy(sinfo, control, sizeof(struct sctp_nonpad_sndrcvinfo));
+ if ((sinfo != NULL) && (filling_sinfo != 0)) {
+ sinfo->sinfo_stream = control->sinfo_stream;
+ sinfo->sinfo_ssn = (uint16_t) control->sinfo_ssn;
+ sinfo->sinfo_flags = control->sinfo_flags;
+ sinfo->sinfo_ppid = control->sinfo_ppid;
+ sinfo->sinfo_context = control->sinfo_context;
+ sinfo->sinfo_timetolive = control->sinfo_timetolive;
+ sinfo->sinfo_tsn = control->sinfo_tsn;
+ sinfo->sinfo_cumtsn = control->sinfo_cumtsn;
+ sinfo->sinfo_assoc_id = control->sinfo_assoc_id;
nxt = TAILQ_NEXT(control, next);
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO) ||
sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO)) {
@@ -5566,20 +5571,20 @@ found_one:
s_extra = (struct sctp_extrcvinfo *)sinfo;
if ((nxt) &&
(nxt->length)) {
- s_extra->sreinfo_next_flags = SCTP_NEXT_MSG_AVAIL;
+ s_extra->serinfo_next_flags = SCTP_NEXT_MSG_AVAIL;
if (nxt->sinfo_flags & SCTP_UNORDERED) {
- s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_IS_UNORDERED;
+ s_extra->serinfo_next_flags |= SCTP_NEXT_MSG_IS_UNORDERED;
}
if (nxt->spec_flags & M_NOTIFICATION) {
- s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_IS_NOTIFICATION;
+ s_extra->serinfo_next_flags |= SCTP_NEXT_MSG_IS_NOTIFICATION;
}
- s_extra->sreinfo_next_aid = nxt->sinfo_assoc_id;
- s_extra->sreinfo_next_length = nxt->length;
- s_extra->sreinfo_next_ppid = nxt->sinfo_ppid;
- s_extra->sreinfo_next_stream = nxt->sinfo_stream;
+ s_extra->serinfo_next_aid = nxt->sinfo_assoc_id;
+ s_extra->serinfo_next_length = nxt->length;
+ s_extra->serinfo_next_ppid = nxt->sinfo_ppid;
+ s_extra->serinfo_next_stream = nxt->sinfo_stream;
if (nxt->tail_mbuf != NULL) {
if (nxt->end_added) {
- s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_ISCOMPLETE;
+ s_extra->serinfo_next_flags |= SCTP_NEXT_MSG_ISCOMPLETE;
}
}
} else {
@@ -5590,11 +5595,11 @@ found_one:
* :-D
*/
nxt = NULL;
- s_extra->sreinfo_next_flags = SCTP_NO_NEXT_MSG;
- s_extra->sreinfo_next_aid = 0;
- s_extra->sreinfo_next_length = 0;
- s_extra->sreinfo_next_ppid = 0;
- s_extra->sreinfo_next_stream = 0;
+ s_extra->serinfo_next_flags = SCTP_NO_NEXT_MSG;
+ s_extra->serinfo_next_aid = 0;
+ s_extra->serinfo_next_length = 0;
+ s_extra->serinfo_next_ppid = 0;
+ s_extra->serinfo_next_stream = 0;
}
}
/*
@@ -5631,43 +5636,43 @@ found_one:
entry->flgs = control->sinfo_flags;
}
#endif
- if (fromlen && from) {
- cp_len = min((size_t)fromlen, (size_t)control->whoFrom->ro._l_addr.sa.sa_len);
+ if ((fromlen > 0) && (from != NULL)) {
+ union sctp_sockstore store;
+ size_t len;
+
switch (control->whoFrom->ro._l_addr.sa.sa_family) {
#ifdef INET6
case AF_INET6:
- ((struct sockaddr_in6 *)from)->sin6_port = control->port_from;
+ len = sizeof(struct sockaddr_in6);
+ store.sin6 = control->whoFrom->ro._l_addr.sin6;
+ store.sin6.sin6_port = control->port_from;
break;
#endif
#ifdef INET
case AF_INET:
- ((struct sockaddr_in *)from)->sin_port = control->port_from;
+#ifdef INET6
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+ len = sizeof(struct sockaddr_in6);
+ in6_sin_2_v4mapsin6(&control->whoFrom->ro._l_addr.sin,
+ &store.sin6);
+ store.sin6.sin6_port = control->port_from;
+ } else {
+ len = sizeof(struct sockaddr_in);
+ store.sin = control->whoFrom->ro._l_addr.sin;
+ store.sin.sin_port = control->port_from;
+ }
+#else
+ len = sizeof(struct sockaddr_in);
+ store.sin = control->whoFrom->ro._l_addr.sin;
+ store.sin.sin_port = control->port_from;
+#endif
break;
#endif
default:
+ len = 0;
break;
}
- memcpy(from, &control->whoFrom->ro._l_addr, cp_len);
-
-#if defined(INET) && defined(INET6)
- if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) &&
- (from->sa_family == AF_INET) &&
- ((size_t)fromlen >= sizeof(struct sockaddr_in6))) {
- struct sockaddr_in *sin;
- struct sockaddr_in6 sin6;
-
- sin = (struct sockaddr_in *)from;
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
- bcopy(&sin->sin_addr,
- &sin6.sin6_addr.s6_addr32[3],
- sizeof(sin6.sin6_addr.s6_addr32[3]));
- sin6.sin6_port = sin->sin_port;
- memcpy(from, &sin6, sizeof(struct sockaddr_in6));
- }
-#endif
+ memcpy(from, &store, min((size_t)fromlen, len));
#ifdef INET6
{
struct sockaddr_in6 lsa6, *from6;
@@ -5677,6 +5682,14 @@ found_one:
}
#endif
}
+ if (hold_rlock) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+ if (hold_sblock) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
/* now copy out what data we can */
if (mp == NULL) {
/* copy out each mbuf in the chain up to length */
@@ -5708,15 +5721,8 @@ get_more_data:
/* error we are out of here */
goto release;
}
- if ((SCTP_BUF_NEXT(m) == NULL) &&
- (cp_len >= SCTP_BUF_LEN(m)) &&
- ((control->end_added == 0) ||
- (control->end_added &&
- (TAILQ_NEXT(control, next) == NULL)))
- ) {
- SCTP_INP_READ_LOCK(inp);
- hold_rlock = 1;
- }
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
if (cp_len == SCTP_BUF_LEN(m)) {
if ((SCTP_BUF_NEXT(m) == NULL) &&
(control->end_added)) {
@@ -5834,19 +5840,9 @@ get_more_data:
#endif
}
done_with_control:
- if (TAILQ_NEXT(control, next) == NULL) {
- /*
- * If we don't have a next we need a
- * lock, if there is a next
- * interrupt is filling ahead of us
- * and we don't need a lock to
- * remove this guy (which is the
- * head of the queue).
- */
- if (hold_rlock == 0) {
- SCTP_INP_READ_LOCK(inp);
- hold_rlock = 1;
- }
+ if (hold_rlock == 0) {
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
}
TAILQ_REMOVE(&inp->read_queue, control, next);
/* Add back any hiddend data */
@@ -5862,6 +5858,12 @@ get_more_data:
no_rcv_needed = control->do_not_ref_stcb;
sctp_free_remote_addr(control->whoFrom);
control->data = NULL;
+#ifdef INVARIANTS
+ if (control->on_strm_q) {
+ panic("About to free ctl:%p so:%p and its in %d",
+ control, so, control->on_strm_q);
+ }
+#endif
sctp_free_a_readq(stcb, control);
control = NULL;
if ((freed_so_far >= rwnd_req) &&
@@ -6077,7 +6079,7 @@ out:
struct sctp_extrcvinfo *s_extra;
s_extra = (struct sctp_extrcvinfo *)sinfo;
- s_extra->sreinfo_next_flags = SCTP_NO_NEXT_MSG;
+ s_extra->serinfo_next_flags = SCTP_NO_NEXT_MSG;
}
if (hold_rlock == 1) {
SCTP_INP_READ_UNLOCK(inp);
@@ -6103,21 +6105,21 @@ out:
goto stage_left;
#endif
}
- atomic_add_int(&stcb->asoc.refcnt, -1);
/* Save the value back for next time */
stcb->freed_by_sorcv_sincelast = freed_so_far;
+ atomic_add_int(&stcb->asoc.refcnt, -1);
}
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_RECV_RWND_LOGGING_ENABLE) {
if (stcb) {
sctp_misc_ints(SCTP_SORECV_DONE,
freed_so_far,
- ((uio) ? (slen - uio->uio_resid) : slen),
+ (uint32_t) ((uio) ? (slen - uio->uio_resid) : slen),
stcb->asoc.my_rwnd,
so->so_rcv.sb_cc);
} else {
sctp_misc_ints(SCTP_SORECV_DONE,
freed_so_far,
- ((uio) ? (slen - uio->uio_resid) : slen),
+ (uint32_t) ((uio) ? (slen - uio->uio_resid) : slen),
0,
so->so_rcv.sb_cc);
}
@@ -6135,9 +6137,7 @@ struct mbuf *
sctp_m_free(struct mbuf *m)
{
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- if (SCTP_BUF_IS_EXTENDED(m)) {
- sctp_log_mb(m, SCTP_MBUF_IFREE);
- }
+ sctp_log_mb(m, SCTP_MBUF_IFREE);
}
return (m_free(m));
}
@@ -6296,14 +6296,18 @@ sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
(sin->sin_addr.s_addr == INADDR_BROADCAST) ||
IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_7);
*error = EINVAL;
goto out_now;
}
- if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
+ if (sctp_add_remote_addr(stcb, sa, NULL, stcb->asoc.port,
+ SCTP_DONOT_SETSCOPE,
+ SCTP_ADDR_IS_CONFIRMED)) {
/* assoc gone no un-lock */
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_8);
*error = ENOBUFS;
goto out_now;
}
@@ -6317,14 +6321,18 @@ sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_9);
*error = EINVAL;
goto out_now;
}
- if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
+ if (sctp_add_remote_addr(stcb, sa, NULL, stcb->asoc.port,
+ SCTP_DONOT_SETSCOPE,
+ SCTP_ADDR_IS_CONFIRMED)) {
/* assoc gone no un-lock */
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_10);
*error = ENOBUFS;
goto out_now;
}
@@ -6342,30 +6350,30 @@ out_now:
struct sctp_tcb *
sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
- int *totaddr, int *num_v4, int *num_v6, int *error,
- int limit, int *bad_addr)
+ unsigned int *totaddr,
+ unsigned int *num_v4, unsigned int *num_v6, int *error,
+ unsigned int limit, int *bad_addr)
{
struct sockaddr *sa;
struct sctp_tcb *stcb = NULL;
- size_t incr, at, i;
+ unsigned int incr, at, i;
- at = incr = 0;
+ at = 0;
sa = addr;
-
*error = *num_v6 = *num_v4 = 0;
/* account and validate addresses */
- for (i = 0; i < (size_t)*totaddr; i++) {
+ for (i = 0; i < *totaddr; i++) {
switch (sa->sa_family) {
#ifdef INET
case AF_INET:
- (*num_v4) += 1;
- incr = sizeof(struct sockaddr_in);
+ incr = (unsigned int)sizeof(struct sockaddr_in);
if (sa->sa_len != incr) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
*error = EINVAL;
*bad_addr = 1;
return (NULL);
}
+ (*num_v4) += 1;
break;
#endif
#ifdef INET6
@@ -6381,14 +6389,14 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
*bad_addr = 1;
return (NULL);
}
- (*num_v6) += 1;
- incr = sizeof(struct sockaddr_in6);
+ incr = (unsigned int)sizeof(struct sockaddr_in6);
if (sa->sa_len != incr) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
*error = EINVAL;
*bad_addr = 1;
return (NULL);
}
+ (*num_v6) += 1;
break;
}
#endif
@@ -6397,7 +6405,7 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
/* we are done */
break;
}
- if (i == (size_t)*totaddr) {
+ if (i == *totaddr) {
break;
}
SCTP_INP_INCR_REF(inp);
@@ -6408,7 +6416,7 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
} else {
SCTP_INP_DECR_REF(inp);
}
- if ((at + incr) > (size_t)limit) {
+ if ((at + incr) > limit) {
*totaddr = i;
break;
}
@@ -6428,7 +6436,7 @@ sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
{
struct sockaddr *addr_touse;
-#ifdef INET6
+#if defined(INET) && defined(INET6)
struct sockaddr_in sin;
#endif
@@ -6442,8 +6450,10 @@ sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
addr_touse = sa;
#ifdef INET6
if (sa->sa_family == AF_INET6) {
+#ifdef INET
struct sockaddr_in6 *sin6;
+#endif
if (sa->sa_len != sizeof(struct sockaddr_in6)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
*error = EINVAL;
@@ -6455,6 +6465,7 @@ sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
*error = EINVAL;
return;
}
+#ifdef INET
sin6 = (struct sockaddr_in6 *)addr_touse;
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
@@ -6467,6 +6478,7 @@ sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
in6_sin6_2_sin(&sin, sin6);
addr_touse = (struct sockaddr *)&sin;
}
+#endif
}
#endif
#ifdef INET
@@ -6556,7 +6568,7 @@ sctp_bindx_delete_address(struct sctp_inpcb *inp,
{
struct sockaddr *addr_touse;
-#ifdef INET6
+#if defined(INET) && defined(INET6)
struct sockaddr_in sin;
#endif
@@ -6570,8 +6582,11 @@ sctp_bindx_delete_address(struct sctp_inpcb *inp,
addr_touse = sa;
#ifdef INET6
if (sa->sa_family == AF_INET6) {
+#ifdef INET
struct sockaddr_in6 *sin6;
+#endif
+
if (sa->sa_len != sizeof(struct sockaddr_in6)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
*error = EINVAL;
@@ -6583,6 +6598,7 @@ sctp_bindx_delete_address(struct sctp_inpcb *inp,
*error = EINVAL;
return;
}
+#ifdef INET
sin6 = (struct sockaddr_in6 *)addr_touse;
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
@@ -6595,6 +6611,7 @@ sctp_bindx_delete_address(struct sctp_inpcb *inp,
in6_sin6_2_sin(&sin, sin6);
addr_touse = (struct sockaddr *)&sin;
}
+#endif
}
#endif
#ifdef INET
@@ -6688,7 +6705,7 @@ sctp_local_addr_count(struct sctp_tcb *stcb)
if (ipv4_addr_legal) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ sin = &sctp_ifa->address.sin;
if (sin->sin_addr.s_addr == 0) {
/*
* skip unspecified
@@ -6716,7 +6733,7 @@ sctp_local_addr_count(struct sctp_tcb *stcb)
if (ipv6_addr_legal) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ sin6 = &sctp_ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
continue;
}
@@ -6810,7 +6827,8 @@ sctp_log_trace(uint32_t subsys, const char *str SCTP_UNUSED, uint32_t a, uint32_
#endif
static void
-sctp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *ignored)
+sctp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,
+ const struct sockaddr *sa SCTP_UNUSED, void *ctx SCTP_UNUSED)
{
struct ip *iph;
@@ -6834,7 +6852,7 @@ sctp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *ignored)
* Split out the mbuf chain. Leave the IP header in m, place the
* rest in the sp.
*/
- sp = m_split(m, off, M_DONTWAIT);
+ sp = m_split(m, off, M_NOWAIT);
if (sp == NULL) {
/* Gak, drop packet, we can't do a split */
goto out;
@@ -6857,11 +6875,23 @@ sctp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *ignored)
for (last = m; last->m_next; last = last->m_next);
last->m_next = sp;
m->m_pkthdr.len += sp->m_pkthdr.len;
+ /*
+ * The CSUM_DATA_VALID flags indicates that the HW checked the UDP
+ * checksum and it was valid. Since CSUM_DATA_VALID ==
+ * CSUM_SCTP_VALID this would imply that the HW also verified the
+ * SCTP checksum. Therefore, clear the bit.
+ */
+ SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
+ "sctp_recv_udp_tunneled_packet(): Packet of length %d received on %s with csum_flags 0x%b.\n",
+ m->m_pkthdr.len,
+ if_name(m->m_pkthdr.rcvif),
+ (int)m->m_pkthdr.csum_flags, CSUM_BITS);
+ m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
iph = mtod(m, struct ip *);
switch (iph->ip_v) {
#ifdef INET
case IPVERSION:
- iph->ip_len -= sizeof(struct udphdr);
+ iph->ip_len = htons(ntohs(iph->ip_len) - sizeof(struct udphdr));
sctp_input_with_port(m, off, port);
break;
#endif
@@ -6881,6 +6911,259 @@ out:
m_freem(m);
}
+#ifdef INET
+static void
+sctp_recv_icmp_tunneled_packet(int cmd, struct sockaddr *sa, void *vip, void *ctx SCTP_UNUSED)
+{
+ struct ip *outer_ip, *inner_ip;
+ struct sctphdr *sh;
+ struct icmp *icmp;
+ struct udphdr *udp;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+ struct sctp_init_chunk *ch;
+ struct sockaddr_in src, dst;
+ uint8_t type, code;
+
+ inner_ip = (struct ip *)vip;
+ icmp = (struct icmp *)((caddr_t)inner_ip -
+ (sizeof(struct icmp) - sizeof(struct ip)));
+ outer_ip = (struct ip *)((caddr_t)icmp - sizeof(struct ip));
+ if (ntohs(outer_ip->ip_len) <
+ sizeof(struct ip) + 8 + (inner_ip->ip_hl << 2) + sizeof(struct udphdr) + 8) {
+ return;
+ }
+ udp = (struct udphdr *)((caddr_t)inner_ip + (inner_ip->ip_hl << 2));
+ sh = (struct sctphdr *)(udp + 1);
+ memset(&src, 0, sizeof(struct sockaddr_in));
+ src.sin_family = AF_INET;
+ src.sin_len = sizeof(struct sockaddr_in);
+ src.sin_port = sh->src_port;
+ src.sin_addr = inner_ip->ip_src;
+ memset(&dst, 0, sizeof(struct sockaddr_in));
+ dst.sin_family = AF_INET;
+ dst.sin_len = sizeof(struct sockaddr_in);
+ dst.sin_port = sh->dest_port;
+ dst.sin_addr = inner_ip->ip_dst;
+ /*
+ * 'dst' holds the dest of the packet that failed to be sent. 'src'
+ * holds our local endpoint address. Thus we reverse the dst and the
+ * src in the lookup.
+ */
+ inp = NULL;
+ net = NULL;
+ stcb = sctp_findassociation_addr_sa((struct sockaddr *)&dst,
+ (struct sockaddr *)&src,
+ &inp, &net, 1,
+ SCTP_DEFAULT_VRFID);
+ if ((stcb != NULL) &&
+ (net != NULL) &&
+ (inp != NULL)) {
+ /* Check the UDP port numbers */
+ if ((udp->uh_dport != net->port) ||
+ (udp->uh_sport != htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)))) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ /* Check the verification tag */
+ if (ntohl(sh->v_tag) != 0) {
+ /*
+ * This must be the verification tag used for
+ * sending out packets. We don't consider packets
+ * reflecting the verification tag.
+ */
+ if (ntohl(sh->v_tag) != stcb->asoc.peer_vtag) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ } else {
+ if (ntohs(outer_ip->ip_len) >=
+ sizeof(struct ip) +
+ 8 + (inner_ip->ip_hl << 2) + 8 + 20) {
+ /*
+ * In this case we can check if we got an
+ * INIT chunk and if the initiate tag
+ * matches.
+ */
+ ch = (struct sctp_init_chunk *)(sh + 1);
+ if ((ch->ch.chunk_type != SCTP_INITIATION) ||
+ (ntohl(ch->init.initiate_tag) != stcb->asoc.my_vtag)) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ } else {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ }
+ type = icmp->icmp_type;
+ code = icmp->icmp_code;
+ if ((type == ICMP_UNREACH) &&
+ (code == ICMP_UNREACH_PORT)) {
+ code = ICMP_UNREACH_PROTOCOL;
+ }
+ sctp_notify(inp, stcb, net, type, code,
+ ntohs(inner_ip->ip_len),
+ ntohs(icmp->icmp_nextmtu));
+ } else {
+ if ((stcb == NULL) && (inp != NULL)) {
+ /* reduce ref-count */
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+ return;
+}
+
+#endif
+
+#ifdef INET6
+static void
+sctp_recv_icmp6_tunneled_packet(int cmd, struct sockaddr *sa, void *d, void *ctx SCTP_UNUSED)
+{
+ struct ip6ctlparam *ip6cp;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+ struct sctphdr sh;
+ struct udphdr udp;
+ struct sockaddr_in6 src, dst;
+ uint8_t type, code;
+
+ ip6cp = (struct ip6ctlparam *)d;
+ /*
+ * XXX: We assume that when IPV6 is non NULL, M and OFF are valid.
+ */
+ if (ip6cp->ip6c_m == NULL) {
+ return;
+ }
+ /*
+ * Check if we can safely examine the ports and the verification tag
+ * of the SCTP common header.
+ */
+ if (ip6cp->ip6c_m->m_pkthdr.len <
+ ip6cp->ip6c_off + sizeof(struct udphdr) + offsetof(struct sctphdr, checksum)) {
+ return;
+ }
+ /* Copy out the UDP header. */
+ memset(&udp, 0, sizeof(struct udphdr));
+ m_copydata(ip6cp->ip6c_m,
+ ip6cp->ip6c_off,
+ sizeof(struct udphdr),
+ (caddr_t)&udp);
+ /* Copy out the port numbers and the verification tag. */
+ memset(&sh, 0, sizeof(struct sctphdr));
+ m_copydata(ip6cp->ip6c_m,
+ ip6cp->ip6c_off + sizeof(struct udphdr),
+ sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t),
+ (caddr_t)&sh);
+ memset(&src, 0, sizeof(struct sockaddr_in6));
+ src.sin6_family = AF_INET6;
+ src.sin6_len = sizeof(struct sockaddr_in6);
+ src.sin6_port = sh.src_port;
+ src.sin6_addr = ip6cp->ip6c_ip6->ip6_src;
+ if (in6_setscope(&src.sin6_addr, ip6cp->ip6c_m->m_pkthdr.rcvif, NULL) != 0) {
+ return;
+ }
+ memset(&dst, 0, sizeof(struct sockaddr_in6));
+ dst.sin6_family = AF_INET6;
+ dst.sin6_len = sizeof(struct sockaddr_in6);
+ dst.sin6_port = sh.dest_port;
+ dst.sin6_addr = ip6cp->ip6c_ip6->ip6_dst;
+ if (in6_setscope(&dst.sin6_addr, ip6cp->ip6c_m->m_pkthdr.rcvif, NULL) != 0) {
+ return;
+ }
+ inp = NULL;
+ net = NULL;
+ stcb = sctp_findassociation_addr_sa((struct sockaddr *)&dst,
+ (struct sockaddr *)&src,
+ &inp, &net, 1, SCTP_DEFAULT_VRFID);
+ if ((stcb != NULL) &&
+ (net != NULL) &&
+ (inp != NULL)) {
+ /* Check the UDP port numbers */
+ if ((udp.uh_dport != net->port) ||
+ (udp.uh_sport != htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)))) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ /* Check the verification tag */
+ if (ntohl(sh.v_tag) != 0) {
+ /*
+ * This must be the verification tag used for
+ * sending out packets. We don't consider packets
+ * reflecting the verification tag.
+ */
+ if (ntohl(sh.v_tag) != stcb->asoc.peer_vtag) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ } else {
+ if (ip6cp->ip6c_m->m_pkthdr.len >=
+ ip6cp->ip6c_off + sizeof(struct udphdr) +
+ sizeof(struct sctphdr) +
+ sizeof(struct sctp_chunkhdr) +
+ offsetof(struct sctp_init, a_rwnd)) {
+ /*
+ * In this case we can check if we got an
+ * INIT chunk and if the initiate tag
+ * matches.
+ */
+ uint32_t initiate_tag;
+ uint8_t chunk_type;
+
+ m_copydata(ip6cp->ip6c_m,
+ ip6cp->ip6c_off +
+ sizeof(struct udphdr) +
+ sizeof(struct sctphdr),
+ sizeof(uint8_t),
+ (caddr_t)&chunk_type);
+ m_copydata(ip6cp->ip6c_m,
+ ip6cp->ip6c_off +
+ sizeof(struct udphdr) +
+ sizeof(struct sctphdr) +
+ sizeof(struct sctp_chunkhdr),
+ sizeof(uint32_t),
+ (caddr_t)&initiate_tag);
+ if ((chunk_type != SCTP_INITIATION) ||
+ (ntohl(initiate_tag) != stcb->asoc.my_vtag)) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ } else {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ }
+ type = ip6cp->ip6c_icmp6->icmp6_type;
+ code = ip6cp->ip6c_icmp6->icmp6_code;
+ if ((type == ICMP6_DST_UNREACH) &&
+ (code == ICMP6_DST_UNREACH_NOPORT)) {
+ type = ICMP6_PARAM_PROB;
+ code = ICMP6_PARAMPROB_NEXTHEADER;
+ }
+ sctp6_notify(inp, stcb, net, type, code,
+ (uint16_t) ntohl(ip6cp->ip6c_icmp6->icmp6_mtu));
+ } else {
+ if ((stcb == NULL) && (inp != NULL)) {
+ /* reduce inp's ref-count */
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ }
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+}
+
+#endif
+
void
sctp_over_udp_stop(void)
{
@@ -6946,7 +7229,9 @@ sctp_over_udp_start(void)
}
/* Call the special UDP hook. */
if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp4_tun_socket),
- sctp_recv_udp_tunneled_packet))) {
+ sctp_recv_udp_tunneled_packet,
+ sctp_recv_icmp_tunneled_packet,
+ NULL))) {
sctp_over_udp_stop();
return (ret);
}
@@ -6970,7 +7255,9 @@ sctp_over_udp_start(void)
}
/* Call the special UDP hook. */
if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp6_tun_socket),
- sctp_recv_udp_tunneled_packet))) {
+ sctp_recv_udp_tunneled_packet,
+ sctp_recv_icmp6_tunneled_packet,
+ NULL))) {
sctp_over_udp_stop();
return (ret);
}
diff --git a/freebsd/sys/netinet/sctputil.h b/freebsd/sys/netinet/sctputil.h
index af5a0f29..292068af 100644
--- a/freebsd/sys/netinet/sctputil.h
+++ b/freebsd/sys/netinet/sctputil.h
@@ -67,6 +67,9 @@ void
/*
* Function prototypes
*/
+int32_t
+sctp_map_assoc_state(int);
+
uint32_t
sctp_get_ifa_hash_val(struct sockaddr *addr);
@@ -80,7 +83,7 @@ uint32_t sctp_select_initial_TSN(struct sctp_pcb *);
uint32_t sctp_select_a_tag(struct sctp_inpcb *, uint16_t lport, uint16_t rport, int);
-int sctp_init_asoc(struct sctp_inpcb *, struct sctp_tcb *, uint32_t, uint32_t);
+int sctp_init_asoc(struct sctp_inpcb *, struct sctp_tcb *, uint32_t, uint32_t, uint16_t);
void sctp_fill_random_store(struct sctp_pcb *);
@@ -105,6 +108,14 @@ void
sctp_mtu_size_reset(struct sctp_inpcb *, struct sctp_association *, uint32_t);
void
+sctp_wakeup_the_read_socket(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+void
sctp_add_to_readq(struct sctp_inpcb *inp,
struct sctp_tcb *stcb,
struct sctp_queued_to_read *control,
@@ -117,16 +128,6 @@ sctp_add_to_readq(struct sctp_inpcb *inp,
#endif
);
-int
-sctp_append_to_readq(struct sctp_inpcb *inp,
- struct sctp_tcb *stcb,
- struct sctp_queued_to_read *control,
- struct mbuf *m,
- int end,
- int new_cumack,
- struct sockbuf *sb);
-
-
void sctp_iterator_worker(void);
uint32_t sctp_get_prev_mtu(uint32_t);
@@ -147,9 +148,11 @@ struct sctp_paramhdr *
sctp_get_next_param(struct mbuf *, int,
struct sctp_paramhdr *, int);
-int sctp_add_pad_tombuf(struct mbuf *, int);
+struct mbuf *
+ sctp_add_pad_tombuf(struct mbuf *, int);
-int sctp_pad_lastmbuf(struct mbuf *, int, struct mbuf *);
+struct mbuf *
+ sctp_pad_lastmbuf(struct mbuf *, int, struct mbuf *);
void
sctp_ulp_notify(uint32_t, struct sctp_tcb *, uint32_t, void *, int
@@ -206,7 +209,7 @@ sctp_handle_ootb(struct mbuf *, int, int,
struct sockaddr *, struct sockaddr *,
struct sctphdr *, struct sctp_inpcb *,
struct mbuf *,
- uint8_t, uint32_t,
+ uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
int
@@ -215,7 +218,8 @@ sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
struct sctp_tcb *
sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
- int *totaddr, int *num_v4, int *num_v6, int *error, int limit, int *bad_addr);
+ unsigned int *totaddr, unsigned int *num_v4, unsigned int *num_v6,
+ int *error, unsigned int limit, int *bad_addr);
int sctp_is_there_an_abort_here(struct mbuf *, int, uint32_t *);
@@ -276,42 +280,42 @@ sctp_free_bufspace(struct sctp_tcb *, struct sctp_association *,
#define sctp_free_bufspace(stcb, asoc, tp1, chk_cnt) \
do { \
if (tp1->data != NULL) { \
- atomic_subtract_int(&((asoc)->chunks_on_out_queue), chk_cnt); \
+ atomic_subtract_int(&((asoc)->chunks_on_out_queue), chk_cnt); \
if ((asoc)->total_output_queue_size >= tp1->book_size) { \
atomic_subtract_int(&((asoc)->total_output_queue_size), tp1->book_size); \
} else { \
(asoc)->total_output_queue_size = 0; \
} \
- if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
- (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+ if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
if (stcb->sctp_socket->so_snd.sb_cc >= tp1->book_size) { \
atomic_subtract_int(&((stcb)->sctp_socket->so_snd.sb_cc), tp1->book_size); \
} else { \
stcb->sctp_socket->so_snd.sb_cc = 0; \
} \
} \
- } \
+ } \
} while (0)
#endif
#define sctp_free_spbufspace(stcb, asoc, sp) \
do { \
- if (sp->data != NULL) { \
+ if (sp->data != NULL) { \
if ((asoc)->total_output_queue_size >= sp->length) { \
atomic_subtract_int(&(asoc)->total_output_queue_size, sp->length); \
} else { \
(asoc)->total_output_queue_size = 0; \
} \
- if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
- (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+ if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
if (stcb->sctp_socket->so_snd.sb_cc >= sp->length) { \
atomic_subtract_int(&stcb->sctp_socket->so_snd.sb_cc,sp->length); \
} else { \
stcb->sctp_socket->so_snd.sb_cc = 0; \
} \
} \
- } \
+ } \
} while (0)
#define sctp_snd_sb_alloc(stcb, sz) \
@@ -347,10 +351,16 @@ void sctp_log_strm_del_alt(struct sctp_tcb *stcb, uint32_t, uint16_t, uint16_t,
void sctp_log_nagle_event(struct sctp_tcb *stcb, int action);
+#ifdef SCTP_MBUF_LOGGING
void
sctp_log_mb(struct mbuf *m, int from);
void
+ sctp_log_mbc(struct mbuf *m, int from);
+
+#endif
+
+void
sctp_sblog(struct sockbuf *sb,
struct sctp_tcb *stcb, int from, int incr);
@@ -365,9 +375,8 @@ void sctp_log_closing(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int16_t loc
void sctp_log_lock(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint8_t from);
void sctp_log_maxburst(struct sctp_tcb *stcb, struct sctp_nets *, int, int, uint8_t);
-void sctp_log_block(uint8_t, struct sctp_association *, int);
+void sctp_log_block(uint8_t, struct sctp_association *, size_t);
void sctp_log_rwnd(uint8_t, uint32_t, uint32_t, uint32_t);
-void sctp_log_mbcnt(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t);
void sctp_log_rwnd_set(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t);
int sctp_fill_stat_log(void *, size_t *);
void sctp_log_fr(uint32_t, uint32_t, uint32_t, int);
diff --git a/freebsd/sys/netinet/tcp.h b/freebsd/sys/netinet/tcp.h
index fb2f8108..47038104 100644
--- a/freebsd/sys/netinet/tcp.h
+++ b/freebsd/sys/netinet/tcp.h
@@ -97,6 +97,10 @@ struct tcphdr {
#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */
#define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */
#define TCPOLEN_SIGNATURE 18
+#define TCPOPT_FAST_OPEN 34
+#define TCPOLEN_FAST_OPEN_EMPTY 2
+#define TCPOLEN_FAST_OPEN_MIN 6
+#define TCPOLEN_FAST_OPEN_MAX 18
/* Miscellaneous constants */
#define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */
@@ -161,11 +165,15 @@ struct tcphdr {
#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */
#define TCP_INFO 32 /* retrieve tcp_info structure */
#define TCP_CONGESTION 64 /* get/set congestion control algorithm */
+#define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */
#define TCP_KEEPINIT 128 /* N, time to establish connection */
#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */
#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */
#define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */
-
+#define TCP_FASTOPEN 1025 /* enable TFO / was created via TFO */
+#define TCP_PCAP_OUT 2048 /* number of output packets to keep */
+#define TCP_PCAP_IN 4096 /* number of input packets to keep */
+#define TCP_FUNCTION_BLK 8192 /* Set the tcp function pointers to the specified stack */
/* Start of reserved space for third-party user-settable options. */
#define TCP_VENDOR SO_VENDOR
@@ -243,5 +251,11 @@ struct tcp_info {
u_int32_t __tcpi_pad[26]; /* Padding. */
};
#endif
+#define TCP_FUNCTION_NAME_LEN_MAX 32
+
+struct tcp_function_set {
+ char function_set_name[TCP_FUNCTION_NAME_LEN_MAX];
+ uint32_t pcbcnt;
+};
#endif /* !_NETINET_TCP_H_ */
diff --git a/freebsd/sys/netinet/tcp_debug.c b/freebsd/sys/netinet/tcp_debug.c
index 2ef9ce43..c5f74182 100644
--- a/freebsd/sys/netinet/tcp_debug.c
+++ b/freebsd/sys/netinet/tcp_debug.c
@@ -177,11 +177,10 @@ tcp_trace(short act, short ostate, struct tcpcb *tp, void *ipgen,
#ifdef INET6
isipv6 ? ntohs(((struct ip6_hdr *)ipgen)->ip6_plen) :
#endif
- ((struct ip *)ipgen)->ip_len;
+ ntohs(((struct ip *)ipgen)->ip_len);
if (act == TA_OUTPUT) {
seq = ntohl(seq);
ack = ntohl(ack);
- len = ntohs((u_short)len);
}
if (act == TA_OUTPUT)
len -= sizeof (struct tcphdr);
diff --git a/freebsd/sys/netinet/tcp_hostcache.c b/freebsd/sys/netinet/tcp_hostcache.c
index 260d161d..4e78b8b2 100644
--- a/freebsd/sys/netinet/tcp_hostcache.c
+++ b/freebsd/sys/netinet/tcp_hostcache.c
@@ -34,8 +34,8 @@
* table to a dedicated structure indexed by the remote IP address. It keeps
* information on the measured TCP parameters of past TCP sessions to allow
* better initial start values to be used with later connections to/from the
- * same source. Depending on the network parameters (delay, bandwidth, max
- * MTU, congestion window) between local and remote sites, this can lead to
+ * same source. Depending on the network parameters (delay, max MTU,
+ * congestion window) between local and remote sites, this can lead to
* significant speed-ups for new TCP connections after the first one.
*
* Due to the tcp_hostcache, all TCP-specific metrics information in the
@@ -81,6 +81,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -118,37 +119,38 @@ static VNET_DEFINE(struct callout, tcp_hc_callout);
static struct hc_metrics *tcp_hc_lookup(struct in_conninfo *);
static struct hc_metrics *tcp_hc_insert(struct in_conninfo *);
static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS);
+static int sysctl_tcp_hc_purgenow(SYSCTL_HANDLER_ARGS);
static void tcp_hc_purge_internal(int);
static void tcp_hc_purge(void *);
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0,
"TCP Host cache");
-SYSCTL_VNET_UINT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
+SYSCTL_UINT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(tcp_hostcache.cache_limit), 0,
"Overall entry limit for hostcache");
-SYSCTL_VNET_UINT(_net_inet_tcp_hostcache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
+SYSCTL_UINT(_net_inet_tcp_hostcache, OID_AUTO, hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(tcp_hostcache.hashsize), 0,
"Size of TCP hostcache hashtable");
-SYSCTL_VNET_UINT(_net_inet_tcp_hostcache, OID_AUTO, bucketlimit,
- CTLFLAG_RDTUN, &VNET_NAME(tcp_hostcache.bucket_limit), 0,
+SYSCTL_UINT(_net_inet_tcp_hostcache, OID_AUTO, bucketlimit,
+ CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_hostcache.bucket_limit), 0,
"Per-bucket hash limit for hostcache");
-SYSCTL_VNET_UINT(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_RD,
+SYSCTL_UINT(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_VNET | CTLFLAG_RD,
&VNET_NAME(tcp_hostcache.cache_count), 0,
"Current number of entries in hostcache");
-SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, expire, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, expire, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_hostcache.expire), 0,
"Expire time of TCP hostcache entries");
-SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, prune, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, prune, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_hostcache.prune), 0,
"Time between purge runs");
-SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, purge, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, purge, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_hostcache.purgeall), 0,
"Expire all entires on next purge run");
@@ -156,6 +158,9 @@ SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list,
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0,
sysctl_tcp_hc_list, "A", "List of all hostcache entries");
+SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, purgenow,
+ CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
+ sysctl_tcp_hc_purgenow, "I", "Immediately purge all entries");
static MALLOC_DEFINE(M_HOSTCACHE, "hostcache", "TCP hostcache");
@@ -235,7 +240,7 @@ tcp_hc_init(void)
/*
* Set up periodic cache cleanup.
*/
- callout_init(&V_tcp_hc_callout, CALLOUT_MPSAFE);
+ callout_init(&V_tcp_hc_callout, 1);
callout_reset(&V_tcp_hc_callout, V_tcp_hostcache.prune * hz,
tcp_hc_purge, curvnet);
}
@@ -297,6 +302,7 @@ tcp_hc_lookup(struct in_conninfo *inc)
*/
TAILQ_FOREACH(hc_entry, &hc_head->hch_bucket, rmx_q) {
if (inc->inc_flags & INC_ISIPV6) {
+ /* XXX: check ip6_zoneid */
if (memcmp(&inc->inc6_faddr, &hc_entry->ip6,
sizeof(inc->inc6_faddr)) == 0)
return hc_entry;
@@ -388,9 +394,10 @@ tcp_hc_insert(struct in_conninfo *inc)
* Initialize basic information of hostcache entry.
*/
bzero(hc_entry, sizeof(*hc_entry));
- if (inc->inc_flags & INC_ISIPV6)
- bcopy(&inc->inc6_faddr, &hc_entry->ip6, sizeof(hc_entry->ip6));
- else
+ if (inc->inc_flags & INC_ISIPV6) {
+ hc_entry->ip6 = inc->inc6_faddr;
+ hc_entry->ip6_zoneid = inc->inc6_zoneid;
+ } else
hc_entry->ip4 = inc->inc_faddr;
hc_entry->rmx_head = hc_head;
hc_entry->rmx_expire = V_tcp_hostcache.expire;
@@ -435,7 +442,6 @@ tcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite)
hc_metrics_lite->rmx_ssthresh = hc_entry->rmx_ssthresh;
hc_metrics_lite->rmx_rtt = hc_entry->rmx_rtt;
hc_metrics_lite->rmx_rttvar = hc_entry->rmx_rttvar;
- hc_metrics_lite->rmx_bandwidth = hc_entry->rmx_bandwidth;
hc_metrics_lite->rmx_cwnd = hc_entry->rmx_cwnd;
hc_metrics_lite->rmx_sendpipe = hc_entry->rmx_sendpipe;
hc_metrics_lite->rmx_recvpipe = hc_entry->rmx_recvpipe;
@@ -550,14 +556,6 @@ tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml)
(hc_entry->rmx_ssthresh + hcml->rmx_ssthresh) / 2;
TCPSTAT_INC(tcps_cachedssthresh);
}
- if (hcml->rmx_bandwidth != 0) {
- if (hc_entry->rmx_bandwidth == 0)
- hc_entry->rmx_bandwidth = hcml->rmx_bandwidth;
- else
- hc_entry->rmx_bandwidth =
- (hc_entry->rmx_bandwidth + hcml->rmx_bandwidth) / 2;
- /* TCPSTAT_INC(tcps_cachedbandwidth); */
- }
if (hcml->rmx_cwnd != 0) {
if (hc_entry->rmx_cwnd == 0)
hc_entry->rmx_cwnd = hcml->rmx_cwnd;
@@ -595,7 +593,7 @@ tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml)
static int
sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
{
- int linesize = 128;
+ const int linesize = 128;
struct sbuf sb;
int i, error;
struct hc_metrics *hc_entry;
@@ -604,10 +602,10 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
#endif
sbuf_new(&sb, NULL, linesize * (V_tcp_hostcache.cache_count + 1),
- SBUF_FIXEDLEN);
+ SBUF_INCLUDENUL);
sbuf_printf(&sb,
- "\nIP address MTU SSTRESH RTT RTTVAR BANDWIDTH "
+ "\nIP address MTU SSTRESH RTT RTTVAR "
" CWND SENDPIPE RECVPIPE HITS UPD EXP\n");
#define msec(u) (((u) + 500) / 1000)
@@ -616,8 +614,8 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
TAILQ_FOREACH(hc_entry, &V_tcp_hostcache.hashbase[i].hch_bucket,
rmx_q) {
sbuf_printf(&sb,
- "%-15s %5lu %8lu %6lums %6lums %9lu %8lu %8lu %8lu "
- "%4lu %4lu %4i\n",
+ "%-15s %5lu %8lu %6lums %6lums %8lu %8lu %8lu %4lu "
+ "%4lu %4i\n",
hc_entry->ip4.s_addr ? inet_ntoa(hc_entry->ip4) :
#ifdef INET6
ip6_sprintf(ip6buf, &hc_entry->ip6),
@@ -630,7 +628,6 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
(RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
msec(hc_entry->rmx_rttvar *
(RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE))),
- hc_entry->rmx_bandwidth * 8,
hc_entry->rmx_cwnd,
hc_entry->rmx_sendpipe,
hc_entry->rmx_recvpipe,
@@ -641,8 +638,9 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
}
#undef msec
- sbuf_finish(&sb);
- error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
+ error = sbuf_finish(&sb);
+ if (error == 0)
+ error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
sbuf_delete(&sb);
return(error);
}
@@ -694,3 +692,24 @@ tcp_hc_purge(void *arg)
tcp_hc_purge, arg);
CURVNET_RESTORE();
}
+
+/*
+ * Expire and purge all entries in hostcache immediately.
+ */
+static int
+sysctl_tcp_hc_purgenow(SYSCTL_HANDLER_ARGS)
+{
+ int error, val;
+
+ val = 0;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error || !req->newptr)
+ return (error);
+
+ tcp_hc_purge_internal(1);
+
+ callout_reset(&V_tcp_hc_callout, V_tcp_hostcache.prune * hz,
+ tcp_hc_purge, curvnet);
+
+ return (0);
+}
diff --git a/freebsd/sys/netinet/tcp_hostcache.h b/freebsd/sys/netinet/tcp_hostcache.h
index 8569edcc..44875ff6 100644
--- a/freebsd/sys/netinet/tcp_hostcache.h
+++ b/freebsd/sys/netinet/tcp_hostcache.h
@@ -51,12 +51,12 @@ struct hc_metrics {
struct hc_head *rmx_head; /* head of bucket tail queue */
struct in_addr ip4; /* IP address */
struct in6_addr ip6; /* IP6 address */
+ uint32_t ip6_zoneid; /* IPv6 scope zone id */
/* endpoint specific values for tcp */
u_long rmx_mtu; /* MTU for this path */
u_long rmx_ssthresh; /* outbound gateway buffer limit */
u_long rmx_rtt; /* estimated round trip time */
u_long rmx_rttvar; /* estimated rtt variance */
- u_long rmx_bandwidth; /* estimated bandwidth */
u_long rmx_cwnd; /* congestion window */
u_long rmx_sendpipe; /* outbound delay-bandwidth product */
u_long rmx_recvpipe; /* inbound delay-bandwidth product */
diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c
index f9512eb3..eaa3eb3d 100644
--- a/freebsd/sys/netinet/tcp_input.c
+++ b/freebsd/sys/netinet/tcp_input.c
@@ -52,7 +52,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <rtems/bsd/local/opt_ipfw.h> /* for ipfw_fwd */
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
@@ -65,6 +64,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/proc.h> /* for proc0 declaration */
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -77,16 +77,16 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>
#define TCPSTATES /* for logging */
-#include <netinet/cc.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h> /* required for icmp_var.h */
#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
@@ -95,14 +95,23 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet6/in6_pcb.h>
+#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
+#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet6/tcp6_var.h>
#include <netinet/tcpip.h>
+#include <netinet/cc/cc.h>
+#ifdef TCPPCAP
+#include <netinet/tcp_pcap.h>
+#endif
#include <netinet/tcp_syncache.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
@@ -122,11 +131,6 @@ __FBSDID("$FreeBSD$");
const int tcprexmtthresh = 3;
-VNET_DEFINE(struct tcpstat, tcpstat);
-SYSCTL_VNET_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(tcpstat), tcpstat,
- "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
-
int tcp_log_in_vain = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
&tcp_log_in_vain, 0,
@@ -134,88 +138,96 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
VNET_DEFINE(int, blackhole) = 0;
#define V_blackhole VNET(blackhole)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(blackhole), 0,
"Do not send RST on segments to closed ports");
VNET_DEFINE(int, tcp_delack_enabled) = 1;
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_delack_enabled), 0,
"Delay ACK to try and piggyback it onto a data packet");
VNET_DEFINE(int, drop_synfin) = 0;
#define V_drop_synfin VNET(drop_synfin)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(drop_synfin), 0,
"Drop TCP packets with SYN+FIN set");
+VNET_DEFINE(int, tcp_do_rfc6675_pipe) = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc6675_pipe, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(tcp_do_rfc6675_pipe), 0,
+ "Use calculated pipe/in-flight bytes per RFC 6675");
+
VNET_DEFINE(int, tcp_do_rfc3042) = 1;
#define V_tcp_do_rfc3042 VNET(tcp_do_rfc3042)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_rfc3042), 0,
"Enable RFC 3042 (Limited Transmit)");
VNET_DEFINE(int, tcp_do_rfc3390) = 1;
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_rfc3390), 0,
"Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
-SYSCTL_NODE(_net_inet_tcp, OID_AUTO, experimental, CTLFLAG_RW, 0,
- "Experimental TCP extensions");
-
-VNET_DEFINE(int, tcp_do_initcwnd10) = 1;
-SYSCTL_VNET_INT(_net_inet_tcp_experimental, OID_AUTO, initcwnd10, CTLFLAG_RW,
- &VNET_NAME(tcp_do_initcwnd10), 0,
- "Enable RFC 6928 (Increasing initial CWND to 10)");
+VNET_DEFINE(int, tcp_initcwnd_segments) = 10;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, initcwnd_segments,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_initcwnd_segments), 0,
+ "Slow-start flight size (initial congestion window) in number of segments");
VNET_DEFINE(int, tcp_do_rfc3465) = 1;
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_rfc3465), 0,
"Enable RFC 3465 (Appropriate Byte Counting)");
VNET_DEFINE(int, tcp_abc_l_var) = 2;
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_abc_l_var), 2,
"Cap the max cwnd increment during slow-start to this number of segments");
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN");
-VNET_DEFINE(int, tcp_do_ecn) = 0;
-SYSCTL_VNET_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_RW,
+VNET_DEFINE(int, tcp_do_ecn) = 2;
+SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_ecn), 0,
"TCP ECN support");
VNET_DEFINE(int, tcp_ecn_maxretries) = 1;
-SYSCTL_VNET_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_ecn_maxretries), 0,
"Max retries before giving up on ECN");
+VNET_DEFINE(int, tcp_insecure_syn) = 0;
+#define V_tcp_insecure_syn VNET(tcp_insecure_syn)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_syn, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(tcp_insecure_syn), 0,
+ "Follow RFC793 instead of RFC5961 criteria for accepting SYN packets");
+
VNET_DEFINE(int, tcp_insecure_rst) = 0;
#define V_tcp_insecure_rst VNET(tcp_insecure_rst)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_insecure_rst), 0,
- "Follow the old (insecure) criteria for accepting RST packets");
+ "Follow RFC793 instead of RFC5961 criteria for accepting RST packets");
VNET_DEFINE(int, tcp_recvspace) = 1024*64;
#define V_tcp_recvspace VNET(tcp_recvspace)
-SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_recvspace), 0, "Initial receive socket buffer size");
VNET_DEFINE(int, tcp_do_autorcvbuf) = 1;
#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_autorcvbuf), 0,
"Enable automatic receive buffer sizing");
VNET_DEFINE(int, tcp_autorcvbuf_inc) = 16*1024;
#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autorcvbuf_inc), 0,
"Incrementor step size of automatic receive buffer");
VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024;
#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autorcvbuf_max), 0,
"Max size of automatic receive buffer");
@@ -223,47 +235,55 @@ VNET_DEFINE(struct inpcbhead, tcb);
#define tcb6 tcb /* for KAME src sync over BSD*'s */
VNET_DEFINE(struct inpcbinfo, tcbinfo);
-static void tcp_dooptions(struct tcpopt *, u_char *, int, int);
-static void tcp_do_segment(struct mbuf *, struct tcphdr *,
- struct socket *, struct tcpcb *, int, int, uint8_t,
- int);
-static void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
- struct tcpcb *, int, int);
-static void tcp_pulloutofband(struct socket *,
- struct tcphdr *, struct mbuf *, int);
-static void tcp_xmit_timer(struct tcpcb *, int);
-static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
-static void inline tcp_fields_to_host(struct tcphdr *);
-#ifdef TCP_SIGNATURE
-static void inline tcp_fields_to_net(struct tcphdr *);
-static int inline tcp_signature_verify_input(struct mbuf *, int, int,
- int, struct tcpopt *, struct tcphdr *, u_int);
-#endif
-static void inline cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
- uint16_t type);
-static void inline cc_conn_init(struct tcpcb *tp);
-static void inline cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
-static void inline hhook_run_tcp_est_in(struct tcpcb *tp,
- struct tcphdr *th, struct tcpopt *to);
+/*
+ * TCP statistics are stored in an array of counter(9)s, whose size matches
+ * size of struct tcpstat. TCP running connection count is a regular array.
+ */
+VNET_PCPUSTAT_DEFINE(struct tcpstat, tcpstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_tcp, TCPCTL_STATS, stats, struct tcpstat,
+ tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
+VNET_DEFINE(counter_u64_t, tcps_states[TCP_NSTATES]);
+SYSCTL_COUNTER_U64_ARRAY(_net_inet_tcp, TCPCTL_STATES, states, CTLFLAG_RD |
+ CTLFLAG_VNET, &VNET_NAME(tcps_states)[0], TCP_NSTATES,
+ "TCP connection counts by TCP state");
+
+static void
+tcp_vnet_init(const void *unused)
+{
+
+ COUNTER_ARRAY_ALLOC(V_tcps_states, TCP_NSTATES, M_WAITOK);
+ VNET_PCPUSTAT_ALLOC(tcpstat, M_WAITOK);
+}
+VNET_SYSINIT(tcp_vnet_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ tcp_vnet_init, NULL);
+
+#ifdef VIMAGE
+static void
+tcp_vnet_uninit(const void *unused)
+{
+
+ COUNTER_ARRAY_FREE(V_tcps_states, TCP_NSTATES);
+ VNET_PCPUSTAT_FREE(tcpstat);
+}
+VNET_SYSUNINIT(tcp_vnet_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+ tcp_vnet_uninit, NULL);
+#endif /* VIMAGE */
/*
* Kernel module interface for updating tcpstat. The argument is an index
- * into tcpstat treated as an array of u_long. While this encodes the
- * general layout of tcpstat into the caller, it doesn't encode its location,
- * so that future changes to add, for example, per-CPU stats support won't
- * cause binary compatibility problems for kernel modules.
+ * into tcpstat treated as an array.
*/
void
kmod_tcpstat_inc(int statnum)
{
- (*((u_long *)&V_tcpstat + statnum))++;
+ counter_u64_add(VNET(tcpstat)[statnum], 1);
}
/*
* Wrapper for the TCP established input helper hook.
*/
-static void inline
+void
hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
{
struct tcp_hhook_data hhook_data;
@@ -281,7 +301,7 @@ hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
/*
* CC wrapper hook functions
*/
-static void inline
+void
cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type)
{
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -295,7 +315,7 @@ cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type)
if (type == CC_ACK) {
if (tp->snd_cwnd > tp->snd_ssthresh) {
tp->t_bytes_acked += min(tp->ccv->bytes_this_ack,
- V_tcp_abc_l_var * tp->t_maxseg);
+ V_tcp_abc_l_var * tcp_maxseg(tp));
if (tp->t_bytes_acked >= tp->snd_cwnd) {
tp->t_bytes_acked -= tp->snd_cwnd;
tp->ccv->flags |= CCF_ABC_SENTAWND;
@@ -313,16 +333,18 @@ cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type)
}
}
-static void inline
+void
cc_conn_init(struct tcpcb *tp)
{
struct hc_metrics_lite metrics;
struct inpcb *inp = tp->t_inpcb;
+ u_int maxseg;
int rtt;
INP_WLOCK_ASSERT(tp->t_inpcb);
tcp_hc_get(&inp->inp_inc, &metrics);
+ maxseg = tcp_maxseg(tp);
if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) {
tp->t_srtt = rtt;
@@ -344,10 +366,10 @@ cc_conn_init(struct tcpcb *tp)
/*
* There's some sort of gateway or interface
* buffer limit on the path. Use this to set
- * the slow start threshhold, but set the
+ * the slow start threshold, but set the
* threshold to no less than 2*mss.
*/
- tp->snd_ssthresh = max(2 * tp->t_maxseg, metrics.rmx_ssthresh);
+ tp->snd_ssthresh = max(2 * maxseg, metrics.rmx_ssthresh);
TCPSTAT_INC(tcps_usedssthresh);
}
@@ -357,27 +379,27 @@ cc_conn_init(struct tcpcb *tp)
* RFC5681 Section 3.1 specifies the default conservative values.
* RFC3390 specifies slightly more aggressive values.
* RFC6928 increases it to ten segments.
+ * Support for user-specified value for initial flight size.
*
* If a SYN or SYN/ACK was lost and retransmitted, we have to
* reduce the initial CWND to one segment as congestion is likely
* requiring us to be cautious.
*/
if (tp->snd_cwnd == 1)
- tp->snd_cwnd = tp->t_maxseg; /* SYN(-ACK) lost */
- else if (V_tcp_do_initcwnd10)
- tp->snd_cwnd = min(10 * tp->t_maxseg,
- max(2 * tp->t_maxseg, 14600));
+ tp->snd_cwnd = maxseg; /* SYN(-ACK) lost */
+ else if (V_tcp_initcwnd_segments)
+ tp->snd_cwnd = min(V_tcp_initcwnd_segments * maxseg,
+ max(2 * maxseg, V_tcp_initcwnd_segments * 1460));
else if (V_tcp_do_rfc3390)
- tp->snd_cwnd = min(4 * tp->t_maxseg,
- max(2 * tp->t_maxseg, 4380));
+ tp->snd_cwnd = min(4 * maxseg, max(2 * maxseg, 4380));
else {
/* Per RFC5681 Section 3.1 */
- if (tp->t_maxseg > 2190)
- tp->snd_cwnd = 2 * tp->t_maxseg;
- else if (tp->t_maxseg > 1095)
- tp->snd_cwnd = 3 * tp->t_maxseg;
+ if (maxseg > 2190)
+ tp->snd_cwnd = 2 * maxseg;
+ else if (maxseg > 1095)
+ tp->snd_cwnd = 3 * maxseg;
else
- tp->snd_cwnd = 4 * tp->t_maxseg;
+ tp->snd_cwnd = 4 * maxseg;
}
if (CC_ALGO(tp)->conn_init != NULL)
@@ -387,6 +409,8 @@ cc_conn_init(struct tcpcb *tp)
void inline
cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type)
{
+ u_int maxseg;
+
INP_WLOCK_ASSERT(tp->t_inpcb);
switch(type) {
@@ -406,12 +430,13 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type)
}
break;
case CC_RTO:
+ maxseg = tcp_maxseg(tp);
tp->t_dupacks = 0;
tp->t_bytes_acked = 0;
EXIT_RECOVERY(tp->t_flags);
tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 /
- tp->t_maxseg) * tp->t_maxseg;
- tp->snd_cwnd = tp->t_maxseg;
+ maxseg) * maxseg;
+ tp->snd_cwnd = maxseg;
break;
case CC_RTO_ERR:
TCPSTAT_INC(tcps_sndrexmitbad);
@@ -436,7 +461,7 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type)
}
}
-static void inline
+void inline
cc_post_recovery(struct tcpcb *tp, struct tcphdr *th)
{
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -451,27 +476,7 @@ cc_post_recovery(struct tcpcb *tp, struct tcphdr *th)
tp->t_bytes_acked = 0;
}
-static inline void
-tcp_fields_to_host(struct tcphdr *th)
-{
-
- th->th_seq = ntohl(th->th_seq);
- th->th_ack = ntohl(th->th_ack);
- th->th_win = ntohs(th->th_win);
- th->th_urp = ntohs(th->th_urp);
-}
-
#ifdef TCP_SIGNATURE
-static inline void
-tcp_fields_to_net(struct tcphdr *th)
-{
-
- th->th_seq = htonl(th->th_seq);
- th->th_ack = htonl(th->th_ack);
- th->th_win = htons(th->th_win);
- th->th_urp = htons(th->th_urp);
-}
-
static inline int
tcp_signature_verify_input(struct mbuf *m, int off0, int tlen, int optlen,
struct tcpopt *to, struct tcphdr *th, u_int tcpbflag)
@@ -485,34 +490,56 @@ tcp_signature_verify_input(struct mbuf *m, int off0, int tlen, int optlen,
}
#endif
-/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
-#ifdef INET6
-#define ND6_HINT(tp) \
-do { \
- if ((tp) && (tp)->t_inpcb && \
- ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0) \
- nd6_nud_hint(NULL, NULL, 0); \
-} while (0)
-#else
-#define ND6_HINT(tp)
-#endif
-
/*
* Indicate whether this ack should be delayed. We can delay the ack if
- * - there is no delayed ack timer in progress and
- * - our last ack wasn't a 0-sized window. We never want to delay
- * the ack that opens up a 0-sized window and
- * - delayed acks are enabled or
- * - this is a half-synchronized T/TCP connection.
- * - the segment size is not larger than the MSS and LRO wasn't used
- * for this segment.
+ * following conditions are met:
+ * - There is no delayed ack timer in progress.
+ * - Our last ack wasn't a 0-sized window. We never want to delay
+ * the ack that opens up a 0-sized window.
+ * - LRO wasn't used for this segment. We make sure by checking that the
+ * segment size is not larger than the MSS.
*/
#define DELAY_ACK(tp, tlen) \
((!tcp_timer_active(tp, TT_DELACK) && \
(tp->t_flags & TF_RXWIN0SENT) == 0) && \
- (tlen <= tp->t_maxopd) && \
+ (tlen <= tp->t_maxseg) && \
(V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
+static void inline
+cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
+{
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ if (CC_ALGO(tp)->ecnpkt_handler != NULL) {
+ switch (iptos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ tp->ccv->flags |= CCF_IPHDR_CE;
+ break;
+ case IPTOS_ECN_ECT0:
+ tp->ccv->flags &= ~CCF_IPHDR_CE;
+ break;
+ case IPTOS_ECN_ECT1:
+ tp->ccv->flags &= ~CCF_IPHDR_CE;
+ break;
+ }
+
+ if (th->th_flags & TH_CWR)
+ tp->ccv->flags |= CCF_TCPHDR_CWR;
+ else
+ tp->ccv->flags &= ~CCF_TCPHDR_CWR;
+
+ if (tp->t_flags & TF_DELACK)
+ tp->ccv->flags |= CCF_DELACK;
+ else
+ tp->ccv->flags &= ~CCF_DELACK;
+
+ CC_ALGO(tp)->ecnpkt_handler(tp->ccv);
+
+ if (tp->ccv->flags & CCF_ACKNOW)
+ tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
+ }
+}
+
/*
* TCP input handling is split into multiple parts:
* tcp6_input is a thin wrapper around tcp_input for the extended
@@ -528,6 +555,7 @@ tcp6_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
struct in6_ifaddr *ia6;
+ struct ip6_hdr *ip6;
IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE);
@@ -535,7 +563,8 @@ tcp6_input(struct mbuf **mp, int *offp, int proto)
* draft-itojun-ipv6-tcp-to-anycast
* better place to put this in?
*/
- ia6 = ip6_getdstifaddr(m);
+ ip6 = mtod(m, struct ip6_hdr *);
+ ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) {
struct ip6_hdr *ip6;
@@ -543,28 +572,26 @@ tcp6_input(struct mbuf **mp, int *offp, int proto)
ip6 = mtod(m, struct ip6_hdr *);
icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
(caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
if (ia6)
ifa_free(&ia6->ia_ifa);
- tcp_input(m, *offp);
- return IPPROTO_DONE;
+ return (tcp_input(mp, offp, proto));
}
#endif /* INET6 */
-void
-tcp_input(struct mbuf *m, int off0)
+int
+tcp_input(struct mbuf **mp, int *offp, int proto)
{
+ struct mbuf *m = *mp;
struct tcphdr *th = NULL;
struct ip *ip = NULL;
-#ifdef INET
- struct ipovly *ipov;
-#endif
struct inpcb *inp = NULL;
struct tcpcb *tp = NULL;
struct socket *so = NULL;
u_char *optp = NULL;
+ int off0;
int optlen = 0;
#ifdef INET
int len;
@@ -587,9 +614,6 @@ tcp_input(struct mbuf *m, int off0)
struct tcpopt to; /* options in this segment */
char *s = NULL; /* address and port logging */
int ti_locked;
-#define TI_UNLOCKED 1
-#define TI_WLOCKED 2
-
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
@@ -604,6 +628,9 @@ tcp_input(struct mbuf *m, int off0)
isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
#endif
+ off0 = *offp;
+ m = *mp;
+ *mp = NULL;
to.to_flags = 0;
TCPSTAT_INC(tcps_rcvtotal);
@@ -615,7 +642,7 @@ tcp_input(struct mbuf *m, int off0)
m = m_pullup(m, sizeof(*ip6) + sizeof(*th));
if (m == NULL) {
TCPSTAT_INC(tcps_rcvshort);
- return;
+ return (IPPROTO_DONE);
}
}
@@ -660,45 +687,43 @@ tcp_input(struct mbuf *m, int off0)
* Note: IP leaves IP header in first mbuf.
*/
if (off0 > sizeof (struct ip)) {
- ip_stripoptions(m, (struct mbuf *)0);
+ ip_stripoptions(m);
off0 = sizeof(struct ip);
}
if (m->m_len < sizeof (struct tcpiphdr)) {
if ((m = m_pullup(m, sizeof (struct tcpiphdr)))
== NULL) {
TCPSTAT_INC(tcps_rcvshort);
- return;
+ return (IPPROTO_DONE);
}
}
ip = mtod(m, struct ip *);
- ipov = (struct ipovly *)ip;
th = (struct tcphdr *)((caddr_t)ip + off0);
- tlen = ip->ip_len;
+ tlen = ntohs(ip->ip_len) - off0;
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
th->th_sum = m->m_pkthdr.csum_data;
else
th->th_sum = in_pseudo(ip->ip_src.s_addr,
- ip->ip_dst.s_addr,
- htonl(m->m_pkthdr.csum_data +
- ip->ip_len +
- IPPROTO_TCP));
+ ip->ip_dst.s_addr,
+ htonl(m->m_pkthdr.csum_data + tlen +
+ IPPROTO_TCP));
th->th_sum ^= 0xffff;
-#ifdef TCPDEBUG
- ipov->ih_len = (u_short)tlen;
- ipov->ih_len = htons(ipov->ih_len);
-#endif
} else {
+ struct ipovly *ipov = (struct ipovly *)ip;
+
/*
* Checksum extended TCP header and data.
*/
- len = sizeof (struct ip) + tlen;
+ len = off0 + tlen;
bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
- ipov->ih_len = (u_short)tlen;
- ipov->ih_len = htons(ipov->ih_len);
+ ipov->ih_len = htons(tlen);
th->th_sum = in_cksum(m, len);
+ /* Reset length for SDT probes. */
+ ip->ip_len = htons(tlen + off0);
}
+
if (th->th_sum) {
TCPSTAT_INC(tcps_rcvbadsum);
goto drop;
@@ -732,7 +757,7 @@ tcp_input(struct mbuf *m, int off0)
if (off > sizeof (struct tcphdr)) {
#ifdef INET6
if (isipv6) {
- IP6_EXTHDR_CHECK(m, off0, off, );
+ IP6_EXTHDR_CHECK(m, off0, off, IPPROTO_DONE);
ip6 = mtod(m, struct ip6_hdr *);
th = (struct tcphdr *)((caddr_t)ip6 + off0);
}
@@ -746,10 +771,9 @@ tcp_input(struct mbuf *m, int off0)
if ((m = m_pullup(m, sizeof (struct ip) + off))
== NULL) {
TCPSTAT_INC(tcps_rcvshort);
- return;
+ return (IPPROTO_DONE);
}
ip = mtod(m, struct ip *);
- ipov = (struct ipovly *)ip;
th = (struct tcphdr *)((caddr_t)ip + off0);
}
}
@@ -771,26 +795,17 @@ tcp_input(struct mbuf *m, int off0)
/*
* Locate pcb for segment; if we're likely to add or remove a
- * connection then first acquire pcbinfo lock. There are two cases
+ * connection then first acquire pcbinfo lock. There are three cases
* where we might discover later we need a write lock despite the
- * flags: ACKs moving a connection out of the syncache, and ACKs for
- * a connection in TIMEWAIT.
+ * flags: ACKs moving a connection out of the syncache, ACKs for a
+ * connection in TIMEWAIT and SYNs not targeting a listening socket.
*/
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) {
- INP_INFO_WLOCK(&V_tcbinfo);
- ti_locked = TI_WLOCKED;
+ if ((thflags & (TH_FIN | TH_RST)) != 0) {
+ INP_INFO_RLOCK(&V_tcbinfo);
+ ti_locked = TI_RLOCKED;
} else
ti_locked = TI_UNLOCKED;
-findpcb:
-#ifdef INVARIANTS
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- } else {
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
- }
-#endif
-
/*
* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
*/
@@ -807,6 +822,14 @@ findpcb:
)
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
+findpcb:
+#ifdef INVARIANTS
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ } else {
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
+#endif
#ifdef INET6
if (isipv6 && fwd_tag != NULL) {
struct sockaddr_in6 *next_hop6;
@@ -831,10 +854,6 @@ findpcb:
th->th_dport, INPLOOKUP_WILDCARD |
INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif);
}
- /* Remove the tag from the packet. We don't need it anymore. */
- m_tag_delete(m, fwd_tag);
- m->m_flags &= ~M_IP6_NEXTHOP;
- fwd_tag = NULL;
} else if (isipv6) {
inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src,
th->th_sport, &ip6->ip6_dst, th->th_dport,
@@ -869,10 +888,6 @@ findpcb:
th->th_dport, INPLOOKUP_WILDCARD |
INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif);
}
- /* Remove the tag from the packet. We don't need it anymore. */
- m_tag_delete(m, fwd_tag);
- m->m_flags &= ~M_IP_NEXTHOP;
- fwd_tag = NULL;
} else
inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src,
th->th_sport, ip->ip_dst, th->th_dport,
@@ -908,23 +923,20 @@ findpcb:
goto dropwithreset;
}
INP_WLOCK_ASSERT(inp);
- if (!(inp->inp_flags & INP_HW_FLOWID)
- && (m->m_flags & M_FLOWID)
- && ((inp->inp_socket == NULL)
- || !(inp->inp_socket->so_options & SO_ACCEPTCONN))) {
- inp->inp_flags |= INP_HW_FLOWID;
- inp->inp_flags &= ~INP_SW_FLOWID;
+ if ((inp->inp_flowtype == M_HASHTYPE_NONE) &&
+ (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) &&
+ ((inp->inp_socket == NULL) ||
+ (inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) {
inp->inp_flowid = m->m_pkthdr.flowid;
+ inp->inp_flowtype = M_HASHTYPE_GET(m);
}
#ifdef IPSEC
#ifdef INET6
if (isipv6 && ipsec6_in_reject(m, inp)) {
- IPSEC6STAT_INC(in_polvio);
goto dropunlock;
} else
#endif /* INET6 */
if (ipsec4_in_reject(m, inp) != 0) {
- IPSECSTAT_INC(in_polvio);
goto dropunlock;
}
#endif /* IPSEC */
@@ -934,9 +946,10 @@ findpcb:
*/
if (inp->inp_ip_minttl != 0) {
#ifdef INET6
- if (isipv6 && inp->inp_ip_minttl > ip6->ip6_hlim)
- goto dropunlock;
- else
+ if (isipv6) {
+ if (inp->inp_ip_minttl > ip6->ip6_hlim)
+ goto dropunlock;
+ } else
#endif
if (inp->inp_ip_minttl > ip->ip_ttl)
goto dropunlock;
@@ -945,7 +958,7 @@ findpcb:
/*
* A previous connection in TIMEWAIT state is supposed to catch stray
* or duplicate segments arriving late. If this segment was a
- * legitimate new connection attempt the old INPCB gets removed and
+ * legitimate new connection attempt, the old INPCB gets removed and
* we can try again to find a listening socket.
*
* At this point, due to earlier optimism, we may hold only an inpcb
@@ -961,20 +974,20 @@ findpcb:
relocked:
if (inp->inp_flags & INP_TIMEWAIT) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+ if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_WLOCK(&V_tcbinfo);
- ti_locked = TI_WLOCKED;
+ INP_INFO_RLOCK(&V_tcbinfo);
+ ti_locked = TI_RLOCKED;
INP_WLOCK(inp);
if (in_pcbrele_wlocked(inp)) {
inp = NULL;
goto findpcb;
}
} else
- ti_locked = TI_WLOCKED;
+ ti_locked = TI_RLOCKED;
}
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
if (thflags & TH_SYN)
tcp_dooptions(&to, optp, optlen, TO_SYN);
@@ -983,8 +996,8 @@ relocked:
*/
if (tcp_twcheck(inp, &to, th, m, tlen))
goto findpcb;
- INP_INFO_WUNLOCK(&V_tcbinfo);
- return;
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ return (IPPROTO_DONE);
}
/*
* The TCPCB may no longer exist if the connection is winding
@@ -1013,16 +1026,18 @@ relocked:
* now be in TIMEWAIT.
*/
#ifdef INVARIANTS
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ if ((thflags & (TH_FIN | TH_RST)) != 0)
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
#endif
- if (tp->t_state != TCPS_ESTABLISHED) {
+ if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
+ (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
+ !(tp->t_flags & TF_FASTOPEN)))) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+ if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_WLOCK(&V_tcbinfo);
- ti_locked = TI_WLOCKED;
+ INP_INFO_RLOCK(&V_tcbinfo);
+ ti_locked = TI_RLOCKED;
INP_WLOCK(inp);
if (in_pcbrele_wlocked(inp)) {
inp = NULL;
@@ -1030,9 +1045,9 @@ relocked:
}
goto relocked;
} else
- ti_locked = TI_WLOCKED;
+ ti_locked = TI_RLOCKED;
}
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
}
#ifdef MAC
@@ -1057,17 +1072,13 @@ relocked:
/*
* When the socket is accepting connections (the INPCB is in LISTEN
* state) we look into the SYN cache if this is a new connection
- * attempt or the completion of a previous one. Because listen
- * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be
- * held in this case.
+ * attempt or the completion of a previous one.
*/
if (so->so_options & SO_ACCEPTCONN) {
struct in_conninfo inc;
KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
"tp not listening", __func__));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
-
bzero(&inc, sizeof(inc));
#ifdef INET6
if (isipv6) {
@@ -1090,6 +1101,8 @@ relocked:
* socket appended to the listen queue in SYN_RECEIVED state.
*/
if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
+
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
/*
* Parse the TCP options here because
* syncookies need access to the reflected
@@ -1110,6 +1123,9 @@ relocked:
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
+#ifdef TCP_RFC7413
+new_tfo_socket:
+#endif
if (so == NULL) {
/*
* We completed the 3-way handshake
@@ -1141,7 +1157,11 @@ relocked:
*/
INP_WUNLOCK(inp); /* listen socket */
inp = sotoinpcb(so);
- INP_WLOCK(inp); /* new connection */
+ /*
+ * New connection inpcb is already locked by
+ * syncache_expand().
+ */
+ INP_WLOCK_ASSERT(inp);
tp = intotcpcb(inp);
KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
("%s: ", __func__));
@@ -1170,10 +1190,10 @@ relocked:
* contains. tcp_do_segment() consumes
* the mbuf chain and unlocks the inpcb.
*/
- tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
+ tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
iptos, ti_locked);
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
- return;
+ return (IPPROTO_DONE);
}
/*
* Segment flag validation for new connection attempts:
@@ -1277,7 +1297,7 @@ relocked:
if (isipv6 && !V_ip6_use_deprecated) {
struct in6_ifaddr *ia6;
- ia6 = ip6_getdstifaddr(m);
+ ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
if (ia6 != NULL &&
(ia6->ia6_flags & IN6_IFF_DEPRECATED)) {
ifa_free(&ia6->ia_ifa);
@@ -1366,14 +1386,24 @@ relocked:
tcp_trace(TA_INPUT, ostate, tp,
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
+ TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
tcp_dooptions(&to, optp, optlen, TO_SYN);
- syncache_add(&inc, &to, th, inp, &so, m);
+#ifdef TCP_RFC7413
+ if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL))
+ goto new_tfo_socket;
+#else
+ syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
+#endif
/*
* Entry added to syncache and mbuf consumed.
- * Everything already unlocked by syncache_add().
+ * Only the listen socket is unlocked by syncache_add().
*/
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ ti_locked = TI_UNLOCKED;
+ }
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
- return;
+ return (IPPROTO_DONE);
} else if (tp->t_state == TCPS_LISTEN) {
/*
* When a listen socket is torn down the SO_ACCEPTCONN
@@ -1404,18 +1434,22 @@ relocked:
}
#endif
+ TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
+
/*
* Segment belongs to a connection in SYN_SENT, ESTABLISHED or later
* state. tcp_do_segment() always consumes the mbuf chain, unlocks
* the inpcb, and unlocks pcbinfo.
*/
- tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked);
+ tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked);
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
- return;
+ return (IPPROTO_DONE);
dropwithreset:
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
+
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -1435,8 +1469,11 @@ dropwithreset:
goto drop;
dropunlock:
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (m != NULL)
+ TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
+
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -1456,18 +1493,23 @@ drop:
free(s, M_TCPLOG);
if (m != NULL)
m_freem(m);
+ return (IPPROTO_DONE);
}
-static void
+void
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
int ti_locked)
{
- int thflags, acked, ourfinisacked, needoutput = 0;
+ int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
int rstreason, todrop, win;
u_long tiwin;
+ char *s;
+ struct in_conninfo *inc;
+ struct mbuf *mfree;
struct tcpopt to;
-
+ int tfo_syn;
+
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
@@ -1478,30 +1520,25 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
short ostate = 0;
#endif
thflags = th->th_flags;
+ inc = &tp->t_inpcb->inp_inc;
tp->sackhint.last_sack_ack = 0;
+ sack_changed = 0;
/*
* If this is either a state-changing packet or current state isn't
* established, we require a write lock on tcbinfo. Otherwise, we
- * allow either a read lock or a write lock, as we may have acquired
- * a write lock due to a race.
- *
- * Require a global write lock for SYN/FIN/RST segments or
- * non-established connections; otherwise accept either a read or
- * write lock, as we may have conservatively acquired a write lock in
- * certain cases in tcp_input() (is this still true?). Currently we
- * will never enter with no lock, so we try to drop it quickly in the
- * common pure ack/pure data cases.
+ * allow the tcbinfo to be either locked or unlocked, as the
+ * caller may have unnecessarily acquired a write lock due to a race.
*/
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
tp->t_state != TCPS_ESTABLISHED) {
- KASSERT(ti_locked == TI_WLOCKED, ("%s ti_locked %d for "
+ KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
"SYN/FIN/RST/!EST", __func__, ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
} else {
#ifdef INVARIANTS
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
else {
KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
"ti_locked: %d", __func__, ti_locked));
@@ -1515,6 +1552,11 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
__func__));
+#ifdef TCPPCAP
+ /* Save segment, if requested. */
+ tcp_pcap_add(th, m, &(tp->t_inpkts));
+#endif
+
/*
* Segment received on connection.
* Reset idle time and keep-alive timer.
@@ -1526,7 +1568,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
/*
- * Unscale the window into a 32-bit value.
+ * Scale up the window into a 32-bit value.
* For the SYN_SENT state the scale is zero.
*/
tiwin = th->th_win << tp->snd_scale;
@@ -1549,6 +1591,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
TCPSTAT_INC(tcps_ecn_ect1);
break;
}
+
+ /* Process a packet differently from RFC3168. */
+ cc_ecnpkt_handler(tp, th, iptos);
+
/* Congestion experienced. */
if (thflags & TH_ECE) {
cc_cong_signal(tp, th, CC_ECN);
@@ -1573,6 +1619,24 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
to.to_tsecr = 0;
}
+ /*
+ * If timestamps were negotiated during SYN/ACK they should
+ * appear on every segment during this session and vice versa.
+ */
+ if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Timestamp missing, "
+ "no action\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ }
+ if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
+ "no action\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ }
/*
* Process options only when we get SYN/ACK back. The SYN case
@@ -1652,8 +1716,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
/*
* This is a pure ack for outstanding data.
*/
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
TCPSTAT_INC(tcps_predack);
@@ -1720,7 +1784,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->snd_wl2 = th->th_ack;
tp->t_dupacks = 0;
m_freem(m);
- ND6_HINT(tp); /* Some progress has been made. */
/*
* If all outstanding data are acked, stop
@@ -1737,14 +1800,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
(void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
+ TCP_PROBE3(debug__input, tp, th,
+ mtod(m, const char *));
if (tp->snd_una == tp->snd_max)
tcp_timer_activate(tp, TT_REXMT, 0);
else if (!tcp_timer_active(tp, TT_PERSIST))
tcp_timer_activate(tp, TT_REXMT,
tp->t_rxtcur);
sowwakeup(so);
- if (so->so_snd.sb_cc)
- (void) tcp_output(tp);
+ if (sbavail(&so->so_snd))
+ (void) tp->t_fb->tfb_tcp_output(tp);
goto check_delack;
}
} else if (th->th_ack == tp->snd_una &&
@@ -1756,8 +1821,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* nothing on the reassembly queue and we have enough
* buffer space to take it.
*/
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
/* Clean receiver SACK report if present */
@@ -1777,12 +1842,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->rcv_up = tp->rcv_nxt;
TCPSTAT_INC(tcps_rcvpack);
TCPSTAT_ADD(tcps_rcvbyte, tlen);
- ND6_HINT(tp); /* Some progress has been made */
#ifdef TCPDEBUG
if (so->so_options & SO_DEBUG)
tcp_trace(TA_INPUT, ostate, tp,
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
+ TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
+
/*
* Automatic sizing of receive socket buffer. Often the send
* buffer size is not optimally adjusted to the actual network
@@ -1802,11 +1868,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* reassembly queue.
*
* The criteria to step up the receive buffer one notch are:
- * 1. the number of bytes received during the time it takes
+ * 1. Application has not set receive buffer size with
+ * SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
+ * 2. the number of bytes received during the time it takes
* one timestamp to be reflected back to us (the RTT);
- * 2. received bytes per RTT is within seven eighth of the
+ * 3. received bytes per RTT is within seven eighth of the
* current socket buffer size;
- * 3. receive buffer size has not hit maximal automatic size;
+ * 4. receive buffer size has not hit maximal automatic size;
*
* This algorithm does one step per RTT at most and only if
* we receive a bulk stream w/o packet losses or reorderings.
@@ -1817,6 +1885,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* the buffer to better manage the socket buffer resources.
*/
if (V_tcp_do_autorcvbuf &&
+ (to.to_flags & TOF_TS) &&
to.to_tsecr &&
(so->so_rcv.sb_flags & SB_AUTOSIZE)) {
if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) &&
@@ -1851,7 +1920,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
newsize, so, NULL))
so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
m_adj(m, drop_hdrlen); /* delayed header drop */
- sbappendstream_locked(&so->so_rcv, m);
+ sbappendstream_locked(&so->so_rcv, m, 0);
}
/* NB: sorwakeup_locked() does an implicit unlock. */
sorwakeup_locked(so);
@@ -1859,7 +1928,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->t_flags |= TF_DELACK;
} else {
tp->t_flags |= TF_ACKNOW;
- tcp_output(tp);
+ tp->t_fb->tfb_tcp_output(tp);
}
goto check_delack;
}
@@ -1893,6 +1962,28 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN) {
+ /*
+ * When a TFO connection is in SYN_RECEIVED, the
+ * only valid packets are the initial SYN, a
+ * retransmit/copy of the initial SYN (possibly with
+ * a subset of the original data), a valid ACK, a
+ * FIN, or a RST.
+ */
+ if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) {
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ } else if (thflags & TH_SYN) {
+ /* non-initial SYN is ignored */
+ if ((tcp_timer_active(tp, TT_DELACK) ||
+ tcp_timer_active(tp, TT_REXMT)))
+ goto drop;
+ } else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) {
+ goto drop;
+ }
+ }
+#endif
break;
/*
@@ -1916,8 +2007,11 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
rstreason = BANDLIM_UNLIMITED;
goto dropwithreset;
}
- if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST))
+ if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) {
+ TCP_PROBE5(connect__refused, NULL, tp,
+ mtod(m, const char *), tp, th);
tp = tcp_drop(tp, ECONNREFUSED);
+ }
if (thflags & TH_RST)
goto drop;
if (!(thflags & TH_SYN))
@@ -1962,11 +2056,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
tp->t_starttime = ticks;
if (tp->t_flags & TF_NEEDFIN) {
- tp->t_state = TCPS_FIN_WAIT_1;
+ tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
thflags &= ~TH_SYN;
} else {
- tp->t_state = TCPS_ESTABLISHED;
+ tcp_state_change(tp, TCPS_ESTABLISHED);
+ TCP_PROBE5(connect__established, NULL, tp,
+ mtod(m, const char *), tp, th);
cc_conn_init(tp);
tcp_timer_activate(tp, TT_KEEP,
TP_KEEPIDLE(tp));
@@ -1974,22 +2070,20 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
} else {
/*
* Received initial SYN in SYN-SENT[*] state =>
- * simultaneous open. If segment contains CC option
- * and there is a cached CC, apply TAO test.
+ * simultaneous open.
* If it succeeds, connection is * half-synchronized.
* Otherwise, do 3-way handshake:
* SYN-SENT -> SYN-RECEIVED
* SYN-SENT* -> SYN-RECEIVED*
- * If there was no CC option, clear cached CC value.
*/
tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
tcp_timer_activate(tp, TT_REXMT, 0);
- tp->t_state = TCPS_SYN_RECEIVED;
+ tcp_state_change(tp, TCPS_SYN_RECEIVED);
}
- KASSERT(ti_locked == TI_WLOCKED, ("%s: trimthenstep6: "
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
"ti_locked %d", __func__, ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
@@ -2045,98 +2139,84 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* Then check that at least some bytes of segment are within
* receive window. If segment begins before rcv_nxt,
* drop leading data (and SYN); if nothing left, just ack.
- *
- *
- * If the RST bit is set, check the sequence number to see
- * if this is a valid reset segment.
- * RFC 793 page 37:
- * In all states except SYN-SENT, all reset (RST) segments
- * are validated by checking their SEQ-fields. A reset is
- * valid if its sequence number is in the window.
- * Note: this does not take into account delayed ACKs, so
- * we should test against last_ack_sent instead of rcv_nxt.
- * The sequence number in the reset segment is normally an
- * echo of our outgoing acknowlegement numbers, but some hosts
- * send a reset with the sequence number at the rightmost edge
- * of our receive window, and we have to handle this case.
- * Note 2: Paul Watson's paper "Slipping in the Window" has shown
- * that brute force RST attacks are possible. To combat this,
- * we use a much stricter check while in the ESTABLISHED state,
- * only accepting RSTs where the sequence number is equal to
- * last_ack_sent. In all other states (the states in which a
- * RST is more likely), the more permissive check is used.
- * If we have multiple segments in flight, the initial reset
- * segment sequence numbers will be to the left of last_ack_sent,
- * but they will eventually catch up.
- * In any case, it never made sense to trim reset segments to
- * fit the receive window since RFC 1122 says:
- * 4.2.2.12 RST Segment: RFC-793 Section 3.4
- *
- * A TCP SHOULD allow a received RST segment to include data.
- *
- * DISCUSSION
- * It has been suggested that a RST segment could contain
- * ASCII text that encoded and explained the cause of the
- * RST. No standard has yet been established for such
- * data.
- *
- * If the reset segment passes the sequence number test examine
- * the state:
- * SYN_RECEIVED STATE:
- * If passive open, return to LISTEN state.
- * If active open, inform user that connection was refused.
- * ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT STATES:
- * Inform user that connection was reset, and close tcb.
- * CLOSING, LAST_ACK STATES:
- * Close the tcb.
- * TIME_WAIT STATE:
- * Drop the segment - see Stevens, vol. 2, p. 964 and
- * RFC 1337.
*/
if (thflags & TH_RST) {
- if (SEQ_GEQ(th->th_seq, tp->last_ack_sent - 1) &&
- SEQ_LEQ(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
- switch (tp->t_state) {
-
- case TCPS_SYN_RECEIVED:
- so->so_error = ECONNREFUSED;
- goto close;
-
- case TCPS_ESTABLISHED:
- if (V_tcp_insecure_rst == 0 &&
- !(SEQ_GEQ(th->th_seq, tp->rcv_nxt - 1) &&
- SEQ_LEQ(th->th_seq, tp->rcv_nxt + 1)) &&
- !(SEQ_GEQ(th->th_seq, tp->last_ack_sent - 1) &&
- SEQ_LEQ(th->th_seq, tp->last_ack_sent + 1))) {
- TCPSTAT_INC(tcps_badrst);
- goto drop;
- }
- /* FALLTHROUGH */
- case TCPS_FIN_WAIT_1:
- case TCPS_FIN_WAIT_2:
- case TCPS_CLOSE_WAIT:
- so->so_error = ECONNRESET;
- close:
- KASSERT(ti_locked == TI_WLOCKED,
- ("tcp_do_segment: TH_RST 1 ti_locked %d",
- ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
-
- tp->t_state = TCPS_CLOSED;
+ /*
+ * RFC5961 Section 3.2
+ *
+ * - RST drops connection only if SEG.SEQ == RCV.NXT.
+ * - If RST is in window, we send challenge ACK.
+ *
+ * Note: to take into account delayed ACKs, we should
+ * test against last_ack_sent instead of rcv_nxt.
+ * Note 2: we handle special case of closed window, not
+ * covered by the RFC.
+ */
+ if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
+ SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
+ (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
+
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ KASSERT(ti_locked == TI_RLOCKED,
+ ("%s: TH_RST ti_locked %d, th %p tp %p",
+ __func__, ti_locked, th, tp));
+ KASSERT(tp->t_state != TCPS_SYN_SENT,
+ ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
+ __func__, th, tp));
+
+ if (V_tcp_insecure_rst ||
+ tp->last_ack_sent == th->th_seq) {
TCPSTAT_INC(tcps_drops);
- tp = tcp_close(tp);
- break;
+ /* Drop the connection. */
+ switch (tp->t_state) {
+ case TCPS_SYN_RECEIVED:
+ so->so_error = ECONNREFUSED;
+ goto close;
+ case TCPS_ESTABLISHED:
+ case TCPS_FIN_WAIT_1:
+ case TCPS_FIN_WAIT_2:
+ case TCPS_CLOSE_WAIT:
+ so->so_error = ECONNRESET;
+ close:
+ tcp_state_change(tp, TCPS_CLOSED);
+ /* FALLTHROUGH */
+ default:
+ tp = tcp_close(tp);
+ }
+ } else {
+ TCPSTAT_INC(tcps_badrst);
+ /* Send challenge ACK. */
+ tcp_respond(tp, mtod(m, void *), th, m,
+ tp->rcv_nxt, tp->snd_nxt, TH_ACK);
+ tp->last_ack_sent = tp->rcv_nxt;
+ m = NULL;
+ }
+ }
+ goto drop;
+ }
- case TCPS_CLOSING:
- case TCPS_LAST_ACK:
- KASSERT(ti_locked == TI_WLOCKED,
- ("tcp_do_segment: TH_RST 2 ti_locked %d",
- ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ /*
+ * RFC5961 Section 4.2
+ * Send challenge ACK for any SYN in synchronized state.
+ */
+ if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
+ tp->t_state != TCPS_SYN_RECEIVED) {
+ KASSERT(ti_locked == TI_RLOCKED,
+ ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- tp = tcp_close(tp);
- break;
- }
+ TCPSTAT_INC(tcps_badsyn);
+ if (V_tcp_insecure_syn &&
+ SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
+ SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
+ tp = tcp_drop(tp, ECONNRESET);
+ rstreason = BANDLIM_UNLIMITED;
+ } else {
+ /* Send challenge ACK. */
+ tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt,
+ tp->snd_nxt, TH_ACK);
+ tp->last_ack_sent = tp->rcv_nxt;
+ m = NULL;
}
goto drop;
}
@@ -2236,15 +2316,14 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) {
- char *s;
-
- KASSERT(ti_locked == TI_WLOCKED, ("%s: SS_NOFDEREF && "
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && "
"CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
- log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data after socket "
- "was closed, sending RST and removing tcpcb\n",
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
+ "after socket was closed, "
+ "sending RST and removing tcpcb\n",
s, __func__, tcpstates[tp->t_state], tlen);
free(s, M_TCPLOG);
}
@@ -2309,29 +2388,22 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
}
/*
- * If a SYN is in the window, then this is an
- * error and we send an RST and drop the connection.
- */
- if (thflags & TH_SYN) {
- KASSERT(ti_locked == TI_WLOCKED,
- ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
-
- tp = tcp_drop(tp, ECONNRESET);
- rstreason = BANDLIM_UNLIMITED;
- goto drop;
- }
-
- /*
* If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN
* flag is on (half-synchronized state), then queue data for
* later processing; else drop segment and return.
*/
if ((thflags & TH_ACK) == 0) {
if (tp->t_state == TCPS_SYN_RECEIVED ||
- (tp->t_flags & TF_NEEDSYN))
+ (tp->t_flags & TF_NEEDSYN)) {
+#ifdef TCP_RFC7413
+ if (tp->t_state == TCPS_SYN_RECEIVED &&
+ tp->t_flags & TF_FASTOPEN) {
+ tp->snd_wnd = tiwin;
+ cc_conn_init(tp);
+ }
+#endif
goto step6;
- else if (tp->t_flags & TF_ACKNOW)
+ } else if (tp->t_flags & TF_ACKNOW)
goto dropafterack;
else
goto drop;
@@ -2364,11 +2436,33 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
tp->t_starttime = ticks;
if (tp->t_flags & TF_NEEDFIN) {
- tp->t_state = TCPS_FIN_WAIT_1;
+ tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
} else {
- tp->t_state = TCPS_ESTABLISHED;
- cc_conn_init(tp);
+ tcp_state_change(tp, TCPS_ESTABLISHED);
+ TCP_PROBE5(accept__established, NULL, tp,
+ mtod(m, const char *), tp, th);
+#ifdef TCP_RFC7413
+ if (tp->t_tfo_pending) {
+ tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+ tp->t_tfo_pending = NULL;
+
+ /*
+ * Account for the ACK of our SYN prior to
+ * regular ACK processing below.
+ */
+ tp->snd_una++;
+ }
+ /*
+ * TFO connections call cc_conn_init() during SYN
+ * processing. Calling it again here for such
+ * connections is not harmless as it would undo the
+ * snd_cwnd reduction that occurs when a TFO SYN|ACK
+ * is retransmitted.
+ */
+ if (!(tp->t_flags & TF_FASTOPEN))
+#endif
+ cc_conn_init(tp);
tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
}
/*
@@ -2402,21 +2496,45 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
if ((tp->t_flags & TF_SACK_PERMIT) &&
((to.to_flags & TOF_SACK) ||
!TAILQ_EMPTY(&tp->snd_holes)))
- tcp_sack_doack(tp, &to, th->th_ack);
+ sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
+ else
+ /*
+ * Reset the value so that previous (valid) value
+ * from the last ack with SACK doesn't get used.
+ */
+ tp->sackhint.sacked_bytes = 0;
/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
hhook_run_tcp_est_in(tp, th, &to);
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
- if (tlen == 0 && tiwin == tp->snd_wnd) {
+ u_int maxseg;
+
+ maxseg = tcp_maxseg(tp);
+ if (tlen == 0 &&
+ (tiwin == tp->snd_wnd ||
+ (tp->t_flags & TF_SACK_PERMIT))) {
+ /*
+ * If this is the first time we've seen a
+ * FIN from the remote, this is not a
+ * duplicate and it needs to be processed
+ * normally. This happens during a
+ * simultaneous close.
+ */
+ if ((thflags & TH_FIN) &&
+ (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
+ tp->t_dupacks = 0;
+ break;
+ }
TCPSTAT_INC(tcps_rcvdupack);
/*
* If we have outstanding data (other than
* a window probe), this is a completely
* duplicate ack (ie, window info didn't
- * change), the ack is the biggest we've
+ * change and FIN isn't set),
+ * the ack is the biggest we've
* seen and we've seen exactly our rexmt
- * threshhold of them, assume a packet
+ * threshold of them, assume a packet
* has been dropped and retransmit it.
* Kludge snd_nxt & the congestion
* window so we send only this one
@@ -2437,8 +2555,20 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* When using TCP ECN, notify the peer that
* we reduced the cwnd.
*/
- if (!tcp_timer_active(tp, TT_REXMT) ||
- th->th_ack != tp->snd_una)
+ /*
+ * Following 2 kinds of acks should not affect
+ * dupack counting:
+ * 1) Old acks
+ * 2) Acks with SACK but without any new SACK
+ * information in them. These could result from
+ * any anomaly in the network like a switch
+ * duplicating packets or a possible DoS attack.
+ */
+ if (th->th_ack != tp->snd_una ||
+ ((tp->t_flags & TF_SACK_PERMIT) &&
+ !sack_changed))
+ break;
+ else if (!tcp_timer_active(tp, TT_REXMT))
tp->t_dupacks = 0;
else if (++tp->t_dupacks > tcprexmtthresh ||
IN_FASTRECOVERY(tp->t_flags)) {
@@ -2453,26 +2583,20 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* we have less than 1/2 the original window's
* worth of data in flight.
*/
- awnd = (tp->snd_nxt - tp->snd_fack) +
- tp->sackhint.sack_bytes_rexmit;
+ if (V_tcp_do_rfc6675_pipe)
+ awnd = tcp_compute_pipe(tp);
+ else
+ awnd = (tp->snd_nxt - tp->snd_fack) +
+ tp->sackhint.sack_bytes_rexmit;
+
if (awnd < tp->snd_ssthresh) {
- tp->snd_cwnd += tp->t_maxseg;
+ tp->snd_cwnd += maxseg;
if (tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
}
} else
- tp->snd_cwnd += tp->t_maxseg;
- if ((thflags & TH_FIN) &&
- (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
- /*
- * If its a fin we need to process
- * it to avoid a race where both
- * sides enter FIN-WAIT and send FIN|ACK
- * at the same time.
- */
- break;
- }
- (void) tcp_output(tp);
+ tp->snd_cwnd += maxseg;
+ (void) tp->t_fb->tfb_tcp_output(tp);
goto drop;
} else if (tp->t_dupacks == tcprexmtthresh) {
tcp_seq onxt = tp->snd_nxt;
@@ -2505,33 +2629,33 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
TCPSTAT_INC(
tcps_sack_recovery_episode);
tp->sack_newdata = tp->snd_nxt;
- tp->snd_cwnd = tp->t_maxseg;
- (void) tcp_output(tp);
+ tp->snd_cwnd = maxseg;
+ (void) tp->t_fb->tfb_tcp_output(tp);
goto drop;
}
tp->snd_nxt = th->th_ack;
- tp->snd_cwnd = tp->t_maxseg;
- if ((thflags & TH_FIN) &&
- (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
- /*
- * If its a fin we need to process
- * it to avoid a race where both
- * sides enter FIN-WAIT and send FIN|ACK
- * at the same time.
- */
- break;
- }
- (void) tcp_output(tp);
+ tp->snd_cwnd = maxseg;
+ (void) tp->t_fb->tfb_tcp_output(tp);
KASSERT(tp->snd_limited <= 2,
("%s: tp->snd_limited too big",
__func__));
tp->snd_cwnd = tp->snd_ssthresh +
- tp->t_maxseg *
+ maxseg *
(tp->t_dupacks - tp->snd_limited);
if (SEQ_GT(onxt, tp->snd_nxt))
tp->snd_nxt = onxt;
goto drop;
} else if (V_tcp_do_rfc3042) {
+ /*
+ * Process first and second duplicate
+ * ACKs. Each indicates a segment
+ * leaving the network, creating room
+ * for more. Make sure we can send a
+ * packet on reception of each duplicate
+ * ACK by increasing snd_cwnd by one
+ * segment. Restore the original
+ * snd_cwnd after packet transmission.
+ */
cc_ack_received(tp, th, CC_DUPACK);
u_long oldcwnd = tp->snd_cwnd;
tcp_seq oldsndmax = tp->snd_max;
@@ -2547,33 +2671,23 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->snd_cwnd =
(tp->snd_nxt - tp->snd_una) +
(tp->t_dupacks - tp->snd_limited) *
- tp->t_maxseg;
- if ((thflags & TH_FIN) &&
- (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
- /*
- * If its a fin we need to process
- * it to avoid a race where both
- * sides enter FIN-WAIT and send FIN|ACK
- * at the same time.
- */
- break;
- }
+ maxseg;
/*
* Only call tcp_output when there
* is new data available to be sent.
* Otherwise we would send pure ACKs.
*/
SOCKBUF_LOCK(&so->so_snd);
- avail = so->so_snd.sb_cc -
+ avail = sbavail(&so->so_snd) -
(tp->snd_nxt - tp->snd_una);
SOCKBUF_UNLOCK(&so->so_snd);
if (avail > 0)
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
sent = tp->snd_max - oldsndmax;
- if (sent > tp->t_maxseg) {
+ if (sent > maxseg) {
KASSERT((tp->t_dupacks == 2 &&
tp->snd_limited == 0) ||
- (sent == tp->t_maxseg + 1 &&
+ (sent == maxseg + 1 &&
tp->t_flags & TF_SENTFIN),
("%s: sent too much",
__func__));
@@ -2583,9 +2697,20 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->snd_cwnd = oldcwnd;
goto drop;
}
- } else
- tp->t_dupacks = 0;
+ }
break;
+ } else {
+ /*
+ * This ack is advancing the left edge, reset the
+ * counter.
+ */
+ tp->t_dupacks = 0;
+ /*
+ * If this ack also has new SACK info, increment the
+ * counter as per rfc6675.
+ */
+ if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed)
+ tp->t_dupacks++;
}
KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
@@ -2604,7 +2729,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
} else
cc_post_recovery(tp, th);
}
- tp->t_dupacks = 0;
/*
* If we reach this point, ACK is not a duplicate,
* i.e., it ACKs something we sent.
@@ -2631,6 +2755,9 @@ process_ACK:
INP_WLOCK_ASSERT(tp->t_inpcb);
acked = BYTES_THIS_ACK(tp, th);
+ KASSERT(acked >= 0, ("%s: acked unexepectedly negative "
+ "(tp->snd_una=%u, th->th_ack=%u, tp=%p, m=%p)", __func__,
+ tp->snd_una, th->th_ack, tp, m));
TCPSTAT_INC(tcps_rcvackpack);
TCPSTAT_ADD(tcps_rcvackbyte, acked);
@@ -2699,17 +2826,25 @@ process_ACK:
cc_ack_received(tp, th, CC_ACK);
SOCKBUF_LOCK(&so->so_snd);
- if (acked > so->so_snd.sb_cc) {
- tp->snd_wnd -= so->so_snd.sb_cc;
- sbdrop_locked(&so->so_snd, (int)so->so_snd.sb_cc);
+ if (acked > sbavail(&so->so_snd)) {
+ if (tp->snd_wnd >= sbavail(&so->so_snd))
+ tp->snd_wnd -= sbavail(&so->so_snd);
+ else
+ tp->snd_wnd = 0;
+ mfree = sbcut_locked(&so->so_snd,
+ (int)sbavail(&so->so_snd));
ourfinisacked = 1;
} else {
- sbdrop_locked(&so->so_snd, acked);
- tp->snd_wnd -= acked;
+ mfree = sbcut_locked(&so->so_snd, acked);
+ if (tp->snd_wnd >= (u_long) acked)
+ tp->snd_wnd -= acked;
+ else
+ tp->snd_wnd = 0;
ourfinisacked = 0;
}
/* NB: sowwakeup_locked() does an implicit unlock. */
sowwakeup_locked(so);
+ m_freem(mfree);
/* Detect una wraparound. */
if (!IN_RECOVERY(tp->t_flags) &&
SEQ_GT(tp->snd_una, tp->snd_recover) &&
@@ -2755,7 +2890,7 @@ process_ACK:
tcp_finwait2_timeout :
TP_MAXIDLE(tp)));
}
- tp->t_state = TCPS_FIN_WAIT_2;
+ tcp_state_change(tp, TCPS_FIN_WAIT_2);
}
break;
@@ -2767,9 +2902,9 @@ process_ACK:
*/
case TCPS_CLOSING:
if (ourfinisacked) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return;
}
@@ -2783,7 +2918,7 @@ process_ACK:
*/
case TCPS_LAST_ACK:
if (ourfinisacked) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
goto drop;
}
@@ -2826,7 +2961,7 @@ step6:
* actually wanting to send this much urgent data.
*/
SOCKBUF_LOCK(&so->so_rcv);
- if (th->th_urp + so->so_rcv.sb_cc > sb_max) {
+ if (th->th_urp + sbavail(&so->so_rcv) > sb_max) {
th->th_urp = 0; /* XXX */
thflags &= ~TH_URG; /* XXX */
SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */
@@ -2848,7 +2983,7 @@ step6:
*/
if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
tp->rcv_up = th->th_seq + th->th_urp;
- so->so_oobmark = so->so_rcv.sb_cc +
+ so->so_oobmark = sbavail(&so->so_rcv) +
(tp->rcv_up - tp->rcv_nxt) - 1;
if (so->so_oobmark == 0)
so->so_rcv.sb_state |= SBS_RCVATMARK;
@@ -2887,7 +3022,9 @@ dodata: /* XXX */
* case PRU_RCVD). If a FIN has already been received on this
* connection then we just ignore the text.
*/
- if ((tlen || (thflags & TH_FIN)) &&
+ tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_flags & TF_FASTOPEN));
+ if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
m_adj(m, drop_hdrlen); /* delayed header drop */
@@ -2905,8 +3042,9 @@ dodata: /* XXX */
*/
if (th->th_seq == tp->rcv_nxt &&
LIST_EMPTY(&tp->t_segq) &&
- TCPS_HAVEESTABLISHED(tp->t_state)) {
- if (DELAY_ACK(tp, tlen))
+ (TCPS_HAVEESTABLISHED(tp->t_state) ||
+ tfo_syn)) {
+ if (DELAY_ACK(tp, tlen) || tfo_syn)
tp->t_flags |= TF_DELACK;
else
tp->t_flags |= TF_ACKNOW;
@@ -2914,12 +3052,11 @@ dodata: /* XXX */
thflags = th->th_flags & TH_FIN;
TCPSTAT_INC(tcps_rcvpack);
TCPSTAT_ADD(tcps_rcvbyte, tlen);
- ND6_HINT(tp);
SOCKBUF_LOCK(&so->so_rcv);
if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
m_freem(m);
else
- sbappendstream_locked(&so->so_rcv, m);
+ sbappendstream_locked(&so->so_rcv, m, 0);
/* NB: sorwakeup_locked() does an implicit unlock. */
sorwakeup_locked(so);
} else {
@@ -2981,7 +3118,7 @@ dodata: /* XXX */
tp->t_starttime = ticks;
/* FALLTHROUGH */
case TCPS_ESTABLISHED:
- tp->t_state = TCPS_CLOSE_WAIT;
+ tcp_state_change(tp, TCPS_CLOSE_WAIT);
break;
/*
@@ -2989,7 +3126,7 @@ dodata: /* XXX */
* enter the CLOSING state.
*/
case TCPS_FIN_WAIT_1:
- tp->t_state = TCPS_CLOSING;
+ tcp_state_change(tp, TCPS_CLOSING);
break;
/*
@@ -2998,18 +3135,18 @@ dodata: /* XXX */
* standard timers.
*/
case TCPS_FIN_WAIT_2:
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_WLOCKED, ("%s: dodata "
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata "
"TCP_FIN_WAIT_2 ti_locked: %d", __func__,
ti_locked));
tcp_twstart(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return;
}
}
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
#ifdef TCPDEBUG
@@ -3017,12 +3154,13 @@ dodata: /* XXX */
tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
+ TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
/*
* Return any desired output.
*/
if (needoutput || (tp->t_flags & TF_ACKNOW))
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
check_delack:
KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
@@ -3064,19 +3202,20 @@ dropafterack:
tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
INP_WUNLOCK(tp->t_inpcb);
m_freem(m);
return;
dropwithreset:
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
if (tp != NULL) {
@@ -3087,8 +3226,8 @@ dropwithreset:
return;
drop:
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -3104,6 +3243,7 @@ drop:
tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
+ TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
if (tp != NULL)
INP_WUNLOCK(tp->t_inpcb);
m_freem(m);
@@ -3114,7 +3254,7 @@ drop:
* The mbuf must still include the original packet header.
* tp may be NULL.
*/
-static void
+void
tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
int tlen, int rstreason)
{
@@ -3177,7 +3317,7 @@ drop:
/*
* Parse TCP options and place in tcpopt.
*/
-static void
+void
tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
{
int opt, optlen;
@@ -3259,6 +3399,21 @@ tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
to->to_sacks = cp + 2;
TCPSTAT_INC(tcps_sack_rcv_blocks);
break;
+#ifdef TCP_RFC7413
+ case TCPOPT_FAST_OPEN:
+ if ((optlen != TCPOLEN_FAST_OPEN_EMPTY) &&
+ (optlen < TCPOLEN_FAST_OPEN_MIN) &&
+ (optlen > TCPOLEN_FAST_OPEN_MAX))
+ continue;
+ if (!(flags & TO_SYN))
+ continue;
+ if (!V_tcp_fastopen_enabled)
+ continue;
+ to->to_flags |= TOF_FASTOPEN;
+ to->to_tfo_len = optlen - 2;
+ to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL;
+ break;
+#endif
default:
continue;
}
@@ -3271,7 +3426,7 @@ tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
* It is still reflected in the segment length for
* sequencing purposes.
*/
-static void
+void
tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m,
int off)
{
@@ -3304,7 +3459,7 @@ tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m,
* Collect new round-trip time estimate
* and update averages and current timeout.
*/
-static void
+void
tcp_xmit_timer(struct tcpcb *tp, int rtt)
{
int delta;
@@ -3394,11 +3549,9 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt)
* While looking at the routing entry, we also initialize other path-dependent
* parameters from pre-set or cached values in the routing entry.
*
- * Also take into account the space needed for options that we
- * send regularly. Make maxseg shorter by that amount to assure
- * that we can send maxseg amount of data even when the options
- * are present. Store the upper limit of the length of options plus
- * data in maxopd.
+ * NOTE that resulting t_maxseg doesn't include space for TCP options or
+ * IP options, e.g. IPSEC data, since length of this data may vary, and
+ * thus it is calculated for every segment separately in tcp_output().
*
* NOTE that this routine is only called when we process an incoming
* segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS
@@ -3412,7 +3565,6 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
u_long maxmtu = 0;
struct inpcb *inp = tp->t_inpcb;
struct hc_metrics_lite metrics;
- int origoffer;
#ifdef INET6
int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
size_t min_protoh = isipv6 ?
@@ -3428,13 +3580,12 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
KASSERT(offer == -1, ("%s: conflict", __func__));
offer = mtuoffer - min_protoh;
}
- origoffer = offer;
/* Initialize. */
#ifdef INET6
if (isipv6) {
maxmtu = tcp_maxmtu6(&inp->inp_inc, cap);
- tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;
+ tp->t_maxseg = V_tcp_v6mssdflt;
}
#endif
#if defined(INET) && defined(INET6)
@@ -3443,7 +3594,7 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
#ifdef INET
{
maxmtu = tcp_maxmtu(&inp->inp_inc, cap);
- tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt;
+ tp->t_maxseg = V_tcp_mssdflt;
}
#endif
@@ -3467,9 +3618,9 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
/*
* Offer == 0 means that there was no MSS on the SYN
* segment, in this case we use tcp_mssdflt as
- * already assigned to t_maxopd above.
+ * already assigned to t_maxseg above.
*/
- offer = tp->t_maxopd;
+ offer = tp->t_maxseg;
break;
case -1:
@@ -3494,8 +3645,8 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
bcopy(&metrics, metricptr, sizeof(struct hc_metrics_lite));
/*
- * If there's a discovered mtu int tcp hostcache, use it
- * else, use the link mtu.
+ * If there's a discovered mtu in tcp hostcache, use it.
+ * Else, use the link mtu.
*/
if (metrics.rmx_mtu)
mss = min(metrics.rmx_mtu, maxmtu) - min_protoh;
@@ -3541,31 +3692,15 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
mss = min(mss, offer);
/*
- * Sanity check: make sure that maxopd will be large
+ * Sanity check: make sure that maxseg will be large
* enough to allow some data on segments even if the
* all the option space is used (40bytes). Otherwise
* funny things may happen in tcp_output.
+ *
+ * XXXGL: shouldn't we reserve space for IP/IPv6 options?
*/
mss = max(mss, 64);
- /*
- * maxopd stores the maximum length of data AND options
- * in a segment; maxseg is the amount of data in a normal
- * segment. We need to store this value (maxopd) apart
- * from maxseg, because now every segment carries options
- * and thus we normally have somewhat less data in segments.
- */
- tp->t_maxopd = mss;
-
- /*
- * origoffer==-1 indicates that no segments were received yet.
- * In this case we just guess.
- */
- if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
- (origoffer == -1 ||
- (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
- mss -= TCPOLEN_TSTAMP_APPA;
-
tp->t_maxseg = mss;
}
@@ -3684,11 +3819,12 @@ tcp_mssopt(struct in_conninfo *inc)
* By setting snd_nxt to ti_ack, this forces retransmission timer to
* be started again.
*/
-static void
+void
tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
{
tcp_seq onxt = tp->snd_nxt;
- u_long ocwnd = tp->snd_cwnd;
+ u_long ocwnd = tp->snd_cwnd;
+ u_int maxseg = tcp_maxseg(tp);
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -3699,9 +3835,9 @@ tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
* Set snd_cwnd to one segment beyond acknowledged offset.
* (tp->snd_una has not yet been updated when this function is called.)
*/
- tp->snd_cwnd = tp->t_maxseg + BYTES_THIS_ACK(tp, th);
+ tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th);
tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
tp->snd_cwnd = ocwnd;
if (SEQ_GT(onxt, tp->snd_nxt))
tp->snd_nxt = onxt;
@@ -3713,5 +3849,13 @@ tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
tp->snd_cwnd -= BYTES_THIS_ACK(tp, th);
else
tp->snd_cwnd = 0;
- tp->snd_cwnd += tp->t_maxseg;
+ tp->snd_cwnd += maxseg;
+}
+
+int
+tcp_compute_pipe(struct tcpcb *tp)
+{
+ return (tp->snd_max - tp->snd_una +
+ tp->sackhint.sack_bytes_rexmit -
+ tp->sackhint.sacked_bytes);
}
diff --git a/freebsd/sys/netinet/tcp_lro.c b/freebsd/sys/netinet/tcp_lro.c
index 52d92aa0..3550ab84 100644
--- a/freebsd/sys/netinet/tcp_lro.c
+++ b/freebsd/sys/netinet/tcp_lro.c
@@ -4,6 +4,7 @@
* Copyright (c) 2007, Myricom Inc.
* Copyright (c) 2008, Intel Corporation.
* Copyright (c) 2012 The FreeBSD Foundation
+ * Copyright (c) 2016 Mellanox Technologies.
* All rights reserved.
*
* Portions of this software were developed by Bjoern Zeeb
@@ -39,9 +40,11 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
-#include <sys/mbuf.h>
#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
#include <sys/socket.h>
+#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -55,59 +58,139 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
+#include <netinet/tcp_var.h>
#include <netinet6/ip6_var.h>
#include <machine/in_cksum.h>
-#ifndef LRO_ENTRIES
-#define LRO_ENTRIES 8 /* # of LRO entries per RX queue. */
-#endif
+static MALLOC_DEFINE(M_LRO, "LRO", "LRO control structures");
#define TCP_LRO_UPDATE_CSUM 1
#ifndef TCP_LRO_UPDATE_CSUM
#define TCP_LRO_INVALID_CSUM 0x0000
#endif
+static void tcp_lro_rx_done(struct lro_ctrl *lc);
+static int tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m,
+ uint32_t csum, int use_hash);
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "TCP LRO");
+
+static unsigned tcp_lro_entries = TCP_LRO_ENTRIES;
+SYSCTL_UINT(_net_inet_tcp_lro, OID_AUTO, entries,
+ CTLFLAG_RDTUN | CTLFLAG_MPSAFE, &tcp_lro_entries, 0,
+ "default number of LRO entries");
+
+static __inline void
+tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_head *bucket,
+ struct lro_entry *le)
+{
+
+ LIST_INSERT_HEAD(&lc->lro_active, le, next);
+ LIST_INSERT_HEAD(bucket, le, hash_next);
+}
+
+static __inline void
+tcp_lro_active_remove(struct lro_entry *le)
+{
+
+ LIST_REMOVE(le, next); /* active list */
+ LIST_REMOVE(le, hash_next); /* hash bucket */
+}
+
int
tcp_lro_init(struct lro_ctrl *lc)
{
+ return (tcp_lro_init_args(lc, NULL, tcp_lro_entries, 0));
+}
+
+int
+tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp,
+ unsigned lro_entries, unsigned lro_mbufs)
+{
struct lro_entry *le;
- int error, i;
+ size_t size;
+ unsigned i, elements;
lc->lro_bad_csum = 0;
lc->lro_queued = 0;
lc->lro_flushed = 0;
lc->lro_cnt = 0;
- SLIST_INIT(&lc->lro_free);
- SLIST_INIT(&lc->lro_active);
-
- error = 0;
- for (i = 0; i < LRO_ENTRIES; i++) {
- le = (struct lro_entry *)malloc(sizeof(*le), M_DEVBUF,
- M_NOWAIT | M_ZERO);
- if (le == NULL) {
- if (i == 0)
- error = ENOMEM;
- break;
- }
- lc->lro_cnt = i + 1;
- SLIST_INSERT_HEAD(&lc->lro_free, le, next);
- }
-
- return (error);
+ lc->lro_mbuf_count = 0;
+ lc->lro_mbuf_max = lro_mbufs;
+ lc->lro_cnt = lro_entries;
+ lc->lro_ackcnt_lim = TCP_LRO_ACKCNT_MAX;
+ lc->lro_length_lim = TCP_LRO_LENGTH_MAX;
+ lc->ifp = ifp;
+ LIST_INIT(&lc->lro_free);
+ LIST_INIT(&lc->lro_active);
+
+ /* create hash table to accelerate entry lookup */
+ if (lro_entries > lro_mbufs)
+ elements = lro_entries;
+ else
+ elements = lro_mbufs;
+ lc->lro_hash = phashinit_flags(elements, M_LRO, &lc->lro_hashsz,
+ HASH_NOWAIT);
+ if (lc->lro_hash == NULL) {
+ memset(lc, 0, sizeof(*lc));
+ return (ENOMEM);
+ }
+
+ /* compute size to allocate */
+ size = (lro_mbufs * sizeof(struct lro_mbuf_sort)) +
+ (lro_entries * sizeof(*le));
+ lc->lro_mbuf_data = (struct lro_mbuf_sort *)
+ malloc(size, M_LRO, M_NOWAIT | M_ZERO);
+
+ /* check for out of memory */
+ if (lc->lro_mbuf_data == NULL) {
+ memset(lc, 0, sizeof(*lc));
+ return (ENOMEM);
+ }
+ /* compute offset for LRO entries */
+ le = (struct lro_entry *)
+ (lc->lro_mbuf_data + lro_mbufs);
+
+ /* setup linked list */
+ for (i = 0; i != lro_entries; i++)
+ LIST_INSERT_HEAD(&lc->lro_free, le + i, next);
+
+ return (0);
}
void
tcp_lro_free(struct lro_ctrl *lc)
{
struct lro_entry *le;
+ unsigned x;
- while (!SLIST_EMPTY(&lc->lro_free)) {
- le = SLIST_FIRST(&lc->lro_free);
- SLIST_REMOVE_HEAD(&lc->lro_free, next);
- free(le, M_DEVBUF);
+ /* reset LRO free list */
+ LIST_INIT(&lc->lro_free);
+
+ /* free active mbufs, if any */
+ while ((le = LIST_FIRST(&lc->lro_active)) != NULL) {
+ tcp_lro_active_remove(le);
+ m_freem(le->m_head);
}
+
+ /* free hash table */
+ if (lc->lro_hash != NULL) {
+ free(lc->lro_hash, M_LRO);
+ lc->lro_hash = NULL;
+ }
+ lc->lro_hashsz = 0;
+
+ /* free mbuf array, if any */
+ for (x = 0; x != lc->lro_mbuf_count; x++)
+ m_freem(lc->lro_mbuf_data[x].mb);
+ lc->lro_mbuf_count = 0;
+
+ /* free allocated memory, if any */
+ free(lc->lro_mbuf_data, M_LRO);
+ lc->lro_mbuf_data = NULL;
}
#ifdef TCP_LRO_UPDATE_CSUM
@@ -195,6 +278,36 @@ tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th,
}
#endif
+static void
+tcp_lro_rx_done(struct lro_ctrl *lc)
+{
+ struct lro_entry *le;
+
+ while ((le = LIST_FIRST(&lc->lro_active)) != NULL) {
+ tcp_lro_active_remove(le);
+ tcp_lro_flush(lc, le);
+ }
+}
+
+void
+tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
+{
+ struct lro_entry *le, *le_tmp;
+ struct timeval tv;
+
+ if (LIST_EMPTY(&lc->lro_active))
+ return;
+
+ getmicrotime(&tv);
+ timevalsub(&tv, timeout);
+ LIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) {
+ if (timevalcmp(&tv, &le->mtime, >=)) {
+ tcp_lro_active_remove(le);
+ tcp_lro_flush(lc, le);
+ }
+ }
+}
+
void
tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
{
@@ -285,7 +398,143 @@ tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
lc->lro_queued += le->append_cnt + 1;
lc->lro_flushed++;
bzero(le, sizeof(*le));
- SLIST_INSERT_HEAD(&lc->lro_free, le, next);
+ LIST_INSERT_HEAD(&lc->lro_free, le, next);
+}
+
+#ifdef HAVE_INLINE_FLSLL
+#define tcp_lro_msb_64(x) (1ULL << (flsll(x) - 1))
+#else
+static inline uint64_t
+tcp_lro_msb_64(uint64_t x)
+{
+ x |= (x >> 1);
+ x |= (x >> 2);
+ x |= (x >> 4);
+ x |= (x >> 8);
+ x |= (x >> 16);
+ x |= (x >> 32);
+ return (x & ~(x >> 1));
+}
+#endif
+
+/*
+ * The tcp_lro_sort() routine is comparable to qsort(), except it has
+ * a worst case complexity limit of O(MIN(N,64)*N), where N is the
+ * number of elements to sort and 64 is the number of sequence bits
+ * available. The algorithm is bit-slicing the 64-bit sequence number,
+ * sorting one bit at a time from the most significant bit until the
+ * least significant one, skipping the constant bits. This is
+ * typically called a radix sort.
+ */
+static void
+tcp_lro_sort(struct lro_mbuf_sort *parray, uint32_t size)
+{
+ struct lro_mbuf_sort temp;
+ uint64_t ones;
+ uint64_t zeros;
+ uint32_t x;
+ uint32_t y;
+
+repeat:
+ /* for small arrays insertion sort is faster */
+ if (size <= 12) {
+ for (x = 1; x < size; x++) {
+ temp = parray[x];
+ for (y = x; y > 0 && temp.seq < parray[y - 1].seq; y--)
+ parray[y] = parray[y - 1];
+ parray[y] = temp;
+ }
+ return;
+ }
+
+ /* compute sequence bits which are constant */
+ ones = 0;
+ zeros = 0;
+ for (x = 0; x != size; x++) {
+ ones |= parray[x].seq;
+ zeros |= ~parray[x].seq;
+ }
+
+ /* compute bits which are not constant into "ones" */
+ ones &= zeros;
+ if (ones == 0)
+ return;
+
+ /* pick the most significant bit which is not constant */
+ ones = tcp_lro_msb_64(ones);
+
+ /*
+ * Move entries having cleared sequence bits to the beginning
+ * of the array:
+ */
+ for (x = y = 0; y != size; y++) {
+ /* skip set bits */
+ if (parray[y].seq & ones)
+ continue;
+ /* swap entries */
+ temp = parray[x];
+ parray[x] = parray[y];
+ parray[y] = temp;
+ x++;
+ }
+
+ KASSERT(x != 0 && x != size, ("Memory is corrupted\n"));
+
+ /* sort zeros */
+ tcp_lro_sort(parray, x);
+
+ /* sort ones */
+ parray += x;
+ size -= x;
+ goto repeat;
+}
+
+void
+tcp_lro_flush_all(struct lro_ctrl *lc)
+{
+ uint64_t seq;
+ uint64_t nseq;
+ unsigned x;
+
+ /* check if no mbufs to flush */
+ if (lc->lro_mbuf_count == 0)
+ goto done;
+
+ /* sort all mbufs according to stream */
+ tcp_lro_sort(lc->lro_mbuf_data, lc->lro_mbuf_count);
+
+ /* input data into LRO engine, stream by stream */
+ seq = 0;
+ for (x = 0; x != lc->lro_mbuf_count; x++) {
+ struct mbuf *mb;
+
+ /* get mbuf */
+ mb = lc->lro_mbuf_data[x].mb;
+
+ /* get sequence number, masking away the packet index */
+ nseq = lc->lro_mbuf_data[x].seq & (-1ULL << 24);
+
+ /* check for new stream */
+ if (seq != nseq) {
+ seq = nseq;
+
+ /* flush active streams */
+ tcp_lro_rx_done(lc);
+ }
+
+ /* add packet to LRO engine */
+ if (tcp_lro_rx2(lc, mb, 0, 0) != 0) {
+ /* input packet to network layer */
+ (*lc->ifp->if_input)(lc->ifp, mb);
+ lc->lro_queued++;
+ lc->lro_flushed++;
+ }
+ }
+done:
+ /* flush active streams */
+ tcp_lro_rx_done(lc);
+
+ lc->lro_mbuf_count = 0;
}
#ifdef INET6
@@ -348,8 +597,8 @@ tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
}
#endif
-int
-tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
+static int
+tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
{
struct lro_entry *le;
struct ether_header *eh;
@@ -365,6 +614,8 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
tcp_seq seq;
int error, ip_len, l;
uint16_t eh_type, tcp_data_len;
+ struct lro_head *bucket;
+ int force_flush = 0;
/* We expect a contiguous header [eh, ip, tcp]. */
@@ -431,10 +682,17 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
* Check TCP header constraints.
*/
/* Ensure no bits set besides ACK or PSH. */
- if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
- return (TCP_LRO_CANNOT);
+ if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) {
+ if (th->th_flags & TH_SYN)
+ return (TCP_LRO_CANNOT);
+ /*
+ * Make sure that previously seen segements/ACKs are delivered
+ * before this segement, e.g. FIN.
+ */
+ force_flush = 1;
+ }
- /* XXX-BZ We lose a AKC|PUSH flag concatinating multiple segments. */
+ /* XXX-BZ We lose a ACK|PUSH flag concatenating multiple segments. */
/* XXX-BZ Ideally we'd flush on PUSH? */
/*
@@ -448,8 +706,13 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
ts_ptr = (uint32_t *)(th + 1);
if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
(*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
- TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))))
- return (TCP_LRO_CANNOT);
+ TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) {
+ /*
+ * Make sure that previously seen segements/ACKs are delivered
+ * before this segement.
+ */
+ force_flush = 1;
+ }
/* If the driver did not pass in the checksum, set it now. */
if (csum == 0x0000)
@@ -457,8 +720,41 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
seq = ntohl(th->th_seq);
+ if (!use_hash) {
+ bucket = &lc->lro_hash[0];
+ } else if (M_HASHTYPE_ISHASH(m)) {
+ bucket = &lc->lro_hash[m->m_pkthdr.flowid % lc->lro_hashsz];
+ } else {
+ uint32_t hash;
+
+ switch (eh_type) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ hash = ip4->ip_src.s_addr + ip4->ip_dst.s_addr;
+ break;
+#endif
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ hash = ip6->ip6_src.s6_addr32[0] +
+ ip6->ip6_dst.s6_addr32[0];
+ hash += ip6->ip6_src.s6_addr32[1] +
+ ip6->ip6_dst.s6_addr32[1];
+ hash += ip6->ip6_src.s6_addr32[2] +
+ ip6->ip6_dst.s6_addr32[2];
+ hash += ip6->ip6_src.s6_addr32[3] +
+ ip6->ip6_dst.s6_addr32[3];
+ break;
+#endif
+ default:
+ hash = 0;
+ break;
+ }
+ hash += th->th_sport + th->th_dport;
+ bucket = &lc->lro_hash[hash % lc->lro_hashsz];
+ }
+
/* Try to find a matching previous segment. */
- SLIST_FOREACH(le, &lc->lro_active, next) {
+ LIST_FOREACH(le, bucket, hash_next) {
if (le->eh_type != eh_type)
continue;
if (le->source_port != th->th_sport ||
@@ -483,9 +779,16 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
#endif
}
+ if (force_flush) {
+ /* Timestamps mismatch; this is a FIN, etc */
+ tcp_lro_active_remove(le);
+ tcp_lro_flush(lc, le);
+ return (TCP_LRO_CANNOT);
+ }
+
/* Flush now if appending will result in overflow. */
- if (le->p_len > (65535 - tcp_data_len)) {
- SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+ if (le->p_len > (lc->lro_length_lim - tcp_data_len)) {
+ tcp_lro_active_remove(le);
tcp_lro_flush(lc, le);
break;
}
@@ -494,7 +797,7 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
if (__predict_false(seq != le->next_seq ||
(tcp_data_len == 0 && le->ack_seq == th->th_ack))) {
/* Out of order packet or duplicate ACK. */
- SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+ tcp_lro_active_remove(le);
tcp_lro_flush(lc, le);
return (TCP_LRO_CANNOT);
}
@@ -522,6 +825,14 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
if (tcp_data_len == 0) {
m_freem(m);
+ /*
+ * Flush this LRO entry, if this ACK should not
+ * be further delayed.
+ */
+ if (le->append_cnt >= lc->lro_ackcnt_lim) {
+ tcp_lro_active_remove(le);
+ tcp_lro_flush(lc, le);
+ }
return (0);
}
@@ -533,7 +844,7 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
* append new segment to existing mbuf chain.
*/
m_adj(m, m->m_pkthdr.len - tcp_data_len);
- m->m_flags &= ~M_PKTHDR;
+ m_demote_pkthdr(m);
le->m_tail->m_next = m;
le->m_tail = m_last(m);
@@ -542,22 +853,32 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
* If a possible next full length packet would cause an
* overflow, pro-actively flush now.
*/
- if (le->p_len > (65535 - lc->ifp->if_mtu)) {
- SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+ if (le->p_len > (lc->lro_length_lim - lc->ifp->if_mtu)) {
+ tcp_lro_active_remove(le);
tcp_lro_flush(lc, le);
- }
+ } else
+ getmicrotime(&le->mtime);
return (0);
}
- /* Try to find an empty slot. */
- if (SLIST_EMPTY(&lc->lro_free))
+ if (force_flush) {
+ /*
+ * Nothing to flush, but this segment can not be further
+ * aggregated/delayed.
+ */
return (TCP_LRO_CANNOT);
+ }
+
+ /* Try to find an empty slot. */
+ if (LIST_EMPTY(&lc->lro_free))
+ return (TCP_LRO_NO_ENTRIES);
/* Start a new segment chain. */
- le = SLIST_FIRST(&lc->lro_free);
- SLIST_REMOVE_HEAD(&lc->lro_free, next);
- SLIST_INSERT_HEAD(&lc->lro_active, le, next);
+ le = LIST_FIRST(&lc->lro_free);
+ LIST_REMOVE(le, next);
+ tcp_lro_active_insert(lc, bucket, le);
+ getmicrotime(&le->mtime);
/* Start filling in details. */
switch (eh_type) {
@@ -614,4 +935,47 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
return (0);
}
+int
+tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
+{
+
+ return tcp_lro_rx2(lc, m, csum, 1);
+}
+
+void
+tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
+{
+ /* sanity checks */
+ if (__predict_false(lc->ifp == NULL || lc->lro_mbuf_data == NULL ||
+ lc->lro_mbuf_max == 0)) {
+ /* packet drop */
+ m_freem(mb);
+ return;
+ }
+
+ /* check if packet is not LRO capable */
+ if (__predict_false(mb->m_pkthdr.csum_flags == 0 ||
+ (lc->ifp->if_capenable & IFCAP_LRO) == 0)) {
+ lc->lro_flushed++;
+ lc->lro_queued++;
+
+ /* input packet to network layer */
+ (*lc->ifp->if_input) (lc->ifp, mb);
+ return;
+ }
+
+ /* check if array is full */
+ if (__predict_false(lc->lro_mbuf_count == lc->lro_mbuf_max))
+ tcp_lro_flush_all(lc);
+
+ /* create sequence number */
+ lc->lro_mbuf_data[lc->lro_mbuf_count].seq =
+ (((uint64_t)M_HASHTYPE_GET(mb)) << 56) |
+ (((uint64_t)mb->m_pkthdr.flowid) << 24) |
+ ((uint64_t)lc->lro_mbuf_count);
+
+ /* enter mbuf */
+ lc->lro_mbuf_data[lc->lro_mbuf_count++].mb = mb;
+}
+
/* end */
diff --git a/freebsd/sys/netinet/tcp_lro.h b/freebsd/sys/netinet/tcp_lro.h
index b3a50179..e019cd1e 100644
--- a/freebsd/sys/netinet/tcp_lro.h
+++ b/freebsd/sys/netinet/tcp_lro.h
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 2006, Myricom Inc.
* Copyright (c) 2008, Intel Corporation.
+ * Copyright (c) 2016 Mellanox Technologies.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,9 +31,16 @@
#ifndef _TCP_LRO_H_
#define _TCP_LRO_H_
-struct lro_entry
-{
- SLIST_ENTRY(lro_entry) next;
+#include <sys/time.h>
+
+#ifndef TCP_LRO_ENTRIES
+/* Define default number of LRO entries per RX queue */
+#define TCP_LRO_ENTRIES 8
+#endif
+
+struct lro_entry {
+ LIST_ENTRY(lro_entry) next;
+ LIST_ENTRY(lro_entry) hash_next;
struct mbuf *m_head;
struct mbuf *m_tail;
union {
@@ -59,8 +67,9 @@ struct lro_entry
uint32_t tsecr;
uint16_t window;
uint16_t timestamp; /* flag, not a TCP hdr field. */
+ struct timeval mtime;
};
-SLIST_HEAD(lro_head, lro_entry);
+LIST_HEAD(lro_head, lro_entry);
#define le_ip4 leip.ip4
#define le_ip6 leip.ip6
@@ -69,23 +78,43 @@ SLIST_HEAD(lro_head, lro_entry);
#define source_ip6 lesource.s_ip6
#define dest_ip6 ledest.d_ip6
+struct lro_mbuf_sort {
+ uint64_t seq;
+ struct mbuf *mb;
+};
+
/* NB: This is part of driver structs. */
struct lro_ctrl {
struct ifnet *ifp;
- int lro_queued;
- int lro_flushed;
- int lro_bad_csum;
- int lro_cnt;
+ struct lro_mbuf_sort *lro_mbuf_data;
+ uint64_t lro_queued;
+ uint64_t lro_flushed;
+ uint64_t lro_bad_csum;
+ unsigned lro_cnt;
+ unsigned lro_mbuf_count;
+ unsigned lro_mbuf_max;
+ unsigned short lro_ackcnt_lim; /* max # of aggregated ACKs */
+ unsigned lro_length_lim; /* max len of aggregated data */
+ u_long lro_hashsz;
+ struct lro_head *lro_hash;
struct lro_head lro_active;
struct lro_head lro_free;
};
+#define TCP_LRO_LENGTH_MAX 65535
+#define TCP_LRO_ACKCNT_MAX 65535 /* unlimited */
+
int tcp_lro_init(struct lro_ctrl *);
+int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned);
void tcp_lro_free(struct lro_ctrl *);
+void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *);
void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
+void tcp_lro_flush_all(struct lro_ctrl *);
int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
+void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *);
+#define TCP_LRO_NO_ENTRIES -2
#define TCP_LRO_CANNOT -1
#define TCP_LRO_NOT_SUPPORTED 1
diff --git a/freebsd/sys/netinet/tcp_offload.c b/freebsd/sys/netinet/tcp_offload.c
index 1a90f408..78275fb8 100644
--- a/freebsd/sys/netinet/tcp_offload.c
+++ b/freebsd/sys/netinet/tcp_offload.c
@@ -39,14 +39,15 @@ __FBSDID("$FreeBSD$");
#include <sys/socketvar.h>
#include <sys/sockopt.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
#include <netinet/tcp_offload.h>
#define TCPOUTFLAGS
#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_var.h>
#include <netinet/toecore.h>
int registered_toedevs;
diff --git a/freebsd/sys/netinet/tcp_output.c b/freebsd/sys/netinet/tcp_output.c
index 550af64f..af11d805 100644
--- a/freebsd/sys/netinet/tcp_output.c
+++ b/freebsd/sys/netinet/tcp_output.c
@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
@@ -56,8 +57,8 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
#include <net/vnet.h>
-#include <netinet/cc.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
@@ -68,12 +69,20 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
+#include <netinet/tcp.h>
#define TCPOUTFLAGS
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
+#include <netinet/cc/cc.h>
+#ifdef TCPPCAP
+#include <netinet/tcp_pcap.h>
+#endif
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
@@ -90,46 +99,56 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
VNET_DEFINE(int, path_mtu_discovery) = 1;
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(path_mtu_discovery), 1,
"Enable Path MTU Discovery");
VNET_DEFINE(int, tcp_do_tso) = 1;
#define V_tcp_do_tso VNET(tcp_do_tso)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_tso), 0,
"Enable TCP Segmentation Offload");
VNET_DEFINE(int, tcp_sendspace) = 1024*32;
#define V_tcp_sendspace VNET(tcp_sendspace)
-SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_sendspace), 0, "Initial send socket buffer size");
VNET_DEFINE(int, tcp_do_autosndbuf) = 1;
#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_autosndbuf), 0,
"Enable automatic send buffer sizing");
VNET_DEFINE(int, tcp_autosndbuf_inc) = 8*1024;
#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autosndbuf_inc), 0,
"Incrementor step size of automatic send buffer");
VNET_DEFINE(int, tcp_autosndbuf_max) = 2*1024*1024;
#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autosndbuf_max), 0,
"Max size of automatic send buffer");
+/*
+ * Make sure that either retransmit or persist timer is set for SYN, FIN and
+ * non-ACK.
+ */
+#define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags) \
+ KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\
+ tcp_timer_active((tp), TT_REXMT) || \
+ tcp_timer_active((tp), TT_PERSIST), \
+ ("neither rexmt nor persist timer is set"))
+
static void inline hhook_run_tcp_est_out(struct tcpcb *tp,
struct tcphdr *th, struct tcpopt *to,
long len, int tso);
static void inline cc_after_idle(struct tcpcb *tp);
/*
- * Wrapper for the TCP established ouput helper hook.
+ * Wrapper for the TCP established output helper hook.
*/
static void inline
hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th,
@@ -201,6 +220,17 @@ tcp_output(struct tcpcb *tp)
return (tcp_offload_output(tp));
#endif
+#ifdef TCP_RFC7413
+ /*
+ * For TFO connections in SYN_RECEIVED, only allow the initial
+ * SYN|ACK and those sent by the retransmit timer.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED) &&
+ SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN|ACK sent */
+ (tp->snd_nxt != tp->snd_una)) /* not a retransmit */
+ return (0);
+#endif
/*
* Determine length of data that should be transmitted,
* and flags that will be used.
@@ -322,7 +352,7 @@ after_sack_rexmit:
* to send then the probe will be the FIN
* itself.
*/
- if (off < so->so_snd.sb_cc)
+ if (off < sbused(&so->so_snd))
flags &= ~TH_FIN;
sendwin = 1;
} else {
@@ -348,7 +378,8 @@ after_sack_rexmit:
*/
if (sack_rxmit == 0) {
if (sack_bytes_rxmt == 0)
- len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off);
+ len = ((long)ulmin(sbavail(&so->so_snd), sendwin) -
+ off);
else {
long cwin;
@@ -357,8 +388,8 @@ after_sack_rexmit:
* sending new data, having retransmitted all the
* data possible in the scoreboard.
*/
- len = ((long)ulmin(so->so_snd.sb_cc, tp->snd_wnd)
- - off);
+ len = ((long)ulmin(sbavail(&so->so_snd), tp->snd_wnd) -
+ off);
/*
* Don't remove this (len > 0) check !
* We explicitly check for len > 0 here (although it
@@ -386,6 +417,15 @@ after_sack_rexmit:
if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
if (tp->t_state != TCPS_SYN_RECEIVED)
flags &= ~TH_SYN;
+#ifdef TCP_RFC7413
+ /*
+ * When sending additional segments following a TFO SYN|ACK,
+ * do not include the SYN bit.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED))
+ flags &= ~TH_SYN;
+#endif
off--, len++;
}
@@ -399,7 +439,18 @@ after_sack_rexmit:
flags &= ~TH_FIN;
}
- if (len < 0) {
+#ifdef TCP_RFC7413
+ /*
+ * When retransmitting SYN|ACK on a passively-created TFO socket,
+ * don't include data, as the presence of data may have caused the
+ * original SYN|ACK to have been dropped by a middlebox.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) ||
+ (flags & TH_RST)))
+ len = 0;
+#endif
+ if (len <= 0) {
/*
* If FIN has been sent but not acked,
* but we haven't been called to retransmit,
@@ -409,9 +460,16 @@ after_sack_rexmit:
* to (closed) window, and set the persist timer
* if it isn't already going. If the window didn't
* close completely, just wait for an ACK.
+ *
+ * We also do a general check here to ensure that
+ * we will set the persist timer when we have data
+ * to send, but a 0-byte window. This makes sure
+ * the persist timer is set even if the packet
+ * hits one of the "goto send" lines below.
*/
len = 0;
- if (sendwin == 0) {
+ if ((sendwin == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) &&
+ (off < (int) sbavail(&so->so_snd))) {
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rxtshift = 0;
tp->snd_nxt = tp->snd_una;
@@ -449,20 +507,23 @@ after_sack_rexmit:
* and does at most one step per received ACK. This fast
* scaling has the drawback of growing the send buffer beyond
* what is strictly necessary to make full use of a given
- * delay*bandwith product. However testing has shown this not
+ * delay*bandwidth product. However testing has shown this not
* to be much of an problem. At worst we are trading wasting
- * of available bandwith (the non-use of it) for wasting some
+ * of available bandwidth (the non-use of it) for wasting some
* socket buffer memory.
*
* TODO: Shrink send buffer during idle periods together
* with congestion window. Requires another timer. Has to
* wait for upcoming tcp timer rewrite.
+ *
+ * XXXGL: should there be used sbused() or sbavail()?
*/
if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) {
if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat &&
- so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) &&
- so->so_snd.sb_cc < V_tcp_autosndbuf_max &&
- sendwin >= (so->so_snd.sb_cc - (tp->snd_nxt - tp->snd_una))) {
+ sbused(&so->so_snd) >= (so->so_snd.sb_hiwat / 8 * 7) &&
+ sbused(&so->so_snd) < V_tcp_autosndbuf_max &&
+ sendwin >= (sbused(&so->so_snd) -
+ (tp->snd_nxt - tp->snd_una))) {
if (!sbreserve_locked(&so->so_snd,
min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc,
V_tcp_autosndbuf_max), so, curthread))
@@ -499,10 +560,11 @@ after_sack_rexmit:
tso = 1;
if (sack_rxmit) {
- if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc))
+ if (SEQ_LT(p->rxmit + len, tp->snd_una + sbused(&so->so_snd)))
flags &= ~TH_FIN;
} else {
- if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
+ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una +
+ sbused(&so->so_snd)))
flags &= ~TH_FIN;
}
@@ -532,7 +594,7 @@ after_sack_rexmit:
*/
if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */
(idle || (tp->t_flags & TF_NODELAY)) &&
- len + off >= so->so_snd.sb_cc &&
+ len + off >= sbavail(&so->so_snd) &&
(tp->t_flags & TF_NOPUSH) == 0) {
goto send;
}
@@ -660,7 +722,7 @@ dontupdate:
* if window is nonzero, transmit what we can,
* otherwise force out a byte.
*/
- if (so->so_snd.sb_cc && !tcp_timer_active(tp, TT_REXMT) &&
+ if (sbavail(&so->so_snd) && !tcp_timer_active(tp, TT_REXMT) &&
!tcp_timer_active(tp, TT_PERSIST)) {
tp->t_rxtshift = 0;
tcp_setpersist(tp);
@@ -675,6 +737,12 @@ just_return:
send:
SOCKBUF_LOCK_ASSERT(&so->so_snd);
+ if (len > 0) {
+ if (len >= tp->t_maxseg)
+ tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT;
+ else
+ tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT;
+ }
/*
* Before ESTABLISHED, force sending of initial options
* unless TCP set not to do any options.
@@ -697,13 +765,29 @@ send:
* segments. Options for SYN-ACK segments are handled in TCP
* syncache.
*/
+ to.to_flags = 0;
if ((tp->t_flags & TF_NOOPT) == 0) {
- to.to_flags = 0;
/* Maximum segment size. */
if (flags & TH_SYN) {
tp->snd_nxt = tp->iss;
to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
to.to_flags |= TOF_MSS;
+#ifdef TCP_RFC7413
+ /*
+ * Only include the TFO option on the first
+ * transmission of the SYN|ACK on a
+ * passively-created TFO socket, as the presence of
+ * the TFO option may have caused the original
+ * SYN|ACK to have been dropped by a middlebox.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_rxtshift == 0)) {
+ to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+ to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie;
+ to.to_flags |= TOF_FASTOPEN;
+ }
+#endif
}
/* Window scaling. */
if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
@@ -759,11 +843,11 @@ send:
/*
* Adjust data length if insertion of options will
- * bump the packet length beyond the t_maxopd length.
+ * bump the packet length beyond the t_maxseg length.
* Clear the FIN bit because we cut off the tail of
* the segment.
*/
- if (len + optlen + ipoptlen > tp->t_maxopd) {
+ if (len + optlen + ipoptlen > tp->t_maxseg) {
flags &= ~TH_FIN;
if (tso) {
@@ -793,7 +877,8 @@ send:
*/
if (if_hw_tsomax != 0) {
/* compute maximum TSO length */
- max_len = (if_hw_tsomax - hdrlen);
+ max_len = (if_hw_tsomax - hdrlen -
+ max_linkhdr);
if (max_len <= 0) {
len = 0;
} else if (len > max_len) {
@@ -808,6 +893,15 @@ send:
*/
if (if_hw_tsomaxsegcount != 0 &&
if_hw_tsomaxsegsize != 0) {
+ /*
+ * Subtract one segment for the LINK
+ * and TCP/IP headers mbuf that will
+ * be prepended to this mbuf chain
+ * after the code in this section
+ * limits the number of mbufs in the
+ * chain to if_hw_tsomaxsegcount.
+ */
+ if_hw_tsomaxsegcount -= 1;
max_len = 0;
mb = sbsndmbuf(&so->so_snd, off, &moff);
@@ -856,8 +950,8 @@ send:
* fractional unless the send sockbuf can be
* emptied:
*/
- max_len = (tp->t_maxopd - optlen);
- if ((off + len) < so->so_snd.sb_cc) {
+ max_len = (tp->t_maxseg - optlen);
+ if ((off + len) < sbavail(&so->so_snd)) {
moff = len % max_len;
if (moff != 0) {
len -= moff;
@@ -886,7 +980,7 @@ send:
sendalot = 1;
} else {
- len = tp->t_maxopd - optlen - ipoptlen;
+ len = tp->t_maxseg - optlen - ipoptlen;
sendalot = 1;
}
} else
@@ -929,23 +1023,20 @@ send:
TCPSTAT_INC(tcps_sndpack);
TCPSTAT_ADD(tcps_sndbyte, len);
}
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+#ifdef INET6
+ if (MHLEN < hdrlen + max_linkhdr)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+#endif
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+
if (m == NULL) {
SOCKBUF_UNLOCK(&so->so_snd);
error = ENOBUFS;
+ sack_rxmit = 0;
goto out;
}
-#ifdef INET6
- if (MHLEN < hdrlen + max_linkhdr) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- SOCKBUF_UNLOCK(&so->so_snd);
- m_freem(m);
- error = ENOBUFS;
- goto out;
- }
- }
-#endif
+
m->m_data += max_linkhdr;
m->m_len = hdrlen;
@@ -965,6 +1056,7 @@ send:
SOCKBUF_UNLOCK(&so->so_snd);
(void) m_free(m);
error = ENOBUFS;
+ sack_rxmit = 0;
goto out;
}
}
@@ -975,7 +1067,7 @@ send:
* give data to the user when a buffer fills or
* a PUSH comes in.)
*/
- if (off + len == so->so_snd.sb_cc)
+ if ((off + len == sbused(&so->so_snd)) && !(flags & TH_SYN))
flags |= TH_PUSH;
SOCKBUF_UNLOCK(&so->so_snd);
} else {
@@ -989,15 +1081,16 @@ send:
else
TCPSTAT_INC(tcps_sndwinup);
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
+ sack_rxmit = 0;
goto out;
}
#ifdef INET6
if (isipv6 && (MHLEN < hdrlen + max_linkhdr) &&
MHLEN >= hdrlen) {
- MH_ALIGN(m, hdrlen);
+ M_ALIGN(m, hdrlen);
} else
#endif
m->m_data += max_linkhdr;
@@ -1036,7 +1129,7 @@ send:
* resend those bits a number of times as per
* RFC 3168.
*/
- if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) {
if (tp->t_rxtshift >= 1) {
if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
flags |= TH_ECE|TH_CWR;
@@ -1153,7 +1246,7 @@ send:
tp->snd_up = tp->snd_una; /* drag it along */
#ifdef TCP_SIGNATURE
- if (tp->t_flags & TF_SIGNATURE) {
+ if (to.to_flags & TOF_SIGNATURE) {
int sigoff = to.to_signature - opt;
tcp_signature_compute(m, 0, len, optlen,
(u_char *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND);
@@ -1195,13 +1288,12 @@ send:
/*
* Enable TSO and specify the size of the segments.
* The TCP pseudo header checksum is always provided.
- * XXX: Fixme: This is currently not the case for IPv6.
*/
if (tso) {
- KASSERT(len > tp->t_maxopd - optlen,
+ KASSERT(len > tp->t_maxseg - optlen,
("%s: len <= tso_segsz", __func__));
m->m_pkthdr.csum_flags |= CSUM_TSO;
- m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen;
+ m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen;
}
#ifdef IPSEC
@@ -1214,75 +1306,6 @@ send:
__func__, len, hdrlen, ipoptlen, m_length(m, NULL)));
#endif
- /*
- * In transmit state, time the transmission and arrange for
- * the retransmit. In persist state, just set snd_max.
- */
- if ((tp->t_flags & TF_FORCEDATA) == 0 ||
- !tcp_timer_active(tp, TT_PERSIST)) {
- tcp_seq startseq = tp->snd_nxt;
-
- /*
- * Advance snd_nxt over sequence space of this segment.
- */
- if (flags & (TH_SYN|TH_FIN)) {
- if (flags & TH_SYN)
- tp->snd_nxt++;
- if (flags & TH_FIN) {
- tp->snd_nxt++;
- tp->t_flags |= TF_SENTFIN;
- }
- }
- if (sack_rxmit)
- goto timer;
- tp->snd_nxt += len;
- if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
- tp->snd_max = tp->snd_nxt;
- /*
- * Time this transmission if not a retransmission and
- * not currently timing anything.
- */
- if (tp->t_rtttime == 0) {
- tp->t_rtttime = ticks;
- tp->t_rtseq = startseq;
- TCPSTAT_INC(tcps_segstimed);
- }
- }
-
- /*
- * Set retransmit timer if not currently set,
- * and not doing a pure ack or a keep-alive probe.
- * Initial value for retransmit timer is smoothed
- * round-trip time + 2 * round-trip time variance.
- * Initialize shift counter which is used for backoff
- * of retransmit time.
- */
-timer:
- if (!tcp_timer_active(tp, TT_REXMT) &&
- ((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
- (tp->snd_nxt != tp->snd_una))) {
- if (tcp_timer_active(tp, TT_PERSIST)) {
- tcp_timer_activate(tp, TT_PERSIST, 0);
- tp->t_rxtshift = 0;
- }
- tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
- }
- } else {
- /*
- * Persist case, update snd_max but since we are in
- * persist mode (no window) we do not update snd_nxt.
- */
- int xlen = len;
- if (flags & TH_SYN)
- ++xlen;
- if (flags & TH_FIN) {
- ++xlen;
- tp->t_flags |= TF_SENTFIN;
- }
- if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
- tp->snd_max = tp->snd_nxt + len;
- }
-
/* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */
hhook_run_tcp_est_out(tp, th, &to, len, tso);
@@ -1306,6 +1329,7 @@ timer:
ipov->ih_len = save;
}
#endif /* TCPDEBUG */
+ TCP_PROBE3(debug__output, tp, th, mtod(m, const char *));
/*
* Fill in IP length and desired time to live and
@@ -1314,7 +1338,7 @@ timer:
* the template, but need a way to checksum without them.
*/
/*
- * m->m_pkthdr.len should have been set before cksum calcuration,
+ * m->m_pkthdr.len should have been set before checksum calculation,
* because in6_cksum() need it.
*/
#ifdef INET6
@@ -1330,13 +1354,35 @@ timer:
*/
ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL);
+ /*
+ * Set the packet size here for the benefit of DTrace probes.
+ * ip6_output() will set it properly; it's supposed to include
+ * the option header lengths as well.
+ */
+ ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
+
+ if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss)
+ tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+ else
+ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+
+ if (tp->t_state == TCPS_SYN_SENT)
+ TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th);
+
+ TCP_PROBE5(send, NULL, tp, ip6, tp, th);
+
+#ifdef TCPPCAP
+ /* Save packet, if requested. */
+ tcp_pcap_add(th, m, &(tp->t_outpkts));
+#endif
+
/* TODO: IPv6 IP6TOS_ECT bit on */
error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro,
((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
NULL, NULL, tp->t_inpcb);
if (error == EMSGSIZE && ro.ro_rt != NULL)
- mtu = ro.ro_rt->rt_rmx.rmx_mtu;
+ mtu = ro.ro_rt->rt_mtu;
RO_RTFREE(&ro);
}
#endif /* INET6 */
@@ -1345,10 +1391,7 @@ timer:
#endif
#ifdef INET
{
- struct route ro;
-
- bzero(&ro, sizeof(ro));
- ip->ip_len = m->m_pkthdr.len;
+ ip->ip_len = htons(m->m_pkthdr.len);
#ifdef INET6
if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO)
ip->ip_ttl = in6_selecthlim(tp->t_inpcb, NULL);
@@ -1361,18 +1404,126 @@ timer:
*
* NB: Don't set DF on small MTU/MSS to have a safe fallback.
*/
- if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
- ip->ip_off |= IP_DF;
+ if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
+ ip->ip_off |= htons(IP_DF);
+ tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+ } else {
+ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+ }
+
+ if (tp->t_state == TCPS_SYN_SENT)
+ TCP_PROBE5(connect__request, NULL, tp, ip, tp, th);
- error = ip_output(m, tp->t_inpcb->inp_options, &ro,
+ TCP_PROBE5(send, NULL, tp, ip, tp, th);
+
+#ifdef TCPPCAP
+ /* Save packet, if requested. */
+ tcp_pcap_add(th, m, &(tp->t_outpkts));
+#endif
+
+ error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
tp->t_inpcb);
- if (error == EMSGSIZE && ro.ro_rt != NULL)
- mtu = ro.ro_rt->rt_rmx.rmx_mtu;
- RO_RTFREE(&ro);
+ if (error == EMSGSIZE && tp->t_inpcb->inp_route.ro_rt != NULL)
+ mtu = tp->t_inpcb->inp_route.ro_rt->rt_mtu;
}
#endif /* INET */
+
+out:
+ /*
+ * In transmit state, time the transmission and arrange for
+ * the retransmit. In persist state, just set snd_max.
+ */
+ if ((tp->t_flags & TF_FORCEDATA) == 0 ||
+ !tcp_timer_active(tp, TT_PERSIST)) {
+ tcp_seq startseq = tp->snd_nxt;
+
+ /*
+ * Advance snd_nxt over sequence space of this segment.
+ */
+ if (flags & (TH_SYN|TH_FIN)) {
+ if (flags & TH_SYN)
+ tp->snd_nxt++;
+ if (flags & TH_FIN) {
+ tp->snd_nxt++;
+ tp->t_flags |= TF_SENTFIN;
+ }
+ }
+ if (sack_rxmit)
+ goto timer;
+ tp->snd_nxt += len;
+ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
+ tp->snd_max = tp->snd_nxt;
+ /*
+ * Time this transmission if not a retransmission and
+ * not currently timing anything.
+ */
+ if (tp->t_rtttime == 0) {
+ tp->t_rtttime = ticks;
+ tp->t_rtseq = startseq;
+ TCPSTAT_INC(tcps_segstimed);
+ }
+ }
+
+ /*
+ * Set retransmit timer if not currently set,
+ * and not doing a pure ack or a keep-alive probe.
+ * Initial value for retransmit timer is smoothed
+ * round-trip time + 2 * round-trip time variance.
+ * Initialize shift counter which is used for backoff
+ * of retransmit time.
+ */
+timer:
+ if (!tcp_timer_active(tp, TT_REXMT) &&
+ ((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
+ (tp->snd_nxt != tp->snd_una))) {
+ if (tcp_timer_active(tp, TT_PERSIST)) {
+ tcp_timer_activate(tp, TT_PERSIST, 0);
+ tp->t_rxtshift = 0;
+ }
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+ } else if (len == 0 && sbavail(&so->so_snd) &&
+ !tcp_timer_active(tp, TT_REXMT) &&
+ !tcp_timer_active(tp, TT_PERSIST)) {
+ /*
+ * Avoid a situation where we do not set persist timer
+ * after a zero window condition. For example:
+ * 1) A -> B: packet with enough data to fill the window
+ * 2) B -> A: ACK for #1 + new data (0 window
+ * advertisement)
+ * 3) A -> B: ACK for #2, 0 len packet
+ *
+ * In this case, A will not activate the persist timer,
+ * because it chose to send a packet. Unless tcp_output
+ * is called for some other reason (delayed ack timer,
+ * another input packet from B, socket syscall), A will
+ * not send zero window probes.
+ *
+ * So, if you send a 0-length packet, but there is data
+ * in the socket buffer, and neither the rexmt or
+ * persist timer is already set, then activate the
+ * persist timer.
+ */
+ tp->t_rxtshift = 0;
+ tcp_setpersist(tp);
+ }
+ } else {
+ /*
+ * Persist case, update snd_max but since we are in
+ * persist mode (no window) we do not update snd_nxt.
+ */
+ int xlen = len;
+ if (flags & TH_SYN)
+ ++xlen;
+ if (flags & TH_FIN) {
+ ++xlen;
+ tp->t_flags |= TF_SENTFIN;
+ }
+ if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
+ tp->snd_max = tp->snd_nxt + len;
+ }
+
if (error) {
/*
@@ -1400,16 +1551,13 @@ timer:
} else
tp->snd_nxt -= len;
}
-out:
SOCKBUF_UNLOCK_ASSERT(&so->so_snd); /* Check gotos. */
switch (error) {
case EPERM:
tp->t_softerror = error;
return (error);
case ENOBUFS:
- if (!tcp_timer_active(tp, TT_REXMT) &&
- !tcp_timer_active(tp, TT_PERSIST))
- tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+ TCP_XMIT_TIMER_ASSERT(tp, len, flags);
tp->snd_cwnd = tp->t_maxseg;
return (0);
case EMSGSIZE:
@@ -1481,10 +1629,10 @@ tcp_setpersist(struct tcpcb *tp)
if (tcp_timer_active(tp, TT_REXMT))
panic("tcp_setpersist: retransmit pending");
/*
- * Start/restart persistance timer.
+ * Start/restart persistence timer.
*/
TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
- TCPTV_PERSMIN, TCPTV_PERSMAX);
+ tcp_persmin, tcp_persmax);
tcp_timer_activate(tp, TT_PERSIST, tt);
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
tp->t_rxtshift++;
@@ -1510,7 +1658,7 @@ tcp_setpersist(struct tcpcb *tp)
int
tcp_addoptions(struct tcpopt *to, u_char *optp)
{
- u_int mask, optlen = 0;
+ u_int32_t mask, optlen = 0;
for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) {
if ((to->to_flags & mask) != mask)
@@ -1572,6 +1720,7 @@ tcp_addoptions(struct tcpopt *to, u_char *optp)
bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr));
optp += sizeof(to->to_tsecr);
break;
+#ifdef TCP_SIGNATURE
case TOF_SIGNATURE:
{
int siglen = TCPOLEN_SIGNATURE - 2;
@@ -1590,6 +1739,7 @@ tcp_addoptions(struct tcpopt *to, u_char *optp)
*optp++ = 0;
break;
}
+#endif
case TOF_SACK:
{
int sackblks = 0;
@@ -1620,6 +1770,25 @@ tcp_addoptions(struct tcpopt *to, u_char *optp)
TCPSTAT_INC(tcps_sack_send_blocks);
break;
}
+#ifdef TCP_RFC7413
+ case TOF_FASTOPEN:
+ {
+ int total_len;
+
+ /* XXX is there any point to aligning this option? */
+ total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len;
+ if (TCP_MAXOLEN - optlen < total_len)
+ continue;
+ *optp++ = TCPOPT_FAST_OPEN;
+ *optp++ = total_len;
+ if (to->to_tfo_len > 0) {
+ bcopy(to->to_tfo_cookie, optp, to->to_tfo_len);
+ optp += to->to_tfo_len;
+ }
+ optlen += total_len;
+ break;
+ }
+#endif
default:
panic("%s: unknown TCP option type", __func__);
break;
diff --git a/freebsd/sys/netinet/tcp_reass.c b/freebsd/sys/netinet/tcp_reass.c
index 2570a5f3..49184a5f 100644
--- a/freebsd/sys/netinet/tcp_reass.c
+++ b/freebsd/sys/netinet/tcp_reass.c
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/kernel.h>
+#include <sys/eventhandler.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
@@ -51,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -76,67 +78,46 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
-static int tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS);
-
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
"TCP Segment Reassembly Queue");
-static VNET_DEFINE(int, tcp_reass_maxseg) = 0;
-#define V_tcp_reass_maxseg VNET(tcp_reass_maxseg)
-SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
- &VNET_NAME(tcp_reass_maxseg), 0,
+static int tcp_reass_maxseg = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+ &tcp_reass_maxseg, 0,
"Global maximum number of TCP Segments in Reassembly Queue");
-SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, cursegments,
- (CTLTYPE_INT | CTLFLAG_RD), NULL, 0, &tcp_reass_sysctl_qsize, "I",
+static uma_zone_t tcp_reass_zone;
+SYSCTL_UMA_CUR(_net_inet_tcp_reass, OID_AUTO, cursegments, 0,
+ &tcp_reass_zone,
"Global number of TCP Segments currently in Reassembly Queue");
-static VNET_DEFINE(int, tcp_reass_overflows) = 0;
-#define V_tcp_reass_overflows VNET(tcp_reass_overflows)
-SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, overflows,
- CTLFLAG_RD,
- &VNET_NAME(tcp_reass_overflows), 0,
- "Global number of TCP Segment Reassembly Queue Overflows");
-
-static VNET_DEFINE(uma_zone_t, tcp_reass_zone);
-#define V_tcp_reass_zone VNET(tcp_reass_zone)
-
/* Initialize TCP reassembly queue */
static void
tcp_reass_zone_change(void *tag)
{
/* Set the zone limit and read back the effective value. */
- V_tcp_reass_maxseg = nmbclusters / 16;
- V_tcp_reass_maxseg = uma_zone_set_max(V_tcp_reass_zone,
- V_tcp_reass_maxseg);
+ tcp_reass_maxseg = nmbclusters / 16;
+ tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone,
+ tcp_reass_maxseg);
}
void
-tcp_reass_init(void)
+tcp_reass_global_init(void)
{
- V_tcp_reass_maxseg = nmbclusters / 16;
+ tcp_reass_maxseg = nmbclusters / 16;
TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
- &V_tcp_reass_maxseg);
- V_tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
+ &tcp_reass_maxseg);
+ tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
/* Set the zone limit and read back the effective value. */
- V_tcp_reass_maxseg = uma_zone_set_max(V_tcp_reass_zone,
- V_tcp_reass_maxseg);
+ tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone,
+ tcp_reass_maxseg);
EVENTHANDLER_REGISTER(nmbclusters_change,
tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
}
-#ifdef VIMAGE
-void
-tcp_reass_destroy(void)
-{
-
- uma_zdestroy(V_tcp_reass_zone);
-}
-#endif
-
void
tcp_reass_flush(struct tcpcb *tp)
{
@@ -147,7 +128,7 @@ tcp_reass_flush(struct tcpcb *tp)
while ((qe = LIST_FIRST(&tp->t_segq)) != NULL) {
LIST_REMOVE(qe, tqe_q);
m_freem(qe->tqe_m);
- uma_zfree(V_tcp_reass_zone, qe);
+ uma_zfree(tcp_reass_zone, qe);
tp->t_segqlen--;
}
@@ -156,15 +137,6 @@ tcp_reass_flush(struct tcpcb *tp)
tp, tp->t_segqlen));
}
-static int
-tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS)
-{
- int qsize;
-
- qsize = uma_zone_get_cur(V_tcp_reass_zone);
- return (sysctl_handle_int(oidp, &qsize, 0, req));
-}
-
int
tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
{
@@ -209,15 +181,14 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
*/
if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
tp->t_segqlen >= (so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) {
- V_tcp_reass_overflows++;
- TCPSTAT_INC(tcps_rcvmemdrop);
- m_freem(m);
+ TCPSTAT_INC(tcps_rcvreassfull);
*tlenp = 0;
if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: queue limit reached, "
"segment dropped\n", s, __func__);
free(s, M_TCPLOG);
}
+ m_freem(m);
return (0);
}
@@ -228,7 +199,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
* Use a temporary structure on the stack for the missing segment
* when the zone is exhausted. Otherwise we may get stuck.
*/
- te = uma_zalloc(V_tcp_reass_zone, M_NOWAIT);
+ te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
if (te == NULL) {
if (th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) {
TCPSTAT_INC(tcps_rcvmemdrop);
@@ -279,7 +250,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
m_freem(m);
if (te != &tqs)
- uma_zfree(V_tcp_reass_zone, te);
+ uma_zfree(tcp_reass_zone, te);
tp->t_segqlen--;
/*
* Try to present any queued data
@@ -316,7 +287,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
nq = LIST_NEXT(q, tqe_q);
LIST_REMOVE(q, tqe_q);
m_freem(q->tqe_m);
- uma_zfree(V_tcp_reass_zone, q);
+ uma_zfree(tcp_reass_zone, q);
tp->t_segqlen--;
q = nq;
}
@@ -353,13 +324,12 @@ present:
if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
m_freem(q->tqe_m);
else
- sbappendstream_locked(&so->so_rcv, q->tqe_m);
+ sbappendstream_locked(&so->so_rcv, q->tqe_m, 0);
if (q != &tqs)
- uma_zfree(V_tcp_reass_zone, q);
+ uma_zfree(tcp_reass_zone, q);
tp->t_segqlen--;
q = nq;
} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
- ND6_HINT(tp);
sorwakeup_locked(so);
return (flags);
}
diff --git a/freebsd/sys/netinet/tcp_sack.c b/freebsd/sys/netinet/tcp_sack.c
index 9cc1d86a..c7e32cba 100644
--- a/freebsd/sys/netinet/tcp_sack.c
+++ b/freebsd/sys/netinet/tcp_sack.c
@@ -97,6 +97,7 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -130,24 +131,24 @@ VNET_DECLARE(struct uma_zone *, sack_hole_zone);
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK");
VNET_DEFINE(int, tcp_do_sack) = 1;
#define V_tcp_do_sack VNET(tcp_do_sack)
-SYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_sack), 0, "Enable/Disable TCP SACK support");
VNET_DEFINE(int, tcp_sack_maxholes) = 128;
#define V_tcp_sack_maxholes VNET(tcp_sack_maxholes)
-SYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_sack_maxholes), 0,
"Maximum number of TCP SACK holes allowed per connection");
VNET_DEFINE(int, tcp_sack_globalmaxholes) = 65536;
#define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes)
-SYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxholes, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxholes, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_sack_globalmaxholes), 0,
"Global maximum number of TCP SACK holes");
VNET_DEFINE(int, tcp_sack_globalholes) = 0;
#define V_tcp_sack_globalholes VNET(tcp_sack_globalholes)
-SYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, CTLFLAG_VNET | CTLFLAG_RD,
&VNET_NAME(tcp_sack_globalholes), 0,
"Global number of TCP SACK holes currently allocated");
@@ -346,17 +347,22 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole)
* Process cumulative ACK and the TCP SACK option to update the scoreboard.
* tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
* the sequence space).
+ * Returns 1 if incoming ACK has previously unknown SACK information,
+ * 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any changes
+ * to that (i.e. left edge moving) would also be considered a change in SACK
+ * information which is slightly different than rfc6675.
*/
-void
+int
tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
{
struct sackhole *cur, *temp;
struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
- int i, j, num_sack_blks;
+ int i, j, num_sack_blks, sack_changed;
INP_WLOCK_ASSERT(tp->t_inpcb);
num_sack_blks = 0;
+ sack_changed = 0;
/*
* If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
* treat [SND.UNA, SEG.ACK) as if it is a SACK block.
@@ -370,6 +376,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
* received new blocks from the other side.
*/
if (to->to_flags & TOF_SACK) {
+ tp->sackhint.sacked_bytes = 0; /* reset */
for (i = 0; i < to->to_nsacks; i++) {
bcopy((to->to_sacks + i * TCPOLEN_SACK),
&sack, sizeof(sack));
@@ -380,8 +387,11 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
SEQ_GT(sack.start, th_ack) &&
SEQ_LT(sack.start, tp->snd_max) &&
SEQ_GT(sack.end, tp->snd_una) &&
- SEQ_LEQ(sack.end, tp->snd_max))
+ SEQ_LEQ(sack.end, tp->snd_max)) {
sack_blocks[num_sack_blks++] = sack;
+ tp->sackhint.sacked_bytes +=
+ (sack.end-sack.start);
+ }
}
}
/*
@@ -389,12 +399,12 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
* received.
*/
if (num_sack_blks == 0)
- return;
+ return (sack_changed);
/*
* Sort the SACK blocks so we can update the scoreboard with just one
- * pass. The overhead of sorting upto 4+1 elements is less than
- * making upto 4+1 passes over the scoreboard.
+ * pass. The overhead of sorting up to 4+1 elements is less than
+ * making up to 4+1 passes over the scoreboard.
*/
for (i = 0; i < num_sack_blks; i++) {
for (j = i + 1; j < num_sack_blks; j++) {
@@ -440,6 +450,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
tp->snd_fack = sblkp->end;
/* Go to the previous sack block. */
sblkp--;
+ sack_changed = 1;
} else {
/*
* We failed to add a new hole based on the current
@@ -456,9 +467,11 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
SEQ_LT(tp->snd_fack, sblkp->end))
tp->snd_fack = sblkp->end;
}
- } else if (SEQ_LT(tp->snd_fack, sblkp->end))
+ } else if (SEQ_LT(tp->snd_fack, sblkp->end)) {
/* fack is advanced. */
tp->snd_fack = sblkp->end;
+ sack_changed = 1;
+ }
/* We must have at least one SACK hole in scoreboard. */
KASSERT(!TAILQ_EMPTY(&tp->snd_holes),
("SACK scoreboard must not be empty"));
@@ -487,6 +500,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
("sackhint bytes rtx >= 0"));
+ sack_changed = 1;
if (SEQ_LEQ(sblkp->start, cur->start)) {
/* Data acks at least the beginning of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
@@ -542,6 +556,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
else
sblkp--;
}
+ return (sack_changed);
}
/*
@@ -586,7 +601,7 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
if (tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
}
#if 0
diff --git a/freebsd/sys/netinet/tcp_subr.c b/freebsd/sys/netinet/tcp_subr.c
index b175c0c0..cff9bd7b 100644
--- a/freebsd/sys/netinet/tcp_subr.c
+++ b/freebsd/sys/netinet/tcp_subr.c
@@ -47,18 +47,21 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
+#include <sys/eventhandler.h>
#include <sys/hhook.h>
#include <sys/kernel.h>
#include <sys/khelp.h>
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
+#include <sys/refcount.h>
#include <sys/mbuf.h>
#ifdef INET6
#include <sys/domain.h>
#endif
#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
@@ -68,10 +71,12 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/vnet.h>
-#include <netinet/cc.h>
#include <netinet/in.h>
+#include <netinet/in_fib.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -79,22 +84,32 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_icmp.h>
#include <netinet/ip_var.h>
#ifdef INET6
+#include <netinet/icmp6.h>
#include <netinet/ip6.h>
+#include <netinet6/in6_fib.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
+#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
+#include <netinet/cc/cc.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
#include <netinet/tcpip.h>
+#ifdef TCPPCAP
+#include <netinet/tcp_pcap.h>
+#endif
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
@@ -125,6 +140,8 @@ VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS;
VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
#endif
+struct rwlock tcp_function_lock;
+
static int
sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS)
{
@@ -141,8 +158,8 @@ sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS)
return (error);
}
-SYSCTL_VNET_PROC(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_mssdflt), 0,
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_mssdflt), 0,
&sysctl_net_inet_tcp_mss_check, "I",
"Default TCP Maximum Segment Size");
@@ -163,8 +180,8 @@ sysctl_net_inet_tcp_mss_v6_check(SYSCTL_HANDLER_ARGS)
return (error);
}
-SYSCTL_VNET_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_v6mssdflt), 0,
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_v6mssdflt), 0,
&sysctl_net_inet_tcp_mss_v6_check, "I",
"Default TCP Maximum Segment Size for IPv6");
#endif /* INET6 */
@@ -178,12 +195,12 @@ SYSCTL_VNET_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
* checking. This setting prevents us from sending too small packets.
*/
VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS;
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_minmss), 0,
"Minimum TCP Maximum Segment Size");
VNET_DEFINE(int, tcp_do_rfc1323) = 1;
-SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_rfc1323), 0,
"Enable rfc1323 (high performance TCP) extensions");
@@ -191,30 +208,30 @@ static int tcp_log_debug = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW,
&tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
-static int tcp_tcbhashsize = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN,
+static int tcp_tcbhashsize;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
&tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
static int do_tcpdrain = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
"Enable tcp_drain routine for extra help when low on mbufs");
-SYSCTL_VNET_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD,
+SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD,
&VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs");
static VNET_DEFINE(int, icmp_may_rst) = 1;
#define V_icmp_may_rst VNET(icmp_may_rst)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmp_may_rst), 0,
"Certain ICMP unreachable messages may abort connections in SYN_SENT");
static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0;
#define V_tcp_isn_reseed_interval VNET(tcp_isn_reseed_interval)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_isn_reseed_interval), 0,
"Seconds between reseeding of ISN secret");
-static int tcp_soreceive_stream = 0;
+static int tcp_soreceive_stream;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN,
&tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets");
@@ -231,9 +248,193 @@ VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);
static struct inpcb *tcp_notify(struct inpcb *, int);
static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int);
+static void tcp_mtudisc(struct inpcb *, int);
static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
void *ip4hdr, const void *ip6hdr);
+
+static struct tcp_function_block tcp_def_funcblk = {
+ "default",
+ tcp_output,
+ tcp_do_segment,
+ tcp_default_ctloutput,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ 0,
+ 0
+};
+
+int t_functions_inited = 0;
+struct tcp_funchead t_functions;
+static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk;
+
+static void
+init_tcp_functions(void)
+{
+ if (t_functions_inited == 0) {
+ TAILQ_INIT(&t_functions);
+ rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0);
+ t_functions_inited = 1;
+ }
+}
+
+static struct tcp_function_block *
+find_tcp_functions_locked(struct tcp_function_set *fs)
+{
+ struct tcp_function *f;
+ struct tcp_function_block *blk=NULL;
+
+ TAILQ_FOREACH(f, &t_functions, tf_next) {
+ if (strcmp(f->tf_fb->tfb_tcp_block_name, fs->function_set_name) == 0) {
+ blk = f->tf_fb;
+ break;
+ }
+ }
+ return(blk);
+}
+
+static struct tcp_function_block *
+find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s)
+{
+ struct tcp_function_block *rblk=NULL;
+ struct tcp_function *f;
+
+ TAILQ_FOREACH(f, &t_functions, tf_next) {
+ if (f->tf_fb == blk) {
+ rblk = blk;
+ if (s) {
+ *s = f;
+ }
+ break;
+ }
+ }
+ return (rblk);
+}
+
+struct tcp_function_block *
+find_and_ref_tcp_functions(struct tcp_function_set *fs)
+{
+ struct tcp_function_block *blk;
+
+ rw_rlock(&tcp_function_lock);
+ blk = find_tcp_functions_locked(fs);
+ if (blk)
+ refcount_acquire(&blk->tfb_refcnt);
+ rw_runlock(&tcp_function_lock);
+ return(blk);
+}
+
+struct tcp_function_block *
+find_and_ref_tcp_fb(struct tcp_function_block *blk)
+{
+ struct tcp_function_block *rblk;
+
+ rw_rlock(&tcp_function_lock);
+ rblk = find_tcp_fb_locked(blk, NULL);
+ if (rblk)
+ refcount_acquire(&rblk->tfb_refcnt);
+ rw_runlock(&tcp_function_lock);
+ return(rblk);
+}
+
+
+static int
+sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
+{
+ int error=ENOENT;
+ struct tcp_function_set fs;
+ struct tcp_function_block *blk;
+
+ memset(&fs, 0, sizeof(fs));
+ rw_rlock(&tcp_function_lock);
+ blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL);
+ if (blk) {
+ /* Found him */
+ strcpy(fs.function_set_name, blk->tfb_tcp_block_name);
+ fs.pcbcnt = blk->tfb_refcnt;
+ }
+ rw_runlock(&tcp_function_lock);
+ error = sysctl_handle_string(oidp, fs.function_set_name,
+ sizeof(fs.function_set_name), req);
+
+ /* Check for error or no change */
+ if (error != 0 || req->newptr == NULL)
+ return(error);
+
+ rw_wlock(&tcp_function_lock);
+ blk = find_tcp_functions_locked(&fs);
+ if ((blk == NULL) ||
+ (blk->tfb_flags & TCP_FUNC_BEING_REMOVED)) {
+ error = ENOENT;
+ goto done;
+ }
+ tcp_func_set_ptr = blk;
+done:
+ rw_wunlock(&tcp_function_lock);
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default,
+ CTLTYPE_STRING | CTLFLAG_RW,
+ NULL, 0, sysctl_net_inet_default_tcp_functions, "A",
+ "Set/get the default TCP functions");
+
+static int
+sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS)
+{
+ int error, cnt, linesz;
+ struct tcp_function *f;
+ char *buffer, *cp;
+ size_t bufsz, outsz;
+
+ cnt = 0;
+ rw_rlock(&tcp_function_lock);
+ TAILQ_FOREACH(f, &t_functions, tf_next) {
+ cnt++;
+ }
+ rw_runlock(&tcp_function_lock);
+
+ bufsz = (cnt+2) * (TCP_FUNCTION_NAME_LEN_MAX + 12) + 1;
+ buffer = malloc(bufsz, M_TEMP, M_WAITOK);
+
+ error = 0;
+ cp = buffer;
+
+ linesz = snprintf(cp, bufsz, "\n%-32s%c %s\n", "Stack", 'D', "PCB count");
+ cp += linesz;
+ bufsz -= linesz;
+ outsz = linesz;
+
+ rw_rlock(&tcp_function_lock);
+ TAILQ_FOREACH(f, &t_functions, tf_next) {
+ linesz = snprintf(cp, bufsz, "%-32s%c %u\n",
+ f->tf_fb->tfb_tcp_block_name,
+ (f->tf_fb == tcp_func_set_ptr) ? '*' : ' ',
+ f->tf_fb->tfb_refcnt);
+ if (linesz >= bufsz) {
+ error = EOVERFLOW;
+ break;
+ }
+ cp += linesz;
+ bufsz -= linesz;
+ outsz += linesz;
+ }
+ rw_runlock(&tcp_function_lock);
+ if (error == 0)
+ error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
+ free(buffer, M_TEMP);
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available,
+ CTLTYPE_STRING|CTLFLAG_RD,
+ NULL, 0, sysctl_net_inet_list_available, "A",
+ "list available TCP Function sets");
+
/*
* Target size of TCP PCB hash tables. Must be a power of two.
*
@@ -241,7 +442,7 @@ static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
* variable net.inet.tcp.tcbhashsize
*/
#ifndef TCBHASHSIZE
-#define TCBHASHSIZE 512
+#define TCBHASHSIZE 0
#endif
/*
@@ -261,6 +462,8 @@ static VNET_DEFINE(uma_zone_t, tcpcb_zone);
#define V_tcpcb_zone VNET(tcpcb_zone)
MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
+MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory");
+
static struct mtx isn_mtx;
#define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
@@ -288,48 +491,196 @@ tcp_inpcb_init(void *mem, int size, int flags)
return (0);
}
+/*
+ * Take a value and get the next power of 2 that doesn't overflow.
+ * Used to size the tcp_inpcb hash buckets.
+ */
+static int
+maketcp_hashsize(int size)
+{
+ int hashsize;
+
+ /*
+ * auto tune.
+ * get the next power of 2 higher than maxsockets.
+ */
+ hashsize = 1 << fls(size);
+ /* catch overflow, and just go one power of 2 smaller */
+ if (hashsize < size) {
+ hashsize = 1 << (fls(size) - 1);
+ }
+ return (hashsize);
+}
+
+int
+register_tcp_functions(struct tcp_function_block *blk, int wait)
+{
+ struct tcp_function_block *lblk;
+ struct tcp_function *n;
+ struct tcp_function_set fs;
+
+ if (t_functions_inited == 0) {
+ init_tcp_functions();
+ }
+ if ((blk->tfb_tcp_output == NULL) ||
+ (blk->tfb_tcp_do_segment == NULL) ||
+ (blk->tfb_tcp_ctloutput == NULL) ||
+ (strlen(blk->tfb_tcp_block_name) == 0)) {
+ /*
+ * These functions are required and you
+ * need a name.
+ */
+ return (EINVAL);
+ }
+ if (blk->tfb_tcp_timer_stop_all ||
+ blk->tfb_tcp_timer_activate ||
+ blk->tfb_tcp_timer_active ||
+ blk->tfb_tcp_timer_stop) {
+ /*
+ * If you define one timer function you
+ * must have them all.
+ */
+ if ((blk->tfb_tcp_timer_stop_all == NULL) ||
+ (blk->tfb_tcp_timer_activate == NULL) ||
+ (blk->tfb_tcp_timer_active == NULL) ||
+ (blk->tfb_tcp_timer_stop == NULL)) {
+ return (EINVAL);
+ }
+ }
+ n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait);
+ if (n == NULL) {
+ return (ENOMEM);
+ }
+ n->tf_fb = blk;
+ strcpy(fs.function_set_name, blk->tfb_tcp_block_name);
+ rw_wlock(&tcp_function_lock);
+ lblk = find_tcp_functions_locked(&fs);
+ if (lblk) {
+ /* Duplicate name space not allowed */
+ rw_wunlock(&tcp_function_lock);
+ free(n, M_TCPFUNCTIONS);
+ return (EALREADY);
+ }
+ refcount_init(&blk->tfb_refcnt, 0);
+ blk->tfb_flags = 0;
+ TAILQ_INSERT_TAIL(&t_functions, n, tf_next);
+ rw_wunlock(&tcp_function_lock);
+ return(0);
+}
+
+int
+deregister_tcp_functions(struct tcp_function_block *blk)
+{
+ struct tcp_function_block *lblk;
+ struct tcp_function *f;
+ int error=ENOENT;
+
+ if (strcmp(blk->tfb_tcp_block_name, "default") == 0) {
+ /* You can't un-register the default */
+ return (EPERM);
+ }
+ rw_wlock(&tcp_function_lock);
+ if (blk == tcp_func_set_ptr) {
+ /* You can't free the current default */
+ rw_wunlock(&tcp_function_lock);
+ return (EBUSY);
+ }
+ if (blk->tfb_refcnt) {
+ /* Still tcb attached, mark it. */
+ blk->tfb_flags |= TCP_FUNC_BEING_REMOVED;
+ rw_wunlock(&tcp_function_lock);
+ return (EBUSY);
+ }
+ lblk = find_tcp_fb_locked(blk, &f);
+ if (lblk) {
+ /* Found */
+ TAILQ_REMOVE(&t_functions, f, tf_next);
+ f->tf_fb = NULL;
+ free(f, M_TCPFUNCTIONS);
+ error = 0;
+ }
+ rw_wunlock(&tcp_function_lock);
+ return (error);
+}
+
void
tcp_init(void)
{
+ const char *tcbhash_tuneable;
int hashsize;
+ tcbhash_tuneable = "net.inet.tcp.tcbhashsize";
+
if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN,
&V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
printf("%s: WARNING: unable to register helper hook\n", __func__);
if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT,
&V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
printf("%s: WARNING: unable to register helper hook\n", __func__);
-
hashsize = TCBHASHSIZE;
- TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize);
+ TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
+ if (hashsize == 0) {
+ /*
+ * Auto tune the hash size based on maxsockets.
+ * A perfect hash would have a 1:1 mapping
+ * (hashsize = maxsockets) however it's been
+ * suggested that O(2) average is better.
+ */
+ hashsize = maketcp_hashsize(maxsockets / 4);
+ /*
+ * Our historical default is 512,
+ * do not autotune lower than this.
+ */
+ if (hashsize < 512)
+ hashsize = 512;
+ if (bootverbose && IS_DEFAULT_VNET(curvnet))
+ printf("%s: %s auto tuned to %d\n", __func__,
+ tcbhash_tuneable, hashsize);
+ }
+ /*
+ * We require a hashsize to be a power of two.
+ * Previously if it was not a power of two we would just reset it
+ * back to 512, which could be a nasty surprise if you did not notice
+ * the error message.
+ * Instead what we do is clip it to the closest power of two lower
+ * than the specified hash value.
+ */
if (!powerof2(hashsize)) {
- printf("WARNING: TCB hash size not a power of 2\n");
- hashsize = 512; /* safe default */
+ int oldhashsize = hashsize;
+
+ hashsize = maketcp_hashsize(hashsize);
+ /* prevent absurdly low value */
+ if (hashsize < 16)
+ hashsize = 16;
+ printf("%s: WARNING: TCB hash size not a power of 2, "
+ "clipped from %d to %d.\n", __func__, oldhashsize,
+ hashsize);
}
in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
- "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE,
- IPI_HASHFIELDS_4TUPLE);
+ "tcp_inpcb", tcp_inpcb_init, NULL, 0, IPI_HASHFIELDS_4TUPLE);
/*
* These have to be type stable for the benefit of the timers.
*/
V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
uma_zone_set_max(V_tcpcb_zone, maxsockets);
+ uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached");
tcp_tw_init();
syncache_init();
tcp_hc_init();
- tcp_reass_init();
TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack);
V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
/* Skip initialization of globals for non-default instances. */
if (!IS_DEFAULT_VNET(curvnet))
return;
+ tcp_reass_global_init();
+
/* XXX virtualize those bellow? */
tcp_delacktime = TCPTV_DELACK;
tcp_keepinit = TCPTV_KEEP_INIT;
@@ -340,11 +691,15 @@ tcp_init(void)
tcp_rexmit_min = TCPTV_MIN;
if (tcp_rexmit_min < 1)
tcp_rexmit_min = 1;
+ tcp_persmin = TCPTV_PERSMIN;
+ tcp_persmax = TCPTV_PERSMAX;
tcp_rexmit_slop = TCPTV_CPU_VAR;
tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
tcp_tcbhashsize = hashsize;
+ /* Setup the tcp function block list */
+ init_tcp_functions();
+ register_tcp_functions(&tcp_def_funcblk, M_WAITOK);
- TUNABLE_INT_FETCH("net.inet.tcp.soreceive_stream", &tcp_soreceive_stream);
if (tcp_soreceive_stream) {
#ifdef INET
tcp_usrreqs.pru_soreceive = soreceive_stream;
@@ -370,21 +725,64 @@ tcp_init(void)
SHUTDOWN_PRI_DEFAULT);
EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
+#ifdef TCPPCAP
+ tcp_pcap_init();
+#endif
+
+#ifdef TCP_RFC7413
+ tcp_fastopen_init();
+#endif
}
#ifdef VIMAGE
-void
-tcp_destroy(void)
+static void
+tcp_destroy(void *unused __unused)
{
+ int error, n;
- tcp_reass_destroy();
+ /*
+ * All our processes are gone, all our sockets should be cleaned
+ * up, which means, we should be past the tcp_discardcb() calls.
+ * Sleep to let all tcpcb timers really disappear and cleanup.
+ */
+ for (;;) {
+ INP_LIST_RLOCK(&V_tcbinfo);
+ n = V_tcbinfo.ipi_count;
+ INP_LIST_RUNLOCK(&V_tcbinfo);
+ if (n == 0)
+ break;
+ pause("tcpdes", hz / 10);
+ }
tcp_hc_destroy();
syncache_destroy();
tcp_tw_destroy();
in_pcbinfo_destroy(&V_tcbinfo);
+ /* tcp_discardcb() clears the sack_holes up. */
uma_zdestroy(V_sack_hole_zone);
uma_zdestroy(V_tcpcb_zone);
+
+#ifdef TCP_RFC7413
+ /*
+ * Cannot free the zone until all tcpcbs are released as we attach
+ * the allocations to them.
+ */
+ tcp_fastopen_destroy();
+#endif
+
+ error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_IN]);
+ if (error != 0) {
+ printf("%s: WARNING: unable to deregister helper hook "
+ "type=%d, id=%d: error %d returned\n", __func__,
+ HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, error);
+ }
+ error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_OUT]);
+ if (error != 0) {
+ printf("%s: WARNING: unable to deregister helper hook "
+ "type=%d, id=%d: error %d returned\n", __func__,
+ HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, error);
+ }
}
+VNET_SYSUNINIT(tcp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, tcp_destroy, NULL);
#endif
void
@@ -473,31 +871,33 @@ tcpip_maketemplate(struct inpcb *inp)
/*
* Send a single message to the TCP at address specified by
* the given TCP/IP header. If m == NULL, then we make a copy
- * of the tcpiphdr at ti and send directly to the addressed host.
+ * of the tcpiphdr at th and send directly to the addressed host.
* This is used to force keep alive messages out using the TCP
* template for a connection. If flags are given then we send
- * a message back to the TCP which originated the * segment ti,
+ * a message back to the TCP which originated the segment th,
* and discard the mbuf containing it and any other attached mbufs.
*
* In any case the ack and sequence number of the transmitted
* segment are as specified by the parameters.
*
- * NOTE: If m != NULL, then ti must point to *inside* the mbuf.
+ * NOTE: If m != NULL, then th must point to *inside* the mbuf.
*/
void
tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
tcp_seq ack, tcp_seq seq, int flags)
{
- int tlen;
- int win = 0;
+ struct tcpopt to;
+ struct inpcb *inp;
struct ip *ip;
+ struct mbuf *optm;
struct tcphdr *nth;
+ u_char *optp;
#ifdef INET6
struct ip6_hdr *ip6;
int isipv6;
#endif /* INET6 */
- int ipflags = 0;
- struct inpcb *inp;
+ int optlen, tlen, win;
+ bool incl_opts;
KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
@@ -514,18 +914,21 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
} else
inp = NULL;
+ incl_opts = false;
+ win = 0;
if (tp != NULL) {
if (!(flags & TH_RST)) {
win = sbspace(&inp->inp_socket->so_rcv);
if (win > (long)TCP_MAXWIN << tp->rcv_scale)
win = (long)TCP_MAXWIN << tp->rcv_scale;
}
+ if ((tp->t_flags & TF_NOOPT) == 0)
+ incl_opts = true;
}
if (m == NULL) {
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
- tlen = 0;
m->m_data += max_linkhdr;
#ifdef INET6
if (isipv6) {
@@ -535,35 +938,71 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
nth = (struct tcphdr *)(ip6 + 1);
} else
#endif /* INET6 */
- {
- bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
- ip = mtod(m, struct ip *);
- nth = (struct tcphdr *)(ip + 1);
- }
+ {
+ bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
+ ip = mtod(m, struct ip *);
+ nth = (struct tcphdr *)(ip + 1);
+ }
bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
flags = TH_ACK;
+ } else if (!M_WRITABLE(m)) {
+ struct mbuf *n;
+
+ /* Can't reuse 'm', allocate a new mbuf. */
+ n = m_gethdr(M_NOWAIT, MT_DATA);
+ if (n == NULL) {
+ m_freem(m);
+ return;
+ }
+
+ if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
+ m_freem(m);
+ m_freem(n);
+ return;
+ }
+
+ n->m_data += max_linkhdr;
+ /* m_len is set later */
+#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
+#ifdef INET6
+ if (isipv6) {
+ bcopy((caddr_t)ip6, mtod(n, caddr_t),
+ sizeof(struct ip6_hdr));
+ ip6 = mtod(n, struct ip6_hdr *);
+ xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
+ nth = (struct tcphdr *)(ip6 + 1);
+ } else
+#endif /* INET6 */
+ {
+ bcopy((caddr_t)ip, mtod(n, caddr_t), sizeof(struct ip));
+ ip = mtod(n, struct ip *);
+ xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
+ nth = (struct tcphdr *)(ip + 1);
+ }
+ bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
+ xchg(nth->th_dport, nth->th_sport, uint16_t);
+ th = nth;
+ m_freem(m);
+ m = n;
} else {
/*
* reuse the mbuf.
- * XXX MRT We inherrit the FIB, which is lucky.
+ * XXX MRT We inherit the FIB, which is lucky.
*/
m_freem(m->m_next);
m->m_next = NULL;
m->m_data = (caddr_t)ipgen;
- m_addr_changed(m);
/* m_len is set later */
- tlen = 0;
-#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
#ifdef INET6
if (isipv6) {
xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
nth = (struct tcphdr *)(ip6 + 1);
} else
#endif /* INET6 */
- {
- xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
- nth = (struct tcphdr *)(ip + 1);
- }
+ {
+ xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
+ nth = (struct tcphdr *)(ip + 1);
+ }
if (th != nth) {
/*
* this is usually a case when an extension header
@@ -576,13 +1015,65 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
xchg(nth->th_dport, nth->th_sport, uint16_t);
#undef xchg
}
+ tlen = 0;
+#ifdef INET6
+ if (isipv6)
+ tlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+#endif
+#if defined(INET) && defined(INET6)
+ else
+#endif
+#ifdef INET
+ tlen = sizeof (struct tcpiphdr);
+#endif
+#ifdef INVARIANTS
+ m->m_len = 0;
+ KASSERT(M_TRAILINGSPACE(m) >= tlen,
+ ("Not enough trailing space for message (m=%p, need=%d, have=%ld)",
+ m, tlen, (long)M_TRAILINGSPACE(m)));
+#endif
+ m->m_len = tlen;
+ to.to_flags = 0;
+ if (incl_opts) {
+ /* Make sure we have room. */
+ if (M_TRAILINGSPACE(m) < TCP_MAXOLEN) {
+ m->m_next = m_get(M_NOWAIT, MT_DATA);
+ if (m->m_next) {
+ optp = mtod(m->m_next, u_char *);
+ optm = m->m_next;
+ } else
+ incl_opts = false;
+ } else {
+ optp = (u_char *) (nth + 1);
+ optm = m;
+ }
+ }
+ if (incl_opts) {
+ /* Timestamps. */
+ if (tp->t_flags & TF_RCVD_TSTMP) {
+ to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
+ to.to_tsecr = tp->ts_recent;
+ to.to_flags |= TOF_TS;
+ }
+#ifdef TCP_SIGNATURE
+ /* TCP-MD5 (RFC2385). */
+ if (tp->t_flags & TF_SIGNATURE)
+ to.to_flags |= TOF_SIGNATURE;
+#endif
+
+ /* Add the options. */
+ tlen += optlen = tcp_addoptions(&to, optp);
+
+ /* Update m_len in the correct mbuf. */
+ optm->m_len += optlen;
+ } else
+ optlen = 0;
#ifdef INET6
if (isipv6) {
ip6->ip6_flow = 0;
ip6->ip6_vfc = IPV6_VERSION;
ip6->ip6_nxt = IPPROTO_TCP;
- ip6->ip6_plen = 0; /* Set in ip6_output(). */
- tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+ ip6->ip6_plen = htons(tlen - sizeof(*ip6));
}
#endif
#if defined(INET) && defined(INET6)
@@ -590,14 +1081,12 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
#endif
#ifdef INET
{
- tlen += sizeof (struct tcpiphdr);
- ip->ip_len = tlen;
+ ip->ip_len = htons(tlen);
ip->ip_ttl = V_ip_defttl;
if (V_path_mtu_discovery)
- ip->ip_off |= IP_DF;
+ ip->ip_off |= htons(IP_DF);
}
#endif
- m->m_len = tlen;
m->m_pkthdr.len = tlen;
m->m_pkthdr.rcvif = NULL;
#ifdef MAC
@@ -619,7 +1108,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
nth->th_seq = htonl(seq);
nth->th_ack = htonl(ack);
nth->th_x2 = 0;
- nth->th_off = sizeof (struct tcphdr) >> 2;
+ nth->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
nth->th_flags = flags;
if (tp != NULL)
nth->th_win = htons((u_short) (win >> tp->rcv_scale));
@@ -627,6 +1116,13 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
nth->th_win = htons((u_short)win);
nth->th_urp = 0;
+#ifdef TCP_SIGNATURE
+ if (to.to_flags & TOF_SIGNATURE) {
+ tcp_signature_compute(m, 0, 0, optlen, to.to_signature,
+ IPSEC_DIR_OUTBOUND);
+ }
+#endif
+
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
@@ -651,15 +1147,21 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG))
tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
#endif
+ TCP_PROBE3(debug__output, tp, th, mtod(m, const char *));
+ if (flags & TH_RST)
+ TCP_PROBE5(accept__refused, NULL, NULL, mtod(m, const char *),
+ tp, nth);
+
+ TCP_PROBE5(send, NULL, tp, mtod(m, const char *), tp, nth);
#ifdef INET6
if (isipv6)
- (void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp);
+ (void) ip6_output(m, NULL, NULL, 0, NULL, NULL, inp);
#endif /* INET6 */
#if defined(INET) && defined(INET6)
else
#endif
#ifdef INET
- (void) ip_output(m, NULL, NULL, ipflags, NULL, inp);
+ (void) ip_output(m, NULL, NULL, 0, NULL, inp);
#endif
}
@@ -687,7 +1189,10 @@ tcp_newtcpcb(struct inpcb *inp)
tp->ccv = &tm->ccv;
tp->ccv->type = IPPROTO_TCP;
tp->ccv->ccvc.tcp = tp;
-
+ rw_rlock(&tcp_function_lock);
+ tp->t_fb = tcp_func_set_ptr;
+ refcount_acquire(&tp->t_fb->tfb_refcnt);
+ rw_runlock(&tcp_function_lock);
/*
* Use the current system default CC algorithm.
*/
@@ -698,12 +1203,18 @@ tcp_newtcpcb(struct inpcb *inp)
if (CC_ALGO(tp)->cb_init != NULL)
if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) {
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
+ refcount_release(&tp->t_fb->tfb_refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
}
tp->osd = &tm->osd;
if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) {
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
+ refcount_release(&tp->t_fb->tfb_refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
}
@@ -713,25 +1224,31 @@ tcp_newtcpcb(struct inpcb *inp)
#endif
tp->t_timers = &tm->tt;
/* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
- tp->t_maxseg = tp->t_maxopd =
+ tp->t_maxseg =
#ifdef INET6
isipv6 ? V_tcp_v6mssdflt :
#endif /* INET6 */
V_tcp_mssdflt;
/* Set up our timeouts. */
- callout_init(&tp->t_timers->tt_rexmt, CALLOUT_MPSAFE);
- callout_init(&tp->t_timers->tt_persist, CALLOUT_MPSAFE);
- callout_init(&tp->t_timers->tt_keep, CALLOUT_MPSAFE);
- callout_init(&tp->t_timers->tt_2msl, CALLOUT_MPSAFE);
- callout_init(&tp->t_timers->tt_delack, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_rexmt, 1);
+ callout_init(&tp->t_timers->tt_persist, 1);
+ callout_init(&tp->t_timers->tt_keep, 1);
+ callout_init(&tp->t_timers->tt_2msl, 1);
+ callout_init(&tp->t_timers->tt_delack, 1);
if (V_tcp_do_rfc1323)
tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
if (V_tcp_do_sack)
tp->t_flags |= TF_SACK_PERMIT;
TAILQ_INIT(&tp->snd_holes);
- tp->t_inpcb = inp; /* XXX */
+ /*
+ * The tcpcb will hold a reference on its inpcb until tcp_discardcb()
+ * is called.
+ */
+ in_pcbref(inp); /* Reference for tcpcb */
+ tp->t_inpcb = inp;
+
/*
* Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
* rtt estimate. Set rttvar so that srtt + 4 * rttvar gives
@@ -751,6 +1268,15 @@ tcp_newtcpcb(struct inpcb *inp)
*/
inp->inp_ip_ttl = V_ip_defttl;
inp->inp_ppcb = tp;
+#ifdef TCPPCAP
+ /*
+ * Init the TCP PCAP queues.
+ */
+ tcp_pcap_tcpcb_init(tp);
+#endif
+ if (tp->t_fb->tfb_tcp_fb_init) {
+ (*tp->t_fb->tfb_tcp_fb_init)(tp);
+ }
return (tp); /* XXX */
}
@@ -779,7 +1305,7 @@ tcp_ccalgounload(struct cc_algo *unload_algo)
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
/*
* New connections already part way through being initialised
* with the CC algo we're removing will not race with this code
@@ -809,7 +1335,7 @@ tcp_ccalgounload(struct cc_algo *unload_algo)
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
@@ -827,12 +1353,12 @@ tcp_drop(struct tcpcb *tp, int errno)
{
struct socket *so = tp->t_inpcb->inp_socket;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_LOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
if (TCPS_HAVERCVDSYN(tp->t_state)) {
- tp->t_state = TCPS_CLOSED;
- (void) tcp_output(tp);
+ tcp_state_change(tp, TCPS_CLOSED);
+ (void) tp->t_fb->tfb_tcp_output(tp);
TCPSTAT_INC(tcps_drops);
} else
TCPSTAT_INC(tcps_conndrops);
@@ -850,6 +1376,7 @@ tcp_discardcb(struct tcpcb *tp)
#ifdef INET6
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
#endif /* INET6 */
+ int released;
INP_WLOCK_ASSERT(inp);
@@ -857,22 +1384,27 @@ tcp_discardcb(struct tcpcb *tp)
* Make sure that all of our timers are stopped before we delete the
* PCB.
*
- * XXXRW: Really, we would like to use callout_drain() here in order
- * to avoid races experienced in tcp_timer.c where a timer is already
- * executing at this point. However, we can't, both because we're
- * running in a context where we can't sleep, and also because we
- * hold locks required by the timers. What we instead need to do is
- * test to see if callout_drain() is required, and if so, defer some
- * portion of the remainder of tcp_discardcb() to an asynchronous
- * context that can callout_drain() and then continue. Some care
- * will be required to ensure that no further processing takes place
- * on the tcpcb, even though it hasn't been freed (a flag?).
+ * If stopping a timer fails, we schedule a discard function in same
+ * callout, and the last discard function called will take care of
+ * deleting the tcpcb.
*/
- callout_stop(&tp->t_timers->tt_rexmt);
- callout_stop(&tp->t_timers->tt_persist);
- callout_stop(&tp->t_timers->tt_keep);
- callout_stop(&tp->t_timers->tt_2msl);
- callout_stop(&tp->t_timers->tt_delack);
+ tp->t_timers->tt_draincnt = 0;
+ tcp_timer_stop(tp, TT_REXMT);
+ tcp_timer_stop(tp, TT_PERSIST);
+ tcp_timer_stop(tp, TT_KEEP);
+ tcp_timer_stop(tp, TT_2MSL);
+ tcp_timer_stop(tp, TT_DELACK);
+ if (tp->t_fb->tfb_tcp_timer_stop_all) {
+ /*
+ * Call the stop-all function of the methods,
+ * this function should call the tcp_timer_stop()
+ * method with each of the function specific timeouts.
+ * That stop will be called via the tfb_tcp_timer_stop()
+ * which should use the async drain function of the
+ * callout system (see tcp_var.h).
+ */
+ tp->t_fb->tfb_tcp_timer_stop_all(tp);
+ }
/*
* If we got enough samples through the srtt filter,
@@ -893,7 +1425,7 @@ tcp_discardcb(struct tcpcb *tp)
* Update the ssthresh always when the conditions below
* are satisfied. This gives us better new start value
* for the congestion avoidance for new connections.
- * ssthresh is only set if packet loss occured on a session.
+ * ssthresh is only set if packet loss occurred on a session.
*
* XXXRW: 'so' may be NULL here, and/or socket buffer may be
* being torn down. Ideally this code would not use 'so'.
@@ -909,14 +1441,14 @@ tcp_discardcb(struct tcpcb *tp)
ssthresh = 2;
ssthresh *= (u_long)(tp->t_maxseg +
#ifdef INET6
- (isipv6 ? sizeof (struct ip6_hdr) +
- sizeof (struct tcphdr) :
+ (isipv6 ? sizeof (struct ip6_hdr) +
+ sizeof (struct tcphdr) :
#endif
- sizeof (struct tcpiphdr)
+ sizeof (struct tcpiphdr)
#ifdef INET6
- )
+ )
#endif
- );
+ );
} else
ssthresh = 0;
metrics.rmx_ssthresh = ssthresh;
@@ -941,6 +1473,12 @@ tcp_discardcb(struct tcpcb *tp)
tcp_free_sackholes(tp);
+#ifdef TCPPCAP
+ /* Free the TCP PCAP queues. */
+ tcp_pcap_drain(&(tp->t_inpkts));
+ tcp_pcap_drain(&(tp->t_outpkts));
+#endif
+
/* Allow the CC algorithm to clean up after itself. */
if (CC_ALGO(tp)->cb_destroy != NULL)
CC_ALGO(tp)->cb_destroy(tp->ccv);
@@ -949,8 +1487,51 @@ tcp_discardcb(struct tcpcb *tp)
CC_ALGO(tp) = NULL;
inp->inp_ppcb = NULL;
- tp->t_inpcb = NULL;
- uma_zfree(V_tcpcb_zone, tp);
+ if (tp->t_timers->tt_draincnt == 0) {
+ /* We own the last reference on tcpcb, let's free it. */
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
+ refcount_release(&tp->t_fb->tfb_refcnt);
+ tp->t_inpcb = NULL;
+ uma_zfree(V_tcpcb_zone, tp);
+ released = in_pcbrele_wlocked(inp);
+ KASSERT(!released, ("%s: inp %p should not have been released "
+ "here", __func__, inp));
+ }
+}
+
+void
+tcp_timer_discard(void *ptp)
+{
+ struct inpcb *inp;
+ struct tcpcb *tp;
+
+ tp = (struct tcpcb *)ptp;
+ CURVNET_SET(tp->t_vnet);
+ INP_INFO_RLOCK(&V_tcbinfo);
+ inp = tp->t_inpcb;
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
+ __func__, tp));
+ INP_WLOCK(inp);
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0,
+ ("%s: tcpcb has to be stopped here", __func__));
+ tp->t_timers->tt_draincnt--;
+ if (tp->t_timers->tt_draincnt == 0) {
+ /* We own the last reference on this tcpcb, let's free it. */
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
+ refcount_release(&tp->t_fb->tfb_refcnt);
+ tp->t_inpcb = NULL;
+ uma_zfree(V_tcpcb_zone, tp);
+ if (in_pcbrele_wlocked(inp)) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+ }
+ }
+ INP_WUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
}
/*
@@ -963,15 +1544,27 @@ tcp_close(struct tcpcb *tp)
struct inpcb *inp = tp->t_inpcb;
struct socket *so;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_LOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
#ifdef TCP_OFFLOAD
if (tp->t_state == TCPS_LISTEN)
tcp_offload_listen_stop(tp);
#endif
+#ifdef TCP_RFC7413
+ /*
+ * This releases the TFO pending counter resource for TFO listen
+ * sockets as well as passively-created TFO sockets that transition
+ * from SYN_RECEIVED to CLOSED.
+ */
+ if (tp->t_tfo_pending) {
+ tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+ tp->t_tfo_pending = NULL;
+ }
+#endif
in_pcbdrop(inp);
TCPSTAT_INC(tcps_closed);
+ TCPSTATES_DEC(tp->t_state);
KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
so = inp->inp_socket;
soisdisconnected(so);
@@ -1009,9 +1602,9 @@ tcp_drain(void)
* XXX: The "Net/3" implementation doesn't imply that the TCP
* reassembly queue should be flushed, but in a situation
* where we're really low on mbufs, this is potentially
- * usefull.
+ * useful.
*/
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
if (inpb->inp_flags & INP_TIMEWAIT)
continue;
@@ -1019,10 +1612,17 @@ tcp_drain(void)
if ((tcpb = intotcpcb(inpb)) != NULL) {
tcp_reass_flush(tcpb);
tcp_clean_sackreport(tcpb);
+#ifdef TCPPCAP
+ if (tcp_pcap_aggressive_free) {
+ /* Free the TCP PCAP queues. */
+ tcp_pcap_drain(&(tcpb->t_inpkts));
+ tcp_pcap_drain(&(tcpb->t_outpkts));
+ }
+#endif
}
INP_WUNLOCK(inpb);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
@@ -1041,7 +1641,7 @@ tcp_notify(struct inpcb *inp, int error)
{
struct tcpcb *tp;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_LOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) ||
@@ -1061,6 +1661,10 @@ tcp_notify(struct inpcb *inp, int error)
if (tp->t_state == TCPS_ESTABLISHED &&
(error == EHOSTUNREACH || error == ENETUNREACH ||
error == EHOSTDOWN)) {
+ if (inp->inp_route.ro_rt) {
+ RTFREE(inp->inp_route.ro_rt);
+ inp->inp_route.ro_rt = (struct rtentry *)NULL;
+ }
return (inp);
} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
tp->t_softerror) {
@@ -1093,7 +1697,8 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
* resource-intensive to repeat twice on every request.
*/
if (req->oldptr == NULL) {
- n = V_tcbinfo.ipi_count + syncache_pcbcount();
+ n = V_tcbinfo.ipi_count +
+ counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
return (0);
@@ -1105,12 +1710,12 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
/*
* OK, now we're committed to doing something.
*/
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_LIST_RLOCK(&V_tcbinfo);
gencnt = V_tcbinfo.ipi_gencnt;
n = V_tcbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_LIST_RUNLOCK(&V_tcbinfo);
- m = syncache_pcbcount();
+ m = counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
+ (n + m) * sizeof(struct xtcpcb));
@@ -1130,10 +1735,8 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
return (error);
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
- if (inp_list == NULL)
- return (ENOMEM);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
@@ -1158,7 +1761,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
n = i;
error = 0;
@@ -1196,14 +1799,14 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
} else
INP_RUNLOCK(inp);
}
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_RLOCK(inp);
if (!in_pcbrele_rlocked(inp))
INP_RUNLOCK(inp);
}
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
if (!error) {
/*
@@ -1213,11 +1816,11 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
* while we were processing this request, and it
* might be necessary to retry.
*/
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_LIST_RLOCK(&V_tcbinfo);
xig.xig_gen = V_tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_tcbinfo.ipi_count + pcb_count;
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_LIST_RUNLOCK(&V_tcbinfo);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
free(inp_list, M_TEMP);
@@ -1354,16 +1957,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip)
notify = tcp_drop_syn_sent;
- /*
- * Redirects don't need to be handled up here.
- */
- else if (PRC_IS_REDIRECT(cmd))
- return;
- /*
- * Source quench is depreciated.
- */
- else if (cmd == PRC_QUENCH)
- return;
+
/*
* Hostdead is ugly because it goes linearly through all PCBs.
* XXX: We never get this from ICMP, otherwise it makes an
@@ -1373,75 +1967,79 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
ip = NULL;
else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
return;
- if (ip != NULL) {
- icp = (struct icmp *)((caddr_t)ip
- - offsetof(struct icmp, icmp_ip));
- th = (struct tcphdr *)((caddr_t)ip
- + (ip->ip_hl << 2));
- INP_INFO_WLOCK(&V_tcbinfo);
- inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport,
- ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
- if (inp != NULL) {
- if (!(inp->inp_flags & INP_TIMEWAIT) &&
- !(inp->inp_flags & INP_DROPPED) &&
- !(inp->inp_socket == NULL)) {
- icmp_tcp_seq = htonl(th->th_seq);
- tp = intotcpcb(inp);
- if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) &&
- SEQ_LT(icmp_tcp_seq, tp->snd_max)) {
- if (cmd == PRC_MSGSIZE) {
- /*
- * MTU discovery:
- * If we got a needfrag set the MTU
- * in the route to the suggested new
- * value (if given) and then notify.
- */
- bzero(&inc, sizeof(inc));
- inc.inc_faddr = faddr;
- inc.inc_fibnum =
- inp->inp_inc.inc_fibnum;
-
- mtu = ntohs(icp->icmp_nextmtu);
- /*
- * If no alternative MTU was
- * proposed, try the next smaller
- * one. ip->ip_len has already
- * been swapped in icmp_input().
- */
- if (!mtu)
- mtu = ip_next_mtu(ip->ip_len,
- 1);
- if (mtu < V_tcp_minmss
- + sizeof(struct tcpiphdr))
- mtu = V_tcp_minmss
- + sizeof(struct tcpiphdr);
- /*
- * Only cache the MTU if it
- * is smaller than the interface
- * or route MTU. tcp_mtudisc()
- * will do right thing by itself.
- */
- if (mtu <= tcp_maxmtu(&inc, NULL))
+
+ if (ip == NULL) {
+ in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify);
+ return;
+ }
+
+ icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip));
+ th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+ INP_INFO_RLOCK(&V_tcbinfo);
+ inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src,
+ th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
+ if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
+ /* signal EHOSTDOWN, as it flushes the cached route */
+ inp = (*notify)(inp, EHOSTDOWN);
+ if (inp != NULL)
+ INP_WUNLOCK(inp);
+ } else if (inp != NULL) {
+ if (!(inp->inp_flags & INP_TIMEWAIT) &&
+ !(inp->inp_flags & INP_DROPPED) &&
+ !(inp->inp_socket == NULL)) {
+ icmp_tcp_seq = ntohl(th->th_seq);
+ tp = intotcpcb(inp);
+ if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) &&
+ SEQ_LT(icmp_tcp_seq, tp->snd_max)) {
+ if (cmd == PRC_MSGSIZE) {
+ /*
+ * MTU discovery:
+ * If we got a needfrag set the MTU
+ * in the route to the suggested new
+ * value (if given) and then notify.
+ */
+ mtu = ntohs(icp->icmp_nextmtu);
+ /*
+ * If no alternative MTU was
+ * proposed, try the next smaller
+ * one.
+ */
+ if (!mtu)
+ mtu = ip_next_mtu(
+ ntohs(ip->ip_len), 1);
+ if (mtu < V_tcp_minmss +
+ sizeof(struct tcpiphdr))
+ mtu = V_tcp_minmss +
+ sizeof(struct tcpiphdr);
+ /*
+ * Only process the offered MTU if it
+ * is smaller than the current one.
+ */
+ if (mtu < tp->t_maxseg +
+ sizeof(struct tcpiphdr)) {
+ bzero(&inc, sizeof(inc));
+ inc.inc_faddr = faddr;
+ inc.inc_fibnum =
+ inp->inp_inc.inc_fibnum;
tcp_hc_updatemtu(&inc, mtu);
- tcp_mtudisc(inp, mtu);
- } else
- inp = (*notify)(inp,
- inetctlerrmap[cmd]);
- }
+ tcp_mtudisc(inp, mtu);
+ }
+ } else
+ inp = (*notify)(inp,
+ inetctlerrmap[cmd]);
}
- if (inp != NULL)
- INP_WUNLOCK(inp);
- } else {
- bzero(&inc, sizeof(inc));
- inc.inc_fport = th->th_dport;
- inc.inc_lport = th->th_sport;
- inc.inc_faddr = faddr;
- inc.inc_laddr = ip->ip_src;
- syncache_unreach(&inc, th);
}
- INP_INFO_WUNLOCK(&V_tcbinfo);
- } else
- in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify);
+ if (inp != NULL)
+ INP_WUNLOCK(inp);
+ } else {
+ bzero(&inc, sizeof(inc));
+ inc.inc_fport = th->th_dport;
+ inc.inc_lport = th->th_sport;
+ inc.inc_faddr = faddr;
+ inc.inc_laddr = ip->ip_src;
+ syncache_unreach(&inc, th);
+ }
+ INP_INFO_RUNLOCK(&V_tcbinfo);
}
#endif /* INET */
@@ -1449,75 +2047,146 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
void
tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
{
- struct tcphdr th;
+ struct in6_addr *dst;
+ struct tcphdr *th;
struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
struct ip6_hdr *ip6;
struct mbuf *m;
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ struct icmp6_hdr *icmp6;
struct ip6ctlparam *ip6cp = NULL;
const struct sockaddr_in6 *sa6_src = NULL;
- int off;
- struct tcp_portonly {
- u_int16_t th_sport;
- u_int16_t th_dport;
- } *thp;
+ struct in_conninfo inc;
+ tcp_seq icmp_tcp_seq;
+ unsigned int mtu;
+ unsigned int off;
+
if (sa->sa_family != AF_INET6 ||
sa->sa_len != sizeof(struct sockaddr_in6))
return;
- if (cmd == PRC_MSGSIZE)
- notify = tcp_mtudisc_notify;
- else if (!PRC_IS_REDIRECT(cmd) &&
- ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0))
- return;
- /* Source quench is depreciated. */
- else if (cmd == PRC_QUENCH)
- return;
-
/* if the parameter is from icmp6, decode it. */
if (d != NULL) {
ip6cp = (struct ip6ctlparam *)d;
+ icmp6 = ip6cp->ip6c_icmp6;
m = ip6cp->ip6c_m;
ip6 = ip6cp->ip6c_ip6;
off = ip6cp->ip6c_off;
sa6_src = ip6cp->ip6c_src;
+ dst = ip6cp->ip6c_finaldst;
} else {
m = NULL;
ip6 = NULL;
off = 0; /* fool gcc */
sa6_src = &sa6_any;
+ dst = NULL;
}
- if (ip6 != NULL) {
- struct in_conninfo inc;
- /*
- * XXX: We assume that when IPV6 is non NULL,
- * M and OFF are valid.
- */
+ if (cmd == PRC_MSGSIZE)
+ notify = tcp_mtudisc_notify;
+ else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
+ cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) &&
+ ip6 != NULL)
+ notify = tcp_drop_syn_sent;
- /* check if we can safely examine src and dst ports */
- if (m->m_pkthdr.len < off + sizeof(*thp))
- return;
+ /*
+ * Hostdead is ugly because it goes linearly through all PCBs.
+ * XXX: We never get this from ICMP, otherwise it makes an
+ * excellent DoS attack on machines with many connections.
+ */
+ else if (cmd == PRC_HOSTDEAD)
+ ip6 = NULL;
+ else if ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)
+ return;
- bzero(&th, sizeof(th));
- m_copydata(m, off, sizeof(*thp), (caddr_t)&th);
+ if (ip6 == NULL) {
+ in6_pcbnotify(&V_tcbinfo, sa, 0,
+ (const struct sockaddr *)sa6_src,
+ 0, cmd, NULL, notify);
+ return;
+ }
- in6_pcbnotify(&V_tcbinfo, sa, th.th_dport,
- (struct sockaddr *)ip6cp->ip6c_src,
- th.th_sport, cmd, NULL, notify);
+ /* Check if we can safely get the ports from the tcp hdr */
+ if (m == NULL ||
+ (m->m_pkthdr.len <
+ (int32_t) (off + offsetof(struct tcphdr, th_seq)))) {
+ return;
+ }
+ th = (struct tcphdr *) mtodo(ip6cp->ip6c_m, ip6cp->ip6c_off);
+ INP_INFO_RLOCK(&V_tcbinfo);
+ inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, th->th_dport,
+ &ip6->ip6_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
+ if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
+ /* signal EHOSTDOWN, as it flushes the cached route */
+ inp = (*notify)(inp, EHOSTDOWN);
+ if (inp != NULL)
+ INP_WUNLOCK(inp);
+ } else if (inp != NULL) {
+ if (!(inp->inp_flags & INP_TIMEWAIT) &&
+ !(inp->inp_flags & INP_DROPPED) &&
+ !(inp->inp_socket == NULL)) {
+ icmp_tcp_seq = ntohl(th->th_seq);
+ tp = intotcpcb(inp);
+ if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) &&
+ SEQ_LT(icmp_tcp_seq, tp->snd_max)) {
+ if (cmd == PRC_MSGSIZE) {
+ /*
+ * MTU discovery:
+ * If we got a needfrag set the MTU
+ * in the route to the suggested new
+ * value (if given) and then notify.
+ */
+ mtu = ntohl(icmp6->icmp6_mtu);
+ /*
+ * If no alternative MTU was
+ * proposed, or the proposed
+ * MTU was too small, set to
+ * the min.
+ */
+ if (mtu < IPV6_MMTU)
+ mtu = IPV6_MMTU - 8;
+
+
+ bzero(&inc, sizeof(inc));
+ inc.inc_fibnum = M_GETFIB(m);
+ inc.inc_flags |= INC_ISIPV6;
+ inc.inc6_faddr = *dst;
+ if (in6_setscope(&inc.inc6_faddr,
+ m->m_pkthdr.rcvif, NULL))
+ goto unlock_inp;
+
+ /*
+ * Only process the offered MTU if it
+ * is smaller than the current one.
+ */
+ if (mtu < tp->t_maxseg +
+ (sizeof (*th) + sizeof (*ip6))) {
+ tcp_hc_updatemtu(&inc, mtu);
+ tcp_mtudisc(inp, mtu);
+ ICMP6STAT_INC(icp6s_pmtuchg);
+ }
+ } else
+ inp = (*notify)(inp,
+ inet6ctlerrmap[cmd]);
+ }
+ }
+unlock_inp:
+ if (inp != NULL)
+ INP_WUNLOCK(inp);
+ } else {
bzero(&inc, sizeof(inc));
- inc.inc_fport = th.th_dport;
- inc.inc_lport = th.th_sport;
- inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr;
- inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr;
+ inc.inc_fibnum = M_GETFIB(m);
inc.inc_flags |= INC_ISIPV6;
- INP_INFO_WLOCK(&V_tcbinfo);
- syncache_unreach(&inc, &th);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- } else
- in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src,
- 0, cmd, NULL, notify);
+ inc.inc_fport = th->th_dport;
+ inc.inc_lport = th->th_sport;
+ inc.inc6_faddr = *dst;
+ inc.inc6_laddr = ip6->ip6_src;
+ syncache_unreach(&inc, th);
+ }
+ INP_INFO_RUNLOCK(&V_tcbinfo);
}
#endif /* INET6 */
@@ -1647,7 +2316,7 @@ tcp_drop_syn_sent(struct inpcb *inp, int errno)
{
struct tcpcb *tp;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) ||
@@ -1675,10 +2344,11 @@ static struct inpcb *
tcp_mtudisc_notify(struct inpcb *inp, int error)
{
- return (tcp_mtudisc(inp, -1));
+ tcp_mtudisc(inp, -1);
+ return (inp);
}
-struct inpcb *
+static void
tcp_mtudisc(struct inpcb *inp, int mtuoffer)
{
struct tcpcb *tp;
@@ -1687,7 +2357,7 @@ tcp_mtudisc(struct inpcb *inp, int mtuoffer)
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) ||
(inp->inp_flags & INP_DROPPED))
- return (inp);
+ return;
tp = intotcpcb(inp);
KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL"));
@@ -1708,8 +2378,7 @@ tcp_mtudisc(struct inpcb *inp, int mtuoffer)
tp->snd_recover = tp->snd_max;
if (tp->t_flags & TF_SACK_PERMIT)
EXIT_FASTRECOVERY(tp->t_flags);
- tcp_output(tp);
- return (inp);
+ tp->t_fb->tfb_tcp_output(tp);
}
#ifdef INET
@@ -1722,27 +2391,20 @@ tcp_mtudisc(struct inpcb *inp, int mtuoffer)
u_long
tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap)
{
- struct route sro;
- struct sockaddr_in *dst;
+ struct nhop4_extended nh4;
struct ifnet *ifp;
u_long maxmtu = 0;
KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer"));
- bzero(&sro, sizeof(sro));
if (inc->inc_faddr.s_addr != INADDR_ANY) {
- dst = (struct sockaddr_in *)&sro.ro_dst;
- dst->sin_family = AF_INET;
- dst->sin_len = sizeof(*dst);
- dst->sin_addr = inc->inc_faddr;
- in_rtalloc_ign(&sro, 0, inc->inc_fibnum);
- }
- if (sro.ro_rt != NULL) {
- ifp = sro.ro_rt->rt_ifp;
- if (sro.ro_rt->rt_rmx.rmx_mtu == 0)
- maxmtu = ifp->if_mtu;
- else
- maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+
+ if (fib4_lookup_nh_ext(inc->inc_fibnum, inc->inc_faddr,
+ NHR_REF, 0, &nh4) != 0)
+ return (0);
+
+ ifp = nh4.nh_ifp;
+ maxmtu = nh4.nh_mtu;
/* Report additional interface capabilities. */
if (cap != NULL) {
@@ -1754,7 +2416,7 @@ tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap)
cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
}
}
- RTFREE(sro.ro_rt);
+ fib4_free_nh_ext(inc->inc_fibnum, &nh4);
}
return (maxmtu);
}
@@ -1764,26 +2426,22 @@ tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap)
u_long
tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap)
{
- struct route_in6 sro6;
+ struct nhop6_extended nh6;
+ struct in6_addr dst6;
+ uint32_t scopeid;
struct ifnet *ifp;
u_long maxmtu = 0;
KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer"));
- bzero(&sro6, sizeof(sro6));
if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) {
- sro6.ro_dst.sin6_family = AF_INET6;
- sro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
- sro6.ro_dst.sin6_addr = inc->inc6_faddr;
- in6_rtalloc_ign(&sro6, 0, inc->inc_fibnum);
- }
- if (sro6.ro_rt != NULL) {
- ifp = sro6.ro_rt->rt_ifp;
- if (sro6.ro_rt->rt_rmx.rmx_mtu == 0)
- maxmtu = IN6_LINKMTU(sro6.ro_rt->rt_ifp);
- else
- maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu,
- IN6_LINKMTU(sro6.ro_rt->rt_ifp));
+ in6_splitscope(&inc->inc6_faddr, &dst6, &scopeid);
+ if (fib6_lookup_nh_ext(inc->inc_fibnum, &dst6, scopeid, 0,
+ 0, &nh6) != 0)
+ return (0);
+
+ ifp = nh6.nh_ifp;
+ maxmtu = nh6.nh_mtu;
/* Report additional interface capabilities. */
if (cap != NULL) {
@@ -1795,13 +2453,66 @@ tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap)
cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
}
}
- RTFREE(sro6.ro_rt);
+ fib6_free_nh_ext(inc->inc_fibnum, &nh6);
}
return (maxmtu);
}
#endif /* INET6 */
+/*
+ * Calculate effective SMSS per RFC5681 definition for a given TCP
+ * connection at its current state, taking into account SACK and etc.
+ */
+u_int
+tcp_maxseg(const struct tcpcb *tp)
+{
+ u_int optlen;
+
+ if (tp->t_flags & TF_NOOPT)
+ return (tp->t_maxseg);
+
+ /*
+ * Here we have a simplified code from tcp_addoptions(),
+ * without a proper loop, and having most of paddings hardcoded.
+ * We might make mistakes with padding here in some edge cases,
+ * but this is harmless, since result of tcp_maxseg() is used
+ * only in cwnd and ssthresh estimations.
+ */
+#define PAD(len) ((((len) / 4) + !!((len) % 4)) * 4)
+ if (TCPS_HAVEESTABLISHED(tp->t_state)) {
+ if (tp->t_flags & TF_RCVD_TSTMP)
+ optlen = TCPOLEN_TSTAMP_APPA;
+ else
+ optlen = 0;
+#ifdef TCP_SIGNATURE
+ if (tp->t_flags & TF_SIGNATURE)
+ optlen += PAD(TCPOLEN_SIGNATURE);
+#endif
+ if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) {
+ optlen += TCPOLEN_SACKHDR;
+ optlen += tp->rcv_numsacks * TCPOLEN_SACK;
+ optlen = PAD(optlen);
+ }
+ } else {
+ if (tp->t_flags & TF_REQ_TSTMP)
+ optlen = TCPOLEN_TSTAMP_APPA;
+ else
+ optlen = PAD(TCPOLEN_MAXSEG);
+ if (tp->t_flags & TF_REQ_SCALE)
+ optlen += PAD(TCPOLEN_WINDOW);
+#ifdef TCP_SIGNATURE
+ if (tp->t_flags & TF_SIGNATURE)
+ optlen += PAD(TCPOLEN_SIGNATURE);
+#endif
+ if (tp->t_flags & TF_SACK_PERMIT)
+ optlen += PAD(TCPOLEN_SACK_PERMITTED);
+ }
+#undef PAD
+ optlen = min(optlen, TCP_MAXOLEN);
+ return (tp->t_maxseg - optlen);
+}
+
#ifdef IPSEC
/* compute ESP/AH header size for TCP, including outer IP header. */
size_t
@@ -1816,9 +2527,10 @@ ipsec_hdrsiz_tcp(struct tcpcb *tp)
#endif
struct tcphdr *th;
- if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL))
+ if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL) ||
+ (!key_havesp(IPSEC_DIR_OUTBOUND)))
return (0);
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (!m)
return (0);
@@ -1859,55 +2571,20 @@ tcp_signature_apply(void *fstate, void *data, u_int len)
}
/*
- * Compute TCP-MD5 hash of a TCP segment. (RFC2385)
- *
- * Parameters:
- * m pointer to head of mbuf chain
- * _unused
- * len length of TCP segment data, excluding options
- * optlen length of TCP segment options
- * buf pointer to storage for computed MD5 digest
- * direction direction of flow (IPSEC_DIR_INBOUND or OUTBOUND)
- *
- * We do this over ip, tcphdr, segment data, and the key in the SADB.
- * When called from tcp_input(), we can be sure that th_sum has been
- * zeroed out and verified already.
- *
- * Return 0 if successful, otherwise return -1.
- *
* XXX The key is retrieved from the system's PF_KEY SADB, by keying a
* search with the destination IP address, and a 'magic SPI' to be
* determined by the application. This is hardcoded elsewhere to 1179
- * right now. Another branch of this code exists which uses the SPD to
- * specify per-application flows but it is unstable.
- */
-int
-tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen,
- u_char *buf, u_int direction)
+*/
+struct secasvar *
+tcp_get_sav(struct mbuf *m, u_int direction)
{
union sockaddr_union dst;
-#ifdef INET
- struct ippseudo ippseudo;
-#endif
- MD5_CTX ctx;
- int doff;
- struct ip *ip;
-#ifdef INET
- struct ipovly *ipovly;
-#endif
struct secasvar *sav;
- struct tcphdr *th;
+ struct ip *ip;
#ifdef INET6
struct ip6_hdr *ip6;
- struct in6_addr in6;
char ip6buf[INET6_ADDRSTRLEN];
- uint32_t plen;
- uint16_t nhdr;
#endif
- u_short savecsum;
-
- KASSERT(m != NULL, ("NULL mbuf chain"));
- KASSERT(buf != NULL, ("NULL signature pointer"));
/* Extract the destination from the IP header in the mbuf. */
bzero(&dst, sizeof(union sockaddr_union));
@@ -1934,7 +2611,7 @@ tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen,
break;
#endif
default:
- return (EINVAL);
+ return (NULL);
/* NOTREACHED */
break;
}
@@ -1949,9 +2626,61 @@ tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen,
ip6_sprintf(ip6buf, &dst.sin6.sin6_addr) :
#endif
"(unsupported)"));
- return (EINVAL);
}
+ return (sav);
+}
+
+/*
+ * Compute TCP-MD5 hash of a TCP segment. (RFC2385)
+ *
+ * Parameters:
+ * m pointer to head of mbuf chain
+ * len length of TCP segment data, excluding options
+ * optlen length of TCP segment options
+ * buf pointer to storage for computed MD5 digest
+ * sav pointer to security assosiation
+ *
+ * We do this over ip, tcphdr, segment data, and the key in the SADB.
+ * When called from tcp_input(), we can be sure that th_sum has been
+ * zeroed out and verified already.
+ *
+ * Releases reference to SADB key before return.
+ *
+ * Return 0 if successful, otherwise return -1.
+ *
+ */
+int
+tcp_signature_do_compute(struct mbuf *m, int len, int optlen,
+ u_char *buf, struct secasvar *sav)
+{
+#ifdef INET
+ struct ippseudo ippseudo;
+#endif
+ MD5_CTX ctx;
+ int doff;
+ struct ip *ip;
+#ifdef INET
+ struct ipovly *ipovly;
+#endif
+ struct tcphdr *th;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+ struct in6_addr in6;
+ uint32_t plen;
+ uint16_t nhdr;
+#endif
+ u_short savecsum;
+
+ KASSERT(m != NULL, ("NULL mbuf chain"));
+ KASSERT(buf != NULL, ("NULL signature pointer"));
+
+ /* Extract the destination from the IP header in the mbuf. */
+ ip = mtod(m, struct ip *);
+#ifdef INET6
+ ip6 = NULL; /* Make the compiler happy. */
+#endif
+
MD5Init(&ctx);
/*
* Step 1: Update MD5 hash with IP(v6) pseudo-header.
@@ -2008,7 +2737,8 @@ tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen,
break;
#endif
default:
- return (EINVAL);
+ KEY_FREESAV(&sav);
+ return (-1);
/* NOTREACHED */
break;
}
@@ -2042,6 +2772,23 @@ tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen,
}
/*
+ * Compute TCP-MD5 hash of a TCP segment. (RFC2385)
+ *
+ * Return 0 if successful, otherwise return -1.
+ */
+int
+tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen,
+ u_char *buf, u_int direction)
+{
+ struct secasvar *sav;
+
+ if ((sav = tcp_get_sav(m, direction)) == NULL)
+ return (-1);
+
+ return (tcp_signature_do_compute(m, len, optlen, buf, sav));
+}
+
+/*
* Verify the TCP-MD5 hash of a TCP segment. (RFC2385)
*
* Parameters:
@@ -2170,7 +2917,7 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
default:
return (EINVAL);
}
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
switch (addrs[0].ss_family) {
#ifdef INET6
case AF_INET6:
@@ -2209,12 +2956,12 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
INP_WUNLOCK(inp);
} else
error = ESRCH;
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
-SYSCTL_VNET_PROC(_net_inet_tcp, TCPCTL_DROP, drop,
- CTLTYPE_STRUCT|CTLFLAG_WR|CTLFLAG_SKIP, NULL,
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop,
+ CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL,
0, sysctl_drop, "", "Drop TCP connection");
/*
@@ -2332,3 +3079,21 @@ tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
panic("%s: string too long", __func__);
return (s);
}
+
+/*
+ * A subroutine which makes it easy to track TCP state changes with DTrace.
+ * This function shouldn't be called for t_state initializations that don't
+ * correspond to actual TCP state transitions.
+ */
+void
+tcp_state_change(struct tcpcb *tp, int newstate)
+{
+#if defined(KDTRACE_HOOKS)
+ int pstate = tp->t_state;
+#endif
+
+ TCPSTATES_DEC(tp->t_state);
+ TCPSTATES_INC(newstate);
+ tp->t_state = newstate;
+ TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate);
+}
diff --git a/freebsd/sys/netinet/tcp_syncache.c b/freebsd/sys/netinet/tcp_syncache.c
index 10bd00ae..d7da3a01 100644
--- a/freebsd/sys/netinet/tcp_syncache.c
+++ b/freebsd/sys/netinet/tcp_syncache.c
@@ -2,13 +2,13 @@
/*-
* Copyright (c) 2001 McAfee, Inc.
- * Copyright (c) 2006 Andre Oppermann, Internet Business Solutions AG
+ * Copyright (c) 2006,2013 Andre Oppermann, Internet Business Solutions AG
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Jonathan Lemon
* and McAfee Research, the Security Research Division of McAfee, Inc. under
* DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
- * DARPA CHATS research program.
+ * DARPA CHATS research program. [2001 McAfee, Inc.]
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -42,6 +42,8 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/hash.h>
+#include <sys/refcount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/limits.h>
@@ -49,7 +51,6 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
-#include <sys/md5.h>
#include <sys/proc.h> /* for proc0 declaration */
#include <sys/random.h>
#include <sys/socket.h>
@@ -57,9 +58,13 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <sys/ucred.h>
+#include <sys/md5.h>
+#include <crypto/siphash/siphash.h>
+
#include <vm/uma.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -78,6 +83,9 @@ __FBSDID("$FreeBSD$");
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/tcp.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -104,13 +112,13 @@ __FBSDID("$FreeBSD$");
static VNET_DEFINE(int, tcp_syncookies) = 1;
#define V_tcp_syncookies VNET(tcp_syncookies)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_syncookies), 0,
"Use TCP SYN cookies if the syncache overflows");
static VNET_DEFINE(int, tcp_syncookiesonly) = 0;
#define V_tcp_syncookiesonly VNET(tcp_syncookiesonly)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_syncookiesonly), 0,
"Use only TCP SYN cookies");
@@ -121,20 +129,27 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_RW,
static void syncache_drop(struct syncache *, struct syncache_head *);
static void syncache_free(struct syncache *);
static void syncache_insert(struct syncache *, struct syncache_head *);
-struct syncache *syncache_lookup(struct in_conninfo *, struct syncache_head **);
-static int syncache_respond(struct syncache *);
+static int syncache_respond(struct syncache *, struct syncache_head *, int,
+ const struct mbuf *);
static struct socket *syncache_socket(struct syncache *, struct socket *,
struct mbuf *m);
-static int syncache_sysctl_count(SYSCTL_HANDLER_ARGS);
static void syncache_timeout(struct syncache *sc, struct syncache_head *sch,
int docallout);
static void syncache_timer(void *);
-static void syncookie_generate(struct syncache_head *, struct syncache *,
- u_int32_t *);
+
+static uint32_t syncookie_mac(struct in_conninfo *, tcp_seq, uint8_t,
+ uint8_t *, uintptr_t);
+static tcp_seq syncookie_generate(struct syncache_head *, struct syncache *);
static struct syncache
*syncookie_lookup(struct in_conninfo *, struct syncache_head *,
- struct syncache *, struct tcpopt *, struct tcphdr *,
+ struct syncache *, struct tcphdr *, struct tcpopt *,
struct socket *);
+static void syncookie_reseed(void *);
+#ifdef INVARIANTS
+static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
+ struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
+ struct socket *lso);
+#endif
/*
* Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
@@ -153,54 +168,32 @@ static VNET_DEFINE(struct tcp_syncache, tcp_syncache);
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0,
"TCP SYN cache");
-SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN,
+SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(tcp_syncache.bucket_limit), 0,
"Per-bucket hash limit for syncache");
-SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
+SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(tcp_syncache.cache_limit), 0,
"Overall entry limit for syncache");
-SYSCTL_VNET_PROC(_net_inet_tcp_syncache, OID_AUTO, count, (CTLTYPE_UINT|CTLFLAG_RD),
- NULL, 0, &syncache_sysctl_count, "IU",
- "Current number of entries in syncache");
+SYSCTL_UMA_CUR(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_VNET,
+ &VNET_NAME(tcp_syncache.zone), "Current number of entries in syncache");
-SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
+SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(tcp_syncache.hashsize), 0,
"Size of TCP syncache hashtable");
-SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW,
+SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_syncache.rexmt_limit), 0,
"Limit on SYN/ACK retransmissions");
VNET_DEFINE(int, tcp_sc_rst_sock_fail) = 1;
-SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, rst_on_sock_fail,
- CTLFLAG_RW, &VNET_NAME(tcp_sc_rst_sock_fail), 0,
+SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rst_on_sock_fail,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_sc_rst_sock_fail), 0,
"Send reset on socket allocation failure");
static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");
-#define SYNCACHE_HASH(inc, mask) \
- ((V_tcp_syncache.hash_secret ^ \
- (inc)->inc_faddr.s_addr ^ \
- ((inc)->inc_faddr.s_addr >> 16) ^ \
- (inc)->inc_fport ^ (inc)->inc_lport) & mask)
-
-#define SYNCACHE_HASH6(inc, mask) \
- ((V_tcp_syncache.hash_secret ^ \
- (inc)->inc6_faddr.s6_addr32[0] ^ \
- (inc)->inc6_faddr.s6_addr32[3] ^ \
- (inc)->inc_fport ^ (inc)->inc_lport) & mask)
-
-#define ENDPTS_EQ(a, b) ( \
- (a)->ie_fport == (b)->ie_fport && \
- (a)->ie_lport == (b)->ie_lport && \
- (a)->ie_faddr.s_addr == (b)->ie_faddr.s_addr && \
- (a)->ie_laddr.s_addr == (b)->ie_laddr.s_addr \
-)
-
-#define ENDPTS6_EQ(a, b) (memcmp(a, b, sizeof(*a)) == 0)
-
#define SCH_LOCK(sch) mtx_lock(&(sch)->sch_mtx)
#define SCH_UNLOCK(sch) mtx_unlock(&(sch)->sch_mtx)
#define SCH_LOCK_ASSERT(sch) mtx_assert(&(sch)->sch_mtx, MA_OWNED)
@@ -254,17 +247,19 @@ syncache_init(void)
V_tcp_syncache.hashbase = malloc(V_tcp_syncache.hashsize *
sizeof(struct syncache_head), M_SYNCACHE, M_WAITOK | M_ZERO);
- /* Initialize the hash buckets. */
- for (i = 0; i < V_tcp_syncache.hashsize; i++) {
#ifdef VIMAGE
- V_tcp_syncache.hashbase[i].sch_vnet = curvnet;
+ V_tcp_syncache.vnet = curvnet;
#endif
+
+ /* Initialize the hash buckets. */
+ for (i = 0; i < V_tcp_syncache.hashsize; i++) {
TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket);
mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
NULL, MTX_DEF);
callout_init_mtx(&V_tcp_syncache.hashbase[i].sch_timer,
&V_tcp_syncache.hashbase[i].sch_mtx, 0);
V_tcp_syncache.hashbase[i].sch_length = 0;
+ V_tcp_syncache.hashbase[i].sch_sc = &V_tcp_syncache;
}
/* Create the syncache entry zone. */
@@ -272,6 +267,13 @@ syncache_init(void)
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
V_tcp_syncache.cache_limit = uma_zone_set_max(V_tcp_syncache.zone,
V_tcp_syncache.cache_limit);
+
+ /* Start the SYN cookie reseeder callout. */
+ callout_init(&V_tcp_syncache.secret.reseed, 1);
+ arc4rand(V_tcp_syncache.secret.key[0], SYNCOOKIE_SECRET_SIZE, 0);
+ arc4rand(V_tcp_syncache.secret.key[1], SYNCOOKIE_SECRET_SIZE, 0);
+ callout_reset(&V_tcp_syncache.secret.reseed, SYNCOOKIE_LIFETIME * hz,
+ syncookie_reseed, &V_tcp_syncache);
}
#ifdef VIMAGE
@@ -282,6 +284,12 @@ syncache_destroy(void)
struct syncache *sc, *nsc;
int i;
+ /*
+ * Stop the re-seed timer before freeing resources. No need to
+ * possibly schedule it another time.
+ */
+ callout_drain(&V_tcp_syncache.secret.reseed);
+
/* Cleanup hash buckets: stop timers, free entries, destroy locks. */
for (i = 0; i < V_tcp_syncache.hashsize; i++) {
@@ -308,15 +316,6 @@ syncache_destroy(void)
}
#endif
-static int
-syncache_sysctl_count(SYSCTL_HANDLER_ARGS)
-{
- int count;
-
- count = uma_zone_get_cur(V_tcp_syncache.zone);
- return (sysctl_handle_int(oidp, &count, 0, req));
-}
-
/*
* Inserts a syncache entry into the specified bucket row.
* Locks and unlocks the syncache_head autonomously.
@@ -359,6 +358,7 @@ syncache_insert(struct syncache *sc, struct syncache_head *sch)
SCH_UNLOCK(sch);
+ TCPSTATES_INC(TCPS_SYN_RECEIVED);
TCPSTAT_INC(tcps_sc_added);
}
@@ -372,6 +372,7 @@ syncache_drop(struct syncache *sc, struct syncache_head *sch)
SCH_LOCK_ASSERT(sch);
+ TCPSTATES_DEC(TCPS_SYN_RECEIVED);
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
@@ -393,7 +394,7 @@ static void
syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout)
{
sc->sc_rxttime = ticks +
- TCPTV_RTOBASE * (tcp_backoff[sc->sc_rxmits]);
+ TCPTV_RTOBASE * (tcp_syn_backoff[sc->sc_rxmits]);
sc->sc_rxmits++;
if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) {
sch->sch_nextc = sc->sc_rxttime;
@@ -416,7 +417,7 @@ syncache_timer(void *xsch)
int tick = ticks;
char *s;
- CURVNET_SET(sch->sch_vnet);
+ CURVNET_SET(sch->sch_sc->vnet);
/* NB: syncache_head has already been locked by the callout. */
SCH_LOCK_ASSERT(sch);
@@ -459,7 +460,7 @@ syncache_timer(void *xsch)
free(s, M_TCPLOG);
}
- (void) syncache_respond(sc);
+ syncache_respond(sc, sch, 1, NULL);
TCPSTAT_INC(tcps_sc_retransmitted);
syncache_timeout(sc, sch, 0);
}
@@ -473,46 +474,34 @@ syncache_timer(void *xsch)
* Find an entry in the syncache.
* Returns always with locked syncache_head plus a matching entry or NULL.
*/
-struct syncache *
+static struct syncache *
syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
{
struct syncache *sc;
struct syncache_head *sch;
+ uint32_t hash;
-#ifdef INET6
- if (inc->inc_flags & INC_ISIPV6) {
- sch = &V_tcp_syncache.hashbase[
- SYNCACHE_HASH6(inc, V_tcp_syncache.hashmask)];
- *schp = sch;
-
- SCH_LOCK(sch);
+ /*
+ * The hash is built on foreign port + local port + foreign address.
+ * We rely on the fact that struct in_conninfo starts with 16 bits
+ * of foreign port, then 16 bits of local port then followed by 128
+ * bits of foreign address. In case of IPv4 address, the first 3
+ * 32-bit words of the address always are zeroes.
+ */
+ hash = jenkins_hash32((uint32_t *)&inc->inc_ie, 5,
+ V_tcp_syncache.hash_secret) & V_tcp_syncache.hashmask;
- /* Circle through bucket row to find matching entry. */
- TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
- if (ENDPTS6_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie))
- return (sc);
- }
- } else
-#endif
- {
- sch = &V_tcp_syncache.hashbase[
- SYNCACHE_HASH(inc, V_tcp_syncache.hashmask)];
- *schp = sch;
+ sch = &V_tcp_syncache.hashbase[hash];
+ *schp = sch;
+ SCH_LOCK(sch);
- SCH_LOCK(sch);
+ /* Circle through bucket row to find matching entry. */
+ TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash)
+ if (bcmp(&inc->inc_ie, &sc->sc_inc.inc_ie,
+ sizeof(struct in_endpoints)) == 0)
+ break;
- /* Circle through bucket row to find matching entry. */
- TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
-#ifdef INET6
- if (sc->sc_inc.inc_flags & INC_ISIPV6)
- continue;
-#endif
- if (ENDPTS_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie))
- return (sc);
- }
- }
- SCH_LOCK_ASSERT(*schp);
- return (NULL); /* always returns with locked sch */
+ return (sc); /* Always returns with locked sch. */
}
/*
@@ -644,17 +633,20 @@ done:
/*
* Build a new TCP socket structure from a syncache entry.
+ *
+ * On success return the newly created socket with its underlying inp locked.
*/
static struct socket *
syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
{
+ struct tcp_function_block *blk;
struct inpcb *inp = NULL;
struct socket *so;
struct tcpcb *tp;
int error;
char *s;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
/*
* Ok, create the full blown connection, and set things up
@@ -662,7 +654,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
* connection when the SYN arrived. If we can't create
* the connection, abort it.
*/
- so = sonewconn(lso, SS_ISCONNECTED);
+ so = sonewconn(lso, 0);
if (so == NULL) {
/*
* Drop the connection; we will either send a RST or
@@ -685,6 +677,15 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
inp = sotoinpcb(so);
inp->inp_inc.inc_fibnum = so->so_fibnum;
INP_WLOCK(inp);
+ /*
+ * Exclusive pcbinfo lock is not required in syncache socket case even
+ * if two inpcb locks can be acquired simultaneously:
+ * - the inpcb in LISTEN state,
+ * - the newly created inp.
+ *
+ * In this case, an inp cannot be at same time in LISTEN state and
+ * just created by an accept() call.
+ */
INP_HASH_WLOCK(&V_tcbinfo);
/* Insert new socket into PCB hash list. */
@@ -702,6 +703,15 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
#endif
/*
+ * If there's an mbuf and it has a flowid, then let's initialise the
+ * inp with that particular flowid.
+ */
+ if (m != NULL && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
+ inp->inp_flowid = m->m_pkthdr.flowid;
+ inp->inp_flowtype = M_HASHTYPE_GET(m);
+ }
+
+ /*
* Install in the reservation hash table for now, but don't yet
* install a connection group since the full 4-tuple isn't yet
* configured.
@@ -824,11 +834,31 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
#endif /* INET */
INP_HASH_WUNLOCK(&V_tcbinfo);
tp = intotcpcb(inp);
- tp->t_state = TCPS_SYN_RECEIVED;
+ tcp_state_change(tp, TCPS_SYN_RECEIVED);
tp->iss = sc->sc_iss;
tp->irs = sc->sc_irs;
tcp_rcvseqinit(tp);
tcp_sendseqinit(tp);
+ blk = sototcpcb(lso)->t_fb;
+ if (blk != tp->t_fb) {
+ /*
+ * Our parents t_fb was not the default,
+ * we need to release our ref on tp->t_fb and
+ * pickup one on the new entry.
+ */
+ struct tcp_function_block *rblk;
+
+ rblk = find_and_ref_tcp_fb(blk);
+ KASSERT(rblk != NULL,
+ ("cannot find blk %p out of syncache?", blk));
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
+ refcount_release(&tp->t_fb->tfb_refcnt);
+ tp->t_fb = rblk;
+ if (tp->t_fb->tfb_tcp_fb_init) {
+ (*tp->t_fb->tfb_tcp_fb_init)(tp);
+ }
+ }
tp->snd_wl1 = sc->sc_irs;
tp->snd_max = tp->iss + 1;
tp->snd_nxt = tp->iss + 1;
@@ -898,7 +928,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- INP_WUNLOCK(inp);
+ soisconnected(so);
TCPSTAT_INC(tcps_accepts);
return (so);
@@ -917,6 +947,9 @@ abort2:
* in the syncache, and if its there, we pull it out of
* the cache and turn it into a full-blown connection in
* the SYN-RECEIVED state.
+ *
+ * On syncache_socket() success the newly created socket
+ * has its underlying inp locked.
*/
int
syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
@@ -931,12 +964,22 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* Global TCP locks are held because we manipulate the PCB lists
* and create a new socket.
*/
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
("%s: can handle only ACK", __func__));
sc = syncache_lookup(inc, &sch); /* returns locked sch */
SCH_LOCK_ASSERT(sch);
+
+#ifdef INVARIANTS
+ /*
+ * Test code for syncookies comparing the syncache stored
+ * values with the reconstructed values from the cookie.
+ */
+ if (sc != NULL)
+ syncookie_cmp(inc, sch, sc, th, to, *lsop);
+#endif
+
if (sc == NULL) {
/*
* There is no syncache entry, so see if this ACK is
@@ -956,7 +999,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
goto failed;
}
bzero(&scs, sizeof(scs));
- sc = syncookie_lookup(inc, sch, &scs, to, th, *lsop);
+ sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop);
SCH_UNLOCK(sch);
if (sc == NULL) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
@@ -966,7 +1009,16 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
goto failed;
}
} else {
- /* Pull out the entry to unlock the bucket row. */
+ /*
+ * Pull out the entry to unlock the bucket row.
+ *
+ * NOTE: We must decrease TCPS_SYN_RECEIVED count here, not
+ * tcp_state_change(). The tcpcb is not existent at this
+ * moment. A new one will be allocated via syncache_socket->
+ * sonewconn->tcp_usr_attach in TCPS_CLOSED state, then
+ * syncache_socket() will change it to TCPS_SYN_RECEIVED.
+ */
+ TCPSTATES_DEC(TCPS_SYN_RECEIVED);
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
#ifdef TCP_OFFLOAD
@@ -1002,12 +1054,32 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
goto failed;
}
+ /*
+ * If timestamps were not negotiated during SYN/ACK they
+ * must not appear on any segment during this session.
+ */
if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
"segment rejected\n", s, __func__);
goto failed;
}
+
+ /*
+ * If timestamps were negotiated during SYN/ACK they should
+ * appear on every segment during this session.
+ * XXXAO: This is only informal as there have been unverified
+ * reports of non-compliants stacks.
+ */
+ if ((sc->sc_flags & SCF_TIMESTAMP) && !(to->to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Timestamp missing, "
+ "no action\n", s, __func__);
+ free(s, M_TCPLOG);
+ s = NULL;
+ }
+ }
+
/*
* If timestamps were negotiated the reflected timestamp
* must be equal to what we actually sent in the SYN|ACK.
@@ -1040,6 +1112,39 @@ failed:
return (0);
}
+#ifdef TCP_RFC7413
+static void
+syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m,
+ uint64_t response_cookie)
+{
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ unsigned int *pending_counter;
+
+ /*
+ * Global TCP locks are held because we manipulate the PCB lists
+ * and create a new socket.
+ */
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+ pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
+ *lsop = syncache_socket(sc, *lsop, m);
+ if (*lsop == NULL) {
+ TCPSTAT_INC(tcps_sc_aborted);
+ atomic_subtract_int(pending_counter, 1);
+ } else {
+ inp = sotoinpcb(*lsop);
+ tp = intotcpcb(inp);
+ tp->t_flags |= TF_FASTOPEN;
+ tp->t_tfo_cookie = response_cookie;
+ tp->snd_max = tp->iss;
+ tp->snd_nxt = tp->iss;
+ tp->t_tfo_pending = pending_counter;
+ TCPSTAT_INC(tcps_sc_completed);
+ }
+}
+#endif /* TCP_RFC7413 */
+
/*
* Given a LISTEN socket and an inbound SYN request, add
* this to the syn cache, and send back a segment:
@@ -1052,9 +1157,16 @@ failed:
* DoS attack, an attacker could send data which would eventually
* consume all available buffer space if it were ACKed. By not ACKing
* the data, we avoid this DoS scenario.
+ *
+ * The exception to the above is when a SYN with a valid TCP Fast Open (TFO)
+ * cookie is processed, V_tcp_fastopen_enabled set to true, and the
+ * TCP_FASTOPEN socket option is set. In this case, a new socket is created
+ * and returned via lsop, the mbuf is not freed so that tcp_input() can
+ * queue its data to the socket, and 1 is returned to indicate the
+ * TFO-socket-creation path was taken.
*/
-static void
-_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+int
+syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
void *todctx)
{
@@ -1063,10 +1175,10 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct syncache *sc = NULL;
struct syncache_head *sch;
struct mbuf *ipopts = NULL;
- u_int32_t flowtmp;
u_int ltflags;
int win, sb_hiwat, ip_ttl, ip_tos;
char *s;
+ int rv = 0;
#ifdef INET6
int autoflowlabel = 0;
#endif
@@ -1075,8 +1187,12 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
#endif
struct syncache scs;
struct ucred *cred;
+#ifdef TCP_RFC7413
+ uint64_t tfo_response_cookie;
+ int tfo_cookie_valid = 0;
+ int tfo_response_cookie_valid = 0;
+#endif
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp); /* listen socket */
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
("%s: unexpected tcp flags", __func__));
@@ -1100,6 +1216,29 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sb_hiwat = so->so_rcv.sb_hiwat;
ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
+#ifdef TCP_RFC7413
+ if (V_tcp_fastopen_enabled && (tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) {
+ /*
+ * Limit the number of pending TFO connections to
+ * approximately half of the queue limit. This prevents TFO
+ * SYN floods from starving the service by filling the
+ * listen queue with bogus TFO connections.
+ */
+ if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
+ (so->so_qlimit / 2)) {
+ int result;
+
+ result = tcp_fastopen_check_cookie(inc,
+ to->to_tfo_cookie, to->to_tfo_len,
+ &tfo_response_cookie);
+ tfo_cookie_valid = (result > 0);
+ tfo_response_cookie_valid = (result >= 0);
+ } else
+ atomic_subtract_int(tp->t_tfo_pending, 1);
+ }
+#endif
+
/* By the time we drop the lock these should no longer be used. */
so = NULL;
tp = NULL;
@@ -1107,13 +1246,14 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
#ifdef MAC
if (mac_syncache_init(&maclabel) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
goto done;
} else
mac_syncache_create(maclabel, inp);
#endif
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+#ifdef TCP_RFC7413
+ if (!tfo_cookie_valid)
+#endif
+ INP_WUNLOCK(inp);
/*
* Remember the IP options, if any.
@@ -1142,6 +1282,10 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc = syncache_lookup(inc, &sch); /* returns locked entry */
SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid)
+ INP_WUNLOCK(inp);
+#endif
TCPSTAT_INC(tcps_sc_dupsyn);
if (ipopts) {
/*
@@ -1174,7 +1318,7 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
s, __func__);
free(s, M_TCPLOG);
}
- if (syncache_respond(sc) == 0) {
+ if (syncache_respond(sc, sch, 1, m) == 0) {
sc->sc_rxmits = 0;
syncache_timeout(sc, sch, 1);
TCPSTAT_INC(tcps_sndacks);
@@ -1184,6 +1328,14 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
goto done;
}
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid) {
+ bzero(&scs, sizeof(scs));
+ sc = &scs;
+ goto skip_alloc;
+ }
+#endif
+
sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
if (sc == NULL) {
/*
@@ -1207,7 +1359,13 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
}
}
-
+
+#ifdef TCP_RFC7413
+skip_alloc:
+ if (!tfo_cookie_valid && tfo_response_cookie_valid)
+ sc->sc_tfo_cookie = &tfo_response_cookie;
+#endif
+
/*
* Fill in the syncache values.
*/
@@ -1271,7 +1429,7 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* With the default maxsockbuf of 256K, a scale factor
* of 3 will be chosen by this algorithm. Those who
* choose a larger maxsockbuf should watch out
- * for the compatiblity problems mentioned above.
+ * for the compatibility problems mentioned above.
*
* RFC1323: The Window field in a SYN (i.e., a <SYN>
* or <SYN,ACK>) segment itself is never scaled.
@@ -1286,11 +1444,9 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
#ifdef TCP_SIGNATURE
/*
- * If listening socket requested TCP digests, and received SYN
+ * If listening socket requested TCP digests, OR received SYN
* contains the option, flag this in the syncache so that
* syncache_respond() will do the right thing with the SYN+ACK.
- * XXX: Currently we always record the option by default and will
- * attempt to use it in syncache_respond().
*/
if (to->to_flags & TOF_SIGNATURE || ltflags & TF_SIGNATURE)
sc->sc_flags |= SCF_SIGNATURE;
@@ -1304,25 +1460,32 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn)
sc->sc_flags |= SCF_ECN;
- if (V_tcp_syncookies) {
- syncookie_generate(sch, sc, &flowtmp);
+ if (V_tcp_syncookies)
+ sc->sc_iss = syncookie_generate(sch, sc);
#ifdef INET6
- if (autoflowlabel)
- sc->sc_flowlabel = flowtmp;
-#endif
- } else {
-#ifdef INET6
- if (autoflowlabel)
- sc->sc_flowlabel =
- (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
-#endif
+ if (autoflowlabel) {
+ if (V_tcp_syncookies)
+ sc->sc_flowlabel = sc->sc_iss;
+ else
+ sc->sc_flowlabel = ip6_randomflowlabel();
+ sc->sc_flowlabel = htonl(sc->sc_flowlabel) & IPV6_FLOWLABEL_MASK;
}
+#endif
SCH_UNLOCK(sch);
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid) {
+ syncache_tfo_expand(sc, lsop, m, tfo_response_cookie);
+ /* INP_WUNLOCK(inp) will be performed by the callee */
+ rv = 1;
+ goto tfo_done;
+ }
+#endif
+
/*
* Do a standard 3-way handshake.
*/
- if (syncache_respond(sc) == 0) {
+ if (syncache_respond(sc, sch, 0, m) == 0) {
if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
syncache_free(sc);
else if (sc != &scs)
@@ -1336,21 +1499,29 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
done:
+ if (m) {
+ *lsop = NULL;
+ m_freem(m);
+ }
+#ifdef TCP_RFC7413
+tfo_done:
+#endif
if (cred != NULL)
crfree(cred);
#ifdef MAC
if (sc == &scs)
mac_syncache_destroy(&maclabel);
#endif
- if (m) {
-
- *lsop = NULL;
- m_freem(m);
- }
+ return (rv);
}
+/*
+ * Send SYN|ACK to the peer. Either in response to the peer's SYN,
+ * i.e. m0 != NULL, or upon 3WHS ACK timeout, i.e. m0 == NULL.
+ */
static int
-syncache_respond(struct syncache *sc)
+syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked,
+ const struct mbuf *m0)
{
struct ip *ip = NULL;
struct mbuf *m;
@@ -1361,6 +1532,9 @@ syncache_respond(struct syncache *sc)
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
#endif
+#ifdef TCP_SIGNATURE
+ struct secasvar *sav;
+#endif
hlen =
#ifdef INET6
@@ -1379,7 +1553,7 @@ syncache_respond(struct syncache *sc)
("syncache: mbuf too small"));
/* Create the IP+TCP header from scratch. */
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (ENOBUFS);
#ifdef MAC
@@ -1413,7 +1587,7 @@ syncache_respond(struct syncache *sc)
ip = mtod(m, struct ip *);
ip->ip_v = IPVERSION;
ip->ip_hl = sizeof(struct ip) >> 2;
- ip->ip_len = tlen;
+ ip->ip_len = htons(tlen);
ip->ip_id = 0;
ip->ip_off = 0;
ip->ip_sum = 0;
@@ -1431,7 +1605,7 @@ syncache_respond(struct syncache *sc)
* 2) the SCF_UNREACH flag has been set
*/
if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
- ip->ip_off |= IP_DF;
+ ip->ip_off |= htons(IP_DF);
th = (struct tcphdr *)(ip + 1);
}
@@ -1471,8 +1645,39 @@ syncache_respond(struct syncache *sc)
if (sc->sc_flags & SCF_SACK)
to.to_flags |= TOF_SACKPERM;
#ifdef TCP_SIGNATURE
- if (sc->sc_flags & SCF_SIGNATURE)
- to.to_flags |= TOF_SIGNATURE;
+ sav = NULL;
+ if (sc->sc_flags & SCF_SIGNATURE) {
+ sav = tcp_get_sav(m, IPSEC_DIR_OUTBOUND);
+ if (sav != NULL)
+ to.to_flags |= TOF_SIGNATURE;
+ else {
+
+ /*
+ * We've got SCF_SIGNATURE flag
+ * inherited from listening socket,
+ * but no SADB key for given source
+ * address. Assume signature is not
+ * required and remove signature flag
+ * instead of silently dropping
+ * connection.
+ */
+ if (locked == 0)
+ SCH_LOCK(sch);
+ sc->sc_flags &= ~SCF_SIGNATURE;
+ if (locked == 0)
+ SCH_UNLOCK(sch);
+ }
+ }
+#endif
+
+#ifdef TCP_RFC7413
+ if (sc->sc_tfo_cookie) {
+ to.to_flags |= TOF_FASTOPEN;
+ to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+ to.to_tfo_cookie = sc->sc_tfo_cookie;
+ /* don't send cookie again when retransmitting response */
+ sc->sc_tfo_cookie = NULL;
+ }
#endif
optlen = tcp_addoptions(&to, (u_char *)(th + 1));
@@ -1483,20 +1688,29 @@ syncache_respond(struct syncache *sc)
#ifdef TCP_SIGNATURE
if (sc->sc_flags & SCF_SIGNATURE)
- tcp_signature_compute(m, 0, 0, optlen,
- to.to_signature, IPSEC_DIR_OUTBOUND);
+ tcp_signature_do_compute(m, 0, optlen,
+ to.to_signature, sav);
#endif
#ifdef INET6
if (sc->sc_inc.inc_flags & INC_ISIPV6)
ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen);
else
#endif
- ip->ip_len += optlen;
+ ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
} else
optlen = 0;
M_SETFIB(m, sc->sc_inc.inc_fibnum);
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ /*
+ * If we have peer's SYN and it has a flowid, then let's assign it to
+ * our SYN|ACK. ip6_output() and ip_output() will not assign flowid
+ * to SYN|ACK due to lack of inp here.
+ */
+ if (m0 != NULL && M_HASHTYPE_GET(m0) != M_HASHTYPE_NONE) {
+ m->m_pkthdr.flowid = m0->m_pkthdr.flowid;
+ M_HASHTYPE_SET(m, M_HASHTYPE_GET(m0));
+ }
#ifdef INET6
if (sc->sc_inc.inc_flags & INC_ISIPV6) {
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
@@ -1538,292 +1752,379 @@ syncache_respond(struct syncache *sc)
return (error);
}
-void
-syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, struct socket **lsop, struct mbuf *m)
-{
- _syncache_add(inc, to, th, inp, lsop, m, NULL, NULL);
-}
-
-void
-tcp_offload_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, struct socket **lsop, void *tod, void *todctx)
-{
-
- _syncache_add(inc, to, th, inp, lsop, NULL, tod, todctx);
-}
/*
- * The purpose of SYN cookies is to avoid keeping track of all SYN's we
- * receive and to be able to handle SYN floods from bogus source addresses
- * (where we will never receive any reply). SYN floods try to exhaust all
- * our memory and available slots in the SYN cache table to cause a denial
- * of service to legitimate users of the local host.
+ * The purpose of syncookies is to handle spoofed SYN flooding DoS attacks
+ * that exceed the capacity of the syncache by avoiding the storage of any
+ * of the SYNs we receive. Syncookies defend against blind SYN flooding
+ * attacks where the attacker does not have access to our responses.
+ *
+ * Syncookies encode and include all necessary information about the
+ * connection setup within the SYN|ACK that we send back. That way we
+ * can avoid keeping any local state until the ACK to our SYN|ACK returns
+ * (if ever). Normally the syncache and syncookies are running in parallel
+ * with the latter taking over when the former is exhausted. When matching
+ * syncache entry is found the syncookie is ignored.
*
- * The idea of SYN cookies is to encode and include all necessary information
- * about the connection setup state within the SYN-ACK we send back and thus
- * to get along without keeping any local state until the ACK to the SYN-ACK
- * arrives (if ever). Everything we need to know should be available from
- * the information we encoded in the SYN-ACK.
+ * The only reliable information persisting the 3WHS is our initial sequence
+ * number ISS of 32 bits. Syncookies embed a cryptographically sufficient
+ * strong hash (MAC) value and a few bits of TCP SYN options in the ISS
+ * of our SYN|ACK. The MAC can be recomputed when the ACK to our SYN|ACK
+ * returns and signifies a legitimate connection if it matches the ACK.
*
- * More information about the theory behind SYN cookies and its first
- * discussion and specification can be found at:
- * http://cr.yp.to/syncookies.html (overview)
- * http://cr.yp.to/syncookies/archive (gory details)
+ * The available space of 32 bits to store the hash and to encode the SYN
+ * option information is very tight and we should have at least 24 bits for
+ * the MAC to keep the number of guesses by blind spoofing reasonably high.
*
- * This implementation extends the orginal idea and first implementation
- * of FreeBSD by using not only the initial sequence number field to store
- * information but also the timestamp field if present. This way we can
- * keep track of the entire state we need to know to recreate the session in
- * its original form. Almost all TCP speakers implement RFC1323 timestamps
- * these days. For those that do not we still have to live with the known
- * shortcomings of the ISN only SYN cookies.
+ * SYN option information we have to encode to fully restore a connection:
+ * MSS: is important to choose an optimal segment size to avoid IP level
+ * fragmentation along the path. The common MSS values can be encoded
+ * in a 3-bit table. Uncommon values are captured by the next lower value
+ * in the table leading to a slight increase in packetization overhead.
+ * WSCALE: is necessary to allow large windows to be used for high delay-
+ * bandwidth product links. Not scaling the window when it was initially
+ * negotiated is bad for performance as lack of scaling further decreases
+ * the apparent available send window. We only need to encode the WSCALE
+ * we received from the remote end. Our end can be recalculated at any
+ * time. The common WSCALE values can be encoded in a 3-bit table.
+ * Uncommon values are captured by the next lower value in the table
+ * making us under-estimate the available window size halving our
+ * theoretically possible maximum throughput for that connection.
+ * SACK: Greatly assists in packet loss recovery and requires 1 bit.
+ * TIMESTAMP and SIGNATURE is not encoded because they are permanent options
+ * that are included in all segments on a connection. We enable them when
+ * the ACK has them.
*
- * Cookie layers:
+ * Security of syncookies and attack vectors:
*
- * Initial sequence number we send:
- * 31|................................|0
- * DDDDDDDDDDDDDDDDDDDDDDDDDMMMRRRP
- * D = MD5 Digest (first dword)
- * M = MSS index
- * R = Rotation of secret
- * P = Odd or Even secret
+ * The MAC is computed over (faddr||laddr||fport||lport||irs||flags||secmod)
+ * together with the global secret to make it unique per connection attempt.
+ * Thus any change of any of those parameters results in a different MAC output
+ * in an unpredictable way unless a collision is encountered. 24 bits of the
+ * MAC are embedded into the ISS.
*
- * The MD5 Digest is computed with over following parameters:
- * a) randomly rotated secret
- * b) struct in_conninfo containing the remote/local ip/port (IPv4&IPv6)
- * c) the received initial sequence number from remote host
- * d) the rotation offset and odd/even bit
+ * To prevent replay attacks two rotating global secrets are updated with a
+ * new random value every 15 seconds. The life-time of a syncookie is thus
+ * 15-30 seconds.
*
- * Timestamp we send:
- * 31|................................|0
- * DDDDDDDDDDDDDDDDDDDDDDSSSSRRRRA5
- * D = MD5 Digest (third dword) (only as filler)
- * S = Requested send window scale
- * R = Requested receive window scale
- * A = SACK allowed
- * 5 = TCP-MD5 enabled (not implemented yet)
- * XORed with MD5 Digest (forth dword)
+ * Vector 1: Attacking the secret. This requires finding a weakness in the
+ * MAC itself or the way it is used here. The attacker can do a chosen plain
+ * text attack by varying and testing all parameters under his control.
+ * The strength depends on the size and randomness of the secret, and the
+ * cryptographic security of the MAC function. Due to the constant updating
+ * of the secret the attacker has at most 29.999 seconds to find the secret
+ * and launch spoofed connections. After that he has to start all over again.
*
- * The timestamp isn't cryptographically secure and doesn't need to be.
- * The double use of the MD5 digest dwords ties it to a specific remote/
- * local host/port, remote initial sequence number and our local time
- * limited secret. A received timestamp is reverted (XORed) and then
- * the contained MD5 dword is compared to the computed one to ensure the
- * timestamp belongs to the SYN-ACK we sent. The other parameters may
- * have been tampered with but this isn't different from supplying bogus
- * values in the SYN in the first place.
+ * Vector 2: Collision attack on the MAC of a single ACK. With a 24 bit MAC
+ * size an average of 4,823 attempts are required for a 50% chance of success
+ * to spoof a single syncookie (birthday collision paradox). However the
+ * attacker is blind and doesn't know if one of his attempts succeeded unless
+ * he has a side channel to infer success from. A single connection setup
+ * success average of 90% requires 8,790 packets, 99.99% requires 17,578 packets.
+ * This many attempts are required for each one blind spoofed connection. For
+ * every additional spoofed connection he has to launch another N attempts.
+ * Thus for a sustained rate 100 spoofed connections per second approximately
+ * 1,800,000 packets per second would have to be sent.
*
- * Some problems with SYN cookies remain however:
- * Consider the problem of a recreated (and retransmitted) cookie. If the
- * original SYN was accepted, the connection is established. The second
- * SYN is inflight, and if it arrives with an ISN that falls within the
- * receive window, the connection is killed.
+ * NB: The MAC function should be fast so that it doesn't become a CPU
+ * exhaustion attack vector itself.
*
- * Notes:
- * A heuristic to determine when to accept syn cookies is not necessary.
- * An ACK flood would cause the syncookie verification to be attempted,
- * but a SYN flood causes syncookies to be generated. Both are of equal
- * cost, so there's no point in trying to optimize the ACK flood case.
- * Also, if you don't process certain ACKs for some reason, then all someone
- * would have to do is launch a SYN and ACK flood at the same time, which
- * would stop cookie verification and defeat the entire purpose of syncookies.
+ * References:
+ * RFC4987 TCP SYN Flooding Attacks and Common Mitigations
+ * SYN cookies were first proposed by cryptographer Dan J. Bernstein in 1996
+ * http://cr.yp.to/syncookies.html (overview)
+ * http://cr.yp.to/syncookies/archive (details)
+ *
+ *
+ * Schematic construction of a syncookie enabled Initial Sequence Number:
+ * 0 1 2 3
+ * 12345678901234567890123456789012
+ * |xxxxxxxxxxxxxxxxxxxxxxxxWWWMMMSP|
+ *
+ * x 24 MAC (truncated)
+ * W 3 Send Window Scale index
+ * M 3 MSS index
+ * S 1 SACK permitted
+ * P 1 Odd/even secret
*/
-static int tcp_sc_msstab[] = { 0, 256, 468, 536, 996, 1452, 1460, 8960 };
-static void
-syncookie_generate(struct syncache_head *sch, struct syncache *sc,
- u_int32_t *flowlabel)
+/*
+ * Distribution and probability of certain MSS values. Those in between are
+ * rounded down to the next lower one.
+ * [An Analysis of TCP Maximum Segment Sizes, S. Alcock and R. Nelson, 2011]
+ * .2% .3% 5% 7% 7% 20% 15% 45%
+ */
+static int tcp_sc_msstab[] = { 216, 536, 1200, 1360, 1400, 1440, 1452, 1460 };
+
+/*
+ * Distribution and probability of certain WSCALE values. We have to map the
+ * (send) window scale (shift) option with a range of 0-14 from 4 bits into 3
+ * bits based on prevalence of certain values. Where we don't have an exact
+ * match for are rounded down to the next lower one letting us under-estimate
+ * the true available window. At the moment this would happen only for the
+ * very uncommon values 3, 5 and those above 8 (more than 16MB socket buffer
+ * and window size). The absence of the WSCALE option (no scaling in either
+ * direction) is encoded with index zero.
+ * [WSCALE values histograms, Allman, 2012]
+ * X 10 10 35 5 6 14 10% by host
+ * X 11 4 5 5 18 49 3% by connections
+ */
+static int tcp_sc_wstab[] = { 0, 0, 1, 2, 4, 6, 7, 8 };
+
+/*
+ * Compute the MAC for the SYN cookie. SIPHASH-2-4 is chosen for its speed
+ * and good cryptographic properties.
+ */
+static uint32_t
+syncookie_mac(struct in_conninfo *inc, tcp_seq irs, uint8_t flags,
+ uint8_t *secbits, uintptr_t secmod)
{
- MD5_CTX ctx;
- u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
- u_int32_t data;
- u_int32_t *secbits;
- u_int off, pmss, mss;
- int i;
+ SIPHASH_CTX ctx;
+ uint32_t siphash[2];
+
+ SipHash24_Init(&ctx);
+ SipHash_SetKey(&ctx, secbits);
+ switch (inc->inc_flags & INC_ISIPV6) {
+#ifdef INET
+ case 0:
+ SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
+ SipHash_Update(&ctx, &inc->inc_laddr, sizeof(inc->inc_laddr));
+ break;
+#endif
+#ifdef INET6
+ case INC_ISIPV6:
+ SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
+ SipHash_Update(&ctx, &inc->inc6_laddr, sizeof(inc->inc6_laddr));
+ break;
+#endif
+ }
+ SipHash_Update(&ctx, &inc->inc_fport, sizeof(inc->inc_fport));
+ SipHash_Update(&ctx, &inc->inc_lport, sizeof(inc->inc_lport));
+ SipHash_Update(&ctx, &irs, sizeof(irs));
+ SipHash_Update(&ctx, &flags, sizeof(flags));
+ SipHash_Update(&ctx, &secmod, sizeof(secmod));
+ SipHash_Final((u_int8_t *)&siphash, &ctx);
+
+ return (siphash[0] ^ siphash[1]);
+}
+
+static tcp_seq
+syncookie_generate(struct syncache_head *sch, struct syncache *sc)
+{
+ u_int i, mss, secbit, wscale;
+ uint32_t iss, hash;
+ uint8_t *secbits;
+ union syncookie cookie;
SCH_LOCK_ASSERT(sch);
- /* Which of the two secrets to use. */
- secbits = sch->sch_oddeven ?
- sch->sch_secbits_odd : sch->sch_secbits_even;
-
- /* Reseed secret if too old. */
- if (sch->sch_reseed < time_uptime) {
- sch->sch_oddeven = sch->sch_oddeven ? 0 : 1; /* toggle */
- secbits = sch->sch_oddeven ?
- sch->sch_secbits_odd : sch->sch_secbits_even;
- for (i = 0; i < SYNCOOKIE_SECRET_SIZE; i++)
- secbits[i] = arc4random();
- sch->sch_reseed = time_uptime + SYNCOOKIE_LIFETIME;
- }
+ cookie.cookie = 0;
- /* Secret rotation offset. */
- off = sc->sc_iss & 0x7; /* iss was randomized before */
+ /* Map our computed MSS into the 3-bit index. */
+ mss = min(tcp_mssopt(&sc->sc_inc), max(sc->sc_peer_mss, V_tcp_minmss));
+ for (i = nitems(tcp_sc_msstab) - 1; tcp_sc_msstab[i] > mss && i > 0;
+ i--)
+ ;
+ cookie.flags.mss_idx = i;
- /* Maximum segment size calculation. */
- pmss =
- max( min(sc->sc_peer_mss, tcp_mssopt(&sc->sc_inc)), V_tcp_minmss);
- for (mss = sizeof(tcp_sc_msstab) / sizeof(int) - 1; mss > 0; mss--)
- if (tcp_sc_msstab[mss] <= pmss)
- break;
+ /*
+ * Map the send window scale into the 3-bit index but only if
+ * the wscale option was received.
+ */
+ if (sc->sc_flags & SCF_WINSCALE) {
+ wscale = sc->sc_requested_s_scale;
+ for (i = nitems(tcp_sc_wstab) - 1;
+ tcp_sc_wstab[i] > wscale && i > 0;
+ i--)
+ ;
+ cookie.flags.wscale_idx = i;
+ }
- /* Fold parameters and MD5 digest into the ISN we will send. */
- data = sch->sch_oddeven;/* odd or even secret, 1 bit */
- data |= off << 1; /* secret offset, derived from iss, 3 bits */
- data |= mss << 4; /* mss, 3 bits */
+ /* Can we do SACK? */
+ if (sc->sc_flags & SCF_SACK)
+ cookie.flags.sack_ok = 1;
- MD5Init(&ctx);
- MD5Update(&ctx, ((u_int8_t *)secbits) + off,
- SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
- MD5Update(&ctx, secbits, off);
- MD5Update(&ctx, &sc->sc_inc, sizeof(sc->sc_inc));
- MD5Update(&ctx, &sc->sc_irs, sizeof(sc->sc_irs));
- MD5Update(&ctx, &data, sizeof(data));
- MD5Final((u_int8_t *)&md5_buffer, &ctx);
+ /* Which of the two secrets to use. */
+ secbit = sch->sch_sc->secret.oddeven & 0x1;
+ cookie.flags.odd_even = secbit;
- data |= (md5_buffer[0] << 7);
- sc->sc_iss = data;
+ secbits = sch->sch_sc->secret.key[secbit];
+ hash = syncookie_mac(&sc->sc_inc, sc->sc_irs, cookie.cookie, secbits,
+ (uintptr_t)sch);
-#ifdef INET6
- *flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
-#endif
+ /*
+ * Put the flags into the hash and XOR them to get better ISS number
+ * variance. This doesn't enhance the cryptographic strength and is
+ * done to prevent the 8 cookie bits from showing up directly on the
+ * wire.
+ */
+ iss = hash & ~0xff;
+ iss |= cookie.cookie ^ (hash >> 24);
- /* Additional parameters are stored in the timestamp if present. */
+ /* Randomize the timestamp. */
if (sc->sc_flags & SCF_TIMESTAMP) {
- data = ((sc->sc_flags & SCF_SIGNATURE) ? 1 : 0); /* TCP-MD5, 1 bit */
- data |= ((sc->sc_flags & SCF_SACK) ? 1 : 0) << 1; /* SACK, 1 bit */
- data |= sc->sc_requested_s_scale << 2; /* SWIN scale, 4 bits */
- data |= sc->sc_requested_r_scale << 6; /* RWIN scale, 4 bits */
- data |= md5_buffer[2] << 10; /* more digest bits */
- data ^= md5_buffer[3];
- sc->sc_ts = data;
- sc->sc_tsoff = data - tcp_ts_getticks(); /* after XOR */
+ sc->sc_ts = arc4random();
+ sc->sc_tsoff = sc->sc_ts - tcp_ts_getticks();
}
TCPSTAT_INC(tcps_sc_sendcookie);
+ return (iss);
}
static struct syncache *
syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
- struct syncache *sc, struct tcpopt *to, struct tcphdr *th,
- struct socket *so)
+ struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
+ struct socket *lso)
{
- MD5_CTX ctx;
- u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
- u_int32_t data = 0;
- u_int32_t *secbits;
+ uint32_t hash;
+ uint8_t *secbits;
tcp_seq ack, seq;
- int off, mss, wnd, flags;
+ int wnd, wscale = 0;
+ union syncookie cookie;
SCH_LOCK_ASSERT(sch);
/*
- * Pull information out of SYN-ACK/ACK and
- * revert sequence number advances.
+ * Pull information out of SYN-ACK/ACK and revert sequence number
+ * advances.
*/
ack = th->th_ack - 1;
seq = th->th_seq - 1;
- off = (ack >> 1) & 0x7;
- mss = (ack >> 4) & 0x7;
- flags = ack & 0x7f;
-
- /* Which of the two secrets to use. */
- secbits = (flags & 0x1) ? sch->sch_secbits_odd : sch->sch_secbits_even;
/*
- * The secret wasn't updated for the lifetime of a syncookie,
- * so this SYN-ACK/ACK is either too old (replay) or totally bogus.
+ * Unpack the flags containing enough information to restore the
+ * connection.
*/
- if (sch->sch_reseed + SYNCOOKIE_LIFETIME < time_uptime) {
- return (NULL);
- }
+ cookie.cookie = (ack & 0xff) ^ (ack >> 24);
- /* Recompute the digest so we can compare it. */
- MD5Init(&ctx);
- MD5Update(&ctx, ((u_int8_t *)secbits) + off,
- SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
- MD5Update(&ctx, secbits, off);
- MD5Update(&ctx, inc, sizeof(*inc));
- MD5Update(&ctx, &seq, sizeof(seq));
- MD5Update(&ctx, &flags, sizeof(flags));
- MD5Final((u_int8_t *)&md5_buffer, &ctx);
-
- /* Does the digest part of or ACK'ed ISS match? */
- if ((ack & (~0x7f)) != (md5_buffer[0] << 7))
- return (NULL);
+ /* Which of the two secrets to use. */
+ secbits = sch->sch_sc->secret.key[cookie.flags.odd_even];
- /* Does the digest part of our reflected timestamp match? */
- if (to->to_flags & TOF_TS) {
- data = md5_buffer[3] ^ to->to_tsecr;
- if ((data & (~0x3ff)) != (md5_buffer[2] << 10))
- return (NULL);
- }
+ hash = syncookie_mac(inc, seq, cookie.cookie, secbits, (uintptr_t)sch);
+
+ /* The recomputed hash matches the ACK if this was a genuine cookie. */
+ if ((ack & ~0xff) != (hash & ~0xff))
+ return (NULL);
/* Fill in the syncache values. */
+ sc->sc_flags = 0;
bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
sc->sc_ipopts = NULL;
sc->sc_irs = seq;
sc->sc_iss = ack;
+ switch (inc->inc_flags & INC_ISIPV6) {
+#ifdef INET
+ case 0:
+ sc->sc_ip_ttl = sotoinpcb(lso)->inp_ip_ttl;
+ sc->sc_ip_tos = sotoinpcb(lso)->inp_ip_tos;
+ break;
+#endif
#ifdef INET6
- if (inc->inc_flags & INC_ISIPV6) {
- if (sotoinpcb(so)->inp_flags & IN6P_AUTOFLOWLABEL)
- sc->sc_flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
- } else
+ case INC_ISIPV6:
+ if (sotoinpcb(lso)->inp_flags & IN6P_AUTOFLOWLABEL)
+ sc->sc_flowlabel = sc->sc_iss & IPV6_FLOWLABEL_MASK;
+ break;
#endif
- {
- sc->sc_ip_ttl = sotoinpcb(so)->inp_ip_ttl;
- sc->sc_ip_tos = sotoinpcb(so)->inp_ip_tos;
}
- /* Additional parameters that were encoded in the timestamp. */
- if (data) {
+ sc->sc_peer_mss = tcp_sc_msstab[cookie.flags.mss_idx];
+
+ /* We can simply recompute receive window scale we sent earlier. */
+ while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < sb_max)
+ wscale++;
+
+ /* Only use wscale if it was enabled in the original SYN. */
+ if (cookie.flags.wscale_idx > 0) {
+ sc->sc_requested_r_scale = wscale;
+ sc->sc_requested_s_scale = tcp_sc_wstab[cookie.flags.wscale_idx];
+ sc->sc_flags |= SCF_WINSCALE;
+ }
+
+ wnd = sbspace(&lso->so_rcv);
+ wnd = imax(wnd, 0);
+ wnd = imin(wnd, TCP_MAXWIN);
+ sc->sc_wnd = wnd;
+
+ if (cookie.flags.sack_ok)
+ sc->sc_flags |= SCF_SACK;
+
+ if (to->to_flags & TOF_TS) {
sc->sc_flags |= SCF_TIMESTAMP;
sc->sc_tsreflect = to->to_tsval;
sc->sc_ts = to->to_tsecr;
sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks();
- sc->sc_flags |= (data & 0x1) ? SCF_SIGNATURE : 0;
- sc->sc_flags |= ((data >> 1) & 0x1) ? SCF_SACK : 0;
- sc->sc_requested_s_scale = min((data >> 2) & 0xf,
- TCP_MAX_WINSHIFT);
- sc->sc_requested_r_scale = min((data >> 6) & 0xf,
- TCP_MAX_WINSHIFT);
- if (sc->sc_requested_s_scale || sc->sc_requested_r_scale)
- sc->sc_flags |= SCF_WINSCALE;
- } else
- sc->sc_flags |= SCF_NOOPT;
+ }
- wnd = sbspace(&so->so_rcv);
- wnd = imax(wnd, 0);
- wnd = imin(wnd, TCP_MAXWIN);
- sc->sc_wnd = wnd;
+ if (to->to_flags & TOF_SIGNATURE)
+ sc->sc_flags |= SCF_SIGNATURE;
sc->sc_rxmits = 0;
- sc->sc_peer_mss = tcp_sc_msstab[mss];
TCPSTAT_INC(tcps_sc_recvcookie);
return (sc);
}
-/*
- * Returns the current number of syncache entries. This number
- * will probably change before you get around to calling
- * syncache_pcblist.
- */
-
-int
-syncache_pcbcount(void)
+#ifdef INVARIANTS
+static int
+syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
+ struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
+ struct socket *lso)
{
- struct syncache_head *sch;
- int count, i;
+ struct syncache scs, *scx;
+ char *s;
- for (count = 0, i = 0; i < V_tcp_syncache.hashsize; i++) {
- /* No need to lock for a read. */
- sch = &V_tcp_syncache.hashbase[i];
- count += sch->sch_length;
+ bzero(&scs, sizeof(scs));
+ scx = syncookie_lookup(inc, sch, &scs, th, to, lso);
+
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)) == NULL)
+ return (0);
+
+ if (scx != NULL) {
+ if (sc->sc_peer_mss != scx->sc_peer_mss)
+ log(LOG_DEBUG, "%s; %s: mss different %i vs %i\n",
+ s, __func__, sc->sc_peer_mss, scx->sc_peer_mss);
+
+ if (sc->sc_requested_r_scale != scx->sc_requested_r_scale)
+ log(LOG_DEBUG, "%s; %s: rwscale different %i vs %i\n",
+ s, __func__, sc->sc_requested_r_scale,
+ scx->sc_requested_r_scale);
+
+ if (sc->sc_requested_s_scale != scx->sc_requested_s_scale)
+ log(LOG_DEBUG, "%s; %s: swscale different %i vs %i\n",
+ s, __func__, sc->sc_requested_s_scale,
+ scx->sc_requested_s_scale);
+
+ if ((sc->sc_flags & SCF_SACK) != (scx->sc_flags & SCF_SACK))
+ log(LOG_DEBUG, "%s; %s: SACK different\n", s, __func__);
}
- return count;
+
+ if (s != NULL)
+ free(s, M_TCPLOG);
+ return (0);
+}
+#endif /* INVARIANTS */
+
+static void
+syncookie_reseed(void *arg)
+{
+ struct tcp_syncache *sc = arg;
+ uint8_t *secbits;
+ int secbit;
+
+ /*
+ * Reseeding the secret doesn't have to be protected by a lock.
+ * It only must be ensured that the new random values are visible
+ * to all CPUs in a SMP environment. The atomic with release
+ * semantics ensures that.
+ */
+ secbit = (sc->secret.oddeven & 0x1) ? 0 : 1;
+ secbits = sc->secret.key[secbit];
+ arc4rand(secbits, SYNCOOKIE_SECRET_SIZE, 0);
+ atomic_add_rel_int(&sc->secret.oddeven, 1);
+
+ /* Reschedule ourself. */
+ callout_schedule(&sc->secret.reseed, SYNCOOKIE_LIFETIME * hz);
}
/*
diff --git a/freebsd/sys/netinet/tcp_syncache.h b/freebsd/sys/netinet/tcp_syncache.h
index c55bfbcd..6b12c13a 100644
--- a/freebsd/sys/netinet/tcp_syncache.h
+++ b/freebsd/sys/netinet/tcp_syncache.h
@@ -41,13 +41,11 @@ void syncache_destroy(void);
void syncache_unreach(struct in_conninfo *, struct tcphdr *);
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
-void syncache_add(struct in_conninfo *, struct tcpopt *,
- struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *);
-void tcp_offload_syncache_add(struct in_conninfo *, struct tcpopt *,
- struct tcphdr *, struct inpcb *, struct socket **, void *, void *);
+int syncache_add(struct in_conninfo *, struct tcpopt *,
+ struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
+ void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
void syncache_badack(struct in_conninfo *);
-int syncache_pcbcount(void);
int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported);
struct syncache {
@@ -75,7 +73,10 @@ struct syncache {
#endif
struct label *sc_label; /* MAC label reference */
struct ucred *sc_cred; /* cred cache for jail checks */
-
+#ifdef TCP_RFC7413
+ void *sc_tfo_cookie; /* for TCP Fast Open response */
+#endif
+ void *sc_pspare; /* TCP_SIGNATURE */
u_int32_t sc_spare[2]; /* UTO */
};
@@ -91,20 +92,23 @@ struct syncache {
#define SCF_SACK 0x80 /* send SACK option */
#define SCF_ECN 0x100 /* send ECN setup packet */
-#define SYNCOOKIE_SECRET_SIZE 8 /* dwords */
-#define SYNCOOKIE_LIFETIME 16 /* seconds */
-
struct syncache_head {
- struct vnet *sch_vnet;
struct mtx sch_mtx;
TAILQ_HEAD(sch_head, syncache) sch_bucket;
struct callout sch_timer;
int sch_nextc;
u_int sch_length;
- u_int sch_oddeven;
- u_int32_t sch_secbits_odd[SYNCOOKIE_SECRET_SIZE];
- u_int32_t sch_secbits_even[SYNCOOKIE_SECRET_SIZE];
- u_int sch_reseed; /* time_uptime, seconds */
+ struct tcp_syncache *sch_sc;
+};
+
+#define SYNCOOKIE_SECRET_SIZE 16
+#define SYNCOOKIE_LIFETIME 15 /* seconds */
+
+struct syncookie_secret {
+ volatile u_int oddeven;
+ uint8_t key[2][SYNCOOKIE_SECRET_SIZE];
+ struct callout reseed;
+ u_int lifetime;
};
struct tcp_syncache {
@@ -115,7 +119,20 @@ struct tcp_syncache {
u_int bucket_limit;
u_int cache_limit;
u_int rexmt_limit;
- u_int hash_secret;
+ uint32_t hash_secret;
+ struct vnet *vnet;
+ struct syncookie_secret secret;
+};
+
+/* Internal use for the syncookie functions. */
+union syncookie {
+ uint8_t cookie;
+ struct {
+ uint8_t odd_even:1,
+ sack_ok:1,
+ wscale_idx:3,
+ mss_idx:3;
+ } flags;
};
#endif /* _KERNEL */
diff --git a/freebsd/sys/netinet/tcp_timer.c b/freebsd/sys/netinet/tcp_timer.c
index db952e42..edfc3829 100644
--- a/freebsd/sys/netinet/tcp_timer.c
+++ b/freebsd/sys/netinet/tcp_timer.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_tcpdebug.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/kernel.h>
@@ -52,24 +53,40 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/route.h>
+#include <net/rss_config.h>
#include <net/vnet.h>
+#include <net/netisr.h>
-#include <netinet/cc.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
+#include <netinet/in_rss.h>
#include <netinet/in_systm.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#include <netinet/cc/cc.h>
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
+int tcp_persmin;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_persmin, 0, sysctl_msec_to_ticks, "I", "minimum persistence interval");
+
+int tcp_persmax;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_persmax, 0, sysctl_msec_to_ticks, "I", "maximum persistence interval");
+
int tcp_keepinit;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
&tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
@@ -121,17 +138,110 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
/* max idle probes */
int tcp_maxpersistidle;
-static int tcp_rexmit_drop_options = 1;
+static int tcp_rexmit_drop_options = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
&tcp_rexmit_drop_options, 0,
"Drop TCP options from 3rd and later retransmitted SYN");
+static VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
+#define V_tcp_pmtud_blackhole_detect VNET(tcp_pmtud_blackhole_detect)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
+ CTLFLAG_RW|CTLFLAG_VNET,
+ &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
+ "Path MTU Discovery Black Hole Detection Enabled");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
+#define V_tcp_pmtud_blackhole_activated \
+ VNET(tcp_pmtud_blackhole_activated)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
+ CTLFLAG_RD|CTLFLAG_VNET,
+ &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
+ "Path MTU Discovery Black Hole Detection, Activation Count");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
+#define V_tcp_pmtud_blackhole_activated_min_mss \
+ VNET(tcp_pmtud_blackhole_activated_min_mss)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
+ CTLFLAG_RD|CTLFLAG_VNET,
+ &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
+ "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
+#define V_tcp_pmtud_blackhole_failed VNET(tcp_pmtud_blackhole_failed)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
+ CTLFLAG_RD|CTLFLAG_VNET,
+ &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
+ "Path MTU Discovery Black Hole Detection, Failure Count");
+
+#ifdef INET
+static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
+#define V_tcp_pmtud_blackhole_mss VNET(tcp_pmtud_blackhole_mss)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
+ CTLFLAG_RW|CTLFLAG_VNET,
+ &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
+ "Path MTU Discovery Black Hole Detection lowered MSS");
+#endif
+
+#ifdef INET6
+static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
+#define V_tcp_v6pmtud_blackhole_mss VNET(tcp_v6pmtud_blackhole_mss)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
+ CTLFLAG_RW|CTLFLAG_VNET,
+ &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
+ "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
+#endif
+
+#ifdef RSS
+static int per_cpu_timers = 1;
+#else
static int per_cpu_timers = 0;
+#endif
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
&per_cpu_timers , 0, "run tcp timers on all cpus");
+#if 0
#define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
+#endif
+
+/*
+ * Map the given inp to a CPU id.
+ *
+ * This queries RSS if it's compiled in, else it defaults to the current
+ * CPU ID.
+ */
+static inline int
+inp_to_cpuid(struct inpcb *inp)
+{
+ u_int cpuid;
+
+#ifdef RSS
+ if (per_cpu_timers) {
+ cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
+ if (cpuid == NETISR_CPUID_NONE)
+ return (curcpu); /* XXX */
+ else
+ return (cpuid);
+ }
+#else
+ /* Legacy, pre-RSS behaviour */
+ if (per_cpu_timers) {
+ /*
+ * We don't have a flowid -> cpuid mapping, so cheat and
+ * just map unknown cpuids to curcpu. Not the best, but
+ * apparently better than defaulting to swi 0.
+ */
+ cpuid = inp->inp_flowid % (mp_maxid + 1);
+ if (! CPU_ABSENT(cpuid))
+ return (cpuid);
+ return (curcpu);
+ }
+#endif
+ /* Default for RSS and non-RSS - cpuid 0 */
+ else {
+ return (0);
+ }
+}
/*
* Tcp protocol timeout routine called every 500 ms.
@@ -146,9 +256,7 @@ tcp_slowtimo(void)
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- INP_INFO_WLOCK(&V_tcbinfo);
(void) tcp_tw_2msl_scan(0);
- INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
@@ -162,10 +270,6 @@ int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */
-static int tcp_timer_race;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
- 0, "Count of t_inpcb races on tcp_discardcb");
-
/*
* TCP timer processing.
*/
@@ -178,18 +282,7 @@ tcp_timer_delack(void *xtp)
CURVNET_SET(tp->t_vnet);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_delack) ||
!callout_active(&tp->t_timers->tt_delack)) {
@@ -203,14 +296,65 @@ tcp_timer_delack(void *xtp)
CURVNET_RESTORE();
return;
}
-
tp->t_flags |= TF_ACKNOW;
TCPSTAT_INC(tcps_delack);
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
INP_WUNLOCK(inp);
CURVNET_RESTORE();
}
+/*
+ * When a timer wants to remove a TCB it must
+ * hold the INP_INFO_RLOCK(). The timer function
+ * should only have grabbed the INP_WLOCK() when
+ * it entered. To safely switch to holding both the
+ * INP_INFO_RLOCK() and the INP_WLOCK() we must first
+ * grab a reference on the inp, which will hold the inp
+ * so that it can't be removed. We then unlock the INP_WLOCK(),
+ * and grab the INP_INFO_RLOCK() lock. Once we have the INP_INFO_RLOCK()
+ * we proceed again to get the INP_WLOCK() (this preserves proper
+ * lock order). After acquiring the INP_WLOCK we must check if someone
+ * else deleted the pcb i.e. the inp_flags check.
+ * If so we return 1 otherwise we return 0.
+ *
+ * No matter what the tcp_inpinfo_lock_add() function
+ * returns the caller must afterwards call tcp_inpinfo_lock_del()
+ * to drop the locks and reference properly.
+ */
+
+int
+tcp_inpinfo_lock_add(struct inpcb *inp)
+{
+ in_pcbref(inp);
+ INP_WUNLOCK(inp);
+ INP_INFO_RLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ return(1);
+ }
+ return(0);
+
+}
+
+void
+tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp)
+{
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ if (inp && (tp == NULL)) {
+ /*
+ * If tcp_close/drop() gets called and tp
+ * returns NULL, then the function dropped
+ * the inp lock, we hold a reference keeping
+	 * this around, so we must re-acquire the
+ * INP_WLOCK() in order to proceed with
+ * our dropping the inp reference.
+ */
+ INP_WLOCK(inp);
+ }
+ if (inp && in_pcbrele_wlocked(inp) == 0)
+ INP_WUNLOCK(inp);
+}
+
void
tcp_timer_2msl(void *xtp)
{
@@ -222,62 +366,66 @@ tcp_timer_2msl(void *xtp)
ostate = tp->t_state;
#endif
- /*
- * XXXRW: Does this actually happen?
- */
- INP_INFO_WLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- INP_INFO_WUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
tcp_free_sackholes(tp);
if (callout_pending(&tp->t_timers->tt_2msl) ||
!callout_active(&tp->t_timers->tt_2msl)) {
INP_WUNLOCK(tp->t_inpcb);
- INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
callout_deactivate(&tp->t_timers->tt_2msl);
if ((inp->inp_flags & INP_DROPPED) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
/*
* 2 MSL timeout in shutdown went off. If we're closed but
* still waiting for peer to close and connection has been idle
- * too long, or if 2MSL time is up from TIME_WAIT, delete connection
- * control block. Otherwise, check again in a bit.
+ * too long delete connection control block. Otherwise, check
+ * again in a bit.
+ *
+ * If in TIME_WAIT state just ignore as this timeout is handled in
+ * tcp_tw_2msl_scan().
*
* If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
* there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
* Ignore fact that there were recent incoming segments.
*/
+ if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
+ INP_WUNLOCK(inp);
+ CURVNET_RESTORE();
+ return;
+ }
if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
tp->t_inpcb && tp->t_inpcb->inp_socket &&
(tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
TCPSTAT_INC(tcps_finwait2_drops);
+ if (tcp_inpinfo_lock_add(inp)) {
+ tcp_inpinfo_lock_del(inp, tp);
+ goto out;
+ }
tp = tcp_close(tp);
+ tcp_inpinfo_lock_del(inp, tp);
+ goto out;
} else {
- if (tp->t_state != TCPS_TIME_WAIT &&
- ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
- callout_reset_on(&tp->t_timers->tt_2msl,
- TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp));
- else
- tp = tcp_close(tp);
+ if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
+ callout_reset(&tp->t_timers->tt_2msl,
+ TP_KEEPINTVL(tp), tcp_timer_2msl, tp);
+ } else {
+ if (tcp_inpinfo_lock_add(inp)) {
+ tcp_inpinfo_lock_del(inp, tp);
+ goto out;
+ }
+ tp = tcp_close(tp);
+ tcp_inpinfo_lock_del(inp, tp);
+ goto out;
+ }
}
#ifdef TCPDEBUG
@@ -285,9 +433,11 @@ tcp_timer_2msl(void *xtp)
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
PRU_SLOWTIMO);
#endif
+ TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
+
if (tp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+out:
CURVNET_RESTORE();
}
@@ -303,36 +453,23 @@ tcp_timer_keep(void *xtp)
ostate = tp->t_state;
#endif
- INP_INFO_WLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- INP_INFO_WUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_keep) ||
!callout_active(&tp->t_timers->tt_keep)) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
callout_deactivate(&tp->t_timers->tt_keep);
if ((inp->inp_flags & INP_DROPPED) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
/*
* Keep-alive timer went off; send something
* or drop connection if idle for too long.
@@ -364,24 +501,29 @@ tcp_timer_keep(void *xtp)
tp->rcv_nxt, tp->snd_una - 1, 0);
free(t_template, M_TEMP);
}
- callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
- tcp_timer_keep, tp, INP_CPU(inp));
+ callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
+ tcp_timer_keep, tp);
} else
- callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
- tcp_timer_keep, tp, INP_CPU(inp));
+ callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
+ tcp_timer_keep, tp);
#ifdef TCPDEBUG
if (inp->inp_socket->so_options & SO_DEBUG)
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
PRU_SLOWTIMO);
#endif
+ TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
dropit:
TCPSTAT_INC(tcps_keepdrops);
+
+ if (tcp_inpinfo_lock_add(inp)) {
+ tcp_inpinfo_lock_del(inp, tp);
+ goto out;
+ }
tp = tcp_drop(tp, ETIMEDOUT);
#ifdef TCPDEBUG
@@ -389,9 +531,9 @@ dropit:
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
PRU_SLOWTIMO);
#endif
- if (tp != NULL)
- INP_WUNLOCK(tp->t_inpcb);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
+ tcp_inpinfo_lock_del(inp, tp);
+out:
CURVNET_RESTORE();
}
@@ -406,38 +548,25 @@ tcp_timer_persist(void *xtp)
ostate = tp->t_state;
#endif
- INP_INFO_WLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- INP_INFO_WUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_persist) ||
!callout_active(&tp->t_timers->tt_persist)) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
callout_deactivate(&tp->t_timers->tt_persist);
if ((inp->inp_flags & INP_DROPPED) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
/*
- * Persistance timer into zero window.
+ * Persistence timer into zero window.
* Force a byte to be output, if possible.
*/
TCPSTAT_INC(tcps_persisttimeo);
@@ -452,7 +581,12 @@ tcp_timer_persist(void *xtp)
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
TCPSTAT_INC(tcps_persistdrop);
+ if (tcp_inpinfo_lock_add(inp)) {
+ tcp_inpinfo_lock_del(inp, tp);
+ goto out;
+ }
tp = tcp_drop(tp, ETIMEDOUT);
+ tcp_inpinfo_lock_del(inp, tp);
goto out;
}
/*
@@ -462,22 +596,26 @@ tcp_timer_persist(void *xtp)
if (tp->t_state > TCPS_CLOSE_WAIT &&
(ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
TCPSTAT_INC(tcps_persistdrop);
+ if (tcp_inpinfo_lock_add(inp)) {
+ tcp_inpinfo_lock_del(inp, tp);
+ goto out;
+ }
tp = tcp_drop(tp, ETIMEDOUT);
+ tcp_inpinfo_lock_del(inp, tp);
goto out;
}
tcp_setpersist(tp);
tp->t_flags |= TF_FORCEDATA;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
tp->t_flags &= ~TF_FORCEDATA;
-out:
#ifdef TCPDEBUG
if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
- if (tp != NULL)
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
+ INP_WUNLOCK(inp);
+out:
CURVNET_RESTORE();
}
@@ -487,44 +625,34 @@ tcp_timer_rexmt(void * xtp)
struct tcpcb *tp = xtp;
CURVNET_SET(tp->t_vnet);
int rexmt;
- int headlocked;
struct inpcb *inp;
#ifdef TCPDEBUG
int ostate;
ostate = tp->t_state;
#endif
- INP_INFO_RLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- INP_INFO_RUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_rexmt) ||
!callout_active(&tp->t_timers->tt_rexmt)) {
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
callout_deactivate(&tp->t_timers->tt_rexmt);
if ((inp->inp_flags & INP_DROPPED) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
tcp_free_sackholes(tp);
+ if (tp->t_fb->tfb_tcp_rexmit_tmr) {
+ /* The stack has a timer action too. */
+ (*tp->t_fb->tfb_tcp_rexmit_tmr)(tp);
+ }
/*
* Retransmission timer went off. Message has not
* been acked within retransmit interval. Back off
@@ -533,30 +661,15 @@ tcp_timer_rexmt(void * xtp)
if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
tp->t_rxtshift = TCP_MAXRXTSHIFT;
TCPSTAT_INC(tcps_timeoutdrop);
- in_pcbref(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
- INP_WUNLOCK(inp);
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp)) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
- if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
+ if (tcp_inpinfo_lock_add(inp)) {
+ tcp_inpinfo_lock_del(inp, tp);
+ goto out;
}
-
tp = tcp_drop(tp, tp->t_softerror ?
tp->t_softerror : ETIMEDOUT);
- headlocked = 1;
+ tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
- headlocked = 0;
if (tp->t_state == TCPS_SYN_SENT) {
/*
* If the SYN was retransmitted, indicate CWND to be
@@ -589,12 +702,120 @@ tcp_timer_rexmt(void * xtp)
} else
tp->t_flags &= ~TF_PREVVALID;
TCPSTAT_INC(tcps_rexmttimeo);
- if (tp->t_state == TCPS_SYN_SENT)
+ if ((tp->t_state == TCPS_SYN_SENT) ||
+ (tp->t_state == TCPS_SYN_RECEIVED))
rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
else
rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
TCPT_RANGESET(tp->t_rxtcur, rexmt,
tp->t_rttmin, TCPTV_REXMTMAX);
+
+ /*
+ * We enter the path for PLMTUD if connection is established or, if
+ * connection is FIN_WAIT_1 status, reason for the last is that if
+ * amount of data we send is very small, we could send it in couple of
+ * packets and process straight to FIN. In that case we won't catch
+ * ESTABLISHED state.
+ */
+ if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
+ || (tp->t_state == TCPS_FIN_WAIT_1))) {
+#ifdef INET6
+ int isipv6;
+#endif
+
+ /*
+ * Idea here is that at each stage of mtu probe (usually, 1448
+ * -> 1188 -> 524) should be given 2 chances to recover before
+ * further clamping down. 'tp->t_rxtshift % 2 == 0' should
+ * take care of that.
+ */
+ if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
+ (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
+ (tp->t_rxtshift >= 2 && tp->t_rxtshift % 2 == 0)) {
+ /*
+ * Enter Path MTU Black-hole Detection mechanism:
+ * - Disable Path MTU Discovery (IP "DF" bit).
+ * - Reduce MTU to lower value than what we
+ * negotiated with peer.
+ */
+ /* Record that we may have found a black hole. */
+ tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
+
+ /* Keep track of previous MSS. */
+ tp->t_pmtud_saved_maxseg = tp->t_maxseg;
+
+ /*
+ * Reduce the MSS to blackhole value or to the default
+ * in an attempt to retransmit.
+ */
+#ifdef INET6
+ isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
+ if (isipv6 &&
+ tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss) {
+ /* Use the sysctl tuneable blackhole MSS. */
+ tp->t_maxseg = V_tcp_v6pmtud_blackhole_mss;
+ V_tcp_pmtud_blackhole_activated++;
+ } else if (isipv6) {
+ /* Use the default MSS. */
+ tp->t_maxseg = V_tcp_v6mssdflt;
+ /*
+ * Disable Path MTU Discovery when we switch to
+ * minmss.
+ */
+ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+ V_tcp_pmtud_blackhole_activated_min_mss++;
+ }
+#endif
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
+ if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss) {
+ /* Use the sysctl tuneable blackhole MSS. */
+ tp->t_maxseg = V_tcp_pmtud_blackhole_mss;
+ V_tcp_pmtud_blackhole_activated++;
+ } else {
+ /* Use the default MSS. */
+ tp->t_maxseg = V_tcp_mssdflt;
+ /*
+ * Disable Path MTU Discovery when we switch to
+ * minmss.
+ */
+ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+ V_tcp_pmtud_blackhole_activated_min_mss++;
+ }
+#endif
+ /*
+ * Reset the slow-start flight size
+ * as it may depend on the new MSS.
+ */
+ if (CC_ALGO(tp)->conn_init != NULL)
+ CC_ALGO(tp)->conn_init(tp->ccv);
+ } else {
+ /*
+ * If further retransmissions are still unsuccessful
+ * with a lowered MTU, maybe this isn't a blackhole and
+ * we restore the previous MSS and blackhole detection
+ * flags.
+ * The limit '6' is determined by giving each probe
+ * stage (1448, 1188, 524) 2 chances to recover.
+ */
+ if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
+ (tp->t_rxtshift > 6)) {
+ tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+ tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
+ tp->t_maxseg = tp->t_pmtud_saved_maxseg;
+ V_tcp_pmtud_blackhole_failed++;
+ /*
+ * Reset the slow-start flight size as it
+ * may depend on the new MSS.
+ */
+ if (CC_ALGO(tp)->conn_init != NULL)
+ CC_ALGO(tp)->conn_init(tp->ccv);
+ }
+ }
+ }
+
/*
* Disable RFC1323 and SACK if we haven't got any response to
* our third SYN to work-around some broken terminal servers
@@ -615,7 +836,9 @@ tcp_timer_rexmt(void * xtp)
#ifdef INET6
if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
in6_losing(tp->t_inpcb);
+ else
#endif
+ in_losing(tp->t_inpcb);
tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
tp->t_srtt = 0;
}
@@ -632,34 +855,35 @@ tcp_timer_rexmt(void * xtp)
cc_cong_signal(tp, NULL, CC_RTO);
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
-out:
#ifdef TCPDEBUG
if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
PRU_SLOWTIMO);
#endif
- if (tp != NULL)
- INP_WUNLOCK(inp);
- if (headlocked)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
+ INP_WUNLOCK(inp);
+out:
CURVNET_RESTORE();
}
void
-tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
+tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
{
struct callout *t_callout;
- void *f_callout;
+ timeout_t *f_callout;
struct inpcb *inp = tp->t_inpcb;
- int cpu = INP_CPU(inp);
+ int cpu = inp_to_cpuid(inp);
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
return;
#endif
+ if (tp->t_timers->tt_flags & TT_STOPPED)
+ return;
+
switch (timer_type) {
case TT_DELACK:
t_callout = &tp->t_timers->tt_delack;
@@ -682,7 +906,11 @@ tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
f_callout = tcp_timer_2msl;
break;
default:
- panic("bad timer_type");
+ if (tp->t_fb->tfb_tcp_timer_activate) {
+ tp->t_fb->tfb_tcp_timer_activate(tp, timer_type, delta);
+ return;
+ }
+ panic("tp %p bad timer_type %#x", tp, timer_type);
}
if (delta == 0) {
callout_stop(t_callout);
@@ -692,7 +920,7 @@ tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
}
int
-tcp_timer_active(struct tcpcb *tp, int timer_type)
+tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
{
struct callout *t_callout;
@@ -713,28 +941,79 @@ tcp_timer_active(struct tcpcb *tp, int timer_type)
t_callout = &tp->t_timers->tt_2msl;
break;
default:
- panic("bad timer_type");
+ if (tp->t_fb->tfb_tcp_timer_active) {
+ return(tp->t_fb->tfb_tcp_timer_active(tp, timer_type));
+ }
+ panic("tp %p bad timer_type %#x", tp, timer_type);
}
return callout_active(t_callout);
}
+void
+tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
+{
+ struct callout *t_callout;
+
+ tp->t_timers->tt_flags |= TT_STOPPED;
+ switch (timer_type) {
+ case TT_DELACK:
+ t_callout = &tp->t_timers->tt_delack;
+ break;
+ case TT_REXMT:
+ t_callout = &tp->t_timers->tt_rexmt;
+ break;
+ case TT_PERSIST:
+ t_callout = &tp->t_timers->tt_persist;
+ break;
+ case TT_KEEP:
+ t_callout = &tp->t_timers->tt_keep;
+ break;
+ case TT_2MSL:
+ t_callout = &tp->t_timers->tt_2msl;
+ break;
+ default:
+ if (tp->t_fb->tfb_tcp_timer_stop) {
+ /*
+ * XXXrrs we need to look at this with the
+ * stop case below (flags).
+ */
+ tp->t_fb->tfb_tcp_timer_stop(tp, timer_type);
+ return;
+ }
+ panic("tp %p bad timer_type %#x", tp, timer_type);
+ }
+
+ if (callout_async_drain(t_callout, tcp_timer_discard) == 0) {
+ /*
+ * Can't stop the callout, defer tcpcb actual deletion
+ * to the last one. We do this using the async drain
+	 * function and incrementing the count in tt_draincnt.
+ */
+ tp->t_timers->tt_draincnt++;
+ }
+}
+
#define ticks_to_msecs(t) (1000*(t) / hz)
void
-tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct xtcp_timer *xtimer)
+tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
+ struct xtcp_timer *xtimer)
{
- bzero(xtimer, sizeof(struct xtcp_timer));
+ sbintime_t now;
+
+ bzero(xtimer, sizeof(*xtimer));
if (timer == NULL)
return;
+ now = getsbinuptime();
if (callout_active(&timer->tt_delack))
- xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - ticks);
+ xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_rexmt))
- xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time - ticks);
+ xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_persist))
- xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - ticks);
+ xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_keep))
- xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks);
+ xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_2msl))
- xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks);
+ xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
}
diff --git a/freebsd/sys/netinet/tcp_timer.h b/freebsd/sys/netinet/tcp_timer.h
index 0da58fd8..bb78062d 100644
--- a/freebsd/sys/netinet/tcp_timer.h
+++ b/freebsd/sys/netinet/tcp_timer.h
@@ -76,9 +76,8 @@
#define TCPTV_SRTTBASE 0 /* base roundtrip time;
if 0, no idea yet */
#define TCPTV_RTOBASE ( 3*hz) /* assumed RTO if no info */
-#define TCPTV_SRTTDFLT ( 3*hz) /* assumed RTT if no info */
-#define TCPTV_PERSMIN ( 5*hz) /* retransmit persistence */
+#define TCPTV_PERSMIN ( 5*hz) /* minimum persist interval */
#define TCPTV_PERSMAX ( 60*hz) /* maximum persist interval */
#define TCPTV_KEEP_INIT ( 75*hz) /* initial connect keepalive */
@@ -122,7 +121,7 @@
#ifdef TCPTIMERS
static const char *tcptimers[] =
- { "REXMT", "PERSIST", "KEEP", "2MSL" };
+ { "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK" };
#endif
/*
@@ -146,12 +145,27 @@ struct tcp_timer {
struct callout tt_keep; /* keepalive */
struct callout tt_2msl; /* 2*msl TIME_WAIT timer */
struct callout tt_delack; /* delayed ACK timer */
+ uint32_t tt_flags; /* Timers flags */
+ uint32_t tt_draincnt; /* Count being drained */
};
-#define TT_DELACK 0x01
-#define TT_REXMT 0x02
-#define TT_PERSIST 0x04
-#define TT_KEEP 0x08
-#define TT_2MSL 0x10
+
+/*
+ * Flags for the tt_flags field.
+ */
+#define TT_DELACK 0x0001
+#define TT_REXMT 0x0002
+#define TT_PERSIST 0x0004
+#define TT_KEEP 0x0008
+#define TT_2MSL 0x0010
+#define TT_MASK (TT_DELACK|TT_REXMT|TT_PERSIST|TT_KEEP|TT_2MSL)
+
+#define TT_DELACK_RST 0x0100
+#define TT_REXMT_RST 0x0200
+#define TT_PERSIST_RST 0x0400
+#define TT_KEEP_RST 0x0800
+#define TT_2MSL_RST 0x1000
+
+#define TT_STOPPED 0x00010000
#define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit)
#define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle)
@@ -159,6 +173,8 @@ struct tcp_timer {
#define TP_KEEPCNT(tp) ((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt)
#define TP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp))
+extern int tcp_persmin; /* minimum persist interval */
+extern int tcp_persmax; /* maximum persist interval */
extern int tcp_keepinit; /* time to establish connection */
extern int tcp_keepidle; /* time before keepalive probes begin */
extern int tcp_keepintvl; /* time between keepalive probes */
@@ -170,14 +186,19 @@ extern int tcp_rexmit_slop;
extern int tcp_msl;
extern int tcp_ttl; /* time to live for TCP segs */
extern int tcp_backoff[];
+extern int tcp_syn_backoff[];
extern int tcp_finwait2_timeout;
extern int tcp_fast_finwait2_recycle;
+int tcp_inpinfo_lock_add(struct inpcb *inp);
+void tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp);
+
void tcp_timer_init(void);
void tcp_timer_2msl(void *xtp);
+void tcp_timer_discard(void *);
struct tcptw *
- tcp_tw_2msl_scan(int _reuse); /* XXX temporary */
+ tcp_tw_2msl_scan(int reuse); /* XXX temporary? */
void tcp_timer_keep(void *xtp);
void tcp_timer_persist(void *xtp);
void tcp_timer_rexmt(void *xtp);
diff --git a/freebsd/sys/netinet/tcp_timewait.c b/freebsd/sys/netinet/tcp_timewait.c
index 9034fab4..330e842e 100644
--- a/freebsd/sys/netinet/tcp_timewait.c
+++ b/freebsd/sys/netinet/tcp_timewait.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -93,20 +94,41 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
static VNET_DEFINE(uma_zone_t, tcptw_zone);
-#define V_tcptw_zone VNET(tcptw_zone)
+#define V_tcptw_zone VNET(tcptw_zone)
static int maxtcptw;
/*
* The timed wait queue contains references to each of the TCP sessions
* currently in the TIME_WAIT state. The queue pointers, including the
* queue pointers in each tcptw structure, are protected using the global
- * tcbinfo lock, which must be held over queue iteration and modification.
+ * timewait lock, which must be held over queue iteration and modification.
+ *
+ * Rules on tcptw usage:
+ * - a inpcb is always freed _after_ its tcptw
+ * - a tcptw relies on its inpcb reference counting for memory stability
+ * - a tcptw is dereferenceable only while its inpcb is locked
*/
static VNET_DEFINE(TAILQ_HEAD(, tcptw), twq_2msl);
-#define V_twq_2msl VNET(twq_2msl)
+#define V_twq_2msl VNET(twq_2msl)
+
+/* Global timewait lock */
+static VNET_DEFINE(struct rwlock, tw_lock);
+#define V_tw_lock VNET(tw_lock)
+
+#define TW_LOCK_INIT(tw, d) rw_init_flags(&(tw), (d), 0)
+#define TW_LOCK_DESTROY(tw) rw_destroy(&(tw))
+#define TW_RLOCK(tw) rw_rlock(&(tw))
+#define TW_WLOCK(tw) rw_wlock(&(tw))
+#define TW_RUNLOCK(tw) rw_runlock(&(tw))
+#define TW_WUNLOCK(tw) rw_wunlock(&(tw))
+#define TW_LOCK_ASSERT(tw) rw_assert(&(tw), RA_LOCKED)
+#define TW_RLOCK_ASSERT(tw) rw_assert(&(tw), RA_RLOCKED)
+#define TW_WLOCK_ASSERT(tw) rw_assert(&(tw), RA_WLOCKED)
+#define TW_UNLOCK_ASSERT(tw) rw_assert(&(tw), RA_UNLOCKED)
static void tcp_tw_2msl_reset(struct tcptw *, int);
-static void tcp_tw_2msl_stop(struct tcptw *);
+static void tcp_tw_2msl_stop(struct tcptw *, int);
+static int tcp_twrespond(struct tcptw *, int);
static int
tcptw_auto_size(void)
@@ -149,7 +171,7 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw, CTLTYPE_INT|CTLFLAG_RW,
VNET_DEFINE(int, nolocaltimewait) = 0;
#define V_nolocaltimewait VNET(nolocaltimewait)
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(nolocaltimewait), 0,
"Do not create compressed TCP TIME_WAIT entries for local connections");
@@ -166,13 +188,14 @@ tcp_tw_init(void)
{
V_tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
TUNABLE_INT_FETCH("net.inet.tcp.maxtcptw", &maxtcptw);
if (maxtcptw == 0)
uma_zone_set_max(V_tcptw_zone, tcptw_auto_size());
else
uma_zone_set_max(V_tcptw_zone, maxtcptw);
TAILQ_INIT(&V_twq_2msl);
+ TW_LOCK_INIT(V_tw_lock, "tcptw");
}
#ifdef VIMAGE
@@ -181,11 +204,12 @@ tcp_tw_destroy(void)
{
struct tcptw *tw;
- INP_INFO_WLOCK(&V_tcbinfo);
- while((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
+ INP_INFO_RLOCK(&V_tcbinfo);
+ while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
tcp_twclose(tw, 0);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ TW_LOCK_DESTROY(V_tw_lock);
uma_zdestroy(V_tcptw_zone);
}
#endif
@@ -206,7 +230,7 @@ tcp_twstart(struct tcpcb *tp)
int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
#endif
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* tcp_tw_2msl_reset(). */
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
if (V_nolocaltimewait) {
@@ -229,8 +253,23 @@ tcp_twstart(struct tcpcb *tp)
}
}
+
+ /*
+ * For use only by DTrace. We do not reference the state
+ * after this point so modifying it in place is not a problem.
+ */
+ tcp_state_change(tp, TCPS_TIME_WAIT);
+
tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
if (tw == NULL) {
+ /*
+ * Reached limit on total number of TIMEWAIT connections
+ * allowed. Remove a connection from TIMEWAIT queue in LRU
+ * fashion to make room for this connection.
+ *
+ * XXX: Check if it possible to always have enough room
+ * in advance based on guarantees provided by uma_zalloc().
+ */
tw = tcp_tw_2msl_scan(1);
if (tw == NULL) {
tp = tcp_close(tp);
@@ -239,7 +278,12 @@ tcp_twstart(struct tcpcb *tp)
return;
}
}
+ /*
+ * The tcptw will hold a reference on its inpcb until tcp_twclose
+ * is called
+ */
tw->tw_inpcb = inp;
+ in_pcbref(inp); /* Reference from tw */
/*
* Recover last window size sent.
@@ -313,53 +357,19 @@ tcp_twstart(struct tcpcb *tp)
INP_WUNLOCK(inp);
}
-#if 0
-/*
- * The appromixate rate of ISN increase of Microsoft TCP stacks;
- * the actual rate is slightly higher due to the addition of
- * random positive increments.
- *
- * Most other new OSes use semi-randomized ISN values, so we
- * do not need to worry about them.
- */
-#define MS_ISN_BYTES_PER_SECOND 250000
-
-/*
- * Determine if the ISN we will generate has advanced beyond the last
- * sequence number used by the previous connection. If so, indicate
- * that it is safe to recycle this tw socket by returning 1.
- */
-int
-tcp_twrecycleable(struct tcptw *tw)
-{
- tcp_seq new_iss = tw->iss;
- tcp_seq new_irs = tw->irs;
-
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- new_iss += (ticks - tw->t_starttime) * (ISN_BYTES_PER_SECOND / hz);
- new_irs += (ticks - tw->t_starttime) * (MS_ISN_BYTES_PER_SECOND / hz);
-
- if (SEQ_GT(new_iss, tw->snd_nxt) && SEQ_GT(new_irs, tw->rcv_nxt))
- return (1);
- else
- return (0);
-}
-#endif
-
/*
* Returns 1 if the TIME_WAIT state was killed and we should start over,
* looking for a pcb in the listen state. Returns 0 otherwise.
*/
int
-tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
+tcp_twcheck(struct inpcb *inp, struct tcpopt *to __unused, struct tcphdr *th,
struct mbuf *m, int tlen)
{
struct tcptw *tw;
int thflags;
tcp_seq seq;
- /* tcbinfo lock required for tcp_twclose(), tcp_tw_2msl_reset(). */
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
/*
@@ -460,11 +470,10 @@ tcp_twclose(struct tcptw *tw, int reuse)
inp = tw->tw_inpcb;
KASSERT((inp->inp_flags & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw"));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* tcp_tw_2msl_stop(). */
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* in_pcbfree() */
INP_WLOCK_ASSERT(inp);
- tw->tw_inpcb = NULL;
- tcp_tw_2msl_stop(tw);
+ tcp_tw_2msl_stop(tw, reuse);
inp->inp_ppcb = NULL;
in_pcbdrop(inp);
@@ -493,17 +502,17 @@ tcp_twclose(struct tcptw *tw, int reuse)
*/
INP_WUNLOCK(inp);
}
- } else
+ } else {
+ /*
+ * The socket has been already cleaned-up for us, only free the
+ * inpcb.
+ */
in_pcbfree(inp);
+ }
TCPSTAT_INC(tcps_closed);
- crfree(tw->tw_cred);
- tw->tw_cred = NULL;
- if (reuse)
- return;
- uma_zfree(V_tcptw_zone, tw);
}
-int
+static int
tcp_twrespond(struct tcptw *tw, int flags)
{
struct inpcb *inp = tw->tw_inpcb;
@@ -525,7 +534,7 @@ tcp_twrespond(struct tcptw *tw, int flags)
INP_WLOCK_ASSERT(inp);
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (ENOBUFS);
m->m_data += max_linkhdr;
@@ -596,9 +605,9 @@ tcp_twrespond(struct tcptw *tw, int flags)
m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
- ip->ip_len = m->m_pkthdr.len;
+ ip->ip_len = htons(m->m_pkthdr.len);
if (V_path_mtu_discovery)
- ip->ip_off |= IP_DF;
+ ip->ip_off |= htons(IP_DF);
error = ip_output(m, inp->inp_options, NULL,
((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
NULL, inp);
@@ -616,36 +625,114 @@ static void
tcp_tw_2msl_reset(struct tcptw *tw, int rearm)
{
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tw->tw_inpcb);
+
+ TW_WLOCK(V_tw_lock);
if (rearm)
TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
tw->tw_time = ticks + 2 * tcp_msl;
TAILQ_INSERT_TAIL(&V_twq_2msl, tw, tw_2msl);
+ TW_WUNLOCK(V_tw_lock);
}
static void
-tcp_tw_2msl_stop(struct tcptw *tw)
+tcp_tw_2msl_stop(struct tcptw *tw, int reuse)
{
+ struct ucred *cred;
+ struct inpcb *inp;
+ int released;
+
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+ TW_WLOCK(V_tw_lock);
+ inp = tw->tw_inpcb;
+ tw->tw_inpcb = NULL;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
+ cred = tw->tw_cred;
+ tw->tw_cred = NULL;
+ TW_WUNLOCK(V_tw_lock);
+
+ if (cred != NULL)
+ crfree(cred);
+
+ released = in_pcbrele_wlocked(inp);
+ KASSERT(!released, ("%s: inp should not be released here", __func__));
+
+ if (!reuse)
+ uma_zfree(V_tcptw_zone, tw);
+ TCPSTATES_DEC(TCPS_TIME_WAIT);
}
struct tcptw *
tcp_tw_2msl_scan(int reuse)
{
struct tcptw *tw;
+ struct inpcb *inp;
+
+#ifdef INVARIANTS
+ if (reuse) {
+ /*
+ * Exclusive pcbinfo lock is not required in reuse case even if
+ * two inpcb locks can be acquired simultaneously:
+ * - the inpcb transitioning to TIME_WAIT state in
+ * tcp_tw_start(),
+ * - the inpcb closed by tcp_twclose().
+ *
+ * It is because only inpcbs in FIN_WAIT2 or CLOSING states can
+ * transition in TIME_WAIT state. Then a pcbcb cannot be in
+ * TIME_WAIT list and transitioning to TIME_WAIT state at same
+ * time.
+ */
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ }
+#endif
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
for (;;) {
+ TW_RLOCK(V_tw_lock);
tw = TAILQ_FIRST(&V_twq_2msl);
- if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0))
+ if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0)) {
+ TW_RUNLOCK(V_tw_lock);
break;
- INP_WLOCK(tw->tw_inpcb);
- tcp_twclose(tw, reuse);
- if (reuse)
- return (tw);
+ }
+ KASSERT(tw->tw_inpcb != NULL, ("%s: tw->tw_inpcb == NULL",
+ __func__));
+
+ inp = tw->tw_inpcb;
+ in_pcbref(inp);
+ TW_RUNLOCK(V_tw_lock);
+
+ if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) {
+
+ INP_WLOCK(inp);
+ tw = intotw(inp);
+ if (in_pcbrele_wlocked(inp)) {
+ KASSERT(tw == NULL, ("%s: held last inp "
+ "reference but tw not NULL", __func__));
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ continue;
+ }
+
+ if (tw == NULL) {
+ /* tcp_twclose() has already been called */
+ INP_WUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ continue;
+ }
+
+ tcp_twclose(tw, reuse);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ if (reuse)
+ return tw;
+ } else {
+ /* INP_INFO lock is busy, continue later. */
+ INP_WLOCK(inp);
+ if (!in_pcbrele_wlocked(inp))
+ INP_WUNLOCK(inp);
+ break;
+ }
}
- return (NULL);
+
+ return NULL;
}
diff --git a/freebsd/sys/netinet/tcp_usrreq.c b/freebsd/sys/netinet/tcp_usrreq.c
index 61711a6e..d5fa680f 100644
--- a/freebsd/sys/netinet/tcp_usrreq.c
+++ b/freebsd/sys/netinet/tcp_usrreq.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/limits.h>
#include <sys/malloc.h>
+#include <sys/refcount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
@@ -66,11 +67,12 @@ __FBSDID("$FreeBSD$");
#endif
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>
-#include <netinet/cc.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -81,11 +83,19 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
+#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
+#include <netinet/cc/cc.h>
+#ifdef TCPPCAP
+#include <netinet/tcp_pcap.h>
+#endif
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
@@ -147,6 +157,7 @@ tcp_usr_attach(struct socket *so, int proto, struct thread *td)
tp = intotcpcb(inp);
out:
TCPDEBUG2(PRU_ATTACH);
+ TCP_PROBE2(debug__user, tp, PRU_ATTACH);
return error;
}
@@ -164,7 +175,7 @@ tcp_detach(struct socket *so, struct inpcb *inp)
{
struct tcpcb *tp;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_LOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
@@ -184,6 +195,21 @@ tcp_detach(struct socket *so, struct inpcb *inp)
* present until timewait ends.
*
* XXXRW: Would it be cleaner to free the tcptw here?
+ *
+ * Astute question indeed, from twtcp perspective there are
+ * three cases to consider:
+ *
+ * #1 tcp_detach is called at tcptw creation time by
+ * tcp_twstart, then do not discard the newly created tcptw
+ * and leave inpcb present until timewait ends
+ * #2 tcp_detach is called at timewait end (or reuse) by
+ * tcp_twclose, then the tcptw has already been discarded
+ * (or reused) and inpcb is freed here
+ * #3 tcp_detach is called() after timewait ends (or reuse)
+ * (e.g. by soclose), then tcptw has already been discarded
+ * (or reused) and inpcb is freed here
+ *
+ * In all three cases the tcptw should not be freed here.
*/
if (inp->inp_flags & INP_DROPPED) {
KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && "
@@ -227,15 +253,20 @@ static void
tcp_usr_detach(struct socket *so)
{
struct inpcb *inp;
+ int rlock = 0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
- INP_INFO_WLOCK(&V_tcbinfo);
+ if (!INP_INFO_WLOCKED(&V_tcbinfo)) {
+ INP_INFO_RLOCK(&V_tcbinfo);
+ rlock = 1;
+ }
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_detach: inp_socket == NULL"));
tcp_detach(so, inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (rlock)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
}
#ifdef INET
@@ -276,6 +307,7 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
INP_HASH_WUNLOCK(&V_tcbinfo);
out:
TCPDEBUG2(PRU_BIND);
+ TCP_PROBE2(debug__user, tp, PRU_BIND);
INP_WUNLOCK(inp);
return (error);
@@ -336,6 +368,7 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
INP_HASH_WUNLOCK(&V_tcbinfo);
out:
TCPDEBUG2(PRU_BIND);
+ TCP_PROBE2(debug__user, tp, PRU_BIND);
INP_WUNLOCK(inp);
return (error);
}
@@ -369,7 +402,7 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
INP_HASH_WUNLOCK(&V_tcbinfo);
if (error == 0) {
- tp->t_state = TCPS_LISTEN;
+ tcp_state_change(tp, TCPS_LISTEN);
solisten_proto(so, backlog);
#ifdef TCP_OFFLOAD
if ((so->so_options & SO_NO_OFFLOAD) == 0)
@@ -378,8 +411,13 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
}
SOCK_UNLOCK(so);
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN)
+ tp->t_tfo_pending = tcp_fastopen_alloc_counter();
+#endif
out:
TCPDEBUG2(PRU_LISTEN);
+ TCP_PROBE2(debug__user, tp, PRU_LISTEN);
INP_WUNLOCK(inp);
return (error);
}
@@ -414,7 +452,7 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
}
INP_HASH_WUNLOCK(&V_tcbinfo);
if (error == 0) {
- tp->t_state = TCPS_LISTEN;
+ tcp_state_change(tp, TCPS_LISTEN);
solisten_proto(so, backlog);
#ifdef TCP_OFFLOAD
if ((so->so_options & SO_NO_OFFLOAD) == 0)
@@ -423,8 +461,13 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
}
SOCK_UNLOCK(so);
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN)
+ tp->t_tfo_pending = tcp_fastopen_alloc_counter();
+#endif
out:
TCPDEBUG2(PRU_LISTEN);
+ TCP_PROBE2(debug__user, tp, PRU_LISTEN);
INP_WUNLOCK(inp);
return (error);
}
@@ -462,8 +505,12 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- error = EINVAL;
+ if (inp->inp_flags & INP_TIMEWAIT) {
+ error = EADDRINUSE;
+ goto out;
+ }
+ if (inp->inp_flags & INP_DROPPED) {
+ error = ECONNREFUSED;
goto out;
}
tp = intotcpcb(inp);
@@ -477,9 +524,10 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
goto out;
#endif
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
+ TCP_PROBE2(debug__user, tp, PRU_CONNECT);
INP_WUNLOCK(inp);
return (error);
}
@@ -509,8 +557,12 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- error = EINVAL;
+ if (inp->inp_flags & INP_TIMEWAIT) {
+ error = EADDRINUSE;
+ goto out;
+ }
+ if (inp->inp_flags & INP_DROPPED) {
+ error = ECONNREFUSED;
goto out;
}
tp = intotcpcb(inp);
@@ -543,7 +595,7 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
(error = tcp_offload_connect(so, nam)) == 0)
goto out;
#endif
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
goto out;
}
#endif
@@ -561,10 +613,11 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
goto out;
#endif
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
+ TCP_PROBE2(debug__user, tp, PRU_CONNECT);
INP_WUNLOCK(inp);
return (error);
}
@@ -589,11 +642,13 @@ tcp_usr_disconnect(struct socket *so)
int error = 0;
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ if (inp->inp_flags & INP_TIMEWAIT)
+ goto out;
+ if (inp->inp_flags & INP_DROPPED) {
error = ECONNRESET;
goto out;
}
@@ -602,8 +657,9 @@ tcp_usr_disconnect(struct socket *so)
tcp_disconnect(tp);
out:
TCPDEBUG2(PRU_DISCONNECT);
+ TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
@@ -611,13 +667,6 @@ out:
/*
* Accept a connection. Essentially all the work is done at higher levels;
* just return the address of the peer, storing through addr.
- *
- * The rationale for acquiring the tcbinfo lock here is somewhat complicated,
- * and is described in detail in the commit log entry for r175612. Acquiring
- * it delays an accept(2) racing with sonewconn(), which inserts the socket
- * before the inpcb address/port fields are initialized. A better fix would
- * prevent the socket from being placed in the listen queue until all fields
- * are fully initialized.
*/
static int
tcp_usr_accept(struct socket *so, struct sockaddr **nam)
@@ -634,7 +683,6 @@ tcp_usr_accept(struct socket *so, struct sockaddr **nam)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
- INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = ECONNABORTED;
@@ -653,8 +701,8 @@ tcp_usr_accept(struct socket *so, struct sockaddr **nam)
out:
TCPDEBUG2(PRU_ACCEPT);
+ TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
if (error == 0)
*nam = in_sockaddr(port, &addr);
return error;
@@ -704,6 +752,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
out:
TCPDEBUG2(PRU_ACCEPT);
+ TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
if (error == 0) {
@@ -727,7 +776,7 @@ tcp_usr_shutdown(struct socket *so)
struct tcpcb *tp = NULL;
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("inp == NULL"));
INP_WLOCK(inp);
@@ -740,12 +789,13 @@ tcp_usr_shutdown(struct socket *so)
socantsendmore(so);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
out:
TCPDEBUG2(PRU_SHUTDOWN);
+ TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
@@ -770,15 +820,28 @@ tcp_usr_rcvd(struct socket *so, int flags)
}
tp = intotcpcb(inp);
TCPDEBUG1();
+#ifdef TCP_RFC7413
+ /*
+ * For passively-created TFO connections, don't attempt a window
+ * update while still in SYN_RECEIVED as this may trigger an early
+ * SYN|ACK. It is preferable to have the SYN|ACK be sent along with
+ * application response data, or failing that, when the DELACK timer
+ * expires.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED))
+ goto out;
+#endif
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
tcp_offload_rcvd(tp);
else
#endif
- tcp_output(tp);
+ tp->t_fb->tfb_tcp_output(tp);
out:
TCPDEBUG2(PRU_RCVD);
+ TCP_PROBE2(debug__user, tp, PRU_RCVD);
INP_WUNLOCK(inp);
return (error);
}
@@ -807,14 +870,18 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
* this call.
*/
if (flags & PRUS_EOF)
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
if (control)
m_freem(control);
- if (m)
+ /*
+ * In case of PRUS_NOTREADY, tcp_usr_ready() is responsible
+ * for freeing memory.
+ */
+ if (m && (flags & PRUS_NOTREADY) == 0)
m_freem(m);
error = ECONNRESET;
goto out;
@@ -836,13 +903,12 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
m_freem(control); /* empty control, just free it */
}
if (!(flags & PRUS_OOB)) {
- sbappendstream(&so->so_snd, m);
+ sbappendstream(&so->so_snd, m, flags);
if (nam && tp->t_state < TCPS_SYN_SENT) {
/*
* Do implied connect if not yet connected,
* initialize window to default value, and
- * initialize maxseg/maxopd using peer's cached
- * MSS.
+ * initialize maxseg using peer's cached MSS.
*/
#ifdef INET6
if (isipv6)
@@ -864,14 +930,15 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
* Close the send side of the connection after
* the data is sent.
*/
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
socantsendmore(so);
tcp_usrclosed(tp);
}
- if (!(inp->inp_flags & INP_DROPPED)) {
+ if (!(inp->inp_flags & INP_DROPPED) &&
+ !(flags & PRUS_NOTREADY)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
if (flags & PRUS_MORETOCOME)
tp->t_flags &= ~TF_MORETOCOME;
}
@@ -894,14 +961,13 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
* of data past the urgent section.
* Otherwise, snd_up should be one lower.
*/
- sbappendstream_locked(&so->so_snd, m);
+ sbappendstream_locked(&so->so_snd, m, flags);
SOCKBUF_UNLOCK(&so->so_snd);
if (nam && tp->t_state < TCPS_SYN_SENT) {
/*
* Do implied connect if not yet connected,
* initialize window to default value, and
- * initialize maxseg/maxopd using peer's cached
- * MSS.
+ * initialize maxseg using peer's cached MSS.
*/
#ifdef INET6
if (isipv6)
@@ -918,17 +984,48 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
tp->snd_wnd = TTCP_CLIENT_SND_WND;
tcp_mss(tp, -1);
}
- tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
- tp->t_flags |= TF_FORCEDATA;
- error = tcp_output(tp);
- tp->t_flags &= ~TF_FORCEDATA;
+ tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
+ if (!(flags & PRUS_NOTREADY)) {
+ tp->t_flags |= TF_FORCEDATA;
+ error = tp->t_fb->tfb_tcp_output(tp);
+ tp->t_flags &= ~TF_FORCEDATA;
+ }
}
out:
TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
+ TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
+ ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
INP_WUNLOCK(inp);
if (flags & PRUS_EOF)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ return (error);
+}
+
+static int
+tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
+{
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ int error;
+
+ inp = sotoinpcb(so);
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(inp);
+ for (int i = 0; i < count; i++)
+ m = m_free(m);
+ return (ECONNRESET);
+ }
+ tp = intotcpcb(inp);
+
+ SOCKBUF_LOCK(&so->so_snd);
+ error = sbready(&so->so_snd, m, count);
+ SOCKBUF_UNLOCK(&so->so_snd);
+ if (error == 0)
+ error = tp->t_fb->tfb_tcp_output(tp);
+ INP_WUNLOCK(inp);
+
return (error);
}
@@ -945,7 +1042,7 @@ tcp_usr_abort(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_abort: inp_socket == NULL"));
@@ -959,6 +1056,7 @@ tcp_usr_abort(struct socket *so)
TCPDEBUG1();
tcp_drop(tp, ECONNABORTED);
TCPDEBUG2(PRU_ABORT);
+ TCP_PROBE2(debug__user, tp, PRU_ABORT);
}
if (!(inp->inp_flags & INP_DROPPED)) {
SOCK_LOCK(so);
@@ -967,7 +1065,7 @@ tcp_usr_abort(struct socket *so)
inp->inp_flags |= INP_SOCKREF;
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
}
/*
@@ -983,7 +1081,7 @@ tcp_usr_close(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_close: inp_socket == NULL"));
@@ -998,6 +1096,7 @@ tcp_usr_close(struct socket *so)
TCPDEBUG1();
tcp_disconnect(tp);
TCPDEBUG2(PRU_CLOSE);
+ TCP_PROBE2(debug__user, tp, PRU_CLOSE);
}
if (!(inp->inp_flags & INP_DROPPED)) {
SOCK_LOCK(so);
@@ -1006,7 +1105,7 @@ tcp_usr_close(struct socket *so)
inp->inp_flags |= INP_SOCKREF;
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
}
/*
@@ -1047,6 +1146,7 @@ tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
out:
TCPDEBUG2(PRU_RCVOOB);
+ TCP_PROBE2(debug__user, tp, PRU_RCVOOB);
INP_WUNLOCK(inp);
return (error);
}
@@ -1066,6 +1166,7 @@ struct pr_usrreqs tcp_usrreqs = {
.pru_rcvd = tcp_usr_rcvd,
.pru_rcvoob = tcp_usr_rcvoob,
.pru_send = tcp_usr_send,
+ .pru_ready = tcp_usr_ready,
.pru_shutdown = tcp_usr_shutdown,
.pru_sockaddr = in_getsockaddr,
.pru_sosetlabel = in_pcbsosetlabel,
@@ -1088,6 +1189,7 @@ struct pr_usrreqs tcp6_usrreqs = {
.pru_rcvd = tcp_usr_rcvd,
.pru_rcvoob = tcp_usr_rcvoob,
.pru_send = tcp_usr_send,
+ .pru_ready = tcp_usr_ready,
.pru_shutdown = tcp_usr_shutdown,
.pru_sockaddr = in6_mapped_sockaddr,
.pru_sosetlabel = in_pcbsosetlabel,
@@ -1154,7 +1256,7 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
- tp->t_state = TCPS_SYN_SENT;
+ tcp_state_change(tp, TCPS_SYN_SENT);
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
@@ -1170,10 +1272,7 @@ out:
static int
tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
{
- struct inpcb *inp = tp->t_inpcb, *oinp;
- struct socket *so = inp->inp_socket;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
- struct in6_addr addr6;
+ struct inpcb *inp = tp->t_inpcb;
int error;
INP_WLOCK_ASSERT(inp);
@@ -1184,39 +1283,9 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
if (error)
goto out;
}
-
- /*
- * Cannot simply call in_pcbconnect, because there might be an
- * earlier incarnation of this same connection still in
- * TIME_WAIT state, creating an ADDRINUSE error.
- * in6_pcbladdr() also handles scope zone IDs.
- *
- * XXXRW: We wouldn't need to expose in6_pcblookup_hash_locked()
- * outside of in6_pcb.c if there were an in6_pcbconnect_setup().
- */
- error = in6_pcbladdr(inp, nam, &addr6);
- if (error)
- goto out;
- oinp = in6_pcblookup_hash_locked(inp->inp_pcbinfo,
- &sin6->sin6_addr, sin6->sin6_port,
- IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
- ? &addr6
- : &inp->in6p_laddr,
- inp->inp_lport, 0, NULL);
- if (oinp) {
- error = EADDRINUSE;
+ error = in6_pcbconnect(inp, nam, td->td_ucred);
+ if (error != 0)
goto out;
- }
- if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
- inp->in6p_laddr = addr6;
- inp->in6p_faddr = sin6->sin6_addr;
- inp->inp_fport = sin6->sin6_port;
- /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
- inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
- if (inp->inp_flags & IN6P_AUTOFLOWLABEL)
- inp->inp_flow |=
- (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
- in_pcbrehash(inp);
INP_HASH_WUNLOCK(&V_tcbinfo);
/* Compute window scaling to request. */
@@ -1224,9 +1293,9 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
(TCP_MAXWIN << tp->request_r_scale) < sb_max)
tp->request_r_scale++;
- soisconnecting(so);
+ soisconnecting(inp->inp_socket);
TCPSTAT_INC(tcps_connattempt);
- tp->t_state = TCPS_SYN_SENT;
+ tcp_state_change(tp, TCPS_SYN_SENT);
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
@@ -1294,25 +1363,25 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
* has to revalidate that the connection is still valid for the socket
* option.
*/
-#define INP_WLOCK_RECHECK(inp) do { \
+#define INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do { \
INP_WLOCK(inp); \
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \
INP_WUNLOCK(inp); \
+ cleanup; \
return (ECONNRESET); \
} \
tp = intotcpcb(inp); \
} while(0)
+#define INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */)
int
tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
- int error, opt, optval;
- u_int ui;
+ int error;
struct inpcb *inp;
struct tcpcb *tp;
- struct tcp_info ti;
- char buf[TCP_CA_NAME_MAX];
- struct cc_algo *algo;
+ struct tcp_function_block *blk;
+ struct tcp_function_set fsn;
error = 0;
inp = sotoinpcb(so);
@@ -1340,6 +1409,128 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
INP_WUNLOCK(inp);
return (ECONNRESET);
}
+ tp = intotcpcb(inp);
+ /*
+ * Protect the TCP option TCP_FUNCTION_BLK so
+ * that a sub-function can *never* overwrite this.
+ */
+ if ((sopt->sopt_dir == SOPT_SET) &&
+ (sopt->sopt_name == TCP_FUNCTION_BLK)) {
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &fsn, sizeof fsn,
+ sizeof fsn);
+ if (error)
+ return (error);
+ INP_WLOCK_RECHECK(inp);
+ blk = find_and_ref_tcp_functions(&fsn);
+ if (blk == NULL) {
+ INP_WUNLOCK(inp);
+ return (ENOENT);
+ }
+ if (tp->t_fb == blk) {
+ /* You already have this */
+ refcount_release(&blk->tfb_refcnt);
+ INP_WUNLOCK(inp);
+ return (0);
+ }
+ if (tp->t_state != TCPS_CLOSED) {
+ int error=EINVAL;
+ /*
+ * The user has advanced the state
+ * past the initial point, we may not
+ * be able to switch.
+ */
+ if (blk->tfb_tcp_handoff_ok != NULL) {
+ /*
+ * Does the stack provide a
+ * query mechanism, if so it may
+ * still be possible?
+ */
+ error = (*blk->tfb_tcp_handoff_ok)(tp);
+ }
+ if (error) {
+ refcount_release(&blk->tfb_refcnt);
+ INP_WUNLOCK(inp);
+ return(error);
+ }
+ }
+ if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
+ refcount_release(&blk->tfb_refcnt);
+ INP_WUNLOCK(inp);
+ return (ENOENT);
+ }
+ /*
+ * Release the old refcnt, the
+ * lookup acquired a ref on the
+ * new one already.
+ */
+ if (tp->t_fb->tfb_tcp_fb_fini) {
+ /*
+ * Tell the stack to cleanup with 0 i.e.
+ * the tcb is not going away.
+ */
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
+ }
+ refcount_release(&tp->t_fb->tfb_refcnt);
+ tp->t_fb = blk;
+ if (tp->t_fb->tfb_tcp_fb_init) {
+ (*tp->t_fb->tfb_tcp_fb_init)(tp);
+ }
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE) {
+ tcp_offload_ctloutput(tp, sopt->sopt_dir,
+ sopt->sopt_name);
+ }
+#endif
+ INP_WUNLOCK(inp);
+ return (error);
+ } else if ((sopt->sopt_dir == SOPT_GET) &&
+ (sopt->sopt_name == TCP_FUNCTION_BLK)) {
+ strcpy(fsn.function_set_name, tp->t_fb->tfb_tcp_block_name);
+ fsn.pcbcnt = tp->t_fb->tfb_refcnt;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &fsn, sizeof fsn);
+ return (error);
+ }
+ /* Pass in the INP locked, called must unlock it */
+ return (tp->t_fb->tfb_tcp_ctloutput(so, sopt, inp, tp));
+}
+
+int
+tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
+{
+ int error, opt, optval;
+ u_int ui;
+ struct tcp_info ti;
+ struct cc_algo *algo;
+ char *pbuf, buf[TCP_CA_NAME_MAX];
+ size_t len;
+
+ /*
+ * For TCP_CCALGOOPT forward the control to CC module, for both
+ * SOPT_SET and SOPT_GET.
+ */
+ switch (sopt->sopt_name) {
+ case TCP_CCALGOOPT:
+ INP_WUNLOCK(inp);
+ pbuf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO);
+ error = sooptcopyin(sopt, pbuf, sopt->sopt_valsize,
+ sopt->sopt_valsize);
+ if (error) {
+ free(pbuf, M_TEMP);
+ return (error);
+ }
+ INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP));
+ if (CC_ALGO(tp)->ctl_output != NULL)
+ error = CC_ALGO(tp)->ctl_output(tp->ccv, sopt, pbuf);
+ else
+ error = ENOENT;
+ INP_WUNLOCK(inp);
+ if (error == 0 && sopt->sopt_dir == SOPT_GET)
+ error = sooptcopyout(sopt, pbuf, sopt->sopt_valsize);
+ free(pbuf, M_TEMP);
+ return (error);
+ }
switch (sopt->sopt_dir) {
case SOPT_SET:
@@ -1408,7 +1599,7 @@ unlock_and_done:
else if (tp->t_flags & TF_NOPUSH) {
tp->t_flags &= ~TF_NOPUSH;
if (TCPS_HAVEESTABLISHED(tp->t_state))
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
}
goto unlock_and_done;
@@ -1434,50 +1625,45 @@ unlock_and_done:
case TCP_CONGESTION:
INP_WUNLOCK(inp);
- bzero(buf, sizeof(buf));
- error = sooptcopyin(sopt, &buf, sizeof(buf), 1);
+ error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
if (error)
break;
+ buf[sopt->sopt_valsize] = '\0';
INP_WLOCK_RECHECK(inp);
+ CC_LIST_RLOCK();
+ STAILQ_FOREACH(algo, &cc_list, entries)
+ if (strncmp(buf, algo->name,
+ TCP_CA_NAME_MAX) == 0)
+ break;
+ CC_LIST_RUNLOCK();
+ if (algo == NULL) {
+ INP_WUNLOCK(inp);
+ error = EINVAL;
+ break;
+ }
/*
- * Return EINVAL if we can't find the requested cc algo.
+ * We hold a write lock over the tcb so it's safe to
+ * do these things without ordering concerns.
*/
- error = EINVAL;
- CC_LIST_RLOCK();
- STAILQ_FOREACH(algo, &cc_list, entries) {
- if (strncmp(buf, algo->name, TCP_CA_NAME_MAX)
- == 0) {
- /* We've found the requested algo. */
- error = 0;
- /*
- * We hold a write lock over the tcb
- * so it's safe to do these things
- * without ordering concerns.
- */
- if (CC_ALGO(tp)->cb_destroy != NULL)
- CC_ALGO(tp)->cb_destroy(tp->ccv);
- CC_ALGO(tp) = algo;
- /*
- * If something goes pear shaped
- * initialising the new algo,
- * fall back to newreno (which
- * does not require initialisation).
- */
- if (algo->cb_init != NULL)
- if (algo->cb_init(tp->ccv) > 0) {
- CC_ALGO(tp) = &newreno_cc_algo;
- /*
- * The only reason init
- * should fail is
- * because of malloc.
- */
- error = ENOMEM;
- }
- break; /* Break the STAILQ_FOREACH. */
- }
+ if (CC_ALGO(tp)->cb_destroy != NULL)
+ CC_ALGO(tp)->cb_destroy(tp->ccv);
+ CC_ALGO(tp) = algo;
+ /*
+ * If something goes pear shaped initialising the new
+ * algo, fall back to newreno (which does not
+ * require initialisation).
+ */
+ if (algo->cb_init != NULL &&
+ algo->cb_init(tp->ccv) != 0) {
+ CC_ALGO(tp) = &newreno_cc_algo;
+ /*
+ * The only reason init should fail is
+ * because of malloc.
+ */
+ error = ENOMEM;
}
- CC_LIST_RUNLOCK();
- goto unlock_and_done;
+ INP_WUNLOCK(inp);
+ break;
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
@@ -1535,8 +1721,49 @@ unlock_and_done:
(TP_MAXIDLE(tp) > 0))
tcp_timer_activate(tp, TT_2MSL,
TP_MAXIDLE(tp));
+ goto unlock_and_done;
+
+#ifdef TCPPCAP
+ case TCP_PCAP_OUT:
+ case TCP_PCAP_IN:
INP_WUNLOCK(inp);
- break;
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+
+ INP_WLOCK_RECHECK(inp);
+ if (optval >= 0)
+ tcp_pcap_set_sock_max(TCP_PCAP_OUT ?
+ &(tp->t_outpkts) : &(tp->t_inpkts),
+ optval);
+ else
+ error = EINVAL;
+ goto unlock_and_done;
+#endif
+
+#ifdef TCP_RFC7413
+ case TCP_FASTOPEN:
+ INP_WUNLOCK(inp);
+ if (!V_tcp_fastopen_enabled)
+ return (EPERM);
+
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+
+ INP_WLOCK_RECHECK(inp);
+ if (optval) {
+ tp->t_flags |= TF_FASTOPEN;
+ if ((tp->t_state == TCPS_LISTEN) &&
+ (tp->t_tfo_pending == NULL))
+ tp->t_tfo_pending =
+ tcp_fastopen_alloc_counter();
+ } else
+ tp->t_flags &= ~TF_FASTOPEN;
+ goto unlock_and_done;
+#endif
default:
INP_WUNLOCK(inp);
@@ -1582,11 +1809,48 @@ unlock_and_done:
error = sooptcopyout(sopt, &ti, sizeof ti);
break;
case TCP_CONGESTION:
- bzero(buf, sizeof(buf));
- strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
+ len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, buf, len + 1);
+ break;
+ case TCP_KEEPIDLE:
+ case TCP_KEEPINTVL:
+ case TCP_KEEPINIT:
+ case TCP_KEEPCNT:
+ switch (sopt->sopt_name) {
+ case TCP_KEEPIDLE:
+ ui = tp->t_keepidle / hz;
+ break;
+ case TCP_KEEPINTVL:
+ ui = tp->t_keepintvl / hz;
+ break;
+ case TCP_KEEPINIT:
+ ui = tp->t_keepinit / hz;
+ break;
+ case TCP_KEEPCNT:
+ ui = tp->t_keepcnt;
+ break;
+ }
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &ui, sizeof(ui));
+ break;
+#ifdef TCPPCAP
+ case TCP_PCAP_OUT:
+ case TCP_PCAP_IN:
+ optval = tcp_pcap_get_sock_max(TCP_PCAP_OUT ?
+ &(tp->t_outpkts) : &(tp->t_inpkts));
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+#endif
+
+#ifdef TCP_RFC7413
+ case TCP_FASTOPEN:
+ optval = tp->t_flags & TF_FASTOPEN;
INP_WUNLOCK(inp);
- error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
break;
+#endif
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -1597,6 +1861,7 @@ unlock_and_done:
return (error);
}
#undef INP_WLOCK_RECHECK
+#undef INP_WLOCK_RECHECK_CLEANUP
/*
* Attach TCP protocol to socket, allocating
@@ -1617,10 +1882,10 @@ tcp_attach(struct socket *so)
}
so->so_rcv.sb_flags |= SB_AUTOSIZE;
so->so_snd.sb_flags |= SB_AUTOSIZE;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
error = in_pcballoc(so, &V_tcbinfo);
if (error) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
inp = sotoinpcb(so);
@@ -1636,12 +1901,13 @@ tcp_attach(struct socket *so)
if (tp == NULL) {
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (ENOBUFS);
}
tp->t_state = TCPS_CLOSED;
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ TCPSTATES_INC(TCPS_CLOSED);
return (0);
}
@@ -1659,7 +1925,7 @@ tcp_disconnect(struct tcpcb *tp)
struct inpcb *inp = tp->t_inpcb;
struct socket *so = inp->inp_socket;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
/*
@@ -1679,7 +1945,7 @@ tcp_disconnect(struct tcpcb *tp)
sbflush(&so->so_rcv);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- tcp_output(tp);
+ tp->t_fb->tfb_tcp_output(tp);
}
}
@@ -1697,7 +1963,7 @@ static void
tcp_usrclosed(struct tcpcb *tp)
{
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
switch (tp->t_state) {
@@ -1705,9 +1971,9 @@ tcp_usrclosed(struct tcpcb *tp)
#ifdef TCP_OFFLOAD
tcp_offload_listen_stop(tp);
#endif
+ tcp_state_change(tp, TCPS_CLOSED);
/* FALLTHROUGH */
case TCPS_CLOSED:
- tp->t_state = TCPS_CLOSED;
tp = tcp_close(tp);
/*
* tcp_close() should never return NULL here as the socket is
@@ -1723,11 +1989,11 @@ tcp_usrclosed(struct tcpcb *tp)
break;
case TCPS_ESTABLISHED:
- tp->t_state = TCPS_FIN_WAIT_1;
+ tcp_state_change(tp, TCPS_FIN_WAIT_1);
break;
case TCPS_CLOSE_WAIT:
- tp->t_state = TCPS_LAST_ACK;
+ tcp_state_change(tp, TCPS_LAST_ACK);
break;
}
if (tp->t_state >= TCPS_FIN_WAIT_2) {
@@ -1910,6 +2176,10 @@ db_print_tflags(u_int t_flags)
db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
comma = 1;
}
+ if (t_flags & TF_FASTOPEN) {
+ db_printf("%sTF_FASTOPEN", comma ? ", " : "");
+ comma = 1;
+ }
}
static void
@@ -1984,8 +2254,8 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
"0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
db_print_indent(indent);
- db_printf("t_maxopd: %u t_rcvtime: %u t_startime: %u\n",
- tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
+ db_printf("t_rcvtime: %u t_startime: %u\n",
+ tp->t_rcvtime, tp->t_starttime);
db_print_indent(indent);
db_printf("t_rttime: %u t_rtsq: 0x%08x\n",
diff --git a/freebsd/sys/netinet/tcp_var.h b/freebsd/sys/netinet/tcp_var.h
index dbd9ed11..5dcd35b8 100644
--- a/freebsd/sys/netinet/tcp_var.h
+++ b/freebsd/sys/netinet/tcp_var.h
@@ -34,9 +34,11 @@
#define _NETINET_TCP_VAR_H_
#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
#ifdef _KERNEL
#include <net/vnet.h>
+#include <sys/mbuf.h>
/*
* Kernel variables for tcp.
@@ -73,7 +75,12 @@ struct sackhint {
tcp_seq last_sack_ack; /* Most recent/largest sacked ack */
int ispare; /* explicit pad for 64bit alignment */
- uint64_t _pad[2]; /* 1 sacked_bytes, 1 TBD */
+ int sacked_bytes; /*
+ * Total sacked bytes reported by the
+ * receiver via sack option
+ */
+ uint32_t _pad1[1]; /* TBD */
+ uint64_t _pad[1]; /* TBD */
};
struct tcptemp {
@@ -83,17 +90,75 @@ struct tcptemp {
#define tcp6cb tcpcb /* for KAME src sync over BSD*'s */
-/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
-#ifdef INET6
-#define ND6_HINT(tp) \
-do { \
- if ((tp) && (tp)->t_inpcb && \
- ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0) \
- nd6_nud_hint(NULL, NULL, 0); \
-} while (0)
-#else
-#define ND6_HINT(tp)
-#endif
+/*
+ * TODO: We yet need to brave plowing in
+ * to tcp_input() and the pru_usrreq() block.
+ * Right now these go to the old standards which
+ * are somewhat ok, but in the long term may
+ * need to be changed. If we do tackle tcp_input()
+ * then we need to get rid of the tcp_do_segment()
+ * function below.
+ */
+/* Flags for tcp functions */
+#define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */
+struct tcpcb;
+struct inpcb;
+struct sockopt;
+struct socket;
+
+/*
+ * If defining the optional tcp_timers, in the
+ * tfb_tcp_timer_stop call you must use the
+ * callout_async_drain() function with the
+ * tcp_timer_discard callback. You should check
+ * the return of callout_async_drain() and if 0
+ * increment tt_draincnt. Since the timer sub-system
+ * does not know your callbacks you must provide a
+ * stop_all function that loops through and calls
+ * tcp_timer_stop() with each of your defined timers.
+ * Adding a tfb_tcp_handoff_ok function allows the socket
+ * option to change stacks to query you even if the
+ * connection is in a later stage. You return 0 to
+ * say you can take over and run your stack, you return
+ * non-zero (an error number) to say no you can't.
+ * If the function is undefined you can only change
+ * in the early states (before connect or listen).
+ * tfb_tcp_fb_fini is changed to add a flag to tell
+ * the old stack if the tcb is being destroyed or
+ * not. A one in the flag means the TCB is being
+ * destroyed, a zero indicates its transitioning to
+ * another stack (via socket option).
+ */
+struct tcp_function_block {
+ char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX];
+ int (*tfb_tcp_output)(struct tcpcb *);
+ void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *,
+ int, int, uint8_t,
+ int);
+ int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt,
+ struct inpcb *inp, struct tcpcb *tp);
+ /* Optional memory allocation/free routine */
+ void (*tfb_tcp_fb_init)(struct tcpcb *);
+ void (*tfb_tcp_fb_fini)(struct tcpcb *, int);
+ /* Optional timers, must define all if you define one */
+ int (*tfb_tcp_timer_stop_all)(struct tcpcb *);
+ void (*tfb_tcp_timer_activate)(struct tcpcb *,
+ uint32_t, u_int);
+ int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
+ void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
+ void (*tfb_tcp_rexmit_tmr)(struct tcpcb *);
+ int (*tfb_tcp_handoff_ok)(struct tcpcb *);
+ volatile uint32_t tfb_refcnt;
+ uint32_t tfb_flags;
+};
+
+struct tcp_function {
+ TAILQ_ENTRY(tcp_function) tf_next;
+ struct tcp_function_block *tf_fb;
+};
+
+TAILQ_HEAD(tcp_funchead, tcp_function);
/*
* Tcp control block, one per tcp; fields:
@@ -113,7 +178,7 @@ struct tcpcb {
struct vnet *t_vnet; /* back pointer to parent vnet */
- tcp_seq snd_una; /* send unacknowledged */
+ tcp_seq snd_una; /* sent but unacknowledged */
tcp_seq snd_max; /* highest sequence number sent;
* used to recognize retransmits
*/
@@ -140,8 +205,6 @@ struct tcpcb {
u_long snd_spare2; /* unused */
tcp_seq snd_recover; /* for use in NewReno Fast Recovery */
- u_int t_maxopd; /* mss plus options */
-
u_int t_rcvtime; /* inactivity time */
u_int t_starttime; /* time connection was established */
u_int t_rtttime; /* RTT measurement start time */
@@ -152,6 +215,7 @@ struct tcpcb {
int t_rxtcur; /* current retransmit value (ticks) */
u_int t_maxseg; /* maximum segment size */
+ u_int t_pmtud_saved_maxseg; /* pre-blackhole MSS */
int t_srtt; /* smoothed round-trip time */
int t_rttvar; /* variance in round-trip time */
@@ -208,13 +272,35 @@ struct tcpcb {
u_int t_keepintvl; /* interval between keepalives */
u_int t_keepcnt; /* number of keepalives before close */
- u_int t_tsomax; /* tso burst length limit */
-
- uint32_t t_ispare[7]; /* 5 UTO, 2 TBD */
- void *t_pspare2[4]; /* 4 TBD */
- uint64_t _pad[5]; /* 5 TBD (1-2 CC/RTT?) */
- uint32_t t_tsomaxsegcount; /* TSO maximum segment count */
- uint32_t t_tsomaxsegsize; /* TSO maximum segment size in bytes */
+ u_int t_tsomax; /* TSO total burst length limit in bytes */
+ u_int t_tsomaxsegcount; /* TSO maximum segment count */
+ u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */
+ u_int t_flags2; /* More tcpcb flags storage */
+#if defined(_KERNEL) && defined(TCP_RFC7413)
+ uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */
+ uint64_t t_tfo_cookie; /* TCP Fast Open cookie */
+#else
+ uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */
+#endif
+ struct tcp_function_block *t_fb;/* TCP function call block */
+ void *t_fb_ptr; /* Pointer to t_fb specific data */
+#if defined(_KERNEL) && defined(TCP_RFC7413)
+ unsigned int *t_tfo_pending; /* TCP Fast Open pending counter */
+ void *t_pspare2[1]; /* 1 TCP_SIGNATURE */
+#else
+ void *t_pspare2[2]; /* 1 TCP_SIGNATURE, 1 TBD */
+#endif
+#if defined(_KERNEL) && defined(TCPPCAP)
+ struct mbufq t_inpkts; /* List of saved input packets. */
+ struct mbufq t_outpkts; /* List of saved output packets. */
+#ifdef _LP64
+ uint64_t _pad[0]; /* all used! */
+#else
+ uint64_t _pad[2]; /* 2 are available */
+#endif /* _LP64 */
+#else
+ uint64_t _pad[6];
+#endif /* defined(_KERNEL) && defined(TCPPCAP) */
};
/*
@@ -249,6 +335,7 @@ struct tcpcb {
#define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */
#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */
#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
+#define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */
#define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY)
#define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY
@@ -286,6 +373,13 @@ struct tcpcb {
#endif /* TCP_SIGNATURE */
/*
+ * Flags for PLPMTU handling, t_flags2
+ */
+#define TF2_PLPMTU_BLACKHOLE 0x00000001 /* Possible PLPMTUD Black Hole. */
+#define TF2_PLPMTU_PMTUD 0x00000002 /* Allowed to attempt PLPMTUD. */
+#define TF2_PLPMTU_MAXSEGSNT 0x00000004 /* Last seg sent was full seg. */
+
+/*
* Structure to hold TCP options that are only used during segment
* processing (in tcp_input), but not held in the tcpcb.
* It's basically used to reduce the number of parameters
@@ -294,21 +388,24 @@ struct tcpcb {
* options in tcp_addoptions.
*/
struct tcpopt {
- u_int64_t to_flags; /* which options are present */
+ u_int32_t to_flags; /* which options are present */
#define TOF_MSS 0x0001 /* maximum segment size */
#define TOF_SCALE 0x0002 /* window scaling */
#define TOF_SACKPERM 0x0004 /* SACK permitted */
#define TOF_TS 0x0010 /* timestamp */
#define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */
#define TOF_SACK 0x0080 /* Peer sent SACK option */
-#define TOF_MAXOPT 0x0100
+#define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */
+#define TOF_MAXOPT 0x0200
u_int32_t to_tsval; /* new timestamp */
u_int32_t to_tsecr; /* reflected timestamp */
u_char *to_sacks; /* pointer to the first SACK blocks */
u_char *to_signature; /* pointer to the TCP-MD5 signature */
+ u_char *to_tfo_cookie; /* pointer to the TFO cookie */
u_int16_t to_mss; /* maximum segment size */
u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
+ u_int8_t to_tfo_len; /* TFO cookie length */
u_int32_t to_spare; /* UTO */
};
@@ -322,7 +419,6 @@ struct hc_metrics_lite { /* must stay in sync with hc_metrics */
u_long rmx_ssthresh; /* outbound gateway buffer limit */
u_long rmx_rtt; /* estimated round trip time */
u_long rmx_rttvar; /* estimated rtt variance */
- u_long rmx_bandwidth; /* estimated bandwidth */
u_long rmx_cwnd; /* congestion window */
u_long rmx_sendpipe; /* outbound delay-bandwidth product */
u_long rmx_recvpipe; /* inbound delay-bandwidth product */
@@ -357,6 +453,8 @@ struct tcptw {
u_int t_starttime;
int tw_time;
TAILQ_ENTRY(tcptw) tw_2msl;
+ void *tw_pspare; /* TCP_SIGNATURE */
+ u_int *tw_spare; /* TCP_SIGNATURE */
};
#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
@@ -404,125 +502,133 @@ struct tcptw {
* but that's inconvenient at the moment.
*/
struct tcpstat {
- u_long tcps_connattempt; /* connections initiated */
- u_long tcps_accepts; /* connections accepted */
- u_long tcps_connects; /* connections established */
- u_long tcps_drops; /* connections dropped */
- u_long tcps_conndrops; /* embryonic connections dropped */
- u_long tcps_minmssdrops; /* average minmss too low drops */
- u_long tcps_closed; /* conn. closed (includes drops) */
- u_long tcps_segstimed; /* segs where we tried to get rtt */
- u_long tcps_rttupdated; /* times we succeeded */
- u_long tcps_delack; /* delayed acks sent */
- u_long tcps_timeoutdrop; /* conn. dropped in rxmt timeout */
- u_long tcps_rexmttimeo; /* retransmit timeouts */
- u_long tcps_persisttimeo; /* persist timeouts */
- u_long tcps_keeptimeo; /* keepalive timeouts */
- u_long tcps_keepprobe; /* keepalive probes sent */
- u_long tcps_keepdrops; /* connections dropped in keepalive */
-
- u_long tcps_sndtotal; /* total packets sent */
- u_long tcps_sndpack; /* data packets sent */
- u_long tcps_sndbyte; /* data bytes sent */
- u_long tcps_sndrexmitpack; /* data packets retransmitted */
- u_long tcps_sndrexmitbyte; /* data bytes retransmitted */
- u_long tcps_sndrexmitbad; /* unnecessary packet retransmissions */
- u_long tcps_sndacks; /* ack-only packets sent */
- u_long tcps_sndprobe; /* window probes sent */
- u_long tcps_sndurg; /* packets sent with URG only */
- u_long tcps_sndwinup; /* window update-only packets sent */
- u_long tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */
-
- u_long tcps_rcvtotal; /* total packets received */
- u_long tcps_rcvpack; /* packets received in sequence */
- u_long tcps_rcvbyte; /* bytes received in sequence */
- u_long tcps_rcvbadsum; /* packets received with ccksum errs */
- u_long tcps_rcvbadoff; /* packets received with bad offset */
- u_long tcps_rcvmemdrop; /* packets dropped for lack of memory */
- u_long tcps_rcvshort; /* packets received too short */
- u_long tcps_rcvduppack; /* duplicate-only packets received */
- u_long tcps_rcvdupbyte; /* duplicate-only bytes received */
- u_long tcps_rcvpartduppack; /* packets with some duplicate data */
- u_long tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */
- u_long tcps_rcvoopack; /* out-of-order packets received */
- u_long tcps_rcvoobyte; /* out-of-order bytes received */
- u_long tcps_rcvpackafterwin; /* packets with data after window */
- u_long tcps_rcvbyteafterwin; /* bytes rcvd after window */
- u_long tcps_rcvafterclose; /* packets rcvd after "close" */
- u_long tcps_rcvwinprobe; /* rcvd window probe packets */
- u_long tcps_rcvdupack; /* rcvd duplicate acks */
- u_long tcps_rcvacktoomuch; /* rcvd acks for unsent data */
- u_long tcps_rcvackpack; /* rcvd ack packets */
- u_long tcps_rcvackbyte; /* bytes acked by rcvd acks */
- u_long tcps_rcvwinupd; /* rcvd window update packets */
- u_long tcps_pawsdrop; /* segments dropped due to PAWS */
- u_long tcps_predack; /* times hdr predict ok for acks */
- u_long tcps_preddat; /* times hdr predict ok for data pkts */
- u_long tcps_pcbcachemiss;
- u_long tcps_cachedrtt; /* times cached RTT in route updated */
- u_long tcps_cachedrttvar; /* times cached rttvar updated */
- u_long tcps_cachedssthresh; /* times cached ssthresh updated */
- u_long tcps_usedrtt; /* times RTT initialized from route */
- u_long tcps_usedrttvar; /* times RTTVAR initialized from rt */
- u_long tcps_usedssthresh; /* times ssthresh initialized from rt*/
- u_long tcps_persistdrop; /* timeout in persist state */
- u_long tcps_badsyn; /* bogus SYN, e.g. premature ACK */
- u_long tcps_mturesent; /* resends due to MTU discovery */
- u_long tcps_listendrop; /* listen queue overflows */
- u_long tcps_badrst; /* ignored RSTs in the window */
-
- u_long tcps_sc_added; /* entry added to syncache */
- u_long tcps_sc_retransmitted; /* syncache entry was retransmitted */
- u_long tcps_sc_dupsyn; /* duplicate SYN packet */
- u_long tcps_sc_dropped; /* could not reply to packet */
- u_long tcps_sc_completed; /* successful extraction of entry */
- u_long tcps_sc_bucketoverflow; /* syncache per-bucket limit hit */
- u_long tcps_sc_cacheoverflow; /* syncache cache limit hit */
- u_long tcps_sc_reset; /* RST removed entry from syncache */
- u_long tcps_sc_stale; /* timed out or listen socket gone */
- u_long tcps_sc_aborted; /* syncache entry aborted */
- u_long tcps_sc_badack; /* removed due to bad ACK */
- u_long tcps_sc_unreach; /* ICMP unreachable received */
- u_long tcps_sc_zonefail; /* zalloc() failed */
- u_long tcps_sc_sendcookie; /* SYN cookie sent */
- u_long tcps_sc_recvcookie; /* SYN cookie received */
-
- u_long tcps_hc_added; /* entry added to hostcache */
- u_long tcps_hc_bucketoverflow; /* hostcache per bucket limit hit */
-
- u_long tcps_finwait2_drops; /* Drop FIN_WAIT_2 connection after time limit */
+ uint64_t tcps_connattempt; /* connections initiated */
+ uint64_t tcps_accepts; /* connections accepted */
+ uint64_t tcps_connects; /* connections established */
+ uint64_t tcps_drops; /* connections dropped */
+ uint64_t tcps_conndrops; /* embryonic connections dropped */
+ uint64_t tcps_minmssdrops; /* average minmss too low drops */
+ uint64_t tcps_closed; /* conn. closed (includes drops) */
+ uint64_t tcps_segstimed; /* segs where we tried to get rtt */
+ uint64_t tcps_rttupdated; /* times we succeeded */
+ uint64_t tcps_delack; /* delayed acks sent */
+ uint64_t tcps_timeoutdrop; /* conn. dropped in rxmt timeout */
+ uint64_t tcps_rexmttimeo; /* retransmit timeouts */
+ uint64_t tcps_persisttimeo; /* persist timeouts */
+ uint64_t tcps_keeptimeo; /* keepalive timeouts */
+ uint64_t tcps_keepprobe; /* keepalive probes sent */
+ uint64_t tcps_keepdrops; /* connections dropped in keepalive */
+
+ uint64_t tcps_sndtotal; /* total packets sent */
+ uint64_t tcps_sndpack; /* data packets sent */
+ uint64_t tcps_sndbyte; /* data bytes sent */
+ uint64_t tcps_sndrexmitpack; /* data packets retransmitted */
+ uint64_t tcps_sndrexmitbyte; /* data bytes retransmitted */
+ uint64_t tcps_sndrexmitbad; /* unnecessary packet retransmissions */
+ uint64_t tcps_sndacks; /* ack-only packets sent */
+ uint64_t tcps_sndprobe; /* window probes sent */
+ uint64_t tcps_sndurg; /* packets sent with URG only */
+ uint64_t tcps_sndwinup; /* window update-only packets sent */
+ uint64_t tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */
+
+ uint64_t tcps_rcvtotal; /* total packets received */
+ uint64_t tcps_rcvpack; /* packets received in sequence */
+ uint64_t tcps_rcvbyte; /* bytes received in sequence */
+ uint64_t tcps_rcvbadsum; /* packets received with ccksum errs */
+ uint64_t tcps_rcvbadoff; /* packets received with bad offset */
+ uint64_t tcps_rcvreassfull; /* packets dropped for no reass space */
+ uint64_t tcps_rcvshort; /* packets received too short */
+ uint64_t tcps_rcvduppack; /* duplicate-only packets received */
+ uint64_t tcps_rcvdupbyte; /* duplicate-only bytes received */
+ uint64_t tcps_rcvpartduppack; /* packets with some duplicate data */
+ uint64_t tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */
+ uint64_t tcps_rcvoopack; /* out-of-order packets received */
+ uint64_t tcps_rcvoobyte; /* out-of-order bytes received */
+ uint64_t tcps_rcvpackafterwin; /* packets with data after window */
+ uint64_t tcps_rcvbyteafterwin; /* bytes rcvd after window */
+ uint64_t tcps_rcvafterclose; /* packets rcvd after "close" */
+ uint64_t tcps_rcvwinprobe; /* rcvd window probe packets */
+ uint64_t tcps_rcvdupack; /* rcvd duplicate acks */
+ uint64_t tcps_rcvacktoomuch; /* rcvd acks for unsent data */
+ uint64_t tcps_rcvackpack; /* rcvd ack packets */
+ uint64_t tcps_rcvackbyte; /* bytes acked by rcvd acks */
+ uint64_t tcps_rcvwinupd; /* rcvd window update packets */
+ uint64_t tcps_pawsdrop; /* segments dropped due to PAWS */
+ uint64_t tcps_predack; /* times hdr predict ok for acks */
+ uint64_t tcps_preddat; /* times hdr predict ok for data pkts */
+ uint64_t tcps_pcbcachemiss;
+ uint64_t tcps_cachedrtt; /* times cached RTT in route updated */
+ uint64_t tcps_cachedrttvar; /* times cached rttvar updated */
+ uint64_t tcps_cachedssthresh; /* times cached ssthresh updated */
+ uint64_t tcps_usedrtt; /* times RTT initialized from route */
+ uint64_t tcps_usedrttvar; /* times RTTVAR initialized from rt */
+ uint64_t tcps_usedssthresh; /* times ssthresh initialized from rt*/
+ uint64_t tcps_persistdrop; /* timeout in persist state */
+ uint64_t tcps_badsyn; /* bogus SYN, e.g. premature ACK */
+ uint64_t tcps_mturesent; /* resends due to MTU discovery */
+ uint64_t tcps_listendrop; /* listen queue overflows */
+ uint64_t tcps_badrst; /* ignored RSTs in the window */
+
+ uint64_t tcps_sc_added; /* entry added to syncache */
+ uint64_t tcps_sc_retransmitted; /* syncache entry was retransmitted */
+ uint64_t tcps_sc_dupsyn; /* duplicate SYN packet */
+ uint64_t tcps_sc_dropped; /* could not reply to packet */
+ uint64_t tcps_sc_completed; /* successful extraction of entry */
+ uint64_t tcps_sc_bucketoverflow;/* syncache per-bucket limit hit */
+ uint64_t tcps_sc_cacheoverflow; /* syncache cache limit hit */
+ uint64_t tcps_sc_reset; /* RST removed entry from syncache */
+ uint64_t tcps_sc_stale; /* timed out or listen socket gone */
+ uint64_t tcps_sc_aborted; /* syncache entry aborted */
+ uint64_t tcps_sc_badack; /* removed due to bad ACK */
+ uint64_t tcps_sc_unreach; /* ICMP unreachable received */
+ uint64_t tcps_sc_zonefail; /* zalloc() failed */
+ uint64_t tcps_sc_sendcookie; /* SYN cookie sent */
+ uint64_t tcps_sc_recvcookie; /* SYN cookie received */
+
+ uint64_t tcps_hc_added; /* entry added to hostcache */
+ uint64_t tcps_hc_bucketoverflow;/* hostcache per bucket limit hit */
+
+ uint64_t tcps_finwait2_drops; /* Drop FIN_WAIT_2 connection after time limit */
/* SACK related stats */
- u_long tcps_sack_recovery_episode; /* SACK recovery episodes */
- u_long tcps_sack_rexmits; /* SACK rexmit segments */
- u_long tcps_sack_rexmit_bytes; /* SACK rexmit bytes */
- u_long tcps_sack_rcv_blocks; /* SACK blocks (options) received */
- u_long tcps_sack_send_blocks; /* SACK blocks (options) sent */
- u_long tcps_sack_sboverflow; /* times scoreboard overflowed */
+ uint64_t tcps_sack_recovery_episode; /* SACK recovery episodes */
+ uint64_t tcps_sack_rexmits; /* SACK rexmit segments */
+ uint64_t tcps_sack_rexmit_bytes; /* SACK rexmit bytes */
+ uint64_t tcps_sack_rcv_blocks; /* SACK blocks (options) received */
+ uint64_t tcps_sack_send_blocks; /* SACK blocks (options) sent */
+ uint64_t tcps_sack_sboverflow; /* times scoreboard overflowed */
/* ECN related stats */
- u_long tcps_ecn_ce; /* ECN Congestion Experienced */
- u_long tcps_ecn_ect0; /* ECN Capable Transport */
- u_long tcps_ecn_ect1; /* ECN Capable Transport */
- u_long tcps_ecn_shs; /* ECN successful handshakes */
- u_long tcps_ecn_rcwnd; /* # times ECN reduced the cwnd */
+ uint64_t tcps_ecn_ce; /* ECN Congestion Experienced */
+ uint64_t tcps_ecn_ect0; /* ECN Capable Transport */
+ uint64_t tcps_ecn_ect1; /* ECN Capable Transport */
+ uint64_t tcps_ecn_shs; /* ECN successful handshakes */
+ uint64_t tcps_ecn_rcwnd; /* # times ECN reduced the cwnd */
/* TCP_SIGNATURE related stats */
- u_long tcps_sig_rcvgoodsig; /* Total matching signature received */
- u_long tcps_sig_rcvbadsig; /* Total bad signature received */
- u_long tcps_sig_err_buildsig; /* Mismatching signature received */
- u_long tcps_sig_err_sigopt; /* No signature expected by socket */
- u_long tcps_sig_err_nosigopt; /* No signature provided by segment */
+ uint64_t tcps_sig_rcvgoodsig; /* Total matching signature received */
+ uint64_t tcps_sig_rcvbadsig; /* Total bad signature received */
+ uint64_t tcps_sig_err_buildsig; /* Mismatching signature received */
+ uint64_t tcps_sig_err_sigopt; /* No signature expected by socket */
+ uint64_t tcps_sig_err_nosigopt; /* No signature provided by segment */
- u_long _pad[12]; /* 6 UTO, 6 TBD */
+ uint64_t _pad[12]; /* 6 UTO, 6 TBD */
};
+#define tcps_rcvmemdrop tcps_rcvreassfull /* compat */
+
#ifdef _KERNEL
+#define TI_UNLOCKED 1
+#define TI_RLOCKED 2
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
-#define TCPSTAT_ADD(name, val) V_tcpstat.name += (val)
+#define TCPSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct tcpstat, tcpstat, name, (val))
#define TCPSTAT_INC(name) TCPSTAT_ADD(name, 1)
/*
@@ -530,7 +636,15 @@ struct tcpstat {
*/
void kmod_tcpstat_inc(int statnum);
#define KMOD_TCPSTAT_INC(name) \
- kmod_tcpstat_inc(offsetof(struct tcpstat, name) / sizeof(u_long))
+ kmod_tcpstat_inc(offsetof(struct tcpstat, name) / sizeof(uint64_t))
+
+/*
+ * Running TCP connection count by state.
+ */
+VNET_DECLARE(counter_u64_t, tcps_states[TCP_NSTATES]);
+#define V_tcps_states VNET(tcps_states)
+#define TCPSTATES_INC(state) counter_u64_add(V_tcps_states[state], 1)
+#define TCPSTATES_DEC(state) counter_u64_add(V_tcps_states[state], -1)
/*
* TCP specific helper hook point identifiers.
@@ -574,11 +688,11 @@ struct xtcpcb {
#endif
/*
- * Names for TCP sysctl objects
+ * Identifiers for TCP sysctl nodes
*/
#define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */
#define TCPCTL_MSSDFLT 3 /* MSS default */
-#define TCPCTL_STATS 4 /* statistics (read-only) */
+#define TCPCTL_STATS 4 /* statistics */
#define TCPCTL_RTTDFLT 5 /* default RTT estimate */
#define TCPCTL_KEEPIDLE 6 /* keepalive idle timer */
#define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */
@@ -590,26 +704,7 @@ struct xtcpcb {
#define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */
#define TCPCTL_SACK 14 /* Selective Acknowledgement,rfc 2018 */
#define TCPCTL_DROP 15 /* drop tcp connection */
-#define TCPCTL_MAXID 16
-#define TCPCTL_FINWAIT2_TIMEOUT 17
-
-#define TCPCTL_NAMES { \
- { 0, 0 }, \
- { "rfc1323", CTLTYPE_INT }, \
- { "mssdflt", CTLTYPE_INT }, \
- { "stats", CTLTYPE_STRUCT }, \
- { "rttdflt", CTLTYPE_INT }, \
- { "keepidle", CTLTYPE_INT }, \
- { "keepintvl", CTLTYPE_INT }, \
- { "sendspace", CTLTYPE_INT }, \
- { "recvspace", CTLTYPE_INT }, \
- { "keepinit", CTLTYPE_INT }, \
- { "pcblist", CTLTYPE_STRUCT }, \
- { "delacktime", CTLTYPE_INT }, \
- { "v6mssdflt", CTLTYPE_INT }, \
- { "maxid", CTLTYPE_INT }, \
-}
-
+#define TCPCTL_STATES 16 /* connection counts by TCP state */
#ifdef _KERNEL
#ifdef SYSCTL_DECL
@@ -620,13 +715,12 @@ MALLOC_DECLARE(M_TCPLOG);
VNET_DECLARE(struct inpcbhead, tcb); /* queue of active tcpcb's */
VNET_DECLARE(struct inpcbinfo, tcbinfo);
-VNET_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */
extern int tcp_log_in_vain;
VNET_DECLARE(int, tcp_mssdflt); /* XXX */
VNET_DECLARE(int, tcp_minmss);
VNET_DECLARE(int, tcp_delack_enabled);
VNET_DECLARE(int, tcp_do_rfc3390);
-VNET_DECLARE(int, tcp_do_initcwnd10);
+VNET_DECLARE(int, tcp_initcwnd_segments);
VNET_DECLARE(int, tcp_sendspace);
VNET_DECLARE(int, tcp_recvspace);
VNET_DECLARE(int, path_mtu_discovery);
@@ -634,12 +728,11 @@ VNET_DECLARE(int, tcp_do_rfc3465);
VNET_DECLARE(int, tcp_abc_l_var);
#define V_tcb VNET(tcb)
#define V_tcbinfo VNET(tcbinfo)
-#define V_tcpstat VNET(tcpstat)
#define V_tcp_mssdflt VNET(tcp_mssdflt)
#define V_tcp_minmss VNET(tcp_minmss)
#define V_tcp_delack_enabled VNET(tcp_delack_enabled)
#define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390)
-#define V_tcp_do_initcwnd10 VNET(tcp_do_initcwnd10)
+#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments)
#define V_tcp_sendspace VNET(tcp_sendspace)
#define V_tcp_recvspace VNET(tcp_recvspace)
#define V_path_mtu_discovery VNET(path_mtu_discovery)
@@ -659,50 +752,69 @@ VNET_DECLARE(int, tcp_ecn_maxretries);
VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
#define V_tcp_hhh VNET(tcp_hhh)
+VNET_DECLARE(int, tcp_do_rfc6675_pipe);
+#define V_tcp_do_rfc6675_pipe VNET(tcp_do_rfc6675_pipe)
+
int tcp_addoptions(struct tcpopt *, u_char *);
int tcp_ccalgounload(struct cc_algo *unload_algo);
struct tcpcb *
tcp_close(struct tcpcb *);
void tcp_discardcb(struct tcpcb *);
void tcp_twstart(struct tcpcb *);
-#if 0
-int tcp_twrecycleable(struct tcptw *tw);
-#endif
-void tcp_twclose(struct tcptw *_tw, int _reuse);
+void tcp_twclose(struct tcptw *, int);
void tcp_ctlinput(int, struct sockaddr *, void *);
int tcp_ctloutput(struct socket *, struct sockopt *);
struct tcpcb *
tcp_drop(struct tcpcb *, int);
void tcp_drain(void);
void tcp_init(void);
-#ifdef VIMAGE
-void tcp_destroy(void);
-#endif
void tcp_fini(void *);
char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
const void *);
char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
const void *);
int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
-void tcp_reass_init(void);
+void tcp_reass_global_init(void);
void tcp_reass_flush(struct tcpcb *);
-#ifdef VIMAGE
-void tcp_reass_destroy(void);
-#endif
-void tcp_input(struct mbuf *, int);
+void tcp_dooptions(struct tcpopt *, u_char *, int, int);
+void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
+ struct tcpcb *, int, int);
+void tcp_pulloutofband(struct socket *,
+ struct tcphdr *, struct mbuf *, int);
+void tcp_xmit_timer(struct tcpcb *, int);
+void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
+void cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
+ uint16_t type);
+void cc_conn_init(struct tcpcb *tp);
+void cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
+void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
+void hhook_run_tcp_est_in(struct tcpcb *tp,
+ struct tcphdr *th, struct tcpopt *to);
+
+int tcp_input(struct mbuf **, int *, int);
+void tcp_do_segment(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *, int, int, uint8_t,
+ int);
+
+int register_tcp_functions(struct tcp_function_block *blk, int wait);
+int deregister_tcp_functions(struct tcp_function_block *blk);
+struct tcp_function_block *find_and_ref_tcp_functions(struct tcp_function_set *fs);
+struct tcp_function_block *find_and_ref_tcp_fb(struct tcp_function_block *blk);
+int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp);
+
u_long tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
u_long tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
+u_int tcp_maxseg(const struct tcpcb *);
void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
struct tcp_ifcap *);
void tcp_mss(struct tcpcb *, int);
int tcp_mssopt(struct in_conninfo *);
struct inpcb *
tcp_drop_syn_sent(struct inpcb *, int);
-struct inpcb *
- tcp_mtudisc(struct inpcb *, int);
struct tcpcb *
tcp_newtcpcb(struct inpcb *);
int tcp_output(struct tcpcb *);
+void tcp_state_change(struct tcpcb *, int);
void tcp_respond(struct tcpcb *, void *,
struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
void tcp_tw_init(void);
@@ -712,19 +824,25 @@ void tcp_tw_destroy(void);
void tcp_tw_zone_change(void);
int tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *,
struct mbuf *, int);
-int tcp_twrespond(struct tcptw *, int);
void tcp_setpersist(struct tcpcb *);
#ifdef TCP_SIGNATURE
+struct secasvar;
+struct secasvar *tcp_get_sav(struct mbuf *, u_int);
+int tcp_signature_do_compute(struct mbuf *, int, int, u_char *,
+ struct secasvar *);
int tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int);
int tcp_signature_verify(struct mbuf *, int, int, int, struct tcpopt *,
struct tcphdr *, u_int);
+int tcp_signature_check(struct mbuf *m, int off0, int tlen, int optlen,
+ struct tcpopt *to, struct tcphdr *th, u_int tcpbflag);
#endif
void tcp_slowtimo(void);
struct tcptemp *
tcpip_maketemplate(struct inpcb *);
void tcpip_fillheaders(struct inpcb *, void *, void *);
-void tcp_timer_activate(struct tcpcb *, int, u_int);
-int tcp_timer_active(struct tcpcb *, int);
+void tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
+int tcp_timer_active(struct tcpcb *, uint32_t);
+void tcp_timer_stop(struct tcpcb *, uint32_t);
void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
/*
* All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
@@ -741,7 +859,7 @@ void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *);
extern struct pr_usrreqs tcp_usrreqs;
tcp_seq tcp_new_isn(struct tcpcb *);
-void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
+int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);
@@ -750,9 +868,29 @@ void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
void tcp_free_sackholes(struct tcpcb *tp);
int tcp_newreno(struct tcpcb *, struct tcphdr *);
u_long tcp_seq_subtract(u_long, u_long );
+int tcp_compute_pipe(struct tcpcb *);
-void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
+static inline void
+tcp_fields_to_host(struct tcphdr *th)
+{
+ th->th_seq = ntohl(th->th_seq);
+ th->th_ack = ntohl(th->th_ack);
+ th->th_win = ntohs(th->th_win);
+ th->th_urp = ntohs(th->th_urp);
+}
+
+#ifdef TCP_SIGNATURE
+static inline void
+tcp_fields_to_net(struct tcphdr *th)
+{
+
+ th->th_seq = htonl(th->th_seq);
+ th->th_ack = htonl(th->th_ack);
+ th->th_win = htons(th->th_win);
+ th->th_urp = htons(th->th_urp);
+}
+#endif
#endif /* _KERNEL */
#endif /* _NETINET_TCP_VAR_H_ */
diff --git a/freebsd/sys/netinet/udp_usrreq.c b/freebsd/sys/netinet/udp_usrreq.c
index bf95e954..7eb11648 100644
--- a/freebsd/sys/netinet/udp_usrreq.c
+++ b/freebsd/sys/netinet/udp_usrreq.c
@@ -5,6 +5,7 @@
* The Regents of the University of California.
* Copyright (c) 2008 Robert N. M. Watson
* Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * Copyright (c) 2014 Kevin Lo
* All rights reserved.
*
* Portions of this software were developed by Robert N. M. Watson under
@@ -44,10 +45,10 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/domain.h>
@@ -60,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -71,9 +73,12 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
+#include <net/rss_config.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -90,6 +95,8 @@ __FBSDID("$FreeBSD$");
#endif
#include <netinet/udp.h>
#include <netinet/udp_var.h>
+#include <netinet/udplite.h>
+#include <netinet/in_rss.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -101,8 +108,9 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
/*
- * UDP protocol implementation.
+ * UDP and UDP-Lite protocols implementation.
* Per RFC 768, August, 1980.
+ * Per RFC 3828, July, 2004.
*/
/*
@@ -112,7 +120,7 @@ __FBSDID("$FreeBSD$");
* cause problems (especially for NFS data blocks).
*/
VNET_DEFINE(int, udp_cksum) = 1;
-SYSCTL_VNET_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(udp_cksum), 0, "compute udp checksum");
int udp_log_in_vain = 0;
@@ -120,12 +128,17 @@ SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
&udp_log_in_vain, 0, "Log all incoming UDP packets");
VNET_DEFINE(int, udp_blackhole) = 0;
-SYSCTL_VNET_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(udp_blackhole), 0,
"Do not send port unreachables for refused connects");
+static VNET_DEFINE(int, udp_require_l2_bcast) = 0;
+#define V_udp_require_l2_bcast VNET(udp_require_l2_bcast)
+SYSCTL_INT(_net_inet_udp, OID_AUTO, require_l2_bcast, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(udp_require_l2_bcast), 0,
+ "Only treat packets sent to an L2 broadcast address as broadcast packets");
+
u_long udp_sendspace = 9216; /* really max datagram size */
- /* 40 1K datagrams */
SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
&udp_sendspace, 0, "Maximum outgoing UDP datagram size");
@@ -135,13 +148,15 @@ u_long udp_recvspace = 40 * (1024 +
#else
sizeof(struct sockaddr_in)
#endif
- );
+ ); /* 40 1K datagrams */
SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
&udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
VNET_DEFINE(struct inpcbhead, udb); /* from udp_var.h */
VNET_DEFINE(struct inpcbinfo, udbinfo);
+VNET_DEFINE(struct inpcbhead, ulitecb);
+VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
static VNET_DEFINE(uma_zone_t, udpcb_zone);
#define V_udpcb_zone VNET(udpcb_zone)
@@ -149,11 +164,14 @@ static VNET_DEFINE(uma_zone_t, udpcb_zone);
#define UDBHASHSIZE 128
#endif
-VNET_DEFINE(struct udpstat, udpstat); /* from udp_var.h */
-SYSCTL_VNET_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(udpstat), udpstat,
- "UDP statistics (struct udpstat, netinet/udp_var.h)");
+VNET_PCPUSTAT_DEFINE(struct udpstat, udpstat); /* from udp_var.h */
+VNET_PCPUSTAT_SYSINIT(udpstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat,
+ udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(udpstat);
+#endif /* VIMAGE */
#ifdef INET
static void udp_detach(struct socket *so);
static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
@@ -187,20 +205,47 @@ udp_inpcb_init(void *mem, int size, int flags)
return (0);
}
+static int
+udplite_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp;
+
+ inp = mem;
+ INP_LOCK_INIT(inp, "inp", "udpliteinp");
+ return (0);
+}
+
void
udp_init(void)
{
+ /*
+ * For now default to 2-tuple UDP hashing - until the fragment
+ * reassembly code can also update the flowid.
+ *
+ * Once we can calculate the flowid that way and re-establish
+ * a 4-tuple, flip this to 4-tuple.
+ */
in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
- "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE,
+ "udp_inpcb", udp_inpcb_init, NULL, 0,
IPI_HASHFIELDS_2TUPLE);
V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
uma_zone_set_max(V_udpcb_zone, maxsockets);
+ uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached");
EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
}
+void
+udplite_init(void)
+{
+
+ in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE,
+ UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init, NULL,
+ 0, IPI_HASHFIELDS_2TUPLE);
+}
+
/*
* Kernel module interface for updating udpstat. The argument is an index
* into udpstat treated as an array of u_long. While this encodes the
@@ -212,7 +257,7 @@ void
kmod_udpstat_inc(int statnum)
{
- (*((u_long *)&V_udpstat + statnum))++;
+ counter_u64_add(VNET(udpstat)[statnum], 1);
}
int
@@ -235,13 +280,23 @@ udp_discardcb(struct udpcb *up)
}
#ifdef VIMAGE
-void
-udp_destroy(void)
+static void
+udp_destroy(void *unused __unused)
{
in_pcbinfo_destroy(&V_udbinfo);
uma_zdestroy(V_udpcb_zone);
}
+VNET_SYSUNINIT(udp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udp_destroy, NULL);
+
+static void
+udplite_destroy(void *unused __unused)
+{
+
+ in_pcbinfo_destroy(&V_ulitecbinfo);
+}
+VNET_SYSUNINIT(udplite, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udplite_destroy,
+ NULL);
#endif
#ifdef INET
@@ -251,14 +306,23 @@ udp_destroy(void)
* contains the source address. If the socket ends up being an IPv6 socket,
* udp_append() will convert to a sockaddr_in6 before passing the address
* into the socket code.
+ *
+ * In the normal case udp_append() will return 0, indicating that you
+ * must unlock the inp. However if a tunneling protocol is in place we increment
+ * the inpcb refcnt and unlock the inp, on return from the tunneling protocol we
+ * then decrement the reference count. If the inp_rele returns 1, indicating the
+ * inp is gone, we return that to the caller to tell them *not* to unlock
+ * the inp. In the case of multi-cast this will cause the distribution
+ * to stop (though most tunneling protocols known currently do *not* use
+ * multicast).
*/
-static void
+static int
udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
struct sockaddr_in *udp_in)
{
struct sockaddr *append_sa;
struct socket *so;
- struct mbuf *opts = 0;
+ struct mbuf *opts = NULL;
#ifdef INET6
struct sockaddr_in6 udp_in6;
#endif
@@ -271,21 +335,21 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
*/
up = intoudpcb(inp);
if (up->u_tun_func != NULL) {
- (*up->u_tun_func)(n, off, inp);
- return;
+ in_pcbref(inp);
+ INP_RUNLOCK(inp);
+ (*up->u_tun_func)(n, off, inp, (struct sockaddr *)udp_in,
+ up->u_tun_ctx);
+ INP_RLOCK(inp);
+ return (in_pcbrele_rlocked(inp));
}
- if (n == NULL)
- return;
-
off += sizeof(struct udphdr);
#ifdef IPSEC
/* Check AH/ESP integrity. */
if (ipsec4_in_reject(n, inp)) {
m_freem(n);
- IPSECSTAT_INC(in_polvio);
- return;
+ return (0);
}
#ifdef IPSEC_NAT_T
up = intoudpcb(inp);
@@ -293,14 +357,14 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
if (up->u_flags & UF_ESPINUDP_ALL) { /* IPSec UDP encaps. */
n = udp4_espdecap(inp, n, off);
if (n == NULL) /* Consumed. */
- return;
+ return (0);
}
#endif /* IPSEC_NAT_T */
#endif /* IPSEC */
#ifdef MAC
if (mac_inpcb_check_deliver(inp, n) != 0) {
m_freem(n);
- return;
+ return (0);
}
#endif /* MAC */
if (inp->inp_flags & INP_CONTROLOPTS ||
@@ -334,22 +398,28 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
UDPSTAT_INC(udps_fullsock);
} else
sorwakeup_locked(so);
+ return (0);
}
-void
-udp_input(struct mbuf *m, int off)
+int
+udp_input(struct mbuf **mp, int *offp, int proto)
{
- int iphlen = off;
struct ip *ip;
struct udphdr *uh;
struct ifnet *ifp;
struct inpcb *inp;
- int len;
+ uint16_t len, ip_len;
+ struct inpcbinfo *pcbinfo;
struct ip save_ip;
struct sockaddr_in udp_in;
+ struct mbuf *m;
struct m_tag *fwd_tag;
+ int cscov_partial, iphlen;
+ m = *mp;
+ iphlen = *offp;
ifp = m->m_pkthdr.rcvif;
+ *mp = NULL;
UDPSTAT_INC(udps_ipackets);
/*
@@ -358,7 +428,7 @@ udp_input(struct mbuf *m, int off)
* check the checksum with options still present.
*/
if (iphlen > sizeof (struct ip)) {
- ip_stripoptions(m, (struct mbuf *)0);
+ ip_stripoptions(m);
iphlen = sizeof(struct ip);
}
@@ -367,13 +437,14 @@ udp_input(struct mbuf *m, int off)
*/
ip = mtod(m, struct ip *);
if (m->m_len < iphlen + sizeof(struct udphdr)) {
- if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
+ if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) {
UDPSTAT_INC(udps_hdrops);
- return;
+ return (IPPROTO_DONE);
}
ip = mtod(m, struct ip *);
}
uh = (struct udphdr *)((caddr_t)ip + iphlen);
+ cscov_partial = (proto == IPPROTO_UDPLITE) ? 1 : 0;
/*
* Destination port of 0 is illegal, based on RFC768.
@@ -396,13 +467,20 @@ udp_input(struct mbuf *m, int off)
* reflect UDP length, drop.
*/
len = ntohs((u_short)uh->uh_ulen);
- if (ip->ip_len != len) {
- if (len > ip->ip_len || len < sizeof(struct udphdr)) {
+ ip_len = ntohs(ip->ip_len) - iphlen;
+ if (proto == IPPROTO_UDPLITE && (len == 0 || len == ip_len)) {
+ /* Zero means checksum over the complete packet. */
+ if (len == 0)
+ len = ip_len;
+ cscov_partial = 0;
+ }
+ if (ip_len != len) {
+ if (len > ip_len || len < sizeof(struct udphdr)) {
UDPSTAT_INC(udps_badlen);
goto badunlocked;
}
- m_adj(m, len - ip->ip_len);
- /* ip->ip_len = len; */
+ if (proto == IPPROTO_UDP)
+ m_adj(m, len - ip_len);
}
/*
@@ -420,39 +498,53 @@ udp_input(struct mbuf *m, int off)
if (uh->uh_sum) {
u_short uh_sum;
- if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+ if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) &&
+ !cscov_partial) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
uh_sum = m->m_pkthdr.csum_data;
else
uh_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htonl((u_short)len +
- m->m_pkthdr.csum_data + IPPROTO_UDP));
+ m->m_pkthdr.csum_data + proto));
uh_sum ^= 0xffff;
} else {
char b[9];
bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
bzero(((struct ipovly *)ip)->ih_x1, 9);
- ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
+ ((struct ipovly *)ip)->ih_len = (proto == IPPROTO_UDP) ?
+ uh->uh_ulen : htons(ip_len);
uh_sum = in_cksum(m, len + sizeof (struct ip));
bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
}
if (uh_sum) {
UDPSTAT_INC(udps_badsum);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
}
- } else
- UDPSTAT_INC(udps_nosum);
+ } else {
+ if (proto == IPPROTO_UDP) {
+ UDPSTAT_INC(udps_nosum);
+ } else {
+ /* UDPLite requires a checksum */
+ /* XXX: What is the right UDPLite MIB counter here? */
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
+ }
+ pcbinfo = udp_get_inpcbinfo(proto);
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
- in_broadcast(ip->ip_dst, ifp)) {
+ ((!V_udp_require_l2_bcast || m->m_flags & M_BCAST) &&
+ in_broadcast(ip->ip_dst, ifp))) {
struct inpcb *last;
+ struct inpcbhead *pcblist;
struct ip_moptions *imo;
- INP_INFO_RLOCK(&V_udbinfo);
+ INP_INFO_RLOCK(pcbinfo);
+ pcblist = udp_get_pcblist(proto);
last = NULL;
- LIST_FOREACH(inp, &V_udb, inp_list) {
+ LIST_FOREACH(inp, pcblist, inp_list) {
if (inp->inp_lport != uh->uh_dport)
continue;
#ifdef INET6
@@ -511,8 +603,14 @@ udp_input(struct mbuf *m, int off)
if (last != NULL) {
struct mbuf *n;
- n = m_copy(m, 0, M_COPYALL);
- udp_append(last, ip, n, iphlen, &udp_in);
+ if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
+ UDP_PROBE(receive, NULL, last, ip,
+ last, uh);
+ if (udp_append(last, ip, n, iphlen,
+ &udp_in)) {
+ goto inp_lost;
+ }
+ }
INP_RUNLOCK(last);
}
last = inp;
@@ -538,13 +636,15 @@ udp_input(struct mbuf *m, int off)
UDPSTAT_INC(udps_noportbcast);
if (inp)
INP_RUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_INFO_RUNLOCK(pcbinfo);
goto badunlocked;
}
- udp_append(last, ip, m, iphlen, &udp_in);
- INP_RUNLOCK(last);
- INP_INFO_RUNLOCK(&V_udbinfo);
- return;
+ UDP_PROBE(receive, NULL, last, ip, last, uh);
+ if (udp_append(last, ip, m, iphlen, &udp_in) == 0)
+ INP_RUNLOCK(last);
+ inp_lost:
+ INP_INFO_RUNLOCK(pcbinfo);
+ return (IPPROTO_DONE);
}
/*
@@ -564,7 +664,7 @@ udp_input(struct mbuf *m, int off)
* Transparently forwarded. Pretend to be the destination.
* Already got one like this?
*/
- inp = in_pcblookup_mbuf(&V_udbinfo, ip->ip_src, uh->uh_sport,
+ inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m);
if (!inp) {
/*
@@ -572,7 +672,7 @@ udp_input(struct mbuf *m, int off)
* Because we've rewritten the destination address,
* any hardware-generated hash is ignored.
*/
- inp = in_pcblookup(&V_udbinfo, ip->ip_src,
+ inp = in_pcblookup(pcbinfo, ip->ip_src,
uh->uh_sport, next_hop->sin_addr,
next_hop->sin_port ? htons(next_hop->sin_port) :
uh->uh_dport, INPLOOKUP_WILDCARD |
@@ -582,7 +682,7 @@ udp_input(struct mbuf *m, int off)
m_tag_delete(m, fwd_tag);
m->m_flags &= ~M_IP_NEXTHOP;
} else
- inp = in_pcblookup_mbuf(&V_udbinfo, ip->ip_src, uh->uh_sport,
+ inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
INPLOOKUP_RLOCKPCB, ifp, m);
if (inp == NULL) {
@@ -605,9 +705,8 @@ udp_input(struct mbuf *m, int off)
if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
goto badunlocked;
*ip = save_ip;
- ip->ip_len += iphlen;
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
- return;
+ return (IPPROTO_DONE);
}
/*
@@ -617,14 +716,27 @@ udp_input(struct mbuf *m, int off)
if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
INP_RUNLOCK(inp);
m_freem(m);
- return;
+ return (IPPROTO_DONE);
+ }
+ if (cscov_partial) {
+ struct udpcb *up;
+
+ up = intoudpcb(inp);
+ if (up->u_rxcslen == 0 || up->u_rxcslen > len) {
+ INP_RUNLOCK(inp);
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
}
- udp_append(inp, ip, m, iphlen, &udp_in);
- INP_RUNLOCK(inp);
- return;
+
+ UDP_PROBE(receive, NULL, inp, ip, inp, uh);
+ if (udp_append(inp, ip, m, iphlen, &udp_in) == 0)
+ INP_RUNLOCK(inp);
+ return (IPPROTO_DONE);
badunlocked:
m_freem(m);
+ return (IPPROTO_DONE);
}
#endif /* INET */
@@ -643,6 +755,11 @@ udp_notify(struct inpcb *inp, int errno)
* or a write lock, but a read lock is sufficient.
*/
INP_LOCK_ASSERT(inp);
+ if ((errno == EHOSTUNREACH || errno == ENETUNREACH ||
+ errno == EHOSTDOWN) && inp->inp_route.ro_rt) {
+ RTFREE(inp->inp_route.ro_rt);
+ inp->inp_route.ro_rt = (struct rtentry *)NULL;
+ }
inp->inp_socket->so_error = errno;
sorwakeup(inp->inp_socket);
@@ -651,8 +768,9 @@ udp_notify(struct inpcb *inp, int errno)
}
#ifdef INET
-void
-udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+static void
+udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
+ struct inpcbinfo *pcbinfo)
{
struct ip *ip = vip;
struct udphdr *uh;
@@ -663,11 +781,11 @@ udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
return;
- /*
- * Redirects don't need to be handled up here.
- */
- if (PRC_IS_REDIRECT(cmd))
+ if (PRC_IS_REDIRECT(cmd)) {
+ /* signal EHOSTDOWN, as it flushes the cached route */
+ in_pcbnotifyall(&V_udbinfo, faddr, EHOSTDOWN, udp_notify);
return;
+ }
/*
* Hostdead is ugly because it goes linearly through all PCBs.
@@ -681,7 +799,7 @@ udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
return;
if (ip != NULL) {
uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
- inp = in_pcblookup(&V_udbinfo, faddr, uh->uh_dport,
+ inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
INP_RLOCK_ASSERT(inp);
@@ -689,11 +807,39 @@ udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
udp_notify(inp, inetctlerrmap[cmd]);
}
INP_RUNLOCK(inp);
+ } else {
+ inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
+ ip->ip_src, uh->uh_sport,
+ INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
+ if (inp != NULL) {
+ struct udpcb *up;
+
+ up = intoudpcb(inp);
+ if (up->u_icmp_func != NULL) {
+ INP_RUNLOCK(inp);
+ (*up->u_icmp_func)(cmd, sa, vip, up->u_tun_ctx);
+ } else {
+ INP_RUNLOCK(inp);
+ }
+ }
}
} else
- in_pcbnotifyall(&V_udbinfo, faddr, inetctlerrmap[cmd],
+ in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd],
udp_notify);
}
+void
+udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+
+ return (udp_common_ctlinput(cmd, sa, vip, &V_udbinfo));
+}
+
+void
+udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+
+ return (udp_common_ctlinput(cmd, sa, vip, &V_ulitecbinfo));
+}
#endif /* INET */
static int
@@ -740,7 +886,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
return (error);
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
- if (inp_list == 0)
+ if (inp_list == NULL)
return (ENOMEM);
INP_INFO_RLOCK(&V_udbinfo);
@@ -849,16 +995,16 @@ SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
int
udp_ctloutput(struct socket *so, struct sockopt *sopt)
{
- int error = 0, optval;
struct inpcb *inp;
-#ifdef IPSEC_NAT_T
struct udpcb *up;
-#endif
+ int isudplite, error, optval;
+ error = 0;
+ isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 1 : 0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
INP_WLOCK(inp);
- if (sopt->sopt_level != IPPROTO_UDP) {
+ if (sopt->sopt_level != so->so_proto->pr_protocol) {
#ifdef INET6
if (INP_CHECK_SOCKAF(so, AF_INET6)) {
INP_WUNLOCK(inp);
@@ -916,6 +1062,34 @@ udp_ctloutput(struct socket *so, struct sockopt *sopt)
}
INP_WUNLOCK(inp);
break;
+ case UDPLITE_SEND_CSCOV:
+ case UDPLITE_RECV_CSCOV:
+ if (!isudplite) {
+ INP_WUNLOCK(inp);
+ error = ENOPROTOOPT;
+ break;
+ }
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &optval, sizeof(optval),
+ sizeof(optval));
+ if (error != 0)
+ break;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+ INP_WLOCK(inp);
+ up = intoudpcb(inp);
+ KASSERT(up != NULL, ("%s: up == NULL", __func__));
+ if ((optval != 0 && optval < 8) || (optval > 65535)) {
+ INP_WUNLOCK(inp);
+ error = EINVAL;
+ break;
+ }
+ if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
+ up->u_txcslen = optval;
+ else
+ up->u_rxcslen = optval;
+ INP_WUNLOCK(inp);
+ break;
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -933,6 +1107,22 @@ udp_ctloutput(struct socket *so, struct sockopt *sopt)
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
#endif
+ case UDPLITE_SEND_CSCOV:
+ case UDPLITE_RECV_CSCOV:
+ if (!isudplite) {
+ INP_WUNLOCK(inp);
+ error = ENOPROTOOPT;
+ break;
+ }
+ up = intoudpcb(inp);
+ KASSERT(up != NULL, ("%s: up == NULL", __func__));
+ if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
+ optval = up->u_txcslen;
+ else
+ optval = up->u_rxcslen;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof(optval));
+ break;
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -955,12 +1145,18 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
int len = m->m_pkthdr.len;
struct in_addr faddr, laddr;
struct cmsghdr *cm;
+ struct inpcbinfo *pcbinfo;
struct sockaddr_in *sin, src;
+ int cscov_partial = 0;
int error = 0;
int ipflags;
u_short fport, lport;
- int unlock_udbinfo;
+ int unlock_udbinfo, unlock_inp;
u_char tos;
+ uint8_t pr;
+ uint16_t cscov = 0;
+ uint32_t flowid = 0;
+ uint8_t flowtype = M_HASHTYPE_NONE;
/*
* udp_output() may need to temporarily bind or connect the current
@@ -976,7 +1172,15 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
}
src.sin_family = 0;
- INP_RLOCK(inp);
+ sin = (struct sockaddr_in *)addr;
+ if (sin == NULL ||
+ (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
+ INP_WLOCK(inp);
+ unlock_inp = UH_WLOCKED;
+ } else {
+ INP_RLOCK(inp);
+ unlock_inp = UH_RLOCKED;
+ }
tos = inp->inp_ip_tos;
if (control != NULL) {
/*
@@ -984,7 +1188,10 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
* stored in a single mbuf.
*/
if (control->m_next) {
- INP_RUNLOCK(inp);
+ if (unlock_inp == UH_WLOCKED)
+ INP_WUNLOCK(inp);
+ else
+ INP_RUNLOCK(inp);
m_freem(control);
m_freem(m);
return (EINVAL);
@@ -1024,6 +1231,31 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
tos = *(u_char *)CMSG_DATA(cm);
break;
+ case IP_FLOWID:
+ if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
+ error = EINVAL;
+ break;
+ }
+ flowid = *(uint32_t *) CMSG_DATA(cm);
+ break;
+
+ case IP_FLOWTYPE:
+ if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
+ error = EINVAL;
+ break;
+ }
+ flowtype = *(uint32_t *) CMSG_DATA(cm);
+ break;
+
+#ifdef RSS
+ case IP_RSSBUCKETID:
+ if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
+ error = EINVAL;
+ break;
+ }
+ /* This is just a placeholder for now */
+ break;
+#endif /* RSS */
default:
error = ENOPROTOOPT;
break;
@@ -1034,7 +1266,10 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
m_freem(control);
}
if (error) {
- INP_RUNLOCK(inp);
+ if (unlock_inp == UH_WLOCKED)
+ INP_WUNLOCK(inp);
+ else
+ INP_RUNLOCK(inp);
m_freem(m);
return (error);
}
@@ -1055,12 +1290,12 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
*
* XXXRW: Check that hash locking update here is correct.
*/
+ pr = inp->inp_socket->so_proto->pr_protocol;
+ pcbinfo = udp_get_inpcbinfo(pr);
sin = (struct sockaddr_in *)addr;
if (sin != NULL &&
(inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
- INP_RUNLOCK(inp);
- INP_WLOCK(inp);
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
unlock_udbinfo = UH_WLOCKED;
} else if ((sin != NULL && (
(sin->sin_addr.s_addr == INADDR_ANY) ||
@@ -1068,7 +1303,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
(inp->inp_laddr.s_addr == INADDR_ANY) ||
(inp->inp_lport == 0))) ||
(src.sin_family == AF_INET)) {
- INP_HASH_RLOCK(&V_udbinfo);
+ INP_HASH_RLOCK(pcbinfo);
unlock_udbinfo = UH_RLOCKED;
} else
unlock_udbinfo = UH_UNLOCKED;
@@ -1081,7 +1316,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
laddr = inp->inp_laddr;
lport = inp->inp_lport;
if (src.sin_family == AF_INET) {
- INP_HASH_LOCK_ASSERT(&V_udbinfo);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
if ((lport == 0) ||
(laddr.s_addr == INADDR_ANY &&
src.sin_addr.s_addr == INADDR_ANY)) {
@@ -1132,7 +1367,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
inp->inp_lport == 0 ||
sin->sin_addr.s_addr == INADDR_ANY ||
sin->sin_addr.s_addr == INADDR_BROADCAST) {
- INP_HASH_LOCK_ASSERT(&V_udbinfo);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
&lport, &faddr.s_addr, &fport, NULL,
td->td_ucred);
@@ -1147,7 +1382,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
if (inp->inp_laddr.s_addr == INADDR_ANY &&
inp->inp_lport == 0) {
INP_WLOCK_ASSERT(inp);
- INP_HASH_WLOCK_ASSERT(&V_udbinfo);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
/*
* Remember addr if jailed, to prevent
* rebinding.
@@ -1181,7 +1416,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
* link-layer headers. Immediate slide the data pointer back forward
* since we won't use that space at this layer.
*/
- M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_DONTWAIT);
+ M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_NOWAIT);
if (m == NULL) {
error = ENOBUFS;
goto release;
@@ -1196,12 +1431,30 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
*/
ui = mtod(m, struct udpiphdr *);
bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */
- ui->ui_pr = IPPROTO_UDP;
+ ui->ui_pr = pr;
ui->ui_src = laddr;
ui->ui_dst = faddr;
ui->ui_sport = lport;
ui->ui_dport = fport;
ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
+ if (pr == IPPROTO_UDPLITE) {
+ struct udpcb *up;
+ uint16_t plen;
+
+ up = intoudpcb(inp);
+ cscov = up->u_txcslen;
+ plen = (u_short)len + sizeof(struct udphdr);
+ if (cscov >= plen)
+ cscov = 0;
+ ui->ui_len = htons(plen);
+ ui->ui_ulen = htons(cscov);
+ /*
+ * For UDP-Lite, checksum coverage length of zero means
+ * the entire UDPLite packet is covered by the checksum.
+ */
+ cscov_partial = (cscov == 0) ? 0 : 1;
+ } else
+ ui->ui_v = IPVERSION << 4;
/*
* Set the Don't Fragment bit in the IP header.
@@ -1210,7 +1463,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
struct ip *ip;
ip = (struct ip *)&ui->ui_i;
- ip->ip_off |= IP_DF;
+ ip->ip_off |= htons(IP_DF);
}
ipflags = 0;
@@ -1228,27 +1481,90 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
/*
* Set up checksum and output datagram.
*/
- if (V_udp_cksum) {
+ ui->ui_sum = 0;
+ if (pr == IPPROTO_UDPLITE) {
+ if (inp->inp_flags & INP_ONESBCAST)
+ faddr.s_addr = INADDR_BROADCAST;
+ if (cscov_partial) {
+ if ((ui->ui_sum = in_cksum(m, sizeof(struct ip) + cscov)) == 0)
+ ui->ui_sum = 0xffff;
+ } else {
+ if ((ui->ui_sum = in_cksum(m, sizeof(struct udpiphdr) + len)) == 0)
+ ui->ui_sum = 0xffff;
+ }
+ } else if (V_udp_cksum) {
if (inp->inp_flags & INP_ONESBCAST)
faddr.s_addr = INADDR_BROADCAST;
ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
- htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
+ htons((u_short)len + sizeof(struct udphdr) + pr));
m->m_pkthdr.csum_flags = CSUM_UDP;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
- } else
- ui->ui_sum = 0;
- ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
+ }
+ ((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len);
((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */
((struct ip *)ui)->ip_tos = tos; /* XXX */
UDPSTAT_INC(udps_opackets);
+ /*
+ * Setup flowid / RSS information for outbound socket.
+ *
+ * Once the UDP code decides to set a flowid some other way,
+ * this allows the flowid to be overridden by userland.
+ */
+ if (flowtype != M_HASHTYPE_NONE) {
+ m->m_pkthdr.flowid = flowid;
+ M_HASHTYPE_SET(m, flowtype);
+#ifdef RSS
+ } else {
+ uint32_t hash_val, hash_type;
+ /*
+ * Calculate an appropriate RSS hash for UDP and
+ * UDP Lite.
+ *
+ * The called function will take care of figuring out
+ * whether a 2-tuple or 4-tuple hash is required based
+ * on the currently configured scheme.
+ *
+ * Later later on connected socket values should be
+ * cached in the inpcb and reused, rather than constantly
+ * re-calculating it.
+ *
+ * UDP Lite is a different protocol number and will
+ * likely end up being hashed as a 2-tuple until
+ * RSS / NICs grow UDP Lite protocol awareness.
+ */
+ if (rss_proto_software_hash_v4(faddr, laddr, fport, lport,
+ pr, &hash_val, &hash_type) == 0) {
+ m->m_pkthdr.flowid = hash_val;
+ M_HASHTYPE_SET(m, hash_type);
+ }
+#endif
+ }
+
+#ifdef RSS
+ /*
+ * Don't override with the inp cached flowid value.
+ *
+ * Depending upon the kind of send being done, the inp
+ * flowid/flowtype values may actually not be appropriate
+ * for this particular socket send.
+ *
+ * We should either leave the flowid at zero (which is what is
+ * currently done) or set it to some software generated
+ * hash value based on the packet contents.
+ */
+ ipflags |= IP_NODEFAULTFLOWID;
+#endif /* RSS */
+
if (unlock_udbinfo == UH_WLOCKED)
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
else if (unlock_udbinfo == UH_RLOCKED)
- INP_HASH_RUNLOCK(&V_udbinfo);
- error = ip_output(m, inp->inp_options, NULL, ipflags,
+ INP_HASH_RUNLOCK(pcbinfo);
+ UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
+ error = ip_output(m, inp->inp_options,
+ (unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags,
inp->inp_moptions, inp);
- if (unlock_udbinfo == UH_WLOCKED)
+ if (unlock_inp == UH_WLOCKED)
INP_WUNLOCK(inp);
else
INP_RUNLOCK(inp);
@@ -1256,10 +1572,10 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
release:
if (unlock_udbinfo == UH_WLOCKED) {
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
INP_WUNLOCK(inp);
} else if (unlock_udbinfo == UH_RLOCKED) {
- INP_HASH_RUNLOCK(&V_udbinfo);
+ INP_HASH_RUNLOCK(pcbinfo);
INP_RUNLOCK(inp);
} else
INP_RUNLOCK(inp);
@@ -1297,7 +1613,7 @@ udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
if (minlen > m->m_pkthdr.len)
minlen = m->m_pkthdr.len;
if ((m = m_pullup(m, minlen)) == NULL) {
- IPSECSTAT_INC(in_inval);
+ IPSECSTAT_INC(ips_in_inval);
return (NULL); /* Bypass caller processing. */
}
data = mtod(m, caddr_t); /* Points to ip header. */
@@ -1337,7 +1653,7 @@ udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
uint32_t spi;
if (payload <= sizeof(struct esp)) {
- IPSECSTAT_INC(in_inval);
+ IPSECSTAT_INC(ips_in_inval);
m_freem(m);
return (NULL); /* Discard. */
}
@@ -1358,7 +1674,7 @@ udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
2 * sizeof(uint16_t), M_NOWAIT);
if (tag == NULL) {
- IPSECSTAT_INC(in_nomem);
+ IPSECSTAT_INC(ips_in_nomem);
m_freem(m);
return (NULL); /* Discard. */
}
@@ -1387,7 +1703,7 @@ udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
m_adj(m, skip);
ip = mtod(m, struct ip *);
- ip->ip_len -= skip;
+ ip->ip_len = htons(ntohs(ip->ip_len) - skip);
ip->ip_p = IPPROTO_ESP;
/*
@@ -1397,7 +1713,8 @@ udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)
m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
- (void) ipsec4_common_input(m, iphlen, ip->ip_p);
+ (void) ipsec_common_input(m, iphlen, offsetof(struct ip, ip_p),
+ AF_INET, ip->ip_p);
return (NULL); /* NB: consumed, bypass processing. */
}
#endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */
@@ -1406,15 +1723,17 @@ static void
udp_abort(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
@@ -1424,17 +1743,19 @@ static int
udp_attach(struct socket *so, int proto, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
error = soreserve(so, udp_sendspace, udp_recvspace);
if (error)
return (error);
- INP_INFO_WLOCK(&V_udbinfo);
- error = in_pcballoc(so, &V_udbinfo);
+ INP_INFO_WLOCK(pcbinfo);
+ error = in_pcballoc(so, pcbinfo);
if (error) {
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (error);
}
@@ -1446,18 +1767,18 @@ udp_attach(struct socket *so, int proto, struct thread *td)
if (error) {
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (error);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (0);
}
#endif /* INET */
int
-udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f)
+udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, udp_tun_icmp_t i, void *ctx)
{
struct inpcb *inp;
struct udpcb *up;
@@ -1468,11 +1789,14 @@ udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f)
KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL"));
INP_WLOCK(inp);
up = intoudpcb(inp);
- if (up->u_tun_func != NULL) {
+ if ((up->u_tun_func != NULL) ||
+ (up->u_icmp_func != NULL)) {
INP_WUNLOCK(inp);
return (EBUSY);
}
up->u_tun_func = f;
+ up->u_icmp_func = i;
+ up->u_tun_ctx = ctx;
INP_WUNLOCK(inp);
return (0);
}
@@ -1482,14 +1806,16 @@ static int
udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
INP_WLOCK(inp);
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
error = in_pcbbind(inp, nam, td->td_ucred);
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
INP_WUNLOCK(inp);
return (error);
}
@@ -1498,15 +1824,17 @@ static void
udp_close(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_close: inp == NULL"));
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
@@ -1516,9 +1844,11 @@ static int
udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct inpcb *inp;
- int error;
+ struct inpcbinfo *pcbinfo;
struct sockaddr_in *sin;
+ int error;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
INP_WLOCK(inp);
@@ -1532,9 +1862,9 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
INP_WUNLOCK(inp);
return (error);
}
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
error = in_pcbconnect(inp, nam, td->td_ucred);
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
if (error == 0)
soisconnected(so);
INP_WUNLOCK(inp);
@@ -1545,20 +1875,22 @@ static void
udp_detach(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
struct udpcb *up;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
("udp_detach: not disconnected"));
- INP_INFO_WLOCK(&V_udbinfo);
+ INP_INFO_WLOCK(pcbinfo);
INP_WLOCK(inp);
up = intoudpcb(inp);
KASSERT(up != NULL, ("%s: up == NULL", __func__));
inp->inp_ppcb = NULL;
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
udp_discardcb(up);
}
@@ -1566,7 +1898,9 @@ static int
udp_disconnect(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
INP_WLOCK(inp);
@@ -1574,10 +1908,10 @@ udp_disconnect(struct socket *so)
INP_WUNLOCK(inp);
return (ENOTCONN);
}
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTED; /* XXX */
SOCK_UNLOCK(so);
diff --git a/freebsd/sys/netinet/udp_var.h b/freebsd/sys/netinet/udp_var.h
index 6b9b5362..172d969d 100644
--- a/freebsd/sys/netinet/udp_var.h
+++ b/freebsd/sys/netinet/udp_var.h
@@ -42,6 +42,7 @@ struct udpiphdr {
struct udphdr ui_u; /* udp header */
};
#define ui_x1 ui_i.ih_x1
+#define ui_v ui_i.ih_x1[0]
#define ui_pr ui_i.ih_pr
#define ui_len ui_i.ih_len
#define ui_src ui_i.ih_src
@@ -51,14 +52,23 @@ struct udpiphdr {
#define ui_ulen ui_u.uh_ulen
#define ui_sum ui_u.uh_sum
-typedef void(*udp_tun_func_t)(struct mbuf *, int off, struct inpcb *);
+struct inpcb;
+struct mbuf;
+typedef void(*udp_tun_func_t)(struct mbuf *, int, struct inpcb *,
+ const struct sockaddr *, void *);
+typedef void(*udp_tun_icmp_t)(int, struct sockaddr *, void *, void *);
+
/*
* UDP control block; one per udp.
*/
struct udpcb {
udp_tun_func_t u_tun_func; /* UDP kernel tunneling callback. */
+ udp_tun_icmp_t u_icmp_func; /* UDP kernel tunneling icmp callback */
u_int u_flags; /* Generic UDP flags. */
+ uint16_t u_rxcslen; /* Coverage for incoming datagrams. */
+ uint16_t u_txcslen; /* Coverage for outgoing datagrams. */
+ void *u_tun_ctx; /* Tunneling callback context. */
};
#define intoudpcb(ip) ((struct udpcb *)(ip)->inp_ppcb)
@@ -72,96 +82,107 @@ struct udpcb {
struct udpstat {
/* input statistics: */
- u_long udps_ipackets; /* total input packets */
- u_long udps_hdrops; /* packet shorter than header */
- u_long udps_badsum; /* checksum error */
- u_long udps_nosum; /* no checksum */
- u_long udps_badlen; /* data length larger than packet */
- u_long udps_noport; /* no socket on port */
- u_long udps_noportbcast; /* of above, arrived as broadcast */
- u_long udps_fullsock; /* not delivered, input socket full */
- u_long udpps_pcbcachemiss; /* input packets missing pcb cache */
- u_long udpps_pcbhashmiss; /* input packets not for hashed pcb */
+ uint64_t udps_ipackets; /* total input packets */
+ uint64_t udps_hdrops; /* packet shorter than header */
+ uint64_t udps_badsum; /* checksum error */
+ uint64_t udps_nosum; /* no checksum */
+ uint64_t udps_badlen; /* data length larger than packet */
+ uint64_t udps_noport; /* no socket on port */
+ uint64_t udps_noportbcast; /* of above, arrived as broadcast */
+ uint64_t udps_fullsock; /* not delivered, input socket full */
+ uint64_t udpps_pcbcachemiss; /* input packets missing pcb cache */
+ uint64_t udpps_pcbhashmiss; /* input packets not for hashed pcb */
/* output statistics: */
- u_long udps_opackets; /* total output packets */
- u_long udps_fastout; /* output packets on fast path */
+ uint64_t udps_opackets; /* total output packets */
+ uint64_t udps_fastout; /* output packets on fast path */
/* of no socket on port, arrived as multicast */
- u_long udps_noportmcast;
- u_long udps_filtermcast; /* blocked by multicast filter */
+ uint64_t udps_noportmcast;
+ uint64_t udps_filtermcast; /* blocked by multicast filter */
};
#ifdef _KERNEL
+#include <sys/counter.h>
#ifdef __rtems__
#include <errno.h>
#undef errno
#endif /* __rtems__ */
+
+VNET_PCPUSTAT_DECLARE(struct udpstat, udpstat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
-#define UDPSTAT_ADD(name, val) V_udpstat.name += (val)
+#define UDPSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct udpstat, udpstat, name, (val))
#define UDPSTAT_INC(name) UDPSTAT_ADD(name, 1)
/*
* Kernel module consumers must use this accessor macro.
*/
void kmod_udpstat_inc(int statnum);
-#define KMOD_UDPSTAT_INC(name) \
- kmod_udpstat_inc(offsetof(struct udpstat, name) / sizeof(u_long))
+#define KMOD_UDPSTAT_INC(name) \
+ kmod_udpstat_inc(offsetof(struct udpstat, name) / sizeof(uint64_t))
#endif
/*
- * Names for UDP sysctl objects.
+ * Identifiers for UDP sysctl nodes.
*/
#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */
#define UDPCTL_STATS 2 /* statistics (read-only) */
#define UDPCTL_MAXDGRAM 3 /* max datagram size */
#define UDPCTL_RECVSPACE 4 /* default receive buffer space */
#define UDPCTL_PCBLIST 5 /* list of PCBs for UDP sockets */
-#define UDPCTL_MAXID 6
-
-#define UDPCTL_NAMES { \
- { 0, 0 }, \
- { "checksum", CTLTYPE_INT }, \
- { "stats", CTLTYPE_STRUCT }, \
- { "maxdgram", CTLTYPE_INT }, \
- { "recvspace", CTLTYPE_INT }, \
- { "pcblist", CTLTYPE_STRUCT }, \
-}
#ifdef _KERNEL
+#include <netinet/in_pcb.h>
SYSCTL_DECL(_net_inet_udp);
extern struct pr_usrreqs udp_usrreqs;
VNET_DECLARE(struct inpcbhead, udb);
VNET_DECLARE(struct inpcbinfo, udbinfo);
+VNET_DECLARE(struct inpcbhead, ulitecb);
+VNET_DECLARE(struct inpcbinfo, ulitecbinfo);
#define V_udb VNET(udb)
#define V_udbinfo VNET(udbinfo)
+#define V_ulitecb VNET(ulitecb)
+#define V_ulitecbinfo VNET(ulitecbinfo)
extern u_long udp_sendspace;
extern u_long udp_recvspace;
VNET_DECLARE(int, udp_cksum);
-VNET_DECLARE(struct udpstat, udpstat);
VNET_DECLARE(int, udp_blackhole);
#define V_udp_cksum VNET(udp_cksum)
-#define V_udpstat VNET(udpstat)
#define V_udp_blackhole VNET(udp_blackhole)
extern int udp_log_in_vain;
-int udp_newudpcb(struct inpcb *);
-void udp_discardcb(struct udpcb *);
+static __inline struct inpcbinfo *
+udp_get_inpcbinfo(int protocol)
+{
+ return (protocol == IPPROTO_UDP) ? &V_udbinfo : &V_ulitecbinfo;
+}
-void udp_ctlinput(int, struct sockaddr *, void *);
-int udp_ctloutput(struct socket *, struct sockopt *);
-void udp_init(void);
-#ifdef VIMAGE
-void udp_destroy(void);
-#endif
-void udp_input(struct mbuf *, int);
+static __inline struct inpcbhead *
+udp_get_pcblist(int protocol)
+{
+ return (protocol == IPPROTO_UDP) ? &V_udb : &V_ulitecb;
+}
+
+int udp_newudpcb(struct inpcb *);
+void udp_discardcb(struct udpcb *);
+
+void udp_ctlinput(int, struct sockaddr *, void *);
+void udplite_ctlinput(int, struct sockaddr *, void *);
+int udp_ctloutput(struct socket *, struct sockopt *);
+void udp_init(void);
+void udplite_init(void);
+int udp_input(struct mbuf **, int *, int);
+void udplite_input(struct mbuf *, int);
struct inpcb *udp_notify(struct inpcb *inp, int errno);
-int udp_shutdown(struct socket *so);
+int udp_shutdown(struct socket *so);
-int udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f);
-#endif
+int udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f,
+ udp_tun_icmp_t i, void *ctx);
-#endif
+#endif /* _KERNEL */
+
+#endif /* _NETINET_UDP_VAR_H_ */
diff --git a/freebsd/sys/libkern/fls.c b/freebsd/sys/netinet/udplite.h
index c6766815..0e23cd70 100644
--- a/freebsd/sys/libkern/fls.c
+++ b/freebsd/sys/netinet/udplite.h
@@ -1,8 +1,6 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
/*-
- * Copyright (c) 1990, 1993
- * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 2014, Kevin Lo
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -12,9 +10,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -27,24 +22,17 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $FreeBSD$
*/
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/libkern.h>
+#ifndef _NETINET_UDPLITE_H_
+#define _NETINET_UDPLITE_H_
-/*
- * Find Last Set bit
+/*
+ * User-settable options (used with setsockopt).
*/
-int
-fls(int mask)
-{
- int bit;
+#define UDPLITE_SEND_CSCOV 2 /* Sender checksum coverage. */
+#define UDPLITE_RECV_CSCOV 4 /* Receiver checksum coverage. */
- if (mask == 0)
- return (0);
- for (bit = 1; mask != 1; bit++)
- mask = (unsigned int)mask >> 1;
- return (bit);
-}
+#endif /* !_NETINET_UDPLITE_H_ */
diff --git a/freebsd/sys/netinet6/dest6.c b/freebsd/sys/netinet6/dest6.c
index c8c6f547..94386ddd 100644
--- a/freebsd/sys/netinet6/dest6.c
+++ b/freebsd/sys/netinet6/dest6.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <netinet/in.h>
diff --git a/freebsd/sys/netinet6/frag6.c b/freebsd/sys/netinet6/frag6.c
index 511c8601..4cbd3000 100644
--- a/freebsd/sys/netinet6/frag6.c
+++ b/freebsd/sys/netinet6/frag6.c
@@ -34,11 +34,14 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_rss.h>
+
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
+#include <sys/eventhandler.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <rtems/bsd/sys/errno.h>
@@ -47,6 +50,8 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <net/if.h>
+#include <net/if_var.h>
+#include <net/netisr.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -60,13 +65,6 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
-/*
- * Define it to get a correct behavior on per-interface statistics.
- * You will need to perform an extra routing table lookup, per fragment,
- * to do it. This may, or may not be, a performance hit.
- */
-#define IN6_IFSTAT_STRICT
-
static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *);
static void frag6_deq(struct ip6asfrag *);
static void frag6_insque(struct ip6q *, struct ip6q *);
@@ -139,7 +137,7 @@ frag6_init(void)
* fragment's Fragment header.
* -> should grab it from the first fragment only
*
- * The following note also contradicts with fragment rule - noone is going to
+ * The following note also contradicts with fragment rule - no one is going to
* send different fragment with different next header field.
*
* additional note (p22):
@@ -161,14 +159,17 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
struct ip6_frag *ip6f;
struct ip6q *q6;
struct ip6asfrag *af6, *ip6af, *af6dwn;
-#ifdef IN6_IFSTAT_STRICT
struct in6_ifaddr *ia;
-#endif
int offset = *offp, nxt, i, next;
int first_frag = 0;
int fragoff, frgpartlen; /* must be larger than u_int16_t */
struct ifnet *dstifp;
u_int8_t ecn, ecn0;
+#ifdef RSS
+ struct m_tag *mtag;
+ struct ip6_direct_ctx *ip6dc;
+#endif
+
#if 0
char ip6buf[INET6_ADDRSTRLEN];
#endif
@@ -184,18 +185,12 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
#endif
dstifp = NULL;
-#ifdef IN6_IFSTAT_STRICT
/* find the destination interface of the packet. */
- if ((ia = ip6_getdstifaddr(m)) != NULL) {
+ ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
+ if (ia != NULL) {
dstifp = ia->ia_ifp;
ifa_free(&ia->ia_ifa);
}
-#else
- /* we are violating the spec, this is not the destination interface */
- if ((m->m_flags & M_PKTHDR) != 0)
- dstifp = m->m_pkthdr.rcvif;
-#endif
-
/* jumbo payload can't contain a fragment header */
if (ip6->ip6_plen == 0) {
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
@@ -539,8 +534,8 @@ insert:
frag6_deq(af6);
while (t->m_next)
t = t->m_next;
- t->m_next = IP6_REASS_MBUF(af6);
- m_adj(t->m_next, af6->ip6af_offset);
+ m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset);
+ m_cat(t, IP6_REASS_MBUF(af6));
free(af6, M_FTABLE);
af6 = af6dwn;
}
@@ -557,27 +552,16 @@ insert:
*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
#endif
- /* Delete frag6 header */
- if (m->m_len >= offset + sizeof(struct ip6_frag)) {
- /* This is the only possible case with !PULLDOWN_TEST */
- ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
- offset);
- m->m_data += sizeof(struct ip6_frag);
- m->m_len -= sizeof(struct ip6_frag);
- } else {
- /* this comes with no copy if the boundary is on cluster */
- if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
- frag6_remque(q6);
- V_frag6_nfrags -= q6->ip6q_nfrag;
+ if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
+ frag6_remque(q6);
+ V_frag6_nfrags -= q6->ip6q_nfrag;
#ifdef MAC
- mac_ip6q_destroy(q6);
+ mac_ip6q_destroy(q6);
#endif
- free(q6, M_FTABLE);
- V_frag6_nfragpackets--;
- goto dropfrag;
- }
- m_adj(t, sizeof(struct ip6_frag));
- m_cat(m, t);
+ free(q6, M_FTABLE);
+ V_frag6_nfragpackets--;
+
+ goto dropfrag;
}
/*
@@ -604,9 +588,31 @@ insert:
m->m_pkthdr.len = plen;
}
+#ifdef RSS
+ mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc),
+ M_NOWAIT);
+ if (mtag == NULL)
+ goto dropfrag;
+
+ ip6dc = (struct ip6_direct_ctx *)(mtag + 1);
+ ip6dc->ip6dc_nxt = nxt;
+ ip6dc->ip6dc_off = offset;
+
+ m_tag_prepend(m, mtag);
+#endif
+
+ IP6Q_UNLOCK();
IP6STAT_INC(ip6s_reassembled);
in6_ifstat_inc(dstifp, ifs6_reass_ok);
+#ifdef RSS
+ /*
+ * Queue/dispatch for reprocessing.
+ */
+ netisr_dispatch(NETISR_IPV6_DIRECT, m);
+ return IPPROTO_DONE;
+#endif
+
/*
* Tell launch routine the next header
*/
@@ -614,7 +620,6 @@ insert:
*mp = m;
*offp = offset;
- IP6Q_UNLOCK();
return nxt;
dropfrag:
@@ -791,3 +796,27 @@ frag6_drain(void)
IP6Q_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
+
+int
+ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
+{
+ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ struct mbuf *t;
+
+ /* Delete frag6 header. */
+ if (m->m_len >= offset + sizeof(struct ip6_frag)) {
+ /* This is the only possible case with !PULLDOWN_TEST. */
+ bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
+ offset);
+ m->m_data += sizeof(struct ip6_frag);
+ m->m_len -= sizeof(struct ip6_frag);
+ } else {
+ /* This comes with no copy if the boundary is on cluster. */
+ if ((t = m_split(m, offset, wait)) == NULL)
+ return (ENOMEM);
+ m_adj(t, sizeof(struct ip6_frag));
+ m_cat(m, t);
+ }
+
+ return (0);
+}
diff --git a/freebsd/sys/netinet6/icmp6.c b/freebsd/sys/netinet6/icmp6.c
index 569b5dfa..6e3a4873 100644
--- a/freebsd/sys/netinet6/icmp6.c
+++ b/freebsd/sys/netinet6/icmp6.c
@@ -65,9 +65,10 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#define MBUF_PRIVATE /* XXXRW: Optimisation tries to avoid M_EXT mbufs */
+
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/sys/param.h>
#include <sys/domain.h>
@@ -87,6 +88,7 @@ __FBSDID("$FreeBSD$");
#include <sys/time.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/if_types.h>
@@ -100,6 +102,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/icmp6.h>
#include <netinet/tcp_var.h>
+#include <netinet6/in6_fib.h>
#include <netinet6/in6_ifattach.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>
@@ -109,14 +112,14 @@ __FBSDID("$FreeBSD$");
#include <netinet6/nd6.h>
#include <netinet6/send.h>
-#ifdef IPSEC
-#include <netipsec/ipsec.h>
-#include <netipsec/key.h>
-#endif
-
extern struct domain inet6domain;
-VNET_DEFINE(struct icmp6stat, icmp6stat);
+VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat);
+VNET_PCPUSTAT_SYSINIT(icmp6stat);
+
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(icmp6stat);
+#endif /* VIMAGE */
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
VNET_DECLARE(struct inpcbhead, ripcb);
@@ -157,7 +160,7 @@ void
kmod_icmp6stat_inc(int statnum)
{
- (*((u_quad_t *)&V_icmp6stat + statnum))++;
+ counter_u64_add(VNET(icmp6stat)[statnum], 1);
}
static void
@@ -362,7 +365,7 @@ icmp6_error(struct mbuf *m, int type, int code, int param)
m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
- M_PREPEND(m, preplen, M_DONTWAIT); /* FIB is also copied over. */
+ M_PREPEND(m, preplen, M_NOWAIT); /* FIB is also copied over. */
if (m == NULL) {
nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
return;
@@ -474,22 +477,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto freeit;
}
- if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
- /*
- * Deliver very specific ICMP6 type only.
- * This is important to deliver TOOBIG. Otherwise PMTUD
- * will not work.
- */
- switch (icmp6->icmp6_type) {
- case ICMP6_DST_UNREACH:
- case ICMP6_PACKET_TOO_BIG:
- case ICMP6_TIME_EXCEEDED:
- break;
- default:
- goto freeit;
- }
- }
-
ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]);
icmp6_ifstat_inc(ifp, ifs6_in_msg);
if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
@@ -500,15 +487,13 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
icmp6_ifstat_inc(ifp, ifs6_in_dstunreach);
switch (code) {
case ICMP6_DST_UNREACH_NOROUTE:
+ case ICMP6_DST_UNREACH_ADDR: /* PRC_HOSTDEAD is a DOS */
code = PRC_UNREACH_NET;
break;
case ICMP6_DST_UNREACH_ADMIN:
icmp6_ifstat_inc(ifp, ifs6_in_adminprohib);
code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
break;
- case ICMP6_DST_UNREACH_ADDR:
- code = PRC_HOSTDEAD;
- break;
case ICMP6_DST_UNREACH_BEYONDSCOPE:
/* I mean "source address was incorrect." */
code = PRC_PARAMPROB;
@@ -575,28 +560,21 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
/* Give up remote */
break;
}
- if ((n->m_flags & M_EXT) != 0
+ if (!M_WRITABLE(n)
|| n->m_len < off + sizeof(struct icmp6_hdr)) {
struct mbuf *n0 = n;
- const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
int n0len;
- MGETHDR(n, M_DONTWAIT, n0->m_type);
- n0len = n0->m_pkthdr.len; /* save for use below */
- if (n)
- M_MOVE_PKTHDR(n, n0); /* FIB copied. */
- if (n && maxlen >= MHLEN) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_free(n);
- n = NULL;
- }
- }
+ CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN);
+ n = m_gethdr(M_NOWAIT, n0->m_type);
if (n == NULL) {
/* Give up remote */
m_freem(n0);
break;
}
+
+ m_move_pkthdr(n, n0); /* FIB copied. */
+ n0len = n0->m_pkthdr.len; /* save for use below */
/*
* Copy IPv6 and ICMPv6 only.
*/
@@ -683,31 +661,27 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
} else {
struct prison *pr;
u_char *p;
- int maxlen, maxhlen, hlen;
+ int maxhlen, hlen;
/*
* XXX: this combination of flags is pointless,
* but should we keep this for compatibility?
*/
- if ((V_icmp6_nodeinfo & 5) != 5)
+ if ((V_icmp6_nodeinfo & (ICMP6_NODEINFO_FQDNOK |
+ ICMP6_NODEINFO_TMPADDROK)) !=
+ (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK))
break;
if (code != 0)
goto badcode;
- maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4;
- if (maxlen >= MCLBYTES) {
+
+ CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN);
+ n = m_gethdr(M_NOWAIT, m->m_type);
+ if (n == NULL) {
/* Give up remote */
break;
}
- MGETHDR(n, M_DONTWAIT, m->m_type);
- if (n && maxlen > MHLEN) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_free(n);
- n = NULL;
- }
- }
- if (n && !m_dup_pkthdr(n, m, M_DONTWAIT)) {
+ if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
/*
* Previous code did a blind M_COPY_PKTHDR
* and said "just for rcvif". If true, then
@@ -718,13 +692,8 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
m_free(n);
n = NULL;
}
- if (n == NULL) {
- /* Give up remote */
- break;
- }
- n->m_pkthdr.rcvif = NULL;
- n->m_len = 0;
- maxhlen = M_TRAILINGSPACE(n) - maxlen;
+ maxhlen = M_TRAILINGSPACE(n) -
+ (sizeof(*nip6) + sizeof(*nicmp6) + 4);
#ifndef __rtems__
pr = curthread->td_ucred->cr_prison;
#else /* __rtems__ */
@@ -771,7 +740,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto badcode;
if (icmp6len < sizeof(struct nd_router_solicit))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
/* give up local */
/* Send incoming SeND packet to user space. */
@@ -809,7 +778,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto badcode;
if (icmp6len < sizeof(struct nd_router_advert))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
/* Send incoming SeND-protected/ND packet to user space. */
if (send_sendso_input_hook != NULL) {
@@ -840,7 +809,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto badcode;
if (icmp6len < sizeof(struct nd_neighbor_solicit))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
if (send_sendso_input_hook != NULL) {
error = send_sendso_input_hook(m, ifp,
SND_IN, ip6len);
@@ -869,7 +838,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto badcode;
if (icmp6len < sizeof(struct nd_neighbor_advert))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
/* Send incoming SeND-protected/ND packet to user space. */
if (send_sendso_input_hook != NULL) {
@@ -900,7 +869,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto badcode;
if (icmp6len < sizeof(struct nd_redirect))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
if (send_sendso_input_hook != NULL) {
error = send_sendso_input_hook(m, ifp,
SND_IN, ip6len);
@@ -1181,8 +1150,6 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
ip6cp.ip6c_src = &icmp6src;
ip6cp.ip6c_nxt = nxt;
- m_addr_changed(m);
-
if (icmp6type == ICMP6_PACKET_TOO_BIG) {
notifymtu = ntohl(icmp6->icmp6_mtu);
ip6cp.ip6c_cmdarg = (void *)&notifymtu;
@@ -1329,7 +1296,8 @@ ni6_input(struct mbuf *m, int off)
goto bad;
/* else it's a link-local multicast, fine */
} else { /* unicast or anycast */
- if ((ia6 = ip6_getdstifaddr(m)) == NULL)
+ ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
+ if (ia6 == NULL)
goto bad; /* XXX impossible */
if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) &&
@@ -1505,26 +1473,23 @@ ni6_input(struct mbuf *m, int off)
break;
}
- /* allocate an mbuf to reply. */
- MGETHDR(n, M_DONTWAIT, m->m_type);
+ /* Allocate an mbuf to reply. */
+ if (replylen > MCLBYTES) {
+ /*
+ * XXX: should we try to allocate more? But MCLBYTES
+ * is probably much larger than IPV6_MMTU...
+ */
+ goto bad;
+ }
+ if (replylen > MHLEN)
+ n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR);
+ else
+ n = m_gethdr(M_NOWAIT, m->m_type);
if (n == NULL) {
m_freem(m);
return (NULL);
}
- M_MOVE_PKTHDR(n, m); /* just for recvif and FIB */
- if (replylen > MHLEN) {
- if (replylen > MCLBYTES) {
- /*
- * XXX: should we try to allocate more? But MCLBYTES
- * is probably much larger than IPV6_MMTU...
- */
- goto bad;
- }
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- goto bad;
- }
- }
+ m_move_pkthdr(n, m); /* just for recvif and FIB */
n->m_pkthdr.len = n->m_len = replylen;
/* copy mbuf header and IPv6 + Node Information base headers */
@@ -1623,16 +1588,13 @@ ni6_nametodns(const char *name, int namelen, int old)
else
len = MCLBYTES;
- /* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
- MGET(m, M_DONTWAIT, MT_DATA);
- if (m && len > MLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0)
- goto fail;
- }
- if (!m)
+ /* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */
+ if (len > MLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, 0);
+ else
+ m = m_get(M_NOWAIT, MT_DATA);
+ if (m == NULL)
goto fail;
- m->m_next = NULL;
if (old) {
m->m_len = len;
@@ -1793,7 +1755,7 @@ ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp,
}
IFNET_RLOCK_NOSLEEP();
- TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
addrsofif = 0;
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
@@ -1880,7 +1842,7 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet);
again:
- for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
+ for (; ifp; ifp = TAILQ_NEXT(ifp, if_link)) {
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
@@ -1965,8 +1927,8 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
ltime = ND6_INFINITE_LIFETIME;
else {
if (ifa6->ia6_lifetime.ia6t_expire >
- time_second)
- ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second);
+ time_uptime)
+ ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime);
else
ltime = 0;
}
@@ -2078,7 +2040,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
*/
if ((m->m_flags & M_EXT) && m->m_next == NULL &&
m->m_len <= MHLEN) {
- MGET(n, M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n != NULL) {
if (m_dup_pkthdr(n, m, M_NOWAIT)) {
bcopy(m->m_data, n->m_data,
@@ -2128,7 +2090,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
m->m_len <= MHLEN) {
struct mbuf *n;
- MGET(n, M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n != NULL) {
if (m_dup_pkthdr(n, m, M_NOWAIT)) {
bcopy(m->m_data, n->m_data, m->m_len);
@@ -2166,13 +2128,13 @@ icmp6_rip6_input(struct mbuf **mp, int off)
void
icmp6_reflect(struct mbuf *m, size_t off)
{
+ struct in6_addr src6, *srcp;
struct ip6_hdr *ip6;
struct icmp6_hdr *icmp6;
struct in6_ifaddr *ia = NULL;
- int plen;
- int type, code;
struct ifnet *outif = NULL;
- struct in6_addr origdst, src, *srcp = NULL;
+ int plen;
+ int type, code, hlim;
/* too short to reflect */
if (off < sizeof(struct ip6_hdr)) {
@@ -2218,13 +2180,8 @@ icmp6_reflect(struct mbuf *m, size_t off)
icmp6 = (struct icmp6_hdr *)(ip6 + 1);
type = icmp6->icmp6_type; /* keep type for statistics */
code = icmp6->icmp6_code; /* ditto. */
-
- origdst = ip6->ip6_dst;
- /*
- * ip6_input() drops a packet if its src is multicast.
- * So, the src is never multicast.
- */
- ip6->ip6_dst = ip6->ip6_src;
+ hlim = 0;
+ srcp = NULL;
/*
* If the incoming packet was addressed directly to us (i.e. unicast),
@@ -2232,74 +2189,59 @@ icmp6_reflect(struct mbuf *m, size_t off)
* The IN6_IFF_NOTREADY case should be VERY rare, but is possible
* (for example) when we encounter an error while forwarding procedure
* destined to a duplicated address of ours.
- * Note that ip6_getdstifaddr() may fail if we are in an error handling
- * procedure of an outgoing packet of our own, in which case we need
- * to search in the ifaddr list.
*/
- if (!IN6_IS_ADDR_MULTICAST(&origdst)) {
- if ((ia = ip6_getdstifaddr(m))) {
- if (!(ia->ia6_flags &
- (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)))
- srcp = &ia->ia_addr.sin6_addr;
- } else {
- struct sockaddr_in6 d;
-
- bzero(&d, sizeof(d));
- d.sin6_family = AF_INET6;
- d.sin6_len = sizeof(d);
- d.sin6_addr = origdst;
- ia = (struct in6_ifaddr *)
- ifa_ifwithaddr((struct sockaddr *)&d);
- if (ia &&
- !(ia->ia6_flags &
- (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) {
- srcp = &ia->ia_addr.sin6_addr;
- }
+ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
+ ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
+ if (ia != NULL && !(ia->ia6_flags &
+ (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) {
+ src6 = ia->ia_addr.sin6_addr;
+ srcp = &src6;
+
+ if (m->m_pkthdr.rcvif != NULL) {
+ /* XXX: This may not be the outgoing interface */
+ hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
+ } else
+ hlim = V_ip6_defhlim;
}
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
}
if (srcp == NULL) {
- int e;
- struct sockaddr_in6 sin6;
- struct route_in6 ro;
+ int error;
+ struct in6_addr dst6;
+ uint32_t scopeid;
/*
* This case matches to multicasts, our anycast, or unicasts
* that we do not own. Select a source address based on the
* source address of the erroneous packet.
*/
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_len = sizeof(sin6);
- sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */
-
- bzero(&ro, sizeof(ro));
- e = in6_selectsrc(&sin6, NULL, NULL, &ro, NULL, &outif, &src);
- if (ro.ro_rt)
- RTFREE(ro.ro_rt); /* XXX: we could use this */
- if (e) {
+ in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid);
+ error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6,
+ scopeid, NULL, &src6, &hlim);
+
+ if (error) {
char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_DEBUG,
"icmp6_reflect: source can't be determined: "
"dst=%s, error=%d\n",
- ip6_sprintf(ip6buf, &sin6.sin6_addr), e));
+ ip6_sprintf(ip6buf, &ip6->ip6_dst), error));
goto bad;
}
- srcp = &src;
+ srcp = &src6;
}
-
+ /*
+ * ip6_input() drops a packet if its src is multicast.
+ * So, the src is never multicast.
+ */
+ ip6->ip6_dst = ip6->ip6_src;
ip6->ip6_src = *srcp;
ip6->ip6_flow = 0;
ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
ip6->ip6_vfc |= IPV6_VERSION;
ip6->ip6_nxt = IPPROTO_ICMPV6;
- if (outif)
- ip6->ip6_hlim = ND_IFINFO(outif)->chlim;
- else if (m->m_pkthdr.rcvif) {
- /* XXX: This may not be the outgoing interface */
- ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
- } else
- ip6->ip6_hlim = V_ip6_defhlim;
+ ip6->ip6_hlim = hlim;
icmp6->icmp6_cksum = 0;
icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
@@ -2311,19 +2253,13 @@ icmp6_reflect(struct mbuf *m, size_t off)
m->m_flags &= ~(M_BCAST|M_MCAST);
- m_addr_changed(m);
-
ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
if (outif)
icmp6_ifoutstat_inc(outif, type, code);
- if (ia != NULL)
- ifa_free(&ia->ia_ifa);
return;
bad:
- if (ia != NULL)
- ifa_free(&ia->ia_ifa);
m_freem(m);
return;
}
@@ -2365,7 +2301,6 @@ icmp6_redirect_input(struct mbuf *m, int off)
int icmp6len = ntohs(ip6->ip6_plen);
char *lladdr = NULL;
int lladdrlen = 0;
- struct rtentry *rt = NULL;
int is_router;
int is_onlink;
struct in6_addr src6 = ip6->ip6_src;
@@ -2420,18 +2355,13 @@ icmp6_redirect_input(struct mbuf *m, int off)
}
{
/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
- struct sockaddr_in6 sin6;
- struct in6_addr *gw6;
-
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
- rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB);
- if (rt) {
- if (rt->rt_gateway == NULL ||
- rt->rt_gateway->sa_family != AF_INET6) {
- RTFREE_LOCKED(rt);
+ struct nhop6_basic nh6;
+ struct in6_addr kdst;
+ uint32_t scopeid;
+
+ in6_splitscope(&reddst6, &kdst, &scopeid);
+ if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &kdst, scopeid, 0, 0,&nh6)==0){
+ if ((nh6.nh_flags & NHF_GATEWAY) == 0) {
nd6log((LOG_ERR,
"ICMP6 redirect rejected; no route "
"with inet6 gateway found for redirect dst: %s\n",
@@ -2439,14 +2369,12 @@ icmp6_redirect_input(struct mbuf *m, int off)
goto bad;
}
- gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
- if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
- RTFREE_LOCKED(rt);
+ if (IN6_ARE_ADDR_EQUAL(&src6, &nh6.nh_addr) == 0) {
nd6log((LOG_ERR,
"ICMP6 redirect rejected; "
"not equal to gw-for-src=%s (must be same): "
"%s\n",
- ip6_sprintf(ip6buf, gw6),
+ ip6_sprintf(ip6buf, &nh6.nh_addr),
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
goto bad;
}
@@ -2457,8 +2385,6 @@ icmp6_redirect_input(struct mbuf *m, int off)
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
goto bad;
}
- RTFREE_LOCKED(rt);
- rt = NULL;
}
if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
nd6log((LOG_ERR,
@@ -2480,7 +2406,6 @@ icmp6_redirect_input(struct mbuf *m, int off)
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
goto bad;
}
- /* validation passed */
icmp6len -= sizeof(*nd_rd);
nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
@@ -2505,31 +2430,45 @@ icmp6_redirect_input(struct mbuf *m, int off)
goto bad;
}
+ /* Validation passed. */
+
/* RFC 2461 8.3 */
nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
- if (!is_onlink) { /* better router case. perform rtredirect. */
- /* perform rtredirect */
+ /*
+ * Install a gateway route in the better-router case or an interface
+ * route in the on-link-destination case.
+ */
+ {
struct sockaddr_in6 sdst;
struct sockaddr_in6 sgw;
struct sockaddr_in6 ssrc;
+ struct sockaddr *gw;
+ int rt_flags;
u_int fibnum;
bzero(&sdst, sizeof(sdst));
- bzero(&sgw, sizeof(sgw));
bzero(&ssrc, sizeof(ssrc));
- sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
- sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
- sizeof(struct sockaddr_in6);
- bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
+ sdst.sin6_family = ssrc.sin6_family = AF_INET6;
+ sdst.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6);
bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
+ rt_flags = RTF_HOST;
+ if (is_router) {
+ bzero(&sgw, sizeof(sgw));
+ sgw.sin6_family = AF_INET6;
+ sgw.sin6_len = sizeof(struct sockaddr_in6);
+ bcopy(&redtgt6, &sgw.sin6_addr,
+ sizeof(struct in6_addr));
+ gw = (struct sockaddr *)&sgw;
+ rt_flags |= RTF_GATEWAY;
+ } else
+ gw = ifp->if_addr->ifa_addr;
for (fibnum = 0; fibnum < rt_numfibs; fibnum++)
- in6_rtredirect((struct sockaddr *)&sdst,
- (struct sockaddr *)&sgw, (struct sockaddr *)NULL,
- RTF_GATEWAY | RTF_HOST, (struct sockaddr *)&ssrc,
- fibnum);
+ in6_rtredirect((struct sockaddr *)&sdst, gw,
+ (struct sockaddr *)NULL, rt_flags,
+ (struct sockaddr *)&ssrc, fibnum);
}
/* finally update cached route in each socket via pfctlinput */
{
@@ -2540,9 +2479,6 @@ icmp6_redirect_input(struct mbuf *m, int off)
sdst.sin6_len = sizeof(struct sockaddr_in6);
bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
-#ifdef IPSEC
- key_sa_routechange((struct sockaddr *)&sdst);
-#endif /* IPSEC */
}
freeit:
@@ -2609,14 +2545,10 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
#if IPV6_MMTU >= MCLBYTES
# error assumption failed about IPV6_MMTU and MCLBYTES
#endif
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
- if (m && IPV6_MMTU >= MHLEN)
- MCLGET(m, M_DONTWAIT);
- if (!m)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ if (m == NULL)
goto fail;
M_SETFIB(m, rt->rt_fibnum);
- m->m_pkthdr.rcvif = NULL;
- m->m_len = 0;
maxlen = M_TRAILINGSPACE(m);
maxlen = min(IPV6_MMTU, maxlen);
/* just for safety */
@@ -2711,7 +2643,7 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
nd_opt->nd_opt_len = len >> 3;
lladdr = (char *)(nd_opt + 1);
- bcopy(&ln->ll_addr, lladdr, ifp->if_addrlen);
+ bcopy(ln->ll_addr, lladdr, ifp->if_addrlen);
p += len;
}
}
diff --git a/freebsd/sys/netinet6/in6.c b/freebsd/sys/netinet6/in6.c
index 66888fa8..f5d82524 100644
--- a/freebsd/sys/netinet6/in6.c
+++ b/freebsd/sys/netinet6/in6.c
@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
+#include <sys/eventhandler.h>
#include <rtems/bsd/sys/errno.h>
#include <sys/jail.h>
#include <sys/malloc.h>
@@ -81,6 +82,8 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rmlock.h>
#include <sys/syslog.h>
#include <net/if.h>
@@ -97,6 +100,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
+#include <netinet/ip_carp.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
@@ -105,6 +109,7 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6_mroute.h>
#include <netinet6/in6_ifattach.h>
#include <netinet6/scope6_var.h>
+#include <netinet6/in6_fib.h>
#include <netinet6/in6_pcb.h>
VNET_DECLARE(int, icmp6_nodeinfo_oldmcprefix);
@@ -135,49 +140,36 @@ const struct in6_addr in6mask128 = IN6MASK128;
const struct sockaddr_in6 sa6_any =
{ sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 };
-static int in6_lifaddr_ioctl(struct socket *, u_long, caddr_t,
- struct ifnet *, struct thread *);
-static int in6_ifinit(struct ifnet *, struct in6_ifaddr *,
- struct sockaddr_in6 *, int);
+static int in6_notify_ifa(struct ifnet *, struct in6_ifaddr *,
+ struct in6_aliasreq *, int);
static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);
-int (*faithprefix_p)(struct in6_addr *);
+static int in6_validate_ifra(struct ifnet *, struct in6_aliasreq *,
+ struct in6_ifaddr *, int);
+static struct in6_ifaddr *in6_alloc_ifa(struct ifnet *,
+ struct in6_aliasreq *, int flags);
+static int in6_update_ifa_internal(struct ifnet *, struct in6_aliasreq *,
+ struct in6_ifaddr *, int, int);
+static int in6_broadcast_ifa(struct ifnet *, struct in6_aliasreq *,
+ struct in6_ifaddr *, int);
#define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa))
#define ia62ifa(ia6) (&((ia6)->ia_ifa))
+
void
-in6_ifaddloop(struct ifaddr *ifa)
+in6_newaddrmsg(struct in6_ifaddr *ia, int cmd)
{
struct sockaddr_dl gateway;
struct sockaddr_in6 mask, addr;
struct rtentry rt;
- struct in6_ifaddr *ia;
- struct ifnet *ifp;
- struct llentry *ln;
-
- ia = ifa2ia6(ifa);
- ifp = ifa->ifa_ifp;
- IF_AFDATA_LOCK(ifp);
- ifa->ifa_rtrequest = nd6_rtrequest;
- ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR |
- LLE_EXCLUSIVE), (struct sockaddr *)&ia->ia_addr);
- IF_AFDATA_UNLOCK(ifp);
- if (ln != NULL) {
- ln->la_expire = 0; /* for IPv6 this means permanent */
- ln->ln_state = ND6_LLINFO_REACHABLE;
- /*
- * initialize for rtmsg generation
- */
- bzero(&gateway, sizeof(gateway));
- gateway.sdl_len = sizeof(gateway);
- gateway.sdl_family = AF_LINK;
- gateway.sdl_nlen = 0;
- gateway.sdl_alen = 6;
- memcpy(gateway.sdl_data, &ln->ll_addr.mac_aligned,
- sizeof(ln->ll_addr));
- LLE_WUNLOCK(ln);
- }
+
+ /*
+ * initialize for rtmsg generation
+ */
+ bzero(&gateway, sizeof(gateway));
+ gateway.sdl_len = sizeof(gateway);
+ gateway.sdl_family = AF_LINK;
bzero(&rt, sizeof(rt));
rt.rt_gateway = (struct sockaddr *)&gateway;
@@ -185,42 +177,11 @@ in6_ifaddloop(struct ifaddr *ifa)
memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
rt_mask(&rt) = (struct sockaddr *)&mask;
rt_key(&rt) = (struct sockaddr *)&addr;
- rt.rt_flags = RTF_UP | RTF_HOST | RTF_STATIC;
+ rt.rt_flags = RTF_HOST | RTF_STATIC;
+ if (cmd == RTM_ADD)
+ rt.rt_flags |= RTF_UP;
/* Announce arrival of local address to all FIBs. */
- rt_newaddrmsg(RTM_ADD, ifa, 0, &rt);
-}
-
-void
-in6_ifremloop(struct ifaddr *ifa)
-{
- struct sockaddr_dl gateway;
- struct sockaddr_in6 mask, addr;
- struct rtentry rt0;
- struct in6_ifaddr *ia;
- struct ifnet *ifp;
-
- ia = ifa2ia6(ifa);
- ifp = ifa->ifa_ifp;
- memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
- memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
- lltable_prefix_free(AF_INET6, (struct sockaddr *)&addr,
- (struct sockaddr *)&mask, LLE_STATIC);
-
- /*
- * initialize for rtmsg generation
- */
- bzero(&gateway, sizeof(gateway));
- gateway.sdl_len = sizeof(gateway);
- gateway.sdl_family = AF_LINK;
- gateway.sdl_nlen = 0;
- gateway.sdl_alen = ifp->if_addrlen;
- bzero(&rt0, sizeof(rt0));
- rt0.rt_gateway = (struct sockaddr *)&gateway;
- rt_mask(&rt0) = (struct sockaddr *)&mask;
- rt_key(&rt0) = (struct sockaddr *)&addr;
- rt0.rt_flags = RTF_HOST | RTF_STATIC;
- /* Announce removal of local address to all FIBs. */
- rt_newaddrmsg(RTM_DELETE, ifa, 0, &rt0);
+ rt_newaddrmsg(cmd, &ia->ia_ifa, 0, &rt);
}
int
@@ -275,7 +236,15 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
struct in6_ifaddr *ia = NULL;
struct in6_aliasreq *ifra = (struct in6_aliasreq *)data;
struct sockaddr_in6 *sa6;
+ int carp_attached = 0;
int error;
+ u_long ocmd = cmd;
+
+ /*
+ * Compat to make pre-10.x ifconfig(8) operable.
+ */
+ if (cmd == OSIOCAIFADDR_IN6)
+ cmd = SIOCAIFADDR_IN6;
switch (cmd) {
case SIOCGETSGCNT_IN6:
@@ -317,8 +286,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
/* FALLTHROUGH */
case OSIOCGIFINFO_IN6:
case SIOCGIFINFO_IN6:
- case SIOCGDRLST_IN6:
- case SIOCGPRLST_IN6:
case SIOCGNBRINFO_IN6:
case SIOCGDEFIFACE_IN6:
return (nd6_ioctl(cmd, data, ifp));
@@ -366,26 +333,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
return (scope6_ioctl(cmd, data, ifp));
}
- switch (cmd) {
- case SIOCALIFADDR:
- if (td != NULL) {
- error = priv_check(td, PRIV_NET_ADDIFADDR);
- if (error)
- return (error);
- }
- return in6_lifaddr_ioctl(so, cmd, data, ifp, td);
-
- case SIOCDLIFADDR:
- if (td != NULL) {
- error = priv_check(td, PRIV_NET_DELIFADDR);
- if (error)
- return (error);
- }
- /* FALLTHROUGH */
- case SIOCGLIFADDR:
- return in6_lifaddr_ioctl(so, cmd, data, ifp, td);
- }
-
/*
* Find address for this interface, if it exists.
*
@@ -417,7 +364,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
case SIOCSPFXFLUSH_IN6:
case SIOCSRTRFLUSH_IN6:
case SIOCGIFALIFETIME_IN6:
- case SIOCSIFALIFETIME_IN6:
case SIOCGIFSTAT_IN6:
case SIOCGIFSTAT_ICMP6:
sa6 = &ifr->ifr_addr;
@@ -516,34 +462,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
goto out;
}
break;
-
- case SIOCSIFALIFETIME_IN6:
- {
- struct in6_addrlifetime *lt;
-
- if (td != NULL) {
- error = priv_check(td, PRIV_NETINET_ALIFETIME6);
- if (error)
- goto out;
- }
- if (ia == NULL) {
- error = EADDRNOTAVAIL;
- goto out;
- }
- /* sanity for overflow - beware unsigned */
- lt = &ifr->ifr_ifru.ifru_lifetime;
- if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME &&
- lt->ia6t_vltime + time_second < time_second) {
- error = EINVAL;
- goto out;
- }
- if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME &&
- lt->ia6t_pltime + time_second < time_second) {
- error = EINVAL;
- goto out;
- }
- break;
- }
}
switch (cmd) {
@@ -576,17 +494,17 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
break;
case SIOCGIFSTAT_IN6:
- bzero(&ifr->ifr_ifru.ifru_stat,
- sizeof(ifr->ifr_ifru.ifru_stat));
- ifr->ifr_ifru.ifru_stat =
- *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->in6_ifstat;
+ COUNTER_ARRAY_COPY(((struct in6_ifextra *)
+ ifp->if_afdata[AF_INET6])->in6_ifstat,
+ &ifr->ifr_ifru.ifru_stat,
+ sizeof(struct in6_ifstat) / sizeof(uint64_t));
break;
case SIOCGIFSTAT_ICMP6:
- bzero(&ifr->ifr_ifru.ifru_icmp6stat,
- sizeof(ifr->ifr_ifru.ifru_icmp6stat));
- ifr->ifr_ifru.ifru_icmp6stat =
- *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->icmp6_ifstat;
+ COUNTER_ARRAY_COPY(((struct in6_ifextra *)
+ ifp->if_afdata[AF_INET6])->icmp6_ifstat,
+ &ifr->ifr_ifru.ifru_icmp6stat,
+ sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
break;
case SIOCGIFALIFETIME_IN6:
@@ -629,24 +547,8 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
}
break;
- case SIOCSIFALIFETIME_IN6:
- ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime;
- /* for sanity */
- if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
- ia->ia6_lifetime.ia6t_expire =
- time_second + ia->ia6_lifetime.ia6t_vltime;
- } else
- ia->ia6_lifetime.ia6t_expire = 0;
- if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
- ia->ia6_lifetime.ia6t_preferred =
- time_second + ia->ia6_lifetime.ia6t_pltime;
- } else
- ia->ia6_lifetime.ia6t_preferred = 0;
- break;
-
case SIOCAIFADDR_IN6:
{
- int i;
struct nd_prefixctl pr0;
struct nd_prefix *pr;
@@ -667,6 +569,18 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
break;
}
+ if (cmd == ocmd && ifra->ifra_vhid > 0) {
+ if (carp_attach_p != NULL)
+ error = (*carp_attach_p)(&ia->ia_ifa,
+ ifra->ifra_vhid);
+ else
+ error = EPROTONOSUPPORT;
+ if (error)
+ goto out;
+ else
+ carp_attached = 1;
+ }
+
/*
* then, make the prefix on-link on the interface.
* XXX: we'd rather create the prefix before the address, but
@@ -683,14 +597,14 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
NULL);
if (pr0.ndpr_plen == 128) {
- break; /* we don't need to install a host route. */
+ /* we don't need to install a host route. */
+ goto aifaddr_out;
}
pr0.ndpr_prefix = ifra->ifra_addr;
/* apply the mask for safety. */
- for (i = 0; i < 4; i++) {
- pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &=
- ifra->ifra_prefixmask.sin6_addr.s6_addr32[i];
- }
+ IN6_MASK_ADDR(&pr0.ndpr_prefix.sin6_addr,
+ &ifra->ifra_prefixmask.sin6_addr);
+
/*
* XXX: since we don't have an API to set prefix (not address)
* lifetimes, we just use the same lifetimes as addresses.
@@ -710,12 +624,9 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
* nd6_prelist_add will install the corresponding
* interface route.
*/
- if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0)
- goto out;
- if (pr == NULL) {
- log(LOG_ERR, "nd6_prelist_add succeeded but "
- "no prefix\n");
- error = EINVAL;
+ if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) {
+ if (carp_attached)
+ (*carp_detach_p)(&ia->ia_ifa);
goto out;
}
}
@@ -746,32 +657,28 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
* that is, this address might make other addresses detached.
*/
pfxlist_onlink_check();
- if (error == 0 && ia) {
- if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
- /*
- * Try to clear the flag when a new
- * IPv6 address is added onto an
- * IFDISABLED interface and it
- * succeeds.
- */
- struct in6_ndireq nd;
-
- memset(&nd, 0, sizeof(nd));
- nd.ndi.flags = ND_IFINFO(ifp)->flags;
- nd.ndi.flags &= ~ND6_IFF_IFDISABLED;
- if (nd6_ioctl(SIOCSIFINFO_FLAGS,
- (caddr_t)&nd, ifp) < 0)
- log(LOG_NOTICE, "SIOCAIFADDR_IN6: "
- "SIOCSIFINFO_FLAGS for -ifdisabled "
- "failed.");
- /*
- * Ignore failure of clearing the flag
- * intentionally. The failure means
- * address duplication was detected.
- */
- }
- EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+
+aifaddr_out:
+ /*
+ * Try to clear the flag when a new IPv6 address is added
+ * onto an IFDISABLED interface and it succeeds.
+ */
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
+ struct in6_ndireq nd;
+
+ memset(&nd, 0, sizeof(nd));
+ nd.ndi.flags = ND_IFINFO(ifp)->flags;
+ nd.ndi.flags &= ~ND6_IFF_IFDISABLED;
+ if (nd6_ioctl(SIOCSIFINFO_FLAGS, (caddr_t)&nd, ifp) < 0)
+ log(LOG_NOTICE, "SIOCAIFADDR_IN6: "
+ "SIOCSIFINFO_FLAGS for -ifdisabled "
+ "failed.");
+ /*
+ * Ignore failure of clearing the flag intentionally.
+ * The failure means address duplication was detected.
+ */
}
+ EVENTHANDLER_INVOKE(ifaddr_event, ifp);
break;
}
@@ -823,27 +730,24 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra,
struct in6_ifaddr *ia, int flags, struct in6_multi **in6m_sol)
{
char ip6buf[INET6_ADDRSTRLEN];
- struct sockaddr_in6 mltaddr, mltmask;
- struct in6_addr llsol;
+ struct in6_addr mltaddr;
struct in6_multi_mship *imm;
- struct rtentry *rt;
int delay, error;
KASSERT(in6m_sol != NULL, ("%s: in6m_sol is NULL", __func__));
/* Join solicited multicast addr for new host id. */
- bzero(&llsol, sizeof(struct in6_addr));
- llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
- llsol.s6_addr32[1] = 0;
- llsol.s6_addr32[2] = htonl(1);
- llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3];
- llsol.s6_addr8[12] = 0xff;
- if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) {
+ bzero(&mltaddr, sizeof(struct in6_addr));
+ mltaddr.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
+ mltaddr.s6_addr32[2] = htonl(1);
+ mltaddr.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3];
+ mltaddr.s6_addr8[12] = 0xff;
+ if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0) {
/* XXX: should not happen */
log(LOG_ERR, "%s: in6_setscope failed\n", __func__);
goto cleanup;
}
- delay = 0;
+ delay = error = 0;
if ((flags & IN6_IFAUPDATE_DADDELAY)) {
/*
* We need a random delay for DAD on the address being
@@ -853,62 +757,28 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra,
*/
delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz);
}
- imm = in6_joingroup(ifp, &llsol, &error, delay);
+ imm = in6_joingroup(ifp, &mltaddr, &error, delay);
if (imm == NULL) {
- nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s "
- "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &llsol),
+ nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
+ "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr),
if_name(ifp), error));
goto cleanup;
}
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
*in6m_sol = imm->i6mm_maddr;
- bzero(&mltmask, sizeof(mltmask));
- mltmask.sin6_len = sizeof(struct sockaddr_in6);
- mltmask.sin6_family = AF_INET6;
- mltmask.sin6_addr = in6mask32;
-#define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */
-
/*
* Join link-local all-nodes address.
*/
- bzero(&mltaddr, sizeof(mltaddr));
- mltaddr.sin6_len = sizeof(struct sockaddr_in6);
- mltaddr.sin6_family = AF_INET6;
- mltaddr.sin6_addr = in6addr_linklocal_allnodes;
- if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0)
+ mltaddr = in6addr_linklocal_allnodes;
+ if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0)
goto cleanup; /* XXX: should not fail */
- /*
- * XXX: do we really need this automatic routes? We should probably
- * reconsider this stuff. Most applications actually do not need the
- * routes, since they usually specify the outgoing interface.
- */
- rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB);
- if (rt != NULL) {
- /* XXX: only works in !SCOPEDROUTING case. */
- if (memcmp(&mltaddr.sin6_addr,
- &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
- MLTMASK_LEN)) {
- RTFREE_LOCKED(rt);
- rt = NULL;
- }
- }
- if (rt == NULL) {
- error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr,
- (struct sockaddr *)&ia->ia_addr,
- (struct sockaddr *)&mltmask, RTF_UP,
- (struct rtentry **)0, RT_DEFAULT_FIB);
- if (error)
- goto cleanup;
- } else
- RTFREE_LOCKED(rt);
-
- imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
+ imm = in6_joingroup(ifp, &mltaddr, &error, 0);
if (imm == NULL) {
- nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s "
- "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
- &mltaddr.sin6_addr), if_name(ifp), error));
+ nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
+ "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr),
+ if_name(ifp), error));
goto cleanup;
}
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
@@ -924,24 +794,26 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra,
*/
delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz);
}
- if (in6_nigroup(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) {
+ if (in6_nigroup(ifp, NULL, -1, &mltaddr) == 0) {
/* XXX jinmei */
- imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay);
+ imm = in6_joingroup(ifp, &mltaddr, &error, delay);
if (imm == NULL)
- nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s "
+ nd6log((LOG_WARNING,
+ "%s: in6_joingroup failed for %s on %s "
"(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
- &mltaddr.sin6_addr), if_name(ifp), error));
+ &mltaddr), if_name(ifp), error));
/* XXX not very fatal, go on... */
else
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
}
- if (V_icmp6_nodeinfo_oldmcprefix &&
- in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) {
- imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay);
+ if (V_icmp6_nodeinfo_oldmcprefix &&
+ in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr) == 0) {
+ imm = in6_joingroup(ifp, &mltaddr, &error, delay);
if (imm == NULL)
- nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s "
+ nd6log((LOG_WARNING,
+ "%s: in6_joingroup failed for %s on %s "
"(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
- &mltaddr.sin6_addr), if_name(ifp), error));
+ &mltaddr), if_name(ifp), error));
/* XXX not very fatal, go on... */
else
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
@@ -951,38 +823,18 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra,
* Join interface-local all-nodes address.
* (ff01::1%ifN, and ff01::%ifN/32)
*/
- mltaddr.sin6_addr = in6addr_nodelocal_allnodes;
- if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0)
+ mltaddr = in6addr_nodelocal_allnodes;
+ if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0)
goto cleanup; /* XXX: should not fail */
- /* XXX: again, do we really need the route? */
- rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB);
- if (rt != NULL) {
- if (memcmp(&mltaddr.sin6_addr,
- &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
- MLTMASK_LEN)) {
- RTFREE_LOCKED(rt);
- rt = NULL;
- }
- }
- if (rt == NULL) {
- error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr,
- (struct sockaddr *)&ia->ia_addr,
- (struct sockaddr *)&mltmask, RTF_UP,
- (struct rtentry **)0, RT_DEFAULT_FIB);
- if (error)
- goto cleanup;
- } else
- RTFREE_LOCKED(rt);
- imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
+ imm = in6_joingroup(ifp, &mltaddr, &error, 0);
if (imm == NULL) {
- nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s "
+ nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
"(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
- &mltaddr.sin6_addr), if_name(ifp), error));
+ &mltaddr), if_name(ifp), error));
goto cleanup;
}
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
-#undef MLTMASK_LEN
cleanup:
return (error);
@@ -992,17 +844,65 @@ cleanup:
* Update parameters of an IPv6 interface address.
* If necessary, a new entry is created and linked into address chains.
* This function is separated from in6_control().
- * XXX: should this be performed under splnet()?
*/
int
in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
struct in6_ifaddr *ia, int flags)
{
- int error = 0, hostIsNew = 0, plen = -1;
+ int error, hostIsNew = 0;
+
+ if ((error = in6_validate_ifra(ifp, ifra, ia, flags)) != 0)
+ return (error);
+
+ if (ia == NULL) {
+ hostIsNew = 1;
+ if ((ia = in6_alloc_ifa(ifp, ifra, flags)) == NULL)
+ return (ENOBUFS);
+ }
+
+ error = in6_update_ifa_internal(ifp, ifra, ia, hostIsNew, flags);
+ if (error != 0) {
+ if (hostIsNew != 0) {
+ in6_unlink_ifa(ia, ifp);
+ ifa_free(&ia->ia_ifa);
+ }
+ return (error);
+ }
+
+ if (hostIsNew)
+ error = in6_broadcast_ifa(ifp, ifra, ia, flags);
+
+ return (error);
+}
+
+/*
+ * Fill in basic IPv6 address request info.
+ */
+void
+in6_prepare_ifra(struct in6_aliasreq *ifra, const struct in6_addr *addr,
+ const struct in6_addr *mask)
+{
+
+ memset(ifra, 0, sizeof(struct in6_aliasreq));
+
+ ifra->ifra_addr.sin6_family = AF_INET6;
+ ifra->ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
+ if (addr != NULL)
+ ifra->ifra_addr.sin6_addr = *addr;
+
+ ifra->ifra_prefixmask.sin6_family = AF_INET6;
+ ifra->ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
+ if (mask != NULL)
+ ifra->ifra_prefixmask.sin6_addr = *mask;
+}
+
+static int
+in6_validate_ifra(struct ifnet *ifp, struct in6_aliasreq *ifra,
+ struct in6_ifaddr *ia, int flags)
+{
+ int plen = -1;
struct sockaddr_in6 dst6;
struct in6_addrlifetime *lt;
- struct in6_multi *in6m_sol;
- int delay;
char ip6buf[INET6_ADDRSTRLEN];
/* Validate parameters */
@@ -1017,6 +917,14 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
ifra->ifra_dstaddr.sin6_family != AF_INET6 &&
ifra->ifra_dstaddr.sin6_family != AF_UNSPEC)
return (EAFNOSUPPORT);
+
+ /*
+ * Validate address
+ */
+ if (ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6) ||
+ ifra->ifra_addr.sin6_family != AF_INET6)
+ return (EINVAL);
+
/*
* validate ifra_prefixmask. don't check sin6_family, netmask
* does not carry fields other than sin6_len.
@@ -1069,6 +977,9 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
if (sa6_embedscope(&dst6, 0))
return (EINVAL); /* XXX: should be impossible */
}
+ /* Modify original ifra_dstaddr to reflect changes */
+ ifra->ifra_dstaddr = dst6;
+
/*
* The destination address can be specified only for a p2p or a
* loopback interface. If specified, the corresponding prefix length
@@ -1104,94 +1015,102 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
return (0); /* there's nothing to do */
}
- /*
- * If this is a new address, allocate a new ifaddr and link it
- * into chains.
- */
- if (ia == NULL) {
- hostIsNew = 1;
- /*
- * When in6_update_ifa() is called in a process of a received
- * RA, it is called under an interrupt context. So, we should
- * call malloc with M_NOWAIT.
- */
- ia = (struct in6_ifaddr *) malloc(sizeof(*ia), M_IFADDR,
- M_NOWAIT);
- if (ia == NULL)
- return (ENOBUFS);
- bzero((caddr_t)ia, sizeof(*ia));
- ifa_init(&ia->ia_ifa);
- LIST_INIT(&ia->ia6_memberships);
- /* Initialize the address and masks, and put time stamp */
- ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
- ia->ia_addr.sin6_family = AF_INET6;
- ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
- ia->ia6_createtime = time_second;
- if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
- /*
- * XXX: some functions expect that ifa_dstaddr is not
- * NULL for p2p interfaces.
- */
- ia->ia_ifa.ifa_dstaddr =
- (struct sockaddr *)&ia->ia_dstaddr;
- } else {
- ia->ia_ifa.ifa_dstaddr = NULL;
- }
- ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask;
- ia->ia_ifp = ifp;
- ifa_ref(&ia->ia_ifa); /* if_addrhead */
- IF_ADDR_WLOCK(ifp);
- TAILQ_INSERT_TAIL(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
- IF_ADDR_WUNLOCK(ifp);
-
- ifa_ref(&ia->ia_ifa); /* in6_ifaddrhead */
- IN6_IFADDR_WLOCK();
- TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link);
- IN6_IFADDR_WUNLOCK();
- }
-
- /* update timestamp */
- ia->ia6_updatetime = time_second;
-
- /* set prefix mask */
- if (ifra->ifra_prefixmask.sin6_len) {
+ /* Check prefix mask */
+ if (ia != NULL && ifra->ifra_prefixmask.sin6_len != 0) {
/*
* We prohibit changing the prefix length of an existing
* address, because
* + such an operation should be rare in IPv6, and
* + the operation would confuse prefix management.
*/
- if (ia->ia_prefixmask.sin6_len &&
+ if (ia->ia_prefixmask.sin6_len != 0 &&
in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) {
- nd6log((LOG_INFO, "in6_update_ifa: the prefix length of an"
- " existing (%s) address should not be changed\n",
+ nd6log((LOG_INFO, "in6_validate_ifa: the prefix length "
+ "of an existing %s address should not be changed\n",
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
- error = EINVAL;
- goto unlink;
+
+ return (EINVAL);
}
- ia->ia_prefixmask = ifra->ifra_prefixmask;
}
+ return (0);
+}
+
+
+/*
+ * Allocate a new ifaddr and link it into chains.
+ */
+static struct in6_ifaddr *
+in6_alloc_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, int flags)
+{
+ struct in6_ifaddr *ia;
+
/*
- * If a new destination address is specified, scrub the old one and
- * install the new destination. Note that the interface must be
- * p2p or loopback (see the check above.)
+ * When in6_alloc_ifa() is called in a process of a received
+ * RA, it is called under an interrupt context. So, we should
+ * call malloc with M_NOWAIT.
*/
- if (dst6.sin6_family == AF_INET6 &&
- !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia->ia_dstaddr.sin6_addr)) {
- int e;
+ ia = (struct in6_ifaddr *)ifa_alloc(sizeof(*ia), M_NOWAIT);
+ if (ia == NULL)
+ return (NULL);
+ LIST_INIT(&ia->ia6_memberships);
+ /* Initialize the address and masks, and put time stamp */
+ ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+ ia->ia_addr.sin6_family = AF_INET6;
+ ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
+ /* XXX: Can we assign ,sin6_addr and skip the rest? */
+ ia->ia_addr = ifra->ifra_addr;
+ ia->ia6_createtime = time_uptime;
+ if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
+ /*
+ * Some functions expect that ifa_dstaddr is not
+ * NULL for p2p interfaces.
+ */
+ ia->ia_ifa.ifa_dstaddr =
+ (struct sockaddr *)&ia->ia_dstaddr;
+ } else {
+ ia->ia_ifa.ifa_dstaddr = NULL;
+ }
- if ((ia->ia_flags & IFA_ROUTE) != 0 &&
- (e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) {
- nd6log((LOG_ERR, "in6_update_ifa: failed to remove "
- "a route to the old destination: %s\n",
- ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
- /* proceed anyway... */
- } else
- ia->ia_flags &= ~IFA_ROUTE;
- ia->ia_dstaddr = dst6;
+ /* set prefix mask if any */
+ ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask;
+ if (ifra->ifra_prefixmask.sin6_len != 0) {
+ ia->ia_prefixmask.sin6_family = AF_INET6;
+ ia->ia_prefixmask.sin6_len = ifra->ifra_prefixmask.sin6_len;
+ ia->ia_prefixmask.sin6_addr = ifra->ifra_prefixmask.sin6_addr;
}
+ ia->ia_ifp = ifp;
+ ifa_ref(&ia->ia_ifa); /* if_addrhead */
+ IF_ADDR_WLOCK(ifp);
+ TAILQ_INSERT_TAIL(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
+ IF_ADDR_WUNLOCK(ifp);
+
+ ifa_ref(&ia->ia_ifa); /* in6_ifaddrhead */
+ IN6_IFADDR_WLOCK();
+ TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link);
+ LIST_INSERT_HEAD(IN6ADDR_HASH(&ia->ia_addr.sin6_addr), ia, ia6_hash);
+ IN6_IFADDR_WUNLOCK();
+
+ return (ia);
+}
+
+/*
+ * Update/configure interface address parameters:
+ *
+ * 1) Update lifetime
+ * 2) Update interface metric ad flags
+ * 3) Notify other subsystems
+ */
+static int
+in6_update_ifa_internal(struct ifnet *ifp, struct in6_aliasreq *ifra,
+ struct in6_ifaddr *ia, int hostIsNew, int flags)
+{
+ int error;
+
+ /* update timestamp */
+ ia->ia6_updatetime = time_uptime;
+
/*
* Set lifetimes. We do not refer to ia6t_expire and ia6t_preferred
* to see if the address is deprecated or invalidated, but initialize
@@ -1200,71 +1119,85 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
ia->ia6_lifetime = ifra->ifra_lifetime;
if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
ia->ia6_lifetime.ia6t_expire =
- time_second + ia->ia6_lifetime.ia6t_vltime;
+ time_uptime + ia->ia6_lifetime.ia6t_vltime;
} else
ia->ia6_lifetime.ia6t_expire = 0;
if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
ia->ia6_lifetime.ia6t_preferred =
- time_second + ia->ia6_lifetime.ia6t_pltime;
+ time_uptime + ia->ia6_lifetime.ia6t_pltime;
} else
ia->ia6_lifetime.ia6t_preferred = 0;
- /* reset the interface and routing table appropriately. */
- if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0)
- goto unlink;
-
- /*
- * configure address flags.
- */
- ia->ia6_flags = ifra->ifra_flags;
/*
* backward compatibility - if IN6_IFF_DEPRECATED is set from the
* userland, make it deprecated.
*/
if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) {
ia->ia6_lifetime.ia6t_pltime = 0;
- ia->ia6_lifetime.ia6t_preferred = time_second;
+ ia->ia6_lifetime.ia6t_preferred = time_uptime;
}
+
+ /*
+ * configure address flags.
+ */
+ ia->ia6_flags = ifra->ifra_flags;
+
/*
* Make the address tentative before joining multicast addresses,
* so that corresponding MLD responses would not have a tentative
* source address.
*/
ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /* safety */
- if (hostIsNew && in6if_do_dad(ifp))
- ia->ia6_flags |= IN6_IFF_TENTATIVE;
-
- /* DAD should be performed after ND6_IFF_IFDISABLED is cleared. */
- if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
- ia->ia6_flags |= IN6_IFF_TENTATIVE;
/*
- * We are done if we have simply modified an existing address.
+ * DAD should be performed for an new address or addresses on
+ * an interface with ND6_IFF_IFDISABLED.
*/
- if (!hostIsNew)
- return (error);
+ if (in6if_do_dad(ifp) &&
+ (hostIsNew || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)))
+ ia->ia6_flags |= IN6_IFF_TENTATIVE;
- /*
- * Beyond this point, we should call in6_purgeaddr upon an error,
- * not just go to unlink.
- */
+ /* notify other subsystems */
+ error = in6_notify_ifa(ifp, ia, ifra, hostIsNew);
+
+ return (error);
+}
+
+/*
+ * Do link-level ifa job:
+ * 1) Add lle entry for added address
+ * 2) Notifies routing socket users about new address
+ * 3) join appropriate multicast group
+ * 4) start DAD if enabled
+ */
+static int
+in6_broadcast_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
+ struct in6_ifaddr *ia, int flags)
+{
+ struct in6_multi *in6m_sol;
+ int error = 0;
+
+ /* Add local address to lltable, if necessary (ex. on p2p link). */
+ if ((error = nd6_add_ifa_lle(ia)) != 0) {
+ in6_purgeaddr(&ia->ia_ifa);
+ ifa_free(&ia->ia_ifa);
+ return (error);
+ }
/* Join necessary multicast groups. */
in6m_sol = NULL;
if ((ifp->if_flags & IFF_MULTICAST) != 0) {
error = in6_update_ifa_join_mc(ifp, ifra, ia, flags, &in6m_sol);
- if (error)
- goto cleanup;
+ if (error != 0) {
+ in6_purgeaddr(&ia->ia_ifa);
+ ifa_free(&ia->ia_ifa);
+ return (error);
+ }
}
- /*
- * Perform DAD, if needed.
- * XXX It may be of use, if we can administratively disable DAD.
- */
- if (in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) &&
- (ia->ia6_flags & IN6_IFF_TENTATIVE))
- {
- int mindelay, maxdelay;
+ /* Perform DAD, if the address is TENTATIVE. */
+ if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
+ int delay, mindelay, maxdelay;
delay = 0;
if ((flags & IN6_IFAUPDATE_DADDELAY)) {
@@ -1295,159 +1228,9 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
nd6_dad_start((struct ifaddr *)ia, delay);
}
- KASSERT(hostIsNew, ("in6_update_ifa: !hostIsNew"));
+ in6_newaddrmsg(ia, RTM_ADD);
ifa_free(&ia->ia_ifa);
return (error);
-
- unlink:
- /*
- * XXX: if a change of an existing address failed, keep the entry
- * anyway.
- */
- if (hostIsNew) {
- in6_unlink_ifa(ia, ifp);
- ifa_free(&ia->ia_ifa);
- }
- return (error);
-
- cleanup:
- KASSERT(hostIsNew, ("in6_update_ifa: cleanup: !hostIsNew"));
- ifa_free(&ia->ia_ifa);
- in6_purgeaddr(&ia->ia_ifa);
- return error;
-}
-
-/*
- * Leave multicast groups. Factored out from in6_purgeaddr().
- * This entire work should only be done once, for the default FIB.
- */
-static int
-in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0)
-{
- struct sockaddr_in6 mltaddr, mltmask;
- struct in6_multi_mship *imm;
- struct rtentry *rt;
- struct sockaddr_in6 sin6;
- int error;
-
- /*
- * Leave from multicast groups we have joined for the interface.
- */
- while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) {
- LIST_REMOVE(imm, i6mm_chain);
- in6_leavegroup(imm);
- }
-
- /*
- * Remove the link-local all-nodes address.
- */
- bzero(&mltmask, sizeof(mltmask));
- mltmask.sin6_len = sizeof(struct sockaddr_in6);
- mltmask.sin6_family = AF_INET6;
- mltmask.sin6_addr = in6mask32;
-
- bzero(&mltaddr, sizeof(mltaddr));
- mltaddr.sin6_len = sizeof(struct sockaddr_in6);
- mltaddr.sin6_family = AF_INET6;
- mltaddr.sin6_addr = in6addr_linklocal_allnodes;
-
- if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0)
- return (error);
-
- /*
- * As for the mltaddr above, proactively prepare the sin6 to avoid
- * rtentry un- and re-locking.
- */
- if (ifa0 != NULL) {
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_len = sizeof(sin6);
- sin6.sin6_family = AF_INET6;
- memcpy(&sin6.sin6_addr, &satosin6(ifa0->ifa_addr)->sin6_addr,
- sizeof(sin6.sin6_addr));
- error = in6_setscope(&sin6.sin6_addr, ifa0->ifa_ifp, NULL);
- if (error != 0)
- return (error);
- }
-
- rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB);
- if (rt != NULL && rt->rt_gateway != NULL &&
- (memcmp(&satosin6(rt->rt_gateway)->sin6_addr,
- &ia->ia_addr.sin6_addr,
- sizeof(ia->ia_addr.sin6_addr)) == 0)) {
- /*
- * If no more IPv6 address exists on this interface then
- * remove the multicast address route.
- */
- if (ifa0 == NULL) {
- memcpy(&mltaddr.sin6_addr,
- &satosin6(rt_key(rt))->sin6_addr,
- sizeof(mltaddr.sin6_addr));
- RTFREE_LOCKED(rt);
- error = in6_rtrequest(RTM_DELETE,
- (struct sockaddr *)&mltaddr,
- (struct sockaddr *)&ia->ia_addr,
- (struct sockaddr *)&mltmask, RTF_UP,
- (struct rtentry **)0, RT_DEFAULT_FIB);
- if (error)
- log(LOG_INFO, "%s: link-local all-nodes "
- "multicast address deletion error\n",
- __func__);
- } else {
- /*
- * Replace the gateway of the route.
- */
- memcpy(rt->rt_gateway, &sin6, sizeof(sin6));
- RTFREE_LOCKED(rt);
- }
- } else {
- if (rt != NULL)
- RTFREE_LOCKED(rt);
- }
-
- /*
- * Remove the node-local all-nodes address.
- */
- mltaddr.sin6_addr = in6addr_nodelocal_allnodes;
- if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0)
- return (error);
-
- rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB);
- if (rt != NULL && rt->rt_gateway != NULL &&
- (memcmp(&satosin6(rt->rt_gateway)->sin6_addr,
- &ia->ia_addr.sin6_addr,
- sizeof(ia->ia_addr.sin6_addr)) == 0)) {
- /*
- * If no more IPv6 address exists on this interface then
- * remove the multicast address route.
- */
- if (ifa0 == NULL) {
- memcpy(&mltaddr.sin6_addr,
- &satosin6(rt_key(rt))->sin6_addr,
- sizeof(mltaddr.sin6_addr));
-
- RTFREE_LOCKED(rt);
- error = in6_rtrequest(RTM_DELETE,
- (struct sockaddr *)&mltaddr,
- (struct sockaddr *)&ia->ia_addr,
- (struct sockaddr *)&mltmask, RTF_UP,
- (struct rtentry **)0, RT_DEFAULT_FIB);
- if (error)
- log(LOG_INFO, "%s: node-local all-nodes"
- "multicast address deletion error\n",
- __func__);
- } else {
- /*
- * Replace the gateway of the route.
- */
- memcpy(rt->rt_gateway, &sin6, sizeof(sin6));
- RTFREE_LOCKED(rt);
- }
- } else {
- if (rt != NULL)
- RTFREE_LOCKED(rt);
- }
-
- return (0);
}
void
@@ -1455,26 +1238,11 @@ in6_purgeaddr(struct ifaddr *ifa)
{
struct ifnet *ifp = ifa->ifa_ifp;
struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa;
+ struct in6_multi_mship *imm;
int plen, error;
- struct ifaddr *ifa0;
- /*
- * find another IPv6 address as the gateway for the
- * link-local and node-local all-nodes multicast
- * address routes
- */
- IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa0, &ifp->if_addrhead, ifa_link) {
- if ((ifa0->ifa_addr->sa_family != AF_INET6) ||
- memcmp(&satosin6(ifa0->ifa_addr)->sin6_addr,
- &ia->ia_addr.sin6_addr, sizeof(struct in6_addr)) == 0)
- continue;
- else
- break;
- }
- if (ifa0 != NULL)
- ifa_ref(ifa0);
- IF_ADDR_RUNLOCK(ifp);
+ if (ifa->ifa_carp)
+ (*carp_detach_p)(ifa);
/*
* Remove the loopback route to the interface address.
@@ -1491,32 +1259,30 @@ in6_purgeaddr(struct ifaddr *ifa)
/* stop DAD processing */
nd6_dad_stop(ifa);
- /* Remove local address entry from lltable. */
- in6_ifremloop(ifa);
-
/* Leave multicast groups. */
- error = in6_purgeaddr_mc(ifp, ia, ifa0);
-
- if (ifa0 != NULL)
- ifa_free(ifa0);
-
+ while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) {
+ LIST_REMOVE(imm, i6mm_chain);
+ in6_leavegroup(imm);
+ }
plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
if ((ia->ia_flags & IFA_ROUTE) && plen == 128) {
error = rtinit(&(ia->ia_ifa), RTM_DELETE, ia->ia_flags |
- (ia->ia_dstaddr.sin6_family == AF_INET6) ? RTF_HOST : 0);
+ (ia->ia_dstaddr.sin6_family == AF_INET6 ? RTF_HOST : 0));
if (error != 0)
log(LOG_INFO, "%s: err=%d, destination address delete "
"failed\n", __func__, error);
ia->ia_flags &= ~IFA_ROUTE;
}
+ in6_newaddrmsg(ia, RTM_DELETE);
in6_unlink_ifa(ia, ifp);
}
static void
in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
{
- int s = splnet();
+ char ip6buf[INET6_ADDRSTRLEN];
+ int remove_lle;
IF_ADDR_WLOCK(ifp);
TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
@@ -1530,21 +1296,28 @@ in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
*/
IN6_IFADDR_WLOCK();
TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link);
+ LIST_REMOVE(ia, ia6_hash);
IN6_IFADDR_WUNLOCK();
/*
* Release the reference to the base prefix. There should be a
* positive reference.
*/
+ remove_lle = 0;
if (ia->ia6_ndpr == NULL) {
nd6log((LOG_NOTICE,
"in6_unlink_ifa: autoconf'ed address "
- "%p has no prefix\n", ia));
+ "%s has no prefix\n", ip6_sprintf(ip6buf, IA6_IN6(ia))));
} else {
ia->ia6_ndpr->ndpr_refcnt--;
+ /* Do not delete lles within prefix if refcont != 0 */
+ if (ia->ia6_ndpr->ndpr_refcnt == 0)
+ remove_lle = 1;
ia->ia6_ndpr = NULL;
}
+ nd6_rem_ifa_lle(ia, remove_lle);
+
/*
* Also, if the address being removed is autoconf'ed, call
* pfxlist_onlink_check() since the release might affect the status of
@@ -1554,335 +1327,63 @@ in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
pfxlist_onlink_check();
}
ifa_free(&ia->ia_ifa); /* in6_ifaddrhead */
- splx(s);
-}
-
-void
-in6_purgeif(struct ifnet *ifp)
-{
- struct ifaddr *ifa, *nifa;
-
- TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) {
- if (ifa->ifa_addr->sa_family != AF_INET6)
- continue;
- in6_purgeaddr(ifa);
- }
-
- in6_ifdetach(ifp);
}
/*
- * SIOC[GAD]LIFADDR.
- * SIOCGLIFADDR: get first address. (?)
- * SIOCGLIFADDR with IFLR_PREFIX:
- * get first address that matches the specified prefix.
- * SIOCALIFADDR: add the specified address.
- * SIOCALIFADDR with IFLR_PREFIX:
- * add the specified prefix, filling hostid part from
- * the first link-local address. prefixlen must be <= 64.
- * SIOCDLIFADDR: delete the specified address.
- * SIOCDLIFADDR with IFLR_PREFIX:
- * delete the first address that matches the specified prefix.
- * return values:
- * EINVAL on invalid parameters
- * EADDRNOTAVAIL on prefix match failed/specified address not found
- * other values may be returned from in6_ioctl()
- *
- * NOTE: SIOCALIFADDR(with IFLR_PREFIX set) allows prefixlen less than 64.
- * this is to accomodate address naming scheme other than RFC2374,
- * in the future.
- * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374
- * address encoding scheme. (see figure on page 8)
+ * Notifies other subsystems about address change/arrival:
+ * 1) Notifies device handler on the first IPv6 address assignment
+ * 2) Handle routing table changes for P2P links and route
+ * 3) Handle routing table changes for address host route
*/
static int
-in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
- struct ifnet *ifp, struct thread *td)
+in6_notify_ifa(struct ifnet *ifp, struct in6_ifaddr *ia,
+ struct in6_aliasreq *ifra, int hostIsNew)
{
- struct if_laddrreq *iflr = (struct if_laddrreq *)data;
+ int error = 0, plen, ifacount = 0;
struct ifaddr *ifa;
- struct sockaddr *sa;
-
- /* sanity checks */
- if (!data || !ifp) {
- panic("invalid argument to in6_lifaddr_ioctl");
- /* NOTREACHED */
- }
-
- switch (cmd) {
- case SIOCGLIFADDR:
- /* address must be specified on GET with IFLR_PREFIX */
- if ((iflr->flags & IFLR_PREFIX) == 0)
- break;
- /* FALLTHROUGH */
- case SIOCALIFADDR:
- case SIOCDLIFADDR:
- /* address must be specified on ADD and DELETE */
- sa = (struct sockaddr *)&iflr->addr;
- if (sa->sa_family != AF_INET6)
- return EINVAL;
- if (sa->sa_len != sizeof(struct sockaddr_in6))
- return EINVAL;
- /* XXX need improvement */
- sa = (struct sockaddr *)&iflr->dstaddr;
- if (sa->sa_family && sa->sa_family != AF_INET6)
- return EINVAL;
- if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in6))
- return EINVAL;
- break;
- default: /* shouldn't happen */
-#if 0
- panic("invalid cmd to in6_lifaddr_ioctl");
- /* NOTREACHED */
-#else
- return EOPNOTSUPP;
-#endif
- }
- if (sizeof(struct in6_addr) * 8 < iflr->prefixlen)
- return EINVAL;
-
- switch (cmd) {
- case SIOCALIFADDR:
- {
- struct in6_aliasreq ifra;
- struct in6_addr *hostid = NULL;
- int prefixlen;
-
- ifa = NULL;
- if ((iflr->flags & IFLR_PREFIX) != 0) {
- struct sockaddr_in6 *sin6;
-
- /*
- * hostid is to fill in the hostid part of the
- * address. hostid points to the first link-local
- * address attached to the interface.
- */
- ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0);
- if (!ifa)
- return EADDRNOTAVAIL;
- hostid = IFA_IN6(ifa);
-
- /* prefixlen must be <= 64. */
- if (64 < iflr->prefixlen) {
- if (ifa != NULL)
- ifa_free(ifa);
- return EINVAL;
- }
- prefixlen = iflr->prefixlen;
-
- /* hostid part must be zero. */
- sin6 = (struct sockaddr_in6 *)&iflr->addr;
- if (sin6->sin6_addr.s6_addr32[2] != 0 ||
- sin6->sin6_addr.s6_addr32[3] != 0) {
- if (ifa != NULL)
- ifa_free(ifa);
- return EINVAL;
- }
- } else
- prefixlen = iflr->prefixlen;
-
- /* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */
- bzero(&ifra, sizeof(ifra));
- bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name));
-
- bcopy(&iflr->addr, &ifra.ifra_addr,
- ((struct sockaddr *)&iflr->addr)->sa_len);
- if (hostid) {
- /* fill in hostid part */
- ifra.ifra_addr.sin6_addr.s6_addr32[2] =
- hostid->s6_addr32[2];
- ifra.ifra_addr.sin6_addr.s6_addr32[3] =
- hostid->s6_addr32[3];
- }
-
- if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /* XXX */
- bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
- ((struct sockaddr *)&iflr->dstaddr)->sa_len);
- if (hostid) {
- ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] =
- hostid->s6_addr32[2];
- ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] =
- hostid->s6_addr32[3];
- }
- }
- if (ifa != NULL)
- ifa_free(ifa);
-
- ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
- in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen);
-
- ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX;
- return in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, td);
- }
- case SIOCGLIFADDR:
- case SIOCDLIFADDR:
- {
- struct in6_ifaddr *ia;
- struct in6_addr mask, candidate, match;
- struct sockaddr_in6 *sin6;
- int cmp;
-
- bzero(&mask, sizeof(mask));
- if (iflr->flags & IFLR_PREFIX) {
- /* lookup a prefix rather than address. */
- in6_prefixlen2mask(&mask, iflr->prefixlen);
-
- sin6 = (struct sockaddr_in6 *)&iflr->addr;
- bcopy(&sin6->sin6_addr, &match, sizeof(match));
- match.s6_addr32[0] &= mask.s6_addr32[0];
- match.s6_addr32[1] &= mask.s6_addr32[1];
- match.s6_addr32[2] &= mask.s6_addr32[2];
- match.s6_addr32[3] &= mask.s6_addr32[3];
-
- /* if you set extra bits, that's wrong */
- if (bcmp(&match, &sin6->sin6_addr, sizeof(match)))
- return EINVAL;
-
- cmp = 1;
- } else {
- if (cmd == SIOCGLIFADDR) {
- /* on getting an address, take the 1st match */
- cmp = 0; /* XXX */
- } else {
- /* on deleting an address, do exact match */
- in6_prefixlen2mask(&mask, 128);
- sin6 = (struct sockaddr_in6 *)&iflr->addr;
- bcopy(&sin6->sin6_addr, &match, sizeof(match));
-
- cmp = 1;
- }
- }
+ struct sockaddr_in6 *pdst;
+ char ip6buf[INET6_ADDRSTRLEN];
+ /*
+ * Give the interface a chance to initialize
+ * if this is its first address,
+ */
+ if (hostIsNew != 0) {
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
- if (!cmp)
- break;
-
- /*
- * XXX: this is adhoc, but is necessary to allow
- * a user to specify fe80::/64 (not /10) for a
- * link-local address.
- */
- bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate));
- in6_clearscope(&candidate);
- candidate.s6_addr32[0] &= mask.s6_addr32[0];
- candidate.s6_addr32[1] &= mask.s6_addr32[1];
- candidate.s6_addr32[2] &= mask.s6_addr32[2];
- candidate.s6_addr32[3] &= mask.s6_addr32[3];
- if (IN6_ARE_ADDR_EQUAL(&candidate, &match))
- break;
+ ifacount++;
}
- if (ifa != NULL)
- ifa_ref(ifa);
IF_ADDR_RUNLOCK(ifp);
- if (!ifa)
- return EADDRNOTAVAIL;
- ia = ifa2ia6(ifa);
-
- if (cmd == SIOCGLIFADDR) {
- int error;
-
- /* fill in the if_laddrreq structure */
- bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len);
- error = sa6_recoverscope(
- (struct sockaddr_in6 *)&iflr->addr);
- if (error != 0) {
- ifa_free(ifa);
- return (error);
- }
-
- if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
- bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
- ia->ia_dstaddr.sin6_len);
- error = sa6_recoverscope(
- (struct sockaddr_in6 *)&iflr->dstaddr);
- if (error != 0) {
- ifa_free(ifa);
- return (error);
- }
- } else
- bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
-
- iflr->prefixlen =
- in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);
-
- iflr->flags = ia->ia6_flags; /* XXX */
- ifa_free(ifa);
-
- return 0;
- } else {
- struct in6_aliasreq ifra;
-
- /* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */
- bzero(&ifra, sizeof(ifra));
- bcopy(iflr->iflr_name, ifra.ifra_name,
- sizeof(ifra.ifra_name));
-
- bcopy(&ia->ia_addr, &ifra.ifra_addr,
- ia->ia_addr.sin6_len);
- if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
- bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
- ia->ia_dstaddr.sin6_len);
- } else {
- bzero(&ifra.ifra_dstaddr,
- sizeof(ifra.ifra_dstaddr));
- }
- bcopy(&ia->ia_prefixmask, &ifra.ifra_dstaddr,
- ia->ia_prefixmask.sin6_len);
-
- ifra.ifra_flags = ia->ia6_flags;
- ifa_free(ifa);
- return in6_control(so, SIOCDIFADDR_IN6, (caddr_t)&ifra,
- ifp, td);
- }
- }
}
- return EOPNOTSUPP; /* just for safety */
-}
-
-/*
- * Initialize an interface's IPv6 address and routing table entry.
- */
-static int
-in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
- struct sockaddr_in6 *sin6, int newhost)
-{
- int error = 0, plen, ifacount = 0;
- int s = splimp();
- struct ifaddr *ifa;
-
- /*
- * Give the interface a chance to initialize
- * if this is its first address,
- * and to validate the address if necessary.
- */
- IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family != AF_INET6)
- continue;
- ifacount++;
- }
- IF_ADDR_RUNLOCK(ifp);
-
- ia->ia_addr = *sin6;
-
if (ifacount <= 1 && ifp->if_ioctl) {
error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
- if (error) {
- splx(s);
+ if (error)
return (error);
- }
}
- splx(s);
-
- ia->ia_ifa.ifa_metric = ifp->if_metric;
- /* we could do in(6)_socktrim here, but just omit it at this moment. */
+ /*
+ * If a new destination address is specified, scrub the old one and
+ * install the new destination. Note that the interface must be
+ * p2p or loopback.
+ */
+ pdst = &ifra->ifra_dstaddr;
+ if (pdst->sin6_family == AF_INET6 &&
+ !IN6_ARE_ADDR_EQUAL(&pdst->sin6_addr, &ia->ia_dstaddr.sin6_addr)) {
+ if ((ia->ia_flags & IFA_ROUTE) != 0 &&
+ (rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST) != 0)) {
+ nd6log((LOG_ERR, "in6_update_ifa_internal: failed to "
+ "remove a route to the old destination: %s\n",
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
+ /* proceed anyway... */
+ } else
+ ia->ia_flags &= ~IFA_ROUTE;
+ ia->ia_dstaddr = *pdst;
+ }
/*
- * Special case:
* If a new destination address is specified for a point-to-point
* interface, install a route to the destination as an interface
* direct route.
@@ -1893,19 +1394,19 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 &&
ia->ia_dstaddr.sin6_family == AF_INET6) {
int rtflags = RTF_UP | RTF_HOST;
- error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags);
- if (error)
- return (error);
- ia->ia_flags |= IFA_ROUTE;
/*
* Handle the case for ::1 .
*/
if (ifp->if_flags & IFF_LOOPBACK)
ia->ia_flags |= IFA_RTSELF;
+ error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags);
+ if (error)
+ return (error);
+ ia->ia_flags |= IFA_ROUTE;
}
/*
- * add a loopback route to self
+ * add a loopback route to self if not exists
*/
if (!(ia->ia_flags & IFA_RTSELF) && V_nd6_useloopback) {
error = ifa_add_loopback_route((struct ifaddr *)ia,
@@ -1914,10 +1415,6 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
ia->ia_flags |= IFA_RTSELF;
}
- /* Add local address to lltable, if necessary (ex. on p2p link). */
- if (newhost)
- in6_ifaddloop(&(ia->ia_ifa));
-
return (error);
}
@@ -1949,11 +1446,35 @@ in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
/*
+ * find the internet address corresponding to a given address.
+ * ifaddr is returned referenced.
+ */
+struct in6_ifaddr *
+in6ifa_ifwithaddr(const struct in6_addr *addr, uint32_t zoneid)
+{
+ struct rm_priotracker in6_ifa_tracker;
+ struct in6_ifaddr *ia;
+
+ IN6_IFADDR_RLOCK(&in6_ifa_tracker);
+ LIST_FOREACH(ia, IN6ADDR_HASH(addr), ia6_hash) {
+ if (IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), addr)) {
+ if (zoneid != 0 &&
+ zoneid != ia->ia_addr.sin6_scope_id)
+ continue;
+ ifa_ref(&ia->ia_ifa);
+ break;
+ }
+ }
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
+ return (ia);
+}
+
+/*
* find the internet address corresponding to a given interface and address.
* ifaddr is returned referenced.
*/
struct in6_ifaddr *
-in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr)
+in6ifa_ifpwithaddr(struct ifnet *ifp, const struct in6_addr *addr)
{
struct ifaddr *ifa;
@@ -1982,7 +1503,7 @@ in6ifa_llaonifp(struct ifnet *ifp)
if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
return (NULL);
- if_addr_rlock(ifp);
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
@@ -1992,7 +1513,7 @@ in6ifa_llaonifp(struct ifnet *ifp)
IN6_IS_ADDR_MC_NODELOCAL(&sin6->sin6_addr))
break;
}
- if_addr_runlock(ifp);
+ IF_ADDR_RUNLOCK(ifp);
return ((struct in6_ifaddr *)ifa);
}
@@ -2082,20 +1603,21 @@ ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
int
in6_localaddr(struct in6_addr *in6)
{
+ struct rm_priotracker in6_ifa_tracker;
struct in6_ifaddr *ia;
if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6))
return 1;
- IN6_IFADDR_RLOCK();
+ IN6_IFADDR_RLOCK(&in6_ifa_tracker);
TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr,
&ia->ia_prefixmask.sin6_addr)) {
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
return 1;
}
}
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
return (0);
}
@@ -2107,37 +1629,67 @@ in6_localaddr(struct in6_addr *in6)
int
in6_localip(struct in6_addr *in6)
{
+ struct rm_priotracker in6_ifa_tracker;
struct in6_ifaddr *ia;
- IN6_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
+ IN6_IFADDR_RLOCK(&in6_ifa_tracker);
+ LIST_FOREACH(ia, IN6ADDR_HASH(in6), ia6_hash) {
if (IN6_ARE_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr)) {
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
return (1);
}
}
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
return (0);
}
+
+/*
+ * Return 1 if an internet address is configured on an interface.
+ */
+int
+in6_ifhasaddr(struct ifnet *ifp, struct in6_addr *addr)
+{
+ struct in6_addr in6;
+ struct ifaddr *ifa;
+ struct in6_ifaddr *ia6;
+ in6 = *addr;
+ if (in6_clearscope(&in6))
+ return (0);
+ in6_setscope(&in6, ifp, NULL);
+
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ ia6 = (struct in6_ifaddr *)ifa;
+ if (IN6_ARE_ADDR_EQUAL(&ia6->ia_addr.sin6_addr, &in6)) {
+ IF_ADDR_RUNLOCK(ifp);
+ return (1);
+ }
+ }
+ IF_ADDR_RUNLOCK(ifp);
+
+ return (0);
+}
int
in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
{
+ struct rm_priotracker in6_ifa_tracker;
struct in6_ifaddr *ia;
- IN6_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
- if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
- &sa6->sin6_addr) &&
- (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) {
- IN6_IFADDR_RUNLOCK();
- return (1); /* true */
+ IN6_IFADDR_RLOCK(&in6_ifa_tracker);
+ LIST_FOREACH(ia, IN6ADDR_HASH(&sa6->sin6_addr), ia6_hash) {
+ if (IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), &sa6->sin6_addr)) {
+ if (ia->ia6_flags & IN6_IFF_DEPRECATED) {
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
+ return (1); /* true */
+ }
+ break;
}
-
- /* XXX: do we still have to go thru the rest of the list? */
}
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
return (0); /* false */
}
@@ -2222,7 +1774,7 @@ in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
{
int dst_scope = in6_addrscope(dst), blen = -1, tlen;
struct ifaddr *ifa;
- struct in6_ifaddr *besta = 0;
+ struct in6_ifaddr *besta = NULL;
struct in6_ifaddr *dep[2]; /* last-resort: deprecated */
dep[0] = dep[1] = NULL;
@@ -2347,37 +1899,24 @@ in6if_do_dad(struct ifnet *ifp)
if ((ifp->if_flags & IFF_LOOPBACK) != 0)
return (0);
- if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) ||
+ (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD))
return (0);
- switch (ifp->if_type) {
-#ifdef IFT_DUMMY
- case IFT_DUMMY:
-#endif
- case IFT_FAITH:
- /*
- * These interfaces do not have the IFF_LOOPBACK flag,
- * but loop packets back. We do not have to do DAD on such
- * interfaces. We should even omit it, because loop-backed
- * NS would confuse the DAD procedure.
- */
- return (0);
- default:
- /*
- * Our DAD routine requires the interface up and running.
- * However, some interfaces can be up before the RUNNING
- * status. Additionaly, users may try to assign addresses
- * before the interface becomes up (or running).
- * We simply skip DAD in such a case as a work around.
- * XXX: we should rather mark "tentative" on such addresses,
- * and do DAD after the interface becomes ready.
- */
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING)))
- return (0);
+ /*
+ * Our DAD routine requires the interface up and running.
+ * However, some interfaces can be up before the RUNNING
+ * status. Additionally, users may try to assign addresses
+ * before the interface becomes up (or running).
+ * This function returns EAGAIN in that case.
+ * The caller should mark "tentative" on the address instead of
+ * performing DAD immediately.
+ */
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)))
+ return (EAGAIN);
- return (1);
- }
+ return (1);
}
/*
@@ -2391,7 +1930,7 @@ in6_setmaxmtu(void)
struct ifnet *ifp;
IFNET_RLOCK_NOSLEEP();
- TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
/* this function can be called during ifnet initialization */
if (!ifp->if_afdata[AF_INET6])
continue;
@@ -2417,18 +1956,10 @@ in6_if2idlen(struct ifnet *ifp)
{
switch (ifp->if_type) {
case IFT_ETHER: /* RFC2464 */
-#ifdef IFT_PROPVIRTUAL
case IFT_PROPVIRTUAL: /* XXX: no RFC. treat it as ether */
-#endif
-#ifdef IFT_L2VLAN
case IFT_L2VLAN: /* ditto */
-#endif
-#ifdef IFT_IEEE80211
case IFT_IEEE80211: /* ditto */
-#endif
-#ifdef IFT_MIP
- case IFT_MIP: /* ditto */
-#endif
+ case IFT_BRIDGE: /* bridge(4) only does Ethernet-like links */
case IFT_INFINIBAND:
return (64);
case IFT_FDDI: /* RFC2467 */
@@ -2468,25 +1999,38 @@ in6_if2idlen(struct ifnet *ifp)
struct in6_llentry {
struct llentry base;
- struct sockaddr_in6 l3_addr6;
};
+#define IN6_LLTBL_DEFAULT_HSIZE 32
+#define IN6_LLTBL_HASH(k, h) \
+ (((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1))
+
/*
- * Deletes an address from the address table.
- * This function is called by the timer functions
- * such as arptimer() and nd6_llinfo_timer(), and
- * the caller does the locking.
+ * Do actual deallocation of @lle.
*/
static void
-in6_lltable_free(struct lltable *llt, struct llentry *lle)
+in6_lltable_destroy_lle_unlocked(struct llentry *lle)
{
- LLE_WUNLOCK(lle);
+
LLE_LOCK_DESTROY(lle);
+ LLE_REQ_DESTROY(lle);
free(lle, M_LLTABLE);
}
+/*
+ * Called by LLE_FREE_LOCKED when number of references
+ * drops to zero.
+ */
+static void
+in6_lltable_destroy_lle(struct llentry *lle)
+{
+
+ LLE_WUNLOCK(lle);
+ in6_lltable_destroy_lle_unlocked(lle);
+}
+
static struct llentry *
-in6_lltable_new(const struct sockaddr *l3addr, u_int flags)
+in6_lltable_new(const struct in6_addr *addr6, u_int flags)
{
struct in6_llentry *lle;
@@ -2494,45 +2038,69 @@ in6_lltable_new(const struct sockaddr *l3addr, u_int flags)
if (lle == NULL) /* NB: caller generates msg */
return NULL;
- lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr;
+ lle->base.r_l3addr.addr6 = *addr6;
lle->base.lle_refcnt = 1;
- lle->base.lle_free = in6_lltable_free;
+ lle->base.lle_free = in6_lltable_destroy_lle;
LLE_LOCK_INIT(&lle->base);
- callout_init_rw(&lle->base.ln_timer_ch, &lle->base.lle_lock,
- CALLOUT_RETURNUNLOCKED);
+ LLE_REQ_INIT(&lle->base);
+ callout_init(&lle->base.lle_timer, 1);
return (&lle->base);
}
+static int
+in6_lltable_match_prefix(const struct sockaddr *saddr,
+ const struct sockaddr *smask, u_int flags, struct llentry *lle)
+{
+ const struct in6_addr *addr, *mask, *lle_addr;
+
+ addr = &((const struct sockaddr_in6 *)saddr)->sin6_addr;
+ mask = &((const struct sockaddr_in6 *)smask)->sin6_addr;
+ lle_addr = &lle->r_l3addr.addr6;
+
+ if (IN6_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask) == 0)
+ return (0);
+
+ if (lle->la_flags & LLE_IFADDR) {
+
+ /*
+ * Delete LLE_IFADDR records IFF address & flag matches.
+ * Note that addr is the interface address within prefix
+ * being matched.
+ */
+ if (IN6_ARE_ADDR_EQUAL(addr, lle_addr) &&
+ (flags & LLE_STATIC) != 0)
+ return (1);
+ return (0);
+ }
+
+ /* flags & LLE_STATIC means deleting both dynamic and static entries */
+ if ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))
+ return (1);
+
+ return (0);
+}
+
static void
-in6_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix,
- const struct sockaddr *mask, u_int flags)
+in6_lltable_free_entry(struct lltable *llt, struct llentry *lle)
{
- const struct sockaddr_in6 *pfx = (const struct sockaddr_in6 *)prefix;
- const struct sockaddr_in6 *msk = (const struct sockaddr_in6 *)mask;
- struct llentry *lle, *next;
- int i;
+ struct ifnet *ifp;
- /*
- * (flags & LLE_STATIC) means deleting all entries
- * including static ND6 entries.
- */
- IF_AFDATA_WLOCK(llt->llt_ifp);
- for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
- LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
- if (IN6_ARE_MASKED_ADDR_EQUAL(
- &satosin6(L3_ADDR(lle))->sin6_addr,
- &pfx->sin6_addr, &msk->sin6_addr) &&
- ((flags & LLE_STATIC) ||
- !(lle->la_flags & LLE_STATIC))) {
- LLE_WLOCK(lle);
- if (callout_stop(&lle->la_timer))
- LLE_REMREF(lle);
- llentry_free(lle);
- }
- }
+ LLE_WLOCK_ASSERT(lle);
+ KASSERT(llt != NULL, ("lltable is NULL"));
+
+ /* Unlink entry from table */
+ if ((lle->la_flags & LLE_LINKED) != 0) {
+
+ ifp = llt->llt_ifp;
+ IF_AFDATA_WLOCK_ASSERT(ifp);
+ lltable_unlink_entry(llt, lle);
}
- IF_AFDATA_WUNLOCK(llt->llt_ifp);
+
+ if (callout_stop(&lle->lle_timer) > 0)
+ LLE_REMREF(lle);
+
+ llentry_free(lle);
}
static int
@@ -2540,122 +2108,178 @@ in6_lltable_rtcheck(struct ifnet *ifp,
u_int flags,
const struct sockaddr *l3addr)
{
- struct rtentry *rt;
+ const struct sockaddr_in6 *sin6;
+ struct nhop6_basic nh6;
+ struct in6_addr dst;
+ uint32_t scopeid;
+ int error;
char ip6buf[INET6_ADDRSTRLEN];
KASSERT(l3addr->sa_family == AF_INET6,
("sin_family %d", l3addr->sa_family));
/* Our local addresses are always only installed on the default FIB. */
- /* XXX rtalloc1 should take a const param */
- rt = in6_rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0,
- RT_DEFAULT_FIB);
- if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) {
+
+ sin6 = (const struct sockaddr_in6 *)l3addr;
+ in6_splitscope(&sin6->sin6_addr, &dst, &scopeid);
+ error = fib6_lookup_nh_basic(RT_DEFAULT_FIB, &dst, scopeid, 0, 0, &nh6);
+ if (error != 0 || (nh6.nh_flags & NHF_GATEWAY) || nh6.nh_ifp != ifp) {
struct ifaddr *ifa;
/*
* Create an ND6 cache for an IPv6 neighbor
* that is not covered by our own prefix.
*/
- /* XXX ifaof_ifpforaddr should take a const param */
- ifa = ifaof_ifpforaddr(__DECONST(struct sockaddr *, l3addr), ifp);
+ ifa = ifaof_ifpforaddr(l3addr, ifp);
if (ifa != NULL) {
ifa_free(ifa);
- if (rt != NULL)
- RTFREE_LOCKED(rt);
return 0;
}
log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n",
- ip6_sprintf(ip6buf, &((const struct sockaddr_in6 *)l3addr)->sin6_addr));
- if (rt != NULL)
- RTFREE_LOCKED(rt);
+ ip6_sprintf(ip6buf, &sin6->sin6_addr));
return EINVAL;
}
- RTFREE_LOCKED(rt);
return 0;
}
-static struct llentry *
-in6_lltable_lookup(struct lltable *llt, u_int flags,
- const struct sockaddr *l3addr)
+static inline uint32_t
+in6_lltable_hash_dst(const struct in6_addr *dst, uint32_t hsize)
+{
+
+ return (IN6_LLTBL_HASH(dst->s6_addr32[3], hsize));
+}
+
+static uint32_t
+in6_lltable_hash(const struct llentry *lle, uint32_t hsize)
+{
+
+ return (in6_lltable_hash_dst(&lle->r_l3addr.addr6, hsize));
+}
+
+static void
+in6_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
+{
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ sin6->sin6_addr = lle->r_l3addr.addr6;
+}
+
+static inline struct llentry *
+in6_lltable_find_dst(struct lltable *llt, const struct in6_addr *dst)
{
- const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
- struct ifnet *ifp = llt->llt_ifp;
struct llentry *lle;
struct llentries *lleh;
- u_int hashkey;
-
- IF_AFDATA_LOCK_ASSERT(ifp);
- KASSERT(l3addr->sa_family == AF_INET6,
- ("sin_family %d", l3addr->sa_family));
+ u_int hashidx;
- hashkey = sin6->sin6_addr.s6_addr32[3];
- lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
+ hashidx = in6_lltable_hash_dst(dst, llt->llt_hsize);
+ lleh = &llt->lle_head[hashidx];
LIST_FOREACH(lle, lleh, lle_next) {
- struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)L3_ADDR(lle);
if (lle->la_flags & LLE_DELETED)
continue;
- if (bcmp(&sa6->sin6_addr, &sin6->sin6_addr,
- sizeof(struct in6_addr)) == 0)
+ if (IN6_ARE_ADDR_EQUAL(&lle->r_l3addr.addr6, dst))
break;
}
- if (lle == NULL) {
- if (!(flags & LLE_CREATE))
- return (NULL);
- IF_AFDATA_WLOCK_ASSERT(ifp);
- /*
- * A route that covers the given address must have
- * been installed 1st because we are doing a resolution,
- * verify this.
- */
- if (!(flags & LLE_IFADDR) &&
- in6_lltable_rtcheck(ifp, flags, l3addr) != 0)
- return NULL;
-
- lle = in6_lltable_new(l3addr, flags);
- if (lle == NULL) {
- log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
- return NULL;
- }
- lle->la_flags = flags & ~LLE_CREATE;
- if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
- bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
- lle->la_flags |= (LLE_VALID | LLE_STATIC);
- }
+ return (lle);
+}
- lle->lle_tbl = llt;
- lle->lle_head = lleh;
- lle->la_flags |= LLE_LINKED;
- LIST_INSERT_HEAD(lleh, lle, lle_next);
- } else if (flags & LLE_DELETE) {
- if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
- LLE_WLOCK(lle);
- lle->la_flags |= LLE_DELETED;
+static void
+in6_lltable_delete_entry(struct lltable *llt, struct llentry *lle)
+{
+
+ lle->la_flags |= LLE_DELETED;
+ EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
#ifdef DIAGNOSTIC
- log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
+ log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
#endif
- if ((lle->la_flags &
- (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC)
- llentry_free(lle);
- else
- LLE_WUNLOCK(lle);
- }
- lle = (void *)-1;
+ llentry_free(lle);
+}
+
+static struct llentry *
+in6_lltable_alloc(struct lltable *llt, u_int flags,
+ const struct sockaddr *l3addr)
+{
+ const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
+ struct ifnet *ifp = llt->llt_ifp;
+ struct llentry *lle;
+ char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
+
+ KASSERT(l3addr->sa_family == AF_INET6,
+ ("sin_family %d", l3addr->sa_family));
+
+ /*
+ * A route that covers the given address must have
+ * been installed 1st because we are doing a resolution,
+ * verify this.
+ */
+ if (!(flags & LLE_IFADDR) &&
+ in6_lltable_rtcheck(ifp, flags, l3addr) != 0)
+ return (NULL);
+
+ lle = in6_lltable_new(&sin6->sin6_addr, flags);
+ if (lle == NULL) {
+ log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
+ return (NULL);
}
- if (LLE_IS_VALID(lle)) {
- if (flags & LLE_EXCLUSIVE)
- LLE_WLOCK(lle);
- else
- LLE_RLOCK(lle);
+ lle->la_flags = flags;
+ if ((flags & LLE_IFADDR) == LLE_IFADDR) {
+ linkhdrsize = LLE_MAX_LINKHDR;
+ if (lltable_calc_llheader(ifp, AF_INET6, IF_LLADDR(ifp),
+ linkhdr, &linkhdrsize, &lladdr_off) != 0) {
+ in6_lltable_destroy_lle_unlocked(lle);
+ return (NULL);
+ }
+ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
+ lladdr_off);
+ lle->la_flags |= LLE_STATIC;
}
+
+ if ((lle->la_flags & LLE_STATIC) != 0)
+ lle->ln_state = ND6_LLINFO_REACHABLE;
+
+ return (lle);
+}
+
+static struct llentry *
+in6_lltable_lookup(struct lltable *llt, u_int flags,
+ const struct sockaddr *l3addr)
+{
+ const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
+ struct llentry *lle;
+
+ IF_AFDATA_LOCK_ASSERT(llt->llt_ifp);
+ KASSERT(l3addr->sa_family == AF_INET6,
+ ("sin_family %d", l3addr->sa_family));
+
+ lle = in6_lltable_find_dst(llt, &sin6->sin6_addr);
+
+ if (lle == NULL)
+ return (NULL);
+
+ KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) !=
+ (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X",
+ flags));
+
+ if (flags & LLE_UNLOCKED)
+ return (lle);
+
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WLOCK(lle);
+ else
+ LLE_RLOCK(lle);
return (lle);
}
static int
-in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
+in6_lltable_dump_entry(struct lltable *llt, struct llentry *lle,
+ struct sysctl_req *wr)
{
struct ifnet *ifp = llt->llt_ifp;
- struct llentry *lle;
/* XXX stack use */
struct {
struct rt_msghdr rtm;
@@ -2668,39 +2292,32 @@ in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
#endif
struct sockaddr_dl sdl;
} ndpc;
- int i, error;
-
- if (ifp->if_flags & IFF_LOOPBACK)
- return 0;
-
- LLTABLE_LOCK_ASSERT();
-
- error = 0;
- for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
- LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
- struct sockaddr_dl *sdl;
+ struct sockaddr_dl *sdl;
+ int error;
- /* skip deleted or invalid entries */
- if ((lle->la_flags & (LLE_DELETED|LLE_VALID)) != LLE_VALID)
- continue;
+ bzero(&ndpc, sizeof(ndpc));
+ /* skip deleted entries */
+ if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
+ return (0);
/* Skip if jailed and not a valid IP of the prison. */
- if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
- continue;
+ lltable_fill_sa_entry(lle,
+ (struct sockaddr *)&ndpc.sin6);
+ if (prison_if(wr->td->td_ucred,
+ (struct sockaddr *)&ndpc.sin6) != 0)
+ return (0);
/*
* produce a msg made of:
* struct rt_msghdr;
* struct sockaddr_in6 (IPv6)
* struct sockaddr_dl;
*/
- bzero(&ndpc, sizeof(ndpc));
ndpc.rtm.rtm_msglen = sizeof(ndpc);
ndpc.rtm.rtm_version = RTM_VERSION;
ndpc.rtm.rtm_type = RTM_GET;
ndpc.rtm.rtm_flags = RTF_UP;
ndpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
- ndpc.sin6.sin6_family = AF_INET6;
- ndpc.sin6.sin6_len = sizeof(ndpc.sin6);
- bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle));
+ if (V_deembed_scopeid)
+ sa6_recoverscope(&ndpc.sin6);
/* publish */
if (lle->la_flags & LLE_PUB)
@@ -2709,22 +2326,56 @@ in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
sdl = &ndpc.sdl;
sdl->sdl_family = AF_LINK;
sdl->sdl_len = sizeof(*sdl);
- sdl->sdl_alen = ifp->if_addrlen;
sdl->sdl_index = ifp->if_index;
sdl->sdl_type = ifp->if_type;
- bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
- ndpc.rtm.rtm_rmx.rmx_expire =
- lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
+ if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
+ sdl->sdl_alen = ifp->if_addrlen;
+ bcopy(lle->ll_addr, LLADDR(sdl),
+ ifp->if_addrlen);
+ } else {
+ sdl->sdl_alen = 0;
+ bzero(LLADDR(sdl), ifp->if_addrlen);
+ }
+ if (lle->la_expire != 0)
+ ndpc.rtm.rtm_rmx.rmx_expire = lle->la_expire +
+ lle->lle_remtime / hz +
+ time_second - time_uptime;
ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
if (lle->la_flags & LLE_STATIC)
ndpc.rtm.rtm_flags |= RTF_STATIC;
+ if (lle->la_flags & LLE_IFADDR)
+ ndpc.rtm.rtm_flags |= RTF_PINNED;
+ if (lle->ln_router != 0)
+ ndpc.rtm.rtm_flags |= RTF_GATEWAY;
+ ndpc.rtm.rtm_rmx.rmx_pksent = lle->la_asked;
+ /* Store state in rmx_weight value */
+ ndpc.rtm.rtm_rmx.rmx_state = lle->ln_state;
ndpc.rtm.rtm_index = ifp->if_index;
error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc));
- if (error)
- break;
- }
- }
- return error;
+
+ return (error);
+}
+
+static struct lltable *
+in6_lltattach(struct ifnet *ifp)
+{
+ struct lltable *llt;
+
+ llt = lltable_allocate_htbl(IN6_LLTBL_DEFAULT_HSIZE);
+ llt->llt_af = AF_INET6;
+ llt->llt_ifp = ifp;
+
+ llt->llt_lookup = in6_lltable_lookup;
+ llt->llt_alloc_entry = in6_lltable_alloc;
+ llt->llt_delete_entry = in6_lltable_delete_entry;
+ llt->llt_dump_entry = in6_lltable_dump_entry;
+ llt->llt_hash = in6_lltable_hash;
+ llt->llt_fill_sa_entry = in6_lltable_fill_sa_entry;
+ llt->llt_free_entry = in6_lltable_free_entry;
+ llt->llt_match_prefix = in6_lltable_match_prefix;
+ lltable_link(llt);
+
+ return (llt);
}
void *
@@ -2732,32 +2383,45 @@ in6_domifattach(struct ifnet *ifp)
{
struct in6_ifextra *ext;
+ /* There are not IPv6-capable interfaces. */
+ switch (ifp->if_type) {
+ case IFT_PFLOG:
+ case IFT_PFSYNC:
+ case IFT_USB:
+ return (NULL);
+ }
ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK);
bzero(ext, sizeof(*ext));
- ext->in6_ifstat = (struct in6_ifstat *)malloc(sizeof(struct in6_ifstat),
- M_IFADDR, M_WAITOK);
- bzero(ext->in6_ifstat, sizeof(*ext->in6_ifstat));
+ ext->in6_ifstat = malloc(sizeof(counter_u64_t) *
+ sizeof(struct in6_ifstat) / sizeof(uint64_t), M_IFADDR, M_WAITOK);
+ COUNTER_ARRAY_ALLOC(ext->in6_ifstat,
+ sizeof(struct in6_ifstat) / sizeof(uint64_t), M_WAITOK);
- ext->icmp6_ifstat =
- (struct icmp6_ifstat *)malloc(sizeof(struct icmp6_ifstat),
- M_IFADDR, M_WAITOK);
- bzero(ext->icmp6_ifstat, sizeof(*ext->icmp6_ifstat));
+ ext->icmp6_ifstat = malloc(sizeof(counter_u64_t) *
+ sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_IFADDR,
+ M_WAITOK);
+ COUNTER_ARRAY_ALLOC(ext->icmp6_ifstat,
+ sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_WAITOK);
ext->nd_ifinfo = nd6_ifattach(ifp);
ext->scope6_id = scope6_ifattach(ifp);
- ext->lltable = lltable_init(ifp, AF_INET6);
- if (ext->lltable != NULL) {
- ext->lltable->llt_prefix_free = in6_lltable_prefix_free;
- ext->lltable->llt_lookup = in6_lltable_lookup;
- ext->lltable->llt_dump = in6_lltable_dump;
- }
+ ext->lltable = in6_lltattach(ifp);
ext->mld_ifinfo = mld_domifattach(ifp);
return ext;
}
+int
+in6_domifmtu(struct ifnet *ifp)
+{
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ return ifp->if_mtu;
+
+ return (IN6_LINKMTU(ifp));
+}
+
void
in6_domifdetach(struct ifnet *ifp, void *aux)
{
@@ -2765,9 +2429,13 @@ in6_domifdetach(struct ifnet *ifp, void *aux)
mld_domifdetach(ifp);
scope6_ifdetach(ext->scope6_id);
- nd6_ifdetach(ext->nd_ifinfo);
+ nd6_ifdetach(ifp, ext->nd_ifinfo);
lltable_free(ext->lltable);
+ COUNTER_ARRAY_FREE(ext->in6_ifstat,
+ sizeof(struct in6_ifstat) / sizeof(uint64_t));
free(ext->in6_ifstat, M_IFADDR);
+ COUNTER_ARRAY_FREE(ext->icmp6_ifstat,
+ sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
free(ext->icmp6_ifstat, M_IFADDR);
free(ext, M_IFADDR);
}
diff --git a/freebsd/sys/netinet6/in6.h b/freebsd/sys/netinet6/in6.h
index 616f1009..62c5e0b0 100644
--- a/freebsd/sys/netinet6/in6.h
+++ b/freebsd/sys/netinet6/in6.h
@@ -361,11 +361,11 @@ extern const struct in6_addr in6addr_linklocal_allv2routers;
#define IFA6_IS_DEPRECATED(a) \
((a)->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME && \
- (u_int32_t)((time_second - (a)->ia6_updatetime)) > \
+ (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \
(a)->ia6_lifetime.ia6t_pltime)
#define IFA6_IS_INVALID(a) \
((a)->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME && \
- (u_int32_t)((time_second - (a)->ia6_updatetime)) > \
+ (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \
(a)->ia6_lifetime.ia6t_vltime)
#endif /* _KERNEL */
@@ -376,12 +376,24 @@ extern const struct in6_addr in6addr_linklocal_allv2routers;
struct route_in6 {
struct rtentry *ro_rt;
struct llentry *ro_lle;
- struct in6_addr *ro_ia6;
- int ro_flags;
+ /*
+ * ro_prepend and ro_plen are only used for bpf to pass in a
+ * preformed header. They are not cacheable.
+ */
+ char *ro_prepend;
+ uint16_t ro_plen;
+ uint16_t ro_flags;
+ uint16_t ro_mtu; /* saved ro_rt mtu */
+ uint16_t spare;
struct sockaddr_in6 ro_dst;
};
#endif
+#ifdef _KERNEL
+#define MTAG_ABI_IPV6 1444287380 /* IPv6 ABI */
+#define IPV6_TAG_DIRECT 0 /* direct-dispatch IPv6 */
+#endif /* _KERNEL */
+
/*
* Options for use with [gs]etsockopt at the IPV6 level.
* First word of comment is data type; bool is stored in int.
@@ -424,8 +436,7 @@ struct route_in6 {
#define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */
#endif /* IPSEC */
-#define IPV6_FAITH 29 /* bool; accept FAITH'ed connections */
-
+ /* 29; unused; was IPV6_FAITH */
#if 1 /* IPV6FIREWALL */
#define IPV6_FW_ADD 30 /* add a firewall rule to chain */
#define IPV6_FW_DEL 31 /* delete a firewall rule from chain */
@@ -481,6 +492,14 @@ struct route_in6 {
#define IPV6_BINDANY 64 /* bool: allow bind to any address */
+#define IPV6_BINDMULTI 65 /* bool; allow multibind to same addr/port */
+#define IPV6_RSS_LISTEN_BUCKET 66 /* int; set RSS listen bucket */
+#define IPV6_FLOWID 67 /* int; flowid of given socket */
+#define IPV6_FLOWTYPE 68 /* int; flowtype of given socket */
+#define IPV6_RSSBUCKETID 69 /* int; RSS bucket ID of given socket */
+#define IPV6_RECVFLOWID 70 /* bool; receive IP6 flowid/flowtype w/ datagram */
+#define IPV6_RECVRSSBUCKETID 71 /* bool; receive IP6 RSS bucket id w/ datagram */
+
/*
* The following option is private; do not use it from user applications.
* It is deliberately defined to the same value as IP_MSFILTER.
@@ -574,7 +593,7 @@ struct ip6_mtuinfo {
#define IPV6CTL_SOURCECHECK 10 /* verify source route and intf */
#define IPV6CTL_SOURCECHECK_LOGINT 11 /* minimume logging interval */
#define IPV6CTL_ACCEPT_RTADV 12
-#define IPV6CTL_KEEPFAITH 13
+ /* 13; unused; was: IPV6CTL_KEEPFAITH */
#define IPV6CTL_LOG_INTERVAL 14
#define IPV6CTL_HDRNESTLIMIT 15
#define IPV6CTL_DAD_COUNT 16
@@ -588,9 +607,9 @@ struct ip6_mtuinfo {
#define IPV6CTL_MAPPED_ADDR 23
#endif
#define IPV6CTL_V6ONLY 24
-#define IPV6CTL_RTEXPIRE 25 /* cloned route expiration time */
-#define IPV6CTL_RTMINEXPIRE 26 /* min value for expiration time */
-#define IPV6CTL_RTMAXCACHE 27 /* trigger level for dynamic expire */
+/* IPV6CTL_RTEXPIRE 25 deprecated */
+/* IPV6CTL_RTMINEXPIRE 26 deprecated */
+/* IPV6CTL_RTMAXCACHE 27 deprecated */
#define IPV6CTL_USETEMPADDR 32 /* use temporary addresses (RFC3041) */
#define IPV6CTL_TEMPPLTIME 33 /* preferred lifetime for tmpaddrs */
@@ -618,17 +637,25 @@ struct ip6_mtuinfo {
* receiving IF. */
#define IPV6CTL_RFC6204W3 50 /* Accept defroute even when forwarding
enabled */
-#define IPV6CTL_MAXID 51
+#define IPV6CTL_INTRQMAXLEN 51 /* max length of IPv6 netisr queue */
+#define IPV6CTL_INTRDQMAXLEN 52 /* max length of direct IPv6 netisr
+ * queue */
+#define IPV6CTL_MAXID 53
#endif /* __BSD_VISIBLE */
/*
- * Redefinition of mbuf flags
- */
-#define M_AUTHIPHDR M_PROTO2
-#define M_DECRYPTED M_PROTO3
-#define M_LOOP M_PROTO4
-#define M_AUTHIPDGM M_PROTO5
-#define M_RTALERT_MLD M_PROTO6
+ * Since both netinet/ and netinet6/ call into netipsec/ and netpfil/,
+ * the protocol specific mbuf flags are shared between them.
+ */
+#define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */
+#define M_IP6_NEXTHOP M_PROTO2 /* explicit ip nexthop */
+#define M_IP_NEXTHOP M_PROTO2 /* explicit ip nexthop */
+#define M_SKIP_FIREWALL M_PROTO3 /* skip firewall processing */
+#define M_AUTHIPHDR M_PROTO4
+#define M_DECRYPTED M_PROTO5
+#define M_LOOP M_PROTO6
+#define M_AUTHIPDGM M_PROTO7
+#define M_RTALERT_MLD M_PROTO8
#ifdef _KERNEL
struct cmsghdr;
@@ -636,9 +663,13 @@ struct ip6_hdr;
int in6_cksum_pseudo(struct ip6_hdr *, uint32_t, uint8_t, uint16_t);
int in6_cksum(struct mbuf *, u_int8_t, u_int32_t, u_int32_t);
+int in6_cksum_partial(struct mbuf *, u_int8_t, u_int32_t, u_int32_t,
+ u_int32_t);
int in6_localaddr(struct in6_addr *);
int in6_localip(struct in6_addr *);
-int in6_addrscope(struct in6_addr *);
+int in6_ifhasaddr(struct ifnet *, struct in6_addr *);
+int in6_addrscope(const struct in6_addr *);
+char *ip6_sprintf(char *, const struct in6_addr *);
struct in6_ifaddr *in6_ifawithifp(struct ifnet *, struct in6_addr *);
extern void in6_if_up(struct ifnet *);
struct sockaddr;
@@ -656,7 +687,6 @@ extern void addrsel_policy_init(void);
#define sin6tosa(sin6) ((struct sockaddr *)(sin6))
#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
-extern int (*faithprefix_p)(struct in6_addr *);
#endif /* _KERNEL */
#ifndef _SIZE_T_DECLARED
diff --git a/freebsd/sys/netinet6/in6_cksum.c b/freebsd/sys/netinet6/in6_cksum.c
index e129ca71..6eebdadc 100644
--- a/freebsd/sys/netinet6/in6_cksum.c
+++ b/freebsd/sys/netinet6/in6_cksum.c
@@ -147,9 +147,11 @@ in6_cksum_pseudo(struct ip6_hdr *ip6, uint32_t len, uint8_t nxt, uint16_t csum)
* off is an offset where TCP/UDP/ICMP6 header starts.
* len is a total length of a transport segment.
* (e.g. TCP header + TCP payload)
+ * cov is the number of bytes to be taken into account for the checksum
*/
int
-in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
+in6_cksum_partial(struct mbuf *m, u_int8_t nxt, u_int32_t off,
+ u_int32_t len, u_int32_t cov)
{
struct ip6_hdr *ip6;
u_int16_t *w, scope;
@@ -217,9 +219,9 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
}
w = (u_int16_t *)(mtod(m, u_char *) + off);
mlen = m->m_len - off;
- if (len < mlen)
- mlen = len;
- len -= mlen;
+ if (cov < mlen)
+ mlen = cov;
+ cov -= mlen;
/*
* Force to even boundary.
*/
@@ -275,7 +277,7 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
* Lastly calculate a summary of the rest of mbufs.
*/
- for (;m && len; m = m->m_next) {
+ for (;m && cov; m = m->m_next) {
if (m->m_len == 0)
continue;
w = mtod(m, u_int16_t *);
@@ -292,12 +294,12 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
sum += s_util.s;
w = (u_int16_t *)((char *)w + 1);
mlen = m->m_len - 1;
- len--;
+ cov--;
} else
mlen = m->m_len;
- if (len < mlen)
- mlen = len;
- len -= mlen;
+ if (cov < mlen)
+ mlen = cov;
+ cov -= mlen;
/*
* Force to even boundary.
*/
@@ -345,7 +347,7 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
} else if (mlen == -1)
s_util.c[0] = *(char *)w;
}
- if (len)
+ if (cov)
panic("in6_cksum: out of data");
if (mlen == -1) {
/* The last mbuf has odd # of bytes. Follow the
@@ -357,3 +359,9 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
REDUCE;
return (~sum & 0xffff);
}
+
+int
+in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
+{
+ return (in6_cksum_partial(m, nxt, off, len, len));
+}
diff --git a/freebsd/sys/netinet6/in6_fib.c b/freebsd/sys/netinet6/in6_fib.c
new file mode 100644
index 00000000..824db1fc
--- /dev/null
+++ b/freebsd/sys/netinet6/in6_fib.c
@@ -0,0 +1,278 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2015
+ * Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_route.h>
+#include <rtems/bsd/local/opt_mpath.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/route_var.h>
+#include <net/vnet.h>
+
+#ifdef RADIX_MPATH
+#include <net/radix_mpath.h>
+#endif
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_mroute.h>
+#include <netinet/ip6.h>
+#include <netinet6/in6_fib.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/nd6.h>
+#include <netinet6/scope6_var.h>
+
+#include <net/if_types.h>
+
+#ifdef INET6
+static void fib6_rte_to_nh_extended(struct rtentry *rte,
+ const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6);
+static void fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst,
+ uint32_t flags, struct nhop6_basic *pnh6);
+static struct ifnet *fib6_get_ifaifp(struct rtentry *rte);
+#define RNTORT(p) ((struct rtentry *)(p))
+
+/*
+ * Gets real interface for the @rte.
+ * Returns rt_ifp for !IFF_LOOPBACK routers.
+ * Extracts "real" address interface from interface address
+ * loopback routes.
+ */
+static struct ifnet *
+fib6_get_ifaifp(struct rtentry *rte)
+{
+ struct ifnet *ifp;
+ struct sockaddr_dl *sdl;
+
+ ifp = rte->rt_ifp;
+ if ((ifp->if_flags & IFF_LOOPBACK) &&
+ rte->rt_gateway->sa_family == AF_LINK) {
+ sdl = (struct sockaddr_dl *)rte->rt_gateway;
+ return (ifnet_byindex(sdl->sdl_index));
+ }
+
+ return (ifp);
+}
+
+static void
+fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst,
+ uint32_t flags, struct nhop6_basic *pnh6)
+{
+ struct sockaddr_in6 *gw;
+
+ /* Do explicit nexthop zero unless we're copying it */
+ memset(pnh6, 0, sizeof(*pnh6));
+
+ if ((flags & NHR_IFAIF) != 0)
+ pnh6->nh_ifp = fib6_get_ifaifp(rte);
+ else
+ pnh6->nh_ifp = rte->rt_ifp;
+
+ pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp));
+ if (rte->rt_flags & RTF_GATEWAY) {
+ gw = (struct sockaddr_in6 *)rte->rt_gateway;
+ pnh6->nh_addr = gw->sin6_addr;
+ in6_clearscope(&pnh6->nh_addr);
+ } else
+ pnh6->nh_addr = *dst;
+ /* Set flags */
+ pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
+ gw = (struct sockaddr_in6 *)rt_key(rte);
+ if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr))
+ pnh6->nh_flags |= NHF_DEFAULT;
+}
+
+static void
+fib6_rte_to_nh_extended(struct rtentry *rte, const struct in6_addr *dst,
+ uint32_t flags, struct nhop6_extended *pnh6)
+{
+ struct sockaddr_in6 *gw;
+
+ /* Do explicit nexthop zero unless we're copying it */
+ memset(pnh6, 0, sizeof(*pnh6));
+
+ if ((flags & NHR_IFAIF) != 0)
+ pnh6->nh_ifp = fib6_get_ifaifp(rte);
+ else
+ pnh6->nh_ifp = rte->rt_ifp;
+
+ pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp));
+ if (rte->rt_flags & RTF_GATEWAY) {
+ gw = (struct sockaddr_in6 *)rte->rt_gateway;
+ pnh6->nh_addr = gw->sin6_addr;
+ in6_clearscope(&pnh6->nh_addr);
+ } else
+ pnh6->nh_addr = *dst;
+ /* Set flags */
+ pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
+ gw = (struct sockaddr_in6 *)rt_key(rte);
+ if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr))
+ pnh6->nh_flags |= NHF_DEFAULT;
+}
+
+/*
+ * Performs IPv6 route table lookup on @dst. Returns 0 on success.
+ * Stores basic nexthop info into provided @pnh6 structure.
+ * Note that
+ * - nh_ifp represents logical transmit interface (rt_ifp) by default
+ * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
+ * - mtu from logical transmit interface will be returned.
+ * - nh_ifp cannot be safely dereferenced
+ * - nh_ifp represents rt_ifp (e.g. if looking up address on
+ * interface "ix0" pointer to "ix0" interface will be returned instead
+ * of "lo0")
+ * - howewer mtu from "transmit" interface will be returned.
+ * - scope will be embedded in nh_addr
+ */
+int
+fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid,
+ uint32_t flags, uint32_t flowid, struct nhop6_basic *pnh6)
+{
+ struct rib_head *rh;
+ struct radix_node *rn;
+ struct sockaddr_in6 sin6;
+ struct rtentry *rte;
+
+ KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_basic: bad fibnum"));
+ rh = rt_tables_get_rnh(fibnum, AF_INET6);
+ if (rh == NULL)
+ return (ENOENT);
+
+ /* Prepare lookup key */
+ memset(&sin6, 0, sizeof(sin6));
+ sin6.sin6_addr = *dst;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ /* Assume scopeid is valid and embed it directly */
+ if (IN6_IS_SCOPE_LINKLOCAL(dst))
+ sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
+
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ rte = RNTORT(rn);
+ /* Ensure route & ifp is UP */
+ if (RT_LINK_IS_UP(rte->rt_ifp)) {
+ fib6_rte_to_nh_basic(rte, &sin6.sin6_addr, flags, pnh6);
+ RIB_RUNLOCK(rh);
+ return (0);
+ }
+ }
+ RIB_RUNLOCK(rh);
+
+ return (ENOENT);
+}
+
+/*
+ * Performs IPv6 route table lookup on @dst. Returns 0 on success.
+ * Stores extended nexthop info into provided @pnh6 structure.
+ * Note that
+ * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified.
+ * - in that case you need to call fib6_free_nh_ext()
+ * - nh_ifp represents logical transmit interface (rt_ifp) by default
+ * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
+ * - mtu from logical transmit interface will be returned.
+ * - scope will be embedded in nh_addr
+ */
+int
+fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid,
+ uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6)
+{
+ struct rib_head *rh;
+ struct radix_node *rn;
+ struct sockaddr_in6 sin6;
+ struct rtentry *rte;
+
+ KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_ext: bad fibnum"));
+ rh = rt_tables_get_rnh(fibnum, AF_INET6);
+ if (rh == NULL)
+ return (ENOENT);
+
+ /* Prepare lookup key */
+ memset(&sin6, 0, sizeof(sin6));
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_addr = *dst;
+ /* Assume scopeid is valid and embed it directly */
+ if (IN6_IS_SCOPE_LINKLOCAL(dst))
+ sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
+
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ rte = RNTORT(rn);
+#ifdef RADIX_MPATH
+ rte = rt_mpath_select(rte, flowid);
+ if (rte == NULL) {
+ RIB_RUNLOCK(rh);
+ return (ENOENT);
+ }
+#endif
+ /* Ensure route & ifp is UP */
+ if (RT_LINK_IS_UP(rte->rt_ifp)) {
+ fib6_rte_to_nh_extended(rte, &sin6.sin6_addr, flags,
+ pnh6);
+ if ((flags & NHR_REF) != 0) {
+ /* TODO: Do lwref on egress ifp's */
+ }
+ RIB_RUNLOCK(rh);
+
+ return (0);
+ }
+ }
+ RIB_RUNLOCK(rh);
+
+ return (ENOENT);
+}
+
+void
+fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6)
+{
+
+}
+
+#endif
+
diff --git a/freebsd/sys/netinet6/in6_fib.h b/freebsd/sys/netinet6/in6_fib.h
new file mode 100644
index 00000000..3d58cd22
--- /dev/null
+++ b/freebsd/sys/netinet6/in6_fib.h
@@ -0,0 +1,61 @@
+/*-
+ * Copyright (c) 2015
+ * Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET6_IN6_FIB_H_
+#define _NETINET6_IN6_FIB_H_
+
+/* Basic nexthop info used for uRPF/mtu checks */
+struct nhop6_basic {
+ struct ifnet *nh_ifp; /* Logical egress interface */
+ uint16_t nh_mtu; /* nexthop mtu */
+ uint16_t nh_flags; /* nhop flags */
+ uint8_t spare[4];
+ struct in6_addr nh_addr; /* GW/DST IPv4 address */
+};
+
+/* Does not differ from nhop6_basic */
+struct nhop6_extended {
+ struct ifnet *nh_ifp; /* Logical egress interface */
+ uint16_t nh_mtu; /* nexthop mtu */
+ uint16_t nh_flags; /* nhop flags */
+ uint8_t spare[4];
+ struct in6_addr nh_addr; /* GW/DST IPv6 address */
+ uint64_t spare2[2];
+};
+
+int fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst,
+ uint32_t scopeid, uint32_t flags, uint32_t flowid,struct nhop6_basic *pnh6);
+int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,
+ uint32_t scopeid, uint32_t flags, uint32_t flowid,
+ struct nhop6_extended *pnh6);
+void fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6);
+#endif
+
diff --git a/freebsd/sys/netinet6/in6_gif.c b/freebsd/sys/netinet6/in6_gif.c
index 9e0f37f0..6e1fb8b1 100644
--- a/freebsd/sys/netinet6/in6_gif.c
+++ b/freebsd/sys/netinet6/in6_gif.c
@@ -38,6 +38,8 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rmlock.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/sockio.h>
@@ -51,7 +53,9 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
+#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
@@ -62,29 +66,28 @@ __FBSDID("$FreeBSD$");
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
-#include <netinet6/in6_gif.h>
#include <netinet6/in6_var.h>
#endif
-#include <netinet6/ip6protosw.h>
#include <netinet/ip_ecn.h>
#ifdef INET6
#include <netinet6/ip6_ecn.h>
+#include <netinet6/in6_fib.h>
#endif
#include <net/if_gif.h>
-VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM;
+#define GIF_HLIM 30
+static VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM;
#define V_ip6_gif_hlim VNET(ip6_gif_hlim)
SYSCTL_DECL(_net_inet6_ip6);
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_RW,
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip6_gif_hlim), 0, "");
-static int gif_validate6(const struct ip6_hdr *, struct gif_softc *,
- struct ifnet *);
+static int in6_gif_input(struct mbuf **, int *, int);
extern struct domain inet6domain;
-struct ip6protosw in6_gif_protosw = {
+static struct protosw in6_gif_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = 0, /* IPPROTO_IPV[46] */
@@ -96,112 +99,24 @@ struct ip6protosw in6_gif_protosw = {
};
int
-in6_gif_output(struct ifnet *ifp,
- int family, /* family of the packet to be encapsulate */
- struct mbuf *m)
+in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
{
+ GIF_RLOCK_TRACKER;
struct gif_softc *sc = ifp->if_softc;
- struct sockaddr_in6 *dst = (struct sockaddr_in6 *)&sc->gif_ro6.ro_dst;
- struct sockaddr_in6 *sin6_src = (struct sockaddr_in6 *)sc->gif_psrc;
- struct sockaddr_in6 *sin6_dst = (struct sockaddr_in6 *)sc->gif_pdst;
struct ip6_hdr *ip6;
- struct etherip_header eiphdr;
- int error, len, proto;
- u_int8_t itos, otos;
-
- GIF_LOCK_ASSERT(sc);
-
- if (sin6_src == NULL || sin6_dst == NULL ||
- sin6_src->sin6_family != AF_INET6 ||
- sin6_dst->sin6_family != AF_INET6) {
- m_freem(m);
- return EAFNOSUPPORT;
- }
-
- switch (family) {
-#ifdef INET
- case AF_INET:
- {
- struct ip *ip;
-
- proto = IPPROTO_IPV4;
- if (m->m_len < sizeof(*ip)) {
- m = m_pullup(m, sizeof(*ip));
- if (!m)
- return ENOBUFS;
- }
- ip = mtod(m, struct ip *);
- itos = ip->ip_tos;
- break;
- }
-#endif
-#ifdef INET6
- case AF_INET6:
- {
- struct ip6_hdr *ip6;
- proto = IPPROTO_IPV6;
- if (m->m_len < sizeof(*ip6)) {
- m = m_pullup(m, sizeof(*ip6));
- if (!m)
- return ENOBUFS;
- }
- ip6 = mtod(m, struct ip6_hdr *);
- itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
- break;
- }
-#endif
- case AF_LINK:
- proto = IPPROTO_ETHERIP;
-
- /*
- * GIF_SEND_REVETHIP (disabled by default) intentionally
- * sends an EtherIP packet with revered version field in
- * the header. This is a knob for backward compatibility
- * with FreeBSD 7.2R or prior.
- */
- if ((sc->gif_options & GIF_SEND_REVETHIP)) {
- eiphdr.eip_ver = 0;
- eiphdr.eip_resvl = ETHERIP_VERSION;
- eiphdr.eip_resvh = 0;
- } else {
- eiphdr.eip_ver = ETHERIP_VERSION;
- eiphdr.eip_resvl = 0;
- eiphdr.eip_resvh = 0;
- }
- /* prepend Ethernet-in-IP header */
- M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);
- if (m && m->m_len < sizeof(struct etherip_header))
- m = m_pullup(m, sizeof(struct etherip_header));
- if (m == NULL)
- return ENOBUFS;
- bcopy(&eiphdr, mtod(m, struct etherip_header *),
- sizeof(struct etherip_header));
- break;
-
- default:
-#ifdef DEBUG
- printf("in6_gif_output: warning: unknown family %d passed\n",
- family);
-#endif
- m_freem(m);
- return EAFNOSUPPORT;
- }
+ int len;
/* prepend new IP header */
len = sizeof(struct ip6_hdr);
#ifndef __NO_STRICT_ALIGNMENT
- if (family == AF_LINK)
+ if (proto == IPPROTO_ETHERIP)
len += ETHERIP_ALIGN;
#endif
- M_PREPEND(m, len, M_DONTWAIT);
- if (m != NULL && m->m_len < len)
- m = m_pullup(m, len);
- if (m == NULL) {
- printf("ENOBUFS in in6_gif_output %d\n", __LINE__);
- return ENOBUFS;
- }
+ M_PREPEND(m, len, M_NOWAIT);
+ if (m == NULL)
+ return (ENOBUFS);
#ifndef __NO_STRICT_ALIGNMENT
- if (family == AF_LINK) {
+ if (proto == IPPROTO_ETHERIP) {
len = mtod(m, vm_offset_t) & 3;
KASSERT(len == 0 || len == ETHERIP_ALIGN,
("in6_gif_output: unexpected misalignment"));
@@ -211,261 +126,107 @@ in6_gif_output(struct ifnet *ifp,
#endif
ip6 = mtod(m, struct ip6_hdr *);
- ip6->ip6_flow = 0;
- ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
- ip6->ip6_vfc |= IPV6_VERSION;
- ip6->ip6_plen = htons((u_short)m->m_pkthdr.len);
- ip6->ip6_nxt = proto;
- ip6->ip6_hlim = V_ip6_gif_hlim;
- ip6->ip6_src = sin6_src->sin6_addr;
- /* bidirectional configured tunnel mode */
- if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr))
- ip6->ip6_dst = sin6_dst->sin6_addr;
- else {
+ GIF_RLOCK(sc);
+ if (sc->gif_family != AF_INET6) {
m_freem(m);
- return ENETUNREACH;
+ GIF_RUNLOCK(sc);
+ return (ENETDOWN);
}
- ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
- &otos, &itos);
- ip6->ip6_flow &= ~htonl(0xff << 20);
- ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
+ bcopy(sc->gif_ip6hdr, ip6, sizeof(struct ip6_hdr));
+ GIF_RUNLOCK(sc);
- M_SETFIB(m, sc->gif_fibnum);
-
- if (dst->sin6_family != sin6_dst->sin6_family ||
- !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &sin6_dst->sin6_addr)) {
- /* cache route doesn't match */
- bzero(dst, sizeof(*dst));
- dst->sin6_family = sin6_dst->sin6_family;
- dst->sin6_len = sizeof(struct sockaddr_in6);
- dst->sin6_addr = sin6_dst->sin6_addr;
- if (sc->gif_ro6.ro_rt) {
- RTFREE(sc->gif_ro6.ro_rt);
- sc->gif_ro6.ro_rt = NULL;
- }
-#if 0
- GIF2IFP(sc)->if_mtu = GIF_MTU;
-#endif
- }
-
- if (sc->gif_ro6.ro_rt == NULL) {
- in6_rtalloc(&sc->gif_ro6, sc->gif_fibnum);
- if (sc->gif_ro6.ro_rt == NULL) {
- m_freem(m);
- return ENETUNREACH;
- }
-
- /* if it constitutes infinite encapsulation, punt. */
- if (sc->gif_ro.ro_rt->rt_ifp == ifp) {
- m_freem(m);
- return ENETUNREACH; /*XXX*/
- }
-#if 0
- ifp->if_mtu = sc->gif_ro6.ro_rt->rt_ifp->if_mtu
- - sizeof(struct ip6_hdr);
-#endif
- }
-
- m_addr_changed(m);
-
-#ifdef IPV6_MINMTU
+ ip6->ip6_flow |= htonl((uint32_t)ecn << 20);
+ ip6->ip6_nxt = proto;
+ ip6->ip6_hlim = V_ip6_gif_hlim;
/*
* force fragmentation to minimum MTU, to avoid path MTU discovery.
* it is too painful to ask for resend of inner packet, to achieve
* path MTU discovery for encapsulated packets.
*/
- error = ip6_output(m, 0, &sc->gif_ro6, IPV6_MINMTU, 0, NULL, NULL);
-#else
- error = ip6_output(m, 0, &sc->gif_ro6, 0, 0, NULL, NULL);
-#endif
-
- if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) &&
- sc->gif_ro6.ro_rt != NULL) {
- RTFREE(sc->gif_ro6.ro_rt);
- sc->gif_ro6.ro_rt = NULL;
- }
-
- return (error);
+ return (ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL, NULL));
}
-int
+static int
in6_gif_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
- struct ifnet *gifp = NULL;
+ struct ifnet *gifp;
struct gif_softc *sc;
struct ip6_hdr *ip6;
- int af = 0;
- u_int32_t otos;
+ uint8_t ecn;
- ip6 = mtod(m, struct ip6_hdr *);
-
- sc = (struct gif_softc *)encap_getarg(m);
+ sc = encap_getarg(m);
if (sc == NULL) {
m_freem(m);
IP6STAT_INC(ip6s_nogif);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
-
gifp = GIF2IFP(sc);
- if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) {
- m_freem(m);
- IP6STAT_INC(ip6s_nogif);
- return IPPROTO_DONE;
- }
-
- otos = ip6->ip6_flow;
- m_adj(m, *offp);
-
- switch (proto) {
-#ifdef INET
- case IPPROTO_IPV4:
- {
- struct ip *ip;
- u_int8_t otos8;
- af = AF_INET;
- otos8 = (ntohl(otos) >> 20) & 0xff;
- if (m->m_len < sizeof(*ip)) {
- m = m_pullup(m, sizeof(*ip));
- if (!m)
- return IPPROTO_DONE;
- }
- ip = mtod(m, struct ip *);
- if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
- ECN_ALLOWED : ECN_NOCARE,
- &otos8, &ip->ip_tos) == 0) {
- m_freem(m);
- return IPPROTO_DONE;
- }
- break;
- }
-#endif /* INET */
-#ifdef INET6
- case IPPROTO_IPV6:
- {
- struct ip6_hdr *ip6;
- af = AF_INET6;
- if (m->m_len < sizeof(*ip6)) {
- m = m_pullup(m, sizeof(*ip6));
- if (!m)
- return IPPROTO_DONE;
- }
+ if ((gifp->if_flags & IFF_UP) != 0) {
ip6 = mtod(m, struct ip6_hdr *);
- if (ip6_ecn_egress((gifp->if_flags & IFF_LINK1) ?
- ECN_ALLOWED : ECN_NOCARE,
- &otos, &ip6->ip6_flow) == 0) {
- m_freem(m);
- return IPPROTO_DONE;
- }
- break;
- }
-#endif
- case IPPROTO_ETHERIP:
- af = AF_LINK;
- break;
-
- default:
- IP6STAT_INC(ip6s_nogif);
+ ecn = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+ m_adj(m, *offp);
+ gif_input(m, gifp, proto, ecn);
+ } else {
m_freem(m);
- return IPPROTO_DONE;
+ IP6STAT_INC(ip6s_nogif);
}
-
- gif_input(m, af, gifp);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
/*
- * validate outer address.
+ * we know that we are in IFF_UP, outer address available, and outer family
+ * matched the physical addr family. see gif_encapcheck().
*/
-static int
-gif_validate6(const struct ip6_hdr *ip6, struct gif_softc *sc,
- struct ifnet *ifp)
+int
+in6_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
{
- struct sockaddr_in6 *src, *dst;
+ const struct ip6_hdr *ip6;
+ struct gif_softc *sc;
+ int ret;
- src = (struct sockaddr_in6 *)sc->gif_psrc;
- dst = (struct sockaddr_in6 *)sc->gif_pdst;
+ /* sanity check done in caller */
+ sc = (struct gif_softc *)arg;
+ GIF_RLOCK_ASSERT(sc);
/*
* Check for address match. Note that the check is for an incoming
* packet. We should compare the *source* address in our configuration
* and the *destination* address of the packet, and vice versa.
*/
- if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) ||
- !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src))
- return 0;
-
- /* martian filters on outer source - done in ip6_input */
+ ip6 = mtod(m, const struct ip6_hdr *);
+ if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, &ip6->ip6_dst))
+ return (0);
+ ret = 128;
+ if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, &ip6->ip6_src)) {
+ if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
+ return (0);
+ } else
+ ret += 128;
/* ingress filters on outer source */
- if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0 && ifp) {
- struct sockaddr_in6 sin6;
- struct rtentry *rt;
+ if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) {
+ struct nhop6_basic nh6;
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- sin6.sin6_addr = ip6->ip6_src;
- sin6.sin6_scope_id = 0; /* XXX */
+ /* XXX empty scope id */
+ if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src, 0, 0, 0,
+ &nh6) != 0)
+ return (0);
- rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL,
- sc->gif_fibnum);
- if (!rt || rt->rt_ifp != ifp) {
-#if 0
- char ip6buf[INET6_ADDRSTRLEN];
- log(LOG_WARNING, "%s: packet from %s dropped "
- "due to ingress filter\n", if_name(GIF2IFP(sc)),
- ip6_sprintf(ip6buf, &sin6.sin6_addr));
-#endif
- if (rt)
- RTFREE_LOCKED(rt);
- return 0;
- }
- RTFREE_LOCKED(rt);
+ if (nh6.nh_ifp != m->m_pkthdr.rcvif)
+ return (0);
}
-
- return 128 * 2;
-}
-
-/*
- * we know that we are in IFF_UP, outer address available, and outer family
- * matched the physical addr family. see gif_encapcheck().
- * sanity check for arg should have been done in the caller.
- */
-int
-gif_encapcheck6(const struct mbuf *m, int off, int proto, void *arg)
-{
- struct ip6_hdr ip6;
- struct gif_softc *sc;
- struct ifnet *ifp;
-
- /* sanity check done in caller */
- sc = (struct gif_softc *)arg;
-
- /* LINTED const cast */
- m_copydata(m, 0, sizeof(ip6), (caddr_t)&ip6);
- ifp = ((m->m_flags & M_PKTHDR) != 0) ? m->m_pkthdr.rcvif : NULL;
-
- return gif_validate6(&ip6, sc, ifp);
+ return (ret);
}
int
in6_gif_attach(struct gif_softc *sc)
{
- sc->encap_cookie6 = encap_attach_func(AF_INET6, -1, gif_encapcheck,
- (void *)&in6_gif_protosw, sc);
- if (sc->encap_cookie6 == NULL)
- return EEXIST;
- return 0;
-}
-
-int
-in6_gif_detach(struct gif_softc *sc)
-{
- int error;
- error = encap_detach(sc->encap_cookie6);
- if (error == 0)
- sc->encap_cookie6 = NULL;
- return error;
+ KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
+ sc->gif_ecookie = encap_attach_func(AF_INET6, -1, gif_encapcheck,
+ (void *)&in6_gif_protosw, sc);
+ if (sc->gif_ecookie == NULL)
+ return (EEXIST);
+ return (0);
}
diff --git a/freebsd/sys/netinet6/in6_ifattach.c b/freebsd/sys/netinet6/in6_ifattach.c
index a8f03017..791e9e27 100644
--- a/freebsd/sys/netinet6/in6_ifattach.c
+++ b/freebsd/sys/netinet6/in6_ifattach.c
@@ -41,11 +41,14 @@ __FBSDID("$FreeBSD$");
#include <sys/sockio.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/proc.h>
+#include <sys/rmlock.h>
#include <sys/syslog.h>
#include <sys/md5.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
@@ -279,9 +282,7 @@ found:
case IFT_ISO88025:
case IFT_ATM:
case IFT_IEEE1394:
-#ifdef IFT_IEEE80211
case IFT_IEEE80211:
-#endif
/* IEEE802/EUI64 cases - what others? */
/* IEEE1394 uses 16byte length address starting with EUI64 */
if (addrlen > 8)
@@ -343,9 +344,7 @@ found:
break;
case IFT_GIF:
-#ifdef IFT_STF
case IFT_STF:
-#endif
/*
* RFC2893 says: "SHOULD use IPv4 address as ifid source".
* however, IPv4 address is not very suitable as unique
@@ -412,7 +411,7 @@ get_ifid(struct ifnet *ifp0, struct ifnet *altifp,
/* next, try to get it from some other hardware interface */
IFNET_RLOCK_NOSLEEP();
- TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (ifp == ifp0)
continue;
if (in6_get_hw_ifid(ifp, in6) != 0)
@@ -460,21 +459,13 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp)
struct in6_ifaddr *ia;
struct in6_aliasreq ifra;
struct nd_prefixctl pr0;
- int i, error;
+ int error;
/*
* configure link-local address.
*/
- bzero(&ifra, sizeof(ifra));
-
- /*
- * in6_update_ifa() does not use ifra_name, but we accurately set it
- * for safety.
- */
- strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
+ in6_prepare_ifra(&ifra, NULL, &in6mask64);
- ifra.ifra_addr.sin6_family = AF_INET6;
- ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
ifra.ifra_addr.sin6_addr.s6_addr32[0] = htonl(0xfe800000);
ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0;
if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
@@ -490,9 +481,6 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp)
if (in6_setscope(&ifra.ifra_addr.sin6_addr, ifp, NULL))
return (-1);
- ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
- ifra.ifra_prefixmask.sin6_family = AF_INET6;
- ifra.ifra_prefixmask.sin6_addr = in6mask64;
/* link-local addresses should NEVER expire. */
ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
@@ -537,10 +525,7 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp)
pr0.ndpr_plen = in6_mask2len(&ifra.ifra_prefixmask.sin6_addr, NULL);
pr0.ndpr_prefix = ifra.ifra_addr;
/* apply the mask for safety. (nd6_prelist_add will apply it again) */
- for (i = 0; i < 4; i++) {
- pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &=
- in6mask64.s6_addr32[i];
- }
+ IN6_MASK_ADDR(&pr0.ndpr_prefix.sin6_addr, &in6mask64);
/*
* Initialize parameters. The link-local prefix must always be
* on-link, and its lifetimes never expire.
@@ -573,17 +558,7 @@ in6_ifattach_loopback(struct ifnet *ifp)
struct in6_aliasreq ifra;
int error;
- bzero(&ifra, sizeof(ifra));
-
- /*
- * in6_update_ifa() does not use ifra_name, but we accurately set it
- * for safety.
- */
- strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
-
- ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
- ifra.ifra_prefixmask.sin6_family = AF_INET6;
- ifra.ifra_prefixmask.sin6_addr = in6mask128;
+ in6_prepare_ifra(&ifra, &in6addr_loopback, &in6mask128);
/*
* Always initialize ia_dstaddr (= broadcast address) to loopback
@@ -593,20 +568,10 @@ in6_ifattach_loopback(struct ifnet *ifp)
ifra.ifra_dstaddr.sin6_family = AF_INET6;
ifra.ifra_dstaddr.sin6_addr = in6addr_loopback;
- ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
- ifra.ifra_addr.sin6_family = AF_INET6;
- ifra.ifra_addr.sin6_addr = in6addr_loopback;
-
/* the loopback address should NEVER expire. */
ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
- /* we don't need to perform DAD on loopback interfaces. */
- ifra.ifra_flags |= IN6_IFF_NODAD;
-
- /* skip registration to the prefix list. XXX should be temporary. */
- ifra.ifra_flags |= IN6_IFF_NOPFX;
-
/*
* We are sure that this is a newly assigned address, so we can set
* NULL to the 3rd arg.
@@ -734,15 +699,8 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
struct in6_ifaddr *ia;
struct in6_addr in6;
- /* some of the interfaces are inherently not IPv6 capable */
- switch (ifp->if_type) {
- case IFT_PFLOG:
- case IFT_PFSYNC:
- ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL;
- ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
+ if (ifp->if_afdata[AF_INET6] == NULL)
return;
- }
-
/*
* quirks based on interface type
*/
@@ -813,64 +771,45 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
/*
* NOTE: in6_ifdetach() does not support loopback if at this moment.
- * We don't need this function in bsdi, because interfaces are never removed
- * from the ifnet list in bsdi.
+ *
+ * When shutting down a VNET we clean up layers top-down. In that case
+ * upper layer protocols (ulp) are cleaned up already and locks are destroyed
+ * and we must not call into these cleanup functions anymore, thus purgeulp
+ * is set to 0 in that case by in6_ifdetach_destroy().
+ * The normal case of destroying a (cloned) interface still needs to cleanup
+ * everything related to the interface and will have purgeulp set to 1.
*/
-void
-in6_ifdetach(struct ifnet *ifp)
+static void
+_in6_ifdetach(struct ifnet *ifp, int purgeulp)
{
- struct in6_ifaddr *ia;
struct ifaddr *ifa, *next;
- struct radix_node_head *rnh;
- struct rtentry *rt;
- struct sockaddr_in6 sin6;
- struct in6_multi_mship *imm;
- /* remove neighbor management table */
- nd6_purge(ifp);
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ return;
- /* nuke any of IPv6 addresses we have */
+ /*
+ * Remove neighbor management table.
+ * Enabling the nd6_purge will panic on vmove for interfaces on VNET
+ * teardown as the IPv6 layer is cleaned up already and the locks
+ * are destroyed.
+ */
+ if (purgeulp)
+ nd6_purge(ifp);
+
+ /*
+ * nuke any of IPv6 addresses we have
+ * XXX: all addresses should be already removed
+ */
TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
in6_purgeaddr(ifa);
}
-
- /* undo everything done by in6_ifattach(), just in case */
- TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
- if (ifa->ifa_addr->sa_family != AF_INET6
- || !IN6_IS_ADDR_LINKLOCAL(&satosin6(&ifa->ifa_addr)->sin6_addr)) {
- continue;
- }
-
- ia = (struct in6_ifaddr *)ifa;
-
- /*
- * leave from multicast groups we have joined for the interface
- */
- while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) {
- LIST_REMOVE(imm, i6mm_chain);
- in6_leavegroup(imm);
- }
-
- /* Remove link-local from the routing table. */
- if (ia->ia_flags & IFA_ROUTE)
- (void)rtinit(&ia->ia_ifa, RTM_DELETE, ia->ia_flags);
-
- /* remove from the linked list */
- IF_ADDR_WLOCK(ifp);
- TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
- IF_ADDR_WUNLOCK(ifp);
- ifa_free(ifa); /* if_addrhead */
-
- IN6_IFADDR_WLOCK();
- TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link);
- IN6_IFADDR_WUNLOCK();
- ifa_free(ifa);
+ if (purgeulp) {
+ in6_pcbpurgeif0(&V_udbinfo, ifp);
+ in6_pcbpurgeif0(&V_ulitecbinfo, ifp);
+ in6_pcbpurgeif0(&V_ripcbinfo, ifp);
}
-
- in6_pcbpurgeif0(&V_udbinfo, ifp);
- in6_pcbpurgeif0(&V_ripcbinfo, ifp);
/* leave from all multicast groups joined */
in6_purgemaddrs(ifp);
@@ -882,32 +821,22 @@ in6_ifdetach(struct ifnet *ifp)
* prefixes after removing all addresses above.
* (Or can we just delay calling nd6_purge until at this point?)
*/
- nd6_purge(ifp);
+ if (purgeulp)
+ nd6_purge(ifp);
+}
- /*
- * Remove route to link-local allnodes multicast (ff02::1).
- * These only get automatically installed for the default FIB.
- */
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- sin6.sin6_family = AF_INET6;
- sin6.sin6_addr = in6addr_linklocal_allnodes;
- if (in6_setscope(&sin6.sin6_addr, ifp, NULL))
- /* XXX: should not fail */
- return;
- /* XXX grab lock first to avoid LOR */
- rnh = rt_tables_get_rnh(RT_DEFAULT_FIB, AF_INET6);
- if (rnh != NULL) {
- RADIX_NODE_HEAD_LOCK(rnh);
- rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, RTF_RNH_LOCKED,
- RT_DEFAULT_FIB);
- if (rt) {
- if (rt->rt_ifp == ifp)
- rtexpunge(rt);
- RTFREE_LOCKED(rt);
- }
- RADIX_NODE_HEAD_UNLOCK(rnh);
- }
+void
+in6_ifdetach(struct ifnet *ifp)
+{
+
+ _in6_ifdetach(ifp, 1);
+}
+
+void
+in6_ifdetach_destroy(struct ifnet *ifp)
+{
+
+ _in6_ifdetach(ifp, 0);
}
int
@@ -948,7 +877,9 @@ in6_tmpaddrtimer(void *arg)
V_ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, curvnet);
bzero(nullbuf, sizeof(nullbuf));
- TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ continue;
ndi = ND_IFINFO(ifp);
if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) {
/*
@@ -997,3 +928,29 @@ in6_purgemaddrs(struct ifnet *ifp)
IN6_MULTI_UNLOCK();
}
+
+void
+in6_ifattach_destroy(void)
+{
+
+ callout_drain(&V_in6_tmpaddrtimer_ch);
+}
+
+static void
+in6_ifattach_init(void *dummy)
+{
+
+	/* Timer for regeneration of temporary addresses' randomized ID. */
+ callout_init(&V_in6_tmpaddrtimer_ch, 0);
+ callout_reset(&V_in6_tmpaddrtimer_ch,
+ (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
+ V_ip6_temp_regen_advance) * hz,
+ in6_tmpaddrtimer, curvnet);
+}
+
+/*
+ * Cheat.
+ * This must be after route_init(), which is now SI_ORDER_THIRD.
+ */
+SYSINIT(in6_ifattach_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE,
+ in6_ifattach_init, NULL);
diff --git a/freebsd/sys/netinet6/in6_ifattach.h b/freebsd/sys/netinet6/in6_ifattach.h
index af627313..a34530db 100644
--- a/freebsd/sys/netinet6/in6_ifattach.h
+++ b/freebsd/sys/netinet6/in6_ifattach.h
@@ -35,7 +35,9 @@
#ifdef _KERNEL
void in6_ifattach(struct ifnet *, struct ifnet *);
+void in6_ifattach_destroy(void);
void in6_ifdetach(struct ifnet *);
+void in6_ifdetach_destroy(struct ifnet *);
int in6_get_tmpifid(struct ifnet *, u_int8_t *, const u_int8_t *, int);
void in6_tmpaddrtimer(void *);
int in6_get_hw_ifid(struct ifnet *, struct in6_addr *);
diff --git a/freebsd/sys/netinet6/in6_mcast.c b/freebsd/sys/netinet6/in6_mcast.c
index d32d57c6..174f1109 100644
--- a/freebsd/sys/netinet6/in6_mcast.c
+++ b/freebsd/sys/netinet6/in6_mcast.c
@@ -54,12 +54,14 @@ __FBSDID("$FreeBSD$");
#include <sys/tree.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
+#include <netinet6/in6_fib.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
@@ -159,21 +161,18 @@ static SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, mcast, CTLFLAG_RW, 0,
static u_long in6_mcast_maxgrpsrc = IPV6_MAX_GROUP_SRC_FILTER;
SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxgrpsrc,
- CTLFLAG_RW | CTLFLAG_TUN, &in6_mcast_maxgrpsrc, 0,
+ CTLFLAG_RWTUN, &in6_mcast_maxgrpsrc, 0,
"Max source filters per group");
-TUNABLE_ULONG("net.inet6.ip6.mcast.maxgrpsrc", &in6_mcast_maxgrpsrc);
static u_long in6_mcast_maxsocksrc = IPV6_MAX_SOCK_SRC_FILTER;
SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxsocksrc,
- CTLFLAG_RW | CTLFLAG_TUN, &in6_mcast_maxsocksrc, 0,
+ CTLFLAG_RWTUN, &in6_mcast_maxsocksrc, 0,
"Max source filters per socket");
-TUNABLE_ULONG("net.inet6.ip6.mcast.maxsocksrc", &in6_mcast_maxsocksrc);
/* TODO Virtualize this switch. */
int in6_mcast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
-SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
+SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RWTUN,
&in6_mcast_loop, 0, "Loopback multicast datagrams by default");
-TUNABLE_INT("net.inet6.ip6.mcast.loop", &in6_mcast_loop);
static SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters,
CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip6_mcast_filters,
@@ -473,9 +472,9 @@ in6_mc_get(struct ifnet *ifp, const struct in6_addr *group,
*/
inm = malloc(sizeof(*inm), M_IP6MADDR, M_NOWAIT | M_ZERO);
if (inm == NULL) {
+ IF_ADDR_WUNLOCK(ifp);
if_delmulti_ifma(ifma);
- error = ENOMEM;
- goto out_locked;
+ return (ENOMEM);
}
inm->in6m_addr = *group;
inm->in6m_ifp = ifp;
@@ -483,7 +482,7 @@ in6_mc_get(struct ifnet *ifp, const struct in6_addr *group,
inm->in6m_ifma = ifma;
inm->in6m_refcount = 1;
inm->in6m_state = MLD_NOT_MEMBER;
- IFQ_SET_MAXLEN(&inm->in6m_scq, MLD_MAX_STATE_CHANGES);
+ mbufq_init(&inm->in6m_scq, MLD_MAX_STATE_CHANGES);
inm->in6m_st[0].iss_fmode = MCAST_UNDEFINED;
inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
@@ -577,7 +576,7 @@ in6m_clear_recorded(struct in6_multi *inm)
*
* Return 0 if the source didn't exist or was already marked as recorded.
* Return 1 if the source was marked as recorded by this function.
- * Return <0 if any error occured (negated errno code).
+ * Return <0 if any error occurred (negated errno code).
*/
int
in6m_record_source(struct in6_multi *inm, const struct in6_addr *addr)
@@ -1078,7 +1077,7 @@ in6m_purge(struct in6_multi *inm)
inm->in6m_nsrc--;
}
/* Free state-change requests that might be queued. */
- _IF_DRAIN(&inm->in6m_scq);
+ mbufq_drain(&inm->in6m_scq);
}
/*
@@ -1187,7 +1186,7 @@ in6_mc_join_locked(struct ifnet *ifp, const struct in6_addr *mcaddr,
IN6_MULTI_LOCK_ASSERT();
CTR4(KTR_MLD, "%s: join %s on %p(%s))", __func__,
- ip6_sprintf(ip6tbuf, mcaddr), ifp, ifp->if_xname);
+ ip6_sprintf(ip6tbuf, mcaddr), ifp, if_name(ifp));
error = 0;
inm = NULL;
@@ -1278,7 +1277,7 @@ in6_mc_leave_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
CTR5(KTR_MLD, "%s: leave inm %p, %s/%s, imf %p", __func__,
inm, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- (in6m_is_ifp_detached(inm) ? "null" : inm->in6m_ifp->if_xname),
+ (in6m_is_ifp_detached(inm) ? "null" : if_name(inm->in6m_ifp)),
imf);
/*
@@ -1776,28 +1775,22 @@ static struct ifnet *
in6p_lookup_mcast_ifp(const struct inpcb *in6p,
const struct sockaddr_in6 *gsin6)
{
- struct route_in6 ro6;
- struct ifnet *ifp;
+ struct nhop6_basic nh6;
+ struct in6_addr dst;
+ uint32_t scopeid;
+ uint32_t fibnum;
KASSERT(in6p->inp_vflag & INP_IPV6,
("%s: not INP_IPV6 inpcb", __func__));
KASSERT(gsin6->sin6_family == AF_INET6,
("%s: not AF_INET6 group", __func__));
- KASSERT(IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr),
- ("%s: not multicast", __func__));
- ifp = NULL;
- memset(&ro6, 0, sizeof(struct route_in6));
- memcpy(&ro6.ro_dst, gsin6, sizeof(struct sockaddr_in6));
- rtalloc_ign_fib((struct route *)&ro6, 0,
- in6p ? in6p->inp_inc.inc_fibnum : RT_DEFAULT_FIB);
- if (ro6.ro_rt != NULL) {
- ifp = ro6.ro_rt->rt_ifp;
- KASSERT(ifp != NULL, ("%s: null ifp", __func__));
- RTFREE(ro6.ro_rt);
- }
+ in6_splitscope(&gsin6->sin6_addr, &dst, &scopeid);
+ fibnum = in6p ? in6p->inp_inc.inc_fibnum : RT_DEFAULT_FIB;
+ if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6) != 0)
+ return (NULL);
- return (ifp);
+ return (nh6.nh_ifp);
}
/*
@@ -1853,8 +1846,7 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
if (mreq.ipv6mr_interface == 0) {
ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
} else {
- if (mreq.ipv6mr_interface < 0 ||
- V_if_index < mreq.ipv6mr_interface)
+ if (V_if_index < mreq.ipv6mr_interface)
return (EADDRNOTAVAIL);
ifp = ifnet_byindex(mreq.ipv6mr_interface);
}
@@ -2198,7 +2190,7 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
* XXX SCOPE6 lock potentially taken here.
*/
if (ifindex != 0) {
- if (ifindex < 0 || V_if_index < ifindex)
+ if (V_if_index < ifindex)
return (EADDRNOTAVAIL);
ifp = ifnet_byindex(ifindex);
if (ifp == NULL)
@@ -2353,13 +2345,17 @@ in6p_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
error = sooptcopyin(sopt, &ifindex, sizeof(u_int), sizeof(u_int));
if (error)
return (error);
- if (ifindex < 0 || V_if_index < ifindex)
+ if (V_if_index < ifindex)
return (EINVAL);
-
- ifp = ifnet_byindex(ifindex);
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
- return (EADDRNOTAVAIL);
-
+ if (ifindex == 0)
+ ifp = NULL;
+ else {
+ ifp = ifnet_byindex(ifindex);
+ if (ifp == NULL)
+ return (EINVAL);
+ if ((ifp->if_flags & IFF_MULTICAST) == 0)
+ return (EADDRNOTAVAIL);
+ }
imo = in6p_findmoptions(inp);
imo->im6o_multicast_ifp = ifp;
INP_WUNLOCK(inp);
@@ -2805,13 +2801,13 @@ in6m_print(const struct in6_multi *inm)
printf("addr %s ifp %p(%s) ifma %p\n",
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
inm->in6m_ifp,
- inm->in6m_ifp->if_xname,
+ if_name(inm->in6m_ifp),
inm->in6m_ifma);
printf("timer %u state %s refcount %u scq.len %u\n",
inm->in6m_timer,
in6m_state_str(inm->in6m_state),
inm->in6m_refcount,
- inm->in6m_scq.ifq_len);
+ mbufq_len(&inm->in6m_scq));
printf("mli %p nsrc %lu sctimer %u scrv %u\n",
inm->in6m_mli,
inm->in6m_nsrc,
diff --git a/freebsd/sys/netinet6/in6_pcb.c b/freebsd/sys/netinet6/in6_pcb.c
index bf69996d..95e376c7 100644
--- a/freebsd/sys/netinet6/in6_pcb.c
+++ b/freebsd/sys/netinet6/in6_pcb.c
@@ -77,6 +77,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_pcbgroup.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -96,6 +97,8 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_llatbl.h>
#include <net/if_types.h>
#include <net/route.h>
@@ -112,7 +115,8 @@ __FBSDID("$FreeBSD$");
#include <netinet6/in6_pcb.h>
#include <netinet6/scope6_var.h>
-struct in6_addr zeroin6_addr;
+static struct inpcb *in6_pcblookup_hash_locked(struct inpcbinfo *,
+ struct in6_addr *, u_int, struct in6_addr *, u_int, int, struct ifnet *);
int
in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
@@ -208,6 +212,7 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
&sin6->sin6_addr, lport,
INPLOOKUP_WILDCARD, cred);
if (t &&
+ ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
((t->inp_flags & INP_TIMEWAIT) == 0) &&
(so->so_type != SOCK_STREAM ||
IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
@@ -221,6 +226,16 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
0)
#endif /* __rtems__ */
return (EADDRINUSE);
+
+ /*
+ * If the socket is a BINDMULTI socket, then
+ * the credentials need to match and the
+ * original socket also has to have been bound
+ * with BINDMULTI.
+ */
+ if (t && (! in_pcbbind_check_bindmulti(inp, t)))
+ return (EADDRINUSE);
+
#ifdef INET
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
@@ -231,6 +246,7 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
sin.sin_addr, lport,
INPLOOKUP_WILDCARD, cred);
if (t &&
+ ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
((t->inp_flags &
INP_TIMEWAIT) == 0) &&
(so->so_type != SOCK_STREAM ||
@@ -243,6 +259,9 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
0)
#endif /* __rtems__ */
return (EADDRINUSE);
+
+ if (t && (! in_pcbbind_check_bindmulti(inp, t)))
+ return (EADDRINUSE);
}
#endif
}
@@ -318,13 +337,12 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
* a bit of a kludge, but cleaning up the internal interfaces would
* have forced minor changes in every protocol).
*/
-int
+static int
in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam,
struct in6_addr *plocal_addr6)
{
register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
int error = 0;
- struct ifnet *ifp = NULL;
int scope_ambiguous = 0;
struct in6_addr in6a;
@@ -354,20 +372,15 @@ in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam,
if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0)
return (error);
- error = in6_selectsrc(sin6, inp->in6p_outputopts,
- inp, NULL, inp->inp_cred, &ifp, &in6a);
+ error = in6_selectsrc_socket(sin6, inp->in6p_outputopts,
+ inp, inp->inp_cred, scope_ambiguous, &in6a, NULL);
if (error)
return (error);
- if (ifp && scope_ambiguous &&
- (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) {
- return(error);
- }
-
/*
* Do not update this earlier, in case we return with an error.
*
- * XXX: this in6_selectsrc result might replace the bound local
+ * XXX: this in6_selectsrc_socket result might replace the bound local
* address with the address specified by setsockopt(IPV6_PKTINFO).
* Is it the intended behavior?
*/
@@ -702,8 +715,9 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
* Look for an unconnected (wildcard foreign addr) PCB that
* matches the local address and port we're looking for.
*/
- head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
- 0, pcbinfo->ipi_hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
+ INP6_PCBHASHKEY(&in6addr_any), lport, 0,
+ pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
@@ -784,7 +798,7 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
struct ip6_moptions *im6o;
int i, gap;
- INP_INFO_RLOCK(pcbinfo);
+ INP_INFO_WLOCK(pcbinfo);
LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
INP_WLOCK(in6p);
im6o = in6p->in6p_moptions;
@@ -815,7 +829,7 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
}
INP_WUNLOCK(in6p);
}
- INP_INFO_RUNLOCK(pcbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
}
/*
@@ -828,9 +842,12 @@ void
in6_losing(struct inpcb *in6p)
{
- /*
- * We don't store route pointers in the routing table anymore
- */
+ if (in6p->inp_route6.ro_rt) {
+ RTFREE(in6p->inp_route6.ro_rt);
+ in6p->inp_route6.ro_rt = (struct rtentry *)NULL;
+ }
+ if (in6p->inp_route.ro_lle)
+ LLE_FREE(in6p->inp_route.ro_lle); /* zeros ro_lle */
return;
}
@@ -841,9 +858,13 @@ in6_losing(struct inpcb *in6p)
struct inpcb *
in6_rtchange(struct inpcb *inp, int errno)
{
- /*
- * We don't store route pointers in the routing table anymore
- */
+
+ if (inp->inp_route6.ro_rt) {
+ RTFREE(inp->inp_route6.ro_rt);
+ inp->inp_route6.ro_rt = (struct rtentry *)NULL;
+ }
+ if (inp->inp_route.ro_lle)
+ LLE_FREE(inp->inp_route.ro_lle); /* zeros ro_lle */
return inp;
}
@@ -859,21 +880,14 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
struct inpcbhead *head;
struct inpcb *inp, *tmpinp;
u_short fport = fport_arg, lport = lport_arg;
- int faith;
-
- if (faithprefix_p != NULL)
- faith = (*faithprefix_p)(laddr);
- else
- faith = 0;
/*
* First look for an exact match.
*/
tmpinp = NULL;
INP_GROUP_LOCK(pcbgroup);
- head = &pcbgroup->ipg_hashbase[
- INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport,
- pcbgroup->ipg_hashmask)];
+ head = &pcbgroup->ipg_hashbase[INP_PCBHASH(
+ INP6_PCBHASHKEY(faddr), lport, fport, pcbgroup->ipg_hashmask)];
LIST_FOREACH(inp, head, inp_pcbgrouphash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
@@ -899,7 +913,7 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
}
/*
- * Then look for a wildcard match, if requested.
+ * Then look for a wildcard match in the pcbgroup.
*/
if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
struct inpcb *local_wild = NULL, *local_exact = NULL;
@@ -913,9 +927,9 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
* 3. non-jailed, non-wild.
* 4. non-jailed, wild.
*/
- head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
- 0, pcbinfo->ipi_wildmask)];
- LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+ head = &pcbgroup->ipg_hashbase[
+ INP_PCBHASH(INADDR_ANY, lport, 0, pcbgroup->ipg_hashmask)];
+ LIST_FOREACH(inp, head, inp_pcbgrouphash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -925,9 +939,67 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
continue;
}
+ injail = prison_flag(inp->inp_cred, PR_IP6);
+ if (injail) {
+ if (prison_check_ip6(inp->inp_cred,
+ laddr) != 0)
+ continue;
+ } else {
+ if (local_exact != NULL)
+ continue;
+ }
+
+ if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
+ if (injail)
+ goto found;
+ else
+ local_exact = inp;
+ } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
+ if (injail)
+ jail_wild = inp;
+ else
+ local_wild = inp;
+ }
+ } /* LIST_FOREACH */
+
+ inp = jail_wild;
+ if (inp == NULL)
+ inp = jail_wild;
+ if (inp == NULL)
+ inp = local_exact;
+ if (inp == NULL)
+ inp = local_wild;
+ if (inp != NULL)
+ goto found;
+ }
+
+ /*
+ * Then look for a wildcard match, if requested.
+ */
+ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+ struct inpcb *local_wild = NULL, *local_exact = NULL;
+ struct inpcb *jail_wild = NULL;
+ int injail;
+
+ /*
+ * Order of socket selection - we always prefer jails.
+ * 1. jailed, non-wild.
+ * 2. jailed, wild.
+ * 3. non-jailed, non-wild.
+ * 4. non-jailed, wild.
+ */
+ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(
+ INP6_PCBHASHKEY(&in6addr_any), lport, 0,
+ pcbinfo->ipi_wildmask)];
+ LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
/* XXX inp locking */
- if (faith && (inp->inp_flags & INP_FAITH) == 0)
+ if ((inp->inp_vflag & INP_IPV6) == 0)
+ continue;
+
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
+ inp->inp_lport != lport) {
continue;
+ }
injail = prison_flag(inp->inp_cred, PR_IP6);
if (injail) {
@@ -985,7 +1057,7 @@ found:
/*
* Lookup PCB in hash list.
*/
-struct inpcb *
+static struct inpcb *
in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
u_int fport_arg, struct in6_addr *laddr, u_int lport_arg,
int lookupflags, struct ifnet *ifp)
@@ -993,25 +1065,18 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
struct inpcbhead *head;
struct inpcb *inp, *tmpinp;
u_short fport = fport_arg, lport = lport_arg;
- int faith;
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
INP_HASH_LOCK_ASSERT(pcbinfo);
- if (faithprefix_p != NULL)
- faith = (*faithprefix_p)(laddr);
- else
- faith = 0;
-
/*
* First look for an exact match.
*/
tmpinp = NULL;
- head = &pcbinfo->ipi_hashbase[
- INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport,
- pcbinfo->ipi_hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
+ INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
@@ -1049,8 +1114,9 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
* 3. non-jailed, non-wild.
* 4. non-jailed, wild.
*/
- head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
- 0, pcbinfo->ipi_hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
+ INP6_PCBHASHKEY(&in6addr_any), lport, 0,
+ pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
@@ -1061,10 +1127,6 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
continue;
}
- /* XXX inp locking */
- if (faith && (inp->inp_flags & INP_FAITH) == 0)
- continue;
-
injail = prison_flag(inp->inp_cred, PR_IP6);
if (injail) {
if (prison_check_ip6(inp->inp_cred,
@@ -1145,7 +1207,7 @@ struct inpcb *
in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp)
{
-#if defined(PCBGROUP)
+#if defined(PCBGROUP) && !defined(RSS)
struct inpcbgroup *pcbgroup;
#endif
@@ -1154,7 +1216,17 @@ in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
-#if defined(PCBGROUP)
+ /*
+ * When not using RSS, use connection groups in preference to the
+ * reservation table when looking up 4-tuples. When using RSS, just
+ * use the reservation table, due to the cost of the Toeplitz hash
+ * in software.
+ *
+ * XXXRW: This policy belongs in the pcbgroup code, as in principle
+ * we could be doing RSS with a non-Toeplitz hash that is affordable
+ * in software.
+ */
+#if defined(PCBGROUP) && !defined(RSS)
if (in_pcbgroup_enabled(pcbinfo)) {
pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
fport);
@@ -1181,16 +1253,27 @@ in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
("%s: LOCKPCB not set", __func__));
#ifdef PCBGROUP
- if (in_pcbgroup_enabled(pcbinfo)) {
+ /*
+ * If we can use a hardware-generated hash to look up the connection
+ * group, use that connection group to find the inpcb. Otherwise
+ * fall back on a software hash -- or the reservation table if we're
+ * using RSS.
+ *
+ * XXXRW: As above, that policy belongs in the pcbgroup code.
+ */
+ if (in_pcbgroup_enabled(pcbinfo) &&
+ M_HASHTYPE_TEST(m, M_HASHTYPE_NONE) == 0) {
pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
m->m_pkthdr.flowid);
if (pcbgroup != NULL)
return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr,
fport, laddr, lport, lookupflags, ifp));
+#ifndef RSS
pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
fport);
return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
laddr, lport, lookupflags, ifp));
+#endif
}
#endif
return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
diff --git a/freebsd/sys/netinet6/in6_pcb.h b/freebsd/sys/netinet6/in6_pcb.h
index 19d151b7..e758dace 100644
--- a/freebsd/sys/netinet6/in6_pcb.h
+++ b/freebsd/sys/netinet6/in6_pcb.h
@@ -86,7 +86,6 @@ int in6_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
int in6_pcbconnect_mbuf(struct inpcb *, struct sockaddr *,
struct ucred *, struct mbuf *);
void in6_pcbdisconnect(struct inpcb *);
-int in6_pcbladdr(struct inpcb *, struct sockaddr *, struct in6_addr *);
struct inpcb *
in6_pcblookup_local(struct inpcbinfo *,
struct in6_addr *, u_short, int,
@@ -96,10 +95,6 @@ struct inpcb *
u_int, struct in6_addr *, u_int, int,
struct ifnet *);
struct inpcb *
- in6_pcblookup_hash_locked(struct inpcbinfo *, struct in6_addr *,
- u_int, struct in6_addr *, u_int, int,
- struct ifnet *);
-struct inpcb *
in6_pcblookup_mbuf(struct inpcbinfo *, struct in6_addr *,
u_int, struct in6_addr *, u_int, int,
struct ifnet *ifp, struct mbuf *);
diff --git a/freebsd/sys/netinet6/in6_proto.c b/freebsd/sys/netinet6/in6_proto.c
index a6c3b4e8..8a9c1cd9 100644
--- a/freebsd/sys/netinet6/in6_proto.c
+++ b/freebsd/sys/netinet6/in6_proto.c
@@ -80,12 +80,14 @@ __FBSDID("$FreeBSD$");
#include <sys/protosw.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/radix.h>
#include <net/route.h>
#ifdef RADIX_MPATH
@@ -128,10 +130,6 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
-#ifdef FLOWTABLE
-#include <net/flowtable.h>
-#endif
-
/*
* TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
*/
@@ -151,15 +149,12 @@ static struct pr_usrreqs nousrreqs;
.pr_usrreqs = &nousrreqs \
}
-struct ip6protosw inet6sw[] = {
+struct protosw inet6sw[] = {
{
.pr_type = 0,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_IPV6,
.pr_init = ip6_init,
-#ifdef VIMAGE
- .pr_destroy = ip6_destroy,
-#endif
.pr_slowtimo = frag6_slowtimo,
.pr_drain = frag6_drain,
.pr_usrreqs = &nousrreqs,
@@ -211,15 +206,28 @@ struct ip6protosw inet6sw[] = {
.pr_type = SOCK_STREAM,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_SCTP,
- .pr_flags = PR_WANTRCVD,
+ .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD,
.pr_input = sctp6_input,
- .pr_ctlinput = sctp6_ctlinput,
+ .pr_ctlinput = sctp6_ctlinput,
.pr_ctloutput = sctp_ctloutput,
.pr_drain = sctp_drain,
.pr_usrreqs = &sctp6_usrreqs
},
#endif /* SCTP */
{
+ .pr_type = SOCK_DGRAM,
+ .pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_UDPLITE,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = udp6_input,
+ .pr_ctlinput = udplite6_ctlinput,
+ .pr_ctloutput = udp_ctloutput,
+#ifndef INET /* Do not call initialization twice. */
+ .pr_init = udplite_init,
+#endif
+ .pr_usrreqs = &udp6_usrreqs,
+},
+{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_RAW,
@@ -324,6 +332,17 @@ struct ip6protosw inet6sw[] = {
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_GRE,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = encap6_input,
+ .pr_output = rip6_output,
+ .pr_ctloutput = rip6_ctloutput,
+ .pr_init = encap_init,
+ .pr_usrreqs = &rip6_usrreqs
+},
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inet6domain,
.pr_protocol = IPPROTO_PIM,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = encap6_input,
@@ -361,8 +380,7 @@ struct domain inet6domain = {
.dom_family = AF_INET6,
.dom_name = "internet6",
.dom_protosw = (struct protosw *)inet6sw,
- .dom_protoswNPROTOSW = (struct protosw *)
- &inet6sw[sizeof(inet6sw)/sizeof(inet6sw[0])],
+ .dom_protoswNPROTOSW = (struct protosw *)&inet6sw[nitems(inet6sw)],
#ifdef RADIX_MPATH
.dom_rtattach = rn6_mpath_inithead,
#else
@@ -371,10 +389,9 @@ struct domain inet6domain = {
#ifdef VIMAGE
.dom_rtdetach = in6_detachhead,
#endif
- .dom_rtoffset = offsetof(struct sockaddr_in6, sin6_addr) << 3,
- .dom_maxrtkey = sizeof(struct sockaddr_in6),
.dom_ifattach = in6_domifattach,
- .dom_ifdetach = in6_domifdetach
+ .dom_ifdetach = in6_domifdetach,
+ .dom_ifmtu = in6_domifmtu
};
VNET_DOMAIN_SET(inet6);
@@ -416,7 +433,6 @@ VNET_DEFINE(int, ip6_rr_prune) = 5; /* router renumbering prefix
VNET_DEFINE(int, ip6_mcast_pmtu) = 0; /* enable pMTU discovery for multicast? */
VNET_DEFINE(int, ip6_v6only) = 1;
-VNET_DEFINE(int, ip6_keepfaith) = 0;
VNET_DEFINE(time_t, ip6_log_time) = (time_t)0L;
#ifdef IPSTEALTH
VNET_DEFINE(int, ip6stealth) = 0;
@@ -433,16 +449,6 @@ VNET_DEFINE(int, nd6_onlink_ns_rfc4861) = 0;/* allow 'on-link' nd6 NS
VNET_DEFINE(int, pmtu_expire) = 60*10;
VNET_DEFINE(int, pmtu_probe) = 60*2;
-/* raw IP6 parameters */
-/*
- * Nominal space allocated to a raw ip socket.
- */
-#define RIPV6SNDQ 8192
-#define RIPV6RCVQ 8192
-
-VNET_DEFINE(u_long, rip6_sendspace) = RIPV6SNDQ;
-VNET_DEFINE(u_long, rip6_recvspace) = RIPV6RCVQ;
-
/* ICMPV6 parameters */
VNET_DEFINE(int, icmp6_rediraccept) = 1;/* accept and process redirects */
VNET_DEFINE(int, icmp6_redirtimeout) = 10 * 60; /* 10 minutes */
@@ -452,11 +458,6 @@ VNET_DEFINE(int, icmp6_nodeinfo) =
(ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);
VNET_DEFINE(int, icmp6_nodeinfo_oldmcprefix) = 1;
-/* UDP on IP6 parameters */
-VNET_DEFINE(int, udp6_sendspace) = 9216;/* really max datagram size */
-VNET_DEFINE(int, udp6_recvspace) = 40 * (1024 + sizeof(struct sockaddr_in6));
- /* 40 1K datagrams */
-
/*
* sysctl related items.
*/
@@ -479,158 +480,170 @@ SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW, 0, "IPSEC6");
static int
sysctl_ip6_temppltime(SYSCTL_HANDLER_ARGS)
{
- int error = 0;
- int old;
-
- VNET_SYSCTL_ARG(req, arg1);
+ int error, val;
- error = SYSCTL_OUT(req, arg1, sizeof(int));
- if (error || !req->newptr)
+ val = V_ip6_temp_preferred_lifetime;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || !req->newptr)
return (error);
- old = V_ip6_temp_preferred_lifetime;
- error = SYSCTL_IN(req, arg1, sizeof(int));
- if (V_ip6_temp_preferred_lifetime <
- V_ip6_desync_factor + V_ip6_temp_regen_advance) {
- V_ip6_temp_preferred_lifetime = old;
+ if (val < V_ip6_desync_factor + V_ip6_temp_regen_advance)
return (EINVAL);
- }
- return (error);
+ V_ip6_temp_preferred_lifetime = val;
+ return (0);
}
static int
sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS)
{
- int error = 0;
- int old;
+ int error, val;
- VNET_SYSCTL_ARG(req, arg1);
-
- error = SYSCTL_OUT(req, arg1, sizeof(int));
- if (error || !req->newptr)
+ val = V_ip6_temp_valid_lifetime;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || !req->newptr)
return (error);
- old = V_ip6_temp_valid_lifetime;
- error = SYSCTL_IN(req, arg1, sizeof(int));
- if (V_ip6_temp_valid_lifetime < V_ip6_temp_preferred_lifetime) {
- V_ip6_temp_preferred_lifetime = old;
+ if (val < V_ip6_temp_preferred_lifetime)
return (EINVAL);
- }
- return (error);
+ V_ip6_temp_valid_lifetime = val;
+ return (0);
}
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding, CTLFLAG_RW,
- &VNET_NAME(ip6_forwarding), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
- &VNET_NAME(ip6_sendredirects), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_RW,
- &VNET_NAME(ip6_defhlim), 0, "");
-SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(ip6stat), ip6stat, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
- CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv,
- CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0,
- "Default value of per-interface flag for accepting ICMPv6 Router"
- "Advertisement messages");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_NO_RADR, no_radr,
- CTLFLAG_RW, &VNET_NAME(ip6_no_radr), 0,
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0,
+ "Enable IPv6 forwarding between interfaces");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_sendredirects), 0,
+ "Send a redirect message when forwarding back to a source link");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defhlim), 0,
+ "Default hop limit");
+SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat,
+ ip6stat,
+ "IP6 statistics (struct ip6stat, netinet6/ip6_var.h)");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0,
+ "Maximum allowed number of outstanding fragmented IPv6 packets");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0,
+ "Default value of per-interface flag for accepting ICMPv6 RA messages");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NO_RADR, no_radr,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_no_radr), 0,
"Default value of per-interface flag to control whether routers "
"sending ICMPv6 RA messages on that interface are added into the "
- "default router list.");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_NORBIT_RAIF, norbit_raif, CTLFLAG_RW,
- &VNET_NAME(ip6_norbit_raif), 0,
- "Always set 0 to R flag in ICMPv6 NA messages when accepting RA"
- " on the interface.");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RFC6204W3, rfc6204w3,
- CTLFLAG_RW, &VNET_NAME(ip6_rfc6204w3), 0,
+ "default router list");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NORBIT_RAIF, norbit_raif,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_norbit_raif), 0,
+ "Always set clear the R flag in ICMPv6 NA messages when accepting RA "
+ "on the interface");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RFC6204W3, rfc6204w3,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rfc6204w3), 0,
"Accept the default router list from ICMPv6 RA messages even "
- "when packet forwarding enabled.");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
- &VNET_NAME(ip6_keepfaith), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval,
- CTLFLAG_RW, &VNET_NAME(ip6_log_interval), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit,
- CTLFLAG_RW, &VNET_NAME(ip6_hdrnestlimit), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT, dad_count, CTLFLAG_RW,
- &VNET_NAME(ip6_dad_count), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL, auto_flowlabel,
- CTLFLAG_RW, &VNET_NAME(ip6_auto_flowlabel), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, defmcasthlim,
- CTLFLAG_RW, &VNET_NAME(ip6_defmcasthlim), 0, "");
+ "when packet forwarding is enabled");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_log_interval), 0,
+ "Frequency in seconds at which to log IPv6 forwarding errors");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_hdrnestlimit), 0,
+ "Maximum allowed number of nested protocol headers");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT, dad_count,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_dad_count), 0,
+ "Number of ICMPv6 NS messages sent during duplicate address detection");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL, auto_flowlabel,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_flowlabel), 0,
+ "Provide an IPv6 flowlabel in outbound packets");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, defmcasthlim,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defmcasthlim), 0,
+ "Default hop limit for multicast packets");
SYSCTL_STRING(_net_inet6_ip6, IPV6CTL_KAME_VERSION, kame_version,
- CTLFLAG_RD, __KAME_VERSION, 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED, use_deprecated,
- CTLFLAG_RW, &VNET_NAME(ip6_use_deprecated), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE, rr_prune, CTLFLAG_RW,
- &VNET_NAME(ip6_rr_prune), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USETEMPADDR, use_tempaddr,
- CTLFLAG_RW, &VNET_NAME(ip6_use_tempaddr), 0, "");
-SYSCTL_VNET_PROC(_net_inet6_ip6, IPV6CTL_TEMPPLTIME, temppltime,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ip6_temp_preferred_lifetime), 0,
- sysctl_ip6_temppltime, "I", "");
-SYSCTL_VNET_PROC(_net_inet6_ip6, IPV6CTL_TEMPVLTIME, tempvltime,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ip6_temp_valid_lifetime), 0,
- sysctl_ip6_tempvltime, "I", "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_V6ONLY, v6only, CTLFLAG_RW,
- &VNET_NAME(ip6_v6only), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL, auto_linklocal,
- CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0,
- "Default value of per-interface flag for automatically adding an IPv6"
- " link-local address to interfaces when attached");
-SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RW,
- &VNET_NAME(rip6stat), rip6stat, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr,
- CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone,
- CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0,"");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, CTLFLAG_RW,
- &VNET_NAME(ip6_maxfrags), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu, CTLFLAG_RW,
- &VNET_NAME(ip6_mcast_pmtu), 0, "");
+ CTLFLAG_RD, __KAME_VERSION, 0,
+ "KAME version string");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED, use_deprecated,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_deprecated), 0,
+ "Allow the use of addresses whose preferred lifetimes have expired");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE, rr_prune,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rr_prune), 0,
+ ""); /* XXX unused */
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USETEMPADDR, use_tempaddr,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_tempaddr), 0,
+ "Create RFC3041 temporary addresses for autoconfigured addresses");
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPPLTIME, temppltime,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
+ NULL, 0, sysctl_ip6_temppltime, "I",
+ "Maximum preferred lifetime for temporary addresses");
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPVLTIME, tempvltime,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
+ NULL, 0, sysctl_ip6_tempvltime, "I",
+ "Maximum valid lifetime for temporary addresses");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_V6ONLY, v6only,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_v6only), 0,
+ "Restrict AF_INET6 sockets to IPv6 addresses only");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL, auto_linklocal,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0,
+ "Default value of per-interface flag for automatically adding an IPv6 "
+ "link-local address to interfaces when attached");
+SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats,
+ struct rip6stat, rip6stat,
+ "Raw IP6 statistics (struct rip6stat, netinet6/raw_ip6.h)");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0,
+ "Prefer RFC3041 temporary addresses in source address selection");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0,
+ "Use the default scope zone when none is specified");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfrags), 0,
+ "Maximum allowed number of outstanding IPv6 packet fragments");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0,
+ "Enable path MTU discovery for multicast packets");
#ifdef IPSTEALTH
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_RW,
- &VNET_NAME(ip6stealth), 0, "");
-#endif
-
-#ifdef FLOWTABLE
-VNET_DEFINE(int, ip6_output_flowtable_size) = 2048;
-VNET_DEFINE(struct flowtable *, ip6_ft);
-#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size)
-
-SYSCTL_VNET_INT(_net_inet6_ip6, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
- &VNET_NAME(ip6_output_flowtable_size), 2048,
- "number of entries in the per-cpu output flow caches");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(ip6stealth), 0,
+ "Forward IPv6 packets without decrementing their TTL");
#endif
/* net.inet6.icmp6 */
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept,
- CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout,
- CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0, "");
-SYSCTL_VNET_STRUCT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(icmp6stat), icmp6stat, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune, CTLFLAG_RW,
- &VNET_NAME(nd6_prune), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay, CTLFLAG_RW,
- &VNET_NAME(nd6_delay), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, nd6_umaxtries,
- CTLFLAG_RW, &VNET_NAME(nd6_umaxtries), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES, nd6_mmaxtries,
- CTLFLAG_RW, &VNET_NAME(nd6_mmaxtries), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK, nd6_useloopback,
- CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo, CTLFLAG_RW,
- &VNET_NAME(icmp6_nodeinfo), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO_OLDMCPREFIX,
- nodeinfo_oldmcprefix, CTLFLAG_RW,
- &VNET_NAME(icmp6_nodeinfo_oldmcprefix), 0,
- "Join old IPv6 NI group address in draft-ietf-ipngwg-icmp-name-lookup"
- " for compatibility with KAME implememtation.");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit,
- CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint,
- CTLFLAG_RW, &VNET_NAME(nd6_maxnudhint), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug, CTLFLAG_RW,
- &VNET_NAME(nd6_debug), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861,
- nd6_onlink_ns_rfc4861, CTLFLAG_RW, &VNET_NAME(nd6_onlink_ns_rfc4861),
- 0, "Accept 'on-link' nd6 NS in compliance with RFC 4861.");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0,
+ "Accept ICMPv6 redirect messages");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0,
+ ""); /* XXX unused */
+SYSCTL_VNET_PCPUSTAT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats,
+ struct icmp6stat, icmp6stat,
+ "ICMPv6 statistics (struct icmp6stat, netinet/icmp6.h)");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_prune), 0,
+ "Frequency in seconds of checks for expired prefixes and routers");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_delay), 0,
+ "Delay in seconds before probing for reachability");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, nd6_umaxtries,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_umaxtries), 0,
+ "Number of ICMPv6 NS messages sent during reachability detection");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES, nd6_mmaxtries,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_mmaxtries), 0,
+ "Number of ICMPv6 NS messages sent during address resolution");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK, nd6_useloopback,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0,
+ "Create a loopback route when configuring an IPv6 address");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_nodeinfo), 0,
+ "Mask of enabled RF4620 node information query types");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO_OLDMCPREFIX,
+ nodeinfo_oldmcprefix, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(icmp6_nodeinfo_oldmcprefix), 0,
+ "Join old IPv6 NI group address in draft-ietf-ipngwg-icmp-name-lookup "
+ "for compatibility with KAME implementation");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0,
+ "Maximum number of ICMPv6 error messages per second");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxnudhint), 0,
+ ""); /* XXX unused */
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_debug), 0,
+ "Log NDP debug messages");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861,
+ nd6_onlink_ns_rfc4861, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(nd6_onlink_ns_rfc4861), 0,
+ "Accept 'on-link' ICMPv6 NS messages in compliance with RFC 4861");
diff --git a/freebsd/sys/netinet6/in6_rmx.c b/freebsd/sys/netinet6/in6_rmx.c
index 4c59a1ad..f04e0058 100644
--- a/freebsd/sys/netinet6/in6_rmx.c
+++ b/freebsd/sys/netinet6/in6_rmx.c
@@ -68,7 +68,6 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
-#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -78,7 +77,9 @@ __FBSDID("$FreeBSD$");
#include <sys/callout.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
+#include <net/route_var.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
@@ -104,14 +105,12 @@ extern int in6_detachhead(void **head, int off);
* Do what we need to do when inserting a route.
*/
static struct radix_node *
-in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
+in6_addroute(void *v_arg, void *n_arg, struct radix_head *head,
struct radix_node *treenodes)
{
struct rtentry *rt = (struct rtentry *)treenodes;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt);
- struct radix_node *ret;
- RADIX_NODE_HEAD_WLOCK_ASSERT(head);
if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
rt->rt_flags |= RTF_MULTICAST;
@@ -137,116 +136,69 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
}
}
- if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
- rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
+ if (rt->rt_ifp != NULL) {
- ret = rn_addroute(v_arg, n_arg, head, treenodes);
- if (ret == NULL) {
- struct rtentry *rt2;
/*
- * We are trying to add a net route, but can't.
- * The following case should be allowed, so we'll make a
- * special check for this:
- * Two IPv6 addresses with the same prefix is assigned
- * to a single interrface.
- * # ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1)
- * # ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2)
- * In this case, (*1) and (*2) want to add the same
- * net route entry, 3ffe:0501:: -> if0.
- * This case should not raise an error.
+ * Check route MTU:
+ * inherit interface MTU if not set or
+ * check if MTU is too large.
*/
- rt2 = in6_rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED,
- rt->rt_fibnum);
- if (rt2) {
- if (((rt2->rt_flags & (RTF_HOST|RTF_GATEWAY)) == 0)
- && rt2->rt_gateway
- && rt2->rt_gateway->sa_family == AF_LINK
- && rt2->rt_ifp == rt->rt_ifp) {
- ret = rt2->rt_nodes;
- }
- RTFREE_LOCKED(rt2);
- }
+ if (rt->rt_mtu == 0) {
+ rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp);
+ } else if (rt->rt_mtu > IN6_LINKMTU(rt->rt_ifp))
+ rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp);
}
- return (ret);
-}
-SYSCTL_DECL(_net_inet6_ip6);
-
-static VNET_DEFINE(int, rtq_toomany6) = 128;
- /* 128 cached routes is ``too many'' */
-#define V_rtq_toomany6 VNET(rtq_toomany6)
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW,
- &VNET_NAME(rtq_toomany6) , 0, "");
-
-struct rtqk_arg {
- struct radix_node_head *rnh;
- int mode;
- int updating;
- int draining;
- int killed;
- int found;
- time_t nextstop;
-};
+ return (rn_addroute(v_arg, n_arg, head, treenodes));
+}
/*
* Age old PMTUs.
*/
struct mtuex_arg {
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
time_t nextstop;
};
static VNET_DEFINE(struct callout, rtq_mtutimer);
#define V_rtq_mtutimer VNET(rtq_mtutimer)
static int
-in6_mtuexpire(struct radix_node *rn, void *rock)
+in6_mtuexpire(struct rtentry *rt, void *rock)
{
- struct rtentry *rt = (struct rtentry *)rn;
struct mtuex_arg *ap = rock;
- /* sanity */
- if (!rt)
- panic("rt == NULL in in6_mtuexpire");
-
- if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
- if (rt->rt_rmx.rmx_expire <= time_uptime) {
+ if (rt->rt_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
+ if (rt->rt_expire <= time_uptime) {
rt->rt_flags |= RTF_PROBEMTU;
} else {
- ap->nextstop = lmin(ap->nextstop,
- rt->rt_rmx.rmx_expire);
+ ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
}
}
- return 0;
+ return (0);
}
#define MTUTIMO_DEFAULT (60*1)
static void
-in6_mtutimo_one(struct radix_node_head *rnh)
+in6_mtutimo_setwa(struct rib_head *rnh, uint32_t fibum, int af,
+ void *_arg)
{
- struct mtuex_arg arg;
+ struct mtuex_arg *arg;
- arg.rnh = rnh;
- arg.nextstop = time_uptime + MTUTIMO_DEFAULT;
- RADIX_NODE_HEAD_LOCK(rnh);
- rnh->rnh_walktree(rnh, in6_mtuexpire, &arg);
- RADIX_NODE_HEAD_UNLOCK(rnh);
+ arg = (struct mtuex_arg *)_arg;
+
+ arg->rnh = rnh;
}
static void
in6_mtutimo(void *rock)
{
CURVNET_SET_QUIET((struct vnet *) rock);
- struct radix_node_head *rnh;
struct timeval atv;
- u_int fibnum;
+ struct mtuex_arg arg;
- for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
- rnh = rt_tables_get_rnh(fibnum, AF_INET6);
- if (rnh != NULL)
- in6_mtutimo_one(rnh);
- }
+ rt_foreach_fib_walk(AF_INET6, in6_mtutimo_setwa, in6_mtuexpire, &arg);
atv.tv_sec = MTUTIMO_DEFAULT;
atv.tv_usec = 0;
@@ -256,10 +208,6 @@ in6_mtutimo(void *rock)
/*
* Initialize our routing tree.
- * XXX MRT When off == 0, we are being called from vfs_export.c
- * so just set up their table and leave. (we know what the correct
- * value should be so just use that).. FIX AFTER RELENG_7 is MFC'd
- * see also comments in in_inithead() vfs_export.c and domain.h
*/
static VNET_DEFINE(int, _in6_rt_was_here);
#define V__in6_rt_was_here VNET(_in6_rt_was_here)
@@ -267,24 +215,22 @@ static VNET_DEFINE(int, _in6_rt_was_here);
int
in6_inithead(void **head, int off)
{
- struct radix_node_head *rnh;
+ struct rib_head *rh;
- if (!rn_inithead(head, offsetof(struct sockaddr_in6, sin6_addr) << 3))
- return 0; /* See above */
+ rh = rt_table_init(offsetof(struct sockaddr_in6, sin6_addr) << 3);
+ if (rh == NULL)
+ return (0);
- if (off == 0) /* See above */
- return 1; /* only do the rest for the real thing */
-
- rnh = *head;
- rnh->rnh_addaddr = in6_addroute;
+ rh->rnh_addaddr = in6_addroute;
+ *head = (void *)rh;
if (V__in6_rt_was_here == 0) {
- callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE);
+ callout_init(&V_rtq_mtutimer, 1);
in6_mtutimo(curvnet); /* kick off timeout first time */
V__in6_rt_was_here = 1;
}
- return 1;
+ return (1);
}
#ifdef VIMAGE
@@ -293,6 +239,8 @@ in6_detachhead(void **head, int off)
{
callout_drain(&V_rtq_mtutimer);
+ rt_table_destroy((struct rib_head *)(*head));
+
return (1);
}
#endif
diff --git a/freebsd/sys/netinet6/in6_rss.h b/freebsd/sys/netinet6/in6_rss.h
new file mode 100644
index 00000000..f5b48c71
--- /dev/null
+++ b/freebsd/sys/netinet6/in6_rss.h
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET6_IN6_RSS_H_
+#define _NETINET6_IN6_RSS_H_
+
+#include <netinet/in.h> /* in_addr_t */
+
+/*
+ * Network stack interface to generate a hash for a protocol tuple.
+ */
+uint32_t rss_hash_ip6_4tuple(const struct in6_addr *src, u_short srcport,
+ const struct in6_addr *dst, u_short dstport);
+uint32_t rss_hash_ip6_2tuple(const struct in6_addr *src,
+ const struct in6_addr *dst);
+
+/*
+ * Functions to calculate a software RSS hash for a given mbuf or
+ * packet detail.
+ */
+int rss_mbuf_software_hash_v6(const struct mbuf *m, int dir,
+ uint32_t *hashval, uint32_t *hashtype);
+int rss_proto_software_hash_v6(const struct in6_addr *src,
+ const struct in6_addr *dst, u_short src_port,
+ u_short dst_port, int proto, uint32_t *hashval,
+ uint32_t *hashtype);
+struct mbuf * rss_soft_m2cpuid_v6(struct mbuf *m, uintptr_t source,
+ u_int *cpuid);
+
+#endif /* !_NETINET6_IN6_RSS_H_ */
diff --git a/freebsd/sys/netinet6/in6_src.c b/freebsd/sys/netinet6/in6_src.c
index a69ecf24..2a50a975 100644
--- a/freebsd/sys/netinet6/in6_src.c
+++ b/freebsd/sys/netinet6/in6_src.c
@@ -84,9 +84,11 @@ __FBSDID("$FreeBSD$");
#include <sys/time.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/rmlock.h>
#include <sys/sx.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/if_llatbl.h>
@@ -105,6 +107,7 @@ __FBSDID("$FreeBSD$");
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
+#include <netinet6/in6_fib.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
@@ -133,8 +136,11 @@ static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
struct ip6_moptions *, struct route_in6 *, struct ifnet **,
struct rtentry **, int, u_int);
static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
- struct ip6_moptions *, struct route_in6 *ro, struct ifnet **,
+ struct ip6_moptions *, struct ifnet **,
struct ifnet *, u_int);
+static int in6_selectsrc(uint32_t, struct sockaddr_in6 *,
+ struct ip6_pktopts *, struct inpcb *, struct ucred *,
+ struct ifnet **, struct in6_addr *);
static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *);
@@ -142,7 +148,7 @@ static void init_policy_queue(void);
static int add_addrsel_policyent(struct in6_addrpolicy *);
static int delete_addrsel_policyent(struct in6_addrpolicy *);
static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *),
- void *);
+ void *);
static int dump_addrsel_policyent(struct in6_addrpolicy *, void *);
static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
@@ -174,11 +180,12 @@ static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
goto out; /* XXX: we can't use 'break' here */ \
} while(0)
-int
-in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
- struct inpcb *inp, struct route_in6 *ro, struct ucred *cred,
+static int
+in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock,
+ struct ip6_pktopts *opts, struct inpcb *inp, struct ucred *cred,
struct ifnet **ifpp, struct in6_addr *srcp)
{
+ struct rm_priotracker in6_ifa_tracker;
struct in6_addr dst, tmp;
struct ifnet *ifp = NULL, *oifp = NULL;
struct in6_ifaddr *ia = NULL, *ia_best = NULL;
@@ -221,12 +228,9 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
*/
if (opts && (pi = opts->ip6po_pktinfo) &&
!IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
- struct sockaddr_in6 srcsock;
- struct in6_ifaddr *ia6;
-
/* get the outgoing interface */
- if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp,
- (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB))
+ if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp,
+ fibnum))
!= 0)
return (error);
@@ -237,33 +241,36 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
* the interface must be specified; otherwise, ifa_ifwithaddr()
* will fail matching the address.
*/
- bzero(&srcsock, sizeof(srcsock));
- srcsock.sin6_family = AF_INET6;
- srcsock.sin6_len = sizeof(srcsock);
- srcsock.sin6_addr = pi->ipi6_addr;
+ tmp = pi->ipi6_addr;
if (ifp) {
- error = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
+ error = in6_setscope(&tmp, ifp, &odstzone);
if (error)
return (error);
}
if (cred != NULL && (error = prison_local_ip6(cred,
- &srcsock.sin6_addr, (inp != NULL &&
- (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
+ &tmp, (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0)
return (error);
- ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(
- (struct sockaddr *)&srcsock);
- if (ia6 == NULL ||
- (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) {
- if (ia6 != NULL)
- ifa_free(&ia6->ia_ifa);
- return (EADDRNOTAVAIL);
- }
- pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */
+ /*
+ * If IPV6_BINDANY socket option is set, we allow to specify
+ * non local addresses as source address in IPV6_PKTINFO
+ * ancillary data.
+ */
+ if ((inp->inp_flags & INP_BINDANY) == 0) {
+ ia = in6ifa_ifwithaddr(&tmp, 0 /* XXX */);
+ if (ia == NULL || (ia->ia6_flags & (IN6_IFF_ANYCAST |
+ IN6_IFF_NOTREADY))) {
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
+ return (EADDRNOTAVAIL);
+ }
+ bcopy(&ia->ia_addr.sin6_addr, srcp, sizeof(*srcp));
+ ifa_free(&ia->ia_ifa);
+ } else
+ bcopy(&tmp, srcp, sizeof(*srcp));
+ pi->ipi6_addr = tmp; /* XXX: this overrides pi */
if (ifpp)
*ifpp = ifp;
- bcopy(&ia6->ia_addr.sin6_addr, srcp, sizeof(*srcp));
- ifa_free(&ia6->ia_ifa);
return (0);
}
@@ -291,7 +298,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
* the outgoing interface and the destination address.
*/
/* get the outgoing interface */
- if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp,
+ if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp,
(inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0)
return (error);
@@ -304,7 +311,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
return (error);
rule = 0;
- IN6_IFADDR_RLOCK();
+ IN6_IFADDR_RLOCK(&in6_ifa_tracker);
TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
int new_scope = -1, new_matchlen = -1;
struct in6_addrpolicy *new_policy = NULL;
@@ -445,6 +452,14 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
REPLACE(8);
/*
+ * Rule 9: prefer address with better virtual status.
+ */
+ if (ifa_preferred(&ia_best->ia_ifa, &ia->ia_ifa))
+ REPLACE(9);
+ if (ifa_preferred(&ia->ia_ifa, &ia_best->ia_ifa))
+ NEXT(9);
+
+ /*
* Rule 10: prefer address with `prefer_source' flag.
*/
if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0 &&
@@ -494,7 +509,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
}
if ((ia = ia_best) == NULL) {
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
IP6STAT_INC(ip6s_sources_none);
return (EADDRNOTAVAIL);
}
@@ -511,7 +526,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
tmp = ia->ia_addr.sin6_addr;
if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL &&
(inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) {
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
IP6STAT_INC(ip6s_sources_none);
return (EADDRNOTAVAIL);
}
@@ -530,11 +545,84 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
IP6STAT_INC(ip6s_sources_otherscope[best_scope]);
if (IFA6_IS_DEPRECATED(ia))
IP6STAT_INC(ip6s_sources_deprecated[best_scope]);
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
return (0);
}
/*
+ * Select source address based on @inp, @dstsock and @opts.
+ * Stores selected address to @srcp. If @scope_ambiguous is set,
+ * embed scope from selected outgoing interface. If @hlim pointer
+ * is provided, stores calculated hop limit there.
+ * Returns 0 on success.
+ */
+int
+in6_selectsrc_socket(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
+ struct inpcb *inp, struct ucred *cred, int scope_ambiguous,
+ struct in6_addr *srcp, int *hlim)
+{
+ struct ifnet *retifp;
+ uint32_t fibnum;
+ int error;
+
+ fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB;
+ retifp = NULL;
+
+ error = in6_selectsrc(fibnum, dstsock, opts, inp, cred, &retifp, srcp);
+ if (error != 0)
+ return (error);
+
+ if (hlim != NULL)
+ *hlim = in6_selecthlim(inp, retifp);
+
+ if (retifp == NULL || scope_ambiguous == 0)
+ return (0);
+
+ /*
+ * Application should provide a proper zone ID or the use of
+ * default zone IDs should be enabled. Unfortunately, some
+ * applications do not behave as it should, so we need a
+ * workaround. Even if an appropriate ID is not determined
+ * (when it's required), if we can determine the outgoing
+ * interface. determine the zone ID based on the interface.
+ */
+ error = in6_setscope(&dstsock->sin6_addr, retifp, NULL);
+
+ return (error);
+}
+
+/*
+ * Select source address based on @fibnum, @dst and @scopeid.
+ * Stores selected address to @srcp.
+ * Returns 0 on success.
+ *
+ * Used by non-socket based consumers (ND code mostly)
+ */
+int
+in6_selectsrc_addr(uint32_t fibnum, const struct in6_addr *dst,
+ uint32_t scopeid, struct ifnet *ifp, struct in6_addr *srcp,
+ int *hlim)
+{
+ struct ifnet *retifp;
+ struct sockaddr_in6 dst_sa;
+ int error;
+
+ retifp = ifp;
+ bzero(&dst_sa, sizeof(dst_sa));
+ dst_sa.sin6_family = AF_INET6;
+ dst_sa.sin6_len = sizeof(dst_sa);
+ dst_sa.sin6_addr = *dst;
+ dst_sa.sin6_scope_id = scopeid;
+ sa6_embedscope(&dst_sa, 0);
+
+ error = in6_selectsrc(fibnum, &dst_sa, NULL, NULL, NULL, &retifp, srcp);
+ if (hlim != NULL)
+ *hlim = in6_selecthlim(NULL, retifp);
+
+ return (error);
+}
+
+/*
* clone - meaningful only for bsdi and freebsd
*/
static int
@@ -548,6 +636,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
struct sockaddr_in6 *sin6_next;
struct in6_pktinfo *pi = NULL;
struct in6_addr *dst = &dstsock->sin6_addr;
+ uint32_t zoneid;
#if 0
char ip6buf[INET6_ADDRSTRLEN];
@@ -578,7 +667,6 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
} else
goto getroute;
}
-
/*
* If the destination address is a multicast address and the outgoing
* interface for the address is specified by the caller, use it.
@@ -587,6 +675,18 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) {
goto done; /* we do not need a route for multicast. */
}
+ /*
+ * If destination address is LLA or link- or node-local multicast,
+ * use it's embedded scope zone id to determine outgoing interface.
+ */
+ if (IN6_IS_ADDR_MC_LINKLOCAL(dst) ||
+ IN6_IS_ADDR_MC_NODELOCAL(dst)) {
+ zoneid = ntohs(in6_getscope(dst));
+ if (zoneid > 0) {
+ ifp = in6_getlinkifnet(zoneid);
+ goto done;
+ }
+ }
getroute:
/*
@@ -595,81 +695,38 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
*/
if (opts && opts->ip6po_nexthop) {
struct route_in6 *ron;
- struct llentry *la;
-
- sin6_next = satosin6(opts->ip6po_nexthop);
-
- /* at this moment, we only support AF_INET6 next hops */
- if (sin6_next->sin6_family != AF_INET6) {
- error = EAFNOSUPPORT; /* or should we proceed? */
- goto done;
- }
-
- /*
- * If the next hop is an IPv6 address, then the node identified
- * by that address must be a neighbor of the sending host.
- */
- ron = &opts->ip6po_nextroute;
- /*
- * XXX what do we do here?
- * PLZ to be fixing
- */
-
- if (ron->ro_rt == NULL) {
- in6_rtalloc(ron, fibnum); /* multi path case? */
- if (ron->ro_rt == NULL) {
- if (ron->ro_rt) {
- RTFREE(ron->ro_rt);
- ron->ro_rt = NULL;
- }
- error = EHOSTUNREACH;
+ sin6_next = satosin6(opts->ip6po_nexthop);
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6_next->sin6_addr)) {
+ /*
+ * Next hop is LLA, thus it should be neighbor.
+ * Determine outgoing interface by zone index.
+ */
+ zoneid = ntohs(in6_getscope(&sin6_next->sin6_addr));
+ if (zoneid > 0) {
+ ifp = in6_getlinkifnet(zoneid);
goto done;
- }
- }
-
- rt = ron->ro_rt;
- ifp = rt->rt_ifp;
- IF_AFDATA_RLOCK(ifp);
- la = lla_lookup(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6_next->sin6_addr);
- IF_AFDATA_RUNLOCK(ifp);
- if (la != NULL)
- LLE_RUNLOCK(la);
- else {
- error = EHOSTUNREACH;
- goto done;
- }
-#if 0
- if ((ron->ro_rt &&
- (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) !=
- (RTF_UP | RTF_LLINFO)) ||
- !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
- &sin6_next->sin6_addr)) {
- if (ron->ro_rt) {
- RTFREE(ron->ro_rt);
- ron->ro_rt = NULL;
}
- *satosin6(&ron->ro_dst) = *sin6_next;
}
+ ron = &opts->ip6po_nextroute;
+ /* Use a cached route if it exists and is valid. */
+ if (ron->ro_rt != NULL && (
+ (ron->ro_rt->rt_flags & RTF_UP) == 0 ||
+ ron->ro_dst.sin6_family != AF_INET6 ||
+ !IN6_ARE_ADDR_EQUAL(&ron->ro_dst.sin6_addr,
+ &sin6_next->sin6_addr)))
+ RO_RTFREE(ron);
if (ron->ro_rt == NULL) {
+ ron->ro_dst = *sin6_next;
in6_rtalloc(ron, fibnum); /* multi path case? */
- if (ron->ro_rt == NULL ||
- !(ron->ro_rt->rt_flags & RTF_LLINFO)) {
- if (ron->ro_rt) {
- RTFREE(ron->ro_rt);
- ron->ro_rt = NULL;
- }
- error = EHOSTUNREACH;
- goto done;
- }
}
-#endif
-
/*
- * When cloning is required, try to allocate a route to the
- * destination so that the caller can store path MTU
- * information.
+ * The node identified by that address must be a
+ * neighbor of the sending host.
*/
+ if (ron->ro_rt == NULL ||
+ (ron->ro_rt->rt_flags & RTF_GATEWAY) != 0)
+ error = EHOSTUNREACH;
goto done;
}
@@ -782,24 +839,27 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
static int
in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
- struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp,
+ struct ip6_moptions *mopts, struct ifnet **retifp,
struct ifnet *oifp, u_int fibnum)
{
int error;
struct route_in6 sro;
struct rtentry *rt = NULL;
+ int rt_flags;
KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__));
- if (ro == NULL) {
- bzero(&sro, sizeof(sro));
- ro = &sro;
- }
+ bzero(&sro, sizeof(sro));
+ rt_flags = 0;
+
+ error = selectroute(dstsock, opts, mopts, &sro, retifp, &rt, 1, fibnum);
- if ((error = selectroute(dstsock, opts, mopts, ro, retifp,
- &rt, 1, fibnum)) != 0) {
- if (ro == &sro && rt && rt == sro.ro_rt)
- RTFREE(rt);
+ if (rt)
+ rt_flags = rt->rt_flags;
+ if (rt && rt == sro.ro_rt)
+ RTFREE(rt);
+
+ if (error != 0) {
/* Help ND. See oifp comment in in6_selectsrc(). */
if (oifp != NULL && fibnum == RT_DEFAULT_FIB) {
*retifp = oifp;
@@ -825,16 +885,12 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
* Although this may not be very harmful, it should still be confusing.
* We thus reject the case here.
*/
- if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
- int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
- if (ro == &sro && rt && rt == sro.ro_rt)
- RTFREE(rt);
- return (flags);
+ if (rt_flags & (RTF_REJECT | RTF_BLACKHOLE)) {
+ error = (rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+ return (error);
}
- if (ro == &sro && rt && rt == sro.ro_rt)
- RTFREE(rt);
return (0);
}
@@ -882,19 +938,16 @@ in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp)
else if (ifp)
return (ND_IFINFO(ifp)->chlim);
else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
- struct route_in6 ro6;
- struct ifnet *lifp;
-
- bzero(&ro6, sizeof(ro6));
- ro6.ro_dst.sin6_family = AF_INET6;
- ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
- ro6.ro_dst.sin6_addr = in6p->in6p_faddr;
- in6_rtalloc(&ro6, in6p->inp_inc.inc_fibnum);
- if (ro6.ro_rt) {
- lifp = ro6.ro_rt->rt_ifp;
- RTFREE(ro6.ro_rt);
- if (lifp)
- return (ND_IFINFO(lifp)->chlim);
+ struct nhop6_basic nh6;
+ struct in6_addr dst;
+ uint32_t fibnum, scopeid;
+ int hlim;
+
+ fibnum = in6p->inp_inc.inc_fibnum;
+ in6_splitscope(&in6p->in6p_faddr, &dst, &scopeid);
+ if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6)==0){
+ hlim = ND_IFINFO(nh6.nh_ifp)->chlim;
+ return (hlim);
}
}
return (V_ip6_defhlim);
@@ -1005,7 +1058,6 @@ in6_src_sysctl(SYSCTL_HANDLER_ARGS)
int
in6_src_ioctl(u_long cmd, caddr_t data)
{
- int i;
struct in6_addrpolicy ent0;
if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
@@ -1019,10 +1071,7 @@ in6_src_ioctl(u_long cmd, caddr_t data)
if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
return (EINVAL);
/* clear trailing garbages (if any) of the prefix address. */
- for (i = 0; i < 4; i++) {
- ent0.addr.sin6_addr.s6_addr32[i] &=
- ent0.addrmask.sin6_addr.s6_addr32[i];
- }
+ IN6_MASK_ADDR(&ent0.addr.sin6_addr, &ent0.addrmask.sin6_addr);
ent0.use = 0;
switch (cmd) {
@@ -1125,8 +1174,7 @@ delete_addrsel_policyent(struct in6_addrpolicy *key)
}
static int
-walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *),
- void *w)
+walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), void *w)
{
struct addrsel_policyent *pol;
int error = 0;
diff --git a/freebsd/sys/netinet6/in6_var.h b/freebsd/sys/netinet6/in6_var.h
index 90530a68..77e5920b 100644
--- a/freebsd/sys/netinet6/in6_var.h
+++ b/freebsd/sys/netinet6/in6_var.h
@@ -65,8 +65,10 @@
#define _NETINET6_IN6_VAR_H_
#include <sys/tree.h>
+#include <sys/counter.h>
#ifdef _KERNEL
+#include <sys/fnv_hash.h>
#include <sys/libkern.h>
#endif
@@ -95,19 +97,20 @@ struct in6_addrlifetime {
struct nd_ifinfo;
struct scope6_id;
struct lltable;
-struct mld_ifinfo;
+struct mld_ifsoftc;
struct in6_ifextra {
- struct in6_ifstat *in6_ifstat;
- struct icmp6_ifstat *icmp6_ifstat;
+ counter_u64_t *in6_ifstat;
+ counter_u64_t *icmp6_ifstat;
struct nd_ifinfo *nd_ifinfo;
struct scope6_id *scope6_id;
struct lltable *lltable;
- struct mld_ifinfo *mld_ifinfo;
+ struct mld_ifsoftc *mld_ifinfo;
};
#define LLTABLE6(ifp) (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->lltable)
+#ifdef _KERNEL
struct in6_ifaddr {
struct ifaddr ia_ifa; /* protocol-independent info */
#define ia_ifp ia_ifa.ifa_ifp
@@ -131,10 +134,14 @@ struct in6_ifaddr {
/* multicast addresses joined from the kernel */
LIST_HEAD(, in6_multi_mship) ia6_memberships;
+ /* entry in bucket of inet6 addresses */
+ LIST_ENTRY(in6_ifaddr) ia6_hash;
};
/* List of in6_ifaddr's. */
TAILQ_HEAD(in6_ifaddrhead, in6_ifaddr);
+LIST_HEAD(in6_ifaddrlisthead, in6_ifaddr);
+#endif /* _KERNEL */
/* control structure to manage address selection policy */
struct in6_addrpolicy {
@@ -149,37 +156,37 @@ struct in6_addrpolicy {
* IPv6 interface statistics, as defined in RFC2465 Ipv6IfStatsEntry (p12).
*/
struct in6_ifstat {
- u_quad_t ifs6_in_receive; /* # of total input datagram */
- u_quad_t ifs6_in_hdrerr; /* # of datagrams with invalid hdr */
- u_quad_t ifs6_in_toobig; /* # of datagrams exceeded MTU */
- u_quad_t ifs6_in_noroute; /* # of datagrams with no route */
- u_quad_t ifs6_in_addrerr; /* # of datagrams with invalid dst */
- u_quad_t ifs6_in_protounknown; /* # of datagrams with unknown proto */
+ uint64_t ifs6_in_receive; /* # of total input datagram */
+ uint64_t ifs6_in_hdrerr; /* # of datagrams with invalid hdr */
+ uint64_t ifs6_in_toobig; /* # of datagrams exceeded MTU */
+ uint64_t ifs6_in_noroute; /* # of datagrams with no route */
+ uint64_t ifs6_in_addrerr; /* # of datagrams with invalid dst */
+ uint64_t ifs6_in_protounknown; /* # of datagrams with unknown proto */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_in_truncated; /* # of truncated datagrams */
- u_quad_t ifs6_in_discard; /* # of discarded datagrams */
+ uint64_t ifs6_in_truncated; /* # of truncated datagrams */
+ uint64_t ifs6_in_discard; /* # of discarded datagrams */
/* NOTE: fragment timeout is not here */
- u_quad_t ifs6_in_deliver; /* # of datagrams delivered to ULP */
+ uint64_t ifs6_in_deliver; /* # of datagrams delivered to ULP */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_out_forward; /* # of datagrams forwarded */
+ uint64_t ifs6_out_forward; /* # of datagrams forwarded */
/* NOTE: increment on outgoing if */
- u_quad_t ifs6_out_request; /* # of outgoing datagrams from ULP */
+ uint64_t ifs6_out_request; /* # of outgoing datagrams from ULP */
/* NOTE: does not include forwrads */
- u_quad_t ifs6_out_discard; /* # of discarded datagrams */
- u_quad_t ifs6_out_fragok; /* # of datagrams fragmented */
- u_quad_t ifs6_out_fragfail; /* # of datagrams failed on fragment */
- u_quad_t ifs6_out_fragcreat; /* # of fragment datagrams */
+ uint64_t ifs6_out_discard; /* # of discarded datagrams */
+ uint64_t ifs6_out_fragok; /* # of datagrams fragmented */
+ uint64_t ifs6_out_fragfail; /* # of datagrams failed on fragment */
+ uint64_t ifs6_out_fragcreat; /* # of fragment datagrams */
/* NOTE: this is # after fragment */
- u_quad_t ifs6_reass_reqd; /* # of incoming fragmented packets */
+ uint64_t ifs6_reass_reqd; /* # of incoming fragmented packets */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_reass_ok; /* # of reassembled packets */
+ uint64_t ifs6_reass_ok; /* # of reassembled packets */
/* NOTE: this is # after reass */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_reass_fail; /* # of reass failures */
+ uint64_t ifs6_reass_fail; /* # of reass failures */
/* NOTE: may not be packet count */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_in_mcast; /* # of inbound multicast datagrams */
- u_quad_t ifs6_out_mcast; /* # of outbound multicast datagrams */
+ uint64_t ifs6_in_mcast; /* # of inbound multicast datagrams */
+ uint64_t ifs6_out_mcast; /* # of outbound multicast datagrams */
};
/*
@@ -191,77 +198,77 @@ struct icmp6_ifstat {
* Input statistics
*/
/* ipv6IfIcmpInMsgs, total # of input messages */
- u_quad_t ifs6_in_msg;
+ uint64_t ifs6_in_msg;
/* ipv6IfIcmpInErrors, # of input error messages */
- u_quad_t ifs6_in_error;
+ uint64_t ifs6_in_error;
/* ipv6IfIcmpInDestUnreachs, # of input dest unreach errors */
- u_quad_t ifs6_in_dstunreach;
+ uint64_t ifs6_in_dstunreach;
/* ipv6IfIcmpInAdminProhibs, # of input administratively prohibited errs */
- u_quad_t ifs6_in_adminprohib;
+ uint64_t ifs6_in_adminprohib;
/* ipv6IfIcmpInTimeExcds, # of input time exceeded errors */
- u_quad_t ifs6_in_timeexceed;
+ uint64_t ifs6_in_timeexceed;
/* ipv6IfIcmpInParmProblems, # of input parameter problem errors */
- u_quad_t ifs6_in_paramprob;
+ uint64_t ifs6_in_paramprob;
/* ipv6IfIcmpInPktTooBigs, # of input packet too big errors */
- u_quad_t ifs6_in_pkttoobig;
+ uint64_t ifs6_in_pkttoobig;
/* ipv6IfIcmpInEchos, # of input echo requests */
- u_quad_t ifs6_in_echo;
+ uint64_t ifs6_in_echo;
/* ipv6IfIcmpInEchoReplies, # of input echo replies */
- u_quad_t ifs6_in_echoreply;
+ uint64_t ifs6_in_echoreply;
/* ipv6IfIcmpInRouterSolicits, # of input router solicitations */
- u_quad_t ifs6_in_routersolicit;
+ uint64_t ifs6_in_routersolicit;
/* ipv6IfIcmpInRouterAdvertisements, # of input router advertisements */
- u_quad_t ifs6_in_routeradvert;
+ uint64_t ifs6_in_routeradvert;
/* ipv6IfIcmpInNeighborSolicits, # of input neighbor solicitations */
- u_quad_t ifs6_in_neighborsolicit;
+ uint64_t ifs6_in_neighborsolicit;
/* ipv6IfIcmpInNeighborAdvertisements, # of input neighbor advertisements */
- u_quad_t ifs6_in_neighboradvert;
+ uint64_t ifs6_in_neighboradvert;
/* ipv6IfIcmpInRedirects, # of input redirects */
- u_quad_t ifs6_in_redirect;
+ uint64_t ifs6_in_redirect;
/* ipv6IfIcmpInGroupMembQueries, # of input MLD queries */
- u_quad_t ifs6_in_mldquery;
+ uint64_t ifs6_in_mldquery;
/* ipv6IfIcmpInGroupMembResponses, # of input MLD reports */
- u_quad_t ifs6_in_mldreport;
+ uint64_t ifs6_in_mldreport;
/* ipv6IfIcmpInGroupMembReductions, # of input MLD done */
- u_quad_t ifs6_in_mlddone;
+ uint64_t ifs6_in_mlddone;
/*
* Output statistics. We should solve unresolved routing problem...
*/
/* ipv6IfIcmpOutMsgs, total # of output messages */
- u_quad_t ifs6_out_msg;
+ uint64_t ifs6_out_msg;
/* ipv6IfIcmpOutErrors, # of output error messages */
- u_quad_t ifs6_out_error;
+ uint64_t ifs6_out_error;
/* ipv6IfIcmpOutDestUnreachs, # of output dest unreach errors */
- u_quad_t ifs6_out_dstunreach;
+ uint64_t ifs6_out_dstunreach;
/* ipv6IfIcmpOutAdminProhibs, # of output administratively prohibited errs */
- u_quad_t ifs6_out_adminprohib;
+ uint64_t ifs6_out_adminprohib;
/* ipv6IfIcmpOutTimeExcds, # of output time exceeded errors */
- u_quad_t ifs6_out_timeexceed;
+ uint64_t ifs6_out_timeexceed;
/* ipv6IfIcmpOutParmProblems, # of output parameter problem errors */
- u_quad_t ifs6_out_paramprob;
+ uint64_t ifs6_out_paramprob;
/* ipv6IfIcmpOutPktTooBigs, # of output packet too big errors */
- u_quad_t ifs6_out_pkttoobig;
+ uint64_t ifs6_out_pkttoobig;
/* ipv6IfIcmpOutEchos, # of output echo requests */
- u_quad_t ifs6_out_echo;
+ uint64_t ifs6_out_echo;
/* ipv6IfIcmpOutEchoReplies, # of output echo replies */
- u_quad_t ifs6_out_echoreply;
+ uint64_t ifs6_out_echoreply;
/* ipv6IfIcmpOutRouterSolicits, # of output router solicitations */
- u_quad_t ifs6_out_routersolicit;
+ uint64_t ifs6_out_routersolicit;
/* ipv6IfIcmpOutRouterAdvertisements, # of output router advertisements */
- u_quad_t ifs6_out_routeradvert;
+ uint64_t ifs6_out_routeradvert;
/* ipv6IfIcmpOutNeighborSolicits, # of output neighbor solicitations */
- u_quad_t ifs6_out_neighborsolicit;
+ uint64_t ifs6_out_neighborsolicit;
/* ipv6IfIcmpOutNeighborAdvertisements, # of output neighbor advertisements */
- u_quad_t ifs6_out_neighboradvert;
+ uint64_t ifs6_out_neighboradvert;
/* ipv6IfIcmpOutRedirects, # of output redirects */
- u_quad_t ifs6_out_redirect;
+ uint64_t ifs6_out_redirect;
/* ipv6IfIcmpOutGroupMembQueries, # of output MLD queries */
- u_quad_t ifs6_out_mldquery;
+ uint64_t ifs6_out_mldquery;
/* ipv6IfIcmpOutGroupMembResponses, # of output MLD reports */
- u_quad_t ifs6_out_mldreport;
+ uint64_t ifs6_out_mldreport;
/* ipv6IfIcmpOutGroupMembReductions, # of output MLD done */
- u_quad_t ifs6_out_mlddone;
+ uint64_t ifs6_out_mlddone;
};
struct in6_ifreq {
@@ -287,6 +294,17 @@ struct in6_aliasreq {
struct sockaddr_in6 ifra_prefixmask;
int ifra_flags;
struct in6_addrlifetime ifra_lifetime;
+ int ifra_vhid;
+};
+
+/* pre-10.x compat */
+struct oin6_aliasreq {
+ char ifra_name[IFNAMSIZ];
+ struct sockaddr_in6 ifra_addr;
+ struct sockaddr_in6 ifra_dstaddr;
+ struct sockaddr_in6 ifra_prefixmask;
+ int ifra_flags;
+ struct in6_addrlifetime ifra_lifetime;
};
/* prefix type macro */
@@ -391,6 +409,12 @@ struct in6_rrenumreq {
(((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \
(((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \
(((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 )
+#define IN6_MASK_ADDR(a, m) do { \
+ (a)->s6_addr32[0] &= (m)->s6_addr32[0]; \
+ (a)->s6_addr32[1] &= (m)->s6_addr32[1]; \
+ (a)->s6_addr32[2] &= (m)->s6_addr32[2]; \
+ (a)->s6_addr32[3] &= (m)->s6_addr32[3]; \
+} while (0)
#endif
#define SIOCSIFADDR_IN6 _IOW('i', 12, struct in6_ifreq)
@@ -409,7 +433,8 @@ struct in6_rrenumreq {
#define SIOCGIFNETMASK_IN6 _IOWR('i', 37, struct in6_ifreq)
#define SIOCDIFADDR_IN6 _IOW('i', 25, struct in6_ifreq)
-#define SIOCAIFADDR_IN6 _IOW('i', 26, struct in6_aliasreq)
+#define OSIOCAIFADDR_IN6 _IOW('i', 26, struct oin6_aliasreq)
+#define SIOCAIFADDR_IN6 _IOW('i', 27, struct in6_aliasreq)
#define SIOCSIFPHYADDR_IN6 _IOW('i', 70, struct in6_aliasreq)
#define SIOCGIFPSRCADDR_IN6 _IOWR('i', 71, struct in6_ifreq)
@@ -417,11 +442,6 @@ struct in6_rrenumreq {
#define SIOCGIFAFLAG_IN6 _IOWR('i', 73, struct in6_ifreq)
-#define SIOCGDRLST_IN6 _IOWR('i', 74, struct in6_drlist)
-#ifdef _KERNEL
-/* XXX: SIOCGPRLST_IN6 is exposed in KAME but in6_oprlist is not. */
-#define SIOCGPRLST_IN6 _IOWR('i', 75, struct in6_oprlist)
-#endif
#ifdef _KERNEL
#define OSIOCGIFINFO_IN6 _IOWR('i', 76, struct in6_ondireq)
#endif
@@ -433,7 +453,6 @@ struct in6_rrenumreq {
#define SIOCSRTRFLUSH_IN6 _IOWR('i', 80, struct in6_ifreq)
#define SIOCGIFALIFETIME_IN6 _IOWR('i', 81, struct in6_ifreq)
-#define SIOCSIFALIFETIME_IN6 _IOWR('i', 82, struct in6_ifreq)
#define SIOCGIFSTAT_IN6 _IOWR('i', 83, struct in6_ifreq)
#define SIOCGIFSTAT_ICMP6 _IOWR('i', 84, struct in6_ifreq)
@@ -469,14 +488,11 @@ struct in6_rrenumreq {
#define IN6_IFF_DETACHED 0x08 /* may be detached from the link */
#define IN6_IFF_DEPRECATED 0x10 /* deprecated address */
#define IN6_IFF_NODAD 0x20 /* don't perform DAD on this address
- * (used only at first SIOC* call)
+ * (obsolete)
*/
#define IN6_IFF_AUTOCONF 0x40 /* autoconfigurable address. */
#define IN6_IFF_TEMPORARY 0x80 /* temporary (anonymous) address. */
#define IN6_IFF_PREFER_SOURCE 0x0100 /* preferred address for SAS */
-#define IN6_IFF_NOPFX 0x8000 /* skip kernel prefix management.
- * XXX: this should be temporary.
- */
/* do not input/output */
#define IN6_IFF_NOTREADY (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)
@@ -488,26 +504,45 @@ struct in6_rrenumreq {
#ifdef _KERNEL
VNET_DECLARE(struct in6_ifaddrhead, in6_ifaddrhead);
+VNET_DECLARE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl);
+VNET_DECLARE(u_long, in6_ifaddrhmask);
#define V_in6_ifaddrhead VNET(in6_ifaddrhead)
+#define V_in6_ifaddrhashtbl VNET(in6_ifaddrhashtbl)
+#define V_in6_ifaddrhmask VNET(in6_ifaddrhmask)
+
+#define IN6ADDR_NHASH_LOG2 8
+#define IN6ADDR_NHASH (1 << IN6ADDR_NHASH_LOG2)
+#define IN6ADDR_HASHVAL(x) (in6_addrhash(x))
+#define IN6ADDR_HASH(x) \
+ (&V_in6_ifaddrhashtbl[IN6ADDR_HASHVAL(x) & V_in6_ifaddrhmask])
+
+static __inline uint32_t
+in6_addrhash(const struct in6_addr *in6)
+{
+ uint32_t x;
+
+ x = in6->s6_addr32[0] ^ in6->s6_addr32[1] ^ in6->s6_addr32[2] ^
+ in6->s6_addr32[3];
+ return (fnv_32_buf(&x, sizeof(x), FNV1_32_INIT));
+}
+
+extern struct rmlock in6_ifaddr_lock;
+#define IN6_IFADDR_LOCK_ASSERT() rm_assert(&in6_ifaddr_lock, RA_LOCKED)
+#define IN6_IFADDR_RLOCK(t) rm_rlock(&in6_ifaddr_lock, (t))
+#define IN6_IFADDR_RLOCK_ASSERT() rm_assert(&in6_ifaddr_lock, RA_RLOCKED)
+#define IN6_IFADDR_RUNLOCK(t) rm_runlock(&in6_ifaddr_lock, (t))
+#define IN6_IFADDR_WLOCK() rm_wlock(&in6_ifaddr_lock)
+#define IN6_IFADDR_WLOCK_ASSERT() rm_assert(&in6_ifaddr_lock, RA_WLOCKED)
+#define IN6_IFADDR_WUNLOCK() rm_wunlock(&in6_ifaddr_lock)
-extern struct rwlock in6_ifaddr_lock;
-#define IN6_IFADDR_LOCK_ASSERT( ) rw_assert(&in6_ifaddr_lock, RA_LOCKED)
-#define IN6_IFADDR_RLOCK() rw_rlock(&in6_ifaddr_lock)
-#define IN6_IFADDR_RLOCK_ASSERT() rw_assert(&in6_ifaddr_lock, RA_RLOCKED)
-#define IN6_IFADDR_RUNLOCK() rw_runlock(&in6_ifaddr_lock)
-#define IN6_IFADDR_WLOCK() rw_wlock(&in6_ifaddr_lock)
-#define IN6_IFADDR_WLOCK_ASSERT() rw_assert(&in6_ifaddr_lock, RA_WLOCKED)
-#define IN6_IFADDR_WUNLOCK() rw_wunlock(&in6_ifaddr_lock)
-
-VNET_DECLARE(struct icmp6stat, icmp6stat);
-#define V_icmp6stat VNET(icmp6stat)
#define in6_ifstat_inc(ifp, tag) \
do { \
if (ifp) \
- ((struct in6_ifextra *)((ifp)->if_afdata[AF_INET6]))->in6_ifstat->tag++; \
+ counter_u64_add(((struct in6_ifextra *) \
+ ((ifp)->if_afdata[AF_INET6]))->in6_ifstat[ \
+ offsetof(struct in6_ifstat, tag) / sizeof(uint64_t)], 1);\
} while (/*CONSTCOND*/ 0)
-extern struct in6_addr zeroin6_addr;
extern u_char inet6ctlerrmap[];
VNET_DECLARE(unsigned long, in6_maxmtu);
#define V_in6_maxmtu VNET(in6_maxmtu)
@@ -552,7 +587,6 @@ ip6_msource_cmp(const struct ip6_msource *a, const struct ip6_msource *b)
return (memcmp(&a->im6s_addr, &b->im6s_addr, sizeof(struct in6_addr)));
}
RB_PROTOTYPE(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp);
-#endif /* _KERNEL */
/*
* IPv6 multicast PCB-layer group filter descriptor.
@@ -603,12 +637,12 @@ struct in6_multi {
u_int in6m_timer; /* MLD6 listener report timer */
/* New fields for MLDv2 follow. */
- struct mld_ifinfo *in6m_mli; /* MLD info */
+ struct mld_ifsoftc *in6m_mli; /* MLD info */
SLIST_ENTRY(in6_multi) in6m_nrele; /* to-be-released by MLD */
struct ip6_msource_tree in6m_srcs; /* tree of sources */
u_long in6m_nsrc; /* # of tree entries */
- struct ifqueue in6m_scq; /* queue of pending
+ struct mbufq in6m_scq; /* queue of pending
* state-change packets */
struct timeval in6m_lastgsrtv; /* last G-S-R query */
uint16_t in6m_sctimer; /* state-change timer */
@@ -652,8 +686,6 @@ im6s_get_mode(const struct in6_multi *inm, const struct ip6_msource *ims,
return (MCAST_UNDEFINED);
}
-#ifdef _KERNEL
-
/*
* Lock macros for IPv6 layer multicast address lists. IPv6 lock goes
* before link layer multicast locks in the lock order. In most cases,
@@ -756,18 +788,20 @@ int in6_control(struct socket *, u_long, caddr_t, struct ifnet *,
struct thread *);
int in6_update_ifa(struct ifnet *, struct in6_aliasreq *,
struct in6_ifaddr *, int);
+void in6_prepare_ifra(struct in6_aliasreq *, const struct in6_addr *,
+ const struct in6_addr *);
void in6_purgeaddr(struct ifaddr *);
int in6if_do_dad(struct ifnet *);
-void in6_purgeif(struct ifnet *);
void in6_savemkludge(struct in6_ifaddr *);
void *in6_domifattach(struct ifnet *);
void in6_domifdetach(struct ifnet *, void *);
+int in6_domifmtu(struct ifnet *);
void in6_setmaxmtu(void);
int in6_if2idlen(struct ifnet *);
struct in6_ifaddr *in6ifa_ifpforlinklocal(struct ifnet *, int);
-struct in6_ifaddr *in6ifa_ifpwithaddr(struct ifnet *, struct in6_addr *);
+struct in6_ifaddr *in6ifa_ifpwithaddr(struct ifnet *, const struct in6_addr *);
+struct in6_ifaddr *in6ifa_ifwithaddr(const struct in6_addr *, uint32_t);
struct in6_ifaddr *in6ifa_llaonifp(struct ifnet *);
-char *ip6_sprintf(char *, const struct in6_addr *);
int in6_addr2zoneid(struct ifnet *, struct in6_addr *, u_int32_t *);
int in6_matchlen(struct in6_addr *, struct in6_addr *);
int in6_are_prefix_equal(struct in6_addr *, struct in6_addr *, int);
@@ -777,12 +811,11 @@ int in6_prefix_ioctl(struct socket *, u_long, caddr_t,
int in6_prefix_add_ifid(int, struct in6_ifaddr *);
void in6_prefix_remove_ifid(int, struct in6_ifaddr *);
void in6_purgeprefix(struct ifnet *);
-void in6_ifremloop(struct ifaddr *);
-void in6_ifaddloop(struct ifaddr *);
int in6_is_addr_deprecated(struct sockaddr_in6 *);
int in6_src_ioctl(u_long, caddr_t);
+void in6_newaddrmsg(struct in6_ifaddr *, int);
/*
* Extended API for IPv6 FIB support.
*/
diff --git a/freebsd/sys/netinet6/ip6_forward.c b/freebsd/sys/netinet6/ip6_forward.c
index 6efae91a..50583537 100644
--- a/freebsd/sys/netinet6/ip6_forward.c
+++ b/freebsd/sys/netinet6/ip6_forward.c
@@ -36,7 +36,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_ipstealth.h>
@@ -53,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/pfil.h>
@@ -72,13 +72,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_pcb.h>
#ifdef IPSEC
+#include <netinet6/ip6_ipsec.h>
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
#include <netipsec/key.h>
#endif /* IPSEC */
-#include <netinet6/ip6protosw.h>
-
/*
* Forward a packet. If some error occurs return the sender
* an icmp packet. Note we can't always generate a meaningful
@@ -105,29 +104,10 @@ ip6_forward(struct mbuf *m, int srcrt)
struct in6_addr src_in6, dst_in6, odst;
#ifdef IPSEC
struct secpolicy *sp = NULL;
- int ipsecrt = 0;
-#endif
-#ifdef SCTP
- int sw_csum;
#endif
struct m_tag *fwd_tag;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
-#ifdef IPSEC
- /*
- * Check AH/ESP integrity.
- */
- /*
- * Don't increment ip6s_cantforward because this is the check
- * before forwarding packet actually.
- */
- if (ipsec6_in_reject(m, NULL)) {
- IPSEC6STAT_INC(in_polvio);
- m_freem(m);
- return;
- }
-#endif /* IPSEC */
-
/*
* Do not forward packets to multicast destination (should be handled
* by ip6_mforward().
@@ -139,8 +119,8 @@ ip6_forward(struct mbuf *m, int srcrt)
IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
IP6STAT_INC(ip6s_cantforward);
/* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
- if (V_ip6_log_time + V_ip6_log_interval < time_second) {
- V_ip6_log_time = time_second;
+ if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
+ V_ip6_log_time = time_uptime;
log(LOG_DEBUG,
"cannot forward "
"from %s to %s nxt %d received on %s\n",
@@ -152,6 +132,17 @@ ip6_forward(struct mbuf *m, int srcrt)
m_freem(m);
return;
}
+#ifdef IPSEC
+ /*
+ * Check if this packet has an active SA and needs to be dropped
+ * instead of forwarded.
+ */
+ if (ip6_ipsec_fwd(m) != 0) {
+ IP6STAT_INC(ip6s_cantforward);
+ m_freem(m);
+ return;
+ }
+#endif /* IPSEC */
#ifdef IPSTEALTH
if (!V_ip6stealth) {
@@ -181,10 +172,9 @@ ip6_forward(struct mbuf *m, int srcrt)
#ifdef IPSEC
/* get a security policy for this packet */
- sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
- IP_FORWARDING, &error);
+ sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, &error);
if (sp == NULL) {
- IPSEC6STAT_INC(out_inval);
+ IPSEC6STAT_INC(ips_out_inval);
IP6STAT_INC(ip6s_cantforward);
if (mcopy) {
#if 0
@@ -205,7 +195,7 @@ ip6_forward(struct mbuf *m, int srcrt)
/*
* This packet is just discarded.
*/
- IPSEC6STAT_INC(out_polvio);
+ IPSEC6STAT_INC(ips_out_polvio);
IP6STAT_INC(ip6s_cantforward);
KEY_FREESP(&sp);
if (mcopy) {
@@ -253,12 +243,10 @@ ip6_forward(struct mbuf *m, int srcrt)
{
struct ipsecrequest *isr = NULL;
- struct ipsec_output_state state;
/*
* when the kernel forwards a packet, it is not proper to apply
- * IPsec transport mode to the packet is not proper. this check
- * avoid from this.
+ * IPsec transport mode to the packet. This check avoid from this.
* at present, if there is even a transport mode SA request in the
* security policy, the kernel does not apply IPsec to the packet.
* this check is not enough because the following case is valid.
@@ -286,18 +274,27 @@ ip6_forward(struct mbuf *m, int srcrt)
*
* IPv6 [ESP|AH] IPv6 [extension headers] payload
*/
- bzero(&state, sizeof(state));
- state.m = m;
- state.ro = NULL; /* update at ipsec6_output_tunnel() */
- state.dst = NULL; /* update at ipsec6_output_tunnel() */
-
- error = ipsec6_output_tunnel(&state, sp, 0);
- m = state.m;
- KEY_FREESP(&sp);
+ /*
+ * If we need to encapsulate the packet, do it here
+ * ipsec6_proces_packet will send the packet using ip6_output
+ */
+ error = ipsec6_process_packet(m, sp->req);
+ /* Release SP if an error occurred */
+ if (error != 0)
+ KEY_FREESP(&sp);
+ if (error == EJUSTRETURN) {
+ /*
+ * We had a SP with a level of 'use' and no SA. We
+ * will just continue to process the packet without
+ * IPsec processing.
+ */
+ error = 0;
+ goto skip_ipsec;
+ }
if (error) {
- /* mbuf is already reclaimed in ipsec6_output_tunnel. */
+ /* mbuf is already reclaimed in ipsec6_process_packet. */
switch (error) {
case EHOSTUNREACH:
case ENETUNREACH:
@@ -320,7 +317,6 @@ ip6_forward(struct mbuf *m, int srcrt)
m_freem(mcopy);
#endif
}
- m_freem(m);
return;
} else {
/*
@@ -332,25 +328,7 @@ ip6_forward(struct mbuf *m, int srcrt)
m = NULL;
goto freecopy;
}
-
- if ((m != NULL) && (ip6 != mtod(m, struct ip6_hdr *)) ){
- /*
- * now tunnel mode headers are added. we are originating
- * packet instead of forwarding the packet.
- */
- ip6_output(m, NULL, NULL, IPV6_FORWARDING/*XXX*/, NULL, NULL,
- NULL);
- goto freecopy;
- }
-
- /* adjust pointer */
- dst = (struct sockaddr_in6 *)state.dst;
- rt = state.ro ? state.ro->ro_rt : NULL;
- if (dst != NULL && rt != NULL)
- ipsecrt = 1;
}
- if (ipsecrt)
- goto skip_routing;
skip_ipsec:
#endif
again:
@@ -361,6 +339,7 @@ again:
dst->sin6_addr = ip6->ip6_dst;
again2:
rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m));
+ rt = rin6.ro_rt;
if (rin6.ro_rt != NULL)
RT_UNLOCK(rin6.ro_rt);
else {
@@ -372,10 +351,6 @@ again2:
}
goto bad;
}
- rt = rin6.ro_rt;
-#ifdef IPSEC
-skip_routing:
-#endif
/*
* Source scope check: if a packet can't be delivered to its
@@ -398,17 +373,13 @@ skip_routing:
IP6STAT_INC(ip6s_badscope);
goto bad;
}
- if (inzone != outzone
-#ifdef IPSEC
- && !ipsecrt
-#endif
- ) {
+ if (inzone != outzone) {
IP6STAT_INC(ip6s_cantforward);
IP6STAT_INC(ip6s_badscope);
in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard);
- if (V_ip6_log_time + V_ip6_log_interval < time_second) {
- V_ip6_log_time = time_second;
+ if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
+ V_ip6_log_time = time_uptime;
log(LOG_DEBUG,
"cannot forward "
"src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
@@ -439,46 +410,6 @@ skip_routing:
goto bad;
}
- if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) {
- in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
- if (mcopy) {
- u_long mtu;
-#ifdef IPSEC
- struct secpolicy *sp;
- int ipsecerror;
- size_t ipsechdrsiz;
-#endif /* IPSEC */
-
- mtu = IN6_LINKMTU(rt->rt_ifp);
-#ifdef IPSEC
- /*
- * When we do IPsec tunnel ingress, we need to play
- * with the link value (decrement IPsec header size
- * from mtu value). The code is much simpler than v4
- * case, as we have the outgoing interface for
- * encapsulated packet as "rt->rt_ifp".
- */
- sp = ipsec_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND,
- IP_FORWARDING, &ipsecerror);
- if (sp) {
- ipsechdrsiz = ipsec_hdrsiz(mcopy,
- IPSEC_DIR_OUTBOUND, NULL);
- if (ipsechdrsiz < mtu)
- mtu -= ipsechdrsiz;
- }
-
- /*
- * if mtu becomes less than minimum MTU,
- * tell minimum MTU (and I'll need to fragment it).
- */
- if (mtu < IPV6_MMTU)
- mtu = IPV6_MMTU;
-#endif /* IPSEC */
- icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
- }
- goto bad;
- }
-
if (rt->rt_flags & RTF_GATEWAY)
dst = (struct sockaddr_in6 *)rt->rt_gateway;
@@ -492,9 +423,6 @@ skip_routing:
* modified by a redirect.
*/
if (V_ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt &&
-#ifdef IPSEC
- !ipsecrt &&
-#endif /* IPSEC */
(rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) {
if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) {
/*
@@ -573,23 +501,12 @@ skip_routing:
if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
m->m_flags |= M_SKIP_FIREWALL;
/* If destination is now ourself drop to ip6_input(). */
- if (in6_localip(&ip6->ip6_dst)) {
+ if (in6_localip(&ip6->ip6_dst))
m->m_flags |= M_FASTFWD_OURS;
- if (m->m_pkthdr.rcvif == NULL)
- m->m_pkthdr.rcvif = V_loif;
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
- m->m_pkthdr.csum_flags |=
- CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
- m->m_pkthdr.csum_data = 0xffff;
- }
-#ifdef SCTP
- if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
- m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
-#endif
- error = netisr_queue(NETISR_IPV6, m);
- goto out;
- } else
+ else {
+ RTFREE(rt);
goto again; /* Redo the routing table lookup. */
+ }
}
/* See if local, if yes, send it to netisr. */
@@ -616,11 +533,46 @@ skip_routing:
m->m_flags |= M_SKIP_FIREWALL;
m->m_flags &= ~M_IP6_NEXTHOP;
m_tag_delete(m, fwd_tag);
+ RTFREE(rt);
goto again2;
}
pass:
- error = nd6_output(rt->rt_ifp, origifp, m, dst, rt);
+ /* See if the size was changed by the packet filter. */
+ if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) {
+ in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
+ if (mcopy) {
+ u_long mtu;
+#ifdef IPSEC
+ size_t ipsechdrsiz;
+#endif /* IPSEC */
+
+ mtu = IN6_LINKMTU(rt->rt_ifp);
+#ifdef IPSEC
+ /*
+ * When we do IPsec tunnel ingress, we need to play
+ * with the link value (decrement IPsec header size
+ * from mtu value). The code is much simpler than v4
+ * case, as we have the outgoing interface for
+ * encapsulated packet as "rt->rt_ifp".
+ */
+ ipsechdrsiz = ipsec_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND,
+ NULL);
+ if (ipsechdrsiz < mtu)
+ mtu -= ipsechdrsiz;
+ /*
+ * if mtu becomes less than minimum MTU,
+ * tell minimum MTU (and I'll need to fragment it).
+ */
+ if (mtu < IPV6_MMTU)
+ mtu = IPV6_MMTU;
+#endif /* IPSEC */
+ icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
+ }
+ goto bad;
+ }
+
+ error = nd6_output_ifp(rt->rt_ifp, origifp, m, dst, NULL);
if (error) {
in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard);
IP6STAT_INC(ip6s_cantforward);
@@ -671,10 +623,6 @@ pass:
bad:
m_freem(m);
out:
- if (rt != NULL
-#ifdef IPSEC
- && !ipsecrt
-#endif
- )
+ if (rt != NULL)
RTFREE(rt);
}
diff --git a/freebsd/sys/netinet6/ip6_id.c b/freebsd/sys/netinet6/ip6_id.c
index e277def6..4e1a74e6 100644
--- a/freebsd/sys/netinet6/ip6_id.c
+++ b/freebsd/sys/netinet6/ip6_id.c
@@ -99,6 +99,7 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/route.h>
+#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
@@ -109,7 +110,7 @@ __FBSDID("$FreeBSD$");
struct randomtab {
const int ru_bits; /* resulting bits */
- const long ru_out; /* Time after wich will be reseeded */
+ const long ru_out; /* Time after which will be reseeded */
const u_int32_t ru_max; /* Uniq cycle, avoid blackjack prediction */
const u_int32_t ru_gen; /* Starting generator */
const u_int32_t ru_n; /* ru_n: prime, ru_n - 1: product of pfacts[] */
@@ -129,7 +130,7 @@ struct randomtab {
static struct randomtab randomtab_32 = {
32, /* resulting bits */
- 180, /* Time after wich will be reseeded */
+ 180, /* Time after which will be reseeded */
1000000000, /* Uniq cycle, avoid blackjack prediction */
2, /* Starting generator */
2147483629, /* RU_N-1 = 2^2*3^2*59652323 */
@@ -140,7 +141,7 @@ static struct randomtab randomtab_32 = {
static struct randomtab randomtab_20 = {
20, /* resulting bits */
- 180, /* Time after wich will be reseeded */
+ 180, /* Time after which will be reseeded */
200000, /* Uniq cycle, avoid blackjack prediction */
2, /* Starting generator */
524269, /* RU_N-1 = 2^2*3^2*14563 */
@@ -223,7 +224,7 @@ initid(struct randomtab *p)
p->ru_g = pmod(p->ru_gen, j, p->ru_n);
p->ru_counter = 0;
- p->ru_reseed = time_second + p->ru_out;
+ p->ru_reseed = time_uptime + p->ru_out;
p->ru_msb = p->ru_msb ? 0 : (1U << (p->ru_bits - 1));
}
@@ -233,7 +234,7 @@ randomid(struct randomtab *p)
int i, n;
u_int32_t tmp;
- if (p->ru_counter >= p->ru_max || time_second > p->ru_reseed)
+ if (p->ru_counter >= p->ru_max || time_uptime > p->ru_reseed)
initid(p);
tmp = arc4random();
diff --git a/freebsd/sys/netinet6/ip6_input.c b/freebsd/sys/netinet6/ip6_input.c
index 10536316..c7ffe759 100644
--- a/freebsd/sys/netinet6/ip6_input.c
+++ b/freebsd/sys/netinet6/ip6_input.c
@@ -67,33 +67,41 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_route.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/hhook.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <rtems/bsd/sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rmlock.h>
#include <sys/syslog.h>
+#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/netisr.h>
+#include <net/rss_config.h>
#include <net/pfil.h>
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/ip_var.h>
#include <netinet/in_systm.h>
#include <net/if_llatbl.h>
@@ -108,7 +116,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/icmp6.h>
#include <netinet6/scope6_var.h>
#include <netinet6/in6_ifattach.h>
+#include <netinet6/mld6_var.h>
#include <netinet6/nd6.h>
+#include <netinet6/in6_rss.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -118,39 +128,84 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
-#ifdef FLOWTABLE
-#include <net/flowtable.h>
-VNET_DECLARE(int, ip6_output_flowtable_size);
-#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size)
-#endif
-
extern struct domain inet6domain;
u_char ip6_protox[IPPROTO_MAX];
VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead);
+VNET_DEFINE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl);
+VNET_DEFINE(u_long, in6_ifaddrhmask);
static struct netisr_handler ip6_nh = {
.nh_name = "ip6",
.nh_handler = ip6_input,
.nh_proto = NETISR_IPV6,
+#ifdef RSS
+ .nh_m2cpuid = rss_soft_m2cpuid_v6,
+ .nh_policy = NETISR_POLICY_CPU,
+ .nh_dispatch = NETISR_DISPATCH_HYBRID,
+#else
.nh_policy = NETISR_POLICY_FLOW,
+#endif
};
-VNET_DECLARE(struct callout, in6_tmpaddrtimer_ch);
-#define V_in6_tmpaddrtimer_ch VNET(in6_tmpaddrtimer_ch)
+static int
+sysctl_netinet6_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
+{
+ int error, qlimit;
+
+ netisr_getqlimit(&ip6_nh, &qlimit);
+ error = sysctl_handle_int(oidp, &qlimit, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (qlimit < 1)
+ return (EINVAL);
+ return (netisr_setqlimit(&ip6_nh, qlimit));
+}
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRQMAXLEN, intr_queue_maxlen,
+ CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet6_intr_queue_maxlen, "I",
+ "Maximum size of the IPv6 input queue");
+
+#ifdef RSS
+static struct netisr_handler ip6_direct_nh = {
+ .nh_name = "ip6_direct",
+ .nh_handler = ip6_direct_input,
+ .nh_proto = NETISR_IPV6_DIRECT,
+ .nh_m2cpuid = rss_soft_m2cpuid_v6,
+ .nh_policy = NETISR_POLICY_CPU,
+ .nh_dispatch = NETISR_DISPATCH_HYBRID,
+};
+
+static int
+sysctl_netinet6_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
+{
+ int error, qlimit;
+
+ netisr_getqlimit(&ip6_direct_nh, &qlimit);
+ error = sysctl_handle_int(oidp, &qlimit, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (qlimit < 1)
+ return (EINVAL);
+ return (netisr_setqlimit(&ip6_direct_nh, qlimit));
+}
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRDQMAXLEN, intr_direct_queue_maxlen,
+ CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet6_intr_direct_queue_maxlen,
+ "I", "Maximum size of the IPv6 direct input queue");
+
+#endif
VNET_DEFINE(struct pfil_head, inet6_pfil_hook);
-VNET_DEFINE(struct ip6stat, ip6stat);
+VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat);
+VNET_PCPUSTAT_SYSINIT(ip6stat);
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(ip6stat);
+#endif /* VIMAGE */
-struct rwlock in6_ifaddr_lock;
-RW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock");
+struct rmlock in6_ifaddr_lock;
+RM_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock");
-static void ip6_init2(void *);
-static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *);
-static struct ip6aux *ip6_addaux(struct mbuf *);
-static struct ip6aux *ip6_findaux(struct mbuf *m);
-static void ip6_delaux (struct mbuf *);
static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
#ifdef PULLDOWN_TEST
static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
@@ -163,7 +218,7 @@ static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
void
ip6_init(void)
{
- struct ip6protosw *pr;
+ struct protosw *pr;
int i;
TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal",
@@ -172,6 +227,8 @@ ip6_init(void)
TUNABLE_INT_FETCH("net.inet6.ip6.no_radr", &V_ip6_no_radr);
TAILQ_INIT(&V_in6_ifaddrhead);
+ V_in6_ifaddrhashtbl = hashinit(IN6ADDR_NHASH, M_IFADDR,
+ &V_in6_ifaddrhmask);
/* Initialize packet filter hooks. */
V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
@@ -180,40 +237,36 @@ ip6_init(void)
printf("%s: WARNING: unable to register pfil hook, "
"error %d\n", __func__, i);
+ if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET6,
+ &V_ipsec_hhh_in[HHOOK_IPSEC_INET6],
+ HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
+ printf("%s: WARNING: unable to register input helper hook\n",
+ __func__);
+ if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET6,
+ &V_ipsec_hhh_out[HHOOK_IPSEC_INET6],
+ HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
+ printf("%s: WARNING: unable to register output helper hook\n",
+ __func__);
+
scope6_init();
addrsel_policy_init();
nd6_init();
frag6_init();
-#ifdef FLOWTABLE
- if (TUNABLE_INT_FETCH("net.inet6.ip6.output_flowtable_size",
- &V_ip6_output_flowtable_size)) {
- if (V_ip6_output_flowtable_size < 256)
- V_ip6_output_flowtable_size = 256;
- if (!powerof2(V_ip6_output_flowtable_size)) {
- printf("flowtable must be power of 2 size\n");
- V_ip6_output_flowtable_size = 2048;
- }
- } else {
- /*
- * round up to the next power of 2
- */
- V_ip6_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
- }
- V_ip6_ft = flowtable_alloc("ipv6", V_ip6_output_flowtable_size, FL_IPV6|FL_PCPU);
-#endif
-
V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
/* Skip global initialization stuff for non-default instances. */
- if (!IS_DEFAULT_VNET(curvnet))
+#ifdef VIMAGE
+ if (!IS_DEFAULT_VNET(curvnet)) {
+ netisr_register_vnet(&ip6_nh);
+#ifdef RSS
+ netisr_register_vnet(&ip6_direct_nh);
+#endif
return;
-
-#ifdef DIAGNOSTIC
- if (sizeof(struct protosw) != sizeof(struct ip6protosw))
- panic("sizeof(protosw) != sizeof(ip6protosw)");
+ }
#endif
- pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
+
+ pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
if (pr == NULL)
panic("ip6_init");
@@ -224,8 +277,8 @@ ip6_init(void)
* Cycle through IP protocols and put them into the appropriate place
* in ip6_protox[].
*/
- for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
- pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
+ for (pr = inet6domain.dom_protosw;
+ pr < inet6domain.dom_protoswNPROTOSW; pr++)
if (pr->pr_domain->dom_family == PF_INET6 &&
pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
/* Be careful to only index valid IP protocols. */
@@ -234,6 +287,9 @@ ip6_init(void)
}
netisr_register(&ip6_nh);
+#ifdef RSS
+ netisr_register(&ip6_direct_nh);
+#endif
}
/*
@@ -243,7 +299,7 @@ ip6_init(void)
int
ip6proto_register(short ip6proto)
{
- struct ip6protosw *pr;
+ struct protosw *pr;
/* Sanity checks. */
if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX)
@@ -253,7 +309,7 @@ ip6proto_register(short ip6proto)
* The protocol slot must not be occupied by another protocol
* already. An index pointing to IPPROTO_RAW is unused.
*/
- pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
+ pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
if (pr == NULL)
return (EPFNOSUPPORT);
if (ip6_protox[ip6proto] != pr - inet6sw) /* IPPROTO_RAW */
@@ -262,8 +318,8 @@ ip6proto_register(short ip6proto)
/*
* Find the protocol position in inet6sw[] and set the index.
*/
- for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
- pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++) {
+ for (pr = inet6domain.dom_protosw;
+ pr < inet6domain.dom_protoswNPROTOSW; pr++) {
if (pr->pr_domain->dom_family == PF_INET6 &&
pr->pr_protocol && pr->pr_protocol == ip6proto) {
ip6_protox[pr->pr_protocol] = pr - inet6sw;
@@ -276,14 +332,14 @@ ip6proto_register(short ip6proto)
int
ip6proto_unregister(short ip6proto)
{
- struct ip6protosw *pr;
+ struct protosw *pr;
/* Sanity checks. */
if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX)
return (EPROTONOSUPPORT);
/* Check if the protocol was indeed registered. */
- pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
+ pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
if (pr == NULL)
return (EPFNOSUPPORT);
if (ip6_protox[ip6proto] == pr - inet6sw) /* IPPROTO_RAW */
@@ -295,43 +351,61 @@ ip6proto_unregister(short ip6proto)
}
#ifdef VIMAGE
-void
-ip6_destroy()
+static void
+ip6_destroy(void *unused __unused)
{
+ struct ifaddr *ifa, *nifa;
+ struct ifnet *ifp;
+ int error;
- nd6_destroy();
- callout_drain(&V_in6_tmpaddrtimer_ch);
-}
+#ifdef RSS
+ netisr_unregister_vnet(&ip6_direct_nh);
#endif
+ netisr_unregister_vnet(&ip6_nh);
+
+ if ((error = pfil_head_unregister(&V_inet6_pfil_hook)) != 0)
+ printf("%s: WARNING: unable to unregister pfil hook, "
+ "error %d\n", __func__, error);
+ error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET6]);
+ if (error != 0) {
+ printf("%s: WARNING: unable to deregister input helper hook "
+ "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET6: "
+ "error %d returned\n", __func__, error);
+ }
+ error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET6]);
+ if (error != 0) {
+ printf("%s: WARNING: unable to deregister output helper hook "
+ "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET6: "
+ "error %d returned\n", __func__, error);
+ }
-static int
-ip6_init2_vnet(const void *unused __unused)
-{
-
- /* nd6_timer_init */
- callout_init(&V_nd6_timer_ch, 0);
- callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet);
-
- /* timer for regeneranation of temporary addresses randomize ID */
- callout_init(&V_in6_tmpaddrtimer_ch, 0);
- callout_reset(&V_in6_tmpaddrtimer_ch,
- (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
- V_ip6_temp_regen_advance) * hz,
- in6_tmpaddrtimer, curvnet);
+ /* Cleanup addresses. */
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ /* Cannot lock here - lock recursion. */
+ /* IF_ADDR_LOCK(ifp); */
+ TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) {
- return (0);
-}
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ in6_purgeaddr(ifa);
+ }
+ /* IF_ADDR_UNLOCK(ifp); */
+ in6_ifdetach_destroy(ifp);
+ mld_domifdetach(ifp);
+ /* Make sure any routes are gone as well. */
+ rt_flushifroutes_af(ifp, AF_INET6);
+ }
+ IFNET_RUNLOCK();
-static void
-ip6_init2(void *dummy)
-{
+ nd6_destroy();
+ in6_ifattach_destroy();
- ip6_init2_vnet(NULL);
+ hashdestroy(V_in6_ifaddrhashtbl, M_IFADDR, V_in6_ifaddrhmask);
}
-/* cheat */
-/* This must be after route_init(), which is now SI_ORDER_THIRD */
-SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);
+VNET_SYSUNINIT(inet6, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_destroy, NULL);
+#endif
static int
ip6_input_hbh(struct mbuf *m, uint32_t *plen, uint32_t *rtalert, int *off,
@@ -410,22 +484,78 @@ out:
return (1);
}
+#ifdef RSS
+/*
+ * IPv6 direct input routine.
+ *
+ * This is called when reinjecting completed fragments where
+ * all of the previous checking and book-keeping has been done.
+ */
+void
+ip6_direct_input(struct mbuf *m)
+{
+ int off, nxt;
+ int nest;
+ struct m_tag *mtag;
+ struct ip6_direct_ctx *ip6dc;
+
+ mtag = m_tag_locate(m, MTAG_ABI_IPV6, IPV6_TAG_DIRECT, NULL);
+ KASSERT(mtag != NULL, ("Reinjected packet w/o direct ctx tag!"));
+
+ ip6dc = (struct ip6_direct_ctx *)(mtag + 1);
+ nxt = ip6dc->ip6dc_nxt;
+ off = ip6dc->ip6dc_off;
+
+ nest = 0;
+
+ m_tag_delete(m, mtag);
+
+ while (nxt != IPPROTO_DONE) {
+ if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) {
+ IP6STAT_INC(ip6s_toomanyhdr);
+ goto bad;
+ }
+
+ /*
+ * protection against faulty packet - there should be
+ * more sanity checks in header chain processing.
+ */
+ if (m->m_pkthdr.len < off) {
+ IP6STAT_INC(ip6s_tooshort);
+ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
+ goto bad;
+ }
+
+#ifdef IPSEC
+ /*
+ * enforce IPsec policy checking if we are seeing last header.
+ * note that we do not visit this with protocols with pcb layer
+ * code - like udp/tcp/raw ip.
+ */
+ if (ip6_ipsec_input(m, nxt))
+ goto bad;
+#endif /* IPSEC */
+
+ nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
+ }
+ return;
+bad:
+ m_freem(m);
+}
+#endif
+
void
ip6_input(struct mbuf *m)
{
+ struct in6_addr odst;
struct ip6_hdr *ip6;
- int off = sizeof(struct ip6_hdr), nest;
+ struct in6_ifaddr *ia;
u_int32_t plen;
u_int32_t rtalert = ~0;
+ int off = sizeof(struct ip6_hdr), nest;
int nxt, ours = 0;
- struct ifnet *deliverifp = NULL, *ifp = NULL;
- struct in6_addr odst;
- struct route_in6 rin6;
int srcrt = 0;
- struct llentry *lle = NULL;
- struct sockaddr_in6 dst6, *dst;
- bzero(&rin6, sizeof(struct route_in6));
#ifdef IPSEC
/*
* should the inner packet be considered authentic?
@@ -438,18 +568,12 @@ ip6_input(struct mbuf *m)
#endif /* IPSEC */
- /*
- * make sure we don't have onion peering information into m_tag.
- */
- ip6_delaux(m);
-
if (m->m_flags & M_FASTFWD_OURS) {
/*
* Firewall changed destination to local.
*/
m->m_flags &= ~M_FASTFWD_OURS;
ours = 1;
- deliverifp = m->m_pkthdr.rcvif;
ip6 = mtod(m, struct ip6_hdr *);
goto hbhcheck;
}
@@ -476,10 +600,8 @@ ip6_input(struct mbuf *m)
}
/* drop the packet if IPv6 operation is disabled on the IF */
- if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) {
- m_freem(m);
- return;
- }
+ if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED))
+ goto bad;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
IP6STAT_INC(ip6s_total);
@@ -493,21 +615,16 @@ ip6_input(struct mbuf *m)
if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
struct mbuf *n;
- MGETHDR(n, M_DONTWAIT, MT_HEADER);
- if (n)
- M_MOVE_PKTHDR(n, m);
- if (n && n->m_pkthdr.len > MHLEN) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_freem(n);
- n = NULL;
- }
- }
+ if (m->m_pkthdr.len > MHLEN)
+ n = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+ n = m_gethdr(M_NOWAIT, MT_DATA);
if (n == NULL) {
m_freem(m);
return; /* ENOBUFS */
}
+ m_move_pkthdr(n, m);
m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
n->m_len = n->m_pkthdr.len;
m_freem(m);
@@ -536,6 +653,8 @@ ip6_input(struct mbuf *m)
IP6STAT_INC(ip6s_nxthist[ip6->ip6_nxt]);
+ IP_PROBE(receive, NULL, NULL, ip6, m->m_pkthdr.rcvif, NULL, ip6);
+
/*
* Check against address spoofing/corruption.
*/
@@ -643,7 +762,6 @@ ip6_input(struct mbuf *m)
if (m->m_flags & M_FASTFWD_OURS) {
m->m_flags &= ~M_FASTFWD_OURS;
ours = 1;
- deliverifp = m->m_pkthdr.rcvif;
goto hbhcheck;
}
if ((m->m_flags & M_IP6_NEXTHOP) &&
@@ -654,7 +772,7 @@ ip6_input(struct mbuf *m)
* connected host.
*/
ip6_forward(m, 1);
- goto out;
+ return;
}
passin:
@@ -677,7 +795,6 @@ passin:
IP6STAT_INC(ip6s_badscope);
goto bad;
}
-
/*
* Multicast check. Assume packet is for us to avoid
* prematurely taking locks.
@@ -685,167 +802,16 @@ passin:
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
ours = 1;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
- deliverifp = m->m_pkthdr.rcvif;
goto hbhcheck;
}
-
- /*
- * Unicast check
- */
-
- bzero(&dst6, sizeof(dst6));
- dst6.sin6_family = AF_INET6;
- dst6.sin6_len = sizeof(struct sockaddr_in6);
- dst6.sin6_addr = ip6->ip6_dst;
- ifp = m->m_pkthdr.rcvif;
- IF_AFDATA_RLOCK(ifp);
- lle = lla_lookup(LLTABLE6(ifp), 0,
- (struct sockaddr *)&dst6);
- IF_AFDATA_RUNLOCK(ifp);
- if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) {
- struct ifaddr *ifa;
- struct in6_ifaddr *ia6;
- int bad;
-
- bad = 1;
-#define sa_equal(a1, a2) \
- (bcmp((a1), (a2), ((a1))->sin6_len) == 0)
- IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family != dst6.sin6_family)
- continue;
- if (sa_equal(&dst6, ifa->ifa_addr))
- break;
- }
- KASSERT(ifa != NULL, ("%s: ifa not found for lle %p",
- __func__, lle));
-#undef sa_equal
-
- ia6 = (struct in6_ifaddr *)ifa;
- if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
- /* Count the packet in the ip address stats */
- ia6->ia_ifa.if_ipackets++;
- ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
-
- /*
- * record address information into m_tag.
- */
- (void)ip6_setdstifaddr(m, ia6);
-
- bad = 0;
- } else {
- char ip6bufs[INET6_ADDRSTRLEN];
- char ip6bufd[INET6_ADDRSTRLEN];
- /* address is not ready, so discard the packet. */
- nd6log((LOG_INFO,
- "ip6_input: packet to an unready address %s->%s\n",
- ip6_sprintf(ip6bufs, &ip6->ip6_src),
- ip6_sprintf(ip6bufd, &ip6->ip6_dst)));
- }
- IF_ADDR_RUNLOCK(ifp);
- LLE_RUNLOCK(lle);
- if (bad)
- goto bad;
- else {
- ours = 1;
- deliverifp = ifp;
- goto hbhcheck;
- }
- }
- if (lle != NULL)
- LLE_RUNLOCK(lle);
-
- dst = &rin6.ro_dst;
- dst->sin6_len = sizeof(struct sockaddr_in6);
- dst->sin6_family = AF_INET6;
- dst->sin6_addr = ip6->ip6_dst;
- rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m));
- if (rin6.ro_rt)
- RT_UNLOCK(rin6.ro_rt);
-
-#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key))
-
/*
- * Accept the packet if the forwarding interface to the destination
- * according to the routing table is the loopback interface,
- * unless the associated route has a gateway.
- * Note that this approach causes to accept a packet if there is a
- * route to the loopback interface for the destination of the packet.
- * But we think it's even useful in some situations, e.g. when using
- * a special daemon which wants to intercept the packet.
- *
- * XXX: some OSes automatically make a cloned route for the destination
- * of an outgoing packet. If the outgoing interface of the packet
- * is a loopback one, the kernel would consider the packet to be
- * accepted, even if we have no such address assinged on the interface.
- * We check the cloned flag of the route entry to reject such cases,
- * assuming that route entries for our own addresses are not made by
- * cloning (it should be true because in6_addloop explicitly installs
- * the host route). However, we might have to do an explicit check
- * while it would be less efficient. Or, should we rather install a
- * reject route for such a case?
+ * Unicast check
+ * XXX: For now we keep link-local IPv6 addresses with embedded
+ * scope zone id, therefore we use zero zoneid here.
*/
- if (rin6.ro_rt &&
- (rin6.ro_rt->rt_flags &
- (RTF_HOST|RTF_GATEWAY)) == RTF_HOST &&
-#ifdef RTF_WASCLONED
- !(rin6.ro_rt->rt_flags & RTF_WASCLONED) &&
-#endif
-#ifdef RTF_CLONED
- !(rin6.ro_rt->rt_flags & RTF_CLONED) &&
-#endif
-#if 0
- /*
- * The check below is redundant since the comparison of
- * the destination and the key of the rtentry has
- * already done through looking up the routing table.
- */
- IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
- &rt6_key(rin6.ro_rt)->sin6_addr)
-#endif
- rin6.ro_rt->rt_ifp->if_type == IFT_LOOP) {
- int free_ia6 = 0;
- struct in6_ifaddr *ia6;
-
- /*
- * found the loopback route to the interface address
- */
- if (rin6.ro_rt->rt_gateway->sa_family == AF_LINK) {
- struct sockaddr_in6 dest6;
-
- bzero(&dest6, sizeof(dest6));
- dest6.sin6_family = AF_INET6;
- dest6.sin6_len = sizeof(dest6);
- dest6.sin6_addr = ip6->ip6_dst;
- ia6 = (struct in6_ifaddr *)
- ifa_ifwithaddr((struct sockaddr *)&dest6);
- if (ia6 == NULL)
- goto bad;
- free_ia6 = 1;
- }
- else
- ia6 = (struct in6_ifaddr *)rin6.ro_rt->rt_ifa;
-
- /*
- * record address information into m_tag.
- */
- (void)ip6_setdstifaddr(m, ia6);
-
- /*
- * packets to a tentative, duplicated, or somehow invalid
- * address must not be accepted.
- */
- if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
- /* this address is ready */
- ours = 1;
- deliverifp = ia6->ia_ifp; /* correct? */
- /* Count the packet in the ip address stats */
- ia6->ia_ifa.if_ipackets++;
- ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
- if (ia6 != NULL && free_ia6 != 0)
- ifa_free(&ia6->ia_ifa);
- goto hbhcheck;
- } else {
+ ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
+ if (ia != NULL) {
+ if (ia->ia6_flags & IN6_IFF_NOTREADY) {
char ip6bufs[INET6_ADDRSTRLEN];
char ip6bufd[INET6_ADDRSTRLEN];
/* address is not ready, so discard the packet. */
@@ -853,24 +819,15 @@ passin:
"ip6_input: packet to an unready address %s->%s\n",
ip6_sprintf(ip6bufs, &ip6->ip6_src),
ip6_sprintf(ip6bufd, &ip6->ip6_dst)));
-
- if (ia6 != NULL && free_ia6 != 0)
- ifa_free(&ia6->ia_ifa);
+ ifa_free(&ia->ia_ifa);
goto bad;
}
- }
-
- /*
- * FAITH (Firewall Aided Internet Translator)
- */
- if (V_ip6_keepfaith) {
- if (rin6.ro_rt && rin6.ro_rt->rt_ifp &&
- rin6.ro_rt->rt_ifp->if_type == IFT_FAITH) {
- /* XXX do we need more sanity checks? */
- ours = 1;
- deliverifp = rin6.ro_rt->rt_ifp; /* faith */
- goto hbhcheck;
- }
+ /* Count the packet in the ip address stats */
+ counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
+ counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len);
+ ifa_free(&ia->ia_ifa);
+ ours = 1;
+ goto hbhcheck;
}
/*
@@ -885,47 +842,25 @@ passin:
hbhcheck:
/*
- * record address information into m_tag, if we don't have one yet.
- * note that we are unable to record it, if the address is not listed
- * as our interface address (e.g. multicast addresses, addresses
- * within FAITH prefixes and such).
- */
- if (deliverifp) {
- struct in6_ifaddr *ia6;
-
- if ((ia6 = ip6_getdstifaddr(m)) != NULL) {
- ifa_free(&ia6->ia_ifa);
- } else {
- ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
- if (ia6) {
- if (!ip6_setdstifaddr(m, ia6)) {
- /*
- * XXX maybe we should drop the packet here,
- * as we could not provide enough information
- * to the upper layers.
- */
- }
- ifa_free(&ia6->ia_ifa);
- }
- }
- }
-
- /*
* Process Hop-by-Hop options header if it's contained.
* m may be modified in ip6_hopopts_input().
* If a JumboPayload option is included, plen will also be modified.
*/
plen = (u_int32_t)ntohs(ip6->ip6_plen);
if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
- int error;
-
- error = ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours);
- if (error != 0)
- goto out;
+ if (ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours) != 0)
+ return;
} else
nxt = ip6->ip6_nxt;
/*
+ * Use mbuf flags to propagate Router Alert option to
+ * ICMPv6 layer, as hop-by-hop options have been stripped.
+ */
+ if (rtalert != ~0)
+ m->m_flags |= M_RTALERT_MLD;
+
+ /*
* Check that the amount of data in the buffers
* is as at least much as the IPv6 header would have us expect.
* Trim mbufs if longer than we expect.
@@ -968,7 +903,7 @@ passin:
}
} else if (!ours) {
ip6_forward(m, srcrt);
- goto out;
+ return;
}
ip6 = mtod(m, struct ip6_hdr *);
@@ -993,7 +928,7 @@ passin:
* Tell launch routine the next header
*/
IP6STAT_INC(ip6s_delivered);
- in6_ifstat_inc(deliverifp, ifs6_in_deliver);
+ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_deliver);
nest = 0;
while (nxt != IPPROTO_DONE) {
@@ -1022,56 +957,11 @@ passin:
goto bad;
#endif /* IPSEC */
- /*
- * Use mbuf flags to propagate Router Alert option to
- * ICMPv6 layer, as hop-by-hop options have been stripped.
- */
- if (nxt == IPPROTO_ICMPV6 && rtalert != ~0)
- m->m_flags |= M_RTALERT_MLD;
-
nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
}
- goto out;
+ return;
bad:
m_freem(m);
-out:
- if (rin6.ro_rt)
- RTFREE(rin6.ro_rt);
-}
-
-/*
- * set/grab in6_ifaddr correspond to IPv6 destination address.
- * XXX backward compatibility wrapper
- *
- * XXXRW: We should bump the refcount on ia6 before sticking it in the m_tag,
- * and then bump it when the tag is copied, and release it when the tag is
- * freed. Unfortunately, m_tags don't support deep copies (yet), so instead
- * we just bump the ia refcount when we receive it. This should be fixed.
- */
-static struct ip6aux *
-ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6)
-{
- struct ip6aux *ip6a;
-
- ip6a = ip6_addaux(m);
- if (ip6a)
- ip6a->ip6a_dstia6 = ia6;
- return ip6a; /* NULL if failed to set */
-}
-
-struct in6_ifaddr *
-ip6_getdstifaddr(struct mbuf *m)
-{
- struct ip6aux *ip6a;
- struct in6_ifaddr *ia;
-
- ip6a = ip6_findaux(m);
- if (ip6a) {
- ia = ip6a->ip6a_dstia6;
- ifa_ref(&ia->ia_ifa);
- return ia;
- } else
- return NULL;
}
/*
@@ -1601,6 +1491,44 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
loopend:
;
}
+
+ if (in6p->inp_flags2 & INP_RECVFLOWID) {
+ uint32_t flowid, flow_type;
+
+ flowid = m->m_pkthdr.flowid;
+ flow_type = M_HASHTYPE_GET(m);
+
+ /*
+ * XXX should handle the failure of one or the
+ * other - don't populate both?
+ */
+ *mp = sbcreatecontrol((caddr_t) &flowid,
+ sizeof(uint32_t), IPV6_FLOWID, IPPROTO_IPV6);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ *mp = sbcreatecontrol((caddr_t) &flow_type,
+ sizeof(uint32_t), IPV6_FLOWTYPE, IPPROTO_IPV6);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+
+#ifdef RSS
+ if (in6p->inp_flags2 & INP_RECVRSSBUCKETID) {
+ uint32_t flowid, flow_type;
+ uint32_t rss_bucketid;
+
+ flowid = m->m_pkthdr.flowid;
+ flow_type = M_HASHTYPE_GET(m);
+
+ if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
+ *mp = sbcreatecontrol((caddr_t) &rss_bucketid,
+ sizeof(uint32_t), IPV6_RSSBUCKETID, IPPROTO_IPV6);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+ }
+#endif
+
}
#undef IS2292
@@ -1674,22 +1602,12 @@ ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
else
elen = (ip6e.ip6e_len + 1) << 3;
- MGET(n, M_DONTWAIT, MT_DATA);
- if (n && elen >= MLEN) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_free(n);
- n = NULL;
- }
- }
- if (!n)
- return NULL;
-
- n->m_len = 0;
- if (elen >= M_TRAILINGSPACE(n)) {
- m_free(n);
+ if (elen > MLEN)
+ n = m_getcl(M_NOWAIT, MT_DATA, 0);
+ else
+ n = m_get(M_NOWAIT, MT_DATA);
+ if (n == NULL)
return NULL;
- }
m_copydata(m, off, elen, mtod(n, caddr_t));
n->m_len = elen;
@@ -1710,7 +1628,7 @@ ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
* we develop `neater' mechanism to process extension headers.
*/
char *
-ip6_get_prevhdr(struct mbuf *m, int off)
+ip6_get_prevhdr(const struct mbuf *m, int off)
{
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -1749,7 +1667,7 @@ ip6_get_prevhdr(struct mbuf *m, int off)
* get next header offset. m will be retained.
*/
int
-ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp)
+ip6_nexthdr(const struct mbuf *m, int off, int proto, int *nxtp)
{
struct ip6_hdr ip6;
struct ip6_ext ip6e;
@@ -1817,14 +1735,14 @@ ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp)
return -1;
}
- return -1;
+ /* NOTREACHED */
}
/*
* get offset for the last header in the chain. m will be kept untainted.
*/
int
-ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
+ip6_lasthdr(const struct mbuf *m, int off, int proto, int *nxtp)
{
int newoff;
int nxt;
@@ -1847,42 +1765,6 @@ ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
}
}
-static struct ip6aux *
-ip6_addaux(struct mbuf *m)
-{
- struct m_tag *mtag;
-
- mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
- if (!mtag) {
- mtag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux),
- M_NOWAIT);
- if (mtag) {
- m_tag_prepend(m, mtag);
- bzero(mtag + 1, sizeof(struct ip6aux));
- }
- }
- return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
-}
-
-static struct ip6aux *
-ip6_findaux(struct mbuf *m)
-{
- struct m_tag *mtag;
-
- mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
- return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
-}
-
-static void
-ip6_delaux(struct mbuf *m)
-{
- struct m_tag *mtag;
-
- mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
- if (mtag)
- m_tag_delete(m, mtag);
-}
-
/*
* System control for IP6
*/
diff --git a/freebsd/sys/netinet6/ip6_ipsec.h b/freebsd/sys/netinet6/ip6_ipsec.h
index 86d1b005..e335d850 100644
--- a/freebsd/sys/netinet6/ip6_ipsec.h
+++ b/freebsd/sys/netinet6/ip6_ipsec.h
@@ -35,8 +35,7 @@
int ip6_ipsec_filtertunnel(struct mbuf *);
int ip6_ipsec_fwd(struct mbuf *);
int ip6_ipsec_input(struct mbuf *, int);
-int ip6_ipsec_output(struct mbuf **, struct inpcb *, int *, int *,
- struct ifnet **, struct secpolicy **sp);
+int ip6_ipsec_output(struct mbuf **, struct inpcb *, int *);
#if 0
int ip6_ipsec_mtu(struct mbuf *);
#endif
diff --git a/freebsd/sys/netinet6/ip6_mroute.c b/freebsd/sys/netinet6/ip6_mroute.c
index 02a98026..f74b71c3 100644
--- a/freebsd/sys/netinet6/ip6_mroute.c
+++ b/freebsd/sys/netinet6/ip6_mroute.c
@@ -95,6 +95,7 @@ __FBSDID("$FreeBSD$");
#include <sys/module.h>
#include <sys/domain.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -106,6 +107,7 @@ __FBSDID("$FreeBSD$");
#include <sys/time.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_types.h>
#include <net/raw_cb.h>
#include <net/vnet.h>
@@ -116,19 +118,16 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_encap.h>
#include <netinet/ip6.h>
+#include <netinet/in_kdtrace.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_mroute.h>
-#include <netinet6/ip6protosw.h>
#include <netinet6/pim6.h>
#include <netinet6/pim6_var.h>
static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry");
-/* XXX: this is a very common idiom; move to <sys/mbuf.h> ? */
-#define M_HASCL(m) ((m)->m_flags & M_EXT)
-
static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *);
static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *);
static int register_send(struct ip6_hdr *, struct mif6 *, struct mbuf *);
@@ -140,7 +139,7 @@ extern int in6_mcast_loop;
extern struct domain inet6domain;
static const struct encaptab *pim6_encap_cookie;
-static const struct ip6protosw in6_pim_protosw = {
+static const struct protosw in6_pim_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_PIM,
@@ -199,9 +198,34 @@ static struct mtx mfc6_mtx;
static u_char n6expire[MF6CTBLSIZ];
static struct mif6 mif6table[MAXMIFS];
-SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mif6table, CTLFLAG_RD,
- &mif6table, sizeof(mif6table), "S,mif6[MAXMIFS]",
- "IPv6 Multicast Interfaces (struct mif6[MAXMIFS], netinet6/ip6_mroute.h)");
+static int
+sysctl_mif6table(SYSCTL_HANDLER_ARGS)
+{
+ struct mif6_sctl *out;
+ int error;
+
+ out = malloc(sizeof(struct mif6_sctl) * MAXMIFS, M_TEMP, M_WAITOK);
+ for (int i = 0; i < MAXMIFS; i++) {
+ out[i].m6_flags = mif6table[i].m6_flags;
+ out[i].m6_rate_limit = mif6table[i].m6_rate_limit;
+ out[i].m6_lcl_addr = mif6table[i].m6_lcl_addr;
+ if (mif6table[i].m6_ifp != NULL)
+ out[i].m6_ifp = mif6table[i].m6_ifp->if_index;
+ else
+ out[i].m6_ifp = 0;
+ out[i].m6_pkt_in = mif6table[i].m6_pkt_in;
+ out[i].m6_pkt_out = mif6table[i].m6_pkt_out;
+ out[i].m6_bytes_in = mif6table[i].m6_bytes_in;
+ out[i].m6_bytes_out = mif6table[i].m6_bytes_out;
+ }
+ error = SYSCTL_OUT(req, out, sizeof(struct mif6_sctl) * MAXMIFS);
+ free(out, M_TEMP);
+ return (error);
+}
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, mif6table, CTLTYPE_OPAQUE | CTLFLAG_RD,
+ NULL, 0, sysctl_mif6table, "S,mif6_sctl[MAXMIFS]",
+ "IPv6 Multicast Interfaces (struct mif6_sctl[MAXMIFS], "
+ "netinet6/ip6_mroute.h)");
static struct mtx mif6_mtx;
#define MIF6_LOCK() mtx_lock(&mif6_mtx)
@@ -359,7 +383,7 @@ X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt)
mifi_t mifi;
if (so != V_ip6_mrouter && sopt->sopt_name != MRT6_INIT)
- return (EACCES);
+ return (EPERM);
switch (sopt->sopt_name) {
case MRT6_INIT:
@@ -614,7 +638,7 @@ X_ip6_mrouter_done(void)
for (rte = rt->mf6c_stall; rte != NULL; ) {
struct rtdetq *n = rte->next;
- m_free(rte->m);
+ m_freem(rte->m);
free(rte, M_MRTABLE6);
rte = n;
}
@@ -1078,8 +1102,8 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
*/
if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
IP6STAT_INC(ip6s_cantforward);
- if (V_ip6_log_time + V_ip6_log_interval < time_second) {
- V_ip6_log_time = time_second;
+ if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
+ V_ip6_log_time = time_uptime;
log(LOG_DEBUG,
"cannot forward "
"from %s to %s nxt %d received on %s\n",
@@ -1128,7 +1152,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
* Pullup packet header if needed before storing it,
* as other references may modify it in the meantime.
*/
- if (mb0 && (M_HASCL(mb0) || mb0->m_len < sizeof(struct ip6_hdr)))
+ if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < sizeof(struct ip6_hdr)))
mb0 = m_pullup(mb0, sizeof(struct ip6_hdr));
if (mb0 == NULL) {
free(rte, M_MRTABLE6);
@@ -1397,7 +1421,7 @@ ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt)
mm = m_copy(m, 0, sizeof(struct ip6_hdr));
if (mm &&
- (M_HASCL(mm) ||
+ (!M_WRITABLE(mm) ||
mm->m_len < sizeof(struct ip6_hdr)))
mm = m_pullup(mm, sizeof(struct ip6_hdr));
if (mm == NULL)
@@ -1527,7 +1551,7 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
*/
mb_copy = m_copy(m, 0, M_COPYALL);
if (mb_copy &&
- (M_HASCL(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr)))
+ (!M_WRITABLE(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr)))
mb_copy = m_pullup(mb_copy, sizeof(struct ip6_hdr));
if (mb_copy == NULL) {
return;
@@ -1561,15 +1585,8 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
* If configured to loop back multicasts by default,
* loop back a copy now.
*/
- if (in6_mcast_loop) {
- struct sockaddr_in6 dst6;
-
- bzero(&dst6, sizeof(dst6));
- dst6.sin6_len = sizeof(struct sockaddr_in6);
- dst6.sin6_family = AF_INET6;
- dst6.sin6_addr = ip6->ip6_dst;
- ip6_mloopback(ifp, m, &dst6);
- }
+ if (in6_mcast_loop)
+ ip6_mloopback(ifp, m);
/*
* Put the packet into the sending queue of the outgoing interface
@@ -1583,10 +1600,13 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
dst6.sin6_len = sizeof(struct sockaddr_in6);
dst6.sin6_family = AF_INET6;
dst6.sin6_addr = ip6->ip6_dst;
+
+ IP_PROBE(send, NULL, NULL, ip6, ifp, NULL, ip6);
/*
* We just call if_output instead of nd6_output here, since
* we need no ND for a multicast forwarded packet...right?
*/
+ m_clrprotoflags(m); /* Avoid confusing lower layers. */
error = (*ifp->if_output)(ifp, mb_copy,
(struct sockaddr *)&dst6, NULL);
MRT6_DLOG(DEBUG_XMIT, "mif %u err %d",
@@ -1626,11 +1646,10 @@ register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
ip6_sprintf(ip6bufd, &ip6->ip6_dst));
PIM6STAT_INC(pim6s_snd_registers);
- /* Make a copy of the packet to send to the user level process */
- MGETHDR(mm, M_DONTWAIT, MT_HEADER);
+ /* Make a copy of the packet to send to the user level process. */
+ mm = m_gethdr(M_NOWAIT, MT_DATA);
if (mm == NULL)
return (ENOBUFS);
- mm->m_pkthdr.rcvif = NULL;
mm->m_data += max_linkhdr;
mm->m_len = sizeof(struct ip6_hdr);
@@ -1949,4 +1968,4 @@ static moduledata_t ip6_mroutemod = {
0
};
-DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PROTO_MC, SI_ORDER_ANY);
diff --git a/freebsd/sys/netinet6/ip6_mroute.h b/freebsd/sys/netinet6/ip6_mroute.h
index 33b41310..51e1d496 100644
--- a/freebsd/sys/netinet6/ip6_mroute.h
+++ b/freebsd/sys/netinet6/ip6_mroute.h
@@ -121,19 +121,19 @@ struct mf6cctl {
* The kernel's multicast routing statistics.
*/
struct mrt6stat {
- u_quad_t mrt6s_mfc_lookups; /* # forw. cache hash table hits */
- u_quad_t mrt6s_mfc_misses; /* # forw. cache hash table misses */
- u_quad_t mrt6s_upcalls; /* # calls to multicast routing daemon */
- u_quad_t mrt6s_no_route; /* no route for packet's origin */
- u_quad_t mrt6s_bad_tunnel; /* malformed tunnel options */
- u_quad_t mrt6s_cant_tunnel; /* no room for tunnel options */
- u_quad_t mrt6s_wrong_if; /* arrived on wrong interface */
- u_quad_t mrt6s_upq_ovflw; /* upcall Q overflow */
- u_quad_t mrt6s_cache_cleanups; /* # entries with no upcalls */
- u_quad_t mrt6s_drop_sel; /* pkts dropped selectively */
- u_quad_t mrt6s_q_overflow; /* pkts dropped - Q overflow */
- u_quad_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */
- u_quad_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */
+ uint64_t mrt6s_mfc_lookups; /* # forw. cache hash table hits */
+ uint64_t mrt6s_mfc_misses; /* # forw. cache hash table misses */
+ uint64_t mrt6s_upcalls; /* # calls to multicast routing daemon */
+ uint64_t mrt6s_no_route; /* no route for packet's origin */
+ uint64_t mrt6s_bad_tunnel; /* malformed tunnel options */
+ uint64_t mrt6s_cant_tunnel; /* no room for tunnel options */
+ uint64_t mrt6s_wrong_if; /* arrived on wrong interface */
+ uint64_t mrt6s_upq_ovflw; /* upcall Q overflow */
+ uint64_t mrt6s_cache_cleanups; /* # entries with no upcalls */
+ uint64_t mrt6s_drop_sel; /* pkts dropped selectively */
+ uint64_t mrt6s_q_overflow; /* pkts dropped - Q overflow */
+ uint64_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */
+ uint64_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */
};
#ifdef MRT6_OINIT
@@ -194,6 +194,20 @@ struct sioc_mif_req6 {
u_quad_t obytes; /* Output byte count on mif */
};
+/*
+ * Structure to export 'struct mif6' to userland via sysctl.
+ */
+struct mif6_sctl {
+ u_char m6_flags; /* MIFF_ flags defined above */
+ u_int m6_rate_limit; /* max rate */
+ struct in6_addr m6_lcl_addr; /* local interface address */
+ uint32_t m6_ifp; /* interface index */
+ u_quad_t m6_pkt_in; /* # pkts in on interface */
+ u_quad_t m6_pkt_out; /* # pkts out on interface */
+ u_quad_t m6_bytes_in; /* # bytes in on interface */
+ u_quad_t m6_bytes_out; /* # bytes out on interface */
+};
+
#if defined(_KERNEL) || defined(KERNEL)
/*
* The kernel's multicast-interface structure.
diff --git a/freebsd/sys/netinet6/ip6_output.c b/freebsd/sys/netinet6/ip6_output.c
index 95231631..d3dc973e 100644
--- a/freebsd/sys/netinet6/ip6_output.c
+++ b/freebsd/sys/netinet6/ip6_output.c
@@ -67,10 +67,10 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_sctp.h>
#include <rtems/bsd/local/opt_route.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/kernel.h>
@@ -88,14 +88,17 @@ __FBSDID("$FreeBSD$");
#include <machine/in_cksum.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/pfil.h>
+#include <net/rss_config.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
+#include <netinet6/in6_fib.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
@@ -103,6 +106,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_pcb.h>
#include <netinet/tcp_var.h>
#include <netinet6/nd6.h>
+#include <netinet6/in6_rss.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -132,6 +136,8 @@ struct ip6_exthdrs {
struct mbuf *ip6e_dest2;
};
+static MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
+
static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
struct ucred *, int);
static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
@@ -145,8 +151,12 @@ static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
struct ip6_frag **);
static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
-static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
- struct ifnet *, struct in6_addr *, u_long *, int *, u_int);
+static int ip6_getpmtu(struct route_in6 *, int,
+ struct ifnet *, const struct in6_addr *, u_long *, int *, u_int,
+ u_int);
+static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long,
+ u_long *, int *, u_int);
+static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *);
static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
@@ -186,7 +196,7 @@ static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
}\
} while (/*CONSTCOND*/ 0)
-static void
+void
in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
{
u_short csum;
@@ -198,8 +208,8 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
if (offset + sizeof(u_short) > m->m_len) {
printf("%s: delayed m_pullup, m->len: %d plen %u off %u "
- "csum_flags=0x%04x\n", __func__, m->m_len, plen, offset,
- m->m_pkthdr.csum_flags);
+ "csum_flags=%b\n", __func__, m->m_len, plen, offset,
+ (int)m->m_pkthdr.csum_flags, CSUM_BITS);
/*
* XXX this should not happen, but if it does, the correct
* behavior may be to insert the checksum in the appropriate
@@ -210,6 +220,64 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
*(u_short *)(m->m_data + offset) = csum;
}
+int
+ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto,
+ int mtu, uint32_t id)
+{
+ struct mbuf *m, **mnext, *m_frgpart;
+ struct ip6_hdr *ip6, *mhip6;
+ struct ip6_frag *ip6f;
+ int off;
+ int error;
+ int tlen = m0->m_pkthdr.len;
+
+ m = m0;
+ ip6 = mtod(m, struct ip6_hdr *);
+ mnext = &m->m_nextpkt;
+
+ for (off = hlen; off < tlen; off += mtu) {
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (!m) {
+ IP6STAT_INC(ip6s_odropped);
+ return (ENOBUFS);
+ }
+ m->m_flags = m0->m_flags & M_COPYFLAGS;
+ *mnext = m;
+ mnext = &m->m_nextpkt;
+ m->m_data += max_linkhdr;
+ mhip6 = mtod(m, struct ip6_hdr *);
+ *mhip6 = *ip6;
+ m->m_len = sizeof(*mhip6);
+ error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
+ if (error) {
+ IP6STAT_INC(ip6s_odropped);
+ return (error);
+ }
+ ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
+ if (off + mtu >= tlen)
+ mtu = tlen - off;
+ else
+ ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
+ mhip6->ip6_plen = htons((u_short)(mtu + hlen +
+ sizeof(*ip6f) - sizeof(struct ip6_hdr)));
+ if ((m_frgpart = m_copy(m0, off, mtu)) == NULL) {
+ IP6STAT_INC(ip6s_odropped);
+ return (ENOBUFS);
+ }
+ m_cat(m, m_frgpart);
+ m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f);
+ m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum;
+ m->m_pkthdr.rcvif = NULL;
+ ip6f->ip6f_reserved = 0;
+ ip6f->ip6f_ident = id;
+ ip6f->ip6f_nxt = nextproto;
+ IP6STAT_INC(ip6s_ofragments);
+ in6_ifstat_inc(ifp, ifs6_out_fragcreat);
+ }
+
+ return (0);
+}
+
/*
* IP6 output. The packet in mbuf chain m contains a skeletal IP6
* header (with pri, len, nxt, hlim, src, dst).
@@ -220,22 +288,25 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
* skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
* then result of route lookup is stored in ro->ro_rt.
*
- * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
+ * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and
* nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
- * which is rt_rmx.rmx_mtu.
+ * which is rt_mtu.
*
* ifpp - XXX: just for statistics
*/
+/*
+ * XXX TODO: no flowid is assigned for outbound flows?
+ */
int
ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
struct ifnet **ifpp, struct inpcb *inp)
{
- struct ip6_hdr *ip6, *mhip6;
+ struct ip6_hdr *ip6;
struct ifnet *ifp, *origifp;
struct mbuf *m = m0;
struct mbuf *mprev = NULL;
- int hlen, tlen, len, off;
+ int hlen, tlen, len;
struct route_in6 ip6route;
struct rtentry *rt = NULL;
struct sockaddr_in6 *dst, src_sa, dst_sa;
@@ -246,31 +317,25 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
int alwaysfrag, dontfrag;
u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
struct ip6_exthdrs exthdrs;
- struct in6_addr finaldst, src0, dst0;
+ struct in6_addr src0, dst0;
u_int32_t zone;
struct route_in6 *ro_pmtu = NULL;
int hdrsplit = 0;
- int needipsec = 0;
int sw_csum, tso;
-#ifdef IPSEC
- struct ipsec_output_state state;
- struct ip6_rthdr *rh = NULL;
- int needipsectun = 0;
- int segleft_org = 0;
- struct secpolicy *sp = NULL;
-#endif /* IPSEC */
+ int needfiblookup;
+ uint32_t fibnum;
struct m_tag *fwd_tag = NULL;
+ uint32_t id;
- ip6 = mtod(m, struct ip6_hdr *);
- if (ip6 == NULL) {
- printf ("ip6 is NULL");
- goto bad;
- }
-
- if (inp != NULL)
+ if (inp != NULL) {
M_SETFIB(m, inp->inp_inc.inc_fibnum);
+ if ((flags & IP_NODEFAULTFLOWID) == 0) {
+ /* unconditionally set flowid */
+ m->m_pkthdr.flowid = inp->inp_flowid;
+ M_HASHTYPE_SET(m, inp->inp_flowtype);
+ }
+ }
- finaldst = ip6->ip6_dst;
bzero(&exthdrs, sizeof(exthdrs));
if (opt) {
/* Hop-by-Hop options header */
@@ -299,27 +364,14 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
/*
* IPSec checking which handles several cases.
* FAST IPSEC: We re-injected the packet.
+ * XXX: need scope argument.
*/
- switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
+ switch(ip6_ipsec_output(&m, inp, &error))
{
case 1: /* Bad packet */
goto freehdrs;
- case -1: /* Do IPSec */
- needipsec = 1;
- /*
- * Do delayed checksums now, as we may send before returning.
- */
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
- plen = m->m_pkthdr.len - sizeof(*ip6);
- in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr));
- m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
- }
-#ifdef SCTP
- if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
- sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
- m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
- }
-#endif
+ case -1: /* IPSec done */
+ goto done;
case 0: /* No IPSec */
default:
break;
@@ -339,15 +391,15 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
optlen += exthdrs.ip6e_rthdr->m_len;
unfragpartlen = optlen + sizeof(struct ip6_hdr);
- /* NOTE: we don't add AH/ESP length here. do that later. */
+ /* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */
if (exthdrs.ip6e_dest2)
optlen += exthdrs.ip6e_dest2->m_len;
/*
- * If we need IPsec, or there is at least one extension header,
+ * If there is at least one extension header,
* separate IP6 header from the payload.
*/
- if ((needipsec || optlen) && !hdrsplit) {
+ if (optlen && !hdrsplit) {
if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
m = NULL;
goto freehdrs;
@@ -356,7 +408,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
hdrsplit++;
}
- /* adjust pointer */
ip6 = mtod(m, struct ip6_hdr *);
/* adjust mbuf packet header length */
@@ -422,72 +473,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
IPPROTO_ROUTING);
-#ifdef IPSEC
- if (!needipsec)
- goto skip_ipsec2;
-
- /*
- * pointers after IPsec headers are not valid any more.
- * other pointers need a great care too.
- * (IPsec routines should not mangle mbufs prior to AH/ESP)
- */
- exthdrs.ip6e_dest2 = NULL;
-
- if (exthdrs.ip6e_rthdr) {
- rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
- segleft_org = rh->ip6r_segleft;
- rh->ip6r_segleft = 0;
- }
-
- bzero(&state, sizeof(state));
- state.m = m;
- error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
- &needipsectun);
- m = state.m;
- if (error == EJUSTRETURN) {
- /*
- * We had a SP with a level of 'use' and no SA. We
- * will just continue to process the packet without
- * IPsec processing.
- */
- ;
- } else if (error) {
- /* mbuf is already reclaimed in ipsec6_output_trans. */
- m = NULL;
- switch (error) {
- case EHOSTUNREACH:
- case ENETUNREACH:
- case EMSGSIZE:
- case ENOBUFS:
- case ENOMEM:
- break;
- default:
- printf("[%s:%d] (ipsec): error code %d\n",
- __func__, __LINE__, error);
- /* FALLTHROUGH */
- case ENOENT:
- /* don't show these error codes to the user */
- error = 0;
- break;
- }
- goto bad;
- } else if (!needipsectun) {
- /*
- * In the FAST IPSec case we have already
- * re-injected the packet and it has been freed
- * by the ipsec_done() function. So, just clean
- * up after ourselves.
- */
- m = NULL;
- goto done;
- }
- if (exthdrs.ip6e_rthdr) {
- /* ah6_output doesn't modify mbuf chain */
- rh->ip6r_segleft = segleft_org;
- }
-skip_ipsec2:;
-#endif /* IPSEC */
-
/*
* If there is a routing header, discard the packet.
*/
@@ -514,29 +499,20 @@ skip_ipsec2:;
/*
* Route packet.
*/
- if (ro == 0) {
+ if (ro == NULL) {
ro = &ip6route;
bzero((caddr_t)ro, sizeof(*ro));
- }
+ } else
+ ro->ro_flags |= RT_LLE_CACHE;
ro_pmtu = ro;
if (opt && opt->ip6po_rthdr)
ro = &opt->ip6po_route;
dst = (struct sockaddr_in6 *)&ro->ro_dst;
#ifdef FLOWTABLE
- if (ro->ro_rt == NULL) {
- struct flentry *fle;
-
- /*
- * The flow table returns route entries valid for up to 30
- * seconds; we rely on the remainder of ip_output() taking no
- * longer than that long for the stability of ro_rt. The
- * flow ID assignment must have happened before this point.
- */
- fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6);
- if (fle != NULL)
- flow_to_route_in6(fle, ro);
- }
+ if (ro->ro_rt == NULL)
+ (void )flowtable_lookup(AF_INET6, m, (struct route *)ro);
#endif
+ fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
again:
/*
* if specified, try to fill in the traffic class field.
@@ -563,82 +539,18 @@ again:
else
ip6->ip6_hlim = V_ip6_defmcasthlim;
}
-
-#ifdef IPSEC
/*
- * We may re-inject packets into the stack here.
+ * Validate route against routing table additions;
+ * a better/more specific route might have been added.
+ * Make sure address family is set in route.
*/
- if (needipsec && needipsectun) {
- struct ipsec_output_state state;
-
- /*
- * All the extension headers will become inaccessible
- * (since they can be encrypted).
- * Don't panic, we need no more updates to extension headers
- * on inner IPv6 packet (since they are now encapsulated).
- *
- * IPv6 [ESP|AH] IPv6 [extension headers] payload
- */
- bzero(&exthdrs, sizeof(exthdrs));
- exthdrs.ip6e_ip6 = m;
-
- bzero(&state, sizeof(state));
- state.m = m;
- state.ro = (struct route *)ro;
- state.dst = (struct sockaddr *)dst;
-
- error = ipsec6_output_tunnel(&state, sp, flags);
-
- m = state.m;
- ro = (struct route_in6 *)state.ro;
- dst = (struct sockaddr_in6 *)state.dst;
- if (error == EJUSTRETURN) {
- /*
- * We had a SP with a level of 'use' and no SA. We
- * will just continue to process the packet without
- * IPsec processing.
- */
- ;
- } else if (error) {
- /* mbuf is already reclaimed in ipsec6_output_tunnel. */
- m0 = m = NULL;
- m = NULL;
- switch (error) {
- case EHOSTUNREACH:
- case ENETUNREACH:
- case EMSGSIZE:
- case ENOBUFS:
- case ENOMEM:
- break;
- default:
- printf("[%s:%d] (ipsec): error code %d\n",
- __func__, __LINE__, error);
- /* FALLTHROUGH */
- case ENOENT:
- /* don't show these error codes to the user */
- error = 0;
- break;
- }
- goto bad;
- } else {
- /*
- * In the FAST IPSec case we have already
- * re-injected the packet and it has been freed
- * by the ipsec_done() function. So, just clean
- * up after ourselves.
- */
- m = NULL;
- goto done;
- }
-
- exthdrs.ip6e_ip6 = m;
+ if (inp) {
+ ro->ro_dst.sin6_family = AF_INET6;
+ RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum);
}
-#endif /* IPSEC */
-
- /* adjust pointer */
- ip6 = mtod(m, struct ip6_hdr *);
-
- if (ro->ro_rt && fwd_tag == NULL) {
+ if (ro->ro_rt && fwd_tag == NULL && (ro->ro_rt->rt_flags & RTF_UP) &&
+ ro->ro_dst.sin6_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) {
rt = ro->ro_rt;
ifp = ro->ro_rt->rt_ifp;
} else {
@@ -649,7 +561,7 @@ again:
dst_sa.sin6_addr = ip6->ip6_dst;
}
error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp,
- &rt, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
+ &rt, fibnum);
if (error != 0) {
if (ifp != NULL)
in6_ifstat_inc(ifp, ifs6_out_discard);
@@ -673,7 +585,7 @@ again:
}
if (rt != NULL) {
ia = (struct in6_ifaddr *)(rt->rt_ifa);
- rt->rt_use++;
+ counter_u64_add(rt->rt_pksent, 1);
}
@@ -758,7 +670,7 @@ again:
* thus deferring a hash lookup and lock acquisition
* at the expense of an m_copym().
*/
- ip6_mloopback(ifp, m, dst);
+ ip6_mloopback(ifp, m);
} else {
/*
* If we are acting as a multicast router, perform
@@ -776,9 +688,7 @@ again:
/*
* XXX: ip6_mforward expects that rcvif is NULL
* when it is called from the originating path.
- * However, it is not always the case, since
- * some versions of MGETHDR() does not
- * initialize the field.
+ * However, it may not always be the case.
*/
m->m_pkthdr.rcvif = NULL;
if (ip6_mforward(ip6, ifp, m) != 0) {
@@ -810,8 +720,8 @@ again:
*ifpp = ifp;
/* Determine path MTU. */
- if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
- &alwaysfrag, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m))) != 0)
+ if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst,
+ &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0)
goto bad;
/*
@@ -887,8 +797,10 @@ again:
error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
if (error != 0 || m == NULL)
goto done;
+ /* adjust pointer */
ip6 = mtod(m, struct ip6_hdr *);
+ needfiblookup = 0;
/* See if destination IP address was changed by packet filter. */
if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
m->m_flags |= M_SKIP_FIREWALL;
@@ -908,9 +820,20 @@ again:
#endif
error = netisr_queue(NETISR_IPV6, m);
goto done;
- } else
- goto again; /* Redo the routing table lookup. */
+ } else {
+ RO_RTFREE(ro);
+ needfiblookup = 1; /* Redo the routing table lookup. */
+ }
}
+ /* See if fib was changed by packet filter. */
+ if (fibnum != M_GETFIB(m)) {
+ m->m_flags |= M_SKIP_FIREWALL;
+ fibnum = M_GETFIB(m);
+ RO_RTFREE(ro);
+ needfiblookup = 1;
+ }
+ if (needfiblookup)
+ goto again;
/* See if local, if yes, send it to netisr. */
if (m->m_flags & M_FASTFWD_OURS) {
@@ -1018,11 +941,13 @@ passout:
ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
if (ia6) {
/* Record statistics for this interface address. */
- ia6->ia_ifa.if_opackets++;
- ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
+ counter_u64_add(ia6->ia_ifa.ifa_opackets, 1);
+ counter_u64_add(ia6->ia_ifa.ifa_obytes,
+ m->m_pkthdr.len);
ifa_free(&ia6->ia_ifa);
}
- error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
+ error = nd6_output_ifp(ifp, origifp, m, dst,
+ (struct route *)ro);
goto done;
}
@@ -1040,13 +965,8 @@ passout:
in6_ifstat_inc(ifp, ifs6_out_fragfail);
goto bad;
} else {
- struct mbuf **mnext, *m_frgpart;
- struct ip6_frag *ip6f;
- u_int32_t id = htonl(ip6_randomid());
u_char nextproto;
- int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
-
/*
* Too large for the destination or interface;
* fragment if possible.
@@ -1064,18 +984,6 @@ passout:
}
/*
- * Verify that we have any chance at all of being able to queue
- * the packet or packet fragments
- */
- if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
- < tlen /* - hlen */)) {
- error = ENOBUFS;
- IP6STAT_INC(ip6s_odropped);
- goto bad;
- }
-
-
- /*
* If the interface will not calculate checksums on
* fragmented packets, then do it here.
* XXX-BZ handle the hw offloading case. Need flags.
@@ -1090,8 +998,6 @@ passout:
m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
}
#endif
- mnext = &m->m_nextpkt;
-
/*
* Change the next header field of the last header in the
* unfragmentable part.
@@ -1116,47 +1022,9 @@ passout:
* chain.
*/
m0 = m;
- for (off = hlen; off < tlen; off += len) {
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
- if (!m) {
- error = ENOBUFS;
- IP6STAT_INC(ip6s_odropped);
- goto sendorfree;
- }
- m->m_pkthdr.rcvif = NULL;
- m->m_flags = m0->m_flags & M_COPYFLAGS; /* incl. FIB */
- *mnext = m;
- mnext = &m->m_nextpkt;
- m->m_data += max_linkhdr;
- mhip6 = mtod(m, struct ip6_hdr *);
- *mhip6 = *ip6;
- m->m_len = sizeof(*mhip6);
- error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
- if (error) {
- IP6STAT_INC(ip6s_odropped);
- goto sendorfree;
- }
- ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
- if (off + len >= tlen)
- len = tlen - off;
- else
- ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
- mhip6->ip6_plen = htons((u_short)(len + hlen +
- sizeof(*ip6f) - sizeof(struct ip6_hdr)));
- if ((m_frgpart = m_copy(m0, off, len)) == 0) {
- error = ENOBUFS;
- IP6STAT_INC(ip6s_odropped);
- goto sendorfree;
- }
- m_cat(m, m_frgpart);
- m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
- m->m_pkthdr.rcvif = NULL;
- ip6f->ip6f_reserved = 0;
- ip6f->ip6f_ident = id;
- ip6f->ip6f_nxt = nextproto;
- IP6STAT_INC(ip6s_ofragments);
- in6_ifstat_inc(ifp, ifs6_out_fragcreat);
- }
+ id = htonl(ip6_randomid());
+ if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id)))
+ goto sendorfree;
in6_ifstat_inc(ifp, ifs6_out_fragok);
}
@@ -1174,10 +1042,12 @@ sendorfree:
if (error == 0) {
/* Record statistics for this interface address. */
if (ia) {
- ia->ia_ifa.if_opackets++;
- ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+ counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
+ counter_u64_add(ia->ia_ifa.ifa_obytes,
+ m->m_pkthdr.len);
}
- error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
+ error = nd6_output_ifp(ifp, origifp, m, dst,
+ (struct route *)ro);
} else
m_freem(m);
}
@@ -1186,15 +1056,13 @@ sendorfree:
IP6STAT_INC(ip6s_fragmented);
done:
- if (ro == &ip6route)
+ /*
+ * Release the route if using our private route, or if
+ * (with flowtable) we don't have our own reference.
+ */
+ if (ro == &ip6route ||
+ (ro != NULL && ro->ro_flags & RT_NORTREF))
RO_RTFREE(ro);
- if (ro_pmtu == &ip6route)
- RO_RTFREE(ro_pmtu);
-#ifdef IPSEC
- if (sp != NULL)
- KEY_FREESP(&sp);
-#endif
-
return (error);
freehdrs:
@@ -1217,17 +1085,12 @@ ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
if (hlen > MCLBYTES)
return (ENOBUFS); /* XXX */
- MGET(m, M_DONTWAIT, MT_DATA);
- if (!m)
+ if (hlen > MLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, 0);
+ else
+ m = m_get(M_NOWAIT, MT_DATA);
+ if (m == NULL)
return (ENOBUFS);
-
- if (hlen > MLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- m_free(m);
- return (ENOBUFS);
- }
- }
m->m_len = hlen;
if (hdr)
bcopy(hdr, mtod(m, caddr_t), hlen);
@@ -1254,9 +1117,9 @@ ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
* jumbo payload option, allocate a cluster to store the whole options.
* Otherwise, use it to store the options.
*/
- if (exthdrs->ip6e_hbh == 0) {
- MGET(mopt, M_DONTWAIT, MT_DATA);
- if (mopt == 0)
+ if (exthdrs->ip6e_hbh == NULL) {
+ mopt = m_get(M_NOWAIT, MT_DATA);
+ if (mopt == NULL)
return (ENOBUFS);
mopt->m_len = JUMBOOPTLEN;
optbuf = mtod(mopt, u_char *);
@@ -1287,15 +1150,8 @@ ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
* As a consequence, we must always prepare a cluster
* at this point.
*/
- MGET(n, M_DONTWAIT, MT_DATA);
- if (n) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_freem(n);
- n = NULL;
- }
- }
- if (!n)
+ n = m_getcl(M_NOWAIT, MT_DATA, 0);
+ if (n == NULL)
return (ENOBUFS);
n->m_len = oldoptlen + JUMBOOPTLEN;
bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
@@ -1342,8 +1198,8 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
if (hlen > sizeof(struct ip6_hdr)) {
n = m_copym(m0, sizeof(struct ip6_hdr),
- hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
- if (n == 0)
+ hlen - sizeof(struct ip6_hdr), M_NOWAIT);
+ if (n == NULL)
return (ENOBUFS);
m->m_next = n;
} else
@@ -1353,7 +1209,7 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
for (mlast = n; mlast->m_next; mlast = mlast->m_next)
;
- if ((mlast->m_flags & M_EXT) == 0 &&
+ if (M_WRITABLE(mlast) &&
M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
/* use the trailing space of the last mbuf for the fragment hdr */
*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
@@ -1364,8 +1220,8 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
/* allocate a new mbuf for the fragment header */
struct mbuf *mfrg;
- MGET(mfrg, M_DONTWAIT, MT_DATA);
- if (mfrg == 0)
+ mfrg = m_get(M_NOWAIT, MT_DATA);
+ if (mfrg == NULL)
return (ENOBUFS);
mfrg->m_len = sizeof(struct ip6_frag);
*frghdrp = mtod(mfrg, struct ip6_frag *);
@@ -1375,35 +1231,105 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
return (0);
}
+/*
+ * Calculates IPv6 path mtu for destination @dst.
+ * Resulting MTU is stored in @mtup.
+ *
+ * Returns 0 on success.
+ */
static int
-ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
- struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
- int *alwaysfragp, u_int fibnum)
+ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup)
{
- u_int32_t mtu = 0;
- int alwaysfrag = 0;
- int error = 0;
+ struct nhop6_extended nh6;
+ struct in6_addr kdst;
+ uint32_t scopeid;
+ struct ifnet *ifp;
+ u_long mtu;
+ int error;
- if (ro_pmtu != ro) {
- /* The first hop and the final destination may differ. */
- struct sockaddr_in6 *sa6_dst =
- (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
- if (ro_pmtu->ro_rt &&
- ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
- !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
- RTFREE(ro_pmtu->ro_rt);
- ro_pmtu->ro_rt = (struct rtentry *)NULL;
- }
- if (ro_pmtu->ro_rt == NULL) {
+ in6_splitscope(dst, &kdst, &scopeid);
+ if (fib6_lookup_nh_ext(fibnum, &kdst, scopeid, NHR_REF, 0, &nh6) != 0)
+ return (EHOSTUNREACH);
+
+ ifp = nh6.nh_ifp;
+ mtu = nh6.nh_mtu;
+
+ error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL, 0);
+ fib6_free_nh_ext(fibnum, &nh6);
+
+ return (error);
+}
+
+/*
+ * Calculates IPv6 path MTU for @dst based on transmit @ifp,
+ * and cached data in @ro_pmtu.
+ * MTU from (successful) route lookup is saved (along with dst)
+ * inside @ro_pmtu to avoid subsequent route lookups after packet
+ * filter processing.
+ *
+ * Stores mtu and always-frag value into @mtup and @alwaysfragp.
+ * Returns 0 on success.
+ */
+static int
+ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup,
+ struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup,
+ int *alwaysfragp, u_int fibnum, u_int proto)
+{
+ struct nhop6_basic nh6;
+ struct in6_addr kdst;
+ uint32_t scopeid;
+ struct sockaddr_in6 *sa6_dst;
+ u_long mtu;
+
+ mtu = 0;
+ if (do_lookup) {
+
+ /*
+ * Here ro_pmtu has final destination address, while
+ * ro might represent immediate destination.
+ * Use ro_pmtu destination since mtu might differ.
+ */
+ sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
+ if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))
+ ro_pmtu->ro_mtu = 0;
+
+ if (ro_pmtu->ro_mtu == 0) {
bzero(sa6_dst, sizeof(*sa6_dst));
sa6_dst->sin6_family = AF_INET6;
sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
sa6_dst->sin6_addr = *dst;
- in6_rtalloc(ro_pmtu, fibnum);
+ in6_splitscope(dst, &kdst, &scopeid);
+ if (fib6_lookup_nh_basic(fibnum, &kdst, scopeid, 0, 0,
+ &nh6) == 0)
+ ro_pmtu->ro_mtu = nh6.nh_mtu;
}
+
+ mtu = ro_pmtu->ro_mtu;
}
- if (ro_pmtu->ro_rt) {
+
+ if (ro_pmtu->ro_rt)
+ mtu = ro_pmtu->ro_rt->rt_mtu;
+
+ return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto));
+}
+
+/*
+ * Calculate MTU based on transmit @ifp, route mtu @rt_mtu and
+ * hostcache data for @dst.
+ * Stores mtu and always-frag value into @mtup and @alwaysfragp.
+ *
+ * Returns 0 on success.
+ */
+static int
+ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu,
+ u_long *mtup, int *alwaysfragp, u_int proto)
+{
+ u_long mtu = 0;
+ int alwaysfrag = 0;
+ int error = 0;
+
+ if (rt_mtu > 0) {
u_int32_t ifmtu;
struct in_conninfo inc;
@@ -1411,14 +1337,16 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
inc.inc_flags |= INC_ISIPV6;
inc.inc6_faddr = *dst;
- if (ifp == NULL)
- ifp = ro_pmtu->ro_rt->rt_ifp;
ifmtu = IN6_LINKMTU(ifp);
- mtu = tcp_hc_getmtu(&inc);
+
+ /* TCP is known to react to pmtu changes so skip hc */
+ if (proto != IPPROTO_TCP)
+ mtu = tcp_hc_getmtu(&inc);
+
if (mtu)
- mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
+ mtu = min(mtu, rt_mtu);
else
- mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
+ mtu = rt_mtu;
if (mtu == 0)
mtu = ifmtu;
else if (mtu < IPV6_MMTU) {
@@ -1432,17 +1360,6 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
*/
alwaysfrag = 1;
mtu = IPV6_MMTU;
- } else if (mtu > ifmtu) {
- /*
- * The MTU on the route is larger than the MTU on
- * the interface! This shouldn't happen, unless the
- * MTU of the interface has been changed after the
- * interface was brought up. Change the MTU in the
- * route to match the interface MTU (as long as the
- * field isn't locked).
- */
- mtu = ifmtu;
- ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
}
} else if (ifp) {
mtu = IN6_LINKMTU(ifp);
@@ -1468,6 +1385,10 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
int level, op, optname;
int optlen;
struct thread *td;
+#ifdef RSS
+ uint32_t rss_bucket;
+ int retval;
+#endif
level = sopt->sopt_level;
op = sopt->sopt_dir;
@@ -1561,16 +1482,23 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
/* FALLTHROUGH */
case IPV6_UNICAST_HOPS:
case IPV6_HOPLIMIT:
- case IPV6_FAITH:
case IPV6_RECVPKTINFO:
case IPV6_RECVHOPLIMIT:
case IPV6_RECVRTHDR:
case IPV6_RECVPATHMTU:
case IPV6_RECVTCLASS:
+ case IPV6_RECVFLOWID:
+#ifdef RSS
+ case IPV6_RECVRSSBUCKETID:
+#endif
case IPV6_V6ONLY:
case IPV6_AUTOFLOWLABEL:
case IPV6_BINDANY:
+ case IPV6_BINDMULTI:
+#ifdef RSS
+ case IPV6_RSS_LISTEN_BUCKET:
+#endif
if (optname == IPV6_BINDANY && td != NULL) {
error = priv_check(td,
PRIV_NETINET_BINDANY);
@@ -1620,6 +1548,16 @@ do { \
} while (/*CONSTCOND*/ 0)
#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
+#define OPTSET2(bit, val) do { \
+ INP_WLOCK(in6p); \
+ if (val) \
+ in6p->inp_flags2 |= bit; \
+ else \
+ in6p->inp_flags2 &= ~bit; \
+ INP_WUNLOCK(in6p); \
+} while (0)
+#define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0)
+
case IPV6_RECVPKTINFO:
/* cannot mix with RFC2292 */
if (OPTBIT(IN6P_RFC2292)) {
@@ -1691,10 +1629,6 @@ do { \
OPTSET(IN6P_RTHDR);
break;
- case IPV6_FAITH:
- OPTSET(INP_FAITH);
- break;
-
case IPV6_RECVPATHMTU:
/*
* We ignore this option for TCP
@@ -1706,6 +1640,16 @@ do { \
OPTSET(IN6P_MTU);
break;
+ case IPV6_RECVFLOWID:
+ OPTSET2(INP_RECVFLOWID, optval);
+ break;
+
+#ifdef RSS
+ case IPV6_RECVRSSBUCKETID:
+ OPTSET2(INP_RECVRSSBUCKETID, optval);
+ break;
+#endif
+
case IPV6_V6ONLY:
/*
* make setsockopt(IPV6_V6ONLY)
@@ -1738,6 +1682,21 @@ do { \
case IPV6_BINDANY:
OPTSET(INP_BINDANY);
break;
+
+ case IPV6_BINDMULTI:
+ OPTSET2(INP_BINDMULTI, optval);
+ break;
+#ifdef RSS
+ case IPV6_RSS_LISTEN_BUCKET:
+ if ((optval >= 0) &&
+ (optval < rss_getnumbuckets())) {
+ in6p->inp_rss_listen_bucket = optval;
+ OPTSET2(INP_RSS_BUCKET_SET, 1);
+ } else {
+ error = EINVAL;
+ }
+ break;
+#endif
}
break;
@@ -1947,12 +1906,19 @@ do { \
case IPV6_RECVRTHDR:
case IPV6_RECVPATHMTU:
- case IPV6_FAITH:
case IPV6_V6ONLY:
case IPV6_PORTRANGE:
case IPV6_RECVTCLASS:
case IPV6_AUTOFLOWLABEL:
case IPV6_BINDANY:
+ case IPV6_FLOWID:
+ case IPV6_FLOWTYPE:
+ case IPV6_RECVFLOWID:
+#ifdef RSS
+ case IPV6_RSSBUCKETID:
+ case IPV6_RECVRSSBUCKETID:
+#endif
+ case IPV6_BINDMULTI:
switch (optname) {
case IPV6_RECVHOPOPTS:
@@ -1987,10 +1953,6 @@ do { \
optval = OPTBIT(IN6P_MTU);
break;
- case IPV6_FAITH:
- optval = OPTBIT(INP_FAITH);
- break;
-
case IPV6_V6ONLY:
optval = OPTBIT(IN6P_IPV6_V6ONLY);
break;
@@ -2018,6 +1980,39 @@ do { \
case IPV6_BINDANY:
optval = OPTBIT(INP_BINDANY);
break;
+
+ case IPV6_FLOWID:
+ optval = in6p->inp_flowid;
+ break;
+
+ case IPV6_FLOWTYPE:
+ optval = in6p->inp_flowtype;
+ break;
+
+ case IPV6_RECVFLOWID:
+ optval = OPTBIT2(INP_RECVFLOWID);
+ break;
+#ifdef RSS
+ case IPV6_RSSBUCKETID:
+ retval =
+ rss_hash2bucket(in6p->inp_flowid,
+ in6p->inp_flowtype,
+ &rss_bucket);
+ if (retval == 0)
+ optval = rss_bucket;
+ else
+ error = EINVAL;
+ break;
+
+ case IPV6_RECVRSSBUCKETID:
+ optval = OPTBIT2(INP_RECVRSSBUCKETID);
+ break;
+#endif
+
+ case IPV6_BINDMULTI:
+ optval = OPTBIT2(INP_BINDMULTI);
+ break;
+
}
if (error)
break;
@@ -2029,9 +2024,6 @@ do { \
{
u_long pmtu = 0;
struct ip6_mtuinfo mtuinfo;
- struct route_in6 sro;
-
- bzero(&sro, sizeof(sro));
if (!(so->so_state & SS_ISCONNECTED))
return (ENOTCONN);
@@ -2040,11 +2032,8 @@ do { \
* routing, or optional information to specify
* the outgoing interface.
*/
- error = ip6_getpmtu(&sro, NULL, NULL,
- &in6p->in6p_faddr, &pmtu, NULL,
- so->so_fibnum);
- if (sro.ro_rt)
- RTFREE(sro.ro_rt);
+ error = ip6_getpmtu_ctl(so->so_fibnum,
+ &in6p->in6p_faddr, &pmtu);
if (error)
break;
if (pmtu > IPV6_MAXPACKET)
@@ -2307,12 +2296,14 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
switch (optname) {
case IPV6_PKTINFO:
- if (pktopt && pktopt->ip6po_pktinfo)
- optdata = (void *)pktopt->ip6po_pktinfo;
- else {
+ optdata = (void *)&null_pktinfo;
+ if (pktopt && pktopt->ip6po_pktinfo) {
+ bcopy(pktopt->ip6po_pktinfo, &null_pktinfo,
+ sizeof(null_pktinfo));
+ in6_clearscope(&null_pktinfo.ipi6_addr);
+ } else {
/* XXX: we don't have to do this every time... */
bzero(&null_pktinfo, sizeof(null_pktinfo));
- optdata = (void *)&null_pktinfo;
}
optdatalen = sizeof(struct in6_pktinfo);
break;
@@ -2529,7 +2520,7 @@ int
ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto)
{
- struct cmsghdr *cm = 0;
+ struct cmsghdr *cm = NULL;
if (control == NULL || opt == NULL)
return (EINVAL);
@@ -2666,18 +2657,30 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
return (EINVAL);
}
-
+ if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr))
+ return (EINVAL);
/* validate the interface index if specified. */
- if (pktinfo->ipi6_ifindex > V_if_index ||
- pktinfo->ipi6_ifindex < 0) {
+ if (pktinfo->ipi6_ifindex > V_if_index)
return (ENXIO);
- }
if (pktinfo->ipi6_ifindex) {
ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
if (ifp == NULL)
return (ENXIO);
}
-
+ if (ifp != NULL && (ifp->if_afdata[AF_INET6] == NULL ||
+ (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0))
+ return (ENETDOWN);
+
+ if (ifp != NULL &&
+ !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
+ struct in6_ifaddr *ia;
+
+ in6_setscope(&pktinfo->ipi6_addr, ifp, NULL);
+ ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr);
+ if (ia == NULL)
+ return (EADDRNOTAVAIL);
+ ifa_free(&ia->ia_ifa);
+ }
/*
* We store the address anyway, and let in6_selectsrc()
* validate the specified address. This is because ipi6_addr
@@ -2987,7 +2990,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
* pointer that might NOT be &loif -- easier than replicating that code here.
*/
void
-ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
+ip6_mloopback(struct ifnet *ifp, struct mbuf *m)
{
struct mbuf *copym;
struct ip6_hdr *ip6;
@@ -3001,20 +3004,12 @@ ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
* is in an mbuf cluster, so that we can safely override the IPv6
* header portion later.
*/
- if ((copym->m_flags & M_EXT) != 0 ||
+ if (!M_WRITABLE(copym) ||
copym->m_len < sizeof(struct ip6_hdr)) {
copym = m_pullup(copym, sizeof(struct ip6_hdr));
if (copym == NULL)
return;
}
-
-#ifdef DIAGNOSTIC
- if (copym->m_len < sizeof(*ip6)) {
- m_freem(copym);
- return;
- }
-#endif
-
ip6 = mtod(copym, struct ip6_hdr *);
/*
* clear embedded scope identifiers if necessary.
@@ -3022,8 +3017,12 @@ ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
*/
in6_clearscope(&ip6->ip6_src);
in6_clearscope(&ip6->ip6_dst);
-
- (void)if_simloop(ifp, copym, dst->sin6_family, 0);
+ if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
+ copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 |
+ CSUM_PSEUDO_HDR;
+ copym->m_pkthdr.csum_data = 0xffff;
+ }
+ if_simloop(ifp, copym, AF_INET6, 0);
}
/*
@@ -3037,13 +3036,13 @@ ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
ip6 = mtod(m, struct ip6_hdr *);
if (m->m_len > sizeof(*ip6)) {
- MGETHDR(mh, M_DONTWAIT, MT_HEADER);
- if (mh == 0) {
+ mh = m_gethdr(M_NOWAIT, MT_DATA);
+ if (mh == NULL) {
m_freem(m);
return ENOBUFS;
}
- M_MOVE_PKTHDR(mh, m);
- MH_ALIGN(mh, sizeof(*ip6));
+ m_move_pkthdr(mh, m);
+ M_ALIGN(mh, sizeof(*ip6));
m->m_len -= sizeof(*ip6);
m->m_data += sizeof(*ip6);
mh->m_next = m;
diff --git a/freebsd/sys/netinet6/ip6_var.h b/freebsd/sys/netinet6/ip6_var.h
index 4e8c42bd..e52a3206 100644
--- a/freebsd/sys/netinet6/ip6_var.h
+++ b/freebsd/sys/netinet6/ip6_var.h
@@ -99,6 +99,14 @@ struct ip6asfrag {
#define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m))
/*
+ * IP6 reinjecting structure.
+ */
+struct ip6_direct_ctx {
+ uint32_t ip6dc_nxt; /* next header to process */
+ uint32_t ip6dc_off; /* offset to next header */
+};
+
+/*
* Structure attached to inpcb.in6p_moptions and
* passed to ip6_output when IPv6 multicast options are in use.
* This structure is lazy-allocated.
@@ -181,39 +189,39 @@ struct ip6_pktopts {
*/
struct ip6stat {
- u_quad_t ip6s_total; /* total packets received */
- u_quad_t ip6s_tooshort; /* packet too short */
- u_quad_t ip6s_toosmall; /* not enough data */
- u_quad_t ip6s_fragments; /* fragments received */
- u_quad_t ip6s_fragdropped; /* frags dropped(dups, out of space) */
- u_quad_t ip6s_fragtimeout; /* fragments timed out */
- u_quad_t ip6s_fragoverflow; /* fragments that exceeded limit */
- u_quad_t ip6s_forward; /* packets forwarded */
- u_quad_t ip6s_cantforward; /* packets rcvd for unreachable dest */
- u_quad_t ip6s_redirectsent; /* packets forwarded on same net */
- u_quad_t ip6s_delivered; /* datagrams delivered to upper level*/
- u_quad_t ip6s_localout; /* total ip packets generated here */
- u_quad_t ip6s_odropped; /* lost packets due to nobufs, etc. */
- u_quad_t ip6s_reassembled; /* total packets reassembled ok */
- u_quad_t ip6s_fragmented; /* datagrams successfully fragmented */
- u_quad_t ip6s_ofragments; /* output fragments created */
- u_quad_t ip6s_cantfrag; /* don't fragment flag was set, etc. */
- u_quad_t ip6s_badoptions; /* error in option processing */
- u_quad_t ip6s_noroute; /* packets discarded due to no route */
- u_quad_t ip6s_badvers; /* ip6 version != 6 */
- u_quad_t ip6s_rawout; /* total raw ip packets generated */
- u_quad_t ip6s_badscope; /* scope error */
- u_quad_t ip6s_notmember; /* don't join this multicast group */
+ uint64_t ip6s_total; /* total packets received */
+ uint64_t ip6s_tooshort; /* packet too short */
+ uint64_t ip6s_toosmall; /* not enough data */
+ uint64_t ip6s_fragments; /* fragments received */
+ uint64_t ip6s_fragdropped; /* frags dropped(dups, out of space) */
+ uint64_t ip6s_fragtimeout; /* fragments timed out */
+ uint64_t ip6s_fragoverflow; /* fragments that exceeded limit */
+ uint64_t ip6s_forward; /* packets forwarded */
+ uint64_t ip6s_cantforward; /* packets rcvd for unreachable dest */
+ uint64_t ip6s_redirectsent; /* packets forwarded on same net */
+ uint64_t ip6s_delivered; /* datagrams delivered to upper level*/
+ uint64_t ip6s_localout; /* total ip packets generated here */
+ uint64_t ip6s_odropped; /* lost packets due to nobufs, etc. */
+ uint64_t ip6s_reassembled; /* total packets reassembled ok */
+ uint64_t ip6s_fragmented; /* datagrams successfully fragmented */
+ uint64_t ip6s_ofragments; /* output fragments created */
+ uint64_t ip6s_cantfrag; /* don't fragment flag was set, etc. */
+ uint64_t ip6s_badoptions; /* error in option processing */
+ uint64_t ip6s_noroute; /* packets discarded due to no route */
+ uint64_t ip6s_badvers; /* ip6 version != 6 */
+ uint64_t ip6s_rawout; /* total raw ip packets generated */
+ uint64_t ip6s_badscope; /* scope error */
+ uint64_t ip6s_notmember; /* don't join this multicast group */
#define IP6S_HDRCNT 256 /* headers count */
- u_quad_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */
- u_quad_t ip6s_m1; /* one mbuf */
+ uint64_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */
+ uint64_t ip6s_m1; /* one mbuf */
#define IP6S_M2MMAX 32
- u_quad_t ip6s_m2m[IP6S_M2MMAX]; /* two or more mbuf */
- u_quad_t ip6s_mext1; /* one ext mbuf */
- u_quad_t ip6s_mext2m; /* two or more ext mbuf */
- u_quad_t ip6s_exthdrtoolong; /* ext hdr are not contiguous */
- u_quad_t ip6s_nogif; /* no match gif found */
- u_quad_t ip6s_toomanyhdr; /* discarded due to too many headers */
+ uint64_t ip6s_m2m[IP6S_M2MMAX]; /* two or more mbuf */
+ uint64_t ip6s_mext1; /* one ext mbuf */
+ uint64_t ip6s_mext2m; /* two or more ext mbuf */
+ uint64_t ip6s_exthdrtoolong; /* ext hdr are not contiguous */
+ uint64_t ip6s_nogif; /* no match gif found */
+ uint64_t ip6s_toomanyhdr; /* discarded due to too many headers */
/*
* statistics for improvement of the source address selection
@@ -223,81 +231,51 @@ struct ip6stat {
#define IP6S_RULESMAX 16
#define IP6S_SCOPECNT 16
/* number of times that address selection fails */
- u_quad_t ip6s_sources_none;
+ uint64_t ip6s_sources_none;
/* number of times that an address on the outgoing I/F is chosen */
- u_quad_t ip6s_sources_sameif[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_sameif[IP6S_SCOPECNT];
/* number of times that an address on a non-outgoing I/F is chosen */
- u_quad_t ip6s_sources_otherif[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_otherif[IP6S_SCOPECNT];
/*
* number of times that an address that has the same scope
* from the destination is chosen.
*/
- u_quad_t ip6s_sources_samescope[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_samescope[IP6S_SCOPECNT];
/*
* number of times that an address that has a different scope
* from the destination is chosen.
*/
- u_quad_t ip6s_sources_otherscope[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_otherscope[IP6S_SCOPECNT];
/* number of times that a deprecated address is chosen */
- u_quad_t ip6s_sources_deprecated[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_deprecated[IP6S_SCOPECNT];
/* number of times that each rule of source selection is applied. */
- u_quad_t ip6s_sources_rule[IP6S_RULESMAX];
+ uint64_t ip6s_sources_rule[IP6S_RULESMAX];
};
#ifdef _KERNEL
-#define IP6STAT_ADD(name, val) V_ip6stat.name += (val)
-#define IP6STAT_SUB(name, val) V_ip6stat.name -= (val)
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct ip6stat, ip6stat);
+#define IP6STAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct ip6stat, ip6stat, name, (val))
+#define IP6STAT_SUB(name, val) IP6STAT_ADD(name, -(val))
#define IP6STAT_INC(name) IP6STAT_ADD(name, 1)
#define IP6STAT_DEC(name) IP6STAT_SUB(name, 1)
#endif
#ifdef _KERNEL
-/*
- * IPv6 onion peeling state.
- * it will be initialized when we come into ip6_input().
- * XXX do not make it a kitchen sink!
- */
-struct ip6aux {
- u_int32_t ip6a_flags;
-#define IP6A_SWAP 0x01 /* swapped home/care-of on packet */
-#define IP6A_HASEEN 0x02 /* HA was present */
-#define IP6A_BRUID 0x04 /* BR Unique Identifier was present */
-#define IP6A_RTALERTSEEN 0x08 /* rtalert present */
-
- /* ip6.ip6_src */
- struct in6_addr ip6a_careof; /* care-of address of the peer */
- struct in6_addr ip6a_home; /* home address of the peer */
- u_int16_t ip6a_bruid; /* BR unique identifier */
-
- /* ip6.ip6_dst */
- struct in6_ifaddr *ip6a_dstia6; /* my ifaddr that matches ip6_dst */
-
- /* rtalert */
- u_int16_t ip6a_rtalert; /* rtalert option value */
-
- /*
- * decapsulation history will be here.
- * with IPsec it may not be accurate.
- */
-};
-#endif
-
-#ifdef _KERNEL
/* flags passed to ip6_output as last parameter */
#define IPV6_UNSPECSRC 0x01 /* allow :: as the source address */
#define IPV6_FORWARDING 0x02 /* most of IPv6 header exists */
#define IPV6_MINMTU 0x04 /* use minimum MTU (IPV6_USE_MIN_MTU) */
-#define M_IP6_NEXTHOP M_PROTO7 /* explicit ip nexthop */
-
#ifdef __NO_STRICT_ALIGNMENT
#define IP6_HDR_ALIGNED_P(ip) 1
#else
#define IP6_HDR_ALIGNED_P(ip) ((((intptr_t) (ip)) & 3) == 0)
#endif
-VNET_DECLARE(struct ip6stat, ip6stat); /* statistics */
VNET_DECLARE(int, ip6_defhlim); /* default hop limit */
VNET_DECLARE(int, ip6_defmcasthlim); /* default multicast hop limit */
VNET_DECLARE(int, ip6_forwarding); /* act as router? */
@@ -306,7 +284,6 @@ VNET_DECLARE(int, ip6_rr_prune); /* router renumbering prefix
* walk list every 5 sec. */
VNET_DECLARE(int, ip6_mcast_pmtu); /* enable pMTU discovery for multicast? */
VNET_DECLARE(int, ip6_v6only);
-#define V_ip6stat VNET(ip6stat)
#define V_ip6_defhlim VNET(ip6_defhlim)
#define V_ip6_defmcasthlim VNET(ip6_defmcasthlim)
#define V_ip6_forwarding VNET(ip6_forwarding)
@@ -327,7 +304,6 @@ VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA
* receiving IF. */
VNET_DECLARE(int, ip6_rfc6204w3); /* Accept defroute from RA even when
forwarding enabled */
-VNET_DECLARE(int, ip6_keepfaith); /* Firewall Aided Internet Translator */
VNET_DECLARE(int, ip6_log_interval);
VNET_DECLARE(time_t, ip6_log_time);
VNET_DECLARE(int, ip6_hdrnestlimit); /* upper limit of # of extension
@@ -341,7 +317,6 @@ VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */
#define V_ip6_no_radr VNET(ip6_no_radr)
#define V_ip6_norbit_raif VNET(ip6_norbit_raif)
#define V_ip6_rfc6204w3 VNET(ip6_rfc6204w3)
-#define V_ip6_keepfaith VNET(ip6_keepfaith)
#define V_ip6_log_interval VNET(ip6_log_interval)
#define V_ip6_log_time VNET(ip6_log_time)
#define V_ip6_hdrnestlimit VNET(ip6_hdrnestlimit)
@@ -379,24 +354,17 @@ int icmp6_ctloutput(struct socket *, struct sockopt *sopt);
struct in6_ifaddr;
void ip6_init(void);
-#ifdef VIMAGE
-void ip6_destroy(void);
-#endif
int ip6proto_register(short);
int ip6proto_unregister(short);
void ip6_input(struct mbuf *);
-struct in6_ifaddr *ip6_getdstifaddr(struct mbuf *);
+void ip6_direct_input(struct mbuf *);
void ip6_freepcbopts(struct ip6_pktopts *);
int ip6_unknown_opt(u_int8_t *, struct mbuf *, int);
-char * ip6_get_prevhdr(struct mbuf *, int);
-int ip6_nexthdr(struct mbuf *, int, int, int *);
-int ip6_lasthdr(struct mbuf *, int, int, int *);
-
-#ifdef __notyet__
-struct ip6aux *ip6_findaux(struct mbuf *);
-#endif
+char * ip6_get_prevhdr(const struct mbuf *, int);
+int ip6_nexthdr(const struct mbuf *, int, int, int *);
+int ip6_lasthdr(const struct mbuf *, int, int, int *);
extern int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *,
struct mbuf *);
@@ -411,7 +379,7 @@ int ip6_sysctl(int *, u_int, void *, size_t *, void *, size_t);
void ip6_forward(struct mbuf *, int);
-void ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *);
+void ip6_mloopback(struct ifnet *, struct mbuf *);
int ip6_output(struct mbuf *, struct ip6_pktopts *,
struct route_in6 *,
int,
@@ -425,6 +393,9 @@ int ip6_setpktopts(struct mbuf *, struct ip6_pktopts *,
void ip6_clearpktopts(struct ip6_pktopts *, int);
struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int);
int ip6_optlen(struct inpcb *);
+int ip6_deletefraghdr(struct mbuf *, int, int);
+int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int,
+ uint32_t);
int route6_input(struct mbuf **, int *, int);
@@ -437,16 +408,17 @@ void rip6_init(void);
int rip6_input(struct mbuf **, int *, int);
void rip6_ctlinput(int, struct sockaddr *, void *);
int rip6_ctloutput(struct socket *, struct sockopt *);
-int rip6_output(struct mbuf *, ...);
+int rip6_output(struct mbuf *, struct socket *, ...);
int rip6_usrreq(struct socket *,
int, struct mbuf *, struct mbuf *, struct mbuf *, struct thread *);
int dest6_input(struct mbuf **, int *, int);
int none_input(struct mbuf **, int *, int);
-int in6_selectsrc(struct sockaddr_in6 *, struct ip6_pktopts *,
- struct inpcb *inp, struct route_in6 *, struct ucred *cred,
- struct ifnet **, struct in6_addr *);
+int in6_selectsrc_socket(struct sockaddr_in6 *, struct ip6_pktopts *,
+ struct inpcb *, struct ucred *, int, struct in6_addr *, int *);
+int in6_selectsrc_addr(uint32_t, const struct in6_addr *,
+ uint32_t, struct ifnet *, struct in6_addr *, int *);
int in6_selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
struct ip6_moptions *, struct route_in6 *, struct ifnet **,
struct rtentry **);
@@ -455,6 +427,7 @@ int in6_selectroute_fib(struct sockaddr_in6 *, struct ip6_pktopts *,
struct rtentry **, u_int);
u_int32_t ip6_randomid(void);
u_int32_t ip6_randomflowlabel(void);
+void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset);
#endif /* _KERNEL */
#endif /* !_NETINET6_IP6_VAR_H_ */
diff --git a/freebsd/sys/netinet6/ip6protosw.h b/freebsd/sys/netinet6/ip6protosw.h
index ec802a51..9e80a698 100644
--- a/freebsd/sys/netinet6/ip6protosw.h
+++ b/freebsd/sys/netinet6/ip6protosw.h
@@ -92,7 +92,7 @@ struct pr_usrreqs;
*
* ip6c_finaldst usually points to ip6c_ip6->ip6_dst. if the original
* (internal) packet carries a routing header, it may point the final
- * dstination address in the routing header.
+ * destination address in the routing header.
*
* ip6c_src: ip6c_ip6->ip6_src + scope info + flowlabel in ip6c_ip6
* (beware of flowlabel, if you try to compare it against others)
@@ -110,39 +110,8 @@ struct ip6ctlparam {
u_int8_t ip6c_nxt; /* final next header field */
};
-struct ip6protosw {
- short pr_type; /* socket type used for */
- struct domain *pr_domain; /* domain protocol a member of */
- short pr_protocol; /* protocol number */
- short pr_flags; /* see below */
-
-/* protocol-protocol hooks */
- int (*pr_input) /* input to protocol (from below) */
- (struct mbuf **, int *, int);
- int (*pr_output) /* output to protocol (from above) */
- (struct mbuf *, ...);
- void (*pr_ctlinput) /* control input (from below) */
- (int, struct sockaddr *, void *);
- int (*pr_ctloutput) /* control output (from above) */
- (struct socket *, struct sockopt *);
-
-/* utility hooks */
- void (*pr_init) /* initialization hook */
- (void);
- void (*pr_destroy) /* cleanup hook */
- (void);
-
- void (*pr_fasttimo) /* fast timeout (200ms) */
- (void);
- void (*pr_slowtimo) /* slow timeout (500ms) */
- (void);
- void (*pr_drain) /* flush any excess space possible */
- (void);
- struct pr_usrreqs *pr_usrreqs; /* supersedes pr_usrreq() */
-};
-
#ifdef _KERNEL
-extern struct ip6protosw inet6sw[];
+extern struct protosw inet6sw[];
#endif
#endif /* !_NETINET6_IP6PROTOSW_H_ */
diff --git a/freebsd/sys/netinet6/ip_fw_nat64.h b/freebsd/sys/netinet6/ip_fw_nat64.h
new file mode 100644
index 00000000..a5c38b2a
--- /dev/null
+++ b/freebsd/sys/netinet6/ip_fw_nat64.h
@@ -0,0 +1,154 @@
+/*-
+ * Copyright (c) 2015 Yandex LLC
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET6_IP_FW_NAT64_H_
+#define _NETINET6_IP_FW_NAT64_H_
+
+struct ipfw_nat64stl_stats {
+ uint64_t opcnt64; /* 6to4 of packets translated */
+ uint64_t opcnt46; /* 4to6 of packets translated */
+ uint64_t ofrags; /* number of fragments generated */
+ uint64_t ifrags; /* number of fragments received */
+ uint64_t oerrors; /* number of output errors */
+ uint64_t noroute4;
+ uint64_t noroute6;
+ uint64_t noproto; /* Protocol not supported */
+	uint64_t	nomem;		/* mbuf allocation failed */
+ uint64_t dropped; /* dropped due to some errors */
+};
+
+struct ipfw_nat64lsn_stats {
+ uint64_t opcnt64; /* 6to4 of packets translated */
+ uint64_t opcnt46; /* 4to6 of packets translated */
+ uint64_t ofrags; /* number of fragments generated */
+ uint64_t ifrags; /* number of fragments received */
+ uint64_t oerrors; /* number of output errors */
+ uint64_t noroute4;
+ uint64_t noroute6;
+ uint64_t noproto; /* Protocol not supported */
+	uint64_t	nomem;		/* mbuf allocation failed */
+ uint64_t dropped; /* dropped due to some errors */
+
+ uint64_t nomatch4; /* No addr/port match */
+ uint64_t jcalls; /* Number of job handler calls */
+ uint64_t jrequests; /* Number of job requests */
+ uint64_t jhostsreq; /* Number of job host requests */
+ uint64_t jportreq; /* Number of portgroup requests */
+ uint64_t jhostfails; /* Number of failed host allocs */
+ uint64_t jportfails; /* Number of failed portgroup allocs */
+ uint64_t jreinjected; /* Number of packets reinjected to q */
+ uint64_t jmaxlen; /* Max queue length reached */
+ uint64_t jnomem; /* No memory to alloc queue item */
+
+ uint64_t screated; /* Number of states created */
+ uint64_t sdeleted; /* Number of states deleted */
+ uint64_t spgcreated; /* Number of portgroups created */
+ uint64_t spgdeleted; /* Number of portgroups deleted */
+ uint64_t hostcount; /* Number of hosts */
+ uint64_t tcpchunks; /* Number of TCP chunks */
+ uint64_t udpchunks; /* Number of UDP chunks */
+ uint64_t icmpchunks; /* Number of ICMP chunks */
+
+ uint64_t _reserved[4];
+};
+
+#define NAT64_LOG 0x0001 /* Enable logging via BPF */
+
+typedef struct _ipfw_nat64stl_cfg {
+ char name[64]; /* NAT name */
+ ipfw_obj_ntlv ntlv6; /* object name tlv */
+ ipfw_obj_ntlv ntlv4; /* object name tlv */
+ struct in6_addr prefix6; /* NAT64 prefix */
+ uint8_t plen6; /* Prefix length */
+ uint8_t set; /* Named instance set [0..31] */
+ uint8_t spare[2];
+ uint32_t flags;
+} ipfw_nat64stl_cfg;
+
+/*
+ * NAT64LSN default configuration values
+ */
+#define NAT64LSN_MAX_PORTS 2048 /* Max number of ports per host */
+#define NAT64LSN_JMAXLEN 2048 /* Max outstanding requests. */
+#define NAT64LSN_TCP_SYN_AGE 10 /* State's TTL after SYN received. */
+#define NAT64LSN_TCP_EST_AGE (2 * 3600) /* TTL for established connection */
+#define NAT64LSN_TCP_FIN_AGE 180 /* State's TTL after FIN/RST received */
+#define NAT64LSN_UDP_AGE 120 /* TTL for UDP states */
+#define NAT64LSN_ICMP_AGE 60 /* TTL for ICMP states */
+#define NAT64LSN_HOST_AGE 3600 /* TTL for stale host entry */
+#define NAT64LSN_PG_AGE 900 /* TTL for stale ports groups */
+
+typedef struct _ipfw_nat64lsn_cfg {
+ char name[64]; /* NAT name */
+ uint32_t flags;
+ uint32_t max_ports; /* Max ports per client */
+ uint32_t agg_prefix_len; /* Prefix length to count */
+ uint32_t agg_prefix_max; /* Max hosts per agg prefix */
+ struct in_addr prefix4;
+ uint16_t plen4; /* Prefix length */
+ uint16_t plen6; /* Prefix length */
+ struct in6_addr prefix6; /* NAT64 prefix */
+ uint32_t jmaxlen; /* Max jobqueue length */
+ uint16_t min_port; /* Min port group # to use */
+ uint16_t max_port; /* Max port group # to use */
+ uint16_t nh_delete_delay;/* Stale host delete delay */
+ uint16_t pg_delete_delay;/* Stale portgroup delete delay */
+ uint16_t st_syn_ttl; /* TCP syn expire */
+ uint16_t st_close_ttl; /* TCP fin expire */
+ uint16_t st_estab_ttl; /* TCP established expire */
+ uint16_t st_udp_ttl; /* UDP expire */
+ uint16_t st_icmp_ttl; /* ICMP expire */
+ uint8_t set; /* Named instance set [0..31] */
+ uint8_t spare;
+} ipfw_nat64lsn_cfg;
+
+typedef struct _ipfw_nat64lsn_state {
+ struct in_addr daddr; /* Remote IPv4 address */
+ uint16_t dport; /* Remote destination port */
+ uint16_t aport; /* Local alias port */
+ uint16_t sport; /* Source port */
+ uint8_t flags; /* State flags */
+ uint8_t spare[3];
+ uint16_t idle; /* Last used time */
+} ipfw_nat64lsn_state;
+
+typedef struct _ipfw_nat64lsn_stg {
+ uint64_t next_idx; /* next state index */
+ struct in_addr alias4; /* IPv4 alias address */
+ uint8_t proto; /* protocol */
+ uint8_t flags;
+ uint16_t spare;
+ struct in6_addr host6; /* Bound IPv6 host */
+ uint32_t count; /* Number of states */
+ uint32_t spare2;
+} ipfw_nat64lsn_stg;
+
+#endif /* _NETINET6_IP_FW_NAT64_H_ */
+
diff --git a/freebsd/sys/netinet6/ip_fw_nptv6.h b/freebsd/sys/netinet6/ip_fw_nptv6.h
new file mode 100644
index 00000000..e2357eff
--- /dev/null
+++ b/freebsd/sys/netinet6/ip_fw_nptv6.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2016 Yandex LLC
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET6_IP_FW_NPTV6_H_
+#define _NETINET6_IP_FW_NPTV6_H_
+
+struct ipfw_nptv6_stats {
+ uint64_t in2ex; /* Int->Ext packets translated */
+ uint64_t ex2in; /* Ext->Int packets translated */
+ uint64_t dropped; /* dropped due to some errors */
+ uint64_t reserved[5];
+};
+
+typedef struct _ipfw_nptv6_cfg {
+ char name[64]; /* NPTv6 instance name */
+ struct in6_addr internal; /* NPTv6 internal prefix */
+ struct in6_addr external; /* NPTv6 external prefix */
+ uint8_t plen; /* Prefix length */
+ uint8_t set; /* Named instance set [0..31] */
+ uint8_t spare[2];
+ uint32_t flags;
+} ipfw_nptv6_cfg;
+
+#endif /* _NETINET6_IP_FW_NPTV6_H_ */
+
diff --git a/freebsd/sys/netinet6/mld6.c b/freebsd/sys/netinet6/mld6.c
index 25f03411..26efa852 100644
--- a/freebsd/sys/netinet6/mld6.c
+++ b/freebsd/sys/netinet6/mld6.c
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <sys/ktr.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -103,49 +104,49 @@ __FBSDID("$FreeBSD$");
#define KTR_MLD KTR_INET6
#endif
-static struct mld_ifinfo *
+static struct mld_ifsoftc *
mli_alloc_locked(struct ifnet *);
static void mli_delete_locked(const struct ifnet *);
static void mld_dispatch_packet(struct mbuf *);
-static void mld_dispatch_queue(struct ifqueue *, int);
-static void mld_final_leave(struct in6_multi *, struct mld_ifinfo *);
+static void mld_dispatch_queue(struct mbufq *, int);
+static void mld_final_leave(struct in6_multi *, struct mld_ifsoftc *);
static void mld_fasttimo_vnet(void);
static int mld_handle_state_change(struct in6_multi *,
- struct mld_ifinfo *);
-static int mld_initial_join(struct in6_multi *, struct mld_ifinfo *,
+ struct mld_ifsoftc *);
+static int mld_initial_join(struct in6_multi *, struct mld_ifsoftc *,
const int);
#ifdef KTR
static char * mld_rec_type_to_str(const int);
#endif
-static void mld_set_version(struct mld_ifinfo *, const int);
+static void mld_set_version(struct mld_ifsoftc *, const int);
static void mld_slowtimo_vnet(void);
static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
/*const*/ struct mld_hdr *);
static int mld_v1_input_report(struct ifnet *, const struct ip6_hdr *,
/*const*/ struct mld_hdr *);
-static void mld_v1_process_group_timer(struct mld_ifinfo *,
+static void mld_v1_process_group_timer(struct mld_ifsoftc *,
struct in6_multi *);
-static void mld_v1_process_querier_timers(struct mld_ifinfo *);
+static void mld_v1_process_querier_timers(struct mld_ifsoftc *);
static int mld_v1_transmit_report(struct in6_multi *, const int);
static void mld_v1_update_group(struct in6_multi *, const int);
-static void mld_v2_cancel_link_timers(struct mld_ifinfo *);
-static void mld_v2_dispatch_general_query(struct mld_ifinfo *);
+static void mld_v2_cancel_link_timers(struct mld_ifsoftc *);
+static void mld_v2_dispatch_general_query(struct mld_ifsoftc *);
static struct mbuf *
mld_v2_encap_report(struct ifnet *, struct mbuf *);
-static int mld_v2_enqueue_filter_change(struct ifqueue *,
+static int mld_v2_enqueue_filter_change(struct mbufq *,
struct in6_multi *);
-static int mld_v2_enqueue_group_record(struct ifqueue *,
+static int mld_v2_enqueue_group_record(struct mbufq *,
struct in6_multi *, const int, const int, const int,
const int);
static int mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
struct mbuf *, const int, const int);
static int mld_v2_merge_state_changes(struct in6_multi *,
- struct ifqueue *);
-static void mld_v2_process_group_timers(struct mld_ifinfo *,
- struct ifqueue *, struct ifqueue *,
+ struct mbufq *);
+static void mld_v2_process_group_timers(struct mld_ifsoftc *,
+ struct mbufq *, struct mbufq *,
struct in6_multi *, const int);
static int mld_v2_process_group_query(struct in6_multi *,
- struct mld_ifinfo *mli, int, struct mbuf *, const int);
+ struct mld_ifsoftc *mli, int, struct mbuf *, const int);
static int sysctl_mld_gsr(SYSCTL_HANDLER_ARGS);
static int sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS);
@@ -207,7 +208,7 @@ static MALLOC_DEFINE(M_MLD, "mld", "mld state");
* VIMAGE-wide globals.
*/
static VNET_DEFINE(struct timeval, mld_gsrdelay) = {10, 0};
-static VNET_DEFINE(LIST_HEAD(, mld_ifinfo), mli_head);
+static VNET_DEFINE(LIST_HEAD(, mld_ifsoftc), mli_head);
static VNET_DEFINE(int, interface_timers_running6);
static VNET_DEFINE(int, state_change_timers_running6);
static VNET_DEFINE(int, current_state_timers_running6);
@@ -226,8 +227,8 @@ SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW, 0,
/*
* Virtualized sysctls.
*/
-SYSCTL_VNET_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
&VNET_NAME(mld_gsrdelay.tv_sec), 0, sysctl_mld_gsr, "I",
"Rate limit for MLDv2 Group-and-Source queries in seconds");
@@ -239,14 +240,12 @@ static SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo,
"Per-interface MLDv2 state");
static int mld_v1enable = 1;
-SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW,
+SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RWTUN,
&mld_v1enable, 0, "Enable fallback to MLDv1");
-TUNABLE_INT("net.inet6.mld.v1enable", &mld_v1enable);
static int mld_use_allow = 1;
-SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW,
+SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RWTUN,
&mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");
-TUNABLE_INT("net.inet6.mld.use_allow", &mld_use_allow);
/*
* Packed Router Alert option structure declaration.
@@ -277,7 +276,7 @@ mld_save_context(struct mbuf *m, struct ifnet *ifp)
{
#ifdef VIMAGE
- m->m_pkthdr.header = ifp->if_vnet;
+ m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
#endif /* VIMAGE */
m->m_pkthdr.flowid = ifp->if_index;
}
@@ -286,7 +285,7 @@ static __inline void
mld_scrub_context(struct mbuf *m)
{
- m->m_pkthdr.header = NULL;
+ m->m_pkthdr.PH_loc.ptr = NULL;
m->m_pkthdr.flowid = 0;
}
@@ -302,8 +301,9 @@ mld_restore_context(struct mbuf *m)
{
#if defined(VIMAGE) && defined(INVARIANTS)
- KASSERT(curvnet == m->m_pkthdr.header,
- ("%s: called when curvnet was not restored", __func__));
+ KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr,
+ ("%s: called when curvnet was not restored: cuvnet %p m ptr %p",
+ __func__, curvnet, m->m_pkthdr.PH_loc.ptr));
#endif
return (m->m_pkthdr.flowid);
}
@@ -347,7 +347,7 @@ out_locked:
}
/*
- * Expose struct mld_ifinfo to userland, keyed by ifindex.
+ * Expose struct mld_ifsoftc to userland, keyed by ifindex.
* For use by ifmcstat(8).
*
* SMPng: NOTE: Does an unlocked ifindex space read.
@@ -361,7 +361,7 @@ sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS)
int error;
u_int namelen;
struct ifnet *ifp;
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
name = (int *)arg1;
namelen = arg2;
@@ -392,8 +392,17 @@ sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS)
LIST_FOREACH(mli, &V_mli_head, mli_link) {
if (ifp == mli->mli_ifp) {
- error = SYSCTL_OUT(req, mli,
- sizeof(struct mld_ifinfo));
+ struct mld_ifinfo info;
+
+ info.mli_version = mli->mli_version;
+ info.mli_v1_timer = mli->mli_v1_timer;
+ info.mli_v2_timer = mli->mli_v2_timer;
+ info.mli_flags = mli->mli_flags;
+ info.mli_rv = mli->mli_rv;
+ info.mli_qi = mli->mli_qi;
+ info.mli_qri = mli->mli_qri;
+ info.mli_uri = mli->mli_uri;
+ error = SYSCTL_OUT(req, &info, sizeof(info));
break;
}
}
@@ -409,15 +418,12 @@ out_locked:
* VIMAGE: Assumes the vnet pointer has been set.
*/
static void
-mld_dispatch_queue(struct ifqueue *ifq, int limit)
+mld_dispatch_queue(struct mbufq *mq, int limit)
{
struct mbuf *m;
- for (;;) {
- _IF_DEQUEUE(ifq, m);
- if (m == NULL)
- break;
- CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, ifq, m);
+ while ((m = mbufq_dequeue(mq)) != NULL) {
+ CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, mq, m);
mld_dispatch_packet(m);
if (--limit == 0)
break;
@@ -460,13 +466,13 @@ mld_is_addr_reported(const struct in6_addr *addr)
*
* SMPng: Normally called with IF_AFDATA_LOCK held.
*/
-struct mld_ifinfo *
+struct mld_ifsoftc *
mld_domifattach(struct ifnet *ifp)
{
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
CTR3(KTR_MLD, "%s: called for ifp %p(%s)",
- __func__, ifp, ifp->if_xname);
+ __func__, ifp, if_name(ifp));
MLD_LOCK();
@@ -484,14 +490,14 @@ mld_domifattach(struct ifnet *ifp)
/*
* VIMAGE: assume curvnet set by caller.
*/
-static struct mld_ifinfo *
+static struct mld_ifsoftc *
mli_alloc_locked(/*const*/ struct ifnet *ifp)
{
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
MLD_LOCK_ASSERT();
- mli = malloc(sizeof(struct mld_ifinfo), M_MLD, M_NOWAIT|M_ZERO);
+ mli = malloc(sizeof(struct mld_ifsoftc), M_MLD, M_NOWAIT|M_ZERO);
if (mli == NULL)
goto out;
@@ -502,18 +508,13 @@ mli_alloc_locked(/*const*/ struct ifnet *ifp)
mli->mli_qi = MLD_QI_INIT;
mli->mli_qri = MLD_QRI_INIT;
mli->mli_uri = MLD_URI_INIT;
-
SLIST_INIT(&mli->mli_relinmhead);
-
- /*
- * Responses to general queries are subject to bounds.
- */
- IFQ_SET_MAXLEN(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS);
+ mbufq_init(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS);
LIST_INSERT_HEAD(&V_mli_head, mli, mli_link);
- CTR2(KTR_MLD, "allocate mld_ifinfo for ifp %p(%s)",
- ifp, ifp->if_xname);
+ CTR2(KTR_MLD, "allocate mld_ifsoftc for ifp %p(%s)",
+ ifp, if_name(ifp));
out:
return (mli);
@@ -533,12 +534,12 @@ out:
void
mld_ifdetach(struct ifnet *ifp)
{
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
struct ifmultiaddr *ifma;
struct in6_multi *inm, *tinm;
CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp,
- ifp->if_xname);
+ if_name(ifp));
IN6_MULTI_LOCK_ASSERT();
MLD_LOCK();
@@ -579,7 +580,7 @@ mld_domifdetach(struct ifnet *ifp)
{
CTR3(KTR_MLD, "%s: called for ifp %p(%s)",
- __func__, ifp, ifp->if_xname);
+ __func__, ifp, if_name(ifp));
MLD_LOCK();
mli_delete_locked(ifp);
@@ -589,10 +590,10 @@ mld_domifdetach(struct ifnet *ifp)
static void
mli_delete_locked(const struct ifnet *ifp)
{
- struct mld_ifinfo *mli, *tmli;
+ struct mld_ifsoftc *mli, *tmli;
- CTR3(KTR_MLD, "%s: freeing mld_ifinfo for ifp %p(%s)",
- __func__, ifp, ifp->if_xname);
+ CTR3(KTR_MLD, "%s: freeing mld_ifsoftc for ifp %p(%s)",
+ __func__, ifp, if_name(ifp));
MLD_LOCK_ASSERT();
@@ -601,7 +602,7 @@ mli_delete_locked(const struct ifnet *ifp)
/*
* Free deferred General Query responses.
*/
- _IF_DRAIN(&mli->mli_gq);
+ mbufq_drain(&mli->mli_gq);
LIST_REMOVE(mli, mli_link);
@@ -613,9 +614,6 @@ mli_delete_locked(const struct ifnet *ifp)
return;
}
}
-#ifdef INVARIANTS
- panic("%s: mld_ifinfo not found for ifp %p\n", __func__, ifp);
-#endif
}
/*
@@ -630,7 +628,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
/*const*/ struct mld_hdr *mld)
{
struct ifmultiaddr *ifma;
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
struct in6_multi *inm;
int is_general_query;
uint16_t timer;
@@ -643,7 +641,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
if (!mld_v1enable) {
CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &mld->mld_addr),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
return (0);
}
@@ -654,7 +652,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &ip6->ip6_src),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
return (0);
}
@@ -689,7 +687,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
* Switch to MLDv1 host compatibility mode.
*/
mli = MLD_IFINFO(ifp);
- KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
+ KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
mld_set_version(mli, MLD_VERSION_1);
timer = (ntohs(mld->mld_maxdelay) * PR_FASTHZ) / MLD_TIMER_SCALE;
@@ -703,7 +701,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
* interface, kick the report timer.
*/
CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)",
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_INET6 ||
ifma->ifma_protospec == NULL)
@@ -721,7 +719,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
if (inm != NULL) {
CTR3(KTR_MLD, "process v1 query %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &mld->mld_addr),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
mld_v1_update_group(inm, timer);
}
/* XXX Clear embedded scope ID as userland won't expect it. */
@@ -759,7 +757,7 @@ mld_v1_update_group(struct in6_multi *inm, const int timer)
CTR4(KTR_MLD, "%s: %s/%s timer=%d", __func__,
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname, timer);
+ if_name(inm->in6m_ifp), timer);
IN6_MULTI_LOCK_ASSERT();
@@ -806,7 +804,7 @@ static int
mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
struct mbuf *m, const int off, const int icmp6len)
{
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
struct mldv2_query *mld;
struct in6_multi *inm;
uint32_t maxdelay, nsrc, qqi;
@@ -826,11 +824,11 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &ip6->ip6_src),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
return (0);
}
- CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, ifp->if_xname);
+ CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp));
mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
@@ -888,7 +886,7 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
MLD_LOCK();
mli = MLD_IFINFO(ifp);
- KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
+ KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
/*
* Discard the v2 query if we're in Compatibility Mode.
@@ -919,7 +917,7 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
* Otherwise, reset the interface timer.
*/
CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)",
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
V_interface_timers_running6 = 1;
@@ -949,7 +947,7 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
}
}
CTR2(KTR_MLD, "process v2 group query on ifp %p(%s)",
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
/*
* If there is a pending General Query response
* scheduled sooner than the selected delay, no
@@ -973,12 +971,12 @@ out_locked:
}
/*
- * Process a recieved MLDv2 group-specific or group-and-source-specific
+ * Process a received MLDv2 group-specific or group-and-source-specific
* query.
- * Return <0 if any error occured. Currently this is ignored.
+ * Return <0 if any error occurred. Currently this is ignored.
*/
static int
-mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifinfo *mli,
+mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli,
int timer, struct mbuf *m0, const int off)
{
struct mldv2_query *mld;
@@ -1106,7 +1104,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
if (!mld_v1enable) {
CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &mld->mld_addr),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
return (0);
}
@@ -1122,7 +1120,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &ip6->ip6_src),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
return (EINVAL);
}
@@ -1136,7 +1134,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
!IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
CTR3(KTR_MLD, "ignore v1 query dst %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &ip6->ip6_dst),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
return (EINVAL);
}
@@ -1161,7 +1159,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
ifa_free(&ia->ia_ifa);
CTR3(KTR_MLD, "process v1 report %s on ifp %p(%s)",
- ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, ifp->if_xname);
+ ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp));
/*
* Embed scope ID of receiving interface in MLD query for lookup
@@ -1182,7 +1180,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
*/
inm = in6m_lookup_locked(ifp, &mld->mld_addr);
if (inm != NULL) {
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
mli = inm->in6m_mli;
KASSERT(mli != NULL,
@@ -1208,7 +1206,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
CTR3(KTR_MLD,
"report suppressed for %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &mld->mld_addr),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
case MLD_LAZY_MEMBER:
inm->in6m_state = MLD_LAZY_MEMBER;
break;
@@ -1329,10 +1327,10 @@ mld_fasttimo(void)
static void
mld_fasttimo_vnet(void)
{
- struct ifqueue scq; /* State-change packets */
- struct ifqueue qrq; /* Query response packets */
+ struct mbufq scq; /* State-change packets */
+ struct mbufq qrq; /* Query response packets */
struct ifnet *ifp;
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
struct ifmultiaddr *ifma;
struct in6_multi *inm, *tinm;
int uri_fasthz;
@@ -1389,12 +1387,8 @@ mld_fasttimo_vnet(void)
if (mli->mli_version == MLD_VERSION_2) {
uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri *
PR_FASTHZ);
-
- memset(&qrq, 0, sizeof(struct ifqueue));
- IFQ_SET_MAXLEN(&qrq, MLD_MAX_G_GS_PACKETS);
-
- memset(&scq, 0, sizeof(struct ifqueue));
- IFQ_SET_MAXLEN(&scq, MLD_MAX_STATE_CHANGE_PACKETS);
+ mbufq_init(&qrq, MLD_MAX_G_GS_PACKETS);
+ mbufq_init(&scq, MLD_MAX_STATE_CHANGE_PACKETS);
}
IF_ADDR_RLOCK(ifp);
@@ -1461,7 +1455,7 @@ out_locked:
* Will update the global pending timer flags.
*/
static void
-mld_v1_process_group_timer(struct mld_ifinfo *mli, struct in6_multi *inm)
+mld_v1_process_group_timer(struct mld_ifsoftc *mli, struct in6_multi *inm)
{
int report_timer_expired;
@@ -1505,8 +1499,8 @@ mld_v1_process_group_timer(struct mld_ifinfo *mli, struct in6_multi *inm)
* Note: Unlocked read from mli.
*/
static void
-mld_v2_process_group_timers(struct mld_ifinfo *mli,
- struct ifqueue *qrq, struct ifqueue *scq,
+mld_v2_process_group_timers(struct mld_ifsoftc *mli,
+ struct mbufq *qrq, struct mbufq *scq,
struct in6_multi *inm, const int uri_fasthz)
{
int query_response_timer_expired;
@@ -1601,7 +1595,7 @@ mld_v2_process_group_timers(struct mld_ifinfo *mli,
in6m_commit(inm);
CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname);
+ if_name(inm->in6m_ifp));
/*
* If we are leaving the group for good, make sure
@@ -1626,14 +1620,14 @@ mld_v2_process_group_timers(struct mld_ifinfo *mli,
* as per Section 9.12.
*/
static void
-mld_set_version(struct mld_ifinfo *mli, const int version)
+mld_set_version(struct mld_ifsoftc *mli, const int version)
{
int old_version_timer;
MLD_LOCK_ASSERT();
CTR4(KTR_MLD, "%s: switching to v%d on ifp %p(%s)", __func__,
- version, mli->mli_ifp, mli->mli_ifp->if_xname);
+ version, mli->mli_ifp, if_name(mli->mli_ifp));
if (version == MLD_VERSION_1) {
/*
@@ -1656,14 +1650,14 @@ mld_set_version(struct mld_ifinfo *mli, const int version)
* joined on it; state-change, general-query, and group-query timers.
*/
static void
-mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
+mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
{
struct ifmultiaddr *ifma;
struct ifnet *ifp;
struct in6_multi *inm, *tinm;
CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__,
- mli->mli_ifp, mli->mli_ifp->if_xname);
+ mli->mli_ifp, if_name(mli->mli_ifp));
IN6_MULTI_LOCK_ASSERT();
MLD_LOCK_ASSERT();
@@ -1714,7 +1708,7 @@ mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
/*
* Free any pending MLDv2 state-change records.
*/
- _IF_DRAIN(&inm->in6m_scq);
+ mbufq_drain(&inm->in6m_scq);
break;
}
}
@@ -1749,7 +1743,7 @@ mld_slowtimo(void)
static void
mld_slowtimo_vnet(void)
{
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
MLD_LOCK();
@@ -1765,7 +1759,7 @@ mld_slowtimo_vnet(void)
* See Section 9.12 of RFC 3810.
*/
static void
-mld_v1_process_querier_timers(struct mld_ifinfo *mli)
+mld_v1_process_querier_timers(struct mld_ifsoftc *mli)
{
MLD_LOCK_ASSERT();
@@ -1777,7 +1771,7 @@ mld_v1_process_querier_timers(struct mld_ifinfo *mli)
CTR5(KTR_MLD,
"%s: transition from v%d -> v%d on %p(%s)",
__func__, mli->mli_version, MLD_VERSION_2,
- mli->mli_ifp, mli->mli_ifp->if_xname);
+ mli->mli_ifp, if_name(mli->mli_ifp));
mli->mli_version = MLD_VERSION_2;
}
}
@@ -1801,13 +1795,13 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type)
ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
/* ia may be NULL if link-local address is tentative. */
- MGETHDR(mh, M_DONTWAIT, MT_HEADER);
+ mh = m_gethdr(M_NOWAIT, MT_DATA);
if (mh == NULL) {
if (ia != NULL)
ifa_free(&ia->ia_ifa);
return (ENOMEM);
}
- MGET(md, M_DONTWAIT, MT_DATA);
+ md = m_get(M_NOWAIT, MT_DATA);
if (md == NULL) {
m_free(mh);
if (ia != NULL)
@@ -1821,7 +1815,7 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type)
* that ether_output() does not need to allocate another mbuf
* for the header in the most common case.
*/
- MH_ALIGN(mh, sizeof(struct ip6_hdr));
+ M_ALIGN(mh, sizeof(struct ip6_hdr));
mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
mh->m_len = sizeof(struct ip6_hdr);
@@ -1881,7 +1875,7 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type)
int
mld_change_state(struct in6_multi *inm, const int delay)
{
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
struct ifnet *ifp;
int error;
@@ -1906,7 +1900,7 @@ mld_change_state(struct in6_multi *inm, const int delay)
MLD_LOCK();
mli = MLD_IFINFO(ifp);
- KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
+ KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
/*
* If we detect a state transition to or from MCAST_UNDEFINED
@@ -1949,11 +1943,11 @@ out_locked:
* initial state change for delay ticks (in units of PR_FASTHZ).
*/
static int
-mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
+mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli,
const int delay)
{
struct ifnet *ifp;
- struct ifqueue *ifq;
+ struct mbufq *mq;
int error, retval, syncstates;
int odelay;
#ifdef KTR
@@ -1962,7 +1956,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
CTR4(KTR_MLD, "%s: initial join %s on ifp %p(%s)",
__func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp, inm->in6m_ifp->if_xname);
+ inm->in6m_ifp, if_name(inm->in6m_ifp));
error = 0;
syncstates = 1;
@@ -2040,9 +2034,9 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
* Don't kick the timers if there is nothing to do,
* or if an error occurred.
*/
- ifq = &inm->in6m_scq;
- _IF_DRAIN(ifq);
- retval = mld_v2_enqueue_group_record(ifq, inm, 1,
+ mq = &inm->in6m_scq;
+ mbufq_drain(mq);
+ retval = mld_v2_enqueue_group_record(mq, inm, 1,
0, 0, (mli->mli_flags & MLIF_USEALLOW));
CTR2(KTR_MLD, "%s: enqueue record = %d",
__func__, retval);
@@ -2088,7 +2082,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
in6m_commit(inm);
CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname);
+ if_name(inm->in6m_ifp));
}
return (error);
@@ -2098,7 +2092,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
* Issue an intermediate state change during the life-cycle.
*/
static int
-mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
+mld_handle_state_change(struct in6_multi *inm, struct mld_ifsoftc *mli)
{
struct ifnet *ifp;
int retval;
@@ -2108,7 +2102,7 @@ mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
CTR4(KTR_MLD, "%s: state change for %s on ifp %p(%s)",
__func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp, inm->in6m_ifp->if_xname);
+ inm->in6m_ifp, if_name(inm->in6m_ifp));
ifp = inm->in6m_ifp;
@@ -2130,11 +2124,11 @@ mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
in6m_commit(inm);
CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname);
+ if_name(inm->in6m_ifp));
return (0);
}
- _IF_DRAIN(&inm->in6m_scq);
+ mbufq_drain(&inm->in6m_scq);
retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
(mli->mli_flags & MLIF_USEALLOW));
@@ -2162,7 +2156,7 @@ mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
* to INCLUDE {} for immediate transmission.
*/
static void
-mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
+mld_final_leave(struct in6_multi *inm, struct mld_ifsoftc *mli)
{
int syncstates;
#ifdef KTR
@@ -2173,7 +2167,7 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
CTR4(KTR_MLD, "%s: final leave %s on ifp %p(%s)",
__func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp, inm->in6m_ifp->if_xname);
+ inm->in6m_ifp, if_name(inm->in6m_ifp));
IN6_MULTI_LOCK_ASSERT();
MLD_LOCK_ASSERT();
@@ -2207,13 +2201,13 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
* TO_IN {} to be sent on the next fast timeout,
* giving us an opportunity to merge reports.
*/
- _IF_DRAIN(&inm->in6m_scq);
+ mbufq_drain(&inm->in6m_scq);
inm->in6m_timer = 0;
inm->in6m_scrv = mli->mli_rv;
CTR4(KTR_MLD, "%s: Leaving %s/%s with %d "
"pending retransmissions.", __func__,
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname, inm->in6m_scrv);
+ if_name(inm->in6m_ifp), inm->in6m_scrv);
if (inm->in6m_scrv == 0) {
inm->in6m_state = MLD_NOT_MEMBER;
inm->in6m_sctimer = 0;
@@ -2248,10 +2242,10 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
in6m_commit(inm);
CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname);
+ if_name(inm->in6m_ifp));
inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
CTR3(KTR_MLD, "%s: T1 now MCAST_UNDEFINED for %p/%s",
- __func__, &inm->in6m_addr, inm->in6m_ifp->if_xname);
+ __func__, &inm->in6m_addr, if_name(inm->in6m_ifp));
}
}
@@ -2283,7 +2277,7 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
* no record(s) were appended.
*/
static int
-mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
+mld_v2_enqueue_group_record(struct mbufq *mq, struct in6_multi *inm,
const int is_state_change, const int is_group_query,
const int is_source_query, const int use_block_allow)
{
@@ -2398,12 +2392,12 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
* Generate the filter list changes using a separate function.
*/
if (is_filter_list_change)
- return (mld_v2_enqueue_filter_change(ifq, inm));
+ return (mld_v2_enqueue_filter_change(mq, inm));
if (type == MLD_DO_NOTHING) {
CTR3(KTR_MLD, "%s: nothing to do for %s/%s",
__func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname);
+ if_name(inm->in6m_ifp));
return (0);
}
@@ -2419,7 +2413,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
CTR4(KTR_MLD, "%s: queueing %s for %s/%s", __func__,
mld_rec_type_to_str(type),
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname);
+ if_name(inm->in6m_ifp));
/*
* Check if we have a packet in the tail of the queue for this
@@ -2429,7 +2423,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
* Note: Group records for G/GSR query responses MUST be sent
* in their own packet.
*/
- m0 = ifq->ifq_tail;
+ m0 = mbufq_last(mq);
if (!is_group_query &&
m0 != NULL &&
(m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
@@ -2441,7 +2435,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
m = m0;
CTR1(KTR_MLD, "%s: use existing packet", __func__);
} else {
- if (_IF_QFULL(ifq)) {
+ if (mbufq_full(mq)) {
CTR1(KTR_MLD, "%s: outbound queue full", __func__);
return (-ENOMEM);
}
@@ -2449,9 +2443,9 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
if (!is_state_change && !is_group_query)
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (-ENOMEM);
@@ -2554,7 +2548,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
if (m != m0) {
CTR1(KTR_MLD, "%s: enqueueing first packet", __func__);
m->m_pkthdr.PH_vt.vt_nrecs = 1;
- _IF_ENQUEUE(ifq, m);
+ mbufq_enqueue(mq, m);
} else
m->m_pkthdr.PH_vt.vt_nrecs++;
@@ -2570,13 +2564,13 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
* Always try for a cluster first.
*/
while (nims != NULL) {
- if (_IF_QFULL(ifq)) {
+ if (mbufq_full(mq)) {
CTR1(KTR_MLD, "%s: outbound queue full", __func__);
return (-ENOMEM);
}
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (-ENOMEM);
mld_save_context(m, ifp);
@@ -2629,7 +2623,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
nbytes += (msrcs * sizeof(struct in6_addr));
CTR1(KTR_MLD, "%s: enqueueing next packet", __func__);
- _IF_ENQUEUE(ifq, m);
+ mbufq_enqueue(mq, m);
}
return (nbytes);
@@ -2669,7 +2663,7 @@ typedef enum {
* no record(s) were appended.
*/
static int
-mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
+mld_v2_enqueue_filter_change(struct mbufq *mq, struct in6_multi *inm)
{
static const int MINRECLEN =
sizeof(struct mldv2_record) + sizeof(struct in6_addr);
@@ -2715,7 +2709,7 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
*/
while (drt != REC_FULL) {
do {
- m0 = ifq->ifq_tail;
+ m0 = mbufq_last(mq);
if (m0 != NULL &&
(m0->m_pkthdr.PH_vt.vt_nrecs + 1 <=
MLD_V2_REPORT_MAXRECS) &&
@@ -2728,9 +2722,9 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
CTR1(KTR_MLD,
"%s: use previous packet", __func__);
} else {
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
CTR1(KTR_MLD,
"%s: m_get*() failed", __func__);
@@ -2859,7 +2853,7 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
*/
m->m_pkthdr.PH_vt.vt_nrecs++;
if (m != m0)
- _IF_ENQUEUE(ifq, m);
+ mbufq_enqueue(mq, m);
nbytes += npbytes;
} while (nims != NULL);
drt |= crt;
@@ -2873,9 +2867,9 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
}
static int
-mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
+mld_v2_merge_state_changes(struct in6_multi *inm, struct mbufq *scq)
{
- struct ifqueue *gq;
+ struct mbufq *gq;
struct mbuf *m; /* pending state-change */
struct mbuf *m0; /* copy of pending state-change */
struct mbuf *mt; /* last state-change in packet */
@@ -2898,13 +2892,13 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
gq = &inm->in6m_scq;
#ifdef KTR
- if (gq->ifq_head == NULL) {
+ if (mbufq_first(gq) == NULL) {
CTR2(KTR_MLD, "%s: WARNING: queue for inm %p is empty",
__func__, inm);
}
#endif
- m = gq->ifq_head;
+ m = mbufq_first(gq);
while (m != NULL) {
/*
* Only merge the report into the current packet if
@@ -2915,7 +2909,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
* allocated clusters.
*/
domerge = 0;
- mt = ifscq->ifq_tail;
+ mt = mbufq_last(scq);
if (mt != NULL) {
recslen = m_length(m, NULL);
@@ -2927,7 +2921,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
domerge = 1;
}
- if (!domerge && _IF_QFULL(gq)) {
+ if (!domerge && mbufq_full(gq)) {
CTR2(KTR_MLD,
"%s: outbound queue full, skipping whole packet %p",
__func__, m);
@@ -2940,7 +2934,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
if (!docopy) {
CTR2(KTR_MLD, "%s: dequeueing %p", __func__, m);
- _IF_DEQUEUE(gq, m0);
+ m0 = mbufq_dequeue(gq);
m = m0->m_nextpkt;
} else {
CTR2(KTR_MLD, "%s: copying %p", __func__, m);
@@ -2952,9 +2946,9 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
}
if (!domerge) {
- CTR3(KTR_MLD, "%s: queueing %p to ifscq %p)",
- __func__, m0, ifscq);
- _IF_ENQUEUE(ifscq, m0);
+ CTR3(KTR_MLD, "%s: queueing %p to scq %p)",
+ __func__, m0, scq);
+ mbufq_enqueue(scq, m0);
} else {
struct mbuf *mtl; /* last mbuf of packet mt */
@@ -2978,7 +2972,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
* Respond to a pending MLDv2 General Query.
*/
static void
-mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
+mld_v2_dispatch_general_query(struct mld_ifsoftc *mli)
{
struct ifmultiaddr *ifma;
struct ifnet *ifp;
@@ -2991,6 +2985,15 @@ mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
KASSERT(mli->mli_version == MLD_VERSION_2,
("%s: called when version %d", __func__, mli->mli_version));
+ /*
+ * Check that there are some packets queued. If so, send them first.
+ * For large number of groups the reply to general query can take
+ * many packets, we should finish sending them before starting of
+ * queuing the new reply.
+ */
+ if (mbufq_len(&mli->mli_gq) != 0)
+ goto send;
+
ifp = mli->mli_ifp;
IF_ADDR_RLOCK(ifp);
@@ -3026,12 +3029,13 @@ mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
}
IF_ADDR_RUNLOCK(ifp);
+send:
mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST);
/*
* Slew transmission of bursts over 500ms intervals.
*/
- if (mli->mli_gq.ifq_head != NULL) {
+ if (mbufq_first(&mli->mli_gq) != NULL) {
mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
MLD_RESPONSE_BURST_INTERVAL);
V_interface_timers_running6 = 1;
@@ -3100,7 +3104,7 @@ mld_dispatch_packet(struct mbuf *m)
}
mld_scrub_context(m0);
- m->m_flags &= ~(M_PROTOFLAGS);
+ m_clrprotoflags(m);
m0->m_pkthdr.rcvif = V_loif;
ip6 = mtod(m0, struct ip6_hdr *);
@@ -3175,14 +3179,14 @@ mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
if (ia == NULL)
CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__);
- MGETHDR(mh, M_DONTWAIT, MT_HEADER);
+ mh = m_gethdr(M_NOWAIT, MT_DATA);
if (mh == NULL) {
if (ia != NULL)
ifa_free(&ia->ia_ifa);
m_freem(m);
return (NULL);
}
- MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
+ M_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
mldreclen = m_length(m, NULL);
CTR2(KTR_MLD, "%s: mldreclen is %d", __func__, mldreclen);
@@ -3260,7 +3264,7 @@ mld_init(void *unused __unused)
mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
mld_po.ip6po_flags = IP6PO_DONTFRAG;
}
-SYSINIT(mld_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, mld_init, NULL);
+SYSINIT(mld_init, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_init, NULL);
static void
mld_uninit(void *unused __unused)
@@ -3269,7 +3273,7 @@ mld_uninit(void *unused __unused)
CTR1(KTR_MLD, "%s: tearing down", __func__);
MLD_LOCK_DESTROY();
}
-SYSUNINIT(mld_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, mld_uninit, NULL);
+SYSUNINIT(mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_uninit, NULL);
static void
vnet_mld_init(const void *unused __unused)
@@ -3279,19 +3283,17 @@ vnet_mld_init(const void *unused __unused)
LIST_INIT(&V_mli_head);
}
-VNET_SYSINIT(vnet_mld_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mld_init,
+VNET_SYSINIT(vnet_mld_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_init,
NULL);
static void
vnet_mld_uninit(const void *unused __unused)
{
+ /* This can happen if we shutdown the network stack. */
CTR1(KTR_MLD, "%s: tearing down", __func__);
-
- KASSERT(LIST_EMPTY(&V_mli_head),
- ("%s: mli list not empty; ifnets not detached?", __func__));
}
-VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mld_uninit,
+VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_uninit,
NULL);
static int
@@ -3313,4 +3315,4 @@ static moduledata_t mld_mod = {
mld_modevent,
0
};
-DECLARE_MODULE(mld, mld_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+DECLARE_MODULE(mld, mld_mod, SI_SUB_PROTO_MC, SI_ORDER_ANY);
diff --git a/freebsd/sys/netinet6/mld6_var.h b/freebsd/sys/netinet6/mld6_var.h
index e62ec236..be7e9035 100644
--- a/freebsd/sys/netinet6/mld6_var.h
+++ b/freebsd/sys/netinet6/mld6_var.h
@@ -35,31 +35,6 @@
* implementation-specific definitions.
*/
-#ifdef _KERNEL
-
-/*
- * Per-link MLD state.
- */
-struct mld_ifinfo {
- LIST_ENTRY(mld_ifinfo) mli_link;
- struct ifnet *mli_ifp; /* interface this instance belongs to */
- uint32_t mli_version; /* MLDv1 Host Compatibility Mode */
- uint32_t mli_v1_timer; /* MLDv1 Querier Present timer (s) */
- uint32_t mli_v2_timer; /* MLDv2 General Query (interface) timer (s)*/
- uint32_t mli_flags; /* MLD per-interface flags */
- uint32_t mli_rv; /* MLDv2 Robustness Variable */
- uint32_t mli_qi; /* MLDv2 Query Interval (s) */
- uint32_t mli_qri; /* MLDv2 Query Response Interval (s) */
- uint32_t mli_uri; /* MLDv2 Unsolicited Report Interval (s) */
- SLIST_HEAD(,in6_multi) mli_relinmhead; /* released groups */
- struct ifqueue mli_gq; /* queue of general query responses */
-};
-#define MLIF_SILENT 0x00000001 /* Do not use MLD on this ifp */
-#define MLIF_USEALLOW 0x00000002 /* Use ALLOW/BLOCK for joins/leaves */
-
-#define MLD_RANDOM_DELAY(X) (arc4random() % (X) + 1)
-#define MLD_MAX_STATE_CHANGES 24 /* Max pending changes per group */
-
/*
* MLD per-group states.
*/
@@ -129,6 +104,44 @@ struct mld_ifinfo {
sizeof(struct icmp6_hdr))
/*
+ * Structure returned by net.inet6.mld.ifinfo.
+ */
+struct mld_ifinfo {
+ uint32_t mli_version; /* MLDv1 Host Compatibility Mode */
+ uint32_t mli_v1_timer; /* MLDv1 Querier Present timer (s) */
+ uint32_t mli_v2_timer; /* MLDv2 General Query (interface) timer (s)*/
+ uint32_t mli_flags; /* MLD per-interface flags */
+#define MLIF_SILENT 0x00000001 /* Do not use MLD on this ifp */
+#define MLIF_USEALLOW 0x00000002 /* Use ALLOW/BLOCK for joins/leaves */
+ uint32_t mli_rv; /* MLDv2 Robustness Variable */
+ uint32_t mli_qi; /* MLDv2 Query Interval (s) */
+ uint32_t mli_qri; /* MLDv2 Query Response Interval (s) */
+ uint32_t mli_uri; /* MLDv2 Unsolicited Report Interval (s) */
+};
+
+#ifdef _KERNEL
+/*
+ * Per-link MLD state.
+ */
+struct mld_ifsoftc {
+ LIST_ENTRY(mld_ifsoftc) mli_link;
+ struct ifnet *mli_ifp; /* interface this instance belongs to */
+ uint32_t mli_version; /* MLDv1 Host Compatibility Mode */
+ uint32_t mli_v1_timer; /* MLDv1 Querier Present timer (s) */
+ uint32_t mli_v2_timer; /* MLDv2 General Query (interface) timer (s)*/
+ uint32_t mli_flags; /* MLD per-interface flags */
+ uint32_t mli_rv; /* MLDv2 Robustness Variable */
+ uint32_t mli_qi; /* MLDv2 Query Interval (s) */
+ uint32_t mli_qri; /* MLDv2 Query Response Interval (s) */
+ uint32_t mli_uri; /* MLDv2 Unsolicited Report Interval (s) */
+ SLIST_HEAD(,in6_multi) mli_relinmhead; /* released groups */
+ struct mbufq mli_gq; /* queue of general query responses */
+};
+
+#define MLD_RANDOM_DELAY(X) (arc4random() % (X) + 1)
+#define MLD_MAX_STATE_CHANGES 24 /* Max pending changes per group */
+
+/*
* Subsystem lock macros.
* The MLD lock is only taken with MLD. Currently it is system-wide.
* VIMAGE: The lock could be pushed to per-VIMAGE granularity in future.
@@ -147,7 +160,7 @@ struct mld_ifinfo {
(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->mld_ifinfo)
int mld_change_state(struct in6_multi *, const int);
-struct mld_ifinfo *
+struct mld_ifsoftc *
mld_domifattach(struct ifnet *);
void mld_domifdetach(struct ifnet *);
void mld_fasttimo(void);
diff --git a/freebsd/sys/netinet6/nd6.c b/freebsd/sys/netinet6/nd6.c
index 4369ebac..d1c7036d 100644
--- a/freebsd/sys/netinet6/nd6.c
+++ b/freebsd/sys/netinet6/nd6.c
@@ -52,9 +52,11 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/lock.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
+#include <sys/sdt.h>
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arc.h>
#include <net/if_dl.h>
#include <net/if_types.h>
@@ -64,8 +66,8 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <net/if_llatbl.h>
-#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le))
#include <netinet/if_ether.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
@@ -83,7 +85,9 @@ __FBSDID("$FreeBSD$");
#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
-#define SIN6(s) ((struct sockaddr_in6 *)s)
+#define SIN6(s) ((const struct sockaddr_in6 *)(s))
+
+MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
/* timer values */
VNET_DEFINE(int, nd6_prune) = 1; /* walk list every 1 seconds */
@@ -111,54 +115,124 @@ VNET_DEFINE(int, nd6_debug) = 1;
VNET_DEFINE(int, nd6_debug) = 0;
#endif
-/* for debugging? */
-#if 0
-static int nd6_inuse, nd6_allocated;
-#endif
+static eventhandler_tag lle_event_eh, iflladdr_event_eh;
VNET_DEFINE(struct nd_drhead, nd_defrouter);
VNET_DEFINE(struct nd_prhead, nd_prefix);
+VNET_DEFINE(struct rwlock, nd6_lock);
VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
#define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval)
-static struct sockaddr_in6 all1_sa;
-
int (*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int);
-static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *,
+static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *,
struct ifnet *);
static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
static void nd6_slowtimo(void *);
static int regen_tmpaddr(struct in6_ifaddr *);
-static struct llentry *nd6_free(struct llentry *, int);
+static void nd6_free(struct llentry **, int);
+static void nd6_free_redirect(const struct llentry *);
static void nd6_llinfo_timer(void *);
+static void nd6_llinfo_settimer_locked(struct llentry *, long);
static void clear_llinfo_pqueue(struct llentry *);
+static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
+static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *,
+ const struct sockaddr_in6 *, u_char *, uint32_t *, struct llentry **);
+static int nd6_need_cache(struct ifnet *);
+
static VNET_DEFINE(struct callout, nd6_slowtimo_ch);
#define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch)
VNET_DEFINE(struct callout, nd6_timer_ch);
+#define V_nd6_timer_ch VNET(nd6_timer_ch)
+
+static void
+nd6_lle_event(void *arg __unused, struct llentry *lle, int evt)
+{
+ struct rt_addrinfo rtinfo;
+ struct sockaddr_in6 dst;
+ struct sockaddr_dl gw;
+ struct ifnet *ifp;
+ int type;
+
+ LLE_WLOCK_ASSERT(lle);
+
+ if (lltable_get_af(lle->lle_tbl) != AF_INET6)
+ return;
+
+ switch (evt) {
+ case LLENTRY_RESOLVED:
+ type = RTM_ADD;
+ KASSERT(lle->la_flags & LLE_VALID,
+ ("%s: %p resolved but not valid?", __func__, lle));
+ break;
+ case LLENTRY_EXPIRED:
+ type = RTM_DELETE;
+ break;
+ default:
+ return;
+ }
+
+ ifp = lltable_get_ifp(lle->lle_tbl);
+
+ bzero(&dst, sizeof(dst));
+ bzero(&gw, sizeof(gw));
+ bzero(&rtinfo, sizeof(rtinfo));
+ lltable_fill_sa_entry(lle, (struct sockaddr *)&dst);
+ dst.sin6_scope_id = in6_getscopezone(ifp,
+ in6_addrscope(&dst.sin6_addr));
+ gw.sdl_len = sizeof(struct sockaddr_dl);
+ gw.sdl_family = AF_LINK;
+ gw.sdl_alen = ifp->if_addrlen;
+ gw.sdl_index = ifp->if_index;
+ gw.sdl_type = ifp->if_type;
+ if (evt == LLENTRY_RESOLVED)
+ bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen);
+ rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
+ rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw;
+ rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY;
+ rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | (
+ type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB);
+}
+
+/*
+ * A handler for interface link layer address change event.
+ */
+static void
+nd6_iflladdr(void *arg __unused, struct ifnet *ifp)
+{
+
+ lltable_update_ifaddr(LLTABLE6(ifp));
+}
void
nd6_init(void)
{
- int i;
- LIST_INIT(&V_nd_prefix);
+ rw_init(&V_nd6_lock, "nd6");
- all1_sa.sin6_family = AF_INET6;
- all1_sa.sin6_len = sizeof(struct sockaddr_in6);
- for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
- all1_sa.sin6_addr.s6_addr[i] = 0xff;
+ LIST_INIT(&V_nd_prefix);
/* initialization of the default router list */
TAILQ_INIT(&V_nd_defrouter);
- /* start timer */
+ /* Start timers. */
callout_init(&V_nd6_slowtimo_ch, 0);
callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
nd6_slowtimo, curvnet);
+
+ callout_init(&V_nd6_timer_ch, 0);
+ callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet);
+
+ nd6_dad_init();
+ if (IS_DEFAULT_VNET(curvnet)) {
+ lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event,
+ NULL, EVENTHANDLER_PRI_ANY);
+ iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event,
+ nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
+ }
}
#ifdef VIMAGE
@@ -168,6 +242,11 @@ nd6_destroy()
callout_drain(&V_nd6_slowtimo_ch);
callout_drain(&V_nd6_timer_ch);
+ if (IS_DEFAULT_VNET(curvnet)) {
+ EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
+ EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh);
+ }
+ rw_destroy(&V_nd6_lock);
}
#endif
@@ -176,7 +255,7 @@ nd6_ifattach(struct ifnet *ifp)
{
struct nd_ifinfo *nd;
- nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK|M_ZERO);
+ nd = malloc(sizeof(*nd), M_IP6NDP, M_WAITOK | M_ZERO);
nd->initialized = 1;
nd->chlim = IPV6_DEFHLIM;
@@ -215,8 +294,19 @@ nd6_ifattach(struct ifnet *ifp)
}
void
-nd6_ifdetach(struct nd_ifinfo *nd)
+nd6_ifdetach(struct ifnet *ifp, struct nd_ifinfo *nd)
{
+ struct ifaddr *ifa, *next;
+
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+
+ /* stop DAD processing */
+ nd6_dad_stop(ifa);
+ }
+ IF_ADDR_RUNLOCK(ifp);
free(nd, M_IP6NDP);
}
@@ -228,6 +318,8 @@ nd6_ifdetach(struct nd_ifinfo *nd)
void
nd6_setmtu(struct ifnet *ifp)
{
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ return;
nd6_setmtu0(ifp, ND_IFINFO(ifp));
}
@@ -372,6 +464,7 @@ nd6_options(union nd_opts *ndopts)
case ND_OPT_TARGET_LINKADDR:
case ND_OPT_MTU:
case ND_OPT_REDIRECTED_HEADER:
+ case ND_OPT_NONCE:
if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
nd6log((LOG_INFO,
"duplicated ND6 option found (type=%d)\n",
@@ -401,7 +494,7 @@ nd6_options(union nd_opts *ndopts)
default:
/*
* Unknown options must be silently ignored,
- * to accomodate future extension to the protocol.
+ * to accommodate future extension to the protocol.
*/
nd6log((LOG_DEBUG,
"nd6_options: unsupported option %d - "
@@ -426,7 +519,7 @@ skip1:
/*
* ND6 timer routine to handle ND6 entries
*/
-void
+static void
nd6_llinfo_settimer_locked(struct llentry *ln, long tick)
{
int canceled;
@@ -436,48 +529,257 @@ nd6_llinfo_settimer_locked(struct llentry *ln, long tick)
if (tick < 0) {
ln->la_expire = 0;
ln->ln_ntick = 0;
- canceled = callout_stop(&ln->ln_timer_ch);
+ canceled = callout_stop(&ln->lle_timer);
} else {
- ln->la_expire = time_second + tick / hz;
+ ln->la_expire = time_uptime + tick / hz;
LLE_ADDREF(ln);
if (tick > INT_MAX) {
ln->ln_ntick = tick - INT_MAX;
- canceled = callout_reset(&ln->ln_timer_ch, INT_MAX,
+ canceled = callout_reset(&ln->lle_timer, INT_MAX,
nd6_llinfo_timer, ln);
} else {
ln->ln_ntick = 0;
- canceled = callout_reset(&ln->ln_timer_ch, tick,
+ canceled = callout_reset(&ln->lle_timer, tick,
nd6_llinfo_timer, ln);
}
}
- if (canceled)
+ if (canceled > 0)
LLE_REMREF(ln);
}
-void
-nd6_llinfo_settimer(struct llentry *ln, long tick)
+/*
+ * Gets source address of the first packet in hold queue
+ * and stores it in @src.
+ * Returns pointer to @src (if hold queue is not empty) or NULL.
+ *
+ * Set noinline to be dtrace-friendly
+ */
+static __noinline struct in6_addr *
+nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src)
{
+ struct ip6_hdr hdr;
+ struct mbuf *m;
- LLE_WLOCK(ln);
- nd6_llinfo_settimer_locked(ln, tick);
- LLE_WUNLOCK(ln);
+ if (ln->la_hold == NULL)
+ return (NULL);
+
+ /*
+ * assume every packet in la_hold has the same IP header
+ */
+ m = ln->la_hold;
+ if (sizeof(hdr) > m->m_len)
+ return (NULL);
+
+ m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr);
+ *src = hdr.ip6_src;
+
+ return (src);
}
-static void
+/*
+ * Checks if we need to switch from STALE state.
+ *
+ * RFC 4861 requires switching from STALE to DELAY state
+ * on first packet matching entry, waiting V_nd6_delay and
+ * transition to PROBE state (if upper layer confirmation was
+ * not received).
+ *
+ * This code performs a bit differently:
+ * On packet hit we don't change state (but desired state
+ * can be guessed by control plane). However, after V_nd6_delay
+ * seconds code will transition to PROBE state (so DELAY state
+ * is kinda skipped in most situations).
+ *
+ * Typically, V_nd6_gctimer is bigger than V_nd6_delay, so
+ * we perform the following upon entering STALE state:
+ *
+ * 1) Arm timer to run each V_nd6_delay seconds to make sure that
+ * if packet was transmitted at the start of given interval, we
+ * would be able to switch to PROBE state in V_nd6_delay seconds
+ * as user expects.
+ *
+ * 2) Reschedule timer until original V_nd6_gctimer expires keeping
+ * lle in STALE state (remaining timer value stored in lle_remtime).
+ *
+ * 3) Reschedule timer if packet was transmitted less than V_nd6_delay
+ * seconds ago.
+ *
+ * Returns non-zero value if the entry is still STALE (storing
+ * the next timer interval in @pdelay).
+ *
+ * Returns zero value if original timer expired or we need to switch to
+ * PROBE (store that in @do_switch variable).
+ */
+static int
+nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch)
+{
+ int nd_delay, nd_gctimer, r_skip_req;
+ time_t lle_hittime;
+ long delay;
+
+ *do_switch = 0;
+ nd_gctimer = V_nd6_gctimer;
+ nd_delay = V_nd6_delay;
+
+ LLE_REQ_LOCK(lle);
+ r_skip_req = lle->r_skip_req;
+ lle_hittime = lle->lle_hittime;
+ LLE_REQ_UNLOCK(lle);
+
+ if (r_skip_req > 0) {
+
+ /*
+ * Nonzero r_skip_req value was set upon entering
+ * STALE state. Since value was not changed, no
+ * packets were passed using this lle. Ask for
+ * timer reschedule and keep STALE state.
+ */
+ delay = (long)(MIN(nd_gctimer, nd_delay));
+ delay *= hz;
+ if (lle->lle_remtime > delay)
+ lle->lle_remtime -= delay;
+ else {
+ delay = lle->lle_remtime;
+ lle->lle_remtime = 0;
+ }
+
+ if (delay == 0) {
+
+ /*
+ * The original nd6_gctimer timeout ended,
+ * no more rescheduling.
+ */
+ return (0);
+ }
+
+ *pdelay = delay;
+ return (1);
+ }
+
+ /*
+ * Packet received. Verify timestamp
+ */
+ delay = (long)(time_uptime - lle_hittime);
+ if (delay < nd_delay) {
+
+ /*
+ * V_nd6_delay still not passed since the first
+ * hit in STALE state.
+ * Reschedule timer and return.
+ */
+ *pdelay = (long)(nd_delay - delay) * hz;
+ return (1);
+ }
+
+ /* Request switching to probe */
+ *do_switch = 1;
+ return (0);
+}
+
+
+/*
+ * Switch @lle state to new state optionally arming timers.
+ *
+ * Set noinline to be dtrace-friendly
+ */
+__noinline void
+nd6_llinfo_setstate(struct llentry *lle, int newstate)
+{
+ struct ifnet *ifp;
+ int nd_gctimer, nd_delay;
+ long delay, remtime;
+
+ delay = 0;
+ remtime = 0;
+
+ switch (newstate) {
+ case ND6_LLINFO_INCOMPLETE:
+ ifp = lle->lle_tbl->llt_ifp;
+ delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000;
+ break;
+ case ND6_LLINFO_REACHABLE:
+ if (!ND6_LLINFO_PERMANENT(lle)) {
+ ifp = lle->lle_tbl->llt_ifp;
+ delay = (long)ND_IFINFO(ifp)->reachable * hz;
+ }
+ break;
+ case ND6_LLINFO_STALE:
+
+ /*
+ * Notify fast path that we want to know if any packet
+ * is transmitted by setting r_skip_req.
+ */
+ LLE_REQ_LOCK(lle);
+ lle->r_skip_req = 1;
+ LLE_REQ_UNLOCK(lle);
+ nd_delay = V_nd6_delay;
+ nd_gctimer = V_nd6_gctimer;
+
+ delay = (long)(MIN(nd_gctimer, nd_delay)) * hz;
+ remtime = (long)nd_gctimer * hz - delay;
+ break;
+ case ND6_LLINFO_DELAY:
+ lle->la_asked = 0;
+ delay = (long)V_nd6_delay * hz;
+ break;
+ }
+
+ if (delay > 0)
+ nd6_llinfo_settimer_locked(lle, delay);
+
+ lle->lle_remtime = remtime;
+ lle->ln_state = newstate;
+}
+
+/*
+ * Timer-dependent part of nd state machine.
+ *
+ * Set noinline to be dtrace-friendly
+ */
+static __noinline void
nd6_llinfo_timer(void *arg)
{
struct llentry *ln;
- struct in6_addr *dst;
+ struct in6_addr *dst, *pdst, *psrc, src;
struct ifnet *ifp;
- struct nd_ifinfo *ndi = NULL;
+ struct nd_ifinfo *ndi;
+ int do_switch, send_ns;
+ long delay;
KASSERT(arg != NULL, ("%s: arg NULL", __func__));
ln = (struct llentry *)arg;
- LLE_WLOCK_ASSERT(ln);
- ifp = ln->lle_tbl->llt_ifp;
-
+ ifp = lltable_get_ifp(ln->lle_tbl);
CURVNET_SET(ifp->if_vnet);
+ ND6_RLOCK();
+ LLE_WLOCK(ln);
+ if (callout_pending(&ln->lle_timer)) {
+ /*
+ * Here we are a bit odd in the treatment of
+ * active/pending. If the pending bit is set, it got
+ * rescheduled before I ran. The active
+ * bit we ignore, since if it was stopped
+ * in ll_tablefree() and was currently running
+ * it would have returned 0 so the code would
+ * not have deleted it since the callout could
+ * not be stopped so we want to go through
+ * with the delete here now. If the callout
+ * was restarted, the pending bit will be back on and
+ * we just want to bail since the callout_reset would
+ * return 1 and our reference would have been removed
+ * by nd6_llinfo_settimer_locked above since canceled
+ * would have been 1.
+ */
+ LLE_WUNLOCK(ln);
+ ND6_RUNLOCK();
+ CURVNET_RESTORE();
+ return;
+ }
+ ndi = ND_IFINFO(ifp);
+ send_ns = 0;
+ dst = &ln->r_l3addr.addr6;
+ pdst = dst;
+
if (ln->ln_ntick > 0) {
if (ln->ln_ntick > INT_MAX) {
ln->ln_ntick -= INT_MAX;
@@ -489,15 +791,12 @@ nd6_llinfo_timer(void *arg)
goto done;
}
- ndi = ND_IFINFO(ifp);
- dst = &L3_ADDR_SIN6(ln)->sin6_addr;
if (ln->la_flags & LLE_STATIC) {
goto done;
}
if (ln->la_flags & LLE_DELETED) {
- (void)nd6_free(ln, 0);
- ln = NULL;
+ nd6_free(&ln, 0);
goto done;
}
@@ -505,10 +804,9 @@ nd6_llinfo_timer(void *arg)
case ND6_LLINFO_INCOMPLETE:
if (ln->la_asked < V_nd6_mmaxtries) {
ln->la_asked++;
- nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
- LLE_WUNLOCK(ln);
- nd6_ns_output(ifp, NULL, dst, ln, 0);
- LLE_WLOCK(ln);
+ send_ns = 1;
+ /* Send NS to multicast address */
+ pdst = NULL;
} else {
struct mbuf *m = ln->la_hold;
if (m) {
@@ -523,55 +821,59 @@ nd6_llinfo_timer(void *arg)
ln->la_hold = m0;
clear_llinfo_pqueue(ln);
}
- EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_TIMEDOUT);
- (void)nd6_free(ln, 0);
- ln = NULL;
+ nd6_free(&ln, 0);
if (m != NULL)
icmp6_error2(m, ICMP6_DST_UNREACH,
ICMP6_DST_UNREACH_ADDR, 0, ifp);
}
break;
case ND6_LLINFO_REACHABLE:
- if (!ND6_LLINFO_PERMANENT(ln)) {
- ln->ln_state = ND6_LLINFO_STALE;
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
- }
+ if (!ND6_LLINFO_PERMANENT(ln))
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
break;
case ND6_LLINFO_STALE:
- /* Garbage Collection(RFC 2461 5.3) */
- if (!ND6_LLINFO_PERMANENT(ln)) {
- EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
- (void)nd6_free(ln, 1);
- ln = NULL;
+ if (nd6_is_stale(ln, &delay, &do_switch) != 0) {
+
+ /*
+ * No packet has used this entry and GC timeout
+ * has not been passed. Reschedule timer and
+ * return.
+ */
+ nd6_llinfo_settimer_locked(ln, delay);
+ break;
}
- break;
+
+ if (do_switch == 0) {
+
+ /*
+ * GC timer has ended and entry hasn't been used.
+ * Run Garbage collector (RFC 4861, 5.3)
+ */
+ if (!ND6_LLINFO_PERMANENT(ln))
+ nd6_free(&ln, 1);
+ break;
+ }
+
+ /* Entry has been used AND delay timer has ended. */
+
+ /* FALLTHROUGH */
case ND6_LLINFO_DELAY:
if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
/* We need NUD */
ln->la_asked = 1;
- ln->ln_state = ND6_LLINFO_PROBE;
- nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
- LLE_WUNLOCK(ln);
- nd6_ns_output(ifp, dst, dst, ln, 0);
- LLE_WLOCK(ln);
- } else {
- ln->ln_state = ND6_LLINFO_STALE; /* XXX */
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
- }
+ nd6_llinfo_setstate(ln, ND6_LLINFO_PROBE);
+ send_ns = 1;
+ } else
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); /* XXX */
break;
case ND6_LLINFO_PROBE:
if (ln->la_asked < V_nd6_umaxtries) {
ln->la_asked++;
- nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
- LLE_WUNLOCK(ln);
- nd6_ns_output(ifp, dst, dst, ln, 0);
- LLE_WLOCK(ln);
+ send_ns = 1;
} else {
- EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
- (void)nd6_free(ln, 0);
- ln = NULL;
+ nd6_free(&ln, 0);
}
break;
default:
@@ -580,6 +882,16 @@ nd6_llinfo_timer(void *arg)
}
done:
if (ln != NULL)
+ ND6_RUNLOCK();
+ if (send_ns != 0) {
+ nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
+ psrc = nd6_llinfo_get_holdsrc(ln, &src);
+ LLE_FREE_LOCKED(ln);
+ ln = NULL;
+ nd6_ns_output(ifp, psrc, pdst, dst, NULL);
+ }
+
+ if (ln != NULL)
LLE_FREE_LOCKED(ln);
CURVNET_RESTORE();
}
@@ -592,19 +904,23 @@ void
nd6_timer(void *arg)
{
CURVNET_SET((struct vnet *) arg);
- int s;
+ struct nd_drhead drq;
struct nd_defrouter *dr, *ndr;
struct nd_prefix *pr, *npr;
struct in6_ifaddr *ia6, *nia6;
- callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
- nd6_timer, curvnet);
+ TAILQ_INIT(&drq);
/* expire default router list */
- s = splnet();
- TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
- if (dr->expire && dr->expire < time_second)
- defrtrlist_del(dr);
+ ND6_WLOCK();
+ TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr)
+ if (dr->expire && dr->expire < time_uptime)
+ defrouter_unlink(dr, &drq);
+ ND6_WUNLOCK();
+
+ while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+ TAILQ_REMOVE(&drq, dr, dr_entry);
+ defrouter_del(dr);
}
/*
@@ -670,8 +986,31 @@ nd6_timer(void *arg)
goto addrloop;
}
}
+ } else if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) != 0) {
+ /*
+ * Schedule DAD for a tentative address. This happens
+ * if the interface was down or not running
+ * when the address was configured.
+ */
+ int delay;
+
+ delay = arc4random() %
+ (MAX_RTR_SOLICITATION_DELAY * hz);
+ nd6_dad_start((struct ifaddr *)ia6, delay);
} else {
/*
+ * Check status of the interface. If it is down,
+ * mark the address as tentative for future DAD.
+ */
+ if ((ia6->ia_ifp->if_flags & IFF_UP) == 0 ||
+ (ia6->ia_ifp->if_drv_flags & IFF_DRV_RUNNING)
+ == 0 ||
+ (ND_IFINFO(ia6->ia_ifp)->flags &
+ ND6_IFF_IFDISABLED) != 0) {
+ ia6->ia6_flags &= ~IN6_IFF_DUPLICATED;
+ ia6->ia6_flags |= IN6_IFF_TENTATIVE;
+ }
+ /*
* A new RA might have made a deprecated address
* preferred.
*/
@@ -687,7 +1026,7 @@ nd6_timer(void *arg)
* prefix is not necessary.
*/
if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
- time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) {
+ time_uptime - pr->ndpr_lastupdate > pr->ndpr_vltime) {
/*
* address expiration and prefix expiration are
@@ -696,7 +1035,10 @@ nd6_timer(void *arg)
prelist_remove(pr);
}
}
- splx(s);
+
+ callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
+ nd6_timer, curvnet);
+
CURVNET_RESTORE();
}
@@ -748,11 +1090,10 @@ regen_tmpaddr(struct in6_ifaddr *ia6)
* address with the prefix.
*/
if (!IFA6_IS_DEPRECATED(it6))
- public_ifa6 = it6;
-
- if (public_ifa6 != NULL)
- ifa_ref(&public_ifa6->ia_ifa);
+ public_ifa6 = it6;
}
+ if (public_ifa6 != NULL)
+ ifa_ref(&public_ifa6->ia_ifa);
IF_ADDR_RUNLOCK(ifp);
if (public_ifa6 != NULL) {
@@ -772,35 +1113,43 @@ regen_tmpaddr(struct in6_ifaddr *ia6)
}
/*
- * Nuke neighbor cache/prefix/default router management table, right before
- * ifp goes away.
+ * Remove prefix and default router list entries corresponding to ifp. Neighbor
+ * cache entries are freed in in6_domifdetach().
*/
void
nd6_purge(struct ifnet *ifp)
{
+ struct nd_drhead drq;
struct nd_defrouter *dr, *ndr;
struct nd_prefix *pr, *npr;
+ TAILQ_INIT(&drq);
+
/*
* Nuke default router list entries toward ifp.
* We defer removal of default router list entries that is installed
* in the routing table, in order to keep additional side effects as
* small as possible.
*/
+ ND6_WLOCK();
TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
if (dr->installed)
continue;
-
if (dr->ifp == ifp)
- defrtrlist_del(dr);
+ defrouter_unlink(dr, &drq);
}
TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
if (!dr->installed)
continue;
-
if (dr->ifp == ifp)
- defrtrlist_del(dr);
+ defrouter_unlink(dr, &drq);
+ }
+ ND6_WUNLOCK();
+
+ while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+ TAILQ_REMOVE(&drq, dr, dr_entry);
+ defrouter_del(dr);
}
/* Nuke prefix list entries toward ifp */
@@ -814,14 +1163,6 @@ nd6_purge(struct ifnet *ifp)
*/
pr->ndpr_refcnt = 0;
- /*
- * Previously, pr->ndpr_addr is removed as well,
- * but I strongly believe we don't have to do it.
- * nd6_purge() is only called from in6_ifdetach(),
- * which removes all the associated interface addresses
- * by itself.
- * (jinmei@kame.net 20010129)
- */
prelist_remove(pr);
}
}
@@ -834,14 +1175,6 @@ nd6_purge(struct ifnet *ifp)
/* Refresh default router list. */
defrouter_select();
}
-
- /* XXXXX
- * We do not nuke the neighbor cache entries here any more
- * because the neighbor cache is kept in if_afdata[AF_INET6].
- * nd6_purge() is invoked by in6_ifdetach() which is called
- * from if_detach() where everything gets purged. So let
- * in6_domifdetach() do the actual L2 table purging work.
- */
}
/*
@@ -849,11 +1182,10 @@ nd6_purge(struct ifnet *ifp)
* Returns the llentry locked
*/
struct llentry *
-nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp)
+nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp)
{
struct sockaddr_in6 sin6;
struct llentry *ln;
- int llflags;
bzero(&sin6, sizeof(sin6));
sin6.sin6_len = sizeof(struct sockaddr_in6);
@@ -862,16 +1194,26 @@ nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp)
IF_AFDATA_LOCK_ASSERT(ifp);
- llflags = 0;
- if (flags & ND6_CREATE)
- llflags |= LLE_CREATE;
- if (flags & ND6_EXCLUSIVE)
- llflags |= LLE_EXCLUSIVE;
-
- ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6);
- if ((ln != NULL) && (llflags & LLE_CREATE))
+ ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6);
+
+ return (ln);
+}
+
+struct llentry *
+nd6_alloc(const struct in6_addr *addr6, int flags, struct ifnet *ifp)
+{
+ struct sockaddr_in6 sin6;
+ struct llentry *ln;
+
+ bzero(&sin6, sizeof(sin6));
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = *addr6;
+
+ ln = lltable_alloc_entry(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6);
+ if (ln != NULL)
ln->ln_state = ND6_LLINFO_NOSTATE;
-
+
return (ln);
}
@@ -881,10 +1223,14 @@ nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp)
* to not reenter the routing code from within itself.
*/
static int
-nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
+nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
{
struct nd_prefix *pr;
struct ifaddr *dstaddr;
+ struct rt_addrinfo info;
+ struct sockaddr_in6 rt_key;
+ struct sockaddr *dst6;
+ int fibnum;
/*
* A link-local address is always a neighbor.
@@ -909,6 +1255,13 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
return (0);
}
+ bzero(&rt_key, sizeof(rt_key));
+ bzero(&info, sizeof(info));
+ info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key;
+
+ /* Always use the default FIB here. XXME - why? */
+ fibnum = RT_DEFAULT_FIB;
+
/*
* If the address matches one of our addresses,
* it should be a neighbor.
@@ -920,12 +1273,13 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
continue;
if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
- struct rtentry *rt;
/* Always use the default FIB here. */
- rt = in6_rtalloc1((struct sockaddr *)&pr->ndpr_prefix,
- 0, 0, RT_DEFAULT_FIB);
- if (rt == NULL)
+ dst6 = (struct sockaddr *)&pr->ndpr_prefix;
+
+ /* Restore length field before retrying lookup */
+ rt_key.sin6_len = sizeof(rt_key);
+ if (rib_lookup_info(fibnum, dst6, 0, 0, &info) != 0)
continue;
/*
* This is the case where multiple interfaces
@@ -938,11 +1292,8 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
* differ.
*/
if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
- &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr)) {
- RTFREE_LOCKED(rt);
+ &rt_key.sin6_addr))
continue;
- }
- RTFREE_LOCKED(rt);
}
if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
@@ -954,7 +1305,7 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
* If the address is assigned on the node of the other side of
* a p2p interface, the address should be a neighbor.
*/
- dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr);
+ dstaddr = ifa_ifwithdstaddr((const struct sockaddr *)addr, RT_ALL_FIBS);
if (dstaddr != NULL) {
if (dstaddr->ifa_ifp == ifp) {
ifa_free(dstaddr);
@@ -982,7 +1333,7 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
* XXX: should take care of the destination of a p2p link?
*/
int
-nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
+nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
{
struct llentry *lle;
int rc = 0;
@@ -1009,15 +1360,31 @@ nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
* Since the function would cause significant changes in the kernel, DO NOT
* make it global, unless you have a strong reason for the change, and are sure
* that the change is safe.
+ *
+ * Set noinline to be dtrace-friendly
*/
-static struct llentry *
-nd6_free(struct llentry *ln, int gc)
+static __noinline void
+nd6_free(struct llentry **lnp, int gc)
{
- struct llentry *next;
- struct nd_defrouter *dr;
struct ifnet *ifp;
+ struct llentry *ln;
+ struct nd_defrouter *dr;
+
+ ln = *lnp;
+ *lnp = NULL;
LLE_WLOCK_ASSERT(ln);
+ ND6_RLOCK_ASSERT();
+
+ ifp = lltable_get_ifp(ln->lle_tbl);
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) != 0)
+ dr = defrouter_lookup_locked(&ln->r_l3addr.addr6, ifp);
+ else
+ dr = NULL;
+ ND6_RUNLOCK();
+
+ if ((ln->la_flags & LLE_DELETED) == 0)
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
/*
* we used to have pfctlinput(PRC_HOSTDEAD) here.
@@ -1027,11 +1394,7 @@ nd6_free(struct llentry *ln, int gc)
/* cancel timer */
nd6_llinfo_settimer_locked(ln, -1);
- ifp = ln->lle_tbl->llt_ifp;
-
if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
- dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
-
if (dr != NULL && dr->expire &&
ln->ln_state == ND6_LLINFO_STALE && gc) {
/*
@@ -1046,17 +1409,17 @@ nd6_free(struct llentry *ln, int gc)
* XXX: the check for ln_state would be redundant,
* but we intentionally keep it just in case.
*/
- if (dr->expire > time_second)
+ if (dr->expire > time_uptime)
nd6_llinfo_settimer_locked(ln,
- (dr->expire - time_second) * hz);
+ (dr->expire - time_uptime) * hz);
else
nd6_llinfo_settimer_locked(ln,
(long)V_nd6_gctimer * hz);
- next = LIST_NEXT(ln, lle_next);
LLE_REMREF(ln);
LLE_WUNLOCK(ln);
- return (next);
+ defrouter_rele(dr);
+ return;
}
if (dr) {
@@ -1091,7 +1454,7 @@ nd6_free(struct llentry *ln, int gc)
* is in the Default Router List.
* See a corresponding comment in nd6_na_input().
*/
- rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
+ rt6_flush(&ln->r_l3addr.addr6, ifp);
}
if (dr) {
@@ -1109,83 +1472,66 @@ nd6_free(struct llentry *ln, int gc)
defrouter_select();
}
+ /*
+ * If this entry was added by an on-link redirect, remove the
+ * corresponding host route.
+ */
+ if (ln->la_flags & LLE_REDIRECT)
+ nd6_free_redirect(ln);
+
if (ln->ln_router || dr)
LLE_WLOCK(ln);
}
/*
- * Before deleting the entry, remember the next entry as the
- * return value. We need this because pfxlist_onlink_check() above
- * might have freed other entries (particularly the old next entry) as
- * a side effect (XXX).
- */
- next = LIST_NEXT(ln, lle_next);
-
- /*
* Save to unlock. We still hold an extra reference and will not
* free(9) in llentry_free() if someone else holds one as well.
*/
LLE_WUNLOCK(ln);
IF_AFDATA_LOCK(ifp);
LLE_WLOCK(ln);
-
/* Guard against race with other llentry_free(). */
if (ln->la_flags & LLE_LINKED) {
+ /* Remove callout reference */
LLE_REMREF(ln);
- llentry_free(ln);
- } else
- LLE_FREE_LOCKED(ln);
-
+ lltable_unlink_entry(ln->lle_tbl, ln);
+ }
IF_AFDATA_UNLOCK(ifp);
- return (next);
+ llentry_free(ln);
+ if (dr != NULL)
+ defrouter_rele(dr);
}
-/*
- * Upper-layer reachability hint for Neighbor Unreachability Detection.
- *
- * XXX cost-effective methods?
- */
-void
-nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force)
+static int
+nd6_isdynrte(const struct rtentry *rt, void *xap)
{
- struct llentry *ln;
- struct ifnet *ifp;
- if ((dst6 == NULL) || (rt == NULL))
- return;
-
- ifp = rt->rt_ifp;
- IF_AFDATA_RLOCK(ifp);
- ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL);
- IF_AFDATA_RUNLOCK(ifp);
- if (ln == NULL)
- return;
+ if (rt->rt_flags == (RTF_UP | RTF_HOST | RTF_DYNAMIC))
+ return (1);
- if (ln->ln_state < ND6_LLINFO_REACHABLE)
- goto done;
+ return (0);
+}
+/*
+ * Remove the rtentry for the given llentry,
+ * both of which were installed by a redirect.
+ */
+static void
+nd6_free_redirect(const struct llentry *ln)
+{
+ int fibnum;
+ struct sockaddr_in6 sin6;
+ struct rt_addrinfo info;
- /*
- * if we get upper-layer reachability confirmation many times,
- * it is possible we have false information.
- */
- if (!force) {
- ln->ln_byhint++;
- if (ln->ln_byhint > V_nd6_maxnudhint) {
- goto done;
- }
- }
+ lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6);
+ memset(&info, 0, sizeof(info));
+ info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6;
+ info.rti_filter = nd6_isdynrte;
- ln->ln_state = ND6_LLINFO_REACHABLE;
- if (!ND6_LLINFO_PERMANENT(ln)) {
- nd6_llinfo_settimer_locked(ln,
- (long)ND_IFINFO(rt->rt_ifp)->reachable * hz);
- }
-done:
- LLE_WUNLOCK(ln);
+ for (fibnum = 0; fibnum < rt_numfibs; fibnum++)
+ rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum);
}
-
/*
* Rejuvenate this function for routing operations related
* processing.
@@ -1197,7 +1543,6 @@ nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
struct nd_defrouter *dr;
struct ifnet *ifp;
- RT_LOCK_ASSERT(rt);
gateway = (struct sockaddr_in6 *)rt->rt_gateway;
ifp = rt->rt_ifp;
@@ -1216,12 +1561,13 @@ nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
/*
* check for default route
*/
- if (IN6_ARE_ADDR_EQUAL(&in6addr_any,
- &SIN6(rt_key(rt))->sin6_addr)) {
-
+ if (IN6_ARE_ADDR_EQUAL(&in6addr_any,
+ &SIN6(rt_key(rt))->sin6_addr)) {
dr = defrouter_lookup(&gateway->sin6_addr, ifp);
- if (dr != NULL)
+ if (dr != NULL) {
dr->installed = 0;
+ defrouter_rele(dr);
+ }
}
break;
}
@@ -1231,100 +1577,14 @@ nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
int
nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
{
- struct in6_drlist *drl = (struct in6_drlist *)data;
- struct in6_oprlist *oprl = (struct in6_oprlist *)data;
struct in6_ndireq *ndi = (struct in6_ndireq *)data;
struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
- struct nd_defrouter *dr;
- struct nd_prefix *pr;
- int i = 0, error = 0;
- int s;
+ int error = 0;
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ return (EPFNOSUPPORT);
switch (cmd) {
- case SIOCGDRLST_IN6:
- /*
- * obsolete API, use sysctl under net.inet6.icmp6
- */
- bzero(drl, sizeof(*drl));
- s = splnet();
- TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
- if (i >= DRLSTSIZ)
- break;
- drl->defrouter[i].rtaddr = dr->rtaddr;
- in6_clearscope(&drl->defrouter[i].rtaddr);
-
- drl->defrouter[i].flags = dr->flags;
- drl->defrouter[i].rtlifetime = dr->rtlifetime;
- drl->defrouter[i].expire = dr->expire;
- drl->defrouter[i].if_index = dr->ifp->if_index;
- i++;
- }
- splx(s);
- break;
- case SIOCGPRLST_IN6:
- /*
- * obsolete API, use sysctl under net.inet6.icmp6
- *
- * XXX the structure in6_prlist was changed in backward-
- * incompatible manner. in6_oprlist is used for SIOCGPRLST_IN6,
- * in6_prlist is used for nd6_sysctl() - fill_prlist().
- */
- /*
- * XXX meaning of fields, especialy "raflags", is very
- * differnet between RA prefix list and RR/static prefix list.
- * how about separating ioctls into two?
- */
- bzero(oprl, sizeof(*oprl));
- s = splnet();
- LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
- struct nd_pfxrouter *pfr;
- int j;
-
- if (i >= PRLSTSIZ)
- break;
- oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr;
- oprl->prefix[i].raflags = pr->ndpr_raf;
- oprl->prefix[i].prefixlen = pr->ndpr_plen;
- oprl->prefix[i].vltime = pr->ndpr_vltime;
- oprl->prefix[i].pltime = pr->ndpr_pltime;
- oprl->prefix[i].if_index = pr->ndpr_ifp->if_index;
- if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
- oprl->prefix[i].expire = 0;
- else {
- time_t maxexpire;
-
- /* XXX: we assume time_t is signed. */
- maxexpire = (-1) &
- ~((time_t)1 <<
- ((sizeof(maxexpire) * 8) - 1));
- if (pr->ndpr_vltime <
- maxexpire - pr->ndpr_lastupdate) {
- oprl->prefix[i].expire =
- pr->ndpr_lastupdate +
- pr->ndpr_vltime;
- } else
- oprl->prefix[i].expire = maxexpire;
- }
-
- j = 0;
- LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
- if (j < DRLSTSIZ) {
-#define RTRADDR oprl->prefix[i].advrtr[j]
- RTRADDR = pfr->router->rtaddr;
- in6_clearscope(&RTRADDR);
-#undef RTRADDR
- }
- j++;
- }
- oprl->prefix[i].advrtrs = j;
- oprl->prefix[i].origin = PR_ORIG_RA;
-
- i++;
- }
- splx(s);
-
- break;
case OSIOCGIFINFO_IN6:
#define ND ndi->ndi
/* XXX: old ndp(8) assumes a positive value for linkmtu. */
@@ -1344,7 +1604,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
case SIOCSIFINFO_IN6:
/*
* used to change host variables from userland.
- * intented for a use on router to reflect RA configurations.
+ * intended for a use on router to reflect RA configurations.
*/
/* 0 means 'unspecified' */
if (ND.linkmtu != 0) {
@@ -1384,22 +1644,19 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
* do not clear ND6_IFF_IFDISABLED.
* See RFC 4862, Section 5.4.5.
*/
- int duplicated_linklocal = 0;
-
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
ia = (struct in6_ifaddr *)ifa;
if ((ia->ia6_flags & IN6_IFF_DUPLICATED) &&
- IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) {
- duplicated_linklocal = 1;
+ IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
break;
- }
}
IF_ADDR_RUNLOCK(ifp);
- if (duplicated_linklocal) {
+ if (ifa != NULL) {
+ /* LLA is duplicated. */
ND.flags |= ND6_IFF_IFDISABLED;
log(LOG_ERR, "Cannot enable an interface"
" with a link-local address marked"
@@ -1415,14 +1672,19 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
/* Mark all IPv6 address as tentative. */
ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
- IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family != AF_INET6)
- continue;
- ia = (struct in6_ifaddr *)ifa;
- ia->ia6_flags |= IN6_IFF_TENTATIVE;
+ if (V_ip6_dad_count > 0 &&
+ (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) {
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead,
+ ifa_link) {
+ if (ifa->ifa_addr->sa_family !=
+ AF_INET6)
+ continue;
+ ia = (struct in6_ifaddr *)ifa;
+ ia->ia6_flags |= IN6_IFF_TENTATIVE;
+ }
+ IF_ADDR_RUNLOCK(ifp);
}
- IF_ADDR_RUNLOCK(ifp);
}
if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) {
@@ -1440,20 +1702,19 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
* address is assigned, and IFF_UP, try to
* assign one.
*/
- int haslinklocal = 0;
-
IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family != AF_INET6)
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead,
+ ifa_link) {
+ if (ifa->ifa_addr->sa_family !=
+ AF_INET6)
continue;
ia = (struct in6_ifaddr *)ifa;
- if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) {
- haslinklocal = 1;
+ if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
break;
- }
}
IF_ADDR_RUNLOCK(ifp);
- if (!haslinklocal)
+ if (ifa != NULL)
+ /* No LLA is configured. */
in6_ifattach(ifp, NULL);
}
}
@@ -1471,7 +1732,6 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
/* flush all the prefix advertised by routers */
struct nd_prefix *pr, *next;
- s = splnet();
LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) {
struct in6_ifaddr *ia, *ia_next;
@@ -1490,21 +1750,28 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
}
prelist_remove(pr);
}
- splx(s);
break;
}
case SIOCSRTRFLUSH_IN6:
{
/* flush all the default routers */
- struct nd_defrouter *dr, *next;
+ struct nd_drhead drq;
+ struct nd_defrouter *dr;
+
+ TAILQ_INIT(&drq);
- s = splnet();
defrouter_reset();
- TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) {
- defrtrlist_del(dr);
+
+ ND6_WLOCK();
+ while ((dr = TAILQ_FIRST(&V_nd_defrouter)) != NULL)
+ defrouter_unlink(dr, &drq);
+ ND6_WUNLOCK();
+ while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+ TAILQ_REMOVE(&drq, dr, dr_entry);
+ defrouter_del(dr);
}
+
defrouter_select();
- splx(s);
break;
}
case SIOCGNBRINFO_IN6:
@@ -1526,7 +1793,11 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
nbi->state = ln->ln_state;
nbi->asked = ln->la_asked;
nbi->isrouter = ln->ln_router;
- nbi->expire = ln->la_expire;
+ if (ln->la_expire == 0)
+ nbi->expire = 0;
+ else
+ nbi->expire = ln->la_expire + ln->lle_remtime / hz +
+ (time_second - time_uptime);
LLE_RUNLOCK(ln);
break;
}
@@ -1540,31 +1811,108 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
}
/*
+ * Calculates new isRouter value based on provided parameters and
+ * returns it.
+ */
+static int
+nd6_is_router(int type, int code, int is_new, int old_addr, int new_addr,
+ int ln_router)
+{
+
+ /*
+ * ICMP6 type dependent behavior.
+ *
+ * NS: clear IsRouter if new entry
+ * RS: clear IsRouter
+ * RA: set IsRouter if there's lladdr
+ * redir: clear IsRouter if new entry
+ *
+ * RA case, (1):
+ * The spec says that we must set IsRouter in the following cases:
+ * - If lladdr exist, set IsRouter. This means (1-5).
+ * - If it is old entry (!newentry), set IsRouter. This means (7).
+ * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
+ * A question arises for (1) case. (1) case has no lladdr in the
+ * neighbor cache, this is similar to (6).
+ * This case is rare but we figured that we MUST NOT set IsRouter.
+ *
+ * is_new old_addr new_addr NS RS RA redir
+ * D R
+ * 0 n n (1) c ? s
+ * 0 y n (2) c s s
+ * 0 n y (3) c s s
+ * 0 y y (4) c s s
+ * 0 y y (5) c s s
+ * 1 -- n (6) c c c s
+ * 1 -- y (7) c c s c s
+ *
+ * (c=clear s=set)
+ */
+ switch (type & 0xff) {
+ case ND_NEIGHBOR_SOLICIT:
+ /*
+ * New entry must have is_router flag cleared.
+ */
+ if (is_new) /* (6-7) */
+ ln_router = 0;
+ break;
+ case ND_REDIRECT:
+ /*
+ * If the icmp is a redirect to a better router, always set the
+ * is_router flag. Otherwise, if the entry is newly created,
+ * clear the flag. [RFC 2461, sec 8.3]
+ */
+ if (code == ND_REDIRECT_ROUTER)
+ ln_router = 1;
+ else {
+ if (is_new) /* (6-7) */
+ ln_router = 0;
+ }
+ break;
+ case ND_ROUTER_SOLICIT:
+ /*
+ * is_router flag must always be cleared.
+ */
+ ln_router = 0;
+ break;
+ case ND_ROUTER_ADVERT:
+ /*
+ * Mark an entry with lladdr as a router.
+ */
+ if ((!is_new && (old_addr || new_addr)) || /* (2-5) */
+ (is_new && new_addr)) { /* (7) */
+ ln_router = 1;
+ }
+ break;
+ }
+
+ return (ln_router);
+}
+
+/*
* Create neighbor cache entry and cache link-layer address,
* on reception of inbound ND6 packets. (RS/RA/NS/redirect)
*
* type - ICMP6 type
* code - type dependent information
*
- * XXXXX
- * The caller of this function already acquired the ndp
- * cache table lock because the cache entry is returned.
*/
-struct llentry *
+void
nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
int lladdrlen, int type, int code)
{
- struct llentry *ln = NULL;
+ struct llentry *ln = NULL, *ln_tmp;
int is_newentry;
int do_update;
int olladdr;
int llchange;
int flags;
- int newstate = 0;
uint16_t router = 0;
struct sockaddr_in6 sin6;
struct mbuf *chain = NULL;
- int static_route = 0;
+ u_char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
IF_AFDATA_UNLOCK_ASSERT(ifp);
@@ -1573,7 +1921,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
/* nothing must be updated for unspecified address */
if (IN6_IS_ADDR_UNSPECIFIED(from))
- return NULL;
+ return;
/*
* Validation about ifp->if_addrlen and lladdrlen must be done in
@@ -1584,197 +1932,122 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
* Spec says nothing in sections for RA, RS and NA. There's small
* description on it in NS section (RFC 2461 7.2.3).
*/
- flags = lladdr ? ND6_EXCLUSIVE : 0;
+ flags = lladdr ? LLE_EXCLUSIVE : 0;
IF_AFDATA_RLOCK(ifp);
ln = nd6_lookup(from, flags, ifp);
IF_AFDATA_RUNLOCK(ifp);
+ is_newentry = 0;
if (ln == NULL) {
- flags |= ND6_EXCLUSIVE;
- IF_AFDATA_LOCK(ifp);
- ln = nd6_lookup(from, flags | ND6_CREATE, ifp);
- IF_AFDATA_UNLOCK(ifp);
- is_newentry = 1;
- } else {
- /* do nothing if static ndp is set */
- if (ln->la_flags & LLE_STATIC) {
- static_route = 1;
- goto done;
+ flags |= LLE_EXCLUSIVE;
+ ln = nd6_alloc(from, 0, ifp);
+ if (ln == NULL)
+ return;
+
+ /*
+ * Since we already know all the data for the new entry,
+ * fill it before insertion.
+ */
+ if (lladdr != NULL) {
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
+ lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off);
}
- is_newentry = 0;
+
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(ln);
+ /* Prefer any existing lle over newly-created one */
+ ln_tmp = nd6_lookup(from, LLE_EXCLUSIVE, ifp);
+ if (ln_tmp == NULL)
+ lltable_link_entry(LLTABLE6(ifp), ln);
+ IF_AFDATA_WUNLOCK(ifp);
+ if (ln_tmp == NULL) {
+ /* No existing lle, mark as new entry (6,7) */
+ is_newentry = 1;
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
+ if (lladdr != NULL) /* (7) */
+ EVENTHANDLER_INVOKE(lle_event, ln,
+ LLENTRY_RESOLVED);
+ } else {
+ lltable_free_entry(LLTABLE6(ifp), ln);
+ ln = ln_tmp;
+ ln_tmp = NULL;
+ }
+ }
+ /* do nothing if static ndp is set */
+ if ((ln->la_flags & LLE_STATIC)) {
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(ln);
+ else
+ LLE_RUNLOCK(ln);
+ return;
}
- if (ln == NULL)
- return (NULL);
olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
if (olladdr && lladdr) {
- llchange = bcmp(lladdr, &ln->ll_addr,
+ llchange = bcmp(lladdr, ln->ll_addr,
ifp->if_addrlen);
- } else
+ } else if (!olladdr && lladdr)
+ llchange = 1;
+ else
llchange = 0;
/*
* newentry olladdr lladdr llchange (*=record)
* 0 n n -- (1)
* 0 y n -- (2)
- * 0 n y -- (3) * STALE
+ * 0 n y y (3) * STALE
* 0 y y n (4) *
* 0 y y y (5) * STALE
* 1 -- n -- (6) NOSTATE(= PASSIVE)
* 1 -- y -- (7) * STALE
*/
- if (lladdr) { /* (3-5) and (7) */
+ do_update = 0;
+ if (is_newentry == 0 && llchange != 0) {
+ do_update = 1; /* (3,5) */
+
/*
* Record source link-layer address
* XXX is it dependent to ifp->if_type?
*/
- bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
- ln->la_flags |= LLE_VALID;
- EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
- }
-
- if (!is_newentry) {
- if ((!olladdr && lladdr != NULL) || /* (3) */
- (olladdr && lladdr != NULL && llchange)) { /* (5) */
- do_update = 1;
- newstate = ND6_LLINFO_STALE;
- } else /* (1-2,4) */
- do_update = 0;
- } else {
- do_update = 1;
- if (lladdr == NULL) /* (6) */
- newstate = ND6_LLINFO_NOSTATE;
- else /* (7) */
- newstate = ND6_LLINFO_STALE;
- }
-
- if (do_update) {
- /*
- * Update the state of the neighbor cache.
- */
- ln->ln_state = newstate;
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
- if (ln->ln_state == ND6_LLINFO_STALE) {
- /*
- * XXX: since nd6_output() below will cause
- * state tansition to DELAY and reset the timer,
- * we must set the timer now, although it is actually
- * meaningless.
- */
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
+ if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off) == 0) {
+ /* Entry was deleted */
+ return;
+ }
- if (ln->la_hold) {
- struct mbuf *m_hold, *m_hold_next;
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
- /*
- * reset the la_hold in advance, to explicitly
- * prevent a la_hold lookup in nd6_output()
- * (wouldn't happen, though...)
- */
- for (m_hold = ln->la_hold, ln->la_hold = NULL;
- m_hold; m_hold = m_hold_next) {
- m_hold_next = m_hold->m_nextpkt;
- m_hold->m_nextpkt = NULL;
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
- /*
- * we assume ifp is not a p2p here, so
- * just set the 2nd argument as the
- * 1st one.
- */
- nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain);
- }
- /*
- * If we have mbufs in the chain we need to do
- * deferred transmit. Copy the address from the
- * llentry before dropping the lock down below.
- */
- if (chain != NULL)
- memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6));
- }
- } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
- /* probe right away */
- nd6_llinfo_settimer_locked((void *)ln, 0);
- }
+ if (ln->la_hold != NULL)
+ nd6_grab_holdchain(ln, &chain, &sin6);
}
- /*
- * ICMP6 type dependent behavior.
- *
- * NS: clear IsRouter if new entry
- * RS: clear IsRouter
- * RA: set IsRouter if there's lladdr
- * redir: clear IsRouter if new entry
- *
- * RA case, (1):
- * The spec says that we must set IsRouter in the following cases:
- * - If lladdr exist, set IsRouter. This means (1-5).
- * - If it is old entry (!newentry), set IsRouter. This means (7).
- * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
- * A quetion arises for (1) case. (1) case has no lladdr in the
- * neighbor cache, this is similar to (6).
- * This case is rare but we figured that we MUST NOT set IsRouter.
- *
- * newentry olladdr lladdr llchange NS RS RA redir
- * D R
- * 0 n n -- (1) c ? s
- * 0 y n -- (2) c s s
- * 0 n y -- (3) c s s
- * 0 y y n (4) c s s
- * 0 y y y (5) c s s
- * 1 -- n -- (6) c c c s
- * 1 -- y -- (7) c c s c s
- *
- * (c=clear s=set)
- */
- switch (type & 0xff) {
- case ND_NEIGHBOR_SOLICIT:
- /*
- * New entry must have is_router flag cleared.
- */
- if (is_newentry) /* (6-7) */
- ln->ln_router = 0;
- break;
- case ND_REDIRECT:
- /*
- * If the icmp is a redirect to a better router, always set the
- * is_router flag. Otherwise, if the entry is newly created,
- * clear the flag. [RFC 2461, sec 8.3]
- */
- if (code == ND_REDIRECT_ROUTER)
- ln->ln_router = 1;
- else if (is_newentry) /* (6-7) */
- ln->ln_router = 0;
- break;
- case ND_ROUTER_SOLICIT:
- /*
- * is_router flag must always be cleared.
- */
- ln->ln_router = 0;
- break;
- case ND_ROUTER_ADVERT:
- /*
- * Mark an entry with lladdr as a router.
- */
- if ((!is_newentry && (olladdr || lladdr)) || /* (2-5) */
- (is_newentry && lladdr)) { /* (7) */
- ln->ln_router = 1;
- }
- break;
- }
+ /* Calculates new router status */
+ router = nd6_is_router(type, code, is_newentry, olladdr,
+ lladdr != NULL ? 1 : 0, ln->ln_router);
- if (ln != NULL) {
- static_route = (ln->la_flags & LLE_STATIC);
- router = ln->ln_router;
+ ln->ln_router = router;
+ /* Mark non-router redirects with special flag */
+ if ((type & 0xFF) == ND_REDIRECT && code != ND_REDIRECT_ROUTER)
+ ln->la_flags |= LLE_REDIRECT;
- if (flags & ND6_EXCLUSIVE)
- LLE_WUNLOCK(ln);
- else
- LLE_RUNLOCK(ln);
- if (static_route)
- ln = NULL;
- }
- if (chain)
- nd6_output_flush(ifp, ifp, chain, &sin6, NULL);
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(ln);
+ else
+ LLE_RUNLOCK(ln);
+
+ if (chain != NULL)
+ nd6_flush_holdchain(ifp, ifp, chain, &sin6);
/*
* When the link-layer address of a router changes, select the
@@ -1791,25 +2064,13 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
* for those are not autoconfigured hosts, we explicitly avoid such
* cases for safety.
*/
- if (do_update && router &&
+ if ((do_update || is_newentry) && router &&
ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
/*
* guaranteed recursion
*/
defrouter_select();
}
-
- return (ln);
-done:
- if (ln != NULL) {
- if (flags & ND6_EXCLUSIVE)
- LLE_WUNLOCK(ln);
- else
- LLE_RUNLOCK(ln);
- if (static_route)
- ln = NULL;
- }
- return (ln);
}
static void
@@ -1822,7 +2083,9 @@ nd6_slowtimo(void *arg)
callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
nd6_slowtimo, curvnet);
IFNET_RLOCK_NOSLEEP();
- TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ continue;
nd6if = ND_IFINFO(ifp);
if (nd6if->basereachable && /* already initialized */
(nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
@@ -1840,55 +2103,176 @@ nd6_slowtimo(void *arg)
CURVNET_RESTORE();
}
-int
-nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
- struct sockaddr_in6 *dst, struct rtentry *rt0)
+void
+nd6_grab_holdchain(struct llentry *ln, struct mbuf **chain,
+ struct sockaddr_in6 *sin6)
{
- return (nd6_output_lle(ifp, origifp, m0, dst, rt0, NULL, NULL));
+ LLE_WLOCK_ASSERT(ln);
+
+ *chain = ln->la_hold;
+ ln->la_hold = NULL;
+ lltable_fill_sa_entry(ln, (struct sockaddr *)sin6);
+
+ if (ln->ln_state == ND6_LLINFO_STALE) {
+
+ /*
+ * The first time we send a packet to a
+ * neighbor whose entry is STALE, we have
+	 * to change the state to DELAY and set
+	 * a timer to expire in DELAY_FIRST_PROBE_TIME
+	 * seconds to ensure neighbor unreachability
+ * detection on expiration.
+ * (RFC 2461 7.3.3)
+ */
+ nd6_llinfo_setstate(ln, ND6_LLINFO_DELAY);
+ }
}
+int
+nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
+ struct sockaddr_in6 *dst, struct route *ro)
+{
+ int error;
+ int ip6len;
+ struct ip6_hdr *ip6;
+ struct m_tag *mtag;
+
+#ifdef MAC
+ mac_netinet6_nd6_send(ifp, m);
+#endif
+
+ /*
+ * If called from nd6_ns_output() (NS), nd6_na_output() (NA),
+ * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA
+ * as handled by rtsol and rtadvd), mbufs will be tagged for SeND
+ * to be diverted to user space. When re-injected into the kernel,
+ * send_output() will directly dispatch them to the outgoing interface.
+ */
+ if (send_sendso_input_hook != NULL) {
+ mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL);
+ if (mtag != NULL) {
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
+ /* Use the SEND socket */
+ error = send_sendso_input_hook(m, ifp, SND_OUT,
+ ip6len);
+ /* -1 == no app on SEND socket */
+ if (error == 0 || error != -1)
+ return (error);
+ }
+ }
+
+ m_clrprotoflags(m); /* Avoid confusing lower layers. */
+ IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL,
+ mtod(m, struct ip6_hdr *));
+
+ if ((ifp->if_flags & IFF_LOOPBACK) == 0)
+ origifp = ifp;
+
+ error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, ro);
+ return (error);
+}
/*
- * Note that I'm not enforcing any global serialization
- * lle state or asked changes here as the logic is too
- * complicated to avoid having to always acquire an exclusive
- * lock
- * KMM
+ * Lookup link header for @sa_dst address. Stores found
+ * data in @desten buffer. Copy of lle ln_flags can be also
+ * saved in @pflags if @pflags is non-NULL.
+ *
+ * If destination LLE does not exist or lle state modification
+ * is required, call "slow" version.
*
+ * Return values:
+ * - 0 on success (address copied to buffer).
+ * - EWOULDBLOCK (no local error, but address is still unresolved)
+ * - other errors (alloc failure, etc)
*/
-#define senderr(e) { error = (e); goto bad;}
-
int
-nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
- struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle,
- struct mbuf **chain)
+nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
+ const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags,
+ struct llentry **plle)
{
- struct mbuf *m = m0;
- struct m_tag *mtag;
- struct llentry *ln = lle;
- struct ip6_hdr *ip6;
- int error = 0;
- int flags = 0;
- int ip6len;
+ struct llentry *ln = NULL;
+ const struct sockaddr_in6 *dst6;
-#ifdef INVARIANTS
- if (lle != NULL) {
-
- LLE_WLOCK_ASSERT(lle);
+ if (pflags != NULL)
+ *pflags = 0;
+
+ dst6 = (const struct sockaddr_in6 *)sa_dst;
- KASSERT(chain != NULL, (" lle locked but no mbuf chain pointer passed"));
+ /* discard the packet if IPv6 operation is disabled on the interface */
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
+ m_freem(m);
+ return (ENETDOWN); /* better error? */
}
-#endif
- if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
- goto sendpkt;
- if (nd6_need_cache(ifp) == 0)
- goto sendpkt;
+ if (m != NULL && m->m_flags & M_MCAST) {
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_FDDI:
+ case IFT_L2VLAN:
+ case IFT_IEEE80211:
+ case IFT_BRIDGE:
+ case IFT_ISO88025:
+ ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr,
+ desten);
+ return (0);
+ default:
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+ }
- /*
- * next hop determination. This routine is derived from ether_output.
- */
+ IF_AFDATA_RLOCK(ifp);
+ ln = nd6_lookup(&dst6->sin6_addr, plle ? LLE_EXCLUSIVE : LLE_UNLOCKED,
+ ifp);
+ if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) {
+ /* Entry found, let's copy lle info */
+ bcopy(ln->r_linkdata, desten, ln->r_hdrlen);
+ if (pflags != NULL)
+ *pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR);
+ /* Check if we have feedback request from nd6 timer */
+ if (ln->r_skip_req != 0) {
+ LLE_REQ_LOCK(ln);
+ ln->r_skip_req = 0; /* Notify that entry was used */
+ ln->lle_hittime = time_uptime;
+ LLE_REQ_UNLOCK(ln);
+ }
+ if (plle) {
+ LLE_ADDREF(ln);
+ *plle = ln;
+ LLE_WUNLOCK(ln);
+ }
+ IF_AFDATA_RUNLOCK(ifp);
+ return (0);
+ } else if (plle && ln)
+ LLE_WUNLOCK(ln);
+ IF_AFDATA_RUNLOCK(ifp);
+
+ return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags, plle));
+}
+
+
+/*
+ * Do L2 address resolution for @sa_dst address. Stores found
+ * address in @desten buffer. Copy of lle ln_flags can be also
+ * saved in @pflags if @pflags is non-NULL.
+ *
+ * Heavy version.
+ * Function assumes that destination LLE does not exist,
+ * is invalid or stale, so LLE_EXCLUSIVE lock needs to be acquired.
+ *
+ * Set noinline to be dtrace-friendly
+ */
+static __noinline int
+nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
+ const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags,
+ struct llentry **plle)
+{
+ struct llentry *lle = NULL, *lle_tmp;
+ struct in6_addr *psrc, src;
+ int send_ns, ll_len;
+ char *lladdr;
/*
* Address resolution or Neighbor Unreachability Detection
@@ -1896,50 +2280,54 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
* At this point, the destination of the packet must be a unicast
* or an anycast address(i.e. not a multicast).
*/
-
- flags = (lle != NULL) ? LLE_EXCLUSIVE : 0;
- if (ln == NULL) {
- retry:
+ if (lle == NULL) {
IF_AFDATA_RLOCK(ifp);
- ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst);
+ lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp);
IF_AFDATA_RUNLOCK(ifp);
- if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp)) {
+ if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) {
/*
* Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
* the condition below is not very efficient. But we believe
* it is tolerable, because this should be a rare case.
*/
- flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0);
- IF_AFDATA_LOCK(ifp);
- ln = nd6_lookup(&dst->sin6_addr, flags, ifp);
- IF_AFDATA_UNLOCK(ifp);
+ lle = nd6_alloc(&dst->sin6_addr, 0, ifp);
+ if (lle == NULL) {
+ char ip6buf[INET6_ADDRSTRLEN];
+ log(LOG_DEBUG,
+ "nd6_output: can't allocate llinfo for %s "
+ "(ln=%p)\n",
+ ip6_sprintf(ip6buf, &dst->sin6_addr), lle);
+ m_freem(m);
+ return (ENOBUFS);
+ }
+
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(lle);
+ /* Prefer any existing entry over newly-created one */
+ lle_tmp = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp);
+ if (lle_tmp == NULL)
+ lltable_link_entry(LLTABLE6(ifp), lle);
+ IF_AFDATA_WUNLOCK(ifp);
+ if (lle_tmp != NULL) {
+ lltable_free_entry(LLTABLE6(ifp), lle);
+ lle = lle_tmp;
+ lle_tmp = NULL;
+ }
}
}
- if (ln == NULL) {
- if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
- !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
- char ip6buf[INET6_ADDRSTRLEN];
- log(LOG_DEBUG,
- "nd6_output: can't allocate llinfo for %s "
- "(ln=%p)\n",
- ip6_sprintf(ip6buf, &dst->sin6_addr), ln);
- senderr(EIO); /* XXX: good error? */
+ if (lle == NULL) {
+ if (!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
+ m_freem(m);
+ return (ENOBUFS);
}
- goto sendpkt; /* send anyway */
- }
- /* We don't have to do link-layer address resolution on a p2p link. */
- if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
- ln->ln_state < ND6_LLINFO_REACHABLE) {
- if ((flags & LLE_EXCLUSIVE) == 0) {
- flags |= LLE_EXCLUSIVE;
- LLE_RUNLOCK(ln);
- goto retry;
- }
- ln->ln_state = ND6_LLINFO_STALE;
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
+ if (m != NULL)
+ m_freem(m);
+ return (ENOBUFS);
}
+ LLE_WLOCK_ASSERT(lle);
+
/*
* The first time we send a packet to a neighbor whose entry is
* STALE, we have to change the state to DELAY and a sets a timer to
@@ -1947,49 +2335,46 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
* neighbor unreachability detection on expiration.
* (RFC 2461 7.3.3)
*/
- if (ln->ln_state == ND6_LLINFO_STALE) {
- if ((flags & LLE_EXCLUSIVE) == 0) {
- flags |= LLE_EXCLUSIVE;
- LLE_RUNLOCK(ln);
- goto retry;
- }
- ln->la_asked = 0;
- ln->ln_state = ND6_LLINFO_DELAY;
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz);
- }
+ if (lle->ln_state == ND6_LLINFO_STALE)
+ nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY);
/*
* If the neighbor cache entry has a state other than INCOMPLETE
* (i.e. its link-layer address is already resolved), just
* send the packet.
*/
- if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
- goto sendpkt;
+ if (lle->ln_state > ND6_LLINFO_INCOMPLETE) {
+ if (flags & LLE_ADDRONLY) {
+ lladdr = lle->ll_addr;
+ ll_len = ifp->if_addrlen;
+ } else {
+ lladdr = lle->r_linkdata;
+ ll_len = lle->r_hdrlen;
+ }
+ bcopy(lladdr, desten, ll_len);
+ if (pflags != NULL)
+ *pflags = lle->la_flags;
+ if (plle) {
+ LLE_ADDREF(lle);
+ *plle = lle;
+ }
+ LLE_WUNLOCK(lle);
+ return (0);
+ }
/*
* There is a neighbor cache entry, but no ethernet address
* response yet. Append this latest packet to the end of the
- * packet queue in the mbuf, unless the number of the packet
- * does not exceed nd6_maxqueuelen. When it exceeds nd6_maxqueuelen,
+ * packet queue in the mbuf. When it exceeds nd6_maxqueuelen,
* the oldest packet in the queue will be removed.
*/
- if (ln->ln_state == ND6_LLINFO_NOSTATE)
- ln->ln_state = ND6_LLINFO_INCOMPLETE;
-
- if ((flags & LLE_EXCLUSIVE) == 0) {
- flags |= LLE_EXCLUSIVE;
- LLE_RUNLOCK(ln);
- goto retry;
- }
- LLE_WLOCK_ASSERT(ln);
-
- if (ln->la_hold) {
+ if (lle->la_hold != NULL) {
struct mbuf *m_hold;
int i;
i = 0;
- for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) {
+ for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){
i++;
if (m_hold->m_nextpkt == NULL) {
m_hold->m_nextpkt = m;
@@ -1997,134 +2382,63 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
}
}
while (i >= V_nd6_maxqueuelen) {
- m_hold = ln->la_hold;
- ln->la_hold = ln->la_hold->m_nextpkt;
+ m_hold = lle->la_hold;
+ lle->la_hold = lle->la_hold->m_nextpkt;
m_freem(m_hold);
i--;
}
} else {
- ln->la_hold = m;
+ lle->la_hold = m;
}
/*
* If there has been no NS for the neighbor after entering the
* INCOMPLETE state, send the first solicitation.
+ * Note that for newly-created lle la_asked will be 0,
+ * so we will transition from ND6_LLINFO_NOSTATE to
+ * ND6_LLINFO_INCOMPLETE state here.
*/
- if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) {
- ln->la_asked++;
-
- nd6_llinfo_settimer_locked(ln,
- (long)ND_IFINFO(ifp)->retrans * hz / 1000);
- LLE_WUNLOCK(ln);
- nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
- if (lle != NULL && ln == lle)
- LLE_WLOCK(lle);
-
- } else if (lle == NULL || ln != lle) {
- /*
- * We did the lookup (no lle arg) so we
- * need to do the unlock here.
- */
- LLE_WUNLOCK(ln);
- }
-
- return (0);
-
- sendpkt:
- /* discard the packet if IPv6 operation is disabled on the interface */
- if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
- error = ENETDOWN; /* better error? */
- goto bad;
- }
- /*
- * ln is valid and the caller did not pass in
- * an llentry
- */
- if ((ln != NULL) && (lle == NULL)) {
- if (flags & LLE_EXCLUSIVE)
- LLE_WUNLOCK(ln);
- else
- LLE_RUNLOCK(ln);
- }
-
-#ifdef MAC
- mac_netinet6_nd6_send(ifp, m);
-#endif
+ psrc = NULL;
+ send_ns = 0;
+ if (lle->la_asked == 0) {
+ lle->la_asked++;
+ send_ns = 1;
+ psrc = nd6_llinfo_get_holdsrc(lle, &src);
- /*
- * If called from nd6_ns_output() (NS), nd6_na_output() (NA),
- * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA
- * as handled by rtsol and rtadvd), mbufs will be tagged for SeND
- * to be diverted to user space. When re-injected into the kernel,
- * send_output() will directly dispatch them to the outgoing interface.
- */
- if (send_sendso_input_hook != NULL) {
- mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL);
- if (mtag != NULL) {
- ip6 = mtod(m, struct ip6_hdr *);
- ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
- /* Use the SEND socket */
- error = send_sendso_input_hook(m, ifp, SND_OUT,
- ip6len);
- /* -1 == no app on SEND socket */
- if (error == 0 || error != -1)
- return (error);
- }
+ nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE);
}
+ LLE_WUNLOCK(lle);
+ if (send_ns != 0)
+ nd6_ns_output(ifp, psrc, NULL, &dst->sin6_addr, NULL);
- /*
- * We were passed in a pointer to an lle with the lock held
- * this means that we can't call if_output as we will
- * recurse on the lle lock - so what we do is we create
- * a list of mbufs to send and transmit them in the caller
- * after the lock is dropped
- */
- if (lle != NULL) {
- if (*chain == NULL)
- *chain = m;
- else {
- struct mbuf *mb;
+ return (EWOULDBLOCK);
+}
- /*
- * append mbuf to end of deferred chain
- */
- mb = *chain;
- while (mb->m_nextpkt != NULL)
- mb = mb->m_nextpkt;
- mb->m_nextpkt = m;
- }
- return (error);
- }
- /* Reset layer specific mbuf flags to avoid confusing lower layers. */
- m->m_flags &= ~(M_PROTOFLAGS);
- if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
- return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
- NULL));
- }
- error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, NULL);
- return (error);
+/*
+ * Do L2 address resolution for @sa_dst address. Stores found
+ * address in @desten buffer. Copy of lle ln_flags can be also
+ * saved in @pflags if @pflags is non-NULL.
+ *
+ * Return values:
+ * - 0 on success (address copied to buffer).
+ * - EWOULDBLOCK (no local error, but address is still unresolved)
+ * - other errors (alloc failure, etc)
+ */
+int
+nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
+ char *desten, uint32_t *pflags)
+{
+ int error;
- bad:
- /*
- * ln is valid and the caller did not pass in
- * an llentry
- */
- if ((ln != NULL) && (lle == NULL)) {
- if (flags & LLE_EXCLUSIVE)
- LLE_WUNLOCK(ln);
- else
- LLE_RUNLOCK(ln);
- }
- if (m)
- m_freem(m);
+ flags |= LLE_ADDRONLY;
+ error = nd6_resolve_slow(ifp, flags, NULL,
+ (const struct sockaddr_in6 *)dst, desten, pflags, NULL);
return (error);
}
-#undef senderr
-
int
-nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
- struct sockaddr_in6 *dst, struct route *ro)
+nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
+ struct sockaddr_in6 *dst)
{
struct mbuf *m, *m_head;
struct ifnet *outifp;
@@ -2139,20 +2453,17 @@ nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
while (m_head) {
m = m_head;
m_head = m_head->m_nextpkt;
- error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro);
+ error = nd6_output_ifp(ifp, origifp, m, dst, NULL);
}
/*
* XXX
- * note that intermediate errors are blindly ignored - but this is
- * the same convention as used with nd6_output when called by
- * nd6_cache_lladdr
+ * note that intermediate errors are blindly ignored
*/
return (error);
}
-
-int
+static int
nd6_need_cache(struct ifnet *ifp)
{
/*
@@ -2167,19 +2478,9 @@ nd6_need_cache(struct ifnet *ifp)
case IFT_ETHER:
case IFT_FDDI:
case IFT_IEEE1394:
-#ifdef IFT_L2VLAN
case IFT_L2VLAN:
-#endif
-#ifdef IFT_IEEE80211
case IFT_IEEE80211:
-#endif
-#ifdef IFT_CARP
- case IFT_CARP:
-#endif
case IFT_INFINIBAND:
- case IFT_GIF: /* XXX need more cases? */
- case IFT_PPP:
- case IFT_TUNNEL:
case IFT_BRIDGE:
case IFT_PROPVIRTUAL:
return (1);
@@ -2189,75 +2490,76 @@ nd6_need_cache(struct ifnet *ifp)
}
/*
- * the callers of this function need to be re-worked to drop
- * the lle lock, drop here for now
+ * Add permanent ND6 link-layer record for given
+ * interface address.
+ *
+ * Very similar to IPv4 arp_ifinit(), but:
+ * 1) IPv6 DAD is performed in different place
+ * 2) It is called by IPv6 protocol stack in contrast to
+ * arp_ifinit() which is typically called in SIOCSIFADDR
+ * driver ioctl handler.
+ *
*/
int
-nd6_storelladdr(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, u_char *desten, struct llentry **lle)
+nd6_add_ifa_lle(struct in6_ifaddr *ia)
{
- struct llentry *ln;
+ struct ifnet *ifp;
+ struct llentry *ln, *ln_tmp;
+ struct sockaddr *dst;
- *lle = NULL;
- IF_AFDATA_UNLOCK_ASSERT(ifp);
- if (m != NULL && m->m_flags & M_MCAST) {
- int i;
+ ifp = ia->ia_ifa.ifa_ifp;
+ if (nd6_need_cache(ifp) == 0)
+ return (0);
- switch (ifp->if_type) {
- case IFT_ETHER:
- case IFT_FDDI:
-#ifdef IFT_L2VLAN
- case IFT_L2VLAN:
-#endif
-#ifdef IFT_IEEE80211
- case IFT_IEEE80211:
-#endif
- case IFT_BRIDGE:
- case IFT_ISO88025:
- ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
- desten);
- return (0);
- case IFT_IEEE1394:
- /*
- * netbsd can use if_broadcastaddr, but we don't do so
- * to reduce # of ifdef.
- */
- for (i = 0; i < ifp->if_addrlen; i++)
- desten[i] = ~0;
- return (0);
- case IFT_ARCNET:
- *desten = 0;
- return (0);
- default:
- m_freem(m);
- return (EAFNOSUPPORT);
- }
- }
+ ia->ia_ifa.ifa_rtrequest = nd6_rtrequest;
+ dst = (struct sockaddr *)&ia->ia_addr;
+ ln = lltable_alloc_entry(LLTABLE6(ifp), LLE_IFADDR, dst);
+ if (ln == NULL)
+ return (ENOBUFS);
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(ln);
+ /* Unlink any entry if exists */
+ ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_EXCLUSIVE, dst);
+ if (ln_tmp != NULL)
+ lltable_unlink_entry(LLTABLE6(ifp), ln_tmp);
+ lltable_link_entry(LLTABLE6(ifp), ln);
+ IF_AFDATA_WUNLOCK(ifp);
- /*
- * the entry should have been created in nd6_store_lladdr
- */
- IF_AFDATA_RLOCK(ifp);
- ln = lla_lookup(LLTABLE6(ifp), 0, dst);
- IF_AFDATA_RUNLOCK(ifp);
- if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) {
- if (ln != NULL)
- LLE_RUNLOCK(ln);
- /* this could happen, if we could not allocate memory */
- m_freem(m);
- return (1);
- }
+ if (ln_tmp != NULL)
+ EVENTHANDLER_INVOKE(lle_event, ln_tmp, LLENTRY_EXPIRED);
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
+
+ LLE_WUNLOCK(ln);
+ if (ln_tmp != NULL)
+ llentry_free(ln_tmp);
- bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
- *lle = ln;
- LLE_RUNLOCK(ln);
- /*
- * A *small* use after free race exists here
- */
return (0);
}
+/*
+ * Removes either all lle entries for given @ia, or lle
+ * corresponding to @ia address.
+ */
+void
+nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all)
+{
+ struct sockaddr_in6 mask, addr;
+ struct sockaddr *saddr, *smask;
+ struct ifnet *ifp;
+
+ ifp = ia->ia_ifa.ifa_ifp;
+ memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
+ memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
+ saddr = (struct sockaddr *)&addr;
+ smask = (struct sockaddr *)&mask;
+
+ if (all != 0)
+ lltable_prefix_free(AF_INET6, saddr, smask, LLE_STATIC);
+ else
+ lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR, saddr);
+}
+
static void
clear_llinfo_pqueue(struct llentry *ln)
{
@@ -2269,22 +2571,24 @@ clear_llinfo_pqueue(struct llentry *ln)
}
ln->la_hold = NULL;
- return;
}
static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
-#ifdef SYSCTL_DECL
+
SYSCTL_DECL(_net_inet6_icmp6);
-#endif
-SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
- CTLFLAG_RD, nd6_sysctl_drlist, "");
-SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
- CTLFLAG_RD, nd6_sysctl_prlist, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
- CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer,
- CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), "");
+SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, nd6_sysctl_drlist, "S,in6_defrouter",
+ "NDP default router list");
+SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, nd6_sysctl_prlist, "S,in6_prefix",
+ "NDP prefix list");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
+SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), "");
static int
nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
@@ -2293,30 +2597,33 @@ nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
struct nd_defrouter *dr;
int error;
- if (req->newptr)
+ if (req->newptr != NULL)
return (EPERM);
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+
bzero(&d, sizeof(d));
d.rtaddr.sin6_family = AF_INET6;
d.rtaddr.sin6_len = sizeof(d.rtaddr);
- /*
- * XXX locking
- */
+ ND6_RLOCK();
TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
d.rtaddr.sin6_addr = dr->rtaddr;
error = sa6_recoverscope(&d.rtaddr);
if (error != 0)
- return (error);
- d.flags = dr->flags;
+ break;
+ d.flags = dr->raflags;
d.rtlifetime = dr->rtlifetime;
- d.expire = dr->expire;
+ d.expire = dr->expire + (time_second - time_uptime);
d.if_index = dr->ifp->if_index;
error = SYSCTL_OUT(req, &d, sizeof(d));
if (error != 0)
- return (error);
+ break;
}
- return (0);
+ ND6_RUNLOCK();
+ return (error);
}
static int
@@ -2333,15 +2640,17 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
if (req->newptr)
return (EPERM);
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+
bzero(&p, sizeof(p));
p.origin = PR_ORIG_RA;
bzero(&s6, sizeof(s6));
s6.sin6_family = AF_INET6;
s6.sin6_len = sizeof(s6);
- /*
- * XXX locking
- */
+ ND6_RLOCK();
LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
p.prefix = pr->ndpr_prefix;
if (sa6_recoverscope(&p.prefix)) {
@@ -2362,7 +2671,8 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate)
p.expire = pr->ndpr_lastupdate +
- pr->ndpr_vltime;
+ pr->ndpr_vltime +
+ (time_second - time_uptime);
else
p.expire = maxexpire;
}
@@ -2373,7 +2683,7 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
p.advrtrs++;
error = SYSCTL_OUT(req, &p, sizeof(p));
if (error != 0)
- return (error);
+ break;
LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
s6.sin6_addr = pfr->router->rtaddr;
if (sa6_recoverscope(&s6))
@@ -2382,8 +2692,9 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
ip6_sprintf(ip6buf, &pfr->router->rtaddr));
error = SYSCTL_OUT(req, &s6, sizeof(s6));
if (error != 0)
- return (error);
+ break;
}
}
- return (0);
+ ND6_RUNLOCK();
+ return (error);
}
diff --git a/freebsd/sys/netinet6/nd6.h b/freebsd/sys/netinet6/nd6.h
index 94202e10..33ac4386 100644
--- a/freebsd/sys/netinet6/nd6.h
+++ b/freebsd/sys/netinet6/nd6.h
@@ -87,9 +87,7 @@ struct nd_ifinfo {
#define ND6_IFF_AUTO_LINKLOCAL 0x20
#define ND6_IFF_NO_RADR 0x40
#define ND6_IFF_NO_PREFER_IFACE 0x80 /* XXX: not related to ND. */
-
-#define ND6_CREATE LLE_CREATE
-#define ND6_EXCLUSIVE LLE_EXCLUSIVE
+#define ND6_IFF_NO_DAD 0x100
#ifdef _KERNEL
#define ND_IFINFO(ifp) \
@@ -234,14 +232,15 @@ struct in6_ndifreq {
((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10)))) /1000)
TAILQ_HEAD(nd_drhead, nd_defrouter);
-struct nd_defrouter {
+struct nd_defrouter {
TAILQ_ENTRY(nd_defrouter) dr_entry;
- struct in6_addr rtaddr;
- u_char flags; /* flags on RA message */
+ struct in6_addr rtaddr;
+ u_char raflags; /* flags on RA message */
u_short rtlifetime;
u_long expire;
- struct ifnet *ifp;
+ struct ifnet *ifp;
int installed; /* is installed into kernel routing table */
+ u_int refcnt;
};
struct nd_prefixctl {
@@ -317,6 +316,10 @@ struct nd_pfxrouter {
LIST_HEAD(nd_prhead, nd_prefix);
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_IP6NDP);
+#endif
+
/* nd6.c */
VNET_DECLARE(int, nd6_prune);
VNET_DECLARE(int, nd6_delay);
@@ -341,10 +344,20 @@ VNET_DECLARE(int, nd6_onlink_ns_rfc4861);
#define V_nd6_debug VNET(nd6_debug)
#define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861)
-#define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)
+/* Lock for the prefix and default router lists. */
+VNET_DECLARE(struct rwlock, nd6_lock);
+#define V_nd6_lock VNET(nd6_lock)
+
+#define ND6_RLOCK() rw_rlock(&V_nd6_lock)
+#define ND6_RUNLOCK() rw_runlock(&V_nd6_lock)
+#define ND6_WLOCK() rw_wlock(&V_nd6_lock)
+#define ND6_WUNLOCK() rw_wunlock(&V_nd6_lock)
+#define ND6_WLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_WLOCKED)
+#define ND6_RLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_RLOCKED)
+#define ND6_LOCK_ASSERT() rw_assert(&V_nd6_lock, RA_LOCKED)
+#define ND6_UNLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_UNLOCKED)
-VNET_DECLARE(struct callout, nd6_timer_ch);
-#define V_nd6_timer_ch VNET(nd6_timer_ch)
+#define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)
/* nd6_rtr.c */
VNET_DECLARE(int, nd6_defifindex);
@@ -359,7 +372,7 @@ VNET_DECLARE(int, ip6_temp_regen_advance); /* seconds */
#define V_ip6_temp_regen_advance VNET(ip6_temp_regen_advance)
union nd_opts {
- struct nd_opt_hdr *nd_opt_array[8]; /* max = target address list */
+ struct nd_opt_hdr *nd_opt_array[16]; /* max = ND_OPT_NONCE */
struct {
struct nd_opt_hdr *zero;
struct nd_opt_hdr *src_lladdr;
@@ -367,6 +380,16 @@ union nd_opts {
struct nd_opt_prefix_info *pi_beg; /* multiple opts, start */
struct nd_opt_rd_hdr *rh;
struct nd_opt_mtu *mtu;
+ struct nd_opt_hdr *__res6;
+ struct nd_opt_hdr *__res7;
+ struct nd_opt_hdr *__res8;
+ struct nd_opt_hdr *__res9;
+ struct nd_opt_hdr *__res10;
+ struct nd_opt_hdr *__res11;
+ struct nd_opt_hdr *__res12;
+ struct nd_opt_hdr *__res13;
+ struct nd_opt_nonce *nonce;
+ struct nd_opt_hdr *__res15;
struct nd_opt_hdr *search; /* multiple opts */
struct nd_opt_hdr *last; /* multiple opts */
int done;
@@ -379,6 +402,7 @@ union nd_opts {
#define nd_opts_pi_end nd_opt_each.pi_end
#define nd_opts_rh nd_opt_each.rh
#define nd_opts_mtu nd_opt_each.mtu
+#define nd_opts_nonce nd_opt_each.nonce
#define nd_opts_search nd_opt_each.search
#define nd_opts_last nd_opt_each.last
#define nd_opts_done nd_opt_each.done
@@ -390,34 +414,32 @@ void nd6_init(void);
void nd6_destroy(void);
#endif
struct nd_ifinfo *nd6_ifattach(struct ifnet *);
-void nd6_ifdetach(struct nd_ifinfo *);
-int nd6_is_addr_neighbor(struct sockaddr_in6 *, struct ifnet *);
+void nd6_ifdetach(struct ifnet *, struct nd_ifinfo *);
+int nd6_is_addr_neighbor(const struct sockaddr_in6 *, struct ifnet *);
void nd6_option_init(void *, int, union nd_opts *);
struct nd_opt_hdr *nd6_option(union nd_opts *);
int nd6_options(union nd_opts *);
-struct llentry *nd6_lookup(struct in6_addr *, int, struct ifnet *);
+struct llentry *nd6_lookup(const struct in6_addr *, int, struct ifnet *);
+struct llentry *nd6_alloc(const struct in6_addr *, int, struct ifnet *);
void nd6_setmtu(struct ifnet *);
-void nd6_llinfo_settimer(struct llentry *, long);
-void nd6_llinfo_settimer_locked(struct llentry *, long);
+void nd6_llinfo_setstate(struct llentry *lle, int newstate);
void nd6_timer(void *);
void nd6_purge(struct ifnet *);
-void nd6_nud_hint(struct rtentry *, struct in6_addr *, int);
-int nd6_resolve(struct ifnet *, struct rtentry *, struct mbuf *,
- struct sockaddr *, u_char *);
-void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
+int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
+ char *desten, uint32_t *pflags);
+int nd6_resolve(struct ifnet *, int, struct mbuf *,
+ const struct sockaddr *, u_char *, uint32_t *, struct llentry **);
int nd6_ioctl(u_long, caddr_t, struct ifnet *);
-struct llentry *nd6_cache_lladdr(struct ifnet *, struct in6_addr *,
+void nd6_cache_lladdr(struct ifnet *, struct in6_addr *,
char *, int, int, int);
-int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *,
- struct sockaddr_in6 *, struct rtentry *);
-int nd6_output_lle(struct ifnet *, struct ifnet *, struct mbuf *,
- struct sockaddr_in6 *, struct rtentry *, struct llentry *,
- struct mbuf **);
-int nd6_output_flush(struct ifnet *, struct ifnet *, struct mbuf *,
- struct sockaddr_in6 *, struct route *);
-int nd6_need_cache(struct ifnet *);
-int nd6_storelladdr(struct ifnet *, struct mbuf *,
- struct sockaddr *, u_char *, struct llentry **);
+void nd6_grab_holdchain(struct llentry *, struct mbuf **,
+ struct sockaddr_in6 *);
+int nd6_flush_holdchain(struct ifnet *, struct ifnet *, struct mbuf *,
+ struct sockaddr_in6 *);
+int nd6_add_ifa_lle(struct in6_ifaddr *);
+void nd6_rem_ifa_lle(struct in6_ifaddr *, int);
+int nd6_output_ifp(struct ifnet *, struct ifnet *, struct mbuf *,
+ struct sockaddr_in6 *, struct route *);
/* nd6_nbr.c */
void nd6_na_input(struct mbuf *, int, int);
@@ -425,24 +447,28 @@ void nd6_na_output(struct ifnet *, const struct in6_addr *,
const struct in6_addr *, u_long, int, struct sockaddr *);
void nd6_ns_input(struct mbuf *, int, int);
void nd6_ns_output(struct ifnet *, const struct in6_addr *,
- const struct in6_addr *, struct llentry *, int);
+ const struct in6_addr *, const struct in6_addr *, uint8_t *);
caddr_t nd6_ifptomac(struct ifnet *);
+void nd6_dad_init(void);
void nd6_dad_start(struct ifaddr *, int);
void nd6_dad_stop(struct ifaddr *);
-void nd6_dad_duplicated(struct ifaddr *);
/* nd6_rtr.c */
void nd6_rs_input(struct mbuf *, int, int);
void nd6_ra_input(struct mbuf *, int, int);
-void prelist_del(struct nd_prefix *);
void defrouter_reset(void);
void defrouter_select(void);
-void defrtrlist_del(struct nd_defrouter *);
+void defrouter_ref(struct nd_defrouter *);
+void defrouter_rele(struct nd_defrouter *);
+bool defrouter_remove(struct in6_addr *, struct ifnet *);
+void defrouter_unlink(struct nd_defrouter *, struct nd_drhead *);
+void defrouter_del(struct nd_defrouter *);
void prelist_remove(struct nd_prefix *);
int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *,
struct nd_prefix **);
void pfxlist_onlink_check(void);
struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *);
+struct nd_defrouter *defrouter_lookup_locked(struct in6_addr *, struct ifnet *);
struct nd_prefix *nd6_prefix_lookup(struct nd_prefixctl *);
void rt6_flush(struct in6_addr *, struct ifnet *);
int nd6_setdefaultiface(int);
diff --git a/freebsd/sys/netinet6/nd6_nbr.c b/freebsd/sys/netinet6/nd6_nbr.c
index cb765549..df50fa93 100644
--- a/freebsd/sys/netinet6/nd6_nbr.c
+++ b/freebsd/sys/netinet6/nd6_nbr.c
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
+#include <sys/libkern.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/rwlock.h>
#include <sys/mbuf.h>
@@ -50,9 +51,11 @@ __FBSDID("$FreeBSD$");
#include <sys/time.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/errno.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/callout.h>
+#include <sys/refcount.h>
#include <net/if.h>
#include <net/if_types.h>
@@ -62,11 +65,11 @@ __FBSDID("$FreeBSD$");
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif
+#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <net/if_llatbl.h>
-#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le))
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#include <netinet/ip6.h>
@@ -80,19 +83,32 @@ __FBSDID("$FreeBSD$");
#define SDL(s) ((struct sockaddr_dl *)s)
struct dadq;
-static struct dadq *nd6_dad_find(struct ifaddr *);
-static void nd6_dad_starttimer(struct dadq *, int);
+static struct dadq *nd6_dad_find(struct ifaddr *, struct nd_opt_nonce *);
+static void nd6_dad_add(struct dadq *dp);
+static void nd6_dad_del(struct dadq *dp);
+static void nd6_dad_rele(struct dadq *);
+static void nd6_dad_starttimer(struct dadq *, int, int);
static void nd6_dad_stoptimer(struct dadq *);
static void nd6_dad_timer(struct dadq *);
-static void nd6_dad_ns_output(struct dadq *, struct ifaddr *);
-static void nd6_dad_ns_input(struct ifaddr *);
+static void nd6_dad_duplicated(struct ifaddr *, struct dadq *);
+static void nd6_dad_ns_output(struct dadq *);
+static void nd6_dad_ns_input(struct ifaddr *, struct nd_opt_nonce *);
static void nd6_dad_na_input(struct ifaddr *);
static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *,
const struct in6_addr *, u_long, int, struct sockaddr *, u_int);
+static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *,
+ const struct in6_addr *, const struct in6_addr *, uint8_t *, u_int);
-VNET_DEFINE(int, dad_ignore_ns) = 0; /* ignore NS in DAD - specwise incorrect*/
-VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to transmit DAD packet */
-#define V_dad_ignore_ns VNET(dad_ignore_ns)
+static VNET_DEFINE(int, dad_enhanced) = 1;
+#define V_dad_enhanced VNET(dad_enhanced)
+
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(dad_enhanced), 0,
+ "Enable Enhanced DAD, which adds a random nonce to NS messages for DAD.");
+
+static VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to
+ transmit DAD packet */
#define V_dad_maxtry VNET(dad_maxtry)
/*
@@ -229,42 +245,40 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
/* (1) and (3) check. */
if (ifp->if_carp)
ifa = (*carp_iamatch6_p)(ifp, &taddr6);
- if (ifa == NULL)
+ else
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
/* (2) check. */
if (ifa == NULL) {
- struct route_in6 ro;
- int need_proxy;
+ struct sockaddr_dl rt_gateway;
+ struct rt_addrinfo info;
+ struct sockaddr_in6 dst6;
- bzero(&ro, sizeof(ro));
- ro.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
- ro.ro_dst.sin6_family = AF_INET6;
- ro.ro_dst.sin6_addr = taddr6;
+ bzero(&dst6, sizeof(dst6));
+ dst6.sin6_len = sizeof(struct sockaddr_in6);
+ dst6.sin6_family = AF_INET6;
+ dst6.sin6_addr = taddr6;
+
+ bzero(&rt_gateway, sizeof(rt_gateway));
+ rt_gateway.sdl_len = sizeof(rt_gateway);
+ bzero(&info, sizeof(info));
+ info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway;
/* Always use the default FIB. */
-#ifdef RADIX_MPATH
- rtalloc_mpath_fib((struct route *)&ro, RTF_ANNOUNCE,
- RT_DEFAULT_FIB);
-#else
- in6_rtalloc(&ro, RT_DEFAULT_FIB);
-#endif
- need_proxy = (ro.ro_rt &&
- (ro.ro_rt->rt_flags & RTF_ANNOUNCE) != 0 &&
- ro.ro_rt->rt_gateway->sa_family == AF_LINK);
- if (ro.ro_rt != NULL) {
- if (need_proxy)
- proxydl = *SDL(ro.ro_rt->rt_gateway);
- RTFREE(ro.ro_rt);
- }
- if (need_proxy) {
- /*
- * proxy NDP for single entry
- */
- ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
- IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
- if (ifa)
- proxy = 1;
+ if (rib_lookup_info(RT_DEFAULT_FIB, (struct sockaddr *)&dst6,
+ 0, 0, &info) == 0) {
+ if ((info.rti_flags & RTF_ANNOUNCE) != 0 &&
+ rt_gateway.sdl_family == AF_LINK) {
+
+ /*
+ * proxy NDP for single entry
+ */
+ proxydl = *SDL(&rt_gateway);
+ ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(
+ ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
+ if (ifa)
+ proxy = 1;
+ }
}
}
if (ifa == NULL) {
@@ -316,7 +330,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
* silently ignore it.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
- nd6_dad_ns_input(ifa);
+ nd6_dad_ns_input(ifa, ndopts.nd_opts_nonce);
goto freeit;
}
@@ -377,12 +391,14 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
* Based on RFC 2461
* Based on RFC 2462 (duplicate address detection)
*
- * ln - for source address determination
- * dad - duplicate address detection
+ * ln - for source address determination
+ * nonce - If non-NULL, NS is used for duplicate address detection and
+ * the value (length is ND_OPT_NONCE_LEN) is used as a random nonce.
*/
-void
-nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
- const struct in6_addr *taddr6, struct llentry *ln, int dad)
+static void
+nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6,
+ const struct in6_addr *daddr6, const struct in6_addr *taddr6,
+ uint8_t *nonce, u_int fibnum)
{
struct mbuf *m;
struct m_tag *mtag;
@@ -392,7 +408,6 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
int icmp6len;
int maxlen;
caddr_t mac;
- struct route_in6 ro;
if (IN6_IS_ADDR_MULTICAST(taddr6))
return;
@@ -400,27 +415,17 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
/* estimate the size of message */
maxlen = sizeof(*ip6) + sizeof(*nd_ns);
maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
- if (max_linkhdr + maxlen >= MCLBYTES) {
-#ifdef DIAGNOSTIC
- printf("nd6_ns_output: max_linkhdr + maxlen >= MCLBYTES "
- "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES);
-#endif
- return;
- }
+ KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
+ "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
+ __func__, max_linkhdr, maxlen, MCLBYTES));
- MGETHDR(m, M_DONTWAIT, MT_DATA);
- if (m && max_linkhdr + maxlen >= MHLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- m_free(m);
- m = NULL;
- }
- }
+ if (max_linkhdr + maxlen > MHLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
- m->m_pkthdr.rcvif = NULL;
-
- bzero(&ro, sizeof(ro));
+ M_SETFIB(m, fibnum);
if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) {
m->m_flags |= M_MCAST;
@@ -431,7 +436,7 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
icmp6len = sizeof(*nd_ns);
m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len;
- m->m_data += max_linkhdr; /* or MH_ALIGN() equivalent? */
+ m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? */
/* fill neighbor solicitation packet */
ip6 = mtod(m, struct ip6_hdr *);
@@ -453,8 +458,8 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
goto bad;
}
- if (!dad) {
- struct ifaddr *ifa;
+ if (nonce == NULL) {
+ struct ifaddr *ifa = NULL;
/*
* RFC2461 7.2.2:
@@ -466,60 +471,33 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
* interface should be used."
*
* We use the source address for the prompting packet
- * (saddr6), if:
- * - saddr6 is given from the caller (by giving "ln"), and
- * - saddr6 belongs to the outgoing interface.
+ * (saddr6), if saddr6 belongs to the outgoing interface.
* Otherwise, we perform the source address selection as usual.
*/
- struct in6_addr *hsrc;
- hsrc = NULL;
- if (ln != NULL) {
- LLE_RLOCK(ln);
- if (ln->la_hold != NULL) {
- struct ip6_hdr *hip6; /* hold ip6 */
-
- /*
- * assuming every packet in la_hold has the same IP
- * header
- */
- hip6 = mtod(ln->la_hold, struct ip6_hdr *);
- /* XXX pullup? */
- if (sizeof(*hip6) < ln->la_hold->m_len) {
- ip6->ip6_src = hip6->ip6_src;
- hsrc = &hip6->ip6_src;
- }
- }
- LLE_RUNLOCK(ln);
- }
- if (hsrc && (ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
- hsrc)) != NULL) {
+ if (saddr6 != NULL)
+ ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, saddr6);
+ if (ifa != NULL) {
/* ip6_src set already. */
+ ip6->ip6_src = *saddr6;
ifa_free(ifa);
} else {
int error;
- struct sockaddr_in6 dst_sa;
- struct in6_addr src_in;
- struct ifnet *oifp;
-
- bzero(&dst_sa, sizeof(dst_sa));
- dst_sa.sin6_family = AF_INET6;
- dst_sa.sin6_len = sizeof(dst_sa);
- dst_sa.sin6_addr = ip6->ip6_dst;
-
- oifp = ifp;
- error = in6_selectsrc(&dst_sa, NULL,
- NULL, &ro, NULL, &oifp, &src_in);
+ struct in6_addr dst6, src6;
+ uint32_t scopeid;
+
+ in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid);
+ error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6,
+ scopeid, ifp, &src6, NULL);
if (error) {
char ip6buf[INET6_ADDRSTRLEN];
- nd6log((LOG_DEBUG,
- "nd6_ns_output: source can't be "
- "determined: dst=%s, error=%d\n",
- ip6_sprintf(ip6buf, &dst_sa.sin6_addr),
+ nd6log((LOG_DEBUG, "%s: source can't be "
+ "determined: dst=%s, error=%d\n", __func__,
+ ip6_sprintf(ip6buf, &dst6),
error));
goto bad;
}
- ip6->ip6_src = src_in;
+ ip6->ip6_src = src6;
}
} else {
/*
@@ -550,7 +528,7 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
* Multicast NS MUST add one add the option
* Unicast NS SHOULD add one add the option
*/
- if (!dad && (mac = nd6_ifptomac(ifp))) {
+ if (nonce == NULL && (mac = nd6_ifptomac(ifp))) {
int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
/* 8 byte alignments... */
@@ -564,7 +542,26 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
nd_opt->nd_opt_len = optlen >> 3;
bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen);
}
+ /*
+ * Add a Nonce option (RFC 3971) to detect looped back NS messages.
+ * This behavior is documented as Enhanced Duplicate Address
+ * Detection in RFC 7527.
+ * net.inet6.ip6.dad_enhanced=0 disables this.
+ */
+ if (V_dad_enhanced != 0 && nonce != NULL) {
+ int optlen = sizeof(struct nd_opt_hdr) + ND_OPT_NONCE_LEN;
+ struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
+ /* 8-byte alignment is required. */
+ optlen = (optlen + 7) & ~7;
+ m->m_pkthdr.len += optlen;
+ m->m_len += optlen;
+ icmp6len += optlen;
+ bzero((caddr_t)nd_opt, optlen);
+ nd_opt->nd_opt_type = ND_OPT_NONCE;
+ nd_opt->nd_opt_len = optlen >> 3;
+ bcopy(nonce, (caddr_t)(nd_opt + 1), ND_OPT_NONCE_LEN);
+ }
ip6->ip6_plen = htons((u_short)icmp6len);
nd_ns->nd_ns_cksum = 0;
nd_ns->nd_ns_cksum =
@@ -579,24 +576,27 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
m_tag_prepend(m, mtag);
}
- ip6_output(m, NULL, &ro, dad ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL);
+ ip6_output(m, NULL, NULL, (nonce != NULL) ? IPV6_UNSPECSRC : 0,
+ &im6o, NULL, NULL);
icmp6_ifstat_inc(ifp, ifs6_out_msg);
icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit);
ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]);
- /* We don't cache this route. */
- RO_RTFREE(&ro);
-
return;
bad:
- if (ro.ro_rt) {
- RTFREE(ro.ro_rt);
- }
m_freem(m);
- return;
}
+#ifndef BURN_BRIDGES
+void
+nd6_ns_output(struct ifnet *ifp, const struct in6_addr *saddr6,
+ const struct in6_addr *daddr6, const struct in6_addr *taddr6,uint8_t *nonce)
+{
+
+ nd6_ns_output_fib(ifp, saddr6, daddr6, taddr6, nonce, RT_DEFAULT_FIB);
+}
+#endif
/*
* Neighbor advertisement input handling.
*
@@ -626,8 +626,10 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
struct llentry *ln = NULL;
union nd_opts ndopts;
struct mbuf *chain = NULL;
- struct m_tag *mtag;
struct sockaddr_in6 sin6;
+ u_char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
if (ip6->ip6_hlim != 255) {
@@ -653,6 +655,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
is_router = ((flags & ND_NA_FLAG_ROUTER) != 0);
is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0);
is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0);
+ memset(&sin6, 0, sizeof(sin6));
taddr6 = nd_na->nd_na_target;
if (in6_setscope(&taddr6, ifp, NULL))
@@ -685,7 +688,14 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
}
- ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
+ /*
+ * This effectively disables the DAD check on a non-master CARP
+ * address.
+ */
+ if (ifp->if_carp)
+ ifa = (*carp_iamatch6_p)(ifp, &taddr6);
+ else
+ ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
/*
* Target address matches one of my interface address.
@@ -742,20 +752,21 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
/*
* Record link-layer address, and update the state.
*/
- bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
- ln->la_flags |= LLE_VALID;
- EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
- if (is_solicited) {
- ln->ln_state = ND6_LLINFO_REACHABLE;
- ln->ln_byhint = 0;
- if (!ND6_LLINFO_PERMANENT(ln)) {
- nd6_llinfo_settimer_locked(ln,
- (long)ND_IFINFO(ln->lle_tbl->llt_ifp)->reachable * hz);
- }
- } else {
- ln->ln_state = ND6_LLINFO_STALE;
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
+
+ if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off) == 0) {
+ ln = NULL;
+ goto freeit;
}
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
+ if (is_solicited)
+ nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
+ else
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
if ((ln->ln_router = is_router) != 0) {
/*
* This means a router's state has changed from
@@ -774,7 +785,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
llchange = 0;
else {
if (ln->la_flags & LLE_VALID) {
- if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen))
+ if (bcmp(lladdr, ln->ll_addr, ifp->if_addrlen))
llchange = 1;
else
llchange = 0;
@@ -806,10 +817,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* If state is REACHABLE, make it STALE.
* no other updates should be done.
*/
- if (ln->ln_state == ND6_LLINFO_REACHABLE) {
- ln->ln_state = ND6_LLINFO_STALE;
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
- }
+ if (ln->ln_state == ND6_LLINFO_REACHABLE)
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
goto freeit;
} else if (is_override /* (2a) */
|| (!is_override && (lladdr != NULL && !llchange)) /* (2b) */
@@ -818,8 +827,15 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* Update link-local address, if any.
*/
if (lladdr != NULL) {
- bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
- ln->la_flags |= LLE_VALID;
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ goto freeit;
+ if (lltable_try_set_entry_addr(ifp, ln, linkhdr,
+ linkhdrsize, lladdr_off) == 0) {
+ ln = NULL;
+ goto freeit;
+ }
EVENTHANDLER_INVOKE(lle_event, ln,
LLENTRY_RESOLVED);
}
@@ -829,19 +845,11 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* If not solicited and the link-layer address was
* changed, make it STALE.
*/
- if (is_solicited) {
- ln->ln_state = ND6_LLINFO_REACHABLE;
- ln->ln_byhint = 0;
- if (!ND6_LLINFO_PERMANENT(ln)) {
- nd6_llinfo_settimer_locked(ln,
- (long)ND_IFINFO(ifp)->reachable * hz);
- }
- } else {
- if (lladdr != NULL && llchange) {
- ln->ln_state = ND6_LLINFO_STALE;
- nd6_llinfo_settimer_locked(ln,
- (long)V_nd6_gctimer * hz);
- }
+ if (is_solicited)
+ nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
+ else {
+ if (lladdr != NULL && llchange)
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
}
}
@@ -851,31 +859,19 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* Remove the sender from the Default Router List and
* update the Destination Cache entries.
*/
- struct nd_defrouter *dr;
- struct in6_addr *in6;
-
- in6 = &L3_ADDR_SIN6(ln)->sin6_addr;
+ struct ifnet *nd6_ifp;
- /*
- * Lock to protect the default router list.
- * XXX: this might be unnecessary, since this function
- * is only called under the network software interrupt
- * context. However, we keep it just for safety.
- */
- dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp);
- if (dr)
- defrtrlist_del(dr);
- else if (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags &
- ND6_IFF_ACCEPT_RTADV) {
+ nd6_ifp = lltable_get_ifp(ln->lle_tbl);
+ if (!defrouter_remove(&ln->r_l3addr.addr6, nd6_ifp) &&
+ (ND_IFINFO(nd6_ifp)->flags &
+ ND6_IFF_ACCEPT_RTADV) != 0)
/*
* Even if the neighbor is not in the default
- * router list, the neighbor may be used
- * as a next hop for some destinations
- * (e.g. redirect case). So we must
- * call rt6_flush explicitly.
+ * router list, the neighbor may be used as a
+ * next hop for some destinations (e.g. redirect
+ * case). So we must call rt6_flush explicitly.
*/
rt6_flush(&ip6->ip6_src, ifp);
- }
}
ln->ln_router = is_router;
}
@@ -884,43 +880,15 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* rt->rt_flags &= ~RTF_REJECT;
*/
ln->la_asked = 0;
- if (ln->la_hold) {
- struct mbuf *m_hold, *m_hold_next;
-
- /*
- * reset the la_hold in advance, to explicitly
- * prevent a la_hold lookup in nd6_output()
- * (wouldn't happen, though...)
- */
- for (m_hold = ln->la_hold, ln->la_hold = NULL;
- m_hold; m_hold = m_hold_next) {
- m_hold_next = m_hold->m_nextpkt;
- m_hold->m_nextpkt = NULL;
- /*
- * we assume ifp is not a loopback here, so just set
- * the 2nd argument as the 1st one.
- */
-
- if (send_sendso_input_hook != NULL) {
- mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
- sizeof(unsigned short), M_NOWAIT);
- if (mtag == NULL)
- goto bad;
- m_tag_prepend(m, mtag);
- }
-
- nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain);
- }
- }
+ if (ln->la_hold != NULL)
+ nd6_grab_holdchain(ln, &chain, &sin6);
freeit:
- if (ln != NULL) {
- if (chain)
- memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6));
+ if (ln != NULL)
LLE_WUNLOCK(ln);
- if (chain)
- nd6_output_flush(ifp, ifp, chain, &sin6, NULL);
- }
+ if (chain != NULL)
+ nd6_flush_holdchain(ifp, ifp, chain, &sin6);
+
if (checklink)
pfxlist_onlink_check();
@@ -954,42 +922,30 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
{
struct mbuf *m;
struct m_tag *mtag;
- struct ifnet *oifp;
struct ip6_hdr *ip6;
struct nd_neighbor_advert *nd_na;
struct ip6_moptions im6o;
- struct in6_addr src, daddr6;
- struct sockaddr_in6 dst_sa;
+ struct in6_addr daddr6, dst6, src6;
+ uint32_t scopeid;
+
int icmp6len, maxlen, error;
caddr_t mac = NULL;
- struct route_in6 ro;
-
- bzero(&ro, sizeof(ro));
daddr6 = *daddr6_0; /* make a local copy for modification */
/* estimate the size of message */
maxlen = sizeof(*ip6) + sizeof(*nd_na);
maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
- if (max_linkhdr + maxlen >= MCLBYTES) {
-#ifdef DIAGNOSTIC
- printf("nd6_na_output: max_linkhdr + maxlen >= MCLBYTES "
- "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES);
-#endif
- return;
- }
+ KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
+ "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
+ __func__, max_linkhdr, maxlen, MCLBYTES));
- MGETHDR(m, M_DONTWAIT, MT_DATA);
- if (m && max_linkhdr + maxlen >= MHLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- m_free(m);
- m = NULL;
- }
- }
+ if (max_linkhdr + maxlen > MHLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
- m->m_pkthdr.rcvif = NULL;
M_SETFIB(m, fibnum);
if (IN6_IS_ADDR_MULTICAST(&daddr6)) {
@@ -1001,7 +957,7 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
icmp6len = sizeof(*nd_na);
m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len;
- m->m_data += max_linkhdr; /* or MH_ALIGN() equivalent? */
+ m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? */
/* fill neighbor advertisement packet */
ip6 = mtod(m, struct ip6_hdr *);
@@ -1023,25 +979,21 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
flags &= ~ND_NA_FLAG_SOLICITED;
}
ip6->ip6_dst = daddr6;
- bzero(&dst_sa, sizeof(struct sockaddr_in6));
- dst_sa.sin6_family = AF_INET6;
- dst_sa.sin6_len = sizeof(struct sockaddr_in6);
- dst_sa.sin6_addr = daddr6;
/*
* Select a source whose scope is the same as that of the dest.
*/
- bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa));
- oifp = ifp;
- error = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, &oifp, &src);
+ in6_splitscope(&daddr6, &dst6, &scopeid);
+ error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6,
+ scopeid, ifp, &src6, NULL);
if (error) {
char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_DEBUG, "nd6_na_output: source can't be "
"determined: dst=%s, error=%d\n",
- ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error));
+ ip6_sprintf(ip6buf, &daddr6), error));
goto bad;
}
- ip6->ip6_src = src;
+ ip6->ip6_src = src6;
nd_na = (struct nd_neighbor_advert *)(ip6 + 1);
nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
nd_na->nd_na_code = 0;
@@ -1104,22 +1056,15 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
m_tag_prepend(m, mtag);
}
- ip6_output(m, NULL, &ro, 0, &im6o, NULL, NULL);
+ ip6_output(m, NULL, NULL, 0, &im6o, NULL, NULL);
icmp6_ifstat_inc(ifp, ifs6_out_msg);
icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert);
ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]);
- /* We don't cache this route. */
- RO_RTFREE(&ro);
-
return;
bad:
- if (ro.ro_rt) {
- RTFREE(ro.ro_rt);
- }
m_freem(m);
- return;
}
#ifndef BURN_BRIDGES
@@ -1142,15 +1087,8 @@ nd6_ifptomac(struct ifnet *ifp)
case IFT_ETHER:
case IFT_FDDI:
case IFT_IEEE1394:
-#ifdef IFT_L2VLAN
case IFT_L2VLAN:
-#endif
-#ifdef IFT_IEEE80211
case IFT_IEEE80211:
-#endif
-#ifdef IFT_CARP
- case IFT_CARP:
-#endif
case IFT_INFINIBAND:
case IFT_BRIDGE:
case IFT_ISO88025:
@@ -1168,31 +1106,80 @@ struct dadq {
int dad_ns_ocount; /* NS sent so far */
int dad_ns_icount;
int dad_na_icount;
+ int dad_ns_lcount; /* looped back NS */
+ int dad_loopbackprobe; /* probing state for loopback detection */
struct callout dad_timer_ch;
struct vnet *dad_vnet;
+ u_int dad_refcnt;
+#define ND_OPT_NONCE_LEN32 \
+ ((ND_OPT_NONCE_LEN + sizeof(uint32_t) - 1)/sizeof(uint32_t))
+ uint32_t dad_nonce[ND_OPT_NONCE_LEN32];
};
static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq);
-VNET_DEFINE(int, dad_init) = 0;
-#define V_dadq VNET(dadq)
-#define V_dad_init VNET(dad_init)
+static VNET_DEFINE(struct rwlock, dad_rwlock);
+#define V_dadq VNET(dadq)
+#define V_dad_rwlock VNET(dad_rwlock)
+
+#define DADQ_RLOCK() rw_rlock(&V_dad_rwlock)
+#define DADQ_RUNLOCK() rw_runlock(&V_dad_rwlock)
+#define DADQ_WLOCK() rw_wlock(&V_dad_rwlock)
+#define DADQ_WUNLOCK() rw_wunlock(&V_dad_rwlock)
+
+static void
+nd6_dad_add(struct dadq *dp)
+{
+
+ DADQ_WLOCK();
+ TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list);
+ DADQ_WUNLOCK();
+}
+
+static void
+nd6_dad_del(struct dadq *dp)
+{
+
+ DADQ_WLOCK();
+ TAILQ_REMOVE(&V_dadq, dp, dad_list);
+ DADQ_WUNLOCK();
+ nd6_dad_rele(dp);
+}
static struct dadq *
-nd6_dad_find(struct ifaddr *ifa)
+nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n)
{
struct dadq *dp;
- TAILQ_FOREACH(dp, &V_dadq, dad_list)
- if (dp->dad_ifa == ifa)
- return (dp);
+ DADQ_RLOCK();
+ TAILQ_FOREACH(dp, &V_dadq, dad_list) {
+ if (dp->dad_ifa != ifa)
+ continue;
+ /*
+ * Skip if the nonce matches the received one.
+ * +2 in the length is required because of type and
+ * length fields are included in a header.
+ */
+ if (n != NULL &&
+ n->nd_opt_nonce_len == (ND_OPT_NONCE_LEN + 2) / 8 &&
+ memcmp(&n->nd_opt_nonce[0], &dp->dad_nonce[0],
+ ND_OPT_NONCE_LEN) == 0) {
+ dp->dad_ns_lcount++;
+ continue;
+ }
+ refcount_acquire(&dp->dad_refcnt);
+ break;
+ }
+ DADQ_RUNLOCK();
- return (NULL);
+ return (dp);
}
static void
-nd6_dad_starttimer(struct dadq *dp, int ticks)
+nd6_dad_starttimer(struct dadq *dp, int ticks, int send_ns)
{
+ if (send_ns != 0)
+ nd6_dad_ns_output(dp);
callout_reset(&dp->dad_timer_ch, ticks,
(void (*)(void *))nd6_dad_timer, (void *)dp);
}
@@ -1201,7 +1188,25 @@ static void
nd6_dad_stoptimer(struct dadq *dp)
{
- callout_stop(&dp->dad_timer_ch);
+ callout_drain(&dp->dad_timer_ch);
+}
+
+static void
+nd6_dad_rele(struct dadq *dp)
+{
+
+ if (refcount_release(&dp->dad_refcnt)) {
+ ifa_free(dp->dad_ifa);
+ free(dp, M_IP6NDP);
+ }
+}
+
+void
+nd6_dad_init(void)
+{
+
+ rw_init(&V_dad_rwlock, "nd6 DAD queue");
+ TAILQ_INIT(&V_dadq);
}
/*
@@ -1214,11 +1219,6 @@ nd6_dad_start(struct ifaddr *ifa, int delay)
struct dadq *dp;
char ip6buf[INET6_ADDRSTRLEN];
- if (!V_dad_init) {
- TAILQ_INIT(&V_dadq);
- V_dad_init++;
- }
-
/*
* If we don't need DAD, don't do it.
* There are several cases:
@@ -1243,17 +1243,26 @@ nd6_dad_start(struct ifaddr *ifa, int delay)
}
if (ifa->ifa_ifp == NULL)
panic("nd6_dad_start: ifa->ifa_ifp == NULL");
- if (!(ifa->ifa_ifp->if_flags & IFF_UP)) {
+ if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_NO_DAD) {
+ ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
return;
}
- if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)
+ if (!(ifa->ifa_ifp->if_flags & IFF_UP) ||
+ !(ifa->ifa_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
+ (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)) {
+ ia->ia6_flags |= IN6_IFF_TENTATIVE;
return;
- if (nd6_dad_find(ifa) != NULL) {
- /* DAD already in progress */
+ }
+ if ((dp = nd6_dad_find(ifa, NULL)) != NULL) {
+ /*
+ * DAD is already in progress. Let the existing entry
+ * finish it.
+ */
+ nd6_dad_rele(dp);
return;
}
- dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT);
+ dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT | M_ZERO);
if (dp == NULL) {
log(LOG_ERR, "nd6_dad_start: memory allocation failed for "
"%s(%s)\n",
@@ -1261,13 +1270,10 @@ nd6_dad_start(struct ifaddr *ifa, int delay)
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
return;
}
- bzero(dp, sizeof(*dp));
callout_init(&dp->dad_timer_ch, 0);
#ifdef VIMAGE
dp->dad_vnet = curvnet;
#endif
- TAILQ_INSERT_TAIL(&V_dadq, (struct dadq *)dp, dad_list);
-
nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
@@ -1278,17 +1284,14 @@ nd6_dad_start(struct ifaddr *ifa, int delay)
* (re)initialization.
*/
dp->dad_ifa = ifa;
- ifa_ref(ifa); /* just for safety */
+ ifa_ref(dp->dad_ifa);
dp->dad_count = V_ip6_dad_count;
dp->dad_ns_icount = dp->dad_na_icount = 0;
dp->dad_ns_ocount = dp->dad_ns_tcount = 0;
- if (delay == 0) {
- nd6_dad_ns_output(dp, ifa);
- nd6_dad_starttimer(dp,
- (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
- } else {
- nd6_dad_starttimer(dp, delay);
- }
+ dp->dad_ns_lcount = dp->dad_loopbackprobe = 0;
+ refcount_init(&dp->dad_refcnt, 1);
+ nd6_dad_add(dp);
+ nd6_dad_starttimer(dp, delay, 0);
}
/*
@@ -1299,9 +1302,7 @@ nd6_dad_stop(struct ifaddr *ifa)
{
struct dadq *dp;
- if (!V_dad_init)
- return;
- dp = nd6_dad_find(ifa);
+ dp = nd6_dad_find(ifa, NULL);
if (!dp) {
/* DAD wasn't started yet */
return;
@@ -1309,53 +1310,61 @@ nd6_dad_stop(struct ifaddr *ifa)
nd6_dad_stoptimer(dp);
- TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
- free(dp, M_IP6NDP);
- dp = NULL;
- ifa_free(ifa);
+ /*
+ * The DAD queue entry may have been removed by nd6_dad_timer() while
+ * we were waiting for it to stop, so re-do the lookup.
+ */
+ nd6_dad_rele(dp);
+ if (nd6_dad_find(ifa, NULL) == NULL)
+ return;
+
+ nd6_dad_del(dp);
+ nd6_dad_rele(dp);
}
static void
nd6_dad_timer(struct dadq *dp)
{
CURVNET_SET(dp->dad_vnet);
- int s;
struct ifaddr *ifa = dp->dad_ifa;
+ struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
char ip6buf[INET6_ADDRSTRLEN];
- s = splnet(); /* XXX */
-
/* Sanity check */
if (ia == NULL) {
log(LOG_ERR, "nd6_dad_timer: called with null parameter\n");
- goto done;
+ goto err;
+ }
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
+ /* Do not need DAD for ifdisabled interface. */
+ log(LOG_ERR, "nd6_dad_timer: cancel DAD on %s because of "
+ "ND6_IFF_IFDISABLED.\n", ifp->if_xname);
+ goto err;
}
if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
log(LOG_ERR, "nd6_dad_timer: called with duplicated address "
"%s(%s)\n",
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
- goto done;
+ goto err;
}
if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
log(LOG_ERR, "nd6_dad_timer: called with non-tentative address "
"%s(%s)\n",
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
- goto done;
+ goto err;
}
- /* timeouted with IFF_{RUNNING,UP} check */
- if (dp->dad_ns_tcount > V_dad_maxtry) {
- nd6log((LOG_INFO, "%s: could not run DAD, driver problem?\n",
+ /* Stop DAD if the interface is down even after dad_maxtry attempts. */
+ if ((dp->dad_ns_tcount > V_dad_maxtry) &&
+ (((ifp->if_flags & IFF_UP) == 0) ||
+ ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))) {
+ nd6log((LOG_INFO, "%s: could not run DAD "
+ "because the interface was down or not running.\n",
if_name(ifa->ifa_ifp)));
-
- TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
- free(dp, M_IP6NDP);
- dp = NULL;
- ifa_free(ifa);
- goto done;
+ goto err;
}
/* Need more checks? */
@@ -1363,84 +1372,85 @@ nd6_dad_timer(struct dadq *dp)
/*
* We have more NS to go. Send NS packet for DAD.
*/
- nd6_dad_ns_output(dp, ifa);
nd6_dad_starttimer(dp,
- (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
+ (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1);
+ goto done;
} else {
/*
* We have transmitted sufficient number of DAD packets.
* See what we've got.
*/
- int duplicate;
-
- duplicate = 0;
-
- if (dp->dad_na_icount) {
+ if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0)
+ /* We've seen NS or NA, means DAD has failed. */
+ nd6_dad_duplicated(ifa, dp);
+ else if (V_dad_enhanced != 0 &&
+ dp->dad_ns_lcount > 0 &&
+ dp->dad_ns_lcount > dp->dad_loopbackprobe) {
/*
- * the check is in nd6_dad_na_input(),
- * but just in case
+ * Sec. 4.1 in RFC 7527 requires transmission of
+ * additional probes until the loopback condition
+ * becomes clear when a looped back probe is detected.
*/
- duplicate++;
- }
-
- if (dp->dad_ns_icount) {
- /* We've seen NS, means DAD has failed. */
- duplicate++;
- }
-
- if (duplicate) {
- /* (*dp) will be freed in nd6_dad_duplicated() */
- dp = NULL;
- nd6_dad_duplicated(ifa);
+ log(LOG_ERR, "%s: a looped back NS message is "
+ "detected during DAD for %s. "
+ "Another DAD probes are being sent.\n",
+ if_name(ifa->ifa_ifp),
+ ip6_sprintf(ip6buf, IFA_IN6(ifa)));
+ dp->dad_loopbackprobe = dp->dad_ns_lcount;
+ /*
+ * Send an NS immediately and increase dad_count by
+ * V_nd6_mmaxtries - 1.
+ */
+ dp->dad_count =
+ dp->dad_ns_ocount + V_nd6_mmaxtries - 1;
+ nd6_dad_starttimer(dp,
+ (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000,
+ 1);
+ goto done;
} else {
/*
* We are done with DAD. No NA came, no NS came.
- * No duplicate address found.
+ * No duplicate address found. Check IFDISABLED flag
+ * again in case that it is changed between the
+ * beginning of this function and here.
*/
- ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) == 0)
+ ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
nd6log((LOG_DEBUG,
"%s: DAD complete for %s - no duplicates found\n",
if_name(ifa->ifa_ifp),
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
-
- TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
- free(dp, M_IP6NDP);
- dp = NULL;
- ifa_free(ifa);
+ if (dp->dad_ns_lcount > 0)
+ log(LOG_ERR, "%s: DAD completed while "
+ "a looped back NS message is detected "
+ "during DAD for %s.\n",
+ if_name(ifa->ifa_ifp),
+ ip6_sprintf(ip6buf, IFA_IN6(ifa)));
}
}
-
+err:
+ nd6_dad_del(dp);
done:
- splx(s);
CURVNET_RESTORE();
}
-void
-nd6_dad_duplicated(struct ifaddr *ifa)
+static void
+nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp)
{
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct ifnet *ifp;
- struct dadq *dp;
char ip6buf[INET6_ADDRSTRLEN];
- dp = nd6_dad_find(ifa);
- if (dp == NULL) {
- log(LOG_ERR, "nd6_dad_duplicated: DAD structure not found\n");
- return;
- }
-
log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: "
- "NS in/out=%d/%d, NA in=%d\n",
+ "NS in/out/loopback=%d/%d/%d, NA in=%d\n",
if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
- dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount);
+ dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_ns_lcount,
+ dp->dad_na_icount);
ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
ia->ia6_flags |= IN6_IFF_DUPLICATED;
- /* We are done with DAD, with duplicate address found. (failure) */
- nd6_dad_stoptimer(dp);
-
ifp = ifa->ifa_ifp;
log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr));
@@ -1466,9 +1476,7 @@ nd6_dad_duplicated(struct ifaddr *ifa)
case IFT_FDDI:
case IFT_ATM:
case IFT_IEEE1394:
-#ifdef IFT_IEEE80211
case IFT_IEEE80211:
-#endif
case IFT_INFINIBAND:
in6 = ia->ia_addr.sin6_addr;
if (in6_get_hw_ifid(ifp, &in6) == 0 &&
@@ -1481,18 +1489,14 @@ nd6_dad_duplicated(struct ifaddr *ifa)
break;
}
}
-
- TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
- free(dp, M_IP6NDP);
- dp = NULL;
- ifa_free(ifa);
}
static void
-nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa)
+nd6_dad_ns_output(struct dadq *dp)
{
- struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
- struct ifnet *ifp = ifa->ifa_ifp;
+ struct in6_ifaddr *ia = (struct in6_ifaddr *)dp->dad_ifa;
+ struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
+ int i;
dp->dad_ns_tcount++;
if ((ifp->if_flags & IFF_UP) == 0) {
@@ -1503,17 +1507,29 @@ nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa)
}
dp->dad_ns_ocount++;
- nd6_ns_output(ifp, NULL, &ia->ia_addr.sin6_addr, NULL, 1);
+ if (V_dad_enhanced != 0) {
+ for (i = 0; i < ND_OPT_NONCE_LEN32; i++)
+ dp->dad_nonce[i] = arc4random();
+ /*
+ * XXXHRS: Note that in the case that
+ * DupAddrDetectTransmits > 1, multiple NS messages with
+ * different nonces can be looped back in an unexpected
+ * order. The current implementation recognizes only
+ * the latest nonce on the sender side. Practically it
+ * should work well in almost all cases.
+ */
+ }
+ nd6_ns_output(ifp, NULL, NULL, &ia->ia_addr.sin6_addr,
+ (uint8_t *)&dp->dad_nonce[0]);
}
static void
-nd6_dad_ns_input(struct ifaddr *ifa)
+nd6_dad_ns_input(struct ifaddr *ifa, struct nd_opt_nonce *ndopt_nonce)
{
struct in6_ifaddr *ia;
struct ifnet *ifp;
const struct in6_addr *taddr6;
struct dadq *dp;
- int duplicate;
if (ifa == NULL)
panic("ifa == NULL in nd6_dad_ns_input");
@@ -1521,39 +1537,15 @@ nd6_dad_ns_input(struct ifaddr *ifa)
ia = (struct in6_ifaddr *)ifa;
ifp = ifa->ifa_ifp;
taddr6 = &ia->ia_addr.sin6_addr;
- duplicate = 0;
- dp = nd6_dad_find(ifa);
-
- /* Quickhack - completely ignore DAD NS packets */
- if (V_dad_ignore_ns) {
- char ip6buf[INET6_ADDRSTRLEN];
- nd6log((LOG_INFO,
- "nd6_dad_ns_input: ignoring DAD NS packet for "
- "address %s(%s)\n", ip6_sprintf(ip6buf, taddr6),
- if_name(ifa->ifa_ifp)));
+ /* Ignore Nonce option when Enhanced DAD is disabled. */
+ if (V_dad_enhanced == 0)
+ ndopt_nonce = NULL;
+ dp = nd6_dad_find(ifa, ndopt_nonce);
+ if (dp == NULL)
return;
- }
-
- /*
- * if I'm yet to start DAD, someone else started using this address
- * first. I have a duplicate and you win.
- */
- if (dp == NULL || dp->dad_ns_ocount == 0)
- duplicate++;
-
- /* XXX more checks for loopback situation - see nd6_dad_timer too */
- if (duplicate) {
- dp = NULL; /* will be freed in nd6_dad_duplicated() */
- nd6_dad_duplicated(ifa);
- } else {
- /*
- * not sure if I got a duplicate.
- * increment ns count and see what happens.
- */
- if (dp)
- dp->dad_ns_icount++;
- }
+ dp->dad_ns_icount++;
+ nd6_dad_rele(dp);
}
static void
@@ -1564,10 +1556,9 @@ nd6_dad_na_input(struct ifaddr *ifa)
if (ifa == NULL)
panic("ifa == NULL in nd6_dad_na_input");
- dp = nd6_dad_find(ifa);
- if (dp)
+ dp = nd6_dad_find(ifa, NULL);
+ if (dp != NULL) {
dp->dad_na_icount++;
-
- /* remove the address. */
- nd6_dad_duplicated(ifa);
+ nd6_dad_rele(dp);
+ }
}
diff --git a/freebsd/sys/netinet6/nd6_rtr.c b/freebsd/sys/netinet6/nd6_rtr.c
index 8d150ae4..c8d7c0ef 100644
--- a/freebsd/sys/netinet6/nd6_rtr.c
+++ b/freebsd/sys/netinet6/nd6_rtr.c
@@ -41,20 +41,24 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/refcount.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
#include <rtems/bsd/sys/errno.h>
+#include <sys/rmlock.h>
#include <sys/rwlock.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/route.h>
+#include <net/route_var.h>
#include <net/radix.h>
#include <net/vnet.h>
@@ -89,7 +93,7 @@ static void in6_init_address_ltimes(struct nd_prefix *,
static int nd6_prefix_onlink(struct nd_prefix *);
static int nd6_prefix_offlink(struct nd_prefix *);
-static int rt6_deleteroute(struct radix_node *, void *);
+static int rt6_deleteroute(const struct rtentry *, void *);
VNET_DECLARE(int, nd6_recalc_reachtm_interval);
#define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval)
@@ -220,6 +224,8 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
struct nd_defrouter *dr;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+ dr = NULL;
+
/*
* We only accept RAs only when the per-interface flag
* ND6_IFF_ACCEPT_RTADV is on the receiving interface.
@@ -272,7 +278,7 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
bzero(&dr0, sizeof(dr0));
dr0.rtaddr = saddr6;
- dr0.flags = nd_ra->nd_ra_flags_reserved;
+ dr0.raflags = nd_ra->nd_ra_flags_reserved;
/*
* Effectively-disable routes from RA messages when
* ND6_IFF_NO_RADR enabled on the receiving interface or
@@ -284,7 +290,7 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
dr0.rtlifetime = 0;
else
dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
- dr0.expire = time_second + dr0.rtlifetime;
+ dr0.expire = time_uptime + dr0.rtlifetime;
dr0.ifp = ifp;
/* unspecified or not? (RFC 2461 6.3.4) */
if (advreachable) {
@@ -369,6 +375,10 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
(void)prelist_update(&pr, dr, m, mcast);
}
}
+ if (dr != NULL) {
+ defrouter_rele(dr);
+ dr = NULL;
+ }
/*
* MTU
@@ -446,10 +456,6 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
m_freem(m);
}
-/*
- * default router list proccessing sub routines
- */
-
/* tell the change to user processes watching the routing socket. */
static void
nd6_rtmsg(int cmd, struct rtentry *rt)
@@ -478,12 +484,15 @@ nd6_rtmsg(int cmd, struct rtentry *rt)
ifa_free(ifa);
}
+/*
+ * default router list processing sub routines
+ */
+
static void
defrouter_addreq(struct nd_defrouter *new)
{
struct sockaddr_in6 def, mask, gate;
struct rtentry *newrt = NULL;
- int s;
int error;
bzero(&def, sizeof(def));
@@ -495,7 +504,6 @@ defrouter_addreq(struct nd_defrouter *new)
def.sin6_family = gate.sin6_family = AF_INET6;
gate.sin6_addr = new->rtaddr;
- s = splnet();
error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def,
(struct sockaddr *)&gate, (struct sockaddr *)&mask,
RTF_GATEWAY, &newrt, RT_DEFAULT_FIB);
@@ -505,21 +513,46 @@ defrouter_addreq(struct nd_defrouter *new)
}
if (error == 0)
new->installed = 1;
- splx(s);
- return;
}
struct nd_defrouter *
-defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
+defrouter_lookup_locked(struct in6_addr *addr, struct ifnet *ifp)
{
struct nd_defrouter *dr;
- TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
- if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
+ ND6_LOCK_ASSERT();
+ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
+ if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) {
+ defrouter_ref(dr);
return (dr);
- }
+ }
+ return (NULL);
+}
+
+struct nd_defrouter *
+defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
+{
+ struct nd_defrouter *dr;
+
+ ND6_RLOCK();
+ dr = defrouter_lookup_locked(addr, ifp);
+ ND6_RUNLOCK();
+ return (dr);
+}
+
+void
+defrouter_ref(struct nd_defrouter *dr)
+{
+
+ refcount_acquire(&dr->refcnt);
+}
+
+void
+defrouter_rele(struct nd_defrouter *dr)
+{
- return (NULL); /* search failed */
+ if (refcount_release(&dr->refcnt))
+ free(dr, M_IP6NDP);
}
/*
@@ -554,15 +587,41 @@ defrouter_delreq(struct nd_defrouter *dr)
}
/*
- * remove all default routes from default router list
+ * Remove all default routes from default router list.
*/
void
defrouter_reset(void)
{
- struct nd_defrouter *dr;
+ struct nd_defrouter *dr, **dra;
+ int count, i;
+
+ count = i = 0;
+ /*
+ * We can't delete routes with the ND lock held, so make a copy of the
+ * current default router list and use that when deleting routes.
+ */
+ ND6_RLOCK();
TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
- defrouter_delreq(dr);
+ count++;
+ ND6_RUNLOCK();
+
+ dra = malloc(count * sizeof(*dra), M_TEMP, M_WAITOK | M_ZERO);
+
+ ND6_RLOCK();
+ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
+ if (i == count)
+ break;
+ defrouter_ref(dr);
+ dra[i++] = dr;
+ }
+ ND6_RUNLOCK();
+
+ for (i = 0; i < count && dra[i] != NULL; i++) {
+ defrouter_delreq(dra[i]);
+ defrouter_rele(dra[i]);
+ }
+ free(dra, M_TEMP);
/*
* XXX should we also nuke any default routers in the kernel, by
@@ -570,12 +629,53 @@ defrouter_reset(void)
*/
}
+/*
+ * Look up a matching default router list entry and remove it. Returns true if a
+ * matching entry was found, false otherwise.
+ */
+bool
+defrouter_remove(struct in6_addr *addr, struct ifnet *ifp)
+{
+ struct nd_defrouter *dr;
+
+ ND6_WLOCK();
+ dr = defrouter_lookup_locked(addr, ifp);
+ if (dr == NULL) {
+ ND6_WUNLOCK();
+ return (false);
+ }
+
+ defrouter_unlink(dr, NULL);
+ ND6_WUNLOCK();
+ defrouter_del(dr);
+ defrouter_rele(dr);
+ return (true);
+}
+
+/*
+ * Remove a router from the global list and optionally stash it in a
+ * caller-supplied queue.
+ *
+ * The ND lock must be held.
+ */
+void
+defrouter_unlink(struct nd_defrouter *dr, struct nd_drhead *drq)
+{
+
+ ND6_WLOCK_ASSERT();
+ TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
+ if (drq != NULL)
+ TAILQ_INSERT_TAIL(drq, dr, dr_entry);
+}
+
void
-defrtrlist_del(struct nd_defrouter *dr)
+defrouter_del(struct nd_defrouter *dr)
{
struct nd_defrouter *deldr = NULL;
struct nd_prefix *pr;
+ ND6_UNLOCK_ASSERT();
+
/*
* Flush all the routing table entries that use the router
* as a next hop.
@@ -587,7 +687,6 @@ defrtrlist_del(struct nd_defrouter *dr)
deldr = dr;
defrouter_delreq(dr);
}
- TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
/*
* Also delete all the pointers to the router in each prefix lists.
@@ -607,7 +706,10 @@ defrtrlist_del(struct nd_defrouter *dr)
if (deldr)
defrouter_select();
- free(dr, M_IP6NDP);
+ /*
+ * Release the list reference.
+ */
+ defrouter_rele(dr);
}
/*
@@ -634,16 +736,16 @@ defrtrlist_del(struct nd_defrouter *dr)
void
defrouter_select(void)
{
- int s = splnet();
- struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
+ struct nd_defrouter *dr, *selected_dr, *installed_dr;
struct llentry *ln = NULL;
+ ND6_RLOCK();
/*
* Let's handle easy case (3) first:
* If default router list is empty, there's nothing to be done.
*/
if (TAILQ_EMPTY(&V_nd_defrouter)) {
- splx(s);
+ ND6_RUNLOCK();
return;
}
@@ -652,12 +754,14 @@ defrouter_select(void)
* We just pick up the first reachable one (if any), assuming that
* the ordering rule of the list described in defrtrlist_update().
*/
+ selected_dr = installed_dr = NULL;
TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
IF_AFDATA_RLOCK(dr->ifp);
if (selected_dr == NULL &&
(ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
ND6_IS_LLINFO_PROBREACH(ln)) {
selected_dr = dr;
+ defrouter_ref(selected_dr);
}
IF_AFDATA_RUNLOCK(dr->ifp);
if (ln != NULL) {
@@ -665,12 +769,15 @@ defrouter_select(void)
ln = NULL;
}
- if (dr->installed && installed_dr == NULL)
- installed_dr = dr;
- else if (dr->installed && installed_dr) {
- /* this should not happen. warn for diagnosis. */
- log(LOG_ERR, "defrouter_select: more than one router"
- " is installed\n");
+ if (dr->installed) {
+ if (installed_dr == NULL) {
+ installed_dr = dr;
+ defrouter_ref(installed_dr);
+ } else {
+ /* this should not happen. warn for diagnosis. */
+ log(LOG_ERR,
+ "defrouter_select: more than one router is installed\n");
+ }
}
}
/*
@@ -682,21 +789,25 @@ defrouter_select(void)
* or when the new one has a really higher preference value.
*/
if (selected_dr == NULL) {
- if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry))
+ if (installed_dr == NULL ||
+ TAILQ_NEXT(installed_dr, dr_entry) == NULL)
selected_dr = TAILQ_FIRST(&V_nd_defrouter);
else
selected_dr = TAILQ_NEXT(installed_dr, dr_entry);
- } else if (installed_dr) {
+ defrouter_ref(selected_dr);
+ } else if (installed_dr != NULL) {
IF_AFDATA_RLOCK(installed_dr->ifp);
if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) &&
ND6_IS_LLINFO_PROBREACH(ln) &&
rtpref(selected_dr) <= rtpref(installed_dr)) {
+ defrouter_rele(selected_dr);
selected_dr = installed_dr;
}
IF_AFDATA_RUNLOCK(installed_dr->ifp);
if (ln != NULL)
LLE_RUNLOCK(ln);
}
+ ND6_RUNLOCK();
/*
* If the selected router is different than the installed one,
@@ -704,13 +815,13 @@ defrouter_select(void)
* Note that the selected router is never NULL here.
*/
if (installed_dr != selected_dr) {
- if (installed_dr)
+ if (installed_dr != NULL) {
defrouter_delreq(installed_dr);
+ defrouter_rele(installed_dr);
+ }
defrouter_addreq(selected_dr);
}
-
- splx(s);
- return;
+ defrouter_rele(selected_dr);
}
/*
@@ -720,7 +831,7 @@ defrouter_select(void)
static int
rtpref(struct nd_defrouter *dr)
{
- switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) {
+ switch (dr->raflags & ND_RA_FLAG_RTPREF_MASK) {
case ND_RA_FLAG_RTPREF_HIGH:
return (RTPREF_HIGH);
case ND_RA_FLAG_RTPREF_MEDIUM:
@@ -734,7 +845,7 @@ rtpref(struct nd_defrouter *dr)
* serious bug of kernel internal. We thus always bark here.
* Or, can we even panic?
*/
- log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags);
+ log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->raflags);
return (RTPREF_INVALID);
}
/* NOTREACHED */
@@ -744,63 +855,50 @@ static struct nd_defrouter *
defrtrlist_update(struct nd_defrouter *new)
{
struct nd_defrouter *dr, *n;
- int s = splnet();
+ int oldpref;
- if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
- /* entry exists */
- if (new->rtlifetime == 0) {
- defrtrlist_del(dr);
- dr = NULL;
- } else {
- int oldpref = rtpref(dr);
+ if (new->rtlifetime == 0) {
+ defrouter_remove(&new->rtaddr, new->ifp);
+ return (NULL);
+ }
- /* override */
- dr->flags = new->flags; /* xxx flag check */
- dr->rtlifetime = new->rtlifetime;
- dr->expire = new->expire;
+ ND6_WLOCK();
+ dr = defrouter_lookup_locked(&new->rtaddr, new->ifp);
+ if (dr != NULL) {
+ oldpref = rtpref(dr);
- /*
- * If the preference does not change, there's no need
- * to sort the entries. Also make sure the selected
- * router is still installed in the kernel.
- */
- if (dr->installed && rtpref(new) == oldpref) {
- splx(s);
- return (dr);
- }
+ /* override */
+ dr->raflags = new->raflags; /* XXX flag check */
+ dr->rtlifetime = new->rtlifetime;
+ dr->expire = new->expire;
- /*
- * preferred router may be changed, so relocate
- * this router.
- * XXX: calling TAILQ_REMOVE directly is a bad manner.
- * However, since defrtrlist_del() has many side
- * effects, we intentionally do so here.
- * defrouter_select() below will handle routing
- * changes later.
- */
- TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
- n = dr;
- goto insert;
+ /*
+ * If the preference does not change, there's no need
+ * to sort the entries. Also make sure the selected
+ * router is still installed in the kernel.
+ */
+ if (dr->installed && rtpref(new) == oldpref) {
+ ND6_WUNLOCK();
+ return (dr);
}
- splx(s);
- return (dr);
- }
-
- /* entry does not exist */
- if (new->rtlifetime == 0) {
- splx(s);
- return (NULL);
- }
- n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT);
- if (n == NULL) {
- splx(s);
- return (NULL);
+ /*
+ * The preferred router may have changed, so relocate this
+ * router.
+ */
+ TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
+ n = dr;
+ } else {
+ n = malloc(sizeof(*n), M_IP6NDP, M_NOWAIT | M_ZERO);
+ if (n == NULL) {
+ ND6_WUNLOCK();
+ return (NULL);
+ }
+ memcpy(n, new, sizeof(*n));
+ /* Initialize with an extra reference for the caller. */
+ refcount_init(&n->refcnt, 2);
}
- bzero(n, sizeof(*n));
- *n = *new;
-insert:
/*
* Insert the new router in the Default Router List;
* The Default Router List should be in the descending order
@@ -813,15 +911,14 @@ insert:
if (rtpref(n) > rtpref(dr))
break;
}
- if (dr)
+ if (dr != NULL)
TAILQ_INSERT_BEFORE(dr, n, dr_entry);
else
TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
+ ND6_WUNLOCK();
defrouter_select();
- splx(s);
-
return (n);
}
@@ -843,11 +940,11 @@ pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
{
struct nd_pfxrouter *new;
- new = (struct nd_pfxrouter *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
+ new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
if (new == NULL)
return;
- bzero(new, sizeof(*new));
new->router = dr;
+ defrouter_ref(dr);
LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
@@ -857,7 +954,9 @@ pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
static void
pfxrtr_del(struct nd_pfxrouter *pfr)
{
+
LIST_REMOVE(pfr, pfr_entry);
+ defrouter_rele(pfr->router);
free(pfr, M_IP6NDP);
}
@@ -884,13 +983,11 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
{
struct nd_prefix *new = NULL;
int error = 0;
- int i, s;
char ip6buf[INET6_ADDRSTRLEN];
- new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
+ new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
if (new == NULL)
- return(ENOMEM);
- bzero(new, sizeof(*new));
+ return (ENOMEM);
new->ndpr_ifp = pr->ndpr_ifp;
new->ndpr_prefix = pr->ndpr_prefix;
new->ndpr_plen = pr->ndpr_plen;
@@ -899,24 +996,18 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
new->ndpr_flags = pr->ndpr_flags;
if ((error = in6_init_prefix_ltimes(new)) != 0) {
free(new, M_IP6NDP);
- return(error);
+ return (error);
}
- new->ndpr_lastupdate = time_second;
- if (newp != NULL)
- *newp = new;
+ new->ndpr_lastupdate = time_uptime;
/* initialization */
LIST_INIT(&new->ndpr_advrtrs);
in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen);
/* make prefix in the canonical form */
- for (i = 0; i < 4; i++)
- new->ndpr_prefix.sin6_addr.s6_addr32[i] &=
- new->ndpr_mask.s6_addr32[i];
+ IN6_MASK_ADDR(&new->ndpr_prefix.sin6_addr, &new->ndpr_mask);
- s = splnet();
/* link ndpr_entry to nd_prefix list */
LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
- splx(s);
/* ND_OPT_PI_FLAG_ONLINK processing */
if (new->ndpr_raf_onlink) {
@@ -931,17 +1022,18 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
}
}
- if (dr)
+ if (dr != NULL)
pfxrtr_add(new, dr);
-
- return 0;
+ if (newp != NULL)
+ *newp = new;
+ return (0);
}
void
prelist_remove(struct nd_prefix *pr)
{
struct nd_pfxrouter *pfr, *next;
- int e, s;
+ int e;
char ip6buf[INET6_ADDRSTRLEN];
/* make sure to invalidate the prefix until it is really freed. */
@@ -966,17 +1058,13 @@ prelist_remove(struct nd_prefix *pr)
if (pr->ndpr_refcnt > 0)
return; /* notice here? */
- s = splnet();
-
/* unlink ndpr_entry from nd_prefix list */
LIST_REMOVE(pr, ndpr_entry);
- /* free list of routers that adversed the prefix */
+ /* free list of routers that advertised the prefix */
LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) {
- free(pfr, M_IP6NDP);
+ pfxrtr_del(pfr);
}
- splx(s);
-
free(pr, M_IP6NDP);
pfxlist_onlink_check();
@@ -994,9 +1082,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
struct ifaddr *ifa;
struct ifnet *ifp = new->ndpr_ifp;
struct nd_prefix *pr;
- int s = splnet();
int error = 0;
- int newprefix = 0;
int auth;
struct in6_addrlifetime lt6_tmp;
char ip6buf[INET6_ADDRSTRLEN];
@@ -1032,7 +1118,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
pr->ndpr_vltime = new->ndpr_vltime;
pr->ndpr_pltime = new->ndpr_pltime;
(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
- pr->ndpr_lastupdate = time_second;
+ pr->ndpr_lastupdate = time_uptime;
}
if (new->ndpr_raf_onlink &&
@@ -1054,23 +1140,17 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
if (dr && pfxrtr_lookup(pr, dr) == NULL)
pfxrtr_add(pr, dr);
} else {
- struct nd_prefix *newpr = NULL;
-
- newprefix = 1;
-
if (new->ndpr_vltime == 0)
goto end;
if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
goto end;
- error = nd6_prelist_add(new, dr, &newpr);
- if (error != 0 || newpr == NULL) {
+ error = nd6_prelist_add(new, dr, &pr);
+ if (error != 0) {
nd6log((LOG_NOTICE, "prelist_update: "
- "nd6_prelist_add failed for %s/%d on %s "
- "errno=%d, returnpr=%p\n",
+ "nd6_prelist_add failed for %s/%d on %s errno=%d\n",
ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
- new->ndpr_plen, if_name(new->ndpr_ifp),
- error, newpr));
+ new->ndpr_plen, if_name(new->ndpr_ifp), error));
goto end; /* we should just give up in this case. */
}
@@ -1081,13 +1161,11 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
* addresses. Thus, we explicitly make sure that the prefix
* itself expires now.
*/
- if (newpr->ndpr_raf_onlink == 0) {
- newpr->ndpr_vltime = 0;
- newpr->ndpr_pltime = 0;
- in6_init_prefix_ltimes(newpr);
+ if (pr->ndpr_raf_onlink == 0) {
+ pr->ndpr_vltime = 0;
+ pr->ndpr_pltime = 0;
+ in6_init_prefix_ltimes(pr);
}
-
- pr = newpr;
}
/*
@@ -1170,7 +1248,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
remaininglifetime = ND6_INFINITE_LIFETIME;
- else if (time_second - ifa6->ia6_updatetime >
+ else if (time_uptime - ifa6->ia6_updatetime >
lt6_tmp.ia6t_vltime) {
/*
* The case of "invalid" address. We should usually
@@ -1179,7 +1257,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
remaininglifetime = 0;
} else
remaininglifetime = lt6_tmp.ia6t_vltime -
- (time_second - ifa6->ia6_updatetime);
+ (time_uptime - ifa6->ia6_updatetime);
/* when not updating, keep the current stored lifetime. */
lt6_tmp.ia6t_vltime = remaininglifetime;
@@ -1215,18 +1293,18 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
u_int32_t maxvltime, maxpltime;
if (V_ip6_temp_valid_lifetime >
- (u_int32_t)((time_second - ifa6->ia6_createtime) +
+ (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
V_ip6_desync_factor)) {
maxvltime = V_ip6_temp_valid_lifetime -
- (time_second - ifa6->ia6_createtime) -
+ (time_uptime - ifa6->ia6_createtime) -
V_ip6_desync_factor;
} else
maxvltime = 0;
if (V_ip6_temp_preferred_lifetime >
- (u_int32_t)((time_second - ifa6->ia6_createtime) +
+ (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
V_ip6_desync_factor)) {
maxpltime = V_ip6_temp_preferred_lifetime -
- (time_second - ifa6->ia6_createtime) -
+ (time_uptime - ifa6->ia6_createtime) -
V_ip6_desync_factor;
} else
maxpltime = 0;
@@ -1241,7 +1319,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
}
}
ifa6->ia6_lifetime = lt6_tmp;
- ifa6->ia6_updatetime = time_second;
+ ifa6->ia6_updatetime = time_uptime;
}
IF_ADDR_RUNLOCK(ifp);
if (ia6_match == NULL && new->ndpr_vltime) {
@@ -1319,7 +1397,6 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
}
end:
- splx(s);
return error;
}
@@ -1363,12 +1440,13 @@ find_pfxlist_reachable_router(struct nd_prefix *pr)
* is no router around us.
*/
void
-pfxlist_onlink_check()
+pfxlist_onlink_check(void)
{
struct nd_prefix *pr;
struct in6_ifaddr *ifa;
struct nd_defrouter *dr;
struct nd_pfxrouter *pfxrtr = NULL;
+ struct rm_priotracker in6_ifa_tracker;
/*
* Check if there is a prefix that has a reachable advertising
@@ -1384,6 +1462,7 @@ pfxlist_onlink_check()
* that does not advertise any prefixes.
*/
if (pr == NULL) {
+ ND6_RLOCK();
TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
struct nd_prefix *pr0;
@@ -1394,6 +1473,7 @@ pfxlist_onlink_check()
if (pfxrtr != NULL)
break;
}
+ ND6_RUNLOCK();
}
if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
/*
@@ -1424,7 +1504,7 @@ pfxlist_onlink_check()
find_pfxlist_reachable_router(pr) == NULL)
pr->ndpr_stateflags |= NDPRF_DETACHED;
if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
- find_pfxlist_reachable_router(pr) != 0)
+ find_pfxlist_reachable_router(pr) != NULL)
pr->ndpr_stateflags &= ~NDPRF_DETACHED;
}
} else {
@@ -1497,9 +1577,8 @@ pfxlist_onlink_check()
* detached. Note, however, that a manually configured address should
* always be attached.
* The precise detection logic is same as the one for prefixes.
- *
- * XXXRW: in6_ifaddrhead locking.
*/
+ IN6_IFADDR_RLOCK(&in6_ifa_tracker);
TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
continue;
@@ -1534,8 +1613,7 @@ pfxlist_onlink_check()
ifa->ia6_flags |= IN6_IFF_DETACHED;
}
}
- }
- else {
+ } else {
TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
continue;
@@ -1548,13 +1626,14 @@ pfxlist_onlink_check()
}
}
}
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
}
static int
nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
{
static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
struct rtentry *rt;
struct sockaddr_in6 mask6;
u_long rtflags;
@@ -1583,7 +1662,7 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6);
/* XXX what if rhn == NULL? */
- RADIX_NODE_HEAD_LOCK(rnh);
+ RIB_WLOCK(rnh);
RT_LOCK(rt);
if (rt_setgate(rt, rt_key(rt),
(struct sockaddr *)&null_sdl) == 0) {
@@ -1593,7 +1672,7 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
dl->sdl_type = rt->rt_ifp->if_type;
dl->sdl_index = rt->rt_ifp->if_index;
}
- RADIX_NODE_HEAD_UNLOCK(rnh);
+ RIB_WUNLOCK(rnh);
nd6_rtmsg(RTM_ADD, rt);
RT_UNLOCK(rt);
pr->ndpr_stateflags |= NDPRF_ONLINK;
@@ -1755,6 +1834,7 @@ nd6_prefix_offlink(struct nd_prefix *pr)
}
}
error = a_failure;
+ a_failure = 1;
if (error == 0) {
pr->ndpr_stateflags &= ~NDPRF_ONLINK;
@@ -1793,7 +1873,8 @@ nd6_prefix_offlink(struct nd_prefix *pr)
&opr->ndpr_prefix.sin6_addr),
opr->ndpr_plen, if_name(ifp),
if_name(opr->ndpr_ifp), e));
- }
+ } else
+ a_failure = 0;
}
}
} else {
@@ -1805,6 +1886,10 @@ nd6_prefix_offlink(struct nd_prefix *pr)
if_name(ifp), error));
}
+ if (a_failure)
+ lltable_prefix_free(AF_INET6, (struct sockaddr *)&sa6,
+ (struct sockaddr *)&mask6, LLE_STATIC);
+
return (error);
}
@@ -1860,22 +1945,9 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast)
}
/* make ifaddr */
+ in6_prepare_ifra(&ifra, &pr->ndpr_prefix.sin6_addr, &mask);
- bzero(&ifra, sizeof(ifra));
- /*
- * in6_update_ifa() does not use ifra_name, but we accurately set it
- * for safety.
- */
- strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
- ifra.ifra_addr.sin6_family = AF_INET6;
- ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
- /* prefix */
- ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr;
- ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
- ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
- ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
- ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
-
+ IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr, &mask);
/* interface ID */
ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
(ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
@@ -1887,12 +1959,6 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast)
(ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
ifa_free(ifa);
- /* new prefix mask. */
- ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
- ifra.ifra_prefixmask.sin6_family = AF_INET6;
- bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
- sizeof(ifra.ifra_prefixmask.sin6_addr));
-
/* lifetimes. */
ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
@@ -1949,24 +2015,21 @@ int
in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
{
struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
- struct in6_ifaddr *newia, *ia;
+ struct in6_ifaddr *newia;
struct in6_aliasreq ifra;
- int i, error;
+ int error;
int trylimit = 3; /* XXX: adhoc value */
int updateflags;
u_int32_t randid[2];
time_t vltime0, pltime0;
- bzero(&ifra, sizeof(ifra));
- strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
- ifra.ifra_addr = ia0->ia_addr;
- /* copy prefix mask */
- ifra.ifra_prefixmask = ia0->ia_prefixmask;
+ in6_prepare_ifra(&ifra, &ia0->ia_addr.sin6_addr,
+ &ia0->ia_prefixmask.sin6_addr);
+
+ ifra.ifra_addr = ia0->ia_addr; /* XXX: do we need this ? */
/* clear the old IFID */
- for (i = 0; i < 4; i++) {
- ifra.ifra_addr.sin6_addr.s6_addr32[i] &=
- ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
- }
+ IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr,
+ &ifra.ifra_prefixmask.sin6_addr);
again:
if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
@@ -1986,26 +2049,18 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
* there may be a time lag between generation of the ID and generation
* of the address. So, we'll do one more sanity check.
*/
- IN6_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
- if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
- &ifra.ifra_addr.sin6_addr)) {
- if (trylimit-- == 0) {
- IN6_IFADDR_RUNLOCK();
- /*
- * Give up. Something strange should have
- * happened.
- */
- nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
- "find a unique random IFID\n"));
- return (EEXIST);
- }
- IN6_IFADDR_RUNLOCK();
+
+ if (in6_localip(&ifra.ifra_addr.sin6_addr) != 0) {
+ if (trylimit-- > 0) {
forcegen = 1;
goto again;
}
+
+ /* Give up. Something strange should have happened. */
+ nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
+ "find a unique random IFID\n"));
+ return (EEXIST);
}
- IN6_IFADDR_RUNLOCK();
/*
* The Valid Lifetime is the lower of the Valid Lifetime of the
@@ -2017,7 +2072,7 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
(ia0->ia6_lifetime.ia6t_vltime -
- (time_second - ia0->ia6_updatetime));
+ (time_uptime - ia0->ia6_updatetime));
if (vltime0 > V_ip6_temp_valid_lifetime)
vltime0 = V_ip6_temp_valid_lifetime;
} else
@@ -2025,7 +2080,7 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
(ia0->ia6_lifetime.ia6t_pltime -
- (time_second - ia0->ia6_updatetime));
+ (time_uptime - ia0->ia6_updatetime));
if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
pltime0 = V_ip6_temp_preferred_lifetime -
V_ip6_desync_factor;
@@ -2083,11 +2138,11 @@ in6_init_prefix_ltimes(struct nd_prefix *ndpr)
if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
ndpr->ndpr_preferred = 0;
else
- ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime;
+ ndpr->ndpr_preferred = time_uptime + ndpr->ndpr_pltime;
if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
ndpr->ndpr_expire = 0;
else
- ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime;
+ ndpr->ndpr_expire = time_uptime + ndpr->ndpr_vltime;
return 0;
}
@@ -2099,7 +2154,7 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
lt6->ia6t_expire = 0;
else {
- lt6->ia6t_expire = time_second;
+ lt6->ia6t_expire = time_uptime;
lt6->ia6t_expire += lt6->ia6t_vltime;
}
@@ -2107,7 +2162,7 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
lt6->ia6t_preferred = 0;
else {
- lt6->ia6t_preferred = time_second;
+ lt6->ia6t_preferred = time_uptime;
lt6->ia6t_preferred += lt6->ia6t_pltime;
}
}
@@ -2120,34 +2175,19 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
void
rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
{
- struct radix_node_head *rnh;
- u_int fibnum;
- int s = splnet();
/* We'll care only link-local addresses */
- if (!IN6_IS_ADDR_LINKLOCAL(gateway)) {
- splx(s);
+ if (!IN6_IS_ADDR_LINKLOCAL(gateway))
return;
- }
/* XXX Do we really need to walk any but the default FIB? */
- for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
- rnh = rt_tables_get_rnh(fibnum, AF_INET6);
- if (rnh == NULL)
- continue;
-
- RADIX_NODE_HEAD_LOCK(rnh);
- rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
- RADIX_NODE_HEAD_UNLOCK(rnh);
- }
- splx(s);
+ rt_foreach_fib_walk_del(AF_INET6, rt6_deleteroute, (void *)gateway);
}
static int
-rt6_deleteroute(struct radix_node *rn, void *arg)
+rt6_deleteroute(const struct rtentry *rt, void *arg)
{
#define SIN6(s) ((struct sockaddr_in6 *)s)
- struct rtentry *rt = (struct rtentry *)rn;
struct in6_addr *gate = (struct in6_addr *)arg;
if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6)
@@ -2172,8 +2212,7 @@ rt6_deleteroute(struct radix_node *rn, void *arg)
if ((rt->rt_flags & RTF_HOST) == 0)
return (0);
- return (in6_rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
- rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum));
+ return (1);
#undef SIN6
}
diff --git a/freebsd/sys/netinet6/pim6_var.h b/freebsd/sys/netinet6/pim6_var.h
index 060836ba..7f9262bb 100644
--- a/freebsd/sys/netinet6/pim6_var.h
+++ b/freebsd/sys/netinet6/pim6_var.h
@@ -42,13 +42,13 @@
#define _NETINET6_PIM6_VAR_H_
struct pim6stat {
- u_quad_t pim6s_rcv_total; /* total PIM messages received */
- u_quad_t pim6s_rcv_tooshort; /* received with too few bytes */
- u_quad_t pim6s_rcv_badsum; /* received with bad checksum */
- u_quad_t pim6s_rcv_badversion; /* received bad PIM version */
- u_quad_t pim6s_rcv_registers; /* received registers */
- u_quad_t pim6s_rcv_badregisters; /* received invalid registers */
- u_quad_t pim6s_snd_registers; /* sent registers */
+ uint64_t pim6s_rcv_total; /* total PIM messages received */
+ uint64_t pim6s_rcv_tooshort; /* received with too few bytes */
+ uint64_t pim6s_rcv_badsum; /* received with bad checksum */
+ uint64_t pim6s_rcv_badversion; /* received bad PIM version */
+ uint64_t pim6s_rcv_registers; /* received registers */
+ uint64_t pim6s_rcv_badregisters; /* received invalid registers */
+ uint64_t pim6s_snd_registers; /* sent registers */
};
#if (defined(KERNEL)) || (defined(_KERNEL))
@@ -56,13 +56,8 @@ int pim6_input(struct mbuf **, int*, int);
#endif /* KERNEL */
/*
- * Names for PIM sysctl objects
+ * Identifiers for PIM sysctl nodes
*/
#define PIM6CTL_STATS 1 /* statistics (read-only) */
-#define PIM6CTL_MAXID 2
-#define PIM6CTL_NAMES { \
- { 0, 0 }, \
- { 0, 0 }, \
-}
#endif /* _NETINET6_PIM6_VAR_H_ */
diff --git a/freebsd/sys/netinet6/raw_ip6.c b/freebsd/sys/netinet6/raw_ip6.c
index e2d6693a..dfd7c45b 100644
--- a/freebsd/sys/netinet6/raw_ip6.c
+++ b/freebsd/sys/netinet6/raw_ip6.c
@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <rtems/bsd/sys/errno.h>
#include <sys/jail.h>
+#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
@@ -83,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -126,7 +128,12 @@ VNET_DECLARE(struct inpcbinfo, ripcbinfo);
extern u_long rip_sendspace;
extern u_long rip_recvspace;
-VNET_DEFINE(struct rip6stat, rip6stat);
+VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat);
+VNET_PCPUSTAT_SYSINIT(rip6stat);
+
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(rip6stat);
+#endif /* VIMAGE */
/*
* Hooks for multicast routing. They all default to NULL, so leave them not
@@ -158,18 +165,12 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
struct mbuf *m = *mp;
register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
register struct inpcb *in6p;
- struct inpcb *last = 0;
+ struct inpcb *last = NULL;
struct mbuf *opts = NULL;
struct sockaddr_in6 fromsa;
RIP6STAT_INC(rip6s_ipackets);
- if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
- /* XXX Send icmp6 host/port unreach? */
- m_freem(m);
- return (IPPROTO_DONE);
- }
-
init_sin6(&fromsa, m); /* general init */
ifp = m->m_pkthdr.rcvif;
@@ -265,7 +266,6 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
*/
if (n && ipsec6_in_reject(n, last)) {
m_freem(n);
- IPSEC6STAT_INC(in_polvio);
/* Do not inject data into pcb. */
} else
#endif /* IPSEC */
@@ -297,7 +297,6 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
*/
if ((last != NULL) && ipsec6_in_reject(m, last)) {
m_freem(m);
- IPSEC6STAT_INC(in_polvio);
IP6STAT_DEC(ip6s_delivered);
/* Do not inject data into pcb. */
INP_RUNLOCK(last);
@@ -385,17 +384,10 @@ rip6_ctlinput(int cmd, struct sockaddr *sa, void *d)
* may have setup with control call.
*/
int
-#if __STDC__
-rip6_output(struct mbuf *m, ...)
-#else
-rip6_output(m, va_alist)
- struct mbuf *m;
- va_dcl
-#endif
+rip6_output(struct mbuf *m, struct socket *so, ...)
{
struct mbuf *control;
struct m_tag *mtag;
- struct socket *so;
struct sockaddr_in6 *dstsock;
struct in6_addr *dst;
struct ip6_hdr *ip6;
@@ -407,11 +399,11 @@ rip6_output(m, va_alist)
int type = 0, code = 0; /* for ICMPv6 output statistics only */
int scope_ambiguous = 0;
int use_defzone = 0;
+ int hlim = 0;
struct in6_addr in6a;
va_list ap;
- va_start(ap, m);
- so = va_arg(ap, struct socket *);
+ va_start(ap, so);
dstsock = va_arg(ap, struct sockaddr_in6 *);
control = va_arg(ap, struct mbuf *);
va_end(ap);
@@ -461,7 +453,7 @@ rip6_output(m, va_alist)
code = icmp6->icmp6_code;
}
- M_PREPEND(m, sizeof(*ip6), M_DONTWAIT);
+ M_PREPEND(m, sizeof(*ip6), M_NOWAIT);
if (m == NULL) {
error = ENOBUFS;
goto bad;
@@ -471,8 +463,9 @@ rip6_output(m, va_alist)
/*
* Source address selection.
*/
- error = in6_selectsrc(dstsock, optp, in6p, NULL, so->so_cred,
- &oifp, &in6a);
+ error = in6_selectsrc_socket(dstsock, optp, in6p, so->so_cred,
+ scope_ambiguous, &in6a, &hlim);
+
if (error)
goto bad;
error = prison_check_ip6(in6p->inp_cred, &in6a);
@@ -480,19 +473,6 @@ rip6_output(m, va_alist)
goto bad;
ip6->ip6_src = in6a;
- if (oifp && scope_ambiguous) {
- /*
- * Application should provide a proper zone ID or the use of
- * default zone IDs should be enabled. Unfortunately, some
- * applications do not behave as it should, so we need a
- * workaround. Even if an appropriate ID is not determined
- * (when it's required), if we can determine the outgoing
- * interface. determine the zone ID based on the interface.
- */
- error = in6_setscope(&dstsock->sin6_addr, oifp, NULL);
- if (error != 0)
- goto bad;
- }
ip6->ip6_dst = dstsock->sin6_addr;
/*
@@ -507,7 +487,7 @@ rip6_output(m, va_alist)
* ip6_plen will be filled in ip6_output, so not fill it here.
*/
ip6->ip6_nxt = in6p->inp_ip_p;
- ip6->ip6_hlim = in6_selecthlim(in6p, oifp);
+ ip6->ip6_hlim = hlim;
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 ||
in6p->in6p_cksum != -1) {
@@ -795,7 +775,6 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
struct inpcb *inp;
struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
struct in6_addr in6a;
- struct ifnet *ifp = NULL;
int error = 0, scope_ambiguous = 0;
inp = sotoinpcb(so);
@@ -824,21 +803,14 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
INP_INFO_WLOCK(&V_ripcbinfo);
INP_WLOCK(inp);
/* Source address selection. XXX: need pcblookup? */
- error = in6_selectsrc(addr, inp->in6p_outputopts,
- inp, NULL, so->so_cred, &ifp, &in6a);
+ error = in6_selectsrc_socket(addr, inp->in6p_outputopts,
+ inp, so->so_cred, scope_ambiguous, &in6a, NULL);
if (error) {
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_ripcbinfo);
return (error);
}
- /* XXX: see above */
- if (ifp && scope_ambiguous &&
- (error = in6_setscope(&addr->sin6_addr, ifp, NULL)) != 0) {
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_ripcbinfo);
- return (error);
- }
inp->in6p_faddr = addr->sin6_addr;
inp->in6p_laddr = in6a;
soisconnected(so);
diff --git a/freebsd/sys/netinet6/raw_ip6.h b/freebsd/sys/netinet6/raw_ip6.h
index cc4bcdd0..5eec5fff 100644
--- a/freebsd/sys/netinet6/raw_ip6.h
+++ b/freebsd/sys/netinet6/raw_ip6.h
@@ -37,21 +37,23 @@
* ICMPv6 stat is counted separately. see netinet/icmp6.h
*/
struct rip6stat {
- u_quad_t rip6s_ipackets; /* total input packets */
- u_quad_t rip6s_isum; /* input checksum computations */
- u_quad_t rip6s_badsum; /* of above, checksum error */
- u_quad_t rip6s_nosock; /* no matching socket */
- u_quad_t rip6s_nosockmcast; /* of above, arrived as multicast */
- u_quad_t rip6s_fullsock; /* not delivered, input socket full */
+ uint64_t rip6s_ipackets; /* total input packets */
+ uint64_t rip6s_isum; /* input checksum computations */
+ uint64_t rip6s_badsum; /* of above, checksum error */
+ uint64_t rip6s_nosock; /* no matching socket */
+ uint64_t rip6s_nosockmcast; /* of above, arrived as multicast */
+ uint64_t rip6s_fullsock; /* not delivered, input socket full */
- u_quad_t rip6s_opackets; /* total output packets */
+ uint64_t rip6s_opackets; /* total output packets */
};
#ifdef _KERNEL
-#define RIP6STAT_ADD(name, val) V_rip6stat.name += (val)
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct rip6stat, rip6stat);
+#define RIP6STAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct rip6stat, rip6stat, name, (val))
#define RIP6STAT_INC(name) RIP6STAT_ADD(name, 1)
-VNET_DECLARE(struct rip6stat, rip6stat);
-#define V_rip6stat VNET(rip6stat)
-#endif
+#endif /* _KERNEL */
#endif
diff --git a/freebsd/sys/netinet6/route6.c b/freebsd/sys/netinet6/route6.c
index 90738461..d698d328 100644
--- a/freebsd/sys/netinet6/route6.c
+++ b/freebsd/sys/netinet6/route6.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/queue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet6/in6_var.h>
diff --git a/freebsd/sys/netinet6/scope6.c b/freebsd/sys/netinet6/scope6.c
index 2ccd2f7a..0f8ead2d 100644
--- a/freebsd/sys/netinet6/scope6.c
+++ b/freebsd/sys/netinet6/scope6.c
@@ -41,9 +41,11 @@ __FBSDID("$FreeBSD$");
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/queue.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -58,6 +60,11 @@ VNET_DEFINE(int, ip6_use_defzone) = 1;
#else
VNET_DEFINE(int, ip6_use_defzone) = 0;
#endif
+VNET_DEFINE(int, deembed_scopeid) = 1;
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, deembed_scopeid, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(deembed_scopeid), 0,
+ "Extract embedded zone ID and set it to sin6_scope_id in sockaddr_in6.");
/*
* The scope6_lock protects the global sid default stored in
@@ -95,22 +102,14 @@ scope6_ifattach(struct ifnet *ifp)
{
struct scope6_id *sid;
- sid = (struct scope6_id *)malloc(sizeof(*sid), M_IFADDR, M_WAITOK);
- bzero(sid, sizeof(*sid));
-
+ sid = malloc(sizeof(*sid), M_IFADDR, M_WAITOK | M_ZERO);
/*
* XXX: IPV6_ADDR_SCOPE_xxx macros are not standard.
* Should we rather hardcode here?
*/
sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = ifp->if_index;
sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = ifp->if_index;
-#ifdef MULTI_SCOPE
- /* by default, we don't care about scope boundary for these scopes. */
- sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL] = 1;
- sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL] = 1;
-#endif
-
- return sid;
+ return (sid);
}
void
@@ -230,62 +229,24 @@ scope6_get(struct ifnet *ifp, struct scope6_id *idlist)
* Get a scope of the address. Node-local, link-local, site-local or global.
*/
int
-in6_addrscope(struct in6_addr *addr)
+in6_addrscope(const struct in6_addr *addr)
{
- int scope;
-
- if (addr->s6_addr[0] == 0xfe) {
- scope = addr->s6_addr[1] & 0xc0;
-
- switch (scope) {
- case 0x80:
- return IPV6_ADDR_SCOPE_LINKLOCAL;
- break;
- case 0xc0:
- return IPV6_ADDR_SCOPE_SITELOCAL;
- break;
- default:
- return IPV6_ADDR_SCOPE_GLOBAL; /* just in case */
- break;
- }
- }
-
-
- if (addr->s6_addr[0] == 0xff) {
- scope = addr->s6_addr[1] & 0x0f;
+ if (IN6_IS_ADDR_MULTICAST(addr)) {
/*
- * due to other scope such as reserved,
- * return scope doesn't work.
+ * Addresses with reserved value F must be treated as
+ * global multicast addresses.
*/
- switch (scope) {
- case IPV6_ADDR_SCOPE_INTFACELOCAL:
- return IPV6_ADDR_SCOPE_INTFACELOCAL;
- break;
- case IPV6_ADDR_SCOPE_LINKLOCAL:
- return IPV6_ADDR_SCOPE_LINKLOCAL;
- break;
- case IPV6_ADDR_SCOPE_SITELOCAL:
- return IPV6_ADDR_SCOPE_SITELOCAL;
- break;
- default:
- return IPV6_ADDR_SCOPE_GLOBAL;
- break;
- }
+ if (IPV6_ADDR_MC_SCOPE(addr) == 0x0f)
+ return (IPV6_ADDR_SCOPE_GLOBAL);
+ return (IPV6_ADDR_MC_SCOPE(addr));
}
-
- /*
- * Regard loopback and unspecified addresses as global, since
- * they have no ambiguity.
- */
- if (bcmp(&in6addr_loopback, addr, sizeof(*addr) - 1) == 0) {
- if (addr->s6_addr[15] == 1) /* loopback */
- return IPV6_ADDR_SCOPE_LINKLOCAL;
- if (addr->s6_addr[15] == 0) /* unspecified */
- return IPV6_ADDR_SCOPE_GLOBAL; /* XXX: correct? */
- }
-
- return IPV6_ADDR_SCOPE_GLOBAL;
+ if (IN6_IS_ADDR_LINKLOCAL(addr) ||
+ IN6_IS_ADDR_LOOPBACK(addr))
+ return (IPV6_ADDR_SCOPE_LINKLOCAL);
+ if (IN6_IS_ADDR_SITELOCAL(addr))
+ return (IPV6_ADDR_SCOPE_SITELOCAL);
+ return (IPV6_ADDR_SCOPE_GLOBAL);
}
/*
@@ -359,7 +320,6 @@ scope6_addr2default(struct in6_addr *addr)
int
sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok)
{
- struct ifnet *ifp;
u_int32_t zoneid;
if ((zoneid = sin6->sin6_scope_id) == 0 && defaultok)
@@ -374,15 +334,11 @@ sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok)
* zone IDs assuming a one-to-one mapping between interfaces
* and links.
*/
- if (V_if_index < zoneid)
- return (ENXIO);
- ifp = ifnet_byindex(zoneid);
- if (ifp == NULL) /* XXX: this can happen for some OS */
+ if (V_if_index < zoneid || ifnet_byindex(zoneid) == NULL)
return (ENXIO);
/* XXX assignment to 16bit from 32bit variable */
sin6->sin6_addr.s6_addr16[1] = htons(zoneid & 0xffff);
-
sin6->sin6_scope_id = 0;
}
@@ -398,12 +354,6 @@ sa6_recoverscope(struct sockaddr_in6 *sin6)
char ip6buf[INET6_ADDRSTRLEN];
u_int32_t zoneid;
- if (sin6->sin6_scope_id != 0) {
- log(LOG_NOTICE,
- "sa6_recoverscope: assumption failure (non 0 ID): %s%%%d\n",
- ip6_sprintf(ip6buf, &sin6->sin6_addr), sin6->sin6_scope_id);
- /* XXX: proceed anyway... */
- }
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) ||
IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr)) {
/*
@@ -414,8 +364,19 @@ sa6_recoverscope(struct sockaddr_in6 *sin6)
/* sanity check */
if (V_if_index < zoneid)
return (ENXIO);
+#if 0
+ /* XXX: Disabled due to possible deadlock. */
if (!ifnet_byindex(zoneid))
return (ENXIO);
+#endif
+ if (sin6->sin6_scope_id != 0 &&
+ zoneid != sin6->sin6_scope_id) {
+ log(LOG_NOTICE,
+ "%s: embedded scope mismatch: %s%%%d. "
+ "sin6_scope_id was overridden\n", __func__,
+ ip6_sprintf(ip6buf, &sin6->sin6_addr),
+ sin6->sin6_scope_id);
+ }
sin6->sin6_addr.s6_addr16[1] = 0;
sin6->sin6_scope_id = zoneid;
}
@@ -438,63 +399,35 @@ in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id)
u_int32_t zoneid = 0;
struct scope6_id *sid;
- IF_AFDATA_RLOCK(ifp);
-
- sid = SID(ifp);
-
-#ifdef DIAGNOSTIC
- if (sid == NULL) { /* should not happen */
- panic("in6_setscope: scope array is NULL");
- /* NOTREACHED */
- }
-#endif
-
/*
* special case: the loopback address can only belong to a loopback
* interface.
*/
if (IN6_IS_ADDR_LOOPBACK(in6)) {
- if (!(ifp->if_flags & IFF_LOOPBACK)) {
- IF_AFDATA_RUNLOCK(ifp);
+ if (!(ifp->if_flags & IFF_LOOPBACK))
return (EINVAL);
- } else {
- if (ret_id != NULL)
- *ret_id = 0; /* there's no ambiguity */
+ } else {
+ scope = in6_addrscope(in6);
+ if (scope == IPV6_ADDR_SCOPE_INTFACELOCAL ||
+ scope == IPV6_ADDR_SCOPE_LINKLOCAL) {
+ /*
+ * Currently we use interface indeces as the
+ * zone IDs for interface-local and link-local
+ * scopes.
+ */
+ zoneid = ifp->if_index;
+ in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */
+ } else if (scope != IPV6_ADDR_SCOPE_GLOBAL) {
+ IF_AFDATA_RLOCK(ifp);
+ sid = SID(ifp);
+ zoneid = sid->s6id_list[scope];
IF_AFDATA_RUNLOCK(ifp);
- return (0);
}
}
- scope = in6_addrscope(in6);
- switch (scope) {
- case IPV6_ADDR_SCOPE_INTFACELOCAL: /* should be interface index */
- zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL];
- break;
-
- case IPV6_ADDR_SCOPE_LINKLOCAL:
- zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL];
- break;
-
- case IPV6_ADDR_SCOPE_SITELOCAL:
- zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL];
- break;
-
- case IPV6_ADDR_SCOPE_ORGLOCAL:
- zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL];
- break;
-
- default:
- zoneid = 0; /* XXX: treat as global. */
- break;
- }
- IF_AFDATA_RUNLOCK(ifp);
-
if (ret_id != NULL)
*ret_id = zoneid;
- if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6))
- in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */
-
return (0);
}
@@ -528,3 +461,114 @@ in6_getscope(struct in6_addr *in6)
return (0);
}
+
+/*
+ * Return pointer to ifnet structure, corresponding to the zone id of
+ * link-local scope.
+ */
+struct ifnet*
+in6_getlinkifnet(uint32_t zoneid)
+{
+
+ return (ifnet_byindex((u_short)zoneid));
+}
+
+/*
+ * Return zone id for the specified scope.
+ */
+uint32_t
+in6_getscopezone(const struct ifnet *ifp, int scope)
+{
+
+ if (scope == IPV6_ADDR_SCOPE_INTFACELOCAL ||
+ scope == IPV6_ADDR_SCOPE_LINKLOCAL)
+ return (ifp->if_index);
+ if (scope >= 0 && scope < IPV6_ADDR_SCOPES_COUNT)
+ return (SID(ifp)->s6id_list[scope]);
+ return (0);
+}
+
+/*
+ * Extracts scope from adddress @dst, stores cleared address
+ * inside @dst and zone inside @scopeid
+ */
+void
+in6_splitscope(const struct in6_addr *src, struct in6_addr *dst,
+ uint32_t *scopeid)
+{
+ uint32_t zoneid;
+
+ *dst = *src;
+ zoneid = ntohs(in6_getscope(dst));
+ in6_clearscope(dst);
+ *scopeid = zoneid;
+}
+
+/*
+ * This function is for checking sockaddr_in6 structure passed
+ * from the application level (usually).
+ *
+ * sin6_scope_id should be set for link-local unicast, link-local and
+ * interface-local multicast addresses.
+ *
+ * If it is zero, then look into default zone ids. If default zone id is
+ * not set or disabled, then return error.
+ */
+int
+sa6_checkzone(struct sockaddr_in6 *sa6)
+{
+ int scope;
+
+ scope = in6_addrscope(&sa6->sin6_addr);
+ if (scope == IPV6_ADDR_SCOPE_GLOBAL)
+ return (sa6->sin6_scope_id ? EINVAL: 0);
+ if (IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr) &&
+ scope != IPV6_ADDR_SCOPE_LINKLOCAL &&
+ scope != IPV6_ADDR_SCOPE_INTFACELOCAL) {
+ if (sa6->sin6_scope_id == 0 && V_ip6_use_defzone != 0)
+ sa6->sin6_scope_id = V_sid_default.s6id_list[scope];
+ return (0);
+ }
+ /*
+ * Since ::1 address always configured on the lo0, we can
+ * automatically set its zone id, when it is not specified.
+ * Return error, when specified zone id doesn't match with
+ * actual value.
+ */
+ if (IN6_IS_ADDR_LOOPBACK(&sa6->sin6_addr)) {
+ if (sa6->sin6_scope_id == 0)
+ sa6->sin6_scope_id = in6_getscopezone(V_loif, scope);
+ else if (sa6->sin6_scope_id != in6_getscopezone(V_loif, scope))
+ return (EADDRNOTAVAIL);
+ }
+ /* XXX: we can validate sin6_scope_id here */
+ if (sa6->sin6_scope_id != 0)
+ return (0);
+ if (V_ip6_use_defzone != 0)
+ sa6->sin6_scope_id = V_sid_default.s6id_list[scope];
+ /* Return error if we can't determine zone id */
+ return (sa6->sin6_scope_id ? 0: EADDRNOTAVAIL);
+}
+
+/*
+ * This function is similar to sa6_checkzone, but it uses given ifp
+ * to initialize sin6_scope_id.
+ */
+int
+sa6_checkzone_ifp(struct ifnet *ifp, struct sockaddr_in6 *sa6)
+{
+ int scope;
+
+ scope = in6_addrscope(&sa6->sin6_addr);
+ if (scope == IPV6_ADDR_SCOPE_LINKLOCAL ||
+ scope == IPV6_ADDR_SCOPE_INTFACELOCAL) {
+ if (sa6->sin6_scope_id == 0) {
+ sa6->sin6_scope_id = in6_getscopezone(ifp, scope);
+ return (0);
+ } else if (sa6->sin6_scope_id != in6_getscopezone(ifp, scope))
+ return (EADDRNOTAVAIL);
+ }
+ return (sa6_checkzone(sa6));
+}
+
+
diff --git a/freebsd/sys/netinet6/scope6_var.h b/freebsd/sys/netinet6/scope6_var.h
index 990325e9..e38d77a9 100644
--- a/freebsd/sys/netinet6/scope6_var.h
+++ b/freebsd/sys/netinet6/scope6_var.h
@@ -34,14 +34,20 @@
#define _NETINET6_SCOPE6_VAR_H_
#ifdef _KERNEL
+#include <net/vnet.h>
+
+#define IPV6_ADDR_SCOPES_COUNT 16
struct scope6_id {
/*
* 16 is correspondent to 4bit multicast scope field.
* i.e. from node-local to global with some reserved/unassigned types.
*/
- u_int32_t s6id_list[16];
+ uint32_t s6id_list[IPV6_ADDR_SCOPES_COUNT];
};
+VNET_DECLARE(int, deembed_scopeid);
+#define V_deembed_scopeid VNET(deembed_scopeid)
+
void scope6_init(void);
struct scope6_id *scope6_ifattach(struct ifnet *);
void scope6_ifdetach(struct scope6_id *);
@@ -51,9 +57,14 @@ int scope6_get_default(struct scope6_id *);
u_int32_t scope6_addr2default(struct in6_addr *);
int sa6_embedscope(struct sockaddr_in6 *, int);
int sa6_recoverscope(struct sockaddr_in6 *);
+int sa6_checkzone(struct sockaddr_in6 *);
+int sa6_checkzone_ifp(struct ifnet *, struct sockaddr_in6 *);
int in6_setscope(struct in6_addr *, struct ifnet *, u_int32_t *);
int in6_clearscope(struct in6_addr *);
uint16_t in6_getscope(struct in6_addr *);
+uint32_t in6_getscopezone(const struct ifnet *, int);
+void in6_splitscope(const struct in6_addr *, struct in6_addr *, uint32_t *);
+struct ifnet* in6_getlinkifnet(uint32_t);
#endif /* _KERNEL */
#endif /* _NETINET6_SCOPE6_VAR_H_ */
diff --git a/freebsd/sys/netinet6/sctp6_usrreq.c b/freebsd/sys/netinet6/sctp6_usrreq.c
index c8bc6620..962a622e 100644
--- a/freebsd/sys/netinet6/sctp6_usrreq.c
+++ b/freebsd/sys/netinet6/sctp6_usrreq.c
@@ -41,9 +41,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp_pcb.h>
#include <netinet/sctp_header.h>
#include <netinet/sctp_var.h>
-#ifdef INET6
#include <netinet6/sctp6_var.h>
-#endif
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_output.h>
#include <netinet/sctp_uio.h>
@@ -56,13 +54,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp_output.h>
#include <netinet/sctp_bsd_addr.h>
#include <netinet/sctp_crc32.h>
+#include <netinet/icmp6.h>
#include <netinet/udp.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
-#ifdef INET6
#include <netipsec/ipsec6.h>
-#endif /* INET6 */
#endif /* IPSEC */
extern struct protosw inetsw[];
@@ -85,7 +82,8 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
#endif
uint32_t mflowid;
- uint8_t use_mflowid;
+ uint8_t mflowtype;
+ uint16_t fibnum;
iphlen = *offp;
if (SCTP_GET_PKT_VRFID(*i_pak, vrf_id)) {
@@ -96,13 +94,7 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
#ifdef SCTP_MBUF_LOGGING
/* Log in any input mbufs */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_INPUT);
- }
- }
+ sctp_log_mbc(m, SCTP_MBUF_INPUT);
}
#endif
#ifdef SCTP_PACKET_LOGGING
@@ -111,17 +103,13 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
}
#endif
SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
- "sctp6_input(): Packet of length %d received on %s with csum_flags 0x%x.\n",
+ "sctp6_input(): Packet of length %d received on %s with csum_flags 0x%b.\n",
m->m_pkthdr.len,
if_name(m->m_pkthdr.rcvif),
- m->m_pkthdr.csum_flags);
- if (m->m_flags & M_FLOWID) {
- mflowid = m->m_pkthdr.flowid;
- use_mflowid = 1;
- } else {
- mflowid = 0;
- use_mflowid = 0;
- }
+ (int)m->m_pkthdr.csum_flags, CSUM_BITS);
+ mflowid = m->m_pkthdr.flowid;
+ mflowtype = M_HASHTYPE_GET(m);
+ fibnum = M_GETFIB(m);
SCTP_STAT_INCR(sctps_recvpackets);
SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
/* Get IP, SCTP, and first chunk header together in the first mbuf. */
@@ -151,10 +139,6 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
if (in6_setscope(&dst.sin6_addr, m->m_pkthdr.rcvif, NULL) != 0) {
goto out;
}
- if (faithprefix_p != NULL && (*faithprefix_p) (&dst.sin6_addr)) {
- /* XXX send icmp6 host/port unreach? */
- goto out;
- }
length = ntohs(ip6->ip6_plen) + iphlen;
/* Validate mbuf chain length with IP payload length. */
if (SCTP_HEADER_LEN(m) != length) {
@@ -186,7 +170,7 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
compute_crc,
#endif
ecn_bits,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
out:
if (m) {
@@ -202,250 +186,224 @@ sctp6_input(struct mbuf **i_pak, int *offp, int proto SCTP_UNUSED)
return (sctp6_input_with_port(i_pak, offp, 0));
}
-static void
-sctp6_notify_mbuf(struct sctp_inpcb *inp, struct icmp6_hdr *icmp6,
- struct sctphdr *sh, struct sctp_tcb *stcb, struct sctp_nets *net)
-{
- uint32_t nxtsz;
-
- if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
- (icmp6 == NULL) || (sh == NULL)) {
- goto out;
- }
- /* First do we even look at it? */
- if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag))
- goto out;
-
- if (icmp6->icmp6_type != ICMP6_PACKET_TOO_BIG) {
- /* not PACKET TO BIG */
- goto out;
- }
- /*
- * ok we need to look closely. We could even get smarter and look at
- * anyone that we sent to in case we get a different ICMP that tells
- * us there is no way to reach a host, but for this impl, all we
- * care about is MTU discovery.
- */
- nxtsz = ntohl(icmp6->icmp6_mtu);
- /* Stop any PMTU timer */
- sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL, SCTP_FROM_SCTP6_USRREQ + SCTP_LOC_1);
-
- /* Adjust destination size limit */
- if (net->mtu > nxtsz) {
- net->mtu = nxtsz;
- if (net->port) {
- net->mtu -= sizeof(struct udphdr);
- }
- }
- /* now what about the ep? */
- if (stcb->asoc.smallest_mtu > nxtsz) {
- struct sctp_tmit_chunk *chk;
-
- /* Adjust that too */
- stcb->asoc.smallest_mtu = nxtsz;
- /* now off to subtract IP_DF flag if needed */
-
- TAILQ_FOREACH(chk, &stcb->asoc.send_queue, sctp_next) {
- if ((uint32_t) (chk->send_size + IP_HDR_SIZE) > nxtsz) {
- chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
- }
- }
- TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
- if ((uint32_t) (chk->send_size + IP_HDR_SIZE) > nxtsz) {
- /*
- * For this guy we also mark for immediate
- * resend since we sent to big of chunk
- */
- chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
- if (chk->sent != SCTP_DATAGRAM_RESEND)
- stcb->asoc.sent_queue_retran_cnt++;
- chk->sent = SCTP_DATAGRAM_RESEND;
- chk->rec.data.doing_fast_retransmit = 0;
-
- chk->sent = SCTP_DATAGRAM_RESEND;
- /* Clear any time so NO RTT is being done */
- chk->sent_rcv_time.tv_sec = 0;
- chk->sent_rcv_time.tv_usec = 0;
- stcb->asoc.total_flight -= chk->send_size;
- net->flight_size -= chk->send_size;
- }
- }
- }
- sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL);
-out:
- if (stcb) {
- SCTP_TCB_UNLOCK(stcb);
- }
-}
-
-
void
sctp6_notify(struct sctp_inpcb *inp,
- struct icmp6_hdr *icmph,
- struct sctphdr *sh,
- struct sockaddr *to,
struct sctp_tcb *stcb,
- struct sctp_nets *net)
+ struct sctp_nets *net,
+ uint8_t icmp6_type,
+ uint8_t icmp6_code,
+ uint16_t next_mtu)
{
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
-
- /* protection */
- if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
- (sh == NULL) || (to == NULL)) {
- if (stcb)
- SCTP_TCB_UNLOCK(stcb);
- return;
- }
- /* First job is to verify the vtag matches what I would send */
- if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) {
- SCTP_TCB_UNLOCK(stcb);
- return;
- }
- if (icmph->icmp6_type != ICMP_UNREACH) {
- /* We only care about unreachable */
- SCTP_TCB_UNLOCK(stcb);
- return;
- }
- if ((icmph->icmp6_code == ICMP_UNREACH_NET) ||
- (icmph->icmp6_code == ICMP_UNREACH_HOST) ||
- (icmph->icmp6_code == ICMP_UNREACH_NET_UNKNOWN) ||
- (icmph->icmp6_code == ICMP_UNREACH_HOST_UNKNOWN) ||
- (icmph->icmp6_code == ICMP_UNREACH_ISOLATED) ||
- (icmph->icmp6_code == ICMP_UNREACH_NET_PROHIB) ||
- (icmph->icmp6_code == ICMP_UNREACH_HOST_PROHIB) ||
- (icmph->icmp6_code == ICMP_UNREACH_FILTER_PROHIB)) {
-
- /*
- * Hmm reachablity problems we must examine closely. If its
- * not reachable, we may have lost a network. Or if there is
- * NO protocol at the other end named SCTP. well we consider
- * it a OOTB abort.
- */
- if (net->dest_state & SCTP_ADDR_REACHABLE) {
- /* Ok that destination is NOT reachable */
- net->dest_state &= ~SCTP_ADDR_REACHABLE;
- net->dest_state &= ~SCTP_ADDR_PF;
- sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
- stcb, 0, (void *)net, SCTP_SO_NOT_LOCKED);
+ int timer_stopped;
+
+ switch (icmp6_type) {
+ case ICMP6_DST_UNREACH:
+ if ((icmp6_code == ICMP6_DST_UNREACH_NOROUTE) ||
+ (icmp6_code == ICMP6_DST_UNREACH_ADMIN) ||
+ (icmp6_code == ICMP6_DST_UNREACH_BEYONDSCOPE) ||
+ (icmp6_code == ICMP6_DST_UNREACH_ADDR)) {
+ /* Mark the net unreachable. */
+ if (net->dest_state & SCTP_ADDR_REACHABLE) {
+ /* Ok that destination is not reachable */
+ net->dest_state &= ~SCTP_ADDR_REACHABLE;
+ net->dest_state &= ~SCTP_ADDR_PF;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
+ stcb, 0, (void *)net, SCTP_SO_NOT_LOCKED);
+ }
}
SCTP_TCB_UNLOCK(stcb);
- } else if ((icmph->icmp6_code == ICMP_UNREACH_PROTOCOL) ||
- (icmph->icmp6_code == ICMP_UNREACH_PORT)) {
- /*
- * Here the peer is either playing tricks on us, including
- * an address that belongs to someone who does not support
- * SCTP OR was a userland implementation that shutdown and
- * now is dead. In either case treat it like a OOTB abort
- * with no TCB
- */
- sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED);
+ break;
+ case ICMP6_PARAM_PROB:
+ /* Treat it like an ABORT. */
+ if (icmp6_code == ICMP6_PARAMPROB_NEXTHEADER) {
+ sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- so = SCTP_INP_SO(inp);
- atomic_add_int(&stcb->asoc.refcnt, 1);
- SCTP_TCB_UNLOCK(stcb);
- SCTP_SOCKET_LOCK(so, 1);
- SCTP_TCB_LOCK(stcb);
- atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- SCTP_SOCKET_UNLOCK(so, 1);
- /* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */
+ SCTP_SOCKET_UNLOCK(so, 1);
#endif
- /* no need to unlock here, since the TCB is gone */
- } else {
+ } else {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ break;
+ case ICMP6_PACKET_TOO_BIG:
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ timer_stopped = 1;
+ sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_1);
+ } else {
+ timer_stopped = 0;
+ }
+ /* Update the path MTU. */
+ if (net->mtu > next_mtu) {
+ net->mtu = next_mtu;
+ if (net->port) {
+ net->mtu -= sizeof(struct udphdr);
+ }
+ }
+ /* Update the association MTU */
+ if (stcb->asoc.smallest_mtu > next_mtu) {
+ sctp_pathmtu_adjustment(stcb, next_mtu);
+ }
+ /* Finally, start the PMTU timer if it was running before. */
+ if (timer_stopped) {
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+ }
SCTP_TCB_UNLOCK(stcb);
+ break;
+ default:
+ SCTP_TCB_UNLOCK(stcb);
+ break;
}
}
-
-
void
sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d)
{
+ struct ip6ctlparam *ip6cp;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
struct sctphdr sh;
- struct ip6ctlparam *ip6cp = NULL;
- uint32_t vrf_id;
-
- vrf_id = SCTP_DEFAULT_VRFID;
+ struct sockaddr_in6 src, dst;
if (pktdst->sa_family != AF_INET6 ||
- pktdst->sa_len != sizeof(struct sockaddr_in6))
+ pktdst->sa_len != sizeof(struct sockaddr_in6)) {
return;
-
- if ((unsigned)cmd >= PRC_NCMDS)
+ }
+ if ((unsigned)cmd >= PRC_NCMDS) {
return;
+ }
if (PRC_IS_REDIRECT(cmd)) {
d = NULL;
} else if (inet6ctlerrmap[cmd] == 0) {
return;
}
- /* if the parameter is from icmp6, decode it. */
+ /* If the parameter is from icmp6, decode it. */
if (d != NULL) {
ip6cp = (struct ip6ctlparam *)d;
} else {
ip6cp = (struct ip6ctlparam *)NULL;
}
- if (ip6cp) {
+ if (ip6cp != NULL) {
/*
* XXX: We assume that when IPV6 is non NULL, M and OFF are
* valid.
*/
- /* check if we can safely examine src and dst ports */
- struct sctp_inpcb *inp = NULL;
- struct sctp_tcb *stcb = NULL;
- struct sctp_nets *net = NULL;
- struct sockaddr_in6 final;
-
- if (ip6cp->ip6c_m == NULL)
+ if (ip6cp->ip6c_m == NULL) {
return;
-
+ }
+ /*
+ * Check if we can safely examine the ports and the
+ * verification tag of the SCTP common header.
+ */
+ if (ip6cp->ip6c_m->m_pkthdr.len <
+ (int32_t) (ip6cp->ip6c_off + offsetof(struct sctphdr, checksum))) {
+ return;
+ }
+ /* Copy out the port numbers and the verification tag. */
bzero(&sh, sizeof(sh));
- bzero(&final, sizeof(final));
+ m_copydata(ip6cp->ip6c_m,
+ ip6cp->ip6c_off,
+ sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t),
+ (caddr_t)&sh);
+ memset(&src, 0, sizeof(struct sockaddr_in6));
+ src.sin6_family = AF_INET6;
+ src.sin6_len = sizeof(struct sockaddr_in6);
+ src.sin6_port = sh.src_port;
+ src.sin6_addr = ip6cp->ip6c_ip6->ip6_src;
+ if (in6_setscope(&src.sin6_addr, ip6cp->ip6c_m->m_pkthdr.rcvif, NULL) != 0) {
+ return;
+ }
+ memset(&dst, 0, sizeof(struct sockaddr_in6));
+ dst.sin6_family = AF_INET6;
+ dst.sin6_len = sizeof(struct sockaddr_in6);
+ dst.sin6_port = sh.dest_port;
+ dst.sin6_addr = ip6cp->ip6c_ip6->ip6_dst;
+ if (in6_setscope(&dst.sin6_addr, ip6cp->ip6c_m->m_pkthdr.rcvif, NULL) != 0) {
+ return;
+ }
inp = NULL;
net = NULL;
- m_copydata(ip6cp->ip6c_m, ip6cp->ip6c_off, sizeof(sh),
- (caddr_t)&sh);
- ip6cp->ip6c_src->sin6_port = sh.src_port;
- final.sin6_len = sizeof(final);
- final.sin6_family = AF_INET6;
- final.sin6_addr = ((struct sockaddr_in6 *)pktdst)->sin6_addr;
- final.sin6_port = sh.dest_port;
- stcb = sctp_findassociation_addr_sa((struct sockaddr *)&final,
- (struct sockaddr *)ip6cp->ip6c_src,
- &inp, &net, 1, vrf_id);
- /* inp's ref-count increased && stcb locked */
- if (stcb != NULL && inp && (inp->sctp_socket != NULL)) {
- if (cmd == PRC_MSGSIZE) {
- sctp6_notify_mbuf(inp,
- ip6cp->ip6c_icmp6,
- &sh,
- stcb,
- net);
- /* inp's ref-count reduced && stcb unlocked */
+ stcb = sctp_findassociation_addr_sa((struct sockaddr *)&dst,
+ (struct sockaddr *)&src,
+ &inp, &net, 1, SCTP_DEFAULT_VRFID);
+ if ((stcb != NULL) &&
+ (net != NULL) &&
+ (inp != NULL)) {
+ /* Check the verification tag */
+ if (ntohl(sh.v_tag) != 0) {
+ /*
+ * This must be the verification tag used
+ * for sending out packets. We don't
+ * consider packets reflecting the
+ * verification tag.
+ */
+ if (ntohl(sh.v_tag) != stcb->asoc.peer_vtag) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
} else {
- sctp6_notify(inp, ip6cp->ip6c_icmp6, &sh,
- (struct sockaddr *)&final,
- stcb, net);
- /* inp's ref-count reduced && stcb unlocked */
+ if (ip6cp->ip6c_m->m_pkthdr.len >=
+ ip6cp->ip6c_off + sizeof(struct sctphdr) +
+ sizeof(struct sctp_chunkhdr) +
+ offsetof(struct sctp_init, a_rwnd)) {
+ /*
+ * In this case we can check if we
+ * got an INIT chunk and if the
+ * initiate tag matches.
+ */
+ uint32_t initiate_tag;
+ uint8_t chunk_type;
+
+ m_copydata(ip6cp->ip6c_m,
+ ip6cp->ip6c_off +
+ sizeof(struct sctphdr),
+ sizeof(uint8_t),
+ (caddr_t)&chunk_type);
+ m_copydata(ip6cp->ip6c_m,
+ ip6cp->ip6c_off +
+ sizeof(struct sctphdr) +
+ sizeof(struct sctp_chunkhdr),
+ sizeof(uint32_t),
+ (caddr_t)&initiate_tag);
+ if ((chunk_type != SCTP_INITIATION) ||
+ (ntohl(initiate_tag) != stcb->asoc.my_vtag)) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ } else {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
}
+ sctp6_notify(inp, stcb, net,
+ ip6cp->ip6c_icmp6->icmp6_type,
+ ip6cp->ip6c_icmp6->icmp6_code,
+ (uint16_t) ntohl(ip6cp->ip6c_icmp6->icmp6_mtu));
} else {
- if (PRC_IS_REDIRECT(cmd) && inp) {
- in6_rtchange((struct in6pcb *)inp,
- inet6ctlerrmap[cmd]);
- }
- if (inp) {
+ if ((stcb == NULL) && (inp != NULL)) {
/* reduce inp's ref-count */
SCTP_INP_WLOCK(inp);
SCTP_INP_DECR_REF(inp);
SCTP_INP_WUNLOCK(inp);
}
- if (stcb)
+ if (stcb) {
SCTP_TCB_UNLOCK(stcb);
+ }
}
}
}
@@ -848,7 +806,7 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
#ifdef INET
struct in6pcb *inp6;
struct sockaddr_in6 *sin6;
- struct sockaddr_storage ss;
+ union sctp_sockstore store;
#endif
@@ -932,8 +890,8 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
}
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
/* convert v4-mapped into v4 addr */
- in6_sin6_2_sin((struct sockaddr_in *)&ss, sin6);
- addr = (struct sockaddr *)&ss;
+ in6_sin6_2_sin(&store.sin, sin6);
+ addr = &store.sa;
}
#endif /* INET */
/* Now do we connect? */
@@ -964,7 +922,9 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
return (EALREADY);
}
/* We are GOOD to go */
- stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, p);
+ stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
+ inp->sctp_ep.pre_open_stream_count,
+ inp->sctp_ep.port, p);
SCTP_ASOC_CREATE_UNLOCK(inp);
if (stcb == NULL) {
/* Gak! no memory */
@@ -1023,7 +983,10 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr)
stcb = LIST_FIRST(&inp->sctp_asoc_list);
if (stcb == NULL) {
- goto notConn6;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_FREE_SONAME(sin6);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT);
+ return (ENOENT);
}
fnd = 0;
sin_a6 = NULL;
@@ -1040,7 +1003,10 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr)
}
if ((!fnd) || (sin_a6 == NULL)) {
/* punt */
- goto notConn6;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_FREE_SONAME(sin6);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT);
+ return (ENOENT);
}
vrf_id = inp->def_vrf_id;
sctp_ifa = sctp_source_address_selection(inp, stcb, (sctp_route_t *) & net->ro, net, 0, vrf_id);
@@ -1049,7 +1015,6 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr)
}
} else {
/* For the bound all case you get back 0 */
- notConn6:
memset(&sin6->sin6_addr, 0, sizeof(sin6->sin6_addr));
}
} else {
@@ -1061,7 +1026,7 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr)
if (laddr->ifa->address.sa.sa_family == AF_INET6) {
struct sockaddr_in6 *sin_a;
- sin_a = (struct sockaddr_in6 *)&laddr->ifa->address.sin6;
+ sin_a = &laddr->ifa->address.sin6;
sin6->sin6_addr = sin_a->sin6_addr;
fnd = 1;
break;
@@ -1138,8 +1103,11 @@ sctp6_peeraddr(struct socket *so, struct sockaddr **addr)
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT);
return (ENOENT);
}
- if ((error = sa6_recoverscope(sin6)) != 0)
+ if ((error = sa6_recoverscope(sin6)) != 0) {
+ SCTP_FREE_SONAME(sin6);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, error);
return (error);
+ }
*addr = (struct sockaddr *)sin6;
return (0);
}
@@ -1147,10 +1115,6 @@ sctp6_peeraddr(struct socket *so, struct sockaddr **addr)
static int
sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
{
-#ifdef INET
- struct sockaddr *addr;
-
-#endif
struct in6pcb *inp6 = sotoin6pcb(so);
int error;
@@ -1162,19 +1126,21 @@ sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
error = sctp6_getaddr(so, nam);
#ifdef INET
if (error) {
+ struct sockaddr_in6 *sin6;
+
/* try v4 next if v6 failed */
error = sctp_ingetaddr(so, nam);
if (error) {
return (error);
}
- addr = *nam;
- /* if I'm V6ONLY, convert it to v4-mapped */
- if (SCTP_IPV6_V6ONLY(inp6)) {
- struct sockaddr_in6 sin6;
-
- in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6);
- memcpy(addr, &sin6, sizeof(struct sockaddr_in6));
+ SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
+ if (sin6 == NULL) {
+ SCTP_FREE_SONAME(*nam);
+ return (ENOMEM);
}
+ in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, sin6);
+ SCTP_FREE_SONAME(*nam);
+ *nam = (struct sockaddr *)sin6;
}
#endif
return (error);
@@ -1184,10 +1150,6 @@ sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
static int
sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
-#ifdef INET
- struct sockaddr *addr;
-
-#endif
struct in6pcb *inp6 = sotoin6pcb(so);
int error;
@@ -1199,19 +1161,21 @@ sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam)
error = sctp6_peeraddr(so, nam);
#ifdef INET
if (error) {
+ struct sockaddr_in6 *sin6;
+
/* try v4 next if v6 failed */
error = sctp_peeraddr(so, nam);
if (error) {
return (error);
}
- addr = *nam;
- /* if I'm V6ONLY, convert it to v4-mapped */
- if (SCTP_IPV6_V6ONLY(inp6)) {
- struct sockaddr_in6 sin6;
-
- in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6);
- memcpy(addr, &sin6, sizeof(struct sockaddr_in6));
+ SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
+ if (sin6 == NULL) {
+ SCTP_FREE_SONAME(*nam);
+ return (ENOMEM);
}
+ in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, sin6);
+ SCTP_FREE_SONAME(*nam);
+ *nam = (struct sockaddr *)sin6;
}
#endif
return (error);
diff --git a/freebsd/sys/netinet6/sctp6_var.h b/freebsd/sys/netinet6/sctp6_var.h
index 79d4c52b..782567c5 100644
--- a/freebsd/sys/netinet6/sctp6_var.h
+++ b/freebsd/sys/netinet6/sctp6_var.h
@@ -47,10 +47,9 @@ int
sctp6_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
struct mbuf *, struct proc *);
void sctp6_ctlinput(int, struct sockaddr *, void *);
-extern void
-sctp6_notify(struct sctp_inpcb *, struct icmp6_hdr *,
- struct sctphdr *, struct sockaddr *,
- struct sctp_tcb *, struct sctp_nets *);
+void
+sctp6_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *,
+ uint8_t, uint8_t, uint16_t);
#endif
#endif
diff --git a/freebsd/sys/netinet6/udp6_usrreq.c b/freebsd/sys/netinet6/udp6_usrreq.c
index 8342cf7c..790bed2b 100644
--- a/freebsd/sys/netinet6/udp6_usrreq.c
+++ b/freebsd/sys/netinet6/udp6_usrreq.c
@@ -3,6 +3,7 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * Copyright (c) 2014 Kevin Lo
* All rights reserved.
*
* Portions of this software were developed by Robert N. M. Watson under
@@ -73,8 +74,8 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipsec.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/jail.h>
@@ -84,6 +85,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -93,10 +95,13 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>
+#include <net/rss_config.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -108,10 +113,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
+#include <netinet/udplite.h>
#include <netinet6/ip6protosw.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
+#include <netinet6/in6_rss.h>
#include <netinet6/udp6_var.h>
#include <netinet6/scope6_var.h>
@@ -130,27 +137,39 @@ __FBSDID("$FreeBSD$");
extern struct protosw inetsw[];
static void udp6_detach(struct socket *so);
-static void
+static int
udp6_append(struct inpcb *inp, struct mbuf *n, int off,
struct sockaddr_in6 *fromsa)
{
struct socket *so;
struct mbuf *opts;
+ struct udpcb *up;
INP_LOCK_ASSERT(inp);
+ /*
+ * Engage the tunneling protocol.
+ */
+ up = intoudpcb(inp);
+ if (up->u_tun_func != NULL) {
+ in_pcbref(inp);
+ INP_RUNLOCK(inp);
+ (*up->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa,
+ up->u_tun_ctx);
+ INP_RLOCK(inp);
+ return (in_pcbrele_rlocked(inp));
+ }
#ifdef IPSEC
/* Check AH/ESP integrity. */
if (ipsec6_in_reject(n, inp)) {
m_freem(n);
- IPSEC6STAT_INC(in_polvio);
- return;
+ return (0);
}
#endif /* IPSEC */
#ifdef MAC
if (mac_inpcb_check_deliver(inp, n) != 0) {
m_freem(n);
- return;
+ return (0);
}
#endif
opts = NULL;
@@ -170,6 +189,7 @@ udp6_append(struct inpcb *inp, struct mbuf *n, int off,
UDPSTAT_INC(udps_fullsock);
} else
sorwakeup_locked(so);
+ return (0);
}
int
@@ -180,22 +200,19 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
struct ip6_hdr *ip6;
struct udphdr *uh;
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
struct udpcb *up;
int off = *offp;
+ int cscov_partial;
int plen, ulen;
struct sockaddr_in6 fromsa;
struct m_tag *fwd_tag;
uint16_t uh_sum;
+ uint8_t nxt;
ifp = m->m_pkthdr.rcvif;
ip6 = mtod(m, struct ip6_hdr *);
- if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
- /* XXX send icmp6 host/port unreach? */
- m_freem(m);
- return (IPPROTO_DONE);
- }
-
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE);
ip6 = mtod(m, struct ip6_hdr *);
@@ -217,28 +234,43 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6);
ulen = ntohs((u_short)uh->uh_ulen);
- if (plen != ulen) {
- UDPSTAT_INC(udps_badlen);
- goto badunlocked;
- }
-
- /*
- * Checksum extended UDP header and data.
- */
- if (uh->uh_sum == 0) {
- UDPSTAT_INC(udps_nosum);
- goto badunlocked;
+ nxt = proto;
+ cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0;
+ if (nxt == IPPROTO_UDPLITE) {
+ /* Zero means checksum over the complete packet. */
+ if (ulen == 0)
+ ulen = plen;
+ if (ulen == plen)
+ cscov_partial = 0;
+ if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) {
+ /* XXX: What is the right UDPLite MIB counter? */
+ goto badunlocked;
+ }
+ if (uh->uh_sum == 0) {
+ /* XXX: What is the right UDPLite MIB counter? */
+ goto badunlocked;
+ }
+ } else {
+ if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) {
+ UDPSTAT_INC(udps_badlen);
+ goto badunlocked;
+ }
+ if (uh->uh_sum == 0) {
+ UDPSTAT_INC(udps_nosum);
+ goto badunlocked;
+ }
}
- if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
+ if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) &&
+ !cscov_partial) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
uh_sum = m->m_pkthdr.csum_data;
else
- uh_sum = in6_cksum_pseudo(ip6, ulen,
- IPPROTO_UDP, m->m_pkthdr.csum_data);
+ uh_sum = in6_cksum_pseudo(ip6, ulen, nxt,
+ m->m_pkthdr.csum_data);
uh_sum ^= 0xffff;
} else
- uh_sum = in6_cksum(m, IPPROTO_UDP, off, ulen);
+ uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen);
if (uh_sum != 0) {
UDPSTAT_INC(udps_badsum);
@@ -251,11 +283,13 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
init_sin6(&fromsa, m);
fromsa.sin6_port = uh->uh_sport;
+ pcbinfo = udp_get_inpcbinfo(nxt);
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
struct inpcb *last;
+ struct inpcbhead *pcblist;
struct ip6_moptions *imo;
- INP_INFO_RLOCK(&V_udbinfo);
+ INP_INFO_RLOCK(pcbinfo);
/*
* In the event that laddr should be set to the link-local
* address (this happens in RIPng), the multicast address
@@ -271,8 +305,9 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
* here. We need udphdr for IPsec processing so we do that
* later.
*/
+ pcblist = udp_get_pcblist(nxt);
last = NULL;
- LIST_FOREACH(inp, &V_udb, inp_list) {
+ LIST_FOREACH(inp, pcblist, inp_list) {
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
if (inp->inp_lport != uh->uh_dport)
@@ -335,20 +370,10 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
INP_RLOCK(last);
- up = intoudpcb(last);
- if (up->u_tun_func == NULL) {
- udp6_append(last, n, off, &fromsa);
- } else {
- /*
- * Engage the tunneling
- * protocol we will have to
- * leave the info_lock up,
- * since we are hunting
- * through multiple UDP's.
- *
- */
- (*up->u_tun_func)(n, off, last);
- }
+ UDP_PROBE(receive, NULL, last, ip6,
+ last, uh);
+ if (udp6_append(last, n, off, &fromsa))
+ goto inp_lost;
INP_RUNLOCK(last);
}
}
@@ -377,17 +402,11 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
goto badheadlocked;
}
INP_RLOCK(last);
- INP_INFO_RUNLOCK(&V_udbinfo);
- up = intoudpcb(last);
- if (up->u_tun_func == NULL) {
- udp6_append(last, m, off, &fromsa);
- } else {
- /*
- * Engage the tunneling protocol.
- */
- (*up->u_tun_func)(m, off, last);
- }
- INP_RUNLOCK(last);
+ INP_INFO_RUNLOCK(pcbinfo);
+ UDP_PROBE(receive, NULL, last, ip6, last, uh);
+ if (udp6_append(last, m, off, &fromsa) == 0)
+ INP_RUNLOCK(last);
+ inp_lost:
return (IPPROTO_DONE);
}
/*
@@ -407,8 +426,8 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
* Transparently forwarded. Pretend to be the destination.
* Already got one like this?
*/
- inp = in6_pcblookup_mbuf(&V_udbinfo,
- &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
+ inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
+ uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m);
if (!inp) {
/*
@@ -416,7 +435,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
* Because we've rewritten the destination address,
* any hardware-generated hash is ignored.
*/
- inp = in6_pcblookup(&V_udbinfo, &ip6->ip6_src,
+ inp = in6_pcblookup(pcbinfo, &ip6->ip6_src,
uh->uh_sport, &next_hop6->sin6_addr,
next_hop6->sin6_port ? htons(next_hop6->sin6_port) :
uh->uh_dport, INPLOOKUP_WILDCARD |
@@ -426,7 +445,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
m_tag_delete(m, fwd_tag);
m->m_flags &= ~M_IP6_NEXTHOP;
} else
- inp = in6_pcblookup_mbuf(&V_udbinfo, &ip6->ip6_src,
+ inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
m->m_pkthdr.rcvif, m);
@@ -457,28 +476,29 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
}
INP_RLOCK_ASSERT(inp);
up = intoudpcb(inp);
- if (up->u_tun_func == NULL) {
- udp6_append(inp, m, off, &fromsa);
- } else {
- /*
- * Engage the tunneling protocol.
- */
-
- (*up->u_tun_func)(m, off, inp);
+ if (cscov_partial) {
+ if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) {
+ INP_RUNLOCK(inp);
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
}
- INP_RUNLOCK(inp);
+ UDP_PROBE(receive, NULL, inp, ip6, inp, uh);
+ if (udp6_append(inp, m, off, &fromsa) == 0)
+ INP_RUNLOCK(inp);
return (IPPROTO_DONE);
badheadlocked:
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_INFO_RUNLOCK(pcbinfo);
badunlocked:
if (m)
m_freem(m);
return (IPPROTO_DONE);
}
-void
-udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+static void
+udp6_common_ctlinput(int cmd, struct sockaddr *sa, void *d,
+ struct inpcbinfo *pcbinfo)
{
struct udphdr uh;
struct ip6_hdr *ip6;
@@ -534,14 +554,51 @@ udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
bzero(&uh, sizeof(uh));
m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh);
- (void) in6_pcbnotify(&V_udbinfo, sa, uh.uh_dport,
+ if (!PRC_IS_REDIRECT(cmd)) {
+ /* Check to see if its tunneled */
+ struct inpcb *inp;
+ inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_dst,
+ uh.uh_dport, &ip6->ip6_src, uh.uh_sport,
+ INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
+ m->m_pkthdr.rcvif, m);
+ if (inp != NULL) {
+ struct udpcb *up;
+
+ up = intoudpcb(inp);
+ if (up->u_icmp_func) {
+ /* Yes it is. */
+ INP_RUNLOCK(inp);
+ (*up->u_icmp_func)(cmd, (struct sockaddr *)ip6cp->ip6c_src,
+ d, up->u_tun_ctx);
+ return;
+ } else {
+ /* Can't find it. */
+ INP_RUNLOCK(inp);
+ }
+ }
+ }
+ (void)in6_pcbnotify(pcbinfo, sa, uh.uh_dport,
(struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd,
cmdarg, notify);
} else
- (void) in6_pcbnotify(&V_udbinfo, sa, 0,
+ (void)in6_pcbnotify(pcbinfo, sa, 0,
(const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
}
+void
+udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+{
+
+ return (udp6_common_ctlinput(cmd, sa, d, &V_udbinfo));
+}
+
+void
+udplite6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+{
+
+ return (udp6_common_ctlinput(cmd, sa, d, &V_ulitecbinfo));
+}
+
static int
udp6_getcred(SYSCTL_HANDLER_ARGS)
{
@@ -598,10 +655,12 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
struct udphdr *udp6;
struct in6_addr *laddr, *faddr, in6a;
struct sockaddr_in6 *sin6 = NULL;
- struct ifnet *oifp = NULL;
+ int cscov_partial = 0;
int scope_ambiguous = 0;
u_short fport;
int error = 0;
+ uint8_t nxt;
+ uint16_t cscov = 0;
struct ip6_pktopts *optp, opt;
int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
int flags;
@@ -632,9 +691,11 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
return (error);
}
+ nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
+ IPPROTO_UDP : IPPROTO_UDPLITE;
if (control) {
if ((error = ip6_setpktopts(control, &opt,
- inp->in6p_outputopts, td->td_ucred, IPPROTO_UDP)) != 0)
+ inp->in6p_outputopts, td->td_ucred, nxt)) != 0)
goto release;
optp = &opt;
} else
@@ -644,8 +705,6 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
faddr = &sin6->sin6_addr;
/*
- * IPv4 version of udp_output calls in_pcbconnect in this case,
- * which needs splnet and affects performance.
* Since we saw no essential reason for calling in_pcbconnect,
* we get rid of such kind of logic, and call in6_selectsrc
* and in6_pcbsetport in order to fill in the local address
@@ -695,15 +754,10 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
}
if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
- error = in6_selectsrc(sin6, optp, inp, NULL,
- td->td_ucred, &oifp, &in6a);
+ error = in6_selectsrc_socket(sin6, optp, inp,
+ td->td_ucred, scope_ambiguous, &in6a, NULL);
if (error)
goto release;
- if (oifp && scope_ambiguous &&
- (error = in6_setscope(&sin6->sin6_addr,
- oifp, NULL))) {
- goto release;
- }
laddr = &in6a;
} else
laddr = &inp->in6p_laddr; /* XXX */
@@ -751,8 +805,8 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
* Calculate data length and get a mbuf
* for UDP and IP6 headers.
*/
- M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT);
- if (m == 0) {
+ M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT);
+ if (m == NULL) {
error = ENOBUFS;
goto release;
}
@@ -763,7 +817,20 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
udp6->uh_dport = fport;
- if (plen <= 0xffff)
+ if (nxt == IPPROTO_UDPLITE) {
+ struct udpcb *up;
+
+ up = intoudpcb(inp);
+ cscov = up->u_txcslen;
+ if (cscov >= plen)
+ cscov = 0;
+ udp6->uh_ulen = htons(cscov);
+ /*
+ * For UDP-Lite, checksum coverage length of zero means
+ * the entire UDPLite packet is covered by the checksum.
+ */
+ cscov_partial = (cscov == 0) ? 0 : 1;
+ } else if (plen <= 0xffff)
udp6->uh_ulen = htons((u_short)plen);
else
udp6->uh_ulen = 0;
@@ -775,23 +842,66 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK;
ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
ip6->ip6_vfc |= IPV6_VERSION;
-#if 0 /* ip6_plen will be filled in ip6_output. */
ip6->ip6_plen = htons((u_short)plen);
-#endif
- ip6->ip6_nxt = IPPROTO_UDP;
+ ip6->ip6_nxt = nxt;
ip6->ip6_hlim = in6_selecthlim(inp, NULL);
ip6->ip6_src = *laddr;
ip6->ip6_dst = *faddr;
- udp6->uh_sum = in6_cksum_pseudo(ip6, plen, IPPROTO_UDP, 0);
- m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
- m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ if (cscov_partial) {
+ if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
+ sizeof(struct ip6_hdr), plen, cscov)) == 0)
+ udp6->uh_sum = 0xffff;
+ } else {
+ udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
+ m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ }
+
+#ifdef RSS
+ {
+ uint32_t hash_val, hash_type;
+ uint8_t pr;
+ pr = inp->inp_socket->so_proto->pr_protocol;
+ /*
+ * Calculate an appropriate RSS hash for UDP and
+ * UDP Lite.
+ *
+ * The called function will take care of figuring out
+ * whether a 2-tuple or 4-tuple hash is required based
+ * on the currently configured scheme.
+ *
+ * Later later on connected socket values should be
+ * cached in the inpcb and reused, rather than constantly
+ * re-calculating it.
+ *
+ * UDP Lite is a different protocol number and will
+ * likely end up being hashed as a 2-tuple until
+ * RSS / NICs grow UDP Lite protocol awareness.
+ */
+ if (rss_proto_software_hash_v6(faddr, laddr, fport,
+ inp->inp_lport, pr, &hash_val, &hash_type) == 0) {
+ m->m_pkthdr.flowid = hash_val;
+ M_HASHTYPE_SET(m, hash_type);
+ }
+ }
+#endif
flags = 0;
+#ifdef RSS
+ /*
+ * Don't override with the inp cached flowid.
+ *
+ * Until the whole UDP path is vetted, it may actually
+ * be incorrect.
+ */
+ flags |= IP_NODEFAULTFLOWID;
+#endif
+ UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
UDPSTAT_INC(udps_opackets);
- error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions,
- NULL, inp);
+ error = ip6_output(m, optp, NULL, flags,
+ inp->in6p_moptions, NULL, inp);
break;
case AF_INET:
error = EAFNOSUPPORT;
@@ -814,26 +924,32 @@ static void
udp6_abort(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_abort: inp == NULL"));
+ INP_WLOCK(inp);
#ifdef INET
if (inp->inp_vflag & INP_IPV4) {
struct pr_usrreqs *pru;
+ uint8_t nxt;
- pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
+ nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
+ IPPROTO_UDP : IPPROTO_UDPLITE;
+ INP_WUNLOCK(inp);
+ pru = inetsw[ip_protox[nxt]].pr_usrreqs;
(*pru->pru_abort)(so);
return;
}
#endif
- INP_WLOCK(inp);
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
@@ -843,8 +959,10 @@ static int
udp6_attach(struct socket *so, int proto, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp == NULL, ("udp6_attach: inp != NULL"));
@@ -853,10 +971,10 @@ udp6_attach(struct socket *so, int proto, struct thread *td)
if (error)
return (error);
}
- INP_INFO_WLOCK(&V_udbinfo);
- error = in_pcballoc(so, &V_udbinfo);
+ INP_INFO_WLOCK(pcbinfo);
+ error = in_pcballoc(so, pcbinfo);
if (error) {
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (error);
}
inp = (struct inpcb *)so->so_pcb;
@@ -877,11 +995,11 @@ udp6_attach(struct socket *so, int proto, struct thread *td)
if (error) {
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (error);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (0);
}
@@ -889,13 +1007,15 @@ static int
udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_bind: inp == NULL"));
INP_WLOCK(inp);
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
@@ -923,7 +1043,7 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
#ifdef INET
out:
#endif
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
INP_WUNLOCK(inp);
return (error);
}
@@ -932,25 +1052,31 @@ static void
udp6_close(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_close: inp == NULL"));
+ INP_WLOCK(inp);
#ifdef INET
if (inp->inp_vflag & INP_IPV4) {
struct pr_usrreqs *pru;
+ uint8_t nxt;
- pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
+ nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
+ IPPROTO_UDP : IPPROTO_UDPLITE;
+ INP_WUNLOCK(inp);
+ pru = inetsw[ip_protox[nxt]].pr_usrreqs;
(*pru->pru_disconnect)(so);
return;
}
#endif
- INP_WLOCK(inp);
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
@@ -960,9 +1086,11 @@ static int
udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
struct sockaddr_in6 *sin6;
int error;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
sin6 = (struct sockaddr_in6 *)nam;
KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));
@@ -989,10 +1117,10 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = prison_remote_ip4(td->td_ucred, &sin.sin_addr);
if (error != 0)
goto out;
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
error = in_pcbconnect(inp, (struct sockaddr *)&sin,
td->td_ucred);
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
if (error == 0)
soisconnected(so);
goto out;
@@ -1007,9 +1135,9 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
if (error != 0)
goto out;
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
error = in6_pcbconnect(inp, nam, td->td_ucred);
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
if (error == 0)
soisconnected(so);
out:
@@ -1021,18 +1149,20 @@ static void
udp6_detach(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
struct udpcb *up;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_detach: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
+ INP_INFO_WLOCK(pcbinfo);
INP_WLOCK(inp);
up = intoudpcb(inp);
KASSERT(up != NULL, ("%s: up == NULL", __func__));
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
udp_discardcb(up);
}
@@ -1040,32 +1170,37 @@ static int
udp6_disconnect(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL"));
+ INP_WLOCK(inp);
#ifdef INET
if (inp->inp_vflag & INP_IPV4) {
struct pr_usrreqs *pru;
+ uint8_t nxt;
- pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
+ nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
+ IPPROTO_UDP : IPPROTO_UDPLITE;
+ INP_WUNLOCK(inp);
+ pru = inetsw[ip_protox[nxt]].pr_usrreqs;
(void)(*pru->pru_disconnect)(so);
return (0);
}
#endif
- INP_WLOCK(inp);
-
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
error = ENOTCONN;
goto out;
}
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTED; /* XXX */
SOCK_UNLOCK(so);
@@ -1079,8 +1214,10 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
struct sockaddr *addr, struct mbuf *control, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error = 0;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
@@ -1099,9 +1236,9 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
#ifdef INET
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
int hasv4addr;
- struct sockaddr_in6 *sin6 = 0;
+ struct sockaddr_in6 *sin6 = NULL;
- if (addr == 0)
+ if (addr == NULL)
hasv4addr = (inp->inp_vflag & INP_IPV4);
else {
sin6 = (struct sockaddr_in6 *)addr;
@@ -1110,7 +1247,10 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
}
if (hasv4addr) {
struct pr_usrreqs *pru;
+ uint8_t nxt;
+ nxt = (inp->inp_socket->so_proto->pr_protocol ==
+ IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE;
/*
* XXXRW: We release UDP-layer locks before calling
* udp_send() in order to avoid recursion. However,
@@ -1122,7 +1262,7 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
INP_WUNLOCK(inp);
if (sin6)
in6_sin6_2_sin_in_sock(addr);
- pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
+ pru = inetsw[ip_protox[nxt]].pr_usrreqs;
/* addr will just be freed in sendit(). */
return ((*pru->pru_send)(so, flags, m, addr, control,
td));
@@ -1132,11 +1272,9 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
#ifdef MAC
mac_inpcb_create_mbuf(inp, m);
#endif
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
error = udp6_output(inp, m, addr, control, td);
- INP_HASH_WUNLOCK(&V_udbinfo);
-#ifdef INET
-#endif
+ INP_HASH_WUNLOCK(pcbinfo);
INP_WUNLOCK(inp);
return (error);
diff --git a/freebsd/sys/netinet6/udp6_var.h b/freebsd/sys/netinet6/udp6_var.h
index ae53c5a8..cdab98b0 100644
--- a/freebsd/sys/netinet6/udp6_var.h
+++ b/freebsd/sys/netinet6/udp6_var.h
@@ -69,6 +69,7 @@ SYSCTL_DECL(_net_inet6_udp6);
extern struct pr_usrreqs udp6_usrreqs;
void udp6_ctlinput(int, struct sockaddr *, void *);
+void udplite6_ctlinput(int, struct sockaddr *, void *);
int udp6_input(struct mbuf **, int *, int);
#endif
diff --git a/freebsd/sys/netpfil/ipfw/dn_aqm.h b/freebsd/sys/netpfil/ipfw/dn_aqm.h
new file mode 100644
index 00000000..d01e98eb
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_aqm.h
@@ -0,0 +1,167 @@
+/*-
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * API for writing an Active Queue Management algorithm for Dummynet
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_DN_AQM_H
+#define _IP_DN_AQM_H
+
+
+/* NOW is the current time in millisecond*/
+#define NOW ((dn_cfg.curr_time * tick) / 1000)
+
+#define AQM_UNOW (dn_cfg.curr_time * tick)
+#define AQM_TIME_1US ((aqm_time_t)(1))
+#define AQM_TIME_1MS ((aqm_time_t)(1000))
+#define AQM_TIME_1S ((aqm_time_t)(AQM_TIME_1MS * 1000))
+
+/* aqm time allows to store up to 4294 seconds */
+typedef uint32_t aqm_time_t;
+typedef int32_t aqm_stime_t;
+
+#define DN_AQM_MTAG_TS 55345
+
+/* Macro for variable bounding */
+#define BOUND_VAR(x,l,h) ((x) > (h)? (h) : ((x) > (l)? (x) : (l)))
+
+/* sysctl variable to count number of dropped packets */
+extern unsigned long io_pkt_drop;
+
+/*
+ * Structure for holding data and function pointers that together represent a
+ * AQM algorithm.
+ */
+ struct dn_aqm {
+#define DN_AQM_NAME_MAX 50
+ char name[DN_AQM_NAME_MAX]; /* name of AQM algorithm */
+ uint32_t type; /* AQM type number */
+
+ /* Methods implemented by AQM algorithm:
+ *
+ * enqueue enqueue packet 'm' on queue 'q'.
+ * Return 0 on success, 1 on drop.
+ *
+ * dequeue dequeue a packet from queue 'q'.
+ * Return a packet, NULL if no packet available.
+ *
+ * config configure AQM algorithm
+ * If required, this function should allocate space to store
+ * the configurations and set 'fs->aqmcfg' to point to this space.
+ * 'dn_extra_parms' includes array of parameters send
+ * from ipfw userland command.
+ * Return 0 on success, non-zero otherwise.
+ *
+ * deconfig deconfigure AQM algorithm.
+ * The allocated configuration memory space should be freed here.
+ * Return 0 on success, non-zero otherwise.
+ *
+ * init initialise AQM status variables of queue 'q'
+ * This function is used to allocate space and init AQM status for a
+ * queue and q->aqm_status to point to this space.
+ * Return 0 on success, non-zero otherwise.
+ *
+ * cleanup cleanup AQM status variables of queue 'q'
+ * The allocated memory space for AQM status should be freed here.
+ * Return 0 on success, non-zero otherwise.
+ *
+ * getconfig retrieve AQM configurations
+ * This function is used to return AQM parameters to userland
+ * command. The function should fill 'dn_extra_parms' struct with
+ * the AQM configurations using 'par' array.
+ *
+ */
+
+ int (*enqueue)(struct dn_queue *, struct mbuf *);
+ struct mbuf * (*dequeue)(struct dn_queue *);
+ int (*config)(struct dn_fsk *, struct dn_extra_parms *ep, int);
+ int (*deconfig)(struct dn_fsk *);
+ int (*init)(struct dn_queue *);
+ int (*cleanup)(struct dn_queue *);
+ int (*getconfig)(struct dn_fsk *, struct dn_extra_parms *);
+
+ int ref_count; /*Number of queues instances in the system */
+ int cfg_ref_count; /*Number of AQM instances in the system */
+ SLIST_ENTRY (dn_aqm) next; /* Next AQM in the list */
+};
+
+/* Helper function to update queue and scheduler statistics.
+ * negative len + drop -> drop
+ * negative len -> dequeue
+ * positive len -> enqueue
+ * positive len + drop -> drop during enqueue
+ */
+__inline static void
+update_stats(struct dn_queue *q, int len, int drop)
+{
+ int inc = 0;
+ struct dn_flow *sni;
+ struct dn_flow *qni;
+
+ sni = &q->_si->ni;
+ qni = &q->ni;
+
+ if (len < 0)
+ inc = -1;
+ else if(len > 0)
+ inc = 1;
+
+ if (drop) {
+ qni->drops++;
+ sni->drops++;
+ io_pkt_drop++;
+ } else {
+ /*update queue stats */
+ qni->length += inc;
+ qni->len_bytes += len;
+
+ /*update scheduler instance stats */
+ sni->length += inc;
+ sni->len_bytes += len;
+ }
+ /* tot_pkts is updated in dn_enqueue function */
+}
+
+
+/* kernel module related function */
+int
+dn_aqm_modevent(module_t mod, int cmd, void *arg);
+
+#define DECLARE_DNAQM_MODULE(name, dnaqm) \
+ static moduledata_t name##_mod = { \
+ #name, dn_aqm_modevent, dnaqm \
+ }; \
+ DECLARE_MODULE(name, name##_mod, \
+ SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); \
+ MODULE_DEPEND(name, dummynet, 3, 3, 3)
+
+#endif
diff --git a/freebsd/sys/netpfil/ipfw/dn_aqm_codel.h b/freebsd/sys/netpfil/ipfw/dn_aqm_codel.h
new file mode 100644
index 00000000..f5618e76
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_aqm_codel.h
@@ -0,0 +1,222 @@
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm.
+ *
+ * $FreeBSD$
+ *
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
+ *
+ * Copyright (C) 2011-2014 Kathleen Nichols <nichols@pollere.com>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * o Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ *
+ * o Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * o The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General Public
+ * License ("GPL") version 2, in which case the provisions of the GPL
+ * apply INSTEAD OF those given above.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_DN_AQM_CODEL_H
+#define _IP_DN_AQM_CODEL_H
+
+
+// XXX How to choose MTAG?
+#define FIX_POINT_BITS 16
+
+enum {
+ CODEL_ECN_ENABLED = 1
+};
+
+/* Codel parameters */
+struct dn_aqm_codel_parms {
+ aqm_time_t target;
+ aqm_time_t interval;
+ uint32_t flags;
+};
+
+/* codel status variables */
+struct codel_status {
+ uint32_t count; /* number of dropped pkts since entering drop state */
+ uint16_t dropping; /* dropping state */
+ aqm_time_t drop_next_time; /* time for next drop */
+ aqm_time_t first_above_time; /* time for first ts over target we observed */
+ uint16_t isqrt; /* last isqrt for control low */
+ uint16_t maxpkt_size; /* max packet size seen so far */
+};
+
+struct mbuf *codel_extract_head(struct dn_queue *, aqm_time_t *);
+aqm_time_t control_law(struct codel_status *,
+ struct dn_aqm_codel_parms *, aqm_time_t );
+
+__inline static struct mbuf *
+codel_dodequeue(struct dn_queue *q, aqm_time_t now, uint16_t *ok_to_drop)
+{
+ struct mbuf * m;
+ struct dn_aqm_codel_parms *cprms;
+ struct codel_status *cst;
+ aqm_time_t pkt_ts, sojourn_time;
+
+ *ok_to_drop = 0;
+ m = codel_extract_head(q, &pkt_ts);
+
+ cst = q->aqm_status;
+
+ if (m == NULL) {
+ /* queue is empty - we can't be above target */
+ cst->first_above_time= 0;
+ return m;
+ }
+
+ cprms = q->fs->aqmcfg;
+
+ /* To span a large range of bandwidths, CoDel runs two
+ * different AQMs in parallel. One is sojourn-time-based
+ * and takes effect when the time to send an MTU-sized
+ * packet is less than target. The 1st term of the "if"
+ * below does this. The other is backlog-based and takes
+ * effect when the time to send an MTU-sized packet is >=
+ * target. The goal here is to keep the output link
+ * utilization high by never allowing the queue to get
+ * smaller than the amount that arrives in a typical
+ * interarrival time (MTU-sized packets arriving spaced
+ * by the amount of time it takes to send such a packet on
+ * the bottleneck). The 2nd term of the "if" does this.
+ */
+ sojourn_time = now - pkt_ts;
+ if (sojourn_time < cprms->target || q->ni.len_bytes <= cst->maxpkt_size) {
+ /* went below - stay below for at least interval */
+ cst->first_above_time = 0;
+ } else {
+ if (cst->first_above_time == 0) {
+ /* just went above from below. if still above at
+ * first_above_time, will say it's ok to drop. */
+ cst->first_above_time = now + cprms->interval;
+ } else if (now >= cst->first_above_time) {
+ *ok_to_drop = 1;
+ }
+ }
+ return m;
+}
+
+/*
+ * Dequeue a packet from queue 'q'
+ */
+__inline static struct mbuf *
+codel_dequeue(struct dn_queue *q)
+{
+ struct mbuf *m;
+ struct dn_aqm_codel_parms *cprms;
+ struct codel_status *cst;
+ aqm_time_t now;
+ uint16_t ok_to_drop;
+
+ cst = q->aqm_status;;
+ cprms = q->fs->aqmcfg;
+ now = AQM_UNOW;
+
+ m = codel_dodequeue(q, now, &ok_to_drop);
+ if (cst->dropping) {
+ if (!ok_to_drop) {
+ /* sojourn time below target - leave dropping state */
+ cst->dropping = false;
+ }
+ /*
+ * Time for the next drop. Drop current packet and dequeue
+ * next. If the dequeue doesn't take us out of dropping
+ * state, schedule the next drop. A large backlog might
+ * result in drop rates so high that the next drop should
+ * happen now, hence the 'while' loop.
+ */
+ while (now >= cst->drop_next_time && cst->dropping) {
+
+ /* mark the packet */
+ if (cprms->flags & CODEL_ECN_ENABLED && ecn_mark(m)) {
+ cst->count++;
+ /* schedule the next mark. */
+ cst->drop_next_time = control_law(cst, cprms,
+ cst->drop_next_time);
+ return m;
+ }
+
+ /* drop the packet */
+ update_stats(q, 0, 1);
+ FREE_PKT(m);
+ m = codel_dodequeue(q, now, &ok_to_drop);
+
+ if (!ok_to_drop) {
+ /* leave dropping state */
+ cst->dropping = false;
+ } else {
+ cst->count++;
+ /* schedule the next drop. */
+ cst->drop_next_time = control_law(cst, cprms,
+ cst->drop_next_time);
+ }
+ }
+ /* If we get here we're not in dropping state. The 'ok_to_drop'
+ * return from dodequeue means that the sojourn time has been
+ * above 'target' for 'interval' so enter dropping state.
+ */
+ } else if (ok_to_drop) {
+
+ /* if ECN option is disabled or the packet cannot be marked,
+ * drop the packet and extract another.
+ */
+ if (!(cprms->flags & CODEL_ECN_ENABLED) || !ecn_mark(m)) {
+ update_stats(q, 0, 1);
+ FREE_PKT(m);
+ m = codel_dodequeue(q, now, &ok_to_drop);
+ }
+
+ cst->dropping = true;
+
+ /* If min went above target close to when it last went
+ * below, assume that the drop rate that controlled the
+ * queue on the last cycle is a good starting point to
+ * control it now. ('drop_next' will be at most 'interval'
+ * later than the time of the last drop so 'now - drop_next'
+ * is a good approximation of the time from the last drop
+ * until now.)
+ */
+ cst->count = (cst->count > 2 && ((aqm_stime_t)now -
+ (aqm_stime_t)cst->drop_next_time) < 8* cprms->interval)?
+ cst->count - 2 : 1;
+ /* we don't have to set initial guess for Newton's method isqrt as
+	 * we initialize isqrt in control_law function when count == 1 */
+ cst->drop_next_time = control_law(cst, cprms, now);
+ }
+
+ return m;
+}
+
+#endif
diff --git a/freebsd/sys/netpfil/ipfw/dn_aqm_pie.h b/freebsd/sys/netpfil/ipfw/dn_aqm_pie.h
new file mode 100644
index 00000000..aa2fceba
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_aqm_pie.h
@@ -0,0 +1,153 @@
+/*
+ * PIE - Proportional Integral controller Enhanced AQM algorithm.
+ *
+ * $FreeBSD$
+ *
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _IP_DN_AQM_PIE_H
+#define _IP_DN_AQM_PIE_H
+
+#define DN_AQM_PIE 2
+#define PIE_DQ_THRESHOLD_BITS 14
+/* 2^14 =16KB */
+#define PIE_DQ_THRESHOLD (1UL << PIE_DQ_THRESHOLD_BITS)
+#define MEAN_PKTSIZE 800
+
+/* 31-bits because random() generates range from 0->(2**31)-1 */
+#define PIE_PROB_BITS 31
+#define PIE_MAX_PROB ((1ULL<<PIE_PROB_BITS) -1)
+
+/* for 16-bits, we have 3-bits for integer part and 13-bits for fraction */
+#define PIE_FIX_POINT_BITS 13
+#define PIE_SCALE (1UL<<PIE_FIX_POINT_BITS)
+
+
+/* PIE options */
+enum {
+ PIE_ECN_ENABLED =1,
+ PIE_CAPDROP_ENABLED = 2,
+ PIE_ON_OFF_MODE_ENABLED = 4,
+ PIE_DEPRATEEST_ENABLED = 8,
+ PIE_DERAND_ENABLED = 16
+};
+
+/* PIE parameters */
+struct dn_aqm_pie_parms {
+ aqm_time_t qdelay_ref; /* AQM Latency Target (default: 15ms) */
+ aqm_time_t tupdate; /* a period to calculate drop probability (default:15ms) */
+ aqm_time_t max_burst; /* AQM Max Burst Allowance (default: 150ms) */
+ uint16_t max_ecnth; /*AQM Max ECN Marking Threshold (default: 10%) */
+ uint16_t alpha; /* (default: 1/8) */
+ uint16_t beta; /* (default: 1+1/4) */
+ uint32_t flags; /* PIE options */
+};
+
+/* PIE status variables */
+struct pie_status{
+ struct callout aqm_pie_callout;
+ aqm_time_t burst_allowance;
+ uint32_t drop_prob;
+ aqm_time_t current_qdelay;
+ aqm_time_t qdelay_old;
+ uint64_t accu_prob;
+ aqm_time_t measurement_start;
+ aqm_time_t avg_dq_time;
+ uint32_t dq_count;
+ uint32_t sflags;
+ struct dn_aqm_pie_parms *parms; /* pointer to PIE configurations */
+ /* pointer to parent queue of FQ-PIE sub-queues, or queue of owner fs. */
+ struct dn_queue *pq;
+ struct mtx lock_mtx;
+	uint32_t one_third_q_size; /* 1/3 of queue size, for speed optimization */
+};
+
+enum {
+ ENQUE = 1,
+ DROP,
+ MARKECN
+};
+
+/* PIE current state */
+enum {
+ PIE_ACTIVE = 1,
+ PIE_INMEASUREMENT = 2
+};
+
+/*
+ * Check if enqueue should drop packet to control delay or not based on
+ * the PIE algorithm.
+ * return DROP if it is time to drop or ENQUE otherwise.
+ * This function is used by PIE and FQ-PIE.
+ */
+__inline static int
+drop_early(struct pie_status *pst, uint32_t qlen)
+{
+ struct dn_aqm_pie_parms *pprms;
+
+ pprms = pst->parms;
+
+ /* queue is not congested */
+
+ if ((pst->qdelay_old < (pprms->qdelay_ref >> 1)
+ && pst->drop_prob < PIE_MAX_PROB / 5 )
+ || qlen <= 2 * MEAN_PKTSIZE)
+ return ENQUE;
+
+
+ if (pst->drop_prob == 0)
+ pst->accu_prob = 0;
+
+ /* increment accu_prob */
+ if (pprms->flags & PIE_DERAND_ENABLED)
+ pst->accu_prob += pst->drop_prob;
+
+ /* De-randomize option
+ * if accu_prob < 0.85 -> enqueue
+ * if accu_prob>8.5 ->drop
+ * between 0.85 and 8.5 || !De-randomize --> drop on prob
+ *
+ * (0.85 = 17/20 ,8.5 = 17/2)
+ */
+ if (pprms->flags & PIE_DERAND_ENABLED) {
+ if(pst->accu_prob < (uint64_t) (PIE_MAX_PROB * 17 / 20))
+ return ENQUE;
+ if( pst->accu_prob >= (uint64_t) (PIE_MAX_PROB * 17 / 2))
+ return DROP;
+ }
+
+ if (random() < pst->drop_prob) {
+ pst->accu_prob = 0;
+ return DROP;
+ }
+
+ return ENQUE;
+}
+
+#endif
diff --git a/freebsd/sys/netpfil/ipfw/dn_heap.c b/freebsd/sys/netpfil/ipfw/dn_heap.c
deleted file mode 100644
index 15e2870d..00000000
--- a/freebsd/sys/netpfil/ipfw/dn_heap.c
+++ /dev/null
@@ -1,554 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*-
- * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * Binary heap and hash tables, used in dummynet
- *
- * $FreeBSD$
- */
-
-#include <sys/cdefs.h>
-#include <rtems/bsd/sys/param.h>
-#ifdef _KERNEL
-__FBSDID("$FreeBSD$");
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <netpfil/ipfw/dn_heap.h>
-#ifndef log
-#define log(x, arg...)
-#endif
-
-#else /* !_KERNEL */
-
-#include <stdio.h>
-#include <dn_test.h>
-#include <strings.h>
-#include <stdlib.h>
-
-#include "dn_heap.h"
-#define log(x, arg...) fprintf(stderr, ## arg)
-#define panic(x...) fprintf(stderr, ## x), exit(1)
-#define MALLOC_DEFINE(a, b, c)
-static void *my_malloc(int s) { return malloc(s); }
-static void my_free(void *p) { free(p); }
-#define malloc(s, t, w) my_malloc(s)
-#define free(p, t) my_free(p)
-#endif /* !_KERNEL */
-
-MALLOC_DEFINE(M_DN_HEAP, "dummynet", "dummynet heap");
-
-/*
- * Heap management functions.
- *
- * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
- * Some macros help finding parent/children so we can optimize them.
- *
- * heap_init() is called to expand the heap when needed.
- * Increment size in blocks of 16 entries.
- * Returns 1 on error, 0 on success
- */
-#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )
-#define HEAP_LEFT(x) ( (x)+(x) + 1 )
-#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
-#define HEAP_INCREMENT 15
-
-static int
-heap_resize(struct dn_heap *h, unsigned int new_size)
-{
- struct dn_heap_entry *p;
-
- if (h->size >= new_size ) /* have enough room */
- return 0;
-#if 1 /* round to the next power of 2 */
- new_size |= new_size >> 1;
- new_size |= new_size >> 2;
- new_size |= new_size >> 4;
- new_size |= new_size >> 8;
- new_size |= new_size >> 16;
-#else
- new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT;
-#endif
- p = malloc(new_size * sizeof(*p), M_DN_HEAP, M_NOWAIT);
- if (p == NULL) {
- printf("--- %s, resize %d failed\n", __func__, new_size );
- return 1; /* error */
- }
- if (h->size > 0) {
- bcopy(h->p, p, h->size * sizeof(*p) );
- free(h->p, M_DN_HEAP);
- }
- h->p = p;
- h->size = new_size;
- return 0;
-}
-
-int
-heap_init(struct dn_heap *h, int size, int ofs)
-{
- if (heap_resize(h, size))
- return 1;
- h->elements = 0;
- h->ofs = ofs;
- return 0;
-}
-
-/*
- * Insert element in heap. Normally, p != NULL, we insert p in
- * a new position and bubble up. If p == NULL, then the element is
- * already in place, and key is the position where to start the
- * bubble-up.
- * Returns 1 on failure (cannot allocate new heap entry)
- *
- * If ofs > 0 the position (index, int) of the element in the heap is
- * also stored in the element itself at the given offset in bytes.
- */
-#define SET_OFFSET(h, i) do { \
- if (h->ofs > 0) \
- *((int32_t *)((char *)(h->p[i].object) + h->ofs)) = i; \
- } while (0)
-/*
- * RESET_OFFSET is used for sanity checks. It sets ofs
- * to an invalid value.
- */
-#define RESET_OFFSET(h, i) do { \
- if (h->ofs > 0) \
- *((int32_t *)((char *)(h->p[i].object) + h->ofs)) = -16; \
- } while (0)
-
-int
-heap_insert(struct dn_heap *h, uint64_t key1, void *p)
-{
- int son = h->elements;
-
- //log("%s key %llu p %p\n", __FUNCTION__, key1, p);
- if (p == NULL) { /* data already there, set starting point */
- son = key1;
- } else { /* insert new element at the end, possibly resize */
- son = h->elements;
- if (son == h->size) /* need resize... */
- // XXX expand by 16 or so
- if (heap_resize(h, h->elements+16) )
- return 1; /* failure... */
- h->p[son].object = p;
- h->p[son].key = key1;
- h->elements++;
- }
- /* make sure that son >= father along the path */
- while (son > 0) {
- int father = HEAP_FATHER(son);
- struct dn_heap_entry tmp;
-
- if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
- break; /* found right position */
- /* son smaller than father, swap and repeat */
- HEAP_SWAP(h->p[son], h->p[father], tmp);
- SET_OFFSET(h, son);
- son = father;
- }
- SET_OFFSET(h, son);
- return 0;
-}
-
-/*
- * remove top element from heap, or obj if obj != NULL
- */
-void
-heap_extract(struct dn_heap *h, void *obj)
-{
- int child, father, max = h->elements - 1;
-
- if (max < 0) {
- printf("--- %s: empty heap 0x%p\n", __FUNCTION__, h);
- return;
- }
- if (obj == NULL)
- father = 0; /* default: move up smallest child */
- else { /* extract specific element, index is at offset */
- if (h->ofs <= 0)
- panic("%s: extract from middle not set on %p\n",
- __FUNCTION__, h);
- father = *((int *)((char *)obj + h->ofs));
- if (father < 0 || father >= h->elements) {
- panic("%s: father %d out of bound 0..%d\n",
- __FUNCTION__, father, h->elements);
- }
- }
- /*
- * below, father is the index of the empty element, which
- * we replace at each step with the smallest child until we
- * reach the bottom level.
- */
- // XXX why removing RESET_OFFSET increases runtime by 10% ?
- RESET_OFFSET(h, father);
- while ( (child = HEAP_LEFT(father)) <= max ) {
- if (child != max &&
- DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
- child++; /* take right child, otherwise left */
- h->p[father] = h->p[child];
- SET_OFFSET(h, father);
- father = child;
- }
- h->elements--;
- if (father != max) {
- /*
- * Fill hole with last entry and bubble up,
- * reusing the insert code
- */
- h->p[father] = h->p[max];
- heap_insert(h, father, NULL);
- }
-}
-
-#if 0
-/*
- * change object position and update references
- * XXX this one is never used!
- */
-static void
-heap_move(struct dn_heap *h, uint64_t new_key, void *object)
-{
- int temp, i, max = h->elements-1;
- struct dn_heap_entry *p, buf;
-
- if (h->ofs <= 0)
- panic("cannot move items on this heap");
- p = h->p; /* shortcut */
-
- i = *((int *)((char *)object + h->ofs));
- if (DN_KEY_LT(new_key, p[i].key) ) { /* must move up */
- p[i].key = new_key;
- for (; i>0 &&
- DN_KEY_LT(new_key, p[(temp = HEAP_FATHER(i))].key);
- i = temp ) { /* bubble up */
- HEAP_SWAP(p[i], p[temp], buf);
- SET_OFFSET(h, i);
- }
- } else { /* must move down */
- p[i].key = new_key;
- while ( (temp = HEAP_LEFT(i)) <= max ) {
- /* found left child */
- if (temp != max &&
- DN_KEY_LT(p[temp+1].key, p[temp].key))
- temp++; /* select child with min key */
- if (DN_KEY_LT(>p[temp].key, new_key)) {
- /* go down */
- HEAP_SWAP(p[i], p[temp], buf);
- SET_OFFSET(h, i);
- } else
- break;
- i = temp;
- }
- }
- SET_OFFSET(h, i);
-}
-#endif /* heap_move, unused */
-
-/*
- * heapify() will reorganize data inside an array to maintain the
- * heap property. It is needed when we delete a bunch of entries.
- */
-static void
-heapify(struct dn_heap *h)
-{
- int i;
-
- for (i = 0; i < h->elements; i++ )
- heap_insert(h, i , NULL);
-}
-
-int
-heap_scan(struct dn_heap *h, int (*fn)(void *, uintptr_t),
- uintptr_t arg)
-{
- int i, ret, found;
-
- for (i = found = 0 ; i < h->elements ;) {
- ret = fn(h->p[i].object, arg);
- if (ret & HEAP_SCAN_DEL) {
- h->elements-- ;
- h->p[i] = h->p[h->elements] ;
- found++ ;
- } else
- i++ ;
- if (ret & HEAP_SCAN_END)
- break;
- }
- if (found)
- heapify(h);
- return found;
-}
-
-/*
- * cleanup the heap and free data structure
- */
-void
-heap_free(struct dn_heap *h)
-{
- if (h->size >0 )
- free(h->p, M_DN_HEAP);
- bzero(h, sizeof(*h) );
-}
-
-/*
- * hash table support.
- */
-
-struct dn_ht {
- int buckets; /* how many buckets, really buckets - 1*/
- int entries; /* how many entries */
- int ofs; /* offset of link field */
- uint32_t (*hash)(uintptr_t, int, void *arg);
- int (*match)(void *_el, uintptr_t key, int, void *);
- void *(*newh)(uintptr_t, int, void *);
- void **ht; /* bucket heads */
-};
-/*
- * Initialize, allocating bucket pointers inline.
- * Recycle previous record if possible.
- * If the 'newh' function is not supplied, we assume that the
- * key passed to ht_find is the same object to be stored in.
- */
-struct dn_ht *
-dn_ht_init(struct dn_ht *ht, int buckets, int ofs,
- uint32_t (*h)(uintptr_t, int, void *),
- int (*match)(void *, uintptr_t, int, void *),
- void *(*newh)(uintptr_t, int, void *))
-{
- int l;
-
- /*
- * Notes about rounding bucket size to a power of two.
- * Given the original bucket size, we compute the nearest lower and
- * higher power of two, minus 1 (respectively b_min and b_max) because
- * this value will be used to do an AND with the index returned
- * by hash function.
- * To choice between these two values, the original bucket size is
- * compared with b_min. If the original size is greater than 4/3 b_min,
- * we round the bucket size to b_max, else to b_min.
- * This ratio try to round to the nearest power of two, advantaging
- * the greater size if the different between two power is relatively
- * big.
- * Rounding the bucket size to a power of two avoid the use of
- * module when calculating the correct bucket.
- * The ht->buckets variable store the bucket size - 1 to simply
- * do an AND between the index returned by hash function and ht->bucket
- * instead of a module.
- */
- int b_min; /* min buckets */
- int b_max; /* max buckets */
- int b_ori; /* original buckets */
-
- if (h == NULL || match == NULL) {
- printf("--- missing hash or match function");
- return NULL;
- }
- if (buckets < 1 || buckets > 65536)
- return NULL;
-
- b_ori = buckets;
- /* calculate next power of 2, - 1*/
- buckets |= buckets >> 1;
- buckets |= buckets >> 2;
- buckets |= buckets >> 4;
- buckets |= buckets >> 8;
- buckets |= buckets >> 16;
-
- b_max = buckets; /* Next power */
- b_min = buckets >> 1; /* Previous power */
-
- /* Calculate the 'nearest' bucket size */
- if (b_min * 4000 / 3000 < b_ori)
- buckets = b_max;
- else
- buckets = b_min;
-
- if (ht) { /* see if we can reuse */
- if (buckets <= ht->buckets) {
- ht->buckets = buckets;
- } else {
- /* free pointers if not allocated inline */
- if (ht->ht != (void *)(ht + 1))
- free(ht->ht, M_DN_HEAP);
- free(ht, M_DN_HEAP);
- ht = NULL;
- }
- }
- if (ht == NULL) {
- /* Allocate buckets + 1 entries because buckets is use to
- * do the AND with the index returned by hash function
- */
- l = sizeof(*ht) + (buckets + 1) * sizeof(void **);
- ht = malloc(l, M_DN_HEAP, M_NOWAIT | M_ZERO);
- }
- if (ht) {
- ht->ht = (void **)(ht + 1);
- ht->buckets = buckets;
- ht->ofs = ofs;
- ht->hash = h;
- ht->match = match;
- ht->newh = newh;
- }
- return ht;
-}
-
-/* dummy callback for dn_ht_free to unlink all */
-static int
-do_del(void *obj, void *arg)
-{
- return DNHT_SCAN_DEL;
-}
-
-void
-dn_ht_free(struct dn_ht *ht, int flags)
-{
- if (ht == NULL)
- return;
- if (flags & DNHT_REMOVE) {
- (void)dn_ht_scan(ht, do_del, NULL);
- } else {
- if (ht->ht && ht->ht != (void *)(ht + 1))
- free(ht->ht, M_DN_HEAP);
- free(ht, M_DN_HEAP);
- }
-}
-
-int
-dn_ht_entries(struct dn_ht *ht)
-{
- return ht ? ht->entries : 0;
-}
-
-/* lookup and optionally create or delete element */
-void *
-dn_ht_find(struct dn_ht *ht, uintptr_t key, int flags, void *arg)
-{
- int i;
- void **pp, *p;
-
- if (ht == NULL) /* easy on an empty hash */
- return NULL;
- i = (ht->buckets == 1) ? 0 :
- (ht->hash(key, flags, arg) & ht->buckets);
-
- for (pp = &ht->ht[i]; (p = *pp); pp = (void **)((char *)p + ht->ofs)) {
- if (flags & DNHT_MATCH_PTR) {
- if (key == (uintptr_t)p)
- break;
- } else if (ht->match(p, key, flags, arg)) /* found match */
- break;
- }
- if (p) {
- if (flags & DNHT_REMOVE) {
- /* link in the next element */
- *pp = *(void **)((char *)p + ht->ofs);
- *(void **)((char *)p + ht->ofs) = NULL;
- ht->entries--;
- }
- } else if (flags & DNHT_INSERT) {
- // printf("%s before calling new, bucket %d ofs %d\n",
- // __FUNCTION__, i, ht->ofs);
- p = ht->newh ? ht->newh(key, flags, arg) : (void *)key;
- // printf("%s newh returns %p\n", __FUNCTION__, p);
- if (p) {
- ht->entries++;
- *(void **)((char *)p + ht->ofs) = ht->ht[i];
- ht->ht[i] = p;
- }
- }
- return p;
-}
-
-/*
- * do a scan with the option to delete the object. Extract next before
- * running the callback because the element may be destroyed there.
- */
-int
-dn_ht_scan(struct dn_ht *ht, int (*fn)(void *, void *), void *arg)
-{
- int i, ret, found = 0;
- void **curp, *cur, *next;
-
- if (ht == NULL || fn == NULL)
- return 0;
- for (i = 0; i <= ht->buckets; i++) {
- curp = &ht->ht[i];
- while ( (cur = *curp) != NULL) {
- next = *(void **)((char *)cur + ht->ofs);
- ret = fn(cur, arg);
- if (ret & DNHT_SCAN_DEL) {
- found++;
- ht->entries--;
- *curp = next;
- } else {
- curp = (void **)((char *)cur + ht->ofs);
- }
- if (ret & DNHT_SCAN_END)
- return found;
- }
- }
- return found;
-}
-
-/*
- * Similar to dn_ht_scan(), except that the scan is performed only
- * in the bucket 'bucket'. The function returns a correct bucket number if
- * the original is invalid.
- * If the callback returns DNHT_SCAN_END, the function move the ht->ht[i]
- * pointer to the last entry processed. Moreover, the bucket number passed
- * by caller is decremented, because usually the caller increment it.
- */
-int
-dn_ht_scan_bucket(struct dn_ht *ht, int *bucket, int (*fn)(void *, void *),
- void *arg)
-{
- int i, ret, found = 0;
- void **curp, *cur, *next;
-
- if (ht == NULL || fn == NULL)
- return 0;
- if (*bucket > ht->buckets)
- *bucket = 0;
- i = *bucket;
-
- curp = &ht->ht[i];
- while ( (cur = *curp) != NULL) {
- next = *(void **)((char *)cur + ht->ofs);
- ret = fn(cur, arg);
- if (ret & DNHT_SCAN_DEL) {
- found++;
- ht->entries--;
- *curp = next;
- } else {
- curp = (void **)((char *)cur + ht->ofs);
- }
- if (ret & DNHT_SCAN_END)
- return found;
- }
- return found;
-}
diff --git a/freebsd/sys/netpfil/ipfw/dn_heap.h b/freebsd/sys/netpfil/ipfw/dn_heap.h
index c95473ad..cb6e03ef 100644
--- a/freebsd/sys/netpfil/ipfw/dn_heap.h
+++ b/freebsd/sys/netpfil/ipfw/dn_heap.h
@@ -83,7 +83,7 @@ enum {
* heap_insert() adds a key-pointer pair to the heap
*
* HEAP_TOP() returns a pointer to the top element of the heap,
- * but makes no checks on its existance (XXX should we change ?)
+ * but makes no checks on its existence (XXX should we change ?)
*
* heap_extract() removes the entry at the top, returing the pointer.
* (the key should have been read before).
@@ -146,7 +146,7 @@ int heap_scan(struct dn_heap *, int (*)(void *, uintptr_t), uintptr_t);
* of the dn_ht_find(), and of the callbacks:
*
* DNHT_KEY_IS_OBJ means the key is the object pointer.
- * It is usally of interest for the hash and match functions.
+ * It is usually of interest for the hash and match functions.
*
* DNHT_MATCH_PTR during a lookup, match pointers instead
* of calling match(). Normally used when removing specific
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched.h b/freebsd/sys/netpfil/ipfw/dn_sched.h
index ab823fe7..ab32771b 100644
--- a/freebsd/sys/netpfil/ipfw/dn_sched.h
+++ b/freebsd/sys/netpfil/ipfw/dn_sched.h
@@ -132,6 +132,10 @@ struct dn_alg {
int (*free_fsk)(struct dn_fsk *f);
int (*new_queue)(struct dn_queue *q);
int (*free_queue)(struct dn_queue *q);
+#ifdef NEW_AQM
+ /* Getting scheduler extra parameters */
+ int (*getconfig)(struct dn_schk *, struct dn_extra_parms *);
+#endif
/* run-time fields */
int ref_count; /* XXX number of instances in the system */
@@ -165,7 +169,13 @@ dn_dequeue(struct dn_queue *q)
struct mbuf *m = q->mq.head;
if (m == NULL)
return NULL;
+#ifdef NEW_AQM
+ /* Call AQM dequeue function */
+ if (q->fs->aqmfp && q->fs->aqmfp->dequeue )
+ return q->fs->aqmfp->dequeue(q);
+#endif
q->mq.head = m->m_nextpkt;
+ q->mq.count--;
/* Update stats for the queue */
q->ni.length--;
@@ -186,6 +196,6 @@ int dn_sched_modevent(module_t mod, int cmd, void *arg);
#name, dn_sched_modevent, dnsched \
}; \
DECLARE_MODULE(name, name##_mod, \
- SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); \
- MODULE_DEPEND(name, dummynet, 3, 3, 3);
+ SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); \
+ MODULE_DEPEND(name, dummynet, 3, 3, 3)
#endif /* _DN_SCHED_H */
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_fifo.c b/freebsd/sys/netpfil/ipfw/dn_sched_fifo.c
deleted file mode 100644
index 154a7ac6..00000000
--- a/freebsd/sys/netpfil/ipfw/dn_sched_fifo.c
+++ /dev/null
@@ -1,122 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*
- * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- */
-
-#ifdef _KERNEL
-#include <sys/malloc.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <net/if.h> /* IFNAMSIZ */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ipfw_rule_ref */
-#include <netinet/ip_fw.h> /* flow_id */
-#include <netinet/ip_dummynet.h>
-#include <netpfil/ipfw/dn_heap.h>
-#include <netpfil/ipfw/ip_dn_private.h>
-#include <netpfil/ipfw/dn_sched.h>
-#else
-#include <dn_test.h>
-#endif
-
-/*
- * This file implements a FIFO scheduler for a single queue.
- * The queue is allocated as part of the scheduler instance,
- * and there is a single flowset is in the template which stores
- * queue size and policy.
- * Enqueue and dequeue use the default library functions.
- */
-static int
-fifo_enqueue(struct dn_sch_inst *si, struct dn_queue *q, struct mbuf *m)
-{
- /* XXX if called with q != NULL and m=NULL, this is a
- * re-enqueue from an existing scheduler, which we should
- * handle.
- */
- return dn_enqueue((struct dn_queue *)(si+1), m, 0);
-}
-
-static struct mbuf *
-fifo_dequeue(struct dn_sch_inst *si)
-{
- return dn_dequeue((struct dn_queue *)(si + 1));
-}
-
-static int
-fifo_new_sched(struct dn_sch_inst *si)
-{
- /* This scheduler instance contains the queue */
- struct dn_queue *q = (struct dn_queue *)(si + 1);
-
- set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
- q->_si = si;
- q->fs = si->sched->fs;
- return 0;
-}
-
-static int
-fifo_free_sched(struct dn_sch_inst *si)
-{
- struct dn_queue *q = (struct dn_queue *)(si + 1);
- dn_free_pkts(q->mq.head);
- bzero(q, sizeof(*q));
- return 0;
-}
-
-/*
- * FIFO scheduler descriptor
- * contains the type of the scheduler, the name, the size of extra
- * data structures, and function pointers.
- */
-static struct dn_alg fifo_desc = {
- _SI( .type = ) DN_SCHED_FIFO,
- _SI( .name = ) "FIFO",
- _SI( .flags = ) 0,
-
- _SI( .schk_datalen = ) 0,
- _SI( .si_datalen = ) sizeof(struct dn_queue),
- _SI( .q_datalen = ) 0,
-
- _SI( .enqueue = ) fifo_enqueue,
- _SI( .dequeue = ) fifo_dequeue,
- _SI( .config = ) NULL,
- _SI( .destroy = ) NULL,
- _SI( .new_sched = ) fifo_new_sched,
- _SI( .free_sched = ) fifo_free_sched,
- _SI( .new_fsk = ) NULL,
- _SI( .free_fsk = ) NULL,
- _SI( .new_queue = ) NULL,
- _SI( .free_queue = ) NULL,
-};
-
-DECLARE_DNSCHED_MODULE(dn_fifo, &fifo_desc);
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel.h b/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel.h
new file mode 100644
index 00000000..4b65781e
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel.h
@@ -0,0 +1,167 @@
+/*-
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * FQ_Codel Structures and helper functions
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_DN_SCHED_FQ_CODEL_H
+#define _IP_DN_SCHED_FQ_CODEL_H
+
+/* list of queues */
+STAILQ_HEAD(fq_codel_list, fq_codel_flow) ;
+
+/* fq_codel parameters including codel */
+struct dn_sch_fq_codel_parms {
+ struct dn_aqm_codel_parms ccfg; /* CoDel Parameters */
+ /* FQ_CODEL Parameters */
+ uint32_t flows_cnt; /* number of flows */
+ uint32_t limit; /* hard limit of fq_codel queue size*/
+ uint32_t quantum;
+}; /* defaults */
+
+/* flow (sub-queue) stats */
+struct flow_stats {
+ uint64_t tot_pkts; /* statistics counters */
+ uint64_t tot_bytes;
+ uint32_t length; /* Queue length, in packets */
+ uint32_t len_bytes; /* Queue length, in bytes */
+ uint32_t drops;
+};
+
+/* A flow of packets (sub-queue).*/
+struct fq_codel_flow {
+ struct mq mq; /* list of packets */
+ struct flow_stats stats; /* statistics */
+ int deficit;
+ int active; /* 1: flow is active (in a list) */
+ struct codel_status cst;
+ STAILQ_ENTRY(fq_codel_flow) flowchain;
+};
+
+/* extra fq_codel scheduler configurations */
+struct fq_codel_schk {
+ struct dn_sch_fq_codel_parms cfg;
+};
+
+/* fq_codel scheduler instance */
+struct fq_codel_si {
+ struct dn_sch_inst _si; /* standard scheduler instance */
+ struct dn_queue main_q; /* main queue is after si directly */
+
+ struct fq_codel_flow *flows; /* array of flows (queues) */
+ uint32_t perturbation; /* random value */
+ struct fq_codel_list newflows; /* list of new queues */
+ struct fq_codel_list oldflows; /* list of old queues */
+};
+
+/* Helper function to update queue&main-queue and scheduler statistics.
+ * negative len + drop -> drop
+ * negative len -> dequeue
+ * positive len -> enqueue
+ * positive len + drop -> drop during enqueue
+ */
+__inline static void
+fq_update_stats(struct fq_codel_flow *q, struct fq_codel_si *si, int len,
+ int drop)
+{
+ int inc = 0;
+
+ if (len < 0)
+ inc = -1;
+ else if (len > 0)
+ inc = 1;
+
+ if (drop) {
+ si->main_q.ni.drops ++;
+ q->stats.drops ++;
+ si->_si.ni.drops ++;
+ io_pkt_drop ++;
+ }
+
+ if (!drop || (drop && len < 0)) {
+ /* Update stats for the main queue */
+ si->main_q.ni.length += inc;
+ si->main_q.ni.len_bytes += len;
+
+ /*update sub-queue stats */
+ q->stats.length += inc;
+ q->stats.len_bytes += len;
+
+ /*update scheduler instance stats */
+ si->_si.ni.length += inc;
+ si->_si.ni.len_bytes += len;
+ }
+
+ if (inc > 0) {
+ si->main_q.ni.tot_bytes += len;
+ si->main_q.ni.tot_pkts ++;
+
+ q->stats.tot_bytes +=len;
+ q->stats.tot_pkts++;
+
+ si->_si.ni.tot_bytes +=len;
+ si->_si.ni.tot_pkts ++;
+ }
+
+}
+
+/* extract the head of fq_codel sub-queue */
+__inline static struct mbuf *
+fq_codel_extract_head(struct fq_codel_flow *q, aqm_time_t *pkt_ts, struct fq_codel_si *si)
+{
+ struct mbuf *m = q->mq.head;
+
+ if (m == NULL)
+ return m;
+ q->mq.head = m->m_nextpkt;
+
+ fq_update_stats(q, si, -m->m_pkthdr.len, 0);
+
+ if (si->main_q.ni.length == 0) /* queue is now idle */
+ si->main_q.q_time = dn_cfg.curr_time;
+
+ /* extract packet timestamp*/
+ struct m_tag *mtag;
+ mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+ if (mtag == NULL){
+ D("timestamp tag is not found!");
+ *pkt_ts = 0;
+ } else {
+ *pkt_ts = *(aqm_time_t *)(mtag + 1);
+ m_tag_delete(m,mtag);
+ }
+
+ return m;
+}
+
+
+#endif
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h b/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h
new file mode 100644
index 00000000..da663dc8
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h
@@ -0,0 +1,187 @@
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm.
+ *
+ * $FreeBSD$
+ *
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
+ *
+ * Copyright (C) 2011-2014 Kathleen Nichols <nichols@pollere.com>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * o Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ *
+ * o Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * o The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General Public
+ * License ("GPL") version 2, in which case the provisions of the GPL
+ * apply INSTEAD OF those given above.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_DN_SCHED_FQ_CODEL_HELPER_H
+#define _IP_DN_SCHED_FQ_CODEL_HELPER_H
+
+__inline static struct mbuf *
+fqc_dodequeue(struct fq_codel_flow *q, aqm_time_t now, uint16_t *ok_to_drop,
+ struct fq_codel_si *si)
+{
+ struct mbuf * m;
+ struct fq_codel_schk *schk = (struct fq_codel_schk *)(si->_si.sched+1);
+ aqm_time_t pkt_ts, sojourn_time;
+
+ *ok_to_drop = 0;
+ m = fq_codel_extract_head(q, &pkt_ts, si);
+
+ if (m == NULL) {
+ /*queue is empty - we can't be above target*/
+ q->cst.first_above_time= 0;
+ return m;
+ }
+
+ /* To span a large range of bandwidths, CoDel runs two
+ * different AQMs in parallel. One is sojourn-time-based
+ * and takes effect when the time to send an MTU-sized
+ * packet is less than target. The 1st term of the "if"
+ * below does this. The other is backlog-based and takes
+ * effect when the time to send an MTU-sized packet is >=
+ * target. The goal here is to keep the output link
+ * utilization high by never allowing the queue to get
+ * smaller than the amount that arrives in a typical
+ * interarrival time (MTU-sized packets arriving spaced
+ * by the amount of time it takes to send such a packet on
+ * the bottleneck). The 2nd term of the "if" does this.
+ */
+ sojourn_time = now - pkt_ts;
+ if (sojourn_time < schk->cfg.ccfg.target || q->stats.len_bytes <= q->cst.maxpkt_size) {
+ /* went below - stay below for at least interval */
+ q->cst.first_above_time = 0;
+ } else {
+ if (q->cst.first_above_time == 0) {
+ /* just went above from below. if still above at
+ * first_above_time, will say it's ok to drop. */
+ q->cst.first_above_time = now + schk->cfg.ccfg.interval;
+ } else if (now >= q->cst.first_above_time) {
+ *ok_to_drop = 1;
+ }
+ }
+ return m;
+}
+
+/* Codel dequeue function */
+__inline static struct mbuf *
+fqc_codel_dequeue(struct fq_codel_flow *q, struct fq_codel_si *si)
+{
+ struct mbuf *m;
+ struct dn_aqm_codel_parms *cprms;
+ struct codel_status *cst;
+ aqm_time_t now;
+ uint16_t ok_to_drop;
+ struct fq_codel_schk *schk = (struct fq_codel_schk *)(si->_si.sched+1);
+
+ cst = &q->cst;
+ cprms = &schk->cfg.ccfg;
+
+ now = AQM_UNOW;
+ m = fqc_dodequeue(q, now, &ok_to_drop, si);
+
+ if (cst->dropping) {
+ if (!ok_to_drop) {
+ /* sojourn time below target - leave dropping state */
+ cst->dropping = false;
+ }
+
+ /* Time for the next drop. Drop current packet and dequeue
+ * next. If the dequeue doesn't take us out of dropping
+ * state, schedule the next drop. A large backlog might
+ * result in drop rates so high that the next drop should
+ * happen now, hence the 'while' loop.
+ */
+ while (now >= cst->drop_next_time && cst->dropping) {
+
+ /* mark the packet */
+ if (cprms->flags & CODEL_ECN_ENABLED && ecn_mark(m)) {
+ cst->count++;
+ /* schedule the next mark. */
+ cst->drop_next_time = control_law(cst, cprms, cst->drop_next_time);
+ return m;
+ }
+
+ /* drop the packet */
+ fq_update_stats(q, si, 0, 1);
+ m_freem(m);
+ m = fqc_dodequeue(q, now, &ok_to_drop, si);
+
+ if (!ok_to_drop) {
+ /* leave dropping state */
+ cst->dropping = false;
+ } else {
+ cst->count++;
+ /* schedule the next drop. */
+ cst->drop_next_time = control_law(cst, cprms, cst->drop_next_time);
+ }
+ }
+ /* If we get here we're not in dropping state. The 'ok_to_drop'
+ * return from dodequeue means that the sojourn time has been
+ * above 'target' for 'interval' so enter dropping state.
+ */
+ } else if (ok_to_drop) {
+
+ /* if ECN option is disabled or the packet cannot be marked,
+ * drop the packet and extract another.
+ */
+ if (!(cprms->flags & CODEL_ECN_ENABLED) || !ecn_mark(m)) {
+ fq_update_stats(q, si, 0, 1);
+ m_freem(m);
+ m = fqc_dodequeue(q, now, &ok_to_drop,si);
+ }
+
+ cst->dropping = true;
+
+ /* If min went above target close to when it last went
+ * below, assume that the drop rate that controlled the
+ * queue on the last cycle is a good starting point to
+ * control it now. ('drop_next' will be at most 'interval'
+ * later than the time of the last drop so 'now - drop_next'
+ * is a good approximation of the time from the last drop
+ * until now.)
+ */
+ cst->count = (cst->count > 2 && ((aqm_stime_t)now -
+ (aqm_stime_t)cst->drop_next_time) < 8* cprms->interval)? cst->count - 2 : 1;
+
+	/* we don't have to set initial guess for Newton's method isqrt as
+	 * we initialize isqrt in control_law function when count == 1 */
+ cst->drop_next_time = control_law(cst, cprms, now);
+ }
+
+ return m;
+}
+
+#endif
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_prio.c b/freebsd/sys/netpfil/ipfw/dn_sched_prio.c
deleted file mode 100644
index 0679db9d..00000000
--- a/freebsd/sys/netpfil/ipfw/dn_sched_prio.c
+++ /dev/null
@@ -1,231 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*
- * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- */
-#ifdef _KERNEL
-#include <sys/malloc.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <net/if.h> /* IFNAMSIZ */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ipfw_rule_ref */
-#include <netinet/ip_fw.h> /* flow_id */
-#include <netinet/ip_dummynet.h>
-#include <netpfil/ipfw/dn_heap.h>
-#include <netpfil/ipfw/ip_dn_private.h>
-#include <netpfil/ipfw/dn_sched.h>
-#else
-#include <dn_test.h>
-#endif
-
-#define DN_SCHED_PRIO 5 //XXX
-
-#if !defined(_KERNEL) || !defined(__linux__)
-#define test_bit(ix, pData) ((*pData) & (1<<(ix)))
-#define __set_bit(ix, pData) (*pData) |= (1<<(ix))
-#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
-#endif
-
-#ifdef __MIPSEL__
-#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
-#endif
-
-/* Size of the array of queues pointers. */
-#define BITMAP_T unsigned long
-#define MAXPRIO (sizeof(BITMAP_T) * 8)
-
-/*
- * The scheduler instance contains an array of pointers to queues,
- * one for each priority, and a bitmap listing backlogged queues.
- */
-struct prio_si {
- BITMAP_T bitmap; /* array bitmap */
- struct dn_queue *q_array[MAXPRIO]; /* Array of queues pointers */
-};
-
-/*
- * If a queue with the same priority is already backlogged, use
- * that one instead of the queue passed as argument.
- */
-static int
-prio_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
-{
- struct prio_si *si = (struct prio_si *)(_si + 1);
- int prio = q->fs->fs.par[0];
-
- if (test_bit(prio, &si->bitmap) == 0) {
- /* No queue with this priority, insert */
- __set_bit(prio, &si->bitmap);
- si->q_array[prio] = q;
- } else { /* use the existing queue */
- q = si->q_array[prio];
- }
- if (dn_enqueue(q, m, 0))
- return 1;
- return 0;
-}
-
-/*
- * Packets are dequeued only from the highest priority queue.
- * The function ffs() return the lowest bit in the bitmap that rapresent
- * the array index (-1) which contains the pointer to the highest priority
- * queue.
- * After the dequeue, if this queue become empty, it is index is removed
- * from the bitmap.
- * Scheduler is idle if the bitmap is empty
- *
- * NOTE: highest priority is 0, lowest is sched->max_prio_q
- */
-static struct mbuf *
-prio_dequeue(struct dn_sch_inst *_si)
-{
- struct prio_si *si = (struct prio_si *)(_si + 1);
- struct mbuf *m;
- struct dn_queue *q;
- int prio;
-
- if (si->bitmap == 0) /* scheduler idle */
- return NULL;
-
- prio = ffs(si->bitmap) - 1;
-
- /* Take the highest priority queue in the scheduler */
- q = si->q_array[prio];
- // assert(q)
-
- m = dn_dequeue(q);
- if (q->mq.head == NULL) {
- /* Queue is now empty, remove from scheduler
- * and mark it
- */
- si->q_array[prio] = NULL;
- __clear_bit(prio, &si->bitmap);
- }
- return m;
-}
-
-static int
-prio_new_sched(struct dn_sch_inst *_si)
-{
- struct prio_si *si = (struct prio_si *)(_si + 1);
-
- bzero(si->q_array, sizeof(si->q_array));
- si->bitmap = 0;
-
- return 0;
-}
-
-static int
-prio_new_fsk(struct dn_fsk *fs)
-{
- /* Check if the prioritiy is between 0 and MAXPRIO-1 */
- ipdn_bound_var(&fs->fs.par[0], 0, 0, MAXPRIO - 1, "PRIO priority");
- return 0;
-}
-
-static int
-prio_new_queue(struct dn_queue *q)
-{
- struct prio_si *si = (struct prio_si *)(q->_si + 1);
- int prio = q->fs->fs.par[0];
- struct dn_queue *oldq;
-
- q->ni.oid.subtype = DN_SCHED_PRIO;
-
- if (q->mq.head == NULL)
- return 0;
-
- /* Queue already full, must insert in the scheduler or append
- * mbufs to existing queue. This partly duplicates prio_enqueue
- */
- if (test_bit(prio, &si->bitmap) == 0) {
- /* No queue with this priority, insert */
- __set_bit(prio, &si->bitmap);
- si->q_array[prio] = q;
- } else if ( (oldq = si->q_array[prio]) != q) {
- /* must append to the existing queue.
- * can simply append q->mq.head to q2->...
- * and add the counters to those of q2
- */
- oldq->mq.tail->m_nextpkt = q->mq.head;
- oldq->mq.tail = q->mq.tail;
- oldq->ni.length += q->ni.length;
- q->ni.length = 0;
- oldq->ni.len_bytes += q->ni.len_bytes;
- q->ni.len_bytes = 0;
- q->mq.tail = q->mq.head = NULL;
- }
- return 0;
-}
-
-static int
-prio_free_queue(struct dn_queue *q)
-{
- int prio = q->fs->fs.par[0];
- struct prio_si *si = (struct prio_si *)(q->_si + 1);
-
- if (si->q_array[prio] == q) {
- si->q_array[prio] = NULL;
- __clear_bit(prio, &si->bitmap);
- }
- return 0;
-}
-
-
-static struct dn_alg prio_desc = {
- _SI( .type = ) DN_SCHED_PRIO,
- _SI( .name = ) "PRIO",
- _SI( .flags = ) DN_MULTIQUEUE,
-
- /* we need extra space in the si and the queue */
- _SI( .schk_datalen = ) 0,
- _SI( .si_datalen = ) sizeof(struct prio_si),
- _SI( .q_datalen = ) 0,
-
- _SI( .enqueue = ) prio_enqueue,
- _SI( .dequeue = ) prio_dequeue,
-
- _SI( .config = ) NULL,
- _SI( .destroy = ) NULL,
- _SI( .new_sched = ) prio_new_sched,
- _SI( .free_sched = ) NULL,
-
- _SI( .new_fsk = ) prio_new_fsk,
- _SI( .free_fsk = ) NULL,
-
- _SI( .new_queue = ) prio_new_queue,
- _SI( .free_queue = ) prio_free_queue,
-};
-
-
-DECLARE_DNSCHED_MODULE(dn_prio, &prio_desc);
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_qfq.c b/freebsd/sys/netpfil/ipfw/dn_sched_qfq.c
deleted file mode 100644
index 461c40a5..00000000
--- a/freebsd/sys/netpfil/ipfw/dn_sched_qfq.c
+++ /dev/null
@@ -1,866 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*
- * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- */
-
-#ifdef _KERNEL
-#include <sys/malloc.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <net/if.h> /* IFNAMSIZ */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ipfw_rule_ref */
-#include <netinet/ip_fw.h> /* flow_id */
-#include <netinet/ip_dummynet.h>
-#include <netpfil/ipfw/dn_heap.h>
-#include <netpfil/ipfw/ip_dn_private.h>
-#include <netpfil/ipfw/dn_sched.h>
-#else
-#include <dn_test.h>
-#endif
-
-#ifdef QFQ_DEBUG
-struct qfq_sched;
-static void dump_sched(struct qfq_sched *q, const char *msg);
-#define NO(x) x
-#else
-#define NO(x)
-#endif
-#define DN_SCHED_QFQ 4 // XXX Where?
-typedef unsigned long bitmap;
-
-/*
- * bitmaps ops are critical. Some linux versions have __fls
- * and the bitmap ops. Some machines have ffs
- */
-#if defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
-int fls(unsigned int n)
-{
- int i = 0;
- for (i = 0; n > 0; n >>= 1, i++)
- ;
- return i;
-}
-#endif
-
-#if !defined(_KERNEL) || defined( __FreeBSD__ ) || defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
-static inline unsigned long __fls(unsigned long word)
-{
- return fls(word) - 1;
-}
-#endif
-
-#if !defined(_KERNEL) || !defined(__linux__)
-#ifdef QFQ_DEBUG
-int test_bit(int ix, bitmap *p)
-{
- if (ix < 0 || ix > 31)
- D("bad index %d", ix);
- return *p & (1<<ix);
-}
-void __set_bit(int ix, bitmap *p)
-{
- if (ix < 0 || ix > 31)
- D("bad index %d", ix);
- *p |= (1<<ix);
-}
-void __clear_bit(int ix, bitmap *p)
-{
- if (ix < 0 || ix > 31)
- D("bad index %d", ix);
- *p &= ~(1<<ix);
-}
-#else /* !QFQ_DEBUG */
-/* XXX do we have fast version, or leave it to the compiler ? */
-#define test_bit(ix, pData) ((*pData) & (1<<(ix)))
-#define __set_bit(ix, pData) (*pData) |= (1<<(ix))
-#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
-#endif /* !QFQ_DEBUG */
-#endif /* !__linux__ */
-
-#ifdef __MIPSEL__
-#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
-#endif
-
-/*-------------------------------------------*/
-/*
-
-Virtual time computations.
-
-S, F and V are all computed in fixed point arithmetic with
-FRAC_BITS decimal bits.
-
- QFQ_MAX_INDEX is the maximum index allowed for a group. We need
- one bit per index.
- QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
- The layout of the bits is as below:
-
- [ MTU_SHIFT ][ FRAC_BITS ]
- [ MAX_INDEX ][ MIN_SLOT_SHIFT ]
- ^.__grp->index = 0
- *.__grp->slot_shift
-
- where MIN_SLOT_SHIFT is derived by difference from the others.
-
-The max group index corresponds to Lmax/w_min, where
-Lmax=1<<MTU_SHIFT, w_min = 1 .
-From this, and knowing how many groups (MAX_INDEX) we want,
-we can derive the shift corresponding to each group.
-
-Because we often need to compute
- F = S + len/w_i and V = V + len/wsum
-instead of storing w_i store the value
- inv_w = (1<<FRAC_BITS)/w_i
-so we can do F = S + len * inv_w * wsum.
-We use W_TOT in the formulas so we can easily move between
-static and adaptive weight sum.
-
-The per-scheduler-instance data contain all the data structures
-for the scheduler: bitmaps and bucket lists.
-
- */
-/*
- * Maximum number of consecutive slots occupied by backlogged classes
- * inside a group. This is approx lmax/lmin + 5.
- * XXX check because it poses constraints on MAX_INDEX
- */
-#define QFQ_MAX_SLOTS 32
-/*
- * Shifts used for class<->group mapping. Class weights are
- * in the range [1, QFQ_MAX_WEIGHT], we to map each class i to the
- * group with the smallest index that can support the L_i / r_i
- * configured for the class.
- *
- * grp->index is the index of the group; and grp->slot_shift
- * is the shift for the corresponding (scaled) sigma_i.
- *
- * When computing the group index, we do (len<<FP_SHIFT)/weight,
- * then compute an FLS (which is like a log2()), and if the result
- * is below the MAX_INDEX region we use 0 (which is the same as
- * using a larger len).
- */
-#define QFQ_MAX_INDEX 19
-#define QFQ_MAX_WSHIFT 16 /* log2(max_weight) */
-
-#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
-#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT)
-//#define IWSUM (q->i_wsum)
-#define IWSUM ((1<<FRAC_BITS)/QFQ_MAX_WSUM)
-
-#define FRAC_BITS 30 /* fixed point arithmetic */
-#define ONE_FP (1UL << FRAC_BITS)
-
-#define QFQ_MTU_SHIFT 11 /* log2(max_len) */
-#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
-
-/*
- * Possible group states, also indexes for the bitmaps array in
- * struct qfq_queue. We rely on ER, IR, EB, IB being numbered 0..3
- */
-enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
-
-struct qfq_group;
-/*
- * additional queue info. Some of this info should come from
- * the flowset, we copy them here for faster processing.
- * This is an overlay of the struct dn_queue
- */
-struct qfq_class {
- struct dn_queue _q;
- uint64_t S, F; /* flow timestamps (exact) */
- struct qfq_class *next; /* Link for the slot list. */
-
- /* group we belong to. In principle we would need the index,
- * which is log_2(lmax/weight), but we never reference it
- * directly, only the group.
- */
- struct qfq_group *grp;
-
- /* these are copied from the flowset. */
- uint32_t inv_w; /* ONE_FP/weight */
- uint32_t lmax; /* Max packet size for this flow. */
-};
-
-/* Group descriptor, see the paper for details.
- * Basically this contains the bucket lists
- */
-struct qfq_group {
- uint64_t S, F; /* group timestamps (approx). */
- unsigned int slot_shift; /* Slot shift. */
- unsigned int index; /* Group index. */
- unsigned int front; /* Index of the front slot. */
- bitmap full_slots; /* non-empty slots */
-
- /* Array of lists of active classes. */
- struct qfq_class *slots[QFQ_MAX_SLOTS];
-};
-
-/* scheduler instance descriptor. */
-struct qfq_sched {
- uint64_t V; /* Precise virtual time. */
- uint32_t wsum; /* weight sum */
- NO(uint32_t i_wsum; /* ONE_FP/w_sum */
- uint32_t _queued; /* debugging */
- uint32_t loops; /* debugging */)
- bitmap bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */
- struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
-};
-
-/*---- support functions ----------------------------*/
-
-/* Generic comparison function, handling wraparound. */
-static inline int qfq_gt(uint64_t a, uint64_t b)
-{
- return (int64_t)(a - b) > 0;
-}
-
-/* Round a precise timestamp to its slotted value. */
-static inline uint64_t qfq_round_down(uint64_t ts, unsigned int shift)
-{
- return ts & ~((1ULL << shift) - 1);
-}
-
-/* return the pointer to the group with lowest index in the bitmap */
-static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
- unsigned long bitmap)
-{
- int index = ffs(bitmap) - 1; // zero-based
- return &q->groups[index];
-}
-
-/*
- * Calculate a flow index, given its weight and maximum packet length.
- * index = log_2(maxlen/weight) but we need to apply the scaling.
- * This is used only once at flow creation.
- */
-static int qfq_calc_index(uint32_t inv_w, unsigned int maxlen)
-{
- uint64_t slot_size = (uint64_t)maxlen *inv_w;
- unsigned long size_map;
- int index = 0;
-
- size_map = (unsigned long)(slot_size >> QFQ_MIN_SLOT_SHIFT);
- if (!size_map)
- goto out;
-
- index = __fls(size_map) + 1; // basically a log_2()
- index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
-
- if (index < 0)
- index = 0;
-
-out:
- ND("W = %d, L = %d, I = %d\n", ONE_FP/inv_w, maxlen, index);
- return index;
-}
-/*---- end support functions ----*/
-
-/*-------- API calls --------------------------------*/
-/*
- * Validate and copy parameters from flowset.
- */
-static int
-qfq_new_queue(struct dn_queue *_q)
-{
- struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
- struct qfq_class *cl = (struct qfq_class *)_q;
- int i;
- uint32_t w; /* approximated weight */
-
- /* import parameters from the flowset. They should be correct
- * already.
- */
- w = _q->fs->fs.par[0];
- cl->lmax = _q->fs->fs.par[1];
- if (!w || w > QFQ_MAX_WEIGHT) {
- w = 1;
- D("rounding weight to 1");
- }
- cl->inv_w = ONE_FP/w;
- w = ONE_FP/cl->inv_w;
- if (q->wsum + w > QFQ_MAX_WSUM)
- return EINVAL;
-
- i = qfq_calc_index(cl->inv_w, cl->lmax);
- cl->grp = &q->groups[i];
- q->wsum += w;
- // XXX cl->S = q->V; ?
- // XXX compute q->i_wsum
- return 0;
-}
-
-/* remove an empty queue */
-static int
-qfq_free_queue(struct dn_queue *_q)
-{
- struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
- struct qfq_class *cl = (struct qfq_class *)_q;
- if (cl->inv_w) {
- q->wsum -= ONE_FP/cl->inv_w;
- cl->inv_w = 0; /* reset weight to avoid run twice */
- }
- return 0;
-}
-
-/* Calculate a mask to mimic what would be ffs_from(). */
-static inline unsigned long
-mask_from(unsigned long bitmap, int from)
-{
- return bitmap & ~((1UL << from) - 1);
-}
-
-/*
- * The state computation relies on ER=0, IR=1, EB=2, IB=3
- * First compute eligibility comparing grp->S, q->V,
- * then check if someone is blocking us and possibly add EB
- */
-static inline unsigned int
-qfq_calc_state(struct qfq_sched *q, struct qfq_group *grp)
-{
- /* if S > V we are not eligible */
- unsigned int state = qfq_gt(grp->S, q->V);
- unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
- struct qfq_group *next;
-
- if (mask) {
- next = qfq_ffs(q, mask);
- if (qfq_gt(grp->F, next->F))
- state |= EB;
- }
-
- return state;
-}
-
-/*
- * In principle
- * q->bitmaps[dst] |= q->bitmaps[src] & mask;
- * q->bitmaps[src] &= ~mask;
- * but we should make sure that src != dst
- */
-static inline void
-qfq_move_groups(struct qfq_sched *q, unsigned long mask, int src, int dst)
-{
- q->bitmaps[dst] |= q->bitmaps[src] & mask;
- q->bitmaps[src] &= ~mask;
-}
-
-static inline void
-qfq_unblock_groups(struct qfq_sched *q, int index, uint64_t old_finish)
-{
- unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
- struct qfq_group *next;
-
- if (mask) {
- next = qfq_ffs(q, mask);
- if (!qfq_gt(next->F, old_finish))
- return;
- }
-
- mask = (1UL << index) - 1;
- qfq_move_groups(q, mask, EB, ER);
- qfq_move_groups(q, mask, IB, IR);
-}
-
-/*
- * perhaps
- *
- old_V ^= q->V;
- old_V >>= QFQ_MIN_SLOT_SHIFT;
- if (old_V) {
- ...
- }
- *
- */
-static inline void
-qfq_make_eligible(struct qfq_sched *q, uint64_t old_V)
-{
- unsigned long mask, vslot, old_vslot;
-
- vslot = q->V >> QFQ_MIN_SLOT_SHIFT;
- old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;
-
- if (vslot != old_vslot) {
- mask = (2UL << (__fls(vslot ^ old_vslot))) - 1;
- qfq_move_groups(q, mask, IR, ER);
- qfq_move_groups(q, mask, IB, EB);
- }
-}
-
-/*
- * XXX we should make sure that slot becomes less than 32.
- * This is guaranteed by the input values.
- * roundedS is always cl->S rounded on grp->slot_shift bits.
- */
-static inline void
-qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, uint64_t roundedS)
-{
- uint64_t slot = (roundedS - grp->S) >> grp->slot_shift;
- unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
-
- cl->next = grp->slots[i];
- grp->slots[i] = cl;
- __set_bit(slot, &grp->full_slots);
-}
-
-/*
- * remove the entry from the slot
- */
-static inline void
-qfq_front_slot_remove(struct qfq_group *grp)
-{
- struct qfq_class **h = &grp->slots[grp->front];
-
- *h = (*h)->next;
- if (!*h)
- __clear_bit(0, &grp->full_slots);
-}
-
-/*
- * Returns the first full queue in a group. As a side effect,
- * adjust the bucket list so the first non-empty bucket is at
- * position 0 in full_slots.
- */
-static inline struct qfq_class *
-qfq_slot_scan(struct qfq_group *grp)
-{
- int i;
-
- ND("grp %d full %x", grp->index, grp->full_slots);
- if (!grp->full_slots)
- return NULL;
-
- i = ffs(grp->full_slots) - 1; // zero-based
- if (i > 0) {
- grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
- grp->full_slots >>= i;
- }
-
- return grp->slots[grp->front];
-}
-
-/*
- * adjust the bucket list. When the start time of a group decreases,
- * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to
- * move the objects. The mask of occupied slots must be shifted
- * because we use ffs() to find the first non-empty slot.
- * This covers decreases in the group's start time, but what about
- * increases of the start time ?
- * Here too we should make sure that i is less than 32
- */
-static inline void
-qfq_slot_rotate(struct qfq_sched *q, struct qfq_group *grp, uint64_t roundedS)
-{
- unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
-
- grp->full_slots <<= i;
- grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
-}
-
-
-static inline void
-qfq_update_eligible(struct qfq_sched *q, uint64_t old_V)
-{
- bitmap ineligible;
-
- ineligible = q->bitmaps[IR] | q->bitmaps[IB];
- if (ineligible) {
- if (!q->bitmaps[ER]) {
- struct qfq_group *grp;
- grp = qfq_ffs(q, ineligible);
- if (qfq_gt(grp->S, q->V))
- q->V = grp->S;
- }
- qfq_make_eligible(q, old_V);
- }
-}
-
-/*
- * Updates the class, returns true if also the group needs to be updated.
- */
-static inline int
-qfq_update_class(struct qfq_sched *q, struct qfq_group *grp,
- struct qfq_class *cl)
-{
-
- cl->S = cl->F;
- if (cl->_q.mq.head == NULL) {
- qfq_front_slot_remove(grp);
- } else {
- unsigned int len;
- uint64_t roundedS;
-
- len = cl->_q.mq.head->m_pkthdr.len;
- cl->F = cl->S + (uint64_t)len * cl->inv_w;
- roundedS = qfq_round_down(cl->S, grp->slot_shift);
- if (roundedS == grp->S)
- return 0;
-
- qfq_front_slot_remove(grp);
- qfq_slot_insert(grp, cl, roundedS);
- }
- return 1;
-}
-
-static struct mbuf *
-qfq_dequeue(struct dn_sch_inst *si)
-{
- struct qfq_sched *q = (struct qfq_sched *)(si + 1);
- struct qfq_group *grp;
- struct qfq_class *cl;
- struct mbuf *m;
- uint64_t old_V;
-
- NO(q->loops++;)
- if (!q->bitmaps[ER]) {
- NO(if (q->queued)
- dump_sched(q, "start dequeue");)
- return NULL;
- }
-
- grp = qfq_ffs(q, q->bitmaps[ER]);
-
- cl = grp->slots[grp->front];
- /* extract from the first bucket in the bucket list */
- m = dn_dequeue(&cl->_q);
-
- if (!m) {
- D("BUG/* non-workconserving leaf */");
- return NULL;
- }
- NO(q->queued--;)
- old_V = q->V;
- q->V += (uint64_t)m->m_pkthdr.len * IWSUM;
- ND("m is %p F 0x%llx V now 0x%llx", m, cl->F, q->V);
-
- if (qfq_update_class(q, grp, cl)) {
- uint64_t old_F = grp->F;
- cl = qfq_slot_scan(grp);
- if (!cl) { /* group gone, remove from ER */
- __clear_bit(grp->index, &q->bitmaps[ER]);
- // grp->S = grp->F + 1; // XXX debugging only
- } else {
- uint64_t roundedS = qfq_round_down(cl->S, grp->slot_shift);
- unsigned int s;
-
- if (grp->S == roundedS)
- goto skip_unblock;
- grp->S = roundedS;
- grp->F = roundedS + (2ULL << grp->slot_shift);
- /* remove from ER and put in the new set */
- __clear_bit(grp->index, &q->bitmaps[ER]);
- s = qfq_calc_state(q, grp);
- __set_bit(grp->index, &q->bitmaps[s]);
- }
- /* we need to unblock even if the group has gone away */
- qfq_unblock_groups(q, grp->index, old_F);
- }
-
-skip_unblock:
- qfq_update_eligible(q, old_V);
- NO(if (!q->bitmaps[ER] && q->queued)
- dump_sched(q, "end dequeue");)
-
- return m;
-}
-
-/*
- * Assign a reasonable start time for a new flow k in group i.
- * Admissible values for \hat(F) are multiples of \sigma_i
- * no greater than V+\sigma_i . Larger values mean that
- * we had a wraparound so we consider the timestamp to be stale.
- *
- * If F is not stale and F >= V then we set S = F.
- * Otherwise we should assign S = V, but this may violate
- * the ordering in ER. So, if we have groups in ER, set S to
- * the F_j of the first group j which would be blocking us.
- * We are guaranteed not to move S backward because
- * otherwise our group i would still be blocked.
- */
-static inline void
-qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
-{
- unsigned long mask;
- uint32_t limit, roundedF;
- int slot_shift = cl->grp->slot_shift;
-
- roundedF = qfq_round_down(cl->F, slot_shift);
- limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift);
-
- if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
- /* timestamp was stale */
- mask = mask_from(q->bitmaps[ER], cl->grp->index);
- if (mask) {
- struct qfq_group *next = qfq_ffs(q, mask);
- if (qfq_gt(roundedF, next->F)) {
- cl->S = next->F;
- return;
- }
- }
- cl->S = q->V;
- } else { /* timestamp is not stale */
- cl->S = cl->F;
- }
-}
-
-static int
-qfq_enqueue(struct dn_sch_inst *si, struct dn_queue *_q, struct mbuf *m)
-{
- struct qfq_sched *q = (struct qfq_sched *)(si + 1);
- struct qfq_group *grp;
- struct qfq_class *cl = (struct qfq_class *)_q;
- uint64_t roundedS;
- int s;
-
- NO(q->loops++;)
- DX(4, "len %d flow %p inv_w 0x%x grp %d", m->m_pkthdr.len,
- _q, cl->inv_w, cl->grp->index);
- /* XXX verify that the packet obeys the parameters */
- if (m != _q->mq.head) {
- if (dn_enqueue(_q, m, 0)) /* packet was dropped */
- return 1;
- NO(q->queued++;)
- if (m != _q->mq.head)
- return 0;
- }
- /* If reach this point, queue q was idle */
- grp = cl->grp;
- qfq_update_start(q, cl); /* adjust start time */
- /* compute new finish time and rounded start. */
- cl->F = cl->S + (uint64_t)(m->m_pkthdr.len) * cl->inv_w;
- roundedS = qfq_round_down(cl->S, grp->slot_shift);
-
- /*
- * insert cl in the correct bucket.
- * If cl->S >= grp->S we don't need to adjust the
- * bucket list and simply go to the insertion phase.
- * Otherwise grp->S is decreasing, we must make room
- * in the bucket list, and also recompute the group state.
- * Finally, if there were no flows in this group and nobody
- * was in ER make sure to adjust V.
- */
- if (grp->full_slots) {
- if (!qfq_gt(grp->S, cl->S))
- goto skip_update;
- /* create a slot for this cl->S */
- qfq_slot_rotate(q, grp, roundedS);
- /* group was surely ineligible, remove */
- __clear_bit(grp->index, &q->bitmaps[IR]);
- __clear_bit(grp->index, &q->bitmaps[IB]);
- } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
- q->V = roundedS;
-
- grp->S = roundedS;
- grp->F = roundedS + (2ULL << grp->slot_shift); // i.e. 2\sigma_i
- s = qfq_calc_state(q, grp);
- __set_bit(grp->index, &q->bitmaps[s]);
- ND("new state %d 0x%x", s, q->bitmaps[s]);
- ND("S %llx F %llx V %llx", cl->S, cl->F, q->V);
-skip_update:
- qfq_slot_insert(grp, cl, roundedS);
-
- return 0;
-}
-
-
-#if 0
-static inline void
-qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
- struct qfq_class *cl, struct qfq_class **pprev)
-{
- unsigned int i, offset;
- uint64_t roundedS;
-
- roundedS = qfq_round_down(cl->S, grp->slot_shift);
- offset = (roundedS - grp->S) >> grp->slot_shift;
- i = (grp->front + offset) % QFQ_MAX_SLOTS;
-
-#ifdef notyet
- if (!pprev) {
- pprev = &grp->slots[i];
- while (*pprev && *pprev != cl)
- pprev = &(*pprev)->next;
- }
-#endif
-
- *pprev = cl->next;
- if (!grp->slots[i])
- __clear_bit(offset, &grp->full_slots);
-}
-
-/*
- * called to forcibly destroy a queue.
- * If the queue is not in the front bucket, or if it has
- * other queues in the front bucket, we can simply remove
- * the queue with no other side effects.
- * Otherwise we must propagate the event up.
- * XXX description to be completed.
- */
-static void
-qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl,
- struct qfq_class **pprev)
-{
- struct qfq_group *grp = &q->groups[cl->index];
- unsigned long mask;
- uint64_t roundedS;
- int s;
-
- cl->F = cl->S; // not needed if the class goes away.
- qfq_slot_remove(q, grp, cl, pprev);
-
- if (!grp->full_slots) {
- /* nothing left in the group, remove from all sets.
- * Do ER last because if we were blocking other groups
- * we must unblock them.
- */
- __clear_bit(grp->index, &q->bitmaps[IR]);
- __clear_bit(grp->index, &q->bitmaps[EB]);
- __clear_bit(grp->index, &q->bitmaps[IB]);
-
- if (test_bit(grp->index, &q->bitmaps[ER]) &&
- !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
- mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
- if (mask)
- mask = ~((1UL << __fls(mask)) - 1);
- else
- mask = ~0UL;
- qfq_move_groups(q, mask, EB, ER);
- qfq_move_groups(q, mask, IB, IR);
- }
- __clear_bit(grp->index, &q->bitmaps[ER]);
- } else if (!grp->slots[grp->front]) {
- cl = qfq_slot_scan(grp);
- roundedS = qfq_round_down(cl->S, grp->slot_shift);
- if (grp->S != roundedS) {
- __clear_bit(grp->index, &q->bitmaps[ER]);
- __clear_bit(grp->index, &q->bitmaps[IR]);
- __clear_bit(grp->index, &q->bitmaps[EB]);
- __clear_bit(grp->index, &q->bitmaps[IB]);
- grp->S = roundedS;
- grp->F = roundedS + (2ULL << grp->slot_shift);
- s = qfq_calc_state(q, grp);
- __set_bit(grp->index, &q->bitmaps[s]);
- }
- }
- qfq_update_eligible(q, q->V);
-}
-#endif
-
-static int
-qfq_new_fsk(struct dn_fsk *f)
-{
- ipdn_bound_var(&f->fs.par[0], 1, 1, QFQ_MAX_WEIGHT, "qfq weight");
- ipdn_bound_var(&f->fs.par[1], 1500, 1, 2000, "qfq maxlen");
- ND("weight %d len %d\n", f->fs.par[0], f->fs.par[1]);
- return 0;
-}
-
-/*
- * initialize a new scheduler instance
- */
-static int
-qfq_new_sched(struct dn_sch_inst *si)
-{
- struct qfq_sched *q = (struct qfq_sched *)(si + 1);
- struct qfq_group *grp;
- int i;
-
- for (i = 0; i <= QFQ_MAX_INDEX; i++) {
- grp = &q->groups[i];
- grp->index = i;
- grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS -
- (QFQ_MAX_INDEX - i);
- }
- return 0;
-}
-
-/*
- * QFQ scheduler descriptor
- */
-static struct dn_alg qfq_desc = {
- _SI( .type = ) DN_SCHED_QFQ,
- _SI( .name = ) "QFQ",
- _SI( .flags = ) DN_MULTIQUEUE,
-
- _SI( .schk_datalen = ) 0,
- _SI( .si_datalen = ) sizeof(struct qfq_sched),
- _SI( .q_datalen = ) sizeof(struct qfq_class) - sizeof(struct dn_queue),
-
- _SI( .enqueue = ) qfq_enqueue,
- _SI( .dequeue = ) qfq_dequeue,
-
- _SI( .config = ) NULL,
- _SI( .destroy = ) NULL,
- _SI( .new_sched = ) qfq_new_sched,
- _SI( .free_sched = ) NULL,
- _SI( .new_fsk = ) qfq_new_fsk,
- _SI( .free_fsk = ) NULL,
- _SI( .new_queue = ) qfq_new_queue,
- _SI( .free_queue = ) qfq_free_queue,
-};
-
-DECLARE_DNSCHED_MODULE(dn_qfq, &qfq_desc);
-
-#ifdef QFQ_DEBUG
-static void
-dump_groups(struct qfq_sched *q, uint32_t mask)
-{
- int i, j;
-
- for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
- struct qfq_group *g = &q->groups[i];
-
- if (0 == (mask & (1<<i)))
- continue;
- for (j = 0; j < QFQ_MAX_SLOTS; j++) {
- if (g->slots[j])
- D(" bucket %d %p", j, g->slots[j]);
- }
- D("full_slots 0x%x", g->full_slots);
- D(" %2d S 0x%20llx F 0x%llx %c", i,
- g->S, g->F,
- mask & (1<<i) ? '1' : '0');
- }
-}
-
-static void
-dump_sched(struct qfq_sched *q, const char *msg)
-{
- D("--- in %s: ---", msg);
- ND("loops %d queued %d V 0x%llx", q->loops, q->queued, q->V);
- D(" ER 0x%08x", q->bitmaps[ER]);
- D(" EB 0x%08x", q->bitmaps[EB]);
- D(" IR 0x%08x", q->bitmaps[IR]);
- D(" IB 0x%08x", q->bitmaps[IB]);
- dump_groups(q, 0xffffffff);
-};
-#endif /* QFQ_DEBUG */
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_rr.c b/freebsd/sys/netpfil/ipfw/dn_sched_rr.c
deleted file mode 100644
index c1862ab0..00000000
--- a/freebsd/sys/netpfil/ipfw/dn_sched_rr.c
+++ /dev/null
@@ -1,309 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*
- * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- */
-
-#ifdef _KERNEL
-#include <sys/malloc.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <net/if.h> /* IFNAMSIZ */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ipfw_rule_ref */
-#include <netinet/ip_fw.h> /* flow_id */
-#include <netinet/ip_dummynet.h>
-#include <netpfil/ipfw/dn_heap.h>
-#include <netpfil/ipfw/ip_dn_private.h>
-#include <netpfil/ipfw/dn_sched.h>
-#else
-#include <dn_test.h>
-#endif
-
-#define DN_SCHED_RR 3 // XXX Where?
-
-struct rr_queue {
- struct dn_queue q; /* Standard queue */
- int status; /* 1: queue is in the list */
- int credit; /* Number of bytes to transmit */
- int quantum; /* quantum * C */
- struct rr_queue *qnext; /* */
-};
-
-/* struct rr_schk contains global config parameters
- * and is right after dn_schk
- */
-struct rr_schk {
- int min_q; /* Min quantum */
- int max_q; /* Max quantum */
- int q_bytes; /* Bytes per quantum */
-};
-
-/* per-instance round robin list, right after dn_sch_inst */
-struct rr_si {
- struct rr_queue *head, *tail; /* Pointer to current queue */
-};
-
-/* Append a queue to the rr list */
-static inline void
-rr_append(struct rr_queue *q, struct rr_si *si)
-{
- q->status = 1; /* mark as in-rr_list */
- q->credit = q->quantum; /* initialize credit */
-
- /* append to the tail */
- if (si->head == NULL)
- si->head = q;
- else
- si->tail->qnext = q;
- si->tail = q; /* advance the tail pointer */
- q->qnext = si->head; /* make it circular */
-}
-
-/* Remove the head queue from circular list. */
-static inline void
-rr_remove_head(struct rr_si *si)
-{
- if (si->head == NULL)
- return; /* empty queue */
- si->head->status = 0;
-
- if (si->head == si->tail) {
- si->head = si->tail = NULL;
- return;
- }
-
- si->head = si->head->qnext;
- si->tail->qnext = si->head;
-}
-
-/* Remove a queue from circular list.
- * XXX see if ti can be merge with remove_queue()
- */
-static inline void
-remove_queue_q(struct rr_queue *q, struct rr_si *si)
-{
- struct rr_queue *prev;
-
- if (q->status != 1)
- return;
- if (q == si->head) {
- rr_remove_head(si);
- return;
- }
-
- for (prev = si->head; prev; prev = prev->qnext) {
- if (prev->qnext != q)
- continue;
- prev->qnext = q->qnext;
- if (q == si->tail)
- si->tail = prev;
- q->status = 0;
- break;
- }
-}
-
-
-static inline void
-next_pointer(struct rr_si *si)
-{
- if (si->head == NULL)
- return; /* empty queue */
-
- si->head = si->head->qnext;
- si->tail = si->tail->qnext;
-}
-
-static int
-rr_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
-{
- struct rr_si *si;
- struct rr_queue *rrq;
-
- if (m != q->mq.head) {
- if (dn_enqueue(q, m, 0)) /* packet was dropped */
- return 1;
- if (m != q->mq.head)
- return 0;
- }
-
- /* If reach this point, queue q was idle */
- si = (struct rr_si *)(_si + 1);
- rrq = (struct rr_queue *)q;
-
- if (rrq->status == 1) /* Queue is already in the queue list */
- return 0;
-
- /* Insert the queue in the queue list */
- rr_append(rrq, si);
-
- return 0;
-}
-
-static struct mbuf *
-rr_dequeue(struct dn_sch_inst *_si)
-{
- /* Access scheduler instance private data */
- struct rr_si *si = (struct rr_si *)(_si + 1);
- struct rr_queue *rrq;
- uint64_t len;
-
- while ( (rrq = si->head) ) {
- struct mbuf *m = rrq->q.mq.head;
- if ( m == NULL) {
- /* empty queue, remove from list */
- rr_remove_head(si);
- continue;
- }
- len = m->m_pkthdr.len;
-
- if (len > rrq->credit) {
- /* Packet too big */
- rrq->credit += rrq->quantum;
- /* Try next queue */
- next_pointer(si);
- } else {
- rrq->credit -= len;
- return dn_dequeue(&rrq->q);
- }
- }
-
- /* no packet to dequeue*/
- return NULL;
-}
-
-static int
-rr_config(struct dn_schk *_schk)
-{
- struct rr_schk *schk = (struct rr_schk *)(_schk + 1);
- ND("called");
-
- /* use reasonable quantums (64..2k bytes, default 1500) */
- schk->min_q = 64;
- schk->max_q = 2048;
- schk->q_bytes = 1500; /* quantum */
-
- return 0;
-}
-
-static int
-rr_new_sched(struct dn_sch_inst *_si)
-{
- struct rr_si *si = (struct rr_si *)(_si + 1);
-
- ND("called");
- si->head = si->tail = NULL;
-
- return 0;
-}
-
-static int
-rr_free_sched(struct dn_sch_inst *_si)
-{
- ND("called");
- /* Nothing to do? */
- return 0;
-}
-
-static int
-rr_new_fsk(struct dn_fsk *fs)
-{
- struct rr_schk *schk = (struct rr_schk *)(fs->sched + 1);
- /* par[0] is the weight, par[1] is the quantum step */
- ipdn_bound_var(&fs->fs.par[0], 1,
- 1, 65536, "RR weight");
- ipdn_bound_var(&fs->fs.par[1], schk->q_bytes,
- schk->min_q, schk->max_q, "RR quantum");
- return 0;
-}
-
-static int
-rr_new_queue(struct dn_queue *_q)
-{
- struct rr_queue *q = (struct rr_queue *)_q;
-
- _q->ni.oid.subtype = DN_SCHED_RR;
-
- q->quantum = _q->fs->fs.par[0] * _q->fs->fs.par[1];
- ND("called, q->quantum %d", q->quantum);
- q->credit = q->quantum;
- q->status = 0;
-
- if (_q->mq.head != NULL) {
- /* Queue NOT empty, insert in the queue list */
- rr_append(q, (struct rr_si *)(_q->_si + 1));
- }
- return 0;
-}
-
-static int
-rr_free_queue(struct dn_queue *_q)
-{
- struct rr_queue *q = (struct rr_queue *)_q;
-
- ND("called");
- if (q->status == 1) {
- struct rr_si *si = (struct rr_si *)(_q->_si + 1);
- remove_queue_q(q, si);
- }
- return 0;
-}
-
-/*
- * RR scheduler descriptor
- * contains the type of the scheduler, the name, the size of the
- * structures and function pointers.
- */
-static struct dn_alg rr_desc = {
- _SI( .type = ) DN_SCHED_RR,
- _SI( .name = ) "RR",
- _SI( .flags = ) DN_MULTIQUEUE,
-
- _SI( .schk_datalen = ) 0,
- _SI( .si_datalen = ) sizeof(struct rr_si),
- _SI( .q_datalen = ) sizeof(struct rr_queue) - sizeof(struct dn_queue),
-
- _SI( .enqueue = ) rr_enqueue,
- _SI( .dequeue = ) rr_dequeue,
-
- _SI( .config = ) rr_config,
- _SI( .destroy = ) NULL,
- _SI( .new_sched = ) rr_new_sched,
- _SI( .free_sched = ) rr_free_sched,
- _SI( .new_fsk = ) rr_new_fsk,
- _SI( .free_fsk = ) NULL,
- _SI( .new_queue = ) rr_new_queue,
- _SI( .free_queue = ) rr_free_queue,
-};
-
-
-DECLARE_DNSCHED_MODULE(dn_rr, &rr_desc);
diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_wf2q.c b/freebsd/sys/netpfil/ipfw/dn_sched_wf2q.c
deleted file mode 100644
index 77c4bbad..00000000
--- a/freebsd/sys/netpfil/ipfw/dn_sched_wf2q.c
+++ /dev/null
@@ -1,375 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*
- * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
- * Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- */
-
-#ifdef _KERNEL
-#include <sys/malloc.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <net/if.h> /* IFNAMSIZ */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ipfw_rule_ref */
-#include <netinet/ip_fw.h> /* flow_id */
-#include <netinet/ip_dummynet.h>
-#include <netpfil/ipfw/dn_heap.h>
-#include <netpfil/ipfw/ip_dn_private.h>
-#include <netpfil/ipfw/dn_sched.h>
-#else
-#include <dn_test.h>
-#endif
-
-#ifndef MAX64
-#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x)
-#endif
-
-/*
- * timestamps are computed on 64 bit using fixed point arithmetic.
- * LMAX_BITS, WMAX_BITS are the max number of bits for the packet len
- * and sum of weights, respectively. FRAC_BITS is the number of
- * fractional bits. We want FRAC_BITS >> WMAX_BITS to avoid too large
- * errors when computing the inverse, FRAC_BITS < 32 so we can do 1/w
- * using an unsigned 32-bit division, and to avoid wraparounds we need
- * LMAX_BITS + WMAX_BITS + FRAC_BITS << 64
- * As an example
- * FRAC_BITS = 26, LMAX_BITS=14, WMAX_BITS = 19
- */
-#ifndef FRAC_BITS
-#define FRAC_BITS 28 /* shift for fixed point arithmetic */
-#define ONE_FP (1UL << FRAC_BITS)
-#endif
-
-/*
- * Private information for the scheduler instance:
- * sch_heap (key is Finish time) returns the next queue to serve
- * ne_heap (key is Start time) stores not-eligible queues
- * idle_heap (key=start/finish time) stores idle flows. It must
- * support extract-from-middle.
- * A flow is only in 1 of the three heaps.
- * XXX todo: use a more efficient data structure, e.g. a tree sorted
- * by F with min_subtree(S) in each node
- */
-struct wf2qp_si {
- struct dn_heap sch_heap; /* top extract - key Finish time */
- struct dn_heap ne_heap; /* top extract - key Start time */
- struct dn_heap idle_heap; /* random extract - key Start=Finish time */
- uint64_t V; /* virtual time */
- uint32_t inv_wsum; /* inverse of sum of weights */
- uint32_t wsum; /* sum of weights */
-};
-
-struct wf2qp_queue {
- struct dn_queue _q;
- uint64_t S, F; /* start time, finish time */
- uint32_t inv_w; /* ONE_FP / weight */
- int32_t heap_pos; /* position (index) of struct in heap */
-};
-
-/*
- * This file implements a WF2Q+ scheduler as it has been in dummynet
- * since 2000.
- * The scheduler supports per-flow queues and has O(log N) complexity.
- *
- * WF2Q+ needs to drain entries from the idle heap so that we
- * can keep the sum of weights up to date. We can do it whenever
- * we get a chance, or periodically, or following some other
- * strategy. The function idle_check() drains at most N elements
- * from the idle heap.
- */
-static void
-idle_check(struct wf2qp_si *si, int n, int force)
-{
- struct dn_heap *h = &si->idle_heap;
- while (n-- > 0 && h->elements > 0 &&
- (force || DN_KEY_LT(HEAP_TOP(h)->key, si->V))) {
- struct dn_queue *q = HEAP_TOP(h)->object;
- struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
-
- heap_extract(h, NULL);
- /* XXX to let the flowset delete the queue we should
- * mark it as 'unused' by the scheduler.
- */
- alg_fq->S = alg_fq->F + 1; /* Mark timestamp as invalid. */
- si->wsum -= q->fs->fs.par[0]; /* adjust sum of weights */
- if (si->wsum > 0)
- si->inv_wsum = ONE_FP/si->wsum;
- }
-}
-
-static int
-wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
-{
- struct dn_fsk *fs = q->fs;
- struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
- struct wf2qp_queue *alg_fq;
- uint64_t len = m->m_pkthdr.len;
-
- if (m != q->mq.head) {
- if (dn_enqueue(q, m, 0)) /* packet was dropped */
- return 1;
- if (m != q->mq.head) /* queue was already busy */
- return 0;
- }
-
- /* If reach this point, queue q was idle */
- alg_fq = (struct wf2qp_queue *)q;
-
- if (DN_KEY_LT(alg_fq->F, alg_fq->S)) {
- /* F<S means timestamps are invalid ->brand new queue. */
- alg_fq->S = si->V; /* init start time */
- si->wsum += fs->fs.par[0]; /* add weight of new queue. */
- si->inv_wsum = ONE_FP/si->wsum;
- } else { /* if it was idle then it was in the idle heap */
- heap_extract(&si->idle_heap, q);
- alg_fq->S = MAX64(alg_fq->F, si->V); /* compute new S */
- }
- alg_fq->F = alg_fq->S + len * alg_fq->inv_w;
-
- /* if nothing is backlogged, make sure this flow is eligible */
- if (si->ne_heap.elements == 0 && si->sch_heap.elements == 0)
- si->V = MAX64(alg_fq->S, si->V);
-
- /*
- * Look at eligibility. A flow is not eligibile if S>V (when
- * this happens, it means that there is some other flow already
- * scheduled for the same pipe, so the sch_heap cannot be
- * empty). If the flow is not eligible we just store it in the
- * ne_heap. Otherwise, we store in the sch_heap.
- * Note that for all flows in sch_heap (SCH), S_i <= V,
- * and for all flows in ne_heap (NEH), S_i > V.
- * So when we need to compute max(V, min(S_i)) forall i in
- * SCH+NEH, we only need to look into NEH.
- */
- if (DN_KEY_LT(si->V, alg_fq->S)) {
- /* S>V means flow Not eligible. */
- if (si->sch_heap.elements == 0)
- D("++ ouch! not eligible but empty scheduler!");
- heap_insert(&si->ne_heap, alg_fq->S, q);
- } else {
- heap_insert(&si->sch_heap, alg_fq->F, q);
- }
- return 0;
-}
-
-/* XXX invariant: sch > 0 || V >= min(S in neh) */
-static struct mbuf *
-wf2qp_dequeue(struct dn_sch_inst *_si)
-{
- /* Access scheduler instance private data */
- struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
- struct mbuf *m;
- struct dn_queue *q;
- struct dn_heap *sch = &si->sch_heap;
- struct dn_heap *neh = &si->ne_heap;
- struct wf2qp_queue *alg_fq;
-
- if (sch->elements == 0 && neh->elements == 0) {
- /* we have nothing to do. We could kill the idle heap
- * altogether and reset V
- */
- idle_check(si, 0x7fffffff, 1);
- si->V = 0;
- si->wsum = 0; /* should be set already */
- return NULL; /* quick return if nothing to do */
- }
- idle_check(si, 1, 0); /* drain something from the idle heap */
-
- /* make sure at least one element is eligible, bumping V
- * and moving entries that have become eligible.
- * We need to repeat the first part twice, before and
- * after extracting the candidate, or enqueue() will
- * find the data structure in a wrong state.
- */
- m = NULL;
- for(;;) {
- /*
- * Compute V = max(V, min(S_i)). Remember that all elements
- * in sch have by definition S_i <= V so if sch is not empty,
- * V is surely the max and we must not update it. Conversely,
- * if sch is empty we only need to look at neh.
- * We don't need to move the queues, as it will be done at the
- * next enqueue
- */
- if (sch->elements == 0 && neh->elements > 0) {
- si->V = MAX64(si->V, HEAP_TOP(neh)->key);
- }
- while (neh->elements > 0 &&
- DN_KEY_LEQ(HEAP_TOP(neh)->key, si->V)) {
- q = HEAP_TOP(neh)->object;
- alg_fq = (struct wf2qp_queue *)q;
- heap_extract(neh, NULL);
- heap_insert(sch, alg_fq->F, q);
- }
- if (m) /* pkt found in previous iteration */
- break;
- /* ok we have at least one eligible pkt */
- q = HEAP_TOP(sch)->object;
- alg_fq = (struct wf2qp_queue *)q;
- m = dn_dequeue(q);
- heap_extract(sch, NULL); /* Remove queue from heap. */
- si->V += (uint64_t)(m->m_pkthdr.len) * si->inv_wsum;
- alg_fq->S = alg_fq->F; /* Update start time. */
- if (q->mq.head == 0) { /* not backlogged any more. */
- heap_insert(&si->idle_heap, alg_fq->F, q);
- } else { /* Still backlogged. */
- /* Update F, store in neh or sch */
- uint64_t len = q->mq.head->m_pkthdr.len;
- alg_fq->F += len * alg_fq->inv_w;
- if (DN_KEY_LEQ(alg_fq->S, si->V)) {
- heap_insert(sch, alg_fq->F, q);
- } else {
- heap_insert(neh, alg_fq->S, q);
- }
- }
- }
- return m;
-}
-
-static int
-wf2qp_new_sched(struct dn_sch_inst *_si)
-{
- struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
- int ofs = offsetof(struct wf2qp_queue, heap_pos);
-
- /* all heaps support extract from middle */
- if (heap_init(&si->idle_heap, 16, ofs) ||
- heap_init(&si->sch_heap, 16, ofs) ||
- heap_init(&si->ne_heap, 16, ofs)) {
- heap_free(&si->ne_heap);
- heap_free(&si->sch_heap);
- heap_free(&si->idle_heap);
- return ENOMEM;
- }
- return 0;
-}
-
-static int
-wf2qp_free_sched(struct dn_sch_inst *_si)
-{
- struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
-
- heap_free(&si->sch_heap);
- heap_free(&si->ne_heap);
- heap_free(&si->idle_heap);
-
- return 0;
-}
-
-static int
-wf2qp_new_fsk(struct dn_fsk *fs)
-{
- ipdn_bound_var(&fs->fs.par[0], 1,
- 1, 100, "WF2Q+ weight");
- return 0;
-}
-
-static int
-wf2qp_new_queue(struct dn_queue *_q)
-{
- struct wf2qp_queue *q = (struct wf2qp_queue *)_q;
-
- _q->ni.oid.subtype = DN_SCHED_WF2QP;
- q->F = 0; /* not strictly necessary */
- q->S = q->F + 1; /* mark timestamp as invalid. */
- q->inv_w = ONE_FP / _q->fs->fs.par[0];
- if (_q->mq.head != NULL) {
- wf2qp_enqueue(_q->_si, _q, _q->mq.head);
- }
- return 0;
-}
-
-/*
- * Called when the infrastructure removes a queue (e.g. flowset
- * is reconfigured). Nothing to do if we did not 'own' the queue,
- * otherwise remove it from the right heap and adjust the sum
- * of weights.
- */
-static int
-wf2qp_free_queue(struct dn_queue *q)
-{
- struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
- struct wf2qp_si *si = (struct wf2qp_si *)(q->_si + 1);
-
- if (alg_fq->S >= alg_fq->F + 1)
- return 0; /* nothing to do, not in any heap */
- si->wsum -= q->fs->fs.par[0];
- if (si->wsum > 0)
- si->inv_wsum = ONE_FP/si->wsum;
-
- /* extract from the heap. XXX TODO we may need to adjust V
- * to make sure the invariants hold.
- */
- if (q->mq.head == NULL) {
- heap_extract(&si->idle_heap, q);
- } else if (DN_KEY_LT(si->V, alg_fq->S)) {
- heap_extract(&si->ne_heap, q);
- } else {
- heap_extract(&si->sch_heap, q);
- }
- return 0;
-}
-
-/*
- * WF2Q+ scheduler descriptor
- * contains the type of the scheduler, the name, the size of the
- * structures and function pointers.
- */
-static struct dn_alg wf2qp_desc = {
- _SI( .type = ) DN_SCHED_WF2QP,
- _SI( .name = ) "WF2Q+",
- _SI( .flags = ) DN_MULTIQUEUE,
-
- /* we need extra space in the si and the queue */
- _SI( .schk_datalen = ) 0,
- _SI( .si_datalen = ) sizeof(struct wf2qp_si),
- _SI( .q_datalen = ) sizeof(struct wf2qp_queue) -
- sizeof(struct dn_queue),
-
- _SI( .enqueue = ) wf2qp_enqueue,
- _SI( .dequeue = ) wf2qp_dequeue,
-
- _SI( .config = ) NULL,
- _SI( .destroy = ) NULL,
- _SI( .new_sched = ) wf2qp_new_sched,
- _SI( .free_sched = ) wf2qp_free_sched,
-
- _SI( .new_fsk = ) wf2qp_new_fsk,
- _SI( .free_fsk = ) NULL,
-
- _SI( .new_queue = ) wf2qp_new_queue,
- _SI( .free_queue = ) wf2qp_free_queue,
-};
-
-
-DECLARE_DNSCHED_MODULE(dn_wf2qp, &wf2qp_desc);
diff --git a/freebsd/sys/netpfil/ipfw/ip_dn_glue.c b/freebsd/sys/netpfil/ipfw/ip_dn_glue.c
deleted file mode 100644
index 8e0cc36d..00000000
--- a/freebsd/sys/netpfil/ipfw/ip_dn_glue.c
+++ /dev/null
@@ -1,848 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*-
- * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- *
- * Binary compatibility support for /sbin/ipfw RELENG_7 and RELENG_8
- */
-
-#include <rtems/bsd/local/opt_inet6.h>
-
-#include <rtems/bsd/sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/kernel.h>
-#include <rtems/bsd/sys/lock.h>
-#include <sys/module.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/time.h>
-#include <sys/taskqueue.h>
-#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
-#include <netinet/ip_fw.h>
-#include <netinet/ip_dummynet.h>
-
-#include <netpfil/ipfw/ip_fw_private.h>
-#include <netpfil/ipfw/dn_heap.h>
-#include <netpfil/ipfw/ip_dn_private.h>
-#include <netpfil/ipfw/dn_sched.h>
-
-/* FREEBSD7.2 ip_dummynet.h r191715*/
-
-struct dn_heap_entry7 {
- int64_t key; /* sorting key. Topmost element is smallest one */
- void *object; /* object pointer */
-};
-
-struct dn_heap7 {
- int size;
- int elements;
- int offset; /* XXX if > 0 this is the offset of direct ptr to obj */
- struct dn_heap_entry7 *p; /* really an array of "size" entries */
-};
-
-/* Common to 7.2 and 8 */
-struct dn_flow_set {
- SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */
-
- u_short fs_nr ; /* flow_set number */
- u_short flags_fs;
-#define DNOLD_HAVE_FLOW_MASK 0x0001
-#define DNOLD_IS_RED 0x0002
-#define DNOLD_IS_GENTLE_RED 0x0004
-#define DNOLD_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */
-#define DNOLD_NOERROR 0x0010 /* do not report ENOBUFS on drops */
-#define DNOLD_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */
-#define DNOLD_IS_PIPE 0x4000
-#define DNOLD_IS_QUEUE 0x8000
-
- struct dn_pipe7 *pipe ; /* pointer to parent pipe */
- u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */
-
- int weight ; /* WFQ queue weight */
- int qsize ; /* queue size in slots or bytes */
- int plr ; /* pkt loss rate (2^31-1 means 100%) */
-
- struct ipfw_flow_id flow_mask ;
-
- /* hash table of queues onto this flow_set */
- int rq_size ; /* number of slots */
- int rq_elements ; /* active elements */
- struct dn_flow_queue7 **rq; /* array of rq_size entries */
-
- u_int32_t last_expired ; /* do not expire too frequently */
- int backlogged ; /* #active queues for this flowset */
-
- /* RED parameters */
-#define SCALE_RED 16
-#define SCALE(x) ( (x) << SCALE_RED )
-#define SCALE_VAL(x) ( (x) >> SCALE_RED )
-#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED )
- int w_q ; /* queue weight (scaled) */
- int max_th ; /* maximum threshold for queue (scaled) */
- int min_th ; /* minimum threshold for queue (scaled) */
- int max_p ; /* maximum value for p_b (scaled) */
- u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */
- u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */
- u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */
- u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */
- u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */
- u_int lookup_depth ; /* depth of lookup table */
- int lookup_step ; /* granularity inside the lookup table */
- int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */
- int avg_pkt_size ; /* medium packet size */
- int max_pkt_size ; /* max packet size */
-};
-SLIST_HEAD(dn_flow_set_head, dn_flow_set);
-
-#define DN_IS_PIPE 0x4000
-#define DN_IS_QUEUE 0x8000
-struct dn_flow_queue7 {
- struct dn_flow_queue7 *next ;
- struct ipfw_flow_id id ;
-
- struct mbuf *head, *tail ; /* queue of packets */
- u_int len ;
- u_int len_bytes ;
-
- u_long numbytes;
-
- u_int64_t tot_pkts ; /* statistics counters */
- u_int64_t tot_bytes ;
- u_int32_t drops ;
-
- int hash_slot ; /* debugging/diagnostic */
-
- /* RED parameters */
- int avg ; /* average queue length est. (scaled) */
- int count ; /* arrivals since last RED drop */
- int random ; /* random value (scaled) */
- u_int32_t q_time; /* start of queue idle time */
-
- /* WF2Q+ support */
- struct dn_flow_set *fs ; /* parent flow set */
- int heap_pos ; /* position (index) of struct in heap */
- int64_t sched_time ; /* current time when queue enters ready_heap */
-
- int64_t S,F ; /* start time, finish time */
-};
-
-struct dn_pipe7 { /* a pipe */
- SLIST_ENTRY(dn_pipe7) next; /* linked list in a hash slot */
-
- int pipe_nr ; /* number */
- int bandwidth; /* really, bytes/tick. */
- int delay ; /* really, ticks */
-
- struct mbuf *head, *tail ; /* packets in delay line */
-
- /* WF2Q+ */
- struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/
- struct dn_heap7 not_eligible_heap; /* top extract- key Start time */
- struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */
-
- int64_t V ; /* virtual time */
- int sum; /* sum of weights of all active sessions */
-
- int numbytes;
-
- int64_t sched_time ; /* time pipe was scheduled in ready_heap */
-
- /*
- * When the tx clock come from an interface (if_name[0] != '\0'), its name
- * is stored below, whereas the ifp is filled when the rule is configured.
- */
- char if_name[IFNAMSIZ];
- struct ifnet *ifp ;
- int ready ; /* set if ifp != NULL and we got a signal from it */
-
- struct dn_flow_set fs ; /* used with fixed-rate flows */
-};
-SLIST_HEAD(dn_pipe_head7, dn_pipe7);
-
-
-/* FREEBSD8 ip_dummynet.h r196045 */
-struct dn_flow_queue8 {
- struct dn_flow_queue8 *next ;
- struct ipfw_flow_id id ;
-
- struct mbuf *head, *tail ; /* queue of packets */
- u_int len ;
- u_int len_bytes ;
-
- uint64_t numbytes ; /* credit for transmission (dynamic queues) */
- int64_t extra_bits; /* extra bits simulating unavailable channel */
-
- u_int64_t tot_pkts ; /* statistics counters */
- u_int64_t tot_bytes ;
- u_int32_t drops ;
-
- int hash_slot ; /* debugging/diagnostic */
-
- /* RED parameters */
- int avg ; /* average queue length est. (scaled) */
- int count ; /* arrivals since last RED drop */
- int random ; /* random value (scaled) */
- int64_t idle_time; /* start of queue idle time */
-
- /* WF2Q+ support */
- struct dn_flow_set *fs ; /* parent flow set */
- int heap_pos ; /* position (index) of struct in heap */
- int64_t sched_time ; /* current time when queue enters ready_heap */
-
- int64_t S,F ; /* start time, finish time */
-};
-
-struct dn_pipe8 { /* a pipe */
- SLIST_ENTRY(dn_pipe8) next; /* linked list in a hash slot */
-
- int pipe_nr ; /* number */
- int bandwidth; /* really, bytes/tick. */
- int delay ; /* really, ticks */
-
- struct mbuf *head, *tail ; /* packets in delay line */
-
- /* WF2Q+ */
- struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/
- struct dn_heap7 not_eligible_heap; /* top extract- key Start time */
- struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */
-
- int64_t V ; /* virtual time */
- int sum; /* sum of weights of all active sessions */
-
- /* Same as in dn_flow_queue, numbytes can become large */
- int64_t numbytes; /* bits I can transmit (more or less). */
- uint64_t burst; /* burst size, scaled: bits * hz */
-
- int64_t sched_time ; /* time pipe was scheduled in ready_heap */
- int64_t idle_time; /* start of pipe idle time */
-
- char if_name[IFNAMSIZ];
- struct ifnet *ifp ;
- int ready ; /* set if ifp != NULL and we got a signal from it */
-
- struct dn_flow_set fs ; /* used with fixed-rate flows */
-
- /* fields to simulate a delay profile */
-#define ED_MAX_NAME_LEN 32
- char name[ED_MAX_NAME_LEN];
- int loss_level;
- int samples_no;
- int *samples;
-};
-
-#define ED_MAX_SAMPLES_NO 1024
-struct dn_pipe_max8 {
- struct dn_pipe8 pipe;
- int samples[ED_MAX_SAMPLES_NO];
-};
-SLIST_HEAD(dn_pipe_head8, dn_pipe8);
-
-/*
- * Changes from 7.2 to 8:
- * dn_pipe:
- * numbytes from int to int64_t
- * add burst (int64_t)
- * add idle_time (int64_t)
- * add profile
- * add struct dn_pipe_max
- * add flag DN_HAS_PROFILE
- *
- * dn_flow_queue
- * numbytes from u_long to int64_t
- * add extra_bits (int64_t)
- * q_time from u_int32_t to int64_t and name idle_time
- *
- * dn_flow_set unchanged
- *
- */
-
-/* NOTE:XXX copied from dummynet.c */
-#define O_NEXT(p, len) ((void *)((char *)p + len))
-static void
-oid_fill(struct dn_id *oid, int len, int type, uintptr_t id)
-{
- oid->len = len;
- oid->type = type;
- oid->subtype = 0;
- oid->id = id;
-}
-/* make room in the buffer and move the pointer forward */
-static void *
-o_next(struct dn_id **o, int len, int type)
-{
- struct dn_id *ret = *o;
- oid_fill(ret, len, type, 0);
- *o = O_NEXT(*o, len);
- return ret;
-}
-
-
-static size_t pipesize7 = sizeof(struct dn_pipe7);
-static size_t pipesize8 = sizeof(struct dn_pipe8);
-static size_t pipesizemax8 = sizeof(struct dn_pipe_max8);
-
-/* Indicate 'ipfw' version
- * 1: from FreeBSD 7.2
- * 0: from FreeBSD 8
- * -1: unknow (for now is unused)
- *
- * It is update when a IP_DUMMYNET_DEL or IP_DUMMYNET_CONFIGURE request arrives
- * NOTE: if a IP_DUMMYNET_GET arrives and the 'ipfw' version is unknow,
- * it is suppose to be the FreeBSD 8 version.
- */
-static int is7 = 0;
-
-static int
-convertflags2new(int src)
-{
- int dst = 0;
-
- if (src & DNOLD_HAVE_FLOW_MASK)
- dst |= DN_HAVE_MASK;
- if (src & DNOLD_QSIZE_IS_BYTES)
- dst |= DN_QSIZE_BYTES;
- if (src & DNOLD_NOERROR)
- dst |= DN_NOERROR;
- if (src & DNOLD_IS_RED)
- dst |= DN_IS_RED;
- if (src & DNOLD_IS_GENTLE_RED)
- dst |= DN_IS_GENTLE_RED;
- if (src & DNOLD_HAS_PROFILE)
- dst |= DN_HAS_PROFILE;
-
- return dst;
-}
-
-static int
-convertflags2old(int src)
-{
- int dst = 0;
-
- if (src & DN_HAVE_MASK)
- dst |= DNOLD_HAVE_FLOW_MASK;
- if (src & DN_IS_RED)
- dst |= DNOLD_IS_RED;
- if (src & DN_IS_GENTLE_RED)
- dst |= DNOLD_IS_GENTLE_RED;
- if (src & DN_NOERROR)
- dst |= DNOLD_NOERROR;
- if (src & DN_HAS_PROFILE)
- dst |= DNOLD_HAS_PROFILE;
- if (src & DN_QSIZE_BYTES)
- dst |= DNOLD_QSIZE_IS_BYTES;
-
- return dst;
-}
-
-static int
-dn_compat_del(void *v)
-{
- struct dn_pipe7 *p = (struct dn_pipe7 *) v;
- struct dn_pipe8 *p8 = (struct dn_pipe8 *) v;
- struct {
- struct dn_id oid;
- uintptr_t a[1]; /* add more if we want a list */
- } cmd;
-
- /* XXX DN_API_VERSION ??? */
- oid_fill((void *)&cmd, sizeof(cmd), DN_CMD_DELETE, DN_API_VERSION);
-
- if (is7) {
- if (p->pipe_nr == 0 && p->fs.fs_nr == 0)
- return EINVAL;
- if (p->pipe_nr != 0 && p->fs.fs_nr != 0)
- return EINVAL;
- } else {
- if (p8->pipe_nr == 0 && p8->fs.fs_nr == 0)
- return EINVAL;
- if (p8->pipe_nr != 0 && p8->fs.fs_nr != 0)
- return EINVAL;
- }
-
- if (p->pipe_nr != 0) { /* pipe x delete */
- cmd.a[0] = p->pipe_nr;
- cmd.oid.subtype = DN_LINK;
- } else { /* queue x delete */
- cmd.oid.subtype = DN_FS;
- cmd.a[0] = (is7) ? p->fs.fs_nr : p8->fs.fs_nr;
- }
-
- return do_config(&cmd, cmd.oid.len);
-}
-
-static int
-dn_compat_config_queue(struct dn_fs *fs, void* v)
-{
- struct dn_pipe7 *p7 = (struct dn_pipe7 *)v;
- struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
- struct dn_flow_set *f;
-
- if (is7)
- f = &p7->fs;
- else
- f = &p8->fs;
-
- fs->fs_nr = f->fs_nr;
- fs->sched_nr = f->parent_nr;
- fs->flow_mask = f->flow_mask;
- fs->buckets = f->rq_size;
- fs->qsize = f->qsize;
- fs->plr = f->plr;
- fs->par[0] = f->weight;
- fs->flags = convertflags2new(f->flags_fs);
- if (fs->flags & DN_IS_GENTLE_RED || fs->flags & DN_IS_RED) {
- fs->w_q = f->w_q;
- fs->max_th = f->max_th;
- fs->min_th = f->min_th;
- fs->max_p = f->max_p;
- }
-
- return 0;
-}
-
-static int
-dn_compat_config_pipe(struct dn_sch *sch, struct dn_link *p,
- struct dn_fs *fs, void* v)
-{
- struct dn_pipe7 *p7 = (struct dn_pipe7 *)v;
- struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
- int i = p7->pipe_nr;
-
- sch->sched_nr = i;
- sch->oid.subtype = 0;
- p->link_nr = i;
- fs->fs_nr = i + 2*DN_MAX_ID;
- fs->sched_nr = i + DN_MAX_ID;
-
- /* Common to 7 and 8 */
- p->bandwidth = p7->bandwidth;
- p->delay = p7->delay;
- if (!is7) {
- /* FreeBSD 8 has burst */
- p->burst = p8->burst;
- }
-
- /* fill the fifo flowset */
- dn_compat_config_queue(fs, v);
- fs->fs_nr = i + 2*DN_MAX_ID;
- fs->sched_nr = i + DN_MAX_ID;
-
- /* Move scheduler related parameter from fs to sch */
- sch->buckets = fs->buckets; /*XXX*/
- fs->buckets = 0;
- if (fs->flags & DN_HAVE_MASK) {
- sch->flags |= DN_HAVE_MASK;
- fs->flags &= ~DN_HAVE_MASK;
- sch->sched_mask = fs->flow_mask;
- bzero(&fs->flow_mask, sizeof(struct ipfw_flow_id));
- }
-
- return 0;
-}
-
-static int
-dn_compat_config_profile(struct dn_profile *pf, struct dn_link *p,
- void *v)
-{
- struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
-
- p8->samples = &(((struct dn_pipe_max8 *)p8)->samples[0]);
-
- pf->link_nr = p->link_nr;
- pf->loss_level = p8->loss_level;
-// pf->bandwidth = p->bandwidth; //XXX bandwidth redundant?
- pf->samples_no = p8->samples_no;
- strncpy(pf->name, p8->name,sizeof(pf->name));
- bcopy(p8->samples, pf->samples, sizeof(pf->samples));
-
- return 0;
-}
-
-/*
- * If p->pipe_nr != 0 the command is 'pipe x config', so need to create
- * the three main struct, else only a flowset is created
- */
-static int
-dn_compat_configure(void *v)
-{
- struct dn_id *buf = NULL, *base;
- struct dn_sch *sch = NULL;
- struct dn_link *p = NULL;
- struct dn_fs *fs = NULL;
- struct dn_profile *pf = NULL;
- int lmax;
- int error;
-
- struct dn_pipe7 *p7 = (struct dn_pipe7 *)v;
- struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
-
- int i; /* number of object to configure */
-
- lmax = sizeof(struct dn_id); /* command header */
- lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) +
- sizeof(struct dn_fs) + sizeof(struct dn_profile);
-
- base = buf = malloc(lmax, M_DUMMYNET, M_WAIT|M_ZERO);
- o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIG);
- base->id = DN_API_VERSION;
-
- /* pipe_nr is the same in p7 and p8 */
- i = p7->pipe_nr;
- if (i != 0) { /* pipe config */
- sch = o_next(&buf, sizeof(*sch), DN_SCH);
- p = o_next(&buf, sizeof(*p), DN_LINK);
- fs = o_next(&buf, sizeof(*fs), DN_FS);
-
- error = dn_compat_config_pipe(sch, p, fs, v);
- if (error) {
- free(buf, M_DUMMYNET);
- return error;
- }
- if (!is7 && p8->samples_no > 0) {
- /* Add profiles*/
- pf = o_next(&buf, sizeof(*pf), DN_PROFILE);
- error = dn_compat_config_profile(pf, p, v);
- if (error) {
- free(buf, M_DUMMYNET);
- return error;
- }
- }
- } else { /* queue config */
- fs = o_next(&buf, sizeof(*fs), DN_FS);
- error = dn_compat_config_queue(fs, v);
- if (error) {
- free(buf, M_DUMMYNET);
- return error;
- }
- }
- error = do_config(base, (char *)buf - (char *)base);
-
- if (buf)
- free(buf, M_DUMMYNET);
- return error;
-}
-
-int
-dn_compat_calc_size(void)
-{
- int need = 0;
- /* XXX use FreeBSD 8 struct size */
- /* NOTE:
- * - half scheduler: schk_count/2
- * - all flowset: fsk_count
- * - all flowset queues: queue_count
- * - all pipe queue: si_count
- */
- need += dn_cfg.schk_count * sizeof(struct dn_pipe8) / 2;
- need += dn_cfg.fsk_count * sizeof(struct dn_flow_set);
- need += dn_cfg.si_count * sizeof(struct dn_flow_queue8);
- need += dn_cfg.queue_count * sizeof(struct dn_flow_queue8);
-
- return need;
-}
-
-int
-dn_c_copy_q (void *_ni, void *arg)
-{
- struct copy_args *a = arg;
- struct dn_flow_queue7 *fq7 = (struct dn_flow_queue7 *)*a->start;
- struct dn_flow_queue8 *fq8 = (struct dn_flow_queue8 *)*a->start;
- struct dn_flow *ni = (struct dn_flow *)_ni;
- int size = 0;
-
- /* XXX hash slot not set */
- /* No difference between 7.2/8 */
- fq7->len = ni->length;
- fq7->len_bytes = ni->len_bytes;
- fq7->id = ni->fid;
-
- if (is7) {
- size = sizeof(struct dn_flow_queue7);
- fq7->tot_pkts = ni->tot_pkts;
- fq7->tot_bytes = ni->tot_bytes;
- fq7->drops = ni->drops;
- } else {
- size = sizeof(struct dn_flow_queue8);
- fq8->tot_pkts = ni->tot_pkts;
- fq8->tot_bytes = ni->tot_bytes;
- fq8->drops = ni->drops;
- }
-
- *a->start += size;
- return 0;
-}
-
-int
-dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq)
-{
- struct dn_link *l = &s->link;
- struct dn_fsk *f = s->fs;
-
- struct dn_pipe7 *pipe7 = (struct dn_pipe7 *)*a->start;
- struct dn_pipe8 *pipe8 = (struct dn_pipe8 *)*a->start;
- struct dn_flow_set *fs;
- int size = 0;
-
- if (is7) {
- fs = &pipe7->fs;
- size = sizeof(struct dn_pipe7);
- } else {
- fs = &pipe8->fs;
- size = sizeof(struct dn_pipe8);
- }
-
- /* These 4 field are the same in pipe7 and pipe8 */
- pipe7->next.sle_next = (struct dn_pipe7 *)DN_IS_PIPE;
- pipe7->bandwidth = l->bandwidth;
- pipe7->delay = l->delay * 1000 / hz;
- pipe7->pipe_nr = l->link_nr - DN_MAX_ID;
-
- if (!is7) {
- if (s->profile) {
- struct dn_profile *pf = s->profile;
- strncpy(pipe8->name, pf->name, sizeof(pf->name));
- pipe8->loss_level = pf->loss_level;
- pipe8->samples_no = pf->samples_no;
- }
- pipe8->burst = div64(l->burst , 8 * hz);
- }
-
- fs->flow_mask = s->sch.sched_mask;
- fs->rq_size = s->sch.buckets ? s->sch.buckets : 1;
-
- fs->parent_nr = l->link_nr - DN_MAX_ID;
- fs->qsize = f->fs.qsize;
- fs->plr = f->fs.plr;
- fs->w_q = f->fs.w_q;
- fs->max_th = f->max_th;
- fs->min_th = f->min_th;
- fs->max_p = f->fs.max_p;
- fs->rq_elements = nq;
-
- fs->flags_fs = convertflags2old(f->fs.flags);
-
- *a->start += size;
- return 0;
-}
-
-
-int
-dn_compat_copy_pipe(struct copy_args *a, void *_o)
-{
- int have = a->end - *a->start;
- int need = 0;
- int pipe_size = sizeof(struct dn_pipe8);
- int queue_size = sizeof(struct dn_flow_queue8);
- int n_queue = 0; /* number of queues */
-
- struct dn_schk *s = (struct dn_schk *)_o;
- /* calculate needed space:
- * - struct dn_pipe
- * - if there are instances, dn_queue * n_instances
- */
- n_queue = (s->sch.flags & DN_HAVE_MASK ? dn_ht_entries(s->siht) :
- (s->siht ? 1 : 0));
- need = pipe_size + queue_size * n_queue;
- if (have < need) {
- D("have %d < need %d", have, need);
- return 1;
- }
- /* copy pipe */
- dn_c_copy_pipe(s, a, n_queue);
-
- /* copy queues */
- if (s->sch.flags & DN_HAVE_MASK)
- dn_ht_scan(s->siht, dn_c_copy_q, a);
- else if (s->siht)
- dn_c_copy_q(s->siht, a);
- return 0;
-}
-
-int
-dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq)
-{
- struct dn_flow_set *fs = (struct dn_flow_set *)*a->start;
-
- fs->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE;
- fs->fs_nr = f->fs.fs_nr;
- fs->qsize = f->fs.qsize;
- fs->plr = f->fs.plr;
- fs->w_q = f->fs.w_q;
- fs->max_th = f->max_th;
- fs->min_th = f->min_th;
- fs->max_p = f->fs.max_p;
- fs->flow_mask = f->fs.flow_mask;
- fs->rq_elements = nq;
- fs->rq_size = (f->fs.buckets ? f->fs.buckets : 1);
- fs->parent_nr = f->fs.sched_nr;
- fs->weight = f->fs.par[0];
-
- fs->flags_fs = convertflags2old(f->fs.flags);
- *a->start += sizeof(struct dn_flow_set);
- return 0;
-}
-
-int
-dn_compat_copy_queue(struct copy_args *a, void *_o)
-{
- int have = a->end - *a->start;
- int need = 0;
- int fs_size = sizeof(struct dn_flow_set);
- int queue_size = sizeof(struct dn_flow_queue8);
-
- struct dn_fsk *fs = (struct dn_fsk *)_o;
- int n_queue = 0; /* number of queues */
-
- n_queue = (fs->fs.flags & DN_HAVE_MASK ? dn_ht_entries(fs->qht) :
- (fs->qht ? 1 : 0));
-
- need = fs_size + queue_size * n_queue;
- if (have < need) {
- D("have < need");
- return 1;
- }
-
- /* copy flowset */
- dn_c_copy_fs(fs, a, n_queue);
-
- /* copy queues */
- if (fs->fs.flags & DN_HAVE_MASK)
- dn_ht_scan(fs->qht, dn_c_copy_q, a);
- else if (fs->qht)
- dn_c_copy_q(fs->qht, a);
-
- return 0;
-}
-
-int
-copy_data_helper_compat(void *_o, void *_arg)
-{
- struct copy_args *a = _arg;
-
- if (a->type == DN_COMPAT_PIPE) {
- struct dn_schk *s = _o;
- if (s->sch.oid.subtype != 1 || s->sch.sched_nr <= DN_MAX_ID) {
- return 0; /* not old type */
- }
- /* copy pipe parameters, and if instance exists, copy
- * other parameters and eventually queues.
- */
- if(dn_compat_copy_pipe(a, _o))
- return DNHT_SCAN_END;
- } else if (a->type == DN_COMPAT_QUEUE) {
- struct dn_fsk *fs = _o;
- if (fs->fs.fs_nr >= DN_MAX_ID)
- return 0;
- if (dn_compat_copy_queue(a, _o))
- return DNHT_SCAN_END;
- }
- return 0;
-}
-
-/* Main function to manage old requests */
-int
-ip_dummynet_compat(struct sockopt *sopt)
-{
- int error=0;
- void *v = NULL;
- struct dn_id oid;
-
- /* Lenght of data, used to found ipfw version... */
- int len = sopt->sopt_valsize;
-
- /* len can be 0 if command was dummynet_flush */
- if (len == pipesize7) {
- D("setting compatibility with FreeBSD 7.2");
- is7 = 1;
- }
- else if (len == pipesize8 || len == pipesizemax8) {
- D("setting compatibility with FreeBSD 8");
- is7 = 0;
- }
-
- switch (sopt->sopt_name) {
- default:
- printf("dummynet: -- unknown option %d", sopt->sopt_name);
- error = EINVAL;
- break;
-
- case IP_DUMMYNET_FLUSH:
- oid_fill(&oid, sizeof(oid), DN_CMD_FLUSH, DN_API_VERSION);
- do_config(&oid, oid.len);
- break;
-
- case IP_DUMMYNET_DEL:
- v = malloc(len, M_TEMP, M_WAITOK);
- error = sooptcopyin(sopt, v, len, len);
- if (error)
- break;
- error = dn_compat_del(v);
- free(v, M_TEMP);
- break;
-
- case IP_DUMMYNET_CONFIGURE:
- v = malloc(len, M_TEMP, M_WAITOK);
- error = sooptcopyin(sopt, v, len, len);
- if (error)
- break;
- error = dn_compat_configure(v);
- free(v, M_TEMP);
- break;
-
- case IP_DUMMYNET_GET: {
- void *buf;
- int ret;
- int original_size = sopt->sopt_valsize;
- int size;
-
- ret = dummynet_get(sopt, &buf);
- if (ret)
- return 0;//XXX ?
- size = sopt->sopt_valsize;
- sopt->sopt_valsize = original_size;
- D("size=%d, buf=%p", size, buf);
- ret = sooptcopyout(sopt, buf, size);
- if (ret)
- printf(" %s ERROR sooptcopyout\n", __FUNCTION__);
- if (buf)
- free(buf, M_DUMMYNET);
- }
- }
-
- return error;
-}
-
-
diff --git a/freebsd/sys/netpfil/ipfw/ip_dn_io.c b/freebsd/sys/netpfil/ipfw/ip_dn_io.c
deleted file mode 100644
index 23392a55..00000000
--- a/freebsd/sys/netpfil/ipfw/ip_dn_io.c
+++ /dev/null
@@ -1,852 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*-
- * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * Dummynet portions related to packet handling.
- */
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <rtems/bsd/local/opt_inet6.h>
-
-#include <rtems/bsd/sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/kernel.h>
-#include <rtems/bsd/sys/lock.h>
-#include <sys/module.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/sysctl.h>
-
-#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
-#include <net/netisr.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-#include <netinet/ip.h> /* ip_len, ip_off */
-#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
-#include <netinet/ip_fw.h>
-#include <netinet/ip_dummynet.h>
-#include <netinet/if_ether.h> /* various ether_* routines */
-#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
-#include <netinet6/ip6_var.h>
-
-#include <netpfil/ipfw/ip_fw_private.h>
-#include <netpfil/ipfw/dn_heap.h>
-#include <netpfil/ipfw/ip_dn_private.h>
-#include <netpfil/ipfw/dn_sched.h>
-
-/*
- * We keep a private variable for the simulation time, but we could
- * probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
- * instead of dn_cfg.curr_time
- */
-
-struct dn_parms dn_cfg;
-//VNET_DEFINE(struct dn_parms, _base_dn_cfg);
-
-static long tick_last; /* Last tick duration (usec). */
-static long tick_delta; /* Last vs standard tick diff (usec). */
-static long tick_delta_sum; /* Accumulated tick difference (usec).*/
-static long tick_adjustment; /* Tick adjustments done. */
-static long tick_lost; /* Lost(coalesced) ticks number. */
-/* Adjusted vs non-adjusted curr_time difference (ticks). */
-static long tick_diff;
-
-static unsigned long io_pkt;
-static unsigned long io_pkt_fast;
-static unsigned long io_pkt_drop;
-
-/*
- * We use a heap to store entities for which we have pending timer events.
- * The heap is checked at every tick and all entities with expired events
- * are extracted.
- */
-
-MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap");
-
-extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
-
-#ifdef SYSCTL_NODE
-
-SYSBEGIN(f4)
-
-SYSCTL_DECL(_net_inet);
-SYSCTL_DECL(_net_inet_ip);
-static SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
-
-/* wrapper to pass dn_cfg fields to SYSCTL_* */
-//#define DC(x) (&(VNET_NAME(_base_dn_cfg).x))
-#define DC(x) (&(dn_cfg.x))
-/* parameters */
-
-static int
-sysctl_hash_size(SYSCTL_HANDLER_ARGS)
-{
- int error, value;
-
- value = dn_cfg.hash_size;
- error = sysctl_handle_int(oidp, &value, 0, req);
- if (error != 0 || req->newptr == NULL)
- return (error);
- if (value < 16 || value > 65536)
- return (EINVAL);
- dn_cfg.hash_size = value;
- return (0);
-}
-
-SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hash_size,
- CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_hash_size,
- "I", "Default hash table size");
-
-static int
-sysctl_limits(SYSCTL_HANDLER_ARGS)
-{
- int error;
- long value;
-
- if (arg2 != 0)
- value = dn_cfg.slot_limit;
- else
- value = dn_cfg.byte_limit;
- error = sysctl_handle_long(oidp, &value, 0, req);
-
- if (error != 0 || req->newptr == NULL)
- return (error);
- if (arg2 != 0) {
- if (value < 1)
- return (EINVAL);
- dn_cfg.slot_limit = value;
- } else {
- if (value < 1500)
- return (EINVAL);
- dn_cfg.byte_limit = value;
- }
- return (0);
-}
-
-SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
- CTLTYPE_LONG | CTLFLAG_RW, 0, 1, sysctl_limits,
- "L", "Upper limit in slots for pipe queue.");
-SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit,
- CTLTYPE_LONG | CTLFLAG_RW, 0, 0, sysctl_limits,
- "L", "Upper limit in bytes for pipe queue.");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
- CTLFLAG_RW, DC(io_fast), 0, "Enable fast dummynet io.");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug,
- CTLFLAG_RW, DC(debug), 0, "Dummynet debug level");
-
-/* RED parameters */
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
- CTLFLAG_RD, DC(red_lookup_depth), 0, "Depth of RED lookup table");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
- CTLFLAG_RD, DC(red_avg_pkt_size), 0, "RED Medium packet size");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
- CTLFLAG_RD, DC(red_max_pkt_size), 0, "RED Max packet size");
-
-/* time adjustment */
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
- CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec).");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum,
- CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec).");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment,
- CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done.");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff,
- CTLFLAG_RD, &tick_diff, 0,
- "Adjusted vs non-adjusted curr_time difference (ticks).");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
- CTLFLAG_RD, &tick_lost, 0,
- "Number of ticks coalesced by dummynet taskqueue.");
-
-/* Drain parameters */
-SYSCTL_UINT(_net_inet_ip_dummynet, OID_AUTO, expire,
- CTLFLAG_RW, DC(expire), 0, "Expire empty queues/pipes");
-SYSCTL_UINT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle,
- CTLFLAG_RD, DC(expire_cycle), 0, "Expire cycle for queues/pipes");
-
-/* statistics */
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, schk_count,
- CTLFLAG_RD, DC(schk_count), 0, "Number of schedulers");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, si_count,
- CTLFLAG_RD, DC(si_count), 0, "Number of scheduler instances");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, fsk_count,
- CTLFLAG_RD, DC(fsk_count), 0, "Number of flowsets");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, queue_count,
- CTLFLAG_RD, DC(queue_count), 0, "Number of queues");
-SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
- CTLFLAG_RD, &io_pkt, 0,
- "Number of packets passed to dummynet.");
-SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
- CTLFLAG_RD, &io_pkt_fast, 0,
- "Number of packets bypassed dummynet scheduler.");
-SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
- CTLFLAG_RD, &io_pkt_drop, 0,
- "Number of packets dropped by dummynet.");
-#undef DC
-SYSEND
-
-#endif
-
-static void dummynet_send(struct mbuf *);
-
-/*
- * Packets processed by dummynet have an mbuf tag associated with
- * them that carries their dummynet state.
- * Outside dummynet, only the 'rule' field is relevant, and it must
- * be at the beginning of the structure.
- */
-struct dn_pkt_tag {
- struct ipfw_rule_ref rule; /* matching rule */
-
- /* second part, dummynet specific */
- int dn_dir; /* action when packet comes out.*/
- /* see ip_fw_private.h */
- uint64_t output_time; /* when the pkt is due for delivery*/
- struct ifnet *ifp; /* interface, for ip_output */
- struct _ip6dn_args ip6opt; /* XXX ipv6 options */
-};
-
-/*
- * Return the mbuf tag holding the dummynet state (it should
- * be the first one on the list).
- */
-static struct dn_pkt_tag *
-dn_tag_get(struct mbuf *m)
-{
- struct m_tag *mtag = m_tag_first(m);
- KASSERT(mtag != NULL &&
- mtag->m_tag_cookie == MTAG_ABI_COMPAT &&
- mtag->m_tag_id == PACKET_TAG_DUMMYNET,
- ("packet on dummynet queue w/o dummynet tag!"));
- return (struct dn_pkt_tag *)(mtag+1);
-}
-
-static inline void
-mq_append(struct mq *q, struct mbuf *m)
-{
- if (q->head == NULL)
- q->head = m;
- else
- q->tail->m_nextpkt = m;
- q->tail = m;
- m->m_nextpkt = NULL;
-}
-
-/*
- * Dispose a list of packet. Use a functions so if we need to do
- * more work, this is a central point to do it.
- */
-void dn_free_pkts(struct mbuf *mnext)
-{
- struct mbuf *m;
-
- while ((m = mnext) != NULL) {
- mnext = m->m_nextpkt;
- FREE_PKT(m);
- }
-}
-
-static int
-red_drops (struct dn_queue *q, int len)
-{
- /*
- * RED algorithm
- *
- * RED calculates the average queue size (avg) using a low-pass filter
- * with an exponential weighted (w_q) moving average:
- * avg <- (1-w_q) * avg + w_q * q_size
- * where q_size is the queue length (measured in bytes or * packets).
- *
- * If q_size == 0, we compute the idle time for the link, and set
- * avg = (1 - w_q)^(idle/s)
- * where s is the time needed for transmitting a medium-sized packet.
- *
- * Now, if avg < min_th the packet is enqueued.
- * If avg > max_th the packet is dropped. Otherwise, the packet is
- * dropped with probability P function of avg.
- */
-
- struct dn_fsk *fs = q->fs;
- int64_t p_b = 0;
-
- /* Queue in bytes or packets? */
- uint32_t q_size = (fs->fs.flags & DN_QSIZE_BYTES) ?
- q->ni.len_bytes : q->ni.length;
-
- /* Average queue size estimation. */
- if (q_size != 0) {
- /* Queue is not empty, avg <- avg + (q_size - avg) * w_q */
- int diff = SCALE(q_size) - q->avg;
- int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q);
-
- q->avg += (int)v;
- } else {
- /*
- * Queue is empty, find for how long the queue has been
- * empty and use a lookup table for computing
- * (1 - * w_q)^(idle_time/s) where s is the time to send a
- * (small) packet.
- * XXX check wraps...
- */
- if (q->avg) {
- u_int t = div64((dn_cfg.curr_time - q->q_time), fs->lookup_step);
-
- q->avg = (t < fs->lookup_depth) ?
- SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
- }
- }
-
- /* Should i drop? */
- if (q->avg < fs->min_th) {
- q->count = -1;
- return (0); /* accept packet */
- }
- if (q->avg >= fs->max_th) { /* average queue >= max threshold */
- if (fs->fs.flags & DN_IS_GENTLE_RED) {
- /*
- * According to Gentle-RED, if avg is greater than
- * max_th the packet is dropped with a probability
- * p_b = c_3 * avg - c_4
- * where c_3 = (1 - max_p) / max_th
- * c_4 = 1 - 2 * max_p
- */
- p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) -
- fs->c_4;
- } else {
- q->count = -1;
- return (1);
- }
- } else if (q->avg > fs->min_th) {
- /*
- * We compute p_b using the linear dropping function
- * p_b = c_1 * avg - c_2
- * where c_1 = max_p / (max_th - min_th)
- * c_2 = max_p * min_th / (max_th - min_th)
- */
- p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2;
- }
-
- if (fs->fs.flags & DN_QSIZE_BYTES)
- p_b = div64((p_b * len) , fs->max_pkt_size);
- if (++q->count == 0)
- q->random = random() & 0xffff;
- else {
- /*
- * q->count counts packets arrived since last drop, so a greater
- * value of q->count means a greater packet drop probability.
- */
- if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) {
- q->count = 0;
- /* After a drop we calculate a new random value. */
- q->random = random() & 0xffff;
- return (1); /* drop */
- }
- }
- /* End of RED algorithm. */
-
- return (0); /* accept */
-
-}
-
-/*
- * Enqueue a packet in q, subject to space and queue management policy
- * (whose parameters are in q->fs).
- * Update stats for the queue and the scheduler.
- * Return 0 on success, 1 on drop. The packet is consumed anyways.
- */
-int
-dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
-{
- struct dn_fs *f;
- struct dn_flow *ni; /* stats for scheduler instance */
- uint64_t len;
-
- if (q->fs == NULL || q->_si == NULL) {
- printf("%s fs %p si %p, dropping\n",
- __FUNCTION__, q->fs, q->_si);
- FREE_PKT(m);
- return 1;
- }
- f = &(q->fs->fs);
- ni = &q->_si->ni;
- len = m->m_pkthdr.len;
- /* Update statistics, then check reasons to drop pkt. */
- q->ni.tot_bytes += len;
- q->ni.tot_pkts++;
- ni->tot_bytes += len;
- ni->tot_pkts++;
- if (drop)
- goto drop;
- if (f->plr && random() < f->plr)
- goto drop;
- if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len))
- goto drop;
- if (f->flags & DN_QSIZE_BYTES) {
- if (q->ni.len_bytes > f->qsize)
- goto drop;
- } else if (q->ni.length >= f->qsize) {
- goto drop;
- }
- mq_append(&q->mq, m);
- q->ni.length++;
- q->ni.len_bytes += len;
- ni->length++;
- ni->len_bytes += len;
- return 0;
-
-drop:
- io_pkt_drop++;
- q->ni.drops++;
- ni->drops++;
- FREE_PKT(m);
- return 1;
-}
-
-/*
- * Fetch packets from the delay line which are due now. If there are
- * leftover packets, reinsert the delay line in the heap.
- * Runs under scheduler lock.
- */
-static void
-transmit_event(struct mq *q, struct delay_line *dline, uint64_t now)
-{
- struct mbuf *m;
- struct dn_pkt_tag *pkt = NULL;
-
- dline->oid.subtype = 0; /* not in heap */
- while ((m = dline->mq.head) != NULL) {
- pkt = dn_tag_get(m);
- if (!DN_KEY_LEQ(pkt->output_time, now))
- break;
- dline->mq.head = m->m_nextpkt;
- mq_append(q, m);
- }
- if (m != NULL) {
- dline->oid.subtype = 1; /* in heap */
- heap_insert(&dn_cfg.evheap, pkt->output_time, dline);
- }
-}
-
-/*
- * Convert the additional MAC overheads/delays into an equivalent
- * number of bits for the given data rate. The samples are
- * in milliseconds so we need to divide by 1000.
- */
-static uint64_t
-extra_bits(struct mbuf *m, struct dn_schk *s)
-{
- int index;
- uint64_t bits;
- struct dn_profile *pf = s->profile;
-
- if (!pf || pf->samples_no == 0)
- return 0;
- index = random() % pf->samples_no;
- bits = div64((uint64_t)pf->samples[index] * s->link.bandwidth, 1000);
- if (index >= pf->loss_level) {
- struct dn_pkt_tag *dt = dn_tag_get(m);
- if (dt)
- dt->dn_dir = DIR_DROP;
- }
- return bits;
-}
-
-/*
- * Send traffic from a scheduler instance due by 'now'.
- * Return a pointer to the head of the queue.
- */
-static struct mbuf *
-serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now)
-{
- struct mq def_q;
- struct dn_schk *s = si->sched;
- struct mbuf *m = NULL;
- int delay_line_idle = (si->dline.mq.head == NULL);
- int done, bw;
-
- if (q == NULL) {
- q = &def_q;
- q->head = NULL;
- }
-
- bw = s->link.bandwidth;
- si->kflags &= ~DN_ACTIVE;
-
- if (bw > 0)
- si->credit += (now - si->sched_time) * bw;
- else
- si->credit = 0;
- si->sched_time = now;
- done = 0;
- while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) {
- uint64_t len_scaled;
-
- done++;
- len_scaled = (bw == 0) ? 0 : hz *
- (m->m_pkthdr.len * 8 + extra_bits(m, s));
- si->credit -= len_scaled;
- /* Move packet in the delay line */
- dn_tag_get(m)->output_time = dn_cfg.curr_time + s->link.delay ;
- mq_append(&si->dline.mq, m);
- }
-
- /*
- * If credit >= 0 the instance is idle, mark time.
- * Otherwise put back in the heap, and adjust the output
- * time of the last inserted packet, m, which was too early.
- */
- if (si->credit >= 0) {
- si->idle_time = now;
- } else {
- uint64_t t;
- KASSERT (bw > 0, ("bw=0 and credit<0 ?"));
- t = div64(bw - 1 - si->credit, bw);
- if (m)
- dn_tag_get(m)->output_time += t;
- si->kflags |= DN_ACTIVE;
- heap_insert(&dn_cfg.evheap, now + t, si);
- }
- if (delay_line_idle && done)
- transmit_event(q, &si->dline, now);
- return q->head;
-}
-
-/*
- * The timer handler for dummynet. Time is computed in ticks, but
- * but the code is tolerant to the actual rate at which this is called.
- * Once complete, the function reschedules itself for the next tick.
- */
-void
-dummynet_task(void *context, int pending)
-{
- struct timeval t;
- struct mq q = { NULL, NULL }; /* queue to accumulate results */
-
- CURVNET_SET((struct vnet *)context);
-
- DN_BH_WLOCK();
-
- /* Update number of lost(coalesced) ticks. */
- tick_lost += pending - 1;
-
- getmicrouptime(&t);
- /* Last tick duration (usec). */
- tick_last = (t.tv_sec - dn_cfg.prev_t.tv_sec) * 1000000 +
- (t.tv_usec - dn_cfg.prev_t.tv_usec);
- /* Last tick vs standard tick difference (usec). */
- tick_delta = (tick_last * hz - 1000000) / hz;
- /* Accumulated tick difference (usec). */
- tick_delta_sum += tick_delta;
-
- dn_cfg.prev_t = t;
-
- /*
- * Adjust curr_time if the accumulated tick difference is
- * greater than the 'standard' tick. Since curr_time should
- * be monotonically increasing, we do positive adjustments
- * as required, and throttle curr_time in case of negative
- * adjustment.
- */
- dn_cfg.curr_time++;
- if (tick_delta_sum - tick >= 0) {
- int diff = tick_delta_sum / tick;
-
- dn_cfg.curr_time += diff;
- tick_diff += diff;
- tick_delta_sum %= tick;
- tick_adjustment++;
- } else if (tick_delta_sum + tick <= 0) {
- dn_cfg.curr_time--;
- tick_diff--;
- tick_delta_sum += tick;
- tick_adjustment++;
- }
-
- /* serve pending events, accumulate in q */
- for (;;) {
- struct dn_id *p; /* generic parameter to handler */
-
- if (dn_cfg.evheap.elements == 0 ||
- DN_KEY_LT(dn_cfg.curr_time, HEAP_TOP(&dn_cfg.evheap)->key))
- break;
- p = HEAP_TOP(&dn_cfg.evheap)->object;
- heap_extract(&dn_cfg.evheap, NULL);
-
- if (p->type == DN_SCH_I) {
- serve_sched(&q, (struct dn_sch_inst *)p, dn_cfg.curr_time);
- } else { /* extracted a delay line */
- transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time);
- }
- }
- if (dn_cfg.expire && ++dn_cfg.expire_cycle >= dn_cfg.expire) {
- dn_cfg.expire_cycle = 0;
- dn_drain_scheduler();
- dn_drain_queue();
- }
-
- DN_BH_WUNLOCK();
- dn_reschedule();
- if (q.head != NULL)
- dummynet_send(q.head);
- CURVNET_RESTORE();
-}
-
-/*
- * forward a chain of packets to the proper destination.
- * This runs outside the dummynet lock.
- */
-static void
-dummynet_send(struct mbuf *m)
-{
- struct mbuf *n;
-
- for (; m != NULL; m = n) {
- struct ifnet *ifp = NULL; /* gcc 3.4.6 complains */
- struct m_tag *tag;
- int dst;
-
- n = m->m_nextpkt;
- m->m_nextpkt = NULL;
- tag = m_tag_first(m);
- if (tag == NULL) { /* should not happen */
- dst = DIR_DROP;
- } else {
- struct dn_pkt_tag *pkt = dn_tag_get(m);
- /* extract the dummynet info, rename the tag
- * to carry reinject info.
- */
- dst = pkt->dn_dir;
- ifp = pkt->ifp;
- tag->m_tag_cookie = MTAG_IPFW_RULE;
- tag->m_tag_id = 0;
- }
-
- switch (dst) {
- case DIR_OUT:
- SET_HOST_IPLEN(mtod(m, struct ip *));
- ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
- break ;
-
- case DIR_IN :
- /* put header in network format for ip_input() */
- //SET_NET_IPLEN(mtod(m, struct ip *));
- netisr_dispatch(NETISR_IP, m);
- break;
-
-#ifdef INET6
- case DIR_IN | PROTO_IPV6:
- netisr_dispatch(NETISR_IPV6, m);
- break;
-
- case DIR_OUT | PROTO_IPV6:
- ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL);
- break;
-#endif
-
- case DIR_FWD | PROTO_IFB: /* DN_TO_IFB_FWD: */
- if (bridge_dn_p != NULL)
- ((*bridge_dn_p)(m, ifp));
- else
- printf("dummynet: if_bridge not loaded\n");
-
- break;
-
- case DIR_IN | PROTO_LAYER2: /* DN_TO_ETH_DEMUX: */
- /*
- * The Ethernet code assumes the Ethernet header is
- * contiguous in the first mbuf header.
- * Insure this is true.
- */
- if (m->m_len < ETHER_HDR_LEN &&
- (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
- printf("dummynet/ether: pullup failed, "
- "dropping packet\n");
- break;
- }
- ether_demux(m->m_pkthdr.rcvif, m);
- break;
-
- case DIR_OUT | PROTO_LAYER2: /* N_TO_ETH_OUT: */
- ether_output_frame(ifp, m);
- break;
-
- case DIR_DROP:
- /* drop the packet after some time */
- FREE_PKT(m);
- break;
-
- default:
- printf("dummynet: bad switch %d!\n", dst);
- FREE_PKT(m);
- break;
- }
- }
-}
-
-static inline int
-tag_mbuf(struct mbuf *m, int dir, struct ip_fw_args *fwa)
-{
- struct dn_pkt_tag *dt;
- struct m_tag *mtag;
-
- mtag = m_tag_get(PACKET_TAG_DUMMYNET,
- sizeof(*dt), M_NOWAIT | M_ZERO);
- if (mtag == NULL)
- return 1; /* Cannot allocate packet header. */
- m_tag_prepend(m, mtag); /* Attach to mbuf chain. */
- dt = (struct dn_pkt_tag *)(mtag + 1);
- dt->rule = fwa->rule;
- dt->rule.info &= IPFW_ONEPASS; /* only keep this info */
- dt->dn_dir = dir;
- dt->ifp = fwa->oif;
- /* dt->output tame is updated as we move through */
- dt->output_time = dn_cfg.curr_time;
- return 0;
-}
-
-
-/*
- * dummynet hook for packets.
- * We use the argument to locate the flowset fs and the sched_set sch
- * associated to it. The we apply flow_mask and sched_mask to
- * determine the queue and scheduler instances.
- *
- * dir where shall we send the packet after dummynet.
- * *m0 the mbuf with the packet
- * ifp the 'ifp' parameter from the caller.
- * NULL in ip_input, destination interface in ip_output,
- */
-int
-dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
-{
- struct mbuf *m = *m0;
- struct dn_fsk *fs = NULL;
- struct dn_sch_inst *si;
- struct dn_queue *q = NULL; /* default */
-
- int fs_id = (fwa->rule.info & IPFW_INFO_MASK) +
- ((fwa->rule.info & IPFW_IS_PIPE) ? 2*DN_MAX_ID : 0);
- DN_BH_WLOCK();
- io_pkt++;
- /* we could actually tag outside the lock, but who cares... */
- if (tag_mbuf(m, dir, fwa))
- goto dropit;
- if (dn_cfg.busy) {
- /* if the upper half is busy doing something expensive,
- * lets queue the packet and move forward
- */
- mq_append(&dn_cfg.pending, m);
- m = *m0 = NULL; /* consumed */
- goto done; /* already active, nothing to do */
- }
- /* XXX locate_flowset could be optimised with a direct ref. */
- fs = dn_ht_find(dn_cfg.fshash, fs_id, 0, NULL);
- if (fs == NULL)
- goto dropit; /* This queue/pipe does not exist! */
- if (fs->sched == NULL) /* should not happen */
- goto dropit;
- /* find scheduler instance, possibly applying sched_mask */
- si = ipdn_si_find(fs->sched, &(fwa->f_id));
- if (si == NULL)
- goto dropit;
- /*
- * If the scheduler supports multiple queues, find the right one
- * (otherwise it will be ignored by enqueue).
- */
- if (fs->sched->fp->flags & DN_MULTIQUEUE) {
- q = ipdn_q_find(fs, si, &(fwa->f_id));
- if (q == NULL)
- goto dropit;
- }
- if (fs->sched->fp->enqueue(si, q, m)) {
- /* packet was dropped by enqueue() */
- m = *m0 = NULL;
- goto dropit;
- }
-
- if (si->kflags & DN_ACTIVE) {
- m = *m0 = NULL; /* consumed */
- goto done; /* already active, nothing to do */
- }
-
- /* compute the initial allowance */
- if (si->idle_time < dn_cfg.curr_time) {
- /* Do this only on the first packet on an idle pipe */
- struct dn_link *p = &fs->sched->link;
-
- si->sched_time = dn_cfg.curr_time;
- si->credit = dn_cfg.io_fast ? p->bandwidth : 0;
- if (p->burst) {
- uint64_t burst = (dn_cfg.curr_time - si->idle_time) * p->bandwidth;
- if (burst > p->burst)
- burst = p->burst;
- si->credit += burst;
- }
- }
- /* pass through scheduler and delay line */
- m = serve_sched(NULL, si, dn_cfg.curr_time);
-
- /* optimization -- pass it back to ipfw for immediate send */
- /* XXX Don't call dummynet_send() if scheduler return the packet
- * just enqueued. This avoid a lock order reversal.
- *
- */
- if (/*dn_cfg.io_fast &&*/ m == *m0 && (dir & PROTO_LAYER2) == 0 ) {
- /* fast io, rename the tag * to carry reinject info. */
- struct m_tag *tag = m_tag_first(m);
-
- tag->m_tag_cookie = MTAG_IPFW_RULE;
- tag->m_tag_id = 0;
- io_pkt_fast++;
- if (m->m_nextpkt != NULL) {
- printf("dummynet: fast io: pkt chain detected!\n");
- m->m_nextpkt = NULL;
- }
- m = NULL;
- } else {
- *m0 = NULL;
- }
-done:
- DN_BH_WUNLOCK();
- if (m)
- dummynet_send(m);
- return 0;
-
-dropit:
- io_pkt_drop++;
- DN_BH_WUNLOCK();
- if (m)
- FREE_PKT(m);
- *m0 = NULL;
- return (fs && (fs->fs.flags & DN_NOERROR)) ? 0 : ENOBUFS;
-}
diff --git a/freebsd/sys/netpfil/ipfw/ip_dn_private.h b/freebsd/sys/netpfil/ipfw/ip_dn_private.h
index 159ddc9a..2fce1366 100644
--- a/freebsd/sys/netpfil/ipfw/ip_dn_private.h
+++ b/freebsd/sys/netpfil/ipfw/ip_dn_private.h
@@ -81,8 +81,13 @@ SLIST_HEAD(dn_fsk_head, dn_fsk);
SLIST_HEAD(dn_queue_head, dn_queue);
SLIST_HEAD(dn_alg_head, dn_alg);
+#ifdef NEW_AQM
+SLIST_HEAD(dn_aqm_head, dn_aqm); /* for new AQMs */
+#endif
+
struct mq { /* a basic queue of packets*/
struct mbuf *head, *tail;
+ int count;
};
static inline void
@@ -91,7 +96,7 @@ set_oid(struct dn_id *o, int type, int len)
o->type = type;
o->len = len;
o->subtype = 0;
-};
+}
/*
* configuration and global data for a dummynet instance
@@ -135,6 +140,9 @@ struct dn_parms {
/* list of flowsets without a scheduler -- use sch_chain */
struct dn_fsk_head fsu; /* list of unlinked flowsets */
struct dn_alg_head schedlist; /* list of algorithms */
+#ifdef NEW_AQM
+ struct dn_aqm_head aqmlist; /* list of AQMs */
+#endif
/* Store the fs/sch to scan when draining. The value is the
* bucket number of the hash table. Expire can be disabled
@@ -231,6 +239,10 @@ struct dn_fsk { /* kernel side of a flowset */
int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */
int avg_pkt_size ; /* medium packet size */
int max_pkt_size ; /* max packet size */
+#ifdef NEW_AQM
+ struct dn_aqm *aqmfp; /* Pointer to AQM functions */
+ void *aqmcfg; /* configuration parameters for AQM */
+#endif
};
/*
@@ -253,6 +265,9 @@ struct dn_queue {
int count; /* arrivals since last RED drop */
int random; /* random value (scaled) */
uint64_t q_time; /* start of queue idle time */
+#ifdef NEW_AQM
+ void *aqm_status; /* per-queue status variables*/
+#endif
};
@@ -400,4 +415,49 @@ int do_config(void *p, int l);
void dn_drain_scheduler(void);
void dn_drain_queue(void);
+#ifdef NEW_AQM
+int ecn_mark(struct mbuf* m);
+
+/* moved from ip_dn_io.c to here to be available for AQMs modules*/
+static inline void
+mq_append(struct mq *q, struct mbuf *m)
+{
+#ifdef USERSPACE
+ // buffers from netmap need to be copied
+ // XXX note that the routine is not expected to fail
+ ND("append %p to %p", m, q);
+ if (m->m_flags & M_STACK) {
+ struct mbuf *m_new;
+ void *p;
+ int l, ofs;
+
+ ofs = m->m_data - m->__m_extbuf;
+ // XXX allocate
+ MGETHDR(m_new, M_NOWAIT, MT_DATA);
+ ND("*** WARNING, volatile buf %p ext %p %d dofs %d m_new %p",
+ m, m->__m_extbuf, m->__m_extlen, ofs, m_new);
+ p = m_new->__m_extbuf; /* new pointer */
+ l = m_new->__m_extlen; /* new len */
+ if (l <= m->__m_extlen) {
+ panic("extlen too large");
+ }
+
+ *m_new = *m; // copy
+ m_new->m_flags &= ~M_STACK;
+ m_new->__m_extbuf = p; // point to new buffer
+ _pkt_copy(m->__m_extbuf, p, m->__m_extlen);
+ m_new->m_data = p + ofs;
+ m = m_new;
+ }
+#endif /* USERSPACE */
+ if (q->head == NULL)
+ q->head = m;
+ else
+ q->tail->m_nextpkt = m;
+ q->count++;
+ q->tail = m;
+ m->m_nextpkt = NULL;
+}
+#endif /* NEW_AQM */
+
#endif /* _IP_DN_PRIVATE_H */
diff --git a/freebsd/sys/netpfil/ipfw/ip_dummynet.c b/freebsd/sys/netpfil/ipfw/ip_dummynet.c
deleted file mode 100644
index 40c37d80..00000000
--- a/freebsd/sys/netpfil/ipfw/ip_dummynet.c
+++ /dev/null
@@ -1,2309 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*-
- * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
- * Portions Copyright (c) 2000 Akamba Corp.
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * Configuration and internal object management for dummynet.
- */
-
-#include <rtems/bsd/local/opt_inet6.h>
-
-#include <rtems/bsd/sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/kernel.h>
-#include <rtems/bsd/sys/lock.h>
-#include <sys/module.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/time.h>
-#include <sys/taskqueue.h>
-#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
-#include <netinet/ip_fw.h>
-#include <netinet/ip_dummynet.h>
-
-#include <netpfil/ipfw/ip_fw_private.h>
-#include <netpfil/ipfw/dn_heap.h>
-#include <netpfil/ipfw/ip_dn_private.h>
-#include <netpfil/ipfw/dn_sched.h>
-
-/* which objects to copy */
-#define DN_C_LINK 0x01
-#define DN_C_SCH 0x02
-#define DN_C_FLOW 0x04
-#define DN_C_FS 0x08
-#define DN_C_QUEUE 0x10
-
-/* we use this argument in case of a schk_new */
-struct schk_new_arg {
- struct dn_alg *fp;
- struct dn_sch *sch;
-};
-
-/*---- callout hooks. ----*/
-static struct callout dn_timeout;
-static struct task dn_task;
-static struct taskqueue *dn_tq = NULL;
-
-static void
-dummynet(void * __unused unused)
-{
-
- taskqueue_enqueue(dn_tq, &dn_task);
-}
-
-void
-dn_reschedule(void)
-{
- callout_reset(&dn_timeout, 1, dummynet, NULL);
-}
-/*----- end of callout hooks -----*/
-
-/* Return a scheduler descriptor given the type or name. */
-static struct dn_alg *
-find_sched_type(int type, char *name)
-{
- struct dn_alg *d;
-
- SLIST_FOREACH(d, &dn_cfg.schedlist, next) {
- if (d->type == type || (name && !strcasecmp(d->name, name)))
- return d;
- }
- return NULL; /* not found */
-}
-
-int
-ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg)
-{
- int oldv = *v;
- const char *op = NULL;
- if (dflt < lo)
- dflt = lo;
- if (dflt > hi)
- dflt = hi;
- if (oldv < lo) {
- *v = dflt;
- op = "Bump";
- } else if (oldv > hi) {
- *v = hi;
- op = "Clamp";
- } else
- return *v;
- if (op && msg)
- printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
- return *v;
-}
-
-/*---- flow_id mask, hash and compare functions ---*/
-/*
- * The flow_id includes the 5-tuple, the queue/pipe number
- * which we store in the extra area in host order,
- * and for ipv6 also the flow_id6.
- * XXX see if we want the tos byte (can store in 'flags')
- */
-static struct ipfw_flow_id *
-flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id)
-{
- int is_v6 = IS_IP6_FLOW_ID(id);
-
- id->dst_port &= mask->dst_port;
- id->src_port &= mask->src_port;
- id->proto &= mask->proto;
- id->extra &= mask->extra;
- if (is_v6) {
- APPLY_MASK(&id->dst_ip6, &mask->dst_ip6);
- APPLY_MASK(&id->src_ip6, &mask->src_ip6);
- id->flow_id6 &= mask->flow_id6;
- } else {
- id->dst_ip &= mask->dst_ip;
- id->src_ip &= mask->src_ip;
- }
- return id;
-}
-
-/* computes an OR of two masks, result in dst and also returned */
-static struct ipfw_flow_id *
-flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst)
-{
- int is_v6 = IS_IP6_FLOW_ID(dst);
-
- dst->dst_port |= src->dst_port;
- dst->src_port |= src->src_port;
- dst->proto |= src->proto;
- dst->extra |= src->extra;
- if (is_v6) {
-#define OR_MASK(_d, _s) \
- (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \
- (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \
- (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \
- (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3];
- OR_MASK(&dst->dst_ip6, &src->dst_ip6);
- OR_MASK(&dst->src_ip6, &src->src_ip6);
-#undef OR_MASK
- dst->flow_id6 |= src->flow_id6;
- } else {
- dst->dst_ip |= src->dst_ip;
- dst->src_ip |= src->src_ip;
- }
- return dst;
-}
-
-static int
-nonzero_mask(struct ipfw_flow_id *m)
-{
- if (m->dst_port || m->src_port || m->proto || m->extra)
- return 1;
- if (IS_IP6_FLOW_ID(m)) {
- return
- m->dst_ip6.__u6_addr.__u6_addr32[0] ||
- m->dst_ip6.__u6_addr.__u6_addr32[1] ||
- m->dst_ip6.__u6_addr.__u6_addr32[2] ||
- m->dst_ip6.__u6_addr.__u6_addr32[3] ||
- m->src_ip6.__u6_addr.__u6_addr32[0] ||
- m->src_ip6.__u6_addr.__u6_addr32[1] ||
- m->src_ip6.__u6_addr.__u6_addr32[2] ||
- m->src_ip6.__u6_addr.__u6_addr32[3] ||
- m->flow_id6;
- } else {
- return m->dst_ip || m->src_ip;
- }
-}
-
-/* XXX we may want a better hash function */
-static uint32_t
-flow_id_hash(struct ipfw_flow_id *id)
-{
- uint32_t i;
-
- if (IS_IP6_FLOW_ID(id)) {
- uint32_t *d = (uint32_t *)&id->dst_ip6;
- uint32_t *s = (uint32_t *)&id->src_ip6;
- i = (d[0] ) ^ (d[1]) ^
- (d[2] ) ^ (d[3]) ^
- (d[0] >> 15) ^ (d[1] >> 15) ^
- (d[2] >> 15) ^ (d[3] >> 15) ^
- (s[0] << 1) ^ (s[1] << 1) ^
- (s[2] << 1) ^ (s[3] << 1) ^
- (s[0] << 16) ^ (s[1] << 16) ^
- (s[2] << 16) ^ (s[3] << 16) ^
- (id->dst_port << 1) ^ (id->src_port) ^
- (id->extra) ^
- (id->proto ) ^ (id->flow_id6);
- } else {
- i = (id->dst_ip) ^ (id->dst_ip >> 15) ^
- (id->src_ip << 1) ^ (id->src_ip >> 16) ^
- (id->extra) ^
- (id->dst_port << 1) ^ (id->src_port) ^ (id->proto);
- }
- return i;
-}
-
-/* Like bcmp, returns 0 if ids match, 1 otherwise. */
-static int
-flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2)
-{
- int is_v6 = IS_IP6_FLOW_ID(id1);
-
- if (!is_v6) {
- if (IS_IP6_FLOW_ID(id2))
- return 1; /* different address families */
-
- return (id1->dst_ip == id2->dst_ip &&
- id1->src_ip == id2->src_ip &&
- id1->dst_port == id2->dst_port &&
- id1->src_port == id2->src_port &&
- id1->proto == id2->proto &&
- id1->extra == id2->extra) ? 0 : 1;
- }
- /* the ipv6 case */
- return (
- !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) &&
- !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) &&
- id1->dst_port == id2->dst_port &&
- id1->src_port == id2->src_port &&
- id1->proto == id2->proto &&
- id1->extra == id2->extra &&
- id1->flow_id6 == id2->flow_id6) ? 0 : 1;
-}
-/*--------- end of flow-id mask, hash and compare ---------*/
-
-/*--- support functions for the qht hashtable ----
- * Entries are hashed by flow-id
- */
-static uint32_t
-q_hash(uintptr_t key, int flags, void *arg)
-{
- /* compute the hash slot from the flow id */
- struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
- &((struct dn_queue *)key)->ni.fid :
- (struct ipfw_flow_id *)key;
-
- return flow_id_hash(id);
-}
-
-static int
-q_match(void *obj, uintptr_t key, int flags, void *arg)
-{
- struct dn_queue *o = (struct dn_queue *)obj;
- struct ipfw_flow_id *id2;
-
- if (flags & DNHT_KEY_IS_OBJ) {
- /* compare pointers */
- id2 = &((struct dn_queue *)key)->ni.fid;
- } else {
- id2 = (struct ipfw_flow_id *)key;
- }
- return (0 == flow_id_cmp(&o->ni.fid, id2));
-}
-
-/*
- * create a new queue instance for the given 'key'.
- */
-static void *
-q_new(uintptr_t key, int flags, void *arg)
-{
- struct dn_queue *q, *template = arg;
- struct dn_fsk *fs = template->fs;
- int size = sizeof(*q) + fs->sched->fp->q_datalen;
-
- q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (q == NULL) {
- D("no memory for new queue");
- return NULL;
- }
-
- set_oid(&q->ni.oid, DN_QUEUE, size);
- if (fs->fs.flags & DN_QHT_HASH)
- q->ni.fid = *(struct ipfw_flow_id *)key;
- q->fs = fs;
- q->_si = template->_si;
- q->_si->q_count++;
-
- if (fs->sched->fp->new_queue)
- fs->sched->fp->new_queue(q);
- dn_cfg.queue_count++;
- return q;
-}
-
-/*
- * Notify schedulers that a queue is going away.
- * If (flags & DN_DESTROY), also free the packets.
- * The version for callbacks is called q_delete_cb().
- */
-static void
-dn_delete_queue(struct dn_queue *q, int flags)
-{
- struct dn_fsk *fs = q->fs;
-
- // D("fs %p si %p\n", fs, q->_si);
- /* notify the parent scheduler that the queue is going away */
- if (fs && fs->sched->fp->free_queue)
- fs->sched->fp->free_queue(q);
- q->_si->q_count--;
- q->_si = NULL;
- if (flags & DN_DESTROY) {
- if (q->mq.head)
- dn_free_pkts(q->mq.head);
- bzero(q, sizeof(*q)); // safety
- free(q, M_DUMMYNET);
- dn_cfg.queue_count--;
- }
-}
-
-static int
-q_delete_cb(void *q, void *arg)
-{
- int flags = (int)(uintptr_t)arg;
- dn_delete_queue(q, flags);
- return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0;
-}
-
-/*
- * calls dn_delete_queue/q_delete_cb on all queues,
- * which notifies the parent scheduler and possibly drains packets.
- * flags & DN_DESTROY: drains queues and destroy qht;
- */
-static void
-qht_delete(struct dn_fsk *fs, int flags)
-{
- ND("fs %d start flags %d qht %p",
- fs->fs.fs_nr, flags, fs->qht);
- if (!fs->qht)
- return;
- if (fs->fs.flags & DN_QHT_HASH) {
- dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags);
- if (flags & DN_DESTROY) {
- dn_ht_free(fs->qht, 0);
- fs->qht = NULL;
- }
- } else {
- dn_delete_queue((struct dn_queue *)(fs->qht), flags);
- if (flags & DN_DESTROY)
- fs->qht = NULL;
- }
-}
-
-/*
- * Find and possibly create the queue for a MULTIQUEUE scheduler.
- * We never call it for !MULTIQUEUE (the queue is in the sch_inst).
- */
-struct dn_queue *
-ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si,
- struct ipfw_flow_id *id)
-{
- struct dn_queue template;
-
- template._si = si;
- template.fs = fs;
-
- if (fs->fs.flags & DN_QHT_HASH) {
- struct ipfw_flow_id masked_id;
- if (fs->qht == NULL) {
- fs->qht = dn_ht_init(NULL, fs->fs.buckets,
- offsetof(struct dn_queue, q_next),
- q_hash, q_match, q_new);
- if (fs->qht == NULL)
- return NULL;
- }
- masked_id = *id;
- flow_id_mask(&fs->fsk_mask, &masked_id);
- return dn_ht_find(fs->qht, (uintptr_t)&masked_id,
- DNHT_INSERT, &template);
- } else {
- if (fs->qht == NULL)
- fs->qht = q_new(0, 0, &template);
- return (struct dn_queue *)fs->qht;
- }
-}
-/*--- end of queue hash table ---*/
-
-/*--- support functions for the sch_inst hashtable ----
- *
- * These are hashed by flow-id
- */
-static uint32_t
-si_hash(uintptr_t key, int flags, void *arg)
-{
- /* compute the hash slot from the flow id */
- struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
- &((struct dn_sch_inst *)key)->ni.fid :
- (struct ipfw_flow_id *)key;
-
- return flow_id_hash(id);
-}
-
-static int
-si_match(void *obj, uintptr_t key, int flags, void *arg)
-{
- struct dn_sch_inst *o = obj;
- struct ipfw_flow_id *id2;
-
- id2 = (flags & DNHT_KEY_IS_OBJ) ?
- &((struct dn_sch_inst *)key)->ni.fid :
- (struct ipfw_flow_id *)key;
- return flow_id_cmp(&o->ni.fid, id2) == 0;
-}
-
-/*
- * create a new instance for the given 'key'
- * Allocate memory for instance, delay line and scheduler private data.
- */
-static void *
-si_new(uintptr_t key, int flags, void *arg)
-{
- struct dn_schk *s = arg;
- struct dn_sch_inst *si;
- int l = sizeof(*si) + s->fp->si_datalen;
-
- si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (si == NULL)
- goto error;
-
- /* Set length only for the part passed up to userland. */
- set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow));
- set_oid(&(si->dline.oid), DN_DELAY_LINE,
- sizeof(struct delay_line));
- /* mark si and dline as outside the event queue */
- si->ni.oid.id = si->dline.oid.id = -1;
-
- si->sched = s;
- si->dline.si = si;
-
- if (s->fp->new_sched && s->fp->new_sched(si)) {
- D("new_sched error");
- goto error;
- }
- if (s->sch.flags & DN_HAVE_MASK)
- si->ni.fid = *(struct ipfw_flow_id *)key;
-
- dn_cfg.si_count++;
- return si;
-
-error:
- if (si) {
- bzero(si, sizeof(*si)); // safety
- free(si, M_DUMMYNET);
- }
- return NULL;
-}
-
-/*
- * Callback from siht to delete all scheduler instances. Remove
- * si and delay line from the system heap, destroy all queues.
- * We assume that all flowset have been notified and do not
- * point to us anymore.
- */
-static int
-si_destroy(void *_si, void *arg)
-{
- struct dn_sch_inst *si = _si;
- struct dn_schk *s = si->sched;
- struct delay_line *dl = &si->dline;
-
- if (dl->oid.subtype) /* remove delay line from event heap */
- heap_extract(&dn_cfg.evheap, dl);
- dn_free_pkts(dl->mq.head); /* drain delay line */
- if (si->kflags & DN_ACTIVE) /* remove si from event heap */
- heap_extract(&dn_cfg.evheap, si);
- if (s->fp->free_sched)
- s->fp->free_sched(si);
- bzero(si, sizeof(*si)); /* safety */
- free(si, M_DUMMYNET);
- dn_cfg.si_count--;
- return DNHT_SCAN_DEL;
-}
-
-/*
- * Find the scheduler instance for this packet. If we need to apply
- * a mask, do on a local copy of the flow_id to preserve the original.
- * Assume siht is always initialized if we have a mask.
- */
-struct dn_sch_inst *
-ipdn_si_find(struct dn_schk *s, struct ipfw_flow_id *id)
-{
-
- if (s->sch.flags & DN_HAVE_MASK) {
- struct ipfw_flow_id id_t = *id;
- flow_id_mask(&s->sch.sched_mask, &id_t);
- return dn_ht_find(s->siht, (uintptr_t)&id_t,
- DNHT_INSERT, s);
- }
- if (!s->siht)
- s->siht = si_new(0, 0, s);
- return (struct dn_sch_inst *)s->siht;
-}
-
-/* callback to flush credit for the scheduler instance */
-static int
-si_reset_credit(void *_si, void *arg)
-{
- struct dn_sch_inst *si = _si;
- struct dn_link *p = &si->sched->link;
-
- si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0);
- return 0;
-}
-
-static void
-schk_reset_credit(struct dn_schk *s)
-{
- if (s->sch.flags & DN_HAVE_MASK)
- dn_ht_scan(s->siht, si_reset_credit, NULL);
- else if (s->siht)
- si_reset_credit(s->siht, NULL);
-}
-/*---- end of sch_inst hashtable ---------------------*/
-
-/*-------------------------------------------------------
- * flowset hash (fshash) support. Entries are hashed by fs_nr.
- * New allocations are put in the fsunlinked list, from which
- * they are removed when they point to a specific scheduler.
- */
-static uint32_t
-fsk_hash(uintptr_t key, int flags, void *arg)
-{
- uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key :
- ((struct dn_fsk *)key)->fs.fs_nr;
-
- return ( (i>>8)^(i>>4)^i );
-}
-
-static int
-fsk_match(void *obj, uintptr_t key, int flags, void *arg)
-{
- struct dn_fsk *fs = obj;
- int i = !(flags & DNHT_KEY_IS_OBJ) ? key :
- ((struct dn_fsk *)key)->fs.fs_nr;
-
- return (fs->fs.fs_nr == i);
-}
-
-static void *
-fsk_new(uintptr_t key, int flags, void *arg)
-{
- struct dn_fsk *fs;
-
- fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (fs) {
- set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs));
- dn_cfg.fsk_count++;
- fs->drain_bucket = 0;
- SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
- }
- return fs;
-}
-
-/*
- * detach flowset from its current scheduler. Flags as follows:
- * DN_DETACH removes from the fsk_list
- * DN_DESTROY deletes individual queues
- * DN_DELETE_FS destroys the flowset (otherwise goes in unlinked).
- */
-static void
-fsk_detach(struct dn_fsk *fs, int flags)
-{
- if (flags & DN_DELETE_FS)
- flags |= DN_DESTROY;
- ND("fs %d from sched %d flags %s %s %s",
- fs->fs.fs_nr, fs->fs.sched_nr,
- (flags & DN_DELETE_FS) ? "DEL_FS":"",
- (flags & DN_DESTROY) ? "DEL":"",
- (flags & DN_DETACH) ? "DET":"");
- if (flags & DN_DETACH) { /* detach from the list */
- struct dn_fsk_head *h;
- h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu;
- SLIST_REMOVE(h, fs, dn_fsk, sch_chain);
- }
- /* Free the RED parameters, they will be recomputed on
- * subsequent attach if needed.
- */
- if (fs->w_q_lookup)
- free(fs->w_q_lookup, M_DUMMYNET);
- fs->w_q_lookup = NULL;
- qht_delete(fs, flags);
- if (fs->sched && fs->sched->fp->free_fsk)
- fs->sched->fp->free_fsk(fs);
- fs->sched = NULL;
- if (flags & DN_DELETE_FS) {
- bzero(fs, sizeof(*fs)); /* safety */
- free(fs, M_DUMMYNET);
- dn_cfg.fsk_count--;
- } else {
- SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
- }
-}
-
-/*
- * Detach or destroy all flowsets in a list.
- * flags specifies what to do:
- * DN_DESTROY: flush all queues
- * DN_DELETE_FS: DN_DESTROY + destroy flowset
- * DN_DELETE_FS implies DN_DESTROY
- */
-static void
-fsk_detach_list(struct dn_fsk_head *h, int flags)
-{
- struct dn_fsk *fs;
- int n = 0; /* only for stats */
-
- ND("head %p flags %x", h, flags);
- while ((fs = SLIST_FIRST(h))) {
- SLIST_REMOVE_HEAD(h, sch_chain);
- n++;
- fsk_detach(fs, flags);
- }
- ND("done %d flowsets", n);
-}
-
-/*
- * called on 'queue X delete' -- removes the flowset from fshash,
- * deletes all queues for the flowset, and removes the flowset.
- */
-static int
-delete_fs(int i, int locked)
-{
- struct dn_fsk *fs;
- int err = 0;
-
- if (!locked)
- DN_BH_WLOCK();
- fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL);
- ND("fs %d found %p", i, fs);
- if (fs) {
- fsk_detach(fs, DN_DETACH | DN_DELETE_FS);
- err = 0;
- } else
- err = EINVAL;
- if (!locked)
- DN_BH_WUNLOCK();
- return err;
-}
-
-/*----- end of flowset hashtable support -------------*/
-
-/*------------------------------------------------------------
- * Scheduler hash. When searching by index we pass sched_nr,
- * otherwise we pass struct dn_sch * which is the first field in
- * struct dn_schk so we can cast between the two. We use this trick
- * because in the create phase (but it should be fixed).
- */
-static uint32_t
-schk_hash(uintptr_t key, int flags, void *_arg)
-{
- uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key :
- ((struct dn_schk *)key)->sch.sched_nr;
- return ( (i>>8)^(i>>4)^i );
-}
-
-static int
-schk_match(void *obj, uintptr_t key, int flags, void *_arg)
-{
- struct dn_schk *s = (struct dn_schk *)obj;
- int i = !(flags & DNHT_KEY_IS_OBJ) ? key :
- ((struct dn_schk *)key)->sch.sched_nr;
- return (s->sch.sched_nr == i);
-}
-
-/*
- * Create the entry and intialize with the sched hash if needed.
- * Leave s->fp unset so we can tell whether a dn_ht_find() returns
- * a new object or a previously existing one.
- */
-static void *
-schk_new(uintptr_t key, int flags, void *arg)
-{
- struct schk_new_arg *a = arg;
- struct dn_schk *s;
- int l = sizeof(*s) +a->fp->schk_datalen;
-
- s = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (s == NULL)
- return NULL;
- set_oid(&s->link.oid, DN_LINK, sizeof(s->link));
- s->sch = *a->sch; // copy initial values
- s->link.link_nr = s->sch.sched_nr;
- SLIST_INIT(&s->fsk_list);
- /* initialize the hash table or create the single instance */
- s->fp = a->fp; /* si_new needs this */
- s->drain_bucket = 0;
- if (s->sch.flags & DN_HAVE_MASK) {
- s->siht = dn_ht_init(NULL, s->sch.buckets,
- offsetof(struct dn_sch_inst, si_next),
- si_hash, si_match, si_new);
- if (s->siht == NULL) {
- free(s, M_DUMMYNET);
- return NULL;
- }
- }
- s->fp = NULL; /* mark as a new scheduler */
- dn_cfg.schk_count++;
- return s;
-}
-
-/*
- * Callback for sched delete. Notify all attached flowsets to
- * detach from the scheduler, destroy the internal flowset, and
- * all instances. The scheduler goes away too.
- * arg is 0 (only detach flowsets and destroy instances)
- * DN_DESTROY (detach & delete queues, delete schk)
- * or DN_DELETE_FS (delete queues and flowsets, delete schk)
- */
-static int
-schk_delete_cb(void *obj, void *arg)
-{
- struct dn_schk *s = obj;
-#if 0
- int a = (int)arg;
- ND("sched %d arg %s%s",
- s->sch.sched_nr,
- a&DN_DESTROY ? "DEL ":"",
- a&DN_DELETE_FS ? "DEL_FS":"");
-#endif
- fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0);
- /* no more flowset pointing to us now */
- if (s->sch.flags & DN_HAVE_MASK) {
- dn_ht_scan(s->siht, si_destroy, NULL);
- dn_ht_free(s->siht, 0);
- } else if (s->siht)
- si_destroy(s->siht, NULL);
- if (s->profile) {
- free(s->profile, M_DUMMYNET);
- s->profile = NULL;
- }
- s->siht = NULL;
- if (s->fp->destroy)
- s->fp->destroy(s);
- bzero(s, sizeof(*s)); // safety
- free(obj, M_DUMMYNET);
- dn_cfg.schk_count--;
- return DNHT_SCAN_DEL;
-}
-
-/*
- * called on a 'sched X delete' command. Deletes a single scheduler.
- * This is done by removing from the schedhash, unlinking all
- * flowsets and deleting their traffic.
- */
-static int
-delete_schk(int i)
-{
- struct dn_schk *s;
-
- s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
- ND("%d %p", i, s);
- if (!s)
- return EINVAL;
- delete_fs(i + DN_MAX_ID, 1); /* first delete internal fs */
- /* then detach flowsets, delete traffic */
- schk_delete_cb(s, (void*)(uintptr_t)DN_DESTROY);
- return 0;
-}
-/*--- end of schk hashtable support ---*/
-
-static int
-copy_obj(char **start, char *end, void *_o, const char *msg, int i)
-{
- struct dn_id *o = _o;
- int have = end - *start;
-
- if (have < o->len || o->len == 0 || o->type == 0) {
- D("(WARN) type %d %s %d have %d need %d",
- o->type, msg, i, have, o->len);
- return 1;
- }
- ND("type %d %s %d len %d", o->type, msg, i, o->len);
- bcopy(_o, *start, o->len);
- if (o->type == DN_LINK) {
- /* Adjust burst parameter for link */
- struct dn_link *l = (struct dn_link *)*start;
- l->burst = div64(l->burst, 8 * hz);
- l->delay = l->delay * 1000 / hz;
- } else if (o->type == DN_SCH) {
- /* Set id->id to the number of instances */
- struct dn_schk *s = _o;
- struct dn_id *id = (struct dn_id *)(*start);
- id->id = (s->sch.flags & DN_HAVE_MASK) ?
- dn_ht_entries(s->siht) : (s->siht ? 1 : 0);
- }
- *start += o->len;
- return 0;
-}
-
-/* Specific function to copy a queue.
- * Copies only the user-visible part of a queue (which is in
- * a struct dn_flow), and sets len accordingly.
- */
-static int
-copy_obj_q(char **start, char *end, void *_o, const char *msg, int i)
-{
- struct dn_id *o = _o;
- int have = end - *start;
- int len = sizeof(struct dn_flow); /* see above comment */
-
- if (have < len || o->len == 0 || o->type != DN_QUEUE) {
- D("ERROR type %d %s %d have %d need %d",
- o->type, msg, i, have, len);
- return 1;
- }
- ND("type %d %s %d len %d", o->type, msg, i, len);
- bcopy(_o, *start, len);
- ((struct dn_id*)(*start))->len = len;
- *start += len;
- return 0;
-}
-
-static int
-copy_q_cb(void *obj, void *arg)
-{
- struct dn_queue *q = obj;
- struct copy_args *a = arg;
- struct dn_flow *ni = (struct dn_flow *)(*a->start);
- if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1))
- return DNHT_SCAN_END;
- ni->oid.type = DN_FLOW; /* override the DN_QUEUE */
- ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL);
- return 0;
-}
-
-static int
-copy_q(struct copy_args *a, struct dn_fsk *fs, int flags)
-{
- if (!fs->qht)
- return 0;
- if (fs->fs.flags & DN_QHT_HASH)
- dn_ht_scan(fs->qht, copy_q_cb, a);
- else
- copy_q_cb(fs->qht, a);
- return 0;
-}
-
-/*
- * This routine only copies the initial part of a profile ? XXX
- */
-static int
-copy_profile(struct copy_args *a, struct dn_profile *p)
-{
- int have = a->end - *a->start;
- /* XXX here we check for max length */
- int profile_len = sizeof(struct dn_profile) -
- ED_MAX_SAMPLES_NO*sizeof(int);
-
- if (p == NULL)
- return 0;
- if (have < profile_len) {
- D("error have %d need %d", have, profile_len);
- return 1;
- }
- bcopy(p, *a->start, profile_len);
- ((struct dn_id *)(*a->start))->len = profile_len;
- *a->start += profile_len;
- return 0;
-}
-
-static int
-copy_flowset(struct copy_args *a, struct dn_fsk *fs, int flags)
-{
- struct dn_fs *ufs = (struct dn_fs *)(*a->start);
- if (!fs)
- return 0;
- ND("flowset %d", fs->fs.fs_nr);
- if (copy_obj(a->start, a->end, &fs->fs, "flowset", fs->fs.fs_nr))
- return DNHT_SCAN_END;
- ufs->oid.id = (fs->fs.flags & DN_QHT_HASH) ?
- dn_ht_entries(fs->qht) : (fs->qht ? 1 : 0);
- if (flags) { /* copy queues */
- copy_q(a, fs, 0);
- }
- return 0;
-}
-
-static int
-copy_si_cb(void *obj, void *arg)
-{
- struct dn_sch_inst *si = obj;
- struct copy_args *a = arg;
- struct dn_flow *ni = (struct dn_flow *)(*a->start);
- if (copy_obj(a->start, a->end, &si->ni, "inst",
- si->sched->sch.sched_nr))
- return DNHT_SCAN_END;
- ni->oid.type = DN_FLOW; /* override the DN_SCH_I */
- ni->oid.id = si_hash((uintptr_t)si, DNHT_KEY_IS_OBJ, NULL);
- return 0;
-}
-
-static int
-copy_si(struct copy_args *a, struct dn_schk *s, int flags)
-{
- if (s->sch.flags & DN_HAVE_MASK)
- dn_ht_scan(s->siht, copy_si_cb, a);
- else if (s->siht)
- copy_si_cb(s->siht, a);
- return 0;
-}
-
-/*
- * compute a list of children of a scheduler and copy up
- */
-static int
-copy_fsk_list(struct copy_args *a, struct dn_schk *s, int flags)
-{
- struct dn_fsk *fs;
- struct dn_id *o;
- uint32_t *p;
-
- int n = 0, space = sizeof(*o);
- SLIST_FOREACH(fs, &s->fsk_list, sch_chain) {
- if (fs->fs.fs_nr < DN_MAX_ID)
- n++;
- }
- space += n * sizeof(uint32_t);
- DX(3, "sched %d has %d flowsets", s->sch.sched_nr, n);
- if (a->end - *(a->start) < space)
- return DNHT_SCAN_END;
- o = (struct dn_id *)(*(a->start));
- o->len = space;
- *a->start += o->len;
- o->type = DN_TEXT;
- p = (uint32_t *)(o+1);
- SLIST_FOREACH(fs, &s->fsk_list, sch_chain)
- if (fs->fs.fs_nr < DN_MAX_ID)
- *p++ = fs->fs.fs_nr;
- return 0;
-}
-
-static int
-copy_data_helper(void *_o, void *_arg)
-{
- struct copy_args *a = _arg;
- uint32_t *r = a->extra->r; /* start of first range */
- uint32_t *lim; /* first invalid pointer */
- int n;
-
- lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len);
-
- if (a->type == DN_LINK || a->type == DN_SCH) {
- /* pipe|sched show, we receive a dn_schk */
- struct dn_schk *s = _o;
-
- n = s->sch.sched_nr;
- if (a->type == DN_SCH && n >= DN_MAX_ID)
- return 0; /* not a scheduler */
- if (a->type == DN_LINK && n <= DN_MAX_ID)
- return 0; /* not a pipe */
-
- /* see if the object is within one of our ranges */
- for (;r < lim; r += 2) {
- if (n < r[0] || n > r[1])
- continue;
- /* Found a valid entry, copy and we are done */
- if (a->flags & DN_C_LINK) {
- if (copy_obj(a->start, a->end,
- &s->link, "link", n))
- return DNHT_SCAN_END;
- if (copy_profile(a, s->profile))
- return DNHT_SCAN_END;
- if (copy_flowset(a, s->fs, 0))
- return DNHT_SCAN_END;
- }
- if (a->flags & DN_C_SCH) {
- if (copy_obj(a->start, a->end,
- &s->sch, "sched", n))
- return DNHT_SCAN_END;
- /* list all attached flowsets */
- if (copy_fsk_list(a, s, 0))
- return DNHT_SCAN_END;
- }
- if (a->flags & DN_C_FLOW)
- copy_si(a, s, 0);
- break;
- }
- } else if (a->type == DN_FS) {
- /* queue show, skip internal flowsets */
- struct dn_fsk *fs = _o;
-
- n = fs->fs.fs_nr;
- if (n >= DN_MAX_ID)
- return 0;
- /* see if the object is within one of our ranges */
- for (;r < lim; r += 2) {
- if (n < r[0] || n > r[1])
- continue;
- if (copy_flowset(a, fs, 0))
- return DNHT_SCAN_END;
- copy_q(a, fs, 0);
- break; /* we are done */
- }
- }
- return 0;
-}
-
-static inline struct dn_schk *
-locate_scheduler(int i)
-{
- return dn_ht_find(dn_cfg.schedhash, i, 0, NULL);
-}
-
-/*
- * red parameters are in fixed point arithmetic.
- */
-static int
-config_red(struct dn_fsk *fs)
-{
- int64_t s, idle, weight, w0;
- int t, i;
-
- fs->w_q = fs->fs.w_q;
- fs->max_p = fs->fs.max_p;
- ND("called");
- /* Doing stuff that was in userland */
- i = fs->sched->link.bandwidth;
- s = (i <= 0) ? 0 :
- hz * dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i;
-
- idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */
- fs->lookup_step = div64(idle , dn_cfg.red_lookup_depth);
- /* fs->lookup_step not scaled, */
- if (!fs->lookup_step)
- fs->lookup_step = 1;
- w0 = weight = SCALE(1) - fs->w_q; //fs->w_q scaled
-
- for (t = fs->lookup_step; t > 1; --t)
- weight = SCALE_MUL(weight, w0);
- fs->lookup_weight = (int)(weight); // scaled
-
- /* Now doing stuff that was in kerneland */
- fs->min_th = SCALE(fs->fs.min_th);
- fs->max_th = SCALE(fs->fs.max_th);
-
- fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th);
- fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th));
-
- if (fs->fs.flags & DN_IS_GENTLE_RED) {
- fs->c_3 = (SCALE(1) - fs->max_p) / fs->fs.max_th;
- fs->c_4 = SCALE(1) - 2 * fs->max_p;
- }
-
- /* If the lookup table already exist, free and create it again. */
- if (fs->w_q_lookup) {
- free(fs->w_q_lookup, M_DUMMYNET);
- fs->w_q_lookup = NULL;
- }
- if (dn_cfg.red_lookup_depth == 0) {
- printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth"
- "must be > 0\n");
- fs->fs.flags &= ~DN_IS_RED;
- fs->fs.flags &= ~DN_IS_GENTLE_RED;
- return (EINVAL);
- }
- fs->lookup_depth = dn_cfg.red_lookup_depth;
- fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int),
- M_DUMMYNET, M_NOWAIT);
- if (fs->w_q_lookup == NULL) {
- printf("dummynet: sorry, cannot allocate red lookup table\n");
- fs->fs.flags &= ~DN_IS_RED;
- fs->fs.flags &= ~DN_IS_GENTLE_RED;
- return(ENOSPC);
- }
-
- /* Fill the lookup table with (1 - w_q)^x */
- fs->w_q_lookup[0] = SCALE(1) - fs->w_q;
-
- for (i = 1; i < fs->lookup_depth; i++)
- fs->w_q_lookup[i] =
- SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight);
-
- if (dn_cfg.red_avg_pkt_size < 1)
- dn_cfg.red_avg_pkt_size = 512;
- fs->avg_pkt_size = dn_cfg.red_avg_pkt_size;
- if (dn_cfg.red_max_pkt_size < 1)
- dn_cfg.red_max_pkt_size = 1500;
- fs->max_pkt_size = dn_cfg.red_max_pkt_size;
- ND("exit");
- return 0;
-}
-
-/* Scan all flowset attached to this scheduler and update red */
-static void
-update_red(struct dn_schk *s)
-{
- struct dn_fsk *fs;
- SLIST_FOREACH(fs, &s->fsk_list, sch_chain) {
- if (fs && (fs->fs.flags & DN_IS_RED))
- config_red(fs);
- }
-}
-
-/* attach flowset to scheduler s, possibly requeue */
-static void
-fsk_attach(struct dn_fsk *fs, struct dn_schk *s)
-{
- ND("remove fs %d from fsunlinked, link to sched %d",
- fs->fs.fs_nr, s->sch.sched_nr);
- SLIST_REMOVE(&dn_cfg.fsu, fs, dn_fsk, sch_chain);
- fs->sched = s;
- SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain);
- if (s->fp->new_fsk)
- s->fp->new_fsk(fs);
- /* XXX compute fsk_mask */
- fs->fsk_mask = fs->fs.flow_mask;
- if (fs->sched->sch.flags & DN_HAVE_MASK)
- flow_id_or(&fs->sched->sch.sched_mask, &fs->fsk_mask);
- if (fs->qht) {
- /*
- * we must drain qht according to the old
- * type, and reinsert according to the new one.
- * The requeue is complex -- in general we need to
- * reclassify every single packet.
- * For the time being, let's hope qht is never set
- * when we reach this point.
- */
- D("XXX TODO requeue from fs %d to sch %d",
- fs->fs.fs_nr, s->sch.sched_nr);
- fs->qht = NULL;
- }
- /* set the new type for qht */
- if (nonzero_mask(&fs->fsk_mask))
- fs->fs.flags |= DN_QHT_HASH;
- else
- fs->fs.flags &= ~DN_QHT_HASH;
-
- /* XXX config_red() can fail... */
- if (fs->fs.flags & DN_IS_RED)
- config_red(fs);
-}
-
-/* update all flowsets which may refer to this scheduler */
-static void
-update_fs(struct dn_schk *s)
-{
- struct dn_fsk *fs, *tmp;
-
- SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) {
- if (s->sch.sched_nr != fs->fs.sched_nr) {
- D("fs %d for sch %d not %d still unlinked",
- fs->fs.fs_nr, fs->fs.sched_nr,
- s->sch.sched_nr);
- continue;
- }
- fsk_attach(fs, s);
- }
-}
-
-/*
- * Configuration -- to preserve backward compatibility we use
- * the following scheme (N is 65536)
- * NUMBER SCHED LINK FLOWSET
- * 1 .. N-1 (1)WFQ (2)WFQ (3)queue
- * N+1 .. 2N-1 (4)FIFO (5)FIFO (6)FIFO for sched 1..N-1
- * 2N+1 .. 3N-1 -- -- (7)FIFO for sched N+1..2N-1
- *
- * "pipe i config" configures #1, #2 and #3
- * "sched i config" configures #1 and possibly #6
- * "queue i config" configures #3
- * #1 is configured with 'pipe i config' or 'sched i config'
- * #2 is configured with 'pipe i config', and created if not
- * existing with 'sched i config'
- * #3 is configured with 'queue i config'
- * #4 is automatically configured after #1, can only be FIFO
- * #5 is automatically configured after #2
- * #6 is automatically created when #1 is !MULTIQUEUE,
- * and can be updated.
- * #7 is automatically configured after #2
- */
-
-/*
- * configure a link (and its FIFO instance)
- */
-static int
-config_link(struct dn_link *p, struct dn_id *arg)
-{
- int i;
-
- if (p->oid.len != sizeof(*p)) {
- D("invalid pipe len %d", p->oid.len);
- return EINVAL;
- }
- i = p->link_nr;
- if (i <= 0 || i >= DN_MAX_ID)
- return EINVAL;
- /*
- * The config program passes parameters as follows:
- * bw = bits/second (0 means no limits),
- * delay = ms, must be translated into ticks.
- * qsize = slots/bytes
- * burst ???
- */
- p->delay = (p->delay * hz) / 1000;
- /* Scale burst size: bytes -> bits * hz */
- p->burst *= 8 * hz;
-
- DN_BH_WLOCK();
- /* do it twice, base link and FIFO link */
- for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
- struct dn_schk *s = locate_scheduler(i);
- if (s == NULL) {
- DN_BH_WUNLOCK();
- D("sched %d not found", i);
- return EINVAL;
- }
- /* remove profile if exists */
- if (s->profile) {
- free(s->profile, M_DUMMYNET);
- s->profile = NULL;
- }
- /* copy all parameters */
- s->link.oid = p->oid;
- s->link.link_nr = i;
- s->link.delay = p->delay;
- if (s->link.bandwidth != p->bandwidth) {
- /* XXX bandwidth changes, need to update red params */
- s->link.bandwidth = p->bandwidth;
- update_red(s);
- }
- s->link.burst = p->burst;
- schk_reset_credit(s);
- }
- dn_cfg.id++;
- DN_BH_WUNLOCK();
- return 0;
-}
-
-/*
- * configure a flowset. Can be called from inside with locked=1,
- */
-static struct dn_fsk *
-config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked)
-{
- int i;
- struct dn_fsk *fs;
-
- if (nfs->oid.len != sizeof(*nfs)) {
- D("invalid flowset len %d", nfs->oid.len);
- return NULL;
- }
- i = nfs->fs_nr;
- if (i <= 0 || i >= 3*DN_MAX_ID)
- return NULL;
- ND("flowset %d", i);
- /* XXX other sanity checks */
- if (nfs->flags & DN_QSIZE_BYTES) {
- ipdn_bound_var(&nfs->qsize, 16384,
- 1500, dn_cfg.byte_limit, NULL); // "queue byte size");
- } else {
- ipdn_bound_var(&nfs->qsize, 50,
- 1, dn_cfg.slot_limit, NULL); // "queue slot size");
- }
- if (nfs->flags & DN_HAVE_MASK) {
- /* make sure we have some buckets */
- ipdn_bound_var(&nfs->buckets, dn_cfg.hash_size,
- 1, dn_cfg.max_hash_size, "flowset buckets");
- } else {
- nfs->buckets = 1; /* we only need 1 */
- }
- if (!locked)
- DN_BH_WLOCK();
- do { /* exit with break when done */
- struct dn_schk *s;
- int flags = nfs->sched_nr ? DNHT_INSERT : 0;
- int j;
- int oldc = dn_cfg.fsk_count;
- fs = dn_ht_find(dn_cfg.fshash, i, flags, NULL);
- if (fs == NULL) {
- D("missing sched for flowset %d", i);
- break;
- }
- /* grab some defaults from the existing one */
- if (nfs->sched_nr == 0) /* reuse */
- nfs->sched_nr = fs->fs.sched_nr;
- for (j = 0; j < sizeof(nfs->par)/sizeof(nfs->par[0]); j++) {
- if (nfs->par[j] == -1) /* reuse */
- nfs->par[j] = fs->fs.par[j];
- }
- if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) {
- ND("flowset %d unchanged", i);
- break; /* no change, nothing to do */
- }
- if (oldc != dn_cfg.fsk_count) /* new item */
- dn_cfg.id++;
- s = locate_scheduler(nfs->sched_nr);
- /* detach from old scheduler if needed, preserving
- * queues if we need to reattach. Then update the
- * configuration, and possibly attach to the new sched.
- */
- DX(2, "fs %d changed sched %d@%p to %d@%p",
- fs->fs.fs_nr,
- fs->fs.sched_nr, fs->sched, nfs->sched_nr, s);
- if (fs->sched) {
- int flags = s ? DN_DETACH : (DN_DETACH | DN_DESTROY);
- flags |= DN_DESTROY; /* XXX temporary */
- fsk_detach(fs, flags);
- }
- fs->fs = *nfs; /* copy configuration */
- if (s != NULL)
- fsk_attach(fs, s);
- } while (0);
- if (!locked)
- DN_BH_WUNLOCK();
- return fs;
-}
-
-/*
- * config/reconfig a scheduler and its FIFO variant.
- * For !MULTIQUEUE schedulers, also set up the flowset.
- *
- * On reconfigurations (detected because s->fp is set),
- * detach existing flowsets preserving traffic, preserve link,
- * and delete the old scheduler creating a new one.
- */
-static int
-config_sched(struct dn_sch *_nsch, struct dn_id *arg)
-{
- struct dn_schk *s;
- struct schk_new_arg a; /* argument for schk_new */
- int i;
- struct dn_link p; /* copy of oldlink */
- struct dn_profile *pf = NULL; /* copy of old link profile */
- /* Used to preserv mask parameter */
- struct ipfw_flow_id new_mask;
- int new_buckets = 0;
- int new_flags = 0;
- int pipe_cmd;
- int err = ENOMEM;
-
- a.sch = _nsch;
- if (a.sch->oid.len != sizeof(*a.sch)) {
- D("bad sched len %d", a.sch->oid.len);
- return EINVAL;
- }
- i = a.sch->sched_nr;
- if (i <= 0 || i >= DN_MAX_ID)
- return EINVAL;
- /* make sure we have some buckets */
- if (a.sch->flags & DN_HAVE_MASK)
- ipdn_bound_var(&a.sch->buckets, dn_cfg.hash_size,
- 1, dn_cfg.max_hash_size, "sched buckets");
- /* XXX other sanity checks */
- bzero(&p, sizeof(p));
-
- pipe_cmd = a.sch->flags & DN_PIPE_CMD;
- a.sch->flags &= ~DN_PIPE_CMD; //XXX do it even if is not set?
- if (pipe_cmd) {
- /* Copy mask parameter */
- new_mask = a.sch->sched_mask;
- new_buckets = a.sch->buckets;
- new_flags = a.sch->flags;
- }
- DN_BH_WLOCK();
-again: /* run twice, for wfq and fifo */
- /*
- * lookup the type. If not supplied, use the previous one
- * or default to WF2Q+. Otherwise, return an error.
- */
- dn_cfg.id++;
- a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name);
- if (a.fp != NULL) {
- /* found. Lookup or create entry */
- s = dn_ht_find(dn_cfg.schedhash, i, DNHT_INSERT, &a);
- } else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) {
- /* No type. search existing s* or retry with WF2Q+ */
- s = dn_ht_find(dn_cfg.schedhash, i, 0, &a);
- if (s != NULL) {
- a.fp = s->fp;
- /* Scheduler exists, skip to FIFO scheduler
- * if command was pipe config...
- */
- if (pipe_cmd)
- goto next;
- } else {
- /* New scheduler, create a wf2q+ with no mask
- * if command was pipe config...
- */
- if (pipe_cmd) {
- /* clear mask parameter */
- bzero(&a.sch->sched_mask, sizeof(new_mask));
- a.sch->buckets = 0;
- a.sch->flags &= ~DN_HAVE_MASK;
- }
- a.sch->oid.subtype = DN_SCHED_WF2QP;
- goto again;
- }
- } else {
- D("invalid scheduler type %d %s",
- a.sch->oid.subtype, a.sch->name);
- err = EINVAL;
- goto error;
- }
- /* normalize name and subtype */
- a.sch->oid.subtype = a.fp->type;
- bzero(a.sch->name, sizeof(a.sch->name));
- strlcpy(a.sch->name, a.fp->name, sizeof(a.sch->name));
- if (s == NULL) {
- D("cannot allocate scheduler %d", i);
- goto error;
- }
- /* restore existing link if any */
- if (p.link_nr) {
- s->link = p;
- if (!pf || pf->link_nr != p.link_nr) { /* no saved value */
- s->profile = NULL; /* XXX maybe not needed */
- } else {
- s->profile = malloc(sizeof(struct dn_profile),
- M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (s->profile == NULL) {
- D("cannot allocate profile");
- goto error; //XXX
- }
- bcopy(pf, s->profile, sizeof(*pf));
- }
- }
- p.link_nr = 0;
- if (s->fp == NULL) {
- DX(2, "sched %d new type %s", i, a.fp->name);
- } else if (s->fp != a.fp ||
- bcmp(a.sch, &s->sch, sizeof(*a.sch)) ) {
- /* already existing. */
- DX(2, "sched %d type changed from %s to %s",
- i, s->fp->name, a.fp->name);
- DX(4, " type/sub %d/%d -> %d/%d",
- s->sch.oid.type, s->sch.oid.subtype,
- a.sch->oid.type, a.sch->oid.subtype);
- if (s->link.link_nr == 0)
- D("XXX WARNING link 0 for sched %d", i);
- p = s->link; /* preserve link */
- if (s->profile) {/* preserve profile */
- if (!pf)
- pf = malloc(sizeof(*pf),
- M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (pf) /* XXX should issue a warning otherwise */
- bcopy(s->profile, pf, sizeof(*pf));
- }
- /* remove from the hash */
- dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
- /* Detach flowsets, preserve queues. */
- // schk_delete_cb(s, NULL);
- // XXX temporarily, kill queues
- schk_delete_cb(s, (void *)DN_DESTROY);
- goto again;
- } else {
- DX(4, "sched %d unchanged type %s", i, a.fp->name);
- }
- /* complete initialization */
- s->sch = *a.sch;
- s->fp = a.fp;
- s->cfg = arg;
- // XXX schk_reset_credit(s);
- /* create the internal flowset if needed,
- * trying to reuse existing ones if available
- */
- if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) {
- s->fs = dn_ht_find(dn_cfg.fshash, i, 0, NULL);
- if (!s->fs) {
- struct dn_fs fs;
- bzero(&fs, sizeof(fs));
- set_oid(&fs.oid, DN_FS, sizeof(fs));
- fs.fs_nr = i + DN_MAX_ID;
- fs.sched_nr = i;
- s->fs = config_fs(&fs, NULL, 1 /* locked */);
- }
- if (!s->fs) {
- schk_delete_cb(s, (void *)DN_DESTROY);
- D("error creating internal fs for %d", i);
- goto error;
- }
- }
- /* call init function after the flowset is created */
- if (s->fp->config)
- s->fp->config(s);
- update_fs(s);
-next:
- if (i < DN_MAX_ID) { /* now configure the FIFO instance */
- i += DN_MAX_ID;
- if (pipe_cmd) {
- /* Restore mask parameter for FIFO */
- a.sch->sched_mask = new_mask;
- a.sch->buckets = new_buckets;
- a.sch->flags = new_flags;
- } else {
- /* sched config shouldn't modify the FIFO scheduler */
- if (dn_ht_find(dn_cfg.schedhash, i, 0, &a) != NULL) {
- /* FIFO already exist, don't touch it */
- err = 0; /* and this is not an error */
- goto error;
- }
- }
- a.sch->sched_nr = i;
- a.sch->oid.subtype = DN_SCHED_FIFO;
- bzero(a.sch->name, sizeof(a.sch->name));
- goto again;
- }
- err = 0;
-error:
- DN_BH_WUNLOCK();
- if (pf)
- free(pf, M_DUMMYNET);
- return err;
-}
-
-/*
- * attach a profile to a link
- */
-static int
-config_profile(struct dn_profile *pf, struct dn_id *arg)
-{
- struct dn_schk *s;
- int i, olen, err = 0;
-
- if (pf->oid.len < sizeof(*pf)) {
- D("short profile len %d", pf->oid.len);
- return EINVAL;
- }
- i = pf->link_nr;
- if (i <= 0 || i >= DN_MAX_ID)
- return EINVAL;
- /* XXX other sanity checks */
- DN_BH_WLOCK();
- for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
- s = locate_scheduler(i);
-
- if (s == NULL) {
- err = EINVAL;
- break;
- }
- dn_cfg.id++;
- /*
- * If we had a profile and the new one does not fit,
- * or it is deleted, then we need to free memory.
- */
- if (s->profile && (pf->samples_no == 0 ||
- s->profile->oid.len < pf->oid.len)) {
- free(s->profile, M_DUMMYNET);
- s->profile = NULL;
- }
- if (pf->samples_no == 0)
- continue;
- /*
- * new profile, possibly allocate memory
- * and copy data.
- */
- if (s->profile == NULL)
- s->profile = malloc(pf->oid.len,
- M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (s->profile == NULL) {
- D("no memory for profile %d", i);
- err = ENOMEM;
- break;
- }
- /* preserve larger length XXX double check */
- olen = s->profile->oid.len;
- if (olen < pf->oid.len)
- olen = pf->oid.len;
- bcopy(pf, s->profile, pf->oid.len);
- s->profile->oid.len = olen;
- }
- DN_BH_WUNLOCK();
- return err;
-}
-
-/*
- * Delete all objects:
- */
-static void
-dummynet_flush(void)
-{
-
- /* delete all schedulers and related links/queues/flowsets */
- dn_ht_scan(dn_cfg.schedhash, schk_delete_cb,
- (void *)(uintptr_t)DN_DELETE_FS);
- /* delete all remaining (unlinked) flowsets */
- DX(4, "still %d unlinked fs", dn_cfg.fsk_count);
- dn_ht_free(dn_cfg.fshash, DNHT_REMOVE);
- fsk_detach_list(&dn_cfg.fsu, DN_DELETE_FS);
- /* Reinitialize system heap... */
- heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
-}
-
-/*
- * Main handler for configuration. We are guaranteed to be called
- * with an oid which is at least a dn_id.
- * - the first object is the command (config, delete, flush, ...)
- * - config_link must be issued after the corresponding config_sched
- * - parameters (DN_TXT) for an object must preceed the object
- * processed on a config_sched.
- */
-int
-do_config(void *p, int l)
-{
- struct dn_id *next, *o;
- int err = 0, err2 = 0;
- struct dn_id *arg = NULL;
- uintptr_t *a;
-
- o = p;
- if (o->id != DN_API_VERSION) {
- D("invalid api version got %d need %d",
- o->id, DN_API_VERSION);
- return EINVAL;
- }
- for (; l >= sizeof(*o); o = next) {
- struct dn_id *prev = arg;
- if (o->len < sizeof(*o) || l < o->len) {
- D("bad len o->len %d len %d", o->len, l);
- err = EINVAL;
- break;
- }
- l -= o->len;
- next = (struct dn_id *)((char *)o + o->len);
- err = 0;
- switch (o->type) {
- default:
- D("cmd %d not implemented", o->type);
- break;
-
-#ifdef EMULATE_SYSCTL
- /* sysctl emulation.
- * if we recognize the command, jump to the correct
- * handler and return
- */
- case DN_SYSCTL_SET:
- err = kesysctl_emu_set(p, l);
- return err;
-#endif
-
- case DN_CMD_CONFIG: /* simply a header */
- break;
-
- case DN_CMD_DELETE:
- /* the argument is in the first uintptr_t after o */
- a = (uintptr_t *)(o+1);
- if (o->len < sizeof(*o) + sizeof(*a)) {
- err = EINVAL;
- break;
- }
- switch (o->subtype) {
- case DN_LINK:
- /* delete base and derived schedulers */
- DN_BH_WLOCK();
- err = delete_schk(*a);
- err2 = delete_schk(*a + DN_MAX_ID);
- DN_BH_WUNLOCK();
- if (!err)
- err = err2;
- break;
-
- default:
- D("invalid delete type %d",
- o->subtype);
- err = EINVAL;
- break;
-
- case DN_FS:
- err = (*a <1 || *a >= DN_MAX_ID) ?
- EINVAL : delete_fs(*a, 0) ;
- break;
- }
- break;
-
- case DN_CMD_FLUSH:
- DN_BH_WLOCK();
- dummynet_flush();
- DN_BH_WUNLOCK();
- break;
- case DN_TEXT: /* store argument the next block */
- prev = NULL;
- arg = o;
- break;
- case DN_LINK:
- err = config_link((struct dn_link *)o, arg);
- break;
- case DN_PROFILE:
- err = config_profile((struct dn_profile *)o, arg);
- break;
- case DN_SCH:
- err = config_sched((struct dn_sch *)o, arg);
- break;
- case DN_FS:
- err = (NULL==config_fs((struct dn_fs *)o, arg, 0));
- break;
- }
- if (prev)
- arg = NULL;
- if (err != 0)
- break;
- }
- return err;
-}
-
-static int
-compute_space(struct dn_id *cmd, struct copy_args *a)
-{
- int x = 0, need = 0;
- int profile_size = sizeof(struct dn_profile) -
- ED_MAX_SAMPLES_NO*sizeof(int);
-
- /* NOTE about compute space:
- * NP = dn_cfg.schk_count
- * NSI = dn_cfg.si_count
- * NF = dn_cfg.fsk_count
- * NQ = dn_cfg.queue_count
- * - ipfw pipe show
- * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler
- * link, scheduler template, flowset
- * integrated in scheduler and header
- * for flowset list
- * (NSI)*(dn_flow) all scheduler instance (includes
- * the queue instance)
- * - ipfw sched show
- * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler
- * link, scheduler template, flowset
- * integrated in scheduler and header
- * for flowset list
- * (NSI * dn_flow) all scheduler instances
- * (NF * sizeof(uint_32)) space for flowset list linked to scheduler
- * (NQ * dn_queue) all queue [XXXfor now not listed]
- * - ipfw queue show
- * (NF * dn_fs) all flowset
- * (NQ * dn_queue) all queues
- */
- switch (cmd->subtype) {
- default:
- return -1;
- /* XXX where do LINK and SCH differ ? */
- /* 'ipfw sched show' could list all queues associated to
- * a scheduler. This feature for now is disabled
- */
- case DN_LINK: /* pipe show */
- x = DN_C_LINK | DN_C_SCH | DN_C_FLOW;
- need += dn_cfg.schk_count *
- (sizeof(struct dn_fs) + profile_size) / 2;
- need += dn_cfg.fsk_count * sizeof(uint32_t);
- break;
- case DN_SCH: /* sched show */
- need += dn_cfg.schk_count *
- (sizeof(struct dn_fs) + profile_size) / 2;
- need += dn_cfg.fsk_count * sizeof(uint32_t);
- x = DN_C_SCH | DN_C_LINK | DN_C_FLOW;
- break;
- case DN_FS: /* queue show */
- x = DN_C_FS | DN_C_QUEUE;
- break;
- case DN_GET_COMPAT: /* compatibility mode */
- need = dn_compat_calc_size();
- break;
- }
- a->flags = x;
- if (x & DN_C_SCH) {
- need += dn_cfg.schk_count * sizeof(struct dn_sch) / 2;
- /* NOT also, each fs might be attached to a sched */
- need += dn_cfg.schk_count * sizeof(struct dn_id) / 2;
- }
- if (x & DN_C_FS)
- need += dn_cfg.fsk_count * sizeof(struct dn_fs);
- if (x & DN_C_LINK) {
- need += dn_cfg.schk_count * sizeof(struct dn_link) / 2;
- }
- /*
- * When exporting a queue to userland, only pass up the
- * struct dn_flow, which is the only visible part.
- */
-
- if (x & DN_C_QUEUE)
- need += dn_cfg.queue_count * sizeof(struct dn_flow);
- if (x & DN_C_FLOW)
- need += dn_cfg.si_count * (sizeof(struct dn_flow));
- return need;
-}
-
-/*
- * If compat != NULL dummynet_get is called in compatibility mode.
- * *compat will be the pointer to the buffer to pass to ipfw
- */
-int
-dummynet_get(struct sockopt *sopt, void **compat)
-{
- int have, i, need, error;
- char *start = NULL, *buf;
- size_t sopt_valsize;
- struct dn_id *cmd;
- struct copy_args a;
- struct copy_range r;
- int l = sizeof(struct dn_id);
-
- bzero(&a, sizeof(a));
- bzero(&r, sizeof(r));
-
- /* save and restore original sopt_valsize around copyin */
- sopt_valsize = sopt->sopt_valsize;
-
- cmd = &r.o;
-
- if (!compat) {
- /* copy at least an oid, and possibly a full object */
- error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd));
- sopt->sopt_valsize = sopt_valsize;
- if (error)
- goto done;
- l = cmd->len;
-#ifdef EMULATE_SYSCTL
- /* sysctl emulation. */
- if (cmd->type == DN_SYSCTL_GET)
- return kesysctl_emu_get(sopt);
-#endif
- if (l > sizeof(r)) {
- /* request larger than default, allocate buffer */
- cmd = malloc(l, M_DUMMYNET, M_WAITOK);
- error = sooptcopyin(sopt, cmd, l, l);
- sopt->sopt_valsize = sopt_valsize;
- if (error)
- goto done;
- }
- } else { /* compatibility */
- error = 0;
- cmd->type = DN_CMD_GET;
- cmd->len = sizeof(struct dn_id);
- cmd->subtype = DN_GET_COMPAT;
- // cmd->id = sopt_valsize;
- D("compatibility mode");
- }
- a.extra = (struct copy_range *)cmd;
- if (cmd->len == sizeof(*cmd)) { /* no range, create a default */
- uint32_t *rp = (uint32_t *)(cmd + 1);
- cmd->len += 2* sizeof(uint32_t);
- rp[0] = 1;
- rp[1] = DN_MAX_ID - 1;
- if (cmd->subtype == DN_LINK) {
- rp[0] += DN_MAX_ID;
- rp[1] += DN_MAX_ID;
- }
- }
- /* Count space (under lock) and allocate (outside lock).
- * Exit with lock held if we manage to get enough buffer.
- * Try a few times then give up.
- */
- for (have = 0, i = 0; i < 10; i++) {
- DN_BH_WLOCK();
- need = compute_space(cmd, &a);
-
- /* if there is a range, ignore value from compute_space() */
- if (l > sizeof(*cmd))
- need = sopt_valsize - sizeof(*cmd);
-
- if (need < 0) {
- DN_BH_WUNLOCK();
- error = EINVAL;
- goto done;
- }
- need += sizeof(*cmd);
- cmd->id = need;
- if (have >= need)
- break;
-
- DN_BH_WUNLOCK();
- if (start)
- free(start, M_DUMMYNET);
- start = NULL;
- if (need > sopt_valsize)
- break;
-
- have = need;
- start = malloc(have, M_DUMMYNET, M_WAITOK | M_ZERO);
- }
-
- if (start == NULL) {
- if (compat) {
- *compat = NULL;
- error = 1; // XXX
- } else {
- error = sooptcopyout(sopt, cmd, sizeof(*cmd));
- }
- goto done;
- }
- ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, "
- "%d:%d si %d, %d:%d queues %d",
- dn_cfg.schk_count, sizeof(struct dn_sch), DN_SCH,
- dn_cfg.schk_count, sizeof(struct dn_link), DN_LINK,
- dn_cfg.fsk_count, sizeof(struct dn_fs), DN_FS,
- dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I,
- dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE);
- sopt->sopt_valsize = sopt_valsize;
- a.type = cmd->subtype;
-
- if (compat == NULL) {
- bcopy(cmd, start, sizeof(*cmd));
- ((struct dn_id*)(start))->len = sizeof(struct dn_id);
- buf = start + sizeof(*cmd);
- } else
- buf = start;
- a.start = &buf;
- a.end = start + have;
- /* start copying other objects */
- if (compat) {
- a.type = DN_COMPAT_PIPE;
- dn_ht_scan(dn_cfg.schedhash, copy_data_helper_compat, &a);
- a.type = DN_COMPAT_QUEUE;
- dn_ht_scan(dn_cfg.fshash, copy_data_helper_compat, &a);
- } else if (a.type == DN_FS) {
- dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a);
- } else {
- dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a);
- }
- DN_BH_WUNLOCK();
-
- if (compat) {
- *compat = start;
- sopt->sopt_valsize = buf - start;
- /* free() is done by ip_dummynet_compat() */
- start = NULL; //XXX hack
- } else {
- error = sooptcopyout(sopt, start, buf - start);
- }
-done:
- if (cmd && cmd != &r.o)
- free(cmd, M_DUMMYNET);
- if (start)
- free(start, M_DUMMYNET);
- return error;
-}
-
-/* Callback called on scheduler instance to delete it if idle */
-static int
-drain_scheduler_cb(void *_si, void *arg)
-{
- struct dn_sch_inst *si = _si;
-
- if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL)
- return 0;
-
- if (si->sched->fp->flags & DN_MULTIQUEUE) {
- if (si->q_count == 0)
- return si_destroy(si, NULL);
- else
- return 0;
- } else { /* !DN_MULTIQUEUE */
- if ((si+1)->ni.length == 0)
- return si_destroy(si, NULL);
- else
- return 0;
- }
- return 0; /* unreachable */
-}
-
-/* Callback called on scheduler to check if it has instances */
-static int
-drain_scheduler_sch_cb(void *_s, void *arg)
-{
- struct dn_schk *s = _s;
-
- if (s->sch.flags & DN_HAVE_MASK) {
- dn_ht_scan_bucket(s->siht, &s->drain_bucket,
- drain_scheduler_cb, NULL);
- s->drain_bucket++;
- } else {
- if (s->siht) {
- if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL)
- s->siht = NULL;
- }
- }
- return 0;
-}
-
-/* Called every tick, try to delete a 'bucket' of scheduler */
-void
-dn_drain_scheduler(void)
-{
- dn_ht_scan_bucket(dn_cfg.schedhash, &dn_cfg.drain_sch,
- drain_scheduler_sch_cb, NULL);
- dn_cfg.drain_sch++;
-}
-
-/* Callback called on queue to delete if it is idle */
-static int
-drain_queue_cb(void *_q, void *arg)
-{
- struct dn_queue *q = _q;
-
- if (q->ni.length == 0) {
- dn_delete_queue(q, DN_DESTROY);
- return DNHT_SCAN_DEL; /* queue is deleted */
- }
-
- return 0; /* queue isn't deleted */
-}
-
-/* Callback called on flowset used to check if it has queues */
-static int
-drain_queue_fs_cb(void *_fs, void *arg)
-{
- struct dn_fsk *fs = _fs;
-
- if (fs->fs.flags & DN_QHT_HASH) {
- /* Flowset has a hash table for queues */
- dn_ht_scan_bucket(fs->qht, &fs->drain_bucket,
- drain_queue_cb, NULL);
- fs->drain_bucket++;
- } else {
- /* No hash table for this flowset, null the pointer
- * if the queue is deleted
- */
- if (fs->qht) {
- if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL)
- fs->qht = NULL;
- }
- }
- return 0;
-}
-
-/* Called every tick, try to delete a 'bucket' of queue */
-void
-dn_drain_queue(void)
-{
- /* scan a bucket of flowset */
- dn_ht_scan_bucket(dn_cfg.fshash, &dn_cfg.drain_fs,
- drain_queue_fs_cb, NULL);
- dn_cfg.drain_fs++;
-}
-
-/*
- * Handler for the various dummynet socket options
- */
-static int
-ip_dn_ctl(struct sockopt *sopt)
-{
- void *p = NULL;
- int error, l;
-
- error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET);
- if (error)
- return (error);
-
- /* Disallow sets in really-really secure mode. */
- if (sopt->sopt_dir == SOPT_SET) {
- error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
- if (error)
- return (error);
- }
-
- switch (sopt->sopt_name) {
- default :
- D("dummynet: unknown option %d", sopt->sopt_name);
- error = EINVAL;
- break;
-
- case IP_DUMMYNET_FLUSH:
- case IP_DUMMYNET_CONFIGURE:
- case IP_DUMMYNET_DEL: /* remove a pipe or queue */
- case IP_DUMMYNET_GET:
- D("dummynet: compat option %d", sopt->sopt_name);
- error = ip_dummynet_compat(sopt);
- break;
-
- case IP_DUMMYNET3 :
- if (sopt->sopt_dir == SOPT_GET) {
- error = dummynet_get(sopt, NULL);
- break;
- }
- l = sopt->sopt_valsize;
- if (l < sizeof(struct dn_id) || l > 12000) {
- D("argument len %d invalid", l);
- break;
- }
- p = malloc(l, M_TEMP, M_WAITOK); // XXX can it fail ?
- error = sooptcopyin(sopt, p, l, l);
- if (error)
- break ;
- error = do_config(p, l);
- break;
- }
-
- if (p != NULL)
- free(p, M_TEMP);
-
- return error ;
-}
-
-
-static void
-ip_dn_init(void)
-{
- if (dn_cfg.init_done)
- return;
- printf("DUMMYNET %p with IPv6 initialized (100409)\n", curvnet);
- dn_cfg.init_done = 1;
- /* Set defaults here. MSVC does not accept initializers,
- * and this is also useful for vimages
- */
- /* queue limits */
- dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. */
- dn_cfg.byte_limit = 1024 * 1024;
- dn_cfg.expire = 1;
-
- /* RED parameters */
- dn_cfg.red_lookup_depth = 256; /* default lookup table depth */
- dn_cfg.red_avg_pkt_size = 512; /* default medium packet size */
- dn_cfg.red_max_pkt_size = 1500; /* default max packet size */
-
- /* hash tables */
- dn_cfg.max_hash_size = 65536; /* max in the hash tables */
- dn_cfg.hash_size = 64; /* default hash size */
-
- /* create hash tables for schedulers and flowsets.
- * In both we search by key and by pointer.
- */
- dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size,
- offsetof(struct dn_schk, schk_next),
- schk_hash, schk_match, schk_new);
- dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size,
- offsetof(struct dn_fsk, fsk_next),
- fsk_hash, fsk_match, fsk_new);
-
- /* bucket index to drain object */
- dn_cfg.drain_fs = 0;
- dn_cfg.drain_sch = 0;
-
- heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
- SLIST_INIT(&dn_cfg.fsu);
- SLIST_INIT(&dn_cfg.schedlist);
-
- DN_LOCK_INIT();
-
- TASK_INIT(&dn_task, 0, dummynet_task, curvnet);
- dn_tq = taskqueue_create("dummynet", M_WAITOK,
- taskqueue_thread_enqueue, &dn_tq);
- taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet");
-
- callout_init(&dn_timeout, CALLOUT_MPSAFE);
- callout_reset(&dn_timeout, 1, dummynet, NULL);
-
- /* Initialize curr_time adjustment mechanics. */
- getmicrouptime(&dn_cfg.prev_t);
-}
-
-static void
-ip_dn_destroy(int last)
-{
- callout_drain(&dn_timeout);
-
- DN_BH_WLOCK();
- if (last) {
- ND("removing last instance\n");
- ip_dn_ctl_ptr = NULL;
- ip_dn_io_ptr = NULL;
- }
-
- dummynet_flush();
- DN_BH_WUNLOCK();
- taskqueue_drain(dn_tq, &dn_task);
- taskqueue_free(dn_tq);
-
- dn_ht_free(dn_cfg.schedhash, 0);
- dn_ht_free(dn_cfg.fshash, 0);
- heap_free(&dn_cfg.evheap);
-
- DN_LOCK_DESTROY();
-}
-
-static int
-dummynet_modevent(module_t mod, int type, void *data)
-{
-
- if (type == MOD_LOAD) {
- if (ip_dn_io_ptr) {
- printf("DUMMYNET already loaded\n");
- return EEXIST ;
- }
- ip_dn_init();
- ip_dn_ctl_ptr = ip_dn_ctl;
- ip_dn_io_ptr = dummynet_io;
- return 0;
- } else if (type == MOD_UNLOAD) {
- ip_dn_destroy(1 /* last */);
- return 0;
- } else
- return EOPNOTSUPP;
-}
-
-/* modevent helpers for the modules */
-static int
-load_dn_sched(struct dn_alg *d)
-{
- struct dn_alg *s;
-
- if (d == NULL)
- return 1; /* error */
- ip_dn_init(); /* just in case, we need the lock */
-
- /* Check that mandatory funcs exists */
- if (d->enqueue == NULL || d->dequeue == NULL) {
- D("missing enqueue or dequeue for %s", d->name);
- return 1;
- }
-
- /* Search if scheduler already exists */
- DN_BH_WLOCK();
- SLIST_FOREACH(s, &dn_cfg.schedlist, next) {
- if (strcmp(s->name, d->name) == 0) {
- D("%s already loaded", d->name);
- break; /* scheduler already exists */
- }
- }
- if (s == NULL)
- SLIST_INSERT_HEAD(&dn_cfg.schedlist, d, next);
- DN_BH_WUNLOCK();
- D("dn_sched %s %sloaded", d->name, s ? "not ":"");
- return s ? 1 : 0;
-}
-
-static int
-unload_dn_sched(struct dn_alg *s)
-{
- struct dn_alg *tmp, *r;
- int err = EINVAL;
-
- ND("called for %s", s->name);
-
- DN_BH_WLOCK();
- SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) {
- if (strcmp(s->name, r->name) != 0)
- continue;
- ND("ref_count = %d", r->ref_count);
- err = (r->ref_count != 0) ? EBUSY : 0;
- if (err == 0)
- SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next);
- break;
- }
- DN_BH_WUNLOCK();
- D("dn_sched %s %sunloaded", s->name, err ? "not ":"");
- return err;
-}
-
-int
-dn_sched_modevent(module_t mod, int cmd, void *arg)
-{
- struct dn_alg *sch = arg;
-
- if (cmd == MOD_LOAD)
- return load_dn_sched(sch);
- else if (cmd == MOD_UNLOAD)
- return unload_dn_sched(sch);
- else
- return EINVAL;
-}
-
-static moduledata_t dummynet_mod = {
- "dummynet", dummynet_modevent, NULL
-};
-
-#define DN_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN
-#define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */
-DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD);
-MODULE_DEPEND(dummynet, ipfw, 2, 2, 2);
-MODULE_VERSION(dummynet, 3);
-
-/*
- * Starting up. Done in order after dummynet_modevent() has been called.
- * VNET_SYSINIT is also called for each existing vnet and each new vnet.
- */
-//VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_init, NULL);
-
-/*
- * Shutdown handlers up shop. These are done in REVERSE ORDER, but still
- * after dummynet_modevent() has been called. Not called on reboot.
- * VNET_SYSUNINIT is also called for each exiting vnet as it exits.
- * or when the module is unloaded.
- */
-//VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL);
-
-/* end of file */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw2.c b/freebsd/sys/netpfil/ipfw/ip_fw2.c
index 224ba937..a3a11819 100644
--- a/freebsd/sys/netpfil/ipfw/ip_fw2.c
+++ b/freebsd/sys/netpfil/ipfw/ip_fw2.c
@@ -36,7 +36,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ipdivert.h>
#include <rtems/bsd/local/opt_inet.h>
#ifndef INET
-#error IPFIREWALL requires INET.
+#error "IPFIREWALL requires INET"
#endif /* INET */
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/condvar.h>
+#include <sys/counter.h>
#include <sys/eventhandler.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
@@ -54,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
@@ -61,11 +63,13 @@ __FBSDID("$FreeBSD$");
#include <sys/ucred.h>
#include <net/ethernet.h> /* for ETHERTYPE_IP */
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
-#include <net/pf_mtag.h>
#include <net/pfil.h>
#include <net/vnet.h>
+#include <netpfil/pf/pf_mtag.h>
+
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
@@ -82,7 +86,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
+#include <netinet/in_fib.h>
#ifdef INET6
+#include <netinet6/in6_fib.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/scope6_var.h>
#include <netinet6/ip6_var.h>
@@ -101,10 +107,6 @@ __FBSDID("$FreeBSD$");
* All ipfw global variables are here.
*/
-/* ipfw_vnet_ready controls when we are open for business */
-static VNET_DEFINE(int, ipfw_vnet_ready) = 0;
-#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready)
-
static VNET_DEFINE(int, fw_deny_unknown_exthdrs);
#define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs)
@@ -121,9 +123,20 @@ VNET_DEFINE(int, autoinc_step);
VNET_DEFINE(int, fw_one_pass) = 1;
VNET_DEFINE(unsigned int, fw_tables_max);
+VNET_DEFINE(unsigned int, fw_tables_sets) = 0; /* Don't use set-aware tables */
/* Use 128 tables by default */
static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT;
+#ifndef LINEAR_SKIPTO
+static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
+ int tablearg, int jump_backwards);
+#define JUMP(ch, f, num, targ, back) jump_fast(ch, f, num, targ, back)
+#else
+static int jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num,
+ int tablearg, int jump_backwards);
+#define JUMP(ch, f, num, targ, back) jump_linear(ch, f, num, targ, back)
+#endif
+
/*
* Each rule belongs to one of 32 different sets (0..31).
* The variable set_disable contains one bit per set.
@@ -144,6 +157,9 @@ VNET_DEFINE(int, verbose_limit);
/* layer3_chain contains the list of rules for layer 3 */
VNET_DEFINE(struct ip_fw_chain, layer3_chain);
+/* ipfw_vnet_ready controls when we are open for business */
+VNET_DEFINE(int, ipfw_vnet_ready) = 0;
+
VNET_DEFINE(int, ipfw_nat_ready) = 0;
ipfw_nat_t *ipfw_nat_ptr = NULL;
@@ -156,45 +172,51 @@ ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
#ifdef SYSCTL_NODE
uint32_t dummy_def = IPFW_DEFAULT_RULE;
static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS);
+static int sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS);
SYSBEGIN(f3)
SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
-SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
- CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
+ CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
"Only do a single pass through ipfw when using dummynet(4)");
-SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
- CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
"Rule number auto-increment step");
-SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose,
- CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose,
+ CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
"Log matches to ipfw rules");
-SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
- CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
"Set upper limit of matches of ipfw rules logged");
SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
&dummy_def, 0,
"The default/max possible rule number.");
-SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, tables_max,
- CTLTYPE_UINT|CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU",
- "Maximum number of tables");
+SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_max,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU",
+ "Maximum number of concurrently used tables");
+SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_sets,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+ 0, 0, sysctl_ipfw_tables_sets, "IU",
+ "Use per-set namespace for tables");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN,
&default_to_accept, 0,
"Make the default rule accept all packets.");
-TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept);
-TUNABLE_INT("net.inet.ip.fw.tables_max", &default_fw_tables);
-SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count,
- CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
+TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables);
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count,
+ CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
"Number of static rules");
#ifdef INET6
SYSCTL_DECL(_net_inet6_ip6);
SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
-SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
- CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0,
+SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
+ CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
+ &VNET_NAME(fw_deny_unknown_exthdrs), 0,
"Deny packets with unknown IPv6 Extension Headers");
-SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6,
- CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_permit_single_frag6), 0,
+SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6,
+ CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
+ &VNET_NAME(fw_permit_single_frag6), 0,
"Permit single packet IPv6 fragments");
#endif /* INET6 */
@@ -352,15 +374,18 @@ tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
}
static int
-iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uint32_t *tablearg)
+iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain,
+ uint32_t *tablearg)
{
+
if (ifp == NULL) /* no iface with this packet, match fails */
- return 0;
+ return (0);
+
/* Check by name or by IP address */
if (cmd->name[0] != '\0') { /* match by name */
if (cmd->name[0] == '\1') /* use tablearg to match */
- return ipfw_lookup_table_extended(chain, cmd->p.glob,
- ifp->if_xname, tablearg, IPFW_TABLE_INTERFACE);
+ return ipfw_lookup_table_extended(chain, cmd->p.kidx, 0,
+ &ifp->if_index, tablearg);
/* Check name */
if (cmd->p.glob) {
if (fnmatch(cmd->name, ifp->if_xname, 0) == 0)
@@ -370,7 +395,7 @@ iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uin
return(1);
}
} else {
-#ifdef __FreeBSD__ /* and OSX too ? */
+#if !defined(USERSPACE) && defined(__FreeBSD__) /* and OSX too ? */
struct ifaddr *ia;
if_addr_rlock(ifp);
@@ -413,50 +438,33 @@ iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uin
static int
verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
{
-#ifndef __FreeBSD__
+#if defined(USERSPACE) || !defined(__FreeBSD__)
return 0;
#else
- struct route ro;
- struct sockaddr_in *dst;
-
- bzero(&ro, sizeof(ro));
-
- dst = (struct sockaddr_in *)&(ro.ro_dst);
- dst->sin_family = AF_INET;
- dst->sin_len = sizeof(*dst);
- dst->sin_addr = src;
- in_rtalloc_ign(&ro, 0, fib);
+ struct nhop4_basic nh4;
- if (ro.ro_rt == NULL)
- return 0;
+ if (fib4_lookup_nh_basic(fib, src, NHR_IFAIF, 0, &nh4) != 0)
+ return (0);
/*
* If ifp is provided, check for equality with rtentry.
* We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
* in order to pass packets injected back by if_simloop():
- * if useloopback == 1 routing entry (via lo0) for our own address
+ * routing entry (via lo0) for our own address
* may exist, so we need to handle routing assymetry.
*/
- if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
- RTFREE(ro.ro_rt);
- return 0;
- }
+ if (ifp != NULL && ifp != nh4.nh_ifp)
+ return (0);
/* if no ifp provided, check if rtentry is not default route */
- if (ifp == NULL &&
- satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) {
- RTFREE(ro.ro_rt);
- return 0;
- }
+ if (ifp == NULL && (nh4.nh_flags & NHF_DEFAULT) != 0)
+ return (0);
/* or if this is a blackhole/reject route */
- if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
- RTFREE(ro.ro_rt);
- return 0;
- }
+ if (ifp == NULL && (nh4.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0)
+ return (0);
/* found valid route */
- RTFREE(ro.ro_rt);
return 1;
#endif /* __FreeBSD__ */
}
@@ -482,79 +490,62 @@ flow6id_match( int curr_flow, ipfw_insn_u32 *cmd )
}
/* support for IP6_*_ME opcodes */
+static const struct in6_addr lla_mask = {{{
+ 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+}}};
+
static int
-search_ip6_addr_net (struct in6_addr * ip6_addr)
+ipfw_localip6(struct in6_addr *in6)
{
- struct ifnet *mdc;
- struct ifaddr *mdc2;
- struct in6_ifaddr *fdm;
- struct in6_addr copia;
-
- TAILQ_FOREACH(mdc, &V_ifnet, if_link) {
- if_addr_rlock(mdc);
- TAILQ_FOREACH(mdc2, &mdc->if_addrhead, ifa_link) {
- if (mdc2->ifa_addr->sa_family == AF_INET6) {
- fdm = (struct in6_ifaddr *)mdc2;
- copia = fdm->ia_addr.sin6_addr;
- /* need for leaving scope_id in the sock_addr */
- in6_clearscope(&copia);
- if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) {
- if_addr_runlock(mdc);
- return 1;
- }
- }
+ struct rm_priotracker in6_ifa_tracker;
+ struct in6_ifaddr *ia;
+
+ if (IN6_IS_ADDR_MULTICAST(in6))
+ return (0);
+
+ if (!IN6_IS_ADDR_LINKLOCAL(in6))
+ return (in6_localip(in6));
+
+ IN6_IFADDR_RLOCK(&in6_ifa_tracker);
+ TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
+ if (!IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr))
+ continue;
+ if (IN6_ARE_MASKED_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
+ in6, &lla_mask)) {
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
+ return (1);
}
- if_addr_runlock(mdc);
}
- return 0;
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
+ return (0);
}
static int
verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib)
{
- struct route_in6 ro;
- struct sockaddr_in6 *dst;
+ struct nhop6_basic nh6;
- bzero(&ro, sizeof(ro));
-
- dst = (struct sockaddr_in6 * )&(ro.ro_dst);
- dst->sin6_family = AF_INET6;
- dst->sin6_len = sizeof(*dst);
- dst->sin6_addr = *src;
+ if (IN6_IS_SCOPE_LINKLOCAL(src))
+ return (1);
- in6_rtalloc_ign(&ro, 0, fib);
- if (ro.ro_rt == NULL)
- return 0;
+ if (fib6_lookup_nh_basic(fib, src, 0, NHR_IFAIF, 0, &nh6) != 0)
+ return (0);
- /*
- * if ifp is provided, check for equality with rtentry
- * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
- * to support the case of sending packets to an address of our own.
- * (where the former interface is the first argument of if_simloop()
- * (=ifp), the latter is lo0)
- */
- if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
- RTFREE(ro.ro_rt);
- return 0;
- }
+ /* If ifp is provided, check for equality with route table. */
+ if (ifp != NULL && ifp != nh6.nh_ifp)
+ return (0);
/* if no ifp provided, check if rtentry is not default route */
- if (ifp == NULL &&
- IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) {
- RTFREE(ro.ro_rt);
- return 0;
- }
+ if (ifp == NULL && (nh6.nh_flags & NHF_DEFAULT) != 0)
+ return (0);
/* or if this is a blackhole/reject route */
- if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
- RTFREE(ro.ro_rt);
- return 0;
- }
+ if (ifp == NULL && (nh6.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0)
+ return (0);
/* found valid route */
- RTFREE(ro.ro_rt);
return 1;
-
}
static int
@@ -632,8 +623,6 @@ send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip)
m_adj(m, args->L3offset);
#endif
if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
- /* We need the IP header in host order for icmp_error(). */
- SET_HOST_IPLEN(ip);
icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
} else if (args->f_id.proto == IPPROTO_TCP) {
struct tcphdr *const tcp =
@@ -666,6 +655,9 @@ static int
check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp,
struct ucred **uc)
{
+#if defined(USERSPACE)
+ return 0; // not supported in userspace
+#else
#ifndef __FreeBSD__
/* XXX */
return cred_check(insn, proto, oif,
@@ -776,6 +768,7 @@ check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp,
#endif /* __rtems__ */
return (match);
#endif /* __FreeBSD__ */
+#endif /* not supported in userspace */
}
/*
@@ -793,9 +786,10 @@ set_match(struct ip_fw_args *args, int slot,
args->rule.rulenum = chain->map[slot]->rulenum;
}
+#ifndef LINEAR_SKIPTO
/*
* Helper function to enable cached rule lookups using
- * x_next and next_rule fields in ipfw rule.
+ * cached_id and cached_pos fields in ipfw rule.
*/
static int
jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
@@ -803,28 +797,51 @@ jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
{
int f_pos;
- /* If possible use cached f_pos (in f->next_rule),
- * whose version is written in f->next_rule
+ /* If possible use cached f_pos (in f->cached_pos),
+ * whose version is written in f->cached_id
* (horrible hacks to avoid changing the ABI).
*/
- if (num != IP_FW_TABLEARG && (uintptr_t)f->x_next == chain->id)
- f_pos = (uintptr_t)f->next_rule;
+ if (num != IP_FW_TARG && f->cached_id == chain->id)
+ f_pos = f->cached_pos;
else {
- int i = IP_FW_ARG_TABLEARG(num);
+ int i = IP_FW_ARG_TABLEARG(chain, num, skipto);
/* make sure we do not jump backward */
if (jump_backwards == 0 && i <= f->rulenum)
i = f->rulenum + 1;
- f_pos = ipfw_find_rule(chain, i, 0);
+ if (chain->idxmap != NULL)
+ f_pos = chain->idxmap[i];
+ else
+ f_pos = ipfw_find_rule(chain, i, 0);
/* update the cache */
- if (num != IP_FW_TABLEARG) {
- f->next_rule = (void *)(uintptr_t)f_pos;
- f->x_next = (void *)(uintptr_t)chain->id;
+ if (num != IP_FW_TARG) {
+ f->cached_id = chain->id;
+ f->cached_pos = f_pos;
}
}
return (f_pos);
}
+#else
+/*
+ * Helper function to enable real fast rule lookups.
+ */
+static int
+jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num,
+ int tablearg, int jump_backwards)
+{
+ int f_pos;
+
+ num = IP_FW_ARG_TABLEARG(chain, num, skipto);
+ /* make sure we do not jump backward */
+ if (jump_backwards == 0 && num <= f->rulenum)
+ num = f->rulenum + 1;
+ f_pos = chain->idxmap[num];
+
+ return (f_pos);
+}
+#endif
+#define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f)
/*
* The main check routine for the firewall.
*
@@ -929,7 +946,7 @@ ipfw_chk(struct ip_fw_args *args)
* offset == 0 means that (if this is an IPv4 packet)
* this is the first or only fragment.
* For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header
- * or there is a single packet fragement (fragement header added
+ * or there is a single packet fragment (fragment header added
* without needed). We will treat a single packet fragment as if
* there was no fragment header (or log/block depending on the
* V_fw_permit_single_frag6 sysctl setting).
@@ -964,6 +981,7 @@ ipfw_chk(struct ip_fw_args *args)
* MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL)
*/
int dyn_dir = MATCH_UNKNOWN;
+ uint16_t dyn_name = 0;
ipfw_dyn_rule *q = NULL;
struct ip_fw_chain *chain = &V_layer3_chain;
@@ -984,6 +1002,7 @@ ipfw_chk(struct ip_fw_args *args)
int is_ipv4 = 0;
int done = 0; /* flag to exit the outer loop */
+ IPFW_RLOCK_TRACKER;
if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
return (IP_FW_PASS); /* accept */
@@ -1249,9 +1268,9 @@ do { \
args->f_id.dst_port = dst_port = ntohs(dst_port);
}
- IPFW_RLOCK(chain);
+ IPFW_PF_RLOCK(chain);
if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
- IPFW_RUNLOCK(chain);
+ IPFW_PF_RUNLOCK(chain);
return (IP_FW_PASS); /* accept */
}
if (args->rule.slot) {
@@ -1471,9 +1490,10 @@ do { \
proto != IPPROTO_UDP)
break;
else if (v == 2)
- key = htonl(dst_port);
+ key = dst_port;
else if (v == 3)
- key = htonl(src_port);
+ key = src_port;
+#ifndef USERSPACE
else if (v == 4 || v == 5) {
check_uidgid(
(ipfw_insn_u32 *)cmd,
@@ -1499,8 +1519,9 @@ do { \
else if (v == 5 /* O_JAIL */)
key = ucred_cache.xid;
#endif /* !__FreeBSD__ */
- key = htonl(key);
- } else
+ }
+#endif /* !USERSPACE */
+ else
break;
}
match = ipfw_lookup_table(chain,
@@ -1517,8 +1538,9 @@ do { \
void *pkey = (cmd->opcode == O_IP_DST_LOOKUP) ?
&args->f_id.dst_ip6: &args->f_id.src_ip6;
match = ipfw_lookup_table_extended(chain,
- cmd->arg1, pkey, &v,
- IPFW_TABLE_CIDR);
+ cmd->arg1,
+ sizeof(struct in6_addr),
+ pkey, &v);
if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
match = ((ipfw_insn_u32 *)cmd)->d[0] == v;
if (match)
@@ -1526,6 +1548,17 @@ do { \
}
break;
+ case O_IP_FLOW_LOOKUP:
+ {
+ uint32_t v = 0;
+ match = ipfw_lookup_table_extended(chain,
+ cmd->arg1, 0, &args->f_id, &v);
+ if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
+ match = ((ipfw_insn_u32 *)cmd)->d[0] == v;
+ if (match)
+ tablearg = v;
+ }
+ break;
case O_IP_SRC_MASK:
case O_IP_DST_MASK:
if (is_ipv4) {
@@ -1551,7 +1584,7 @@ do { \
#ifdef INET6
/* FALLTHROUGH */
case O_IP6_SRC_ME:
- match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6);
+ match= is_ipv6 && ipfw_localip6(&args->f_id.src_ip6);
#endif
break;
@@ -1590,7 +1623,7 @@ do { \
#ifdef INET6
/* FALLTHROUGH */
case O_IP6_DST_ME:
- match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6);
+ match= is_ipv6 && ipfw_localip6(&args->f_id.dst_ip6);
#endif
break;
@@ -1697,7 +1730,7 @@ do { \
break;
/* DSCP bitmask is stored as low_u32 high_u32 */
- if (x > 32)
+ if (x >= 32)
match = *(p + 1) & (1 << (x - 32));
else
match = *p & (1 << x);
@@ -1732,9 +1765,11 @@ do { \
break;
case O_TCPOPTS:
- PULLUP_LEN(hlen, ulp, (TCP(ulp)->th_off << 2));
- match = (proto == IPPROTO_TCP && offset == 0 &&
- tcpopts_match(TCP(ulp), cmd));
+ if (proto == IPPROTO_TCP && offset == 0 && ulp){
+ PULLUP_LEN(hlen, ulp,
+ (TCP(ulp)->th_off << 2));
+ match = tcpopts_match(TCP(ulp), cmd);
+ }
break;
case O_TCPSEQ:
@@ -1778,27 +1813,37 @@ do { \
case O_ALTQ: {
struct pf_mtag *at;
+ struct m_tag *mtag;
ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
+ /*
+ * ALTQ uses mbuf tags from another
+ * packet filtering system - pf(4).
+ * We allocate a tag in its format
+ * and fill it in, pretending to be pf(4).
+ */
match = 1;
at = pf_find_mtag(m);
if (at != NULL && at->qid != 0)
break;
- at = pf_get_mtag(m);
- if (at == NULL) {
+ mtag = m_tag_get(PACKET_TAG_PF,
+ sizeof(struct pf_mtag), M_NOWAIT | M_ZERO);
+ if (mtag == NULL) {
/*
* Let the packet fall back to the
* default ALTQ.
*/
break;
}
+ m_tag_prepend(m, mtag);
+ at = (struct pf_mtag *)(mtag + 1);
at->qid = altq->qid;
at->hdr = ip;
break;
}
case O_LOG:
- ipfw_log(f, hlen, args, m,
+ ipfw_log(chain, f, hlen, args, m,
oif, offset | ip6f_mf, tablearg, ip);
match = 1;
break;
@@ -1920,7 +1965,7 @@ do { \
case O_TAG: {
struct m_tag *mtag;
- uint32_t tag = IP_FW_ARG_TABLEARG(cmd->arg1);
+ uint32_t tag = TARG(cmd->arg1, tag);
/* Packet is already tagged with this tag? */
mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
@@ -1954,6 +1999,7 @@ do { \
break;
case O_SOCKARG: {
+#ifndef USERSPACE /* not supported in userspace */
struct inpcb *inp = args->inp;
struct inpcbinfo *pi;
@@ -1972,7 +2018,7 @@ do { \
* certainly be inp_user_cookie?
*/
- /* For incomming packet, lookup up the
+ /* For incoming packet, lookup up the
inpcb using the src/dest ip/port tuple */
if (inp == NULL) {
inp = in_pcblookup(pi,
@@ -1994,12 +2040,13 @@ do { \
match = 1;
}
}
+#endif /* !USERSPACE */
break;
}
case O_TAGGED: {
struct m_tag *mtag;
- uint32_t tag = IP_FW_ARG_TABLEARG(cmd->arg1);
+ uint32_t tag = TARG(cmd->arg1, tag);
if (cmdlen == 1) {
match = m_tag_locate(m, MTAG_IPFW,
@@ -2070,7 +2117,7 @@ do { \
*/
case O_LIMIT:
case O_KEEP_STATE:
- if (ipfw_install_state(f,
+ if (ipfw_install_state(chain, f,
(ipfw_insn_limit *)cmd, args, tablearg)) {
/* error or limit violation */
retval = IP_FW_DENY;
@@ -2085,17 +2132,35 @@ do { \
/*
* dynamic rules are checked at the first
* keep-state or check-state occurrence,
- * with the result being stored in dyn_dir.
+ * with the result being stored in dyn_dir
+ * and dyn_name.
* The compiler introduces a PROBE_STATE
* instruction for us when we have a
* KEEP_STATE (because PROBE_STATE needs
* to be run first).
+ *
+ * (dyn_dir == MATCH_UNKNOWN) means this is
+ * first lookup for such f_id. Do lookup.
+ *
+ * (dyn_dir != MATCH_UNKNOWN &&
+ * dyn_name != 0 && dyn_name != cmd->arg1)
+ * means previous lookup didn't find dynamic
+ * rule for specific state name and current
+ * lookup will search rule with another state
+ * name. Redo lookup.
+ *
+ * (dyn_dir != MATCH_UNKNOWN && dyn_name == 0)
+ * means previous lookup was for `any' name
+ * and it didn't find rule. No need to do
+ * lookup again.
*/
- if (dyn_dir == MATCH_UNKNOWN &&
+ if ((dyn_dir == MATCH_UNKNOWN ||
+ (dyn_name != 0 &&
+ dyn_name != cmd->arg1)) &&
(q = ipfw_lookup_dyn_rule(&args->f_id,
&dyn_dir, proto == IPPROTO_TCP ?
- TCP(ulp) : NULL))
- != NULL) {
+ TCP(ulp): NULL,
+ (dyn_name = cmd->arg1))) != NULL) {
/*
* Found dynamic entry, update stats
* and jump to the 'action' part of
@@ -2137,7 +2202,7 @@ do { \
case O_PIPE:
case O_QUEUE:
set_match(args, f_pos, chain);
- args->rule.info = IP_FW_ARG_TABLEARG(cmd->arg1);
+ args->rule.info = TARG(cmd->arg1, pipe);
if (cmd->opcode == O_PIPE)
args->rule.info |= IPFW_IS_PIPE;
if (V_fw_one_pass)
@@ -2157,7 +2222,7 @@ do { \
retval = (cmd->opcode == O_DIVERT) ?
IP_FW_DIVERT : IP_FW_TEE;
set_match(args, f_pos, chain);
- args->rule.info = IP_FW_ARG_TABLEARG(cmd->arg1);
+ args->rule.info = TARG(cmd->arg1, divert);
break;
case O_COUNT:
@@ -2167,7 +2232,7 @@ do { \
case O_SKIPTO:
IPFW_INC_RULE_COUNTER(f, pktlen);
- f_pos = jump_fast(chain, f, cmd->arg1, tablearg, 0);
+ f_pos = JUMP(chain, f, cmd->arg1, tablearg, 0);
/*
* Skip disabled rules, and re-enter
* the inner loop with the correct
@@ -2256,7 +2321,7 @@ do { \
if (IS_CALL) {
stack[mtag->m_tag_id] = f->rulenum;
mtag->m_tag_id++;
- f_pos = jump_fast(chain, f, cmd->arg1,
+ f_pos = JUMP(chain, f, cmd->arg1,
tablearg, 1);
} else { /* `return' action */
mtag->m_tag_id--;
@@ -2328,13 +2393,48 @@ do { \
if (q == NULL || q->rule != f ||
dyn_dir == MATCH_FORWARD) {
struct sockaddr_in *sa;
+
sa = &(((ipfw_insn_sa *)cmd)->sa);
if (sa->sin_addr.s_addr == INADDR_ANY) {
- bcopy(sa, &args->hopstore,
- sizeof(*sa));
- args->hopstore.sin_addr.s_addr =
- htonl(tablearg);
- args->next_hop = &args->hopstore;
+#ifdef INET6
+ /*
+ * We use O_FORWARD_IP opcode for
+ * fwd rule with tablearg, but tables
+ * now support IPv6 addresses. And
+ * when we are inspecting IPv6 packet,
+ * we can use nh6 field from
+ * table_value as next_hop6 address.
+ */
+ if (is_ipv6) {
+ struct sockaddr_in6 *sa6;
+
+ sa6 = args->next_hop6 =
+ &args->hopstore6;
+ sa6->sin6_family = AF_INET6;
+ sa6->sin6_len = sizeof(*sa6);
+ sa6->sin6_addr = TARG_VAL(
+ chain, tablearg, nh6);
+ /*
+ * Set sin6_scope_id only for
+ * link-local unicast addresses.
+ */
+ if (IN6_IS_ADDR_LINKLOCAL(
+ &sa6->sin6_addr))
+ sa6->sin6_scope_id =
+ TARG_VAL(chain,
+ tablearg,
+ zoneid);
+ } else
+#endif
+ {
+ sa = args->next_hop =
+ &args->hopstore;
+ sa->sin_family = AF_INET;
+ sa->sin_len = sizeof(*sa);
+ sa->sin_addr.s_addr = htonl(
+ TARG_VAL(chain, tablearg,
+ nh4));
+ }
} else {
args->next_hop = sa;
}
@@ -2364,7 +2464,7 @@ do { \
case O_NETGRAPH:
case O_NGTEE:
set_match(args, f_pos, chain);
- args->rule.info = IP_FW_ARG_TABLEARG(cmd->arg1);
+ args->rule.info = TARG(cmd->arg1, netgraph);
if (V_fw_one_pass)
args->rule.info |= IPFW_ONEPASS;
retval = (cmd->opcode == O_NETGRAPH) ?
@@ -2377,7 +2477,7 @@ do { \
uint32_t fib;
IPFW_INC_RULE_COUNTER(f, pktlen);
- fib = IP_FW_ARG_TABLEARG(cmd->arg1);
+ fib = TARG(cmd->arg1, fib) & 0x7FFF;
if (fib >= rt_numfibs)
fib = 0;
M_SETFIB(m, fib);
@@ -2389,15 +2489,16 @@ do { \
case O_SETDSCP: {
uint16_t code;
- code = IP_FW_ARG_TABLEARG(cmd->arg1) & 0x3F;
+ code = TARG(cmd->arg1, dscp) & 0x3F;
l = 0; /* exit inner loop */
if (is_ipv4) {
- uint16_t a;
+ uint16_t old;
- a = ip->ip_tos;
- ip->ip_tos = (code << 2) | (ip->ip_tos & 0x03);
- a += ntohs(ip->ip_sum) - ip->ip_tos;
- ip->ip_sum = htons(a);
+ old = *(uint16_t *)ip;
+ ip->ip_tos = (code << 2) |
+ (ip->ip_tos & 0x03);
+ ip->ip_sum = cksum_adjust(ip->ip_sum,
+ old, *(uint16_t *)ip);
} else if (is_ipv6) {
uint8_t *v;
@@ -2425,20 +2526,20 @@ do { \
set_match(args, f_pos, chain);
/* Check if this is 'global' nat rule */
- if (cmd->arg1 == 0) {
+ if (cmd->arg1 == IP_FW_NAT44_GLOBAL) {
retval = ipfw_nat_ptr(args, NULL, m);
break;
}
t = ((ipfw_insn_nat *)cmd)->nat;
if (t == NULL) {
- nat_id = IP_FW_ARG_TABLEARG(cmd->arg1);
+ nat_id = TARG(cmd->arg1, nat);
t = (*lookup_nat_ptr)(&chain->nat, nat_id);
if (t == NULL) {
retval = IP_FW_DENY;
break;
}
- if (cmd->arg1 != IP_FW_TABLEARG)
+ if (cmd->arg1 != IP_FW_TARG)
((ipfw_insn_nat *)cmd)->nat = t;
}
retval = ipfw_nat_ptr(args, t, m);
@@ -2454,11 +2555,6 @@ do { \
/* if not fragmented, go to next rule */
if ((ip_off & (IP_MF | IP_OFFMASK)) == 0)
break;
- /*
- * ip_reass() expects len & off in host
- * byte order.
- */
- SET_HOST_IPLEN(ip);
args->m = m = ip_reass(m);
@@ -2472,7 +2568,6 @@ do { \
ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
- SET_NET_IPLEN(ip);
ip->ip_sum = 0;
if (hlen == sizeof(struct ip))
ip->ip_sum = in_cksum_hdr(ip);
@@ -2484,6 +2579,11 @@ do { \
done = 1; /* exit outer loop */
break;
}
+ case O_EXTERNAL_ACTION:
+ l = 0; /* in any case exit inner loop */
+ retval = ipfw_run_eaction(chain, args,
+ cmd, &done);
+ break;
default:
panic("-- unknown opcode %d\n", cmd->opcode);
@@ -2521,7 +2621,7 @@ do { \
retval = IP_FW_DENY;
printf("ipfw: ouch!, skip past end of rules, denying packet\n");
}
- IPFW_RUNLOCK(chain);
+ IPFW_PF_RUNLOCK(chain);
#ifdef __FreeBSD__
if (ucred_cache != NULL)
crfree(ucred_cache);
@@ -2553,7 +2653,27 @@ sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS)
return (ipfw_resize_tables(&V_layer3_chain, ntables));
}
+
+/*
+ * Switches table namespace between global and per-set.
+ */
+static int
+sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ unsigned int sets;
+
+ sets = V_fw_tables_sets;
+
+ error = sysctl_handle_int(oidp, &sets, 0, req);
+ /* Read operation or some error */
+ if ((error != 0) || (req->newptr == NULL))
+ return (error);
+
+ return (ipfw_switch_tables_namespace(&V_layer3_chain, sets));
+}
#endif
+
/*
* Module and VNET glue
*/
@@ -2607,7 +2727,8 @@ ipfw_init(void)
if (default_fw_tables > IPFW_TABLES_MAX)
default_fw_tables = IPFW_TABLES_MAX;
- ipfw_log_bpf(1); /* init */
+ ipfw_init_sopt_handler();
+ ipfw_iface_init();
return (error);
}
@@ -2619,7 +2740,8 @@ static void
ipfw_destroy(void)
{
- ipfw_log_bpf(0); /* uninit */
+ ipfw_iface_destroy();
+ ipfw_destroy_sopt_handler();
printf("IP firewall unloaded\n");
}
#endif /* __rtems__ */
@@ -2631,12 +2753,14 @@ ipfw_destroy(void)
static int
vnet_ipfw_init(const void *unused)
{
- int error;
+ int error, first;
struct ip_fw *rule = NULL;
struct ip_fw_chain *chain;
chain = &V_layer3_chain;
+ first = IS_DEFAULT_VNET(curvnet) ? 1 : 0;
+
/* First set up some values that are compile time options */
V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */
V_fw_deny_unknown_exthdrs = 1;
@@ -2650,16 +2774,19 @@ vnet_ipfw_init(const void *unused)
LIST_INIT(&chain->nat);
#endif
+ /* Init shared services hash table */
+ ipfw_init_srv(chain);
+
+ ipfw_init_obj_rewriter();
+ ipfw_init_counters();
/* insert the default rule and create the initial map */
chain->n_rules = 1;
- chain->static_len = sizeof(struct ip_fw);
chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_WAITOK | M_ZERO);
- if (chain->map)
- rule = malloc(chain->static_len, M_IPFW, M_WAITOK | M_ZERO);
+ rule = ipfw_alloc_rule(chain, sizeof(struct ip_fw));
/* Set initial number of tables */
V_fw_tables_max = default_fw_tables;
- error = ipfw_init_tables(chain);
+ error = ipfw_init_tables(chain, first);
if (error) {
printf("ipfw2: setting up tables failed\n");
free(chain->map, M_IPFW);
@@ -2676,18 +2803,24 @@ vnet_ipfw_init(const void *unused)
rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
chain->default_rule = chain->map[0] = rule;
chain->id = rule->id = 1;
+ /* Pre-calculate rules length for legacy dump format */
+ chain->static_len = sizeof(struct ip_fw_rule0);
IPFW_LOCK_INIT(chain);
ipfw_dyn_init(chain);
+ ipfw_eaction_init(chain, first);
+#ifdef LINEAR_SKIPTO
+ ipfw_init_skipto_cache(chain);
+#endif
+ ipfw_bpf_init(first);
/* First set up some values that are compile time options */
V_ipfw_vnet_ready = 1; /* Open for business */
/*
- * Hook the sockopt handler, and the layer2 (V_ip_fw_chk_ptr)
- * and pfil hooks for ipv4 and ipv6. Even if the latter two fail
- * we still keep the module alive because the sockopt and
- * layer2 paths are still useful.
+ * Hook the sockopt handler and pfil hooks for ipv4 and ipv6.
+ * Even if the latter two fail we still keep the module alive
+ * because the sockopt and layer2 paths are still useful.
* ipfw[6]_hook return 0 on success, ENOENT on failure,
* so we can ignore the exact return value and just set a flag.
*
@@ -2697,8 +2830,7 @@ vnet_ipfw_init(const void *unused)
* In layer2 we have the same behaviour, except that V_ether_ipfw
* is checked on each packet because there are no pfil hooks.
*/
- V_ip_fw_ctl_ptr = ipfw_ctl;
- V_ip_fw_chk_ptr = ipfw_chk;
+ V_ip_fw_ctl_ptr = ipfw_ctl3;
error = ipfw_attach_hooks(1);
return (error);
}
@@ -2710,9 +2842,9 @@ vnet_ipfw_init(const void *unused)
static int
vnet_ipfw_uninit(const void *unused)
{
- struct ip_fw *reap, *rule;
+ struct ip_fw *reap;
struct ip_fw_chain *chain = &V_layer3_chain;
- int i;
+ int i, last;
V_ipfw_vnet_ready = 0; /* tell new callers to go away */
/*
@@ -2721,33 +2853,39 @@ vnet_ipfw_uninit(const void *unused)
* sure the update is propagated and nobody will be in.
*/
(void)ipfw_attach_hooks(0 /* detach */);
- V_ip_fw_chk_ptr = NULL;
V_ip_fw_ctl_ptr = NULL;
+
+ last = IS_DEFAULT_VNET(curvnet) ? 1 : 0;
+
IPFW_UH_WLOCK(chain);
IPFW_UH_WUNLOCK(chain);
- IPFW_UH_WLOCK(chain);
- IPFW_WLOCK(chain);
ipfw_dyn_uninit(0); /* run the callout_drain */
- IPFW_WUNLOCK(chain);
- ipfw_destroy_tables(chain);
+ IPFW_UH_WLOCK(chain);
+
reap = NULL;
IPFW_WLOCK(chain);
- for (i = 0; i < chain->n_rules; i++) {
- rule = chain->map[i];
- rule->x_next = reap;
- reap = rule;
- }
- if (chain->map)
- free(chain->map, M_IPFW);
+ for (i = 0; i < chain->n_rules; i++)
+ ipfw_reap_add(chain, &reap, chain->map[i]);
+ free(chain->map, M_IPFW);
+#ifdef LINEAR_SKIPTO
+ ipfw_destroy_skipto_cache(chain);
+#endif
IPFW_WUNLOCK(chain);
IPFW_UH_WUNLOCK(chain);
+ ipfw_destroy_tables(chain, last);
+ ipfw_eaction_uninit(chain, last);
if (reap != NULL)
ipfw_reap_rules(reap);
+ vnet_ipfw_iface_destroy(chain);
+ ipfw_destroy_srv(chain);
IPFW_LOCK_DESTROY(chain);
ipfw_dyn_uninit(1); /* free the remaining parts */
- return 0;
+ ipfw_destroy_counters();
+ ipfw_destroy_obj_rewriter();
+ ipfw_bpf_uninit(last);
+ return (0);
}
#endif /* __rtems__ */
@@ -2793,13 +2931,14 @@ static moduledata_t ipfwmod = {
};
/* Define startup order. */
-#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
+#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_FIREWALL
#define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */
#define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */
#define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */
DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER);
-MODULE_VERSION(ipfw, 2);
+FEATURE(ipfw_ctl3, "ipfw new sockopt calls");
+MODULE_VERSION(ipfw, 3);
/* should declare some dependencies here */
/*
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_bpf.c b/freebsd/sys/netpfil/ipfw/ip_fw_bpf.c
new file mode 100644
index 00000000..3127809b
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_bpf.c
@@ -0,0 +1,211 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2016 Yandex LLC
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_pflog.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_types.h>
+#include <net/vnet.h>
+#include <net/bpf.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip_var.h>
+#include <netpfil/ipfw/ip_fw_private.h>
+
+static VNET_DEFINE(struct ifnet *, log_if);
+static VNET_DEFINE(struct ifnet *, pflog_if);
+static VNET_DEFINE(struct if_clone *, ipfw_cloner);
+static VNET_DEFINE(struct if_clone *, ipfwlog_cloner);
+#define V_ipfw_cloner VNET(ipfw_cloner)
+#define V_ipfwlog_cloner VNET(ipfwlog_cloner)
+#define V_log_if VNET(log_if)
+#define V_pflog_if VNET(pflog_if)
+
+static struct rmlock log_if_lock;
+#define LOGIF_LOCK_INIT(x) rm_init(&log_if_lock, "ipfw log_if lock")
+#define LOGIF_LOCK_DESTROY(x) rm_destroy(&log_if_lock)
+#define LOGIF_RLOCK_TRACKER struct rm_priotracker _log_tracker
+#define LOGIF_RLOCK(x) rm_rlock(&log_if_lock, &_log_tracker)
+#define LOGIF_RUNLOCK(x) rm_runlock(&log_if_lock, &_log_tracker)
+#define LOGIF_WLOCK(x) rm_wlock(&log_if_lock)
+#define LOGIF_WUNLOCK(x) rm_wunlock(&log_if_lock)
+
+static const char ipfwname[] = "ipfw";
+static const char ipfwlogname[] = "ipfwlog";
+
+static int
+ipfw_bpf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
+{
+
+ return (EINVAL);
+}
+
+static int
+ipfw_bpf_output(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro)
+{
+
+ if (m != NULL)
+ FREE_PKT(m);
+ return (0);
+}
+
+static void
+ipfw_clone_destroy(struct ifnet *ifp)
+{
+
+ LOGIF_WLOCK();
+ if (ifp->if_hdrlen == ETHER_HDR_LEN)
+ V_log_if = NULL;
+ else
+ V_pflog_if = NULL;
+ LOGIF_WUNLOCK();
+
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+}
+
+static int
+ipfw_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct ifnet *ifp;
+
+ ifp = if_alloc(IFT_PFLOG);
+ if (ifp == NULL)
+ return (ENOSPC);
+ if_initname(ifp, ipfwname, unit);
+ ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_mtu = 65536;
+ ifp->if_ioctl = ipfw_bpf_ioctl;
+ ifp->if_output = ipfw_bpf_output;
+ ifp->if_hdrlen = ETHER_HDR_LEN;
+ if_attach(ifp);
+ bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
+ LOGIF_WLOCK();
+ if (V_log_if != NULL) {
+ LOGIF_WUNLOCK();
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+ return (EEXIST);
+ }
+ V_log_if = ifp;
+ LOGIF_WUNLOCK();
+ return (0);
+}
+
+static int
+ipfwlog_clone_create(struct if_clone *ifc, int unit, caddr_t params)
+{
+ struct ifnet *ifp;
+
+ ifp = if_alloc(IFT_PFLOG);
+ if (ifp == NULL)
+ return (ENOSPC);
+ if_initname(ifp, ipfwlogname, unit);
+ ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_mtu = 65536;
+ ifp->if_ioctl = ipfw_bpf_ioctl;
+ ifp->if_output = ipfw_bpf_output;
+ ifp->if_hdrlen = PFLOG_HDRLEN;
+ if_attach(ifp);
+ bpfattach(ifp, DLT_PFLOG, PFLOG_HDRLEN);
+ LOGIF_WLOCK();
+ if (V_pflog_if != NULL) {
+ LOGIF_WUNLOCK();
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+ return (EEXIST);
+ }
+ V_pflog_if = ifp;
+ LOGIF_WUNLOCK();
+ return (0);
+}
+
+void
+ipfw_bpf_mtap2(void *data, u_int dlen, struct mbuf *m)
+{
+ LOGIF_RLOCK_TRACKER;
+
+ LOGIF_RLOCK();
+ if (dlen == ETHER_HDR_LEN) {
+ if (V_log_if == NULL) {
+ LOGIF_RUNLOCK();
+ return;
+ }
+ BPF_MTAP2(V_log_if, data, dlen, m);
+ } else if (dlen == PFLOG_HDRLEN) {
+ if (V_pflog_if == NULL) {
+ LOGIF_RUNLOCK();
+ return;
+ }
+ BPF_MTAP2(V_pflog_if, data, dlen, m);
+ }
+ LOGIF_RUNLOCK();
+}
+
+void
+ipfw_bpf_init(int first)
+{
+
+ if (first) {
+ LOGIF_LOCK_INIT();
+ V_log_if = NULL;
+ V_pflog_if = NULL;
+ }
+ V_ipfw_cloner = if_clone_simple(ipfwname, ipfw_clone_create,
+ ipfw_clone_destroy, 0);
+ V_ipfwlog_cloner = if_clone_simple(ipfwlogname, ipfwlog_clone_create,
+ ipfw_clone_destroy, 0);
+}
+
+void
+ipfw_bpf_uninit(int last)
+{
+
+ if_clone_detach(V_ipfw_cloner);
+ if_clone_detach(V_ipfwlog_cloner);
+ if (last)
+ LOGIF_LOCK_DESTROY();
+}
+
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c b/freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c
new file mode 100644
index 00000000..4696faac
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c
@@ -0,0 +1,1822 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#define DEB(x)
+#define DDB(x) x
+
+/*
+ * Dynamic rule support for ipfw
+ */
+
+#include <rtems/bsd/local/opt_ipfw.h>
+#include <rtems/bsd/local/opt_inet.h>
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <net/ethernet.h> /* for ETHERTYPE_IP */
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h> /* ip_defttl */
+#include <netinet/ip_fw.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp.h>
+
+#include <netinet/ip6.h> /* IN6_ARE_ADDR_EQUAL */
+#ifdef INET6
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+#endif
+
+#include <netpfil/ipfw/ip_fw_private.h>
+
+#include <machine/in_cksum.h> /* XXX for in_cksum */
+
+#ifdef MAC
+#include <security/mac/mac_framework.h>
+#endif
+
+/*
+ * Description of dynamic rules.
+ *
+ * Dynamic rules are stored in lists accessed through a hash table
+ * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can
+ * be modified through the sysctl variable dyn_buckets which is
+ * updated when the table becomes empty.
+ *
+ * XXX currently there is only one list, ipfw_dyn.
+ *
+ * When a packet is received, its address fields are first masked
+ * with the mask defined for the rule, then hashed, then matched
+ * against the entries in the corresponding list.
+ * Dynamic rules can be used for different purposes:
+ * + stateful rules;
+ * + enforcing limits on the number of sessions;
+ * + in-kernel NAT (not implemented yet)
+ *
+ * The lifetime of dynamic rules is regulated by dyn_*_lifetime,
+ * measured in seconds and depending on the flags.
+ *
+ * The total number of dynamic rules is equal to UMA zone items count.
+ * The max number of dynamic rules is dyn_max. When we reach
+ * the maximum number of rules we do not create anymore. This is
+ * done to avoid consuming too much memory, but also too much
+ * time when searching on each packet (ideally, we should try instead
+ * to put a limit on the length of the list on each bucket...).
+ *
+ * Each dynamic rule holds a pointer to the parent ipfw rule so
+ * we know what action to perform. Dynamic rules are removed when
+ * the parent rule is deleted. This can be changed by dyn_keep_states
+ * sysctl.
+ *
+ * There are some limitations with dynamic rules -- we do not
+ * obey the 'randomized match', and we do not do multiple
+ * passes through the firewall. XXX check the latter!!!
+ */
+
+struct ipfw_dyn_bucket {
+ struct mtx mtx; /* Bucket protecting lock */
+ ipfw_dyn_rule *head; /* Pointer to first rule */
+};
+
+/*
+ * Static variables followed by global ones
+ */
+static VNET_DEFINE(struct ipfw_dyn_bucket *, ipfw_dyn_v);
+static VNET_DEFINE(u_int32_t, dyn_buckets_max);
+static VNET_DEFINE(u_int32_t, curr_dyn_buckets);
+static VNET_DEFINE(struct callout, ipfw_timeout);
+#define V_ipfw_dyn_v VNET(ipfw_dyn_v)
+#define V_dyn_buckets_max VNET(dyn_buckets_max)
+#define V_curr_dyn_buckets VNET(curr_dyn_buckets)
+#define V_ipfw_timeout VNET(ipfw_timeout)
+
+static VNET_DEFINE(uma_zone_t, ipfw_dyn_rule_zone);
+#define V_ipfw_dyn_rule_zone VNET(ipfw_dyn_rule_zone)
+
+#define IPFW_BUCK_LOCK_INIT(b) \
+ mtx_init(&(b)->mtx, "IPFW dynamic bucket", NULL, MTX_DEF)
+#define IPFW_BUCK_LOCK_DESTROY(b) \
+ mtx_destroy(&(b)->mtx)
+#define IPFW_BUCK_LOCK(i) mtx_lock(&V_ipfw_dyn_v[(i)].mtx)
+#define IPFW_BUCK_UNLOCK(i) mtx_unlock(&V_ipfw_dyn_v[(i)].mtx)
+#define IPFW_BUCK_ASSERT(i) mtx_assert(&V_ipfw_dyn_v[(i)].mtx, MA_OWNED)
+
+
+static VNET_DEFINE(int, dyn_keep_states);
+#define V_dyn_keep_states VNET(dyn_keep_states)
+
+/*
+ * Timeouts for various events in handing dynamic rules.
+ */
+static VNET_DEFINE(u_int32_t, dyn_ack_lifetime);
+static VNET_DEFINE(u_int32_t, dyn_syn_lifetime);
+static VNET_DEFINE(u_int32_t, dyn_fin_lifetime);
+static VNET_DEFINE(u_int32_t, dyn_rst_lifetime);
+static VNET_DEFINE(u_int32_t, dyn_udp_lifetime);
+static VNET_DEFINE(u_int32_t, dyn_short_lifetime);
+
+#define V_dyn_ack_lifetime VNET(dyn_ack_lifetime)
+#define V_dyn_syn_lifetime VNET(dyn_syn_lifetime)
+#define V_dyn_fin_lifetime VNET(dyn_fin_lifetime)
+#define V_dyn_rst_lifetime VNET(dyn_rst_lifetime)
+#define V_dyn_udp_lifetime VNET(dyn_udp_lifetime)
+#define V_dyn_short_lifetime VNET(dyn_short_lifetime)
+
+/*
+ * Keepalives are sent if dyn_keepalive is set. They are sent every
+ * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
+ * seconds of lifetime of a rule.
+ * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower
+ * than dyn_keepalive_period.
+ */
+
+static VNET_DEFINE(u_int32_t, dyn_keepalive_interval);
+static VNET_DEFINE(u_int32_t, dyn_keepalive_period);
+static VNET_DEFINE(u_int32_t, dyn_keepalive);
+static VNET_DEFINE(time_t, dyn_keepalive_last);
+
+#define V_dyn_keepalive_interval VNET(dyn_keepalive_interval)
+#define V_dyn_keepalive_period VNET(dyn_keepalive_period)
+#define V_dyn_keepalive VNET(dyn_keepalive)
+#define V_dyn_keepalive_last VNET(dyn_keepalive_last)
+
+static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */
+
+#define DYN_COUNT uma_zone_get_cur(V_ipfw_dyn_rule_zone)
+#define V_dyn_max VNET(dyn_max)
+
+/* for userspace, we emulate the uma_zone_counter with ipfw_dyn_count */
+static int ipfw_dyn_count; /* number of objects */
+
+#ifdef USERSPACE /* emulation of UMA object counters for userspace */
+#define uma_zone_get_cur(x) ipfw_dyn_count
+#endif /* USERSPACE */
+
+static int last_log; /* Log ratelimiting */
+
+static void ipfw_dyn_tick(void *vnetx);
+static void check_dyn_rules(struct ip_fw_chain *, ipfw_range_tlv *, int, int);
+#ifdef SYSCTL_NODE
+
+static int sysctl_ipfw_dyn_count(SYSCTL_HANDLER_ARGS);
+static int sysctl_ipfw_dyn_max(SYSCTL_HANDLER_ARGS);
+
+SYSBEGIN(f2)
+
+SYSCTL_DECL(_net_inet_ip_fw);
+SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_buckets,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_buckets_max), 0,
+ "Max number of dyn. buckets");
+SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets,
+ CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0,
+ "Current Number of dyn. buckets");
+SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RD, 0, 0, sysctl_ipfw_dyn_count, "IU",
+ "Number of dyn. rules");
+SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_dyn_max, "IU",
+ "Max number of dyn. rules");
+SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0,
+ "Lifetime of dyn. rules for acks");
+SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0,
+ "Lifetime of dyn. rules for syn");
+SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0,
+ "Lifetime of dyn. rules for fin");
+SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0,
+ "Lifetime of dyn. rules for rst");
+SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0,
+ "Lifetime of dyn. rules for UDP");
+SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0,
+ "Lifetime of dyn. rules for other situations");
+SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0,
+ "Enable keepalives for dyn. rules");
+SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keep_states,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keep_states), 0,
+ "Do not flush dynamic states on rule deletion");
+
+SYSEND
+
+#endif /* SYSCTL_NODE */
+
+
+#ifdef INET6
+static __inline int
+hash_packet6(struct ipfw_flow_id *id)
+{
+ u_int32_t i;
+ i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^
+ (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^
+ (id->src_ip6.__u6_addr.__u6_addr32[2]) ^
+ (id->src_ip6.__u6_addr.__u6_addr32[3]) ^
+ (id->dst_port) ^ (id->src_port);
+ return i;
+}
+#endif
+
+/*
+ * IMPORTANT: the hash function for dynamic rules must be commutative
+ * in source and destination (ip,port), because rules are bidirectional
+ * and we want to find both in the same bucket.
+ */
+static __inline int
+hash_packet(struct ipfw_flow_id *id, int buckets)
+{
+ u_int32_t i;
+
+#ifdef INET6
+ if (IS_IP6_FLOW_ID(id))
+ i = hash_packet6(id);
+ else
+#endif /* INET6 */
+ i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port);
+ i &= (buckets - 1);
+ return i;
+}
+
+#if 0
+#define DYN_DEBUG(fmt, ...) do { \
+ printf("%s: " fmt "\n", __func__, __VA_ARGS__); \
+} while (0)
+#else
+#define DYN_DEBUG(fmt, ...)
+#endif
+
+static char *default_state_name = "default";
+struct dyn_state_obj {
+ struct named_object no;
+ char name[64];
+};
+
+#define DYN_STATE_OBJ(ch, cmd) \
+ ((struct dyn_state_obj *)SRV_OBJECT(ch, (cmd)->arg1))
+/*
+ * Classifier callback.
+ * Return 0 if opcode contains object that should be referenced
+ * or rewritten.
+ */
+static int
+dyn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+
+ DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1);
+ /* Don't rewrite "check-state any" */
+ if (cmd->arg1 == 0 &&
+ cmd->opcode == O_CHECK_STATE)
+ return (1);
+
+ *puidx = cmd->arg1;
+ *ptype = 0;
+ return (0);
+}
+
+static void
+dyn_update(ipfw_insn *cmd, uint16_t idx)
+{
+
+ cmd->arg1 = idx;
+ DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1);
+}
+
+static int
+dyn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+ struct named_object **pno)
+{
+ ipfw_obj_ntlv *ntlv;
+ const char *name;
+
+ DYN_DEBUG("uidx %d", ti->uidx);
+ if (ti->uidx != 0) {
+ if (ti->tlvs == NULL)
+ return (EINVAL);
+ /* Search ntlv in the buffer provided by user */
+ ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
+ IPFW_TLV_STATE_NAME);
+ if (ntlv == NULL)
+ return (EINVAL);
+ name = ntlv->name;
+ } else
+ name = default_state_name;
+ /*
+ * Search named object with corresponding name.
+ * Since states objects are global - ignore the set value
+ * and use zero instead.
+ */
+ *pno = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch), 0,
+ IPFW_TLV_STATE_NAME, name);
+ /*
+ * We always return success here.
+ * The caller will check *pno and mark object as unresolved,
+ * then it will automatically create "default" object.
+ */
+ return (0);
+}
+
+static struct named_object *
+dyn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+
+ DYN_DEBUG("kidx %d", idx);
+ return (ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx));
+}
+
+static int
+dyn_create(struct ip_fw_chain *ch, struct tid_info *ti,
+ uint16_t *pkidx)
+{
+ struct namedobj_instance *ni;
+ struct dyn_state_obj *obj;
+ struct named_object *no;
+ ipfw_obj_ntlv *ntlv;
+ char *name;
+
+ DYN_DEBUG("uidx %d", ti->uidx);
+ if (ti->uidx != 0) {
+ if (ti->tlvs == NULL)
+ return (EINVAL);
+ ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
+ IPFW_TLV_STATE_NAME);
+ if (ntlv == NULL)
+ return (EINVAL);
+ name = ntlv->name;
+ } else
+ name = default_state_name;
+
+ ni = CHAIN_TO_SRV(ch);
+ obj = malloc(sizeof(*obj), M_IPFW, M_WAITOK | M_ZERO);
+ obj->no.name = obj->name;
+ obj->no.etlv = IPFW_TLV_STATE_NAME;
+ strlcpy(obj->name, name, sizeof(obj->name));
+
+ IPFW_UH_WLOCK(ch);
+ no = ipfw_objhash_lookup_name_type(ni, 0,
+ IPFW_TLV_STATE_NAME, name);
+ if (no != NULL) {
+ /*
+ * Object is already created.
+ * Just return its kidx and bump refcount.
+ */
+ *pkidx = no->kidx;
+ no->refcnt++;
+ IPFW_UH_WUNLOCK(ch);
+ free(obj, M_IPFW);
+ DYN_DEBUG("\tfound kidx %d", *pkidx);
+ return (0);
+ }
+ if (ipfw_objhash_alloc_idx(ni, &obj->no.kidx) != 0) {
+ DYN_DEBUG("\talloc_idx failed for %s", name);
+ IPFW_UH_WUNLOCK(ch);
+ free(obj, M_IPFW);
+ return (ENOSPC);
+ }
+ ipfw_objhash_add(ni, &obj->no);
+ IPFW_WLOCK(ch);
+ SRV_OBJECT(ch, obj->no.kidx) = obj;
+ IPFW_WUNLOCK(ch);
+ obj->no.refcnt++;
+ *pkidx = obj->no.kidx;
+ IPFW_UH_WUNLOCK(ch);
+ DYN_DEBUG("\tcreated kidx %d", *pkidx);
+ return (0);
+}
+
+static void
+dyn_destroy(struct ip_fw_chain *ch, struct named_object *no)
+{
+ struct dyn_state_obj *obj;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ KASSERT(no->refcnt == 1,
+ ("Destroying object '%s' (type %u, idx %u) with refcnt %u",
+ no->name, no->etlv, no->kidx, no->refcnt));
+
+ DYN_DEBUG("kidx %d", no->kidx);
+ IPFW_WLOCK(ch);
+ obj = SRV_OBJECT(ch, no->kidx);
+ SRV_OBJECT(ch, no->kidx) = NULL;
+ IPFW_WUNLOCK(ch);
+ ipfw_objhash_del(CHAIN_TO_SRV(ch), no);
+ ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), no->kidx);
+
+ free(obj, M_IPFW);
+}
+
+static struct opcode_obj_rewrite dyn_opcodes[] = {
+ {
+ O_KEEP_STATE, IPFW_TLV_STATE_NAME,
+ dyn_classify, dyn_update,
+ dyn_findbyname, dyn_findbykidx,
+ dyn_create, dyn_destroy
+ },
+ {
+ O_CHECK_STATE, IPFW_TLV_STATE_NAME,
+ dyn_classify, dyn_update,
+ dyn_findbyname, dyn_findbykidx,
+ dyn_create, dyn_destroy
+ },
+ {
+ O_PROBE_STATE, IPFW_TLV_STATE_NAME,
+ dyn_classify, dyn_update,
+ dyn_findbyname, dyn_findbykidx,
+ dyn_create, dyn_destroy
+ },
+ {
+ O_LIMIT, IPFW_TLV_STATE_NAME,
+ dyn_classify, dyn_update,
+ dyn_findbyname, dyn_findbykidx,
+ dyn_create, dyn_destroy
+ },
+};
+/**
+ * Print customizable flow id description via log(9) facility.
+ */
+static void
+print_dyn_rule_flags(struct ipfw_flow_id *id, int dyn_type, int log_flags,
+ char *prefix, char *postfix)
+{
+ struct in_addr da;
+#ifdef INET6
+ char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
+#else
+ char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN];
+#endif
+
+#ifdef INET6
+ if (IS_IP6_FLOW_ID(id)) {
+ ip6_sprintf(src, &id->src_ip6);
+ ip6_sprintf(dst, &id->dst_ip6);
+ } else
+#endif
+ {
+ da.s_addr = htonl(id->src_ip);
+ inet_ntop(AF_INET, &da, src, sizeof(src));
+ da.s_addr = htonl(id->dst_ip);
+ inet_ntop(AF_INET, &da, dst, sizeof(dst));
+ }
+ log(log_flags, "ipfw: %s type %d %s %d -> %s %d, %d %s\n",
+ prefix, dyn_type, src, id->src_port, dst,
+ id->dst_port, DYN_COUNT, postfix);
+}
+
+#define print_dyn_rule(id, dtype, prefix, postfix) \
+ print_dyn_rule_flags(id, dtype, LOG_DEBUG, prefix, postfix)
+
+#define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0)
+#define TIME_LE(a,b) ((int)((a)-(b)) < 0)
+
+static void
+dyn_update_proto_state(ipfw_dyn_rule *q, const struct ipfw_flow_id *id,
+ const struct tcphdr *tcp, int dir)
+{
+ uint32_t ack;
+ u_char flags;
+
+ if (id->proto == IPPROTO_TCP) {
+ flags = id->_flags & (TH_FIN | TH_SYN | TH_RST);
+#define BOTH_SYN (TH_SYN | (TH_SYN << 8))
+#define BOTH_FIN (TH_FIN | (TH_FIN << 8))
+#define TCP_FLAGS (TH_FLAGS | (TH_FLAGS << 8))
+#define ACK_FWD 0x10000 /* fwd ack seen */
+#define ACK_REV 0x20000 /* rev ack seen */
+
+ q->state |= (dir == MATCH_FORWARD) ? flags : (flags << 8);
+ switch (q->state & TCP_FLAGS) {
+ case TH_SYN: /* opening */
+ q->expire = time_uptime + V_dyn_syn_lifetime;
+ break;
+
+ case BOTH_SYN: /* move to established */
+ case BOTH_SYN | TH_FIN: /* one side tries to close */
+ case BOTH_SYN | (TH_FIN << 8):
+#define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0)
+ if (tcp == NULL)
+ break;
+
+ ack = ntohl(tcp->th_ack);
+ if (dir == MATCH_FORWARD) {
+ if (q->ack_fwd == 0 ||
+ _SEQ_GE(ack, q->ack_fwd)) {
+ q->ack_fwd = ack;
+ q->state |= ACK_FWD;
+ }
+ } else {
+ if (q->ack_rev == 0 ||
+ _SEQ_GE(ack, q->ack_rev)) {
+ q->ack_rev = ack;
+ q->state |= ACK_REV;
+ }
+ }
+ if ((q->state & (ACK_FWD | ACK_REV)) ==
+ (ACK_FWD | ACK_REV)) {
+ q->expire = time_uptime + V_dyn_ack_lifetime;
+ q->state &= ~(ACK_FWD | ACK_REV);
+ }
+ break;
+
+ case BOTH_SYN | BOTH_FIN: /* both sides closed */
+ if (V_dyn_fin_lifetime >= V_dyn_keepalive_period)
+ V_dyn_fin_lifetime =
+ V_dyn_keepalive_period - 1;
+ q->expire = time_uptime + V_dyn_fin_lifetime;
+ break;
+
+ default:
+#if 0
+ /*
+ * reset or some invalid combination, but can also
+ * occur if we use keep-state the wrong way.
+ */
+ if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0)
+ printf("invalid state: 0x%x\n", q->state);
+#endif
+ if (V_dyn_rst_lifetime >= V_dyn_keepalive_period)
+ V_dyn_rst_lifetime =
+ V_dyn_keepalive_period - 1;
+ q->expire = time_uptime + V_dyn_rst_lifetime;
+ break;
+ }
+ } else if (id->proto == IPPROTO_UDP) {
+ q->expire = time_uptime + V_dyn_udp_lifetime;
+ } else {
+ /* other protocols */
+ q->expire = time_uptime + V_dyn_short_lifetime;
+ }
+}
+
+/*
+ * Lookup a dynamic rule, locked version.
+ */
+static ipfw_dyn_rule *
+lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int i, int *match_direction,
+ struct tcphdr *tcp, uint16_t kidx)
+{
+ /*
+ * Stateful ipfw extensions.
+ * Lookup into dynamic session queue.
+ */
+ ipfw_dyn_rule *prev, *q = NULL;
+ int dir;
+
+ IPFW_BUCK_ASSERT(i);
+
+ dir = MATCH_NONE;
+ for (prev = NULL, q = V_ipfw_dyn_v[i].head; q; prev = q, q = q->next) {
+ if (q->dyn_type == O_LIMIT_PARENT)
+ continue;
+
+ if (pkt->proto != q->id.proto)
+ continue;
+
+ if (kidx != 0 && kidx != q->kidx)
+ continue;
+
+ if (IS_IP6_FLOW_ID(pkt)) {
+ if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.src_ip6) &&
+ IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.dst_ip6) &&
+ pkt->src_port == q->id.src_port &&
+ pkt->dst_port == q->id.dst_port) {
+ dir = MATCH_FORWARD;
+ break;
+ }
+ if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.dst_ip6) &&
+ IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.src_ip6) &&
+ pkt->src_port == q->id.dst_port &&
+ pkt->dst_port == q->id.src_port) {
+ dir = MATCH_REVERSE;
+ break;
+ }
+ } else {
+ if (pkt->src_ip == q->id.src_ip &&
+ pkt->dst_ip == q->id.dst_ip &&
+ pkt->src_port == q->id.src_port &&
+ pkt->dst_port == q->id.dst_port) {
+ dir = MATCH_FORWARD;
+ break;
+ }
+ if (pkt->src_ip == q->id.dst_ip &&
+ pkt->dst_ip == q->id.src_ip &&
+ pkt->src_port == q->id.dst_port &&
+ pkt->dst_port == q->id.src_port) {
+ dir = MATCH_REVERSE;
+ break;
+ }
+ }
+ }
+ if (q == NULL)
+ goto done; /* q = NULL, not found */
+
+ if (prev != NULL) { /* found and not in front */
+ prev->next = q->next;
+ q->next = V_ipfw_dyn_v[i].head;
+ V_ipfw_dyn_v[i].head = q;
+ }
+
+ /* update state according to flags */
+ dyn_update_proto_state(q, pkt, tcp, dir);
+done:
+ if (match_direction != NULL)
+ *match_direction = dir;
+ return (q);
+}
+
+ipfw_dyn_rule *
+ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction,
+ struct tcphdr *tcp, uint16_t kidx)
+{
+ ipfw_dyn_rule *q;
+ int i;
+
+ i = hash_packet(pkt, V_curr_dyn_buckets);
+
+ IPFW_BUCK_LOCK(i);
+ q = lookup_dyn_rule_locked(pkt, i, match_direction, tcp, kidx);
+ if (q == NULL)
+ IPFW_BUCK_UNLOCK(i);
+ /* NB: return table locked when q is not NULL */
+ return q;
+}
+
+/*
+ * Unlock bucket mtx
+ * @p - pointer to dynamic rule
+ */
+void
+ipfw_dyn_unlock(ipfw_dyn_rule *q)
+{
+
+ IPFW_BUCK_UNLOCK(q->bucket);
+}
+
+static int
+resize_dynamic_table(struct ip_fw_chain *chain, int nbuckets)
+{
+ int i, k, nbuckets_old;
+ ipfw_dyn_rule *q;
+ struct ipfw_dyn_bucket *dyn_v, *dyn_v_old;
+
+ /* Check if given number is power of 2 and less than 64k */
+ if ((nbuckets > 65536) || (!powerof2(nbuckets)))
+ return 1;
+
+ CTR3(KTR_NET, "%s: resize dynamic hash: %d -> %d", __func__,
+ V_curr_dyn_buckets, nbuckets);
+
+ /* Allocate and initialize new hash */
+ dyn_v = malloc(nbuckets * sizeof(*dyn_v), M_IPFW,
+ M_WAITOK | M_ZERO);
+
+ for (i = 0 ; i < nbuckets; i++)
+ IPFW_BUCK_LOCK_INIT(&dyn_v[i]);
+
+ /*
+ * Call upper half lock, as get_map() do to ease
+ * read-only access to dynamic rules hash from sysctl
+ */
+ IPFW_UH_WLOCK(chain);
+
+ /*
+ * Acquire chain write lock to permit hash access
+ * for main traffic path without additional locks
+ */
+ IPFW_WLOCK(chain);
+
+ /* Save old values */
+ nbuckets_old = V_curr_dyn_buckets;
+ dyn_v_old = V_ipfw_dyn_v;
+
+ /* Skip relinking if array is not set up */
+ if (V_ipfw_dyn_v == NULL)
+ V_curr_dyn_buckets = 0;
+
+ /* Re-link all dynamic states */
+ for (i = 0 ; i < V_curr_dyn_buckets ; i++) {
+ while (V_ipfw_dyn_v[i].head != NULL) {
+ /* Remove from current chain */
+ q = V_ipfw_dyn_v[i].head;
+ V_ipfw_dyn_v[i].head = q->next;
+
+ /* Get new hash value */
+ k = hash_packet(&q->id, nbuckets);
+ q->bucket = k;
+ /* Add to the new head */
+ q->next = dyn_v[k].head;
+ dyn_v[k].head = q;
+ }
+ }
+
+ /* Update current pointers/buckets values */
+ V_curr_dyn_buckets = nbuckets;
+ V_ipfw_dyn_v = dyn_v;
+
+ IPFW_WUNLOCK(chain);
+
+ IPFW_UH_WUNLOCK(chain);
+
+ /* Start periodic callout on initial creation */
+ if (dyn_v_old == NULL) {
+ callout_reset_on(&V_ipfw_timeout, hz, ipfw_dyn_tick, curvnet, 0);
+ return (0);
+ }
+
+ /* Destroy all mutexes */
+ for (i = 0 ; i < nbuckets_old ; i++)
+ IPFW_BUCK_LOCK_DESTROY(&dyn_v_old[i]);
+
+ /* Free old hash */
+ free(dyn_v_old, M_IPFW);
+
+ return 0;
+}
+
+/**
+ * Install state of type 'type' for a dynamic session.
+ * The hash table contains two type of rules:
+ * - regular rules (O_KEEP_STATE)
+ * - rules for sessions with limited number of sess per user
+ * (O_LIMIT). When they are created, the parent is
+ * increased by 1, and decreased on delete. In this case,
+ * the third parameter is the parent rule and not the chain.
+ * - "parent" rules for the above (O_LIMIT_PARENT).
+ */
+static ipfw_dyn_rule *
+add_dyn_rule(struct ipfw_flow_id *id, int i, uint8_t dyn_type,
+ struct ip_fw *rule, uint16_t kidx)
+{
+ ipfw_dyn_rule *r;
+
+ IPFW_BUCK_ASSERT(i);
+
+ r = uma_zalloc(V_ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO);
+ if (r == NULL) {
+ if (last_log != time_uptime) {
+ last_log = time_uptime;
+ log(LOG_DEBUG,
+ "ipfw: Cannot allocate dynamic state, "
+ "consider increasing net.inet.ip.fw.dyn_max\n");
+ }
+ return NULL;
+ }
+ ipfw_dyn_count++;
+
+ /*
+ * refcount on parent is already incremented, so
+ * it is safe to use parent unlocked.
+ */
+ if (dyn_type == O_LIMIT) {
+ ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule;
+ if ( parent->dyn_type != O_LIMIT_PARENT)
+ panic("invalid parent");
+ r->parent = parent;
+ rule = parent->rule;
+ }
+
+ r->id = *id;
+ r->expire = time_uptime + V_dyn_syn_lifetime;
+ r->rule = rule;
+ r->dyn_type = dyn_type;
+ IPFW_ZERO_DYN_COUNTER(r);
+ r->count = 0;
+ r->kidx = kidx;
+ r->bucket = i;
+ r->next = V_ipfw_dyn_v[i].head;
+ V_ipfw_dyn_v[i].head = r;
+ DEB(print_dyn_rule(id, dyn_type, "add dyn entry", "total");)
+ return r;
+}
+
+/**
+ * lookup dynamic parent rule using pkt and rule as search keys.
+ * If the lookup fails, then install one.
+ */
+static ipfw_dyn_rule *
+lookup_dyn_parent(struct ipfw_flow_id *pkt, int *pindex, struct ip_fw *rule,
+ uint16_t kidx)
+{
+ ipfw_dyn_rule *q;
+ int i, is_v6;
+
+ is_v6 = IS_IP6_FLOW_ID(pkt);
+ i = hash_packet( pkt, V_curr_dyn_buckets );
+ *pindex = i;
+ IPFW_BUCK_LOCK(i);
+ for (q = V_ipfw_dyn_v[i].head ; q != NULL ; q=q->next)
+ if (q->dyn_type == O_LIMIT_PARENT &&
+ kidx == q->kidx &&
+ rule == q->rule &&
+ pkt->proto == q->id.proto &&
+ pkt->src_port == q->id.src_port &&
+ pkt->dst_port == q->id.dst_port &&
+ (
+ (is_v6 &&
+ IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6),
+ &(q->id.src_ip6)) &&
+ IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6),
+ &(q->id.dst_ip6))) ||
+ (!is_v6 &&
+ pkt->src_ip == q->id.src_ip &&
+ pkt->dst_ip == q->id.dst_ip)
+ )
+ ) {
+ q->expire = time_uptime + V_dyn_short_lifetime;
+ DEB(print_dyn_rule(pkt, q->dyn_type,
+ "lookup_dyn_parent found", "");)
+ return q;
+ }
+
+ /* Add virtual limiting rule */
+ return add_dyn_rule(pkt, i, O_LIMIT_PARENT, rule, kidx);
+}
+
+/**
+ * Install dynamic state for rule type cmd->o.opcode.
+ *
+ * For O_KEEP_STATE a single bidirectional state is added.  For O_LIMIT a
+ * parent (limiting) state is looked up or created first; its session
+ * counter is checked against the configured limit before the child state
+ * is installed.
+ *
+ * Returns 1 (failure) if state is not installed because of errors or because
+ * session limitations are enforced, 0 on success.
+ */
+int
+ipfw_install_state(struct ip_fw_chain *chain, struct ip_fw *rule,
+    ipfw_insn_limit *cmd, struct ip_fw_args *args, uint32_t tablearg)
+{
+	ipfw_dyn_rule *q;
+	int i;
+
+	DEB(print_dyn_rule(&args->f_id, cmd->o.opcode, "install_state",
+	    (cmd->o.arg1 == 0 ? "": DYN_STATE_OBJ(chain, &cmd->o)->name));)
+
+	i = hash_packet(&args->f_id, V_curr_dyn_buckets);
+
+	IPFW_BUCK_LOCK(i);
+
+	q = lookup_dyn_rule_locked(&args->f_id, i, NULL, NULL, cmd->o.arg1);
+	if (q != NULL) {	/* should never occur */
+		DEB(
+		if (last_log != time_uptime) {
+			last_log = time_uptime;
+			printf("ipfw: %s: entry already present, done\n",
+			    __func__);
+		})
+		IPFW_BUCK_UNLOCK(i);
+		return (0);
+	}
+
+	/*
+	 * State limiting is done via uma(9) zone limiting.
+	 * Save pointer to newly-installed rule and reject
+	 * packet if add_dyn_rule() returned NULL.
+	 * Note q is currently set to NULL.
+	 */
+
+	switch (cmd->o.opcode) {
+	case O_KEEP_STATE:	/* bidir rule */
+		q = add_dyn_rule(&args->f_id, i, O_KEEP_STATE, rule,
+		    cmd->o.arg1);
+		break;
+
+	case O_LIMIT: {		/* limit number of sessions */
+		struct ipfw_flow_id id;
+		ipfw_dyn_rule *parent;
+		uint32_t conn_limit;
+		uint16_t limit_mask = cmd->limit_mask;
+		int pindex;
+
+		conn_limit = IP_FW_ARG_TABLEARG(chain, cmd->conn_limit, limit);
+
+		DEB(
+		if (cmd->conn_limit == IP_FW_TARG)
+			printf("ipfw: %s: O_LIMIT rule, conn_limit: %u "
+			    "(tablearg)\n", __func__, conn_limit);
+		else
+			printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n",
+			    __func__, conn_limit);
+		)
+
+		/*
+		 * Build the parent flow id: only the fields selected by
+		 * limit_mask participate, everything else stays zero.
+		 */
+		id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0;
+		id.proto = args->f_id.proto;
+		id.addr_type = args->f_id.addr_type;
+		id.fib = M_GETFIB(args->m);
+
+		if (IS_IP6_FLOW_ID (&(args->f_id))) {
+			bzero(&id.src_ip6, sizeof(id.src_ip6));
+			bzero(&id.dst_ip6, sizeof(id.dst_ip6));
+
+			if (limit_mask & DYN_SRC_ADDR)
+				id.src_ip6 = args->f_id.src_ip6;
+			if (limit_mask & DYN_DST_ADDR)
+				id.dst_ip6 = args->f_id.dst_ip6;
+		} else {
+			if (limit_mask & DYN_SRC_ADDR)
+				id.src_ip = args->f_id.src_ip;
+			if (limit_mask & DYN_DST_ADDR)
+				id.dst_ip = args->f_id.dst_ip;
+		}
+		if (limit_mask & DYN_SRC_PORT)
+			id.src_port = args->f_id.src_port;
+		if (limit_mask & DYN_DST_PORT)
+			id.dst_port = args->f_id.dst_port;
+
+		/*
+		 * We have to release lock for previous bucket to
+		 * avoid possible deadlock
+		 */
+		IPFW_BUCK_UNLOCK(i);
+
+		/* Returns with the parent's bucket (pindex) locked. */
+		parent = lookup_dyn_parent(&id, &pindex, rule, cmd->o.arg1);
+		if (parent == NULL) {
+			printf("ipfw: %s: add parent failed\n", __func__);
+			IPFW_BUCK_UNLOCK(pindex);
+			return (1);
+		}
+
+		if (parent->count >= conn_limit) {
+			if (V_fw_verbose && last_log != time_uptime) {
+				char sbuf[24];
+
+				/* Rate-limit logging to once per second. */
+				last_log = time_uptime;
+				snprintf(sbuf, sizeof(sbuf),
+				    "%d drop session",
+				    parent->rule->rulenum);
+				print_dyn_rule_flags(&args->f_id,
+				    cmd->o.opcode,
+				    LOG_SECURITY | LOG_DEBUG,
+				    sbuf, "too many entries");
+			}
+			IPFW_BUCK_UNLOCK(pindex);
+			return (1);
+		}
+		/* Increment counter on parent */
+		parent->count++;
+		IPFW_BUCK_UNLOCK(pindex);
+
+		IPFW_BUCK_LOCK(i);
+		q = add_dyn_rule(&args->f_id, i, O_LIMIT,
+		    (struct ip_fw *)parent, cmd->o.arg1);
+		if (q == NULL) {
+			/* Roll back parent's counter and notify caller */
+			IPFW_BUCK_UNLOCK(i);
+			IPFW_BUCK_LOCK(pindex);
+			parent->count--;
+			IPFW_BUCK_UNLOCK(pindex);
+			return (1);
+		}
+		break;
+	}
+	default:
+		printf("ipfw: %s: unknown dynamic rule type %u\n",
+		    __func__, cmd->o.opcode);
+	}
+
+	if (q == NULL) {
+		IPFW_BUCK_UNLOCK(i);
+		return (1);	/* Notify caller about failure */
+	}
+
+	dyn_update_proto_state(q, &args->f_id, NULL, MATCH_FORWARD);
+	IPFW_BUCK_UNLOCK(i);
+	return (0);
+}
+
+/*
+ * Generate a TCP packet, containing either a RST or a keepalive.
+ * When flags & TH_RST, we are sending a RST packet, because of a
+ * "reset" action matched the packet.
+ * Otherwise we are sending a keepalive: a bare ACK built from the
+ * caller-provided seq/ack numbers.
+ * The 'replyto' mbuf is the mbuf being replied to, if any, and is required
+ * so that MAC can label the reply appropriately.
+ *
+ * Returns the constructed mbuf, or NULL on allocation failure or
+ * unknown address family.
+ */
+struct mbuf *
+ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq,
+    u_int32_t ack, int flags)
+{
+	struct mbuf *m = NULL;		/* stupid compiler */
+	int len, dir;
+	struct ip *h = NULL;		/* stupid compiler */
+#ifdef INET6
+	struct ip6_hdr *h6 = NULL;
+#endif
+	struct tcphdr *th = NULL;
+
+	MGETHDR(m, M_NOWAIT, MT_DATA);
+	if (m == NULL)
+		return (NULL);
+
+	M_SETFIB(m, id->fib);
+#ifdef MAC
+	if (replyto != NULL)
+		mac_netinet_firewall_reply(replyto, m);
+	else
+		mac_netinet_firewall_send(m);
+#else
+	(void)replyto;		/* don't warn about unused arg */
+#endif
+
+	/* Pick total header length by address family (4 or 6). */
+	switch (id->addr_type) {
+	case 4:
+		len = sizeof(struct ip) + sizeof(struct tcphdr);
+		break;
+#ifdef INET6
+	case 6:
+		len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+		break;
+#endif
+	default:
+		/* XXX: log me?!? */
+		FREE_PKT(m);
+		return (NULL);
+	}
+	/*
+	 * dir != 0 means the packet travels in the original (forward)
+	 * direction of the flow id; otherwise addresses/ports are swapped.
+	 */
+	dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN);
+
+	m->m_data += max_linkhdr;
+	m->m_flags |= M_SKIP_FIREWALL;	/* do not re-inspect our own packet */
+	m->m_pkthdr.len = m->m_len = len;
+	m->m_pkthdr.rcvif = NULL;
+	bzero(m->m_data, len);
+
+	switch (id->addr_type) {
+	case 4:
+		h = mtod(m, struct ip *);
+
+		/* prepare for checksum */
+		h->ip_p = IPPROTO_TCP;
+		h->ip_len = htons(sizeof(struct tcphdr));
+		if (dir) {
+			h->ip_src.s_addr = htonl(id->src_ip);
+			h->ip_dst.s_addr = htonl(id->dst_ip);
+		} else {
+			h->ip_src.s_addr = htonl(id->dst_ip);
+			h->ip_dst.s_addr = htonl(id->src_ip);
+		}
+
+		th = (struct tcphdr *)(h + 1);
+		break;
+#ifdef INET6
+	case 6:
+		h6 = mtod(m, struct ip6_hdr *);
+
+		/* prepare for checksum */
+		h6->ip6_nxt = IPPROTO_TCP;
+		h6->ip6_plen = htons(sizeof(struct tcphdr));
+		if (dir) {
+			h6->ip6_src = id->src_ip6;
+			h6->ip6_dst = id->dst_ip6;
+		} else {
+			h6->ip6_src = id->dst_ip6;
+			h6->ip6_dst = id->src_ip6;
+		}
+
+		th = (struct tcphdr *)(h6 + 1);
+		break;
+#endif
+	}
+
+	if (dir) {
+		th->th_sport = htons(id->src_port);
+		th->th_dport = htons(id->dst_port);
+	} else {
+		th->th_sport = htons(id->dst_port);
+		th->th_dport = htons(id->src_port);
+	}
+	th->th_off = sizeof(struct tcphdr) >> 2;
+
+	if (flags & TH_RST) {
+		if (flags & TH_ACK) {
+			th->th_seq = htonl(ack);
+			th->th_flags = TH_RST;
+		} else {
+			if (flags & TH_SYN)
+				seq++;
+			th->th_ack = htonl(seq);
+			th->th_flags = TH_RST | TH_ACK;
+		}
+	} else {
+		/*
+		 * Keepalive - use caller provided sequence numbers
+		 */
+		th->th_seq = htonl(seq);
+		th->th_ack = htonl(ack);
+		th->th_flags = TH_ACK;
+	}
+
+	/* Compute TCP checksum, then finish the IP/IPv6 header fields. */
+	switch (id->addr_type) {
+	case 4:
+		th->th_sum = in_cksum(m, len);
+
+		/* finish the ip header */
+		h->ip_v = 4;
+		h->ip_hl = sizeof(*h) >> 2;
+		h->ip_tos = IPTOS_LOWDELAY;
+		h->ip_off = htons(0);
+		h->ip_len = htons(len);
+		h->ip_ttl = V_ip_defttl;
+		h->ip_sum = 0;
+		break;
+#ifdef INET6
+	case 6:
+		th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6),
+		    sizeof(struct tcphdr));
+
+		/* finish the ip6 header */
+		h6->ip6_vfc |= IPV6_VERSION;
+		h6->ip6_hlim = IPV6_DEFHLIM;
+		break;
+#endif
+	}
+
+	return (m);
+}
+
+/*
+ * Queue keepalive packets for given dynamic rule.
+ *
+ * Builds up to two keepalive segments - one per direction that has not
+ * yet been ACKed (per the ACK_REV/ACK_FWD bits in q->state) - using
+ * ack-1 as the probe sequence number, and appends them to the packet
+ * chain via *mtailp.  Returns the updated tail pointer.
+ */
+static struct mbuf **
+ipfw_dyn_send_ka(struct mbuf **mtailp, ipfw_dyn_rule *q)
+{
+	struct mbuf *m_rev, *m_fwd;
+
+	m_rev = (q->state & ACK_REV) ? NULL :
+	    ipfw_send_pkt(NULL, &(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN);
+	m_fwd = (q->state & ACK_FWD) ? NULL :
+	    ipfw_send_pkt(NULL, &(q->id), q->ack_fwd - 1, q->ack_rev, 0);
+
+	/* Link whichever packets were built onto the outgoing chain. */
+	if (m_rev != NULL) {
+		*mtailp = m_rev;
+		mtailp = &(*mtailp)->m_nextpkt;
+	}
+	if (m_fwd != NULL) {
+		*mtailp = m_fwd;
+		mtailp = &(*mtailp)->m_nextpkt;
+	}
+
+	return (mtailp);
+}
+
+/*
+ * This procedure is used to perform various maintenance
+ * on dynamic hash list. Currently it is called every second.
+ *
+ * Runs as a callout(9) handler and re-arms itself for the next tick.
+ */
+static void
+ipfw_dyn_tick(void * vnetx)
+{
+	struct ip_fw_chain *chain;
+	int check_ka = 0;
+#ifdef VIMAGE
+	struct vnet *vp = vnetx;
+#endif
+
+	CURVNET_SET(vp);
+
+	chain = &V_layer3_chain;
+
+	/* Run keepalive checks every keepalive_period iff ka is enabled */
+	if ((V_dyn_keepalive_last + V_dyn_keepalive_period <= time_uptime) &&
+	    (V_dyn_keepalive != 0)) {
+		V_dyn_keepalive_last = time_uptime;
+		check_ka = 1;
+	}
+
+	/* timer == 1: check_dyn_rules() takes/drops locks itself. */
+	check_dyn_rules(chain, NULL, check_ka, 1);
+
+	/* Re-schedule ourselves one second from now. */
+	callout_reset_on(&V_ipfw_timeout, hz, ipfw_dyn_tick, vnetx, 0);
+
+	CURVNET_RESTORE();
+}
+
+
+/*
+ * Walk through all dynamic states doing generic maintenance:
+ * 1) free expired states
+ * 2) free all states based on deleted rule / set
+ * 3) send keepalives for states if needed
+ *
+ * @chain - pointer to current ipfw rules chain
+ * @rt - delete all states whose rule matches this range if != NULL
+ * @check_ka - perform checking/sending keepalives
+ * @timer - indicate call from timer routine.
+ *
+ * Timer routine must call this function unlocked to permit
+ * sending keepalives/resizing table.
+ *
+ * Others have to call function with IPFW_UH_WLOCK held.
+ * Additionally, function assumes that dynamic rule/set is
+ * ALREADY deleted so no new states can be generated by
+ * 'deleted' rules.
+ *
+ * Write lock is needed to ensure that unused parent rules
+ * are not freed by other instance (see stage 2, 3)
+ */
+static void
+check_dyn_rules(struct ip_fw_chain *chain, ipfw_range_tlv *rt,
+    int check_ka, int timer)
+{
+	struct mbuf *m0, *m, *mnext, **mtailp;
+	struct ip *h;
+	int i, dyn_count, new_buckets = 0, max_buckets;
+	int expired = 0, expired_limits = 0, parents = 0, total = 0;
+	ipfw_dyn_rule *q, *q_prev, *q_next;
+	ipfw_dyn_rule *exp_head, **exptailp;
+	ipfw_dyn_rule *exp_lhead, **expltailp;
+
+	KASSERT(V_ipfw_dyn_v != NULL, ("%s: dynamic table not allocated",
+	    __func__));
+
+	/* Avoid possible LOR */
+	KASSERT(!check_ka || timer, ("%s: keepalive check with lock held",
+	    __func__));
+
+	/*
+	 * Do not perform any checks if we currently have no dynamic states
+	 */
+	if (DYN_COUNT == 0)
+		return;
+
+	/* Expired states */
+	exp_head = NULL;
+	exptailp = &exp_head;
+
+	/* Expired limit states */
+	exp_lhead = NULL;
+	expltailp = &exp_lhead;
+
+	/*
+	 * We make a chain of packets to go out here -- not deferring
+	 * until after we drop the IPFW dynamic rule lock would result
+	 * in a lock order reversal with the normal packet input -> ipfw
+	 * call stack.
+	 */
+	m0 = NULL;
+	mtailp = &m0;
+
+	/* Protect from hash resizing */
+	if (timer != 0)
+		IPFW_UH_WLOCK(chain);
+	else
+		IPFW_UH_WLOCK_ASSERT(chain);
+
+/* Advance to next state in this bucket, tracking the predecessor. */
+#define	NEXT_RULE()	{ q_prev = q; q = q->next ; continue; }
+
+	/* Stage 1: perform requested deletion */
+	for (i = 0 ; i < V_curr_dyn_buckets ; i++) {
+		IPFW_BUCK_LOCK(i);
+		for (q = V_ipfw_dyn_v[i].head, q_prev = q; q ; ) {
+			/* account every rule */
+			total++;
+
+			/* Skip parent rules at all */
+			if (q->dyn_type == O_LIMIT_PARENT) {
+				parents++;
+				NEXT_RULE();
+			}
+
+			/*
+			 * Remove rules which are:
+			 * 1) expired
+			 * 2) matches deletion range
+			 */
+			if ((TIME_LEQ(q->expire, time_uptime)) ||
+			    (rt != NULL && ipfw_match_range(q->rule, rt))) {
+				if (TIME_LE(time_uptime, q->expire) &&
+				    q->dyn_type == O_KEEP_STATE &&
+				    V_dyn_keep_states != 0) {
+					/*
+					 * Do not delete state if
+					 * it is not expired and
+					 * dyn_keep_states is ON.
+					 * However we need to re-link it
+					 * to any other stable rule
+					 */
+					q->rule = chain->default_rule;
+					NEXT_RULE();
+				}
+
+				/* Unlink q from current list */
+				q_next = q->next;
+				if (q == V_ipfw_dyn_v[i].head)
+					V_ipfw_dyn_v[i].head = q_next;
+				else
+					q_prev->next = q_next;
+
+				q->next = NULL;
+
+				/* queue q to expire list */
+				if (q->dyn_type != O_LIMIT) {
+					*exptailp = q;
+					exptailp = &(*exptailp)->next;
+					DEB(print_dyn_rule(&q->id, q->dyn_type,
+					    "unlink entry", "left");
+					)
+				} else {
+					/* Separate list for limit rules */
+					*expltailp = q;
+					expltailp = &(*expltailp)->next;
+					expired_limits++;
+					DEB(print_dyn_rule(&q->id, q->dyn_type,
+					    "unlink limit entry", "left");
+					)
+				}
+
+				q = q_next;
+				expired++;
+				continue;
+			}
+
+			/*
+			 * Check if we need to send keepalive:
+			 * we need to ensure if is time to do KA,
+			 * this is established TCP session, and
+			 * expire time is within keepalive interval
+			 */
+			if ((check_ka != 0) && (q->id.proto == IPPROTO_TCP) &&
+			    ((q->state & BOTH_SYN) == BOTH_SYN) &&
+			    (TIME_LEQ(q->expire, time_uptime +
+			    V_dyn_keepalive_interval)))
+				mtailp = ipfw_dyn_send_ka(mtailp, q);
+
+			NEXT_RULE();
+		}
+		IPFW_BUCK_UNLOCK(i);
+	}
+
+	/* Stage 2: decrement counters from O_LIMIT parents */
+	if (expired_limits != 0) {
+		/*
+		 * XXX: Note that deleting set with more than one
+		 * heavily-used LIMIT rules can result in overwhelming
+		 * locking due to lack of per-hash value sorting
+		 *
+		 * We should probably think about:
+		 * 1) pre-allocating hash of size, say,
+		 * MAX(16, V_curr_dyn_buckets / 1024)
+		 * 2) checking if expired_limits is large enough
+		 * 3) If yes, init hash (or its part), re-link
+		 * current list and start decrementing procedure in
+		 * each bucket separately
+		 */
+
+		/*
+		 * Small optimization: do not unlock bucket until
+		 * we see the next item resides in different bucket
+		 */
+		if (exp_lhead != NULL) {
+			i = exp_lhead->parent->bucket;
+			IPFW_BUCK_LOCK(i);
+		}
+		for (q = exp_lhead; q != NULL; q = q->next) {
+			if (i != q->parent->bucket) {
+				IPFW_BUCK_UNLOCK(i);
+				i = q->parent->bucket;
+				IPFW_BUCK_LOCK(i);
+			}
+
+			/* Decrease parent refcount */
+			q->parent->count--;
+		}
+		if (exp_lhead != NULL)
+			IPFW_BUCK_UNLOCK(i);
+	}
+
+	/*
+	 * We protect ourselves from unused parent deletion
+	 * (from the timer function) by holding UH write lock.
+	 */
+
+	/* Stage 3: remove unused parent rules */
+	if ((parents != 0) && (expired != 0)) {
+		for (i = 0 ; i < V_curr_dyn_buckets ; i++) {
+			IPFW_BUCK_LOCK(i);
+			for (q = V_ipfw_dyn_v[i].head, q_prev = q ; q ; ) {
+				if (q->dyn_type != O_LIMIT_PARENT)
+					NEXT_RULE();
+
+				if (q->count != 0)
+					NEXT_RULE();
+
+				/* Parent rule without consumers */
+
+				/* Unlink q from current list */
+				q_next = q->next;
+				if (q == V_ipfw_dyn_v[i].head)
+					V_ipfw_dyn_v[i].head = q_next;
+				else
+					q_prev->next = q_next;
+
+				q->next = NULL;
+
+				/* Add to expired list */
+				*exptailp = q;
+				exptailp = &(*exptailp)->next;
+
+				DEB(print_dyn_rule(&q->id, q->dyn_type,
+				    "unlink parent entry", "left");
+				)
+
+				expired++;
+
+				q = q_next;
+			}
+			IPFW_BUCK_UNLOCK(i);
+		}
+	}
+
+#undef NEXT_RULE
+
+	if (timer != 0) {
+		/*
+		 * Check if we need to resize hash:
+		 * if current number of states exceeds number of buckets
+		 * in hash, grow hash size to the minimum power of 2 which
+		 * is bigger than current states count. Limit hash size by
+		 * 64k.
+		 */
+		max_buckets = (V_dyn_buckets_max > 65536) ?
+		    65536 : V_dyn_buckets_max;
+
+		dyn_count = DYN_COUNT;
+
+		if ((dyn_count > V_curr_dyn_buckets * 2) &&
+		    (dyn_count < max_buckets)) {
+			new_buckets = V_curr_dyn_buckets;
+			while (new_buckets < dyn_count) {
+				new_buckets *= 2;
+
+				if (new_buckets >= max_buckets)
+					break;
+			}
+		}
+
+		IPFW_UH_WUNLOCK(chain);
+	}
+
+	/* Finally delete old states and limits if any */
+	for (q = exp_head; q != NULL; q = q_next) {
+		q_next = q->next;
+		uma_zfree(V_ipfw_dyn_rule_zone, q);
+		ipfw_dyn_count--;
+	}
+
+	for (q = exp_lhead; q != NULL; q = q_next) {
+		q_next = q->next;
+		uma_zfree(V_ipfw_dyn_rule_zone, q);
+		ipfw_dyn_count--;
+	}
+
+	/*
+	 * The rest code MUST be called from timer routine only
+	 * without holding any locks
+	 */
+	if (timer == 0)
+		return;
+
+	/* Send keepalive packets if any */
+	for (m = m0; m != NULL; m = mnext) {
+		mnext = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+		h = mtod(m, struct ip *);
+		if (h->ip_v == 4)
+			ip_output(m, NULL, NULL, 0, NULL, NULL);
+#ifdef INET6
+		else
+			ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
+#endif
+	}
+
+	/* Run table resize without holding any locks */
+	if (new_buckets != 0)
+		resize_dynamic_table(chain, new_buckets);
+}
+
+/*
+ * Deletes all dynamic rules originated by given rule or all rules in
+ * given set. Specify RESVD_SET to indicate set should not be used.
+ * @chain - pointer to current ipfw rules chain
+ * @rt - delete all states originated by rules in matched range.
+ *
+ * Function has to be called with IPFW_UH_WLOCK held.
+ * Additionally, function assumes that dynamic rule/set is
+ * ALREADY deleted so no new states can be generated by
+ * 'deleted' rules.
+ */
+void
+ipfw_expire_dyn_rules(struct ip_fw_chain *chain, ipfw_range_tlv *rt)
+{
+
+	/* check_ka = 0, timer = 0: caller holds the UH write lock. */
+	check_dyn_rules(chain, rt, 0, 0);
+}
+
+/*
+ * Check if rule contains at least one dynamic opcode.
+ *
+ * Returns 1 if such opcode is found, 0 otherwise.
+ */
+int
+ipfw_is_dyn_rule(struct ip_fw *rule)
+{
+	int cmdlen, l;
+	ipfw_insn *cmd;
+
+	/* Walk the rule's microinstructions, F_LEN() gives each one's size. */
+	l = rule->cmd_len;
+	cmd = rule->cmd;
+	cmdlen = 0;
+	for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
+		cmdlen = F_LEN(cmd);
+
+		switch (cmd->opcode) {
+		case O_LIMIT:
+		case O_KEEP_STATE:
+		case O_PROBE_STATE:
+		case O_CHECK_STATE:
+			return (1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Per-VNET initialization of the dynamic state subsystem: sets default
+ * tunables, creates the state UMA zone (with its max enforcing dyn_max),
+ * initializes the tick callout and allocates the initial hash table.
+ */
+void
+ipfw_dyn_init(struct ip_fw_chain *chain)
+{
+
+	V_ipfw_dyn_v = NULL;
+	V_dyn_buckets_max = 256; /* must be power of 2 */
+	V_curr_dyn_buckets = 256; /* must be power of 2 */
+
+	/* Default lifetimes (seconds) per protocol/TCP phase. */
+	V_dyn_ack_lifetime = 300;
+	V_dyn_syn_lifetime = 20;
+	V_dyn_fin_lifetime = 1;
+	V_dyn_rst_lifetime = 1;
+	V_dyn_udp_lifetime = 10;
+	V_dyn_short_lifetime = 5;
+
+	V_dyn_keepalive_interval = 20;
+	V_dyn_keepalive_period = 5;
+	V_dyn_keepalive = 1;	/* do send keepalives */
+	V_dyn_keepalive_last = time_uptime;
+
+	V_dyn_max = 16384; /* max # of dynamic rules */
+
+	V_ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule",
+	    sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL,
+	    UMA_ALIGN_PTR, 0);
+
+	/* Enforce limit on dynamic rules */
+	uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max);
+
+	callout_init(&V_ipfw_timeout, 1);
+
+	/*
+	 * This can potentially be done on first dynamic rule
+	 * being added to chain.
+	 */
+	resize_dynamic_table(chain, V_curr_dyn_buckets);
+	IPFW_ADD_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes);
+}
+
+/*
+ * Two-pass teardown: pass 0 only drains the tick callout; the final
+ * pass unregisters opcode rewriters, destroys per-bucket locks, frees
+ * the hash table and destroys the state zone.
+ */
+void
+ipfw_dyn_uninit(int pass)
+{
+	int i;
+
+	if (pass == 0) {
+		callout_drain(&V_ipfw_timeout);
+		return;
+	}
+	IPFW_DEL_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes);
+
+	if (V_ipfw_dyn_v != NULL) {
+		/*
+		 * Skip deleting all dynamic states -
+		 * uma_zdestroy() does this more efficiently;
+		 */
+
+		/* Destroy all mutexes */
+		for (i = 0 ; i < V_curr_dyn_buckets ; i++)
+			IPFW_BUCK_LOCK_DESTROY(&V_ipfw_dyn_v[i]);
+		free(V_ipfw_dyn_v, M_IPFW);
+		V_ipfw_dyn_v = NULL;
+	}
+
+	uma_zdestroy(V_ipfw_dyn_rule_zone);
+}
+
+#ifdef SYSCTL_NODE
+/*
+ * Get/set maximum number of dynamic states in given VNET instance.
+ *
+ * On write, also propagates the new limit to the UMA zone so the
+ * allocation cap matches the sysctl value.
+ */
+static int
+sysctl_ipfw_dyn_max(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	unsigned int nstates;
+
+	nstates = V_dyn_max;
+
+	error = sysctl_handle_int(oidp, &nstates, 0, req);
+	/* Read operation or some error */
+	if ((error != 0) || (req->newptr == NULL))
+		return (error);
+
+	V_dyn_max = nstates;
+	uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max);
+
+	return (0);
+}
+
+/*
+ * Get current number of dynamic states in given VNET instance.
+ * Read-only sysctl handler.
+ */
+static int
+sysctl_ipfw_dyn_count(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	unsigned int nstates;
+
+	nstates = DYN_COUNT;
+
+	error = sysctl_handle_int(oidp, &nstates, 0, req);
+
+	return (error);
+}
+#endif
+
+/*
+ * Returns size of dynamic states in legacy format
+ * (bytes needed to export every state as an ipfw_dyn_rule).
+ */
+int
+ipfw_dyn_len(void)
+{
+
+	return (V_ipfw_dyn_v == NULL) ? 0 :
+	    (DYN_COUNT * sizeof(ipfw_dyn_rule));
+}
+
+/*
+ * Returns number of dynamic states.
+ * Used by dump format v1 (current).
+ * Returns 0 when the state table is not allocated.
+ */
+int
+ipfw_dyn_get_count(void)
+{
+
+	return (V_ipfw_dyn_v == NULL) ? 0 : DYN_COUNT;
+}
+
+/*
+ * Copy a kernel dynamic state into the userland export record.
+ *
+ * Legacy export hack: the kernel's rule pointer field is overwritten
+ * with (rulenum, set) packed via memcpy, since userland cannot use the
+ * kernel pointer value.  The expire field is converted from an absolute
+ * time_uptime deadline to seconds remaining.
+ */
+static void
+export_dyn_rule(ipfw_dyn_rule *src, ipfw_dyn_rule *dst)
+{
+
+	memcpy(dst, src, sizeof(*src));
+	memcpy(&(dst->rule), &(src->rule->rulenum), sizeof(src->rule->rulenum));
+	/*
+	 * store set number into high word of
+	 * dst->rule pointer.
+	 */
+	memcpy((char *)&dst->rule + sizeof(src->rule->rulenum),
+	    &(src->rule->set), sizeof(src->rule->set));
+	/*
+	 * store a non-null value in "next".
+	 * The userland code will interpret a
+	 * NULL here as a marker
+	 * for the last dynamic rule.
+	 */
+	memcpy(&dst->next, &dst, sizeof(dst));
+	dst->expire =
+	    TIME_LEQ(dst->expire, time_uptime) ? 0 : dst->expire - time_uptime;
+}
+
+/*
+ * Fills the buffer given by @sd with dynamic states.
+ * Used by dump format v1 (current).
+ *
+ * Emits one IPFW_TLV_DYNSTATE_LIST container followed by one
+ * IPFW_TLV_DYN_ENT TLV per state; the last entry is flagged with
+ * IPFW_DF_LAST.  Caller must hold the UH read lock.
+ *
+ * Returns 0 on success, ENOMEM if the sockopt buffer runs out.
+ */
+int
+ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd)
+{
+	ipfw_dyn_rule *p;
+	ipfw_obj_dyntlv *dst, *last;
+	ipfw_obj_ctlv *ctlv;
+	int i;
+	size_t sz;
+
+	if (V_ipfw_dyn_v == NULL)
+		return (0);
+
+	IPFW_UH_RLOCK_ASSERT(chain);
+
+	ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv));
+	if (ctlv == NULL)
+		return (ENOMEM);
+	sz = sizeof(ipfw_obj_dyntlv);
+	ctlv->head.type = IPFW_TLV_DYNSTATE_LIST;
+	ctlv->objsize = sz;
+	last = NULL;
+
+	for (i = 0 ; i < V_curr_dyn_buckets; i++) {
+		IPFW_BUCK_LOCK(i);
+		for (p = V_ipfw_dyn_v[i].head ; p != NULL; p = p->next) {
+			dst = (ipfw_obj_dyntlv *)ipfw_get_sopt_space(sd, sz);
+			if (dst == NULL) {
+				IPFW_BUCK_UNLOCK(i);
+				return (ENOMEM);
+			}
+
+			export_dyn_rule(p, &dst->state);
+			dst->head.length = sz;
+			dst->head.type = IPFW_TLV_DYN_ENT;
+			last = dst;
+		}
+		IPFW_BUCK_UNLOCK(i);
+	}
+
+	if (last != NULL) /* mark last dynamic rule */
+		last->head.flags = IPFW_DF_LAST;
+
+	return (0);
+}
+
+/*
+ * Fill given buffer with dynamic states (legacy format).
+ * IPFW_UH_RLOCK has to be held while calling.
+ *
+ * Copies states into *pbp up to @ep, advancing *pbp past what was
+ * written.  The last exported record gets a NULL "next" marker, which
+ * userland uses as the end-of-list sentinel.
+ */
+void
+ipfw_get_dynamic(struct ip_fw_chain *chain, char **pbp, const char *ep)
+{
+	ipfw_dyn_rule *p, *last = NULL;
+	char *bp;
+	int i;
+
+	if (V_ipfw_dyn_v == NULL)
+		return;
+	bp = *pbp;
+
+	IPFW_UH_RLOCK_ASSERT(chain);
+
+	for (i = 0 ; i < V_curr_dyn_buckets; i++) {
+		IPFW_BUCK_LOCK(i);
+		for (p = V_ipfw_dyn_v[i].head ; p != NULL; p = p->next) {
+			/* States that do not fit are silently skipped. */
+			if (bp + sizeof *p <= ep) {
+				ipfw_dyn_rule *dst =
+				    (ipfw_dyn_rule *)bp;
+
+				export_dyn_rule(p, dst);
+				last = dst;
+				bp += sizeof(ipfw_dyn_rule);
+			}
+		}
+		IPFW_BUCK_UNLOCK(i);
+	}
+
+	if (last != NULL) /* mark last dynamic rule */
+		/*
+		 * NOTE(review): sizeof(last) (pointer size) happens to
+		 * equal sizeof(last->next) here; sizeof(last->next)
+		 * would be the clearer spelling - confirm upstream.
+		 */
+		bzero(&last->next, sizeof(last));
+	*pbp = bp;
+}
+/* end of file */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_eaction.c b/freebsd/sys/netpfil/ipfw/ip_fw_eaction.c
new file mode 100644
index 00000000..2c6ba8b9
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_eaction.c
@@ -0,0 +1,383 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2016 Yandex LLC
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/hash.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/queue.h>
+#include <net/pfil.h>
+
+#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* struct ipfw_rule_ref */
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+
+#include <rtems/bsd/local/opt_ipfw.h>
+
+/*
+ * External actions support for ipfw.
+ *
+ * This code provides KPI for implementing loadable modules, that
+ * can provide handlers for external action opcodes in the ipfw's
+ * rules.
+ * Module should implement opcode handler with type ipfw_eaction_t.
+ * This handler will be called by ipfw_chk() function when
+ * O_EXTERNAL_ACTION opcode will be matched. The handler must return
+ * value used as return value in ipfw_chk(), i.e. IP_FW_PASS,
+ * IP_FW_DENY (see ip_fw_private.h).
+ * Also the last argument must be set by handler. If it is zero,
+ * the search continues to the next rule. If it has non zero value,
+ * the search terminates.
+ *
+ * The module that implements external action should register its
+ * handler and name with ipfw_add_eaction() function.
+ * This function will return eaction_id, that can be used by module.
+ *
+ * It is possible to pass some additional information to external
+ * action handler via the O_EXTERNAL_INSTANCE opcode. This opcode
+ * will be next after the O_EXTERNAL_ACTION opcode. cmd->arg1 will
+ * contain index of named object related to instance of external action.
+ *
+ * In case when eaction module uses named instances, it should register
+ * opcode rewriting routines for O_EXTERNAL_INSTANCE opcode. The
+ * classifier callback can look back into O_EXTERNAL_ACTION opcode (it
+ * must be in the (ipfw_insn *)(cmd - 1)). By arg1 from O_EXTERNAL_ACTION
+ * it can determine eaction_id and compare it with its own.
+ * The macro IPFW_TLV_EACTION_NAME(eaction_id) can be used to determine
+ * the type of named_object related to external action instance.
+ *
+ * On module unload handler should be deregistered with ipfw_del_eaction()
+ * function using known eaction_id.
+ */
+
+/* Registered external action: named object plus its handler callback. */
+struct eaction_obj {
+	struct named_object	no;		/* kidx/name registration */
+	ipfw_eaction_t		*handler;	/* called from ipfw_chk() */
+	char			name[64];	/* backing store for no.name */
+};
+
+#define EACTION_OBJ(ch, cmd) \
+ ((struct eaction_obj *)SRV_OBJECT((ch), (cmd)->arg1))
+
+#if 0
+#define EACTION_DEBUG(fmt, ...) do { \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0)
+#else
+#define EACTION_DEBUG(fmt, ...)
+#endif
+
+const char *default_eaction_typename = "drop";
+
+/*
+ * Built-in fallback handler: deny the packet and terminate the rule
+ * search.  Rules are re-pointed to this handler (see reset_eaction_obj())
+ * when their eaction is deregistered.
+ */
+static int
+default_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args,
+    ipfw_insn *cmd, int *done)
+{
+
+	*done = 1; /* terminate the search */
+	return (IP_FW_DENY);
+}
+
+/*
+ * Opcode rewriting callbacks.
+ */
+/*
+ * Opcode-rewrite classifier: report the kernel index stored in arg1.
+ * Type is unused for eactions, hence 0.  Always succeeds.
+ */
+static int
+eaction_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+
+	EACTION_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1);
+	*puidx = cmd->arg1;
+	*ptype = 0;
+	return (0);
+}
+
+/* Opcode-rewrite updater: store a new kernel index into the opcode. */
+static void
+eaction_update(ipfw_insn *cmd, uint16_t idx)
+{
+
+	cmd->arg1 = idx;
+	EACTION_DEBUG("opcode %d, arg1 -> %d", cmd->opcode, cmd->arg1);
+}
+
+/*
+ * Resolve an eaction by the name supplied in the user's TLV buffer.
+ *
+ * Returns 0 and sets *pno on success; EINVAL when no TLVs were given
+ * or the name TLV is missing; ESRCH when no such eaction is registered.
+ */
+static int
+eaction_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+    struct named_object **pno)
+{
+	ipfw_obj_ntlv *ntlv;
+
+	if (ti->tlvs == NULL)
+		return (EINVAL);
+
+	/* Search ntlv in the buffer provided by user */
+	ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
+	    IPFW_TLV_EACTION);
+	if (ntlv == NULL)
+		return (EINVAL);
+	EACTION_DEBUG("name %s, uidx %u, type %u", ntlv->name,
+	    ti->uidx, ti->type);
+	/*
+	 * Search named object with corresponding name.
+	 * Since eaction objects are global - ignore the set value
+	 * and use zero instead.
+	 */
+	*pno = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch),
+	    0, IPFW_TLV_EACTION, ntlv->name);
+	if (*pno == NULL)
+		return (ESRCH);
+	return (0);
+}
+
+/* Resolve an eaction named object by its kernel index. */
+static struct named_object *
+eaction_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+
+	EACTION_DEBUG("kidx %u", idx);
+	return (ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx));
+}
+
+static struct opcode_obj_rewrite eaction_opcodes[] = {
+ {
+ .opcode = O_EXTERNAL_ACTION,
+ .etlv = IPFW_TLV_EACTION,
+ .classifier = eaction_classify,
+ .update = eaction_update,
+ .find_byname = eaction_findbyname,
+ .find_bykidx = eaction_findbykidx,
+ },
+};
+
+/*
+ * Allocate and register a new eaction object under @name with @handler.
+ *
+ * On success stores the allocated kernel index into *eaction_id (if the
+ * pointer is not NULL) and returns 0.  Returns EEXIST for a duplicate
+ * name, ENOSPC when no index can be allocated.  The object starts with
+ * refcnt 1 so it survives until explicit deregistration.
+ */
+static int
+create_eaction_obj(struct ip_fw_chain *ch, ipfw_eaction_t handler,
+    const char *name, uint16_t *eaction_id)
+{
+	struct namedobj_instance *ni;
+	struct eaction_obj *obj;
+
+	IPFW_UH_UNLOCK_ASSERT(ch);
+
+	/* Allocate before taking locks; M_WAITOK may sleep. */
+	ni = CHAIN_TO_SRV(ch);
+	obj = malloc(sizeof(*obj), M_IPFW, M_WAITOK | M_ZERO);
+	obj->no.name = obj->name;
+	obj->no.etlv = IPFW_TLV_EACTION;
+	obj->handler = handler;
+	strlcpy(obj->name, name, sizeof(obj->name));
+
+	IPFW_UH_WLOCK(ch);
+	if (ipfw_objhash_lookup_name_type(ni, 0, IPFW_TLV_EACTION,
+	    name) != NULL) {
+		/*
+		 * Object is already created.
+		 * We don't allow eactions with the same name.
+		 */
+		IPFW_UH_WUNLOCK(ch);
+		free(obj, M_IPFW);
+		EACTION_DEBUG("External action with typename "
+		    "'%s' already exists", name);
+		return (EEXIST);
+	}
+	if (ipfw_objhash_alloc_idx(ni, &obj->no.kidx) != 0) {
+		IPFW_UH_WUNLOCK(ch);
+		free(obj, M_IPFW);
+		EACTION_DEBUG("alloc_idx failed");
+		return (ENOSPC);
+	}
+	ipfw_objhash_add(ni, &obj->no);
+	IPFW_WLOCK(ch);
+	SRV_OBJECT(ch, obj->no.kidx) = obj;
+	IPFW_WUNLOCK(ch);
+	obj->no.refcnt++;
+	IPFW_UH_WUNLOCK(ch);
+
+	if (eaction_id != NULL)
+		*eaction_id = obj->no.kidx;
+	return (0);
+}
+
+/*
+ * Unlink and free the eaction object behind @no: detach it from the
+ * per-chain object array, remove it from the name hash, release its
+ * index and free the memory.  Caller must hold the UH write lock.
+ */
+static void
+destroy_eaction_obj(struct ip_fw_chain *ch, struct named_object *no)
+{
+	struct namedobj_instance *ni;
+	struct eaction_obj *obj;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	ni = CHAIN_TO_SRV(ch);
+	IPFW_WLOCK(ch);
+	obj = SRV_OBJECT(ch, no->kidx);
+	SRV_OBJECT(ch, no->kidx) = NULL;
+	IPFW_WUNLOCK(ch);
+	ipfw_objhash_del(ni, no);
+	ipfw_objhash_free_idx(ni, no->kidx);
+	free(obj, M_IPFW);
+}
+
+/*
+ * Resets all eaction opcodes to default handlers.
+ *
+ * Every rule whose O_EXTERNAL_ACTION references @eaction_id is rewritten
+ * to reference the default "drop" eaction instead, and any trailing
+ * O_EXTERNAL_INSTANCE opcode is truncated away.  Caller must hold the
+ * UH write lock.
+ */
+static void
+reset_eaction_obj(struct ip_fw_chain *ch, uint16_t eaction_id)
+{
+	struct named_object *no;
+	struct ip_fw *rule;
+	ipfw_insn *cmd;
+	int i;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	no = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch), 0,
+	    IPFW_TLV_EACTION, default_eaction_typename);
+	if (no == NULL)
+		panic("Default external action handler is not found");
+	if (eaction_id == no->kidx)
+		panic("Wrong eaction_id");
+	EACTION_DEBUG("replace id %u with %u", eaction_id, no->kidx);
+	IPFW_WLOCK(ch);
+	for (i = 0; i < ch->n_rules; i++) {
+		rule = ch->map[i];
+		cmd = ACTION_PTR(rule);
+		if (cmd->opcode != O_EXTERNAL_ACTION)
+			continue;
+		if (cmd->arg1 != eaction_id)
+			continue;
+		cmd->arg1 = no->kidx; /* Set to default id */
+		/*
+		 * XXX: we only bump refcount on default_eaction.
+		 * Refcount on the original object will be just
+		 * ignored on destroy. But on default_eaction it
+		 * will be decremented on rule deletion.
+		 */
+		no->refcnt++;
+		/*
+		 * Since named_object related to this instance will be
+		 * also destroyed, truncate the chain of opcodes to
+		 * remove O_EXTERNAL_INSTANCE opcode.
+		 */
+		if (rule->act_ofs < rule->cmd_len - 1) {
+			EACTION_DEBUG("truncate rule %d", rule->rulenum);
+			rule->cmd_len--;
+		}
+	}
+	IPFW_WUNLOCK(ch);
+}
+
+/*
+ * Initialize external actions framework.
+ * Create object with default eaction handler "drop".
+ *
+ * @first is non-zero for the default VNET; opcode rewriters are only
+ * registered then.  Returns 0 on success or the create_eaction_obj()
+ * error.
+ */
+int
+ipfw_eaction_init(struct ip_fw_chain *ch, int first)
+{
+	int error;
+
+	error = create_eaction_obj(ch, default_eaction,
+	    default_eaction_typename, NULL);
+	if (error != 0)
+		return (error);
+	IPFW_ADD_OBJ_REWRITER(first, eaction_opcodes);
+	EACTION_DEBUG("External actions support initialized");
+	return (0);
+}
+
+/*
+ * Tear down the external actions framework: destroy the default "drop"
+ * eaction object and (when @last is non-zero) unregister the opcode
+ * rewriters.
+ */
+void
+ipfw_eaction_uninit(struct ip_fw_chain *ch, int last)
+{
+	struct namedobj_instance *ni;
+	struct named_object *no;
+
+	ni = CHAIN_TO_SRV(ch);
+
+	IPFW_UH_WLOCK(ch);
+	no = ipfw_objhash_lookup_name_type(ni, 0, IPFW_TLV_EACTION,
+	    default_eaction_typename);
+	if (no != NULL)
+		destroy_eaction_obj(ch, no);
+	IPFW_UH_WUNLOCK(ch);
+	IPFW_DEL_OBJ_REWRITER(last, eaction_opcodes);
+	EACTION_DEBUG("External actions support uninitialized");
+}
+
+/*
+ * Registers external action handler to the global array.
+ * On success it returns eaction id, otherwise - zero.
+ *
+ * Zero is also returned when @name fails the generic object-name check
+ * or when registration fails internally (e.g. duplicate name).
+ */
+uint16_t
+ipfw_add_eaction(struct ip_fw_chain *ch, ipfw_eaction_t handler,
+    const char *name)
+{
+	uint16_t eaction_id;
+
+	eaction_id = 0;
+	if (ipfw_check_object_name_generic(name) == 0) {
+		create_eaction_obj(ch, handler, name, &eaction_id);
+		EACTION_DEBUG("Registered external action '%s' with id %u",
+		    name, eaction_id);
+	}
+	return (eaction_id);
+}
+
+/*
+ * Deregisters external action handler with id eaction_id.
+ *
+ * Any rules still referencing the handler (refcnt > 1) are re-pointed
+ * to the default "drop" eaction before the object is destroyed.
+ * Returns 0 on success, EINVAL for an unknown or non-eaction id.
+ */
+int
+ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id)
+{
+	struct named_object *no;
+
+	IPFW_UH_WLOCK(ch);
+	no = ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), eaction_id);
+	if (no == NULL || no->etlv != IPFW_TLV_EACTION) {
+		IPFW_UH_WUNLOCK(ch);
+		return (EINVAL);
+	}
+	if (no->refcnt > 1)
+		reset_eaction_obj(ch, eaction_id);
+	EACTION_DEBUG("External action '%s' with id %u unregistered",
+	    no->name, eaction_id);
+	destroy_eaction_obj(ch, no);
+	IPFW_UH_WUNLOCK(ch);
+	return (0);
+}
+
+/*
+ * Dispatch to the eaction handler referenced by cmd->arg1.
+ * Returns the handler's ipfw_chk() verdict; *done is set by the handler
+ * to stop or continue the rule search.
+ */
+int
+ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args,
+    ipfw_insn *cmd, int *done)
+{
+
+	return (EACTION_OBJ(ch, cmd)->handler(ch, args, cmd, done));
+}
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_iface.c b/freebsd/sys/netpfil/ipfw/ip_fw_iface.c
new file mode 100644
index 00000000..f8973a91
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_iface.c
@@ -0,0 +1,541 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2014 Yandex LLC.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Kernel interface tracking API.
+ *
+ */
+
+#include <rtems/bsd/local/opt_ipfw.h>
+#include <rtems/bsd/local/opt_inet.h>
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/eventhandler.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* struct ipfw_rule_ref */
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+
+#define CHAIN_TO_II(ch) ((struct namedobj_instance *)ch->ifcfg)
+
+#define DEFAULT_IFACES 128
+
+static void handle_ifdetach(struct ip_fw_chain *ch, struct ipfw_iface *iif,
+ uint16_t ifindex);
+static void handle_ifattach(struct ip_fw_chain *ch, struct ipfw_iface *iif,
+ uint16_t ifindex);
+static int list_ifaces(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd);
+
+static struct ipfw_sopt_handler scodes[] = {
+ { IP_FW_XIFLIST, 0, HDIR_GET, list_ifaces },
+};
+
+/*
+ * FreeBSD Kernel interface.
+ */
+static void ipfw_kifhandler(void *arg, struct ifnet *ifp);
+static int ipfw_kiflookup(char *name);
+static void iface_khandler_register(void);
+static void iface_khandler_deregister(void);
+
+static eventhandler_tag ipfw_ifdetach_event, ipfw_ifattach_event;
+static int num_vnets = 0;
+static struct mtx vnet_mtx;
+
+/*
+ * Checks if kernel interface is contained in our tracked
+ * interface list and calls attach/detach handler.
+ */
+static void
+ipfw_kifhandler(void *arg, struct ifnet *ifp)
+{
+ struct ip_fw_chain *ch;
+ struct ipfw_iface *iif;
+ struct namedobj_instance *ii;
+ uintptr_t htype;
+
+ if (V_ipfw_vnet_ready == 0)
+ return;
+
+ ch = &V_layer3_chain;
+ htype = (uintptr_t)arg;
+
+ IPFW_UH_WLOCK(ch);
+ ii = CHAIN_TO_II(ch);
+ if (ii == NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ return;
+ }
+ iif = (struct ipfw_iface*)ipfw_objhash_lookup_name(ii, 0,
+ if_name(ifp));
+ if (iif != NULL) {
+ if (htype == 1)
+ handle_ifattach(ch, iif, ifp->if_index);
+ else
+ handle_ifdetach(ch, iif, ifp->if_index);
+ }
+ IPFW_UH_WUNLOCK(ch);
+}
+
+/*
+ * Reference current VNET as iface tracking API user.
+ * Registers interface tracking handlers for first VNET.
+ */
+static void
+iface_khandler_register()
+{
+ int create;
+
+ create = 0;
+
+ mtx_lock(&vnet_mtx);
+ if (num_vnets == 0)
+ create = 1;
+ num_vnets++;
+ mtx_unlock(&vnet_mtx);
+
+ if (create == 0)
+ return;
+
+ printf("IPFW: starting up interface tracker\n");
+
+ ipfw_ifdetach_event = EVENTHANDLER_REGISTER(
+ ifnet_departure_event, ipfw_kifhandler, NULL,
+ EVENTHANDLER_PRI_ANY);
+ ipfw_ifattach_event = EVENTHANDLER_REGISTER(
+ ifnet_arrival_event, ipfw_kifhandler, (void*)((uintptr_t)1),
+ EVENTHANDLER_PRI_ANY);
+}
+
+/*
+ *
+ * Detach interface event handlers on last VNET instance
+ * detach.
+ */
+static void
+iface_khandler_deregister()
+{
+ int destroy;
+
+ destroy = 0;
+ mtx_lock(&vnet_mtx);
+ if (num_vnets == 1)
+ destroy = 1;
+ num_vnets--;
+ mtx_unlock(&vnet_mtx);
+
+ if (destroy == 0)
+ return;
+
+ EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
+ ipfw_ifattach_event);
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event,
+ ipfw_ifdetach_event);
+}
+
+/*
+ * Retrieves ifindex for given @name.
+ *
+ * Returns ifindex or 0.
+ */
+static int
+ipfw_kiflookup(char *name)
+{
+ struct ifnet *ifp;
+ int ifindex;
+
+ ifindex = 0;
+
+ if ((ifp = ifunit_ref(name)) != NULL) {
+ ifindex = ifp->if_index;
+ if_rele(ifp);
+ }
+
+ return (ifindex);
+}
+
+/*
+ * Global ipfw startup hook.
+ * Since we perform lazy initialization, do nothing except
+ * mutex init.
+ */
+int
+ipfw_iface_init()
+{
+
+ mtx_init(&vnet_mtx, "IPFW ifhandler mtx", NULL, MTX_DEF);
+ IPFW_ADD_SOPT_HANDLER(1, scodes);
+ return (0);
+}
+
+/*
+ * Global ipfw destroy hook.
+ * Unregister khandlers iff init has been done.
+ */
+void
+ipfw_iface_destroy()
+{
+
+ IPFW_DEL_SOPT_HANDLER(1, scodes);
+ mtx_destroy(&vnet_mtx);
+}
+
+/*
+ * Perform actual init on internal request.
+ * Inits both namehash and global khandler.
+ */
+static void
+vnet_ipfw_iface_init(struct ip_fw_chain *ch)
+{
+ struct namedobj_instance *ii;
+
+ ii = ipfw_objhash_create(DEFAULT_IFACES);
+ IPFW_UH_WLOCK(ch);
+ if (ch->ifcfg == NULL) {
+ ch->ifcfg = ii;
+ ii = NULL;
+ }
+ IPFW_UH_WUNLOCK(ch);
+
+ if (ii != NULL) {
+ /* Already initialized. Free namehash. */
+ ipfw_objhash_destroy(ii);
+ } else {
+ /* We're the first ones. Init kernel hooks. */
+ iface_khandler_register();
+ }
+}
+
+static int
+destroy_iface(struct namedobj_instance *ii, struct named_object *no,
+ void *arg)
+{
+
+ /* Assume all consumers have been already detached */
+ free(no, M_IPFW);
+ return (0);
+}
+
+/*
+ * Per-VNET ipfw detach hook.
+ *
+ */
+void
+vnet_ipfw_iface_destroy(struct ip_fw_chain *ch)
+{
+ struct namedobj_instance *ii;
+
+ IPFW_UH_WLOCK(ch);
+ ii = CHAIN_TO_II(ch);
+ ch->ifcfg = NULL;
+ IPFW_UH_WUNLOCK(ch);
+
+ if (ii != NULL) {
+ ipfw_objhash_foreach(ii, destroy_iface, ch);
+ ipfw_objhash_destroy(ii);
+ iface_khandler_deregister();
+ }
+}
+
+/*
+ * Notify the subsystem that we are interested in tracking
+ * interface @name. This function has to be called without
+ * holding any locks to permit allocating the necessary states
+ * for proper interface tracking.
+ *
+ * Returns 0 on success.
+ */
+int
+ipfw_iface_ref(struct ip_fw_chain *ch, char *name,
+ struct ipfw_ifc *ic)
+{
+ struct namedobj_instance *ii;
+ struct ipfw_iface *iif, *tmp;
+
+ if (strlen(name) >= sizeof(iif->ifname))
+ return (EINVAL);
+
+ IPFW_UH_WLOCK(ch);
+
+ ii = CHAIN_TO_II(ch);
+ if (ii == NULL) {
+
+ /*
+ * First request to subsystem.
+ * Let's perform init.
+ */
+ IPFW_UH_WUNLOCK(ch);
+ vnet_ipfw_iface_init(ch);
+ IPFW_UH_WLOCK(ch);
+ ii = CHAIN_TO_II(ch);
+ }
+
+ iif = (struct ipfw_iface *)ipfw_objhash_lookup_name(ii, 0, name);
+
+ if (iif != NULL) {
+ iif->no.refcnt++;
+ ic->iface = iif;
+ IPFW_UH_WUNLOCK(ch);
+ return (0);
+ }
+
+ IPFW_UH_WUNLOCK(ch);
+
+ /* Not found. Let's create one */
+ iif = malloc(sizeof(struct ipfw_iface), M_IPFW, M_WAITOK | M_ZERO);
+ TAILQ_INIT(&iif->consumers);
+ iif->no.name = iif->ifname;
+ strlcpy(iif->ifname, name, sizeof(iif->ifname));
+
+ /*
+ * Ref & link to the list.
+ *
+ * We assume ifnet_arrival_event / ifnet_departure_event
+ * are not holding any locks.
+ */
+ iif->no.refcnt = 1;
+ IPFW_UH_WLOCK(ch);
+
+ tmp = (struct ipfw_iface *)ipfw_objhash_lookup_name(ii, 0, name);
+ if (tmp != NULL) {
+ /* Interface has been created since unlock. Ref and return */
+ tmp->no.refcnt++;
+ ic->iface = tmp;
+ IPFW_UH_WUNLOCK(ch);
+ free(iif, M_IPFW);
+ return (0);
+ }
+
+ iif->ifindex = ipfw_kiflookup(name);
+ if (iif->ifindex != 0)
+ iif->resolved = 1;
+
+ ipfw_objhash_add(ii, &iif->no);
+ ic->iface = iif;
+
+ IPFW_UH_WUNLOCK(ch);
+
+ return (0);
+}
+
+/*
+ * Adds @ic to the list of iif interface consumers.
+ * Must be called with holding both UH+WLOCK.
+ * Callback may be immediately called (if interface exists).
+ */
+void
+ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic)
+{
+ struct ipfw_iface *iif;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+ IPFW_WLOCK_ASSERT(ch);
+
+ iif = ic->iface;
+
+ TAILQ_INSERT_TAIL(&iif->consumers, ic, next);
+ if (iif->resolved != 0)
+ ic->cb(ch, ic->cbdata, iif->ifindex);
+}
+
+/*
+ * Unlinks interface tracker object @ic from interface.
+ * Must be called while holding UH lock.
+ */
+void
+ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic)
+{
+ struct ipfw_iface *iif;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ iif = ic->iface;
+ TAILQ_REMOVE(&iif->consumers, ic, next);
+}
+
+/*
+ * Unreference interface specified by @ic.
+ * Must be called while holding UH lock.
+ */
+void
+ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic)
+{
+ struct ipfw_iface *iif;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ iif = ic->iface;
+ ic->iface = NULL;
+
+ iif->no.refcnt--;
+ /* TODO: check for references & delete */
+}
+
+/*
+ * Interface arrival handler.
+ */
+static void
+handle_ifattach(struct ip_fw_chain *ch, struct ipfw_iface *iif,
+ uint16_t ifindex)
+{
+ struct ipfw_ifc *ic;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ iif->gencnt++;
+ iif->resolved = 1;
+ iif->ifindex = ifindex;
+
+ IPFW_WLOCK(ch);
+ TAILQ_FOREACH(ic, &iif->consumers, next)
+ ic->cb(ch, ic->cbdata, iif->ifindex);
+ IPFW_WUNLOCK(ch);
+}
+
+/*
+ * Interface departure handler.
+ */
+static void
+handle_ifdetach(struct ip_fw_chain *ch, struct ipfw_iface *iif,
+ uint16_t ifindex)
+{
+ struct ipfw_ifc *ic;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ IPFW_WLOCK(ch);
+ TAILQ_FOREACH(ic, &iif->consumers, next)
+ ic->cb(ch, ic->cbdata, 0);
+ IPFW_WUNLOCK(ch);
+
+ iif->gencnt++;
+ iif->resolved = 0;
+ iif->ifindex = 0;
+}
+
+struct dump_iface_args {
+ struct ip_fw_chain *ch;
+ struct sockopt_data *sd;
+};
+
+static int
+export_iface_internal(struct namedobj_instance *ii, struct named_object *no,
+ void *arg)
+{
+ ipfw_iface_info *i;
+ struct dump_iface_args *da;
+ struct ipfw_iface *iif;
+
+ da = (struct dump_iface_args *)arg;
+
+ i = (ipfw_iface_info *)ipfw_get_sopt_space(da->sd, sizeof(*i));
+ KASSERT(i != NULL, ("previously checked buffer is not enough"));
+
+ iif = (struct ipfw_iface *)no;
+
+ strlcpy(i->ifname, iif->ifname, sizeof(i->ifname));
+ if (iif->resolved)
+ i->flags |= IPFW_IFFLAG_RESOLVED;
+ i->ifindex = iif->ifindex;
+ i->refcnt = iif->no.refcnt;
+ i->gencnt = iif->gencnt;
+ return (0);
+}
+
+/*
+ * Lists all interfaces currently tracked by ipfw.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
+ * Reply: [ ipfw_obj_lheader ipfw_iface_info x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+list_ifaces(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ struct namedobj_instance *ii;
+ struct _ipfw_obj_lheader *olh;
+ struct dump_iface_args da;
+ uint32_t count, size;
+
+ olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
+ if (olh == NULL)
+ return (EINVAL);
+ if (sd->valsize < olh->size)
+ return (EINVAL);
+
+ IPFW_UH_RLOCK(ch);
+ ii = CHAIN_TO_II(ch);
+ if (ii != NULL)
+ count = ipfw_objhash_count(ii);
+ else
+ count = 0;
+ size = count * sizeof(ipfw_iface_info) + sizeof(ipfw_obj_lheader);
+
+ /* Fill in header regardless of buffer size */
+ olh->count = count;
+ olh->objsize = sizeof(ipfw_iface_info);
+
+ if (size > olh->size) {
+ olh->size = size;
+ IPFW_UH_RUNLOCK(ch);
+ return (ENOMEM);
+ }
+ olh->size = size;
+
+ da.ch = ch;
+ da.sd = sd;
+
+ if (ii != NULL)
+ ipfw_objhash_foreach(ii, export_iface_internal, &da);
+ IPFW_UH_RUNLOCK(ch);
+
+ return (0);
+}
+
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_log.c b/freebsd/sys/netpfil/ipfw/ip_fw_log.c
index 60b0df7d..658e1256 100644
--- a/freebsd/sys/netpfil/ipfw/ip_fw_log.c
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_log.c
@@ -41,16 +41,15 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
-#include <sys/mbuf.h>
#include <sys/kernel.h>
+#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <net/ethernet.h> /* for ETHERTYPE_IP */
#include <net/if.h>
+#include <net/if_var.h>
#include <net/vnet.h>
-#include <net/if_types.h> /* for IFT_ETHER */
-#include <net/bpf.h> /* for BPF */
#include <netinet/in.h>
#include <netinet/ip.h>
@@ -83,111 +82,48 @@ __FBSDID("$FreeBSD$");
#define ICMP(p) ((struct icmphdr *)(p))
#define ICMP6(p) ((struct icmp6_hdr *)(p))
+#ifdef __APPLE__
+#undef snprintf
+#define snprintf sprintf
+#define SNPARGS(buf, len) buf + len
+#define SNP(buf) buf
+#else /* !__APPLE__ */
#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
#define SNP(buf) buf, sizeof(buf)
+#endif /* !__APPLE__ */
-#ifdef WITHOUT_BPF
-void
-ipfw_log_bpf(int onoff)
-{
-}
-#else /* !WITHOUT_BPF */
-static struct ifnet *log_if; /* hook to attach to bpf */
-
-/* we use this dummy function for all ifnet callbacks */
-static int
-log_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
-{
- return EINVAL;
-}
-
-static int
-ipfw_log_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro)
-{
- if (m != NULL)
- m_freem(m);
- return EINVAL;
-}
-
-static void
-ipfw_log_start(struct ifnet* ifp)
-{
- panic("ipfw_log_start() must not be called");
-}
-
-static const u_char ipfwbroadcastaddr[6] =
- { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-
-void
-ipfw_log_bpf(int onoff)
-{
- struct ifnet *ifp;
-
- if (onoff) {
- if (log_if)
- return;
- ifp = if_alloc(IFT_ETHER);
- if (ifp == NULL)
- return;
- if_initname(ifp, "ipfw", 0);
- ifp->if_mtu = 65536;
- ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_init = (void *)log_dummy;
- ifp->if_ioctl = log_dummy;
- ifp->if_start = ipfw_log_start;
- ifp->if_output = ipfw_log_output;
- ifp->if_addrlen = 6;
- ifp->if_hdrlen = 14;
- if_attach(ifp);
- ifp->if_broadcastaddr = ipfwbroadcastaddr;
- ifp->if_baudrate = IF_Mbps(10);
- bpfattach(ifp, DLT_EN10MB, 14);
- log_if = ifp;
- } else {
- if (log_if) {
- ether_ifdetach(log_if);
- if_free(log_if);
- }
- log_if = NULL;
- }
-}
-#endif /* !WITHOUT_BPF */
-
+#define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f)
/*
* We enter here when we have a rule with O_LOG.
* XXX this function alone takes about 2Kbytes of code!
*/
void
-ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
- struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg,
- struct ip *ip)
+ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
+ struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif,
+ u_short offset, uint32_t tablearg, struct ip *ip)
{
char *action;
int limit_reached = 0;
char action2[92], proto[128], fragment[32];
if (V_fw_verbose == 0) {
-#ifndef WITHOUT_BPF
-
- if (log_if == NULL || log_if->if_bpf == NULL)
- return;
-
if (args->eh) /* layer2, use orig hdr */
- BPF_MTAP2(log_if, args->eh, ETHER_HDR_LEN, m);
+ ipfw_bpf_mtap2(args->eh, ETHER_HDR_LEN, m);
else {
/* Add fake header. Later we will store
* more info in the header.
*/
if (ip->ip_v == 4)
- BPF_MTAP2(log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m);
- else if (ip->ip_v == 6)
- BPF_MTAP2(log_if, "DDDDDDSSSSSS\x86\xdd", ETHER_HDR_LEN, m);
+ ipfw_bpf_mtap2("DDDDDDSSSSSS\x08\x00",
+ ETHER_HDR_LEN, m);
+ else if (ip->ip_v == 6)
+ ipfw_bpf_mtap2("DDDDDDSSSSSS\x86\xdd",
+ ETHER_HDR_LEN, m);
else
/* Obviously bogus EtherType. */
- BPF_MTAP2(log_if, "DDDDDDSSSSSS\xff\xff", ETHER_HDR_LEN, m);
+ ipfw_bpf_mtap2("DDDDDDSSSSSS\xff\xff",
+ ETHER_HDR_LEN, m);
}
-#endif /* !WITHOUT_BPF */
return;
}
/* the old 'log' function */
@@ -254,27 +190,27 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
break;
case O_DIVERT:
snprintf(SNPARGS(action2, 0), "Divert %d",
- cmd->arg1);
+ TARG(cmd->arg1, divert));
break;
case O_TEE:
snprintf(SNPARGS(action2, 0), "Tee %d",
- cmd->arg1);
+ TARG(cmd->arg1, divert));
break;
case O_SETFIB:
snprintf(SNPARGS(action2, 0), "SetFib %d",
- IP_FW_ARG_TABLEARG(cmd->arg1));
+ TARG(cmd->arg1, fib) & 0x7FFF);
break;
case O_SKIPTO:
snprintf(SNPARGS(action2, 0), "SkipTo %d",
- IP_FW_ARG_TABLEARG(cmd->arg1));
+ TARG(cmd->arg1, skipto));
break;
case O_PIPE:
snprintf(SNPARGS(action2, 0), "Pipe %d",
- IP_FW_ARG_TABLEARG(cmd->arg1));
+ TARG(cmd->arg1, pipe));
break;
case O_QUEUE:
snprintf(SNPARGS(action2, 0), "Queue %d",
- IP_FW_ARG_TABLEARG(cmd->arg1));
+ TARG(cmd->arg1, pipe));
break;
case O_FORWARD_IP: {
ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
@@ -435,7 +371,7 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
#ifdef INET6
if (IS_IP6_FLOW_ID(&(args->f_id))) {
- if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG))
+ if (offset || ip6f_mf)
snprintf(SNPARGS(fragment, 0),
" (frag %08x:%d@%d%s)",
args->f_id.extra,
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_nat.c b/freebsd/sys/netpfil/ipfw/ip_fw_nat.c
index 5d4dcc9f..58bc1f3c 100644
--- a/freebsd/sys/netpfil/ipfw/ip_fw_nat.c
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_nat.c
@@ -33,17 +33,18 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/eventhandler.h>
#include <sys/malloc.h>
+#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/module.h>
#include <sys/rwlock.h>
-
-#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */
+#include <sys/rmlock.h>
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
@@ -55,6 +56,45 @@ __FBSDID("$FreeBSD$");
#include <machine/in_cksum.h> /* XXX for in_cksum */
+struct cfg_spool {
+ LIST_ENTRY(cfg_spool) _next; /* chain of spool instances */
+ struct in_addr addr;
+ uint16_t port;
+};
+
+/* Nat redirect configuration. */
+struct cfg_redir {
+ LIST_ENTRY(cfg_redir) _next; /* chain of redir instances */
+ uint16_t mode; /* type of redirect mode */
+ uint16_t proto; /* protocol: tcp/udp */
+ struct in_addr laddr; /* local ip address */
+ struct in_addr paddr; /* public ip address */
+ struct in_addr raddr; /* remote ip address */
+ uint16_t lport; /* local port */
+ uint16_t pport; /* public port */
+ uint16_t rport; /* remote port */
+ uint16_t pport_cnt; /* number of public ports */
+ uint16_t rport_cnt; /* number of remote ports */
+ struct alias_link **alink;
+ u_int16_t spool_cnt; /* number of entries in spool chain */
+ /* chain of spool instances */
+ LIST_HEAD(spool_chain, cfg_spool) spool_chain;
+};
+
+/* Nat configuration data struct. */
+struct cfg_nat {
+ /* chain of nat instances */
+ LIST_ENTRY(cfg_nat) _next;
+ int id; /* nat id */
+ struct in_addr ip; /* nat ip address */
+ struct libalias *lib; /* libalias instance */
+ int mode; /* aliasing mode */
+ int redir_cnt; /* number of entries in redir chain */
+ /* chain of redir instances */
+ LIST_HEAD(redir_chain, cfg_redir) redir_chain;
+ char if_name[IF_NAMESIZE]; /* interface name */
+};
+
static eventhandler_tag ifaddr_event_tag;
static void
@@ -66,8 +106,12 @@ ifaddr_change(void *arg __unused, struct ifnet *ifp)
KASSERT(curvnet == ifp->if_vnet,
("curvnet(%p) differs from iface vnet(%p)", curvnet, ifp->if_vnet));
+
+ if (V_ipfw_vnet_ready == 0 || V_ipfw_nat_ready == 0)
+ return;
+
chain = &V_layer3_chain;
- IPFW_WLOCK(chain);
+ IPFW_UH_WLOCK(chain);
/* Check every nat entry... */
LIST_FOREACH(ptr, &chain->nat, _next) {
/* ...using nic 'ifp->if_xname' as dynamic alias address. */
@@ -79,13 +123,15 @@ ifaddr_change(void *arg __unused, struct ifnet *ifp)
continue;
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
+ IPFW_WLOCK(chain);
ptr->ip = ((struct sockaddr_in *)
(ifa->ifa_addr))->sin_addr;
LibAliasSetAddress(ptr->lib, ptr->ip);
+ IPFW_WUNLOCK(chain);
}
if_addr_runlock(ifp);
}
- IPFW_WUNLOCK(chain);
+ IPFW_UH_WUNLOCK(chain);
}
/*
@@ -117,11 +163,11 @@ del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head)
LIST_FOREACH_SAFE(r, head, _next, tmp_r) {
num = 1; /* Number of alias_link to delete. */
switch (r->mode) {
- case REDIR_PORT:
+ case NAT44_REDIR_PORT:
num = r->pport_cnt;
/* FALLTHROUGH */
- case REDIR_ADDR:
- case REDIR_PROTO:
+ case NAT44_REDIR_ADDR:
+ case NAT44_REDIR_PROTO:
/* Delete all libalias redirect entry. */
for (i = 0; i < num; i++)
LibAliasRedirectDelete(n->lib, r->alink[i]);
@@ -142,27 +188,41 @@ del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head)
}
}
-static void
+static int
add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
{
- struct cfg_redir *r, *ser_r;
- struct cfg_spool *s, *ser_s;
+ struct cfg_redir *r;
+ struct cfg_spool *s;
+ struct nat44_cfg_redir *ser_r;
+ struct nat44_cfg_spool *ser_s;
+
int cnt, off, i;
for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) {
- ser_r = (struct cfg_redir *)&buf[off];
- r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
- memcpy(r, ser_r, SOF_REDIR);
+ ser_r = (struct nat44_cfg_redir *)&buf[off];
+ r = malloc(sizeof(*r), M_IPFW, M_WAITOK | M_ZERO);
+ r->mode = ser_r->mode;
+ r->laddr = ser_r->laddr;
+ r->paddr = ser_r->paddr;
+ r->raddr = ser_r->raddr;
+ r->lport = ser_r->lport;
+ r->pport = ser_r->pport;
+ r->rport = ser_r->rport;
+ r->pport_cnt = ser_r->pport_cnt;
+ r->rport_cnt = ser_r->rport_cnt;
+ r->proto = ser_r->proto;
+ r->spool_cnt = ser_r->spool_cnt;
+ //memcpy(r, ser_r, SOF_REDIR);
LIST_INIT(&r->spool_chain);
- off += SOF_REDIR;
+ off += sizeof(struct nat44_cfg_redir);
r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt,
M_IPFW, M_WAITOK | M_ZERO);
switch (r->mode) {
- case REDIR_ADDR:
+ case NAT44_REDIR_ADDR:
r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr,
r->paddr);
break;
- case REDIR_PORT:
+ case NAT44_REDIR_PORT:
for (i = 0 ; i < r->pport_cnt; i++) {
/* If remotePort is all ports, set it to 0. */
u_short remotePortCopy = r->rport + i;
@@ -178,7 +238,7 @@ add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
}
}
break;
- case REDIR_PROTO:
+ case NAT44_REDIR_PROTO:
r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr,
r->raddr, r->paddr, r->proto);
break;
@@ -186,25 +246,41 @@ add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
printf("unknown redirect mode: %u\n", r->mode);
break;
}
- /* XXX perhaps return an error instead of panic ? */
- if (r->alink[0] == NULL)
- panic("LibAliasRedirect* returned NULL");
+ if (r->alink[0] == NULL) {
+ printf("LibAliasRedirect* returned NULL\n");
+ free(r->alink, M_IPFW);
+ free(r, M_IPFW);
+ return (EINVAL);
+ }
/* LSNAT handling. */
for (i = 0; i < r->spool_cnt; i++) {
- ser_s = (struct cfg_spool *)&buf[off];
- s = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
- memcpy(s, ser_s, SOF_SPOOL);
+ ser_s = (struct nat44_cfg_spool *)&buf[off];
+ s = malloc(sizeof(*s), M_IPFW, M_WAITOK | M_ZERO);
+ s->addr = ser_s->addr;
+ s->port = ser_s->port;
LibAliasAddServer(ptr->lib, r->alink[0],
s->addr, htons(s->port));
- off += SOF_SPOOL;
+ off += sizeof(struct nat44_cfg_spool);
/* Hook spool entry. */
LIST_INSERT_HEAD(&r->spool_chain, s, _next);
}
/* And finally hook this redir entry. */
LIST_INSERT_HEAD(&ptr->redir_chain, r, _next);
}
+
+ return (0);
+}
+
+static void
+free_nat_instance(struct cfg_nat *ptr)
+{
+
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ LibAliasUninit(ptr->lib);
+ free(ptr, M_IPFW);
}
+
/*
* ipfw_nat - perform mbuf header translation.
*
@@ -345,11 +421,11 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
if (ldt) {
struct tcphdr *th;
struct udphdr *uh;
- u_short cksum;
+ uint16_t ip_len, cksum;
- ip->ip_len = ntohs(ip->ip_len);
+ ip_len = ntohs(ip->ip_len);
cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2)));
+ htons(ip->ip_p + ip_len - (ip->ip_hl << 2)));
switch (ip->ip_p) {
case IPPROTO_TCP:
@@ -375,7 +451,6 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
in_delayed_cksum(mcl);
mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
- ip->ip_len = htons(ip->ip_len);
}
args->m = mcl;
return (IP_FW_NAT);
@@ -393,60 +468,68 @@ lookup_nat(struct nat_list *l, int nat_id)
return res;
}
-static int
-ipfw_nat_cfg(struct sockopt *sopt)
+static struct cfg_nat *
+lookup_nat_name(struct nat_list *l, char *name)
{
- struct cfg_nat *cfg, *ptr;
- char *buf;
- struct ip_fw_chain *chain = &V_layer3_chain;
- size_t len;
- int gencnt, error = 0;
+ struct cfg_nat *res;
+ int id;
+ char *errptr;
- len = sopt->sopt_valsize;
- buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
- if ((error = sooptcopyin(sopt, buf, len, sizeof(struct cfg_nat))) != 0)
- goto out;
+ id = strtol(name, &errptr, 10);
+ if (id == 0 || *errptr != '\0')
+ return (NULL);
- cfg = (struct cfg_nat *)buf;
- if (cfg->id < 0) {
- error = EINVAL;
- goto out;
+ LIST_FOREACH(res, l, _next) {
+ if (res->id == id)
+ break;
}
+ return (res);
+}
+
+/* IP_FW3 configuration routines */
+
+static void
+nat44_config(struct ip_fw_chain *chain, struct nat44_cfg_nat *ucfg)
+{
+ struct cfg_nat *ptr, *tcfg;
+ int gencnt;
/*
* Find/create nat rule.
*/
- IPFW_WLOCK(chain);
+ IPFW_UH_WLOCK(chain);
gencnt = chain->gencnt;
- ptr = lookup_nat(&chain->nat, cfg->id);
+ ptr = lookup_nat_name(&chain->nat, ucfg->name);
if (ptr == NULL) {
- IPFW_WUNLOCK(chain);
+ IPFW_UH_WUNLOCK(chain);
/* New rule: allocate and init new instance. */
ptr = malloc(sizeof(struct cfg_nat), M_IPFW, M_WAITOK | M_ZERO);
ptr->lib = LibAliasInit(NULL);
LIST_INIT(&ptr->redir_chain);
} else {
/* Entry already present: temporarily unhook it. */
+ IPFW_WLOCK(chain);
LIST_REMOVE(ptr, _next);
- flush_nat_ptrs(chain, cfg->id);
+ flush_nat_ptrs(chain, ptr->id);
IPFW_WUNLOCK(chain);
+ IPFW_UH_WUNLOCK(chain);
}
/*
- * Basic nat configuration.
+ * Basic nat (re)configuration.
*/
- ptr->id = cfg->id;
+ ptr->id = strtol(ucfg->name, NULL, 10);
/*
* XXX - what if this rule doesn't nat any ip and just
* redirect?
* do we set aliasaddress to 0.0.0.0?
*/
- ptr->ip = cfg->ip;
- ptr->redir_cnt = cfg->redir_cnt;
- ptr->mode = cfg->mode;
- LibAliasSetMode(ptr->lib, cfg->mode, ~0);
+ ptr->ip = ucfg->ip;
+ ptr->redir_cnt = ucfg->redir_cnt;
+ ptr->mode = ucfg->mode;
+ strlcpy(ptr->if_name, ucfg->if_name, sizeof(ptr->if_name));
+ LibAliasSetMode(ptr->lib, ptr->mode, ~0);
LibAliasSetAddress(ptr->lib, ptr->ip);
- memcpy(ptr->if_name, cfg->if_name, IF_NAMESIZE);
/*
* Redir and LSNAT configuration.
@@ -454,16 +537,453 @@ ipfw_nat_cfg(struct sockopt *sopt)
/* Delete old cfgs. */
del_redir_spool_cfg(ptr, &ptr->redir_chain);
/* Add new entries. */
- add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr);
+ add_redir_spool_cfg((char *)(ucfg + 1), ptr);
+ IPFW_UH_WLOCK(chain);
- IPFW_WLOCK(chain);
/* Extra check to avoid race with another ipfw_nat_cfg() */
- if (gencnt != chain->gencnt &&
- ((cfg = lookup_nat(&chain->nat, ptr->id)) != NULL))
- LIST_REMOVE(cfg, _next);
+ tcfg = NULL;
+ if (gencnt != chain->gencnt)
+ tcfg = lookup_nat_name(&chain->nat, ucfg->name);
+ IPFW_WLOCK(chain);
+ if (tcfg != NULL)
+ LIST_REMOVE(tcfg, _next);
LIST_INSERT_HEAD(&chain->nat, ptr, _next);
+ IPFW_WUNLOCK(chain);
chain->gencnt++;
+
+ IPFW_UH_WUNLOCK(chain);
+
+ if (tcfg != NULL)
+ free_nat_instance(ptr);
+}
+
+/*
+ * Creates/configure nat44 instance
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header nat44_cfg_nat .. ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat44_cfg(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_header *oh;
+ struct nat44_cfg_nat *ucfg;
+ int id;
+ size_t read;
+ char *errptr;
+
+ /* Check minimum header size */
+ if (sd->valsize < (sizeof(*oh) + sizeof(*ucfg)))
+ return (EINVAL);
+
+ oh = (ipfw_obj_header *)sd->kbuf;
+
+ /* Basic length checks for TLVs */
+ if (oh->ntlv.head.length != sizeof(oh->ntlv))
+ return (EINVAL);
+
+ ucfg = (struct nat44_cfg_nat *)(oh + 1);
+
+ /* Check if name is properly terminated and looks like number */
+ if (strnlen(ucfg->name, sizeof(ucfg->name)) == sizeof(ucfg->name))
+ return (EINVAL);
+ id = strtol(ucfg->name, &errptr, 10);
+ if (id == 0 || *errptr != '\0')
+ return (EINVAL);
+
+ read = sizeof(*oh) + sizeof(*ucfg);
+ /* Check number of redirs */
+ if (sd->valsize < read + ucfg->redir_cnt*sizeof(struct nat44_cfg_redir))
+ return (EINVAL);
+
+ nat44_config(chain, ucfg);
+ return (0);
+}
+
+/*
+ * Destroys given nat instances.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat44_destroy(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_header *oh;
+ struct cfg_nat *ptr;
+ ipfw_obj_ntlv *ntlv;
+
+ /* Check minimum header size */
+ if (sd->valsize < sizeof(*oh))
+ return (EINVAL);
+
+ oh = (ipfw_obj_header *)sd->kbuf;
+
+ /* Basic length checks for TLVs */
+ if (oh->ntlv.head.length != sizeof(oh->ntlv))
+ return (EINVAL);
+
+ ntlv = &oh->ntlv;
+ /* Check if name is properly terminated */
+ if (strnlen(ntlv->name, sizeof(ntlv->name)) == sizeof(ntlv->name))
+ return (EINVAL);
+
+ IPFW_UH_WLOCK(chain);
+ ptr = lookup_nat_name(&chain->nat, ntlv->name);
+ if (ptr == NULL) {
+ IPFW_UH_WUNLOCK(chain);
+ return (ESRCH);
+ }
+ IPFW_WLOCK(chain);
+ LIST_REMOVE(ptr, _next);
+ flush_nat_ptrs(chain, ptr->id);
IPFW_WUNLOCK(chain);
+ IPFW_UH_WUNLOCK(chain);
+
+ free_nat_instance(ptr);
+
+ return (0);
+}
+
+static void
+export_nat_cfg(struct cfg_nat *ptr, struct nat44_cfg_nat *ucfg)
+{
+
+ snprintf(ucfg->name, sizeof(ucfg->name), "%d", ptr->id);
+ ucfg->ip = ptr->ip;
+ ucfg->redir_cnt = ptr->redir_cnt;
+ ucfg->mode = ptr->mode;
+ strlcpy(ucfg->if_name, ptr->if_name, sizeof(ucfg->if_name));
+}
+
+/*
+ * Gets config for given nat instance
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header nat44_cfg_nat .. ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat44_get_cfg(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_header *oh;
+ struct nat44_cfg_nat *ucfg;
+ struct cfg_nat *ptr;
+ struct cfg_redir *r;
+ struct cfg_spool *s;
+ struct nat44_cfg_redir *ser_r;
+ struct nat44_cfg_spool *ser_s;
+ size_t sz;
+
+ sz = sizeof(*oh) + sizeof(*ucfg);
+ /* Check minimum header size */
+ if (sd->valsize < sz)
+ return (EINVAL);
+
+ oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+
+ /* Basic length checks for TLVs */
+ if (oh->ntlv.head.length != sizeof(oh->ntlv))
+ return (EINVAL);
+
+ ucfg = (struct nat44_cfg_nat *)(oh + 1);
+
+ /* Check if name is properly terminated */
+ if (strnlen(ucfg->name, sizeof(ucfg->name)) == sizeof(ucfg->name))
+ return (EINVAL);
+
+ IPFW_UH_RLOCK(chain);
+ ptr = lookup_nat_name(&chain->nat, ucfg->name);
+ if (ptr == NULL) {
+ IPFW_UH_RUNLOCK(chain);
+ return (ESRCH);
+ }
+
+ export_nat_cfg(ptr, ucfg);
+
+ /* Estimate memory amount */
+ sz = sizeof(ipfw_obj_header) + sizeof(struct nat44_cfg_nat);
+ LIST_FOREACH(r, &ptr->redir_chain, _next) {
+ sz += sizeof(struct nat44_cfg_redir);
+ LIST_FOREACH(s, &r->spool_chain, _next)
+ sz += sizeof(struct nat44_cfg_spool);
+ }
+
+ ucfg->size = sz;
+ if (sd->valsize < sz) {
+
+ /*
+ * Submitted buffer size is not enough.
+ * WE've already filled in @ucfg structure with
+ * relevant info including size, so we
+ * can return. Buffer will be flushed automatically.
+ */
+ IPFW_UH_RUNLOCK(chain);
+ return (ENOMEM);
+ }
+
+ /* Size OK, let's copy data */
+ LIST_FOREACH(r, &ptr->redir_chain, _next) {
+ ser_r = (struct nat44_cfg_redir *)ipfw_get_sopt_space(sd,
+ sizeof(*ser_r));
+ ser_r->mode = r->mode;
+ ser_r->laddr = r->laddr;
+ ser_r->paddr = r->paddr;
+ ser_r->raddr = r->raddr;
+ ser_r->lport = r->lport;
+ ser_r->pport = r->pport;
+ ser_r->rport = r->rport;
+ ser_r->pport_cnt = r->pport_cnt;
+ ser_r->rport_cnt = r->rport_cnt;
+ ser_r->proto = r->proto;
+ ser_r->spool_cnt = r->spool_cnt;
+
+ LIST_FOREACH(s, &r->spool_chain, _next) {
+ ser_s = (struct nat44_cfg_spool *)ipfw_get_sopt_space(
+ sd, sizeof(*ser_s));
+
+ ser_s->addr = s->addr;
+ ser_s->port = s->port;
+ }
+ }
+
+ IPFW_UH_RUNLOCK(chain);
+
+ return (0);
+}
+
+/*
+ * Lists all nat44 instances currently available in kernel.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader nat44_cfg_nat x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat44_list_nat(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_lheader *olh;
+ struct nat44_cfg_nat *ucfg;
+ struct cfg_nat *ptr;
+ int nat_count;
+
+ /* Check minimum header size */
+ if (sd->valsize < sizeof(ipfw_obj_lheader))
+ return (EINVAL);
+
+ olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
+ IPFW_UH_RLOCK(chain);
+ nat_count = 0;
+ LIST_FOREACH(ptr, &chain->nat, _next)
+ nat_count++;
+
+ olh->count = nat_count;
+ olh->objsize = sizeof(struct nat44_cfg_nat);
+ olh->size = sizeof(*olh) + olh->count * olh->objsize;
+
+ if (sd->valsize < olh->size) {
+ IPFW_UH_RUNLOCK(chain);
+ return (ENOMEM);
+ }
+
+ LIST_FOREACH(ptr, &chain->nat, _next) {
+ ucfg = (struct nat44_cfg_nat *)ipfw_get_sopt_space(sd,
+ sizeof(*ucfg));
+ export_nat_cfg(ptr, ucfg);
+ }
+
+ IPFW_UH_RUNLOCK(chain);
+
+ return (0);
+}
+
+/*
+ * Gets log for given nat instance
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header nat44_cfg_nat ]
+ * Reply: [ ipfw_obj_header nat44_cfg_nat LOGBUFFER ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat44_get_log(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_header *oh;
+ struct nat44_cfg_nat *ucfg;
+ struct cfg_nat *ptr;
+ void *pbuf;
+ size_t sz;
+
+ sz = sizeof(*oh) + sizeof(*ucfg);
+ /* Check minimum header size */
+ if (sd->valsize < sz)
+ return (EINVAL);
+
+ oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+
+ /* Basic length checks for TLVs */
+ if (oh->ntlv.head.length != sizeof(oh->ntlv))
+ return (EINVAL);
+
+ ucfg = (struct nat44_cfg_nat *)(oh + 1);
+
+ /* Check if name is properly terminated */
+ if (strnlen(ucfg->name, sizeof(ucfg->name)) == sizeof(ucfg->name))
+ return (EINVAL);
+
+ IPFW_UH_RLOCK(chain);
+ ptr = lookup_nat_name(&chain->nat, ucfg->name);
+ if (ptr == NULL) {
+ IPFW_UH_RUNLOCK(chain);
+ return (ESRCH);
+ }
+
+ if (ptr->lib->logDesc == NULL) {
+ IPFW_UH_RUNLOCK(chain);
+ return (ENOENT);
+ }
+
+ export_nat_cfg(ptr, ucfg);
+
+ /* Estimate memory amount */
+ ucfg->size = sizeof(struct nat44_cfg_nat) + LIBALIAS_BUF_SIZE;
+ if (sd->valsize < sz + sizeof(*oh)) {
+
+ /*
+ * Submitted buffer size is not enough.
+ * WE've already filled in @ucfg structure with
+ * relevant info including size, so we
+ * can return. Buffer will be flushed automatically.
+ */
+ IPFW_UH_RUNLOCK(chain);
+ return (ENOMEM);
+ }
+
+ pbuf = (void *)ipfw_get_sopt_space(sd, LIBALIAS_BUF_SIZE);
+ memcpy(pbuf, ptr->lib->logDesc, LIBALIAS_BUF_SIZE);
+
+ IPFW_UH_RUNLOCK(chain);
+
+ return (0);
+}
+
+static struct ipfw_sopt_handler scodes[] = {
+ { IP_FW_NAT44_XCONFIG, 0, HDIR_SET, nat44_cfg },
+ { IP_FW_NAT44_DESTROY, 0, HDIR_SET, nat44_destroy },
+ { IP_FW_NAT44_XGETCONFIG, 0, HDIR_GET, nat44_get_cfg },
+ { IP_FW_NAT44_LIST_NAT, 0, HDIR_GET, nat44_list_nat },
+ { IP_FW_NAT44_XGETLOG, 0, HDIR_GET, nat44_get_log },
+};
+
+
+/*
+ * Legacy configuration routines
+ */
+
+struct cfg_spool_legacy {
+ LIST_ENTRY(cfg_spool_legacy) _next;
+ struct in_addr addr;
+ u_short port;
+};
+
+struct cfg_redir_legacy {
+ LIST_ENTRY(cfg_redir) _next;
+ u_int16_t mode;
+ struct in_addr laddr;
+ struct in_addr paddr;
+ struct in_addr raddr;
+ u_short lport;
+ u_short pport;
+ u_short rport;
+ u_short pport_cnt;
+ u_short rport_cnt;
+ int proto;
+ struct alias_link **alink;
+ u_int16_t spool_cnt;
+ LIST_HEAD(, cfg_spool_legacy) spool_chain;
+};
+
+struct cfg_nat_legacy {
+ LIST_ENTRY(cfg_nat_legacy) _next;
+ int id;
+ struct in_addr ip;
+ char if_name[IF_NAMESIZE];
+ int mode;
+ struct libalias *lib;
+ int redir_cnt;
+ LIST_HEAD(, cfg_redir_legacy) redir_chain;
+};
+
+static int
+ipfw_nat_cfg(struct sockopt *sopt)
+{
+ struct cfg_nat_legacy *cfg;
+ struct nat44_cfg_nat *ucfg;
+ struct cfg_redir_legacy *rdir;
+ struct nat44_cfg_redir *urdir;
+ char *buf;
+ size_t len, len2;
+ int error, i;
+
+ len = sopt->sopt_valsize;
+ len2 = len + 128;
+
+ /*
+ * Allocate 2x buffer to store converted structures.
+ * new redir_cfg has shrunk, so we're sure that
+ * new buffer size is enough.
+ */
+ buf = malloc(roundup2(len, 8) + len2, M_TEMP, M_WAITOK | M_ZERO);
+ error = sooptcopyin(sopt, buf, len, sizeof(struct cfg_nat_legacy));
+ if (error != 0)
+ goto out;
+
+ cfg = (struct cfg_nat_legacy *)buf;
+ if (cfg->id < 0) {
+ error = EINVAL;
+ goto out;
+ }
+
+ ucfg = (struct nat44_cfg_nat *)&buf[roundup2(len, 8)];
+ snprintf(ucfg->name, sizeof(ucfg->name), "%d", cfg->id);
+ strlcpy(ucfg->if_name, cfg->if_name, sizeof(ucfg->if_name));
+ ucfg->ip = cfg->ip;
+ ucfg->mode = cfg->mode;
+ ucfg->redir_cnt = cfg->redir_cnt;
+
+ if (len < sizeof(*cfg) + cfg->redir_cnt * sizeof(*rdir)) {
+ error = EINVAL;
+ goto out;
+ }
+
+ urdir = (struct nat44_cfg_redir *)(ucfg + 1);
+ rdir = (struct cfg_redir_legacy *)(cfg + 1);
+ for (i = 0; i < cfg->redir_cnt; i++) {
+ urdir->mode = rdir->mode;
+ urdir->laddr = rdir->laddr;
+ urdir->paddr = rdir->paddr;
+ urdir->raddr = rdir->raddr;
+ urdir->lport = rdir->lport;
+ urdir->pport = rdir->pport;
+ urdir->rport = rdir->rport;
+ urdir->pport_cnt = rdir->pport_cnt;
+ urdir->rport_cnt = rdir->rport_cnt;
+ urdir->proto = rdir->proto;
+ urdir->spool_cnt = rdir->spool_cnt;
+
+ urdir++;
+ rdir++;
+ }
+
+ nat44_config(&V_layer3_chain, ucfg);
out:
free(buf, M_TEMP);
@@ -479,18 +999,18 @@ ipfw_nat_del(struct sockopt *sopt)
sooptcopyin(sopt, &i, sizeof i, sizeof i);
/* XXX validate i */
- IPFW_WLOCK(chain);
+ IPFW_UH_WLOCK(chain);
ptr = lookup_nat(&chain->nat, i);
if (ptr == NULL) {
- IPFW_WUNLOCK(chain);
+ IPFW_UH_WUNLOCK(chain);
return (EINVAL);
}
+ IPFW_WLOCK(chain);
LIST_REMOVE(ptr, _next);
flush_nat_ptrs(chain, i);
IPFW_WUNLOCK(chain);
- del_redir_spool_cfg(ptr, &ptr->redir_chain);
- LibAliasUninit(ptr->lib);
- free(ptr, M_IPFW);
+ IPFW_UH_WUNLOCK(chain);
+ free_nat_instance(ptr);
return (0);
}
@@ -499,28 +1019,31 @@ ipfw_nat_get_cfg(struct sockopt *sopt)
{
struct ip_fw_chain *chain = &V_layer3_chain;
struct cfg_nat *n;
+ struct cfg_nat_legacy *ucfg;
struct cfg_redir *r;
struct cfg_spool *s;
+ struct cfg_redir_legacy *ser_r;
+ struct cfg_spool_legacy *ser_s;
char *data;
int gencnt, nat_cnt, len, error;
nat_cnt = 0;
len = sizeof(nat_cnt);
- IPFW_RLOCK(chain);
+ IPFW_UH_RLOCK(chain);
retry:
gencnt = chain->gencnt;
/* Estimate memory amount */
LIST_FOREACH(n, &chain->nat, _next) {
nat_cnt++;
- len += sizeof(struct cfg_nat);
+ len += sizeof(struct cfg_nat_legacy);
LIST_FOREACH(r, &n->redir_chain, _next) {
- len += sizeof(struct cfg_redir);
+ len += sizeof(struct cfg_redir_legacy);
LIST_FOREACH(s, &r->spool_chain, _next)
- len += sizeof(struct cfg_spool);
+ len += sizeof(struct cfg_spool_legacy);
}
}
- IPFW_RUNLOCK(chain);
+ IPFW_UH_RUNLOCK(chain);
data = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
bcopy(&nat_cnt, data, sizeof(nat_cnt));
@@ -528,25 +1051,43 @@ retry:
nat_cnt = 0;
len = sizeof(nat_cnt);
- IPFW_RLOCK(chain);
+ IPFW_UH_RLOCK(chain);
if (gencnt != chain->gencnt) {
free(data, M_TEMP);
goto retry;
}
/* Serialize all the data. */
LIST_FOREACH(n, &chain->nat, _next) {
- bcopy(n, &data[len], sizeof(struct cfg_nat));
- len += sizeof(struct cfg_nat);
+ ucfg = (struct cfg_nat_legacy *)&data[len];
+ ucfg->id = n->id;
+ ucfg->ip = n->ip;
+ ucfg->redir_cnt = n->redir_cnt;
+ ucfg->mode = n->mode;
+ strlcpy(ucfg->if_name, n->if_name, sizeof(ucfg->if_name));
+ len += sizeof(struct cfg_nat_legacy);
LIST_FOREACH(r, &n->redir_chain, _next) {
- bcopy(r, &data[len], sizeof(struct cfg_redir));
- len += sizeof(struct cfg_redir);
+ ser_r = (struct cfg_redir_legacy *)&data[len];
+ ser_r->mode = r->mode;
+ ser_r->laddr = r->laddr;
+ ser_r->paddr = r->paddr;
+ ser_r->raddr = r->raddr;
+ ser_r->lport = r->lport;
+ ser_r->pport = r->pport;
+ ser_r->rport = r->rport;
+ ser_r->pport_cnt = r->pport_cnt;
+ ser_r->rport_cnt = r->rport_cnt;
+ ser_r->proto = r->proto;
+ ser_r->spool_cnt = r->spool_cnt;
+ len += sizeof(struct cfg_redir_legacy);
LIST_FOREACH(s, &r->spool_chain, _next) {
- bcopy(s, &data[len], sizeof(struct cfg_spool));
- len += sizeof(struct cfg_spool);
+ ser_s = (struct cfg_spool_legacy *)&data[len];
+ ser_s->addr = s->addr;
+ ser_s->port = s->port;
+ len += sizeof(struct cfg_spool_legacy);
}
}
}
- IPFW_RUNLOCK(chain);
+ IPFW_UH_RUNLOCK(chain);
error = sooptcopyout(sopt, data, len);
free(data, M_TEMP);
@@ -561,6 +1102,7 @@ ipfw_nat_get_log(struct sockopt *sopt)
struct cfg_nat *ptr;
int i, size;
struct ip_fw_chain *chain;
+ IPFW_RLOCK_TRACKER;
chain = &V_layer3_chain;
@@ -609,14 +1151,12 @@ vnet_ipfw_nat_uninit(const void *arg __unused)
chain = &V_layer3_chain;
IPFW_WLOCK(chain);
+ V_ipfw_nat_ready = 0;
LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
LIST_REMOVE(ptr, _next);
- del_redir_spool_cfg(ptr, &ptr->redir_chain);
- LibAliasUninit(ptr->lib);
- free(ptr, M_IPFW);
+ free_nat_instance(ptr);
}
flush_nat_ptrs(chain, -1 /* flush all */);
- V_ipfw_nat_ready = 0;
IPFW_WUNLOCK(chain);
return (0);
}
@@ -632,6 +1172,7 @@ ipfw_nat_init(void)
ipfw_nat_del_ptr = ipfw_nat_del;
ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
ipfw_nat_get_log_ptr = ipfw_nat_get_log;
+ IPFW_ADD_SOPT_HANDLER(1, scodes);
ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change,
NULL, EVENTHANDLER_PRI_ANY);
@@ -643,6 +1184,7 @@ ipfw_nat_destroy(void)
EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag);
/* deregister ipfw_nat */
+ IPFW_DEL_SOPT_HANDLER(1, scodes);
ipfw_nat_ptr = NULL;
lookup_nat_ptr = NULL;
ipfw_nat_cfg_ptr = NULL;
@@ -677,14 +1219,14 @@ static moduledata_t ipfw_nat_mod = {
};
/* Define startup order. */
-#define IPFW_NAT_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
-#define IPFW_NAT_MODEVENT_ORDER (SI_ORDER_ANY - 128)
+#define IPFW_NAT_SI_SUB_FIREWALL SI_SUB_PROTO_FIREWALL
+#define IPFW_NAT_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */
#define IPFW_NAT_MODULE_ORDER (IPFW_NAT_MODEVENT_ORDER + 1)
#define IPFW_NAT_VNET_ORDER (IPFW_NAT_MODEVENT_ORDER + 2)
DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, IPFW_NAT_SI_SUB_FIREWALL, SI_ORDER_ANY);
MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1);
-MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2);
+MODULE_DEPEND(ipfw_nat, ipfw, 3, 3, 3);
MODULE_VERSION(ipfw_nat, 1);
SYSINIT(ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER,
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c b/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c
index d2e1b448..59c13aa5 100644
--- a/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/route.h>
+#include <net/ethernet.h>
#include <net/pfil.h>
#include <net/vnet.h>
@@ -60,6 +61,7 @@ __FBSDID("$FreeBSD$");
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
+#include <netinet6/scope6_var.h>
#endif
#include <netgraph/ng_ipfw.h>
@@ -76,26 +78,39 @@ static VNET_DEFINE(int, fw6_enable) = 1;
#define V_fw6_enable VNET(fw6_enable)
#endif
+static VNET_DEFINE(int, fwlink_enable) = 0;
+#define V_fwlink_enable VNET(fwlink_enable)
+
int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
/* Forward declarations. */
static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int);
+int ipfw_check_packet(void *, struct mbuf **, struct ifnet *, int,
+ struct inpcb *);
+int ipfw_check_frame(void *, struct mbuf **, struct ifnet *, int,
+ struct inpcb *);
#ifdef SYSCTL_NODE
SYSBEGIN(f1)
SYSCTL_DECL(_net_inet_ip_fw);
-SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, enable,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_enable), 0,
- ipfw_chg_hook, "I", "Enable ipfw");
+SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3,
+ &VNET_NAME(fw_enable), 0, ipfw_chg_hook, "I", "Enable ipfw");
#ifdef INET6
SYSCTL_DECL(_net_inet6_ip6_fw);
-SYSCTL_VNET_PROC(_net_inet6_ip6_fw, OID_AUTO, enable,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw6_enable), 0,
- ipfw_chg_hook, "I", "Enable ipfw+6");
+SYSCTL_PROC(_net_inet6_ip6_fw, OID_AUTO, enable,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3,
+ &VNET_NAME(fw6_enable), 0, ipfw_chg_hook, "I", "Enable ipfw+6");
#endif /* INET6 */
+SYSCTL_DECL(_net_link_ether);
+SYSCTL_PROC(_net_link_ether, OID_AUTO, ipfw,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3,
+ &VNET_NAME(fwlink_enable), 0, ipfw_chg_hook, "I",
+ "Pass ether pkts through firewall");
+
SYSEND
#endif /* SYSCTL_NODE */
@@ -106,7 +121,7 @@ SYSEND
* The packet may be consumed.
*/
int
-ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
+ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
struct inpcb *inp)
{
struct ip_fw_args args;
@@ -114,10 +129,6 @@ ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
int ipfw;
int ret;
- /* all the processing now uses ip_len in net format */
- if (mtod(*m0, struct ip *)->ip_v == 4)
- SET_NET_IPLEN(mtod(*m0, struct ip *));
-
/* convert dir to IPFW values */
dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT;
bzero(&args, sizeof(args));
@@ -131,11 +142,8 @@ again:
if (tag != NULL) {
args.rule = *((struct ipfw_rule_ref *)(tag+1));
m_tag_delete(*m0, tag);
- if (args.rule.info & IPFW_ONEPASS) {
- if (mtod(*m0, struct ip *)->ip_v == 4)
- SET_HOST_IPLEN(mtod(*m0, struct ip *));
+ if (args.rule.info & IPFW_ONEPASS)
return (0);
- }
}
args.m = *m0;
@@ -192,8 +200,20 @@ again:
}
#ifdef INET6
if (args.next_hop6 != NULL) {
- bcopy(args.next_hop6, (fwd_tag+1), len);
- if (in6_localip(&args.next_hop6->sin6_addr))
+ struct sockaddr_in6 *sa6;
+
+ sa6 = (struct sockaddr_in6 *)(fwd_tag + 1);
+ bcopy(args.next_hop6, sa6, len);
+ /*
+ * If nh6 address is link-local we should convert
+ * it to kernel internal form before doing any
+ * comparisons.
+ */
+ if (sa6_embedscope(sa6, V_ip6_use_defzone) != 0) {
+ ret = EACCES;
+ break;
+ }
+ if (in6_localip(&sa6->sin6_addr))
(*m0)->m_flags |= M_FASTFWD_OURS;
(*m0)->m_flags |= M_IP6_NEXTHOP;
}
@@ -279,8 +299,112 @@ again:
FREE_PKT(*m0);
*m0 = NULL;
}
- if (*m0 && mtod(*m0, struct ip *)->ip_v == 4)
- SET_HOST_IPLEN(mtod(*m0, struct ip *));
+
+ return ret;
+}
+
+/*
+ * ipfw processing for ethernet packets (in and out).
+ * Inteface is NULL from ether_demux, and ifp from
+ * ether_output_frame.
+ */
+int
+ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *dst, int dir,
+ struct inpcb *inp)
+{
+ struct ether_header *eh;
+ struct ether_header save_eh;
+ struct mbuf *m;
+ int i, ret;
+ struct ip_fw_args args;
+ struct m_tag *mtag;
+
+ /* fetch start point from rule, if any */
+ mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
+ if (mtag == NULL) {
+ args.rule.slot = 0;
+ } else {
+ /* dummynet packet, already partially processed */
+ struct ipfw_rule_ref *r;
+
+ /* XXX can we free it after use ? */
+ mtag->m_tag_id = PACKET_TAG_NONE;
+ r = (struct ipfw_rule_ref *)(mtag + 1);
+ if (r->info & IPFW_ONEPASS)
+ return (0);
+ args.rule = *r;
+ }
+
+ /* I need some amt of data to be contiguous */
+ m = *m0;
+ i = min(m->m_pkthdr.len, max_protohdr);
+ if (m->m_len < i) {
+ m = m_pullup(m, i);
+ if (m == NULL) {
+ *m0 = m;
+ return (0);
+ }
+ }
+ eh = mtod(m, struct ether_header *);
+ save_eh = *eh; /* save copy for restore below */
+ m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */
+
+ args.m = m; /* the packet we are looking at */
+ args.oif = dir == PFIL_OUT ? dst: NULL; /* destination, if any */
+ args.next_hop = NULL; /* we do not support forward yet */
+ args.next_hop6 = NULL; /* we do not support forward yet */
+ args.eh = &save_eh; /* MAC header for bridged/MAC packets */
+ args.inp = NULL; /* used by ipfw uid/gid/jail rules */
+ i = ipfw_chk(&args);
+ m = args.m;
+ if (m != NULL) {
+ /*
+ * Restore Ethernet header, as needed, in case the
+ * mbuf chain was replaced by ipfw.
+ */
+ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
+ if (m == NULL) {
+ *m0 = NULL;
+ return (0);
+ }
+ if (eh != mtod(m, struct ether_header *))
+ bcopy(&save_eh, mtod(m, struct ether_header *),
+ ETHER_HDR_LEN);
+ }
+ *m0 = m;
+
+ ret = 0;
+ /* Check result of ipfw_chk() */
+ switch (i) {
+ case IP_FW_PASS:
+ break;
+
+ case IP_FW_DENY:
+ ret = EACCES;
+ break; /* i.e. drop */
+
+ case IP_FW_DUMMYNET:
+ ret = EACCES;
+ int dir;
+
+ if (ip_dn_io_ptr == NULL)
+ break; /* i.e. drop */
+
+ *m0 = NULL;
+ dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN);
+ ip_dn_io_ptr(&m, dir, &args);
+ return 0;
+
+ default:
+ KASSERT(0, ("%s: unknown retval", __func__));
+ }
+
+ if (ret != 0) {
+ if (*m0)
+ FREE_PKT(*m0);
+ *m0 = NULL;
+ }
+
return ret;
}
@@ -303,7 +427,7 @@ ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule,
clone = *m0; /* use the original mbuf */
*m0 = NULL;
} else {
- clone = m_dup(*m0, M_DONTWAIT);
+ clone = m_dup(*m0, M_NOWAIT);
/* If we cannot duplicate the mbuf, we sacrifice the divert
* chain and continue with the tee-ed packet.
*/
@@ -325,7 +449,6 @@ ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule,
int hlen;
struct mbuf *reass;
- SET_HOST_IPLEN(ip); /* ip_reass wants host order */
reass = ip_reass(clone); /* Reassemble packet. */
if (reass == NULL)
return 0; /* not an error */
@@ -336,7 +459,6 @@ ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule,
*/
ip = mtod(reass, struct ip *);
hlen = ip->ip_hl << 2;
- SET_NET_IPLEN(ip);
ip->ip_sum = 0;
if (hlen == sizeof(struct ip))
ip->ip_sum = in_cksum_hdr(ip);
@@ -385,13 +507,16 @@ static int
ipfw_hook(int onoff, int pf)
{
struct pfil_head *pfh;
+ pfil_func_t hook_func;
pfh = pfil_head_get(PFIL_TYPE_AF, pf);
if (pfh == NULL)
return ENOENT;
+ hook_func = (pf == AF_LINK) ? ipfw_check_frame : ipfw_check_packet;
+
(void) (onoff ? pfil_add_hook : pfil_remove_hook)
- (ipfw_check_hook, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh);
+ (hook_func, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh);
return 0;
}
@@ -415,51 +540,50 @@ ipfw_attach_hooks(int arg)
printf("ipfw6_hook() error\n");
}
#endif
+ if (arg == 0) /* detach */
+ ipfw_hook(0, AF_LINK);
+ else if (V_fwlink_enable && ipfw_hook(1, AF_LINK) != 0) {
+ error = ENOENT;
+ printf("ipfw_link_hook() error\n");
+ }
return error;
}
int
ipfw_chg_hook(SYSCTL_HANDLER_ARGS)
{
- int enable;
- int oldenable;
+ int newval;
int error;
int af;
- if (arg1 == &VNET_NAME(fw_enable)) {
- enable = V_fw_enable;
+ if (arg1 == &V_fw_enable)
af = AF_INET;
- }
#ifdef INET6
- else if (arg1 == &VNET_NAME(fw6_enable)) {
- enable = V_fw6_enable;
+ else if (arg1 == &V_fw6_enable)
af = AF_INET6;
- }
#endif
+ else if (arg1 == &V_fwlink_enable)
+ af = AF_LINK;
else
return (EINVAL);
- oldenable = enable;
-
- error = sysctl_handle_int(oidp, &enable, 0, req);
+ newval = *(int *)arg1;
+ /* Handle sysctl change */
+ error = sysctl_handle_int(oidp, &newval, 0, req);
if (error)
return (error);
- enable = (enable) ? 1 : 0;
+ /* Formalize new value */
+ newval = (newval) ? 1 : 0;
- if (enable == oldenable)
+ if (*(int *)arg1 == newval)
return (0);
- error = ipfw_hook(enable, af);
+ error = ipfw_hook(newval, af);
if (error)
return (error);
- if (af == AF_INET)
- V_fw_enable = enable;
-#ifdef INET6
- else if (af == AF_INET6)
- V_fw6_enable = enable;
-#endif
+ *(int *)arg1 = newval;
return (0);
}
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_private.h b/freebsd/sys/netpfil/ipfw/ip_fw_private.h
index ceabf88d..3b483625 100644
--- a/freebsd/sys/netpfil/ipfw/ip_fw_private.h
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_private.h
@@ -66,14 +66,12 @@ enum {
*/
struct _ip6dn_args {
struct ip6_pktopts *opt_or;
- struct route_in6 ro_or;
int flags_or;
struct ip6_moptions *im6o_or;
struct ifnet *origifp_or;
struct ifnet *ifp_or;
struct sockaddr_in6 dst_or;
u_long mtu_or;
- struct route_in6 ro_pmtu_or;
};
@@ -104,7 +102,10 @@ struct ip_fw_args {
struct inpcb *inp;
struct _ip6dn_args dummypar; /* dummynet->ip6_output */
- struct sockaddr_in hopstore; /* store here if cannot use a pointer */
+ union { /* store here if cannot use a pointer */
+ struct sockaddr_in hopstore;
+ struct sockaddr_in6 hopstore6;
+ };
};
MALLOC_DECLARE(M_IPFW);
@@ -152,10 +153,13 @@ void ipfw_nat_destroy(void);
/* In ip_fw_log.c */
struct ip;
-void ipfw_log_bpf(int);
-void ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
- struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg,
- struct ip *ip);
+struct ip_fw_chain;
+void ipfw_bpf_init(int);
+void ipfw_bpf_uninit(int);
+void ipfw_bpf_mtap2(void *, u_int, struct mbuf *);
+void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
+ struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif,
+ u_short offset, uint32_t tablearg, struct ip *ip);
VNET_DECLARE(u_int64_t, norule_counter);
#define V_norule_counter VNET(norule_counter)
VNET_DECLARE(int, verbose_limit);
@@ -176,22 +180,26 @@ enum { /* result for matching dynamic rules */
* Eventually we may implement it with a callback on the function.
*/
struct ip_fw_chain;
-void ipfw_expire_dyn_rules(struct ip_fw_chain *, struct ip_fw *, int);
+struct sockopt_data;
+int ipfw_is_dyn_rule(struct ip_fw *rule);
+void ipfw_expire_dyn_rules(struct ip_fw_chain *, ipfw_range_tlv *);
void ipfw_dyn_unlock(ipfw_dyn_rule *q);
struct tcphdr;
struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *,
u_int32_t, u_int32_t, int);
-int ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
- struct ip_fw_args *args, uint32_t tablearg);
+int ipfw_install_state(struct ip_fw_chain *chain, struct ip_fw *rule,
+ ipfw_insn_limit *cmd, struct ip_fw_args *args, uint32_t tablearg);
ipfw_dyn_rule *ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt,
- int *match_direction, struct tcphdr *tcp);
+ int *match_direction, struct tcphdr *tcp, uint16_t kidx);
void ipfw_remove_dyn_children(struct ip_fw *rule);
void ipfw_get_dynamic(struct ip_fw_chain *chain, char **bp, const char *ep);
+int ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd);
void ipfw_dyn_init(struct ip_fw_chain *); /* per-vnet initialization */
void ipfw_dyn_uninit(int); /* per-vnet deinitialization */
int ipfw_dyn_len(void);
+int ipfw_dyn_get_count(void);
/* common variables */
VNET_DECLARE(int, fw_one_pass);
@@ -203,6 +211,9 @@ VNET_DECLARE(int, fw_verbose);
VNET_DECLARE(struct ip_fw_chain, layer3_chain);
#define V_layer3_chain VNET(layer3_chain)
+VNET_DECLARE(int, ipfw_vnet_ready);
+#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready)
+
VNET_DECLARE(u_int32_t, set_disable);
#define V_set_disable VNET(set_disable)
@@ -212,23 +223,66 @@ VNET_DECLARE(int, autoinc_step);
VNET_DECLARE(unsigned int, fw_tables_max);
#define V_fw_tables_max VNET(fw_tables_max)
+VNET_DECLARE(unsigned int, fw_tables_sets);
+#define V_fw_tables_sets VNET(fw_tables_sets)
+
+struct tables_config;
+
+#ifdef _KERNEL
+/*
+ * Here we have the structure representing an ipfw rule.
+ *
+ * It starts with a general area
+ * followed by an array of one or more instructions, which the code
+ * accesses as an array of 32-bit values.
+ *
+ * Given a rule pointer r:
+ *
+ * r->cmd is the start of the first instruction.
+ * ACTION_PTR(r) is the start of the first action (things to do
+ * once a rule matched).
+ */
+
+struct ip_fw {
+ uint16_t act_ofs; /* offset of action in 32-bit units */
+ uint16_t cmd_len; /* # of 32-bit words in cmd */
+ uint16_t rulenum; /* rule number */
+ uint8_t set; /* rule set (0..31) */
+ uint8_t flags; /* currently unused */
+ counter_u64_t cntr; /* Pointer to rule counters */
+ uint32_t timestamp; /* tv_sec of last match */
+ uint32_t id; /* rule id */
+ uint32_t cached_id; /* used by jump_fast */
+ uint32_t cached_pos; /* used by jump_fast */
+
+ ipfw_insn cmd[1]; /* storage for commands */
+};
+
+#define IPFW_RULE_CNTR_SIZE (2 * sizeof(uint64_t))
+
+#endif
+
struct ip_fw_chain {
struct ip_fw **map; /* array of rule ptrs to ease lookup */
uint32_t id; /* ruleset id */
int n_rules; /* number of static rules */
- LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */
- struct radix_node_head **tables; /* IPv4 tables */
- struct radix_node_head **xtables; /* extended tables */
- uint8_t *tabletype; /* Array of table types */
+ void *tablestate; /* runtime table info */
+ void *valuestate; /* runtime table value info */
+ int *idxmap; /* skipto array of rules */
+ void **srvstate; /* runtime service mappings */
#if defined( __linux__ ) || defined( _WIN32 )
spinlock_t rwmtx;
#else
- struct rwlock rwmtx;
+ struct rmlock rwmtx;
#endif
- int static_len; /* total len of static rules */
+ int static_len; /* total len of static rules (v0) */
uint32_t gencnt; /* NAT generation count */
- struct ip_fw *reap; /* list of rules to reap */
+ LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */
struct ip_fw *default_rule;
+ struct tables_config *tblcfg; /* tables module data */
+ void *ifcfg; /* interface module data */
+ int *idxmap_back; /* standby skipto array of rules */
+ struct namedobj_instance *srvmap; /* cfg name->number mappings */
#if defined( __linux__ ) || defined( _WIN32 )
spinlock_t uh_lock;
#else
@@ -236,13 +290,81 @@ struct ip_fw_chain {
#endif
};
+/* 64-byte structure representing multi-field table value */
+struct table_value {
+ uint32_t tag; /* O_TAG/O_TAGGED */
+ uint32_t pipe; /* O_PIPE/O_QUEUE */
+ uint16_t divert; /* O_DIVERT/O_TEE */
+ uint16_t skipto; /* skipto, CALLRET */
+ uint32_t netgraph; /* O_NETGRAPH/O_NGTEE */
+ uint32_t fib; /* O_SETFIB */
+ uint32_t nat; /* O_NAT */
+ uint32_t nh4;
+ uint8_t dscp;
+ uint8_t spare0;
+ uint16_t spare1;
+ /* -- 32 bytes -- */
+ struct in6_addr nh6;
+ uint32_t limit; /* O_LIMIT */
+ uint32_t zoneid; /* scope zone id for nh6 */
+ uint64_t refcnt; /* Number of references */
+};
+
+
+struct named_object {
+ TAILQ_ENTRY(named_object) nn_next; /* namehash */
+ TAILQ_ENTRY(named_object) nv_next; /* valuehash */
+ char *name; /* object name */
+ uint16_t etlv; /* Export TLV id */
+ uint8_t subtype;/* object subtype within class */
+ uint8_t set; /* set object belongs to */
+ uint16_t kidx; /* object kernel index */
+ uint16_t spare;
+ uint32_t ocnt; /* object counter for internal use */
+ uint32_t refcnt; /* number of references */
+};
+TAILQ_HEAD(namedobjects_head, named_object);
+
struct sockopt; /* used by tcp_var.h */
+struct sockopt_data {
+ caddr_t kbuf; /* allocated buffer */
+ size_t ksize; /* given buffer size */
+ size_t koff; /* data already used */
+ size_t kavail; /* number of bytes available */
+ size_t ktotal; /* total bytes pushed */
+ struct sockopt *sopt; /* socket data */
+ caddr_t sopt_val; /* sopt user buffer */
+ size_t valsize; /* original data size */
+};
+
+struct ipfw_ifc;
+
+typedef void (ipfw_ifc_cb)(struct ip_fw_chain *ch, void *cbdata,
+ uint16_t ifindex);
+
+struct ipfw_iface {
+ struct named_object no;
+ char ifname[64];
+ int resolved;
+ uint16_t ifindex;
+ uint16_t spare;
+ uint64_t gencnt;
+ TAILQ_HEAD(, ipfw_ifc) consumers;
+};
+
+struct ipfw_ifc {
+ TAILQ_ENTRY(ipfw_ifc) next;
+ struct ipfw_iface *iface;
+ ipfw_ifc_cb *cb;
+ void *cbdata;
+};
/* Macro for working with various counters */
#define IPFW_INC_RULE_COUNTER(_cntr, _bytes) do { \
- (_cntr)->pcnt++; \
- (_cntr)->bcnt += _bytes; \
- (_cntr)->timestamp = time_uptime; \
+ counter_u64_add((_cntr)->cntr, 1); \
+ counter_u64_add((_cntr)->cntr + 1, _bytes); \
+ if ((_cntr)->timestamp != time_uptime) \
+ (_cntr)->timestamp = time_uptime; \
} while (0)
#define IPFW_INC_DYN_COUNTER(_cntr, _bytes) do { \
@@ -251,8 +373,8 @@ struct sockopt; /* used by tcp_var.h */
} while (0)
#define IPFW_ZERO_RULE_COUNTER(_cntr) do { \
- (_cntr)->pcnt = 0; \
- (_cntr)->bcnt = 0; \
+ counter_u64_zero((_cntr)->cntr); \
+ counter_u64_zero((_cntr)->cntr + 1); \
(_cntr)->timestamp = 0; \
} while (0)
@@ -261,12 +383,15 @@ struct sockopt; /* used by tcp_var.h */
(_cntr)->bcnt = 0; \
} while (0)
-#define IP_FW_ARG_TABLEARG(a) ((a) == IP_FW_TABLEARG) ? tablearg : (a)
+#define TARG_VAL(ch, k, f) ((struct table_value *)((ch)->valuestate))[k].f
+#define IP_FW_ARG_TABLEARG(ch, a, f) \
+ (((a) == IP_FW_TARG) ? TARG_VAL(ch, tablearg, f) : (a))
/*
* The lock is heavily used by ip_fw2.c (the main file) and ip_fw_nat.c
* so the variable and the macros must be here.
*/
+#if defined( __linux__ ) || defined( _WIN32 )
#define IPFW_LOCK_INIT(_chain) do { \
rw_init(&(_chain)->rwmtx, "IPFW static rules"); \
rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \
@@ -280,49 +405,354 @@ struct sockopt; /* used by tcp_var.h */
#define IPFW_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_RLOCKED)
#define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
-#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
-#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx)
-#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
-#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
+#define IPFW_RLOCK_TRACKER
+#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
+#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx)
+#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
+#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
+#define IPFW_PF_RLOCK(p) IPFW_RLOCK(p)
+#define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p)
+#else /* FreeBSD */
+#define IPFW_LOCK_INIT(_chain) do { \
+ rm_init(&(_chain)->rwmtx, "IPFW static rules"); \
+ rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \
+ } while (0)
+
+#define IPFW_LOCK_DESTROY(_chain) do { \
+ rm_destroy(&(_chain)->rwmtx); \
+ rw_destroy(&(_chain)->uh_lock); \
+ } while (0)
+
+#define IPFW_RLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_RLOCKED)
+#define IPFW_WLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_WLOCKED)
+
+#define IPFW_RLOCK_TRACKER struct rm_priotracker _tracker
+#define IPFW_RLOCK(p) rm_rlock(&(p)->rwmtx, &_tracker)
+#define IPFW_RUNLOCK(p) rm_runlock(&(p)->rwmtx, &_tracker)
+#define IPFW_WLOCK(p) rm_wlock(&(p)->rwmtx)
+#define IPFW_WUNLOCK(p) rm_wunlock(&(p)->rwmtx)
+#define IPFW_PF_RLOCK(p) IPFW_RLOCK(p)
+#define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p)
+#endif
#define IPFW_UH_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_RLOCKED)
#define IPFW_UH_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_WLOCKED)
+#define IPFW_UH_UNLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_UNLOCKED)
#define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock)
#define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock)
#define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock)
#define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock)
+struct obj_idx {
+ uint16_t uidx; /* internal index supplied by userland */
+ uint16_t kidx; /* kernel object index */
+ uint16_t off; /* tlv offset from rule end in 4-byte words */
+ uint8_t spare;
+ uint8_t type; /* object type within its category */
+};
+
+struct rule_check_info {
+ uint16_t flags; /* rule-specific check flags */
+ uint16_t object_opcodes; /* num of opcodes referencing objects */
+ uint16_t urule_numoff; /* offset of rulenum in bytes */
+ uint8_t version; /* rule version */
+ uint8_t spare;
+	ipfw_obj_ctlv *ctlv;	/* name TLV container */
+ struct ip_fw *krule; /* resulting rule pointer */
+ caddr_t urule; /* original rule pointer */
+ struct obj_idx obuf[8]; /* table references storage */
+};
+
+/* Legacy interface support */
+/*
+ * FreeBSD 8 export rule format
+ */
+struct ip_fw_rule0 {
+ struct ip_fw *x_next; /* linked list of rules */
+ struct ip_fw *next_rule; /* ptr to next [skipto] rule */
+ /* 'next_rule' is used to pass up 'set_disable' status */
+
+ uint16_t act_ofs; /* offset of action in 32-bit units */
+ uint16_t cmd_len; /* # of 32-bit words in cmd */
+ uint16_t rulenum; /* rule number */
+ uint8_t set; /* rule set (0..31) */
+ uint8_t _pad; /* padding */
+ uint32_t id; /* rule id */
+
+ /* These fields are present in all rules. */
+ uint64_t pcnt; /* Packet counter */
+ uint64_t bcnt; /* Byte counter */
+ uint32_t timestamp; /* tv_sec of last match */
+
+ ipfw_insn cmd[1]; /* storage for commands */
+};
+
+struct ip_fw_bcounter0 {
+ uint64_t pcnt; /* Packet counter */
+ uint64_t bcnt; /* Byte counter */
+ uint32_t timestamp; /* tv_sec of last match */
+};
+
+/* Kernel rule length */
+/*
+ * RULE _K_ SIZE _V_ ->
+ *   get kernel size from userland rule version _V_.
+ * RULE _U_ SIZE _V_ ->
+ * get user size version _V_ from kernel rule
+ * RULESIZE _V_ ->
+ * get user size rule length
+ */
+/* FreeBSD8 <> current kernel format */
+#define RULEUSIZE0(r) (sizeof(struct ip_fw_rule0) + (r)->cmd_len * 4 - 4)
+#define RULEKSIZE0(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8)
+/* FreeBSD11 <> current kernel format */
+#define RULEUSIZE1(r) (roundup2(sizeof(struct ip_fw_rule) + \
+ (r)->cmd_len * 4 - 4, 8))
+#define RULEKSIZE1(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8)
+
+/*
+ * Tables/Objects index rewriting code
+ */
+
+/* Default and maximum number of ipfw tables/objects. */
+#define IPFW_TABLES_MAX 65536
+#define IPFW_TABLES_DEFAULT 128
+#define IPFW_OBJECTS_MAX 65536
+#define IPFW_OBJECTS_DEFAULT 1024
+
+#define CHAIN_TO_SRV(ch) ((ch)->srvmap)
+#define SRV_OBJECT(ch, idx) ((ch)->srvstate[(idx)])
+
+struct tid_info {
+ uint32_t set; /* table set */
+ uint16_t uidx; /* table index */
+ uint8_t type; /* table type */
+ uint8_t atype;
+ uint8_t spare;
+ int tlen; /* Total TLV size block */
+ void *tlvs; /* Pointer to first TLV */
+};
+
+/*
+ * Classifier callback. Checks if @cmd opcode contains kernel object reference.
+ * If true, returns its index and type.
+ * Returns 0 if match is found, 1 otherwise.
+ */
+typedef int (ipfw_obj_rw_cl)(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype);
+/*
+ * Updater callback. Sets kernel object reference index to @puidx
+ */
+typedef void (ipfw_obj_rw_upd)(ipfw_insn *cmd, uint16_t puidx);
+/*
+ * Finder callback. Tries to find named object by name (specified via @ti).
+ * Stores found named object pointer in @pno.
+ * If object was not found, NULL is stored.
+ *
+ * Return 0 if input data was valid.
+ */
+typedef int (ipfw_obj_fname_cb)(struct ip_fw_chain *ch,
+ struct tid_info *ti, struct named_object **pno);
+/*
+ * Another finder callback. Tries to find named object by kernel index.
+ *
+ * Returns pointer to named object or NULL.
+ */
+typedef struct named_object *(ipfw_obj_fidx_cb)(struct ip_fw_chain *ch,
+ uint16_t kidx);
+/*
+ * Object creator callback. Tries to create object specified by @ti.
+ * Stores newly-allocated object index in @pkidx.
+ *
+ * Returns 0 on success.
+ */
+typedef int (ipfw_obj_create_cb)(struct ip_fw_chain *ch, struct tid_info *ti,
+ uint16_t *pkidx);
+/*
+ * Object destroy callback. Intended to free resources allocated by
+ * create_object callback.
+ */
+typedef void (ipfw_obj_destroy_cb)(struct ip_fw_chain *ch,
+ struct named_object *no);
+/*
+ * Sets handler callback. Handles moving and swapping set of named object.
+ * SWAP_ALL moves all named objects from set `set' to `new_set' and vice versa;
+ * TEST_ALL checks that there aren't any named object with conflicting names;
+ * MOVE_ALL moves all named objects from set `set' to `new_set';
+ * COUNT_ONE used to count number of references used by object with kidx `set';
+ * TEST_ONE checks that named object with kidx `set' can be moved to `new_set';
+ * MOVE_ONE moves named object with kidx `set' to set `new_set'.
+ */
+enum ipfw_sets_cmd {
+ SWAP_ALL = 0, TEST_ALL, MOVE_ALL, COUNT_ONE, TEST_ONE, MOVE_ONE
+};
+typedef int (ipfw_obj_sets_cb)(struct ip_fw_chain *ch,
+ uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd);
+
+
+struct opcode_obj_rewrite {
+ uint32_t opcode; /* Opcode to act upon */
+ uint32_t etlv; /* Relevant export TLV id */
+ ipfw_obj_rw_cl *classifier; /* Check if rewrite is needed */
+ ipfw_obj_rw_upd *update; /* update cmd with new value */
+ ipfw_obj_fname_cb *find_byname; /* Find named object by name */
+ ipfw_obj_fidx_cb *find_bykidx; /* Find named object by kidx */
+ ipfw_obj_create_cb *create_object; /* Create named object */
+ ipfw_obj_destroy_cb *destroy_object;/* Destroy named object */
+ ipfw_obj_sets_cb *manage_sets; /* Swap or move sets */
+};
+
+#define IPFW_ADD_OBJ_REWRITER(f, c) do { \
+ if ((f) != 0) \
+ ipfw_add_obj_rewriter(c, \
+ sizeof(c) / sizeof(c[0])); \
+ } while(0)
+#define IPFW_DEL_OBJ_REWRITER(l, c) do { \
+ if ((l) != 0) \
+ ipfw_del_obj_rewriter(c, \
+ sizeof(c) / sizeof(c[0])); \
+ } while(0)
+
+/* In ip_fw_iface.c */
+int ipfw_iface_init(void);
+void ipfw_iface_destroy(void);
+void vnet_ipfw_iface_destroy(struct ip_fw_chain *ch);
+int ipfw_iface_ref(struct ip_fw_chain *ch, char *name,
+ struct ipfw_ifc *ic);
+void ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic);
+void ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic);
+void ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic);
+
/* In ip_fw_sockopt.c */
+void ipfw_init_skipto_cache(struct ip_fw_chain *chain);
+void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain);
int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
-int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule);
-int ipfw_ctl(struct sockopt *sopt);
+int ipfw_ctl3(struct sockopt *sopt);
int ipfw_chk(struct ip_fw_args *args);
+void ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head,
+ struct ip_fw *rule);
void ipfw_reap_rules(struct ip_fw *head);
-
-/* In ip_fw_pfil */
-int ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
- struct inpcb *inp);
+void ipfw_init_counters(void);
+void ipfw_destroy_counters(void);
+struct ip_fw *ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize);
+int ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt);
+
+typedef int (sopt_handler_f)(struct ip_fw_chain *ch,
+ ip_fw3_opheader *op3, struct sockopt_data *sd);
+struct ipfw_sopt_handler {
+ uint16_t opcode;
+ uint8_t version;
+ uint8_t dir;
+ sopt_handler_f *handler;
+ uint64_t refcnt;
+};
+#define HDIR_SET 0x01 /* Handler is used to set some data */
+#define HDIR_GET 0x02 /* Handler is used to retrieve data */
+#define HDIR_BOTH HDIR_GET|HDIR_SET
+
+void ipfw_init_sopt_handler(void);
+void ipfw_destroy_sopt_handler(void);
+void ipfw_add_sopt_handler(struct ipfw_sopt_handler *sh, size_t count);
+int ipfw_del_sopt_handler(struct ipfw_sopt_handler *sh, size_t count);
+caddr_t ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed);
+caddr_t ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed);
+#define IPFW_ADD_SOPT_HANDLER(f, c) do { \
+ if ((f) != 0) \
+ ipfw_add_sopt_handler(c, \
+ sizeof(c) / sizeof(c[0])); \
+ } while(0)
+#define IPFW_DEL_SOPT_HANDLER(l, c) do { \
+ if ((l) != 0) \
+ ipfw_del_sopt_handler(c, \
+ sizeof(c) / sizeof(c[0])); \
+ } while(0)
+
+struct namedobj_instance;
+typedef int (objhash_cb_t)(struct namedobj_instance *ni, struct named_object *,
+ void *arg);
+typedef uint32_t (objhash_hash_f)(struct namedobj_instance *ni, const void *key,
+ uint32_t kopt);
+typedef int (objhash_cmp_f)(struct named_object *no, const void *key,
+ uint32_t kopt);
+struct namedobj_instance *ipfw_objhash_create(uint32_t items);
+void ipfw_objhash_destroy(struct namedobj_instance *);
+void ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks);
+void ipfw_objhash_bitmap_merge(struct namedobj_instance *ni,
+ void **idx, int *blocks);
+void ipfw_objhash_bitmap_swap(struct namedobj_instance *ni,
+ void **idx, int *blocks);
+void ipfw_objhash_bitmap_free(void *idx, int blocks);
+void ipfw_objhash_set_hashf(struct namedobj_instance *ni, objhash_hash_f *f);
+struct named_object *ipfw_objhash_lookup_name(struct namedobj_instance *ni,
+ uint32_t set, char *name);
+struct named_object *ipfw_objhash_lookup_name_type(struct namedobj_instance *ni,
+ uint32_t set, uint32_t type, const char *name);
+struct named_object *ipfw_objhash_lookup_kidx(struct namedobj_instance *ni,
+ uint16_t idx);
+int ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a,
+ struct named_object *b);
+void ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no);
+void ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no);
+uint32_t ipfw_objhash_count(struct namedobj_instance *ni);
+uint32_t ipfw_objhash_count_type(struct namedobj_instance *ni, uint16_t type);
+int ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f,
+ void *arg);
+int ipfw_objhash_foreach_type(struct namedobj_instance *ni, objhash_cb_t *f,
+ void *arg, uint16_t type);
+int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx);
+int ipfw_objhash_alloc_idx(void *n, uint16_t *pidx);
+void ipfw_objhash_set_funcs(struct namedobj_instance *ni,
+ objhash_hash_f *hash_f, objhash_cmp_f *cmp_f);
+int ipfw_objhash_find_type(struct namedobj_instance *ni, struct tid_info *ti,
+ uint32_t etlv, struct named_object **pno);
+void ipfw_export_obj_ntlv(struct named_object *no, ipfw_obj_ntlv *ntlv);
+ipfw_obj_ntlv *ipfw_find_name_tlv_type(void *tlvs, int len, uint16_t uidx,
+ uint32_t etlv);
+void ipfw_init_obj_rewriter(void);
+void ipfw_destroy_obj_rewriter(void);
+void ipfw_add_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count);
+int ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count);
+
+int create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd,
+ struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti);
+void update_opcode_kidx(ipfw_insn *cmd, uint16_t idx);
+int classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx);
+void ipfw_init_srv(struct ip_fw_chain *ch);
+void ipfw_destroy_srv(struct ip_fw_chain *ch);
+int ipfw_check_object_name_generic(const char *name);
+int ipfw_obj_manage_sets(struct namedobj_instance *ni, uint16_t type,
+ uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd);
+
+/* In ip_fw_eaction.c */
+typedef int (ipfw_eaction_t)(struct ip_fw_chain *ch, struct ip_fw_args *args,
+ ipfw_insn *cmd, int *done);
+int ipfw_eaction_init(struct ip_fw_chain *ch, int first);
+void ipfw_eaction_uninit(struct ip_fw_chain *ch, int last);
+
+uint16_t ipfw_add_eaction(struct ip_fw_chain *ch, ipfw_eaction_t handler,
+ const char *name);
+int ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id);
+int ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args,
+ ipfw_insn *cmd, int *done);
/* In ip_fw_table.c */
-struct radix_node;
+struct table_info;
+
+typedef int (table_lookup_t)(struct table_info *ti, void *key, uint32_t keylen,
+ uint32_t *val);
+
int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
uint32_t *val);
-int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
- uint32_t *val, int type);
-int ipfw_init_tables(struct ip_fw_chain *ch);
-void ipfw_destroy_tables(struct ip_fw_chain *ch);
-int ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl);
-int ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
- uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value);
-int ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
- uint8_t plen, uint8_t mlen, uint8_t type);
-int ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt);
-int ipfw_dump_table_entry(struct radix_node *rn, void *arg);
-int ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl);
-int ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt);
-int ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl);
+int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl,
+ uint16_t plen, void *paddr, uint32_t *val);
+struct named_object *ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch,
+ uint16_t kidx);
+int ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx);
+void ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx);
+int ipfw_init_tables(struct ip_fw_chain *ch, int first);
int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables);
+int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int nsets);
+void ipfw_destroy_tables(struct ip_fw_chain *ch, int last);
/* In ip_fw_nat.c -- XXX to be moved to ip_var.h */
@@ -341,5 +771,22 @@ extern ipfw_nat_cfg_t *ipfw_nat_del_ptr;
extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
+/* Helper functions for IP checksum adjustment */
+static __inline uint16_t
+cksum_add(uint16_t sum, uint16_t a)
+{
+ uint16_t res;
+
+ res = sum + a;
+ return (res + (res < a));
+}
+
+static __inline uint16_t
+cksum_adjust(uint16_t oldsum, uint16_t old, uint16_t new)
+{
+
+ return (~cksum_add(cksum_add(~oldsum, ~old), new));
+}
+
#endif /* _KERNEL */
#endif /* _IPFW2_PRIVATE_H */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c b/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c
index 95cd8c81..468e4ad4 100644
--- a/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c
@@ -2,6 +2,8 @@
/*-
* Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ * Copyright (c) 2014 Yandex LLC
+ * Copyright (c) 2014 Alexander V. Chernikov
*
* Supported by: Valeria Paoli
*
@@ -31,8 +33,8 @@
__FBSDID("$FreeBSD$");
/*
- * Sockopt support for ipfw. The routines here implement
- * the upper half of the ipfw code.
+ * Control socket and rule management routines for ipfw.
+ * Control is currently implemented via IP_FW3 setsockopt() code.
*/
#include <rtems/bsd/local/opt_ipfw.h>
@@ -51,30 +53,174 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
+#include <sys/fnv_hash.h>
#include <net/if.h>
#include <net/route.h>
#include <net/vnet.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
#include <netinet/in.h>
#include <netinet/ip_var.h> /* hooks */
#include <netinet/ip_fw.h>
#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/ip_fw_table.h>
#ifdef MAC
#include <security/mac/mac_framework.h>
#endif
+static int ipfw_ctl(struct sockopt *sopt);
+static int check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len,
+ struct rule_check_info *ci);
+static int check_ipfw_rule1(struct ip_fw_rule *rule, int size,
+ struct rule_check_info *ci);
+static int check_ipfw_rule0(struct ip_fw_rule0 *rule, int size,
+ struct rule_check_info *ci);
+static int rewrite_rule_uidx(struct ip_fw_chain *chain,
+ struct rule_check_info *ci);
+
+#define NAMEDOBJ_HASH_SIZE 32
+
+struct namedobj_instance {
+ struct namedobjects_head *names;
+ struct namedobjects_head *values;
+ uint32_t nn_size; /* names hash size */
+ uint32_t nv_size; /* number hash size */
+ u_long *idx_mask; /* used items bitmask */
+ uint32_t max_blocks; /* number of "long" blocks in bitmask */
+ uint32_t count; /* number of items */
+ uint16_t free_off[IPFW_MAX_SETS]; /* first possible free offset */
+ objhash_hash_f *hash_f;
+ objhash_cmp_f *cmp_f;
+};
+#define BLOCK_ITEMS (8 * sizeof(u_long)) /* Number of items for ffsl() */
+
+static uint32_t objhash_hash_name(struct namedobj_instance *ni,
+ const void *key, uint32_t kopt);
+static uint32_t objhash_hash_idx(struct namedobj_instance *ni, uint32_t val);
+static int objhash_cmp_name(struct named_object *no, const void *name,
+ uint32_t set);
+
MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
+static int dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd);
+static int add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd);
+static int del_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd);
+static int clear_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd);
+static int move_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd);
+static int manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd);
+static int dump_soptcodes(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd);
+static int dump_srvobjects(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd);
+
+/* ctl3 handler data */
+struct mtx ctl3_lock;
+#define CTL3_LOCK_INIT() mtx_init(&ctl3_lock, "ctl3_lock", NULL, MTX_DEF)
+#define CTL3_LOCK_DESTROY() mtx_destroy(&ctl3_lock)
+#define CTL3_LOCK() mtx_lock(&ctl3_lock)
+#define CTL3_UNLOCK() mtx_unlock(&ctl3_lock)
+
+static struct ipfw_sopt_handler *ctl3_handlers;
+static size_t ctl3_hsize;
+static uint64_t ctl3_refct, ctl3_gencnt;
+#define CTL3_SMALLBUF 4096 /* small page-size write buffer */
+#define CTL3_LARGEBUF 16 * 1024 * 1024 /* handle large rulesets */
+
+static int ipfw_flush_sopt_data(struct sockopt_data *sd);
+
+static struct ipfw_sopt_handler scodes[] = {
+ { IP_FW_XGET, 0, HDIR_GET, dump_config },
+ { IP_FW_XADD, 0, HDIR_BOTH, add_rules },
+ { IP_FW_XDEL, 0, HDIR_BOTH, del_rules },
+ { IP_FW_XZERO, 0, HDIR_SET, clear_rules },
+ { IP_FW_XRESETLOG, 0, HDIR_SET, clear_rules },
+ { IP_FW_XMOVE, 0, HDIR_SET, move_rules },
+ { IP_FW_SET_SWAP, 0, HDIR_SET, manage_sets },
+ { IP_FW_SET_MOVE, 0, HDIR_SET, manage_sets },
+ { IP_FW_SET_ENABLE, 0, HDIR_SET, manage_sets },
+ { IP_FW_DUMP_SOPTCODES, 0, HDIR_GET, dump_soptcodes },
+ { IP_FW_DUMP_SRVOBJECTS,0, HDIR_GET, dump_srvobjects },
+};
+
+static int
+set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule);
+static struct opcode_obj_rewrite *find_op_rw(ipfw_insn *cmd,
+ uint16_t *puidx, uint8_t *ptype);
+static int mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule,
+ uint32_t *bmask);
+static int ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule,
+ struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti);
+static int ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd,
+ struct tid_info *ti, struct obj_idx *pidx, int *unresolved);
+static void unref_rule_objects(struct ip_fw_chain *chain, struct ip_fw *rule);
+static void unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd,
+ struct obj_idx *oib, struct obj_idx *end);
+static int export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx,
+ struct sockopt_data *sd);
+
+/*
+ * Opcode object rewriter variables
+ */
+struct opcode_obj_rewrite *ctl3_rewriters;
+static size_t ctl3_rsize;
+
/*
- * static variables followed by global ones (none in this file)
+ * static variables followed by global ones
*/
+static VNET_DEFINE(uma_zone_t, ipfw_cntr_zone);
+#define V_ipfw_cntr_zone VNET(ipfw_cntr_zone)
+
+void
+ipfw_init_counters()
+{
+
+ V_ipfw_cntr_zone = uma_zcreate("IPFW counters",
+ IPFW_RULE_CNTR_SIZE, NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, UMA_ZONE_PCPU);
+}
+
+void
+ipfw_destroy_counters()
+{
+
+ uma_zdestroy(V_ipfw_cntr_zone);
+}
+
+struct ip_fw *
+ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize)
+{
+ struct ip_fw *rule;
+
+ rule = malloc(rulesize, M_IPFW, M_WAITOK | M_ZERO);
+ rule->cntr = uma_zalloc(V_ipfw_cntr_zone, M_WAITOK | M_ZERO);
+
+ return (rule);
+}
+
+static void
+free_rule(struct ip_fw *rule)
+{
+
+ uma_zfree(V_ipfw_cntr_zone, rule->cntr);
+ free(rule, M_IPFW);
+}
+
+
/*
* Find the smallest rule >= key, id.
* We could use bsearch but it is so simple that we code it directly
@@ -96,11 +242,109 @@ ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id)
lo = i + 1; /* continue from the next one */
else /* r->id >= id */
hi = i; /* this might be good */
- };
+ }
return hi;
}
/*
+ * Builds skipto cache on rule set @map.
+ */
+static void
+update_skipto_cache(struct ip_fw_chain *chain, struct ip_fw **map)
+{
+ int *smap, rulenum;
+ int i, mi;
+
+ IPFW_UH_WLOCK_ASSERT(chain);
+
+ mi = 0;
+ rulenum = map[mi]->rulenum;
+ smap = chain->idxmap_back;
+
+ if (smap == NULL)
+ return;
+
+ for (i = 0; i < 65536; i++) {
+ smap[i] = mi;
+ /* Use the same rule index until i < rulenum */
+ if (i != rulenum || i == 65535)
+ continue;
+ /* Find next rule with num > i */
+ rulenum = map[++mi]->rulenum;
+ while (rulenum == i)
+ rulenum = map[++mi]->rulenum;
+ }
+}
+
+/*
+ * Swaps prepared (backup) index with current one.
+ */
+static void
+swap_skipto_cache(struct ip_fw_chain *chain)
+{
+ int *map;
+
+ IPFW_UH_WLOCK_ASSERT(chain);
+ IPFW_WLOCK_ASSERT(chain);
+
+ map = chain->idxmap;
+ chain->idxmap = chain->idxmap_back;
+ chain->idxmap_back = map;
+}
+
+/*
+ * Allocate and initialize skipto cache.
+ */
+void
+ipfw_init_skipto_cache(struct ip_fw_chain *chain)
+{
+ int *idxmap, *idxmap_back;
+
+ idxmap = malloc(65536 * sizeof(uint32_t *), M_IPFW,
+ M_WAITOK | M_ZERO);
+ idxmap_back = malloc(65536 * sizeof(uint32_t *), M_IPFW,
+ M_WAITOK | M_ZERO);
+
+ /*
+ * Note we may be called at any time after initialization,
+ * for example, on first skipto rule, so we need to
+ * provide valid chain->idxmap on return
+ */
+
+ IPFW_UH_WLOCK(chain);
+ if (chain->idxmap != NULL) {
+ IPFW_UH_WUNLOCK(chain);
+ free(idxmap, M_IPFW);
+ free(idxmap_back, M_IPFW);
+ return;
+ }
+
+ /* Set backup pointer first to permit building cache */
+ chain->idxmap_back = idxmap_back;
+ update_skipto_cache(chain, chain->map);
+ IPFW_WLOCK(chain);
+ /* It is now safe to set chain->idxmap ptr */
+ chain->idxmap = idxmap;
+ swap_skipto_cache(chain);
+ IPFW_WUNLOCK(chain);
+ IPFW_UH_WUNLOCK(chain);
+}
+
+/*
+ * Destroys skipto cache.
+ */
+void
+ipfw_destroy_skipto_cache(struct ip_fw_chain *chain)
+{
+
+ if (chain->idxmap != NULL)
+ free(chain->idxmap, M_IPFW);
+ if (chain->idxmap != NULL)
+ free(chain->idxmap_back, M_IPFW);
+}
+
+
+/*
* allocate a new map, returns the chain locked. extra is the number
* of entries to add or delete.
*/
@@ -110,11 +354,12 @@ get_map(struct ip_fw_chain *chain, int extra, int locked)
for (;;) {
struct ip_fw **map;
- int i;
+ int i, mflags;
+
+ mflags = M_ZERO | ((locked != 0) ? M_NOWAIT : M_WAITOK);
i = chain->n_rules + extra;
- map = malloc(i * sizeof(struct ip_fw *), M_IPFW,
- locked ? M_NOWAIT : M_WAITOK);
+ map = malloc(i * sizeof(struct ip_fw *), M_IPFW, mflags);
if (map == NULL) {
printf("%s: cannot allocate map\n", __FUNCTION__);
return NULL;
@@ -143,69 +388,403 @@ swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len)
chain->n_rules = new_len;
old_map = chain->map;
chain->map = new_map;
+ swap_skipto_cache(chain);
IPFW_WUNLOCK(chain);
return old_map;
}
+
+static void
+export_cntr1_base(struct ip_fw *krule, struct ip_fw_bcounter *cntr)
+{
+ struct timeval boottime;
+
+ cntr->size = sizeof(*cntr);
+
+ if (krule->cntr != NULL) {
+ cntr->pcnt = counter_u64_fetch(krule->cntr);
+ cntr->bcnt = counter_u64_fetch(krule->cntr + 1);
+ cntr->timestamp = krule->timestamp;
+ }
+ if (cntr->timestamp > 0) {
+ getboottime(&boottime);
+ cntr->timestamp += boottime.tv_sec;
+ }
+}
+
+static void
+export_cntr0_base(struct ip_fw *krule, struct ip_fw_bcounter0 *cntr)
+{
+ struct timeval boottime;
+
+ if (krule->cntr != NULL) {
+ cntr->pcnt = counter_u64_fetch(krule->cntr);
+ cntr->bcnt = counter_u64_fetch(krule->cntr + 1);
+ cntr->timestamp = krule->timestamp;
+ }
+ if (cntr->timestamp > 0) {
+ getboottime(&boottime);
+ cntr->timestamp += boottime.tv_sec;
+ }
+}
+
+/*
+ * Copies rule @urule from v1 userland format (current).
+ * to kernel @krule.
+ * Assume @krule is zeroed.
+ */
+static void
+import_rule1(struct rule_check_info *ci)
+{
+ struct ip_fw_rule *urule;
+ struct ip_fw *krule;
+
+ urule = (struct ip_fw_rule *)ci->urule;
+ krule = (struct ip_fw *)ci->krule;
+
+ /* copy header */
+ krule->act_ofs = urule->act_ofs;
+ krule->cmd_len = urule->cmd_len;
+ krule->rulenum = urule->rulenum;
+ krule->set = urule->set;
+ krule->flags = urule->flags;
+
+ /* Save rulenum offset */
+ ci->urule_numoff = offsetof(struct ip_fw_rule, rulenum);
+
+ /* Copy opcodes */
+ memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t));
+}
+
+/*
+ * Export rule into v1 format (Current).
+ * Layout:
+ * [ ipfw_obj_tlv(IPFW_TLV_RULE_ENT)
+ * [ ip_fw_rule ] OR
+ * [ ip_fw_bcounter ip_fw_rule] (depends on rcntrs).
+ * ]
+ * Assume @data is zeroed.
+ */
+static void
+export_rule1(struct ip_fw *krule, caddr_t data, int len, int rcntrs)
+{
+ struct ip_fw_bcounter *cntr;
+ struct ip_fw_rule *urule;
+ ipfw_obj_tlv *tlv;
+
+ /* Fill in TLV header */
+ tlv = (ipfw_obj_tlv *)data;
+ tlv->type = IPFW_TLV_RULE_ENT;
+ tlv->length = len;
+
+ if (rcntrs != 0) {
+ /* Copy counters */
+ cntr = (struct ip_fw_bcounter *)(tlv + 1);
+ urule = (struct ip_fw_rule *)(cntr + 1);
+ export_cntr1_base(krule, cntr);
+ } else
+ urule = (struct ip_fw_rule *)(tlv + 1);
+
+ /* copy header */
+ urule->act_ofs = krule->act_ofs;
+ urule->cmd_len = krule->cmd_len;
+ urule->rulenum = krule->rulenum;
+ urule->set = krule->set;
+ urule->flags = krule->flags;
+ urule->id = krule->id;
+
+ /* Copy opcodes */
+ memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t));
+}
+
+
+/*
+ * Copies rule @urule from FreeBSD8 userland format (v0)
+ * to kernel @krule.
+ * Assume @krule is zeroed.
+ */
+static void
+import_rule0(struct rule_check_info *ci)
+{
+ struct ip_fw_rule0 *urule;
+ struct ip_fw *krule;
+ int cmdlen, l;
+ ipfw_insn *cmd;
+ ipfw_insn_limit *lcmd;
+ ipfw_insn_if *cmdif;
+
+ urule = (struct ip_fw_rule0 *)ci->urule;
+ krule = (struct ip_fw *)ci->krule;
+
+ /* copy header */
+ krule->act_ofs = urule->act_ofs;
+ krule->cmd_len = urule->cmd_len;
+ krule->rulenum = urule->rulenum;
+ krule->set = urule->set;
+ if ((urule->_pad & 1) != 0)
+ krule->flags |= IPFW_RULE_NOOPT;
+
+ /* Save rulenum offset */
+ ci->urule_numoff = offsetof(struct ip_fw_rule0, rulenum);
+
+ /* Copy opcodes */
+ memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t));
+
+ /*
+ * Alter opcodes:
+ * 1) convert tablearg value from 65535 to 0
+ * 2) Add high bit to O_SETFIB/O_SETDSCP values (to make room
+ * for targ).
+ * 3) convert table number in iface opcodes to u16
+ * 4) convert old `nat global` into new 65535
+ */
+ l = krule->cmd_len;
+ cmd = krule->cmd;
+ cmdlen = 0;
+
+ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+
+ switch (cmd->opcode) {
+ /* Opcodes supporting tablearg */
+ case O_TAG:
+ case O_TAGGED:
+ case O_PIPE:
+ case O_QUEUE:
+ case O_DIVERT:
+ case O_TEE:
+ case O_SKIPTO:
+ case O_CALLRETURN:
+ case O_NETGRAPH:
+ case O_NGTEE:
+ case O_NAT:
+ if (cmd->arg1 == IP_FW_TABLEARG)
+ cmd->arg1 = IP_FW_TARG;
+ else if (cmd->arg1 == 0)
+ cmd->arg1 = IP_FW_NAT44_GLOBAL;
+ break;
+ case O_SETFIB:
+ case O_SETDSCP:
+ if (cmd->arg1 == IP_FW_TABLEARG)
+ cmd->arg1 = IP_FW_TARG;
+ else
+ cmd->arg1 |= 0x8000;
+ break;
+ case O_LIMIT:
+ lcmd = (ipfw_insn_limit *)cmd;
+ if (lcmd->conn_limit == IP_FW_TABLEARG)
+ lcmd->conn_limit = IP_FW_TARG;
+ break;
+ /* Interface tables */
+ case O_XMIT:
+ case O_RECV:
+ case O_VIA:
+ /* Interface table, possibly */
+ cmdif = (ipfw_insn_if *)cmd;
+ if (cmdif->name[0] != '\1')
+ break;
+
+ cmdif->p.kidx = (uint16_t)cmdif->p.glob;
+ break;
+ }
+ }
+}
+
+/*
+ * Copies rule @krule from kernel to FreeBSD8 userland format (v0)
+ */
+static void
+export_rule0(struct ip_fw *krule, struct ip_fw_rule0 *urule, int len)
+{
+ int cmdlen, l;
+ ipfw_insn *cmd;
+ ipfw_insn_limit *lcmd;
+ ipfw_insn_if *cmdif;
+
+ /* copy header */
+ memset(urule, 0, len);
+ urule->act_ofs = krule->act_ofs;
+ urule->cmd_len = krule->cmd_len;
+ urule->rulenum = krule->rulenum;
+ urule->set = krule->set;
+ if ((krule->flags & IPFW_RULE_NOOPT) != 0)
+ urule->_pad |= 1;
+
+ /* Copy opcodes */
+ memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t));
+
+ /* Export counters */
+ export_cntr0_base(krule, (struct ip_fw_bcounter0 *)&urule->pcnt);
+
+ /*
+ * Alter opcodes:
+ * 1) convert tablearg value from 0 to 65535
+ * 2) Remove highest bit from O_SETFIB/O_SETDSCP values.
+ * 3) convert table number in iface opcodes to int
+ */
+ l = urule->cmd_len;
+ cmd = urule->cmd;
+ cmdlen = 0;
+
+ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+
+ switch (cmd->opcode) {
+ /* Opcodes supporting tablearg */
+ case O_TAG:
+ case O_TAGGED:
+ case O_PIPE:
+ case O_QUEUE:
+ case O_DIVERT:
+ case O_TEE:
+ case O_SKIPTO:
+ case O_CALLRETURN:
+ case O_NETGRAPH:
+ case O_NGTEE:
+ case O_NAT:
+ if (cmd->arg1 == IP_FW_TARG)
+ cmd->arg1 = IP_FW_TABLEARG;
+ else if (cmd->arg1 == IP_FW_NAT44_GLOBAL)
+ cmd->arg1 = 0;
+ break;
+ case O_SETFIB:
+ case O_SETDSCP:
+ if (cmd->arg1 == IP_FW_TARG)
+ cmd->arg1 = IP_FW_TABLEARG;
+ else
+ cmd->arg1 &= ~0x8000;
+ break;
+ case O_LIMIT:
+ lcmd = (ipfw_insn_limit *)cmd;
+ if (lcmd->conn_limit == IP_FW_TARG)
+ lcmd->conn_limit = IP_FW_TABLEARG;
+ break;
+ /* Interface tables */
+ case O_XMIT:
+ case O_RECV:
+ case O_VIA:
+ /* Interface table, possibly */
+ cmdif = (ipfw_insn_if *)cmd;
+ if (cmdif->name[0] != '\1')
+ break;
+
+ cmdif->p.glob = cmdif->p.kidx;
+ break;
+ }
+ }
+}
+
/*
- * Add a new rule to the list. Copy the rule into a malloc'ed area, then
- * possibly create a rule number and add the rule to the list.
+ * Add new rule(s) to the list possibly creating rule number for each.
* Update the rule_number in the input struct so the caller knows it as well.
- * XXX DO NOT USE FOR THE DEFAULT RULE.
* Must be called without IPFW_UH held
*/
-int
-ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
+static int
+commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci, int count)
{
- struct ip_fw *rule;
- int i, l, insert_before;
+ int error, i, insert_before, tcount;
+ uint16_t rulenum, *pnum;
+ struct rule_check_info *ci;
+ struct ip_fw *krule;
struct ip_fw **map; /* the new array of pointers */
- if (chain->map == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE - 1)
- return (EINVAL);
+ /* Check if we need to do table/obj index remap */
+ tcount = 0;
+ for (ci = rci, i = 0; i < count; ci++, i++) {
+ if (ci->object_opcodes == 0)
+ continue;
+
+ /*
+ * Rule has some object opcodes.
+ * We need to find (and create non-existing)
+ * kernel objects, and reference existing ones.
+ */
+ error = rewrite_rule_uidx(chain, ci);
+ if (error != 0) {
+
+ /*
+ * rewrite failed, state for current rule
+ * has been reverted. Check if we need to
+ * revert more.
+ */
+ if (tcount > 0) {
+
+ /*
+ * We have some more table rules
+ * we need to rollback.
+ */
+
+ IPFW_UH_WLOCK(chain);
+ while (ci != rci) {
+ ci--;
+ if (ci->object_opcodes == 0)
+ continue;
+ unref_rule_objects(chain,ci->krule);
+
+ }
+ IPFW_UH_WUNLOCK(chain);
+
+ }
+
+ return (error);
+ }
+
+ tcount++;
+ }
- l = RULESIZE(input_rule);
- rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO);
- if (rule == NULL)
- return (ENOSPC);
/* get_map returns with IPFW_UH_WLOCK if successful */
- map = get_map(chain, 1, 0 /* not locked */);
+ map = get_map(chain, count, 0 /* not locked */);
if (map == NULL) {
- free(rule, M_IPFW);
- return ENOSPC;
- }
+ if (tcount > 0) {
+ /* Unbind tables */
+ IPFW_UH_WLOCK(chain);
+ for (ci = rci, i = 0; i < count; ci++, i++) {
+ if (ci->object_opcodes == 0)
+ continue;
+
+ unref_rule_objects(chain, ci->krule);
+ }
+ IPFW_UH_WUNLOCK(chain);
+ }
- bcopy(input_rule, rule, l);
- /* clear fields not settable from userland */
- rule->x_next = NULL;
- rule->next_rule = NULL;
- IPFW_ZERO_RULE_COUNTER(rule);
+ return (ENOSPC);
+ }
if (V_autoinc_step < 1)
V_autoinc_step = 1;
else if (V_autoinc_step > 1000)
V_autoinc_step = 1000;
+
+ /* FIXME: Handle count > 1 */
+ ci = rci;
+ krule = ci->krule;
+ rulenum = krule->rulenum;
+
/* find the insertion point, we will insert before */
- insert_before = rule->rulenum ? rule->rulenum + 1 : IPFW_DEFAULT_RULE;
+ insert_before = rulenum ? rulenum + 1 : IPFW_DEFAULT_RULE;
i = ipfw_find_rule(chain, insert_before, 0);
/* duplicate first part */
if (i > 0)
bcopy(chain->map, map, i * sizeof(struct ip_fw *));
- map[i] = rule;
+ map[i] = krule;
/* duplicate remaining part, we always have the default rule */
bcopy(chain->map + i, map + i + 1,
sizeof(struct ip_fw *) *(chain->n_rules - i));
- if (rule->rulenum == 0) {
- /* write back the number */
- rule->rulenum = i > 0 ? map[i-1]->rulenum : 0;
- if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
- rule->rulenum += V_autoinc_step;
- input_rule->rulenum = rule->rulenum;
+ if (rulenum == 0) {
+ /* Compute rule number and write it back */
+ rulenum = i > 0 ? map[i-1]->rulenum : 0;
+ if (rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
+ rulenum += V_autoinc_step;
+ krule->rulenum = rulenum;
+ /* Save number to userland rule */
+ pnum = (uint16_t *)((caddr_t)ci->urule + ci->urule_numoff);
+ *pnum = rulenum;
}
- rule->id = chain->id + 1;
+ krule->id = chain->id + 1;
+ update_skipto_cache(chain, map);
map = swap_map(chain, map, chain->n_rules + 1);
- chain->static_len += l;
+ chain->static_len += RULEUSIZE0(krule);
IPFW_UH_WUNLOCK(chain);
if (map)
free(map, M_IPFW);
@@ -213,6 +792,23 @@ ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
}
/*
+ * Adds @rule to the list of rules to reap
+ */
+void
+ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head,
+ struct ip_fw *rule)
+{
+
+ IPFW_UH_WLOCK_ASSERT(chain);
+
+ /* Unlink rule from everywhere */
+ unref_rule_objects(chain, rule);
+
+ *((struct ip_fw **)rule) = *head;
+ *head = rule;
+}
+
+/*
* Reclaim storage associated with a list of rules. This is
* typically the list created using remove_rule.
* A NULL pointer on input is handled correctly.
@@ -223,22 +819,12 @@ ipfw_reap_rules(struct ip_fw *head)
struct ip_fw *rule;
while ((rule = head) != NULL) {
- head = head->x_next;
- free(rule, M_IPFW);
+ head = *((struct ip_fw **)head);
+ free_rule(rule);
}
}
/*
- * Used by del_entry() to check if a rule should be kept.
- * Returns 1 if the rule must be kept, 0 otherwise.
- *
- * Called with cmd = {0,1,5}.
- * cmd == 0 matches on rule numbers, excludes rules in RESVD_SET if n == 0 ;
- * cmd == 1 matches on set numbers only, rule numbers are ignored;
- * cmd == 5 matches on rule and set numbers.
- *
- * n == 0 is a wildcard for rule numbers, there is no wildcard for sets.
- *
* Rules to keep are
* (default || reserved || !match_set || !match_number)
* where
@@ -255,14 +841,608 @@ ipfw_reap_rules(struct ip_fw *head)
* // number is ignored for cmd == 1 or n == 0
*
*/
+int
+ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt)
+{
+
+ /* Don't match default rule for modification queries */
+ if (rule->rulenum == IPFW_DEFAULT_RULE &&
+ (rt->flags & IPFW_RCFLAG_DEFAULT) == 0)
+ return (0);
+
+ /* Don't match rules in reserved set for flush requests */
+ if ((rt->flags & IPFW_RCFLAG_ALL) != 0 && rule->set == RESVD_SET)
+ return (0);
+
+ /* If we're filtering by set, don't match other sets */
+ if ((rt->flags & IPFW_RCFLAG_SET) != 0 && rule->set != rt->set)
+ return (0);
+
+ if ((rt->flags & IPFW_RCFLAG_RANGE) != 0 &&
+ (rule->rulenum < rt->start_rule || rule->rulenum > rt->end_rule))
+ return (0);
+
+ return (1);
+}
+
+struct manage_sets_args {
+ uint16_t set;
+ uint8_t new_set;
+};
+
+static int
+swap_sets_cb(struct namedobj_instance *ni, struct named_object *no,
+ void *arg)
+{
+ struct manage_sets_args *args;
+
+ args = (struct manage_sets_args *)arg;
+ if (no->set == (uint8_t)args->set)
+ no->set = args->new_set;
+ else if (no->set == args->new_set)
+ no->set = (uint8_t)args->set;
+ return (0);
+}
+
+static int
+move_sets_cb(struct namedobj_instance *ni, struct named_object *no,
+ void *arg)
+{
+ struct manage_sets_args *args;
+
+ args = (struct manage_sets_args *)arg;
+ if (no->set == (uint8_t)args->set)
+ no->set = args->new_set;
+ return (0);
+}
+
+static int
+test_sets_cb(struct namedobj_instance *ni, struct named_object *no,
+ void *arg)
+{
+ struct manage_sets_args *args;
+
+ args = (struct manage_sets_args *)arg;
+ if (no->set != (uint8_t)args->set)
+ return (0);
+ if (ipfw_objhash_lookup_name_type(ni, args->new_set,
+ no->etlv, no->name) != NULL)
+ return (EEXIST);
+ return (0);
+}
+
+/*
+ * Generic function to handle moving and swapping sets.
+ */
+int
+ipfw_obj_manage_sets(struct namedobj_instance *ni, uint16_t type,
+ uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd)
+{
+ struct manage_sets_args args;
+ struct named_object *no;
+
+ args.set = set;
+ args.new_set = new_set;
+ switch (cmd) {
+ case SWAP_ALL:
+ return (ipfw_objhash_foreach_type(ni, swap_sets_cb,
+ &args, type));
+ case TEST_ALL:
+ return (ipfw_objhash_foreach_type(ni, test_sets_cb,
+ &args, type));
+ case MOVE_ALL:
+ return (ipfw_objhash_foreach_type(ni, move_sets_cb,
+ &args, type));
+ case COUNT_ONE:
+ /*
+ * @set used to pass kidx.
+ * When @new_set is zero - reset object counter,
+ * otherwise increment it.
+ */
+ no = ipfw_objhash_lookup_kidx(ni, set);
+ if (new_set != 0)
+ no->ocnt++;
+ else
+ no->ocnt = 0;
+ return (0);
+ case TEST_ONE:
+ /* @set used to pass kidx */
+ no = ipfw_objhash_lookup_kidx(ni, set);
+ /*
+ * First check number of references:
+ * when it differs, this mean other rules are holding
+ * reference to given object, so it is not possible to
+ * change its set. Note that refcnt may account references
+ * to some going-to-be-added rules. Since we don't know
+ * their numbers (and even if they will be added) it is
+ * perfectly OK to return error here.
+ */
+ if (no->ocnt != no->refcnt)
+ return (EBUSY);
+ if (ipfw_objhash_lookup_name_type(ni, new_set, type,
+ no->name) != NULL)
+ return (EEXIST);
+ return (0);
+ case MOVE_ONE:
+ /* @set used to pass kidx */
+ no = ipfw_objhash_lookup_kidx(ni, set);
+ no->set = new_set;
+ return (0);
+ }
+ return (EINVAL);
+}
+
+/*
+ * Delete rules matching range @rt.
+ * Saves number of deleted rules in @ndel.
+ *
+ * Returns 0 on success.
+ */
+static int
+delete_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int *ndel)
+{
+ struct ip_fw *reap, *rule, **map;
+ int end, start;
+ int i, n, ndyn, ofs;
+
+ reap = NULL;
+ IPFW_UH_WLOCK(chain); /* arbitrate writers */
+
+ /*
+ * Stage 1: Determine range to inspect.
+ * Range is half-open, e.g. [start, end).
+ */
+ start = 0;
+ end = chain->n_rules - 1;
+
+ if ((rt->flags & IPFW_RCFLAG_RANGE) != 0) {
+ start = ipfw_find_rule(chain, rt->start_rule, 0);
+
+ end = ipfw_find_rule(chain, rt->end_rule, 0);
+ if (rt->end_rule != IPFW_DEFAULT_RULE)
+ while (chain->map[end]->rulenum == rt->end_rule)
+ end++;
+ }
+
+ /* Allocate new map of the same size */
+ map = get_map(chain, 0, 1 /* locked */);
+ if (map == NULL) {
+ IPFW_UH_WUNLOCK(chain);
+ return (ENOMEM);
+ }
+
+ n = 0;
+ ndyn = 0;
+ ofs = start;
+ /* 1. bcopy the initial part of the map */
+ if (start > 0)
+ bcopy(chain->map, map, start * sizeof(struct ip_fw *));
+ /* 2. copy active rules between start and end */
+ for (i = start; i < end; i++) {
+ rule = chain->map[i];
+ if (ipfw_match_range(rule, rt) == 0) {
+ map[ofs++] = rule;
+ continue;
+ }
+
+ n++;
+ if (ipfw_is_dyn_rule(rule) != 0)
+ ndyn++;
+ }
+ /* 3. copy the final part of the map */
+ bcopy(chain->map + end, map + ofs,
+ (chain->n_rules - end) * sizeof(struct ip_fw *));
+ /* 4. recalculate skipto cache */
+ update_skipto_cache(chain, map);
+ /* 5. swap the maps (under UH_WLOCK + WHLOCK) */
+ map = swap_map(chain, map, chain->n_rules - n);
+ /* 6. Remove all dynamic states originated by deleted rules */
+ if (ndyn > 0)
+ ipfw_expire_dyn_rules(chain, rt);
+ /* 7. now remove the rules deleted from the old map */
+ for (i = start; i < end; i++) {
+ rule = map[i];
+ if (ipfw_match_range(rule, rt) == 0)
+ continue;
+ chain->static_len -= RULEUSIZE0(rule);
+ ipfw_reap_add(chain, &reap, rule);
+ }
+ IPFW_UH_WUNLOCK(chain);
+
+ ipfw_reap_rules(reap);
+ if (map != NULL)
+ free(map, M_IPFW);
+ *ndel = n;
+ return (0);
+}
+
+static int
+move_objects(struct ip_fw_chain *ch, ipfw_range_tlv *rt)
+{
+ struct opcode_obj_rewrite *rw;
+ struct ip_fw *rule;
+ ipfw_insn *cmd;
+ int cmdlen, i, l, c;
+ uint16_t kidx;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ /* Stage 1: count number of references by given rules */
+ for (c = 0, i = 0; i < ch->n_rules - 1; i++) {
+ rule = ch->map[i];
+ if (ipfw_match_range(rule, rt) == 0)
+ continue;
+ if (rule->set == rt->new_set) /* nothing to do */
+ continue;
+ /* Search opcodes with named objects */
+ for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd;
+ l > 0; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+ rw = find_op_rw(cmd, &kidx, NULL);
+ if (rw == NULL || rw->manage_sets == NULL)
+ continue;
+ /*
+ * When manage_sets() returns non-zero value to
+ * COUNT_ONE command, consider it an object that
+ * doesn't support sets (e.g. disabled with sysctl).
+ * So, skip checks for this object.
+ */
+ if (rw->manage_sets(ch, kidx, 1, COUNT_ONE) != 0)
+ continue;
+ c++;
+ }
+ }
+ if (c == 0) /* No objects found */
+ return (0);
+ /* Stage 2: verify "ownership" */
+ for (c = 0, i = 0; (i < ch->n_rules - 1) && c == 0; i++) {
+ rule = ch->map[i];
+ if (ipfw_match_range(rule, rt) == 0)
+ continue;
+ if (rule->set == rt->new_set) /* nothing to do */
+ continue;
+ /* Search opcodes with named objects */
+ for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd;
+ l > 0 && c == 0; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+ rw = find_op_rw(cmd, &kidx, NULL);
+ if (rw == NULL || rw->manage_sets == NULL)
+ continue;
+ /* Test for ownership and conflicting names */
+ c = rw->manage_sets(ch, kidx,
+ (uint8_t)rt->new_set, TEST_ONE);
+ }
+ }
+ /* Stage 3: change set and cleanup */
+ for (i = 0; i < ch->n_rules - 1; i++) {
+ rule = ch->map[i];
+ if (ipfw_match_range(rule, rt) == 0)
+ continue;
+ if (rule->set == rt->new_set) /* nothing to do */
+ continue;
+ /* Search opcodes with named objects */
+ for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd;
+ l > 0; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+ rw = find_op_rw(cmd, &kidx, NULL);
+ if (rw == NULL || rw->manage_sets == NULL)
+ continue;
+ /* cleanup object counter */
+ rw->manage_sets(ch, kidx,
+ 0 /* reset counter */, COUNT_ONE);
+ if (c != 0)
+ continue;
+ /* change set */
+ rw->manage_sets(ch, kidx,
+ (uint8_t)rt->new_set, MOVE_ONE);
+ }
+ }
+ return (c);
+}/*
+ * Moves rules matching range @rt
+ * to the set @rt->new_set.
+ *
+ * Returns 0 on success.
+ */
+static int
+move_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt)
+{
+ struct ip_fw *rule;
+ int i;
+
+ IPFW_UH_WLOCK(chain);
+
+ /*
+ * Move rules with matching parameters to a new set.
+ * This one is much more complex. We have to ensure
+ * that all referenced tables (if any) are referenced
+ * by given rule subset only. Otherwise, we can't move
+ * them to new set and have to return error.
+ */
+ if ((i = move_objects(chain, rt)) != 0) {
+ IPFW_UH_WUNLOCK(chain);
+ return (i);
+ }
+
+ /* XXX: We have to do swap holding WLOCK */
+ for (i = 0; i < chain->n_rules; i++) {
+ rule = chain->map[i];
+ if (ipfw_match_range(rule, rt) == 0)
+ continue;
+ rule->set = rt->new_set;
+ }
+
+ IPFW_UH_WUNLOCK(chain);
+
+ return (0);
+}
+
+/*
+ * Clear counters for a specific rule.
+ * Normally run under IPFW_UH_RLOCK, but these are idempotent ops
+ * so we only care that rules do not disappear.
+ */
+static void
+clear_counters(struct ip_fw *rule, int log_only)
+{
+ ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);
+
+ if (log_only == 0)
+ IPFW_ZERO_RULE_COUNTER(rule);
+ if (l->o.opcode == O_LOG)
+ l->log_left = l->max_log;
+}
+
+/*
+ * Flushes rules counters and/or log values on matching range.
+ *
+ * Returns number of items cleared.
+ */
+static int
+clear_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int log_only)
+{
+ struct ip_fw *rule;
+ int num;
+ int i;
+
+ num = 0;
+ rt->flags |= IPFW_RCFLAG_DEFAULT;
+
+ IPFW_UH_WLOCK(chain); /* arbitrate writers */
+ for (i = 0; i < chain->n_rules; i++) {
+ rule = chain->map[i];
+ if (ipfw_match_range(rule, rt) == 0)
+ continue;
+ clear_counters(rule, log_only);
+ num++;
+ }
+ IPFW_UH_WUNLOCK(chain);
+
+ return (num);
+}
+
+static int
+check_range_tlv(ipfw_range_tlv *rt)
+{
+
+ if (rt->head.length != sizeof(*rt))
+ return (1);
+ if (rt->start_rule > rt->end_rule)
+ return (1);
+ if (rt->set >= IPFW_MAX_SETS || rt->new_set >= IPFW_MAX_SETS)
+ return (1);
+
+ if ((rt->flags & IPFW_RCFLAG_USER) != rt->flags)
+ return (1);
+
+ return (0);
+}
+
+/*
+ * Delete rules matching specified parameters
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_range_tlv ]
+ * Reply: [ ipfw_obj_header ipfw_range_tlv ]
+ *
+ * Saves number of deleted rules in ipfw_range_tlv->new_set.
+ *
+ * Returns 0 on success.
+ */
+static int
+del_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_range_header *rh;
+ int error, ndel;
+
+ if (sd->valsize != sizeof(*rh))
+ return (EINVAL);
+
+ rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize);
+
+ if (check_range_tlv(&rh->range) != 0)
+ return (EINVAL);
+
+ ndel = 0;
+ if ((error = delete_range(chain, &rh->range, &ndel)) != 0)
+ return (error);
+
+ /* Save number of rules deleted */
+ rh->range.new_set = ndel;
+ return (0);
+}
+
+/*
+ * Move rules/sets matching specified parameters
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_range_tlv ]
+ *
+ * Returns 0 on success.
+ */
+static int
+move_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_range_header *rh;
+
+ if (sd->valsize != sizeof(*rh))
+ return (EINVAL);
+
+ rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize);
+
+ if (check_range_tlv(&rh->range) != 0)
+ return (EINVAL);
+
+ return (move_range(chain, &rh->range));
+}
+
+/*
+ * Clear rule accounting data matching specified parameters
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_range_tlv ]
+ * Reply: [ ipfw_obj_header ipfw_range_tlv ]
+ *
+ * Saves number of cleared rules in ipfw_range_tlv->new_set.
+ *
+ * Returns 0 on success.
+ */
static int
-keep_rule(struct ip_fw *rule, uint8_t cmd, uint8_t set, uint32_t n)
+clear_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
{
- return
- (rule->rulenum == IPFW_DEFAULT_RULE) ||
- (cmd == 0 && n == 0 && rule->set == RESVD_SET) ||
- !(cmd == 0 || rule->set == set) ||
- !(cmd == 1 || n == 0 || n == rule->rulenum);
+ ipfw_range_header *rh;
+ int log_only, num;
+ char *msg;
+
+ if (sd->valsize != sizeof(*rh))
+ return (EINVAL);
+
+ rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize);
+
+ if (check_range_tlv(&rh->range) != 0)
+ return (EINVAL);
+
+ log_only = (op3->opcode == IP_FW_XRESETLOG);
+
+ num = clear_range(chain, &rh->range, log_only);
+
+ if (rh->range.flags & IPFW_RCFLAG_ALL)
+ msg = log_only ? "All logging counts reset" :
+ "Accounting cleared";
+ else
+ msg = log_only ? "logging count reset" : "cleared";
+
+ if (V_fw_verbose) {
+ int lev = LOG_SECURITY | LOG_NOTICE;
+ log(lev, "ipfw: %s.\n", msg);
+ }
+
+ /* Save number of rules cleared */
+ rh->range.new_set = num;
+ return (0);
+}
+
+static void
+enable_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt)
+{
+ uint32_t v_set;
+
+ IPFW_UH_WLOCK_ASSERT(chain);
+
+ /* Change enabled/disabled sets mask */
+ v_set = (V_set_disable | rt->set) & ~rt->new_set;
+ v_set &= ~(1 << RESVD_SET); /* set RESVD_SET always enabled */
+ IPFW_WLOCK(chain);
+ V_set_disable = v_set;
+ IPFW_WUNLOCK(chain);
+}
+
+static int
+swap_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int mv)
+{
+ struct opcode_obj_rewrite *rw;
+ struct ip_fw *rule;
+ int i;
+
+ IPFW_UH_WLOCK_ASSERT(chain);
+
+ if (rt->set == rt->new_set) /* nothing to do */
+ return (0);
+
+ if (mv != 0) {
+ /*
+ * Before moving the rules we need to check that
+ * there aren't any conflicting named objects.
+ */
+ for (rw = ctl3_rewriters;
+ rw < ctl3_rewriters + ctl3_rsize; rw++) {
+ if (rw->manage_sets == NULL)
+ continue;
+ i = rw->manage_sets(chain, (uint8_t)rt->set,
+ (uint8_t)rt->new_set, TEST_ALL);
+ if (i != 0)
+ return (EEXIST);
+ }
+ }
+ /* Swap or move two sets */
+ for (i = 0; i < chain->n_rules - 1; i++) {
+ rule = chain->map[i];
+ if (rule->set == (uint8_t)rt->set)
+ rule->set = (uint8_t)rt->new_set;
+ else if (rule->set == (uint8_t)rt->new_set && mv == 0)
+ rule->set = (uint8_t)rt->set;
+ }
+ for (rw = ctl3_rewriters; rw < ctl3_rewriters + ctl3_rsize; rw++) {
+ if (rw->manage_sets == NULL)
+ continue;
+ rw->manage_sets(chain, (uint8_t)rt->set,
+ (uint8_t)rt->new_set, mv != 0 ? MOVE_ALL: SWAP_ALL);
+ }
+ return (0);
+}
+
+/*
+ * Swaps or moves set
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_range_tlv ]
+ *
+ * Returns 0 on success.
+ */
+static int
+manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_range_header *rh;
+ int ret;
+
+ if (sd->valsize != sizeof(*rh))
+ return (EINVAL);
+
+ rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize);
+
+ if (rh->range.head.length != sizeof(ipfw_range_tlv))
+ return (1);
+ /* enable_sets() expects bitmasks. */
+ if (op3->opcode != IP_FW_SET_ENABLE &&
+ (rh->range.set >= IPFW_MAX_SETS ||
+ rh->range.new_set >= IPFW_MAX_SETS))
+ return (EINVAL);
+
+ ret = 0;
+ IPFW_UH_WLOCK(chain);
+ switch (op3->opcode) {
+ case IP_FW_SET_SWAP:
+ case IP_FW_SET_MOVE:
+ ret = swap_sets(chain, &rh->range,
+ op3->opcode == IP_FW_SET_MOVE);
+ break;
+ case IP_FW_SET_ENABLE:
+ enable_sets(chain, &rh->range);
+ break;
+ }
+ IPFW_UH_WUNLOCK(chain);
+
+ return (ret);
}
/**
@@ -282,12 +1462,11 @@ keep_rule(struct ip_fw *rule, uint8_t cmd, uint8_t set, uint32_t n)
static int
del_entry(struct ip_fw_chain *chain, uint32_t arg)
{
- struct ip_fw *rule;
uint32_t num; /* rule number or old_set */
uint8_t cmd, new_set;
- int start, end, i, ofs, n;
- struct ip_fw **map = NULL;
+ int do_del, ndel;
int error = 0;
+ ipfw_range_tlv rt;
num = arg & 0xffff;
cmd = (arg >> 24) & 0xff;
@@ -303,149 +1482,60 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg)
return EINVAL;
}
- IPFW_UH_WLOCK(chain); /* arbitrate writers */
- chain->reap = NULL; /* prepare for deletions */
+ /* Convert old requests into new representation */
+ memset(&rt, 0, sizeof(rt));
+ rt.start_rule = num;
+ rt.end_rule = num;
+ rt.set = num;
+ rt.new_set = new_set;
+ do_del = 0;
switch (cmd) {
- case 0: /* delete rules "num" (num == 0 matches all) */
- case 1: /* delete all rules in set N */
- case 5: /* delete rules with number N and set "new_set". */
-
- /*
- * Locate first rule to delete (start), the rule after
- * the last one to delete (end), and count how many
- * rules to delete (n). Always use keep_rule() to
- * determine which rules to keep.
- */
- n = 0;
- if (cmd == 1) {
- /* look for a specific set including RESVD_SET.
- * Must scan the entire range, ignore num.
- */
- new_set = num;
- for (start = -1, end = i = 0; i < chain->n_rules; i++) {
- if (keep_rule(chain->map[i], cmd, new_set, 0))
- continue;
- if (start < 0)
- start = i;
- end = i;
- n++;
- }
- end++; /* first non-matching */
- } else {
- /* Optimized search on rule numbers */
- start = ipfw_find_rule(chain, num, 0);
- for (end = start; end < chain->n_rules; end++) {
- rule = chain->map[end];
- if (num > 0 && rule->rulenum != num)
- break;
- if (!keep_rule(rule, cmd, new_set, num))
- n++;
- }
- }
-
- if (n == 0) {
- /* A flush request (arg == 0 or cmd == 1) on empty
- * ruleset returns with no error. On the contrary,
- * if there is no match on a specific request,
- * we return EINVAL.
- */
- if (arg != 0 && cmd != 1)
- error = EINVAL;
- break;
- }
-
- /* We have something to delete. Allocate the new map */
- map = get_map(chain, -n, 1 /* locked */);
- if (map == NULL) {
- error = EINVAL;
- break;
- }
-
- /* 1. bcopy the initial part of the map */
- if (start > 0)
- bcopy(chain->map, map, start * sizeof(struct ip_fw *));
- /* 2. copy active rules between start and end */
- for (i = ofs = start; i < end; i++) {
- rule = chain->map[i];
- if (keep_rule(rule, cmd, new_set, num))
- map[ofs++] = rule;
- }
- /* 3. copy the final part of the map */
- bcopy(chain->map + end, map + ofs,
- (chain->n_rules - end) * sizeof(struct ip_fw *));
- /* 4. swap the maps (under BH_LOCK) */
- map = swap_map(chain, map, chain->n_rules - n);
- /* 5. now remove the rules deleted from the old map */
- if (cmd == 1)
- ipfw_expire_dyn_rules(chain, NULL, new_set);
- for (i = start; i < end; i++) {
- rule = map[i];
- if (keep_rule(rule, cmd, new_set, num))
- continue;
- chain->static_len -= RULESIZE(rule);
- if (cmd != 1)
- ipfw_expire_dyn_rules(chain, rule, RESVD_SET);
- rule->x_next = chain->reap;
- chain->reap = rule;
- }
+ case 0: /* delete rules numbered "rulenum" */
+ if (num == 0)
+ rt.flags |= IPFW_RCFLAG_ALL;
+ else
+ rt.flags |= IPFW_RCFLAG_RANGE;
+ do_del = 1;
break;
-
- /*
- * In the next 3 cases the loop stops at (n_rules - 1)
- * because the default rule is never eligible..
- */
-
- case 2: /* move rules with given RULE number to new set */
- for (i = 0; i < chain->n_rules - 1; i++) {
- rule = chain->map[i];
- if (rule->rulenum == num)
- rule->set = new_set;
- }
+ case 1: /* delete rules in set "rulenum" */
+ rt.flags |= IPFW_RCFLAG_SET;
+ do_del = 1;
break;
-
- case 3: /* move rules with given SET number to new set */
- for (i = 0; i < chain->n_rules - 1; i++) {
- rule = chain->map[i];
- if (rule->set == num)
- rule->set = new_set;
- }
+ case 5: /* delete rules "rulenum" and set "new_set" */
+ rt.flags |= IPFW_RCFLAG_RANGE | IPFW_RCFLAG_SET;
+ rt.set = new_set;
+ rt.new_set = 0;
+ do_del = 1;
break;
-
- case 4: /* swap two sets */
- for (i = 0; i < chain->n_rules - 1; i++) {
- rule = chain->map[i];
- if (rule->set == num)
- rule->set = new_set;
- else if (rule->set == new_set)
- rule->set = num;
- }
+ case 2: /* move rules "rulenum" to set "new_set" */
+ rt.flags |= IPFW_RCFLAG_RANGE;
break;
+ case 3: /* move rules from set "rulenum" to set "new_set" */
+ IPFW_UH_WLOCK(chain);
+ error = swap_sets(chain, &rt, 1);
+ IPFW_UH_WUNLOCK(chain);
+ return (error);
+ case 4: /* swap sets "rulenum" and "new_set" */
+ IPFW_UH_WLOCK(chain);
+ error = swap_sets(chain, &rt, 0);
+ IPFW_UH_WUNLOCK(chain);
+ return (error);
+ default:
+ return (ENOTSUP);
}
- rule = chain->reap;
- chain->reap = NULL;
- IPFW_UH_WUNLOCK(chain);
- ipfw_reap_rules(rule);
- if (map)
- free(map, M_IPFW);
- return error;
-}
+ if (do_del != 0) {
+ if ((error = delete_range(chain, &rt, &ndel)) != 0)
+ return (error);
-/*
- * Clear counters for a specific rule.
- * Normally run under IPFW_UH_RLOCK, but these are idempotent ops
- * so we only care that rules do not disappear.
- */
-static void
-clear_counters(struct ip_fw *rule, int log_only)
-{
- ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);
+ if (ndel == 0 && (cmd != 1 && num != 0))
+ return (EINVAL);
- if (log_only == 0)
- IPFW_ZERO_RULE_COUNTER(rule);
- if (l->o.opcode == O_LOG)
- l->log_left = l->max_log;
+ return (0);
+ }
+
+ return (move_range(chain, &rt));
}
/**
@@ -516,23 +1606,57 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
return (0);
}
+
/*
- * Check validity of the structure before insert.
- * Rules are simple, so this mostly need to check rule sizes.
+ * Check rule head in FreeBSD11 format
+ *
*/
static int
-check_ipfw_struct(struct ip_fw *rule, int size)
+check_ipfw_rule1(struct ip_fw_rule *rule, int size,
+ struct rule_check_info *ci)
{
- int l, cmdlen = 0;
- int have_action=0;
- ipfw_insn *cmd;
+ int l;
+
+ if (size < sizeof(*rule)) {
+ printf("ipfw: rule too short\n");
+ return (EINVAL);
+ }
+
+ /* Check for valid cmd_len */
+ l = roundup2(RULESIZE(rule), sizeof(uint64_t));
+ if (l != size) {
+ printf("ipfw: size mismatch (have %d want %d)\n", size, l);
+ return (EINVAL);
+ }
+ if (rule->act_ofs >= rule->cmd_len) {
+ printf("ipfw: bogus action offset (%u > %u)\n",
+ rule->act_ofs, rule->cmd_len - 1);
+ return (EINVAL);
+ }
+
+ if (rule->rulenum > IPFW_DEFAULT_RULE - 1)
+ return (EINVAL);
+
+ return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci));
+}
+
+/*
+ * Check rule head in FreeBSD8 format
+ *
+ */
+static int
+check_ipfw_rule0(struct ip_fw_rule0 *rule, int size,
+ struct rule_check_info *ci)
+{
+ int l;
if (size < sizeof(*rule)) {
printf("ipfw: rule too short\n");
return (EINVAL);
}
- /* first, check for valid size */
- l = RULESIZE(rule);
+
+ /* Check for valid cmd_len */
+ l = sizeof(*rule) + rule->cmd_len * 4 - 4;
if (l != size) {
printf("ipfw: size mismatch (have %d want %d)\n", size, l);
return (EINVAL);
@@ -542,12 +1666,26 @@ check_ipfw_struct(struct ip_fw *rule, int size)
rule->act_ofs, rule->cmd_len - 1);
return (EINVAL);
}
+
+ if (rule->rulenum > IPFW_DEFAULT_RULE - 1)
+ return (EINVAL);
+
+ return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci));
+}
+
+static int
+check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci)
+{
+ int cmdlen, l;
+ int have_action;
+
+ have_action = 0;
+
/*
* Now go for the individual checks. Very simple ones, basically only
* instruction sizes.
*/
- for (l = rule->cmd_len, cmd = rule->cmd ;
- l > 0 ; l -= cmdlen, cmd += cmdlen) {
+ for (l = cmd_len; l > 0 ; l -= cmdlen, cmd += cmdlen) {
cmdlen = F_LEN(cmd);
if (cmdlen > l) {
printf("ipfw: opcode %d size truncated\n",
@@ -557,6 +1695,10 @@ check_ipfw_struct(struct ip_fw *rule, int size)
switch (cmd->opcode) {
case O_PROBE_STATE:
case O_KEEP_STATE:
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
+ goto bad_size;
+ ci->object_opcodes++;
+ break;
case O_PROTO:
case O_IP_SRC_ME:
case O_IP_DST_ME:
@@ -588,6 +1730,35 @@ check_ipfw_struct(struct ip_fw *rule, int size)
goto bad_size;
break;
+ case O_EXTERNAL_ACTION:
+ if (cmd->arg1 == 0 ||
+ cmdlen != F_INSN_SIZE(ipfw_insn)) {
+ printf("ipfw: invalid external "
+ "action opcode\n");
+ return (EINVAL);
+ }
+ ci->object_opcodes++;
+ /* Do we have O_EXTERNAL_INSTANCE opcode? */
+ if (l != cmdlen) {
+ l -= cmdlen;
+ cmd += cmdlen;
+ cmdlen = F_LEN(cmd);
+ if (cmd->opcode != O_EXTERNAL_INSTANCE) {
+ printf("ipfw: invalid opcode "
+ "next to external action %u\n",
+ cmd->opcode);
+ return (EINVAL);
+ }
+ if (cmd->arg1 == 0 ||
+ cmdlen != F_INSN_SIZE(ipfw_insn)) {
+ printf("ipfw: invalid external "
+ "action instance opcode\n");
+ return (EINVAL);
+ }
+ ci->object_opcodes++;
+ }
+ goto check_action;
+
case O_FIB:
if (cmdlen != F_INSN_SIZE(ipfw_insn))
goto bad_size;
@@ -601,10 +1772,10 @@ check_ipfw_struct(struct ip_fw *rule, int size)
case O_SETFIB:
if (cmdlen != F_INSN_SIZE(ipfw_insn))
goto bad_size;
- if ((cmd->arg1 != IP_FW_TABLEARG) &&
- (cmd->arg1 >= rt_numfibs)) {
+ if ((cmd->arg1 != IP_FW_TARG) &&
+ ((cmd->arg1 & 0x7FFF) >= rt_numfibs)) {
printf("ipfw: invalid fib number %d\n",
- cmd->arg1);
+ cmd->arg1 & 0x7FFF);
return EINVAL;
}
goto check_action;
@@ -625,6 +1796,7 @@ check_ipfw_struct(struct ip_fw *rule, int size)
case O_LIMIT:
if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
goto bad_size;
+ ci->object_opcodes++;
break;
case O_LOG:
@@ -639,7 +1811,7 @@ check_ipfw_struct(struct ip_fw *rule, int size)
case O_IP_SRC_MASK:
case O_IP_DST_MASK:
/* only odd command lengths */
- if ( !(cmdlen & 1) || cmdlen > 31)
+ if ((cmdlen & 1) == 0)
goto bad_size;
break;
@@ -666,6 +1838,18 @@ check_ipfw_struct(struct ip_fw *rule, int size)
cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 &&
cmdlen != F_INSN_SIZE(ipfw_insn_u32))
goto bad_size;
+ ci->object_opcodes++;
+ break;
+ case O_IP_FLOW_LOOKUP:
+ if (cmd->arg1 >= V_fw_tables_max) {
+ printf("ipfw: invalid table number %d\n",
+ cmd->arg1);
+ return (EINVAL);
+ }
+ if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
+ cmdlen != F_INSN_SIZE(ipfw_insn_u32))
+ goto bad_size;
+ ci->object_opcodes++;
break;
case O_MACADDR2:
if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
@@ -700,6 +1884,7 @@ check_ipfw_struct(struct ip_fw *rule, int size)
case O_VIA:
if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
goto bad_size;
+ ci->object_opcodes++;
break;
case O_ALTQ:
@@ -742,8 +1927,10 @@ check_ipfw_struct(struct ip_fw *rule, int size)
if (cmdlen != F_INSN_SIZE(ipfw_insn_nat))
goto bad_size;
goto check_action;
- case O_FORWARD_MAC: /* XXX not implemented yet */
case O_CHECK_STATE:
+ ci->object_opcodes++;
+ /* FALLTHROUGH */
+ case O_FORWARD_MAC: /* XXX not implemented yet */
case O_COUNT:
case O_ACCEPT:
case O_DENY:
@@ -763,14 +1950,14 @@ check_action:
printf("ipfw: opcode %d, multiple actions"
" not allowed\n",
cmd->opcode);
- return EINVAL;
+ return (EINVAL);
}
have_action = 1;
if (l != cmdlen) {
printf("ipfw: opcode %d, action must be"
" last opcode\n",
cmd->opcode);
- return EINVAL;
+ return (EINVAL);
}
break;
#ifdef INET6
@@ -813,25 +2000,25 @@ check_action:
case O_IP6_DST_MASK:
case O_ICMP6TYPE:
printf("ipfw: no IPv6 support in kernel\n");
- return EPROTONOSUPPORT;
+ return (EPROTONOSUPPORT);
#endif
default:
printf("ipfw: opcode %d, unknown opcode\n",
cmd->opcode);
- return EINVAL;
+ return (EINVAL);
}
}
}
if (have_action == 0) {
printf("ipfw: missing action\n");
- return EINVAL;
+ return (EINVAL);
}
return 0;
bad_size:
printf("ipfw: opcode %d size %d wrong\n",
cmd->opcode, cmdlen);
- return EINVAL;
+ return (EINVAL);
}
@@ -863,8 +2050,8 @@ struct ip_fw7 {
ipfw_insn cmd[1]; /* storage for commands */
};
- int convert_rule_to_7(struct ip_fw *rule);
-int convert_rule_to_8(struct ip_fw *rule);
+static int convert_rule_to_7(struct ip_fw_rule0 *rule);
+static int convert_rule_to_8(struct ip_fw_rule0 *rule);
#ifndef RULESIZE7
#define RULESIZE7(rule) (sizeof(struct ip_fw7) + \
@@ -882,10 +2069,15 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
{
char *bp = buf;
char *ep = bp + space;
- struct ip_fw *rule, *dst;
- int l, i;
+ struct ip_fw *rule;
+ struct ip_fw_rule0 *dst;
+ struct timeval boottime;
+ int error, i, l, warnflag;
time_t boot_seconds;
+ warnflag = 0;
+
+ getboottime(&boottime);
boot_seconds = boottime.tv_sec;
for (i = 0; i < chain->n_rules; i++) {
rule = chain->map[i];
@@ -894,9 +2086,12 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
/* Convert rule to FreeBSd 7.2 format */
l = RULESIZE7(rule);
if (bp + l + sizeof(uint32_t) <= ep) {
- int error;
bcopy(rule, bp, l + sizeof(uint32_t));
- error = convert_rule_to_7((struct ip_fw *) bp);
+ error = set_legacy_obj_kidx(chain,
+ (struct ip_fw_rule0 *)bp);
+ if (error != 0)
+ return (0);
+ error = convert_rule_to_7((struct ip_fw_rule0 *) bp);
if (error)
return 0; /*XXX correct? */
/*
@@ -914,76 +2109,1631 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
continue; /* go to next rule */
}
- /* normal mode, don't touch rules */
- l = RULESIZE(rule);
+ l = RULEUSIZE0(rule);
if (bp + l > ep) { /* should not happen */
printf("overflow dumping static rules\n");
break;
}
- dst = (struct ip_fw *)bp;
- bcopy(rule, dst, l);
+ dst = (struct ip_fw_rule0 *)bp;
+ export_rule0(rule, dst, l);
+ error = set_legacy_obj_kidx(chain, dst);
+
/*
* XXX HACK. Store the disable mask in the "next"
* pointer in a wild attempt to keep the ABI the same.
* Why do we do this on EVERY rule?
+ *
+ * XXX: "ipfw set show" (ab)uses IP_FW_GET to read disabled mask
+ * so we need to fail _after_ saving at least one mask.
*/
bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable));
if (dst->timestamp)
dst->timestamp += boot_seconds;
bp += l;
+
+ if (error != 0) {
+ if (error == 2) {
+ /* Non-fatal table rewrite error. */
+ warnflag = 1;
+ continue;
+ }
+ printf("Stop on rule %d. Fail to convert table\n",
+ rule->rulenum);
+ break;
+ }
}
+ if (warnflag != 0)
+ printf("ipfw: process %s is using legacy interfaces,"
+ " consider rebuilding\n", "");
ipfw_get_dynamic(chain, &bp, ep); /* protected by the dynamic lock */
return (bp - (char *)buf);
}
-#define IP_FW3_OPLENGTH(x) ((x)->sopt_valsize - sizeof(ip_fw3_opheader))
-/**
- * {set|get}sockopt parser.
+struct dump_args {
+ uint32_t b; /* start rule */
+ uint32_t e; /* end rule */
+ uint32_t rcount; /* number of rules */
+ uint32_t rsize; /* rules size */
+ uint32_t tcount; /* number of tables */
+ int rcounters; /* counters */
+};
+
+void
+ipfw_export_obj_ntlv(struct named_object *no, ipfw_obj_ntlv *ntlv)
+{
+
+ ntlv->head.type = no->etlv;
+ ntlv->head.length = sizeof(*ntlv);
+ ntlv->idx = no->kidx;
+ strlcpy(ntlv->name, no->name, sizeof(ntlv->name));
+}
+
+/*
+ * Export named object info in instance @ni, identified by @kidx
+ * to ipfw_obj_ntlv. TLV is allocated from @sd space.
+ *
+ * Returns 0 on success.
+ */
+static int
+export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx,
+ struct sockopt_data *sd)
+{
+ struct named_object *no;
+ ipfw_obj_ntlv *ntlv;
+
+ no = ipfw_objhash_lookup_kidx(ni, kidx);
+ KASSERT(no != NULL, ("invalid object kernel index passed"));
+
+ ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
+ if (ntlv == NULL)
+ return (ENOMEM);
+
+ ipfw_export_obj_ntlv(no, ntlv);
+ return (0);
+}
+
+/*
+ * Dumps static rules with table TLVs in buffer @sd.
+ *
+ * Returns 0 on success.
+ */
+static int
+dump_static_rules(struct ip_fw_chain *chain, struct dump_args *da,
+ uint32_t *bmask, struct sockopt_data *sd)
+{
+ int error;
+ int i, l;
+ uint32_t tcount;
+ ipfw_obj_ctlv *ctlv;
+ struct ip_fw *krule;
+ struct namedobj_instance *ni;
+ caddr_t dst;
+
+ /* Dump table names first (if any) */
+ if (da->tcount > 0) {
+ /* Header first */
+ ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv));
+ if (ctlv == NULL)
+ return (ENOMEM);
+ ctlv->head.type = IPFW_TLV_TBLNAME_LIST;
+ ctlv->head.length = da->tcount * sizeof(ipfw_obj_ntlv) +
+ sizeof(*ctlv);
+ ctlv->count = da->tcount;
+ ctlv->objsize = sizeof(ipfw_obj_ntlv);
+ }
+
+ i = 0;
+ tcount = da->tcount;
+ ni = ipfw_get_table_objhash(chain);
+ while (tcount > 0) {
+ if ((bmask[i / 32] & (1 << (i % 32))) == 0) {
+ i++;
+ continue;
+ }
+
+ /* Jump to shared named object bitmask */
+ if (i >= IPFW_TABLES_MAX) {
+ ni = CHAIN_TO_SRV(chain);
+ i -= IPFW_TABLES_MAX;
+ bmask += IPFW_TABLES_MAX / 32;
+ }
+
+ if ((error = export_objhash_ntlv(ni, i, sd)) != 0)
+ return (error);
+
+ i++;
+ tcount--;
+ }
+
+ /* Dump rules */
+ ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv));
+ if (ctlv == NULL)
+ return (ENOMEM);
+ ctlv->head.type = IPFW_TLV_RULE_LIST;
+ ctlv->head.length = da->rsize + sizeof(*ctlv);
+ ctlv->count = da->rcount;
+
+ for (i = da->b; i < da->e; i++) {
+ krule = chain->map[i];
+
+ l = RULEUSIZE1(krule) + sizeof(ipfw_obj_tlv);
+ if (da->rcounters != 0)
+ l += sizeof(struct ip_fw_bcounter);
+ dst = (caddr_t)ipfw_get_sopt_space(sd, l);
+ if (dst == NULL)
+ return (ENOMEM);
+
+ export_rule1(krule, dst, l, da->rcounters);
+ }
+
+ return (0);
+}
+
+/*
+ * Marks every object index used in @rule with bit in @bmask.
+ * Used to generate bitmask of referenced tables/objects for given ruleset
+ * or its part.
+ *
+ * Returns number of newly-referenced objects.
+ */
+static int
+mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule,
+ uint32_t *bmask)
+{
+ struct opcode_obj_rewrite *rw;
+ ipfw_insn *cmd;
+ int bidx, cmdlen, l, count;
+ uint16_t kidx;
+ uint8_t subtype;
+
+ l = rule->cmd_len;
+ cmd = rule->cmd;
+ cmdlen = 0;
+ count = 0;
+ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+
+ rw = find_op_rw(cmd, &kidx, &subtype);
+ if (rw == NULL)
+ continue;
+
+ bidx = kidx / 32;
+ /*
+ * Maintain separate bitmasks for table and
+ * non-table objects.
+ */
+ if (rw->etlv != IPFW_TLV_TBL_NAME)
+ bidx += IPFW_TABLES_MAX / 32;
+
+ if ((bmask[bidx] & (1 << (kidx % 32))) == 0)
+ count++;
+
+ bmask[bidx] |= 1 << (kidx % 32);
+ }
+
+ return (count);
+}
+
+/*
+ * Dumps requested objects data
+ * Data layout (version 0)(current):
+ * Request: [ ipfw_cfg_lheader ] + IPFW_CFG_GET_* flags
+ * size = ipfw_cfg_lheader.size
+ * Reply: [ ipfw_cfg_lheader
+ * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional)
+ * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST)
+ * ipfw_obj_tlv(IPFW_TLV_RULE_ENT) [ ip_fw_bcounter (optional) ip_fw_rule ]
+ * ] (optional)
+ * [ ipfw_obj_ctlv(IPFW_TLV_STATE_LIST) ipfw_obj_dyntlv x N ] (optional)
+ * ]
+ * * NOTE IPFW_TLV_STATE_LIST has the single valid field: objsize.
+ * The rest (size, count) are set to zero and needs to be ignored.
+ *
+ * Returns 0 on success.
*/
+static int
+dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_cfg_lheader *hdr;
+ struct ip_fw *rule;
+ size_t sz, rnum;
+ uint32_t hdr_flags;
+ int error, i;
+ struct dump_args da;
+ uint32_t *bmask;
+
+ hdr = (ipfw_cfg_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr));
+ if (hdr == NULL)
+ return (EINVAL);
+
+ error = 0;
+ bmask = NULL;
+ /* Allocate needed state. Note we allocate 2xspace mask, for table&srv */
+ if (hdr->flags & IPFW_CFG_GET_STATIC)
+ bmask = malloc(IPFW_TABLES_MAX / 4, M_TEMP, M_WAITOK | M_ZERO);
+
+ IPFW_UH_RLOCK(chain);
+
+ /*
+ * STAGE 1: Determine size/count for objects in range.
+ * Prepare used tables bitmask.
+ */
+ sz = sizeof(ipfw_cfg_lheader);
+ memset(&da, 0, sizeof(da));
+
+ da.b = 0;
+ da.e = chain->n_rules;
+
+ if (hdr->end_rule != 0) {
+ /* Handle custom range */
+ if ((rnum = hdr->start_rule) > IPFW_DEFAULT_RULE)
+ rnum = IPFW_DEFAULT_RULE;
+ da.b = ipfw_find_rule(chain, rnum, 0);
+ rnum = hdr->end_rule;
+ rnum = (rnum < IPFW_DEFAULT_RULE) ? rnum+1 : IPFW_DEFAULT_RULE;
+ da.e = ipfw_find_rule(chain, rnum, 0) + 1;
+ }
+
+ if (hdr->flags & IPFW_CFG_GET_STATIC) {
+ for (i = da.b; i < da.e; i++) {
+ rule = chain->map[i];
+ da.rsize += RULEUSIZE1(rule) + sizeof(ipfw_obj_tlv);
+ da.rcount++;
+ /* Update bitmask of used objects for given range */
+ da.tcount += mark_object_kidx(chain, rule, bmask);
+ }
+ /* Add counters if requested */
+ if (hdr->flags & IPFW_CFG_GET_COUNTERS) {
+ da.rsize += sizeof(struct ip_fw_bcounter) * da.rcount;
+ da.rcounters = 1;
+ }
+
+ if (da.tcount > 0)
+ sz += da.tcount * sizeof(ipfw_obj_ntlv) +
+ sizeof(ipfw_obj_ctlv);
+ sz += da.rsize + sizeof(ipfw_obj_ctlv);
+ }
+
+ if (hdr->flags & IPFW_CFG_GET_STATES)
+ sz += ipfw_dyn_get_count() * sizeof(ipfw_obj_dyntlv) +
+ sizeof(ipfw_obj_ctlv);
+
+
+ /*
+ * Fill header anyway.
+ * Note we have to save header fields to stable storage
+ * buffer inside @sd can be flushed after dumping rules
+ */
+ hdr->size = sz;
+ hdr->set_mask = ~V_set_disable;
+ hdr_flags = hdr->flags;
+ hdr = NULL;
+
+ if (sd->valsize < sz) {
+ error = ENOMEM;
+ goto cleanup;
+ }
+
+ /* STAGE2: Store actual data */
+ if (hdr_flags & IPFW_CFG_GET_STATIC) {
+ error = dump_static_rules(chain, &da, bmask, sd);
+ if (error != 0)
+ goto cleanup;
+ }
+
+ if (hdr_flags & IPFW_CFG_GET_STATES)
+ error = ipfw_dump_states(chain, sd);
+
+cleanup:
+ IPFW_UH_RUNLOCK(chain);
+
+ if (bmask != NULL)
+ free(bmask, M_TEMP);
+
+ return (error);
+}
+
int
-ipfw_ctl(struct sockopt *sopt)
+ipfw_check_object_name_generic(const char *name)
+{
+ int nsize;
+
+ nsize = sizeof(((ipfw_obj_ntlv *)0)->name);
+ if (strnlen(name, nsize) == nsize)
+ return (EINVAL);
+ if (name[0] == '\0')
+ return (EINVAL);
+ return (0);
+}
+
+/*
+ * Creates non-existent objects referenced by rule.
+ *
+ * Return 0 on success.
+ */
+int
+create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd,
+ struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti)
+{
+ struct opcode_obj_rewrite *rw;
+ struct obj_idx *p;
+ uint16_t kidx;
+ int error;
+
+ /*
+ * Compatibility stuff: do actual creation for non-existing,
+ * but referenced objects.
+ */
+ for (p = oib; p < pidx; p++) {
+ if (p->kidx != 0)
+ continue;
+
+ ti->uidx = p->uidx;
+ ti->type = p->type;
+ ti->atype = 0;
+
+ rw = find_op_rw(cmd + p->off, NULL, NULL);
+ KASSERT(rw != NULL, ("Unable to find handler for op %d",
+ (cmd + p->off)->opcode));
+
+ if (rw->create_object == NULL)
+ error = EOPNOTSUPP;
+ else
+ error = rw->create_object(ch, ti, &kidx);
+ if (error == 0) {
+ p->kidx = kidx;
+ continue;
+ }
+
+ /*
+ * Error happened. We have to rollback everything.
+ * Drop all already acquired references.
+ */
+ IPFW_UH_WLOCK(ch);
+ unref_oib_objects(ch, cmd, oib, pidx);
+ IPFW_UH_WUNLOCK(ch);
+
+ return (error);
+ }
+
+ return (0);
+}
+
+/*
+ * Compatibility function for old ipfw(8) binaries.
+ * Rewrites table/nat kernel indices with userland ones.
+ * Convert tables matching '/^\d+$/' to their atoi() value.
+ * Use number 65535 for other tables.
+ *
+ * Returns 0 on success.
+ */
+static int
+set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule)
+{
+ struct opcode_obj_rewrite *rw;
+ struct named_object *no;
+ ipfw_insn *cmd;
+ char *end;
+ long val;
+ int cmdlen, error, l;
+ uint16_t kidx, uidx;
+ uint8_t subtype;
+
+ error = 0;
+
+ l = rule->cmd_len;
+ cmd = rule->cmd;
+ cmdlen = 0;
+ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+
+ /* Check if is index in given opcode */
+ rw = find_op_rw(cmd, &kidx, &subtype);
+ if (rw == NULL)
+ continue;
+
+ /* Try to find referenced kernel object */
+ no = rw->find_bykidx(ch, kidx);
+ if (no == NULL)
+ continue;
+
+ val = strtol(no->name, &end, 10);
+ if (*end == '\0' && val < 65535) {
+ uidx = val;
+ } else {
+
+ /*
+ * We are called via legacy opcode.
+ * Save error and show table as fake number
+ * not to make ipfw(8) hang.
+ */
+ uidx = 65535;
+ error = 2;
+ }
+
+ rw->update(cmd, uidx);
+ }
+
+ return (error);
+}
+
+
+/*
+ * Unreferences all already-referenced objects in given @cmd rule,
+ * using information in @oib.
+ *
+ * Used to rollback partially converted rule on error.
+ */
+static void
+unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd, struct obj_idx *oib,
+ struct obj_idx *end)
+{
+ struct opcode_obj_rewrite *rw;
+ struct named_object *no;
+ struct obj_idx *p;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ for (p = oib; p < end; p++) {
+ if (p->kidx == 0)
+ continue;
+
+ rw = find_op_rw(cmd + p->off, NULL, NULL);
+ KASSERT(rw != NULL, ("Unable to find handler for op %d",
+ (cmd + p->off)->opcode));
+
+ /* Find & unref by existing idx */
+ no = rw->find_bykidx(ch, p->kidx);
+ KASSERT(no != NULL, ("Ref'd object %d disappeared", p->kidx));
+ no->refcnt--;
+ }
+}
+
+/*
+ * Remove references from every object used in @rule.
+ * Used at rule removal code.
+ */
+static void
+unref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule)
+{
+ struct opcode_obj_rewrite *rw;
+ struct named_object *no;
+ ipfw_insn *cmd;
+ int cmdlen, l;
+ uint16_t kidx;
+ uint8_t subtype;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ l = rule->cmd_len;
+ cmd = rule->cmd;
+ cmdlen = 0;
+ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+
+ rw = find_op_rw(cmd, &kidx, &subtype);
+ if (rw == NULL)
+ continue;
+ no = rw->find_bykidx(ch, kidx);
+
+ KASSERT(no != NULL, ("table id %d not found", kidx));
+ KASSERT(no->subtype == subtype,
+ ("wrong type %d (%d) for table id %d",
+ no->subtype, subtype, kidx));
+ KASSERT(no->refcnt > 0, ("refcount for table %d is %d",
+ kidx, no->refcnt));
+
+ if (no->refcnt == 1 && rw->destroy_object != NULL)
+ rw->destroy_object(ch, no);
+ else
+ no->refcnt--;
+ }
+}
+
+
+/*
+ * Find and reference object (if any) stored in instruction @cmd.
+ *
+ * Saves object info in @pidx, sets
+ * - @unresolved to 1 if object should exists but not found
+ *
+ * Returns non-zero value in case of error.
+ */
+static int
+ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, struct tid_info *ti,
+ struct obj_idx *pidx, int *unresolved)
+{
+ struct named_object *no;
+ struct opcode_obj_rewrite *rw;
+ int error;
+
+ /* Check if this opcode is candidate for rewrite */
+ rw = find_op_rw(cmd, &ti->uidx, &ti->type);
+ if (rw == NULL)
+ return (0);
+
+ /* Need to rewrite. Save necessary fields */
+ pidx->uidx = ti->uidx;
+ pidx->type = ti->type;
+
+ /* Try to find referenced kernel object */
+ error = rw->find_byname(ch, ti, &no);
+ if (error != 0)
+ return (error);
+ if (no == NULL) {
+ /*
+ * Report about unresolved object for automaic
+ * creation.
+ */
+ *unresolved = 1;
+ return (0);
+ }
+
+ /* Found. Bump refcount and update kidx. */
+ no->refcnt++;
+ rw->update(cmd, no->kidx);
+ return (0);
+}
+
+/*
+ * Finds and bumps refcount for objects referenced by given @rule.
+ * Auto-creates non-existing tables.
+ * Fills in @oib array with userland/kernel indexes.
+ *
+ * Returns 0 on success.
+ */
+static int
+ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule,
+ struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti)
+{
+ struct obj_idx *pidx;
+ ipfw_insn *cmd;
+ int cmdlen, error, l, unresolved;
+
+ pidx = oib;
+ l = rule->cmd_len;
+ cmd = rule->cmd;
+ cmdlen = 0;
+ error = 0;
+
+ IPFW_UH_WLOCK(ch);
+
+ /* Increase refcount on each existing referenced table. */
+ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+ unresolved = 0;
+
+ error = ref_opcode_object(ch, cmd, ti, pidx, &unresolved);
+ if (error != 0)
+ break;
+ /*
+ * Compatibility stuff for old clients:
+ * prepare to automaitcally create non-existing objects.
+ */
+ if (unresolved != 0) {
+ pidx->off = rule->cmd_len - l;
+ pidx++;
+ }
+ }
+
+ if (error != 0) {
+ /* Unref everything we have already done */
+ unref_oib_objects(ch, rule->cmd, oib, pidx);
+ IPFW_UH_WUNLOCK(ch);
+ return (error);
+ }
+ IPFW_UH_WUNLOCK(ch);
+
+ /* Perform auto-creation for non-existing objects */
+ if (pidx != oib)
+ error = create_objects_compat(ch, rule->cmd, oib, pidx, ti);
+
+ /* Calculate real number of dynamic objects */
+ ci->object_opcodes = (uint16_t)(pidx - oib);
+
+ return (error);
+}
+
+/*
+ * Checks is opcode is referencing table of appropriate type.
+ * Adds reference count for found table if true.
+ * Rewrites user-supplied opcode values with kernel ones.
+ *
+ * Returns 0 on success and appropriate error code otherwise.
+ */
+static int
+rewrite_rule_uidx(struct ip_fw_chain *chain, struct rule_check_info *ci)
+{
+ int error;
+ ipfw_insn *cmd;
+ uint8_t type;
+ struct obj_idx *p, *pidx_first, *pidx_last;
+ struct tid_info ti;
+
+ /*
+ * Prepare an array for storing opcode indices.
+ * Use stack allocation by default.
+ */
+ if (ci->object_opcodes <= (sizeof(ci->obuf)/sizeof(ci->obuf[0]))) {
+ /* Stack */
+ pidx_first = ci->obuf;
+ } else
+ pidx_first = malloc(
+ ci->object_opcodes * sizeof(struct obj_idx),
+ M_IPFW, M_WAITOK | M_ZERO);
+
+ error = 0;
+ type = 0;
+ memset(&ti, 0, sizeof(ti));
+
+ /* Use set rule is assigned to. */
+ ti.set = ci->krule->set;
+ if (ci->ctlv != NULL) {
+ ti.tlvs = (void *)(ci->ctlv + 1);
+ ti.tlen = ci->ctlv->head.length - sizeof(ipfw_obj_ctlv);
+ }
+
+ /* Reference all used tables and other objects */
+ error = ref_rule_objects(chain, ci->krule, ci, pidx_first, &ti);
+ if (error != 0)
+ goto free;
+ /*
+ * Note that ref_rule_objects() might have updated ci->object_opcodes
+ * to reflect actual number of object opcodes.
+ */
+
+ /* Perform rewrite of remaining opcodes */
+ p = pidx_first;
+ pidx_last = pidx_first + ci->object_opcodes;
+ for (p = pidx_first; p < pidx_last; p++) {
+ cmd = ci->krule->cmd + p->off;
+ update_opcode_kidx(cmd, p->kidx);
+ }
+
+free:
+ if (pidx_first != ci->obuf)
+ free(pidx_first, M_IPFW);
+
+ return (error);
+}
+
+/*
+ * Adds one or more rules to ipfw @chain.
+ * Data layout (version 0)(current):
+ * Request:
+ * [
+ * ip_fw3_opheader
+ * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional *1)
+ * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] (*2) (*3)
+ * ]
+ * Reply:
+ * [
+ * ip_fw3_opheader
+ * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional)
+ * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ]
+ * ]
+ *
+ * Rules in reply are modified to store their actual ruleset number.
+ *
+ * (*1) TLVs inside IPFW_TLV_TBL_LIST needs to be sorted ascending
+ * according to their idx field and there has to be no duplicates.
+ * (*2) Numbered rules inside IPFW_TLV_RULE_LIST needs to be sorted ascending.
+ * (*3) Each ip_fw structure needs to be aligned to u64 boundary.
+ *
+ * Returns 0 on success.
+ */
+static int
+add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_ctlv *ctlv, *rtlv, *tstate;
+ ipfw_obj_ntlv *ntlv;
+ int clen, error, idx;
+ uint32_t count, read;
+ struct ip_fw_rule *r;
+ struct rule_check_info rci, *ci, *cbuf;
+ int i, rsize;
+
+ op3 = (ip_fw3_opheader *)ipfw_get_sopt_space(sd, sd->valsize);
+ ctlv = (ipfw_obj_ctlv *)(op3 + 1);
+
+ read = sizeof(ip_fw3_opheader);
+ rtlv = NULL;
+ tstate = NULL;
+ cbuf = NULL;
+ memset(&rci, 0, sizeof(struct rule_check_info));
+
+ if (read + sizeof(*ctlv) > sd->valsize)
+ return (EINVAL);
+
+ if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) {
+ clen = ctlv->head.length;
+ /* Check size and alignment */
+ if (clen > sd->valsize || clen < sizeof(*ctlv))
+ return (EINVAL);
+ if ((clen % sizeof(uint64_t)) != 0)
+ return (EINVAL);
+
+ /*
+ * Some table names or other named objects.
+ * Check for validness.
+ */
+ count = (ctlv->head.length - sizeof(*ctlv)) / sizeof(*ntlv);
+ if (ctlv->count != count || ctlv->objsize != sizeof(*ntlv))
+ return (EINVAL);
+
+ /*
+ * Check each TLV.
+ * Ensure TLVs are sorted ascending and
+ * there are no duplicates.
+ */
+ idx = -1;
+ ntlv = (ipfw_obj_ntlv *)(ctlv + 1);
+ while (count > 0) {
+ if (ntlv->head.length != sizeof(ipfw_obj_ntlv))
+ return (EINVAL);
+
+ error = ipfw_check_object_name_generic(ntlv->name);
+ if (error != 0)
+ return (error);
+
+ if (ntlv->idx <= idx)
+ return (EINVAL);
+
+ idx = ntlv->idx;
+ count--;
+ ntlv++;
+ }
+
+ tstate = ctlv;
+ read += ctlv->head.length;
+ ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length);
+ }
+
+ if (read + sizeof(*ctlv) > sd->valsize)
+ return (EINVAL);
+
+ if (ctlv->head.type == IPFW_TLV_RULE_LIST) {
+ clen = ctlv->head.length;
+ if (clen + read > sd->valsize || clen < sizeof(*ctlv))
+ return (EINVAL);
+ if ((clen % sizeof(uint64_t)) != 0)
+ return (EINVAL);
+
+ /*
+ * TODO: Permit adding multiple rules at once
+ */
+ if (ctlv->count != 1)
+ return (ENOTSUP);
+
+ clen -= sizeof(*ctlv);
+
+ if (ctlv->count > clen / sizeof(struct ip_fw_rule))
+ return (EINVAL);
+
+ /* Allocate state for each rule or use stack */
+ if (ctlv->count == 1) {
+ memset(&rci, 0, sizeof(struct rule_check_info));
+ cbuf = &rci;
+ } else
+ cbuf = malloc(ctlv->count * sizeof(*ci), M_TEMP,
+ M_WAITOK | M_ZERO);
+ ci = cbuf;
+
+ /*
+ * Check each rule for validness.
+ * Ensure numbered rules are sorted ascending
+ * and properly aligned
+ */
+ idx = 0;
+ r = (struct ip_fw_rule *)(ctlv + 1);
+ count = 0;
+ error = 0;
+ while (clen > 0) {
+ rsize = roundup2(RULESIZE(r), sizeof(uint64_t));
+ if (rsize > clen || ctlv->count <= count) {
+ error = EINVAL;
+ break;
+ }
+
+ ci->ctlv = tstate;
+ error = check_ipfw_rule1(r, rsize, ci);
+ if (error != 0)
+ break;
+
+ /* Check sorting */
+ if (r->rulenum != 0 && r->rulenum < idx) {
+ printf("rulenum %d idx %d\n", r->rulenum, idx);
+ error = EINVAL;
+ break;
+ }
+ idx = r->rulenum;
+
+ ci->urule = (caddr_t)r;
+
+ rsize = roundup2(rsize, sizeof(uint64_t));
+ clen -= rsize;
+ r = (struct ip_fw_rule *)((caddr_t)r + rsize);
+ count++;
+ ci++;
+ }
+
+ if (ctlv->count != count || error != 0) {
+ if (cbuf != &rci)
+ free(cbuf, M_TEMP);
+ return (EINVAL);
+ }
+
+ rtlv = ctlv;
+ read += ctlv->head.length;
+ ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length);
+ }
+
+ if (read != sd->valsize || rtlv == NULL || rtlv->count == 0) {
+ if (cbuf != NULL && cbuf != &rci)
+ free(cbuf, M_TEMP);
+ return (EINVAL);
+ }
+
+ /*
+ * Passed rules seems to be valid.
+ * Allocate storage and try to add them to chain.
+ */
+ for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) {
+ clen = RULEKSIZE1((struct ip_fw_rule *)ci->urule);
+ ci->krule = ipfw_alloc_rule(chain, clen);
+ import_rule1(ci);
+ }
+
+ if ((error = commit_rules(chain, cbuf, rtlv->count)) != 0) {
+ /* Free allocate krules */
+ for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++)
+ free_rule(ci->krule);
+ }
+
+ if (cbuf != NULL && cbuf != &rci)
+ free(cbuf, M_TEMP);
+
+ return (error);
+}
+
+/*
+ * Lists all sopts currently registered.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
+ * Reply: [ ipfw_obj_lheader ipfw_sopt_info x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+dump_soptcodes(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ struct _ipfw_obj_lheader *olh;
+ ipfw_sopt_info *i;
+ struct ipfw_sopt_handler *sh;
+ uint32_t count, n, size;
+
+ olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
+ if (olh == NULL)
+ return (EINVAL);
+ if (sd->valsize < olh->size)
+ return (EINVAL);
+
+ CTL3_LOCK();
+ count = ctl3_hsize;
+ size = count * sizeof(ipfw_sopt_info) + sizeof(ipfw_obj_lheader);
+
+ /* Fill in header regadless of buffer size */
+ olh->count = count;
+ olh->objsize = sizeof(ipfw_sopt_info);
+
+ if (size > olh->size) {
+ olh->size = size;
+ CTL3_UNLOCK();
+ return (ENOMEM);
+ }
+ olh->size = size;
+
+ for (n = 1; n <= count; n++) {
+ i = (ipfw_sopt_info *)ipfw_get_sopt_space(sd, sizeof(*i));
+ KASSERT(i != NULL, ("previously checked buffer is not enough"));
+ sh = &ctl3_handlers[n];
+ i->opcode = sh->opcode;
+ i->version = sh->version;
+ i->refcnt = sh->refcnt;
+ }
+ CTL3_UNLOCK();
+
+ return (0);
+}
+
+/*
+ * Compares two opcodes.
+ * Used both in qsort() and bsearch().
+ *
+ * Returns 0 if match is found.
+ */
+static int
+compare_opcodes(const void *_a, const void *_b)
+{
+ const struct opcode_obj_rewrite *a, *b;
+
+ a = (const struct opcode_obj_rewrite *)_a;
+ b = (const struct opcode_obj_rewrite *)_b;
+
+ if (a->opcode < b->opcode)
+ return (-1);
+ else if (a->opcode > b->opcode)
+ return (1);
+
+ return (0);
+}
+
+/*
+ * XXX: Rewrite bsearch()
+ */
+static int
+find_op_rw_range(uint16_t op, struct opcode_obj_rewrite **plo,
+ struct opcode_obj_rewrite **phi)
+{
+ struct opcode_obj_rewrite *ctl3_max, *lo, *hi, h, *rw;
+
+ memset(&h, 0, sizeof(h));
+ h.opcode = op;
+
+ rw = (struct opcode_obj_rewrite *)bsearch(&h, ctl3_rewriters,
+ ctl3_rsize, sizeof(h), compare_opcodes);
+ if (rw == NULL)
+ return (1);
+
+ /* Find the first element matching the same opcode */
+ lo = rw;
+ for ( ; lo > ctl3_rewriters && (lo - 1)->opcode == op; lo--)
+ ;
+
+ /* Find the last element matching the same opcode */
+ hi = rw;
+ ctl3_max = ctl3_rewriters + ctl3_rsize;
+ for ( ; (hi + 1) < ctl3_max && (hi + 1)->opcode == op; hi++)
+ ;
+
+ *plo = lo;
+ *phi = hi;
+
+ return (0);
+}
+
+/*
+ * Finds opcode object rewriter based on @code.
+ *
+ * Returns pointer to handler or NULL.
+ */
+static struct opcode_obj_rewrite *
+find_op_rw(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+ struct opcode_obj_rewrite *rw, *lo, *hi;
+ uint16_t uidx;
+ uint8_t subtype;
+
+ if (find_op_rw_range(cmd->opcode, &lo, &hi) != 0)
+ return (NULL);
+
+ for (rw = lo; rw <= hi; rw++) {
+ if (rw->classifier(cmd, &uidx, &subtype) == 0) {
+ if (puidx != NULL)
+ *puidx = uidx;
+ if (ptype != NULL)
+ *ptype = subtype;
+ return (rw);
+ }
+ }
+
+ return (NULL);
+}
+int
+classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx)
+{
+
+ if (find_op_rw(cmd, puidx, NULL) == 0)
+ return (1);
+ return (0);
+}
+
+void
+update_opcode_kidx(ipfw_insn *cmd, uint16_t idx)
+{
+ struct opcode_obj_rewrite *rw;
+
+ rw = find_op_rw(cmd, NULL, NULL);
+ KASSERT(rw != NULL, ("No handler to update opcode %d", cmd->opcode));
+ rw->update(cmd, idx);
+}
+
+void
+ipfw_init_obj_rewriter()
+{
+
+ ctl3_rewriters = NULL;
+ ctl3_rsize = 0;
+}
+
+void
+ipfw_destroy_obj_rewriter()
+{
+
+ if (ctl3_rewriters != NULL)
+ free(ctl3_rewriters, M_IPFW);
+ ctl3_rewriters = NULL;
+ ctl3_rsize = 0;
+}
+
+/*
+ * Adds one or more opcode object rewrite handlers to the global array.
+ * Function may sleep.
+ */
+void
+ipfw_add_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count)
+{
+ size_t sz;
+ struct opcode_obj_rewrite *tmp;
+
+ CTL3_LOCK();
+
+ for (;;) {
+ sz = ctl3_rsize + count;
+ CTL3_UNLOCK();
+ tmp = malloc(sizeof(*rw) * sz, M_IPFW, M_WAITOK | M_ZERO);
+ CTL3_LOCK();
+ if (ctl3_rsize + count <= sz)
+ break;
+
+ /* Retry */
+ free(tmp, M_IPFW);
+ }
+
+ /* Merge old & new arrays */
+ sz = ctl3_rsize + count;
+ memcpy(tmp, ctl3_rewriters, ctl3_rsize * sizeof(*rw));
+ memcpy(&tmp[ctl3_rsize], rw, count * sizeof(*rw));
+ qsort(tmp, sz, sizeof(*rw), compare_opcodes);
+ /* Switch new and free old */
+ if (ctl3_rewriters != NULL)
+ free(ctl3_rewriters, M_IPFW);
+ ctl3_rewriters = tmp;
+ ctl3_rsize = sz;
+
+ CTL3_UNLOCK();
+}
+
+/*
+ * Removes one or more object rewrite handlers from the global array.
+ */
+int
+ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count)
+{
+ size_t sz;
+ struct opcode_obj_rewrite *ctl3_max, *ktmp, *lo, *hi;
+ int i;
+
+ CTL3_LOCK();
+
+ for (i = 0; i < count; i++) {
+ if (find_op_rw_range(rw[i].opcode, &lo, &hi) != 0)
+ continue;
+
+ for (ktmp = lo; ktmp <= hi; ktmp++) {
+ if (ktmp->classifier != rw[i].classifier)
+ continue;
+
+ ctl3_max = ctl3_rewriters + ctl3_rsize;
+ sz = (ctl3_max - (ktmp + 1)) * sizeof(*ktmp);
+ memmove(ktmp, ktmp + 1, sz);
+ ctl3_rsize--;
+ break;
+ }
+
+ }
+
+ if (ctl3_rsize == 0) {
+ if (ctl3_rewriters != NULL)
+ free(ctl3_rewriters, M_IPFW);
+ ctl3_rewriters = NULL;
+ }
+
+ CTL3_UNLOCK();
+
+ return (0);
+}
+
+static int
+export_objhash_ntlv_internal(struct namedobj_instance *ni,
+ struct named_object *no, void *arg)
+{
+ struct sockopt_data *sd;
+ ipfw_obj_ntlv *ntlv;
+
+ sd = (struct sockopt_data *)arg;
+ ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
+ if (ntlv == NULL)
+ return (ENOMEM);
+ ipfw_export_obj_ntlv(no, ntlv);
+ return (0);
+}
+
+/*
+ * Lists all service objects.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ] size = ipfw_obj_lheader.size
+ * Reply: [ ipfw_obj_lheader [ ipfw_obj_ntlv x N ] (optional) ]
+ * Returns 0 on success
+ */
+static int
+dump_srvobjects(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_lheader *hdr;
+ int count;
+
+ hdr = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr));
+ if (hdr == NULL)
+ return (EINVAL);
+
+ IPFW_UH_RLOCK(chain);
+ count = ipfw_objhash_count(CHAIN_TO_SRV(chain));
+ hdr->size = sizeof(ipfw_obj_lheader) + count * sizeof(ipfw_obj_ntlv);
+ if (sd->valsize < hdr->size) {
+ IPFW_UH_RUNLOCK(chain);
+ return (ENOMEM);
+ }
+ hdr->count = count;
+ hdr->objsize = sizeof(ipfw_obj_ntlv);
+ if (count > 0)
+ ipfw_objhash_foreach(CHAIN_TO_SRV(chain),
+ export_objhash_ntlv_internal, sd);
+ IPFW_UH_RUNLOCK(chain);
+ return (0);
+}
+
+/*
+ * Compares two sopt handlers (code, version and handler ptr).
+ * Used both as qsort() and bsearch().
+ * Does not compare handler for latter case.
+ *
+ * Returns 0 if match is found.
+ */
+static int
+compare_sh(const void *_a, const void *_b)
+{
+ const struct ipfw_sopt_handler *a, *b;
+
+ a = (const struct ipfw_sopt_handler *)_a;
+ b = (const struct ipfw_sopt_handler *)_b;
+
+ if (a->opcode < b->opcode)
+ return (-1);
+ else if (a->opcode > b->opcode)
+ return (1);
+
+ if (a->version < b->version)
+ return (-1);
+ else if (a->version > b->version)
+ return (1);
+
+ /* bsearch helper */
+ if (a->handler == NULL)
+ return (0);
+
+ if ((uintptr_t)a->handler < (uintptr_t)b->handler)
+ return (-1);
+ else if ((uintptr_t)a->handler > (uintptr_t)b->handler)
+ return (1);
+
+ return (0);
+}
+
+/*
+ * Finds sopt handler based on @code and @version.
+ *
+ * Returns pointer to handler or NULL.
+ */
+static struct ipfw_sopt_handler *
+find_sh(uint16_t code, uint8_t version, sopt_handler_f *handler)
+{
+ struct ipfw_sopt_handler *sh, h;
+
+ memset(&h, 0, sizeof(h));
+ h.opcode = code;
+ h.version = version;
+ h.handler = handler;
+
+ sh = (struct ipfw_sopt_handler *)bsearch(&h, ctl3_handlers,
+ ctl3_hsize, sizeof(h), compare_sh);
+
+ return (sh);
+}
+
+static int
+find_ref_sh(uint16_t opcode, uint8_t version, struct ipfw_sopt_handler *psh)
+{
+ struct ipfw_sopt_handler *sh;
+
+ CTL3_LOCK();
+ if ((sh = find_sh(opcode, version, NULL)) == NULL) {
+ CTL3_UNLOCK();
+ printf("ipfw: ipfw_ctl3 invalid option %d""v""%d\n",
+ opcode, version);
+ return (EINVAL);
+ }
+ sh->refcnt++;
+ ctl3_refct++;
+ /* Copy handler data to requested buffer */
+ *psh = *sh;
+ CTL3_UNLOCK();
+
+ return (0);
+}
+
+static void
+find_unref_sh(struct ipfw_sopt_handler *psh)
+{
+ struct ipfw_sopt_handler *sh;
+
+ CTL3_LOCK();
+ sh = find_sh(psh->opcode, psh->version, NULL);
+ KASSERT(sh != NULL, ("ctl3 handler disappeared"));
+ sh->refcnt--;
+ ctl3_refct--;
+ CTL3_UNLOCK();
+}
+
+void
+ipfw_init_sopt_handler()
+{
+
+ CTL3_LOCK_INIT();
+ IPFW_ADD_SOPT_HANDLER(1, scodes);
+}
+
+void
+ipfw_destroy_sopt_handler()
+{
+
+ IPFW_DEL_SOPT_HANDLER(1, scodes);
+ CTL3_LOCK_DESTROY();
+}
+
+/*
+ * Adds one or more sockopt handlers to the global array.
+ * Function may sleep.
+ */
+void
+ipfw_add_sopt_handler(struct ipfw_sopt_handler *sh, size_t count)
+{
+ size_t sz;
+ struct ipfw_sopt_handler *tmp;
+
+ CTL3_LOCK();
+
+ for (;;) {
+ sz = ctl3_hsize + count;
+ CTL3_UNLOCK();
+ tmp = malloc(sizeof(*sh) * sz, M_IPFW, M_WAITOK | M_ZERO);
+ CTL3_LOCK();
+ if (ctl3_hsize + count <= sz)
+ break;
+
+ /* Retry */
+ free(tmp, M_IPFW);
+ }
+
+ /* Merge old & new arrays */
+ sz = ctl3_hsize + count;
+ memcpy(tmp, ctl3_handlers, ctl3_hsize * sizeof(*sh));
+ memcpy(&tmp[ctl3_hsize], sh, count * sizeof(*sh));
+ qsort(tmp, sz, sizeof(*sh), compare_sh);
+ /* Switch new and free old */
+ if (ctl3_handlers != NULL)
+ free(ctl3_handlers, M_IPFW);
+ ctl3_handlers = tmp;
+ ctl3_hsize = sz;
+ ctl3_gencnt++;
+
+ CTL3_UNLOCK();
+}
+
+/*
+ * Removes one or more sockopt handlers from the global array.
+ */
+int
+ipfw_del_sopt_handler(struct ipfw_sopt_handler *sh, size_t count)
+{
+ size_t sz;
+ struct ipfw_sopt_handler *tmp, *h;
+ int i;
+
+ CTL3_LOCK();
+
+ for (i = 0; i < count; i++) {
+ tmp = &sh[i];
+ h = find_sh(tmp->opcode, tmp->version, tmp->handler);
+ if (h == NULL)
+ continue;
+
+ sz = (ctl3_handlers + ctl3_hsize - (h + 1)) * sizeof(*h);
+ memmove(h, h + 1, sz);
+ ctl3_hsize--;
+ }
+
+ if (ctl3_hsize == 0) {
+ if (ctl3_handlers != NULL)
+ free(ctl3_handlers, M_IPFW);
+ ctl3_handlers = NULL;
+ }
+
+ ctl3_gencnt++;
+
+ CTL3_UNLOCK();
+
+ return (0);
+}
+
+/*
+ * Writes data accumulated in @sd to sockopt buffer.
+ * Zeroes internal @sd buffer.
+ */
+static int
+ipfw_flush_sopt_data(struct sockopt_data *sd)
+{
+ struct sockopt *sopt;
+ int error;
+ size_t sz;
+
+ sz = sd->koff;
+ if (sz == 0)
+ return (0);
+
+ sopt = sd->sopt;
+
+ if (sopt->sopt_dir == SOPT_GET) {
+ error = copyout(sd->kbuf, sopt->sopt_val, sz);
+ if (error != 0)
+ return (error);
+ }
+
+ memset(sd->kbuf, 0, sd->ksize);
+ sd->ktotal += sz;
+ sd->koff = 0;
+ if (sd->ktotal + sd->ksize < sd->valsize)
+ sd->kavail = sd->ksize;
+ else
+ sd->kavail = sd->valsize - sd->ktotal;
+
+ /* Update sopt buffer data */
+ sopt->sopt_valsize = sd->ktotal;
+ sopt->sopt_val = sd->sopt_val + sd->ktotal;
+
+ return (0);
+}
+
+/*
+ * Ensures that @sd buffer has contiguous @neeeded number of
+ * bytes.
+ *
+ * Returns pointer to requested space or NULL.
+ */
+caddr_t
+ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed)
{
-#define RULE_MAXSIZE (256*sizeof(u_int32_t))
int error;
- size_t size, len, valsize;
- struct ip_fw *buf, *rule;
+ caddr_t addr;
+
+ if (sd->kavail < needed) {
+ /*
+ * Flush data and try another time.
+ */
+ error = ipfw_flush_sopt_data(sd);
+
+ if (sd->kavail < needed || error != 0)
+ return (NULL);
+ }
+
+ addr = sd->kbuf + sd->koff;
+ sd->koff += needed;
+ sd->kavail -= needed;
+ return (addr);
+}
+
+/*
+ * Requests @needed contiguous bytes from @sd buffer.
+ * Function is used to notify subsystem that we are
+ * interested in first @needed bytes (request header)
+ * and the rest buffer can be safely zeroed.
+ *
+ * Returns pointer to requested space or NULL.
+ */
+caddr_t
+ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed)
+{
+ caddr_t addr;
+
+ if ((addr = ipfw_get_sopt_space(sd, needed)) == NULL)
+ return (NULL);
+
+ if (sd->kavail > 0)
+ memset(sd->kbuf + sd->koff, 0, sd->kavail);
+
+ return (addr);
+}
+
+/*
+ * New sockopt handler.
+ */
+int
+ipfw_ctl3(struct sockopt *sopt)
+{
+ int error, locked;
+ size_t size, valsize;
struct ip_fw_chain *chain;
- u_int32_t rulenum[2];
- uint32_t opt;
- char xbuf[128];
+ char xbuf[256];
+ struct sockopt_data sdata;
+ struct ipfw_sopt_handler h;
ip_fw3_opheader *op3 = NULL;
error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
- if (error)
+ if (error != 0)
+ return (error);
+
+ if (sopt->sopt_name != IP_FW3)
+ return (ipfw_ctl(sopt));
+
+ chain = &V_layer3_chain;
+ error = 0;
+
+ /* Save original valsize before it is altered via sooptcopyin() */
+ valsize = sopt->sopt_valsize;
+ memset(&sdata, 0, sizeof(sdata));
+ /* Read op3 header first to determine actual operation */
+ op3 = (ip_fw3_opheader *)xbuf;
+ error = sooptcopyin(sopt, op3, sizeof(*op3), sizeof(*op3));
+ if (error != 0)
+ return (error);
+ sopt->sopt_valsize = valsize;
+
+ /*
+ * Find and reference command.
+ */
+ error = find_ref_sh(op3->opcode, op3->version, &h);
+ if (error != 0)
return (error);
/*
* Disallow modifications in really-really secure mode, but still allow
* the logging counters to be reset.
*/
- if (sopt->sopt_name == IP_FW_ADD ||
- (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) {
+ if ((h.dir & HDIR_SET) != 0 && h.opcode != IP_FW_XRESETLOG) {
error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
- if (error)
+ if (error != 0) {
+ find_unref_sh(&h);
return (error);
+ }
}
+ /*
+ * Fill in sockopt_data structure that may be useful for
+ * IP_FW3 get requests.
+ */
+ locked = 0;
+ if (valsize <= sizeof(xbuf)) {
+ /* use on-stack buffer */
+ sdata.kbuf = xbuf;
+ sdata.ksize = sizeof(xbuf);
+ sdata.kavail = valsize;
+ } else {
+
+ /*
+ * Determine opcode type/buffer size:
+ * allocate sliding-window buf for data export or
+ * contiguous buffer for special ops.
+ */
+ if ((h.dir & HDIR_SET) != 0) {
+ /* Set request. Allocate contiguous buffer. */
+ if (valsize > CTL3_LARGEBUF) {
+ find_unref_sh(&h);
+ return (EFBIG);
+ }
+
+ size = valsize;
+ } else {
+ /* Get request. Allocate sliding window buffer */
+ size = (valsize<CTL3_SMALLBUF) ? valsize:CTL3_SMALLBUF;
+
+ if (size < valsize) {
+ /* We have to wire user buffer */
+ error = vslock(sopt->sopt_val, valsize);
+ if (error != 0)
+ return (error);
+ locked = 1;
+ }
+ }
+
+ sdata.kbuf = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
+ sdata.ksize = size;
+ sdata.kavail = size;
+ }
+
+ sdata.sopt = sopt;
+ sdata.sopt_val = sopt->sopt_val;
+ sdata.valsize = valsize;
+
+ /*
+ * Copy either all request (if valsize < bsize_max)
+ * or first bsize_max bytes to guarantee most consumers
+ * that all necessary data has been copied.
+ * Anyway, copy not less than sizeof(ip_fw3_opheader).
+ */
+ if ((error = sooptcopyin(sopt, sdata.kbuf, sdata.ksize,
+ sizeof(ip_fw3_opheader))) != 0)
+ return (error);
+ op3 = (ip_fw3_opheader *)sdata.kbuf;
+
+ /* Finally, run handler */
+ error = h.handler(chain, op3, &sdata);
+ find_unref_sh(&h);
+
+ /* Flush state and free buffers */
+ if (error == 0)
+ error = ipfw_flush_sopt_data(&sdata);
+ else
+ ipfw_flush_sopt_data(&sdata);
+
+ if (locked != 0)
+ vsunlock(sdata.sopt_val, valsize);
+
+ /* Restore original pointer and set number of bytes written */
+ sopt->sopt_val = sdata.sopt_val;
+ sopt->sopt_valsize = sdata.ktotal;
+ if (sdata.kbuf != xbuf)
+ free(sdata.kbuf, M_TEMP);
+
+ return (error);
+}
+
+/**
+ * {set|get}sockopt parser.
+ */
+int
+ipfw_ctl(struct sockopt *sopt)
+{
+#define RULE_MAXSIZE (512*sizeof(u_int32_t))
+ int error;
+ size_t size, valsize;
+ struct ip_fw *buf;
+ struct ip_fw_rule0 *rule;
+ struct ip_fw_chain *chain;
+ u_int32_t rulenum[2];
+ uint32_t opt;
+ struct rule_check_info ci;
+ IPFW_RLOCK_TRACKER;
+
chain = &V_layer3_chain;
error = 0;
/* Save original valsize before it is altered via sooptcopyin() */
valsize = sopt->sopt_valsize;
- if ((opt = sopt->sopt_name) == IP_FW3) {
- /*
- * Copy not less than sizeof(ip_fw3_opheader).
- * We hope any IP_FW3 command will fit into 128-byte buffer.
- */
- if ((error = sooptcopyin(sopt, xbuf, sizeof(xbuf),
- sizeof(ip_fw3_opheader))) != 0)
+ opt = sopt->sopt_name;
+
+ /*
+ * Disallow modifications in really-really secure mode, but still allow
+ * the logging counters to be reset.
+ */
+ if (opt == IP_FW_ADD ||
+ (sopt->sopt_dir == SOPT_SET && opt != IP_FW_RESETLOG)) {
+ error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
+ if (error != 0)
return (error);
- op3 = (ip_fw3_opheader *)xbuf;
- opt = op3->opcode;
}
switch (opt) {
@@ -1006,9 +3756,7 @@ ipfw_ctl(struct sockopt *sopt)
size += ipfw_dyn_len();
if (size >= sopt->sopt_valsize)
break;
- buf = malloc(size, M_TEMP, M_WAITOK);
- if (buf == NULL)
- break;
+ buf = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
IPFW_UH_RLOCK(chain);
/* check again how much space we need */
want = chain->static_len + ipfw_dyn_len();
@@ -1033,6 +3781,8 @@ ipfw_ctl(struct sockopt *sopt)
error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
sizeof(struct ip_fw7) );
+ memset(&ci, 0, sizeof(struct rule_check_info));
+
/*
* If the size of commands equals RULESIZE7 then we assume
* a FreeBSD7.2 binary is talking to us (set is7=1).
@@ -1042,25 +3792,30 @@ ipfw_ctl(struct sockopt *sopt)
* the first ipfw command is 'ipfw [pipe] list')
* the ipfw binary may crash or loop infinitly...
*/
- if (sopt->sopt_valsize == RULESIZE7(rule)) {
+ size = sopt->sopt_valsize;
+ if (size == RULESIZE7(rule)) {
is7 = 1;
error = convert_rule_to_8(rule);
if (error) {
free(rule, M_TEMP);
return error;
}
- if (error == 0)
- error = check_ipfw_struct(rule, RULESIZE(rule));
- } else {
+ size = RULESIZE(rule);
+ } else
is7 = 0;
if (error == 0)
- error = check_ipfw_struct(rule, sopt->sopt_valsize);
- }
+ error = check_ipfw_rule0(rule, size, &ci);
if (error == 0) {
- /* locking is done within ipfw_add_rule() */
- error = ipfw_add_rule(chain, rule);
- size = RULESIZE(rule);
- if (!error && sopt->sopt_dir == SOPT_GET) {
+ /* locking is done within add_rule() */
+ struct ip_fw *krule;
+ krule = ipfw_alloc_rule(chain, RULEKSIZE0(rule));
+ ci.urule = (caddr_t)rule;
+ ci.krule = krule;
+ import_rule0(&ci);
+ error = commit_rules(chain, &ci, 1);
+ if (error != 0)
+ free_rule(ci.krule);
+ else if (sopt->sopt_dir == SOPT_GET) {
if (is7) {
error = convert_rule_to_7(rule);
size = RULESIZE7(rule);
@@ -1119,82 +3874,64 @@ ipfw_ctl(struct sockopt *sopt)
sopt->sopt_name == IP_FW_RESETLOG);
break;
- /*--- TABLE manipulations are protected by the IPFW_LOCK ---*/
+ /*--- TABLE opcodes ---*/
case IP_FW_TABLE_ADD:
- {
- ipfw_table_entry ent;
-
- error = sooptcopyin(sopt, &ent,
- sizeof(ent), sizeof(ent));
- if (error)
- break;
- error = ipfw_add_table_entry(chain, ent.tbl,
- &ent.addr, sizeof(ent.addr), ent.masklen,
- IPFW_TABLE_CIDR, ent.value);
- }
- break;
-
case IP_FW_TABLE_DEL:
{
ipfw_table_entry ent;
+ struct tentry_info tei;
+ struct tid_info ti;
+ struct table_value v;
error = sooptcopyin(sopt, &ent,
sizeof(ent), sizeof(ent));
if (error)
break;
- error = ipfw_del_table_entry(chain, ent.tbl,
- &ent.addr, sizeof(ent.addr), ent.masklen, IPFW_TABLE_CIDR);
- }
- break;
-
- case IP_FW_TABLE_XADD: /* IP_FW3 */
- case IP_FW_TABLE_XDEL: /* IP_FW3 */
- {
- ipfw_table_xentry *xent = (ipfw_table_xentry *)(op3 + 1);
-
- /* Check minimum header size */
- if (IP_FW3_OPLENGTH(sopt) < offsetof(ipfw_table_xentry, k)) {
- error = EINVAL;
- break;
- }
- /* Check if len field is valid */
- if (xent->len > sizeof(ipfw_table_xentry)) {
- error = EINVAL;
- break;
- }
-
- len = xent->len - offsetof(ipfw_table_xentry, k);
-
- error = (opt == IP_FW_TABLE_XADD) ?
- ipfw_add_table_entry(chain, xent->tbl, &xent->k,
- len, xent->masklen, xent->type, xent->value) :
- ipfw_del_table_entry(chain, xent->tbl, &xent->k,
- len, xent->masklen, xent->type);
+ memset(&tei, 0, sizeof(tei));
+ tei.paddr = &ent.addr;
+ tei.subtype = AF_INET;
+ tei.masklen = ent.masklen;
+ ipfw_import_table_value_legacy(ent.value, &v);
+ tei.pvalue = &v;
+ memset(&ti, 0, sizeof(ti));
+ ti.uidx = ent.tbl;
+ ti.type = IPFW_TABLE_CIDR;
+
+ error = (opt == IP_FW_TABLE_ADD) ?
+ add_table_entry(chain, &ti, &tei, 0, 1) :
+ del_table_entry(chain, &ti, &tei, 0, 1);
}
break;
+
case IP_FW_TABLE_FLUSH:
{
u_int16_t tbl;
+ struct tid_info ti;
error = sooptcopyin(sopt, &tbl,
sizeof(tbl), sizeof(tbl));
if (error)
break;
- error = ipfw_flush_table(chain, tbl);
+ memset(&ti, 0, sizeof(ti));
+ ti.uidx = tbl;
+ error = flush_table(chain, &ti);
}
break;
case IP_FW_TABLE_GETSIZE:
{
u_int32_t tbl, cnt;
+ struct tid_info ti;
if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl),
sizeof(tbl))))
break;
+ memset(&ti, 0, sizeof(ti));
+ ti.uidx = tbl;
IPFW_RLOCK(chain);
- error = ipfw_count_table(chain, tbl, &cnt);
+ error = ipfw_count_table(chain, &ti, &cnt);
IPFW_RUNLOCK(chain);
if (error)
break;
@@ -1205,6 +3942,7 @@ ipfw_ctl(struct sockopt *sopt)
case IP_FW_TABLE_LIST:
{
ipfw_table *tbl;
+ struct tid_info ti;
if (sopt->sopt_valsize < sizeof(*tbl)) {
error = EINVAL;
@@ -1219,8 +3957,10 @@ ipfw_ctl(struct sockopt *sopt)
}
tbl->size = (size - sizeof(*tbl)) /
sizeof(ipfw_table_entry);
+ memset(&ti, 0, sizeof(ti));
+ ti.uidx = tbl->tbl;
IPFW_RLOCK(chain);
- error = ipfw_dump_table(chain, tbl);
+ error = ipfw_dump_table_legacy(chain, &ti, tbl);
IPFW_RUNLOCK(chain);
if (error) {
free(tbl, M_TEMP);
@@ -1231,62 +3971,6 @@ ipfw_ctl(struct sockopt *sopt)
}
break;
- case IP_FW_TABLE_XGETSIZE: /* IP_FW3 */
- {
- uint32_t *tbl;
-
- if (IP_FW3_OPLENGTH(sopt) < sizeof(uint32_t)) {
- error = EINVAL;
- break;
- }
-
- tbl = (uint32_t *)(op3 + 1);
-
- IPFW_RLOCK(chain);
- error = ipfw_count_xtable(chain, *tbl, tbl);
- IPFW_RUNLOCK(chain);
- if (error)
- break;
- error = sooptcopyout(sopt, op3, sopt->sopt_valsize);
- }
- break;
-
- case IP_FW_TABLE_XLIST: /* IP_FW3 */
- {
- ipfw_xtable *tbl;
-
- if ((size = valsize) < sizeof(ipfw_xtable)) {
- error = EINVAL;
- break;
- }
-
- tbl = malloc(size, M_TEMP, M_ZERO | M_WAITOK);
- memcpy(tbl, op3, sizeof(ipfw_xtable));
-
- /* Get maximum number of entries we can store */
- tbl->size = (size - sizeof(ipfw_xtable)) /
- sizeof(ipfw_table_xentry);
- IPFW_RLOCK(chain);
- error = ipfw_dump_xtable(chain, tbl);
- IPFW_RUNLOCK(chain);
- if (error) {
- free(tbl, M_TEMP);
- break;
- }
-
- /* Revert size field back to bytes */
- tbl->size = tbl->size * sizeof(ipfw_table_xentry) +
- sizeof(ipfw_table);
- /*
- * Since we call sooptcopyin() with small buffer, sopt_valsize is
- * decreased to reflect supplied buffer size. Set it back to original value
- */
- sopt->sopt_valsize = valsize;
- error = sooptcopyout(sopt, tbl, size);
- free(tbl, M_TEMP);
- }
- break;
-
/*--- NAT operations are protected by the IPFW_LOCK ---*/
case IP_FW_NAT_CFG:
if (IPFW_NAT_LOADED)
@@ -1336,18 +4020,16 @@ ipfw_ctl(struct sockopt *sopt)
return (error);
#undef RULE_MAXSIZE
}
-
-
#define RULE_MAXSIZE (256*sizeof(u_int32_t))
/* Functions to convert rules 7.2 <==> 8.0 */
-int
-convert_rule_to_7(struct ip_fw *rule)
+static int
+convert_rule_to_7(struct ip_fw_rule0 *rule)
{
/* Used to modify original rule */
struct ip_fw7 *rule7 = (struct ip_fw7 *)rule;
/* copy of original rule, version 8 */
- struct ip_fw *tmp;
+ struct ip_fw_rule0 *tmp;
/* Used to copy commands */
ipfw_insn *ccmd, *dst;
@@ -1360,13 +4042,12 @@ convert_rule_to_7(struct ip_fw *rule)
bcopy(rule, tmp, RULE_MAXSIZE);
/* Copy fields */
- rule7->_pad = tmp->_pad;
+ //rule7->_pad = tmp->_pad;
rule7->set = tmp->set;
rule7->rulenum = tmp->rulenum;
rule7->cmd_len = tmp->cmd_len;
rule7->act_ofs = tmp->act_ofs;
rule7->next_rule = (struct ip_fw7 *)tmp->next_rule;
- rule7->next = (struct ip_fw7 *)tmp->x_next;
rule7->cmd_len = tmp->cmd_len;
rule7->pcnt = tmp->pcnt;
rule7->bcnt = tmp->bcnt;
@@ -1396,8 +4077,8 @@ convert_rule_to_7(struct ip_fw *rule)
return 0;
}
-int
-convert_rule_to_8(struct ip_fw *rule)
+static int
+convert_rule_to_8(struct ip_fw_rule0 *rule)
{
/* Used to modify original rule */
struct ip_fw7 *rule7 = (struct ip_fw7 *) rule;
@@ -1439,7 +4120,6 @@ convert_rule_to_8(struct ip_fw *rule)
rule->cmd_len = tmp->cmd_len;
rule->act_ofs = tmp->act_ofs;
rule->next_rule = (struct ip_fw *)tmp->next_rule;
- rule->x_next = (struct ip_fw *)tmp->next;
rule->cmd_len = tmp->cmd_len;
rule->id = 0; /* XXX see if is ok = 0 */
rule->pcnt = tmp->pcnt;
@@ -1450,4 +4130,486 @@ convert_rule_to_8(struct ip_fw *rule)
return 0;
}
+/*
+ * Named object api
+ *
+ */
+
+void
+ipfw_init_srv(struct ip_fw_chain *ch)
+{
+
+ ch->srvmap = ipfw_objhash_create(IPFW_OBJECTS_DEFAULT);
+ ch->srvstate = malloc(sizeof(void *) * IPFW_OBJECTS_DEFAULT,
+ M_IPFW, M_WAITOK | M_ZERO);
+}
+
+void
+ipfw_destroy_srv(struct ip_fw_chain *ch)
+{
+
+ free(ch->srvstate, M_IPFW);
+ ipfw_objhash_destroy(ch->srvmap);
+}
+
+/*
+ * Allocate new bitmask which can be used to enlarge/shrink
+ * named instance index.
+ */
+void
+ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks)
+{
+ size_t size;
+ int max_blocks;
+ u_long *idx_mask;
+
+ KASSERT((items % BLOCK_ITEMS) == 0,
+ ("bitmask size needs to power of 2 and greater or equal to %zu",
+ BLOCK_ITEMS));
+
+ max_blocks = items / BLOCK_ITEMS;
+ size = items / 8;
+ idx_mask = malloc(size * IPFW_MAX_SETS, M_IPFW, M_WAITOK);
+ /* Mark all as free */
+ memset(idx_mask, 0xFF, size * IPFW_MAX_SETS);
+ *idx_mask &= ~(u_long)1; /* Skip index 0 */
+
+ *idx = idx_mask;
+ *pblocks = max_blocks;
+}
+
+/*
+ * Copy current bitmask index to new one.
+ */
+void
+ipfw_objhash_bitmap_merge(struct namedobj_instance *ni, void **idx, int *blocks)
+{
+ int old_blocks, new_blocks;
+ u_long *old_idx, *new_idx;
+ int i;
+
+ old_idx = ni->idx_mask;
+ old_blocks = ni->max_blocks;
+ new_idx = *idx;
+ new_blocks = *blocks;
+
+ for (i = 0; i < IPFW_MAX_SETS; i++) {
+ memcpy(&new_idx[new_blocks * i], &old_idx[old_blocks * i],
+ old_blocks * sizeof(u_long));
+ }
+}
+
+/*
+ * Swaps current @ni index with new one.
+ */
+void
+ipfw_objhash_bitmap_swap(struct namedobj_instance *ni, void **idx, int *blocks)
+{
+ int old_blocks;
+ u_long *old_idx;
+
+ old_idx = ni->idx_mask;
+ old_blocks = ni->max_blocks;
+
+ ni->idx_mask = *idx;
+ ni->max_blocks = *blocks;
+
+ /* Save old values */
+ *idx = old_idx;
+ *blocks = old_blocks;
+}
+
+void
+ipfw_objhash_bitmap_free(void *idx, int blocks)
+{
+
+ free(idx, M_IPFW);
+}
+
+/*
+ * Creates named hash instance.
+ * Must be called without holding any locks.
+ * Return pointer to new instance.
+ */
+struct namedobj_instance *
+ipfw_objhash_create(uint32_t items)
+{
+ struct namedobj_instance *ni;
+ int i;
+ size_t size;
+
+ size = sizeof(struct namedobj_instance) +
+ sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE +
+ sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE;
+
+ ni = malloc(size, M_IPFW, M_WAITOK | M_ZERO);
+ ni->nn_size = NAMEDOBJ_HASH_SIZE;
+ ni->nv_size = NAMEDOBJ_HASH_SIZE;
+
+ ni->names = (struct namedobjects_head *)(ni +1);
+ ni->values = &ni->names[ni->nn_size];
+
+ for (i = 0; i < ni->nn_size; i++)
+ TAILQ_INIT(&ni->names[i]);
+
+ for (i = 0; i < ni->nv_size; i++)
+ TAILQ_INIT(&ni->values[i]);
+
+ /* Set default hashing/comparison functions */
+ ni->hash_f = objhash_hash_name;
+ ni->cmp_f = objhash_cmp_name;
+
+ /* Allocate bitmask separately due to possible resize */
+ ipfw_objhash_bitmap_alloc(items, (void*)&ni->idx_mask, &ni->max_blocks);
+
+ return (ni);
+}
+
+void
+ipfw_objhash_destroy(struct namedobj_instance *ni)
+{
+
+ free(ni->idx_mask, M_IPFW);
+ free(ni, M_IPFW);
+}
+
+void
+ipfw_objhash_set_funcs(struct namedobj_instance *ni, objhash_hash_f *hash_f,
+ objhash_cmp_f *cmp_f)
+{
+
+ ni->hash_f = hash_f;
+ ni->cmp_f = cmp_f;
+}
+
+static uint32_t
+objhash_hash_name(struct namedobj_instance *ni, const void *name, uint32_t set)
+{
+
+ return (fnv_32_str((const char *)name, FNV1_32_INIT));
+}
+
+static int
+objhash_cmp_name(struct named_object *no, const void *name, uint32_t set)
+{
+
+ if ((strcmp(no->name, (const char *)name) == 0) && (no->set == set))
+ return (0);
+
+ return (1);
+}
+
+static uint32_t
+objhash_hash_idx(struct namedobj_instance *ni, uint32_t val)
+{
+ uint32_t v;
+
+ v = val % (ni->nv_size - 1);
+
+ return (v);
+}
+
+struct named_object *
+ipfw_objhash_lookup_name(struct namedobj_instance *ni, uint32_t set, char *name)
+{
+ struct named_object *no;
+ uint32_t hash;
+
+ hash = ni->hash_f(ni, name, set) % ni->nn_size;
+
+ TAILQ_FOREACH(no, &ni->names[hash], nn_next) {
+ if (ni->cmp_f(no, name, set) == 0)
+ return (no);
+ }
+
+ return (NULL);
+}
+
+/*
+ * Find named object by @uid.
+ * Check @tlvs for valid data inside.
+ *
+ * Returns pointer to found TLV or NULL.
+ */
+ipfw_obj_ntlv *
+ipfw_find_name_tlv_type(void *tlvs, int len, uint16_t uidx, uint32_t etlv)
+{
+ ipfw_obj_ntlv *ntlv;
+ uintptr_t pa, pe;
+ int l;
+
+ pa = (uintptr_t)tlvs;
+ pe = pa + len;
+ l = 0;
+ for (; pa < pe; pa += l) {
+ ntlv = (ipfw_obj_ntlv *)pa;
+ l = ntlv->head.length;
+
+ if (l != sizeof(*ntlv))
+ return (NULL);
+
+ if (ntlv->idx != uidx)
+ continue;
+ /*
+ * When userland has specified zero TLV type, do
+ * not compare it with etlv. In some cases userland
+ * doesn't know what type it should have. Use only
+ * uidx and name to search for the named_object.
+ */
+ if (ntlv->head.type != 0 &&
+ ntlv->head.type != (uint16_t)etlv)
+ continue;
+
+ if (ipfw_check_object_name_generic(ntlv->name) != 0)
+ return (NULL);
+
+ return (ntlv);
+ }
+
+ return (NULL);
+}
+
+/*
+ * Finds object config based on either legacy index
+ * or name in ntlv.
+ * Note @ti structure contains unchecked data from userland.
+ *
+ * Returns 0 in success and fills in @pno with found config
+ */
+int
+ipfw_objhash_find_type(struct namedobj_instance *ni, struct tid_info *ti,
+ uint32_t etlv, struct named_object **pno)
+{
+ char *name;
+ ipfw_obj_ntlv *ntlv;
+ uint32_t set;
+
+ if (ti->tlvs == NULL)
+ return (EINVAL);
+
+ ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, etlv);
+ if (ntlv == NULL)
+ return (EINVAL);
+ name = ntlv->name;
+
+ /*
+ * Use set provided by @ti instead of @ntlv one.
+ * This is needed due to different sets behavior
+ * controlled by V_fw_tables_sets.
+ */
+ set = ti->set;
+ *pno = ipfw_objhash_lookup_name(ni, set, name);
+ if (*pno == NULL)
+ return (ESRCH);
+ return (0);
+}
+
+/*
+ * Find named object by name, considering also its TLV type.
+ */
+struct named_object *
+ipfw_objhash_lookup_name_type(struct namedobj_instance *ni, uint32_t set,
+ uint32_t type, const char *name)
+{
+ struct named_object *no;
+ uint32_t hash;
+
+ hash = ni->hash_f(ni, name, set) % ni->nn_size;
+
+ TAILQ_FOREACH(no, &ni->names[hash], nn_next) {
+ if (ni->cmp_f(no, name, set) == 0 &&
+ no->etlv == (uint16_t)type)
+ return (no);
+ }
+
+ return (NULL);
+}
+
+struct named_object *
+ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, uint16_t kidx)
+{
+ struct named_object *no;
+ uint32_t hash;
+
+ hash = objhash_hash_idx(ni, kidx);
+
+ TAILQ_FOREACH(no, &ni->values[hash], nv_next) {
+ if (no->kidx == kidx)
+ return (no);
+ }
+
+ return (NULL);
+}
+
+int
+ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a,
+ struct named_object *b)
+{
+
+ if ((strcmp(a->name, b->name) == 0) && a->set == b->set)
+ return (1);
+
+ return (0);
+}
+
+void
+ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no)
+{
+ uint32_t hash;
+
+ hash = ni->hash_f(ni, no->name, no->set) % ni->nn_size;
+ TAILQ_INSERT_HEAD(&ni->names[hash], no, nn_next);
+
+ hash = objhash_hash_idx(ni, no->kidx);
+ TAILQ_INSERT_HEAD(&ni->values[hash], no, nv_next);
+
+ ni->count++;
+}
+
+void
+ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no)
+{
+ uint32_t hash;
+
+ hash = ni->hash_f(ni, no->name, no->set) % ni->nn_size;
+ TAILQ_REMOVE(&ni->names[hash], no, nn_next);
+
+ hash = objhash_hash_idx(ni, no->kidx);
+ TAILQ_REMOVE(&ni->values[hash], no, nv_next);
+
+ ni->count--;
+}
+
+uint32_t
+ipfw_objhash_count(struct namedobj_instance *ni)
+{
+
+ return (ni->count);
+}
+
+uint32_t
+ipfw_objhash_count_type(struct namedobj_instance *ni, uint16_t type)
+{
+ struct named_object *no;
+ uint32_t count;
+ int i;
+
+ count = 0;
+ for (i = 0; i < ni->nn_size; i++) {
+ TAILQ_FOREACH(no, &ni->names[i], nn_next) {
+ if (no->etlv == type)
+ count++;
+ }
+ }
+ return (count);
+}
+
+/*
+ * Runs @func for each found named object.
+ * It is safe to delete objects from callback
+ */
+int
+ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, void *arg)
+{
+ struct named_object *no, *no_tmp;
+ int i, ret;
+
+ for (i = 0; i < ni->nn_size; i++) {
+ TAILQ_FOREACH_SAFE(no, &ni->names[i], nn_next, no_tmp) {
+ ret = f(ni, no, arg);
+ if (ret != 0)
+ return (ret);
+ }
+ }
+ return (0);
+}
+
+/*
+ * Runs @f for each found named object with type @type.
+ * It is safe to delete objects from callback
+ */
+int
+ipfw_objhash_foreach_type(struct namedobj_instance *ni, objhash_cb_t *f,
+ void *arg, uint16_t type)
+{
+ struct named_object *no, *no_tmp;
+ int i, ret;
+
+ for (i = 0; i < ni->nn_size; i++) {
+ TAILQ_FOREACH_SAFE(no, &ni->names[i], nn_next, no_tmp) {
+ if (no->etlv != type)
+ continue;
+ ret = f(ni, no, arg);
+ if (ret != 0)
+ return (ret);
+ }
+ }
+ return (0);
+}
+
+/*
+ * Removes index from given set.
+ * Returns 0 on success.
+ */
+int
+ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx)
+{
+ u_long *mask;
+ int i, v;
+
+ i = idx / BLOCK_ITEMS;
+ v = idx % BLOCK_ITEMS;
+
+ if (i >= ni->max_blocks)
+ return (1);
+
+ mask = &ni->idx_mask[i];
+
+ if ((*mask & ((u_long)1 << v)) != 0)
+ return (1);
+
+ /* Mark as free */
+ *mask |= (u_long)1 << v;
+
+ /* Update free offset */
+ if (ni->free_off[0] > i)
+ ni->free_off[0] = i;
+
+ return (0);
+}
+
+/*
+ * Allocate new index in given instance and store it in @pidx.
+ * Returns 0 on success.
+ */
+int
+ipfw_objhash_alloc_idx(void *n, uint16_t *pidx)
+{
+ struct namedobj_instance *ni;
+ u_long *mask;
+ int i, off, v;
+
+ ni = (struct namedobj_instance *)n;
+
+ off = ni->free_off[0];
+ mask = &ni->idx_mask[off];
+
+ for (i = off; i < ni->max_blocks; i++, mask++) {
+ if ((v = ffsl(*mask)) == 0)
+ continue;
+
+ /* Mark as busy */
+ *mask &= ~ ((u_long)1 << (v - 1));
+
+ ni->free_off[0] = i;
+
+ v = BLOCK_ITEMS * i + v - 1;
+
+ *pidx = v;
+ return (0);
+ }
+
+ return (1);
+}
+
/* end of file */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table.c b/freebsd/sys/netpfil/ipfw/ip_fw_table.c
index 71579795..9d2baad2 100644
--- a/freebsd/sys/netpfil/ipfw/ip_fw_table.c
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_table.c
@@ -2,6 +2,8 @@
/*-
* Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
+ * Copyright (c) 2014 Yandex LLC
+ * Copyright (c) 2014 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -29,24 +31,18 @@
__FBSDID("$FreeBSD$");
/*
- * Lookup table support for ipfw
+ * Lookup table support for ipfw.
*
- * Lookup tables are implemented (at the moment) using the radix
- * tree used for routing tables. Tables store key-value entries, where
- * keys are network prefixes (addr/masklen), and values are integers.
- * As a degenerate case we can interpret keys as 32-bit integers
- * (with a /32 mask).
+ * This file contains handlers for all generic tables' operations:
+ * add/del/flush entries, list/dump tables etc.
*
- * The table is protected by the IPFW lock even for manipulation coming
- * from userland, because operations are typically fast.
+ * Table data modification is protected by both UH and runtime lock
+ * while reading configuration/data is protected by UH lock.
+ *
+ * Lookup algorithms for all table types are located in ip_fw_table_algo.c
*/
#include <rtems/bsd/local/opt_ipfw.h>
-#include <rtems/bsd/local/opt_inet.h>
-#ifndef INET
-#error IPFIREWALL requires INET.
-#endif /* INET */
-#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -54,713 +50,3296 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/rwlock.h>
+#include <sys/rmlock.h>
#include <sys/socket.h>
+#include <sys/socketvar.h>
#include <sys/queue.h>
#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
-#include <net/radix.h>
-#include <net/route.h>
-#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/ip_var.h> /* struct ipfw_rule_ref */
#include <netinet/ip_fw.h>
#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/ip_fw_table.h>
-#ifdef MAC
-#include <security/mac/mac_framework.h>
-#endif
+ /*
+ * Table has the following `type` concepts:
+ *
+ * `no.type` represents lookup key type (addr, ifp, uid, etc..)
+ * vmask represents bitmask of table values which are present at the moment.
+ * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old
+ * single-value-for-all approach.
+ */
+struct table_config {
+ struct named_object no;
+ uint8_t tflags; /* type flags */
+ uint8_t locked; /* 1 if locked from changes */
+ uint8_t linked; /* 1 if already linked */
+ uint8_t ochanged; /* used by set swapping */
+ uint8_t vshared; /* 1 if using shared value array */
+ uint8_t spare[3];
+ uint32_t count; /* Number of records */
+ uint32_t limit; /* Max number of records */
+ uint32_t vmask; /* bitmask with supported values */
+ uint32_t ocount; /* used by set swapping */
+ uint64_t gencnt; /* generation count */
+ char tablename[64]; /* table name */
+ struct table_algo *ta; /* Callbacks for given algo */
+ void *astate; /* algorithm state */
+ struct table_info ti_copy; /* data to put to table_info */
+ struct namedobj_instance *vi;
+};
-MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
+static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
+ struct table_config **tc);
+static struct table_config *find_table(struct namedobj_instance *ni,
+ struct tid_info *ti);
+static struct table_config *alloc_table_config(struct ip_fw_chain *ch,
+ struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags);
+static void free_table_config(struct namedobj_instance *ni,
+ struct table_config *tc);
+static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
+ char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref);
+static void link_table(struct ip_fw_chain *ch, struct table_config *tc);
+static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc);
+static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
+ struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc);
+#define OP_ADD 1
+#define OP_DEL 0
+static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
+ struct sockopt_data *sd);
+static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
+ ipfw_xtable_info *i);
+static int dump_table_tentry(void *e, void *arg);
+static int dump_table_xentry(void *e, void *arg);
+
+static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
+ struct tid_info *b);
+
+static int check_table_name(const char *name);
+static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
+ struct table_config *tc, struct table_info *ti, uint32_t count);
+static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti);
+
+static struct table_algo *find_table_algo(struct tables_config *tableconf,
+ struct tid_info *ti, char *name);
+
+static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti);
+static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti);
+
+#define CHAIN_TO_NI(chain) (CHAIN_TO_TCFG(chain)->namehash)
+#define KIDX_TO_TI(ch, k) (&(((struct table_info *)(ch)->tablestate)[k]))
+
+#define TA_BUF_SZ 128 /* On-stack buffer for add/delete state */
-struct table_entry {
- struct radix_node rn[2];
- struct sockaddr_in addr, mask;
- u_int32_t value;
-};
+void
+rollback_toperation_state(struct ip_fw_chain *ch, void *object)
+{
+ struct tables_config *tcfg;
+ struct op_state *os;
-struct xaddr_iface {
- uint8_t if_len; /* length of this struct */
- uint8_t pad[7]; /* Align name */
- char ifname[IF_NAMESIZE]; /* Interface name */
-};
+ tcfg = CHAIN_TO_TCFG(ch);
+ TAILQ_FOREACH(os, &tcfg->state_list, next)
+ os->func(object, os);
+}
+
+void
+add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
+{
+ struct tables_config *tcfg;
+
+ tcfg = CHAIN_TO_TCFG(ch);
+ TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next);
+}
+
+void
+del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
+{
+ struct tables_config *tcfg;
+
+ tcfg = CHAIN_TO_TCFG(ch);
+ TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next);
+}
+
+void
+tc_ref(struct table_config *tc)
+{
+
+ tc->no.refcnt++;
+}
+
+void
+tc_unref(struct table_config *tc)
+{
+
+ tc->no.refcnt--;
+}
+
+static struct table_value *
+get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx)
+{
+ struct table_value *pval;
+
+ pval = (struct table_value *)ch->valuestate;
+
+ return (&pval[kidx]);
+}
-struct table_xentry {
- struct radix_node rn[2];
- union {
-#ifdef INET6
- struct sockaddr_in6 addr6;
-#endif
- struct xaddr_iface iface;
- } a;
- union {
-#ifdef INET6
- struct sockaddr_in6 mask6;
-#endif
- struct xaddr_iface ifmask;
- } m;
- u_int32_t value;
-};
/*
- * The radix code expects addr and mask to be array of bytes,
- * with the first byte being the length of the array. rn_inithead
- * is called with the offset in bits of the lookup key within the
- * array. If we use a sockaddr_in as the underlying type,
- * sin_len is conveniently located at offset 0, sin_addr is at
- * offset 4 and normally aligned.
- * But for portability, let's avoid assumption and make the code explicit
+ * Checks if we're able to insert/update entry @tei into table
+ * w.r.t @tc limits.
+ * May alter @tei to indicate insertion error / insert
+ * options.
+ *
+ * Returns 0 if operation can be performed.
*/
-#define KEY_LEN(v) *((uint8_t *)&(v))
-#define KEY_OFS (8*offsetof(struct sockaddr_in, sin_addr))
+static int
+check_table_limit(struct table_config *tc, struct tentry_info *tei)
+{
+
+ if (tc->limit == 0 || tc->count < tc->limit)
+ return (0);
+
+ if ((tei->flags & TEI_FLAGS_UPDATE) == 0) {
+ /* Notify userland on error cause */
+ tei->flags |= TEI_FLAGS_LIMIT;
+ return (EFBIG);
+ }
+
+ /*
+ * We have UPDATE flag set.
+ * Permit updating record (if found),
+ * but restrict adding new one since we've
+ * already hit the limit.
+ */
+ tei->flags |= TEI_FLAGS_DONTADD;
+
+ return (0);
+}
+
/*
- * Do not require radix to compare more than actual IPv4/IPv6 address
+ * Convert algorithm callback return code into
+ * one of pre-defined states known by userland.
*/
-#define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
-#define KEY_LEN_INET6 (offsetof(struct sockaddr_in6, sin6_addr) + sizeof(struct in6_addr))
-#define KEY_LEN_IFACE (offsetof(struct xaddr_iface, ifname))
+static void
+store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num)
+{
+ int flag;
-#define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr))
-#define OFF_LEN_INET6 (8 * offsetof(struct sockaddr_in6, sin6_addr))
-#define OFF_LEN_IFACE (8 * offsetof(struct xaddr_iface, ifname))
+ flag = 0;
+ switch (error) {
+ case 0:
+ if (op == OP_ADD && num != 0)
+ flag = TEI_FLAGS_ADDED;
+ if (op == OP_DEL)
+ flag = TEI_FLAGS_DELETED;
+ break;
+ case ENOENT:
+ flag = TEI_FLAGS_NOTFOUND;
+ break;
+ case EEXIST:
+ flag = TEI_FLAGS_EXISTS;
+ break;
+ default:
+ flag = TEI_FLAGS_ERROR;
+ }
-#ifdef INET6
-static inline void
-ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
+ tei->flags |= flag;
+}
+
+/*
+ * Creates and references table with default parameters.
+ * Saves table config, algo and allocated kidx info @ptc, @pta and
+ * @pkidx if non-zero.
+ * Used for table auto-creation to support old binaries.
+ *
+ * Returns 0 on success.
+ */
+static int
+create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti,
+ uint16_t *pkidx)
{
- uint32_t *cp;
+ ipfw_xtable_info xi;
+ int error;
+
+ memset(&xi, 0, sizeof(xi));
+ /* Set default value mask for legacy clients */
+ xi.vmask = IPFW_VTYPE_LEGACY;
+
+ error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1);
+ if (error != 0)
+ return (error);
- for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32)
- *cp++ = 0xFFFFFFFF;
- *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0);
+ return (0);
+}
+
+/*
+ * Find and reference existing table optionally
+ * creating new one.
+ *
+ * Saves found table config into @ptc.
+ * Note function may drop/acquire UH_WLOCK.
+ * Returns 0 if table was found/created and referenced
+ * or non-zero return code.
+ */
+static int
+find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
+ struct tentry_info *tei, uint32_t count, int op,
+ struct table_config **ptc)
+{
+ struct namedobj_instance *ni;
+ struct table_config *tc;
+ uint16_t kidx;
+ int error;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ ni = CHAIN_TO_NI(ch);
+ tc = NULL;
+ if ((tc = find_table(ni, ti)) != NULL) {
+ /* check table type */
+ if (tc->no.subtype != ti->type)
+ return (EINVAL);
+
+ if (tc->locked != 0)
+ return (EACCES);
+
+ /* Try to exit early on limit hit */
+ if (op == OP_ADD && count == 1 &&
+ check_table_limit(tc, tei) != 0)
+ return (EFBIG);
+
+ /* Reference and return */
+ tc->no.refcnt++;
+ *ptc = tc;
+ return (0);
+ }
+
+ if (op == OP_DEL)
+ return (ESRCH);
+
+ /* Compatibility mode: create new table for old clients */
+ if ((tei->flags & TEI_FLAGS_COMPAT) == 0)
+ return (ESRCH);
+
+ IPFW_UH_WUNLOCK(ch);
+ error = create_table_compat(ch, ti, &kidx);
+ IPFW_UH_WLOCK(ch);
+
+ if (error != 0)
+ return (error);
+
+ tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
+ KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx));
+
+ /* OK, now we've got referenced table. */
+ *ptc = tc;
+ return (0);
+}
+
+/*
+ * Rolls back already @added to @tc entries using state array @ta_buf_m.
+ * Assume the following layout:
+ * 1) ADD state (ta_buf_m[0] ... t_buf_m[added - 1]) for handling update cases
+ * 2) DEL state (ta_buf_m[count[ ... t_buf_m[count + added - 1])
+ * for storing deleted state
+ */
+static void
+rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc,
+ struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m,
+ uint32_t count, uint32_t added)
+{
+ struct table_algo *ta;
+ struct tentry_info *ptei;
+ caddr_t v, vv;
+ size_t ta_buf_sz;
+ int error, i;
+ uint32_t num;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ ta = tc->ta;
+ ta_buf_sz = ta->ta_buf_size;
+ v = ta_buf_m;
+ vv = v + count * ta_buf_sz;
+ for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) {
+ ptei = &tei[i];
+ if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) {
+
+ /*
+ * We have old value stored by previous
+ * call in @ptei->value. Do add once again
+ * to restore it.
+ */
+ error = ta->add(tc->astate, tinfo, ptei, v, &num);
+ KASSERT(error == 0, ("rollback UPDATE fail"));
+ KASSERT(num == 0, ("rollback UPDATE fail2"));
+ continue;
+ }
+
+ error = ta->prepare_del(ch, ptei, vv);
+ KASSERT(error == 0, ("pre-rollback INSERT failed"));
+ error = ta->del(tc->astate, tinfo, ptei, vv, &num);
+ KASSERT(error == 0, ("rollback INSERT failed"));
+ tc->count -= num;
+ }
+}
+
+/*
+ * Prepares add/del state for all @count entries in @tei.
+ * Uses either stack buffer (@ta_buf) or allocates a new one.
+ * Stores pointer to allocated buffer back to @ta_buf.
+ *
+ * Returns 0 on success.
+ */
+static int
+prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
+ struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf)
+{
+ caddr_t ta_buf_m, v;
+ size_t ta_buf_sz, sz;
+ struct tentry_info *ptei;
+ int error, i;
+
+ error = 0;
+ ta_buf_sz = ta->ta_buf_size;
+ if (count == 1) {
+ /* Sigle add/delete, use on-stack buffer */
+ memset(*ta_buf, 0, TA_BUF_SZ);
+ ta_buf_m = *ta_buf;
+ } else {
+
+ /*
+ * Multiple adds/deletes, allocate larger buffer
+ *
+ * Note we need 2xcount buffer for add case:
+ * we have hold both ADD state
+ * and DELETE state (this may be needed
+ * if we need to rollback all changes)
+ */
+ sz = count * ta_buf_sz;
+ ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP,
+ M_WAITOK | M_ZERO);
+ }
+
+ v = ta_buf_m;
+ for (i = 0; i < count; i++, v += ta_buf_sz) {
+ ptei = &tei[i];
+ error = (op == OP_ADD) ?
+ ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v);
+
+ /*
+ * Some syntax error (incorrect mask, or address, or
+ * anything). Return error regardless of atomicity
+ * settings.
+ */
+ if (error != 0)
+ break;
+ }
+
+ *ta_buf = ta_buf_m;
+ return (error);
}
-#endif
+/*
+ * Flushes allocated state for each @count entries in @tei.
+ * Frees @ta_buf_m if differs from stack buffer @ta_buf.
+ */
+static void
+flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
+ struct tentry_info *tei, uint32_t count, int rollback,
+ caddr_t ta_buf_m, caddr_t ta_buf)
+{
+ caddr_t v;
+ struct tentry_info *ptei;
+ size_t ta_buf_sz;
+ int i;
+
+ ta_buf_sz = ta->ta_buf_size;
+
+ /* Run cleaning callback anyway */
+ v = ta_buf_m;
+ for (i = 0; i < count; i++, v += ta_buf_sz) {
+ ptei = &tei[i];
+ ta->flush_entry(ch, ptei, v);
+ if (ptei->ptv != NULL) {
+ free(ptei->ptv, M_IPFW);
+ ptei->ptv = NULL;
+ }
+ }
+
+ /* Clean up "deleted" state in case of rollback */
+ if (rollback != 0) {
+ v = ta_buf_m + count * ta_buf_sz;
+ for (i = 0; i < count; i++, v += ta_buf_sz)
+ ta->flush_entry(ch, &tei[i], v);
+ }
+
+ if (ta_buf_m != ta_buf)
+ free(ta_buf_m, M_TEMP);
+}
+
+
+static void
+rollback_add_entry(void *object, struct op_state *_state)
+{
+ struct ip_fw_chain *ch;
+ struct tableop_state *ts;
+
+ ts = (struct tableop_state *)_state;
+
+ if (ts->tc != object && ts->ch != object)
+ return;
+
+ ch = ts->ch;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ /* Call specifid unlockers */
+ rollback_table_values(ts);
+
+ /* Indicate we've called */
+ ts->modified = 1;
+}
+
+/*
+ * Adds/updates one or more entries in table @ti.
+ *
+ * Function may drop/reacquire UH wlock multiple times due to
+ * items alloc, algorithm callbacks (check_space), value linkage
+ * (new values, value storage realloc), etc..
+ * Other processes like other adds (which may involve storage resize),
+ * table swaps (which changes table data and may change algo type),
+ * table modify (which may change value mask) may be executed
+ * simultaneously so we need to deal with it.
+ *
+ * The following approach was implemented:
+ * we have per-chain linked list, protected with UH lock.
+ * add_table_entry prepares special on-stack structure wthich is passed
+ * to its descendants. Users add this structure to this list before unlock.
+ * After performing needed operations and acquiring UH lock back, each user
+ * checks if structure has changed. If true, it rolls local state back and
+ * returns without error to the caller.
+ * add_table_entry() on its own checks if structure has changed and restarts
+ * its operation from the beginning (goto restart).
+ *
+ * Functions which are modifying fields of interest (currently
+ * resize_shared_value_storage() and swap_tables() )
+ * traverses given list while holding UH lock immediately before
+ * performing their operations calling function provided be list entry
+ * ( currently rollback_add_entry ) which performs rollback for all necessary
+ * state and sets appropriate values in structure indicating rollback
+ * has happened.
+ *
+ * Algo interaction:
+ * Function references @ti first to ensure table won't
+ * disappear or change its type.
+ * After that, prepare_add callback is called for each @tei entry.
+ * Next, we try to add each entry under UH+WHLOCK
+ * using add() callback.
+ * Finally, we free all state by calling flush_entry callback
+ * for each @tei.
+ *
+ * Returns 0 on success.
+ */
int
-ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
- uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value)
-{
- struct radix_node_head *rnh, **rnh_ptr;
- struct table_entry *ent;
- struct table_xentry *xent;
- struct radix_node *rn;
- in_addr_t addr;
- int offset;
- void *ent_ptr;
- struct sockaddr *addr_ptr, *mask_ptr;
- char c;
-
- if (tbl >= V_fw_tables_max)
- return (EINVAL);
+add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
+ struct tentry_info *tei, uint8_t flags, uint32_t count)
+{
+ struct table_config *tc;
+ struct table_algo *ta;
+ uint16_t kidx;
+ int error, first_error, i, rollback;
+ uint32_t num, numadd;
+ struct tentry_info *ptei;
+ struct tableop_state ts;
+ char ta_buf[TA_BUF_SZ];
+ caddr_t ta_buf_m, v;
+
+ memset(&ts, 0, sizeof(ts));
+ ta = NULL;
+ IPFW_UH_WLOCK(ch);
- switch (type) {
- case IPFW_TABLE_CIDR:
- if (plen == sizeof(in_addr_t)) {
-#ifdef INET
- /* IPv4 case */
- if (mlen > 32)
- return (EINVAL);
- ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
- ent->value = value;
- /* Set 'total' structure length */
- KEY_LEN(ent->addr) = KEY_LEN_INET;
- KEY_LEN(ent->mask) = KEY_LEN_INET;
- /* Set offset of IPv4 address in bits */
- offset = OFF_LEN_INET;
- ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
- addr = *((in_addr_t *)paddr);
- ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr;
- /* Set pointers */
- rnh_ptr = &ch->tables[tbl];
- ent_ptr = ent;
- addr_ptr = (struct sockaddr *)&ent->addr;
- mask_ptr = (struct sockaddr *)&ent->mask;
-#endif
-#ifdef INET6
- } else if (plen == sizeof(struct in6_addr)) {
- /* IPv6 case */
- if (mlen > 128)
- return (EINVAL);
- xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
- xent->value = value;
- /* Set 'total' structure length */
- KEY_LEN(xent->a.addr6) = KEY_LEN_INET6;
- KEY_LEN(xent->m.mask6) = KEY_LEN_INET6;
- /* Set offset of IPv6 address in bits */
- offset = OFF_LEN_INET6;
- ipv6_writemask(&xent->m.mask6.sin6_addr, mlen);
- memcpy(&xent->a.addr6.sin6_addr, paddr, sizeof(struct in6_addr));
- APPLY_MASK(&xent->a.addr6.sin6_addr, &xent->m.mask6.sin6_addr);
- /* Set pointers */
- rnh_ptr = &ch->xtables[tbl];
- ent_ptr = xent;
- addr_ptr = (struct sockaddr *)&xent->a.addr6;
- mask_ptr = (struct sockaddr *)&xent->m.mask6;
-#endif
- } else {
- /* Unknown CIDR type */
- return (EINVAL);
+ /*
+ * Find and reference existing table.
+ */
+restart:
+ if (ts.modified != 0) {
+ IPFW_UH_WUNLOCK(ch);
+ flush_batch_buffer(ch, ta, tei, count, rollback,
+ ta_buf_m, ta_buf);
+ memset(&ts, 0, sizeof(ts));
+ ta = NULL;
+ IPFW_UH_WLOCK(ch);
+ }
+
+ error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc);
+ if (error != 0) {
+ IPFW_UH_WUNLOCK(ch);
+ return (error);
+ }
+ ta = tc->ta;
+
+ /* Fill in tablestate */
+ ts.ch = ch;
+ ts.opstate.func = rollback_add_entry;
+ ts.tc = tc;
+ ts.vshared = tc->vshared;
+ ts.vmask = tc->vmask;
+ ts.ta = ta;
+ ts.tei = tei;
+ ts.count = count;
+ rollback = 0;
+ add_toperation_state(ch, &ts);
+ IPFW_UH_WUNLOCK(ch);
+
+ /* Allocate memory and prepare record(s) */
+ /* Pass stack buffer by default */
+ ta_buf_m = ta_buf;
+ error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m);
+
+ IPFW_UH_WLOCK(ch);
+ del_toperation_state(ch, &ts);
+ /* Drop reference we've used in first search */
+ tc->no.refcnt--;
+
+ /* Check prepare_batch_buffer() error */
+ if (error != 0)
+ goto cleanup;
+
+ /*
+ * Check if table swap has happened.
+ * (so table algo might be changed).
+ * Restart operation to achieve consistent behavior.
+ */
+ if (ts.modified != 0)
+ goto restart;
+
+ /*
+ * Link all values values to shared/per-table value array.
+ *
+ * May release/reacquire UH_WLOCK.
+ */
+ error = ipfw_link_table_values(ch, &ts);
+ if (error != 0)
+ goto cleanup;
+ if (ts.modified != 0)
+ goto restart;
+
+ /*
+ * Ensure we are able to add all entries without additional
+ * memory allocations. May release/reacquire UH_WLOCK.
+ */
+ kidx = tc->no.kidx;
+ error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count);
+ if (error != 0)
+ goto cleanup;
+ if (ts.modified != 0)
+ goto restart;
+
+ /* We've got valid table in @tc. Let's try to add data */
+ kidx = tc->no.kidx;
+ ta = tc->ta;
+ numadd = 0;
+ first_error = 0;
+
+ IPFW_WLOCK(ch);
+
+ v = ta_buf_m;
+ for (i = 0; i < count; i++, v += ta->ta_buf_size) {
+ ptei = &tei[i];
+ num = 0;
+ /* check limit before adding */
+ if ((error = check_table_limit(tc, ptei)) == 0) {
+ error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx),
+ ptei, v, &num);
+ /* Set status flag to inform userland */
+ store_tei_result(ptei, OP_ADD, error, num);
}
+ if (error == 0) {
+ /* Update number of records to ease limit checking */
+ tc->count += num;
+ numadd += num;
+ continue;
+ }
+
+ if (first_error == 0)
+ first_error = error;
+
+ /*
+ * Some error have happened. Check our atomicity
+ * settings: continue if atomicity is not required,
+ * rollback changes otherwise.
+ */
+ if ((flags & IPFW_CTF_ATOMIC) == 0)
+ continue;
+
+ rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx),
+ tei, ta_buf_m, count, i);
+
+ rollback = 1;
break;
+ }
+
+ IPFW_WUNLOCK(ch);
+
+ ipfw_garbage_table_values(ch, tc, tei, count, rollback);
+
+ /* Permit post-add algorithm grow/rehash. */
+ if (numadd != 0)
+ check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
+
+ /* Return first error to user, if any */
+ error = first_error;
+
+cleanup:
+ IPFW_UH_WUNLOCK(ch);
+
+ flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf);
- case IPFW_TABLE_INTERFACE:
- /* Check if string is terminated */
- c = ((char *)paddr)[IF_NAMESIZE - 1];
- ((char *)paddr)[IF_NAMESIZE - 1] = '\0';
- if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0'))
- return (EINVAL);
+ return (error);
+}
- /* Include last \0 into comparison */
- mlen++;
-
- xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
- xent->value = value;
- /* Set 'total' structure length */
- KEY_LEN(xent->a.iface) = KEY_LEN_IFACE + mlen;
- KEY_LEN(xent->m.ifmask) = KEY_LEN_IFACE + mlen;
- /* Set offset of interface name in bits */
- offset = OFF_LEN_IFACE;
- memcpy(xent->a.iface.ifname, paddr, mlen);
- /* Assume direct match */
- /* TODO: Add interface pattern matching */
-#if 0
- memset(xent->m.ifmask.ifname, 0xFF, IF_NAMESIZE);
- mask_ptr = (struct sockaddr *)&xent->m.ifmask;
-#endif
- /* Set pointers */
- rnh_ptr = &ch->xtables[tbl];
- ent_ptr = xent;
- addr_ptr = (struct sockaddr *)&xent->a.iface;
- mask_ptr = NULL;
- break;
+/*
+ * Deletes one or more entries in table @ti.
+ *
+ * Returns 0 on success.
+ */
+int
+del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
+ struct tentry_info *tei, uint8_t flags, uint32_t count)
+{
+ struct table_config *tc;
+ struct table_algo *ta;
+ struct tentry_info *ptei;
+ uint16_t kidx;
+ int error, first_error, i;
+ uint32_t num, numdel;
+ char ta_buf[TA_BUF_SZ];
+ caddr_t ta_buf_m, v;
- default:
- return (EINVAL);
+ /*
+ * Find and reference existing table.
+ */
+ IPFW_UH_WLOCK(ch);
+ error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc);
+ if (error != 0) {
+ IPFW_UH_WUNLOCK(ch);
+ return (error);
+ }
+ ta = tc->ta;
+ IPFW_UH_WUNLOCK(ch);
+
+ /* Allocate memory and prepare record(s) */
+ /* Pass stack buffer by default */
+ ta_buf_m = ta_buf;
+ error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m);
+ if (error != 0)
+ goto cleanup;
+
+ IPFW_UH_WLOCK(ch);
+
+ /* Drop reference we've used in first search */
+ tc->no.refcnt--;
+
+ /*
+ * Check if table algo is still the same.
+ * (changed ta may be the result of table swap).
+ */
+ if (ta != tc->ta) {
+ IPFW_UH_WUNLOCK(ch);
+ error = EINVAL;
+ goto cleanup;
}
+ kidx = tc->no.kidx;
+ numdel = 0;
+ first_error = 0;
+
IPFW_WLOCK(ch);
+ v = ta_buf_m;
+ for (i = 0; i < count; i++, v += ta->ta_buf_size) {
+ ptei = &tei[i];
+ num = 0;
+ error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v,
+ &num);
+ /* Save state for userland */
+ store_tei_result(ptei, OP_DEL, error, num);
+ if (error != 0 && first_error == 0)
+ first_error = error;
+ tc->count -= num;
+ numdel += num;
+ }
+ IPFW_WUNLOCK(ch);
- /* Check if tabletype is valid */
- if ((ch->tabletype[tbl] != 0) && (ch->tabletype[tbl] != type)) {
- IPFW_WUNLOCK(ch);
- free(ent_ptr, M_IPFW_TBL);
- return (EINVAL);
+ /* Unlink non-used values */
+ ipfw_garbage_table_values(ch, tc, tei, count, 0);
+
+ if (numdel != 0) {
+ /* Run post-del hook to permit shrinking */
+ check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
}
- /* Check if radix tree exists */
- if ((rnh = *rnh_ptr) == NULL) {
- IPFW_WUNLOCK(ch);
- /* Create radix for a new table */
- if (!rn_inithead((void **)&rnh, offset)) {
- free(ent_ptr, M_IPFW_TBL);
- return (ENOMEM);
+ IPFW_UH_WUNLOCK(ch);
+
+ /* Return first error to user, if any */
+ error = first_error;
+
+cleanup:
+ flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf);
+
+ return (error);
+}
+
+/*
+ * Ensure that table @tc has enough space to add @count entries without
+ * need for reallocation.
+ *
+ * Callbacks order:
+ * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize.
+ *
+ * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags.
+ * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage
+ * 3) modify (UH_WLOCK + WLOCK) - switch pointers
+ * 4) flush_modify (UH_WLOCK) - free state, if needed
+ *
+ * Returns 0 on success.
+ */
+static int
+check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
+ struct table_config *tc, struct table_info *ti, uint32_t count)
+{
+ struct table_algo *ta;
+ uint64_t pflags;
+ char ta_buf[TA_BUF_SZ];
+ int error;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ error = 0;
+ ta = tc->ta;
+ if (ta->need_modify == NULL)
+ return (0);
+
+ /* Acquire reference not to loose @tc between locks/unlocks */
+ tc->no.refcnt++;
+
+ /*
+ * TODO: think about avoiding race between large add/large delete
+ * operation on algorithm which implements shrinking along with
+ * growing.
+ */
+ while (true) {
+ pflags = 0;
+ if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
+ error = 0;
+ break;
}
- IPFW_WLOCK(ch);
- if (*rnh_ptr != NULL) {
- /* Tree is already attached by other thread */
- rn_detachhead((void **)&rnh);
- rnh = *rnh_ptr;
- /* Check table type another time */
- if (ch->tabletype[tbl] != type) {
- IPFW_WUNLOCK(ch);
- free(ent_ptr, M_IPFW_TBL);
- return (EINVAL);
- }
- } else {
- *rnh_ptr = rnh;
- /*
- * Set table type. It can be set already
- * (if we have IPv6-only table) but setting
- * it another time does not hurt
+ /* We have to shrink/grow table */
+ if (ts != NULL)
+ add_toperation_state(ch, ts);
+ IPFW_UH_WUNLOCK(ch);
+
+ memset(&ta_buf, 0, sizeof(ta_buf));
+ error = ta->prepare_mod(ta_buf, &pflags);
+
+ IPFW_UH_WLOCK(ch);
+ if (ts != NULL)
+ del_toperation_state(ch, ts);
+
+ if (error != 0)
+ break;
+
+ if (ts != NULL && ts->modified != 0) {
+
+ /*
+ * Swap operation has happened
+ * so we're currently operating on other
+ * table data. Stop doing this.
+ */
+ ta->flush_mod(ta_buf);
+ break;
+ }
+
+ /* Check if we still need to alter table */
+ ti = KIDX_TO_TI(ch, tc->no.kidx);
+ if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
+ IPFW_UH_WUNLOCK(ch);
+
+ /*
+ * Other thread has already performed resize.
+ * Flush our state and return.
*/
- ch->tabletype[tbl] = type;
+ ta->flush_mod(ta_buf);
+ break;
+ }
+
+ error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags);
+ if (error == 0) {
+ /* Do actual modification */
+ IPFW_WLOCK(ch);
+ ta->modify(tc->astate, ti, ta_buf, pflags);
+ IPFW_WUNLOCK(ch);
}
+
+ /* Anyway, flush data and retry */
+ ta->flush_mod(ta_buf);
}
- rn = rnh->rnh_addaddr(addr_ptr, mask_ptr, rnh, ent_ptr);
- IPFW_WUNLOCK(ch);
+ tc->no.refcnt--;
+ return (error);
+}
- if (rn == NULL) {
- free(ent_ptr, M_IPFW_TBL);
- return (EEXIST);
+/*
+ * Adds or deletes record in table.
+ * Data layout (v0):
+ * Request: [ ip_fw3_opheader ipfw_table_xentry ]
+ *
+ * Returns 0 on success
+ */
+static int
+manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_table_xentry *xent;
+ struct tentry_info tei;
+ struct tid_info ti;
+ struct table_value v;
+ int error, hdrlen, read;
+
+ hdrlen = offsetof(ipfw_table_xentry, k);
+
+ /* Check minimum header size */
+ if (sd->valsize < (sizeof(*op3) + hdrlen))
+ return (EINVAL);
+
+ read = sizeof(ip_fw3_opheader);
+
+ /* Check if xentry len field is valid */
+ xent = (ipfw_table_xentry *)(op3 + 1);
+ if (xent->len < hdrlen || xent->len + read > sd->valsize)
+ return (EINVAL);
+
+ memset(&tei, 0, sizeof(tei));
+ tei.paddr = &xent->k;
+ tei.masklen = xent->masklen;
+ ipfw_import_table_value_legacy(xent->value, &v);
+ tei.pvalue = &v;
+ /* Old requests compatibility */
+ tei.flags = TEI_FLAGS_COMPAT;
+ if (xent->type == IPFW_TABLE_ADDR) {
+ if (xent->len - hdrlen == sizeof(in_addr_t))
+ tei.subtype = AF_INET;
+ else
+ tei.subtype = AF_INET6;
}
- return (0);
+
+ memset(&ti, 0, sizeof(ti));
+ ti.uidx = xent->tbl;
+ ti.type = xent->type;
+
+ error = (op3->opcode == IP_FW_TABLE_XADD) ?
+ add_table_entry(ch, &ti, &tei, 0, 1) :
+ del_table_entry(ch, &ti, &tei, 0, 1);
+
+ return (error);
}
-int
-ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
- uint8_t plen, uint8_t mlen, uint8_t type)
+/*
+ * Adds or deletes record in table.
+ * Data layout (v1)(current):
+ * Request: [ ipfw_obj_header
+ * ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ]
+ * ]
+ *
+ * Returns 0 on success
+ */
+static int
+manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
{
- struct radix_node_head *rnh, **rnh_ptr;
- struct table_entry *ent;
- in_addr_t addr;
- struct sockaddr_in sa, mask;
- struct sockaddr *sa_ptr, *mask_ptr;
- char c;
+ ipfw_obj_tentry *tent, *ptent;
+ ipfw_obj_ctlv *ctlv;
+ ipfw_obj_header *oh;
+ struct tentry_info *ptei, tei, *tei_buf;
+ struct tid_info ti;
+ int error, i, kidx, read;
+
+ /* Check minimum header size */
+ if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv)))
+ return (EINVAL);
- if (tbl >= V_fw_tables_max)
+ /* Check if passed data is too long */
+ if (sd->valsize != sd->kavail)
return (EINVAL);
- switch (type) {
- case IPFW_TABLE_CIDR:
- if (plen == sizeof(in_addr_t)) {
- /* Set 'total' structure length */
- KEY_LEN(sa) = KEY_LEN_INET;
- KEY_LEN(mask) = KEY_LEN_INET;
- mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
- addr = *((in_addr_t *)paddr);
- sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr;
- rnh_ptr = &ch->tables[tbl];
- sa_ptr = (struct sockaddr *)&sa;
- mask_ptr = (struct sockaddr *)&mask;
-#ifdef INET6
- } else if (plen == sizeof(struct in6_addr)) {
- /* IPv6 case */
- if (mlen > 128)
- return (EINVAL);
- struct sockaddr_in6 sa6, mask6;
- memset(&sa6, 0, sizeof(struct sockaddr_in6));
- memset(&mask6, 0, sizeof(struct sockaddr_in6));
- /* Set 'total' structure length */
- KEY_LEN(sa6) = KEY_LEN_INET6;
- KEY_LEN(mask6) = KEY_LEN_INET6;
- ipv6_writemask(&mask6.sin6_addr, mlen);
- memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr));
- APPLY_MASK(&sa6.sin6_addr, &mask6.sin6_addr);
- rnh_ptr = &ch->xtables[tbl];
- sa_ptr = (struct sockaddr *)&sa6;
- mask_ptr = (struct sockaddr *)&mask6;
-#endif
- } else {
- /* Unknown CIDR type */
- return (EINVAL);
- }
- break;
+ oh = (ipfw_obj_header *)sd->kbuf;
- case IPFW_TABLE_INTERFACE:
- /* Check if string is terminated */
- c = ((char *)paddr)[IF_NAMESIZE - 1];
- ((char *)paddr)[IF_NAMESIZE - 1] = '\0';
- if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0'))
- return (EINVAL);
+ /* Basic length checks for TLVs */
+ if (oh->ntlv.head.length != sizeof(oh->ntlv))
+ return (EINVAL);
- struct xaddr_iface ifname, ifmask;
- memset(&ifname, 0, sizeof(ifname));
-
- /* Include last \0 into comparison */
- mlen++;
-
- /* Set 'total' structure length */
- KEY_LEN(ifname) = KEY_LEN_IFACE + mlen;
- KEY_LEN(ifmask) = KEY_LEN_IFACE + mlen;
- /* Assume direct match */
- /* FIXME: Add interface pattern matching */
-#if 0
- memset(ifmask.ifname, 0xFF, IF_NAMESIZE);
- mask_ptr = (struct sockaddr *)&ifmask;
-#endif
- mask_ptr = NULL;
- memcpy(ifname.ifname, paddr, mlen);
- /* Set pointers */
- rnh_ptr = &ch->xtables[tbl];
- sa_ptr = (struct sockaddr *)&ifname;
+ read = sizeof(*oh);
- break;
+ ctlv = (ipfw_obj_ctlv *)(oh + 1);
+ if (ctlv->head.length + read != sd->valsize)
+ return (EINVAL);
- default:
+ read += sizeof(*ctlv);
+ tent = (ipfw_obj_tentry *)(ctlv + 1);
+ if (ctlv->count * sizeof(*tent) + read != sd->valsize)
return (EINVAL);
+
+ if (ctlv->count == 0)
+ return (0);
+
+ /*
+ * Mark entire buffer as "read".
+ * This instructs sopt api write it back
+ * after function return.
+ */
+ ipfw_get_sopt_header(sd, sd->valsize);
+
+ /* Perform basic checks for each entry */
+ ptent = tent;
+ kidx = tent->idx;
+ for (i = 0; i < ctlv->count; i++, ptent++) {
+ if (ptent->head.length != sizeof(*ptent))
+ return (EINVAL);
+ if (ptent->idx != kidx)
+ return (ENOTSUP);
}
- IPFW_WLOCK(ch);
- if ((rnh = *rnh_ptr) == NULL) {
- IPFW_WUNLOCK(ch);
+ /* Convert data into kernel request objects */
+ objheader_to_ti(oh, &ti);
+ ti.type = oh->ntlv.type;
+ ti.uidx = kidx;
+
+ /* Use on-stack buffer for single add/del */
+ if (ctlv->count == 1) {
+ memset(&tei, 0, sizeof(tei));
+ tei_buf = &tei;
+ } else
+ tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP,
+ M_WAITOK | M_ZERO);
+
+ ptei = tei_buf;
+ ptent = tent;
+ for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
+ ptei->paddr = &ptent->k;
+ ptei->subtype = ptent->subtype;
+ ptei->masklen = ptent->masklen;
+ if (ptent->head.flags & IPFW_TF_UPDATE)
+ ptei->flags |= TEI_FLAGS_UPDATE;
+
+ ipfw_import_table_value_v1(&ptent->v.value);
+ ptei->pvalue = (struct table_value *)&ptent->v.value;
+ }
+
+ error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ?
+ add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) :
+ del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count);
+
+ /* Translate result back to userland */
+ ptei = tei_buf;
+ ptent = tent;
+ for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
+ if (ptei->flags & TEI_FLAGS_ADDED)
+ ptent->result = IPFW_TR_ADDED;
+ else if (ptei->flags & TEI_FLAGS_DELETED)
+ ptent->result = IPFW_TR_DELETED;
+ else if (ptei->flags & TEI_FLAGS_UPDATED)
+ ptent->result = IPFW_TR_UPDATED;
+ else if (ptei->flags & TEI_FLAGS_LIMIT)
+ ptent->result = IPFW_TR_LIMIT;
+ else if (ptei->flags & TEI_FLAGS_ERROR)
+ ptent->result = IPFW_TR_ERROR;
+ else if (ptei->flags & TEI_FLAGS_NOTFOUND)
+ ptent->result = IPFW_TR_NOTFOUND;
+ else if (ptei->flags & TEI_FLAGS_EXISTS)
+ ptent->result = IPFW_TR_EXISTS;
+ ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value);
+ }
+
+ if (tei_buf != &tei)
+ free(tei_buf, M_TEMP);
+
+ return (error);
+}
+
+/*
+ * Looks up an entry in given table.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_obj_tentry ]
+ * Reply: [ ipfw_obj_header ipfw_obj_tentry ]
+ *
+ * Returns 0 on success
+ */
+static int
+find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_tentry *tent;
+ ipfw_obj_header *oh;
+ struct tid_info ti;
+ struct table_config *tc;
+ struct table_algo *ta;
+ struct table_info *kti;
+ struct namedobj_instance *ni;
+ int error;
+ size_t sz;
+
+ /* Check minimum header size */
+ sz = sizeof(*oh) + sizeof(*tent);
+ if (sd->valsize != sz)
+ return (EINVAL);
+
+ oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+ tent = (ipfw_obj_tentry *)(oh + 1);
+
+ /* Basic length checks for TLVs */
+ if (oh->ntlv.head.length != sizeof(oh->ntlv))
+ return (EINVAL);
+
+ objheader_to_ti(oh, &ti);
+ ti.type = oh->ntlv.type;
+ ti.uidx = tent->idx;
+
+ IPFW_UH_RLOCK(ch);
+ ni = CHAIN_TO_NI(ch);
+
+ /*
+ * Find existing table and check its type .
+ */
+ ta = NULL;
+ if ((tc = find_table(ni, &ti)) == NULL) {
+ IPFW_UH_RUNLOCK(ch);
return (ESRCH);
}
- if (ch->tabletype[tbl] != type) {
- IPFW_WUNLOCK(ch);
+ /* check table type */
+ if (tc->no.subtype != ti.type) {
+ IPFW_UH_RUNLOCK(ch);
return (EINVAL);
}
- ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh);
- IPFW_WUNLOCK(ch);
+ kti = KIDX_TO_TI(ch, tc->no.kidx);
+ ta = tc->ta;
- if (ent == NULL)
- return (ESRCH);
+ if (ta->find_tentry == NULL)
+ return (ENOTSUP);
- free(ent, M_IPFW_TBL);
- return (0);
+ error = ta->find_tentry(tc->astate, kti, tent);
+
+ IPFW_UH_RUNLOCK(ch);
+
+ return (error);
}
+/*
+ * Flushes all entries or destroys given table.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success
+ */
static int
-flush_table_entry(struct radix_node *rn, void *arg)
+flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
{
- struct radix_node_head * const rnh = arg;
- struct table_entry *ent;
+ int error;
+ struct _ipfw_obj_header *oh;
+ struct tid_info ti;
- ent = (struct table_entry *)
- rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
- if (ent != NULL)
- free(ent, M_IPFW_TBL);
- return (0);
+ if (sd->valsize != sizeof(*oh))
+ return (EINVAL);
+
+ oh = (struct _ipfw_obj_header *)op3;
+ objheader_to_ti(oh, &ti);
+
+ if (op3->opcode == IP_FW_TABLE_XDESTROY)
+ error = destroy_table(ch, &ti);
+ else if (op3->opcode == IP_FW_TABLE_XFLUSH)
+ error = flush_table(ch, &ti);
+ else
+ return (ENOTSUP);
+
+ return (error);
}
+static void
+restart_flush(void *object, struct op_state *_state)
+{
+ struct tableop_state *ts;
+
+ ts = (struct tableop_state *)_state;
+
+ if (ts->tc != object)
+ return;
+
+ /* Indicate we've called */
+ ts->modified = 1;
+}
+
+/*
+ * Flushes given table.
+ *
+ * Function creates a new table instance with the same
+ * parameters, swaps it with old one and
+ * flushes state without holding runtime WLOCK.
+ *
+ * Returns 0 on success.
+ */
int
-ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl)
+flush_table(struct ip_fw_chain *ch, struct tid_info *ti)
{
- struct radix_node_head *rnh, *xrnh;
+ struct namedobj_instance *ni;
+ struct table_config *tc;
+ struct table_algo *ta;
+ struct table_info ti_old, ti_new, *tablestate;
+ void *astate_old, *astate_new;
+ char algostate[64], *pstate;
+ struct tableop_state ts;
+ int error, need_gc;
+ uint16_t kidx;
+ uint8_t tflags;
- if (tbl >= V_fw_tables_max)
- return (EINVAL);
+ /*
+ * Stage 1: save table algorithm.
+ * Reference found table to ensure it won't disappear.
+ */
+ IPFW_UH_WLOCK(ch);
+ ni = CHAIN_TO_NI(ch);
+ if ((tc = find_table(ni, ti)) == NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ return (ESRCH);
+ }
+ need_gc = 0;
+ astate_new = NULL;
+ memset(&ti_new, 0, sizeof(ti_new));
+restart:
+ /* Set up swap handler */
+ memset(&ts, 0, sizeof(ts));
+ ts.opstate.func = restart_flush;
+ ts.tc = tc;
+
+ ta = tc->ta;
+ /* Do not flush readonly tables */
+ if ((ta->flags & TA_FLAG_READONLY) != 0) {
+ IPFW_UH_WUNLOCK(ch);
+ return (EACCES);
+ }
+ /* Save startup algo parameters */
+ if (ta->print_config != NULL) {
+ ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx),
+ algostate, sizeof(algostate));
+ pstate = algostate;
+ } else
+ pstate = NULL;
+ tflags = tc->tflags;
+ tc->no.refcnt++;
+ add_toperation_state(ch, &ts);
+ IPFW_UH_WUNLOCK(ch);
+
+ /*
+ * Stage 1.5: if this is not the first attempt, destroy previous state
+ */
+ if (need_gc != 0) {
+ ta->destroy(astate_new, &ti_new);
+ need_gc = 0;
+ }
/*
- * We free both (IPv4 and extended) radix trees and
- * clear table type here to permit table to be reused
- * for different type without module reload
+ * Stage 2: allocate new table instance using same algo.
*/
+ memset(&ti_new, 0, sizeof(struct table_info));
+ error = ta->init(ch, &astate_new, &ti_new, pstate, tflags);
+
+ /*
+ * Stage 3: swap old state pointers with newly-allocated ones.
+ * Decrease refcount.
+ */
+ IPFW_UH_WLOCK(ch);
+ tc->no.refcnt--;
+ del_toperation_state(ch, &ts);
+
+ if (error != 0) {
+ IPFW_UH_WUNLOCK(ch);
+ return (error);
+ }
+
+ /*
+ * Restart operation if table swap has happened:
+ * even if algo may be the same, algo init parameters
+ * may change. Restart operation instead of doing
+ * complex checks.
+ */
+ if (ts.modified != 0) {
+ /* Delay destroying data since we're holding UH lock */
+ need_gc = 1;
+ goto restart;
+ }
+
+ ni = CHAIN_TO_NI(ch);
+ kidx = tc->no.kidx;
+ tablestate = (struct table_info *)ch->tablestate;
IPFW_WLOCK(ch);
- /* Set IPv4 table pointer to zero */
- if ((rnh = ch->tables[tbl]) != NULL)
- ch->tables[tbl] = NULL;
- /* Set extended table pointer to zero */
- if ((xrnh = ch->xtables[tbl]) != NULL)
- ch->xtables[tbl] = NULL;
- /* Zero table type */
- ch->tabletype[tbl] = 0;
+ ti_old = tablestate[kidx];
+ tablestate[kidx] = ti_new;
IPFW_WUNLOCK(ch);
- if (rnh != NULL) {
- rnh->rnh_walktree(rnh, flush_table_entry, rnh);
- rn_detachhead((void **)&rnh);
+ astate_old = tc->astate;
+ tc->astate = astate_new;
+ tc->ti_copy = ti_new;
+ tc->count = 0;
+
+ /* Notify algo on real @ti address */
+ if (ta->change_ti != NULL)
+ ta->change_ti(tc->astate, &tablestate[kidx]);
+
+ /*
+ * Stage 4: unref values.
+ */
+ ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old);
+ IPFW_UH_WUNLOCK(ch);
+
+ /*
+ * Stage 5: perform real flush/destroy.
+ */
+ ta->destroy(astate_old, &ti_old);
+
+ return (0);
+}
+
+/*
+ * Swaps two tables.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_obj_ntlv ]
+ *
+ * Returns 0 on success
+ */
+static int
+swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ int error;
+ struct _ipfw_obj_header *oh;
+ struct tid_info ti_a, ti_b;
+
+ if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv))
+ return (EINVAL);
+
+ oh = (struct _ipfw_obj_header *)op3;
+ ntlv_to_ti(&oh->ntlv, &ti_a);
+ ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b);
+
+ error = swap_tables(ch, &ti_a, &ti_b);
+
+ return (error);
+}
+
+/*
+ * Swaps two tables of the same type/valtype.
+ *
+ * Checks if tables are compatible and their limits
+ * permit the swap, then actually performs the swap.
+ *
+ * Each table consists of 2 different parts:
+ * config:
+ * @tc (with name, set, kidx) and rule bindings, which is "stable".
+ * number of items
+ * table algo
+ * runtime:
+ * runtime data @ti (ch->tablestate)
+ * runtime cache in @tc
+ * algo-specific data (@tc->astate)
+ *
+ * So we switch:
+ * all runtime data
+ * number of items
+ * table algo
+ *
+ * After that we call @ti change handler for each table.
+ *
+ * Note that referencing @tc won't protect tc->ta from change.
+ * XXX: Do we need to restrict swap between locked tables?
+ * XXX: Do we need to exchange ftype?
+ *
+ * Returns 0 on success.
+ */
+static int
+swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
+ struct tid_info *b)
+{
+ struct namedobj_instance *ni;
+ struct table_config *tc_a, *tc_b;
+ struct table_algo *ta;
+ struct table_info ti, *tablestate;
+ void *astate;
+ uint32_t count;
+
+ /*
+ * Stage 1: find both tables and ensure they are of
+ * the same type.
+ */
+ IPFW_UH_WLOCK(ch);
+ ni = CHAIN_TO_NI(ch);
+ if ((tc_a = find_table(ni, a)) == NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ return (ESRCH);
+ }
+ if ((tc_b = find_table(ni, b)) == NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ return (ESRCH);
+ }
+
+ /* It is very easy to swap between the same table */
+ if (tc_a == tc_b) {
+ IPFW_UH_WUNLOCK(ch);
+ return (0);
+ }
+
+ /* Check type and value are the same */
+ if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) {
+ IPFW_UH_WUNLOCK(ch);
+ return (EINVAL);
}
- if (xrnh != NULL) {
- xrnh->rnh_walktree(xrnh, flush_table_entry, xrnh);
- rn_detachhead((void **)&xrnh);
+ /* Check limits before swap */
+ if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) ||
+ (tc_b->limit != 0 && tc_a->count > tc_b->limit)) {
+ IPFW_UH_WUNLOCK(ch);
+ return (EFBIG);
}
+ /* Check if one of the tables is readonly */
+ if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) {
+ IPFW_UH_WUNLOCK(ch);
+ return (EACCES);
+ }
+
+ /* Notify we're going to swap */
+ rollback_toperation_state(ch, tc_a);
+ rollback_toperation_state(ch, tc_b);
+
+ /* Everything is fine, prepare to swap */
+ tablestate = (struct table_info *)ch->tablestate;
+ ti = tablestate[tc_a->no.kidx];
+ ta = tc_a->ta;
+ astate = tc_a->astate;
+ count = tc_a->count;
+
+ IPFW_WLOCK(ch);
+ /* a <- b */
+ tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx];
+ tc_a->ta = tc_b->ta;
+ tc_a->astate = tc_b->astate;
+ tc_a->count = tc_b->count;
+ /* b <- a */
+ tablestate[tc_b->no.kidx] = ti;
+ tc_b->ta = ta;
+ tc_b->astate = astate;
+ tc_b->count = count;
+ IPFW_WUNLOCK(ch);
+
+ /* Ensure tc.ti copies are in sync */
+ tc_a->ti_copy = tablestate[tc_a->no.kidx];
+ tc_b->ti_copy = tablestate[tc_b->no.kidx];
+
+ /* Notify both tables on @ti change */
+ if (tc_a->ta->change_ti != NULL)
+ tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]);
+ if (tc_b->ta->change_ti != NULL)
+ tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]);
+
+ IPFW_UH_WUNLOCK(ch);
+
return (0);
}
-void
-ipfw_destroy_tables(struct ip_fw_chain *ch)
+/*
+ * Destroys table specified by @ti.
+ * Data layout (v0)(current):
+ * Request: [ ip_fw3_opheader ]
+ *
+ * Returns 0 on success
+ */
+static int
+destroy_table(struct ip_fw_chain *ch, struct tid_info *ti)
{
- uint16_t tbl;
+ struct namedobj_instance *ni;
+ struct table_config *tc;
- /* Flush all tables */
- for (tbl = 0; tbl < V_fw_tables_max; tbl++)
- ipfw_flush_table(ch, tbl);
+ IPFW_UH_WLOCK(ch);
- /* Free pointers itself */
- free(ch->tables, M_IPFW);
- free(ch->xtables, M_IPFW);
- free(ch->tabletype, M_IPFW);
+ ni = CHAIN_TO_NI(ch);
+ if ((tc = find_table(ni, ti)) == NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ return (ESRCH);
+ }
+
+ /* Do not permit destroying referenced tables */
+ if (tc->no.refcnt > 0) {
+ IPFW_UH_WUNLOCK(ch);
+ return (EBUSY);
+ }
+
+ IPFW_WLOCK(ch);
+ unlink_table(ch, tc);
+ IPFW_WUNLOCK(ch);
+
+ /* Free obj index */
+ if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0)
+ printf("Error unlinking kidx %d from table %s\n",
+ tc->no.kidx, tc->tablename);
+
+ /* Unref values used in tables while holding UH lock */
+ ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy);
+ IPFW_UH_WUNLOCK(ch);
+
+ free_table_config(ni, tc);
+
+ return (0);
}
-int
-ipfw_init_tables(struct ip_fw_chain *ch)
+static uint32_t
+roundup2p(uint32_t v)
{
- /* Allocate pointers */
- ch->tables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
- ch->xtables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
- ch->tabletype = malloc(V_fw_tables_max * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO);
- return (0);
+
+ v--;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ v++;
+
+ return (v);
}
+/*
+ * Grow tables index.
+ *
+ * Returns 0 on success.
+ */
int
ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
{
- struct radix_node_head **tables, **xtables, *rnh;
- struct radix_node_head **tables_old, **xtables_old;
- uint8_t *tabletype, *tabletype_old;
unsigned int ntables_old, tbl;
+ struct namedobj_instance *ni;
+ void *new_idx, *old_tablestate, *tablestate;
+ struct table_info *ti;
+ struct table_config *tc;
+ int i, new_blocks;
/* Check new value for validity */
+ if (ntables == 0)
+ return (EINVAL);
if (ntables > IPFW_TABLES_MAX)
ntables = IPFW_TABLES_MAX;
+ /* Align to nearest power of 2 */
+ ntables = (unsigned int)roundup2p(ntables);
/* Allocate new pointers */
- tables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
- xtables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
- tabletype = malloc(ntables * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO);
+ tablestate = malloc(ntables * sizeof(struct table_info),
+ M_IPFW, M_WAITOK | M_ZERO);
- IPFW_WLOCK(ch);
+ ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks);
+
+ IPFW_UH_WLOCK(ch);
tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
+ ni = CHAIN_TO_NI(ch);
- /* Copy old table pointers */
- memcpy(tables, ch->tables, sizeof(void *) * tbl);
- memcpy(xtables, ch->xtables, sizeof(void *) * tbl);
- memcpy(tabletype, ch->tabletype, sizeof(uint8_t) * tbl);
+ /* Temporary restrict decreasing max_tables */
+ if (ntables < V_fw_tables_max) {
- /* Change pointers and number of tables */
- tables_old = ch->tables;
- xtables_old = ch->xtables;
- tabletype_old = ch->tabletype;
- ch->tables = tables;
- ch->xtables = xtables;
- ch->tabletype = tabletype;
+ /*
+ * FIXME: Check if we really can shrink
+ */
+ IPFW_UH_WUNLOCK(ch);
+ return (EINVAL);
+ }
+
+ /* Copy table info/indices */
+ memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl);
+ ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks);
+
+ IPFW_WLOCK(ch);
+
+ /* Change pointers */
+ old_tablestate = ch->tablestate;
+ ch->tablestate = tablestate;
+ ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks);
ntables_old = V_fw_tables_max;
V_fw_tables_max = ntables;
IPFW_WUNLOCK(ch);
- /* Check if we need to destroy radix trees */
- if (ntables < ntables_old) {
- for (tbl = ntables; tbl < ntables_old; tbl++) {
- if ((rnh = tables_old[tbl]) != NULL) {
- rnh->rnh_walktree(rnh, flush_table_entry, rnh);
- rn_detachhead((void **)&rnh);
- }
+ /* Notify all consumers that their @ti pointer has changed */
+ ti = (struct table_info *)ch->tablestate;
+ for (i = 0; i < tbl; i++, ti++) {
+ if (ti->lookup == NULL)
+ continue;
+ tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i);
+ if (tc == NULL || tc->ta->change_ti == NULL)
+ continue;
- if ((rnh = xtables_old[tbl]) != NULL) {
- rnh->rnh_walktree(rnh, flush_table_entry, rnh);
- rn_detachhead((void **)&rnh);
- }
- }
+ tc->ta->change_ti(tc->astate, ti);
}
+ IPFW_UH_WUNLOCK(ch);
+
/* Free old pointers */
- free(tables_old, M_IPFW);
- free(xtables_old, M_IPFW);
- free(tabletype_old, M_IPFW);
+ free(old_tablestate, M_IPFW);
+ ipfw_objhash_bitmap_free(new_idx, new_blocks);
+
+ return (0);
+}
+
+/*
+ * Lookup table's named object by its @kidx.
+ */
+struct named_object *
+ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx)
+{
+
+ return (ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx));
+}
+
+/*
+ * Take reference to table specified in @ntlv.
+ * On success return its @kidx.
+ */
+int
+ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx)
+{
+ struct tid_info ti;
+ struct table_config *tc;
+ int error;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ ntlv_to_ti(ntlv, &ti);
+ error = find_table_err(CHAIN_TO_NI(ch), &ti, &tc);
+ if (error != 0)
+ return (error);
+
+ if (tc == NULL)
+ return (ESRCH);
+
+ tc_ref(tc);
+ *kidx = tc->no.kidx;
return (0);
}
+void
+ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx)
+{
+
+ struct namedobj_instance *ni;
+ struct named_object *no;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+ ni = CHAIN_TO_NI(ch);
+ no = ipfw_objhash_lookup_kidx(ni, kidx);
+ KASSERT(no != NULL, ("Table with index %d not found", kidx));
+ no->refcnt--;
+}
+
+/*
+ * Lookup an IP @addr in table @tbl.
+ * Stores found value in @val.
+ *
+ * Returns 1 if @addr was found.
+ */
int
ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
uint32_t *val)
{
- struct radix_node_head *rnh;
- struct table_entry *ent;
- struct sockaddr_in sa;
+ struct table_info *ti;
- if (tbl >= V_fw_tables_max)
- return (0);
- if ((rnh = ch->tables[tbl]) == NULL)
- return (0);
- KEY_LEN(sa) = KEY_LEN_INET;
- sa.sin_addr.s_addr = addr;
- ent = (struct table_entry *)(rnh->rnh_matchaddr(&sa, rnh));
- if (ent != NULL) {
- *val = ent->value;
- return (1);
+ ti = KIDX_TO_TI(ch, tbl);
+
+ return (ti->lookup(ti, &addr, sizeof(in_addr_t), val));
+}
+
+/*
+ * Lookup an arbitrary key @paddr of length @plen in table @tbl.
+ * Stores found value in @val.
+ *
+ * Returns 1 if key was found.
+ */
+int
+ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
+ void *paddr, uint32_t *val)
+{
+ struct table_info *ti;
+
+ ti = KIDX_TO_TI(ch, tbl);
+
+ return (ti->lookup(ti, paddr, plen, val));
+}
+
+/*
+ * Info/List/dump support for tables.
+ *
+ */
+
+/*
+ * High-level 'get' cmds sysctl handlers
+ */
+
+/*
+ * Lists all tables currently available in kernel.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
+ * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ struct _ipfw_obj_lheader *olh;
+ int error;
+
+ olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
+ if (olh == NULL)
+ return (EINVAL);
+ if (sd->valsize < olh->size)
+ return (EINVAL);
+
+ IPFW_UH_RLOCK(ch);
+ error = export_tables(ch, olh, sd);
+ IPFW_UH_RUNLOCK(ch);
+
+ return (error);
+}
+
+/*
+ * Store table info to buffer provided by @sd.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_xtable_info(empty)]
+ * Reply: [ ipfw_obj_header ipfw_xtable_info ]
+ *
+ * Returns 0 on success.
+ */
+static int
+describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ struct _ipfw_obj_header *oh;
+ struct table_config *tc;
+ struct tid_info ti;
+ size_t sz;
+
+ sz = sizeof(*oh) + sizeof(ipfw_xtable_info);
+ oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+ if (oh == NULL)
+ return (EINVAL);
+
+ objheader_to_ti(oh, &ti);
+
+ IPFW_UH_RLOCK(ch);
+ if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
+ IPFW_UH_RUNLOCK(ch);
+ return (ESRCH);
}
+
+ export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1));
+ IPFW_UH_RUNLOCK(ch);
+
return (0);
}
-int
-ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
- uint32_t *val, int type)
+/*
+ * Modifies existing table.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_xtable_info ]
+ *
+ * Returns 0 on success
+ */
+static int
+modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
{
- struct radix_node_head *rnh;
- struct table_xentry *xent;
- struct sockaddr_in6 sa6;
- struct xaddr_iface iface;
+ struct _ipfw_obj_header *oh;
+ ipfw_xtable_info *i;
+ char *tname;
+ struct tid_info ti;
+ struct namedobj_instance *ni;
+ struct table_config *tc;
+
+ if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
+ return (EINVAL);
- if (tbl >= V_fw_tables_max)
- return (0);
- if ((rnh = ch->xtables[tbl]) == NULL)
- return (0);
+ oh = (struct _ipfw_obj_header *)sd->kbuf;
+ i = (ipfw_xtable_info *)(oh + 1);
- switch (type) {
- case IPFW_TABLE_CIDR:
- KEY_LEN(sa6) = KEY_LEN_INET6;
- memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr));
- xent = (struct table_xentry *)(rnh->rnh_matchaddr(&sa6, rnh));
- break;
+ /*
+ * Verify user-supplied strings.
+ * Check for null-terminated/zero-length strings/
+ */
+ tname = oh->ntlv.name;
+ if (check_table_name(tname) != 0)
+ return (EINVAL);
- case IPFW_TABLE_INTERFACE:
- KEY_LEN(iface) = KEY_LEN_IFACE +
- strlcpy(iface.ifname, (char *)paddr, IF_NAMESIZE) + 1;
- /* Assume direct match */
- /* FIXME: Add interface pattern matching */
- xent = (struct table_xentry *)(rnh->rnh_matchaddr(&iface, rnh));
- break;
+ objheader_to_ti(oh, &ti);
+ ti.type = i->type;
- default:
- return (0);
+ IPFW_UH_WLOCK(ch);
+ ni = CHAIN_TO_NI(ch);
+ if ((tc = find_table(ni, &ti)) == NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ return (ESRCH);
}
- if (xent != NULL) {
- *val = xent->value;
- return (1);
+ /* Do not support any modifications for readonly tables */
+ if ((tc->ta->flags & TA_FLAG_READONLY) != 0) {
+ IPFW_UH_WUNLOCK(ch);
+ return (EACCES);
}
+
+ if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0)
+ tc->limit = i->limit;
+ if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0)
+ tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0);
+ IPFW_UH_WUNLOCK(ch);
+
return (0);
}
+/*
+ * Creates new table.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_xtable_info ]
+ *
+ * Returns 0 on success
+ */
static int
-count_table_entry(struct radix_node *rn, void *arg)
+create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
{
- u_int32_t * const cnt = arg;
+ struct _ipfw_obj_header *oh;
+ ipfw_xtable_info *i;
+ char *tname, *aname;
+ struct tid_info ti;
+ struct namedobj_instance *ni;
+
+ if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
+ return (EINVAL);
+
+ oh = (struct _ipfw_obj_header *)sd->kbuf;
+ i = (ipfw_xtable_info *)(oh + 1);
+
+ /*
+ * Verify user-supplied strings.
+ * Check for null-terminated/zero-length strings/
+ */
+ tname = oh->ntlv.name;
+ aname = i->algoname;
+ if (check_table_name(tname) != 0 ||
+ strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname))
+ return (EINVAL);
+
+ if (aname[0] == '\0') {
+ /* Use default algorithm */
+ aname = NULL;
+ }
+
+ objheader_to_ti(oh, &ti);
+ ti.type = i->type;
+
+ ni = CHAIN_TO_NI(ch);
+
+ IPFW_UH_RLOCK(ch);
+ if (find_table(ni, &ti) != NULL) {
+ IPFW_UH_RUNLOCK(ch);
+ return (EEXIST);
+ }
+ IPFW_UH_RUNLOCK(ch);
+
+ return (create_table_internal(ch, &ti, aname, i, NULL, 0));
+}
+
+/*
+ * Creates new table based on @ti and @aname.
+ *
+ * Assume @aname to be checked and valid.
+ * Stores allocated table kidx inside @pkidx (if non-NULL).
+ * Reference created table if @compat is non-zero.
+ *
+ * Returns 0 on success.
+ */
+static int
+create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
+ char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat)
+{
+ struct namedobj_instance *ni;
+ struct table_config *tc, *tc_new, *tmp;
+ struct table_algo *ta;
+ uint16_t kidx;
+
+ ni = CHAIN_TO_NI(ch);
+
+ ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname);
+ if (ta == NULL)
+ return (ENOTSUP);
+
+ tc = alloc_table_config(ch, ti, ta, aname, i->tflags);
+ if (tc == NULL)
+ return (ENOMEM);
+
+ tc->vmask = i->vmask;
+ tc->limit = i->limit;
+ if (ta->flags & TA_FLAG_READONLY)
+ tc->locked = 1;
+ else
+ tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0;
+
+ IPFW_UH_WLOCK(ch);
+
+ /* Check if table has been already created */
+ tc_new = find_table(ni, ti);
+ if (tc_new != NULL) {
+
+ /*
+ * Compat: do not fail if we're
+ * requesting to create existing table
+ * which has the same type
+ */
+ if (compat == 0 || tc_new->no.subtype != tc->no.subtype) {
+ IPFW_UH_WUNLOCK(ch);
+ free_table_config(ni, tc);
+ return (EEXIST);
+ }
+
+ /* Exchange tc and tc_new for proper refcounting & freeing */
+ tmp = tc;
+ tc = tc_new;
+ tc_new = tmp;
+ } else {
+ /* New table */
+ if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) {
+ IPFW_UH_WUNLOCK(ch);
+ printf("Unable to allocate table index."
+ " Consider increasing net.inet.ip.fw.tables_max");
+ free_table_config(ni, tc);
+ return (EBUSY);
+ }
+ tc->no.kidx = kidx;
+ tc->no.etlv = IPFW_TLV_TBL_NAME;
+
+ IPFW_WLOCK(ch);
+ link_table(ch, tc);
+ IPFW_WUNLOCK(ch);
+ }
+
+ if (compat != 0)
+ tc->no.refcnt++;
+ if (pkidx != NULL)
+ *pkidx = tc->no.kidx;
+
+ IPFW_UH_WUNLOCK(ch);
+
+ if (tc_new != NULL)
+ free_table_config(ni, tc_new);
- (*cnt)++;
return (0);
}
+static void
+ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti)
+{
+
+ memset(ti, 0, sizeof(struct tid_info));
+ ti->set = ntlv->set;
+ ti->uidx = ntlv->idx;
+ ti->tlvs = ntlv;
+ ti->tlen = ntlv->head.length;
+}
+
+static void
+objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti)
+{
+
+ ntlv_to_ti(&oh->ntlv, ti);
+}
+
+struct namedobj_instance *
+ipfw_get_table_objhash(struct ip_fw_chain *ch)
+{
+
+ return (CHAIN_TO_NI(ch));
+}
+
+/*
+ * Exports basic table info as name TLV.
+ * Used inside dump_static_rules() to provide info
+ * about all tables referenced by current ruleset.
+ *
+ * Returns 0 on success.
+ */
int
-ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
+ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
+ struct sockopt_data *sd)
+{
+ struct namedobj_instance *ni;
+ struct named_object *no;
+ ipfw_obj_ntlv *ntlv;
+
+ ni = CHAIN_TO_NI(ch);
+
+ no = ipfw_objhash_lookup_kidx(ni, kidx);
+ KASSERT(no != NULL, ("invalid table kidx passed"));
+
+ ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
+ if (ntlv == NULL)
+ return (ENOMEM);
+
+ ntlv->head.type = IPFW_TLV_TBL_NAME;
+ ntlv->head.length = sizeof(*ntlv);
+ ntlv->idx = no->kidx;
+ strlcpy(ntlv->name, no->name, sizeof(ntlv->name));
+
+ return (0);
+}
+
+struct dump_args {
+ struct ip_fw_chain *ch;
+ struct table_info *ti;
+ struct table_config *tc;
+ struct sockopt_data *sd;
+ uint32_t cnt;
+ uint16_t uidx;
+ int error;
+ uint32_t size;
+ ipfw_table_entry *ent;
+ ta_foreach_f *f;
+ void *farg;
+ ipfw_obj_tentry tent;
+};
+
+static int
+count_ext_entries(void *e, void *arg)
{
- struct radix_node_head *rnh;
+ struct dump_args *da;
- if (tbl >= V_fw_tables_max)
+ da = (struct dump_args *)arg;
+ da->cnt++;
+
+ return (0);
+}
+
+/*
+ * Gets number of items from table either using
+ * internal counter or calling algo callback for
+ * externally-managed tables.
+ *
+ * Returns number of records.
+ */
+static uint32_t
+table_get_count(struct ip_fw_chain *ch, struct table_config *tc)
+{
+ struct table_info *ti;
+ struct table_algo *ta;
+ struct dump_args da;
+
+ ti = KIDX_TO_TI(ch, tc->no.kidx);
+ ta = tc->ta;
+
+ /* Use internal counter for self-managed tables */
+ if ((ta->flags & TA_FLAG_READONLY) == 0)
+ return (tc->count);
+
+ /* Use callback to quickly get number of items */
+ if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0)
+ return (ta->get_count(tc->astate, ti));
+
+ /* Count number of items ourselves */
+ memset(&da, 0, sizeof(da));
+ ta->foreach(tc->astate, ti, count_ext_entries, &da);
+
+ return (da.cnt);
+}
+
+/*
+ * Exports table @tc info into standard ipfw_xtable_info format.
+ */
+static void
+export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
+ ipfw_xtable_info *i)
+{
+ struct table_info *ti;
+ struct table_algo *ta;
+
+ i->type = tc->no.subtype;
+ i->tflags = tc->tflags;
+ i->vmask = tc->vmask;
+ i->set = tc->no.set;
+ i->kidx = tc->no.kidx;
+ i->refcnt = tc->no.refcnt;
+ i->count = table_get_count(ch, tc);
+ i->limit = tc->limit;
+ i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0;
+ i->size = i->count * sizeof(ipfw_obj_tentry);
+ i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
+ strlcpy(i->tablename, tc->tablename, sizeof(i->tablename));
+ ti = KIDX_TO_TI(ch, tc->no.kidx);
+ ta = tc->ta;
+ if (ta->print_config != NULL) {
+ /* Use algo function to print table config to string */
+ ta->print_config(tc->astate, ti, i->algoname,
+ sizeof(i->algoname));
+ } else
+ strlcpy(i->algoname, ta->name, sizeof(i->algoname));
+ /* Dump algo-specific data, if possible */
+ if (ta->dump_tinfo != NULL) {
+ ta->dump_tinfo(tc->astate, ti, &i->ta_info);
+ i->ta_info.flags |= IPFW_TATFLAGS_DATA;
+ }
+}
+
+struct dump_table_args {
+ struct ip_fw_chain *ch;
+ struct sockopt_data *sd;
+};
+
+static int
+export_table_internal(struct namedobj_instance *ni, struct named_object *no,
+ void *arg)
+{
+ ipfw_xtable_info *i;
+ struct dump_table_args *dta;
+
+ dta = (struct dump_table_args *)arg;
+
+ i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i));
+ KASSERT(i != NULL, ("previously checked buffer is not enough"));
+
+ export_table_info(dta->ch, (struct table_config *)no, i);
+ return (0);
+}
+
+/*
+ * Export all tables as ipfw_xtable_info structures to
+ * storage provided by @sd.
+ *
+ * If supplied buffer is too small, fills in required size
+ * and returns ENOMEM.
+ * Returns 0 on success.
+ */
+static int
+export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
+ struct sockopt_data *sd)
+{
+ uint32_t size;
+ uint32_t count;
+ struct dump_table_args dta;
+
+ count = ipfw_objhash_count(CHAIN_TO_NI(ch));
+ size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader);
+
+ /* Fill in header regardless of buffer size */
+ olh->count = count;
+ olh->objsize = sizeof(ipfw_xtable_info);
+
+ if (size > olh->size) {
+ olh->size = size;
+ return (ENOMEM);
+ }
+
+ olh->size = size;
+
+ dta.ch = ch;
+ dta.sd = sd;
+
+ ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta);
+
+ return (0);
+}
+
+/*
+ * Dumps all table data
+ * Data layout (v1)(current):
+ * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size
+ * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ struct _ipfw_obj_header *oh;
+ ipfw_xtable_info *i;
+ struct tid_info ti;
+ struct table_config *tc;
+ struct table_algo *ta;
+ struct dump_args da;
+ uint32_t sz;
+
+ sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
+ oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+ if (oh == NULL)
+ return (EINVAL);
+
+ i = (ipfw_xtable_info *)(oh + 1);
+ objheader_to_ti(oh, &ti);
+
+ IPFW_UH_RLOCK(ch);
+ if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
+ IPFW_UH_RUNLOCK(ch);
+ return (ESRCH);
+ }
+ export_table_info(ch, tc, i);
+
+ if (sd->valsize < i->size) {
+
+ /*
+ * Submitted buffer size is not enough.
+ * We've already filled in @i structure with
+ * relevant table info including size, so we
+ * can return. Buffer will be flushed automatically.
+ */
+ IPFW_UH_RUNLOCK(ch);
+ return (ENOMEM);
+ }
+
+ /*
+ * Do the actual dump in eXtended format
+ */
+ memset(&da, 0, sizeof(da));
+ da.ch = ch;
+ da.ti = KIDX_TO_TI(ch, tc->no.kidx);
+ da.tc = tc;
+ da.sd = sd;
+
+ ta = tc->ta;
+
+ ta->foreach(tc->astate, da.ti, dump_table_tentry, &da);
+ IPFW_UH_RUNLOCK(ch);
+
+ return (da.error);
+}
+
+/*
+ * Dumps all table data
+ * Data layout (version 0)(legacy):
+ * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE()
+ * Reply: [ ipfw_xtable ipfw_table_xentry x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_xtable *xtbl;
+ struct tid_info ti;
+ struct table_config *tc;
+ struct table_algo *ta;
+ struct dump_args da;
+ size_t sz, count;
+
+ xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable));
+ if (xtbl == NULL)
return (EINVAL);
- *cnt = 0;
- if ((rnh = ch->tables[tbl]) == NULL)
+
+ memset(&ti, 0, sizeof(ti));
+ ti.uidx = xtbl->tbl;
+
+ IPFW_UH_RLOCK(ch);
+ if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
+ IPFW_UH_RUNLOCK(ch);
return (0);
- rnh->rnh_walktree(rnh, count_table_entry, cnt);
+ }
+ count = table_get_count(ch, tc);
+ sz = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable);
+
+ xtbl->cnt = count;
+ xtbl->size = sz;
+ xtbl->type = tc->no.subtype;
+ xtbl->tbl = ti.uidx;
+
+ if (sd->valsize < sz) {
+
+ /*
+ * Submitted buffer size is not enough.
+ * We've already filled in @i structure with
+ * relevant table info including size, so we
+ * can return. Buffer will be flushed automatically.
+ */
+ IPFW_UH_RUNLOCK(ch);
+ return (ENOMEM);
+ }
+
+ /* Do the actual dump in eXtended format */
+ memset(&da, 0, sizeof(da));
+ da.ch = ch;
+ da.ti = KIDX_TO_TI(ch, tc->no.kidx);
+ da.tc = tc;
+ da.sd = sd;
+
+ ta = tc->ta;
+
+ ta->foreach(tc->astate, da.ti, dump_table_xentry, &da);
+ IPFW_UH_RUNLOCK(ch);
+
+ return (0);
+}
+
+/*
+ * Legacy function to retrieve number of items in table.
+ */
+static int
+get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ uint32_t *tbl;
+ struct tid_info ti;
+ size_t sz;
+ int error;
+
+ sz = sizeof(*op3) + sizeof(uint32_t);
+ op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd, sz);
+ if (op3 == NULL)
+ return (EINVAL);
+
+ tbl = (uint32_t *)(op3 + 1);
+ memset(&ti, 0, sizeof(ti));
+ ti.uidx = *tbl;
+ IPFW_UH_RLOCK(ch);
+ error = ipfw_count_xtable(ch, &ti, tbl);
+ IPFW_UH_RUNLOCK(ch);
+ return (error);
+}
+
+/*
+ * Legacy IP_FW_TABLE_GETSIZE handler
+ */
+int
+ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
+{
+ struct table_config *tc;
+
+ if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL)
+ return (ESRCH);
+ *cnt = table_get_count(ch, tc);
+ return (0);
+}
+
+/*
+ * Legacy IP_FW_TABLE_XGETSIZE handler
+ */
+int
+ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
+{
+ struct table_config *tc;
+ uint32_t count;
+
+ if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) {
+ *cnt = 0;
+ return (0); /* 'table all list' requires success */
+ }
+
+ count = table_get_count(ch, tc);
+ *cnt = count * sizeof(ipfw_table_xentry);
+ if (count > 0)
+ *cnt += sizeof(ipfw_xtable);
return (0);
}
static int
-dump_table_entry(struct radix_node *rn, void *arg)
+dump_table_entry(void *e, void *arg)
{
- struct table_entry * const n = (struct table_entry *)rn;
- ipfw_table * const tbl = arg;
+ struct dump_args *da;
+ struct table_config *tc;
+ struct table_algo *ta;
ipfw_table_entry *ent;
+ struct table_value *pval;
+ int error;
+
+ da = (struct dump_args *)arg;
+
+ tc = da->tc;
+ ta = tc->ta;
- if (tbl->cnt == tbl->size)
+ /* Out of memory, returning */
+ if (da->cnt == da->size)
return (1);
- ent = &tbl->ent[tbl->cnt];
- ent->tbl = tbl->tbl;
- if (in_nullhost(n->mask.sin_addr))
- ent->masklen = 0;
- else
- ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
- ent->addr = n->addr.sin_addr.s_addr;
- ent->value = n->value;
- tbl->cnt++;
+ ent = da->ent++;
+ ent->tbl = da->uidx;
+ da->cnt++;
+
+ error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
+ if (error != 0)
+ return (error);
+
+ ent->addr = da->tent.k.addr.s_addr;
+ ent->masklen = da->tent.masklen;
+ pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
+ ent->value = ipfw_export_table_value_legacy(pval);
+
return (0);
}
+/*
+ * Dumps table in pre-8.1 legacy format.
+ */
int
-ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl)
+ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti,
+ ipfw_table *tbl)
{
- struct radix_node_head *rnh;
+ struct table_config *tc;
+ struct table_algo *ta;
+ struct dump_args da;
- if (tbl->tbl >= V_fw_tables_max)
- return (EINVAL);
tbl->cnt = 0;
- if ((rnh = ch->tables[tbl->tbl]) == NULL)
+
+ if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL)
+ return (0); /* XXX: We should return ESRCH */
+
+ ta = tc->ta;
+
+ /* This dump format supports IPv4 only */
+ if (tc->no.subtype != IPFW_TABLE_ADDR)
return (0);
- rnh->rnh_walktree(rnh, dump_table_entry, tbl);
+
+ memset(&da, 0, sizeof(da));
+ da.ch = ch;
+ da.ti = KIDX_TO_TI(ch, tc->no.kidx);
+ da.tc = tc;
+ da.ent = &tbl->ent[0];
+ da.size = tbl->size;
+
+ tbl->cnt = 0;
+ ta->foreach(tc->astate, da.ti, dump_table_entry, &da);
+ tbl->cnt = da.cnt;
+
+ return (0);
+}
+
+/*
+ * Dumps table entry in eXtended format (v1)(current).
+ */
+static int
+dump_table_tentry(void *e, void *arg)
+{
+ struct dump_args *da;
+ struct table_config *tc;
+ struct table_algo *ta;
+ struct table_value *pval;
+ ipfw_obj_tentry *tent;
+ int error;
+
+ da = (struct dump_args *)arg;
+
+ tc = da->tc;
+ ta = tc->ta;
+
+ tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent));
+ /* Out of memory, returning */
+ if (tent == NULL) {
+ da->error = ENOMEM;
+ return (1);
+ }
+ tent->head.length = sizeof(ipfw_obj_tentry);
+ tent->idx = da->uidx;
+
+ error = ta->dump_tentry(tc->astate, da->ti, e, tent);
+ if (error != 0)
+ return (error);
+
+ pval = get_table_value(da->ch, da->tc, tent->v.kidx);
+ ipfw_export_table_value_v1(pval, &tent->v.value);
+
+ return (0);
+}
+
+/*
+ * Dumps table entry in eXtended format (v0).
+ */
+static int
+dump_table_xentry(void *e, void *arg)
+{
+ struct dump_args *da;
+ struct table_config *tc;
+ struct table_algo *ta;
+ ipfw_table_xentry *xent;
+ ipfw_obj_tentry *tent;
+ struct table_value *pval;
+ int error;
+
+ da = (struct dump_args *)arg;
+
+ tc = da->tc;
+ ta = tc->ta;
+
+ xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent));
+ /* Out of memory, returning */
+ if (xent == NULL)
+ return (1);
+ xent->len = sizeof(ipfw_table_xentry);
+ xent->tbl = da->uidx;
+
+ memset(&da->tent, 0, sizeof(da->tent));
+ tent = &da->tent;
+ error = ta->dump_tentry(tc->astate, da->ti, e, tent);
+ if (error != 0)
+ return (error);
+
+ /* Convert current format to previous one */
+ xent->masklen = tent->masklen;
+ pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
+ xent->value = ipfw_export_table_value_legacy(pval);
+ /* Apply some hacks */
+ if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) {
+ xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr;
+ xent->flags = IPFW_TCF_INET;
+ } else
+ memcpy(&xent->k, &tent->k, sizeof(xent->k));
+
return (0);
}
+/*
+ * Helper function to export table algo data
+ * to tentry format before calling user function.
+ *
+ * Returns 0 on success.
+ */
static int
-count_table_xentry(struct radix_node *rn, void *arg)
+prepare_table_tentry(void *e, void *arg)
{
- uint32_t * const cnt = arg;
+ struct dump_args *da;
+ struct table_config *tc;
+ struct table_algo *ta;
+ int error;
+
+ da = (struct dump_args *)arg;
+
+ tc = da->tc;
+ ta = tc->ta;
+
+ error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
+ if (error != 0)
+ return (error);
+
+ da->f(&da->tent, da->farg);
- (*cnt) += sizeof(ipfw_table_xentry);
return (0);
}
+/*
+ * Allow external consumers to read table entries in standard format.
+ */
int
-ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
+ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx,
+ ta_foreach_f *f, void *arg)
+{
+ struct namedobj_instance *ni;
+ struct table_config *tc;
+ struct table_algo *ta;
+ struct dump_args da;
+
+ ni = CHAIN_TO_NI(ch);
+
+ tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
+ if (tc == NULL)
+ return (ESRCH);
+
+ ta = tc->ta;
+
+ memset(&da, 0, sizeof(da));
+ da.ch = ch;
+ da.ti = KIDX_TO_TI(ch, tc->no.kidx);
+ da.tc = tc;
+ da.f = f;
+ da.farg = arg;
+
+ ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da);
+
+ return (0);
+}
+
+/*
+ * Table algorithms
+ */
+
+/*
+ * Finds algorithm by index, table type or supplied name.
+ *
+ * Returns pointer to algo or NULL.
+ */
+static struct table_algo *
+find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name)
{
- struct radix_node_head *rnh;
+ int i, l;
+ struct table_algo *ta;
+
+ if (ti->type > IPFW_TABLE_MAXTYPE)
+ return (NULL);
+
+ /* Search by index */
+ if (ti->atype != 0) {
+ if (ti->atype > tcfg->algo_count)
+ return (NULL);
+ return (tcfg->algo[ti->atype]);
+ }
+
+ if (name == NULL) {
+ /* Return default algorithm for given type if set */
+ return (tcfg->def_algo[ti->type]);
+ }
+
+ /* Search by name */
+ /* TODO: better search */
+ for (i = 1; i <= tcfg->algo_count; i++) {
+ ta = tcfg->algo[i];
+
+ /*
+ * One can supply additional algorithm
+ * parameters so we compare only the first word
+ * of supplied name:
+ * 'addr:chash hsize=32'
+ * '^^^^^^^^^'
+ *
+ */
+ l = strlen(ta->name);
+ if (strncmp(name, ta->name, l) != 0)
+ continue;
+ if (name[l] != '\0' && name[l] != ' ')
+ continue;
+ /* Check if we're requesting proper table type */
+ if (ti->type != 0 && ti->type != ta->type)
+ return (NULL);
+ return (ta);
+ }
- if (tbl >= V_fw_tables_max)
+ return (NULL);
+}
+
+/*
+ * Register new table algo @ta.
+ * Stores algo id inside @idx.
+ *
+ * Returns 0 on success.
+ */
+int
+ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size,
+ int *idx)
+{
+ struct tables_config *tcfg;
+ struct table_algo *ta_new;
+ size_t sz;
+
+ if (size > sizeof(struct table_algo))
return (EINVAL);
- *cnt = 0;
- if ((rnh = ch->tables[tbl]) != NULL)
- rnh->rnh_walktree(rnh, count_table_xentry, cnt);
- if ((rnh = ch->xtables[tbl]) != NULL)
- rnh->rnh_walktree(rnh, count_table_xentry, cnt);
- /* Return zero if table is empty */
- if (*cnt > 0)
- (*cnt) += sizeof(ipfw_xtable);
+
+ /* Check for the required on-stack size for add/del */
+ sz = roundup2(ta->ta_buf_size, sizeof(void *));
+ if (sz > TA_BUF_SZ)
+ return (EINVAL);
+
+ KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE"));
+
+ /* Copy algorithm data to stable storage. */
+ ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO);
+ memcpy(ta_new, ta, size);
+
+ tcfg = CHAIN_TO_TCFG(ch);
+
+ KASSERT(tcfg->algo_count < 255, ("Increase algo array size"));
+
+ tcfg->algo[++tcfg->algo_count] = ta_new;
+ ta_new->idx = tcfg->algo_count;
+
+ /* Set algorithm as default one for given type */
+ if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 &&
+ tcfg->def_algo[ta_new->type] == NULL)
+ tcfg->def_algo[ta_new->type] = ta_new;
+
+ *idx = ta_new->idx;
+
return (0);
}
+/*
+ * Unregisters table algo using @idx as id.
+ * XXX: It is NOT safe to call this function in any place
+ * other than ipfw instance destroy handler.
+ */
+void
+ipfw_del_table_algo(struct ip_fw_chain *ch, int idx)
+{
+ struct tables_config *tcfg;
+ struct table_algo *ta;
+
+ tcfg = CHAIN_TO_TCFG(ch);
+
+ KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d",
+ idx, tcfg->algo_count));
+ ta = tcfg->algo[idx];
+ KASSERT(ta != NULL, ("algo idx %d is NULL", idx));
+
+ if (tcfg->def_algo[ta->type] == ta)
+ tcfg->def_algo[ta->type] = NULL;
+
+ free(ta, M_IPFW);
+}
+
+/*
+ * Lists all table algorithms currently available.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
+ * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ]
+ *
+ * Returns 0 on success
+ */
static int
-dump_table_xentry_base(struct radix_node *rn, void *arg)
+list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
{
- struct table_entry * const n = (struct table_entry *)rn;
- ipfw_xtable * const tbl = arg;
- ipfw_table_xentry *xent;
+ struct _ipfw_obj_lheader *olh;
+ struct tables_config *tcfg;
+ ipfw_ta_info *i;
+ struct table_algo *ta;
+ uint32_t count, n, size;
+
+ olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
+ if (olh == NULL)
+ return (EINVAL);
+ if (sd->valsize < olh->size)
+ return (EINVAL);
+
+ IPFW_UH_RLOCK(ch);
+ tcfg = CHAIN_TO_TCFG(ch);
+ count = tcfg->algo_count;
+ size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader);
+
+	/* Fill in header regardless of buffer size */
+ olh->count = count;
+ olh->objsize = sizeof(ipfw_ta_info);
+
+ if (size > olh->size) {
+ olh->size = size;
+ IPFW_UH_RUNLOCK(ch);
+ return (ENOMEM);
+ }
+ olh->size = size;
+
+ for (n = 1; n <= count; n++) {
+ i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i));
+ KASSERT(i != NULL, ("previously checked buffer is not enough"));
+ ta = tcfg->algo[n];
+ strlcpy(i->algoname, ta->name, sizeof(i->algoname));
+ i->type = ta->type;
+ i->refcnt = ta->refcnt;
+ }
+
+ IPFW_UH_RUNLOCK(ch);
- /* Out of memory, returning */
- if (tbl->cnt == tbl->size)
- return (1);
- xent = &tbl->xent[tbl->cnt];
- xent->len = sizeof(ipfw_table_xentry);
- xent->tbl = tbl->tbl;
- if (in_nullhost(n->mask.sin_addr))
- xent->masklen = 0;
- else
- xent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
- /* Save IPv4 address as deprecated IPv6 compatible */
- xent->k.addr6.s6_addr32[3] = n->addr.sin_addr.s_addr;
- xent->value = n->value;
- tbl->cnt++;
return (0);
}
static int
-dump_table_xentry_extended(struct radix_node *rn, void *arg)
+classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
{
- struct table_xentry * const n = (struct table_xentry *)rn;
- ipfw_xtable * const tbl = arg;
- ipfw_table_xentry *xent;
-#ifdef INET6
- int i;
- uint32_t *v;
-#endif
- /* Out of memory, returning */
- if (tbl->cnt == tbl->size)
+ /* Basic IPv4/IPv6 or u32 lookups */
+ *puidx = cmd->arg1;
+ /* Assume ADDR by default */
+ *ptype = IPFW_TABLE_ADDR;
+ int v;
+
+ if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) {
+ /*
+ * generic lookup. The key must be
+ * in 32bit big-endian format.
+ */
+ v = ((ipfw_insn_u32 *)cmd)->d[1];
+ switch (v) {
+ case 0:
+ case 1:
+ /* IPv4 src/dst */
+ break;
+ case 2:
+ case 3:
+ /* src/dst port */
+ *ptype = IPFW_TABLE_NUMBER;
+ break;
+ case 4:
+ /* uid/gid */
+ *ptype = IPFW_TABLE_NUMBER;
+ break;
+ case 5:
+ /* jid */
+ *ptype = IPFW_TABLE_NUMBER;
+ break;
+ case 6:
+ /* dscp */
+ *ptype = IPFW_TABLE_NUMBER;
+ break;
+ }
+ }
+
+ return (0);
+}
+
+static int
+classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+ ipfw_insn_if *cmdif;
+
+ /* Interface table, possibly */
+ cmdif = (ipfw_insn_if *)cmd;
+ if (cmdif->name[0] != '\1')
return (1);
- xent = &tbl->xent[tbl->cnt];
- xent->len = sizeof(ipfw_table_xentry);
- xent->tbl = tbl->tbl;
-
- switch (tbl->type) {
-#ifdef INET6
- case IPFW_TABLE_CIDR:
- /* Count IPv6 mask */
- v = (uint32_t *)&n->m.mask6.sin6_addr;
- for (i = 0; i < sizeof(struct in6_addr) / 4; i++, v++)
- xent->masklen += bitcount32(*v);
- memcpy(&xent->k, &n->a.addr6.sin6_addr, sizeof(struct in6_addr));
- break;
-#endif
- case IPFW_TABLE_INTERFACE:
- /* Assume exact mask */
- xent->masklen = 8 * IF_NAMESIZE;
- memcpy(&xent->k, &n->a.iface.ifname, IF_NAMESIZE);
+
+ *ptype = IPFW_TABLE_INTERFACE;
+ *puidx = cmdif->p.kidx;
+
+ return (0);
+}
+
+static int
+classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+
+ *puidx = cmd->arg1;
+ *ptype = IPFW_TABLE_FLOW;
+
+ return (0);
+}
+
+static void
+update_arg1(ipfw_insn *cmd, uint16_t idx)
+{
+
+ cmd->arg1 = idx;
+}
+
+static void
+update_via(ipfw_insn *cmd, uint16_t idx)
+{
+ ipfw_insn_if *cmdif;
+
+ cmdif = (ipfw_insn_if *)cmd;
+ cmdif->p.kidx = idx;
+}
+
+static int
+table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+ struct named_object **pno)
+{
+ struct table_config *tc;
+ int error;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ error = find_table_err(CHAIN_TO_NI(ch), ti, &tc);
+ if (error != 0)
+ return (error);
+
+ *pno = &tc->no;
+ return (0);
+}
+
+/* XXX: sets-sets! */
+static struct named_object *
+table_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+ struct namedobj_instance *ni;
+ struct table_config *tc;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+ ni = CHAIN_TO_NI(ch);
+ tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx);
+ KASSERT(tc != NULL, ("Table with index %d not found", idx));
+
+ return (&tc->no);
+}
+
+static int
+table_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
+ enum ipfw_sets_cmd cmd)
+{
+
+ switch (cmd) {
+ case SWAP_ALL:
+ case TEST_ALL:
+ /*
+		 * Return success for TEST_ALL, since nothing prevents
+		 * moving rules from one set to another. All tables are
+ * accessible from all sets when per-set tables sysctl
+ * is disabled.
+ */
+ case MOVE_ALL:
+ case TEST_ONE:
+ case MOVE_ONE:
+ /*
+ * NOTE: we need to use ipfw_objhash_del/ipfw_objhash_add
+ * if set number will be used in hash function. Currently
+ * we can just use generic handler that replaces set value.
+ */
+ if (V_fw_tables_sets == 0)
+ return (0);
break;
-
- default:
- /* unknown, skip entry */
+ case COUNT_ONE:
+ /*
+ * Return EOPNOTSUPP for COUNT_ONE when per-set sysctl is
+		 * disabled. This allows skipping table opcodes in additional
+		 * checks when specific rules are moved to another set.
+ */
+ if (V_fw_tables_sets == 0)
+ return (EOPNOTSUPP);
+ }
+ /* Use generic sets handler when per-set sysctl is enabled. */
+ return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
+ set, new_set, cmd));
+}
+
+static struct opcode_obj_rewrite opcodes[] = {
+ {
+ .opcode = O_IP_SRC_LOOKUP,
+ .etlv = IPFW_TLV_TBL_NAME,
+ .classifier = classify_srcdst,
+ .update = update_arg1,
+ .find_byname = table_findbyname,
+ .find_bykidx = table_findbykidx,
+ .create_object = create_table_compat,
+ .manage_sets = table_manage_sets,
+ },
+ {
+ .opcode = O_IP_DST_LOOKUP,
+ .etlv = IPFW_TLV_TBL_NAME,
+ .classifier = classify_srcdst,
+ .update = update_arg1,
+ .find_byname = table_findbyname,
+ .find_bykidx = table_findbykidx,
+ .create_object = create_table_compat,
+ .manage_sets = table_manage_sets,
+ },
+ {
+ .opcode = O_IP_FLOW_LOOKUP,
+ .etlv = IPFW_TLV_TBL_NAME,
+ .classifier = classify_flow,
+ .update = update_arg1,
+ .find_byname = table_findbyname,
+ .find_bykidx = table_findbykidx,
+ .create_object = create_table_compat,
+ .manage_sets = table_manage_sets,
+ },
+ {
+ .opcode = O_XMIT,
+ .etlv = IPFW_TLV_TBL_NAME,
+ .classifier = classify_via,
+ .update = update_via,
+ .find_byname = table_findbyname,
+ .find_bykidx = table_findbykidx,
+ .create_object = create_table_compat,
+ .manage_sets = table_manage_sets,
+ },
+ {
+ .opcode = O_RECV,
+ .etlv = IPFW_TLV_TBL_NAME,
+ .classifier = classify_via,
+ .update = update_via,
+ .find_byname = table_findbyname,
+ .find_bykidx = table_findbykidx,
+ .create_object = create_table_compat,
+ .manage_sets = table_manage_sets,
+ },
+ {
+ .opcode = O_VIA,
+ .etlv = IPFW_TLV_TBL_NAME,
+ .classifier = classify_via,
+ .update = update_via,
+ .find_byname = table_findbyname,
+ .find_bykidx = table_findbykidx,
+ .create_object = create_table_compat,
+ .manage_sets = table_manage_sets,
+ },
+};
+
+static int
+test_sets_cb(struct namedobj_instance *ni __unused, struct named_object *no,
+ void *arg __unused)
+{
+
+ /* Check that there aren't any tables in not default set */
+ if (no->set != 0)
+ return (EBUSY);
+ return (0);
+}
+
+/*
+ * Switch between "set 0" and "rule's set" table binding,
+ * Check all ruleset bindings and permits changing
+ * IFF each binding has both rule AND table in default set (set 0).
+ *
+ * Returns 0 on success.
+ */
+int
+ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets)
+{
+ struct opcode_obj_rewrite *rw;
+ struct namedobj_instance *ni;
+ struct named_object *no;
+ struct ip_fw *rule;
+ ipfw_insn *cmd;
+ int cmdlen, i, l;
+ uint16_t kidx;
+ uint8_t subtype;
+
+ IPFW_UH_WLOCK(ch);
+
+ if (V_fw_tables_sets == sets) {
+ IPFW_UH_WUNLOCK(ch);
return (0);
}
+ ni = CHAIN_TO_NI(ch);
+ if (sets == 0) {
+ /*
+ * Prevent disabling sets support if we have some tables
+ * in not default sets.
+ */
+ if (ipfw_objhash_foreach_type(ni, test_sets_cb,
+ NULL, IPFW_TLV_TBL_NAME) != 0) {
+ IPFW_UH_WUNLOCK(ch);
+ return (EBUSY);
+ }
+ }
+ /*
+ * Scan all rules and examine tables opcodes.
+ */
+ for (i = 0; i < ch->n_rules; i++) {
+ rule = ch->map[i];
+
+ l = rule->cmd_len;
+ cmd = rule->cmd;
+ cmdlen = 0;
+ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+ /* Check only tables opcodes */
+ for (kidx = 0, rw = opcodes;
+ rw < opcodes + nitems(opcodes); rw++) {
+ if (rw->opcode != cmd->opcode)
+ continue;
+ if (rw->classifier(cmd, &kidx, &subtype) == 0)
+ break;
+ }
+ if (kidx == 0)
+ continue;
+ no = ipfw_objhash_lookup_kidx(ni, kidx);
+ /* Check if both table object and rule has the set 0 */
+ if (no->set != 0 || rule->set != 0) {
+ IPFW_UH_WUNLOCK(ch);
+ return (EBUSY);
+ }
+
+ }
+ }
+ V_fw_tables_sets = sets;
+ IPFW_UH_WUNLOCK(ch);
+ return (0);
+}
+
+/*
+ * Checks table name for validity.
+ * Enforce basic length checks, the rest
+ * should be done in userland.
+ *
+ * Returns 0 if name is considered valid.
+ */
+static int
+check_table_name(const char *name)
+{
+
+ /*
+ * TODO: do some more complicated checks
+ */
+ return (ipfw_check_object_name_generic(name));
+}
+
+/*
+ * Finds table config based on either legacy index
+ * or name in ntlv.
+ * Note @ti structure contains unchecked data from userland.
+ *
+ * Returns 0 in success and fills in @tc with found config
+ */
+static int
+find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
+ struct table_config **tc)
+{
+ char *name, bname[16];
+ struct named_object *no;
+ ipfw_obj_ntlv *ntlv;
+ uint32_t set;
+
+ if (ti->tlvs != NULL) {
+ ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
+ IPFW_TLV_TBL_NAME);
+ if (ntlv == NULL)
+ return (EINVAL);
+ name = ntlv->name;
+
+ /*
+ * Use set provided by @ti instead of @ntlv one.
+ * This is needed due to different sets behavior
+ * controlled by V_fw_tables_sets.
+ */
+ set = (V_fw_tables_sets != 0) ? ti->set : 0;
+ } else {
+ snprintf(bname, sizeof(bname), "%d", ti->uidx);
+ name = bname;
+ set = 0;
+ }
+
+ no = ipfw_objhash_lookup_name(ni, set, name);
+ *tc = (struct table_config *)no;
+
+ return (0);
+}
+
+/*
+ * Finds table config based on either legacy index
+ * or name in ntlv.
+ * Note @ti structure contains unchecked data from userland.
+ *
+ * Returns pointer to table_config or NULL.
+ */
+static struct table_config *
+find_table(struct namedobj_instance *ni, struct tid_info *ti)
+{
+ struct table_config *tc;
+
+ if (find_table_err(ni, ti, &tc) != 0)
+ return (NULL);
+
+ return (tc);
+}
+
+/*
+ * Allocate new table config structure using
+ * specified @algo and @aname.
+ *
+ * Returns pointer to config or NULL.
+ */
+static struct table_config *
+alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
+ struct table_algo *ta, char *aname, uint8_t tflags)
+{
+ char *name, bname[16];
+ struct table_config *tc;
+ int error;
+ ipfw_obj_ntlv *ntlv;
+ uint32_t set;
+
+ if (ti->tlvs != NULL) {
+ ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
+ IPFW_TLV_TBL_NAME);
+ if (ntlv == NULL)
+ return (NULL);
+ name = ntlv->name;
+ set = ntlv->set;
+ } else {
+ /* Compat part: convert number to string representation */
+ snprintf(bname, sizeof(bname), "%d", ti->uidx);
+ name = bname;
+ set = 0;
+ }
+
+ tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO);
+ tc->no.name = tc->tablename;
+ tc->no.subtype = ta->type;
+ tc->no.set = set;
+ tc->tflags = tflags;
+ tc->ta = ta;
+ strlcpy(tc->tablename, name, sizeof(tc->tablename));
+ /* Set "shared" value type by default */
+ tc->vshared = 1;
+
+ /* Preallocate data structures for new tables */
+ error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags);
+ if (error != 0) {
+ free(tc, M_IPFW);
+ return (NULL);
+ }
+
+ return (tc);
+}
+
+/*
+ * Destroys table state and config.
+ */
+static void
+free_table_config(struct namedobj_instance *ni, struct table_config *tc)
+{
+
+ KASSERT(tc->linked == 0, ("free() on linked config"));
+ /* UH lock MUST NOT be held */
+
+ /*
+ * We're using ta without any locking/referencing.
+ * TODO: fix this if we're going to use unloadable algos.
+ */
+ tc->ta->destroy(tc->astate, &tc->ti_copy);
+ free(tc, M_IPFW);
+}
+
+/*
+ * Links @tc to @chain table named instance.
+ * Sets appropriate type/states in @chain table info.
+ */
+static void
+link_table(struct ip_fw_chain *ch, struct table_config *tc)
+{
+ struct namedobj_instance *ni;
+ struct table_info *ti;
+ uint16_t kidx;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+ IPFW_WLOCK_ASSERT(ch);
+
+ ni = CHAIN_TO_NI(ch);
+ kidx = tc->no.kidx;
+
+ ipfw_objhash_add(ni, &tc->no);
+
+ ti = KIDX_TO_TI(ch, kidx);
+ *ti = tc->ti_copy;
+
+ /* Notify algo on real @ti address */
+ if (tc->ta->change_ti != NULL)
+ tc->ta->change_ti(tc->astate, ti);
+
+ tc->linked = 1;
+ tc->ta->refcnt++;
+}
+
+/*
+ * Unlinks @tc from @chain table named instance.
+ * Zeroes states in @chain and stores them in @tc.
+ */
+static void
+unlink_table(struct ip_fw_chain *ch, struct table_config *tc)
+{
+ struct namedobj_instance *ni;
+ struct table_info *ti;
+ uint16_t kidx;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+ IPFW_WLOCK_ASSERT(ch);
+
+ ni = CHAIN_TO_NI(ch);
+ kidx = tc->no.kidx;
+
+ /* Clear state. @ti copy is already saved inside @tc */
+ ipfw_objhash_del(ni, &tc->no);
+ ti = KIDX_TO_TI(ch, kidx);
+ memset(ti, 0, sizeof(struct table_info));
+ tc->linked = 0;
+ tc->ta->refcnt--;
+
+ /* Notify algo on real @ti address */
+ if (tc->ta->change_ti != NULL)
+ tc->ta->change_ti(tc->astate, NULL);
+}
+
+static struct ipfw_sopt_handler scodes[] = {
+ { IP_FW_TABLE_XCREATE, 0, HDIR_SET, create_table },
+ { IP_FW_TABLE_XDESTROY, 0, HDIR_SET, flush_table_v0 },
+ { IP_FW_TABLE_XFLUSH, 0, HDIR_SET, flush_table_v0 },
+ { IP_FW_TABLE_XMODIFY, 0, HDIR_BOTH, modify_table },
+ { IP_FW_TABLE_XINFO, 0, HDIR_GET, describe_table },
+ { IP_FW_TABLES_XLIST, 0, HDIR_GET, list_tables },
+ { IP_FW_TABLE_XLIST, 0, HDIR_GET, dump_table_v0 },
+ { IP_FW_TABLE_XLIST, 1, HDIR_GET, dump_table_v1 },
+ { IP_FW_TABLE_XADD, 0, HDIR_BOTH, manage_table_ent_v0 },
+ { IP_FW_TABLE_XADD, 1, HDIR_BOTH, manage_table_ent_v1 },
+ { IP_FW_TABLE_XDEL, 0, HDIR_BOTH, manage_table_ent_v0 },
+ { IP_FW_TABLE_XDEL, 1, HDIR_BOTH, manage_table_ent_v1 },
+ { IP_FW_TABLE_XFIND, 0, HDIR_GET, find_table_entry },
+ { IP_FW_TABLE_XSWAP, 0, HDIR_SET, swap_table },
+ { IP_FW_TABLES_ALIST, 0, HDIR_GET, list_table_algo },
+ { IP_FW_TABLE_XGETSIZE, 0, HDIR_GET, get_table_size },
+};
- xent->value = n->value;
- tbl->cnt++;
+static int
+destroy_table_locked(struct namedobj_instance *ni, struct named_object *no,
+ void *arg)
+{
+
+ unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no);
+ if (ipfw_objhash_free_idx(ni, no->kidx) != 0)
+ printf("Error unlinking kidx %d from table %s\n",
+ no->kidx, no->name);
+ free_table_config(ni, (struct table_config *)no);
return (0);
}
+/*
+ * Shuts tables module down.
+ */
+void
+ipfw_destroy_tables(struct ip_fw_chain *ch, int last)
+{
+
+ IPFW_DEL_SOPT_HANDLER(last, scodes);
+ IPFW_DEL_OBJ_REWRITER(last, opcodes);
+
+ /* Remove all tables from working set */
+ IPFW_UH_WLOCK(ch);
+ IPFW_WLOCK(ch);
+ ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch);
+ IPFW_WUNLOCK(ch);
+ IPFW_UH_WUNLOCK(ch);
+
+	/* Free the pointers themselves */
+ free(ch->tablestate, M_IPFW);
+
+ ipfw_table_value_destroy(ch, last);
+ ipfw_table_algo_destroy(ch);
+
+ ipfw_objhash_destroy(CHAIN_TO_NI(ch));
+ free(CHAIN_TO_TCFG(ch), M_IPFW);
+}
+
+/*
+ * Starts tables module.
+ */
int
-ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl)
+ipfw_init_tables(struct ip_fw_chain *ch, int first)
{
- struct radix_node_head *rnh;
+ struct tables_config *tcfg;
- if (tbl->tbl >= V_fw_tables_max)
- return (EINVAL);
- tbl->cnt = 0;
- tbl->type = ch->tabletype[tbl->tbl];
- if ((rnh = ch->tables[tbl->tbl]) != NULL)
- rnh->rnh_walktree(rnh, dump_table_xentry_base, tbl);
- if ((rnh = ch->xtables[tbl->tbl]) != NULL)
- rnh->rnh_walktree(rnh, dump_table_xentry_extended, tbl);
+ /* Allocate pointers */
+ ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info),
+ M_IPFW, M_WAITOK | M_ZERO);
+
+ tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO);
+ tcfg->namehash = ipfw_objhash_create(V_fw_tables_max);
+ ch->tblcfg = tcfg;
+
+ ipfw_table_value_init(ch, first);
+ ipfw_table_algo_init(ch);
+
+ IPFW_ADD_OBJ_REWRITER(first, opcodes);
+ IPFW_ADD_SOPT_HANDLER(first, scodes);
return (0);
}
-/* end of file */
+
+
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table.h b/freebsd/sys/netpfil/ipfw/ip_fw_table.h
new file mode 100644
index 00000000..d6578482
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_table.h
@@ -0,0 +1,234 @@
+/*-
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IPFW2_TABLE_H
+#define _IPFW2_TABLE_H
+
+/*
+ * Internal constants and data structures used by ipfw tables
+ * not meant to be exported outside the kernel.
+ */
+#ifdef _KERNEL
+
+struct table_algo;
+struct tables_config {
+ struct namedobj_instance *namehash;
+ struct namedobj_instance *valhash;
+ uint32_t val_size;
+ uint32_t algo_count;
+ struct table_algo *algo[256];
+ struct table_algo *def_algo[IPFW_TABLE_MAXTYPE + 1];
+ TAILQ_HEAD(op_state_l,op_state) state_list;
+};
+#define CHAIN_TO_TCFG(chain) ((struct tables_config *)(chain)->tblcfg)
+
+struct table_info {
+ table_lookup_t *lookup; /* Lookup function */
+ void *state; /* Lookup radix/other structure */
+ void *xstate; /* eXtended state */
+ u_long data; /* Hints for given func */
+};
+
+struct table_value;
+struct tentry_info {
+ void *paddr;
+ struct table_value *pvalue;
+ void *ptv; /* Temporary field to hold obj */
+ uint8_t masklen; /* mask length */
+ uint8_t subtype;
+ uint16_t flags; /* record flags */
+ uint32_t value; /* value index */
+};
+#define TEI_FLAGS_UPDATE 0x0001 /* Add or update rec if exists */
+#define TEI_FLAGS_UPDATED 0x0002 /* Entry has been updated */
+#define TEI_FLAGS_COMPAT 0x0004 /* Called from old ABI */
+#define TEI_FLAGS_DONTADD 0x0008 /* Do not create new rec */
+#define TEI_FLAGS_ADDED 0x0010 /* Entry was added */
+#define TEI_FLAGS_DELETED 0x0020 /* Entry was deleted */
+#define TEI_FLAGS_LIMIT 0x0040 /* Limit was hit */
+#define TEI_FLAGS_ERROR 0x0080 /* Unknown request error */
+#define TEI_FLAGS_NOTFOUND 0x0100 /* Entry was not found */
+#define TEI_FLAGS_EXISTS 0x0200 /* Entry already exists */
+
+typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state,
+ struct table_info *ti, char *data, uint8_t tflags);
+typedef void (ta_destroy)(void *ta_state, struct table_info *ti);
+typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf);
+typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf);
+typedef int (ta_add)(void *ta_state, struct table_info *ti,
+ struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+typedef int (ta_del)(void *ta_state, struct table_info *ti,
+ struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+typedef void (ta_flush_entry)(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf);
+
+typedef int (ta_need_modify)(void *ta_state, struct table_info *ti,
+ uint32_t count, uint64_t *pflags);
+typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags);
+typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti,
+ void *ta_buf, uint64_t *pflags);
+typedef void (ta_modify)(void *ta_state, struct table_info *ti,
+ void *ta_buf, uint64_t pflags);
+typedef void (ta_flush_mod)(void *ta_buf);
+
+typedef void (ta_change_ti)(void *ta_state, struct table_info *ti);
+typedef void (ta_print_config)(void *ta_state, struct table_info *ti, char *buf,
+ size_t bufsize);
+
+typedef int ta_foreach_f(void *node, void *arg);
+typedef void ta_foreach(void *ta_state, struct table_info *ti, ta_foreach_f *f,
+ void *arg);
+typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e,
+ ipfw_obj_tentry *tent);
+typedef int ta_find_tentry(void *ta_state, struct table_info *ti,
+ ipfw_obj_tentry *tent);
+typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti,
+ ipfw_ta_tinfo *tinfo);
+typedef uint32_t ta_get_count(void *ta_state, struct table_info *ti);
+
+struct table_algo {
+ char name[16];
+ uint32_t idx;
+ uint32_t type;
+ uint32_t refcnt;
+ uint32_t flags;
+ uint32_t vlimit;
+ size_t ta_buf_size;
+ ta_init *init;
+ ta_destroy *destroy;
+ ta_prepare_add *prepare_add;
+ ta_prepare_del *prepare_del;
+ ta_add *add;
+ ta_del *del;
+ ta_flush_entry *flush_entry;
+ ta_find_tentry *find_tentry;
+ ta_need_modify *need_modify;
+ ta_prepare_mod *prepare_mod;
+ ta_fill_mod *fill_mod;
+ ta_modify *modify;
+ ta_flush_mod *flush_mod;
+ ta_change_ti *change_ti;
+ ta_foreach *foreach;
+ ta_dump_tentry *dump_tentry;
+ ta_print_config *print_config;
+ ta_dump_tinfo *dump_tinfo;
+ ta_get_count *get_count;
+};
+#define TA_FLAG_DEFAULT 0x01 /* Algo is default for given type */
+#define TA_FLAG_READONLY 0x02 /* Algo does not support modifications*/
+#define TA_FLAG_EXTCOUNTER 0x04 /* Algo has external counter available*/
+
+int ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta,
+ size_t size, int *idx);
+void ipfw_del_table_algo(struct ip_fw_chain *ch, int idx);
+
+void ipfw_table_algo_init(struct ip_fw_chain *chain);
+void ipfw_table_algo_destroy(struct ip_fw_chain *chain);
+
+MALLOC_DECLARE(M_IPFW_TBL);
+/* Exported to support legacy opcodes */
+int add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
+ struct tentry_info *tei, uint8_t flags, uint32_t count);
+int del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
+ struct tentry_info *tei, uint8_t flags, uint32_t count);
+int flush_table(struct ip_fw_chain *ch, struct tid_info *ti);
+void ipfw_import_table_value_legacy(uint32_t value, struct table_value *v);
+uint32_t ipfw_export_table_value_legacy(struct table_value *v);
+int ipfw_get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd);
+
+/* ipfw_table_value.c functions */
+struct table_config;
+struct tableop_state;
+void ipfw_table_value_init(struct ip_fw_chain *ch, int first);
+void ipfw_table_value_destroy(struct ip_fw_chain *ch, int last);
+int ipfw_link_table_values(struct ip_fw_chain *ch, struct tableop_state *ts);
+void ipfw_garbage_table_values(struct ip_fw_chain *ch, struct table_config *tc,
+ struct tentry_info *tei, uint32_t count, int rollback);
+void ipfw_import_table_value_v1(ipfw_table_value *iv);
+void ipfw_export_table_value_v1(struct table_value *v, ipfw_table_value *iv);
+void ipfw_unref_table_values(struct ip_fw_chain *ch, struct table_config *tc,
+ struct table_algo *ta, void *astate, struct table_info *ti);
+void rollback_table_values(struct tableop_state *ts);
+
+int ipfw_rewrite_table_uidx(struct ip_fw_chain *chain,
+ struct rule_check_info *ci);
+int ipfw_mark_table_kidx(struct ip_fw_chain *chain, struct ip_fw *rule,
+ uint32_t *bmask);
+int ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
+ struct sockopt_data *sd);
+void ipfw_unref_rule_tables(struct ip_fw_chain *chain, struct ip_fw *rule);
+struct namedobj_instance *ipfw_get_table_objhash(struct ip_fw_chain *ch);
+
+/* utility functions */
+int ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt,
+ uint32_t new_set);
+void ipfw_swap_tables_sets(struct ip_fw_chain *ch, uint32_t old_set,
+ uint32_t new_set, int mv);
+int ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx,
+ ta_foreach_f f, void *arg);
+
+/* internal functions */
+void tc_ref(struct table_config *tc);
+void tc_unref(struct table_config *tc);
+
+struct op_state;
+typedef void (op_rollback_f)(void *object, struct op_state *state);
+struct op_state {
+ TAILQ_ENTRY(op_state) next; /* chain link */
+ op_rollback_f *func;
+};
+
+struct tableop_state {
+ struct op_state opstate;
+ struct ip_fw_chain *ch;
+ struct table_config *tc;
+ struct table_algo *ta;
+ struct tentry_info *tei;
+ uint32_t count;
+ uint32_t vmask;
+ int vshared;
+ int modified;
+};
+
+void add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts);
+void del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts);
+void rollback_toperation_state(struct ip_fw_chain *ch, void *object);
+
+/* Legacy interfaces */
+int ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti,
+ uint32_t *cnt);
+int ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti,
+ uint32_t *cnt);
+int ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti,
+ ipfw_table *tbl);
+
+
+#endif /* _KERNEL */
+#endif /* _IPFW2_TABLE_H */
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table_algo.c b/freebsd/sys/netpfil/ipfw/ip_fw_table_algo.c
new file mode 100644
index 00000000..e4c82131
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_table_algo.c
@@ -0,0 +1,4112 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2014 Yandex LLC
+ * Copyright (c) 2014 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Lookup table algorithms.
+ *
+ */
+
+#include <rtems/bsd/local/opt_ipfw.h>
+#include <rtems/bsd/local/opt_inet.h>
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
+#include <net/radix.h>
+#include <net/route.h>
+#include <net/route_var.h>
+
+#include <netinet/in.h>
+#include <netinet/in_fib.h>
+#include <netinet/ip_var.h> /* struct ipfw_rule_ref */
+#include <netinet/ip_fw.h>
+#include <netinet6/in6_fib.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/ip_fw_table.h>
+
+
+/*
+ * IPFW table lookup algorithms.
+ *
+ * What is needed to add another table algo?
+ *
+ * Algo init:
+ * * struct table_algo has to be filled with:
+ * name: "type:algoname" format, e.g. "addr:radix". Currently
+ * there are the following types: "addr", "iface", "number" and "flow".
+ * type: one of IPFW_TABLE_* types
+ * flags: one or more TA_FLAGS_*
+ * ta_buf_size: size of structure used to store add/del item state.
+ * Needs to be less than TA_BUF_SZ.
+ * callbacks: see below for description.
+ * * ipfw_add_table_algo / ipfw_del_table_algo has to be called
+ *
+ * Callbacks description:
+ *
+ * -init: request to initialize new table instance.
+ * typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state,
+ * struct table_info *ti, char *data, uint8_t tflags);
+ * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success.
+ *
+ * Allocate all structures needed for normal operations.
+ * * Caller may want to parse @data for some algo-specific
+ * options provided by userland.
+ * * Caller may want to save configuration state pointer to @ta_state
+ * * Caller needs to save desired runtime structure pointer(s)
+ * inside @ti fields. Note that it is not correct to save
+ * @ti pointer at this moment. Use -change_ti hook for that.
+ * * Caller has to fill in ti->lookup to appropriate function
+ * pointer.
+ *
+ *
+ *
+ * -destroy: request to destroy table instance.
+ * typedef void (ta_destroy)(void *ta_state, struct table_info *ti);
+ * MANDATORY, unlocked. (M_WAITOK).
+ *
+ * Frees all table entries and all tables structures allocated by -init.
+ *
+ *
+ *
+ * -prepare_add: request to allocate state for adding new entry.
+ * typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei,
+ * void *ta_buf);
+ * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success.
+ *
+ * Allocates state and fills it in with all necessary data (EXCEPT value)
+ * from @tei to minimize operations needed to be done under WLOCK.
+ * "value" field has to be copied to new entry in @add callback.
+ * Buffer ta_buf of size ta->ta_buf_sz may be used to store
+ * allocated state.
+ *
+ *
+ *
+ * -prepare_del: request to set state for deleting existing entry.
+ * typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei,
+ * void *ta_buf);
+ * MANDATORY, locked, UH. (M_NOWAIT). Returns 0 on success.
+ *
+ * Buffer ta_buf of size ta->ta_buf_sz may be used to store
+ * allocated state. Caller should use on-stack ta_buf allocation
+ * instead of doing malloc().
+ *
+ *
+ *
+ * -add: request to insert new entry into runtime/config structures.
+ * typedef int (ta_add)(void *ta_state, struct table_info *ti,
+ * struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+ * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success.
+ *
+ * Insert new entry using previously-allocated state in @ta_buf.
+ * * @tei may have the following flags:
+ * TEI_FLAGS_UPDATE: request to add or update entry.
+ * TEI_FLAGS_DONTADD: request to update (but not add) entry.
+ * * Caller is required to do the following:
+ * copy real entry value from @tei
+ * entry added: return 0, set 1 to @pnum
+ * entry updated: return 0, store 0 to @pnum, store old value in @tei,
+ * add TEI_FLAGS_UPDATED flag to @tei.
+ * entry exists: return EEXIST
+ * entry not found: return ENOENT
+ * other error: return non-zero error code.
+ *
+ *
+ *
+ * -del: request to delete existing entry from runtime/config structures.
+ * typedef int (ta_del)(void *ta_state, struct table_info *ti,
+ * struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+ * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success.
+ *
+ * Delete entry using state previously set up in @ta_buf.
+ * * Caller is required to do the following:
+ * entry deleted: return 0, set 1 to @pnum, store old value in @tei.
+ * entry not found: return ENOENT
+ * other error: return non-zero error code.
+ *
+ *
+ *
+ * -flush_entry: flush entry state created by -prepare_add / -del / others
+ * typedef void (ta_flush_entry)(struct ip_fw_chain *ch,
+ * struct tentry_info *tei, void *ta_buf);
+ * MANDATORY, may be locked. (M_NOWAIT).
+ *
+ * Delete state allocated by:
+ * -prepare_add (-add returned EEXIST|UPDATED)
+ * -prepare_del (if any)
+ * -del
+ * * Caller is required to handle empty @ta_buf correctly.
+ *
+ *
+ * -find_tentry: finds entry specified by key @tei
+ * typedef int ta_find_tentry(void *ta_state, struct table_info *ti,
+ * ipfw_obj_tentry *tent);
+ * OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 on success.
+ *
+ * Finds entry specified by given key.
+ * * Caller is required to do the following:
+ * entry found: returns 0, export entry to @tent
+ * entry not found: returns ENOENT
+ *
+ *
+ * -need_modify: checks if @ti has enough space to hold another @count items.
+ * typedef int (ta_need_modify)(void *ta_state, struct table_info *ti,
+ * uint32_t count, uint64_t *pflags);
+ * OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 if has.
+ *
+ * Checks if given table has enough space to add @count items without
+ * resize. Caller may use @pflags to store desired modification data.
+ *
+ *
+ *
+ * -prepare_mod: allocate structures for table modification.
+ * typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags);
+ * OPTIONAL(need_modify), unlocked. (M_WAITOK). Returns 0 on success.
+ *
+ * Allocate all needed state for table modification. Caller
+ * should use `struct mod_item` to store new state in @ta_buf.
+ * Up to TA_BUF_SZ (128 bytes) can be stored in @ta_buf.
+ *
+ *
+ *
+ * -fill_mod: copy some data to new state.
+ * typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti,
+ * void *ta_buf, uint64_t *pflags);
+ * OPTIONAL(need_modify), locked (UH). (M_NOWAIT). Returns 0 on success.
+ *
+ * Copy as much data as we can to minimize changes under WLOCK.
+ * For example, array can be merged inside this callback.
+ *
+ *
+ *
+ * -modify: perform final modification.
+ * typedef void (ta_modify)(void *ta_state, struct table_info *ti,
+ * void *ta_buf, uint64_t pflags);
+ * OPTIONAL(need_modify), locked (UH+WLOCK). (M_NOWAIT).
+ *
+ * Performs all changes necessary to switch to new structures.
+ * * Caller should save old pointers to @ta_buf storage.
+ *
+ *
+ *
+ * -flush_mod: flush table modification state.
+ * typedef void (ta_flush_mod)(void *ta_buf);
+ * OPTIONAL(need_modify), unlocked. (M_WAITOK).
+ *
+ * Performs flush for the following:
+ * - prepare_mod (modification was not necessary)
+ * - modify (for the old state)
+ *
+ *
+ *
+ * -change_ti: monitor table info pointer changes
+ * typedef void (ta_change_ti)(void *ta_state, struct table_info *ti);
+ * OPTIONAL, locked (UH). (M_NOWAIT).
+ *
+ * Called on @ti pointer changed. Called immediately after -init
+ * to set initial state.
+ *
+ *
+ *
+ * -foreach: calls @f for each table entry
+ * typedef void ta_foreach(void *ta_state, struct table_info *ti,
+ * ta_foreach_f *f, void *arg);
+ * MANDATORY, locked(UH). (M_NOWAIT).
+ *
+ * Runs callback with specified argument for each table entry,
+ * Typically used for dumping table entries.
+ *
+ *
+ *
+ * -dump_tentry: dump table entry in current @tentry format.
+ * typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e,
+ * ipfw_obj_tentry *tent);
+ * MANDATORY, locked(UH). (M_NOWAIT). Returns 0 on success.
+ *
+ * Dumps entry @e to @tent.
+ *
+ *
+ * -print_config: prints custom algorithm options into buffer.
+ * typedef void (ta_print_config)(void *ta_state, struct table_info *ti,
+ * char *buf, size_t bufsize);
+ * OPTIONAL. locked(UH). (M_NOWAIT).
+ *
+ * Prints custom algorithm options in the format suitable to pass
+ * back to -init callback.
+ *
+ *
+ *
+ * -dump_tinfo: dumps algo-specific info.
+ * typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti,
+ * ipfw_ta_tinfo *tinfo);
+ * OPTIONAL. locked(UH). (M_NOWAIT).
+ *
+ * Dumps options like items size/hash size, etc.
+ */
+
+MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
+
+/*
+ * Utility structures/functions common to more than one algo
+ */
+
+/* Scratch state for table resize: old/new main storage for v4 and v6. */
+struct mod_item {
+ void *main_ptr;
+ size_t size;
+ void *main_ptr6;
+ size_t size6;
+};
+
+static int badd(const void *key, void *item, void *base, size_t nmemb,
+ size_t size, int (*compar) (const void *, const void *));
+static int bdel(const void *key, void *base, size_t nmemb, size_t size,
+ int (*compar) (const void *, const void *));
+
+
+/*
+ * ADDR implementation using radix
+ *
+ */
+
+/*
+ * The radix code expects addr and mask to be array of bytes,
+ * with the first byte being the length of the array. rn_inithead
+ * is called with the offset in bits of the lookup key within the
+ * array. If we use a sockaddr_in as the underlying type,
+ * sin_len is conveniently located at offset 0, sin_addr is at
+ * offset 4 and normally aligned.
+ * But for portability, let's avoid assumption and make the code explicit
+ */
+#define KEY_LEN(v) *((uint8_t *)&(v))
+/*
+ * Do not require radix to compare more than actual IPv4/IPv6 address
+ */
+#define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
+#define KEY_LEN_INET6 (offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr))
+
+#define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr))
+#define OFF_LEN_INET6 (8 * offsetof(struct sa_in6, sin6_addr))
+
+/* IPv4 radix tree node: radix linkage + masked address + table value. */
+struct radix_addr_entry {
+ struct radix_node rn[2];
+ struct sockaddr_in addr;
+ uint32_t value;
+ uint8_t masklen;
+};
+
+/* Minimal IPv6 "sockaddr": only len/family and the address itself. */
+struct sa_in6 {
+ uint8_t sin6_len;
+ uint8_t sin6_family;
+ uint8_t pad[2];
+ struct in6_addr sin6_addr;
+};
+
+/* IPv6 radix tree node (mirrors radix_addr_entry). */
+struct radix_addr_xentry {
+ struct radix_node rn[2];
+ struct sa_in6 addr6;
+ uint32_t value;
+ uint8_t masklen;
+};
+
+/* Per-table config: radix heads and per-family entry counters. */
+struct radix_cfg {
+ struct radix_node_head *head4;
+ struct radix_node_head *head6;
+ size_t count4;
+ size_t count6;
+};
+
+/* Add/del scratch buffer: pending entry plus sockaddr key/mask storage. */
+struct ta_buf_radix
+{
+ void *ent_ptr; /* entry to insert / deleted entry to free */
+ struct sockaddr *addr_ptr;
+ struct sockaddr *mask_ptr; /* NULL for host (full-length) routes */
+ union {
+ struct {
+ struct sockaddr_in sa;
+ struct sockaddr_in ma;
+ } a4;
+ struct {
+ struct sa_in6 sa;
+ struct sa_in6 ma;
+ } a6;
+ } addr;
+};
+
+static int ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen,
+ uint32_t *val);
+static int ta_init_radix(struct ip_fw_chain *ch, void **ta_state,
+ struct table_info *ti, char *data, uint8_t tflags);
+static int flush_radix_entry(struct radix_node *rn, void *arg);
+static void ta_destroy_radix(void *ta_state, struct table_info *ti);
+static void ta_dump_radix_tinfo(void *ta_state, struct table_info *ti,
+ ipfw_ta_tinfo *tinfo);
+static int ta_dump_radix_tentry(void *ta_state, struct table_info *ti,
+ void *e, ipfw_obj_tentry *tent);
+static int ta_find_radix_tentry(void *ta_state, struct table_info *ti,
+ ipfw_obj_tentry *tent);
+static void ta_foreach_radix(void *ta_state, struct table_info *ti,
+ ta_foreach_f *f, void *arg);
+static void tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa,
+ struct sockaddr *ma, int *set_mask);
+static int ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf);
+static int ta_add_radix(void *ta_state, struct table_info *ti,
+ struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+static int ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf);
+static int ta_del_radix(void *ta_state, struct table_info *ti,
+ struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+static void ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf);
+static int ta_need_modify_radix(void *ta_state, struct table_info *ti,
+ uint32_t count, uint64_t *pflags);
+
+/*
+ * Runtime lookup for "addr:radix".  @keylen selects the address family:
+ * sizeof(in_addr_t) means IPv4 (tree in ti->state), anything else is
+ * treated as IPv6 (tree in ti->xstate).  On a match stores the entry
+ * value into @val and returns 1; returns 0 when nothing matches.
+ */
+static int
+ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen,
+ uint32_t *val)
+{
+ struct radix_node_head *rnh;
+
+ if (keylen == sizeof(in_addr_t)) {
+ struct radix_addr_entry *ent;
+ struct sockaddr_in sa;
+ KEY_LEN(sa) = KEY_LEN_INET;
+ sa.sin_addr.s_addr = *((in_addr_t *)key);
+ rnh = (struct radix_node_head *)ti->state;
+ ent = (struct radix_addr_entry *)(rnh->rnh_matchaddr(&sa, &rnh->rh));
+ if (ent != NULL) {
+ *val = ent->value;
+ return (1);
+ }
+ } else {
+ struct radix_addr_xentry *xent;
+ struct sa_in6 sa6;
+ KEY_LEN(sa6) = KEY_LEN_INET6;
+ memcpy(&sa6.sin6_addr, key, sizeof(struct in6_addr));
+ rnh = (struct radix_node_head *)ti->xstate;
+ xent = (struct radix_addr_xentry *)(rnh->rnh_matchaddr(&sa6, &rnh->rh));
+ if (xent != NULL) {
+ *val = xent->value;
+ return (1);
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * New table instance: attach IPv4 and IPv6 radix heads to ti->state /
+ * ti->xstate, allocate the per-table config and install the lookup hook.
+ * Returns 0 on success or ENOMEM.
+ */
+static int
+ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
+ char *data, uint8_t tflags)
+{
+ struct radix_cfg *cfg;
+
+ if (!rn_inithead(&ti->state, OFF_LEN_INET))
+ return (ENOMEM);
+ if (!rn_inithead(&ti->xstate, OFF_LEN_INET6)) {
+ /* Roll back the IPv4 head allocated above */
+ rn_detachhead(&ti->state);
+ return (ENOMEM);
+ }
+
+ cfg = malloc(sizeof(struct radix_cfg), M_IPFW, M_WAITOK | M_ZERO);
+
+ *ta_state = cfg;
+ ti->lookup = ta_lookup_radix;
+
+ return (0);
+}
+
+/*
+ * rnh_walktree() callback used by ta_destroy_radix(): delete the node
+ * from the tree and free it (entries are allocated from M_IPFW_TBL in
+ * ta_prepare_add_radix()).  @arg is the owning radix head.
+ */
+static int
+flush_radix_entry(struct radix_node *rn, void *arg)
+{
+ struct radix_node_head * const rnh = arg;
+ struct radix_addr_entry *ent;
+
+ ent = (struct radix_addr_entry *)
+ rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, &rnh->rh);
+ if (ent != NULL)
+ free(ent, M_IPFW_TBL);
+ return (0);
+}
+
+/*
+ * Destroy table instance: free every entry in both trees, detach the
+ * radix heads and release the config allocated in ta_init_radix().
+ */
+static void
+ta_destroy_radix(void *ta_state, struct table_info *ti)
+{
+ struct radix_cfg *cfg;
+ struct radix_node_head *rnh;
+
+ cfg = (struct radix_cfg *)ta_state;
+
+ rnh = (struct radix_node_head *)(ti->state);
+ rnh->rnh_walktree(&rnh->rh, flush_radix_entry, rnh);
+ rn_detachhead(&ti->state);
+
+ rnh = (struct radix_node_head *)(ti->xstate);
+ rnh->rnh_walktree(&rnh->rh, flush_radix_entry, rnh);
+ rn_detachhead(&ti->xstate);
+
+ free(cfg, M_IPFW);
+}
+
+/*
+ * Provide algo-specific table info: per-family class, entry count and
+ * per-entry memory footprint, exported to userland via @tinfo.
+ */
+static void
+ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
+{
+ struct radix_cfg *cfg;
+
+ cfg = (struct radix_cfg *)ta_state;
+
+ tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM;
+ tinfo->taclass4 = IPFW_TACLASS_RADIX;
+ tinfo->count4 = cfg->count4;
+ tinfo->itemsize4 = sizeof(struct radix_addr_entry);
+ tinfo->taclass6 = IPFW_TACLASS_RADIX;
+ tinfo->count6 = cfg->count6;
+ tinfo->itemsize6 = sizeof(struct radix_addr_xentry);
+}
+
+/*
+ * Export one tree node @e into the userland entry format @tent.
+ * The sockaddr family stored in the node distinguishes IPv4 from IPv6
+ * entries (both node types start with the radix linkage).
+ */
+static int
+ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e,
+ ipfw_obj_tentry *tent)
+{
+ struct radix_addr_entry *n;
+#ifdef INET6
+ struct radix_addr_xentry *xn;
+#endif
+
+ n = (struct radix_addr_entry *)e;
+
+ /* Guess IPv4/IPv6 radix by sockaddr family */
+ if (n->addr.sin_family == AF_INET) {
+ tent->k.addr.s_addr = n->addr.sin_addr.s_addr;
+ tent->masklen = n->masklen;
+ tent->subtype = AF_INET;
+ tent->v.kidx = n->value;
+#ifdef INET6
+ } else {
+ xn = (struct radix_addr_xentry *)e;
+ memcpy(&tent->k, &xn->addr6.sin6_addr, sizeof(struct in6_addr));
+ tent->masklen = xn->masklen;
+ tent->subtype = AF_INET6;
+ tent->v.kidx = xn->value;
+#endif
+ }
+
+ /* NOTE(review): without INET6 a non-AF_INET node falls through and
+ * returns 0 with @tent untouched -- confirm callers never hit this. */
+ return (0);
+}
+
+/*
+ * Find the entry matching the key in @tent (family chosen by
+ * tent->subtype) and export it back via ta_dump_radix_tentry().
+ * Returns 0 when found, ENOENT otherwise.
+ */
+static int
+ta_find_radix_tentry(void *ta_state, struct table_info *ti,
+ ipfw_obj_tentry *tent)
+{
+ struct radix_node_head *rnh;
+ void *e;
+
+ e = NULL;
+ if (tent->subtype == AF_INET) {
+ struct sockaddr_in sa;
+ KEY_LEN(sa) = KEY_LEN_INET;
+ sa.sin_addr.s_addr = tent->k.addr.s_addr;
+ rnh = (struct radix_node_head *)ti->state;
+ e = rnh->rnh_matchaddr(&sa, &rnh->rh);
+ } else {
+ struct sa_in6 sa6;
+ KEY_LEN(sa6) = KEY_LEN_INET6;
+ memcpy(&sa6.sin6_addr, &tent->k.addr6, sizeof(struct in6_addr));
+ rnh = (struct radix_node_head *)ti->xstate;
+ e = rnh->rnh_matchaddr(&sa6, &rnh->rh);
+ }
+
+ if (e != NULL) {
+ ta_dump_radix_tentry(ta_state, ti, e, tent);
+ return (0);
+ }
+
+ return (ENOENT);
+}
+
+/*
+ * Run @f(@arg) for every entry, walking the IPv4 tree first and then
+ * the IPv6 tree.  @f is cast to the radix walker signature.
+ */
+static void
+ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f,
+ void *arg)
+{
+ struct radix_node_head *rnh;
+
+ rnh = (struct radix_node_head *)(ti->state);
+ rnh->rnh_walktree(&rnh->rh, (walktree_f_t *)f, arg);
+
+ rnh = (struct radix_node_head *)(ti->xstate);
+ rnh->rnh_walktree(&rnh->rh, (walktree_f_t *)f, arg);
+}
+
+
+#ifdef INET6
+static inline void ipv6_writemask(struct in6_addr *addr6, uint8_t mask);
+
+/*
+ * Write an IPv6 netmask of @mask leading one-bits into @addr6.
+ * Writes full 32-bit words, then one partial word when @mask is not a
+ * multiple of 32.  NOTE(review): words past the partial one are left
+ * untouched -- callers appear to rely on @addr6 being pre-zeroed
+ * (e.g. M_ZERO allocations); confirm for stack-allocated destinations.
+ */
+static inline void
+ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
+{
+ uint32_t *cp;
+
+ for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32)
+ *cp++ = 0xFFFFFFFF;
+ if (mask > 0)
+ *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0);
+}
+#endif
+
+/*
+ * Convert a tentry_info key into the sockaddr address/mask pair used by
+ * the radix code.  The address is stored already masked.  *@set_mask is
+ * set to 1 for prefixes (a mask must be passed to the radix code) and 0
+ * for host entries (/32, /128).  Unknown subtypes leave the outputs
+ * untouched; callers validate tei->subtype beforehand.
+ */
+static void
+tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa,
+ struct sockaddr *ma, int *set_mask)
+{
+ int mlen;
+#ifdef INET
+ struct sockaddr_in *addr, *mask;
+#endif
+#ifdef INET6
+ struct sa_in6 *addr6, *mask6;
+#endif
+ in_addr_t a4;
+
+ mlen = tei->masklen;
+
+ if (tei->subtype == AF_INET) {
+#ifdef INET
+ addr = (struct sockaddr_in *)sa;
+ mask = (struct sockaddr_in *)ma;
+ /* Set 'total' structure length */
+ KEY_LEN(*addr) = KEY_LEN_INET;
+ KEY_LEN(*mask) = KEY_LEN_INET;
+ addr->sin_family = AF_INET;
+ mask->sin_addr.s_addr =
+ htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
+ a4 = *((in_addr_t *)tei->paddr);
+ addr->sin_addr.s_addr = a4 & mask->sin_addr.s_addr;
+ if (mlen != 32)
+ *set_mask = 1;
+ else
+ *set_mask = 0;
+#endif
+#ifdef INET6
+ } else if (tei->subtype == AF_INET6) {
+ /* IPv6 case */
+ addr6 = (struct sa_in6 *)sa;
+ mask6 = (struct sa_in6 *)ma;
+ /* Set 'total' structure length */
+ KEY_LEN(*addr6) = KEY_LEN_INET6;
+ KEY_LEN(*mask6) = KEY_LEN_INET6;
+ addr6->sin6_family = AF_INET6;
+ ipv6_writemask(&mask6->sin6_addr, mlen);
+ memcpy(&addr6->sin6_addr, tei->paddr, sizeof(struct in6_addr));
+ APPLY_MASK(&addr6->sin6_addr, &mask6->sin6_addr);
+ if (mlen != 128)
+ *set_mask = 1;
+ else
+ *set_mask = 0;
+#endif
+ }
+}
+
+/*
+ * Unlocked add preparation: validate masklen, allocate the new tree
+ * node (M_WAITOK|M_ZERO) and fill its sockaddr key/mask from @tei.
+ * The node is stashed in tb->ent_ptr for ta_add_radix(); if the add is
+ * aborted, ta_flush_radix_entry() frees it.  Returns 0 or EINVAL.
+ */
+static int
+ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf)
+{
+ struct ta_buf_radix *tb;
+ struct radix_addr_entry *ent;
+#ifdef INET6
+ struct radix_addr_xentry *xent;
+#endif
+ struct sockaddr *addr, *mask;
+ int mlen, set_mask;
+
+ tb = (struct ta_buf_radix *)ta_buf;
+
+ mlen = tei->masklen;
+ set_mask = 0;
+
+ if (tei->subtype == AF_INET) {
+#ifdef INET
+ if (mlen > 32)
+ return (EINVAL);
+ ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
+ ent->masklen = mlen;
+
+ /* Address lives inside the node; mask in the scratch buffer */
+ addr = (struct sockaddr *)&ent->addr;
+ mask = (struct sockaddr *)&tb->addr.a4.ma;
+ tb->ent_ptr = ent;
+#endif
+#ifdef INET6
+ } else if (tei->subtype == AF_INET6) {
+ /* IPv6 case */
+ if (mlen > 128)
+ return (EINVAL);
+ xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
+ xent->masklen = mlen;
+
+ addr = (struct sockaddr *)&xent->addr6;
+ mask = (struct sockaddr *)&tb->addr.a6.ma;
+ tb->ent_ptr = xent;
+#endif
+ } else {
+ /* Unknown CIDR type */
+ return (EINVAL);
+ }
+
+ tei_to_sockaddr_ent(tei, addr, mask, &set_mask);
+ /* Set pointers */
+ tb->addr_ptr = addr;
+ if (set_mask != 0)
+ tb->mask_ptr = mask;
+
+ return (0);
+}
+
+/*
+ * Locked insert.  If the key already exists: return EEXIST, or -- when
+ * TEI_FLAGS_UPDATE is set -- swap values with @tei, mark the entry
+ * TEI_FLAGS_UPDATED and report 0 added items.  Otherwise insert the
+ * node prepared earlier (tb->ent_ptr is cleared on success so
+ * ta_flush_radix_entry() will not free it) and report 1 via @pnum.
+ */
+static int
+ta_add_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+ void *ta_buf, uint32_t *pnum)
+{
+ struct radix_cfg *cfg;
+ struct radix_node_head *rnh;
+ struct radix_node *rn;
+ struct ta_buf_radix *tb;
+ uint32_t *old_value, value;
+
+ cfg = (struct radix_cfg *)ta_state;
+ tb = (struct ta_buf_radix *)ta_buf;
+
+ /* Save current entry value from @tei */
+ if (tei->subtype == AF_INET) {
+ rnh = ti->state;
+ ((struct radix_addr_entry *)tb->ent_ptr)->value = tei->value;
+ } else {
+ rnh = ti->xstate;
+ ((struct radix_addr_xentry *)tb->ent_ptr)->value = tei->value;
+ }
+
+ /* Search for an entry first */
+ rn = rnh->rnh_lookup(tb->addr_ptr, tb->mask_ptr, &rnh->rh);
+ if (rn != NULL) {
+ if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
+ return (EEXIST);
+ /* Record already exists. Update value if we're asked to */
+ if (tei->subtype == AF_INET)
+ old_value = &((struct radix_addr_entry *)rn)->value;
+ else
+ old_value = &((struct radix_addr_xentry *)rn)->value;
+
+ /* Swap: old value is handed back to the caller via @tei */
+ value = *old_value;
+ *old_value = tei->value;
+ tei->value = value;
+
+ /* Indicate that update has happened instead of addition */
+ tei->flags |= TEI_FLAGS_UPDATED;
+ *pnum = 0;
+
+ return (0);
+ }
+
+ /* Update-only request and no existing entry: refuse to add */
+ if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
+ return (EFBIG);
+
+ rn = rnh->rnh_addaddr(tb->addr_ptr, tb->mask_ptr, &rnh->rh,tb->ent_ptr);
+ if (rn == NULL) {
+ /* Unknown error */
+ return (EINVAL);
+ }
+
+ if (tei->subtype == AF_INET)
+ cfg->count4++;
+ else
+ cfg->count6++;
+ /* Node is now owned by the tree; prevent flush from freeing it */
+ tb->ent_ptr = NULL;
+ *pnum = 1;
+
+ return (0);
+}
+
+/*
+ * Delete preparation (no allocation): build the sockaddr key/mask in
+ * the on-stack scratch buffer from @tei.  Returns 0 or EINVAL on bad
+ * masklen / unknown subtype.
+ */
+static int
+ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf)
+{
+ struct ta_buf_radix *tb;
+ struct sockaddr *addr, *mask;
+ int mlen, set_mask;
+
+ tb = (struct ta_buf_radix *)ta_buf;
+
+ mlen = tei->masklen;
+ set_mask = 0;
+
+ if (tei->subtype == AF_INET) {
+ if (mlen > 32)
+ return (EINVAL);
+
+ addr = (struct sockaddr *)&tb->addr.a4.sa;
+ mask = (struct sockaddr *)&tb->addr.a4.ma;
+#ifdef INET6
+ } else if (tei->subtype == AF_INET6) {
+ if (mlen > 128)
+ return (EINVAL);
+
+ addr = (struct sockaddr *)&tb->addr.a6.sa;
+ mask = (struct sockaddr *)&tb->addr.a6.ma;
+#endif
+ } else
+ return (EINVAL);
+
+ tei_to_sockaddr_ent(tei, addr, mask, &set_mask);
+ tb->addr_ptr = addr;
+ if (set_mask != 0)
+ tb->mask_ptr = mask;
+
+ return (0);
+}
+
+/*
+ * Locked delete.  Removes the node matching the prepared key, hands its
+ * value back through @tei, and stores the detached node in tb->ent_ptr
+ * so ta_flush_radix_entry() frees it outside the lock.  Returns 0 and
+ * sets *@pnum = 1, or ENOENT when no such entry exists.
+ */
+static int
+ta_del_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+ void *ta_buf, uint32_t *pnum)
+{
+ struct radix_cfg *cfg;
+ struct radix_node_head *rnh;
+ struct radix_node *rn;
+ struct ta_buf_radix *tb;
+
+ cfg = (struct radix_cfg *)ta_state;
+ tb = (struct ta_buf_radix *)ta_buf;
+
+ if (tei->subtype == AF_INET)
+ rnh = ti->state;
+ else
+ rnh = ti->xstate;
+
+ rn = rnh->rnh_deladdr(tb->addr_ptr, tb->mask_ptr, &rnh->rh);
+
+ if (rn == NULL)
+ return (ENOENT);
+
+ /* Save entry value to @tei */
+ if (tei->subtype == AF_INET)
+ tei->value = ((struct radix_addr_entry *)rn)->value;
+ else
+ tei->value = ((struct radix_addr_xentry *)rn)->value;
+
+ /* Defer freeing the detached node to the flush callback */
+ tb->ent_ptr = rn;
+
+ if (tei->subtype == AF_INET)
+ cfg->count4--;
+ else
+ cfg->count6--;
+ *pnum = 1;
+
+ return (0);
+}
+
+/*
+ * Flush scratch state: free a node left over from a failed add or a
+ * completed delete.  tb->ent_ptr is NULL when nothing is pending.
+ */
+static void
+ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf)
+{
+ struct ta_buf_radix *tb;
+
+ tb = (struct ta_buf_radix *)ta_buf;
+
+ if (tb->ent_ptr != NULL)
+ free(tb->ent_ptr, M_IPFW_TBL);
+}
+
+/*
+ * Capacity check: always reports "enough room" (returns 0).
+ */
+static int
+ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count,
+ uint64_t *pflags)
+{
+
+ /*
+ * Radix needs no preallocation beyond the nodes themselves.
+ * Adding new masks to the tree does allocate internally, but
+ * there is no API to preallocate for that (and we would not
+ * know which sizes to request anyway).
+ */
+ return (0);
+}
+
+/*
+ * "addr:radix" algorithm descriptor -- the default ADDR-table algo,
+ * wiring the callbacks above into the table framework.
+ */
+struct table_algo addr_radix = {
+ .name = "addr:radix",
+ .type = IPFW_TABLE_ADDR,
+ .flags = TA_FLAG_DEFAULT,
+ .ta_buf_size = sizeof(struct ta_buf_radix),
+ .init = ta_init_radix,
+ .destroy = ta_destroy_radix,
+ .prepare_add = ta_prepare_add_radix,
+ .prepare_del = ta_prepare_del_radix,
+ .add = ta_add_radix,
+ .del = ta_del_radix,
+ .flush_entry = ta_flush_radix_entry,
+ .foreach = ta_foreach_radix,
+ .dump_tentry = ta_dump_radix_tentry,
+ .find_tentry = ta_find_radix_tentry,
+ .dump_tinfo = ta_dump_radix_tinfo,
+ .need_modify = ta_need_modify_radix,
+};
+
+
+/*
+ * addr:hash cmds
+ *
+ *
+ * ti->data:
+ * [inv.mask4][inv.mask6][log2hsize4][log2hsize6]
+ * [ 8][ 8][ 8][ 8]
+ *
+ * inv.mask4: 32 - mask
+ * inv.mask6:
+ * 1) _slow lookup: mask
+ * 2) _aligned: (128 - mask) / 8
+ * 3) _64: 8
+ *
+ *
+ * pflags:
+ * [v4=1/v6=0][hsize]
+ * [ 32][ 32]
+ */
+
+struct chashentry;
+
+/* Singly-linked bucket head for the hash-based addr tables. */
+SLIST_HEAD(chashbhead, chashentry);
+
+/* Per-table config: bucket arrays, sizes, item counts and masks. */
+struct chash_cfg {
+ struct chashbhead *head4;
+ struct chashbhead *head6;
+ size_t size4;
+ size_t size6;
+ size_t items4;
+ size_t items6;
+ uint8_t mask4;
+ uint8_t mask6;
+};
+
+/* One hash entry: bucket linkage, table value and the (masked) key. */
+struct chashentry {
+ SLIST_ENTRY(chashentry) next;
+ uint32_t value;
+ uint32_t type;
+ union {
+ uint32_t a4; /* Host format */
+ struct in6_addr a6; /* Network format */
+ } a;
+};
+
+/* Add/del scratch buffer: pending heap entry + on-stack template. */
+struct ta_buf_chash
+{
+ void *ent_ptr;
+ struct chashentry ent;
+};
+
+#ifdef INET
+static __inline uint32_t hash_ip(uint32_t addr, int hsize);
+#endif
+#ifdef INET6
+static __inline uint32_t hash_ip6(struct in6_addr *addr6, int hsize);
+static __inline uint16_t hash_ip64(struct in6_addr *addr6, int hsize);
+static __inline uint32_t hash_ip6_slow(struct in6_addr *addr6, void *key,
+ int mask, int hsize);
+static __inline uint32_t hash_ip6_al(struct in6_addr *addr6, void *key, int mask,
+ int hsize);
+#endif
+static int ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen,
+ uint32_t *val);
+static int ta_lookup_chash_aligned(struct table_info *ti, void *key,
+ uint32_t keylen, uint32_t *val);
+static int ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen,
+ uint32_t *val);
+static int chash_parse_opts(struct chash_cfg *cfg, char *data);
+static void ta_print_chash_config(void *ta_state, struct table_info *ti,
+ char *buf, size_t bufsize);
+static int ta_log2(uint32_t v);
+static int ta_init_chash(struct ip_fw_chain *ch, void **ta_state,
+ struct table_info *ti, char *data, uint8_t tflags);
+static void ta_destroy_chash(void *ta_state, struct table_info *ti);
+static void ta_dump_chash_tinfo(void *ta_state, struct table_info *ti,
+ ipfw_ta_tinfo *tinfo);
+static int ta_dump_chash_tentry(void *ta_state, struct table_info *ti,
+ void *e, ipfw_obj_tentry *tent);
+static uint32_t hash_ent(struct chashentry *ent, int af, int mlen,
+ uint32_t size);
+static int tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent);
+static int ta_find_chash_tentry(void *ta_state, struct table_info *ti,
+ ipfw_obj_tentry *tent);
+static void ta_foreach_chash(void *ta_state, struct table_info *ti,
+ ta_foreach_f *f, void *arg);
+static int ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf);
+static int ta_add_chash(void *ta_state, struct table_info *ti,
+ struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+static int ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf);
+static int ta_del_chash(void *ta_state, struct table_info *ti,
+ struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+static void ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf);
+static int ta_need_modify_chash(void *ta_state, struct table_info *ti,
+ uint32_t count, uint64_t *pflags);
+static int ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags);
+static int ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf,
+ uint64_t *pflags);
+static void ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf,
+ uint64_t pflags);
+static void ta_flush_mod_chash(void *ta_buf);
+
+
+#ifdef INET
+/*
+ * Bucket index for an IPv4 key: simple modulo over (hsize - 1).
+ * NOTE(review): hsize == 1 would divide by zero -- presumably callers
+ * enforce a minimum hash size; confirm.
+ */
+static __inline uint32_t
+hash_ip(uint32_t addr, int hsize)
+{
+
+ return (addr % (hsize - 1));
+}
+#endif
+
+#ifdef INET6
+/* Bucket index for a full IPv6 key: XOR of all four 32-bit words. */
+static __inline uint32_t
+hash_ip6(struct in6_addr *addr6, int hsize)
+{
+ uint32_t i;
+
+ i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1] ^
+ addr6->s6_addr32[2] ^ addr6->s6_addr32[3];
+
+ return (i % (hsize - 1));
+}
+
+
+/* Bucket index for /64 tables: XOR of the upper two 32-bit words only. */
+static __inline uint16_t
+hash_ip64(struct in6_addr *addr6, int hsize)
+{
+ uint32_t i;
+
+ i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1];
+
+ return (i % (hsize - 1));
+}
+
+
+/*
+ * Copy @key into @addr6, apply an arbitrary (non-octet-aligned) @mask
+ * and hash the result.  NOTE(review): ipv6_writemask() leaves trailing
+ * words of the stack mask untouched when @mask is not a multiple of
+ * 32 and < 128 -- confirm mask values used here avoid that case.
+ */
+static __inline uint32_t
+hash_ip6_slow(struct in6_addr *addr6, void *key, int mask, int hsize)
+{
+ struct in6_addr mask6;
+
+ ipv6_writemask(&mask6, mask);
+ memcpy(addr6, key, sizeof(struct in6_addr));
+ APPLY_MASK(addr6, &mask6);
+ return (hash_ip6(addr6, hsize));
+}
+
+/*
+ * Octet-aligned variant: @mask is a byte count here.  Zero @addr6,
+ * copy the first @mask bytes of the key and hash the result.
+ */
+static __inline uint32_t
+hash_ip6_al(struct in6_addr *addr6, void *key, int mask, int hsize)
+{
+ uint64_t *paddr;
+
+ paddr = (uint64_t *)addr6;
+ *paddr = 0;
+ *(paddr + 1) = 0;
+ memcpy(addr6, key, mask);
+ return (hash_ip6(addr6, hsize));
+}
+#endif
+
+/*
+ * Runtime lookup, generic/slowest variant (arbitrary IPv6 mask).
+ * ti->data packs [inv.mask4][inv.mask6][log2hsize4][log2hsize6], one
+ * byte each (see banner comment above).  Returns 1 and fills @val on
+ * match, 0 otherwise.
+ */
+static int
+ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen,
+ uint32_t *val)
+{
+ struct chashbhead *head;
+ struct chashentry *ent;
+ uint16_t hash, hsize;
+ uint8_t imask;
+
+ if (keylen == sizeof(in_addr_t)) {
+#ifdef INET
+ head = (struct chashbhead *)ti->state;
+ imask = ti->data >> 24;
+ hsize = 1 << ((ti->data & 0xFFFF) >> 8);
+ uint32_t a;
+ /* Host order, low bits dropped per stored inverted mask */
+ a = ntohl(*((in_addr_t *)key));
+ a = a >> imask;
+ hash = hash_ip(a, hsize);
+ SLIST_FOREACH(ent, &head[hash], next) {
+ if (ent->a.a4 == a) {
+ *val = ent->value;
+ return (1);
+ }
+ }
+#endif
+ } else {
+#ifdef INET6
+ /* IPv6: worst scenario: non-round mask */
+ struct in6_addr addr6;
+ head = (struct chashbhead *)ti->xstate;
+ imask = (ti->data & 0xFF0000) >> 16;
+ hsize = 1 << (ti->data & 0xFF);
+ hash = hash_ip6_slow(&addr6, key, imask, hsize);
+ SLIST_FOREACH(ent, &head[hash], next) {
+ if (memcmp(&ent->a.a6, &addr6, 16) == 0) {
+ *val = ent->value;
+ return (1);
+ }
+ }
+#endif
+ }
+
+ return (0);
+}
+
+/*
+ * Runtime lookup, octet-aligned IPv6 masks: the masked address fits
+ * whole bytes, so comparison is two 64-bit word compares instead of
+ * memcmp.  IPv4 path is identical to the slow variant.
+ */
+static int
+ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen,
+ uint32_t *val)
+{
+ struct chashbhead *head;
+ struct chashentry *ent;
+ uint16_t hash, hsize;
+ uint8_t imask;
+
+ if (keylen == sizeof(in_addr_t)) {
+#ifdef INET
+ head = (struct chashbhead *)ti->state;
+ imask = ti->data >> 24;
+ hsize = 1 << ((ti->data & 0xFFFF) >> 8);
+ uint32_t a;
+ a = ntohl(*((in_addr_t *)key));
+ a = a >> imask;
+ hash = hash_ip(a, hsize);
+ SLIST_FOREACH(ent, &head[hash], next) {
+ if (ent->a.a4 == a) {
+ *val = ent->value;
+ return (1);
+ }
+ }
+#endif
+ } else {
+#ifdef INET6
+ /* IPv6: aligned to 8bit mask */
+ struct in6_addr addr6;
+ uint64_t *paddr, *ptmp;
+ head = (struct chashbhead *)ti->xstate;
+ imask = (ti->data & 0xFF0000) >> 16;
+ hsize = 1 << (ti->data & 0xFF);
+
+ hash = hash_ip6_al(&addr6, key, imask, hsize);
+ paddr = (uint64_t *)&addr6;
+ SLIST_FOREACH(ent, &head[hash], next) {
+ ptmp = (uint64_t *)&ent->a.a6;
+ if (paddr[0] == ptmp[0] && paddr[1] == ptmp[1]) {
+ *val = ent->value;
+ return (1);
+ }
+ }
+#endif
+ }
+
+ return (0);
+}
+
+static int
+ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen,
+ uint32_t *val)
+{
+ struct chashbhead *head;
+ struct chashentry *ent;
+ uint16_t hash, hsize;
+ uint8_t imask;
+
+ if (keylen == sizeof(in_addr_t)) {
+#ifdef INET
+ head = (struct chashbhead *)ti->state;
+ imask = ti->data >> 24;
+ hsize = 1 << ((ti->data & 0xFFFF) >> 8);
+ uint32_t a;
+ a = ntohl(*((in_addr_t *)key));
+ a = a >> imask;
+ hash = hash_ip(a, hsize);
+ SLIST_FOREACH(ent, &head[hash], next) {
+ if (ent->a.a4 == a) {
+ *val = ent->value;
+ return (1);
+ }
+ }
+#endif
+ } else {
+#ifdef INET6
+ /* IPv6: /64 */
+ uint64_t a6, *paddr;
+ head = (struct chashbhead *)ti->xstate;
+ paddr = (uint64_t *)key;
+ hsize = 1 << (ti->data & 0xFF);
+ a6 = *paddr;
+ hash = hash_ip64((struct in6_addr *)key, hsize);
+ SLIST_FOREACH(ent, &head[hash], next) {
+ paddr = (uint64_t *)&ent->a.a6;
+ if (a6 == *paddr) {
+ *val = ent->value;
+ return (1);
+ }
+ }
+#endif
+ }
+
+ return (0);
+}
+
+/*
+ * Parse an optional "masks=/XX[,/YY]" token out of the table creation
+ * string in @data.  On success cfg->mask4/mask6 are updated; absence of
+ * the option is not an error (defaults stay in place).
+ *
+ * Returns 0 on success or EINVAL on malformed input / out-of-range mask.
+ */
+static int
+chash_parse_opts(struct chash_cfg *cfg, char *data)
+{
+ char *pdel, *pend, *s;
+ int mask4, mask6;
+
+ mask4 = cfg->mask4;
+ mask6 = cfg->mask6;
+
+ if (data == NULL)
+ return (0);
+ if ((pdel = strchr(data, ' ')) == NULL)
+ return (0);
+ while (*pdel == ' ')
+ pdel++;
+ if (strncmp(pdel, "masks=", 6) != 0)
+ return (EINVAL);
+ /* Terminate the token at the next space, if any. */
+ if ((s = strchr(pdel, ' ')) != NULL)
+ *s++ = '\0';
+
+ pdel += 6;
+ /* Need /XX[,/YY] */
+ if (*pdel++ != '/')
+ return (EINVAL);
+ mask4 = strtol(pdel, &pend, 10);
+ if (*pend == ',') {
+ /* ,/YY */
+ pdel = pend + 1;
+ if (*pdel++ != '/')
+ return (EINVAL);
+ mask6 = strtol(pdel, &pend, 10);
+ if (*pend != '\0')
+ return (EINVAL);
+ } else if (*pend != '\0')
+ return (EINVAL);
+
+ if (mask4 < 0 || mask4 > 32 || mask6 < 0 || mask6 > 128)
+ return (EINVAL);
+
+ cfg->mask4 = mask4;
+ cfg->mask6 = mask6;
+
+ return (0);
+}
+
+/*
+ * Export the algorithm configuration string ("addr:hash [masks=/X,/Y]")
+ * into @buf; the masks suffix is emitted only for non-default masks.
+ */
+static void
+ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf,
+ size_t bufsize)
+{
+ struct chash_cfg *cfg;
+
+ cfg = (struct chash_cfg *)ta_state;
+
+ if (cfg->mask4 != 32 || cfg->mask6 != 128)
+ snprintf(buf, bufsize, "%s masks=/%d,/%d", "addr:hash",
+ cfg->mask4, cfg->mask6);
+ else
+ snprintf(buf, bufsize, "%s", "addr:hash");
+}
+
+/*
+ * Integer floor(log2(v)); returns 0 for v <= 1.  Used to pack hash sizes
+ * (which are powers of two) into 8-bit fields of ti->data.
+ */
+static int
+ta_log2(uint32_t v)
+{
+ uint32_t r;
+
+ r = 0;
+ while (v >>= 1)
+ r++;
+
+ return (r);
+}
+
+/*
+ * New table.
+ * We assume 'data' to be either NULL or the following format:
+ * 'addr:hash [masks=/32[,/128]]'
+ *
+ * Allocates the config and both hash bucket arrays, then packs the
+ * lookup parameters into ti->data:
+ *   bits 31-24: 32 - mask4;  bits 23-16: v6 info (see below);
+ *   bits 15-8: log2(size4);  bits 7-0: log2(size6).
+ * The lookup function is chosen by IPv6 mask length: /64 fast path,
+ * byte-aligned masks, or the generic slow path.
+ */
+static int
+ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
+ char *data, uint8_t tflags)
+{
+ int error, i;
+ uint32_t hsize;
+ struct chash_cfg *cfg;
+
+ cfg = malloc(sizeof(struct chash_cfg), M_IPFW, M_WAITOK | M_ZERO);
+
+ cfg->mask4 = 32;
+ cfg->mask6 = 128;
+
+ if ((error = chash_parse_opts(cfg, data)) != 0) {
+ free(cfg, M_IPFW);
+ return (error);
+ }
+
+ cfg->size4 = 128;
+ cfg->size6 = 128;
+
+ cfg->head4 = malloc(sizeof(struct chashbhead) * cfg->size4, M_IPFW,
+ M_WAITOK | M_ZERO);
+ cfg->head6 = malloc(sizeof(struct chashbhead) * cfg->size6, M_IPFW,
+ M_WAITOK | M_ZERO);
+ for (i = 0; i < cfg->size4; i++)
+ SLIST_INIT(&cfg->head4[i]);
+ for (i = 0; i < cfg->size6; i++)
+ SLIST_INIT(&cfg->head6[i]);
+
+
+ *ta_state = cfg;
+ ti->state = cfg->head4;
+ ti->xstate = cfg->head6;
+
+ /* Store data depending on v6 mask length */
+ hsize = ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6);
+ if (cfg->mask6 == 64) {
+ ti->data = (32 - cfg->mask4) << 24 | (128 - cfg->mask6) << 16|
+ hsize;
+ ti->lookup = ta_lookup_chash_64;
+ } else if ((cfg->mask6 % 8) == 0) {
+ /* mask6 << 13 == (mask6 / 8) << 16: byte count in bits 23-16 */
+ ti->data = (32 - cfg->mask4) << 24 |
+ cfg->mask6 << 13 | hsize;
+ ti->lookup = ta_lookup_chash_aligned;
+ } else {
+ /* Non-byte-aligned v6 mask: slowest lookup path. Avoid. */
+ ti->data = (32 - cfg->mask4) << 24 |
+ cfg->mask6 << 16 | hsize;
+ ti->lookup = ta_lookup_chash_slow;
+ }
+
+ return (0);
+}
+
+/*
+ * Tear down the table: free every entry from both hash arrays, the
+ * bucket arrays themselves and, finally, the config structure.
+ */
+static void
+ta_destroy_chash(void *ta_state, struct table_info *ti)
+{
+ struct chash_cfg *cfg;
+ struct chashentry *ent, *ent_next;
+ int i;
+
+ cfg = (struct chash_cfg *)ta_state;
+
+ for (i = 0; i < cfg->size4; i++)
+ SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next)
+ free(ent, M_IPFW_TBL);
+
+ for (i = 0; i < cfg->size6; i++)
+ SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next)
+ free(ent, M_IPFW_TBL);
+
+ free(cfg->head4, M_IPFW);
+ free(cfg->head6, M_IPFW);
+
+ free(cfg, M_IPFW);
+}
+
+/*
+ * Report per-family hash sizes, entry counts and per-entry memory cost
+ * to the generic table layer.
+ */
+static void
+ta_dump_chash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
+{
+ struct chash_cfg *cfg;
+
+ cfg = (struct chash_cfg *)ta_state;
+
+ tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM;
+ tinfo->taclass4 = IPFW_TACLASS_HASH;
+ tinfo->size4 = cfg->size4;
+ tinfo->count4 = cfg->items4;
+ tinfo->itemsize4 = sizeof(struct chashentry);
+ tinfo->taclass6 = IPFW_TACLASS_HASH;
+ tinfo->size6 = cfg->size6;
+ tinfo->count6 = cfg->items6;
+ tinfo->itemsize6 = sizeof(struct chashentry);
+}
+
+/*
+ * Convert internal entry @e into the exportable ipfw_obj_tentry format.
+ * v4 keys are stored pre-shifted, so shift back and convert to network
+ * byte order before export.
+ */
+static int
+ta_dump_chash_tentry(void *ta_state, struct table_info *ti, void *e,
+ ipfw_obj_tentry *tent)
+{
+ struct chash_cfg *cfg;
+ struct chashentry *ent;
+
+ cfg = (struct chash_cfg *)ta_state;
+ ent = (struct chashentry *)e;
+
+ if (ent->type == AF_INET) {
+ tent->k.addr.s_addr = htonl(ent->a.a4 << (32 - cfg->mask4));
+ tent->masklen = cfg->mask4;
+ tent->subtype = AF_INET;
+ tent->v.kidx = ent->value;
+#ifdef INET6
+ } else {
+ memcpy(&tent->k, &ent->a.a6, sizeof(struct in6_addr));
+ tent->masklen = cfg->mask6;
+ tent->subtype = AF_INET6;
+ tent->v.kidx = ent->value;
+#endif
+ }
+
+ return (0);
+}
+
+/*
+ * Hash an already-masked entry for bucket selection on the control
+ * path (add/del/find/rehash).  @mlen selects the /64 fast hash for v6.
+ */
+static uint32_t
+hash_ent(struct chashentry *ent, int af, int mlen, uint32_t size)
+{
+ uint32_t hash;
+
+ hash = 0;
+
+ if (af == AF_INET) {
+#ifdef INET
+ hash = hash_ip(ent->a.a4, size);
+#endif
+ } else {
+#ifdef INET6
+ if (mlen == 64)
+ hash = hash_ip64(&ent->a.a6, size);
+ else
+ hash = hash_ip6(&ent->a.a6, size);
+#endif
+ }
+
+ return (hash);
+}
+
+/*
+ * Fill @ent from user-supplied @tei: set the address family and store
+ * the address masked to tei->masklen (v4 keys are kept host-order and
+ * right-shifted by (32 - mlen), matching the runtime lookups).
+ *
+ * Returns 0 on success, EINVAL for a bad mask length or unknown family.
+ */
+static int
+tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent)
+{
+ int mlen;
+#ifdef INET6
+ struct in6_addr mask6;
+#endif
+
+
+ mlen = tei->masklen;
+
+ if (tei->subtype == AF_INET) {
+#ifdef INET
+ if (mlen > 32)
+ return (EINVAL);
+ ent->type = AF_INET;
+
+ /* Calculate masked address */
+ ent->a.a4 = ntohl(*((in_addr_t *)tei->paddr)) >> (32 - mlen);
+#endif
+#ifdef INET6
+ } else if (tei->subtype == AF_INET6) {
+ /* IPv6 case */
+ if (mlen > 128)
+ return (EINVAL);
+ ent->type = AF_INET6;
+
+ ipv6_writemask(&mask6, mlen);
+ memcpy(&ent->a.a6, tei->paddr, sizeof(struct in6_addr));
+ APPLY_MASK(&ent->a.a6, &mask6);
+#endif
+ } else {
+ /* Unknown CIDR type */
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+/*
+ * Control-plane exact-match lookup: convert the request key with the
+ * table's configured mask, walk the appropriate bucket and export the
+ * matching entry via ta_dump_chash_tentry().
+ *
+ * Returns 0 on success, ENOENT if no entry matched, or EINVAL from
+ * key conversion.
+ */
+static int
+ta_find_chash_tentry(void *ta_state, struct table_info *ti,
+ ipfw_obj_tentry *tent)
+{
+ struct chash_cfg *cfg;
+ struct chashbhead *head;
+ struct chashentry ent, *tmp;
+ struct tentry_info tei;
+ int error;
+ uint32_t hash;
+
+ cfg = (struct chash_cfg *)ta_state;
+
+ memset(&ent, 0, sizeof(ent));
+ memset(&tei, 0, sizeof(tei));
+
+ if (tent->subtype == AF_INET) {
+ tei.paddr = &tent->k.addr;
+ tei.masklen = cfg->mask4;
+ tei.subtype = AF_INET;
+
+ if ((error = tei_to_chash_ent(&tei, &ent)) != 0)
+ return (error);
+
+ head = cfg->head4;
+ hash = hash_ent(&ent, AF_INET, cfg->mask4, cfg->size4);
+ /* Check for existence */
+ SLIST_FOREACH(tmp, &head[hash], next) {
+ if (tmp->a.a4 != ent.a.a4)
+ continue;
+
+ ta_dump_chash_tentry(ta_state, ti, tmp, tent);
+ return (0);
+ }
+ } else {
+ tei.paddr = &tent->k.addr6;
+ tei.masklen = cfg->mask6;
+ tei.subtype = AF_INET6;
+
+ if ((error = tei_to_chash_ent(&tei, &ent)) != 0)
+ return (error);
+
+ head = cfg->head6;
+ hash = hash_ent(&ent, AF_INET6, cfg->mask6, cfg->size6);
+ /* Check for existence */
+ SLIST_FOREACH(tmp, &head[hash], next) {
+ if (memcmp(&tmp->a.a6, &ent.a.a6, 16) != 0)
+ continue;
+ ta_dump_chash_tentry(ta_state, ti, tmp, tent);
+ return (0);
+ }
+ }
+
+ return (ENOENT);
+}
+
+/*
+ * Invoke callback @f for every entry in both hash arrays.  The _SAFE
+ * iterator is used so @f may free/unlink the current entry.
+ */
+static void
+ta_foreach_chash(void *ta_state, struct table_info *ti, ta_foreach_f *f,
+ void *arg)
+{
+ struct chash_cfg *cfg;
+ struct chashentry *ent, *ent_next;
+ int i;
+
+ cfg = (struct chash_cfg *)ta_state;
+
+ for (i = 0; i < cfg->size4; i++)
+ SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next)
+ f(ent, arg);
+
+ for (i = 0; i < cfg->size6; i++)
+ SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next)
+ f(ent, arg);
+}
+
+/*
+ * Pre-allocate and pre-fill a new entry outside the runtime lock.
+ * On success ownership of the entry is parked in tb->ent_ptr; it is
+ * either consumed by ta_add_chash() or freed by ta_flush_chash_entry().
+ */
+static int
+ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf)
+{
+ struct ta_buf_chash *tb;
+ struct chashentry *ent;
+ int error;
+
+ tb = (struct ta_buf_chash *)ta_buf;
+
+ ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
+
+ error = tei_to_chash_ent(tei, ent);
+ if (error != 0) {
+ free(ent, M_IPFW_TBL);
+ return (error);
+ }
+ tb->ent_ptr = ent;
+
+ return (0);
+}
+
+/*
+ * Insert (or, with TEI_FLAGS_UPDATE, update) the pre-built entry.
+ * On plain insert the entry is linked and tb->ent_ptr cleared so the
+ * flush callback will not free it; on update/error the entry stays in
+ * tb->ent_ptr for ta_flush_chash_entry() to release.  *pnum reports
+ * the number of entries actually added (0 or 1).
+ */
+static int
+ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+ void *ta_buf, uint32_t *pnum)
+{
+ struct chash_cfg *cfg;
+ struct chashbhead *head;
+ struct chashentry *ent, *tmp;
+ struct ta_buf_chash *tb;
+ int exists;
+ uint32_t hash, value;
+
+ cfg = (struct chash_cfg *)ta_state;
+ tb = (struct ta_buf_chash *)ta_buf;
+ ent = (struct chashentry *)tb->ent_ptr;
+ hash = 0;
+ exists = 0;
+
+ /* Read current value from @tei */
+ ent->value = tei->value;
+
+ /* Locate the bucket and check whether the key already exists */
+ if (tei->subtype == AF_INET) {
+ if (tei->masklen != cfg->mask4)
+ return (EINVAL);
+ head = cfg->head4;
+ hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4);
+
+ /* Check for existence */
+ SLIST_FOREACH(tmp, &head[hash], next) {
+ if (tmp->a.a4 == ent->a.a4) {
+ exists = 1;
+ break;
+ }
+ }
+ } else {
+ if (tei->masklen != cfg->mask6)
+ return (EINVAL);
+ head = cfg->head6;
+ hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6);
+ /* Check for existence */
+ SLIST_FOREACH(tmp, &head[hash], next) {
+ if (memcmp(&tmp->a.a6, &ent->a.a6, 16) == 0) {
+ exists = 1;
+ break;
+ }
+ }
+ }
+
+ if (exists == 1) {
+ if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
+ return (EEXIST);
+ /* Record already exists. Update value if we're asked to */
+ /* Swap values so the caller gets the previous one back */
+ value = tmp->value;
+ tmp->value = tei->value;
+ tei->value = value;
+ /* Indicate that update has happened instead of addition */
+ tei->flags |= TEI_FLAGS_UPDATED;
+ *pnum = 0;
+ } else {
+ if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
+ return (EFBIG);
+ SLIST_INSERT_HEAD(&head[hash], ent, next);
+ tb->ent_ptr = NULL;
+ *pnum = 1;
+
+ /* Update counters */
+ if (tei->subtype == AF_INET)
+ cfg->items4++;
+ else
+ cfg->items6++;
+ }
+
+ return (0);
+}
+
+/*
+ * Pre-convert the deletion key into tb->ent (no allocation needed).
+ */
+static int
+ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf)
+{
+ struct ta_buf_chash *tb;
+
+ tb = (struct ta_buf_chash *)ta_buf;
+
+ return (tei_to_chash_ent(tei, &tb->ent));
+}
+
+/*
+ * Unlink the entry matching tb->ent from its bucket.  The removed
+ * entry is handed back via tb->ent_ptr so ta_flush_chash_entry() can
+ * free it outside the lock; its value is returned through tei->value.
+ *
+ * Returns 0 on success, EINVAL on mask mismatch, ENOENT if not found.
+ */
+static int
+ta_del_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+ void *ta_buf, uint32_t *pnum)
+{
+ struct chash_cfg *cfg;
+ struct chashbhead *head;
+ struct chashentry *tmp, *tmp_next, *ent;
+ struct ta_buf_chash *tb;
+ uint32_t hash;
+
+ cfg = (struct chash_cfg *)ta_state;
+ tb = (struct ta_buf_chash *)ta_buf;
+ ent = &tb->ent;
+
+ if (tei->subtype == AF_INET) {
+ if (tei->masklen != cfg->mask4)
+ return (EINVAL);
+ head = cfg->head4;
+ hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4);
+
+ SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) {
+ if (tmp->a.a4 != ent->a.a4)
+ continue;
+
+ SLIST_REMOVE(&head[hash], tmp, chashentry, next);
+ cfg->items4--;
+ tb->ent_ptr = tmp;
+ tei->value = tmp->value;
+ *pnum = 1;
+ return (0);
+ }
+ } else {
+ if (tei->masklen != cfg->mask6)
+ return (EINVAL);
+ head = cfg->head6;
+ hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6);
+ SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) {
+ if (memcmp(&tmp->a.a6, &ent->a.a6, 16) != 0)
+ continue;
+
+ SLIST_REMOVE(&head[hash], tmp, chashentry, next);
+ cfg->items6--;
+ tb->ent_ptr = tmp;
+ tei->value = tmp->value;
+ *pnum = 1;
+ return (0);
+ }
+ }
+
+ return (ENOENT);
+}
+
+/*
+ * Release the entry still owned by the add/del state buffer, if any
+ * (set on failed/updating add, or on successful delete).
+ */
+static void
+ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf)
+{
+ struct ta_buf_chash *tb;
+
+ tb = (struct ta_buf_chash *)ta_buf;
+
+ if (tb->ent_ptr != NULL)
+ free(tb->ent_ptr, M_IPFW_TBL);
+}
+
+/*
+ * Hash growing callbacks.
+ */
+
+/*
+ * Decide whether either hash needs to grow (load factor > 1) and pack
+ * the requested new sizes into *pflags: v4 size in bits 31-16, v6 size
+ * in bits 15-0.  Returns 1 if a resize is requested.
+ */
+static int
+ta_need_modify_chash(void *ta_state, struct table_info *ti, uint32_t count,
+ uint64_t *pflags)
+{
+ struct chash_cfg *cfg;
+ uint64_t data;
+
+ /*
+ * Since we don't know exact number of IPv4/IPv6 records in @count,
+ * ignore non-zero @count value at all. Check current hash sizes
+ * and return appropriate data.
+ */
+
+ cfg = (struct chash_cfg *)ta_state;
+
+ data = 0;
+ /*
+ * NOTE(review): with size4 == 32768 the doubled size (65536) is
+ * shifted into bit 32 and ta_prepare_mod_chash()'s 16-bit masks
+ * would read it back as 0; confirm growth beyond 32768 buckets is
+ * handled as intended.  Same applies to the v6 field below.
+ */
+ if (cfg->items4 > cfg->size4 && cfg->size4 < 65536)
+ data |= (cfg->size4 * 2) << 16;
+ if (cfg->items6 > cfg->size6 && cfg->size6 < 65536)
+ data |= cfg->size6 * 2;
+
+ if (data != 0) {
+ *pflags = data;
+ return (1);
+ }
+
+ return (0);
+}
+
+/*
+ * Allocate new, larger chash.
+ * Runs without locks held; allocates bucket arrays for whichever
+ * families were requested in *pflags (v4 in the high 16 used bits,
+ * v6 in the low 16).
+ */
+static int
+ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags)
+{
+ struct mod_item *mi;
+ struct chashbhead *head;
+ int i;
+
+ mi = (struct mod_item *)ta_buf;
+
+ memset(mi, 0, sizeof(struct mod_item));
+ mi->size = (*pflags >> 16) & 0xFFFF;
+ mi->size6 = *pflags & 0xFFFF;
+ if (mi->size > 0) {
+ head = malloc(sizeof(struct chashbhead) * mi->size,
+ M_IPFW, M_WAITOK | M_ZERO);
+ for (i = 0; i < mi->size; i++)
+ SLIST_INIT(&head[i]);
+ mi->main_ptr = head;
+ }
+
+ if (mi->size6 > 0) {
+ head = malloc(sizeof(struct chashbhead) * mi->size6,
+ M_IPFW, M_WAITOK | M_ZERO);
+ for (i = 0; i < mi->size6; i++)
+ SLIST_INIT(&head[i]);
+ mi->main_ptr6 = head;
+ }
+
+ return (0);
+}
+
+/*
+ * Copy data from old runtime array to new one.
+ */
+static int
+ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf,
+ uint64_t *pflags)
+{
+
+ /*
+ * It is not possible to rehash here since we are not holding the
+ * WLOCK; entries are migrated under lock in ta_modify_chash().
+ */
+ return (0);
+}
+
+/*
+ * Switch old & new arrays.
+ * Runs under the writer lock: re-bucket every entry into the new,
+ * larger array for each family that still needs it, publish the new
+ * arrays in ti/cfg, refresh the hash-size bits of ti->data, and park
+ * the old arrays in @mi for ta_flush_mod_chash() to free.
+ */
+static void
+ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf,
+ uint64_t pflags)
+{
+ struct mod_item *mi;
+ struct chash_cfg *cfg;
+ struct chashbhead *old_head, *new_head;
+ struct chashentry *ent, *ent_next;
+ int af, i, mlen;
+ uint32_t nhash;
+ size_t old_size, new_size;
+
+ mi = (struct mod_item *)ta_buf;
+ cfg = (struct chash_cfg *)ta_state;
+
+ /* Check which hash we need to grow and do we still need that */
+ if (mi->size > 0 && cfg->size4 < mi->size) {
+ new_head = (struct chashbhead *)mi->main_ptr;
+ new_size = mi->size;
+ old_size = cfg->size4;
+ old_head = ti->state;
+ mlen = cfg->mask4;
+ af = AF_INET;
+
+ for (i = 0; i < old_size; i++) {
+ SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
+ nhash = hash_ent(ent, af, mlen, new_size);
+ SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
+ }
+ }
+
+ ti->state = new_head;
+ cfg->head4 = new_head;
+ cfg->size4 = mi->size;
+ /* Old array is freed later by ta_flush_mod_chash() */
+ mi->main_ptr = old_head;
+ }
+
+ if (mi->size6 > 0 && cfg->size6 < mi->size6) {
+ new_head = (struct chashbhead *)mi->main_ptr6;
+ new_size = mi->size6;
+ old_size = cfg->size6;
+ old_head = ti->xstate;
+ mlen = cfg->mask6;
+ af = AF_INET6;
+
+ for (i = 0; i < old_size; i++) {
+ SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
+ nhash = hash_ent(ent, af, mlen, new_size);
+ SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
+ }
+ }
+
+ ti->xstate = new_head;
+ cfg->head6 = new_head;
+ cfg->size6 = mi->size6;
+ mi->main_ptr6 = old_head;
+ }
+
+ /* Update lower 32 bits with new values */
+ ti->data &= 0xFFFFFFFF00000000;
+ ti->data |= ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6);
+}
+
+/*
+ * Free unneeded array (either the unused new arrays, or the old
+ * arrays swapped out by ta_modify_chash()).
+ */
+static void
+ta_flush_mod_chash(void *ta_buf)
+{
+ struct mod_item *mi;
+
+ mi = (struct mod_item *)ta_buf;
+ if (mi->main_ptr != NULL)
+ free(mi->main_ptr, M_IPFW);
+ if (mi->main_ptr6 != NULL)
+ free(mi->main_ptr6, M_IPFW);
+}
+
+/* Algorithm descriptor registered for "addr:hash" tables. */
+struct table_algo addr_hash = {
+ .name = "addr:hash",
+ .type = IPFW_TABLE_ADDR,
+ .ta_buf_size = sizeof(struct ta_buf_chash),
+ .init = ta_init_chash,
+ .destroy = ta_destroy_chash,
+ .prepare_add = ta_prepare_add_chash,
+ .prepare_del = ta_prepare_del_chash,
+ .add = ta_add_chash,
+ .del = ta_del_chash,
+ .flush_entry = ta_flush_chash_entry,
+ .foreach = ta_foreach_chash,
+ .dump_tentry = ta_dump_chash_tentry,
+ .find_tentry = ta_find_chash_tentry,
+ .print_config = ta_print_chash_config,
+ .dump_tinfo = ta_dump_chash_tinfo,
+ .need_modify = ta_need_modify_chash,
+ .prepare_mod = ta_prepare_mod_chash,
+ .fill_mod = ta_fill_mod_chash,
+ .modify = ta_modify_chash,
+ .flush_mod = ta_flush_mod_chash,
+};
+
+
+/*
+ * Iface table cmds.
+ *
+ * Implementation:
+ *
+ * Runtime part:
+ * - sorted array of "struct ifidx" pointed by ti->state.
+ * Array is allocated with rounding up to IFIDX_CHUNK. Only existing
+ * interfaces are stored in array, however its allocated size is
+ * sufficient to hold all table records if needed.
+ * - current array size is stored in ti->data
+ *
+ * Table data:
+ * - "struct iftable_cfg" is allocated to store table state (ta_state).
+ * - All table records are stored inside namedobj instance.
+ *
+ */
+
+/* Runtime record: maps an interface index to a table value. */
+struct ifidx {
+ uint16_t kidx;
+ uint16_t spare;
+ uint32_t value;
+};
+/* Initial allocation (in records) for the runtime array. */
+#define DEFAULT_IFIDX_SIZE 64
+
+struct iftable_cfg;
+
+/*
+ * Config-side record, one per named interface; lives in the namedobj
+ * hash.  @linked tracks whether the interface is currently present in
+ * the runtime array (i.e. resolved/announced).
+ */
+struct ifentry {
+ struct named_object no;
+ struct ipfw_ifc ic;
+ struct iftable_cfg *icfg;
+ uint32_t value;
+ int linked;
+};
+
+/* Per-table state (ta_state) for the iface:array algorithm. */
+struct iftable_cfg {
+ struct namedobj_instance *ii;
+ struct ip_fw_chain *ch;
+ struct table_info *ti;
+ void *main_ptr;
+ size_t size; /* Number of items allocated in array */
+ size_t count; /* Number of all items */
+ size_t used; /* Number of items _active_ now */
+};
+
+/* Scratch state carried between prepare_add/del and add/del/flush. */
+struct ta_buf_ifidx
+{
+ struct ifentry *ife;
+ uint32_t value;
+};
+
+/* Forward declarations for the iface:array callbacks below. */
+int compare_ifidx(const void *k, const void *v);
+static struct ifidx * ifidx_find(struct table_info *ti, void *key);
+static int ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen,
+ uint32_t *val);
+static int ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state,
+ struct table_info *ti, char *data, uint8_t tflags);
+static void ta_change_ti_ifidx(void *ta_state, struct table_info *ti);
+static int destroy_ifidx_locked(struct namedobj_instance *ii,
+ struct named_object *no, void *arg);
+static void ta_destroy_ifidx(void *ta_state, struct table_info *ti);
+static void ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti,
+ ipfw_ta_tinfo *tinfo);
+static int ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf);
+static int ta_add_ifidx(void *ta_state, struct table_info *ti,
+ struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+static int ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf);
+static int ta_del_ifidx(void *ta_state, struct table_info *ti,
+ struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+static void ta_flush_ifidx_entry(struct ip_fw_chain *ch,
+ struct tentry_info *tei, void *ta_buf);
+static void if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex);
+static int ta_need_modify_ifidx(void *ta_state, struct table_info *ti,
+ uint32_t count, uint64_t *pflags);
+static int ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags);
+static int ta_fill_mod_ifidx(void *ta_state, struct table_info *ti,
+ void *ta_buf, uint64_t *pflags);
+static void ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf,
+ uint64_t pflags);
+static void ta_flush_mod_ifidx(void *ta_buf);
+static int ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e,
+ ipfw_obj_tentry *tent);
+static int ta_find_ifidx_tentry(void *ta_state, struct table_info *ti,
+ ipfw_obj_tentry *tent);
+static int foreach_ifidx(struct namedobj_instance *ii, struct named_object *no,
+ void *arg);
+static void ta_foreach_ifidx(void *ta_state, struct table_info *ti,
+ ta_foreach_f *f, void *arg);
+
+/*
+ * bsearch()/badd()/bdel() comparator: compares a uint16_t interface
+ * index key against the kidx field of a struct ifidx record.
+ */
+int
+compare_ifidx(const void *k, const void *v)
+{
+ const struct ifidx *ifidx;
+ uint16_t key;
+
+ key = *((const uint16_t *)k);
+ ifidx = (const struct ifidx *)v;
+
+ if (key < ifidx->kidx)
+ return (-1);
+ else if (key > ifidx->kidx)
+ return (1);
+
+ return (0);
+}
+
+/*
+ * Adds item @item with key @key into ascending-sorted array @base.
+ * Assumes @base has enough additional storage.
+ *
+ * Returns 1 on success, 0 on duplicate key.
+ */
+static int
+badd(const void *key, void *item, void *base, size_t nmemb,
+ size_t size, int (*compar) (const void *, const void *))
+{
+ int min, max, mid, shift, res;
+ caddr_t paddr;
+
+ if (nmemb == 0) {
+ memcpy(base, item, size);
+ return (1);
+ }
+
+ /* Binary search */
+ min = 0;
+ max = nmemb - 1;
+ mid = 0;
+ while (min <= max) {
+ mid = (min + max) / 2;
+ res = compar(key, (const void *)((caddr_t)base + mid * size));
+ if (res == 0)
+ return (0);
+
+ if (res > 0)
+ min = mid + 1;
+ else
+ max = mid - 1;
+ }
+
+ /* Item not found. */
+ /* @mid is the last probed slot; insert before or after it. */
+ res = compar(key, (const void *)((caddr_t)base + mid * size));
+ if (res > 0)
+ shift = mid + 1;
+ else
+ shift = mid;
+
+ paddr = (caddr_t)base + shift * size;
+ /* Shift the tail right by one slot to open a gap at @shift. */
+ if (nmemb > shift)
+ memmove(paddr + size, paddr, (nmemb - shift) * size);
+
+ memcpy(paddr, item, size);
+
+ return (1);
+}
+
+/*
+ * Deletes item with key @key from ascending-sorted array @base.
+ *
+ * Returns 1 on success, 0 for non-existent key.
+ */
+static int
+bdel(const void *key, void *base, size_t nmemb, size_t size,
+ int (*compar) (const void *, const void *))
+{
+ caddr_t item;
+ size_t sz;
+
+ item = (caddr_t)bsearch(key, base, nmemb, size, compar);
+
+ if (item == NULL)
+ return (0);
+
+ /*
+ * NOTE(review): @sz here spans from @item to the array end, so it
+ * includes the deleted slot itself and the memmove reads one full
+ * element past the last record (the tail-shift length is presumably
+ * meant to be sz - size; sz >= size always holds, so the `sz > 0`
+ * guard never skips the copy).  Callers over-allocate the array, so
+ * this is typically benign, but confirm against upstream.
+ */
+ sz = (caddr_t)base + nmemb * size - item;
+
+ if (sz > 0)
+ memmove(item, item + size, sz);
+
+ return (1);
+}
+
+/*
+ * Binary-search the sorted runtime array for interface index @key.
+ * ti->data holds the number of active records.  Returns NULL if absent.
+ */
+static struct ifidx *
+ifidx_find(struct table_info *ti, void *key)
+{
+ struct ifidx *ifi;
+
+ ifi = bsearch(key, ti->state, ti->data, sizeof(struct ifidx),
+ compare_ifidx);
+
+ return (ifi);
+}
+
+/*
+ * Runtime lookup: returns 1 and sets @val if the interface index in
+ * @key is present in the table, 0 otherwise.
+ */
+static int
+ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen,
+ uint32_t *val)
+{
+ struct ifidx *ifi;
+
+ ifi = ifidx_find(ti, key);
+
+ if (ifi != NULL) {
+ *val = ifi->value;
+ return (1);
+ }
+
+ return (0);
+}
+
+/*
+ * Create a new iface:array table: allocate config, the namedobj hash
+ * for per-interface records, and the initial runtime array.
+ */
+static int
+ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
+ char *data, uint8_t tflags)
+{
+ struct iftable_cfg *icfg;
+
+ icfg = malloc(sizeof(struct iftable_cfg), M_IPFW, M_WAITOK | M_ZERO);
+
+ icfg->ii = ipfw_objhash_create(DEFAULT_IFIDX_SIZE);
+ icfg->size = DEFAULT_IFIDX_SIZE;
+ icfg->main_ptr = malloc(sizeof(struct ifidx) * icfg->size, M_IPFW,
+ M_WAITOK | M_ZERO);
+ icfg->ch = ch;
+
+ *ta_state = icfg;
+ ti->state = icfg->main_ptr;
+ ti->lookup = ta_lookup_ifidx;
+
+ return (0);
+}
+
+/*
+ * Handle tableinfo @ti pointer change (on table array resize).
+ * Cached here so if_notifier() can reach the runtime state.
+ */
+static void
+ta_change_ti_ifidx(void *ta_state, struct table_info *ti)
+{
+ struct iftable_cfg *icfg;
+
+ icfg = (struct iftable_cfg *)ta_state;
+ icfg->ti = ti;
+}
+
+/*
+ * ipfw_objhash_foreach() callback: detach the notifier, drop the
+ * interface reference and free one ifentry.  @arg is the fw chain.
+ */
+static int
+destroy_ifidx_locked(struct namedobj_instance *ii, struct named_object *no,
+ void *arg)
+{
+ struct ifentry *ife;
+ struct ip_fw_chain *ch;
+
+ ch = (struct ip_fw_chain *)arg;
+ ife = (struct ifentry *)no;
+
+ ipfw_iface_del_notify(ch, &ife->ic);
+ ipfw_iface_unref(ch, &ife->ic);
+ free(ife, M_IPFW_TBL);
+ return (0);
+}
+
+
+/*
+ * Destroys table @ti
+ * Frees the runtime array, then (under the UH writer lock) every
+ * per-interface record, the namedobj instance and the config.
+ */
+static void
+ta_destroy_ifidx(void *ta_state, struct table_info *ti)
+{
+ struct iftable_cfg *icfg;
+ struct ip_fw_chain *ch;
+
+ icfg = (struct iftable_cfg *)ta_state;
+ ch = icfg->ch;
+
+ if (icfg->main_ptr != NULL)
+ free(icfg->main_ptr, M_IPFW);
+
+ IPFW_UH_WLOCK(ch);
+ ipfw_objhash_foreach(icfg->ii, destroy_ifidx_locked, ch);
+ IPFW_UH_WUNLOCK(ch);
+
+ ipfw_objhash_destroy(icfg->ii);
+
+ free(icfg, M_IPFW);
+}
+
+/*
+ * Provide algo-specific table info
+ */
+static void
+ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
+{
+ struct iftable_cfg *cfg;
+
+ cfg = (struct iftable_cfg *)ta_state;
+
+ tinfo->taclass4 = IPFW_TACLASS_ARRAY;
+ tinfo->size4 = cfg->size;
+ tinfo->count4 = cfg->used;
+ tinfo->itemsize4 = sizeof(struct ifidx);
+}
+
+/*
+ * Prepare state to add to the table:
+ * allocate ifentry and reference needed interface.
+ * On success ownership of the ifentry sits in tb->ife until
+ * ta_add_ifidx() consumes it or ta_flush_ifidx_entry() frees it.
+ */
+static int
+ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf)
+{
+ struct ta_buf_ifidx *tb;
+ char *ifname;
+ struct ifentry *ife;
+
+ tb = (struct ta_buf_ifidx *)ta_buf;
+
+ /* Check if string is terminated */
+ ifname = (char *)tei->paddr;
+ if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE)
+ return (EINVAL);
+
+ ife = malloc(sizeof(struct ifentry), M_IPFW_TBL, M_WAITOK | M_ZERO);
+ ife->ic.cb = if_notifier;
+ ife->ic.cbdata = ife;
+
+ if (ipfw_iface_ref(ch, ifname, &ife->ic) != 0) {
+ free(ife, M_IPFW_TBL);
+ return (EINVAL);
+ }
+
+ /* Use ipfw_iface 'ifname' field as stable storage */
+ ife->no.name = ife->ic.iface->ifname;
+
+ tb->ife = ife;
+
+ return (0);
+}
+
+/*
+ * Insert (or, with TEI_FLAGS_UPDATE, update) the pre-built record.
+ * New records are linked into the namedobj hash and the interface
+ * notifier is attached (which may immediately fire and populate the
+ * runtime array).  *pnum reports the number of records added (0/1).
+ */
+static int
+ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+ void *ta_buf, uint32_t *pnum)
+{
+ struct iftable_cfg *icfg;
+ struct ifentry *ife, *tmp;
+ struct ta_buf_ifidx *tb;
+ struct ipfw_iface *iif;
+ struct ifidx *ifi;
+ char *ifname;
+ uint32_t value;
+
+ tb = (struct ta_buf_ifidx *)ta_buf;
+ ifname = (char *)tei->paddr;
+ icfg = (struct iftable_cfg *)ta_state;
+ ife = tb->ife;
+
+ ife->icfg = icfg;
+ ife->value = tei->value;
+
+ tmp = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname);
+
+ if (tmp != NULL) {
+ if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
+ return (EEXIST);
+
+ /* Exchange values in @tmp and @tei */
+ value = tmp->value;
+ tmp->value = tei->value;
+ tei->value = value;
+
+ iif = tmp->ic.iface;
+ if (iif->resolved != 0) {
+ /* We have to update runtime value, too */
+ ifi = ifidx_find(ti, &iif->ifindex);
+ ifi->value = ife->value;
+ }
+
+ /* Indicate that update has happened instead of addition */
+ tei->flags |= TEI_FLAGS_UPDATED;
+ *pnum = 0;
+ return (0);
+ }
+
+ if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
+ return (EFBIG);
+
+ /* Link to internal list */
+ ipfw_objhash_add(icfg->ii, &ife->no);
+
+ /* Link notifier (possible running its callback) */
+ ipfw_iface_add_notify(icfg->ch, &ife->ic);
+ icfg->count++;
+
+ /* Ownership transferred; keep flush callback from freeing it */
+ tb->ife = NULL;
+ *pnum = 1;
+
+ return (0);
+}
+
+/*
+ * Prepare to delete key from table.
+ * Do basic interface name checks.
+ */
+static int
+ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf)
+{
+ struct ta_buf_ifidx *tb;
+ char *ifname;
+
+ tb = (struct ta_buf_ifidx *)ta_buf;
+
+ /* Check if string is terminated */
+ ifname = (char *)tei->paddr;
+ if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE)
+ return (EINVAL);
+
+ return (0);
+}
+
+/*
+ * Remove key from both configuration list and
+ * runtime array. Unlinks the interface notification.
+ * The removed ifentry is handed to ta_flush_ifidx_entry() via tb->ife.
+ */
+static int
+ta_del_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+ void *ta_buf, uint32_t *pnum)
+{
+ struct iftable_cfg *icfg;
+ struct ifentry *ife;
+ struct ta_buf_ifidx *tb;
+ char *ifname;
+ uint16_t ifindex;
+ int res;
+
+ tb = (struct ta_buf_ifidx *)ta_buf;
+ ifname = (char *)tei->paddr;
+ icfg = (struct iftable_cfg *)ta_state;
+ /* NOTE(review): dead store — overwritten by the lookup below. */
+ ife = tb->ife;
+
+ ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname);
+
+ if (ife == NULL)
+ return (ENOENT);
+
+ if (ife->linked != 0) {
+ /* We have to remove item from runtime */
+ ifindex = ife->ic.iface->ifindex;
+
+ res = bdel(&ifindex, icfg->main_ptr, icfg->used,
+ sizeof(struct ifidx), compare_ifidx);
+
+ KASSERT(res == 1, ("index %d does not exist", ifindex));
+ icfg->used--;
+ ti->data = icfg->used;
+ ife->linked = 0;
+ }
+
+ /* Unlink from local list */
+ ipfw_objhash_del(icfg->ii, &ife->no);
+ /* Unlink notifier and deref */
+ ipfw_iface_del_notify(icfg->ch, &ife->ic);
+ ipfw_iface_unref(icfg->ch, &ife->ic);
+
+ icfg->count--;
+ tei->value = ife->value;
+
+ tb->ife = ife;
+ *pnum = 1;
+
+ return (0);
+}
+
+/*
+ * Flush deleted entry.
+ * Drops interface reference and frees entry.
+ */
+static void
+ta_flush_ifidx_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf)
+{
+ struct ta_buf_ifidx *tb;
+
+ tb = (struct ta_buf_ifidx *)ta_buf;
+
+ if (tb->ife != NULL)
+ free(tb->ife, M_IPFW_TBL);
+}
+
+
+/*
+ * Handle interface announce/withdrawal for particular table.
+ * Every real runtime array modification happens here.
+ * Called with @ifindex != 0 on announce and 0 on withdrawal; the
+ * sorted runtime array and ti->data (active count) are kept in sync.
+ */
+static void
+if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex)
+{
+ struct ifentry *ife;
+ struct ifidx ifi;
+ struct iftable_cfg *icfg;
+ struct table_info *ti;
+ int res;
+
+ ife = (struct ifentry *)cbdata;
+ icfg = ife->icfg;
+ ti = icfg->ti;
+
+ KASSERT(ti != NULL, ("ti=NULL, check change_ti handler"));
+
+ if (ife->linked == 0 && ifindex != 0) {
+ /* Interface announce */
+ ifi.kidx = ifindex;
+ ifi.spare = 0;
+ ifi.value = ife->value;
+ res = badd(&ifindex, &ifi, icfg->main_ptr, icfg->used,
+ sizeof(struct ifidx), compare_ifidx);
+ KASSERT(res == 1, ("index %d already exists", ifindex));
+ icfg->used++;
+ ti->data = icfg->used;
+ ife->linked = 1;
+ } else if (ife->linked != 0 && ifindex == 0) {
+ /* Interface withdrawal */
+ ifindex = ife->ic.iface->ifindex;
+
+ res = bdel(&ifindex, icfg->main_ptr, icfg->used,
+ sizeof(struct ifidx), compare_ifidx);
+
+ KASSERT(res == 1, ("index %d does not exist", ifindex));
+ icfg->used--;
+ ti->data = icfg->used;
+ ife->linked = 0;
+ }
+}
+
+
+/*
+ * Table growing callbacks.
+ */
+
+/*
+ * Request a resize when the allocated array cannot hold all current
+ * records plus @count new ones; the doubled size goes into *pflags.
+ */
+static int
+ta_need_modify_ifidx(void *ta_state, struct table_info *ti, uint32_t count,
+ uint64_t *pflags)
+{
+ struct iftable_cfg *cfg;
+ uint32_t size;
+
+ cfg = (struct iftable_cfg *)ta_state;
+
+ size = cfg->size;
+ while (size < cfg->count + count)
+ size *= 2;
+
+ if (size != cfg->size) {
+ *pflags = size;
+ return (1);
+ }
+
+ return (0);
+}
+
+/*
+ * Allocate new, larger runtime ifidx array (lock-free stage).
+ */
+static int
+ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags)
+{
+ struct mod_item *mi;
+
+ mi = (struct mod_item *)ta_buf;
+
+ memset(mi, 0, sizeof(struct mod_item));
+ mi->size = *pflags;
+ mi->main_ptr = malloc(sizeof(struct ifidx) * mi->size, M_IPFW,
+ M_WAITOK | M_ZERO);
+
+ return (0);
+}
+
+/*
+ * Copy data from old runtime array to new one.
+ * Clears *pflags if the array grew concurrently and no resize is
+ * needed anymore.
+ */
+static int
+ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, void *ta_buf,
+ uint64_t *pflags)
+{
+ struct mod_item *mi;
+ struct iftable_cfg *icfg;
+
+ mi = (struct mod_item *)ta_buf;
+ icfg = (struct iftable_cfg *)ta_state;
+
+ /* Check if we still need to grow array */
+ if (icfg->size >= mi->size) {
+ *pflags = 0;
+ return (0);
+ }
+
+ memcpy(mi->main_ptr, icfg->main_ptr, icfg->used * sizeof(struct ifidx));
+
+ return (0);
+}
+
+/*
+ * Switch old & new arrays.
+ * The old array is parked in @mi for ta_flush_mod_ifidx() to free.
+ */
+static void
+ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf,
+ uint64_t pflags)
+{
+ struct mod_item *mi;
+ struct iftable_cfg *icfg;
+ void *old_ptr;
+
+ mi = (struct mod_item *)ta_buf;
+ icfg = (struct iftable_cfg *)ta_state;
+
+ old_ptr = icfg->main_ptr;
+ icfg->main_ptr = mi->main_ptr;
+ icfg->size = mi->size;
+ ti->state = icfg->main_ptr;
+
+ mi->main_ptr = old_ptr;
+}
+
+/*
+ * Free unneeded array.
+ */
+static void
+ta_flush_mod_ifidx(void *ta_buf)
+{
+ struct mod_item *mi;
+
+ mi = (struct mod_item *)ta_buf;
+ if (mi->main_ptr != NULL)
+ free(mi->main_ptr, M_IPFW);
+}
+
+static int
+ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e,
+ ipfw_obj_tentry *tent)
+{
+ struct ifentry *ife;
+
+ ife = (struct ifentry *)e;
+
+ tent->masklen = 8 * IF_NAMESIZE;
+ memcpy(&tent->k, ife->no.name, IF_NAMESIZE);
+ tent->v.kidx = ife->value;
+
+ return (0);
+}
+
+static int
+ta_find_ifidx_tentry(void *ta_state, struct table_info *ti,
+ ipfw_obj_tentry *tent)
+{
+ struct iftable_cfg *icfg;
+ struct ifentry *ife;
+ char *ifname;
+
+ icfg = (struct iftable_cfg *)ta_state;
+ ifname = tent->k.iface;
+
+ if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE)
+ return (EINVAL);
+
+ ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname);
+
+ if (ife != NULL) {
+ ta_dump_ifidx_tentry(ta_state, ti, ife, tent);
+ return (0);
+ }
+
+ return (ENOENT);
+}
+
+struct wa_ifidx {
+ ta_foreach_f *f;
+ void *arg;
+};
+
+/*
+ * Per-object adapter: translates the objhash walk callback into the
+ * table-algo ta_foreach_f callback stored in @arg.
+ */
+static int
+foreach_ifidx(struct namedobj_instance *ii, struct named_object *no,
+    void *arg)
+{
+	struct ifentry *ife;
+	struct wa_ifidx *wa;
+
+	ife = (struct ifentry *)no;
+	wa = (struct wa_ifidx *)arg;
+
+	wa->f(ife, wa->arg);
+	return (0);
+}
+
+/*
+ * Invoke @f(entry, @arg) for every entry in the interface table.
+ */
+static void
+ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f,
+    void *arg)
+{
+	struct iftable_cfg *icfg;
+	struct wa_ifidx wa;
+
+	icfg = (struct iftable_cfg *)ta_state;
+
+	wa.f = f;
+	wa.arg = arg;
+
+	ipfw_objhash_foreach(icfg->ii, foreach_ifidx, &wa);
+}
+
+/* Algorithm descriptor: default backend for interface tables */
+struct table_algo iface_idx = {
+	.name		= "iface:array",
+	.type		= IPFW_TABLE_INTERFACE,
+	.flags		= TA_FLAG_DEFAULT,
+	.ta_buf_size	= sizeof(struct ta_buf_ifidx),
+	.init		= ta_init_ifidx,
+	.destroy	= ta_destroy_ifidx,
+	.prepare_add	= ta_prepare_add_ifidx,
+	.prepare_del	= ta_prepare_del_ifidx,
+	.add		= ta_add_ifidx,
+	.del		= ta_del_ifidx,
+	.flush_entry	= ta_flush_ifidx_entry,
+	.foreach	= ta_foreach_ifidx,
+	.dump_tentry	= ta_dump_ifidx_tentry,
+	.find_tentry	= ta_find_ifidx_tentry,
+	.dump_tinfo	= ta_dump_ifidx_tinfo,
+	.need_modify	= ta_need_modify_ifidx,
+	.prepare_mod	= ta_prepare_mod_ifidx,
+	.fill_mod	= ta_fill_mod_ifidx,
+	.modify		= ta_modify_ifidx,
+	.flush_mod	= ta_flush_mod_ifidx,
+	.change_ti	= ta_change_ti_ifidx,
+};
+
+/*
+ * Number array cmds.
+ *
+ * Implementation:
+ *
+ * Runtime part:
+ * - sorted array of "struct numarray" pointed by ti->state.
+ * Array is allocated with rounding up to NUMARRAY_CHUNK.
+ * - current array size is stored in ti->data
+ *
+ */
+
+/* One element of the sorted runtime array (sorted by @number) */
+struct numarray {
+	uint32_t number;	/* Lookup key */
+	uint32_t value;		/* Associated value index */
+};
+
+struct numarray_cfg {
+	void *main_ptr;		/* Sorted struct numarray array */
+	size_t size;	/* Number of items allocated in array */
+	size_t used;	/* Number of items _active_ now */
+};
+
+/* Per-request state passed between prepare_add/del and add/del */
+struct ta_buf_numarray
+{
+	struct numarray na;	/* Entry being added or deleted */
+};
+
+int compare_numarray(const void *k, const void *v);
+static struct numarray *numarray_find(struct table_info *ti, void *key);
+static int ta_lookup_numarray(struct table_info *ti, void *key,
+ uint32_t keylen, uint32_t *val);
+static int ta_init_numarray(struct ip_fw_chain *ch, void **ta_state,
+ struct table_info *ti, char *data, uint8_t tflags);
+static void ta_destroy_numarray(void *ta_state, struct table_info *ti);
+static void ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti,
+ ipfw_ta_tinfo *tinfo);
+static int ta_prepare_add_numarray(struct ip_fw_chain *ch,
+ struct tentry_info *tei, void *ta_buf);
+static int ta_add_numarray(void *ta_state, struct table_info *ti,
+ struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+static int ta_del_numarray(void *ta_state, struct table_info *ti,
+ struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+static void ta_flush_numarray_entry(struct ip_fw_chain *ch,
+ struct tentry_info *tei, void *ta_buf);
+static int ta_need_modify_numarray(void *ta_state, struct table_info *ti,
+ uint32_t count, uint64_t *pflags);
+static int ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags);
+static int ta_fill_mod_numarray(void *ta_state, struct table_info *ti,
+ void *ta_buf, uint64_t *pflags);
+static void ta_modify_numarray(void *ta_state, struct table_info *ti,
+ void *ta_buf, uint64_t pflags);
+static void ta_flush_mod_numarray(void *ta_buf);
+static int ta_dump_numarray_tentry(void *ta_state, struct table_info *ti,
+ void *e, ipfw_obj_tentry *tent);
+static int ta_find_numarray_tentry(void *ta_state, struct table_info *ti,
+ ipfw_obj_tentry *tent);
+static void ta_foreach_numarray(void *ta_state, struct table_info *ti,
+ ta_foreach_f *f, void *arg);
+
+/*
+ * bsearch(9)/badd/bdel comparator: @k is a plain uint32_t key,
+ * @v is a struct numarray array element.
+ */
+int
+compare_numarray(const void *k, const void *v)
+{
+	const struct numarray *na;
+	uint32_t key;
+
+	key = *((const uint32_t *)k);
+	na = (const struct numarray *)v;
+
+	if (key < na->number)
+		return (-1);
+	else if (key > na->number)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Binary-search the sorted runtime array (ti->state, ti->data items)
+ * for @key. Returns the matching element or NULL.
+ */
+static struct numarray *
+numarray_find(struct table_info *ti, void *key)
+{
+	struct numarray *ri;
+
+	/*
+	 * Must use compare_numarray here: the array is kept sorted by
+	 * badd()/bdel() with compare_numarray (uint32_t keys) in
+	 * ta_add_numarray()/ta_del_numarray(). compare_ifidx treats the
+	 * key as uint16_t and belongs to the iface:array backend.
+	 */
+	ri = bsearch(key, ti->state, ti->data, sizeof(struct numarray),
+	    compare_numarray);
+
+	return (ri);
+}
+
+/*
+ * Runtime lookup: returns 1 and stores the value in @val on hit,
+ * 0 otherwise.
+ */
+static int
+ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen,
+    uint32_t *val)
+{
+	struct numarray *ri;
+
+	ri = numarray_find(ti, key);
+
+	if (ri != NULL) {
+		*val = ri->value;
+		return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Create a new number:array table instance with an initial capacity
+ * of 16 entries. ti->data (entry count) starts at 0.
+ */
+static int
+ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
+    char *data, uint8_t tflags)
+{
+	struct numarray_cfg *cfg;
+
+	cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO);
+
+	cfg->size = 16;
+	cfg->main_ptr = malloc(sizeof(struct numarray) * cfg->size, M_IPFW,
+	    M_WAITOK | M_ZERO);
+
+	*ta_state = cfg;
+	ti->state = cfg->main_ptr;
+	ti->lookup = ta_lookup_numarray;
+
+	return (0);
+}
+
+/*
+ * Destroys table @ti: frees the runtime array and the config itself.
+ */
+static void
+ta_destroy_numarray(void *ta_state, struct table_info *ti)
+{
+	struct numarray_cfg *cfg;
+
+	cfg = (struct numarray_cfg *)ta_state;
+
+	if (cfg->main_ptr != NULL)
+		free(cfg->main_ptr, M_IPFW);
+
+	free(cfg, M_IPFW);
+}
+
+/*
+ * Provide algo-specific table info: single (IPv4-slot) array class
+ * with current capacity and usage counts.
+ */
+static void
+ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
+{
+	struct numarray_cfg *cfg;
+
+	cfg = (struct numarray_cfg *)ta_state;
+
+	tinfo->taclass4 = IPFW_TACLASS_ARRAY;
+	tinfo->size4 = cfg->size;
+	tinfo->count4 = cfg->used;
+	tinfo->itemsize4 = sizeof(struct numarray);
+}
+
+/*
+ * Prepare for addition/deletion to an array: snapshot the key from
+ * @tei into the per-request buffer. Also used as prepare_del (see
+ * the number_array descriptor below).
+ */
+static int
+ta_prepare_add_numarray(struct ip_fw_chain *ch, struct tentry_info *tei,
+    void *ta_buf)
+{
+	struct ta_buf_numarray *tb;
+
+	tb = (struct ta_buf_numarray *)ta_buf;
+
+	tb->na.number = *((uint32_t *)tei->paddr);
+
+	return (0);
+}
+
+/*
+ * Insert (or, with TEI_FLAGS_UPDATE, update) the entry prepared in
+ * @ta_buf. *pnum reports the number of entries actually added.
+ */
+static int
+ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+    void *ta_buf, uint32_t *pnum)
+{
+	struct numarray_cfg *cfg;
+	struct ta_buf_numarray *tb;
+	struct numarray *ri;
+	int res;
+	uint32_t value;
+
+	tb = (struct ta_buf_numarray *)ta_buf;
+	cfg = (struct numarray_cfg *)ta_state;
+
+	/* Read current value from @tei */
+	tb->na.value = tei->value;
+
+	ri = numarray_find(ti, &tb->na.number);
+
+	if (ri != NULL) {
+		if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
+			return (EEXIST);
+
+		/* Exchange values between ri and @tei */
+		value = ri->value;
+		ri->value = tei->value;
+		tei->value = value;
+		/* Indicate that update has happened instead of addition */
+		tei->flags |= TEI_FLAGS_UPDATED;
+		*pnum = 0;
+		return (0);
+	}
+
+	if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
+		return (EFBIG);
+
+	/* Sorted insert; capacity was ensured by the need/modify cycle */
+	res = badd(&tb->na.number, &tb->na, cfg->main_ptr, cfg->used,
+	    sizeof(struct numarray), compare_numarray);
+
+	KASSERT(res == 1, ("number %d already exists", tb->na.number));
+	cfg->used++;
+	ti->data = cfg->used;
+	*pnum = 1;
+
+	return (0);
+}
+
+/*
+ * Remove key from the sorted runtime array, returning the stored
+ * value to the caller via @tei.
+ */
+static int
+ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+    void *ta_buf, uint32_t *pnum)
+{
+	struct numarray_cfg *cfg;
+	struct ta_buf_numarray *tb;
+	struct numarray *ri;
+	int res;
+
+	tb = (struct ta_buf_numarray *)ta_buf;
+	cfg = (struct numarray_cfg *)ta_state;
+
+	ri = numarray_find(ti, &tb->na.number);
+	if (ri == NULL)
+		return (ENOENT);
+
+	tei->value = ri->value;
+
+	res = bdel(&tb->na.number, cfg->main_ptr, cfg->used,
+	    sizeof(struct numarray), compare_numarray);
+
+	KASSERT(res == 1, ("number %u does not exist", tb->na.number));
+	cfg->used--;
+	ti->data = cfg->used;
+	*pnum = 1;
+
+	return (0);
+}
+
+/*
+ * Release per-request resources after add/del.
+ */
+static void
+ta_flush_numarray_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
+    void *ta_buf)
+{
+
+	/* We don't have any state, do nothing */
+}
+
+
+/*
+ * Table growing callbacks.
+ */
+
+/*
+ * Report (via *pflags) the doubled capacity needed to fit @count more
+ * entries. Returns 1 if a grow cycle is required, 0 otherwise.
+ */
+static int
+ta_need_modify_numarray(void *ta_state, struct table_info *ti, uint32_t count,
+    uint64_t *pflags)
+{
+	struct numarray_cfg *cfg;
+	size_t size;
+
+	cfg = (struct numarray_cfg *)ta_state;
+
+	size = cfg->size;
+	while (size < cfg->used + count)
+		size *= 2;
+
+	if (size != cfg->size) {
+		*pflags = size;
+		return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate new, larger runtime array of *pflags entries (requested by
+ * ta_need_modify_numarray()). Runs without the chain lock held.
+ */
+static int
+ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags)
+{
+	struct mod_item *mi;
+
+	mi = (struct mod_item *)ta_buf;
+
+	memset(mi, 0, sizeof(struct mod_item));
+	mi->size = *pflags;
+	mi->main_ptr = malloc(sizeof(struct numarray) * mi->size, M_IPFW,
+	    M_WAITOK | M_ZERO);
+
+	return (0);
+}
+
+/*
+ * Copy data from old runtime array to new one. Clearing *pflags
+ * aborts the swap if the table grew enough concurrently.
+ */
+static int
+ta_fill_mod_numarray(void *ta_state, struct table_info *ti, void *ta_buf,
+    uint64_t *pflags)
+{
+	struct mod_item *mi;
+	struct numarray_cfg *cfg;
+
+	mi = (struct mod_item *)ta_buf;
+	cfg = (struct numarray_cfg *)ta_state;
+
+	/* Check if we still need to grow array */
+	if (cfg->size >= mi->size) {
+		*pflags = 0;
+		return (0);
+	}
+
+	memcpy(mi->main_ptr, cfg->main_ptr, cfg->used * sizeof(struct numarray));
+
+	return (0);
+}
+
+/*
+ * Switch old & new arrays; the old pointer is handed back via @mi so
+ * ta_flush_mod_numarray() can free it.
+ */
+static void
+ta_modify_numarray(void *ta_state, struct table_info *ti, void *ta_buf,
+    uint64_t pflags)
+{
+	struct mod_item *mi;
+	struct numarray_cfg *cfg;
+	void *old_ptr;
+
+	mi = (struct mod_item *)ta_buf;
+	cfg = (struct numarray_cfg *)ta_state;
+
+	old_ptr = cfg->main_ptr;
+	cfg->main_ptr = mi->main_ptr;
+	cfg->size = mi->size;
+	/* Make the runtime lookup path see the new array */
+	ti->state = cfg->main_ptr;
+
+	mi->main_ptr = old_ptr;
+}
+
+/*
+ * Free the unneeded (replaced or unused) array.
+ */
+static void
+ta_flush_mod_numarray(void *ta_buf)
+{
+	struct mod_item *mi;
+
+	mi = (struct mod_item *)ta_buf;
+	if (mi->main_ptr != NULL)
+		free(mi->main_ptr, M_IPFW);
+}
+
+/*
+ * Export a single number entry @e into the userland-visible @tent.
+ */
+static int
+ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e,
+    ipfw_obj_tentry *tent)
+{
+	struct numarray *na;
+
+	na = (struct numarray *)e;
+
+	tent->k.key = na->number;
+	tent->v.kidx = na->value;
+
+	return (0);
+}
+
+/*
+ * Find a single entry by its number key and export it.
+ * Returns ENOENT if not present.
+ */
+static int
+ta_find_numarray_tentry(void *ta_state, struct table_info *ti,
+    ipfw_obj_tentry *tent)
+{
+	struct numarray_cfg *cfg;
+	struct numarray *ri;
+
+	cfg = (struct numarray_cfg *)ta_state;
+
+	ri = numarray_find(ti, &tent->k.key);
+
+	if (ri != NULL) {
+		ta_dump_numarray_tentry(ta_state, ti, ri, tent);
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Invoke @f(entry, @arg) for every active entry, in sorted order.
+ */
+static void
+ta_foreach_numarray(void *ta_state, struct table_info *ti, ta_foreach_f *f,
+    void *arg)
+{
+	struct numarray_cfg *cfg;
+	struct numarray *array;
+	int i;
+
+	cfg = (struct numarray_cfg *)ta_state;
+	array = cfg->main_ptr;
+
+	for (i = 0; i < cfg->used; i++)
+		f(&array[i], arg);
+}
+
+/*
+ * Algorithm descriptor for number tables. Note that prepare_del
+ * intentionally reuses ta_prepare_add_numarray: both only snapshot
+ * the key into the per-request buffer.
+ */
+struct table_algo number_array = {
+	.name		= "number:array",
+	.type		= IPFW_TABLE_NUMBER,
+	.ta_buf_size	= sizeof(struct ta_buf_numarray),
+	.init		= ta_init_numarray,
+	.destroy	= ta_destroy_numarray,
+	.prepare_add	= ta_prepare_add_numarray,
+	.prepare_del	= ta_prepare_add_numarray,
+	.add		= ta_add_numarray,
+	.del		= ta_del_numarray,
+	.flush_entry	= ta_flush_numarray_entry,
+	.foreach	= ta_foreach_numarray,
+	.dump_tentry	= ta_dump_numarray_tentry,
+	.find_tentry	= ta_find_numarray_tentry,
+	.dump_tinfo	= ta_dump_numarray_tinfo,
+	.need_modify	= ta_need_modify_numarray,
+	.prepare_mod	= ta_prepare_mod_numarray,
+	.fill_mod	= ta_fill_mod_numarray,
+	.modify		= ta_modify_numarray,
+	.flush_mod	= ta_flush_mod_numarray,
+};
+
+/*
+ * flow:hash cmds
+ *
+ *
+ * ti->data:
+ * [inv.mask4][inv.mask6][log2hsize4][log2hsize6]
+ * [ 8][ 8][ 8][ 8]
+ *
+ * inv.mask4: 32 - mask
+ * inv.mask6:
+ * 1) _slow lookup: mask
+ * 2) _aligned: (128 - mask) / 8
+ * 3) _64: 8
+ *
+ *
+ * pflags:
+ * [hsize4][hsize6]
+ * [ 16][ 16]
+ */
+
+struct fhashentry;
+
+SLIST_HEAD(fhashbhead, fhashentry);
+
+/*
+ * Common part of a flow entry. NOTE: cmp_flow_ent() compares the
+ * 8 bytes immediately following @next (af..sport) as one uint64_t,
+ * and the addresses that follow the struct — keep this layout.
+ */
+struct fhashentry {
+	SLIST_ENTRY(fhashentry)	next;
+	uint8_t		af;	/* AF_INET or AF_INET6 */
+	uint8_t		proto;
+	uint16_t	spare0;
+	uint16_t	dport;
+	uint16_t	sport;
+	uint32_t	value;	/* Associated value index */
+	uint32_t	spare1;
+};
+
+struct fhashentry4 {
+	struct fhashentry	e;
+	struct in_addr		dip;
+	struct in_addr		sip;
+};
+
+struct fhashentry6 {
+	struct fhashentry	e;
+	struct in6_addr		dip6;
+	struct in6_addr		sip6;
+};
+
+/*
+ * fe4 and fe6 hold the match masks built from tflags; fe6 must
+ * directly follow fe4 — ta_lookup_fhash() reaches it via "m4 + 1".
+ */
+struct fhash_cfg {
+	struct fhashbhead	*head;	/* Bucket array */
+	size_t			size;	/* Number of buckets */
+	size_t			items;	/* Number of stored entries */
+	struct fhashentry4	fe4;
+	struct fhashentry6	fe6;
+};
+
+/* Per-request state passed between prepare_add/del and add/del */
+struct ta_buf_fhash {
+	void	*ent_ptr;		/* Allocated entry (add path) */
+	struct fhashentry6 fe6;		/* Scratch entry (del path) */
+};
+
+static __inline int cmp_flow_ent(struct fhashentry *a,
+ struct fhashentry *b, size_t sz);
+static __inline uint32_t hash_flow4(struct fhashentry4 *f, int hsize);
+static __inline uint32_t hash_flow6(struct fhashentry6 *f, int hsize);
+static uint32_t hash_flow_ent(struct fhashentry *ent, uint32_t size);
+static int ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen,
+ uint32_t *val);
+static int ta_init_fhash(struct ip_fw_chain *ch, void **ta_state,
+struct table_info *ti, char *data, uint8_t tflags);
+static void ta_destroy_fhash(void *ta_state, struct table_info *ti);
+static void ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti,
+ ipfw_ta_tinfo *tinfo);
+static int ta_dump_fhash_tentry(void *ta_state, struct table_info *ti,
+ void *e, ipfw_obj_tentry *tent);
+static int tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent);
+static int ta_find_fhash_tentry(void *ta_state, struct table_info *ti,
+ ipfw_obj_tentry *tent);
+static void ta_foreach_fhash(void *ta_state, struct table_info *ti,
+ ta_foreach_f *f, void *arg);
+static int ta_prepare_add_fhash(struct ip_fw_chain *ch,
+ struct tentry_info *tei, void *ta_buf);
+static int ta_add_fhash(void *ta_state, struct table_info *ti,
+ struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+static int ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf);
+static int ta_del_fhash(void *ta_state, struct table_info *ti,
+ struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
+static void ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
+ void *ta_buf);
+static int ta_need_modify_fhash(void *ta_state, struct table_info *ti,
+ uint32_t count, uint64_t *pflags);
+static int ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags);
+static int ta_fill_mod_fhash(void *ta_state, struct table_info *ti,
+ void *ta_buf, uint64_t *pflags);
+static void ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf,
+ uint64_t pflags);
+static void ta_flush_mod_fhash(void *ta_buf);
+
+/*
+ * Compare two flow entries: the 8 bytes after @next (af, proto,
+ * spare0, dport, sport) as a single uint64_t, then @sz bytes of
+ * addresses following the struct. Returns 1 on match, 0 otherwise.
+ */
+static __inline int
+cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz)
+{
+	uint64_t *ka, *kb;
+
+	ka = (uint64_t *)(&a->next + 1);
+	kb = (uint64_t *)(&b->next + 1);
+
+	if (*ka == *kb && (memcmp(a + 1, b + 1, sz) == 0))
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Bucket hash for IPv4 flows. NOTE(review): the "% (hsize - 1)"
+ * yields values in [0, hsize-2], so bucket hsize-1 is never used —
+ * matches all callers here, but looks like it was meant to be
+ * "% hsize"; confirm against upstream before changing.
+ */
+static __inline uint32_t
+hash_flow4(struct fhashentry4 *f, int hsize)
+{
+	uint32_t i;
+
+	i = (f->dip.s_addr) ^ (f->sip.s_addr) ^ (f->e.dport) ^ (f->e.sport);
+
+	return (i % (hsize - 1));
+}
+
+/*
+ * Bucket hash for IPv6 flows; folds the low 64 bits of both
+ * addresses with the ports. Same hsize-1 modulo caveat as
+ * hash_flow4().
+ */
+static __inline uint32_t
+hash_flow6(struct fhashentry6 *f, int hsize)
+{
+	uint32_t i;
+
+	i = (f->dip6.__u6_addr.__u6_addr32[2]) ^
+	    (f->dip6.__u6_addr.__u6_addr32[3]) ^
+	    (f->sip6.__u6_addr.__u6_addr32[2]) ^
+	    (f->sip6.__u6_addr.__u6_addr32[3]) ^
+	    (f->e.dport) ^ (f->e.sport);
+
+	return (i % (hsize - 1));
+}
+
+/*
+ * Dispatch to the per-family hash based on ent->af.
+ */
+static uint32_t
+hash_flow_ent(struct fhashentry *ent, uint32_t size)
+{
+	uint32_t hash;
+
+	if (ent->af == AF_INET) {
+		hash = hash_flow4((struct fhashentry4 *)ent, size);
+	} else {
+		hash = hash_flow6((struct fhashentry6 *)ent, size);
+	}
+
+	return (hash);
+}
+
+/*
+ * Runtime flow lookup: mask the packet's flow id with the table's
+ * match mask, hash it and scan the bucket. Returns 1 and stores the
+ * value in @val on hit, 0 otherwise.
+ */
+static int
+ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen,
+    uint32_t *val)
+{
+	struct fhashbhead *head;
+	struct fhashentry *ent;
+	struct fhashentry4 *m4;
+	struct ipfw_flow_id *id;
+	uint16_t hash, hsize;
+
+	id = (struct ipfw_flow_id *)key;
+	head = (struct fhashbhead *)ti->state;
+	hsize = ti->data;
+	/* ti->xstate is &cfg->fe4; cfg->fe6 follows it (see fhash_cfg) */
+	m4 = (struct fhashentry4 *)ti->xstate;
+
+	if (id->addr_type == 4) {
+		struct fhashentry4 f;
+
+		/* Copy hash mask */
+		f = *m4;
+
+		/* Keep only the fields this table matches on */
+		f.dip.s_addr &= id->dst_ip;
+		f.sip.s_addr &= id->src_ip;
+		f.e.dport &= id->dst_port;
+		f.e.sport &= id->src_port;
+		f.e.proto &= id->proto;
+		hash = hash_flow4(&f, hsize);
+		SLIST_FOREACH(ent, &head[hash], next) {
+			if (cmp_flow_ent(ent, &f.e, 2 * 4) != 0) {
+				*val = ent->value;
+				return (1);
+			}
+		}
+	} else if (id->addr_type == 6) {
+		struct fhashentry6 f;
+		uint64_t *fp, *idp;
+
+		/* Copy hash mask */
+		f = *((struct fhashentry6 *)(m4 + 1));
+
+		/* Handle lack of __u6_addr.__u6_addr64 */
+		fp = (uint64_t *)&f.dip6;
+		idp = (uint64_t *)&id->dst_ip6;
+		/* src IPv6 is stored after dst IPv6 */
+		*fp++ &= *idp++;
+		*fp++ &= *idp++;
+		*fp++ &= *idp++;
+		*fp &= *idp;
+		f.e.dport &= id->dst_port;
+		f.e.sport &= id->src_port;
+		f.e.proto &= id->proto;
+		hash = hash_flow6(&f, hsize);
+		SLIST_FOREACH(ent, &head[hash], next) {
+			if (cmp_flow_ent(ent, &f.e, 2 * 16) != 0) {
+				*val = ent->value;
+				return (1);
+			}
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * New table: allocate 512 buckets and build the per-family match
+ * masks (all-ones for every field selected by @tflags).
+ */
+static int
+ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
+    char *data, uint8_t tflags)
+{
+	int i;
+	struct fhash_cfg *cfg;
+	struct fhashentry4 *fe4;
+	struct fhashentry6 *fe6;
+
+	cfg = malloc(sizeof(struct fhash_cfg), M_IPFW, M_WAITOK | M_ZERO);
+
+	cfg->size = 512;
+
+	cfg->head = malloc(sizeof(struct fhashbhead) * cfg->size, M_IPFW,
+	    M_WAITOK | M_ZERO);
+	for (i = 0; i < cfg->size; i++)
+		SLIST_INIT(&cfg->head[i]);
+
+	/* Fill in fe masks based on @tflags */
+	fe4 = &cfg->fe4;
+	fe6 = &cfg->fe6;
+	if (tflags & IPFW_TFFLAG_SRCIP) {
+		memset(&fe4->sip, 0xFF, sizeof(fe4->sip));
+		memset(&fe6->sip6, 0xFF, sizeof(fe6->sip6));
+	}
+	if (tflags & IPFW_TFFLAG_DSTIP) {
+		memset(&fe4->dip, 0xFF, sizeof(fe4->dip));
+		memset(&fe6->dip6, 0xFF, sizeof(fe6->dip6));
+	}
+	if (tflags & IPFW_TFFLAG_SRCPORT) {
+		memset(&fe4->e.sport, 0xFF, sizeof(fe4->e.sport));
+		memset(&fe6->e.sport, 0xFF, sizeof(fe6->e.sport));
+	}
+	if (tflags & IPFW_TFFLAG_DSTPORT) {
+		memset(&fe4->e.dport, 0xFF, sizeof(fe4->e.dport));
+		memset(&fe6->e.dport, 0xFF, sizeof(fe6->e.dport));
+	}
+	if (tflags & IPFW_TFFLAG_PROTO) {
+		memset(&fe4->e.proto, 0xFF, sizeof(fe4->e.proto));
+		memset(&fe6->e.proto, 0xFF, sizeof(fe6->e.proto));
+	}
+
+	fe4->e.af = AF_INET;
+	fe6->e.af = AF_INET6;
+
+	/* Publish state for the lockless runtime lookup path */
+	*ta_state = cfg;
+	ti->state = cfg->head;
+	ti->xstate = &cfg->fe4;
+	ti->data = cfg->size;
+	ti->lookup = ta_lookup_fhash;
+
+	return (0);
+}
+
+/*
+ * Destroys table @ti: frees every chained entry, then the bucket
+ * array and the config. Entries were allocated from M_IPFW_TBL.
+ */
+static void
+ta_destroy_fhash(void *ta_state, struct table_info *ti)
+{
+	struct fhash_cfg *cfg;
+	struct fhashentry *ent, *ent_next;
+	int i;
+
+	cfg = (struct fhash_cfg *)ta_state;
+
+	for (i = 0; i < cfg->size; i++)
+		SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next)
+			free(ent, M_IPFW_TBL);
+
+	free(cfg->head, M_IPFW);
+	free(cfg, M_IPFW);
+}
+
+/*
+ * Provide algo-specific table info: one hash shared by both
+ * families, with per-family item sizes.
+ */
+static void
+ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
+{
+	struct fhash_cfg *cfg;
+
+	cfg = (struct fhash_cfg *)ta_state;
+
+	tinfo->flags = IPFW_TATFLAGS_AFITEM;
+	tinfo->taclass4 = IPFW_TACLASS_HASH;
+	tinfo->size4 = cfg->size;
+	tinfo->count4 = cfg->items;
+	tinfo->itemsize4 = sizeof(struct fhashentry4);
+	tinfo->itemsize6 = sizeof(struct fhashentry6);
+}
+
+/*
+ * Export a single flow entry @e into the userland-visible @tent,
+ * converting ports/addresses back to network byte order.
+ */
+static int
+ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e,
+    ipfw_obj_tentry *tent)
+{
+	struct fhash_cfg *cfg;
+	struct fhashentry *ent;
+	struct fhashentry4 *fe4;
+#ifdef INET6
+	struct fhashentry6 *fe6;
+#endif
+	struct tflow_entry *tfe;
+
+	cfg = (struct fhash_cfg *)ta_state;
+	ent = (struct fhashentry *)e;
+	tfe = &tent->k.flow;
+
+	tfe->af = ent->af;
+	tfe->proto = ent->proto;
+	tfe->dport = htons(ent->dport);
+	tfe->sport = htons(ent->sport);
+	tent->v.kidx = ent->value;
+	tent->subtype = ent->af;
+
+	if (ent->af == AF_INET) {
+		fe4 = (struct fhashentry4 *)ent;
+		tfe->a.a4.sip.s_addr = htonl(fe4->sip.s_addr);
+		tfe->a.a4.dip.s_addr = htonl(fe4->dip.s_addr);
+		tent->masklen = 32;
+#ifdef INET6
+	} else {
+		fe6 = (struct fhashentry6 *)ent;
+		tfe->a.a6.sip6 = fe6->sip6;
+		tfe->a.a6.dip6 = fe6->dip6;
+		tent->masklen = 128;
+#endif
+	}
+
+	return (0);
+}
+
+/*
+ * Convert a userland tentry (@tei->paddr, a struct tflow_entry) into
+ * the internal host-order entry @ent. Returns EINVAL for an
+ * unsupported address family.
+ */
+static int
+tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent)
+{
+#ifdef INET
+	struct fhashentry4 *fe4;
+#endif
+#ifdef INET6
+	struct fhashentry6 *fe6;
+#endif
+	struct tflow_entry *tfe;
+
+	tfe = (struct tflow_entry *)tei->paddr;
+
+	ent->af = tei->subtype;
+	ent->proto = tfe->proto;
+	ent->dport = ntohs(tfe->dport);
+	ent->sport = ntohs(tfe->sport);
+
+	if (tei->subtype == AF_INET) {
+#ifdef INET
+		fe4 = (struct fhashentry4 *)ent;
+		fe4->sip.s_addr = ntohl(tfe->a.a4.sip.s_addr);
+		fe4->dip.s_addr = ntohl(tfe->a.a4.dip.s_addr);
+#endif
+#ifdef INET6
+	} else if (tei->subtype == AF_INET6) {
+		fe6 = (struct fhashentry6 *)ent;
+		fe6->sip6 = tfe->a.a6.sip6;
+		fe6->dip6 = tfe->a.a6.dip6;
+#endif
+	} else {
+		/* Unsupported address family */
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+
+/*
+ * Find a single flow entry matching @tent and export it.
+ * Returns ENOENT if not present.
+ */
+static int
+ta_find_fhash_tentry(void *ta_state, struct table_info *ti,
+    ipfw_obj_tentry *tent)
+{
+	struct fhash_cfg *cfg;
+	struct fhashbhead *head;
+	struct fhashentry *ent, *tmp;
+	struct fhashentry6 fe6;
+	struct tentry_info tei;
+	int error;
+	uint32_t hash;
+	size_t sz;
+
+	cfg = (struct fhash_cfg *)ta_state;
+
+	/* fe6 is large enough to hold either address family */
+	ent = &fe6.e;
+
+	memset(&fe6, 0, sizeof(fe6));
+	memset(&tei, 0, sizeof(tei));
+
+	tei.paddr = &tent->k.flow;
+	tei.subtype = tent->subtype;
+
+	if ((error = tei_to_fhash_ent(&tei, ent)) != 0)
+		return (error);
+
+	head = cfg->head;
+	hash = hash_flow_ent(ent, cfg->size);
+
+	if (tei.subtype == AF_INET)
+		sz = 2 * sizeof(struct in_addr);
+	else
+		sz = 2 * sizeof(struct in6_addr);
+
+	/* Check for existence */
+	SLIST_FOREACH(tmp, &head[hash], next) {
+		if (cmp_flow_ent(tmp, ent, sz) != 0) {
+			ta_dump_fhash_tentry(ta_state, ti, tmp, tent);
+			return (0);
+		}
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Invoke @f(entry, @arg) for every stored flow entry. The _SAFE walk
+ * allows @f to free the entry (used by table flush).
+ */
+static void
+ta_foreach_fhash(void *ta_state, struct table_info *ti, ta_foreach_f *f,
+    void *arg)
+{
+	struct fhash_cfg *cfg;
+	struct fhashentry *ent, *ent_next;
+	int i;
+
+	cfg = (struct fhash_cfg *)ta_state;
+
+	for (i = 0; i < cfg->size; i++)
+		SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next)
+			f(ent, arg);
+}
+
+/*
+ * Pre-allocate and fill the entry outside the lock; ta_add_fhash()
+ * links it in, and ta_flush_fhash_entry() frees it if unused.
+ */
+static int
+ta_prepare_add_fhash(struct ip_fw_chain *ch, struct tentry_info *tei,
+    void *ta_buf)
+{
+	struct ta_buf_fhash *tb;
+	struct fhashentry *ent;
+	size_t sz;
+	int error;
+
+	tb = (struct ta_buf_fhash *)ta_buf;
+
+	if (tei->subtype == AF_INET)
+		sz = sizeof(struct fhashentry4);
+	else if (tei->subtype == AF_INET6)
+		sz = sizeof(struct fhashentry6);
+	else
+		return (EINVAL);
+
+	ent = malloc(sz, M_IPFW_TBL, M_WAITOK | M_ZERO);
+
+	error = tei_to_fhash_ent(tei, ent);
+	if (error != 0) {
+		free(ent, M_IPFW_TBL);
+		return (error);
+	}
+	tb->ent_ptr = ent;
+
+	return (0);
+}
+
+/*
+ * Link the pre-allocated entry into its bucket, or (with
+ * TEI_FLAGS_UPDATE) update the existing one. On insertion tb->ent_ptr
+ * is cleared so the flush step does not free the linked entry.
+ */
+static int
+ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+    void *ta_buf, uint32_t *pnum)
+{
+	struct fhash_cfg *cfg;
+	struct fhashbhead *head;
+	struct fhashentry *ent, *tmp;
+	struct ta_buf_fhash *tb;
+	int exists;
+	uint32_t hash, value;
+	size_t sz;
+
+	cfg = (struct fhash_cfg *)ta_state;
+	tb = (struct ta_buf_fhash *)ta_buf;
+	ent = (struct fhashentry *)tb->ent_ptr;
+	exists = 0;
+
+	/* Read current value from @tei */
+	ent->value = tei->value;
+
+	head = cfg->head;
+	hash = hash_flow_ent(ent, cfg->size);
+
+	if (tei->subtype == AF_INET)
+		sz = 2 * sizeof(struct in_addr);
+	else
+		sz = 2 * sizeof(struct in6_addr);
+
+	/* Check for existence */
+	SLIST_FOREACH(tmp, &head[hash], next) {
+		if (cmp_flow_ent(tmp, ent, sz) != 0) {
+			exists = 1;
+			break;
+		}
+	}
+
+	if (exists == 1) {
+		if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
+			return (EEXIST);
+		/* Record already exists. Update value if we're asked to */
+		/* Exchange values between tmp and @tei */
+		value = tmp->value;
+		tmp->value = tei->value;
+		tei->value = value;
+		/* Indicate that update has happened instead of addition */
+		tei->flags |= TEI_FLAGS_UPDATED;
+		*pnum = 0;
+	} else {
+		if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
+			return (EFBIG);
+
+		SLIST_INSERT_HEAD(&head[hash], ent, next);
+		tb->ent_ptr = NULL;
+		*pnum = 1;
+
+		/* Update counters and check if we need to grow hash */
+		cfg->items++;
+	}
+
+	return (0);
+}
+
+/*
+ * Build the deletion key in the stack-embedded scratch entry;
+ * no allocation is needed on the delete path.
+ */
+static int
+ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei,
+    void *ta_buf)
+{
+	struct ta_buf_fhash *tb;
+
+	tb = (struct ta_buf_fhash *)ta_buf;
+
+	return (tei_to_fhash_ent(tei, &tb->fe6.e));
+}
+
+/*
+ * Unlink the matching entry from its bucket. The entry itself is
+ * handed back via tb->ent_ptr and freed by ta_flush_fhash_entry().
+ */
+static int
+ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
+    void *ta_buf, uint32_t *pnum)
+{
+	struct fhash_cfg *cfg;
+	struct fhashbhead *head;
+	struct fhashentry *ent, *tmp;
+	struct ta_buf_fhash *tb;
+	uint32_t hash;
+	size_t sz;
+
+	cfg = (struct fhash_cfg *)ta_state;
+	tb = (struct ta_buf_fhash *)ta_buf;
+	ent = &tb->fe6.e;
+
+	head = cfg->head;
+	hash = hash_flow_ent(ent, cfg->size);
+
+	if (tei->subtype == AF_INET)
+		sz = 2 * sizeof(struct in_addr);
+	else
+		sz = 2 * sizeof(struct in6_addr);
+
+	/* Check for existence */
+	SLIST_FOREACH(tmp, &head[hash], next) {
+		if (cmp_flow_ent(tmp, ent, sz) == 0)
+			continue;
+
+		SLIST_REMOVE(&head[hash], tmp, fhashentry, next);
+		tei->value = tmp->value;
+		*pnum = 1;
+		cfg->items--;
+		tb->ent_ptr = tmp;
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Free the per-request entry: either the unused pre-allocated one
+ * (failed add) or the one unlinked by ta_del_fhash().
+ */
+static void
+ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
+    void *ta_buf)
+{
+	struct ta_buf_fhash *tb;
+
+	tb = (struct ta_buf_fhash *)ta_buf;
+
+	if (tb->ent_ptr != NULL)
+		free(tb->ent_ptr, M_IPFW_TBL);
+}
+
+/*
+ * Hash growing callbacks.
+ */
+
+/*
+ * Request a bucket-count doubling (via *pflags) once the item count
+ * exceeds the bucket count, capped at 65536 buckets.
+ */
+static int
+ta_need_modify_fhash(void *ta_state, struct table_info *ti, uint32_t count,
+    uint64_t *pflags)
+{
+	struct fhash_cfg *cfg;
+
+	cfg = (struct fhash_cfg *)ta_state;
+
+	if (cfg->items > cfg->size && cfg->size < 65536) {
+		*pflags = cfg->size * 2;
+		return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate new, larger fhash bucket array of *pflags buckets.
+ * Runs without the chain lock held.
+ */
+static int
+ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags)
+{
+	struct mod_item *mi;
+	struct fhashbhead *head;
+	int i;
+
+	mi = (struct mod_item *)ta_buf;
+
+	memset(mi, 0, sizeof(struct mod_item));
+	mi->size = *pflags;
+	head = malloc(sizeof(struct fhashbhead) * mi->size, M_IPFW,
+	    M_WAITOK | M_ZERO);
+	for (i = 0; i < mi->size; i++)
+		SLIST_INIT(&head[i]);
+
+	mi->main_ptr = head;
+
+	return (0);
+}
+
+/*
+ * Copy data from old runtime array to new one.
+ */
+static int
+ta_fill_mod_fhash(void *ta_state, struct table_info *ti, void *ta_buf,
+    uint64_t *pflags)
+{
+
+	/* It is not possible to rehash without holding WLOCK,
+	 * so all rehashing is done in ta_modify_fhash() instead. */
+	return (0);
+}
+
+/*
+ * Switch old & new bucket arrays: rehash every entry into the new
+ * array (done here because the WLOCK is held), publish it, and hand
+ * the old array back via @mi for ta_flush_mod_fhash() to free.
+ */
+static void
+ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf,
+    uint64_t pflags)
+{
+	struct mod_item *mi;
+	struct fhash_cfg *cfg;
+	struct fhashbhead *old_head, *new_head;
+	struct fhashentry *ent, *ent_next;
+	int i;
+	uint32_t nhash;
+	size_t old_size;
+
+	mi = (struct mod_item *)ta_buf;
+	cfg = (struct fhash_cfg *)ta_state;
+
+	old_size = cfg->size;
+	old_head = ti->state;
+
+	new_head = (struct fhashbhead *)mi->main_ptr;
+	for (i = 0; i < old_size; i++) {
+		SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
+			nhash = hash_flow_ent(ent, mi->size);
+			SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
+		}
+	}
+
+	ti->state = new_head;
+	ti->data = mi->size;
+	cfg->head = new_head;
+	cfg->size = mi->size;
+
+	mi->main_ptr = old_head;
+}
+
+/*
+ * Free the unneeded (replaced or unused) bucket array.
+ */
+static void
+ta_flush_mod_fhash(void *ta_buf)
+{
+	struct mod_item *mi;
+
+	mi = (struct mod_item *)ta_buf;
+	if (mi->main_ptr != NULL)
+		free(mi->main_ptr, M_IPFW);
+}
+
+/* Algorithm descriptor: default backend for flow tables */
+struct table_algo flow_hash = {
+	.name		= "flow:hash",
+	.type		= IPFW_TABLE_FLOW,
+	.flags		= TA_FLAG_DEFAULT,
+	.ta_buf_size	= sizeof(struct ta_buf_fhash),
+	.init		= ta_init_fhash,
+	.destroy	= ta_destroy_fhash,
+	.prepare_add	= ta_prepare_add_fhash,
+	.prepare_del	= ta_prepare_del_fhash,
+	.add		= ta_add_fhash,
+	.del		= ta_del_fhash,
+	.flush_entry	= ta_flush_fhash_entry,
+	.foreach	= ta_foreach_fhash,
+	.dump_tentry	= ta_dump_fhash_tentry,
+	.find_tentry	= ta_find_fhash_tentry,
+	.dump_tinfo	= ta_dump_fhash_tinfo,
+	.need_modify	= ta_need_modify_fhash,
+	.prepare_mod	= ta_prepare_mod_fhash,
+	.fill_mod	= ta_fill_mod_fhash,
+	.modify		= ta_modify_fhash,
+	.flush_mod	= ta_flush_mod_fhash,
+};
+
+/*
+ * Kernel fibs bindings.
+ *
+ * Implementation:
+ *
+ * Runtime part:
+ * - fully relies on route API
+ * - fib number is stored in ti->data
+ *
+ */
+
+static int ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen,
+ uint32_t *val);
+static int kfib_parse_opts(int *pfib, char *data);
+static void ta_print_kfib_config(void *ta_state, struct table_info *ti,
+ char *buf, size_t bufsize);
+static int ta_init_kfib(struct ip_fw_chain *ch, void **ta_state,
+ struct table_info *ti, char *data, uint8_t tflags);
+static void ta_destroy_kfib(void *ta_state, struct table_info *ti);
+static void ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti,
+ ipfw_ta_tinfo *tinfo);
+static int contigmask(uint8_t *p, int len);
+static int ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e,
+ ipfw_obj_tentry *tent);
+static int ta_dump_kfib_tentry_int(struct sockaddr *paddr,
+ struct sockaddr *pmask, ipfw_obj_tentry *tent);
+static int ta_find_kfib_tentry(void *ta_state, struct table_info *ti,
+ ipfw_obj_tentry *tent);
+static void ta_foreach_kfib(void *ta_state, struct table_info *ti,
+ ta_foreach_f *f, void *arg);
+
+
+/*
+ * Runtime lookup: query the kernel FIB (number in ti->data).
+ * Returns 1 with *val = 0 on any route hit; the table only answers
+ * "reachable or not", no per-entry value is stored.
+ */
+static int
+ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen,
+    uint32_t *val)
+{
+#ifdef INET
+	struct nhop4_basic nh4;
+	struct in_addr in;
+#endif
+#ifdef INET6
+	struct nhop6_basic nh6;
+#endif
+	int error;
+
+	error = ENOENT;
+#ifdef INET
+	if (keylen == 4) {
+		in.s_addr = *(in_addr_t *)key;
+		error = fib4_lookup_nh_basic(ti->data,
+		    in, 0, 0, &nh4);
+	}
+#endif
+#ifdef INET6
+	if (keylen == 6)
+		error = fib6_lookup_nh_basic(ti->data,
+		    (struct in6_addr *)key, 0, 0, 0, &nh6);
+#endif
+
+	if (error != 0)
+		return (0);
+
+	*val = 0;
+
+	return (1);
+}
+
+/*
+ * Parse an optional "fib=%d" token following the algorithm name in
+ * @data (e.g. "addr:kfib fib=4"). Leaves *pfib untouched when no
+ * option is present; returns EINVAL on malformed input.
+ */
+static int
+kfib_parse_opts(int *pfib, char *data)
+{
+	char *pdel, *pend, *s;
+	int fibnum;
+
+	if (data == NULL)
+		return (0);
+	if ((pdel = strchr(data, ' ')) == NULL)
+		return (0);
+	while (*pdel == ' ')
+		pdel++;
+	if (strncmp(pdel, "fib=", 4) != 0)
+		return (EINVAL);
+	/* Terminate the token if more options follow */
+	if ((s = strchr(pdel, ' ')) != NULL)
+		*s++ = '\0';
+
+	pdel += 4;
+	/* Need \d+ */
+	fibnum = strtol(pdel, &pend, 10);
+	if (*pend != '\0')
+		return (EINVAL);
+
+	*pfib = fibnum;
+
+	return (0);
+}
+
+/*
+ * Render the table configuration string, mirroring the format
+ * accepted by kfib_parse_opts().
+ */
+static void
+ta_print_kfib_config(void *ta_state, struct table_info *ti, char *buf,
+    size_t bufsize)
+{
+
+	if (ti->data != 0)
+		snprintf(buf, bufsize, "%s fib=%lu", "addr:kfib", ti->data);
+	else
+		snprintf(buf, bufsize, "%s", "addr:kfib");
+}
+
+static int
+ta_init_kfib(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
+ char *data, uint8_t tflags)
+{
+ int error, fibnum;
+
+ fibnum = 0;
+ if ((error = kfib_parse_opts(&fibnum, data)) != 0)
+ return (error);
+
+ if (fibnum >= rt_numfibs)
+ return (E2BIG);
+
+ ti->data = fibnum;
+ ti->lookup = ta_lookup_kfib;
+
+ return (0);
+}
+
+/*
+ * Destroys table @ti. The kernel FIB is not owned by the table,
+ * so there is nothing to release.
+ */
+static void
+ta_destroy_kfib(void *ta_state, struct table_info *ti)
+{
+
+}
+
+/*
+ * Provide algo-specific table info. Entry counts are reported as 0:
+ * the routing tables are external and not tracked here.
+ */
+static void
+ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
+{
+
+	tinfo->flags = IPFW_TATFLAGS_AFDATA;
+	tinfo->taclass4 = IPFW_TACLASS_RADIX;
+	tinfo->count4 = 0;
+	tinfo->itemsize4 = sizeof(struct rtentry);
+	tinfo->taclass6 = IPFW_TACLASS_RADIX;
+	tinfo->count6 = 0;
+	tinfo->itemsize6 = sizeof(struct rtentry);
+}
+
+/*
+ * Return the prefix length of a contiguous bitmask @p of @len bits,
+ * or -1 if the mask is not contiguous.
+ */
+static int
+contigmask(uint8_t *p, int len)
+{
+	int i, n;
+
+	for (i = 0; i < len ; i++)
+		if ( (p[i/8] & (1 << (7 - (i%8)))) == 0) /* first bit unset */
+			break;
+	for (n= i + 1; n < len; n++)
+		if ( (p[n/8] & (1 << (7 - (n % 8)))) != 0)
+			return (-1); /* mask not contiguous */
+	return (i);
+}
+
+
+/*
+ * Export one route entry: extract key/mask sockaddrs from the
+ * rtentry and defer to ta_dump_kfib_tentry_int().
+ */
+static int
+ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e,
+    ipfw_obj_tentry *tent)
+{
+	struct rtentry *rte;
+
+	rte = (struct rtentry *)e;
+
+	return ta_dump_kfib_tentry_int(rt_key(rte), rt_mask(rte), tent);
+}
+
+/*
+ * Fill @tent from a prefix (@paddr) and optional netmask (@pmask);
+ * a NULL mask means a host route (full-length prefix).
+ */
+static int
+ta_dump_kfib_tentry_int(struct sockaddr *paddr, struct sockaddr *pmask,
+    ipfw_obj_tentry *tent)
+{
+#ifdef INET
+	struct sockaddr_in *addr, *mask;
+#endif
+#ifdef INET6
+	struct sockaddr_in6 *addr6, *mask6;
+#endif
+	int len;
+
+	len = 0;
+
+	/* Guess IPv4/IPv6 radix by sockaddr family */
+#ifdef INET
+	if (paddr->sa_family == AF_INET) {
+		addr = (struct sockaddr_in *)paddr;
+		mask = (struct sockaddr_in *)pmask;
+		tent->k.addr.s_addr = addr->sin_addr.s_addr;
+		len = 32;
+		if (mask != NULL)
+			len = contigmask((uint8_t *)&mask->sin_addr, 32);
+		if (len == -1)
+			len = 0;
+		tent->masklen = len;
+		tent->subtype = AF_INET;
+		tent->v.kidx = 0; /* Do we need to put GW here? */
+	}
+#endif
+#ifdef INET6
+	if (paddr->sa_family == AF_INET6) {
+		addr6 = (struct sockaddr_in6 *)paddr;
+		mask6 = (struct sockaddr_in6 *)pmask;
+		memcpy(&tent->k, &addr6->sin6_addr, sizeof(struct in6_addr));
+		len = 128;
+		if (mask6 != NULL)
+			len = contigmask((uint8_t *)&mask6->sin6_addr, 128);
+		if (len == -1)
+			len = 0;
+		tent->masklen = len;
+		tent->subtype = AF_INET6;
+		tent->v.kidx = 0;
+	}
+#endif
+
+	return (0);
+}
+
+/*
+ * Find the route matching @tent's address by querying the RIB, then
+ * export the resulting prefix/mask. Returns ENOENT on lookup failure.
+ */
+static int
+ta_find_kfib_tentry(void *ta_state, struct table_info *ti,
+    ipfw_obj_tentry *tent)
+{
+	struct rt_addrinfo info;
+	struct sockaddr_in6 key6, dst6, mask6;
+	struct sockaddr *dst, *key, *mask;
+
+	/* Prepare sockaddr for prefix/mask and info */
+	bzero(&dst6, sizeof(dst6));
+	dst6.sin6_len = sizeof(dst6);
+	dst = (struct sockaddr *)&dst6;
+	bzero(&mask6, sizeof(mask6));
+	mask6.sin6_len = sizeof(mask6);
+	mask = (struct sockaddr *)&mask6;
+
+	bzero(&info, sizeof(info));
+	info.rti_info[RTAX_DST] = dst;
+	info.rti_info[RTAX_NETMASK] = mask;
+
+	/* Prepare the lookup key; sockaddr_in6 is large enough for both */
+	bzero(&key6, sizeof(key6));
+	key6.sin6_family = tent->subtype;
+	key = (struct sockaddr *)&key6;
+
+	if (tent->subtype == AF_INET) {
+		((struct sockaddr_in *)&key6)->sin_addr = tent->k.addr;
+		key6.sin6_len = sizeof(struct sockaddr_in);
+	} else {
+		key6.sin6_addr = tent->k.addr6;
+		key6.sin6_len = sizeof(struct sockaddr_in6);
+	}
+
+	if (rib_lookup_info(ti->data, key, 0, 0, &info) != 0)
+		return (ENOENT);
+	/* Host routes carry no netmask */
+	if ((info.rti_addrs & RTA_NETMASK) == 0)
+		mask = NULL;
+
+	ta_dump_kfib_tentry_int(dst, mask, tent);
+
+	return (0);
+}
+
+/*
+ * Walk both the IPv4 and IPv6 routing trees of fib ti->data,
+ * invoking @f on each rtentry under the RIB read lock.
+ * NOTE(review): walk errors are captured but deliberately not acted
+ * upon — the dump is best-effort.
+ */
+static void
+ta_foreach_kfib(void *ta_state, struct table_info *ti, ta_foreach_f *f,
+    void *arg)
+{
+	struct rib_head *rh;
+	int error;
+
+	rh = rt_tables_get_rnh(ti->data, AF_INET);
+	if (rh != NULL) {
+		RIB_RLOCK(rh);
+		error = rh->rnh_walktree(&rh->head, (walktree_f_t *)f, arg);
+		RIB_RUNLOCK(rh);
+	}
+
+	rh = rt_tables_get_rnh(ti->data, AF_INET6);
+	if (rh != NULL) {
+		RIB_RLOCK(rh);
+		error = rh->rnh_walktree(&rh->head, (walktree_f_t *)f, arg);
+		RIB_RUNLOCK(rh);
+	}
+}
+
+/*
+ * Algorithm descriptor: read-only addr table backed by a kernel FIB.
+ * No add/del/modify callbacks — the routing subsystem owns the data.
+ */
+struct table_algo addr_kfib = {
+	.name		= "addr:kfib",
+	.type		= IPFW_TABLE_ADDR,
+	.flags		= TA_FLAG_READONLY,
+	.ta_buf_size	= 0,
+	.init		= ta_init_kfib,
+	.destroy	= ta_destroy_kfib,
+	.foreach	= ta_foreach_kfib,
+	.dump_tentry	= ta_dump_kfib_tentry,
+	.find_tentry	= ta_find_kfib_tentry,
+	.dump_tinfo	= ta_dump_kfib_tinfo,
+	.print_config	= ta_print_kfib_config,
+};
+
+/*
+ * Register all table algorithms defined in this file with chain @ch;
+ * each descriptor's idx is filled in for later deregistration.
+ */
+void
+ipfw_table_algo_init(struct ip_fw_chain *ch)
+{
+	size_t sz;
+
+	/*
+	 * Register all algorithms presented here.
+	 */
+	sz = sizeof(struct table_algo);
+	ipfw_add_table_algo(ch, &addr_radix, sz, &addr_radix.idx);
+	ipfw_add_table_algo(ch, &addr_hash, sz, &addr_hash.idx);
+	ipfw_add_table_algo(ch, &iface_idx, sz, &iface_idx.idx);
+	ipfw_add_table_algo(ch, &number_array, sz, &number_array.idx);
+	ipfw_add_table_algo(ch, &flow_hash, sz, &flow_hash.idx);
+	ipfw_add_table_algo(ch, &addr_kfib, sz, &addr_kfib.idx);
+}
+
+/*
+ * Deregister every algorithm registered by ipfw_table_algo_init().
+ */
+void
+ipfw_table_algo_destroy(struct ip_fw_chain *ch)
+{
+
+	ipfw_del_table_algo(ch, addr_radix.idx);
+	ipfw_del_table_algo(ch, addr_hash.idx);
+	ipfw_del_table_algo(ch, iface_idx.idx);
+	ipfw_del_table_algo(ch, number_array.idx);
+	ipfw_del_table_algo(ch, flow_hash.idx);
+	ipfw_del_table_algo(ch, addr_kfib.idx);
+}
+
+
diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table_value.c b/freebsd/sys/netpfil/ipfw/ip_fw_table_value.c
new file mode 100644
index 00000000..ef42e401
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/ip_fw_table_value.c
@@ -0,0 +1,810 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2014 Yandex LLC
+ * Copyright (c) 2014 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Multi-field value support for ipfw tables.
+ *
+ * This file contains necessary functions to convert
+ * large multi-field values into u32 indices suitable to be fed
+ * to various table algorithms. Other machinery like proper refcounting,
+ * internal structures resizing are also kept here.
+ */
+
+#include <rtems/bsd/local/opt_ipfw.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/hash.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/queue.h>
+#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* struct ipfw_rule_ref */
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/ip_fw_table.h>
+
+static uint32_t hash_table_value(struct namedobj_instance *ni, const void *key,
+ uint32_t kopt);
+static int cmp_table_value(struct named_object *no, const void *key,
+ uint32_t kopt);
+
+static int list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd);
+
+static struct ipfw_sopt_handler scodes[] = {
+ { IP_FW_TABLE_VLIST, 0, HDIR_GET, list_table_values },
+};
+
+#define CHAIN_TO_VI(chain) (CHAIN_TO_TCFG(chain)->valhash)
+
+struct table_val_link
+{
+ struct named_object no;
+ struct table_value *pval; /* Pointer to real table value */
+};
+#define VALDATA_START_SIZE 64 /* Allocate 64-items array by default */
+
+struct vdump_args {
+ struct ip_fw_chain *ch;
+ struct sockopt_data *sd;
+ struct table_value *pval;
+ int error;
+};
+
+
+static uint32_t
+hash_table_value(struct namedobj_instance *ni, const void *key, uint32_t kopt)
+{
+
+ return (hash32_buf(key, 56, 0));
+}
+
+static int
+cmp_table_value(struct named_object *no, const void *key, uint32_t kopt)
+{
+
+ return (memcmp(((struct table_val_link *)no)->pval, key, 56));
+}
+
+static void
+mask_table_value(struct table_value *src, struct table_value *dst,
+ uint32_t mask)
+{
+#define _MCPY(f, b) if ((mask & (b)) != 0) { dst->f = src->f; }
+
+ memset(dst, 0, sizeof(*dst));
+ _MCPY(tag, IPFW_VTYPE_TAG);
+ _MCPY(pipe, IPFW_VTYPE_PIPE);
+ _MCPY(divert, IPFW_VTYPE_DIVERT);
+ _MCPY(skipto, IPFW_VTYPE_SKIPTO);
+ _MCPY(netgraph, IPFW_VTYPE_NETGRAPH);
+ _MCPY(fib, IPFW_VTYPE_FIB);
+ _MCPY(nat, IPFW_VTYPE_NAT);
+ _MCPY(dscp, IPFW_VTYPE_DSCP);
+ _MCPY(nh4, IPFW_VTYPE_NH4);
+ _MCPY(nh6, IPFW_VTYPE_NH6);
+ _MCPY(zoneid, IPFW_VTYPE_NH6);
+#undef _MCPY
+}
+
+static void
+get_value_ptrs(struct ip_fw_chain *ch, struct table_config *tc, int vshared,
+ struct table_value **ptv, struct namedobj_instance **pvi)
+{
+ struct table_value *pval;
+ struct namedobj_instance *vi;
+
+ if (vshared != 0) {
+ pval = (struct table_value *)ch->valuestate;
+ vi = CHAIN_TO_VI(ch);
+ } else {
+ pval = NULL;
+ vi = NULL;
+ //pval = (struct table_value *)&tc->ti.data;
+ }
+
+ if (ptv != NULL)
+ *ptv = pval;
+ if (pvi != NULL)
+ *pvi = vi;
+}
+
+/*
+ * Update pointers to real values after @pval change.
+ */
+static int
+update_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg)
+{
+ struct vdump_args *da;
+ struct table_val_link *ptv;
+ struct table_value *pval;
+
+ da = (struct vdump_args *)arg;
+ ptv = (struct table_val_link *)no;
+
+ pval = da->pval;
+ ptv->pval = &pval[ptv->no.kidx];
+ ptv->no.name = (char *)&pval[ptv->no.kidx];
+ return (0);
+}
+
+/*
+ * Grows value storage shared among all tables.
+ * Drops/reacquires UH locks.
+ * Notifies other running adds on @ch shared storage resize.
+ * Note function does not guarantee that free space
+ * will be available after invocation, so the caller needs
+ * to retry the cycle itself.
+ *
+ * Returns 0 in case of no errors.
+ */
+static int
+resize_shared_value_storage(struct ip_fw_chain *ch)
+{
+ struct tables_config *tcfg;
+ struct namedobj_instance *vi;
+ struct table_value *pval, *valuestate, *old_valuestate;
+ void *new_idx;
+ struct vdump_args da;
+ int new_blocks;
+ int val_size, val_size_old;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ valuestate = NULL;
+ new_idx = NULL;
+
+ pval = (struct table_value *)ch->valuestate;
+ vi = CHAIN_TO_VI(ch);
+ tcfg = CHAIN_TO_TCFG(ch);
+
+ val_size = tcfg->val_size * 2;
+
+ if (val_size == (1 << 30))
+ return (ENOSPC);
+
+ IPFW_UH_WUNLOCK(ch);
+
+ valuestate = malloc(sizeof(struct table_value) * val_size, M_IPFW,
+ M_WAITOK | M_ZERO);
+ ipfw_objhash_bitmap_alloc(val_size, (void *)&new_idx,
+ &new_blocks);
+
+ IPFW_UH_WLOCK(ch);
+
+ /*
+ * Check if we still need to resize
+ */
+ if (tcfg->val_size >= val_size)
+ goto done;
+
+ /* Update pointers and notify everyone we're changing @ch */
+ pval = (struct table_value *)ch->valuestate;
+ rollback_toperation_state(ch, ch);
+
+ /* Good. Let's merge */
+ memcpy(valuestate, pval, sizeof(struct table_value) * tcfg->val_size);
+ ipfw_objhash_bitmap_merge(CHAIN_TO_VI(ch), &new_idx, &new_blocks);
+
+ IPFW_WLOCK(ch);
+ /* Change pointers */
+ old_valuestate = ch->valuestate;
+ ch->valuestate = valuestate;
+ valuestate = old_valuestate;
+ ipfw_objhash_bitmap_swap(CHAIN_TO_VI(ch), &new_idx, &new_blocks);
+
+ val_size_old = tcfg->val_size;
+ tcfg->val_size = val_size;
+ val_size = val_size_old;
+ IPFW_WUNLOCK(ch);
+ /* Update pointers to reflect resize */
+ memset(&da, 0, sizeof(da));
+ da.pval = (struct table_value *)ch->valuestate;
+ ipfw_objhash_foreach(vi, update_tvalue, &da);
+
+done:
+ free(valuestate, M_IPFW);
+ ipfw_objhash_bitmap_free(new_idx, new_blocks);
+
+ return (0);
+}
+
+/*
+ * Drops reference for table value with index @kidx, stored in @pval and
+ * @vi. Frees value if it has no references.
+ */
+static void
+unref_table_value(struct namedobj_instance *vi, struct table_value *pval,
+ uint32_t kidx)
+{
+ struct table_val_link *ptvl;
+
+ KASSERT(pval[kidx].refcnt > 0, ("Refcount is 0 on kidx %d", kidx));
+ if (--pval[kidx].refcnt > 0)
+ return;
+
+ /* Last reference, delete item */
+ ptvl = (struct table_val_link *)ipfw_objhash_lookup_kidx(vi, kidx);
+ KASSERT(ptvl != NULL, ("lookup on value kidx %d failed", kidx));
+ ipfw_objhash_del(vi, &ptvl->no);
+ ipfw_objhash_free_idx(vi, kidx);
+ free(ptvl, M_IPFW);
+}
+
+struct flush_args {
+ struct ip_fw_chain *ch;
+ struct table_algo *ta;
+ struct table_info *ti;
+ void *astate;
+ ipfw_obj_tentry tent;
+};
+
+static int
+unref_table_value_cb(void *e, void *arg)
+{
+ struct flush_args *fa;
+ struct ip_fw_chain *ch;
+ struct table_algo *ta;
+ ipfw_obj_tentry *tent;
+ int error;
+
+ fa = (struct flush_args *)arg;
+
+ ta = fa->ta;
+ memset(&fa->tent, 0, sizeof(fa->tent));
+ tent = &fa->tent;
+ error = ta->dump_tentry(fa->astate, fa->ti, e, tent);
+ if (error != 0)
+ return (error);
+
+ ch = fa->ch;
+
+ unref_table_value(CHAIN_TO_VI(ch),
+ (struct table_value *)ch->valuestate, tent->v.kidx);
+
+ return (0);
+}
+
+/*
+ * Drop references for each value used in @tc.
+ */
+void
+ipfw_unref_table_values(struct ip_fw_chain *ch, struct table_config *tc,
+ struct table_algo *ta, void *astate, struct table_info *ti)
+{
+ struct flush_args fa;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ memset(&fa, 0, sizeof(fa));
+ fa.ch = ch;
+ fa.ta = ta;
+ fa.astate = astate;
+ fa.ti = ti;
+
+ ta->foreach(astate, ti, unref_table_value_cb, &fa);
+}
+
+/*
+ * Table operation state handler.
+ * Called when we are going to change something in @tc which
+ * may lead to inconsistencies in on-going table data addition.
+ *
+ * Here we rollback all already committed state (table values, currently)
+ * and set "modified" field to non-zero value to indicate
+ * that we need to restart original operation.
+ */
+void
+rollback_table_values(struct tableop_state *ts)
+{
+ struct ip_fw_chain *ch;
+ struct table_value *pval;
+ struct tentry_info *ptei;
+ struct namedobj_instance *vi;
+ int i;
+
+ ch = ts->ch;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ /* Get current table value pointer */
+ get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi);
+
+ for (i = 0; i < ts->count; i++) {
+ ptei = &ts->tei[i];
+
+ if (ptei->value == 0)
+ continue;
+
+ unref_table_value(vi, pval, ptei->value);
+ }
+}
+
+/*
+ * Allocate new value index in either shared or per-table array.
+ * Function may drop/reacquire UH lock.
+ *
+ * Returns 0 on success.
+ */
+static int
+alloc_table_vidx(struct ip_fw_chain *ch, struct tableop_state *ts,
+ struct namedobj_instance *vi, uint16_t *pvidx)
+{
+ int error, vlimit;
+ uint16_t vidx;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ error = ipfw_objhash_alloc_idx(vi, &vidx);
+ if (error != 0) {
+
+ /*
+ * We need to resize array. This involves
+ * lock/unlock, so we need to check "modified"
+ * state.
+ */
+ ts->opstate.func(ts->tc, &ts->opstate);
+ error = resize_shared_value_storage(ch);
+ return (error); /* ts->modified should be set, we will restart */
+ }
+
+ vlimit = ts->ta->vlimit;
+ if (vlimit != 0 && vidx >= vlimit) {
+
+ /*
+ * Algorithm is not able to store given index.
+ * We have to rollback state, start using
+ * per-table value array or return error
+ * if we're already using it.
+ *
+ * TODO: do not rollback state if
+ * atomicity is not required.
+ */
+ if (ts->vshared != 0) {
+ /* shared -> per-table */
+ return (ENOSPC); /* TODO: proper error */
+ }
+
+ /* per-table. Fail for now. */
+ return (ENOSPC); /* TODO: proper error */
+ }
+
+ *pvidx = vidx;
+ return (0);
+}
+
+/*
+ * Drops value reference for unused values (updates, deletes, partially
+ * successful adds or rollbacks).
+ */
+void
+ipfw_garbage_table_values(struct ip_fw_chain *ch, struct table_config *tc,
+ struct tentry_info *tei, uint32_t count, int rollback)
+{
+ int i;
+ struct tentry_info *ptei;
+ struct table_value *pval;
+ struct namedobj_instance *vi;
+
+ /*
+ * We have two slightly different ADD cases here:
+ * either (1) we are successful / partially successful,
+ * in that case we need
+ * * to ignore ADDED entries values
+ * * rollback every other values (either UPDATED since
+ * old value has been stored there, or some failure like
+ * EXISTS or LIMIT, or simply the "ignored" case).
+ *
+ * (2): atomic rollback of partially successful operation
+ * in that case we simply need to unref all entries.
+ *
+ * DELETE case is simpler: no atomic support there, so
+ * we simply unref all non-zero values.
+ */
+
+ /*
+ * Get current table value pointers.
+ * XXX: Properly read vshared
+ */
+ get_value_ptrs(ch, tc, 1, &pval, &vi);
+
+ for (i = 0; i < count; i++) {
+ ptei = &tei[i];
+
+ if (ptei->value == 0) {
+
+ /*
+ * We may be deleting non-existing record.
+ * Skip.
+ */
+ continue;
+ }
+
+ if ((ptei->flags & TEI_FLAGS_ADDED) != 0 && rollback == 0) {
+ ptei->value = 0;
+ continue;
+ }
+
+ unref_table_value(vi, pval, ptei->value);
+ ptei->value = 0;
+ }
+}
+
+/*
+ * Main function used to link values of entries going to be added,
+ * to the index. Since we may perform many UH locks drops/acquires,
+ * handle changes by checking tablestate "modified" field.
+ *
+ * Success: return 0.
+ */
+int
+ipfw_link_table_values(struct ip_fw_chain *ch, struct tableop_state *ts)
+{
+ int error, i, found;
+ struct namedobj_instance *vi;
+ struct table_config *tc;
+ struct tentry_info *tei, *ptei;
+ uint32_t count, vlimit;
+ uint16_t vidx;
+ struct table_val_link *ptv;
+ struct table_value tval, *pval;
+
+ /*
+ * Stage 1: reference all existing values and
+ * save their indices.
+ */
+ IPFW_UH_WLOCK_ASSERT(ch);
+ get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi);
+
+ error = 0;
+ found = 0;
+ vlimit = ts->ta->vlimit;
+ vidx = 0;
+ tc = ts->tc;
+ tei = ts->tei;
+ count = ts->count;
+ for (i = 0; i < count; i++) {
+ ptei = &tei[i];
+ ptei->value = 0; /* Ensure value is always 0 in the beginning */
+ mask_table_value(ptei->pvalue, &tval, ts->vmask);
+ ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0,
+ (char *)&tval);
+ if (ptv == NULL)
+ continue;
+ /* Deal with vlimit later */
+ if (vlimit > 0 && vlimit <= ptv->no.kidx)
+ continue;
+
+ /* Value found. Bump refcount */
+ ptv->pval->refcnt++;
+ ptei->value = ptv->no.kidx;
+ found++;
+ }
+
+ if (ts->count == found) {
+ /* We've found all values, no need to create new ones */
+ return (0);
+ }
+
+ /*
+ * we have added some state here, let's attach operation
+ * state to the list to be able to rollback if necessary.
+ */
+ add_toperation_state(ch, ts);
+ /* Ensure table won't disappear */
+ tc_ref(tc);
+ IPFW_UH_WUNLOCK(ch);
+
+ /*
+ * Stage 2: allocate objects for non-existing values.
+ */
+ for (i = 0; i < count; i++) {
+ ptei = &tei[i];
+ if (ptei->value != 0)
+ continue;
+ if (ptei->ptv != NULL)
+ continue;
+ ptei->ptv = malloc(sizeof(struct table_val_link), M_IPFW,
+ M_WAITOK | M_ZERO);
+ }
+
+ /*
+ * Stage 3: allocate index numbers for new values
+ * and link them to index.
+ */
+ IPFW_UH_WLOCK(ch);
+ tc_unref(tc);
+ del_toperation_state(ch, ts);
+ if (ts->modified != 0) {
+
+ /*
+ * In general, we should free all state/indexes here
+ * and return. However, we keep allocated state instead
+ * to ensure we achieve some progress on each restart.
+ */
+ return (0);
+ }
+
+ KASSERT(pval == ch->valuestate, ("resize_storage() notify failure"));
+
+ /* Let's try to link values */
+ for (i = 0; i < count; i++) {
+ ptei = &tei[i];
+
+ /* Check if record has appeared */
+ mask_table_value(ptei->pvalue, &tval, ts->vmask);
+ ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0,
+ (char *)&tval);
+ if (ptv != NULL) {
+ ptv->pval->refcnt++;
+ ptei->value = ptv->no.kidx;
+ continue;
+ }
+
+ /* May perform UH unlock/lock */
+ error = alloc_table_vidx(ch, ts, vi, &vidx);
+ if (error != 0) {
+ ts->opstate.func(ts->tc, &ts->opstate);
+ return (error);
+ }
+ /* value storage resize has happened, return */
+ if (ts->modified != 0)
+ return (0);
+
+ /* Finally, we have allocated valid index, let's add entry */
+ ptei->value = vidx;
+ ptv = (struct table_val_link *)ptei->ptv;
+ ptei->ptv = NULL;
+
+ ptv->no.kidx = vidx;
+ ptv->no.name = (char *)&pval[vidx];
+ ptv->pval = &pval[vidx];
+ memcpy(ptv->pval, &tval, sizeof(struct table_value));
+ pval[vidx].refcnt = 1;
+ ipfw_objhash_add(vi, &ptv->no);
+ }
+
+ return (0);
+}
+
+/*
+ * Compatibility function used to import data from old
+ * IP_FW_TABLE_ADD / IP_FW_TABLE_XADD opcodes.
+ */
+void
+ipfw_import_table_value_legacy(uint32_t value, struct table_value *v)
+{
+
+ memset(v, 0, sizeof(*v));
+ v->tag = value;
+ v->pipe = value;
+ v->divert = value;
+ v->skipto = value;
+ v->netgraph = value;
+ v->fib = value;
+ v->nat = value;
+ v->nh4 = value; /* host format */
+ v->dscp = value;
+ v->limit = value;
+}
+
+/*
+ * Export data to legacy table dumps opcodes.
+ */
+uint32_t
+ipfw_export_table_value_legacy(struct table_value *v)
+{
+
+ /*
+ * TODO: provide more compatibility depending on
+ * vmask value.
+ */
+ return (v->tag);
+}
+
+/*
+ * Imports table value from current userland format.
+ * Saves value in kernel format to the same place.
+ */
+void
+ipfw_import_table_value_v1(ipfw_table_value *iv)
+{
+ struct table_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.tag = iv->tag;
+ v.pipe = iv->pipe;
+ v.divert = iv->divert;
+ v.skipto = iv->skipto;
+ v.netgraph = iv->netgraph;
+ v.fib = iv->fib;
+ v.nat = iv->nat;
+ v.dscp = iv->dscp;
+ v.nh4 = iv->nh4;
+ v.nh6 = iv->nh6;
+ v.limit = iv->limit;
+ v.zoneid = iv->zoneid;
+
+ memcpy(iv, &v, sizeof(ipfw_table_value));
+}
+
+/*
+ * Export real table value @v to current userland format.
+ * Note that @v and @piv may point to the same memory.
+ */
+void
+ipfw_export_table_value_v1(struct table_value *v, ipfw_table_value *piv)
+{
+ ipfw_table_value iv;
+
+ memset(&iv, 0, sizeof(iv));
+ iv.tag = v->tag;
+ iv.pipe = v->pipe;
+ iv.divert = v->divert;
+ iv.skipto = v->skipto;
+ iv.netgraph = v->netgraph;
+ iv.fib = v->fib;
+ iv.nat = v->nat;
+ iv.dscp = v->dscp;
+ iv.limit = v->limit;
+ iv.nh4 = v->nh4;
+ iv.nh6 = v->nh6;
+ iv.zoneid = v->zoneid;
+
+ memcpy(piv, &iv, sizeof(iv));
+}
+
+/*
+ * Exports real value data into ipfw_table_value structure.
+ * Utilizes "spare1" field to store kernel index.
+ */
+static int
+dump_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg)
+{
+ struct vdump_args *da;
+ struct table_val_link *ptv;
+ struct table_value *v;
+
+ da = (struct vdump_args *)arg;
+ ptv = (struct table_val_link *)no;
+
+ v = (struct table_value *)ipfw_get_sopt_space(da->sd, sizeof(*v));
+ /* Out of memory, returning */
+ if (v == NULL) {
+ da->error = ENOMEM;
+ return (ENOMEM);
+ }
+
+ memcpy(v, ptv->pval, sizeof(*v));
+ v->spare1 = ptv->no.kidx;
+ return (0);
+}
+
+/*
+ * Dumps all shared/table value data
+ * Data layout (v1)(current):
+ * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
+ * Reply: [ ipfw_obj_lheader ipfw_table_value x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ struct _ipfw_obj_lheader *olh;
+ struct namedobj_instance *vi;
+ struct vdump_args da;
+ uint32_t count, size;
+
+ olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
+ if (olh == NULL)
+ return (EINVAL);
+ if (sd->valsize < olh->size)
+ return (EINVAL);
+
+ IPFW_UH_RLOCK(ch);
+ vi = CHAIN_TO_VI(ch);
+
+ count = ipfw_objhash_count(vi);
+ size = count * sizeof(ipfw_table_value) + sizeof(ipfw_obj_lheader);
+
+ /* Fill in header regardless of buffer size */
+ olh->count = count;
+ olh->objsize = sizeof(ipfw_table_value);
+
+ if (size > olh->size) {
+ olh->size = size;
+ IPFW_UH_RUNLOCK(ch);
+ return (ENOMEM);
+ }
+ olh->size = size;
+
+ /*
+ * Do the actual value dump
+ */
+ memset(&da, 0, sizeof(da));
+ da.ch = ch;
+ da.sd = sd;
+ ipfw_objhash_foreach(vi, dump_tvalue, &da);
+
+ IPFW_UH_RUNLOCK(ch);
+
+ return (0);
+}
+
+void
+ipfw_table_value_init(struct ip_fw_chain *ch, int first)
+{
+ struct tables_config *tcfg;
+
+ ch->valuestate = malloc(VALDATA_START_SIZE * sizeof(struct table_value),
+ M_IPFW, M_WAITOK | M_ZERO);
+
+ tcfg = ch->tblcfg;
+
+ tcfg->val_size = VALDATA_START_SIZE;
+ tcfg->valhash = ipfw_objhash_create(tcfg->val_size);
+ ipfw_objhash_set_funcs(tcfg->valhash, hash_table_value,
+ cmp_table_value);
+
+ IPFW_ADD_SOPT_HANDLER(first, scodes);
+}
+
+static int
+destroy_value(struct namedobj_instance *ni, struct named_object *no,
+ void *arg)
+{
+
+ free(no, M_IPFW);
+ return (0);
+}
+
+void
+ipfw_table_value_destroy(struct ip_fw_chain *ch, int last)
+{
+
+ IPFW_DEL_SOPT_HANDLER(last, scodes);
+
+ free(ch->valuestate, M_IPFW);
+ ipfw_objhash_foreach(CHAIN_TO_VI(ch), destroy_value, ch);
+ ipfw_objhash_destroy(CHAIN_TO_VI(ch));
+}
+
diff --git a/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.c b/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.c
new file mode 100644
index 00000000..03ca9599
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.c
@@ -0,0 +1,131 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+
+
+int nat64_debug = 0;
+SYSCTL_DECL(_net_inet_ip_fw);
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, nat64_debug, CTLFLAG_RW,
+ &nat64_debug, 0, "Debug level for NAT64 module");
+
+int nat64_allow_private = 0;
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, nat64_allow_private, CTLFLAG_RW,
+ &nat64_allow_private, 0,
+ "Allow use of non-global IPv4 addresses with NAT64");
+
+static int
+vnet_ipfw_nat64_init(const void *arg __unused)
+{
+ struct ip_fw_chain *ch;
+ int first, error;
+
+ ch = &V_layer3_chain;
+ first = IS_DEFAULT_VNET(curvnet) ? 1: 0;
+ error = nat64stl_init(ch, first);
+ if (error != 0)
+ return (error);
+ error = nat64lsn_init(ch, first);
+ if (error != 0) {
+ nat64stl_uninit(ch, first);
+ return (error);
+ }
+ return (0);
+}
+
+static int
+vnet_ipfw_nat64_uninit(const void *arg __unused)
+{
+ struct ip_fw_chain *ch;
+ int last;
+
+ ch = &V_layer3_chain;
+ last = IS_DEFAULT_VNET(curvnet) ? 1: 0;
+ nat64stl_uninit(ch, last);
+ nat64lsn_uninit(ch, last);
+ return (0);
+}
+
+static int
+ipfw_nat64_modevent(module_t mod, int type, void *unused)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ case MOD_UNLOAD:
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t ipfw_nat64_mod = {
+ "ipfw_nat64",
+ ipfw_nat64_modevent,
+ 0
+};
+
+/* Define startup order. */
+#define IPFW_NAT64_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
+#define IPFW_NAT64_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */
+#define IPFW_NAT64_MODULE_ORDER (IPFW_NAT64_MODEVENT_ORDER + 1)
+#define IPFW_NAT64_VNET_ORDER (IPFW_NAT64_MODEVENT_ORDER + 2)
+
+DECLARE_MODULE(ipfw_nat64, ipfw_nat64_mod, IPFW_NAT64_SI_SUB_FIREWALL,
+ SI_ORDER_ANY);
+MODULE_DEPEND(ipfw_nat64, ipfw, 3, 3, 3);
+MODULE_VERSION(ipfw_nat64, 1);
+
+VNET_SYSINIT(vnet_ipfw_nat64_init, IPFW_NAT64_SI_SUB_FIREWALL,
+ IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_init, NULL);
+VNET_SYSUNINIT(vnet_ipfw_nat64_uninit, IPFW_NAT64_SI_SUB_FIREWALL,
+ IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_uninit, NULL);
diff --git a/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.h b/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.h
new file mode 100644
index 00000000..1d2bb774
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.h
@@ -0,0 +1,117 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_NAT64_H_
+#define _IP_FW_NAT64_H_
+
+#define DPRINTF(mask, fmt, ...) \
+ if (nat64_debug & (mask)) \
+ printf("NAT64: %s: " fmt "\n", __func__, ## __VA_ARGS__)
+#define DP_GENERIC 0x0001
+#define DP_OBJ 0x0002
+#define DP_JQUEUE 0x0004
+#define DP_STATE 0x0008
+#define DP_DROPS 0x0010
+#define DP_ALL 0xFFFF
+extern int nat64_debug;
+
+#if 0
+#define NAT64NOINLINE __noinline
+#else
+#define NAT64NOINLINE
+#endif
+
+int nat64stl_init(struct ip_fw_chain *ch, int first);
+void nat64stl_uninit(struct ip_fw_chain *ch, int last);
+int nat64lsn_init(struct ip_fw_chain *ch, int first);
+void nat64lsn_uninit(struct ip_fw_chain *ch, int last);
+
+struct ip_fw_nat64_stats {
+ counter_u64_t opcnt64; /* number of 6-to-4 packets translated */
+ counter_u64_t opcnt46; /* number of 4-to-6 packets translated */
+ counter_u64_t ofrags; /* number of fragments generated */
+ counter_u64_t ifrags; /* number of fragments received */
+ counter_u64_t oerrors; /* number of output errors */
+ counter_u64_t noroute4;
+ counter_u64_t noroute6;
+ counter_u64_t nomatch4; /* No addr/port match */
+ counter_u64_t noproto; /* Protocol not supported */
+ counter_u64_t nomem; /* mbufs allocation failed */
+ counter_u64_t dropped; /* number of packets silently
+ * dropped due to some errors/
+ * unsupported/etc.
+ */
+
+ counter_u64_t jrequests; /* number of jobs requests queued */
+ counter_u64_t jcalls; /* number of jobs handler calls */
+ counter_u64_t jhostsreq; /* number of hosts requests */
+ counter_u64_t jportreq;
+ counter_u64_t jhostfails;
+ counter_u64_t jportfails;
+ counter_u64_t jmaxlen;
+ counter_u64_t jnomem;
+ counter_u64_t jreinjected;
+
+ counter_u64_t screated;
+ counter_u64_t sdeleted;
+ counter_u64_t spgcreated;
+ counter_u64_t spgdeleted;
+};
+
+#define IPFW_NAT64_VERSION 1
+#define NAT64STATS (sizeof(struct ip_fw_nat64_stats) / sizeof(uint64_t))
+typedef struct _nat64_stats_block {
+ counter_u64_t stats[NAT64STATS];
+} nat64_stats_block;
+#define NAT64STAT_ADD(s, f, v) \
+ counter_u64_add((s)->stats[ \
+ offsetof(struct ip_fw_nat64_stats, f) / sizeof(uint64_t)], (v))
+#define NAT64STAT_INC(s, f) NAT64STAT_ADD(s, f, 1)
+#define NAT64STAT_FETCH(s, f) \
+ counter_u64_fetch((s)->stats[ \
+ offsetof(struct ip_fw_nat64_stats, f) / sizeof(uint64_t)])
+
+#define L3HDR(_ip, _t) ((_t)((u_int32_t *)(_ip) + (_ip)->ip_hl))
+#define TCP(p) ((struct tcphdr *)(p))
+#define UDP(p) ((struct udphdr *)(p))
+#define ICMP(p) ((struct icmphdr *)(p))
+#define ICMP6(p) ((struct icmp6_hdr *)(p))
+
+#define NAT64SKIP 0
+#define NAT64RETURN 1
+#define NAT64MFREE -1
+
+/* Well-known prefix 64:ff9b::/96 */
+#define IPV6_ADDR_INT32_WKPFX htonl(0x64ff9b)
+#define IN6_IS_ADDR_WKPFX(a) \
+ ((a)->s6_addr32[0] == IPV6_ADDR_INT32_WKPFX && \
+ (a)->s6_addr32[1] == 0 && (a)->s6_addr32[2] == 0)
+
+#endif
+
diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c b/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c
new file mode 100644
index 00000000..d2507674
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c
@@ -0,0 +1,1574 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rtems/bsd/local/opt_ipfw.h>
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_pflog.h>
+#include <net/pfil.h>
+#include <net/netisr.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+#include <machine/in_cksum.h>
+
+static void
+nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family)
+{
+
+ logdata->dir = PF_OUT;
+ logdata->af = family;
+ ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m);
+}
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+static NAT64NOINLINE struct sockaddr* nat64_find_route4(struct route *ro,
+ in_addr_t dest, struct mbuf *m);
+static NAT64NOINLINE struct sockaddr* nat64_find_route6(struct route_in6 *ro,
+ struct in6_addr *dest, struct mbuf *m);
+
+static NAT64NOINLINE int
+nat64_output(struct ifnet *ifp, struct mbuf *m,
+ struct sockaddr *dst, struct route *ro, nat64_stats_block *stats,
+ void *logdata)
+{
+ int error;
+
+ if (logdata != NULL)
+ nat64_log(logdata, m, dst->sa_family);
+ error = (*ifp->if_output)(ifp, m, dst, ro);
+ if (error != 0)
+ NAT64STAT_INC(stats, oerrors);
+ return (error);
+}
+
+static NAT64NOINLINE int
+nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata)
+{
+ struct route_in6 ro6;
+ struct route ro4, *ro;
+ struct sockaddr *dst;
+ struct ifnet *ifp;
+ struct ip6_hdr *ip6;
+ struct ip *ip4;
+ int error;
+
+ ip4 = mtod(m, struct ip *);
+ switch (ip4->ip_v) {
+ case IPVERSION:
+ ro = &ro4;
+ dst = nat64_find_route4(&ro4, ip4->ip_dst.s_addr, m);
+ if (dst == NULL)
+ NAT64STAT_INC(stats, noroute4);
+ break;
+ case (IPV6_VERSION >> 4):
+ ip6 = (struct ip6_hdr *)ip4;
+ ro = (struct route *)&ro6;
+ dst = nat64_find_route6(&ro6, &ip6->ip6_dst, m);
+ if (dst == NULL)
+ NAT64STAT_INC(stats, noroute6);
+ break;
+ default:
+ m_freem(m);
+ NAT64STAT_INC(stats, dropped);
+ DPRINTF(DP_DROPS, "dropped due to unknown IP version");
+ return (EAFNOSUPPORT);
+ }
+ if (dst == NULL) {
+ FREE_ROUTE(ro);
+ m_freem(m);
+ return (EHOSTUNREACH);
+ }
+ if (logdata != NULL)
+ nat64_log(logdata, m, dst->sa_family);
+ ifp = ro->ro_rt->rt_ifp;
+ error = (*ifp->if_output)(ifp, m, dst, ro);
+ if (error != 0)
+ NAT64STAT_INC(stats, oerrors);
+ FREE_ROUTE(ro);
+ return (error);
+}
+#else /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
+static NAT64NOINLINE int
+nat64_output(struct ifnet *ifp, struct mbuf *m,
+ struct sockaddr *dst, struct route *ro, nat64_stats_block *stats,
+ void *logdata)
+{
+ struct ip *ip4;
+ int ret, af;
+
+ ip4 = mtod(m, struct ip *);
+ switch (ip4->ip_v) {
+ case IPVERSION:
+ af = AF_INET;
+ ret = NETISR_IP;
+ break;
+ case (IPV6_VERSION >> 4):
+ af = AF_INET6;
+ ret = NETISR_IPV6;
+ break;
+ default:
+ m_freem(m);
+ NAT64STAT_INC(stats, dropped);
+ DPRINTF(DP_DROPS, "unknown IP version");
+ return (EAFNOSUPPORT);
+ }
+ if (logdata != NULL)
+ nat64_log(logdata, m, af);
+ ret = netisr_queue(ret, m);
+ if (ret != 0)
+ NAT64STAT_INC(stats, oerrors);
+ return (ret);
+}
+
+static NAT64NOINLINE int
+nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata)
+{
+
+ return (nat64_output(NULL, m, NULL, NULL, stats, logdata));
+}
+#endif /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
+
+
+#if 0
+void print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize);
+
+void
+print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize)
+{
+ char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6, &ip6->ip6_src, sbuf, sizeof(sbuf));
+ inet_ntop(AF_INET6, &ip6->ip6_dst, dbuf, sizeof(dbuf));
+ snprintf(buf, bufsize, "%s -> %s %d", sbuf, dbuf, ip6->ip6_nxt);
+}
+
+
+static NAT64NOINLINE int
+nat64_embed_ip4(struct nat64_cfg *cfg, in_addr_t ia, struct in6_addr *ip6)
+{
+
+ /* assume the prefix is properly filled with zeros */
+ bcopy(&cfg->prefix, ip6, sizeof(*ip6));
+ switch (cfg->plen) {
+ case 32:
+ case 96:
+ ip6->s6_addr32[cfg->plen / 32] = ia;
+ break;
+ case 40:
+ case 48:
+ case 56:
+#if BYTE_ORDER == BIG_ENDIAN
+ ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] |
+ (ia >> (cfg->plen % 32));
+ ip6->s6_addr32[2] = ia << (24 - cfg->plen % 32);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] |
+ (ia << (cfg->plen % 32));
+ ip6->s6_addr32[2] = ia >> (24 - cfg->plen % 32);
+#endif
+ break;
+ case 64:
+#if BYTE_ORDER == BIG_ENDIAN
+ ip6->s6_addr32[2] = ia >> 8;
+ ip6->s6_addr32[3] = ia << 24;
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ip6->s6_addr32[2] = ia << 8;
+ ip6->s6_addr32[3] = ia >> 24;
+#endif
+ break;
+ default:
+ return (0);
+ };
+ ip6->s6_addr8[8] = 0;
+ return (1);
+}
+
+static NAT64NOINLINE in_addr_t
+nat64_extract_ip4(struct in6_addr *ip6, int plen)
+{
+ in_addr_t ia;
+
+ /*
+ * According to RFC 6052 p2.2:
+ * IPv4-embedded IPv6 addresses are composed of a variable-length
+ * prefix, the embedded IPv4 address, and a variable length suffix.
+ * The suffix bits are reserved for future extensions and SHOULD
+ * be set to zero.
+ */
+ switch (plen) {
+ case 32:
+ if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0)
+ goto badip6;
+ break;
+ case 40:
+ if (ip6->s6_addr32[3] != 0 ||
+ (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0)
+ goto badip6;
+ break;
+ case 48:
+ if (ip6->s6_addr32[3] != 0 ||
+ (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0)
+ goto badip6;
+ break;
+ case 56:
+ if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0)
+ goto badip6;
+ break;
+ case 64:
+ if (ip6->s6_addr8[8] != 0 ||
+ (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0)
+ goto badip6;
+ };
+ switch (plen) {
+ case 32:
+ case 96:
+ ia = ip6->s6_addr32[plen / 32];
+ break;
+ case 40:
+ case 48:
+ case 56:
+#if BYTE_ORDER == BIG_ENDIAN
+ ia = (ip6->s6_addr32[1] << (plen % 32)) |
+ (ip6->s6_addr32[2] >> (24 - plen % 32));
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ia = (ip6->s6_addr32[1] >> (plen % 32)) |
+ (ip6->s6_addr32[2] << (24 - plen % 32));
+#endif
+ break;
+ case 64:
+#if BYTE_ORDER == BIG_ENDIAN
+ ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24);
+#endif
+ break;
+ default:
+ return (0);
+ };
+ if (nat64_check_ip4(ia) != 0 ||
+ nat64_check_private_ip4(ia) != 0)
+ goto badip4;
+
+ return (ia);
+badip4:
+ DPRINTF(DP_GENERIC, "invalid destination address: %08x", ia);
+ return (0);
+badip6:
+ DPRINTF(DP_GENERIC, "invalid IPv4-embedded IPv6 address");
+ return (0);
+}
+#endif
+
+/*
+ * According to RFC 1624 the equation for incremental checksum update is:
+ * HC' = ~(~HC + ~m + m') -- [Eqn. 3]
+ * HC' = HC - ~m - m' -- [Eqn. 4]
+ * So, when we are replacing IPv4 addresses with IPv6, we
+ * can assume that the new bytes previously were zeros, and vice versa -
+ * when we are replacing IPv6 addresses with IPv4, the now unused bytes
+ * zeros. The payload length in pseudo header has bigger size, but one
+ * half of it should be zero. Using the equation 4 we get:
+ * HC' = HC - (~m0 + m0') -- m0 is first changed word
+ * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word
+ * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... =
+ * = HC - sum(~m[i] + m'[i])
+ *
+ * The function result should be used as follows:
+ * IPv6 to IPv4: HC' = cksum_add(HC, result)
+ * IPv4 to IPv6: HC' = cksum_add(HC, ~result)
+ */
+static NAT64NOINLINE uint16_t
+nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip)
+{
+ uint32_t sum;
+ uint16_t *p;
+
+ sum = ~ip->ip_src.s_addr >> 16;
+ sum += ~ip->ip_src.s_addr & 0xffff;
+ sum += ~ip->ip_dst.s_addr >> 16;
+ sum += ~ip->ip_dst.s_addr & 0xffff;
+
+ for (p = (uint16_t *)&ip6->ip6_src;
+ p < (uint16_t *)(&ip6->ip6_src + 2); p++)
+ sum += *p;
+
+ while (sum >> 16)
+ sum = (sum & 0xffff) + (sum >> 16);
+ return (sum);
+}
+
+#if __FreeBSD_version < 1100000
+#define ip_fillid(ip) (ip)->ip_id = ip_newid()
+#endif
+static NAT64NOINLINE void
+nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag,
+ uint16_t plen, uint8_t proto, struct ip *ip)
+{
+
+ /* assume addresses are already initialized */
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = sizeof(*ip) >> 2;
+ ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+ ip->ip_len = htons(sizeof(*ip) + plen);
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+ ip->ip_ttl = ip6->ip6_hlim - IPV6_HLIMDEC;
+#else
+ /* Forwarding code will decrement TTL. */
+ ip->ip_ttl = ip6->ip6_hlim;
+#endif
+ ip->ip_sum = 0;
+ ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto;
+ ip_fillid(ip);
+ if (frag != NULL) {
+ ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3);
+ if (frag->ip6f_offlg & IP6F_MORE_FRAG)
+ ip->ip_off |= htons(IP_MF);
+ } else {
+ ip->ip_off = htons(IP_DF);
+ }
+ ip->ip_sum = in_cksum_hdr(ip);
+}
+
+#define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag))
+static NAT64NOINLINE int
+nat64_fragment6(nat64_stats_block *stats, struct ip6_hdr *ip6, struct mbufq *mq,
+ struct mbuf *m, uint32_t mtu, uint16_t ip_id, uint16_t ip_off)
+{
+ struct ip6_frag ip6f;
+ struct mbuf *n;
+ uint16_t hlen, len, offset;
+ int plen;
+
+ plen = ntohs(ip6->ip6_plen);
+ hlen = sizeof(struct ip6_hdr);
+
+ /* Fragmentation isn't needed */
+ if (ip_off == 0 && plen <= mtu - hlen) {
+ M_PREPEND(m, hlen, M_NOWAIT);
+ if (m == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ return (ENOMEM);
+ }
+ bcopy(ip6, mtod(m, void *), hlen);
+ if (mbufq_enqueue(mq, m) != 0) {
+ m_freem(m);
+ NAT64STAT_INC(stats, dropped);
+ DPRINTF(DP_DROPS, "dropped due to mbufq overflow");
+ return (ENOBUFS);
+ }
+ return (0);
+ }
+
+ hlen += sizeof(struct ip6_frag);
+ ip6f.ip6f_reserved = 0;
+ ip6f.ip6f_nxt = ip6->ip6_nxt;
+ ip6->ip6_nxt = IPPROTO_FRAGMENT;
+ if (ip_off != 0) {
+ /*
+ * We have got an IPv4 fragment.
+ * Use offset value and ip_id from original fragment.
+ */
+ ip6f.ip6f_ident = htonl(ntohs(ip_id));
+ offset = (ntohs(ip_off) & IP_OFFMASK) << 3;
+ NAT64STAT_INC(stats, ifrags);
+ } else {
+ /* The packet size exceeds interface MTU */
+ ip6f.ip6f_ident = htonl(ip6_randomid());
+ offset = 0; /* First fragment*/
+ }
+ while (plen > 0 && m != NULL) {
+ n = NULL;
+ len = FRAGSZ(mtu) & ~7;
+ if (len > plen)
+ len = plen;
+ ip6->ip6_plen = htons(len + sizeof(ip6f));
+ ip6f.ip6f_offlg = ntohs(offset);
+ if (len < plen || (ip_off & htons(IP_MF)) != 0)
+ ip6f.ip6f_offlg |= IP6F_MORE_FRAG;
+ offset += len;
+ plen -= len;
+ if (plen > 0) {
+ n = m_split(m, len, M_NOWAIT);
+ if (n == NULL)
+ goto fail;
+ }
+ M_PREPEND(m, hlen, M_NOWAIT);
+ if (m == NULL)
+ goto fail;
+ bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr));
+ bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)),
+ sizeof(struct ip6_frag));
+ if (mbufq_enqueue(mq, m) != 0)
+ goto fail;
+ m = n;
+ }
+ NAT64STAT_ADD(stats, ofrags, mbufq_len(mq));
+ return (0);
+fail:
+ if (m != NULL)
+ m_freem(m);
+ if (n != NULL)
+ m_freem(n);
+ mbufq_drain(mq);
+ NAT64STAT_INC(stats, nomem);
+ return (ENOMEM);
+}
+
+#if __FreeBSD_version < 1100000
+#define rt_expire rt_rmx.rmx_expire
+#define rt_mtu rt_rmx.rmx_mtu
+#endif
+static NAT64NOINLINE struct sockaddr*
+nat64_find_route6(struct route_in6 *ro, struct in6_addr *dest, struct mbuf *m)
+{
+ struct sockaddr_in6 *dst;
+ struct rtentry *rt;
+
+ bzero(ro, sizeof(*ro));
+ dst = (struct sockaddr_in6 *)&ro->ro_dst;
+ dst->sin6_family = AF_INET6;
+ dst->sin6_len = sizeof(*dst);
+ dst->sin6_addr = *dest;
+ IN6_LOOKUP_ROUTE(ro, M_GETFIB(m));
+ rt = ro->ro_rt;
+ if (rt && (rt->rt_flags & RTF_UP) &&
+ (rt->rt_ifp->if_flags & IFF_UP) &&
+ (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ if (rt->rt_flags & RTF_GATEWAY)
+ dst = (struct sockaddr_in6 *)rt->rt_gateway;
+ } else
+ return (NULL);
+ if (((rt->rt_flags & RTF_REJECT) &&
+ (rt->rt_expire == 0 ||
+ time_uptime < rt->rt_expire)) ||
+ rt->rt_ifp->if_link_state == LINK_STATE_DOWN)
+ return (NULL);
+ return ((struct sockaddr *)dst);
+}
+
+#define NAT64_ICMP6_PLEN 64
+static NAT64NOINLINE void
+nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu,
+ nat64_stats_block *stats, void *logdata)
+{
+ struct icmp6_hdr *icmp6;
+ struct ip6_hdr *ip6, *oip6;
+ struct mbuf *n;
+ int len, plen;
+
+ len = 0;
+ plen = nat64_getlasthdr(m, &len);
+ if (plen < 0) {
+ DPRINTF(DP_DROPS, "mbuf isn't contigious");
+ goto freeit;
+ }
+ /*
+ * Do not send ICMPv6 in reply to ICMPv6 errors.
+ */
+ if (plen == IPPROTO_ICMPV6) {
+ if (m->m_len < len + sizeof(*icmp6)) {
+ DPRINTF(DP_DROPS, "mbuf isn't contigious");
+ goto freeit;
+ }
+ icmp6 = mtodo(m, len);
+ if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST ||
+ icmp6->icmp6_type == ND_REDIRECT) {
+ DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to "
+ "ICMPv6 errors");
+ goto freeit;
+ }
+ }
+ /*
+ if (icmp6_ratelimit(&ip6->ip6_src, type, code))
+ goto freeit;
+ */
+ ip6 = mtod(m, struct ip6_hdr *);
+ switch (type) {
+ case ICMP6_DST_UNREACH:
+ case ICMP6_PACKET_TOO_BIG:
+ case ICMP6_TIME_EXCEEDED:
+ case ICMP6_PARAM_PROB:
+ break;
+ default:
+ goto freeit;
+ }
+ /* Calculate length of ICMPv6 payload */
+ len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN:
+ m->m_pkthdr.len;
+
+ /* Create new ICMPv6 datagram */
+ plen = len + sizeof(struct icmp6_hdr);
+ n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT,
+ MT_HEADER, M_PKTHDR);
+ if (n == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ m_freem(m);
+ return;
+ }
+ /*
+ * Move pkthdr from original mbuf. We should have initialized some
+ * fields, because we can reinject this mbuf to netisr and it will
+	 * go through the input path (it requires at least that rcvif be set).
+ * Also do M_ALIGN() to reduce chances of need to allocate new mbuf
+ * in the chain, when we will do M_PREPEND() or make some type of
+ * tunneling.
+ */
+ m_move_pkthdr(n, m);
+ M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr);
+
+ n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
+ oip6 = mtod(n, struct ip6_hdr *);
+ oip6->ip6_src = ip6->ip6_dst;
+ oip6->ip6_dst = ip6->ip6_src;
+ oip6->ip6_nxt = IPPROTO_ICMPV6;
+ oip6->ip6_flow = 0;
+ oip6->ip6_vfc |= IPV6_VERSION;
+ oip6->ip6_hlim = V_ip6_defhlim;
+ oip6->ip6_plen = htons(plen);
+
+ icmp6 = mtodo(n, sizeof(struct ip6_hdr));
+ icmp6->icmp6_cksum = 0;
+ icmp6->icmp6_type = type;
+ icmp6->icmp6_code = code;
+ icmp6->icmp6_mtu = htonl(mtu);
+
+ m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) +
+ sizeof(struct icmp6_hdr)));
+ icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6,
+ sizeof(struct ip6_hdr), plen);
+ m_freem(m);
+ nat64_output_one(n, stats, logdata);
+ return;
+freeit:
+ NAT64STAT_INC(stats, dropped);
+ m_freem(m);
+}
+
+static NAT64NOINLINE struct sockaddr*
+nat64_find_route4(struct route *ro, in_addr_t dest, struct mbuf *m)
+{
+ struct sockaddr_in *dst;
+ struct rtentry *rt;
+
+ bzero(ro, sizeof(*ro));
+ dst = (struct sockaddr_in *)&ro->ro_dst;
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof(*dst);
+ dst->sin_addr.s_addr = dest;
+ IN_LOOKUP_ROUTE(ro, M_GETFIB(m));
+ rt = ro->ro_rt;
+ if (rt && (rt->rt_flags & RTF_UP) &&
+ (rt->rt_ifp->if_flags & IFF_UP) &&
+ (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ if (rt->rt_flags & RTF_GATEWAY)
+ dst = (struct sockaddr_in *)rt->rt_gateway;
+ } else
+ return (NULL);
+ if (((rt->rt_flags & RTF_REJECT) &&
+ (rt->rt_expire == 0 ||
+ time_uptime < rt->rt_expire)) ||
+ rt->rt_ifp->if_link_state == LINK_STATE_DOWN)
+ return (NULL);
+ return ((struct sockaddr *)dst);
+}
+
+#define NAT64_ICMP_PLEN 64
+static NAT64NOINLINE void
+nat64_icmp_reflect(struct mbuf *m, uint8_t type,
+ uint8_t code, uint16_t mtu, nat64_stats_block *stats, void *logdata)
+{
+ struct icmp *icmp;
+ struct ip *ip, *oip;
+ struct mbuf *n;
+ int len, plen;
+
+ ip = mtod(m, struct ip *);
+ /* Do not send ICMP error if packet is not the first fragment */
+ if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) {
+ DPRINTF(DP_DROPS, "not first fragment");
+ goto freeit;
+ }
+ /* Do not send ICMP in reply to ICMP errors */
+ if (ip->ip_p == IPPROTO_ICMP) {
+ if (m->m_len < (ip->ip_hl << 2)) {
+ DPRINTF(DP_DROPS, "mbuf isn't contigious");
+ goto freeit;
+ }
+ icmp = mtodo(m, ip->ip_hl << 2);
+ if (!ICMP_INFOTYPE(icmp->icmp_type)) {
+ DPRINTF(DP_DROPS, "do not send ICMP in reply to "
+ "ICMP errors");
+ goto freeit;
+ }
+ }
+ switch (type) {
+ case ICMP_UNREACH:
+ case ICMP_TIMXCEED:
+ case ICMP_PARAMPROB:
+ break;
+ default:
+ goto freeit;
+ }
+ /* Calculate length of ICMP payload */
+ len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8:
+ m->m_pkthdr.len;
+
+ /* Create new ICMPv4 datagram */
+ plen = len + sizeof(struct icmphdr) + sizeof(uint32_t);
+ n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT,
+ MT_HEADER, M_PKTHDR);
+ if (n == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ m_freem(m);
+ return;
+ }
+ m_move_pkthdr(n, m);
+ M_ALIGN(n, sizeof(struct ip) + plen + max_hdr);
+
+ n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen;
+ oip = mtod(n, struct ip *);
+ oip->ip_v = IPVERSION;
+ oip->ip_hl = sizeof(struct ip) >> 2;
+ oip->ip_tos = 0;
+ oip->ip_len = htons(n->m_pkthdr.len);
+ oip->ip_ttl = V_ip_defttl;
+ oip->ip_p = IPPROTO_ICMP;
+ ip_fillid(oip);
+ oip->ip_off = htons(IP_DF);
+ oip->ip_src = ip->ip_dst;
+ oip->ip_dst = ip->ip_src;
+ oip->ip_sum = 0;
+ oip->ip_sum = in_cksum_hdr(oip);
+
+ icmp = mtodo(n, sizeof(struct ip));
+ icmp->icmp_type = type;
+ icmp->icmp_code = code;
+ icmp->icmp_cksum = 0;
+ icmp->icmp_pmvoid = 0;
+ icmp->icmp_nextmtu = htons(mtu);
+ m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) +
+ sizeof(struct icmphdr) + sizeof(uint32_t)));
+ icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen,
+ sizeof(struct ip));
+ m_freem(m);
+ nat64_output_one(n, stats, logdata);
+ return;
+freeit:
+ NAT64STAT_INC(stats, dropped);
+ m_freem(m);
+}
+
+/* Translate ICMP echo request/reply into ICMPv6 */
+static void
+nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6,
+ uint16_t id, uint8_t type)
+{
+ uint16_t old;
+
+ old = *(uint16_t *)icmp6; /* save type+code in one word */
+ icmp6->icmp6_type = type;
+ /* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */
+ icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
+ old, *(uint16_t *)icmp6);
+ if (id != 0) {
+ old = icmp6->icmp6_id;
+ icmp6->icmp6_id = id;
+ /* Reflect ICMP id translation in the cksum */
+ icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
+ old, id);
+ }
+ /* Reflect IPv6 pseudo header in the cksum */
+ icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),
+ IPPROTO_ICMPV6, ~icmp6->icmp6_cksum);
+}
+
+static NAT64NOINLINE struct mbuf *
+nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid,
+ int offset, nat64_stats_block *stats)
+{
+ struct ip ip;
+ struct icmp *icmp;
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ struct ip6_hdr *eip6;
+ struct mbuf *n;
+ uint32_t mtu;
+ int len, hlen, plen;
+ uint8_t type, code;
+
+ if (m->m_len < offset + ICMP_MINLEN)
+ m = m_pullup(m, offset + ICMP_MINLEN);
+ if (m == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ return (m);
+ }
+ mtu = 0;
+ icmp = mtodo(m, offset);
+ /* RFC 7915 p4.2 */
+ switch (icmp->icmp_type) {
+ case ICMP_ECHOREPLY:
+ type = ICMP6_ECHO_REPLY;
+ code = 0;
+ break;
+ case ICMP_UNREACH:
+ type = ICMP6_DST_UNREACH;
+ switch (icmp->icmp_code) {
+ case ICMP_UNREACH_NET:
+ case ICMP_UNREACH_HOST:
+ case ICMP_UNREACH_SRCFAIL:
+ case ICMP_UNREACH_NET_UNKNOWN:
+ case ICMP_UNREACH_HOST_UNKNOWN:
+ case ICMP_UNREACH_TOSNET:
+ case ICMP_UNREACH_TOSHOST:
+ code = ICMP6_DST_UNREACH_NOROUTE;
+ break;
+ case ICMP_UNREACH_PROTOCOL:
+ type = ICMP6_PARAM_PROB;
+ code = ICMP6_PARAMPROB_NEXTHEADER;
+ break;
+ case ICMP_UNREACH_PORT:
+ code = ICMP6_DST_UNREACH_NOPORT;
+ break;
+ case ICMP_UNREACH_NEEDFRAG:
+ type = ICMP6_PACKET_TOO_BIG;
+ code = 0;
+ /* XXX: needs an additional look */
+ mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20);
+ break;
+ case ICMP_UNREACH_NET_PROHIB:
+ case ICMP_UNREACH_HOST_PROHIB:
+ case ICMP_UNREACH_FILTER_PROHIB:
+ case ICMP_UNREACH_PRECEDENCE_CUTOFF:
+ code = ICMP6_DST_UNREACH_ADMIN;
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
+ icmp->icmp_type, icmp->icmp_code);
+ goto freeit;
+ }
+ break;
+ case ICMP_TIMXCEED:
+ type = ICMP6_TIME_EXCEEDED;
+ code = icmp->icmp_code;
+ break;
+ case ICMP_ECHO:
+ type = ICMP6_ECHO_REQUEST;
+ code = 0;
+ break;
+ case ICMP_PARAMPROB:
+ type = ICMP6_PARAM_PROB;
+ switch (icmp->icmp_code) {
+ case ICMP_PARAMPROB_ERRATPTR:
+ case ICMP_PARAMPROB_LENGTH:
+ code = ICMP6_PARAMPROB_HEADER;
+ switch (icmp->icmp_pptr) {
+ case 0: /* Version/IHL */
+ case 1: /* Type Of Service */
+ mtu = icmp->icmp_pptr;
+ break;
+ case 2: /* Total Length */
+ case 3: mtu = 4; /* Payload Length */
+ break;
+ case 8: /* Time to Live */
+ mtu = 7; /* Hop Limit */
+ break;
+ case 9: /* Protocol */
+ mtu = 6; /* Next Header */
+ break;
+ case 12: /* Source address */
+ case 13:
+ case 14:
+ case 15:
+ mtu = 8;
+ break;
+ case 16: /* Destination address */
+ case 17:
+ case 18:
+ case 19:
+ mtu = 24;
+ break;
+ default: /* Silently drop */
+ DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
+ " code %d, pptr %d", icmp->icmp_type,
+ icmp->icmp_code, icmp->icmp_pptr);
+ goto freeit;
+ }
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
+ " code %d, pptr %d", icmp->icmp_type,
+ icmp->icmp_code, icmp->icmp_pptr);
+ goto freeit;
+ }
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
+ icmp->icmp_type, icmp->icmp_code);
+ goto freeit;
+ }
+ /*
+ * For echo request/reply we can use original payload,
+ * but we need adjust icmp_cksum, because ICMPv6 cksum covers
+ * IPv6 pseudo header and ICMPv6 types differs from ICMPv4.
+ */
+ if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) {
+ nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type);
+ return (m);
+ }
+ /*
+ * For other types of ICMP messages we need to translate inner
+ * IPv4 header to IPv6 header.
+ * Assume ICMP src is the same as payload dst
+ * E.g. we have ( GWsrc1 , NATIP1 ) in outer header
+ * and ( NATIP1, Hostdst1 ) in ICMP copy header.
+ * In that case, we already have map for NATIP1 and GWsrc1.
+ * The only thing we need is to copy IPv6 map prefix to
+ * Hostdst1.
+ */
+ hlen = offset + ICMP_MINLEN;
+ if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) {
+ DPRINTF(DP_DROPS, "Message is too short %d",
+ m->m_pkthdr.len);
+ goto freeit;
+ }
+ m_copydata(m, hlen, sizeof(struct ip), (char *)&ip);
+ if (ip.ip_v != IPVERSION) {
+ DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v);
+ goto freeit;
+ }
+ hlen += ip.ip_hl << 2; /* Skip inner IP header */
+ if (nat64_check_ip4(ip.ip_src.s_addr) != 0 ||
+ nat64_check_ip4(ip.ip_dst.s_addr) != 0 ||
+ nat64_check_private_ip4(ip.ip_src.s_addr) != 0 ||
+ nat64_check_private_ip4(ip.ip_dst.s_addr) != 0) {
+ DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x",
+ ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr));
+ goto freeit;
+ }
+ if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
+ DPRINTF(DP_DROPS, "Message is too short %d",
+ m->m_pkthdr.len);
+ goto freeit;
+ }
+#if 0
+ /*
+ * Check that inner source matches the outer destination.
+ * XXX: We need some method to convert IPv4 into IPv6 address here,
+ * and compare IPv6 addresses.
+ */
+ if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) {
+ DPRINTF(DP_GENERIC, "Inner source doesn't match destination ",
+ "%04x vs %04x", ip.ip_src.s_addr,
+ nat64_get_ip4(&ip6->ip6_dst));
+ goto freeit;
+ }
+#endif
+ /*
+ * Create new mbuf for ICMPv6 datagram.
+ * NOTE: len is data length just after inner IP header.
+ */
+ len = m->m_pkthdr.len - hlen;
+ if (sizeof(struct ip6_hdr) +
+ sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN)
+ len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) -
+ sizeof(struct ip6_hdr);
+ plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len;
+ n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR);
+ if (n == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ m_freem(m);
+ return (NULL);
+ }
+ m_move_pkthdr(n, m);
+ M_ALIGN(n, offset + plen + max_hdr);
+ n->m_len = n->m_pkthdr.len = offset + plen;
+ /* Adjust ip6_plen in outer header */
+ ip6->ip6_plen = htons(plen);
+ /* Construct new inner IPv6 header */
+ eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr));
+ eip6->ip6_src = ip6->ip6_dst;
+ /* Use the fact that we have single /96 prefix for IPv4 map */
+ eip6->ip6_dst = ip6->ip6_src;
+ nat64_set_ip4(&eip6->ip6_dst, ip.ip_dst.s_addr);
+
+ eip6->ip6_flow = htonl(ip.ip_tos << 20);
+ eip6->ip6_vfc |= IPV6_VERSION;
+ eip6->ip6_hlim = ip.ip_ttl;
+ eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2));
+ eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p;
+ m_copydata(m, hlen, len, (char *)(eip6 + 1));
+ /*
+ * We need to translate source port in the inner ULP header,
+ * and adjust ULP checksum.
+ */
+ switch (ip.ip_p) {
+ case IPPROTO_TCP:
+ if (len < offsetof(struct tcphdr, th_sum))
+ break;
+ tcp = TCP(eip6 + 1);
+ if (icmpid != 0) {
+ tcp->th_sum = cksum_adjust(tcp->th_sum,
+ tcp->th_sport, icmpid);
+ tcp->th_sport = icmpid;
+ }
+ tcp->th_sum = cksum_add(tcp->th_sum,
+ ~nat64_cksum_convert(eip6, &ip));
+ break;
+ case IPPROTO_UDP:
+ if (len < offsetof(struct udphdr, uh_sum))
+ break;
+ udp = UDP(eip6 + 1);
+ if (icmpid != 0) {
+ udp->uh_sum = cksum_adjust(udp->uh_sum,
+ udp->uh_sport, icmpid);
+ udp->uh_sport = icmpid;
+ }
+ udp->uh_sum = cksum_add(udp->uh_sum,
+ ~nat64_cksum_convert(eip6, &ip));
+ break;
+ case IPPROTO_ICMP:
+ /*
+ * Check if this is an ICMP error message for echo request
+ * that we sent. I.e. ULP in the data containing invoking
+ * packet is IPPROTO_ICMP and its type is ICMP_ECHO.
+ */
+ icmp = (struct icmp *)(eip6 + 1);
+ if (icmp->icmp_type != ICMP_ECHO) {
+ m_freem(n);
+ goto freeit;
+ }
+ /*
+	 * For our client this original datagram should look
+	 * like it was an ICMPv6 datagram with type ICMP6_ECHO_REQUEST.
+ * Thus we need adjust icmp_cksum and convert type from
+ * ICMP_ECHO to ICMP6_ECHO_REQUEST.
+ */
+ nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,
+ ICMP6_ECHO_REQUEST);
+ }
+ m_freem(m);
+ /* Convert ICMPv4 into ICMPv6 header */
+ icmp = mtodo(n, offset);
+ ICMP6(icmp)->icmp6_type = type;
+ ICMP6(icmp)->icmp6_code = code;
+ ICMP6(icmp)->icmp6_mtu = htonl(mtu);
+ ICMP6(icmp)->icmp6_cksum = 0;
+ ICMP6(icmp)->icmp6_cksum = cksum_add(
+ ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),
+ in_cksum_skip(n, n->m_pkthdr.len, offset));
+ return (n);
+freeit:
+ m_freem(m);
+ NAT64STAT_INC(stats, dropped);
+ return (NULL);
+}
+
+int
+nat64_getlasthdr(struct mbuf *m, int *offset)
+{
+ struct ip6_hdr *ip6;
+ struct ip6_hbh *hbh;
+ int proto, hlen;
+
+ if (offset != NULL)
+ hlen = *offset;
+ else
+ hlen = 0;
+
+ if (m->m_len < hlen + sizeof(*ip6))
+ return (-1);
+
+ ip6 = mtodo(m, hlen);
+ hlen += sizeof(*ip6);
+ proto = ip6->ip6_nxt;
+ /* Skip extension headers */
+ while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
+ proto == IPPROTO_DSTOPTS) {
+ hbh = mtodo(m, hlen);
+ /*
+		 * We expect the mbuf to have contiguous data up to
+		 * the upper level header.
+ */
+ if (m->m_len < hlen)
+ return (-1);
+ /*
+		 * We don't support the Jumbo payload option,
+		 * so return an error.
+ */
+ if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0)
+ return (-1);
+ proto = hbh->ip6h_nxt;
+ hlen += hbh->ip6h_len << 3;
+ }
+ if (offset != NULL)
+ *offset = hlen;
+ return (proto);
+}
+
+int
+nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
+ struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats,
+ void *logdata)
+{
+ struct route_in6 ro;
+ struct ip6_hdr ip6;
+ struct ifnet *ifp;
+ struct ip *ip;
+ struct mbufq mq;
+ struct sockaddr *dst;
+ uint32_t mtu;
+ uint16_t ip_id, ip_off;
+ uint16_t *csum;
+ int plen, hlen;
+ uint8_t proto;
+
+ ip = mtod(m, struct ip*);
+
+ if (ip->ip_ttl <= IPTTLDEC) {
+ nat64_icmp_reflect(m, ICMP_TIMXCEED,
+ ICMP_TIMXCEED_INTRANS, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ ip6.ip6_dst = *daddr;
+ ip6.ip6_src = *saddr;
+
+ hlen = ip->ip_hl << 2;
+ plen = ntohs(ip->ip_len) - hlen;
+ proto = ip->ip_p;
+
+ /* Save ip_id and ip_off, both are in network byte order */
+ ip_id = ip->ip_id;
+ ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);
+
+ /* Fragment length must be multiple of 8 octets */
+ if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {
+ nat64_icmp_reflect(m, ICMP_PARAMPROB,
+ ICMP_PARAMPROB_LENGTH, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+ /* Fragmented ICMP is unsupported */
+ if (proto == IPPROTO_ICMP && ip_off != 0) {
+ DPRINTF(DP_DROPS, "dropped due to fragmented ICMP");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+
+ dst = nat64_find_route6(&ro, &ip6.ip6_dst, m);
+ if (dst == NULL) {
+ FREE_ROUTE(&ro);
+ NAT64STAT_INC(stats, noroute6);
+ nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,
+ stats, logdata);
+ return (NAT64RETURN);
+ }
+ ifp = ro.ro_rt->rt_ifp;
+ if (ro.ro_rt->rt_mtu != 0)
+ mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
+ else
+ mtu = ifp->if_mtu;
+ if (mtu < plen + sizeof(ip6) && (ip->ip_off & htons(IP_DF)) != 0) {
+ FREE_ROUTE(&ro);
+ nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
+ FRAGSZ(mtu) + sizeof(struct ip), stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ ip6.ip6_flow = htonl(ip->ip_tos << 20);
+ ip6.ip6_vfc |= IPV6_VERSION;
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+ ip6.ip6_hlim = ip->ip_ttl - IPTTLDEC;
+#else
+ /* Forwarding code will decrement HLIM. */
+ ip6.ip6_hlim = ip->ip_ttl;
+#endif
+ ip6.ip6_plen = htons(plen);
+ ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto;
+ /* Convert checksums. */
+ switch (proto) {
+ case IPPROTO_TCP:
+ csum = &TCP(mtodo(m, hlen))->th_sum;
+ if (lport != 0) {
+ struct tcphdr *tcp = TCP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, tcp->th_dport, lport);
+ tcp->th_dport = lport;
+ }
+ *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
+ break;
+ case IPPROTO_UDP:
+ csum = &UDP(mtodo(m, hlen))->uh_sum;
+ if (lport != 0) {
+ struct udphdr *udp = UDP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, udp->uh_dport, lport);
+ udp->uh_dport = lport;
+ }
+ *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
+ break;
+ case IPPROTO_ICMP:
+ m = nat64_icmp_translate(m, &ip6, lport, hlen, stats);
+ if (m == NULL) {
+ FREE_ROUTE(&ro);
+ /* stats already accounted */
+ return (NAT64RETURN);
+ }
+ }
+
+ m_adj(m, hlen);
+ mbufq_init(&mq, 255);
+ nat64_fragment6(stats, &ip6, &mq, m, mtu, ip_id, ip_off);
+ while ((m = mbufq_dequeue(&mq)) != NULL) {
+ if (nat64_output(ifp, m, dst, (struct route *)&ro, stats,
+ logdata) != 0)
+ break;
+ NAT64STAT_INC(stats, opcnt46);
+ }
+ mbufq_drain(&mq);
+ FREE_ROUTE(&ro);
+ return (NAT64RETURN);
+}
+
+/*
+ * Translate an inbound ICMPv6 error message to ICMPv4 (RFC7915) and
+ * reflect it back as an IPv4 ICMP error built from the embedded datagram.
+ *
+ * @m: mbuf holding the IPv6 packet; @hlen: offset of the ICMPv6 header
+ * (0 means "not parsed yet" and the last header is located here).
+ * @aaddr/@aport: alias IPv4 address/port for the inner flow.
+ * Returns one of the NAT64SKIP/NAT64MFREE/NAT64RETURN verdicts.
+ */
+int
+nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats, void *logdata)
+{
+ struct ip ip;
+ struct icmp6_hdr *icmp6;
+ struct ip6_frag *ip6f;
+ struct ip6_hdr *ip6, *ip6i;
+ uint32_t mtu;
+ int plen, proto;
+ uint8_t type, code;
+
+ if (hlen == 0) {
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
+ nat64_check_ip6(&ip6->ip6_dst) != 0)
+ return (NAT64SKIP);
+
+ proto = nat64_getlasthdr(m, &hlen);
+ if (proto != IPPROTO_ICMPV6) {
+ DPRINTF(DP_DROPS,
+ "dropped due to mbuf isn't contigious");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ }
+
+ /*
+ * Translate ICMPv6 type and code to ICMPv4 (RFC7915).
+ * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6().
+ */
+ icmp6 = mtodo(m, hlen);
+ mtu = 0;
+ switch (icmp6->icmp6_type) {
+ case ICMP6_DST_UNREACH:
+ type = ICMP_UNREACH;
+ switch (icmp6->icmp6_code) {
+ case ICMP6_DST_UNREACH_NOROUTE:
+ case ICMP6_DST_UNREACH_BEYONDSCOPE:
+ case ICMP6_DST_UNREACH_ADDR:
+ code = ICMP_UNREACH_HOST;
+ break;
+ case ICMP6_DST_UNREACH_ADMIN:
+ code = ICMP_UNREACH_HOST_PROHIB;
+ break;
+ case ICMP6_DST_UNREACH_NOPORT:
+ code = ICMP_UNREACH_PORT;
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
+ " code %d", icmp6->icmp6_type,
+ icmp6->icmp6_code);
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ break;
+ case ICMP6_PACKET_TOO_BIG:
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_NEEDFRAG;
+ mtu = ntohl(icmp6->icmp6_mtu);
+ if (mtu < IPV6_MMTU) {
+ DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d,"
+ " code %d", mtu, icmp6->icmp6_type,
+ icmp6->icmp6_code);
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ /*
+ * Adjust MTU to reflect difference between
+ * IPv6 and IPv4 headers.
+ */
+ mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip);
+ break;
+ case ICMP6_TIME_EXCEED_TRANSIT:
+ type = ICMP_TIMXCEED;
+ code = ICMP_TIMXCEED_INTRANS;
+ break;
+ case ICMP6_PARAM_PROB:
+ switch (icmp6->icmp6_code) {
+ case ICMP6_PARAMPROB_HEADER:
+ type = ICMP_PARAMPROB;
+ code = ICMP_PARAMPROB_ERRATPTR;
+ mtu = ntohl(icmp6->icmp6_pptr);
+ /* Map the IPv6 pointer field onto the IPv4 header. */
+ switch (mtu) {
+ case 0: /* Version/Traffic Class */
+ case 1: /* Traffic Class/Flow Label */
+ break;
+ case 4: /* Payload Length */
+ case 5:
+ mtu = 2;
+ break;
+ case 6: /* Next Header */
+ mtu = 9;
+ break;
+ case 7: /* Hop Limit */
+ mtu = 8;
+ break;
+ default:
+ if (mtu >= 8 && mtu <= 23) {
+ mtu = 12; /* Source address */
+ break;
+ }
+ if (mtu >= 24 && mtu <= 39) {
+ mtu = 16; /* Destination address */
+ break;
+ }
+ DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
+ " code %d, pptr %d", icmp6->icmp6_type,
+ icmp6->icmp6_code, mtu);
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ /*
+ * Stop here: falling through into the NEXTHEADER
+ * case would clobber the PARAMPROB translation
+ * computed above with UNREACH/PROTOCOL.
+ */
+ break;
+ case ICMP6_PARAMPROB_NEXTHEADER:
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_PROTOCOL;
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
+ " code %d, pptr %d", icmp6->icmp6_type,
+ icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr));
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ break;
+ default:
+ DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d",
+ icmp6->icmp6_type, icmp6->icmp6_code);
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+
+ hlen += sizeof(struct icmp6_hdr);
+ if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
+ NAT64STAT_INC(stats, dropped);
+ DPRINTF(DP_DROPS, "Message is too short %d",
+ m->m_pkthdr.len);
+ return (NAT64MFREE);
+ }
+ /*
+ * We need at least ICMP_MINLEN bytes of original datagram payload
+ * to generate ICMP message. It is nice that ICMP_MINLEN is equal
+ * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment
+ * header we will not have to do m_pullup() again.
+ *
+ * What we have here:
+ * Outer header: (IPv6iGW, v4mapPRefix+v4exthost)
+ * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]
+ * We need to translate it to:
+ *
+ * Outer header: (alias_host, v4exthost)
+ * Inner header: (v4exthost, alias_host) [sport, alias_port]
+ *
+ * Assume caller function has checked if v4mapPRefix+v4host
+ * matches configured prefix.
+ * The only two things we should be provided with are mapping between
+ * IPv6iHost <> alias_host and between dport and alias_port.
+ */
+ if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
+ m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
+ if (m == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ return (NAT64RETURN);
+ }
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6i = mtodo(m, hlen);
+ ip6f = NULL;
+ proto = ip6i->ip6_nxt;
+ plen = ntohs(ip6i->ip6_plen);
+ hlen += sizeof(struct ip6_hdr);
+ if (proto == IPPROTO_FRAGMENT) {
+ if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +
+ ICMP_MINLEN)
+ goto fail;
+ ip6f = mtodo(m, hlen);
+ proto = ip6f->ip6f_nxt;
+ plen -= sizeof(struct ip6_frag);
+ hlen += sizeof(struct ip6_frag);
+ /* Adjust MTU to reflect frag header size */
+ if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG)
+ mtu -= sizeof(struct ip6_frag);
+ }
+ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
+ DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header",
+ proto);
+ goto fail;
+ }
+ if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||
+ nat64_check_ip6(&ip6i->ip6_dst) != 0) {
+ DPRINTF(DP_DROPS, "Inner addresses do not passes the check");
+ goto fail;
+ }
+ /* Check if outer dst is the same as inner src */
+ if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) {
+ DPRINTF(DP_DROPS, "Inner src doesn't match outer dst");
+ goto fail;
+ }
+
+ /* Now we need to make a fake IPv4 packet to generate ICMP message */
+ ip.ip_dst.s_addr = aaddr;
+ ip.ip_src.s_addr = nat64_get_ip4(&ip6i->ip6_src);
+ /* XXX: Make fake ulp header */
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+ ip6i->ip6_hlim += IPV6_HLIMDEC; /* init_ip4hdr will decrement it */
+#endif
+ nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);
+ m_adj(m, hlen - sizeof(struct ip));
+ bcopy(&ip, mtod(m, void *), sizeof(ip));
+ nat64_icmp_reflect(m, type, code, (uint16_t)mtu, stats, logdata);
+ return (NAT64RETURN);
+fail:
+ /*
+ * We must call m_freem() because mbuf pointer could be
+ * changed with m_pullup().
+ */
+ m_freem(m);
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64RETURN);
+}
+
+/*
+ * Translate one IPv6 packet to IPv4.  The caller supplies the alias
+ * IPv4 source address (@aaddr) and source port (@aport); the destination
+ * is extracted from the embedded IPv4 address in ip6_dst.
+ * Returns NAT64SKIP (leave the packet untranslated), NAT64MFREE (caller
+ * must free the mbuf) or NAT64RETURN (mbuf consumed here).
+ */
+int
+nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats, void *logdata)
+{
+ struct route ro;
+ struct ip ip;
+ struct ifnet *ifp;
+ struct ip6_frag *frag;
+ struct ip6_hdr *ip6;
+ struct icmp6_hdr *icmp6;
+ struct sockaddr *dst;
+ uint16_t *csum;
+ uint32_t mtu;
+ int plen, hlen, proto;
+
+ /*
+ * XXX: we expect ipfw_chk() did m_pullup() up to upper level
+ * protocol's headers. Also we skip some checks, that ip6_input(),
+ * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
+ */
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
+ nat64_check_ip6(&ip6->ip6_dst) != 0) {
+ return (NAT64SKIP);
+ }
+
+ /* Starting from this point we must not return zero */
+ ip.ip_src.s_addr = aaddr;
+ if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {
+ DPRINTF(DP_GENERIC, "invalid source address: %08x",
+ ip.ip_src.s_addr);
+ /* XXX: stats? */
+ return (NAT64MFREE);
+ }
+
+ ip.ip_dst.s_addr = nat64_get_ip4(&ip6->ip6_dst);
+ if (ip.ip_dst.s_addr == 0) {
+ /* XXX: stats? */
+ return (NAT64MFREE);
+ }
+
+ /* Hop limit would expire after our decrement: reflect time exceeded. */
+ if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
+ nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,
+ ICMP6_TIME_EXCEED_TRANSIT, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ hlen = 0;
+ plen = ntohs(ip6->ip6_plen);
+ proto = nat64_getlasthdr(m, &hlen);
+ if (proto < 0) {
+ DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ frag = NULL;
+ if (proto == IPPROTO_FRAGMENT) {
+ /* ipfw_chk should m_pullup up to frag header */
+ if (m->m_len < hlen + sizeof(*frag)) {
+ DPRINTF(DP_DROPS,
+ "dropped due to mbuf isn't contigious");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ frag = mtodo(m, hlen);
+ proto = frag->ip6f_nxt;
+ hlen += sizeof(*frag);
+ /* Fragmented ICMPv6 is unsupported */
+ if (proto == IPPROTO_ICMPV6) {
+ DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6");
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ /* Fragment length must be multiple of 8 octets */
+ if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&
+ ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {
+ nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,
+ ICMP6_PARAMPROB_HEADER,
+ offsetof(struct ip6_hdr, ip6_plen), stats,
+ logdata);
+ return (NAT64RETURN);
+ }
+ }
+ /* Trim extension headers consumed above from the payload length. */
+ plen -= hlen - sizeof(struct ip6_hdr);
+ if (plen < 0 || m->m_pkthdr.len < plen + hlen) {
+ DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d",
+ plen, m->m_pkthdr.len, hlen);
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+
+ icmp6 = NULL; /* Make gcc happy */
+ if (proto == IPPROTO_ICMPV6) {
+ icmp6 = mtodo(m, hlen);
+ /* Everything except echo/reply goes through the error path. */
+ if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
+ icmp6->icmp6_type != ICMP6_ECHO_REPLY)
+ return (nat64_handle_icmp6(m, hlen, aaddr, aport,
+ stats, logdata));
+ }
+ dst = nat64_find_route4(&ro, ip.ip_dst.s_addr, m);
+ if (dst == NULL) {
+ FREE_ROUTE(&ro);
+ NAT64STAT_INC(stats, noroute4);
+ nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,
+ ICMP6_DST_UNREACH_NOROUTE, 0, stats, logdata);
+ return (NAT64RETURN);
+ }
+
+ ifp = ro.ro_rt->rt_ifp;
+ if (ro.ro_rt->rt_mtu != 0)
+ mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
+ else
+ mtu = ifp->if_mtu;
+ if (mtu < plen + sizeof(ip)) {
+ FREE_ROUTE(&ro);
+ nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, mtu, stats,
+ logdata);
+ return (NAT64RETURN);
+ }
+ nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);
+ /* Convert checksums. */
+ switch (proto) {
+ case IPPROTO_TCP:
+ csum = &TCP(mtodo(m, hlen))->th_sum;
+ if (aport != 0) {
+ struct tcphdr *tcp = TCP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, tcp->th_sport, aport);
+ tcp->th_sport = aport;
+ }
+ *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
+ break;
+ case IPPROTO_UDP:
+ csum = &UDP(mtodo(m, hlen))->uh_sum;
+ if (aport != 0) {
+ struct udphdr *udp = UDP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, udp->uh_sport, aport);
+ udp->uh_sport = aport;
+ }
+ *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
+ break;
+ case IPPROTO_ICMPV6:
+ /* Checksum in ICMPv6 covers pseudo header */
+ csum = &icmp6->icmp6_cksum;
+ *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen,
+ IPPROTO_ICMPV6, 0));
+ /* Convert ICMPv6 types to ICMP */
+ mtu = *(uint16_t *)icmp6; /* save old word for cksum_adjust */
+ if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST)
+ icmp6->icmp6_type = ICMP_ECHO;
+ else /* ICMP6_ECHO_REPLY */
+ icmp6->icmp6_type = ICMP_ECHOREPLY;
+ *csum = cksum_adjust(*csum, (uint16_t)mtu, *(uint16_t *)icmp6);
+ if (aport != 0) {
+ uint16_t old_id = icmp6->icmp6_id;
+ icmp6->icmp6_id = aport;
+ *csum = cksum_adjust(*csum, old_id, aport);
+ }
+ break;
+ };
+
+ /* Replace the IPv6 header (and ext headers) with the IPv4 header. */
+ m_adj(m, hlen - sizeof(ip));
+ bcopy(&ip, mtod(m, void *), sizeof(ip));
+ if (nat64_output(ifp, m, dst, &ro, stats, logdata) == 0)
+ NAT64STAT_INC(stats, opcnt64);
+ FREE_ROUTE(&ro);
+ return (NAT64RETURN);
+}
+
diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.h b/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.h
new file mode 100644
index 00000000..9f653954
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.h
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_NAT64_TRANSLATE_H_
+#define _IP_FW_NAT64_TRANSLATE_H_
+
+#ifdef RTALLOC_NOLOCK
+#define IN_LOOKUP_ROUTE(ro, fib) rtalloc_fib_nolock((ro), 0, (fib))
+#define IN6_LOOKUP_ROUTE(ro, fib) in6_rtalloc_nolock((ro), (fib))
+#define FREE_ROUTE(ro)
+#else
+#define IN_LOOKUP_ROUTE(ro, fib) rtalloc_ign_fib((ro), 0, (fib))
+#define IN6_LOOKUP_ROUTE(ro, fib) in6_rtalloc((ro), (fib))
+#define FREE_ROUTE(ro) RO_RTFREE((ro))
+#endif
+
+static inline int
+nat64_check_ip6(struct in6_addr *addr)
+{
+
+ /* XXX: only the first 16 bits of the 0000::/8 range are tested */
+ if (addr->s6_addr16[0] == 0) /* 0000::/8 Reserved by IETF */
+ return (1);
+ /* Multicast and link-local scopes are not translatable. */
+ if (IN6_IS_ADDR_MULTICAST(addr) || IN6_IS_ADDR_LINKLOCAL(addr))
+ return (1);
+ return (0);
+}
+
+extern int nat64_allow_private;
+/*
+ * Returns 1 if @ia (network byte order) is a private or special-purpose
+ * IPv4 address that must not appear behind the NAT64 prefix, 0 otherwise.
+ * The whole check is bypassed when the global nat64_allow_private flag
+ * is set.
+ */
+static inline int
+nat64_check_private_ip4(in_addr_t ia)
+{
+
+ if (nat64_allow_private)
+ return (0);
+ /* WKPFX must not be used to represent non-global IPv4 addresses */
+// if (cfg->flags & NAT64_WKPFX) {
+ /* IN_PRIVATE: RFC1918 10/8, 172.16/12, 192.168/16 */
+ if ((ia & htonl(0xff000000)) == htonl(0x0a000000) ||
+ (ia & htonl(0xfff00000)) == htonl(0xac100000) ||
+ (ia & htonl(0xffff0000)) == htonl(0xc0a80000))
+ return (1);
+ /*
+ * RFC 5735:
+ * 192.0.0.0/24 - reserved for IETF protocol assignments
+ * 192.88.99.0/24 - for use as 6to4 relay anycast addresses
+ * 198.18.0.0/15 - for use in benchmark tests
+ * 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use
+ * in documentation and example code
+ */
+ if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) ||
+ (ia & htonl(0xffffff00)) == htonl(0xc0586300) ||
+ (ia & htonl(0xfffffe00)) == htonl(0xc6120000) ||
+ (ia & htonl(0xffffff00)) == htonl(0xc0000200) ||
+ (ia & htonl(0xfffffe00)) == htonl(0xc6336400) ||
+ (ia & htonl(0xffffff00)) == htonl(0xcb007100))
+ return (1);
+// }
+ return (0);
+}
+
+static inline int
+nat64_check_ip4(in_addr_t ia)
+{
+
+ /*
+ * Reject addresses that must never be a translation endpoint:
+ * loopback (127/8), link-local (169.254/16) and the combined
+ * multicast/experimental range (top three bits set, i.e. >= 224).
+ */
+ if ((ia & htonl(0xff000000)) == htonl(0x7f000000) ||
+ (ia & htonl(0xffff0000)) == htonl(0xa9fe0000) ||
+ (ia & htonl(0xe0000000)) == htonl(0xe0000000))
+ return (1);
+ return (0);
+}
+
+#define nat64_get_ip4(_ip6) ((_ip6)->s6_addr32[3])
+#define nat64_set_ip4(_ip6, _ip4) (_ip6)->s6_addr32[3] = (_ip4)
+
+int nat64_getlasthdr(struct mbuf *m, int *offset);
+int nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
+ struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats,
+ void *logdata);
+int nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats, void *logdata);
+int nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats, void *logdata);
+
+#endif
+
diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c
new file mode 100644
index 00000000..ce666213
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c
@@ -0,0 +1,1772 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_pflog.h>
+#include <net/pfil.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/ip_fw_nat64.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64lsn.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+#include <netpfil/pf/pf.h>
+
+MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");
+
+static void nat64lsn_periodic(void *data);
+#define PERIODIC_DELAY 4
+static uint8_t nat64lsn_proto_map[256];
+uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
+
+#define NAT64_FLAG_FIN 0x01 /* FIN was seen */
+#define NAT64_FLAG_SYN 0x02 /* First syn in->out */
+#define NAT64_FLAG_ESTAB 0x04 /* Packet with Ack */
+#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
+
+#define NAT64_FLAG_RDR 0x80 /* Port redirect */
+#define NAT64_LOOKUP(chain, cmd) \
+ (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
+/*
+ * Delayed job queue, used to create new hosts
+ * and new portgroups
+ */
+/* Kinds of deferred work carried by a nat64lsn_job_item. */
+enum nat64lsn_jtype {
+ JTYPE_NEWHOST = 1, /* create a new host entry */
+ JTYPE_NEWPORTGROUP, /* create a new portgroup */
+ JTYPE_DELPORTGROUP, /* delete portgroup(s), see delmask */
+};
+
+/* One unit of deferred work, queued under jmtx (see JQUEUE_LOCK). */
+struct nat64lsn_job_item {
+ TAILQ_ENTRY(nat64lsn_job_item) next; /* queue linkage */
+ enum nat64lsn_jtype jtype; /* what kind of job this is */
+ struct nat64lsn_host *nh; /* host the job operates on */
+ struct nat64lsn_portgroup *pg; /* portgroup the job operates on */
+ void *spare_idx; /* NOTE(review): presumably a preallocated
+ * portgroup index chunk - confirm */
+ struct in6_addr haddr; /* IPv6 address of the host */
+ uint8_t nat_proto; /* NAT_PROTO_* of the flow */
+ uint8_t done; /* nonzero once the job was handled */
+ int needs_idx; /* nonzero if a new index chunk is needed */
+ int delcount; /* number of portgroups to delete */
+ unsigned int fhash; /* Flow hash */
+ uint32_t aaddr; /* Last used address (net) */
+ struct mbuf *m; /* packet that triggered the job, if any */
+ struct ipfw_flow_id f_id; /* flow id of the triggering packet */
+ uint64_t delmask[NAT64LSN_PGPTRNMASK]; /* bitmask of pgs to delete */
+};
+
+static struct mtx jmtx;
+#define JQUEUE_LOCK_INIT() mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
+#define JQUEUE_LOCK_DESTROY() mtx_destroy(&jmtx)
+#define JQUEUE_LOCK() mtx_lock(&jmtx)
+#define JQUEUE_UNLOCK() mtx_unlock(&jmtx)
+
+static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_head *jhead, int jlen);
+
+static struct nat64lsn_job_item *nat64lsn_create_job(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, int jtype);
+static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
+ int needs_idx);
+static int nat64lsn_request_host(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm);
+static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm);
+static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
+ struct ipfw_flow_id *f_id, struct mbuf **pm);
+
+static int alloc_portgroup(struct nat64lsn_job_item *ji);
+static void destroy_portgroup(struct nat64lsn_portgroup *pg);
+static void destroy_host6(struct nat64lsn_host *nh);
+static int alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+
+static int attach_portgroup(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+
+
+/* XXX tmp */
+static uma_zone_t nat64lsn_host_zone;
+static uma_zone_t nat64lsn_pg_zone;
+static uma_zone_t nat64lsn_pgidx_zone;
+
+static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_host *nh);
+
+#define I6_hash(x) (djb_hash((const unsigned char *)(x), 16))
+#define I6_first(_ph, h) (_ph)[h]
+#define I6_next(x) (x)->next
+#define I6_val(x) (&(x)->addr)
+#define I6_cmp(a, b) IN6_ARE_ADDR_EQUAL(a, b)
+#define I6_lock(a, b)
+#define I6_unlock(a, b)
+
+#define I6HASH_FIND(_cfg, _res, _a) \
+ CHT_FIND(_cfg->ih, _cfg->ihsize, I6_, _res, _a)
+#define I6HASH_INSERT(_cfg, _i) \
+ CHT_INSERT_HEAD(_cfg->ih, _cfg->ihsize, I6_, _i)
+#define I6HASH_REMOVE(_cfg, _res, _tmp, _a) \
+ CHT_REMOVE(_cfg->ih, _cfg->ihsize, I6_, _res, _tmp, _a)
+
+#define I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg) \
+ CHT_FOREACH_SAFE(_cfg->ih, _cfg->ihsize, I6_, _x, _tmp, _cb, _arg)
+
+#define HASH_IN4(x) djb_hash((const unsigned char *)(x), 8)
+
+/* Bernstein-style hash: h = h * 33 ^ byte, folded over @len bytes. */
+static unsigned
+djb_hash(const unsigned char *h, const int len)
+{
+ unsigned int hv = 0;
+ const unsigned char *p, *end;
+
+ for (p = h, end = h + len; p != end; p++)
+ hv = ((hv << 5) + hv) ^ *p; /* hv * 33 ^ byte */
+
+ return (hv);
+}
+
+/*
+static size_t
+bitmask_size(size_t num, int *level)
+{
+ size_t x;
+ int c;
+
+ for (c = 0, x = num; num > 1; num /= 64, c++)
+ ;
+
+ return (x);
+}
+
+static void
+bitmask_prepare(uint64_t *pmask, size_t bufsize, int level)
+{
+ size_t x, z;
+
+ memset(pmask, 0xFF, bufsize);
+ for (x = 0, z = 1; level > 1; x += z, z *= 64, level--)
+ ;
+ pmask[x] ~= 0x01;
+}
+*/
+
+/*
+ * Emit a pflog(4) record describing a translated packet.
+ * @plog is a caller-supplied header buffer that is fully initialized
+ * here; @n and @sn are logged as rule / sub-rule numbers.
+ */
+static void
+nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
+ uint32_t n, uint32_t sn)
+{
+
+ /*
+ * Zero the whole structure: the previous sizeof(plog) only cleared
+ * a pointer's worth of bytes, leaving the rest of the header as
+ * uninitialized stack garbage that leaked into the log stream.
+ */
+ memset(plog, 0, sizeof(*plog));
+ plog->length = PFLOG_REAL_HDRLEN;
+ plog->af = family;
+ plog->action = PF_NAT;
+ plog->dir = PF_IN;
+ plog->rulenr = htonl(n);
+ plog->subrulenr = htonl(sn);
+ plog->ruleset[0] = '\0';
+ strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
+ ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+}
+/*
+ * Inspects ICMP packets to see if the message embeds another packet
+ * header (ICMP errors), in which case @addr and @port must be taken
+ * from the embedded datagram rather than the outer header.
+ * Returns 0 on success; ENOMEM if m_pullup() failed (*m is NULL then,
+ * the mbuf is gone); EINVAL for truncated messages; EOPNOTSUPP for
+ * message types we do not translate.
+ */
+static int
+inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr,
+ uint16_t *port)
+{
+ struct ip *ip;
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ struct icmphdr *icmp;
+ int off;
+ uint8_t proto;
+
+ ip = mtod(*m, struct ip *); /* Outer IP header */
+ off = (ip->ip_hl << 2) + ICMP_MINLEN;
+ if ((*m)->m_len < off)
+ *m = m_pullup(*m, off);
+ if (*m == NULL)
+ return (ENOMEM);
+
+ /* Re-read the header: m_pullup() may have moved the data. */
+ ip = mtod(*m, struct ip *); /* Outer IP header */
+ icmp = L3HDR(ip, struct icmphdr *);
+ switch (icmp->icmp_type) {
+ case ICMP_ECHO:
+ case ICMP_ECHOREPLY:
+ /* Use icmp ID as distinguisher */
+ *port = ntohs(*((uint16_t *)(icmp + 1)));
+ return (0);
+ case ICMP_UNREACH:
+ case ICMP_TIMXCEED:
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ /*
+ * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
+ * of ULP header.
+ */
+ if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
+ return (EINVAL);
+ if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
+ *m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN);
+ if (*m == NULL)
+ return (ENOMEM);
+ ip = mtodo(*m, off); /* Inner IP header */
+ proto = ip->ip_p;
+ off += ip->ip_hl << 2; /* Skip inner IP header */
+ *addr = ntohl(ip->ip_src.s_addr);
+ if ((*m)->m_len < off + ICMP_MINLEN)
+ *m = m_pullup(*m, off + ICMP_MINLEN);
+ if (*m == NULL)
+ return (ENOMEM);
+ switch (proto) {
+ case IPPROTO_TCP:
+ tcp = mtodo(*m, off);
+ *nat_proto = NAT_PROTO_TCP;
+ *port = ntohs(tcp->th_sport);
+ return (0);
+ case IPPROTO_UDP:
+ udp = mtodo(*m, off);
+ *nat_proto = NAT_PROTO_UDP;
+ *port = ntohs(udp->uh_sport);
+ return (0);
+ case IPPROTO_ICMP:
+ /*
+ * We will translate only ICMP errors for our ICMP
+ * echo requests.
+ */
+ icmp = mtodo(*m, off);
+ if (icmp->icmp_type != ICMP_ECHO)
+ return (EOPNOTSUPP);
+ *port = ntohs(*((uint16_t *)(icmp + 1)));
+ return (0);
+ };
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Condense TCP header flags into the NAT64_FLAG_* state bits:
+ * FIN and SYN map straight through, RST counts as FIN and
+ * ACK marks the state as established.
+ */
+static inline uint8_t
+convert_tcp_flags(uint8_t flags)
+{
+ uint8_t out = 0;
+
+ if (flags & TH_FIN)
+ out |= NAT64_FLAG_FIN;
+ if (flags & TH_SYN)
+ out |= NAT64_FLAG_SYN;
+ if (flags & TH_RST) /* Treat RST as FIN */
+ out |= NAT64_FLAG_FIN;
+ if (flags & TH_ACK) /* Treat ACK as estab */
+ out |= NAT64_FLAG_ESTAB;
+
+ return (out);
+}
+
+/*
+ * Handle a packet in the IPv4 -> IPv6 direction: find the existing
+ * NAT state by destination (alias) address and port, refresh its
+ * timestamp/flags and hand the packet to nat64_do_handle_ip4().
+ * Returns an IP_FW_* verdict; *pm is NULLed when the mbuf is consumed
+ * or freed.
+ */
+static NAT64NOINLINE int
+nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
+ struct mbuf **pm)
+{
+ struct pfloghdr loghdr, *logdata;
+ struct in6_addr src6;
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_host *nh;
+ struct nat64lsn_state *st;
+ struct ip *ip;
+ uint32_t addr;
+ uint16_t state_flags, state_ts;
+ uint16_t port, lport;
+ uint8_t nat_proto;
+ int ret;
+
+ addr = f_id->dst_ip;
+ port = f_id->dst_port;
+ /* Destination must fall inside our alias address pool. */
+ if (addr < cfg->prefix4 || addr > cfg->pmask4) {
+ NAT64STAT_INC(&cfg->stats, nomatch4);
+ return (cfg->nomatch_verdict);
+ }
+
+ /* Check if protocol is supported and get its short id */
+ nat_proto = nat64lsn_proto_map[f_id->proto];
+ if (nat_proto == 0) {
+ NAT64STAT_INC(&cfg->stats, noproto);
+ return (cfg->nomatch_verdict);
+ }
+
+ /* We might need to handle icmp differently */
+ if (nat_proto == NAT_PROTO_ICMP) {
+ /* ICMP errors carry the real addr/port in the inner packet. */
+ ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port);
+ if (ret != 0) {
+ if (ret == ENOMEM)
+ NAT64STAT_INC(&cfg->stats, nomem);
+ else
+ NAT64STAT_INC(&cfg->stats, noproto);
+ return (cfg->nomatch_verdict);
+ }
+ /* XXX: Check addr for validity */
+ if (addr < cfg->prefix4 || addr > cfg->pmask4) {
+ NAT64STAT_INC(&cfg->stats, nomatch4);
+ return (cfg->nomatch_verdict);
+ }
+ }
+
+ /* Calc portgroup offset w.r.t protocol */
+ pg = GET_PORTGROUP(cfg, addr, nat_proto, port);
+
+ /* Check if this port is occupied by any portgroup */
+ if (pg == NULL) {
+ NAT64STAT_INC(&cfg->stats, nomatch4);
+#if 0
+ DPRINTF(DP_STATE, "NOMATCH %u %d %d (%d)", addr, nat_proto, port,
+ _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port));
+#endif
+ return (cfg->nomatch_verdict);
+ }
+
+ /* TODO: Check flags to see if we need to do some static mapping */
+ nh = pg->host;
+
+ /* Prepare some fields we might need to update */
+ SET_AGE(state_ts);
+ ip = mtod(*pm, struct ip *);
+ if (ip->ip_p == IPPROTO_TCP)
+ state_flags = convert_tcp_flags(
+ L3HDR(ip, struct tcphdr *)->th_flags);
+ else
+ state_flags = 0;
+
+ /* Lock host and get port mapping */
+ NAT64_LOCK(nh);
+
+ st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)];
+ /* Refresh the state only when something actually changed. */
+ if (st->timestamp != state_ts)
+ st->timestamp = state_ts;
+ if ((st->flags & state_flags) != state_flags)
+ st->flags |= state_flags;
+ lport = htons(st->u.s.lport);
+
+ NAT64_UNLOCK(nh);
+
+ if (cfg->flags & NAT64_LOG) {
+ logdata = &loghdr;
+ nat64lsn_log(logdata, *pm, AF_INET, pg->idx, st->cur.off);
+ } else
+ logdata = NULL;
+
+ /* Build the IPv6 source: configured prefix + embedded IPv4 src. */
+ src6.s6_addr32[0] = cfg->prefix6.s6_addr32[0];
+ src6.s6_addr32[1] = cfg->prefix6.s6_addr32[1];
+ src6.s6_addr32[2] = cfg->prefix6.s6_addr32[2];
+ src6.s6_addr32[3] = htonl(f_id->src_ip);
+
+ ret = nat64_do_handle_ip4(*pm, &src6, &nh->addr, lport,
+ &cfg->stats, logdata);
+
+ if (ret == NAT64SKIP)
+ return (IP_FW_PASS);
+ if (ret == NAT64MFREE)
+ m_freem(*pm);
+ *pm = NULL;
+
+ return (IP_FW_DENY);
+}
+
+/*
+ * Pretty-print a single NAT state entry for debugging.
+ * No-op unless the DP_STATE debug flag is enabled.
+ * @px is a caller-supplied prefix, @off the state's offset in @pg.
+ */
+void
+nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
+ const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
+ const char *px, int off)
+{
+ char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], d[INET_ADDRSTRLEN];
+
+ if ((nat64_debug & DP_STATE) == 0)
+ return;
+ inet_ntop(AF_INET6, &pg->host->addr, s, sizeof(s));
+ inet_ntop(AF_INET, &pg->aaddr, a, sizeof(a));
+ inet_ntop(AF_INET, &st->u.s.faddr, d, sizeof(d));
+
+ DPRINTF(DP_STATE, "%s: PG %d ST [%p|%d]: %s:%d/%d <%s:%d> "
+ "%s:%d AGE %d", px, pg->idx, st, off,
+ s, st->u.s.lport, pg->nat_proto, a, pg->aport + off,
+ d, st->u.s.fport, GET_AGE(st->timestamp));
+}
+
+/*
+ * Check if particular TCP state is stale and should be deleted.
+ * Return 1 if true, 0 otherwise.
+ */
+static int
+nat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg,
+ const struct nat64lsn_state *st, int age)
+{
+ int ttl;
+
+ /* Pick the TTL matching how far the connection has progressed. */
+ if (st->flags & NAT64_FLAG_FIN)
+ ttl = cfg->st_close_ttl;
+ else if (st->flags & NAT64_FLAG_ESTAB)
+ ttl = cfg->st_estab_ttl;
+ else
+ ttl = cfg->st_syn_ttl; /* half-open or unknown */
+
+ return (age > ttl ? 1 : 0);
+}
+
+/*
+ * Check if nat state @st is stale and should be deleted.
+ * Return 1 if true, 0 otherwise.
+ */
+static NAT64NOINLINE int
+nat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg,
+ const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st)
+{
+ int age;
+
+ /* Immutable (redirect) records never expire. */
+ if (st->flags & NAT64_FLAG_RDR)
+ return (0);
+
+ age = GET_AGE(st->timestamp);
+ switch (pg->nat_proto) {
+ case NAT_PROTO_TCP:
+ return (nat64lsn_periodic_check_tcp(cfg, st, age));
+ case NAT_PROTO_UDP:
+ return (age > cfg->st_udp_ttl ? 1 : 0);
+ case NAT_PROTO_ICMP:
+ return (age > cfg->st_icmp_ttl ? 1 : 0);
+ }
+
+ return (0);
+}
+
+
+/*
+ * The following structures and functions
+ * are used to perform SLIST_FOREACH_SAFE()
+ * analog for states identified by struct st_ptr.
+ */
+
+/* Iteration cursor over a chain of states addressed by struct st_ptr. */
+struct st_idx {
+ struct nat64lsn_portgroup *pg; /* portgroup owning @st */
+ struct nat64lsn_state *st; /* current state, NULL at end */
+ struct st_ptr sidx_next; /* index of the following state */
+};
+
+/*
+ * Begin iteration at the state addressed by @sidx: fill @si with the
+ * resolved portgroup/state pair and the index of the next chain entry.
+ * An idx of 0 marks an empty chain; @si is zeroed then (si->st == NULL).
+ * Returns @si for use as a for-loop initializer.
+ */
+static struct st_idx *
+st_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
+ struct st_ptr *sidx, struct st_idx *si)
+{
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_state *st;
+
+ if (sidx->idx == 0) {
+ memset(si, 0, sizeof(*si));
+ return (si);
+ }
+
+ pg = PORTGROUP_BYSIDX(cfg, nh, sidx->idx);
+ st = &pg->states[sidx->off];
+
+ si->pg = pg;
+ si->st = st;
+ /* Capture the successor now, so the current entry may be freed. */
+ si->sidx_next = st->next;
+
+ return (si);
+}
+
+/*
+ * Advance @si to the state recorded in si->sidx_next.
+ * Clears @si (si->st == NULL) when the chain is exhausted.
+ * Returns @si for use as a for-loop increment expression.
+ */
+static struct st_idx *
+st_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
+ struct st_idx *si)
+{
+ struct st_ptr sidx;
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_state *st;
+
+ sidx = si->sidx_next;
+ if (sidx.idx == 0) {
+ memset(si, 0, sizeof(*si));
+ si->st = NULL;
+ si->pg = NULL;
+ return (si);
+ }
+
+ pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
+ st = &pg->states[sidx.off];
+
+ si->pg = pg;
+ si->st = st;
+ /* Pre-fetch the successor, SLIST_FOREACH_SAFE style. */
+ si->sidx_next = st->next;
+
+ return (si);
+}
+
+/* Remember @si as the "previous" cursor, but only if it is still live. */
+static struct st_idx *
+st_save_cond(struct st_idx *si_dst, struct st_idx *si)
+{
+
+ if (si->st == NULL)
+ return (si_dst);
+
+ *si_dst = *si;
+ return (si_dst);
+}
+
+/*
+ * Walk every state hash chain of host @nh, unlink and clear entries
+ * that nat64lsn_periodic_chkstate() reports as stale.
+ * Returns the number of deleted states (also accounted in stats).
+ *
+ * Marked static to match the forward declaration near the top of the
+ * file; the definition previously omitted the qualifier.
+ */
+static unsigned int
+nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh)
+{
+ struct st_idx si, si_prev;
+ int i;
+ unsigned int delcount;
+
+ delcount = 0;
+ for (i = 0; i < nh->hsize; i++) {
+ memset(&si_prev, 0, sizeof(si_prev));
+ for (st_first(cfg, nh, &nh->phash[i], &si);
+ si.st != NULL;
+ st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) {
+ if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0)
+ continue;
+ nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE",
+ si.st->cur.off);
+ /* Unlink from hash */
+ if (si_prev.st != NULL)
+ si_prev.st->next = si.st->next;
+ else
+ nh->phash[i] = si.st->next;
+ /* Delete state and free its data */
+ PG_MARK_FREE_IDX(si.pg, si.st->cur.off);
+ memset(si.st, 0, sizeof(struct nat64lsn_state));
+ si.st = NULL;
+ delcount++;
+
+ /* Update portgroup timestamp */
+ SET_AGE(si.pg->timestamp);
+ }
+ }
+ NAT64STAT_ADD(&cfg->stats, sdeleted, delcount);
+ return (delcount);
+}
+
+/*
+ * Checks if portgroup is not used and can be deleted,
+ * Returns 1 if stale, 0 otherwise
+ */
+static int
+stale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg)
+{
+
+ if (!PG_IS_EMPTY(pg))
+ return (0);
+ if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
+ return (0);
+ return (1);
+}
+
+/*
+ * Checks if host record is not used and can be deleted,
+ * Returns 1 if stale, 0 otherwise
+ */
+static int
+stale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh)
+{
+
+ if (nh->pg_used != 0)
+ return (0);
+ if (GET_AGE(nh->timestamp) < cfg->nh_delete_delay)
+ return (0);
+ return (1);
+}
+
+struct nat64lsn_periodic_data {
+ struct nat64lsn_cfg *cfg;
+ struct nat64lsn_job_head jhead;
+ int jlen;
+};
+
+static NAT64NOINLINE int
+nat64lsn_periodic_chkhost(struct nat64lsn_host *nh,
+ struct nat64lsn_periodic_data *d)
+{
+ char a[INET6_ADDRSTRLEN];
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_job_item *ji;
+ uint64_t delmask[NAT64LSN_PGPTRNMASK];
+ int delcount, i;
+
+ delcount = 0;
+ memset(delmask, 0, sizeof(delmask));
+
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_JQUEUE, "Checking %s host %s on cpu %d",
+ stale_nh(d->cfg, nh) ? "stale" : "non-stale", a, curcpu);
+ if (!stale_nh(d->cfg, nh)) {
+ /* Non-stale host. Inspect internals */
+ NAT64_LOCK(nh);
+
+ /* Stage 1: Check&expire states */
+ if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0)
+ SET_AGE(nh->timestamp);
+
+ /* Stage 2: Check if we need to expire */
+ for (i = 0; i < nh->pg_used; i++) {
+ pg = PORTGROUP_BYSIDX(d->cfg, nh, i + 1);
+ if (pg == NULL)
+ continue;
+
+ /* Check if we can delete portgroup */
+ if (stale_pg(d->cfg, pg) == 0)
+ continue;
+
+ DPRINTF(DP_JQUEUE, "Check PG %d", i);
+ delmask[i / 64] |= ((uint64_t)1 << (i % 64));
+ delcount++;
+ }
+
+ NAT64_UNLOCK(nh);
+ if (delcount == 0)
+ return (0);
+ }
+
+ DPRINTF(DP_JQUEUE, "Queueing %d portgroups for deleting", delcount);
+ /* We have something to delete - add it to queue */
+ ji = nat64lsn_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP);
+ if (ji == NULL)
+ return (0);
+
+ ji->haddr = nh->addr;
+ ji->delcount = delcount;
+ memcpy(ji->delmask, delmask, sizeof(ji->delmask));
+
+ TAILQ_INSERT_TAIL(&d->jhead, ji, next);
+ d->jlen++;
+ return (0);
+}
+
+/*
+ * This procedure is used to perform various maintenance
+ * on dynamic hash list. Currently it is called every second.
+ */
+static void
+nat64lsn_periodic(void *data)
+{
+ struct ip_fw_chain *ch;
+ IPFW_RLOCK_TRACKER;
+ struct nat64lsn_cfg *cfg;
+ struct nat64lsn_periodic_data d;
+ struct nat64lsn_host *nh, *tmp;
+
+ cfg = (struct nat64lsn_cfg *) data;
+ ch = cfg->ch;
+ CURVNET_SET(cfg->vp);
+
+ memset(&d, 0, sizeof(d));
+ d.cfg = cfg;
+ TAILQ_INIT(&d.jhead);
+
+ IPFW_RLOCK(ch);
+
+ /* Stage 1: foreach host, check all its portgroups */
+ I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d);
+
+ /* Enqueue everything we have requested */
+ nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen);
+
+ callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
+
+ IPFW_RUNLOCK(ch);
+
+ CURVNET_RESTORE();
+}
+
+static NAT64NOINLINE void
+reinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+
+ if (ji->m == NULL)
+ return;
+
+ /* Request has failed or packet type is wrong */
+ if (ji->f_id.addr_type != 6 || ji->done == 0) {
+ m_freem(ji->m);
+ ji->m = NULL;
+ NAT64STAT_INC(&cfg->stats, dropped);
+ DPRINTF(DP_DROPS, "mbuf dropped: type %d, done %d",
+ ji->jtype, ji->done);
+ return;
+ }
+
+ /*
+ * XXX: Limit recursion level
+ */
+
+ NAT64STAT_INC(&cfg->stats, jreinjected);
+ DPRINTF(DP_JQUEUE, "Reinject mbuf");
+ nat64lsn_translate6(cfg, &ji->f_id, &ji->m);
+}
+
+static void
+destroy_portgroup(struct nat64lsn_portgroup *pg)
+{
+
+ DPRINTF(DP_OBJ, "DESTROY PORTGROUP %d %p", pg->idx, pg);
+ uma_zfree(nat64lsn_pg_zone, pg);
+}
+
+static NAT64NOINLINE int
+alloc_portgroup(struct nat64lsn_job_item *ji)
+{
+ struct nat64lsn_portgroup *pg;
+
+ pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
+ if (pg == NULL)
+ return (1);
+
+ if (ji->needs_idx != 0) {
+ ji->spare_idx = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
+ /* Failed alloc isn't always fatal, so don't check */
+ }
+ memset(&pg->freemask, 0xFF, sizeof(pg->freemask));
+ pg->nat_proto = ji->nat_proto;
+ ji->pg = pg;
+ return (0);
+
+}
+
+static void
+destroy_host6(struct nat64lsn_host *nh)
+{
+ char a[INET6_ADDRSTRLEN];
+ int i;
+
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_OBJ, "DESTROY HOST %s %p (pg used %d)", a, nh,
+ nh->pg_used);
+ NAT64_LOCK_DESTROY(nh);
+ for (i = 0; i < nh->pg_allocated / NAT64LSN_PGIDX_CHUNK; i++)
+ uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, i));
+ uma_zfree(nat64lsn_host_zone, nh);
+}
+
+static NAT64NOINLINE int
+alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+ struct nat64lsn_host *nh;
+ char a[INET6_ADDRSTRLEN];
+
+ nh = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
+ if (nh == NULL)
+ return (1);
+ PORTGROUP_CHUNK(nh, 0) = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT);
+ if (PORTGROUP_CHUNK(nh, 0) == NULL) {
+ uma_zfree(nat64lsn_host_zone, nh);
+ return (2);
+ }
+ if (alloc_portgroup(ji) != 0) {
+ NAT64STAT_INC(&cfg->stats, jportfails);
+ uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, 0));
+ uma_zfree(nat64lsn_host_zone, nh);
+ return (3);
+ }
+
+ NAT64_LOCK_INIT(nh);
+ nh->addr = ji->haddr;
+ nh->hsize = NAT64LSN_HSIZE; /* XXX: hardcoded size */
+ nh->pg_allocated = NAT64LSN_PGIDX_CHUNK;
+ nh->pg_used = 0;
+ ji->nh = nh;
+
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_OBJ, "ALLOC HOST %s %p", a, ji->nh);
+ return (0);
+}
+
+/*
+ * Finds free @pg index inside @nh
+ */
+static NAT64NOINLINE int
+find_nh_pg_idx(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, int *idx)
+{
+ int i;
+
+ for (i = 0; i < nh->pg_allocated; i++) {
+ if (PORTGROUP_BYSIDX(cfg, nh, i + 1) == NULL) {
+ *idx = i;
+ return (0);
+ }
+ }
+ return (1);
+}
+
+static NAT64NOINLINE int
+attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+ char a[INET6_ADDRSTRLEN];
+ struct nat64lsn_host *nh;
+
+ I6HASH_FIND(cfg, nh, &ji->haddr);
+ if (nh == NULL) {
+ /* Add new host to list */
+ nh = ji->nh;
+ I6HASH_INSERT(cfg, nh);
+ cfg->ihcount++;
+ ji->nh = NULL;
+
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_OBJ, "ATTACH HOST %s %p", a, nh);
+ /*
+ * Try to add portgroup.
+ * Note it will automatically set
+ * 'done' on ji if successful.
+ */
+ if (attach_portgroup(cfg, ji) != 0) {
+ DPRINTF(DP_DROPS, "%s %p failed to attach PG",
+ a, nh);
+ NAT64STAT_INC(&cfg->stats, jportfails);
+ return (1);
+ }
+ return (0);
+ }
+
+ /*
+ * nh isn't NULL. This probably means we had several simultaneous
+ * host requests. The previous one request has already attached
+ * this host. Requeue attached mbuf and mark job as done, but
+ * leave nh and pg pointers not changed, so nat64lsn_do_request()
+ * will release all allocated resources.
+ */
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_OBJ, "%s %p is already attached as %p",
+ a, ji->nh, nh);
+ ji->done = 1;
+ return (0);
+}
+
+static NAT64NOINLINE int
+find_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off,
+ int nat_proto, uint16_t *aport, int *ppg_idx)
+{
+ int j, pg_idx;
+
+ pg_idx = addr_off * _ADDR_PG_COUNT +
+ (nat_proto - 1) * _ADDR_PG_PROTO_COUNT;
+
+ for (j = NAT64_MIN_CHUNK; j < _ADDR_PG_PROTO_COUNT; j++) {
+ if (cfg->pg[pg_idx + j] != NULL)
+ continue;
+
+ *aport = j * NAT64_CHUNK_SIZE;
+ *ppg_idx = pg_idx + j;
+ return (1);
+ }
+
+ return (0);
+}
+
+/*
+ * XXX: This function needs to be rewritten to
+ * use free bitmask for faster pg finding,
+ * additionally, it should take into consideration
+ * a) randomization and
+ * b) previous addresses allocated to given nat instance
+ *
+ */
+static NAT64NOINLINE int
+find_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji,
+ uint32_t *aaddr, uint16_t *aport, int *ppg_idx)
+{
+ int i, nat_proto;
+
+ /*
+ * XXX: Use bitmask index to be able to find/check if IP address
+ * has some spare pg's
+ */
+ nat_proto = ji->nat_proto;
+
+ /* First, try to use same address */
+ if (ji->aaddr != 0) {
+ i = ntohl(ji->aaddr) - cfg->prefix4;
+ if (find_pg_place_addr(cfg, i, nat_proto, aport,
+ ppg_idx) != 0){
+ /* Found! */
+ *aaddr = htonl(cfg->prefix4 + i);
+ return (0);
+ }
+ }
+
+ /* Next, try to use random address based on flow hash */
+ i = ji->fhash % (1 << (32 - cfg->plen4));
+ if (find_pg_place_addr(cfg, i, nat_proto, aport, ppg_idx) != 0) {
+ /* Found! */
+ *aaddr = htonl(cfg->prefix4 + i);
+ return (0);
+ }
+
+
+ /* Last one: simply find ANY available */
+ for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
+ if (find_pg_place_addr(cfg, i, nat_proto, aport,
+ ppg_idx) != 0){
+ /* Found! */
+ *aaddr = htonl(cfg->prefix4 + i);
+ return (0);
+ }
+ }
+
+ return (1);
+}
+
+static NAT64NOINLINE int
+attach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+ char a[INET6_ADDRSTRLEN];
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_host *nh;
+ uint32_t aaddr;
+ uint16_t aport;
+ int nh_pg_idx, pg_idx;
+
+ pg = ji->pg;
+
+ /*
+ * Find source host and bind: we can't rely on
+ * pg->host
+ */
+ I6HASH_FIND(cfg, nh, &ji->haddr);
+ if (nh == NULL)
+ return (1);
+
+ /* Find spare port chunk */
+ if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0) {
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_OBJ | DP_DROPS, "empty PG not found for %s", a);
+ return (2);
+ }
+
+ /* Expand PG indexes if needed */
+ if (nh->pg_allocated < cfg->max_chunks && ji->spare_idx != NULL) {
+ PORTGROUP_CHUNK(nh, nh->pg_allocated / NAT64LSN_PGIDX_CHUNK) =
+ ji->spare_idx;
+ nh->pg_allocated += NAT64LSN_PGIDX_CHUNK;
+ ji->spare_idx = NULL;
+ }
+
+ /* Find empty index to store PG in the @nh */
+ if (find_nh_pg_idx(cfg, nh, &nh_pg_idx) != 0) {
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DPRINTF(DP_OBJ | DP_DROPS, "free PG index not found for %s",
+ a);
+ return (3);
+ }
+
+ cfg->pg[pg_idx] = pg;
+ cfg->protochunks[pg->nat_proto]++;
+ NAT64STAT_INC(&cfg->stats, spgcreated);
+
+ pg->aaddr = aaddr;
+ pg->aport = aport;
+ pg->host = nh;
+ pg->idx = pg_idx;
+ SET_AGE(pg->timestamp);
+
+ PORTGROUP_BYSIDX(cfg, nh, nh_pg_idx + 1) = pg;
+ if (nh->pg_used == nh_pg_idx)
+ nh->pg_used++;
+ SET_AGE(nh->timestamp);
+
+ ji->pg = NULL;
+ ji->done = 1;
+
+ return (0);
+}
+
+static NAT64NOINLINE void
+consider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+	struct nat64lsn_host *nh, *nh_tmp;
+	struct nat64lsn_portgroup *pg, *pg_list[256];
+	int i, pg_lidx, idx;
+
+	/* Find source host */
+	I6HASH_FIND(cfg, nh, &ji->haddr);
+	if (nh == NULL || nh->pg_used == 0)
+		return;
+
+	memset(pg_list, 0, sizeof(pg_list));
+	pg_lidx = 0;
+
+	NAT64_LOCK(nh);
+
+	for (i = nh->pg_used - 1; i >= 0; i--) {
+		if ((ji->delmask[i / 64] & ((uint64_t)1 << (i % 64))) == 0)
+			continue;
+		pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
+
+		/* Check that PG isn't busy. */
+		if (stale_pg(cfg, pg) == 0)
+			continue;
+
+		/* DO delete */
+		pg_list[pg_lidx++] = pg;
+		PORTGROUP_BYSIDX(cfg, nh, i + 1) = NULL;
+
+		idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto,
+		    pg->aport);
+		KASSERT(cfg->pg[idx] == pg, ("Non matched pg"));
+		cfg->pg[idx] = NULL;
+		cfg->protochunks[pg->nat_proto]--;
+		NAT64STAT_INC(&cfg->stats, spgdeleted);
+
+		/* Decrease pg_used */
+		while (nh->pg_used > 0 &&
+		    PORTGROUP_BYSIDX(cfg, nh, nh->pg_used) == NULL)
+			nh->pg_used--;
+
+		/* Check if on-stack buffer has ended */
+		if (pg_lidx == nitems(pg_list))
+			break;
+	}
+
+	NAT64_UNLOCK(nh);
+
+	if (stale_nh(cfg, nh)) {
+		I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr);
+		KASSERT(nh != NULL, ("Unable to find address"));
+		cfg->ihcount--;
+		ji->nh = nh;
+		I6HASH_FIND(cfg, nh, &ji->haddr);
+		KASSERT(nh == NULL, ("Failed to delete address"));
+	}
+
+	/* TODO: Delay freeing portgroups */
+	while (pg_lidx > 0) {
+		pg_lidx--;
+		/* spgdeleted already counted when the PG was unlinked above */
+		destroy_portgroup(pg_list[pg_lidx]);
+	}
+}
+
+/*
+ * Main request handler.
+ * Responsible for handling jqueue, e.g.
+ * creating new hosts, adding/deleting portgroups.
+ */
+static NAT64NOINLINE void
+nat64lsn_do_request(void *data)
+{
+ IPFW_RLOCK_TRACKER;
+ struct nat64lsn_job_head jhead;
+ struct nat64lsn_job_item *ji;
+ int jcount, nhsize;
+ struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data;
+ struct ip_fw_chain *ch;
+ int delcount;
+
+ CURVNET_SET(cfg->vp);
+
+ TAILQ_INIT(&jhead);
+
+ /* XXX: We're running unlocked here */
+
+ ch = cfg->ch;
+ delcount = 0;
+ IPFW_RLOCK(ch);
+
+ /* Grab queue */
+ JQUEUE_LOCK();
+ TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next);
+ jcount = cfg->jlen;
+ cfg->jlen = 0;
+ JQUEUE_UNLOCK();
+
+ /* check if we need to resize hash */
+ nhsize = 0;
+ if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) {
+ nhsize = cfg->ihsize;
+ for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2)
+ ;
+ } else if (cfg->ihcount < cfg->ihsize * 4) {
+ nhsize = cfg->ihsize;
+ for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2)
+ ;
+ }
+
+ IPFW_RUNLOCK(ch);
+
+ if (TAILQ_EMPTY(&jhead)) {
+ CURVNET_RESTORE();
+ return;
+ }
+
+ NAT64STAT_INC(&cfg->stats, jcalls);
+ DPRINTF(DP_JQUEUE, "count=%d", jcount);
+
+ /*
+ * TODO:
+ * What we should do here is to build a hash
+ * to ensure we don't have lots of duplicate requests.
+ * Skip this for now.
+ *
+ * TODO: Limit per-call number of items
+ */
+
+ /* Pre-allocate everything for entire chain */
+ TAILQ_FOREACH(ji, &jhead, next) {
+ switch (ji->jtype) {
+ case JTYPE_NEWHOST:
+ if (alloc_host6(cfg, ji) != 0)
+ NAT64STAT_INC(&cfg->stats, jhostfails);
+ break;
+ case JTYPE_NEWPORTGROUP:
+ if (alloc_portgroup(ji) != 0)
+ NAT64STAT_INC(&cfg->stats, jportfails);
+ break;
+ case JTYPE_DELPORTGROUP:
+ delcount += ji->delcount;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /*
+	 * TODO: Allocate new hash
+ */
+ nhsize = 0;
+ if (nhsize > 0) {
+ /* XXX: */
+ }
+
+ /* Apply all changes in batch */
+ IPFW_UH_WLOCK(ch);
+ IPFW_WLOCK(ch);
+
+ TAILQ_FOREACH(ji, &jhead, next) {
+ switch (ji->jtype) {
+ case JTYPE_NEWHOST:
+ if (ji->nh != NULL)
+ attach_host6(cfg, ji);
+ break;
+ case JTYPE_NEWPORTGROUP:
+ if (ji->pg != NULL &&
+ attach_portgroup(cfg, ji) != 0)
+ NAT64STAT_INC(&cfg->stats, jportfails);
+ break;
+ case JTYPE_DELPORTGROUP:
+ consider_del_portgroup(cfg, ji);
+ break;
+ }
+ }
+
+ if (nhsize > 0) {
+ /* XXX: Move everything to new hash */
+ }
+
+ IPFW_WUNLOCK(ch);
+ IPFW_UH_WUNLOCK(ch);
+
+ /* Flush unused entries */
+ while (!TAILQ_EMPTY(&jhead)) {
+ ji = TAILQ_FIRST(&jhead);
+ TAILQ_REMOVE(&jhead, ji, next);
+ if (ji->nh != NULL)
+ destroy_host6(ji->nh);
+ if (ji->pg != NULL)
+ destroy_portgroup(ji->pg);
+ if (ji->m != NULL)
+ reinject_mbuf(cfg, ji);
+ if (ji->spare_idx != NULL)
+ uma_zfree(nat64lsn_pgidx_zone, ji->spare_idx);
+ free(ji, M_IPFW);
+ }
+ CURVNET_RESTORE();
+}
+
+static NAT64NOINLINE struct nat64lsn_job_item *
+nat64lsn_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
+ int jtype)
+{
+ struct nat64lsn_job_item *ji;
+ struct in6_addr haddr;
+ uint8_t nat_proto;
+
+ /*
+ * Do not try to lock possibly contested mutex if we're near the limit.
+ * Drop packet instead.
+ */
+ if (cfg->jlen >= cfg->jmaxlen) {
+ NAT64STAT_INC(&cfg->stats, jmaxlen);
+ return (NULL);
+ }
+
+ memset(&haddr, 0, sizeof(haddr));
+ nat_proto = 0;
+ if (f_id != NULL) {
+ haddr = f_id->src_ip6;
+ nat_proto = nat64lsn_proto_map[f_id->proto];
+
+ DPRINTF(DP_JQUEUE, "REQUEST pg nat_proto %d on proto %d",
+ nat_proto, f_id->proto);
+
+ if (nat_proto == 0)
+ return (NULL);
+ }
+
+ ji = malloc(sizeof(struct nat64lsn_job_item), M_IPFW,
+ M_NOWAIT | M_ZERO);
+
+ if (ji == NULL) {
+ NAT64STAT_INC(&cfg->stats, jnomem);
+ return (NULL);
+ }
+
+ ji->jtype = jtype;
+
+ if (f_id != NULL) {
+ ji->f_id = *f_id;
+ ji->haddr = haddr;
+ ji->nat_proto = nat_proto;
+ }
+
+ return (ji);
+}
+
+static NAT64NOINLINE void
+nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+
+ if (ji == NULL)
+ return;
+
+ JQUEUE_LOCK();
+ TAILQ_INSERT_TAIL(&cfg->jhead, ji, next);
+ cfg->jlen++;
+ NAT64STAT_INC(&cfg->stats, jrequests);
+
+ if (callout_pending(&cfg->jcallout) == 0)
+ callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
+ JQUEUE_UNLOCK();
+}
+
+static NAT64NOINLINE void
+nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_head *jhead, int jlen)
+{
+
+ if (TAILQ_EMPTY(jhead))
+ return;
+
+ /* Attach current queue to execution one */
+ JQUEUE_LOCK();
+ TAILQ_CONCAT(&cfg->jhead, jhead, next);
+ cfg->jlen += jlen;
+ NAT64STAT_ADD(&cfg->stats, jrequests, jlen);
+
+ if (callout_pending(&cfg->jcallout) == 0)
+ callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
+ JQUEUE_UNLOCK();
+}
+
+static unsigned int
+flow6_hash(const struct ipfw_flow_id *f_id)
+{
+	unsigned char hbuf[36];
+
+	memcpy(hbuf, &f_id->dst_ip6, 16);
+	memcpy(&hbuf[16], &f_id->src_ip6, 16);
+	memcpy(&hbuf[32], &f_id->dst_port, 2);
+	memcpy(&hbuf[34], &f_id->src_port, 2); /* was [32]: clobbered dst_port */
+
+	return (djb_hash(hbuf, sizeof(hbuf)));
+}
+
+static NAT64NOINLINE int
+nat64lsn_request_host(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm)
+{
+ struct nat64lsn_job_item *ji;
+ struct mbuf *m;
+
+ m = *pm;
+ *pm = NULL;
+
+ ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWHOST);
+ if (ji == NULL) {
+ m_freem(m);
+ NAT64STAT_INC(&cfg->stats, dropped);
+ DPRINTF(DP_DROPS, "failed to create job");
+ } else {
+ ji->m = m;
+ /* Provide pseudo-random value based on flow */
+ ji->fhash = flow6_hash(f_id);
+ nat64lsn_enqueue_job(cfg, ji);
+ NAT64STAT_INC(&cfg->stats, jhostsreq);
+ }
+
+ return (IP_FW_PASS);
+}
+
+static NAT64NOINLINE int
+nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr,
+ int needs_idx)
+{
+ struct nat64lsn_job_item *ji;
+ struct mbuf *m;
+
+ m = *pm;
+ *pm = NULL;
+
+ ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWPORTGROUP);
+ if (ji == NULL) {
+ m_freem(m);
+ NAT64STAT_INC(&cfg->stats, dropped);
+ DPRINTF(DP_DROPS, "failed to create job");
+ } else {
+ ji->m = m;
+ /* Provide pseudo-random value based on flow */
+ ji->fhash = flow6_hash(f_id);
+ ji->aaddr = aaddr;
+ ji->needs_idx = needs_idx;
+ nat64lsn_enqueue_job(cfg, ji);
+ NAT64STAT_INC(&cfg->stats, jportreq);
+ }
+
+ return (IP_FW_PASS);
+}
+
+static NAT64NOINLINE struct nat64lsn_state *
+nat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh,
+    int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr)
+{
+	struct nat64lsn_portgroup *pg;
+	struct nat64lsn_state *st;
+	int i, hval, off;
+
+	/* XXX: create additional bitmask for selecting proper portgroup */
+	for (i = 0; i < nh->pg_used; i++) {
+		pg = PORTGROUP_BYSIDX(cfg, nh, i + 1);
+		if (pg == NULL)
+			continue;
+		if (*aaddr == 0)
+			*aaddr = pg->aaddr;
+		if (pg->nat_proto != nat_proto)
+			continue;
+
+		off = PG_GET_FREE_IDX(pg);
+		if (off != 0) {
+			/* We have found spare state. Use it */
+			off--;
+			PG_MARK_BUSY_IDX(pg, off);
+			st = &pg->states[off];
+
+			/*
+			 * Fill in new info. Assume state was zeroed.
+			 * Timestamp and flags will be filled by caller.
+			 */
+			st->u.s = kst->u.s;
+			st->cur.idx = i + 1;
+			st->cur.off = off;
+
+			/* Insert into host hash table */
+			hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1);
+			st->next = nh->phash[hval];
+			nh->phash[hval] = st->cur;
+
+			nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off);
+
+			NAT64STAT_INC(&cfg->stats, screated);
+
+			return (st);
+		}
+		/* Save last used alias address */
+		*aaddr = pg->aaddr;
+	}
+
+	return (NULL);
+}
+
+static NAT64NOINLINE int
+nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
+    struct mbuf **pm)
+{
+	struct pfloghdr loghdr, *logdata;
+	char a[INET6_ADDRSTRLEN];
+	struct nat64lsn_host *nh;
+	struct st_ptr sidx;
+	struct nat64lsn_state *st, kst;
+	struct nat64lsn_portgroup *pg;
+	struct icmp6_hdr *icmp6;
+	uint32_t aaddr;
+	int action, hval, k, nat_proto, proto;
+	uint16_t aport, state_ts, state_flags;
+
+	/* Check if af/protocol is supported and get it short id */
+	nat_proto = nat64lsn_proto_map[f_id->proto];
+	if (nat_proto == 0) {
+		/*
+		 * Since we can be called from jobs handler, we need
+		 * to free mbuf by self, do not leave this task to
+		 * ipfw_check_packet().
+		 */
+		NAT64STAT_INC(&cfg->stats, noproto);
+		m_freem(*pm);
+		*pm = NULL;
+		return (IP_FW_DENY);
+	}
+
+	/* Try to find host first */
+	I6HASH_FIND(cfg, nh, &f_id->src_ip6);
+
+	if (nh == NULL)
+		return (nat64lsn_request_host(cfg, f_id, pm));
+
+	/* Fill-in on-stack state structure */
+	kst.u.s.faddr = f_id->dst_ip6.s6_addr32[3];
+	kst.u.s.fport = f_id->dst_port;
+	kst.u.s.lport = f_id->src_port;
+
+	/* Prepare some fields we might need to update */
+	hval = 0;
+	proto = nat64_getlasthdr(*pm, &hval);
+	if (proto < 0) {
+		NAT64STAT_INC(&cfg->stats, dropped);
+		DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
+		m_freem(*pm);
+		*pm = NULL;
+		return (IP_FW_DENY);
+	}
+
+	SET_AGE(state_ts);
+	if (proto == IPPROTO_TCP)
+		state_flags = convert_tcp_flags(
+		    TCP(mtodo(*pm, hval))->th_flags);
+	else
+		state_flags = 0;
+	if (proto == IPPROTO_ICMPV6) {
+		/* Alter local port data */
+		icmp6 = mtodo(*pm, hval);
+		if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
+		    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
+			kst.u.s.lport = ntohs(icmp6->icmp6_id);
+	}
+
+	hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1);
+	pg = NULL;
+	st = NULL;
+
+	/* OK, let's find state in host hash */
+	NAT64_LOCK(nh);
+	sidx = nh->phash[hval];
+	k = 0;
+	while (sidx.idx != 0) {
+		pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
+		st = &pg->states[sidx.off];
+		//DPRINTF("SISX: %d/%d next: %d/%d", sidx.idx, sidx.off,
+		//st->next.idx, st->next.off);
+		if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto)
+			break;
+		if (k++ > 1000) {
+			DPRINTF(DP_ALL, "XXX: too long %d/%d %d/%d\n",
+			    sidx.idx, sidx.off, st->next.idx, st->next.off);
+			inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+			DPRINTF(DP_GENERIC, "TR host %s %p on cpu %d",
+			    a, nh, curcpu);
+			k = 0;
+		}
+		sidx = st->next;
+	}
+
+	if (sidx.idx == 0) {
+		aaddr = 0;
+		st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr);
+		if (st == NULL) {
+			/* No free states. Request more if we can */
+			if (nh->pg_used >= cfg->max_chunks) {
+				/* Limit reached */
+				NAT64STAT_INC(&cfg->stats, dropped);
+				inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+				DPRINTF(DP_DROPS, "PG limit reached "
+				    " for host %s (used %u, allocated %u, "
+				    "limit %u)", a,
+				    nh->pg_used * NAT64_CHUNK_SIZE,
+				    nh->pg_allocated * NAT64_CHUNK_SIZE,
+				    cfg->max_chunks * NAT64_CHUNK_SIZE);
+				m_freem(*pm);
+				*pm = NULL;
+				NAT64_UNLOCK(nh);
+				return (IP_FW_DENY);
+			}
+			if ((nh->pg_allocated <=
+			    nh->pg_used + NAT64LSN_REMAININGPG) &&
+			    nh->pg_allocated < cfg->max_chunks)
+				action = 1; /* Request new indexes */
+			else
+				action = 0;
+			NAT64_UNLOCK(nh);
+			//DPRINTF("No state, unlock for %p", nh);
+			return (nat64lsn_request_portgroup(cfg, f_id,
+			    pm, aaddr, action));
+		}
+
+		/* We've got new state. */
+		sidx = st->cur;
+		pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
+	}
+
+	/* Okay, state found */
+
+	/* Update necessary fields */
+	if (st->timestamp != state_ts)
+		st->timestamp = state_ts;
+	if ((st->flags & state_flags) != state_flags) /* some flag bits new */
+		st->flags |= state_flags;
+
+	/* Copy needed state data */
+	aaddr = pg->aaddr;
+	aport = htons(pg->aport + sidx.off);
+
+	NAT64_UNLOCK(nh);
+
+	if (cfg->flags & NAT64_LOG) {
+		logdata = &loghdr;
+		nat64lsn_log(logdata, *pm, AF_INET6, pg->idx, st->cur.off);
+	} else
+		logdata = NULL;
+
+	action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->stats, logdata);
+	if (action == NAT64SKIP)
+		return (IP_FW_PASS);
+	if (action == NAT64MFREE)
+		m_freem(*pm);
+	*pm = NULL; /* mark mbuf as consumed */
+	return (IP_FW_DENY);
+}
+
+/*
+ * Main dataplane entry point.
+ */
+int
+ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
+ ipfw_insn *cmd, int *done)
+{
+ ipfw_insn *icmd;
+ struct nat64lsn_cfg *cfg;
+ int ret;
+
+ IPFW_RLOCK_ASSERT(ch);
+
+ *done = 1; /* terminate the search */
+ icmd = cmd + 1;
+ if (cmd->opcode != O_EXTERNAL_ACTION ||
+ cmd->arg1 != V_nat64lsn_eid ||
+ icmd->opcode != O_EXTERNAL_INSTANCE ||
+ (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
+ return (0);
+
+ switch (args->f_id.addr_type) {
+ case 4:
+ ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
+ break;
+ case 6:
+ ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
+ break;
+ default:
+ return (0);
+ }
+ return (ret);
+}
+
+static int
+nat64lsn_ctor_host(void *mem, int size, void *arg, int flags)
+{
+ struct nat64lsn_host *nh;
+
+ nh = (struct nat64lsn_host *)mem;
+ memset(nh->pg_ptr, 0, sizeof(nh->pg_ptr));
+ memset(nh->phash, 0, sizeof(nh->phash));
+ return (0);
+}
+
+static int
+nat64lsn_ctor_pgidx(void *mem, int size, void *arg, int flags)
+{
+
+ memset(mem, 0, size);
+ return (0);
+}
+
+void
+nat64lsn_init_internal(void)
+{
+
+ memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map));
+ /* Set up supported protocol map */
+ nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP;
+ nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP;
+ nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP;
+ nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP;
+ /* Fill in reverse proto map */
+ memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map));
+ nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP;
+ nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP;
+ nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6;
+
+ JQUEUE_LOCK_INIT();
+ nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone",
+ sizeof(struct nat64lsn_host), nat64lsn_ctor_host, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, 0);
+ nat64lsn_pg_zone = uma_zcreate("NAT64 portgroups zone",
+ sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ nat64lsn_pgidx_zone = uma_zcreate("NAT64 portgroup indexes zone",
+ sizeof(struct nat64lsn_portgroup *) * NAT64LSN_PGIDX_CHUNK,
+ nat64lsn_ctor_pgidx, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+}
+
+void
+nat64lsn_uninit_internal(void)
+{
+
+ JQUEUE_LOCK_DESTROY();
+ uma_zdestroy(nat64lsn_host_zone);
+ uma_zdestroy(nat64lsn_pg_zone);
+ uma_zdestroy(nat64lsn_pgidx_zone);
+}
+
+void
+nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
+{
+
+ callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
+ nat64lsn_periodic, cfg);
+}
+
+struct nat64lsn_cfg *
+nat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr)
+{
+ struct nat64lsn_cfg *cfg;
+
+ cfg = malloc(sizeof(struct nat64lsn_cfg), M_IPFW, M_WAITOK | M_ZERO);
+ TAILQ_INIT(&cfg->jhead);
+ cfg->vp = curvnet;
+ cfg->ch = ch;
+ COUNTER_ARRAY_ALLOC(cfg->stats.stats, NAT64STATS, M_WAITOK);
+
+ cfg->ihsize = NAT64LSN_HSIZE;
+ cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW,
+ M_WAITOK | M_ZERO);
+
+ cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW,
+ M_WAITOK | M_ZERO);
+
+ callout_init(&cfg->periodic, CALLOUT_MPSAFE);
+ callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
+
+ return (cfg);
+}
+
+/*
+ * Destroy all hosts callback.
+ * Called on module unload when all activity already finished, so
+ * can work without any locks.
+ */
+static NAT64NOINLINE int
+nat64lsn_destroy_host(struct nat64lsn_host *nh, struct nat64lsn_cfg *cfg)
+{
+ struct nat64lsn_portgroup *pg;
+ int i;
+
+ for (i = nh->pg_used; i > 0; i--) {
+ pg = PORTGROUP_BYSIDX(cfg, nh, i);
+ if (pg == NULL)
+ continue;
+ cfg->pg[pg->idx] = NULL;
+ destroy_portgroup(pg);
+ nh->pg_used--;
+ }
+ destroy_host6(nh);
+ cfg->ihcount--;
+ return (0);
+}
+
+void
+nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
+{
+ struct nat64lsn_host *nh, *tmp;
+
+ JQUEUE_LOCK();
+ callout_drain(&cfg->jcallout);
+ JQUEUE_UNLOCK();
+
+ callout_drain(&cfg->periodic);
+ I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_destroy_host, cfg);
+ DPRINTF(DP_OBJ, "instance %s: hosts %d", cfg->name, cfg->ihcount);
+
+ COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS);
+ free(cfg->ih, M_IPFW);
+ free(cfg->pg, M_IPFW);
+ free(cfg, M_IPFW);
+}
+
diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.h b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.h
new file mode 100644
index 00000000..e6ceb1dd
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.h
@@ -0,0 +1,351 @@
+/*-
+ * Copyright (c) 2015 Yandex LLC
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_NAT64LSN_H_
+#define _IP_FW_NAT64LSN_H_
+
+#define NAT64_CHUNK_SIZE_BITS 6 /* 64 ports */
+#define NAT64_CHUNK_SIZE (1 << NAT64_CHUNK_SIZE_BITS)
+
+#define NAT64_MIN_PORT 1024
+#define NAT64_MIN_CHUNK (NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS)
+
+struct st_ptr {
+ uint8_t idx; /* index in nh->pg_ptr array.
+ * NOTE: it starts from 1.
+ */
+ uint8_t off;
+};
+#define NAT64LSN_MAXPGPTR ((1 << (sizeof(uint8_t) * NBBY)) - 1)
+#define NAT64LSN_PGPTRMASKBITS (sizeof(uint64_t) * NBBY)
+#define NAT64LSN_PGPTRNMASK (roundup(NAT64LSN_MAXPGPTR, \
+ NAT64LSN_PGPTRMASKBITS) / NAT64LSN_PGPTRMASKBITS)
+
+struct nat64lsn_portgroup;
+/* sizeof(struct nat64lsn_host) = 64 + 64x2 + 8x8 = 256 bytes */
+struct nat64lsn_host {
+ struct rwlock h_lock; /* Host states lock */
+
+ struct in6_addr addr;
+ struct nat64lsn_host *next;
+ uint16_t timestamp; /* Last altered */
+ uint16_t hsize; /* ports hash size */
+ uint16_t pg_used; /* Number of portgroups used */
+#define NAT64LSN_REMAININGPG 8 /* Number of remaining PG before
+ * requesting of new chunk of indexes.
+ */
+ uint16_t pg_allocated; /* Number of portgroups indexes
+ * allocated.
+ */
+#define NAT64LSN_HSIZE 64
+ struct st_ptr phash[NAT64LSN_HSIZE]; /* XXX: hardcoded size */
+ /*
+ * PG indexes are stored in chunks with 32 elements.
+ * The maximum count is limited to 255 due to st_ptr->idx is uint8_t.
+ */
+#define NAT64LSN_PGIDX_CHUNK 32
+#define NAT64LSN_PGNIDX (roundup(NAT64LSN_MAXPGPTR, \
+ NAT64LSN_PGIDX_CHUNK) / NAT64LSN_PGIDX_CHUNK)
+ struct nat64lsn_portgroup **pg_ptr[NAT64LSN_PGNIDX]; /* PG indexes */
+};
+
+#define NAT64_RLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_RLOCKED)
+#define NAT64_WLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_WLOCKED)
+
+#define NAT64_RLOCK(h) rw_rlock(&(h)->h_lock)
+#define NAT64_RUNLOCK(h) rw_runlock(&(h)->h_lock)
+#define NAT64_WLOCK(h) rw_wlock(&(h)->h_lock)
+#define NAT64_WUNLOCK(h) rw_wunlock(&(h)->h_lock)
+#define NAT64_LOCK(h) NAT64_WLOCK(h)
+#define NAT64_UNLOCK(h) NAT64_WUNLOCK(h)
+#define NAT64_LOCK_INIT(h) do { \
+ rw_init(&(h)->h_lock, "NAT64 host lock"); \
+ } while (0)
+
+#define NAT64_LOCK_DESTROY(h) do { \
+ rw_destroy(&(h)->h_lock); \
+ } while (0)
+
+/* Internal proto index */
+#define NAT_PROTO_TCP 1
+#define NAT_PROTO_UDP 2
+#define NAT_PROTO_ICMP 3
+
+#define NAT_MAX_PROTO 4
+extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
+
+VNET_DECLARE(uint16_t, nat64lsn_eid);
+#define V_nat64lsn_eid VNET(nat64lsn_eid)
+#define IPFW_TLV_NAT64LSN_NAME IPFW_TLV_EACTION_NAME(V_nat64lsn_eid)
+
+/* Timestamp macro */
+#define _CT ((int)time_uptime % 65536)
+#define SET_AGE(x) (x) = _CT
+#define GET_AGE(x) ((_CT >= (x)) ? _CT - (x) : \
+ (int)65536 + _CT - (x))
+
+#ifdef __LP64__
+/* ffsl() is capable of checking 64-bit ints */
+#define _FFS64
+#endif
+
+/* 16 bytes */
+struct nat64lsn_state {
+ union {
+ struct {
+ in_addr_t faddr; /* Remote IPv4 address */
+ uint16_t fport; /* Remote IPv4 port */
+ uint16_t lport; /* Local IPv6 port */
+ }s;
+ uint64_t hkey;
+ } u;
+ uint8_t nat_proto;
+ uint8_t flags;
+ uint16_t timestamp;
+ struct st_ptr cur; /* Index of portgroup in nat64lsn_host */
+ struct st_ptr next; /* Next entry index */
+};
+
+/*
+ * 1024+32 bytes per 64 states, used to store state
+ * AND for outside-in state lookup
+ */
+struct nat64lsn_portgroup {
+ struct nat64lsn_host *host; /* IPv6 source host info */
+ in_addr_t aaddr; /* Alias addr, network format */
+ uint16_t aport; /* Base port */
+ uint16_t timestamp;
+ uint8_t nat_proto;
+ uint8_t spare[3];
+ uint32_t idx;
+#ifdef _FFS64
+ uint64_t freemask; /* Mask of free entries */
+#else
+ uint32_t freemask[2]; /* Mask of free entries */
+#endif
+ struct nat64lsn_state states[NAT64_CHUNK_SIZE]; /* State storage */
+};
+#ifdef _FFS64
+#define PG_MARK_BUSY_IDX(_pg, _idx) (_pg)->freemask &= ~((uint64_t)1<<(_idx))
+#define PG_MARK_FREE_IDX(_pg, _idx) (_pg)->freemask |= ((uint64_t)1<<(_idx))
+#define PG_IS_FREE_IDX(_pg, _idx) ((_pg)->freemask & ((uint64_t)1<<(_idx)))
+#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
+#define PG_GET_FREE_IDX(_pg) (ffsll((_pg)->freemask))
+#define PG_IS_EMPTY(_pg) (((_pg)->freemask + 1) == 0)
+#else
+#define PG_MARK_BUSY_IDX(_pg, _idx) \
+ (_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32))
+#define PG_MARK_FREE_IDX(_pg, _idx) \
+ (_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx) % 32))
+#define PG_IS_FREE_IDX(_pg, _idx) \
+ ((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32)))
+#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
+#define PG_GET_FREE_IDX(_pg) _pg_get_free_idx(_pg)
+#define PG_IS_EMPTY(_pg) \
+ ((((_pg)->freemask[0] + 1) == 0 && ((_pg)->freemask[1] + 1) == 0))
+
+static inline int
+_pg_get_free_idx(const struct nat64lsn_portgroup *pg)
+{
+ int i;
+
+ if ((i = ffsl(pg->freemask[0])) != 0)
+ return (i);
+ if ((i = ffsl(pg->freemask[1])) != 0)
+ return (i + 32);
+ return (0);
+}
+
+#endif
+
+TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
+
+#define NAT64LSN_FLAGSMASK (NAT64_LOG)
+struct nat64lsn_cfg {
+ struct named_object no;
+ //struct nat64_exthost *ex; /* Pointer to external addr array */
+ struct nat64lsn_portgroup **pg; /* XXX: array of pointers */
+ struct nat64lsn_host **ih; /* Host hash */
+ uint32_t prefix4; /* IPv4 prefix */
+ uint32_t pmask4; /* IPv4 prefix mask */
+ uint32_t ihsize; /* IPv6 host hash size */
+ uint8_t plen4;
+ uint8_t plen6;
+ uint8_t nomatch_verdict;/* What to return to ipfw on no-match */
+ uint8_t nomatch_final; /* Exit outer loop? */
+ struct in6_addr prefix6; /* IPv6 prefix to embed IPv4 hosts */
+
+ uint32_t ihcount; /* Number of items in host hash */
+ int max_chunks; /* Max chunks per client */
+ int agg_prefix_len; /* Prefix length to count */
+ int agg_prefix_max; /* Max hosts per agg prefix */
+ uint32_t jmaxlen; /* Max jobqueue length */
+ uint32_t flags;
+ uint16_t min_chunk; /* Min port group # to use */
+ uint16_t max_chunk; /* Max port group # to use */
+ uint16_t nh_delete_delay; /* Stale host delete delay */
+ uint16_t pg_delete_delay; /* Stale portgroup del delay */
+ uint16_t st_syn_ttl; /* TCP syn expire */
+ uint16_t st_close_ttl; /* TCP fin expire */
+ uint16_t st_estab_ttl; /* TCP established expire */
+ uint16_t st_udp_ttl; /* UDP expire */
+ uint16_t st_icmp_ttl; /* ICMP expire */
+ uint32_t protochunks[NAT_MAX_PROTO];/* Number of chunks used */
+
+ struct callout periodic;
+ struct callout jcallout;
+ struct ip_fw_chain *ch;
+ struct vnet *vp;
+ struct nat64lsn_job_head jhead;
+ int jlen;
+ char name[64]; /* Nat instance name */
+ nat64_stats_block stats;
+};
+
+struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch,
+ size_t numaddr);
+void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg);
+void nat64lsn_start_instance(struct nat64lsn_cfg *cfg);
+void nat64lsn_init_internal(void);
+void nat64lsn_uninit_internal(void);
+int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
+ ipfw_insn *cmd, int *done);
+
+void
+nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
+ const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
+ const char *px, int off);
+/*
+ * Portgroup layout
+ * addr x nat_proto x port_off
+ *
+ */
+
+#define _ADDR_PG_PROTO_COUNT (65536 >> NAT64_CHUNK_SIZE_BITS)
+#define _ADDR_PG_COUNT (_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO)
+
+#define GET_ADDR_IDX(_cfg, _addr) ((_addr) - ((_cfg)->prefix4))
+#define __GET_PORTGROUP_IDX(_proto, _port) \
+ ((_proto - 1) * _ADDR_PG_PROTO_COUNT + \
+ ((_port) >> NAT64_CHUNK_SIZE_BITS))
+
+#define _GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port) \
+ GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT + \
+ __GET_PORTGROUP_IDX(_proto, _port)
+#define GET_PORTGROUP(_cfg, _addr, _proto, _port) \
+ ((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)])
+
+#define PORTGROUP_CHUNK(_nh, _idx) \
+ ((_nh)->pg_ptr[(_idx)])
+#define PORTGROUP_BYSIDX(_cfg, _nh, _idx) \
+ (PORTGROUP_CHUNK(_nh, (_idx - 1) / NAT64LSN_PGIDX_CHUNK) \
+ [((_idx) - 1) % NAT64LSN_PGIDX_CHUNK])
+
+
+/* Chained hash table */
+#define CHT_FIND(_ph, _hsize, _PX, _x, _key) do { \
+ unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
+ _PX##lock(_ph, _buck); \
+ _x = _PX##first(_ph, _buck); \
+ for ( ; _x != NULL; _x = _PX##next(_x)) { \
+ if (_PX##cmp(_key, _PX##val(_x))) \
+ break; \
+ } \
+ if (_x == NULL) \
+ _PX##unlock(_ph, _buck); \
+} while(0)
+
+#define CHT_UNLOCK_BUCK(_ph, _PX, _buck) \
+ _PX##unlock(_ph, _buck);
+
+#define CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do { \
+ unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
+ _PX##unlock(_ph, _buck); \
+} while(0)
+
+#define CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do { \
+ unsigned int _buck = _PX##hash(_PX##val(_i)) & (_hsize - 1); \
+ _PX##lock(_ph, _buck); \
+ _PX##next(_i) = _PX##first(_ph, _buck); \
+ _PX##first(_ph, _buck) = _i; \
+ _PX##unlock(_ph, _buck); \
+} while(0)
+
+#define CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do { \
+ unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
+ _PX##lock(_ph, _buck); \
+ _x = _PX##first(_ph, _buck); \
+ _tmp = NULL; \
+ for ( ; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \
+ if (_PX##cmp(_key, _PX##val(_x))) \
+ break; \
+ } \
+ if (_x != NULL) { \
+ if (_tmp == NULL) \
+ _PX##first(_ph, _buck) = _PX##next(_x); \
+ else \
+ _PX##next(_tmp) = _PX##next(_x); \
+ } \
+ _PX##unlock(_ph, _buck); \
+} while(0)
+
+#define CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg) do { \
+ for (unsigned int _i = 0; _i < _hsize; _i++) { \
+ _PX##lock(_ph, _i); \
+ _x = _PX##first(_ph, _i); \
+ _tmp = NULL; \
+ for (; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \
+ if (_cb(_x, _arg) == 0) \
+ continue; \
+ if (_tmp == NULL) \
+ _PX##first(_ph, _i) = _PX##next(_x); \
+ else \
+ _tmp = _PX##next(_x); \
+ } \
+ _PX##unlock(_ph, _i); \
+ } \
+} while(0)
+
+#define CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do { \
+ unsigned int _buck; \
+ for (unsigned int _i = 0; _i < _hsize; _i++) { \
+ _x = _PX##first(_ph, _i); \
+ _y = _x; \
+ while (_y != NULL) { \
+ _buck = _PX##hash(_PX##val(_x)) & (_nhsize - 1);\
+ _y = _PX##next(_x); \
+ _PX##next(_x) = _PX##first(_nph, _buck); \
+ _PX##first(_nph, _buck) = _x; \
+ } \
+ } \
+} while(0)
+
+#endif /* _IP_FW_NAT64LSN_H_ */
+
diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c
new file mode 100644
index 00000000..a20a52ea
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c
@@ -0,0 +1,919 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2015 Yandex LLC
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sockopt.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/pfil.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64lsn.h>
+#include <netinet6/ip_fw_nat64.h>
+
+VNET_DEFINE(uint16_t, nat64lsn_eid) = 0;
+
+static struct nat64lsn_cfg *
+nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)
+{
+ struct nat64lsn_cfg *cfg;
+
+ cfg = (struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set,
+ IPFW_TLV_NAT64LSN_NAME, name);
+
+ return (cfg);
+}
+
+static void
+nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
+{
+
+ if (uc->max_ports == 0)
+ uc->max_ports = NAT64LSN_MAX_PORTS;
+ else
+ uc->max_ports = roundup(uc->max_ports, NAT64_CHUNK_SIZE);
+ if (uc->max_ports > NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR)
+ uc->max_ports = NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR;
+ if (uc->jmaxlen == 0)
+ uc->jmaxlen = NAT64LSN_JMAXLEN;
+ if (uc->jmaxlen > 65536)
+ uc->jmaxlen = 65536;
+ if (uc->nh_delete_delay == 0)
+ uc->nh_delete_delay = NAT64LSN_HOST_AGE;
+ if (uc->pg_delete_delay == 0)
+ uc->pg_delete_delay = NAT64LSN_PG_AGE;
+ if (uc->st_syn_ttl == 0)
+ uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE;
+ if (uc->st_close_ttl == 0)
+ uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE;
+ if (uc->st_estab_ttl == 0)
+ uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE;
+ if (uc->st_udp_ttl == 0)
+ uc->st_udp_ttl = NAT64LSN_UDP_AGE;
+ if (uc->st_icmp_ttl == 0)
+ uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
+}
+
+/*
+ * Creates new nat64lsn instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_lheader *olh;
+ ipfw_nat64lsn_cfg *uc;
+ struct nat64lsn_cfg *cfg;
+ struct namedobj_instance *ni;
+ uint32_t addr4, mask4;
+
+ if (sd->valsize != sizeof(*olh) + sizeof(*uc))
+ return (EINVAL);
+
+ olh = (ipfw_obj_lheader *)sd->kbuf;
+ uc = (ipfw_nat64lsn_cfg *)(olh + 1);
+
+ if (ipfw_check_object_name_generic(uc->name) != 0)
+ return (EINVAL);
+
+ if (uc->agg_prefix_len > 127 || uc->set >= IPFW_MAX_SETS)
+ return (EINVAL);
+
+ if (uc->plen4 > 32)
+ return (EINVAL);
+ if (uc->plen6 > 128 || ((uc->plen6 % 8) != 0))
+ return (EINVAL);
+
+ /* XXX: Check prefix4 to be global */
+ addr4 = ntohl(uc->prefix4.s_addr);
+ mask4 = ~((1 << (32 - uc->plen4)) - 1);
+ if ((addr4 & mask4) != addr4)
+ return (EINVAL);
+
+ /* XXX: Check prefix6 */
+ if (uc->min_port == 0)
+ uc->min_port = NAT64_MIN_PORT;
+ if (uc->max_port == 0)
+ uc->max_port = 65535;
+ if (uc->min_port > uc->max_port)
+ return (EINVAL);
+ uc->min_port = roundup(uc->min_port, NAT64_CHUNK_SIZE);
+ uc->max_port = roundup(uc->max_port, NAT64_CHUNK_SIZE);
+
+ nat64lsn_default_config(uc);
+
+ ni = CHAIN_TO_SRV(ch);
+ IPFW_UH_RLOCK(ch);
+ if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
+ IPFW_UH_RUNLOCK(ch);
+ return (EEXIST);
+ }
+ IPFW_UH_RUNLOCK(ch);
+
+ cfg = nat64lsn_init_instance(ch, 1 << (32 - uc->plen4));
+ strlcpy(cfg->name, uc->name, sizeof(cfg->name));
+ cfg->no.name = cfg->name;
+ cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
+ cfg->no.set = uc->set;
+
+ cfg->prefix4 = addr4;
+ cfg->pmask4 = addr4 | ~mask4;
+ /* XXX: Copy 96 bits */
+ cfg->plen6 = 96;
+ memcpy(&cfg->prefix6, &uc->prefix6, cfg->plen6 / 8);
+ cfg->plen4 = uc->plen4;
+ cfg->flags = uc->flags & NAT64LSN_FLAGSMASK;
+ cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
+ cfg->agg_prefix_len = uc->agg_prefix_len;
+ cfg->agg_prefix_max = uc->agg_prefix_max;
+
+ cfg->min_chunk = uc->min_port / NAT64_CHUNK_SIZE;
+ cfg->max_chunk = uc->max_port / NAT64_CHUNK_SIZE;
+
+ cfg->jmaxlen = uc->jmaxlen;
+ cfg->nh_delete_delay = uc->nh_delete_delay;
+ cfg->pg_delete_delay = uc->pg_delete_delay;
+ cfg->st_syn_ttl = uc->st_syn_ttl;
+ cfg->st_close_ttl = uc->st_close_ttl;
+ cfg->st_estab_ttl = uc->st_estab_ttl;
+ cfg->st_udp_ttl = uc->st_udp_ttl;
+ cfg->st_icmp_ttl = uc->st_icmp_ttl;
+
+ cfg->nomatch_verdict = IP_FW_DENY;
+ cfg->nomatch_final = 1; /* Exit outer loop by default */
+
+ IPFW_UH_WLOCK(ch);
+
+ if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ nat64lsn_destroy_instance(cfg);
+ return (EEXIST);
+ }
+
+ if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) {
+ IPFW_UH_WUNLOCK(ch);
+ nat64lsn_destroy_instance(cfg);
+ return (ENOSPC);
+ }
+ ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no);
+
+ /* Okay, let's link data */
+ IPFW_WLOCK(ch);
+ SRV_OBJECT(ch, cfg->no.kidx) = cfg;
+ IPFW_WUNLOCK(ch);
+
+ nat64lsn_start_instance(cfg);
+
+ IPFW_UH_WUNLOCK(ch);
+ return (0);
+}
+
+static void
+nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg)
+{
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
+ ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
+}
+
+/*
+ * Destroys nat64 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ struct nat64lsn_cfg *cfg;
+ ipfw_obj_header *oh;
+
+ if (sd->valsize != sizeof(*oh))
+ return (EINVAL);
+
+ oh = (ipfw_obj_header *)op3;
+
+ IPFW_UH_WLOCK(ch);
+ cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+ if (cfg == NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ return (ESRCH);
+ }
+
+ if (cfg->no.refcnt > 0) {
+ IPFW_UH_WUNLOCK(ch);
+ return (EBUSY);
+ }
+
+ IPFW_WLOCK(ch);
+ SRV_OBJECT(ch, cfg->no.kidx) = NULL;
+ IPFW_WUNLOCK(ch);
+
+ nat64lsn_detach_config(ch, cfg);
+ IPFW_UH_WUNLOCK(ch);
+
+ nat64lsn_destroy_instance(cfg);
+ return (0);
+}
+
+#define __COPY_STAT_FIELD(_cfg, _stats, _field) \
+ (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field)
+static void
+export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
+ struct ipfw_nat64lsn_stats *stats)
+{
+
+ __COPY_STAT_FIELD(cfg, stats, opcnt64);
+ __COPY_STAT_FIELD(cfg, stats, opcnt46);
+ __COPY_STAT_FIELD(cfg, stats, ofrags);
+ __COPY_STAT_FIELD(cfg, stats, ifrags);
+ __COPY_STAT_FIELD(cfg, stats, oerrors);
+ __COPY_STAT_FIELD(cfg, stats, noroute4);
+ __COPY_STAT_FIELD(cfg, stats, noroute6);
+ __COPY_STAT_FIELD(cfg, stats, nomatch4);
+ __COPY_STAT_FIELD(cfg, stats, noproto);
+ __COPY_STAT_FIELD(cfg, stats, nomem);
+ __COPY_STAT_FIELD(cfg, stats, dropped);
+
+ __COPY_STAT_FIELD(cfg, stats, jcalls);
+ __COPY_STAT_FIELD(cfg, stats, jrequests);
+ __COPY_STAT_FIELD(cfg, stats, jhostsreq);
+ __COPY_STAT_FIELD(cfg, stats, jportreq);
+ __COPY_STAT_FIELD(cfg, stats, jhostfails);
+ __COPY_STAT_FIELD(cfg, stats, jportfails);
+ __COPY_STAT_FIELD(cfg, stats, jmaxlen);
+ __COPY_STAT_FIELD(cfg, stats, jnomem);
+ __COPY_STAT_FIELD(cfg, stats, jreinjected);
+ __COPY_STAT_FIELD(cfg, stats, screated);
+ __COPY_STAT_FIELD(cfg, stats, sdeleted);
+ __COPY_STAT_FIELD(cfg, stats, spgcreated);
+ __COPY_STAT_FIELD(cfg, stats, spgdeleted);
+
+ stats->hostcount = cfg->ihcount;
+ stats->tcpchunks = cfg->protochunks[NAT_PROTO_TCP];
+ stats->udpchunks = cfg->protochunks[NAT_PROTO_UDP];
+ stats->icmpchunks = cfg->protochunks[NAT_PROTO_ICMP];
+}
+#undef __COPY_STAT_FIELD
+
+static void
+nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
+ ipfw_nat64lsn_cfg *uc)
+{
+
+ uc->flags = cfg->flags & NAT64LSN_FLAGSMASK;
+ uc->max_ports = cfg->max_chunks * NAT64_CHUNK_SIZE;
+ uc->agg_prefix_len = cfg->agg_prefix_len;
+ uc->agg_prefix_max = cfg->agg_prefix_max;
+
+ uc->jmaxlen = cfg->jmaxlen;
+ uc->nh_delete_delay = cfg->nh_delete_delay;
+ uc->pg_delete_delay = cfg->pg_delete_delay;
+ uc->st_syn_ttl = cfg->st_syn_ttl;
+ uc->st_close_ttl = cfg->st_close_ttl;
+ uc->st_estab_ttl = cfg->st_estab_ttl;
+ uc->st_udp_ttl = cfg->st_udp_ttl;
+ uc->st_icmp_ttl = cfg->st_icmp_ttl;
+ uc->prefix4.s_addr = htonl(cfg->prefix4);
+ uc->prefix6 = cfg->prefix6;
+ uc->plen4 = cfg->plen4;
+ uc->plen6 = cfg->plen6;
+ uc->set = cfg->no.set;
+ strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
+}
+
+struct nat64_dump_arg {
+ struct ip_fw_chain *ch;
+ struct sockopt_data *sd;
+};
+
+static int
+export_config_cb(struct namedobj_instance *ni, struct named_object *no,
+ void *arg)
+{
+ struct nat64_dump_arg *da = (struct nat64_dump_arg *)arg;
+ ipfw_nat64lsn_cfg *uc;
+
+ uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd,
+ sizeof(*uc));
+ nat64lsn_export_config(da->ch, (struct nat64lsn_cfg *)no, uc);
+ return (0);
+}
+
+/*
+ * Lists all nat64 lsn instances currently available in kernel.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_lheader *olh;
+ struct nat64_dump_arg da;
+
+ /* Check minimum header size */
+ if (sd->valsize < sizeof(ipfw_obj_lheader))
+ return (EINVAL);
+
+ olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
+
+ IPFW_UH_RLOCK(ch);
+ olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
+ IPFW_TLV_NAT64LSN_NAME);
+ olh->objsize = sizeof(ipfw_nat64lsn_cfg);
+ olh->size = sizeof(*olh) + olh->count * olh->objsize;
+
+ if (sd->valsize < olh->size) {
+ IPFW_UH_RUNLOCK(ch);
+ return (ENOMEM);
+ }
+ memset(&da, 0, sizeof(da));
+ da.ch = ch;
+ da.sd = sd;
+ ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da,
+ IPFW_TLV_NAT64LSN_NAME);
+ IPFW_UH_RUNLOCK(ch);
+
+ return (0);
+}
+
+/*
+ * Change existing nat64lsn instance configuration.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
+ * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_header *oh;
+ ipfw_nat64lsn_cfg *uc;
+ struct nat64lsn_cfg *cfg;
+ struct namedobj_instance *ni;
+
+ if (sd->valsize != sizeof(*oh) + sizeof(*uc))
+ return (EINVAL);
+
+ oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
+ sizeof(*oh) + sizeof(*uc));
+ uc = (ipfw_nat64lsn_cfg *)(oh + 1);
+
+ if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
+ oh->ntlv.set >= IPFW_MAX_SETS)
+ return (EINVAL);
+
+ ni = CHAIN_TO_SRV(ch);
+ if (sd->sopt->sopt_dir == SOPT_GET) {
+ IPFW_UH_RLOCK(ch);
+ cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
+ if (cfg == NULL) {
+ IPFW_UH_RUNLOCK(ch);
+ return (EEXIST);
+ }
+ nat64lsn_export_config(ch, cfg, uc);
+ IPFW_UH_RUNLOCK(ch);
+ return (0);
+ }
+
+ nat64lsn_default_config(uc);
+
+ IPFW_UH_WLOCK(ch);
+ cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
+ if (cfg == NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ return (EEXIST);
+ }
+
+ /*
+ * For now allow to change only following values:
+ * jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
+ * tcp_est_age, udp_age, icmp_age, flags, max_ports.
+ */
+
+ cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
+ cfg->jmaxlen = uc->jmaxlen;
+ cfg->nh_delete_delay = uc->nh_delete_delay;
+ cfg->pg_delete_delay = uc->pg_delete_delay;
+ cfg->st_syn_ttl = uc->st_syn_ttl;
+ cfg->st_close_ttl = uc->st_close_ttl;
+ cfg->st_estab_ttl = uc->st_estab_ttl;
+ cfg->st_udp_ttl = uc->st_udp_ttl;
+ cfg->st_icmp_ttl = uc->st_icmp_ttl;
+ cfg->flags = uc->flags & NAT64LSN_FLAGSMASK;
+
+ IPFW_UH_WUNLOCK(ch);
+
+ return (0);
+}
+
+/*
+ * Get nat64lsn statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ * Reply: [ ipfw_obj_header ipfw_counter_tlv ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+ struct sockopt_data *sd)
+{
+ struct ipfw_nat64lsn_stats stats;
+ struct nat64lsn_cfg *cfg;
+ ipfw_obj_header *oh;
+ ipfw_obj_ctlv *ctlv;
+ size_t sz;
+
+ sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
+ if (sd->valsize % sizeof(uint64_t))
+ return (EINVAL);
+ if (sd->valsize < sz)
+ return (ENOMEM);
+ oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+ if (oh == NULL)
+ return (EINVAL);
+ memset(&stats, 0, sizeof(stats));
+
+ IPFW_UH_RLOCK(ch);
+ cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+ if (cfg == NULL) {
+ IPFW_UH_RUNLOCK(ch);
+ return (ESRCH);
+ }
+
+ export_stats(ch, cfg, &stats);
+ IPFW_UH_RUNLOCK(ch);
+
+ ctlv = (ipfw_obj_ctlv *)(oh + 1);
+ memset(ctlv, 0, sizeof(*ctlv));
+ ctlv->head.type = IPFW_TLV_COUNTERS;
+ ctlv->head.length = sz - sizeof(ipfw_obj_header);
+ ctlv->count = sizeof(stats) / sizeof(uint64_t);
+ ctlv->objsize = sizeof(uint64_t);
+ ctlv->version = IPFW_NAT64_VERSION;
+ memcpy(ctlv + 1, &stats, sizeof(stats));
+ return (0);
+}
+
+/*
+ * Reset nat64lsn statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+ struct sockopt_data *sd)
+{
+ struct nat64lsn_cfg *cfg;
+ ipfw_obj_header *oh;
+
+ if (sd->valsize != sizeof(*oh))
+ return (EINVAL);
+ oh = (ipfw_obj_header *)sd->kbuf;
+ if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
+ oh->ntlv.set >= IPFW_MAX_SETS)
+ return (EINVAL);
+
+ IPFW_UH_WLOCK(ch);
+ cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+ if (cfg == NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ return (ESRCH);
+ }
+ COUNTER_ARRAY_ZERO(cfg->stats.stats, NAT64STATS);
+ IPFW_UH_WUNLOCK(ch);
+ return (0);
+}
+
+/*
+ * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
+ * ipfw_nat64lsn_state x count, ... ] ]
+ */
+static int
+export_pg_states(struct nat64lsn_cfg *cfg, struct nat64lsn_portgroup *pg,
+ ipfw_nat64lsn_stg *stg, struct sockopt_data *sd)
+{
+ ipfw_nat64lsn_state *ste;
+ struct nat64lsn_state *st;
+ int i, count;
+
+ NAT64_LOCK(pg->host);
+ count = 0;
+ for (i = 0; i < 64; i++) {
+ if (PG_IS_BUSY_IDX(pg, i))
+ count++;
+ }
+ DPRINTF(DP_STATE, "EXPORT PG %d, count %d", pg->idx, count);
+
+ if (count == 0) {
+ stg->count = 0;
+ NAT64_UNLOCK(pg->host);
+ return (0);
+ }
+ ste = (ipfw_nat64lsn_state *)ipfw_get_sopt_space(sd,
+ count * sizeof(ipfw_nat64lsn_state));
+ if (ste == NULL) {
+ NAT64_UNLOCK(pg->host);
+ return (1);
+ }
+
+ stg->alias4.s_addr = pg->aaddr;
+ stg->proto = nat64lsn_rproto_map[pg->nat_proto];
+ stg->flags = 0;
+ stg->host6 = pg->host->addr;
+ stg->count = count;
+ for (i = 0; i < 64; i++) {
+ if (PG_IS_FREE_IDX(pg, i))
+ continue;
+ st = &pg->states[i];
+ ste->daddr.s_addr = st->u.s.faddr;
+ ste->dport = st->u.s.fport;
+ ste->aport = pg->aport + i;
+ ste->sport = st->u.s.lport;
+ ste->flags = st->flags; /* XXX filter flags */
+ ste->idle = GET_AGE(st->timestamp);
+ ste++;
+ }
+ NAT64_UNLOCK(pg->host);
+
+ return (0);
+}
+
+static int
+get_next_idx(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
+ uint16_t *port)
+{
+
+ if (*port < 65536 - NAT64_CHUNK_SIZE) {
+ *port += NAT64_CHUNK_SIZE;
+ return (0);
+ }
+ *port = 0;
+
+ if (*nat_proto < NAT_MAX_PROTO - 1) {
+ *nat_proto += 1;
+ return (0);
+ }
+ *nat_proto = 1;
+
+ if (*addr < cfg->pmask4) {
+ *addr += 1;
+ return (0);
+ }
+
+ /* End of space. */
+ return (1);
+}
+
+#define PACK_IDX(addr, proto, port) \
+ ((uint64_t)addr << 32) | ((uint32_t)port << 16) | (proto << 8)
+#define UNPACK_IDX(idx, addr, proto, port) \
+ (addr) = (uint32_t)((idx) >> 32); \
+ (port) = (uint16_t)(((idx) >> 16) & 0xFFFF); \
+ (proto) = (uint8_t)(((idx) >> 8) & 0xFF)
+
+static struct nat64lsn_portgroup *
+get_next_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
+ uint16_t *port)
+{
+ struct nat64lsn_portgroup *pg;
+ uint64_t pre_pack, post_pack;
+
+ pg = NULL;
+ pre_pack = PACK_IDX(*addr, *nat_proto, *port);
+ for (;;) {
+ if (get_next_idx(cfg, addr, nat_proto, port) != 0) {
+ /* End of states */
+ return (pg);
+ }
+
+ pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
+ if (pg != NULL)
+ break;
+ }
+
+ post_pack = PACK_IDX(*addr, *nat_proto, *port);
+ if (pre_pack == post_pack)
+ DPRINTF(DP_STATE, "XXX: PACK_IDX %u %d %d",
+ *addr, *nat_proto, *port);
+ return (pg);
+}
+
+static NAT64NOINLINE struct nat64lsn_portgroup *
+get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
+ uint16_t *port)
+{
+ struct nat64lsn_portgroup *pg;
+
+ pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
+ if (pg == NULL)
+ pg = get_next_pg(cfg, addr, nat_proto, port);
+
+ return (pg);
+}
+
+/*
+ * Lists nat64lsn states.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
+ * Reply: [ ipfw_obj_header ipfw_obj_data [
+ * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_header *oh;
+ ipfw_obj_data *od;
+ ipfw_nat64lsn_stg *stg;
+ struct nat64lsn_cfg *cfg;
+ struct nat64lsn_portgroup *pg, *pg_next;
+ uint64_t next_idx;
+ size_t sz;
+ uint32_t addr, states;
+ uint16_t port;
+ uint8_t nat_proto;
+
+ sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
+ sizeof(uint64_t);
+ /* Check minimum header size */
+ if (sd->valsize < sz)
+ return (EINVAL);
+
+ oh = (ipfw_obj_header *)sd->kbuf;
+ od = (ipfw_obj_data *)(oh + 1);
+ if (od->head.type != IPFW_TLV_OBJDATA ||
+ od->head.length != sz - sizeof(ipfw_obj_header))
+ return (EINVAL);
+
+ next_idx = *(uint64_t *)(od + 1);
+ /* Translate index to the request position to start from */
+ UNPACK_IDX(next_idx, addr, nat_proto, port);
+ if (nat_proto >= NAT_MAX_PROTO)
+ return (EINVAL);
+ if (nat_proto == 0 && addr != 0)
+ return (EINVAL);
+
+ IPFW_UH_RLOCK(ch);
+ cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+ if (cfg == NULL) {
+ IPFW_UH_RUNLOCK(ch);
+ return (ESRCH);
+ }
+ /* Fill in starting point */
+ if (addr == 0) {
+ addr = cfg->prefix4;
+ nat_proto = 1;
+ port = 0;
+ }
+ if (addr < cfg->prefix4 || addr > cfg->pmask4) {
+ IPFW_UH_RUNLOCK(ch);
+ DPRINTF(DP_GENERIC | DP_STATE, "XXX: %ju %u %u",
+ (uintmax_t)next_idx, addr, cfg->pmask4);
+ return (EINVAL);
+ }
+
+ sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
+ sizeof(ipfw_nat64lsn_stg);
+ if (sd->valsize < sz) {
+ /* Was returning with IPFW_UH_RLOCK still held (lock leak). */
+ IPFW_UH_RUNLOCK(ch);
+ return (ENOMEM);
+ }
+ oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
+ od = (ipfw_obj_data *)(oh + 1);
+ od->head.type = IPFW_TLV_OBJDATA;
+ od->head.length = sz - sizeof(ipfw_obj_header);
+ stg = (ipfw_nat64lsn_stg *)(od + 1);
+
+ pg = get_first_pg(cfg, &addr, &nat_proto, &port);
+ if (pg == NULL) {
+ /* No states */
+ stg->next_idx = 0xFF;
+ stg->count = 0;
+ IPFW_UH_RUNLOCK(ch);
+ return (0);
+ }
+ states = 0;
+ pg_next = NULL;
+ while (pg != NULL) {
+ pg_next = get_next_pg(cfg, &addr, &nat_proto, &port);
+ if (pg_next == NULL)
+ stg->next_idx = 0xFF;
+ else
+ stg->next_idx = PACK_IDX(addr, nat_proto, port);
+
+ if (export_pg_states(cfg, pg, stg, sd) != 0) {
+ IPFW_UH_RUNLOCK(ch);
+ return (states == 0 ? ENOMEM: 0);
+ }
+ states += stg->count;
+ od->head.length += stg->count * sizeof(ipfw_nat64lsn_state);
+ sz += stg->count * sizeof(ipfw_nat64lsn_state);
+ if (pg_next != NULL) {
+ sz += sizeof(ipfw_nat64lsn_stg);
+ if (sd->valsize < sz)
+ break;
+ stg = (ipfw_nat64lsn_stg *)ipfw_get_sopt_space(sd,
+ sizeof(ipfw_nat64lsn_stg));
+ }
+ pg = pg_next;
+ }
+ IPFW_UH_RUNLOCK(ch);
+ return (0);
+}
+
+/* Sockopt dispatch table: maps IP_FW_NAT64LSN_* control codes to handlers. */
+static struct ipfw_sopt_handler scodes[] = {
+ { IP_FW_NAT64LSN_CREATE, 0, HDIR_BOTH, nat64lsn_create },
+ { IP_FW_NAT64LSN_DESTROY,0, HDIR_SET, nat64lsn_destroy },
+ { IP_FW_NAT64LSN_CONFIG, 0, HDIR_BOTH, nat64lsn_config },
+ { IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list },
+ { IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats },
+ { IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats },
+ { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states },
+};
+
+/*
+ * Opcode classifier for O_EXTERNAL_INSTANCE: reports the named-object
+ * index stored in cmd->arg1, but only when the preceding opcode is our
+ * O_EXTERNAL_ACTION. Returns 0 if the instruction belongs to nat64lsn,
+ * non-zero otherwise.
+ */
+static int
+nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+ ipfw_insn *icmd;
+
+ icmd = cmd - 1;
+ if (icmd->opcode != O_EXTERNAL_ACTION ||
+ icmd->arg1 != V_nat64lsn_eid)
+ return (1);
+
+ *puidx = cmd->arg1;
+ *ptype = 0;
+ return (0);
+}
+
+/* Stores the (possibly renumbered) instance kernel index into the opcode. */
+static void
+nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx)
+{
+
+ cmd->arg1 = idx;
+}
+
+/* Looks up a nat64lsn named object by name for the opcode rewriter. */
+static int
+nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+ struct named_object **pno)
+{
+ int err;
+
+ err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
+ IPFW_TLV_NAT64LSN_NAME, pno);
+ return (err);
+}
+
+/*
+ * Looks up a nat64lsn named object by kernel index. The index is known
+ * to be valid (a rule references it), hence the KASSERT rather than an
+ * error return. Caller must hold IPFW_UH_WLOCK.
+ */
+static struct named_object *
+nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+ struct namedobj_instance *ni;
+ struct named_object *no;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+ ni = CHAIN_TO_SRV(ch);
+ no = ipfw_objhash_lookup_kidx(ni, idx);
+ KASSERT(no != NULL, ("NAT64LSN with index %d not found", idx));
+
+ return (no);
+}
+
+/* Handles set move/swap requests for nat64lsn instances (ipfw set cmds). */
+static int
+nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
+ enum ipfw_sets_cmd cmd)
+{
+
+ return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME,
+ set, new_set, cmd));
+}
+
+/* Opcode rewriter hooks so O_EXTERNAL_INSTANCE args survive renumbering. */
+static struct opcode_obj_rewrite opcodes[] = {
+ {
+ .opcode = O_EXTERNAL_INSTANCE,
+ .etlv = IPFW_TLV_EACTION /* just show it isn't table */,
+ .classifier = nat64lsn_classify,
+ .update = nat64lsn_update_arg1,
+ .find_byname = nat64lsn_findbyname,
+ .find_bykidx = nat64lsn_findbykidx,
+ .manage_sets = nat64lsn_manage_sets,
+ },
+};
+
+/*
+ * Objhash iterator callback: unlinks and destroys one nat64lsn instance.
+ * Invoked from nat64lsn_uninit() with IPFW_UH_WLOCK held.
+ */
+static int
+destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
+ void *arg)
+{
+ struct nat64lsn_cfg *cfg;
+ struct ip_fw_chain *ch;
+
+ ch = (struct ip_fw_chain *)arg;
+ cfg = (struct nat64lsn_cfg *)SRV_OBJECT(ch, no->kidx);
+ SRV_OBJECT(ch, no->kidx) = NULL;
+ nat64lsn_detach_config(ch, cfg);
+ nat64lsn_destroy_instance(cfg);
+ return (0);
+}
+
+/*
+ * Module/VNET init: registers the "nat64lsn" external action plus the
+ * sockopt and opcode-rewriter tables. `first` is non-zero only for the
+ * first VNET, which also performs one-time global initialization.
+ */
+int
+nat64lsn_init(struct ip_fw_chain *ch, int first)
+{
+
+ if (first != 0)
+ nat64lsn_init_internal();
+ V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn");
+ if (V_nat64lsn_eid == 0)
+ return (ENXIO);
+ IPFW_ADD_SOPT_HANDLER(first, scodes);
+ IPFW_ADD_OBJ_REWRITER(first, opcodes);
+ return (0);
+}
+
+/*
+ * Module/VNET teardown: deregisters handlers, then destroys every
+ * remaining nat64lsn instance. `last` is non-zero for the final VNET.
+ */
+void
+nat64lsn_uninit(struct ip_fw_chain *ch, int last)
+{
+
+ IPFW_DEL_OBJ_REWRITER(last, opcodes);
+ IPFW_DEL_SOPT_HANDLER(last, scodes);
+ ipfw_del_eaction(ch, V_nat64lsn_eid);
+ /*
+ * Since we already have deregistered external action,
+ * our named objects become unaccessible via rules, because
+ * all rules were truncated by ipfw_del_eaction().
+ * So, we can unlink and destroy our named objects without holding
+ * IPFW_WLOCK().
+ */
+ IPFW_UH_WLOCK(ch);
+ ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
+ IPFW_TLV_NAT64LSN_NAME);
+ V_nat64lsn_eid = 0;
+ IPFW_UH_WUNLOCK(ch);
+ if (last != 0)
+ nat64lsn_uninit_internal();
+}
+
diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64stl.c b/freebsd/sys/netpfil/ipfw/nat64/nat64stl.c
new file mode 100644
index 00000000..36e6e268
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nat64/nat64stl.c
@@ -0,0 +1,262 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_pflog.h>
+#include <net/pfil.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet6/ip_fw_nat64.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+#include <netpfil/ipfw/nat64/nat64stl.h>
+#include <netpfil/pf/pf.h>
+
+/* Resolve the nat64stl instance referenced by an O_EXTERNAL_INSTANCE insn. */
+#define NAT64_LOOKUP(chain, cmd) \
+ (struct nat64stl_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
+
+/*
+ * Emits a pflog(4) record for a translated packet. `plog` points to a
+ * caller-provided header which is fully initialized here.
+ */
+static void
+nat64stl_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
+ uint32_t kidx)
+{
+ static uint32_t pktid = 0;
+
+ /* Zero the whole header; sizeof(plog) only cleared a pointer's worth. */
+ memset(plog, 0, sizeof(*plog));
+ plog->length = PFLOG_REAL_HDRLEN;
+ plog->af = family;
+ plog->action = PF_NAT;
+ plog->dir = PF_IN;
+ plog->rulenr = htonl(kidx);
+ plog->subrulenr = htonl(++pktid);
+ plog->ruleset[0] = '\0';
+ strlcpy(plog->ifname, "NAT64STL", sizeof(plog->ifname));
+ ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+}
+
+/*
+ * Translates an IPv4 packet to IPv6. The destination IPv6 address comes
+ * from the matched table entry (tablearg nh6); the source is the packet's
+ * IPv4 source embedded into the configured prefix. Returns NAT64SKIP for
+ * untranslatable (multicast/broadcast/private) addresses, NAT64MFREE when
+ * the mapped destination is invalid, or the translator's result.
+ */
+static int
+nat64stl_handle_ip4(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+ struct mbuf *m, uint32_t tablearg)
+{
+ struct pfloghdr loghdr, *logdata;
+ struct in6_addr saddr, daddr;
+ struct ip *ip;
+
+ ip = mtod(m, struct ip*);
+ if (nat64_check_ip4(ip->ip_src.s_addr) != 0 ||
+ nat64_check_ip4(ip->ip_dst.s_addr) != 0 ||
+ nat64_check_private_ip4(ip->ip_src.s_addr) != 0 ||
+ nat64_check_private_ip4(ip->ip_dst.s_addr) != 0)
+ return (NAT64SKIP);
+
+ daddr = TARG_VAL(chain, tablearg, nh6);
+ if (nat64_check_ip6(&daddr) != 0)
+ return (NAT64MFREE);
+ saddr = cfg->prefix6;
+ /* Embed the IPv4 source into the low bits of the /96 prefix. */
+ nat64_set_ip4(&saddr, ip->ip_src.s_addr);
+
+ if (cfg->flags & NAT64_LOG) {
+ logdata = &loghdr;
+ nat64stl_log(logdata, m, AF_INET, cfg->no.kidx);
+ } else
+ logdata = NULL;
+ return (nat64_do_handle_ip4(m, &saddr, &daddr, 0, &cfg->stats,
+ logdata));
+}
+
+/*
+ * Translates an IPv6 packet to IPv4. The IPv4 destination comes from the
+ * matched table entry (tablearg nh4). Packets whose destination does not
+ * match the configured prefix are skipped (try next rule).
+ */
+static int
+nat64stl_handle_ip6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+ struct mbuf *m, uint32_t tablearg)
+{
+ struct pfloghdr loghdr, *logdata;
+ struct ip6_hdr *ip6;
+ uint32_t aaddr;
+
+ aaddr = htonl(TARG_VAL(chain, tablearg, nh4));
+
+ /*
+ * NOTE: we expect ipfw_chk() did m_pullup() up to upper level
+ * protocol's headers. Also we skip some checks, that ip6_input(),
+ * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
+ */
+ ip6 = mtod(m, struct ip6_hdr *);
+ /* Check ip6_dst matches configured prefix */
+ if (bcmp(&ip6->ip6_dst, &cfg->prefix6, cfg->plen6 / 8) != 0)
+ return (NAT64SKIP);
+
+ if (cfg->flags & NAT64_LOG) {
+ logdata = &loghdr;
+ nat64stl_log(logdata, m, AF_INET6, cfg->no.kidx);
+ } else
+ logdata = NULL;
+ return (nat64_do_handle_ip6(m, aaddr, 0, &cfg->stats, logdata));
+}
+
+/*
+ * Handles ICMPv6 errors originated by intermediate routers: looks up the
+ * destination of the *inner* (embedded) IPv6 header in map64 to find the
+ * IPv4 mapping, then translates the ICMPv6 message. Drops messages other
+ * than the four translatable error types.
+ */
+static int
+nat64stl_handle_icmp6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+ struct mbuf *m)
+{
+ struct pfloghdr loghdr, *logdata;
+ nat64_stats_block *stats;
+ struct ip6_hdr *ip6i;
+ struct icmp6_hdr *icmp6;
+ uint32_t tablearg;
+ int hlen, proto;
+
+ hlen = 0;
+ stats = &cfg->stats;
+ proto = nat64_getlasthdr(m, &hlen);
+ if (proto != IPPROTO_ICMPV6) {
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ icmp6 = mtodo(m, hlen);
+ switch (icmp6->icmp6_type) {
+ case ICMP6_DST_UNREACH:
+ case ICMP6_PACKET_TOO_BIG:
+ case ICMP6_TIME_EXCEED_TRANSIT:
+ case ICMP6_PARAM_PROB:
+ break;
+ default:
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ hlen += sizeof(struct icmp6_hdr);
+ if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ /*
+ * NOTE(review): m_pullup() may return a different mbuf; the caller's
+ * args->m is not updated with the new chain — confirm callers never
+ * touch the old pointer after a NAT64RETURN/translated result.
+ */
+ if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
+ m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
+ if (m == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ return (NAT64RETURN);
+ }
+ /*
+ * Use destination address from inner IPv6 header to determine
+ * IPv4 mapped address.
+ */
+ ip6i = mtodo(m, hlen);
+ if (ipfw_lookup_table_extended(chain, cfg->map64,
+ sizeof(struct in6_addr), &ip6i->ip6_dst, &tablearg) == 0) {
+ m_freem(m);
+ return (NAT64RETURN);
+ }
+ if (cfg->flags & NAT64_LOG) {
+ logdata = &loghdr;
+ nat64stl_log(logdata, m, AF_INET6, cfg->no.kidx);
+ } else
+ logdata = NULL;
+ return (nat64_handle_icmp6(m, 0,
+ htonl(TARG_VAL(chain, tablearg, nh4)), 0, stats, logdata));
+}
+
+/*
+ * External action handler for "nat64stl" rules. Looks the packet's
+ * address up in the direction-appropriate table (map46 for IPv4 input,
+ * map64 for IPv6) and dispatches to the matching translator. On a v6
+ * lookup miss, ICMPv6 messages from intermediate routers are still
+ * handled via the inner header. Returns 0 to continue rule search, or
+ * IP_FW_DENY with *done set once the packet has been consumed.
+ */
+int
+ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args,
+ ipfw_insn *cmd, int *done)
+{
+ ipfw_insn *icmd;
+ struct nat64stl_cfg *cfg;
+ uint32_t tablearg;
+ int ret;
+
+ IPFW_RLOCK_ASSERT(chain);
+
+ *done = 0; /* try next rule if not matched */
+ icmd = cmd + 1;
+ if (cmd->opcode != O_EXTERNAL_ACTION ||
+ cmd->arg1 != V_nat64stl_eid ||
+ icmd->opcode != O_EXTERNAL_INSTANCE ||
+ (cfg = NAT64_LOOKUP(chain, icmd)) == NULL)
+ return (0);
+
+ switch (args->f_id.addr_type) {
+ case 4:
+ ret = ipfw_lookup_table(chain, cfg->map46,
+ htonl(args->f_id.dst_ip), &tablearg);
+ break;
+ case 6:
+ ret = ipfw_lookup_table_extended(chain, cfg->map64,
+ sizeof(struct in6_addr), &args->f_id.src_ip6, &tablearg);
+ break;
+ default:
+ return (0);
+ }
+ if (ret == 0) {
+ /*
+ * In case when packet is ICMPv6 message from an intermediate
+ * router, the source address of message will not match the
+ * addresses from our map64 table.
+ */
+ if (args->f_id.proto != IPPROTO_ICMPV6)
+ return (0);
+
+ ret = nat64stl_handle_icmp6(chain, cfg, args->m);
+ } else {
+ if (args->f_id.addr_type == 4)
+ ret = nat64stl_handle_ip4(chain, cfg, args->m,
+ tablearg);
+ else
+ ret = nat64stl_handle_ip6(chain, cfg, args->m,
+ tablearg);
+ }
+ if (ret == NAT64SKIP)
+ return (0);
+
+ *done = 1; /* terminate the search */
+ if (ret == NAT64MFREE)
+ m_freem(args->m);
+ /* Packet was either translated (re-injected) or freed above. */
+ args->m = NULL;
+ return (IP_FW_DENY);
+}
+
+
diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64stl.h b/freebsd/sys/netpfil/ipfw/nat64/nat64stl.h
new file mode 100644
index 00000000..42ec20ea
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nat64/nat64stl.h
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_NAT64STL_H_
+#define _IP_FW_NAT64STL_H_
+
+/* Runtime configuration of one stateless NAT64 (nat64stl) instance. */
+struct nat64stl_cfg {
+ struct named_object no;
+
+ uint16_t map64; /* table with 6to4 mapping */
+ uint16_t map46; /* table with 4to6 mapping */
+
+ struct in6_addr prefix6;/* IPv6 prefix */
+ uint8_t plen6; /* prefix length */
+ uint8_t flags; /* flags for internal use */
+/* Construction-progress bits, used for rollback in nat64stl_create(). */
+#define NAT64STL_KIDX 0x0100
+#define NAT64STL_46T 0x0200
+#define NAT64STL_64T 0x0400
+#define NAT64STL_FLAGSMASK (NAT64_LOG) /* flags to pass to userland */
+ char name[64];
+ nat64_stats_block stats;
+};
+
+VNET_DECLARE(uint16_t, nat64stl_eid);
+#define V_nat64stl_eid VNET(nat64stl_eid)
+#define IPFW_TLV_NAT64STL_NAME IPFW_TLV_EACTION_NAME(V_nat64stl_eid)
+
+int ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args,
+ ipfw_insn *cmd, int *done);
+
+#endif
+
diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64stl_control.c b/freebsd/sys/netpfil/ipfw/nat64/nat64stl_control.c
new file mode 100644
index 00000000..6ee04867
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nat64/nat64stl_control.c
@@ -0,0 +1,623 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sockopt.h>
+#include <sys/queue.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/pfil.h>
+#include <net/route.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64stl.h>
+#include <netinet6/ip_fw_nat64.h>
+
+VNET_DEFINE(uint16_t, nat64stl_eid) = 0;
+
+static struct nat64stl_cfg *nat64stl_alloc_config(const char *name, uint8_t set);
+static void nat64stl_free_config(struct nat64stl_cfg *cfg);
+static struct nat64stl_cfg *nat64stl_find(struct namedobj_instance *ni,
+ const char *name, uint8_t set);
+
+/*
+ * Allocates and zero-fills a new instance config. Uses M_WAITOK, so it
+ * never returns NULL. Caller must free with nat64stl_free_config().
+ */
+static struct nat64stl_cfg *
+nat64stl_alloc_config(const char *name, uint8_t set)
+{
+ struct nat64stl_cfg *cfg;
+
+ cfg = malloc(sizeof(struct nat64stl_cfg), M_IPFW, M_WAITOK | M_ZERO);
+ COUNTER_ARRAY_ALLOC(cfg->stats.stats, NAT64STATS, M_WAITOK);
+ cfg->no.name = cfg->name;
+ cfg->no.etlv = IPFW_TLV_NAT64STL_NAME;
+ cfg->no.set = set;
+ strlcpy(cfg->name, name, sizeof(cfg->name));
+ return (cfg);
+}
+
+/* Releases the per-instance counters and the config itself. */
+static void
+nat64stl_free_config(struct nat64stl_cfg *cfg)
+{
+
+ COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS);
+ free(cfg, M_IPFW);
+}
+
+/*
+ * Copies a kernel config into the userland-visible ipfw_nat64stl_cfg,
+ * resolving the two table kidx values back to name TLVs.
+ */
+static void
+nat64stl_export_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg,
+ ipfw_nat64stl_cfg *uc)
+{
+ struct named_object *no;
+
+ uc->prefix6 = cfg->prefix6;
+ uc->plen6 = cfg->plen6;
+ uc->flags = cfg->flags & NAT64STL_FLAGSMASK;
+ uc->set = cfg->no.set;
+ strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
+
+ no = ipfw_objhash_lookup_table_kidx(ch, cfg->map64);
+ ipfw_export_obj_ntlv(no, &uc->ntlv6);
+ no = ipfw_objhash_lookup_table_kidx(ch, cfg->map46);
+ ipfw_export_obj_ntlv(no, &uc->ntlv4);
+}
+
+/* Iterator context for dumping all instances into a sockopt buffer. */
+struct nat64stl_dump_arg {
+ struct ip_fw_chain *ch;
+ struct sockopt_data *sd;
+};
+
+/* Objhash callback: appends one exported config to the reply buffer. */
+static int
+export_config_cb(struct namedobj_instance *ni, struct named_object *no,
+ void *arg)
+{
+ struct nat64stl_dump_arg *da = (struct nat64stl_dump_arg *)arg;
+ ipfw_nat64stl_cfg *uc;
+
+ uc = (ipfw_nat64stl_cfg *)ipfw_get_sopt_space(da->sd, sizeof(*uc));
+ nat64stl_export_config(da->ch, (struct nat64stl_cfg *)no, uc);
+ return (0);
+}
+
+/* Finds an instance by name within a set; NULL if absent. */
+static struct nat64stl_cfg *
+nat64stl_find(struct namedobj_instance *ni, const char *name, uint8_t set)
+{
+ struct nat64stl_cfg *cfg;
+
+ cfg = (struct nat64stl_cfg *)ipfw_objhash_lookup_name_type(ni, set,
+ IPFW_TLV_NAT64STL_NAME, name);
+
+ return (cfg);
+}
+
+
+/*
+ * Second stage of instance creation: allocates the kernel index and
+ * references both tables. Progress is recorded in cfg->flags
+ * (NAT64STL_KIDX/46T/64T) so nat64stl_create() can roll back partial
+ * construction on failure. Caller must hold IPFW_UH_WLOCK.
+ */
+static int
+nat64stl_create_internal(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg,
+ ipfw_nat64stl_cfg *i)
+{
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0)
+ return (ENOSPC);
+ cfg->flags |= NAT64STL_KIDX;
+
+ if (ipfw_ref_table(ch, &i->ntlv4, &cfg->map46) != 0)
+ return (EINVAL);
+ cfg->flags |= NAT64STL_46T;
+
+ if (ipfw_ref_table(ch, &i->ntlv6, &cfg->map64) != 0)
+ return (EINVAL);
+ cfg->flags |= NAT64STL_64T;
+
+ ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no);
+
+ return (0);
+}
+
+/*
+ * Creates new nat64 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ipfw_nat64stl_cfg ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64stl_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_lheader *olh;
+ ipfw_nat64stl_cfg *uc;
+ struct namedobj_instance *ni;
+ struct nat64stl_cfg *cfg;
+ int error;
+
+ if (sd->valsize != sizeof(*olh) + sizeof(*uc))
+ return (EINVAL);
+
+ olh = (ipfw_obj_lheader *)sd->kbuf;
+ uc = (ipfw_nat64stl_cfg *)(olh + 1);
+
+ if (ipfw_check_object_name_generic(uc->name) != 0)
+ return (EINVAL);
+ /* Only the RFC 6052 well-known /96 prefix is supported. */
+ if (!IN6_IS_ADDR_WKPFX(&uc->prefix6))
+ return (EINVAL);
+ if (uc->plen6 != 96 || uc->set >= IPFW_MAX_SETS)
+ return (EINVAL);
+
+ /* XXX: check types of tables */
+
+ ni = CHAIN_TO_SRV(ch);
+ error = 0;
+
+ /* Cheap existence pre-check before allocating (may sleep). */
+ IPFW_UH_RLOCK(ch);
+ if (nat64stl_find(ni, uc->name, uc->set) != NULL) {
+ IPFW_UH_RUNLOCK(ch);
+ return (EEXIST);
+ }
+ IPFW_UH_RUNLOCK(ch);
+
+ cfg = nat64stl_alloc_config(uc->name, uc->set);
+ cfg->prefix6 = uc->prefix6;
+ cfg->plen6 = uc->plen6;
+ cfg->flags = uc->flags & NAT64STL_FLAGSMASK;
+
+ IPFW_UH_WLOCK(ch);
+
+ /* Re-check: another thread may have created it while unlocked. */
+ if (nat64stl_find(ni, uc->name, uc->set) != NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ nat64stl_free_config(cfg);
+ return (EEXIST);
+ }
+ error = nat64stl_create_internal(ch, cfg, uc);
+ if (error == 0) {
+ /* Okay, let's link data */
+ IPFW_WLOCK(ch);
+ SRV_OBJECT(ch, cfg->no.kidx) = cfg;
+ IPFW_WUNLOCK(ch);
+
+ IPFW_UH_WUNLOCK(ch);
+ return (0);
+ }
+
+ /* Roll back whatever nat64stl_create_internal() acquired. */
+ if (cfg->flags & NAT64STL_KIDX)
+ ipfw_objhash_free_idx(ni, cfg->no.kidx);
+ if (cfg->flags & NAT64STL_46T)
+ ipfw_unref_table(ch, cfg->map46);
+ if (cfg->flags & NAT64STL_64T)
+ ipfw_unref_table(ch, cfg->map64);
+
+ IPFW_UH_WUNLOCK(ch);
+ nat64stl_free_config(cfg);
+ return (error);
+}
+
+/*
+ * Change existing nat64stl instance configuration.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_nat64stl_cfg ]
+ * Reply: [ ipfw_obj_header ipfw_nat64stl_cfg ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64stl_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_header *oh;
+ ipfw_nat64stl_cfg *uc;
+ struct nat64stl_cfg *cfg;
+ struct namedobj_instance *ni;
+
+ if (sd->valsize != sizeof(*oh) + sizeof(*uc))
+ return (EINVAL);
+
+ oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
+ sizeof(*oh) + sizeof(*uc));
+ uc = (ipfw_nat64stl_cfg *)(oh + 1);
+
+ if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
+ oh->ntlv.set >= IPFW_MAX_SETS)
+ return (EINVAL);
+
+ ni = CHAIN_TO_SRV(ch);
+ if (sd->sopt->sopt_dir == SOPT_GET) {
+ IPFW_UH_RLOCK(ch);
+ cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set);
+ if (cfg == NULL) {
+ IPFW_UH_RUNLOCK(ch);
+ /* Not found: ESRCH, matching destroy/stats handlers. */
+ return (ESRCH);
+ }
+ nat64stl_export_config(ch, cfg, uc);
+ IPFW_UH_RUNLOCK(ch);
+ return (0);
+ }
+
+ IPFW_UH_WLOCK(ch);
+ cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set);
+ if (cfg == NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ /* Not found: ESRCH, matching destroy/stats handlers. */
+ return (ESRCH);
+ }
+
+ /*
+ * For now allow to change only following values:
+ * flags.
+ */
+
+ cfg->flags = uc->flags & NAT64STL_FLAGSMASK;
+ IPFW_UH_WUNLOCK(ch);
+ return (0);
+}
+
+/*
+ * Unlinks an instance from the objhash and drops its index and table
+ * references. Caller must hold IPFW_UH_WLOCK; the config itself is
+ * freed separately by nat64stl_free_config().
+ */
+static void
+nat64stl_detach_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg)
+{
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
+ ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
+ ipfw_unref_table(ch, cfg->map46);
+ ipfw_unref_table(ch, cfg->map64);
+}
+
+/*
+ * Destroys nat64 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64stl_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_header *oh;
+ struct nat64stl_cfg *cfg;
+
+ if (sd->valsize != sizeof(*oh))
+ return (EINVAL);
+
+ oh = (ipfw_obj_header *)sd->kbuf;
+ if (ipfw_check_object_name_generic(oh->ntlv.name) != 0)
+ return (EINVAL);
+
+ IPFW_UH_WLOCK(ch);
+ cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+ if (cfg == NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ return (ESRCH);
+ }
+ /* Refuse to destroy while rules still reference the instance. */
+ if (cfg->no.refcnt > 0) {
+ IPFW_UH_WUNLOCK(ch);
+ return (EBUSY);
+ }
+
+ /* Hide from the fast path before tearing down. */
+ IPFW_WLOCK(ch);
+ SRV_OBJECT(ch, cfg->no.kidx) = NULL;
+ IPFW_WUNLOCK(ch);
+
+ nat64stl_detach_config(ch, cfg);
+ IPFW_UH_WUNLOCK(ch);
+
+ nat64stl_free_config(cfg);
+ return (0);
+}
+
+/*
+ * Lists all nat64stl instances currently available in kernel.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader ipfw_nat64stl_cfg x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64stl_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_lheader *olh;
+ struct nat64stl_dump_arg da;
+
+ /* Check minimum header size */
+ if (sd->valsize < sizeof(ipfw_obj_lheader))
+ return (EINVAL);
+
+ olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
+
+ IPFW_UH_RLOCK(ch);
+ olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
+ IPFW_TLV_NAT64STL_NAME);
+ olh->objsize = sizeof(ipfw_nat64stl_cfg);
+ olh->size = sizeof(*olh) + olh->count * olh->objsize;
+
+ /* ENOMEM tells userland to retry with the size we just reported. */
+ if (sd->valsize < olh->size) {
+ IPFW_UH_RUNLOCK(ch);
+ return (ENOMEM);
+ }
+ memset(&da, 0, sizeof(da));
+ da.ch = ch;
+ da.sd = sd;
+ ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb,
+ &da, IPFW_TLV_NAT64STL_NAME);
+ IPFW_UH_RUNLOCK(ch);
+
+ return (0);
+}
+
+#define __COPY_STAT_FIELD(_cfg, _stats, _field) \
+ (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field)
+/* Snapshots the per-CPU counters into a flat userland stats structure. */
+static void
+export_stats(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg,
+ struct ipfw_nat64stl_stats *stats)
+{
+
+ __COPY_STAT_FIELD(cfg, stats, opcnt64);
+ __COPY_STAT_FIELD(cfg, stats, opcnt46);
+ __COPY_STAT_FIELD(cfg, stats, ofrags);
+ __COPY_STAT_FIELD(cfg, stats, ifrags);
+ __COPY_STAT_FIELD(cfg, stats, oerrors);
+ __COPY_STAT_FIELD(cfg, stats, noroute4);
+ __COPY_STAT_FIELD(cfg, stats, noroute6);
+ __COPY_STAT_FIELD(cfg, stats, noproto);
+ __COPY_STAT_FIELD(cfg, stats, nomem);
+ __COPY_STAT_FIELD(cfg, stats, dropped);
+}
+
+/*
+ * Get nat64stl statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64stl_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+ struct sockopt_data *sd)
+{
+ struct ipfw_nat64stl_stats stats;
+ struct nat64stl_cfg *cfg;
+ ipfw_obj_header *oh;
+ ipfw_obj_ctlv *ctlv;
+ size_t sz;
+
+ sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
+ /* Reply payload is an array of uint64_t counters. */
+ if (sd->valsize % sizeof(uint64_t))
+ return (EINVAL);
+ if (sd->valsize < sz)
+ return (ENOMEM);
+ oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+ if (oh == NULL)
+ return (EINVAL);
+ memset(&stats, 0, sizeof(stats));
+
+ IPFW_UH_RLOCK(ch);
+ cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+ if (cfg == NULL) {
+ IPFW_UH_RUNLOCK(ch);
+ return (ESRCH);
+ }
+ export_stats(ch, cfg, &stats);
+ IPFW_UH_RUNLOCK(ch);
+
+ /* Build the reply after dropping the lock; stats is a local copy. */
+ ctlv = (ipfw_obj_ctlv *)(oh + 1);
+ memset(ctlv, 0, sizeof(*ctlv));
+ ctlv->head.type = IPFW_TLV_COUNTERS;
+ ctlv->head.length = sz - sizeof(ipfw_obj_header);
+ ctlv->count = sizeof(stats) / sizeof(uint64_t);
+ ctlv->objsize = sizeof(uint64_t);
+ ctlv->version = IPFW_NAT64_VERSION;
+ memcpy(ctlv + 1, &stats, sizeof(stats));
+ return (0);
+}
+
+/*
+ * Reset nat64stl statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success
+ */
+static int
+nat64stl_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+ struct sockopt_data *sd)
+{
+ struct nat64stl_cfg *cfg;
+ ipfw_obj_header *oh;
+
+ if (sd->valsize != sizeof(*oh))
+ return (EINVAL);
+ oh = (ipfw_obj_header *)sd->kbuf;
+ if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
+ oh->ntlv.set >= IPFW_MAX_SETS)
+ return (EINVAL);
+
+ IPFW_UH_WLOCK(ch);
+ cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+ if (cfg == NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ return (ESRCH);
+ }
+ /* Zero all per-CPU counters for this instance. */
+ COUNTER_ARRAY_ZERO(cfg->stats.stats, NAT64STATS);
+ IPFW_UH_WUNLOCK(ch);
+ return (0);
+}
+
+/* Sockopt dispatch table: maps IP_FW_NAT64STL_* control codes to handlers. */
+static struct ipfw_sopt_handler scodes[] = {
+
+ { IP_FW_NAT64STL_CREATE, 0, HDIR_SET, nat64stl_create },
+ { IP_FW_NAT64STL_DESTROY,0, HDIR_SET, nat64stl_destroy },
+ { IP_FW_NAT64STL_CONFIG, 0, HDIR_BOTH, nat64stl_config },
+ { IP_FW_NAT64STL_LIST, 0, HDIR_GET, nat64stl_list },
+ { IP_FW_NAT64STL_STATS, 0, HDIR_GET, nat64stl_stats },
+ { IP_FW_NAT64STL_RESET_STATS,0, HDIR_SET, nat64stl_reset_stats },
+};
+
+/*
+ * Opcode classifier for O_EXTERNAL_INSTANCE: reports the named-object
+ * index in cmd->arg1 when the preceding opcode is our O_EXTERNAL_ACTION.
+ * Returns 0 if the instruction belongs to nat64stl, non-zero otherwise.
+ */
+static int
+nat64stl_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+ ipfw_insn *icmd;
+
+ icmd = cmd - 1;
+ if (icmd->opcode != O_EXTERNAL_ACTION ||
+ icmd->arg1 != V_nat64stl_eid)
+ return (1);
+
+ *puidx = cmd->arg1;
+ *ptype = 0;
+ return (0);
+}
+
+/* Stores the (possibly renumbered) instance kernel index into the opcode. */
+static void
+nat64stl_update_arg1(ipfw_insn *cmd, uint16_t idx)
+{
+
+ cmd->arg1 = idx;
+}
+
+/* Looks up a nat64stl named object by name for the opcode rewriter. */
+static int
+nat64stl_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+ struct named_object **pno)
+{
+ int err;
+
+ err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
+ IPFW_TLV_NAT64STL_NAME, pno);
+ return (err);
+}
+
+/*
+ * Looks up a nat64stl named object by kernel index. The index is known
+ * to be valid (a rule references it), hence the KASSERT rather than an
+ * error return. Caller must hold IPFW_UH_WLOCK.
+ */
+static struct named_object *
+nat64stl_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+ struct namedobj_instance *ni;
+ struct named_object *no;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+ ni = CHAIN_TO_SRV(ch);
+ no = ipfw_objhash_lookup_kidx(ni, idx);
+ KASSERT(no != NULL, ("NAT with index %d not found", idx));
+
+ return (no);
+}
+
+/* Handles set move/swap requests for nat64stl instances (ipfw set cmds). */
+static int
+nat64stl_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
+ enum ipfw_sets_cmd cmd)
+{
+
+ return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64STL_NAME,
+ set, new_set, cmd));
+}
+
+/* Opcode rewriter hooks so O_EXTERNAL_INSTANCE args survive renumbering. */
+static struct opcode_obj_rewrite opcodes[] = {
+ {
+ .opcode = O_EXTERNAL_INSTANCE,
+ .etlv = IPFW_TLV_EACTION /* just show it isn't table */,
+ .classifier = nat64stl_classify,
+ .update = nat64stl_update_arg1,
+ .find_byname = nat64stl_findbyname,
+ .find_bykidx = nat64stl_findbykidx,
+ .manage_sets = nat64stl_manage_sets,
+ },
+};
+
+/*
+ * Objhash iterator callback: unlinks and frees one nat64stl instance.
+ * Invoked from nat64stl_uninit() with IPFW_UH_WLOCK held.
+ */
+static int
+destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
+ void *arg)
+{
+ struct nat64stl_cfg *cfg;
+ struct ip_fw_chain *ch;
+
+ ch = (struct ip_fw_chain *)arg;
+ cfg = (struct nat64stl_cfg *)SRV_OBJECT(ch, no->kidx);
+ SRV_OBJECT(ch, no->kidx) = NULL;
+ nat64stl_detach_config(ch, cfg);
+ nat64stl_free_config(cfg);
+ return (0);
+}
+
+/*
+ * Module/VNET init: registers the "nat64stl" external action plus the
+ * sockopt and opcode-rewriter tables. `first` is non-zero for the first
+ * VNET only (tables are registered once).
+ */
+int
+nat64stl_init(struct ip_fw_chain *ch, int first)
+{
+
+ V_nat64stl_eid = ipfw_add_eaction(ch, ipfw_nat64stl, "nat64stl");
+ if (V_nat64stl_eid == 0)
+ return (ENXIO);
+ IPFW_ADD_SOPT_HANDLER(first, scodes);
+ IPFW_ADD_OBJ_REWRITER(first, opcodes);
+ return (0);
+}
+
+/*
+ * Module/VNET teardown: deregisters handlers, then destroys every
+ * remaining nat64stl instance. `last` is non-zero for the final VNET.
+ */
+void
+nat64stl_uninit(struct ip_fw_chain *ch, int last)
+{
+
+ IPFW_DEL_OBJ_REWRITER(last, opcodes);
+ IPFW_DEL_SOPT_HANDLER(last, scodes);
+ ipfw_del_eaction(ch, V_nat64stl_eid);
+ /*
+ * Since we already have deregistered external action,
+ * our named objects become unaccessible via rules, because
+ * all rules were truncated by ipfw_del_eaction().
+ * So, we can unlink and destroy our named objects without holding
+ * IPFW_WLOCK().
+ */
+ IPFW_UH_WLOCK(ch);
+ ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
+ IPFW_TLV_NAT64STL_NAME);
+ V_nat64stl_eid = 0;
+ IPFW_UH_WUNLOCK(ch);
+}
+
diff --git a/freebsd/sys/netpfil/ipfw/nptv6/ip_fw_nptv6.c b/freebsd/sys/netpfil/ipfw/nptv6/ip_fw_nptv6.c
new file mode 100644
index 00000000..92a2c7a3
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nptv6/ip_fw_nptv6.c
@@ -0,0 +1,101 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2016 Yandex LLC
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+
+#include <net/if.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nptv6/nptv6.h>
+
+static int
+vnet_ipfw_nptv6_init(const void *arg __unused)	/* per-VNET constructor; hooks nptv6 into layer3 chain */
+{
+
+	return (nptv6_init(&V_layer3_chain, IS_DEFAULT_VNET(curvnet)));
+}
+
+static int
+vnet_ipfw_nptv6_uninit(const void *arg __unused)	/* per-VNET destructor */
+{
+
+	nptv6_uninit(&V_layer3_chain, IS_DEFAULT_VNET(curvnet));
+	return (0);	/* always succeeds */
+}
+
+static int
+ipfw_nptv6_modevent(module_t mod, int type, void *unused)	/* real work is done in the VNET_SYSINITs above */
+{
+
+	switch (type) {
+	case MOD_LOAD:
+	case MOD_UNLOAD:
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+	return (0);
+}
+
+static moduledata_t ipfw_nptv6_mod = {
+	"ipfw_nptv6",
+	ipfw_nptv6_modevent,
+	0
+};
+
+/* Define startup order. */
+#define	IPFW_NPTV6_SI_SUB_FIREWALL	SI_SUB_PROTO_IFATTACHDOMAIN
+#define	IPFW_NPTV6_MODEVENT_ORDER	(SI_ORDER_ANY - 128) /* after ipfw */
+#define	IPFW_NPTV6_MODULE_ORDER		(IPFW_NPTV6_MODEVENT_ORDER + 1)
+#define	IPFW_NPTV6_VNET_ORDER		(IPFW_NPTV6_MODEVENT_ORDER + 2)
+
+DECLARE_MODULE(ipfw_nptv6, ipfw_nptv6_mod, IPFW_NPTV6_SI_SUB_FIREWALL,
+    IPFW_NPTV6_MODULE_ORDER);
+MODULE_DEPEND(ipfw_nptv6, ipfw, 3, 3, 3);	/* requires ipfw ABI version 3 */
+MODULE_VERSION(ipfw_nptv6, 1);
+
+VNET_SYSINIT(vnet_ipfw_nptv6_init, IPFW_NPTV6_SI_SUB_FIREWALL,
+    IPFW_NPTV6_VNET_ORDER, vnet_ipfw_nptv6_init, NULL);
+VNET_SYSUNINIT(vnet_ipfw_nptv6_uninit, IPFW_NPTV6_SI_SUB_FIREWALL,
+    IPFW_NPTV6_VNET_ORDER, vnet_ipfw_nptv6_uninit, NULL);
diff --git a/freebsd/sys/netpfil/ipfw/nptv6/nptv6.c b/freebsd/sys/netpfil/ipfw/nptv6/nptv6.c
new file mode 100644
index 00000000..4256d028
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nptv6/nptv6.c
@@ -0,0 +1,894 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2016 Yandex LLC
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/netisr.h>
+#include <net/pfil.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nptv6/nptv6.h>
+
+static VNET_DEFINE(uint16_t, nptv6_eid) = 0;
+#define V_nptv6_eid VNET(nptv6_eid)
+#define IPFW_TLV_NPTV6_NAME IPFW_TLV_EACTION_NAME(V_nptv6_eid)
+
+static struct nptv6_cfg *nptv6_alloc_config(const char *name, uint8_t set);
+static void nptv6_free_config(struct nptv6_cfg *cfg);
+static struct nptv6_cfg *nptv6_find(struct namedobj_instance *ni,
+ const char *name, uint8_t set);
+static int nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp,
+ int offset);
+static int nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp,
+ int offset);
+
+#define NPTV6_LOOKUP(chain, cmd) \
+ (struct nptv6_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
+
+#ifndef IN6_MASK_ADDR
+#define IN6_MASK_ADDR(a, m) do { \
+ (a)->s6_addr32[0] &= (m)->s6_addr32[0]; \
+ (a)->s6_addr32[1] &= (m)->s6_addr32[1]; \
+ (a)->s6_addr32[2] &= (m)->s6_addr32[2]; \
+ (a)->s6_addr32[3] &= (m)->s6_addr32[3]; \
+} while (0)
+#endif
+#ifndef IN6_ARE_MASKED_ADDR_EQUAL
+#define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \
+ (((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \
+ (((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \
+ (((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \
+ (((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 )
+#endif
+
+#if 0
+#define NPTV6_DEBUG(fmt, ...) do { \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0)
+#define NPTV6_IPDEBUG(fmt, ...) do { \
+ char _s[INET6_ADDRSTRLEN], _d[INET6_ADDRSTRLEN]; \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0)
+#else
+#define NPTV6_DEBUG(fmt, ...)
+#define NPTV6_IPDEBUG(fmt, ...)
+#endif
+
+static int
+nptv6_getlasthdr(struct nptv6_cfg *cfg, struct mbuf *m, int *offset)	/* walk ext headers; return last proto, update *offset */
+{
+	struct ip6_hdr *ip6;
+	struct ip6_hbh *hbh;
+	int proto, hlen;
+
+	hlen = (offset == NULL) ? 0: *offset;
+	if (m->m_len < hlen + sizeof(*ip6))	/* whole fixed header must be in first mbuf */
+		return (-1);
+	ip6 = mtodo(m, hlen);
+	hlen += sizeof(*ip6);
+	proto = ip6->ip6_nxt;
+	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
+	    proto == IPPROTO_DSTOPTS) {
+		if (m->m_len < hlen + sizeof(*hbh))	/* bound check BEFORE reading the ext header */
+			return (-1);
+		hbh = mtodo(m, hlen);
+		proto = hbh->ip6h_nxt;
+		hlen += (hbh->ip6h_len + 1) << 3;	/* ip6h_len excludes the first 8 octets (RFC 8200) */
+	}
+	if (offset != NULL)
+		*offset = hlen;
+	return (proto);	/* -1 on truncated packet, protocol number otherwise */
+}
+
+static int
+nptv6_translate_icmpv6(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)	/* rewrite embedded IPv6 hdr in ICMPv6 errors; 0 ok, -1 drop */
+{
+	struct icmp6_hdr *icmp6;
+	struct ip6_hdr *ip6;
+	struct mbuf *m;
+
+	m = *mp;
+	if (offset > m->m_len)
+		return (-1);
+	icmp6 = mtodo(m, offset);
+	NPTV6_DEBUG("ICMPv6 type %d", icmp6->icmp6_type);
+	switch (icmp6->icmp6_type) {
+	case ICMP6_DST_UNREACH:
+	case ICMP6_PACKET_TOO_BIG:
+	case ICMP6_TIME_EXCEEDED:
+	case ICMP6_PARAM_PROB:
+		break;	/* error messages carry the offending packet; translate it */
+	case ICMP6_ECHO_REQUEST:
+	case ICMP6_ECHO_REPLY:
+		/* nothing to translate */
+		return (0);
+	default:
+		/*
+		 * XXX: We can add some checks to not translate NDP and MLD
+		 * messages. Currently user must explicitly allow these message
+		 * types, otherwise packets will be dropped.
+		 */
+		return (-1);
+	}
+	offset += sizeof(*icmp6);
+	if (offset + sizeof(*ip6) > m->m_pkthdr.len)
+		return (-1);	/* embedded header truncated */
+	if (offset + sizeof(*ip6) > m->m_len)
+		*mp = m = m_pullup(m, offset + sizeof(*ip6));	/* may replace the mbuf chain */
+	if (m == NULL)
+		return (-1);	/* m_pullup() freed the chain on failure */
+	ip6 = mtodo(m, offset);
+	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
+	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
+	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
+	    ip6->ip6_nxt);
+	if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src,
+	    &cfg->external, &cfg->mask))
+		return (nptv6_rewrite_external(cfg, mp, offset));
+	else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
+	    &cfg->internal, &cfg->mask))
+		return (nptv6_rewrite_internal(cfg, mp, offset));
+	/*
+	 * Addresses in the inner IPv6 header doesn't matched to
+	 * our prefixes.
+	 */
+	return (-1);
+}
+
+static int
+nptv6_search_index(struct nptv6_cfg *cfg, struct in6_addr *a)	/* pick 16-bit word index used for checksum adjustment */
+{
+	int idx;
+
+	if (cfg->flags & NPTV6_48PLEN)
+		return (3);	/* /48 or shorter prefix: RFC 6296 adjusts subnet word (bits 48..63) */
+
+	/* Search suitable word index for adjustment */
+	for (idx = 4; idx < 8; idx++)	/* scan IID words for one that is not 0xffff */
+		if (a->s6_addr16[idx] != 0xffff)
+			break;
+	/*
+	 * RFC 6296 p3.7: If an NPTv6 Translator discovers a datagram with
+	 * an IID of all-zeros while performing address mapping, that
+	 * datagram MUST be dropped, and an ICMPv6 Parameter Problem error
+	 * SHOULD be generated.
+	 */
+	if (idx == 8 ||
+	    (a->s6_addr32[2] == 0 && a->s6_addr32[3] == 0))
+		return (-1);	/* no usable word, or all-zeros IID */
+	return (idx);
+}
+
+static void
+nptv6_copy_addr(struct in6_addr *src, struct in6_addr *dst,	/* merge src into dst under mask (prefix bits only) */
+    struct in6_addr *mask)
+{
+	int i;
+
+	for (i = 0; i < 8 && mask->s6_addr8[i] != 0; i++) {	/* stops at first zero mask byte; assumes contiguous mask */
+		dst->s6_addr8[i] &= ~mask->s6_addr8[i];
+		dst->s6_addr8[i] |= src->s6_addr8[i] & mask->s6_addr8[i];
+	}
+}
+
+static int
+nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)	/* internal->external translation; 0 ok, IP_FW_DENY drop */
+{
+	struct in6_addr *addr;
+	struct ip6_hdr *ip6;
+	int idx, proto;
+	uint16_t adj;
+
+	ip6 = mtodo(*mp, offset);
+	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
+	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
+	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
+	    ip6->ip6_nxt);
+	if (offset == 0)
+		addr = &ip6->ip6_src;	/* outer header: rewrite source */
+	else {
+		/*
+		 * When we rewriting inner IPv6 header, we need to rewrite
+		 * destination address back to external prefix. The datagram in
+		 * the ICMPv6 payload should looks like it was send from
+		 * external prefix.
+		 */
+		addr = &ip6->ip6_dst;
+	}
+	idx = nptv6_search_index(cfg, addr);
+	if (idx < 0) {
+		/*
+		 * Do not send ICMPv6 error when offset isn't zero.
+		 * This means we are rewriting inner IPv6 header in the
+		 * ICMPv6 error message.
+		 */
+		if (offset == 0) {
+			icmp6_error2(*mp, ICMP6_DST_UNREACH,
+			    ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif);
+			*mp = NULL;	/* icmp6_error2() consumed the mbuf */
+		}
+		return (IP_FW_DENY);
+	}
+	adj = addr->s6_addr16[idx];
+	nptv6_copy_addr(&cfg->external, addr, &cfg->mask);	/* swap prefix, keep IID */
+	adj = cksum_add(adj, cfg->adjustment);	/* checksum-neutral fixup per RFC 6296 */
+	if (adj == 0xffff)
+		adj = 0;	/* normalize negative zero */
+	addr->s6_addr16[idx] = adj;
+	if (offset == 0) {
+		/*
+		 * We may need to translate addresses in the inner IPv6
+		 * header for ICMPv6 error messages.
+		 */
+		proto = nptv6_getlasthdr(cfg, *mp, &offset);
+		if (proto < 0 || (proto == IPPROTO_ICMPV6 &&
+		    nptv6_translate_icmpv6(cfg, mp, offset) != 0))
+			return (IP_FW_DENY);
+		NPTV6STAT_INC(cfg, in2ex);	/* count only top-level translations */
+	}
+	return (0);
+}
+
+static int
+nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)	/* external->internal translation; mirror of rewrite_internal */
+{
+	struct in6_addr *addr;
+	struct ip6_hdr *ip6;
+	int idx, proto;
+	uint16_t adj;
+
+	ip6 = mtodo(*mp, offset);
+	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
+	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
+	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
+	    ip6->ip6_nxt);
+	if (offset == 0)
+		addr = &ip6->ip6_dst;	/* outer header: rewrite destination */
+	else {
+		/*
+		 * When we rewriting inner IPv6 header, we need to rewrite
+		 * source address back to internal prefix. The datagram in
+		 * the ICMPv6 payload should looks like it was send from
+		 * internal prefix.
+		 */
+		addr = &ip6->ip6_src;
+	}
+	idx = nptv6_search_index(cfg, addr);
+	if (idx < 0) {
+		/*
+		 * Do not send ICMPv6 error when offset isn't zero.
+		 * This means we are rewriting inner IPv6 header in the
+		 * ICMPv6 error message.
+		 */
+		if (offset == 0) {
+			icmp6_error2(*mp, ICMP6_DST_UNREACH,
+			    ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif);
+			*mp = NULL;	/* icmp6_error2() consumed the mbuf */
+		}
+		return (IP_FW_DENY);
+	}
+	adj = addr->s6_addr16[idx];
+	nptv6_copy_addr(&cfg->internal, addr, &cfg->mask);	/* swap prefix, keep IID */
+	adj = cksum_add(adj, ~cfg->adjustment);	/* inverse adjustment for the reverse direction */
+	if (adj == 0xffff)
+		adj = 0;	/* normalize negative zero */
+	addr->s6_addr16[idx] = adj;
+	if (offset == 0) {
+		/*
+		 * We may need to translate addresses in the inner IPv6
+		 * header for ICMPv6 error messages.
+		 */
+		proto = nptv6_getlasthdr(cfg, *mp, &offset);
+		if (proto < 0 || (proto == IPPROTO_ICMPV6 &&
+		    nptv6_translate_icmpv6(cfg, mp, offset) != 0))
+			return (IP_FW_DENY);
+		NPTV6STAT_INC(cfg, ex2in);	/* count only top-level translations */
+	}
+	return (0);
+}
+
+/*
+ * ipfw external action handler.
+ */
+static int
+ipfw_nptv6(struct ip_fw_chain *chain, struct ip_fw_args *args,	/* entry point invoked from ipfw_chk() for "nptv6" eaction */
+    ipfw_insn *cmd, int *done)
+{
+	struct ip6_hdr *ip6;
+	struct nptv6_cfg *cfg;
+	ipfw_insn *icmd;
+	int ret;
+
+	*done = 0; /* try next rule if not matched */
+	icmd = cmd + 1;	/* O_EXTERNAL_INSTANCE follows O_EXTERNAL_ACTION */
+	if (cmd->opcode != O_EXTERNAL_ACTION ||
+	    cmd->arg1 != V_nptv6_eid ||
+	    icmd->opcode != O_EXTERNAL_INSTANCE ||
+	    (cfg = NPTV6_LOOKUP(chain, icmd)) == NULL)
+		return (0);
+	/*
+	 * We need act as router, so when forwarding is disabled -
+	 * do nothing.
+	 */
+	if (V_ip6_forwarding == 0 || args->f_id.addr_type != 6)
+		return (0);
+	/*
+	 * NOTE: we expect ipfw_chk() did m_pullup() up to upper level
+	 * protocol's headers. Also we skip some checks, that ip6_input(),
+	 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
+	 */
+	ret = IP_FW_DENY;
+	ip6 = mtod(args->m, struct ip6_hdr *);
+	NPTV6_IPDEBUG("eid %u, oid %u, %s -> %s %d",
+	    cmd->arg1, icmd->arg1,
+	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
+	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
+	    ip6->ip6_nxt);
+	if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src,
+	    &cfg->internal, &cfg->mask)) {
+		/*
+		 * XXX: Do not translate packets when both src and dst
+		 * are from internal prefix.
+		 */
+		if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
+		    &cfg->internal, &cfg->mask))
+			return (0);
+		ret = nptv6_rewrite_internal(cfg, &args->m, 0);
+	} else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
+	    &cfg->external, &cfg->mask))
+		ret = nptv6_rewrite_external(cfg, &args->m, 0);
+	else
+		return (0);	/* packet does not match either prefix */
+	/*
+	 * If address wasn't rewrited - free mbuf.
+	 */
+	if (ret != 0) {
+		if (args->m != NULL) {
+			m_freem(args->m);
+			args->m = NULL; /* mark mbuf as consumed */
+		}
+		NPTV6STAT_INC(cfg, dropped);
+	}
+	/* Terminate the search if one_pass is set */
+	*done = V_fw_one_pass;
+	/* Update args->f_id when one_pass is off */
+	if (*done == 0 && ret == 0) {
+		ip6 = mtod(args->m, struct ip6_hdr *);	/* re-read: rewrite may have replaced the mbuf */
+		args->f_id.src_ip6 = ip6->ip6_src;
+		args->f_id.dst_ip6 = ip6->ip6_dst;
+	}
+	return (ret);
+}
+
+static struct nptv6_cfg *
+nptv6_alloc_config(const char *name, uint8_t set)	/* allocate a zeroed instance; never fails (M_WAITOK) */
+{
+	struct nptv6_cfg *cfg;
+
+	cfg = malloc(sizeof(struct nptv6_cfg), M_IPFW, M_WAITOK | M_ZERO);
+	COUNTER_ARRAY_ALLOC(cfg->stats, NPTV6STATS, M_WAITOK);
+	cfg->no.name = cfg->name;	/* named_object points into our own name buffer */
+	cfg->no.etlv = IPFW_TLV_NPTV6_NAME;
+	cfg->no.set = set;
+	strlcpy(cfg->name, name, sizeof(cfg->name));
+	return (cfg);
+}
+
+static void
+nptv6_free_config(struct nptv6_cfg *cfg)	/* release counters and the instance itself */
+{
+
+	COUNTER_ARRAY_FREE(cfg->stats, NPTV6STATS);
+	free(cfg, M_IPFW);
+}
+
+static void
+nptv6_export_config(struct ip_fw_chain *ch, struct nptv6_cfg *cfg,	/* kernel cfg -> userland ipfw_nptv6_cfg */
+    ipfw_nptv6_cfg *uc)
+{
+
+	uc->internal = cfg->internal;
+	uc->external = cfg->external;
+	uc->plen = cfg->plen;
+	uc->flags = cfg->flags & NPTV6_FLAGSMASK;	/* hide internal-only flag bits */
+	uc->set = cfg->no.set;
+	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
+}
+
+struct nptv6_dump_arg {
+	struct ip_fw_chain *ch;	/* chain being dumped */
+	struct sockopt_data *sd;	/* userland reply buffer */
+};
+
+static int
+export_config_cb(struct namedobj_instance *ni, struct named_object *no,	/* objhash iterator: export one instance */
+    void *arg)
+{
+	struct nptv6_dump_arg *da = (struct nptv6_dump_arg *)arg;
+	ipfw_nptv6_cfg *uc;
+
+	uc = (ipfw_nptv6_cfg *)ipfw_get_sopt_space(da->sd, sizeof(*uc));	/* caller pre-sized the buffer in nptv6_list() */
+	nptv6_export_config(da->ch, (struct nptv6_cfg *)no, uc);
+	return (0);
+}
+
+static struct nptv6_cfg *
+nptv6_find(struct namedobj_instance *ni, const char *name, uint8_t set)	/* lookup by (name, set); NULL if absent */
+{
+	struct nptv6_cfg *cfg;
+
+	cfg = (struct nptv6_cfg *)ipfw_objhash_lookup_name_type(ni, set,
+	    IPFW_TLV_NPTV6_NAME, name);
+
+	return (cfg);
+}
+
+static void
+nptv6_calculate_adjustment(struct nptv6_cfg *cfg)	/* precompute RFC 6296 checksum-neutral delta */
+{
+	uint16_t i, e;
+	uint16_t *p;
+
+	/* Calculate checksum of internal prefix */
+	for (i = 0, p = (uint16_t *)&cfg->internal;
+	    p < (uint16_t *)(&cfg->internal + 1); p++)
+		i = cksum_add(i, *p);
+
+	/* Calculate checksum of external prefix */
+	for (e = 0, p = (uint16_t *)&cfg->external;
+	    p < (uint16_t *)(&cfg->external + 1); p++)
+		e = cksum_add(e, *p);
+
+	/* Adjustment value for Int->Ext direction */
+	cfg->adjustment = cksum_add(~e, i);
+}
+
+/*
+ * Creates new NPTv6 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ipfw_nptv6_cfg ]
+ *
+ * Returns 0 on success
+ */
+static int
+nptv6_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	struct in6_addr mask;
+	ipfw_obj_lheader *olh;
+	ipfw_nptv6_cfg *uc;
+	struct namedobj_instance *ni;
+	struct nptv6_cfg *cfg;
+
+	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
+		return (EINVAL);	/* request must be exactly header + config */
+
+	olh = (ipfw_obj_lheader *)sd->kbuf;
+	uc = (ipfw_nptv6_cfg *)(olh + 1);
+	if (ipfw_check_object_name_generic(uc->name) != 0)
+		return (EINVAL);
+	if (uc->plen < 8 || uc->plen > 64 || uc->set >= IPFW_MAX_SETS)
+		return (EINVAL);	/* RFC 6296 prefixes must leave a usable IID */
+	if (IN6_IS_ADDR_MULTICAST(&uc->internal) ||
+	    IN6_IS_ADDR_MULTICAST(&uc->external) ||
+	    IN6_IS_ADDR_UNSPECIFIED(&uc->internal) ||
+	    IN6_IS_ADDR_UNSPECIFIED(&uc->external) ||
+	    IN6_IS_ADDR_LINKLOCAL(&uc->internal) ||
+	    IN6_IS_ADDR_LINKLOCAL(&uc->external))
+		return (EINVAL);	/* only unicast global-scope prefixes make sense */
+	in6_prefixlen2mask(&mask, uc->plen);
+	if (IN6_ARE_MASKED_ADDR_EQUAL(&uc->internal, &uc->external, &mask))
+		return (EINVAL);	/* identical prefixes would be a no-op */
+
+	ni = CHAIN_TO_SRV(ch);
+	IPFW_UH_RLOCK(ch);
+	if (nptv6_find(ni, uc->name, uc->set) != NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (EEXIST);
+	}
+	IPFW_UH_RUNLOCK(ch);	/* NOTE(review): lock dropped before insert below — duplicate create race possible? confirm against upstream */
+
+	cfg = nptv6_alloc_config(uc->name, uc->set);
+	cfg->plen = uc->plen;
+	if (cfg->plen <= 48)
+		cfg->flags |= NPTV6_48PLEN;	/* adjust word 3 (subnet bits) per RFC 6296 */
+	cfg->internal = uc->internal;
+	cfg->external = uc->external;
+	cfg->mask = mask;
+	IN6_MASK_ADDR(&cfg->internal, &mask);	/* canonicalize: clear host bits */
+	IN6_MASK_ADDR(&cfg->external, &mask);
+	nptv6_calculate_adjustment(cfg);
+
+	IPFW_UH_WLOCK(ch);
+	if (ipfw_objhash_alloc_idx(ni, &cfg->no.kidx) != 0) {
+		IPFW_UH_WUNLOCK(ch);
+		nptv6_free_config(cfg);
+		return (ENOSPC);	/* no free kernel object index */
+	}
+	ipfw_objhash_add(ni, &cfg->no);
+	IPFW_WLOCK(ch);
+	SRV_OBJECT(ch, cfg->no.kidx) = cfg;	/* publish runtime pointer under the run-time lock */
+	IPFW_WUNLOCK(ch);
+	IPFW_UH_WUNLOCK(ch);
+	return (0);
+}
+
+/*
+ * Destroys NPTv6 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success
+ */
+static int
+nptv6_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	struct nptv6_cfg *cfg;
+
+	if (sd->valsize != sizeof(*oh))
+		return (EINVAL);
+
+	oh = (ipfw_obj_header *)sd->kbuf;
+	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0)
+		return (EINVAL);
+
+	IPFW_UH_WLOCK(ch);
+	cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		return (ESRCH);	/* no such instance */
+	}
+	if (cfg->no.refcnt > 0) {
+		IPFW_UH_WUNLOCK(ch);
+		return (EBUSY);	/* still referenced by rules */
+	}
+
+	IPFW_WLOCK(ch);
+	SRV_OBJECT(ch, cfg->no.kidx) = NULL;	/* unpublish before freeing */
+	IPFW_WUNLOCK(ch);
+
+	ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
+	ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
+	IPFW_UH_WUNLOCK(ch);
+
+	nptv6_free_config(cfg);	/* safe: no references remain */
+	return (0);
+}
+
+/*
+ * Get or change nptv6 instance config.
+ * Request: [ ipfw_obj_header [ ipfw_nptv6_cfg ] ]
+ */
+static int
+nptv6_config(struct ip_fw_chain *chain, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+
+	return (EOPNOTSUPP);	/* reconfiguration not implemented; destroy+create instead */
+}
+
+/*
+ * Lists all NPTv6 instances currently available in kernel.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader ipfw_nptv6_cfg x N ]
+ *
+ * Returns 0 on success
+ */
+static int
+nptv6_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_lheader *olh;
+	struct nptv6_dump_arg da;
+
+	/* Check minimum header size */
+	if (sd->valsize < sizeof(ipfw_obj_lheader))
+		return (EINVAL);
+
+	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
+
+	IPFW_UH_RLOCK(ch);
+	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
+	    IPFW_TLV_NPTV6_NAME);
+	olh->objsize = sizeof(ipfw_nptv6_cfg);
+	olh->size = sizeof(*olh) + olh->count * olh->objsize;	/* tell userland the required buffer size */
+
+	if (sd->valsize < olh->size) {
+		IPFW_UH_RUNLOCK(ch);
+		return (ENOMEM);	/* caller retries with olh->size bytes */
+	}
+	memset(&da, 0, sizeof(da));
+	da.ch = ch;
+	da.sd = sd;
+	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb,
+	    &da, IPFW_TLV_NPTV6_NAME);	/* emit one ipfw_nptv6_cfg per instance */
+	IPFW_UH_RUNLOCK(ch);
+
+	return (0);
+}
+
+#define	__COPY_STAT_FIELD(_cfg, _stats, _field)	\
+	(_stats)->_field = NPTV6STAT_FETCH(_cfg, _field)
+static void
+export_stats(struct ip_fw_chain *ch, struct nptv6_cfg *cfg,	/* snapshot per-CPU counters into a flat struct */
+    struct ipfw_nptv6_stats *stats)
+{
+
+	__COPY_STAT_FIELD(cfg, stats, in2ex);
+	__COPY_STAT_FIELD(cfg, stats, ex2in);
+	__COPY_STAT_FIELD(cfg, stats, dropped);
+}
+
+/*
+ * Get NPTv6 statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]]
+ *
+ * Returns 0 on success
+ */
+static int
+nptv6_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	struct ipfw_nptv6_stats stats;
+	struct nptv6_cfg *cfg;
+	ipfw_obj_header *oh;
+	ipfw_obj_ctlv *ctlv;
+	size_t sz;
+
+	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
+	if (sd->valsize % sizeof(uint64_t))
+		return (EINVAL);	/* reply is an array of 64-bit counters */
+	if (sd->valsize < sz)
+		return (ENOMEM);
+	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+	if (oh == NULL)
+		return (EINVAL);
+	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
+	    oh->ntlv.set >= IPFW_MAX_SETS)
+		return (EINVAL);
+	memset(&stats, 0, sizeof(stats));
+
+	IPFW_UH_RLOCK(ch);
+	cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (ESRCH);
+	}
+	export_stats(ch, cfg, &stats);	/* snapshot under the lock */
+	IPFW_UH_RUNLOCK(ch);
+
+	ctlv = (ipfw_obj_ctlv *)(oh + 1);
+	memset(ctlv, 0, sizeof(*ctlv));
+	ctlv->head.type = IPFW_TLV_COUNTERS;
+	ctlv->head.length = sz - sizeof(ipfw_obj_header);
+	ctlv->count = sizeof(stats) / sizeof(uint64_t);
+	ctlv->objsize = sizeof(uint64_t);
+	ctlv->version = 1;
+	memcpy(ctlv + 1, &stats, sizeof(stats));	/* counters follow the ctlv header */
+	return (0);
+}
+
+/*
+ * Reset NPTv6 statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success
+ */
+static int
+nptv6_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	struct nptv6_cfg *cfg;
+	ipfw_obj_header *oh;
+
+	if (sd->valsize != sizeof(*oh))
+		return (EINVAL);
+	oh = (ipfw_obj_header *)sd->kbuf;
+	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
+	    oh->ntlv.set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	IPFW_UH_WLOCK(ch);
+	cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		return (ESRCH);
+	}
+	COUNTER_ARRAY_ZERO(cfg->stats, NPTV6STATS);	/* zero all per-CPU counters */
+	IPFW_UH_WUNLOCK(ch);
+	return (0);
+}
+
+static struct ipfw_sopt_handler	scodes[] = {	/* IP_FW3 sockopt dispatch table for nptv6 */
+	{ IP_FW_NPTV6_CREATE, 0,	HDIR_SET,	nptv6_create },
+	{ IP_FW_NPTV6_DESTROY,0,	HDIR_SET,	nptv6_destroy },
+	{ IP_FW_NPTV6_CONFIG, 0,	HDIR_BOTH,	nptv6_config },
+	{ IP_FW_NPTV6_LIST,   0,	HDIR_GET,	nptv6_list },
+	{ IP_FW_NPTV6_STATS,  0,	HDIR_GET,	nptv6_stats },
+	{ IP_FW_NPTV6_RESET_STATS,0,	HDIR_SET,	nptv6_reset_stats },
+};
+
+static int
+nptv6_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)	/* extract instance kidx from O_EXTERNAL_INSTANCE */
+{
+	ipfw_insn *icmd;
+
+	icmd = cmd - 1;	/* preceding opcode must be our O_EXTERNAL_ACTION */
+	NPTV6_DEBUG("opcode %d, arg1 %d, opcode0 %d, arg1 %d",
+	    cmd->opcode, cmd->arg1, icmd->opcode, icmd->arg1);
+	if (icmd->opcode != O_EXTERNAL_ACTION ||
+	    icmd->arg1 != V_nptv6_eid)
+		return (1);	/* not ours */
+
+	*puidx = cmd->arg1;
+	*ptype = 0;
+	return (0);
+}
+
+static void
+nptv6_update_arg1(ipfw_insn *cmd, uint16_t idx)	/* rewrite opcode argument with resolved kidx */
+{
+
+	cmd->arg1 = idx;
+	NPTV6_DEBUG("opcode %d, arg1 -> %d", cmd->opcode, cmd->arg1);
+}
+
+static int
+nptv6_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,	/* resolve instance name -> named_object */
+    struct named_object **pno)
+{
+	int err;
+
+	err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
+	    IPFW_TLV_NPTV6_NAME, pno);
+	NPTV6_DEBUG("uidx %u, type %u, err %d", ti->uidx, ti->type, err);
+	return (err);
+}
+
+static struct named_object *
+nptv6_findbykidx(struct ip_fw_chain *ch, uint16_t idx)	/* resolve kidx -> named_object; object must exist */
+{
+	struct namedobj_instance *ni;
+	struct named_object *no;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	ni = CHAIN_TO_SRV(ch);
+	no = ipfw_objhash_lookup_kidx(ni, idx);
+	KASSERT(no != NULL, ("NPT with index %d not found", idx));
+
+	NPTV6_DEBUG("kidx %u -> %s", idx, no->name);
+	return (no);
+}
+
+static int
+nptv6_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,	/* move/swap instances between rule sets */
+    enum ipfw_sets_cmd cmd)
+{
+
+	return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NPTV6_NAME,
+	    set, new_set, cmd));
+}
+
+static struct opcode_obj_rewrite opcodes[] = {	/* name<->kidx rewriter for nptv6 instance opcodes */
+	{
+		.opcode = O_EXTERNAL_INSTANCE,
+		.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
+		.classifier = nptv6_classify,
+		.update = nptv6_update_arg1,
+		.find_byname = nptv6_findbyname,
+		.find_bykidx = nptv6_findbykidx,
+		.manage_sets = nptv6_manage_sets,
+	},
+};
+
+static int
+destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,	/* objhash iterator: destroy one nptv6 instance */
+    void *arg)
+{
+	struct nptv6_cfg *cfg;
+	struct ip_fw_chain *ch;
+
+	ch = (struct ip_fw_chain *)arg;
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	cfg = (struct nptv6_cfg *)SRV_OBJECT(ch, no->kidx);
+	SRV_OBJECT(ch, no->kidx) = NULL;	/* unpublish runtime pointer */
+	ipfw_objhash_del(ni, &cfg->no);
+	ipfw_objhash_free_idx(ni, cfg->no.kidx);
+	nptv6_free_config(cfg);
+	return (0);	/* 0 == keep iterating */
+}
+
+int
+nptv6_init(struct ip_fw_chain *ch, int first)	/* register eaction, sockopt handlers and opcode rewriter */
+{
+
+	V_nptv6_eid = ipfw_add_eaction(ch, ipfw_nptv6, "nptv6");
+	if (V_nptv6_eid == 0)
+		return (ENXIO);	/* external action registration failed */
+	IPFW_ADD_SOPT_HANDLER(first, scodes);
+	IPFW_ADD_OBJ_REWRITER(first, opcodes);
+	return (0);
+}
+
+void
+nptv6_uninit(struct ip_fw_chain *ch, int last)	/* reverse of nptv6_init(); destroys all instances */
+{
+
+	IPFW_DEL_OBJ_REWRITER(last, opcodes);
+	IPFW_DEL_SOPT_HANDLER(last, scodes);
+	ipfw_del_eaction(ch, V_nptv6_eid);
+	/*
+	 * Since we already have deregistered external action,
+	 * our named objects become unaccessible via rules, because
+	 * all rules were truncated by ipfw_del_eaction().
+	 * So, we can unlink and destroy our named objects without holding
+	 * IPFW_WLOCK().
+	 */
+	IPFW_UH_WLOCK(ch);
+	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
+	    IPFW_TLV_NPTV6_NAME);
+	V_nptv6_eid = 0;
+	IPFW_UH_WUNLOCK(ch);
+}
+
diff --git a/freebsd/sys/netpfil/ipfw/nptv6/nptv6.h b/freebsd/sys/netpfil/ipfw/nptv6/nptv6.h
new file mode 100644
index 00000000..95b04bfe
--- /dev/null
+++ b/freebsd/sys/netpfil/ipfw/nptv6/nptv6.h
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 2016 Yandex LLC
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_NPTV6_H_
+#define _IP_FW_NPTV6_H_
+
+#include <netinet6/ip_fw_nptv6.h>
+
+#ifdef _KERNEL
+#define NPTV6STATS (sizeof(struct ipfw_nptv6_stats) / sizeof(uint64_t))
+#define NPTV6STAT_ADD(c, f, v) \
+ counter_u64_add((c)->stats[ \
+ offsetof(struct ipfw_nptv6_stats, f) / sizeof(uint64_t)], (v))
+#define NPTV6STAT_INC(c, f) NPTV6STAT_ADD(c, f, 1)
+#define NPTV6STAT_FETCH(c, f) \
+ counter_u64_fetch((c)->stats[ \
+ offsetof(struct ipfw_nptv6_stats, f) / sizeof(uint64_t)])
+
+struct nptv6_cfg {	/* one NPTv6 translator instance (kernel-side state) */
+	struct named_object	no;	/* must be first: cast to/from named_object */
+
+	struct in6_addr		internal;	/* Internal IPv6 prefix */
+	struct in6_addr		external;	/* External IPv6 prefix */
+	struct in6_addr		mask;		/* IPv6 prefix mask */
+	uint16_t		adjustment;	/* Checksum adjustment value */
+	uint8_t			plen;		/* Prefix length */
+	uint8_t			flags;		/* Flags for internal use */
+#define	NPTV6_48PLEN		0x0001
+	char			name[64];	/* Instance name */
+	counter_u64_t		stats[NPTV6STATS]; /* Statistics counters */
+};
+#define	NPTV6_FLAGSMASK		0	/* no flag bits are exported to userland */
+
+int nptv6_init(struct ip_fw_chain *ch, int first);
+void nptv6_uninit(struct ip_fw_chain *ch, int last);
+#endif /* _KERNEL */
+
+#endif /* _IP_FW_NPTV6_H_ */
+
diff --git a/freebsd/sys/netpfil/pf/if_pflog.c b/freebsd/sys/netpfil/pf/if_pflog.c
new file mode 100644
index 00000000..3a364abc
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/if_pflog.c
@@ -0,0 +1,320 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr) and
+ * Niels Provos (provos@physnet.uni-hamburg.de).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998 by John Ioannidis, Angelos D. Keromytis
+ * and Niels Provos.
+ * Copyright (c) 2001, Angelos D. Keromytis, Niels Provos.
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ *
+ * $OpenBSD: if_pflog.c,v 1.26 2007/10/18 21:58:18 mpf Exp $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_bpf.h>
+#include <rtems/bsd/local/opt_pf.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+
+#include <net/bpf.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_pflog.h>
+#include <net/if_types.h>
+#include <net/vnet.h>
+#include <net/pfvar.h>
+
+#if defined(INET) || defined(INET6)
+#include <netinet/in.h>
+#endif
+#ifdef INET
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#endif
+
+#ifdef INET6
+#include <netinet6/in6_var.h>
+#include <netinet6/nd6.h>
+#endif /* INET6 */
+
+#ifdef INET
+#include <machine/in_cksum.h>
+#endif /* INET */
+
+#define PFLOGMTU (32768 + MHLEN + MLEN)
+
+#ifdef PFLOGDEBUG
+#define DPRINTF(x) do { if (pflogdebug) printf x ; } while (0)
+#else
+#define DPRINTF(x)
+#endif
+
+static int pflogoutput(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
+static void pflogattach(int);
+static int pflogioctl(struct ifnet *, u_long, caddr_t);
+static void pflogstart(struct ifnet *);
+static int pflog_clone_create(struct if_clone *, int, caddr_t);
+static void pflog_clone_destroy(struct ifnet *);
+
+static const char pflogname[] = "pflog";
+
+static VNET_DEFINE(struct if_clone *, pflog_cloner);
+#define V_pflog_cloner VNET(pflog_cloner)
+
+VNET_DEFINE(struct ifnet *, pflogifs[PFLOGIFS_MAX]); /* for fast access */
+#define V_pflogifs VNET(pflogifs)
+
+static void
+pflogattach(int npflog __unused)
+{
+ int i;
+ for (i = 0; i < PFLOGIFS_MAX; i++)
+ V_pflogifs[i] = NULL;
+ V_pflog_cloner = if_clone_simple(pflogname, pflog_clone_create,
+ pflog_clone_destroy, 1);
+}
+
/*
 * Clone handler: create pflogN and register it in V_pflogifs[] so
 * pflog_packet() can find it quickly by a rule's log interface number.
 * Returns 0, EINVAL for an out-of-range unit, or ENOSPC.
 */
static int
pflog_clone_create(struct if_clone *ifc, int unit, caddr_t param)
{
	struct ifnet *ifp;

	/* Only PFLOGIFS_MAX units are addressable from pf rules. */
	if (unit >= PFLOGIFS_MAX)
		return (EINVAL);

	ifp = if_alloc(IFT_PFLOG);
	if (ifp == NULL) {
		return (ENOSPC);
	}
	if_initname(ifp, pflogname, unit);
	ifp->if_mtu = PFLOGMTU;
	ifp->if_ioctl = pflogioctl;
	ifp->if_output = pflogoutput;
	ifp->if_start = pflogstart;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = PFLOG_HDRLEN;
	if_attach(ifp);

	/* Logged packets are delivered to listeners via bpf(4). */
	bpfattach(ifp, DLT_PFLOG, PFLOG_HDRLEN);

	V_pflogifs[unit] = ifp;

	return (0);
}
+
/*
 * Clone handler: unregister the interface from the fast-lookup table,
 * then detach it from bpf and the network stack and free it.
 */
static void
pflog_clone_destroy(struct ifnet *ifp)
{
	int i;

	/* Clear every table slot pointing at this ifnet before freeing. */
	for (i = 0; i < PFLOGIFS_MAX; i++)
		if (V_pflogifs[i] == ifp)
			V_pflogifs[i] = NULL;

	bpfdetach(ifp);
	if_detach(ifp);
	if_free(ifp);
}
+
+/*
+ * Start output on the pflog interface.
+ */
+static void
+pflogstart(struct ifnet *ifp)
+{
+ struct mbuf *m;
+
+ for (;;) {
+ IF_LOCK(&ifp->if_snd);
+ _IF_DEQUEUE(&ifp->if_snd, m);
+ IF_UNLOCK(&ifp->if_snd);
+
+ if (m == NULL)
+ return;
+ else
+ m_freem(m);
+ }
+}
+
/*
 * Output handler: pflog is a sink interface; consume and drop the
 * packet, reporting success.
 */
static int
pflogoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
    struct route *rt)
{
	m_freem(m);
	return (0);
}
+
+/* ARGSUSED */
+static int
+pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ switch (cmd) {
+ case SIOCSIFFLAGS:
+ if (ifp->if_flags & IFF_UP)
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ else
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ break;
+ default:
+ return (ENOTTY);
+ }
+
+ return (0);
+}
+
/*
 * Log one packet on the pflog interface selected by the matching
 * rule (rm->logif): build a struct pfloghdr describing the verdict
 * and tap header + packet into bpf(4).  Returns 0 on success or when
 * nobody is listening, 1 on missing arguments.
 */
static int
pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir,
    u_int8_t reason, struct pf_rule *rm, struct pf_rule *am,
    struct pf_ruleset *ruleset, struct pf_pdesc *pd, int lookupsafe)
{
	struct ifnet *ifn;
	struct pfloghdr hdr;

	if (kif == NULL || m == NULL || rm == NULL || pd == NULL)
		return ( 1);

	/* No pflog interface configured for this unit, or no bpf listener. */
	if ((ifn = V_pflogifs[rm->logif]) == NULL || !ifn->if_bpf)
		return (0);

	bzero(&hdr, sizeof(hdr));
	hdr.length = PFLOG_REAL_HDRLEN;
	hdr.af = af;
	hdr.action = rm->action;
	hdr.reason = reason;
	memcpy(hdr.ifname, kif->pfik_name, sizeof(hdr.ifname));

	if (am == NULL) {
		/* Matched a plain rule: no sub-rule number. */
		hdr.rulenr = htonl(rm->nr);
		/* NOTE(review): OpenBSD sets subrulenr to -1 here; confirm
		 * the value 1 is intentional. */
		hdr.subrulenr = 1;
	} else {
		/* Matched inside an anchor: report anchor rule + sub-rule. */
		hdr.rulenr = htonl(am->nr);
		hdr.subrulenr = htonl(rm->nr);
		if (ruleset != NULL && ruleset->anchor != NULL)
			strlcpy(hdr.ruleset, ruleset->anchor->name,
			    sizeof(hdr.ruleset));
	}
	/*
	 * XXXGL: we avoid pf_socket_lookup() when we are holding
	 * state lock, since this leads to unsafe LOR.
	 * These conditions are very very rare, however.
	 */
	if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done && lookupsafe)
		pd->lookup.done = pf_socket_lookup(dir, pd, m);
	if (pd->lookup.done > 0)
		hdr.uid = pd->lookup.uid;
	else
		hdr.uid = UID_MAX;
	hdr.pid = NO_PID;
	hdr.rule_uid = rm->cuid;
	hdr.rule_pid = rm->cpid;
	hdr.dir = dir;

#ifdef INET
	/* Outbound IPv4: the header checksum is not filled in yet, so
	 * compute it here to log a well-formed packet. */
	if (af == AF_INET && dir == PF_OUT) {
		struct ip *ip;

		ip = mtod(m, struct ip *);
		ip->ip_sum = 0;
		ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
	}
#endif /* INET */

	if_inc_counter(ifn, IFCOUNTER_OPACKETS, 1);
	if_inc_counter(ifn, IFCOUNTER_OBYTES, m->m_pkthdr.len);
	/* Tap header and packet as two fragments; avoids an mbuf copy. */
	BPF_MTAP2(ifn, &hdr, PFLOG_HDRLEN, m);

	return (0);
}
+
/* Per-VNET startup hook: create the pflog cloner. */
static void
vnet_pflog_init(const void *unused __unused)
{

	pflogattach(1);
}
VNET_SYSINIT(vnet_pflog_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
    vnet_pflog_init, NULL);
+
/* Per-VNET teardown hook: destroy the cloner and all pflog clones. */
static void
vnet_pflog_uninit(const void *unused __unused)
{

	if_clone_detach(V_pflog_cloner);
}
/*
 * Detach after pf is gone; otherwise we might touch pflog memory
 * from within pf after freeing pflog.
 */
VNET_SYSUNINIT(vnet_pflog_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND,
    vnet_pflog_uninit, NULL);
+
+static int
+pflog_modevent(module_t mod, int type, void *data)
+{
+ int error = 0;
+
+ switch (type) {
+ case MOD_LOAD:
+ PF_RULES_WLOCK();
+ pflog_packet_ptr = pflog_packet;
+ PF_RULES_WUNLOCK();
+ break;
+ case MOD_UNLOAD:
+ PF_RULES_WLOCK();
+ pflog_packet_ptr = NULL;
+ PF_RULES_WUNLOCK();
+ break;
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+
+ return error;
+}
+
+static moduledata_t pflog_mod = { pflogname, pflog_modevent, 0 };
+
+#define PFLOG_MODVER 1
+
+/* Do not run before pf is initialized as we depend on its locks. */
+DECLARE_MODULE(pflog, pflog_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
+MODULE_VERSION(pflog, PFLOG_MODVER);
+MODULE_DEPEND(pflog, pf, PF_MODVER, PF_MODVER, PF_MODVER);
diff --git a/freebsd/sys/netpfil/pf/if_pfsync.c b/freebsd/sys/netpfil/pf/if_pfsync.c
new file mode 100644
index 00000000..d6a0dfc0
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/if_pfsync.c
@@ -0,0 +1,2421 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2002 Michael Shalayeff
+ * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $
+ *
+ * Revisions picked from OpenBSD after revision 1.110 import:
+ * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input()
+ * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
+ * 1.120, 1.175 - use monotonic time_uptime
+ * 1.122 - reduce number of updates for non-TCP sessions
+ * 1.125, 1.127 - rewrite merge or stale processing
+ * 1.128 - cleanups
+ * 1.146 - bzero() mbuf before sparsely filling it with data
+ * 1.170 - SIOCSIFMTU checks
+ * 1.126, 1.142 - deferred packets processing
+ * 1.173 - correct expire time processing
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_pf.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/bus.h>
+#include <sys/endian.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+
+#include <net/bpf.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_types.h>
+#include <net/vnet.h>
+#include <net/pfvar.h>
+#include <net/if_pfsync.h>
+
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_carp.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+
+#define PFSYNC_MINPKT ( \
+ sizeof(struct ip) + \
+ sizeof(struct pfsync_header) + \
+ sizeof(struct pfsync_subheader) )
+
+struct pfsync_pkt {
+ struct ip *ip;
+ struct in_addr src;
+ u_int8_t flags;
+};
+
+static int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
+ struct pfsync_state_peer *);
+static int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
+static int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
+static int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
+static int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
+static int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
+static int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
+static int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
+static int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
+static int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
+static int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
+static int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
+static int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);
+
+static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
+ pfsync_in_clr, /* PFSYNC_ACT_CLR */
+ pfsync_in_ins, /* PFSYNC_ACT_INS */
+ pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */
+ pfsync_in_upd, /* PFSYNC_ACT_UPD */
+ pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */
+ pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */
+ pfsync_in_del, /* PFSYNC_ACT_DEL */
+ pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */
+ pfsync_in_error, /* PFSYNC_ACT_INS_F */
+ pfsync_in_error, /* PFSYNC_ACT_DEL_F */
+ pfsync_in_bus, /* PFSYNC_ACT_BUS */
+ pfsync_in_tdb, /* PFSYNC_ACT_TDB */
+ pfsync_in_eof /* PFSYNC_ACT_EOF */
+};
+
+struct pfsync_q {
+ void (*write)(struct pf_state *, void *);
+ size_t len;
+ u_int8_t action;
+};
+
+/* we have one of these for every PFSYNC_S_ */
+static void pfsync_out_state(struct pf_state *, void *);
+static void pfsync_out_iack(struct pf_state *, void *);
+static void pfsync_out_upd_c(struct pf_state *, void *);
+static void pfsync_out_del(struct pf_state *, void *);
+
+static struct pfsync_q pfsync_qs[] = {
+ { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS },
+ { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
+ { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD },
+ { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C },
+ { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C }
+};
+
+static void pfsync_q_ins(struct pf_state *, int);
+static void pfsync_q_del(struct pf_state *);
+
+static void pfsync_update_state(struct pf_state *);
+
+struct pfsync_upd_req_item {
+ TAILQ_ENTRY(pfsync_upd_req_item) ur_entry;
+ struct pfsync_upd_req ur_msg;
+};
+
+struct pfsync_deferral {
+ struct pfsync_softc *pd_sc;
+ TAILQ_ENTRY(pfsync_deferral) pd_entry;
+ u_int pd_refs;
+ struct callout pd_tmo;
+
+ struct pf_state *pd_st;
+ struct mbuf *pd_m;
+};
+
+struct pfsync_softc {
+ /* Configuration */
+ struct ifnet *sc_ifp;
+ struct ifnet *sc_sync_if;
+ struct ip_moptions sc_imo;
+ struct in_addr sc_sync_peer;
+ uint32_t sc_flags;
+#define PFSYNCF_OK 0x00000001
+#define PFSYNCF_DEFER 0x00000002
+#define PFSYNCF_PUSH 0x00000004
+ uint8_t sc_maxupdates;
+ struct ip sc_template;
+ struct callout sc_tmo;
+ struct mtx sc_mtx;
+
+ /* Queued data */
+ size_t sc_len;
+ TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT];
+ TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list;
+ TAILQ_HEAD(, pfsync_deferral) sc_deferrals;
+ u_int sc_deferred;
+ void *sc_plus;
+ size_t sc_pluslen;
+
+ /* Bulk update info */
+ struct mtx sc_bulk_mtx;
+ uint32_t sc_ureq_sent;
+ int sc_bulk_tries;
+ uint32_t sc_ureq_received;
+ int sc_bulk_hashid;
+ uint64_t sc_bulk_stateid;
+ uint32_t sc_bulk_creatorid;
+ struct callout sc_bulk_tmo;
+ struct callout sc_bulkfail_tmo;
+};
+
+#define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx)
+#define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx)
+#define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED)
+
+#define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx)
+#define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx)
+#define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)
+
+static const char pfsyncname[] = "pfsync";
+static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
+static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL;
+#define V_pfsyncif VNET(pfsyncif)
+static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL;
+#define V_pfsync_swi_cookie VNET(pfsync_swi_cookie)
+static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
+#define V_pfsyncstats VNET(pfsyncstats)
+static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW;
+#define V_pfsync_carp_adj VNET(pfsync_carp_adj)
+
+static void pfsync_timeout(void *);
+static void pfsync_push(struct pfsync_softc *);
+static void pfsyncintr(void *);
+static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
+ void *);
+static void pfsync_multicast_cleanup(struct pfsync_softc *);
+static void pfsync_pointers_init(void);
+static void pfsync_pointers_uninit(void);
+static int pfsync_init(void);
+static void pfsync_uninit(void);
+
+SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
+SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(pfsyncstats), pfsyncstats,
+ "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
+SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW,
+ &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
+
+static int pfsync_clone_create(struct if_clone *, int, caddr_t);
+static void pfsync_clone_destroy(struct ifnet *);
+static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
+ struct pf_state_peer *);
+static int pfsyncoutput(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
+static int pfsyncioctl(struct ifnet *, u_long, caddr_t);
+
+static int pfsync_defer(struct pf_state *, struct mbuf *);
+static void pfsync_undefer(struct pfsync_deferral *, int);
+static void pfsync_undefer_state(struct pf_state *, int);
+static void pfsync_defer_tmo(void *);
+
+static void pfsync_request_update(u_int32_t, u_int64_t);
+static void pfsync_update_state_req(struct pf_state *);
+
+static void pfsync_drop(struct pfsync_softc *);
+static void pfsync_sendout(int);
+static void pfsync_send_plus(void *, size_t);
+
+static void pfsync_bulk_start(void);
+static void pfsync_bulk_status(u_int8_t);
+static void pfsync_bulk_update(void *);
+static void pfsync_bulk_fail(void *);
+
+#ifdef IPSEC
+static void pfsync_update_net_tdb(struct pfsync_tdb *);
+#endif
+
+#define PFSYNC_MAX_BULKTRIES 12
+
+VNET_DEFINE(struct if_clone *, pfsync_cloner);
+#define V_pfsync_cloner VNET(pfsync_cloner)
+
/*
 * Clone handler: create the pfsync interface and its software context.
 * Only a single unit (pfsync0) is supported per VNET.
 * Returns 0, EINVAL for unit != 0, or ENOSPC.
 */
static int
pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
	sc->sc_flags |= PFSYNCF_OK;

	/* One outgoing queue per PFSYNC_S_* message class. */
	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);

	/* An "empty" packet still carries IP + pfsync + sub headers. */
	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
	if (ifp == NULL) {
		free(sc, M_PFSYNC);
		return (ENOSPC);
	}
	if_initname(ifp, pfsyncname, unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
	mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
	callout_init(&sc->sc_tmo, 1);
	/* Bulk-update callouts run with the bulk mutex held. */
	callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
	callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);

	if_attach(ifp);

	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);

	V_pfsyncif = sc;

	return (0);
}
+
/*
 * Clone handler: tear down the pfsync interface.  Drains all pending
 * deferrals and callouts, detaches from bpf and the stack, and frees
 * the software context.
 */
static void
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;

	/*
	 * At this stage, everything should have already been
	 * cleared by pfsync_uninit(), and we have only to
	 * drain callouts.
	 */
	while (sc->sc_deferred > 0) {
		struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals);

		TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
		sc->sc_deferred--;
		/* callout_stop() > 0: we cancelled the timer, so the
		 * deferral's state/mbuf references are ours to release. */
		if (callout_stop(&pd->pd_tmo) > 0) {
			pf_release_state(pd->pd_st);
			m_freem(pd->pd_m);
			free(pd, M_PFSYNC);
		} else {
			/* Timer is running; wait for it to finish. */
			pd->pd_refs++;
			callout_drain(&pd->pd_tmo);
			free(pd, M_PFSYNC);
		}
	}

	callout_drain(&sc->sc_tmo);
	callout_drain(&sc->sc_bulkfail_tmo);
	callout_drain(&sc->sc_bulk_tmo);

	/* Undo the CARP demotion left over from an unfinished bulk update. */
	if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
		(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
	bpfdetach(ifp);
	if_detach(ifp);

	pfsync_drop(sc);

	if_free(ifp);
	if (sc->sc_imo.imo_membership)
		pfsync_multicast_cleanup(sc);
	mtx_destroy(&sc->sc_mtx);
	mtx_destroy(&sc->sc_bulk_mtx);
	free(sc, M_PFSYNC);

	V_pfsyncif = NULL;
}
+
+static int
+pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
+ struct pf_state_peer *d)
+{
+ if (s->scrub.scrub_flag && d->scrub == NULL) {
+ d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO);
+ if (d->scrub == NULL)
+ return (ENOMEM);
+ }
+
+ return (0);
+}
+
+
/*
 * Import one state received from a pfsync peer (or loaded via ioctl,
 * flags & PFSYNC_SI_IOCTL) into the local state table.
 * Returns 0 on success or deliberate skip, EINVAL on bad input,
 * ENOMEM on allocation failure, or the error from pf_state_insert().
 * On success the new state is returned unlocked.
 */
static int
pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
{
	struct pfsync_softc *sc = V_pfsyncif;
#ifndef __NO_STRICT_ALIGNMENT
	struct pfsync_state_key key[2];
#endif
	struct pfsync_state_key *kw, *ks;
	struct pf_state *st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif *kif;
	int error;

	PF_RULES_RASSERT();

	if (sp->creatorid == 0) {
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("%s: invalid creator id: %08x\n", __func__,
			    ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_find(sp->ifname)) == NULL) {
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("%s: unknown interface: %s\n", __func__,
			    sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	/*
	 * If the ruleset checksums match or the state is coming from the ioctl,
	 * it's safe to associate the state with the rule of that number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
		r = pf_main_ruleset.rules[
		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
	else
		r = &V_pf_default_rule;

	if ((r->max_states &&
	    counter_u64_fetch(r->states_cur) >= r->max_states))
		goto cleanup;

	/*
	 * XXXGL: consider M_WAITOK in ioctl path after.
	 */
	if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL)
		goto cleanup;

	if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL)
		goto cleanup;

#ifndef __NO_STRICT_ALIGNMENT
	/* Copy out of the mbuf-backed buffer to get aligned keys. */
	bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2);
	kw = &key[PF_SK_WIRE];
	ks = &key[PF_SK_STACK];
#else
	kw = &sp->key[PF_SK_WIRE];
	ks = &sp->key[PF_SK_STACK];
#endif

	/* Only allocate a separate stack key when it differs from wire. */
	if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) ||
	    PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) ||
	    kw->port[0] != ks->port[0] ||
	    kw->port[1] != ks->port[1]) {
		sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
		if (sks == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* Copy to state key(s). */
	skw->addr[0] = kw->addr[0];
	skw->addr[1] = kw->addr[1];
	skw->port[0] = kw->port[0];
	skw->port[1] = kw->port[1];
	skw->proto = sp->proto;
	skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = ks->addr[0];
		sks->addr[1] = ks->addr[1];
		sks->port[0] = ks->port[0];
		sks->port[1] = ks->port[1];
		sks->proto = sp->proto;
		sks->af = sp->af;
	}

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_uptime - ntohl(sp->creation);
	st->expire = time_uptime;
	if (sp->expire) {
		uint32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = V_pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = sp->state_flags;

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->nat_rule.ptr = NULL;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	/* Keep the insert below from being synced back to the peer. */
	if (!(flags & PFSYNC_SI_IOCTL))
		st->state_flags |= PFSTATE_NOSYNC;

	if ((error = pf_state_insert(kif, skw, sks, st)) != 0)
		goto cleanup_state;

	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
	counter_u64_add(r->states_cur, 1);
	counter_u64_add(r->states_tot, 1);

	if (!(flags & PFSYNC_SI_IOCTL)) {
		st->state_flags &= ~PFSTATE_NOSYNC;
		if (st->state_flags & PFSTATE_ACK) {
			/* Peer wants an insert acknowledgement. */
			pfsync_q_ins(st, PFSYNC_S_IACK);
			pfsync_push(sc);
		}
	}
	st->state_flags &= ~PFSTATE_ACK;
	PF_STATE_UNLOCK(st);

	return (0);

cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		uma_zfree(V_pf_state_key_z, skw);
	if (sks != NULL)
		uma_zfree(V_pf_state_key_z, sks);

cleanup_state:	/* pf_state_insert() frees the state keys. */
	if (st) {
		if (st->dst.scrub)
			uma_zfree(V_pf_state_scrub_z, st->dst.scrub);
		if (st->src.scrub)
			uma_zfree(V_pf_state_scrub_z, st->src.scrub);
		uma_zfree(V_pf_state_z, st);
	}
	return (error);
}
+
/*
 * Input path for IPPROTO_PFSYNC packets: validate the envelope
 * (sync interface, TTL, header, version, length) and then dispatch
 * each subheader to its action handler from pfsync_acts[].
 * Always consumes the mbuf and returns IPPROTO_DONE.
 */
static int
pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_pkt pkt;
	struct mbuf *m = *mp;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, len;
	int rv;
	uint16_t count;

	*mp = NULL;
	V_pfsyncstats.pfsyncs_ipackets++;

	/* Verify that we have a sync interface configured. */
	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		V_pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		V_pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		V_pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		/* m_pullup() frees the chain on failure. */
		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
			V_pfsyncstats.pfsyncs_hdrops++;
			return (IPPROTO_DONE);
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		V_pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		V_pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	/* Cheaper to grab this now than having to mess with mbufs later */
	pkt.ip = ip;
	pkt.src = ip->ip_src;
	pkt.flags = 0;

	/*
	 * Trusting pf_chksum during packet processing, as well as seeking
	 * in interface name tree, require holding PF_RULES_RLOCK().
	 */
	PF_RULES_RLOCK();
	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		pkt.flags |= PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		if (subh.action >= PFSYNC_ACT_MAX) {
			V_pfsyncstats.pfsyncs_badact++;
			PF_RULES_RUNLOCK();
			goto done;
		}

		count = ntohs(subh.count);
		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
		rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count);
		/* rv == -1: handler found the message malformed; the mbuf
		 * is still ours to... it was not consumed, but processing
		 * must stop — NOTE(review): handlers return -1 without
		 * freeing m; confirm ownership convention. */
		if (rv == -1) {
			PF_RULES_RUNLOCK();
			return (IPPROTO_DONE);
		}

		offset += rv;
	}
	PF_RULES_RUNLOCK();

done:
	m_freem(m);
	return (IPPROTO_DONE);
}
+
+static int
+pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+ struct pfsync_clr *clr;
+ struct mbuf *mp;
+ int len = sizeof(*clr) * count;
+ int i, offp;
+ u_int32_t creatorid;
+
+ mp = m_pulldown(m, offset, len, &offp);
+ if (mp == NULL) {
+ V_pfsyncstats.pfsyncs_badlen++;
+ return (-1);
+ }
+ clr = (struct pfsync_clr *)(mp->m_data + offp);
+
+ for (i = 0; i < count; i++) {
+ creatorid = clr[i].creatorid;
+
+ if (clr[i].ifname[0] != '\0' &&
+ pfi_kif_find(clr[i].ifname) == NULL)
+ continue;
+
+ for (int i = 0; i <= pf_hashmask; i++) {
+ struct pf_idhash *ih = &V_pf_idhash[i];
+ struct pf_state *s;
+relock:
+ PF_HASHROW_LOCK(ih);
+ LIST_FOREACH(s, &ih->states, entry) {
+ if (s->creatorid == creatorid) {
+ s->state_flags |= PFSTATE_NOSYNC;
+ pf_unlink_state(s, PF_ENTER_LOCKED);
+ goto relock;
+ }
+ }
+ PF_HASHROW_UNLOCK(ih);
+ }
+ }
+
+ return (len);
+}
+
/*
 * PFSYNC_ACT_INS: a batch of full state inserts.  Each entry is
 * sanity-checked and imported; on ENOMEM the rest of this batch is
 * dropped but the byte count is still returned so later subheaders
 * in the packet can be processed.
 */
static int
pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_state *sa, *sp;
	int len = sizeof(*sp) * count;
	int i, offp;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* Check for invalid values. */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (sp->af != AF_INET && sp->af != AF_INET6)) {
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("%s: invalid value\n", __func__);
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, pkt->flags) == ENOMEM)
			/* Drop out, but process the rest of the actions. */
			break;
	}

	return (len);
}
+
/*
 * PFSYNC_ACT_INS_ACK: the peer acknowledged states we inserted.
 * For each acked state that still has PFSTATE_ACK set, release the
 * deferred packet that was held back waiting for this ack.
 */
static int
pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_ins_ack *ia, *iaa;
	struct pf_state *st;

	struct mbuf *mp;
	int len = count * sizeof(*ia);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		ia = &iaa[i];

		/* Returns the state locked, or NULL if unknown. */
		st = pf_find_state_byid(ia->id, ia->creatorid);
		if (st == NULL)
			continue;

		if (st->state_flags & PFSTATE_ACK) {
			PFSYNC_LOCK(V_pfsyncif);
			pfsync_undefer_state(st, 0);
			PFSYNC_UNLOCK(V_pfsyncif);
		}
		PF_STATE_UNLOCK(st);
	}
	/*
	 * XXX this is not yet implemented, but we know the size of the
	 * message so we can skip it.
	 */

	return (count * sizeof(struct pfsync_ins_ack));
}
+
/*
 * Merge a TCP state update from the peer into our local state.
 * Returns the number of directions (0-2) for which our local data is
 * newer than the update, i.e. the update is stale; nonzero tells the
 * caller to re-advertise our version.
 */
static int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	PF_STATE_LOCK_ASSERT(st);

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}
+
/*
 * PFSYNC_ACT_UPD: a batch of full state updates.  Unknown states are
 * imported as new; known states are merged, and if our local copy is
 * newer (stale update) we queue our version to be sent back.
 */
static int
pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_state *sa, *sp;
	struct pf_state *st;
	int sync;

	struct mbuf *mp;
	int len = count * sizeof(*sp);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: PFSYNC_ACT_UPD: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		st = pf_find_state_byid(sp->id, sp->creatorid);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, 0))
				V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		/* The update doubles as the ack we were waiting for. */
		if (st->state_flags & PFSTATE_ACK) {
			PFSYNC_LOCK(sc);
			pfsync_undefer_state(st, 1);
			PFSYNC_UNLOCK(sc);
		}

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machine always go
			 * forwards
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);
			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			/* Our copy is newer: advertise it back to the peer. */
			V_pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			PF_STATE_UNLOCK(st);
			PFSYNC_LOCK(sc);
			pfsync_push(sc);
			PFSYNC_UNLOCK(sc);
			continue;
		}
		PF_STATE_UNLOCK(st);
	}

	return (len);
}
+
+/*
+ * Handle a batch of PFSYNC_ACT_UPD_C (compressed update) messages.
+ * Same merge logic as pfsync_in_upd(), except an unknown state cannot
+ * be imported from the compressed record, so we ask the peer for a
+ * full copy instead.  Returns bytes consumed or -1 on a short packet.
+ */
+static int
+pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_upd_c *ua, *up;
+	struct pf_state *st;
+	int len = count * sizeof(*up);
+	int sync;
+	struct mbuf *mp;
+	int offp, i;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	ua = (struct pfsync_upd_c *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		up = &ua[i];
+
+		/* check for invalid values */
+		if (up->timeout >= PFTM_MAX ||
+		    up->src.state > PF_TCPS_PROXY_DST ||
+		    up->dst.state > PF_TCPS_PROXY_DST) {
+			if (V_pf_status.debug >= PF_DEBUG_MISC) {
+				printf("pfsync_input: "
+				    "PFSYNC_ACT_UPD_C: "
+				    "invalid value\n");
+			}
+			V_pfsyncstats.pfsyncs_badval++;
+			continue;
+		}
+
+		st = pf_find_state_byid(up->id, up->creatorid);
+		if (st == NULL) {
+			/* We don't have this state. Ask for it. */
+			PFSYNC_LOCK(sc);
+			pfsync_request_update(up->creatorid, up->id);
+			PFSYNC_UNLOCK(sc);
+			continue;
+		}
+
+		/* The peer clearly knows the state: stop deferring. */
+		if (st->state_flags & PFSTATE_ACK) {
+			PFSYNC_LOCK(sc);
+			pfsync_undefer_state(st, 1);
+			PFSYNC_UNLOCK(sc);
+		}
+
+		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
+			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
+		else {
+			sync = 0;
+
+			/*
+			 * Non-TCP protocol state machine always go
+			 * forwards
+			 */
+			if (st->src.state > up->src.state)
+				sync++;
+			else
+				pf_state_peer_ntoh(&up->src, &st->src);
+			if (st->dst.state > up->dst.state)
+				sync++;
+			else
+				pf_state_peer_ntoh(&up->dst, &st->dst);
+		}
+		if (sync < 2) {
+			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
+			pf_state_peer_ntoh(&up->dst, &st->dst);
+			st->expire = time_uptime;
+			st->timeout = up->timeout;
+		}
+		st->pfsync_time = time_uptime;
+
+		if (sync) {
+			/* We have newer data: advertise it back. */
+			V_pfsyncstats.pfsyncs_stale++;
+
+			pfsync_update_state(st);
+			PF_STATE_UNLOCK(st);
+			PFSYNC_LOCK(sc);
+			pfsync_push(sc);
+			PFSYNC_UNLOCK(sc);
+			continue;
+		}
+		PF_STATE_UNLOCK(st);
+	}
+
+	return (len);
+}
+
+/*
+ * Handle PFSYNC_ACT_UPD_REQ: the peer asks for fresh copies of
+ * specific states; the all-zero id/creatorid pair requests a full
+ * bulk update.  Returns bytes consumed or -1 on a short packet.
+ */
+static int
+pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_upd_req *ur, *ura;
+	struct mbuf *mp;
+	int len = count * sizeof(*ur);
+	int i, offp;
+
+	struct pf_state *st;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	ura = (struct pfsync_upd_req *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		ur = &ura[i];
+
+		if (ur->id == 0 && ur->creatorid == 0)
+			pfsync_bulk_start();
+		else {
+			st = pf_find_state_byid(ur->id, ur->creatorid);
+			if (st == NULL) {
+				V_pfsyncstats.pfsyncs_badstate++;
+				continue;
+			}
+			/* Never advertise states excluded from syncing. */
+			if (st->state_flags & PFSTATE_NOSYNC) {
+				PF_STATE_UNLOCK(st);
+				continue;
+			}
+
+			pfsync_update_state_req(st);
+			PF_STATE_UNLOCK(st);
+		}
+	}
+
+	return (len);
+}
+
+/*
+ * Handle full-size PFSYNC_ACT_DEL messages: the peer removed these
+ * states, so unlink our copies, marking them NOSYNC so the delete is
+ * not echoed back.  Returns bytes consumed or -1 on a short packet.
+ */
+static int
+pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct mbuf *mp;
+	struct pfsync_state *sa, *sp;
+	struct pf_state *st;
+	int len = count * sizeof(*sp);
+	int offp, i;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	sa = (struct pfsync_state *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		sp = &sa[i];
+
+		/* pf_find_state_byid() returns the state locked. */
+		st = pf_find_state_byid(sp->id, sp->creatorid);
+		if (st == NULL) {
+			V_pfsyncstats.pfsyncs_badstate++;
+			continue;
+		}
+		st->state_flags |= PFSTATE_NOSYNC;
+		pf_unlink_state(st, PF_ENTER_LOCKED);
+	}
+
+	return (len);
+}
+
+/*
+ * Handle compressed PFSYNC_ACT_DEL_C messages: same as
+ * pfsync_in_del() but for the id+creatorid-only record format.
+ * Returns bytes consumed or -1 on a short packet.
+ */
+static int
+pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct mbuf *mp;
+	struct pfsync_del_c *sa, *sp;
+	struct pf_state *st;
+	int len = count * sizeof(*sp);
+	int offp, i;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	sa = (struct pfsync_del_c *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++) {
+		sp = &sa[i];
+
+		st = pf_find_state_byid(sp->id, sp->creatorid);
+		if (st == NULL) {
+			V_pfsyncstats.pfsyncs_badstate++;
+			continue;
+		}
+
+		st->state_flags |= PFSTATE_NOSYNC;
+		pf_unlink_state(st, PF_ENTER_LOCKED);
+	}
+
+	return (len);
+}
+
+/*
+ * Handle PFSYNC_ACT_BUS (bulk update status) messages.  Only relevant
+ * while we are waiting for a bulk update we requested: BUS_START arms
+ * the bulk-failure timeout, and a BUS_END with a plausible timestamp
+ * marks the transfer complete and lifts our carp demotion.
+ */
+static int
+pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_bus *bus;
+	struct mbuf *mp;
+	int len = count * sizeof(*bus);
+	int offp;
+
+	PFSYNC_BLOCK(sc);
+
+	/* If we're not waiting for a bulk update, who cares. */
+	if (sc->sc_ureq_sent == 0) {
+		PFSYNC_BUNLOCK(sc);
+		return (len);
+	}
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		PFSYNC_BUNLOCK(sc);
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	bus = (struct pfsync_bus *)(mp->m_data + offp);
+
+	switch (bus->status) {
+	case PFSYNC_BUS_START:
+		/*
+		 * Allow enough time for the full state table: scale
+		 * the timeout by how many states fit in one packet.
+		 */
+		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
+		    V_pf_limits[PF_LIMIT_STATES].limit /
+		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
+		    sizeof(struct pfsync_state)),
+		    pfsync_bulk_fail, sc);
+		if (V_pf_status.debug >= PF_DEBUG_MISC)
+			printf("pfsync: received bulk update start\n");
+		break;
+
+	case PFSYNC_BUS_END:
+		/* Only accept an end that postdates our request. */
+		if (time_uptime - ntohl(bus->endtime) >=
+		    sc->sc_ureq_sent) {
+			/* that's it, we're happy */
+			sc->sc_ureq_sent = 0;
+			sc->sc_bulk_tries = 0;
+			callout_stop(&sc->sc_bulkfail_tmo);
+			if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
+				(*carp_demote_adj_p)(-V_pfsync_carp_adj,
+				    "pfsync bulk done");
+			sc->sc_flags |= PFSYNCF_OK;
+			if (V_pf_status.debug >= PF_DEBUG_MISC)
+				printf("pfsync: received valid "
+				    "bulk update end\n");
+		} else {
+			if (V_pf_status.debug >= PF_DEBUG_MISC)
+				printf("pfsync: received invalid "
+				    "bulk update end: bad timestamp\n");
+		}
+		break;
+	}
+	PFSYNC_BUNLOCK(sc);
+
+	return (len);
+}
+
+/*
+ * Handle a batch of PFSYNC_ACT_TDB messages (IPsec SA replay/byte
+ * counters).  Without IPSEC compiled in, the records are just
+ * skipped.  Returns bytes consumed or -1 on a short packet.
+ */
+static int
+pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	int len = count * sizeof(struct pfsync_tdb);
+
+#if defined(IPSEC)
+	struct pfsync_tdb *tp;
+	struct mbuf *mp;
+	int offp;
+	int i;
+
+	mp = m_pulldown(m, offset, len, &offp);
+	if (mp == NULL) {
+		V_pfsyncstats.pfsyncs_badlen++;
+		return (-1);
+	}
+	tp = (struct pfsync_tdb *)(mp->m_data + offp);
+
+	for (i = 0; i < count; i++)
+		pfsync_update_net_tdb(&tp[i]);
+#endif
+
+	return (len);
+}
+
+#if defined(IPSEC)
+/* Update an in-kernel tdb. Silently fail if no tdb is found. */
+static void
+pfsync_update_net_tdb(struct pfsync_tdb *pt)
+{
+	struct tdb *tdb;
+
+	/* check for invalid values */
+	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
+	    (pt->dst.sa.sa_family != AF_INET &&
+	    pt->dst.sa.sa_family != AF_INET6))
+		goto bad;
+
+	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
+	if (tdb) {
+		pt->rpl = ntohl(pt->rpl);
+		pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes);
+
+		/* Neither replay nor byte counter should ever decrease. */
+		if (pt->rpl < tdb->tdb_rpl ||
+		    pt->cur_bytes < tdb->tdb_cur_bytes) {
+			goto bad;
+		}
+
+		tdb->tdb_rpl = pt->rpl;
+		tdb->tdb_cur_bytes = pt->cur_bytes;
+	}
+	return;
+
+bad:
+	if (V_pf_status.debug >= PF_DEBUG_MISC)
+		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
+		    "invalid value\n");
+	V_pfsyncstats.pfsyncs_badstate++;
+	return;
+}
+#endif
+
+
+/*
+ * PFSYNC_ACT_EOF: verify the EOF subheader sits exactly at the end of
+ * the datagram, then free the mbuf and stop processing (-1).
+ */
+static int
+pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	/* check if we are at the right place in the packet */
+	if (offset != m->m_pkthdr.len)
+		V_pfsyncstats.pfsyncs_badlen++;
+
+	/* we're done. free and let the caller return */
+	m_freem(m);
+	return (-1);
+}
+
+/* Unknown action: count it, toss the packet and abort processing. */
+static int
+pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
+{
+	m_freem(m);
+	V_pfsyncstats.pfsyncs_badact++;
+
+	return (-1);
+}
+
+/*
+ * if_output for pfsync(4): the interface never transmits user
+ * packets, so anything handed to us is silently dropped.
+ */
+static int
+pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+    struct route *rt)
+{
+
+	m_freem(m);
+	return (0);
+}
+
+/* ARGSUSED */
+/*
+ * Interface ioctl handler: interface flags and MTU changes, plus the
+ * SIOCGETPFSYNC/SIOCSETPFSYNC configuration requests issued by
+ * ifconfig(8)/pfctl(8).
+ */
+static int
+pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct pfsync_softc *sc = ifp->if_softc;
+	struct ifreq *ifr = (struct ifreq *)data;
+	struct pfsyncreq pfsyncr;
+	int error;
+
+	switch (cmd) {
+	case SIOCSIFFLAGS:
+		PFSYNC_LOCK(sc);
+		if (ifp->if_flags & IFF_UP) {
+			ifp->if_drv_flags |= IFF_DRV_RUNNING;
+			PFSYNC_UNLOCK(sc);
+			pfsync_pointers_init();
+		} else {
+			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+			PFSYNC_UNLOCK(sc);
+			pfsync_pointers_uninit();
+		}
+		break;
+	case SIOCSIFMTU:
+		if (!sc->sc_sync_if ||
+		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
+		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
+			return (EINVAL);
+		if (ifr->ifr_mtu < ifp->if_mtu) {
+			PFSYNC_LOCK(sc);
+			/* Shrinking: flush the queued work first. */
+			if (sc->sc_len > PFSYNC_MINPKT)
+				pfsync_sendout(1);
+			PFSYNC_UNLOCK(sc);
+		}
+		ifp->if_mtu = ifr->ifr_mtu;
+		break;
+	case SIOCGETPFSYNC:
+		bzero(&pfsyncr, sizeof(pfsyncr));
+		PFSYNC_LOCK(sc);
+		if (sc->sc_sync_if) {
+			strlcpy(pfsyncr.pfsyncr_syncdev,
+			    sc->sc_sync_if->if_xname, IFNAMSIZ);
+		}
+		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
+		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
+		pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER ==
+		    (sc->sc_flags & PFSYNCF_DEFER));
+		PFSYNC_UNLOCK(sc);
+		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
+
+	case SIOCSETPFSYNC:
+	    {
+		struct ip_moptions *imo = &sc->sc_imo;
+		struct ifnet *sifp;
+		struct ip *ip;
+		void *mship = NULL;
+
+		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
+			return (error);
+		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
+			return (error);
+
+		if (pfsyncr.pfsyncr_maxupdates > 255)
+			return (EINVAL);
+
+		if (pfsyncr.pfsyncr_syncdev[0] == 0)
+			sifp = NULL;
+		else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL)
+			return (EINVAL);
+
+		/*
+		 * Preallocate while we may still sleep; only needed
+		 * when the peer is (or defaults to) the multicast
+		 * group.
+		 */
+		if (sifp != NULL && (
+		    pfsyncr.pfsyncr_syncpeer.s_addr == 0 ||
+		    pfsyncr.pfsyncr_syncpeer.s_addr ==
+		    htonl(INADDR_PFSYNC_GROUP)))
+			mship = malloc((sizeof(struct in_multi *) *
+			    IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO);
+
+		PFSYNC_LOCK(sc);
+		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
+			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
+		else
+			sc->sc_sync_peer.s_addr =
+			    pfsyncr.pfsyncr_syncpeer.s_addr;
+
+		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
+		if (pfsyncr.pfsyncr_defer) {
+			sc->sc_flags |= PFSYNCF_DEFER;
+			pfsync_defer_ptr = pfsync_defer;
+		} else {
+			sc->sc_flags &= ~PFSYNCF_DEFER;
+			pfsync_defer_ptr = NULL;
+		}
+
+		if (sifp == NULL) {
+			/* No syncdev: tear down any previous one. */
+			if (sc->sc_sync_if)
+				if_rele(sc->sc_sync_if);
+			sc->sc_sync_if = NULL;
+			if (imo->imo_membership)
+				pfsync_multicast_cleanup(sc);
+			PFSYNC_UNLOCK(sc);
+			break;
+		}
+
+		/* Flush if the new path cannot carry the queued packet. */
+		if (sc->sc_len > PFSYNC_MINPKT &&
+		    (sifp->if_mtu < sc->sc_ifp->if_mtu ||
+		    (sc->sc_sync_if != NULL &&
+		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
+		    sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
+			pfsync_sendout(1);
+
+		if (imo->imo_membership)
+			pfsync_multicast_cleanup(sc);
+
+		if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
+			error = pfsync_multicast_setup(sc, sifp, mship);
+			if (error) {
+				if_rele(sifp);
+				free(mship, M_PFSYNC);
+				/* Fix: do not leak the pfsync lock. */
+				PFSYNC_UNLOCK(sc);
+				return (error);
+			}
+		}
+		if (sc->sc_sync_if)
+			if_rele(sc->sc_sync_if);
+		sc->sc_sync_if = sifp;
+
+		ip = &sc->sc_template;
+		bzero(ip, sizeof(*ip));
+		ip->ip_v = IPVERSION;
+		ip->ip_hl = sizeof(sc->sc_template) >> 2;
+		ip->ip_tos = IPTOS_LOWDELAY;
+		/* len and id are set later. */
+		ip->ip_off = htons(IP_DF);
+		ip->ip_ttl = PFSYNC_DFLTTL;
+		ip->ip_p = IPPROTO_PFSYNC;
+		ip->ip_src.s_addr = INADDR_ANY;
+		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
+
+		/* Request a full state table update. */
+		if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
+			(*carp_demote_adj_p)(V_pfsync_carp_adj,
+			    "pfsync bulk start");
+		sc->sc_flags &= ~PFSYNCF_OK;
+		if (V_pf_status.debug >= PF_DEBUG_MISC)
+			printf("pfsync: requesting bulk update\n");
+		pfsync_request_update(0, 0);
+		PFSYNC_UNLOCK(sc);
+		PFSYNC_BLOCK(sc);
+		sc->sc_ureq_sent = time_uptime;
+		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail,
+		    sc);
+		PFSYNC_BUNLOCK(sc);
+
+		break;
+	    }
+	default:
+		return (ENOTTY);
+	}
+
+	return (0);
+}
+
+/* Queue write method: serialize a full pfsync_state record for @st. */
+static void
+pfsync_out_state(struct pf_state *st, void *buf)
+{
+
+	pfsync_state_export((struct pfsync_state *)buf, st);
+}
+
+/* Queue write method: serialize an insert-ack (id + creatorid). */
+static void
+pfsync_out_iack(struct pf_state *st, void *buf)
+{
+	struct pfsync_ins_ack *p = buf;
+
+	p->creatorid = st->creatorid;
+	p->id = st->id;
+}
+
+/* Queue write method: serialize a compressed update for @st. */
+static void
+pfsync_out_upd_c(struct pf_state *st, void *buf)
+{
+	struct pfsync_upd_c *u = buf;
+
+	bzero(u, sizeof(*u));
+	u->id = st->id;
+	u->creatorid = st->creatorid;
+	u->timeout = st->timeout;
+	pf_state_peer_hton(&st->src, &u->src);
+	pf_state_peer_hton(&st->dst, &u->dst);
+}
+
+/*
+ * Queue write method: serialize a compressed delete for @st and mark
+ * the state NOSYNC so nothing further is sent about it.
+ */
+static void
+pfsync_out_del(struct pf_state *st, void *buf)
+{
+	struct pfsync_del_c *d = buf;
+
+	st->state_flags |= PFSTATE_NOSYNC;
+	d->creatorid = st->creatorid;
+	d->id = st->id;
+}
+
+/*
+ * Throw away everything queued for transmission: release every state
+ * on each action queue, free pending update requests, drop the "plus"
+ * region and reset sc_len to the empty-packet size.
+ */
+static void
+pfsync_drop(struct pfsync_softc *sc)
+{
+	struct pf_state *st, *next;
+	struct pfsync_upd_req_item *ur;
+	int q;
+
+	for (q = 0; q < PFSYNC_S_COUNT; q++) {
+		if (TAILQ_EMPTY(&sc->sc_qs[q]))
+			continue;
+
+		TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) {
+			KASSERT(st->sync_state == q,
+				("%s: st->sync_state == q",
+				    __func__));
+			st->sync_state = PFSYNC_S_NONE;
+			pf_release_state(st);
+		}
+		TAILQ_INIT(&sc->sc_qs[q]);
+	}
+
+	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
+		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
+		free(ur, M_PFSYNC);
+	}
+
+	sc->sc_plus = NULL;
+	sc->sc_len = PFSYNC_MINPKT;
+}
+
+/*
+ * Assemble and transmit one pfsync datagram from everything queued:
+ * per-action state queues, pending update requests, an optional
+ * caller-built "plus" region and a trailing EOF subheader.  Resets
+ * sc_len to PFSYNC_MINPKT; if @schedswi is set, schedules the swi to
+ * do the actual ip_output().  Called with the pfsync lock held.
+ */
+static void
+pfsync_sendout(int schedswi)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct ifnet *ifp = sc->sc_ifp;
+	struct mbuf *m;
+	struct ip *ip;
+	struct pfsync_header *ph;
+	struct pfsync_subheader *subh;
+	struct pf_state *st;
+	struct pfsync_upd_req_item *ur;
+	int offset;
+	int q, count = 0;
+
+	KASSERT(sc != NULL, ("%s: null sc", __func__));
+	KASSERT(sc->sc_len > PFSYNC_MINPKT,
+	    ("%s: sc_len %zu", __func__, sc->sc_len));
+	PFSYNC_LOCK_ASSERT(sc);
+
+	/* Nobody listening: just drop the queued work. */
+	if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
+		pfsync_drop(sc);
+		return;
+	}
+
+	m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR);
+	if (m == NULL) {
+		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
+		V_pfsyncstats.pfsyncs_onomem++;
+		return;
+	}
+	m->m_data += max_linkhdr;
+	m->m_len = m->m_pkthdr.len = sc->sc_len;
+
+	/* build the ip header */
+	ip = (struct ip *)m->m_data;
+	bcopy(&sc->sc_template, ip, sizeof(*ip));
+	offset = sizeof(*ip);
+
+	ip->ip_len = htons(m->m_pkthdr.len);
+	ip_fillid(ip);
+
+	/* build the pfsync header */
+	ph = (struct pfsync_header *)(m->m_data + offset);
+	bzero(ph, sizeof(*ph));
+	offset += sizeof(*ph);
+
+	ph->version = PFSYNC_VERSION;
+	ph->len = htons(sc->sc_len - sizeof(*ip));
+	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
+
+	/* walk the queues */
+	for (q = 0; q < PFSYNC_S_COUNT; q++) {
+		if (TAILQ_EMPTY(&sc->sc_qs[q]))
+			continue;
+
+		subh = (struct pfsync_subheader *)(m->m_data + offset);
+		offset += sizeof(*subh);
+
+		count = 0;
+		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
+			KASSERT(st->sync_state == q,
+				("%s: st->sync_state == q",
+				    __func__));
+			/*
+			 * XXXGL: some of write methods do unlocked reads
+			 * of state data :(
+			 */
+			pfsync_qs[q].write(st, m->m_data + offset);
+			offset += pfsync_qs[q].len;
+			st->sync_state = PFSYNC_S_NONE;
+			pf_release_state(st);
+			count++;
+		}
+		TAILQ_INIT(&sc->sc_qs[q]);
+
+		/* Subheader is filled in once the count is known. */
+		bzero(subh, sizeof(*subh));
+		subh->action = pfsync_qs[q].action;
+		subh->count = htons(count);
+		V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
+	}
+
+	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
+		subh = (struct pfsync_subheader *)(m->m_data + offset);
+		offset += sizeof(*subh);
+
+		count = 0;
+		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
+			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
+
+			bcopy(&ur->ur_msg, m->m_data + offset,
+			    sizeof(ur->ur_msg));
+			offset += sizeof(ur->ur_msg);
+			free(ur, M_PFSYNC);
+			count++;
+		}
+
+		bzero(subh, sizeof(*subh));
+		subh->action = PFSYNC_ACT_UPD_REQ;
+		subh->count = htons(count);
+		V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count;
+	}
+
+	/* has someone built a custom region for us to add? */
+	if (sc->sc_plus != NULL) {
+		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
+		offset += sc->sc_pluslen;
+
+		sc->sc_plus = NULL;
+	}
+
+	subh = (struct pfsync_subheader *)(m->m_data + offset);
+	offset += sizeof(*subh);
+
+	bzero(subh, sizeof(*subh));
+	subh->action = PFSYNC_ACT_EOF;
+	subh->count = htons(1);
+	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++;
+
+	/* we're done, let's put it on the wire */
+	if (ifp->if_bpf) {
+		/* Tap the packet without its IP header. */
+		m->m_data += sizeof(*ip);
+		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
+		BPF_MTAP(ifp, m);
+		m->m_data -= sizeof(*ip);
+		m->m_len = m->m_pkthdr.len = sc->sc_len;
+	}
+
+	if (sc->sc_sync_if == NULL) {
+		sc->sc_len = PFSYNC_MINPKT;
+		m_freem(m);
+		return;
+	}
+
+	if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
+	if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
+	sc->sc_len = PFSYNC_MINPKT;
+
+	if (!_IF_QFULL(&sc->sc_ifp->if_snd))
+		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
+	else {
+		m_freem(m);
+		if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
+	}
+	if (schedswi)
+		swi_sched(V_pfsync_swi_cookie, 0);
+}
+
+/*
+ * pf hook: a state was created locally.  Queue a full insert for it
+ * unless the state, its rule, or its protocol (pfsync itself) is
+ * excluded from syncing.
+ */
+static void
+pfsync_insert_state(struct pf_state *st)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+
+	if (st->state_flags & PFSTATE_NOSYNC)
+		return;
+
+	if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) ||
+	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
+		st->state_flags |= PFSTATE_NOSYNC;
+		return;
+	}
+
+	KASSERT(st->sync_state == PFSYNC_S_NONE,
+	    ("%s: st->sync_state %u", __func__, st->sync_state));
+
+	PFSYNC_LOCK(sc);
+	/* First payload in an empty packet arms the flush timeout. */
+	if (sc->sc_len == PFSYNC_MINPKT)
+		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+
+	pfsync_q_ins(st, PFSYNC_S_INS);
+	PFSYNC_UNLOCK(sc);
+
+	st->sync_updates = 0;
+}
+
+/*
+ * Defer transmission of the initial packet @m of state @st until the
+ * peer acknowledges the state insert or a short timeout fires.
+ * Returns 1 if the mbuf was taken over (deferred), 0 if the caller
+ * should transmit it immediately.
+ */
+static int
+pfsync_defer(struct pf_state *st, struct mbuf *m)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_deferral *pd;
+
+	if (m->m_flags & (M_BCAST|M_MCAST))
+		return (0);
+
+	/* Fix: test sc before dereferencing it via PFSYNC_LOCK(). */
+	if (sc == NULL)
+		return (0);
+
+	PFSYNC_LOCK(sc);
+
+	if (!(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) ||
+	    !(sc->sc_flags & PFSYNCF_DEFER)) {
+		PFSYNC_UNLOCK(sc);
+		return (0);
+	}
+
+	/* Cap the number of outstanding deferrals. */
+	if (sc->sc_deferred >= 128)
+		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
+
+	pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
+	if (pd == NULL) {
+		/* Fix: the pfsync lock was leaked on allocation failure. */
+		PFSYNC_UNLOCK(sc);
+		return (0);
+	}
+	sc->sc_deferred++;
+
+	m->m_flags |= M_SKIP_FIREWALL;
+	st->state_flags |= PFSTATE_ACK;
+
+	pd->pd_sc = sc;
+	pd->pd_refs = 0;
+	pd->pd_st = st;
+	pf_ref_state(st);
+	pd->pd_m = m;
+
+	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
+	callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
+	callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd);
+
+	pfsync_push(sc);
+	/*
+	 * NOTE(review): as in the original, the pfsync lock appears to
+	 * remain held on the return (1) path - confirm the intended
+	 * locking protocol against upstream.
+	 */
+
+	return (1);
+}
+
+/*
+ * Cancel deferral @pd: unlink it, clear the PFSTATE_ACK marker and
+ * either free the held mbuf (@drop != 0) or queue it for
+ * transmission.  Called with the pfsync lock held.
+ */
+static void
+pfsync_undefer(struct pfsync_deferral *pd, int drop)
+{
+	struct pfsync_softc *sc = pd->pd_sc;
+	struct mbuf *m = pd->pd_m;
+	struct pf_state *st = pd->pd_st;
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
+	sc->sc_deferred--;
+	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
+	free(pd, M_PFSYNC);
+	pf_release_state(st);
+
+	if (drop)
+		m_freem(m);
+	else {
+		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
+		pfsync_push(sc);
+	}
+}
+
+/*
+ * Deferral timeout: the insert was never acknowledged in time, so
+ * send the held packet after all.  Runs from a callout holding the
+ * pfsync mutex (CALLOUT_RETURNUNLOCKED: we release it ourselves
+ * before the potentially expensive ip_output()).
+ */
+static void
+pfsync_defer_tmo(void *arg)
+{
+	struct pfsync_deferral *pd = arg;
+	struct pfsync_softc *sc = pd->pd_sc;
+	struct mbuf *m = pd->pd_m;
+	struct pf_state *st = pd->pd_st;
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
+
+	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
+	sc->sc_deferred--;
+	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
+	/*
+	 * NOTE(review): when pd_refs != 0 another thread presumably still
+	 * references pd and is expected to free it; verify against the
+	 * pd_refs users (not visible in this chunk).
+	 */
+	if (pd->pd_refs == 0)
+		free(pd, M_PFSYNC);
+	PFSYNC_UNLOCK(sc);
+
+	ip_output(m, NULL, NULL, 0, NULL, NULL);
+
+	pf_release_state(st);
+
+	CURVNET_RESTORE();
+}
+
+/*
+ * Find and cancel the pending deferral for state @st (dropping the
+ * held packet if @drop).  Panics when none exists: callers only
+ * invoke this while PFSTATE_ACK is set, which implies a deferral.
+ * Called with the pfsync lock held.
+ */
+static void
+pfsync_undefer_state(struct pf_state *st, int drop)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_deferral *pd;
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
+		if (pd->pd_st == st) {
+			/* Only undefer if the callout has not yet fired. */
+			if (callout_stop(&pd->pd_tmo) > 0)
+				pfsync_undefer(pd, drop);
+			return;
+		}
+	}
+
+	panic("%s: unable to find deferred state", __func__);
+}
+
+/*
+ * pf hook: a state changed.  Schedule a compressed update for it
+ * (cancelling any pending deferral first); TCP states that have
+ * accumulated sc_maxupdates queued updates force an immediate push.
+ * Called with the state lock held.
+ */
+static void
+pfsync_update_state(struct pf_state *st)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	int sync = 0;
+
+	PF_STATE_LOCK_ASSERT(st);
+	PFSYNC_LOCK(sc);
+
+	if (st->state_flags & PFSTATE_ACK)
+		pfsync_undefer_state(st, 0);
+	if (st->state_flags & PFSTATE_NOSYNC) {
+		if (st->sync_state != PFSYNC_S_NONE)
+			pfsync_q_del(st);
+		PFSYNC_UNLOCK(sc);
+		return;
+	}
+
+	/* First payload in an empty packet arms the flush timeout. */
+	if (sc->sc_len == PFSYNC_MINPKT)
+		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+
+	switch (st->sync_state) {
+	case PFSYNC_S_UPD_C:
+	case PFSYNC_S_UPD:
+	case PFSYNC_S_INS:
+		/* we're already handling it */
+
+		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
+			st->sync_updates++;
+			if (st->sync_updates >= sc->sc_maxupdates)
+				sync = 1;
+		}
+		break;
+
+	case PFSYNC_S_IACK:
+		pfsync_q_del(st);
+		/* FALLTHROUGH */
+	case PFSYNC_S_NONE:
+		pfsync_q_ins(st, PFSYNC_S_UPD_C);
+		st->sync_updates = 0;
+		break;
+
+	default:
+		panic("%s: unexpected sync state %d", __func__, st->sync_state);
+	}
+
+	/* Push promptly for busy states (updated within the last 2s). */
+	if (sync || (time_uptime - st->pfsync_time) < 2)
+		pfsync_push(sc);
+
+	PFSYNC_UNLOCK(sc);
+}
+
+/*
+ * Queue a PFSYNC_ACT_UPD_REQ for (creatorid, id) - or for everything
+ * if both are zero - unless an identical request is already queued.
+ * Called with the pfsync lock held.
+ */
+static void
+pfsync_request_update(u_int32_t creatorid, u_int64_t id)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct pfsync_upd_req_item *item;
+	size_t nlen = sizeof(struct pfsync_upd_req);
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	/*
+	 * This code does a bit to prevent multiple update requests for the
+	 * same state being generated. It searches current subheader queue,
+	 * but it doesn't lookup into queue of already packed datagrams.
+	 */
+	TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry)
+		if (item->ur_msg.id == id &&
+		    item->ur_msg.creatorid == creatorid)
+			return;
+
+	item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT);
+	if (item == NULL)
+		return; /* XXX stats */
+
+	item->ur_msg.id = id;
+	item->ur_msg.creatorid = creatorid;
+
+	/* The first request also needs a subheader. */
+	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
+		nlen += sizeof(struct pfsync_subheader);
+
+	/* Flush first if the request would overflow the packet. */
+	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
+		pfsync_sendout(1);
+
+		nlen = sizeof(struct pfsync_subheader) +
+		    sizeof(struct pfsync_upd_req);
+	}
+
+	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
+	sc->sc_len += nlen;
+}
+
+/*
+ * Queue a full (uncompressed) update for @st, upgrading any queued
+ * compressed update or insert-ack.  Used to answer update requests
+ * and during bulk sends.  Called with the state lock held.
+ */
+static void
+pfsync_update_state_req(struct pf_state *st)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+
+	PF_STATE_LOCK_ASSERT(st);
+	PFSYNC_LOCK(sc);
+
+	if (st->state_flags & PFSTATE_NOSYNC) {
+		if (st->sync_state != PFSYNC_S_NONE)
+			pfsync_q_del(st);
+		PFSYNC_UNLOCK(sc);
+		return;
+	}
+
+	switch (st->sync_state) {
+	case PFSYNC_S_UPD_C:
+	case PFSYNC_S_IACK:
+		pfsync_q_del(st);
+		/* FALLTHROUGH */
+	case PFSYNC_S_NONE:
+		pfsync_q_ins(st, PFSYNC_S_UPD);
+		pfsync_push(sc);
+		break;
+
+	case PFSYNC_S_INS:
+	case PFSYNC_S_UPD:
+	case PFSYNC_S_DEL:
+		/* we're already handling it */
+		break;
+
+	default:
+		panic("%s: unexpected sync state %d", __func__, st->sync_state);
+	}
+
+	PFSYNC_UNLOCK(sc);
+}
+
+/*
+ * pf hook: a state is being removed.  Queue a compressed delete for
+ * it - or just forget it entirely if the insert was never sent.
+ */
+static void
+pfsync_delete_state(struct pf_state *st)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+
+	PFSYNC_LOCK(sc);
+	if (st->state_flags & PFSTATE_ACK)
+		pfsync_undefer_state(st, 1);
+	if (st->state_flags & PFSTATE_NOSYNC) {
+		if (st->sync_state != PFSYNC_S_NONE)
+			pfsync_q_del(st);
+		PFSYNC_UNLOCK(sc);
+		return;
+	}
+
+	/* First payload in an empty packet arms the flush timeout. */
+	if (sc->sc_len == PFSYNC_MINPKT)
+		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+
+	switch (st->sync_state) {
+	case PFSYNC_S_INS:
+		/* We never got to tell the world so just forget about it. */
+		pfsync_q_del(st);
+		break;
+
+	case PFSYNC_S_UPD_C:
+	case PFSYNC_S_UPD:
+	case PFSYNC_S_IACK:
+		pfsync_q_del(st);
+		/* FALLTHROUGH to putting it on the del list */
+
+	case PFSYNC_S_NONE:
+		pfsync_q_ins(st, PFSYNC_S_DEL);
+		break;
+
+	default:
+		panic("%s: unexpected sync state %d", __func__, st->sync_state);
+	}
+	PFSYNC_UNLOCK(sc);
+}
+
+/*
+ * pf hook: tell the peer to flush all states created by (creatorid,
+ * ifname), e.g. after "pfctl -F states".  Sent via the "plus" region
+ * as a preformatted subheader + clear record.
+ */
+static void
+pfsync_clear_states(u_int32_t creatorid, const char *ifname)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	struct {
+		struct pfsync_subheader subh;
+		struct pfsync_clr clr;
+	} __packed r;
+
+	bzero(&r, sizeof(r));
+
+	r.subh.action = PFSYNC_ACT_CLR;
+	r.subh.count = htons(1);
+	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;
+
+	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
+	r.clr.creatorid = creatorid;
+
+	PFSYNC_LOCK(sc);
+	pfsync_send_plus(&r, sizeof(r));
+	PFSYNC_UNLOCK(sc);
+}
+
+/*
+ * Append @st to action queue @q, accounting for the queue's subheader
+ * if it was empty and flushing first if the packet would exceed the
+ * interface MTU.  Takes a reference on the state.  Called with the
+ * pfsync lock held.
+ */
+static void
+pfsync_q_ins(struct pf_state *st, int q)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	size_t nlen = pfsync_qs[q].len;
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	KASSERT(st->sync_state == PFSYNC_S_NONE,
+	    ("%s: st->sync_state %u", __func__, st->sync_state));
+	KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
+	    sc->sc_len));
+
+	if (TAILQ_EMPTY(&sc->sc_qs[q]))
+		nlen += sizeof(struct pfsync_subheader);
+
+	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
+		pfsync_sendout(1);
+
+		/* After the flush the queue is empty again. */
+		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
+	}
+
+	sc->sc_len += nlen;
+	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
+	st->sync_state = q;
+	pf_ref_state(st);
+}
+
+/*
+ * Remove @st from its action queue, subtracting the queued bytes (and
+ * the subheader if the queue became empty) and dropping the state
+ * reference.  Called with the pfsync lock held.
+ */
+static void
+pfsync_q_del(struct pf_state *st)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+	int q = st->sync_state;
+
+	PFSYNC_LOCK_ASSERT(sc);
+	KASSERT(st->sync_state != PFSYNC_S_NONE,
+	    ("%s: st->sync_state != PFSYNC_S_NONE", __func__));
+
+	sc->sc_len -= pfsync_qs[q].len;
+	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
+	st->sync_state = PFSYNC_S_NONE;
+	pf_release_state(st);
+
+	if (TAILQ_EMPTY(&sc->sc_qs[q]))
+		sc->sc_len -= sizeof(struct pfsync_subheader);
+}
+
+/*
+ * Begin answering a bulk update request: reset the bulk cursor, send
+ * BUS_START and kick off the incremental pfsync_bulk_update()
+ * callout.
+ */
+static void
+pfsync_bulk_start(void)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+
+	if (V_pf_status.debug >= PF_DEBUG_MISC)
+		printf("pfsync: received bulk update request\n");
+
+	PFSYNC_BLOCK(sc);
+
+	sc->sc_ureq_received = time_uptime;
+	sc->sc_bulk_hashid = 0;
+	sc->sc_bulk_stateid = 0;
+	pfsync_bulk_status(PFSYNC_BUS_START);
+	callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
+	PFSYNC_BUNLOCK(sc);
+}
+
+/*
+ * Send one packet's worth of the state table to the peer, then
+ * reschedule ourselves until the whole table (as of sc_ureq_received)
+ * has been walked, finishing with a BUS_END message.  Runs from the
+ * bulk callout under the pfsync block lock.
+ */
+static void
+pfsync_bulk_update(void *arg)
+{
+	struct pfsync_softc *sc = arg;
+	struct pf_state *s;
+	int i, sent = 0;
+
+	PFSYNC_BLOCK_ASSERT(sc);
+	CURVNET_SET(sc->sc_ifp->if_vnet);
+
+	/*
+	 * Start with last state from previous invocation.
+	 * It may had gone, in this case start from the
+	 * hash slot.
+	 */
+	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);
+
+	if (s != NULL)
+		i = PF_IDHASH(s);
+	else
+		i = sc->sc_bulk_hashid;
+
+	for (; i <= pf_hashmask; i++) {
+		struct pf_idhash *ih = &V_pf_idhash[i];
+
+		/* A resumed state comes back with its row locked. */
+		if (s != NULL)
+			PF_HASHROW_ASSERT(ih);
+		else {
+			PF_HASHROW_LOCK(ih);
+			s = LIST_FIRST(&ih->states);
+		}
+
+		for (; s; s = LIST_NEXT(s, entry)) {
+
+			if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) <
+			    sizeof(struct pfsync_state)) {
+				/* We've filled a packet. */
+				sc->sc_bulk_hashid = i;
+				sc->sc_bulk_stateid = s->id;
+				sc->sc_bulk_creatorid = s->creatorid;
+				PF_HASHROW_UNLOCK(ih);
+				callout_reset(&sc->sc_bulk_tmo, 1,
+				    pfsync_bulk_update, sc);
+				goto full;
+			}
+
+			/* Skip states created after the bulk request. */
+			if (s->sync_state == PFSYNC_S_NONE &&
+			    s->timeout < PFTM_MAX &&
+			    s->pfsync_time <= sc->sc_ureq_received) {
+				pfsync_update_state_req(s);
+				sent++;
+			}
+		}
+		PF_HASHROW_UNLOCK(ih);
+	}
+
+	/* We're done. */
+	pfsync_bulk_status(PFSYNC_BUS_END);
+
+full:
+	CURVNET_RESTORE();
+}
+
+/*
+ * Send a bulk-update status (BUS) message with the given @status to
+ * the peer, via the "plus" region of the current packet.
+ */
+static void
+pfsync_bulk_status(u_int8_t status)
+{
+	struct {
+		struct pfsync_subheader subh;
+		struct pfsync_bus bus;
+	} __packed r;
+
+	struct pfsync_softc *sc = V_pfsyncif;
+
+	bzero(&r, sizeof(r));
+
+	r.subh.action = PFSYNC_ACT_BUS;
+	r.subh.count = htons(1);
+	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;
+
+	r.bus.creatorid = V_pf_status.hostid;
+	/* Relative timestamp, validated by the peer in pfsync_in_bus(). */
+	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
+	r.bus.status = status;
+
+	PFSYNC_LOCK(sc);
+	pfsync_send_plus(&r, sizeof(r));
+	PFSYNC_UNLOCK(sc);
+}
+
+/*
+ * Bulk-failure timeout: re-request the bulk update a few times; after
+ * PFSYNC_MAX_BULKTRIES, give up and declare ourselves OK anyway so
+ * the carp demotion is lifted.
+ */
+static void
+pfsync_bulk_fail(void *arg)
+{
+	struct pfsync_softc *sc = arg;
+
+	CURVNET_SET(sc->sc_ifp->if_vnet);
+
+	PFSYNC_BLOCK_ASSERT(sc);
+
+	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
+		/* Try again */
+		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
+		    pfsync_bulk_fail, V_pfsyncif);
+		PFSYNC_LOCK(sc);
+		pfsync_request_update(0, 0);
+		PFSYNC_UNLOCK(sc);
+	} else {
+		/* Pretend like the transfer was ok. */
+		sc->sc_ureq_sent = 0;
+		sc->sc_bulk_tries = 0;
+		PFSYNC_LOCK(sc);
+		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
+			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
+			    "pfsync bulk fail");
+		sc->sc_flags |= PFSYNCF_OK;
+		PFSYNC_UNLOCK(sc);
+		if (V_pf_status.debug >= PF_DEBUG_MISC)
+			printf("pfsync: failed to receive bulk update\n");
+	}
+
+	CURVNET_RESTORE();
+}
+
+/*
+ * Attach a caller-built "plus" region (subheader + payload) to the
+ * current packet and flush it immediately.  @plus must stay valid
+ * until pfsync_sendout() copies it.  Called with the pfsync lock
+ * held.
+ */
+static void
+pfsync_send_plus(void *plus, size_t pluslen)
+{
+	struct pfsync_softc *sc = V_pfsyncif;
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	/* Make room first if the region would not fit. */
+	if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu)
+		pfsync_sendout(1);
+
+	sc->sc_plus = plus;
+	sc->sc_len += (sc->sc_pluslen = pluslen);
+
+	pfsync_sendout(1);
+}
+
+/*
+ * One-second flush callout, armed when the first item is queued into
+ * an empty packet: push whatever has accumulated since.
+ */
+static void
+pfsync_timeout(void *arg)
+{
+	struct pfsync_softc *sc = arg;
+
+	CURVNET_SET(sc->sc_ifp->if_vnet);
+	PFSYNC_LOCK(sc);
+	pfsync_push(sc);
+	PFSYNC_UNLOCK(sc);
+	CURVNET_RESTORE();
+}
+
+/*
+ * Ask the software interrupt to transmit the current packet.  Called
+ * with the pfsync lock held.
+ */
+static void
+pfsync_push(struct pfsync_softc *sc)
+{
+
+	PFSYNC_LOCK_ASSERT(sc);
+
+	sc->sc_flags |= PFSYNCF_PUSH;
+	swi_sched(V_pfsync_swi_cookie, 0);
+}
+
+/*
+ * Software interrupt handler: build a packet from queued work if a
+ * push was requested, then ip_output() every mbuf sitting on the
+ * interface send queue (both deferred packets and pfsync datagrams).
+ */
+static void
+pfsyncintr(void *arg)
+{
+	struct pfsync_softc *sc = arg;
+	struct mbuf *m, *n;
+
+	CURVNET_SET(sc->sc_ifp->if_vnet);
+
+	PFSYNC_LOCK(sc);
+	if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) {
+		pfsync_sendout(0);
+		sc->sc_flags &= ~PFSYNCF_PUSH;
+	}
+	_IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m);
+	PFSYNC_UNLOCK(sc);
+
+	for (; m != NULL; m = n) {
+
+		n = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+
+		/*
+		 * We distinguish between a deferral packet and our
+		 * own pfsync packet based on M_SKIP_FIREWALL
+		 * flag. This is XXX.
+		 */
+		if (m->m_flags & M_SKIP_FIREWALL)
+			ip_output(m, NULL, NULL, 0, NULL, NULL);
+		else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
+		    NULL) == 0)
+			V_pfsyncstats.pfsyncs_opackets++;
+		else
+			V_pfsyncstats.pfsyncs_oerrors++;
+	}
+	CURVNET_RESTORE();
+}
+
+/*
+ * Join the pfsync multicast group on @ifp.  @mship is a caller-
+ * allocated membership array: ownership passes to imo on success; on
+ * failure the caller remains responsible for freeing it.
+ */
+static int
+pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship)
+{
+	struct ip_moptions *imo = &sc->sc_imo;
+	int error;
+
+	if (!(ifp->if_flags & IFF_MULTICAST))
+		return (EADDRNOTAVAIL);
+
+	imo->imo_membership = (struct in_multi **)mship;
+	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
+	imo->imo_multicast_vif = -1;
+
+	if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL,
+	    &imo->imo_membership[0])) != 0) {
+		imo->imo_membership = NULL;
+		return (error);
+	}
+	imo->imo_num_memberships++;
+	imo->imo_multicast_ifp = ifp;
+	imo->imo_multicast_ttl = PFSYNC_DFLTTL;
+	imo->imo_multicast_loop = 0;
+
+	return (0);
+}
+
+/* Leave the pfsync multicast group and release the membership array. */
+static void
+pfsync_multicast_cleanup(struct pfsync_softc *sc)
+{
+	struct ip_moptions *imo = &sc->sc_imo;
+
+	in_leavegroup(imo->imo_membership[0], NULL);
+	free(imo->imo_membership, M_PFSYNC);
+	imo->imo_membership = NULL;
+	imo->imo_multicast_ifp = NULL;
+}
+
+#ifdef INET
+extern struct domain inetdomain;
+/* Raw-socket style protocol entry delivering IPPROTO_PFSYNC input. */
+static struct protosw in_pfsync_protosw = {
+	.pr_type =		SOCK_RAW,
+	.pr_domain =		&inetdomain,
+	.pr_protocol =		IPPROTO_PFSYNC,
+	.pr_flags =		PR_ATOMIC|PR_ADDR,
+	.pr_input =		pfsync_input,
+	.pr_output =		rip_output,
+	.pr_ctloutput =		rip_ctloutput,
+	.pr_usrreqs =		&rip_usrreqs
+};
+#endif
+
+/*
+ * Publish the pfsync hooks into pf (under the rules write lock) so pf
+ * starts feeding us state events.
+ */
+static void
+pfsync_pointers_init(void)
+{
+
+	PF_RULES_WLOCK();
+	pfsync_state_import_ptr = pfsync_state_import;
+	pfsync_insert_state_ptr = pfsync_insert_state;
+	pfsync_update_state_ptr = pfsync_update_state;
+	pfsync_delete_state_ptr = pfsync_delete_state;
+	pfsync_clear_states_ptr = pfsync_clear_states;
+	pfsync_defer_ptr = pfsync_defer;
+	PF_RULES_WUNLOCK();
+}
+
+/*
+ * Withdraw the pfsync hooks from pf (under the rules write lock) so
+ * pf stops calling into us.
+ */
+static void
+pfsync_pointers_uninit(void)
+{
+
+	PF_RULES_WLOCK();
+	pfsync_state_import_ptr = NULL;
+	pfsync_insert_state_ptr = NULL;
+	pfsync_update_state_ptr = NULL;
+	pfsync_delete_state_ptr = NULL;
+	pfsync_clear_states_ptr = NULL;
+	pfsync_defer_ptr = NULL;
+	PF_RULES_WUNLOCK();
+}
+
+/*
+ * Per-VNET initialization: register the pfsync interface cloner and
+ * the software interrupt used for transmission.  On swi failure the
+ * cloner is detached again and the error is only logged.
+ */
+static void
+vnet_pfsync_init(const void *unused __unused)
+{
+	int error;
+
+	V_pfsync_cloner = if_clone_simple(pfsyncname,
+	    pfsync_clone_create, pfsync_clone_destroy, 1);
+	error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif,
+	    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
+	if (error) {
+		if_clone_detach(V_pfsync_cloner);
+		log(LOG_INFO, "swi_add() failed in %s\n", __func__);
+	}
+}
+VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
+    vnet_pfsync_init, NULL);
+
+/* Per-VNET teardown: undo vnet_pfsync_init(). */
+static void
+vnet_pfsync_uninit(const void *unused __unused)
+{
+
+	if_clone_detach(V_pfsync_cloner);
+	swi_remove(V_pfsync_swi_cookie);
+}
+/*
+ * Detach after pf is gone; otherwise we might touch pfsync memory
+ * from within pf after freeing pfsync.
+ */
+VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND,
+    vnet_pfsync_uninit, NULL);
+
+/*
+ * Module load: register the pfsync protocol with the inet domain and
+ * publish the pf hooks.  Returns 0 or an errno.
+ */
+static int
+pfsync_init(void)
+{
+#ifdef INET
+	int error;
+
+	error = pf_proto_register(PF_INET, &in_pfsync_protosw);
+	if (error)
+		return (error);
+	error = ipproto_register(IPPROTO_PFSYNC);
+	if (error) {
+		pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
+		return (error);
+	}
+#endif
+	pfsync_pointers_init();
+
+	return (0);
+}
+
+/*
+ * Module unload: withdraw the pf hooks and unregister the protocol.
+ */
+static void
+pfsync_uninit(void)
+{
+
+	pfsync_pointers_uninit();
+
+#ifdef INET
+	ipproto_unregister(IPPROTO_PFSYNC);
+	pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
+#endif
+}
+
+/*
+ * Module event handler.  MOD_QUIESCE always fails so the module is
+ * not unloaded in normal operation; only a forced unload reaches
+ * MOD_UNLOAD.
+ */
+static int
+pfsync_modevent(module_t mod, int type, void *data)
+{
+	int error = 0;
+
+	switch (type) {
+	case MOD_LOAD:
+		error = pfsync_init();
+		break;
+	case MOD_QUIESCE:
+		/*
+		 * Module should not be unloaded due to race conditions.
+		 */
+		error = EBUSY;
+		break;
+	case MOD_UNLOAD:
+		pfsync_uninit();
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return (error);
+}
+
+/* Module glue: name, event handler, no extra argument. */
+static moduledata_t pfsync_mod = {
+	pfsyncname,
+	pfsync_modevent,
+	0
+};
+
+#define PFSYNC_MODVER 1
+
+/* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */
+DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
+MODULE_VERSION(pfsync, PFSYNC_MODVER);
+MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);
diff --git a/freebsd/sys/netpfil/pf/in4_cksum.c b/freebsd/sys/netpfil/pf/in4_cksum.c
new file mode 100644
index 00000000..19cc8ac4
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/in4_cksum.c
@@ -0,0 +1,122 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $FreeBSD$ */
+/* $OpenBSD: in4_cksum.c,v 1.7 2003/06/02 23:28:13 millert Exp $ */
+/* $KAME: in4_cksum.c,v 1.10 2001/11/30 10:06:15 itojun Exp $ */
+/* $NetBSD: in_cksum.c,v 1.13 1996/10/13 02:03:03 christos Exp $ */
+
+/*
+ * Copyright (C) 1999 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1988, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+
+#include <machine/in_cksum.h>
+
+#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x)
+#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; (void)ADDCARRY(sum);}
+
+int in4_cksum(struct mbuf *, u_int8_t, int, int);
+
+int
+in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len)
+{
+ union {
+ struct ipovly ipov;
+ u_int16_t w[10];
+ } u;
+ union {
+ u_int16_t s[2];
+ u_int32_t l;
+ } l_util;
+
+ u_int16_t *w;
+ int psum;
+ int sum = 0;
+
+ if (nxt != 0) {
+ /* pseudo header */
+ if (off < sizeof(struct ipovly))
+ panic("in4_cksum: offset too short");
+ if (m->m_len < sizeof(struct ip))
+ panic("in4_cksum: bad mbuf chain");
+ bzero(&u.ipov, sizeof(u.ipov));
+ u.ipov.ih_len = htons(len);
+ u.ipov.ih_pr = nxt;
+ u.ipov.ih_src = mtod(m, struct ip *)->ip_src;
+ u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
+ w = u.w;
+ /* assumes sizeof(ipov) == 20 */
+ sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4];
+ sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9];
+ }
+
+ psum = in_cksum_skip(m, len + off, off);
+ psum = ~psum & 0xffff;
+ sum += psum;
+ REDUCE;
+ return (~sum & 0xffff);
+}
diff --git a/freebsd/sys/netpfil/pf/pf.c b/freebsd/sys/netpfil/pf/pf.c
new file mode 100644
index 00000000..7ac181b5
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/pf.c
@@ -0,0 +1,6657 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2001 Daniel Hartmeier
+ * Copyright (c) 2002 - 2008 Henning Brauer
+ * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Effort sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F30602-01-2-0537.
+ *
+ * $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_bpf.h>
+#include <rtems/bsd/local/opt_pf.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/bus.h>
+#include <sys/endian.h>
+#include <sys/hash.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/limits.h>
+#include <sys/mbuf.h>
+#include <sys/md5.h>
+#include <sys/random.h>
+#include <sys/refcount.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/ucred.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
+#include <net/route.h>
+#include <net/radix_mpath.h>
+#include <net/vnet.h>
+
+#include <net/pfvar.h>
+#include <net/if_pflog.h>
+#include <net/if_pfsync.h>
+
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/in_fib.h>
+#include <netinet/ip.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/icmp_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+
+#include <netpfil/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet6/nd6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet6/in6_fib.h>
+#include <netinet6/scope6_var.h>
+#endif /* INET6 */
+
+#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
+#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
+
+/*
+ * Global variables
+ */
+
+/* state tables */
+VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]);
+VNET_DEFINE(struct pf_palist, pf_pabuf);
+VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active);
+VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive);
+VNET_DEFINE(struct pf_kstatus, pf_status);
+
+VNET_DEFINE(u_int32_t, ticket_altqs_active);
+VNET_DEFINE(u_int32_t, ticket_altqs_inactive);
+VNET_DEFINE(int, altqs_inactive_open);
+VNET_DEFINE(u_int32_t, ticket_pabuf);
+
+VNET_DEFINE(MD5_CTX, pf_tcp_secret_ctx);
+#define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx)
+VNET_DEFINE(u_char, pf_tcp_secret[16]);
+#define V_pf_tcp_secret VNET(pf_tcp_secret)
+VNET_DEFINE(int, pf_tcp_secret_init);
+#define V_pf_tcp_secret_init VNET(pf_tcp_secret_init)
+VNET_DEFINE(int, pf_tcp_iss_off);
+#define V_pf_tcp_iss_off VNET(pf_tcp_iss_off)
+
+/*
+ * Queue for pf_intr() sends.
+ */
+static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations");
+struct pf_send_entry {
+ STAILQ_ENTRY(pf_send_entry) pfse_next;
+ struct mbuf *pfse_m;
+ enum {
+ PFSE_IP,
+ PFSE_IP6,
+ PFSE_ICMP,
+ PFSE_ICMP6,
+ } pfse_type;
+ struct {
+ int type;
+ int code;
+ int mtu;
+ } icmpopts;
+};
+
+STAILQ_HEAD(pf_send_head, pf_send_entry);
+static VNET_DEFINE(struct pf_send_head, pf_sendqueue);
+#define V_pf_sendqueue VNET(pf_sendqueue)
+
+static struct mtx pf_sendqueue_mtx;
+MTX_SYSINIT(pf_sendqueue_mtx, &pf_sendqueue_mtx, "pf send queue", MTX_DEF);
+#define PF_SENDQ_LOCK() mtx_lock(&pf_sendqueue_mtx)
+#define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx)
+
+/*
+ * Queue for pf_overload_task() tasks.
+ */
+struct pf_overload_entry {
+ SLIST_ENTRY(pf_overload_entry) next;
+ struct pf_addr addr;
+ sa_family_t af;
+ uint8_t dir;
+ struct pf_rule *rule;
+};
+
+SLIST_HEAD(pf_overload_head, pf_overload_entry);
+static VNET_DEFINE(struct pf_overload_head, pf_overloadqueue);
+#define V_pf_overloadqueue VNET(pf_overloadqueue)
+static VNET_DEFINE(struct task, pf_overloadtask);
+#define V_pf_overloadtask VNET(pf_overloadtask)
+
+static struct mtx pf_overloadqueue_mtx;
+MTX_SYSINIT(pf_overloadqueue_mtx, &pf_overloadqueue_mtx,
+ "pf overload/flush queue", MTX_DEF);
+#define PF_OVERLOADQ_LOCK() mtx_lock(&pf_overloadqueue_mtx)
+#define PF_OVERLOADQ_UNLOCK() mtx_unlock(&pf_overloadqueue_mtx)
+
+VNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules);
+struct mtx pf_unlnkdrules_mtx;
+MTX_SYSINIT(pf_unlnkdrules_mtx, &pf_unlnkdrules_mtx, "pf unlinked rules",
+ MTX_DEF);
+
+static VNET_DEFINE(uma_zone_t, pf_sources_z);
+#define V_pf_sources_z VNET(pf_sources_z)
+uma_zone_t pf_mtag_z;
+VNET_DEFINE(uma_zone_t, pf_state_z);
+VNET_DEFINE(uma_zone_t, pf_state_key_z);
+
+VNET_DEFINE(uint64_t, pf_stateid[MAXCPU]);
+#define PFID_CPUBITS 8
+#define PFID_CPUSHIFT (sizeof(uint64_t) * NBBY - PFID_CPUBITS)
+#define PFID_CPUMASK ((uint64_t)((1 << PFID_CPUBITS) - 1) << PFID_CPUSHIFT)
+#define PFID_MAXID (~PFID_CPUMASK)
+CTASSERT((1 << PFID_CPUBITS) >= MAXCPU);
+
+static void pf_src_tree_remove_state(struct pf_state *);
+static void pf_init_threshold(struct pf_threshold *, u_int32_t,
+ u_int32_t);
+static void pf_add_threshold(struct pf_threshold *);
+static int pf_check_threshold(struct pf_threshold *);
+
+static void pf_change_ap(struct mbuf *, struct pf_addr *, u_int16_t *,
+ u_int16_t *, u_int16_t *, struct pf_addr *,
+ u_int16_t, u_int8_t, sa_family_t);
+static int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
+ struct tcphdr *, struct pf_state_peer *);
+static void pf_change_icmp(struct pf_addr *, u_int16_t *,
+ struct pf_addr *, struct pf_addr *, u_int16_t,
+ u_int16_t *, u_int16_t *, u_int16_t *,
+ u_int16_t *, u_int8_t, sa_family_t);
+static void pf_send_tcp(struct mbuf *,
+ const struct pf_rule *, sa_family_t,
+ const struct pf_addr *, const struct pf_addr *,
+ u_int16_t, u_int16_t, u_int32_t, u_int32_t,
+ u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
+ u_int16_t, struct ifnet *);
+static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
+ sa_family_t, struct pf_rule *);
+static void pf_detach_state(struct pf_state *);
+static int pf_state_key_attach(struct pf_state_key *,
+ struct pf_state_key *, struct pf_state *);
+static void pf_state_key_detach(struct pf_state *, int);
+static int pf_state_key_ctor(void *, int, void *, int);
+static u_int32_t pf_tcp_iss(struct pf_pdesc *);
+static int pf_test_rule(struct pf_rule **, struct pf_state **,
+ int, struct pfi_kif *, struct mbuf *, int,
+ struct pf_pdesc *, struct pf_rule **,
+ struct pf_ruleset **, struct inpcb *);
+static int pf_create_state(struct pf_rule *, struct pf_rule *,
+ struct pf_rule *, struct pf_pdesc *,
+ struct pf_src_node *, struct pf_state_key *,
+ struct pf_state_key *, struct mbuf *, int,
+ u_int16_t, u_int16_t, int *, struct pfi_kif *,
+ struct pf_state **, int, u_int16_t, u_int16_t,
+ int);
+static int pf_test_fragment(struct pf_rule **, int,
+ struct pfi_kif *, struct mbuf *, void *,
+ struct pf_pdesc *, struct pf_rule **,
+ struct pf_ruleset **);
+static int pf_tcp_track_full(struct pf_state_peer *,
+ struct pf_state_peer *, struct pf_state **,
+ struct pfi_kif *, struct mbuf *, int,
+ struct pf_pdesc *, u_short *, int *);
+static int pf_tcp_track_sloppy(struct pf_state_peer *,
+ struct pf_state_peer *, struct pf_state **,
+ struct pf_pdesc *, u_short *);
+static int pf_test_state_tcp(struct pf_state **, int,
+ struct pfi_kif *, struct mbuf *, int,
+ void *, struct pf_pdesc *, u_short *);
+static int pf_test_state_udp(struct pf_state **, int,
+ struct pfi_kif *, struct mbuf *, int,
+ void *, struct pf_pdesc *);
+static int pf_test_state_icmp(struct pf_state **, int,
+ struct pfi_kif *, struct mbuf *, int,
+ void *, struct pf_pdesc *, u_short *);
+static int pf_test_state_other(struct pf_state **, int,
+ struct pfi_kif *, struct mbuf *, struct pf_pdesc *);
+static u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t,
+ sa_family_t);
+static u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t,
+ sa_family_t);
+static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
+ int, u_int16_t);
+static int pf_check_proto_cksum(struct mbuf *, int, int,
+ u_int8_t, sa_family_t);
+static void pf_print_state_parts(struct pf_state *,
+ struct pf_state_key *, struct pf_state_key *);
+static int pf_addr_wrap_neq(struct pf_addr_wrap *,
+ struct pf_addr_wrap *);
+static struct pf_state *pf_find_state(struct pfi_kif *,
+ struct pf_state_key_cmp *, u_int);
+static int pf_src_connlimit(struct pf_state **);
+static void pf_overload_task(void *v, int pending);
+static int pf_insert_src_node(struct pf_src_node **,
+ struct pf_rule *, struct pf_addr *, sa_family_t);
+static u_int pf_purge_expired_states(u_int, int);
+static void pf_purge_unlinked_rules(void);
+static int pf_mtag_uminit(void *, int, int);
+static void pf_mtag_free(struct m_tag *);
+#ifdef INET
+static void pf_route(struct mbuf **, struct pf_rule *, int,
+ struct ifnet *, struct pf_state *,
+ struct pf_pdesc *);
+#endif /* INET */
+#ifdef INET6
+static void pf_change_a6(struct pf_addr *, u_int16_t *,
+ struct pf_addr *, u_int8_t);
+static void pf_route6(struct mbuf **, struct pf_rule *, int,
+ struct ifnet *, struct pf_state *,
+ struct pf_pdesc *);
+#endif /* INET6 */
+
+int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
+
+extern int pf_end_threads;
+
+VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
+
+#define PACKET_LOOPED(pd) ((pd)->pf_mtag && \
+ (pd)->pf_mtag->flags & PF_PACKET_LOOPED)
+
+#define STATE_LOOKUP(i, k, d, s, pd) \
+ do { \
+ (s) = pf_find_state((i), (k), (d)); \
+ if ((s) == NULL) \
+ return (PF_DROP); \
+ if (PACKET_LOOPED(pd)) \
+ return (PF_PASS); \
+ if ((d) == PF_OUT && \
+ (((s)->rule.ptr->rt == PF_ROUTETO && \
+ (s)->rule.ptr->direction == PF_OUT) || \
+ ((s)->rule.ptr->rt == PF_REPLYTO && \
+ (s)->rule.ptr->direction == PF_IN)) && \
+ (s)->rt_kif != NULL && \
+ (s)->rt_kif != (i)) \
+ return (PF_PASS); \
+ } while (0)
+
+#define BOUND_IFACE(r, k) \
+ ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all
+
+#define STATE_INC_COUNTERS(s) \
+ do { \
+ counter_u64_add(s->rule.ptr->states_cur, 1); \
+ counter_u64_add(s->rule.ptr->states_tot, 1); \
+ if (s->anchor.ptr != NULL) { \
+ counter_u64_add(s->anchor.ptr->states_cur, 1); \
+ counter_u64_add(s->anchor.ptr->states_tot, 1); \
+ } \
+ if (s->nat_rule.ptr != NULL) { \
+ counter_u64_add(s->nat_rule.ptr->states_cur, 1);\
+ counter_u64_add(s->nat_rule.ptr->states_tot, 1);\
+ } \
+ } while (0)
+
+#define STATE_DEC_COUNTERS(s) \
+ do { \
+ if (s->nat_rule.ptr != NULL) \
+ counter_u64_add(s->nat_rule.ptr->states_cur, -1);\
+ if (s->anchor.ptr != NULL) \
+ counter_u64_add(s->anchor.ptr->states_cur, -1); \
+ counter_u64_add(s->rule.ptr->states_cur, -1); \
+ } while (0)
+
+static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
+VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
+VNET_DEFINE(struct pf_idhash *, pf_idhash);
+VNET_DEFINE(struct pf_srchash *, pf_srchash);
+
+SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)");
+
+u_long pf_hashmask;
+u_long pf_srchashmask;
+static u_long pf_hashsize;
+static u_long pf_srchashsize;
+
+SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN,
+ &pf_hashsize, 0, "Size of pf(4) states hashtable");
+SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN,
+ &pf_srchashsize, 0, "Size of pf(4) source nodes hashtable");
+
+VNET_DEFINE(void *, pf_swi_cookie);
+
+VNET_DEFINE(uint32_t, pf_hashseed);
+#define V_pf_hashseed VNET(pf_hashseed)
+
+int
+pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
+{
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ if (a->addr32[0] > b->addr32[0])
+ return (1);
+ if (a->addr32[0] < b->addr32[0])
+ return (-1);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (a->addr32[3] > b->addr32[3])
+ return (1);
+ if (a->addr32[3] < b->addr32[3])
+ return (-1);
+ if (a->addr32[2] > b->addr32[2])
+ return (1);
+ if (a->addr32[2] < b->addr32[2])
+ return (-1);
+ if (a->addr32[1] > b->addr32[1])
+ return (1);
+ if (a->addr32[1] < b->addr32[1])
+ return (-1);
+ if (a->addr32[0] > b->addr32[0])
+ return (1);
+ if (a->addr32[0] < b->addr32[0])
+ return (-1);
+ break;
+#endif /* INET6 */
+ default:
+ panic("%s: unknown address family %u", __func__, af);
+ }
+ return (0);
+}
+
+static __inline uint32_t
+pf_hashkey(struct pf_state_key *sk)
+{
+ uint32_t h;
+
+ h = murmur3_32_hash32((uint32_t *)sk,
+ sizeof(struct pf_state_key_cmp)/sizeof(uint32_t),
+ V_pf_hashseed);
+
+ return (h & pf_hashmask);
+}
+
+static __inline uint32_t
+pf_hashsrc(struct pf_addr *addr, sa_family_t af)
+{
+ uint32_t h;
+
+ switch (af) {
+ case AF_INET:
+ h = murmur3_32_hash32((uint32_t *)&addr->v4,
+ sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed);
+ break;
+ case AF_INET6:
+ h = murmur3_32_hash32((uint32_t *)&addr->v6,
+ sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed);
+ break;
+ default:
+ panic("%s: unknown address family %u", __func__, af);
+ }
+
+ return (h & pf_srchashmask);
+}
+
+#ifdef ALTQ
+static int
+pf_state_hash(struct pf_state *s)
+{
+ u_int32_t hv = (intptr_t)s / sizeof(*s);
+
+ hv ^= crc32(&s->src, sizeof(s->src));
+ hv ^= crc32(&s->dst, sizeof(s->dst));
+ if (hv == 0)
+ hv = 1;
+ return (hv);
+}
+#endif
+
+#ifdef INET6
+void
+pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
+{
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ dst->addr32[0] = src->addr32[0];
+ break;
+#endif /* INET */
+ case AF_INET6:
+ dst->addr32[0] = src->addr32[0];
+ dst->addr32[1] = src->addr32[1];
+ dst->addr32[2] = src->addr32[2];
+ dst->addr32[3] = src->addr32[3];
+ break;
+ }
+}
+#endif /* INET6 */
+
+static void
+pf_init_threshold(struct pf_threshold *threshold,
+ u_int32_t limit, u_int32_t seconds)
+{
+ threshold->limit = limit * PF_THRESHOLD_MULT;
+ threshold->seconds = seconds;
+ threshold->count = 0;
+ threshold->last = time_uptime;
+}
+
+static void
+pf_add_threshold(struct pf_threshold *threshold)
+{
+ u_int32_t t = time_uptime, diff = t - threshold->last;
+
+ if (diff >= threshold->seconds)
+ threshold->count = 0;
+ else
+ threshold->count -= threshold->count * diff /
+ threshold->seconds;
+ threshold->count += PF_THRESHOLD_MULT;
+ threshold->last = t;
+}
+
+static int
+pf_check_threshold(struct pf_threshold *threshold)
+{
+ return (threshold->count > threshold->limit);
+}
+
+static int
+pf_src_connlimit(struct pf_state **state)
+{
+ struct pf_overload_entry *pfoe;
+ int bad = 0;
+
+ PF_STATE_LOCK_ASSERT(*state);
+
+ (*state)->src_node->conn++;
+ (*state)->src.tcp_est = 1;
+ pf_add_threshold(&(*state)->src_node->conn_rate);
+
+ if ((*state)->rule.ptr->max_src_conn &&
+ (*state)->rule.ptr->max_src_conn <
+ (*state)->src_node->conn) {
+ counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1);
+ bad++;
+ }
+
+ if ((*state)->rule.ptr->max_src_conn_rate.limit &&
+ pf_check_threshold(&(*state)->src_node->conn_rate)) {
+ counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1);
+ bad++;
+ }
+
+ if (!bad)
+ return (0);
+
+ /* Kill this state. */
+ (*state)->timeout = PFTM_PURGE;
+ (*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
+
+ if ((*state)->rule.ptr->overload_tbl == NULL)
+ return (1);
+
+ /* Schedule overloading and flushing task. */
+ pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT);
+ if (pfoe == NULL)
+ return (1); /* too bad :( */
+
+ bcopy(&(*state)->src_node->addr, &pfoe->addr, sizeof(pfoe->addr));
+ pfoe->af = (*state)->key[PF_SK_WIRE]->af;
+ pfoe->rule = (*state)->rule.ptr;
+ pfoe->dir = (*state)->direction;
+ PF_OVERLOADQ_LOCK();
+ SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
+ PF_OVERLOADQ_UNLOCK();
+ taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);
+
+ return (1);
+}
+
+static void
+pf_overload_task(void *v, int pending)
+{
+ struct pf_overload_head queue;
+ struct pfr_addr p;
+ struct pf_overload_entry *pfoe, *pfoe1;
+ uint32_t killed = 0;
+
+ CURVNET_SET((struct vnet *)v);
+
+ PF_OVERLOADQ_LOCK();
+ queue = V_pf_overloadqueue;
+ SLIST_INIT(&V_pf_overloadqueue);
+ PF_OVERLOADQ_UNLOCK();
+
+ bzero(&p, sizeof(p));
+ SLIST_FOREACH(pfoe, &queue, next) {
+ counter_u64_add(V_pf_status.lcounters[LCNT_OVERLOAD_TABLE], 1);
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+ printf("%s: blocking address ", __func__);
+ pf_print_host(&pfoe->addr, 0, pfoe->af);
+ printf("\n");
+ }
+
+ p.pfra_af = pfoe->af;
+ switch (pfoe->af) {
+#ifdef INET
+ case AF_INET:
+ p.pfra_net = 32;
+ p.pfra_ip4addr = pfoe->addr.v4;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ p.pfra_net = 128;
+ p.pfra_ip6addr = pfoe->addr.v6;
+ break;
+#endif
+ }
+
+ PF_RULES_WLOCK();
+ pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
+ PF_RULES_WUNLOCK();
+ }
+
+ /*
+ * Remove those entries, that don't need flushing.
+ */
+ SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
+ if (pfoe->rule->flush == 0) {
+ SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next);
+ free(pfoe, M_PFTEMP);
+ } else
+ counter_u64_add(
+ V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH], 1);
+
+ /* If nothing to flush, return. */
+ if (SLIST_EMPTY(&queue)) {
+ CURVNET_RESTORE();
+ return;
+ }
+
+ for (int i = 0; i <= pf_hashmask; i++) {
+ struct pf_idhash *ih = &V_pf_idhash[i];
+ struct pf_state_key *sk;
+ struct pf_state *s;
+
+ PF_HASHROW_LOCK(ih);
+ LIST_FOREACH(s, &ih->states, entry) {
+ sk = s->key[PF_SK_WIRE];
+ SLIST_FOREACH(pfoe, &queue, next)
+ if (sk->af == pfoe->af &&
+ ((pfoe->rule->flush & PF_FLUSH_GLOBAL) ||
+ pfoe->rule == s->rule.ptr) &&
+ ((pfoe->dir == PF_OUT &&
+ PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) ||
+ (pfoe->dir == PF_IN &&
+ PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) {
+ s->timeout = PFTM_PURGE;
+ s->src.state = s->dst.state = TCPS_CLOSED;
+ killed++;
+ }
+ }
+ PF_HASHROW_UNLOCK(ih);
+ }
+ SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
+ free(pfoe, M_PFTEMP);
+ if (V_pf_status.debug >= PF_DEBUG_MISC)
+ printf("%s: %u states killed", __func__, killed);
+
+ CURVNET_RESTORE();
+}
+
+/*
+ * Can return locked on failure, so that we can consistently
+ * allocate and insert a new one.
+ */
+struct pf_src_node *
+pf_find_src_node(struct pf_addr *src, struct pf_rule *rule, sa_family_t af,
+ int returnlocked)
+{
+ struct pf_srchash *sh;
+ struct pf_src_node *n;
+
+ counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1);
+
+ sh = &V_pf_srchash[pf_hashsrc(src, af)];
+ PF_HASHROW_LOCK(sh);
+ LIST_FOREACH(n, &sh->nodes, entry)
+ if (n->rule.ptr == rule && n->af == af &&
+ ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) ||
+ (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0)))
+ break;
+ if (n != NULL) {
+ n->states++;
+ PF_HASHROW_UNLOCK(sh);
+ } else if (returnlocked == 0)
+ PF_HASHROW_UNLOCK(sh);
+
+ return (n);
+}
+
+static int
+pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
+ struct pf_addr *src, sa_family_t af)
+{
+
+ KASSERT((rule->rule_flag & PFRULE_RULESRCTRACK ||
+ rule->rpool.opts & PF_POOL_STICKYADDR),
+ ("%s for non-tracking rule %p", __func__, rule));
+
+ if (*sn == NULL)
+ *sn = pf_find_src_node(src, rule, af, 1);
+
+ if (*sn == NULL) {
+ struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)];
+
+ PF_HASHROW_ASSERT(sh);
+
+ if (!rule->max_src_nodes ||
+ counter_u64_fetch(rule->src_nodes) < rule->max_src_nodes)
+ (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
+ else
+ counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES],
+ 1);
+ if ((*sn) == NULL) {
+ PF_HASHROW_UNLOCK(sh);
+ return (-1);
+ }
+
+ pf_init_threshold(&(*sn)->conn_rate,
+ rule->max_src_conn_rate.limit,
+ rule->max_src_conn_rate.seconds);
+
+ (*sn)->af = af;
+ (*sn)->rule.ptr = rule;
+ PF_ACPY(&(*sn)->addr, src, af);
+ LIST_INSERT_HEAD(&sh->nodes, *sn, entry);
+ (*sn)->creation = time_uptime;
+ (*sn)->ruletype = rule->action;
+ (*sn)->states = 1;
+ if ((*sn)->rule.ptr != NULL)
+ counter_u64_add((*sn)->rule.ptr->src_nodes, 1);
+ PF_HASHROW_UNLOCK(sh);
+ counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1);
+ } else {
+ if (rule->max_src_states &&
+ (*sn)->states >= rule->max_src_states) {
+ counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES],
+ 1);
+ return (-1);
+ }
+ }
+ return (0);
+}
+
+void
+pf_unlink_src_node(struct pf_src_node *src)
+{
+
+ PF_HASHROW_ASSERT(&V_pf_srchash[pf_hashsrc(&src->addr, src->af)]);
+ LIST_REMOVE(src, entry);
+ if (src->rule.ptr)
+ counter_u64_add(src->rule.ptr->src_nodes, -1);
+}
+
+u_int
+pf_free_src_nodes(struct pf_src_node_list *head)
+{
+ struct pf_src_node *sn, *tmp;
+ u_int count = 0;
+
+ LIST_FOREACH_SAFE(sn, head, entry, tmp) {
+ uma_zfree(V_pf_sources_z, sn);
+ count++;
+ }
+
+ counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], count);
+
+ return (count);
+}
+
+void
+pf_mtag_initialize()
+{
+
+ pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) +
+ sizeof(struct pf_mtag), NULL, NULL, pf_mtag_uminit, NULL,
+ UMA_ALIGN_PTR, 0);
+}
+
+/* Per-vnet data storage structures initialization. */
+void
+pf_initialize()
+{
+ struct pf_keyhash *kh;
+ struct pf_idhash *ih;
+ struct pf_srchash *sh;
+ u_int i;
+
+ if (pf_hashsize == 0 || !powerof2(pf_hashsize))
+ pf_hashsize = PF_HASHSIZ;
+ if (pf_srchashsize == 0 || !powerof2(pf_srchashsize))
+ pf_srchashsize = PF_HASHSIZ / 4;
+
+ V_pf_hashseed = arc4random();
+
+ /* States and state keys storage. */
+ V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_state),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z;
+ uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT);
+ uma_zone_set_warning(V_pf_state_z, "PF states limit reached");
+
+ V_pf_state_key_z = uma_zcreate("pf state keys",
+ sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ V_pf_keyhash = malloc(pf_hashsize * sizeof(struct pf_keyhash),
+ M_PFHASH, M_WAITOK | M_ZERO);
+ V_pf_idhash = malloc(pf_hashsize * sizeof(struct pf_idhash),
+ M_PFHASH, M_WAITOK | M_ZERO);
+ pf_hashmask = pf_hashsize - 1;
+ for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask;
+ i++, kh++, ih++) {
+ mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK);
+ mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF);
+ }
+
+ /* Source nodes. */
+ V_pf_sources_z = uma_zcreate("pf source nodes",
+ sizeof(struct pf_src_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
+ 0);
+ V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z;
+ uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT);
+ uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached");
+ V_pf_srchash = malloc(pf_srchashsize * sizeof(struct pf_srchash),
+ M_PFHASH, M_WAITOK|M_ZERO);
+ pf_srchashmask = pf_srchashsize - 1;
+ for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++)
+ mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);
+
+ /* ALTQ */
+ TAILQ_INIT(&V_pf_altqs[0]);
+ TAILQ_INIT(&V_pf_altqs[1]);
+ TAILQ_INIT(&V_pf_pabuf);
+ V_pf_altqs_active = &V_pf_altqs[0];
+ V_pf_altqs_inactive = &V_pf_altqs[1];
+
+ /* Send & overload+flush queues. */
+ STAILQ_INIT(&V_pf_sendqueue);
+ SLIST_INIT(&V_pf_overloadqueue);
+ TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, curvnet);
+
+ /* Unlinked, but may be referenced rules. */
+ TAILQ_INIT(&V_pf_unlinked_rules);
+}
+
+void
+pf_mtag_cleanup()
+{
+
+ uma_zdestroy(pf_mtag_z);
+}
+
+void
+pf_cleanup()
+{
+ struct pf_keyhash *kh;
+ struct pf_idhash *ih;
+ struct pf_srchash *sh;
+ struct pf_send_entry *pfse, *next;
+ u_int i;
+
+ for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask;
+ i++, kh++, ih++) {
+ KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty",
+ __func__));
+ KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty",
+ __func__));
+ mtx_destroy(&kh->lock);
+ mtx_destroy(&ih->lock);
+ }
+ free(V_pf_keyhash, M_PFHASH);
+ free(V_pf_idhash, M_PFHASH);
+
+ for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) {
+ KASSERT(LIST_EMPTY(&sh->nodes),
+ ("%s: source node hash not empty", __func__));
+ mtx_destroy(&sh->lock);
+ }
+ free(V_pf_srchash, M_PFHASH);
+
+ STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) {
+ m_freem(pfse->pfse_m);
+ free(pfse, M_PFTEMP);
+ }
+
+ uma_zdestroy(V_pf_sources_z);
+ uma_zdestroy(V_pf_state_z);
+ uma_zdestroy(V_pf_state_key_z);
+}
+
+static int
+pf_mtag_uminit(void *mem, int size, int how)
+{
+ struct m_tag *t;
+
+ t = (struct m_tag *)mem;
+ t->m_tag_cookie = MTAG_ABI_COMPAT;
+ t->m_tag_id = PACKET_TAG_PF;
+ t->m_tag_len = sizeof(struct pf_mtag);
+ t->m_tag_free = pf_mtag_free;
+
+ return (0);
+}
+
+static void
+pf_mtag_free(struct m_tag *t)
+{
+
+ uma_zfree(pf_mtag_z, t);
+}
+
+struct pf_mtag *
+pf_get_mtag(struct mbuf *m)
+{
+ struct m_tag *mtag;
+
+ if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL)
+ return ((struct pf_mtag *)(mtag + 1));
+
+ mtag = uma_zalloc(pf_mtag_z, M_NOWAIT);
+ if (mtag == NULL)
+ return (NULL);
+ bzero(mtag + 1, sizeof(struct pf_mtag));
+ m_tag_prepend(m, mtag);
+
+ return ((struct pf_mtag *)(mtag + 1));
+}
+
/*
 * Attach wire and stack keys to a freshly created state and link the
 * state into the per-key state lists.  Both key arguments are consumed:
 * each is either inserted into the key hash or freed in favor of an
 * identical key already present.  Returns 0 with the state's ID hash
 * row locked, or EEXIST when an equivalent conflicting state exists.
 */
static int
pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks,
    struct pf_state *s)
{
	struct pf_keyhash *khs, *khw, *kh;
	struct pf_state_key *sk, *cur;
	struct pf_state *si, *olds = NULL;
	int idx;

	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
	KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__));
	KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__));

	/*
	 * We need to lock hash slots of both keys. To avoid deadlock
	 * we always lock the slot with lower address first. Unlock order
	 * isn't important.
	 *
	 * We also need to lock ID hash slot before dropping key
	 * locks. On success we return with ID hash slot locked.
	 */

	if (skw == sks) {
		khs = khw = &V_pf_keyhash[pf_hashkey(skw)];
		PF_HASHROW_LOCK(khs);
	} else {
		khs = &V_pf_keyhash[pf_hashkey(sks)];
		khw = &V_pf_keyhash[pf_hashkey(skw)];
		if (khs == khw) {
			/* Distinct keys that hash into the same row. */
			PF_HASHROW_LOCK(khs);
		} else if (khs < khw) {
			PF_HASHROW_LOCK(khs);
			PF_HASHROW_LOCK(khw);
		} else {
			PF_HASHROW_LOCK(khw);
			PF_HASHROW_LOCK(khs);
		}
	}

#define	KEYS_UNLOCK()	do {			\
	if (khs != khw) {			\
		PF_HASHROW_UNLOCK(khs);		\
		PF_HASHROW_UNLOCK(khw);		\
	} else					\
		PF_HASHROW_UNLOCK(khs);		\
} while (0)

	/*
	 * First run: start with wire key.
	 */
	sk = skw;
	kh = khw;
	idx = PF_SK_WIRE;

keyattach:
	/* Look for an identical key already hashed into this row. */
	LIST_FOREACH(cur, &kh->keys, entry)
		if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0)
			break;

	if (cur != NULL) {
		/* Key exists. Check for same kif, if none, add to key. */
		TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) {
			struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)];

			PF_HASHROW_LOCK(ih);
			if (si->kif == s->kif &&
			    si->direction == s->direction) {
				if (sk->proto == IPPROTO_TCP &&
				    si->src.state >= TCPS_FIN_WAIT_2 &&
				    si->dst.state >= TCPS_FIN_WAIT_2) {
					/*
					 * New state matches an old >FIN_WAIT_2
					 * state. We can't drop key hash locks,
					 * thus we can't unlink it properly.
					 *
					 * As a workaround we drop it into
					 * TCPS_CLOSED state, schedule purge
					 * ASAP and push it into the very end
					 * of the slot TAILQ, so that it won't
					 * conflict with our new state.
					 */
					si->src.state = si->dst.state =
					    TCPS_CLOSED;
					si->timeout = PFTM_PURGE;
					olds = si;
				} else {
					/* Genuine collision: log and bail. */
					if (V_pf_status.debug >= PF_DEBUG_MISC) {
						printf("pf: %s key attach "
						    "failed on %s: ",
						    (idx == PF_SK_WIRE) ?
						    "wire" : "stack",
						    s->kif->pfik_name);
						pf_print_state_parts(s,
						    (idx == PF_SK_WIRE) ?
						    sk : NULL,
						    (idx == PF_SK_STACK) ?
						    sk : NULL);
						printf(", existing: ");
						pf_print_state_parts(si,
						    (idx == PF_SK_WIRE) ?
						    sk : NULL,
						    (idx == PF_SK_STACK) ?
						    sk : NULL);
						printf("\n");
					}
					PF_HASHROW_UNLOCK(ih);
					KEYS_UNLOCK();
					uma_zfree(V_pf_state_key_z, sk);
					if (idx == PF_SK_STACK)
						pf_detach_state(s);
					return (EEXIST); /* collision! */
				}
			}
			PF_HASHROW_UNLOCK(ih);
		}
		/* Reuse the existing key; our copy is redundant. */
		uma_zfree(V_pf_state_key_z, sk);
		s->key[idx] = cur;
	} else {
		LIST_INSERT_HEAD(&kh->keys, sk, entry);
		s->key[idx] = sk;
	}

stateattach:
	/* List is sorted, if-bound states before floating. */
	if (s->kif == V_pfi_all)
		TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]);
	else
		TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]);

	if (olds) {
		/* Move the doomed FIN_WAIT_2 state out of lookup's way. */
		TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]);
		TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds,
		    key_list[idx]);
		olds = NULL;
	}

	/*
	 * Attach done. See how should we (or should not?)
	 * attach a second key.
	 */
	if (sks == skw) {
		/* Shared key: just alias the stack slot to the wire key. */
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
		idx = PF_SK_STACK;
		sks = NULL;
		goto stateattach;
	} else if (sks != NULL) {
		/*
		 * Continue attaching with stack key.
		 */
		sk = sks;
		kh = khs;
		idx = PF_SK_STACK;
		sks = NULL;
		goto keyattach;
	}

	/* Take the ID row lock before releasing the key rows. */
	PF_STATE_LOCK(s);
	KEYS_UNLOCK();

	KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL,
	    ("%s failure", __func__));

	return (0);
#undef KEYS_UNLOCK
}
+
/*
 * Unhook a state from both of its keys, taking the appropriate key
 * hash row lock(s).  Handles the common case where wire and stack
 * share a single key with one lock acquisition.
 */
static void
pf_detach_state(struct pf_state *s)
{
	struct pf_state_key *sks = s->key[PF_SK_STACK];
	struct pf_keyhash *kh;

	if (sks != NULL) {
		kh = &V_pf_keyhash[pf_hashkey(sks)];
		PF_HASHROW_LOCK(kh);
		/* Re-checked under the row lock before detaching. */
		if (s->key[PF_SK_STACK] != NULL)
			pf_state_key_detach(s, PF_SK_STACK);
		/*
		 * If both point to same key, then we are done.
		 */
		if (sks == s->key[PF_SK_WIRE]) {
			pf_state_key_detach(s, PF_SK_WIRE);
			PF_HASHROW_UNLOCK(kh);
			return;
		}
		PF_HASHROW_UNLOCK(kh);
	}

	/* Wire key differs (or stack key was absent): detach it too. */
	if (s->key[PF_SK_WIRE] != NULL) {
		kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])];
		PF_HASHROW_LOCK(kh);
		if (s->key[PF_SK_WIRE] != NULL)
			pf_state_key_detach(s, PF_SK_WIRE);
		PF_HASHROW_UNLOCK(kh);
	}
}
+
/*
 * Unlink 's' from the state list of its key at slot 'idx' and clear
 * the back pointer; free the key once no state (wire or stack side)
 * references it.  Caller must hold the key hash row lock, which is
 * asserted under INVARIANTS.
 */
static void
pf_state_key_detach(struct pf_state *s, int idx)
{
	struct pf_state_key *sk = s->key[idx];
#ifdef INVARIANTS
	struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)];

	PF_HASHROW_ASSERT(kh);
#endif
	TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]);
	s->key[idx] = NULL;

	/* Last user gone on both sides: take the key out of the hash. */
	if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) {
		LIST_REMOVE(sk, entry);
		uma_zfree(V_pf_state_key_z, sk);
	}
}
+
+static int
+pf_state_key_ctor(void *mem, int size, void *arg, int flags)
+{
+ struct pf_state_key *sk = mem;
+
+ bzero(sk, sizeof(struct pf_state_key_cmp));
+ TAILQ_INIT(&sk->states[PF_SK_WIRE]);
+ TAILQ_INIT(&sk->states[PF_SK_STACK]);
+
+ return (0);
+}
+
+struct pf_state_key *
+pf_state_key_setup(struct pf_pdesc *pd, struct pf_addr *saddr,
+ struct pf_addr *daddr, u_int16_t sport, u_int16_t dport)
+{
+ struct pf_state_key *sk;
+
+ sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
+ if (sk == NULL)
+ return (NULL);
+
+ PF_ACPY(&sk->addr[pd->sidx], saddr, pd->af);
+ PF_ACPY(&sk->addr[pd->didx], daddr, pd->af);
+ sk->port[pd->sidx] = sport;
+ sk->port[pd->didx] = dport;
+ sk->proto = pd->proto;
+ sk->af = pd->af;
+
+ return (sk);
+}
+
+struct pf_state_key *
+pf_state_key_clone(struct pf_state_key *orig)
+{
+ struct pf_state_key *sk;
+
+ sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
+ if (sk == NULL)
+ return (NULL);
+
+ bcopy(orig, sk, sizeof(struct pf_state_key_cmp));
+
+ return (sk);
+}
+
/*
 * Insert a fully built state into the key and ID hashes.  On success
 * returns 0 with the state's ID hash row locked and two references
 * held (key lists + ID hash); on collision returns EEXIST with the
 * state detached again.
 */
int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
    struct pf_state_key *sks, struct pf_state *s)
{
	struct pf_idhash *ih;
	struct pf_state *cur;
	int error;

	KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]),
	    ("%s: sks not pristine", __func__));
	KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]),
	    ("%s: skw not pristine", __func__));
	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));

	s->kif = kif;

	if (s->id == 0 && s->creatorid == 0) {
		/*
		 * Generate an ID from a per-CPU counter with the CPU
		 * number in the top bits, stored big-endian on the wire.
		 */
		/* XXX: should be atomic, but probability of collision low */
		if ((s->id = V_pf_stateid[curcpu]++) == PFID_MAXID)
			V_pf_stateid[curcpu] = 1;
		s->id |= (uint64_t )curcpu << PFID_CPUSHIFT;
		s->id = htobe64(s->id);
		s->creatorid = V_pf_status.hostid;
	}

	/* Returns with ID locked on success. */
	if ((error = pf_state_key_attach(skw, sks, s)) != 0)
		return (error);

	ih = &V_pf_idhash[PF_IDHASH(s)];
	PF_HASHROW_ASSERT(ih);
	/* Reject duplicate (id, creatorid) pairs, e.g. from pfsync. */
	LIST_FOREACH(cur, &ih->states, entry)
		if (cur->id == s->id && cur->creatorid == s->creatorid)
			break;

	if (cur != NULL) {
		PF_HASHROW_UNLOCK(ih);
		if (V_pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state ID collision: "
			    "id: %016llx creatorid: %08x\n",
			    (unsigned long long)be64toh(s->id),
			    ntohl(s->creatorid));
		}
		pf_detach_state(s);
		return (EEXIST);
	}
	LIST_INSERT_HEAD(&ih->states, s, entry);
	/* One for keys, one for ID hash. */
	refcount_init(&s->refs, 2);

	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_INSERT], 1);
	if (pfsync_insert_state_ptr != NULL)
		pfsync_insert_state_ptr(s);

	/* Returns locked. */
	return (0);
}
+
+/*
+ * Find state by ID: returns with locked row on success.
+ */
+struct pf_state *
+pf_find_state_byid(uint64_t id, uint32_t creatorid)
+{
+ struct pf_idhash *ih;
+ struct pf_state *s;
+
+ counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
+
+ ih = &V_pf_idhash[(be64toh(id) % (pf_hashmask + 1))];
+
+ PF_HASHROW_LOCK(ih);
+ LIST_FOREACH(s, &ih->states, entry)
+ if (s->id == id && s->creatorid == creatorid)
+ break;
+
+ if (s == NULL)
+ PF_HASHROW_UNLOCK(ih);
+
+ return (s);
+}
+
+/*
+ * Find state by key.
+ * Returns with ID hash slot locked on success.
+ */
+static struct pf_state *
+pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir)
+{
+ struct pf_keyhash *kh;
+ struct pf_state_key *sk;
+ struct pf_state *s;
+ int idx;
+
+ counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
+
+ kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)];
+
+ PF_HASHROW_LOCK(kh);
+ LIST_FOREACH(sk, &kh->keys, entry)
+ if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
+ break;
+ if (sk == NULL) {
+ PF_HASHROW_UNLOCK(kh);
+ return (NULL);
+ }
+
+ idx = (dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK);
+
+ /* List is sorted, if-bound states before floating ones. */
+ TAILQ_FOREACH(s, &sk->states[idx], key_list[idx])
+ if (s->kif == V_pfi_all || s->kif == kif) {
+ PF_STATE_LOCK(s);
+ PF_HASHROW_UNLOCK(kh);
+ if (s->timeout >= PFTM_MAX) {
+ /*
+ * State is either being processed by
+ * pf_unlink_state() in an other thread, or
+ * is scheduled for immediate expiry.
+ */
+ PF_STATE_UNLOCK(s);
+ return (NULL);
+ }
+ return (s);
+ }
+ PF_HASHROW_UNLOCK(kh);
+
+ return (NULL);
+}
+
/*
 * Find a state by key ignoring interface binding.  When 'more' is
 * NULL the first match is returned immediately; otherwise the first
 * match is returned and *more is incremented once per extra match.
 * PF_INOUT searches the wire list first, then the stack list.
 * Unlike pf_find_state(), returns with no locks held.
 */
struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_keyhash *kh;
	struct pf_state_key *sk;
	struct pf_state *s, *ret = NULL;
	int idx, inout = 0;

	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);

	kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)];

	PF_HASHROW_LOCK(kh);
	LIST_FOREACH(sk, &kh->keys, entry)
		if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
			break;
	if (sk == NULL) {
		PF_HASHROW_UNLOCK(kh);
		return (NULL);
	}
	switch (dir) {
	case PF_IN:
		idx = PF_SK_WIRE;
		break;
	case PF_OUT:
		idx = PF_SK_STACK;
		break;
	case PF_INOUT:
		idx = PF_SK_WIRE;
		inout = 1;
		break;
	default:
		panic("%s: dir %u", __func__, dir);
	}
second_run:
	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
		if (more == NULL) {
			PF_HASHROW_UNLOCK(kh);
			return (s);
		}

		if (ret)
			(*more)++;
		else
			ret = s;
	}
	if (inout == 1) {
		/* PF_INOUT: also walk the stack-side list. */
		inout = 0;
		idx = PF_SK_STACK;
		goto second_run;
	}
	PF_HASHROW_UNLOCK(kh);

	return (ret);
}
+
+/* END state table stuff */
+
/*
 * Queue a prebuilt packet entry for transmission and schedule the
 * software interrupt that drains the queue (pf_intr()).
 */
static void
pf_send(struct pf_send_entry *pfse)
{

	PF_SENDQ_LOCK();
	STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next);
	PF_SENDQ_UNLOCK();
	swi_sched(V_pf_swi_cookie, 0);
}
+
/*
 * Software interrupt handler: transmit everything queued by pf_send().
 * Plain IP/IPv6 packets go through ip(6)_output(); ICMP entries are
 * turned into error responses for the queued mbuf.
 */
void
pf_intr(void *v)
{
	struct pf_send_head queue;
	struct pf_send_entry *pfse, *next;

	CURVNET_SET((struct vnet *)v);

	/* Steal the whole pending queue under the lock, send unlocked. */
	PF_SENDQ_LOCK();
	queue = V_pf_sendqueue;
	STAILQ_INIT(&V_pf_sendqueue);
	PF_SENDQ_UNLOCK();

	STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) {
		switch (pfse->pfse_type) {
#ifdef INET
		case PFSE_IP:
			ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL);
			break;
		case PFSE_ICMP:
			icmp_error(pfse->pfse_m, pfse->icmpopts.type,
			    pfse->icmpopts.code, 0, pfse->icmpopts.mtu);
			break;
#endif /* INET */
#ifdef INET6
		case PFSE_IP6:
			ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL,
			    NULL);
			break;
		case PFSE_ICMP6:
			icmp6_error(pfse->pfse_m, pfse->icmpopts.type,
			    pfse->icmpopts.code, pfse->icmpopts.mtu);
			break;
#endif /* INET6 */
		default:
			panic("%s: unknown type", __func__);
		}
		free(pfse, M_PFTEMP);
	}
	CURVNET_RESTORE();
}
+
/*
 * Kernel process that drives all periodic pf garbage collection.
 * Wakes roughly ten times a second (sleeping on the pf_purge_thread
 * wait channel under the rules read lock) and, per vnet, purges a
 * slice of the state table each pass plus the other expired objects
 * once per PFTM_INTERVAL.  Exits when pf_end_threads is raised.
 */
void
pf_purge_thread(void *unused __unused)
{
	VNET_ITERATOR_DECL(vnet_iter);
	u_int idx = 0;

	for (;;) {
		PF_RULES_RLOCK();
		rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10);
		PF_RULES_RUNLOCK();

		VNET_LIST_RLOCK();
		VNET_FOREACH(vnet_iter) {
			CURVNET_SET(vnet_iter);

			/* Shutdown request: acknowledge and terminate. */
			if (pf_end_threads) {
				pf_end_threads++;
				wakeup(pf_purge_thread);
				kproc_exit(0);
			}

			/* Process 1/interval fraction of the state table every run. */
			idx = pf_purge_expired_states(idx, pf_hashmask /
			    (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));

			/* Purge other expired types every PFTM_INTERVAL seconds. */
			if (idx == 0) {
				/*
				 * Order is important:
				 * - states and src nodes reference rules
				 * - states and rules reference kifs
				 */
				pf_purge_expired_fragments();
				pf_purge_expired_src_nodes();
				pf_purge_unlinked_rules();
				pfi_kif_purge();
			}
			CURVNET_RESTORE();
		}
		VNET_LIST_RUNLOCK();
	}
	/* not reached */
}
+
/*
 * Purge everything for vnet teardown.  The call order below is
 * deliberate (see the comments): mark-and-sweep needs one pass to
 * clear reference flags before the final frees can succeed.
 */
void
pf_unload_vnet_purge(void)
{

	/*
	 * To cleanse up all kifs and rules we need
	 * two runs: first one clears reference flags,
	 * then pf_purge_expired_states() doesn't
	 * raise them, and then second run frees.
	 */
	pf_purge_unlinked_rules();
	pfi_kif_purge();

	/*
	 * Now purge everything.
	 */
	pf_purge_expired_states(0, pf_hashmask);
	pf_purge_expired_fragments();
	pf_purge_expired_src_nodes();

	/*
	 * Now all kifs & rules should be unreferenced,
	 * thus should be successfully freed.
	 */
	pf_purge_unlinked_rules();
	pfi_kif_purge();
}
+
+
/*
 * Compute the uptime at which 'state' expires.  Uses the rule's
 * timeout (falling back to the default rule) and, when adaptive
 * timeouts are configured, shrinks the timeout linearly as the state
 * count climbs from the adaptive 'start' threshold toward 'end'.
 */
u_int32_t
pf_state_expires(const struct pf_state *state)
{
	u_int32_t timeout;
	u_int32_t start;
	u_int32_t end;
	u_int32_t states;

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE)
		return (time_uptime);
	KASSERT(state->timeout != PFTM_UNLINKED,
	    ("pf_state_expires: timeout == PFTM_UNLINKED"));
	KASSERT((state->timeout < PFTM_MAX),
	    ("pf_state_expires: timeout > PFTM_MAX"));
	timeout = state->rule.ptr->timeout[state->timeout];
	if (!timeout)
		timeout = V_pf_default_rule.timeout[state->timeout];
	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		/* Per-rule adaptive settings count the rule's states. */
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = counter_u64_fetch(state->rule.ptr->states_cur);
	} else {
		start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = V_pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states < end)
			return (state->expire + timeout * (end - states) /
			    (end - start));
		else
			/* At or past 'end': expire immediately. */
			return (time_uptime);
	}
	return (state->expire + timeout);
}
+
+void
+pf_purge_expired_src_nodes()
+{
+ struct pf_src_node_list freelist;
+ struct pf_srchash *sh;
+ struct pf_src_node *cur, *next;
+ int i;
+
+ LIST_INIT(&freelist);
+ for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) {
+ PF_HASHROW_LOCK(sh);
+ LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next)
+ if (cur->states == 0 && cur->expire <= time_uptime) {
+ pf_unlink_src_node(cur);
+ LIST_INSERT_HEAD(&freelist, cur, entry);
+ } else if (cur->rule.ptr != NULL)
+ cur->rule.ptr->rule_flag |= PFRULE_REFS;
+ PF_HASHROW_UNLOCK(sh);
+ }
+
+ pf_free_src_nodes(&freelist);
+
+ V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z);
+}
+
/*
 * Drop a dying state's references on its source node(s).  When the
 * last state leaves a node, arm the node's expiry timer using the
 * rule's PFTM_SRC_NODE timeout (default rule as fallback).
 */
static void
pf_src_tree_remove_state(struct pf_state *s)
{
	struct pf_src_node *sn;
	struct pf_srchash *sh;
	uint32_t timeout;

	timeout = s->rule.ptr->timeout[PFTM_SRC_NODE] ?
	    s->rule.ptr->timeout[PFTM_SRC_NODE] :
	    V_pf_default_rule.timeout[PFTM_SRC_NODE];

	if (s->src_node != NULL) {
		sn = s->src_node;
		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
		PF_HASHROW_LOCK(sh);
		/* Established TCP connections are counted separately. */
		if (s->src.tcp_est)
			--sn->conn;
		if (--sn->states == 0)
			sn->expire = time_uptime + timeout;
		PF_HASHROW_UNLOCK(sh);
	}
	/* NAT source node may be distinct from the plain source node. */
	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
		sn = s->nat_src_node;
		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
		PF_HASHROW_LOCK(sh);
		if (--sn->states == 0)
			sn->expire = time_uptime + timeout;
		PF_HASHROW_UNLOCK(sh);
	}
	s->src_node = s->nat_src_node = NULL;
}
+
+/*
+ * Unlink and potentilly free a state. Function may be
+ * called with ID hash row locked, but always returns
+ * unlocked, since it needs to go through key hash locking.
+ */
+int
+pf_unlink_state(struct pf_state *s, u_int flags)
+{
+ struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];
+
+ if ((flags & PF_ENTER_LOCKED) == 0)
+ PF_HASHROW_LOCK(ih);
+ else
+ PF_HASHROW_ASSERT(ih);
+
+ if (s->timeout == PFTM_UNLINKED) {
+ /*
+ * State is being processed
+ * by pf_unlink_state() in
+ * an other thread.
+ */
+ PF_HASHROW_UNLOCK(ih);
+ return (0); /* XXXGL: undefined actually */
+ }
+
+ if (s->src.state == PF_TCPS_PROXY_DST) {
+ /* XXX wire key the right one? */
+ pf_send_tcp(NULL, s->rule.ptr, s->key[PF_SK_WIRE]->af,
+ &s->key[PF_SK_WIRE]->addr[1],
+ &s->key[PF_SK_WIRE]->addr[0],
+ s->key[PF_SK_WIRE]->port[1],
+ s->key[PF_SK_WIRE]->port[0],
+ s->src.seqhi, s->src.seqlo + 1,
+ TH_RST|TH_ACK, 0, 0, 0, 1, s->tag, NULL);
+ }
+
+ LIST_REMOVE(s, entry);
+ pf_src_tree_remove_state(s);
+
+ if (pfsync_delete_state_ptr != NULL)
+ pfsync_delete_state_ptr(s);
+
+ STATE_DEC_COUNTERS(s);
+
+ s->timeout = PFTM_UNLINKED;
+
+ PF_HASHROW_UNLOCK(ih);
+
+ pf_detach_state(s);
+ refcount_release(&s->refs);
+
+ return (pf_release_state(s));
+}
+
/*
 * Final destruction of a state whose last reference is gone.  The
 * state must already be unlinked (PFTM_UNLINKED) and unreferenced.
 */
void
pf_free_state(struct pf_state *cur)
{

	KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur));
	KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__,
	    cur->timeout));

	pf_normalize_tcp_cleanup(cur);
	uma_zfree(V_pf_state_z, cur);
	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1);
}
+
+/*
+ * Called only from pf_purge_thread(), thus serialized.
+ */
+static u_int
+pf_purge_expired_states(u_int i, int maxcheck)
+{
+ struct pf_idhash *ih;
+ struct pf_state *s;
+
+ V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
+
+ /*
+ * Go through hash and unlink states that expire now.
+ */
+ while (maxcheck > 0) {
+
+ ih = &V_pf_idhash[i];
+relock:
+ PF_HASHROW_LOCK(ih);
+ LIST_FOREACH(s, &ih->states, entry) {
+ if (pf_state_expires(s) <= time_uptime) {
+ V_pf_status.states -=
+ pf_unlink_state(s, PF_ENTER_LOCKED);
+ goto relock;
+ }
+ s->rule.ptr->rule_flag |= PFRULE_REFS;
+ if (s->nat_rule.ptr != NULL)
+ s->nat_rule.ptr->rule_flag |= PFRULE_REFS;
+ if (s->anchor.ptr != NULL)
+ s->anchor.ptr->rule_flag |= PFRULE_REFS;
+ s->kif->pfik_flags |= PFI_IFLAG_REFS;
+ if (s->rt_kif)
+ s->rt_kif->pfik_flags |= PFI_IFLAG_REFS;
+ }
+ PF_HASHROW_UNLOCK(ih);
+
+ /* Return when we hit end of hash. */
+ if (++i > pf_hashmask) {
+ V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
+ return (0);
+ }
+
+ maxcheck--;
+ }
+
+ V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
+
+ return (i);
+}
+
+static void
+pf_purge_unlinked_rules()
+{
+ struct pf_rulequeue tmpq;
+ struct pf_rule *r, *r1;
+
+ /*
+ * If we have overloading task pending, then we'd
+ * better skip purging this time. There is a tiny
+ * probability that overloading task references
+ * an already unlinked rule.
+ */
+ PF_OVERLOADQ_LOCK();
+ if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
+ PF_OVERLOADQ_UNLOCK();
+ return;
+ }
+ PF_OVERLOADQ_UNLOCK();
+
+ /*
+ * Do naive mark-and-sweep garbage collecting of old rules.
+ * Reference flag is raised by pf_purge_expired_states()
+ * and pf_purge_expired_src_nodes().
+ *
+ * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK,
+ * use a temporary queue.
+ */
+ TAILQ_INIT(&tmpq);
+ PF_UNLNKDRULES_LOCK();
+ TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) {
+ if (!(r->rule_flag & PFRULE_REFS)) {
+ TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries);
+ TAILQ_INSERT_TAIL(&tmpq, r, entries);
+ } else
+ r->rule_flag &= ~PFRULE_REFS;
+ }
+ PF_UNLNKDRULES_UNLOCK();
+
+ if (!TAILQ_EMPTY(&tmpq)) {
+ PF_RULES_WLOCK();
+ TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) {
+ TAILQ_REMOVE(&tmpq, r, entries);
+ pf_free_rule(r);
+ }
+ PF_RULES_WUNLOCK();
+ }
+}
+
/*
 * Print an address (and optional network-order port 'p') to the
 * console.  IPv4 is printed dotted-quad with ":port"; IPv6 is printed
 * with the longest run of zero groups compressed to "::" and the port
 * in "[port]" form.
 */
void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			printf(":%u", p);
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;
		curstart = curend = maxstart = maxend = 255;
		/* Find the longest run of zero 16-bit groups (255 = none). */
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		/* Flush a zero run that extends to the last group. */
		if ((curend - curstart) >
		    (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				/* Groups inside the run collapse to "::". */
				if (i == 0)
					printf(":");
				if (i == maxend)
					printf(":");
			} else {
				b = ntohs(addr->addr16[i]);
				printf("%x", b);
				if (i < 7)
					printf(":");
			}
		}
		if (p) {
			p = ntohs(p);
			printf("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}
+
/* Print a state using its own wire/stack keys. */
void
pf_print_state(struct pf_state *s)
{
	pf_print_state_parts(s, NULL, NULL);
}
+
/*
 * Print a state description to the console.  Any of the three
 * arguments may be NULL; explicit key arguments override the state's
 * own keys (used by pf_state_key_attach() to print a key that is not
 * attached yet).
 */
static void
pf_print_state_parts(struct pf_state *s,
    struct pf_state_key *skwp, struct pf_state_key *sksp)
{
	struct pf_state_key *skw, *sks;
	u_int8_t proto, dir;

	/* Do our best to fill these, but they're skipped if NULL */
	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
	proto = skw ? skw->proto : (sks ? sks->proto : 0);
	dir = s ? s->direction : 0;

	switch (proto) {
	case IPPROTO_IPV4:
		printf("IPv4");
		break;
	case IPPROTO_IPV6:
		printf("IPv6");
		break;
	case IPPROTO_TCP:
		printf("TCP");
		break;
	case IPPROTO_UDP:
		printf("UDP");
		break;
	case IPPROTO_ICMP:
		printf("ICMP");
		break;
	case IPPROTO_ICMPV6:
		printf("ICMPv6");
		break;
	default:
		printf("%u", proto);
		break;
	}
	switch (dir) {
	case PF_IN:
		printf(" in");
		break;
	case PF_OUT:
		printf(" out");
		break;
	}
	if (skw) {
		printf(" wire: ");
		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
		printf(" ");
		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
	}
	if (sks) {
		printf(" stack: ");
		/* "-" marks a stack key identical to the wire key. */
		if (sks != skw) {
			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
			printf(" ");
			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
		} else
			printf("-");
	}
	if (s) {
		if (proto == IPPROTO_TCP) {
			/* Sequence-tracking windows for both directions. */
			printf(" [lo=%u high=%u win=%u modulator=%u",
			    s->src.seqlo, s->src.seqhi,
			    s->src.max_win, s->src.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				printf(" wscale=%u",
				    s->src.wscale & PF_WSCALE_MASK);
			printf("]");
			printf(" [lo=%u high=%u win=%u modulator=%u",
			    s->dst.seqlo, s->dst.seqhi,
			    s->dst.max_win, s->dst.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				printf(" wscale=%u",
				    s->dst.wscale & PF_WSCALE_MASK);
			printf("]");
		}
		printf(" %u:%u", s->src.state, s->dst.state);
	}
}
+
+void
+pf_print_flags(u_int8_t f)
+{
+ if (f)
+ printf(" ");
+ if (f & TH_FIN)
+ printf("F");
+ if (f & TH_SYN)
+ printf("S");
+ if (f & TH_RST)
+ printf("R");
+ if (f & TH_PUSH)
+ printf("P");
+ if (f & TH_ACK)
+ printf("A");
+ if (f & TH_URG)
+ printf("U");
+ if (f & TH_ECE)
+ printf("E");
+ if (f & TH_CWR)
+ printf("W");
+}
+
/*
 * Advance the pending head for skip-step 'i' up to 'cur', pointing the
 * skip shortcut of every rule in between at 'cur'.  Only meaningful
 * inside pf_calc_skip_steps(), which supplies 'head' and 'cur'.
 */
#define PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)
+
/*
 * Compute skip steps for a ruleset: for each criterion (interface,
 * direction, af, proto, src/dst addr and port) every rule gets a
 * pointer to the next rule that differs in that criterion, letting
 * rule evaluation jump over runs of rules that cannot match.
 */
void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {

		/* Whenever a criterion changes, flush that skip chain. */
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* Terminate all chains at the end of the list (cur == NULL). */
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}
+
/*
 * Compare two address wrappers; returns 1 when they differ.
 * NOTE(review): addr/mask are compared as full 128-bit values
 * (PF_ANEQ with AF_INET6) regardless of the actual family —
 * presumably the unused words are always zeroed; confirm before
 * relying on this for AF_INET wrappers.
 */
static int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
	if (aw1->type != aw2->type)
		return (1);
	switch (aw1->type) {
	case PF_ADDR_ADDRMASK:
	case PF_ADDR_RANGE:
		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
			return (1);
		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
			return (1);
		return (0);
	case PF_ADDR_DYNIFTL:
		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
	case PF_ADDR_NOROUTE:
	case PF_ADDR_URPFFAILED:
		return (0);
	case PF_ADDR_TABLE:
		return (aw1->p.tbl != aw2->p.tbl);
	default:
		printf("invalid address type: %d\n", aw1->type);
		return (1);
	}
}
+
+/**
+ * Checksum updates are a little complicated because the checksum in the TCP/UDP
+ * header isn't always a full checksum. In some cases (i.e. output) it's a
+ * pseudo-header checksum, which is a partial checksum over src/dst IP
+ * addresses, protocol number and length.
+ *
+ * That means we have the following cases:
+ * * Input or forwarding: we don't have TSO, the checksum fields are full
+ * checksums, we need to update the checksum whenever we change anything.
+ * * Output (i.e. the checksum is a pseudo-header checksum):
+ * x The field being updated is src/dst address or affects the length of
+ * the packet. We need to update the pseudo-header checksum (note that this
+ * checksum is not ones' complement).
+ * x Some other field is being modified (e.g. src/dst port numbers): We
+ * don't have to update anything.
+ **/
/*
 * Incrementally update an Internet checksum after a 16-bit field
 * changes from 'old' to 'new' (RFC 1624 style: add the old value,
 * subtract the new, fold the carry).  UDP gets special treatment: a
 * zero checksum means "none" and is preserved, and a computed zero is
 * transmitted as all-ones.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t x;

	if (udp && cksum == 0)
		return (0x0000);

	x = (u_int32_t)cksum + old - new;
	x = (x >> 16) + (x & 0xffff);	/* fold the carry once */
	x &= 0xffff;

	if (udp && x == 0)
		return (0xFFFF);
	return (x);
}
+
/*
 * Like pf_cksum_fixup(), but a no-op when checksum offload is pending:
 * the field then only holds a pseudo-header sum that this change does
 * not affect (see the block comment above pf_cksum_fixup()).
 */
u_int16_t
pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old,
	u_int16_t new, u_int8_t udp)
{
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
		return (cksum);

	return (pf_cksum_fixup(cksum, old, new, udp));
}
+
/*
 * Rewrite an address ('a' -> 'an') and port ('p' -> 'pn'), fixing up
 * the IP header checksum ('ic', IPv4 only) and the protocol checksum
 * ('pc').  When checksum offload is pending the protocol field holds
 * a non-complemented pseudo-header sum, so it is complemented before
 * and after the fixups; port/data changes are then skipped inside
 * pf_proto_cksum_fixup().
 */
static void
pf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic,
    u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u,
    sa_family_t af)
{
	struct pf_addr	ao;
	u_int16_t	po = *p;

	PF_ACPY(&ao, a, af);
	PF_ACPY(a, an, af);

	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
		*pc = ~*pc;

	*p = pn;

	switch (af) {
#ifdef INET
	case AF_INET:
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    ao.addr16[0], an->addr16[0], 0),
		    ao.addr16[1], an->addr16[1], 0);
		/* NOTE(review): redundant — *p was already set to pn above. */
		*p = pn;

		*pc = pf_cksum_fixup(pf_cksum_fixup(*pc,
		    ao.addr16[0], an->addr16[0], u),
		    ao.addr16[1], an->addr16[1], u);

		*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(*pc,
		    ao.addr16[0], an->addr16[0], u),
		    ao.addr16[1], an->addr16[1], u),
		    ao.addr16[2], an->addr16[2], u),
		    ao.addr16[3], an->addr16[3], u),
		    ao.addr16[4], an->addr16[4], u),
		    ao.addr16[5], an->addr16[5], u),
		    ao.addr16[6], an->addr16[6], u),
		    ao.addr16[7], an->addr16[7], u);

		*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
		break;
#endif /* INET6 */
	}

	/* Restore pseudo-header form; 0 is stored as 0xffff. */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
	    CSUM_DELAY_DATA_IPV6)) {
		*pc = ~*pc;
		if (! *pc)
			*pc = 0xffff;
	}
}
+
+/* Changes a u_int32_t. Uses a void * so there are no align restrictions */
+void
+pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
+{
+ u_int32_t ao;
+
+ memcpy(&ao, a, sizeof(ao));
+ memcpy(a, &an, sizeof(u_int32_t));
+ *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
+ ao % 65536, an % 65536, u);
+}
+
/*
 * Like pf_change_a(), but for a protocol-header field: checksum fixup
 * is skipped when offload is pending (see pf_proto_cksum_fixup()).
 */
void
pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp)
{
	u_int32_t ao;

	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));

	*c = pf_proto_cksum_fixup(m,
	    pf_proto_cksum_fixup(m, *c, ao / 65536, an / 65536, udp),
	    ao % 65536, an % 65536, udp);
}
+
#ifdef INET6
/*
 * Rewrite an IPv6 address and fold all eight 16-bit words of the
 * change into checksum '*c'.
 */
static void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	ao;

	PF_ACPY(&ao, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(*c,
	    ao.addr16[0], an->addr16[0], u),
	    ao.addr16[1], an->addr16[1], u),
	    ao.addr16[2], an->addr16[2], u),
	    ao.addr16[3], an->addr16[3], u),
	    ao.addr16[4], an->addr16[4], u),
	    ao.addr16[5], an->addr16[5], u),
	    ao.addr16[6], an->addr16[6], u),
	    ao.addr16[7], an->addr16[7], u);
}
#endif /* INET6 */
+
/*
 * Rewrite the addresses/port embedded in an ICMP error's quoted
 * packet: the inner address 'ia' (and optional port *ip) become
 * 'na'/'np', and optionally the outer header address 'oa' as well.
 * Fixes up, in order: the inner protocol checksum (*pc), the quoted
 * IP header checksum (*h2c), the ICMP checksum (*ic) and the outer
 * IP header checksum (*hc).
 */
static void
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
{
	struct pf_addr	oia, ooa;

	PF_ACPY(&oia, ia, af);
	if (oa)
		PF_ACPY(&ooa, oa, af);

	/* Change inner protocol port, fix inner protocol checksum. */
	if (ip != NULL) {
		u_int16_t	oip = *ip;
		u_int32_t	opc;

		if (pc != NULL)
			opc = *pc;
		*ip = np;
		if (pc != NULL)
			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
		/* The quoted payload is covered by the ICMP checksum too. */
		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
		if (pc != NULL)
			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
	}
	/* Change inner ip address, fix inner ip and icmp checksums. */
	PF_ACPY(ia, na, af);
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t	 oh2c = *h2c;

		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], u),
		    oia.addr16[1], ia->addr16[1], u),
		    oia.addr16[2], ia->addr16[2], u),
		    oia.addr16[3], ia->addr16[3], u),
		    oia.addr16[4], ia->addr16[4], u),
		    oia.addr16[5], ia->addr16[5], u),
		    oia.addr16[6], ia->addr16[6], u),
		    oia.addr16[7], ia->addr16[7], u);
		break;
#endif /* INET6 */
	}
	/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
	if (oa) {
		PF_ACPY(oa, na, af);
		switch (af) {
#ifdef INET
		case AF_INET:
			*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
			    ooa.addr16[0], oa->addr16[0], 0),
			    ooa.addr16[1], oa->addr16[1], 0);
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(*ic,
			    ooa.addr16[0], oa->addr16[0], u),
			    ooa.addr16[1], oa->addr16[1], u),
			    ooa.addr16[2], oa->addr16[2], u),
			    ooa.addr16[3], oa->addr16[3], u),
			    ooa.addr16[4], oa->addr16[4], u),
			    ooa.addr16[5], oa->addr16[5], u),
			    ooa.addr16[6], oa->addr16[6], u),
			    ooa.addr16[7], oa->addr16[7], u);
			break;
#endif /* INET6 */
		}
	}
}
+
+
+/*
+ * Need to modulate the sequence numbers in the TCP SACK option
+ * (credits to Krzysztof Pfaff for report and patch)
+ */
+static int
+pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
+ struct tcphdr *th, struct pf_state_peer *dst)
+{
+ int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
+ u_int8_t opts[TCP_MAXOLEN], *opt = opts;
+ int copyback = 0, i, olen;
+ struct sackblk sack;
+
+#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
+ if (hlen < TCPOLEN_SACKLEN ||
+ !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
+ return 0;
+
+ while (hlen >= TCPOLEN_SACKLEN) {
+ olen = opt[1];
+ switch (*opt) {
+ case TCPOPT_EOL: /* FALLTHROUGH */
+ case TCPOPT_NOP:
+ opt++;
+ hlen--;
+ break;
+ case TCPOPT_SACK:
+ if (olen > hlen)
+ olen = hlen;
+ if (olen >= TCPOLEN_SACKLEN) {
+ for (i = 2; i + TCPOLEN_SACK <= olen;
+ i += TCPOLEN_SACK) {
+ memcpy(&sack, &opt[i], sizeof(sack));
+ pf_change_proto_a(m, &sack.start, &th->th_sum,
+ htonl(ntohl(sack.start) - dst->seqdiff), 0);
+ pf_change_proto_a(m, &sack.end, &th->th_sum,
+ htonl(ntohl(sack.end) - dst->seqdiff), 0);
+ memcpy(&opt[i], &sack, sizeof(sack));
+ }
+ copyback = 1;
+ }
+ /* FALLTHROUGH */
+ default:
+ if (olen < 2)
+ olen = 2;
+ hlen -= olen;
+ opt += olen;
+ }
+ }
+
+ if (copyback)
+ m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts);
+ return (copyback);
+}
+
+/*
+ * Build and enqueue a bare TCP segment (typically a RST or a synproxy
+ * SYN/ACK) from scratch.  The packet is handed to pf_send() for deferred
+ * transmission rather than being transmitted here, since we may be called
+ * deep inside packet processing.  'mss' != 0 appends an MSS option;
+ * 'tag' marks the packet to skip the firewall on output; 'rtag' becomes
+ * the pf mbuf tag value.
+ */
+static void
+pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af,
+    const struct pf_addr *saddr, const struct pf_addr *daddr,
+    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
+    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
+    u_int16_t rtag, struct ifnet *ifp)
+{
+	struct pf_send_entry *pfse;
+	struct mbuf	*m;
+	int		 len, tlen;
+#ifdef INET
+	struct ip	*h = NULL;
+#endif /* INET */
+#ifdef INET6
+	struct ip6_hdr	*h6 = NULL;
+#endif /* INET6 */
+	struct tcphdr	*th;
+	char		*opt;
+	struct pf_mtag  *pf_mtag;
+
+	len = 0;
+	th = NULL;
+
+	/* maximum segment size tcp option */
+	tlen = sizeof(struct tcphdr);
+	if (mss)
+		tlen += 4;
+
+	/* Total packet length depends on the address family's IP header. */
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		len = sizeof(struct ip) + tlen;
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		len = sizeof(struct ip6_hdr) + tlen;
+		break;
+#endif /* INET6 */
+	default:
+		panic("%s: unsupported af %d", __func__, af);
+	}
+
+	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
+	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
+	if (pfse == NULL)
+		return;
+	m = m_gethdr(M_NOWAIT, MT_DATA);
+	if (m == NULL) {
+		free(pfse, M_PFTEMP);
+		return;
+	}
+#ifdef MAC
+	mac_netinet_firewall_send(m);
+#endif
+	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
+		free(pfse, M_PFTEMP);
+		m_freem(m);
+		return;
+	}
+	if (tag)
+		m->m_flags |= M_SKIP_FIREWALL;
+	pf_mtag->tag = rtag;
+
+	if (r != NULL && r->rtableid >= 0)
+		M_SETFIB(m, r->rtableid);
+
+#ifdef ALTQ
+	if (r != NULL && r->qid) {
+		pf_mtag->qid = r->qid;
+
+		/* add hints for ecn */
+		pf_mtag->hdr = mtod(m, struct ip *);
+	}
+#endif /* ALTQ */
+	/* Leave room for link headers and zero the whole packet. */
+	m->m_data += max_linkhdr;
+	m->m_pkthdr.len = m->m_len = len;
+	m->m_pkthdr.rcvif = NULL;
+	bzero(m->m_data, len);
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		h = mtod(m, struct ip *);
+
+		/* IP header fields included in the TCP checksum */
+		h->ip_p = IPPROTO_TCP;
+		h->ip_len = htons(tlen);
+		h->ip_src.s_addr = saddr->v4.s_addr;
+		h->ip_dst.s_addr = daddr->v4.s_addr;
+
+		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		h6 = mtod(m, struct ip6_hdr *);
+
+		/* IP header fields included in the TCP checksum */
+		h6->ip6_nxt = IPPROTO_TCP;
+		h6->ip6_plen = htons(tlen);
+		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
+		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
+
+		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
+		break;
+#endif /* INET6 */
+	}
+
+	/* TCP header */
+	th->th_sport = sport;
+	th->th_dport = dport;
+	th->th_seq = htonl(seq);
+	th->th_ack = htonl(ack);
+	th->th_off = tlen >> 2;
+	th->th_flags = flags;
+	th->th_win = htons(win);
+
+	if (mss) {
+		/* Append a 4-byte MSS option right after the header. */
+		opt = (char *)(th + 1);
+		opt[0] = TCPOPT_MAXSEG;
+		opt[1] = 4;
+		HTONS(mss);
+		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
+	}
+
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		/* TCP checksum (computed before ip_len covers full packet) */
+		th->th_sum = in_cksum(m, len);
+
+		/* Finish the IP header */
+		h->ip_v = 4;
+		h->ip_hl = sizeof(*h) >> 2;
+		h->ip_tos = IPTOS_LOWDELAY;
+		h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0);
+		h->ip_len = htons(len);
+		h->ip_ttl = ttl ? ttl : V_ip_defttl;
+		h->ip_sum = 0;
+
+		pfse->pfse_type = PFSE_IP;
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		/* TCP checksum */
+		th->th_sum = in6_cksum(m, IPPROTO_TCP,
+		    sizeof(struct ip6_hdr), tlen);
+
+		h6->ip6_vfc |= IPV6_VERSION;
+		h6->ip6_hlim = IPV6_DEFHLIM;
+
+		pfse->pfse_type = PFSE_IP6;
+		break;
+#endif /* INET6 */
+	}
+	/* Queue for transmission outside of the current call chain. */
+	pfse->pfse_m = m;
+	pf_send(pfse);
+}
+
+/*
+ * Record the 802.1Q priority code point 'prio' on mbuf 'm' so the
+ * link layer can set it on the outgoing VLAN tag.  Returns 0 on
+ * success or ENOMEM if a tag could not be allocated.
+ */
+static int
+pf_ieee8021q_setpcp(struct mbuf *m, u_int8_t prio)
+{
+	struct m_tag *t;
+
+	KASSERT(prio <= PF_PRIO_MAX,
+	    ("%s with invalid pcp", __func__));
+
+	/* Reuse an existing PCP-out tag when present. */
+	t = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_OUT, NULL);
+	if (t != NULL) {
+		*(uint8_t *)(t + 1) = prio;
+		return (0);
+	}
+
+	/* Otherwise allocate and attach a fresh one. */
+	t = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_OUT,
+	    sizeof(uint8_t), M_NOWAIT);
+	if (t == NULL)
+		return (ENOMEM);
+	m_tag_prepend(m, t);
+	*(uint8_t *)(t + 1) = prio;
+	return (0);
+}
+
+/*
+ * Return 1 if the 802.1Q PCP recorded on the incoming mbuf equals the
+ * rule's 'prio', 0 otherwise.  Untagged packets never match.
+ */
+static int
+pf_match_ieee8021q_pcp(u_int8_t prio, struct mbuf *m)
+{
+	struct m_tag *t;
+
+	t = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL);
+	if (t == NULL)
+		return (0);
+
+	/* PF_PRIO_ZERO is the explicit spelling for priority 0. */
+	return (*(uint8_t *)(t + 1) ==
+	    (prio == PF_PRIO_ZERO ? 0 : prio));
+}
+
+/*
+ * Queue an ICMP/ICMPv6 error of the given type/code in response to
+ * packet 'm'.  A copy of the packet is taken; the original is left
+ * untouched.  Transmission is deferred via pf_send().
+ */
+static void
+pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
+    struct pf_rule *r)
+{
+	struct pf_send_entry *pfse;
+	struct mbuf *m0;
+	struct pf_mtag *pf_mtag;
+
+	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
+	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
+	if (pfse == NULL)
+		return;
+
+	if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) {
+		free(pfse, M_PFTEMP);
+		return;
+	}
+
+	if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
+		free(pfse, M_PFTEMP);
+		/* Fix mbuf leak: the packet copy must be released too. */
+		m_freem(m0);
+		return;
+	}
+	/* XXX: revisit */
+	m0->m_flags |= M_SKIP_FIREWALL;
+
+	if (r->rtableid >= 0)
+		M_SETFIB(m0, r->rtableid);
+
+#ifdef ALTQ
+	if (r->qid) {
+		pf_mtag->qid = r->qid;
+		/* add hints for ecn */
+		pf_mtag->hdr = mtod(m0, struct ip *);
+	}
+#endif /* ALTQ */
+
+	/* Pick the queue type matching the address family. */
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		pfse->pfse_type = PFSE_ICMP;
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		pfse->pfse_type = PFSE_ICMP6;
+		break;
+#endif /* INET6 */
+	}
+	pfse->pfse_m = m0;
+	pfse->icmpopts.type = type;
+	pfse->icmpopts.code = code;
+	pf_send(pfse);
+}
+
+/*
+ * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
+ * If n is 0, they match if they are equal. If n is != 0, they match if they
+ * are different.
+ */
+int
+pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
+    struct pf_addr *b, sa_family_t af)
+{
+	int	equal = 0;
+
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		equal = ((a->addr32[0] & m->addr32[0]) ==
+		    (b->addr32[0] & m->addr32[0]));
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6: {
+		int	i;
+
+		/* All four 32-bit words must agree under the mask. */
+		equal = 1;
+		for (i = 0; i < 4; i++) {
+			if ((a->addr32[i] & m->addr32[i]) !=
+			    (b->addr32[i] & m->addr32[i])) {
+				equal = 0;
+				break;
+			}
+		}
+		break;
+	}
+#endif /* INET6 */
+	}
+
+	/* n != 0 asks for a mismatch, so invert the comparison result. */
+	if (n)
+		return (!equal);
+	return (equal);
+}
+
+/*
+ * Return 1 if b <= a <= e, otherwise return 0.
+ *
+ * Addresses are compared as big-endian integers, most significant
+ * 32-bit word first; all inputs are in network byte order.
+ */
+int
+pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
+    struct pf_addr *a, sa_family_t af)
+{
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
+		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
+			return (0);
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6: {
+		int	i;
+
+		/*
+		 * Lexicographic comparison word by word: the first word
+		 * that differs decides; equal words fall through to the
+		 * next, less significant, word.
+		 */
+		/* check a >= b */
+		for (i = 0; i < 4; ++i)
+			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
+				break;
+			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
+				return (0);
+		/* check a <= e */
+		for (i = 0; i < 4; ++i)
+			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
+				break;
+			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
+				return (0);
+		break;
+	}
+#endif /* INET6 */
+	}
+	return (1);
+}
+
+/*
+ * Evaluate the rule comparison operator 'op' for value 'p' against the
+ * rule operands a1 (and, for range operators, a2).  Returns 1 on match.
+ */
+static int
+pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
+{
+	switch (op) {
+	case PF_OP_IRG:		/* strictly inside (a1, a2) */
+		return (a1 < p && p < a2);
+	case PF_OP_XRG:		/* strictly outside [a1, a2] */
+		return (!(a1 <= p && p <= a2));
+	case PF_OP_RRG:		/* inclusive range [a1, a2] */
+		return (a1 <= p && p <= a2);
+	case PF_OP_EQ:
+		return (a1 == p);
+	case PF_OP_NE:
+		return (a1 != p);
+	case PF_OP_LT:
+		return (a1 > p);
+	case PF_OP_LE:
+		return (a1 >= p);
+	case PF_OP_GT:
+		return (a1 < p);
+	case PF_OP_GE:
+		return (a1 <= p);
+	default:
+		return (0);	/* unknown operator never matches */
+	}
+}
+
+/*
+ * Port comparison wrapper around pf_match(): the operands arrive in
+ * network byte order and must be compared as host-order values.
+ */
+int
+pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
+{
+	return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p)));
+}
+
+/*
+ * Match a packet's owner uid against a rule.  UID_MAX stands for
+ * "unknown user", which only equality/inequality tests may examine.
+ */
+static int
+pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
+{
+	if (op != PF_OP_EQ && op != PF_OP_NE && u == UID_MAX)
+		return (0);
+	return (pf_match(op, a1, a2, u));
+}
+
+/*
+ * Match a packet's owner gid against a rule.  GID_MAX stands for
+ * "unknown group", which only equality/inequality tests may examine.
+ */
+static int
+pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
+{
+	if (op != PF_OP_EQ && op != PF_OP_NE && g == GID_MAX)
+		return (0);
+	return (pf_match(op, a1, a2, g));
+}
+
+/*
+ * Match the packet tag against rule r.  *tag caches the packet's tag
+ * across rule evaluations; -1 means "not looked up yet", in which case
+ * the mbuf tag value 'mtag' is adopted.  match_tag_not inverts the test.
+ */
+int
+pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag)
+{
+	int	equal;
+
+	if (*tag == -1)
+		*tag = mtag;
+
+	equal = (r->match_tag == *tag);
+	return (r->match_tag_not ? !equal : equal);
+}
+
+/*
+ * Attach tag 'tag' to the packet via its pf mbuf tag, creating the
+ * mbuf tag on demand.  Returns 0 on success, ENOMEM if no tag could
+ * be allocated.
+ */
+int
+pf_tag_packet(struct mbuf *m, struct pf_pdesc *pd, int tag)
+{
+
+	KASSERT(tag > 0, ("%s: tag %d", __func__, tag));
+
+	if (pd->pf_mtag == NULL) {
+		pd->pf_mtag = pf_get_mtag(m);
+		if (pd->pf_mtag == NULL)
+			return (ENOMEM);
+	}
+
+	pd->pf_mtag->tag = tag;
+	return (0);
+}
+
+/* Maximum anchor nesting depth tracked during one ruleset evaluation. */
+#define PF_ANCHOR_STACKSIZE	32
+/*
+ * One stack frame per entered anchor: the ruleset we came from, the
+ * anchor-call rule that got us here, and (for wildcard anchors) the
+ * child currently being traversed.
+ */
+struct pf_anchor_stackframe {
+	struct pf_ruleset	*rs;
+	struct pf_rule		*r;	/* XXX: + match bit */
+	struct pf_anchor	*child;
+};
+
+/*
+ * XXX: We rely on malloc(9) returning pointer aligned addresses.
+ *
+ * The low bit of the 'r' pointer is borrowed to remember whether any
+ * rule inside the anchor matched; the accessor macros below mask it
+ * in and out.
+ */
+#define	PF_ANCHORSTACK_MATCH	0x00000001
+#define	PF_ANCHORSTACK_MASK	(PF_ANCHORSTACK_MATCH)
+
+#define	PF_ANCHOR_MATCH(f)	((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
+#define	PF_ANCHOR_RULE(f)	(struct pf_rule *)			\
+					((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
+#define	PF_ANCHOR_SET_MATCH(f)	do { (f)->r = (void *)			\
+					((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH);  \
+} while (0)
+
+/*
+ * Descend into the anchor referenced by rule *r: push a frame on the
+ * caller-provided anchor stack, switch *rs to the anchor's ruleset and
+ * point *r at its first rule.  For wildcard anchors ("foo/ *") the
+ * traversal starts at the alphabetically first child.  On stack
+ * overflow the anchor is skipped entirely.
+ */
+void
+pf_step_into_anchor(struct pf_anchor_stackframe *stack, int *depth,
+    struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a,
+    int *match)
+{
+	struct pf_anchor_stackframe	*f;
+
+	PF_RULES_RASSERT();
+
+	if (match)
+		*match = 0;
+	if (*depth >= PF_ANCHOR_STACKSIZE) {
+		/* Too deep: log, skip this anchor, keep evaluating. */
+		printf("%s: anchor stack overflow on %s\n",
+		    __func__, (*r)->anchor->name);
+		*r = TAILQ_NEXT(*r, entries);
+		return;
+	} else if (*depth == 0 && a != NULL)
+		/* Remember the top-level anchor rule for the caller. */
+		*a = *r;
+	f = stack + (*depth)++;
+	f->rs = *rs;
+	f->r = *r;
+	if ((*r)->anchor_wildcard) {
+		struct pf_anchor_node *parent = &(*r)->anchor->children;
+
+		/* Empty wildcard anchor: nothing to evaluate. */
+		if ((f->child = RB_MIN(pf_anchor_node, parent)) == NULL) {
+			*r = NULL;
+			return;
+		}
+		*rs = &f->child->ruleset;
+	} else {
+		f->child = NULL;
+		*rs = &(*r)->anchor->ruleset;
+	}
+	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
+}
+
+/*
+ * Pop anchor stack frames after the rules of the current (sub)ruleset
+ * are exhausted.  For wildcard anchors the next sibling child is
+ * visited before the frame is actually popped.  Resumes evaluation at
+ * the rule following the anchor call.  Returns the 'quick' setting of
+ * the anchor rule when the anchor matched, so the caller can stop.
+ */
+int
+pf_step_out_of_anchor(struct pf_anchor_stackframe *stack, int *depth,
+    struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a,
+    int *match)
+{
+	struct pf_anchor_stackframe	*f;
+	struct pf_rule *fr;
+	int quick = 0;
+
+	PF_RULES_RASSERT();
+
+	do {
+		if (*depth <= 0)
+			break;
+		f = stack + *depth - 1;
+		/* Strip the match bit to recover the real rule pointer. */
+		fr = PF_ANCHOR_RULE(f);
+		if (f->child != NULL) {
+			struct pf_anchor_node *parent;
+
+			/*
+			 * This block traverses through
+			 * a wildcard anchor.
+			 */
+			parent = &fr->anchor->children;
+			if (match != NULL && *match) {
+				/*
+				 * If any of "*" matched, then
+				 * "foo/ *" matched, mark frame
+				 * appropriately.
+				 */
+				PF_ANCHOR_SET_MATCH(f);
+				*match = 0;
+			}
+			f->child = RB_NEXT(pf_anchor_node, parent, f->child);
+			if (f->child != NULL) {
+				*rs = &f->child->ruleset;
+				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
+				if (*r == NULL)
+					continue;
+				else
+					break;
+			}
+		}
+		(*depth)--;
+		if (*depth == 0 && a != NULL)
+			*a = NULL;
+		*rs = f->rs;
+		if (PF_ANCHOR_MATCH(f) || (match != NULL && *match))
+			quick = fr->quick;
+		/* Continue with the rule after the anchor call. */
+		*r = TAILQ_NEXT(fr, entries);
+	} while (*r == NULL);
+
+	return (quick);
+}
+
+#ifdef INET6
+/*
+ * Compose the translation address: bits selected by the pool mask
+ * come from the pool address 'raddr', the remaining bits from the
+ * packet's address 'saddr'.  Result is written to 'naddr'.
+ */
+void
+pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
+    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
+{
+	int	i;
+
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
+		    (~rmask->addr32[0] & saddr->addr32[0]);
+		break;
+#endif /* INET */
+	case AF_INET6:
+		for (i = 0; i < 4; i++)
+			naddr->addr32[i] =
+			    (raddr->addr32[i] & rmask->addr32[i]) |
+			    (~rmask->addr32[i] & saddr->addr32[i]);
+		break;
+	}
+}
+
+/*
+ * Increment an address in place, treating it as one big unsigned
+ * integer in network byte order (used for round-robin pool stepping).
+ */
+void
+pf_addr_inc(struct pf_addr *addr, sa_family_t af)
+{
+	int	i;
+
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
+		break;
+#endif /* INET */
+	case AF_INET6:
+		/*
+		 * Bump the least significant 32-bit word and ripple the
+		 * carry upward while a word wraps around to zero.
+		 */
+		for (i = 3; i >= 0; i--) {
+			addr->addr32[i] =
+			    htonl(ntohl(addr->addr32[i]) + 1);
+			if (addr->addr32[i] != 0)
+				break;
+		}
+		break;
+	}
+}
+#endif /* INET6 */
+
+/*
+ * Look up the local socket owning this TCP/UDP packet and fill in
+ * pd->lookup.uid/gid with the socket owner's credentials.  For inbound
+ * packets the (src, dst) pair is used as-is; for outbound packets it
+ * is swapped so the lookup is always from the local end's perspective.
+ * Returns 1 on success, -1 if no matching pcb was found or the
+ * protocol is not TCP/UDP.
+ */
+int
+pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m)
+{
+	struct pf_addr		*saddr, *daddr;
+	u_int16_t		 sport, dport;
+	struct inpcbinfo	*pi;
+	struct inpcb		*inp;
+
+	/* Defaults meaning "unknown" until a pcb is found. */
+	pd->lookup.uid = UID_MAX;
+	pd->lookup.gid = GID_MAX;
+
+	switch (pd->proto) {
+	case IPPROTO_TCP:
+		if (pd->hdr.tcp == NULL)
+			return (-1);
+		sport = pd->hdr.tcp->th_sport;
+		dport = pd->hdr.tcp->th_dport;
+		pi = &V_tcbinfo;
+		break;
+	case IPPROTO_UDP:
+		if (pd->hdr.udp == NULL)
+			return (-1);
+		sport = pd->hdr.udp->uh_sport;
+		dport = pd->hdr.udp->uh_dport;
+		pi = &V_udbinfo;
+		break;
+	default:
+		return (-1);
+	}
+	if (direction == PF_IN) {
+		saddr = pd->src;
+		daddr = pd->dst;
+	} else {
+		/* Outbound: swap so "src" is the foreign end. */
+		u_int16_t	p;
+
+		p = sport;
+		sport = dport;
+		dport = p;
+		saddr = pd->dst;
+		daddr = pd->src;
+	}
+	switch (pd->af) {
+#ifdef INET
+	case AF_INET:
+		/* Exact match first, then wildcard (listening) sockets. */
+		inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4,
+		    dport, INPLOOKUP_RLOCKPCB, NULL, m);
+		if (inp == NULL) {
+			inp = in_pcblookup_mbuf(pi, saddr->v4, sport,
+			    daddr->v4, dport, INPLOOKUP_WILDCARD |
+			    INPLOOKUP_RLOCKPCB, NULL, m);
+			if (inp == NULL)
+				return (-1);
+		}
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6,
+		    dport, INPLOOKUP_RLOCKPCB, NULL, m);
+		if (inp == NULL) {
+			inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport,
+			    &daddr->v6, dport, INPLOOKUP_WILDCARD |
+			    INPLOOKUP_RLOCKPCB, NULL, m);
+			if (inp == NULL)
+				return (-1);
+		}
+		break;
+#endif /* INET6 */
+
+	default:
+		return (-1);
+	}
+	/* pcb is returned read-locked by the lookup above. */
+	INP_RLOCK_ASSERT(inp);
+#ifndef __rtems__
+	pd->lookup.uid = inp->inp_cred->cr_uid;
+	pd->lookup.gid = inp->inp_cred->cr_groups[0];
+#else /* __rtems__ */
+	/* RTEMS is single-user: report the fixed default credentials. */
+	pd->lookup.uid = BSD_DEFAULT_UID;
+	pd->lookup.gid = BSD_DEFAULT_GID;
+#endif /* __rtems__ */
+	INP_RUNLOCK(inp);
+
+	return (1);
+}
+
+/*
+ * Extract the window-scale factor from a TCP SYN's options.  Returns
+ * the shift count (clamped to TCP_MAX_WINSHIFT) with PF_WSCALE_FLAG
+ * set when the option was present, or 0 if absent/unreadable.
+ */
+static u_int8_t
+pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
+{
+	int		 hlen;
+	u_int8_t	 hdr[60];	/* 60 == max TCP header length */
+	u_int8_t	*opt, optlen;
+	u_int8_t	 wscale = 0;
+
+	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
+	if (hlen <= sizeof(struct tcphdr))
+		return (0);
+	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
+		return (0);
+	opt = hdr + sizeof(struct tcphdr);
+	hlen -= sizeof(struct tcphdr);
+	while (hlen >= 3) {
+		switch (*opt) {
+		case TCPOPT_EOL:
+		case TCPOPT_NOP:
+			/* Single-byte options. */
+			++opt;
+			--hlen;
+			break;
+		case TCPOPT_WINDOW:
+			wscale = opt[2];
+			if (wscale > TCP_MAX_WINSHIFT)
+				wscale = TCP_MAX_WINSHIFT;
+			wscale |= PF_WSCALE_FLAG;
+			/* FALLTHROUGH */
+		default:
+			/* Skip by the option's length field, min 2. */
+			optlen = opt[1];
+			if (optlen < 2)
+				optlen = 2;
+			hlen -= optlen;
+			opt += optlen;
+			break;
+		}
+	}
+	return (wscale);
+}
+
+/*
+ * Extract the MSS value from a TCP SYN's options.  Returns the peer's
+ * advertised MSS in host byte order, V_tcp_mssdflt when the option is
+ * missing, or 0 when the header could not be read.
+ */
+static u_int16_t
+pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
+{
+	int		 hlen;
+	u_int8_t	 hdr[60];	/* 60 == max TCP header length */
+	u_int8_t	*opt, optlen;
+	u_int16_t	 mss = V_tcp_mssdflt;
+
+	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
+	if (hlen <= sizeof(struct tcphdr))
+		return (0);
+	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
+		return (0);
+	opt = hdr + sizeof(struct tcphdr);
+	hlen -= sizeof(struct tcphdr);
+	while (hlen >= TCPOLEN_MAXSEG) {
+		switch (*opt) {
+		case TCPOPT_EOL:
+		case TCPOPT_NOP:
+			/* Single-byte options. */
+			++opt;
+			--hlen;
+			break;
+		case TCPOPT_MAXSEG:
+			/* bcopy avoids unaligned access into the buffer. */
+			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
+			NTOHS(mss);
+			/* FALLTHROUGH */
+		default:
+			optlen = opt[1];
+			if (optlen < 2)
+				optlen = 2;
+			hlen -= optlen;
+			opt += optlen;
+			break;
+		}
+	}
+	return (mss);
+}
+
+/*
+ * Compute the MSS to advertise toward 'addr': derived from the MTU of
+ * the route to that address, clamped by the peer's 'offer', never
+ * below 64 and never below V_tcp_mssdflt unless the offer is smaller.
+ */
+static u_int16_t
+pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
+{
+#ifdef INET
+	struct nhop4_basic	nh4;
+#endif /* INET */
+#ifdef INET6
+	struct nhop6_basic	nh6;
+	struct in6_addr		dst6;
+	uint32_t		scopeid;
+#endif /* INET6 */
+	int			 hlen = 0;
+	uint16_t		 mss = 0;
+
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		hlen = sizeof(struct ip);
+		if (fib4_lookup_nh_basic(rtableid, addr->v4, 0, 0, &nh4) == 0)
+			mss = nh4.nh_mtu - hlen - sizeof(struct tcphdr);
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		hlen = sizeof(struct ip6_hdr);
+		/* Strip any embedded scope before the FIB lookup. */
+		in6_splitscope(&addr->v6, &dst6, &scopeid);
+		if (fib6_lookup_nh_basic(rtableid, &dst6, scopeid, 0,0,&nh6)==0)
+			mss = nh6.nh_mtu - hlen - sizeof(struct tcphdr);
+		break;
+#endif /* INET6 */
+	}
+
+	mss = max(V_tcp_mssdflt, mss);
+	mss = min(mss, offer);
+	mss = max(mss, 64);		/* sanity - at least max opt space */
+	return (mss);
+}
+
+/*
+ * Generate an initial sequence number for sequence-number modulation
+ * and synproxy: an MD5 hash over a per-boot secret and the connection
+ * 4-tuple, plus a random increment and a monotonically advancing
+ * per-vnet offset (RFC 1948 style).
+ */
+static u_int32_t
+pf_tcp_iss(struct pf_pdesc *pd)
+{
+	MD5_CTX ctx;
+	u_int32_t digest[4];
+
+	/* Lazily initialize the secret on first use. */
+	if (V_pf_tcp_secret_init == 0) {
+		read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
+		MD5Init(&V_pf_tcp_secret_ctx);
+		MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
+		    sizeof(V_pf_tcp_secret));
+		V_pf_tcp_secret_init = 1;
+	}
+
+	/* Work on a copy so the seeded context stays reusable. */
+	ctx = V_pf_tcp_secret_ctx;
+
+	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
+	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
+	if (pd->af == AF_INET6) {
+		MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
+		MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
+	} else {
+		MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
+		MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
+	}
+	MD5Final((u_char *)digest, &ctx);
+	V_pf_tcp_iss_off += 4096;
+#define	ISN_RANDOM_INCREMENT (4096 - 1)
+	return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
+	    V_pf_tcp_iss_off);
+#undef	ISN_RANDOM_INCREMENT
+}
+
+/*
+ * Evaluate the filter ruleset for a stateless packet: apply any
+ * matching BINAT/NAT/RDR translation, walk the active filter rules
+ * (honouring anchors), optionally send RST/ICMP on block rules with
+ * return semantics, and create state if the matching rule keeps state.
+ * Returns a PF_* verdict; *rm/*am/*rsm report the matching rule,
+ * anchor rule and ruleset to the caller.
+ *
+ * Fixes relative to the previous revision:
+ *  - IPv6 destination rewrite in the default translation case copied
+ *    the new address into 'saddr' instead of 'daddr'.
+ *  - Two "#endif" comments mislabelled INET6 sections as INET.
+ */
+static int
+pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
+    struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd,
+    struct pf_rule **am, struct pf_ruleset **rsm, struct inpcb *inp)
+{
+	struct pf_rule		*nr = NULL;
+	struct pf_addr		* const saddr = pd->src;
+	struct pf_addr		* const daddr = pd->dst;
+	sa_family_t		 af = pd->af;
+	struct pf_rule		*r, *a = NULL;
+	struct pf_ruleset	*ruleset = NULL;
+	struct pf_src_node	*nsn = NULL;
+	struct tcphdr		*th = pd->hdr.tcp;
+	struct pf_state_key	*sk = NULL, *nk = NULL;
+	u_short			 reason;
+	int			 rewrite = 0, hdrlen = 0;
+	int			 tag = -1, rtableid = -1;
+	int			 asd = 0;
+	int			 match = 0;
+	int			 state_icmp = 0;
+	u_int16_t		 sport = 0, dport = 0;
+	u_int16_t		 bproto_sum = 0, bip_sum = 0;
+	u_int8_t		 icmptype = 0, icmpcode = 0;
+	struct pf_anchor_stackframe	anchor_stack[PF_ANCHOR_STACKSIZE];
+
+	PF_RULES_RASSERT();
+
+	/* Pick up socket credentials when the stack handed us the pcb. */
+	if (inp != NULL) {
+		INP_LOCK_ASSERT(inp);
+#ifndef __rtems__
+		pd->lookup.uid = inp->inp_cred->cr_uid;
+		pd->lookup.gid = inp->inp_cred->cr_groups[0];
+#else /* __rtems__ */
+		pd->lookup.uid = BSD_DEFAULT_UID;
+		pd->lookup.gid = BSD_DEFAULT_GID;
+#endif /* __rtems__ */
+		pd->lookup.done = 1;
+	}
+
+	/* Extract ports / ICMP ids and classify ICMP errors. */
+	switch (pd->proto) {
+	case IPPROTO_TCP:
+		sport = th->th_sport;
+		dport = th->th_dport;
+		hdrlen = sizeof(*th);
+		break;
+	case IPPROTO_UDP:
+		sport = pd->hdr.udp->uh_sport;
+		dport = pd->hdr.udp->uh_dport;
+		hdrlen = sizeof(*pd->hdr.udp);
+		break;
+#ifdef INET
+	case IPPROTO_ICMP:
+		if (pd->af != AF_INET)
+			break;
+		sport = dport = pd->hdr.icmp->icmp_id;
+		hdrlen = sizeof(*pd->hdr.icmp);
+		icmptype = pd->hdr.icmp->icmp_type;
+		icmpcode = pd->hdr.icmp->icmp_code;
+
+		/* ICMP errors never create state of their own. */
+		if (icmptype == ICMP_UNREACH ||
+		    icmptype == ICMP_SOURCEQUENCH ||
+		    icmptype == ICMP_REDIRECT ||
+		    icmptype == ICMP_TIMXCEED ||
+		    icmptype == ICMP_PARAMPROB)
+			state_icmp++;
+		break;
+#endif /* INET */
+#ifdef INET6
+	case IPPROTO_ICMPV6:
+		if (af != AF_INET6)
+			break;
+		sport = dport = pd->hdr.icmp6->icmp6_id;
+		hdrlen = sizeof(*pd->hdr.icmp6);
+		icmptype = pd->hdr.icmp6->icmp6_type;
+		icmpcode = pd->hdr.icmp6->icmp6_code;
+
+		if (icmptype == ICMP6_DST_UNREACH ||
+		    icmptype == ICMP6_PACKET_TOO_BIG ||
+		    icmptype == ICMP6_TIME_EXCEEDED ||
+		    icmptype == ICMP6_PARAM_PROB)
+			state_icmp++;
+		break;
+#endif /* INET6 */
+	default:
+		sport = dport = hdrlen = 0;
+		break;
+	}
+
+	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
+
+	/* check packet for BINAT/NAT/RDR */
+	if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, &sk,
+	    &nk, saddr, daddr, sport, dport, anchor_stack)) != NULL) {
+		KASSERT(sk != NULL, ("%s: null sk", __func__));
+		KASSERT(nk != NULL, ("%s: null nk", __func__));
+
+		/* Save checksums so a returned packet can be un-NATed. */
+		if (pd->ip_sum)
+			bip_sum = *pd->ip_sum;
+
+		switch (pd->proto) {
+		case IPPROTO_TCP:
+			bproto_sum = th->th_sum;
+			pd->proto_sum = &th->th_sum;
+
+			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
+			    nk->port[pd->sidx] != sport) {
+				pf_change_ap(m, saddr, &th->th_sport, pd->ip_sum,
+				    &th->th_sum, &nk->addr[pd->sidx],
+				    nk->port[pd->sidx], 0, af);
+				pd->sport = &th->th_sport;
+				sport = th->th_sport;
+			}
+
+			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
+			    nk->port[pd->didx] != dport) {
+				pf_change_ap(m, daddr, &th->th_dport, pd->ip_sum,
+				    &th->th_sum, &nk->addr[pd->didx],
+				    nk->port[pd->didx], 0, af);
+				dport = th->th_dport;
+				pd->dport = &th->th_dport;
+			}
+			rewrite++;
+			break;
+		case IPPROTO_UDP:
+			bproto_sum = pd->hdr.udp->uh_sum;
+			pd->proto_sum = &pd->hdr.udp->uh_sum;
+
+			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
+			    nk->port[pd->sidx] != sport) {
+				pf_change_ap(m, saddr, &pd->hdr.udp->uh_sport,
+				    pd->ip_sum, &pd->hdr.udp->uh_sum,
+				    &nk->addr[pd->sidx],
+				    nk->port[pd->sidx], 1, af);
+				sport = pd->hdr.udp->uh_sport;
+				pd->sport = &pd->hdr.udp->uh_sport;
+			}
+
+			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
+			    nk->port[pd->didx] != dport) {
+				pf_change_ap(m, daddr, &pd->hdr.udp->uh_dport,
+				    pd->ip_sum, &pd->hdr.udp->uh_sum,
+				    &nk->addr[pd->didx],
+				    nk->port[pd->didx], 1, af);
+				dport = pd->hdr.udp->uh_dport;
+				pd->dport = &pd->hdr.udp->uh_dport;
+			}
+			rewrite++;
+			break;
+#ifdef INET
+		case IPPROTO_ICMP:
+			nk->port[0] = nk->port[1];
+			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
+				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
+				    nk->addr[pd->sidx].v4.s_addr, 0);
+
+			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
+				pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
+				    nk->addr[pd->didx].v4.s_addr, 0);
+
+			if (nk->port[1] != pd->hdr.icmp->icmp_id) {
+				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
+				    pd->hdr.icmp->icmp_cksum, sport,
+				    nk->port[1], 0);
+				pd->hdr.icmp->icmp_id = nk->port[1];
+				pd->sport = &pd->hdr.icmp->icmp_id;
+			}
+			m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
+			break;
+#endif /* INET */
+#ifdef INET6
+		case IPPROTO_ICMPV6:
+			nk->port[0] = nk->port[1];
+			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
+				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
+				    &nk->addr[pd->sidx], 0);
+
+			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
+				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
+				    &nk->addr[pd->didx], 0);
+			rewrite++;
+			break;
+#endif /* INET6 */
+		default:
+			switch (af) {
+#ifdef INET
+			case AF_INET:
+				if (PF_ANEQ(saddr,
+				    &nk->addr[pd->sidx], AF_INET))
+					pf_change_a(&saddr->v4.s_addr,
+					    pd->ip_sum,
+					    nk->addr[pd->sidx].v4.s_addr, 0);
+
+				if (PF_ANEQ(daddr,
+				    &nk->addr[pd->didx], AF_INET))
+					pf_change_a(&daddr->v4.s_addr,
+					    pd->ip_sum,
+					    nk->addr[pd->didx].v4.s_addr, 0);
+				break;
+#endif /* INET */
+#ifdef INET6
+			case AF_INET6:
+				if (PF_ANEQ(saddr,
+				    &nk->addr[pd->sidx], AF_INET6))
+					PF_ACPY(saddr, &nk->addr[pd->sidx], af);
+
+				/*
+				 * Fix: copy into daddr; previously this
+				 * overwrote saddr and left the destination
+				 * untranslated.
+				 */
+				if (PF_ANEQ(daddr,
+				    &nk->addr[pd->didx], AF_INET6))
+					PF_ACPY(daddr, &nk->addr[pd->didx], af);
+				break;
+#endif /* INET6 */
+			}
+			break;
+		}
+		if (nr->natpass)
+			r = NULL;
+		pd->nat_rule = nr;
+	}
+
+	/*
+	 * Walk the filter rules.  Non-matching parameters jump via the
+	 * precomputed skip lists where available.
+	 */
+	while (r != NULL) {
+		r->evaluations++;
+		if (pfi_kif_match(r->kif, kif) == r->ifnot)
+			r = r->skip[PF_SKIP_IFP].ptr;
+		else if (r->direction && r->direction != direction)
+			r = r->skip[PF_SKIP_DIR].ptr;
+		else if (r->af && r->af != af)
+			r = r->skip[PF_SKIP_AF].ptr;
+		else if (r->proto && r->proto != pd->proto)
+			r = r->skip[PF_SKIP_PROTO].ptr;
+		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
+		    r->src.neg, kif, M_GETFIB(m)))
+			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+		/* tcp/udp only. port_op always 0 in other cases */
+		else if (r->src.port_op && !pf_match_port(r->src.port_op,
+		    r->src.port[0], r->src.port[1], sport))
+			r = r->skip[PF_SKIP_SRC_PORT].ptr;
+		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
+		    r->dst.neg, NULL, M_GETFIB(m)))
+			r = r->skip[PF_SKIP_DST_ADDR].ptr;
+		/* tcp/udp only. port_op always 0 in other cases */
+		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
+		    r->dst.port[0], r->dst.port[1], dport))
+			r = r->skip[PF_SKIP_DST_PORT].ptr;
+		/* icmp only. type always 0 in other cases */
+		else if (r->type && r->type != icmptype + 1)
+			r = TAILQ_NEXT(r, entries);
+		/* icmp only. type always 0 in other cases */
+		else if (r->code && r->code != icmpcode + 1)
+			r = TAILQ_NEXT(r, entries);
+		else if (r->tos && !(r->tos == pd->tos))
+			r = TAILQ_NEXT(r, entries);
+		else if (r->rule_flag & PFRULE_FRAGMENT)
+			r = TAILQ_NEXT(r, entries);
+		else if (pd->proto == IPPROTO_TCP &&
+		    (r->flagset & th->th_flags) != r->flags)
+			r = TAILQ_NEXT(r, entries);
+		/* tcp/udp only. uid.op always 0 in other cases */
+		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
+		    pf_socket_lookup(direction, pd, m), 1)) &&
+		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
+		    pd->lookup.uid))
+			r = TAILQ_NEXT(r, entries);
+		/* tcp/udp only. gid.op always 0 in other cases */
+		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
+		    pf_socket_lookup(direction, pd, m), 1)) &&
+		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
+		    pd->lookup.gid))
+			r = TAILQ_NEXT(r, entries);
+		else if (r->prio &&
+		    !pf_match_ieee8021q_pcp(r->prio, m))
+			r = TAILQ_NEXT(r, entries);
+		else if (r->prob &&
+		    r->prob <= arc4random())
+			r = TAILQ_NEXT(r, entries);
+		else if (r->match_tag && !pf_match_tag(m, r, &tag,
+		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
+			r = TAILQ_NEXT(r, entries);
+		else if (r->os_fingerprint != PF_OSFP_ANY &&
+		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
+		    pf_osfp_fingerprint(pd, m, off, th),
+		    r->os_fingerprint)))
+			r = TAILQ_NEXT(r, entries);
+		else {
+			/* Rule matched: record and recurse into anchors. */
+			if (r->tag)
+				tag = r->tag;
+			if (r->rtableid >= 0)
+				rtableid = r->rtableid;
+			if (r->anchor == NULL) {
+				match = 1;
+				*rm = r;
+				*am = a;
+				*rsm = ruleset;
+				if ((*rm)->quick)
+					break;
+				r = TAILQ_NEXT(r, entries);
+			} else
+				pf_step_into_anchor(anchor_stack, &asd,
+				    &ruleset, PF_RULESET_FILTER, &r, &a,
+				    &match);
+		}
+		if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd,
+		    &ruleset, PF_RULESET_FILTER, &r, &a, &match))
+			break;
+	}
+	r = *rm;
+	a = *am;
+	ruleset = *rsm;
+
+	REASON_SET(&reason, PFRES_MATCH);
+
+	if (r->log || (nr != NULL && nr->log)) {
+		if (rewrite)
+			m_copyback(m, off, hdrlen, pd->hdr.any);
+		PFLOG_PACKET(kif, m, af, direction, reason, r->log ? r : nr, a,
+		    ruleset, pd, 1);
+	}
+
+	if ((r->action == PF_DROP) &&
+	    ((r->rule_flag & PFRULE_RETURNRST) ||
+	    (r->rule_flag & PFRULE_RETURNICMP) ||
+	    (r->rule_flag & PFRULE_RETURN))) {
+		/* undo NAT changes, if they have taken place */
+		if (nr != NULL) {
+			PF_ACPY(saddr, &sk->addr[pd->sidx], af);
+			PF_ACPY(daddr, &sk->addr[pd->didx], af);
+			if (pd->sport)
+				*pd->sport = sk->port[pd->sidx];
+			if (pd->dport)
+				*pd->dport = sk->port[pd->didx];
+			if (pd->proto_sum)
+				*pd->proto_sum = bproto_sum;
+			if (pd->ip_sum)
+				*pd->ip_sum = bip_sum;
+			m_copyback(m, off, hdrlen, pd->hdr.any);
+		}
+		if (pd->proto == IPPROTO_TCP &&
+		    ((r->rule_flag & PFRULE_RETURNRST) ||
+		    (r->rule_flag & PFRULE_RETURN)) &&
+		    !(th->th_flags & TH_RST)) {
+			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
+			int		 len = 0;
+#ifdef INET
+			struct ip	*h4;
+#endif
+#ifdef INET6
+			struct ip6_hdr	*h6;
+#endif
+
+			switch (af) {
+#ifdef INET
+			case AF_INET:
+				h4 = mtod(m, struct ip *);
+				len = ntohs(h4->ip_len) - off;
+				break;
+#endif
+#ifdef INET6
+			case AF_INET6:
+				h6 = mtod(m, struct ip6_hdr *);
+				len = ntohs(h6->ip6_plen) - (off - sizeof(*h6));
+				break;
+#endif
+			}
+
+			/* Don't RST packets with a bad TCP checksum. */
+			if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
+				REASON_SET(&reason, PFRES_PROTCKSUM);
+			else {
+				/* SYN and FIN each consume one sequence. */
+				if (th->th_flags & TH_SYN)
+					ack++;
+				if (th->th_flags & TH_FIN)
+					ack++;
+				pf_send_tcp(m, r, af, pd->dst,
+				    pd->src, th->th_dport, th->th_sport,
+				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
+				    r->return_ttl, 1, 0, kif->pfik_ifp);
+			}
+		} else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
+		    r->return_icmp)
+			pf_send_icmp(m, r->return_icmp >> 8,
+			    r->return_icmp & 255, af, r);
+		else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
+		    r->return_icmp6)
+			pf_send_icmp(m, r->return_icmp6 >> 8,
+			    r->return_icmp6 & 255, af, r);
+	}
+
+	if (r->action == PF_DROP)
+		goto cleanup;
+
+	if (tag > 0 && pf_tag_packet(m, pd, tag)) {
+		REASON_SET(&reason, PFRES_MEMORY);
+		goto cleanup;
+	}
+	if (rtableid >= 0)
+		M_SETFIB(m, rtableid);
+
+	if (!state_icmp && (r->keep_state || nr != NULL ||
+	    (pd->flags & PFDESC_TCP_NORM))) {
+		int action;
+		action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off,
+		    sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum,
+		    hdrlen);
+		if (action != PF_PASS)
+			return (action);
+	} else {
+		/* No state: the keys are ours to free. */
+		if (sk != NULL)
+			uma_zfree(V_pf_state_key_z, sk);
+		if (nk != NULL)
+			uma_zfree(V_pf_state_key_z, nk);
+	}
+
+	/* copy back packet headers if we performed NAT operations */
+	if (rewrite)
+		m_copyback(m, off, hdrlen, pd->hdr.any);
+
+	if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
+	    direction == PF_OUT &&
+	    pfsync_defer_ptr != NULL && pfsync_defer_ptr(*sm, m))
+		/*
+		 * We want the state created, but we dont
+		 * want to send this in case a partner
+		 * firewall has to know about it to allow
+		 * replies through it.
+		 */
+		return (PF_DEFER);
+
+	return (PF_PASS);
+
+cleanup:
+	if (sk != NULL)
+		uma_zfree(V_pf_state_key_z, sk);
+	if (nk != NULL)
+		uma_zfree(V_pf_state_key_z, nk);
+	return (PF_DROP);
+}
+
+/*
+ * pf_create_state: allocate and initialize a new pf_state after a rule
+ * matched and asked for state to be kept.  Enforces the rule's state and
+ * source-node limits, seeds the protocol-specific peer tracking (TCP
+ * sequence windows and optional sequence modulation, UDP/ICMP/other
+ * first-packet timeouts), links the filter/translation source nodes,
+ * inserts the wire/stack state keys and, for "synproxy state" TCP rules,
+ * answers the client's SYN itself.
+ *
+ * Returns PF_PASS with *sm set to the new state, PF_SYNPROXY_DROP when
+ * the synproxy handshake consumed the packet, or PF_DROP on failure.
+ * On the csfailed path the caller-provided sk/nk keys are freed and any
+ * source-node references taken here are dropped again.
+ */
+static int
+pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
+ struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *nk,
+ struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport,
+ u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm,
+ int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen)
+{
+ struct pf_state *s = NULL;
+ struct pf_src_node *sn = NULL;
+ struct tcphdr *th = pd->hdr.tcp; /* only dereferenced for IPPROTO_TCP */
+ u_int16_t mss = V_tcp_mssdflt;
+ u_short reason;
+
+ /* check maximums */
+ if (r->max_states &&
+ (counter_u64_fetch(r->states_cur) >= r->max_states)) {
+ counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1);
+ REASON_SET(&reason, PFRES_MAXSTATES);
+ return (PF_DROP);
+ }
+ /* src node for filter rule */
+ if ((r->rule_flag & PFRULE_SRCTRACK ||
+ r->rpool.opts & PF_POOL_STICKYADDR) &&
+ pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
+ goto csfailed;
+ }
+ /* src node for translation rule */
+ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
+ pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
+ goto csfailed;
+ }
+ s = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO);
+ if (s == NULL) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ goto csfailed;
+ }
+ s->rule.ptr = r;
+ s->nat_rule.ptr = nr;
+ s->anchor.ptr = a;
+ STATE_INC_COUNTERS(s);
+ if (r->allow_opts)
+ s->state_flags |= PFSTATE_ALLOWOPTS;
+ if (r->rule_flag & PFRULE_STATESLOPPY)
+ s->state_flags |= PFSTATE_SLOPPY;
+ s->log = r->log & PF_LOG_ALL;
+ s->sync_state = PFSYNC_S_NONE;
+ if (nr != NULL)
+ s->log |= nr->log & PF_LOG_ALL;
+ /* Seed per-protocol peer state and the first-packet timeout. */
+ switch (pd->proto) {
+ case IPPROTO_TCP:
+ s->src.seqlo = ntohl(th->th_seq);
+ s->src.seqhi = s->src.seqlo + pd->p_len + 1;
+ if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
+ r->keep_state == PF_STATE_MODULATE) {
+ /* Generate sequence number modulator */
+ if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
+ 0)
+ s->src.seqdiff = 1;
+ pf_change_proto_a(m, &th->th_seq, &th->th_sum,
+ htonl(s->src.seqlo + s->src.seqdiff), 0);
+ *rewrite = 1;
+ } else
+ s->src.seqdiff = 0;
+ if (th->th_flags & TH_SYN) {
+ s->src.seqhi++;
+ s->src.wscale = pf_get_wscale(m, off,
+ th->th_off, pd->af);
+ }
+ s->src.max_win = MAX(ntohs(th->th_win), 1);
+ if (s->src.wscale & PF_WSCALE_MASK) {
+ /* Remove scale factor from initial window */
+ int win = s->src.max_win;
+ win += 1 << (s->src.wscale & PF_WSCALE_MASK);
+ s->src.max_win = (win - 1) >>
+ (s->src.wscale & PF_WSCALE_MASK);
+ }
+ if (th->th_flags & TH_FIN)
+ s->src.seqhi++;
+ s->dst.seqhi = 1;
+ s->dst.max_win = 1;
+ s->src.state = TCPS_SYN_SENT;
+ s->dst.state = TCPS_CLOSED;
+ s->timeout = PFTM_TCP_FIRST_PACKET;
+ break;
+ case IPPROTO_UDP:
+ s->src.state = PFUDPS_SINGLE;
+ s->dst.state = PFUDPS_NO_TRAFFIC;
+ s->timeout = PFTM_UDP_FIRST_PACKET;
+ break;
+ case IPPROTO_ICMP:
+#ifdef INET6
+ case IPPROTO_ICMPV6:
+#endif
+ s->timeout = PFTM_ICMP_FIRST_PACKET;
+ break;
+ default:
+ s->src.state = PFOTHERS_SINGLE;
+ s->dst.state = PFOTHERS_NO_TRAFFIC;
+ s->timeout = PFTM_OTHER_FIRST_PACKET;
+ }
+
+ /* route-to: resolve the forwarding address/kif from the rule's pool. */
+ if (r->rt && r->rt != PF_FASTROUTE) {
+ if (pf_map_addr(pd->af, r, pd->src, &s->rt_addr, NULL, &sn)) {
+ REASON_SET(&reason, PFRES_MAPFAILED);
+ pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
+ uma_zfree(V_pf_state_z, s);
+ goto csfailed;
+ }
+ s->rt_kif = r->rpool.cur->kif;
+ }
+
+ s->creation = time_uptime;
+ s->expire = time_uptime;
+
+ if (sn != NULL)
+ s->src_node = sn;
+ if (nsn != NULL) {
+ /* XXX We only modify one side for now. */
+ PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
+ s->nat_src_node = nsn;
+ }
+ if (pd->proto == IPPROTO_TCP) {
+ if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
+ off, pd, th, &s->src, &s->dst)) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
+ uma_zfree(V_pf_state_z, s);
+ return (PF_DROP);
+ }
+ if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
+ pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
+ &s->src, &s->dst, rewrite)) {
+ /* This really shouldn't happen!!! */
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_normalize_tcp_stateful failed on first pkt"));
+ pf_normalize_tcp_cleanup(s);
+ pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
+ uma_zfree(V_pf_state_z, s);
+ return (PF_DROP);
+ }
+ }
+ s->direction = pd->dir;
+
+ /*
+ * sk/nk could already been setup by pf_get_translation().
+ */
+ if (nr == NULL) {
+ KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p",
+ __func__, nr, sk, nk));
+ sk = pf_state_key_setup(pd, pd->src, pd->dst, sport, dport);
+ if (sk == NULL)
+ goto csfailed;
+ nk = sk;
+ } else
+ KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p",
+ __func__, nr, sk, nk));
+
+ /* Swap sk/nk for PF_OUT. */
+ if (pf_state_insert(BOUND_IFACE(r, kif),
+ (pd->dir == PF_IN) ? sk : nk,
+ (pd->dir == PF_IN) ? nk : sk, s)) {
+ if (pd->proto == IPPROTO_TCP)
+ pf_normalize_tcp_cleanup(s);
+ REASON_SET(&reason, PFRES_STATEINS);
+ pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
+ uma_zfree(V_pf_state_z, s);
+ return (PF_DROP);
+ } else
+ *sm = s;
+
+ if (tag > 0)
+ s->tag = tag;
+ /*
+ * synproxy: undo any NAT rewrite on the packet, then answer the
+ * client's SYN with our own SYN|ACK and swallow the original.
+ */
+ if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
+ TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
+ s->src.state = PF_TCPS_PROXY_SRC;
+ /* undo NAT changes, if they have taken place */
+ if (nr != NULL) {
+ struct pf_state_key *skt = s->key[PF_SK_WIRE];
+ if (pd->dir == PF_OUT)
+ skt = s->key[PF_SK_STACK];
+ PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
+ PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
+ if (pd->sport)
+ *pd->sport = skt->port[pd->sidx];
+ if (pd->dport)
+ *pd->dport = skt->port[pd->didx];
+ if (pd->proto_sum)
+ *pd->proto_sum = bproto_sum;
+ if (pd->ip_sum)
+ *pd->ip_sum = bip_sum;
+ m_copyback(m, off, hdrlen, pd->hdr.any);
+ }
+ s->src.seqhi = htonl(arc4random());
+ /* Find mss option */
+ int rtid = M_GETFIB(m);
+ mss = pf_get_mss(m, off, th->th_off, pd->af);
+ mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
+ mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
+ s->src.mss = mss;
+ pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport,
+ th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
+ TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL);
+ REASON_SET(&reason, PFRES_SYNPROXY);
+ return (PF_SYNPROXY_DROP);
+ }
+
+ return (PF_PASS);
+
+csfailed:
+ if (sk != NULL)
+ uma_zfree(V_pf_state_key_z, sk);
+ if (nk != NULL)
+ uma_zfree(V_pf_state_key_z, nk);
+
+ /* Drop the filter-rule src-node reference; free it if now unused. */
+ if (sn != NULL) {
+ struct pf_srchash *sh;
+
+ sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
+ PF_HASHROW_LOCK(sh);
+ if (--sn->states == 0 && sn->expire == 0) {
+ pf_unlink_src_node(sn);
+ uma_zfree(V_pf_sources_z, sn);
+ counter_u64_add(
+ V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
+ }
+ PF_HASHROW_UNLOCK(sh);
+ }
+
+ /* Same for the translation-rule src node, when it is distinct. */
+ if (nsn != sn && nsn != NULL) {
+ struct pf_srchash *sh;
+
+ sh = &V_pf_srchash[pf_hashsrc(&nsn->addr, nsn->af)];
+ PF_HASHROW_LOCK(sh);
+ if (--nsn->states == 0 && nsn->expire == 0) {
+ pf_unlink_src_node(nsn);
+ uma_zfree(V_pf_sources_z, nsn);
+ counter_u64_add(
+ V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
+ }
+ PF_HASHROW_UNLOCK(sh);
+ }
+
+ return (PF_DROP);
+}
+
+/*
+ * pf_test_fragment: evaluate the filter ruleset for a packet on which
+ * only network-layer matching is possible (no usable transport header).
+ * Rules that require transport-level data -- port operators, TCP flag
+ * sets, ICMP type/code, or a specific OS fingerprint -- are stepped
+ * over, since they cannot match here.  Reports the winning rule, anchor
+ * rule and ruleset through rm/am/rsm and returns PF_PASS or PF_DROP.
+ */
+static int
+pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
+ struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
+ struct pf_ruleset **rsm)
+{
+ struct pf_rule *r, *a = NULL;
+ struct pf_ruleset *ruleset = NULL;
+ sa_family_t af = pd->af;
+ u_short reason;
+ int tag = -1;
+ int asd = 0; /* anchor stack depth */
+ int match = 0;
+ struct pf_anchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE];
+
+ PF_RULES_RASSERT();
+
+ /* Walk the active filter rules, using skip steps where possible. */
+ r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
+ while (r != NULL) {
+ r->evaluations++;
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
+ r = r->skip[PF_SKIP_IFP].ptr;
+ else if (r->direction && r->direction != direction)
+ r = r->skip[PF_SKIP_DIR].ptr;
+ else if (r->af && r->af != af)
+ r = r->skip[PF_SKIP_AF].ptr;
+ else if (r->proto && r->proto != pd->proto)
+ r = r->skip[PF_SKIP_PROTO].ptr;
+ else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
+ r->src.neg, kif, M_GETFIB(m)))
+ r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+ else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
+ r->dst.neg, NULL, M_GETFIB(m)))
+ r = r->skip[PF_SKIP_DST_ADDR].ptr;
+ else if (r->tos && !(r->tos == pd->tos))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->os_fingerprint != PF_OSFP_ANY)
+ r = TAILQ_NEXT(r, entries);
+ else if (pd->proto == IPPROTO_UDP &&
+ (r->src.port_op || r->dst.port_op))
+ r = TAILQ_NEXT(r, entries);
+ else if (pd->proto == IPPROTO_TCP &&
+ (r->src.port_op || r->dst.port_op || r->flagset))
+ r = TAILQ_NEXT(r, entries);
+ else if ((pd->proto == IPPROTO_ICMP ||
+ pd->proto == IPPROTO_ICMPV6) &&
+ (r->type || r->code))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->prio &&
+ !pf_match_ieee8021q_pcp(r->prio, m))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->prob && r->prob <=
+ (arc4random() % (UINT_MAX - 1) + 1))
+ r = TAILQ_NEXT(r, entries);
+ else if (r->match_tag && !pf_match_tag(m, r, &tag,
+ pd->pf_mtag ? pd->pf_mtag->tag : 0))
+ r = TAILQ_NEXT(r, entries);
+ else {
+ if (r->anchor == NULL) {
+ /* Plain rule matched; remember it, honor "quick". */
+ match = 1;
+ *rm = r;
+ *am = a;
+ *rsm = ruleset;
+ if ((*rm)->quick)
+ break;
+ r = TAILQ_NEXT(r, entries);
+ } else
+ pf_step_into_anchor(anchor_stack, &asd,
+ &ruleset, PF_RULESET_FILTER, &r, &a,
+ &match);
+ }
+ if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd,
+ &ruleset, PF_RULESET_FILTER, &r, &a, &match))
+ break;
+ }
+ r = *rm;
+ a = *am;
+ ruleset = *rsm;
+
+ REASON_SET(&reason, PFRES_MATCH);
+
+ if (r->log)
+ PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd,
+ 1);
+
+ if (r->action != PF_PASS)
+ return (PF_DROP);
+
+ if (tag > 0 && pf_tag_packet(m, pd, tag)) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ return (PF_DROP);
+ }
+
+ return (PF_PASS);
+}
+
+/*
+ * pf_tcp_track_full: full stateful TCP tracking for one packet.
+ * Validates the packet's sequence/ack numbers against the tracked
+ * per-peer windows (algorithm from Guido van Rooij's tcp_filtering
+ * paper, cited below), applies deferred sequence-number modulation and
+ * stateful normalization, advances the peer state machines and
+ * refreshes the state's expire/timeout.  Sets *copyback when the TCP
+ * header was rewritten and must be copied back into the mbuf.
+ *
+ * Returns PF_PASS, or PF_DROP with *reason set (PFRES_BADSTATE on a
+ * window-check failure, possibly after sending an RST during a
+ * mismatched handshake).
+ */
+static int
+pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
+ struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
+ struct pf_pdesc *pd, u_short *reason, int *copyback)
+{
+ struct tcphdr *th = pd->hdr.tcp;
+ u_int16_t win = ntohs(th->th_win);
+ u_int32_t ack, end, seq, orig_seq;
+ u_int8_t sws, dws; /* src/dst window scale shift counts */
+ int ackskew;
+
+ if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
+ sws = src->wscale & PF_WSCALE_MASK;
+ dws = dst->wscale & PF_WSCALE_MASK;
+ } else
+ sws = dws = 0;
+
+ /*
+ * Sequence tracking algorithm from Guido van Rooij's paper:
+ * http://www.madison-gurkha.com/publications/tcp_filtering/
+ * tcp_filtering.ps
+ */
+
+ orig_seq = seq = ntohl(th->th_seq);
+ if (src->seqlo == 0) {
+ /* First packet from this end. Set its state */
+
+ if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
+ src->scrub == NULL) {
+ if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
+ REASON_SET(reason, PFRES_MEMORY);
+ return (PF_DROP);
+ }
+ }
+
+ /* Deferred generation of sequence number modulator */
+ if (dst->seqdiff && !src->seqdiff) {
+ /* use random iss for the TCP server */
+ while ((src->seqdiff = arc4random() - seq) == 0)
+ ;
+ ack = ntohl(th->th_ack) - dst->seqdiff;
+ pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
+ src->seqdiff), 0);
+ pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
+ *copyback = 1;
+ } else {
+ ack = ntohl(th->th_ack);
+ }
+
+ end = seq + pd->p_len;
+ if (th->th_flags & TH_SYN) {
+ end++;
+ if (dst->wscale & PF_WSCALE_FLAG) {
+ src->wscale = pf_get_wscale(m, off, th->th_off,
+ pd->af);
+ if (src->wscale & PF_WSCALE_FLAG) {
+ /* Remove scale factor from initial
+ * window */
+ sws = src->wscale & PF_WSCALE_MASK;
+ win = ((u_int32_t)win + (1 << sws) - 1)
+ >> sws;
+ dws = dst->wscale & PF_WSCALE_MASK;
+ } else {
+ /* fixup other window */
+ dst->max_win <<= dst->wscale &
+ PF_WSCALE_MASK;
+ /* in case of a retrans SYN|ACK */
+ dst->wscale = 0;
+ }
+ }
+ }
+ if (th->th_flags & TH_FIN)
+ end++;
+
+ src->seqlo = seq;
+ if (src->state < TCPS_SYN_SENT)
+ src->state = TCPS_SYN_SENT;
+
+ /*
+ * May need to slide the window (seqhi may have been set by
+ * the crappy stack check or if we picked up the connection
+ * after establishment)
+ */
+ if (src->seqhi == 1 ||
+ SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
+ src->seqhi = end + MAX(1, dst->max_win << dws);
+ if (win > src->max_win)
+ src->max_win = win;
+
+ } else {
+ /* Not the first packet: demodulate, compute segment bounds. */
+ ack = ntohl(th->th_ack) - dst->seqdiff;
+ if (src->seqdiff) {
+ /* Modulate sequence numbers */
+ pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
+ src->seqdiff), 0);
+ pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
+ *copyback = 1;
+ }
+ end = seq + pd->p_len;
+ if (th->th_flags & TH_SYN)
+ end++;
+ if (th->th_flags & TH_FIN)
+ end++;
+ }
+
+ if ((th->th_flags & TH_ACK) == 0) {
+ /* Let it pass through the ack skew check */
+ ack = dst->seqlo;
+ } else if ((ack == 0 &&
+ (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
+ /* broken tcp stacks do not set ack */
+ (dst->state < TCPS_SYN_SENT)) {
+ /*
+ * Many stacks (ours included) will set the ACK number in an
+ * FIN|ACK if the SYN times out -- no sequence to ACK.
+ */
+ ack = dst->seqlo;
+ }
+
+ if (seq == end) {
+ /* Ease sequencing restrictions on no data packets */
+ seq = src->seqlo;
+ end = seq;
+ }
+
+ ackskew = dst->seqlo - ack;
+
+
+ /*
+ * Need to demodulate the sequence numbers in any TCP SACK options
+ * (Selective ACK). We could optionally validate the SACK values
+ * against the current ACK window, either forwards or backwards, but
+ * I'm not confident that SACK has been implemented properly
+ * everywhere. It wouldn't surprise me if several stacks accidentally
+ * SACK too far backwards of previously ACKed data. There really aren't
+ * any security implications of bad SACKing unless the target stack
+ * doesn't validate the option length correctly. Someone trying to
+ * spoof into a TCP connection won't bother blindly sending SACK
+ * options anyway.
+ */
+ if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
+ if (pf_modulate_sack(m, off, pd, th, dst))
+ *copyback = 1;
+ }
+
+
+#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
+ if (SEQ_GEQ(src->seqhi, end) &&
+ /* Last octet inside other's window space */
+ SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
+ /* Retrans: not more than one window back */
+ (ackskew >= -MAXACKWINDOW) &&
+ /* Acking not more than one reassembled fragment backwards */
+ (ackskew <= (MAXACKWINDOW << sws)) &&
+ /* Acking not more than one window forward */
+ ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
+ (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
+ (pd->flags & PFDESC_IP_REAS) == 0)) {
+ /* Require an exact/+1 sequence match on resets when possible */
+
+ if (dst->scrub || src->scrub) {
+ if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
+ *state, src, dst, copyback))
+ return (PF_DROP);
+ }
+
+ /* update max window */
+ if (src->max_win < win)
+ src->max_win = win;
+ /* synchronize sequencing */
+ if (SEQ_GT(end, src->seqlo))
+ src->seqlo = end;
+ /* slide the window of what the other end can send */
+ if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
+ dst->seqhi = ack + MAX((win << sws), 1);
+
+
+ /* update states */
+ if (th->th_flags & TH_SYN)
+ if (src->state < TCPS_SYN_SENT)
+ src->state = TCPS_SYN_SENT;
+ if (th->th_flags & TH_FIN)
+ if (src->state < TCPS_CLOSING)
+ src->state = TCPS_CLOSING;
+ if (th->th_flags & TH_ACK) {
+ if (dst->state == TCPS_SYN_SENT) {
+ dst->state = TCPS_ESTABLISHED;
+ if (src->state == TCPS_ESTABLISHED &&
+ (*state)->src_node != NULL &&
+ pf_src_connlimit(state)) {
+ REASON_SET(reason, PFRES_SRCLIMIT);
+ return (PF_DROP);
+ }
+ } else if (dst->state == TCPS_CLOSING)
+ dst->state = TCPS_FIN_WAIT_2;
+ }
+ if (th->th_flags & TH_RST)
+ src->state = dst->state = TCPS_TIME_WAIT;
+
+ /* update expire time */
+ (*state)->expire = time_uptime;
+ if (src->state >= TCPS_FIN_WAIT_2 &&
+ dst->state >= TCPS_FIN_WAIT_2)
+ (*state)->timeout = PFTM_TCP_CLOSED;
+ else if (src->state >= TCPS_CLOSING &&
+ dst->state >= TCPS_CLOSING)
+ (*state)->timeout = PFTM_TCP_FIN_WAIT;
+ else if (src->state < TCPS_ESTABLISHED ||
+ dst->state < TCPS_ESTABLISHED)
+ (*state)->timeout = PFTM_TCP_OPENING;
+ else if (src->state >= TCPS_CLOSING ||
+ dst->state >= TCPS_CLOSING)
+ (*state)->timeout = PFTM_TCP_CLOSING;
+ else
+ (*state)->timeout = PFTM_TCP_ESTABLISHED;
+
+ /* Fall through to PASS packet */
+
+ } else if ((dst->state < TCPS_SYN_SENT ||
+ dst->state >= TCPS_FIN_WAIT_2 ||
+ src->state >= TCPS_FIN_WAIT_2) &&
+ SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
+ /* Within a window forward of the originating packet */
+ SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
+ /* Within a window backward of the originating packet */
+
+ /*
+ * This currently handles three situations:
+ * 1) Stupid stacks will shotgun SYNs before their peer
+ * replies.
+ * 2) When PF catches an already established stream (the
+ * firewall rebooted, the state table was flushed, routes
+ * changed...)
+ * 3) Packets get funky immediately after the connection
+ * closes (this should catch Solaris spurious ACK|FINs
+ * that web servers like to spew after a close)
+ *
+ * This must be a little more careful than the above code
+ * since packet floods will also be caught here. We don't
+ * update the TTL here to mitigate the damage of a packet
+ * flood and so the same code can handle awkward establishment
+ * and a loosened connection close.
+ * In the establishment case, a correct peer response will
+ * validate the connection, go through the normal state code
+ * and keep updating the state TTL.
+ */
+
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf: loose state match: ");
+ pf_print_state(*state);
+ pf_print_flags(th->th_flags);
+ printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
+ "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
+ pd->p_len, ackskew, (unsigned long long)(*state)->packets[0],
+ (unsigned long long)(*state)->packets[1],
+ pd->dir == PF_IN ? "in" : "out",
+ pd->dir == (*state)->direction ? "fwd" : "rev");
+ }
+
+ if (dst->scrub || src->scrub) {
+ if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
+ *state, src, dst, copyback))
+ return (PF_DROP);
+ }
+
+ /* update max window */
+ if (src->max_win < win)
+ src->max_win = win;
+ /* synchronize sequencing */
+ if (SEQ_GT(end, src->seqlo))
+ src->seqlo = end;
+ /* slide the window of what the other end can send */
+ if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
+ dst->seqhi = ack + MAX((win << sws), 1);
+
+ /*
+ * Cannot set dst->seqhi here since this could be a shotgunned
+ * SYN and not an already established connection.
+ */
+
+ if (th->th_flags & TH_FIN)
+ if (src->state < TCPS_CLOSING)
+ src->state = TCPS_CLOSING;
+ if (th->th_flags & TH_RST)
+ src->state = dst->state = TCPS_TIME_WAIT;
+
+ /* Fall through to PASS packet */
+
+ } else {
+ /* Window checks failed: bad state. */
+ if ((*state)->dst.state == TCPS_SYN_SENT &&
+ (*state)->src.state == TCPS_SYN_SENT) {
+ /* Send RST for state mismatches during handshake */
+ if (!(th->th_flags & TH_RST))
+ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
+ pd->dst, pd->src, th->th_dport,
+ th->th_sport, ntohl(th->th_ack), 0,
+ TH_RST, 0, 0,
+ (*state)->rule.ptr->return_ttl, 1, 0,
+ kif->pfik_ifp);
+ src->seqlo = 0;
+ src->seqhi = 1;
+ src->max_win = 1;
+ } else if (V_pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf: BAD state: ");
+ pf_print_state(*state);
+ pf_print_flags(th->th_flags);
+ printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
+ "pkts=%llu:%llu dir=%s,%s\n",
+ seq, orig_seq, ack, pd->p_len, ackskew,
+ (unsigned long long)(*state)->packets[0],
+ (unsigned long long)(*state)->packets[1],
+ pd->dir == PF_IN ? "in" : "out",
+ pd->dir == (*state)->direction ? "fwd" : "rev");
+ printf("pf: State failure on: %c %c %c %c | %c %c\n",
+ SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
+ SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
+ ' ': '2',
+ (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
+ (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
+ SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
+ SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
+ }
+ REASON_SET(reason, PFRES_BADSTATE);
+ return (PF_DROP);
+ }
+
+ return (PF_PASS);
+}
+
+/*
+ * pf_tcp_track_sloppy: relaxed TCP tracking used for PFSTATE_SLOPPY
+ * states.  Only the TCP flags drive the peer state machines -- no
+ * sequence/window validation -- which allows tracking when only one
+ * half of the connection is visible (see the in-line special cases).
+ * Refreshes the state's expire/timeout like the full tracker.
+ *
+ * Returns PF_PASS, or PF_DROP with *reason = PFRES_SRCLIMIT when a
+ * source-node connection limit is exceeded.
+ */
+static int
+pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
+ struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
+{
+ struct tcphdr *th = pd->hdr.tcp;
+
+ if (th->th_flags & TH_SYN)
+ if (src->state < TCPS_SYN_SENT)
+ src->state = TCPS_SYN_SENT;
+ if (th->th_flags & TH_FIN)
+ if (src->state < TCPS_CLOSING)
+ src->state = TCPS_CLOSING;
+ if (th->th_flags & TH_ACK) {
+ if (dst->state == TCPS_SYN_SENT) {
+ dst->state = TCPS_ESTABLISHED;
+ if (src->state == TCPS_ESTABLISHED &&
+ (*state)->src_node != NULL &&
+ pf_src_connlimit(state)) {
+ REASON_SET(reason, PFRES_SRCLIMIT);
+ return (PF_DROP);
+ }
+ } else if (dst->state == TCPS_CLOSING) {
+ dst->state = TCPS_FIN_WAIT_2;
+ } else if (src->state == TCPS_SYN_SENT &&
+ dst->state < TCPS_SYN_SENT) {
+ /*
+ * Handle a special sloppy case where we only see one
+ * half of the connection. If there is a ACK after
+ * the initial SYN without ever seeing a packet from
+ * the destination, set the connection to established.
+ */
+ dst->state = src->state = TCPS_ESTABLISHED;
+ if ((*state)->src_node != NULL &&
+ pf_src_connlimit(state)) {
+ REASON_SET(reason, PFRES_SRCLIMIT);
+ return (PF_DROP);
+ }
+ } else if (src->state == TCPS_CLOSING &&
+ dst->state == TCPS_ESTABLISHED &&
+ dst->seqlo == 0) {
+ /*
+ * Handle the closing of half connections where we
+ * don't see the full bidirectional FIN/ACK+ACK
+ * handshake.
+ */
+ dst->state = TCPS_CLOSING;
+ }
+ }
+ if (th->th_flags & TH_RST)
+ src->state = dst->state = TCPS_TIME_WAIT;
+
+ /* update expire time */
+ (*state)->expire = time_uptime;
+ if (src->state >= TCPS_FIN_WAIT_2 &&
+ dst->state >= TCPS_FIN_WAIT_2)
+ (*state)->timeout = PFTM_TCP_CLOSED;
+ else if (src->state >= TCPS_CLOSING &&
+ dst->state >= TCPS_CLOSING)
+ (*state)->timeout = PFTM_TCP_FIN_WAIT;
+ else if (src->state < TCPS_ESTABLISHED ||
+ dst->state < TCPS_ESTABLISHED)
+ (*state)->timeout = PFTM_TCP_OPENING;
+ else if (src->state >= TCPS_CLOSING ||
+ dst->state >= TCPS_CLOSING)
+ (*state)->timeout = PFTM_TCP_CLOSING;
+ else
+ (*state)->timeout = PFTM_TCP_ESTABLISHED;
+
+ return (PF_PASS);
+}
+
+/*
+ * pf_test_state_tcp: match a TCP packet against an existing state.
+ * Builds the lookup key (straight for wire-side packets, reversed for
+ * stack-side), drives the synproxy handshake phases for states created
+ * by "synproxy state" rules, detects state reuse (a fresh SYN on an
+ * already-closed connection), delegates sequence tracking to
+ * pf_tcp_track_full()/pf_tcp_track_sloppy(), and finally applies NAT
+ * translation and copies the (possibly modified) TCP header back into
+ * the mbuf.  Returns PF_PASS, PF_DROP or PF_SYNPROXY_DROP.
+ */
+static int
+pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
+ struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
+ u_short *reason)
+{
+ struct pf_state_key_cmp key;
+ struct tcphdr *th = pd->hdr.tcp;
+ int copyback = 0;
+ struct pf_state_peer *src, *dst;
+ struct pf_state_key *sk;
+
+ bzero(&key, sizeof(key));
+ key.af = pd->af;
+ key.proto = IPPROTO_TCP;
+ if (direction == PF_IN) { /* wire side, straight */
+ PF_ACPY(&key.addr[0], pd->src, key.af);
+ PF_ACPY(&key.addr[1], pd->dst, key.af);
+ key.port[0] = th->th_sport;
+ key.port[1] = th->th_dport;
+ } else { /* stack side, reverse */
+ PF_ACPY(&key.addr[1], pd->src, key.af);
+ PF_ACPY(&key.addr[0], pd->dst, key.af);
+ key.port[1] = th->th_sport;
+ key.port[0] = th->th_dport;
+ }
+
+ STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+ /* Map src/dst peers to the packet's direction of travel. */
+ if (direction == (*state)->direction) {
+ src = &(*state)->src;
+ dst = &(*state)->dst;
+ } else {
+ src = &(*state)->dst;
+ dst = &(*state)->src;
+ }
+
+ sk = (*state)->key[pd->didx];
+
+ /* synproxy phase 1: complete the handshake with the client. */
+ if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
+ if (direction != (*state)->direction) {
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_SYNPROXY_DROP);
+ }
+ if (th->th_flags & TH_SYN) {
+ if (ntohl(th->th_seq) != (*state)->src.seqlo) {
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_DROP);
+ }
+ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
+ pd->src, th->th_dport, th->th_sport,
+ (*state)->src.seqhi, ntohl(th->th_seq) + 1,
+ TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0, NULL);
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_SYNPROXY_DROP);
+ } else if (!(th->th_flags & TH_ACK) ||
+ (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
+ (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_DROP);
+ } else if ((*state)->src_node != NULL &&
+ pf_src_connlimit(state)) {
+ REASON_SET(reason, PFRES_SRCLIMIT);
+ return (PF_DROP);
+ } else
+ (*state)->src.state = PF_TCPS_PROXY_DST;
+ }
+ /* synproxy phase 2: open the connection to the real destination. */
+ if ((*state)->src.state == PF_TCPS_PROXY_DST) {
+ if (direction == (*state)->direction) {
+ if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
+ (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
+ (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_DROP);
+ }
+ (*state)->src.max_win = MAX(ntohs(th->th_win), 1);
+ if ((*state)->dst.seqhi == 1)
+ (*state)->dst.seqhi = htonl(arc4random());
+ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
+ &sk->addr[pd->sidx], &sk->addr[pd->didx],
+ sk->port[pd->sidx], sk->port[pd->didx],
+ (*state)->dst.seqhi, 0, TH_SYN, 0,
+ (*state)->src.mss, 0, 0, (*state)->tag, NULL);
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_SYNPROXY_DROP);
+ } else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
+ (TH_SYN|TH_ACK)) ||
+ (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_DROP);
+ } else {
+ /* Both handshakes done: splice via seqdiff offsets. */
+ (*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
+ (*state)->dst.seqlo = ntohl(th->th_seq);
+ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
+ pd->src, th->th_dport, th->th_sport,
+ ntohl(th->th_ack), ntohl(th->th_seq) + 1,
+ TH_ACK, (*state)->src.max_win, 0, 0, 0,
+ (*state)->tag, NULL);
+ pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
+ &sk->addr[pd->sidx], &sk->addr[pd->didx],
+ sk->port[pd->sidx], sk->port[pd->didx],
+ (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
+ TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, NULL);
+ (*state)->src.seqdiff = (*state)->dst.seqhi -
+ (*state)->src.seqlo;
+ (*state)->dst.seqdiff = (*state)->src.seqhi -
+ (*state)->dst.seqlo;
+ (*state)->src.seqhi = (*state)->src.seqlo +
+ (*state)->dst.max_win;
+ (*state)->dst.seqhi = (*state)->dst.seqlo +
+ (*state)->src.max_win;
+ (*state)->src.wscale = (*state)->dst.wscale = 0;
+ (*state)->src.state = (*state)->dst.state =
+ TCPS_ESTABLISHED;
+ REASON_SET(reason, PFRES_SYNPROXY);
+ return (PF_SYNPROXY_DROP);
+ }
+ }
+
+ /* New SYN on a fully closed connection: unlink the stale state. */
+ if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
+ dst->state >= TCPS_FIN_WAIT_2 &&
+ src->state >= TCPS_FIN_WAIT_2) {
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf: state reuse ");
+ pf_print_state(*state);
+ pf_print_flags(th->th_flags);
+ printf("\n");
+ }
+ /* XXX make sure it's the same direction ?? */
+ (*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
+ pf_unlink_state(*state, PF_ENTER_LOCKED);
+ *state = NULL;
+ return (PF_DROP);
+ }
+
+ if ((*state)->state_flags & PFSTATE_SLOPPY) {
+ if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP)
+ return (PF_DROP);
+ } else {
+ if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason,
+ &copyback) == PF_DROP)
+ return (PF_DROP);
+ }
+
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk = (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
+ nk->port[pd->sidx] != th->th_sport)
+ pf_change_ap(m, pd->src, &th->th_sport,
+ pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx],
+ nk->port[pd->sidx], 0, pd->af);
+
+ if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
+ nk->port[pd->didx] != th->th_dport)
+ pf_change_ap(m, pd->dst, &th->th_dport,
+ pd->ip_sum, &th->th_sum, &nk->addr[pd->didx],
+ nk->port[pd->didx], 0, pd->af);
+ copyback = 1;
+ }
+
+ /* Copyback sequence modulation or stateful scrub changes if needed */
+ if (copyback)
+ m_copyback(m, off, sizeof(*th), (caddr_t)th);
+
+ return (PF_PASS);
+}
+
+/*
+ * pf_test_state_udp: match a UDP packet against an existing state.
+ * Looks up the state (key reversed for stack-side packets), promotes
+ * the peer states SINGLE -> MULTIPLE as traffic is seen from each
+ * side, refreshes the expiry/timeout accordingly, and applies NAT
+ * translation to the UDP header when the wire and stack keys differ.
+ * Returns PF_PASS once the state lookup succeeds (STATE_LOOKUP handles
+ * the miss case).
+ */
+static int
+pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
+ struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
+{
+ struct pf_state_peer *src, *dst;
+ struct pf_state_key_cmp key;
+ struct udphdr *uh = pd->hdr.udp;
+
+ bzero(&key, sizeof(key));
+ key.af = pd->af;
+ key.proto = IPPROTO_UDP;
+ if (direction == PF_IN) { /* wire side, straight */
+ PF_ACPY(&key.addr[0], pd->src, key.af);
+ PF_ACPY(&key.addr[1], pd->dst, key.af);
+ key.port[0] = uh->uh_sport;
+ key.port[1] = uh->uh_dport;
+ } else { /* stack side, reverse */
+ PF_ACPY(&key.addr[1], pd->src, key.af);
+ PF_ACPY(&key.addr[0], pd->dst, key.af);
+ key.port[1] = uh->uh_sport;
+ key.port[0] = uh->uh_dport;
+ }
+
+ STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+ /* Map src/dst peers to the packet's direction of travel. */
+ if (direction == (*state)->direction) {
+ src = &(*state)->src;
+ dst = &(*state)->dst;
+ } else {
+ src = &(*state)->dst;
+ dst = &(*state)->src;
+ }
+
+ /* update states */
+ if (src->state < PFUDPS_SINGLE)
+ src->state = PFUDPS_SINGLE;
+ if (dst->state == PFUDPS_SINGLE)
+ dst->state = PFUDPS_MULTIPLE;
+
+ /* update expire time */
+ (*state)->expire = time_uptime;
+ if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
+ (*state)->timeout = PFTM_UDP_MULTIPLE;
+ else
+ (*state)->timeout = PFTM_UDP_SINGLE;
+
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk = (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
+ nk->port[pd->sidx] != uh->uh_sport)
+ pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum,
+ &uh->uh_sum, &nk->addr[pd->sidx],
+ nk->port[pd->sidx], 1, pd->af);
+
+ if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
+ nk->port[pd->didx] != uh->uh_dport)
+ pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum,
+ &uh->uh_sum, &nk->addr[pd->didx],
+ nk->port[pd->didx], 1, pd->af);
+ m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
+ }
+
+ return (PF_PASS);
+}
+
+static int
+pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
+ struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
+{
+ struct pf_addr *saddr = pd->src, *daddr = pd->dst;
+ u_int16_t icmpid = 0, *icmpsum;
+ u_int8_t icmptype;
+ int state_icmp = 0;
+ struct pf_state_key_cmp key;
+
+ bzero(&key, sizeof(key));
+ switch (pd->proto) {
+#ifdef INET
+ case IPPROTO_ICMP:
+ icmptype = pd->hdr.icmp->icmp_type;
+ icmpid = pd->hdr.icmp->icmp_id;
+ icmpsum = &pd->hdr.icmp->icmp_cksum;
+
+ if (icmptype == ICMP_UNREACH ||
+ icmptype == ICMP_SOURCEQUENCH ||
+ icmptype == ICMP_REDIRECT ||
+ icmptype == ICMP_TIMXCEED ||
+ icmptype == ICMP_PARAMPROB)
+ state_icmp++;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case IPPROTO_ICMPV6:
+ icmptype = pd->hdr.icmp6->icmp6_type;
+ icmpid = pd->hdr.icmp6->icmp6_id;
+ icmpsum = &pd->hdr.icmp6->icmp6_cksum;
+
+ if (icmptype == ICMP6_DST_UNREACH ||
+ icmptype == ICMP6_PACKET_TOO_BIG ||
+ icmptype == ICMP6_TIME_EXCEEDED ||
+ icmptype == ICMP6_PARAM_PROB)
+ state_icmp++;
+ break;
+#endif /* INET6 */
+ }
+
+ if (!state_icmp) {
+
+ /*
+ * ICMP query/reply message not related to a TCP/UDP packet.
+ * Search for an ICMP state.
+ */
+ key.af = pd->af;
+ key.proto = pd->proto;
+ key.port[0] = key.port[1] = icmpid;
+ if (direction == PF_IN) { /* wire side, straight */
+ PF_ACPY(&key.addr[0], pd->src, key.af);
+ PF_ACPY(&key.addr[1], pd->dst, key.af);
+ } else { /* stack side, reverse */
+ PF_ACPY(&key.addr[1], pd->src, key.af);
+ PF_ACPY(&key.addr[0], pd->dst, key.af);
+ }
+
+ STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+ (*state)->expire = time_uptime;
+ (*state)->timeout = PFTM_ICMP_ERROR_REPLY;
+
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk = (*state)->key[pd->didx];
+
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
+ if (PF_ANEQ(pd->src,
+ &nk->addr[pd->sidx], AF_INET))
+ pf_change_a(&saddr->v4.s_addr,
+ pd->ip_sum,
+ nk->addr[pd->sidx].v4.s_addr, 0);
+
+ if (PF_ANEQ(pd->dst, &nk->addr[pd->didx],
+ AF_INET))
+ pf_change_a(&daddr->v4.s_addr,
+ pd->ip_sum,
+ nk->addr[pd->didx].v4.s_addr, 0);
+
+ if (nk->port[0] !=
+ pd->hdr.icmp->icmp_id) {
+ pd->hdr.icmp->icmp_cksum =
+ pf_cksum_fixup(
+ pd->hdr.icmp->icmp_cksum, icmpid,
+ nk->port[pd->sidx], 0);
+ pd->hdr.icmp->icmp_id =
+ nk->port[pd->sidx];
+ }
+
+ m_copyback(m, off, ICMP_MINLEN,
+ (caddr_t )pd->hdr.icmp);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (PF_ANEQ(pd->src,
+ &nk->addr[pd->sidx], AF_INET6))
+ pf_change_a6(saddr,
+ &pd->hdr.icmp6->icmp6_cksum,
+ &nk->addr[pd->sidx], 0);
+
+ if (PF_ANEQ(pd->dst,
+ &nk->addr[pd->didx], AF_INET6))
+ pf_change_a6(daddr,
+ &pd->hdr.icmp6->icmp6_cksum,
+ &nk->addr[pd->didx], 0);
+
+ m_copyback(m, off, sizeof(struct icmp6_hdr),
+ (caddr_t )pd->hdr.icmp6);
+ break;
+#endif /* INET6 */
+ }
+ }
+ return (PF_PASS);
+
+ } else {
+ /*
+ * ICMP error message in response to a TCP/UDP packet.
+ * Extract the inner TCP/UDP header and search for that state.
+ */
+
+ struct pf_pdesc pd2;
+ bzero(&pd2, sizeof pd2);
+#ifdef INET
+ struct ip h2;
+#endif /* INET */
+#ifdef INET6
+ struct ip6_hdr h2_6;
+ int terminal = 0;
+#endif /* INET6 */
+ int ipoff2 = 0;
+ int off2 = 0;
+
+ pd2.af = pd->af;
+ /* Payload packet is from the opposite direction. */
+ pd2.sidx = (direction == PF_IN) ? 1 : 0;
+ pd2.didx = (direction == PF_IN) ? 0 : 1;
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
+ /* offset of h2 in mbuf chain */
+ ipoff2 = off + ICMP_MINLEN;
+
+ if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
+ NULL, reason, pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMP error message too short "
+ "(ip)\n"));
+ return (PF_DROP);
+ }
+ /*
+ * ICMP error messages don't refer to non-first
+ * fragments
+ */
+ if (h2.ip_off & htons(IP_OFFMASK)) {
+ REASON_SET(reason, PFRES_FRAG);
+ return (PF_DROP);
+ }
+
+ /* offset of protocol header that follows h2 */
+ off2 = ipoff2 + (h2.ip_hl << 2);
+
+ pd2.proto = h2.ip_p;
+ pd2.src = (struct pf_addr *)&h2.ip_src;
+ pd2.dst = (struct pf_addr *)&h2.ip_dst;
+ pd2.ip_sum = &h2.ip_sum;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ ipoff2 = off + sizeof(struct icmp6_hdr);
+
+ if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
+ NULL, reason, pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMP error message too short "
+ "(ip6)\n"));
+ return (PF_DROP);
+ }
+ pd2.proto = h2_6.ip6_nxt;
+ pd2.src = (struct pf_addr *)&h2_6.ip6_src;
+ pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
+ pd2.ip_sum = NULL;
+ off2 = ipoff2 + sizeof(h2_6);
+ do {
+ switch (pd2.proto) {
+ case IPPROTO_FRAGMENT:
+ /*
+ * ICMPv6 error messages for
+ * non-first fragments
+ */
+ REASON_SET(reason, PFRES_FRAG);
+ return (PF_DROP);
+ case IPPROTO_AH:
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS: {
+ /* get next header and header length */
+ struct ip6_ext opt6;
+
+ if (!pf_pull_hdr(m, off2, &opt6,
+ sizeof(opt6), NULL, reason,
+ pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMPv6 short opt\n"));
+ return (PF_DROP);
+ }
+ if (pd2.proto == IPPROTO_AH)
+ off2 += (opt6.ip6e_len + 2) * 4;
+ else
+ off2 += (opt6.ip6e_len + 1) * 8;
+ pd2.proto = opt6.ip6e_nxt;
+ /* goto the next header */
+ break;
+ }
+ default:
+ terminal++;
+ break;
+ }
+ } while (!terminal);
+ break;
+#endif /* INET6 */
+ }
+
+ switch (pd2.proto) {
+ case IPPROTO_TCP: {
+ struct tcphdr th;
+ u_int32_t seq;
+ struct pf_state_peer *src, *dst;
+ u_int8_t dws;
+ int copyback = 0;
+
+ /*
+ * Only the first 8 bytes of the TCP header can be
+ * expected. Don't access any TCP header fields after
+ * th_seq, an ackskew test is not possible.
+ */
+ if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
+ pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMP error message too short "
+ "(tcp)\n"));
+ return (PF_DROP);
+ }
+
+ key.af = pd2.af;
+ key.proto = IPPROTO_TCP;
+ PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+ PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ key.port[pd2.sidx] = th.th_sport;
+ key.port[pd2.didx] = th.th_dport;
+
+ STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+ if (direction == (*state)->direction) {
+ src = &(*state)->dst;
+ dst = &(*state)->src;
+ } else {
+ src = &(*state)->src;
+ dst = &(*state)->dst;
+ }
+
+ if (src->wscale && dst->wscale)
+ dws = dst->wscale & PF_WSCALE_MASK;
+ else
+ dws = 0;
+
+ /* Demodulate sequence number */
+ seq = ntohl(th.th_seq) - src->seqdiff;
+ if (src->seqdiff) {
+ pf_change_a(&th.th_seq, icmpsum,
+ htonl(seq), 0);
+ copyback = 1;
+ }
+
+ if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
+ (!SEQ_GEQ(src->seqhi, seq) ||
+ !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf: BAD ICMP %d:%d ",
+ icmptype, pd->hdr.icmp->icmp_code);
+ pf_print_host(pd->src, 0, pd->af);
+ printf(" -> ");
+ pf_print_host(pd->dst, 0, pd->af);
+ printf(" state: ");
+ pf_print_state(*state);
+ printf(" seq=%u\n", seq);
+ }
+ REASON_SET(reason, PFRES_BADSTATE);
+ return (PF_DROP);
+ } else {
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+ printf("pf: OK ICMP %d:%d ",
+ icmptype, pd->hdr.icmp->icmp_code);
+ pf_print_host(pd->src, 0, pd->af);
+ printf(" -> ");
+ pf_print_host(pd->dst, 0, pd->af);
+ printf(" state: ");
+ pf_print_state(*state);
+ printf(" seq=%u\n", seq);
+ }
+ }
+
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] !=
+ (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk =
+ (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd2.src,
+ &nk->addr[pd2.sidx], pd2.af) ||
+ nk->port[pd2.sidx] != th.th_sport)
+ pf_change_icmp(pd2.src, &th.th_sport,
+ daddr, &nk->addr[pd2.sidx],
+ nk->port[pd2.sidx], NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, pd2.af);
+
+ if (PF_ANEQ(pd2.dst,
+ &nk->addr[pd2.didx], pd2.af) ||
+ nk->port[pd2.didx] != th.th_dport)
+ pf_change_icmp(pd2.dst, &th.th_dport,
+ saddr, &nk->addr[pd2.didx],
+ nk->port[pd2.didx], NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, pd2.af);
+ copyback = 1;
+ }
+
+ if (copyback) {
+ switch (pd2.af) {
+#ifdef INET
+ case AF_INET:
+ m_copyback(m, off, ICMP_MINLEN,
+ (caddr_t )pd->hdr.icmp);
+ m_copyback(m, ipoff2, sizeof(h2),
+ (caddr_t )&h2);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ m_copyback(m, off,
+ sizeof(struct icmp6_hdr),
+ (caddr_t )pd->hdr.icmp6);
+ m_copyback(m, ipoff2, sizeof(h2_6),
+ (caddr_t )&h2_6);
+ break;
+#endif /* INET6 */
+ }
+ m_copyback(m, off2, 8, (caddr_t)&th);
+ }
+
+ return (PF_PASS);
+ break;
+ }
+ case IPPROTO_UDP: {
+ struct udphdr uh;
+
+ if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
+ NULL, reason, pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMP error message too short "
+ "(udp)\n"));
+ return (PF_DROP);
+ }
+
+ key.af = pd2.af;
+ key.proto = IPPROTO_UDP;
+ PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+ PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ key.port[pd2.sidx] = uh.uh_sport;
+ key.port[pd2.didx] = uh.uh_dport;
+
+ STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] !=
+ (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk =
+ (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd2.src,
+ &nk->addr[pd2.sidx], pd2.af) ||
+ nk->port[pd2.sidx] != uh.uh_sport)
+ pf_change_icmp(pd2.src, &uh.uh_sport,
+ daddr, &nk->addr[pd2.sidx],
+ nk->port[pd2.sidx], &uh.uh_sum,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 1, pd2.af);
+
+ if (PF_ANEQ(pd2.dst,
+ &nk->addr[pd2.didx], pd2.af) ||
+ nk->port[pd2.didx] != uh.uh_dport)
+ pf_change_icmp(pd2.dst, &uh.uh_dport,
+ saddr, &nk->addr[pd2.didx],
+ nk->port[pd2.didx], &uh.uh_sum,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 1, pd2.af);
+
+ switch (pd2.af) {
+#ifdef INET
+ case AF_INET:
+ m_copyback(m, off, ICMP_MINLEN,
+ (caddr_t )pd->hdr.icmp);
+ m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ m_copyback(m, off,
+ sizeof(struct icmp6_hdr),
+ (caddr_t )pd->hdr.icmp6);
+ m_copyback(m, ipoff2, sizeof(h2_6),
+ (caddr_t )&h2_6);
+ break;
+#endif /* INET6 */
+ }
+ m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
+ }
+ return (PF_PASS);
+ break;
+ }
+#ifdef INET
+ case IPPROTO_ICMP: {
+ struct icmp iih;
+
+ if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
+ NULL, reason, pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMP error message too short i"
+ "(icmp)\n"));
+ return (PF_DROP);
+ }
+
+ key.af = pd2.af;
+ key.proto = IPPROTO_ICMP;
+ PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+ PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ key.port[0] = key.port[1] = iih.icmp_id;
+
+ STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] !=
+ (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk =
+ (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd2.src,
+ &nk->addr[pd2.sidx], pd2.af) ||
+ nk->port[pd2.sidx] != iih.icmp_id)
+ pf_change_icmp(pd2.src, &iih.icmp_id,
+ daddr, &nk->addr[pd2.sidx],
+ nk->port[pd2.sidx], NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, AF_INET);
+
+ if (PF_ANEQ(pd2.dst,
+ &nk->addr[pd2.didx], pd2.af) ||
+ nk->port[pd2.didx] != iih.icmp_id)
+ pf_change_icmp(pd2.dst, &iih.icmp_id,
+ saddr, &nk->addr[pd2.didx],
+ nk->port[pd2.didx], NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, AF_INET);
+
+ m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
+ m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
+ m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih);
+ }
+ return (PF_PASS);
+ break;
+ }
+#endif /* INET */
+#ifdef INET6
+ case IPPROTO_ICMPV6: {
+ struct icmp6_hdr iih;
+
+ if (!pf_pull_hdr(m, off2, &iih,
+ sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: ICMP error message too short "
+ "(icmp6)\n"));
+ return (PF_DROP);
+ }
+
+ key.af = pd2.af;
+ key.proto = IPPROTO_ICMPV6;
+ PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+ PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ key.port[0] = key.port[1] = iih.icmp6_id;
+
+ STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] !=
+ (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk =
+ (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd2.src,
+ &nk->addr[pd2.sidx], pd2.af) ||
+ nk->port[pd2.sidx] != iih.icmp6_id)
+ pf_change_icmp(pd2.src, &iih.icmp6_id,
+ daddr, &nk->addr[pd2.sidx],
+ nk->port[pd2.sidx], NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, AF_INET6);
+
+ if (PF_ANEQ(pd2.dst,
+ &nk->addr[pd2.didx], pd2.af) ||
+ nk->port[pd2.didx] != iih.icmp6_id)
+ pf_change_icmp(pd2.dst, &iih.icmp6_id,
+ saddr, &nk->addr[pd2.didx],
+ nk->port[pd2.didx], NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, AF_INET6);
+
+ m_copyback(m, off, sizeof(struct icmp6_hdr),
+ (caddr_t)pd->hdr.icmp6);
+ m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
+ m_copyback(m, off2, sizeof(struct icmp6_hdr),
+ (caddr_t)&iih);
+ }
+ return (PF_PASS);
+ break;
+ }
+#endif /* INET6 */
+ default: {
+ key.af = pd2.af;
+ key.proto = pd2.proto;
+ PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+ PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ key.port[0] = key.port[1] = 0;
+
+ STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+ /* translate source/destination address, if necessary */
+ if ((*state)->key[PF_SK_WIRE] !=
+ (*state)->key[PF_SK_STACK]) {
+ struct pf_state_key *nk =
+ (*state)->key[pd->didx];
+
+ if (PF_ANEQ(pd2.src,
+ &nk->addr[pd2.sidx], pd2.af))
+ pf_change_icmp(pd2.src, NULL, daddr,
+ &nk->addr[pd2.sidx], 0, NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, pd2.af);
+
+ if (PF_ANEQ(pd2.dst,
+ &nk->addr[pd2.didx], pd2.af))
+ pf_change_icmp(pd2.dst, NULL, saddr,
+ &nk->addr[pd2.didx], 0, NULL,
+ pd2.ip_sum, icmpsum,
+ pd->ip_sum, 0, pd2.af);
+
+ switch (pd2.af) {
+#ifdef INET
+ case AF_INET:
+ m_copyback(m, off, ICMP_MINLEN,
+ (caddr_t)pd->hdr.icmp);
+ m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ m_copyback(m, off,
+ sizeof(struct icmp6_hdr),
+ (caddr_t )pd->hdr.icmp6);
+ m_copyback(m, ipoff2, sizeof(h2_6),
+ (caddr_t )&h2_6);
+ break;
+#endif /* INET6 */
+ }
+ }
+ return (PF_PASS);
+ break;
+ }
+ }
+ }
+}
+
+/*
+ * pf_test_state_other: state-table matching for protocols other than
+ * TCP/UDP/ICMP (e.g. GRE, ESP).  Builds a portless state key from the
+ * packet described by pd, looks it up via STATE_LOOKUP() (which returns
+ * PF_DROP from within this function when no state exists), refreshes the
+ * peer states and expiry timeout, and applies NAT address rewriting when
+ * the wire and stack state keys differ.  Returns PF_PASS on a match.
+ */
+static int
+pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
+	struct mbuf *m, struct pf_pdesc *pd)
+{
+	struct pf_state_peer *src, *dst;
+	struct pf_state_key_cmp key;
+
+	bzero(&key, sizeof(key));
+	key.af = pd->af;
+	key.proto = pd->proto;
+	if (direction == PF_IN) {
+		PF_ACPY(&key.addr[0], pd->src, key.af);
+		PF_ACPY(&key.addr[1], pd->dst, key.af);
+		key.port[0] = key.port[1] = 0;
+	} else {
+		PF_ACPY(&key.addr[1], pd->src, key.af);
+		PF_ACPY(&key.addr[0], pd->dst, key.af);
+		key.port[1] = key.port[0] = 0;
+	}
+
+	STATE_LOOKUP(kif, &key, direction, *state, pd);
+
+	if (direction == (*state)->direction) {
+		src = &(*state)->src;
+		dst = &(*state)->dst;
+	} else {
+		src = &(*state)->dst;
+		dst = &(*state)->src;
+	}
+
+	/* update states */
+	if (src->state < PFOTHERS_SINGLE)
+		src->state = PFOTHERS_SINGLE;
+	if (dst->state == PFOTHERS_SINGLE)
+		dst->state = PFOTHERS_MULTIPLE;
+
+	/* update expire time */
+	(*state)->expire = time_uptime;
+	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
+		(*state)->timeout = PFTM_OTHER_MULTIPLE;
+	else
+		(*state)->timeout = PFTM_OTHER_SINGLE;
+
+	/* translate source/destination address, if necessary */
+	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
+		struct pf_state_key *nk = (*state)->key[pd->didx];
+
+		KASSERT(nk, ("%s: nk is null", __func__));
+		KASSERT(pd, ("%s: pd is null", __func__));
+		KASSERT(pd->src, ("%s: pd->src is null", __func__));
+		KASSERT(pd->dst, ("%s: pd->dst is null", __func__));
+		switch (pd->af) {
+#ifdef INET
+		case AF_INET:
+			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
+				pf_change_a(&pd->src->v4.s_addr,
+				    pd->ip_sum,
+				    nk->addr[pd->sidx].v4.s_addr,
+				    0);
+
+
+			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
+				pf_change_a(&pd->dst->v4.s_addr,
+				    pd->ip_sum,
+				    nk->addr[pd->didx].v4.s_addr,
+				    0);
+
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			/*
+			 * Compare with AF_INET6: PF_ANEQ() with AF_INET
+			 * only inspects the first 32 bits of the address,
+			 * which can wrongly skip the rewrite for IPv6
+			 * addresses that differ later in the address.
+			 */
+			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET6))
+				PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
+
+			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET6))
+				PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
+			break;
+#endif /* INET6 */
+		}
+	}
+	return (PF_PASS);
+}
+
+/*
+ * ipoff and off are measured from the start of the mbuf chain.
+ * h must be at "ipoff" on the mbuf chain.
+ *
+ * Pull len bytes of header at chain offset off out of mbuf chain m into
+ * the caller-supplied buffer p.  Returns p on success.  On failure it
+ * returns NULL after recording the verdict (PF_PASS or PF_DROP) via
+ * ACTION_SET(actionp) and the drop reason via REASON_SET(reasonp).
+ */
+void *
+pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
+    u_short *actionp, u_short *reasonp, sa_family_t af)
+{
+	switch (af) {
+#ifdef INET
+	case AF_INET: {
+		struct ip *h = mtod(m, struct ip *);
+		u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
+
+		if (fragoff) {
+			/*
+			 * Non-first fragment: the requested header is not
+			 * present here.  Pass fragments that lie entirely
+			 * beyond the header; drop ones that overlap it.
+			 */
+			if (fragoff >= len)
+				ACTION_SET(actionp, PF_PASS);
+			else {
+				ACTION_SET(actionp, PF_DROP);
+				REASON_SET(reasonp, PFRES_FRAG);
+			}
+			return (NULL);
+		}
+		/*
+		 * Both the mbuf chain and the IP total length must cover
+		 * the requested off+len span.
+		 */
+		if (m->m_pkthdr.len < off + len ||
+		    ntohs(h->ip_len) < off + len) {
+			ACTION_SET(actionp, PF_DROP);
+			REASON_SET(reasonp, PFRES_SHORT);
+			return (NULL);
+		}
+		break;
+	}
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6: {
+		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
+
+		/* Payload length plus fixed header must cover off+len. */
+		if (m->m_pkthdr.len < off + len ||
+		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
+		    (unsigned)(off + len)) {
+			ACTION_SET(actionp, PF_DROP);
+			REASON_SET(reasonp, PFRES_SHORT);
+			return (NULL);
+		}
+		break;
+	}
+#endif /* INET6 */
+	}
+	m_copydata(m, off, len, p);
+	return (p);
+}
+
+#ifdef RADIX_MPATH
+/*
+ * Legacy RADIX_MPATH variant of pf_routable(): resolve a route for addr
+ * in rtableid and, when an input interface kif is given, walk every
+ * multipath sibling route to perform a uRPF check against kif.
+ * Returns 1 when the address is routable (and passes uRPF, if checked),
+ * 0 otherwise.  Always releases the route before returning.
+ */
+static int
+pf_routable_oldmpath(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
+    int rtableid)
+{
+	struct radix_node_head *rnh;
+	struct sockaddr_in *dst;
+	int ret = 1;
+	int check_mpath;
+#ifdef INET6
+	struct sockaddr_in6 *dst6;
+	struct route_in6 ro;
+#else
+	struct route ro;
+#endif
+	struct radix_node *rn;
+	struct rtentry *rt;
+	struct ifnet *ifp;
+
+	check_mpath = 0;
+	/* XXX: stick to table 0 for now */
+	rnh = rt_tables_get_rnh(0, af);
+	if (rnh != NULL && rn_mpath_capable(rnh))
+		check_mpath = 1;
+	bzero(&ro, sizeof(ro));
+	switch (af) {
+	case AF_INET:
+		dst = satosin(&ro.ro_dst);
+		dst->sin_family = AF_INET;
+		dst->sin_len = sizeof(*dst);
+		dst->sin_addr = addr->v4;
+		break;
+#ifdef INET6
+	case AF_INET6:
+		/*
+		 * Skip check for addresses with embedded interface scope,
+		 * as they would always match anyway.
+		 */
+		if (IN6_IS_SCOPE_EMBED(&addr->v6))
+			goto out;
+		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
+		dst6->sin6_family = AF_INET6;
+		dst6->sin6_len = sizeof(*dst6);
+		dst6->sin6_addr = addr->v6;
+		break;
+#endif /* INET6 */
+	default:
+		return (0);
+	}
+
+	/* Skip checks for ipsec interfaces */
+	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
+		goto out;
+
+	switch (af) {
+#ifdef INET6
+	case AF_INET6:
+		in6_rtalloc_ign(&ro, 0, rtableid);
+		break;
+#endif
+#ifdef INET
+	case AF_INET:
+		in_rtalloc_ign((struct route *)&ro, 0, rtableid);
+		break;
+#endif
+	}
+
+	if (ro.ro_rt != NULL) {
+		/* No interface given, this is a no-route check */
+		if (kif == NULL)
+			goto out;
+
+		if (kif->pfik_ifp == NULL) {
+			ret = 0;
+			goto out;
+		}
+
+		/* Perform uRPF check if passed input interface */
+		ret = 0;
+		rn = (struct radix_node *)ro.ro_rt;
+		do {
+			rt = (struct rtentry *)rn;
+			ifp = rt->rt_ifp;
+
+			/* Accept if any multipath route leaves via kif. */
+			if (kif->pfik_ifp == ifp)
+				ret = 1;
+			rn = rn_mpath_next(rn);
+		} while (check_mpath == 1 && rn != NULL && ret == 0);
+	} else
+		ret = 0;
+out:
+	if (ro.ro_rt != NULL)
+		RTFREE(ro.ro_rt);
+	return (ret);
+}
+#endif
+
+/*
+ * Return 1 if addr has a route in rtableid and, when an input interface
+ * kif is supplied, the route's egress interface matches kif (uRPF check);
+ * return 0 otherwise.  Scope-embedded IPv6 addresses and ipsec (enc)
+ * interfaces are always accepted.  When RADIX_MPATH is compiled in and
+ * the table is multipath-capable, fall back to the legacy walk.
+ */
+int
+pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
+    int rtableid)
+{
+#ifdef INET
+	struct nhop4_basic nh4;
+#endif
+#ifdef INET6
+	struct nhop6_basic nh6;
+#endif
+	struct ifnet *ifp;
+#ifdef RADIX_MPATH
+	struct radix_node_head *rnh;
+
+	/* XXX: stick to table 0 for now */
+	rnh = rt_tables_get_rnh(0, af);
+	if (rnh != NULL && rn_mpath_capable(rnh))
+		return (pf_routable_oldmpath(addr, af, kif, rtableid));
+#endif
+	/*
+	 * Skip check for addresses with embedded interface scope,
+	 * as they would always match anyway.
+	 */
+	if (af == AF_INET6 && IN6_IS_SCOPE_EMBED(&addr->v6))
+		return (1);
+
+	if (af != AF_INET && af != AF_INET6)
+		return (0);
+
+	/* Skip checks for ipsec interfaces */
+	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
+		return (1);
+
+	ifp = NULL;
+
+	switch (af) {
+#ifdef INET6
+	case AF_INET6:
+		if (fib6_lookup_nh_basic(rtableid, &addr->v6, 0, 0, 0, &nh6)!=0)
+			return (0);
+		ifp = nh6.nh_ifp;
+		break;
+#endif
+#ifdef INET
+	case AF_INET:
+		if (fib4_lookup_nh_basic(rtableid, addr->v4, 0, 0, &nh4) != 0)
+			return (0);
+		ifp = nh4.nh_ifp;
+		break;
+#endif
+	}
+
+	/* No interface given, this is a no-route check */
+	if (kif == NULL)
+		return (1);
+
+	if (kif->pfik_ifp == NULL)
+		return (0);
+
+	/* Perform uRPF check if passed input interface */
+	if (kif->pfik_ifp == ifp)
+		return (1);
+	return (0);
+}
+
+#ifdef INET
+/*
+ * IPv4 route-to/reply-to/dup-to handling: take over the packet in *m and
+ * transmit it out the interface selected by rule r (or by the state's
+ * cached route/interface).  For PF_DUPTO a duplicate is routed and the
+ * original in *m is left for normal delivery; in all other cases *m is
+ * set to NULL on return.  If s is non-NULL it is expected locked on
+ * entry and is unlocked here before transmission.
+ */
+static void
+pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
+    struct pf_state *s, struct pf_pdesc *pd)
+{
+	struct mbuf *m0, *m1;
+	struct sockaddr_in dst;
+	struct ip *ip;
+	struct ifnet *ifp = NULL;
+	struct pf_addr naddr;
+	struct pf_src_node *sn = NULL;
+	int error = 0;
+	uint16_t ip_len, ip_off;
+
+	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
+	KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
+	    __func__));
+
+	/* Limit the number of re-routing passes to avoid routing loops. */
+	if ((pd->pf_mtag == NULL &&
+	    ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
+	    pd->pf_mtag->routed++ > 3) {
+		m0 = *m;
+		*m = NULL;
+		goto bad_locked;
+	}
+
+	if (r->rt == PF_DUPTO) {
+		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
+			if (s)
+				PF_STATE_UNLOCK(s);
+			return;
+		}
+	} else {
+		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
+			if (s)
+				PF_STATE_UNLOCK(s);
+			return;
+		}
+		m0 = *m;
+	}
+
+	ip = mtod(m0, struct ip *);
+
+	bzero(&dst, sizeof(dst));
+	dst.sin_family = AF_INET;
+	dst.sin_len = sizeof(dst);
+	dst.sin_addr = ip->ip_dst;
+
+	if (r->rt == PF_FASTROUTE) {
+		struct nhop4_basic nh4;
+
+		if (s)
+			PF_STATE_UNLOCK(s);
+
+		if (fib4_lookup_nh_basic(M_GETFIB(m0), ip->ip_dst, 0,
+		    m0->m_pkthdr.flowid, &nh4) != 0) {
+			KMOD_IPSTAT_INC(ips_noroute);
+			error = EHOSTUNREACH;
+			goto bad;
+		}
+
+		ifp = nh4.nh_ifp;
+		dst.sin_addr = nh4.nh_addr;
+	} else {
+		if (TAILQ_EMPTY(&r->rpool.list)) {
+			DPFPRINTF(PF_DEBUG_URGENT,
+			    ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
+			goto bad_locked;
+		}
+		if (s == NULL) {
+			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
+			    &naddr, NULL, &sn);
+			if (!PF_AZERO(&naddr, AF_INET))
+				dst.sin_addr.s_addr = naddr.v4.s_addr;
+			ifp = r->rpool.cur->kif ?
+			    r->rpool.cur->kif->pfik_ifp : NULL;
+		} else {
+			if (!PF_AZERO(&s->rt_addr, AF_INET))
+				dst.sin_addr.s_addr =
+				    s->rt_addr.v4.s_addr;
+			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
+			PF_STATE_UNLOCK(s);
+		}
+	}
+	if (ifp == NULL)
+		goto bad;
+
+	/* Re-run the ruleset when leaving via a different interface. */
+	if (oifp != ifp) {
+		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
+			goto bad;
+		else if (m0 == NULL)
+			goto done;
+		if (m0->m_len < sizeof(struct ip)) {
+			DPFPRINTF(PF_DEBUG_URGENT,
+			    ("%s: m0->m_len < sizeof(struct ip)\n", __func__));
+			goto bad;
+		}
+		ip = mtod(m0, struct ip *);
+	}
+
+	if (ifp->if_flags & IFF_LOOPBACK)
+		m0->m_flags |= M_SKIP_FIREWALL;
+
+	ip_len = ntohs(ip->ip_len);
+	ip_off = ntohs(ip->ip_off);
+
+	/* Copied from FreeBSD 10.0-CURRENT ip_output. */
+	m0->m_pkthdr.csum_flags |= CSUM_IP;
+	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
+		in_delayed_cksum(m0);
+		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+	}
+#ifdef SCTP
+	if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
+		/*
+		 * Compute the delayed SCTP checksum on the mbuf actually
+		 * being transmitted (m0, possibly a dup-to copy), not on
+		 * the caller's mbuf-pointer-pointer 'm'.
+		 */
+		sctp_delayed_cksum(m0, (uint32_t)(ip->ip_hl << 2));
+		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
+	}
+#endif
+
+	/*
+	 * If small enough for interface, or the interface will take
+	 * care of the fragmentation for us, we can just send directly.
+	 */
+	if (ip_len <= ifp->if_mtu ||
+	    (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
+		ip->ip_sum = 0;
+		if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
+			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
+			m0->m_pkthdr.csum_flags &= ~CSUM_IP;
+		}
+		m_clrprotoflags(m0);	/* Avoid confusing lower layers. */
+		error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
+		goto done;
+	}
+
+	/* Balk when DF bit is set or the interface didn't support TSO. */
+	if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
+		error = EMSGSIZE;
+		KMOD_IPSTAT_INC(ips_cantfrag);
+		if (r->rt != PF_DUPTO) {
+			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
+			    ifp->if_mtu);
+			goto done;
+		} else
+			goto bad;
+	}
+
+	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist);
+	if (error)
+		goto bad;
+
+	/* Send each fragment; free the rest of the chain on error. */
+	for (; m0; m0 = m1) {
+		m1 = m0->m_nextpkt;
+		m0->m_nextpkt = NULL;
+		if (error == 0) {
+			m_clrprotoflags(m0);
+			error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
+		} else
+			m_freem(m0);
+	}
+
+	if (error == 0)
+		KMOD_IPSTAT_INC(ips_fragmented);
+
+done:
+	if (r->rt != PF_DUPTO)
+		*m = NULL;
+	return;
+
+bad_locked:
+	if (s)
+		PF_STATE_UNLOCK(s);
+bad:
+	m_freem(m0);
+	goto done;
+}
+#endif /* INET */
+
+#ifdef INET6
+/*
+ * IPv6 route-to/reply-to/dup-to handling: transmit the packet in *m out
+ * the interface chosen by rule r or by the state's cached route.  For
+ * PF_DUPTO a duplicate is routed and the original is left in *m for
+ * normal delivery; otherwise *m is NULL on return.  If s is non-NULL it
+ * is expected locked on entry and is unlocked here.
+ */
+static void
+pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
+    struct pf_state *s, struct pf_pdesc *pd)
+{
+	struct mbuf *m0;
+	struct sockaddr_in6 dst;
+	struct ip6_hdr *ip6;
+	struct ifnet *ifp = NULL;
+	struct pf_addr naddr;
+	struct pf_src_node *sn = NULL;
+
+	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
+	KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
+	    __func__));
+
+	/* Limit the number of re-routing passes to avoid routing loops. */
+	if ((pd->pf_mtag == NULL &&
+	    ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
+	    pd->pf_mtag->routed++ > 3) {
+		m0 = *m;
+		*m = NULL;
+		goto bad_locked;
+	}
+
+	if (r->rt == PF_DUPTO) {
+		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
+			if (s)
+				PF_STATE_UNLOCK(s);
+			return;
+		}
+	} else {
+		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
+			if (s)
+				PF_STATE_UNLOCK(s);
+			return;
+		}
+		m0 = *m;
+	}
+
+	ip6 = mtod(m0, struct ip6_hdr *);
+
+	bzero(&dst, sizeof(dst));
+	dst.sin6_family = AF_INET6;
+	dst.sin6_len = sizeof(dst);
+	dst.sin6_addr = ip6->ip6_dst;
+
+	/* Cheat. XXX why only in the v6 case??? */
+	if (r->rt == PF_FASTROUTE) {
+		if (s)
+			PF_STATE_UNLOCK(s);
+		m0->m_flags |= M_SKIP_FIREWALL;
+		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
+		*m = NULL;
+		return;
+	}
+
+	if (TAILQ_EMPTY(&r->rpool.list)) {
+		DPFPRINTF(PF_DEBUG_URGENT,
+		    ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
+		goto bad_locked;
+	}
+	if (s == NULL) {
+		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
+		    &naddr, NULL, &sn);
+		if (!PF_AZERO(&naddr, AF_INET6))
+			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
+			    &naddr, AF_INET6);
+		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
+	} else {
+		if (!PF_AZERO(&s->rt_addr, AF_INET6))
+			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
+			    &s->rt_addr, AF_INET6);
+		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
+	}
+
+	if (s)
+		PF_STATE_UNLOCK(s);
+
+	if (ifp == NULL)
+		goto bad;
+
+	/* Re-run the ruleset when leaving via a different interface. */
+	if (oifp != ifp) {
+		if (pf_test6(PF_FWD, ifp, &m0, NULL) != PF_PASS)
+			goto bad;
+		else if (m0 == NULL)
+			goto done;
+		if (m0->m_len < sizeof(struct ip6_hdr)) {
+			DPFPRINTF(PF_DEBUG_URGENT,
+			    ("%s: m0->m_len < sizeof(struct ip6_hdr)\n",
+			    __func__));
+			goto bad;
+		}
+		ip6 = mtod(m0, struct ip6_hdr *);
+	}
+
+	if (ifp->if_flags & IFF_LOOPBACK)
+		m0->m_flags |= M_SKIP_FIREWALL;
+
+	/* Finish any delayed transport checksum in software. */
+	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
+	    ~ifp->if_hwassist) {
+		uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6);
+		in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr));
+		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
+	}
+
+	/*
+	 * If the packet is too large for the outgoing interface,
+	 * send back an icmp6 error.
+	 */
+	if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
+		dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
+	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu)
+		nd6_output_ifp(ifp, ifp, m0, &dst, NULL);
+	else {
+		in6_ifstat_inc(ifp, ifs6_in_toobig);
+		if (r->rt != PF_DUPTO)
+			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
+		else
+			goto bad;
+	}
+
+done:
+	if (r->rt != PF_DUPTO)
+		*m = NULL;
+	return;
+
+bad_locked:
+	if (s)
+		PF_STATE_UNLOCK(s);
+bad:
+	m_freem(m0);
+	goto done;
+}
+#endif /* INET6 */
+
+/*
+ * FreeBSD supports cksum offloads for the following drivers.
+ *  em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
+ *  ti(4), txp(4), xl(4)
+ *
+ * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
+ *  network driver performed cksum including pseudo header, need to verify
+ *   csum_data
+ * CSUM_DATA_VALID :
+ *  network driver performed cksum, needs to additional pseudo header
+ *  cksum computation with partial csum_data(i.e. lack of H/W support for
+ *  pseudo header, for instance hme(4), sk(4) and possibly gem(4))
+ *
+ * After validating the cksum of packet, set both flag CSUM_DATA_VALID and
+ * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper
+ * TCP/UDP layer.
+ * Also, set csum_data to 0xffff to force cksum validation.
+ */
+/*
+ * Verify the transport checksum of the packet in m (protocol p, header
+ * at offset off, checksummed length len, address family af).  Returns 0
+ * when the checksum is good (hardware-verified or recomputed here) and
+ * bumps the matching protocol "bad checksum" statistic and returns 1
+ * otherwise.
+ */
+static int
+pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
+{
+	u_int16_t sum = 0;
+	int hw_assist = 0;
+	struct ip *ip;
+
+	/* Sanity: off/len must at least cover the smallest headers. */
+	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
+		return (1);
+	if (m->m_pkthdr.len < off + len)
+		return (1);
+
+	switch (p) {
+	case IPPROTO_TCP:
+		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
+				/* Hardware verified incl. pseudo header. */
+				sum = m->m_pkthdr.csum_data;
+			} else {
+				/* Fold the pseudo header into the partial
+				 * hardware checksum. */
+				ip = mtod(m, struct ip *);
+				sum = in_pseudo(ip->ip_src.s_addr,
+				    ip->ip_dst.s_addr, htonl((u_short)len +
+				    m->m_pkthdr.csum_data + IPPROTO_TCP));
+			}
+			sum ^= 0xffff;
+			++hw_assist;
+		}
+		break;
+	case IPPROTO_UDP:
+		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
+				sum = m->m_pkthdr.csum_data;
+			} else {
+				ip = mtod(m, struct ip *);
+				sum = in_pseudo(ip->ip_src.s_addr,
+				    ip->ip_dst.s_addr, htonl((u_short)len +
+				    m->m_pkthdr.csum_data + IPPROTO_UDP));
+			}
+			sum ^= 0xffff;
+			++hw_assist;
+		}
+		break;
+	case IPPROTO_ICMP:
+#ifdef INET6
+	case IPPROTO_ICMPV6:
+#endif /* INET6 */
+		/* No hardware assist for ICMP; always verify in software. */
+		break;
+	default:
+		return (1);
+	}
+
+	if (!hw_assist) {
+		switch (af) {
+		case AF_INET:
+			if (p == IPPROTO_ICMP) {
+				if (m->m_len < off)
+					return (1);
+				/*
+				 * Temporarily advance the mbuf data pointer
+				 * so in_cksum() starts at the ICMP header;
+				 * restore it afterwards.
+				 */
+				m->m_data += off;
+				m->m_len -= off;
+				sum = in_cksum(m, len);
+				m->m_data -= off;
+				m->m_len += off;
+			} else {
+				if (m->m_len < sizeof(struct ip))
+					return (1);
+				sum = in4_cksum(m, p, off, len);
+			}
+			break;
+#ifdef INET6
+		case AF_INET6:
+			if (m->m_len < sizeof(struct ip6_hdr))
+				return (1);
+			sum = in6_cksum(m, p, off, len);
+			break;
+#endif /* INET6 */
+		default:
+			return (1);
+		}
+	}
+	if (sum) {
+		/* Bad checksum: account it against the right protocol. */
+		switch (p) {
+		case IPPROTO_TCP:
+		{
+			KMOD_TCPSTAT_INC(tcps_rcvbadsum);
+			break;
+		}
+		case IPPROTO_UDP:
+		{
+			KMOD_UDPSTAT_INC(udps_badsum);
+			break;
+		}
+#ifdef INET
+		case IPPROTO_ICMP:
+		{
+			KMOD_ICMPSTAT_INC(icps_checksum);
+			break;
+		}
+#endif
+#ifdef INET6
+		case IPPROTO_ICMPV6:
+		{
+			KMOD_ICMP6STAT_INC(icp6s_checksum);
+			break;
+		}
+#endif /* INET6 */
+		}
+		return (1);
+	} else {
+		if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
+			/* Tell upper layers the cksum is already verified. */
+			m->m_pkthdr.csum_flags |=
+			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+			m->m_pkthdr.csum_data = 0xffff;
+		}
+	}
+	return (0);
+}
+
+
+#ifdef INET
+int
+pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
+{
+ struct pfi_kif *kif;
+ u_short action, reason = 0, log = 0;
+ struct mbuf *m = *m0;
+ struct ip *h = NULL;
+ struct m_tag *ipfwtag;
+ struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr;
+ struct pf_state *s = NULL;
+ struct pf_ruleset *ruleset = NULL;
+ struct pf_pdesc pd;
+ int off, dirndx, pqid = 0;
+
+ M_ASSERTPKTHDR(m);
+
+ if (!V_pf_status.running)
+ return (PF_PASS);
+
+ memset(&pd, 0, sizeof(pd));
+
+ kif = (struct pfi_kif *)ifp->if_pf_kif;
+
+ if (kif == NULL) {
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
+ return (PF_DROP);
+ }
+ if (kif->pfik_flags & PFI_IFLAG_SKIP)
+ return (PF_PASS);
+
+ if (m->m_flags & M_SKIP_FIREWALL)
+ return (PF_PASS);
+
+ pd.pf_mtag = pf_find_mtag(m);
+
+ PF_RULES_RLOCK();
+
+ if (ip_divert_ptr != NULL &&
+ ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) {
+ struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1);
+ if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) {
+ if (pd.pf_mtag == NULL &&
+ ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
+ action = PF_DROP;
+ goto done;
+ }
+ pd.pf_mtag->flags |= PF_PACKET_LOOPED;
+ m_tag_delete(m, ipfwtag);
+ }
+ if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) {
+ m->m_flags |= M_FASTFWD_OURS;
+ pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT;
+ }
+ } else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
+ /* We do IP header normalization and packet reassembly here */
+ action = PF_DROP;
+ goto done;
+ }
+ m = *m0; /* pf_normalize messes with m0 */
+ h = mtod(m, struct ip *);
+
+ off = h->ip_hl << 2;
+ if (off < (int)sizeof(struct ip)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_SHORT);
+ log = 1;
+ goto done;
+ }
+
+ pd.src = (struct pf_addr *)&h->ip_src;
+ pd.dst = (struct pf_addr *)&h->ip_dst;
+ pd.sport = pd.dport = NULL;
+ pd.ip_sum = &h->ip_sum;
+ pd.proto_sum = NULL;
+ pd.proto = h->ip_p;
+ pd.dir = dir;
+ pd.sidx = (dir == PF_IN) ? 0 : 1;
+ pd.didx = (dir == PF_IN) ? 1 : 0;
+ pd.af = AF_INET;
+ pd.tos = h->ip_tos;
+ pd.tot_len = ntohs(h->ip_len);
+
+ /* handle fragments that didn't get reassembled by normalization */
+ if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
+ action = pf_test_fragment(&r, dir, kif, m, h,
+ &pd, &a, &ruleset);
+ goto done;
+ }
+
+ switch (h->ip_p) {
+
+ case IPPROTO_TCP: {
+ struct tcphdr th;
+
+ pd.hdr.tcp = &th;
+ if (!pf_pull_hdr(m, off, &th, sizeof(th),
+ &action, &reason, AF_INET)) {
+ log = action != PF_PASS;
+ goto done;
+ }
+ pd.p_len = pd.tot_len - off - (th.th_off << 2);
+ if ((th.th_flags & TH_ACK) && pd.p_len == 0)
+ pqid = 1;
+ action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
+ if (action == PF_DROP)
+ goto done;
+ action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
+ &reason);
+ if (action == PF_PASS) {
+ if (pfsync_update_state_ptr != NULL)
+ pfsync_update_state_ptr(s);
+ r = s->rule.ptr;
+ a = s->anchor.ptr;
+ log = s->log;
+ } else if (s == NULL)
+ action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
+ &a, &ruleset, inp);
+ break;
+ }
+
+ case IPPROTO_UDP: {
+ struct udphdr uh;
+
+ pd.hdr.udp = &uh;
+ if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
+ &action, &reason, AF_INET)) {
+ log = action != PF_PASS;
+ goto done;
+ }
+ if (uh.uh_dport == 0 ||
+ ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
+ ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_SHORT);
+ goto done;
+ }
+ action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
+ if (action == PF_PASS) {
+ if (pfsync_update_state_ptr != NULL)
+ pfsync_update_state_ptr(s);
+ r = s->rule.ptr;
+ a = s->anchor.ptr;
+ log = s->log;
+ } else if (s == NULL)
+ action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
+ &a, &ruleset, inp);
+ break;
+ }
+
+ case IPPROTO_ICMP: {
+ struct icmp ih;
+
+ pd.hdr.icmp = &ih;
+ if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
+ &action, &reason, AF_INET)) {
+ log = action != PF_PASS;
+ goto done;
+ }
+ action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
+ &reason);
+ if (action == PF_PASS) {
+ if (pfsync_update_state_ptr != NULL)
+ pfsync_update_state_ptr(s);
+ r = s->rule.ptr;
+ a = s->anchor.ptr;
+ log = s->log;
+ } else if (s == NULL)
+ action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
+ &a, &ruleset, inp);
+ break;
+ }
+
+#ifdef INET6
+ case IPPROTO_ICMPV6: {
+ action = PF_DROP;
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: dropping IPv4 packet with ICMPv6 payload\n"));
+ goto done;
+ }
+#endif
+
+ default:
+ action = pf_test_state_other(&s, dir, kif, m, &pd);
+ if (action == PF_PASS) {
+ if (pfsync_update_state_ptr != NULL)
+ pfsync_update_state_ptr(s);
+ r = s->rule.ptr;
+ a = s->anchor.ptr;
+ log = s->log;
+ } else if (s == NULL)
+ action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
+ &a, &ruleset, inp);
+ break;
+ }
+
+done:
+ PF_RULES_RUNLOCK();
+ if (action == PF_PASS && h->ip_hl > 5 &&
+ !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_IPOPTIONS);
+ log = r->log;
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: dropping packet with ip options\n"));
+ }
+
+ if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_MEMORY);
+ }
+ if (r->rtableid >= 0)
+ M_SETFIB(m, r->rtableid);
+
+ if (r->scrub_flags & PFSTATE_SETPRIO) {
+ if (pd.tos & IPTOS_LOWDELAY)
+ pqid = 1;
+ if (pf_ieee8021q_setpcp(m, r->set_prio[pqid])) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_MEMORY);
+ log = 1;
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: failed to allocate 802.1q mtag\n"));
+ }
+ }
+
+#ifdef ALTQ
+ if (action == PF_PASS && r->qid) {
+ if (pd.pf_mtag == NULL &&
+ ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_MEMORY);
+ } else {
+ if (s != NULL)
+ pd.pf_mtag->qid_hash = pf_state_hash(s);
+ if (pqid || (pd.tos & IPTOS_LOWDELAY))
+ pd.pf_mtag->qid = r->pqid;
+ else
+ pd.pf_mtag->qid = r->qid;
+ /* Add hints for ecn. */
+ pd.pf_mtag->hdr = h;
+ }
+
+ }
+#endif /* ALTQ */
+
+ /*
+ * connections redirected to loopback should not match sockets
+ * bound specifically to loopback due to security implications,
+ * see tcp_input() and in_pcblookup_listen().
+ */
+ if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
+ pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
+ (s->nat_rule.ptr->action == PF_RDR ||
+ s->nat_rule.ptr->action == PF_BINAT) &&
+ (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
+ m->m_flags |= M_SKIP_FIREWALL;
+
+ if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL &&
+ !PACKET_LOOPED(&pd)) {
+
+ ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0,
+ sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO);
+ if (ipfwtag != NULL) {
+ ((struct ipfw_rule_ref *)(ipfwtag+1))->info =
+ ntohs(r->divert.port);
+ ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir;
+
+ if (s)
+ PF_STATE_UNLOCK(s);
+
+ m_tag_prepend(m, ipfwtag);
+ if (m->m_flags & M_FASTFWD_OURS) {
+ if (pd.pf_mtag == NULL &&
+ ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_MEMORY);
+ log = 1;
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: failed to allocate tag\n"));
+ } else {
+ pd.pf_mtag->flags |=
+ PF_FASTFWD_OURS_PRESENT;
+ m->m_flags &= ~M_FASTFWD_OURS;
+ }
+ }
+ ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT);
+ *m0 = NULL;
+
+ return (action);
+ } else {
+ /* XXX: ipfw has the same behaviour! */
+ action = PF_DROP;
+ REASON_SET(&reason, PFRES_MEMORY);
+ log = 1;
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: failed to allocate divert tag\n"));
+ }
+ }
+
+ if (log) {
+ struct pf_rule *lr;
+
+ if (s != NULL && s->nat_rule.ptr != NULL &&
+ s->nat_rule.ptr->log & PF_LOG_ALL)
+ lr = s->nat_rule.ptr;
+ else
+ lr = r;
+ PFLOG_PACKET(kif, m, AF_INET, dir, reason, lr, a, ruleset, &pd,
+ (s == NULL));
+ }
+
+ kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
+ kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
+
+ if (action == PF_PASS || r->action == PF_DROP) {
+ dirndx = (dir == PF_OUT);
+ r->packets[dirndx]++;
+ r->bytes[dirndx] += pd.tot_len;
+ if (a != NULL) {
+ a->packets[dirndx]++;
+ a->bytes[dirndx] += pd.tot_len;
+ }
+ if (s != NULL) {
+ if (s->nat_rule.ptr != NULL) {
+ s->nat_rule.ptr->packets[dirndx]++;
+ s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
+ }
+ if (s->src_node != NULL) {
+ s->src_node->packets[dirndx]++;
+ s->src_node->bytes[dirndx] += pd.tot_len;
+ }
+ if (s->nat_src_node != NULL) {
+ s->nat_src_node->packets[dirndx]++;
+ s->nat_src_node->bytes[dirndx] += pd.tot_len;
+ }
+ dirndx = (dir == s->direction) ? 0 : 1;
+ s->packets[dirndx]++;
+ s->bytes[dirndx] += pd.tot_len;
+ }
+ tr = r;
+ nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
+ if (nr != NULL && r == &V_pf_default_rule)
+ tr = nr;
+ if (tr->src.addr.type == PF_ADDR_TABLE)
+ pfr_update_stats(tr->src.addr.p.tbl,
+ (s == NULL) ? pd.src :
+ &s->key[(s->direction == PF_IN)]->
+ addr[(s->direction == PF_OUT)],
+ pd.af, pd.tot_len, dir == PF_OUT,
+ r->action == PF_PASS, tr->src.neg);
+ if (tr->dst.addr.type == PF_ADDR_TABLE)
+ pfr_update_stats(tr->dst.addr.p.tbl,
+ (s == NULL) ? pd.dst :
+ &s->key[(s->direction == PF_IN)]->
+ addr[(s->direction == PF_IN)],
+ pd.af, pd.tot_len, dir == PF_OUT,
+ r->action == PF_PASS, tr->dst.neg);
+ }
+
+ switch (action) {
+ case PF_SYNPROXY_DROP:
+ m_freem(*m0);
+ case PF_DEFER:
+ *m0 = NULL;
+ action = PF_PASS;
+ break;
+ case PF_DROP:
+ m_freem(*m0);
+ *m0 = NULL;
+ break;
+ default:
+ /* pf_route() returns unlocked. */
+ if (r->rt) {
+ pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
+ return (action);
+ }
+ break;
+ }
+ if (s)
+ PF_STATE_UNLOCK(s);
+
+ return (action);
+}
+#endif /* INET */
+
+#ifdef INET6
+/*
+ * pf_test6 - IPv6 entry point of the pf packet filter.
+ *
+ * dir is PF_IN or PF_OUT, ifp the interface the packet crosses, *m0 the
+ * packet and inp an optional pcb handed down by the stack.  Returns a
+ * PF_* verdict; whenever the packet is consumed (drop, synproxy/defer,
+ * route-to, refragmentation) *m0 is set to NULL for the caller.
+ */
+int
+pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
+{
+	struct pfi_kif		*kif;
+	u_short			 action, reason = 0, log = 0;
+	struct mbuf		*m = *m0, *n = NULL;
+	struct m_tag		*mtag;
+	struct ip6_hdr		*h = NULL;
+	struct pf_rule		*a = NULL, *r = &V_pf_default_rule, *tr, *nr;
+	struct pf_state		*s = NULL;
+	struct pf_ruleset	*ruleset = NULL;
+	struct pf_pdesc		 pd;
+	int			 off, terminal = 0, dirndx, rh_cnt = 0, pqid = 0;
+	int			 fwdir = dir;
+
+	M_ASSERTPKTHDR(m);
+
+	/* Detect packet forwarding.
+	 * If the input interface is different from the output interface we're
+	 * forwarding.
+	 * We do need to be careful about bridges. If the
+	 * net.link.bridge.pfil_bridge sysctl is set we can be filtering on a
+	 * bridge, so if the input interface is a bridge member and the output
+	 * interface is its bridge or a member of the same bridge we're not
+	 * actually forwarding but bridging.
+	 */
+	if (dir == PF_OUT && m->m_pkthdr.rcvif && ifp != m->m_pkthdr.rcvif &&
+	    (m->m_pkthdr.rcvif->if_bridge == NULL ||
+	    (m->m_pkthdr.rcvif->if_bridge != ifp->if_softc &&
+	    m->m_pkthdr.rcvif->if_bridge != ifp->if_bridge)))
+		fwdir = PF_FWD;
+
+	if (!V_pf_status.running)
+		return (PF_PASS);
+
+	memset(&pd, 0, sizeof(pd));
+	pd.pf_mtag = pf_find_mtag(m);
+
+	/* Packets we generated ourselves (e.g. synproxy) are not re-filtered. */
+	if (pd.pf_mtag && pd.pf_mtag->flags & PF_TAG_GENERATED)
+		return (PF_PASS);
+
+	kif = (struct pfi_kif *)ifp->if_pf_kif;
+	if (kif == NULL) {
+		DPFPRINTF(PF_DEBUG_URGENT,
+		    ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
+		return (PF_DROP);
+	}
+	if (kif->pfik_flags & PFI_IFLAG_SKIP)
+		return (PF_PASS);
+
+	if (m->m_flags & M_SKIP_FIREWALL)
+		return (PF_PASS);
+
+	PF_RULES_RLOCK();
+
+	/* We do IP header normalization and packet reassembly here */
+	if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
+		action = PF_DROP;
+		goto done;
+	}
+	m = *m0;	/* pf_normalize messes with m0 */
+	h = mtod(m, struct ip6_hdr *);
+
+#if 1
+	/*
+	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
+	 * will do something bad, so drop the packet for now.
+	 * (htons() vs. ntohs() is immaterial here: we only test for zero.)
+	 */
+	if (htons(h->ip6_plen) == 0) {
+		action = PF_DROP;
+		REASON_SET(&reason, PFRES_NORM);	/*XXX*/
+		goto done;
+	}
+#endif
+
+	pd.src = (struct pf_addr *)&h->ip6_src;
+	pd.dst = (struct pf_addr *)&h->ip6_dst;
+	pd.sport = pd.dport = NULL;
+	pd.ip_sum = NULL;
+	pd.proto_sum = NULL;
+	pd.dir = dir;
+	pd.sidx = (dir == PF_IN) ? 0 : 1;
+	pd.didx = (dir == PF_IN) ? 1 : 0;
+	pd.af = AF_INET6;
+	pd.tos = 0;
+	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
+
+	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
+	pd.proto = h->ip6_nxt;
+	/* Walk the IPv6 extension header chain until a terminal protocol. */
+	do {
+		switch (pd.proto) {
+		case IPPROTO_FRAGMENT:
+			action = pf_test_fragment(&r, dir, kif, m, h,
+			    &pd, &a, &ruleset);
+			if (action == PF_DROP)
+				REASON_SET(&reason, PFRES_FRAG);
+			goto done;
+		case IPPROTO_ROUTING: {
+			struct ip6_rthdr rthdr;
+
+			if (rh_cnt++) {
+				DPFPRINTF(PF_DEBUG_MISC,
+				    ("pf: IPv6 more than one rthdr\n"));
+				action = PF_DROP;
+				REASON_SET(&reason, PFRES_IPOPTIONS);
+				log = 1;
+				goto done;
+			}
+			if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
+			    &reason, pd.af)) {
+				DPFPRINTF(PF_DEBUG_MISC,
+				    ("pf: IPv6 short rthdr\n"));
+				action = PF_DROP;
+				REASON_SET(&reason, PFRES_SHORT);
+				log = 1;
+				goto done;
+			}
+			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
+				DPFPRINTF(PF_DEBUG_MISC,
+				    ("pf: IPv6 rthdr0\n"));
+				action = PF_DROP;
+				REASON_SET(&reason, PFRES_IPOPTIONS);
+				log = 1;
+				goto done;
+			}
+			/* FALLTHROUGH */
+		}
+		case IPPROTO_AH:
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_DSTOPTS: {
+			/* get next header and header length */
+			struct ip6_ext	opt6;
+
+			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
+			    NULL, &reason, pd.af)) {
+				DPFPRINTF(PF_DEBUG_MISC,
+				    ("pf: IPv6 short opt\n"));
+				action = PF_DROP;
+				log = 1;
+				goto done;
+			}
+			/* AH length is in 32-bit words, other options in 64-bit units. */
+			if (pd.proto == IPPROTO_AH)
+				off += (opt6.ip6e_len + 2) * 4;
+			else
+				off += (opt6.ip6e_len + 1) * 8;
+			pd.proto = opt6.ip6e_nxt;
+			/* goto the next header */
+			break;
+		}
+		default:
+			terminal++;
+			break;
+		}
+	} while (!terminal);
+
+	/* if there's no routing header, use unmodified mbuf for checksumming */
+	if (!n)
+		n = m;
+
+	switch (pd.proto) {
+
+	case IPPROTO_TCP: {
+		struct tcphdr	th;
+
+		pd.hdr.tcp = &th;
+		if (!pf_pull_hdr(m, off, &th, sizeof(th),
+		    &action, &reason, AF_INET6)) {
+			log = action != PF_PASS;
+			goto done;
+		}
+		pd.p_len = pd.tot_len - off - (th.th_off << 2);
+		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
+		if (action == PF_DROP)
+			goto done;
+		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
+		    &reason);
+		if (action == PF_PASS) {
+			if (pfsync_update_state_ptr != NULL)
+				pfsync_update_state_ptr(s);
+			r = s->rule.ptr;
+			a = s->anchor.ptr;
+			log = s->log;
+		} else if (s == NULL)
+			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
+			    &a, &ruleset, inp);
+		break;
+	}
+
+	case IPPROTO_UDP: {
+		struct udphdr	uh;
+
+		pd.hdr.udp = &uh;
+		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
+		    &action, &reason, AF_INET6)) {
+			log = action != PF_PASS;
+			goto done;
+		}
+		if (uh.uh_dport == 0 ||
+		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
+		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
+			action = PF_DROP;
+			REASON_SET(&reason, PFRES_SHORT);
+			goto done;
+		}
+		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
+		if (action == PF_PASS) {
+			if (pfsync_update_state_ptr != NULL)
+				pfsync_update_state_ptr(s);
+			r = s->rule.ptr;
+			a = s->anchor.ptr;
+			log = s->log;
+		} else if (s == NULL)
+			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
+			    &a, &ruleset, inp);
+		break;
+	}
+
+	case IPPROTO_ICMP: {
+		action = PF_DROP;
+		DPFPRINTF(PF_DEBUG_MISC,
+		    ("pf: dropping IPv6 packet with ICMPv4 payload\n"));
+		goto done;
+	}
+
+	case IPPROTO_ICMPV6: {
+		struct icmp6_hdr	ih;
+
+		pd.hdr.icmp6 = &ih;
+		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
+		    &action, &reason, AF_INET6)) {
+			log = action != PF_PASS;
+			goto done;
+		}
+		action = pf_test_state_icmp(&s, dir, kif,
+		    m, off, h, &pd, &reason);
+		if (action == PF_PASS) {
+			if (pfsync_update_state_ptr != NULL)
+				pfsync_update_state_ptr(s);
+			r = s->rule.ptr;
+			a = s->anchor.ptr;
+			log = s->log;
+		} else if (s == NULL)
+			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
+			    &a, &ruleset, inp);
+		break;
+	}
+
+	default:
+		action = pf_test_state_other(&s, dir, kif, m, &pd);
+		if (action == PF_PASS) {
+			if (pfsync_update_state_ptr != NULL)
+				pfsync_update_state_ptr(s);
+			r = s->rule.ptr;
+			a = s->anchor.ptr;
+			log = s->log;
+		} else if (s == NULL)
+			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
+			    &a, &ruleset, inp);
+		break;
+	}
+
+done:
+	PF_RULES_RUNLOCK();
+	if (n != m) {
+		m_freem(n);
+		n = NULL;
+	}
+
+	/* handle dangerous IPv6 extension headers. */
+	if (action == PF_PASS && rh_cnt &&
+	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
+		action = PF_DROP;
+		REASON_SET(&reason, PFRES_IPOPTIONS);
+		log = r->log;
+		DPFPRINTF(PF_DEBUG_MISC,
+		    ("pf: dropping packet with dangerous v6 headers\n"));
+	}
+
+	if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) {
+		action = PF_DROP;
+		REASON_SET(&reason, PFRES_MEMORY);
+	}
+	if (r->rtableid >= 0)
+		M_SETFIB(m, r->rtableid);
+
+	if (r->scrub_flags & PFSTATE_SETPRIO) {
+		if (pd.tos & IPTOS_LOWDELAY)
+			pqid = 1;
+		if (pf_ieee8021q_setpcp(m, r->set_prio[pqid])) {
+			action = PF_DROP;
+			REASON_SET(&reason, PFRES_MEMORY);
+			log = 1;
+			DPFPRINTF(PF_DEBUG_MISC,
+			    ("pf: failed to allocate 802.1q mtag\n"));
+		}
+	}
+
+#ifdef ALTQ
+	if (action == PF_PASS && r->qid) {
+		if (pd.pf_mtag == NULL &&
+		    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
+			action = PF_DROP;
+			REASON_SET(&reason, PFRES_MEMORY);
+		} else {
+			if (s != NULL)
+				pd.pf_mtag->qid_hash = pf_state_hash(s);
+			if (pd.tos & IPTOS_LOWDELAY)
+				pd.pf_mtag->qid = r->pqid;
+			else
+				pd.pf_mtag->qid = r->qid;
+			/* Add hints for ecn. */
+			pd.pf_mtag->hdr = h;
+		}
+	}
+#endif /* ALTQ */
+
+	/*
+	 * connections redirected to loopback should not match sockets
+	 * bound specifically to loopback due to security implications,
+	 * see tcp_input() and in_pcblookup_listen().
+	 */
+	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
+	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
+	    (s->nat_rule.ptr->action == PF_RDR ||
+	    s->nat_rule.ptr->action == PF_BINAT) &&
+	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
+		m->m_flags |= M_SKIP_FIREWALL;
+
+	/* XXX: Anybody working on it?! */
+	if (r->divert.port)
+		printf("pf: divert(9) is not supported for IPv6\n");
+
+	if (log) {
+		struct pf_rule *lr;
+
+		if (s != NULL && s->nat_rule.ptr != NULL &&
+		    s->nat_rule.ptr->log & PF_LOG_ALL)
+			lr = s->nat_rule.ptr;
+		else
+			lr = r;
+		PFLOG_PACKET(kif, m, AF_INET6, dir, reason, lr, a, ruleset,
+		    &pd, (s == NULL));
+	}
+
+	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
+	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
+
+	if (action == PF_PASS || r->action == PF_DROP) {
+		dirndx = (dir == PF_OUT);
+		r->packets[dirndx]++;
+		r->bytes[dirndx] += pd.tot_len;
+		if (a != NULL) {
+			a->packets[dirndx]++;
+			a->bytes[dirndx] += pd.tot_len;
+		}
+		if (s != NULL) {
+			if (s->nat_rule.ptr != NULL) {
+				s->nat_rule.ptr->packets[dirndx]++;
+				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
+			}
+			if (s->src_node != NULL) {
+				s->src_node->packets[dirndx]++;
+				s->src_node->bytes[dirndx] += pd.tot_len;
+			}
+			if (s->nat_src_node != NULL) {
+				s->nat_src_node->packets[dirndx]++;
+				s->nat_src_node->bytes[dirndx] += pd.tot_len;
+			}
+			dirndx = (dir == s->direction) ? 0 : 1;
+			s->packets[dirndx]++;
+			s->bytes[dirndx] += pd.tot_len;
+		}
+		tr = r;
+		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
+		if (nr != NULL && r == &V_pf_default_rule)
+			tr = nr;
+		if (tr->src.addr.type == PF_ADDR_TABLE)
+			pfr_update_stats(tr->src.addr.p.tbl,
+			    (s == NULL) ? pd.src :
+			    &s->key[(s->direction == PF_IN)]->addr[0],
+			    pd.af, pd.tot_len, dir == PF_OUT,
+			    r->action == PF_PASS, tr->src.neg);
+		if (tr->dst.addr.type == PF_ADDR_TABLE)
+			pfr_update_stats(tr->dst.addr.p.tbl,
+			    (s == NULL) ? pd.dst :
+			    &s->key[(s->direction == PF_IN)]->addr[1],
+			    pd.af, pd.tot_len, dir == PF_OUT,
+			    r->action == PF_PASS, tr->dst.neg);
+	}
+
+	switch (action) {
+	case PF_SYNPROXY_DROP:
+		m_freem(*m0);
+		/* FALLTHROUGH: like PF_DEFER, clear *m0 and report PF_PASS. */
+	case PF_DEFER:
+		*m0 = NULL;
+		action = PF_PASS;
+		break;
+	case PF_DROP:
+		m_freem(*m0);
+		*m0 = NULL;
+		break;
+	default:
+		/* pf_route6() returns unlocked. */
+		if (r->rt) {
+			pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
+			return (action);
+		}
+		break;
+	}
+
+	if (s)
+		PF_STATE_UNLOCK(s);
+
+	/* If reassembled packet passed, create new fragments. */
+	if (action == PF_PASS && *m0 && fwdir == PF_FWD &&
+	    (mtag = m_tag_find(m, PF_REASSEMBLED, NULL)) != NULL)
+		action = pf_refragment6(ifp, m0, mtag);
+
+	return (action);
+}
+#endif /* INET6 */
diff --git a/freebsd/sys/netpfil/pf/pf.h b/freebsd/sys/netpfil/pf/pf.h
new file mode 100644
index 00000000..ac0e0fb9
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/pf.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _NET_PF_H_
+#define _NET_PF_H_
+
+#define PF_TCPS_PROXY_SRC ((TCP_NSTATES)+0)
+#define PF_TCPS_PROXY_DST ((TCP_NSTATES)+1)
+
+#define PF_MD5_DIGEST_LENGTH 16
+#ifdef MD5_DIGEST_LENGTH
+#if PF_MD5_DIGEST_LENGTH != MD5_DIGEST_LENGTH
+#error
+#endif
+#endif
+
+enum { PF_INOUT, PF_IN, PF_OUT, PF_FWD };
+enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT,
+ PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER };
+enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT,
+ PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX };
+enum { PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT,
+ PF_OP_LE, PF_OP_GT, PF_OP_GE, PF_OP_XRG, PF_OP_RRG };
+enum { PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, PF_DEBUG_NOISY };
+enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL,
+ PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER,
+ PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET };
+enum { PF_GET_NONE, PF_GET_CLR_CNTR };
+enum { PF_SK_WIRE, PF_SK_STACK, PF_SK_BOTH };
+
+/*
+ * Note about PFTM_*: real indices into pf_rule.timeout[] come before
+ * PFTM_MAX, special cases afterwards. See pf_state_expires().
+ */
+enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED,
+ PFTM_TCP_CLOSING, PFTM_TCP_FIN_WAIT, PFTM_TCP_CLOSED,
+ PFTM_UDP_FIRST_PACKET, PFTM_UDP_SINGLE, PFTM_UDP_MULTIPLE,
+ PFTM_ICMP_FIRST_PACKET, PFTM_ICMP_ERROR_REPLY,
+ PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE,
+ PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL,
+ PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE,
+ PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNLINKED };
+
+/* PFTM default values */
+#define PFTM_TCP_FIRST_PACKET_VAL 120 /* First TCP packet */
+#define PFTM_TCP_OPENING_VAL 30 /* No response yet */
+#define PFTM_TCP_ESTABLISHED_VAL 24*60*60/* Established */
+#define PFTM_TCP_CLOSING_VAL 15 * 60 /* Half closed */
+#define PFTM_TCP_FIN_WAIT_VAL 45 /* Got both FINs */
+#define PFTM_TCP_CLOSED_VAL 90 /* Got a RST */
+#define PFTM_UDP_FIRST_PACKET_VAL 60 /* First UDP packet */
+#define PFTM_UDP_SINGLE_VAL 30 /* Unidirectional */
+#define PFTM_UDP_MULTIPLE_VAL 60 /* Bidirectional */
+#define PFTM_ICMP_FIRST_PACKET_VAL 20 /* First ICMP packet */
+#define PFTM_ICMP_ERROR_REPLY_VAL 10 /* Got error response */
+#define PFTM_OTHER_FIRST_PACKET_VAL 60 /* First packet */
+#define PFTM_OTHER_SINGLE_VAL 30 /* Unidirectional */
+#define PFTM_OTHER_MULTIPLE_VAL 60 /* Bidirectional */
+#define PFTM_FRAG_VAL 30 /* Fragment expire */
+#define PFTM_INTERVAL_VAL 10 /* Expire interval */
+#define PFTM_SRC_NODE_VAL 0 /* Source tracking */
+#define PFTM_TS_DIFF_VAL 30 /* Allowed TS diff */
+
+enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO };
+enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS,
+ PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX };
+#define PF_POOL_IDMASK 0x0f
+enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM,
+ PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN };
+enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL,
+ PF_ADDR_TABLE, PF_ADDR_URPFFAILED,
+ PF_ADDR_RANGE };
+#define PF_POOL_TYPEMASK 0x0f
+#define PF_POOL_STICKYADDR 0x20
+#define PF_WSCALE_FLAG 0x80
+#define PF_WSCALE_MASK 0x0f
+
+#define PF_LOG 0x01
+#define PF_LOG_ALL 0x02
+#define PF_LOG_SOCKET_LOOKUP 0x04
+
+/* Reasons code for passing/dropping a packet */
+#define PFRES_MATCH 0 /* Explicit match of a rule */
+#define PFRES_BADOFF 1 /* Bad offset for pull_hdr */
+#define PFRES_FRAG 2 /* Dropping following fragment */
+#define PFRES_SHORT 3 /* Dropping short packet */
+#define PFRES_NORM 4 /* Dropping by normalizer */
+#define PFRES_MEMORY 5 /* Dropped due to lacking mem */
+#define PFRES_TS 6 /* Bad TCP Timestamp (RFC1323) */
+#define PFRES_CONGEST 7 /* Congestion (of ipintrq) */
+#define PFRES_IPOPTIONS 8 /* IP option */
+#define PFRES_PROTCKSUM 9 /* Protocol checksum invalid */
+#define PFRES_BADSTATE 10 /* State mismatch */
+#define PFRES_STATEINS 11 /* State insertion failure */
+#define PFRES_MAXSTATES 12 /* State limit */
+#define PFRES_SRCLIMIT 13 /* Source node/conn limit */
+#define PFRES_SYNPROXY 14 /* SYN proxy */
+#define PFRES_MAPFAILED 15 /* pf_map_addr() failed */
+#define PFRES_MAX 16 /* total+1 */
+
+#define PFRES_NAMES { \
+ "match", \
+ "bad-offset", \
+ "fragment", \
+ "short", \
+ "normalize", \
+ "memory", \
+ "bad-timestamp", \
+ "congestion", \
+ "ip-option", \
+ "proto-cksum", \
+ "state-mismatch", \
+ "state-insert", \
+ "state-limit", \
+ "src-limit", \
+ "synproxy", \
+ "map-failed", \
+ NULL \
+}
+
+/* Counters for other things we want to keep track of */
+#define LCNT_STATES 0 /* states */
+#define LCNT_SRCSTATES 1 /* max-src-states */
+#define LCNT_SRCNODES 2 /* max-src-nodes */
+#define LCNT_SRCCONN 3 /* max-src-conn */
+#define LCNT_SRCCONNRATE 4 /* max-src-conn-rate */
+#define LCNT_OVERLOAD_TABLE 5 /* entry added to overload table */
+#define LCNT_OVERLOAD_FLUSH 6 /* state entries flushed */
+#define LCNT_MAX 7 /* total+1 */
+
+#define LCNT_NAMES { \
+ "max states per rule", \
+ "max-src-states", \
+ "max-src-nodes", \
+ "max-src-conn", \
+ "max-src-conn-rate", \
+ "overload table insertion", \
+ "overload flush states", \
+ NULL \
+}
+
+/* state operation counters */
+#define FCNT_STATE_SEARCH 0
+#define FCNT_STATE_INSERT 1
+#define FCNT_STATE_REMOVALS 2
+#define FCNT_MAX 3
+
+/* src_node operation counters */
+#define SCNT_SRC_NODE_SEARCH 0
+#define SCNT_SRC_NODE_INSERT 1
+#define SCNT_SRC_NODE_REMOVALS 2
+#define SCNT_MAX 3
+
+#define PF_TABLE_NAME_SIZE 32
+#define PF_QNAME_SIZE 64
+
+/*
+ * Global pf status and statistics snapshot.
+ * NOTE(review): presumably copied out to userland via an ioctl — confirm
+ * against the pf(4) interface before relying on the layout.
+ */
+struct pf_status {
+	uint64_t	counters[PFRES_MAX];	/* pass/drop reason counters (PFRES_*) */
+	uint64_t	lcounters[LCNT_MAX];	/* limit counters (LCNT_*) */
+	uint64_t	fcounters[FCNT_MAX];	/* state operation counters (FCNT_*) */
+	uint64_t	scounters[SCNT_MAX];	/* src-node operation counters (SCNT_*) */
+	uint64_t	pcounters[2][2][3];	/* packet counters -- TODO confirm index meaning */
+	uint64_t	bcounters[2][2];	/* byte counters -- TODO confirm index meaning */
+	uint32_t	running;		/* non-zero while pf is enabled (see V_pf_status.running checks) */
+	uint32_t	states;			/* current number of state entries */
+	uint32_t	src_nodes;		/* current number of source nodes */
+	uint32_t	since;			/* last start/stop timestamp -- units unverified */
+	uint32_t	debug;			/* PF_DEBUG_* verbosity level */
+	uint32_t	hostid;			/* configured host id -- confirm pfsync usage */
+	char		ifname[IFNAMSIZ];	/* status/log interface name */
+	uint8_t		pf_chksum[PF_MD5_DIGEST_LENGTH];	/* checksum of loaded ruleset */
+};
+
+#endif /* _NET_PF_H_ */
diff --git a/freebsd/sys/netpfil/pf/pf_altq.h b/freebsd/sys/netpfil/pf/pf_altq.h
new file mode 100644
index 00000000..3efd4ff7
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/pf_altq.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _NET_PF_ALTQ_H_
+#define _NET_PF_ALTQ_H_
+
+/* Options for the CBQ (class-based queueing) ALTQ scheduler. */
+struct cbq_opts {
+	u_int		minburst;
+	u_int		maxburst;
+	u_int		pktsize;
+	u_int		maxpktsize;
+	u_int		ns_per_byte;	/* presumably link speed as ns/byte — confirm vs. altq_cbq */
+	u_int		maxidle;
+	int		minidle;
+	u_int		offtime;
+	int		flags;
+};
+
+/* Options for the CoDel AQM discipline. */
+struct codel_opts {
+	u_int		target;		/* target delay -- units unverified, likely usec */
+	u_int		interval;	/* measurement interval -- units unverified */
+	int		ecn;		/* non-zero enables ECN marking instead of drop -- confirm */
+};
+
+/* Options for the PRIQ (priority queueing) ALTQ scheduler. */
+struct priq_opts {
+	int		flags;
+};
+
+/*
+ * Options for the HFSC scheduler: three two-piece service curves,
+ * each given as (m1, d, m2) = initial slope, x-projection, final slope.
+ */
+struct hfsc_opts {
+	/* real-time service curve */
+	u_int		rtsc_m1;	/* slope of the 1st segment in bps */
+	u_int		rtsc_d;		/* the x-projection of m1 in msec */
+	u_int		rtsc_m2;	/* slope of the 2nd segment in bps */
+	/* link-sharing service curve */
+	u_int		lssc_m1;
+	u_int		lssc_d;
+	u_int		lssc_m2;
+	/* upper-limit service curve */
+	u_int		ulsc_m1;
+	u_int		ulsc_d;
+	u_int		ulsc_m2;
+	int		flags;
+};
+
+/*
+ * Options for the FAIRQ scheduler.
+ * XXX this needs some work
+ */
+struct fairq_opts {
+	u_int           nbuckets;	/* number of hash buckets -- confirm vs. altq_fairq */
+	u_int           hogs_m1;
+	int             flags;
+
+	/* link sharing service curve */
+	u_int           lssc_m1;
+	u_int           lssc_d;
+	u_int           lssc_m2;
+};
+
+/*
+ * One ALTQ queue definition as configured through pf.  Combines the
+ * per-interface scheduler spec with the per-queue spec; the scheduler
+ * specific parameters live in the pq_u union selected by 'scheduler'.
+ */
+struct pf_altq {
+	char			 ifname[IFNAMSIZ];	/* interface this queue is on */
+
+	void			*altq_disc;	/* discipline-specific state */
+	TAILQ_ENTRY(pf_altq)	 entries;	/* linkage in the global queue list */
+
+	/* scheduler spec */
+	uint8_t			 scheduler;	/* scheduler type */
+	uint16_t		 tbrsize;	/* tokenbucket regulator size */
+	uint32_t		 ifbandwidth;	/* interface bandwidth */
+
+	/* queue spec */
+	char			 qname[PF_QNAME_SIZE];	/* queue name */
+	char			 parent[PF_QNAME_SIZE];	/* parent name */
+	uint32_t		 parent_qid;	/* parent queue id */
+	uint32_t		 bandwidth;	/* queue bandwidth */
+	uint8_t			 priority;	/* priority */
+	uint8_t			 local_flags;	/* dynamic interface */
+#define	PFALTQ_FLAG_IF_REMOVED		0x01
+
+	uint16_t		 qlimit;	/* queue size limit */
+	uint16_t		 flags;		/* misc flags */
+	union {
+		struct cbq_opts		 cbq_opts;
+		struct codel_opts	 codel_opts;
+		struct priq_opts	 priq_opts;
+		struct hfsc_opts	 hfsc_opts;
+		struct fairq_opts	 fairq_opts;
+	} pq_u;
+
+	uint32_t		 qid;		/* return value */
+};
+
+#endif /* _NET_PF_ALTQ_H_ */
diff --git a/freebsd/sys/netpfil/pf/pf_if.c b/freebsd/sys/netpfil/pf/pf_if.c
new file mode 100644
index 00000000..d1c54b22
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/pf_if.c
@@ -0,0 +1,924 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2001 Daniel Hartmeier
+ * Copyright (c) 2003 Cedric Berger
+ * Copyright (c) 2005 Henning Brauer <henning@openbsd.org>
+ * Copyright (c) 2005 Ryan McBride <mcbride@openbsd.org>
+ * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $OpenBSD: pf_if.c,v 1.54 2008/06/14 16:55:28 mk Exp $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/kernel.h>
+#include <sys/eventhandler.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/vnet.h>
+#include <net/pfvar.h>
+#include <net/route.h>
+
+VNET_DEFINE(struct pfi_kif *, pfi_all);
+static VNET_DEFINE(long, pfi_update);
+#define V_pfi_update VNET(pfi_update)
+#define PFI_BUFFER_MAX 0x10000
+
+VNET_DECLARE(int, pf_vnet_active);
+#define V_pf_vnet_active VNET(pf_vnet_active)
+
+static VNET_DEFINE(struct pfr_addr *, pfi_buffer);
+static VNET_DEFINE(int, pfi_buffer_cnt);
+static VNET_DEFINE(int, pfi_buffer_max);
+#define V_pfi_buffer VNET(pfi_buffer)
+#define V_pfi_buffer_cnt VNET(pfi_buffer_cnt)
+#define V_pfi_buffer_max VNET(pfi_buffer_max)
+
+eventhandler_tag pfi_attach_cookie;
+eventhandler_tag pfi_detach_cookie;
+eventhandler_tag pfi_attach_group_cookie;
+eventhandler_tag pfi_change_group_cookie;
+eventhandler_tag pfi_detach_group_cookie;
+eventhandler_tag pfi_ifaddr_event_cookie;
+
+static void pfi_attach_ifnet(struct ifnet *);
+static void pfi_attach_ifgroup(struct ifg_group *);
+
+static void pfi_kif_update(struct pfi_kif *);
+static void pfi_dynaddr_update(struct pfi_dynaddr *dyn);
+static void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int,
+ int);
+static void pfi_instance_add(struct ifnet *, int, int);
+static void pfi_address_add(struct sockaddr *, int, int);
+static int pfi_if_compare(struct pfi_kif *, struct pfi_kif *);
+static int pfi_skip_if(const char *, struct pfi_kif *);
+static int pfi_unmask(void *);
+static void pfi_attach_ifnet_event(void * __unused, struct ifnet *);
+static void pfi_detach_ifnet_event(void * __unused, struct ifnet *);
+static void pfi_attach_group_event(void *, struct ifg_group *);
+static void pfi_change_group_event(void *, char *);
+static void pfi_detach_group_event(void *, struct ifg_group *);
+static void pfi_ifaddr_event(void * __unused, struct ifnet *);
+
+RB_HEAD(pfi_ifhead, pfi_kif);
+static RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
+static RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
+static VNET_DEFINE(struct pfi_ifhead, pfi_ifs);
+#define V_pfi_ifs VNET(pfi_ifs)
+
+#define PFI_BUFFER_MAX 0x10000
+MALLOC_DEFINE(PFI_MTYPE, "pf_ifnet", "pf(4) interface database");
+
+LIST_HEAD(pfi_list, pfi_kif);
+static VNET_DEFINE(struct pfi_list, pfi_unlinked_kifs);
+#define V_pfi_unlinked_kifs VNET(pfi_unlinked_kifs)
+static struct mtx pfi_unlnkdkifs_mtx;
+MTX_SYSINIT(pfi_unlnkdkifs_mtx, &pfi_unlnkdkifs_mtx, "pf unlinked interfaces",
+ MTX_DEF);
+
/*
 * Per-VNET initialization: allocate the dynaddr scratch buffer, create
 * the special "all interfaces" kif (IFG_ALL), then attach a kif to
 * every interface group and ifnet already present in this vnet.
 */
void
pfi_initialize_vnet(void)
{
	struct ifg_group *ifg;
	struct ifnet *ifp;
	struct pfi_kif *kif;

	V_pfi_buffer_max = 64;	/* grows on demand up to PFI_BUFFER_MAX */
	V_pfi_buffer = malloc(V_pfi_buffer_max * sizeof(*V_pfi_buffer),
	    PFI_MTYPE, M_WAITOK);

	/* Allocate before taking the rules lock; pfi_kif_attach() consumes kif. */
	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
	PF_RULES_WLOCK();
	V_pfi_all = pfi_kif_attach(kif, IFG_ALL);
	PF_RULES_WUNLOCK();

	/* Walk the existing interface lists under the ifnet read lock. */
	IFNET_RLOCK();
	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
		pfi_attach_ifgroup(ifg);
	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
		pfi_attach_ifnet(ifp);
	IFNET_RUNLOCK();
}
+
/*
 * Global, once-per-kernel initialization: register pf's ifnet and
 * interface-group event handlers.  The group handlers receive curvnet
 * as their argument so the events can be replayed in vnet context.
 */
void
pfi_initialize(void)
{

	pfi_attach_cookie = EVENTHANDLER_REGISTER(ifnet_arrival_event,
	    pfi_attach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
	pfi_detach_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event,
	    pfi_detach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
	pfi_attach_group_cookie = EVENTHANDLER_REGISTER(group_attach_event,
	    pfi_attach_group_event, curvnet, EVENTHANDLER_PRI_ANY);
	pfi_change_group_cookie = EVENTHANDLER_REGISTER(group_change_event,
	    pfi_change_group_event, curvnet, EVENTHANDLER_PRI_ANY);
	pfi_detach_group_cookie = EVENTHANDLER_REGISTER(group_detach_event,
	    pfi_detach_group_event, curvnet, EVENTHANDLER_PRI_ANY);
	pfi_ifaddr_event_cookie = EVENTHANDLER_REGISTER(ifaddr_event,
	    pfi_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY);
}
+
/*
 * Per-VNET teardown: free every kif in the tree (detaching it from its
 * ifnet/group first), drain the unlinked-kif garbage list and release
 * the dynaddr scratch buffer.  Caller holds the rules write lock.
 */
void
pfi_cleanup_vnet(void)
{
	struct pfi_kif *kif;

	PF_RULES_WASSERT();

	V_pfi_all = NULL;
	while ((kif = RB_MIN(pfi_ifhead, &V_pfi_ifs))) {
		RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif);
		/* Break the back-pointers before freeing. */
		if (kif->pfik_group)
			kif->pfik_group->ifg_pf_kif = NULL;
		if (kif->pfik_ifp)
			kif->pfik_ifp->if_pf_kif = NULL;
		free(kif, PFI_MTYPE);
	}

	mtx_lock(&pfi_unlnkdkifs_mtx);
	while ((kif = LIST_FIRST(&V_pfi_unlinked_kifs))) {
		LIST_REMOVE(kif, pfik_list);
		free(kif, PFI_MTYPE);
	}
	mtx_unlock(&pfi_unlnkdkifs_mtx);

	free(V_pfi_buffer, PFI_MTYPE);
}
+
/*
 * Global teardown: deregister the event handlers installed by
 * pfi_initialize().
 */
void
pfi_cleanup(void)
{

	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie);
	EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie);
	EVENTHANDLER_DEREGISTER(group_attach_event, pfi_attach_group_cookie);
	EVENTHANDLER_DEREGISTER(group_change_event, pfi_change_group_cookie);
	EVENTHANDLER_DEREGISTER(group_detach_event, pfi_detach_group_cookie);
	EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie);
}
+
+struct pfi_kif *
+pfi_kif_find(const char *kif_name)
+{
+ struct pfi_kif_cmp s;
+
+ PF_RULES_ASSERT();
+
+ bzero(&s, sizeof(s));
+ strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name));
+
+ return (RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s));
+}
+
/*
 * Attach a preallocated kif under the given name and insert it into the
 * kif tree.  If a kif with that name already exists, the passed-in
 * storage is freed and the existing kif is returned -- callers must use
 * the return value, never the argument, afterwards.
 * Caller holds the rules write lock.
 */
struct pfi_kif *
pfi_kif_attach(struct pfi_kif *kif, const char *kif_name)
{
	struct pfi_kif *kif1;

	PF_RULES_WASSERT();
	KASSERT(kif != NULL, ("%s: null kif", __func__));

	kif1 = pfi_kif_find(kif_name);
	if (kif1 != NULL) {
		/* Name already known: discard the caller's storage. */
		free(kif, PFI_MTYPE);
		return (kif1);
	}

	bzero(kif, sizeof(*kif));
	strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name));
	/*
	 * It seems that the value of time_second is in an uninitialized
	 * state when pf sets interface statistics clear time in boot phase
	 * if pf was statically linked to kernel. Instead of setting the
	 * bogus time value have pfi_get_ifaces handle this case. In
	 * pfi_get_ifaces it uses time_second if it sees the time is 0.
	 */
	kif->pfik_tzero = time_second > 1 ? time_second : 0;
	TAILQ_INIT(&kif->pfik_dynaddrs);

	RB_INSERT(pfi_ifhead, &V_pfi_ifs, kif);

	return (kif);
}
+
/*
 * Take a rule reference on a kif.  Caller holds the rules write lock.
 */
void
pfi_kif_ref(struct pfi_kif *kif)
{

	PF_RULES_WASSERT();
	kif->pfik_rulerefs++;
}
+
/*
 * Drop a rule reference on a kif.  When the last reference goes away
 * and the kif is not backed by a live ifnet or group (and is not the
 * special "all" kif), unlink it from the tree and queue it for garbage
 * collection by pfi_kif_purge().  Caller holds the rules write lock.
 */
void
pfi_kif_unref(struct pfi_kif *kif)
{

	PF_RULES_WASSERT();
	KASSERT(kif->pfik_rulerefs > 0, ("%s: %p has zero refs", __func__, kif));

	kif->pfik_rulerefs--;

	if (kif->pfik_rulerefs > 0)
		return;

	/* kif referencing an existing ifnet or group should exist. */
	if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all)
		return;

	RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif);

	/*
	 * Pre-set the mark so the first pfi_kif_purge() sweep only clears
	 * it; the kif is freed no earlier than the second sweep.
	 */
	kif->pfik_flags |= PFI_IFLAG_REFS;

	mtx_lock(&pfi_unlnkdkifs_mtx);
	LIST_INSERT_HEAD(&V_pfi_unlinked_kifs, kif, pfik_list);
	mtx_unlock(&pfi_unlnkdkifs_mtx);
}
+
+void
+pfi_kif_purge(void)
+{
+ struct pfi_kif *kif, *kif1;
+
+ /*
+ * Do naive mark-and-sweep garbage collecting of old kifs.
+ * Reference flag is raised by pf_purge_expired_states().
+ */
+ mtx_lock(&pfi_unlnkdkifs_mtx);
+ LIST_FOREACH_SAFE(kif, &V_pfi_unlinked_kifs, pfik_list, kif1) {
+ if (!(kif->pfik_flags & PFI_IFLAG_REFS)) {
+ LIST_REMOVE(kif, pfik_list);
+ free(kif, PFI_MTYPE);
+ } else
+ kif->pfik_flags &= ~PFI_IFLAG_REFS;
+ }
+ mtx_unlock(&pfi_unlnkdkifs_mtx);
+}
+
+int
+pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif)
+{
+ struct ifg_list *p;
+
+ if (rule_kif == NULL || rule_kif == packet_kif)
+ return (1);
+
+ if (rule_kif->pfik_group != NULL)
+ /* XXXGL: locking? */
+ TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next)
+ if (p->ifgl_group == rule_kif->pfik_group)
+ return (1);
+
+ return (0);
+}
+
/*
 * Create (or find) the kif for a newly arrived ifnet, cross-link the
 * two, and refresh any dynaddrs that depend on it.
 */
static void
pfi_attach_ifnet(struct ifnet *ifp)
{
	struct pfi_kif *kif;

	/* Allocate before taking the rules lock (M_WAITOK may sleep). */
	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);

	PF_RULES_WLOCK();
	V_pfi_update++;	/* invalidate cached dynaddr tables */
	kif = pfi_kif_attach(kif, ifp->if_xname);

	kif->pfik_ifp = ifp;
	ifp->if_pf_kif = kif;

	pfi_kif_update(kif);
	PF_RULES_WUNLOCK();
}
+
/*
 * Create (or find) the kif for a newly created interface group and
 * cross-link the two.
 */
static void
pfi_attach_ifgroup(struct ifg_group *ifg)
{
	struct pfi_kif *kif;

	/* Allocate before taking the rules lock (M_WAITOK may sleep). */
	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);

	PF_RULES_WLOCK();
	V_pfi_update++;	/* invalidate cached dynaddr tables */
	kif = pfi_kif_attach(kif, ifg->ifg_group);

	kif->pfik_group = ifg;
	ifg->ifg_pf_kif = kif;
	PF_RULES_WUNLOCK();
}
+
+int
+pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af)
+{
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ switch (dyn->pfid_acnt4) {
+ case 0:
+ return (0);
+ case 1:
+ return (PF_MATCHA(0, &dyn->pfid_addr4,
+ &dyn->pfid_mask4, a, AF_INET));
+ default:
+ return (pfr_match_addr(dyn->pfid_kt, a, AF_INET));
+ }
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ switch (dyn->pfid_acnt6) {
+ case 0:
+ return (0);
+ case 1:
+ return (PF_MATCHA(0, &dyn->pfid_addr6,
+ &dyn->pfid_mask6, a, AF_INET6));
+ default:
+ return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6));
+ }
+ break;
+#endif /* INET6 */
+ default:
+ return (0);
+ }
+}
+
/*
 * Set up a dynamic address ("(ifname)" syntax in pf.conf): attach a kif
 * for the interface (or the "all" group for "self"), derive the hidden
 * table name from the interface name plus modifier flags, attach that
 * table inside the reserved anchor, and link the dynaddr to the kif so
 * future address changes refresh it.  Returns 0 or ENOMEM.
 * Caller holds the rules write lock.
 */
int
pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af)
{
	struct pfi_dynaddr *dyn;
	char tblname[PF_TABLE_NAME_SIZE];
	struct pf_ruleset *ruleset = NULL;
	struct pfi_kif *kif;
	int rv = 0;

	PF_RULES_WASSERT();
	KASSERT(aw->type == PF_ADDR_DYNIFTL, ("%s: type %u",
	    __func__, aw->type));
	KASSERT(aw->p.dyn == NULL, ("%s: dyn is %p", __func__, aw->p.dyn));

	if ((dyn = malloc(sizeof(*dyn), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL)
		return (ENOMEM);

	if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT)) == NULL) {
		free(dyn, PFI_MTYPE);
		return (ENOMEM);
	}

	/* "self" is shorthand for the group of all interfaces. */
	if (!strcmp(aw->v.ifname, "self"))
		dyn->pfid_kif = pfi_kif_attach(kif, IFG_ALL);
	else
		dyn->pfid_kif = pfi_kif_attach(kif, aw->v.ifname);
	pfi_kif_ref(dyn->pfid_kif);

	/* A full IPv4 mask (/32) is canonicalized to 128 = "unmasked". */
	dyn->pfid_net = pfi_unmask(&aw->v.a.mask);
	if (af == AF_INET && dyn->pfid_net == 32)
		dyn->pfid_net = 128;
	/* Encode the modifier flags into the hidden table's name. */
	strlcpy(tblname, aw->v.ifname, sizeof(tblname));
	if (aw->iflags & PFI_AFLAG_NETWORK)
		strlcat(tblname, ":network", sizeof(tblname));
	if (aw->iflags & PFI_AFLAG_BROADCAST)
		strlcat(tblname, ":broadcast", sizeof(tblname));
	if (aw->iflags & PFI_AFLAG_PEER)
		strlcat(tblname, ":peer", sizeof(tblname));
	if (aw->iflags & PFI_AFLAG_NOALIAS)
		strlcat(tblname, ":0", sizeof(tblname));
	if (dyn->pfid_net != 128)
		snprintf(tblname + strlen(tblname),
		    sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net);
	if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) {
		rv = ENOMEM;
		goto _bad;
	}

	if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) {
		rv = ENOMEM;
		goto _bad;
	}

	dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE;
	dyn->pfid_iflags = aw->iflags;
	dyn->pfid_af = af;

	TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry);
	aw->p.dyn = dyn;
	/* Populate the table with the interface's current addresses. */
	pfi_kif_update(dyn->pfid_kif);

	return (0);

_bad:
	/* Unwind in reverse order of acquisition; kif was consumed above. */
	if (dyn->pfid_kt != NULL)
		pfr_detach_table(dyn->pfid_kt);
	if (ruleset != NULL)
		pf_remove_if_empty_ruleset(ruleset);
	if (dyn->pfid_kif != NULL)
		pfi_kif_unref(dyn->pfid_kif);
	free(dyn, PFI_MTYPE);

	return (rv);
}
+
/*
 * Refresh every dynaddr hanging off a kif.  If the kif is backed by an
 * ifnet, recurse into the kifs of all groups that ifnet belongs to,
 * since their dynaddrs depend on this interface's addresses as well.
 * Caller holds the rules write lock.
 */
static void
pfi_kif_update(struct pfi_kif *kif)
{
	struct ifg_list *ifgl;
	struct pfi_dynaddr *p;

	PF_RULES_WASSERT();

	/* update all dynaddr */
	TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry)
		pfi_dynaddr_update(p);

	/* again for all groups kif is member of */
	if (kif->pfik_ifp != NULL) {
		IF_ADDR_RLOCK(kif->pfik_ifp);
		TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next)
			pfi_kif_update((struct pfi_kif *)
			    ifgl->ifgl_group->ifg_pf_kif);
		IF_ADDR_RUNLOCK(kif->pfik_ifp);
	}
}
+
/*
 * Bring one dynaddr up to date: rebuild its backing table if stale
 * (pfrkt_larg serves as a generation stamp compared against
 * V_pfi_update), then refresh the dynaddr's cached address data.
 */
static void
pfi_dynaddr_update(struct pfi_dynaddr *dyn)
{
	struct pfi_kif *kif;
	struct pfr_ktable *kt;

	PF_RULES_WASSERT();
	KASSERT(dyn && dyn->pfid_kif && dyn->pfid_kt,
	    ("%s: bad argument", __func__));

	kif = dyn->pfid_kif;
	kt = dyn->pfid_kt;

	if (kt->pfrkt_larg != V_pfi_update) {
		/* this table needs to be brought up-to-date */
		pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags);
		kt->pfrkt_larg = V_pfi_update;
	}
	pfr_dynaddr_update(kt, dyn);
}
+
/*
 * Rebuild a dynaddr table's contents: collect the matching addresses of
 * the kif's ifnet (or of every member ifnet of its group) into the
 * shared V_pfi_buffer, then replace the table contents wholesale via
 * pfr_set_addrs().
 */
static void
pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags)
{
	int e, size2 = 0;
	struct ifg_member *ifgm;

	V_pfi_buffer_cnt = 0;	/* reset the scratch buffer */

	if (kif->pfik_ifp != NULL)
		pfi_instance_add(kif->pfik_ifp, net, flags);
	else if (kif->pfik_group != NULL) {
		IFNET_RLOCK_NOSLEEP();
		TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next)
			pfi_instance_add(ifgm->ifgm_ifp, net, flags);
		IFNET_RUNLOCK_NOSLEEP();
	}

	if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2,
	    NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK)))
		printf("%s: cannot set %d new addresses into table %s: %d\n",
		    __func__, V_pfi_buffer_cnt, kt->pfrkt_name, e);
}
+
/*
 * Append the addresses of one ifnet that satisfy the dynaddr modifier
 * flags (:network, :broadcast, :peer, :0) to the scratch buffer.
 * "net" is the prefix length to apply; 128 means "unmasked", in which
 * case :network substitutes the address's own netmask.
 */
static void
pfi_instance_add(struct ifnet *ifp, int net, int flags)
{
	struct ifaddr *ia;
	int got4 = 0, got6 = 0;	/* :0 keeps only the first address per af */
	int net2, af;

	IF_ADDR_RLOCK(ifp);
	TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_list) {
		if (ia->ifa_addr == NULL)
			continue;
		af = ia->ifa_addr->sa_family;
		if (af != AF_INET && af != AF_INET6)
			continue;
		/*
		 * XXX: For point-to-point interfaces, (ifname:0) and IPv4,
		 *	jump over addresses without a proper route to work
		 *	around a problem with ppp not fully removing the
		 *	address used during IPCP.
		 */
		if ((ifp->if_flags & IFF_POINTOPOINT) &&
		    !(ia->ifa_flags & IFA_ROUTE) &&
		    (flags & PFI_AFLAG_NOALIAS) && (af == AF_INET))
			continue;
		/* IPv6 has no broadcast addresses. */
		if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6)
			continue;
		if ((flags & PFI_AFLAG_BROADCAST) &&
		    !(ifp->if_flags & IFF_BROADCAST))
			continue;
		if ((flags & PFI_AFLAG_PEER) &&
		    !(ifp->if_flags & IFF_POINTOPOINT))
			continue;
		/* :network skips IPv6 link-local addresses. */
		if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 &&
		    IN6_IS_ADDR_LINKLOCAL(
		    &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr))
			continue;
		if (flags & PFI_AFLAG_NOALIAS) {
			if (af == AF_INET && got4)
				continue;
			if (af == AF_INET6 && got6)
				continue;
		}
		if (af == AF_INET)
			got4 = 1;
		else if (af == AF_INET6)
			got6 = 1;
		net2 = net;
		if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) {
			/* Unmasked + :network -> use the address's netmask. */
			if (af == AF_INET)
				net2 = pfi_unmask(&((struct sockaddr_in *)
				    ia->ifa_netmask)->sin_addr);
			else if (af == AF_INET6)
				net2 = pfi_unmask(&((struct sockaddr_in6 *)
				    ia->ifa_netmask)->sin6_addr);
		}
		if (af == AF_INET && net2 > 32)
			net2 = 32;
		if (flags & PFI_AFLAG_BROADCAST)
			pfi_address_add(ia->ifa_broadaddr, af, net2);
		else if (flags & PFI_AFLAG_PEER)
			pfi_address_add(ia->ifa_dstaddr, af, net2);
		else
			pfi_address_add(ia->ifa_addr, af, net2);
	}
	IF_ADDR_RUNLOCK(ifp);
}
+
/*
 * Append one address to the scratch buffer, growing the buffer
 * geometrically up to PFI_BUFFER_MAX entries.  Host bits beyond "net"
 * are zeroed so the table only holds proper network addresses.
 */
static void
pfi_address_add(struct sockaddr *sa, int af, int net)
{
	struct pfr_addr *p;
	int i;

	if (V_pfi_buffer_cnt >= V_pfi_buffer_max) {
		int new_max = V_pfi_buffer_max * 2;

		if (new_max > PFI_BUFFER_MAX) {
			/* Hard cap reached: drop the address, warn once per call. */
			printf("%s: address buffer full (%d/%d)\n", __func__,
			    V_pfi_buffer_cnt, PFI_BUFFER_MAX);
			return;
		}
		/* M_NOWAIT: presumably callable from non-sleepable context. */
		p = malloc(new_max * sizeof(*V_pfi_buffer), PFI_MTYPE,
		    M_NOWAIT);
		if (p == NULL) {
			printf("%s: no memory to grow buffer (%d/%d)\n",
			    __func__, V_pfi_buffer_cnt, PFI_BUFFER_MAX);
			return;
		}
		memcpy(p, V_pfi_buffer, V_pfi_buffer_max * sizeof(*V_pfi_buffer));
		/* no need to zero buffer */
		free(V_pfi_buffer, PFI_MTYPE);
		V_pfi_buffer = p;
		V_pfi_buffer_max = new_max;
	}
	/* IPv4 prefix wider than 32 means "unmasked" (128 convention). */
	if (af == AF_INET && net > 32)
		net = 128;
	p = V_pfi_buffer + V_pfi_buffer_cnt++;
	bzero(p, sizeof(*p));
	p->pfra_af = af;
	p->pfra_net = net;
	if (af == AF_INET)
		p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr;
	else if (af == AF_INET6) {
		p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr;
		/* Strip the embedded scope id from link-scoped addresses. */
		if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr))
			p->pfra_ip6addr.s6_addr16[1] = 0;
	}
	/*
	 * mask network address bits
	 * NOTE(review): the byte indexing below assumes the address union
	 * (pfra_u) is the first member of struct pfr_addr -- confirm
	 * against pfvar.h.
	 */
	if (net < 128)
		((caddr_t)p)[p->pfra_net/8] &= ~(0xFF >> (p->pfra_net%8));
	for (i = (p->pfra_net+7)/8; i < sizeof(p->pfra_u); i++)
		((caddr_t)p)[i] = 0;
}
+
/*
 * Tear down a dynaddr: unlink it from its kif, drop the kif rule
 * reference, detach the backing table and free the dynaddr itself.
 */
void
pfi_dynaddr_remove(struct pfi_dynaddr *dyn)
{

	KASSERT(dyn->pfid_kif != NULL, ("%s: null pfid_kif", __func__));
	KASSERT(dyn->pfid_kt != NULL, ("%s: null pfid_kt", __func__));

	TAILQ_REMOVE(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry);
	pfi_kif_unref(dyn->pfid_kif);
	pfr_detach_table(dyn->pfid_kt);
	free(dyn, PFI_MTYPE);
}
+
+void
+pfi_dynaddr_copyout(struct pf_addr_wrap *aw)
+{
+
+ KASSERT(aw->type == PF_ADDR_DYNIFTL,
+ ("%s: type %u", __func__, aw->type));
+
+ if (aw->p.dyn == NULL || aw->p.dyn->pfid_kif == NULL)
+ return;
+ aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6;
+}
+
+static int
+pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q)
+{
+ return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ));
+}
+
/*
 * For the interface or group named "name": if pfs is non-NULL, sum the
 * per-interface packet/byte counters of all members into pfs; if pfs is
 * NULL, clear the members' counters and reset their tzero stamp instead.
 */
void
pfi_update_status(const char *name, struct pf_status *pfs)
{
	struct pfi_kif *p;
	struct pfi_kif_cmp key;
	struct ifg_member p_member, *ifgm;
	TAILQ_HEAD(, ifg_member) ifg_members;
	int i, j, k;

	strlcpy(key.pfik_name, name, sizeof(key.pfik_name));
	p = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&key);
	if (p == NULL)
		return;

	if (p->pfik_group != NULL) {
		/* Operate on every member of the group. */
		bcopy(&p->pfik_group->ifg_members, &ifg_members,
		    sizeof(ifg_members));
	} else {
		/* build a temporary list for p only */
		bzero(&p_member, sizeof(p_member));
		p_member.ifgm_ifp = p->pfik_ifp;
		TAILQ_INIT(&ifg_members);
		TAILQ_INSERT_TAIL(&ifg_members, &p_member, ifgm_next);
	}
	if (pfs) {
		/* Start from zero before accumulating member counters. */
		bzero(pfs->pcounters, sizeof(pfs->pcounters));
		bzero(pfs->bcounters, sizeof(pfs->bcounters));
	}
	TAILQ_FOREACH(ifgm, &ifg_members, ifgm_next) {
		if (ifgm->ifgm_ifp == NULL || ifgm->ifgm_ifp->if_pf_kif == NULL)
			continue;
		p = (struct pfi_kif *)ifgm->ifgm_ifp->if_pf_kif;

		/* just clear statistics */
		if (pfs == NULL) {
			bzero(p->pfik_packets, sizeof(p->pfik_packets));
			bzero(p->pfik_bytes, sizeof(p->pfik_bytes));
			p->pfik_tzero = time_second;
			continue;
		}
		/* Sum all 2x2x2 counter cells; index semantics per pfvar.h. */
		for (i = 0; i < 2; i++)
			for (j = 0; j < 2; j++)
				for (k = 0; k < 2; k++) {
					pfs->pcounters[i][j][k] +=
						p->pfik_packets[i][j][k];
					pfs->bcounters[i][j] +=
						p->pfik_bytes[i][j][k];
				}
	}
}
+
+void
+pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size)
+{
+ struct pfi_kif *p, *nextp;
+ int n = 0;
+
+ for (p = RB_MIN(pfi_ifhead, &V_pfi_ifs); p; p = nextp) {
+ nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p);
+ if (pfi_skip_if(name, p))
+ continue;
+ if (*size <= n++)
+ break;
+ if (!p->pfik_tzero)
+ p->pfik_tzero = time_second;
+ bcopy(p, buf++, sizeof(*buf));
+ nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p);
+ }
+ *size = n;
+}
+
+static int
+pfi_skip_if(const char *filter, struct pfi_kif *p)
+{
+ int n;
+
+ if (filter == NULL || !*filter)
+ return (0);
+ if (!strcmp(p->pfik_name, filter))
+ return (0); /* exact match */
+ n = strlen(filter);
+ if (n < 1 || n >= IFNAMSIZ)
+ return (1); /* sanity check */
+ if (filter[n-1] >= '0' && filter[n-1] <= '9')
+ return (1); /* only do exact match in that case */
+ if (strncmp(p->pfik_name, filter, n))
+ return (1); /* prefix doesn't match */
+ return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9');
+}
+
+int
+pfi_set_flags(const char *name, int flags)
+{
+ struct pfi_kif *p;
+
+ RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) {
+ if (pfi_skip_if(name, p))
+ continue;
+ p->pfik_flags |= flags;
+ }
+ return (0);
+}
+
+int
+pfi_clear_flags(const char *name, int flags)
+{
+ struct pfi_kif *p;
+
+ RB_FOREACH(p, pfi_ifhead, &V_pfi_ifs) {
+ if (pfi_skip_if(name, p))
+ continue;
+ p->pfik_flags &= ~flags;
+ }
+ return (0);
+}
+
+/* from pf_print_state.c */
+static int
+pfi_unmask(void *addr)
+{
+ struct pf_addr *m = addr;
+ int i = 31, j = 0, b = 0;
+ u_int32_t tmp;
+
+ while (j < 4 && m->addr32[j] == 0xffffffff) {
+ b += 32;
+ j++;
+ }
+ if (j < 4) {
+ tmp = ntohl(m->addr32[j]);
+ for (i = 31; tmp & (1 << i); --i)
+ b++;
+ }
+ return (b);
+}
+
/*
 * ifnet_arrival_event handler: attach a kif for the new interface in
 * its vnet, and notify ALTQ if compiled in.
 */
static void
pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp)
{

	CURVNET_SET(ifp->if_vnet);
	if (V_pf_vnet_active == 0) {
		/* Avoid teardown race in the least expensive way. */
		CURVNET_RESTORE();
		return;
	}
	pfi_attach_ifnet(ifp);
#ifdef ALTQ
	PF_RULES_WLOCK();
	pf_altq_ifnet_event(ifp, 0);	/* 0 = interface arrived */
	PF_RULES_WUNLOCK();
#endif
	CURVNET_RESTORE();
}
+
/*
 * ifnet_departure_event handler: detach the kif from the departing
 * ifnet.  The kif itself stays in the tree (rules may still reference
 * it by name); only the ifnet cross-links are severed.
 */
static void
pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp)
{
	struct pfi_kif *kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL)
		return;

	CURVNET_SET(ifp->if_vnet);
	if (V_pf_vnet_active == 0) {
		/* Avoid teardown race in the least expensive way. */
		CURVNET_RESTORE();
		return;
	}
	PF_RULES_WLOCK();
	V_pfi_update++;	/* invalidate cached dynaddr tables */
	pfi_kif_update(kif);

	kif->pfik_ifp = NULL;
	ifp->if_pf_kif = NULL;
#ifdef ALTQ
	pf_altq_ifnet_event(ifp, 1);	/* 1 = interface departed */
#endif
	PF_RULES_WUNLOCK();
	CURVNET_RESTORE();
}
+
+static void
+pfi_attach_group_event(void *arg , struct ifg_group *ifg)
+{
+
+ CURVNET_SET((struct vnet *)arg);
+ if (V_pf_vnet_active == 0) {
+ /* Avoid teardown race in the least expensive way. */
+ CURVNET_RESTORE();
+ return;
+ }
+ pfi_attach_ifgroup(ifg);
+ CURVNET_RESTORE();
+}
+
/*
 * group_change_event handler: (re)attach the kif for the renamed group
 * and refresh dynaddrs depending on it.  "arg" is the registered vnet.
 */
static void
pfi_change_group_event(void *arg, char *gname)
{
	struct pfi_kif *kif;

	CURVNET_SET((struct vnet *)arg);
	if (V_pf_vnet_active == 0) {
		/* Avoid teardown race in the least expensive way. */
		CURVNET_RESTORE();
		return;
	}

	/* Allocate before taking the rules lock; pfi_kif_attach() consumes kif. */
	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
	PF_RULES_WLOCK();
	V_pfi_update++;	/* invalidate cached dynaddr tables */
	kif = pfi_kif_attach(kif, gname);
	pfi_kif_update(kif);
	PF_RULES_WUNLOCK();
	CURVNET_RESTORE();
}
+
/*
 * group_detach_event handler: sever the cross-links between a departing
 * interface group and its kif.  The kif stays in the tree; only the
 * group references are cleared.  "arg" is the registered vnet.
 */
static void
pfi_detach_group_event(void *arg, struct ifg_group *ifg)
{
	struct pfi_kif *kif = (struct pfi_kif *)ifg->ifg_pf_kif;

	if (kif == NULL)
		return;

	CURVNET_SET((struct vnet *)arg);
	if (V_pf_vnet_active == 0) {
		/* Avoid teardown race in the least expensive way. */
		CURVNET_RESTORE();
		return;
	}
	PF_RULES_WLOCK();
	V_pfi_update++;	/* invalidate cached dynaddr tables */

	kif->pfik_group = NULL;
	ifg->ifg_pf_kif = NULL;
	PF_RULES_WUNLOCK();
	CURVNET_RESTORE();
}
+
+static void
+pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp)
+{
+ if (ifp->if_pf_kif == NULL)
+ return;
+
+ CURVNET_SET(ifp->if_vnet);
+ if (V_pf_vnet_active == 0) {
+ /* Avoid teardown race in the least expensive way. */
+ CURVNET_RESTORE();
+ return;
+ }
+ PF_RULES_WLOCK();
+ if (ifp && ifp->if_pf_kif) {
+ V_pfi_update++;
+ pfi_kif_update(ifp->if_pf_kif);
+ }
+ PF_RULES_WUNLOCK();
+ CURVNET_RESTORE();
+}
diff --git a/freebsd/sys/netpfil/pf/pf_ioctl.c b/freebsd/sys/netpfil/pf/pf_ioctl.c
new file mode 100644
index 00000000..9c1523ca
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/pf_ioctl.c
@@ -0,0 +1,3872 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2001 Daniel Hartmeier
+ * Copyright (c) 2002,2003 Henning Brauer
+ * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Effort sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F30602-01-2-0537.
+ *
+ * $OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_bpf.h>
+#include <rtems/bsd/local/opt_pf.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/endian.h>
+#include <sys/fcntl.h>
+#include <sys/filio.h>
+#include <sys/interrupt.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/md5.h>
+#include <sys/ucred.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/vnet.h>
+#include <net/route.h>
+#include <net/pfil.h>
+#include <net/pfvar.h>
+#include <net/if_pfsync.h>
+#include <net/if_pflog.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet/ip_icmp.h>
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif /* INET6 */
+
+#ifdef ALTQ
+#include <net/altq/altq.h>
+#endif
+
/*
 * Forward declarations for the ioctl plumbing: pool manipulation
 * helpers, the pfioctl() entry point, ALTQ and ruleset transaction
 * helpers, and the rule hashing used for pfsync matching.
 */
static struct pf_pool	*pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t,
			    u_int8_t, u_int8_t, u_int8_t);

static void		 pf_mv_pool(struct pf_palist *, struct pf_palist *);
static void		 pf_empty_pool(struct pf_palist *);
static int		 pfioctl(struct cdev *, u_long, caddr_t, int,
			    struct thread *);
#ifdef ALTQ
static int		 pf_begin_altq(u_int32_t *);
static int		 pf_rollback_altq(u_int32_t);
static int		 pf_commit_altq(u_int32_t);
static int		 pf_enable_altq(struct pf_altq *);
static int		 pf_disable_altq(struct pf_altq *);
static u_int32_t	 pf_qname2qid(char *);
static void		 pf_qid_unref(u_int32_t);
#endif /* ALTQ */
static int		 pf_begin_rules(u_int32_t *, int, const char *);
static int		 pf_rollback_rules(u_int32_t, int, char *);
static int		 pf_setup_pfsync_matching(struct pf_ruleset *);
static void		 pf_hash_rule(MD5_CTX *, struct pf_rule *);
static void		 pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *);
static int		 pf_commit_rules(u_int32_t, int, char *);
static int		 pf_addr_setup(struct pf_ruleset *,
			    struct pf_addr_wrap *, sa_family_t);
static void		 pf_addr_copyout(struct pf_addr_wrap *);
+
/* The (vnet-virtualized) default rule, applied when no other rule matches. */
VNET_DEFINE(struct pf_rule,	pf_default_rule);

#ifdef ALTQ
static VNET_DEFINE(int,		pf_altq_running);
#define	V_pf_altq_running	VNET(pf_altq_running)
#endif

/* Tag and queue names are interned as reference-counted pf_tagname entries. */
#define	TAGID_MAX	 50000
struct pf_tagname {
	TAILQ_ENTRY(pf_tagname)	entries;
	char			name[PF_TAG_NAME_SIZE];
	uint16_t		tag;	/* numeric id handed out for name */
	int			ref;	/* reference count */
};

TAILQ_HEAD(pf_tags, pf_tagname);
#define	V_pf_tags		VNET(pf_tags)
VNET_DEFINE(struct pf_tags, pf_tags);
#define	V_pf_qids		VNET(pf_qids)
VNET_DEFINE(struct pf_tags, pf_qids);
static MALLOC_DEFINE(M_PFTAG, "pf_tag", "pf(4) tag names");
static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db");
static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules");

/* Queue names reuse the tag machinery, so the sizes must agree. */
#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
#endif

static u_int16_t	 tagname2tag(struct pf_tags *, char *);
static u_int16_t	 pf_tagname2tag(char *);
static void		 tag_unref(struct pf_tags *, u_int16_t);

/* Debug printf gated on the configured pf debug level. */
#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x

struct cdev *pf_dev;
+
/*
 * XXX - These are new and need to be checked when moving to a new version
 */
static void		 pf_clear_states(void);
static int		 pf_clear_tables(void);
static void		 pf_clear_srcnodes(struct pf_src_node *);
static void		 pf_kill_srcnodes(struct pfioc_src_node_kill *);
static void		 pf_tbladdr_copyout(struct pf_addr_wrap *);

/*
 * Wrapper functions for pfil(9) hooks
 */
#ifdef INET
static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp,
	int dir, struct inpcb *inp);
static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp,
	int dir, struct inpcb *inp);
#endif
#ifdef INET6
static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp,
	int dir, struct inpcb *inp);
static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp,
	int dir, struct inpcb *inp);
#endif

static int		 hook_pf(void);
static int		 dehook_pf(void);
static int		 shutdown_pf(void);
static int		 pf_load(void);
static int		 pf_unload(void);

/* Character device entry points; all control flows through pfioctl(). */
static struct cdevsw pf_cdevsw = {
	.d_ioctl =	pfioctl,
	.d_name =	PF_NAME,
	.d_version =	D_VERSION,
};

/* Nonzero while the pfil(9) hooks are installed. */
static volatile VNET_DEFINE(int, pf_pfil_hooked);
#define V_pf_pfil_hooked	VNET(pf_pfil_hooked)

/*
 * We need a flag that is neither hooked nor running to know when
 * the VNET is "valid". We primarily need this to control (global)
 * external event, e.g., eventhandlers.
 */
VNET_DEFINE(int, pf_vnet_active);
#define V_pf_vnet_active	VNET(pf_vnet_active)

int pf_end_threads;

struct rwlock			pf_rules_lock;
struct sx			pf_ioctl_lock;

/* pfsync: function pointers filled in when if_pfsync(4) loads. */
pfsync_state_import_t		*pfsync_state_import_ptr = NULL;
pfsync_insert_state_t		*pfsync_insert_state_ptr = NULL;
pfsync_update_state_t		*pfsync_update_state_ptr = NULL;
pfsync_delete_state_t		*pfsync_delete_state_ptr = NULL;
pfsync_clear_states_t		*pfsync_clear_states_ptr = NULL;
pfsync_defer_t			*pfsync_defer_ptr = NULL;
/* pflog: filled in when if_pflog(4) loads. */
pflog_packet_t			*pflog_packet_ptr = NULL;
+
+/*
+ * Per-VNET pf initialization: sets up the core subsystems, installs the
+ * default ("pass all" unless PF_DEFAULT_TO_DROP) rule, seeds the default
+ * state timeouts, zeroes status, allocates per-reason counters and
+ * registers the software interrupt used for deferred packet sends.
+ */
+static void
+pfattach_vnet(void)
+{
+ u_int32_t *my_timeout = V_pf_default_rule.timeout;
+
+ pf_initialize();
+ pfr_initialize();
+ pfi_initialize_vnet();
+ pf_normalize_init();
+
+ V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
+ V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT;
+
+ RB_INIT(&V_pf_anchors);
+ pf_init_ruleset(&pf_main_ruleset);
+
+ /* default rule should never be garbage collected */
+ V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next;
+#ifdef PF_DEFAULT_TO_DROP
+ V_pf_default_rule.action = PF_DROP;
+#else
+ V_pf_default_rule.action = PF_PASS;
+#endif
+ V_pf_default_rule.nr = -1;
+ V_pf_default_rule.rtableid = -1;
+
+ V_pf_default_rule.states_cur = counter_u64_alloc(M_WAITOK);
+ V_pf_default_rule.states_tot = counter_u64_alloc(M_WAITOK);
+ V_pf_default_rule.src_nodes = counter_u64_alloc(M_WAITOK);
+
+ /* initialize default timeouts */
+ my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
+ my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
+ my_timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
+ my_timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL;
+ my_timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL;
+ my_timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL;
+ my_timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL;
+ my_timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL;
+ my_timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL;
+ my_timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL;
+ my_timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL;
+ my_timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL;
+ my_timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL;
+ my_timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL;
+ my_timeout[PFTM_FRAG] = PFTM_FRAG_VAL;
+ my_timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL;
+ my_timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL;
+ my_timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL;
+ my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START;
+ my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END;
+
+ bzero(&V_pf_status, sizeof(V_pf_status));
+ V_pf_status.debug = PF_DEBUG_URGENT;
+
+ V_pf_pfil_hooked = 0;
+
+ /* XXX do our best to avoid a conflict */
+ V_pf_status.hostid = arc4random();
+
+ for (int i = 0; i < PFRES_MAX; i++)
+ V_pf_status.counters[i] = counter_u64_alloc(M_WAITOK);
+ for (int i = 0; i < LCNT_MAX; i++)
+ V_pf_status.lcounters[i] = counter_u64_alloc(M_WAITOK);
+ for (int i = 0; i < FCNT_MAX; i++)
+ V_pf_status.fcounters[i] = counter_u64_alloc(M_WAITOK);
+ for (int i = 0; i < SCNT_MAX; i++)
+ V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK);
+
+ if (swi_add(NULL, "pf send", pf_intr, curvnet, SWI_NET,
+ INTR_MPSAFE, &V_pf_swi_cookie) != 0)
+ /* XXXGL: leaked all above. */
+ return;
+}
+
+
+/*
+ * Look up the address pool of a rule identified by anchor path, ruleset
+ * kind (derived from rule_action) and rule number.  'active' selects the
+ * active vs. inactive rule list; 'r_last' asks for the last rule in the
+ * queue instead of a lookup by number; 'check_ticket' validates 'ticket'
+ * against the selected list's ticket.  Returns NULL on any mismatch.
+ */
+static struct pf_pool *
+pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action,
+ u_int32_t rule_number, u_int8_t r_last, u_int8_t active,
+ u_int8_t check_ticket)
+{
+ struct pf_ruleset *ruleset;
+ struct pf_rule *rule;
+ int rs_num;
+
+ ruleset = pf_find_ruleset(anchor);
+ if (ruleset == NULL)
+ return (NULL);
+ rs_num = pf_get_ruleset_number(rule_action);
+ if (rs_num >= PF_RULESET_MAX)
+ return (NULL);
+ if (active) {
+ if (check_ticket && ticket !=
+ ruleset->rules[rs_num].active.ticket)
+ return (NULL);
+ if (r_last)
+ rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
+ pf_rulequeue);
+ else
+ rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
+ } else {
+ if (check_ticket && ticket !=
+ ruleset->rules[rs_num].inactive.ticket)
+ return (NULL);
+ if (r_last)
+ rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
+ pf_rulequeue);
+ else
+ rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr);
+ }
+ /* Unless the caller wanted the tail, walk forward to rule_number. */
+ if (!r_last) {
+ while ((rule != NULL) && (rule->nr != rule_number))
+ rule = TAILQ_NEXT(rule, entries);
+ }
+ if (rule == NULL)
+ return (NULL);
+
+ return (&rule->rpool);
+}
+
+/* Move all pool addresses from 'poola' to the tail of 'poolb'. */
+static void
+pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb)
+{
+ struct pf_pooladdr *mv_pool_pa;
+
+ while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) {
+ TAILQ_REMOVE(poola, mv_pool_pa, entries);
+ TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries);
+ }
+}
+
+/*
+ * Drain and free every pool address in 'poola', releasing per-address
+ * resources (dynamic interface address, attached table, kif reference).
+ */
+static void
+pf_empty_pool(struct pf_palist *poola)
+{
+ struct pf_pooladdr *pa;
+
+ while ((pa = TAILQ_FIRST(poola)) != NULL) {
+ switch (pa->addr.type) {
+ case PF_ADDR_DYNIFTL:
+ pfi_dynaddr_remove(pa->addr.p.dyn);
+ break;
+ case PF_ADDR_TABLE:
+ /* XXX: this could be unfinished pooladdr on pabuf */
+ if (pa->addr.p.tbl != NULL)
+ pfr_detach_table(pa->addr.p.tbl);
+ break;
+ }
+ if (pa->kif)
+ pfi_kif_unref(pa->kif);
+ TAILQ_REMOVE(poola, pa, entries);
+ free(pa, M_PFRULE);
+ }
+}
+
+/*
+ * Remove 'rule' from its queue and park it on the per-VNET unlinked-rules
+ * list instead of freeing it directly; states may still reference it, so
+ * actual destruction is deferred.  Caller must hold the rules write lock.
+ */
+static void
+pf_unlink_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule)
+{
+
+ PF_RULES_WASSERT();
+
+ TAILQ_REMOVE(rulequeue, rule, entries);
+
+ PF_UNLNKDRULES_LOCK();
+ rule->rule_flag |= PFRULE_REFS;
+ TAILQ_INSERT_TAIL(&V_pf_unlinked_rules, rule, entries);
+ PF_UNLNKDRULES_UNLOCK();
+}
+
+/*
+ * Release all resources held by 'rule' and free it: tag and queue-id
+ * references, dynamic/table address attachments for src and dst, the
+ * overload table, the kif reference, the anchor linkage, the address
+ * pool and the per-rule counters.  Caller must hold the rules write lock.
+ */
+void
+pf_free_rule(struct pf_rule *rule)
+{
+
+ PF_RULES_WASSERT();
+
+ if (rule->tag)
+ tag_unref(&V_pf_tags, rule->tag);
+ if (rule->match_tag)
+ tag_unref(&V_pf_tags, rule->match_tag);
+#ifdef ALTQ
+ /* Only drop pqid separately if it is a distinct queue. */
+ if (rule->pqid != rule->qid)
+ pf_qid_unref(rule->pqid);
+ pf_qid_unref(rule->qid);
+#endif
+ switch (rule->src.addr.type) {
+ case PF_ADDR_DYNIFTL:
+ pfi_dynaddr_remove(rule->src.addr.p.dyn);
+ break;
+ case PF_ADDR_TABLE:
+ pfr_detach_table(rule->src.addr.p.tbl);
+ break;
+ }
+ switch (rule->dst.addr.type) {
+ case PF_ADDR_DYNIFTL:
+ pfi_dynaddr_remove(rule->dst.addr.p.dyn);
+ break;
+ case PF_ADDR_TABLE:
+ pfr_detach_table(rule->dst.addr.p.tbl);
+ break;
+ }
+ if (rule->overload_tbl)
+ pfr_detach_table(rule->overload_tbl);
+ if (rule->kif)
+ pfi_kif_unref(rule->kif);
+ pf_anchor_remove(rule);
+ pf_empty_pool(&rule->rpool.list);
+ counter_u64_free(rule->states_cur);
+ counter_u64_free(rule->states_tot);
+ counter_u64_free(rule->src_nodes);
+ free(rule, M_PFRULE);
+}
+
+/*
+ * Map a tag name to a numeric tag id, allocating a new id if the name is
+ * not yet known.  An existing entry just gains a reference.  Ids are kept
+ * sorted and dense; returns 0 on failure (id space exhausted or M_NOWAIT
+ * allocation failure).  Caller must hold the rules write lock.
+ */
+static u_int16_t
+tagname2tag(struct pf_tags *head, char *tagname)
+{
+ struct pf_tagname *tag, *p = NULL;
+ u_int16_t new_tagid = 1;
+
+ PF_RULES_WASSERT();
+
+ TAILQ_FOREACH(tag, head, entries)
+ if (strcmp(tagname, tag->name) == 0) {
+ tag->ref++;
+ return (tag->tag);
+ }
+
+ /*
+ * to avoid fragmentation, we do a linear search from the beginning
+ * and take the first free slot we find. if there is none or the list
+ * is empty, append a new entry at the end.
+ */
+
+ /* new entry */
+ if (!TAILQ_EMPTY(head))
+ for (p = TAILQ_FIRST(head); p != NULL &&
+ p->tag == new_tagid; p = TAILQ_NEXT(p, entries))
+ new_tagid = p->tag + 1;
+
+ if (new_tagid > TAGID_MAX)
+ return (0);
+
+ /* allocate and fill new struct pf_tagname */
+ tag = malloc(sizeof(*tag), M_PFTAG, M_NOWAIT|M_ZERO);
+ if (tag == NULL)
+ return (0);
+ strlcpy(tag->name, tagname, sizeof(tag->name));
+ tag->tag = new_tagid;
+ tag->ref++;
+
+ if (p != NULL) /* insert new entry before p */
+ TAILQ_INSERT_BEFORE(p, tag, entries);
+ else /* either list empty or no free slot in between */
+ TAILQ_INSERT_TAIL(head, tag, entries);
+
+ return (tag->tag);
+}
+
+/*
+ * Drop one reference on numeric tag 'tag'; free its entry when the
+ * refcount reaches zero.  Caller must hold the rules write lock.
+ */
+static void
+tag_unref(struct pf_tags *head, u_int16_t tag)
+{
+ struct pf_tagname *p, *next;
+
+ PF_RULES_WASSERT();
+
+ for (p = TAILQ_FIRST(head); p != NULL; p = next) {
+ next = TAILQ_NEXT(p, entries);
+ if (tag == p->tag) {
+ if (--p->ref == 0) {
+ TAILQ_REMOVE(head, p, entries);
+ free(p, M_PFTAG);
+ }
+ break;
+ }
+ }
+}
+
+/* Convenience wrapper: allocate/lookup a tag id in the per-VNET tag list. */
+static u_int16_t
+pf_tagname2tag(char *tagname)
+{
+ return (tagname2tag(&V_pf_tags, tagname));
+}
+
+#ifdef ALTQ
+/* Queue names share the tag id machinery: qids are tag ids in V_pf_qids. */
+static u_int32_t
+pf_qname2qid(char *qname)
+{
+ return ((u_int32_t)tagname2tag(&V_pf_qids, qname));
+}
+
+/* Drop one reference on queue id 'qid'. */
+static void
+pf_qid_unref(u_int32_t qid)
+{
+ tag_unref(&V_pf_qids, (u_int16_t)qid);
+}
+
+/*
+ * Start an ALTQ transaction: purge any stale inactive altq list (removing
+ * interface disciplines / dropping queue ids as appropriate), then hand
+ * the caller a new inactive ticket.  Caller must hold the rules write lock.
+ */
+static int
+pf_begin_altq(u_int32_t *ticket)
+{
+ struct pf_altq *altq;
+ int error = 0;
+
+ PF_RULES_WASSERT();
+
+ /* Purge the old altq list */
+ while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) {
+ TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries);
+ if (altq->qname[0] == 0 &&
+ (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+ /* detach and destroy the discipline */
+ error = altq_remove(altq);
+ } else
+ pf_qid_unref(altq->qid);
+ free(altq, M_PFALTQ);
+ }
+ if (error)
+ return (error);
+ *ticket = ++V_ticket_altqs_inactive;
+ V_altqs_inactive_open = 1;
+ return (0);
+}
+
+/*
+ * Abort an ALTQ transaction: if 'ticket' matches the open inactive list,
+ * purge it and close the transaction.  A stale ticket is silently ignored.
+ * Caller must hold the rules write lock.
+ */
+static int
+pf_rollback_altq(u_int32_t ticket)
+{
+ struct pf_altq *altq;
+ int error = 0;
+
+ PF_RULES_WASSERT();
+
+ if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
+ return (0);
+ /* Purge the old altq list */
+ while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) {
+ TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries);
+ if (altq->qname[0] == 0 &&
+ (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+ /* detach and destroy the discipline */
+ error = altq_remove(altq);
+ } else
+ pf_qid_unref(altq->qid);
+ free(altq, M_PFALTQ);
+ }
+ V_altqs_inactive_open = 0;
+ return (error);
+}
+
+/*
+ * Commit an ALTQ transaction: swap the active and inactive altq lists,
+ * attach (and, if pf ALTQ is running, enable) the new disciplines, then
+ * purge the now-inactive old list.  Returns EBUSY on a ticket mismatch.
+ * Caller must hold the rules write lock.
+ */
+static int
+pf_commit_altq(u_int32_t ticket)
+{
+ struct pf_altqqueue *old_altqs;
+ struct pf_altq *altq;
+ int err, error = 0;
+
+ PF_RULES_WASSERT();
+
+ if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
+ return (EBUSY);
+
+ /* swap altqs, keep the old. */
+ old_altqs = V_pf_altqs_active;
+ V_pf_altqs_active = V_pf_altqs_inactive;
+ V_pf_altqs_inactive = old_altqs;
+ V_ticket_altqs_active = V_ticket_altqs_inactive;
+
+ /* Attach new disciplines */
+ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
+ if (altq->qname[0] == 0 &&
+ (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+ /* attach the discipline */
+ error = altq_pfattach(altq);
+ if (error == 0 && V_pf_altq_running)
+ error = pf_enable_altq(altq);
+ if (error != 0)
+ return (error);
+ }
+ }
+
+ /* Purge the old altq list */
+ while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) {
+ TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries);
+ if (altq->qname[0] == 0 &&
+ (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
+ /* detach and destroy the discipline */
+ if (V_pf_altq_running)
+ error = pf_disable_altq(altq);
+ /* Keep the first error but finish the teardown. */
+ err = altq_pfdetach(altq);
+ if (err != 0 && error == 0)
+ error = err;
+ err = altq_remove(altq);
+ if (err != 0 && error == 0)
+ error = err;
+ } else
+ pf_qid_unref(altq->qid);
+ free(altq, M_PFALTQ);
+ }
+
+ V_altqs_inactive_open = 0;
+ return (error);
+}
+
+/*
+ * Enable ALTQ on the interface named by 'altq' and program its token
+ * bucket regulator from the altq's bandwidth/tbrsize settings.
+ * Returns EINVAL when the interface does not exist.
+ */
+static int
+pf_enable_altq(struct pf_altq *altq)
+{
+ struct ifnet *ifp;
+ struct tb_profile tb;
+ int error = 0;
+
+ if ((ifp = ifunit(altq->ifname)) == NULL)
+ return (EINVAL);
+
+ if (ifp->if_snd.altq_type != ALTQT_NONE)
+ error = altq_enable(&ifp->if_snd);
+
+ /* set tokenbucket regulator */
+ if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ tb.rate = altq->ifbandwidth;
+ tb.depth = altq->tbrsize;
+ error = tbr_set(&ifp->if_snd, &tb);
+ }
+
+ return (error);
+}
+
+/*
+ * Disable ALTQ on the interface named by 'altq' and clear its token
+ * bucket regulator.  A no-op if the interface's discipline has already
+ * been replaced; EINVAL when the interface does not exist.
+ */
+static int
+pf_disable_altq(struct pf_altq *altq)
+{
+ struct ifnet *ifp;
+ struct tb_profile tb;
+ int error;
+
+ if ((ifp = ifunit(altq->ifname)) == NULL)
+ return (EINVAL);
+
+ /*
+ * when the discipline is no longer referenced, it was overridden
+ * by a new one. if so, just return.
+ */
+ if (altq->altq_disc != ifp->if_snd.altq_disc)
+ return (0);
+
+ error = altq_disable(&ifp->if_snd);
+
+ if (error == 0) {
+ /* clear tokenbucket regulator */
+ tb.rate = 0;
+ error = tbr_set(&ifp->if_snd, &tb);
+ }
+
+ return (error);
+}
+
+/*
+ * React to an interface arrival/departure while ALTQ rules are loaded:
+ * rebuild the active altq set inside a fresh transaction, marking entries
+ * on the affected interface with PFALTQ_FLAG_IF_REMOVED (when 'remove')
+ * or re-adding their disciplines (when the interface came back).
+ * Rolls back on any error, commits otherwise.
+ */
+void
+pf_altq_ifnet_event(struct ifnet *ifp, int remove)
+{
+ struct ifnet *ifp1;
+ struct pf_altq *a1, *a2, *a3;
+ u_int32_t ticket;
+ int error = 0;
+
+ /* Interrupt userland queue modifications */
+ if (V_altqs_inactive_open)
+ pf_rollback_altq(V_ticket_altqs_inactive);
+
+ /* Start new altq ruleset */
+ if (pf_begin_altq(&ticket))
+ return;
+
+ /* Copy the current active set */
+ TAILQ_FOREACH(a1, V_pf_altqs_active, entries) {
+ a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT);
+ if (a2 == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bcopy(a1, a2, sizeof(struct pf_altq));
+
+ if (a2->qname[0] != 0) {
+ if ((a2->qid = pf_qname2qid(a2->qname)) == 0) {
+ error = EBUSY;
+ free(a2, M_PFALTQ);
+ break;
+ }
+ /* Re-link queue to its interface's root discipline. */
+ a2->altq_disc = NULL;
+ TAILQ_FOREACH(a3, V_pf_altqs_inactive, entries) {
+ if (strncmp(a3->ifname, a2->ifname,
+ IFNAMSIZ) == 0 && a3->qname[0] == 0) {
+ a2->altq_disc = a3->altq_disc;
+ break;
+ }
+ }
+ }
+ /* Deactivate the interface in question */
+ a2->local_flags &= ~PFALTQ_FLAG_IF_REMOVED;
+ if ((ifp1 = ifunit(a2->ifname)) == NULL ||
+ (remove && ifp1 == ifp)) {
+ a2->local_flags |= PFALTQ_FLAG_IF_REMOVED;
+ } else {
+ error = altq_add(a2);
+
+ if (ticket != V_ticket_altqs_inactive)
+ error = EBUSY;
+
+ if (error) {
+ free(a2, M_PFALTQ);
+ break;
+ }
+ }
+
+ TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries);
+ }
+
+ if (error != 0)
+ pf_rollback_altq(ticket);
+ else
+ pf_commit_altq(ticket);
+}
+#endif /* ALTQ */
+
+/*
+ * Start a rules transaction on ruleset kind 'rs_num' under 'anchor':
+ * empty the inactive rule list, bump its ticket and mark it open.
+ * Caller must hold the rules write lock.
+ */
+static int
+pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor)
+{
+ struct pf_ruleset *rs;
+ struct pf_rule *rule;
+
+ PF_RULES_WASSERT();
+
+ if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
+ return (EINVAL);
+ rs = pf_find_or_create_ruleset(anchor);
+ if (rs == NULL)
+ return (EINVAL);
+ while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
+ pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule);
+ rs->rules[rs_num].inactive.rcount--;
+ }
+ *ticket = ++rs->rules[rs_num].inactive.ticket;
+ rs->rules[rs_num].inactive.open = 1;
+ return (0);
+}
+
+/*
+ * Abort a rules transaction: if 'ticket' matches the open inactive list,
+ * unlink its rules and close it.  A stale ticket is silently ignored.
+ * Caller must hold the rules write lock.
+ */
+static int
+pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor)
+{
+ struct pf_ruleset *rs;
+ struct pf_rule *rule;
+
+ PF_RULES_WASSERT();
+
+ if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
+ return (EINVAL);
+ rs = pf_find_ruleset(anchor);
+ if (rs == NULL || !rs->rules[rs_num].inactive.open ||
+ rs->rules[rs_num].inactive.ticket != ticket)
+ return (0);
+ while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
+ pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule);
+ rs->rules[rs_num].inactive.rcount--;
+ }
+ rs->rules[rs_num].inactive.open = 0;
+ return (0);
+}
+
+/*
+ * Helpers for folding rule fields into the pfsync ruleset MD5 checksum.
+ * The HTONL/HTONS variants hash the value in network byte order (via the
+ * caller-supplied scratch variable) so the digest is endian-independent.
+ */
+#define PF_MD5_UPD(st, elm) \
+ MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm))
+
+#define PF_MD5_UPD_STR(st, elm) \
+ MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm))
+
+#define PF_MD5_UPD_HTONL(st, elm, stor) do { \
+ (stor) = htonl((st)->elm); \
+ MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\
+} while (0)
+
+#define PF_MD5_UPD_HTONS(st, elm, stor) do { \
+ (stor) = htons((st)->elm); \
+ MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\
+} while (0)
+
+/*
+ * Fold one rule address (type-specific fields, ports, negation and port
+ * operator) into the running MD5 context for the ruleset checksum.
+ */
+static void
+pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
+{
+ PF_MD5_UPD(pfr, addr.type);
+ switch (pfr->addr.type) {
+ case PF_ADDR_DYNIFTL:
+ PF_MD5_UPD(pfr, addr.v.ifname);
+ PF_MD5_UPD(pfr, addr.iflags);
+ break;
+ case PF_ADDR_TABLE:
+ PF_MD5_UPD(pfr, addr.v.tblname);
+ break;
+ case PF_ADDR_ADDRMASK:
+ /* XXX ignore af? */
+ PF_MD5_UPD(pfr, addr.v.a.addr.addr32);
+ PF_MD5_UPD(pfr, addr.v.a.mask.addr32);
+ break;
+ }
+
+ PF_MD5_UPD(pfr, port[0]);
+ PF_MD5_UPD(pfr, port[1]);
+ PF_MD5_UPD(pfr, neg);
+ PF_MD5_UPD(pfr, port_op);
+}
+
+/*
+ * Fold the checksum-relevant fields of one rule into the MD5 context;
+ * multi-byte integers go through the HTONL/HTONS scratch variables so the
+ * digest matches across byte orders (pfsync peers compare it).
+ */
+static void
+pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule)
+{
+ u_int16_t x;
+ u_int32_t y;
+
+ pf_hash_rule_addr(ctx, &rule->src);
+ pf_hash_rule_addr(ctx, &rule->dst);
+ PF_MD5_UPD_STR(rule, label);
+ PF_MD5_UPD_STR(rule, ifname);
+ PF_MD5_UPD_STR(rule, match_tagname);
+ PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */
+ PF_MD5_UPD_HTONL(rule, os_fingerprint, y);
+ PF_MD5_UPD_HTONL(rule, prob, y);
+ PF_MD5_UPD_HTONL(rule, uid.uid[0], y);
+ PF_MD5_UPD_HTONL(rule, uid.uid[1], y);
+ PF_MD5_UPD(rule, uid.op);
+ PF_MD5_UPD_HTONL(rule, gid.gid[0], y);
+ PF_MD5_UPD_HTONL(rule, gid.gid[1], y);
+ PF_MD5_UPD(rule, gid.op);
+ PF_MD5_UPD_HTONL(rule, rule_flag, y);
+ PF_MD5_UPD(rule, action);
+ PF_MD5_UPD(rule, direction);
+ PF_MD5_UPD(rule, af);
+ PF_MD5_UPD(rule, quick);
+ PF_MD5_UPD(rule, ifnot);
+ PF_MD5_UPD(rule, match_tag_not);
+ PF_MD5_UPD(rule, natpass);
+ PF_MD5_UPD(rule, keep_state);
+ PF_MD5_UPD(rule, proto);
+ PF_MD5_UPD(rule, type);
+ PF_MD5_UPD(rule, code);
+ PF_MD5_UPD(rule, flags);
+ PF_MD5_UPD(rule, flagset);
+ PF_MD5_UPD(rule, allow_opts);
+ PF_MD5_UPD(rule, rt);
+ PF_MD5_UPD(rule, tos);
+}
+
+/*
+ * Commit a rules transaction: recompute the pfsync checksum (main ruleset
+ * only), swap the active and inactive lists/arrays, recalculate skip
+ * steps, then unlink the old rules (freed later via the unlinked list).
+ * Returns EBUSY on ticket mismatch.  Caller must hold the rules write lock.
+ */
+static int
+pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
+{
+ struct pf_ruleset *rs;
+ struct pf_rule *rule, **old_array;
+ struct pf_rulequeue *old_rules;
+ int error;
+ u_int32_t old_rcount;
+
+ PF_RULES_WASSERT();
+
+ if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
+ return (EINVAL);
+ rs = pf_find_ruleset(anchor);
+ if (rs == NULL || !rs->rules[rs_num].inactive.open ||
+ ticket != rs->rules[rs_num].inactive.ticket)
+ return (EBUSY);
+
+ /* Calculate checksum for the main ruleset */
+ if (rs == &pf_main_ruleset) {
+ error = pf_setup_pfsync_matching(rs);
+ if (error != 0)
+ return (error);
+ }
+
+ /* Swap rules, keep the old. */
+ old_rules = rs->rules[rs_num].active.ptr;
+ old_rcount = rs->rules[rs_num].active.rcount;
+ old_array = rs->rules[rs_num].active.ptr_array;
+
+ rs->rules[rs_num].active.ptr =
+ rs->rules[rs_num].inactive.ptr;
+ rs->rules[rs_num].active.ptr_array =
+ rs->rules[rs_num].inactive.ptr_array;
+ rs->rules[rs_num].active.rcount =
+ rs->rules[rs_num].inactive.rcount;
+ rs->rules[rs_num].inactive.ptr = old_rules;
+ rs->rules[rs_num].inactive.ptr_array = old_array;
+ rs->rules[rs_num].inactive.rcount = old_rcount;
+
+ rs->rules[rs_num].active.ticket =
+ rs->rules[rs_num].inactive.ticket;
+ pf_calc_skip_steps(rs->rules[rs_num].active.ptr);
+
+
+ /* Purge the old rule list. */
+ while ((rule = TAILQ_FIRST(old_rules)) != NULL)
+ pf_unlink_rule(old_rules, rule);
+ if (rs->rules[rs_num].inactive.ptr_array)
+ free(rs->rules[rs_num].inactive.ptr_array, M_TEMP);
+ rs->rules[rs_num].inactive.ptr_array = NULL;
+ rs->rules[rs_num].inactive.rcount = 0;
+ rs->rules[rs_num].inactive.open = 0;
+ pf_remove_if_empty_ruleset(rs);
+
+ return (0);
+}
+
+/*
+ * Build the per-kind ptr_array lookup tables (rule number -> rule) for the
+ * inactive rulesets and compute the MD5 checksum over all rules (scrub
+ * rules excluded), storing it in V_pf_status.pf_chksum for pfsync peers.
+ * Returns ENOMEM if a ptr_array allocation fails.
+ */
+static int
+pf_setup_pfsync_matching(struct pf_ruleset *rs)
+{
+ MD5_CTX ctx;
+ struct pf_rule *rule;
+ int rs_cnt;
+ u_int8_t digest[PF_MD5_DIGEST_LENGTH];
+
+ MD5Init(&ctx);
+ for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) {
+ /* XXX PF_RULESET_SCRUB as well? */
+ if (rs_cnt == PF_RULESET_SCRUB)
+ continue;
+
+ if (rs->rules[rs_cnt].inactive.ptr_array)
+ free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP);
+ rs->rules[rs_cnt].inactive.ptr_array = NULL;
+
+ if (rs->rules[rs_cnt].inactive.rcount) {
+ rs->rules[rs_cnt].inactive.ptr_array =
+ malloc(sizeof(caddr_t) *
+ rs->rules[rs_cnt].inactive.rcount,
+ M_TEMP, M_NOWAIT);
+
+ if (!rs->rules[rs_cnt].inactive.ptr_array)
+ return (ENOMEM);
+ }
+
+ /* Index rules by number while folding them into the digest. */
+ TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr,
+ entries) {
+ pf_hash_rule(&ctx, rule);
+ (rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule;
+ }
+ }
+
+ MD5Final(digest, &ctx);
+ memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum));
+ return (0);
+}
+
+/*
+ * Resolve a rule address wrapper after copy-in: attach the named table or
+ * set up the dynamic interface address.  PF_ADDR_ADDRMASK and friends
+ * need no setup.  Returns ENOMEM/an error from pfi_dynaddr_setup().
+ */
+static int
+pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr,
+ sa_family_t af)
+{
+ int error = 0;
+
+ switch (addr->type) {
+ case PF_ADDR_TABLE:
+ addr->p.tbl = pfr_attach_table(ruleset, addr->v.tblname);
+ if (addr->p.tbl == NULL)
+ error = ENOMEM;
+ break;
+ case PF_ADDR_DYNIFTL:
+ error = pfi_dynaddr_setup(addr, af);
+ break;
+ }
+
+ return (error);
+}
+
+/*
+ * Prepare an address wrapper for copy-out to userland: convert the
+ * kernel-pointer members of dynamic/table addresses into their
+ * userland representations.
+ */
+static void
+pf_addr_copyout(struct pf_addr_wrap *addr)
+{
+
+ switch (addr->type) {
+ case PF_ADDR_DYNIFTL:
+ pfi_dynaddr_copyout(addr);
+ break;
+ case PF_ADDR_TABLE:
+ pf_tbladdr_copyout(addr);
+ break;
+ }
+}
+
+static int
+pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
+{
+ int error = 0;
+
+ /* XXX keep in sync with switch() below */
+ if (securelevel_gt(td->td_ucred, 2))
+ switch (cmd) {
+ case DIOCGETRULES:
+ case DIOCGETRULE:
+ case DIOCGETADDRS:
+ case DIOCGETADDR:
+ case DIOCGETSTATE:
+ case DIOCSETSTATUSIF:
+ case DIOCGETSTATUS:
+ case DIOCCLRSTATUS:
+ case DIOCNATLOOK:
+ case DIOCSETDEBUG:
+ case DIOCGETSTATES:
+ case DIOCGETTIMEOUT:
+ case DIOCCLRRULECTRS:
+ case DIOCGETLIMIT:
+ case DIOCGETALTQS:
+ case DIOCGETALTQ:
+ case DIOCGETQSTATS:
+ case DIOCGETRULESETS:
+ case DIOCGETRULESET:
+ case DIOCRGETTABLES:
+ case DIOCRGETTSTATS:
+ case DIOCRCLRTSTATS:
+ case DIOCRCLRADDRS:
+ case DIOCRADDADDRS:
+ case DIOCRDELADDRS:
+ case DIOCRSETADDRS:
+ case DIOCRGETADDRS:
+ case DIOCRGETASTATS:
+ case DIOCRCLRASTATS:
+ case DIOCRTSTADDRS:
+ case DIOCOSFPGET:
+ case DIOCGETSRCNODES:
+ case DIOCCLRSRCNODES:
+ case DIOCIGETIFACES:
+ case DIOCGIFSPEED:
+ case DIOCSETIFFLAG:
+ case DIOCCLRIFFLAG:
+ break;
+ case DIOCRCLRTABLES:
+ case DIOCRADDTABLES:
+ case DIOCRDELTABLES:
+ case DIOCRSETTFLAGS:
+ if (((struct pfioc_table *)addr)->pfrio_flags &
+ PFR_FLAG_DUMMY)
+ break; /* dummy operation ok */
+ return (EPERM);
+ default:
+ return (EPERM);
+ }
+
+ if (!(flags & FWRITE))
+ switch (cmd) {
+ case DIOCGETRULES:
+ case DIOCGETADDRS:
+ case DIOCGETADDR:
+ case DIOCGETSTATE:
+ case DIOCGETSTATUS:
+ case DIOCGETSTATES:
+ case DIOCGETTIMEOUT:
+ case DIOCGETLIMIT:
+ case DIOCGETALTQS:
+ case DIOCGETALTQ:
+ case DIOCGETQSTATS:
+ case DIOCGETRULESETS:
+ case DIOCGETRULESET:
+ case DIOCNATLOOK:
+ case DIOCRGETTABLES:
+ case DIOCRGETTSTATS:
+ case DIOCRGETADDRS:
+ case DIOCRGETASTATS:
+ case DIOCRTSTADDRS:
+ case DIOCOSFPGET:
+ case DIOCGETSRCNODES:
+ case DIOCIGETIFACES:
+ case DIOCGIFSPEED:
+ break;
+ case DIOCRCLRTABLES:
+ case DIOCRADDTABLES:
+ case DIOCRDELTABLES:
+ case DIOCRCLRTSTATS:
+ case DIOCRCLRADDRS:
+ case DIOCRADDADDRS:
+ case DIOCRDELADDRS:
+ case DIOCRSETADDRS:
+ case DIOCRSETTFLAGS:
+ if (((struct pfioc_table *)addr)->pfrio_flags &
+ PFR_FLAG_DUMMY) {
+ flags |= FWRITE; /* need write lock for dummy */
+ break; /* dummy operation ok */
+ }
+ return (EACCES);
+ case DIOCGETRULE:
+ if (((struct pfioc_rule *)addr)->action ==
+ PF_GET_CLR_CNTR)
+ return (EACCES);
+ break;
+ default:
+ return (EACCES);
+ }
+
+ CURVNET_SET(TD_TO_VNET(td));
+
+ switch (cmd) {
+ case DIOCSTART:
+ sx_xlock(&pf_ioctl_lock);
+ if (V_pf_status.running)
+ error = EEXIST;
+ else {
+ int cpu;
+
+ error = hook_pf();
+ if (error) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: pfil registration failed\n"));
+ break;
+ }
+ V_pf_status.running = 1;
+ V_pf_status.since = time_second;
+
+ CPU_FOREACH(cpu)
+ V_pf_stateid[cpu] = time_second;
+
+ DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n"));
+ }
+ break;
+
+ case DIOCSTOP:
+ sx_xlock(&pf_ioctl_lock);
+ if (!V_pf_status.running)
+ error = ENOENT;
+ else {
+ V_pf_status.running = 0;
+ error = dehook_pf();
+ if (error) {
+ V_pf_status.running = 1;
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: pfil unregistration failed\n"));
+ }
+ V_pf_status.since = time_second;
+ DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
+ }
+ break;
+
+ case DIOCADDRULE: {
+ struct pfioc_rule *pr = (struct pfioc_rule *)addr;
+ struct pf_ruleset *ruleset;
+ struct pf_rule *rule, *tail;
+ struct pf_pooladdr *pa;
+ struct pfi_kif *kif = NULL;
+ int rs_num;
+
+ if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
+ error = EINVAL;
+ break;
+ }
+#ifndef INET
+ if (pr->rule.af == AF_INET) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET */
+#ifndef INET6
+ if (pr->rule.af == AF_INET6) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET6 */
+
+ rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK);
+ bcopy(&pr->rule, rule, sizeof(struct pf_rule));
+ if (rule->ifname[0])
+ kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
+ rule->states_cur = counter_u64_alloc(M_WAITOK);
+ rule->states_tot = counter_u64_alloc(M_WAITOK);
+ rule->src_nodes = counter_u64_alloc(M_WAITOK);
+#ifndef __rtems__
+ rule->cuid = td->td_ucred->cr_ruid;
+ rule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
+#else /* __rtems__ */
+ rule->cuid = BSD_DEFAULT_UID;
+ rule->cpid = BSD_DEFAULT_PID;
+#endif /* __rtems__ */
+ TAILQ_INIT(&rule->rpool.list);
+
+#define ERROUT(x) { error = (x); goto DIOCADDRULE_error; }
+
+ PF_RULES_WLOCK();
+ pr->anchor[sizeof(pr->anchor) - 1] = 0;
+ ruleset = pf_find_ruleset(pr->anchor);
+ if (ruleset == NULL)
+ ERROUT(EINVAL);
+ rs_num = pf_get_ruleset_number(pr->rule.action);
+ if (rs_num >= PF_RULESET_MAX)
+ ERROUT(EINVAL);
+ if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("ticket: %d != [%d]%d\n", pr->ticket, rs_num,
+ ruleset->rules[rs_num].inactive.ticket));
+ ERROUT(EBUSY);
+ }
+ if (pr->pool_ticket != V_ticket_pabuf) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pool_ticket: %d != %d\n", pr->pool_ticket,
+ V_ticket_pabuf));
+ ERROUT(EBUSY);
+ }
+
+ tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
+ pf_rulequeue);
+ if (tail)
+ rule->nr = tail->nr + 1;
+ else
+ rule->nr = 0;
+ if (rule->ifname[0]) {
+ rule->kif = pfi_kif_attach(kif, rule->ifname);
+ pfi_kif_ref(rule->kif);
+ } else
+ rule->kif = NULL;
+
+ if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs)
+ error = EBUSY;
+
+#ifdef ALTQ
+ /* set queue IDs */
+ if (rule->qname[0] != 0) {
+ if ((rule->qid = pf_qname2qid(rule->qname)) == 0)
+ error = EBUSY;
+ else if (rule->pqname[0] != 0) {
+ if ((rule->pqid =
+ pf_qname2qid(rule->pqname)) == 0)
+ error = EBUSY;
+ } else
+ rule->pqid = rule->qid;
+ }
+#endif
+ if (rule->tagname[0])
+ if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
+ error = EBUSY;
+ if (rule->match_tagname[0])
+ if ((rule->match_tag =
+ pf_tagname2tag(rule->match_tagname)) == 0)
+ error = EBUSY;
+ if (rule->rt && !rule->direction)
+ error = EINVAL;
+ if (!rule->log)
+ rule->logif = 0;
+ if (rule->logif >= PFLOGIFS_MAX)
+ error = EINVAL;
+ if (pf_addr_setup(ruleset, &rule->src.addr, rule->af))
+ error = ENOMEM;
+ if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af))
+ error = ENOMEM;
+ if (pf_anchor_setup(rule, ruleset, pr->anchor_call))
+ error = EINVAL;
+ if (rule->scrub_flags & PFSTATE_SETPRIO &&
+ (rule->set_prio[0] > PF_PRIO_MAX ||
+ rule->set_prio[1] > PF_PRIO_MAX))
+ error = EINVAL;
+ TAILQ_FOREACH(pa, &V_pf_pabuf, entries)
+ if (pa->addr.type == PF_ADDR_TABLE) {
+ pa->addr.p.tbl = pfr_attach_table(ruleset,
+ pa->addr.v.tblname);
+ if (pa->addr.p.tbl == NULL)
+ error = ENOMEM;
+ }
+
+ rule->overload_tbl = NULL;
+ if (rule->overload_tblname[0]) {
+ if ((rule->overload_tbl = pfr_attach_table(ruleset,
+ rule->overload_tblname)) == NULL)
+ error = EINVAL;
+ else
+ rule->overload_tbl->pfrkt_flags |=
+ PFR_TFLAG_ACTIVE;
+ }
+
+ pf_mv_pool(&V_pf_pabuf, &rule->rpool.list);
+ if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) ||
+ (rule->action == PF_BINAT)) && rule->anchor == NULL) ||
+ (rule->rt > PF_FASTROUTE)) &&
+ (TAILQ_FIRST(&rule->rpool.list) == NULL))
+ error = EINVAL;
+
+ if (error) {
+ pf_free_rule(rule);
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list);
+ rule->evaluations = rule->packets[0] = rule->packets[1] =
+ rule->bytes[0] = rule->bytes[1] = 0;
+ TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr,
+ rule, entries);
+ ruleset->rules[rs_num].inactive.rcount++;
+ PF_RULES_WUNLOCK();
+ break;
+
+#undef ERROUT
+DIOCADDRULE_error:
+ PF_RULES_WUNLOCK();
+ counter_u64_free(rule->states_cur);
+ counter_u64_free(rule->states_tot);
+ counter_u64_free(rule->src_nodes);
+ free(rule, M_PFRULE);
+ if (kif)
+ free(kif, PFI_MTYPE);
+ break;
+ }
+
+ case DIOCGETRULES: {
+ struct pfioc_rule *pr = (struct pfioc_rule *)addr;
+ struct pf_ruleset *ruleset;
+ struct pf_rule *tail;
+ int rs_num;
+
+ PF_RULES_WLOCK();
+ pr->anchor[sizeof(pr->anchor) - 1] = 0;
+ ruleset = pf_find_ruleset(pr->anchor);
+ if (ruleset == NULL) {
+ PF_RULES_WUNLOCK();
+ error = EINVAL;
+ break;
+ }
+ rs_num = pf_get_ruleset_number(pr->rule.action);
+ if (rs_num >= PF_RULESET_MAX) {
+ PF_RULES_WUNLOCK();
+ error = EINVAL;
+ break;
+ }
+ tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
+ pf_rulequeue);
+ if (tail)
+ pr->nr = tail->nr + 1;
+ else
+ pr->nr = 0;
+ pr->ticket = ruleset->rules[rs_num].active.ticket;
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCGETRULE: {
+ struct pfioc_rule *pr = (struct pfioc_rule *)addr;
+ struct pf_ruleset *ruleset;
+ struct pf_rule *rule;
+ int rs_num, i;
+
+ PF_RULES_WLOCK();
+ pr->anchor[sizeof(pr->anchor) - 1] = 0;
+ ruleset = pf_find_ruleset(pr->anchor);
+ if (ruleset == NULL) {
+ PF_RULES_WUNLOCK();
+ error = EINVAL;
+ break;
+ }
+ rs_num = pf_get_ruleset_number(pr->rule.action);
+ if (rs_num >= PF_RULESET_MAX) {
+ PF_RULES_WUNLOCK();
+ error = EINVAL;
+ break;
+ }
+ if (pr->ticket != ruleset->rules[rs_num].active.ticket) {
+ PF_RULES_WUNLOCK();
+ error = EBUSY;
+ break;
+ }
+ rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
+ while ((rule != NULL) && (rule->nr != pr->nr))
+ rule = TAILQ_NEXT(rule, entries);
+ if (rule == NULL) {
+ PF_RULES_WUNLOCK();
+ error = EBUSY;
+ break;
+ }
+ bcopy(rule, &pr->rule, sizeof(struct pf_rule));
+ pr->rule.u_states_cur = counter_u64_fetch(rule->states_cur);
+ pr->rule.u_states_tot = counter_u64_fetch(rule->states_tot);
+ pr->rule.u_src_nodes = counter_u64_fetch(rule->src_nodes);
+ if (pf_anchor_copyout(ruleset, rule, pr)) {
+ PF_RULES_WUNLOCK();
+ error = EBUSY;
+ break;
+ }
+ pf_addr_copyout(&pr->rule.src.addr);
+ pf_addr_copyout(&pr->rule.dst.addr);
+ for (i = 0; i < PF_SKIP_COUNT; ++i)
+ if (rule->skip[i].ptr == NULL)
+ pr->rule.skip[i].nr = -1;
+ else
+ pr->rule.skip[i].nr =
+ rule->skip[i].ptr->nr;
+
+ if (pr->action == PF_GET_CLR_CNTR) {
+ rule->evaluations = 0;
+ rule->packets[0] = rule->packets[1] = 0;
+ rule->bytes[0] = rule->bytes[1] = 0;
+ counter_u64_zero(rule->states_tot);
+ }
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCCHANGERULE: {
+ struct pfioc_rule *pcr = (struct pfioc_rule *)addr;
+ struct pf_ruleset *ruleset;
+ struct pf_rule *oldrule = NULL, *newrule = NULL;
+ struct pfi_kif *kif = NULL;
+ struct pf_pooladdr *pa;
+ u_int32_t nr = 0;
+ int rs_num;
+
+ if (pcr->action < PF_CHANGE_ADD_HEAD ||
+ pcr->action > PF_CHANGE_GET_TICKET) {
+ error = EINVAL;
+ break;
+ }
+ if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
+ error = EINVAL;
+ break;
+ }
+
+ if (pcr->action != PF_CHANGE_REMOVE) {
+#ifndef INET
+ if (pcr->rule.af == AF_INET) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET */
+#ifndef INET6
+ if (pcr->rule.af == AF_INET6) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET6 */
+ newrule = malloc(sizeof(*newrule), M_PFRULE, M_WAITOK);
+ bcopy(&pcr->rule, newrule, sizeof(struct pf_rule));
+ if (newrule->ifname[0])
+ kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
+ newrule->states_cur = counter_u64_alloc(M_WAITOK);
+ newrule->states_tot = counter_u64_alloc(M_WAITOK);
+ newrule->src_nodes = counter_u64_alloc(M_WAITOK);
+#ifndef __rtems__
+ newrule->cuid = td->td_ucred->cr_ruid;
+ newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
+#else /* __rtems__ */
+ newrule->cuid = BSD_DEFAULT_UID;
+ newrule->cpid = BSD_DEFAULT_PID;
+#endif /* __rtems__ */
+ TAILQ_INIT(&newrule->rpool.list);
+ }
+
+#define ERROUT(x) { error = (x); goto DIOCCHANGERULE_error; }
+
+ PF_RULES_WLOCK();
+ if (!(pcr->action == PF_CHANGE_REMOVE ||
+ pcr->action == PF_CHANGE_GET_TICKET) &&
+ pcr->pool_ticket != V_ticket_pabuf)
+ ERROUT(EBUSY);
+
+ ruleset = pf_find_ruleset(pcr->anchor);
+ if (ruleset == NULL)
+ ERROUT(EINVAL);
+
+ rs_num = pf_get_ruleset_number(pcr->rule.action);
+ if (rs_num >= PF_RULESET_MAX)
+ ERROUT(EINVAL);
+
+ if (pcr->action == PF_CHANGE_GET_TICKET) {
+ pcr->ticket = ++ruleset->rules[rs_num].active.ticket;
+ ERROUT(0);
+ } else if (pcr->ticket !=
+ ruleset->rules[rs_num].active.ticket)
+ ERROUT(EINVAL);
+
+ if (pcr->action != PF_CHANGE_REMOVE) {
+ if (newrule->ifname[0]) {
+ newrule->kif = pfi_kif_attach(kif,
+ newrule->ifname);
+ pfi_kif_ref(newrule->kif);
+ } else
+ newrule->kif = NULL;
+
+ if (newrule->rtableid > 0 &&
+ newrule->rtableid >= rt_numfibs)
+ error = EBUSY;
+
+#ifdef ALTQ
+ /* set queue IDs */
+ if (newrule->qname[0] != 0) {
+ if ((newrule->qid =
+ pf_qname2qid(newrule->qname)) == 0)
+ error = EBUSY;
+ else if (newrule->pqname[0] != 0) {
+ if ((newrule->pqid =
+ pf_qname2qid(newrule->pqname)) == 0)
+ error = EBUSY;
+ } else
+ newrule->pqid = newrule->qid;
+ }
+#endif /* ALTQ */
+ if (newrule->tagname[0])
+ if ((newrule->tag =
+ pf_tagname2tag(newrule->tagname)) == 0)
+ error = EBUSY;
+ if (newrule->match_tagname[0])
+ if ((newrule->match_tag = pf_tagname2tag(
+ newrule->match_tagname)) == 0)
+ error = EBUSY;
+ if (newrule->rt && !newrule->direction)
+ error = EINVAL;
+ if (!newrule->log)
+ newrule->logif = 0;
+ if (newrule->logif >= PFLOGIFS_MAX)
+ error = EINVAL;
+ if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af))
+ error = ENOMEM;
+ if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af))
+ error = ENOMEM;
+ if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call))
+ error = EINVAL;
+ TAILQ_FOREACH(pa, &V_pf_pabuf, entries)
+ if (pa->addr.type == PF_ADDR_TABLE) {
+ pa->addr.p.tbl =
+ pfr_attach_table(ruleset,
+ pa->addr.v.tblname);
+ if (pa->addr.p.tbl == NULL)
+ error = ENOMEM;
+ }
+
+ newrule->overload_tbl = NULL;
+ if (newrule->overload_tblname[0]) {
+ if ((newrule->overload_tbl = pfr_attach_table(
+ ruleset, newrule->overload_tblname)) ==
+ NULL)
+ error = EINVAL;
+ else
+ newrule->overload_tbl->pfrkt_flags |=
+ PFR_TFLAG_ACTIVE;
+ }
+
+ pf_mv_pool(&V_pf_pabuf, &newrule->rpool.list);
+ if (((((newrule->action == PF_NAT) ||
+ (newrule->action == PF_RDR) ||
+ (newrule->action == PF_BINAT) ||
+ (newrule->rt > PF_FASTROUTE)) &&
+ !newrule->anchor)) &&
+ (TAILQ_FIRST(&newrule->rpool.list) == NULL))
+ error = EINVAL;
+
+ if (error) {
+ pf_free_rule(newrule);
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list);
+ newrule->evaluations = 0;
+ newrule->packets[0] = newrule->packets[1] = 0;
+ newrule->bytes[0] = newrule->bytes[1] = 0;
+ }
+ pf_empty_pool(&V_pf_pabuf);
+
+ if (pcr->action == PF_CHANGE_ADD_HEAD)
+ oldrule = TAILQ_FIRST(
+ ruleset->rules[rs_num].active.ptr);
+ else if (pcr->action == PF_CHANGE_ADD_TAIL)
+ oldrule = TAILQ_LAST(
+ ruleset->rules[rs_num].active.ptr, pf_rulequeue);
+ else {
+ oldrule = TAILQ_FIRST(
+ ruleset->rules[rs_num].active.ptr);
+ while ((oldrule != NULL) && (oldrule->nr != pcr->nr))
+ oldrule = TAILQ_NEXT(oldrule, entries);
+ if (oldrule == NULL) {
+ if (newrule != NULL)
+ pf_free_rule(newrule);
+ PF_RULES_WUNLOCK();
+ error = EINVAL;
+ break;
+ }
+ }
+
+ if (pcr->action == PF_CHANGE_REMOVE) {
+ pf_unlink_rule(ruleset->rules[rs_num].active.ptr,
+ oldrule);
+ ruleset->rules[rs_num].active.rcount--;
+ } else {
+ if (oldrule == NULL)
+ TAILQ_INSERT_TAIL(
+ ruleset->rules[rs_num].active.ptr,
+ newrule, entries);
+ else if (pcr->action == PF_CHANGE_ADD_HEAD ||
+ pcr->action == PF_CHANGE_ADD_BEFORE)
+ TAILQ_INSERT_BEFORE(oldrule, newrule, entries);
+ else
+ TAILQ_INSERT_AFTER(
+ ruleset->rules[rs_num].active.ptr,
+ oldrule, newrule, entries);
+ ruleset->rules[rs_num].active.rcount++;
+ }
+
+ nr = 0;
+ TAILQ_FOREACH(oldrule,
+ ruleset->rules[rs_num].active.ptr, entries)
+ oldrule->nr = nr++;
+
+ ruleset->rules[rs_num].active.ticket++;
+
+ pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr);
+ pf_remove_if_empty_ruleset(ruleset);
+
+ PF_RULES_WUNLOCK();
+ break;
+
+#undef ERROUT
+DIOCCHANGERULE_error:
+ PF_RULES_WUNLOCK();
+ if (newrule != NULL) {
+ counter_u64_free(newrule->states_cur);
+ counter_u64_free(newrule->states_tot);
+ counter_u64_free(newrule->src_nodes);
+ free(newrule, M_PFRULE);
+ }
+ if (kif != NULL)
+ free(kif, PFI_MTYPE);
+ break;
+ }
+
+ case DIOCCLRSTATES: {
+ struct pf_state *s;
+ struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
+ u_int i, killed = 0;
+
+ for (i = 0; i <= pf_hashmask; i++) {
+ struct pf_idhash *ih = &V_pf_idhash[i];
+
+relock_DIOCCLRSTATES:
+ PF_HASHROW_LOCK(ih);
+ LIST_FOREACH(s, &ih->states, entry)
+ if (!psk->psk_ifname[0] ||
+ !strcmp(psk->psk_ifname,
+ s->kif->pfik_name)) {
+ /*
+ * Don't send out individual
+ * delete messages.
+ */
+ s->state_flags |= PFSTATE_NOSYNC;
+ pf_unlink_state(s, PF_ENTER_LOCKED);
+ killed++;
+ goto relock_DIOCCLRSTATES;
+ }
+ PF_HASHROW_UNLOCK(ih);
+ }
+ psk->psk_killed = killed;
+ if (pfsync_clear_states_ptr != NULL)
+ pfsync_clear_states_ptr(V_pf_status.hostid, psk->psk_ifname);
+ break;
+ }
+
+ case DIOCKILLSTATES: {
+ struct pf_state *s;
+ struct pf_state_key *sk;
+ struct pf_addr *srcaddr, *dstaddr;
+ u_int16_t srcport, dstport;
+ struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
+ u_int i, killed = 0;
+
+ if (psk->psk_pfcmp.id) {
+ if (psk->psk_pfcmp.creatorid == 0)
+ psk->psk_pfcmp.creatorid = V_pf_status.hostid;
+ if ((s = pf_find_state_byid(psk->psk_pfcmp.id,
+ psk->psk_pfcmp.creatorid))) {
+ pf_unlink_state(s, PF_ENTER_LOCKED);
+ psk->psk_killed = 1;
+ }
+ break;
+ }
+
+ for (i = 0; i <= pf_hashmask; i++) {
+ struct pf_idhash *ih = &V_pf_idhash[i];
+
+relock_DIOCKILLSTATES:
+ PF_HASHROW_LOCK(ih);
+ LIST_FOREACH(s, &ih->states, entry) {
+ sk = s->key[PF_SK_WIRE];
+ if (s->direction == PF_OUT) {
+ srcaddr = &sk->addr[1];
+ dstaddr = &sk->addr[0];
+ srcport = sk->port[1];
+ dstport = sk->port[0];
+ } else {
+ srcaddr = &sk->addr[0];
+ dstaddr = &sk->addr[1];
+ srcport = sk->port[0];
+ dstport = sk->port[1];
+ }
+
+ if ((!psk->psk_af || sk->af == psk->psk_af)
+ && (!psk->psk_proto || psk->psk_proto ==
+ sk->proto) &&
+ PF_MATCHA(psk->psk_src.neg,
+ &psk->psk_src.addr.v.a.addr,
+ &psk->psk_src.addr.v.a.mask,
+ srcaddr, sk->af) &&
+ PF_MATCHA(psk->psk_dst.neg,
+ &psk->psk_dst.addr.v.a.addr,
+ &psk->psk_dst.addr.v.a.mask,
+ dstaddr, sk->af) &&
+ (psk->psk_src.port_op == 0 ||
+ pf_match_port(psk->psk_src.port_op,
+ psk->psk_src.port[0], psk->psk_src.port[1],
+ srcport)) &&
+ (psk->psk_dst.port_op == 0 ||
+ pf_match_port(psk->psk_dst.port_op,
+ psk->psk_dst.port[0], psk->psk_dst.port[1],
+ dstport)) &&
+ (!psk->psk_label[0] ||
+ (s->rule.ptr->label[0] &&
+ !strcmp(psk->psk_label,
+ s->rule.ptr->label))) &&
+ (!psk->psk_ifname[0] ||
+ !strcmp(psk->psk_ifname,
+ s->kif->pfik_name))) {
+ pf_unlink_state(s, PF_ENTER_LOCKED);
+ killed++;
+ goto relock_DIOCKILLSTATES;
+ }
+ }
+ PF_HASHROW_UNLOCK(ih);
+ }
+ psk->psk_killed = killed;
+ break;
+ }
+
+ case DIOCADDSTATE: {
+ struct pfioc_state *ps = (struct pfioc_state *)addr;
+ struct pfsync_state *sp = &ps->state;
+
+ if (sp->timeout >= PFTM_MAX) {
+ error = EINVAL;
+ break;
+ }
+ if (pfsync_state_import_ptr != NULL) {
+ PF_RULES_RLOCK();
+ error = pfsync_state_import_ptr(sp, PFSYNC_SI_IOCTL);
+ PF_RULES_RUNLOCK();
+ } else
+ error = EOPNOTSUPP;
+ break;
+ }
+
+ case DIOCGETSTATE: {
+ struct pfioc_state *ps = (struct pfioc_state *)addr;
+ struct pf_state *s;
+
+ s = pf_find_state_byid(ps->state.id, ps->state.creatorid);
+ if (s == NULL) {
+ error = ENOENT;
+ break;
+ }
+
+ pfsync_state_export(&ps->state, s);
+ PF_STATE_UNLOCK(s);
+ break;
+ }
+
+ case DIOCGETSTATES: {
+ struct pfioc_states *ps = (struct pfioc_states *)addr;
+ struct pf_state *s;
+ struct pfsync_state *pstore, *p;
+ int i, nr;
+
+ if (ps->ps_len == 0) {
+ nr = uma_zone_get_cur(V_pf_state_z);
+ ps->ps_len = sizeof(struct pfsync_state) * nr;
+ break;
+ }
+
+ p = pstore = malloc(ps->ps_len, M_TEMP, M_WAITOK);
+ nr = 0;
+
+ for (i = 0; i <= pf_hashmask; i++) {
+ struct pf_idhash *ih = &V_pf_idhash[i];
+
+ PF_HASHROW_LOCK(ih);
+ LIST_FOREACH(s, &ih->states, entry) {
+
+ if (s->timeout == PFTM_UNLINKED)
+ continue;
+
+ if ((nr+1) * sizeof(*p) > ps->ps_len) {
+ PF_HASHROW_UNLOCK(ih);
+ goto DIOCGETSTATES_full;
+ }
+ pfsync_state_export(p, s);
+ p++;
+ nr++;
+ }
+ PF_HASHROW_UNLOCK(ih);
+ }
+DIOCGETSTATES_full:
+ error = copyout(pstore, ps->ps_states,
+ sizeof(struct pfsync_state) * nr);
+ if (error) {
+ free(pstore, M_TEMP);
+ break;
+ }
+ ps->ps_len = sizeof(struct pfsync_state) * nr;
+ free(pstore, M_TEMP);
+
+ break;
+ }
+
+ case DIOCGETSTATUS: {
+ struct pf_status *s = (struct pf_status *)addr;
+
+ PF_RULES_RLOCK();
+ s->running = V_pf_status.running;
+ s->since = V_pf_status.since;
+ s->debug = V_pf_status.debug;
+ s->hostid = V_pf_status.hostid;
+ s->states = V_pf_status.states;
+ s->src_nodes = V_pf_status.src_nodes;
+
+ for (int i = 0; i < PFRES_MAX; i++)
+ s->counters[i] =
+ counter_u64_fetch(V_pf_status.counters[i]);
+ for (int i = 0; i < LCNT_MAX; i++)
+ s->lcounters[i] =
+ counter_u64_fetch(V_pf_status.lcounters[i]);
+ for (int i = 0; i < FCNT_MAX; i++)
+ s->fcounters[i] =
+ counter_u64_fetch(V_pf_status.fcounters[i]);
+ for (int i = 0; i < SCNT_MAX; i++)
+ s->scounters[i] =
+ counter_u64_fetch(V_pf_status.scounters[i]);
+
+ bcopy(V_pf_status.ifname, s->ifname, IFNAMSIZ);
+ bcopy(V_pf_status.pf_chksum, s->pf_chksum,
+ PF_MD5_DIGEST_LENGTH);
+
+ pfi_update_status(s->ifname, s);
+ PF_RULES_RUNLOCK();
+ break;
+ }
+
+ case DIOCSETSTATUSIF: {
+ struct pfioc_if *pi = (struct pfioc_if *)addr;
+
+ if (pi->ifname[0] == 0) {
+ bzero(V_pf_status.ifname, IFNAMSIZ);
+ break;
+ }
+ PF_RULES_WLOCK();
+ strlcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ);
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCCLRSTATUS: {
+ PF_RULES_WLOCK();
+ for (int i = 0; i < PFRES_MAX; i++)
+ counter_u64_zero(V_pf_status.counters[i]);
+ for (int i = 0; i < FCNT_MAX; i++)
+ counter_u64_zero(V_pf_status.fcounters[i]);
+ for (int i = 0; i < SCNT_MAX; i++)
+ counter_u64_zero(V_pf_status.scounters[i]);
+ V_pf_status.since = time_second;
+ if (*V_pf_status.ifname)
+ pfi_update_status(V_pf_status.ifname, NULL);
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCNATLOOK: {
+ struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr;
+ struct pf_state_key *sk;
+ struct pf_state *state;
+ struct pf_state_key_cmp key;
+ int m = 0, direction = pnl->direction;
+ int sidx, didx;
+
+ /* NATLOOK src and dst are reversed, so reverse sidx/didx */
+ sidx = (direction == PF_IN) ? 1 : 0;
+ didx = (direction == PF_IN) ? 0 : 1;
+
+ if (!pnl->proto ||
+ PF_AZERO(&pnl->saddr, pnl->af) ||
+ PF_AZERO(&pnl->daddr, pnl->af) ||
+ ((pnl->proto == IPPROTO_TCP ||
+ pnl->proto == IPPROTO_UDP) &&
+ (!pnl->dport || !pnl->sport)))
+ error = EINVAL;
+ else {
+ bzero(&key, sizeof(key));
+ key.af = pnl->af;
+ key.proto = pnl->proto;
+ PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af);
+ key.port[sidx] = pnl->sport;
+ PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af);
+ key.port[didx] = pnl->dport;
+
+ state = pf_find_state_all(&key, direction, &m);
+
+ if (m > 1)
+ error = E2BIG; /* more than one state */
+ else if (state != NULL) {
+ /* XXXGL: not locked read */
+ sk = state->key[sidx];
+ PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af);
+ pnl->rsport = sk->port[sidx];
+ PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af);
+ pnl->rdport = sk->port[didx];
+ } else
+ error = ENOENT;
+ }
+ break;
+ }
+
+ case DIOCSETTIMEOUT: {
+ struct pfioc_tm *pt = (struct pfioc_tm *)addr;
+ int old;
+
+ if (pt->timeout < 0 || pt->timeout >= PFTM_MAX ||
+ pt->seconds < 0) {
+ error = EINVAL;
+ break;
+ }
+ PF_RULES_WLOCK();
+ old = V_pf_default_rule.timeout[pt->timeout];
+ if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0)
+ pt->seconds = 1;
+ V_pf_default_rule.timeout[pt->timeout] = pt->seconds;
+ if (pt->timeout == PFTM_INTERVAL && pt->seconds < old)
+ wakeup(pf_purge_thread);
+ pt->seconds = old;
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCGETTIMEOUT: {
+ struct pfioc_tm *pt = (struct pfioc_tm *)addr;
+
+ if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) {
+ error = EINVAL;
+ break;
+ }
+ PF_RULES_RLOCK();
+ pt->seconds = V_pf_default_rule.timeout[pt->timeout];
+ PF_RULES_RUNLOCK();
+ break;
+ }
+
+ case DIOCGETLIMIT: {
+ struct pfioc_limit *pl = (struct pfioc_limit *)addr;
+
+ if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) {
+ error = EINVAL;
+ break;
+ }
+ PF_RULES_RLOCK();
+ pl->limit = V_pf_limits[pl->index].limit;
+ PF_RULES_RUNLOCK();
+ break;
+ }
+
+ case DIOCSETLIMIT: {
+ struct pfioc_limit *pl = (struct pfioc_limit *)addr;
+ int old_limit;
+
+ PF_RULES_WLOCK();
+ if (pl->index < 0 || pl->index >= PF_LIMIT_MAX ||
+ V_pf_limits[pl->index].zone == NULL) {
+ PF_RULES_WUNLOCK();
+ error = EINVAL;
+ break;
+ }
+ uma_zone_set_max(V_pf_limits[pl->index].zone, pl->limit);
+ old_limit = V_pf_limits[pl->index].limit;
+ V_pf_limits[pl->index].limit = pl->limit;
+ pl->limit = old_limit;
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCSETDEBUG: {
+ u_int32_t *level = (u_int32_t *)addr;
+
+ PF_RULES_WLOCK();
+ V_pf_status.debug = *level;
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCCLRRULECTRS: {
+ /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */
+ struct pf_ruleset *ruleset = &pf_main_ruleset;
+ struct pf_rule *rule;
+
+ PF_RULES_WLOCK();
+ TAILQ_FOREACH(rule,
+ ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) {
+ rule->evaluations = 0;
+ rule->packets[0] = rule->packets[1] = 0;
+ rule->bytes[0] = rule->bytes[1] = 0;
+ }
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCGIFSPEED: {
+ struct pf_ifspeed *psp = (struct pf_ifspeed *)addr;
+ struct pf_ifspeed ps;
+ struct ifnet *ifp;
+
+ if (psp->ifname[0] != 0) {
+ /* Can we completely trust user-land? */
+ strlcpy(ps.ifname, psp->ifname, IFNAMSIZ);
+ ifp = ifunit(ps.ifname);
+ if (ifp != NULL)
+ psp->baudrate = ifp->if_baudrate;
+ else
+ error = EINVAL;
+ } else
+ error = EINVAL;
+ break;
+ }
+
+#ifdef ALTQ
+ case DIOCSTARTALTQ: {
+ struct pf_altq *altq;
+
+ PF_RULES_WLOCK();
+ /* enable all altq interfaces on active list */
+ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
+ if (altq->qname[0] == 0 && (altq->local_flags &
+ PFALTQ_FLAG_IF_REMOVED) == 0) {
+ error = pf_enable_altq(altq);
+ if (error != 0)
+ break;
+ }
+ }
+ if (error == 0)
+ V_pf_altq_running = 1;
+ PF_RULES_WUNLOCK();
+ DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n"));
+ break;
+ }
+
+ case DIOCSTOPALTQ: {
+ struct pf_altq *altq;
+
+ PF_RULES_WLOCK();
+ /* disable all altq interfaces on active list */
+ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
+ if (altq->qname[0] == 0 && (altq->local_flags &
+ PFALTQ_FLAG_IF_REMOVED) == 0) {
+ error = pf_disable_altq(altq);
+ if (error != 0)
+ break;
+ }
+ }
+ if (error == 0)
+ V_pf_altq_running = 0;
+ PF_RULES_WUNLOCK();
+ DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n"));
+ break;
+ }
+
+ case DIOCADDALTQ: {
+ struct pfioc_altq *pa = (struct pfioc_altq *)addr;
+ struct pf_altq *altq, *a;
+ struct ifnet *ifp;
+
+ altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK);
+ bcopy(&pa->altq, altq, sizeof(struct pf_altq));
+ altq->local_flags = 0;
+
+ PF_RULES_WLOCK();
+ if (pa->ticket != V_ticket_altqs_inactive) {
+ PF_RULES_WUNLOCK();
+ free(altq, M_PFALTQ);
+ error = EBUSY;
+ break;
+ }
+
+ /*
+ * if this is for a queue, find the discipline and
+ * copy the necessary fields
+ */
+ if (altq->qname[0] != 0) {
+ if ((altq->qid = pf_qname2qid(altq->qname)) == 0) {
+ PF_RULES_WUNLOCK();
+ error = EBUSY;
+ free(altq, M_PFALTQ);
+ break;
+ }
+ altq->altq_disc = NULL;
+ TAILQ_FOREACH(a, V_pf_altqs_inactive, entries) {
+ if (strncmp(a->ifname, altq->ifname,
+ IFNAMSIZ) == 0 && a->qname[0] == 0) {
+ altq->altq_disc = a->altq_disc;
+ break;
+ }
+ }
+ }
+
+ if ((ifp = ifunit(altq->ifname)) == NULL)
+ altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
+ else
+ error = altq_add(altq);
+
+ if (error) {
+ PF_RULES_WUNLOCK();
+ free(altq, M_PFALTQ);
+ break;
+ }
+
+ TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries);
+ bcopy(altq, &pa->altq, sizeof(struct pf_altq));
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCGETALTQS: {
+ struct pfioc_altq *pa = (struct pfioc_altq *)addr;
+ struct pf_altq *altq;
+
+ PF_RULES_RLOCK();
+ pa->nr = 0;
+ TAILQ_FOREACH(altq, V_pf_altqs_active, entries)
+ pa->nr++;
+ pa->ticket = V_ticket_altqs_active;
+ PF_RULES_RUNLOCK();
+ break;
+ }
+
+ case DIOCGETALTQ: {
+ struct pfioc_altq *pa = (struct pfioc_altq *)addr;
+ struct pf_altq *altq;
+ u_int32_t nr;
+
+ PF_RULES_RLOCK();
+ if (pa->ticket != V_ticket_altqs_active) {
+ PF_RULES_RUNLOCK();
+ error = EBUSY;
+ break;
+ }
+ nr = 0;
+ altq = TAILQ_FIRST(V_pf_altqs_active);
+ while ((altq != NULL) && (nr < pa->nr)) {
+ altq = TAILQ_NEXT(altq, entries);
+ nr++;
+ }
+ if (altq == NULL) {
+ PF_RULES_RUNLOCK();
+ error = EBUSY;
+ break;
+ }
+ bcopy(altq, &pa->altq, sizeof(struct pf_altq));
+ PF_RULES_RUNLOCK();
+ break;
+ }
+
+ case DIOCCHANGEALTQ:
+ /* CHANGEALTQ not supported yet! */
+ error = ENODEV;
+ break;
+
+ case DIOCGETQSTATS: {
+ struct pfioc_qstats *pq = (struct pfioc_qstats *)addr;
+ struct pf_altq *altq;
+ u_int32_t nr;
+ int nbytes;
+
+ PF_RULES_RLOCK();
+ if (pq->ticket != V_ticket_altqs_active) {
+ PF_RULES_RUNLOCK();
+ error = EBUSY;
+ break;
+ }
+ nbytes = pq->nbytes;
+ nr = 0;
+ altq = TAILQ_FIRST(V_pf_altqs_active);
+ while ((altq != NULL) && (nr < pq->nr)) {
+ altq = TAILQ_NEXT(altq, entries);
+ nr++;
+ }
+ if (altq == NULL) {
+ PF_RULES_RUNLOCK();
+ error = EBUSY;
+ break;
+ }
+
+ if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) {
+ PF_RULES_RUNLOCK();
+ error = ENXIO;
+ break;
+ }
+ PF_RULES_RUNLOCK();
+ error = altq_getqstats(altq, pq->buf, &nbytes);
+ if (error == 0) {
+ pq->scheduler = altq->scheduler;
+ pq->nbytes = nbytes;
+ }
+ break;
+ }
+#endif /* ALTQ */
+
+ case DIOCBEGINADDRS: {
+ struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr;
+
+ PF_RULES_WLOCK();
+ pf_empty_pool(&V_pf_pabuf);
+ pp->ticket = ++V_ticket_pabuf;
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCADDADDR: {
+ struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr;
+ struct pf_pooladdr *pa;
+ struct pfi_kif *kif = NULL;
+
+#ifndef INET
+ if (pp->af == AF_INET) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET */
+#ifndef INET6
+ if (pp->af == AF_INET6) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET6 */
+ if (pp->addr.addr.type != PF_ADDR_ADDRMASK &&
+ pp->addr.addr.type != PF_ADDR_DYNIFTL &&
+ pp->addr.addr.type != PF_ADDR_TABLE) {
+ error = EINVAL;
+ break;
+ }
+ pa = malloc(sizeof(*pa), M_PFRULE, M_WAITOK);
+ bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr));
+ if (pa->ifname[0])
+ kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
+ PF_RULES_WLOCK();
+ if (pp->ticket != V_ticket_pabuf) {
+ PF_RULES_WUNLOCK();
+ if (pa->ifname[0])
+ free(kif, PFI_MTYPE);
+ free(pa, M_PFRULE);
+ error = EBUSY;
+ break;
+ }
+ if (pa->ifname[0]) {
+ pa->kif = pfi_kif_attach(kif, pa->ifname);
+ pfi_kif_ref(pa->kif);
+ } else
+ pa->kif = NULL;
+ if (pa->addr.type == PF_ADDR_DYNIFTL && ((error =
+ pfi_dynaddr_setup(&pa->addr, pp->af)) != 0)) {
+ if (pa->ifname[0])
+ pfi_kif_unref(pa->kif);
+ PF_RULES_WUNLOCK();
+ free(pa, M_PFRULE);
+ break;
+ }
+ TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries);
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCGETADDRS: {
+ struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr;
+ struct pf_pool *pool;
+ struct pf_pooladdr *pa;
+
+ PF_RULES_RLOCK();
+ pp->nr = 0;
+ pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action,
+ pp->r_num, 0, 1, 0);
+ if (pool == NULL) {
+ PF_RULES_RUNLOCK();
+ error = EBUSY;
+ break;
+ }
+ TAILQ_FOREACH(pa, &pool->list, entries)
+ pp->nr++;
+ PF_RULES_RUNLOCK();
+ break;
+ }
+
+ case DIOCGETADDR: {
+ struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr;
+ struct pf_pool *pool;
+ struct pf_pooladdr *pa;
+ u_int32_t nr = 0;
+
+ PF_RULES_RLOCK();
+ pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action,
+ pp->r_num, 0, 1, 1);
+ if (pool == NULL) {
+ PF_RULES_RUNLOCK();
+ error = EBUSY;
+ break;
+ }
+ pa = TAILQ_FIRST(&pool->list);
+ while ((pa != NULL) && (nr < pp->nr)) {
+ pa = TAILQ_NEXT(pa, entries);
+ nr++;
+ }
+ if (pa == NULL) {
+ PF_RULES_RUNLOCK();
+ error = EBUSY;
+ break;
+ }
+ bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr));
+ pf_addr_copyout(&pp->addr.addr);
+ PF_RULES_RUNLOCK();
+ break;
+ }
+
+ case DIOCCHANGEADDR: {
+ struct pfioc_pooladdr *pca = (struct pfioc_pooladdr *)addr;
+ struct pf_pool *pool;
+ struct pf_pooladdr *oldpa = NULL, *newpa = NULL;
+ struct pf_ruleset *ruleset;
+ struct pfi_kif *kif = NULL;
+
+ if (pca->action < PF_CHANGE_ADD_HEAD ||
+ pca->action > PF_CHANGE_REMOVE) {
+ error = EINVAL;
+ break;
+ }
+ if (pca->addr.addr.type != PF_ADDR_ADDRMASK &&
+ pca->addr.addr.type != PF_ADDR_DYNIFTL &&
+ pca->addr.addr.type != PF_ADDR_TABLE) {
+ error = EINVAL;
+ break;
+ }
+
+ if (pca->action != PF_CHANGE_REMOVE) {
+#ifndef INET
+ if (pca->af == AF_INET) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET */
+#ifndef INET6
+ if (pca->af == AF_INET6) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+#endif /* INET6 */
+ newpa = malloc(sizeof(*newpa), M_PFRULE, M_WAITOK);
+ bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr));
+ if (newpa->ifname[0])
+ kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
+ newpa->kif = NULL;
+ }
+
+#define ERROUT(x) { error = (x); goto DIOCCHANGEADDR_error; }
+ PF_RULES_WLOCK();
+ ruleset = pf_find_ruleset(pca->anchor);
+ if (ruleset == NULL)
+ ERROUT(EBUSY);
+
+ pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action,
+ pca->r_num, pca->r_last, 1, 1);
+ if (pool == NULL)
+ ERROUT(EBUSY);
+
+ if (pca->action != PF_CHANGE_REMOVE) {
+ if (newpa->ifname[0]) {
+ newpa->kif = pfi_kif_attach(kif, newpa->ifname);
+ pfi_kif_ref(newpa->kif);
+ kif = NULL;
+ }
+
+ switch (newpa->addr.type) {
+ case PF_ADDR_DYNIFTL:
+ error = pfi_dynaddr_setup(&newpa->addr,
+ pca->af);
+ break;
+ case PF_ADDR_TABLE:
+ newpa->addr.p.tbl = pfr_attach_table(ruleset,
+ newpa->addr.v.tblname);
+ if (newpa->addr.p.tbl == NULL)
+ error = ENOMEM;
+ break;
+ }
+ if (error)
+ goto DIOCCHANGEADDR_error;
+ }
+
+ switch (pca->action) {
+ case PF_CHANGE_ADD_HEAD:
+ oldpa = TAILQ_FIRST(&pool->list);
+ break;
+ case PF_CHANGE_ADD_TAIL:
+ oldpa = TAILQ_LAST(&pool->list, pf_palist);
+ break;
+ default:
+ oldpa = TAILQ_FIRST(&pool->list);
+ for (int i = 0; oldpa && i < pca->nr; i++)
+ oldpa = TAILQ_NEXT(oldpa, entries);
+
+ if (oldpa == NULL)
+ ERROUT(EINVAL);
+ }
+
+ if (pca->action == PF_CHANGE_REMOVE) {
+ TAILQ_REMOVE(&pool->list, oldpa, entries);
+ switch (oldpa->addr.type) {
+ case PF_ADDR_DYNIFTL:
+ pfi_dynaddr_remove(oldpa->addr.p.dyn);
+ break;
+ case PF_ADDR_TABLE:
+ pfr_detach_table(oldpa->addr.p.tbl);
+ break;
+ }
+ if (oldpa->kif)
+ pfi_kif_unref(oldpa->kif);
+ free(oldpa, M_PFRULE);
+ } else {
+ if (oldpa == NULL)
+ TAILQ_INSERT_TAIL(&pool->list, newpa, entries);
+ else if (pca->action == PF_CHANGE_ADD_HEAD ||
+ pca->action == PF_CHANGE_ADD_BEFORE)
+ TAILQ_INSERT_BEFORE(oldpa, newpa, entries);
+ else
+ TAILQ_INSERT_AFTER(&pool->list, oldpa,
+ newpa, entries);
+ }
+
+ pool->cur = TAILQ_FIRST(&pool->list);
+ PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af);
+ PF_RULES_WUNLOCK();
+ break;
+
+#undef ERROUT
+DIOCCHANGEADDR_error:
+ if (newpa->kif)
+ pfi_kif_unref(newpa->kif);
+ PF_RULES_WUNLOCK();
+ if (newpa != NULL)
+ free(newpa, M_PFRULE);
+ if (kif != NULL)
+ free(kif, PFI_MTYPE);
+ break;
+ }
+
+ case DIOCGETRULESETS: {
+ struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr;
+ struct pf_ruleset *ruleset;
+ struct pf_anchor *anchor;
+
+ PF_RULES_RLOCK();
+ pr->path[sizeof(pr->path) - 1] = 0;
+ if ((ruleset = pf_find_ruleset(pr->path)) == NULL) {
+ PF_RULES_RUNLOCK();
+ error = ENOENT;
+ break;
+ }
+ pr->nr = 0;
+ if (ruleset->anchor == NULL) {
+ /* XXX kludge for pf_main_ruleset */
+ RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors)
+ if (anchor->parent == NULL)
+ pr->nr++;
+ } else {
+ RB_FOREACH(anchor, pf_anchor_node,
+ &ruleset->anchor->children)
+ pr->nr++;
+ }
+ PF_RULES_RUNLOCK();
+ break;
+ }
+
+ case DIOCGETRULESET: {
+ struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr;
+ struct pf_ruleset *ruleset;
+ struct pf_anchor *anchor;
+ u_int32_t nr = 0;
+
+ PF_RULES_RLOCK();
+ pr->path[sizeof(pr->path) - 1] = 0;
+ if ((ruleset = pf_find_ruleset(pr->path)) == NULL) {
+ PF_RULES_RUNLOCK();
+ error = ENOENT;
+ break;
+ }
+ pr->name[0] = 0;
+ if (ruleset->anchor == NULL) {
+ /* XXX kludge for pf_main_ruleset */
+ RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors)
+ if (anchor->parent == NULL && nr++ == pr->nr) {
+ strlcpy(pr->name, anchor->name,
+ sizeof(pr->name));
+ break;
+ }
+ } else {
+ RB_FOREACH(anchor, pf_anchor_node,
+ &ruleset->anchor->children)
+ if (nr++ == pr->nr) {
+ strlcpy(pr->name, anchor->name,
+ sizeof(pr->name));
+ break;
+ }
+ }
+ if (!pr->name[0])
+ error = EBUSY;
+ PF_RULES_RUNLOCK();
+ break;
+ }
+
+ case DIOCRCLRTABLES: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != 0) {
+ error = ENODEV;
+ break;
+ }
+ PF_RULES_WLOCK();
+ error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel,
+ io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCRADDTABLES: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_table *pfrts;
+ size_t totlen;
+
+ if (io->pfrio_esize != sizeof(struct pfr_table)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = io->pfrio_size * sizeof(struct pfr_table);
+ pfrts = malloc(totlen, M_TEMP, M_WAITOK);
+ error = copyin(io->pfrio_buffer, pfrts, totlen);
+ if (error) {
+ free(pfrts, M_TEMP);
+ break;
+ }
+ PF_RULES_WLOCK();
+ error = pfr_add_tables(pfrts, io->pfrio_size,
+ &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ PF_RULES_WUNLOCK();
+ free(pfrts, M_TEMP);
+ break;
+ }
+
+ case DIOCRDELTABLES: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_table *pfrts;
+ size_t totlen;
+
+ if (io->pfrio_esize != sizeof(struct pfr_table)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = io->pfrio_size * sizeof(struct pfr_table);
+ pfrts = malloc(totlen, M_TEMP, M_WAITOK);
+ error = copyin(io->pfrio_buffer, pfrts, totlen);
+ if (error) {
+ free(pfrts, M_TEMP);
+ break;
+ }
+ PF_RULES_WLOCK();
+ error = pfr_del_tables(pfrts, io->pfrio_size,
+ &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ PF_RULES_WUNLOCK();
+ free(pfrts, M_TEMP);
+ break;
+ }
+
+ case DIOCRGETTABLES: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_table *pfrts;
+ size_t totlen;
+
+ if (io->pfrio_esize != sizeof(struct pfr_table)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = io->pfrio_size * sizeof(struct pfr_table);
+ pfrts = malloc(totlen, M_TEMP, M_WAITOK);
+ PF_RULES_RLOCK();
+ error = pfr_get_tables(&io->pfrio_table, pfrts,
+ &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ PF_RULES_RUNLOCK();
+ if (error == 0)
+ error = copyout(pfrts, io->pfrio_buffer, totlen);
+ free(pfrts, M_TEMP);
+ break;
+ }
+
+ case DIOCRGETTSTATS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_tstats *pfrtstats;
+ size_t totlen;
+
+ if (io->pfrio_esize != sizeof(struct pfr_tstats)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = io->pfrio_size * sizeof(struct pfr_tstats);
+ pfrtstats = malloc(totlen, M_TEMP, M_WAITOK);
+ PF_RULES_WLOCK();
+ error = pfr_get_tstats(&io->pfrio_table, pfrtstats,
+ &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ PF_RULES_WUNLOCK();
+ if (error == 0)
+ error = copyout(pfrtstats, io->pfrio_buffer, totlen);
+ free(pfrtstats, M_TEMP);
+ break;
+ }
+
+ case DIOCRCLRTSTATS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_table *pfrts;
+ size_t totlen;
+
+ if (io->pfrio_esize != sizeof(struct pfr_table)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = io->pfrio_size * sizeof(struct pfr_table);
+ pfrts = malloc(totlen, M_TEMP, M_WAITOK);
+ error = copyin(io->pfrio_buffer, pfrts, totlen);
+ if (error) {
+ free(pfrts, M_TEMP);
+ break;
+ }
+ PF_RULES_WLOCK();
+ error = pfr_clr_tstats(pfrts, io->pfrio_size,
+ &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ PF_RULES_WUNLOCK();
+ free(pfrts, M_TEMP);
+ break;
+ }
+
+ case DIOCRSETTFLAGS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_table *pfrts;
+ size_t totlen;
+
+ if (io->pfrio_esize != sizeof(struct pfr_table)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = io->pfrio_size * sizeof(struct pfr_table);
+ pfrts = malloc(totlen, M_TEMP, M_WAITOK);
+ error = copyin(io->pfrio_buffer, pfrts, totlen);
+ if (error) {
+ free(pfrts, M_TEMP);
+ break;
+ }
+ PF_RULES_WLOCK();
+ error = pfr_set_tflags(pfrts, io->pfrio_size,
+ io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
+ &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ PF_RULES_WUNLOCK();
+ free(pfrts, M_TEMP);
+ break;
+ }
+
+ case DIOCRCLRADDRS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+
+ if (io->pfrio_esize != 0) {
+ error = ENODEV;
+ break;
+ }
+ PF_RULES_WLOCK();
+ error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel,
+ io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCRADDADDRS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_addr *pfras;
+ size_t totlen;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = io->pfrio_size * sizeof(struct pfr_addr);
+ pfras = malloc(totlen, M_TEMP, M_WAITOK);
+ error = copyin(io->pfrio_buffer, pfras, totlen);
+ if (error) {
+ free(pfras, M_TEMP);
+ break;
+ }
+ PF_RULES_WLOCK();
+ error = pfr_add_addrs(&io->pfrio_table, pfras,
+ io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
+ PFR_FLAG_USERIOCTL);
+ PF_RULES_WUNLOCK();
+ if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
+ error = copyout(pfras, io->pfrio_buffer, totlen);
+ free(pfras, M_TEMP);
+ break;
+ }
+
+ case DIOCRDELADDRS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_addr *pfras;
+ size_t totlen;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = io->pfrio_size * sizeof(struct pfr_addr);
+ pfras = malloc(totlen, M_TEMP, M_WAITOK);
+ error = copyin(io->pfrio_buffer, pfras, totlen);
+ if (error) {
+ free(pfras, M_TEMP);
+ break;
+ }
+ PF_RULES_WLOCK();
+ error = pfr_del_addrs(&io->pfrio_table, pfras,
+ io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags |
+ PFR_FLAG_USERIOCTL);
+ PF_RULES_WUNLOCK();
+ if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
+ error = copyout(pfras, io->pfrio_buffer, totlen);
+ free(pfras, M_TEMP);
+ break;
+ }
+
+ case DIOCRSETADDRS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_addr *pfras;
+ size_t totlen, count;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ count = max(io->pfrio_size, io->pfrio_size2);
+ totlen = count * sizeof(struct pfr_addr);
+ pfras = malloc(totlen, M_TEMP, M_WAITOK);
+ error = copyin(io->pfrio_buffer, pfras, totlen);
+ if (error) {
+ free(pfras, M_TEMP);
+ break;
+ }
+ PF_RULES_WLOCK();
+ error = pfr_set_addrs(&io->pfrio_table, pfras,
+ io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd,
+ &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags |
+ PFR_FLAG_USERIOCTL, 0);
+ PF_RULES_WUNLOCK();
+ if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
+ error = copyout(pfras, io->pfrio_buffer, totlen);
+ free(pfras, M_TEMP);
+ break;
+ }
+
+ case DIOCRGETADDRS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_addr *pfras;
+ size_t totlen;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = io->pfrio_size * sizeof(struct pfr_addr);
+ pfras = malloc(totlen, M_TEMP, M_WAITOK);
+ PF_RULES_RLOCK();
+ error = pfr_get_addrs(&io->pfrio_table, pfras,
+ &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ PF_RULES_RUNLOCK();
+ if (error == 0)
+ error = copyout(pfras, io->pfrio_buffer, totlen);
+ free(pfras, M_TEMP);
+ break;
+ }
+
+ case DIOCRGETASTATS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_astats *pfrastats;
+ size_t totlen;
+
+ if (io->pfrio_esize != sizeof(struct pfr_astats)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = io->pfrio_size * sizeof(struct pfr_astats);
+ pfrastats = malloc(totlen, M_TEMP, M_WAITOK);
+ PF_RULES_RLOCK();
+ error = pfr_get_astats(&io->pfrio_table, pfrastats,
+ &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ PF_RULES_RUNLOCK();
+ if (error == 0)
+ error = copyout(pfrastats, io->pfrio_buffer, totlen);
+ free(pfrastats, M_TEMP);
+ break;
+ }
+
+ case DIOCRCLRASTATS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_addr *pfras;
+ size_t totlen;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = io->pfrio_size * sizeof(struct pfr_addr);
+ pfras = malloc(totlen, M_TEMP, M_WAITOK);
+ error = copyin(io->pfrio_buffer, pfras, totlen);
+ if (error) {
+ free(pfras, M_TEMP);
+ break;
+ }
+ PF_RULES_WLOCK();
+ error = pfr_clr_astats(&io->pfrio_table, pfras,
+ io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags |
+ PFR_FLAG_USERIOCTL);
+ PF_RULES_WUNLOCK();
+ if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
+ error = copyout(pfras, io->pfrio_buffer, totlen);
+ free(pfras, M_TEMP);
+ break;
+ }
+
+ case DIOCRTSTADDRS: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_addr *pfras;
+ size_t totlen;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = io->pfrio_size * sizeof(struct pfr_addr);
+ pfras = malloc(totlen, M_TEMP, M_WAITOK);
+ error = copyin(io->pfrio_buffer, pfras, totlen);
+ if (error) {
+ free(pfras, M_TEMP);
+ break;
+ }
+ PF_RULES_RLOCK();
+ error = pfr_tst_addrs(&io->pfrio_table, pfras,
+ io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags |
+ PFR_FLAG_USERIOCTL);
+ PF_RULES_RUNLOCK();
+ if (error == 0)
+ error = copyout(pfras, io->pfrio_buffer, totlen);
+ free(pfras, M_TEMP);
+ break;
+ }
+
+ case DIOCRINADEFINE: {
+ struct pfioc_table *io = (struct pfioc_table *)addr;
+ struct pfr_addr *pfras;
+ size_t totlen;
+
+ if (io->pfrio_esize != sizeof(struct pfr_addr)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = io->pfrio_size * sizeof(struct pfr_addr);
+ pfras = malloc(totlen, M_TEMP, M_WAITOK);
+ error = copyin(io->pfrio_buffer, pfras, totlen);
+ if (error) {
+ free(pfras, M_TEMP);
+ break;
+ }
+ PF_RULES_WLOCK();
+ error = pfr_ina_define(&io->pfrio_table, pfras,
+ io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
+ io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
+ PF_RULES_WUNLOCK();
+ free(pfras, M_TEMP);
+ break;
+ }
+
+ case DIOCOSFPADD: {
+ struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
+ PF_RULES_WLOCK();
+ error = pf_osfp_add(io);
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCOSFPGET: {
+ struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
+ PF_RULES_RLOCK();
+ error = pf_osfp_get(io);
+ PF_RULES_RUNLOCK();
+ break;
+ }
+
+ case DIOCXBEGIN: {
+ struct pfioc_trans *io = (struct pfioc_trans *)addr;
+ struct pfioc_trans_e *ioes, *ioe;
+ size_t totlen;
+ int i;
+
+ if (io->esize != sizeof(*ioe)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = sizeof(struct pfioc_trans_e) * io->size;
+ ioes = malloc(totlen, M_TEMP, M_WAITOK);
+ error = copyin(io->array, ioes, totlen);
+ if (error) {
+ free(ioes, M_TEMP);
+ break;
+ }
+ PF_RULES_WLOCK();
+ for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
+ switch (ioe->rs_num) {
+#ifdef ALTQ
+ case PF_RULESET_ALTQ:
+ if (ioe->anchor[0]) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ error = EINVAL;
+ goto fail;
+ }
+ if ((error = pf_begin_altq(&ioe->ticket))) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ goto fail;
+ }
+ break;
+#endif /* ALTQ */
+ case PF_RULESET_TABLE:
+ {
+ struct pfr_table table;
+
+ bzero(&table, sizeof(table));
+ strlcpy(table.pfrt_anchor, ioe->anchor,
+ sizeof(table.pfrt_anchor));
+ if ((error = pfr_ina_begin(&table,
+ &ioe->ticket, NULL, 0))) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ goto fail;
+ }
+ break;
+ }
+ default:
+ if ((error = pf_begin_rules(&ioe->ticket,
+ ioe->rs_num, ioe->anchor))) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ goto fail;
+ }
+ break;
+ }
+ }
+ PF_RULES_WUNLOCK();
+ error = copyout(ioes, io->array, totlen);
+ free(ioes, M_TEMP);
+ break;
+ }
+
+ case DIOCXROLLBACK: {
+ struct pfioc_trans *io = (struct pfioc_trans *)addr;
+ struct pfioc_trans_e *ioe, *ioes;
+ size_t totlen;
+ int i;
+
+ if (io->esize != sizeof(*ioe)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = sizeof(struct pfioc_trans_e) * io->size;
+ ioes = malloc(totlen, M_TEMP, M_WAITOK);
+ error = copyin(io->array, ioes, totlen);
+ if (error) {
+ free(ioes, M_TEMP);
+ break;
+ }
+ PF_RULES_WLOCK();
+ for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
+ switch (ioe->rs_num) {
+#ifdef ALTQ
+ case PF_RULESET_ALTQ:
+ if (ioe->anchor[0]) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ error = EINVAL;
+ goto fail;
+ }
+ if ((error = pf_rollback_altq(ioe->ticket))) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ goto fail; /* really bad */
+ }
+ break;
+#endif /* ALTQ */
+ case PF_RULESET_TABLE:
+ {
+ struct pfr_table table;
+
+ bzero(&table, sizeof(table));
+ strlcpy(table.pfrt_anchor, ioe->anchor,
+ sizeof(table.pfrt_anchor));
+ if ((error = pfr_ina_rollback(&table,
+ ioe->ticket, NULL, 0))) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ goto fail; /* really bad */
+ }
+ break;
+ }
+ default:
+ if ((error = pf_rollback_rules(ioe->ticket,
+ ioe->rs_num, ioe->anchor))) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ goto fail; /* really bad */
+ }
+ break;
+ }
+ }
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ break;
+ }
+
+ case DIOCXCOMMIT: {
+ struct pfioc_trans *io = (struct pfioc_trans *)addr;
+ struct pfioc_trans_e *ioe, *ioes;
+ struct pf_ruleset *rs;
+ size_t totlen;
+ int i;
+
+ if (io->esize != sizeof(*ioe)) {
+ error = ENODEV;
+ break;
+ }
+ totlen = sizeof(struct pfioc_trans_e) * io->size;
+ ioes = malloc(totlen, M_TEMP, M_WAITOK);
+ error = copyin(io->array, ioes, totlen);
+ if (error) {
+ free(ioes, M_TEMP);
+ break;
+ }
+ PF_RULES_WLOCK();
+ /* First makes sure everything will succeed. */
+ for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
+ switch (ioe->rs_num) {
+#ifdef ALTQ
+ case PF_RULESET_ALTQ:
+ if (ioe->anchor[0]) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ error = EINVAL;
+ goto fail;
+ }
+ if (!V_altqs_inactive_open || ioe->ticket !=
+ V_ticket_altqs_inactive) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ error = EBUSY;
+ goto fail;
+ }
+ break;
+#endif /* ALTQ */
+ case PF_RULESET_TABLE:
+ rs = pf_find_ruleset(ioe->anchor);
+ if (rs == NULL || !rs->topen || ioe->ticket !=
+ rs->tticket) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ error = EBUSY;
+ goto fail;
+ }
+ break;
+ default:
+ if (ioe->rs_num < 0 || ioe->rs_num >=
+ PF_RULESET_MAX) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ error = EINVAL;
+ goto fail;
+ }
+ rs = pf_find_ruleset(ioe->anchor);
+ if (rs == NULL ||
+ !rs->rules[ioe->rs_num].inactive.open ||
+ rs->rules[ioe->rs_num].inactive.ticket !=
+ ioe->ticket) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ error = EBUSY;
+ goto fail;
+ }
+ break;
+ }
+ }
+ /* Now do the commit - no errors should happen here. */
+ for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
+ switch (ioe->rs_num) {
+#ifdef ALTQ
+ case PF_RULESET_ALTQ:
+ if ((error = pf_commit_altq(ioe->ticket))) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ goto fail; /* really bad */
+ }
+ break;
+#endif /* ALTQ */
+ case PF_RULESET_TABLE:
+ {
+ struct pfr_table table;
+
+ bzero(&table, sizeof(table));
+ strlcpy(table.pfrt_anchor, ioe->anchor,
+ sizeof(table.pfrt_anchor));
+ if ((error = pfr_ina_commit(&table,
+ ioe->ticket, NULL, NULL, 0))) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ goto fail; /* really bad */
+ }
+ break;
+ }
+ default:
+ if ((error = pf_commit_rules(ioe->ticket,
+ ioe->rs_num, ioe->anchor))) {
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ goto fail; /* really bad */
+ }
+ break;
+ }
+ }
+ PF_RULES_WUNLOCK();
+ free(ioes, M_TEMP);
+ break;
+ }
+
+ case DIOCGETSRCNODES: {
+ struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr;
+ struct pf_srchash *sh;
+ struct pf_src_node *n, *p, *pstore;
+ uint32_t i, nr = 0;
+
+ if (psn->psn_len == 0) {
+ for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
+ i++, sh++) {
+ PF_HASHROW_LOCK(sh);
+ LIST_FOREACH(n, &sh->nodes, entry)
+ nr++;
+ PF_HASHROW_UNLOCK(sh);
+ }
+ psn->psn_len = sizeof(struct pf_src_node) * nr;
+ break;
+ }
+
+ p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK);
+ for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
+ i++, sh++) {
+ PF_HASHROW_LOCK(sh);
+ LIST_FOREACH(n, &sh->nodes, entry) {
+ int secs = time_uptime, diff;
+
+ if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len)
+ break;
+
+ bcopy(n, p, sizeof(struct pf_src_node));
+ if (n->rule.ptr != NULL)
+ p->rule.nr = n->rule.ptr->nr;
+ p->creation = secs - p->creation;
+ if (p->expire > secs)
+ p->expire -= secs;
+ else
+ p->expire = 0;
+
+ /* Adjust the connection rate estimate. */
+ diff = secs - n->conn_rate.last;
+ if (diff >= n->conn_rate.seconds)
+ p->conn_rate.count = 0;
+ else
+ p->conn_rate.count -=
+ n->conn_rate.count * diff /
+ n->conn_rate.seconds;
+ p++;
+ nr++;
+ }
+ PF_HASHROW_UNLOCK(sh);
+ }
+ error = copyout(pstore, psn->psn_src_nodes,
+ sizeof(struct pf_src_node) * nr);
+ if (error) {
+ free(pstore, M_TEMP);
+ break;
+ }
+ psn->psn_len = sizeof(struct pf_src_node) * nr;
+ free(pstore, M_TEMP);
+ break;
+ }
+
+ case DIOCCLRSRCNODES: {
+
+ pf_clear_srcnodes(NULL);
+ pf_purge_expired_src_nodes();
+ break;
+ }
+
+ case DIOCKILLSRCNODES:
+ pf_kill_srcnodes((struct pfioc_src_node_kill *)addr);
+ break;
+
+ case DIOCSETHOSTID: {
+ u_int32_t *hostid = (u_int32_t *)addr;
+
+ PF_RULES_WLOCK();
+ if (*hostid == 0)
+ V_pf_status.hostid = arc4random();
+ else
+ V_pf_status.hostid = *hostid;
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCOSFPFLUSH:
+ PF_RULES_WLOCK();
+ pf_osfp_flush();
+ PF_RULES_WUNLOCK();
+ break;
+
+ case DIOCIGETIFACES: {
+ struct pfioc_iface *io = (struct pfioc_iface *)addr;
+ struct pfi_kif *ifstore;
+ size_t bufsiz;
+
+ if (io->pfiio_esize != sizeof(struct pfi_kif)) {
+ error = ENODEV;
+ break;
+ }
+
+ bufsiz = io->pfiio_size * sizeof(struct pfi_kif);
+ ifstore = malloc(bufsiz, M_TEMP, M_WAITOK);
+ PF_RULES_RLOCK();
+ pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size);
+ PF_RULES_RUNLOCK();
+ error = copyout(ifstore, io->pfiio_buffer, bufsiz);
+ free(ifstore, M_TEMP);
+ break;
+ }
+
+ case DIOCSETIFFLAG: {
+ struct pfioc_iface *io = (struct pfioc_iface *)addr;
+
+ PF_RULES_WLOCK();
+ error = pfi_set_flags(io->pfiio_name, io->pfiio_flags);
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ case DIOCCLRIFFLAG: {
+ struct pfioc_iface *io = (struct pfioc_iface *)addr;
+
+ PF_RULES_WLOCK();
+ error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags);
+ PF_RULES_WUNLOCK();
+ break;
+ }
+
+ default:
+ error = ENODEV;
+ break;
+ }
+fail:
+ if (sx_xlocked(&pf_ioctl_lock))
+ sx_xunlock(&pf_ioctl_lock);
+ CURVNET_RESTORE();
+
+ return (error);
+}
+
+void
+pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
+{
+ bzero(sp, sizeof(struct pfsync_state));
+
+ /* copy from state key */
+ sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
+ sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
+ sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
+ sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
+ sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
+ sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
+ sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
+ sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
+ sp->proto = st->key[PF_SK_WIRE]->proto;
+ sp->af = st->key[PF_SK_WIRE]->af;
+
+ /* copy from state */
+ strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
+ bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
+ sp->creation = htonl(time_uptime - st->creation);
+ sp->expire = pf_state_expires(st);
+ if (sp->expire <= time_uptime)
+ sp->expire = htonl(0);
+ else
+ sp->expire = htonl(sp->expire - time_uptime);
+
+ sp->direction = st->direction;
+ sp->log = st->log;
+ sp->timeout = st->timeout;
+ sp->state_flags = st->state_flags;
+ if (st->src_node)
+ sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
+ if (st->nat_src_node)
+ sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;
+
+ sp->id = st->id;
+ sp->creatorid = st->creatorid;
+ pf_state_peer_hton(&st->src, &sp->src);
+ pf_state_peer_hton(&st->dst, &sp->dst);
+
+ if (st->rule.ptr == NULL)
+ sp->rule = htonl(-1);
+ else
+ sp->rule = htonl(st->rule.ptr->nr);
+ if (st->anchor.ptr == NULL)
+ sp->anchor = htonl(-1);
+ else
+ sp->anchor = htonl(st->anchor.ptr->nr);
+ if (st->nat_rule.ptr == NULL)
+ sp->nat_rule = htonl(-1);
+ else
+ sp->nat_rule = htonl(st->nat_rule.ptr->nr);
+
+ pf_state_counter_hton(st->packets[0], sp->packets[0]);
+ pf_state_counter_hton(st->packets[1], sp->packets[1]);
+ pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
+ pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
+
+}
+
+/*
+ * Prepare a PF_ADDR_TABLE address wrapper for copyout to userland:
+ * the kernel table pointer is replaced by the table's address count
+ * (or -1 if the table is not active), since kernel pointers must not
+ * leak to userland.
+ */
+static void
+pf_tbladdr_copyout(struct pf_addr_wrap *aw)
+{
+	struct pfr_ktable *kt;
+
+	KASSERT(aw->type == PF_ADDR_TABLE, ("%s: type %u", __func__, aw->type));
+
+	kt = aw->p.tbl;
+	/* Fall back to the root table if this one is not active. */
+	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
+		kt = kt->pfrkt_root;
+	aw->p.tbl = NULL;
+	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
+		kt->pfrkt_cnt : -1;
+}
+
+/*
+ * XXX - Check for version mismatch!!!
+ */
+/*
+ * Unlink every state in every id-hash row.  Each unlink may drop the
+ * row lock internally, so after removing one state we jump back to
+ * "relock" and restart the iteration of that row from scratch rather
+ * than continue with a possibly stale list pointer.
+ */
+static void
+pf_clear_states(void)
+{
+	struct pf_state *s;
+	u_int i;
+
+	for (i = 0; i <= pf_hashmask; i++) {
+		struct pf_idhash *ih = &V_pf_idhash[i];
+relock:
+		PF_HASHROW_LOCK(ih);
+		LIST_FOREACH(s, &ih->states, entry) {
+			s->timeout = PFTM_PURGE;
+			/* Don't send out individual delete messages. */
+			s->state_flags |= PFSTATE_NOSYNC;
+			pf_unlink_state(s, PF_ENTER_LOCKED);
+			goto relock;
+		}
+		PF_HASHROW_UNLOCK(ih);
+	}
+}
+
+/*
+ * Delete all radix tables.  A zeroed pfioc_table (empty anchor, no
+ * flags) makes pfr_clr_tables() operate on every table; the deletion
+ * count in io.pfrio_ndel is discarded.  Returns 0 or an errno value.
+ */
+static int
+pf_clear_tables(void)
+{
+	struct pfioc_table io;
+	int error;
+
+	bzero(&io, sizeof(io));
+
+	error = pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel,
+	    io.pfrio_flags);
+
+	return (error);
+}
+
+/*
+ * Detach source node n from all states that reference it and mark it
+ * for expiry on the next purge run.  With n == NULL, do this for every
+ * source node in the system.
+ */
+static void
+pf_clear_srcnodes(struct pf_src_node *n)
+{
+	struct pf_state *s;
+	int i;
+
+	/* First drop all state -> src-node references. */
+	for (i = 0; i <= pf_hashmask; i++) {
+		struct pf_idhash *ih = &V_pf_idhash[i];
+
+		PF_HASHROW_LOCK(ih);
+		LIST_FOREACH(s, &ih->states, entry) {
+			if (n == NULL || n == s->src_node)
+				s->src_node = NULL;
+			if (n == NULL || n == s->nat_src_node)
+				s->nat_src_node = NULL;
+		}
+		PF_HASHROW_UNLOCK(ih);
+	}
+
+	if (n == NULL) {
+		struct pf_srchash *sh;
+
+		/* Expire every node; expire = 1 is "already expired". */
+		for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
+		    i++, sh++) {
+			PF_HASHROW_LOCK(sh);
+			LIST_FOREACH(n, &sh->nodes, entry) {
+				n->expire = 1;
+				n->states = 0;
+			}
+			PF_HASHROW_UNLOCK(sh);
+		}
+	} else {
+		/* XXX: hash slot should already be locked here. */
+		n->expire = 1;
+		n->states = 0;
+	}
+}
+
+/*
+ * DIOCKILLSRCNODES backend: unlink and free every source node whose
+ * source/redirect addresses match the (possibly negated) src/dst
+ * specification in *psnk, after clearing all state references to the
+ * doomed nodes.  The number of nodes killed is reported back in
+ * psnk->psnk_killed.
+ */
+static void
+pf_kill_srcnodes(struct pfioc_src_node_kill *psnk)
+{
+	struct pf_src_node_list	 kill;
+
+	LIST_INIT(&kill);
+	/* Collect matching nodes onto a private kill list. */
+	for (int i = 0; i <= pf_srchashmask; i++) {
+		struct pf_srchash *sh = &V_pf_srchash[i];
+		struct pf_src_node *sn, *tmp;
+
+		PF_HASHROW_LOCK(sh);
+		LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp)
+			if (PF_MATCHA(psnk->psnk_src.neg,
+			      &psnk->psnk_src.addr.v.a.addr,
+			      &psnk->psnk_src.addr.v.a.mask,
+			      &sn->addr, sn->af) &&
+			    PF_MATCHA(psnk->psnk_dst.neg,
+			      &psnk->psnk_dst.addr.v.a.addr,
+			      &psnk->psnk_dst.addr.v.a.mask,
+			      &sn->raddr, sn->af)) {
+				pf_unlink_src_node(sn);
+				LIST_INSERT_HEAD(&kill, sn, entry);
+				/* expire = 1 tags the node for the scan below. */
+				sn->expire = 1;
+			}
+		PF_HASHROW_UNLOCK(sh);
+	}
+
+	/* Drop state references to any node tagged above. */
+	for (int i = 0; i <= pf_hashmask; i++) {
+		struct pf_idhash *ih = &V_pf_idhash[i];
+		struct pf_state *s;
+
+		PF_HASHROW_LOCK(ih);
+		LIST_FOREACH(s, &ih->states, entry) {
+			if (s->src_node && s->src_node->expire == 1)
+				s->src_node = NULL;
+			if (s->nat_src_node && s->nat_src_node->expire == 1)
+				s->nat_src_node = NULL;
+		}
+		PF_HASHROW_UNLOCK(ih);
+	}
+
+	psnk->psnk_killed = pf_free_src_nodes(&kill);
+}
+
+/*
+ * XXX - Check for version mismatch!!!
+ */
+
+/*
+ * Duplicate pfctl -Fa operation to get rid of as much as we can.
+ */
+/*
+ * Flush all rulesets, tables, states and source nodes and release the
+ * per-vnet counters.  Implemented as begin/commit of empty rulesets for
+ * each major ruleset type.  The do { } while (0) allows early exit via
+ * break on the first error; commits after the begins are expected to
+ * succeed.  Caller holds the rules write lock (see pf_unload_vnet()).
+ */
+static int
+shutdown_pf(void)
+{
+	int error = 0;
+	u_int32_t t[5];
+	char nn = '\0';
+
+	do {
+		if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
+		    != 0) {
+			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: SCRUB\n"));
+			break;
+		}
+		if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn))
+		    != 0) {
+			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n"));
+			break;		/* XXX: rollback? */
+		}
+		if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn))
+		    != 0) {
+			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n"));
+			break;		/* XXX: rollback? */
+		}
+		if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn))
+		    != 0) {
+			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n"));
+			break;		/* XXX: rollback? */
+		}
+		if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn))
+		    != 0) {
+			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n"));
+			break;		/* XXX: rollback? */
+		}
+
+		/* XXX: these should always succeed here */
+		pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn);
+		pf_commit_rules(t[1], PF_RULESET_FILTER, &nn);
+		pf_commit_rules(t[2], PF_RULESET_NAT, &nn);
+		pf_commit_rules(t[3], PF_RULESET_BINAT, &nn);
+		pf_commit_rules(t[4], PF_RULESET_RDR, &nn);
+
+		if ((error = pf_clear_tables()) != 0)
+			break;
+
+#ifdef ALTQ
+		if ((error = pf_begin_altq(&t[0])) != 0) {
+			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n"));
+			break;
+		}
+		pf_commit_altq(t[0]);
+#endif
+
+		pf_clear_states();
+
+		pf_clear_srcnodes(NULL);
+
+		/* status does not use malloced mem so no need to cleanup */
+		/* fingerprints and interfaces have their own cleanup code */
+
+		/* Free counters last as we updated them during shutdown. */
+		counter_u64_free(V_pf_default_rule.states_cur);
+		counter_u64_free(V_pf_default_rule.states_tot);
+		counter_u64_free(V_pf_default_rule.src_nodes);
+
+		for (int i = 0; i < PFRES_MAX; i++)
+			counter_u64_free(V_pf_status.counters[i]);
+		for (int i = 0; i < LCNT_MAX; i++)
+			counter_u64_free(V_pf_status.lcounters[i]);
+		for (int i = 0; i < FCNT_MAX; i++)
+			counter_u64_free(V_pf_status.fcounters[i]);
+		for (int i = 0; i < SCNT_MAX; i++)
+			counter_u64_free(V_pf_status.scounters[i]);
+	} while(0);
+
+	return (error);
+}
+
+#ifdef INET
+/*
+ * pfil(9) input hook for IPv4: run the packet through pf_test().
+ * If pf rejects the packet (and it still exists) free the mbuf and
+ * NULL out the caller's pointer, then report EACCES to pfil.
+ */
+static int
+pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+    struct inpcb *inp)
+{
+	int chk;
+
+	chk = pf_test(PF_IN, ifp, m, inp);
+	if (chk && *m) {
+		m_freem(*m);
+		*m = NULL;
+	}
+
+	if (chk != PF_PASS)
+		return (EACCES);
+	return (0);
+}
+
+/*
+ * pfil(9) output hook for IPv4; mirror image of pf_check_in() using
+ * PF_OUT.  Frees the mbuf and returns EACCES when pf does not pass.
+ */
+static int
+pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+    struct inpcb *inp)
+{
+	int chk;
+
+	chk = pf_test(PF_OUT, ifp, m, inp);
+	if (chk && *m) {
+		m_freem(*m);
+		*m = NULL;
+	}
+
+	if (chk != PF_PASS)
+		return (EACCES);
+	return (0);
+}
+#endif
+
+#ifdef INET6
+/*
+ * pfil(9) input hook for IPv6.  Looped-back packets are tested against
+ * lo0 (V_loif) instead of the real interface so stateful filtering
+ * matches the IPv4 behaviour; see the comment below.
+ */
+static int
+pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+    struct inpcb *inp)
+{
+	int chk;
+
+	/*
+	 * In case of loopback traffic IPv6 uses the real interface in
+	 * order to support scoped addresses. In order to support stateful
+	 * filtering we have change this to lo0 as it is the case in IPv4.
+	 */
+	CURVNET_SET(ifp->if_vnet);
+	chk = pf_test6(PF_IN, (*m)->m_flags & M_LOOP ? V_loif : ifp, m, inp);
+	CURVNET_RESTORE();
+	if (chk && *m) {
+		m_freem(*m);
+		*m = NULL;
+	}
+	if (chk != PF_PASS)
+		return (EACCES);
+	return (0);
+}
+
+/*
+ * pfil(9) output hook for IPv6; runs pf_test6(PF_OUT) under the
+ * interface's vnet and frees the mbuf on rejection (EACCES).
+ */
+static int
+pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
+    struct inpcb *inp)
+{
+	int chk;
+
+	CURVNET_SET(ifp->if_vnet);
+	chk = pf_test6(PF_OUT, ifp, m, inp);
+	CURVNET_RESTORE();
+	if (chk && *m) {
+		m_freem(*m);
+		*m = NULL;
+	}
+	if (chk != PF_PASS)
+		return (EACCES);
+	return (0);
+}
+#endif /* INET6 */
+
+/*
+ * Register the pf_check* functions with the pfil(9) heads for the
+ * address families compiled in.  Idempotent: a no-op if already hooked.
+ * On failure to find the INET6 head the INET hooks are rolled back so
+ * we never end up half-hooked.  Returns 0 or ESRCH.
+ */
+static int
+hook_pf(void)
+{
+#ifdef INET
+	struct pfil_head *pfh_inet;
+#endif
+#ifdef INET6
+	struct pfil_head *pfh_inet6;
+#endif
+
+	if (V_pf_pfil_hooked)
+		return (0);
+
+#ifdef INET
+	pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
+	if (pfh_inet == NULL)
+		return (ESRCH); /* XXX */
+	pfil_add_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet);
+	pfil_add_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet);
+#endif
+#ifdef INET6
+	pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
+	if (pfh_inet6 == NULL) {
+		/* Undo the INET hooks so hooking is all-or-nothing. */
+#ifdef INET
+		pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK,
+		    pfh_inet);
+		pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
+		    pfh_inet);
+#endif
+		return (ESRCH); /* XXX */
+	}
+	pfil_add_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6);
+	pfil_add_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6);
+#endif
+
+	V_pf_pfil_hooked = 1;
+	return (0);
+}
+
+/*
+ * Unregister the pf_check* pfil(9) hooks installed by hook_pf().
+ * Idempotent: a no-op if not currently hooked.  Returns 0 or ESRCH
+ * if a pfil head has disappeared.
+ */
+static int
+dehook_pf(void)
+{
+#ifdef INET
+	struct pfil_head *pfh_inet;
+#endif
+#ifdef INET6
+	struct pfil_head *pfh_inet6;
+#endif
+
+	if (V_pf_pfil_hooked == 0)
+		return (0);
+
+#ifdef INET
+	pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
+	if (pfh_inet == NULL)
+		return (ESRCH); /* XXX */
+	pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK,
+	    pfh_inet);
+	pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
+	    pfh_inet);
+#endif
+#ifdef INET6
+	pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
+	if (pfh_inet6 == NULL)
+		return (ESRCH); /* XXX */
+	pfil_remove_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK,
+	    pfh_inet6);
+	pfil_remove_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK,
+	    pfh_inet6);
+#endif
+
+	V_pf_pfil_hooked = 0;
+	return (0);
+}
+
+/*
+ * Per-vnet load: reset the hooked flag and tag/qid lists in every
+ * vnet, then attach pf to the current vnet and mark it active.
+ * NOTE(review): iterating all vnets here (rather than only curvnet)
+ * appears to be an rtems-libbsd port simplification — confirm against
+ * upstream, which initializes only the current vnet.
+ */
+static void
+pf_load_vnet(void)
+{
+	VNET_ITERATOR_DECL(vnet_iter);
+
+	VNET_LIST_RLOCK();
+	VNET_FOREACH(vnet_iter) {
+		CURVNET_SET(vnet_iter);
+		V_pf_pfil_hooked = 0;
+		TAILQ_INIT(&V_pf_tags);
+		TAILQ_INIT(&V_pf_qids);
+		CURVNET_RESTORE();
+	}
+	VNET_LIST_RUNLOCK();
+
+	pfattach_vnet();
+	V_pf_vnet_active = 1;
+}
+
+/*
+ * Global (non-vnet) module load: set up the rules rwlock and ioctl sx
+ * lock, the mbuf tag zone, the /dev/pf device node, the purge kernel
+ * process and the interface layer.  Returns 0 or an errno value.
+ * NOTE(review): on kproc_create() failure the device and locks are not
+ * torn down here — confirm whether the caller handles that.
+ */
+static int
+pf_load(void)
+{
+	int error;
+
+	rw_init(&pf_rules_lock, "pf rulesets");
+	sx_init(&pf_ioctl_lock, "pf ioctl");
+
+	pf_mtag_initialize();
+
+	pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME);
+	if (pf_dev == NULL)
+		return (ENOMEM);
+
+	pf_end_threads = 0;
+	error = kproc_create(pf_purge_thread, NULL, NULL, 0, 0, "pf purge");
+	if (error != 0)
+		return (error);
+
+	pfi_initialize();
+
+	return (0);
+}
+
+/*
+ * Per-vnet unload: stop pf in this vnet, detach the pfil hooks, purge
+ * remaining state, run shutdown_pf() under the rules write lock and
+ * clean up the per-vnet subsystems.  The mbuf tag zone is global and
+ * is only torn down from the default vnet.
+ */
+static void
+pf_unload_vnet(void)
+{
+	int error;
+
+	V_pf_vnet_active = 0;
+	V_pf_status.running = 0;
+	swi_remove(V_pf_swi_cookie);
+	error = dehook_pf();
+	if (error) {
+		/*
+		 * Should not happen!
+		 * XXX Due to error code ESRCH, kldunload will show
+		 * a message like 'No such process'.
+		 */
+		printf("%s : pfil unregisteration fail\n", __FUNCTION__);
+		return;
+	}
+
+	pf_unload_vnet_purge();
+
+	PF_RULES_WLOCK();
+	shutdown_pf();
+	PF_RULES_WUNLOCK();
+
+	pf_normalize_cleanup();
+	PF_RULES_WLOCK();
+	pfi_cleanup_vnet();
+	PF_RULES_WUNLOCK();
+	pfr_cleanup();
+	pf_osfp_flush();
+	pf_cleanup();
+	if (IS_DEFAULT_VNET(curvnet))
+		pf_mtag_cleanup();
+}
+
+/*
+ * Global module unload: signal the purge thread and wait for it to
+ * acknowledge (pf_end_threads is bumped to 2 by the thread on exit),
+ * then destroy the device node, the interface layer and the locks.
+ */
+static int
+pf_unload(void)
+{
+	int error = 0;
+
+	pf_end_threads = 1;
+	while (pf_end_threads < 2) {
+		wakeup_one(pf_purge_thread);
+		rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftmo", 0);
+	}
+
+	if (pf_dev != NULL)
+		destroy_dev(pf_dev);
+
+	pfi_cleanup();
+
+	rw_destroy(&pf_rules_lock);
+	sx_destroy(&pf_ioctl_lock);
+
+	return (error);
+}
+
+/* VNET_SYSINIT glue: bring pf up in each vnet at firewall init time. */
+static void
+vnet_pf_init(void *unused __unused)
+{
+
+	pf_load_vnet();
+}
+VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, 
+    vnet_pf_init, NULL);
+
+/* VNET_SYSUNINIT glue: tear pf down in each vnet on destruction. */
+static void
+vnet_pf_uninit(const void *unused __unused)
+{
+
+	pf_unload_vnet();
+}
+VNET_SYSUNINIT(vnet_pf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
+    vnet_pf_uninit, NULL);
+
+
+/*
+ * Module event handler.  MOD_QUIESCE is refused (EBUSY) so kldunload
+ * without -f cannot unload pf; MOD_UNLOAD still runs when forced.
+ */
+static int
+pf_modevent(module_t mod, int type, void *data)
+{
+	int error = 0;
+
+	switch(type) {
+	case MOD_LOAD:
+		error = pf_load();
+		break;
+	case MOD_QUIESCE:
+		/*
+		 * Module should not be unloaded due to race conditions.
+		 */
+		error = EBUSY;
+		break;
+	case MOD_UNLOAD:
+		error = pf_unload();
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return (error);
+}
+
+static moduledata_t pf_mod = {
+ "pf",
+ pf_modevent,
+ 0
+};
+
+DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND);
+MODULE_VERSION(pf, PF_MODVER);
diff --git a/freebsd/sys/netpfil/pf/pf_lb.c b/freebsd/sys/netpfil/pf/pf_lb.c
new file mode 100644
index 00000000..033c3879
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/pf_lb.c
@@ -0,0 +1,681 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2001 Daniel Hartmeier
+ * Copyright (c) 2002 - 2008 Henning Brauer
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Effort sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F30602-01-2-0537.
+ *
+ * $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_pf.h>
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/vnet.h>
+#include <net/pfvar.h>
+#include <net/if_pflog.h>
+
+#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
+
+static void pf_hash(struct pf_addr *, struct pf_addr *,
+ struct pf_poolhashkey *, sa_family_t);
+static struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *,
+ int, int, struct pfi_kif *,
+ struct pf_addr *, u_int16_t, struct pf_addr *,
+ uint16_t, int, struct pf_anchor_stackframe *);
+static int pf_get_sport(sa_family_t, uint8_t, struct pf_rule *,
+ struct pf_addr *, uint16_t, struct pf_addr *, uint16_t, struct pf_addr *,
+ uint16_t *, uint16_t, uint16_t, struct pf_src_node **);
+
+#define mix(a,b,c) \
+ do { \
+ a -= b; a -= c; a ^= (c >> 13); \
+ b -= c; b -= a; b ^= (a << 8); \
+ c -= a; c -= b; c ^= (b >> 13); \
+ a -= b; a -= c; a ^= (c >> 12); \
+ b -= c; b -= a; b ^= (a << 16); \
+ c -= a; c -= b; c ^= (b >> 5); \
+ a -= b; a -= c; a ^= (c >> 3); \
+ b -= c; b -= a; b ^= (a << 10); \
+ c -= a; c -= b; c ^= (b >> 15); \
+ } while (0)
+
+/*
+ * hash function based on bridge_hash in if_bridge.c
+ */
+/*
+ * Deterministically hash address *inaddr into *hash using the pool
+ * hash key, via the Jenkins-style mix() above.  For AF_INET only
+ * addr32[0] of the output is produced; for AF_INET6 all four words
+ * are filled.  Used by the source-hash pool address selection.
+ */
+static void
+pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
+    struct pf_poolhashkey *key, sa_family_t af)
+{
+	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
+
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		a += inaddr->addr32[0];
+		b += key->key32[1];
+		mix(a, b, c);
+		hash->addr32[0] = c + key->key32[2];
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		a += inaddr->addr32[0];
+		b += inaddr->addr32[2];
+		mix(a, b, c);
+		hash->addr32[0] = c;
+		a += inaddr->addr32[1];
+		b += inaddr->addr32[3];
+		c += key->key32[1];
+		mix(a, b, c);
+		hash->addr32[1] = c;
+		a += inaddr->addr32[2];
+		b += inaddr->addr32[1];
+		c += key->key32[2];
+		mix(a, b, c);
+		hash->addr32[2] = c;
+		a += inaddr->addr32[3];
+		b += inaddr->addr32[0];
+		c += key->key32[3];
+		mix(a, b, c);
+		hash->addr32[3] = c;
+		break;
+#endif /* INET6 */
+	}
+}
+
+/*
+ * Walk the active translation ruleset rs_num (NAT/RDR/BINAT) and return
+ * the first rule matching the given packet parameters, descending into
+ * anchors as needed.  Uses the precomputed skip steps to jump over runs
+ * of rules that cannot match.  Side effects on match: the packet may be
+ * tagged (pf_tag_packet) and its FIB set from the rule's rtableid.
+ * Returns NULL if nothing matches, tagging fails, or the matched rule
+ * is a "no nat/rdr/binat" rule.
+ */
+static struct pf_rule *
+pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
+    int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
+    struct pf_addr *daddr, uint16_t dport, int rs_num,
+    struct pf_anchor_stackframe *anchor_stack)
+{
+	struct pf_rule		*r, *rm = NULL;
+	struct pf_ruleset	*ruleset = NULL;
+	int			 tag = -1;
+	int			 rtableid = -1;
+	int			 asd = 0;
+
+	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
+	while (r && rm == NULL) {
+		struct pf_rule_addr	*src = NULL, *dst = NULL;
+		struct pf_addr_wrap	*xdst = NULL;
+
+		/*
+		 * For inbound binat the rule is matched "backwards":
+		 * the rule's dst is the packet's source and the pool
+		 * address (xdst) is matched against the packet's dst.
+		 */
+		if (r->action == PF_BINAT && direction == PF_IN) {
+			src = &r->dst;
+			if (r->rpool.cur != NULL)
+				xdst = &r->rpool.cur->addr;
+		} else {
+			src = &r->src;
+			dst = &r->dst;
+		}
+
+		r->evaluations++;
+		if (pfi_kif_match(r->kif, kif) == r->ifnot)
+			r = r->skip[PF_SKIP_IFP].ptr;
+		else if (r->direction && r->direction != direction)
+			r = r->skip[PF_SKIP_DIR].ptr;
+		else if (r->af && r->af != pd->af)
+			r = r->skip[PF_SKIP_AF].ptr;
+		else if (r->proto && r->proto != pd->proto)
+			r = r->skip[PF_SKIP_PROTO].ptr;
+		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
+		    src->neg, kif, M_GETFIB(m)))
+			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
+			    PF_SKIP_DST_ADDR].ptr;
+		else if (src->port_op && !pf_match_port(src->port_op,
+		    src->port[0], src->port[1], sport))
+			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
+			    PF_SKIP_DST_PORT].ptr;
+		else if (dst != NULL &&
+		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL,
+		    M_GETFIB(m)))
+			r = r->skip[PF_SKIP_DST_ADDR].ptr;
+		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
+		    0, NULL, M_GETFIB(m)))
+			r = TAILQ_NEXT(r, entries);
+		else if (dst != NULL && dst->port_op &&
+		    !pf_match_port(dst->port_op, dst->port[0],
+		    dst->port[1], dport))
+			r = r->skip[PF_SKIP_DST_PORT].ptr;
+		else if (r->match_tag && !pf_match_tag(m, r, &tag,
+		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
+			r = TAILQ_NEXT(r, entries);
+		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
+		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
+		    off, pd->hdr.tcp), r->os_fingerprint)))
+			r = TAILQ_NEXT(r, entries);
+		else {
+			/* Full match: remember tag/rtableid, then either
+			 * take the rule or descend into its anchor. */
+			if (r->tag)
+				tag = r->tag;
+			if (r->rtableid >= 0)
+				rtableid = r->rtableid;
+			if (r->anchor == NULL) {
+				rm = r;
+			} else
+				pf_step_into_anchor(anchor_stack, &asd,
+				    &ruleset, rs_num, &r, NULL, NULL);
+		}
+		if (r == NULL)
+			pf_step_out_of_anchor(anchor_stack, &asd, &ruleset,
+			    rs_num, &r, NULL, NULL);
+	}
+
+	if (tag > 0 && pf_tag_packet(m, pd, tag))
+		return (NULL);
+	if (rtableid >= 0)
+		M_SETFIB(m, rtableid);
+
+	/* "no nat/rdr/binat" rules match but yield no translation. */
+	if (rm != NULL && (rm->action == PF_NONAT ||
+	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
+		return (NULL);
+	return (rm);
+}
+
+/*
+ * Choose a translated source address (*naddr, via pf_map_addr()) and a
+ * free source port (*nport) in [low, high] for a new NAT state.  A port
+ * is "free" when no state with the resulting key exists.  The search
+ * starts at a random cut point and scans up, then down; if the current
+ * pool address is exhausted the next address is tried (random or
+ * round-robin pools only) until the pool wraps back to init_addr.
+ * Returns 0 on success, 1 if no address/port combination is available.
+ */
+static int
+pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
+    struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
+    uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low,
+    uint16_t high, struct pf_src_node **sn)
+{
+	struct pf_state_key_cmp	key;
+	struct pf_addr		init_addr;
+
+	bzero(&init_addr, sizeof(init_addr));
+	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
+		return (1);
+
+	/* ICMP "ports" are query ids; any 16-bit value will do. */
+	if (proto == IPPROTO_ICMP) {
+		low = 1;
+		high = 65535;
+	}
+
+	bzero(&key, sizeof(key));
+	key.af = af;
+	key.proto = proto;
+	key.port[0] = dport;
+	PF_ACPY(&key.addr[0], daddr, key.af);
+
+	do {
+		PF_ACPY(&key.addr[1], naddr, key.af);
+
+		/*
+		 * port search; start random, step;
+		 * similar 2 portloop in in_pcbbind
+		 */
+		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
+		    proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
+			/*
+			 * XXX bug: icmp states don't use the id on both sides.
+			 * (traceroute -I through nat)
+			 */
+			/* No port remapping: keep the original sport. */
+			key.port[1] = sport;
+			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
+				*nport = sport;
+				return (0);
+			}
+		} else if (low == high) {
+			key.port[1] = htons(low);
+			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
+				*nport = htons(low);
+				return (0);
+			}
+		} else {
+			uint16_t tmp, cut;
+
+			if (low > high) {
+				tmp = low;
+				low = high;
+				high = tmp;
+			}
+			/* low < high */
+			cut = arc4random() % (1 + high - low) + low;
+			/* low <= cut <= high */
+			for (tmp = cut; tmp <= high; ++(tmp)) {
+				key.port[1] = htons(tmp);
+				if (pf_find_state_all(&key, PF_IN, NULL) ==
+				    NULL) {
+					*nport = htons(tmp);
+					return (0);
+				}
+			}
+			/*
+			 * NOTE(review): tmp is uint16_t; if low == 0 the
+			 * condition tmp >= low never becomes false and tmp
+			 * wraps from 0 to 65535 — confirm low > 0 is
+			 * guaranteed by callers, or bound the loop as later
+			 * upstream versions do.
+			 */
+			for (tmp = cut - 1; tmp >= low; --(tmp)) {
+				key.port[1] = htons(tmp);
+				if (pf_find_state_all(&key, PF_IN, NULL) ==
+				    NULL) {
+					*nport = htons(tmp);
+					return (0);
+				}
+			}
+		}
+
+		/* Address exhausted: advance the pool if the type allows. */
+		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
+		case PF_POOL_RANDOM:
+		case PF_POOL_ROUNDROBIN:
+			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
+				return (1);
+			break;
+		case PF_POOL_NONE:
+		case PF_POOL_SRCHASH:
+		case PF_POOL_BITMASK:
+		default:
+			return (1);
+		}
+	} while (! PF_AEQ(&init_addr, naddr, af) );
+	return (1);					/* none available */
+}
+
+/*
+ * Choose an address from r->rpool and store it in "naddr"; used for
+ * NAT/RDR address selection.  Honours the pool type in r->rpool.opts
+ * (none/bitmask/random/source-hash/round-robin).  For random and
+ * round-robin pools the first address chosen is also recorded in
+ * "*init_addr" (when non-NULL) so callers can detect pool wrap-around.
+ * With sticky-address, the choice is cached in the source node "*sn".
+ * Returns 0 on success, 1 if no usable address could be selected.
+ */
+int
+pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
+    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
+{
+	struct pf_pool *rpool = &r->rpool;
+	struct pf_addr *raddr = NULL, *rmask = NULL;
+
+	/* Try to find a src_node if none was given and this
+	   is a sticky-address rule. */
+	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
+	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
+		*sn = pf_find_src_node(saddr, r, af, 0);
+
+	/* If a src_node was found or explicitly given and it has a non-zero
+	   route address, use this address. A zeroed address is found if the
+	   src node was created just a moment ago in pf_create_state and it
+	   needs to be filled in with routing decision calculated here. */
+	if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
+		PF_ACPY(naddr, &(*sn)->raddr, af);
+		if (V_pf_status.debug >= PF_DEBUG_MISC) {
+			printf("pf_map_addr: src tracking maps ");
+			pf_print_host(saddr, 0, af);
+			printf(" to ");
+			pf_print_host(naddr, 0, af);
+			printf("\n");
+		}
+		return (0);
+	}
+
+	/* Find the route using chosen algorithm. Store the found route
+	   in src_node if it was given or found. */
+	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
+		return (1);
+	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+		switch (af) {
+#ifdef INET
+		case AF_INET:
+			/* Fail if the dynamic interface has no IPv4 address,
+			 * unless round-robin may still find one below. */
+			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
+			    (rpool->opts & PF_POOL_TYPEMASK) !=
+			    PF_POOL_ROUNDROBIN)
+				return (1);
+			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
+			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
+			    (rpool->opts & PF_POOL_TYPEMASK) !=
+			    PF_POOL_ROUNDROBIN)
+				return (1);
+			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
+			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
+			break;
+#endif /* INET6 */
+		}
+	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
+			return (1); /* unsupported */
+	} else {
+		raddr = &rpool->cur->addr.v.a.addr;
+		rmask = &rpool->cur->addr.v.a.mask;
+	}
+
+	switch (rpool->opts & PF_POOL_TYPEMASK) {
+	case PF_POOL_NONE:
+		PF_ACPY(naddr, raddr, af);
+		break;
+	case PF_POOL_BITMASK:
+		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
+		break;
+	case PF_POOL_RANDOM:
+		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
+			switch (af) {
+#ifdef INET
+			case AF_INET:
+				rpool->counter.addr32[0] = htonl(arc4random());
+				break;
+#endif /* INET */
+#ifdef INET6
+			case AF_INET6:
+				/*
+				 * Randomize the low-order 32-bit words that
+				 * the pool mask leaves open; stop at the
+				 * first fully-masked word.
+				 */
+				if (rmask->addr32[3] != 0xffffffff)
+					rpool->counter.addr32[3] =
+					    htonl(arc4random());
+				else
+					break;
+				if (rmask->addr32[2] != 0xffffffff)
+					rpool->counter.addr32[2] =
+					    htonl(arc4random());
+				else
+					break;
+				if (rmask->addr32[1] != 0xffffffff)
+					rpool->counter.addr32[1] =
+					    htonl(arc4random());
+				else
+					break;
+				if (rmask->addr32[0] != 0xffffffff)
+					rpool->counter.addr32[0] =
+					    htonl(arc4random());
+				break;
+#endif /* INET6 */
+			}
+			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
+			PF_ACPY(init_addr, naddr, af);
+
+		} else {
+			/* Subsequent calls walk sequentially from the
+			 * random starting point. */
+			PF_AINC(&rpool->counter, af);
+			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
+		}
+		break;
+	case PF_POOL_SRCHASH:
+	    {
+		unsigned char hash[16];
+
+		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
+		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
+		break;
+	    }
+	case PF_POOL_ROUNDROBIN:
+	    {
+		struct pf_pooladdr *acur = rpool->cur;
+
+		/*
+		 * XXXGL: in the round-robin case we need to store
+		 * the round-robin machine state in the rule, thus
+		 * forwarding thread needs to modify rule.
+		 *
+		 * This is done w/o locking, because performance is assumed
+		 * more important than round-robin precision.
+		 *
+		 * In the simpliest case we just update the "rpool->cur"
+		 * pointer. However, if pool contains tables or dynamic
+		 * addresses, then "tblidx" is also used to store machine
+		 * state. Since "tblidx" is int, concurrent access to it can't
+		 * lead to inconsistence, only to lost of precision.
+		 *
+		 * Things get worse, if table contains not hosts, but
+		 * prefixes. In this case counter also stores machine state,
+		 * and for IPv6 address, counter can't be updated atomically.
+		 * Probably, using round-robin on a table containing IPv6
+		 * prefixes (or even IPv4) would cause a panic.
+		 */
+
+		/* If the current pool entry still yields an address of this
+		 * family, keep using it; otherwise advance. */
+		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
+			    &rpool->tblidx, &rpool->counter, af))
+				goto get_addr;
+		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+			    &rpool->tblidx, &rpool->counter, af))
+				goto get_addr;
+		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
+			goto get_addr;
+
+	try_next:
+		/* Advance to the next pool entry, wrapping to the list
+		 * head; give up after one full cycle (back at "acur"). */
+		if (TAILQ_NEXT(rpool->cur, entries) == NULL)
+			rpool->cur = TAILQ_FIRST(&rpool->list);
+		else
+			rpool->cur = TAILQ_NEXT(rpool->cur, entries);
+		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+			rpool->tblidx = -1;
+			if (pfr_pool_get(rpool->cur->addr.p.tbl,
+			    &rpool->tblidx, &rpool->counter, af)) {
+				/* table contains no address of type 'af' */
+				if (rpool->cur != acur)
+					goto try_next;
+				return (1);
+			}
+		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+			rpool->tblidx = -1;
+			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+			    &rpool->tblidx, &rpool->counter, af)) {
+				/* table contains no address of type 'af' */
+				if (rpool->cur != acur)
+					goto try_next;
+				return (1);
+			}
+		} else {
+			raddr = &rpool->cur->addr.v.a.addr;
+			rmask = &rpool->cur->addr.v.a.mask;
+			PF_ACPY(&rpool->counter, raddr, af);
+		}
+
+	get_addr:
+		PF_ACPY(naddr, &rpool->counter, af);
+		if (init_addr != NULL && PF_AZERO(init_addr, af))
+			PF_ACPY(init_addr, naddr, af);
+		PF_AINC(&rpool->counter, af);
+		break;
+	    }
+	}
+	if (*sn != NULL)
+		PF_ACPY(&(*sn)->raddr, naddr, af);
+
+	if (V_pf_status.debug >= PF_DEBUG_MISC &&
+	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
+		printf("pf_map_addr: selected address ");
+		pf_print_host(naddr, 0, af);
+		printf("\n");
+	}
+
+	return (0);
+}
+
+/*
+ * Find a translation rule (binat/nat/rdr) matching the packet described
+ * by "pd" and, if one applies, set up the pair of state keys: *skp is
+ * the wire-side key, *nkp its translated clone.  Only side [1] of *nkp
+ * is rewritten for now.  Returns the matching rule on success; NULL
+ * (with *skp/*nkp freed and reset) if no rule matched or no translation
+ * actually changed the key.
+ */
+struct pf_rule *
+pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
+    struct pfi_kif *kif, struct pf_src_node **sn,
+    struct pf_state_key **skp, struct pf_state_key **nkp,
+    struct pf_addr *saddr, struct pf_addr *daddr,
+    uint16_t sport, uint16_t dport, struct pf_anchor_stackframe *anchor_stack)
+{
+	struct pf_rule *r = NULL;
+	struct pf_addr *naddr;
+	uint16_t *nport;
+
+	PF_RULES_RASSERT();
+	KASSERT(*skp == NULL, ("*skp not NULL"));
+	KASSERT(*nkp == NULL, ("*nkp not NULL"));
+
+	/* Outbound packets consult binat then nat; inbound rdr then binat. */
+	if (direction == PF_OUT) {
+		r = pf_match_translation(pd, m, off, direction, kif, saddr,
+		    sport, daddr, dport, PF_RULESET_BINAT, anchor_stack);
+		if (r == NULL)
+			r = pf_match_translation(pd, m, off, direction, kif,
+			    saddr, sport, daddr, dport, PF_RULESET_NAT,
+			    anchor_stack);
+	} else {
+		r = pf_match_translation(pd, m, off, direction, kif, saddr,
+		    sport, daddr, dport, PF_RULESET_RDR, anchor_stack);
+		if (r == NULL)
+			r = pf_match_translation(pd, m, off, direction, kif,
+			    saddr, sport, daddr, dport, PF_RULESET_BINAT,
+			    anchor_stack);
+	}
+
+	if (r == NULL)
+		return (NULL);
+
+	/* "no nat/binat/rdr" rules exempt the packet from translation. */
+	switch (r->action) {
+	case PF_NONAT:
+	case PF_NOBINAT:
+	case PF_NORDR:
+		return (NULL);
+	}
+
+	*skp = pf_state_key_setup(pd, saddr, daddr, sport, dport);
+	if (*skp == NULL)
+		return (NULL);
+	*nkp = pf_state_key_clone(*skp);
+	if (*nkp == NULL) {
+		/* Free the state key itself, not the caller's pointer slot. */
+		uma_zfree(V_pf_state_key_z, *skp);
+		*skp = NULL;
+		return (NULL);
+	}
+
+	/* XXX We only modify one side for now. */
+	naddr = &(*nkp)->addr[1];
+	nport = &(*nkp)->port[1];
+
+	switch (r->action) {
+	case PF_NAT:
+		if (pf_get_sport(pd->af, pd->proto, r, saddr, sport, daddr,
+		    dport, naddr, nport, r->rpool.proxy_port[0],
+		    r->rpool.proxy_port[1], sn)) {
+			DPFPRINTF(PF_DEBUG_MISC,
+			    ("pf: NAT proxy port allocation (%u-%u) failed\n",
+			    r->rpool.proxy_port[0], r->rpool.proxy_port[1]));
+			goto notrans;
+		}
+		break;
+	case PF_BINAT:
+		switch (direction) {
+		case PF_OUT:
+			if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
+				switch (pd->af) {
+#ifdef INET
+				case AF_INET:
+					if (r->rpool.cur->addr.p.dyn->
+					    pfid_acnt4 < 1)
+						goto notrans;
+					PF_POOLMASK(naddr,
+					    &r->rpool.cur->addr.p.dyn->
+					    pfid_addr4,
+					    &r->rpool.cur->addr.p.dyn->
+					    pfid_mask4, saddr, AF_INET);
+					break;
+#endif /* INET */
+#ifdef INET6
+				case AF_INET6:
+					if (r->rpool.cur->addr.p.dyn->
+					    pfid_acnt6 < 1)
+						goto notrans;
+					PF_POOLMASK(naddr,
+					    &r->rpool.cur->addr.p.dyn->
+					    pfid_addr6,
+					    &r->rpool.cur->addr.p.dyn->
+					    pfid_mask6, saddr, AF_INET6);
+					break;
+#endif /* INET6 */
+				}
+			} else
+				PF_POOLMASK(naddr,
+				    &r->rpool.cur->addr.v.a.addr,
+				    &r->rpool.cur->addr.v.a.mask, saddr,
+				    pd->af);
+			break;
+		case PF_IN:
+			/* Inbound binat reverses the mapping using the
+			 * rule's source address/mask. */
+			if (r->src.addr.type == PF_ADDR_DYNIFTL) {
+				switch (pd->af) {
+#ifdef INET
+				case AF_INET:
+					if (r->src.addr.p.dyn->pfid_acnt4 < 1)
+						goto notrans;
+					PF_POOLMASK(naddr,
+					    &r->src.addr.p.dyn->pfid_addr4,
+					    &r->src.addr.p.dyn->pfid_mask4,
+					    daddr, AF_INET);
+					break;
+#endif /* INET */
+#ifdef INET6
+				case AF_INET6:
+					if (r->src.addr.p.dyn->pfid_acnt6 < 1)
+						goto notrans;
+					PF_POOLMASK(naddr,
+					    &r->src.addr.p.dyn->pfid_addr6,
+					    &r->src.addr.p.dyn->pfid_mask6,
+					    daddr, AF_INET6);
+					break;
+#endif /* INET6 */
+				}
+			} else
+				PF_POOLMASK(naddr, &r->src.addr.v.a.addr,
+				    &r->src.addr.v.a.mask, daddr, pd->af);
+			break;
+		}
+		break;
+	case PF_RDR: {
+		if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
+			goto notrans;
+		if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
+			PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask,
+			    daddr, pd->af);
+
+		/* Map the destination port into the redirect port range. */
+		if (r->rpool.proxy_port[1]) {
+			uint32_t tmp_nport;
+
+			tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) %
+			    (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] +
+			    1)) + r->rpool.proxy_port[0];
+
+			/* Wrap around if necessary. */
+			if (tmp_nport > 65535)
+				tmp_nport -= 65535;
+			*nport = htons((uint16_t)tmp_nport);
+		} else if (r->rpool.proxy_port[0])
+			*nport = htons(r->rpool.proxy_port[0]);
+		break;
+	}
+	default:
+		panic("%s: unknown action %u", __func__, r->action);
+	}
+
+	/* Return success only if translation really happened. */
+	if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp)))
+		return (r);
+
+notrans:
+	uma_zfree(V_pf_state_key_z, *nkp);
+	uma_zfree(V_pf_state_key_z, *skp);
+	*skp = *nkp = NULL;
+	*sn = NULL;
+
+	return (NULL);
+}
diff --git a/freebsd/sys/netpfil/pf/pf_mtag.h b/freebsd/sys/netpfil/pf/pf_mtag.h
new file mode 100644
index 00000000..fd8554ae
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/pf_mtag.h
@@ -0,0 +1,64 @@
+/* $FreeBSD$ */
+/*
+ * Copyright (c) 2001 Daniel Hartmeier
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _NET_PF_MTAG_H_
+#define _NET_PF_MTAG_H_
+
+#ifdef _KERNEL
+
+/* Flag bits for pf_mtag.flags. */
+#define PF_TAG_GENERATED 0x01
+#define PF_TAG_FRAGCACHE 0x02
+#define PF_TAG_TRANSLATE_LOCALHOST 0x04
+#define PF_PACKET_LOOPED 0x08
+#define PF_FASTFWD_OURS_PRESENT 0x10
+#define PF_REASSEMBLED 0x20
+
+/*
+ * Per-packet pf metadata, carried as the payload of an mbuf tag
+ * (see pf_find_mtag() below).
+ */
+struct pf_mtag {
+	void *hdr; /* saved hdr pos in mbuf, for ECN */
+	u_int32_t qid; /* queue id */
+	u_int32_t qid_hash; /* queue hashid used by WFQ like algos */
+	u_int16_t tag; /* tag id */
+	u_int8_t flags; /* PF_TAG_* / PF_* bits above */
+	u_int8_t routed;
+};
+
+/* Return the pf metadata attached to "m", or NULL if none is present. */
+static __inline struct pf_mtag *
+pf_find_mtag(struct mbuf *m)
+{
+	struct m_tag *mtag;
+
+	mtag = m_tag_find(m, PACKET_TAG_PF, NULL);
+	if (mtag == NULL)
+		return (NULL);
+
+	/* The pf_mtag payload immediately follows the m_tag header. */
+	return ((struct pf_mtag *)(mtag + 1));
+}
+#endif /* _KERNEL */
+#endif /* _NET_PF_MTAG_H_ */
diff --git a/freebsd/sys/netpfil/pf/pf_norm.c b/freebsd/sys/netpfil/pf/pf_norm.c
new file mode 100644
index 00000000..86d2c8eb
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/pf_norm.c
@@ -0,0 +1,1843 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright 2001 Niels Provos <provos@citi.umich.edu>
+ * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_pf.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/refcount.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+
+#include <net/if.h>
+#include <net/vnet.h>
+#include <net/pfvar.h>
+#include <net/if_pflog.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet6/ip6_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif /* INET6 */
+
+/* One buffered fragment; kept on a pf_fragment queue in offset order. */
+struct pf_frent {
+	TAILQ_ENTRY(pf_frent) fr_next;
+	struct mbuf *fe_m;
+	uint16_t fe_hdrlen; /* ipv4 header length with ip options
+			    ipv6, extension, fragment header */
+	uint16_t fe_extoff; /* last extension header offset or 0 */
+	uint16_t fe_len; /* fragment length */
+	uint16_t fe_off; /* fragment offset */
+	uint16_t fe_mff; /* more fragment flag */
+};
+
+/* Lookup key for the fragment tree; compared by pf_frag_compare(). */
+struct pf_fragment_cmp {
+	struct pf_addr frc_src;
+	struct pf_addr frc_dst;
+	uint32_t frc_id;
+	sa_family_t frc_af;
+	uint8_t frc_proto;
+};
+
+/* One reassembly queue; first member doubles as the tree key. */
+struct pf_fragment {
+	struct pf_fragment_cmp fr_key;
+#define fr_src fr_key.frc_src
+#define fr_dst fr_key.frc_dst
+#define fr_id fr_key.frc_id
+#define fr_af fr_key.frc_af
+#define fr_proto fr_key.frc_proto
+
+	RB_ENTRY(pf_fragment) fr_entry;
+	TAILQ_ENTRY(pf_fragment) frag_next;
+	uint32_t fr_timeout;
+	uint16_t fr_maxlen; /* maximum length of single fragment */
+	TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
+};
+
+/* mbuf tag payload used to refragment a reassembled IPv6 packet. */
+struct pf_fragment_tag {
+	uint16_t ft_hdrlen; /* header length of reassembled pkt */
+	uint16_t ft_extoff; /* last extension header offset or 0 */
+	uint16_t ft_maxlen; /* maximum fragment payload length */
+	uint32_t ft_id; /* fragment id */
+};
+
+/* Global mutex guarding the per-VNET fragment queues and tree. */
+static struct mtx pf_frag_mtx;
+MTX_SYSINIT(pf_frag_mtx, &pf_frag_mtx, "pf fragments", MTX_DEF);
+#define PF_FRAG_LOCK() mtx_lock(&pf_frag_mtx)
+#define PF_FRAG_UNLOCK() mtx_unlock(&pf_frag_mtx)
+#define PF_FRAG_ASSERT() mtx_assert(&pf_frag_mtx, MA_OWNED)
+
+VNET_DEFINE(uma_zone_t, pf_state_scrub_z); /* XXX: shared with pfsync */
+
+static VNET_DEFINE(uma_zone_t, pf_frent_z);
+#define V_pf_frent_z VNET(pf_frent_z)
+static VNET_DEFINE(uma_zone_t, pf_frag_z);
+#define V_pf_frag_z VNET(pf_frag_z)
+
+/* Fragment queues are kept in LRU order; the RB tree indexes them. */
+TAILQ_HEAD(pf_fragqueue, pf_fragment);
+TAILQ_HEAD(pf_cachequeue, pf_fragment);
+static VNET_DEFINE(struct pf_fragqueue, pf_fragqueue);
+#define V_pf_fragqueue VNET(pf_fragqueue)
+RB_HEAD(pf_frag_tree, pf_fragment);
+static VNET_DEFINE(struct pf_frag_tree, pf_frag_tree);
+#define V_pf_frag_tree VNET(pf_frag_tree)
+static int pf_frag_compare(struct pf_fragment *,
+    struct pf_fragment *);
+static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
+static RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
+
+static void pf_flush_fragments(void);
+static void pf_free_fragment(struct pf_fragment *);
+static void pf_remove_fragment(struct pf_fragment *);
+static int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
+    struct tcphdr *, int, sa_family_t);
+static struct pf_frent *pf_create_fragment(u_short *);
+static struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key,
+    struct pf_frag_tree *tree);
+static struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *,
+    struct pf_frent *, u_short *);
+static int pf_isfull_fragment(struct pf_fragment *);
+static struct mbuf *pf_join_fragment(struct pf_fragment *);
+#ifdef INET
+static void pf_scrub_ip(struct mbuf **, uint32_t, uint8_t, uint8_t);
+static int pf_reassemble(struct mbuf **, struct ip *, int, u_short *);
+#endif /* INET */
+#ifdef INET6
+static int pf_reassemble6(struct mbuf **, struct ip6_hdr *,
+    struct ip6_frag *, uint16_t, uint16_t, u_short *);
+static void pf_scrub_ip6(struct mbuf **, uint8_t);
+#endif /* INET6 */
+
+/* Debug printf, active only at PF_DEBUG_MISC verbosity or higher. */
+#define DPFPRINTF(x) do { \
+	if (V_pf_status.debug >= PF_DEBUG_MISC) { \
+		printf("%s: ", __func__); \
+		printf x ; \
+	} \
+} while(0)
+
+#ifdef INET
+/* Build the fragment-tree lookup key for an IPv4 packet. */
+static void
+pf_ip2key(struct ip *ip, int dir, struct pf_fragment_cmp *key)
+{
+
+	/* IPv4 fragments are matched on address pair, protocol and id. */
+	key->frc_af = AF_INET;
+	key->frc_proto = ip->ip_p;
+	key->frc_id = ip->ip_id;
+	key->frc_src.v4 = ip->ip_src;
+	key->frc_dst.v4 = ip->ip_dst;
+	/* NOTE(review): "dir" is currently unused here. */
+}
+#endif /* INET */
+
+/*
+ * Create the UMA zones and per-VNET state used by the normalizer.
+ * Only the fragment-entry zone is bound to the PF_LIMIT_FRAGS limit.
+ */
+void
+pf_normalize_init(void)
+{
+
+	V_pf_frag_z = uma_zcreate("pf frags", sizeof(struct pf_fragment),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+	V_pf_frent_z = uma_zcreate("pf frag entries", sizeof(struct pf_frent),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+	V_pf_state_scrub_z = uma_zcreate("pf state scrubs",
+	    sizeof(struct pf_state_scrub), NULL, NULL, NULL, NULL,
+	    UMA_ALIGN_PTR, 0);
+
+	V_pf_limits[PF_LIMIT_FRAGS].zone = V_pf_frent_z;
+	V_pf_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT;
+	uma_zone_set_max(V_pf_frent_z, PFFRAG_FRENT_HIWAT);
+	uma_zone_set_warning(V_pf_frent_z, "PF frag entries limit reached");
+
+	TAILQ_INIT(&V_pf_fragqueue);
+}
+
+/* Tear down the UMA zones created in pf_normalize_init(). */
+void
+pf_normalize_cleanup(void)
+{
+
+	uma_zdestroy(V_pf_state_scrub_z);
+	uma_zdestroy(V_pf_frent_z);
+	uma_zdestroy(V_pf_frag_z);
+}
+
+/*
+ * RB-tree comparator: order fragment queues by id, then protocol,
+ * address family and finally the address pair.
+ */
+static int
+pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
+{
+	int d;
+
+	d = a->fr_id - b->fr_id;
+	if (d != 0)
+		return (d);
+	d = a->fr_proto - b->fr_proto;
+	if (d != 0)
+		return (d);
+	d = a->fr_af - b->fr_af;
+	if (d != 0)
+		return (d);
+	d = pf_addr_cmp(&a->fr_src, &b->fr_src, a->fr_af);
+	if (d != 0)
+		return (d);
+	d = pf_addr_cmp(&a->fr_dst, &b->fr_dst, a->fr_af);
+	return (d);
+}
+
+/*
+ * Expire fragment queues older than the PFTM_FRAG timeout.  The queue
+ * is kept in LRU order (pf_find_fragment moves hits to the head), so
+ * scanning from the tail can stop at the first still-fresh entry.
+ */
+void
+pf_purge_expired_fragments(void)
+{
+	struct pf_fragment *frag;
+	u_int32_t expire = time_uptime -
+	    V_pf_default_rule.timeout[PFTM_FRAG];
+
+	PF_FRAG_LOCK();
+	while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) {
+		if (frag->fr_timeout > expire)
+			break;
+
+		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
+		pf_free_fragment(frag);
+	}
+
+	PF_FRAG_UNLOCK();
+}
+
+/*
+ * Try to flush old fragments to make space for new ones: free the
+ * oldest (LRU tail) queues until the entry count drops to 90% of its
+ * current value.  Called with the fragment lock held.
+ */
+static void
+pf_flush_fragments(void)
+{
+	struct pf_fragment *frag;
+	int goal;
+
+	PF_FRAG_ASSERT();
+
+	goal = uma_zone_get_cur(V_pf_frent_z) * 9 / 10;
+	/* Fixed typo in debug message: "entriess" -> "entries". */
+	DPFPRINTF(("trying to free %d frag entries\n", goal));
+	while (goal < uma_zone_get_cur(V_pf_frent_z)) {
+		frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue);
+		if (frag)
+			pf_free_fragment(frag);
+		else
+			break;
+	}
+}
+
+/*
+ * Release every fragment entry queued on "frag" (including its mbuf)
+ * and then the fragment queue itself.  Fragment lock must be held.
+ */
+static void
+pf_free_fragment(struct pf_fragment *frag)
+{
+	struct pf_frent *frent;
+
+	PF_FRAG_ASSERT();
+
+	/* Drain the entry queue, freeing each mbuf and entry. */
+	while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) {
+		TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
+		m_freem(frent->fe_m);
+		uma_zfree(V_pf_frent_z, frent);
+	}
+
+	pf_remove_fragment(frag);
+}
+
+/*
+ * Look up a fragment queue by key.  On a hit the queue's timeout is
+ * refreshed and it is moved to the head of the LRU list.
+ */
+static struct pf_fragment *
+pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree)
+{
+	struct pf_fragment *frag;
+
+	PF_FRAG_ASSERT();
+
+	frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key);
+	if (frag != NULL) {
+		/* XXX Are we sure we want to update the timeout? */
+		frag->fr_timeout = time_uptime;
+		TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
+		TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
+	}
+
+	return (frag);
+}
+
+/* Removes a fragment from the fragment queue and frees the fragment */
+/* (does not free queued entries -- see pf_free_fragment() for that). */
+static void
+pf_remove_fragment(struct pf_fragment *frag)
+{
+
+	PF_FRAG_ASSERT();
+
+	RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag);
+	TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
+	uma_zfree(V_pf_frag_z, frag);
+}
+
+/*
+ * Allocate a fragment queue entry.  On shortage, flush old fragment
+ * queues once and retry; set *reason and return NULL if that fails too.
+ */
+static struct pf_frent *
+pf_create_fragment(u_short *reason)
+{
+	struct pf_frent *frent;
+
+	PF_FRAG_ASSERT();
+
+	frent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
+	if (frent != NULL)
+		return (frent);
+
+	/* Zone exhausted: reclaim space from old fragments and retry. */
+	pf_flush_fragments();
+	frent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
+	if (frent != NULL)
+		return (frent);
+
+	REASON_SET(reason, PFRES_MEMORY);
+	return (NULL);
+}
+
+/*
+ * Insert fragment entry "frent" into the reassembly queue identified by
+ * "key", creating the queue if necessary.  Overlapping data is trimmed:
+ * the new fragment yields to already-queued data at its front, while
+ * queued fragments yield to the new one at its tail.  Returns the queue
+ * on success; on failure sets *reason, frees frent and returns NULL.
+ */
+static struct pf_fragment *
+pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
+    u_short *reason)
+{
+	struct pf_frent *after, *next, *prev;
+	struct pf_fragment *frag;
+	uint16_t total;
+
+	PF_FRAG_ASSERT();
+
+	/* No empty fragments. */
+	if (frent->fe_len == 0) {
+		DPFPRINTF(("bad fragment: len 0"));
+		goto bad_fragment;
+	}
+
+	/* All fragments are 8 byte aligned. */
+	if (frent->fe_mff && (frent->fe_len & 0x7)) {
+		DPFPRINTF(("bad fragment: mff and len %d", frent->fe_len));
+		goto bad_fragment;
+	}
+
+	/* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET. */
+	if (frent->fe_off + frent->fe_len > IP_MAXPACKET) {
+		DPFPRINTF(("bad fragment: max packet %d",
+		    frent->fe_off + frent->fe_len));
+		goto bad_fragment;
+	}
+
+	DPFPRINTF((key->frc_af == AF_INET ?
+	    "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d",
+	    key->frc_id, frent->fe_off, frent->fe_off + frent->fe_len));
+
+	/* Fully buffer all of the fragments in this fragment queue. */
+	frag = pf_find_fragment(key, &V_pf_frag_tree);
+
+	/* Create a new reassembly queue for this packet. */
+	if (frag == NULL) {
+		frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
+		if (frag == NULL) {
+			pf_flush_fragments();
+			frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
+			if (frag == NULL) {
+				REASON_SET(reason, PFRES_MEMORY);
+				goto drop_fragment;
+			}
+		}
+
+		*(struct pf_fragment_cmp *)frag = *key;
+		frag->fr_timeout = time_uptime;
+		frag->fr_maxlen = frent->fe_len;
+		TAILQ_INIT(&frag->fr_queue);
+
+		RB_INSERT(pf_frag_tree, &V_pf_frag_tree, frag);
+		TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
+
+		/* We do not have a previous fragment. */
+		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
+
+		return (frag);
+	}
+
+	KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue"));
+
+	/* Remember maximum fragment len for refragmentation. */
+	if (frent->fe_len > frag->fr_maxlen)
+		frag->fr_maxlen = frent->fe_len;
+
+	/* Maximum data we have seen already. */
+	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
+
+	/* Non terminal fragments must have more fragments flag. */
+	if (frent->fe_off + frent->fe_len < total && !frent->fe_mff)
+		goto bad_fragment;
+
+	/* Check if we saw the last fragment already. */
+	if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) {
+		if (frent->fe_off + frent->fe_len > total ||
+		    (frent->fe_off + frent->fe_len == total && frent->fe_mff))
+			goto bad_fragment;
+	} else {
+		if (frent->fe_off + frent->fe_len == total && !frent->fe_mff)
+			goto bad_fragment;
+	}
+
+	/* Find a fragment after the current one. */
+	prev = NULL;
+	TAILQ_FOREACH(after, &frag->fr_queue, fr_next) {
+		if (after->fe_off > frent->fe_off)
+			break;
+		prev = after;
+	}
+
+	KASSERT(prev != NULL || after != NULL,
+	    ("prev != NULL || after != NULL"));
+
+	/* Trim the front of the new fragment where it overlaps "prev". */
+	if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
+		uint16_t precut;
+
+		precut = prev->fe_off + prev->fe_len - frent->fe_off;
+		if (precut >= frent->fe_len)
+			goto bad_fragment;
+		DPFPRINTF(("overlap -%d", precut));
+		m_adj(frent->fe_m, precut);
+		frent->fe_off += precut;
+		frent->fe_len -= precut;
+	}
+
+	/* Trim (or drop) queued fragments that the new one overlaps. */
+	for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off;
+	    after = next) {
+		uint16_t aftercut;
+
+		aftercut = frent->fe_off + frent->fe_len - after->fe_off;
+		DPFPRINTF(("adjust overlap %d", aftercut));
+		if (aftercut < after->fe_len) {
+			m_adj(after->fe_m, aftercut);
+			after->fe_off += aftercut;
+			after->fe_len -= aftercut;
+			break;
+		}
+
+		/* This fragment is completely overlapped, lose it. */
+		next = TAILQ_NEXT(after, fr_next);
+		m_freem(after->fe_m);
+		TAILQ_REMOVE(&frag->fr_queue, after, fr_next);
+		uma_zfree(V_pf_frent_z, after);
+	}
+
+	if (prev == NULL)
+		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
+	else
+		TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
+
+	return (frag);
+
+bad_fragment:
+	REASON_SET(reason, PFRES_FRAG);
+drop_fragment:
+	uma_zfree(V_pf_frent_z, frent);
+	return (NULL);
+}
+
+/*
+ * Return 1 if the fragment queue holds a contiguous byte range ending
+ * in a fragment without the more-fragments flag (i.e. the packet can
+ * be reassembled); 0 otherwise.
+ */
+static int
+pf_isfull_fragment(struct pf_fragment *frag)
+{
+	struct pf_frent *frent, *next;
+	uint16_t off, total;
+
+	/* Check if we are completely reassembled */
+	if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff)
+		return (0);
+
+	/* Maximum data we have seen already */
+	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
+
+	/* Check if we have all the data */
+	off = 0;
+	for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) {
+		next = TAILQ_NEXT(frent, fr_next);
+
+		off += frent->fe_len;
+		if (off < total && (next == NULL || next->fe_off != off)) {
+			DPFPRINTF(("missing fragment at %d, next %d, total %d",
+			    off, next == NULL ? -1 : next->fe_off, total));
+			return (0);
+		}
+	}
+	DPFPRINTF(("%d < %d?", off, total));
+	if (off < total)
+		return (0);
+	KASSERT(off == total, ("off == total"));
+
+	return (1);
+}
+
+/*
+ * Concatenate all queued fragments into a single mbuf chain, stripping
+ * the IP header of every fragment but the first, and free the queue.
+ * Caller is expected to have verified completeness (pf_isfull_fragment).
+ */
+static struct mbuf *
+pf_join_fragment(struct pf_fragment *frag)
+{
+	struct mbuf *m, *m2;
+	struct pf_frent *frent, *next;
+
+	frent = TAILQ_FIRST(&frag->fr_queue);
+	next = TAILQ_NEXT(frent, fr_next);
+
+	m = frent->fe_m;
+	/* Negative m_adj() argument trims excess bytes from the tail. */
+	m_adj(m, (frent->fe_hdrlen + frent->fe_len) - m->m_pkthdr.len);
+	uma_zfree(V_pf_frent_z, frent);
+	for (frent = next; frent != NULL; frent = next) {
+		next = TAILQ_NEXT(frent, fr_next);
+
+		m2 = frent->fe_m;
+		/* Strip off ip header. */
+		m_adj(m2, frent->fe_hdrlen);
+		/* Strip off any trailing bytes. */
+		m_adj(m2, frent->fe_len - m2->m_pkthdr.len);
+
+		uma_zfree(V_pf_frent_z, frent);
+		m_cat(m, m2);
+	}
+
+	/* Remove from fragment queue. */
+	pf_remove_fragment(frag);
+
+	return (m);
+}
+
+#ifdef INET
+/*
+ * Reassemble an IPv4 fragment.  Returns PF_PASS with *m0 != NULL when
+ * the packet is complete (ip_len/ip_off rewritten), PF_PASS with
+ * *m0 == NULL when the fragment was buffered, and PF_DROP (with
+ * *reason set) on error.
+ * NOTE(review): pf_create_fragment() asserts the fragment mutex, so
+ * callers presumably hold PF_FRAG_LOCK -- confirm at the call sites.
+ */
+static int
+pf_reassemble(struct mbuf **m0, struct ip *ip, int dir, u_short *reason)
+{
+	struct mbuf *m = *m0;
+	struct pf_frent *frent;
+	struct pf_fragment *frag;
+	struct pf_fragment_cmp key;
+	uint16_t total, hdrlen;
+
+	/* Get an entry for the fragment queue */
+	if ((frent = pf_create_fragment(reason)) == NULL)
+		return (PF_DROP);
+
+	frent->fe_m = m;
+	frent->fe_hdrlen = ip->ip_hl << 2;
+	frent->fe_extoff = 0;
+	frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
+	frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
+	frent->fe_mff = ntohs(ip->ip_off) & IP_MF;
+
+	pf_ip2key(ip, dir, &key);
+
+	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
+		return (PF_DROP);
+
+	/* The mbuf is part of the fragment entry, no direct free or access */
+	m = *m0 = NULL;
+
+	if (!pf_isfull_fragment(frag))
+		return (PF_PASS);  /* drop because *m0 is NULL, no error */
+
+	/* We have all the data */
+	frent = TAILQ_FIRST(&frag->fr_queue);
+	KASSERT(frent != NULL, ("frent != NULL"));
+	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
+	hdrlen = frent->fe_hdrlen;
+
+	m = *m0 = pf_join_fragment(frag);
+	frag = NULL;
+
+	/* Recompute the packet header length after concatenation. */
+	if (m->m_flags & M_PKTHDR) {
+		int plen = 0;
+		for (m = *m0; m; m = m->m_next)
+			plen += m->m_len;
+		m = *m0;
+		m->m_pkthdr.len = plen;
+	}
+
+	ip = mtod(m, struct ip *);
+	ip->ip_len = htons(hdrlen + total);
+	ip->ip_off &= ~(IP_MF|IP_OFFMASK);
+
+	if (hdrlen + total > IP_MAXPACKET) {
+		DPFPRINTF(("drop: too big: %d", total));
+		ip->ip_len = 0;
+		REASON_SET(reason, PFRES_SHORT);
+		/* PF_DROP requires a valid mbuf *m0 in pf_test() */
+		return (PF_DROP);
+	}
+
+	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
+	return (PF_PASS);
+}
+#endif /* INET */
+
+#ifdef INET6
+/*
+ * Reassemble an IPv6 fragment.  Takes and drops the fragment lock
+ * itself.  On completion the frag6 header is removed, the protocol of
+ * the first fragment restored, and a pf_fragment_tag mbuf tag is
+ * attached so pf_refragment6() can re-fragment on output.  Returns
+ * PF_PASS with *m0 != NULL when the packet is complete, PF_PASS with
+ * *m0 == NULL when buffered, PF_DROP (with *reason set) on error.
+ */
+static int
+pf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr,
+    uint16_t hdrlen, uint16_t extoff, u_short *reason)
+{
+	struct mbuf *m = *m0;
+	struct pf_frent *frent;
+	struct pf_fragment *frag;
+	struct pf_fragment_cmp key;
+	struct m_tag *mtag;
+	struct pf_fragment_tag *ftag;
+	int off;
+	uint32_t frag_id;
+	uint16_t total, maxlen;
+	uint8_t proto;
+
+	PF_FRAG_LOCK();
+
+	/* Get an entry for the fragment queue. */
+	if ((frent = pf_create_fragment(reason)) == NULL) {
+		PF_FRAG_UNLOCK();
+		return (PF_DROP);
+	}
+
+	frent->fe_m = m;
+	frent->fe_hdrlen = hdrlen;
+	frent->fe_extoff = extoff;
+	frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen;
+	frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
+	frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG;
+
+	key.frc_src.v6 = ip6->ip6_src;
+	key.frc_dst.v6 = ip6->ip6_dst;
+	key.frc_af = AF_INET6;
+	/* Only the first fragment's protocol is relevant. */
+	key.frc_proto = 0;
+	key.frc_id = fraghdr->ip6f_ident;
+
+	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
+		PF_FRAG_UNLOCK();
+		return (PF_DROP);
+	}
+
+	/* The mbuf is part of the fragment entry, no direct free or access. */
+	m = *m0 = NULL;
+
+	if (!pf_isfull_fragment(frag)) {
+		PF_FRAG_UNLOCK();
+		return (PF_PASS);  /* Drop because *m0 is NULL, no error. */
+	}
+
+	/* We have all the data. */
+	extoff = frent->fe_extoff;
+	maxlen = frag->fr_maxlen;
+	frag_id = frag->fr_id;
+	frent = TAILQ_FIRST(&frag->fr_queue);
+	KASSERT(frent != NULL, ("frent != NULL"));
+	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
+	hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag);
+
+	m = *m0 = pf_join_fragment(frag);
+	frag = NULL;
+
+	PF_FRAG_UNLOCK();
+
+	/* Take protocol from first fragment header. */
+	m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), &off);
+	KASSERT(m, ("%s: short mbuf chain", __func__));
+	proto = *(mtod(m, caddr_t) + off);
+	m = *m0;
+
+	/* Delete frag6 header */
+	if (ip6_deletefraghdr(m, hdrlen, M_NOWAIT) != 0)
+		goto fail;
+
+	/* Recompute the packet header length after concatenation. */
+	if (m->m_flags & M_PKTHDR) {
+		int plen = 0;
+		for (m = *m0; m; m = m->m_next)
+			plen += m->m_len;
+		m = *m0;
+		m->m_pkthdr.len = plen;
+	}
+
+	/* Record reassembly parameters for later refragmentation. */
+	if ((mtag = m_tag_get(PF_REASSEMBLED, sizeof(struct pf_fragment_tag),
+	    M_NOWAIT)) == NULL)
+		goto fail;
+	ftag = (struct pf_fragment_tag *)(mtag + 1);
+	ftag->ft_hdrlen = hdrlen;
+	ftag->ft_extoff = extoff;
+	ftag->ft_maxlen = maxlen;
+	ftag->ft_id = frag_id;
+	m_tag_prepend(m, mtag);
+
+	ip6 = mtod(m, struct ip6_hdr *);
+	ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total);
+	if (extoff) {
+		/* Write protocol into next field of last extension header. */
+		m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt),
+		    &off);
+		KASSERT(m, ("%s: short mbuf chain", __func__));
+		*(mtod(m, char *) + off) = proto;
+		m = *m0;
+	} else
+		ip6->ip6_nxt = proto;
+
+	if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
+		DPFPRINTF(("drop: too big: %d", total));
+		ip6->ip6_plen = 0;
+		REASON_SET(reason, PFRES_SHORT);
+		/* PF_DROP requires a valid mbuf *m0 in pf_test6(). */
+		return (PF_DROP);
+	}
+
+	DPFPRINTF(("complete: %p(%d)", m, ntohs(ip6->ip6_plen)));
+	return (PF_PASS);
+
+fail:
+	REASON_SET(reason, PFRES_MEMORY);
+	/* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later. */
+	return (PF_DROP);
+}
+#endif /* INET6 */
+
+#ifdef INET6
+/*
+ * Re-fragment an IPv6 packet that pf previously reassembled, using the
+ * parameters saved in the PF_REASSEMBLED mbuf tag (header length, last
+ * extension-header offset, maximum fragment length, fragment id), then
+ * hand each resulting fragment back to ip6_forward().
+ *
+ * Returns PF_PASS when fragmentation succeeded (*m0 is consumed and set
+ * to NULL) or PF_DROP on error (caller is expected to free *m0 later --
+ * see the comment convention used by the reassembly path above).
+ */
+int
+pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag)
+{
+	struct mbuf *m = *m0, *t;
+	struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1);
+	struct pf_pdesc pd;
+	uint32_t frag_id;
+	uint16_t hdrlen, extoff, maxlen;
+	uint8_t proto;
+	int error, action;
+
+	/* Extract the saved reassembly parameters, then drop the tag. */
+	hdrlen = ftag->ft_hdrlen;
+	extoff = ftag->ft_extoff;
+	maxlen = ftag->ft_maxlen;
+	frag_id = ftag->ft_id;
+	m_tag_delete(m, mtag);
+	mtag = NULL;
+	ftag = NULL;
+
+	/* Re-insert IPPROTO_FRAGMENT as the next-header value, saving the
+	 * real upper-layer protocol for ip6_fragment() below. */
+	if (extoff) {
+		int off;
+
+		/* Use protocol from next field of last extension header */
+		m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt),
+		    &off);
+		KASSERT((m != NULL), ("pf_refragment6: short mbuf chain"));
+		proto = *(mtod(m, caddr_t) + off);
+		*(mtod(m, char *) + off) = IPPROTO_FRAGMENT;
+		m = *m0;
+	} else {
+		struct ip6_hdr *hdr;
+
+		hdr = mtod(m, struct ip6_hdr *);
+		proto = hdr->ip6_nxt;
+		hdr->ip6_nxt = IPPROTO_FRAGMENT;
+	}
+
+	/*
+	 * Maxlen may be less than 8 if there was only a single
+	 * fragment. As it was fragmented before, add a fragment
+	 * header also for a single fragment. If total or maxlen
+	 * is less than 8, ip6_fragment() will return EMSGSIZE and
+	 * we drop the packet.
+	 */
+	error = ip6_fragment(ifp, m, hdrlen, proto, maxlen, frag_id);
+	/* ip6_fragment() chains the fragments off (*m0)->m_nextpkt. */
+	m = (*m0)->m_nextpkt;
+	(*m0)->m_nextpkt = NULL;
+	if (error == 0) {
+		/* The first mbuf contains the unfragmented packet. */
+		m_freem(*m0);
+		*m0 = NULL;
+		action = PF_PASS;
+	} else {
+		/* Drop expects an mbuf to free. */
+		DPFPRINTF(("refragment error %d", error));
+		action = PF_DROP;
+	}
+	/* Forward (or, on error, free) every generated fragment. */
+	for (t = m; m; m = t) {
+		t = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+		/* Fragments already passed pf; do not inspect them again. */
+		m->m_flags |= M_SKIP_FIREWALL;
+		memset(&pd, 0, sizeof(pd));
+		pd.pf_mtag = pf_find_mtag(m);
+		if (error == 0)
+			ip6_forward(m, 0);
+		else
+			m_freem(m);
+	}
+
+	return (action);
+}
+#endif /* INET6 */
+
+#ifdef INET
+/*
+ * IPv4 scrub/normalization entry point.  Matches the packet against the
+ * scrub ruleset; sanity-checks the IP header; optionally clears IP_DF;
+ * and, for fragments, feeds the packet to pf_reassemble().
+ *
+ * Returns PF_PASS with *m0 pointing at a complete (possibly reassembled)
+ * packet, or PF_DROP.  On PF_DROP *m0 may have been consumed by the
+ * reassembly queue (then it is NULL).
+ */
+int
+pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
+    struct pf_pdesc *pd)
+{
+	struct mbuf *m = *m0;
+	struct pf_rule *r;
+	struct ip *h = mtod(m, struct ip *);
+	int mff = (ntohs(h->ip_off) & IP_MF);
+	int hlen = h->ip_hl << 2;
+	u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
+	u_int16_t max;
+	int ip_len;
+	int ip_off;
+	int tag = -1;
+	int verdict;
+
+	PF_RULES_RASSERT();
+
+	/* Find the first matching scrub rule, using the skip-step lists. */
+	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
+	while (r != NULL) {
+		r->evaluations++;
+		if (pfi_kif_match(r->kif, kif) == r->ifnot)
+			r = r->skip[PF_SKIP_IFP].ptr;
+		else if (r->direction && r->direction != dir)
+			r = r->skip[PF_SKIP_DIR].ptr;
+		else if (r->af && r->af != AF_INET)
+			r = r->skip[PF_SKIP_AF].ptr;
+		else if (r->proto && r->proto != h->ip_p)
+			r = r->skip[PF_SKIP_PROTO].ptr;
+		else if (PF_MISMATCHAW(&r->src.addr,
+		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
+		    r->src.neg, kif, M_GETFIB(m)))
+			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+		else if (PF_MISMATCHAW(&r->dst.addr,
+		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
+		    r->dst.neg, NULL, M_GETFIB(m)))
+			r = r->skip[PF_SKIP_DST_ADDR].ptr;
+		else if (r->match_tag && !pf_match_tag(m, r, &tag,
+		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
+			r = TAILQ_NEXT(r, entries);
+		else
+			break;
+	}
+
+	if (r == NULL || r->action == PF_NOSCRUB)
+		return (PF_PASS);
+	else {
+		r->packets[dir == PF_OUT]++;
+		r->bytes[dir == PF_OUT] += pd->tot_len;
+	}
+
+	/* Check for illegal packets */
+	if (hlen < (int)sizeof(struct ip)) {
+		REASON_SET(reason, PFRES_NORM);
+		goto drop;
+	}
+
+	if (hlen > ntohs(h->ip_len)) {
+		REASON_SET(reason, PFRES_NORM);
+		goto drop;
+	}
+
+	/* Clear IP_DF if the rule uses the no-df option */
+	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
+		u_int16_t ip_off = h->ip_off;
+
+		h->ip_off &= htons(~IP_DF);
+		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
+	}
+
+	/* We will need other tests here */
+	if (!fragoff && !mff)
+		goto no_fragment;
+
+	/* We're dealing with a fragment now. Don't allow fragments
+	 * with IP_DF to enter the cache. If the flag was cleared by
+	 * no-df above, fine. Otherwise drop it.
+	 */
+	if (h->ip_off & htons(IP_DF)) {
+		DPFPRINTF(("IP_DF\n"));
+		goto bad;
+	}
+
+	ip_len = ntohs(h->ip_len) - hlen;
+	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
+
+	/* All fragments are 8 byte aligned */
+	if (mff && (ip_len & 0x7)) {
+		DPFPRINTF(("mff and %d\n", ip_len));
+		goto bad;
+	}
+
+	/* Respect maximum length */
+	if (fragoff + ip_len > IP_MAXPACKET) {
+		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
+		goto bad;
+	}
+	max = fragoff + ip_len;
+
+	/* Fully buffer all of the fragments
+	 * Might return a completely reassembled mbuf, or NULL */
+	PF_FRAG_LOCK();
+	DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
+	verdict = pf_reassemble(m0, h, dir, reason);
+	PF_FRAG_UNLOCK();
+
+	if (verdict != PF_PASS)
+		return (PF_DROP);
+
+	/* *m0 == NULL means the fragment was queued, not an error. */
+	m = *m0;
+	if (m == NULL)
+		return (PF_DROP);
+
+	h = mtod(m, struct ip *);
+
+ no_fragment:
+	/* At this point, only IP_DF is allowed in ip_off */
+	if (h->ip_off & ~htons(IP_DF)) {
+		u_int16_t ip_off = h->ip_off;
+
+		h->ip_off &= htons(IP_DF);
+		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
+	}
+
+	pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos);
+
+	return (PF_PASS);
+
+ bad:
+	DPFPRINTF(("dropping bad fragment\n"));
+	REASON_SET(reason, PFRES_FRAG);
+ drop:
+	if (r != NULL && r->log)
+		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
+		    1);
+
+	return (PF_DROP);
+}
+#endif
+
+#ifdef INET6
+/*
+ * IPv6 scrub/normalization entry point.  Matches the packet against the
+ * scrub ruleset, walks the extension-header chain validating lengths and
+ * the jumbo-payload option, and hands fragments to pf_reassemble6().
+ *
+ * Returns PF_PASS (with *m0 possibly replaced by a reassembled packet and
+ * PFDESC_IP_REAS set in pd->flags) or PF_DROP.
+ */
+int
+pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
+    u_short *reason, struct pf_pdesc *pd)
+{
+	struct mbuf *m = *m0;
+	struct pf_rule *r;
+	struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
+	int extoff;
+	int off;
+	struct ip6_ext ext;
+	struct ip6_opt opt;
+	struct ip6_opt_jumbo jumbo;
+	struct ip6_frag frag;
+	u_int32_t jumbolen = 0, plen;
+	int optend;
+	int ooff;
+	u_int8_t proto;
+	int terminal;
+
+	PF_RULES_RASSERT();
+
+	/* Find the first matching scrub rule, using the skip-step lists. */
+	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
+	while (r != NULL) {
+		r->evaluations++;
+		if (pfi_kif_match(r->kif, kif) == r->ifnot)
+			r = r->skip[PF_SKIP_IFP].ptr;
+		else if (r->direction && r->direction != dir)
+			r = r->skip[PF_SKIP_DIR].ptr;
+		else if (r->af && r->af != AF_INET6)
+			r = r->skip[PF_SKIP_AF].ptr;
+#if 0 /* header chain! */
+		else if (r->proto && r->proto != h->ip6_nxt)
+			r = r->skip[PF_SKIP_PROTO].ptr;
+#endif
+		else if (PF_MISMATCHAW(&r->src.addr,
+		    (struct pf_addr *)&h->ip6_src, AF_INET6,
+		    r->src.neg, kif, M_GETFIB(m)))
+			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+		else if (PF_MISMATCHAW(&r->dst.addr,
+		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
+		    r->dst.neg, NULL, M_GETFIB(m)))
+			r = r->skip[PF_SKIP_DST_ADDR].ptr;
+		else
+			break;
+	}
+
+	if (r == NULL || r->action == PF_NOSCRUB)
+		return (PF_PASS);
+	else {
+		r->packets[dir == PF_OUT]++;
+		r->bytes[dir == PF_OUT] += pd->tot_len;
+	}
+
+	/* Check for illegal packets */
+	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
+		goto drop;
+
+	/* Walk the extension-header chain until an upper-layer protocol
+	 * (terminal) or a fragment header is found. */
+	extoff = 0;
+	off = sizeof(struct ip6_hdr);
+	proto = h->ip6_nxt;
+	terminal = 0;
+	do {
+		switch (proto) {
+		case IPPROTO_FRAGMENT:
+			goto fragment;
+			break;
+		case IPPROTO_AH:
+		case IPPROTO_ROUTING:
+		case IPPROTO_DSTOPTS:
+			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
+			    NULL, AF_INET6))
+				goto shortpkt;
+			extoff = off;
+			/* AH length is in 32-bit words, others in 8 bytes. */
+			if (proto == IPPROTO_AH)
+				off += (ext.ip6e_len + 2) * 4;
+			else
+				off += (ext.ip6e_len + 1) * 8;
+			proto = ext.ip6e_nxt;
+			break;
+		case IPPROTO_HOPOPTS:
+			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
+			    NULL, AF_INET6))
+				goto shortpkt;
+			extoff = off;
+			optend = off + (ext.ip6e_len + 1) * 8;
+			ooff = off + sizeof(ext);
+			/* Scan the hop-by-hop options for a jumbo payload. */
+			do {
+				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
+				    sizeof(opt.ip6o_type), NULL, NULL,
+				    AF_INET6))
+					goto shortpkt;
+				if (opt.ip6o_type == IP6OPT_PAD1) {
+					ooff++;
+					continue;
+				}
+				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
+				    NULL, NULL, AF_INET6))
+					goto shortpkt;
+				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
+					goto drop;
+				switch (opt.ip6o_type) {
+				case IP6OPT_JUMBO:
+					/* Jumbo requires ip6_plen == 0 and a
+					 * consistent actual packet length. */
+					if (h->ip6_plen != 0)
+						goto drop;
+					if (!pf_pull_hdr(m, ooff, &jumbo,
+					    sizeof(jumbo), NULL, NULL,
+					    AF_INET6))
+						goto shortpkt;
+					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
+					    sizeof(jumbolen));
+					jumbolen = ntohl(jumbolen);
+					if (jumbolen <= IPV6_MAXPACKET)
+						goto drop;
+					if (sizeof(struct ip6_hdr) + jumbolen !=
+					    m->m_pkthdr.len)
+						goto drop;
+					break;
+				default:
+					break;
+				}
+				ooff += sizeof(opt) + opt.ip6o_len;
+			} while (ooff < optend);
+
+			off = optend;
+			proto = ext.ip6e_nxt;
+			break;
+		default:
+			terminal = 1;
+			break;
+		}
+	} while (!terminal);
+
+	/* jumbo payload option must be present, or plen > 0 */
+	if (ntohs(h->ip6_plen) == 0)
+		plen = jumbolen;
+	else
+		plen = ntohs(h->ip6_plen);
+	if (plen == 0)
+		goto drop;
+	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
+		goto shortpkt;
+
+	pf_scrub_ip6(&m, r->min_ttl);
+
+	return (PF_PASS);
+
+ fragment:
+	/* Jumbo payload packets cannot be fragmented. */
+	plen = ntohs(h->ip6_plen);
+	if (plen == 0 || jumbolen)
+		goto drop;
+	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
+		goto shortpkt;
+
+	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
+		goto shortpkt;
+
+	/* Offset now points to data portion. */
+	off += sizeof(frag);
+
+	/* Returns PF_DROP or *m0 is NULL or completely reassembled mbuf. */
+	if (pf_reassemble6(m0, h, &frag, off, extoff, reason) != PF_PASS)
+		return (PF_DROP);
+	m = *m0;
+	if (m == NULL)
+		return (PF_DROP);
+
+	pd->flags |= PFDESC_IP_REAS;
+	return (PF_PASS);
+
+ shortpkt:
+	REASON_SET(reason, PFRES_SHORT);
+	if (r != NULL && r->log)
+		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
+		    1);
+	return (PF_DROP);
+
+ drop:
+	REASON_SET(reason, PFRES_NORM);
+	if (r != NULL && r->log)
+		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
+		    1);
+	return (PF_DROP);
+}
+#endif /* INET6 */
+
+/*
+ * Stateless TCP normalization.  Matches the packet against the scrub
+ * ruleset, drops packets with illegal flag combinations or header
+ * lengths, clears reserved header bits and stale urgent pointers, and
+ * clamps the MSS option when the rule requests it.
+ *
+ * Returns PF_PASS (possibly after rewriting the TCP header in the mbuf)
+ * or PF_DROP.  Works for both IPv4 and IPv6 (pd->af).
+ */
+int
+pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
+    int off, void *h, struct pf_pdesc *pd)
+{
+	struct pf_rule *r, *rm = NULL;
+	struct tcphdr *th = pd->hdr.tcp;
+	int rewrite = 0;
+	u_short reason;
+	u_int8_t flags;
+	sa_family_t af = pd->af;
+
+	PF_RULES_RASSERT();
+
+	/* Find the first matching scrub rule, using the skip-step lists. */
+	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
+	while (r != NULL) {
+		r->evaluations++;
+		if (pfi_kif_match(r->kif, kif) == r->ifnot)
+			r = r->skip[PF_SKIP_IFP].ptr;
+		else if (r->direction && r->direction != dir)
+			r = r->skip[PF_SKIP_DIR].ptr;
+		else if (r->af && r->af != af)
+			r = r->skip[PF_SKIP_AF].ptr;
+		else if (r->proto && r->proto != pd->proto)
+			r = r->skip[PF_SKIP_PROTO].ptr;
+		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
+		    r->src.neg, kif, M_GETFIB(m)))
+			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+		else if (r->src.port_op && !pf_match_port(r->src.port_op,
+		    r->src.port[0], r->src.port[1], th->th_sport))
+			r = r->skip[PF_SKIP_SRC_PORT].ptr;
+		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
+		    r->dst.neg, NULL, M_GETFIB(m)))
+			r = r->skip[PF_SKIP_DST_ADDR].ptr;
+		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
+		    r->dst.port[0], r->dst.port[1], th->th_dport))
+			r = r->skip[PF_SKIP_DST_PORT].ptr;
+		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
+		    pf_osfp_fingerprint(pd, m, off, th),
+		    r->os_fingerprint))
+			r = TAILQ_NEXT(r, entries);
+		else {
+			rm = r;
+			break;
+		}
+	}
+
+	if (rm == NULL || rm->action == PF_NOSCRUB)
+		return (PF_PASS);
+	else {
+		r->packets[dir == PF_OUT]++;
+		r->bytes[dir == PF_OUT] += pd->tot_len;
+	}
+
+	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
+		pd->flags |= PFDESC_TCP_NORM;
+
+	flags = th->th_flags;
+	if (flags & TH_SYN) {
+		/* Illegal packet */
+		if (flags & TH_RST)
+			goto tcp_drop;
+
+		if (flags & TH_FIN)
+			goto tcp_drop;
+	} else {
+		/* Illegal packet */
+		if (!(flags & (TH_ACK|TH_RST)))
+			goto tcp_drop;
+	}
+
+	if (!(flags & TH_ACK)) {
+		/* These flags are only valid if ACK is set */
+		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
+			goto tcp_drop;
+	}
+
+	/* Check for illegal header length */
+	if (th->th_off < (sizeof(struct tcphdr) >> 2))
+		goto tcp_drop;
+
+	/* If flags changed, or reserved data set, then adjust */
+	if (flags != th->th_flags || th->th_x2 != 0) {
+		u_int16_t ov, nv;
+
+		/* Fix the checksum over the 16-bit word holding x2+flags. */
+		ov = *(u_int16_t *)(&th->th_ack + 1);
+		th->th_flags = flags;
+		th->th_x2 = 0;
+		nv = *(u_int16_t *)(&th->th_ack + 1);
+
+		th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, ov, nv, 0);
+		rewrite = 1;
+	}
+
+	/* Remove urgent pointer, if TH_URG is not set */
+	if (!(flags & TH_URG) && th->th_urp) {
+		th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, th->th_urp,
+		    0, 0);
+		th->th_urp = 0;
+		rewrite = 1;
+	}
+
+	/* Process options */
+	if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af))
+		rewrite = 1;
+
+	/* copy back packet headers if we sanitized */
+	if (rewrite)
+		m_copyback(m, off, sizeof(*th), (caddr_t)th);
+
+	return (PF_PASS);
+
+ tcp_drop:
+	REASON_SET(&reason, PFRES_NORM);
+	if (rm != NULL && r->log)
+		/*
+		 * Log with the packet's actual address family.  The
+		 * original code hard-coded AF_INET here, mislabelling
+		 * dropped IPv6 packets in pflog.
+		 */
+		PFLOG_PACKET(kif, m, af, dir, reason, r, NULL, NULL, pd,
+		    1);
+	return (PF_DROP);
+}
+
+/*
+ * Allocate and initialize the per-peer scrub state for a new TCP state
+ * entry: record the initial TTL/hop limit and, on a SYN carrying a
+ * timestamp option, seed the timestamp-modulation and PAWS bookkeeping.
+ *
+ * Returns 0 on success, 1 if the scrub zone allocation failed.
+ */
+int
+pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
+    struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
+{
+	u_int32_t tsval, tsecr;
+	u_int8_t hdr[60];
+	u_int8_t *opt;
+
+	KASSERT((src->scrub == NULL),
+	    ("pf_normalize_tcp_init: src->scrub != NULL"));
+
+	src->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT);
+	if (src->scrub == NULL)
+		return (1);
+
+	/* Remember the TTL seen on the first packet of this peer. */
+	switch (pd->af) {
+#ifdef INET
+	case AF_INET: {
+		struct ip *h = mtod(m, struct ip *);
+		src->scrub->pfss_ttl = h->ip_ttl;
+		break;
+	}
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6: {
+		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
+		src->scrub->pfss_ttl = h->ip6_hlim;
+		break;
+	}
+#endif /* INET6 */
+	}
+
+
+	/*
+	 * All normalizations below are only begun if we see the start of
+	 * the connections. They must all set an enabled bit in pfss_flags
+	 */
+	if ((th->th_flags & TH_SYN) == 0)
+		return (0);
+
+
+	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
+	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
+		/* Diddle with TCP options */
+		int hlen;
+		opt = hdr + sizeof(struct tcphdr);
+		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
+		while (hlen >= TCPOLEN_TIMESTAMP) {
+			switch (*opt) {
+			case TCPOPT_EOL: /* FALLTHROUGH */
+			case TCPOPT_NOP:
+				opt++;
+				hlen--;
+				break;
+			case TCPOPT_TIMESTAMP:
+				if (opt[1] >= TCPOLEN_TIMESTAMP) {
+					src->scrub->pfss_flags |=
+					    PFSS_TIMESTAMP;
+					src->scrub->pfss_ts_mod =
+					    htonl(arc4random());
+
+					/* note PFSS_PAWS not set yet */
+					memcpy(&tsval, &opt[2],
+					    sizeof(u_int32_t));
+					memcpy(&tsecr, &opt[6],
+					    sizeof(u_int32_t));
+					src->scrub->pfss_tsval0 = ntohl(tsval);
+					src->scrub->pfss_tsval = ntohl(tsval);
+					src->scrub->pfss_tsecr = ntohl(tsecr);
+					getmicrouptime(&src->scrub->pfss_last);
+				}
+				/* FALLTHROUGH */
+			default:
+				hlen -= MAX(opt[1], 2);
+				opt += MAX(opt[1], 2);
+				break;
+			}
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Release the per-peer scrub state attached to a pf state entry.
+ */
+void
+pf_normalize_tcp_cleanup(struct pf_state *state)
+{
+	if (state->dst.scrub != NULL)
+		uma_zfree(V_pf_state_scrub_z, state->dst.scrub);
+	if (state->src.scrub != NULL)
+		uma_zfree(V_pf_state_scrub_z, state->src.scrub);
+
+	/* Someday... flush the TCP segment reassembly descriptors. */
+}
+
+/*
+ * Stateful TCP normalization, run on every packet matching an existing
+ * state: enforce the minimum TTL seen for this connection, modulate the
+ * TCP timestamp option, and perform PAWS-style timestamp validation.
+ *
+ * Returns 0 to continue processing or PF_DROP (with *reason set) when
+ * the timestamps indicate a bad stack or an insertion attack.  Sets
+ * *writeback when the TCP options were rewritten in place.
+ */
+int
+pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
+    u_short *reason, struct tcphdr *th, struct pf_state *state,
+    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
+{
+	struct timeval uptime;
+	u_int32_t tsval, tsecr;
+	u_int tsval_from_last;
+	u_int8_t hdr[60];
+	u_int8_t *opt;
+	int copyback = 0;
+	int got_ts = 0;
+
+	KASSERT((src->scrub || dst->scrub),
+	    ("%s: src->scrub && dst->scrub!", __func__));
+
+	/*
+	 * Enforce the minimum TTL seen for this connection. Negate a common
+	 * technique to evade an intrusion detection system and confuse
+	 * firewall state code.
+	 */
+	switch (pd->af) {
+#ifdef INET
+	case AF_INET: {
+		if (src->scrub) {
+			struct ip *h = mtod(m, struct ip *);
+			if (h->ip_ttl > src->scrub->pfss_ttl)
+				src->scrub->pfss_ttl = h->ip_ttl;
+			h->ip_ttl = src->scrub->pfss_ttl;
+		}
+		break;
+	}
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6: {
+		if (src->scrub) {
+			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
+			if (h->ip6_hlim > src->scrub->pfss_ttl)
+				src->scrub->pfss_ttl = h->ip6_hlim;
+			h->ip6_hlim = src->scrub->pfss_ttl;
+		}
+		break;
+	}
+#endif /* INET6 */
+	}
+
+	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
+	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
+	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
+	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
+		/* Diddle with TCP options */
+		int hlen;
+		opt = hdr + sizeof(struct tcphdr);
+		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
+		while (hlen >= TCPOLEN_TIMESTAMP) {
+			switch (*opt) {
+			case TCPOPT_EOL: /* FALLTHROUGH */
+			case TCPOPT_NOP:
+				opt++;
+				hlen--;
+				break;
+			case TCPOPT_TIMESTAMP:
+				/* Modulate the timestamps. Can be used for
+				 * NAT detection, OS uptime determination or
+				 * reboot detection.
+				 */
+
+				if (got_ts) {
+					/* Huh? Multiple timestamps!? */
+					if (V_pf_status.debug >= PF_DEBUG_MISC) {
+						DPFPRINTF(("multiple TS??"));
+						pf_print_state(state);
+						printf("\n");
+					}
+					REASON_SET(reason, PFRES_TS);
+					return (PF_DROP);
+				}
+				if (opt[1] >= TCPOLEN_TIMESTAMP) {
+					memcpy(&tsval, &opt[2],
+					    sizeof(u_int32_t));
+					if (tsval && src->scrub &&
+					    (src->scrub->pfss_flags &
+					    PFSS_TIMESTAMP)) {
+						tsval = ntohl(tsval);
+						pf_change_proto_a(m, &opt[2],
+						    &th->th_sum,
+						    htonl(tsval +
+						    src->scrub->pfss_ts_mod),
+						    0);
+						copyback = 1;
+					}
+
+					/* Modulate TS reply iff valid (!0) */
+					memcpy(&tsecr, &opt[6],
+					    sizeof(u_int32_t));
+					if (tsecr && dst->scrub &&
+					    (dst->scrub->pfss_flags &
+					    PFSS_TIMESTAMP)) {
+						tsecr = ntohl(tsecr)
+						    - dst->scrub->pfss_ts_mod;
+						pf_change_proto_a(m, &opt[6],
+						    &th->th_sum, htonl(tsecr),
+						    0);
+						copyback = 1;
+					}
+					got_ts = 1;
+				}
+				/* FALLTHROUGH */
+			default:
+				hlen -= MAX(opt[1], 2);
+				opt += MAX(opt[1], 2);
+				break;
+			}
+		}
+		if (copyback) {
+			/* Copyback the options, caller copys back header */
+			*writeback = 1;
+			m_copyback(m, off + sizeof(struct tcphdr),
+			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
+			    sizeof(struct tcphdr));
+		}
+	}
+
+
+	/*
+	 * Must invalidate PAWS checks on connections idle for too long.
+	 * The fastest allowed timestamp clock is 1ms.  That turns out to
+	 * be about 24 days before it wraps.  XXX Right now our lowerbound
+	 * TS echo check only works for the first 12 days of a connection
+	 * when the TS has exhausted half its 32bit space
+	 */
+#define TS_MAX_IDLE	(24*24*60*60)
+#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */
+
+	getmicrouptime(&uptime);
+	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
+	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
+	    time_uptime - state->creation > TS_MAX_CONN)) {
+		if (V_pf_status.debug >= PF_DEBUG_MISC) {
+			DPFPRINTF(("src idled out of PAWS\n"));
+			pf_print_state(state);
+			printf("\n");
+		}
+		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
+		    | PFSS_PAWS_IDLED;
+	}
+	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
+	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
+		if (V_pf_status.debug >= PF_DEBUG_MISC) {
+			DPFPRINTF(("dst idled out of PAWS\n"));
+			pf_print_state(state);
+			printf("\n");
+		}
+		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
+		    | PFSS_PAWS_IDLED;
+	}
+
+	if (got_ts && src->scrub && dst->scrub &&
+	    (src->scrub->pfss_flags & PFSS_PAWS) &&
+	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
+		/* Validate that the timestamps are "in-window".
+		 * RFC1323 describes TCP Timestamp options that allow
+		 * measurement of RTT (round trip time) and PAWS
+		 * (protection against wrapped sequence numbers).  PAWS
+		 * gives us a set of rules for rejecting packets on
+		 * long fat pipes (packets that were somehow delayed
+		 * in transit longer than the time it took to send the
+		 * full TCP sequence space of 4Gb).  We can use these
+		 * rules and infer a few others that will let us treat
+		 * the 32bit timestamp and the 32bit echoed timestamp
+		 * as sequence numbers to prevent a blind attacker from
+		 * inserting packets into a connection.
+		 *
+		 * RFC1323 tells us:
+		 *  - The timestamp on this packet must be greater than
+		 *    or equal to the last value echoed by the other
+		 *    endpoint.  The RFC says those will be discarded
+		 *    since it is a dup that has already been acked.
+		 *    This gives us a lowerbound on the timestamp.
+		 *        timestamp >= other last echoed timestamp
+		 *  - The timestamp will be less than or equal to
+		 *    the last timestamp plus the time between the
+		 *    last packet and now.  The RFC defines the max
+		 *    clock rate as 1ms.  We will allow clocks to be
+		 *    up to 10% fast and will allow a total difference
+		 *    or 30 seconds due to a route change.  And this
+		 *    gives us an upperbound on the timestamp.
+		 *        timestamp <= last timestamp + max ticks
+		 *    We have to be careful here.  Windows will send an
+		 *    initial timestamp of zero and then initialize it
+		 *    to a random value after the 3whs; presumably to
+		 *    avoid a DoS by having to call an expensive RNG
+		 *    during a SYN flood.  Proof MS has at least one
+		 *    good security geek.
+		 *
+		 *  - The TCP timestamp option must also echo the other
+		 *    endpoints timestamp.  The timestamp echoed is the
+		 *    one carried on the earliest unacknowledged segment
+		 *    on the left edge of the sequence window.  The RFC
+		 *    states that the host will reject any echoed
+		 *    timestamps that were larger than any ever sent.
+		 *    This gives us an upperbound on the TS echo.
+		 *        tescr <= largest_tsval
+		 *  - The lowerbound on the TS echo is a little more
+		 *    tricky to determine.  The other endpoint's echoed
+		 *    values will not decrease.  But there may be
+		 *    network conditions that re-order packets and
+		 *    cause our view of them to decrease.  For now the
+		 *    only lowerbound we can safely determine is that
+		 *    the TS echo will never be less than the original
+		 *    TS.  XXX There is probably a better lowerbound.
+		 *    Remove TS_MAX_CONN with better lowerbound check.
+		 *        tescr >= other original TS
+		 *
+		 * It is also important to note that the fastest
+		 * timestamp clock of 1ms will wrap its 32bit space in
+		 * 24 days.  So we just disable TS checking after 24
+		 * days of idle time.  We actually must use a 12d
+		 * connection limit until we can come up with a better
+		 * lowerbound to the TS echo check.
+		 */
+		struct timeval delta_ts;
+		int ts_fudge;
+
+
+		/*
+		 * PFTM_TS_DIFF is how many seconds of leeway to allow
+		 * a host's timestamp.  This can happen if the previous
+		 * packet got delayed in transit for much longer than
+		 * this packet.
+		 */
+		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
+			ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF];
+
+		/* Calculate max ticks since the last timestamp */
+#define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
+#define TS_MICROSECS	1000000		/* microseconds per second */
+		delta_ts = uptime;
+		timevalsub(&delta_ts, &src->scrub->pfss_last);
+		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
+		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
+
+		if ((src->state >= TCPS_ESTABLISHED &&
+		    dst->state >= TCPS_ESTABLISHED) &&
+		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
+		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
+		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
+		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
+			/* Bad RFC1323 implementation or an insertion attack.
+			 *
+			 * - Solaris 2.6 and 2.7 are known to send another ACK
+			 *   after the FIN,FIN|ACK,ACK closing that carries
+			 *   an old timestamp.
+			 */
+
+			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
+			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
+			    SEQ_GT(tsval, src->scrub->pfss_tsval +
+			    tsval_from_last) ? '1' : ' ',
+			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
+			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
+			DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
+			    "idle: %jus %lums\n",
+			    tsval, tsecr, tsval_from_last,
+			    (uintmax_t)delta_ts.tv_sec,
+			    delta_ts.tv_usec / 1000));
+			DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
+			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
+			DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u"
+			    "\n", dst->scrub->pfss_tsval,
+			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
+			if (V_pf_status.debug >= PF_DEBUG_MISC) {
+				pf_print_state(state);
+				pf_print_flags(th->th_flags);
+				printf("\n");
+			}
+			REASON_SET(reason, PFRES_TS);
+			return (PF_DROP);
+		}
+
+		/* XXX I'd really like to require tsecr but it's optional */
+
+	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
+	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
+	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
+	    src->scrub && dst->scrub &&
+	    (src->scrub->pfss_flags & PFSS_PAWS) &&
+	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
+		/* Didn't send a timestamp.  Timestamps aren't really useful
+		 * when:
+		 *  - connection opening or closing (often not even sent).
+		 *    but we must not let an attacker to put a FIN on a
+		 *    data packet to sneak it through our ESTABLISHED check.
+		 *  - on a TCP reset.  RFC suggests not even looking at TS.
+		 *  - on an empty ACK.  The TS will not be echoed so it will
+		 *    probably not help keep the RTT calculation in sync and
+		 *    there isn't as much danger when the sequence numbers
+		 *    got wrapped.  So some stacks don't include TS on empty
+		 *    ACKs :-(
+		 *
+		 * To minimize the disruption to mostly RFC1323 conformant
+		 * stacks, we will only require timestamps on data packets.
+		 *
+		 * And what do ya know, we cannot require timestamps on data
+		 * packets.  There appear to be devices that do legitimate
+		 * TCP connection hijacking.  There are HTTP devices that allow
+		 * a 3whs (with timestamps) and then buffer the HTTP request.
+		 * If the intermediate device has the HTTP response cache, it
+		 * will spoof the response but not bother timestamping its
+		 * packets.  So we can look for the presence of a timestamp in
+		 * the first data packet and if there, require it in all future
+		 * packets.
+		 */
+
+		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
+			/*
+			 * Hey!  Someone tried to sneak a packet in.  Or the
+			 * stack changed its RFC1323 behavior?!?!
+			 */
+			if (V_pf_status.debug >= PF_DEBUG_MISC) {
+				DPFPRINTF(("Did not receive expected RFC1323 "
+				    "timestamp\n"));
+				pf_print_state(state);
+				pf_print_flags(th->th_flags);
+				printf("\n");
+			}
+			REASON_SET(reason, PFRES_TS);
+			return (PF_DROP);
+		}
+	}
+
+
+	/*
+	 * We will note if a host sends his data packets with or without
+	 * timestamps.  And require all data packets to contain a timestamp
+	 * if the first does.  PAWS implicitly requires that all data packets be
+	 * timestamped.  But I think there are middle-man devices that hijack
+	 * TCP streams immediately after the 3whs and don't timestamp their
+	 * packets (seen in a WWW accelerator or cache).
+	 */
+	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
+	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
+		if (got_ts)
+			src->scrub->pfss_flags |= PFSS_DATA_TS;
+		else {
+			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
+			if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
+			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
+				/* Don't warn if other host rejected RFC1323 */
+				DPFPRINTF(("Broken RFC1323 stack did not "
+				    "timestamp data packet. Disabled PAWS "
+				    "security.\n"));
+				pf_print_state(state);
+				pf_print_flags(th->th_flags);
+				printf("\n");
+			}
+		}
+	}
+
+
+	/*
+	 * Update PAWS values
+	 */
+	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
+	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
+		getmicrouptime(&src->scrub->pfss_last);
+		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
+		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
+			src->scrub->pfss_tsval = tsval;
+
+		if (tsecr) {
+			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
+			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
+				src->scrub->pfss_tsecr = tsecr;
+
+			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
+			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
+			    src->scrub->pfss_tsval0 == 0)) {
+				/* tsval0 MUST be the lowest timestamp */
+				src->scrub->pfss_tsval0 = tsval;
+			}
+
+			/* Only fully initialized after a TS gets echoed */
+			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
+				src->scrub->pfss_flags |= PFSS_PAWS;
+		}
+	}
+
+	/* I have a dream....  TCP segment reassembly.... */
+	return (0);
+}
+
+/*
+ * Walk the TCP options of a packet and clamp the MSS option down to the
+ * rule's max-mss value, fixing up the TCP checksum incrementally.
+ *
+ * Returns 1 when an option was rewritten (and copied back into the
+ * mbuf), 0 otherwise.  Malformed or truncated option lists simply stop
+ * the scan; the packet itself is not rejected here.
+ */
+static int
+pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
+    int off, sa_family_t af)
+{
+	u_int16_t *mss;
+	int thoff;
+	int opt, cnt, optlen = 0;
+	int rewrite = 0;
+	u_char opts[TCP_MAXOLEN];
+	u_char *optp = opts;
+
+	thoff = th->th_off << 2;
+	cnt = thoff - sizeof(struct tcphdr);
+
+	if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt,
+	    NULL, NULL, af))
+		return (rewrite);
+
+	for (; cnt > 0; cnt -= optlen, optp += optlen) {
+		opt = optp[0];
+		if (opt == TCPOPT_EOL)
+			break;
+		if (opt == TCPOPT_NOP)
+			optlen = 1;
+		else {
+			/* Bail out on truncated or bogus option lengths. */
+			if (cnt < 2)
+				break;
+			optlen = optp[1];
+			if (optlen < 2 || optlen > cnt)
+				break;
+		}
+		switch (opt) {
+		case TCPOPT_MAXSEG:
+			mss = (u_int16_t *)(optp + 2);
+			if ((ntohs(*mss)) > r->max_mss) {
+				th->th_sum = pf_proto_cksum_fixup(m,
+				    th->th_sum, *mss, htons(r->max_mss), 0);
+				*mss = htons(r->max_mss);
+				rewrite = 1;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (rewrite)
+		m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts);
+
+	return (rewrite);
+}
+
+#ifdef INET
+/*
+ * Apply the rule's scrub actions to an IPv4 header: clear IP_DF (no-df),
+ * enforce a minimum TTL, overwrite the TOS byte (set-tos), and randomize
+ * the IP id on non-fragments.  Every modification is paired with an
+ * incremental header-checksum fixup.
+ */
+static void
+pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos)
+{
+	struct mbuf *m = *m0;
+	struct ip *h = mtod(m, struct ip *);
+
+	/* Clear IP_DF if no-df was requested */
+	if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
+		u_int16_t ip_off = h->ip_off;
+
+		h->ip_off &= htons(~IP_DF);
+		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
+	}
+
+	/* Enforce a minimum ttl, may cause endless packet loops */
+	if (min_ttl && h->ip_ttl < min_ttl) {
+		u_int16_t ip_ttl = h->ip_ttl;
+
+		h->ip_ttl = min_ttl;
+		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
+	}
+
+	/* Enforce tos */
+	if (flags & PFRULE_SET_TOS) {
+		u_int16_t ov, nv;
+
+		/* Fix the checksum over the 16-bit word holding ver/hl+tos. */
+		ov = *(u_int16_t *)h;
+		h->ip_tos = tos;
+		nv = *(u_int16_t *)h;
+
+		h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
+	}
+
+	/* random-id, but not for fragments */
+	if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
+		uint16_t ip_id = h->ip_id;
+
+		ip_fillid(h);
+		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
+	}
+}
+#endif /* INET */
+
+#ifdef INET6
+/*
+ * Apply scrub actions to an IPv6 header: raise the hop limit to the
+ * configured minimum.  (Forcing a minimum TTL can create packet loops.)
+ */
+static void
+pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl)
+{
+	struct ip6_hdr *hdr = mtod(*m0, struct ip6_hdr *);
+
+	if (min_ttl != 0 && hdr->ip6_hlim < min_ttl)
+		hdr->ip6_hlim = min_ttl;
+}
+#endif
diff --git a/freebsd/sys/netpfil/pf/pf_osfp.c b/freebsd/sys/netpfil/pf/pf_osfp.c
new file mode 100644
index 00000000..33bef4c8
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/pf_osfp.c
@@ -0,0 +1,530 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2003 Mike Frantzen <frantzen@w4g.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * $OpenBSD: pf_osfp.c,v 1.14 2008/06/12 18:17:01 henning Exp $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+
+#include <net/if.h>
+#include <net/vnet.h>
+#include <net/pfvar.h>
+
+#include <netinet/ip6.h>
+
+static MALLOC_DEFINE(M_PFOSFP, "pf_osfp", "pf(4) operating system fingerprints");
+#define DPFPRINTF(format, x...) \
+ if (V_pf_status.debug >= PF_DEBUG_NOISY) \
+ printf(format , ##x)
+
+SLIST_HEAD(pf_osfp_list, pf_os_fingerprint);
+static VNET_DEFINE(struct pf_osfp_list, pf_osfp_list) =
+ SLIST_HEAD_INITIALIZER();
+#define V_pf_osfp_list VNET(pf_osfp_list)
+
+static struct pf_osfp_enlist *pf_osfp_fingerprint_hdr(const struct ip *,
+ const struct ip6_hdr *,
+ const struct tcphdr *);
+static struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *,
+ struct pf_os_fingerprint *, u_int8_t);
+static struct pf_os_fingerprint *pf_osfp_find_exact(struct pf_osfp_list *,
+ struct pf_os_fingerprint *);
+static void pf_osfp_insert(struct pf_osfp_list *,
+ struct pf_os_fingerprint *);
+#ifdef PFDEBUG
+static struct pf_os_fingerprint *pf_osfp_validate(void);
+#endif
+
+/*
+ * Passively fingerprint the OS of the host (IPv4 TCP SYN packets only)
+ * Returns the list of possible OSes.
+ */
+struct pf_osfp_enlist *
+pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off,
+    const struct tcphdr *tcp)
+{
+	struct ip *ip;
+	struct ip6_hdr *ip6;
+	char hdr[60];	/* TCP header incl. options; 60 is the max th_off<<2 */
+
+	/* Only TCP over IPv4/IPv6 with a sane data offset is fingerprintable. */
+	if ((pd->af != PF_INET && pd->af != PF_INET6) ||
+	    pd->proto != IPPROTO_TCP || (tcp->th_off << 2) < sizeof(*tcp))
+		return (NULL);
+
+	/* Exactly one of ip/ip6 is set, selecting the v4 or v6 path below. */
+	if (pd->af == PF_INET) {
+		ip = mtod(m, struct ip *);
+		ip6 = (struct ip6_hdr *)NULL;
+	} else {
+		ip = (struct ip *)NULL;
+		ip6 = mtod(m, struct ip6_hdr *);
+	}
+	/* Copy the full TCP header plus options out of the mbuf chain. */
+	if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL,
+	    pd->af)) return (NULL);
+
+	return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr));
+}
+
+/*
+ * Build a passive OS fingerprint from a TCP SYN and look it up.
+ * The signature combines window size, TTL/hop limit, DF bit, total
+ * packet size, and the ordered sequence of TCP options (NOP, MSS,
+ * window scale, SACK-permitted, timestamp).  Returns the matching
+ * list of OSes, or NULL if the packet is not a plain SYN, is a
+ * fragment, carries an unknown/malformed option, or matches nothing.
+ */
+static struct pf_osfp_enlist *
+pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const struct tcphdr *tcp)
+{
+	struct pf_os_fingerprint fp, *fpresult;
+	int cnt, optlen = 0;
+	const u_int8_t *optp;
+	char srcname[128];	/* printable source address, for debug output */
+
+	/* Only initial SYNs (SYN set, ACK clear) are fingerprinted. */
+	if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN)
+		return (NULL);
+	if (ip) {
+		/* Reject IPv4 fragments with a non-zero offset. */
+		if ((ip->ip_off & htons(IP_OFFMASK)) != 0)
+			return (NULL);
+	}
+
+	memset(&fp, 0, sizeof(fp));
+
+	if (ip) {
+		fp.fp_psize = ntohs(ip->ip_len);
+		fp.fp_ttl = ip->ip_ttl;
+		if (ip->ip_off & htons(IP_DF))
+			fp.fp_flags |= PF_OSFP_DF;
+		strlcpy(srcname, inet_ntoa(ip->ip_src), sizeof(srcname));
+	}
+#ifdef INET6
+	else if (ip6) {
+		/* jumbo payload? */
+		fp.fp_psize = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
+		fp.fp_ttl = ip6->ip6_hlim;
+		/* IPv6 routers never fragment in flight; record it as DF. */
+		fp.fp_flags |= PF_OSFP_DF;
+		fp.fp_flags |= PF_OSFP_INET6;
+		strlcpy(srcname, ip6_sprintf((struct in6_addr *)&ip6->ip6_src),
+		    sizeof(srcname));
+	}
+#endif
+	else
+		return (NULL);
+	fp.fp_wsize = ntohs(tcp->th_win);
+
+
+	/*
+	 * Walk the TCP options.  Each recognized option shifts its code
+	 * into fp_tcpopts (PF_OSFP_TCPOPT_BITS bits per option), so the
+	 * accumulated value encodes both which options appeared and in
+	 * what order — a strong OS discriminator.
+	 */
+	cnt = (tcp->th_off << 2) - sizeof(*tcp);
+	optp = (const u_int8_t *)((const char *)tcp + sizeof(*tcp));
+	for (; cnt > 0; cnt -= optlen, optp += optlen) {
+		if (*optp == TCPOPT_EOL)
+			break;
+
+		fp.fp_optcnt++;
+		if (*optp == TCPOPT_NOP) {
+			fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) |
+			    PF_OSFP_TCPOPT_NOP;
+			optlen = 1;
+		} else {
+			/* Any malformed option length aborts fingerprinting. */
+			if (cnt < 2)
+				return (NULL);
+			optlen = optp[1];
+			if (optlen > cnt || optlen < 2)
+				return (NULL);
+			switch (*optp) {
+			case TCPOPT_MAXSEG:
+				if (optlen >= TCPOLEN_MAXSEG)
+					memcpy(&fp.fp_mss, &optp[2],
+					    sizeof(fp.fp_mss));
+				fp.fp_tcpopts = (fp.fp_tcpopts <<
+				    PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_MSS;
+				/* fp_mss stays 0 if the option was short. */
+				NTOHS(fp.fp_mss);
+				break;
+			case TCPOPT_WINDOW:
+				if (optlen >= TCPOLEN_WINDOW)
+					memcpy(&fp.fp_wscale, &optp[2],
+					    sizeof(fp.fp_wscale));
+				NTOHS(fp.fp_wscale);
+				fp.fp_tcpopts = (fp.fp_tcpopts <<
+				    PF_OSFP_TCPOPT_BITS) |
+				    PF_OSFP_TCPOPT_WSCALE;
+				break;
+			case TCPOPT_SACK_PERMITTED:
+				fp.fp_tcpopts = (fp.fp_tcpopts <<
+				    PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_SACK;
+				break;
+			case TCPOPT_TIMESTAMP:
+				if (optlen >= TCPOLEN_TIMESTAMP) {
+					u_int32_t ts;
+					memcpy(&ts, &optp[2], sizeof(ts));
+					/* An all-zero TSval is itself a telltale. */
+					if (ts == 0)
+						fp.fp_flags |= PF_OSFP_TS0;
+
+				}
+				fp.fp_tcpopts = (fp.fp_tcpopts <<
+				    PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_TS;
+				break;
+			default:
+				return (NULL);
+			}
+		}
+		optlen = MAX(optlen, 1);	/* paranoia: guarantee progress */
+	}
+
+	DPFPRINTF("fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) "
+	    "(TS=%s,M=%s%d,W=%s%d)\n",
+	    srcname, ntohs(tcp->th_sport),
+	    fp.fp_wsize, fp.fp_ttl, (fp.fp_flags & PF_OSFP_DF) != 0,
+	    fp.fp_psize, (long long int)fp.fp_tcpopts, fp.fp_optcnt,
+	    (fp.fp_flags & PF_OSFP_TS0) ? "0" : "",
+	    (fp.fp_flags & PF_OSFP_MSS_MOD) ? "%" :
+	    (fp.fp_flags & PF_OSFP_MSS_DC) ? "*" : "",
+	    fp.fp_mss,
+	    (fp.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" :
+	    (fp.fp_flags & PF_OSFP_WSCALE_DC) ? "*" : "",
+	    fp.fp_wscale);
+
+	/* Allow the observed TTL to be up to MAXTTL_OFFSET below the entry's. */
+	if ((fpresult = pf_osfp_find(&V_pf_osfp_list, &fp,
+	    PF_OSFP_MAXTTL_OFFSET)))
+		return (&fpresult->fp_oses);
+	return (NULL);
+}
+
+/*
+ * Match a fingerprint ID against a list of OSes.  "os" is a packed
+ * class/version/subtype triple; PF_OSFP_ANY components act as
+ * wildcards.  A NULL list (no fingerprint was taken) matches only
+ * PF_OSFP_UNKNOWN.  Returns 1 on match, 0 otherwise.
+ */
+int
+pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os)
+{
+	struct pf_osfp_entry *entry;
+	int os_class, os_version, os_subtype;
+	int en_class, en_version, en_subtype;
+
+	if (os == PF_OSFP_ANY)
+		return (1);
+	if (list == NULL) {
+		DPFPRINTF("osfp no match against %x\n", os);
+		return (os == PF_OSFP_UNKNOWN);
+	}
+	PF_OSFP_UNPACK(os, os_class, os_version, os_subtype);
+	SLIST_FOREACH(entry, list, fp_entry) {
+		PF_OSFP_UNPACK(entry->fp_os, en_class, en_version, en_subtype);
+		/* Each component matches exactly or is wildcarded. */
+		if ((os_class == PF_OSFP_ANY || en_class == os_class) &&
+		    (os_version == PF_OSFP_ANY || en_version == os_version) &&
+		    (os_subtype == PF_OSFP_ANY || en_subtype == os_subtype)) {
+			DPFPRINTF("osfp matched %s %s %s  %x==%x\n",
+			    entry->fp_class_nm, entry->fp_version_nm,
+			    entry->fp_subtype_nm, os, entry->fp_os);
+			return (1);
+		}
+	}
+	DPFPRINTF("fingerprint 0x%x didn't match\n", os);
+	return (0);
+}
+
+/*
+ * Flush the fingerprint list: free every fingerprint together with
+ * all of its attached OS entries.
+ */
+void
+pf_osfp_flush(void)
+{
+	struct pf_os_fingerprint *fp;
+	struct pf_osfp_entry *entry;
+
+	while ((fp = SLIST_FIRST(&V_pf_osfp_list))) {
+		SLIST_REMOVE_HEAD(&V_pf_osfp_list, fp_next);
+		/* Free the per-OS entries before the fingerprint itself. */
+		while ((entry = SLIST_FIRST(&fp->fp_oses))) {
+			SLIST_REMOVE_HEAD(&fp->fp_oses, fp_entry);
+			free(entry, M_PFOSFP);
+		}
+		free(fp, M_PFOSFP);
+	}
+}
+
+
+/*
+ * Add a fingerprint from an ioctl request.  If an identical
+ * fingerprint already exists, only a new OS entry is appended to it
+ * (EEXIST if that OS is already listed); otherwise a new fingerprint
+ * is inserted.  Returns 0 on success or ENOMEM/EEXIST.  Caller must
+ * hold the pf rules write lock (asserted below).
+ */
+int
+pf_osfp_add(struct pf_osfp_ioctl *fpioc)
+{
+	struct pf_os_fingerprint *fp, fpadd;
+	struct pf_osfp_entry *entry;
+
+	PF_RULES_WASSERT();
+
+	/* Copy the signature fields into a local key for the lookup. */
+	memset(&fpadd, 0, sizeof(fpadd));
+	fpadd.fp_tcpopts = fpioc->fp_tcpopts;
+	fpadd.fp_wsize = fpioc->fp_wsize;
+	fpadd.fp_psize = fpioc->fp_psize;
+	fpadd.fp_mss = fpioc->fp_mss;
+	fpadd.fp_flags = fpioc->fp_flags;
+	fpadd.fp_optcnt = fpioc->fp_optcnt;
+	fpadd.fp_wscale = fpioc->fp_wscale;
+	fpadd.fp_ttl = fpioc->fp_ttl;
+
+#if 0	/* XXX RYAN wants to fix logging */
+	DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d "
+	    "(TS=%s,M=%s%d,W=%s%d) %x\n",
+	    fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm,
+	    fpioc->fp_os.fp_subtype_nm,
+	    (fpadd.fp_flags & PF_OSFP_WSIZE_MOD) ? "%" :
+	    (fpadd.fp_flags & PF_OSFP_WSIZE_MSS) ? "S" :
+	    (fpadd.fp_flags & PF_OSFP_WSIZE_MTU) ? "T" :
+	    (fpadd.fp_flags & PF_OSFP_WSIZE_DC) ? "*" : "",
+	    fpadd.fp_wsize,
+	    fpadd.fp_ttl,
+	    (fpadd.fp_flags & PF_OSFP_DF) ? 1 : 0,
+	    (fpadd.fp_flags & PF_OSFP_PSIZE_MOD) ? "%" :
+	    (fpadd.fp_flags & PF_OSFP_PSIZE_DC) ? "*" : "",
+	    fpadd.fp_psize,
+	    (long long int)fpadd.fp_tcpopts, fpadd.fp_optcnt,
+	    (fpadd.fp_flags & PF_OSFP_TS0) ? "0" : "",
+	    (fpadd.fp_flags & PF_OSFP_MSS_MOD) ? "%" :
+	    (fpadd.fp_flags & PF_OSFP_MSS_DC) ? "*" : "",
+	    fpadd.fp_mss,
+	    (fpadd.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" :
+	    (fpadd.fp_flags & PF_OSFP_WSCALE_DC) ? "*" : "",
+	    fpadd.fp_wscale,
+	    fpioc->fp_os.fp_os);
+#endif
+
+	if ((fp = pf_osfp_find_exact(&V_pf_osfp_list, &fpadd))) {
+		/* Signature already known: just attach another OS entry. */
+		SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) {
+			if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os))
+				return (EEXIST);
+		}
+		if ((entry = malloc(sizeof(*entry), M_PFOSFP, M_NOWAIT))
+		    == NULL)
+			return (ENOMEM);
+	} else {
+		/* New signature: allocate and populate a fingerprint. */
+		if ((fp = malloc(sizeof(*fp), M_PFOSFP, M_ZERO | M_NOWAIT))
+		    == NULL)
+			return (ENOMEM);
+		fp->fp_tcpopts = fpioc->fp_tcpopts;
+		fp->fp_wsize = fpioc->fp_wsize;
+		fp->fp_psize = fpioc->fp_psize;
+		fp->fp_mss = fpioc->fp_mss;
+		fp->fp_flags = fpioc->fp_flags;
+		fp->fp_optcnt = fpioc->fp_optcnt;
+		fp->fp_wscale = fpioc->fp_wscale;
+		fp->fp_ttl = fpioc->fp_ttl;
+		SLIST_INIT(&fp->fp_oses);
+		if ((entry = malloc(sizeof(*entry), M_PFOSFP, M_NOWAIT))
+		    == NULL) {
+			free(fp, M_PFOSFP);
+			return (ENOMEM);
+		}
+		pf_osfp_insert(&V_pf_osfp_list, fp);
+	}
+	memcpy(entry, &fpioc->fp_os, sizeof(*entry));
+
+	/* Make sure the strings are NUL terminated */
+	entry->fp_class_nm[sizeof(entry->fp_class_nm)-1] = '\0';
+	entry->fp_version_nm[sizeof(entry->fp_version_nm)-1] = '\0';
+	entry->fp_subtype_nm[sizeof(entry->fp_subtype_nm)-1] = '\0';
+
+	SLIST_INSERT_HEAD(&fp->fp_oses, entry, fp_entry);
+
+#ifdef PFDEBUG
+	if ((fp = pf_osfp_validate()))
+		printf("Invalid fingerprint list\n");
+#endif /* PFDEBUG */
+	return (0);
+}
+
+
+/*
+ * Find a fingerprint in the list that matches "find".  Entries may
+ * use don't-care (_DC) or modulus (_MOD) flags per field, and the
+ * entry's TTL may exceed the observed one by up to "ttldiff" (hosts
+ * start at a fixed TTL which decrements in transit).  Window size can
+ * additionally be expressed as a multiple of the MSS or the MTU.
+ * Returns the first matching fingerprint, or NULL.
+ */
+static struct pf_os_fingerprint *
+pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find,
+    u_int8_t ttldiff)
+{
+	struct pf_os_fingerprint *f;
+
+/*
+ * Field matcher: skip the candidate unless the field is don't-care,
+ * matches exactly, or (for _MOD) evenly divides the observed value.
+ */
+#define	MATCH_INT(_MOD, _DC, _field)					\
+	/* Check MOD or EQUAL */					\
+	if ((f->fp_flags & _DC) == 0) {					\
+		if ((f->fp_flags & _MOD) == 0) {			\
+			if (f->_field != find->_field)			\
+				continue;				\
+		} else {						\
+			if (f->_field == 0 || find->_field % f->_field)	\
+				continue;				\
+		}							\
+	}
+
+	SLIST_FOREACH(f, list, fp_next) {
+		/* Option signature, option count, TTL and flags must agree. */
+		if (f->fp_tcpopts != find->fp_tcpopts ||
+		    f->fp_optcnt != find->fp_optcnt ||
+		    f->fp_ttl < find->fp_ttl ||
+		    f->fp_ttl - find->fp_ttl > ttldiff ||
+		    (f->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0)) !=
+		    (find->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0)))
+			continue;
+
+		MATCH_INT(PF_OSFP_PSIZE_MOD, PF_OSFP_PSIZE_DC, fp_psize)
+		MATCH_INT(PF_OSFP_MSS_MOD, PF_OSFP_MSS_DC, fp_mss)
+		MATCH_INT(PF_OSFP_WSCALE_MOD, PF_OSFP_WSCALE_DC, fp_wscale)
+		if ((f->fp_flags & PF_OSFP_WSIZE_DC) == 0) {
+			if (f->fp_flags & PF_OSFP_WSIZE_MSS) {
+				/* window expressed as a multiple of the MSS */
+				if (find->fp_mss == 0)
+					continue;
+
+/*
+ * Some "smart" NAT devices and DSL routers will tweak the MSS size and
+ * will set it to whatever is suitable for the link type.
+ */
+#define	SMART_MSS	1460
+				if ((find->fp_wsize % find->fp_mss ||
+				    find->fp_wsize / find->fp_mss !=
+				    f->fp_wsize) &&
+				    (find->fp_wsize % SMART_MSS ||
+				    find->fp_wsize / SMART_MSS !=
+				    f->fp_wsize))
+					continue;
+			} else if (f->fp_flags & PF_OSFP_WSIZE_MTU) {
+				/* window expressed as a multiple of the MTU */
+				if (find->fp_mss == 0)
+					continue;
+
+#define	MTUOFF	(sizeof(struct ip) + sizeof(struct tcphdr))
+#define	SMART_MTU	(SMART_MSS + MTUOFF)
+				if ((find->fp_wsize % (find->fp_mss + MTUOFF) ||
+				    find->fp_wsize / (find->fp_mss + MTUOFF) !=
+				    f->fp_wsize) &&
+				    (find->fp_wsize % SMART_MTU ||
+				    find->fp_wsize / SMART_MTU !=
+				    f->fp_wsize))
+					continue;
+			} else if (f->fp_flags & PF_OSFP_WSIZE_MOD) {
+				if (f->fp_wsize == 0 || find->fp_wsize %
+				    f->fp_wsize)
+					continue;
+			} else {
+				if (f->fp_wsize != find->fp_wsize)
+					continue;
+			}
+		}
+		return (f);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Find an exact fingerprint in the list: all fields (including the
+ * flag bits) must be equal — no wildcard or modulus matching, used
+ * when adding/merging entries.  Returns the entry or NULL.
+ */
+static struct pf_os_fingerprint *
+pf_osfp_find_exact(struct pf_osfp_list *list, struct pf_os_fingerprint *find)
+{
+	struct pf_os_fingerprint *f;
+
+	SLIST_FOREACH(f, list, fp_next) {
+		if (f->fp_tcpopts == find->fp_tcpopts &&
+		    f->fp_wsize == find->fp_wsize &&
+		    f->fp_psize == find->fp_psize &&
+		    f->fp_mss == find->fp_mss &&
+		    f->fp_flags == find->fp_flags &&
+		    f->fp_optcnt == find->fp_optcnt &&
+		    f->fp_wscale == find->fp_wscale &&
+		    f->fp_ttl == find->fp_ttl)
+			return (f);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Insert a fingerprint at the tail of the list (the SLIST is walked
+ * to find the last element since it only supports head insertion).
+ */
+static void
+pf_osfp_insert(struct pf_osfp_list *list, struct pf_os_fingerprint *ins)
+{
+	struct pf_os_fingerprint *f, *prev = NULL;
+
+	/* XXX need to go semi tree based.  can key on tcp options */
+
+	SLIST_FOREACH(f, list, fp_next)
+		prev = f;
+	if (prev)
+		SLIST_INSERT_AFTER(prev, ins, fp_next);
+	else
+		SLIST_INSERT_HEAD(list, ins, fp_next);
+}
+
+/*
+ * Fill a fingerprint by its number (from an ioctl).  The number is a
+ * flat index over every (fingerprint, OS entry) pair in list order.
+ * Returns 0 and populates *fpioc on success, EBUSY if the index is
+ * out of range.
+ */
+int
+pf_osfp_get(struct pf_osfp_ioctl *fpioc)
+{
+	struct pf_os_fingerprint *fp;
+	struct pf_osfp_entry *entry;
+	int num = fpioc->fp_getnum;	/* saved before the memset below */
+	int i = 0;
+
+
+	memset(fpioc, 0, sizeof(*fpioc));
+	SLIST_FOREACH(fp, &V_pf_osfp_list, fp_next) {
+		SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) {
+			if (i++ == num) {
+				fpioc->fp_mss = fp->fp_mss;
+				fpioc->fp_wsize = fp->fp_wsize;
+				fpioc->fp_flags = fp->fp_flags;
+				fpioc->fp_psize = fp->fp_psize;
+				fpioc->fp_ttl = fp->fp_ttl;
+				fpioc->fp_wscale = fp->fp_wscale;
+				fpioc->fp_getnum = num;
+				memcpy(&fpioc->fp_os, entry,
+				    sizeof(fpioc->fp_os));
+				return (0);
+			}
+		}
+	}
+
+	return (EBUSY);
+}
+
+
+#ifdef PFDEBUG
+/*
+ * Validate that each signature is reachable: re-derive a concrete
+ * observation from every entry and check that pf_osfp_find() maps it
+ * back to the same entry (i.e. no earlier entry shadows it).  Returns
+ * the first unreachable fingerprint, or NULL if the list is sound.
+ */
+static struct pf_os_fingerprint *
+pf_osfp_validate(void)
+{
+	struct pf_os_fingerprint *f, *f2, find;
+
+	SLIST_FOREACH(f, &V_pf_osfp_list, fp_next) {
+		memcpy(&find, f, sizeof(find));
+
+		/* We do a few MSS/th_win percolations to make things unique */
+		if (find.fp_mss == 0)
+			find.fp_mss = 128;
+		if (f->fp_flags & PF_OSFP_WSIZE_MSS)
+			find.fp_wsize *= find.fp_mss;
+		else if (f->fp_flags & PF_OSFP_WSIZE_MTU)
+			find.fp_wsize *= (find.fp_mss + 40);
+		else if (f->fp_flags & PF_OSFP_WSIZE_MOD)
+			find.fp_wsize *= 2;
+		if (f != (f2 = pf_osfp_find(&V_pf_osfp_list, &find, 0))) {
+			if (f2)
+				printf("Found \"%s %s %s\" instead of "
+				    "\"%s %s %s\"\n",
+				    SLIST_FIRST(&f2->fp_oses)->fp_class_nm,
+				    SLIST_FIRST(&f2->fp_oses)->fp_version_nm,
+				    SLIST_FIRST(&f2->fp_oses)->fp_subtype_nm,
+				    SLIST_FIRST(&f->fp_oses)->fp_class_nm,
+				    SLIST_FIRST(&f->fp_oses)->fp_version_nm,
+				    SLIST_FIRST(&f->fp_oses)->fp_subtype_nm);
+			else
+				printf("Couldn't find \"%s %s %s\"\n",
+				    SLIST_FIRST(&f->fp_oses)->fp_class_nm,
+				    SLIST_FIRST(&f->fp_oses)->fp_version_nm,
+				    SLIST_FIRST(&f->fp_oses)->fp_subtype_nm);
+			return (f);
+		}
+	}
+	return (NULL);
+}
+#endif /* PFDEBUG */
diff --git a/freebsd/sys/netpfil/pf/pf_ruleset.c b/freebsd/sys/netpfil/pf/pf_ruleset.c
new file mode 100644
index 00000000..e16643aa
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/pf_ruleset.c
@@ -0,0 +1,426 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2001 Daniel Hartmeier
+ * Copyright (c) 2002,2003 Henning Brauer
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Effort sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F30602-01-2-0537.
+ *
+ * $OpenBSD: pf_ruleset.c,v 1.2 2008/12/18 15:31:37 dhill Exp $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/socket.h>
+#ifdef _KERNEL
+# include <sys/systm.h>
+# include <sys/refcount.h>
+#endif /* _KERNEL */
+#include <sys/mbuf.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+
+#include <net/if.h>
+#include <net/vnet.h>
+#include <net/pfvar.h>
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif /* INET6 */
+
+
+#ifdef _KERNEL
+#define DPFPRINTF(format, x...) \
+ if (V_pf_status.debug >= PF_DEBUG_NOISY) \
+ printf(format , ##x)
+#define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO)
+#define rs_free(x) free(x, M_TEMP)
+
+#else
+/* Userland equivalents so we can lend code to pfctl et al. */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#define rs_malloc(x) calloc(1, x)
+#define rs_free(x) free(x)
+
+#ifdef PFDEBUG
+#include <sys/stdarg.h>
+#define DPFPRINTF(format, x...) fprintf(stderr, format , ##x)
+#else
+#define DPFPRINTF(format, x...) ((void)0)
+#endif /* PFDEBUG */
+#endif /* _KERNEL */
+
+#ifdef _KERNEL
+VNET_DEFINE(struct pf_anchor_global, pf_anchors);
+VNET_DEFINE(struct pf_anchor, pf_main_anchor);
+#else /* ! _KERNEL */
+struct pf_anchor_global pf_anchors;
+struct pf_anchor pf_main_anchor;
+#undef V_pf_anchors
+#define V_pf_anchors pf_anchors
+#undef pf_main_ruleset
+#define pf_main_ruleset pf_main_anchor.ruleset
+#endif /* _KERNEL */
+
+static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *);
+
+static struct pf_anchor *pf_find_anchor(const char *);
+
+RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare);
+RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare);
+
+/* Three-way comparison of anchors by full path, for the RB trees. */
+static __inline int
+pf_anchor_compare(struct pf_anchor *a, struct pf_anchor *b)
+{
+	int c = strcmp(a->path, b->path);
+
+	/* Normalize strcmp's result to exactly -1/0/1. */
+	return (c ? (c < 0 ? -1 : 1) : 0);
+}
+
+/*
+ * Map a rule action to the ruleset type that holds such rules.
+ * Returns PF_RULESET_MAX for unrecognized actions (callers treat
+ * that as an error).
+ */
+int
+pf_get_ruleset_number(u_int8_t action)
+{
+	switch (action) {
+	case PF_SCRUB:
+	case PF_NOSCRUB:
+		return (PF_RULESET_SCRUB);
+		break;	/* NOTREACHED */
+	case PF_PASS:
+	case PF_DROP:
+		return (PF_RULESET_FILTER);
+		break;	/* NOTREACHED */
+	case PF_NAT:
+	case PF_NONAT:
+		return (PF_RULESET_NAT);
+		break;	/* NOTREACHED */
+	case PF_BINAT:
+	case PF_NOBINAT:
+		return (PF_RULESET_BINAT);
+		break;	/* NOTREACHED */
+	case PF_RDR:
+	case PF_NORDR:
+		return (PF_RULESET_RDR);
+		break;	/* NOTREACHED */
+	default:
+		return (PF_RULESET_MAX);
+		break;	/* NOTREACHED */
+	}
+}
+
+/*
+ * Zero a ruleset and initialize each rule queue pair: queue[0]
+ * becomes the active list, queue[1] the inactive (staging) list.
+ */
+void
+pf_init_ruleset(struct pf_ruleset *ruleset)
+{
+	int i;
+
+	memset(ruleset, 0, sizeof(struct pf_ruleset));
+	for (i = 0; i < PF_RULESET_MAX; i++) {
+		TAILQ_INIT(&ruleset->rules[i].queues[0]);
+		TAILQ_INIT(&ruleset->rules[i].queues[1]);
+		ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0];
+		ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1];
+	}
+}
+
+/*
+ * Look up an anchor by its full path in the global RB tree.  A
+ * temporary key is heap-allocated (struct pf_anchor is presumably too
+ * large for the kernel stack — NOTE(review): confirm) just to carry
+ * the path for the comparison.  Returns NULL if absent or on
+ * allocation failure.
+ */
+static struct pf_anchor *
+pf_find_anchor(const char *path)
+{
+	struct pf_anchor	*key, *found;
+
+	key = (struct pf_anchor *)rs_malloc(sizeof(*key));
+	if (key == NULL)
+		return (NULL);
+	strlcpy(key->path, path, sizeof(key->path));
+	found = RB_FIND(pf_anchor_global, &V_pf_anchors, key);
+	rs_free(key);
+	return (found);
+}
+
+/*
+ * Resolve a path to its ruleset.  Leading slashes are ignored; an
+ * empty path means the main (root) ruleset.  Returns NULL when no
+ * anchor exists at that path.
+ */
+struct pf_ruleset *
+pf_find_ruleset(const char *path)
+{
+	struct pf_anchor	*anchor;
+
+	while (*path == '/')
+		path++;
+	if (!*path)
+		return (&pf_main_ruleset);
+	anchor = pf_find_anchor(path);
+	if (anchor == NULL)
+		return (NULL);
+	else
+		return (&anchor->ruleset);
+}
+
+/*
+ * Resolve a path to its ruleset, creating any missing anchors along
+ * the way.  Strategy: find the deepest existing ancestor by stripping
+ * path components from the right, then create each missing component
+ * left-to-right, inserting every new anchor into both the global RB
+ * tree and its parent's children tree.  Returns NULL on allocation
+ * failure, invalid component names, or RB insert collisions.
+ */
+struct pf_ruleset *
+pf_find_or_create_ruleset(const char *path)
+{
+	char			*p, *q, *r;
+	struct pf_ruleset	*ruleset;
+	struct pf_anchor	*anchor = NULL, *dup, *parent = NULL;
+
+	if (path[0] == 0)
+		return (&pf_main_ruleset);
+	while (*path == '/')
+		path++;
+	/* Fast path: the whole path already exists. */
+	ruleset = pf_find_ruleset(path);
+	if (ruleset != NULL)
+		return (ruleset);
+	p = (char *)rs_malloc(MAXPATHLEN);
+	if (p == NULL)
+		return (NULL);
+	strlcpy(p, path, MAXPATHLEN);
+	/* Strip components from the right until an existing ancestor is found. */
+	while (parent == NULL && (q = strrchr(p, '/')) != NULL) {
+		*q = 0;
+		if ((ruleset = pf_find_ruleset(p)) != NULL) {
+			parent = ruleset->anchor;
+			break;
+		}
+	}
+	/* q now points at the first component that must be created. */
+	if (q == NULL)
+		q = p;
+	else
+		q++;
+	strlcpy(p, path, MAXPATHLEN);
+	if (!*q) {
+		rs_free(p);
+		return (NULL);
+	}
+	/* Create each remaining component, deepest-existing parent first. */
+	while ((r = strchr(q, '/')) != NULL || *q) {
+		if (r != NULL)
+			*r = 0;
+		/* Reject empty/oversized names and paths that would overflow. */
+		if (!*q || strlen(q) >= PF_ANCHOR_NAME_SIZE ||
+		    (parent != NULL && strlen(parent->path) >=
+		    MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)) {
+			rs_free(p);
+			return (NULL);
+		}
+		anchor = (struct pf_anchor *)rs_malloc(sizeof(*anchor));
+		if (anchor == NULL) {
+			rs_free(p);
+			return (NULL);
+		}
+		RB_INIT(&anchor->children);
+		strlcpy(anchor->name, q, sizeof(anchor->name));
+		/* Full path = parent path + "/" + component name. */
+		if (parent != NULL) {
+			strlcpy(anchor->path, parent->path,
+			    sizeof(anchor->path));
+			strlcat(anchor->path, "/", sizeof(anchor->path));
+		}
+		strlcat(anchor->path, anchor->name, sizeof(anchor->path));
+		if ((dup = RB_INSERT(pf_anchor_global, &V_pf_anchors, anchor)) !=
+		    NULL) {
+			printf("pf_find_or_create_ruleset: RB_INSERT1 "
+			    "'%s' '%s' collides with '%s' '%s'\n",
+			    anchor->path, anchor->name, dup->path, dup->name);
+			rs_free(anchor);
+			rs_free(p);
+			return (NULL);
+		}
+		if (parent != NULL) {
+			anchor->parent = parent;
+			if ((dup = RB_INSERT(pf_anchor_node, &parent->children,
+			    anchor)) != NULL) {
+				printf("pf_find_or_create_ruleset: "
+				    "RB_INSERT2 '%s' '%s' collides with "
+				    "'%s' '%s'\n", anchor->path, anchor->name,
+				    dup->path, dup->name);
+				/* Undo the global insert before bailing out. */
+				RB_REMOVE(pf_anchor_global, &V_pf_anchors,
+				    anchor);
+				rs_free(anchor);
+				rs_free(p);
+				return (NULL);
+			}
+		}
+		pf_init_ruleset(&anchor->ruleset);
+		anchor->ruleset.anchor = anchor;
+		parent = anchor;
+		if (r != NULL)
+			q = r + 1;
+		else
+			*q = 0;
+	}
+	rs_free(p);
+	return (&anchor->ruleset);
+}
+
+/*
+ * Garbage-collect a ruleset if it is completely unused (no child
+ * anchors, no references, no tables, no open transaction, no rules
+ * active or staged), then walk up and try the same on each parent.
+ * The main ruleset is never removed.
+ */
+void
+pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset)
+{
+	struct pf_anchor	*parent;
+	int			 i;
+
+	while (ruleset != NULL) {
+		if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL ||
+		    !RB_EMPTY(&ruleset->anchor->children) ||
+		    ruleset->anchor->refcnt > 0 || ruleset->tables > 0 ||
+		    ruleset->topen)
+			return;
+		for (i = 0; i < PF_RULESET_MAX; ++i)
+			if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) ||
+			    !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) ||
+			    ruleset->rules[i].inactive.open)
+				return;
+		/* Unlink from both trees, free, and ascend to the parent. */
+		RB_REMOVE(pf_anchor_global, &V_pf_anchors, ruleset->anchor);
+		if ((parent = ruleset->anchor->parent) != NULL)
+			RB_REMOVE(pf_anchor_node, &parent->children,
+			    ruleset->anchor);
+		rs_free(ruleset->anchor);
+		if (parent == NULL)
+			return;
+		ruleset = &parent->ruleset;
+	}
+}
+
+/*
+ * Attach rule "r" to the anchor named by "name", relative to ruleset
+ * "s".  Absolute names start with '/'; relative names may climb with
+ * leading "../" components (counted in r->anchor_relative).  A
+ * trailing "/*" sets r->anchor_wildcard and is stripped before
+ * resolution.  The target ruleset is created on demand and its
+ * anchor's refcnt is bumped.  Returns 0 on success, 1 on error.
+ */
+int
+pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s,
+    const char *name)
+{
+	char			*p, *path;
+	struct pf_ruleset	*ruleset;
+
+	r->anchor = NULL;
+	r->anchor_relative = 0;
+	r->anchor_wildcard = 0;
+	if (!name[0])
+		return (0);
+	path = (char *)rs_malloc(MAXPATHLEN);
+	if (path == NULL)
+		return (1);
+	if (name[0] == '/')
+		strlcpy(path, name + 1, MAXPATHLEN);
+	else {
+		/* relative path */
+		r->anchor_relative = 1;
+		if (s->anchor == NULL || !s->anchor->path[0])
+			path[0] = 0;
+		else
+			strlcpy(path, s->anchor->path, MAXPATHLEN);
+		/* Each "../" strips one component off the base path. */
+		while (name[0] == '.' && name[1] == '.' && name[2] == '/') {
+			if (!path[0]) {
+				printf("pf_anchor_setup: .. beyond root\n");
+				rs_free(path);
+				return (1);
+			}
+			if ((p = strrchr(path, '/')) != NULL)
+				*p = 0;
+			else
+				path[0] = 0;
+			r->anchor_relative++;
+			name += 3;
+		}
+		if (path[0])
+			strlcat(path, "/", MAXPATHLEN);
+		strlcat(path, name, MAXPATHLEN);
+	}
+	if ((p = strrchr(path, '/')) != NULL && !strcmp(p, "/*")) {
+		r->anchor_wildcard = 1;
+		*p = 0;
+	}
+	ruleset = pf_find_or_create_ruleset(path);
+	rs_free(path);
+	if (ruleset == NULL || ruleset->anchor == NULL) {
+		printf("pf_anchor_setup: ruleset\n");
+		return (1);
+	}
+	r->anchor = ruleset->anchor;
+	r->anchor->refcnt++;
+	return (0);
+}
+
+/*
+ * Reconstruct the user-visible anchor call string for rule "r" into
+ * pr->anchor_call: an absolute "/path" when the rule was not relative,
+ * otherwise "../" repeated anchor_relative-1 times followed by the
+ * path below the common ancestor, with "/*" re-appended for wildcard
+ * anchors.  Returns 0 on success, 1 on inconsistency or OOM.
+ */
+int
+pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r,
+    struct pfioc_rule *pr)
+{
+	pr->anchor_call[0] = 0;
+	if (r->anchor == NULL)
+		return (0);
+	if (!r->anchor_relative) {
+		strlcpy(pr->anchor_call, "/", sizeof(pr->anchor_call));
+		strlcat(pr->anchor_call, r->anchor->path,
+		    sizeof(pr->anchor_call));
+	} else {
+		char	*a, *p;
+		int	 i;
+
+		a = (char *)rs_malloc(MAXPATHLEN);
+		if (a == NULL)
+			return (1);
+		if (rs->anchor == NULL)
+			a[0] = 0;
+		else
+			strlcpy(a, rs->anchor->path, MAXPATHLEN);
+		/* Climb: strip one component and emit "../" per level. */
+		for (i = 1; i < r->anchor_relative; ++i) {
+			if ((p = strrchr(a, '/')) == NULL)
+				p = a;
+			*p = 0;
+			strlcat(pr->anchor_call, "../",
+			    sizeof(pr->anchor_call));
+		}
+		/* "a" must be a prefix of the rule's anchor path. */
+		if (strncmp(a, r->anchor->path, strlen(a))) {
+			printf("pf_anchor_copyout: '%s' '%s'\n", a,
+			    r->anchor->path);
+			rs_free(a);
+			return (1);
+		}
+		if (strlen(r->anchor->path) > strlen(a))
+			strlcat(pr->anchor_call, r->anchor->path + (a[0] ?
+			    strlen(a) + 1 : 0), sizeof(pr->anchor_call));
+		rs_free(a);
+	}
+	if (r->anchor_wildcard)
+		strlcat(pr->anchor_call, pr->anchor_call[0] ? "/*" : "*",
+		    sizeof(pr->anchor_call));
+	return (0);
+}
+
+/*
+ * Drop rule "r"'s reference on its anchor; when the last reference
+ * goes away the (possibly now empty) ruleset is garbage-collected.
+ * A non-positive refcnt indicates corruption and is only logged.
+ */
+void
+pf_anchor_remove(struct pf_rule *r)
+{
+	if (r->anchor == NULL)
+		return;
+	if (r->anchor->refcnt <= 0) {
+		printf("pf_anchor_remove: broken refcount\n");
+		r->anchor = NULL;
+		return;
+	}
+	if (!--r->anchor->refcnt)
+		pf_remove_if_empty_ruleset(&r->anchor->ruleset);
+	r->anchor = NULL;
+}
diff --git a/freebsd/sys/netpfil/pf/pf_table.c b/freebsd/sys/netpfil/pf/pf_table.c
new file mode 100644
index 00000000..26b6f4e9
--- /dev/null
+++ b/freebsd/sys/netpfil/pf/pf_table.c
@@ -0,0 +1,2195 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2002 Cedric Berger
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $OpenBSD: pf_table.c,v 1.79 2008/10/08 06:24:50 mcbride Exp $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/refcount.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <vm/uma.h>
+
+#include <net/if.h>
+#include <net/vnet.h>
+#include <net/pfvar.h>
+
+/*
+ * Reject an ioctl flags word carrying any flag outside 'oklist'
+ * (only bits within PFR_FLAG_ALLMASK are considered).
+ */
+#define ACCEPT_FLAGS(flags, oklist) \
+ do { \
+ if ((flags & ~(oklist)) & \
+ PFR_FLAG_ALLMASK) \
+ return (EINVAL); \
+ } while (0)
+
+/* Populate a sockaddr_in with the given IPv4 address. */
+#define FILLIN_SIN(sin, addr) \
+ do { \
+ (sin).sin_len = sizeof(sin); \
+ (sin).sin_family = AF_INET; \
+ (sin).sin_addr = (addr); \
+ } while (0)
+
+/* Populate a sockaddr_in6 with the given IPv6 address. */
+#define FILLIN_SIN6(sin6, addr) \
+ do { \
+ (sin6).sin6_len = sizeof(sin6); \
+ (sin6).sin6_family = AF_INET6; \
+ (sin6).sin6_addr = (addr); \
+ } while (0)
+
+#define SWAP(type, a1, a2) \
+ do { \
+ type tmp = a1; \
+ a1 = a2; \
+ a2 = tmp; \
+ } while (0)
+
+/* Pointer to the pf_addr embedded in a sockaddr_union, by family. */
+#define SUNION2PF(su, af) (((af)==AF_INET) ? \
+ (struct pf_addr *)&(su)->sin.sin_addr : \
+ (struct pf_addr *)&(su)->sin6.sin6_addr)
+
+/* Address width in bits for the family. */
+#define AF_BITS(af) (((af)==AF_INET)?32:128)
+/* A prefix shorter than the address width denotes a network entry. */
+#define ADDR_NETWORK(ad) ((ad)->pfra_net < AF_BITS((ad)->pfra_af))
+#define KENTRY_NETWORK(ke) ((ke)->pfrke_net < AF_BITS((ke)->pfrke_af))
+/* Radix-internal root nodes must be ignored by table code. */
+#define KENTRY_RNF_ROOT(ke) \
+ ((((struct radix_node *)(ke))->rn_flags & RNF_ROOT) != 0)
+
+#define NO_ADDRESSES (-1)
+#define ENQUEUE_UNMARKED_ONLY (1)
+#define INVERT_NEG_FLAG (1)
+
+/*
+ * Walk state handed to pfr_walktree() via rnh_walktree(): pfrw_op
+ * selects the per-node action, the union carries the op-specific
+ * cursor, and pfrw_free doubles as a remaining-slots / hit counter.
+ */
+struct pfr_walktree {
+ enum pfrw_op {
+ PFRW_MARK,
+ PFRW_SWEEP,
+ PFRW_ENQUEUE,
+ PFRW_GET_ADDRS,
+ PFRW_GET_ASTATS,
+ PFRW_POOL_GET,
+ PFRW_DYNADDR_UPDATE
+ } pfrw_op;
+ union {
+ struct pfr_addr *pfrw1_addr;
+ struct pfr_astats *pfrw1_astats;
+ struct pfr_kentryworkq *pfrw1_workq;
+ struct pfr_kentry *pfrw1_kentry;
+ struct pfi_dynaddr *pfrw1_dyn;
+ } pfrw_1;
+ int pfrw_free;
+};
+/* Convenience aliases for the union members and the counter. */
+#define pfrw_addr pfrw_1.pfrw1_addr
+#define pfrw_astats pfrw_1.pfrw1_astats
+#define pfrw_workq pfrw_1.pfrw1_workq
+#define pfrw_kentry pfrw_1.pfrw1_kentry
+#define pfrw_dyn pfrw_1.pfrw1_dyn
+#define pfrw_cnt pfrw_free
+
+/* Record the error code and jump to the common _bad cleanup label. */
+#define senderr(e) do { rv = (e); goto _bad; } while (0)
+
+static MALLOC_DEFINE(M_PFTABLE, "pf_table", "pf(4) tables structures");
+/* Per-VNET UMA zones for table entries and their counters. */
+static VNET_DEFINE(uma_zone_t, pfr_kentry_z);
+#define V_pfr_kentry_z VNET(pfr_kentry_z)
+static VNET_DEFINE(uma_zone_t, pfr_kcounters_z);
+#define V_pfr_kcounters_z VNET(pfr_kcounters_z)
+
+/* All-ones address, used as a broadcast/host mask helper. */
+static struct pf_addr pfr_ffaddr = {
+ .addr32 = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }
+};
+
+/* Internal helpers; see the definitions below for details. */
+static void pfr_copyout_addr(struct pfr_addr *,
+ struct pfr_kentry *ke);
+static int pfr_validate_addr(struct pfr_addr *);
+static void pfr_enqueue_addrs(struct pfr_ktable *,
+ struct pfr_kentryworkq *, int *, int);
+static void pfr_mark_addrs(struct pfr_ktable *);
+static struct pfr_kentry
+ *pfr_lookup_addr(struct pfr_ktable *,
+ struct pfr_addr *, int);
+static struct pfr_kentry *pfr_create_kentry(struct pfr_addr *);
+static void pfr_destroy_kentries(struct pfr_kentryworkq *);
+static void pfr_destroy_kentry(struct pfr_kentry *);
+static void pfr_insert_kentries(struct pfr_ktable *,
+ struct pfr_kentryworkq *, long);
+static void pfr_remove_kentries(struct pfr_ktable *,
+ struct pfr_kentryworkq *);
+static void pfr_clstats_kentries(struct pfr_kentryworkq *, long,
+ int);
+static void pfr_reset_feedback(struct pfr_addr *, int);
+static void pfr_prepare_network(union sockaddr_union *, int, int);
+static int pfr_route_kentry(struct pfr_ktable *,
+ struct pfr_kentry *);
+static int pfr_unroute_kentry(struct pfr_ktable *,
+ struct pfr_kentry *);
+static int pfr_walktree(struct radix_node *, void *);
+static int pfr_validate_table(struct pfr_table *, int, int);
+static int pfr_fix_anchor(char *);
+static void pfr_commit_ktable(struct pfr_ktable *, long);
+static void pfr_insert_ktables(struct pfr_ktableworkq *);
+static void pfr_insert_ktable(struct pfr_ktable *);
+static void pfr_setflags_ktables(struct pfr_ktableworkq *);
+static void pfr_setflags_ktable(struct pfr_ktable *, int);
+static void pfr_clstats_ktables(struct pfr_ktableworkq *, long,
+ int);
+static void pfr_clstats_ktable(struct pfr_ktable *, long, int);
+static struct pfr_ktable
+ *pfr_create_ktable(struct pfr_table *, long, int);
+static void pfr_destroy_ktables(struct pfr_ktableworkq *, int);
+static void pfr_destroy_ktable(struct pfr_ktable *, int);
+static int pfr_ktable_compare(struct pfr_ktable *,
+ struct pfr_ktable *);
+static struct pfr_ktable
+ *pfr_lookup_table(struct pfr_table *);
+static void pfr_clean_node_mask(struct pfr_ktable *,
+ struct pfr_kentryworkq *);
+static int pfr_table_count(struct pfr_table *, int);
+static int pfr_skip_table(struct pfr_table *,
+ struct pfr_ktable *, int);
+static struct pfr_kentry
+ *pfr_kentry_byidx(struct pfr_ktable *, int, int);
+
+/* Red-black tree of all ktables, ordered by pfr_ktable_compare(). */
+static RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare);
+static RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare);
+
+struct pfr_ktablehead pfr_ktables;
+struct pfr_table pfr_nulltable;
+int pfr_ktable_cnt;
+
+/*
+ * Create the UMA zones for table entries/counters and hook the
+ * entry zone into pf's resource-limit machinery.
+ */
+void
+pfr_initialize(void)
+{
+
+ V_pfr_kentry_z = uma_zcreate("pf table entries",
+ sizeof(struct pfr_kentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
+ 0);
+ V_pfr_kcounters_z = uma_zcreate("pf table counters",
+ sizeof(struct pfr_kcounters), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ V_pf_limits[PF_LIMIT_TABLE_ENTRIES].zone = V_pfr_kentry_z;
+ V_pf_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT;
+}
+
+/* Tear down the UMA zones created by pfr_initialize(). */
+void
+pfr_cleanup(void)
+{
+
+ uma_zdestroy(V_pfr_kentry_z);
+ uma_zdestroy(V_pfr_kcounters_z);
+}
+
+/*
+ * DIOCRCLRADDRS: remove every address from table 'tbl'.  The number
+ * of removed entries is returned through *ndel.  PFR_FLAG_DUMMY
+ * counts without modifying.  Requires the pf rules write lock.
+ */
+int
+pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags)
+{
+ struct pfr_ktable *kt;
+ struct pfr_kentryworkq workq;
+
+ PF_RULES_WASSERT();
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
+ if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_flags & PFR_TFLAG_CONST)
+ return (EPERM);
+ pfr_enqueue_addrs(kt, &workq, ndel, 0);
+
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ pfr_remove_kentries(kt, &workq);
+ KASSERT(kt->pfrkt_cnt == 0, ("%s: non-null pfrkt_cnt", __func__));
+ }
+ return (0);
+}
+
+/*
+ * DIOCRADDADDRS: add 'size' addresses from 'addr' to table 'tbl'.
+ * A throwaway table 'tmpkt' is used to detect duplicates within the
+ * request itself.  With PFR_FLAG_FEEDBACK, each pfra_fback reports
+ * the per-address outcome; PFR_FLAG_DUMMY only counts.  *nadd gets
+ * the number of addresses (to be) added.
+ */
+int
+pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
+ int *nadd, int flags)
+{
+ struct pfr_ktable *kt, *tmpkt;
+ struct pfr_kentryworkq workq;
+ struct pfr_kentry *p, *q;
+ struct pfr_addr *ad;
+ int i, rv, xadd = 0;
+ long tzero = time_second;
+
+ PF_RULES_WASSERT();
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
+ if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_flags & PFR_TFLAG_CONST)
+ return (EPERM);
+ tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0);
+ if (tmpkt == NULL)
+ return (ENOMEM);
+ SLIST_INIT(&workq);
+ for (i = 0, ad = addr; i < size; i++, ad++) {
+ if (pfr_validate_addr(ad))
+ senderr(EINVAL);
+ /* p: already in the target table; q: duplicate in request. */
+ p = pfr_lookup_addr(kt, ad, 1);
+ q = pfr_lookup_addr(tmpkt, ad, 1);
+ if (flags & PFR_FLAG_FEEDBACK) {
+ if (q != NULL)
+ ad->pfra_fback = PFR_FB_DUPLICATE;
+ else if (p == NULL)
+ ad->pfra_fback = PFR_FB_ADDED;
+ else if (p->pfrke_not != ad->pfra_not)
+ ad->pfra_fback = PFR_FB_CONFLICT;
+ else
+ ad->pfra_fback = PFR_FB_NONE;
+ }
+ if (p == NULL && q == NULL) {
+ p = pfr_create_kentry(ad);
+ if (p == NULL)
+ senderr(ENOMEM);
+ if (pfr_route_kentry(tmpkt, p)) {
+ pfr_destroy_kentry(p);
+ ad->pfra_fback = PFR_FB_NONE;
+ } else {
+ SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
+ xadd++;
+ }
+ }
+ }
+ /* Detach entries from the scratch table before the real insert. */
+ pfr_clean_node_mask(tmpkt, &workq);
+ if (!(flags & PFR_FLAG_DUMMY))
+ pfr_insert_kentries(kt, &workq, tzero);
+ else
+ pfr_destroy_kentries(&workq);
+ if (nadd != NULL)
+ *nadd = xadd;
+ pfr_destroy_ktable(tmpkt, 0);
+ return (0);
+_bad:
+ pfr_clean_node_mask(tmpkt, &workq);
+ pfr_destroy_kentries(&workq);
+ if (flags & PFR_FLAG_FEEDBACK)
+ pfr_reset_feedback(addr, size);
+ pfr_destroy_ktable(tmpkt, 0);
+ return (rv);
+}
+
+/*
+ * DIOCRDELADDRS: delete 'size' addresses from table 'tbl'.  Entries
+ * are flagged via pfrke_mark first, then collected and removed in a
+ * second pass.  *ndel gets the number deleted; FEEDBACK/DUMMY as in
+ * pfr_add_addrs().
+ */
+int
+pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
+ int *ndel, int flags)
+{
+ struct pfr_ktable *kt;
+ struct pfr_kentryworkq workq;
+ struct pfr_kentry *p;
+ struct pfr_addr *ad;
+ int i, rv, xdel = 0, log = 1;
+
+ PF_RULES_WASSERT();
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
+ if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_flags & PFR_TFLAG_CONST)
+ return (EPERM);
+ /*
+ * there are two algorithms to choose from here.
+ * with:
+ * n: number of addresses to delete
+ * N: number of addresses in the table
+ *
+ * one is O(N) and is better for large 'n'
+ * one is O(n*LOG(N)) and is better for small 'n'
+ *
+ * following code try to decide which one is best.
+ */
+ for (i = kt->pfrkt_cnt; i > 0; i >>= 1)
+ log++;
+ if (size > kt->pfrkt_cnt/log) {
+ /* full table scan */
+ pfr_mark_addrs(kt);
+ } else {
+ /* iterate over addresses to delete */
+ for (i = 0, ad = addr; i < size; i++, ad++) {
+ if (pfr_validate_addr(ad))
+ return (EINVAL);
+ p = pfr_lookup_addr(kt, ad, 1);
+ if (p != NULL)
+ p->pfrke_mark = 0;
+ }
+ }
+ SLIST_INIT(&workq);
+ for (i = 0, ad = addr; i < size; i++, ad++) {
+ if (pfr_validate_addr(ad))
+ senderr(EINVAL);
+ p = pfr_lookup_addr(kt, ad, 1);
+ if (flags & PFR_FLAG_FEEDBACK) {
+ if (p == NULL)
+ ad->pfra_fback = PFR_FB_NONE;
+ else if (p->pfrke_not != ad->pfra_not)
+ ad->pfra_fback = PFR_FB_CONFLICT;
+ else if (p->pfrke_mark)
+ ad->pfra_fback = PFR_FB_DUPLICATE;
+ else
+ ad->pfra_fback = PFR_FB_DELETED;
+ }
+ /* Mark so a repeated address in the request counts once. */
+ if (p != NULL && p->pfrke_not == ad->pfra_not &&
+ !p->pfrke_mark) {
+ p->pfrke_mark = 1;
+ SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
+ xdel++;
+ }
+ }
+ if (!(flags & PFR_FLAG_DUMMY))
+ pfr_remove_kentries(kt, &workq);
+ if (ndel != NULL)
+ *ndel = xdel;
+ return (0);
+_bad:
+ if (flags & PFR_FLAG_FEEDBACK)
+ pfr_reset_feedback(addr, size);
+ return (rv);
+}
+
+/*
+ * DIOCRSETADDRS: make table 'tbl' contain exactly the 'size'
+ * addresses in 'addr' — adding missing ones, deleting unmentioned
+ * ones and toggling the 'not' flag on changed ones.  *nadd/*ndel/
+ * *nchange report the respective counts; with FEEDBACK and a
+ * sufficient *size2, deleted addresses are appended after the
+ * input for the caller's inspection.
+ */
+int
+pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
+ int *size2, int *nadd, int *ndel, int *nchange, int flags,
+ u_int32_t ignore_pfrt_flags)
+{
+ struct pfr_ktable *kt, *tmpkt;
+ struct pfr_kentryworkq addq, delq, changeq;
+ struct pfr_kentry *p, *q;
+ struct pfr_addr ad;
+ int i, rv, xadd = 0, xdel = 0, xchange = 0;
+ long tzero = time_second;
+
+ PF_RULES_WASSERT();
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
+ if (pfr_validate_table(tbl, ignore_pfrt_flags, flags &
+ PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_flags & PFR_TFLAG_CONST)
+ return (EPERM);
+ tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0);
+ if (tmpkt == NULL)
+ return (ENOMEM);
+ /* Unmark everything; entries still unmarked at the end get deleted. */
+ pfr_mark_addrs(kt);
+ SLIST_INIT(&addq);
+ SLIST_INIT(&delq);
+ SLIST_INIT(&changeq);
+ for (i = 0; i < size; i++) {
+ /*
+ * XXXGL: undertand pf_if usage of this function
+ * and make ad a moving pointer
+ */
+ bcopy(addr + i, &ad, sizeof(ad));
+ if (pfr_validate_addr(&ad))
+ senderr(EINVAL);
+ ad.pfra_fback = PFR_FB_NONE;
+ p = pfr_lookup_addr(kt, &ad, 1);
+ if (p != NULL) {
+ if (p->pfrke_mark) {
+ ad.pfra_fback = PFR_FB_DUPLICATE;
+ goto _skip;
+ }
+ p->pfrke_mark = 1;
+ if (p->pfrke_not != ad.pfra_not) {
+ SLIST_INSERT_HEAD(&changeq, p, pfrke_workq);
+ ad.pfra_fback = PFR_FB_CHANGED;
+ xchange++;
+ }
+ } else {
+ /* Not in the table; stage in tmpkt to catch dups. */
+ q = pfr_lookup_addr(tmpkt, &ad, 1);
+ if (q != NULL) {
+ ad.pfra_fback = PFR_FB_DUPLICATE;
+ goto _skip;
+ }
+ p = pfr_create_kentry(&ad);
+ if (p == NULL)
+ senderr(ENOMEM);
+ if (pfr_route_kentry(tmpkt, p)) {
+ pfr_destroy_kentry(p);
+ ad.pfra_fback = PFR_FB_NONE;
+ } else {
+ SLIST_INSERT_HEAD(&addq, p, pfrke_workq);
+ ad.pfra_fback = PFR_FB_ADDED;
+ xadd++;
+ }
+ }
+_skip:
+ if (flags & PFR_FLAG_FEEDBACK)
+ bcopy(&ad, addr + i, sizeof(ad));
+ }
+ /* Everything still unmarked is absent from the request: delete. */
+ pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY);
+ if ((flags & PFR_FLAG_FEEDBACK) && *size2) {
+ if (*size2 < size+xdel) {
+ /* Caller's buffer too small; report needed size. */
+ *size2 = size+xdel;
+ senderr(0);
+ }
+ i = 0;
+ SLIST_FOREACH(p, &delq, pfrke_workq) {
+ pfr_copyout_addr(&ad, p);
+ ad.pfra_fback = PFR_FB_DELETED;
+ bcopy(&ad, addr + size + i, sizeof(ad));
+ i++;
+ }
+ }
+ pfr_clean_node_mask(tmpkt, &addq);
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ pfr_insert_kentries(kt, &addq, tzero);
+ pfr_remove_kentries(kt, &delq);
+ pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG);
+ } else
+ pfr_destroy_kentries(&addq);
+ if (nadd != NULL)
+ *nadd = xadd;
+ if (ndel != NULL)
+ *ndel = xdel;
+ if (nchange != NULL)
+ *nchange = xchange;
+ if ((flags & PFR_FLAG_FEEDBACK) && size2)
+ *size2 = size+xdel;
+ pfr_destroy_ktable(tmpkt, 0);
+ return (0);
+_bad:
+ pfr_clean_node_mask(tmpkt, &addq);
+ pfr_destroy_kentries(&addq);
+ if (flags & PFR_FLAG_FEEDBACK)
+ pfr_reset_feedback(addr, size);
+ pfr_destroy_ktable(tmpkt, 0);
+ return (rv);
+}
+
+/*
+ * DIOCRTSTADDRS: test which of the given host addresses match the
+ * table.  *nmatch counts positive (non-negated) matches; with
+ * PFR_FLAG_REPLACE each input is overwritten with the matched
+ * entry.  Network (prefix) inputs are rejected.
+ */
+int
+pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
+ int *nmatch, int flags)
+{
+ struct pfr_ktable *kt;
+ struct pfr_kentry *p;
+ struct pfr_addr *ad;
+ int i, xmatch = 0;
+
+ PF_RULES_RASSERT();
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_REPLACE);
+ if (pfr_validate_table(tbl, 0, 0))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+
+ for (i = 0, ad = addr; i < size; i++, ad++) {
+ if (pfr_validate_addr(ad))
+ return (EINVAL);
+ if (ADDR_NETWORK(ad))
+ return (EINVAL);
+ p = pfr_lookup_addr(kt, ad, 0);
+ if (flags & PFR_FLAG_REPLACE)
+ pfr_copyout_addr(ad, p);
+ ad->pfra_fback = (p == NULL) ? PFR_FB_NONE :
+ (p->pfrke_not ? PFR_FB_NOTMATCH : PFR_FB_MATCH);
+ if (p != NULL && !p->pfrke_not)
+ xmatch++;
+ }
+ if (nmatch != NULL)
+ *nmatch = xmatch;
+ return (0);
+}
+
+/*
+ * DIOCRGETADDRS: copy all addresses of the table into 'addr'.
+ * If *size is too small, only the required size is returned
+ * (without copying), mirroring the usual two-call ioctl pattern.
+ */
+int
+pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size,
+ int flags)
+{
+ struct pfr_ktable *kt;
+ struct pfr_walktree w;
+ int rv;
+
+ PF_RULES_RASSERT();
+
+ ACCEPT_FLAGS(flags, 0);
+ if (pfr_validate_table(tbl, 0, 0))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_cnt > *size) {
+ *size = kt->pfrkt_cnt;
+ return (0);
+ }
+
+ bzero(&w, sizeof(w));
+ w.pfrw_op = PFRW_GET_ADDRS;
+ w.pfrw_addr = addr;
+ w.pfrw_free = kt->pfrkt_cnt;
+ /* Walk both address families; the walker fills 'addr'. */
+ rv = kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh, pfr_walktree, &w);
+ if (!rv)
+ rv = kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh,
+ pfr_walktree, &w);
+ if (rv)
+ return (rv);
+
+ KASSERT(w.pfrw_free == 0, ("%s: corruption detected (%d)", __func__,
+ w.pfrw_free));
+
+ *size = kt->pfrkt_cnt;
+ return (0);
+}
+
+/*
+ * DIOCRGETASTATS: like pfr_get_addrs() but copies per-address
+ * statistics (pfr_astats).  The PFR_FLAG_CLSTATS branch would also
+ * clear the counters, but the flag is currently rejected by
+ * ACCEPT_FLAGS (see the XXX note below).
+ */
+int
+pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size,
+ int flags)
+{
+ struct pfr_ktable *kt;
+ struct pfr_walktree w;
+ struct pfr_kentryworkq workq;
+ int rv;
+ long tzero = time_second;
+
+ PF_RULES_RASSERT();
+
+ /* XXX PFR_FLAG_CLSTATS disabled */
+ ACCEPT_FLAGS(flags, 0);
+ if (pfr_validate_table(tbl, 0, 0))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ if (kt->pfrkt_cnt > *size) {
+ *size = kt->pfrkt_cnt;
+ return (0);
+ }
+
+ bzero(&w, sizeof(w));
+ w.pfrw_op = PFRW_GET_ASTATS;
+ w.pfrw_astats = addr;
+ w.pfrw_free = kt->pfrkt_cnt;
+ rv = kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh, pfr_walktree, &w);
+ if (!rv)
+ rv = kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh,
+ pfr_walktree, &w);
+ if (!rv && (flags & PFR_FLAG_CLSTATS)) {
+ pfr_enqueue_addrs(kt, &workq, NULL, 0);
+ pfr_clstats_kentries(&workq, tzero, 0);
+ }
+ if (rv)
+ return (rv);
+
+ if (w.pfrw_free) {
+ printf("pfr_get_astats: corruption detected (%d).\n",
+ w.pfrw_free);
+ return (ENOTTY);
+ }
+ *size = kt->pfrkt_cnt;
+ return (0);
+}
+
+/*
+ * DIOCRCLRASTATS: clear the statistics of the listed addresses.
+ * *nzero counts affected entries; FEEDBACK/DUMMY behave as in the
+ * other address ioctls.
+ */
+int
+pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size,
+ int *nzero, int flags)
+{
+ struct pfr_ktable *kt;
+ struct pfr_kentryworkq workq;
+ struct pfr_kentry *p;
+ struct pfr_addr *ad;
+ int i, rv, xzero = 0;
+
+ PF_RULES_WASSERT();
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
+ if (pfr_validate_table(tbl, 0, 0))
+ return (EINVAL);
+ kt = pfr_lookup_table(tbl);
+ if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (ESRCH);
+ SLIST_INIT(&workq);
+ for (i = 0, ad = addr; i < size; i++, ad++) {
+ if (pfr_validate_addr(ad))
+ senderr(EINVAL);
+ p = pfr_lookup_addr(kt, ad, 1);
+ if (flags & PFR_FLAG_FEEDBACK) {
+ ad->pfra_fback = (p != NULL) ?
+ PFR_FB_CLEARED : PFR_FB_NONE;
+ }
+ if (p != NULL) {
+ SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
+ xzero++;
+ }
+ }
+
+ if (!(flags & PFR_FLAG_DUMMY))
+ pfr_clstats_kentries(&workq, 0, 0);
+ if (nzero != NULL)
+ *nzero = xzero;
+ return (0);
+_bad:
+ if (flags & PFR_FLAG_FEEDBACK)
+ pfr_reset_feedback(addr, size);
+ return (rv);
+}
+
+/*
+ * Sanity-check a user-supplied pfr_addr: known family, prefix length
+ * within the family's width, no stray bits beyond the prefix (the
+ * address union is the first member of pfr_addr, hence the casts),
+ * 'not' either 0 or 1, and no leftover feedback value.  Returns 0 if
+ * valid, -1 otherwise.
+ */
+static int
+pfr_validate_addr(struct pfr_addr *ad)
+{
+ int i;
+
+ switch (ad->pfra_af) {
+#ifdef INET
+ case AF_INET:
+ if (ad->pfra_net > 32)
+ return (-1);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (ad->pfra_net > 128)
+ return (-1);
+ break;
+#endif /* INET6 */
+ default:
+ return (-1);
+ }
+ /* Bits past the prefix inside the boundary byte must be zero. */
+ if (ad->pfra_net < 128 &&
+ (((caddr_t)ad)[ad->pfra_net/8] & (0xFF >> (ad->pfra_net%8))))
+ return (-1);
+ /* All whole bytes past the prefix must be zero too. */
+ for (i = (ad->pfra_net+7)/8; i < sizeof(ad->pfra_u); i++)
+ if (((caddr_t)ad)[i])
+ return (-1);
+ if (ad->pfra_not && ad->pfra_not != 1)
+ return (-1);
+ if (ad->pfra_fback)
+ return (-1);
+ return (0);
+}
+
+/*
+ * Collect table entries onto 'workq' by walking both radix trees.
+ * With 'sweep' set only unmarked entries are collected (PFRW_SWEEP);
+ * otherwise all entries (PFRW_ENQUEUE).  *naddr, if given, receives
+ * the number collected.
+ */
+static void
+pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq,
+ int *naddr, int sweep)
+{
+ struct pfr_walktree w;
+
+ SLIST_INIT(workq);
+ bzero(&w, sizeof(w));
+ w.pfrw_op = sweep ? PFRW_SWEEP : PFRW_ENQUEUE;
+ w.pfrw_workq = workq;
+ if (kt->pfrkt_ip4 != NULL)
+ if (kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh,
+ pfr_walktree, &w))
+ printf("pfr_enqueue_addrs: IPv4 walktree failed.\n");
+ if (kt->pfrkt_ip6 != NULL)
+ if (kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh,
+ pfr_walktree, &w))
+ printf("pfr_enqueue_addrs: IPv6 walktree failed.\n");
+ if (naddr != NULL)
+ *naddr = w.pfrw_cnt;
+}
+
+/* Clear the pfrke_mark flag on every entry of both radix trees. */
+static void
+pfr_mark_addrs(struct pfr_ktable *kt)
+{
+ struct pfr_walktree w;
+
+ bzero(&w, sizeof(w));
+ w.pfrw_op = PFRW_MARK;
+ if (kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh, pfr_walktree, &w))
+ printf("pfr_mark_addrs: IPv4 walktree failed.\n");
+ if (kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh, pfr_walktree, &w))
+ printf("pfr_mark_addrs: IPv6 walktree failed.\n");
+}
+
+
+/*
+ * Find the table entry for 'ad'.  Network addresses use an exact
+ * prefix lookup (rn_lookup); host addresses use best-match
+ * (rn_match).  With 'exact' set, a host query will not return a
+ * covering network entry.  Radix root nodes are filtered out.
+ */
+static struct pfr_kentry *
+pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact)
+{
+ union sockaddr_union sa, mask;
+ struct radix_head *head = NULL;
+ struct pfr_kentry *ke;
+
+ PF_RULES_ASSERT();
+
+ bzero(&sa, sizeof(sa));
+ if (ad->pfra_af == AF_INET) {
+ FILLIN_SIN(sa.sin, ad->pfra_ip4addr);
+ head = &kt->pfrkt_ip4->rh;
+ } else if ( ad->pfra_af == AF_INET6 ) {
+ FILLIN_SIN6(sa.sin6, ad->pfra_ip6addr);
+ head = &kt->pfrkt_ip6->rh;
+ }
+ if (ADDR_NETWORK(ad)) {
+ pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net);
+ ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head);
+ if (ke && KENTRY_RNF_ROOT(ke))
+ ke = NULL;
+ } else {
+ ke = (struct pfr_kentry *)rn_match(&sa, head);
+ if (ke && KENTRY_RNF_ROOT(ke))
+ ke = NULL;
+ if (exact && ke && KENTRY_NETWORK(ke))
+ ke = NULL;
+ }
+ return (ke);
+}
+
+/*
+ * Allocate and initialize a kernel table entry from a user pfr_addr.
+ * Returns NULL if the zone allocation fails (M_NOWAIT).
+ */
+static struct pfr_kentry *
+pfr_create_kentry(struct pfr_addr *ad)
+{
+ struct pfr_kentry *ke;
+
+ ke = uma_zalloc(V_pfr_kentry_z, M_NOWAIT | M_ZERO);
+ if (ke == NULL)
+ return (NULL);
+
+ if (ad->pfra_af == AF_INET)
+ FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr);
+ else if (ad->pfra_af == AF_INET6)
+ FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr);
+ ke->pfrke_af = ad->pfra_af;
+ ke->pfrke_net = ad->pfra_net;
+ ke->pfrke_not = ad->pfra_not;
+ return (ke);
+}
+
+/* Free every entry on the workq (safe against node removal). */
+static void
+pfr_destroy_kentries(struct pfr_kentryworkq *workq)
+{
+ struct pfr_kentry *p, *q;
+
+ for (p = SLIST_FIRST(workq); p != NULL; p = q) {
+ q = SLIST_NEXT(p, pfrke_workq);
+ pfr_destroy_kentry(p);
+ }
+}
+
+/* Release an entry and its (optional) counter block. */
+static void
+pfr_destroy_kentry(struct pfr_kentry *ke)
+{
+ if (ke->pfrke_counters)
+ uma_zfree(V_pfr_kcounters_z, ke->pfrke_counters);
+ uma_zfree(V_pfr_kentry_z, ke);
+}
+
+/*
+ * Route every workq entry into table 'kt', stamping the creation
+ * time 'tzero'.  Stops at the first routing failure; pfrkt_cnt is
+ * bumped only by the number actually inserted.
+ */
+static void
+pfr_insert_kentries(struct pfr_ktable *kt,
+ struct pfr_kentryworkq *workq, long tzero)
+{
+ struct pfr_kentry *p;
+ int rv, n = 0;
+
+ SLIST_FOREACH(p, workq, pfrke_workq) {
+ rv = pfr_route_kentry(kt, p);
+ if (rv) {
+ printf("pfr_insert_kentries: cannot route entry "
+ "(code=%d).\n", rv);
+ break;
+ }
+ p->pfrke_tzero = tzero;
+ n++;
+ }
+ kt->pfrkt_cnt += n;
+}
+
+/*
+ * Insert a single address into table 'kt' if not already present.
+ * Returns 0 on success or if the address already exists, ENOMEM on
+ * allocation failure, or the pfr_route_kentry() error code.
+ * NOTE(review): on routing failure the freshly created entry is not
+ * freed here — mirrors upstream; verify against current FreeBSD.
+ */
+int
+pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero)
+{
+ struct pfr_kentry *p;
+ int rv;
+
+ p = pfr_lookup_addr(kt, ad, 1);
+ if (p != NULL)
+ return (0);
+ p = pfr_create_kentry(ad);
+ if (p == NULL)
+ return (ENOMEM);
+
+ rv = pfr_route_kentry(kt, p);
+ if (rv)
+ return (rv);
+
+ p->pfrke_tzero = tzero;
+ kt->pfrkt_cnt++;
+
+ return (0);
+}
+
+/*
+ * Unroute and free every workq entry, adjusting the table count.
+ */
+static void
+pfr_remove_kentries(struct pfr_ktable *kt,
+ struct pfr_kentryworkq *workq)
+{
+ struct pfr_kentry *p;
+ int n = 0;
+
+ SLIST_FOREACH(p, workq, pfrke_workq) {
+ pfr_unroute_kentry(kt, p);
+ n++;
+ }
+ kt->pfrkt_cnt -= n;
+ pfr_destroy_kentries(workq);
+}
+
+/*
+ * Detach workq entries from (scratch) table 'kt' without freeing
+ * them — used before moving staged entries into the real table.
+ */
+static void
+pfr_clean_node_mask(struct pfr_ktable *kt,
+ struct pfr_kentryworkq *workq)
+{
+ struct pfr_kentry *p;
+
+ SLIST_FOREACH(p, workq, pfrke_workq)
+ pfr_unroute_kentry(kt, p);
+}
+
+/*
+ * Reset statistics of all workq entries to creation time 'tzero';
+ * with 'negchange' also flip each entry's negation flag (used by
+ * pfr_set_addrs() for changed entries).
+ */
+static void
+pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange)
+{
+ struct pfr_kentry *p;
+
+ SLIST_FOREACH(p, workq, pfrke_workq) {
+ if (negchange)
+ p->pfrke_not = !p->pfrke_not;
+ if (p->pfrke_counters) {
+ uma_zfree(V_pfr_kcounters_z, p->pfrke_counters);
+ p->pfrke_counters = NULL;
+ }
+ p->pfrke_tzero = tzero;
+ }
+}
+
+/* Clear pfra_fback on all user addresses after a failed operation. */
+static void
+pfr_reset_feedback(struct pfr_addr *addr, int size)
+{
+ struct pfr_addr *ad;
+ int i;
+
+ for (i = 0, ad = addr; i < size; i++, ad++)
+ ad->pfra_fback = PFR_FB_NONE;
+}
+
+/*
+ * Build a netmask sockaddr for a prefix of 'net' bits in family
+ * 'af' (all-ones words followed by a partial word, then zeros).
+ */
+static void
+pfr_prepare_network(union sockaddr_union *sa, int af, int net)
+{
+ int i;
+
+ bzero(sa, sizeof(*sa));
+ if (af == AF_INET) {
+ sa->sin.sin_len = sizeof(sa->sin);
+ sa->sin.sin_family = AF_INET;
+ sa->sin.sin_addr.s_addr = net ? htonl(-1 << (32-net)) : 0;
+ } else if (af == AF_INET6) {
+ sa->sin6.sin6_len = sizeof(sa->sin6);
+ sa->sin6.sin6_family = AF_INET6;
+ /* Fill 32-bit words until the prefix is consumed. */
+ for (i = 0; i < 4; i++) {
+ if (net <= 32) {
+ sa->sin6.sin6_addr.s6_addr32[i] =
+ net ? htonl(-1 << (32-net)) : 0;
+ break;
+ }
+ sa->sin6.sin6_addr.s6_addr32[i] = 0xFFFFFFFF;
+ net -= 32;
+ }
+ }
+}
+
+/*
+ * Insert entry 'ke' into the appropriate radix tree of table 'kt'
+ * (with a prefix mask for network entries, host route otherwise).
+ * Returns 0 on success, -1 if rn_addroute() rejects the node.
+ */
+static int
+pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
+{
+ union sockaddr_union mask;
+ struct radix_node *rn;
+ struct radix_head *head = NULL;
+
+ PF_RULES_WASSERT();
+
+ bzero(ke->pfrke_node, sizeof(ke->pfrke_node));
+ if (ke->pfrke_af == AF_INET)
+ head = &kt->pfrkt_ip4->rh;
+ else if (ke->pfrke_af == AF_INET6)
+ head = &kt->pfrkt_ip6->rh;
+
+ if (KENTRY_NETWORK(ke)) {
+ pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
+ rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node);
+ } else
+ rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node);
+
+ return (rn == NULL ? -1 : 0);
+}
+
+/*
+ * Remove entry 'ke' from its radix tree; inverse of
+ * pfr_route_kentry().  Returns 0 on success, -1 if rn_delete()
+ * cannot find the node.
+ */
+static int
+pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
+{
+ union sockaddr_union mask;
+ struct radix_node *rn;
+ struct radix_head *head = NULL;
+
+ if (ke->pfrke_af == AF_INET)
+ head = &kt->pfrkt_ip4->rh;
+ else if (ke->pfrke_af == AF_INET6)
+ head = &kt->pfrkt_ip6->rh;
+
+ if (KENTRY_NETWORK(ke)) {
+ pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
+ rn = rn_delete(&ke->pfrke_sa, &mask, head);
+ } else
+ rn = rn_delete(&ke->pfrke_sa, NULL, head);
+
+ if (rn == NULL) {
+ printf("pfr_unroute_kentry: delete failed.\n");
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * Convert a kernel entry back into the user-visible pfr_addr form;
+ * a NULL entry yields an all-zero address.
+ */
+static void
+pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke)
+{
+ bzero(ad, sizeof(*ad));
+ if (ke == NULL)
+ return;
+ ad->pfra_af = ke->pfrke_af;
+ ad->pfra_net = ke->pfrke_net;
+ ad->pfra_not = ke->pfrke_not;
+ if (ad->pfra_af == AF_INET)
+ ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr;
+ else if (ad->pfra_af == AF_INET6)
+ ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr;
+}
+
+/*
+ * Per-node callback for rnh_walktree(); dispatches on the operation
+ * stored in the pfr_walktree state (see struct pfr_walktree above).
+ * Returning non-zero aborts the walk (used by PFRW_POOL_GET).
+ */
+static int
+pfr_walktree(struct radix_node *rn, void *arg)
+{
+ struct pfr_kentry *ke = (struct pfr_kentry *)rn;
+ struct pfr_walktree *w = arg;
+
+ switch (w->pfrw_op) {
+ case PFRW_MARK:
+ ke->pfrke_mark = 0;
+ break;
+ case PFRW_SWEEP:
+ if (ke->pfrke_mark)
+ break;
+ /* FALLTHROUGH */
+ case PFRW_ENQUEUE:
+ SLIST_INSERT_HEAD(w->pfrw_workq, ke, pfrke_workq);
+ w->pfrw_cnt++;
+ break;
+ case PFRW_GET_ADDRS:
+ /* Copy out while output slots remain. */
+ if (w->pfrw_free-- > 0) {
+ pfr_copyout_addr(w->pfrw_addr, ke);
+ w->pfrw_addr++;
+ }
+ break;
+ case PFRW_GET_ASTATS:
+ if (w->pfrw_free-- > 0) {
+ struct pfr_astats as;
+
+ pfr_copyout_addr(&as.pfras_a, ke);
+
+ if (ke->pfrke_counters) {
+ bcopy(ke->pfrke_counters->pfrkc_packets,
+ as.pfras_packets, sizeof(as.pfras_packets));
+ bcopy(ke->pfrke_counters->pfrkc_bytes,
+ as.pfras_bytes, sizeof(as.pfras_bytes));
+ } else {
+ bzero(as.pfras_packets, sizeof(as.pfras_packets));
+ bzero(as.pfras_bytes, sizeof(as.pfras_bytes));
+ as.pfras_a.pfra_fback = PFR_FB_NOCOUNT;
+ }
+ as.pfras_tzero = ke->pfrke_tzero;
+
+ bcopy(&as, w->pfrw_astats, sizeof(as));
+ w->pfrw_astats++;
+ }
+ break;
+ case PFRW_POOL_GET:
+ if (ke->pfrke_not)
+ break; /* negative entries are ignored */
+ /* Count down to the requested index, then stop there. */
+ if (!w->pfrw_cnt--) {
+ w->pfrw_kentry = ke;
+ return (1); /* finish search */
+ }
+ break;
+ case PFRW_DYNADDR_UPDATE:
+ {
+ union sockaddr_union pfr_mask;
+
+ /* Only the first address per family is recorded. */
+ if (ke->pfrke_af == AF_INET) {
+ if (w->pfrw_dyn->pfid_acnt4++ > 0)
+ break;
+ pfr_prepare_network(&pfr_mask, AF_INET, ke->pfrke_net);
+ w->pfrw_dyn->pfid_addr4 = *SUNION2PF(&ke->pfrke_sa,
+ AF_INET);
+ w->pfrw_dyn->pfid_mask4 = *SUNION2PF(&pfr_mask,
+ AF_INET);
+ } else if (ke->pfrke_af == AF_INET6){
+ if (w->pfrw_dyn->pfid_acnt6++ > 0)
+ break;
+ pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net);
+ w->pfrw_dyn->pfid_addr6 = *SUNION2PF(&ke->pfrke_sa,
+ AF_INET6);
+ w->pfrw_dyn->pfid_mask6 = *SUNION2PF(&pfr_mask,
+ AF_INET6);
+ }
+ break;
+ }
+ }
+ return (0);
+}
+
+/*
+ * DIOCRCLRTABLES: deactivate all tables matched by 'filter' (the
+ * reserved-anchor tables and inactive tables are skipped).  The
+ * flag change is applied via pfr_setflags_ktables(); *ndel counts
+ * the tables affected.
+ */
+int
+pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags)
+{
+ struct pfr_ktableworkq workq;
+ struct pfr_ktable *p;
+ int xdel = 0;
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ALLRSETS);
+ if (pfr_fix_anchor(filter->pfrt_anchor))
+ return (EINVAL);
+ if (pfr_table_count(filter, flags) < 0)
+ return (ENOENT);
+
+ SLIST_INIT(&workq);
+ RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
+ if (pfr_skip_table(filter, p, flags))
+ continue;
+ if (!strcmp(p->pfrkt_anchor, PF_RESERVED_ANCHOR))
+ continue;
+ if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ continue;
+ p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE;
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ xdel++;
+ }
+ if (!(flags & PFR_FLAG_DUMMY))
+ pfr_setflags_ktables(&workq);
+ if (ndel != NULL)
+ *ndel = xdel;
+ return (0);
+}
+
+/*
+ * DIOCRADDTABLES: create the 'size' tables described in 'tbl'.
+ * New anchored tables also get a root table (found or created).
+ * Tables that already exist but are inactive are reactivated with
+ * the requested user flags.  *nadd counts tables (to be) added.
+ */
+int
+pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
+{
+ struct pfr_ktableworkq addq, changeq;
+ struct pfr_ktable *p, *q, *r, key;
+ int i, rv, xadd = 0;
+ long tzero = time_second;
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
+ SLIST_INIT(&addq);
+ SLIST_INIT(&changeq);
+ for (i = 0; i < size; i++) {
+ bcopy(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t));
+ if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK,
+ flags & PFR_FLAG_USERIOCTL))
+ senderr(EINVAL);
+ key.pfrkt_flags |= PFR_TFLAG_ACTIVE;
+ p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
+ if (p == NULL) {
+ p = pfr_create_ktable(&key.pfrkt_t, tzero, 1);
+ if (p == NULL)
+ senderr(ENOMEM);
+ /* Skip duplicates already staged in this request. */
+ SLIST_FOREACH(q, &addq, pfrkt_workq) {
+ if (!pfr_ktable_compare(p, q))
+ goto _skip;
+ }
+ SLIST_INSERT_HEAD(&addq, p, pfrkt_workq);
+ xadd++;
+ if (!key.pfrkt_anchor[0])
+ goto _skip;
+
+ /* find or create root table */
+ bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor));
+ r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
+ if (r != NULL) {
+ p->pfrkt_root = r;
+ goto _skip;
+ }
+ SLIST_FOREACH(q, &addq, pfrkt_workq) {
+ if (!pfr_ktable_compare(&key, q)) {
+ p->pfrkt_root = q;
+ goto _skip;
+ }
+ }
+ key.pfrkt_flags = 0;
+ r = pfr_create_ktable(&key.pfrkt_t, 0, 1);
+ if (r == NULL)
+ senderr(ENOMEM);
+ SLIST_INSERT_HEAD(&addq, r, pfrkt_workq);
+ p->pfrkt_root = r;
+ } else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
+ /* Existing but inactive: schedule reactivation. */
+ SLIST_FOREACH(q, &changeq, pfrkt_workq)
+ if (!pfr_ktable_compare(&key, q))
+ goto _skip;
+ p->pfrkt_nflags = (p->pfrkt_flags &
+ ~PFR_TFLAG_USRMASK) | key.pfrkt_flags;
+ SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq);
+ xadd++;
+ }
+_skip:
+ ;
+ }
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ pfr_insert_ktables(&addq);
+ pfr_setflags_ktables(&changeq);
+ } else
+ pfr_destroy_ktables(&addq, 0);
+ if (nadd != NULL)
+ *nadd = xadd;
+ return (0);
+_bad:
+ pfr_destroy_ktables(&addq, 0);
+ return (rv);
+}
+
+/*
+ * DIOCRDELTABLES: deactivate the 'size' named tables.  Duplicates
+ * within the request are collapsed; *ndel counts tables affected.
+ */
+int
+pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags)
+{
+ struct pfr_ktableworkq workq;
+ struct pfr_ktable *p, *q, key;
+ int i, xdel = 0;
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
+ SLIST_INIT(&workq);
+ for (i = 0; i < size; i++) {
+ bcopy(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t));
+ if (pfr_validate_table(&key.pfrkt_t, 0,
+ flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
+ if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
+ SLIST_FOREACH(q, &workq, pfrkt_workq)
+ if (!pfr_ktable_compare(p, q))
+ goto _skip;
+ p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE;
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ xdel++;
+ }
+_skip:
+ ;
+ }
+
+ if (!(flags & PFR_FLAG_DUMMY))
+ pfr_setflags_ktables(&workq);
+ if (ndel != NULL)
+ *ndel = xdel;
+ return (0);
+}
+
+/*
+ * DIOCRGETTABLES: copy the headers of all tables matched by
+ * 'filter' into 'tbl'.  A too-small *size returns only the
+ * required count, as in pfr_get_addrs().
+ */
+int
+pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size,
+ int flags)
+{
+ struct pfr_ktable *p;
+ int n, nn;
+
+ PF_RULES_RASSERT();
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS);
+ if (pfr_fix_anchor(filter->pfrt_anchor))
+ return (EINVAL);
+ n = nn = pfr_table_count(filter, flags);
+ if (n < 0)
+ return (ENOENT);
+ if (n > *size) {
+ *size = n;
+ return (0);
+ }
+ RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
+ if (pfr_skip_table(filter, p, flags))
+ continue;
+ if (n-- <= 0)
+ continue;
+ bcopy(&p->pfrkt_t, tbl++, sizeof(*tbl));
+ }
+
+ KASSERT(n == 0, ("%s: corruption detected (%d)", __func__, n));
+
+ *size = nn;
+ return (0);
+}
+
+/*
+ * DIOCRGETTSTATS: like pfr_get_tables() but copies the statistics
+ * block of each matched table.  The PFR_FLAG_CLSTATS path would
+ * also clear them, but the flag is currently rejected (see XXX).
+ */
+int
+pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size,
+ int flags)
+{
+ struct pfr_ktable *p;
+ struct pfr_ktableworkq workq;
+ int n, nn;
+ long tzero = time_second;
+
+ /* XXX PFR_FLAG_CLSTATS disabled */
+ ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS);
+ if (pfr_fix_anchor(filter->pfrt_anchor))
+ return (EINVAL);
+ n = nn = pfr_table_count(filter, flags);
+ if (n < 0)
+ return (ENOENT);
+ if (n > *size) {
+ *size = n;
+ return (0);
+ }
+ SLIST_INIT(&workq);
+ RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
+ if (pfr_skip_table(filter, p, flags))
+ continue;
+ if (n-- <= 0)
+ continue;
+ bcopy(&p->pfrkt_ts, tbl++, sizeof(*tbl));
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ }
+ if (flags & PFR_FLAG_CLSTATS)
+ pfr_clstats_ktables(&workq, tzero,
+ flags & PFR_FLAG_ADDRSTOO);
+
+ KASSERT(n == 0, ("%s: corruption detected (%d)", __func__, n));
+
+ *size = nn;
+ return (0);
+}
+
+int
+pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags)
+{
+ struct pfr_ktableworkq workq;
+ struct pfr_ktable *p, key;
+ int i, xzero = 0;
+ long tzero = time_second;
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO);
+ SLIST_INIT(&workq);
+ for (i = 0; i < size; i++) {
+ bcopy(tbl + i, &key.pfrkt_t, sizeof(key.pfrkt_t));
+ if (pfr_validate_table(&key.pfrkt_t, 0, 0))
+ return (EINVAL);
+ p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
+ if (p != NULL) {
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ xzero++;
+ }
+ }
+ if (!(flags & PFR_FLAG_DUMMY))
+ pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO);
+ if (nzero != NULL)
+ *nzero = xzero;
+ return (0);
+}
+
+int
+pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag,
+ int *nchange, int *ndel, int flags)
+{
+ struct pfr_ktableworkq workq;
+ struct pfr_ktable *p, *q, key;
+ int i, xchange = 0, xdel = 0;
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
+ if ((setflag & ~PFR_TFLAG_USRMASK) ||
+ (clrflag & ~PFR_TFLAG_USRMASK) ||
+ (setflag & clrflag))
+ return (EINVAL);
+ SLIST_INIT(&workq);
+ for (i = 0; i < size; i++) {
+ bcopy(tbl + i, &key.pfrkt_t, sizeof(key.pfrkt_t));
+ if (pfr_validate_table(&key.pfrkt_t, 0,
+ flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
+ if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
+ p->pfrkt_nflags = (p->pfrkt_flags | setflag) &
+ ~clrflag;
+ if (p->pfrkt_nflags == p->pfrkt_flags)
+ goto _skip;
+ SLIST_FOREACH(q, &workq, pfrkt_workq)
+ if (!pfr_ktable_compare(p, q))
+ goto _skip;
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ if ((p->pfrkt_flags & PFR_TFLAG_PERSIST) &&
+ (clrflag & PFR_TFLAG_PERSIST) &&
+ !(p->pfrkt_flags & PFR_TFLAG_REFERENCED))
+ xdel++;
+ else
+ xchange++;
+ }
+_skip:
+ ;
+ }
+ if (!(flags & PFR_FLAG_DUMMY))
+ pfr_setflags_ktables(&workq);
+ if (nchange != NULL)
+ *nchange = xchange;
+ if (ndel != NULL)
+ *ndel = xdel;
+ return (0);
+}
+
+int
+pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags)
+{
+ struct pfr_ktableworkq workq;
+ struct pfr_ktable *p;
+ struct pf_ruleset *rs;
+ int xdel = 0;
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
+ rs = pf_find_or_create_ruleset(trs->pfrt_anchor);
+ if (rs == NULL)
+ return (ENOMEM);
+ SLIST_INIT(&workq);
+ RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
+ if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
+ pfr_skip_table(trs, p, 0))
+ continue;
+ p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE;
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ xdel++;
+ }
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ pfr_setflags_ktables(&workq);
+ if (ticket != NULL)
+ *ticket = ++rs->tticket;
+ rs->topen = 1;
+ } else
+ pf_remove_if_empty_ruleset(rs);
+ if (ndel != NULL)
+ *ndel = xdel;
+ return (0);
+}
+
+int
+pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size,
+ int *nadd, int *naddr, u_int32_t ticket, int flags)
+{
+ struct pfr_ktableworkq tableq;
+ struct pfr_kentryworkq addrq;
+ struct pfr_ktable *kt, *rt, *shadow, key;
+ struct pfr_kentry *p;
+ struct pfr_addr *ad;
+ struct pf_ruleset *rs;
+ int i, rv, xadd = 0, xaddr = 0;
+
+ PF_RULES_WASSERT();
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO);
+ if (size && !(flags & PFR_FLAG_ADDRSTOO))
+ return (EINVAL);
+ if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK,
+ flags & PFR_FLAG_USERIOCTL))
+ return (EINVAL);
+ rs = pf_find_ruleset(tbl->pfrt_anchor);
+ if (rs == NULL || !rs->topen || ticket != rs->tticket)
+ return (EBUSY);
+ tbl->pfrt_flags |= PFR_TFLAG_INACTIVE;
+ SLIST_INIT(&tableq);
+ kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl);
+ if (kt == NULL) {
+ kt = pfr_create_ktable(tbl, 0, 1);
+ if (kt == NULL)
+ return (ENOMEM);
+ SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq);
+ xadd++;
+ if (!tbl->pfrt_anchor[0])
+ goto _skip;
+
+ /* find or create root table */
+ bzero(&key, sizeof(key));
+ strlcpy(key.pfrkt_name, tbl->pfrt_name, sizeof(key.pfrkt_name));
+ rt = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
+ if (rt != NULL) {
+ kt->pfrkt_root = rt;
+ goto _skip;
+ }
+ rt = pfr_create_ktable(&key.pfrkt_t, 0, 1);
+ if (rt == NULL) {
+ pfr_destroy_ktables(&tableq, 0);
+ return (ENOMEM);
+ }
+ SLIST_INSERT_HEAD(&tableq, rt, pfrkt_workq);
+ kt->pfrkt_root = rt;
+ } else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE))
+ xadd++;
+_skip:
+ shadow = pfr_create_ktable(tbl, 0, 0);
+ if (shadow == NULL) {
+ pfr_destroy_ktables(&tableq, 0);
+ return (ENOMEM);
+ }
+ SLIST_INIT(&addrq);
+ for (i = 0, ad = addr; i < size; i++, ad++) {
+ if (pfr_validate_addr(ad))
+ senderr(EINVAL);
+ if (pfr_lookup_addr(shadow, ad, 1) != NULL)
+ continue;
+ p = pfr_create_kentry(ad);
+ if (p == NULL)
+ senderr(ENOMEM);
+ if (pfr_route_kentry(shadow, p)) {
+ pfr_destroy_kentry(p);
+ continue;
+ }
+ SLIST_INSERT_HEAD(&addrq, p, pfrke_workq);
+ xaddr++;
+ }
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ if (kt->pfrkt_shadow != NULL)
+ pfr_destroy_ktable(kt->pfrkt_shadow, 1);
+ kt->pfrkt_flags |= PFR_TFLAG_INACTIVE;
+ pfr_insert_ktables(&tableq);
+ shadow->pfrkt_cnt = (flags & PFR_FLAG_ADDRSTOO) ?
+ xaddr : NO_ADDRESSES;
+ kt->pfrkt_shadow = shadow;
+ } else {
+ pfr_clean_node_mask(shadow, &addrq);
+ pfr_destroy_ktable(shadow, 0);
+ pfr_destroy_ktables(&tableq, 0);
+ pfr_destroy_kentries(&addrq);
+ }
+ if (nadd != NULL)
+ *nadd = xadd;
+ if (naddr != NULL)
+ *naddr = xaddr;
+ return (0);
+_bad:
+ pfr_destroy_ktable(shadow, 0);
+ pfr_destroy_ktables(&tableq, 0);
+ pfr_destroy_kentries(&addrq);
+ return (rv);
+}
+
+int
+pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags)
+{
+ struct pfr_ktableworkq workq;
+ struct pfr_ktable *p;
+ struct pf_ruleset *rs;
+ int xdel = 0;
+
+ PF_RULES_WASSERT();
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
+ rs = pf_find_ruleset(trs->pfrt_anchor);
+ if (rs == NULL || !rs->topen || ticket != rs->tticket)
+ return (0);
+ SLIST_INIT(&workq);
+ RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
+ if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
+ pfr_skip_table(trs, p, 0))
+ continue;
+ p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE;
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ xdel++;
+ }
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ pfr_setflags_ktables(&workq);
+ rs->topen = 0;
+ pf_remove_if_empty_ruleset(rs);
+ }
+ if (ndel != NULL)
+ *ndel = xdel;
+ return (0);
+}
+
+int
+pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd,
+ int *nchange, int flags)
+{
+ struct pfr_ktable *p, *q;
+ struct pfr_ktableworkq workq;
+ struct pf_ruleset *rs;
+ int xadd = 0, xchange = 0;
+ long tzero = time_second;
+
+ PF_RULES_WASSERT();
+
+ ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
+ rs = pf_find_ruleset(trs->pfrt_anchor);
+ if (rs == NULL || !rs->topen || ticket != rs->tticket)
+ return (EBUSY);
+
+ SLIST_INIT(&workq);
+ RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
+ if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
+ pfr_skip_table(trs, p, 0))
+ continue;
+ SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
+ if (p->pfrkt_flags & PFR_TFLAG_ACTIVE)
+ xchange++;
+ else
+ xadd++;
+ }
+
+ if (!(flags & PFR_FLAG_DUMMY)) {
+ for (p = SLIST_FIRST(&workq); p != NULL; p = q) {
+ q = SLIST_NEXT(p, pfrkt_workq);
+ pfr_commit_ktable(p, tzero);
+ }
+ rs->topen = 0;
+ pf_remove_if_empty_ruleset(rs);
+ }
+ if (nadd != NULL)
+ *nadd = xadd;
+ if (nchange != NULL)
+ *nchange = xchange;
+
+ return (0);
+}
+
+static void
+pfr_commit_ktable(struct pfr_ktable *kt, long tzero)
+{
+ struct pfr_ktable *shadow = kt->pfrkt_shadow;
+ int nflags;
+
+ PF_RULES_WASSERT();
+
+ if (shadow->pfrkt_cnt == NO_ADDRESSES) {
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ pfr_clstats_ktable(kt, tzero, 1);
+ } else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) {
+ /* kt might contain addresses */
+ struct pfr_kentryworkq addrq, addq, changeq, delq, garbageq;
+ struct pfr_kentry *p, *q, *next;
+ struct pfr_addr ad;
+
+ pfr_enqueue_addrs(shadow, &addrq, NULL, 0);
+ pfr_mark_addrs(kt);
+ SLIST_INIT(&addq);
+ SLIST_INIT(&changeq);
+ SLIST_INIT(&delq);
+ SLIST_INIT(&garbageq);
+ pfr_clean_node_mask(shadow, &addrq);
+ for (p = SLIST_FIRST(&addrq); p != NULL; p = next) {
+ next = SLIST_NEXT(p, pfrke_workq); /* XXX */
+ pfr_copyout_addr(&ad, p);
+ q = pfr_lookup_addr(kt, &ad, 1);
+ if (q != NULL) {
+ if (q->pfrke_not != p->pfrke_not)
+ SLIST_INSERT_HEAD(&changeq, q,
+ pfrke_workq);
+ q->pfrke_mark = 1;
+ SLIST_INSERT_HEAD(&garbageq, p, pfrke_workq);
+ } else {
+ p->pfrke_tzero = tzero;
+ SLIST_INSERT_HEAD(&addq, p, pfrke_workq);
+ }
+ }
+ pfr_enqueue_addrs(kt, &delq, NULL, ENQUEUE_UNMARKED_ONLY);
+ pfr_insert_kentries(kt, &addq, tzero);
+ pfr_remove_kentries(kt, &delq);
+ pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG);
+ pfr_destroy_kentries(&garbageq);
+ } else {
+ /* kt cannot contain addresses */
+ SWAP(struct radix_node_head *, kt->pfrkt_ip4,
+ shadow->pfrkt_ip4);
+ SWAP(struct radix_node_head *, kt->pfrkt_ip6,
+ shadow->pfrkt_ip6);
+ SWAP(int, kt->pfrkt_cnt, shadow->pfrkt_cnt);
+ pfr_clstats_ktable(kt, tzero, 1);
+ }
+ nflags = ((shadow->pfrkt_flags & PFR_TFLAG_USRMASK) |
+ (kt->pfrkt_flags & PFR_TFLAG_SETMASK) | PFR_TFLAG_ACTIVE)
+ & ~PFR_TFLAG_INACTIVE;
+ pfr_destroy_ktable(shadow, 0);
+ kt->pfrkt_shadow = NULL;
+ pfr_setflags_ktable(kt, nflags);
+}
+
+static int
+pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved)
+{
+ int i;
+
+ if (!tbl->pfrt_name[0])
+ return (-1);
+ if (no_reserved && !strcmp(tbl->pfrt_anchor, PF_RESERVED_ANCHOR))
+ return (-1);
+ if (tbl->pfrt_name[PF_TABLE_NAME_SIZE-1])
+ return (-1);
+ for (i = strlen(tbl->pfrt_name); i < PF_TABLE_NAME_SIZE; i++)
+ if (tbl->pfrt_name[i])
+ return (-1);
+ if (pfr_fix_anchor(tbl->pfrt_anchor))
+ return (-1);
+ if (tbl->pfrt_flags & ~allowedflags)
+ return (-1);
+ return (0);
+}
+
+/*
+ * Rewrite anchors referenced by tables to remove slashes
+ * and check for validity.
+ */
+static int
+pfr_fix_anchor(char *anchor)
+{
+ size_t siz = MAXPATHLEN;
+ int i;
+
+ if (anchor[0] == '/') {
+ char *path;
+ int off;
+
+ path = anchor;
+ off = 1;
+ while (*++path == '/')
+ off++;
+ bcopy(path, anchor, siz - off);
+ memset(anchor + siz - off, 0, off);
+ }
+ if (anchor[siz - 1])
+ return (-1);
+ for (i = strlen(anchor); i < siz; i++)
+ if (anchor[i])
+ return (-1);
+ return (0);
+}
+
+static int
+pfr_table_count(struct pfr_table *filter, int flags)
+{
+ struct pf_ruleset *rs;
+
+ PF_RULES_ASSERT();
+
+ if (flags & PFR_FLAG_ALLRSETS)
+ return (pfr_ktable_cnt);
+ if (filter->pfrt_anchor[0]) {
+ rs = pf_find_ruleset(filter->pfrt_anchor);
+ return ((rs != NULL) ? rs->tables : -1);
+ }
+ return (pf_main_ruleset.tables);
+}
+
+static int
+pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags)
+{
+ if (flags & PFR_FLAG_ALLRSETS)
+ return (0);
+ if (strcmp(filter->pfrt_anchor, kt->pfrkt_anchor))
+ return (1);
+ return (0);
+}
+
+static void
+pfr_insert_ktables(struct pfr_ktableworkq *workq)
+{
+ struct pfr_ktable *p;
+
+ SLIST_FOREACH(p, workq, pfrkt_workq)
+ pfr_insert_ktable(p);
+}
+
+static void
+pfr_insert_ktable(struct pfr_ktable *kt)
+{
+
+ PF_RULES_WASSERT();
+
+ RB_INSERT(pfr_ktablehead, &pfr_ktables, kt);
+ pfr_ktable_cnt++;
+ if (kt->pfrkt_root != NULL)
+ if (!kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]++)
+ pfr_setflags_ktable(kt->pfrkt_root,
+ kt->pfrkt_root->pfrkt_flags|PFR_TFLAG_REFDANCHOR);
+}
+
+static void
+pfr_setflags_ktables(struct pfr_ktableworkq *workq)
+{
+ struct pfr_ktable *p, *q;
+
+ for (p = SLIST_FIRST(workq); p; p = q) {
+ q = SLIST_NEXT(p, pfrkt_workq);
+ pfr_setflags_ktable(p, p->pfrkt_nflags);
+ }
+}
+
+static void
+pfr_setflags_ktable(struct pfr_ktable *kt, int newf)
+{
+ struct pfr_kentryworkq addrq;
+
+ PF_RULES_WASSERT();
+
+ if (!(newf & PFR_TFLAG_REFERENCED) &&
+ !(newf & PFR_TFLAG_PERSIST))
+ newf &= ~PFR_TFLAG_ACTIVE;
+ if (!(newf & PFR_TFLAG_ACTIVE))
+ newf &= ~PFR_TFLAG_USRMASK;
+ if (!(newf & PFR_TFLAG_SETMASK)) {
+ RB_REMOVE(pfr_ktablehead, &pfr_ktables, kt);
+ if (kt->pfrkt_root != NULL)
+ if (!--kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR])
+ pfr_setflags_ktable(kt->pfrkt_root,
+ kt->pfrkt_root->pfrkt_flags &
+ ~PFR_TFLAG_REFDANCHOR);
+ pfr_destroy_ktable(kt, 1);
+ pfr_ktable_cnt--;
+ return;
+ }
+ if (!(newf & PFR_TFLAG_ACTIVE) && kt->pfrkt_cnt) {
+ pfr_enqueue_addrs(kt, &addrq, NULL, 0);
+ pfr_remove_kentries(kt, &addrq);
+ }
+ if (!(newf & PFR_TFLAG_INACTIVE) && kt->pfrkt_shadow != NULL) {
+ pfr_destroy_ktable(kt->pfrkt_shadow, 1);
+ kt->pfrkt_shadow = NULL;
+ }
+ kt->pfrkt_flags = newf;
+}
+
+static void
+pfr_clstats_ktables(struct pfr_ktableworkq *workq, long tzero, int recurse)
+{
+ struct pfr_ktable *p;
+
+ SLIST_FOREACH(p, workq, pfrkt_workq)
+ pfr_clstats_ktable(p, tzero, recurse);
+}
+
+static void
+pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse)
+{
+ struct pfr_kentryworkq addrq;
+
+ if (recurse) {
+ pfr_enqueue_addrs(kt, &addrq, NULL, 0);
+ pfr_clstats_kentries(&addrq, tzero, 0);
+ }
+ bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets));
+ bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes));
+ kt->pfrkt_match = kt->pfrkt_nomatch = 0;
+ kt->pfrkt_tzero = tzero;
+}
+
+static struct pfr_ktable *
+pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset)
+{
+ struct pfr_ktable *kt;
+ struct pf_ruleset *rs;
+
+ PF_RULES_WASSERT();
+
+ kt = malloc(sizeof(*kt), M_PFTABLE, M_NOWAIT|M_ZERO);
+ if (kt == NULL)
+ return (NULL);
+ kt->pfrkt_t = *tbl;
+
+ if (attachruleset) {
+ rs = pf_find_or_create_ruleset(tbl->pfrt_anchor);
+ if (!rs) {
+ pfr_destroy_ktable(kt, 0);
+ return (NULL);
+ }
+ kt->pfrkt_rs = rs;
+ rs->tables++;
+ }
+
+ if (!rn_inithead((void **)&kt->pfrkt_ip4,
+ offsetof(struct sockaddr_in, sin_addr) * 8) ||
+ !rn_inithead((void **)&kt->pfrkt_ip6,
+ offsetof(struct sockaddr_in6, sin6_addr) * 8)) {
+ pfr_destroy_ktable(kt, 0);
+ return (NULL);
+ }
+ kt->pfrkt_tzero = tzero;
+
+ return (kt);
+}
+
+static void
+pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr)
+{
+ struct pfr_ktable *p, *q;
+
+ for (p = SLIST_FIRST(workq); p; p = q) {
+ q = SLIST_NEXT(p, pfrkt_workq);
+ pfr_destroy_ktable(p, flushaddr);
+ }
+}
+
+static void
+pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
+{
+ struct pfr_kentryworkq addrq;
+
+ if (flushaddr) {
+ pfr_enqueue_addrs(kt, &addrq, NULL, 0);
+ pfr_clean_node_mask(kt, &addrq);
+ pfr_destroy_kentries(&addrq);
+ }
+ if (kt->pfrkt_ip4 != NULL)
+ rn_detachhead((void **)&kt->pfrkt_ip4);
+ if (kt->pfrkt_ip6 != NULL)
+ rn_detachhead((void **)&kt->pfrkt_ip6);
+ if (kt->pfrkt_shadow != NULL)
+ pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr);
+ if (kt->pfrkt_rs != NULL) {
+ kt->pfrkt_rs->tables--;
+ pf_remove_if_empty_ruleset(kt->pfrkt_rs);
+ }
+ free(kt, M_PFTABLE);
+}
+
+static int
+pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q)
+{
+ int d;
+
+ if ((d = strncmp(p->pfrkt_name, q->pfrkt_name, PF_TABLE_NAME_SIZE)))
+ return (d);
+ return (strcmp(p->pfrkt_anchor, q->pfrkt_anchor));
+}
+
+static struct pfr_ktable *
+pfr_lookup_table(struct pfr_table *tbl)
+{
+ /* a struct pfr_ktable starts like a struct pfr_table */
+ return (RB_FIND(pfr_ktablehead, &pfr_ktables,
+ (struct pfr_ktable *)tbl));
+}
+
+int
+pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af)
+{
+ struct pfr_kentry *ke = NULL;
+ int match;
+
+ PF_RULES_RASSERT();
+
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
+ kt = kt->pfrkt_root;
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (0);
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ {
+ struct sockaddr_in sin;
+
+ bzero(&sin, sizeof(sin));
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = a->addr32[0];
+ ke = (struct pfr_kentry *)rn_match(&sin, &kt->pfrkt_ip4->rh);
+ if (ke && KENTRY_RNF_ROOT(ke))
+ ke = NULL;
+ break;
+ }
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 sin6;
+
+ bzero(&sin6, sizeof(sin6));
+ sin6.sin6_len = sizeof(sin6);
+ sin6.sin6_family = AF_INET6;
+ bcopy(a, &sin6.sin6_addr, sizeof(sin6.sin6_addr));
+ ke = (struct pfr_kentry *)rn_match(&sin6, &kt->pfrkt_ip6->rh);
+ if (ke && KENTRY_RNF_ROOT(ke))
+ ke = NULL;
+ break;
+ }
+#endif /* INET6 */
+ }
+ match = (ke && !ke->pfrke_not);
+ if (match)
+ kt->pfrkt_match++;
+ else
+ kt->pfrkt_nomatch++;
+ return (match);
+}
+
+void
+pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af,
+ u_int64_t len, int dir_out, int op_pass, int notrule)
+{
+ struct pfr_kentry *ke = NULL;
+
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
+ kt = kt->pfrkt_root;
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return;
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ {
+ struct sockaddr_in sin;
+
+ bzero(&sin, sizeof(sin));
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = a->addr32[0];
+ ke = (struct pfr_kentry *)rn_match(&sin, &kt->pfrkt_ip4->rh);
+ if (ke && KENTRY_RNF_ROOT(ke))
+ ke = NULL;
+ break;
+ }
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 sin6;
+
+ bzero(&sin6, sizeof(sin6));
+ sin6.sin6_len = sizeof(sin6);
+ sin6.sin6_family = AF_INET6;
+ bcopy(a, &sin6.sin6_addr, sizeof(sin6.sin6_addr));
+ ke = (struct pfr_kentry *)rn_match(&sin6, &kt->pfrkt_ip6->rh);
+ if (ke && KENTRY_RNF_ROOT(ke))
+ ke = NULL;
+ break;
+ }
+#endif /* INET6 */
+ default:
+ panic("%s: unknown address family %u", __func__, af);
+ }
+ if ((ke == NULL || ke->pfrke_not) != notrule) {
+ if (op_pass != PFR_OP_PASS)
+ printf("pfr_update_stats: assertion failed.\n");
+ op_pass = PFR_OP_XPASS;
+ }
+ kt->pfrkt_packets[dir_out][op_pass]++;
+ kt->pfrkt_bytes[dir_out][op_pass] += len;
+ if (ke != NULL && op_pass != PFR_OP_XPASS &&
+ (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) {
+ if (ke->pfrke_counters == NULL)
+ ke->pfrke_counters = uma_zalloc(V_pfr_kcounters_z,
+ M_NOWAIT | M_ZERO);
+ if (ke->pfrke_counters != NULL) {
+ ke->pfrke_counters->pfrkc_packets[dir_out][op_pass]++;
+ ke->pfrke_counters->pfrkc_bytes[dir_out][op_pass] += len;
+ }
+ }
+}
+
+struct pfr_ktable *
+pfr_attach_table(struct pf_ruleset *rs, char *name)
+{
+ struct pfr_ktable *kt, *rt;
+ struct pfr_table tbl;
+ struct pf_anchor *ac = rs->anchor;
+
+ PF_RULES_WASSERT();
+
+ bzero(&tbl, sizeof(tbl));
+ strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name));
+ if (ac != NULL)
+ strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor));
+ kt = pfr_lookup_table(&tbl);
+ if (kt == NULL) {
+ kt = pfr_create_ktable(&tbl, time_second, 1);
+ if (kt == NULL)
+ return (NULL);
+ if (ac != NULL) {
+ bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor));
+ rt = pfr_lookup_table(&tbl);
+ if (rt == NULL) {
+ rt = pfr_create_ktable(&tbl, 0, 1);
+ if (rt == NULL) {
+ pfr_destroy_ktable(kt, 0);
+ return (NULL);
+ }
+ pfr_insert_ktable(rt);
+ }
+ kt->pfrkt_root = rt;
+ }
+ pfr_insert_ktable(kt);
+ }
+ if (!kt->pfrkt_refcnt[PFR_REFCNT_RULE]++)
+ pfr_setflags_ktable(kt, kt->pfrkt_flags|PFR_TFLAG_REFERENCED);
+ return (kt);
+}
+
+void
+pfr_detach_table(struct pfr_ktable *kt)
+{
+
+ PF_RULES_WASSERT();
+ KASSERT(kt->pfrkt_refcnt[PFR_REFCNT_RULE] > 0, ("%s: refcount %d\n",
+ __func__, kt->pfrkt_refcnt[PFR_REFCNT_RULE]));
+
+ if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE])
+ pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED);
+}
+
+int
+pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter,
+    sa_family_t af)
+{
+ struct pf_addr *addr, *cur, *mask;
+ union sockaddr_union uaddr, umask;
+ struct pfr_kentry *ke, *ke2 = NULL;
+ int idx = -1, use_counter = 0;
+
+ switch (af) {
+ case AF_INET:
+ uaddr.sin.sin_len = sizeof(struct sockaddr_in);
+ uaddr.sin.sin_family = AF_INET;
+ break;
+ case AF_INET6:
+ uaddr.sin6.sin6_len = sizeof(struct sockaddr_in6);
+ uaddr.sin6.sin6_family = AF_INET6;
+ break;
+ }
+ addr = SUNION2PF(&uaddr, af);
+
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
+ kt = kt->pfrkt_root;
+ if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+ return (-1);
+
+ if (pidx != NULL)
+ idx = *pidx;
+ if (counter != NULL && idx >= 0)
+ use_counter = 1;
+ if (idx < 0)
+ idx = 0;
+
+_next_block:
+ ke = pfr_kentry_byidx(kt, idx, af);
+ if (ke == NULL) {
+ kt->pfrkt_nomatch++;
+ return (1);
+ }
+ pfr_prepare_network(&umask, af, ke->pfrke_net);
+ cur = SUNION2PF(&ke->pfrke_sa, af);
+ mask = SUNION2PF(&umask, af);
+
+ if (use_counter) {
+ /* is supplied address within block? */
+ if (!PF_MATCHA(0, cur, mask, counter, af)) {
+ /* no, go to next block in table */
+ idx++;
+ use_counter = 0;
+ goto _next_block;
+ }
+ PF_ACPY(addr, counter, af);
+ } else {
+ /* use first address of block */
+ PF_ACPY(addr, cur, af);
+ }
+
+ if (!KENTRY_NETWORK(ke)) {
+ /* this is a single IP address - no possible nested block */
+ PF_ACPY(counter, addr, af);
+ *pidx = idx;
+ kt->pfrkt_match++;
+ return (0);
+ }
+ for (;;) {
+ /* we don't want to use a nested block */
+ switch (af) {
+ case AF_INET:
+ ke2 = (struct pfr_kentry *)rn_match(&uaddr,
+ &kt->pfrkt_ip4->rh);
+ break;
+ case AF_INET6:
+ ke2 = (struct pfr_kentry *)rn_match(&uaddr,
+ &kt->pfrkt_ip6->rh);
+ break;
+ }
+ /* no need to check KENTRY_RNF_ROOT() here */
+ if (ke2 == ke) {
+ /* lookup returned the same block - perfect */
+ PF_ACPY(counter, addr, af);
+ *pidx = idx;
+ kt->pfrkt_match++;
+ return (0);
+ }
+
+ /* we need to increase the counter past the nested block */
+ pfr_prepare_network(&umask, af, ke2->pfrke_net);
+ PF_POOLMASK(addr, addr, SUNION2PF(&umask, af), &pfr_ffaddr, af);
+ PF_AINC(addr, af);
+ if (!PF_MATCHA(0, cur, mask, addr, af)) {
+ /* ok, we reached the end of our main block */
+ /* go to next block in table */
+ idx++;
+ use_counter = 0;
+ goto _next_block;
+ }
+ }
+}
+
+static struct pfr_kentry *
+pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af)
+{
+ struct pfr_walktree w;
+
+ bzero(&w, sizeof(w));
+ w.pfrw_op = PFRW_POOL_GET;
+ w.pfrw_cnt = idx;
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh, pfr_walktree, &w);
+ return (w.pfrw_kentry);
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh, pfr_walktree, &w);
+ return (w.pfrw_kentry);
+#endif /* INET6 */
+ default:
+ return (NULL);
+ }
+}
+
+void
+pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn)
+{
+ struct pfr_walktree w;
+
+ bzero(&w, sizeof(w));
+ w.pfrw_op = PFRW_DYNADDR_UPDATE;
+ w.pfrw_dyn = dyn;
+
+ dyn->pfid_acnt4 = 0;
+ dyn->pfid_acnt6 = 0;
+ if (!dyn->pfid_af || dyn->pfid_af == AF_INET)
+ kt->pfrkt_ip4->rnh_walktree(&kt->pfrkt_ip4->rh, pfr_walktree, &w);
+ if (!dyn->pfid_af || dyn->pfid_af == AF_INET6)
+ kt->pfrkt_ip6->rnh_walktree(&kt->pfrkt_ip6->rh, pfr_walktree, &w);
+}
diff --git a/freebsd/sys/nios2/include/machine/in_cksum.h b/freebsd/sys/nios2/include/machine/in_cksum.h
index 633efa1f..72edfba9 100644
--- a/freebsd/sys/nios2/include/machine/in_cksum.h
+++ b/freebsd/sys/nios2/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/opencrypto/cast.c b/freebsd/sys/opencrypto/cast.c
index 2f3e1cf8..1fb62f20 100644
--- a/freebsd/sys/opencrypto/cast.c
+++ b/freebsd/sys/opencrypto/cast.c
@@ -129,7 +129,7 @@ u_int32_t t, l, r;
}
-/***** Key Schedual *****/
+/***** Key Schedule *****/
void cast_setkey(cast_key* key, u_int8_t* rawkey, int keybytes)
{
diff --git a/freebsd/sys/opencrypto/criov.c b/freebsd/sys/opencrypto/criov.c
index efcbee1d..83a30716 100644
--- a/freebsd/sys/opencrypto/criov.c
+++ b/freebsd/sys/opencrypto/criov.c
@@ -101,35 +101,31 @@ cuio_copyback(struct uio* uio, int off, int len, caddr_t cp)
}
/*
- * Return a pointer to iov/offset of location in iovec list.
+ * Return the index and offset of location in iovec list.
*/
-struct iovec *
+int
cuio_getptr(struct uio *uio, int loc, int *off)
{
- struct iovec *iov = uio->uio_iov;
- int iol = uio->uio_iovcnt;
+ int ind, len;
- while (loc >= 0) {
- /* Normal end of search */
- if (loc < iov->iov_len) {
+ ind = 0;
+ while (loc >= 0 && ind < uio->uio_iovcnt) {
+ len = uio->uio_iov[ind].iov_len;
+ if (len > loc) {
*off = loc;
- return (iov);
+ return (ind);
}
+ loc -= len;
+ ind++;
+ }
- loc -= iov->iov_len;
- if (iol == 0) {
- if (loc == 0) {
- /* Point at the end of valid data */
- *off = iov->iov_len;
- return (iov);
- } else
- return (NULL);
- } else {
- iov++, iol--;
- }
- }
+ if (ind > 0 && loc == 0) {
+ ind--;
+ *off = uio->uio_iov[ind].iov_len;
+ return (ind);
+ }
- return (NULL);
+ return (-1);
}
/*
@@ -198,3 +194,50 @@ crypto_apply(int flags, caddr_t buf, int off, int len,
error = (*f)(arg, buf + off, len);
return (error);
}
+
+int
+crypto_mbuftoiov(struct mbuf *mbuf, struct iovec **iovptr, int *cnt,
+ int *allocated)
+{
+ struct iovec *iov;
+ struct mbuf *m, *mtmp;
+ int i, j;
+
+ *allocated = 0;
+ iov = *iovptr;
+ if (iov == NULL)
+ *cnt = 0;
+
+ m = mbuf;
+ i = 0;
+ while (m != NULL) {
+ if (i == *cnt) {
+ /* we need to allocate a larger array */
+ j = 1;
+ mtmp = m;
+ while ((mtmp = mtmp->m_next) != NULL)
+ j++;
+ iov = malloc(sizeof *iov * (i + j), M_CRYPTO_DATA,
+ M_NOWAIT);
+ if (iov == NULL)
+ return ENOMEM;
+ *allocated = 1;
+ *cnt = i + j;
+ memcpy(iov, *iovptr, sizeof *iov * i);
+ }
+
+ iov[i].iov_base = m->m_data;
+ iov[i].iov_len = m->m_len;
+
+ i++;
+ m = m->m_next;
+ }
+
+ if (*allocated)
+ KASSERT(*cnt == i, ("did not allocate correct amount: %d != %d",
+ *cnt, i));
+
+ *iovptr = iov;
+ *cnt = i;
+ return 0;
+}
diff --git a/freebsd/sys/opencrypto/crypto.c b/freebsd/sys/opencrypto/crypto.c
index ee8dbc2b..4a013648 100644
--- a/freebsd/sys/opencrypto/crypto.c
+++ b/freebsd/sys/opencrypto/crypto.c
@@ -59,7 +59,6 @@ __FBSDID("$FreeBSD$");
#define CRYPTO_TIMING /* enable timing support */
#include <rtems/bsd/local/opt_ddb.h>
-#include <rtems/bsd/local/opt_kdtrace.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -164,10 +163,10 @@ int crypto_userasymcrypto = 1; /* userland may do asym crypto reqs */
SYSCTL_INT(_kern, OID_AUTO, userasymcrypto, CTLFLAG_RW,
&crypto_userasymcrypto, 0,
"Enable/disable user-mode access to asymmetric crypto support");
-int crypto_devallowsoft = 0; /* only use hardware crypto for asym */
+int crypto_devallowsoft = 0; /* only use hardware crypto */
SYSCTL_INT(_kern, OID_AUTO, cryptodevallowsoft, CTLFLAG_RW,
&crypto_devallowsoft, 0,
- "Enable/disable use of software asym crypto support");
+ "Enable/disable use of software crypto by /dev/crypto");
MALLOC_DEFINE(M_CRYPTO_DATA, "crypto", "crypto session records");
@@ -376,9 +375,8 @@ again:
best = cap;
}
}
- if (best != NULL)
- return best;
- if (match == CRYPTOCAP_F_HARDWARE && (flags & CRYPTOCAP_F_SOFTWARE)) {
+ if (best == NULL && match == CRYPTOCAP_F_HARDWARE &&
+ (flags & CRYPTOCAP_F_SOFTWARE)) {
/* sort of an Algol 68-style for loop */
match = CRYPTOCAP_F_SOFTWARE;
goto again;
@@ -429,9 +427,12 @@ crypto_newsession(u_int64_t *sid, struct cryptoini *cri, int crid)
(*sid) <<= 32;
(*sid) |= (lid & 0xffffffff);
cap->cc_sessions++;
- }
- } else
+ } else
+ CRYPTDEB("dev newsession failed");
+ } else {
+ CRYPTDEB("no driver");
err = EINVAL;
+ }
CRYPTO_DRIVER_UNLOCK();
return err;
}
@@ -917,7 +918,7 @@ again:
}
/*
- * Dispatch an assymetric crypto request.
+ * Dispatch an asymmetric crypto request.
*/
static int
crypto_kinvoke(struct cryptkop *krp, int crid)
@@ -1187,8 +1188,8 @@ crypto_kdone(struct cryptkop *krp)
/* XXX: What if driver is loaded in the meantime? */
if (krp->krp_hid < crypto_drivers_num) {
cap = &crypto_drivers[krp->krp_hid];
+ KASSERT(cap->cc_koperations > 0, ("cc_koperations == 0"));
cap->cc_koperations--;
- KASSERT(cap->cc_koperations >= 0, ("cc_koperations < 0"));
if (cap->cc_flags & CRYPTOCAP_F_CLEANUP)
crypto_remove(cap);
}
diff --git a/freebsd/sys/opencrypto/cryptodeflate.c b/freebsd/sys/opencrypto/cryptodeflate.c
new file mode 100644
index 00000000..ed891964
--- /dev/null
+++ b/freebsd/sys/opencrypto/cryptodeflate.c
@@ -0,0 +1,265 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: deflate.c,v 1.3 2001/08/20 02:45:22 hugh Exp $ */
+
+/*-
+ * Copyright (c) 2001 Jean-Jacques Bernard-Gundol (jj@wabbitt.org)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This file contains a wrapper around the deflate algo compression
+ * functions using the zlib library (see libkern/zlib.c and sys/zlib.h)
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <rtems/bsd/sys/param.h>
+#include <sys/malloc.h>
+#include <rtems/bsd/sys/param.h>
+#include <sys/kernel.h>
+#include <sys/sdt.h>
+#include <sys/systm.h>
+#include <sys/zlib.h>
+
+#include <opencrypto/cryptodev.h>
+#include <opencrypto/deflate.h>
+
+SDT_PROVIDER_DECLARE(opencrypto);
+SDT_PROBE_DEFINE2(opencrypto, deflate, deflate_global, entry,
+ "int", "u_int32_t");
+SDT_PROBE_DEFINE5(opencrypto, deflate, deflate_global, bad,
+ "int", "int", "int", "int", "int");
+SDT_PROBE_DEFINE5(opencrypto, deflate, deflate_global, iter,
+ "int", "int", "int", "int", "int");
+SDT_PROBE_DEFINE2(opencrypto, deflate, deflate_global, return,
+ "int", "u_int32_t");
+
+int window_inflate = -1 * MAX_WBITS;
+int window_deflate = -12;
+
+/*
+ * This function takes a block of data and (de)compress it using the deflate
+ * algorithm
+ */
+
+u_int32_t
+deflate_global(data, size, decomp, out)
+ u_int8_t *data;
+ u_int32_t size;
+ int decomp;
+ u_int8_t **out;
+{
+ /* decomp indicates whether we compress (0) or decompress (1) */
+
+ z_stream zbuf;
+ u_int8_t *output;
+ u_int32_t count, result;
+ int error, i;
+ struct deflate_buf *bufh, *bufp;
+
+ SDT_PROBE2(opencrypto, deflate, deflate_global, entry, decomp, size);
+
+ bufh = bufp = NULL;
+ if (!decomp) {
+ i = 1;
+ } else {
+ /*
+ * Choose a buffer with 4x the size of the input buffer
+ * for the size of the output buffer in the case of
+ * decompression. If it's not sufficient, it will need to be
+ * updated while the decompression is going on.
+ */
+ i = 4;
+ }
+ /*
+ * Make sure we do have enough output space. Repeated calls to
+ * deflate need at least 6 bytes of output buffer space to avoid
+ * repeated markers. We will always provide at least 16 bytes.
+ */
+ while ((size * i) < 16)
+ i++;
+
+ bufh = bufp = malloc(sizeof(*bufp) + (size_t)(size * i),
+ M_CRYPTO_DATA, M_NOWAIT);
+ if (bufp == NULL) {
+ SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
+ decomp, 0, __LINE__, 0, 0);
+ goto bad2;
+ }
+ bufp->next = NULL;
+ bufp->size = size * i;
+
+ bzero(&zbuf, sizeof(z_stream));
+ zbuf.zalloc = z_alloc;
+ zbuf.zfree = z_free;
+ zbuf.opaque = Z_NULL;
+ zbuf.next_in = data; /* Data that is going to be processed. */
+ zbuf.avail_in = size; /* Total length of data to be processed. */
+ zbuf.next_out = bufp->data;
+ zbuf.avail_out = bufp->size;
+
+ error = decomp ? inflateInit2(&zbuf, window_inflate) :
+ deflateInit2(&zbuf, Z_DEFAULT_COMPRESSION, Z_METHOD,
+ window_deflate, Z_MEMLEVEL, Z_DEFAULT_STRATEGY);
+ if (error != Z_OK) {
+ SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
+ decomp, error, __LINE__, 0, 0);
+ goto bad;
+ }
+
+ for (;;) {
+ error = decomp ? inflate(&zbuf, Z_SYNC_FLUSH) :
+ deflate(&zbuf, Z_FINISH);
+ if (error != Z_OK && error != Z_STREAM_END) {
+ /*
+ * Unfortunately we are limited to 5 arguments,
+ * thus use two probes.
+ */
+ SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
+ decomp, error, __LINE__,
+ zbuf.avail_in, zbuf.avail_out);
+ SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
+ decomp, error, __LINE__,
+ zbuf.state->dummy, zbuf.total_out);
+ goto bad;
+ }
+ SDT_PROBE5(opencrypto, deflate, deflate_global, iter,
+ decomp, error, __LINE__,
+ zbuf.avail_in, zbuf.avail_out);
+ SDT_PROBE5(opencrypto, deflate, deflate_global, iter,
+ decomp, error, __LINE__,
+ zbuf.state->dummy, zbuf.total_out);
+ if (decomp && zbuf.avail_in == 0 && error == Z_STREAM_END) {
+ /* Done. */
+ break;
+ } else if (!decomp && error == Z_STREAM_END) {
+ /* Done. */
+ break;
+ } else if (zbuf.avail_out == 0) {
+ struct deflate_buf *p;
+
+ /* We need more output space for another iteration. */
+ p = malloc(sizeof(*p) + (size_t)(size * i),
+ M_CRYPTO_DATA, M_NOWAIT);
+ if (p == NULL) {
+ SDT_PROBE5(opencrypto, deflate, deflate_global,
+ bad, decomp, 0, __LINE__, 0, 0);
+ goto bad;
+ }
+ p->next = NULL;
+ p->size = size * i;
+ bufp->next = p;
+ bufp = p;
+ zbuf.next_out = bufp->data;
+ zbuf.avail_out = bufp->size;
+ } else {
+ /* Unexpected result. */
+ /*
+ * Unfortunately we are limited to 5 arguments,
+ * thus, again, use two probes.
+ */
+ SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
+ decomp, error, __LINE__,
+ zbuf.avail_in, zbuf.avail_out);
+ SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
+ decomp, error, __LINE__,
+ zbuf.state->dummy, zbuf.total_out);
+ goto bad;
+ }
+ }
+
+ result = count = zbuf.total_out;
+
+ *out = malloc(result, M_CRYPTO_DATA, M_NOWAIT);
+ if (*out == NULL) {
+ SDT_PROBE5(opencrypto, deflate, deflate_global, bad,
+ decomp, 0, __LINE__, 0, 0);
+ goto bad;
+ }
+ if (decomp)
+ inflateEnd(&zbuf);
+ else
+ deflateEnd(&zbuf);
+ output = *out;
+ for (bufp = bufh; bufp != NULL; ) {
+ if (count > bufp->size) {
+ struct deflate_buf *p;
+
+ bcopy(bufp->data, *out, bufp->size);
+ *out += bufp->size;
+ count -= bufp->size;
+ p = bufp;
+ bufp = bufp->next;
+ free(p, M_CRYPTO_DATA);
+ } else {
+ /* It should be the last buffer. */
+ bcopy(bufp->data, *out, count);
+ *out += count;
+ free(bufp, M_CRYPTO_DATA);
+ bufp = NULL;
+ count = 0;
+ }
+ }
+ *out = output;
+ SDT_PROBE2(opencrypto, deflate, deflate_global, return, decomp, result);
+ return result;
+
+bad:
+ if (decomp)
+ inflateEnd(&zbuf);
+ else
+ deflateEnd(&zbuf);
+ for (bufp = bufh; bufp != NULL; ) {
+ struct deflate_buf *p;
+
+ p = bufp;
+ bufp = bufp->next;
+ free(p, M_CRYPTO_DATA);
+ }
+bad2:
+ *out = NULL;
+ return 0;
+}
+
+void *
+z_alloc(nil, type, size)
+ void *nil;
+ u_int type, size;
+{
+ void *ptr;
+
+ ptr = malloc(type *size, M_CRYPTO_DATA, M_NOWAIT);
+ return ptr;
+}
+
+void
+z_free(nil, ptr)
+ void *nil, *ptr;
+{
+ free(ptr, M_CRYPTO_DATA);
+}
diff --git a/freebsd/sys/opencrypto/cryptodev.h b/freebsd/sys/opencrypto/cryptodev.h
index e2995221..d14fb3a8 100644
--- a/freebsd/sys/opencrypto/cryptodev.h
+++ b/freebsd/sys/opencrypto/cryptodev.h
@@ -23,6 +23,12 @@
* PURPOSE.
*
* Copyright (c) 2001 Theo de Raadt
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -72,6 +78,7 @@
#define SHA2_512_HASH_LEN 64
#define MD5_KPDK_HASH_LEN 16
#define SHA1_KPDK_HASH_LEN 20
+#define AES_GMAC_HASH_LEN 16
/* Maximum hash algorithm result length */
#define HASH_MAX_LEN SHA2_512_HASH_LEN /* Keep this updated */
@@ -80,40 +87,87 @@
#define MD5_HMAC_BLOCK_LEN 64
#define SHA1_HMAC_BLOCK_LEN 64
#define RIPEMD160_HMAC_BLOCK_LEN 64
-#define SHA2_256_HMAC_BLOCK_LEN 64
-#define SHA2_384_HMAC_BLOCK_LEN 128
-#define SHA2_512_HMAC_BLOCK_LEN 128
+#define SHA2_256_HMAC_BLOCK_LEN 64
+#define SHA2_384_HMAC_BLOCK_LEN 128
+#define SHA2_512_HMAC_BLOCK_LEN 128
/* Maximum HMAC block length */
-#define HMAC_MAX_BLOCK_LEN SHA2_512_HMAC_BLOCK_LEN /* Keep this updated */
+#define HMAC_MAX_BLOCK_LEN SHA2_512_HMAC_BLOCK_LEN /* Keep this updated */
#define HMAC_IPAD_VAL 0x36
#define HMAC_OPAD_VAL 0x5C
+/* HMAC Key Length */
+#define NULL_HMAC_KEY_LEN 0
+#define MD5_HMAC_KEY_LEN 16
+#define SHA1_HMAC_KEY_LEN 20
+#define RIPEMD160_HMAC_KEY_LEN 20
+#define SHA2_256_HMAC_KEY_LEN 32
+#define SHA2_384_HMAC_KEY_LEN 48
+#define SHA2_512_HMAC_KEY_LEN 64
+#define AES_128_GMAC_KEY_LEN 16
+#define AES_192_GMAC_KEY_LEN 24
+#define AES_256_GMAC_KEY_LEN 32
/* Encryption algorithm block sizes */
-#define NULL_BLOCK_LEN 4
-#define DES_BLOCK_LEN 8
-#define DES3_BLOCK_LEN 8
-#define BLOWFISH_BLOCK_LEN 8
-#define SKIPJACK_BLOCK_LEN 8
-#define CAST128_BLOCK_LEN 8
-#define RIJNDAEL128_BLOCK_LEN 16
-#define AES_BLOCK_LEN RIJNDAEL128_BLOCK_LEN
-#define CAMELLIA_BLOCK_LEN 16
-#define EALG_MAX_BLOCK_LEN AES_BLOCK_LEN /* Keep this updated */
+#define NULL_BLOCK_LEN 4 /* IPsec to maintain alignment */
+#define DES_BLOCK_LEN 8
+#define DES3_BLOCK_LEN 8
+#define BLOWFISH_BLOCK_LEN 8
+#define SKIPJACK_BLOCK_LEN 8
+#define CAST128_BLOCK_LEN 8
+#define RIJNDAEL128_BLOCK_LEN 16
+#define AES_BLOCK_LEN 16
+#define AES_ICM_BLOCK_LEN 1
+#define ARC4_BLOCK_LEN 1
+#define CAMELLIA_BLOCK_LEN 16
+#define EALG_MAX_BLOCK_LEN AES_BLOCK_LEN /* Keep this updated */
+
+/* IV Lengths */
+
+#define ARC4_IV_LEN 1
+#define AES_GCM_IV_LEN 12
+#define AES_XTS_IV_LEN 8
+#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */
+
+/* Min and Max Encryption Key Sizes */
+#define NULL_MIN_KEY 0
+#define NULL_MAX_KEY 256 /* 2048 bits, max key */
+#define DES_MIN_KEY 8
+#define DES_MAX_KEY DES_MIN_KEY
+#define TRIPLE_DES_MIN_KEY 24
+#define TRIPLE_DES_MAX_KEY TRIPLE_DES_MIN_KEY
+#define BLOWFISH_MIN_KEY 5
+#define BLOWFISH_MAX_KEY 56 /* 448 bits, max key */
+#define CAST_MIN_KEY 5
+#define CAST_MAX_KEY 16
+#define SKIPJACK_MIN_KEY 10
+#define SKIPJACK_MAX_KEY SKIPJACK_MIN_KEY
+#define RIJNDAEL_MIN_KEY 16
+#define RIJNDAEL_MAX_KEY 32
+#define AES_MIN_KEY RIJNDAEL_MIN_KEY
+#define AES_MAX_KEY RIJNDAEL_MAX_KEY
+#define AES_XTS_MIN_KEY (2 * AES_MIN_KEY)
+#define AES_XTS_MAX_KEY (2 * AES_MAX_KEY)
+#define ARC4_MIN_KEY 1
+#define ARC4_MAX_KEY 32
+#define CAMELLIA_MIN_KEY 8
+#define CAMELLIA_MAX_KEY 32
+
+/* Maximum hash algorithm result length */
+#define AALG_MAX_RESULT_LEN 64 /* Keep this updated */
#define CRYPTO_ALGORITHM_MIN 1
-#define CRYPTO_DES_CBC 1
-#define CRYPTO_3DES_CBC 2
-#define CRYPTO_BLF_CBC 3
-#define CRYPTO_CAST_CBC 4
-#define CRYPTO_SKIPJACK_CBC 5
-#define CRYPTO_MD5_HMAC 6
-#define CRYPTO_SHA1_HMAC 7
-#define CRYPTO_RIPEMD160_HMAC 8
-#define CRYPTO_MD5_KPDK 9
-#define CRYPTO_SHA1_KPDK 10
-#define CRYPTO_RIJNDAEL128_CBC 11 /* 128 bit blocksize */
-#define CRYPTO_AES_CBC 11 /* 128 bit blocksize -- the same as above */
-#define CRYPTO_ARC4 12
+#define CRYPTO_DES_CBC 1
+#define CRYPTO_3DES_CBC 2
+#define CRYPTO_BLF_CBC 3
+#define CRYPTO_CAST_CBC 4
+#define CRYPTO_SKIPJACK_CBC 5
+#define CRYPTO_MD5_HMAC 6
+#define CRYPTO_SHA1_HMAC 7
+#define CRYPTO_RIPEMD160_HMAC 8
+#define CRYPTO_MD5_KPDK 9
+#define CRYPTO_SHA1_KPDK 10
+#define CRYPTO_RIJNDAEL128_CBC 11 /* 128 bit blocksize */
+#define CRYPTO_AES_CBC 11 /* 128 bit blocksize -- the same as above */
+#define CRYPTO_ARC4 12
#define CRYPTO_MD5 13
#define CRYPTO_SHA1 14
#define CRYPTO_NULL_HMAC 15
@@ -122,9 +176,18 @@
#define CRYPTO_SHA2_256_HMAC 18
#define CRYPTO_SHA2_384_HMAC 19
#define CRYPTO_SHA2_512_HMAC 20
-#define CRYPTO_CAMELLIA_CBC 21
+#define CRYPTO_CAMELLIA_CBC 21
#define CRYPTO_AES_XTS 22
-#define CRYPTO_ALGORITHM_MAX 22 /* Keep updated - see below */
+#define CRYPTO_AES_ICM 23 /* commonly known as CTR mode */
+#define CRYPTO_AES_NIST_GMAC 24 /* cipher side */
+#define CRYPTO_AES_NIST_GCM_16 25 /* 16 byte ICV */
+#define CRYPTO_AES_128_NIST_GMAC 26 /* auth side */
+#define CRYPTO_AES_192_NIST_GMAC 27 /* auth side */
+#define CRYPTO_AES_256_NIST_GMAC 28 /* auth side */
+#define CRYPTO_ALGORITHM_MAX 28 /* Keep updated - see below */
+
+#define CRYPTO_ALGO_VALID(x) ((x) >= CRYPTO_ALGORITHM_MIN && \
+ (x) <= CRYPTO_ALGORITHM_MAX)
/* Algorithm flags */
#define CRYPTO_ALG_FLAG_SUPPORTED 0x01 /* Algorithm is supported */
@@ -182,6 +245,20 @@ struct crypt_op {
caddr_t iv;
};
+/* op and flags the same as crypt_op */
+struct crypt_aead {
+ u_int32_t ses;
+ u_int16_t op; /* i.e. COP_ENCRYPT */
+ u_int16_t flags;
+ u_int len;
+ u_int aadlen;
+ u_int ivlen;
+ caddr_t src, dst; /* become iov[] inside kernel */
+ caddr_t aad; /* additional authenticated data */
+ caddr_t tag; /* must fit for chosen TAG length */
+ caddr_t iv;
+};
+
/*
* Parameters for looking up a crypto driver/device by
* device name or by id. The latter are returned for
@@ -239,6 +316,7 @@ struct crypt_kop {
#define CIOCGSESSION2 _IOWR('c', 106, struct session2_op)
#define CIOCKEY2 _IOWR('c', 107, struct crypt_kop)
#define CIOCFINDDEV _IOWR('c', 108, struct crypt_find_op)
+#define CIOCCRYPTAEAD _IOWR('c', 109, struct crypt_aead)
struct cryptotstat {
struct timespec acc; /* total accumulated time */
@@ -269,6 +347,14 @@ struct cryptostats {
};
#ifdef _KERNEL
+
+#if 0
+#define CRYPTDEB(s) do { printf("%s:%d: %s\n", __FILE__, __LINE__, s); \
+ } while (0)
+#else
+#define CRYPTDEB(s) do { } while (0)
+#endif
+
/* Standard initialization structure beginning */
struct cryptoini {
int cri_alg; /* Algorithm to use */
@@ -292,14 +378,15 @@ struct cryptodesc {
place, so don't copy. */
#define CRD_F_IV_EXPLICIT 0x04 /* IV explicitly provided */
#define CRD_F_DSA_SHA_NEEDED 0x08 /* Compute SHA-1 of buffer for DSA */
+#define CRD_F_COMP 0x0f /* Set when doing compression */
#define CRD_F_KEY_EXPLICIT 0x10 /* Key explicitly provided */
-#define CRD_F_COMP 0x0f /* Set when doing compression */
struct cryptoini CRD_INI; /* Initialization/context data */
-#define crd_iv CRD_INI.cri_iv
-#define crd_key CRD_INI.cri_key
-#define crd_alg CRD_INI.cri_alg
-#define crd_klen CRD_INI.cri_klen
+#define crd_esn CRD_INI.cri_esn
+#define crd_iv CRD_INI.cri_iv
+#define crd_key CRD_INI.cri_key
+#define crd_alg CRD_INI.cri_alg
+#define crd_klen CRD_INI.cri_klen
struct cryptodesc *crd_next;
};
@@ -324,9 +411,8 @@ struct cryptop {
*/
int crp_flags;
-#define CRYPTO_F_IMBUF 0x0001 /* Input/output are mbuf chains */
-#define CRYPTO_F_IOV 0x0002 /* Input/output are uio */
-#define CRYPTO_F_REL 0x0004 /* Must return data in same place */
+#define CRYPTO_F_IMBUF 0x0001 /* Input/output are mbuf chains */
+#define CRYPTO_F_IOV 0x0002 /* Input/output are uio */
#define CRYPTO_F_BATCH 0x0008 /* Batch op if possible */
#define CRYPTO_F_CBIMM 0x0010 /* Do callback immediately */
#define CRYPTO_F_DONE 0x0020 /* Operation completed */
@@ -341,12 +427,12 @@ struct cryptop {
struct bintime crp_tstamp; /* performance time stamp */
};
-#define CRYPTO_BUF_CONTIG 0x0
-#define CRYPTO_BUF_IOV 0x1
-#define CRYPTO_BUF_MBUF 0x2
+#define CRYPTO_BUF_CONTIG 0x0
+#define CRYPTO_BUF_IOV 0x1
+#define CRYPTO_BUF_MBUF 0x2
-#define CRYPTO_OP_DECRYPT 0x0
-#define CRYPTO_OP_ENCRYPT 0x1
+#define CRYPTO_OP_DECRYPT 0x0
+#define CRYPTO_OP_ENCRYPT 0x1
/*
* Hints passed to process methods.
@@ -381,9 +467,9 @@ MALLOC_DECLARE(M_CRYPTO_DATA);
extern int crypto_newsession(u_int64_t *sid, struct cryptoini *cri, int hard);
extern int crypto_freesession(u_int64_t sid);
-#define CRYPTOCAP_F_HARDWARE CRYPTO_FLAG_HARDWARE
-#define CRYPTOCAP_F_SOFTWARE CRYPTO_FLAG_SOFTWARE
-#define CRYPTOCAP_F_SYNC 0x04000000 /* operates synchronously */
+#define CRYPTOCAP_F_HARDWARE CRYPTO_FLAG_HARDWARE
+#define CRYPTOCAP_F_SOFTWARE CRYPTO_FLAG_SOFTWARE
+#define CRYPTOCAP_F_SYNC 0x04000000 /* operates synchronously */
extern int32_t crypto_get_driverid(device_t dev, int flags);
extern int crypto_find_driver(const char *);
extern device_t crypto_find_device_byhid(int hid);
@@ -418,10 +504,15 @@ extern int crypto_devallowsoft; /* only use hardware crypto */
struct uio;
extern void cuio_copydata(struct uio* uio, int off, int len, caddr_t cp);
extern void cuio_copyback(struct uio* uio, int off, int len, caddr_t cp);
-extern struct iovec *cuio_getptr(struct uio *uio, int loc, int *off);
+extern int cuio_getptr(struct uio *uio, int loc, int *off);
extern int cuio_apply(struct uio *uio, int off, int len,
int (*f)(void *, void *, u_int), void *arg);
+struct mbuf;
+struct iovec;
+extern int crypto_mbuftoiov(struct mbuf *mbuf, struct iovec **iovptr,
+ int *cnt, int *allocated);
+
extern void crypto_copyback(int flags, caddr_t buf, int off, int size,
caddr_t in);
extern void crypto_copydata(int flags, caddr_t buf, int off, int size,
diff --git a/freebsd/sys/opencrypto/cryptosoft.c b/freebsd/sys/opencrypto/cryptosoft.c
index 6c934d76..53569456 100644
--- a/freebsd/sys/opencrypto/cryptosoft.c
+++ b/freebsd/sys/opencrypto/cryptosoft.c
@@ -11,6 +11,12 @@
* supported the development of this code.
*
* Copyright (c) 2000, 2001 Angelos D. Keromytis
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
*
* Permission to use, copy, and modify this software with or without fee
* is hereby granted, provided that this entire notice is included in
@@ -37,6 +43,10 @@ __FBSDID("$FreeBSD$");
#include <sys/random.h>
#include <sys/kernel.h>
#include <sys/uio.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/endian.h>
+#include <sys/limits.h>
#include <crypto/blowfish/blowfish.h>
#include <crypto/sha1.h>
@@ -56,14 +66,18 @@ __FBSDID("$FreeBSD$");
static int32_t swcr_id;
static struct swcr_data **swcr_sessions = NULL;
static u_int32_t swcr_sesnum;
+/* Protects swcr_sessions pointer, not data. */
+static struct rwlock swcr_sessions_lock;
u_int8_t hmac_ipad_buffer[HMAC_MAX_BLOCK_LEN];
u_int8_t hmac_opad_buffer[HMAC_MAX_BLOCK_LEN];
static int swcr_encdec(struct cryptodesc *, struct swcr_data *, caddr_t, int);
static int swcr_authcompute(struct cryptodesc *, struct swcr_data *, caddr_t, int);
+static int swcr_authenc(struct cryptop *crp);
static int swcr_compdec(struct cryptodesc *, struct swcr_data *, caddr_t, int);
static int swcr_freesession(device_t dev, u_int64_t tid);
+static int swcr_freesession_locked(device_t dev, u_int64_t tid);
/*
* Apply a symmetric encryption/decryption algorithm.
@@ -73,36 +87,48 @@ swcr_encdec(struct cryptodesc *crd, struct swcr_data *sw, caddr_t buf,
int flags)
{
unsigned char iv[EALG_MAX_BLOCK_LEN], blk[EALG_MAX_BLOCK_LEN], *idat;
- unsigned char *ivp, piv[EALG_MAX_BLOCK_LEN];
+ unsigned char *ivp, *nivp, iv2[EALG_MAX_BLOCK_LEN];
struct enc_xform *exf;
- int i, k, j, blks;
+ int i, j, k, blks, ind, count, ivlen;
+ struct uio *uio, uiolcl;
+ struct iovec iovlcl[4];
+ struct iovec *iov;
+ int iovcnt, iovalloc;
+ int error;
+
+ error = 0;
exf = sw->sw_exf;
blks = exf->blocksize;
+ ivlen = exf->ivsize;
/* Check for non-padded data */
if (crd->crd_len % blks)
return EINVAL;
+ if (crd->crd_alg == CRYPTO_AES_ICM &&
+ (crd->crd_flags & CRD_F_IV_EXPLICIT) == 0)
+ return (EINVAL);
+
/* Initialize the IV */
if (crd->crd_flags & CRD_F_ENCRYPT) {
/* IV explicitly provided ? */
if (crd->crd_flags & CRD_F_IV_EXPLICIT)
- bcopy(crd->crd_iv, iv, blks);
+ bcopy(crd->crd_iv, iv, ivlen);
else
- arc4rand(iv, blks, 0);
+ arc4rand(iv, ivlen, 0);
/* Do we need to write the IV */
if (!(crd->crd_flags & CRD_F_IV_PRESENT))
- crypto_copyback(flags, buf, crd->crd_inject, blks, iv);
+ crypto_copyback(flags, buf, crd->crd_inject, ivlen, iv);
} else { /* Decryption */
- /* IV explicitly provided ? */
+ /* IV explicitly provided ? */
if (crd->crd_flags & CRD_F_IV_EXPLICIT)
- bcopy(crd->crd_iv, iv, blks);
+ bcopy(crd->crd_iv, iv, ivlen);
else {
/* Get IV off buf */
- crypto_copydata(flags, buf, crd->crd_inject, blks, iv);
+ crypto_copydata(flags, buf, crd->crd_inject, ivlen, iv);
}
}
@@ -111,341 +137,186 @@ swcr_encdec(struct cryptodesc *crd, struct swcr_data *sw, caddr_t buf,
if (sw->sw_kschedule)
exf->zerokey(&(sw->sw_kschedule));
+
error = exf->setkey(&sw->sw_kschedule,
crd->crd_key, crd->crd_klen / 8);
if (error)
return (error);
}
+ iov = iovlcl;
+ iovcnt = nitems(iovlcl);
+ iovalloc = 0;
+ uio = &uiolcl;
+ if ((flags & CRYPTO_F_IMBUF) != 0) {
+ error = crypto_mbuftoiov((struct mbuf *)buf, &iov, &iovcnt,
+ &iovalloc);
+ if (error)
+ return (error);
+ uio->uio_iov = iov;
+ uio->uio_iovcnt = iovcnt;
+ } else if ((flags & CRYPTO_F_IOV) != 0)
+ uio = (struct uio *)buf;
+ else {
+ iov[0].iov_base = buf;
+ iov[0].iov_len = crd->crd_skip + crd->crd_len;
+ uio->uio_iov = iov;
+ uio->uio_iovcnt = 1;
+ }
+
ivp = iv;
- /*
- * xforms that provide a reinit method perform all IV
- * handling themselves.
- */
- if (exf->reinit)
+ if (exf->reinit) {
+ /*
+ * xforms that provide a reinit method perform all IV
+ * handling themselves.
+ */
exf->reinit(sw->sw_kschedule, iv);
+ }
- if (flags & CRYPTO_F_IMBUF) {
- struct mbuf *m = (struct mbuf *) buf;
+ count = crd->crd_skip;
+ ind = cuio_getptr(uio, count, &k);
+ if (ind == -1) {
+ error = EINVAL;
+ goto out;
+ }
- /* Find beginning of data */
- m = m_getptr(m, crd->crd_skip, &k);
- if (m == NULL)
- return EINVAL;
+ i = crd->crd_len;
- i = crd->crd_len;
-
- while (i > 0) {
- /*
- * If there's insufficient data at the end of
- * an mbuf, we have to do some copying.
- */
- if (m->m_len < k + blks && m->m_len != k) {
- m_copydata(m, k, blks, blk);
-
- /* Actual encryption/decryption */
- if (exf->reinit) {
- if (crd->crd_flags & CRD_F_ENCRYPT) {
- exf->encrypt(sw->sw_kschedule,
- blk);
- } else {
- exf->decrypt(sw->sw_kschedule,
- blk);
- }
- } else if (crd->crd_flags & CRD_F_ENCRYPT) {
- /* XOR with previous block */
- for (j = 0; j < blks; j++)
- blk[j] ^= ivp[j];
-
- exf->encrypt(sw->sw_kschedule, blk);
-
- /*
- * Keep encrypted block for XOR'ing
- * with next block
- */
- bcopy(blk, iv, blks);
- ivp = iv;
- } else { /* decrypt */
- /*
- * Keep encrypted block for XOR'ing
- * with next block
- */
- if (ivp == iv)
- bcopy(blk, piv, blks);
- else
- bcopy(blk, iv, blks);
-
- exf->decrypt(sw->sw_kschedule, blk);
-
- /* XOR with previous block */
- for (j = 0; j < blks; j++)
- blk[j] ^= ivp[j];
-
- if (ivp == iv)
- bcopy(piv, iv, blks);
- else
- ivp = iv;
+ while (i > 0) {
+ /*
+ * If there's insufficient data at the end of
+ * an iovec, we have to do some copying.
+ */
+ if (uio->uio_iov[ind].iov_len < k + blks &&
+ uio->uio_iov[ind].iov_len != k) {
+ cuio_copydata(uio, count, blks, blk);
+
+ /* Actual encryption/decryption */
+ if (exf->reinit) {
+ if (crd->crd_flags & CRD_F_ENCRYPT) {
+ exf->encrypt(sw->sw_kschedule,
+ blk);
+ } else {
+ exf->decrypt(sw->sw_kschedule,
+ blk);
}
-
- /* Copy back decrypted block */
- m_copyback(m, k, blks, blk);
-
- /* Advance pointer */
- m = m_getptr(m, k + blks, &k);
- if (m == NULL)
- return EINVAL;
-
- i -= blks;
-
- /* Could be done... */
- if (i == 0)
- break;
+ } else if (crd->crd_flags & CRD_F_ENCRYPT) {
+ /* XOR with previous block */
+ for (j = 0; j < blks; j++)
+ blk[j] ^= ivp[j];
+
+ exf->encrypt(sw->sw_kschedule, blk);
+
+ /*
+ * Keep encrypted block for XOR'ing
+ * with next block
+ */
+ bcopy(blk, iv, blks);
+ ivp = iv;
+ } else { /* decrypt */
+ /*
+ * Keep encrypted block for XOR'ing
+ * with next block
+ */
+ nivp = (ivp == iv) ? iv2 : iv;
+ bcopy(blk, nivp, blks);
+
+ exf->decrypt(sw->sw_kschedule, blk);
+
+ /* XOR with previous block */
+ for (j = 0; j < blks; j++)
+ blk[j] ^= ivp[j];
+
+ ivp = nivp;
}
- /* Skip possibly empty mbufs */
- if (k == m->m_len) {
- for (m = m->m_next; m && m->m_len == 0;
- m = m->m_next)
- ;
- k = 0;
- }
-
- /* Sanity check */
- if (m == NULL)
- return EINVAL;
+ /* Copy back decrypted block */
+ cuio_copyback(uio, count, blks, blk);
- /*
- * Warning: idat may point to garbage here, but
- * we only use it in the while() loop, only if
- * there are indeed enough data.
- */
- idat = mtod(m, unsigned char *) + k;
-
- while (m->m_len >= k + blks && i > 0) {
- if (exf->reinit) {
- if (crd->crd_flags & CRD_F_ENCRYPT) {
- exf->encrypt(sw->sw_kschedule,
- idat);
- } else {
- exf->decrypt(sw->sw_kschedule,
- idat);
- }
- } else if (crd->crd_flags & CRD_F_ENCRYPT) {
- /* XOR with previous block/IV */
- for (j = 0; j < blks; j++)
- idat[j] ^= ivp[j];
-
- exf->encrypt(sw->sw_kschedule, idat);
- ivp = idat;
- } else { /* decrypt */
- /*
- * Keep encrypted block to be used
- * in next block's processing.
- */
- if (ivp == iv)
- bcopy(idat, piv, blks);
- else
- bcopy(idat, iv, blks);
-
- exf->decrypt(sw->sw_kschedule, idat);
-
- /* XOR with previous block/IV */
- for (j = 0; j < blks; j++)
- idat[j] ^= ivp[j];
-
- if (ivp == iv)
- bcopy(piv, iv, blks);
- else
- ivp = iv;
- }
+ count += blks;
- idat += blks;
- k += blks;
- i -= blks;
+ /* Advance pointer */
+ ind = cuio_getptr(uio, count, &k);
+ if (ind == -1) {
+ error = EINVAL;
+ goto out;
}
- }
- return 0; /* Done with mbuf encryption/decryption */
- } else if (flags & CRYPTO_F_IOV) {
- struct uio *uio = (struct uio *) buf;
- struct iovec *iov;
+ i -= blks;
- /* Find beginning of data */
- iov = cuio_getptr(uio, crd->crd_skip, &k);
- if (iov == NULL)
- return EINVAL;
+ /* Could be done... */
+ if (i == 0)
+ break;
+ }
- i = crd->crd_len;
-
- while (i > 0) {
- /*
- * If there's insufficient data at the end of
- * an iovec, we have to do some copying.
- */
- if (iov->iov_len < k + blks && iov->iov_len != k) {
- cuio_copydata(uio, k, blks, blk);
-
- /* Actual encryption/decryption */
- if (exf->reinit) {
- if (crd->crd_flags & CRD_F_ENCRYPT) {
- exf->encrypt(sw->sw_kschedule,
- blk);
- } else {
- exf->decrypt(sw->sw_kschedule,
- blk);
- }
- } else if (crd->crd_flags & CRD_F_ENCRYPT) {
- /* XOR with previous block */
- for (j = 0; j < blks; j++)
- blk[j] ^= ivp[j];
-
- exf->encrypt(sw->sw_kschedule, blk);
-
- /*
- * Keep encrypted block for XOR'ing
- * with next block
- */
- bcopy(blk, iv, blks);
- ivp = iv;
- } else { /* decrypt */
- /*
- * Keep encrypted block for XOR'ing
- * with next block
- */
- if (ivp == iv)
- bcopy(blk, piv, blks);
- else
- bcopy(blk, iv, blks);
-
- exf->decrypt(sw->sw_kschedule, blk);
-
- /* XOR with previous block */
- for (j = 0; j < blks; j++)
- blk[j] ^= ivp[j];
-
- if (ivp == iv)
- bcopy(piv, iv, blks);
- else
- ivp = iv;
+ /*
+ * Warning: idat may point to garbage here, but
+ * we only use it in the while() loop, only if
+ * there are indeed enough data.
+ */
+ idat = (char *)uio->uio_iov[ind].iov_base + k;
+
+ while (uio->uio_iov[ind].iov_len >= k + blks && i > 0) {
+ if (exf->reinit) {
+ if (crd->crd_flags & CRD_F_ENCRYPT) {
+ exf->encrypt(sw->sw_kschedule,
+ idat);
+ } else {
+ exf->decrypt(sw->sw_kschedule,
+ idat);
}
-
- /* Copy back decrypted block */
- cuio_copyback(uio, k, blks, blk);
-
- /* Advance pointer */
- iov = cuio_getptr(uio, k + blks, &k);
- if (iov == NULL)
- return EINVAL;
-
- i -= blks;
-
- /* Could be done... */
- if (i == 0)
- break;
+ } else if (crd->crd_flags & CRD_F_ENCRYPT) {
+ /* XOR with previous block/IV */
+ for (j = 0; j < blks; j++)
+ idat[j] ^= ivp[j];
+
+ exf->encrypt(sw->sw_kschedule, idat);
+ ivp = idat;
+ } else { /* decrypt */
+ /*
+ * Keep encrypted block to be used
+ * in next block's processing.
+ */
+ nivp = (ivp == iv) ? iv2 : iv;
+ bcopy(idat, nivp, blks);
+
+ exf->decrypt(sw->sw_kschedule, idat);
+
+ /* XOR with previous block/IV */
+ for (j = 0; j < blks; j++)
+ idat[j] ^= ivp[j];
+
+ ivp = nivp;
}
- /*
- * Warning: idat may point to garbage here, but
- * we only use it in the while() loop, only if
- * there are indeed enough data.
- */
- idat = (char *)iov->iov_base + k;
-
- while (iov->iov_len >= k + blks && i > 0) {
- if (exf->reinit) {
- if (crd->crd_flags & CRD_F_ENCRYPT) {
- exf->encrypt(sw->sw_kschedule,
- idat);
- } else {
- exf->decrypt(sw->sw_kschedule,
- idat);
- }
- } else if (crd->crd_flags & CRD_F_ENCRYPT) {
- /* XOR with previous block/IV */
- for (j = 0; j < blks; j++)
- idat[j] ^= ivp[j];
-
- exf->encrypt(sw->sw_kschedule, idat);
- ivp = idat;
- } else { /* decrypt */
- /*
- * Keep encrypted block to be used
- * in next block's processing.
- */
- if (ivp == iv)
- bcopy(idat, piv, blks);
- else
- bcopy(idat, iv, blks);
-
- exf->decrypt(sw->sw_kschedule, idat);
-
- /* XOR with previous block/IV */
- for (j = 0; j < blks; j++)
- idat[j] ^= ivp[j];
-
- if (ivp == iv)
- bcopy(piv, iv, blks);
- else
- ivp = iv;
- }
-
- idat += blks;
- k += blks;
- i -= blks;
- }
- if (k == iov->iov_len) {
- iov++;
- k = 0;
- }
+ idat += blks;
+ count += blks;
+ k += blks;
+ i -= blks;
}
- return 0; /* Done with iovec encryption/decryption */
- } else { /* contiguous buffer */
- if (exf->reinit) {
- for (i = crd->crd_skip;
- i < crd->crd_skip + crd->crd_len; i += blks) {
- if (crd->crd_flags & CRD_F_ENCRYPT)
- exf->encrypt(sw->sw_kschedule, buf + i);
- else
- exf->decrypt(sw->sw_kschedule, buf + i);
- }
- } else if (crd->crd_flags & CRD_F_ENCRYPT) {
- for (i = crd->crd_skip;
- i < crd->crd_skip + crd->crd_len; i += blks) {
- /* XOR with the IV/previous block, as appropriate. */
- if (i == crd->crd_skip)
- for (k = 0; k < blks; k++)
- buf[i + k] ^= ivp[k];
- else
- for (k = 0; k < blks; k++)
- buf[i + k] ^= buf[i + k - blks];
- exf->encrypt(sw->sw_kschedule, buf + i);
- }
- } else { /* Decrypt */
- /*
- * Start at the end, so we don't need to keep the encrypted
- * block as the IV for the next block.
- */
- for (i = crd->crd_skip + crd->crd_len - blks;
- i >= crd->crd_skip; i -= blks) {
- exf->decrypt(sw->sw_kschedule, buf + i);
-
- /* XOR with the IV/previous block, as appropriate */
- if (i == crd->crd_skip)
- for (k = 0; k < blks; k++)
- buf[i + k] ^= ivp[k];
- else
- for (k = 0; k < blks; k++)
- buf[i + k] ^= buf[i + k - blks];
+ /*
+ * Advance to the next iov if the end of the current iov
+ * is aligned with the end of a cipher block.
+ * Note that the code is equivalent to calling:
+ * ind = cuio_getptr(uio, count, &k);
+ */
+ if (i > 0 && k == uio->uio_iov[ind].iov_len) {
+ k = 0;
+ ind++;
+ if (ind >= uio->uio_iovcnt) {
+ error = EINVAL;
+ goto out;
}
}
-
- return 0; /* Done with contiguous buffer encryption/decryption */
}
- /* Unreachable */
- return EINVAL;
+out:
+ if (iovalloc)
+ free(iov, M_CRYPTO_DATA);
+
+ return (error);
}
static void
@@ -580,6 +451,181 @@ swcr_authcompute(struct cryptodesc *crd, struct swcr_data *sw, caddr_t buf,
return 0;
}
+CTASSERT(INT_MAX <= (1ll<<39) - 256); /* GCM: plain text < 2^39-256 */
+CTASSERT(INT_MAX <= (uint64_t)-1); /* GCM: associated data <= 2^64-1 */
+
+/*
+ * Apply a combined encryption-authentication transformation
+ */
+static int
+swcr_authenc(struct cryptop *crp)
+{
+ uint32_t blkbuf[howmany(EALG_MAX_BLOCK_LEN, sizeof(uint32_t))];
+ u_char *blk = (u_char *)blkbuf;
+ u_char aalg[AALG_MAX_RESULT_LEN];
+ u_char uaalg[AALG_MAX_RESULT_LEN];
+ u_char iv[EALG_MAX_BLOCK_LEN];
+ union authctx ctx;
+ struct cryptodesc *crd, *crda = NULL, *crde = NULL;
+ struct swcr_data *sw, *swa, *swe = NULL;
+ struct auth_hash *axf = NULL;
+ struct enc_xform *exf = NULL;
+ caddr_t buf = (caddr_t)crp->crp_buf;
+ uint32_t *blkp;
+ int aadlen, blksz, i, ivlen, len, iskip, oskip, r;
+
+ ivlen = blksz = iskip = oskip = 0;
+
+ for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
+ for (sw = swcr_sessions[crp->crp_sid & 0xffffffff];
+ sw && sw->sw_alg != crd->crd_alg;
+ sw = sw->sw_next)
+ ;
+ if (sw == NULL)
+ return (EINVAL);
+
+ switch (sw->sw_alg) {
+ case CRYPTO_AES_NIST_GCM_16:
+ case CRYPTO_AES_NIST_GMAC:
+ swe = sw;
+ crde = crd;
+ exf = swe->sw_exf;
+ ivlen = 12;
+ break;
+ case CRYPTO_AES_128_NIST_GMAC:
+ case CRYPTO_AES_192_NIST_GMAC:
+ case CRYPTO_AES_256_NIST_GMAC:
+ swa = sw;
+ crda = crd;
+ axf = swa->sw_axf;
+ if (swa->sw_ictx == 0)
+ return (EINVAL);
+ bcopy(swa->sw_ictx, &ctx, axf->ctxsize);
+ blksz = axf->blocksize;
+ break;
+ default:
+ return (EINVAL);
+ }
+ }
+ if (crde == NULL || crda == NULL)
+ return (EINVAL);
+
+ if (crde->crd_alg == CRYPTO_AES_NIST_GCM_16 &&
+ (crde->crd_flags & CRD_F_IV_EXPLICIT) == 0)
+ return (EINVAL);
+
+ if (crde->crd_klen != crda->crd_klen)
+ return (EINVAL);
+
+ /* Initialize the IV */
+ if (crde->crd_flags & CRD_F_ENCRYPT) {
+ /* IV explicitly provided ? */
+ if (crde->crd_flags & CRD_F_IV_EXPLICIT)
+ bcopy(crde->crd_iv, iv, ivlen);
+ else
+ arc4rand(iv, ivlen, 0);
+
+ /* Do we need to write the IV */
+ if (!(crde->crd_flags & CRD_F_IV_PRESENT))
+ crypto_copyback(crp->crp_flags, buf, crde->crd_inject,
+ ivlen, iv);
+
+ } else { /* Decryption */
+ /* IV explicitly provided ? */
+ if (crde->crd_flags & CRD_F_IV_EXPLICIT)
+ bcopy(crde->crd_iv, iv, ivlen);
+ else {
+ /* Get IV off buf */
+ crypto_copydata(crp->crp_flags, buf, crde->crd_inject,
+ ivlen, iv);
+ }
+ }
+
+ /* Supply MAC with IV */
+ if (axf->Reinit)
+ axf->Reinit(&ctx, iv, ivlen);
+
+ /* Supply MAC with AAD */
+ aadlen = crda->crd_len;
+
+ for (i = iskip; i < crda->crd_len; i += blksz) {
+ len = MIN(crda->crd_len - i, blksz - oskip);
+ crypto_copydata(crp->crp_flags, buf, crda->crd_skip + i, len,
+ blk + oskip);
+ bzero(blk + len + oskip, blksz - len - oskip);
+ axf->Update(&ctx, blk, blksz);
+ oskip = 0; /* reset initial output offset */
+ }
+
+ if (exf->reinit)
+ exf->reinit(swe->sw_kschedule, iv);
+
+ /* Do encryption/decryption with MAC */
+ for (i = 0; i < crde->crd_len; i += blksz) {
+ len = MIN(crde->crd_len - i, blksz);
+ if (len < blksz)
+ bzero(blk, blksz);
+ crypto_copydata(crp->crp_flags, buf, crde->crd_skip + i, len,
+ blk);
+ if (crde->crd_flags & CRD_F_ENCRYPT) {
+ exf->encrypt(swe->sw_kschedule, blk);
+ axf->Update(&ctx, blk, len);
+ crypto_copyback(crp->crp_flags, buf,
+ crde->crd_skip + i, len, blk);
+ } else {
+ axf->Update(&ctx, blk, len);
+ }
+ }
+
+ /* Do any required special finalization */
+ switch (crda->crd_alg) {
+ case CRYPTO_AES_128_NIST_GMAC:
+ case CRYPTO_AES_192_NIST_GMAC:
+ case CRYPTO_AES_256_NIST_GMAC:
+ /* length block */
+ bzero(blk, blksz);
+ blkp = (uint32_t *)blk + 1;
+ *blkp = htobe32(aadlen * 8);
+ blkp = (uint32_t *)blk + 3;
+ *blkp = htobe32(crde->crd_len * 8);
+ axf->Update(&ctx, blk, blksz);
+ break;
+ }
+
+ /* Finalize MAC */
+ axf->Final(aalg, &ctx);
+
+ /* Validate tag */
+ if (!(crde->crd_flags & CRD_F_ENCRYPT)) {
+ crypto_copydata(crp->crp_flags, buf, crda->crd_inject,
+ axf->hashsize, uaalg);
+
+ r = timingsafe_bcmp(aalg, uaalg, axf->hashsize);
+ if (r == 0) {
+ /* tag matches, decrypt data */
+ for (i = 0; i < crde->crd_len; i += blksz) {
+ len = MIN(crde->crd_len - i, blksz);
+ if (len < blksz)
+ bzero(blk, blksz);
+ crypto_copydata(crp->crp_flags, buf,
+ crde->crd_skip + i, len, blk);
+ if (!(crde->crd_flags & CRD_F_ENCRYPT)) {
+ exf->decrypt(swe->sw_kschedule, blk);
+ }
+ crypto_copyback(crp->crp_flags, buf,
+ crde->crd_skip + i, len, blk);
+ }
+ } else
+ return (EBADMSG);
+ } else {
+ /* Inject the authentication data */
+ crypto_copyback(crp->crp_flags, buf, crda->crd_inject,
+ axf->hashsize, aalg);
+ }
+
+ return (0);
+}
+
/*
* Apply a compression/decompression algorithm
*/
@@ -667,11 +713,13 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
struct enc_xform *txf;
struct comp_algo *cxf;
u_int32_t i;
+ int len;
int error;
if (sid == NULL || cri == NULL)
return EINVAL;
+ rw_wlock(&swcr_sessions_lock);
if (swcr_sessions) {
for (i = 1; i < swcr_sesnum; i++)
if (swcr_sessions[i] == NULL)
@@ -694,6 +742,7 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
swcr_sesnum = 0;
else
swcr_sesnum /= 2;
+ rw_wunlock(&swcr_sessions_lock);
return ENOBUFS;
}
@@ -707,6 +756,7 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
swcr_sessions = swd;
}
+ rw_downgrade(&swcr_sessions_lock);
swd = &swcr_sessions[i];
*sid = i;
@@ -714,7 +764,8 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
*swd = malloc(sizeof(struct swcr_data),
M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
if (*swd == NULL) {
- swcr_freesession(dev, i);
+ swcr_freesession_locked(dev, i);
+ rw_runlock(&swcr_sessions_lock);
return ENOBUFS;
}
@@ -740,6 +791,16 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
case CRYPTO_AES_XTS:
txf = &enc_xform_aes_xts;
goto enccommon;
+ case CRYPTO_AES_ICM:
+ txf = &enc_xform_aes_icm;
+ goto enccommon;
+ case CRYPTO_AES_NIST_GCM_16:
+ txf = &enc_xform_aes_nist_gcm;
+ goto enccommon;
+ case CRYPTO_AES_NIST_GMAC:
+ txf = &enc_xform_aes_nist_gmac;
+ (*swd)->sw_exf = txf;
+ break;
case CRYPTO_CAMELLIA_CBC:
txf = &enc_xform_camellia;
goto enccommon;
@@ -751,7 +812,8 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
error = txf->setkey(&((*swd)->sw_kschedule),
cri->cri_key, cri->cri_klen / 8);
if (error) {
- swcr_freesession(dev, i);
+ swcr_freesession_locked(dev, i);
+ rw_runlock(&swcr_sessions_lock);
return error;
}
}
@@ -782,14 +844,16 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
(*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
M_NOWAIT);
if ((*swd)->sw_ictx == NULL) {
- swcr_freesession(dev, i);
+ swcr_freesession_locked(dev, i);
+ rw_runlock(&swcr_sessions_lock);
return ENOBUFS;
}
(*swd)->sw_octx = malloc(axf->ctxsize, M_CRYPTO_DATA,
M_NOWAIT);
if ((*swd)->sw_octx == NULL) {
- swcr_freesession(dev, i);
+ swcr_freesession_locked(dev, i);
+ rw_runlock(&swcr_sessions_lock);
return ENOBUFS;
}
@@ -812,14 +876,16 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
(*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
M_NOWAIT);
if ((*swd)->sw_ictx == NULL) {
- swcr_freesession(dev, i);
+ swcr_freesession_locked(dev, i);
+ rw_runlock(&swcr_sessions_lock);
return ENOBUFS;
}
(*swd)->sw_octx = malloc(cri->cri_klen / 8,
M_CRYPTO_DATA, M_NOWAIT);
if ((*swd)->sw_octx == NULL) {
- swcr_freesession(dev, i);
+ swcr_freesession_locked(dev, i);
+ rw_runlock(&swcr_sessions_lock);
return ENOBUFS;
}
@@ -843,7 +909,8 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
(*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
M_NOWAIT);
if ((*swd)->sw_ictx == NULL) {
- swcr_freesession(dev, i);
+ swcr_freesession_locked(dev, i);
+ rw_runlock(&swcr_sessions_lock);
return ENOBUFS;
}
@@ -852,12 +919,41 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
(*swd)->sw_axf = axf;
break;
#endif
+
+ case CRYPTO_AES_128_NIST_GMAC:
+ axf = &auth_hash_nist_gmac_aes_128;
+ goto auth4common;
+
+ case CRYPTO_AES_192_NIST_GMAC:
+ axf = &auth_hash_nist_gmac_aes_192;
+ goto auth4common;
+
+ case CRYPTO_AES_256_NIST_GMAC:
+ axf = &auth_hash_nist_gmac_aes_256;
+ auth4common:
+ len = cri->cri_klen / 8;
+ if (len != 16 && len != 24 && len != 32)
+ return EINVAL;
+
+ (*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
+ M_NOWAIT);
+ if ((*swd)->sw_ictx == NULL) {
+ swcr_freesession_locked(dev, i);
+ rw_runlock(&swcr_sessions_lock);
+ return ENOBUFS;
+ }
+ axf->Init((*swd)->sw_ictx);
+ axf->Setkey((*swd)->sw_ictx, cri->cri_key, len);
+ (*swd)->sw_axf = axf;
+ break;
+
case CRYPTO_DEFLATE_COMP:
cxf = &comp_algo_deflate;
(*swd)->sw_cxf = cxf;
break;
default:
- swcr_freesession(dev, i);
+ swcr_freesession_locked(dev, i);
+ rw_runlock(&swcr_sessions_lock);
return EINVAL;
}
@@ -865,14 +961,26 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri)
cri = cri->cri_next;
swd = &((*swd)->sw_next);
}
+ rw_runlock(&swcr_sessions_lock);
return 0;
}
+static int
+swcr_freesession(device_t dev, u_int64_t tid)
+{
+ int error;
+
+ rw_rlock(&swcr_sessions_lock);
+ error = swcr_freesession_locked(dev, tid);
+ rw_runlock(&swcr_sessions_lock);
+ return error;
+}
+
/*
* Free a session.
*/
static int
-swcr_freesession(device_t dev, u_int64_t tid)
+swcr_freesession_locked(device_t dev, u_int64_t tid)
{
struct swcr_data *swd;
struct enc_xform *txf;
@@ -899,6 +1007,9 @@ swcr_freesession(device_t dev, u_int64_t tid)
case CRYPTO_SKIPJACK_CBC:
case CRYPTO_RIJNDAEL128_CBC:
case CRYPTO_AES_XTS:
+ case CRYPTO_AES_ICM:
+ case CRYPTO_AES_NIST_GCM_16:
+ case CRYPTO_AES_NIST_GMAC:
case CRYPTO_CAMELLIA_CBC:
case CRYPTO_NULL_CBC:
txf = swd->sw_exf;
@@ -977,11 +1088,15 @@ swcr_process(device_t dev, struct cryptop *crp, int hint)
goto done;
}
- lid = crp->crp_sid & 0xffffffff;
- if (lid >= swcr_sesnum || lid == 0 || swcr_sessions[lid] == NULL) {
+ lid = CRYPTO_SESID2LID(crp->crp_sid);
+ rw_rlock(&swcr_sessions_lock);
+ if (swcr_sessions == NULL || lid >= swcr_sesnum || lid == 0 ||
+ swcr_sessions[lid] == NULL) {
+ rw_runlock(&swcr_sessions_lock);
crp->crp_etype = ENOENT;
goto done;
}
+ rw_runlock(&swcr_sessions_lock);
/* Go through crypto descriptors, processing as we go */
for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
@@ -995,10 +1110,17 @@ swcr_process(device_t dev, struct cryptop *crp, int hint)
* XXX between the various instances of an algorithm (so we can
* XXX locate the correct crypto context).
*/
+ rw_rlock(&swcr_sessions_lock);
+ if (swcr_sessions == NULL) {
+ rw_runlock(&swcr_sessions_lock);
+ crp->crp_etype = ENOENT;
+ goto done;
+ }
for (sw = swcr_sessions[lid];
sw && sw->sw_alg != crd->crd_alg;
sw = sw->sw_next)
;
+ rw_runlock(&swcr_sessions_lock);
/* No such context ? */
if (sw == NULL) {
@@ -1013,6 +1135,7 @@ swcr_process(device_t dev, struct cryptop *crp, int hint)
case CRYPTO_SKIPJACK_CBC:
case CRYPTO_RIJNDAEL128_CBC:
case CRYPTO_AES_XTS:
+ case CRYPTO_AES_ICM:
case CRYPTO_CAMELLIA_CBC:
if ((crp->crp_etype = swcr_encdec(crd, sw,
crp->crp_buf, crp->crp_flags)) != 0)
@@ -1037,6 +1160,14 @@ swcr_process(device_t dev, struct cryptop *crp, int hint)
goto done;
break;
+ case CRYPTO_AES_NIST_GCM_16:
+ case CRYPTO_AES_NIST_GMAC:
+ case CRYPTO_AES_128_NIST_GMAC:
+ case CRYPTO_AES_192_NIST_GMAC:
+ case CRYPTO_AES_256_NIST_GMAC:
+ crp->crp_etype = swcr_authenc(crp);
+ goto done;
+
case CRYPTO_DEFLATE_COMP:
if ((crp->crp_etype = swcr_compdec(crd, sw,
crp->crp_buf, crp->crp_flags)) != 0)
@@ -1076,6 +1207,7 @@ swcr_probe(device_t dev)
static int
swcr_attach(device_t dev)
{
+ rw_init(&swcr_sessions_lock, "swcr_sessions_lock");
memset(hmac_ipad_buffer, HMAC_IPAD_VAL, HMAC_MAX_BLOCK_LEN);
memset(hmac_opad_buffer, HMAC_OPAD_VAL, HMAC_MAX_BLOCK_LEN);
@@ -1106,6 +1238,12 @@ swcr_attach(device_t dev)
REGISTER(CRYPTO_SHA1);
REGISTER(CRYPTO_RIJNDAEL128_CBC);
REGISTER(CRYPTO_AES_XTS);
+ REGISTER(CRYPTO_AES_ICM);
+ REGISTER(CRYPTO_AES_NIST_GCM_16);
+ REGISTER(CRYPTO_AES_NIST_GMAC);
+ REGISTER(CRYPTO_AES_128_NIST_GMAC);
+ REGISTER(CRYPTO_AES_192_NIST_GMAC);
+ REGISTER(CRYPTO_AES_256_NIST_GMAC);
REGISTER(CRYPTO_CAMELLIA_CBC);
REGISTER(CRYPTO_DEFLATE_COMP);
#undef REGISTER
@@ -1117,8 +1255,11 @@ static int
swcr_detach(device_t dev)
{
crypto_unregister_all(swcr_id);
- if (swcr_sessions != NULL)
- free(swcr_sessions, M_CRYPTO_DATA);
+ rw_wlock(&swcr_sessions_lock);
+ free(swcr_sessions, M_CRYPTO_DATA);
+ swcr_sessions = NULL;
+ rw_wunlock(&swcr_sessions_lock);
+ rw_destroy(&swcr_sessions_lock);
return 0;
}
diff --git a/freebsd/sys/opencrypto/deflate.h b/freebsd/sys/opencrypto/deflate.h
index dcf7a844..d31a3bf2 100644
--- a/freebsd/sys/opencrypto/deflate.h
+++ b/freebsd/sys/opencrypto/deflate.h
@@ -36,7 +36,7 @@
#ifndef _CRYPTO_DEFLATE_H_
#define _CRYPTO_DEFLATE_H_
-#include <net/zlib.h>
+#include <sys/zlib.h>
#define Z_METHOD 8
#define Z_MEMLEVEL 8
diff --git a/freebsd/sys/opencrypto/gfmult.c b/freebsd/sys/opencrypto/gfmult.c
new file mode 100644
index 00000000..5ac44087
--- /dev/null
+++ b/freebsd/sys/opencrypto/gfmult.c
@@ -0,0 +1,277 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by John-Mark Gurney under
+ * the sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#include "gfmult.h"
+
+#define REV_POLY_REDUCT 0xe1 /* 0x87 bit reversed */
+
+/* reverse the bits of a nibble */
+static const uint8_t nib_rev[] = {
+ 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
+ 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,
+};
+
+/* calculate v * 2 */
+static inline struct gf128
+gf128_mulalpha(struct gf128 v)
+{
+ uint64_t mask;
+
+ mask = !!(v.v[1] & 1);
+ mask = ~(mask - 1);
+ v.v[1] = (v.v[1] >> 1) | ((v.v[0] & 1) << 63);
+ v.v[0] = (v.v[0] >> 1) ^ ((mask & REV_POLY_REDUCT) << 56);
+
+ return v;
+}
+
+/*
+ * Generate a table for 0-16 * h. Store the results in the table w/ indexes
+ * bit reversed, and the words striped across the values.
+ */
+void
+gf128_genmultable(struct gf128 h, struct gf128table *t)
+{
+ struct gf128 tbl[16];
+ int i;
+
+ tbl[0] = MAKE_GF128(0, 0);
+ tbl[1] = h;
+
+ for (i = 2; i < 16; i += 2) {
+ tbl[i] = gf128_mulalpha(tbl[i / 2]);
+ tbl[i + 1] = gf128_add(tbl[i], h);
+ }
+
+ for (i = 0; i < 16; i++) {
+ t->a[nib_rev[i]] = tbl[i].v[0] >> 32;
+ t->b[nib_rev[i]] = tbl[i].v[0];
+ t->c[nib_rev[i]] = tbl[i].v[1] >> 32;
+ t->d[nib_rev[i]] = tbl[i].v[1];
+ }
+}
+
+/*
+ * Generate tables containing h, h^2, h^3 and h^4, starting at 0.
+ */
+void
+gf128_genmultable4(struct gf128 h, struct gf128table4 *t)
+{
+ struct gf128 h2, h3, h4;
+
+ gf128_genmultable(h, &t->tbls[0]);
+
+ h2 = gf128_mul(h, &t->tbls[0]);
+
+ gf128_genmultable(h2, &t->tbls[1]);
+
+ h3 = gf128_mul(h, &t->tbls[1]);
+ gf128_genmultable(h3, &t->tbls[2]);
+
+ h4 = gf128_mul(h2, &t->tbls[1]);
+ gf128_genmultable(h4, &t->tbls[3]);
+}
+
+/*
+ * Read a row from the table.
+ */
+static inline struct gf128
+readrow(struct gf128table *tbl, unsigned bits)
+{
+ struct gf128 r;
+
+ bits = bits % 16;
+
+ r.v[0] = ((uint64_t)tbl->a[bits] << 32) | tbl->b[bits];
+ r.v[1] = ((uint64_t)tbl->c[bits] << 32) | tbl->d[bits];
+
+ return r;
+}
+
+/*
+ * These are the reduction values. Since we are dealing with bit reversed
+ * version, the values need to be bit reversed, AND the indexes are also
+ * bit reversed to make lookups quicker.
+ */
+static uint16_t reduction[] = {
+ 0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
+ 0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0,
+};
+
+/*
+ * Calculate:
+ * (x*2^4 + word[3,0]*h) *
+ * 2^4 + word[7,4]*h) *
+ * ...
+ * 2^4 + word[63,60]*h
+ */
+static struct gf128
+gfmultword(uint64_t word, struct gf128 x, struct gf128table *tbl)
+{
+ struct gf128 row;
+ unsigned bits;
+ unsigned redbits;
+ int i;
+
+ for (i = 0; i < 64; i += 4) {
+ bits = word % 16;
+
+ /* fetch row */
+ row = readrow(tbl, bits);
+
+ /* x * 2^4 */
+ redbits = x.v[1] % 16;
+ x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
+ x.v[0] >>= 4;
+ x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);
+
+ word >>= 4;
+
+ x = gf128_add(x, row);
+ }
+
+ return x;
+}
+
+/*
+ * Calculate
+ * (x*2^4 + worda[3,0]*h^4+wordb[3,0]*h^3+...+wordd[3,0]*h) *
+ * ...
+ * 2^4 + worda[63,60]*h^4+ ... + wordd[63,60]*h
+ *
+ * Passing/returning struct is .5% faster than passing in via pointer on
+ * amd64.
+ */
+static struct gf128
+gfmultword4(uint64_t worda, uint64_t wordb, uint64_t wordc, uint64_t wordd,
+ struct gf128 x, struct gf128table4 *tbl)
+{
+ struct gf128 rowa, rowb, rowc, rowd;
+ unsigned bitsa, bitsb, bitsc, bitsd;
+ unsigned redbits;
+ int i;
+
+ /*
+ * XXX - nibble reverse words to save a shift? probably not as
+ * nibble reverse would take 20 ops (5 * 4) verse 16
+ */
+
+ for (i = 0; i < 64; i += 4) {
+ bitsa = worda % 16;
+ bitsb = wordb % 16;
+ bitsc = wordc % 16;
+ bitsd = wordd % 16;
+
+ /* fetch row */
+ rowa = readrow(&tbl->tbls[3], bitsa);
+ rowb = readrow(&tbl->tbls[2], bitsb);
+ rowc = readrow(&tbl->tbls[1], bitsc);
+ rowd = readrow(&tbl->tbls[0], bitsd);
+
+ /* x * 2^4 */
+ redbits = x.v[1] % 16;
+ x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
+ x.v[0] >>= 4;
+ x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);
+
+ worda >>= 4;
+ wordb >>= 4;
+ wordc >>= 4;
+ wordd >>= 4;
+
+ x = gf128_add(x, gf128_add(rowa, gf128_add(rowb,
+ gf128_add(rowc, rowd))));
+ }
+
+ return x;
+}
+
+struct gf128
+gf128_mul(struct gf128 v, struct gf128table *tbl)
+{
+ struct gf128 ret;
+
+ ret = MAKE_GF128(0, 0);
+
+ ret = gfmultword(v.v[1], ret, tbl);
+ ret = gfmultword(v.v[0], ret, tbl);
+
+ return ret;
+}
+
+/*
+ * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:
+ * (((a*h+b)*h+c)*h+d)*h
+ */
+struct gf128
+gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c, struct gf128 d,
+ struct gf128table4 *tbl)
+{
+ struct gf128 tmp;
+
+ tmp = MAKE_GF128(0, 0);
+
+ tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
+ tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);
+
+ return tmp;
+}
+
+/*
+ * a = data[0..15] + r
+ * b = data[16..31]
+ * c = data[32..47]
+ * d = data[48..63]
+ *
+ * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:
+ * (((a*h+b)*h+c)*h+d)*h
+ */
+struct gf128
+gf128_mul4b(struct gf128 r, const uint8_t *v, struct gf128table4 *tbl)
+{
+ struct gf128 a, b, c, d;
+ struct gf128 tmp;
+
+ tmp = MAKE_GF128(0, 0);
+
+ a = gf128_add(r, gf128_read(&v[0*16]));
+ b = gf128_read(&v[1*16]);
+ c = gf128_read(&v[2*16]);
+ d = gf128_read(&v[3*16]);
+
+ tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
+ tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);
+
+ return tmp;
+}
diff --git a/freebsd/sys/opencrypto/gfmult.h b/freebsd/sys/opencrypto/gfmult.h
new file mode 100644
index 00000000..c385618a
--- /dev/null
+++ b/freebsd/sys/opencrypto/gfmult.h
@@ -0,0 +1,128 @@
+/*-
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by John-Mark Gurney under
+ * the sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef _GFMULT_H_
+#define _GFMULT_H_
+
+#ifdef __APPLE__
+#define __aligned(x) __attribute__((__aligned__(x)))
+#define be64dec(buf) __builtin_bswap64(*(uint64_t *)buf)
+#define be64enc(buf, x) (*(uint64_t *)buf = __builtin_bswap64(x))
+#else
+#include <sys/endian.h>
+#endif
+
+#ifdef _KERNEL
+#include <sys/types.h>
+#else
+#include <stdint.h>
+#include <strings.h>
+#endif
+
+#define REQ_ALIGN (16 * 4)
+/*
+ * The rows are striped across cache lines. Note that the indexes
+ * are bit reversed to make accesses quicker.
+ */
+struct gf128table {
+ uint32_t a[16] __aligned(REQ_ALIGN); /* bits 0 - 31 */
+ uint32_t b[16] __aligned(REQ_ALIGN); /* bits 63 - 32 */
+ uint32_t c[16] __aligned(REQ_ALIGN); /* bits 95 - 64 */
+ uint32_t d[16] __aligned(REQ_ALIGN); /* bits 127 - 96 */
+} __aligned(REQ_ALIGN);
+
+/*
+ * A set of tables that contain h, h^2, h^3, h^4. To be used w/ gf128_mul4.
+ */
+struct gf128table4 {
+ struct gf128table tbls[4];
+};
+
+/*
+ * GCM per spec is bit reversed in memory. So byte 0 is really bit reversed
+ * and contains bits 0-7. We can deal w/ this by using right shifts and
+ * related math instead of having to bit reverse everything. This means that
+ * the low bits are in v[0] (bits 0-63) and reverse order, while the high
+ * bits are in v[1] (bits 64-127) and reverse order. The high bit of v[0] is
+ * bit 0, and the low bit of v[1] is bit 127.
+ */
+struct gf128 {
+ uint64_t v[2];
+};
+
+/* Note that we don't bit reverse in MAKE_GF128. */
+#define MAKE_GF128(a, b) ((struct gf128){.v = { (a), (b) } })
+#define GF128_EQ(a, b) ((((a).v[0] ^ (b).v[0]) | \
+ ((a).v[1] ^ (b).v[1])) == 0)
+
+static inline struct gf128
+gf128_read(const uint8_t *buf)
+{
+ struct gf128 r;
+
+ r.v[0] = be64dec(buf);
+ buf += sizeof(uint64_t);
+
+ r.v[1] = be64dec(buf);
+
+ return r;
+}
+
+static inline void
+gf128_write(struct gf128 v, uint8_t *buf)
+{
+ uint64_t tmp;
+
+ be64enc(buf, v.v[0]);
+ buf += sizeof tmp;
+
+ be64enc(buf, v.v[1]);
+}
+
+static inline struct gf128 __pure /* XXX - __pure2 instead */
+gf128_add(struct gf128 a, struct gf128 b)
+{
+ a.v[0] ^= b.v[0];
+ a.v[1] ^= b.v[1];
+
+ return a;
+}
+
+void gf128_genmultable(struct gf128 h, struct gf128table *t);
+void gf128_genmultable4(struct gf128 h, struct gf128table4 *t);
+struct gf128 gf128_mul(struct gf128 v, struct gf128table *tbl);
+struct gf128 gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c,
+ struct gf128 d, struct gf128table4 *tbl);
+struct gf128 gf128_mul4b(struct gf128 r, const uint8_t *v,
+ struct gf128table4 *tbl);
+
+#endif /* _GFMULT_H_ */
diff --git a/freebsd/sys/opencrypto/gmac.c b/freebsd/sys/opencrypto/gmac.c
new file mode 100644
index 00000000..ac6344a4
--- /dev/null
+++ b/freebsd/sys/opencrypto/gmac.c
@@ -0,0 +1,121 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by John-Mark Gurney under
+ * the sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <opencrypto/gfmult.h>
+#include <opencrypto/gmac.h>
+
+void
+AES_GMAC_Init(struct aes_gmac_ctx *agc)
+{
+
+ bzero(agc, sizeof *agc);
+}
+
+void
+AES_GMAC_Setkey(struct aes_gmac_ctx *agc, const uint8_t *key, uint16_t klen)
+{
+ const uint8_t zeros[GMAC_BLOCK_LEN] = {};
+ struct gf128 h;
+ uint8_t hbuf[GMAC_BLOCK_LEN];
+
+ agc->rounds = rijndaelKeySetupEnc(agc->keysched, key, klen * 8);
+
+ rijndaelEncrypt(agc->keysched, agc->rounds, zeros, hbuf);
+
+ h = gf128_read(hbuf);
+ gf128_genmultable4(h, &agc->ghashtbl);
+
+ explicit_bzero(&h, sizeof h);
+ explicit_bzero(hbuf, sizeof hbuf);
+}
+
+void
+AES_GMAC_Reinit(struct aes_gmac_ctx *agc, const uint8_t *iv, uint16_t ivlen)
+{
+
+ KASSERT(ivlen <= sizeof agc->counter, ("passed ivlen too large!"));
+ bcopy(iv, agc->counter, ivlen);
+}
+
+int
+AES_GMAC_Update(struct aes_gmac_ctx *agc, const uint8_t *data, uint16_t len)
+{
+ struct gf128 v;
+ uint8_t buf[GMAC_BLOCK_LEN] = {};
+ int i;
+
+ v = agc->hash;
+
+ while (len > 0) {
+ if (len >= 4*GMAC_BLOCK_LEN) {
+ i = 4*GMAC_BLOCK_LEN;
+ v = gf128_mul4b(v, data, &agc->ghashtbl);
+ } else if (len >= GMAC_BLOCK_LEN) {
+ i = GMAC_BLOCK_LEN;
+ v = gf128_add(v, gf128_read(data));
+ v = gf128_mul(v, &agc->ghashtbl.tbls[0]);
+ } else {
+ i = len;
+ bcopy(data, buf, i);
+ v = gf128_add(v, gf128_read(&buf[0]));
+ v = gf128_mul(v, &agc->ghashtbl.tbls[0]);
+ explicit_bzero(buf, sizeof buf);
+ }
+ len -= i;
+ data += i;
+ }
+
+ agc->hash = v;
+ explicit_bzero(&v, sizeof v);
+
+ return (0);
+}
+
+void
+AES_GMAC_Final(uint8_t digest[GMAC_DIGEST_LEN], struct aes_gmac_ctx *agc)
+{
+ uint8_t enccntr[GMAC_BLOCK_LEN];
+ struct gf128 a;
+
+ /* XXX - zero additional bytes? */
+ agc->counter[GMAC_BLOCK_LEN - 1] = 1;
+
+ rijndaelEncrypt(agc->keysched, agc->rounds, agc->counter, enccntr);
+ a = gf128_add(agc->hash, gf128_read(enccntr));
+ gf128_write(a, digest);
+
+ explicit_bzero(enccntr, sizeof enccntr);
+}
diff --git a/freebsd/sys/opencrypto/gmac.h b/freebsd/sys/opencrypto/gmac.h
new file mode 100644
index 00000000..909b78c7
--- /dev/null
+++ b/freebsd/sys/opencrypto/gmac.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by John-Mark Gurney under
+ * the sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef _GMAC_H_
+#define _GMAC_H_
+
+#include "gfmult.h"
+#include <crypto/rijndael/rijndael.h>
+
+#define GMAC_BLOCK_LEN 16
+#define GMAC_DIGEST_LEN 16
+
+struct aes_gmac_ctx {
+ struct gf128table4 ghashtbl;
+ struct gf128 hash;
+ uint32_t keysched[4*(RIJNDAEL_MAXNR + 1)];
+ uint8_t counter[GMAC_BLOCK_LEN];
+ int rounds;
+};
+
+void AES_GMAC_Init(struct aes_gmac_ctx *);
+void AES_GMAC_Setkey(struct aes_gmac_ctx *, const uint8_t *, uint16_t);
+void AES_GMAC_Reinit(struct aes_gmac_ctx *, const uint8_t *, uint16_t);
+int AES_GMAC_Update(struct aes_gmac_ctx *, const uint8_t *, uint16_t);
+void AES_GMAC_Final(uint8_t [GMAC_DIGEST_LEN], struct aes_gmac_ctx *);
+
+#endif /* _GMAC_H_ */
diff --git a/freebsd/sys/opencrypto/skipjack.h b/freebsd/sys/opencrypto/skipjack.h
index 3e88418c..80367ea4 100644
--- a/freebsd/sys/opencrypto/skipjack.h
+++ b/freebsd/sys/opencrypto/skipjack.h
@@ -14,6 +14,11 @@
* 29 May 1998
*/
+#ifndef _SKIPJACK_H_
+#define _SKIPJACK_H_
+
extern void skipjack_forwards(u_int8_t *plain, u_int8_t *cipher, u_int8_t **key);
extern void skipjack_backwards(u_int8_t *cipher, u_int8_t *plain, u_int8_t **key);
extern void subkey_table_gen(u_int8_t *key, u_int8_t **key_tables);
+
+#endif
diff --git a/freebsd/sys/opencrypto/xform.c b/freebsd/sys/opencrypto/xform.c
index 1227ad21..f0ffb153 100644
--- a/freebsd/sys/opencrypto/xform.c
+++ b/freebsd/sys/opencrypto/xform.c
@@ -3,8 +3,9 @@
/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
/*-
* The authors of this code are John Ioannidis (ji@tla.org),
- * Angelos D. Keromytis (kermit@csd.uch.gr) and
- * Niels Provos (provos@physnet.uni-hamburg.de).
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
*
* This code was written by John Ioannidis for BSD/OS in Athens, Greece,
* in November 1995.
@@ -17,11 +18,21 @@
*
* Additional features in 1999 by Angelos D. Keromytis.
*
+ * AES XTS implementation in 2008 by Damien Miller
+ *
* Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
* Angelos D. Keromytis and Niels Provos.
*
* Copyright (C) 2001, Angelos D. Keromytis.
*
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
* Permission to use, copy, and modify this software with or without fee
* is hereby granted, provided that this entire notice is included in
* all copies of any software which is or includes a copy or
@@ -66,750 +77,39 @@ __FBSDID("$FreeBSD$");
#include <opencrypto/cryptodev.h>
#include <opencrypto/xform.h>
-static int null_setkey(u_int8_t **, u_int8_t *, int);
-static int des1_setkey(u_int8_t **, u_int8_t *, int);
-static int des3_setkey(u_int8_t **, u_int8_t *, int);
-static int blf_setkey(u_int8_t **, u_int8_t *, int);
-static int cast5_setkey(u_int8_t **, u_int8_t *, int);
-static int skipjack_setkey(u_int8_t **, u_int8_t *, int);
-static int rijndael128_setkey(u_int8_t **, u_int8_t *, int);
-static int aes_xts_setkey(u_int8_t **, u_int8_t *, int);
-static int cml_setkey(u_int8_t **, u_int8_t *, int);
-
-static void null_encrypt(caddr_t, u_int8_t *);
-static void des1_encrypt(caddr_t, u_int8_t *);
-static void des3_encrypt(caddr_t, u_int8_t *);
-static void blf_encrypt(caddr_t, u_int8_t *);
-static void cast5_encrypt(caddr_t, u_int8_t *);
-static void skipjack_encrypt(caddr_t, u_int8_t *);
-static void rijndael128_encrypt(caddr_t, u_int8_t *);
-static void aes_xts_encrypt(caddr_t, u_int8_t *);
-static void cml_encrypt(caddr_t, u_int8_t *);
-
-static void null_decrypt(caddr_t, u_int8_t *);
-static void des1_decrypt(caddr_t, u_int8_t *);
-static void des3_decrypt(caddr_t, u_int8_t *);
-static void blf_decrypt(caddr_t, u_int8_t *);
-static void cast5_decrypt(caddr_t, u_int8_t *);
-static void skipjack_decrypt(caddr_t, u_int8_t *);
-static void rijndael128_decrypt(caddr_t, u_int8_t *);
-static void aes_xts_decrypt(caddr_t, u_int8_t *);
-static void cml_decrypt(caddr_t, u_int8_t *);
-
-static void null_zerokey(u_int8_t **);
-static void des1_zerokey(u_int8_t **);
-static void des3_zerokey(u_int8_t **);
-static void blf_zerokey(u_int8_t **);
-static void cast5_zerokey(u_int8_t **);
-static void skipjack_zerokey(u_int8_t **);
-static void rijndael128_zerokey(u_int8_t **);
-static void aes_xts_zerokey(u_int8_t **);
-static void cml_zerokey(u_int8_t **);
-
-static void aes_xts_reinit(caddr_t, u_int8_t *);
-
-static void null_init(void *);
-static int null_update(void *, u_int8_t *, u_int16_t);
-static void null_final(u_int8_t *, void *);
-static int MD5Update_int(void *, u_int8_t *, u_int16_t);
-static void SHA1Init_int(void *);
-static int SHA1Update_int(void *, u_int8_t *, u_int16_t);
-static void SHA1Final_int(u_int8_t *, void *);
-static int RMD160Update_int(void *, u_int8_t *, u_int16_t);
-static int SHA256Update_int(void *, u_int8_t *, u_int16_t);
-static int SHA384Update_int(void *, u_int8_t *, u_int16_t);
-static int SHA512Update_int(void *, u_int8_t *, u_int16_t);
-
-static u_int32_t deflate_compress(u_int8_t *, u_int32_t, u_int8_t **);
-static u_int32_t deflate_decompress(u_int8_t *, u_int32_t, u_int8_t **);
-
MALLOC_DEFINE(M_XDATA, "xform", "xform data buffers");
/* Encryption instances */
-struct enc_xform enc_xform_null = {
- CRYPTO_NULL_CBC, "NULL",
- /* NB: blocksize of 4 is to generate a properly aligned ESP header */
- NULL_BLOCK_LEN, 0, 256, /* 2048 bits, max key */
- null_encrypt,
- null_decrypt,
- null_setkey,
- null_zerokey,
- NULL
-};
-
-struct enc_xform enc_xform_des = {
- CRYPTO_DES_CBC, "DES",
- DES_BLOCK_LEN, 8, 8,
- des1_encrypt,
- des1_decrypt,
- des1_setkey,
- des1_zerokey,
- NULL
-};
-
-struct enc_xform enc_xform_3des = {
- CRYPTO_3DES_CBC, "3DES",
- DES3_BLOCK_LEN, 24, 24,
- des3_encrypt,
- des3_decrypt,
- des3_setkey,
- des3_zerokey,
- NULL
-};
-
-struct enc_xform enc_xform_blf = {
- CRYPTO_BLF_CBC, "Blowfish",
- BLOWFISH_BLOCK_LEN, 5, 56 /* 448 bits, max key */,
- blf_encrypt,
- blf_decrypt,
- blf_setkey,
- blf_zerokey,
- NULL
-};
-
-struct enc_xform enc_xform_cast5 = {
- CRYPTO_CAST_CBC, "CAST-128",
- CAST128_BLOCK_LEN, 5, 16,
- cast5_encrypt,
- cast5_decrypt,
- cast5_setkey,
- cast5_zerokey,
- NULL
-};
-
-struct enc_xform enc_xform_skipjack = {
- CRYPTO_SKIPJACK_CBC, "Skipjack",
- SKIPJACK_BLOCK_LEN, 10, 10,
- skipjack_encrypt,
- skipjack_decrypt,
- skipjack_setkey,
- skipjack_zerokey,
- NULL
-};
-
-struct enc_xform enc_xform_rijndael128 = {
- CRYPTO_RIJNDAEL128_CBC, "Rijndael-128/AES",
- RIJNDAEL128_BLOCK_LEN, 8, 32,
- rijndael128_encrypt,
- rijndael128_decrypt,
- rijndael128_setkey,
- rijndael128_zerokey,
- NULL
-};
-
-struct enc_xform enc_xform_aes_xts = {
- CRYPTO_AES_XTS, "AES-XTS",
- RIJNDAEL128_BLOCK_LEN, 32, 64,
- aes_xts_encrypt,
- aes_xts_decrypt,
- aes_xts_setkey,
- aes_xts_zerokey,
- aes_xts_reinit
-};
-
struct enc_xform enc_xform_arc4 = {
CRYPTO_ARC4, "ARC4",
- 1, 1, 32,
+ ARC4_BLOCK_LEN, ARC4_IV_LEN, ARC4_MIN_KEY, ARC4_MAX_KEY,
+ NULL,
NULL,
NULL,
NULL,
NULL,
- NULL
-};
-
-struct enc_xform enc_xform_camellia = {
- CRYPTO_CAMELLIA_CBC, "Camellia",
- CAMELLIA_BLOCK_LEN, 8, 32,
- cml_encrypt,
- cml_decrypt,
- cml_setkey,
- cml_zerokey,
- NULL
-};
-
-/* Authentication instances */
-struct auth_hash auth_hash_null = {
- CRYPTO_NULL_HMAC, "NULL-HMAC",
- 0, NULL_HASH_LEN, NULL_HMAC_BLOCK_LEN, sizeof(int), /* NB: context isn't used */
- null_init, null_update, null_final
-};
-
-struct auth_hash auth_hash_hmac_md5 = {
- CRYPTO_MD5_HMAC, "HMAC-MD5",
- 16, MD5_HASH_LEN, MD5_HMAC_BLOCK_LEN, sizeof(MD5_CTX),
- (void (*) (void *)) MD5Init, MD5Update_int,
- (void (*) (u_int8_t *, void *)) MD5Final
-};
-
-struct auth_hash auth_hash_hmac_sha1 = {
- CRYPTO_SHA1_HMAC, "HMAC-SHA1",
- 20, SHA1_HASH_LEN, SHA1_HMAC_BLOCK_LEN, sizeof(SHA1_CTX),
- SHA1Init_int, SHA1Update_int, SHA1Final_int
-};
-
-struct auth_hash auth_hash_hmac_ripemd_160 = {
- CRYPTO_RIPEMD160_HMAC, "HMAC-RIPEMD-160",
- 20, RIPEMD160_HASH_LEN, RIPEMD160_HMAC_BLOCK_LEN, sizeof(RMD160_CTX),
- (void (*)(void *)) RMD160Init, RMD160Update_int,
- (void (*)(u_int8_t *, void *)) RMD160Final
-};
-
-struct auth_hash auth_hash_key_md5 = {
- CRYPTO_MD5_KPDK, "Keyed MD5",
- 0, MD5_KPDK_HASH_LEN, 0, sizeof(MD5_CTX),
- (void (*)(void *)) MD5Init, MD5Update_int,
- (void (*)(u_int8_t *, void *)) MD5Final
-};
-
-struct auth_hash auth_hash_key_sha1 = {
- CRYPTO_SHA1_KPDK, "Keyed SHA1",
- 0, SHA1_KPDK_HASH_LEN, 0, sizeof(SHA1_CTX),
- SHA1Init_int, SHA1Update_int, SHA1Final_int
-};
-
-struct auth_hash auth_hash_hmac_sha2_256 = {
- CRYPTO_SHA2_256_HMAC, "HMAC-SHA2-256",
- 32, SHA2_256_HASH_LEN, SHA2_256_HMAC_BLOCK_LEN, sizeof(SHA256_CTX),
- (void (*)(void *)) SHA256_Init, SHA256Update_int,
- (void (*)(u_int8_t *, void *)) SHA256_Final
-};
-
-struct auth_hash auth_hash_hmac_sha2_384 = {
- CRYPTO_SHA2_384_HMAC, "HMAC-SHA2-384",
- 48, SHA2_384_HASH_LEN, SHA2_384_HMAC_BLOCK_LEN, sizeof(SHA384_CTX),
- (void (*)(void *)) SHA384_Init, SHA384Update_int,
- (void (*)(u_int8_t *, void *)) SHA384_Final
-};
-
-struct auth_hash auth_hash_hmac_sha2_512 = {
- CRYPTO_SHA2_512_HMAC, "HMAC-SHA2-512",
- 64, SHA2_512_HASH_LEN, SHA2_512_HMAC_BLOCK_LEN, sizeof(SHA512_CTX),
- (void (*)(void *)) SHA512_Init, SHA512Update_int,
- (void (*)(u_int8_t *, void *)) SHA512_Final
-};
-
-/* Compression instance */
-struct comp_algo comp_algo_deflate = {
- CRYPTO_DEFLATE_COMP, "Deflate",
- 90, deflate_compress,
- deflate_decompress
-};
-
-/*
- * Encryption wrapper routines.
- */
-static void
-null_encrypt(caddr_t key, u_int8_t *blk)
-{
-}
-static void
-null_decrypt(caddr_t key, u_int8_t *blk)
-{
-}
-static int
-null_setkey(u_int8_t **sched, u_int8_t *key, int len)
-{
- *sched = NULL;
- return 0;
-}
-static void
-null_zerokey(u_int8_t **sched)
-{
- *sched = NULL;
-}
-
-static void
-des1_encrypt(caddr_t key, u_int8_t *blk)
-{
- des_cblock *cb = (des_cblock *) blk;
- des_key_schedule *p = (des_key_schedule *) key;
-
- des_ecb_encrypt(cb, cb, p[0], DES_ENCRYPT);
-}
-
-static void
-des1_decrypt(caddr_t key, u_int8_t *blk)
-{
- des_cblock *cb = (des_cblock *) blk;
- des_key_schedule *p = (des_key_schedule *) key;
-
- des_ecb_encrypt(cb, cb, p[0], DES_DECRYPT);
-}
-
-static int
-des1_setkey(u_int8_t **sched, u_int8_t *key, int len)
-{
- des_key_schedule *p;
- int err;
-
- p = malloc(sizeof (des_key_schedule),
- M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
- if (p != NULL) {
- des_set_key((des_cblock *) key, p[0]);
- err = 0;
- } else
- err = ENOMEM;
- *sched = (u_int8_t *) p;
- return err;
-}
-
-static void
-des1_zerokey(u_int8_t **sched)
-{
- bzero(*sched, sizeof (des_key_schedule));
- free(*sched, M_CRYPTO_DATA);
- *sched = NULL;
-}
-
-static void
-des3_encrypt(caddr_t key, u_int8_t *blk)
-{
- des_cblock *cb = (des_cblock *) blk;
- des_key_schedule *p = (des_key_schedule *) key;
-
- des_ecb3_encrypt(cb, cb, p[0], p[1], p[2], DES_ENCRYPT);
-}
-
-static void
-des3_decrypt(caddr_t key, u_int8_t *blk)
-{
- des_cblock *cb = (des_cblock *) blk;
- des_key_schedule *p = (des_key_schedule *) key;
-
- des_ecb3_encrypt(cb, cb, p[0], p[1], p[2], DES_DECRYPT);
-}
-
-static int
-des3_setkey(u_int8_t **sched, u_int8_t *key, int len)
-{
- des_key_schedule *p;
- int err;
-
- p = malloc(3*sizeof (des_key_schedule),
- M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
- if (p != NULL) {
- des_set_key((des_cblock *)(key + 0), p[0]);
- des_set_key((des_cblock *)(key + 8), p[1]);
- des_set_key((des_cblock *)(key + 16), p[2]);
- err = 0;
- } else
- err = ENOMEM;
- *sched = (u_int8_t *) p;
- return err;
-}
-
-static void
-des3_zerokey(u_int8_t **sched)
-{
- bzero(*sched, 3*sizeof (des_key_schedule));
- free(*sched, M_CRYPTO_DATA);
- *sched = NULL;
-}
-
-static void
-blf_encrypt(caddr_t key, u_int8_t *blk)
-{
- BF_LONG t[2];
-
- memcpy(t, blk, sizeof (t));
- t[0] = ntohl(t[0]);
- t[1] = ntohl(t[1]);
- /* NB: BF_encrypt expects the block in host order! */
- BF_encrypt(t, (BF_KEY *) key);
- t[0] = htonl(t[0]);
- t[1] = htonl(t[1]);
- memcpy(blk, t, sizeof (t));
-}
-
-static void
-blf_decrypt(caddr_t key, u_int8_t *blk)
-{
- BF_LONG t[2];
-
- memcpy(t, blk, sizeof (t));
- t[0] = ntohl(t[0]);
- t[1] = ntohl(t[1]);
- /* NB: BF_decrypt expects the block in host order! */
- BF_decrypt(t, (BF_KEY *) key);
- t[0] = htonl(t[0]);
- t[1] = htonl(t[1]);
- memcpy(blk, t, sizeof (t));
-}
-
-static int
-blf_setkey(u_int8_t **sched, u_int8_t *key, int len)
-{
- int err;
-
- *sched = malloc(sizeof(BF_KEY),
- M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
- if (*sched != NULL) {
- BF_set_key((BF_KEY *) *sched, len, key);
- err = 0;
- } else
- err = ENOMEM;
- return err;
-}
-
-static void
-blf_zerokey(u_int8_t **sched)
-{
- bzero(*sched, sizeof(BF_KEY));
- free(*sched, M_CRYPTO_DATA);
- *sched = NULL;
-}
-
-static void
-cast5_encrypt(caddr_t key, u_int8_t *blk)
-{
- cast_encrypt((cast_key *) key, blk, blk);
-}
-
-static void
-cast5_decrypt(caddr_t key, u_int8_t *blk)
-{
- cast_decrypt((cast_key *) key, blk, blk);
-}
-
-static int
-cast5_setkey(u_int8_t **sched, u_int8_t *key, int len)
-{
- int err;
-
- *sched = malloc(sizeof(cast_key), M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
- if (*sched != NULL) {
- cast_setkey((cast_key *)*sched, key, len);
- err = 0;
- } else
- err = ENOMEM;
- return err;
-}
-
-static void
-cast5_zerokey(u_int8_t **sched)
-{
- bzero(*sched, sizeof(cast_key));
- free(*sched, M_CRYPTO_DATA);
- *sched = NULL;
-}
-
-static void
-skipjack_encrypt(caddr_t key, u_int8_t *blk)
-{
- skipjack_forwards(blk, blk, (u_int8_t **) key);
-}
-
-static void
-skipjack_decrypt(caddr_t key, u_int8_t *blk)
-{
- skipjack_backwards(blk, blk, (u_int8_t **) key);
-}
-
-static int
-skipjack_setkey(u_int8_t **sched, u_int8_t *key, int len)
-{
- int err;
-
- /* NB: allocate all the memory that's needed at once */
- *sched = malloc(10 * (sizeof(u_int8_t *) + 0x100),
- M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
- if (*sched != NULL) {
- u_int8_t** key_tables = (u_int8_t**) *sched;
- u_int8_t* table = (u_int8_t*) &key_tables[10];
- int k;
-
- for (k = 0; k < 10; k++) {
- key_tables[k] = table;
- table += 0x100;
- }
- subkey_table_gen(key, (u_int8_t **) *sched);
- err = 0;
- } else
- err = ENOMEM;
- return err;
-}
-
-static void
-skipjack_zerokey(u_int8_t **sched)
-{
- bzero(*sched, 10 * (sizeof(u_int8_t *) + 0x100));
- free(*sched, M_CRYPTO_DATA);
- *sched = NULL;
-}
-
-static void
-rijndael128_encrypt(caddr_t key, u_int8_t *blk)
-{
- rijndael_encrypt((rijndael_ctx *) key, (u_char *) blk, (u_char *) blk);
-}
-
-static void
-rijndael128_decrypt(caddr_t key, u_int8_t *blk)
-{
- rijndael_decrypt(((rijndael_ctx *) key), (u_char *) blk,
- (u_char *) blk);
-}
-
-static int
-rijndael128_setkey(u_int8_t **sched, u_int8_t *key, int len)
-{
- int err;
-
- if (len != 16 && len != 24 && len != 32)
- return (EINVAL);
- *sched = malloc(sizeof(rijndael_ctx), M_CRYPTO_DATA,
- M_NOWAIT|M_ZERO);
- if (*sched != NULL) {
- rijndael_set_key((rijndael_ctx *) *sched, (u_char *) key,
- len * 8);
- err = 0;
- } else
- err = ENOMEM;
- return err;
-}
-
-static void
-rijndael128_zerokey(u_int8_t **sched)
-{
- bzero(*sched, sizeof(rijndael_ctx));
- free(*sched, M_CRYPTO_DATA);
- *sched = NULL;
-}
-
-#define AES_XTS_BLOCKSIZE 16
-#define AES_XTS_IVSIZE 8
-#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */
-
-struct aes_xts_ctx {
- rijndael_ctx key1;
- rijndael_ctx key2;
- u_int8_t tweak[AES_XTS_BLOCKSIZE];
};
-void
-aes_xts_reinit(caddr_t key, u_int8_t *iv)
-{
- struct aes_xts_ctx *ctx = (struct aes_xts_ctx *)key;
- u_int64_t blocknum;
- u_int i;
-
- /*
- * Prepare tweak as E_k2(IV). IV is specified as LE representation
- * of a 64-bit block number which we allow to be passed in directly.
- */
- bcopy(iv, &blocknum, AES_XTS_IVSIZE);
- for (i = 0; i < AES_XTS_IVSIZE; i++) {
- ctx->tweak[i] = blocknum & 0xff;
- blocknum >>= 8;
- }
- /* Last 64 bits of IV are always zero */
- bzero(ctx->tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
-
- rijndael_encrypt(&ctx->key2, ctx->tweak, ctx->tweak);
-}
-static void
-aes_xts_crypt(struct aes_xts_ctx *ctx, u_int8_t *data, u_int do_encrypt)
-{
- u_int8_t block[AES_XTS_BLOCKSIZE];
- u_int i, carry_in, carry_out;
+/* Include the encryption algorithms */
+#include "xform_null.c"
+#include "xform_des1.c"
+#include "xform_des3.c"
+#include "xform_blf.c"
+#include "xform_cast5.c"
+#include "xform_skipjack.c"
+#include "xform_rijndael.c"
+#include "xform_aes_icm.c"
+#include "xform_aes_xts.c"
+#include "xform_cml.c"
- for (i = 0; i < AES_XTS_BLOCKSIZE; i++)
- block[i] = data[i] ^ ctx->tweak[i];
-
- if (do_encrypt)
- rijndael_encrypt(&ctx->key1, block, data);
- else
- rijndael_decrypt(&ctx->key1, block, data);
-
- for (i = 0; i < AES_XTS_BLOCKSIZE; i++)
- data[i] ^= ctx->tweak[i];
-
- /* Exponentiate tweak */
- carry_in = 0;
- for (i = 0; i < AES_XTS_BLOCKSIZE; i++) {
- carry_out = ctx->tweak[i] & 0x80;
- ctx->tweak[i] = (ctx->tweak[i] << 1) | (carry_in ? 1 : 0);
- carry_in = carry_out;
- }
- if (carry_in)
- ctx->tweak[0] ^= AES_XTS_ALPHA;
- bzero(block, sizeof(block));
-}
-
-void
-aes_xts_encrypt(caddr_t key, u_int8_t *data)
-{
- aes_xts_crypt((struct aes_xts_ctx *)key, data, 1);
-}
-
-void
-aes_xts_decrypt(caddr_t key, u_int8_t *data)
-{
- aes_xts_crypt((struct aes_xts_ctx *)key, data, 0);
-}
-
-int
-aes_xts_setkey(u_int8_t **sched, u_int8_t *key, int len)
-{
- struct aes_xts_ctx *ctx;
-
- if (len != 32 && len != 64)
- return EINVAL;
-
- *sched = malloc(sizeof(struct aes_xts_ctx), M_CRYPTO_DATA,
- M_NOWAIT | M_ZERO);
- if (*sched == NULL)
- return ENOMEM;
- ctx = (struct aes_xts_ctx *)*sched;
-
- rijndael_set_key(&ctx->key1, key, len * 4);
- rijndael_set_key(&ctx->key2, key + (len / 2), len * 4);
-
- return 0;
-}
-
-void
-aes_xts_zerokey(u_int8_t **sched)
-{
- bzero(*sched, sizeof(struct aes_xts_ctx));
- free(*sched, M_CRYPTO_DATA);
- *sched = NULL;
-}
-
-static void
-cml_encrypt(caddr_t key, u_int8_t *blk)
-{
- camellia_encrypt((camellia_ctx *) key, (u_char *) blk, (u_char *) blk);
-}
-
-static void
-cml_decrypt(caddr_t key, u_int8_t *blk)
-{
- camellia_decrypt(((camellia_ctx *) key), (u_char *) blk,
- (u_char *) blk);
-}
-
-static int
-cml_setkey(u_int8_t **sched, u_int8_t *key, int len)
-{
- int err;
-
- if (len != 16 && len != 24 && len != 32)
- return (EINVAL);
- *sched = malloc(sizeof(camellia_ctx), M_CRYPTO_DATA,
- M_NOWAIT|M_ZERO);
- if (*sched != NULL) {
- camellia_set_key((camellia_ctx *) *sched, (u_char *) key,
- len * 8);
- err = 0;
- } else
- err = ENOMEM;
- return err;
-}
-
-static void
-cml_zerokey(u_int8_t **sched)
-{
- bzero(*sched, sizeof(camellia_ctx));
- free(*sched, M_CRYPTO_DATA);
- *sched = NULL;
-}
-
-/*
- * And now for auth.
- */
-
-static void
-null_init(void *ctx)
-{
-}
-
-static int
-null_update(void *ctx, u_int8_t *buf, u_int16_t len)
-{
- return 0;
-}
-
-static void
-null_final(u_int8_t *buf, void *ctx)
-{
- if (buf != (u_int8_t *) 0)
- bzero(buf, 12);
-}
-
-static int
-RMD160Update_int(void *ctx, u_int8_t *buf, u_int16_t len)
-{
- RMD160Update(ctx, buf, len);
- return 0;
-}
-
-static int
-MD5Update_int(void *ctx, u_int8_t *buf, u_int16_t len)
-{
- MD5Update(ctx, buf, len);
- return 0;
-}
-
-static void
-SHA1Init_int(void *ctx)
-{
- SHA1Init(ctx);
-}
-
-static int
-SHA1Update_int(void *ctx, u_int8_t *buf, u_int16_t len)
-{
- SHA1Update(ctx, buf, len);
- return 0;
-}
-
-static void
-SHA1Final_int(u_int8_t *blk, void *ctx)
-{
- SHA1Final(blk, ctx);
-}
-
-static int
-SHA256Update_int(void *ctx, u_int8_t *buf, u_int16_t len)
-{
- SHA256_Update(ctx, buf, len);
- return 0;
-}
-
-static int
-SHA384Update_int(void *ctx, u_int8_t *buf, u_int16_t len)
-{
- SHA384_Update(ctx, buf, len);
- return 0;
-}
-
-static int
-SHA512Update_int(void *ctx, u_int8_t *buf, u_int16_t len)
-{
- SHA512_Update(ctx, buf, len);
- return 0;
-}
-
-/*
- * And compression
- */
+/* Include the authentication and hashing algorithms */
+#include "xform_gmac.c"
+#include "xform_md5.c"
+#include "xform_rmd160.c"
+#include "xform_sha1.c"
+#include "xform_sha2.c"
-static u_int32_t
-deflate_compress(data, size, out)
- u_int8_t *data;
- u_int32_t size;
- u_int8_t **out;
-{
- return deflate_global(data, size, 0, out);
-}
+/* Include the compression algorithms */
+#include "xform_deflate.c"
-static u_int32_t
-deflate_decompress(data, size, out)
- u_int8_t *data;
- u_int32_t size;
- u_int8_t **out;
-{
- return deflate_global(data, size, 1, out);
-}
diff --git a/freebsd/sys/opencrypto/xform.h b/freebsd/sys/opencrypto/xform.h
index 8df7b07e..a176fe75 100644
--- a/freebsd/sys/opencrypto/xform.h
+++ b/freebsd/sys/opencrypto/xform.h
@@ -9,6 +9,12 @@
* supported the development of this code.
*
* Copyright (c) 2000 Angelos D. Keromytis
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
*
* Permission to use, copy, and modify this software without fee
* is hereby granted, provided that this entire notice is included in
@@ -27,76 +33,15 @@
#include <sys/md5.h>
#include <crypto/sha1.h>
-#include <crypto/sha2/sha2.h>
+#include <crypto/sha2/sha256.h>
+#include <crypto/sha2/sha384.h>
+#include <crypto/sha2/sha512.h>
#include <opencrypto/rmd160.h>
+#include <opencrypto/gmac.h>
-/* Declarations */
-struct auth_hash {
- int type;
- char *name;
- u_int16_t keysize;
- u_int16_t hashsize;
- u_int16_t blocksize;
- u_int16_t ctxsize;
- void (*Init) (void *);
- int (*Update) (void *, u_int8_t *, u_int16_t);
- void (*Final) (u_int8_t *, void *);
-};
-
-/* XXX use a define common with other hash stuff ! */
-#define AH_ALEN_MAX 64 /* max authenticator hash length */
-
-struct enc_xform {
- int type;
- char *name;
- u_int16_t blocksize;
- u_int16_t minkey, maxkey;
- void (*encrypt) (caddr_t, u_int8_t *);
- void (*decrypt) (caddr_t, u_int8_t *);
- int (*setkey) (u_int8_t **, u_int8_t *, int len);
- void (*zerokey) (u_int8_t **);
- void (*reinit) (caddr_t, u_int8_t *);
-};
-
-struct comp_algo {
- int type;
- char *name;
- size_t minlen;
- u_int32_t (*compress) (u_int8_t *, u_int32_t, u_int8_t **);
- u_int32_t (*decompress) (u_int8_t *, u_int32_t, u_int8_t **);
-};
-
-union authctx {
- MD5_CTX md5ctx;
- SHA1_CTX sha1ctx;
- RMD160_CTX rmd160ctx;
- SHA256_CTX sha256ctx;
- SHA384_CTX sha384ctx;
- SHA512_CTX sha512ctx;
-};
-
-extern struct enc_xform enc_xform_null;
-extern struct enc_xform enc_xform_des;
-extern struct enc_xform enc_xform_3des;
-extern struct enc_xform enc_xform_blf;
-extern struct enc_xform enc_xform_cast5;
-extern struct enc_xform enc_xform_skipjack;
-extern struct enc_xform enc_xform_rijndael128;
-extern struct enc_xform enc_xform_aes_xts;
-extern struct enc_xform enc_xform_arc4;
-extern struct enc_xform enc_xform_camellia;
-
-extern struct auth_hash auth_hash_null;
-extern struct auth_hash auth_hash_key_md5;
-extern struct auth_hash auth_hash_key_sha1;
-extern struct auth_hash auth_hash_hmac_md5;
-extern struct auth_hash auth_hash_hmac_sha1;
-extern struct auth_hash auth_hash_hmac_ripemd_160;
-extern struct auth_hash auth_hash_hmac_sha2_256;
-extern struct auth_hash auth_hash_hmac_sha2_384;
-extern struct auth_hash auth_hash_hmac_sha2_512;
-
-extern struct comp_algo comp_algo_deflate;
+#include <opencrypto/xform_auth.h>
+#include <opencrypto/xform_comp.h>
+#include <opencrypto/xform_enc.h>
#ifdef _KERNEL
#include <sys/malloc.h>
diff --git a/freebsd/sys/opencrypto/xform_aes_icm.c b/freebsd/sys/opencrypto/xform_aes_icm.c
new file mode 100644
index 00000000..94fa377a
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_aes_icm.c
@@ -0,0 +1,154 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <opencrypto/xform_enc.h>
+
+static int aes_icm_setkey(u_int8_t **, u_int8_t *, int);
+static void aes_icm_crypt(caddr_t, u_int8_t *);
+static void aes_icm_zerokey(u_int8_t **);
+static void aes_icm_reinit(caddr_t, u_int8_t *);
+static void aes_gcm_reinit(caddr_t, u_int8_t *);
+
+/* Encryption instances */
+struct enc_xform enc_xform_aes_icm = {
+ CRYPTO_AES_ICM, "AES-ICM",
+ AES_BLOCK_LEN, AES_BLOCK_LEN, AES_MIN_KEY, AES_MAX_KEY,
+ aes_icm_crypt,
+ aes_icm_crypt,
+ aes_icm_setkey,
+ aes_icm_zerokey,
+ aes_icm_reinit,
+};
+
+struct enc_xform enc_xform_aes_nist_gcm = {
+ CRYPTO_AES_NIST_GCM_16, "AES-GCM",
+ AES_ICM_BLOCK_LEN, AES_GCM_IV_LEN, AES_MIN_KEY, AES_MAX_KEY,
+ aes_icm_crypt,
+ aes_icm_crypt,
+ aes_icm_setkey,
+ aes_icm_zerokey,
+ aes_gcm_reinit,
+};
+
+/*
+ * Encryption wrapper routines.
+ */
+static void
+aes_icm_reinit(caddr_t key, u_int8_t *iv)
+{
+ struct aes_icm_ctx *ctx;
+
+ ctx = (struct aes_icm_ctx *)key;
+ bcopy(iv, ctx->ac_block, AESICM_BLOCKSIZE);
+}
+
+static void
+aes_gcm_reinit(caddr_t key, u_int8_t *iv)
+{
+ struct aes_icm_ctx *ctx;
+
+ aes_icm_reinit(key, iv);
+
+ ctx = (struct aes_icm_ctx *)key;
+ /* GCM starts with 2 as counter 1 is used for final xor of tag. */
+ bzero(&ctx->ac_block[AESICM_BLOCKSIZE - 4], 4);
+ ctx->ac_block[AESICM_BLOCKSIZE - 1] = 2;
+}
+
+static void
+aes_icm_crypt(caddr_t key, u_int8_t *data)
+{
+ struct aes_icm_ctx *ctx;
+ u_int8_t keystream[AESICM_BLOCKSIZE];
+ int i;
+
+ ctx = (struct aes_icm_ctx *)key;
+ rijndaelEncrypt(ctx->ac_ek, ctx->ac_nr, ctx->ac_block, keystream);
+ for (i = 0; i < AESICM_BLOCKSIZE; i++)
+ data[i] ^= keystream[i];
+ explicit_bzero(keystream, sizeof(keystream));
+
+ /* increment counter */
+ for (i = AESICM_BLOCKSIZE - 1;
+ i >= 0; i--)
+ if (++ctx->ac_block[i]) /* continue on overflow */
+ break;
+}
+
+static int
+aes_icm_setkey(u_int8_t **sched, u_int8_t *key, int len)
+{
+ struct aes_icm_ctx *ctx;
+
+ if (len != 16 && len != 24 && len != 32)
+ return EINVAL;
+
+ *sched = KMALLOC(sizeof(struct aes_icm_ctx), M_CRYPTO_DATA,
+ M_NOWAIT | M_ZERO);
+ if (*sched == NULL)
+ return ENOMEM;
+
+ ctx = (struct aes_icm_ctx *)*sched;
+ ctx->ac_nr = rijndaelKeySetupEnc(ctx->ac_ek, (u_char *)key, len * 8);
+ return 0;
+}
+
+static void
+aes_icm_zerokey(u_int8_t **sched)
+{
+
+ bzero(*sched, sizeof(struct aes_icm_ctx));
+ KFREE(*sched, M_CRYPTO_DATA);
+ *sched = NULL;
+}
diff --git a/freebsd/sys/opencrypto/xform_aes_xts.c b/freebsd/sys/opencrypto/xform_aes_xts.c
new file mode 100644
index 00000000..ab20368e
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_aes_xts.c
@@ -0,0 +1,166 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <opencrypto/xform_enc.h>
+
+static int aes_xts_setkey(u_int8_t **, u_int8_t *, int);
+static void aes_xts_encrypt(caddr_t, u_int8_t *);
+static void aes_xts_decrypt(caddr_t, u_int8_t *);
+static void aes_xts_zerokey(u_int8_t **);
+static void aes_xts_reinit(caddr_t, u_int8_t *);
+
+/* Encryption instances */
+struct enc_xform enc_xform_aes_xts = {
+ CRYPTO_AES_XTS, "AES-XTS",
+ AES_BLOCK_LEN, AES_XTS_IV_LEN, AES_XTS_MIN_KEY, AES_XTS_MAX_KEY,
+ aes_xts_encrypt,
+ aes_xts_decrypt,
+ aes_xts_setkey,
+ aes_xts_zerokey,
+ aes_xts_reinit
+};
+
+/*
+ * Encryption wrapper routines.
+ */
+static void
+aes_xts_reinit(caddr_t key, u_int8_t *iv)
+{
+ struct aes_xts_ctx *ctx = (struct aes_xts_ctx *)key;
+ u_int64_t blocknum;
+ u_int i;
+
+ /*
+ * Prepare tweak as E_k2(IV). IV is specified as LE representation
+ * of a 64-bit block number which we allow to be passed in directly.
+ */
+ bcopy(iv, &blocknum, AES_XTS_IVSIZE);
+ for (i = 0; i < AES_XTS_IVSIZE; i++) {
+ ctx->tweak[i] = blocknum & 0xff;
+ blocknum >>= 8;
+ }
+ /* Last 64 bits of IV are always zero */
+ bzero(ctx->tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
+
+ rijndael_encrypt(&ctx->key2, ctx->tweak, ctx->tweak);
+}
+
+static void
+aes_xts_crypt(struct aes_xts_ctx *ctx, u_int8_t *data, u_int do_encrypt)
+{
+ u_int8_t block[AES_XTS_BLOCKSIZE];
+ u_int i, carry_in, carry_out;
+
+ for (i = 0; i < AES_XTS_BLOCKSIZE; i++)
+ block[i] = data[i] ^ ctx->tweak[i];
+
+ if (do_encrypt)
+ rijndael_encrypt(&ctx->key1, block, data);
+ else
+ rijndael_decrypt(&ctx->key1, block, data);
+
+ for (i = 0; i < AES_XTS_BLOCKSIZE; i++)
+ data[i] ^= ctx->tweak[i];
+
+ /* Exponentiate tweak */
+ carry_in = 0;
+ for (i = 0; i < AES_XTS_BLOCKSIZE; i++) {
+ carry_out = ctx->tweak[i] & 0x80;
+ ctx->tweak[i] = (ctx->tweak[i] << 1) | (carry_in ? 1 : 0);
+ carry_in = carry_out;
+ }
+ if (carry_in)
+ ctx->tweak[0] ^= AES_XTS_ALPHA;
+ bzero(block, sizeof(block));
+}
+
+static void
+aes_xts_encrypt(caddr_t key, u_int8_t *data)
+{
+ aes_xts_crypt((struct aes_xts_ctx *)key, data, 1);
+}
+
+static void
+aes_xts_decrypt(caddr_t key, u_int8_t *data)
+{
+ aes_xts_crypt((struct aes_xts_ctx *)key, data, 0);
+}
+
+static int
+aes_xts_setkey(u_int8_t **sched, u_int8_t *key, int len)
+{
+ struct aes_xts_ctx *ctx;
+
+ if (len != 32 && len != 64)
+ return EINVAL;
+
+ *sched = KMALLOC(sizeof(struct aes_xts_ctx), M_CRYPTO_DATA,
+ M_NOWAIT | M_ZERO);
+ if (*sched == NULL)
+ return ENOMEM;
+ ctx = (struct aes_xts_ctx *)*sched;
+
+ rijndael_set_key(&ctx->key1, key, len * 4);
+ rijndael_set_key(&ctx->key2, key + (len / 2), len * 4);
+
+ return 0;
+}
+
+static void
+aes_xts_zerokey(u_int8_t **sched)
+{
+ bzero(*sched, sizeof(struct aes_xts_ctx));
+ KFREE(*sched, M_CRYPTO_DATA);
+ *sched = NULL;
+}
diff --git a/freebsd/sys/opencrypto/xform_auth.h b/freebsd/sys/opencrypto/xform_auth.h
new file mode 100644
index 00000000..dac6e811
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_auth.h
@@ -0,0 +1,89 @@
+/* $FreeBSD$ */
+/* $OpenBSD: xform.h,v 1.8 2001/08/28 12:20:43 ben Exp $ */
+
+/*-
+ * The author of this code is Angelos D. Keromytis (angelos@cis.upenn.edu)
+ *
+ * This code was written by Angelos D. Keromytis in Athens, Greece, in
+ * February 2000. Network Security Technologies Inc. (NSTI) kindly
+ * supported the development of this code.
+ *
+ * Copyright (c) 2000 Angelos D. Keromytis
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all source code copies of any software which is or includes a copy or
+ * modification of this software.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#ifndef _CRYPTO_XFORM_AUTH_H_
+#define _CRYPTO_XFORM_AUTH_H_
+
+#include <sys/malloc.h>
+#include <rtems/bsd/sys/errno.h>
+
+#include <sys/md5.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2/sha256.h>
+#include <crypto/sha2/sha384.h>
+#include <crypto/sha2/sha512.h>
+#include <opencrypto/rmd160.h>
+#include <opencrypto/gmac.h>
+
+#include <opencrypto/cryptodev.h>
+#include <opencrypto/xform_userland.h>
+
+/* XXX use a define common with other hash stuff ! */
+#define AH_ALEN_MAX 64 /* max authenticator hash length */
+
+/* Declarations */
+struct auth_hash {
+ int type;
+ char *name;
+ u_int16_t keysize;
+ u_int16_t hashsize;
+ u_int16_t ctxsize;
+ u_int16_t blocksize;
+ void (*Init) (void *);
+ void (*Setkey) (void *, const u_int8_t *, u_int16_t);
+ void (*Reinit) (void *, const u_int8_t *, u_int16_t);
+ int (*Update) (void *, const u_int8_t *, u_int16_t);
+ void (*Final) (u_int8_t *, void *);
+};
+
+extern struct auth_hash auth_hash_null;
+extern struct auth_hash auth_hash_key_md5;
+extern struct auth_hash auth_hash_key_sha1;
+extern struct auth_hash auth_hash_hmac_md5;
+extern struct auth_hash auth_hash_hmac_sha1;
+extern struct auth_hash auth_hash_hmac_ripemd_160;
+extern struct auth_hash auth_hash_hmac_sha2_256;
+extern struct auth_hash auth_hash_hmac_sha2_384;
+extern struct auth_hash auth_hash_hmac_sha2_512;
+extern struct auth_hash auth_hash_nist_gmac_aes_128;
+extern struct auth_hash auth_hash_nist_gmac_aes_192;
+extern struct auth_hash auth_hash_nist_gmac_aes_256;
+
+union authctx {
+ MD5_CTX md5ctx;
+ SHA1_CTX sha1ctx;
+ RMD160_CTX rmd160ctx;
+ SHA256_CTX sha256ctx;
+ SHA384_CTX sha384ctx;
+ SHA512_CTX sha512ctx;
+ struct aes_gmac_ctx aes_gmac_ctx;
+};
+
+#endif /* _CRYPTO_XFORM_AUTH_H_ */
diff --git a/freebsd/sys/opencrypto/xform_blf.c b/freebsd/sys/opencrypto/xform_blf.c
new file mode 100644
index 00000000..b88185b6
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_blf.c
@@ -0,0 +1,129 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <crypto/blowfish/blowfish.h>
+#include <opencrypto/xform_enc.h>
+
+static int blf_setkey(u_int8_t **, u_int8_t *, int);
+static void blf_encrypt(caddr_t, u_int8_t *);
+static void blf_decrypt(caddr_t, u_int8_t *);
+static void blf_zerokey(u_int8_t **);
+
+/* Encryption instances */
+struct enc_xform enc_xform_blf = {
+ CRYPTO_BLF_CBC, "Blowfish",
+ BLOWFISH_BLOCK_LEN, BLOWFISH_BLOCK_LEN, BLOWFISH_MIN_KEY,
+ BLOWFISH_MAX_KEY,
+ blf_encrypt,
+ blf_decrypt,
+ blf_setkey,
+ blf_zerokey,
+ NULL,
+};
+
+/*
+ * Encryption wrapper routines.
+ */
+static void
+blf_encrypt(caddr_t key, u_int8_t *blk)
+{
+ BF_LONG t[2];
+
+ memcpy(t, blk, sizeof (t));
+ t[0] = ntohl(t[0]);
+ t[1] = ntohl(t[1]);
+ /* NB: BF_encrypt expects the block in host order! */
+ BF_encrypt(t, (BF_KEY *) key);
+ t[0] = htonl(t[0]);
+ t[1] = htonl(t[1]);
+ memcpy(blk, t, sizeof (t));
+}
+
+static void
+blf_decrypt(caddr_t key, u_int8_t *blk)
+{
+ BF_LONG t[2];
+
+ memcpy(t, blk, sizeof (t));
+ t[0] = ntohl(t[0]);
+ t[1] = ntohl(t[1]);
+ /* NB: BF_decrypt expects the block in host order! */
+ BF_decrypt(t, (BF_KEY *) key);
+ t[0] = htonl(t[0]);
+ t[1] = htonl(t[1]);
+ memcpy(blk, t, sizeof (t));
+}
+
+static int
+blf_setkey(u_int8_t **sched, u_int8_t *key, int len)
+{
+ int err;
+
+ *sched = KMALLOC(sizeof(BF_KEY),
+ M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
+ if (*sched != NULL) {
+ BF_set_key((BF_KEY *) *sched, len, key);
+ err = 0;
+ } else
+ err = ENOMEM;
+ return err;
+}
+
+static void
+blf_zerokey(u_int8_t **sched)
+{
+ bzero(*sched, sizeof(BF_KEY));
+ KFREE(*sched, M_CRYPTO_DATA);
+ *sched = NULL;
+}
diff --git a/freebsd/sys/opencrypto/xform_cast5.c b/freebsd/sys/opencrypto/xform_cast5.c
new file mode 100644
index 00000000..6929faa3
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_cast5.c
@@ -0,0 +1,109 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <opencrypto/cast.h>
+#include <opencrypto/xform_enc.h>
+
+static int cast5_setkey(u_int8_t **, u_int8_t *, int);
+static void cast5_encrypt(caddr_t, u_int8_t *);
+static void cast5_decrypt(caddr_t, u_int8_t *);
+static void cast5_zerokey(u_int8_t **);
+
+/* Encryption instances */
+struct enc_xform enc_xform_cast5 = {
+ CRYPTO_CAST_CBC, "CAST-128",
+ CAST128_BLOCK_LEN, CAST128_BLOCK_LEN, CAST_MIN_KEY, CAST_MAX_KEY,
+ cast5_encrypt,
+ cast5_decrypt,
+ cast5_setkey,
+ cast5_zerokey,
+ NULL,
+};
+
+/*
+ * Encryption wrapper routines.
+ */
+static void
+cast5_encrypt(caddr_t key, u_int8_t *blk)
+{
+ cast_encrypt((cast_key *) key, blk, blk);
+}
+
+static void
+cast5_decrypt(caddr_t key, u_int8_t *blk)
+{
+ cast_decrypt((cast_key *) key, blk, blk);
+}
+
+static int
+cast5_setkey(u_int8_t **sched, u_int8_t *key, int len)
+{
+ int err;
+
+ *sched = KMALLOC(sizeof(cast_key), M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
+ if (*sched != NULL) {
+ cast_setkey((cast_key *)*sched, key, len);
+ err = 0;
+ } else
+ err = ENOMEM;
+ return err;
+}
+
+static void
+cast5_zerokey(u_int8_t **sched)
+{
+ bzero(*sched, sizeof(cast_key));
+ KFREE(*sched, M_CRYPTO_DATA);
+ *sched = NULL;
+}
diff --git a/freebsd/sys/opencrypto/xform_cml.c b/freebsd/sys/opencrypto/xform_cml.c
new file mode 100644
index 00000000..c99b8c65
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_cml.c
@@ -0,0 +1,115 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <crypto/camellia/camellia.h>
+#include <opencrypto/xform_enc.h>
+
+static int cml_setkey(u_int8_t **, u_int8_t *, int);
+static void cml_encrypt(caddr_t, u_int8_t *);
+static void cml_decrypt(caddr_t, u_int8_t *);
+static void cml_zerokey(u_int8_t **);
+
+/* Encryption instances */
+struct enc_xform enc_xform_camellia = {
+ CRYPTO_CAMELLIA_CBC, "Camellia",
+ CAMELLIA_BLOCK_LEN, CAMELLIA_BLOCK_LEN, CAMELLIA_MIN_KEY,
+ CAMELLIA_MAX_KEY,
+ cml_encrypt,
+ cml_decrypt,
+ cml_setkey,
+ cml_zerokey,
+ NULL,
+};
+
+/*
+ * Encryption wrapper routines.
+ */
+static void
+cml_encrypt(caddr_t key, u_int8_t *blk)
+{
+ camellia_encrypt((camellia_ctx *) key, (u_char *) blk, (u_char *) blk);
+}
+
+static void
+cml_decrypt(caddr_t key, u_int8_t *blk)
+{
+ camellia_decrypt(((camellia_ctx *) key), (u_char *) blk,
+ (u_char *) blk);
+}
+
+static int
+cml_setkey(u_int8_t **sched, u_int8_t *key, int len)
+{
+ int err;
+
+ if (len != 16 && len != 24 && len != 32)
+ return (EINVAL);
+ *sched = KMALLOC(sizeof(camellia_ctx), M_CRYPTO_DATA,
+ M_NOWAIT|M_ZERO);
+ if (*sched != NULL) {
+ camellia_set_key((camellia_ctx *) *sched, (u_char *) key,
+ len * 8);
+ err = 0;
+ } else
+ err = ENOMEM;
+ return err;
+}
+
+static void
+cml_zerokey(u_int8_t **sched)
+{
+ bzero(*sched, sizeof(camellia_ctx));
+ KFREE(*sched, M_CRYPTO_DATA);
+ *sched = NULL;
+}
diff --git a/freebsd/sys/opencrypto/xform_comp.h b/freebsd/sys/opencrypto/xform_comp.h
new file mode 100644
index 00000000..30bf288c
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_comp.h
@@ -0,0 +1,52 @@
+/* $FreeBSD$ */
+/* $OpenBSD: xform.h,v 1.8 2001/08/28 12:20:43 ben Exp $ */
+
+/*-
+ * The author of this code is Angelos D. Keromytis (angelos@cis.upenn.edu)
+ *
+ * This code was written by Angelos D. Keromytis in Athens, Greece, in
+ * February 2000. Network Security Technologies Inc. (NSTI) kindly
+ * supported the development of this code.
+ *
+ * Copyright (c) 2000 Angelos D. Keromytis
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all source code copies of any software which is or includes a copy or
+ * modification of this software.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#ifndef _CRYPTO_XFORM_COMP_H_
+#define _CRYPTO_XFORM_COMP_H_
+
+#include <sys/malloc.h>
+#include <rtems/bsd/sys/errno.h>
+
+#include <opencrypto/deflate.h>
+#include <opencrypto/cryptodev.h>
+#include <opencrypto/xform_userland.h>
+
+/* Declarations */
+struct comp_algo {
+ int type;
+ char *name;
+ size_t minlen;
+ u_int32_t (*compress) (u_int8_t *, u_int32_t, u_int8_t **);
+ u_int32_t (*decompress) (u_int8_t *, u_int32_t, u_int8_t **);
+};
+
+extern struct comp_algo comp_algo_deflate;
+
+#endif /* _CRYPTO_XFORM_COMP_H_ */
diff --git a/freebsd/sys/opencrypto/xform_deflate.c b/freebsd/sys/opencrypto/xform_deflate.c
new file mode 100644
index 00000000..a17b87f4
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_deflate.c
@@ -0,0 +1,88 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <opencrypto/deflate.h>
+#include <opencrypto/xform_comp.h>
+
+static u_int32_t deflate_compress(u_int8_t *, u_int32_t, u_int8_t **);
+static u_int32_t deflate_decompress(u_int8_t *, u_int32_t, u_int8_t **);
+
+/* Compression instance */
+struct comp_algo comp_algo_deflate = {
+ CRYPTO_DEFLATE_COMP, "Deflate",
+ 90, deflate_compress,
+ deflate_decompress
+};
+
+/*
+ * And compression
+ */
+
+static u_int32_t
+deflate_compress(data, size, out)
+ u_int8_t *data;
+ u_int32_t size;
+ u_int8_t **out;
+{
+ return deflate_global(data, size, 0, out);
+}
+
+static u_int32_t
+deflate_decompress(data, size, out)
+ u_int8_t *data;
+ u_int32_t size;
+ u_int8_t **out;
+{
+ return deflate_global(data, size, 1, out);
+}
diff --git a/freebsd/sys/opencrypto/xform_des1.c b/freebsd/sys/opencrypto/xform_des1.c
new file mode 100644
index 00000000..91e52e52
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_des1.c
@@ -0,0 +1,118 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <crypto/des/des.h>
+#include <opencrypto/xform_enc.h>
+
+static int des1_setkey(u_int8_t **, u_int8_t *, int);
+static void des1_encrypt(caddr_t, u_int8_t *);
+static void des1_decrypt(caddr_t, u_int8_t *);
+static void des1_zerokey(u_int8_t **);
+
+/* Encryption instances */
+struct enc_xform enc_xform_des = {
+ CRYPTO_DES_CBC, "DES",
+ DES_BLOCK_LEN, DES_BLOCK_LEN, DES_MIN_KEY, DES_MAX_KEY,
+ des1_encrypt,
+ des1_decrypt,
+ des1_setkey,
+ des1_zerokey,
+ NULL,
+};
+
+/*
+ * Encryption wrapper routines.
+ */
+static void
+des1_encrypt(caddr_t key, u_int8_t *blk)
+{
+ des_cblock *cb = (des_cblock *) blk;
+ des_key_schedule *p = (des_key_schedule *) key;
+
+ des_ecb_encrypt(cb, cb, p[0], DES_ENCRYPT);
+}
+
+static void
+des1_decrypt(caddr_t key, u_int8_t *blk)
+{
+ des_cblock *cb = (des_cblock *) blk;
+ des_key_schedule *p = (des_key_schedule *) key;
+
+ des_ecb_encrypt(cb, cb, p[0], DES_DECRYPT);
+}
+
+static int
+des1_setkey(u_int8_t **sched, u_int8_t *key, int len)
+{
+ des_key_schedule *p;
+ int err;
+
+ p = KMALLOC(sizeof (des_key_schedule),
+ M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
+ if (p != NULL) {
+ des_set_key((des_cblock *) key, p[0]);
+ err = 0;
+ } else
+ err = ENOMEM;
+ *sched = (u_int8_t *) p;
+ return err;
+}
+
+static void
+des1_zerokey(u_int8_t **sched)
+{
+ bzero(*sched, sizeof (des_key_schedule));
+ KFREE(*sched, M_CRYPTO_DATA);
+ *sched = NULL;
+}
diff --git a/freebsd/sys/opencrypto/xform_des3.c b/freebsd/sys/opencrypto/xform_des3.c
new file mode 100644
index 00000000..e47de600
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_des3.c
@@ -0,0 +1,121 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <crypto/des/des.h>
+#include <opencrypto/xform_enc.h>
+
+static int des3_setkey(u_int8_t **, u_int8_t *, int);
+static void des3_encrypt(caddr_t, u_int8_t *);
+static void des3_decrypt(caddr_t, u_int8_t *);
+static void des3_zerokey(u_int8_t **);
+
+/* Encryption instances */
+struct enc_xform enc_xform_3des = {
+ CRYPTO_3DES_CBC, "3DES",
+ DES3_BLOCK_LEN, DES3_BLOCK_LEN, TRIPLE_DES_MIN_KEY,
+ TRIPLE_DES_MAX_KEY,
+ des3_encrypt,
+ des3_decrypt,
+ des3_setkey,
+ des3_zerokey,
+ NULL,
+};
+
+/*
+ * Encryption wrapper routines.
+ */
+static void
+des3_encrypt(caddr_t key, u_int8_t *blk)
+{
+ des_cblock *cb = (des_cblock *) blk;
+ des_key_schedule *p = (des_key_schedule *) key;
+
+ des_ecb3_encrypt(cb, cb, p[0], p[1], p[2], DES_ENCRYPT);
+}
+
+static void
+des3_decrypt(caddr_t key, u_int8_t *blk)
+{
+ des_cblock *cb = (des_cblock *) blk;
+ des_key_schedule *p = (des_key_schedule *) key;
+
+ des_ecb3_encrypt(cb, cb, p[0], p[1], p[2], DES_DECRYPT);
+}
+
+static int
+des3_setkey(u_int8_t **sched, u_int8_t *key, int len)
+{
+ des_key_schedule *p;
+ int err;
+
+ p = KMALLOC(3*sizeof (des_key_schedule),
+ M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
+ if (p != NULL) {
+ des_set_key((des_cblock *)(key + 0), p[0]);
+ des_set_key((des_cblock *)(key + 8), p[1]);
+ des_set_key((des_cblock *)(key + 16), p[2]);
+ err = 0;
+ } else
+ err = ENOMEM;
+ *sched = (u_int8_t *) p;
+ return err;
+}
+
+static void
+des3_zerokey(u_int8_t **sched)
+{
+ bzero(*sched, 3*sizeof (des_key_schedule));
+ KFREE(*sched, M_CRYPTO_DATA);
+ *sched = NULL;
+}
diff --git a/freebsd/sys/opencrypto/xform_enc.h b/freebsd/sys/opencrypto/xform_enc.h
new file mode 100644
index 00000000..aa99b1e1
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_enc.h
@@ -0,0 +1,92 @@
+/* $FreeBSD$ */
+/* $OpenBSD: xform.h,v 1.8 2001/08/28 12:20:43 ben Exp $ */
+
+/*-
+ * The author of this code is Angelos D. Keromytis (angelos@cis.upenn.edu)
+ *
+ * This code was written by Angelos D. Keromytis in Athens, Greece, in
+ * February 2000. Network Security Technologies Inc. (NSTI) kindly
+ * supported the development of this code.
+ *
+ * Copyright (c) 2000 Angelos D. Keromytis
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all source code copies of any software which is or includes a copy or
+ * modification of this software.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#ifndef _CRYPTO_XFORM_ENC_H_
+#define _CRYPTO_XFORM_ENC_H_
+
+#include <sys/malloc.h>
+#include <rtems/bsd/sys/errno.h>
+#include <crypto/blowfish/blowfish.h>
+#include <crypto/des/des.h>
+#include <crypto/rijndael/rijndael.h>
+#include <crypto/camellia/camellia.h>
+#include <opencrypto/cast.h>
+#include <opencrypto/skipjack.h>
+#include <opencrypto/cryptodev.h>
+#include <opencrypto/xform_userland.h>
+
+#define AESICM_BLOCKSIZE AES_BLOCK_LEN
+#define AES_XTS_BLOCKSIZE 16
+#define AES_XTS_IVSIZE 8
+#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */
+
+/* Declarations */
+struct enc_xform {
+ int type;
+ char *name;
+ u_int16_t blocksize;
+ u_int16_t ivsize;
+ u_int16_t minkey, maxkey;
+ void (*encrypt) (caddr_t, u_int8_t *);
+ void (*decrypt) (caddr_t, u_int8_t *);
+ int (*setkey) (u_int8_t **, u_int8_t *, int len);
+ void (*zerokey) (u_int8_t **);
+ void (*reinit) (caddr_t, u_int8_t *);
+};
+
+
+extern struct enc_xform enc_xform_null;
+extern struct enc_xform enc_xform_des;
+extern struct enc_xform enc_xform_3des;
+extern struct enc_xform enc_xform_blf;
+extern struct enc_xform enc_xform_cast5;
+extern struct enc_xform enc_xform_skipjack;
+extern struct enc_xform enc_xform_rijndael128;
+extern struct enc_xform enc_xform_aes_icm;
+extern struct enc_xform enc_xform_aes_nist_gcm;
+extern struct enc_xform enc_xform_aes_nist_gmac;
+extern struct enc_xform enc_xform_aes_xts;
+extern struct enc_xform enc_xform_arc4;
+extern struct enc_xform enc_xform_camellia;
+
+struct aes_icm_ctx {
+ u_int32_t ac_ek[4*(RIJNDAEL_MAXNR + 1)];
+ /* ac_block is initialized to IV */
+ u_int8_t ac_block[AESICM_BLOCKSIZE];
+ int ac_nr;
+};
+
+struct aes_xts_ctx {
+ rijndael_ctx key1;
+ rijndael_ctx key2;
+ u_int8_t tweak[AES_XTS_BLOCKSIZE];
+};
+
+#endif /* _CRYPTO_XFORM_ENC_H_ */
diff --git a/freebsd/sys/opencrypto/xform_gmac.c b/freebsd/sys/opencrypto/xform_gmac.c
new file mode 100644
index 00000000..be846fbf
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_gmac.c
@@ -0,0 +1,101 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <opencrypto/gmac.h>
+#include <opencrypto/xform_auth.h>
+
+/* Encryption instances */
+struct enc_xform enc_xform_aes_nist_gmac = {
+ CRYPTO_AES_NIST_GMAC, "AES-GMAC",
+ AES_ICM_BLOCK_LEN, AES_GCM_IV_LEN, AES_MIN_KEY, AES_MAX_KEY,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+};
+
+/* Authentication instances */
+struct auth_hash auth_hash_nist_gmac_aes_128 = {
+ CRYPTO_AES_128_NIST_GMAC, "GMAC-AES-128",
+ AES_128_GMAC_KEY_LEN, AES_GMAC_HASH_LEN, sizeof(struct aes_gmac_ctx),
+ GMAC_BLOCK_LEN,
+ (void (*)(void *)) AES_GMAC_Init,
+ (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey,
+ (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit,
+ (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update,
+ (void (*)(u_int8_t *, void *)) AES_GMAC_Final
+};
+
+struct auth_hash auth_hash_nist_gmac_aes_192 = {
+ CRYPTO_AES_192_NIST_GMAC, "GMAC-AES-192",
+ AES_192_GMAC_KEY_LEN, AES_GMAC_HASH_LEN, sizeof(struct aes_gmac_ctx),
+ GMAC_BLOCK_LEN,
+ (void (*)(void *)) AES_GMAC_Init,
+ (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey,
+ (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit,
+ (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update,
+ (void (*)(u_int8_t *, void *)) AES_GMAC_Final
+};
+
+struct auth_hash auth_hash_nist_gmac_aes_256 = {
+ CRYPTO_AES_256_NIST_GMAC, "GMAC-AES-256",
+ AES_256_GMAC_KEY_LEN, AES_GMAC_HASH_LEN, sizeof(struct aes_gmac_ctx),
+ GMAC_BLOCK_LEN,
+ (void (*)(void *)) AES_GMAC_Init,
+ (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey,
+ (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit,
+ (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update,
+ (void (*)(u_int8_t *, void *)) AES_GMAC_Final
+};
diff --git a/freebsd/sys/opencrypto/xform_md5.c b/freebsd/sys/opencrypto/xform_md5.c
new file mode 100644
index 00000000..fd69a1c7
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_md5.c
@@ -0,0 +1,83 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/md5.h>
+#include <opencrypto/xform_auth.h>
+
+static int MD5Update_int(void *, const u_int8_t *, u_int16_t);
+
+/* Authentication instances */
+struct auth_hash auth_hash_hmac_md5 = {
+ CRYPTO_MD5_HMAC, "HMAC-MD5",
+ MD5_HMAC_KEY_LEN, MD5_HASH_LEN, sizeof(MD5_CTX), MD5_HMAC_BLOCK_LEN,
+ (void (*) (void *)) MD5Init, NULL, NULL, MD5Update_int,
+ (void (*) (u_int8_t *, void *)) MD5Final
+};
+
+struct auth_hash auth_hash_key_md5 = {
+ CRYPTO_MD5_KPDK, "Keyed MD5",
+ NULL_HMAC_KEY_LEN, MD5_KPDK_HASH_LEN, sizeof(MD5_CTX), 0,
+ (void (*)(void *)) MD5Init, NULL, NULL, MD5Update_int,
+ (void (*)(u_int8_t *, void *)) MD5Final
+};
+
+/*
+ * And now for auth.
+ */
+static int
+MD5Update_int(void *ctx, const u_int8_t *buf, u_int16_t len)
+{
+ MD5Update(ctx, buf, len);
+ return 0;
+}
diff --git a/freebsd/sys/opencrypto/xform_null.c b/freebsd/sys/opencrypto/xform_null.c
new file mode 100644
index 00000000..6dcf3ffd
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_null.c
@@ -0,0 +1,138 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <opencrypto/xform_auth.h>
+#include <opencrypto/xform_enc.h>
+
+static int null_setkey(u_int8_t **, u_int8_t *, int);
+static void null_encrypt(caddr_t, u_int8_t *);
+static void null_decrypt(caddr_t, u_int8_t *);
+static void null_zerokey(u_int8_t **);
+
+static void null_init(void *);
+static void null_reinit(void *ctx, const u_int8_t *buf, u_int16_t len);
+static int null_update(void *, const u_int8_t *, u_int16_t);
+static void null_final(u_int8_t *, void *);
+
+/* Encryption instances */
+struct enc_xform enc_xform_null = {
+ CRYPTO_NULL_CBC, "NULL",
+ /* NB: blocksize of 4 is to generate a properly aligned ESP header */
+ NULL_BLOCK_LEN, 0, NULL_MIN_KEY, NULL_MAX_KEY,
+ null_encrypt,
+ null_decrypt,
+ null_setkey,
+ null_zerokey,
+ NULL,
+};
+
+/* Authentication instances */
+struct auth_hash auth_hash_null = { /* NB: context isn't used */
+ CRYPTO_NULL_HMAC, "NULL-HMAC",
+ NULL_HMAC_KEY_LEN, NULL_HASH_LEN, sizeof(int), NULL_HMAC_BLOCK_LEN,
+ null_init, null_reinit, null_reinit, null_update, null_final
+};
+
+/*
+ * Encryption wrapper routines.
+ */
+static void
+null_encrypt(caddr_t key, u_int8_t *blk)
+{
+}
+
+static void
+null_decrypt(caddr_t key, u_int8_t *blk)
+{
+}
+
+static int
+null_setkey(u_int8_t **sched, u_int8_t *key, int len)
+{
+ *sched = NULL;
+ return 0;
+}
+
+static void
+null_zerokey(u_int8_t **sched)
+{
+ *sched = NULL;
+}
+
+/*
+ * And now for auth.
+ */
+
+static void
+null_init(void *ctx)
+{
+}
+
+static void
+null_reinit(void *ctx, const u_int8_t *buf, u_int16_t len)
+{
+}
+
+static int
+null_update(void *ctx, const u_int8_t *buf, u_int16_t len)
+{
+ return 0;
+}
+
+static void
+null_final(u_int8_t *buf, void *ctx)
+{
+ if (buf != (u_int8_t *) 0)
+ bzero(buf, 12);
+}
diff --git a/freebsd/sys/opencrypto/xform_rijndael.c b/freebsd/sys/opencrypto/xform_rijndael.c
new file mode 100644
index 00000000..9dcb4d3c
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_rijndael.c
@@ -0,0 +1,115 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <crypto/rijndael/rijndael.h>
+#include <opencrypto/xform_enc.h>
+
+static int rijndael128_setkey(u_int8_t **, u_int8_t *, int);
+static void rijndael128_encrypt(caddr_t, u_int8_t *);
+static void rijndael128_decrypt(caddr_t, u_int8_t *);
+static void rijndael128_zerokey(u_int8_t **);
+
+/* Encryption instances */
+struct enc_xform enc_xform_rijndael128 = {
+ CRYPTO_RIJNDAEL128_CBC, "Rijndael-128/AES",
+ RIJNDAEL128_BLOCK_LEN, RIJNDAEL128_BLOCK_LEN, RIJNDAEL_MIN_KEY,
+ RIJNDAEL_MAX_KEY,
+ rijndael128_encrypt,
+ rijndael128_decrypt,
+ rijndael128_setkey,
+ rijndael128_zerokey,
+ NULL,
+};
+
+/*
+ * Encryption wrapper routines.
+ */
+static void
+rijndael128_encrypt(caddr_t key, u_int8_t *blk)
+{
+ rijndael_encrypt((rijndael_ctx *) key, (u_char *) blk, (u_char *) blk);
+}
+
+static void
+rijndael128_decrypt(caddr_t key, u_int8_t *blk)
+{
+ rijndael_decrypt(((rijndael_ctx *) key), (u_char *) blk,
+ (u_char *) blk);
+}
+
+static int
+rijndael128_setkey(u_int8_t **sched, u_int8_t *key, int len)
+{
+ int err;
+
+ if (len != 16 && len != 24 && len != 32)
+ return (EINVAL);
+ *sched = KMALLOC(sizeof(rijndael_ctx), M_CRYPTO_DATA,
+ M_NOWAIT|M_ZERO);
+ if (*sched != NULL) {
+ rijndael_set_key((rijndael_ctx *) *sched, (u_char *) key,
+ len * 8);
+ err = 0;
+ } else
+ err = ENOMEM;
+ return err;
+}
+
+static void
+rijndael128_zerokey(u_int8_t **sched)
+{
+ bzero(*sched, sizeof(rijndael_ctx));
+ KFREE(*sched, M_CRYPTO_DATA);
+ *sched = NULL;
+}
diff --git a/freebsd/sys/opencrypto/xform_rmd160.c b/freebsd/sys/opencrypto/xform_rmd160.c
new file mode 100644
index 00000000..016bd627
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_rmd160.c
@@ -0,0 +1,77 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <opencrypto/rmd160.h>
+#include <opencrypto/xform_auth.h>
+
+static int RMD160Update_int(void *, const u_int8_t *, u_int16_t);
+
+/* Authentication instances */
+struct auth_hash auth_hash_hmac_ripemd_160 = {
+ CRYPTO_RIPEMD160_HMAC, "HMAC-RIPEMD-160",
+ RIPEMD160_HMAC_KEY_LEN, RIPEMD160_HASH_LEN, sizeof(RMD160_CTX),
+ RIPEMD160_HMAC_BLOCK_LEN,
+ (void (*)(void *)) RMD160Init, NULL, NULL, RMD160Update_int,
+ (void (*)(u_int8_t *, void *)) RMD160Final
+};
+
+/*
+ * And now for auth.
+ */
+static int
+RMD160Update_int(void *ctx, const u_int8_t *buf, u_int16_t len)
+{
+ RMD160Update(ctx, buf, len);
+ return 0;
+}
diff --git a/freebsd/sys/opencrypto/xform_sha1.c b/freebsd/sys/opencrypto/xform_sha1.c
new file mode 100644
index 00000000..76489048
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_sha1.c
@@ -0,0 +1,95 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <crypto/sha1.h>
+#include <opencrypto/xform_auth.h>
+
+static void SHA1Init_int(void *);
+static int SHA1Update_int(void *, const u_int8_t *, u_int16_t);
+static void SHA1Final_int(u_int8_t *, void *);
+
+/* Authentication instances */
+struct auth_hash auth_hash_hmac_sha1 = {
+ CRYPTO_SHA1_HMAC, "HMAC-SHA1",
+ SHA1_HMAC_KEY_LEN, SHA1_HASH_LEN, sizeof(SHA1_CTX), SHA1_HMAC_BLOCK_LEN,
+ SHA1Init_int, NULL, NULL, SHA1Update_int, SHA1Final_int
+};
+
+struct auth_hash auth_hash_key_sha1 = {
+ CRYPTO_SHA1_KPDK, "Keyed SHA1",
+ NULL_HMAC_KEY_LEN, SHA1_KPDK_HASH_LEN, sizeof(SHA1_CTX), 0,
+ SHA1Init_int, NULL, NULL, SHA1Update_int, SHA1Final_int
+};
+
+/*
+ * And now for auth.
+ */
+static void
+SHA1Init_int(void *ctx)
+{
+ SHA1Init(ctx);
+}
+
+static int
+SHA1Update_int(void *ctx, const u_int8_t *buf, u_int16_t len)
+{
+ SHA1Update(ctx, buf, len);
+ return 0;
+}
+
+static void
+SHA1Final_int(u_int8_t *blk, void *ctx)
+{
+ SHA1Final(blk, ctx);
+}
diff --git a/freebsd/sys/opencrypto/xform_sha2.c b/freebsd/sys/opencrypto/xform_sha2.c
new file mode 100644
index 00000000..f3f895d0
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_sha2.c
@@ -0,0 +1,111 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <crypto/sha2/sha256.h>
+#include <crypto/sha2/sha384.h>
+#include <crypto/sha2/sha512.h>
+#include <opencrypto/xform_auth.h>
+
+static int SHA256Update_int(void *, const u_int8_t *, u_int16_t);
+static int SHA384Update_int(void *, const u_int8_t *, u_int16_t);
+static int SHA512Update_int(void *, const u_int8_t *, u_int16_t);
+
+/* Authentication instances */
+struct auth_hash auth_hash_hmac_sha2_256 = {
+ CRYPTO_SHA2_256_HMAC, "HMAC-SHA2-256",
+ SHA2_256_HMAC_KEY_LEN, SHA2_256_HASH_LEN, sizeof(SHA256_CTX),
+ SHA2_256_HMAC_BLOCK_LEN,
+ (void (*)(void *)) SHA256_Init, NULL, NULL, SHA256Update_int,
+ (void (*)(u_int8_t *, void *)) SHA256_Final
+};
+
+struct auth_hash auth_hash_hmac_sha2_384 = {
+ CRYPTO_SHA2_384_HMAC, "HMAC-SHA2-384",
+ SHA2_384_HMAC_KEY_LEN, SHA2_384_HASH_LEN, sizeof(SHA384_CTX),
+ SHA2_384_HMAC_BLOCK_LEN,
+ (void (*)(void *)) SHA384_Init, NULL, NULL, SHA384Update_int,
+ (void (*)(u_int8_t *, void *)) SHA384_Final
+};
+
+struct auth_hash auth_hash_hmac_sha2_512 = {
+ CRYPTO_SHA2_512_HMAC, "HMAC-SHA2-512",
+ SHA2_512_HMAC_KEY_LEN, SHA2_512_HASH_LEN, sizeof(SHA512_CTX),
+ SHA2_512_HMAC_BLOCK_LEN,
+ (void (*)(void *)) SHA512_Init, NULL, NULL, SHA512Update_int,
+ (void (*)(u_int8_t *, void *)) SHA512_Final
+};
+
+/*
+ * And now for auth.
+ */
+static int
+SHA256Update_int(void *ctx, const u_int8_t *buf, u_int16_t len)
+{
+ SHA256_Update(ctx, buf, len);
+ return 0;
+}
+
+static int
+SHA384Update_int(void *ctx, const u_int8_t *buf, u_int16_t len)
+{
+ SHA384_Update(ctx, buf, len);
+ return 0;
+}
+
+static int
+SHA512Update_int(void *ctx, const u_int8_t *buf, u_int16_t len)
+{
+ SHA512_Update(ctx, buf, len);
+ return 0;
+}
diff --git a/freebsd/sys/opencrypto/xform_skipjack.c b/freebsd/sys/opencrypto/xform_skipjack.c
new file mode 100644
index 00000000..066381c9
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_skipjack.c
@@ -0,0 +1,119 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */
+/*-
+ * The authors of this code are John Ioannidis (ji@tla.org),
+ * Angelos D. Keromytis (kermit@csd.uch.gr),
+ * Niels Provos (provos@physnet.uni-hamburg.de) and
+ * Damien Miller (djm@mindrot.org).
+ *
+ * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
+ * in November 1995.
+ *
+ * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
+ * by Angelos D. Keromytis.
+ *
+ * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
+ * and Niels Provos.
+ *
+ * Additional features in 1999 by Angelos D. Keromytis.
+ *
+ * AES XTS implementation in 2008 by Damien Miller
+ *
+ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
+ * Angelos D. Keromytis and Niels Provos.
+ *
+ * Copyright (C) 2001, Angelos D. Keromytis.
+ *
+ * Copyright (C) 2008, Damien Miller
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by John-Mark Gurney
+ * under sponsorship of the FreeBSD Foundation and
+ * Rubicon Communications, LLC (Netgate).
+ *
+ * Permission to use, copy, and modify this software with or without fee
+ * is hereby granted, provided that this entire notice is included in
+ * all copies of any software which is or includes a copy or
+ * modification of this software.
+ * You may use this code under the GNU public license if you so wish. Please
+ * contribute changes back to the authors under this freer than GPL license
+ * so that we may further the use of strong encryption without limitations to
+ * all.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+ * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+ * PURPOSE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <opencrypto/skipjack.h>
+#include <opencrypto/xform_enc.h>
+
+static int skipjack_setkey(u_int8_t **, u_int8_t *, int);
+static void skipjack_encrypt(caddr_t, u_int8_t *);
+static void skipjack_decrypt(caddr_t, u_int8_t *);
+static void skipjack_zerokey(u_int8_t **);
+
+/* Encryption instances */
+struct enc_xform enc_xform_skipjack = {
+ CRYPTO_SKIPJACK_CBC, "Skipjack",
+ SKIPJACK_BLOCK_LEN, SKIPJACK_BLOCK_LEN, SKIPJACK_MIN_KEY,
+ SKIPJACK_MAX_KEY,
+ skipjack_encrypt,
+ skipjack_decrypt, skipjack_setkey,
+ skipjack_zerokey,
+ NULL,
+};
+
+/*
+ * Encryption wrapper routines.
+ */
+static void
+skipjack_encrypt(caddr_t key, u_int8_t *blk)
+{
+ skipjack_forwards(blk, blk, (u_int8_t **) key);
+}
+
+static void
+skipjack_decrypt(caddr_t key, u_int8_t *blk)
+{
+ skipjack_backwards(blk, blk, (u_int8_t **) key);
+}
+
+static int
+skipjack_setkey(u_int8_t **sched, u_int8_t *key, int len)
+{
+ int err;
+
+ /* NB: allocate all the memory that's needed at once */
+ *sched = KMALLOC(10 * (sizeof(u_int8_t *) + 0x100),
+ M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
+ if (*sched != NULL) {
+ u_int8_t** key_tables = (u_int8_t**) *sched;
+ u_int8_t* table = (u_int8_t*) &key_tables[10];
+ int k;
+
+ for (k = 0; k < 10; k++) {
+ key_tables[k] = table;
+ table += 0x100;
+ }
+ subkey_table_gen(key, (u_int8_t **) *sched);
+ err = 0;
+ } else
+ err = ENOMEM;
+ return err;
+}
+
+static void
+skipjack_zerokey(u_int8_t **sched)
+{
+ bzero(*sched, 10 * (sizeof(u_int8_t *) + 0x100));
+ KFREE(*sched, M_CRYPTO_DATA);
+ *sched = NULL;
+}
diff --git a/freebsd/sys/opencrypto/xform_userland.h b/freebsd/sys/opencrypto/xform_userland.h
new file mode 100644
index 00000000..04266dc8
--- /dev/null
+++ b/freebsd/sys/opencrypto/xform_userland.h
@@ -0,0 +1,48 @@
+/*-
+ * Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _CRYPTO_XFORM_USERLAND_H_
+#define _CRYPTO_XFORM_USERLAND_H_
+
+#ifdef _KERNEL
+#include <sys/systm.h>
+#define KMALLOC(size, type, flags) malloc(size, type, flags)
+#define KFREE(ptr, type) free(ptr, type)
+#else /* not _KERNEL */
+#ifdef _STAND
+#include <stand.h>
+#else /* !_STAND */
+#include <stdlib.h>
+#include <string.h>
+#endif /* _STAND */
+#define KMALLOC(size, type, flags) malloc(size)
+#define KFREE(ptr, type) free(ptr)
+#endif /* _KERNEL */
+
+
+#endif /* _CRYPTO_XFORM_USERLAND_H_ */
diff --git a/freebsd/sys/powerpc/include/machine/cpufunc.h b/freebsd/sys/powerpc/include/machine/cpufunc.h
index 4c2c7b43..c949d53d 100644
--- a/freebsd/sys/powerpc/include/machine/cpufunc.h
+++ b/freebsd/sys/powerpc/include/machine/cpufunc.h
@@ -29,16 +29,6 @@
#ifndef _MACHINE_CPUFUNC_H_
#define _MACHINE_CPUFUNC_H_
-/*
- * Required for user-space atomic.h includes
- */
-static __inline void
-powerpc_mb(void)
-{
-
- __asm __volatile("eieio; sync" : : : "memory");
-}
-
#ifdef _KERNEL
#include <sys/types.h>
diff --git a/freebsd/sys/powerpc/include/machine/in_cksum.h b/freebsd/sys/powerpc/include/machine/in_cksum.h
index 4fe1b402..37bfb200 100644
--- a/freebsd/sys/powerpc/include/machine/in_cksum.h
+++ b/freebsd/sys/powerpc/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/powerpc/include/machine/psl.h b/freebsd/sys/powerpc/include/machine/psl.h
index 92bfa6ca..f0a0fa4c 100644
--- a/freebsd/sys/powerpc/include/machine/psl.h
+++ b/freebsd/sys/powerpc/include/machine/psl.h
@@ -35,74 +35,45 @@
#ifndef _MACHINE_PSL_H_
#define _MACHINE_PSL_H_
-#if defined(E500)
/*
- * Machine State Register (MSR) - e500 core
- *
- * The PowerPC e500 does not implement the following bits:
- *
- * FP, FE0, FE1 - reserved, always cleared, setting has no effect.
- *
+ * Machine State Register (MSR) - All cores
*/
+#define PSL_VEC 0x02000000UL /* AltiVec/SPE vector unit available */
+#define PSL_VSX 0x00800000UL /* Vector-Scalar unit available */
+#define PSL_EE 0x00008000UL /* external interrupt enable */
+#define PSL_PR 0x00004000UL /* privilege mode (1 == user) */
+#define PSL_FP 0x00002000UL /* floating point enable */
+#define PSL_ME 0x00001000UL /* machine check enable */
+#define PSL_FE0 0x00000800UL /* floating point interrupt mode 0 */
+#define PSL_BE 0x00000200UL /* branch trace enable */
+#define PSL_FE1 0x00000100UL /* floating point interrupt mode 1 */
+#define PSL_PMM 0x00000004UL /* performance monitor mark */
+
+/* Machine State Register - Book-E cores */
#define PSL_UCLE 0x04000000UL /* User mode cache lock enable */
-#define PSL_SPE 0x02000000UL /* SPE enable */
#define PSL_WE 0x00040000UL /* Wait state enable */
#define PSL_CE 0x00020000UL /* Critical interrupt enable */
-#define PSL_EE 0x00008000UL /* External interrupt enable */
-#define PSL_PR 0x00004000UL /* User mode */
-#define PSL_FP 0x00002000UL /* Floating point available */
-#define PSL_ME 0x00001000UL /* Machine check interrupt enable */
-#define PSL_FE0 0x00000800UL /* Floating point exception mode 0 */
-#define PSL_UBLE 0x00000400UL /* BTB lock enable */
+#define PSL_UBLE 0x00000400UL /* BTB lock enable - e500 only */
+#define PSL_DWE 0x00000400UL /* Debug Wait Enable - 440 only*/
#define PSL_DE 0x00000200UL /* Debug interrupt enable */
-#define PSL_FE1 0x00000100UL /* Floating point exception mode 1 */
#define PSL_IS 0x00000020UL /* Instruction address space */
#define PSL_DS 0x00000010UL /* Data address space */
-#define PSL_PMM 0x00000004UL /* Performance monitor mark */
-
-#define PSL_FE_DFLT 0x00000000UL /* default == none */
-
-/* Initial kernel MSR, use IS=1 ad DS=1. */
-#define PSL_KERNSET_INIT (PSL_IS | PSL_DS)
-#define PSL_KERNSET (PSL_CE | PSL_ME | PSL_EE)
-#define PSL_USERSET (PSL_KERNSET | PSL_PR)
-
-#else /* if defined(E500) */
-/*
- * Machine State Register (MSR)
- *
- * The PowerPC 601 does not implement the following bits:
- *
- * VEC, POW, ILE, BE, RI, LE[*]
- *
- * [*] Little-endian mode on the 601 is implemented in the HID0 register.
- */
+/* Machine State Register (MSR) - AIM cores */
#ifdef __powerpc64__
#define PSL_SF 0x8000000000000000UL /* 64-bit addressing */
#define PSL_HV 0x1000000000000000UL /* hyper-privileged mode */
#endif
-#define PSL_VEC 0x02000000UL /* AltiVec vector unit available */
#define PSL_POW 0x00040000UL /* power management */
#define PSL_ILE 0x00010000UL /* interrupt endian mode (1 == le) */
-#define PSL_EE 0x00008000UL /* external interrupt enable */
-#define PSL_PR 0x00004000UL /* privilege mode (1 == user) */
-#define PSL_FP 0x00002000UL /* floating point enable */
-#define PSL_ME 0x00001000UL /* machine check enable */
-#define PSL_FE0 0x00000800UL /* floating point interrupt mode 0 */
#define PSL_SE 0x00000400UL /* single-step trace enable */
-#define PSL_BE 0x00000200UL /* branch trace enable */
-#define PSL_FE1 0x00000100UL /* floating point interrupt mode 1 */
-#define PSL_IP 0x00000040UL /* interrupt prefix */
+#define PSL_IP 0x00000040UL /* interrupt prefix - 601 only */
#define PSL_IR 0x00000020UL /* instruction address relocation */
#define PSL_DR 0x00000010UL /* data address relocation */
-#define PSL_PMM 0x00000004UL /* performance monitor mark */
#define PSL_RI 0x00000002UL /* recoverable interrupt */
#define PSL_LE 0x00000001UL /* endian mode (1 == le) */
-#define PSL_601_MASK ~(PSL_POW|PSL_ILE|PSL_BE|PSL_RI|PSL_LE)
-
/*
* Floating-point exception modes:
*/
@@ -112,20 +83,24 @@
#define PSL_FE_PREC (PSL_FE0 | PSL_FE1) /* precise */
#define PSL_FE_DFLT PSL_FE_DIS /* default == none */
-/*
- * Note that PSL_POW and PSL_ILE are not in the saved copy of the MSR
- */
-#define PSL_MBO 0
-#define PSL_MBZ 0
-
+#if defined(BOOKE_E500)
+/* Initial kernel MSR, use IS=1 ad DS=1. */
+#define PSL_KERNSET_INIT (PSL_IS | PSL_DS)
+#define PSL_KERNSET (PSL_CE | PSL_ME | PSL_EE)
+#define PSL_SRR1_MASK 0x00000000UL /* No mask on Book-E */
+#elif defined(BOOKE_PPC4XX)
+#define PSL_KERNSET (PSL_CE | PSL_ME | PSL_EE | PSL_FP)
+#define PSL_SRR1_MASK 0x00000000UL /* No mask on Book-E */
+#elif defined(AIM)
#ifdef __powerpc64__
#define PSL_KERNSET (PSL_SF | PSL_EE | PSL_ME | PSL_IR | PSL_DR | PSL_RI)
#else
#define PSL_KERNSET (PSL_EE | PSL_ME | PSL_IR | PSL_DR | PSL_RI)
#endif
-#define PSL_USERSET (PSL_KERNSET | PSL_PR)
+#define PSL_SRR1_MASK 0x783f0000UL /* Bits 1-4, 10-15 (ppc32), 33-36, 42-47 (ppc64) */
+#endif
-#define PSL_USERSTATIC (PSL_USERSET | PSL_IP | 0x87c0008c)
+#define PSL_USERSET (PSL_KERNSET | PSL_PR)
+#define PSL_USERSTATIC (~(PSL_VEC | PSL_FP | PSL_FE0 | PSL_FE1) & ~PSL_SRR1_MASK)
-#endif /* if defined(E500) */
#endif /* _MACHINE_PSL_H_ */
diff --git a/freebsd/sys/powerpc/include/machine/spr.h b/freebsd/sys/powerpc/include/machine/spr.h
index e3569876..86dac97b 100644
--- a/freebsd/sys/powerpc/include/machine/spr.h
+++ b/freebsd/sys/powerpc/include/machine/spr.h
@@ -115,9 +115,9 @@
#define SRR1_ISI_NOEXECUTE 0x10000000 /* Memory marked no-execute */
#define SRR1_ISI_PP 0x08000000 /* PP bits forbid access */
#define SPR_DECAR 0x036 /* ..8 Decrementer auto reload */
-#define SPR_EIE 0x050 /* ..8 Exception Interrupt ??? */
-#define SPR_EID 0x051 /* ..8 Exception Interrupt ??? */
-#define SPR_NRI 0x052 /* ..8 Exception Interrupt ??? */
+#define SPR_EIE 0x050 /* ..8 Exception Interrupt ??? */
+#define SPR_EID 0x051 /* ..8 Exception Interrupt ??? */
+#define SPR_NRI 0x052 /* ..8 Exception Interrupt ??? */
#define SPR_USPRG0 0x100 /* 4.. User SPR General 0 */
#define SPR_VRSAVE 0x100 /* .6. AltiVec VRSAVE */
#define SPR_SPRG0 0x110 /* 468 SPR General 0 */
@@ -140,6 +140,7 @@
#define MPC603e 0x0006
#define MPC603ev 0x0007
#define MPC750 0x0008
+#define MPC750CL 0x7000 /* Nintendo Wii's Broadway */
#define MPC604ev 0x0009
#define MPC7400 0x000c
#define MPC620 0x0014
@@ -167,6 +168,9 @@
#define IBMPOWER3PLUS 0x0041
#define IBM970MP 0x0044
#define IBM970GX 0x0045
+#define IBMPOWER7PLUS 0x004a
+#define IBMPOWER8E 0x004b
+#define IBMPOWER8 0x004d
#define MPC860 0x0050
#define IBMCELLBE 0x0070
#define MPC8240 0x0081
@@ -184,6 +188,9 @@
#define MPC8245 0x8081
#define FSL_E500v1 0x8020
#define FSL_E500v2 0x8021
+#define FSL_E500mc 0x8023
+#define FSL_E5500 0x8024
+#define FSL_E6500 0x8040
#define SPR_IBAT0U 0x210 /* .68 Instruction BAT Reg 0 Upper */
#define SPR_IBAT0U 0x210 /* .6. Instruction BAT Reg 0 Upper */
@@ -202,49 +209,49 @@
#define SPR_DBAT2L 0x21d /* .6. Data BAT Reg 2 Lower */
#define SPR_DBAT3U 0x21e /* .6. Data BAT Reg 3 Upper */
#define SPR_DBAT3L 0x21f /* .6. Data BAT Reg 3 Lower */
-#define SPR_IC_CST 0x230 /* ..8 Instruction Cache CSR */
-#define IC_CST_IEN 0x80000000 /* I cache is ENabled (RO) */
-#define IC_CST_CMD_INVALL 0x0c000000 /* I cache invalidate all */
-#define IC_CST_CMD_UNLOCKALL 0x0a000000 /* I cache unlock all */
-#define IC_CST_CMD_UNLOCK 0x08000000 /* I cache unlock block */
-#define IC_CST_CMD_LOADLOCK 0x06000000 /* I cache load & lock block */
-#define IC_CST_CMD_DISABLE 0x04000000 /* I cache disable */
-#define IC_CST_CMD_ENABLE 0x02000000 /* I cache enable */
-#define IC_CST_CCER1 0x00200000 /* I cache error type 1 (RO) */
-#define IC_CST_CCER2 0x00100000 /* I cache error type 2 (RO) */
-#define IC_CST_CCER3 0x00080000 /* I cache error type 3 (RO) */
+#define SPR_IC_CST 0x230 /* ..8 Instruction Cache CSR */
+#define IC_CST_IEN 0x80000000 /* I cache is ENabled (RO) */
+#define IC_CST_CMD_INVALL 0x0c000000 /* I cache invalidate all */
+#define IC_CST_CMD_UNLOCKALL 0x0a000000 /* I cache unlock all */
+#define IC_CST_CMD_UNLOCK 0x08000000 /* I cache unlock block */
+#define IC_CST_CMD_LOADLOCK 0x06000000 /* I cache load & lock block */
+#define IC_CST_CMD_DISABLE 0x04000000 /* I cache disable */
+#define IC_CST_CMD_ENABLE 0x02000000 /* I cache enable */
+#define IC_CST_CCER1 0x00200000 /* I cache error type 1 (RO) */
+#define IC_CST_CCER2 0x00100000 /* I cache error type 2 (RO) */
+#define IC_CST_CCER3 0x00080000 /* I cache error type 3 (RO) */
#define SPR_IBAT4U 0x230 /* .6. Instruction BAT Reg 4 Upper */
-#define SPR_IC_ADR 0x231 /* ..8 Instruction Cache Address */
+#define SPR_IC_ADR 0x231 /* ..8 Instruction Cache Address */
#define SPR_IBAT4L 0x231 /* .6. Instruction BAT Reg 4 Lower */
-#define SPR_IC_DAT 0x232 /* ..8 Instruction Cache Data */
+#define SPR_IC_DAT 0x232 /* ..8 Instruction Cache Data */
#define SPR_IBAT5U 0x232 /* .6. Instruction BAT Reg 5 Upper */
#define SPR_IBAT5L 0x233 /* .6. Instruction BAT Reg 5 Lower */
#define SPR_IBAT6U 0x234 /* .6. Instruction BAT Reg 6 Upper */
#define SPR_IBAT6L 0x235 /* .6. Instruction BAT Reg 6 Lower */
#define SPR_IBAT7U 0x236 /* .6. Instruction BAT Reg 7 Upper */
#define SPR_IBAT7L 0x237 /* .6. Instruction BAT Reg 7 Lower */
-#define SPR_DC_CST 0x230 /* ..8 Data Cache CSR */
-#define DC_CST_DEN 0x80000000 /* D cache ENabled (RO) */
-#define DC_CST_DFWT 0x40000000 /* D cache Force Write-Thru (RO) */
-#define DC_CST_LES 0x20000000 /* D cache Little Endian Swap (RO) */
-#define DC_CST_CMD_FLUSH 0x0e000000 /* D cache invalidate all */
-#define DC_CST_CMD_INVALL 0x0c000000 /* D cache invalidate all */
-#define DC_CST_CMD_UNLOCKALL 0x0a000000 /* D cache unlock all */
-#define DC_CST_CMD_UNLOCK 0x08000000 /* D cache unlock block */
-#define DC_CST_CMD_CLRLESWAP 0x07000000 /* D cache clr little-endian swap */
-#define DC_CST_CMD_LOADLOCK 0x06000000 /* D cache load & lock block */
-#define DC_CST_CMD_SETLESWAP 0x05000000 /* D cache set little-endian swap */
-#define DC_CST_CMD_DISABLE 0x04000000 /* D cache disable */
-#define DC_CST_CMD_CLRFWT 0x03000000 /* D cache clear forced write-thru */
-#define DC_CST_CMD_ENABLE 0x02000000 /* D cache enable */
-#define DC_CST_CMD_SETFWT 0x01000000 /* D cache set forced write-thru */
-#define DC_CST_CCER1 0x00200000 /* D cache error type 1 (RO) */
-#define DC_CST_CCER2 0x00100000 /* D cache error type 2 (RO) */
-#define DC_CST_CCER3 0x00080000 /* D cache error type 3 (RO) */
+#define SPR_DC_CST 0x230 /* ..8 Data Cache CSR */
+#define DC_CST_DEN 0x80000000 /* D cache ENabled (RO) */
+#define DC_CST_DFWT 0x40000000 /* D cache Force Write-Thru (RO) */
+#define DC_CST_LES 0x20000000 /* D cache Little Endian Swap (RO) */
+#define DC_CST_CMD_FLUSH 0x0e000000 /* D cache invalidate all */
+#define DC_CST_CMD_INVALL 0x0c000000 /* D cache invalidate all */
+#define DC_CST_CMD_UNLOCKALL 0x0a000000 /* D cache unlock all */
+#define DC_CST_CMD_UNLOCK 0x08000000 /* D cache unlock block */
+#define DC_CST_CMD_CLRLESWAP 0x07000000 /* D cache clr little-endian swap */
+#define DC_CST_CMD_LOADLOCK 0x06000000 /* D cache load & lock block */
+#define DC_CST_CMD_SETLESWAP 0x05000000 /* D cache set little-endian swap */
+#define DC_CST_CMD_DISABLE 0x04000000 /* D cache disable */
+#define DC_CST_CMD_CLRFWT 0x03000000 /* D cache clear forced write-thru */
+#define DC_CST_CMD_ENABLE 0x02000000 /* D cache enable */
+#define DC_CST_CMD_SETFWT 0x01000000 /* D cache set forced write-thru */
+#define DC_CST_CCER1 0x00200000 /* D cache error type 1 (RO) */
+#define DC_CST_CCER2 0x00100000 /* D cache error type 2 (RO) */
+#define DC_CST_CCER3 0x00080000 /* D cache error type 3 (RO) */
#define SPR_DBAT4U 0x238 /* .6. Data BAT Reg 4 Upper */
-#define SPR_DC_ADR 0x231 /* ..8 Data Cache Address */
+#define SPR_DC_ADR 0x231 /* ..8 Data Cache Address */
#define SPR_DBAT4L 0x239 /* .6. Data BAT Reg 4 Lower */
-#define SPR_DC_DAT 0x232 /* ..8 Data Cache Data */
+#define SPR_DC_DAT 0x232 /* ..8 Data Cache Data */
#define SPR_DBAT5U 0x23a /* .6. Data BAT Reg 5 Upper */
#define SPR_DBAT5L 0x23b /* .6. Data BAT Reg 5 Lower */
#define SPR_DBAT6U 0x23c /* .6. Data BAT Reg 6 Upper */
@@ -252,46 +259,67 @@
#define SPR_DBAT7U 0x23e /* .6. Data BAT Reg 7 Upper */
#define SPR_DBAT7L 0x23f /* .6. Data BAT Reg 7 Lower */
#define SPR_MI_CTR 0x310 /* ..8 IMMU control */
-#define Mx_CTR_GPM 0x80000000 /* Group Protection Mode */
-#define Mx_CTR_PPM 0x40000000 /* Page Protection Mode */
-#define Mx_CTR_CIDEF 0x20000000 /* Cache-Inhibit DEFault */
-#define MD_CTR_WTDEF 0x20000000 /* Write-Through DEFault */
-#define Mx_CTR_RSV4 0x08000000 /* Reserve 4 TLB entries */
-#define MD_CTR_TWAM 0x04000000 /* TableWalk Assist Mode */
-#define Mx_CTR_PPCS 0x02000000 /* Priv/user state compare mode */
-#define Mx_CTR_TLB_INDX 0x000001f0 /* TLB index mask */
-#define Mx_CTR_TLB_INDX_BITPOS 8 /* TLB index shift */
+#define Mx_CTR_GPM 0x80000000 /* Group Protection Mode */
+#define Mx_CTR_PPM 0x40000000 /* Page Protection Mode */
+#define Mx_CTR_CIDEF 0x20000000 /* Cache-Inhibit DEFault */
+#define MD_CTR_WTDEF 0x20000000 /* Write-Through DEFault */
+#define Mx_CTR_RSV4 0x08000000 /* Reserve 4 TLB entries */
+#define MD_CTR_TWAM 0x04000000 /* TableWalk Assist Mode */
+#define Mx_CTR_PPCS 0x02000000 /* Priv/user state compare mode */
+#define Mx_CTR_TLB_INDX 0x000001f0 /* TLB index mask */
+#define Mx_CTR_TLB_INDX_BITPOS 8 /* TLB index shift */
#define SPR_MI_AP 0x312 /* ..8 IMMU access protection */
-#define Mx_GP_SUPER(n) (0 << (2*(15-(n)))) /* access is supervisor */
-#define Mx_GP_PAGE (1 << (2*(15-(n)))) /* access is page protect */
-#define Mx_GP_SWAPPED (2 << (2*(15-(n)))) /* access is swapped */
-#define Mx_GP_USER (3 << (2*(15-(n)))) /* access is user */
+#define Mx_GP_SUPER(n) (0 << (2*(15-(n)))) /* access is supervisor */
+#define Mx_GP_PAGE (1 << (2*(15-(n)))) /* access is page protect */
+#define Mx_GP_SWAPPED (2 << (2*(15-(n)))) /* access is swapped */
+#define Mx_GP_USER (3 << (2*(15-(n)))) /* access is user */
#define SPR_MI_EPN 0x313 /* ..8 IMMU effective number */
-#define Mx_EPN_EPN 0xfffff000 /* Effective Page Number mask */
-#define Mx_EPN_EV 0x00000020 /* Entry Valid */
-#define Mx_EPN_ASID 0x0000000f /* Address Space ID */
+#define Mx_EPN_EPN 0xfffff000 /* Effective Page Number mask */
+#define Mx_EPN_EV 0x00000020 /* Entry Valid */
+#define Mx_EPN_ASID 0x0000000f /* Address Space ID */
#define SPR_MI_TWC 0x315 /* ..8 IMMU tablewalk control */
-#define MD_TWC_L2TB 0xfffff000 /* Level-2 Tablewalk Base */
-#define Mx_TWC_APG 0x000001e0 /* Access Protection Group */
-#define Mx_TWC_G 0x00000010 /* Guarded memory */
-#define Mx_TWC_PS 0x0000000c /* Page Size (L1) */
-#define MD_TWC_WT 0x00000002 /* Write-Through */
-#define Mx_TWC_V 0x00000001 /* Entry Valid */
+#define MD_TWC_L2TB 0xfffff000 /* Level-2 Tablewalk Base */
+#define Mx_TWC_APG 0x000001e0 /* Access Protection Group */
+#define Mx_TWC_G 0x00000010 /* Guarded memory */
+#define Mx_TWC_PS 0x0000000c /* Page Size (L1) */
+#define MD_TWC_WT 0x00000002 /* Write-Through */
+#define Mx_TWC_V 0x00000001 /* Entry Valid */
#define SPR_MI_RPN 0x316 /* ..8 IMMU real (phys) page number */
-#define Mx_RPN_RPN 0xfffff000 /* Real Page Number */
-#define Mx_RPN_PP 0x00000ff0 /* Page Protection */
-#define Mx_RPN_SPS 0x00000008 /* Small Page Size */
-#define Mx_RPN_SH 0x00000004 /* SHared page */
-#define Mx_RPN_CI 0x00000002 /* Cache Inhibit */
-#define Mx_RPN_V 0x00000001 /* Valid */
+#define Mx_RPN_RPN 0xfffff000 /* Real Page Number */
+#define Mx_RPN_PP 0x00000ff0 /* Page Protection */
+#define Mx_RPN_SPS 0x00000008 /* Small Page Size */
+#define Mx_RPN_SH 0x00000004 /* SHared page */
+#define Mx_RPN_CI 0x00000002 /* Cache Inhibit */
+#define Mx_RPN_V 0x00000001 /* Valid */
#define SPR_MD_CTR 0x318 /* ..8 DMMU control */
#define SPR_M_CASID 0x319 /* ..8 CASID */
-#define M_CASID 0x0000000f /* Current AS Id */
+#define M_CASID 0x0000000f /* Current AS Id */
#define SPR_MD_AP 0x31a /* ..8 DMMU access protection */
#define SPR_MD_EPN 0x31b /* ..8 DMMU effective number */
+
+#define SPR_970MMCR0 0x31b /* ... Monitor Mode Control Register 0 (PPC 970) */
+#define SPR_970MMCR0_PMC1SEL(x) ((x) << 8) /* PMC1 selector (970) */
+#define SPR_970MMCR0_PMC2SEL(x) ((x) << 1) /* PMC2 selector (970) */
+#define SPR_970MMCR1 0x31e /* ... Monitor Mode Control Register 1 (PPC 970) */
+#define SPR_970MMCR1_PMC3SEL(x) (((x) & 0x1f) << 27) /* PMC 3 selector */
+#define SPR_970MMCR1_PMC4SEL(x) (((x) & 0x1f) << 22) /* PMC 4 selector */
+#define SPR_970MMCR1_PMC5SEL(x) (((x) & 0x1f) << 17) /* PMC 5 selector */
+#define SPR_970MMCR1_PMC6SEL(x) (((x) & 0x1f) << 12) /* PMC 6 selector */
+#define SPR_970MMCR1_PMC7SEL(x) (((x) & 0x1f) << 7) /* PMC 7 selector */
+#define SPR_970MMCR1_PMC8SEL(x) (((x) & 0x1f) << 2) /* PMC 8 selector */
+#define SPR_970MMCRA 0x312 /* ... Monitor Mode Control Register 2 (PPC 970) */
+#define SPR_970PMC1 0x313 /* ... PMC 1 */
+#define SPR_970PMC2 0x314 /* ... PMC 2 */
+#define SPR_970PMC3 0x315 /* ... PMC 3 */
+#define SPR_970PMC4 0x316 /* ... PMC 4 */
+#define SPR_970PMC5 0x317 /* ... PMC 5 */
+#define SPR_970PMC6 0x318 /* ... PMC 6 */
+#define SPR_970PMC7 0x319 /* ... PMC 7 */
+#define SPR_970PMC8 0x31a /* ... PMC 8 */
+
#define SPR_M_TWB 0x31c /* ..8 MMU tablewalk base */
-#define M_TWB_L1TB 0xfffff000 /* level-1 translation base */
-#define M_TWB_L1INDX 0x00000ffc /* level-1 index */
+#define M_TWB_L1TB 0xfffff000 /* level-1 translation base */
+#define M_TWB_L1INDX 0x00000ffc /* level-1 index */
#define SPR_MD_TWC 0x31d /* ..8 DMMU tablewalk control */
#define SPR_MD_RPN 0x31e /* ..8 DMMU real (phys) page number */
#define SPR_MD_TW 0x31f /* ..8 MMU tablewalk scratch */
@@ -307,8 +335,8 @@
#define SPR_UMMCR1 0x3ac /* .6. User Monitor Mode Control Register 1 */
#define SPR_ZPR 0x3b0 /* 4.. Zone Protection Register */
#define SPR_MMCR2 0x3b0 /* .6. Monitor Mode Control Register 2 */
-#define SPR_MMCR2_THRESHMULT_32 0x80000000 /* Multiply MMCR0 threshold by 32 */
-#define SPR_MMCR2_THRESHMULT_2 0x00000000 /* Multiply MMCR0 threshold by 2 */
+#define SPR_MMCR2_THRESHMULT_32 0x80000000 /* Multiply MMCR0 threshold by 32 */
+#define SPR_MMCR2_THRESHMULT_2 0x00000000 /* Multiply MMCR0 threshold by 2 */
#define SPR_PID 0x3b1 /* 4.. Process ID */
#define SPR_PMC5 0x3b1 /* .6. Performance Counter Register 5 */
#define SPR_PMC6 0x3b2 /* .6. Performance Counter Register 6 */
@@ -318,20 +346,6 @@
#define SPR_DVC1 0x3b6 /* 4.. Data Value Compare 1 */
#define SPR_DVC2 0x3b7 /* 4.. Data Value Compare 2 */
#define SPR_MMCR0 0x3b8 /* .6. Monitor Mode Control Register 0 */
-
-#define SPR_970MMCR0 0x31b /* ... Monitor Mode Control Register 0 (PPC 970) */
-#define SPR_970MMCR1 0x31e /* ... Monitor Mode Control Register 1 (PPC 970) */
-#define SPR_970MMCRA 0x312 /* ... Monitor Mode Control Register 2 (PPC 970) */
-#define SPR_970MMCR0 0x31b /* ... Monitor Mode Control Register 0 (PPC 970) */
-#define SPR_970PMC1 0x313 /* ... PMC 1 */
-#define SPR_970PMC2 0x314 /* ... PMC 2 */
-#define SPR_970PMC3 0x315 /* ... PMC 3 */
-#define SPR_970PMC4 0x316 /* ... PMC 4 */
-#define SPR_970PMC5 0x317 /* ... PMC 5 */
-#define SPR_970PMC6 0x318 /* ... PMC 6 */
-#define SPR_970PMC7 0x319 /* ... PMC 7 */
-#define SPR_970PMC8 0x31a /* ... PMC 8 */
-
#define SPR_MMCR0_FC 0x80000000 /* Freeze counters */
#define SPR_MMCR0_FCS 0x40000000 /* Freeze counters in supervisor mode */
#define SPR_MMCR0_FCP 0x20000000 /* Freeze counters in user mode */
@@ -350,8 +364,6 @@
#define SPR_MMCR0_TRIGGER 0x00002000 /* Trigger */
#define SPR_MMCR0_PMC1SEL(x) (((x) & 0x3f) << 6) /* PMC1 selector */
#define SPR_MMCR0_PMC2SEL(x) (((x) & 0x3f) << 0) /* PMC2 selector */
-#define SPR_970MMCR0_PMC1SEL(x) ((x) << 8) /* PMC1 selector (970) */
-#define SPR_970MMCR0_PMC2SEL(x) ((x) << 1) /* PMC2 selector (970) */
#define SPR_SGR 0x3b9 /* 4.. Storage Guarded Register */
#define SPR_PMC1 0x3b9 /* .6. Performance Counter Register 1 */
#define SPR_DCWR 0x3ba /* 4.. Data Cache Write-through Register */
@@ -421,6 +433,7 @@
#define SPR_SRR3 0x3df /* 4.. Save/Restore Register 3 */
#define SPR_HID0 0x3f0 /* ..8 Hardware Implementation Register 0 */
#define SPR_HID1 0x3f1 /* ..8 Hardware Implementation Register 1 */
+#define SPR_HID2 0x3f3 /* ..8 Hardware Implementation Register 2 */
#define SPR_HID4 0x3f4 /* ..8 Hardware Implementation Register 4 */
#define SPR_HID5 0x3f6 /* ..8 Hardware Implementation Register 5 */
#define SPR_HID6 0x3f9 /* ..8 Hardware Implementation Register 6 */
@@ -452,7 +465,7 @@
#define SPR_DAC1 0x3f6 /* 4.. Data Address Compare 1 */
#define SPR_DAC2 0x3f7 /* 4.. Data Address Compare 2 */
#define SPR_PIR 0x3ff /* .6. Processor Identification Register */
-#elif defined(E500)
+#elif defined(BOOKE)
#define SPR_PIR 0x11e /* ..8 Processor Identification Register */
#define SPR_DBSR 0x130 /* ..8 Debug Status Register */
#define DBSR_IDE 0x80000000 /* Imprecise debug event. */
@@ -514,6 +527,16 @@
#define MSSCR0_EMODE 0x00200000 /* 10: MPX bus mode (read-only) */
#define MSSCR0_ABD 0x00100000 /* 11: address bus driven (read-only) */
#define MSSCR0_MBZ 0x000fffff /* 12-31: must be zero */
+#define MSSCR0_L2PFE 0x00000003 /* 30-31: L2 prefetch enable */
+#define SPR_MSSSR0 0x3f7 /* .6. Memory Subsystem Status Register (MPC745x) */
+#define MSSSR0_L2TAG 0x00040000 /* 13: L2 tag parity error */
+#define MSSSR0_L2DAT 0x00020000 /* 14: L2 data parity error */
+#define MSSSR0_L3TAG 0x00010000 /* 15: L3 tag parity error */
+#define MSSSR0_L3DAT 0x00008000 /* 16: L3 data parity error */
+#define MSSSR0_APE 0x00004000 /* 17: Address parity error */
+#define MSSSR0_DPE 0x00002000 /* 18: Data parity error */
+#define MSSSR0_TEA 0x00001000 /* 19: Bus transfer error acknowledge */
+#define SPR_LDSTCR 0x3f8 /* .6. Load/Store Control Register */
#define SPR_L2PM 0x3f8 /* .6. L2 Private Memory Control Register */
#define SPR_L2CR 0x3f9 /* .6. L2 Control Register */
#define L2CR_L2E 0x80000000 /* 0: L2 enable */
@@ -538,12 +561,14 @@
Setting this bit disables instruction
caching. */
#define L2CR_L2I 0x00200000 /* 10: L2 global invalidate. */
+#define L2CR_L2IO_7450 0x00010000 /* 11: L2 instruction-only (MPC745x). */
#define L2CR_L2CTL 0x00100000 /* 11: L2 RAM control (ZZ enable).
Enables automatic operation of the
L2ZZ (low-power mode) signal. */
#define L2CR_L2WT 0x00080000 /* 12: L2 write-through. */
#define L2CR_L2TS 0x00040000 /* 13: L2 test support. */
#define L2CR_L2OH 0x00030000 /* 14-15: L2 output hold. */
+#define L2CR_L2DO_7450 0x00010000 /* 15: L2 data-only (MPC745x). */
#define L2CR_L2SL 0x00008000 /* 16: L2 DLL slow. */
#define L2CR_L2DF 0x00004000 /* 17: L2 differential clock. */
#define L2CR_L2BYP 0x00002000 /* 18: L2 DLL bypass. */
@@ -554,7 +579,6 @@
#define L2CR_L2DRO 0x00000100 /* 23: L2DLL rollover checkstop enable. */
#define L2CR_L2IP 0x00000001 /* 31: L2 global invalidate in */
/* progress (read only). */
-
#define SPR_L3CR 0x3fa /* .6. L3 Control Register */
#define L3CR_L3E 0x80000000 /* 0: L3 enable */
#define L3CR_L3PE 0x40000000 /* 1: L3 data parity enable */
@@ -582,15 +606,15 @@
#define SPR_ICCR 0x3fb /* 4.. Instruction Cache Cachability Register */
#define SPR_THRM1 0x3fc /* .6. Thermal Management Register */
#define SPR_THRM2 0x3fd /* .6. Thermal Management Register */
-#define SPR_THRM_TIN 0x80000000 /* Thermal interrupt bit (RO) */
-#define SPR_THRM_TIV 0x40000000 /* Thermal interrupt valid (RO) */
-#define SPR_THRM_THRESHOLD(x) ((x) << 23) /* Thermal sensor threshold */
-#define SPR_THRM_TID 0x00000004 /* Thermal interrupt direction */
-#define SPR_THRM_TIE 0x00000002 /* Thermal interrupt enable */
-#define SPR_THRM_VALID 0x00000001 /* Valid bit */
+#define SPR_THRM_TIN 0x80000000 /* Thermal interrupt bit (RO) */
+#define SPR_THRM_TIV 0x40000000 /* Thermal interrupt valid (RO) */
+#define SPR_THRM_THRESHOLD(x) ((x) << 23) /* Thermal sensor threshold */
+#define SPR_THRM_TID 0x00000004 /* Thermal interrupt direction */
+#define SPR_THRM_TIE 0x00000002 /* Thermal interrupt enable */
+#define SPR_THRM_VALID 0x00000001 /* Valid bit */
#define SPR_THRM3 0x3fe /* .6. Thermal Management Register */
-#define SPR_THRM_TIMER(x) ((x) << 1) /* Sampling interval timer */
-#define SPR_THRM_ENABLE 0x00000001 /* TAU Enable */
+#define SPR_THRM_TIMER(x) ((x) << 1) /* Sampling interval timer */
+#define SPR_THRM_ENABLE 0x00000001 /* TAU Enable */
#define SPR_FPECR 0x3fe /* .6. Floating-Point Exception Cause Register */
/* Time Base Register declarations */
@@ -600,7 +624,7 @@
#define TBR_TBWU 0x11d /* 468 Time Base Upper - supervisor, write */
/* Performance counter declarations */
-#define PMC_OVERFLOW 0x80000000 /* Counter has overflowed */
+#define PMC_OVERFLOW 0x80000000 /* Counter has overflowed */
/* The first five countable [non-]events are common to many PMC's */
#define PMCN_NONE 0 /* Count nothing */
@@ -616,7 +640,7 @@
#if defined(AIM)
-#define SPR_ESR 0x3d4 /* 4.. Exception Syndrome Register */
+#define SPR_ESR 0x3d4 /* 4.. Exception Syndrome Register */
#define ESR_MCI 0x80000000 /* Machine check - instruction */
#define ESR_PIL 0x08000000 /* Program interrupt - illegal */
#define ESR_PPR 0x04000000 /* Program interrupt - privileged */
@@ -626,7 +650,11 @@
#define ESR_DIZ 0x00800000 /* Data/instruction storage interrupt - zone fault */
#define ESR_U0F 0x00008000 /* Data storage interrupt - U0 fault */
-#elif defined(E500)
+#elif defined(BOOKE)
+
+#define SPR_MCARU 0x239 /* ..8 Machine Check Address register upper bits */
+#define SPR_MCSR 0x23c /* ..8 Machine Check Syndrome register */
+#define SPR_MCAR 0x23d /* ..8 Machine Check Address register */
#define SPR_ESR 0x003e /* ..8 Exception Syndrome Register */
#define ESR_PIL 0x08000000 /* Program interrupt - illegal */
@@ -643,6 +671,19 @@
#define SPR_MCSRR0 0x23a /* ..8 570 Machine check SRR0 */
#define SPR_MCSRR1 0x23b /* ..8 571 Machine check SRR1 */
+#define SPR_MMUCR 0x3b2 /* 4.. MMU Control Register */
+#define MMUCR_SWOA (0x80000000 >> 7)
+#define MMUCR_U1TE (0x80000000 >> 9)
+#define MMUCR_U2SWOAE (0x80000000 >> 10)
+#define MMUCR_DULXE (0x80000000 >> 12)
+#define MMUCR_IULXE (0x80000000 >> 13)
+#define MMUCR_STS (0x80000000 >> 15)
+#define MMUCR_STID_MASK (0xFF000000 >> 24)
+
+#define SPR_MMUCSR0 0x3f4 /* ..8 1012 MMU Control and Status Register 0 */
+#define MMUCSR0_L2TLB0_FI 0x04 /* TLB0 flash invalidate */
+#define MMUCSR0_L2TLB1_FI 0x02 /* TLB1 flash invalidate */
+
#define SPR_SVR 0x3ff /* ..8 1023 System Version Register */
#define SVR_MPC8533 0x8034
#define SVR_MPC8533E 0x803c
@@ -662,10 +703,16 @@
#define SVR_P2010E 0x80eb
#define SVR_P2020 0x80e2
#define SVR_P2020E 0x80ea
+#define SVR_P2041 0x8210
+#define SVR_P2041E 0x8218
+#define SVR_P3041 0x8211
+#define SVR_P3041E 0x8219
#define SVR_P4040 0x8200
#define SVR_P4040E 0x8208
#define SVR_P4080 0x8201
#define SVR_P4080E 0x8209
+#define SVR_P5020 0x8220
+#define SVR_P5020E 0x8228
#define SVR_VER(svr) (((svr) >> 16) & 0xffff)
#define SPR_PID0 0x030 /* ..8 Process ID Register 0 */
@@ -708,6 +755,18 @@
#define SPR_MAS5 0x275 /* ..8 MMU Assist Register 5 Book-E */
#define SPR_MAS6 0x276 /* ..8 MMU Assist Register 6 Book-E/e500 */
#define SPR_MAS7 0x3B0 /* ..8 MMU Assist Register 7 Book-E/e500 */
+#define SPR_MAS8 0x155 /* ..8 MMU Assist Register 8 Book-E/e500 */
+
+#define SPR_L1CFG0 0x203 /* ..8 L1 cache configuration register 0 */
+#define SPR_L1CFG1 0x204 /* ..8 L1 cache configuration register 1 */
+
+#define SPR_CCR1 0x378
+#define CCR1_L2COBE 0x00000040
+
+#define DCR_L2DCDCRAI 0x0000 /* L2 D-Cache DCR Address Pointer */
+#define DCR_L2DCDCRDI 0x0001 /* L2 D-Cache DCR Data Indirect */
+#define DCR_L2CR0 0x00 /* L2 Cache Configuration Register 0 */
+#define L2CR0_AS 0x30000000
#define SPR_L1CSR0 0x3F2 /* ..8 L1 Cache Control and Status Register 0 */
#define L1CSR0_DCPE 0x00010000 /* Data Cache Parity Enable */
@@ -716,13 +775,20 @@
#define L1CSR0_DCE 0x00000001 /* Data Cache Enable */
#define SPR_L1CSR1 0x3F3 /* ..8 L1 Cache Control and Status Register 1 */
#define L1CSR1_ICPE 0x00010000 /* Instruction Cache Parity Enable */
+#define L1CSR1_ICUL 0x00000400 /* Instr Cache Unable to Lock */
#define L1CSR1_ICLFR 0x00000100 /* Instruction Cache Lock Bits Flash Reset */
#define L1CSR1_ICFI 0x00000002 /* Instruction Cache Flash Invalidate */
#define L1CSR1_ICE 0x00000001 /* Instruction Cache Enable */
+#define SPR_L2CSR0 0x3F9 /* ..8 L2 Cache Control and Status Register 0 */
+#define L2CSR0_L2E 0x80000000 /* L2 Cache Enable */
+#define L2CSR0_L2PE 0x40000000 /* L2 Cache Parity Enable */
+#define L2CSR0_L2FI 0x00200000 /* L2 Cache Flash Invalidate */
+#define L2CSR0_L2LFC 0x00000400 /* L2 Cache Lock Flags Clear */
+
#define SPR_BUCSR 0x3F5 /* ..8 Branch Unit Control and Status Register */
#define BUCSR_BPEN 0x00000001 /* Branch Prediction Enable */
+#define BUCSR_BBFI 0x00000200 /* Branch Buffer Flash Invalidate */
-#endif /* #elif defined(E500) */
-
+#endif /* BOOKE */
#endif /* !_POWERPC_SPR_H_ */
diff --git a/freebsd/sys/rpc/types.h b/freebsd/sys/rpc/types.h
index dd51a8af..62b5aca1 100644
--- a/freebsd/sys/rpc/types.h
+++ b/freebsd/sys/rpc/types.h
@@ -94,7 +94,7 @@ struct netbuf {
};
/*
- * The format of the addres and options arguments of the XTI t_bind call.
+ * The format of the address and options arguments of the XTI t_bind call.
* Only provided for compatibility, it should not be used.
*/
diff --git a/freebsd/sys/security/audit/audit.h b/freebsd/sys/security/audit/audit.h
index 69f6c328..4483d1b3 100644
--- a/freebsd/sys/security/audit/audit.h
+++ b/freebsd/sys/security/audit/audit.h
@@ -95,7 +95,7 @@ void audit_arg_pid(pid_t pid);
void audit_arg_process(struct proc *p);
void audit_arg_signum(u_int signum);
void audit_arg_socket(int sodomain, int sotype, int soprotocol);
-void audit_arg_sockaddr(struct thread *td, struct sockaddr *sa);
+void audit_arg_sockaddr(struct thread *td, int dirfd, struct sockaddr *sa);
void audit_arg_auid(uid_t auid);
void audit_arg_auditinfo(struct auditinfo *au_info);
void audit_arg_auditinfo_addr(struct auditinfo_addr *au_info);
@@ -114,7 +114,8 @@ void audit_arg_auditon(union auditon_udata *udata);
void audit_arg_file(struct proc *p, struct file *fp);
void audit_arg_argv(char *argv, int argc, int length);
void audit_arg_envv(char *envv, int envc, int length);
-void audit_arg_rights(cap_rights_t rights);
+void audit_arg_rights(cap_rights_t *rightsp);
+void audit_arg_fcntl_rights(uint32_t fcntlrights);
void audit_sysclose(struct thread *td, int fd);
void audit_cred_copy(struct ucred *src, struct ucred *dest);
void audit_cred_destroy(struct ucred *cred);
@@ -211,6 +212,11 @@ void audit_thread_free(struct thread *td);
audit_arg_groupset((gidset), (gidset_size)); \
} while (0)
+#define AUDIT_ARG_LOGIN(login) do { \
+ if (AUDITING_TD(curthread)) \
+ audit_arg_login((login)); \
+} while (0)
+
#define AUDIT_ARG_MODE(mode) do { \
if (AUDITING_TD(curthread)) \
audit_arg_mode((mode)); \
@@ -241,6 +247,11 @@ void audit_thread_free(struct thread *td);
audit_arg_rights((rights)); \
} while (0)
+#define AUDIT_ARG_FCNTL_RIGHTS(fcntlrights) do { \
+ if (AUDITING_TD(curthread)) \
+ audit_arg_fcntl_rights((fcntlrights)); \
+} while (0)
+
#define AUDIT_ARG_RUID(ruid) do { \
if (AUDITING_TD(curthread)) \
audit_arg_ruid((ruid)); \
@@ -261,6 +272,11 @@ void audit_thread_free(struct thread *td);
audit_arg_socket((sodomain), (sotype), (soprotocol)); \
} while (0)
+#define AUDIT_ARG_SOCKADDR(td, dirfd, sa) do { \
+ if (AUDITING_TD(curthread)) \
+ audit_arg_sockaddr((td), (dirfd), (sa)); \
+} while (0)
+
#define AUDIT_ARG_SUID(suid) do { \
if (AUDITING_TD(curthread)) \
audit_arg_suid((suid)); \
@@ -343,16 +359,19 @@ void audit_thread_free(struct thread *td);
#define AUDIT_ARG_FFLAGS(fflags)
#define AUDIT_ARG_GID(gid)
#define AUDIT_ARG_GROUPSET(gidset, gidset_size)
+#define AUDIT_ARG_LOGIN(login)
#define AUDIT_ARG_MODE(mode)
#define AUDIT_ARG_OWNER(uid, gid)
#define AUDIT_ARG_PID(pid)
#define AUDIT_ARG_PROCESS(p)
#define AUDIT_ARG_RGID(rgid)
#define AUDIT_ARG_RIGHTS(rights)
+#define AUDIT_ARG_FCNTL_RIGHTS(fcntlrights)
#define AUDIT_ARG_RUID(ruid)
#define AUDIT_ARG_SIGNUM(signum)
#define AUDIT_ARG_SGID(sgid)
#define AUDIT_ARG_SOCKET(sodomain, sotype, soprotocol)
+#define AUDIT_ARG_SOCKADDR(td, dirfd, sa)
#define AUDIT_ARG_SUID(suid)
#define AUDIT_ARG_TEXT(text)
#define AUDIT_ARG_UID(uid)
diff --git a/freebsd/sys/security/mac/mac_framework.h b/freebsd/sys/security/mac/mac_framework.h
index 92aedea4..7068d477 100644
--- a/freebsd/sys/security/mac/mac_framework.h
+++ b/freebsd/sys/security/mac/mac_framework.h
@@ -195,8 +195,6 @@ void mac_mount_create(struct ucred *cred, struct mount *mp);
void mac_mount_destroy(struct mount *);
void mac_mount_init(struct mount *);
-void mac_netatalk_aarp_send(struct ifnet *ifp, struct mbuf *m);
-
void mac_netinet_arp_send(struct ifnet *ifp, struct mbuf *m);
void mac_netinet_firewall_reply(struct mbuf *mrecv, struct mbuf *msend);
void mac_netinet_firewall_send(struct mbuf *m);
@@ -243,6 +241,8 @@ int mac_posixshm_check_mmap(struct ucred *cred, struct shmfd *shmfd,
int prot, int flags);
int mac_posixshm_check_open(struct ucred *cred, struct shmfd *shmfd,
accmode_t accmode);
+int mac_posixshm_check_read(struct ucred *active_cred,
+ struct ucred *file_cred, struct shmfd *shmfd);
int mac_posixshm_check_setmode(struct ucred *cred, struct shmfd *shmfd,
mode_t mode);
int mac_posixshm_check_setowner(struct ucred *cred, struct shmfd *shmfd,
@@ -252,6 +252,8 @@ int mac_posixshm_check_stat(struct ucred *active_cred,
int mac_posixshm_check_truncate(struct ucred *active_cred,
struct ucred *file_cred, struct shmfd *shmfd);
int mac_posixshm_check_unlink(struct ucred *cred, struct shmfd *shmfd);
+int mac_posixshm_check_write(struct ucred *active_cred,
+ struct ucred *file_cred, struct shmfd *shmfd);
void mac_posixshm_create(struct ucred *cred, struct shmfd *shmfd);
void mac_posixshm_destroy(struct shmfd *);
void mac_posixshm_init(struct shmfd *);
diff --git a/freebsd/sys/sh/include/machine/in_cksum.h b/freebsd/sys/sh/include/machine/in_cksum.h
index 633efa1f..72edfba9 100644
--- a/freebsd/sys/sh/include/machine/in_cksum.h
+++ b/freebsd/sys/sh/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/sparc/include/machine/in_cksum.h b/freebsd/sys/sparc/include/machine/in_cksum.h
index 633efa1f..72edfba9 100644
--- a/freebsd/sys/sparc/include/machine/in_cksum.h
+++ b/freebsd/sys/sparc/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/sparc64/include/machine/in_cksum.h b/freebsd/sys/sparc64/include/machine/in_cksum.h
index d5d167f5..b13882e0 100644
--- a/freebsd/sys/sparc64/include/machine/in_cksum.h
+++ b/freebsd/sys/sparc64/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/sys/_bitset.h b/freebsd/sys/sys/_bitset.h
new file mode 100644
index 00000000..2f5301d5
--- /dev/null
+++ b/freebsd/sys/sys/_bitset.h
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org>
+ * All rights reserved.
+ *
+ * Copyright (c) 2008 Nokia Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS__BITSET_H_
+#define _SYS__BITSET_H_
+
+/*
+ * Macros addressing word and bit within it, tuned to make compiler
+ * optimize cases when SETSIZE fits into single machine word.
+ */
+#define _BITSET_BITS (sizeof(long) * 8)
+
+#define __howmany(x, y) (((x) + ((y) - 1)) / (y))
+
+#define __bitset_words(_s) (__howmany(_s, _BITSET_BITS))
+
+#define BITSET_DEFINE(t, _s) \
+struct t { \
+ long __bits[__bitset_words((_s))]; \
+}
+
+/*
+ * Helper to declare a bitset without it's size being a constant.
+ *
+ * Sadly we cannot declare a bitset struct with '__bits[]', because it's
+ * the only member of the struct and the compiler complains.
+ */
+#define BITSET_DEFINE_VAR(t) BITSET_DEFINE(t, 1)
+
+#endif /* !_SYS__BITSET_H_ */
diff --git a/freebsd/sys/sys/_callout.h b/freebsd/sys/sys/_callout.h
index b8c3ce92..a9134c8d 100644
--- a/freebsd/sys/sys/_callout.h
+++ b/freebsd/sys/sys/_callout.h
@@ -42,19 +42,23 @@
struct lock_object;
-SLIST_HEAD(callout_list, callout);
+LIST_HEAD(callout_list, callout);
+SLIST_HEAD(callout_slist, callout);
TAILQ_HEAD(callout_tailq, callout);
struct callout {
union {
+ LIST_ENTRY(callout) le;
SLIST_ENTRY(callout) sle;
TAILQ_ENTRY(callout) tqe;
} c_links;
- int c_time; /* ticks to the event */
+ sbintime_t c_time; /* ticks to the event */
+ sbintime_t c_precision; /* delta allowed wrt opt */
void *c_arg; /* function argument */
void (*c_func)(void *); /* function to call */
struct lock_object *c_lock; /* lock to handle */
- int c_flags; /* state of this entry */
+ short c_flags; /* User State */
+ short c_iflags; /* Internal State */
volatile int c_cpu; /* CPU we're scheduled on */
};
diff --git a/freebsd/sys/sys/_cpuset.h b/freebsd/sys/sys/_cpuset.h
new file mode 100644
index 00000000..1ddafac2
--- /dev/null
+++ b/freebsd/sys/sys/_cpuset.h
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org>
+ * All rights reserved.
+ *
+ * Copyright (c) 2008 Nokia Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS__CPUSET_H_
+#define _SYS__CPUSET_H_
+
+#include <sys/_bitset.h>
+
+#ifdef _KERNEL
+#define CPU_SETSIZE MAXCPU
+#endif
+
+#define CPU_MAXSIZE 256
+
+#ifndef CPU_SETSIZE
+#define CPU_SETSIZE CPU_MAXSIZE
+#endif
+
+BITSET_DEFINE(_cpuset, CPU_SETSIZE);
+typedef struct _cpuset cpuset_t;
+
+#endif /* !_SYS__CPUSET_H_ */
diff --git a/freebsd/sys/sys/_mutex.h b/freebsd/sys/sys/_mutex.h
index 2f4a674e..96f53238 100644
--- a/freebsd/sys/sys/_mutex.h
+++ b/freebsd/sys/sys/_mutex.h
@@ -34,8 +34,16 @@
#include <machine/rtems-bsd-mutex.h>
#endif /* __rtems__ */
+#include <machine/param.h>
+
/*
* Sleep/spin mutex.
+ *
+ * All mutex implementations must always have a member called mtx_lock.
+ * Other locking primitive structures are not allowed to use this name
+ * for their members.
+ * If this rule needs to change, the bits in the mutex implementation must
+ * be modified appropriately.
*/
struct mtx {
struct lock_object lock_object; /* Common lock properties. */
@@ -46,4 +54,22 @@ struct mtx {
#endif /* __rtems__ */
};
+/*
+ * Members of struct mtx_padalign must mirror members of struct mtx.
+ * mtx_padalign mutexes can use the mtx(9) API transparently without
+ * modification.
+ * Pad-aligned mutexes used within structures should generally be the
+ * first member of the struct. Otherwise, the compiler can generate
+ * additional padding for the struct to keep a correct alignment for
+ * the mutex.
+ */
+#ifndef __rtems__
+struct mtx_padalign {
+ struct lock_object lock_object; /* Common lock properties. */
+ volatile uintptr_t mtx_lock; /* Owner and flags. */
+} __aligned(CACHE_LINE_SIZE);
+#else /* __rtems__ */
+#define mtx_padalign mtx
+#endif /* __rtems__ */
+
#endif /* !_SYS__MUTEX_H_ */
diff --git a/freebsd/sys/sys/_pctrie.h b/freebsd/sys/sys/_pctrie.h
new file mode 100644
index 00000000..45f69b20
--- /dev/null
+++ b/freebsd/sys/sys/_pctrie.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2013 EMC Corp.
+ * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
+ * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __SYS_PCTRIE_H_
+#define __SYS_PCTRIE_H_
+
+/*
+ * Radix tree root.
+ */
+struct pctrie {
+ uintptr_t pt_root;
+};
+
+#ifdef _KERNEL
+
+static __inline boolean_t
+pctrie_is_empty(struct pctrie *ptree)
+{
+
+ return (ptree->pt_root == 0);
+}
+
+#endif /* _KERNEL */
+#endif /* !__SYS_PCTRIE_H_ */
diff --git a/freebsd/sys/sys/_rmlock.h b/freebsd/sys/sys/_rmlock.h
index 46672bb2..adc2bc56 100644
--- a/freebsd/sys/sys/_rmlock.h
+++ b/freebsd/sys/sys/_rmlock.h
@@ -32,17 +32,17 @@
#ifndef _SYS__RMLOCK_H_
#define _SYS__RMLOCK_H_
-/*
- * XXXUPS remove as soon as we have per cpu variable
- * linker sets and can define rm_queue in _rm_lock.h
-*/
-#include <sys/pcpu.h>
/*
* Mostly reader/occasional writer lock.
*/
LIST_HEAD(rmpriolist,rm_priotracker);
+struct rm_queue {
+ struct rm_queue *volatile rmq_next;
+ struct rm_queue *volatile rmq_prev;
+};
+
#ifndef __rtems__
struct rmlock {
struct lock_object lock_object;
@@ -59,8 +59,8 @@ struct rmlock {
#define rm_lock_mtx _rm_lock._rm_lock_mtx
#define rm_lock_sx _rm_lock._rm_lock_sx
#else /* __rtems__ */
-#include <sys/rwlock.h>
-#define rmlock rwlock
+#include <sys/_rwlock.h>
+#define rmlock rwlock
#endif /* __rtems__ */
struct rm_priotracker {
diff --git a/freebsd/sys/sys/_rwlock.h b/freebsd/sys/sys/_rwlock.h
index 95b21283..029fb98b 100644
--- a/freebsd/sys/sys/_rwlock.h
+++ b/freebsd/sys/sys/_rwlock.h
@@ -32,8 +32,16 @@
#include <machine/rtems-bsd-mutex.h>
#endif /* __rtems__ */
+#include <machine/param.h>
+
/*
* Reader/writer lock.
+ *
+ * All reader/writer lock implementations must always have a member
+ * called rw_lock. Other locking primitive structures are not allowed to
+ * use this name for their members.
+ * If this rule needs to change, the bits in the reader/writer lock
+ * implementation must be modified appropriately.
*/
struct rwlock {
struct lock_object lock_object;
@@ -44,4 +52,22 @@ struct rwlock {
#endif /* __rtems__ */
};
+#ifndef __rtems__
+/*
+ * Members of struct rwlock_padalign must mirror members of struct rwlock.
+ * rwlock_padalign rwlocks can use the rwlock(9) API transparently without
+ * modification.
+ * Pad-aligned rwlocks used within structures should generally be the
+ * first member of the struct. Otherwise, the compiler can generate
+ * additional padding for the struct to keep a correct alignment for
+ * the rwlock.
+ */
+struct rwlock_padalign {
+ struct lock_object lock_object;
+ volatile uintptr_t rw_lock;
+} __aligned(CACHE_LINE_SIZE);
+#else /* __rtems__ */
+#define rwlock_padalign rwlock
+#endif /* __rtems__ */
+
#endif /* !_SYS__RWLOCK_H_ */
diff --git a/freebsd/sys/sys/_task.h b/freebsd/sys/sys/_task.h
index 11fd1bc0..d3be7198 100644
--- a/freebsd/sys/sys/_task.h
+++ b/freebsd/sys/sys/_task.h
@@ -42,13 +42,32 @@
* (q) taskqueue lock
*/
typedef void task_fn_t(void *context, int pending);
+typedef void gtask_fn_t(void *context);
struct task {
STAILQ_ENTRY(task) ta_link; /* (q) link for queue */
- u_short ta_pending; /* (q) count times queued */
+ uint16_t ta_pending; /* (q) count times queued */
u_short ta_priority; /* (c) Priority */
task_fn_t *ta_func; /* (c) task handler */
void *ta_context; /* (c) argument for handler */
};
+struct gtask {
+ STAILQ_ENTRY(gtask) ta_link; /* (q) link for queue */
+ uint16_t ta_flags; /* (q) state flags */
+ u_short ta_priority; /* (c) Priority */
+ gtask_fn_t *ta_func; /* (c) task handler */
+ void *ta_context; /* (c) argument for handler */
+};
+
+struct grouptask {
+ struct gtask gt_task;
+ void *gt_taskqueue;
+ LIST_ENTRY(grouptask) gt_list;
+ void *gt_uniq;
+ char *gt_name;
+ int16_t gt_irq;
+ int16_t gt_cpu;
+};
+
#endif /* !_SYS__TASK_H_ */
diff --git a/freebsd/sys/sys/_unrhdr.h b/freebsd/sys/sys/_unrhdr.h
new file mode 100644
index 00000000..f3c25d16
--- /dev/null
+++ b/freebsd/sys/sys/_unrhdr.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2004 Poul-Henning Kamp
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_UNRHDR_H
+#define _SYS_UNRHDR_H
+
+#include <sys/queue.h>
+
+struct mtx;
+
+/* Header element for a unr number space. */
+
+struct unrhdr {
+ TAILQ_HEAD(unrhd,unr) head;
+ u_int low; /* Lowest item */
+ u_int high; /* Highest item */
+ u_int busy; /* Count of allocated items */
+ u_int alloc; /* Count of memory allocations */
+ u_int first; /* items in allocated from start */
+ u_int last; /* items free at end */
+ struct mtx *mtx;
+ TAILQ_HEAD(unrfr,unr) ppfree; /* Items to be freed after mtx
+ lock dropped */
+};
+
+#endif
diff --git a/freebsd/sys/sys/acl.h b/freebsd/sys/sys/acl.h
index 77ebf89d..21b3e381 100644
--- a/freebsd/sys/sys/acl.h
+++ b/freebsd/sys/sys/acl.h
@@ -249,11 +249,12 @@ typedef void *acl_t;
#define ACL_ENTRY_INHERIT_ONLY 0x0008
#define ACL_ENTRY_SUCCESSFUL_ACCESS 0x0010
#define ACL_ENTRY_FAILED_ACCESS 0x0020
+#define ACL_ENTRY_INHERITED 0x0080
#define ACL_FLAGS_BITS (ACL_ENTRY_FILE_INHERIT | \
ACL_ENTRY_DIRECTORY_INHERIT | ACL_ENTRY_NO_PROPAGATE_INHERIT | \
ACL_ENTRY_INHERIT_ONLY | ACL_ENTRY_SUCCESSFUL_ACCESS | \
- ACL_ENTRY_FAILED_ACCESS)
+ ACL_ENTRY_FAILED_ACCESS | ACL_ENTRY_INHERITED)
/*
* Undefined value in ae_id field. ae_id should be set to this value
diff --git a/freebsd/sys/sys/aio.h b/freebsd/sys/sys/aio.h
index 02e34a85..ab6f766f 100644
--- a/freebsd/sys/sys/aio.h
+++ b/freebsd/sys/sys/aio.h
@@ -21,6 +21,11 @@
#include <sys/types.h>
#include <sys/signal.h>
+#ifdef _KERNEL
+#include <sys/queue.h>
+#include <sys/event.h>
+#include <sys/signalvar.h>
+#endif
/*
* Returned by aio_cancel:
@@ -37,6 +42,7 @@
#define LIO_READ 0x2
#ifdef _KERNEL
#define LIO_SYNC 0x3
+#define LIO_MLOCK 0x4
#endif
/*
@@ -45,10 +51,30 @@
#define LIO_NOWAIT 0x0
#define LIO_WAIT 0x1
+#ifndef __rtems__
/*
* Maximum number of allowed LIO operations
*/
#define AIO_LISTIO_MAX 16
+#endif /* __rtems__ */
+
+#ifdef _KERNEL
+
+/* Default values of tunables for the AIO worker pool. */
+
+#ifndef MAX_AIO_PROCS
+#define MAX_AIO_PROCS 32
+#endif
+
+#ifndef TARGET_AIO_PROCS
+#define TARGET_AIO_PROCS 4
+#endif
+
+#ifndef AIOD_LIFETIME_DEFAULT
+#define AIOD_LIFETIME_DEFAULT (30 * hz)
+#endif
+
+#endif
/*
* Private members for aiocb -- don't access
@@ -78,7 +104,105 @@ typedef struct aiocb {
#endif
} aiocb_t;
-#ifndef _KERNEL
+#ifdef _KERNEL
+
+typedef void aio_cancel_fn_t(struct kaiocb *);
+typedef void aio_handle_fn_t(struct kaiocb *);
+
+/*
+ * Kernel version of an I/O control block.
+ *
+ * Locking key:
+ * * - need not protected
+ * a - locked by kaioinfo lock
+ * b - locked by backend lock
+ * c - locked by aio_job_mtx
+ */
+struct kaiocb {
+ TAILQ_ENTRY(kaiocb) list; /* (b) backend-specific list of jobs */
+ TAILQ_ENTRY(kaiocb) plist; /* (a) lists of pending / done jobs */
+ TAILQ_ENTRY(kaiocb) allist; /* (a) list of all jobs in proc */
+ int jobflags; /* (a) job flags */
+ int inblock; /* (*) input blocks */
+ int outblock; /* (*) output blocks */
+ int msgsnd; /* (*) messages sent */
+ int msgrcv; /* (*) messages received */
+ struct proc *userproc; /* (*) user process */
+ struct ucred *cred; /* (*) active credential when created */
+ struct file *fd_file; /* (*) pointer to file structure */
+ struct aioliojob *lio; /* (*) optional lio job */
+ struct aiocb *ujob; /* (*) pointer in userspace of aiocb */
+ struct knlist klist; /* (a) list of knotes */
+ struct aiocb uaiocb; /* (*) copy of user I/O control block */
+ ksiginfo_t ksi; /* (a) realtime signal info */
+ uint64_t seqno; /* (*) job number */
+ aio_cancel_fn_t *cancel_fn; /* (a) backend cancel function */
+ aio_handle_fn_t *handle_fn; /* (c) backend handle function */
+ union { /* Backend-specific data fields */
+ struct { /* BIO backend */
+ struct bio *bp; /* (*) BIO pointer */
+ struct buf *pbuf; /* (*) buffer pointer */
+ struct vm_page *pages[btoc(MAXPHYS)+1]; /* (*) */
+ int npages; /* (*) number of pages */
+ };
+ struct { /* fsync() requests */
+ int pending; /* (a) number of pending I/O */
+ };
+ struct {
+ void *backend1;
+ void *backend2;
+ long backend3;
+ int backend4;
+ };
+ };
+};
+
+struct socket;
+struct sockbuf;
+
+/*
+ * AIO backends should permit cancellation of queued requests waiting to
+ * be serviced by installing a cancel routine while the request is
+ * queued. The cancellation routine should dequeue the request if
+ * necessary and cancel it. Care must be used to handle races between
+ * queueing and dequeueing requests and cancellation.
+ *
+ * When queueing a request somewhere such that it can be cancelled, the
+ * caller should:
+ *
+ * 1) Acquire lock that protects the associated queue.
+ * 2) Call aio_set_cancel_function() to install the cancel routine.
+ * 3) If that fails, the request has a pending cancel and should be
+ * cancelled via aio_cancel().
+ * 4) Queue the request.
+ *
+ * When dequeueing a request to service it or hand it off to somewhere else,
+ * the caller should:
+ *
+ * 1) Acquire the lock that protects the associated queue.
+ * 2) Dequeue the request.
+ * 3) Call aio_clear_cancel_function() to clear the cancel routine.
+ * 4) If that fails, the cancel routine is about to be called. The
+ * caller should ignore the request.
+ *
+ * The cancel routine should:
+ *
+ * 1) Acquire the lock that protects the associated queue.
+ * 2) Call aio_cancel_cleared() to determine if the request is already
+ * dequeued due to a race with dequeueing thread.
+ * 3) If that fails, dequeue the request.
+ * 4) Cancel the request via aio_cancel().
+ */
+
+bool aio_cancel_cleared(struct kaiocb *job);
+void aio_cancel(struct kaiocb *job);
+bool aio_clear_cancel_function(struct kaiocb *job);
+void aio_complete(struct kaiocb *job, long status, int error);
+void aio_schedule(struct kaiocb *job, aio_handle_fn_t *func);
+bool aio_set_cancel_function(struct kaiocb *job, aio_cancel_fn_t *func);
+void aio_switch_vmspace(struct kaiocb *job);
+
+#else /* !_KERNEL */
struct timespec;
@@ -99,7 +223,8 @@ int aio_write(struct aiocb *);
* "acb_list" is an array of "nacb_listent" I/O control blocks.
* when all I/Os are complete, the optional signal "sig" is sent.
*/
-int lio_listio(int, struct aiocb * const [], int, struct sigevent *);
+int lio_listio(int, struct aiocb *__restrict const *__restrict, int,
+ struct sigevent *);
/*
* Get completion status
@@ -126,21 +251,18 @@ int aio_cancel(int, struct aiocb *);
*/
int aio_suspend(const struct aiocb * const[], int, const struct timespec *);
+/*
+ * Asynchronous mlock
+ */
+int aio_mlock(struct aiocb *);
+
#ifdef __BSD_VISIBLE
-int aio_waitcomplete(struct aiocb **, struct timespec *);
+ssize_t aio_waitcomplete(struct aiocb **, struct timespec *);
#endif
int aio_fsync(int op, struct aiocb *aiocbp);
__END_DECLS
-#else
+#endif /* !_KERNEL */
-/* Forward declarations for prototypes below. */
-struct socket;
-struct sockbuf;
-
-extern void (*aio_swake)(struct socket *, struct sockbuf *);
-
-#endif
-
-#endif
+#endif /* !_SYS_AIO_H_ */
diff --git a/freebsd/sys/sys/ata.h b/freebsd/sys/sys/ata.h
index f46dd50c..72104140 100644
--- a/freebsd/sys/sys/ata.h
+++ b/freebsd/sys/sys/ata.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org>
+ * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -105,6 +105,10 @@ struct ata_params {
/*069*/ u_int16_t support3;
#define ATA_SUPPORT_RZAT 0x0020
#define ATA_SUPPORT_DRAT 0x4000
+#define ATA_SUPPORT_ZONE_MASK 0x0003
+#define ATA_SUPPORT_ZONE_NR 0x0000
+#define ATA_SUPPORT_ZONE_HOST_AWARE 0x0001
+#define ATA_SUPPORT_ZONE_DEV_MANAGED 0x0002
u_int16_t reserved70;
/*071*/ u_int16_t rlsovlap; /* rel time (us) for overlap */
/*072*/ u_int16_t rlsservice; /* rel time (us) for service */
@@ -228,7 +232,14 @@ struct ata_params {
#define ATA_SUPPORT_RWLOGDMAEXT 0x0008
#define ATA_SUPPORT_MICROCODE3 0x0010
#define ATA_SUPPORT_FREEFALL 0x0020
+#define ATA_SUPPORT_SENSE_REPORT 0x0040
+#define ATA_SUPPORT_EPC 0x0080
/*120*/ u_int16_t enabled2;
+#define ATA_ENABLED_WRITEREADVERIFY 0x0002
+#define ATA_ENABLED_WRITEUNCORREXT 0x0004
+#define ATA_ENABLED_FREEFALL 0x0020
+#define ATA_ENABLED_SENSE_REPORT 0x0040
+#define ATA_ENABLED_EPC 0x0080
u_int16_t reserved121[6];
/*127*/ u_int16_t removable_status;
/*128*/ u_int16_t security_status;
@@ -262,6 +273,8 @@ struct ata_params {
/*215*/ u_int16_t nv_cache_size_1;
u_int16_t nv_cache_size_2;
/*217*/ u_int16_t media_rotation_rate;
+#define ATA_RATE_NOT_REPORTED 0x0000
+#define ATA_RATE_NON_ROTATING 0x0001
u_int16_t reserved218;
/*219*/ u_int16_t nv_cache_opt;
/*220*/ u_int16_t wrv_mode;
@@ -296,8 +309,14 @@ struct ata_params {
#define ATA_MAX_28BIT_LBA 268435455UL
/* ATA Status Register */
-#define ATA_STATUS_ERROR 0x01
-#define ATA_STATUS_DEVICE_FAULT 0x20
+#define ATA_STATUS_ERROR 0x01
+#define ATA_STATUS_SENSE_AVAIL 0x02
+#define ATA_STATUS_ALIGN_ERR 0x04
+#define ATA_STATUS_DATA_REQ 0x08
+#define ATA_STATUS_DEF_WRITE_ERR 0x10
+#define ATA_STATUS_DEVICE_FAULT 0x20
+#define ATA_STATUS_DEVICE_READY 0x40
+#define ATA_STATUS_BUSY 0x80
/* ATA Error Register */
#define ATA_ERROR_ABORT 0x04
@@ -333,6 +352,7 @@ struct ata_params {
#define ATA_UDMA6 0x46
#define ATA_SA150 0x47
#define ATA_SA300 0x48
+#define ATA_SA600 0x49
#define ATA_DMA_MAX 0x4f
@@ -365,13 +385,36 @@ struct ata_params {
#define ATA_WRITE_LOG_EXT 0x3f
#define ATA_READ_VERIFY 0x40
#define ATA_READ_VERIFY48 0x42
+#define ATA_WRITE_UNCORRECTABLE48 0x45 /* write uncorrectable 48bit LBA */
+#define ATA_WU_PSEUDO 0x55 /* pseudo-uncorrectable error */
+#define ATA_WU_FLAGGED 0xaa /* flagged-uncorrectable error */
#define ATA_READ_LOG_DMA_EXT 0x47 /* read log DMA ext - PIO Data-In */
+#define ATA_ZAC_MANAGEMENT_IN 0x4a /* ZAC management in */
+#define ATA_ZM_REPORT_ZONES 0x00 /* report zones */
#define ATA_READ_FPDMA_QUEUED 0x60 /* read DMA NCQ */
#define ATA_WRITE_FPDMA_QUEUED 0x61 /* write DMA NCQ */
+#define ATA_NCQ_NON_DATA 0x63 /* NCQ non-data command */
+#define ATA_ABORT_NCQ_QUEUE 0x00 /* abort NCQ queue */
+#define ATA_DEADLINE_HANDLING 0x01 /* deadline handling */
+#define ATA_SET_FEATURES 0x05 /* set features */
+#define ATA_ZERO_EXT 0x06 /* zero ext */
+#define ATA_NCQ_ZAC_MGMT_OUT 0x07 /* NCQ ZAC mgmt out no data */
#define ATA_SEND_FPDMA_QUEUED 0x64 /* send DMA NCQ */
-#define ATA_RECV_FPDMA_QUEUED 0x65 /* recieve DMA NCQ */
+#define ATA_SFPDMA_DSM 0x00 /* Data set management */
+#define ATA_SFPDMA_DSM_TRIM 0x01 /* Set trim bit in auxiliary */
+#define ATA_SFPDMA_HYBRID_EVICT 0x01 /* Hybrid Evict */
+#define ATA_SFPDMA_WLDMA 0x02 /* Write Log DMA EXT */
+#define ATA_SFPDMA_ZAC_MGMT_OUT 0x03 /* NCQ ZAC mgmt out w/data */
+#define ATA_RECV_FPDMA_QUEUED 0x65 /* receive DMA NCQ */
+#define ATA_RFPDMA_RL_DMA_EXT 0x00 /* Read Log DMA EXT */
+#define ATA_RFPDMA_ZAC_MGMT_IN 0x02 /* NCQ ZAC mgmt in w/data */
#define ATA_SEP_ATTN 0x67 /* SEP request */
#define ATA_SEEK 0x70 /* seek */
+#define ATA_ZAC_MANAGEMENT_OUT 0x9f /* ZAC management out */
+#define ATA_ZM_CLOSE_ZONE 0x01 /* close zone */
+#define ATA_ZM_FINISH_ZONE 0x02 /* finish zone */
+#define ATA_ZM_OPEN_ZONE 0x03 /* open zone */
+#define ATA_ZM_RWP 0x04 /* reset write pointer */
#define ATA_PACKET_CMD 0xa0 /* packet command */
#define ATA_ATAPI_IDENTIFY 0xa1 /* get ATAPI params*/
#define ATA_SERVICE 0xa2 /* service command */
@@ -391,24 +434,36 @@ struct ata_params {
#define ATA_IDLE_CMD 0xe3 /* idle */
#define ATA_READ_BUFFER 0xe4 /* read buffer */
#define ATA_READ_PM 0xe4 /* read portmultiplier */
+#define ATA_CHECK_POWER_MODE 0xe5 /* device power mode */
#define ATA_SLEEP 0xe6 /* sleep */
#define ATA_FLUSHCACHE 0xe7 /* flush cache to disk */
#define ATA_WRITE_PM 0xe8 /* write portmultiplier */
#define ATA_FLUSHCACHE48 0xea /* flush cache to disk */
#define ATA_ATA_IDENTIFY 0xec /* get ATA params */
#define ATA_SETFEATURES 0xef /* features command */
-#define ATA_SF_SETXFER 0x03 /* set transfer mode */
#define ATA_SF_ENAB_WCACHE 0x02 /* enable write cache */
#define ATA_SF_DIS_WCACHE 0x82 /* disable write cache */
+#define ATA_SF_SETXFER 0x03 /* set transfer mode */
+#define ATA_SF_APM 0x05 /* Enable APM feature set */
#define ATA_SF_ENAB_PUIS 0x06 /* enable PUIS */
#define ATA_SF_DIS_PUIS 0x86 /* disable PUIS */
#define ATA_SF_PUIS_SPINUP 0x07 /* PUIS spin-up */
+#define ATA_SF_WRV 0x0b /* Enable Write-Read-Verify */
+#define ATA_SF_DLC 0x0c /* Enable device life control */
+#define ATA_SF_SATA 0x10 /* Enable use of SATA feature */
+#define ATA_SF_FFC 0x41 /* Free-fall Control */
+#define ATA_SF_MHIST 0x43 /* Set Max Host Sect. Times */
+#define ATA_SF_RATE 0x45 /* Set Rate Basis */
+#define ATA_SF_EPC 0x4A /* Extended Power Conditions */
#define ATA_SF_ENAB_RCACHE 0xaa /* enable readahead cache */
#define ATA_SF_DIS_RCACHE 0x55 /* disable readahead cache */
#define ATA_SF_ENAB_RELIRQ 0x5d /* enable release interrupt */
#define ATA_SF_DIS_RELIRQ 0xdd /* disable release interrupt */
#define ATA_SF_ENAB_SRVIRQ 0x5e /* enable service interrupt */
#define ATA_SF_DIS_SRVIRQ 0xde /* disable service interrupt */
+#define ATA_SF_LPSAERC 0x62 /* Long Phys Sect Align ErrRep*/
+#define ATA_SF_DSN 0x63 /* Device Stats Notification */
+#define ATA_CHECK_POWER_MODE 0xe5 /* Check Power Mode */
#define ATA_SECURITY_SET_PASSWORD 0xf1 /* set drive password */
#define ATA_SECURITY_UNLOCK 0xf2 /* unlock drive using passwd */
#define ATA_SECURITY_ERASE_PREPARE 0xf3 /* prepare to erase drive */
@@ -535,6 +590,333 @@ struct atapi_sense {
u_int8_t specific2; /* sense key specific */
} __packed;
+/*
+ * SET FEATURES subcommands
+ */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * These values go in the LBA 3:0.
+ */
+#define ATA_SF_EPC_RESTORE 0x00 /* Restore Power Condition Settings */
+#define ATA_SF_EPC_GOTO 0x01 /* Go To Power Condition */
+#define ATA_SF_EPC_SET_TIMER 0x02 /* Set Power Condition Timer */
+#define ATA_SF_EPC_SET_STATE 0x03 /* Set Power Condition State */
+#define ATA_SF_EPC_ENABLE 0x04 /* Enable the EPC feature set */
+#define ATA_SF_EPC_DISABLE 0x05 /* Disable the EPC feature set */
+#define ATA_SF_EPC_SET_SOURCE 0x06 /* Set EPC Power Source */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * Power Condition ID field
+ * These values go in the count register.
+ */
+#define ATA_EPC_STANDBY_Z 0x00 /* Substate of PM2:Standby */
+#define ATA_EPC_STANDBY_Y 0x01 /* Substate of PM2:Standby */
+#define ATA_EPC_IDLE_A 0x81 /* Substate of PM1:Idle */
+#define ATA_EPC_IDLE_B 0x82 /* Substate of PM1:Idle */
+#define ATA_EPC_IDLE_C 0x83 /* Substate of PM1:Idle */
+#define ATA_EPC_ALL 0xff /* All supported power conditions */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * Restore Power Conditions Settings subcommand
+ * These values go in the LBA register.
+ */
+#define ATA_SF_EPC_RST_DFLT 0x40 /* 1=Rst from Default, 0= from Saved */
+#define ATA_SF_EPC_RST_SAVE 0x10 /* 1=Save on completion */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * Go To Power Condition subcommand
+ * These values go in the LBA register.
+ */
+#define ATA_SF_EPC_GOTO_DELAY 0x02000000 /* Delayed entry bit */
+#define ATA_SF_EPC_GOTO_HOLD 0x01000000 /* Hold Power Cond bit */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * Set Power Condition Timer subcommand
+ * These values go in the LBA register.
+ */
+#define ATA_SF_EPC_TIMER_MASK 0x00ffff00 /* Timer field */
+#define ATA_SF_EPC_TIMER_SHIFT 8
+#define ATA_SF_EPC_TIMER_SEC 0x00000080 /* Timer units, 1=sec, 0=.1s */
+#define ATA_SF_EPC_TIMER_EN 0x00000020 /* Enable/disable cond. */
+#define ATA_SF_EPC_TIMER_SAVE 0x00000010 /* Save settings on comp. */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * Set Power Condition State subcommand
+ * These values go in the LBA register.
+ */
+#define ATA_SF_EPC_SETCON_EN 0x00000020 /* Enable power cond. */
+#define ATA_SF_EPC_SETCON_SAVE 0x00000010 /* Save settings on comp */
+
+/*
+ * SET FEATURES command
+ * Extended Power Conditions subcommand -- ATA_SF_EPC (0x4A)
+ * Set EPC Power Source subcommand
+ * These values go in the count register.
+ */
+#define ATA_SF_EPC_SRC_UNKNOWN 0x0000 /* Unknown source */
+#define ATA_SF_EPC_SRC_BAT 0x0001 /* battery source */
+#define ATA_SF_EPC_SRC_NOT_BAT 0x0002 /* not battery source */
+
+#define ATA_LOG_DIRECTORY 0x00 /* Directory of all logs */
+#define ATA_POWER_COND_LOG 0x08 /* Power Conditions Log */
+#define ATA_PCL_IDLE 0x00 /* Idle Power Conditions Page */
+#define ATA_PCL_STANDBY 0x01 /* Standby Power Conditions Page */
+#define ATA_IDENTIFY_DATA_LOG 0x30 /* Identify Device Data Log */
+#define ATA_IDL_PAGE_LIST 0x00 /* List of supported pages */
+#define ATA_IDL_IDENTIFY_DATA 0x01 /* Copy of Identify Device data */
+#define ATA_IDL_CAPACITY 0x02 /* Capacity */
+#define ATA_IDL_SUP_CAP 0x03 /* Supported Capabilities */
+#define ATA_IDL_CUR_SETTINGS 0x04 /* Current Settings */
+#define ATA_IDL_ATA_STRINGS 0x05 /* ATA Strings */
+#define ATA_IDL_SECURITY 0x06 /* Security */
+#define ATA_IDL_PARALLEL_ATA 0x07 /* Parallel ATA */
+#define ATA_IDL_SERIAL_ATA 0x08 /* Serial ATA */
+#define ATA_IDL_ZDI 0x09 /* Zoned Device Information */
+
+struct ata_gp_log_dir {
+ uint8_t header[2];
+#define ATA_GP_LOG_DIR_VERSION 0x0001
+ uint8_t num_pages[255*2]; /* Number of log pages at address */
+};
+
+/*
+ * ATA Power Conditions log descriptor
+ */
+struct ata_power_cond_log_desc {
+ uint8_t reserved1;
+ uint8_t flags;
+#define ATA_PCL_COND_SUPPORTED 0x80
+#define ATA_PCL_COND_SAVEABLE 0x40
+#define ATA_PCL_COND_CHANGEABLE 0x20
+#define ATA_PCL_DEFAULT_TIMER_EN 0x10
+#define ATA_PCL_SAVED_TIMER_EN 0x08
+#define ATA_PCL_CURRENT_TIMER_EN 0x04
+#define ATA_PCL_HOLD_PC_NOT_SUP 0x02
+ uint8_t reserved2[2];
+ uint8_t default_timer[4];
+ uint8_t saved_timer[4];
+ uint8_t current_timer[4];
+ uint8_t nom_time_to_active[4];
+ uint8_t min_timer[4];
+ uint8_t max_timer[4];
+ uint8_t num_transitions_to_pc[4];
+ uint8_t hours_in_pc[4];
+ uint8_t reserved3[28];
+};
+
+/*
+ * ATA Power Conditions Log (0x08), Idle power conditions page (0x00)
+ */
+struct ata_power_cond_log_idle {
+ struct ata_power_cond_log_desc idle_a_desc;
+ struct ata_power_cond_log_desc idle_b_desc;
+ struct ata_power_cond_log_desc idle_c_desc;
+ uint8_t reserved[320];
+};
+
+/*
+ * ATA Power Conditions Log (0x08), Standby power conditions page (0x01)
+ */
+struct ata_power_cond_log_standby {
+ uint8_t reserved[384];
+ struct ata_power_cond_log_desc standby_y_desc;
+ struct ata_power_cond_log_desc standby_z_desc;
+};
+
+/*
+ * ATA IDENTIFY DEVICE data log (0x30) page 0x00
+ * List of Supported IDENTIFY DEVICE data pages.
+ */
+struct ata_identify_log_pages {
+ uint8_t header[8];
+#define ATA_IDLOG_REVISION 0x0000000000000001
+ uint8_t entry_count;
+ uint8_t entries[503];
+};
+
+/*
+ * ATA IDENTIFY DEVICE data log (0x30)
+ * Capacity (Page 0x02).
+ */
+struct ata_identify_log_capacity {
+ uint8_t header[8];
+#define ATA_CAP_HEADER_VALID 0x8000000000000000
+#define ATA_CAP_PAGE_NUM_MASK 0x0000000000ff0000
+#define ATA_CAP_PAGE_NUM_SHIFT 16
+#define ATA_CAP_REV_MASK 0x00000000000000ff
+ uint8_t capacity[8];
+#define ATA_CAP_CAPACITY_VALID 0x8000000000000000
+#define ATA_CAP_ACCESSIBLE_CAP 0x0000ffffffffffff
+ uint8_t phys_logical_sect_size[8];
+#define ATA_CAP_PL_VALID 0x8000000000000000
+#define ATA_CAP_LTOP_REL_SUP 0x4000000000000000
+#define ATA_CAP_LOG_SECT_SUP 0x2000000000000000
+#define ATA_CAP_ALIGN_ERR_MASK 0x0000000000300000
+#define ATA_CAP_LTOP_MASK 0x00000000000f0000
+#define ATA_CAP_LOG_SECT_OFF 0x000000000000ffff
+ uint8_t logical_sect_size[8];
+#define ATA_CAP_LOG_SECT_VALID 0x8000000000000000
+#define ATA_CAP_LOG_SECT_SIZE 0x00000000ffffffff
+ uint8_t nominal_buffer_size[8];
+#define ATA_CAP_NOM_BUF_VALID 0x8000000000000000
+#define ATA_CAP_NOM_BUF_SIZE 0x7fffffffffffffff
+ uint8_t reserved[472];
+};
+
+/*
+ * ATA IDENTIFY DEVICE data log (0x30)
+ * Supported Capabilities (Page 0x03).
+ */
+
+struct ata_identify_log_sup_cap {
+ uint8_t header[8];
+#define ATA_SUP_CAP_HEADER_VALID 0x8000000000000000
+#define ATA_SUP_CAP_PAGE_NUM_MASK 0x0000000000ff0000
+#define ATA_SUP_CAP_PAGE_NUM_SHIFT 16
+#define ATA_SUP_CAP_REV_MASK 0x00000000000000ff
+ uint8_t sup_cap[8];
+#define ATA_SUP_CAP_VALID 0x8000000000000000
+#define ATA_SC_SET_SECT_CONFIG_SUP 0x0002000000000000 /* Set Sect Conf*/
+#define ATA_SC_ZERO_EXT_SUP 0x0001000000000000 /* Zero EXT */
+#define ATA_SC_SUCC_NCQ_SENSE_SUP 0x0000800000000000 /* Succ. NCQ Sns */
+#define ATA_SC_DLC_SUP 0x0000400000000000 /* DLC */
+#define ATA_SC_RQSN_DEV_FAULT_SUP 0x0000200000000000 /* Req Sns Dev Flt*/
+#define ATA_SC_DSN_SUP 0x0000100000000000 /* DSN */
+#define ATA_SC_LP_STANDBY_SUP 0x0000080000000000 /* LP Standby */
+#define ATA_SC_SET_EPC_PS_SUP 0x0000040000000000 /* Set EPC PS */
+#define ATA_SC_AMAX_ADDR_SUP 0x0000020000000000 /* AMAX Addr */
+#define ATA_SC_DRAT_SUP 0x0000008000000000 /* DRAT */
+#define ATA_SC_LPS_MISALGN_SUP 0x0000004000000000 /* LPS Misalign */
+#define ATA_SC_RB_DMA_SUP 0x0000001000000000 /* Read Buf DMA */
+#define ATA_SC_WB_DMA_SUP 0x0000000800000000 /* Write Buf DMA */
+#define ATA_SC_DNLD_MC_DMA_SUP 0x0000000200000000 /* DL MCode DMA */
+#define ATA_SC_28BIT_SUP 0x0000000100000000 /* 28-bit */
+#define ATA_SC_RZAT_SUP 0x0000000080000000 /* RZAT */
+#define ATA_SC_NOP_SUP 0x0000000020000000 /* NOP */
+#define ATA_SC_READ_BUFFER_SUP 0x0000000010000000 /* Read Buffer */
+#define ATA_SC_WRITE_BUFFER_SUP 0x0000000008000000 /* Write Buffer */
+#define ATA_SC_READ_LOOK_AHEAD_SUP 0x0000000002000000 /* Read Look-Ahead*/
+#define ATA_SC_VOLATILE_WC_SUP 0x0000000001000000 /* Volatile WC */
+#define ATA_SC_SMART_SUP 0x0000000000800000 /* SMART */
+#define ATA_SC_FLUSH_CACHE_EXT_SUP 0x0000000000400000 /* Flush Cache Ext */
+#define ATA_SC_48BIT_SUP 0x0000000000100000 /* 48-Bit */
+#define ATA_SC_SPINUP_SUP 0x0000000000040000 /* Spin-Up */
+#define ATA_SC_PUIS_SUP 0x0000000000020000 /* PUIS */
+#define ATA_SC_APM_SUP 0x0000000000010000 /* APM */
+#define ATA_SC_DL_MICROCODE_SUP 0x0000000000004000 /* DL Microcode */
+#define ATA_SC_UNLOAD_SUP 0x0000000000002000 /* Unload */
+#define ATA_SC_WRITE_FUA_EXT_SUP 0x0000000000001000 /* Write FUA EXT */
+#define ATA_SC_GPL_SUP 0x0000000000000800 /* GPL */
+#define ATA_SC_STREAMING_SUP 0x0000000000000400 /* Streaming */
+#define ATA_SC_SMART_SELFTEST_SUP 0x0000000000000100 /* SMART self-test */
+#define ATA_SC_SMART_ERR_LOG_SUP 0x0000000000000080 /* SMART Err Log */
+#define ATA_SC_EPC_SUP 0x0000000000000040 /* EPC */
+#define ATA_SC_SENSE_SUP 0x0000000000000020 /* Sense data */
+#define ATA_SC_FREEFALL_SUP 0x0000000000000010 /* Free-Fall */
+#define ATA_SC_DM_MODE3_SUP 0x0000000000000008 /* DM Mode 3 */
+#define ATA_SC_GPL_DMA_SUP 0x0000000000000004 /* GPL DMA */
+#define ATA_SC_WRITE_UNCOR_SUP 0x0000000000000002 /* Write uncorr. */
+#define ATA_SC_WRV_SUP 0x0000000000000001 /* WRV */
+ uint8_t download_code_cap[8];
+#define ATA_DL_CODE_VALID 0x8000000000000000
+#define ATA_DLC_DM_OFFSETS_DEFER_SUP 0x0000000400000000
+#define ATA_DLC_DM_IMMED_SUP 0x0000000200000000
+#define ATA_DLC_DM_OFF_IMMED_SUP 0x0000000100000000
+#define ATA_DLC_DM_MAX_XFER_SIZE_MASK 0x00000000ffff0000
+#define ATA_DLC_DM_MAX_XFER_SIZE_SHIFT 16
+#define ATA_DLC_DM_MIN_XFER_SIZE_MASK 0x000000000000ffff
+ uint8_t nom_media_rotation_rate[8];
+#define ATA_NOM_MEDIA_ROTATION_VALID 0x8000000000000000
+#define ATA_ROTATION_MASK 0x000000000000ffff
+ uint8_t form_factor[8];
+#define ATA_FORM_FACTOR_VALID 0x8000000000000000
+#define ATA_FF_MASK 0x000000000000000f
+#define ATA_FF_NOT_REPORTED 0x0000000000000000 /* Not reported */
+#define ATA_FF_525_IN 0x0000000000000001 /* 5.25 inch */
+#define ATA_FF_35_IN 0x0000000000000002 /* 3.5 inch */
+#define ATA_FF_25_IN 0x0000000000000003 /* 2.5 inch */
+#define ATA_FF_18_IN 0x0000000000000004 /* 1.8 inch */
+#define ATA_FF_LT_18_IN 0x0000000000000005 /* < 1.8 inch */
+#define ATA_FF_MSATA 0x0000000000000006 /* mSATA */
+#define ATA_FF_M2 0x0000000000000007 /* M.2 */
+#define ATA_FF_MICROSSD 0x0000000000000008 /* MicroSSD */
+#define ATA_FF_CFAST 0x0000000000000009 /* CFast */
+ uint8_t wrv_sec_cnt_mode3[8];
+#define ATA_WRV_MODE3_VALID 0x8000000000000000
+#define ATA_WRV_MODE3_COUNT 0x00000000ffffffff
+ uint8_t wrv_sec_cnt_mode2[8];
+#define ATA_WRV_MODE2_VALID 0x8000000000000000
+#define ATA_WRV_MODE2_COUNT 0x00000000ffffffff
+ uint8_t wwn[16];
+ /* XXX KDM need to figure out how to handle 128-bit fields */
+ uint8_t dsm[8];
+#define ATA_DSM_VALID 0x8000000000000000
+#define ATA_LB_MARKUP_SUP 0x000000000000ff00
+#define ATA_TRIM_SUP 0x0000000000000001
+ uint8_t util_per_unit_time[16];
+ /* XXX KDM need to figure out how to handle 128-bit fields */
+ uint8_t util_usage_rate_sup[8];
+#define ATA_UTIL_USAGE_RATE_VALID 0x8000000000000000
+#define ATA_SETTING_RATE_SUP 0x0000000000800000
+#define ATA_SINCE_POWERON_SUP 0x0000000000000100
+#define ATA_POH_RATE_SUP 0x0000000000000010
+#define ATA_DATE_TIME_RATE_SUP 0x0000000000000001
+ uint8_t zoned_cap[8];
+#define ATA_ZONED_VALID 0x8000000000000000
+#define ATA_ZONED_MASK 0x0000000000000003
+ uint8_t sup_zac_cap[8];
+#define ATA_SUP_ZAC_CAP_VALID 0x8000000000000000
+#define ATA_ND_RWP_SUP 0x0000000000000010 /* Reset Write Ptr*/
+#define ATA_ND_FINISH_ZONE_SUP 0x0000000000000008 /* Finish Zone */
+#define ATA_ND_CLOSE_ZONE_SUP 0x0000000000000004 /* Close Zone */
+#define ATA_ND_OPEN_ZONE_SUP 0x0000000000000002 /* Open Zone */
+#define ATA_REPORT_ZONES_SUP 0x0000000000000001 /* Report Zones */
+ uint8_t reserved[392];
+};
+
+/*
+ * ATA Identify Device Data Log Zoned Device Information Page (0x09).
+ * Current as of ZAC r04a, August 25, 2015.
+ */
+struct ata_zoned_info_log {
+ uint8_t header[8];
+#define ATA_ZDI_HEADER_VALID 0x8000000000000000
+#define ATA_ZDI_PAGE_NUM_MASK 0x0000000000ff0000
+#define ATA_ZDI_PAGE_NUM_SHIFT 16
+#define ATA_ZDI_REV_MASK 0x00000000000000ff
+ uint8_t zoned_cap[8];
+#define ATA_ZDI_CAP_VALID 0x8000000000000000
+#define ATA_ZDI_CAP_URSWRZ 0x0000000000000001
+ uint8_t zoned_settings[8];
+#define ATA_ZDI_SETTINGS_VALID 0x8000000000000000
+ uint8_t optimal_seq_zones[8];
+#define ATA_ZDI_OPT_SEQ_VALID 0x8000000000000000
+#define ATA_ZDI_OPT_SEQ_MASK 0x00000000ffffffff
+ uint8_t optimal_nonseq_zones[8];
+#define ATA_ZDI_OPT_NS_VALID 0x8000000000000000
+#define ATA_ZDI_OPT_NS_MASK 0x00000000ffffffff
+ uint8_t max_seq_req_zones[8];
+#define ATA_ZDI_MAX_SEQ_VALID 0x8000000000000000
+#define ATA_ZDI_MAX_SEQ_MASK 0x00000000ffffffff
+ uint8_t version_info[8];
+#define ATA_ZDI_VER_VALID 0x8000000000000000
+#define ATA_ZDI_VER_ZAC_SUP 0x0100000000000000
+#define ATA_ZDI_VER_ZAC_MASK 0x00000000000000ff
+ uint8_t reserved[456];
+};
+
struct ata_ioc_request {
union {
struct {
diff --git a/freebsd/sys/sys/bitset.h b/freebsd/sys/sys/bitset.h
new file mode 100644
index 00000000..723c39b0
--- /dev/null
+++ b/freebsd/sys/sys/bitset.h
@@ -0,0 +1,208 @@
+/*-
+ * Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org>
+ * All rights reserved.
+ *
+ * Copyright (c) 2008 Nokia Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_BITSET_H_
+#define _SYS_BITSET_H_
+
+#define __bitset_mask(_s, n) \
+ (1L << ((__bitset_words((_s)) == 1) ? \
+ (__size_t)(n) : ((n) % _BITSET_BITS)))
+
+#define __bitset_word(_s, n) \
+ ((__bitset_words((_s)) == 1) ? 0 : ((n) / _BITSET_BITS))
+
+#define BIT_CLR(_s, n, p) \
+ ((p)->__bits[__bitset_word(_s, n)] &= ~__bitset_mask((_s), (n)))
+
+#define BIT_COPY(_s, f, t) (void)(*(t) = *(f))
+
+#define BIT_ISSET(_s, n, p) \
+ ((((p)->__bits[__bitset_word(_s, n)] & __bitset_mask((_s), (n))) != 0))
+
+#define BIT_SET(_s, n, p) \
+ ((p)->__bits[__bitset_word(_s, n)] |= __bitset_mask((_s), (n)))
+
+#define BIT_ZERO(_s, p) do { \
+ __size_t __i; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ (p)->__bits[__i] = 0L; \
+} while (0)
+
+#define BIT_FILL(_s, p) do { \
+ __size_t __i; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ (p)->__bits[__i] = -1L; \
+} while (0)
+
+#define BIT_SETOF(_s, n, p) do { \
+ BIT_ZERO(_s, p); \
+ (p)->__bits[__bitset_word(_s, n)] = __bitset_mask((_s), (n)); \
+} while (0)
+
+/* Is p empty. */
+#define BIT_EMPTY(_s, p) __extension__ ({ \
+ __size_t __i; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ if ((p)->__bits[__i]) \
+ break; \
+ __i == __bitset_words((_s)); \
+})
+
+/* Is p full set. */
+#define BIT_ISFULLSET(_s, p) __extension__ ({ \
+ __size_t __i; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ if ((p)->__bits[__i] != (long)-1) \
+ break; \
+ __i == __bitset_words((_s)); \
+})
+
+/* Is c a subset of p. */
+#define BIT_SUBSET(_s, p, c) __extension__ ({ \
+ __size_t __i; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ if (((c)->__bits[__i] & \
+ (p)->__bits[__i]) != \
+ (c)->__bits[__i]) \
+ break; \
+ __i == __bitset_words((_s)); \
+})
+
+/* Are there any common bits between b & c? */
+#define BIT_OVERLAP(_s, p, c) __extension__ ({ \
+ __size_t __i; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ if (((c)->__bits[__i] & \
+ (p)->__bits[__i]) != 0) \
+ break; \
+ __i != __bitset_words((_s)); \
+})
+
+/* Compare two sets, returns 0 if equal 1 otherwise. */
+#define BIT_CMP(_s, p, c) __extension__ ({ \
+ __size_t __i; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ if (((c)->__bits[__i] != \
+ (p)->__bits[__i])) \
+ break; \
+ __i != __bitset_words((_s)); \
+})
+
+#define BIT_OR(_s, d, s) do { \
+ __size_t __i; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ (d)->__bits[__i] |= (s)->__bits[__i]; \
+} while (0)
+
+#define BIT_AND(_s, d, s) do { \
+ __size_t __i; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ (d)->__bits[__i] &= (s)->__bits[__i]; \
+} while (0)
+
+#define BIT_NAND(_s, d, s) do { \
+ __size_t __i; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ (d)->__bits[__i] &= ~(s)->__bits[__i]; \
+} while (0)
+
+#define BIT_CLR_ATOMIC(_s, n, p) \
+ atomic_clear_long(&(p)->__bits[__bitset_word(_s, n)], \
+ __bitset_mask((_s), n))
+
+#define BIT_SET_ATOMIC(_s, n, p) \
+ atomic_set_long(&(p)->__bits[__bitset_word(_s, n)], \
+ __bitset_mask((_s), n))
+
+#define BIT_SET_ATOMIC_ACQ(_s, n, p) \
+ atomic_set_acq_long(&(p)->__bits[__bitset_word(_s, n)], \
+ __bitset_mask((_s), n))
+
+/* Convenience functions catering special cases. */
+#define BIT_AND_ATOMIC(_s, d, s) do { \
+ __size_t __i; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ atomic_clear_long(&(d)->__bits[__i], \
+ ~(s)->__bits[__i]); \
+} while (0)
+
+#define BIT_OR_ATOMIC(_s, d, s) do { \
+ __size_t __i; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ atomic_set_long(&(d)->__bits[__i], \
+ (s)->__bits[__i]); \
+} while (0)
+
+#define BIT_COPY_STORE_REL(_s, f, t) do { \
+ __size_t __i; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ atomic_store_rel_long(&(t)->__bits[__i], \
+ (f)->__bits[__i]); \
+} while (0)
+
+#define BIT_FFS(_s, p) __extension__ ({ \
+ __size_t __i; \
+ int __bit; \
+ \
+ __bit = 0; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) { \
+ if ((p)->__bits[__i] != 0) { \
+ __bit = ffsl((p)->__bits[__i]); \
+ __bit += __i * _BITSET_BITS; \
+ break; \
+ } \
+ } \
+ __bit; \
+})
+
+#define BIT_COUNT(_s, p) __extension__ ({ \
+ __size_t __i; \
+ int __count; \
+ \
+ __count = 0; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ __count += __bitcountl((p)->__bits[__i]); \
+ __count; \
+})
+
+#define BITSET_T_INITIALIZER(x) \
+ { .__bits = { x } }
+
+#define BITSET_FSET(n) \
+ [ 0 ... ((n) - 1) ] = (-1L)
+
+/*
+ * Dynamically allocate a bitset.
+ */
+#define BITSET_ALLOC(_s, mt, mf) \
+ malloc(__bitset_words(_s) * sizeof(long), mt, (mf))
+
+#endif /* !_SYS_BITSET_H_ */
diff --git a/freebsd/sys/sys/bitstring.h b/freebsd/sys/sys/bitstring.h
index 125ef51d..32465d11 100644
--- a/freebsd/sys/sys/bitstring.h
+++ b/freebsd/sys/sys/bitstring.h
@@ -29,118 +29,274 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
+ * Copyright (c) 2014 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
* $FreeBSD$
*/
-
#ifndef _SYS_BITSTRING_H_
#define _SYS_BITSTRING_H_
-typedef unsigned char bitstr_t;
+#ifdef _KERNEL
+#include <sys/libkern.h>
+#include <sys/malloc.h>
+#endif
+
+#include <sys/types.h>
-/* internal macros */
- /* byte of the bitstring bit is in */
-#define _bit_byte(bit) \
- ((bit) >> 3)
+typedef unsigned long bitstr_t;
- /* mask for the bit within its byte */
-#define _bit_mask(bit) \
- (1 << ((bit)&0x7))
+/*---------------------- Private Implementation Details ----------------------*/
+#define _BITSTR_MASK (~0UL)
+#define _BITSTR_BITS (sizeof(bitstr_t) * 8)
-/* external macros */
- /* bytes in a bitstring of nbits bits */
-#define bitstr_size(nbits) \
- (((nbits) + 7) >> 3)
+#ifdef roundup2
+#define _bit_roundup2 roundup2
+#else
+#define _bit_roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */
+#endif
- /* allocate a bitstring */
-#define bit_alloc(nbits) \
- (bitstr_t *)calloc((size_t)bitstr_size(nbits), sizeof(bitstr_t))
+/* bitstr_t in bit string containing the bit. */
+static inline int
+_bit_idx(int _bit)
+{
+ return (_bit / _BITSTR_BITS);
+}
- /* allocate a bitstring on the stack */
+/* bit number within bitstr_t at _bit_idx(_bit). */
+static inline int
+_bit_offset(int _bit)
+{
+ return (_bit % _BITSTR_BITS);
+}
+
+/* Mask for the bit within its long. */
+static inline bitstr_t
+_bit_mask(int _bit)
+{
+ return (1UL << _bit_offset(_bit));
+}
+
+static inline bitstr_t
+_bit_make_mask(int _start, int _stop)
+{
+ return ((_BITSTR_MASK << _bit_offset(_start)) &
+ (_BITSTR_MASK >> (_BITSTR_BITS - _bit_offset(_stop) - 1)));
+}
+
+/*----------------------------- Public Interface -----------------------------*/
+/* Number of bytes allocated for a bit string of nbits bits */
+#define bitstr_size(_nbits) (_bit_roundup2(_nbits, _BITSTR_BITS) / 8)
+
+/* Allocate a bit string initialized with no bits set. */
+#ifdef _KERNEL
+static inline bitstr_t *
+bit_alloc(int _nbits, struct malloc_type *type, int flags)
+{
+ return ((bitstr_t *)malloc(bitstr_size(_nbits), type, flags | M_ZERO));
+}
+#else
+static inline bitstr_t *
+bit_alloc(int _nbits)
+{
+ return ((bitstr_t *)calloc(bitstr_size(_nbits), 1));
+}
+#endif
+
+/* Allocate a bit string on the stack */
#define bit_decl(name, nbits) \
- ((name)[bitstr_size(nbits)])
-
- /* is bit N of bitstring name set? */
-#define bit_test(name, bit) \
- ((name)[_bit_byte(bit)] & _bit_mask(bit))
-
- /* set bit N of bitstring name */
-#define bit_set(name, bit) \
- ((name)[_bit_byte(bit)] |= _bit_mask(bit))
-
- /* clear bit N of bitstring name */
-#define bit_clear(name, bit) \
- ((name)[_bit_byte(bit)] &= ~_bit_mask(bit))
-
- /* clear bits start ... stop in bitstring */
-#define bit_nclear(name, start, stop) do { \
- register bitstr_t *_name = (name); \
- register int _start = (start), _stop = (stop); \
- register int _startbyte = _bit_byte(_start); \
- register int _stopbyte = _bit_byte(_stop); \
- if (_startbyte == _stopbyte) { \
- _name[_startbyte] &= ((0xff >> (8 - (_start&0x7))) | \
- (0xff << ((_stop&0x7) + 1))); \
- } else { \
- _name[_startbyte] &= 0xff >> (8 - (_start&0x7)); \
- while (++_startbyte < _stopbyte) \
- _name[_startbyte] = 0; \
- _name[_stopbyte] &= 0xff << ((_stop&0x7) + 1); \
- } \
-} while (0)
-
- /* set bits start ... stop in bitstring */
-#define bit_nset(name, start, stop) do { \
- register bitstr_t *_name = (name); \
- register int _start = (start), _stop = (stop); \
- register int _startbyte = _bit_byte(_start); \
- register int _stopbyte = _bit_byte(_stop); \
- if (_startbyte == _stopbyte) { \
- _name[_startbyte] |= ((0xff << (_start&0x7)) & \
- (0xff >> (7 - (_stop&0x7)))); \
- } else { \
- _name[_startbyte] |= 0xff << ((_start)&0x7); \
- while (++_startbyte < _stopbyte) \
- _name[_startbyte] = 0xff; \
- _name[_stopbyte] |= 0xff >> (7 - (_stop&0x7)); \
- } \
-} while (0)
-
- /* find first bit clear in name */
-#define bit_ffc(name, nbits, value) do { \
- register bitstr_t *_name = (name); \
- register int _byte, _nbits = (nbits); \
- register int _stopbyte = _bit_byte(_nbits - 1), _value = -1; \
- if (_nbits > 0) \
- for (_byte = 0; _byte <= _stopbyte; ++_byte) \
- if (_name[_byte] != 0xff) { \
- bitstr_t _lb; \
- _value = _byte << 3; \
- for (_lb = _name[_byte]; (_lb&0x1); \
- ++_value, _lb >>= 1); \
- break; \
- } \
- if (_value >= nbits) \
- _value = -1; \
- *(value) = _value; \
-} while (0)
-
- /* find first bit set in name */
-#define bit_ffs(name, nbits, value) do { \
- register bitstr_t *_name = (name); \
- register int _byte, _nbits = (nbits); \
- register int _stopbyte = _bit_byte(_nbits - 1), _value = -1; \
- if (_nbits > 0) \
- for (_byte = 0; _byte <= _stopbyte; ++_byte) \
- if (_name[_byte]) { \
- bitstr_t _lb; \
- _value = _byte << 3; \
- for (_lb = _name[_byte]; !(_lb&0x1); \
- ++_value, _lb >>= 1); \
- break; \
- } \
- if (_value >= nbits) \
- _value = -1; \
- *(value) = _value; \
-} while (0)
-
-#endif /* !_SYS_BITSTRING_H_ */
+ ((name)[bitstr_size(nbits) / sizeof(bitstr_t)])
+
+/* Is bit N of bit string set? */
+static inline int
+bit_test(const bitstr_t *_bitstr, int _bit)
+{
+ return ((_bitstr[_bit_idx(_bit)] & _bit_mask(_bit)) != 0);
+}
+
+/* Set bit N of bit string. */
+static inline void
+bit_set(bitstr_t *_bitstr, int _bit)
+{
+ _bitstr[_bit_idx(_bit)] |= _bit_mask(_bit);
+}
+
+/* clear bit N of bit string name */
+static inline void
+bit_clear(bitstr_t *_bitstr, int _bit)
+{
+ _bitstr[_bit_idx(_bit)] &= ~_bit_mask(_bit);
+}
+
+/* Set bits start ... stop inclusive in bit string. */
+static inline void
+bit_nset(bitstr_t *_bitstr, int _start, int _stop)
+{
+ bitstr_t *_stopbitstr;
+
+ _stopbitstr = _bitstr + _bit_idx(_stop);
+ _bitstr += _bit_idx(_start);
+
+ if (_bitstr == _stopbitstr) {
+ *_bitstr |= _bit_make_mask(_start, _stop);
+ } else {
+ *_bitstr |= _bit_make_mask(_start, _BITSTR_BITS - 1);
+ while (++_bitstr < _stopbitstr)
+ *_bitstr = _BITSTR_MASK;
+ *_stopbitstr |= _bit_make_mask(0, _stop);
+ }
+}
+
+/* Clear bits start ... stop inclusive in bit string. */
+static inline void
+bit_nclear(bitstr_t *_bitstr, int _start, int _stop)
+{
+ bitstr_t *_stopbitstr;
+
+ _stopbitstr = _bitstr + _bit_idx(_stop);
+ _bitstr += _bit_idx(_start);
+
+ if (_bitstr == _stopbitstr) {
+ *_bitstr &= ~_bit_make_mask(_start, _stop);
+ } else {
+ *_bitstr &= ~_bit_make_mask(_start, _BITSTR_BITS - 1);
+ while (++_bitstr < _stopbitstr)
+ *_bitstr = 0;
+ *_stopbitstr &= ~_bit_make_mask(0, _stop);
+ }
+}
+
+/* Find the first bit set in bit string at or after bit start. */
+static inline void
+bit_ffs_at(bitstr_t *_bitstr, int _start, int _nbits, int *_result)
+{
+ bitstr_t *_curbitstr;
+ bitstr_t *_stopbitstr;
+ bitstr_t _test;
+ int _value, _offset;
+
+ if (_nbits > 0) {
+ _curbitstr = _bitstr + _bit_idx(_start);
+ _stopbitstr = _bitstr + _bit_idx(_nbits - 1);
+
+ _test = *_curbitstr;
+ if (_bit_offset(_start) != 0)
+ _test &= _bit_make_mask(_start, _BITSTR_BITS - 1);
+ while (_test == 0 && _curbitstr < _stopbitstr)
+ _test = *(++_curbitstr);
+
+ _offset = ffsl(_test);
+ _value = ((_curbitstr - _bitstr) * _BITSTR_BITS) + _offset - 1;
+ if (_offset == 0 || _value >= _nbits)
+ _value = -1;
+ } else {
+ _value = -1;
+ }
+ *_result = _value;
+}
+
+/* Find the first bit clear in bit string at or after bit start. */
+static inline void
+bit_ffc_at(bitstr_t *_bitstr, int _start, int _nbits, int *_result)
+{
+ bitstr_t *_curbitstr;
+ bitstr_t *_stopbitstr;
+ bitstr_t _test;
+ int _value, _offset;
+
+ if (_nbits > 0) {
+ _curbitstr = _bitstr + _bit_idx(_start);
+ _stopbitstr = _bitstr + _bit_idx(_nbits - 1);
+
+ _test = *_curbitstr;
+ if (_bit_offset(_start) != 0)
+ _test |= _bit_make_mask(0, _start - 1);
+ while (_test == _BITSTR_MASK && _curbitstr < _stopbitstr)
+ _test = *(++_curbitstr);
+
+ _offset = ffsl(~_test);
+ _value = ((_curbitstr - _bitstr) * _BITSTR_BITS) + _offset - 1;
+ if (_offset == 0 || _value >= _nbits)
+ _value = -1;
+ } else {
+ _value = -1;
+ }
+ *_result = _value;
+}
+
+/* Find the first bit set in bit string. */
+static inline void
+bit_ffs(bitstr_t *_bitstr, int _nbits, int *_result)
+{
+ bit_ffs_at(_bitstr, /*start*/0, _nbits, _result);
+}
+
+/* Find the first bit clear in bit string. */
+static inline void
+bit_ffc(bitstr_t *_bitstr, int _nbits, int *_result)
+{
+ bit_ffc_at(_bitstr, /*start*/0, _nbits, _result);
+}
+
+/* Count the number of bits set in a bitstr of size _nbits at or after _start */
+static inline void
+bit_count(bitstr_t *_bitstr, int _start, int _nbits, int *_result)
+{
+ bitstr_t *_curbitstr, mask;
+ int _value = 0, curbitstr_len;
+
+ if (_start >= _nbits)
+ goto out;
+
+ _curbitstr = _bitstr + _bit_idx(_start);
+ _nbits -= _BITSTR_BITS * _bit_idx(_start);
+ _start -= _BITSTR_BITS * _bit_idx(_start);
+
+ if (_start > 0) {
+ curbitstr_len = (int)_BITSTR_BITS < _nbits ?
+ (int)_BITSTR_BITS : _nbits;
+ mask = _bit_make_mask(_start, _bit_offset(curbitstr_len - 1));
+ _value += __bitcountl(*_curbitstr & mask);
+ _curbitstr++;
+ _nbits -= _BITSTR_BITS;
+ }
+ while (_nbits >= (int)_BITSTR_BITS) {
+ _value += __bitcountl(*_curbitstr);
+ _curbitstr++;
+ _nbits -= _BITSTR_BITS;
+ }
+ if (_nbits > 0) {
+ mask = _bit_make_mask(0, _bit_offset(_nbits - 1));
+ _value += __bitcountl(*_curbitstr & mask);
+ }
+
+out:
+ *_result = _value;
+}
+
+#endif /* _SYS_BITSTRING_H_ */
diff --git a/freebsd/sys/sys/buf.h b/freebsd/sys/sys/buf.h
index e87fd420..f32c6ca1 100644
--- a/freebsd/sys/sys/buf.h
+++ b/freebsd/sys/sys/buf.h
@@ -98,37 +98,37 @@ struct buf {
void *b_caller1;
caddr_t b_data;
int b_error;
- uint8_t b_iocmd;
- uint8_t b_ioflags;
+ uint16_t b_iocmd; /* BIO_* bio_cmd from bio.h */
+ uint16_t b_ioflags; /* BIO_* bio_flags from bio.h */
off_t b_iooffset;
long b_resid;
void (*b_iodone)(struct buf *);
daddr_t b_blkno; /* Underlying physical block number. */
off_t b_offset; /* Offset into file. */
TAILQ_ENTRY(buf) b_bobufs; /* (V) Buffer's associated vnode. */
- struct buf *b_left; /* (V) splay tree link */
- struct buf *b_right; /* (V) splay tree link */
uint32_t b_vflags; /* (V) BV_* flags */
- TAILQ_ENTRY(buf) b_freelist; /* (Q) Free list position inactive. */
unsigned short b_qindex; /* (Q) buffer queue index */
uint32_t b_flags; /* B_* flags. */
b_xflags_t b_xflags; /* extra flags */
struct lock b_lock; /* Buffer lock */
long b_bufsize; /* Allocated buffer size. */
- long b_runningbufspace; /* when I/O is running, pipelining */
- caddr_t b_kvabase; /* base kva for buffer */
- caddr_t b_kvaalloc; /* allocated kva for B_KVAALLOC */
+ int b_runningbufspace; /* when I/O is running, pipelining */
int b_kvasize; /* size of kva for buffer */
- daddr_t b_lblkno; /* Logical block number. */
- struct vnode *b_vp; /* Device vnode. */
int b_dirtyoff; /* Offset in buffer of dirty region. */
int b_dirtyend; /* Offset of end of dirty region. */
+ caddr_t b_kvabase; /* base kva for buffer */
+ daddr_t b_lblkno; /* Logical block number. */
+ struct vnode *b_vp; /* Device vnode. */
struct ucred *b_rcred; /* Read credentials reference. */
struct ucred *b_wcred; /* Write credentials reference. */
- void *b_saveaddr; /* Original b_addr for physio. */
- union pager_info {
- int pg_reqpage;
- } b_pager;
+ union {
+ TAILQ_ENTRY(buf) b_freelist; /* (Q) */
+ struct {
+ void (*b_pgiodone)(void *, vm_page_t *, int, int);
+ int b_pgbefore;
+ int b_pgafter;
+ };
+ };
union cluster_info {
TAILQ_HEAD(cluster_list_head, buf) cluster_head;
TAILQ_ENTRY(buf) cluster_entry;
@@ -139,7 +139,6 @@ struct buf {
void *b_fsprivate1;
void *b_fsprivate2;
void *b_fsprivate3;
- int b_pin_count;
};
#define b_object b_bufobj->bo_object
@@ -200,24 +199,24 @@ struct buf {
#define B_CACHE 0x00000020 /* Bread found us in the cache. */
#define B_VALIDSUSPWRT 0x00000040 /* Valid write during suspension. */
#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
-#define B_PERSISTENT 0x00000100 /* Perm. ref'ed while EXT2FS mounted. */
+#define B_00000100 0x00000100 /* Available flag. */
#define B_DONE 0x00000200 /* I/O completed. */
#define B_EINTR 0x00000400 /* I/O was interrupted */
-#define B_UNMAPPED 0x00000800 /* KVA is not mapped. */
-#define B_KVAALLOC 0x00001000 /* But allocated. */
+#define B_NOREUSE 0x00000800 /* Contents not reused once released. */
+#define B_00001000 0x00001000 /* Available flag. */
#define B_INVAL 0x00002000 /* Does not contain valid info. */
-#define B_BARRIER 0x00004000 /* Write this and all preceeding first. */
+#define B_BARRIER 0x00004000 /* Write this and all preceding first. */
#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
#define B_MALLOC 0x00010000 /* malloced b_data */
#define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */
-#define B_000400000 0x00040000 /* Available flag. */
-#define B_000800000 0x00080000 /* Available flag. */
+#define B_00040000 0x00040000 /* Available flag. */
+#define B_00080000 0x00080000 /* Available flag. */
#define B_00100000 0x00100000 /* Available flag. */
-#define B_DIRTY 0x00200000 /* Needs writing later (in EXT2FS). */
+#define B_00200000 0x00200000 /* Available flag. */
#define B_RELBUF 0x00400000 /* Release VMIO buffer. */
-#define B_00800000 0x00800000 /* Available flag. */
+#define B_FS_FLAG1 0x00800000 /* Available flag for FS use. */
#define B_NOCOPY 0x01000000 /* Don't copy-on-write this buf. */
-#define B_NEEDSGIANT 0x02000000 /* Buffer's vnode needs giant. */
+#define B_INFREECNT 0x02000000 /* buf is counted in numfreebufs */
#define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */
#define B_MANAGED 0x08000000 /* Managed by FS. */
#define B_RAM 0x10000000 /* Read ahead mark (flag) */
@@ -226,10 +225,10 @@ struct buf {
#define B_REMFREE 0x80000000 /* Delayed bremfree */
#define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34managed" \
- "\33paging\32needsgiant\31nocopy\30b23\27relbuf\26dirty\25b20" \
+ "\33paging\32infreecnt\31nocopy\30b23\27relbuf\26b21\25b20" \
"\24b19\23b18\22clusterok\21malloc\20nocache\17b14\16inval" \
- "\15b12\14b11\13eintr\12done\11persist\10delwri\7validsuspwrt" \
- "\6cache\5deferred\4direct\3async\2needcommit\1age"
+ "\15b12\14noreuse\13eintr\12done\11b8\10delwri" \
+ "\7validsuspwrt\6cache\5deferred\4direct\3async\2needcommit\1age"
/*
* These flags are kept in b_xflags.
@@ -250,9 +249,9 @@ struct buf {
#define BV_SCANNED 0x00000001 /* VOP_FSYNC funcs mark written bufs */
#define BV_BKGRDINPROG 0x00000002 /* Background write in progress */
#define BV_BKGRDWAIT 0x00000004 /* Background write waiting */
-#define BV_INFREECNT 0x80000000 /* buf is counted in numfreebufs */
+#define BV_BKGRDERR 0x00000008 /* Error from background write */
-#define PRINT_BUF_VFLAGS "\20\40infreecnt\3bkgrdwait\2bkgrdinprog\1scanned"
+#define PRINT_BUF_VFLAGS "\20\4bkgrderr\3bkgrdwait\2bkgrdinprog\1scanned"
#ifdef _KERNEL
/*
@@ -273,7 +272,7 @@ extern const char *buf_wmesg; /* Default buffer lock message */
* Get a lock sleeping non-interruptably until it becomes available.
*/
#define BUF_LOCK(bp, locktype, interlock) \
- _lockmgr_args(&(bp)->b_lock, (locktype), (interlock), \
+ _lockmgr_args_rw(&(bp)->b_lock, (locktype), (interlock), \
LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, \
LOCK_FILE, LOCK_LINE)
@@ -281,7 +280,7 @@ extern const char *buf_wmesg; /* Default buffer lock message */
* Get a lock sleeping with specified interruptably and timeout.
*/
#define BUF_TIMELOCK(bp, locktype, interlock, wmesg, catch, timo) \
- _lockmgr_args(&(bp)->b_lock, (locktype) | LK_TIMELOCK, \
+ _lockmgr_args_rw(&(bp)->b_lock, (locktype) | LK_TIMELOCK, \
(interlock), (wmesg), (PRIBIO + 4) | (catch), (timo), \
LOCK_FILE, LOCK_LINE)
@@ -355,12 +354,6 @@ extern const char *buf_wmesg; /* Default buffer lock message */
_lockmgr_disown(&(bp)->b_lock, LOCK_FILE, LOCK_LINE)
#endif
-/*
- * Find out if the lock has waiters or not.
- */
-#define BUF_LOCKWAITERS(bp) \
- lockmgr_waiters(&(bp)->b_lock)
-
#endif /* _KERNEL */
struct buf_queue_head {
@@ -371,15 +364,11 @@ struct buf_queue_head {
};
/*
- * This structure describes a clustered I/O. It is stored in the b_saveaddr
- * field of the buffer on which I/O is done. At I/O completion, cluster
- * callback uses the structure to parcel I/O's to individual buffers, and
- * then free's this structure.
+ * This structure describes a clustered I/O.
*/
struct cluster_save {
long bs_bcount; /* Saved b_bcount. */
long bs_bufsize; /* Saved b_bufsize. */
- void *bs_saveaddr; /* Saved b_addr. */
int bs_nchildren; /* Number of associated buffers. */
struct buf **bs_children; /* List of associated buffers. */
};
@@ -428,7 +417,6 @@ buf_deallocate(struct buf *bp)
{
if (bioops.io_deallocate)
(*bioops.io_deallocate)(bp);
- BUF_LOCKFREE(bp);
}
static __inline int
@@ -469,33 +457,40 @@ extern int dirtybufthresh;
extern int bdwriteskip;
extern int dirtybufferflushes;
extern int altbufferflushes;
-extern int buf_maxio; /* nominal maximum I/O for buffer */
-extern struct buf *buf; /* The buffer headers. */
-extern char *buffers; /* The buffer contents. */
-extern int bufpages; /* Number of memory pages in the buffer pool. */
-extern struct buf *swbuf; /* Swap I/O buffer headers. */
extern int nswbuf; /* Number of swap I/O buffer headers. */
extern int cluster_pbuf_freecnt; /* Number of pbufs for clusters */
extern int vnode_pbuf_freecnt; /* Number of pbufs for vnode pager */
-extern caddr_t unmapped_buf;
+extern int vnode_async_pbuf_freecnt; /* Number of pbufs for vnode pager,
+ asynchronous reads */
+extern caddr_t unmapped_buf; /* Data address for unmapped buffers. */
+
+static inline int
+buf_mapped(struct buf *bp)
+{
+
+ return (bp->b_data != unmapped_buf);
+}
void runningbufwakeup(struct buf *);
void waitrunningbufspace(void);
caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est);
void bufinit(void);
+void bufshutdown(int);
void bdata2bio(struct buf *bp, struct bio *bip);
void bwillwrite(void);
int buf_dirty_count_severe(void);
void bremfree(struct buf *);
void bremfreef(struct buf *); /* XXX Force bremfree, only for nfs. */
-int bread(struct vnode *, daddr_t, int, struct ucred *, struct buf **);
-int bread_gb(struct vnode *, daddr_t, int, struct ucred *,
- int gbflags, struct buf **);
-void breada(struct vnode *, daddr_t *, int *, int, struct ucred *);
-int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
- struct ucred *, struct buf **);
+#define bread(vp, blkno, size, cred, bpp) \
+ breadn_flags(vp, blkno, size, NULL, NULL, 0, cred, 0, bpp)
+#define bread_gb(vp, blkno, size, cred, gbflags, bpp) \
+ breadn_flags(vp, blkno, size, NULL, NULL, 0, cred, \
+ gbflags, bpp)
+#define breadn(vp, blkno, size, rablkno, rabsize, cnt, cred, bpp) \
+ breadn_flags(vp, blkno, size, rablkno, rabsize, cnt, cred, 0, bpp)
int breadn_flags(struct vnode *, daddr_t, int, daddr_t *, int *, int,
struct ucred *, int, struct buf **);
+void breada(struct vnode *, daddr_t *, int *, int, struct ucred *);
void bdwrite(struct buf *);
void bawrite(struct buf *);
void babarrierwrite(struct buf *);
@@ -506,6 +501,7 @@ void bufstrategy(struct bufobj *, struct buf *);
void brelse(struct buf *);
void bqrelse(struct buf *);
int vfs_bio_awrite(struct buf *);
+void vfs_drain_busy_pages(struct buf *bp);
struct buf * getpbuf(int *);
struct buf *incore(struct bufobj *, daddr_t);
struct buf *gbincore(struct bufobj *, daddr_t);
@@ -518,13 +514,9 @@ void bufdone_finish(struct buf *);
void bd_speedup(void);
int cluster_read(struct vnode *, u_quad_t, daddr_t, long,
- struct ucred *, long, int, struct buf **);
-int cluster_wbuild(struct vnode *, long, daddr_t, int);
-void cluster_write(struct vnode *, struct buf *, u_quad_t, int);
-int cluster_read_gb(struct vnode *, u_quad_t, daddr_t, long,
struct ucred *, long, int, int, struct buf **);
-int cluster_wbuild_gb(struct vnode *, long, daddr_t, int, int);
-void cluster_write_gb(struct vnode *, struct buf *, u_quad_t, int, int);
+int cluster_wbuild(struct vnode *, long, daddr_t, int, int);
+void cluster_write(struct vnode *, struct buf *, u_quad_t, int, int);
void vfs_bio_bzero_buf(struct buf *bp, int base, int size);
void vfs_bio_set_valid(struct buf *, int base, int size);
void vfs_bio_clrbuf(struct buf *);
@@ -544,9 +536,6 @@ void reassignbuf(struct buf *);
struct buf *trypbuf(int *);
void bwait(struct buf *, u_char, const char *);
void bdone(struct buf *);
-void bpin(struct buf *);
-void bunpin(struct buf *);
-void bunpin_wait(struct buf *);
#endif /* _KERNEL */
diff --git a/freebsd/sys/sys/buf_ring.h b/freebsd/sys/sys/buf_ring.h
index ee7a48ce..4fa72824 100644
--- a/freebsd/sys/sys/buf_ring.h
+++ b/freebsd/sys/sys/buf_ring.h
@@ -47,25 +47,14 @@ struct buf_ring {
int br_prod_size;
int br_prod_mask;
uint64_t br_drops;
- uint64_t br_prod_bufs;
- /*
- * Pad out to next L2 cache line
- */
- uint64_t _pad0[11];
-
- volatile uint32_t br_cons_head;
+ volatile uint32_t br_cons_head __aligned(CACHE_LINE_SIZE);
volatile uint32_t br_cons_tail;
int br_cons_size;
int br_cons_mask;
-
- /*
- * Pad out to next L2 cache line
- */
- uint64_t _pad1[14];
#ifdef DEBUG_BUFRING
struct mtx *br_lock;
#endif
- void *br_ring[0];
+ void *br_ring[0] __aligned(CACHE_LINE_SIZE);
};
/*
@@ -75,9 +64,7 @@ struct buf_ring {
static __inline int
buf_ring_enqueue(struct buf_ring *br, void *buf)
{
- uint32_t prod_head, prod_next;
- uint32_t cons_tail;
- int success;
+ uint32_t prod_head, prod_next, cons_tail;
#ifdef DEBUG_BUFRING
int i;
for (i = br->br_cons_head; i != br->br_prod_head;
@@ -89,35 +76,34 @@ buf_ring_enqueue(struct buf_ring *br, void *buf)
critical_enter();
do {
prod_head = br->br_prod_head;
+ prod_next = (prod_head + 1) & br->br_prod_mask;
cons_tail = br->br_cons_tail;
- prod_next = (prod_head + 1) & br->br_prod_mask;
-
if (prod_next == cons_tail) {
- br->br_drops++;
- critical_exit();
- return (ENOBUFS);
+ rmb();
+ if (prod_head == br->br_prod_head &&
+ cons_tail == br->br_cons_tail) {
+ br->br_drops++;
+ critical_exit();
+ return (ENOBUFS);
+ }
+ continue;
}
-
- success = atomic_cmpset_int((volatile int *)&br->br_prod_head, prod_head,
- prod_next);
- } while (success == 0);
+ } while (!atomic_cmpset_acq_int(&br->br_prod_head, prod_head, prod_next));
#ifdef DEBUG_BUFRING
if (br->br_ring[prod_head] != NULL)
panic("dangling value in enqueue");
#endif
br->br_ring[prod_head] = buf;
- wmb();
/*
* If there are other enqueues in progress
- * that preceeded us, we need to wait for them
+ * that preceded us, we need to wait for them
* to complete
*/
while (br->br_prod_tail != prod_head)
cpu_spinwait();
- br->br_prod_bufs++;
- br->br_prod_tail = prod_next;
+ atomic_store_rel_int(&br->br_prod_tail, prod_next);
critical_exit();
return (0);
}
@@ -130,41 +116,32 @@ static __inline void *
buf_ring_dequeue_mc(struct buf_ring *br)
{
uint32_t cons_head, cons_next;
- uint32_t prod_tail;
void *buf;
- int success;
critical_enter();
do {
cons_head = br->br_cons_head;
- prod_tail = br->br_prod_tail;
-
cons_next = (cons_head + 1) & br->br_cons_mask;
-
- if (cons_head == prod_tail) {
+
+ if (cons_head == br->br_prod_tail) {
critical_exit();
return (NULL);
}
-
- success = atomic_cmpset_int((volatile int *)&br->br_cons_head, cons_head,
- cons_next);
- } while (success == 0);
+ } while (!atomic_cmpset_acq_int(&br->br_cons_head, cons_head, cons_next));
buf = br->br_ring[cons_head];
#ifdef DEBUG_BUFRING
br->br_ring[cons_head] = NULL;
#endif
- rmb();
-
/*
* If there are other dequeues in progress
- * that preceeded us, we need to wait for them
+ * that preceded us, we need to wait for them
* to complete
*/
while (br->br_cons_tail != cons_head)
cpu_spinwait();
- br->br_cons_tail = cons_next;
+ atomic_store_rel_int(&br->br_cons_tail, cons_next);
critical_exit();
return (buf);
@@ -184,9 +161,38 @@ buf_ring_dequeue_sc(struct buf_ring *br)
#endif
uint32_t prod_tail;
void *buf;
-
+
+ /*
+ * This is a workaround to allow using buf_ring on ARM and ARM64.
+ * ARM64TODO: Fix buf_ring in a generic way.
+ * REMARKS: It is suspected that br_cons_head does not require
+ * load_acq operation, but this change was extensively tested
+ * and confirmed it's working. To be reviewed once again in
+ * FreeBSD-12.
+ *
+ * Preventing following situation:
+
+ * Core(0) - buf_ring_enqueue() Core(1) - buf_ring_dequeue_sc()
+ * ----------------------------------------- ----------------------------------------------
+ *
+ * cons_head = br->br_cons_head;
+ * atomic_cmpset_acq_32(&br->br_prod_head, ...));
+ * buf = br->br_ring[cons_head]; <see <1>>
+ * br->br_ring[prod_head] = buf;
+ * atomic_store_rel_32(&br->br_prod_tail, ...);
+ * prod_tail = br->br_prod_tail;
+ * if (cons_head == prod_tail)
+ * return (NULL);
+ * <condition is false and code uses invalid(old) buf>`
+ *
+ * <1> Load (on core 1) from br->br_ring[cons_head] can be reordered (speculative readed) by CPU.
+ */
+#if defined(__arm__) || defined(__aarch64__)
+ cons_head = atomic_load_acq_32(&br->br_cons_head);
+#else
cons_head = br->br_cons_head;
- prod_tail = br->br_prod_tail;
+#endif
+ prod_tail = atomic_load_acq_32(&br->br_prod_tail);
cons_next = (cons_head + 1) & br->br_cons_mask;
#ifdef PREFETCH_DEFINED
@@ -291,6 +297,37 @@ buf_ring_peek(struct buf_ring *br)
return (br->br_ring[br->br_cons_head]);
}
+static __inline void *
+buf_ring_peek_clear_sc(struct buf_ring *br)
+{
+#ifdef DEBUG_BUFRING
+ void *ret;
+
+ if (!mtx_owned(br->br_lock))
+ panic("lock not held on single consumer dequeue");
+#endif
+ /*
+ * I believe it is safe to not have a memory barrier
+ * here because we control cons and tail is worst case
+ * a lagging indicator so we worst case we might
+ * return NULL immediately after a buffer has been enqueued
+ */
+ if (br->br_cons_head == br->br_prod_tail)
+ return (NULL);
+
+#ifdef DEBUG_BUFRING
+ /*
+ * Single consumer, i.e. cons_head will not move while we are
+ * running, so atomic_swap_ptr() is not necessary here.
+ */
+ ret = br->br_ring[br->br_cons_head];
+ br->br_ring[br->br_cons_head] = NULL;
+ return (ret);
+#else
+ return (br->br_ring[br->br_cons_head]);
+#endif
+}
+
static __inline int
buf_ring_full(struct buf_ring *br)
{
diff --git a/freebsd/sys/sys/bufobj.h b/freebsd/sys/sys/bufobj.h
index 916b2565..0fa6c8ce 100644
--- a/freebsd/sys/sys/bufobj.h
+++ b/freebsd/sys/sys/bufobj.h
@@ -53,7 +53,8 @@
#include <sys/queue.h>
#include <sys/_lock.h>
-#include <sys/_mutex.h>
+#include <sys/_rwlock.h>
+#include <sys/_pctrie.h>
struct bufobj;
struct buf_ops;
@@ -62,10 +63,10 @@ extern struct buf_ops buf_ops_bio;
TAILQ_HEAD(buflists, buf);
-/* A Buffer splay list */
+/* A Buffer list & trie */
struct bufv {
struct buflists bv_hd; /* Sorted blocklist */
- struct buf *bv_root; /* Buf splay tree */
+ struct pctrie bv_root; /* Buf trie */
int bv_cnt; /* Number of buffers */
};
@@ -88,13 +89,8 @@ struct buf_ops {
#define BO_BDFLUSH(bo, bp) ((bo)->bo_ops->bop_bdflush((bo), (bp)))
struct bufobj {
- struct mtx bo_mtx; /* Mutex which protects "i" things */
- struct bufv bo_clean; /* i Clean buffers */
- struct bufv bo_dirty; /* i Dirty buffers */
- long bo_numoutput; /* i Writes in progress */
- u_int bo_flag; /* i Flags */
+ struct rwlock bo_lock; /* Lock which protects "i" things */
struct buf_ops *bo_ops; /* - Buffer operations */
- int bo_bsize; /* - Block size for i/o */
struct vm_object *bo_object; /* v Place to store VM object */
LIST_ENTRY(bufobj) bo_synclist; /* S dirty vnode list */
void *bo_private; /* private pointer */
@@ -103,6 +99,11 @@ struct bufobj {
* XXX: only to keep the syncer working
* XXX: for now.
*/
+ struct bufv bo_clean; /* i Clean buffers */
+ struct bufv bo_dirty; /* i Dirty buffers */
+ long bo_numoutput; /* i Writes in progress */
+ u_int bo_flag; /* i Flags */
+ int bo_bsize; /* - Block size for i/o */
};
/*
@@ -111,13 +112,16 @@ struct bufobj {
*/
#define BO_ONWORKLST (1 << 0) /* On syncer work-list */
#define BO_WWAIT (1 << 1) /* Wait for output to complete */
-#define BO_NEEDSGIANT (1 << 2) /* Require giant for child buffers. */
-
-#define BO_MTX(bo) (&(bo)->bo_mtx)
-#define BO_LOCK(bo) mtx_lock(BO_MTX((bo)))
-#define BO_UNLOCK(bo) mtx_unlock(BO_MTX((bo)))
-#define ASSERT_BO_LOCKED(bo) mtx_assert(BO_MTX((bo)), MA_OWNED)
-#define ASSERT_BO_UNLOCKED(bo) mtx_assert(BO_MTX((bo)), MA_NOTOWNED)
+#define BO_DEAD (1 << 2) /* Dead; only with INVARIANTS */
+
+#define BO_LOCKPTR(bo) (&(bo)->bo_lock)
+#define BO_LOCK(bo) rw_wlock(BO_LOCKPTR((bo)))
+#define BO_UNLOCK(bo) rw_wunlock(BO_LOCKPTR((bo)))
+#define BO_RLOCK(bo) rw_rlock(BO_LOCKPTR((bo)))
+#define BO_RUNLOCK(bo) rw_runlock(BO_LOCKPTR((bo)))
+#define ASSERT_BO_WLOCKED(bo) rw_assert(BO_LOCKPTR((bo)), RA_WLOCKED)
+#define ASSERT_BO_LOCKED(bo) rw_assert(BO_LOCKPTR((bo)), RA_LOCKED)
+#define ASSERT_BO_UNLOCKED(bo) rw_assert(BO_LOCKPTR((bo)), RA_UNLOCKED)
void bufobj_wdrop(struct bufobj *bo);
void bufobj_wref(struct bufobj *bo);
diff --git a/freebsd/sys/sys/bus.h b/freebsd/sys/sys/bus.h
index f0406732..6e356e9c 100644
--- a/freebsd/sys/sys/bus.h
+++ b/freebsd/sys/sys/bus.h
@@ -30,7 +30,9 @@
#define _SYS_BUS_H_
#include <machine/_limits.h>
+#include <machine/_bus.h>
#include <sys/_bus_dma.h>
+#include <sys/ioccom.h>
/**
* @defgroup NEWBUS newbus - a generic framework for managing devices
@@ -70,14 +72,66 @@ struct u_device {
char dv_pnpinfo[128]; /**< @brief Plug and play info */
char dv_location[128]; /**< @brief Where is the device? */
uint32_t dv_devflags; /**< @brief API Flags for device */
- uint16_t dv_flags; /**< @brief flags for dev date */
+ uint16_t dv_flags; /**< @brief flags for dev state */
device_state_t dv_state; /**< @brief State of attachment */
/* XXX more driver info? */
};
+/* Flags exported via dv_flags. */
+#define DF_ENABLED 0x01 /* device should be probed/attached */
+#define DF_FIXEDCLASS 0x02 /* devclass specified at create time */
+#define DF_WILDCARD 0x04 /* unit was originally wildcard */
+#define DF_DESCMALLOCED 0x08 /* description was malloced */
+#define DF_QUIET 0x10 /* don't print verbose attach message */
+#define DF_DONENOMATCH 0x20 /* don't execute DEVICE_NOMATCH again */
+#define DF_EXTERNALSOFTC 0x40 /* softc not allocated by us */
+#define DF_REBID 0x80 /* Can rebid after attach */
+#define DF_SUSPENDED 0x100 /* Device is suspended. */
+
+/**
+ * @brief Device request structure used for ioctl's.
+ *
+ * Used for ioctl's on /dev/devctl2. All device ioctl's
+ * must have parameter definitions which begin with dr_name.
+ */
+struct devreq_buffer {
+ void *buffer;
+ size_t length;
+};
+
+struct devreq {
+ char dr_name[128];
+ int dr_flags; /* request-specific flags */
+ union {
+ struct devreq_buffer dru_buffer;
+ void *dru_data;
+ } dr_dru;
+#define dr_buffer dr_dru.dru_buffer /* variable-sized buffer */
+#define dr_data dr_dru.dru_data /* fixed-size buffer */
+};
+
+#define DEV_ATTACH _IOW('D', 1, struct devreq)
+#define DEV_DETACH _IOW('D', 2, struct devreq)
+#define DEV_ENABLE _IOW('D', 3, struct devreq)
+#define DEV_DISABLE _IOW('D', 4, struct devreq)
+#define DEV_SUSPEND _IOW('D', 5, struct devreq)
+#define DEV_RESUME _IOW('D', 6, struct devreq)
+#define DEV_SET_DRIVER _IOW('D', 7, struct devreq)
+#define DEV_RESCAN _IOW('D', 9, struct devreq)
+#define DEV_DELETE _IOW('D', 10, struct devreq)
+
+/* Flags for DEV_DETACH and DEV_DISABLE. */
+#define DEVF_FORCE_DETACH 0x0000001
+
+/* Flags for DEV_SET_DRIVER. */
+#define DEVF_SET_DRIVER_DETACH 0x0000001 /* Detach existing driver. */
+
+/* Flags for DEV_DELETE. */
+#define DEVF_FORCE_DELETE 0x0000001
+
#ifdef _KERNEL
-#include <sys/queue.h>
+#include <sys/eventhandler.h>
#include <sys/kobj.h>
/**
@@ -92,6 +146,15 @@ void devctl_notify(const char *__system, const char *__subsystem,
const char *__type, const char *__data);
void devctl_queue_data_f(char *__data, int __flags);
void devctl_queue_data(char *__data);
+void devctl_safe_quote(char *__dst, const char *__src, size_t len);
+
+/**
+ * Device name parsers. Hook to allow device enumerators to map
+ * scheme-specific names to a device.
+ */
+typedef void (*dev_lookup_fn)(void *arg, const char *name,
+ device_t *result);
+EVENTHANDLER_DECLARE(dev_lookup, dev_lookup_fn);
/**
* @brief A device driver (included mainly for compatibility with
@@ -122,8 +185,7 @@ typedef struct kobj_class driver_t;
typedef struct devclass *devclass_t;
/**
- * @brief A device method (included mainly for compatibility with
- * FreeBSD 4.x).
+ * @brief A device method
*/
#define device_method_t kobj_method_t
@@ -178,11 +240,8 @@ typedef void driver_intr_t(void*);
* spls implicit in names like INTR_TYPE_TTY. In the meantime, don't
* confuse things by renaming them (Grog, 18 July 2000).
*
- * We define this in terms of bits because some devices may belong
- * to multiple classes (and therefore need to be included in
- * multiple interrupt masks, which is what this really serves to
- * indicate. Buses which do interrupt remapping will want to
- * change their type to reflect what sort of devices are underneath.
+ * Buses which do interrupt remapping will want to change their type
+ * to reflect what sort of devices are underneath.
*/
enum intr_type {
INTR_TYPE_TTY = 1,
@@ -213,6 +272,16 @@ enum intr_polarity {
INTR_POLARITY_LOW = 2
};
+/**
+ * CPU sets supported by bus_get_cpus(). Note that not all sets may be
+ * supported for a given device. If a request is not supported by a
+ * device (or its parents), then bus_get_cpus() will fail with EINVAL.
+ */
+enum cpu_sets {
+ LOCAL_CPUS = 0,
+ INTR_CPUS
+};
+
typedef int (*devop_t)(void);
/**
@@ -225,6 +294,31 @@ struct driver {
KOBJ_CLASS_FIELDS;
};
+/**
+ * @brief A resource mapping.
+ */
+struct resource_map {
+ bus_space_tag_t r_bustag;
+ bus_space_handle_t r_bushandle;
+ bus_size_t r_size;
+ void *r_vaddr;
+};
+
+/**
+ * @brief Optional properties of a resource mapping request.
+ */
+struct resource_map_request {
+ size_t size;
+ rman_res_t offset;
+ rman_res_t length;
+ vm_memattr_t memattr;
+};
+
+void resource_init_map_request_impl(struct resource_map_request *_args,
+ size_t _sz);
+#define resource_init_map_request(rmr) \
+ resource_init_map_request_impl((rmr), sizeof(*(rmr)))
+
/*
* Definitions for drivers which need to keep simple lists of resources
* for their child devices.
@@ -240,9 +334,9 @@ struct resource_list_entry {
int rid; /**< @brief resource identifier */
int flags; /**< @brief resource flags */
struct resource *res; /**< @brief the real resource when allocated */
- u_long start; /**< @brief start of resource range */
- u_long end; /**< @brief end of resource range */
- u_long count; /**< @brief count within range */
+ rman_res_t start; /**< @brief start of resource range */
+ rman_res_t end; /**< @brief end of resource range */
+ rman_res_t count; /**< @brief count within range */
};
STAILQ_HEAD(resource_list, resource_list_entry);
@@ -255,10 +349,10 @@ void resource_list_free(struct resource_list *rl);
struct resource_list_entry *
resource_list_add(struct resource_list *rl,
int type, int rid,
- u_long start, u_long end, u_long count);
+ rman_res_t start, rman_res_t end, rman_res_t count);
int resource_list_add_next(struct resource_list *rl,
int type,
- u_long start, u_long end, u_long count);
+ rman_res_t start, rman_res_t end, rman_res_t count);
int resource_list_busy(struct resource_list *rl,
int type, int rid);
int resource_list_reserved(struct resource_list *rl, int type, int rid);
@@ -271,17 +365,20 @@ struct resource *
resource_list_alloc(struct resource_list *rl,
device_t bus, device_t child,
int type, int *rid,
- u_long start, u_long end,
- u_long count, u_int flags);
+ rman_res_t start, rman_res_t end,
+ rman_res_t count, u_int flags);
int resource_list_release(struct resource_list *rl,
device_t bus, device_t child,
int type, int rid, struct resource *res);
+int resource_list_release_active(struct resource_list *rl,
+ device_t bus, device_t child,
+ int type);
struct resource *
resource_list_reserve(struct resource_list *rl,
device_t bus, device_t child,
int type, int *rid,
- u_long start, u_long end,
- u_long count, u_int flags);
+ rman_res_t start, rman_res_t end,
+ rman_res_t count, u_int flags);
int resource_list_unreserve(struct resource_list *rl,
device_t bus, device_t child,
int type, int rid);
@@ -307,12 +404,12 @@ device_t
bus_generic_add_child(device_t dev, u_int order, const char *name,
int unit);
int bus_generic_adjust_resource(device_t bus, device_t child, int type,
- struct resource *r, u_long start,
- u_long end);
+ struct resource *r, rman_res_t start,
+ rman_res_t end);
struct resource *
bus_generic_alloc_resource(device_t bus, device_t child, int type,
- int *rid, u_long start, u_long end,
- u_long count, u_int flags);
+ int *rid, rman_res_t start, rman_res_t end,
+ rman_res_t count, u_int flags);
int bus_generic_attach(device_t dev);
int bus_generic_bind_intr(device_t dev, device_t child,
struct resource *irq, int cpu);
@@ -326,12 +423,22 @@ int bus_generic_deactivate_resource(device_t dev, device_t child, int type,
int rid, struct resource *r);
int bus_generic_detach(device_t dev);
void bus_generic_driver_added(device_t dev, driver_t *driver);
+int bus_generic_get_cpus(device_t dev, device_t child, enum cpu_sets op,
+ size_t setsize, struct _cpuset *cpuset);
bus_dma_tag_t
bus_generic_get_dma_tag(device_t dev, device_t child);
+bus_space_tag_t
+ bus_generic_get_bus_tag(device_t dev, device_t child);
+int bus_generic_get_domain(device_t dev, device_t child, int *domain);
struct resource_list *
bus_generic_get_resource_list (device_t, device_t);
+int bus_generic_map_resource(device_t dev, device_t child, int type,
+ struct resource *r,
+ struct resource_map_request *args,
+ struct resource_map *map);
void bus_generic_new_pass(device_t dev);
int bus_print_child_header(device_t dev, device_t child);
+int bus_print_child_domain(device_t dev, device_t child);
int bus_print_child_footer(device_t dev, device_t child);
int bus_generic_print_child(device_t dev, device_t child);
int bus_generic_probe(device_t dev);
@@ -340,6 +447,7 @@ int bus_generic_read_ivar(device_t dev, device_t child, int which,
int bus_generic_release_resource(device_t bus, device_t child,
int type, int rid, struct resource *r);
int bus_generic_resume(device_t dev);
+int bus_generic_resume_child(device_t dev, device_t child);
int bus_generic_setup_intr(device_t dev, device_t child,
struct resource *irq, int flags,
driver_filter_t *filter, driver_intr_t *intr,
@@ -347,21 +455,26 @@ int bus_generic_setup_intr(device_t dev, device_t child,
struct resource *
bus_generic_rl_alloc_resource (device_t, device_t, int, int *,
- u_long, u_long, u_long, u_int);
+ rman_res_t, rman_res_t, rman_res_t, u_int);
void bus_generic_rl_delete_resource (device_t, device_t, int, int);
-int bus_generic_rl_get_resource (device_t, device_t, int, int, u_long *,
- u_long *);
-int bus_generic_rl_set_resource (device_t, device_t, int, int, u_long,
- u_long);
+int bus_generic_rl_get_resource (device_t, device_t, int, int, rman_res_t *,
+ rman_res_t *);
+int bus_generic_rl_set_resource (device_t, device_t, int, int, rman_res_t,
+ rman_res_t);
int bus_generic_rl_release_resource (device_t, device_t, int, int,
struct resource *);
int bus_generic_shutdown(device_t dev);
int bus_generic_suspend(device_t dev);
+int bus_generic_suspend_child(device_t dev, device_t child);
int bus_generic_teardown_intr(device_t dev, device_t child,
struct resource *irq, void *cookie);
+int bus_generic_unmap_resource(device_t dev, device_t child, int type,
+ struct resource *r,
+ struct resource_map *map);
int bus_generic_write_ivar(device_t dev, device_t child, int which,
uintptr_t value);
+int bus_null_rescan(device_t dev);
/*
* Wrapper functions for the BUS_*_RESOURCE methods to make client code
@@ -380,15 +493,24 @@ void bus_release_resources(device_t dev, const struct resource_spec *rs,
struct resource **res);
int bus_adjust_resource(device_t child, int type, struct resource *r,
- u_long start, u_long end);
+ rman_res_t start, rman_res_t end);
struct resource *bus_alloc_resource(device_t dev, int type, int *rid,
- u_long start, u_long end, u_long count,
- u_int flags);
+ rman_res_t start, rman_res_t end,
+ rman_res_t count, u_int flags);
int bus_activate_resource(device_t dev, int type, int rid,
struct resource *r);
int bus_deactivate_resource(device_t dev, int type, int rid,
struct resource *r);
+int bus_map_resource(device_t dev, int type, struct resource *r,
+ struct resource_map_request *args,
+ struct resource_map *map);
+int bus_unmap_resource(device_t dev, int type, struct resource *r,
+ struct resource_map *map);
+int bus_get_cpus(device_t dev, enum cpu_sets op, size_t setsize,
+ struct _cpuset *cpuset);
bus_dma_tag_t bus_get_dma_tag(device_t dev);
+bus_space_tag_t bus_get_bus_tag(device_t dev);
+int bus_get_domain(device_t dev, int *domain);
int bus_release_resource(device_t dev, int type, int rid,
struct resource *r);
int bus_free_resource(device_t dev, int type, struct resource *r);
@@ -398,13 +520,13 @@ int bus_setup_intr(device_t dev, struct resource *r, int flags,
int bus_teardown_intr(device_t dev, struct resource *r, void *cookie);
int bus_bind_intr(device_t dev, struct resource *r, int cpu);
int bus_describe_intr(device_t dev, struct resource *irq, void *cookie,
- const char *fmt, ...);
+ const char *fmt, ...) __printflike(4, 5);
int bus_set_resource(device_t dev, int type, int rid,
- u_long start, u_long count);
+ rman_res_t start, rman_res_t count);
int bus_get_resource(device_t dev, int type, int rid,
- u_long *startp, u_long *countp);
-u_long bus_get_resource_start(device_t dev, int type, int rid);
-u_long bus_get_resource_count(device_t dev, int type, int rid);
+ rman_res_t *startp, rman_res_t *countp);
+rman_res_t bus_get_resource_start(device_t dev, int type, int rid);
+rman_res_t bus_get_resource_count(device_t dev, int type, int rid);
void bus_delete_resource(device_t dev, int type, int rid);
int bus_child_present(device_t child);
int bus_child_pnpinfo_str(device_t child, char *buf, size_t buflen);
@@ -414,7 +536,14 @@ void bus_enumerate_hinted_children(device_t bus);
static __inline struct resource *
bus_alloc_resource_any(device_t dev, int type, int *rid, u_int flags)
{
- return (bus_alloc_resource(dev, type, rid, 0ul, ~0ul, 1, flags));
+ return (bus_alloc_resource(dev, type, rid, 0, ~0, 1, flags));
+}
+
+static __inline struct resource *
+bus_alloc_resource_anywhere(device_t dev, int type, int *rid,
+ rman_res_t count, u_int flags)
+{
+ return (bus_alloc_resource(dev, type, rid, 0, ~0, count, flags));
}
/*
@@ -450,7 +579,9 @@ struct sysctl_oid *device_get_sysctl_tree(device_t dev);
int device_is_alive(device_t dev); /* did probe succeed? */
int device_is_attached(device_t dev); /* did attach succeed? */
int device_is_enabled(device_t dev);
+int device_is_suspended(device_t dev);
int device_is_quiet(device_t dev);
+device_t device_lookup_by_name(const char *name);
int device_print_prettyname(device_t dev);
int device_printf(device_t dev, const char *, ...) __printflike(2, 3);
int device_probe(device_t dev);
@@ -461,6 +592,7 @@ void device_quiet(device_t dev);
void device_set_desc(device_t dev, const char* desc);
void device_set_desc_copy(device_t dev, const char* desc);
int device_set_devclass(device_t dev, const char *classname);
+int device_set_devclass_fixed(device_t dev, const char *classname);
int device_set_driver(device_t dev, driver_t *driver);
void device_set_flags(device_t dev, u_int32_t flags);
void device_set_softc(device_t dev, void *softc);
@@ -513,6 +645,8 @@ int resource_set_long(const char *name, int unit, const char *resname,
long value);
int resource_set_string(const char *name, int unit, const char *resname,
const char *value);
+int resource_unset_value(const char *name, int unit, const char *resname);
+
/*
* Functions for maintaining and checking consistency of
* bus information exported to userspace.
@@ -548,7 +682,7 @@ void bus_data_generation_update(void);
#define BUS_PROBE_DEFAULT (-20) /* Base OS default driver */
#define BUS_PROBE_LOW_PRIORITY (-40) /* Older, less desirable drivers */
#define BUS_PROBE_GENERIC (-100) /* generic driver for dev */
-#define BUS_PROBE_HOOVER (-500) /* Generic dev for all devs on bus */
+#define BUS_PROBE_HOOVER (-1000000) /* Driver for any dev on bus */
#define BUS_PROBE_NOWILDCARD (-2000000000) /* No wildcard device matches */
/**
@@ -569,6 +703,12 @@ void bus_data_generation_update(void);
#define BUS_PASS_SCHEDULER 60 /* Start scheduler. */
#define BUS_PASS_DEFAULT __INT_MAX /* Everything else. */
+#define BUS_PASS_ORDER_FIRST 0
+#define BUS_PASS_ORDER_EARLY 2
+#define BUS_PASS_ORDER_MIDDLE 5
+#define BUS_PASS_ORDER_LATE 7
+#define BUS_PASS_ORDER_LAST 9
+
extern int bus_current_pass;
void bus_set_pass(int pass);
diff --git a/freebsd/sys/sys/bus_dma.h b/freebsd/sys/sys/bus_dma.h
index 6e91a012..1a2ecd6b 100644
--- a/freebsd/sys/sys/bus_dma.h
+++ b/freebsd/sys/sys/bus_dma.h
@@ -168,7 +168,7 @@ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op);
*/
/* XXX Should probably allow specification of alignment */
int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
- bus_size_t boundary, bus_addr_t lowaddr,
+ bus_addr_t boundary, bus_addr_t lowaddr,
bus_addr_t highaddr, bus_dma_filter_t *filtfunc,
void *filtfuncarg, bus_size_t maxsize, int nsegments,
bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
@@ -240,6 +240,15 @@ int bus_dmamap_load_mem(bus_dma_tag_t dmat, bus_dmamap_t map,
void *callback_arg, int flags);
/*
+ * Placeholder for use by busdma implementations which do not benefit
+ * from optimized procedure to load an array of vm_page_t. Falls back
+ * to do _bus_dmamap_load_phys() in loop.
+ */
+int bus_dmamap_load_ma_triv(bus_dma_tag_t dmat, bus_dmamap_t map,
+ struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
+ bus_dma_segment_t *segs, int *segp);
+
+/*
* XXX sparc64 uses the same interface, but a much different implementation.
* <machine/bus_dma.h> for the sparc64 arch contains the equivalent
* declarations.
@@ -273,13 +282,25 @@ int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map);
/*
- * Perform a synchronization operation on the given map.
+ * Perform a synchronization operation on the given map. If the map
+ * is NULL we have a fully IO-coherent system. On every ARM architecture
+ * there must be a memory barrier placed to ensure that all data
+ * accesses are visible before going any further.
*/
void _bus_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_dmasync_op_t);
+#if defined(__arm__)
+ #define __BUS_DMAMAP_SYNC_DEFAULT mb()
+#elif defined(__aarch64__)
+ #define __BUS_DMAMAP_SYNC_DEFAULT dmb(sy)
+#else
+ #define __BUS_DMAMAP_SYNC_DEFAULT do {} while (0)
+#endif
#define bus_dmamap_sync(dmat, dmamap, op) \
do { \
if ((dmamap) != NULL) \
_bus_dmamap_sync(dmat, dmamap, op); \
+ else \
+ __BUS_DMAMAP_SYNC_DEFAULT; \
} while (0)
/*
@@ -317,6 +338,10 @@ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
vm_paddr_t paddr, bus_size_t buflen,
int flags, bus_dma_segment_t *segs, int *segp);
+int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map,
+ struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
+ bus_dma_segment_t *segs, int *segp);
+
bus_dma_segment_t *_bus_dmamap_complete(bus_dma_tag_t dmat,
bus_dmamap_t map,
bus_dma_segment_t *segs,
diff --git a/freebsd/sys/sys/callout.h b/freebsd/sys/sys/callout.h
index 95b9a32b..f58fa587 100644
--- a/freebsd/sys/sys/callout.h
+++ b/freebsd/sys/sys/callout.h
@@ -43,45 +43,95 @@
#define CALLOUT_LOCAL_ALLOC 0x0001 /* was allocated from callfree */
#define CALLOUT_ACTIVE 0x0002 /* callout is currently active */
#define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */
-#define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */
+#define CALLOUT_MPSAFE 0x0008 /* deprecated */
#define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */
#define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */
#define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */
+#define CALLOUT_PROCESSED 0x0080 /* callout in wheel or processing list? */
+#define CALLOUT_DIRECT 0x0100 /* allow exec from hw int context */
+
+#define C_DIRECT_EXEC 0x0001 /* direct execution of callout */
+#define C_PRELBITS 7
+#define C_PRELRANGE ((1 << C_PRELBITS) - 1)
+#define C_PREL(x) (((x) + 1) << 1)
+#define C_PRELGET(x) (int)((((x) >> 1) & C_PRELRANGE) - 1)
+#define C_HARDCLOCK 0x0100 /* align to hardclock() calls */
+#define C_ABSOLUTE 0x0200 /* event time is absolute. */
+#define C_PRECALC 0x0400 /* event time is pre-calculated. */
struct callout_handle {
struct callout *callout;
};
-#ifdef _KERNEL
-extern int ncallout;
+/* Flags for callout_stop_safe() */
+#define CS_DRAIN 0x0001 /* callout_drain(), wait allowed */
+#define CS_EXECUTING 0x0002 /* Positive return value indicates that
+ the callout was executing */
+#ifdef _KERNEL
+/*
+ * Note the flags field is actually *two* fields. The c_flags
+ * field is the one that caller operations that may, or may not have
+ * a lock touches i.e. callout_deactivate(). The other, the c_iflags,
+ * is the internal flags that *must* be kept correct on which the
+ * callout system depend on e.g. callout_pending().
+ * The c_iflag is used internally by the callout system to determine which
+ * list the callout is on and track internal state. Callers *should not*
+ * use the c_flags field directly but should use the macros provided.
+ *
+ * The c_iflags field holds internal flags that are protected by internal
+ * locks of the callout subsystem. The c_flags field holds external flags.
+ * The caller must hold its own lock while manipulating or reading external
+ * flags via callout_active(), callout_deactivate(), callout_reset*(), or
+ * callout_stop() to avoid races.
+ */
#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE)
#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE)
-#define callout_drain(c) _callout_stop_safe(c, 1)
+#define callout_drain(c) _callout_stop_safe(c, CS_DRAIN, NULL)
void callout_init(struct callout *, int);
void _callout_init_lock(struct callout *, struct lock_object *, int);
#define callout_init_mtx(c, mtx, flags) \
_callout_init_lock((c), ((mtx) != NULL) ? &(mtx)->lock_object : \
NULL, (flags))
+#define callout_init_rm(c, rm, flags) \
+ _callout_init_lock((c), ((rm) != NULL) ? &(rm)->lock_object : \
+ NULL, (flags))
#define callout_init_rw(c, rw, flags) \
_callout_init_lock((c), ((rw) != NULL) ? &(rw)->lock_object : \
NULL, (flags))
-#define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING)
-int callout_reset_on(struct callout *, int, void (*)(void *), void *, int);
+#define callout_pending(c) ((c)->c_iflags & CALLOUT_PENDING)
+int callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t,
+ void (*)(void *), void *, int, int);
+#define callout_reset_sbt(c, sbt, pr, fn, arg, flags) \
+ callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), -1, (flags))
+#define callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags) \
+ callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), PCPU_GET(cpuid),\
+ (flags))
+#define callout_reset_on(c, to_ticks, fn, arg, cpu) \
+ callout_reset_sbt_on((c), tick_sbt * (to_ticks), 0, (fn), (arg), \
+ (cpu), C_HARDCLOCK)
#define callout_reset(c, on_tick, fn, arg) \
- callout_reset_on((c), (on_tick), (fn), (arg), (c)->c_cpu)
+ callout_reset_on((c), (on_tick), (fn), (arg), -1)
#define callout_reset_curcpu(c, on_tick, fn, arg) \
callout_reset_on((c), (on_tick), (fn), (arg), PCPU_GET(cpuid))
+#define callout_schedule_sbt_on(c, sbt, pr, cpu, flags) \
+ callout_reset_sbt_on((c), (sbt), (pr), (c)->c_func, (c)->c_arg, \
+ (cpu), (flags))
+#define callout_schedule_sbt(c, sbt, pr, flags) \
+ callout_schedule_sbt_on((c), (sbt), (pr), -1, (flags))
+#define callout_schedule_sbt_curcpu(c, sbt, pr, flags) \
+ callout_schedule_sbt_on((c), (sbt), (pr), PCPU_GET(cpuid), (flags))
int callout_schedule(struct callout *, int);
int callout_schedule_on(struct callout *, int, int);
#define callout_schedule_curcpu(c, on_tick) \
callout_schedule_on((c), (on_tick), PCPU_GET(cpuid))
-#define callout_stop(c) _callout_stop_safe(c, 0)
-int _callout_stop_safe(struct callout *, int);
-void callout_tick(void);
-int callout_tickstofirst(int limit);
-extern void (*callout_new_inserted)(int cpu, int ticks);
-
+#define callout_stop(c) _callout_stop_safe(c, 0, NULL)
+int _callout_stop_safe(struct callout *, int, void (*)(void *));
+void callout_process(sbintime_t now);
+#define callout_async_drain(c, d) \
+ _callout_stop_safe(c, 0, d)
+void callout_when(sbintime_t sbt, sbintime_t precision, int flags,
+ sbintime_t *sbt_res, sbintime_t *prec_res);
#endif
#endif /* _SYS_CALLOUT_H_ */
diff --git a/freebsd/sys/sys/capability.h b/freebsd/sys/sys/capability.h
index 81446a28..8b1c229f 100644
--- a/freebsd/sys/sys/capability.h
+++ b/freebsd/sys/sys/capability.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2008-2010 Robert N. M. Watson
+ * Copyright (c) 2014 Robert N. M. Watson
* All rights reserved.
*
* This software was developed at the University of Cambridge Computer
@@ -30,180 +30,14 @@
*/
/*
- * Definitions for FreeBSD capabilities facility.
+ * Historically, the key userspace and kernel Capsicum definitions were found
+ * in this file. However, it conflicted with POSIX.1e's capability.h, so has
+ * been renamed capsicum.h. The file remains for backwards compatibility
+ * reasons as a nested include.
*/
#ifndef _SYS_CAPABILITY_H_
#define _SYS_CAPABILITY_H_
-#include <sys/cdefs.h>
-#include <sys/types.h>
-
-#include <sys/file.h>
-
-/*
- * Possible rights on capabilities.
- *
- * Notes:
- * Some system calls don't require a capability in order to perform an
- * operation on an fd. These include: close, dup, dup2.
- *
- * sendfile is authorized using CAP_READ on the file and CAP_WRITE on the
- * socket.
- *
- * mmap() and aio*() system calls will need special attention as they may
- * involve reads or writes depending a great deal on context.
- */
-
-/* General file I/O. */
-#define CAP_READ 0x0000000000000001ULL /* read/recv */
-#define CAP_WRITE 0x0000000000000002ULL /* write/send */
-#define CAP_MMAP 0x0000000000000004ULL /* mmap */
-#define CAP_MAPEXEC 0x0000000000000008ULL /* mmap(2) as exec */
-#define CAP_FEXECVE 0x0000000000000010ULL
-#define CAP_FSYNC 0x0000000000000020ULL
-#define CAP_FTRUNCATE 0x0000000000000040ULL
-#define CAP_SEEK 0x0000000000000080ULL
-
-/* VFS methods. */
-#define CAP_FCHFLAGS 0x0000000000000100ULL
-#define CAP_FCHDIR 0x0000000000000200ULL
-#define CAP_FCHMOD 0x0000000000000400ULL
-#define CAP_FCHOWN 0x0000000000000800ULL
-#define CAP_FCNTL 0x0000000000001000ULL
-#define CAP_FPATHCONF 0x0000000000002000ULL
-#define CAP_FLOCK 0x0000000000004000ULL
-#define CAP_FSCK 0x0000000000008000ULL
-#define CAP_FSTAT 0x0000000000010000ULL
-#define CAP_FSTATFS 0x0000000000020000ULL
-#define CAP_FUTIMES 0x0000000000040000ULL
-#define CAP_CREATE 0x0000000000080000ULL
-#define CAP_DELETE 0x0000000000100000ULL
-#define CAP_MKDIR 0x0000000000200000ULL
-#define CAP_RMDIR 0x0000000000400000ULL
-#define CAP_MKFIFO 0x0000000000800000ULL
-
-/* Lookups - used to constrain *at() calls. */
-#define CAP_LOOKUP 0x0000000001000000ULL
-
-/* Extended attributes. */
-#define CAP_EXTATTR_DELETE 0x0000000002000000ULL
-#define CAP_EXTATTR_GET 0x0000000004000000ULL
-#define CAP_EXTATTR_LIST 0x0000000008000000ULL
-#define CAP_EXTATTR_SET 0x0000000010000000ULL
-
-/* Access Control Lists. */
-#define CAP_ACL_CHECK 0x0000000020000000ULL
-#define CAP_ACL_DELETE 0x0000000040000000ULL
-#define CAP_ACL_GET 0x0000000080000000ULL
-#define CAP_ACL_SET 0x0000000100000000ULL
-
-/* Socket operations. */
-#define CAP_ACCEPT 0x0000000200000000ULL
-#define CAP_BIND 0x0000000400000000ULL
-#define CAP_CONNECT 0x0000000800000000ULL
-#define CAP_GETPEERNAME 0x0000001000000000ULL
-#define CAP_GETSOCKNAME 0x0000002000000000ULL
-#define CAP_GETSOCKOPT 0x0000004000000000ULL
-#define CAP_LISTEN 0x0000008000000000ULL
-#define CAP_PEELOFF 0x0000010000000000ULL
-#define CAP_SETSOCKOPT 0x0000020000000000ULL
-#define CAP_SHUTDOWN 0x0000040000000000ULL
-
-#define CAP_SOCK_ALL \
- (CAP_ACCEPT | CAP_BIND | CAP_CONNECT \
- | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT \
- | CAP_LISTEN | CAP_PEELOFF | CAP_SETSOCKOPT | CAP_SHUTDOWN)
-
-/* Mandatory Access Control. */
-#define CAP_MAC_GET 0x0000080000000000ULL
-#define CAP_MAC_SET 0x0000100000000000ULL
-
-/* Methods on semaphores. */
-#define CAP_SEM_GETVALUE 0x0000200000000000ULL
-#define CAP_SEM_POST 0x0000400000000000ULL
-#define CAP_SEM_WAIT 0x0000800000000000ULL
-
-/* kqueue events. */
-#define CAP_POLL_EVENT 0x0001000000000000ULL
-#define CAP_POST_EVENT 0x0002000000000000ULL
-
-/* Strange and powerful rights that should not be given lightly. */
-#define CAP_IOCTL 0x0004000000000000ULL
-#define CAP_TTYHOOK 0x0008000000000000ULL
-
-/* Process management via process descriptors. */
-#define CAP_PDGETPID 0x0010000000000000ULL
-#define CAP_PDWAIT 0x0020000000000000ULL
-#define CAP_PDKILL 0x0040000000000000ULL
-
-/* The mask of all valid method rights. */
-#define CAP_MASK_VALID 0x007fffffffffffffULL
-
-#ifdef _KERNEL
-
-#define IN_CAPABILITY_MODE(td) (td->td_ucred->cr_flags & CRED_FLAG_CAPMODE)
-
-/*
- * Create a capability to wrap a file object.
- */
-int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
- int *capfd);
-
-/*
- * Unwrap a capability if its rights mask is a superset of 'rights'.
- *
- * Unwrapping a non-capability is effectively a no-op; the value of fp_cap
- * is simply copied into fpp.
- */
-int cap_funwrap(struct file *fp_cap, cap_rights_t rights,
- struct file **fpp);
-int cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights,
- u_char *maxprotp, struct file **fpp);
-
-/*
- * For the purposes of procstat(1) and similar tools, allow kern_descrip.c to
- * extract the rights from a capability. However, this should not be used by
- * kernel code generally, instead cap_funwrap() should be used in order to
- * keep all access control in one place.
- */
-cap_rights_t cap_rights(struct file *fp_cap);
-
-#else /* !_KERNEL */
-
-__BEGIN_DECLS
-
-/*
- * cap_enter(): Cause the process to enter capability mode, which will
- * prevent it from directly accessing global namespaces. System calls will
- * be limited to process-local, process-inherited, or file descriptor
- * operations. If already in capability mode, a no-op.
- *
- * Currently, process-inherited operations are not properly handled -- in
- * particular, we're interested in things like waitpid(2), kill(2), etc,
- * being properly constrained. One possible solution is to introduce process
- * descriptors.
- */
-int cap_enter(void);
-
-/*
- * cap_getmode(): Are we in capability mode?
- */
-int cap_getmode(u_int* modep);
-
-/*
- * cap_new(): Create a new capability derived from an existing file
- * descriptor with the specified rights. If the existing file descriptor is
- * a capability, then the new rights must be a subset of the existing rights.
- */
-int cap_new(int fd, cap_rights_t rights);
-
-/*
- * cap_getrights(): Query the rights on a capability.
- */
-int cap_getrights(int fd, cap_rights_t *rightsp);
-
-__END_DECLS
-
-#endif /* !_KERNEL */
+#include <sys/capsicum.h>
#endif /* !_SYS_CAPABILITY_H_ */
diff --git a/freebsd/sys/sys/caprights.h b/freebsd/sys/sys/caprights.h
new file mode 100644
index 00000000..eb8e454f
--- /dev/null
+++ b/freebsd/sys/sys/caprights.h
@@ -0,0 +1,61 @@
+/*-
+ * Copyright (c) 2013 FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Pawel Jakub Dawidek under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_CAPRIGHTS_H_
+#define _SYS_CAPRIGHTS_H_
+
+/*
+ * The top two bits in the first element of the cr_rights[] array contain
+ * total number of elements in the array - 2. This means if those two bits are
+ * equal to 0, we have 2 array elements.
+ * The top two bits in all remaining array elements should be 0.
+ * The next five bits contain array index. Only one bit is used and bit position
+ * in this five-bits range defines array index. This means there can be at most
+ * five array elements.
+ */
+#define CAP_RIGHTS_VERSION_00 0
+/*
+#define CAP_RIGHTS_VERSION_01 1
+#define CAP_RIGHTS_VERSION_02 2
+#define CAP_RIGHTS_VERSION_03 3
+*/
+#define CAP_RIGHTS_VERSION CAP_RIGHTS_VERSION_00
+
+struct cap_rights {
+ uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
+};
+
+#ifndef _CAP_RIGHTS_T_DECLARED
+#define _CAP_RIGHTS_T_DECLARED
+typedef struct cap_rights cap_rights_t;
+#endif
+
+#endif /* !_SYS_CAPRIGHTS_H_ */
diff --git a/freebsd/sys/sys/capsicum.h b/freebsd/sys/sys/capsicum.h
new file mode 100644
index 00000000..af4da032
--- /dev/null
+++ b/freebsd/sys/sys/capsicum.h
@@ -0,0 +1,450 @@
+/*-
+ * Copyright (c) 2008-2010, 2015 Robert N. M. Watson
+ * Copyright (c) 2012 FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed at the University of Cambridge Computer
+ * Laboratory with support from a grant from Google, Inc.
+ *
+ * Portions of this software were developed by Pawel Jakub Dawidek under
+ * sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Definitions for FreeBSD capabilities facility.
+ */
+#ifndef _SYS_CAPSICUM_H_
+#define _SYS_CAPSICUM_H_
+
+#include <sys/cdefs.h>
+#include <rtems/bsd/sys/param.h>
+
+#include <sys/caprights.h>
+#include <sys/file.h>
+#include <sys/fcntl.h>
+
+#ifndef _KERNEL
+#include <stdbool.h>
+#endif
+
+#define CAPRIGHT(idx, bit) ((1ULL << (57 + (idx))) | (bit))
+
+/*
+ * Possible rights on capabilities.
+ *
+ * Notes:
+ * Some system calls don't require a capability in order to perform an
+ * operation on an fd. These include: close, dup, dup2.
+ *
+ * sendfile is authorized using CAP_READ on the file and CAP_WRITE on the
+ * socket.
+ *
+ * mmap() and aio*() system calls will need special attention as they may
+ * involve reads or writes depending a great deal on context.
+ */
+
+/* INDEX 0 */
+
+/*
+ * General file I/O.
+ */
+/* Allows for openat(O_RDONLY), read(2), readv(2). */
+#define CAP_READ CAPRIGHT(0, 0x0000000000000001ULL)
+/* Allows for openat(O_WRONLY | O_APPEND), write(2), writev(2). */
+#define CAP_WRITE CAPRIGHT(0, 0x0000000000000002ULL)
+/* Allows for lseek(fd, 0, SEEK_CUR). */
+#define CAP_SEEK_TELL CAPRIGHT(0, 0x0000000000000004ULL)
+/* Allows for lseek(2). */
+#define CAP_SEEK (CAP_SEEK_TELL | 0x0000000000000008ULL)
+/* Allows for aio_read(2), pread(2), preadv(2). */
+#define CAP_PREAD (CAP_SEEK | CAP_READ)
+/*
+ * Allows for aio_write(2), openat(O_WRONLY) (without O_APPEND), pwrite(2),
+ * pwritev(2).
+ */
+#define CAP_PWRITE (CAP_SEEK | CAP_WRITE)
+/* Allows for mmap(PROT_NONE). */
+#define CAP_MMAP CAPRIGHT(0, 0x0000000000000010ULL)
+/* Allows for mmap(PROT_READ). */
+#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ)
+/* Allows for mmap(PROT_WRITE). */
+#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE)
+/* Allows for mmap(PROT_EXEC). */
+#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000020ULL)
+/* Allows for mmap(PROT_READ | PROT_WRITE). */
+#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W)
+/* Allows for mmap(PROT_READ | PROT_EXEC). */
+#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X)
+/* Allows for mmap(PROT_WRITE | PROT_EXEC). */
+#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X)
+/* Allows for mmap(PROT_READ | PROT_WRITE | PROT_EXEC). */
+#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X)
+/* Allows for openat(O_CREAT). */
+#define CAP_CREATE CAPRIGHT(0, 0x0000000000000040ULL)
+/* Allows for openat(O_EXEC) and fexecve(2) in turn. */
+#define CAP_FEXECVE CAPRIGHT(0, 0x0000000000000080ULL)
+/* Allows for openat(O_SYNC), openat(O_FSYNC), fsync(2), aio_fsync(2). */
+#define CAP_FSYNC CAPRIGHT(0, 0x0000000000000100ULL)
+/* Allows for openat(O_TRUNC), ftruncate(2). */
+#define CAP_FTRUNCATE CAPRIGHT(0, 0x0000000000000200ULL)
+
+/* Lookups - used to constrain *at() calls. */
+#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
+
+/* VFS methods. */
+/* Allows for fchdir(2). */
+#define CAP_FCHDIR CAPRIGHT(0, 0x0000000000000800ULL)
+/* Allows for fchflags(2). */
+#define CAP_FCHFLAGS CAPRIGHT(0, 0x0000000000001000ULL)
+/* Allows for fchflags(2) and chflagsat(2). */
+#define CAP_CHFLAGSAT (CAP_FCHFLAGS | CAP_LOOKUP)
+/* Allows for fchmod(2). */
+#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
+/* Allows for fchmod(2) and fchmodat(2). */
+#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
+/* Allows for fchown(2). */
+#define CAP_FCHOWN CAPRIGHT(0, 0x0000000000004000ULL)
+/* Allows for fchown(2) and fchownat(2). */
+#define CAP_FCHOWNAT (CAP_FCHOWN | CAP_LOOKUP)
+/* Allows for fcntl(2). */
+#define CAP_FCNTL CAPRIGHT(0, 0x0000000000008000ULL)
+/*
+ * Allows for flock(2), openat(O_SHLOCK), openat(O_EXLOCK),
+ * fcntl(F_SETLK_REMOTE), fcntl(F_SETLKW), fcntl(F_SETLK), fcntl(F_GETLK).
+ */
+#define CAP_FLOCK CAPRIGHT(0, 0x0000000000010000ULL)
+/* Allows for fpathconf(2). */
+#define CAP_FPATHCONF CAPRIGHT(0, 0x0000000000020000ULL)
+/* Allows for UFS background-fsck operations. */
+#define CAP_FSCK CAPRIGHT(0, 0x0000000000040000ULL)
+/* Allows for fstat(2). */
+#define CAP_FSTAT CAPRIGHT(0, 0x0000000000080000ULL)
+/* Allows for fstat(2), fstatat(2) and faccessat(2). */
+#define CAP_FSTATAT (CAP_FSTAT | CAP_LOOKUP)
+/* Allows for fstatfs(2). */
+#define CAP_FSTATFS CAPRIGHT(0, 0x0000000000100000ULL)
+/* Allows for futimens(2) and futimes(2). */
+#define CAP_FUTIMES CAPRIGHT(0, 0x0000000000200000ULL)
+/* Allows for futimens(2), futimes(2), futimesat(2) and utimensat(2). */
+#define CAP_FUTIMESAT (CAP_FUTIMES | CAP_LOOKUP)
+/* Allows for linkat(2) (target directory descriptor). */
+#define CAP_LINKAT_TARGET (CAP_LOOKUP | 0x0000000000400000ULL)
+/* Allows for mkdirat(2). */
+#define CAP_MKDIRAT (CAP_LOOKUP | 0x0000000000800000ULL)
+/* Allows for mkfifoat(2). */
+#define CAP_MKFIFOAT (CAP_LOOKUP | 0x0000000001000000ULL)
+/* Allows for mknodat(2). */
+#define CAP_MKNODAT (CAP_LOOKUP | 0x0000000002000000ULL)
+/* Allows for renameat(2) (source directory descriptor). */
+#define CAP_RENAMEAT_SOURCE (CAP_LOOKUP | 0x0000000004000000ULL)
+/* Allows for symlinkat(2). */
+#define CAP_SYMLINKAT (CAP_LOOKUP | 0x0000000008000000ULL)
+/*
+ * Allows for unlinkat(2) and renameat(2) if destination object exists and
+ * will be removed.
+ */
+#define CAP_UNLINKAT (CAP_LOOKUP | 0x0000000010000000ULL)
+
+/* Socket operations. */
+/* Allows for accept(2) and accept4(2). */
+#define CAP_ACCEPT CAPRIGHT(0, 0x0000000020000000ULL)
+/* Allows for bind(2). */
+#define CAP_BIND CAPRIGHT(0, 0x0000000040000000ULL)
+/* Allows for connect(2). */
+#define CAP_CONNECT CAPRIGHT(0, 0x0000000080000000ULL)
+/* Allows for getpeername(2). */
+#define CAP_GETPEERNAME CAPRIGHT(0, 0x0000000100000000ULL)
+/* Allows for getsockname(2). */
+#define CAP_GETSOCKNAME CAPRIGHT(0, 0x0000000200000000ULL)
+/* Allows for getsockopt(2). */
+#define CAP_GETSOCKOPT CAPRIGHT(0, 0x0000000400000000ULL)
+/* Allows for listen(2). */
+#define CAP_LISTEN CAPRIGHT(0, 0x0000000800000000ULL)
+/* Allows for sctp_peeloff(2). */
+#define CAP_PEELOFF CAPRIGHT(0, 0x0000001000000000ULL)
+#define CAP_RECV CAP_READ
+#define CAP_SEND CAP_WRITE
+/* Allows for setsockopt(2). */
+#define CAP_SETSOCKOPT CAPRIGHT(0, 0x0000002000000000ULL)
+/* Allows for shutdown(2). */
+#define CAP_SHUTDOWN CAPRIGHT(0, 0x0000004000000000ULL)
+
+/* Allows for bindat(2) on a directory descriptor. */
+#define CAP_BINDAT (CAP_LOOKUP | 0x0000008000000000ULL)
+/* Allows for connectat(2) on a directory descriptor. */
+#define CAP_CONNECTAT (CAP_LOOKUP | 0x0000010000000000ULL)
+
+/* Allows for linkat(2) (source directory descriptor). */
+#define CAP_LINKAT_SOURCE (CAP_LOOKUP | 0x0000020000000000ULL)
+/* Allows for renameat(2) (target directory descriptor). */
+#define CAP_RENAMEAT_TARGET (CAP_LOOKUP | 0x0000040000000000ULL)
+
+#define CAP_SOCK_CLIENT \
+ (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
+ CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN)
+#define CAP_SOCK_SERVER \
+ (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
+ CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \
+ CAP_SETSOCKOPT | CAP_SHUTDOWN)
+
+/* All used bits for index 0. */
+#define CAP_ALL0 CAPRIGHT(0, 0x000007FFFFFFFFFFULL)
+
+/* Available bits for index 0. */
+#define CAP_UNUSED0_44 CAPRIGHT(0, 0x0000080000000000ULL)
+/* ... */
+#define CAP_UNUSED0_57 CAPRIGHT(0, 0x0100000000000000ULL)
+
+/* INDEX 1 */
+
+/* Mandatory Access Control. */
+/* Allows for mac_get_fd(3). */
+#define CAP_MAC_GET CAPRIGHT(1, 0x0000000000000001ULL)
+/* Allows for mac_set_fd(3). */
+#define CAP_MAC_SET CAPRIGHT(1, 0x0000000000000002ULL)
+
+/* Methods on semaphores. */
+#define CAP_SEM_GETVALUE CAPRIGHT(1, 0x0000000000000004ULL)
+#define CAP_SEM_POST CAPRIGHT(1, 0x0000000000000008ULL)
+#define CAP_SEM_WAIT CAPRIGHT(1, 0x0000000000000010ULL)
+
+/* Allows select(2) and poll(2) on descriptor. */
+#define CAP_EVENT CAPRIGHT(1, 0x0000000000000020ULL)
+/* Allows for kevent(2) on kqueue descriptor with eventlist != NULL. */
+#define CAP_KQUEUE_EVENT CAPRIGHT(1, 0x0000000000000040ULL)
+
+/* Strange and powerful rights that should not be given lightly. */
+/* Allows for ioctl(2). */
+#define CAP_IOCTL CAPRIGHT(1, 0x0000000000000080ULL)
+#define CAP_TTYHOOK CAPRIGHT(1, 0x0000000000000100ULL)
+
+/* Process management via process descriptors. */
+/* Allows for pdgetpid(2). */
+#define CAP_PDGETPID CAPRIGHT(1, 0x0000000000000200ULL)
+/* Allows for pdwait4(2). */
+#define CAP_PDWAIT CAPRIGHT(1, 0x0000000000000400ULL)
+/* Allows for pdkill(2). */
+#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
+
+/* Extended attributes. */
+/* Allows for extattr_delete_fd(2). */
+#define CAP_EXTATTR_DELETE CAPRIGHT(1, 0x0000000000001000ULL)
+/* Allows for extattr_get_fd(2). */
+#define CAP_EXTATTR_GET CAPRIGHT(1, 0x0000000000002000ULL)
+/* Allows for extattr_list_fd(2). */
+#define CAP_EXTATTR_LIST CAPRIGHT(1, 0x0000000000004000ULL)
+/* Allows for extattr_set_fd(2). */
+#define CAP_EXTATTR_SET CAPRIGHT(1, 0x0000000000008000ULL)
+
+/* Access Control Lists. */
+/* Allows for acl_valid_fd_np(3). */
+#define CAP_ACL_CHECK CAPRIGHT(1, 0x0000000000010000ULL)
+/* Allows for acl_delete_fd_np(3). */
+#define CAP_ACL_DELETE CAPRIGHT(1, 0x0000000000020000ULL)
+/* Allows for acl_get_fd(3) and acl_get_fd_np(3). */
+#define CAP_ACL_GET CAPRIGHT(1, 0x0000000000040000ULL)
+/* Allows for acl_set_fd(3) and acl_set_fd_np(3). */
+#define CAP_ACL_SET CAPRIGHT(1, 0x0000000000080000ULL)
+
+/* Allows for kevent(2) on kqueue descriptor with changelist != NULL. */
+#define CAP_KQUEUE_CHANGE CAPRIGHT(1, 0x0000000000100000ULL)
+
+#define CAP_KQUEUE (CAP_KQUEUE_EVENT | CAP_KQUEUE_CHANGE)
+
+/* All used bits for index 1. */
+#define CAP_ALL1 CAPRIGHT(1, 0x00000000001FFFFFULL)
+
+/* Available bits for index 1. */
+#define CAP_UNUSED1_22 CAPRIGHT(1, 0x0000000000200000ULL)
+/* ... */
+#define CAP_UNUSED1_57 CAPRIGHT(1, 0x0100000000000000ULL)
+
+/* Backward compatibility. */
+#define CAP_POLL_EVENT CAP_EVENT
+
+#define CAP_ALL(rights) do { \
+ (rights)->cr_rights[0] = \
+ ((uint64_t)CAP_RIGHTS_VERSION << 62) | CAP_ALL0; \
+ (rights)->cr_rights[1] = CAP_ALL1; \
+} while (0)
+
+#define CAP_NONE(rights) do { \
+ (rights)->cr_rights[0] = \
+ ((uint64_t)CAP_RIGHTS_VERSION << 62) | CAPRIGHT(0, 0ULL); \
+ (rights)->cr_rights[1] = CAPRIGHT(1, 0ULL); \
+} while (0)
+
+#define CAPRVER(right) ((int)((right) >> 62))
+#define CAPVER(rights) CAPRVER((rights)->cr_rights[0])
+#define CAPARSIZE(rights) (CAPVER(rights) + 2)
+#define CAPIDXBIT(right) ((int)(((right) >> 57) & 0x1F))
+
+/*
+ * Allowed fcntl(2) commands.
+ */
+#define CAP_FCNTL_GETFL (1 << F_GETFL)
+#define CAP_FCNTL_SETFL (1 << F_SETFL)
+#define CAP_FCNTL_GETOWN (1 << F_GETOWN)
+#define CAP_FCNTL_SETOWN (1 << F_SETOWN)
+#define CAP_FCNTL_ALL (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL | \
+ CAP_FCNTL_GETOWN | CAP_FCNTL_SETOWN)
+
+#define CAP_IOCTLS_ALL SSIZE_MAX
+
+__BEGIN_DECLS
+
+#ifndef __rtems__
+#define cap_rights_init(...) \
+ __cap_rights_init(CAP_RIGHTS_VERSION, __VA_ARGS__, 0ULL)
+cap_rights_t *__cap_rights_init(int version, cap_rights_t *rights, ...);
+
+#define cap_rights_set(...) \
+ __cap_rights_set(__VA_ARGS__, 0ULL)
+cap_rights_t *__cap_rights_set(cap_rights_t *rights, ...);
+
+#define cap_rights_clear(...) \
+ __cap_rights_clear(__VA_ARGS__, 0ULL)
+cap_rights_t *__cap_rights_clear(cap_rights_t *rights, ...);
+#else /* __rtems__ */
+#define cap_rights_init(...) (void)0
+#define cap_rights_set(...) (void)0
+#define cap_rights_clear(...) (void)0
+#endif /* __rtems__ */
+
+#define cap_rights_is_set(...) \
+ __cap_rights_is_set(__VA_ARGS__, 0ULL)
+bool __cap_rights_is_set(const cap_rights_t *rights, ...);
+
+bool cap_rights_is_valid(const cap_rights_t *rights);
+cap_rights_t *cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
+cap_rights_t *cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
+bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
+
+__END_DECLS
+
+#ifdef _KERNEL
+
+#include <sys/systm.h>
+
+#define IN_CAPABILITY_MODE(td) (((td)->td_ucred->cr_flags & CRED_FLAG_CAPMODE) != 0)
+
+struct filedesc;
+struct filedescent;
+
+/*
+ * Test whether a capability grants the requested rights.
+ */
+int cap_check(const cap_rights_t *havep, const cap_rights_t *needp);
+/*
+ * Convert capability rights into VM access flags.
+ */
+u_char cap_rights_to_vmprot(cap_rights_t *havep);
+
+/*
+ * For the purposes of procstat(1) and similar tools, allow kern_descrip.c to
+ * extract the rights from a capability.
+ */
+cap_rights_t *cap_rights_fde(struct filedescent *fde);
+cap_rights_t *cap_rights(struct filedesc *fdp, int fd);
+
+int cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd);
+int cap_fcntl_check_fde(struct filedescent *fde, int cmd);
+int cap_fcntl_check(struct filedesc *fdp, int fd, int cmd);
+
+#else /* !_KERNEL */
+
+__BEGIN_DECLS
+/*
+ * cap_enter(): Cause the process to enter capability mode, which will
+ * prevent it from directly accessing global namespaces. System calls will
+ * be limited to process-local, process-inherited, or file descriptor
+ * operations. If already in capability mode, a no-op.
+ */
+#ifndef __rtems__
+int cap_enter(void);
+#else /* __rtems__ */
+static inline int
+cap_enter(void)
+{
+
+ return (0);
+}
+#endif /* __rtems__ */
+
+/*
+ * Are we sandboxed (in capability mode)?
+ * This is a libc wrapper around the cap_getmode(2) system call.
+ */
+bool cap_sandboxed(void);
+
+/*
+ * cap_getmode(): Are we in capability mode?
+ */
+int cap_getmode(u_int *modep);
+
+/*
+ * Limits capability rights for the given descriptor (CAP_*).
+ */
+#ifndef __rtems__
+int cap_rights_limit(int fd, const cap_rights_t *rights);
+#else /* __rtems__ */
+static inline int
+cap_rights_limit(int fd, const cap_rights_t *rights)
+{
+
+ return (0);
+}
+#endif /* __rtems__ */
+/*
+ * Returns capability rights for the given descriptor.
+ */
+#define cap_rights_get(fd, rights) \
+ __cap_rights_get(CAP_RIGHTS_VERSION, (fd), (rights))
+int __cap_rights_get(int version, int fd, cap_rights_t *rights);
+/*
+ * Limits allowed ioctls for the given descriptor.
+ */
+int cap_ioctls_limit(int fd, const cap_ioctl_t *cmds, size_t ncmds);
+/*
+ * Returns array of allowed ioctls for the given descriptor.
+ * If all ioctls are allowed, the cmds array is not populated and
+ * the function returns CAP_IOCTLS_ALL.
+ */
+ssize_t cap_ioctls_get(int fd, cap_ioctl_t *cmds, size_t maxcmds);
+/*
+ * Limits allowed fcntls for the given descriptor (CAP_FCNTL_*).
+ */
+int cap_fcntls_limit(int fd, uint32_t fcntlrights);
+/*
+ * Returns bitmask of allowed fcntls for the given descriptor.
+ */
+int cap_fcntls_get(int fd, uint32_t *fcntlrightsp);
+
+__END_DECLS
+
+#endif /* !_KERNEL */
+
+#endif /* !_SYS_CAPSICUM_H_ */
diff --git a/freebsd/sys/sys/condvar.h b/freebsd/sys/sys/condvar.h
index 2efe469e..c4666694 100644
--- a/freebsd/sys/sys/condvar.h
+++ b/freebsd/sys/sys/condvar.h
@@ -55,8 +55,10 @@ void cv_destroy(struct cv *cvp);
void _cv_wait(struct cv *cvp, struct lock_object *lock);
void _cv_wait_unlock(struct cv *cvp, struct lock_object *lock);
int _cv_wait_sig(struct cv *cvp, struct lock_object *lock);
-int _cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo);
-int _cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo);
+int _cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock,
+ sbintime_t sbt, sbintime_t pr, int flags);
+int _cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock,
+ sbintime_t sbt, sbintime_t pr, int flags);
void cv_signal(struct cv *cvp);
void cv_broadcastpri(struct cv *cvp, int pri);
@@ -68,13 +70,22 @@ void cv_broadcastpri(struct cv *cvp, int pri);
#define cv_wait_sig(cvp, lock) \
_cv_wait_sig((cvp), &(lock)->lock_object)
#define cv_timedwait(cvp, lock, timo) \
- _cv_timedwait((cvp), &(lock)->lock_object, (timo))
+ _cv_timedwait_sbt((cvp), &(lock)->lock_object, \
+ tick_sbt * (timo), 0, C_HARDCLOCK)
+#define cv_timedwait_sbt(cvp, lock, sbt, pr, flags) \
+ _cv_timedwait_sbt((cvp), &(lock)->lock_object, (sbt), (pr), (flags))
#ifndef __rtems__
#define cv_timedwait_sig(cvp, lock, timo) \
- _cv_timedwait_sig((cvp), &(lock)->lock_object, (timo))
+ _cv_timedwait_sig_sbt((cvp), &(lock)->lock_object, \
+ tick_sbt * (timo), 0, C_HARDCLOCK)
+#define cv_timedwait_sig_sbt(cvp, lock, sbt, pr, flags) \
+ _cv_timedwait_sig_sbt((cvp), &(lock)->lock_object, (sbt), (pr), (flags))
#else /* __rtems__ */
#define cv_timedwait_sig(cvp, lock, timo) \
- _cv_timedwait((cvp), &(lock)->lock_object, (timo))
+ _cv_timedwait_sbt((cvp), &(lock)->lock_object, \
+ tick_sbt * (timo), 0, C_HARDCLOCK)
+#define cv_timedwait_sig_sbt(cvp, lock, sbt, pr, flags) \
+ _cv_timedwait_sbt((cvp), &(lock)->lock_object, (sbt), (pr), (flags))
#endif /* __rtems__ */
#define cv_broadcast(cvp) cv_broadcastpri(cvp, 0)
diff --git a/freebsd/sys/sys/conf.h b/freebsd/sys/sys/conf.h
index 9d8b7dcf..78bb1e2a 100644
--- a/freebsd/sys/sys/conf.h
+++ b/freebsd/sys/sys/conf.h
@@ -58,20 +58,18 @@ extern const char rtems_cdev_directory[sizeof(RTEMS_CDEV_DIRECTORY)];
struct cdev {
#ifndef __rtems__
- struct mount *si_mountpt;
+ void *si_spare0;
#endif /* __rtems__ */
u_int si_flags;
#define SI_ETERNAL 0x0001 /* never destroyed */
-#define SI_ALIAS 0x0002 /* carrier of alias name */
-#define SI_NAMED 0x0004 /* make_dev{_alias} has been called */
-#define SI_CHEAPCLONE 0x0008 /* can be removed_dev'ed when vnode reclaims */
-#define SI_CHILD 0x0010 /* child of another struct cdev **/
-#define SI_DEVOPEN 0x0020 /* opened by device */
-#define SI_CONSOPEN 0x0040 /* opened by console */
-#define SI_DUMPDEV 0x0080 /* is kernel dumpdev */
-#define SI_CANDELETE 0x0100 /* can do BIO_DELETE */
-#define SI_CLONELIST 0x0200 /* on a clone list */
+#define SI_ALIAS 0x0002 /* carrier of alias name */
+#define SI_NAMED 0x0004 /* make_dev{_alias} has been called */
+#define SI_CHEAPCLONE 0x0008 /* can be removed_dev'ed when vnode reclaims */
+#define SI_CHILD 0x0010 /* child of another struct cdev **/
+#define SI_DUMPDEV 0x0080 /* is kernel dumpdev */
+#define SI_CLONELIST 0x0200 /* on a clone list */
#define SI_UNMAPPED 0x0400 /* can handle unmapped I/O */
+#define SI_NOSPLIT 0x0800 /* I/O should not be split up */
#ifndef __rtems__
struct timespec si_atime;
struct timespec si_ctime;
@@ -89,8 +87,8 @@ struct cdev {
LIST_HEAD(, cdev) si_children;
LIST_ENTRY(cdev) si_siblings;
struct cdev *si_parent;
+ struct mount *si_mountpt;
#endif /* __rtems__ */
- char *si_name;
void *si_drv1, *si_drv2;
struct cdevsw *si_devsw;
#ifndef __rtems__
@@ -102,22 +100,14 @@ struct cdev {
union {
struct snapdata *__sid_snapdata;
} __si_u;
- char __si_namebuf[SPECNAMELEN + 1];
-#else /* __rtems__ */
- struct {
- /* Keep this two together. They will be used as one string. */
- char __si_dir[sizeof(rtems_cdev_directory) - 1];
- char __si_name[SPECNAMELEN + 1];
- } __si_pathstruct;
#endif /* __rtems__ */
- char __si_namebuf[SPECNAMELEN + 1];
-};
-
#ifdef __rtems__
-#define __si_namebuf __si_pathstruct.__si_name
-#define si_path __si_pathstruct.__si_dir
+ char si_path[sizeof(RTEMS_CDEV_DIRECTORY) - 1];
#endif /* __rtems__ */
-#define si_snapdata __si_u.__sid_snapdata
+ char si_name[SPECNAMELEN + 1];
+};
+
+#define si_snapdata __si_u.__sid_snapdata
#ifdef _KERNEL
@@ -134,24 +124,6 @@ struct clonedevs;
struct vm_object;
struct vnode;
-/*
- * Note: d_thread_t is provided as a transition aid for those drivers
- * that treat struct proc/struct thread as an opaque data type and
- * exist in substantially the same form in both 4.x and 5.x. Writers
- * of drivers that dips into the d_thread_t structure should use
- * struct thread or struct proc as appropriate for the version of the
- * OS they are using. It is provided in lieu of each device driver
- * inventing its own way of doing this. While it does violate style(9)
- * in a number of ways, this violation is deemed to be less
- * important than the benefits that a uniform API between releases
- * gives.
- *
- * Users of struct thread/struct proc that aren't device drivers should
- * not use d_thread_t.
- */
-
-typedef struct thread d_thread_t;
-
typedef int d_open_t(struct cdev *dev, int oflags, int devtype, struct thread *td);
typedef int d_fdopen_t(struct cdev *dev, int oflags, struct thread *td, struct file *fp);
typedef int d_close_t(struct cdev *dev, int fflag, int devtype, struct thread *td);
@@ -184,9 +156,9 @@ typedef int dumper_t(
#define D_TAPE 0x0001
#define D_DISK 0x0002
#define D_TTY 0x0004
-#define D_MEM 0x0008
+#define D_MEM 0x0008 /* /dev/(k)mem */
-#ifdef _KERNEL
+#ifdef _KERNEL
#define D_TYPEMASK 0xffff
@@ -194,25 +166,23 @@ typedef int dumper_t(
* Flags for d_flags which the drivers can set.
*/
#define D_TRACKCLOSE 0x00080000 /* track all closes */
-#define D_MMAP_ANON 0x00100000 /* special treatment in vm_mmap.c */
-#define D_PSEUDO 0x00200000 /* make_dev() can return NULL */
-#define D_NEEDGIANT 0x00400000 /* driver want Giant */
+#define D_MMAP_ANON 0x00100000 /* special treatment in vm_mmap.c */
+#define D_NEEDGIANT 0x00400000 /* driver want Giant */
#define D_NEEDMINOR 0x00800000 /* driver uses clone_create() */
-#define D_UNMAPPED_IO 0x01000000 /* d_strategy can accept unmapped IO */
/*
* Version numbers.
*/
-#define D_VERSION_00 0x20011966
-#define D_VERSION_01 0x17032005 /* Add d_uid,gid,mode & kind */
-#define D_VERSION_02 0x28042009 /* Add d_mmap_single */
-#define D_VERSION_03 0x17122009 /* d_mmap takes memattr,vm_ooffset_t */
-#define D_VERSION D_VERSION_03
+#define D_VERSION_00 0x20011966
+#define D_VERSION_01 0x17032005 /* Add d_uid,gid,mode & kind */
+#define D_VERSION_02 0x28042009 /* Add d_mmap_single */
+#define D_VERSION_03 0x17122009 /* d_mmap takes memattr,vm_ooffset_t */
+#define D_VERSION D_VERSION_03
/*
* Flags used for internal housekeeping
*/
-#define D_INIT 0x80000000 /* cdevsw initialized */
+#define D_INIT 0x80000000 /* cdevsw initialized */
/*
* Character device switch table
@@ -265,21 +235,45 @@ struct devsw_module_data {
/* Do not initialize fields hereafter */
};
-#define DEV_MODULE(name, evh, arg) \
+#define DEV_MODULE_ORDERED(name, evh, arg, ord) \
static moduledata_t name##_mod = { \
#name, \
evh, \
arg \
}; \
-DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE)
+DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, ord)
+#define DEV_MODULE(name, evh, arg) \
+ DEV_MODULE_ORDERED(name, evh, arg, SI_ORDER_MIDDLE)
void clone_setup(struct clonedevs **cdp);
void clone_cleanup(struct clonedevs **);
-#define CLONE_UNITMASK 0xfffff
-#define CLONE_FLAG0 (CLONE_UNITMASK + 1)
+#define CLONE_UNITMASK 0xfffff
+#define CLONE_FLAG0 (CLONE_UNITMASK + 1)
int clone_create(struct clonedevs **, struct cdevsw *, int *unit, struct cdev **dev, int extra);
+#define MAKEDEV_REF 0x01
+#define MAKEDEV_WHTOUT 0x02
+#define MAKEDEV_NOWAIT 0x04
+#define MAKEDEV_WAITOK 0x08
+#define MAKEDEV_ETERNAL 0x10
+#define MAKEDEV_CHECKNAME 0x20
+struct make_dev_args {
+ size_t mda_size;
+ int mda_flags;
+ struct cdevsw *mda_devsw;
+ struct ucred *mda_cr;
+ uid_t mda_uid;
+ gid_t mda_gid;
+ int mda_mode;
+ int mda_unit;
+ void *mda_si_drv1;
+ void *mda_si_drv2;
+};
+void make_dev_args_init_impl(struct make_dev_args *_args, size_t _sz);
+#define make_dev_args_init(a) \
+ make_dev_args_init_impl((a), sizeof(struct make_dev_args))
+
int count_dev(struct cdev *_dev);
void delist_dev(struct cdev *_dev);
void destroy_dev(struct cdev *_dev);
@@ -294,19 +288,11 @@ void dev_depends(struct cdev *_pdev, struct cdev *_cdev);
void dev_ref(struct cdev *dev);
void dev_refl(struct cdev *dev);
void dev_rel(struct cdev *dev);
-void dev_strategy(struct cdev *dev, struct buf *bp);
-void dev_strategy_csw(struct cdev *dev, struct cdevsw *csw, struct buf *bp);
struct cdev *make_dev(struct cdevsw *_devsw, int _unit, uid_t _uid, gid_t _gid,
int _perms, const char *_fmt, ...) __printflike(6, 7);
struct cdev *make_dev_cred(struct cdevsw *_devsw, int _unit,
struct ucred *_cr, uid_t _uid, gid_t _gid, int _perms,
const char *_fmt, ...) __printflike(7, 8);
-#define MAKEDEV_REF 0x01
-#define MAKEDEV_WHTOUT 0x02
-#define MAKEDEV_NOWAIT 0x04
-#define MAKEDEV_WAITOK 0x08
-#define MAKEDEV_ETERNAL 0x10
-#define MAKEDEV_CHECKNAME 0x20
struct cdev *make_dev_credf(int _flags,
struct cdevsw *_devsw, int _unit,
struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode,
@@ -314,13 +300,15 @@ struct cdev *make_dev_credf(int _flags,
int make_dev_p(int _flags, struct cdev **_cdev, struct cdevsw *_devsw,
struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode,
const char *_fmt, ...) __printflike(8, 9);
+int make_dev_s(struct make_dev_args *_args, struct cdev **_cdev,
+ const char *_fmt, ...) __printflike(3, 4);
struct cdev *make_dev_alias(struct cdev *_pdev, const char *_fmt, ...)
__printflike(2, 3);
int make_dev_alias_p(int _flags, struct cdev **_cdev, struct cdev *_pdev,
const char *_fmt, ...) __printflike(4, 5);
int make_dev_physpath_alias(int _flags, struct cdev **_cdev,
- struct cdev *_pdev, struct cdev *_old_alias,
- const char *_physpath);
+ struct cdev *_pdev, struct cdev *_old_alias,
+ const char *_physpath);
void dev_lock(void);
void dev_unlock(void);
void setconf(void);
@@ -333,11 +321,10 @@ void setconf(void);
#define dev2unit(d) ((d)->si_drv0)
-typedef void (*cdevpriv_dtr_t)(void *data);
+typedef void d_priv_dtor_t(void *data);
int devfs_get_cdevpriv(void **datap);
-int devfs_set_cdevpriv(void *priv, cdevpriv_dtr_t dtr);
+int devfs_set_cdevpriv(void *priv, d_priv_dtor_t *dtr);
void devfs_clear_cdevpriv(void);
-void devfs_fpdrop(struct file *fp); /* XXX This is not public KPI */
ino_t devfs_alloc_cdp_inode(void);
void devfs_free_cdp_inode(ino_t ino);
@@ -353,6 +340,7 @@ void devfs_free_cdp_inode(ino_t ino);
#define GID_OPERATOR 5
#define GID_BIN 7
#define GID_GAMES 13
+#define GID_VIDEO 44
#define GID_DIALER 68
#define GID_NOBODY 65534
@@ -366,16 +354,18 @@ EVENTHANDLER_DECLARE(dev_clone, dev_clone_fn);
struct dumperinfo {
dumper_t *dumper; /* Dumping function. */
- void *priv; /* Private parts. */
- u_int blocksize; /* Size of block in bytes. */
+ void *priv; /* Private parts. */
+ u_int blocksize; /* Size of block in bytes. */
u_int maxiosize; /* Max size allowed for an individual I/O */
- off_t mediaoffset; /* Initial offset in bytes. */
- off_t mediasize; /* Space available in bytes. */
+ off_t mediaoffset; /* Initial offset in bytes. */
+ off_t mediasize; /* Space available in bytes. */
+ void *blockbuf; /* Buffer for padding shorter dump blocks */
};
-int set_dumper(struct dumperinfo *);
+int set_dumper(struct dumperinfo *, const char *_devname, struct thread *td);
int dump_write(struct dumperinfo *, void *, vm_offset_t, off_t, size_t);
-void dumpsys(struct dumperinfo *);
+int dump_write_pad(struct dumperinfo *, void *, vm_offset_t, off_t, size_t,
+ size_t *);
int doadump(boolean_t);
#ifndef __rtems__
extern int dumping; /* system is dumping */
diff --git a/freebsd/sys/sys/counter.h b/freebsd/sys/sys/counter.h
new file mode 100644
index 00000000..2ce71341
--- /dev/null
+++ b/freebsd/sys/sys/counter.h
@@ -0,0 +1,63 @@
+/*-
+ * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __SYS_COUNTER_H__
+#define __SYS_COUNTER_H__
+
+typedef uint64_t *counter_u64_t;
+
+#ifdef _KERNEL
+#include <machine/counter.h>
+
+counter_u64_t counter_u64_alloc(int);
+void counter_u64_free(counter_u64_t);
+
+void counter_u64_zero(counter_u64_t);
+uint64_t counter_u64_fetch(counter_u64_t);
+
+#define COUNTER_ARRAY_ALLOC(a, n, wait) do { \
+ for (int i = 0; i < (n); i++) \
+ (a)[i] = counter_u64_alloc(wait); \
+} while (0)
+
+#define COUNTER_ARRAY_FREE(a, n) do { \
+ for (int i = 0; i < (n); i++) \
+ counter_u64_free((a)[i]); \
+} while (0)
+
+#define COUNTER_ARRAY_COPY(a, dstp, n) do { \
+ for (int i = 0; i < (n); i++) \
+ ((uint64_t *)(dstp))[i] = counter_u64_fetch((a)[i]);\
+} while (0)
+
+#define COUNTER_ARRAY_ZERO(a, n) do { \
+ for (int i = 0; i < (n); i++) \
+ counter_u64_zero((a)[i]); \
+} while (0)
+#endif /* _KERNEL */
+#endif /* ! __SYS_COUNTER_H__ */
diff --git a/freebsd/sys/sys/cpu.h b/freebsd/sys/sys/cpu.h
index c16091e1..f159e376 100644
--- a/freebsd/sys/sys/cpu.h
+++ b/freebsd/sys/sys/cpu.h
@@ -37,6 +37,8 @@
#define CPU_IVAR_PCPU 1
#define CPU_IVAR_NOMINAL_MHZ 2
+#define CPU_IVAR_CPUID_SIZE 3
+#define CPU_IVAR_CPUID 4
static __inline struct pcpu *cpu_get_pcpu(device_t dev)
{
@@ -54,6 +56,20 @@ static __inline int32_t cpu_get_nominal_mhz(device_t dev)
return ((int32_t)v);
}
+static __inline const uint32_t *cpu_get_cpuid(device_t dev, size_t *count)
+{
+ uintptr_t v = 0;
+ if (BUS_READ_IVAR(device_get_parent(dev), dev,
+ CPU_IVAR_CPUID_SIZE, &v) != 0)
+ return (NULL);
+ *count = (size_t)v;
+
+ if (BUS_READ_IVAR(device_get_parent(dev), dev,
+ CPU_IVAR_CPUID, &v) != 0)
+ return (NULL);
+ return ((const uint32_t *)v);
+}
+
/*
* CPU frequency control interface.
*/
diff --git a/freebsd/sys/sys/domain.h b/freebsd/sys/sys/domain.h
index 2563cb6e..1817e788 100644
--- a/freebsd/sys/sys/domain.h
+++ b/freebsd/sys/sys/domain.h
@@ -42,6 +42,7 @@
*/
struct mbuf;
struct ifnet;
+struct socket;
struct domain {
int dom_family; /* AF_xxx */
@@ -51,25 +52,18 @@ struct domain {
void (*dom_destroy) /* cleanup structures / state */
(void);
int (*dom_externalize) /* externalize access rights */
- (struct mbuf *, struct mbuf **);
+ (struct mbuf *, struct mbuf **, int);
void (*dom_dispose) /* dispose of internalized rights */
- (struct mbuf *);
+ (struct socket *);
struct protosw *dom_protosw, *dom_protoswNPROTOSW;
struct domain *dom_next;
int (*dom_rtattach) /* initialize routing table */
(void **, int);
int (*dom_rtdetach) /* clean up routing table */
(void **, int);
- int dom_rtoffset; /* an arg to rtattach, in bits */
- /* XXX MRT.
- * rtoffset May be 0 if the domain supplies its own rtattach(),
- * in which case, a 0 indicates it's being called from
- * vfs_export.c (HACK) Only for AF_INET{,6} at this time.
- * Temporary ABI compat hack.. fix post RELENG_7
- */
- int dom_maxrtkey; /* for routing layer */
void *(*dom_ifattach)(struct ifnet *);
void (*dom_ifdetach)(struct ifnet *, void *);
+ int (*dom_ifmtu)(struct ifnet *);
/* af-dependent data on ifnet */
};
diff --git a/freebsd/sys/sys/eventhandler.h b/freebsd/sys/sys/eventhandler.h
index 95d03b83..9315f8c8 100644
--- a/freebsd/sys/sys/eventhandler.h
+++ b/freebsd/sys/sys/eventhandler.h
@@ -26,8 +26,8 @@
* $FreeBSD$
*/
-#ifndef SYS_EVENTHANDLER_H
-#define SYS_EVENTHANDLER_H
+#ifndef _SYS_EVENTHANDLER_H_
+#define _SYS_EVENTHANDLER_H_
#include <rtems/bsd/sys/lock.h>
#include <sys/ktr.h>
@@ -182,6 +182,7 @@ EVENTHANDLER_DECLARE(shutdown_final, shutdown_fn);
typedef void (*power_change_fn)(void *);
EVENTHANDLER_DECLARE(power_resume, power_change_fn);
EVENTHANDLER_DECLARE(power_suspend, power_change_fn);
+EVENTHANDLER_DECLARE(power_suspend_early, power_change_fn);
/* Low memory event */
typedef void (*vm_lowmem_handler_t)(void *, int);
@@ -192,18 +193,16 @@ EVENTHANDLER_DECLARE(vm_lowmem, vm_lowmem_handler_t);
typedef void (*mountroot_handler_t)(void *);
EVENTHANDLER_DECLARE(mountroot, mountroot_handler_t);
-/* VLAN state change events */
-struct ifnet;
-typedef void (*vlan_config_fn)(void *, struct ifnet *, uint16_t);
-typedef void (*vlan_unconfig_fn)(void *, struct ifnet *, uint16_t);
-EVENTHANDLER_DECLARE(vlan_config, vlan_config_fn);
-EVENTHANDLER_DECLARE(vlan_unconfig, vlan_unconfig_fn);
-
-/* BPF attach/detach events */
-struct ifnet;
-typedef void (*bpf_track_fn)(void *, struct ifnet *, int /* dlt */,
- int /* 1 =>'s attach */);
-EVENTHANDLER_DECLARE(bpf_track, bpf_track_fn);
+/* File system mount events */
+struct mount;
+struct vnode;
+struct thread;
+typedef void (*vfs_mounted_notify_fn)(void *, struct mount *, struct vnode *,
+ struct thread *);
+typedef void (*vfs_unmounted_notify_fn)(void *, struct mount *,
+ struct thread *);
+EVENTHANDLER_DECLARE(vfs_mounted, vfs_mounted_notify_fn);
+EVENTHANDLER_DECLARE(vfs_unmounted, vfs_unmounted_notify_fn);
/*
* Process events
@@ -231,7 +230,6 @@ EVENTHANDLER_DECLARE(process_exec, execlist_fn);
/*
* application dump event
*/
-struct thread;
typedef void (*app_coredump_start_fn)(void *, struct thread *, char *name);
typedef void (*app_coredump_progress_fn)(void *, struct thread *td, int byte_count);
typedef void (*app_coredump_finish_fn)(void *, struct thread *td);
@@ -272,5 +270,4 @@ typedef void (*unregister_framebuffer_fn)(void *, struct fb_info *);
EVENTHANDLER_DECLARE(register_framebuffer, register_framebuffer_fn);
EVENTHANDLER_DECLARE(unregister_framebuffer, unregister_framebuffer_fn);
-#endif /* SYS_EVENTHANDLER_H */
-
+#endif /* _SYS_EVENTHANDLER_H_ */
diff --git a/freebsd/sys/sys/eventvar.h b/freebsd/sys/sys/eventvar.h
index 6af0fe82..c7e46230 100644
--- a/freebsd/sys/sys/eventvar.h
+++ b/freebsd/sys/sys/eventvar.h
@@ -62,6 +62,7 @@ struct kqueue {
u_long kq_knhashmask; /* size of knhash */
struct klist *kq_knhash; /* hash table for knotes */
struct task kq_task;
+ struct ucred *kq_cred;
};
#endif /* !_SYS_EVENTVAR_H_ */
diff --git a/freebsd/sys/sys/fail.h b/freebsd/sys/sys/fail.h
new file mode 100644
index 00000000..45499af2
--- /dev/null
+++ b/freebsd/sys/sys/fail.h
@@ -0,0 +1,366 @@
+/*-
+ * Copyright (c) 2009 Isilon Inc http://www.isilon.com/
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+/**
+ * @file
+ *
+ * Main header for failpoint facility.
+ */
+#ifndef _SYS_FAIL_H_
+#define _SYS_FAIL_H_
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/cdefs.h>
+#include <sys/linker_set.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/condvar.h>
+#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+
+/**
+ * Failpoint return codes, used internally.
+ * @ingroup failpoint_private
+ */
+enum fail_point_return_code {
+ FAIL_POINT_RC_CONTINUE = 0, /**< Continue with normal execution */
+ FAIL_POINT_RC_RETURN, /**< FP evaluated to 'return' */
+ FAIL_POINT_RC_QUEUED, /**< sleep_fn will be called */
+};
+
+struct fail_point_entry;
+struct fail_point_setting;
+
+/**
+ * Internal failpoint structure, tracking all the current details of the
+ * failpoint. This structure is the core component shared between the
+ * failure-injection code and the user-interface.
+ * @ingroup failpoint_private
+ */
+struct fail_point {
+ const char *fp_name; /* name of fail point */
+ const char *fp_location; /* file:line of fail point */
+ volatile int fp_ref_cnt; /**
+ * protects fp_setting: while holding
+ * a ref, fp_setting points to an
+ * unfreed fail_point_setting
+ */
+ struct fail_point_setting * volatile fp_setting;
+ int fp_flags;
+
+ /**< Function to call before sleep or pause */
+ void (*fp_pre_sleep_fn)(void *);
+ /**< Arg for fp_pre_sleep_fn */
+ void *fp_pre_sleep_arg;
+
+ /**< Function to call after waking from sleep or pause */
+ void (*fp_post_sleep_fn)(void *);
+ /**< Arg for fp_post_sleep_fn */
+ void *fp_post_sleep_arg;
+};
+
+#define FAIL_POINT_DYNAMIC_NAME 0x01 /**< Must free name on destroy */
+/**< Use timeout path for sleep instead of msleep */
+#define FAIL_POINT_USE_TIMEOUT_PATH 0x02
+/**< If fail point is set to sleep, replace the sleep call with delay */
+#define FAIL_POINT_NONSLEEPABLE 0x04
+
+#define FAIL_POINT_CV_DESC "fp cv no iterators"
+#define FAIL_POINT_IS_OFF(fp) (__predict_true((fp)->fp_setting == NULL) || \
+ __predict_true(fail_point_is_off(fp)))
+
+__BEGIN_DECLS
+
+/* Private failpoint eval function -- use fail_point_eval() instead. */
+enum fail_point_return_code fail_point_eval_nontrivial(struct fail_point *,
+ int *ret);
+
+/**
+ * @addtogroup failpoint
+ * @{
+ */
+/*
+ * Initialize a fail-point. The name is formed in printf-like fashion
+ * from "fmt" and the subsequent arguments.
+ * Pair with fail_point_destroy().
+ */
+void fail_point_init(struct fail_point *, const char *fmt, ...)
+ __printflike(2, 3);
+
+/* Return true iff this fail point is set to off, false otherwise */
+bool fail_point_is_off(struct fail_point *fp);
+
+/**
+ * Set the pre-sleep function for a fail point
+ * If fp_post_sleep_fn is specified, then FAIL_POINT_SLEEP will result in a
+ * (*fp->fp_pre_sleep_fn)(fp->fp_pre_sleep_arg) call by the thread.
+ */
+static inline void
+fail_point_sleep_set_pre_func(struct fail_point *fp, void (*sleep_fn)(void *))
+{
+ fp->fp_pre_sleep_fn = sleep_fn;
+}
+
+static inline void
+fail_point_sleep_set_pre_arg(struct fail_point *fp, void *sleep_arg)
+{
+ fp->fp_pre_sleep_arg = sleep_arg;
+}
+
+/**
+ * Set the post-sleep function. This will be passed to timeout if we take
+ * the timeout path. This must be set if you sleep using the timeout path.
+ */
+static inline void
+fail_point_sleep_set_post_func(struct fail_point *fp, void (*sleep_fn)(void *))
+{
+ fp->fp_post_sleep_fn = sleep_fn;
+}
+
+static inline void
+fail_point_sleep_set_post_arg(struct fail_point *fp, void *sleep_arg)
+{
+ fp->fp_post_sleep_arg = sleep_arg;
+}
+/**
+ * If the FAIL_POINT_USE_TIMEOUT flag is set on a failpoint, then
+ * FAIL_POINT_SLEEP will result in a call to timeout instead of
+ * msleep. Note that if you sleep while this flag is set, you must
+ * set fp_post_sleep_fn or an error will occur upon waking.
+ */
+static inline void
+fail_point_use_timeout_path(struct fail_point *fp, bool use_timeout,
+ void (*post_sleep_fn)(void *))
+{
+ KASSERT(!use_timeout || post_sleep_fn != NULL ||
+ (post_sleep_fn == NULL && fp->fp_post_sleep_fn != NULL),
+ ("Setting fp to use timeout, but not setting post_sleep_fn\n"));
+
+ if (use_timeout)
+ fp->fp_flags |= FAIL_POINT_USE_TIMEOUT_PATH;
+ else
+ fp->fp_flags &= ~FAIL_POINT_USE_TIMEOUT_PATH;
+
+ if (post_sleep_fn != NULL)
+ fp->fp_post_sleep_fn = post_sleep_fn;
+}
+
+/**
+ * Free the resources used by a fail-point. Pair with fail_point_init().
+ */
+void fail_point_destroy(struct fail_point *);
+
+/**
+ * Evaluate a failpoint.
+ */
+static inline enum fail_point_return_code
+fail_point_eval(struct fail_point *fp, int *ret)
+{
+ if (__predict_true(fp->fp_setting == NULL))
+ return (FAIL_POINT_RC_CONTINUE);
+ return (fail_point_eval_nontrivial(fp, ret));
+}
+
+__END_DECLS
+
+/* Declare a fail_point and its sysctl in a function. */
+#define _FAIL_POINT_NAME(name) _fail_point_##name
+#define _FAIL_POINT_LOCATION() "(" __FILE__ ":" __XSTRING(__LINE__) ")"
+#ifndef __rtems__
+#define _FAIL_POINT_INIT(parent, name, flags) \
+ static struct fail_point _FAIL_POINT_NAME(name) = { \
+ .fp_name = #name, \
+ .fp_location = _FAIL_POINT_LOCATION(), \
+ .fp_ref_cnt = 0, \
+ .fp_setting = NULL, \
+ .fp_flags = (flags), \
+ .fp_pre_sleep_fn = NULL, \
+ .fp_pre_sleep_arg = NULL, \
+ .fp_post_sleep_fn = NULL, \
+ .fp_post_sleep_arg = NULL, \
+ }; \
+ SYSCTL_OID(parent, OID_AUTO, name, \
+ CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, \
+ &_FAIL_POINT_NAME(name), 0, fail_point_sysctl, \
+ "A", ""); \
+ SYSCTL_OID(parent, OID_AUTO, status_##name, \
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, \
+ &_FAIL_POINT_NAME(name), 0, \
+ fail_point_sysctl_status, "A", "");
+#define _FAIL_POINT_EVAL(name, cond, code...) \
+ int RETURN_VALUE; \
+ \
+ if (__predict_false(cond && \
+ fail_point_eval(&_FAIL_POINT_NAME(name), &RETURN_VALUE))) { \
+ \
+ code; \
+ \
+ }
+#else /* __rtems__ */
+#define _FAIL_POINT_INIT(parent, name, flags) (void)0;
+#define _FAIL_POINT_EVAL(name, cond, code...) (void)0;
+#endif /* __rtems__ */
+
+
+/**
+ * Instantiate a failpoint which returns "RETURN_VALUE" from the function
+ * when triggered.
+ * @param parent The parent sysctl under which to locate the fp's sysctl
+ * @param name The name of the failpoint in the sysctl tree (and printouts)
+ * @return Instantly returns the RETURN_VALUE specified in the
+ * failpoint, if triggered.
+ */
+#define KFAIL_POINT_RETURN(parent, name) \
+ KFAIL_POINT_CODE(parent, name, return RETURN_VALUE)
+
+/**
+ * Instantiate a failpoint which returns (void) from the function when
+ * triggered.
+ * @param parent The parent sysctl under which to locate the sysctl
+ * @param name The name of the failpoint in the sysctl tree (and printouts)
+ * @return Instantly returns void, if triggered in the failpoint.
+ */
+#define KFAIL_POINT_RETURN_VOID(parent, name) \
+ KFAIL_POINT_CODE(parent, name, return)
+
+/**
+ * Instantiate a failpoint which sets an error when triggered.
+ * @param parent The parent sysctl under which to locate the sysctl
+ * @param name The name of the failpoint in the sysctl tree (and
+ * printouts)
+ * @param error_var A variable to set to the failpoint's specified
+ * return-value when triggered
+ */
+#define KFAIL_POINT_ERROR(parent, name, error_var) \
+ KFAIL_POINT_CODE(parent, name, (error_var) = RETURN_VALUE)
+
+/**
+ * Instantiate a failpoint which sets an error and then goes to a
+ * specified label in the function when triggered.
+ * @param parent The parent sysctl under which to locate the sysctl
+ * @param name The name of the failpoint in the sysctl tree (and
+ * printouts)
+ * @param error_var A variable to set to the failpoint's specified
+ * return-value when triggered
+ * @param label The location to goto when triggered.
+ */
+#define KFAIL_POINT_GOTO(parent, name, error_var, label) \
+ KFAIL_POINT_CODE(parent, name, (error_var) = RETURN_VALUE; goto label)
+
+/**
+ * Instantiate a failpoint which sets its pre- and post-sleep callback
+ * mechanisms.
+ * @param parent The parent sysctl under which to locate the sysctl
+ * @param name The name of the failpoint in the sysctl tree (and
+ * printouts)
+ * @param pre_func Function pointer to the pre-sleep function, which will be
+ * called directly before going to sleep.
+ * @param pre_arg Argument to the pre-sleep function
+ * @param post_func Function pointer to the pot-sleep function, which will be
+ * called directly before going to sleep.
+ * @param post_arg Argument to the post-sleep function
+ */
+#define KFAIL_POINT_SLEEP_CALLBACKS(parent, name, pre_func, pre_arg, \
+ post_func, post_arg) \
+ KFAIL_POINT_CODE_SLEEP_CALLBACKS(parent, name, pre_func, \
+ pre_arg, post_func, post_arg, return RETURN_VALUE)
+
+/**
+ * Instantiate a failpoint which runs arbitrary code when triggered, and sets
+ * its pre- and post-sleep callback mechanisms
+ * @param parent The parent sysctl under which to locate the sysctl
+ * @param name The name of the failpoint in the sysctl tree (and
+ * printouts)
+ * @param pre_func Function pointer to the pre-sleep function, which will be
+ * called directly before going to sleep.
+ * @param pre_arg Argument to the pre-sleep function
+ * @param post_func Function pointer to the pot-sleep function, which will be
+ * called directly before going to sleep.
+ * @param post_arg Argument to the post-sleep function
+ * @param code The arbitrary code to run when triggered. Can reference
+ * "RETURN_VALUE" if desired to extract the specified
+ * user return-value when triggered. Note that this is
+ * implemented with a do-while loop so be careful of
+ * break and continue statements.
+ */
+#define KFAIL_POINT_CODE_SLEEP_CALLBACKS(parent, name, pre_func, pre_arg, \
+ post_func, post_arg, code...) \
+ do { \
+ _FAIL_POINT_INIT(parent, name) \
+ _FAIL_POINT_NAME(name).fp_pre_sleep_fn = pre_func; \
+ _FAIL_POINT_NAME(name).fp_pre_sleep_arg = pre_arg; \
+ _FAIL_POINT_NAME(name).fp_post_sleep_fn = post_func; \
+ _FAIL_POINT_NAME(name).fp_post_sleep_arg = post_arg; \
+ _FAIL_POINT_EVAL(name, true, code) \
+ } while (0)
+
+
+/**
+ * Instantiate a failpoint which runs arbitrary code when triggered.
+ * @param parent The parent sysctl under which to locate the sysctl
+ * @param name The name of the failpoint in the sysctl tree
+ * (and printouts)
+ * @param code The arbitrary code to run when triggered. Can reference
+ * "RETURN_VALUE" if desired to extract the specified
+ * user return-value when triggered. Note that this is
+ * implemented with a do-while loop so be careful of
+ * break and continue statements.
+ */
+#define KFAIL_POINT_CODE(parent, name, code...) \
+ do { \
+ _FAIL_POINT_INIT(parent, name, 0) \
+ _FAIL_POINT_EVAL(name, true, code) \
+ } while (0)
+
+#define KFAIL_POINT_CODE_FLAGS(parent, name, flags, code...) \
+ do { \
+ _FAIL_POINT_INIT(parent, name, flags) \
+ _FAIL_POINT_EVAL(name, true, code) \
+ } while (0)
+
+#define KFAIL_POINT_CODE_COND(parent, name, cond, flags, code...) \
+ do { \
+ _FAIL_POINT_INIT(parent, name, flags) \
+ _FAIL_POINT_EVAL(name, cond, code) \
+ } while (0)
+
+/**
+ * @}
+ * (end group failpoint)
+ */
+
+#ifdef _KERNEL
+int fail_point_sysctl(SYSCTL_HANDLER_ARGS);
+int fail_point_sysctl_status(SYSCTL_HANDLER_ARGS);
+
+/* The fail point sysctl tree. */
+SYSCTL_DECL(_debug_fail_point);
+#define DEBUG_FP _debug_fail_point
+#endif
+
+#endif /* _SYS_FAIL_H_ */
diff --git a/freebsd/sys/sys/file.h b/freebsd/sys/sys/file.h
index 4ac69413..4fcbbde0 100644
--- a/freebsd/sys/sys/file.h
+++ b/freebsd/sys/sys/file.h
@@ -42,7 +42,9 @@
#include <sys/refcount.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <vm/vm.h>
+struct filedesc;
struct stat;
struct thread;
struct uio;
@@ -64,12 +66,15 @@ struct socket;
#define DTYPE_SEM 9 /* posix semaphore */
#define DTYPE_PTS 10 /* pseudo teletype master device */
#define DTYPE_DEV 11 /* Device specific fd type */
-#define DTYPE_CAPABILITY 12 /* capability */
-#define DTYPE_PROCDESC 13 /* process descriptor */
+#define DTYPE_PROCDESC 12 /* process descriptor */
+#define DTYPE_LINUXEFD 13 /* emulation eventfd type */
#ifdef _KERNEL
struct file;
+struct filecaps;
+struct kaiocb;
+struct kinfo_file;
struct ucred;
#define FOF_OFFSET 0x01 /* Use the offset in uio argument */
@@ -105,6 +110,17 @@ typedef int fo_chmod_t(struct file *fp, mode_t mode,
struct ucred *active_cred, struct thread *td);
typedef int fo_chown_t(struct file *fp, uid_t uid, gid_t gid,
struct ucred *active_cred, struct thread *td);
+typedef int fo_sendfile_t(struct file *fp, int sockfd, struct uio *hdr_uio,
+ struct uio *trl_uio, off_t offset, size_t nbytes,
+ off_t *sent, int flags, struct thread *td);
+typedef int fo_seek_t(struct file *fp, off_t offset, int whence,
+ struct thread *td);
+typedef int fo_fill_kinfo_t(struct file *fp, struct kinfo_file *kif,
+ struct filedesc *fdp);
+typedef int fo_mmap_t(struct file *fp, vm_map_t map, vm_offset_t *addr,
+ vm_size_t size, vm_prot_t prot, vm_prot_t cap_maxprot,
+ int flags, vm_ooffset_t foff, struct thread *td);
+typedef int fo_aio_queue_t(struct file *fp, struct kaiocb *job);
typedef int fo_flags_t;
struct fileops {
@@ -118,6 +134,11 @@ struct fileops {
fo_close_t *fo_close;
fo_chmod_t *fo_chmod;
fo_chown_t *fo_chown;
+ fo_sendfile_t *fo_sendfile;
+ fo_seek_t *fo_seek;
+ fo_fill_kinfo_t *fo_fill_kinfo;
+ fo_mmap_t *fo_mmap;
+ fo_aio_queue_t *fo_aio_queue;
fo_flags_t fo_flags; /* DFLAG_* below */
};
@@ -136,6 +157,7 @@ struct fileops {
*
* Below is the list of locks that protects members in struct file.
*
+ * (a) f_vnode lock required (shared allows both reads and writes)
* (f) protected with mtx_lock(mtx_pool_find(fp))
* (d) cdevpriv_mtx
* none not locked
@@ -145,8 +167,6 @@ struct fadvise_info {
int fa_advice; /* (f) FADV_* type. */
off_t fa_start; /* (f) Region start. */
off_t fa_end; /* (f) Region end. */
- off_t fa_prevstart; /* (f) Previous NOREUSE start. */
- off_t fa_prevend; /* (f) Previous NOREUSE end. */
};
struct file {
@@ -162,7 +182,7 @@ struct file {
/*
* DTYPE_VNODE specific fields.
*/
- int f_seqcount; /* Count of sequential accesses. */
+ int f_seqcount; /* (a) Count of sequential accesses. */
off_t f_nextoff; /* next expected read/write offset. */
union {
struct cdev_privdata *fvn_cdevpriv;
@@ -288,10 +308,6 @@ struct xfile {
#ifdef _KERNEL
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_FILE);
-#endif
-
extern struct fileops vnops;
extern struct fileops badfileops;
#ifndef __rtems__
@@ -304,7 +320,7 @@ extern int maxfilesperproc; /* per process limit on number of open files */
extern volatile int openfiles; /* actual number of open files */
#ifndef __rtems__
-int fget(struct thread *td, int fd, cap_rights_t rights, struct file **fpp);
+int fget(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp);
#else /* __rtems__ */
struct file *rtems_bsd_get_file(int fd);
@@ -320,32 +336,30 @@ rtems_bsd_do_fget(int fd, struct file **fpp)
#define fget(td, fd, rights, fpp) rtems_bsd_do_fget(fd, fpp)
#endif /* __rtems__ */
-int fget_mmap(struct thread *td, int fd, cap_rights_t rights,
+int fget_mmap(struct thread *td, int fd, cap_rights_t *rightsp,
u_char *maxprotp, struct file **fpp);
-int fget_read(struct thread *td, int fd, cap_rights_t rights,
+int fget_read(struct thread *td, int fd, cap_rights_t *rightsp,
struct file **fpp);
-int fget_write(struct thread *td, int fd, cap_rights_t rights,
+int fget_write(struct thread *td, int fd, cap_rights_t *rightsp,
struct file **fpp);
-int fgetcap(struct thread *td, int fd, struct file **fpp);
+int fget_fcntl(struct thread *td, int fd, cap_rights_t *rightsp,
+ int needfcntl, struct file **fpp);
int _fdrop(struct file *fp, struct thread *td);
#ifndef __rtems__
-/*
- * The socket operations are used a couple of places.
- * XXX: This is wrong, they should go through the operations vector for
- * XXX: sockets instead of going directly for the individual functions. /phk
- */
-fo_rdwr_t soo_read;
-fo_rdwr_t soo_write;
-fo_truncate_t soo_truncate;
-fo_ioctl_t soo_ioctl;
-fo_poll_t soo_poll;
-fo_kqfilter_t soo_kqfilter;
-fo_stat_t soo_stat;
-fo_close_t soo_close;
-
+fo_rdwr_t invfo_rdwr;
+fo_truncate_t invfo_truncate;
+fo_ioctl_t invfo_ioctl;
+fo_poll_t invfo_poll;
+fo_kqfilter_t invfo_kqfilter;
fo_chmod_t invfo_chmod;
fo_chown_t invfo_chown;
+fo_sendfile_t invfo_sendfile;
+
+fo_sendfile_t vn_sendfile;
+fo_seek_t vn_seek;
+fo_fill_kinfo_t vn_fill_kinfo;
+int vn_fill_kinfo_vnode(struct vnode *vp, struct kinfo_file *kif);
#else /* __rtems__ */
int rtems_bsd_soo_kqfilter(rtems_libio_t *iop, struct knote *kn);
#endif /* __rtems__ */
@@ -367,17 +381,18 @@ finit(struct file *fp, u_int fflag, short type, void *data,
pathinfo->handlers = ops;
}
#endif /* __rtems__ */
-int fgetvp(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp);
-int fgetvp_exec(struct thread *td, int fd, cap_rights_t rights,
+int fgetvp(struct thread *td, int fd, cap_rights_t *rightsp,
+ struct vnode **vpp);
+int fgetvp_exec(struct thread *td, int fd, cap_rights_t *rightsp,
struct vnode **vpp);
-int fgetvp_rights(struct thread *td, int fd, cap_rights_t need,
- cap_rights_t *have, struct vnode **vpp);
-int fgetvp_read(struct thread *td, int fd, cap_rights_t rights,
+int fgetvp_rights(struct thread *td, int fd, cap_rights_t *needrightsp,
+ struct filecaps *havecaps, struct vnode **vpp);
+int fgetvp_read(struct thread *td, int fd, cap_rights_t *rightsp,
struct vnode **vpp);
-int fgetvp_write(struct thread *td, int fd, cap_rights_t rights,
+int fgetvp_write(struct thread *td, int fd, cap_rights_t *rightsp,
struct vnode **vpp);
-int fgetsock(struct thread *td, int fd, cap_rights_t rights,
+int fgetsock(struct thread *td, int fd, cap_rights_t *rightsp,
struct socket **spp, u_int *fflagp);
void fputsock(struct socket *sp);
@@ -408,6 +423,7 @@ static __inline fo_stat_t fo_stat;
static __inline fo_close_t fo_close;
static __inline fo_chmod_t fo_chmod;
static __inline fo_chown_t fo_chown;
+static __inline fo_sendfile_t fo_sendfile;
static __inline int
fo_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
@@ -516,6 +532,49 @@ fo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
return ((*fp->f_ops->fo_chown)(fp, uid, gid, active_cred, td));
}
+
+static __inline int
+fo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
+ struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
+ struct thread *td)
+{
+
+ return ((*fp->f_ops->fo_sendfile)(fp, sockfd, hdr_uio, trl_uio, offset,
+ nbytes, sent, flags, td));
+}
+
+static __inline int
+fo_seek(struct file *fp, off_t offset, int whence, struct thread *td)
+{
+
+ return ((*fp->f_ops->fo_seek)(fp, offset, whence, td));
+}
+
+static __inline int
+fo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
+{
+
+ return ((*fp->f_ops->fo_fill_kinfo)(fp, kif, fdp));
+}
+
+static __inline int
+fo_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
+ vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
+ struct thread *td)
+{
+
+ if (fp->f_ops->fo_mmap == NULL)
+ return (ENODEV);
+ return ((*fp->f_ops->fo_mmap)(fp, map, addr, size, prot, cap_maxprot,
+ flags, foff, td));
+}
+
+static __inline int
+fo_aio_queue(struct file *fp, struct kaiocb *job)
+{
+
+ return ((*fp->f_ops->fo_aio_queue)(fp, job));
+}
#endif /* __rtems__ */
#endif /* _KERNEL */
diff --git a/freebsd/sys/sys/filedesc.h b/freebsd/sys/sys/filedesc.h
index ad18114a..30c2deef 100644
--- a/freebsd/sys/sys/filedesc.h
+++ b/freebsd/sys/sys/filedesc.h
@@ -33,14 +33,41 @@
#ifndef _SYS_FILEDESC_H_
#define _SYS_FILEDESC_H_
+#include <sys/caprights.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/priority.h>
+#include <sys/seq.h>
#include <sys/sx.h>
#include <machine/_limits.h>
+struct filecaps {
+ cap_rights_t fc_rights; /* per-descriptor capability rights */
+ u_long *fc_ioctls; /* per-descriptor allowed ioctls */
+ int16_t fc_nioctls; /* fc_ioctls array size */
+ uint32_t fc_fcntls; /* per-descriptor allowed fcntls */
+};
+
+struct filedescent {
+ struct file *fde_file; /* file structure for open file */
+ struct filecaps fde_caps; /* per-descriptor rights */
+ uint8_t fde_flags; /* per-process open file flags */
+ seq_t fde_seq; /* keep file and caps in sync */
+};
+#define fde_rights fde_caps.fc_rights
+#define fde_fcntls fde_caps.fc_fcntls
+#define fde_ioctls fde_caps.fc_ioctls
+#define fde_nioctls fde_caps.fc_nioctls
+#define fde_change_size (offsetof(struct filedescent, fde_seq))
+
+struct fdescenttbl {
+ int fdt_nfiles; /* number of open files allocated */
+ struct filedescent fdt_ofiles[0]; /* open files */
+};
+#define fd_seq(fdt, fd) (&(fdt)->fdt_ofiles[(fd)].fde_seq)
+
/*
* This structure is used for the management of descriptors. It may be
* shared by multiple processes.
@@ -49,18 +76,16 @@
#ifndef __rtems__
struct filedesc {
- struct file **fd_ofiles; /* file structures for open files */
- char *fd_ofileflags; /* per-process open file flags */
+ struct fdescenttbl *fd_files; /* open files table */
struct vnode *fd_cdir; /* current directory */
struct vnode *fd_rdir; /* root directory */
struct vnode *fd_jdir; /* jail root directory */
- int fd_nfiles; /* number of open files allocated */
NDSLOTTYPE *fd_map; /* bitmap of free fds */
int fd_lastfile; /* high-water mark of fd_ofiles */
int fd_freefile; /* approx. next free file */
u_short fd_cmask; /* mask for file creation */
- u_short fd_refcnt; /* thread reference count */
- u_short fd_holdcnt; /* hold count on structure + mutex */
+ int fd_refcnt; /* thread reference count */
+ int fd_holdcnt; /* hold count on structure + mutex */
struct sx fd_sx; /* protects members of this struct */
struct kqlist fd_kqlist; /* list of kqueues on this filedesc */
int fd_holdleaderscount; /* block fdfree() for shared close() */
@@ -89,6 +114,8 @@ struct filedesc_to_leader {
struct filedesc_to_leader *fdl_prev;
struct filedesc_to_leader *fdl_next;
};
+#define fd_nfiles fd_files->fdt_nfiles
+#define fd_ofiles fd_files->fdt_ofiles
#else /* __rtems__ */
struct filedesc_to_leader;
#endif /* __rtems__ */
@@ -96,7 +123,7 @@ struct filedesc_to_leader;
/*
* Per-process open flags.
*/
-#define UF_EXCLOSE 0x01 /* auto-close on exec */
+#define UF_EXCLOSE 0x01 /* auto-close on exec */
#ifdef _KERNEL
#ifdef __rtems__
@@ -124,24 +151,48 @@ struct filedesc_to_leader;
SX_NOTRECURSED)
#define FILEDESC_XLOCK_ASSERT(fdp) sx_assert(&(fdp)->fd_sx, SX_XLOCKED | \
SX_NOTRECURSED)
+#define FILEDESC_UNLOCK_ASSERT(fdp) sx_assert(&(fdp)->fd_sx, SX_UNLOCKED)
+
+/* Operation types for kern_dup(). */
+enum {
+ FDDUP_NORMAL, /* dup() behavior. */
+ FDDUP_FCNTL, /* fcntl()-style errors. */
+ FDDUP_FIXED, /* Force fixed allocation. */
+ FDDUP_MUSTREPLACE, /* Target must exist. */
+ FDDUP_LASTMODE,
+};
+
+/* Flags for kern_dup(). */
+#define FDDUP_FLAG_CLOEXEC 0x1 /* Atomically set UF_EXCLOSE. */
+
+/* For backward compatibility. */
+#define falloc(td, resultfp, resultfd, flags) \
+ falloc_caps(td, resultfp, resultfd, flags, NULL)
struct thread;
+void filecaps_init(struct filecaps *fcaps);
+int filecaps_copy(const struct filecaps *src, struct filecaps *dst,
+ bool locked);
+void filecaps_move(struct filecaps *src, struct filecaps *dst);
+void filecaps_free(struct filecaps *fcaps);
+
int closef(struct file *fp, struct thread *td);
-int dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd,
- int mode, int error);
+int dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode,
+ int openerror, int *indxp);
#ifndef __rtems__
-int falloc(struct thread *td, struct file **resultfp, int *resultfd,
- int flags);
+int falloc_caps(struct thread *td, struct file **resultfp, int *resultfd,
+ int flags, struct filecaps *fcaps);
#else /* __rtems__ */
static inline int
-falloc(struct thread *td, struct file **resultfp, int *resultfd,
- int flags)
+falloc_caps(struct thread *td, struct file **resultfp, int *resultfd,
+ int flags, struct filecaps *fcaps)
{
rtems_libio_t *iop = rtems_libio_allocate();
(void) td;
(void) flags;
+ (void) fcaps;
*resultfp = rtems_bsd_iop_to_fp(iop);
@@ -157,49 +208,63 @@ falloc(struct thread *td, struct file **resultfp, int *resultfd,
}
#endif /* __rtems__ */
int falloc_noinstall(struct thread *td, struct file **resultfp);
-int finstall(struct thread *td, struct file *fp, int *resultfp, int flags);
+void _finstall(struct filedesc *fdp, struct file *fp, int fd, int flags,
+ struct filecaps *fcaps);
+int finstall(struct thread *td, struct file *fp, int *resultfd, int flags,
+ struct filecaps *fcaps);
int fdalloc(struct thread *td, int minfd, int *result);
int fdallocn(struct thread *td, int minfd, int *fds, int n);
-int fdavail(struct thread *td, int n);
int fdcheckstd(struct thread *td);
#ifndef __rtems__
-void fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td);
+void fdclose(struct thread *td, struct file *fp, int idx);
#else /* __rtems__ */
static inline void
-rtems_bsd_fdclose(struct file *fp, int idx, struct thread *td)
+fdclose(struct thread *td, struct file *fp, int idx)
{
- (void) idx;
- (void) td;
+ (void)td;
+ (void)idx;
rtems_libio_free(&fp->f_io);
}
-
-#define fdclose(fdp, fp, idx, td) rtems_bsd_fdclose(fp, idx, td)
#endif /* __rtems__ */
void fdcloseexec(struct thread *td);
+void fdsetugidsafety(struct thread *td);
struct filedesc *fdcopy(struct filedesc *fdp);
-void fdunshare(struct proc *p, struct thread *td);
-void fdfree(struct thread *td);
-struct filedesc *fdinit(struct filedesc *fdp);
+int fdcopy_remapped(struct filedesc *fdp, const int *fds, size_t nfds,
+ struct filedesc **newfdp);
+void fdinstall_remapped(struct thread *td, struct filedesc *fdp);
+void fdunshare(struct thread *td);
+void fdescfree(struct thread *td);
+void fdescfree_remapped(struct filedesc *fdp);
+struct filedesc *fdinit(struct filedesc *fdp, bool prepfiles);
struct filedesc *fdshare(struct filedesc *fdp);
struct filedesc_to_leader *
filedesc_to_leader_alloc(struct filedesc_to_leader *old,
struct filedesc *fdp, struct proc *leader);
-int getvnode(struct filedesc *fdp, int fd, cap_rights_t rights,
+int getvnode(struct thread *td, int fd, cap_rights_t *rightsp,
struct file **fpp);
void mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
-void setugidsafety(struct thread *td);
/* Return a referenced file from an unlocked descriptor. */
#ifndef __rtems__
-struct file *fget_unlocked(struct filedesc *fdp, int fd);
+int fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
+ struct file **fpp, seq_t *seqp);
#else /* __rtems__ */
-static inline struct file *
-fget_unlocked(struct filedesc *fdp, int fd)
+static inline int
+fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
+ struct file **fpp, seq_t *seqp)
{
- (void) fdp;
+ (void)fdp;
+ (void)needrightsp;
+ (void)seqp;
+
+ *fpp = rtems_bsd_get_file(fd);
- return rtems_bsd_get_file(fd);
+ if (*fpp != NULL) {
+ return (0);
+ } else {
+ return (EBADF);
+ }
}
#endif /* __rtems__ */
@@ -209,10 +274,27 @@ static __inline struct file *
fget_locked(struct filedesc *fdp, int fd)
{
- return (fd < 0 || fd >= fdp->fd_nfiles ? NULL : fdp->fd_ofiles[fd]);
+ FILEDESC_LOCK_ASSERT(fdp);
+
+ if (fd < 0 || fd > fdp->fd_lastfile)
+ return (NULL);
+
+ return (fdp->fd_ofiles[fd].fde_file);
+}
+
+static __inline bool
+fd_modified(struct filedesc *fdp, int fd, seq_t seq)
+{
+
+ return (!seq_consistent(fd_seq(fdp->fd_files, fd), seq));
}
#endif /* __rtems__ */
+/* cdir/rdir/jdir manipulation functions. */
+void pwd_chdir(struct thread *td, struct vnode *vp);
+int pwd_chroot(struct thread *td, struct vnode *vp);
+void pwd_ensure_dirs(void);
+
#endif /* _KERNEL */
#endif /* !_SYS_FILEDESC_H_ */
diff --git a/freebsd/sys/sys/fnv_hash.h b/freebsd/sys/sys/fnv_hash.h
index 1b9fa9f5..e574e0e0 100644
--- a/freebsd/sys/sys/fnv_hash.h
+++ b/freebsd/sys/sys/fnv_hash.h
@@ -61,7 +61,7 @@ static __inline Fnv64_t
fnv_64_str(const char *str, Fnv64_t hval)
{
const u_int8_t *s = (const u_int8_t *)str;
- u_register_t c; /* 32 bit on i386, 64 bit on alpha,ia64 */
+ u_register_t c; /* 32 bit on i386, 64 bit on alpha */
while ((c = *s++) != 0) {
hval *= FNV_64_PRIME;
diff --git a/freebsd/sys/sys/gpio.h b/freebsd/sys/sys/gpio.h
new file mode 100644
index 00000000..9b0a1b55
--- /dev/null
+++ b/freebsd/sys/sys/gpio.h
@@ -0,0 +1,108 @@
+/* $NetBSD: gpio.h,v 1.7 2009/09/25 20:27:50 mbalmer Exp $ */
+/* $OpenBSD: gpio.h,v 1.7 2008/11/26 14:51:20 mbalmer Exp $ */
+/*-
+ * Copyright (c) 2009, Oleksandr Tymoshenko <gonzo@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+/*
+ * Copyright (c) 2009 Marc Balmer <marc@msys.ch>
+ * Copyright (c) 2004 Alexander Yurchenko <grange@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __GPIO_H__
+#define __GPIO_H__
+
+#include <sys/ioccom.h>
+
+/* GPIO pin states */
+#define GPIO_PIN_LOW 0x00 /* low level (logical 0) */
+#define GPIO_PIN_HIGH 0x01 /* high level (logical 1) */
+
+/* Max name length of a pin */
+#define GPIOMAXNAME 64
+
+/* GPIO pin configuration flags */
+#define GPIO_PIN_INPUT 0x00000001 /* input direction */
+#define GPIO_PIN_OUTPUT 0x00000002 /* output direction */
+#define GPIO_PIN_OPENDRAIN 0x00000004 /* open-drain output */
+#define GPIO_PIN_PUSHPULL 0x00000008 /* push-pull output */
+#define GPIO_PIN_TRISTATE 0x00000010 /* output disabled */
+#define GPIO_PIN_PULLUP 0x00000020 /* internal pull-up enabled */
+#define GPIO_PIN_PULLDOWN 0x00000040 /* internal pull-down enabled */
+#define GPIO_PIN_INVIN 0x00000080 /* invert input */
+#define GPIO_PIN_INVOUT 0x00000100 /* invert output */
+#define GPIO_PIN_PULSATE 0x00000200 /* pulsate in hardware */
+/* GPIO interrupt capabilities */
+#define GPIO_INTR_NONE 0x00000000 /* no interrupt support */
+#define GPIO_INTR_LEVEL_LOW 0x00010000 /* level trigger, low */
+#define GPIO_INTR_LEVEL_HIGH 0x00020000 /* level trigger, high */
+#define GPIO_INTR_EDGE_RISING 0x00040000 /* edge trigger, rising */
+#define GPIO_INTR_EDGE_FALLING 0x00080000 /* edge trigger, falling */
+#define GPIO_INTR_EDGE_BOTH 0x00100000 /* edge trigger, both */
+#define GPIO_INTR_MASK (GPIO_INTR_LEVEL_LOW | GPIO_INTR_LEVEL_HIGH | \
+ GPIO_INTR_EDGE_RISING | \
+ GPIO_INTR_EDGE_FALLING | GPIO_INTR_EDGE_BOTH)
+
+struct gpio_pin {
+ uint32_t gp_pin; /* pin number */
+ char gp_name[GPIOMAXNAME]; /* human-readable name */
+ uint32_t gp_caps; /* capabilities */
+ uint32_t gp_flags; /* current flags */
+};
+
+/* GPIO pin request (read/write/toggle) */
+struct gpio_req {
+ uint32_t gp_pin; /* pin number */
+ uint32_t gp_value; /* value */
+};
+
+/*
+ * ioctls
+ */
+#define GPIOMAXPIN _IOR('G', 0, int)
+#define GPIOGETCONFIG _IOWR('G', 1, struct gpio_pin)
+#define GPIOSETCONFIG _IOW('G', 2, struct gpio_pin)
+#define GPIOGET _IOWR('G', 3, struct gpio_req)
+#define GPIOSET _IOW('G', 4, struct gpio_req)
+#define GPIOTOGGLE _IOWR('G', 5, struct gpio_req)
+#define GPIOSETNAME _IOW('G', 6, struct gpio_pin)
+
+#endif /* __GPIO_H__ */
diff --git a/freebsd/sys/sys/hash.h b/freebsd/sys/sys/hash.h
index 6ad89c5e..8abf17bb 100644
--- a/freebsd/sys/sys/hash.h
+++ b/freebsd/sys/sys/hash.h
@@ -118,4 +118,17 @@ hash32_strne(const void *buf, size_t len, int end, const char **ep,
return hash;
}
+
+#ifdef _KERNEL
+/*
+ * Hashing function from Bob Jenkins. Implementation in libkern/jenkins_hash.c.
+ */
+uint32_t jenkins_hash(const void *, size_t, uint32_t);
+uint32_t jenkins_hash32(const uint32_t *, size_t, uint32_t);
+
+uint32_t murmur3_32_hash(const void *, size_t, uint32_t);
+uint32_t murmur3_32_hash32(const uint32_t *, size_t, uint32_t);
+
+#endif /* _KERNEL */
+
#endif /* !_SYS_HASH_H_ */
diff --git a/freebsd/sys/sys/hhook.h b/freebsd/sys/sys/hhook.h
index 0d54eda4..7de47d48 100644
--- a/freebsd/sys/sys/hhook.h
+++ b/freebsd/sys/sys/hhook.h
@@ -64,6 +64,9 @@
/* Helper hook types. */
#define HHOOK_TYPE_TCP 1
+#define HHOOK_TYPE_SOCKET 2
+#define HHOOK_TYPE_IPSEC_IN 3
+#define HHOOK_TYPE_IPSEC_OUT 4
struct helper;
struct osd;
diff --git a/freebsd/sys/sys/interrupt.h b/freebsd/sys/sys/interrupt.h
index 3dace82e..c320e5fc 100644
--- a/freebsd/sys/sys/interrupt.h
+++ b/freebsd/sys/sys/interrupt.h
@@ -112,13 +112,13 @@ struct intr_event {
void (*ie_pre_ithread)(void *);
void (*ie_post_ithread)(void *);
void (*ie_post_filter)(void *);
- int (*ie_assign_cpu)(void *, u_char);
+ int (*ie_assign_cpu)(void *, int);
int ie_flags;
int ie_count; /* Loop counter. */
int ie_warncnt; /* Rate-check interrupt storm warns. */
struct timeval ie_warntm;
int ie_irq; /* Physical irq number if !SOFT. */
- u_char ie_cpu; /* CPU this event is bound to. */
+ int ie_cpu; /* CPU this event is bound to. */
};
/* Interrupt event flags kept in ie_flags. */
@@ -161,11 +161,11 @@ u_char intr_priority(enum intr_type flags);
int intr_event_add_handler(struct intr_event *ie, const char *name,
driver_filter_t filter, driver_intr_t handler, void *arg,
u_char pri, enum intr_type flags, void **cookiep);
-int intr_event_bind(struct intr_event *ie, u_char cpu);
+int intr_event_bind(struct intr_event *ie, int cpu);
int intr_event_create(struct intr_event **event, void *source,
int flags, int irq, void (*pre_ithread)(void *),
void (*post_ithread)(void *), void (*post_filter)(void *),
- int (*assign_cpu)(void *, u_char), const char *fmt, ...)
+ int (*assign_cpu)(void *, int), const char *fmt, ...)
__printflike(9, 10);
int intr_event_describe_handler(struct intr_event *ie, void *cookie,
const char *descr);
diff --git a/freebsd/sys/sys/jail.h b/freebsd/sys/sys/jail.h
index 063dd6a3..5fcbaf39 100644
--- a/freebsd/sys/sys/jail.h
+++ b/freebsd/sys/sys/jail.h
@@ -134,6 +134,7 @@ MALLOC_DECLARE(M_PRISON);
#include <sys/osd.h>
#define HOSTUUIDLEN 64
+#define OSRELEASELEN 32
struct racct;
struct prison_racct;
@@ -148,7 +149,6 @@ struct prison_racct;
* (p) locked by pr_mtx
* (c) set only during creation before the structure is shared, no mutex
* required to read
- * (d) set only during destruction of jail, no mutex needed
*/
struct prison {
TAILQ_ENTRY(prison) pr_list; /* (a) all prisons */
@@ -160,7 +160,7 @@ struct prison {
LIST_ENTRY(prison) pr_sibling; /* (a) next in parent's list */
struct prison *pr_parent; /* (c) containing jail */
struct mtx pr_mtx;
- struct task pr_task; /* (d) destroy task */
+ struct task pr_task; /* (c) destroy task */
struct osd pr_osd; /* (p) additional data */
struct cpuset *pr_cpuset; /* (p) cpuset */
struct vnet *pr_vnet; /* (c) network stack */
@@ -177,13 +177,15 @@ struct prison {
int pr_securelevel; /* (p) securelevel */
int pr_enforce_statfs; /* (p) statfs permission */
int pr_devfs_rsnum; /* (p) devfs ruleset */
- int pr_spare[4];
+ int pr_spare[3];
+ int pr_osreldate; /* (c) kern.osreldate value */
unsigned long pr_hostid; /* (p) jail hostid */
char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */
char pr_path[MAXPATHLEN]; /* (c) chroot path */
char pr_hostname[MAXHOSTNAMELEN]; /* (p) jail hostname */
char pr_domainname[MAXHOSTNAMELEN]; /* (p) jail domainname */
char pr_hostuuid[HOSTUUIDLEN]; /* (p) jail hostuuid */
+ char pr_osrelease[OSRELEASELEN]; /* (c) kern.osrelease value */
};
struct prison_racct {
@@ -201,15 +203,12 @@ struct prison_racct {
#define PR_IP4_USER 0x00000004 /* Restrict IPv4 addresses */
#define PR_IP6_USER 0x00000008 /* Restrict IPv6 addresses */
#define PR_VNET 0x00000010 /* Virtual network stack */
-#define PR_IP4_DISABLE 0x00000020 /* Disable IPv4 */
-#define PR_IP6_DISABLE 0x00000040 /* Disable IPv6 */
#define PR_IP4_SADDRSEL 0x00000080 /* Do IPv4 src addr sel. or use the */
/* primary jail address. */
#define PR_IP6_SADDRSEL 0x00000100 /* Do IPv6 src addr sel. or use the */
/* primary jail address. */
/* Internal flag bits */
-#define PR_REMOVE 0x01000000 /* In process of being removed */
#define PR_IP4 0x02000000 /* IPv4 restricted or disabled */
/* by this jail or an ancestor */
#define PR_IP6 0x04000000 /* IPv6 restricted or disabled */
@@ -227,7 +226,11 @@ struct prison_racct {
#define PR_ALLOW_MOUNT_NULLFS 0x0100
#define PR_ALLOW_MOUNT_ZFS 0x0200
#define PR_ALLOW_MOUNT_PROCFS 0x0400
-#define PR_ALLOW_ALL 0x07ff
+#define PR_ALLOW_MOUNT_TMPFS 0x0800
+#define PR_ALLOW_MOUNT_FDESCFS 0x1000
+#define PR_ALLOW_MOUNT_LINPROCFS 0x2000
+#define PR_ALLOW_MOUNT_LINSYSFS 0x4000
+#define PR_ALLOW_ALL 0x7fff
/*
* OSD methods
@@ -237,7 +240,8 @@ struct prison_racct {
#define PR_METHOD_SET 2
#define PR_METHOD_CHECK 3
#define PR_METHOD_ATTACH 4
-#define PR_MAXMETHOD 5
+#define PR_METHOD_REMOVE 5
+#define PR_MAXMETHOD 6
/*
* Lock/unlock a prison.
@@ -362,6 +366,7 @@ void getcredhostname(struct ucred *, char *, size_t);
void getcreddomainname(struct ucred *, char *, size_t);
void getcredhostuuid(struct ucred *, char *, size_t);
void getcredhostid(struct ucred *, unsigned long *);
+void prison0_init(void);
int prison_allow(struct ucred *, unsigned);
int prison_check(struct ucred *cred1, struct ucred *cred2);
int prison_owns_vnet(struct ucred *);
@@ -387,8 +392,11 @@ int prison_equal_ip4(struct prison *, struct prison *);
int prison_get_ip4(struct ucred *cred, struct in_addr *ia);
int prison_local_ip4(struct ucred *cred, struct in_addr *ia);
int prison_remote_ip4(struct ucred *cred, struct in_addr *ia);
-int prison_check_ip4(struct ucred *cred, struct in_addr *ia);
+int prison_check_ip4(const struct ucred *, const struct in_addr *);
+int prison_check_ip4_locked(const struct prison *, const struct in_addr *);
int prison_saddrsel_ip4(struct ucred *, struct in_addr *);
+int prison_restrict_ip4(struct prison *, struct in_addr *);
+int prison_qcmp_v4(const void *, const void *);
#ifdef INET6
#ifndef __rtems__
int prison_equal_ip6(struct prison *, struct prison *);
@@ -398,8 +406,11 @@ int prison_equal_ip6(struct prison *, struct prison *);
int prison_get_ip6(struct ucred *, struct in6_addr *);
int prison_local_ip6(struct ucred *, struct in6_addr *, int);
int prison_remote_ip6(struct ucred *, struct in6_addr *);
-int prison_check_ip6(struct ucred *, struct in6_addr *);
+int prison_check_ip6(const struct ucred *, const struct in6_addr *);
+int prison_check_ip6_locked(const struct prison *, const struct in6_addr *);
int prison_saddrsel_ip6(struct ucred *, struct in6_addr *);
+int prison_restrict_ip6(struct prison *, struct in6_addr *);
+int prison_qcmp_v6(const void *, const void *);
#endif
int prison_check_af(struct ucred *cred, int af);
int prison_if(struct ucred *cred, struct sockaddr *sa);
@@ -407,7 +418,8 @@ char *prison_name(struct prison *, struct prison *);
int prison_priv_check(struct ucred *cred, int priv);
int sysctl_jail_param(SYSCTL_HANDLER_ARGS);
void prison_racct_foreach(void (*callback)(struct racct *racct,
- void *arg2, void *arg3), void *arg2, void *arg3);
+ void *arg2, void *arg3), void (*pre)(void), void (*post)(void),
+ void *arg2, void *arg3);
struct prison_racct *prison_racct_find(const char *name);
void prison_racct_hold(struct prison_racct *prr);
void prison_racct_free(struct prison_racct *prr);
diff --git a/freebsd/sys/sys/kernel.h b/freebsd/sys/sys/kernel.h
index 687ee531..8f8f4ea7 100644
--- a/freebsd/sys/sys/kernel.h
+++ b/freebsd/sys/sys/kernel.h
@@ -91,22 +91,19 @@ extern volatile int32_t _bsd_ticks;
* for binary compatibility with inserted elements.
*
* The SI_SUB_LAST value must have the highest lexical value.
- *
- * The SI_SUB_SWAP values represent a value used by
- * the BSD 4.4Lite but not by FreeBSD; it is maintained in dependent
- * order to support porting.
*/
enum sysinit_sub_id {
SI_SUB_DUMMY = 0x0000000, /* not executed; for linker*/
SI_SUB_DONE = 0x0000001, /* processed*/
SI_SUB_TUNABLES = 0x0700000, /* establish tunable values */
SI_SUB_COPYRIGHT = 0x0800001, /* first use of console*/
- SI_SUB_SETTINGS = 0x0880000, /* check and recheck settings */
- SI_SUB_MTX_POOL_STATIC = 0x0900000, /* static mutex pool */
- SI_SUB_LOCKMGR = 0x0980000, /* lockmgr locks */
SI_SUB_VM = 0x1000000, /* virtual memory system init*/
SI_SUB_KMEM = 0x1800000, /* kernel memory*/
- SI_SUB_KVM_RSRC = 0x1A00000, /* kvm operational limits*/
+ SI_SUB_HYPERVISOR = 0x1A40000, /*
+ * Hypervisor detection and
+ * virtualization support
+ * setup.
+ */
SI_SUB_WITNESS = 0x1A80000, /* witness initialization */
SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */
SI_SUB_LOCK = 0x1B00000, /* various locks */
@@ -115,8 +112,8 @@ enum sysinit_sub_id {
SI_SUB_KLD = 0x2000000, /* KLD and module setup */
SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/
SI_SUB_RACCT = 0x2110000, /* resource accounting */
- SI_SUB_RANDOM = 0x2120000, /* random number generator */
SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */
+ SI_SUB_RANDOM = 0x2160000, /* random number generator */
SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */
SI_SUB_MAC_POLICY = 0x21C0000, /* TrustedBSD MAC policies */
SI_SUB_MAC_LATE = 0x21D0000, /* TrustedBSD MAC subsystem */
@@ -127,14 +124,15 @@ enum sysinit_sub_id {
SI_SUB_RUN_QUEUE = 0x2400000, /* set up run queue*/
SI_SUB_KTRACE = 0x2480000, /* ktrace */
SI_SUB_OPENSOLARIS = 0x2490000, /* OpenSolaris compatibility */
- SI_SUB_CYCLIC = 0x24A0000, /* Cyclic timers */
SI_SUB_AUDIT = 0x24C0000, /* audit */
SI_SUB_CREATE_INIT = 0x2500000, /* create init process*/
SI_SUB_SCHED_IDLE = 0x2600000, /* required idle procs */
SI_SUB_MBUF = 0x2700000, /* mbuf subsystem */
SI_SUB_INTR = 0x2800000, /* interrupt threads */
- SI_SUB_SOFTINTR = 0x2800001, /* start soft interrupt thread */
- SI_SUB_ACL = 0x2900000, /* start for filesystem ACLs */
+#ifdef EARLY_AP_STARTUP
+ SI_SUB_SMP = 0x2900000, /* start the APs*/
+#endif
+ SI_SUB_SOFTINTR = 0x2A00000, /* start soft interrupt thread */
SI_SUB_DEVFS = 0x2F00000, /* devfs ready for devices */
SI_SUB_INIT_IF = 0x3000000, /* prep for net interfaces */
SI_SUB_NETGRAPH = 0x3010000, /* Let Netgraph initialize */
@@ -145,7 +143,6 @@ enum sysinit_sub_id {
SI_SUB_CONFIGURE = 0x3800000, /* Configure devices */
SI_SUB_VFS = 0x4000000, /* virtual filesystem*/
SI_SUB_CLOCKS = 0x4800000, /* real time and stat clocks*/
- SI_SUB_CLIST = 0x5800000, /* clists*/
SI_SUB_SYSV_SHM = 0x6400000, /* System V shared memory*/
SI_SUB_SYSV_SEM = 0x6800000, /* System V semaphores*/
SI_SUB_SYSV_MSG = 0x6C00000, /* System V message queues*/
@@ -153,18 +150,18 @@ enum sysinit_sub_id {
SI_SUB_PSEUDO = 0x7000000, /* pseudo devices*/
SI_SUB_EXEC = 0x7400000, /* execve() handlers */
SI_SUB_PROTO_BEGIN = 0x8000000, /* VNET initialization */
+ SI_SUB_PROTO_PFIL = 0x8100000, /* Initialize pfil before FWs */
SI_SUB_PROTO_IF = 0x8400000, /* interfaces*/
SI_SUB_PROTO_DOMAININIT = 0x8600000, /* domain registration system */
+ SI_SUB_PROTO_MC = 0x8700000, /* Multicast */
SI_SUB_PROTO_DOMAIN = 0x8800000, /* domains (address families?)*/
- SI_SUB_PROTO_IFATTACHDOMAIN = 0x8800001, /* domain dependent data init*/
+ SI_SUB_PROTO_FIREWALL = 0x8806000, /* Firewalls */
+ SI_SUB_PROTO_IFATTACHDOMAIN = 0x8808000,/* domain dependent data init */
SI_SUB_PROTO_END = 0x8ffffff, /* VNET helper functions */
SI_SUB_KPROF = 0x9000000, /* kernel profiling*/
SI_SUB_KICK_SCHEDULER = 0xa000000, /* start the timeout events*/
SI_SUB_INT_CONFIG_HOOKS = 0xa800000, /* Interrupts enabled config */
SI_SUB_ROOT_CONF = 0xb000000, /* Find root devices */
- SI_SUB_DUMP_CONF = 0xb200000, /* Find dump devices */
- SI_SUB_RAID = 0xb380000, /* Configure GEOM classes */
- SI_SUB_SWAP = 0xc000000, /* swap */
SI_SUB_INTRINSIC_POST = 0xd000000, /* proc 0 cleanup*/
SI_SUB_SYSCALLS = 0xd800000, /* register system calls */
SI_SUB_VNET_DONE = 0xdc00000, /* vnet registration complete */
@@ -174,8 +171,10 @@ enum sysinit_sub_id {
SI_SUB_KTHREAD_BUF = 0xea00000, /* buffer daemon*/
SI_SUB_KTHREAD_UPDATE = 0xec00000, /* update daemon*/
SI_SUB_KTHREAD_IDLE = 0xee00000, /* idle procs*/
+#ifndef EARLY_AP_STARTUP
SI_SUB_SMP = 0xf000000, /* start the APs*/
- SI_SUB_RACCTD = 0xf100000, /* start raccd*/
+#endif
+ SI_SUB_RACCTD = 0xf100000, /* start racctd*/
SI_SUB_LAST = 0xfffffff /* final initialization */
};
@@ -303,7 +302,7 @@ void sysinit_add(struct sysinit **set, struct sysinit **set_end);
/*
* Infrastructure for tunable 'constants'. Value may be specified at compile
* time or kernel load time. Rules relating tunables together can be placed
- * in a SYSINIT function at SI_SUB_TUNABLES with SI_ORDER_LAST.
+ * in a SYSINIT function at SI_SUB_TUNABLES with SI_ORDER_ANY.
*
* WARNING: developers should never use the reserved suffixes specified in
* loader.conf(5) for any tunables or conflicts will result.
@@ -368,6 +367,44 @@ struct tunable_ulong {
#define TUNABLE_ULONG_FETCH(path, var) getenv_ulong((path), (var))
/*
+ * int64_t
+ */
+extern void tunable_int64_init(void *);
+struct tunable_int64 {
+ const char *path;
+ int64_t *var;
+};
+#define TUNABLE_INT64(path, var) \
+ static struct tunable_int64 __CONCAT(__tunable_int64_, __LINE__) = { \
+ (path), \
+ (var), \
+ }; \
+ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \
+ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_int64_init, \
+ &__CONCAT(__tunable_int64_, __LINE__))
+
+#define TUNABLE_INT64_FETCH(path, var) getenv_int64((path), (var))
+
+/*
+ * uint64_t
+ */
+extern void tunable_uint64_init(void *);
+struct tunable_uint64 {
+ const char *path;
+ uint64_t *var;
+};
+#define TUNABLE_UINT64(path, var) \
+ static struct tunable_ulong __CONCAT(__tunable_uint64_, __LINE__) = { \
+ (path), \
+ (var), \
+ }; \
+ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \
+ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_uint64_init, \
+ &__CONCAT(__tunable_uint64_, __LINE__))
+
+#define TUNABLE_UINT64_FETCH(path, var) getenv_uint64((path), (var))
+
+/*
* quad
*/
extern void tunable_quad_init(void *);
diff --git a/freebsd/sys/sys/khelp.h b/freebsd/sys/sys/khelp.h
index db12d6bb..f542b148 100644
--- a/freebsd/sys/sys/khelp.h
+++ b/freebsd/sys/sys/khelp.h
@@ -55,6 +55,7 @@ struct osd;
/* Helper classes. */
#define HELPER_CLASS_TCP 0x00000001
+#define HELPER_CLASS_SOCKET 0x00000002
/* Public KPI functions. */
int khelp_register_helper(struct helper *h);
diff --git a/freebsd/sys/sys/kobj.h b/freebsd/sys/sys/kobj.h
index 5df5dccc..36d8d2a7 100644
--- a/freebsd/sys/sys/kobj.h
+++ b/freebsd/sys/sys/kobj.h
@@ -34,7 +34,7 @@
*/
typedef struct kobj *kobj_t;
typedef struct kobj_class *kobj_class_t;
-typedef struct kobj_method kobj_method_t;
+typedef const struct kobj_method kobj_method_t;
typedef int (*kobjop_t)(void);
typedef struct kobj_ops *kobj_ops_t;
typedef struct kobjop_desc *kobjop_desc_t;
@@ -86,7 +86,7 @@ struct kobj_ops {
struct kobjop_desc {
unsigned int id; /* unique ID */
- kobj_method_t *deflt; /* default implementation */
+ kobj_method_t deflt; /* default implementation */
};
/*
@@ -146,13 +146,13 @@ struct kobj_class classvar = { \
* DEFINE_CLASS_2(foo, foo_class, foo_methods, sizeof(foo_softc),
* bar, baz);
*/
-#define DEFINE_CLASS_2(name, methods, size, \
+#define DEFINE_CLASS_2(name, classvar, methods, size, \
base1, base2) \
\
static kobj_class_t name ## _baseclasses[] = \
{ &base1, \
&base2, NULL }; \
-struct kobj_class name ## _class = { \
+struct kobj_class classvar = { \
#name, methods, size, name ## _baseclasses \
}
@@ -162,14 +162,14 @@ struct kobj_class name ## _class = { \
* DEFINE_CLASS_3(foo, foo_class, foo_methods, sizeof(foo_softc),
* bar, baz, foobar);
*/
-#define DEFINE_CLASS_3(name, methods, size, \
+#define DEFINE_CLASS_3(name, classvar, methods, size, \
base1, base2, base3) \
\
static kobj_class_t name ## _baseclasses[] = \
{ &base1, \
&base2, \
&base3, NULL }; \
-struct kobj_class name ## _class = { \
+struct kobj_class classvar = { \
#name, methods, size, name ## _baseclasses \
}
diff --git a/freebsd/sys/sys/kthread.h b/freebsd/sys/sys/kthread.h
index 4911eccc..b6304f52 100644
--- a/freebsd/sys/sys/kthread.h
+++ b/freebsd/sys/sys/kthread.h
@@ -37,14 +37,14 @@
* Note: global_procpp may be NULL for no global save area.
*/
struct kproc_desc {
- char *arg0; /* arg 0 (for 'ps' listing) */
- void (*func)(void); /* "main" for kernel process */
+ const char *arg0; /* arg 0 (for 'ps' listing) */
+ void (*func)(void); /* "main" for kernel process */
struct proc **global_procpp; /* ptr to proc ptr save area */
};
/* A kernel thread descriptor; used to start "internal" daemons. */
struct kthread_desc {
- char *arg0; /* arg 0 (for 'ps' listing) */
+ const char *arg0; /* arg 0 (for 'ps' listing) */
void (*func)(void); /* "main" for kernel thread */
struct thread **global_threadpp; /* ptr to thread ptr save area */
};
diff --git a/freebsd/sys/sys/ktr.h b/freebsd/sys/sys/ktr.h
index de8ebe54..e25cfe03 100644
--- a/freebsd/sys/sys/ktr.h
+++ b/freebsd/sys/sys/ktr.h
@@ -36,56 +36,7 @@
#ifndef _SYS_KTR_H_
#define _SYS_KTR_H_
-/*
- * Trace classes
- *
- * Two of the trace classes (KTR_DEV and KTR_SUBSYS) are special in that
- * they are really placeholders so that indvidual drivers and subsystems
- * can map their internal tracing to the general class when they wish to
- * have tracing enabled and map it to 0 when they don't.
- */
-#define KTR_GEN 0x00000001 /* General (TR) */
-#define KTR_NET 0x00000002 /* Network */
-#define KTR_DEV 0x00000004 /* Device driver */
-#define KTR_LOCK 0x00000008 /* MP locking */
-#define KTR_SMP 0x00000010 /* MP general */
-#define KTR_SUBSYS 0x00000020 /* Subsystem. */
-#define KTR_PMAP 0x00000040 /* Pmap tracing */
-#define KTR_MALLOC 0x00000080 /* Malloc tracing */
-#define KTR_TRAP 0x00000100 /* Trap processing */
-#define KTR_INTR 0x00000200 /* Interrupt tracing */
-#define KTR_SIG 0x00000400 /* Signal processing */
-#define KTR_SPARE2 0x00000800 /* XXX Used by cxgb */
-#define KTR_PROC 0x00001000 /* Process scheduling */
-#define KTR_SYSC 0x00002000 /* System call */
-#define KTR_INIT 0x00004000 /* System initialization */
-#define KTR_SPARE3 0x00008000 /* XXX Used by cxgb */
-#define KTR_SPARE4 0x00010000 /* XXX Used by cxgb */
-#define KTR_EVH 0x00020000 /* Eventhandler */
-#define KTR_VFS 0x00040000 /* VFS events */
-#define KTR_VOP 0x00080000 /* Auto-generated vop events */
-#define KTR_VM 0x00100000 /* The virtual memory system */
-#define KTR_INET 0x00200000 /* IPv4 stack */
-#define KTR_RUNQ 0x00400000 /* Run queue */
-#define KTR_CONTENTION 0x00800000 /* Lock contention */
-#define KTR_UMA 0x01000000 /* UMA slab allocator */
-#define KTR_CALLOUT 0x02000000 /* Callouts and timeouts */
-#define KTR_GEOM 0x04000000 /* GEOM I/O events */
-#define KTR_BUSDMA 0x08000000 /* busdma(9) events */
-#define KTR_INET6 0x10000000 /* IPv6 stack */
-#define KTR_SCHED 0x20000000 /* Machine parsed sched info. */
-#define KTR_BUF 0x40000000 /* Buffer cache */
-#define KTR_ALL 0x7fffffff
-
-/* Trace classes to compile in */
-#ifdef KTR
-#ifndef KTR_COMPILE
-#define KTR_COMPILE (KTR_ALL)
-#endif
-#else /* !KTR */
-#undef KTR_COMPILE
-#define KTR_COMPILE 0
-#endif /* KTR */
+#include <sys/ktr_class.h>
/*
* Version number for ktr_entry struct. Increment this when you break binary
@@ -111,16 +62,16 @@ struct ktr_entry {
};
extern cpuset_t ktr_cpumask;
-extern int ktr_mask;
+extern uint64_t ktr_mask;
extern int ktr_entries;
extern int ktr_verbose;
extern volatile int ktr_idx;
-extern struct ktr_entry ktr_buf[];
+extern struct ktr_entry *ktr_buf;
#ifdef KTR
-void ktr_tracepoint(u_int mask, const char *file, int line,
+void ktr_tracepoint(uint64_t mask, const char *file, int line,
const char *format, u_long arg1, u_long arg2, u_long arg3,
u_long arg4, u_long arg5, u_long arg6);
@@ -244,6 +195,50 @@ void ktr_tracepoint(u_int mask, const char *file, int line,
point, a0, (v0), a1, (v1), a2, (v2), a3, (v3))
/*
+ * Start functions denote the start of a region of code or operation
+ * and should be paired with stop functions for timing of nested
+ * sequences.
+ *
+ * Specifying extra attributes with the name "key" will result in
+ * multi-part keys. For example a block device and offset pair
+ * might be used to describe a buf undergoing I/O.
+ */
+#define KTR_START0(m, egroup, ident, key) \
+ KTR_EVENT0(m, egroup, ident, "start:0x%jX", (uintmax_t)key)
+#define KTR_START1(m, egroup, ident, key, a0, v0) \
+ KTR_EVENT1(m, egroup, ident, "start:0x%jX", (uintmax_t)key, a0, (v0))
+#define KTR_START2(m, egroup, ident, key, a0, v0, a1, v1) \
+ KTR_EVENT2(m, egroup, ident, "start:0x%jX", (uintmax_t)key, \
+ a0, (v0), a1, (v1))
+#define KTR_START3(m, egroup, ident, key, a0, v0, a1, v1, a2, v2)\
+ KTR_EVENT3(m, egroup, ident, "start:0x%jX", (uintmax_t)key, \
+ a0, (v0), a1, (v1), a2, (v2))
+#define KTR_START4(m, egroup, ident, key, \
+ a0, v0, a1, v1, a2, v2, a3, v3) \
+ KTR_EVENT4(m, egroup, ident, "start:0x%jX", (uintmax_t)key, \
+ a0, (v0), a1, (v1), a2, (v2), a3, (v3))
+
+/*
+ * Stop functions denote the end of a region of code or operation
+ * and should be paired with start functions for timing of nested
+ * sequences.
+ */
+#define KTR_STOP0(m, egroup, ident, key) \
+ KTR_EVENT0(m, egroup, ident, "stop:0x%jX", (uintmax_t)key)
+#define KTR_STOP1(m, egroup, ident, key, a0, v0) \
+ KTR_EVENT1(m, egroup, ident, "stop:0x%jX", (uintmax_t)key, a0, (v0))
+#define KTR_STOP2(m, egroup, ident, key, a0, v0, a1, v1) \
+ KTR_EVENT2(m, egroup, ident, "stop:0x%jX", (uintmax_t)key, \
+ a0, (v0), a1, (v1))
+#define KTR_STOP3(m, egroup, ident, key, a0, v0, a1, v1, a2, v2)\
+ KTR_EVENT3(m, egroup, ident, "stop:0x%jX", (uintmax_t)key, \
+ a0, (v0), a1, (v1), a2, (v2))
+#define KTR_STOP4(m, egroup, ident, \
+ key, a0, v0, a1, v1, a2, v2, a3, v3) \
+ KTR_EVENT4(m, egroup, ident, "stop:0x%jX", (uintmax_t)key, \
+ a0, (v0), a1, (v1), a2, (v2), a3, (v3))
+
+/*
* Trace initialization events, similar to CTR with KTR_INIT, but
* completely ifdef'ed out if KTR_INIT isn't in KTR_COMPILE (to
* save string space, the compiler doesn't optimize out strings
diff --git a/freebsd/sys/sys/ktr_class.h b/freebsd/sys/sys/ktr_class.h
new file mode 100644
index 00000000..4bfc895b
--- /dev/null
+++ b/freebsd/sys/sys/ktr_class.h
@@ -0,0 +1,87 @@
+/*-
+ * Copyright (c) 1996 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from BSDI $Id: ktr.h,v 1.10.2.7 2000/03/16 21:44:42 cp Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_KTR_CLASS_H_
+#define _SYS_KTR_CLASS_H_
+
+/*
+ * KTR trace classes
+ *
+ * Two of the trace classes (KTR_DEV and KTR_SUBSYS) are special in that
+ * they are really placeholders so that indvidual drivers and subsystems
+ * can map their internal tracing to the general class when they wish to
+ * have tracing enabled and map it to 0 when they don't.
+ */
+#define KTR_GEN 0x00000001 /* General (TR) */
+#define KTR_NET 0x00000002 /* Network */
+#define KTR_DEV 0x00000004 /* Device driver */
+#define KTR_LOCK 0x00000008 /* MP locking */
+#define KTR_SMP 0x00000010 /* MP general */
+#define KTR_SUBSYS 0x00000020 /* Subsystem. */
+#define KTR_PMAP 0x00000040 /* Pmap tracing */
+#define KTR_MALLOC 0x00000080 /* Malloc tracing */
+#define KTR_TRAP 0x00000100 /* Trap processing */
+#define KTR_INTR 0x00000200 /* Interrupt tracing */
+#define KTR_SIG 0x00000400 /* Signal processing */
+#define KTR_SPARE2 0x00000800 /* cxgb, amd64, xen, clk, &c */
+#define KTR_PROC 0x00001000 /* Process scheduling */
+#define KTR_SYSC 0x00002000 /* System call */
+#define KTR_INIT 0x00004000 /* System initialization */
+#define KTR_SPARE3 0x00008000 /* cxgb, drm2, ioat, ntb */
+#define KTR_SPARE4 0x00010000 /* geom_sched */
+#define KTR_EVH 0x00020000 /* Eventhandler */
+#define KTR_VFS 0x00040000 /* VFS events */
+#define KTR_VOP 0x00080000 /* Auto-generated vop events */
+#define KTR_VM 0x00100000 /* The virtual memory system */
+#define KTR_INET 0x00200000 /* IPv4 stack */
+#define KTR_RUNQ 0x00400000 /* Run queue */
+#define KTR_CONTENTION 0x00800000 /* Lock contention */
+#define KTR_UMA 0x01000000 /* UMA slab allocator */
+#define KTR_CALLOUT 0x02000000 /* Callouts and timeouts */
+#define KTR_GEOM 0x04000000 /* GEOM I/O events */
+#define KTR_BUSDMA 0x08000000 /* busdma(9) events */
+#define KTR_INET6 0x10000000 /* IPv6 stack */
+#define KTR_SCHED 0x20000000 /* Machine parsed sched info. */
+#define KTR_BUF 0x40000000 /* Buffer cache */
+#define KTR_PTRACE 0x80000000 /* Process debugging. */
+#define KTR_ALL 0xffffffff
+
+/* KTR trace classes to compile in */
+#ifdef KTR
+#ifndef KTR_COMPILE
+#define KTR_COMPILE (KTR_ALL)
+#endif
+#else /* !KTR */
+#undef KTR_COMPILE
+#define KTR_COMPILE 0
+#endif /* KTR */
+
+#endif /* !_SYS_KTR_CLASS_H_ */
diff --git a/freebsd/sys/sys/libkern.h b/freebsd/sys/sys/libkern.h
index eebbcb62..c8fcd877 100644
--- a/freebsd/sys/sys/libkern.h
+++ b/freebsd/sys/sys/libkern.h
@@ -65,8 +65,20 @@ static __inline u_int max(u_int a, u_int b) { return (a > b ? a : b); }
static __inline u_int min(u_int a, u_int b) { return (a < b ? a : b); }
static __inline quad_t qmax(quad_t a, quad_t b) { return (a > b ? a : b); }
static __inline quad_t qmin(quad_t a, quad_t b) { return (a < b ? a : b); }
+static __inline u_quad_t uqmax(u_quad_t a, u_quad_t b) { return (a > b ? a : b); }
+static __inline u_quad_t uqmin(u_quad_t a, u_quad_t b) { return (a < b ? a : b); }
static __inline u_long ulmax(u_long a, u_long b) { return (a > b ? a : b); }
static __inline u_long ulmin(u_long a, u_long b) { return (a < b ? a : b); }
+static __inline __uintmax_t ummax(__uintmax_t a, __uintmax_t b)
+{
+
+ return (a > b ? a : b);
+}
+static __inline __uintmax_t ummin(__uintmax_t a, __uintmax_t b)
+{
+
+ return (a < b ? a : b);
+}
static __inline off_t omax(off_t a, off_t b) { return (a > b ? a : b); }
static __inline off_t omin(off_t a, off_t b) { return (a < b ? a : b); }
@@ -84,8 +96,21 @@ extern int arc4rand_iniseed_state;
/* Prototypes for non-quad routines. */
struct malloc_type;
uint32_t arc4random(void);
+#ifndef __rtems__
void arc4rand(void *ptr, u_int len, int reseed);
+#else /* __rtems__ */
+void arc4random_buf(void *, size_t);
+
+static inline void
+arc4rand(void *ptr, u_int len, int reseed)
+{
+
+ (void)reseed;
+ arc4random_buf(ptr, len);
+}
+#endif /* __rtems__ */
int bcmp(const void *, const void *, size_t);
+int timingsafe_bcmp(const void *, const void *, size_t);
void *bsearch(const void *, const void *, size_t,
size_t, int (*)(const void *, const void *));
#ifndef __rtems__
@@ -95,10 +120,9 @@ int ffs(int);
#ifndef HAVE_INLINE_FFSL
int ffsl(long);
#endif
-#else /* __rtems__ */
-#define ffs(_x) __builtin_ffs((unsigned int)(_x))
-#define ffsl(_x) __builtin_ffsl((unsigned long)(_x))
-#endif /* __rtems__ */
+#ifndef HAVE_INLINE_FFSLL
+int ffsll(long long);
+#endif
#ifndef HAVE_INLINE_FLS
int fls(int);
#endif
@@ -108,10 +132,36 @@ int flsl(long);
#ifndef HAVE_INLINE_FLSLL
int flsll(long long);
#endif
+#else /* __rtems__ */
+#define ffs(_x) __builtin_ffs((unsigned int)(_x))
+#define ffsl(_x) __builtin_ffsl((unsigned long)(_x))
+
+static inline int
+fls(int x)
+{
+
+ return (x != 0 ? sizeof(x) * 8 - __builtin_clz((unsigned int)x) : 0);
+}
+
+static inline int
+flsl(long x)
+{
+
+ return (x != 0 ? sizeof(x) * 8 - __builtin_clzl((unsigned long)x) : 0);
+}
+#endif /* __rtems__ */
+#define bitcount64(x) __bitcount64((uint64_t)(x))
+#define bitcount32(x) __bitcount32((uint32_t)(x))
+#define bitcount16(x) __bitcount16((uint16_t)(x))
+#define bitcountl(x) __bitcountl((u_long)(x))
+#define bitcount(x) __bitcount((u_int)(x))
+
int fnmatch(const char *, const char *, int);
int locc(int, char *, u_int);
void *memchr(const void *s, int c, size_t n);
+void *memcchr(const void *s, int c, size_t n);
int memcmp(const void *b1, const void *b2, size_t len);
+void *memmem(const void *l, size_t l_len, const void *s, size_t s_len);
void qsort(void *base, size_t nmemb, size_t size,
int (*compar)(const void *, const void *));
void qsort_r(void *base, size_t nmemb, size_t size, void *thunk,
@@ -123,10 +173,7 @@ u_long random(void);
u_long _bsd_random(void);
#define random() _bsd_random()
#endif /* __rtems__ */
-char *index(const char *, int);
-char *rindex(const char *, int);
int scanc(u_int, const u_char *, const u_char *, int);
-int skpc(int, int, char *);
#ifndef __rtems__
void srandom(u_long);
#else /* __rtems__ */
@@ -135,14 +182,18 @@ void _bsd_srandom(u_long);
#endif /* __rtems__ */
int strcasecmp(const char *, const char *);
char *strcat(char * __restrict, const char * __restrict);
+char *strchr(const char *, int);
int strcmp(const char *, const char *);
char *strcpy(char * __restrict, const char * __restrict);
size_t strcspn(const char * __restrict, const char * __restrict) __pure;
#ifdef __rtems__
#include <string.h>
-#define strdup _bsd_strdup
+#define strdup _bsd_strdup
+#define strndup _bsd_strndup
#endif /* __rtems__ */
char *strdup(const char *__restrict, struct malloc_type *);
+char *strncat(char *, const char *, size_t);
+char *strndup(const char *__restrict, size_t, struct malloc_type *);
size_t strlcat(char *, const char *, size_t);
size_t strlcpy(char *, const char *, size_t);
size_t strlen(const char *);
@@ -150,12 +201,13 @@ int strncasecmp(const char *, const char *, size_t);
int strncmp(const char *, const char *, size_t);
char *strncpy(char * __restrict, const char * __restrict, size_t);
size_t strnlen(const char *, size_t);
+char *strrchr(const char *, int);
char *strsep(char **, const char *delim);
size_t strspn(const char *, const char *);
char *strstr(const char *, const char *);
int strvalid(const char *, size_t);
-extern uint32_t crc32_tab[];
+extern const uint32_t crc32_tab[];
static __inline uint32_t
crc32_raw(const void *buf, size_t size, uint32_t crc)
@@ -177,8 +229,8 @@ crc32(const void *buf, size_t size)
}
uint32_t
-calculate_crc32c(uint32_t crc32c, const unsigned char *buffer,
- unsigned int length);
+calculate_crc32c(uint32_t crc32c, const unsigned char *buffer,
+ unsigned int length);
LIBKERN_INLINE void *memset(void *, int, size_t);
@@ -199,15 +251,17 @@ memset(void *b, int c, size_t len)
#ifndef __rtems__
static __inline char *
-strchr(const char *p, int ch)
+index(const char *p, int ch)
{
- return (index(p, ch));
+
+ return (strchr(p, ch));
}
static __inline char *
-strrchr(const char *p, int ch)
+rindex(const char *p, int ch)
{
- return (rindex(p, ch));
+
+ return (strrchr(p, ch));
}
#endif /* __rtems__ */
diff --git a/freebsd/sys/sys/linker.h b/freebsd/sys/sys/linker.h
index 29db893f..90db1b8c 100644
--- a/freebsd/sys/sys/linker.h
+++ b/freebsd/sys/sys/linker.h
@@ -79,6 +79,8 @@ struct linker_file {
int id; /* unique id */
caddr_t address; /* load address */
size_t size; /* size of file */
+ caddr_t ctors_addr; /* address of .ctors */
+ size_t ctors_size; /* size of .ctors */
int ndeps; /* number of dependencies */
linker_file_t* deps; /* list of dependencies */
STAILQ_HEAD(, common_symbol) common; /* list of common symbols */
@@ -158,7 +160,7 @@ int linker_file_function_listall(linker_file_t,
linker_function_nameval_callback_t, void *);
/*
- * Functions soley for use by the linker class handlers.
+ * Functions solely for use by the linker class handlers.
*/
int linker_add_class(linker_class_t _cls);
int linker_file_unload(linker_file_t _file, int flags);
@@ -211,6 +213,9 @@ void *linker_hwpmc_list_objects(void);
#define MODINFOMD_KERNEND 0x0008 /* kernend */
#endif
#define MODINFOMD_SHDR 0x0009 /* section header table */
+#define MODINFOMD_CTORS_ADDR 0x000a /* address of .ctors */
+#define MODINFOMD_CTORS_SIZE 0x000b /* size of .ctors */
+#define MODINFOMD_FW_HANDLE 0x000c /* Firmware dependent handle */
#define MODINFOMD_NOCOPY 0x8000 /* don't copy this metadata to the kernel */
#define MODINFOMD_DEPLIST (0x4001 | MODINFOMD_NOCOPY) /* depends on */
@@ -224,6 +229,7 @@ void *linker_hwpmc_list_objects(void);
#endif
#define LINKER_HINTS_VERSION 1 /* linker.hints file version */
+#define LINKER_HINTS_MAX (1 << 20) /* Allow at most 1MB for linker.hints */
#ifdef _KERNEL
@@ -260,7 +266,7 @@ extern int kld_debug;
#endif
#ifndef __rtems__
-typedef Elf_Addr elf_lookup_fn(linker_file_t, Elf_Size, int);
+typedef int elf_lookup_fn(linker_file_t, Elf_Size, int, Elf_Addr *);
/* Support functions */
int elf_reloc(linker_file_t _lf, Elf_Addr base, const void *_rel, int _type, elf_lookup_fn _lu);
diff --git a/freebsd/sys/sys/linker_set.h b/freebsd/sys/sys/linker_set.h
index c403d764..918d7242 100644
--- a/freebsd/sys/sys/linker_set.h
+++ b/freebsd/sys/sys/linker_set.h
@@ -43,16 +43,27 @@
* For ELF, this is done by constructing a separate segment for each set.
*/
+#if defined(__powerpc64__)
+/*
+ * Move the symbol pointer from ".text" to ".data" segment, to make
+ * the GCC compiler happy:
+ */
+#define __MAKE_SET_CONST
+#else
+#define __MAKE_SET_CONST const
+#endif
+
/*
* Private macros, not to be used outside this header file.
*/
#ifdef __GNUCLIKE___SECTION
#ifndef __rtems__
-#define __MAKE_SET(set, sym) \
- __GLOBL(__CONCAT(__start_set_,set)); \
- __GLOBL(__CONCAT(__stop_set_,set)); \
- static void const * const __set_##set##_sym_##sym \
- __section("set_" #set) __used = &sym
+#define __MAKE_SET(set, sym) \
+ __GLOBL(__CONCAT(__start_set_,set)); \
+ __GLOBL(__CONCAT(__stop_set_,set)); \
+ static void const * __MAKE_SET_CONST \
+ __set_##set##_sym_##sym __section("set_" #set) \
+ __used = &(sym)
#else /* __rtems__ */
#define RTEMS_BSD_DEFINE_SET(set, type) \
type const __CONCAT(_bsd__start_set_,set)[0] \
@@ -111,9 +122,9 @@
* Initialize before referring to a given linker set.
*/
#ifndef __rtems__
-#define SET_DECLARE(set, ptype) \
- extern ptype *__CONCAT(__start_set_,set); \
- extern ptype *__CONCAT(__stop_set_,set)
+#define SET_DECLARE(set, ptype) \
+ extern ptype __weak_symbol *__CONCAT(__start_set_,set); \
+ extern ptype __weak_symbol *__CONCAT(__stop_set_,set)
#define SET_BEGIN(set) \
(&__CONCAT(__start_set_,set))
diff --git a/freebsd/sys/sys/lockmgr.h b/freebsd/sys/sys/lockmgr.h
index 10227cdd..3019e4c4 100644
--- a/freebsd/sys/sys/lockmgr.h
+++ b/freebsd/sys/sys/lockmgr.h
@@ -69,7 +69,7 @@ struct thread;
int __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
const char *wmesg, int prio, int timo, const char *file, int line);
#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
-void _lockmgr_assert(struct lock *lk, int what, const char *file, int line);
+void _lockmgr_assert(const struct lock *lk, int what, const char *file, int line);
#endif
void _lockmgr_disown(struct lock *lk, const char *file, int line);
@@ -77,13 +77,14 @@ void lockallowrecurse(struct lock *lk);
void lockallowshare(struct lock *lk);
void lockdestroy(struct lock *lk);
void lockdisablerecurse(struct lock *lk);
+void lockdisableshare(struct lock *lk);
void lockinit(struct lock *lk, int prio, const char *wmesg, int timo,
int flags);
#ifdef DDB
int lockmgr_chain(struct thread *td, struct thread **ownerp);
#endif
-void lockmgr_printinfo(struct lock *lk);
-int lockstatus(struct lock *lk);
+void lockmgr_printinfo(const struct lock *lk);
+int lockstatus(const struct lock *lk);
/*
* As far as the ilk can be a static NULL pointer these functions need a
@@ -126,8 +127,6 @@ _lockmgr_args_rw(struct lock *lk, u_int flags, struct rwlock *ilk,
#define lockmgr_rw(lk, flags, ilk) \
_lockmgr_args_rw((lk), (flags), (ilk), LK_WMESG_DEFAULT, \
LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, LOCK_FILE, LOCK_LINE)
-#define lockmgr_waiters(lk) \
- ((lk)->lk_lock & LK_ALL_WAITERS)
#ifdef INVARIANTS
#define lockmgr_assert(lk, what) \
_lockmgr_assert((lk), (what), LOCK_FILE, LOCK_LINE)
@@ -146,6 +145,7 @@ _lockmgr_args_rw(struct lock *lk, u_int flags, struct rwlock *ilk,
#define LK_NOWITNESS 0x000010
#define LK_QUIET 0x000020
#define LK_ADAPTIVE 0x000040
+#define LK_IS_VNODE 0x000080 /* Tell WITNESS about a VNODE lock */
/*
* Additional attributes to be used in lockmgr().
@@ -156,6 +156,8 @@ _lockmgr_args_rw(struct lock *lk, u_int flags, struct rwlock *ilk,
#define LK_RETRY 0x000400
#define LK_SLEEPFAIL 0x000800
#define LK_TIMELOCK 0x001000
+#define LK_NODDLKTREAT 0x002000
+#define LK_VNHELD 0x004000
/*
* Operations for lockmgr().
diff --git a/freebsd/sys/sys/lockstat.h b/freebsd/sys/sys/lockstat.h
index ed9cffa9..1fc79ffe 100644
--- a/freebsd/sys/sys/lockstat.h
+++ b/freebsd/sys/sys/lockstat.h
@@ -27,180 +27,91 @@
/*
* DTrace lockstat provider definitions
- *
*/
-#ifndef _SYS_LOCKSTAT_H
+#ifndef _SYS_LOCKSTAT_H
#define _SYS_LOCKSTAT_H
-#ifdef _KERNEL
+#ifdef _KERNEL
-/*
- * Spin Locks
- */
-#define LS_MTX_SPIN_LOCK_ACQUIRE 0
-#define LS_MTX_SPIN_UNLOCK_RELEASE 1
-#define LS_MTX_SPIN_LOCK_SPIN 2
+#include <rtems/bsd/sys/param.h>
+#include <sys/queue.h>
+#include <sys/sdt.h>
-/*
- * Adaptive Locks
- */
-#define LS_MTX_LOCK_ACQUIRE 3
-#define LS_MTX_UNLOCK_RELEASE 4
-#define LS_MTX_LOCK_SPIN 5
-#define LS_MTX_LOCK_BLOCK 6
-#define LS_MTX_TRYLOCK_ACQUIRE 7
+SDT_PROVIDER_DECLARE(lockstat);
-/*
- * Reader/Writer Locks
- */
-#define LS_RW_RLOCK_ACQUIRE 8
-#define LS_RW_RUNLOCK_RELEASE 9
-#define LS_RW_WLOCK_ACQUIRE 10
-#define LS_RW_WUNLOCK_RELEASE 11
-#define LS_RW_RLOCK_SPIN 12
-#define LS_RW_RLOCK_BLOCK 13
-#define LS_RW_WLOCK_SPIN 14
-#define LS_RW_WLOCK_BLOCK 15
-#define LS_RW_TRYUPGRADE_UPGRADE 16
-#define LS_RW_DOWNGRADE_DOWNGRADE 17
+SDT_PROBE_DECLARE(lockstat, , , adaptive__acquire);
+SDT_PROBE_DECLARE(lockstat, , , adaptive__release);
+SDT_PROBE_DECLARE(lockstat, , , adaptive__spin);
+SDT_PROBE_DECLARE(lockstat, , , adaptive__block);
-/*
- * Shared/Exclusive Locks
- */
-#define LS_SX_SLOCK_ACQUIRE 18
-#define LS_SX_SUNLOCK_RELEASE 19
-#define LS_SX_XLOCK_ACQUIRE 20
-#define LS_SX_XUNLOCK_RELEASE 21
-#define LS_SX_SLOCK_SPIN 22
-#define LS_SX_SLOCK_BLOCK 23
-#define LS_SX_XLOCK_SPIN 24
-#define LS_SX_XLOCK_BLOCK 25
-#define LS_SX_TRYUPGRADE_UPGRADE 26
-#define LS_SX_DOWNGRADE_DOWNGRADE 27
-
-/*
- * Thread Locks
- */
-#define LS_THREAD_LOCK_SPIN 28
+SDT_PROBE_DECLARE(lockstat, , , spin__acquire);
+SDT_PROBE_DECLARE(lockstat, , , spin__release);
+SDT_PROBE_DECLARE(lockstat, , , spin__spin);
-/*
- * Lockmanager Locks
- * According to locking(9) Lockmgr locks are "Largely deprecated"
- * so no support for these have been added in the lockstat provider.
- */
+SDT_PROBE_DECLARE(lockstat, , , rw__acquire);
+SDT_PROBE_DECLARE(lockstat, , , rw__release);
+SDT_PROBE_DECLARE(lockstat, , , rw__block);
+SDT_PROBE_DECLARE(lockstat, , , rw__spin);
+SDT_PROBE_DECLARE(lockstat, , , rw__upgrade);
+SDT_PROBE_DECLARE(lockstat, , , rw__downgrade);
-#define LS_NPROBES 29
-
-#define LS_MTX_LOCK "mtx_lock"
-#define LS_MTX_UNLOCK "mtx_unlock"
-#define LS_MTX_SPIN_LOCK "mtx_lock_spin"
-#define LS_MTX_SPIN_UNLOCK "mtx_unlock_spin"
-#define LS_MTX_TRYLOCK "mtx_trylock"
-#define LS_RW_RLOCK "rw_rlock"
-#define LS_RW_WLOCK "rw_wlock"
-#define LS_RW_RUNLOCK "rw_runlock"
-#define LS_RW_WUNLOCK "rw_wunlock"
-#define LS_RW_TRYUPGRADE "rw_try_upgrade"
-#define LS_RW_DOWNGRADE "rw_downgrade"
-#define LS_SX_SLOCK "sx_slock"
-#define LS_SX_XLOCK "sx_xlock"
-#define LS_SX_SUNLOCK "sx_sunlock"
-#define LS_SX_XUNLOCK "sx_xunlock"
-#define LS_SX_TRYUPGRADE "sx_try_upgrade"
-#define LS_SX_DOWNGRADE "sx_downgrade"
-#define LS_THREAD_LOCK "thread_lock"
-
-#define LS_ACQUIRE "acquire"
-#define LS_RELEASE "release"
-#define LS_SPIN "spin"
-#define LS_BLOCK "block"
-#define LS_UPGRADE "upgrade"
-#define LS_DOWNGRADE "downgrade"
-
-#define LS_TYPE_ADAPTIVE "adaptive"
-#define LS_TYPE_SPIN "spin"
-#define LS_TYPE_THREAD "thread"
-#define LS_TYPE_RW "rw"
-#define LS_TYPE_SX "sx"
-
-#define LSA_ACQUIRE (LS_TYPE_ADAPTIVE "-" LS_ACQUIRE)
-#define LSA_RELEASE (LS_TYPE_ADAPTIVE "-" LS_RELEASE)
-#define LSA_SPIN (LS_TYPE_ADAPTIVE "-" LS_SPIN)
-#define LSA_BLOCK (LS_TYPE_ADAPTIVE "-" LS_BLOCK)
-#define LSS_ACQUIRE (LS_TYPE_SPIN "-" LS_ACQUIRE)
-#define LSS_RELEASE (LS_TYPE_SPIN "-" LS_RELEASE)
-#define LSS_SPIN (LS_TYPE_SPIN "-" LS_SPIN)
-#define LSR_ACQUIRE (LS_TYPE_RW "-" LS_ACQUIRE)
-#define LSR_RELEASE (LS_TYPE_RW "-" LS_RELEASE)
-#define LSR_BLOCK (LS_TYPE_RW "-" LS_BLOCK)
-#define LSR_SPIN (LS_TYPE_RW "-" LS_SPIN)
-#define LSR_UPGRADE (LS_TYPE_RW "-" LS_UPGRADE)
-#define LSR_DOWNGRADE (LS_TYPE_RW "-" LS_DOWNGRADE)
-#define LSX_ACQUIRE (LS_TYPE_SX "-" LS_ACQUIRE)
-#define LSX_RELEASE (LS_TYPE_SX "-" LS_RELEASE)
-#define LSX_BLOCK (LS_TYPE_SX "-" LS_BLOCK)
-#define LSX_SPIN (LS_TYPE_SX "-" LS_SPIN)
-#define LSX_UPGRADE (LS_TYPE_SX "-" LS_UPGRADE)
-#define LSX_DOWNGRADE (LS_TYPE_SX "-" LS_DOWNGRADE)
-#define LST_SPIN (LS_TYPE_THREAD "-" LS_SPIN)
+SDT_PROBE_DECLARE(lockstat, , , sx__acquire);
+SDT_PROBE_DECLARE(lockstat, , , sx__release);
+SDT_PROBE_DECLARE(lockstat, , , sx__block);
+SDT_PROBE_DECLARE(lockstat, , , sx__spin);
+SDT_PROBE_DECLARE(lockstat, , , sx__upgrade);
+SDT_PROBE_DECLARE(lockstat, , , sx__downgrade);
-/*
- * The following must match the type definition of dtrace_probe. It is
- * defined this way to avoid having to rely on CDDL code.
- */
-extern uint32_t lockstat_probemap[LS_NPROBES];
-typedef void (*lockstat_probe_func_t)(uint32_t, uintptr_t arg0, uintptr_t arg1,
- uintptr_t arg2, uintptr_t arg3, uintptr_t arg4);
-extern lockstat_probe_func_t lockstat_probe_func;
-extern uint64_t lockstat_nsecs(void);
+SDT_PROBE_DECLARE(lockstat, , , thread__spin);
-#ifdef KDTRACE_HOOKS
-/*
- * Macros to record lockstat probes.
- */
-#define LOCKSTAT_RECORD4(probe, lp, arg1, arg2, arg3, arg4) do { \
- uint32_t id; \
- \
- if ((id = lockstat_probemap[(probe)])) \
- (*lockstat_probe_func)(id, (uintptr_t)(lp), (arg1), (arg2), \
- (arg3), (arg4)); \
-} while (0)
+#define LOCKSTAT_WRITER 0
+#define LOCKSTAT_READER 1
-#define LOCKSTAT_RECORD(probe, lp, arg1) \
- LOCKSTAT_RECORD4(probe, lp, arg1, 0, 0, 0)
+extern int lockstat_enabled;
-#define LOCKSTAT_RECORD0(probe, lp) \
- LOCKSTAT_RECORD4(probe, lp, 0, 0, 0, 0)
+#ifdef KDTRACE_HOOKS
-#define LOCKSTAT_RECORD1(probe, lp, arg1) \
- LOCKSTAT_RECORD4(probe, lp, arg1, 0, 0, 0)
+#define LOCKSTAT_RECORD0(probe, lp) \
+ SDT_PROBE1(lockstat, , , probe, lp)
-#define LOCKSTAT_RECORD2(probe, lp, arg1, arg2) \
- LOCKSTAT_RECORD4(probe, lp, arg1, arg2, 0, 0)
+#define LOCKSTAT_RECORD1(probe, lp, arg1) \
+ SDT_PROBE2(lockstat, , , probe, lp, arg1)
-#define LOCKSTAT_RECORD3(probe, lp, arg1, arg2, arg3) \
- LOCKSTAT_RECORD4(probe, lp, arg1, arg2, arg3, 0)
+#define LOCKSTAT_RECORD2(probe, lp, arg1, arg2) \
+ SDT_PROBE3(lockstat, , , probe, lp, arg1, arg2)
-#define LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(probe, lp, c, wt, f, l) do { \
- uint32_t id; \
- \
- lock_profile_obtain_lock_success(&(lp)->lock_object, c, wt, f, l); \
- if ((id = lockstat_probemap[(probe)])) \
- (*lockstat_probe_func)(id, (uintptr_t)(lp), 0, 0, 0, 0); \
+#define LOCKSTAT_RECORD3(probe, lp, arg1, arg2, arg3) \
+ SDT_PROBE4(lockstat, , , probe, lp, arg1, arg2, arg3)
+
+#define LOCKSTAT_RECORD4(probe, lp, arg1, arg2, arg3, arg4) \
+ SDT_PROBE5(lockstat, , , probe, lp, arg1, arg2, arg3, arg4)
+
+#define LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(probe, lp, c, wt, f, l) do { \
+ lock_profile_obtain_lock_success(&(lp)->lock_object, c, wt, f, l); \
+ LOCKSTAT_RECORD0(probe, lp); \
} while (0)
-#define LOCKSTAT_PROFILE_RELEASE_LOCK(probe, lp) do { \
- uint32_t id; \
- \
- lock_profile_release_lock(&(lp)->lock_object); \
- if ((id = lockstat_probemap[(probe)])) \
- (*lockstat_probe_func)(id, (uintptr_t)(lp), 0, 0, 0, 0); \
+#define LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(probe, lp, c, wt, f, l, a) do { \
+ lock_profile_obtain_lock_success(&(lp)->lock_object, c, wt, f, l); \
+ LOCKSTAT_RECORD1(probe, lp, a); \
} while (0)
-#else /* !KDTRACE_HOOKS */
+#define LOCKSTAT_PROFILE_RELEASE_LOCK(probe, lp) do { \
+ lock_profile_release_lock(&(lp)->lock_object); \
+ LOCKSTAT_RECORD0(probe, lp); \
+} while (0)
+
+#define LOCKSTAT_PROFILE_RELEASE_RWLOCK(probe, lp, a) do { \
+ lock_profile_release_lock(&(lp)->lock_object); \
+ LOCKSTAT_RECORD1(probe, lp, a); \
+} while (0)
+
+struct lock_object;
+uint64_t lockstat_nsecs(struct lock_object *);
+
+#else /* !KDTRACE_HOOKS */
-#define LOCKSTAT_RECORD(probe, lp, arg1)
#define LOCKSTAT_RECORD0(probe, lp)
#define LOCKSTAT_RECORD1(probe, lp, arg1)
#define LOCKSTAT_RECORD2(probe, lp, arg1, arg2)
@@ -210,11 +121,15 @@ extern uint64_t lockstat_nsecs(void);
#define LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(probe, lp, c, wt, f, l) \
lock_profile_obtain_lock_success(&(lp)->lock_object, c, wt, f, l)
+#define LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(probe, lp, c, wt, f, l, a) \
+ LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(probe, lp, c, wt, f, l)
+
#define LOCKSTAT_PROFILE_RELEASE_LOCK(probe, lp) \
lock_profile_release_lock(&(lp)->lock_object)
-#endif /* !KDTRACE_HOOKS */
-
-#endif /* _KERNEL */
+#define LOCKSTAT_PROFILE_RELEASE_RWLOCK(probe, lp, a) \
+ LOCKSTAT_PROFILE_RELEASE_LOCK(probe, lp)
-#endif /* _SYS_LOCKSTAT_H */
+#endif /* !KDTRACE_HOOKS */
+#endif /* _KERNEL */
+#endif /* _SYS_LOCKSTAT_H */
diff --git a/freebsd/sys/sys/loginclass.h b/freebsd/sys/sys/loginclass.h
index 08f3409a..6783123a 100644
--- a/freebsd/sys/sys/loginclass.h
+++ b/freebsd/sys/sys/loginclass.h
@@ -48,6 +48,7 @@ void loginclass_hold(struct loginclass *lc);
void loginclass_free(struct loginclass *lc);
struct loginclass *loginclass_find(const char *name);
void loginclass_racct_foreach(void (*callback)(struct racct *racct,
- void *arg2, void *arg3), void *arg2, void *arg3);
+ void *arg2, void *arg3), void (*pre)(void), void (*post)(void),
+ void *arg2, void *arg3);
#endif /* !_SYS_LOGINCLASS_H_ */
diff --git a/freebsd/sys/sys/malloc.h b/freebsd/sys/sys/malloc.h
index f99dfc5e..310d2551 100644
--- a/freebsd/sys/sys/malloc.h
+++ b/freebsd/sys/sys/malloc.h
@@ -51,6 +51,8 @@
#define M_NOVM 0x0200 /* don't ask VM for pages */
#define M_USE_RESERVE 0x0400 /* can alloc out of reserve memory */
#define M_NODUMP 0x0800 /* don't dump pages in this allocation */
+#define M_FIRSTFIT 0x1000 /* Only for vmem, fast fit. */
+#define M_BESTFIT 0x2000 /* Only for vmem, low fragmentation. */
#define M_MAGIC 877983977 /* time when first defined :-) */
@@ -139,7 +141,7 @@ struct malloc_type_header {
struct malloc_type type[1] = { \
{ NULL, M_MAGIC, shortdesc, NULL } \
}; \
- SYSINIT(type##_init, SI_SUB_KMEM, SI_ORDER_SECOND, malloc_init, \
+ SYSINIT(type##_init, SI_SUB_KMEM, SI_ORDER_THIRD, malloc_init, \
type); \
SYSUNINIT(type##_uninit, SI_SUB_KMEM, SI_ORDER_ANY, \
malloc_uninit, type)
@@ -151,9 +153,6 @@ MALLOC_DECLARE(M_CACHE);
MALLOC_DECLARE(M_DEVBUF);
MALLOC_DECLARE(M_TEMP);
-MALLOC_DECLARE(M_IP6OPT); /* for INET6 */
-MALLOC_DECLARE(M_IP6NDP); /* for INET6 */
-
/*
* Deprecated macro versions of not-quite-malloc() and free().
*/
@@ -178,9 +177,11 @@ typedef void malloc_type_list_func_t(struct malloc_type *, void *);
void contigfree(void *addr, unsigned long size, struct malloc_type *type);
void *contigmalloc(unsigned long size, struct malloc_type *type, int flags,
vm_paddr_t low, vm_paddr_t high, unsigned long alignment,
- unsigned long boundary) __malloc_like;
+ vm_paddr_t boundary) __malloc_like __result_use_check
+ __alloc_size(1) __alloc_align(6);
void free(void *addr, struct malloc_type *type);
-void *malloc(unsigned long size, struct malloc_type *type, int flags) __malloc_like;
+void *malloc(unsigned long size, struct malloc_type *type, int flags)
+ __malloc_like __result_use_check __alloc_size(1);
void malloc_init(void *);
int malloc_last_fail(void);
void malloc_type_allocated(struct malloc_type *type, unsigned long size);
@@ -188,9 +189,9 @@ void malloc_type_freed(struct malloc_type *type, unsigned long size);
void malloc_type_list(malloc_type_list_func_t *, void *);
void malloc_uninit(void *);
void *realloc(void *addr, unsigned long size, struct malloc_type *type,
- int flags);
+ int flags) __result_use_check __alloc_size(2);
void *reallocf(void *addr, unsigned long size, struct malloc_type *type,
- int flags);
+ int flags) __alloc_size(2);
struct malloc_type *malloc_desc2type(const char *desc);
#endif /* _KERNEL */
diff --git a/freebsd/sys/sys/mbuf.h b/freebsd/sys/sys/mbuf.h
index b6d58a25..95194e0b 100644
--- a/freebsd/sys/sys/mbuf.h
+++ b/freebsd/sys/sys/mbuf.h
@@ -44,6 +44,32 @@
#endif
#endif
+#ifdef _KERNEL
+#include <sys/sdt.h>
+
+#define MBUF_PROBE1(probe, arg0) \
+ SDT_PROBE1(sdt, , , probe, arg0)
+#define MBUF_PROBE2(probe, arg0, arg1) \
+ SDT_PROBE2(sdt, , , probe, arg0, arg1)
+#define MBUF_PROBE3(probe, arg0, arg1, arg2) \
+ SDT_PROBE3(sdt, , , probe, arg0, arg1, arg2)
+#define MBUF_PROBE4(probe, arg0, arg1, arg2, arg3) \
+ SDT_PROBE4(sdt, , , probe, arg0, arg1, arg2, arg3)
+#define MBUF_PROBE5(probe, arg0, arg1, arg2, arg3, arg4) \
+ SDT_PROBE5(sdt, , , probe, arg0, arg1, arg2, arg3, arg4)
+
+SDT_PROBE_DECLARE(sdt, , , m__init);
+SDT_PROBE_DECLARE(sdt, , , m__gethdr);
+SDT_PROBE_DECLARE(sdt, , , m__get);
+SDT_PROBE_DECLARE(sdt, , , m__getcl);
+SDT_PROBE_DECLARE(sdt, , , m__clget);
+SDT_PROBE_DECLARE(sdt, , , m__cljget);
+SDT_PROBE_DECLARE(sdt, , , m__cljset);
+SDT_PROBE_DECLARE(sdt, , , m__free);
+SDT_PROBE_DECLARE(sdt, , , m__freem);
+
+#endif /* _KERNEL */
+
/*
* Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead.
* An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in
@@ -52,11 +78,24 @@
* stored. Additionally, it is possible to allocate a separate buffer
* externally and attach it to the mbuf in a way similar to that of mbuf
* clusters.
+ *
+ * NB: These calculation do not take actual compiler-induced alignment and
+ * padding inside the complete struct mbuf into account. Appropriate
+ * attention is required when changing members of struct mbuf.
+ *
+ * MLEN is data length in a normal mbuf.
+ * MHLEN is data length in an mbuf with pktheader.
+ * MINCLSIZE is a smallest amount of data that should be put into cluster.
+ *
+ * Compile-time assertions in uipc_mbuf.c test these values to ensure that
+ * they are sensible.
*/
-#define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */
-#define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */
-#define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */
-#define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */
+struct mbuf;
+#define MHSIZE offsetof(struct mbuf, m_dat)
+#define MPKTHSIZE offsetof(struct mbuf, m_pktdat)
+#define MLEN ((int)(MSIZE - MHSIZE))
+#define MHLEN ((int)(MSIZE - MPKTHSIZE))
+#define MINCLSIZE (MHLEN + 1)
#ifdef _KERNEL
/*-
@@ -64,8 +103,10 @@
* type:
*
* mtod(m, t) -- Convert mbuf pointer to data pointer of correct type.
+ * mtodo(m, o) -- Same as above but with offset 'o' into data.
*/
#define mtod(m, t) ((t)((m)->m_data))
+#define mtodo(m, o) ((void *)(((m)->m_data) + (o)))
/*
* Argument structure passed to UMA routines during mbuf and packet
@@ -77,25 +118,6 @@ struct mb_args {
};
#endif /* _KERNEL */
-#if defined(__LP64__)
-#define M_HDR_PAD 6
-#else
-#define M_HDR_PAD 2
-#endif
-
-/*
- * Header present at the beginning of every mbuf.
- */
-struct m_hdr {
- struct mbuf *mh_next; /* next buffer in chain */
- struct mbuf *mh_nextpkt; /* next chain in queue/record */
- caddr_t mh_data; /* location of data */
- int mh_len; /* amount of data in this mbuf */
- int mh_flags; /* flags; see below */
- short mh_type; /* type of data in this mbuf */
- uint8_t pad[M_HDR_PAD];/* word align */
-};
-
/*
* Packet tag structure (see below for details).
*/
@@ -109,40 +131,73 @@ struct m_tag {
/*
* Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
+ * Size ILP32: 48
+ * LP64: 56
+ * Compile-time assertions in uipc_mbuf.c test these values to ensure that
+ * they are correct.
*/
struct pkthdr {
struct ifnet *rcvif; /* rcv interface */
- /* variables for ip and tcp reassembly */
- void *header; /* pointer to packet header */
- int len; /* total packet length */
- uint32_t flowid; /* packet's 4-tuple system
- * flow identifier
- */
- /* variables for hardware checksum */
- int csum_flags; /* flags regarding checksum */
- int csum_data; /* data field used by csum routines */
- u_int16_t tso_segsz; /* TSO segment size */
- union {
- u_int16_t vt_vtag; /* Ethernet 802.1p+q vlan tag */
- u_int16_t vt_nrecs; /* # of IGMPv3 records in this chain */
- } PH_vt;
SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
+ int32_t len; /* total packet length */
+
+ /* Layer crossing persistent information. */
+ uint32_t flowid; /* packet's 4-tuple system */
+ uint64_t csum_flags; /* checksum and offload features */
+ uint16_t fibnum; /* this packet should use this fib */
+ uint8_t cosqos; /* class/quality of service */
+ uint8_t rsstype; /* hash type */
+ uint8_t l2hlen; /* layer 2 header length */
+ uint8_t l3hlen; /* layer 3 header length */
+ uint8_t l4hlen; /* layer 4 header length */
+ uint8_t l5hlen; /* layer 5 header length */
+ union {
+ uint8_t eight[8];
+ uint16_t sixteen[4];
+ uint32_t thirtytwo[2];
+ uint64_t sixtyfour[1];
+ uintptr_t unintptr[1];
+ void *ptr;
+ } PH_per;
+
+ /* Layer specific non-persistent local storage for reassembly, etc. */
+ union {
+ uint8_t eight[8];
+ uint16_t sixteen[4];
+ uint32_t thirtytwo[2];
+ uint64_t sixtyfour[1];
+ uintptr_t unintptr[1];
+ void *ptr;
+ } PH_loc;
};
-#define ether_vtag PH_vt.vt_vtag
+#define ether_vtag PH_per.sixteen[0]
+#define PH_vt PH_per
+#define vt_nrecs sixteen[0]
+#define tso_segsz PH_per.sixteen[1]
+#define csum_phsum PH_per.sixteen[2]
+#define csum_data PH_per.thirtytwo[1]
/*
* Description of external storage mapped into mbuf; valid only if M_EXT is
* set.
+ * Size ILP32: 28
+ * LP64: 48
+ * Compile-time assertions in uipc_mbuf.c test these values to ensure that
+ * they are correct.
*/
struct m_ext {
+ union {
+ volatile u_int ext_count; /* value of ref count info */
+ volatile u_int *ext_cnt; /* pointer to ref count info */
+ };
caddr_t ext_buf; /* start of buffer */
+ uint32_t ext_size; /* size of buffer, for ext_free */
+ uint32_t ext_type:8, /* type of external storage */
+ ext_flags:24; /* external storage mbuf flags */
void (*ext_free) /* free routine if not the usual */
- (void *, void *);
+ (struct mbuf *, void *, void *);
void *ext_arg1; /* optional argument pointer */
void *ext_arg2; /* optional argument pointer */
- u_int ext_size; /* size of buffer, for ext_free */
- volatile u_int *ref_cnt; /* pointer to ref count info */
- int ext_type; /* type of external storage */
};
/*
@@ -150,71 +205,107 @@ struct m_ext {
* purposes.
*/
struct mbuf {
- struct m_hdr m_hdr;
+ /*
+ * Header present at the beginning of every mbuf.
+ * Size ILP32: 24
+ * LP64: 32
+ * Compile-time assertions in uipc_mbuf.c test these values to ensure
+ * that they are correct.
+ */
+ union { /* next buffer in chain */
+ struct mbuf *m_next;
+ SLIST_ENTRY(mbuf) m_slist;
+ STAILQ_ENTRY(mbuf) m_stailq;
+ };
+ union { /* next chain in queue/record */
+ struct mbuf *m_nextpkt;
+ SLIST_ENTRY(mbuf) m_slistpkt;
+ STAILQ_ENTRY(mbuf) m_stailqpkt;
+ };
+ caddr_t m_data; /* location of data */
+ int32_t m_len; /* amount of data in this mbuf */
+ uint32_t m_type:8, /* type of data in this mbuf */
+ m_flags:24; /* flags; see below */
+#if !defined(__LP64__)
+ uint32_t m_pad; /* pad for 64bit alignment */
+#endif
+
+ /*
+ * A set of optional headers (packet header, external storage header)
+ * and internal data storage. Historically, these arrays were sized
+ * to MHLEN (space left after a packet header) and MLEN (space left
+ * after only a regular mbuf header); they are now variable size in
+ * order to support future work on variable-size mbufs.
+ */
union {
struct {
- struct pkthdr MH_pkthdr; /* M_PKTHDR set */
+ struct pkthdr m_pkthdr; /* M_PKTHDR set */
union {
- struct m_ext MH_ext; /* M_EXT set */
- char MH_databuf[MHLEN];
- } MH_dat;
- } MH;
- char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */
- } M_dat;
+ struct m_ext m_ext; /* M_EXT set */
+ char m_pktdat[0];
+ };
+ };
+ char m_dat[0]; /* !M_PKTHDR, !M_EXT */
+ };
};
-#define m_next m_hdr.mh_next
-#define m_len m_hdr.mh_len
-#define m_data m_hdr.mh_data
-#define m_type m_hdr.mh_type
-#define m_flags m_hdr.mh_flags
-#define m_nextpkt m_hdr.mh_nextpkt
-#define m_act m_nextpkt
-#define m_pkthdr M_dat.MH.MH_pkthdr
-#define m_ext M_dat.MH.MH_dat.MH_ext
-#define m_pktdat M_dat.MH.MH_dat.MH_databuf
-#define m_dat M_dat.M_databuf
/*
- * mbuf flags.
+ * mbuf flags of global significance and layer crossing.
+ * Those of only protocol/layer specific significance are to be mapped
+ * to M_PROTO[1-12] and cleared at layer handoff boundaries.
+ * NB: Limited to the lower 24 bits.
*/
#define M_EXT 0x00000001 /* has associated external storage */
#define M_PKTHDR 0x00000002 /* start of record */
#define M_EOR 0x00000004 /* end of record */
#define M_RDONLY 0x00000008 /* associated data is marked read-only */
-#define M_PROTO1 0x00000010 /* protocol-specific */
-#define M_PROTO2 0x00000020 /* protocol-specific */
-#define M_PROTO3 0x00000040 /* protocol-specific */
-#define M_PROTO4 0x00000080 /* protocol-specific */
-#define M_PROTO5 0x00000100 /* protocol-specific */
-#define M_BCAST 0x00000200 /* send/received as link-level broadcast */
-#define M_MCAST 0x00000400 /* send/received as link-level multicast */
-#define M_FRAG 0x00000800 /* packet is a fragment of a larger packet */
-#define M_FIRSTFRAG 0x00001000 /* packet is first fragment */
-#define M_LASTFRAG 0x00002000 /* packet is last fragment */
-#define M_SKIP_FIREWALL 0x00004000 /* skip firewall processing */
-#define M_FREELIST 0x00008000 /* mbuf is on the free list */
-#define M_VLANTAG 0x00010000 /* ether_vtag is valid */
-#define M_PROMISC 0x00020000 /* packet was not for us */
-#define M_NOFREE 0x00040000 /* do not free mbuf, embedded in cluster */
-#define M_PROTO6 0x00080000 /* protocol-specific */
-#define M_PROTO7 0x00100000 /* protocol-specific */
-#define M_PROTO8 0x00200000 /* protocol-specific */
-#define M_FLOWID 0x00400000 /* deprecated: flowid is valid */
-#define M_HASHTYPEBITS 0x0F000000 /* mask of bits holding flowid hash type */
+#define M_BCAST 0x00000010 /* send/received as link-level broadcast */
+#define M_MCAST 0x00000020 /* send/received as link-level multicast */
+#define M_PROMISC 0x00000040 /* packet was not for us */
+#define M_VLANTAG 0x00000080 /* ether_vtag is valid */
+#define M_UNUSED_8 0x00000100 /* --available-- */
+#define M_NOFREE 0x00000200 /* do not free mbuf, embedded in cluster */
+
+#define M_PROTO1 0x00001000 /* protocol-specific */
+#define M_PROTO2 0x00002000 /* protocol-specific */
+#define M_PROTO3 0x00004000 /* protocol-specific */
+#define M_PROTO4 0x00008000 /* protocol-specific */
+#define M_PROTO5 0x00010000 /* protocol-specific */
+#define M_PROTO6 0x00020000 /* protocol-specific */
+#define M_PROTO7 0x00040000 /* protocol-specific */
+#define M_PROTO8 0x00080000 /* protocol-specific */
+#define M_PROTO9 0x00100000 /* protocol-specific */
+#define M_PROTO10 0x00200000 /* protocol-specific */
+#define M_PROTO11 0x00400000 /* protocol-specific */
+#define M_PROTO12 0x00800000 /* protocol-specific */
+
+#define MB_DTOR_SKIP 0x1 /* don't pollute the cache by touching a freed mbuf */
/*
- * For RELENG_{6,7} steal these flags for limited multiple routing table
- * support. In RELENG_8 and beyond, use just one flag and a tag.
+ * Flags to purge when crossing layers.
*/
-#define M_FIB 0xF0000000 /* steal some bits to store fib number. */
+#define M_PROTOFLAGS \
+ (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\
+ M_PROTO9|M_PROTO10|M_PROTO11|M_PROTO12)
-#define M_NOTIFICATION M_PROTO5 /* SCTP notification */
+/*
+ * Flags preserved when copying m_pkthdr.
+ */
+#define M_COPYFLAGS \
+ (M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG| \
+ M_PROTOFLAGS)
/*
- * Flags to purge when crossing layers.
+ * Mbuf flag description for use with printf(9) %b identifier.
*/
-#define M_PROTOFLAGS \
- (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8)
+#define M_FLAG_BITS \
+ "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \
+ "\7M_PROMISC\10M_VLANTAG"
+#define M_FLAG_PROTOBITS \
+ "\15M_PROTO1\16M_PROTO2\17M_PROTO3\20M_PROTO4\21M_PROTO5" \
+ "\22M_PROTO6\23M_PROTO7\24M_PROTO8\25M_PROTO9\26M_PROTO10" \
+ "\27M_PROTO11\30M_PROTO12"
+#define M_FLAG_PRINTF (M_FLAG_BITS M_FLAG_PROTOBITS)
/*
* Network interface cards are able to hash protocol fields (such as IPv4
@@ -227,88 +318,214 @@ struct mbuf {
*
* Most NICs support RSS, which provides ordering and explicit affinity, and
* use the hash m_flag bits to indicate what header fields were covered by
- * the hash. M_HASHTYPE_OPAQUE can be set by non-RSS cards or configurations
- * that provide an opaque flow identifier, allowing for ordering and
- * distribution without explicit affinity.
+ * the hash. M_HASHTYPE_OPAQUE and M_HASHTYPE_OPAQUE_HASH can be set by non-
+ * RSS cards or configurations that provide an opaque flow identifier, allowing
+ * for ordering and distribution without explicit affinity. Additionally,
+ * M_HASHTYPE_OPAQUE_HASH indicates that the flow identifier has hash
+ * properties.
*/
-#define M_HASHTYPE_SHIFT 24
-#define M_HASHTYPE_NONE 0x0
-#define M_HASHTYPE_RSS_IPV4 0x1 /* IPv4 2-tuple */
-#define M_HASHTYPE_RSS_TCP_IPV4 0x2 /* TCPv4 4-tuple */
-#define M_HASHTYPE_RSS_IPV6 0x3 /* IPv6 2-tuple */
-#define M_HASHTYPE_RSS_TCP_IPV6 0x4 /* TCPv6 4-tuple */
-#define M_HASHTYPE_RSS_IPV6_EX 0x5 /* IPv6 2-tuple + ext hdrs */
-#define M_HASHTYPE_RSS_TCP_IPV6_EX 0x6 /* TCPv6 4-tiple + ext hdrs */
-#define M_HASHTYPE_OPAQUE 0xf /* ordering, not affinity */
-
-#define M_HASHTYPE_CLEAR(m) (m)->m_flags &= ~(M_HASHTYPEBITS)
-#define M_HASHTYPE_GET(m) (((m)->m_flags & M_HASHTYPEBITS) >> \
- M_HASHTYPE_SHIFT)
-#define M_HASHTYPE_SET(m, v) do { \
- (m)->m_flags &= ~M_HASHTYPEBITS; \
- (m)->m_flags |= ((v) << M_HASHTYPE_SHIFT); \
-} while (0)
+#define M_HASHTYPE_HASHPROP 0x80 /* has hash properties */
+#define M_HASHTYPE_HASH(t) (M_HASHTYPE_HASHPROP | (t))
+/* Microsoft RSS standard hash types */
+#define M_HASHTYPE_NONE 0
+#define M_HASHTYPE_RSS_IPV4 M_HASHTYPE_HASH(1) /* IPv4 2-tuple */
+#define M_HASHTYPE_RSS_TCP_IPV4 M_HASHTYPE_HASH(2) /* TCPv4 4-tuple */
+#define M_HASHTYPE_RSS_IPV6 M_HASHTYPE_HASH(3) /* IPv6 2-tuple */
+#define M_HASHTYPE_RSS_TCP_IPV6 M_HASHTYPE_HASH(4) /* TCPv6 4-tuple */
+#define M_HASHTYPE_RSS_IPV6_EX M_HASHTYPE_HASH(5) /* IPv6 2-tuple +
+ * ext hdrs */
+#define M_HASHTYPE_RSS_TCP_IPV6_EX M_HASHTYPE_HASH(6) /* TCPv6 4-tiple +
+ * ext hdrs */
+/* Non-standard RSS hash types */
+#define M_HASHTYPE_RSS_UDP_IPV4 M_HASHTYPE_HASH(7) /* IPv4 UDP 4-tuple*/
+#define M_HASHTYPE_RSS_UDP_IPV4_EX M_HASHTYPE_HASH(8) /* IPv4 UDP 4-tuple +
+ * ext hdrs */
+#define M_HASHTYPE_RSS_UDP_IPV6 M_HASHTYPE_HASH(9) /* IPv6 UDP 4-tuple*/
+#define M_HASHTYPE_RSS_UDP_IPV6_EX M_HASHTYPE_HASH(10)/* IPv6 UDP 4-tuple +
+ * ext hdrs */
+
+#define M_HASHTYPE_OPAQUE 63 /* ordering, not affinity */
+#define M_HASHTYPE_OPAQUE_HASH M_HASHTYPE_HASH(M_HASHTYPE_OPAQUE)
+ /* ordering+hash, not affinity*/
+
+#define M_HASHTYPE_CLEAR(m) ((m)->m_pkthdr.rsstype = 0)
+#define M_HASHTYPE_GET(m) ((m)->m_pkthdr.rsstype)
+#define M_HASHTYPE_SET(m, v) ((m)->m_pkthdr.rsstype = (v))
#define M_HASHTYPE_TEST(m, v) (M_HASHTYPE_GET(m) == (v))
+#define M_HASHTYPE_ISHASH(m) (M_HASHTYPE_GET(m) & M_HASHTYPE_HASHPROP)
/*
- * Flags preserved when copying m_pkthdr.
+ * COS/QOS class and quality of service tags.
+ * It uses DSCP code points as base.
*/
-#define M_COPYFLAGS \
- (M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_SKIP_FIREWALL|M_BCAST|M_MCAST|\
- M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_FIB|M_HASHTYPEBITS)
+#define QOS_DSCP_CS0 0x00
+#define QOS_DSCP_DEF QOS_DSCP_CS0
+#define QOS_DSCP_CS1 0x20
+#define QOS_DSCP_AF11 0x28
+#define QOS_DSCP_AF12 0x30
+#define QOS_DSCP_AF13 0x38
+#define QOS_DSCP_CS2 0x40
+#define QOS_DSCP_AF21 0x48
+#define QOS_DSCP_AF22 0x50
+#define QOS_DSCP_AF23 0x58
+#define QOS_DSCP_CS3 0x60
+#define QOS_DSCP_AF31 0x68
+#define QOS_DSCP_AF32 0x70
+#define QOS_DSCP_AF33 0x78
+#define QOS_DSCP_CS4 0x80
+#define QOS_DSCP_AF41 0x88
+#define QOS_DSCP_AF42 0x90
+#define QOS_DSCP_AF43 0x98
+#define QOS_DSCP_CS5 0xa0
+#define QOS_DSCP_EF 0xb8
+#define QOS_DSCP_CS6 0xc0
+#define QOS_DSCP_CS7 0xe0
/*
- * External buffer types: identify ext_buf type.
+ * External mbuf storage buffer types.
*/
#define EXT_CLUSTER 1 /* mbuf cluster */
-#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */
-#define EXT_JUMBOP 3 /* jumbo cluster 4096 bytes */
+#ifndef __rtems__
+#define EXT_SFBUF 2 /* sendfile(2)'s sf_buf */
+#endif /* __rtems__ */
+#define EXT_JUMBOP 3 /* jumbo cluster page sized */
#define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */
#define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */
#define EXT_PACKET 6 /* mbuf+cluster from packet zone */
#define EXT_MBUF 7 /* external mbuf reference (M_IOVEC) */
-#define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */
-#define EXT_MOD_TYPE 200 /* custom module's ext_buf type */
-#define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */
-#define EXT_EXTREF 400 /* has externally maintained ref_cnt ptr */
+#ifndef __rtems__
+#define EXT_SFBUF_NOCACHE 8 /* sendfile(2)'s sf_buf not to be cached */
+#endif /* __rtems__ */
+
+#define EXT_VENDOR1 224 /* for vendor-internal use */
+#define EXT_VENDOR2 225 /* for vendor-internal use */
+#define EXT_VENDOR3 226 /* for vendor-internal use */
+#define EXT_VENDOR4 227 /* for vendor-internal use */
+
+#define EXT_EXP1 244 /* for experimental use */
+#define EXT_EXP2 245 /* for experimental use */
+#define EXT_EXP3 246 /* for experimental use */
+#define EXT_EXP4 247 /* for experimental use */
+
+#define EXT_NET_DRV 252 /* custom ext_buf provided by net driver(s) */
+#define EXT_MOD_TYPE 253 /* custom module's ext_buf type */
+#define EXT_DISPOSABLE 254 /* can throw this buffer away w/page flipping */
+#define EXT_EXTREF 255 /* has externally maintained ext_cnt ptr */
/*
- * Flags indicating hw checksum support and sw checksum requirements. This
- * field can be directly tested against if_data.ifi_hwassist.
+ * Flags for external mbuf buffer types.
+ * NB: limited to the lower 24 bits.
*/
-#define CSUM_IP 0x0001 /* will csum IP */
-#define CSUM_TCP 0x0002 /* will csum TCP */
-#define CSUM_UDP 0x0004 /* will csum UDP */
-#define CSUM_IP_FRAGS 0x0008 /* removed, left for compat */
-#define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */
-#define CSUM_TSO 0x0020 /* will do TSO */
-#define CSUM_SCTP 0x0040 /* will csum SCTP */
-#define CSUM_SCTP_IPV6 0x0080 /* will csum IPv6/SCTP */
-
-#define CSUM_IP_CHECKED 0x0100 /* did csum IP */
-#define CSUM_IP_VALID 0x0200 /* ... the csum is valid */
-#define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */
-#define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */
-#define CSUM_SCTP_VALID 0x1000 /* SCTP checksum is valid */
-#define CSUM_UDP_IPV6 0x2000 /* will csum IPv6/UDP */
-#define CSUM_TCP_IPV6 0x4000 /* will csum IPv6/TCP */
-/* CSUM_TSO_IPV6 0x8000 will do IPv6/TSO */
-
-/* CSUM_FRAGMENT_IPV6 0x10000 will do IPv6 fragementation */
-
-#define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6)
-#define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID
+#define EXT_FLAG_EMBREF 0x000001 /* embedded ext_count */
+#define EXT_FLAG_EXTREF 0x000002 /* external ext_cnt, notyet */
+
+#define EXT_FLAG_NOFREE 0x000010 /* don't free mbuf to pool, notyet */
-#define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP)
-#define CSUM_DELAY_IP (CSUM_IP) /* Only v4, no v6 IP hdr csum */
+#define EXT_FLAG_VENDOR1 0x010000 /* for vendor-internal use */
+#define EXT_FLAG_VENDOR2 0x020000 /* for vendor-internal use */
+#define EXT_FLAG_VENDOR3 0x040000 /* for vendor-internal use */
+#define EXT_FLAG_VENDOR4 0x080000 /* for vendor-internal use */
+
+#define EXT_FLAG_EXP1 0x100000 /* for experimental use */
+#define EXT_FLAG_EXP2 0x200000 /* for experimental use */
+#define EXT_FLAG_EXP3 0x400000 /* for experimental use */
+#define EXT_FLAG_EXP4 0x800000 /* for experimental use */
+
+/*
+ * EXT flag description for use with printf(9) %b identifier.
+ */
+#define EXT_FLAG_BITS \
+ "\20\1EXT_FLAG_EMBREF\2EXT_FLAG_EXTREF\5EXT_FLAG_NOFREE" \
+ "\21EXT_FLAG_VENDOR1\22EXT_FLAG_VENDOR2\23EXT_FLAG_VENDOR3" \
+ "\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \
+ "\30EXT_FLAG_EXP4"
+
+/*
+ * External reference/free functions.
+ */
+void sf_ext_free(void *, void *);
+void sf_ext_free_nocache(void *, void *);
/*
- * mbuf types.
+ * Flags indicating checksum, segmentation and other offload work to be
+ * done, or already done, by hardware or lower layers. It is split into
+ * separate inbound and outbound flags.
+ *
+ * Outbound flags that are set by upper protocol layers requesting lower
+ * layers, or ideally the hardware, to perform these offloading tasks.
+ * For outbound packets this field and its flags can be directly tested
+ * against ifnet if_hwassist.
+ */
+#define CSUM_IP 0x00000001 /* IP header checksum offload */
+#define CSUM_IP_UDP 0x00000002 /* UDP checksum offload */
+#define CSUM_IP_TCP 0x00000004 /* TCP checksum offload */
+#define CSUM_IP_SCTP 0x00000008 /* SCTP checksum offload */
+#define CSUM_IP_TSO 0x00000010 /* TCP segmentation offload */
+#define CSUM_IP_ISCSI 0x00000020 /* iSCSI checksum offload */
+
+#define CSUM_IP6_UDP 0x00000200 /* UDP checksum offload */
+#define CSUM_IP6_TCP 0x00000400 /* TCP checksum offload */
+#define CSUM_IP6_SCTP 0x00000800 /* SCTP checksum offload */
+#define CSUM_IP6_TSO 0x00001000 /* TCP segmentation offload */
+#define CSUM_IP6_ISCSI 0x00002000 /* iSCSI checksum offload */
+
+/* Inbound checksum support where the checksum was verified by hardware. */
+#define CSUM_L3_CALC 0x01000000 /* calculated layer 3 csum */
+#define CSUM_L3_VALID 0x02000000 /* checksum is correct */
+#define CSUM_L4_CALC 0x04000000 /* calculated layer 4 csum */
+#define CSUM_L4_VALID 0x08000000 /* checksum is correct */
+#define CSUM_L5_CALC 0x10000000 /* calculated layer 5 csum */
+#define CSUM_L5_VALID 0x20000000 /* checksum is correct */
+#define CSUM_COALESED 0x40000000 /* contains merged segments */
+
+/*
+ * CSUM flag description for use with printf(9) %b identifier.
+ */
+#define CSUM_BITS \
+ "\20\1CSUM_IP\2CSUM_IP_UDP\3CSUM_IP_TCP\4CSUM_IP_SCTP\5CSUM_IP_TSO" \
+ "\6CSUM_IP_ISCSI" \
+ "\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \
+ "\16CSUM_IP6_ISCSI" \
+ "\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \
+ "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESED"
+
+/* CSUM flags compatibility mappings. */
+#define CSUM_IP_CHECKED CSUM_L3_CALC
+#define CSUM_IP_VALID CSUM_L3_VALID
+#define CSUM_DATA_VALID CSUM_L4_VALID
+#define CSUM_PSEUDO_HDR CSUM_L4_CALC
+#define CSUM_SCTP_VALID CSUM_L4_VALID
+#define CSUM_DELAY_DATA (CSUM_TCP|CSUM_UDP)
+#define CSUM_DELAY_IP CSUM_IP /* Only v4, no v6 IP hdr csum */
+#define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6)
+#define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID
+#define CSUM_TCP CSUM_IP_TCP
+#define CSUM_UDP CSUM_IP_UDP
+#define CSUM_SCTP CSUM_IP_SCTP
+#define CSUM_TSO (CSUM_IP_TSO|CSUM_IP6_TSO)
+#define CSUM_UDP_IPV6 CSUM_IP6_UDP
+#define CSUM_TCP_IPV6 CSUM_IP6_TCP
+#define CSUM_SCTP_IPV6 CSUM_IP6_SCTP
+
+/*
+ * mbuf types describing the content of the mbuf (including external storage).
*/
#define MT_NOTMBUF 0 /* USED INTERNALLY ONLY! Object is not mbuf */
#define MT_DATA 1 /* dynamic (data) allocation */
#define MT_HEADER MT_DATA /* packet header, use M_PKTHDR instead */
+
+#define MT_VENDOR1 4 /* for vendor-internal use */
+#define MT_VENDOR2 5 /* for vendor-internal use */
+#define MT_VENDOR3 6 /* for vendor-internal use */
+#define MT_VENDOR4 7 /* for vendor-internal use */
+
#define MT_SONAME 8 /* socket name */
+
+#define MT_EXP1 9 /* for experimental use */
+#define MT_EXP2 10 /* for experimental use */
+#define MT_EXP3 11 /* for experimental use */
+#define MT_EXP4 12 /* for experimental use */
+
#define MT_CONTROL 14 /* extra-data protocol message */
#define MT_OOBDATA 15 /* expedited data */
#define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */
@@ -316,55 +533,6 @@ struct mbuf {
#define MT_NOINIT 255 /* Not a type but a flag to allocate
a non-initialized mbuf */
-#define MB_NOTAGS 0x1UL /* no tags attached to mbuf */
-
-/*
- * General mbuf allocator statistics structure.
- *
- * Many of these statistics are no longer used; we instead track many
- * allocator statistics through UMA's built in statistics mechanism.
- */
-struct mbstat {
- u_long m_mbufs; /* XXX */
- u_long m_mclusts; /* XXX */
-
- u_long m_drain; /* times drained protocols for space */
- u_long m_mcfail; /* XXX: times m_copym failed */
- u_long m_mpfail; /* XXX: times m_pullup failed */
- u_long m_msize; /* length of an mbuf */
- u_long m_mclbytes; /* length of an mbuf cluster */
- u_long m_minclsize; /* min length of data to allocate a cluster */
- u_long m_mlen; /* length of data in an mbuf */
- u_long m_mhlen; /* length of data in a header mbuf */
-
- /* Number of mbtypes (gives # elems in mbtypes[] array) */
- short m_numtypes;
-
- /* XXX: Sendfile stats should eventually move to their own struct */
- u_long sf_iocnt; /* times sendfile had to do disk I/O */
- u_long sf_allocfail; /* times sfbuf allocation failed */
- u_long sf_allocwait; /* times sfbuf allocation had to wait */
-};
-
-/*
- * Flags specifying how an allocation should be made.
- *
- * The flag to use is as follows:
- * - M_NOWAIT (M_DONTWAIT) from an interrupt handler to not block allocation.
- * - M_WAITOK (M_WAIT) from wherever it is safe to block.
- *
- * M_DONTWAIT/M_NOWAIT means that we will not block the thread explicitly and
- * if we cannot allocate immediately we may return NULL, whereas
- * M_WAIT/M_WAITOK means that if we cannot allocate resources we
- * will block until they are available, and thus never return NULL.
- *
- * XXX Eventually just phase this out to use M_WAITOK/M_NOWAIT.
- */
-#define MBTOM(how) (how)
-#define M_DONTWAIT M_NOWAIT
-#define M_TRYWAIT M_WAITOK
-#define M_WAIT M_WAITOK
-
/*
* String names of mbuf-related UMA(9) and malloc(9) types. Exposed to
* !_KERNEL so that monitoring tools can look up the zones with
@@ -402,23 +570,53 @@ extern uma_zone_t zone_pack;
extern uma_zone_t zone_jumbop;
extern uma_zone_t zone_jumbo9;
extern uma_zone_t zone_jumbo16;
-extern uma_zone_t zone_ext_refcnt;
-
-static __inline struct mbuf *m_getcl(int how, short type, int flags);
-static __inline struct mbuf *m_get(int how, short type);
-static __inline struct mbuf *m_gethdr(int how, short type);
-static __inline struct mbuf *m_getjcl(int how, short type, int flags,
- int size);
-static __inline struct mbuf *m_getclr(int how, short type); /* XXX */
-static __inline int m_init(struct mbuf *m, uma_zone_t zone,
- int size, int how, short type, int flags);
-static __inline struct mbuf *m_free(struct mbuf *m);
-static __inline void m_clget(struct mbuf *m, int how);
-static __inline void *m_cljget(struct mbuf *m, int how, int size);
-static __inline void m_chtype(struct mbuf *m, short new_type);
-void mb_free_ext(struct mbuf *);
-static __inline struct mbuf *m_last(struct mbuf *m);
-int m_pkthdr_init(struct mbuf *m, int how);
+
+void mb_dupcl(struct mbuf *, struct mbuf *);
+void mb_free_ext(struct mbuf *);
+void m_adj(struct mbuf *, int);
+int m_apply(struct mbuf *, int, int,
+ int (*)(void *, void *, u_int), void *);
+int m_append(struct mbuf *, int, c_caddr_t);
+void m_cat(struct mbuf *, struct mbuf *);
+void m_catpkt(struct mbuf *, struct mbuf *);
+int m_clget(struct mbuf *m, int how);
+void *m_cljget(struct mbuf *m, int how, int size);
+struct mbuf *m_collapse(struct mbuf *, int, int);
+void m_copyback(struct mbuf *, int, int, c_caddr_t);
+void m_copydata(const struct mbuf *, int, int, caddr_t);
+struct mbuf *m_copym(struct mbuf *, int, int, int);
+struct mbuf *m_copypacket(struct mbuf *, int);
+void m_copy_pkthdr(struct mbuf *, struct mbuf *);
+struct mbuf *m_copyup(struct mbuf *, int, int);
+struct mbuf *m_defrag(struct mbuf *, int);
+void m_demote_pkthdr(struct mbuf *);
+void m_demote(struct mbuf *, int, int);
+struct mbuf *m_devget(char *, int, int, struct ifnet *,
+ void (*)(char *, caddr_t, u_int));
+struct mbuf *m_dup(const struct mbuf *, int);
+int m_dup_pkthdr(struct mbuf *, const struct mbuf *, int);
+void m_extadd(struct mbuf *, caddr_t, u_int,
+ void (*)(struct mbuf *, void *, void *), void *, void *,
+ int, int);
+u_int m_fixhdr(struct mbuf *);
+struct mbuf *m_fragment(struct mbuf *, int, int);
+void m_freem(struct mbuf *);
+struct mbuf *m_get2(int, int, short, int);
+struct mbuf *m_getjcl(int, short, int, int);
+struct mbuf *m_getm2(struct mbuf *, int, int, short, int);
+struct mbuf *m_getptr(struct mbuf *, int, int *);
+u_int m_length(struct mbuf *, struct mbuf **);
+int m_mbuftouio(struct uio *, struct mbuf *, int);
+void m_move_pkthdr(struct mbuf *, struct mbuf *);
+int m_pkthdr_init(struct mbuf *, int);
+struct mbuf *m_prepend(struct mbuf *, int, int);
+void m_print(const struct mbuf *, int);
+struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
+struct mbuf *m_pullup(struct mbuf *, int);
+int m_sanity(struct mbuf *, int);
+struct mbuf *m_split(struct mbuf *, int, int);
+struct mbuf *m_uiotombuf(struct uio *, int, int, int, int);
+struct mbuf *m_unshare(struct mbuf *, int);
static __inline int
m_gettype(int size)
@@ -444,7 +642,7 @@ m_gettype(int size)
type = EXT_JUMBO16;
break;
default:
- panic("%s: m_getjcl: invalid cluster size", __func__);
+ panic("%s: invalid cluster size %d", __func__, size);
}
return (type);
@@ -455,7 +653,7 @@ m_gettype(int size)
*/
static __inline void
m_extaddref(struct mbuf *m, caddr_t buf, u_int size, u_int *ref_cnt,
- void (*freef)(void *, void *), void *arg1, void *arg2)
+ void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2)
{
KASSERT(ref_cnt != NULL, ("%s: ref_cnt not provided", __func__));
@@ -463,13 +661,14 @@ m_extaddref(struct mbuf *m, caddr_t buf, u_int size, u_int *ref_cnt,
atomic_add_int((int*)ref_cnt, 1);
m->m_flags |= M_EXT;
m->m_ext.ext_buf = buf;
- m->m_ext.ref_cnt = ref_cnt;
+ m->m_ext.ext_cnt = ref_cnt;
m->m_data = m->m_ext.ext_buf;
m->m_ext.ext_size = size;
m->m_ext.ext_free = freef;
m->m_ext.ext_arg1 = arg1;
m->m_ext.ext_arg2 = arg2;
m->m_ext.ext_type = EXT_EXTREF;
+ m->m_ext.ext_flags = 0;
}
static __inline uma_zone_t
@@ -478,9 +677,6 @@ m_getzone(int size)
uma_zone_t zone;
switch (size) {
- case MSIZE:
- zone = zone_mbuf;
- break;
case MCLBYTES:
zone = zone_clust;
break;
@@ -496,7 +692,7 @@ m_getzone(int size)
zone = zone_jumbo16;
break;
default:
- panic("%s: m_getjcl: invalid cluster type", __func__);
+ panic("%s: invalid cluster size %d", __func__, size);
}
return (zone);
@@ -510,8 +706,7 @@ m_getzone(int size)
* should go away with constant propagation for !MGETHDR.
*/
static __inline int
-m_init(struct mbuf *m, uma_zone_t zone, int size, int how, short type,
- int flags)
+m_init(struct mbuf *m, int how, short type, int flags)
{
int error;
@@ -521,182 +716,81 @@ m_init(struct mbuf *m, uma_zone_t zone, int size, int how, short type,
m->m_len = 0;
m->m_flags = flags;
m->m_type = type;
- if (flags & M_PKTHDR) {
- if ((error = m_pkthdr_init(m, how)) != 0)
- return (error);
- }
+ if (flags & M_PKTHDR)
+ error = m_pkthdr_init(m, how);
+ else
+ error = 0;
- return (0);
+ MBUF_PROBE5(m__init, m, how, type, flags, error);
+ return (error);
}
static __inline struct mbuf *
m_get(int how, short type)
{
- struct mb_args args;
-
- args.flags = 0;
- args.type = type;
- return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how)));
-}
-
-/*
- * XXX This should be deprecated, very little use.
- */
-static __inline struct mbuf *
-m_getclr(int how, short type)
-{
struct mbuf *m;
struct mb_args args;
args.flags = 0;
args.type = type;
- m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, how);
- if (m != NULL)
- bzero(m->m_data, MLEN);
+ m = uma_zalloc_arg(zone_mbuf, &args, how);
+ MBUF_PROBE3(m__get, how, type, m);
return (m);
}
static __inline struct mbuf *
m_gethdr(int how, short type)
{
+ struct mbuf *m;
struct mb_args args;
args.flags = M_PKTHDR;
args.type = type;
- return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how)));
+ m = uma_zalloc_arg(zone_mbuf, &args, how);
+ MBUF_PROBE3(m__gethdr, how, type, m);
+ return (m);
}
static __inline struct mbuf *
m_getcl(int how, short type, int flags)
{
+ struct mbuf *m;
struct mb_args args;
args.flags = flags;
args.type = type;
- return ((struct mbuf *)(uma_zalloc_arg(zone_pack, &args, how)));
-}
-
-/*
- * m_getjcl() returns an mbuf with a cluster of the specified size attached.
- * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
- *
- * XXX: This is rather large, should be real function maybe.
- */
-static __inline struct mbuf *
-m_getjcl(int how, short type, int flags, int size)
-{
- struct mb_args args;
- struct mbuf *m, *n;
- uma_zone_t zone;
-
- if (size == MCLBYTES)
- return m_getcl(how, type, flags);
-
- args.flags = flags;
- args.type = type;
-
- m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, how);
- if (m == NULL)
- return (NULL);
-
- zone = m_getzone(size);
- n = (struct mbuf *)uma_zalloc_arg(zone, m, how);
- if (n == NULL) {
- uma_zfree(zone_mbuf, m);
- return (NULL);
- }
+ m = uma_zalloc_arg(zone_pack, &args, how);
+ MBUF_PROBE4(m__getcl, how, type, flags, m);
return (m);
}
-static __inline void
-m_free_fast(struct mbuf *m)
-{
-#ifdef INVARIANTS
- if (m->m_flags & M_PKTHDR)
- KASSERT(SLIST_EMPTY(&m->m_pkthdr.tags), ("doing fast free of mbuf with tags"));
-#endif
-
- uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS);
-}
-
-static __inline struct mbuf *
-m_free(struct mbuf *m)
-{
- struct mbuf *n = m->m_next;
-
- if (m->m_flags & M_EXT)
- mb_free_ext(m);
- else if ((m->m_flags & M_NOFREE) == 0)
- uma_zfree(zone_mbuf, m);
- return (n);
-}
-
-static __inline void
-m_clget(struct mbuf *m, int how)
-{
-
- if (m->m_flags & M_EXT)
- printf("%s: %p mbuf already has cluster\n", __func__, m);
- m->m_ext.ext_buf = (char *)NULL;
- uma_zalloc_arg(zone_clust, m, how);
- /*
- * On a cluster allocation failure, drain the packet zone and retry,
- * we might be able to loosen a few clusters up on the drain.
- */
- if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
- zone_drain(zone_pack);
- uma_zalloc_arg(zone_clust, m, how);
- }
-}
-
/*
- * m_cljget() is different from m_clget() as it can allocate clusters without
- * attaching them to an mbuf. In that case the return value is the pointer
- * to the cluster of the requested size. If an mbuf was specified, it gets
- * the cluster attached to it and the return value can be safely ignored.
- * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
+ * XXX: m_cljset() is a dangerous API. One must attach only a new,
+ * unreferenced cluster to an mbuf(9). It is not possible to assert
+ * that, so care can be taken only by users of the API.
*/
-static __inline void *
-m_cljget(struct mbuf *m, int how, int size)
-{
- uma_zone_t zone;
-
- if (m && m->m_flags & M_EXT)
- printf("%s: %p mbuf already has cluster\n", __func__, m);
- if (m != NULL)
- m->m_ext.ext_buf = NULL;
-
- zone = m_getzone(size);
- return (uma_zalloc_arg(zone, m, how));
-}
-
static __inline void
m_cljset(struct mbuf *m, void *cl, int type)
{
- uma_zone_t zone;
int size;
switch (type) {
case EXT_CLUSTER:
size = MCLBYTES;
- zone = zone_clust;
break;
#if MJUMPAGESIZE != MCLBYTES
case EXT_JUMBOP:
size = MJUMPAGESIZE;
- zone = zone_jumbop;
break;
#endif
case EXT_JUMBO9:
size = MJUM9BYTES;
- zone = zone_jumbo9;
break;
case EXT_JUMBO16:
size = MJUM16BYTES;
- zone = zone_jumbo16;
break;
default:
- panic("unknown cluster type");
+ panic("%s: unknown cluster type %d", __func__, type);
break;
}
@@ -705,9 +799,10 @@ m_cljset(struct mbuf *m, void *cl, int type)
m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL;
m->m_ext.ext_size = size;
m->m_ext.ext_type = type;
- m->m_ext.ref_cnt = (volatile u_int *) uma_find_refcnt(zone, cl);
+ m->m_ext.ext_flags = EXT_FLAG_EMBREF;
+ m->m_ext.ext_count = 1;
m->m_flags |= M_EXT;
-
+ MBUF_PROBE3(m__cljset, m, cl, type);
}
static __inline void
@@ -717,6 +812,16 @@ m_chtype(struct mbuf *m, short new_type)
m->m_type = new_type;
}
+static __inline void
+m_clrprotoflags(struct mbuf *m)
+{
+
+ while (m) {
+ m->m_flags &= ~M_PROTOFLAGS;
+ m = m->m_next;
+ }
+}
+
static __inline struct mbuf *
m_last(struct mbuf *m)
{
@@ -726,14 +831,14 @@ m_last(struct mbuf *m)
return (m);
}
-extern void (*m_addr_chg_pf_p)(struct mbuf *m);
-
-static __inline void
-m_addr_changed(struct mbuf *m)
+static inline u_int
+m_extrefcnt(struct mbuf *m)
{
- if (m_addr_chg_pf_p)
- m_addr_chg_pf_p(m);
+ KASSERT(m->m_flags & M_EXT, ("%s: M_EXT missing", __func__));
+
+ return ((m->m_ext.ext_flags & EXT_FLAG_EMBREF) ? m->m_ext.ext_count :
+ *m->m_ext.ext_cnt);
}
/*
@@ -745,7 +850,8 @@ m_addr_changed(struct mbuf *m)
#define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type)))
#define MCLGET(m, how) m_clget((m), (how))
#define MEXTADD(m, buf, size, free, arg1, arg2, flags, type) \
- m_extadd((m), (caddr_t)(buf), (size), (free),(arg1),(arg2),(flags), (type))
+ m_extadd((m), (caddr_t)(buf), (size), (free), (arg1), (arg2), \
+ (flags), (type))
#define m_getm(m, len, how, type) \
m_getm2((m), (len), (how), (type), M_PKTHDR)
@@ -756,7 +862,7 @@ m_addr_changed(struct mbuf *m)
*/
#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \
(!(((m)->m_flags & M_EXT)) || \
- (*((m)->m_ext.ref_cnt) == 1)) ) \
+ (m_extrefcnt(m) == 1)))
/* Check if the supplied mbuf has a packet header, or else panic. */
#define M_ASSERTPKTHDR(m) \
@@ -773,28 +879,50 @@ m_addr_changed(struct mbuf *m)
("%s: attempted use of a free mbuf!", __func__))
/*
- * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place an
- * object of the specified size at the end of the mbuf, longword aligned.
+ * Return the address of the start of the buffer associated with an mbuf,
+ * handling external storage, packet-header mbufs, and regular data mbufs.
*/
-#define M_ALIGN(m, len) do { \
- KASSERT(!((m)->m_flags & (M_PKTHDR|M_EXT)), \
- ("%s: M_ALIGN not normal mbuf", __func__)); \
- KASSERT((m)->m_data == (m)->m_dat, \
- ("%s: M_ALIGN not a virgin mbuf", __func__)); \
- (m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1); \
-} while (0)
+#define M_START(m) \
+ (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \
+ ((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] : \
+ &(m)->m_dat[0])
/*
- * As above, for mbufs allocated with m_gethdr/MGETHDR or initialized by
- * M_DUP/MOVE_PKTHDR.
+ * Return the size of the buffer associated with an mbuf, handling external
+ * storage, packet-header mbufs, and regular data mbufs.
*/
-#define MH_ALIGN(m, len) do { \
- KASSERT((m)->m_flags & M_PKTHDR && !((m)->m_flags & M_EXT), \
- ("%s: MH_ALIGN not PKTHDR mbuf", __func__)); \
- KASSERT((m)->m_data == (m)->m_pktdat, \
- ("%s: MH_ALIGN not a virgin mbuf", __func__)); \
- (m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1); \
-} while (0)
+#define M_SIZE(m) \
+ (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size : \
+ ((m)->m_flags & M_PKTHDR) ? MHLEN : \
+ MLEN)
+
+/*
+ * Set the m_data pointer of a newly allocated mbuf to place an object of the
+ * specified size at the end of the mbuf, longword aligned.
+ *
+ * NB: Historically, we had M_ALIGN(), MH_ALIGN(), and MEXT_ALIGN() as
+ * separate macros, each asserting that it was called at the proper moment.
+ * This required callers to themselves test the storage type and call the
+ * right one. Rather than require callers to be aware of those layout
+ * decisions, we centralize here.
+ */
+static __inline void
+m_align(struct mbuf *m, int len)
+{
+#ifdef INVARIANTS
+ const char *msg = "%s: not a virgin mbuf";
+#endif
+ int adjust;
+
+ KASSERT(m->m_data == M_START(m), (msg, __func__));
+
+ adjust = M_SIZE(m) - len;
+ m->m_data += adjust &~ (sizeof(long)-1);
+}
+
+#define M_ALIGN(m, len) m_align(m, len)
+#define MH_ALIGN(m, len) m_align(m, len)
+#define MEXT_ALIGN(m, len) m_align(m, len)
/*
* Compute the amount of space available before the current start of data in
@@ -802,24 +930,27 @@ m_addr_changed(struct mbuf *m)
*
* The M_WRITABLE() is a temporary, conservative safety measure: the burden
* of checking writability of the mbuf data area rests solely with the caller.
+ *
+ * NB: In previous versions, M_LEADINGSPACE() would only check M_WRITABLE()
+ * for mbufs with external storage. We now allow mbuf-embedded data to be
+ * read-only as well.
*/
#define M_LEADINGSPACE(m) \
- ((m)->m_flags & M_EXT ? \
- (M_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0): \
- (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat : \
- (m)->m_data - (m)->m_dat)
+ (M_WRITABLE(m) ? ((m)->m_data - M_START(m)) : 0)
/*
* Compute the amount of space available after the end of data in an mbuf.
*
* The M_WRITABLE() is a temporary, conservative safety measure: the burden
* of checking writability of the mbuf data area rests solely with the caller.
+ *
+ * NB: In previous versions, M_TRAILINGSPACE() would only check M_WRITABLE()
+ * for mbufs with external storage. We now allow mbuf-embedded data to be
+ * read-only as well.
*/
#define M_TRAILINGSPACE(m) \
- ((m)->m_flags & M_EXT ? \
- (M_WRITABLE(m) ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size \
- - ((m)->m_data + (m)->m_len) : 0) : \
- &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))
+ (M_WRITABLE(m) ? \
+ ((M_START(m) + M_SIZE(m)) - ((m)->m_data + (m)->m_len)) : 0)
/*
* Arrange to prepend space of size plen to mbuf m. If a new mbuf must be
@@ -853,57 +984,14 @@ m_addr_changed(struct mbuf *m)
#define M_COPYALL 1000000000
/* Compatibility with 4.3. */
-#define m_copy(m, o, l) m_copym((m), (o), (l), M_DONTWAIT)
+#define m_copy(m, o, l) m_copym((m), (o), (l), M_NOWAIT)
extern int max_datalen; /* MHLEN - max_hdr */
extern int max_hdr; /* Largest link + protocol header */
extern int max_linkhdr; /* Largest link-level header */
extern int max_protohdr; /* Largest protocol header */
-extern struct mbstat mbstat; /* General mbuf stats/infos */
extern int nmbclusters; /* Maximum number of clusters */
-struct uio;
-
-void m_adj(struct mbuf *, int);
-void m_align(struct mbuf *, int);
-int m_apply(struct mbuf *, int, int,
- int (*)(void *, void *, u_int), void *);
-int m_append(struct mbuf *, int, c_caddr_t);
-void m_cat(struct mbuf *, struct mbuf *);
-void m_extadd(struct mbuf *, caddr_t, u_int,
- void (*)(void *, void *), void *, void *, int, int);
-struct mbuf *m_collapse(struct mbuf *, int, int);
-void m_copyback(struct mbuf *, int, int, c_caddr_t);
-void m_copydata(const struct mbuf *, int, int, caddr_t);
-struct mbuf *m_copym(struct mbuf *, int, int, int);
-struct mbuf *m_copymdata(struct mbuf *, struct mbuf *,
- int, int, int, int);
-struct mbuf *m_copypacket(struct mbuf *, int);
-void m_copy_pkthdr(struct mbuf *, struct mbuf *);
-struct mbuf *m_copyup(struct mbuf *n, int len, int dstoff);
-struct mbuf *m_defrag(struct mbuf *, int);
-void m_demote(struct mbuf *, int);
-struct mbuf *m_devget(char *, int, int, struct ifnet *,
- void (*)(char *, caddr_t, u_int));
-struct mbuf *m_dup(struct mbuf *, int);
-int m_dup_pkthdr(struct mbuf *, struct mbuf *, int);
-u_int m_fixhdr(struct mbuf *);
-struct mbuf *m_fragment(struct mbuf *, int, int);
-void m_freem(struct mbuf *);
-struct mbuf *m_getm2(struct mbuf *, int, int, short, int);
-struct mbuf *m_getptr(struct mbuf *, int, int *);
-u_int m_length(struct mbuf *, struct mbuf **);
-int m_mbuftouio(struct uio *, struct mbuf *, int);
-void m_move_pkthdr(struct mbuf *, struct mbuf *);
-struct mbuf *m_prepend(struct mbuf *, int, int);
-void m_print(const struct mbuf *, int);
-struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
-struct mbuf *m_pullup(struct mbuf *, int);
-int m_sanity(struct mbuf *, int);
-struct mbuf *m_split(struct mbuf *, int, int);
-struct mbuf *m_uiotombuf(struct uio *, int, int, int, int);
-struct mbuf *m_unshare(struct mbuf *, int how);
-
/*-
* Network packets may have annotations attached by affixing a list of
* "packet tags" to the pkthdr structure. Packet tags are dynamically
@@ -975,7 +1063,7 @@ struct mbuf *m_unshare(struct mbuf *, int how);
#define PACKET_TAG_DIVERT 17 /* divert info */
#define PACKET_TAG_IPFORWARD 18 /* ipforward info */
#define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */
-#define PACKET_TAG_PF 21 /* PF + ALTQ information */
+#define PACKET_TAG_PF (21 | MTAG_PERSISTENT) /* PF/ALTQ information */
#define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */
#define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */
#define PACKET_TAG_CARP 28 /* CARP info */
@@ -991,7 +1079,7 @@ void m_tag_delete_chain(struct mbuf *, struct m_tag *);
void m_tag_free_default(struct m_tag *);
struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *);
struct m_tag *m_tag_copy(struct m_tag *, int);
-int m_tag_copy_chain(struct mbuf *, struct mbuf *, int);
+int m_tag_copy_chain(struct mbuf *, const struct mbuf *, int);
void m_tag_delete_nonpersistent(struct mbuf *);
/*
@@ -1043,7 +1131,7 @@ m_tag_first(struct mbuf *m)
* Return the next tag in the list of tags associated with an mbuf.
*/
static __inline struct m_tag *
-m_tag_next(struct mbuf *m, struct m_tag *t)
+m_tag_next(struct mbuf *m __unused, struct m_tag *t)
{
return (SLIST_NEXT(t, m_tag_link));
@@ -1085,20 +1173,43 @@ m_tag_find(struct mbuf *m, int type, struct m_tag *start)
m_tag_locate(m, MTAG_ABI_COMPAT, type, start));
}
-/* XXX temporary FIB methods probably eventually use tags.*/
-#define M_FIBSHIFT 28
-#define M_FIBMASK 0x0F
+static __inline struct mbuf *
+m_free(struct mbuf *m)
+{
+ struct mbuf *n = m->m_next;
-/* get the fib from an mbuf and if it is not set, return the default */
-#define M_GETFIB(_m) \
- ((((_m)->m_flags & M_FIB) >> M_FIBSHIFT) & M_FIBMASK)
+ MBUF_PROBE1(m__free, m);
+ if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE))
+ m_tag_delete_chain(m, NULL);
+ if (m->m_flags & M_EXT)
+ mb_free_ext(m);
+ else if ((m->m_flags & M_NOFREE) == 0)
+ uma_zfree(zone_mbuf, m);
+ return (n);
+}
+
+static __inline int
+rt_m_getfib(struct mbuf *m)
+{
+ KASSERT(m->m_flags & M_PKTHDR , ("Attempt to get FIB from non header mbuf."));
+ return (m->m_pkthdr.fibnum);
+}
+
+#define M_GETFIB(_m) rt_m_getfib(_m)
#define M_SETFIB(_m, _fib) do { \
- _m->m_flags &= ~M_FIB; \
- _m->m_flags |= (((_fib) << M_FIBSHIFT) & M_FIB); \
+ KASSERT((_m)->m_flags & M_PKTHDR, ("Attempt to set FIB on non header mbuf.")); \
+ ((_m)->m_pkthdr.fibnum) = (_fib); \
} while (0)
-#endif /* _KERNEL */
+/* flags passed as first argument for "m_ether_tcpip_hash()" */
+#define MBUF_HASHFLAG_L2 (1 << 2)
+#define MBUF_HASHFLAG_L3 (1 << 3)
+#define MBUF_HASHFLAG_L4 (1 << 4)
+
+/* mbuf hashing helper routines */
+uint32_t m_ether_tcpip_hash_init(void);
+uint32_t m_ether_tcpip_hash(const uint32_t, const struct mbuf *, const uint32_t);
#ifdef MBUF_PROFILING
void m_profile(struct mbuf *m);
@@ -1107,5 +1218,103 @@ m_tag_find(struct mbuf *m, int type, struct m_tag *start)
#define M_PROFILE(m)
#endif
+struct mbufq {
+ STAILQ_HEAD(, mbuf) mq_head;
+ int mq_len;
+ int mq_maxlen;
+};
+
+static inline void
+mbufq_init(struct mbufq *mq, int maxlen)
+{
+
+ STAILQ_INIT(&mq->mq_head);
+ mq->mq_maxlen = maxlen;
+ mq->mq_len = 0;
+}
+
+static inline struct mbuf *
+mbufq_flush(struct mbufq *mq)
+{
+ struct mbuf *m;
+
+ m = STAILQ_FIRST(&mq->mq_head);
+ STAILQ_INIT(&mq->mq_head);
+ mq->mq_len = 0;
+ return (m);
+}
+
+static inline void
+mbufq_drain(struct mbufq *mq)
+{
+ struct mbuf *m, *n;
+
+ n = mbufq_flush(mq);
+ while ((m = n) != NULL) {
+ n = STAILQ_NEXT(m, m_stailqpkt);
+ m_freem(m);
+ }
+}
+
+static inline struct mbuf *
+mbufq_first(const struct mbufq *mq)
+{
+
+ return (STAILQ_FIRST(&mq->mq_head));
+}
+
+static inline struct mbuf *
+mbufq_last(const struct mbufq *mq)
+{
+
+ return (STAILQ_LAST(&mq->mq_head, mbuf, m_stailqpkt));
+}
+
+static inline int
+mbufq_full(const struct mbufq *mq)
+{
+
+ return (mq->mq_len >= mq->mq_maxlen);
+}
+
+static inline int
+mbufq_len(const struct mbufq *mq)
+{
+ return (mq->mq_len);
+}
+
+static inline int
+mbufq_enqueue(struct mbufq *mq, struct mbuf *m)
+{
+
+ if (mbufq_full(mq))
+ return (ENOBUFS);
+ STAILQ_INSERT_TAIL(&mq->mq_head, m, m_stailqpkt);
+ mq->mq_len++;
+ return (0);
+}
+
+static inline struct mbuf *
+mbufq_dequeue(struct mbufq *mq)
+{
+ struct mbuf *m;
+
+ m = STAILQ_FIRST(&mq->mq_head);
+ if (m) {
+ STAILQ_REMOVE_HEAD(&mq->mq_head, m_stailqpkt);
+ m->m_nextpkt = NULL;
+ mq->mq_len--;
+ }
+ return (m);
+}
+
+static inline void
+mbufq_prepend(struct mbufq *mq, struct mbuf *m)
+{
+
+ STAILQ_INSERT_HEAD(&mq->mq_head, m, m_stailqpkt);
+ mq->mq_len++;
+}
+#endif /* _KERNEL */
#endif /* !_SYS_MBUF_H_ */
diff --git a/freebsd/sys/sys/module.h b/freebsd/sys/sys/module.h
index 782770a7..07464fc6 100644
--- a/freebsd/sys/sys/module.h
+++ b/freebsd/sys/sys/module.h
@@ -35,6 +35,7 @@
#define MDT_DEPEND 1 /* argument is a module name */
#define MDT_MODULE 2 /* module declaration */
#define MDT_VERSION 3 /* module version(s) */
+#define MDT_PNP_INFO 4 /* Plug and play hints record */
#define MDT_STRUCT_VERSION 1 /* version of metadata structure */
#define MDT_SETNAME "modmetadata_set"
@@ -70,7 +71,7 @@ typedef union modspecific {
} modspecific_t;
/*
- * Module dependency declarartion
+ * Module dependency declaration
*/
struct mod_depend {
int md_ver_minimum;
@@ -88,10 +89,19 @@ struct mod_version {
struct mod_metadata {
int md_version; /* structure version MDTV_* */
int md_type; /* type of entry MDT_* */
- void *md_data; /* specific data */
+ const void *md_data; /* specific data */
const char *md_cval; /* common string label */
};
+struct mod_pnp_match_info
+{
+ const char *descr; /* Description of the table */
+ const char *bus; /* Name of the bus for this table */
+ const void *table; /* Pointer to pnp table */
+ int entry_len; /* Length of each entry in the table (may be */
+ /* longer than descr describes). */
+ int num_entry; /* Number of entries in the table */
+};
#ifdef _KERNEL
#include <sys/linker_set.h>
@@ -106,7 +116,8 @@ struct mod_metadata {
DATA_SET(modmetadata_set, _mod_metadata##uniquifier)
#define MODULE_DEPEND(module, mdepend, vmin, vpref, vmax) \
- static struct mod_depend _##module##_depend_on_##mdepend = { \
+ static struct mod_depend _##module##_depend_on_##mdepend \
+ __section(".data") = { \
vmin, \
vpref, \
vmax \
@@ -146,12 +157,51 @@ struct mod_metadata {
DECLARE_MODULE_WITH_MAXVER(name, data, sub, order, __FreeBSD_version)
#define MODULE_VERSION(module, version) \
- static struct mod_version _##module##_version = { \
+ static struct mod_version _##module##_version \
+ __section(".data") = { \
version \
}; \
MODULE_METADATA(_##module##_version, MDT_VERSION, \
&_##module##_version, #module)
+/**
+ * Generic macros to create pnp info hints that modules may export
+ * to allow external tools to parse their internal device tables
+ * to make an informed guess about what driver(s) to load.
+ */
+#define MODULE_PNP_INFO(d, b, unique, t, l, n) \
+ static const struct mod_pnp_match_info _module_pnp_##b##_##unique = { \
+ .descr = d, \
+ .bus = #b, \
+ .table = t, \
+ .entry_len = l, \
+ .num_entry = n \
+ }; \
+ MODULE_METADATA(_md_##b##_pnpinfo_##unique, MDT_PNP_INFO, \
+ &_module_pnp_##b##_##unique, #b);
+/**
+ * descr is a string that describes each entry in the table. The general
+ * form is (TYPE:pnp_name[/pnp_name];)*
+ * where TYPE is one of the following:
+ * U8 uint8_t element
+ * V8 like U8 and 0xff means match any
+ * G16 uint16_t element, any value >= matches
+ * L16 uint16_t element, any value <= matches
+ * M16 uint16_t element, mask of which of the following fields to use.
+ * U16 uint16_t element
+ * V16 like U16 and 0xffff means match any
+ * U32 uint32_t element
+ * V32 like U32 and 0xffffffff means match any
+ * W32 Two 16-bit values with first pnp_name in LSW and second in MSW.
+ * Z pointer to a string to match exactly
+ * D like Z, but is the string passed to device_set_descr()
+ * P A pointer that should be ignored
+ * E EISA PNP Identifier (in binary, but bus publishes string)
+ * K Key for whole table. pnp_name=value. must be last, if present.
+ *
+ * The pnp_name "#" is reserved for other fields that should be ignored.
+ */
+
extern struct sx modules_sx;
#define MOD_XLOCK sx_xlock(&modules_sx)
diff --git a/freebsd/sys/sys/mount.h b/freebsd/sys/sys/mount.h
index 0001016b..49e688f3 100644
--- a/freebsd/sys/sys/mount.h
+++ b/freebsd/sys/sys/mount.h
@@ -39,6 +39,7 @@
#include <rtems/bsd/sys/lock.h>
#include <sys/lockmgr.h>
#include <sys/_mutex.h>
+#include <sys/_sx.h>
#endif
/*
@@ -170,7 +171,6 @@ struct mount {
int mnt_writeopcount; /* (i) write syscalls pending */
int mnt_kern_flag; /* (i) kernel only flags */
uint64_t mnt_flag; /* (i) flags shared with user */
- u_int mnt_pad_noasync;
struct vfsoptlist *mnt_opt; /* current mount options */
struct vfsoptlist *mnt_optnew; /* new options passed to fs */
int mnt_maxsymlinklen; /* max size of short symlink */
@@ -226,29 +226,6 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
#define MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp) \
__mnt_vnode_markerfree_active(&(mvp), (mp))
-/*
- * Definitions for MNT_VNODE_FOREACH.
- *
- * This interface has been deprecated in favor of MNT_VNODE_FOREACH_ALL.
- */
-struct vnode *__mnt_vnode_next(struct vnode **mvp, struct mount *mp);
-struct vnode *__mnt_vnode_first(struct vnode **mvp, struct mount *mp);
-void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
-
-#define MNT_VNODE_FOREACH(vp, mp, mvp) \
- for (vp = __mnt_vnode_first(&(mvp), (mp)); \
- (vp) != NULL; vp = __mnt_vnode_next(&(mvp), (mp)))
-
-#define MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp) \
- __mnt_vnode_markerfree(&(mvp), (mp))
-
-#define MNT_VNODE_FOREACH_ABORT(mp, mvp) \
- do { \
- MNT_ILOCK(mp); \
- MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); \
- MNT_IUNLOCK(mp); \
- } while (0)
-
#define MNT_ILOCK(mp) mtx_lock(&(mp)->mnt_mtx)
#define MNT_ITRYLOCK(mp) mtx_trylock(&(mp)->mnt_mtx)
#define MNT_IUNLOCK(mp) mtx_unlock(&(mp)->mnt_mtx)
@@ -283,6 +260,7 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
#define MNT_NOCLUSTERR 0x0000000040000000ULL /* disable cluster read */
#define MNT_NOCLUSTERW 0x0000000080000000ULL /* disable cluster write */
#define MNT_SUJ 0x0000000100000000ULL /* using journaled soft updates */
+#define MNT_AUTOMOUNTED 0x0000000200000000ULL /* mounted by automountd(8) */
/*
* NFS export related mount flags.
@@ -319,7 +297,7 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \
MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \
MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | \
- MNT_NFS4ACLS)
+ MNT_NFS4ACLS | MNT_AUTOMOUNTED)
/* Mask of flags that can be updated. */
#define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | \
@@ -327,23 +305,28 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
MNT_NOATIME | \
MNT_NOSYMFOLLOW | MNT_IGNORE | \
MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | \
- MNT_ACLS | MNT_USER | MNT_NFS4ACLS)
+ MNT_ACLS | MNT_USER | MNT_NFS4ACLS | \
+ MNT_AUTOMOUNTED)
/*
* External filesystem command modifier flags.
* Unmount can use the MNT_FORCE flag.
* XXX: These are not STATES and really should be somewhere else.
- * XXX: MNT_BYFSID collides with MNT_ACLS, but because MNT_ACLS is only used for
- * mount(2) and MNT_BYFSID is only used for unmount(2) it's harmless.
+ * XXX: MNT_BYFSID and MNT_NONBUSY collide with MNT_ACLS and MNT_MULTILABEL,
+ * but because MNT_ACLS and MNT_MULTILABEL are only used for mount(2),
+ * and MNT_BYFSID and MNT_NONBUSY are only used for unmount(2),
+ * it's harmless.
*/
#define MNT_UPDATE 0x0000000000010000ULL /* not real mount, just update */
#define MNT_DELEXPORT 0x0000000000020000ULL /* delete export host lists */
#define MNT_RELOAD 0x0000000000040000ULL /* reload filesystem data */
#define MNT_FORCE 0x0000000000080000ULL /* force unmount or readonly */
#define MNT_SNAPSHOT 0x0000000001000000ULL /* snapshot the filesystem */
+#define MNT_NONBUSY 0x0000000004000000ULL /* check vnode use counts. */
#define MNT_BYFSID 0x0000000008000000ULL /* specify filesystem by ID. */
#define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \
- MNT_FORCE | MNT_SNAPSHOT | MNT_BYFSID)
+ MNT_FORCE | MNT_SNAPSHOT | MNT_NONBUSY | \
+ MNT_BYFSID)
/*
* Internal filesystem control flags stored in mnt_kern_flag.
*
@@ -376,18 +359,32 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
#define MNTK_LOOKUP_EXCL_DOTDOT 0x00000800
#define MNTK_MARKER 0x00001000
#define MNTK_UNMAPPED_BUFS 0x00002000
+#define MNTK_USES_BCACHE 0x00004000 /* FS uses the buffer cache. */
#define MNTK_NOASYNC 0x00800000 /* disable async */
#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */
#define MNTK_SUSPEND 0x08000000 /* request write suspension */
#define MNTK_SUSPEND2 0x04000000 /* block secondary writes */
#define MNTK_SUSPENDED 0x10000000 /* write operations are suspended */
-#define MNTK_MPSAFE 0x20000000 /* Filesystem is MPSAFE. */
+#define MNTK_UNUSED1 0x20000000
#define MNTK_LOOKUP_SHARED 0x40000000 /* FS supports shared lock lookups */
#define MNTK_NOKNOTE 0x80000000 /* Don't send KNOTEs from VOP hooks */
-#define MNT_SHARED_WRITES(mp) (((mp) != NULL) && \
- ((mp)->mnt_kern_flag & MNTK_SHARED_WRITES))
+#ifdef _KERNEL
+static inline int
+MNT_SHARED_WRITES(struct mount *mp)
+{
+
+ return (mp != NULL && (mp->mnt_kern_flag & MNTK_SHARED_WRITES) != 0);
+}
+
+static inline int
+MNT_EXTENDED_SHARED(struct mount *mp)
+{
+
+ return (mp != NULL && (mp->mnt_kern_flag & MNTK_EXTENDED_SHARED) != 0);
+}
+#endif
/*
* Sysctl CTL_VFS definitions.
@@ -597,7 +594,6 @@ struct uio;
MALLOC_DECLARE(M_MOUNT);
#endif
extern int maxvfsconf; /* highest defined filesystem type */
-extern int nfs_mount_type; /* vfc_typenum for nfs, or -1 */
TAILQ_HEAD(vfsconfhead, vfsconf);
extern struct vfsconfhead vfsconf;
@@ -633,6 +629,7 @@ typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op,
struct sysctl_req *req);
typedef void vfs_susp_clean_t(struct mount *mp);
typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp);
+typedef void vfs_purge_t(struct mount *mp);
struct vfsops {
vfs_mount_t *vfs_mount;
@@ -652,54 +649,23 @@ struct vfsops {
vfs_susp_clean_t *vfs_susp_clean;
vfs_notify_lowervp_t *vfs_reclaim_lowervp;
vfs_notify_lowervp_t *vfs_unlink_lowervp;
+ vfs_purge_t *vfs_purge;
+ vfs_mount_t *vfs_spare[6]; /* spares for ABI compat */
};
vfs_statfs_t __vfs_statfs;
-#define VFS_NEEDSGIANT_(MP) \
- ((MP) != NULL && ((MP)->mnt_kern_flag & MNTK_MPSAFE) == 0)
-
-#define VFS_NEEDSGIANT(MP) __extension__ \
-({ \
- struct mount *_mp; \
- _mp = (MP); \
- VFS_NEEDSGIANT_(_mp); \
-})
-
-#define VFS_LOCK_GIANT(MP) __extension__ \
-({ \
- int _locked; \
- struct mount *_mp; \
- _mp = (MP); \
- if (VFS_NEEDSGIANT_(_mp)) { \
- mtx_lock(&Giant); \
- _locked = 1; \
- } else \
- _locked = 0; \
- _locked; \
-})
-#define VFS_UNLOCK_GIANT(locked) do \
-{ \
- if ((locked)) \
- mtx_unlock(&Giant); \
-} while (0)
-#define VFS_ASSERT_GIANT(MP) do \
-{ \
- struct mount *_mp; \
- _mp = (MP); \
- if (VFS_NEEDSGIANT_(_mp)) \
- mtx_assert(&Giant, MA_OWNED); \
-} while (0)
-
#define VFS_PROLOGUE(MP) do { \
- int _enable_stops; \
+ struct mount *mp__; \
+ int _prev_stops; \
\
- _enable_stops = ((MP) != NULL && \
- ((MP)->mnt_vfc->vfc_flags & VFCF_SBDRY) && sigdeferstop())
+ mp__ = (MP); \
+ _prev_stops = sigdeferstop((mp__ != NULL && \
+ (mp__->mnt_vfc->vfc_flags & VFCF_SBDRY) != 0) ? \
+ SIGDEFERSTOP_SILENT : SIGDEFERSTOP_NOP);
#define VFS_EPILOGUE(MP) \
- if (_enable_stops) \
- sigallowstop(); \
+ sigallowstop(_prev_stops); \
} while (0)
#define VFS_MOUNT(MP) ({ \
@@ -815,6 +781,14 @@ vfs_statfs_t __vfs_statfs;
} \
} while (0)
+#define VFS_PURGE(MP) do { \
+ if (*(MP)->mnt_op->vfs_purge != NULL) { \
+ VFS_PROLOGUE(MP); \
+ (*(MP)->mnt_op->vfs_purge)(MP); \
+ VFS_EPILOGUE(MP); \
+ } \
+} while (0)
+
#define VFS_KNOTE_LOCKED(vp, hint) do \
{ \
if (((vp)->v_vflag & VV_NOKNOTE) == 0) \
@@ -836,7 +810,8 @@ vfs_statfs_t __vfs_statfs;
* Version numbers.
*/
#define VFS_VERSION_00 0x19660120
-#define VFS_VERSION VFS_VERSION_00
+#define VFS_VERSION_01 0x20121030
+#define VFS_VERSION VFS_VERSION_01
#define VFS_SET(vfsops, fsname, flags) \
static struct vfsconf fsname ## _vfsconf = { \
@@ -853,8 +828,6 @@ vfs_statfs_t __vfs_statfs;
}; \
DECLARE_MODULE(fsname, fsname ## _mod, SI_SUB_VFS, SI_ORDER_MIDDLE)
-extern char *mountrootfsname;
-
/*
* exported vnode operations
*/
@@ -879,6 +852,8 @@ int vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w,
uint64_t val);
int vfs_getopt(struct vfsoptlist *, const char *, void **, int *);
int vfs_getopt_pos(struct vfsoptlist *opts, const char *name);
+int vfs_getopt_size(struct vfsoptlist *opts, const char *name,
+ off_t *value);
char *vfs_getopts(struct vfsoptlist *, const char *, int *error);
int vfs_copyopt(struct vfsoptlist *, const char *, void *, int);
int vfs_filteropt(struct vfsoptlist *, const char **legal);
@@ -921,11 +896,17 @@ void vfs_unmountall(void);
extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */
extern struct mtx mountlist_mtx;
extern struct nfs_public nfs_pub;
+extern struct sx vfsconf_sx;
+#define vfsconf_lock() sx_xlock(&vfsconf_sx)
+#define vfsconf_unlock() sx_xunlock(&vfsconf_sx)
+#define vfsconf_slock() sx_slock(&vfsconf_sx)
+#define vfsconf_sunlock() sx_sunlock(&vfsconf_sx)
/*
* Declarations for these vfs default operations are located in
- * kern/vfs_default.c, they should be used instead of making "dummy"
- * functions or casting entries in the VFS op table to "enopnotsupp()".
+ * kern/vfs_default.c. They will be automatically used to replace
+ * null entries in VFS ops tables when registering a new filesystem
+ * type in the global table.
*/
vfs_root_t vfs_stdroot;
vfs_quotactl_t vfs_stdquotactl;
@@ -940,6 +921,9 @@ vfs_uninit_t vfs_stduninit;
vfs_extattrctl_t vfs_stdextattrctl;
vfs_sysctl_t vfs_stdsysctl;
+void syncer_suspend(void);
+void syncer_resume(void);
+
#else /* !_KERNEL */
#include <sys/cdefs.h>
diff --git a/freebsd/sys/sys/mutex.h b/freebsd/sys/sys/mutex.h
index 0e356e15..84feea7c 100644
--- a/freebsd/sys/sys/mutex.h
+++ b/freebsd/sys/sys/mutex.h
@@ -55,6 +55,7 @@
#define MTX_RECURSE 0x00000004 /* Option: lock allowed to recurse */
#define MTX_NOWITNESS 0x00000008 /* Don't do any witness checking. */
#define MTX_NOPROFILE 0x00000020 /* Don't profile this lock */
+#define MTX_NEW 0x00000040 /* Don't check for double-init */
/*
* Option flags passed to certain lock/unlock routines, through the use
@@ -82,7 +83,8 @@
*
* NOTE: Functions prepended with `_' (underscore) are exported to other parts
* of the kernel via macros, thus allowing us to use the cpp LOCK_FILE
- * and LOCK_LINE. These functions should not be called directly by any
+ * and LOCK_LINE or for hiding the lock cookie crunching to the
+ * consumers. These functions should not be called directly by any
* code using the API. Their macros cover their functionality.
* Functions with a `_' suffix are the entrypoint for the common
* KPI covering both compat shims and fast path case. These can be
@@ -92,52 +94,102 @@
* [See below for descriptions]
*
*/
-void mtx_init(struct mtx *m, const char *name, const char *type, int opts);
-void mtx_destroy(struct mtx *m);
+#ifndef __rtems__
+void _mtx_init(volatile uintptr_t *c, const char *name, const char *type,
+ int opts);
+void _mtx_destroy(volatile uintptr_t *c);
+#endif /* __rtems__ */
void mtx_sysinit(void *arg);
+#ifndef __rtems__
+int _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file,
+ int line);
void mutex_init(void);
-void _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts,
+void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
const char *file, int line);
-void _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line);
+void __mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file,
+ int line);
#ifdef SMP
-void _mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts,
+void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts,
const char *file, int line);
#endif
-void _mtx_unlock_spin(struct mtx *m, int opts, const char *file, int line);
-int _mtx_trylock(struct mtx *m, int opts, const char *file, int line);
+void __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file,
+ int line);
+void __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file,
+ int line);
+void __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
+ int line);
+int __mtx_trylock_spin_flags(volatile uintptr_t *c, int opts,
+ const char *file, int line);
+void __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts,
+ const char *file, int line);
+#else /* __rtems__ */
+void mtx_init(struct mtx *m, const char *name, const char *type, int opts);
+void mtx_destroy(struct mtx *m);
+void mtx_sysinit(void *arg);
+int mtx_trylock_flags_(struct mtx *m, int opts, const char *file, int line);
void _mtx_lock_flags(struct mtx *m, int opts, const char *file, int line);
void _mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line);
+#define _mtx_lock_spin_flags _mtx_lock_flags
+#define _mtx_unlock_spin_flags _mtx_unlock_flags
+#endif /* __rtems__ */
+#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
#ifndef __rtems__
-void _mtx_lock_spin_flags(struct mtx *m, int opts, const char *file,
- int line);
-void _mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file,
- int line);
+void __mtx_assert(const volatile uintptr_t *c, int what, const char *file,
+ int line);
#else /* __rtems__ */
-#define _mtx_lock_spin_flags _mtx_lock_flags
-#define _mtx_unlock_spin_flags _mtx_unlock_flags
-#endif /* __rtems__ */
-#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
void _mtx_assert(struct mtx *m, int what, const char *file, int line);
+#endif /* __rtems__ */
#endif
-void _thread_lock_flags(struct thread *, int, const char *, int);
-
-#define mtx_trylock_flags_(m, opts, file, line) \
- _mtx_trylock((m), (opts), (file), (line))
-
#ifndef __rtems__
-#define thread_lock_flags_(tdp, opts, file, line) \
- _thread_lock_flags((tdp), (opts), (file), (line))
+void thread_lock_flags_(struct thread *, int, const char *, int);
+
#define thread_lock(tdp) \
- _thread_lock_flags((tdp), 0, __FILE__, __LINE__)
+ thread_lock_flags_((tdp), 0, __FILE__, __LINE__)
#define thread_lock_flags(tdp, opt) \
- _thread_lock_flags((tdp), (opt), __FILE__, __LINE__)
+ thread_lock_flags_((tdp), (opt), __FILE__, __LINE__)
#define thread_unlock(tdp) \
mtx_unlock_spin((tdp)->td_lock)
-#else
+#else /* __rtems__ */
#define thread_lock(tdp)
#define thread_lock_flags(tdp, opt)
#define thread_unlock(tdp)
+#endif /* __rtems__ */
+
+#ifndef __rtems__
+/*
+ * Top-level macros to provide lock cookie once the actual mtx is passed.
+ * They will also prevent passing a malformed object to the mtx KPI by
+ * failing compilation as the mtx_lock reserved member will not be found.
+ */
+#define mtx_init(m, n, t, o) \
+ _mtx_init(&(m)->mtx_lock, n, t, o)
+#define mtx_destroy(m) \
+ _mtx_destroy(&(m)->mtx_lock)
+#define mtx_trylock_flags_(m, o, f, l) \
+ _mtx_trylock_flags_(&(m)->mtx_lock, o, f, l)
+#define _mtx_lock_sleep(m, t, o, f, l) \
+ __mtx_lock_sleep(&(m)->mtx_lock, t, o, f, l)
+#define _mtx_unlock_sleep(m, o, f, l) \
+ __mtx_unlock_sleep(&(m)->mtx_lock, o, f, l)
+#ifdef SMP
+#define _mtx_lock_spin(m, t, o, f, l) \
+ _mtx_lock_spin_cookie(&(m)->mtx_lock, t, o, f, l)
+#endif
+#define _mtx_lock_flags(m, o, f, l) \
+ __mtx_lock_flags(&(m)->mtx_lock, o, f, l)
+#define _mtx_unlock_flags(m, o, f, l) \
+ __mtx_unlock_flags(&(m)->mtx_lock, o, f, l)
+#define _mtx_lock_spin_flags(m, o, f, l) \
+ __mtx_lock_spin_flags(&(m)->mtx_lock, o, f, l)
+#define _mtx_trylock_spin_flags(m, o, f, l) \
+ __mtx_trylock_spin_flags(&(m)->mtx_lock, o, f, l)
+#define _mtx_unlock_spin_flags(m, o, f, l) \
+ __mtx_unlock_spin_flags(&(m)->mtx_lock, o, f, l)
+#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
+#define _mtx_assert(m, w, f, l) \
+ __mtx_assert(&(m)->mtx_lock, w, f, l)
#endif
+#endif /* __rtems__ */
#define mtx_recurse lock_object.lo_data
@@ -165,11 +217,11 @@ void _thread_lock_flags(struct thread *, int, const char *, int);
#define __mtx_lock(mp, tid, opts, file, line) do { \
uintptr_t _tid = (uintptr_t)(tid); \
\
- if (!_mtx_obtain_lock((mp), _tid)) \
+ if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid)))\
_mtx_lock_sleep((mp), _tid, (opts), (file), (line)); \
else \
- LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_LOCK_ACQUIRE, \
- mp, 0, 0, (file), (line)); \
+ LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, \
+ mp, 0, 0, file, line); \
} while (0)
/*
@@ -183,15 +235,30 @@ void _thread_lock_flags(struct thread *, int, const char *, int);
uintptr_t _tid = (uintptr_t)(tid); \
\
spinlock_enter(); \
- if (!_mtx_obtain_lock((mp), _tid)) { \
+ if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid))) {\
if ((mp)->mtx_lock == _tid) \
(mp)->mtx_recurse++; \
else \
_mtx_lock_spin((mp), _tid, (opts), (file), (line)); \
} else \
- LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, \
- mp, 0, 0, (file), (line)); \
+ LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, \
+ mp, 0, 0, file, line); \
} while (0)
+#define __mtx_trylock_spin(mp, tid, opts, file, line) __extension__ ({ \
+ uintptr_t _tid = (uintptr_t)(tid); \
+ int _ret; \
+ \
+ spinlock_enter(); \
+ if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid))) {\
+ spinlock_exit(); \
+ _ret = 0; \
+ } else { \
+ LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, \
+ mp, 0, 0, file, line); \
+ _ret = 1; \
+ } \
+ _ret; \
+})
#else /* SMP */
#define __mtx_lock_spin(mp, tid, opts, file, line) do { \
uintptr_t _tid = (uintptr_t)(tid); \
@@ -204,13 +271,29 @@ void _thread_lock_flags(struct thread *, int, const char *, int);
(mp)->mtx_lock = _tid; \
} \
} while (0)
+#define __mtx_trylock_spin(mp, tid, opts, file, line) __extension__ ({ \
+ uintptr_t _tid = (uintptr_t)(tid); \
+ int _ret; \
+ \
+ spinlock_enter(); \
+ if ((mp)->mtx_lock != MTX_UNOWNED) { \
+ spinlock_exit(); \
+ _ret = 0; \
+ } else { \
+ (mp)->mtx_lock = _tid; \
+ _ret = 1; \
+ } \
+ _ret; \
+})
#endif /* SMP */
/* Unlock a normal mutex. */
#define __mtx_unlock(mp, tid, opts, file, line) do { \
uintptr_t _tid = (uintptr_t)(tid); \
\
- if (!_mtx_release_lock((mp), _tid)) \
+ if ((mp)->mtx_recurse == 0) \
+ LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, mp); \
+ if ((mp)->mtx_lock != _tid || !_mtx_release_lock((mp), _tid)) \
_mtx_unlock_sleep((mp), (opts), (file), (line)); \
} while (0)
@@ -229,21 +312,19 @@ void _thread_lock_flags(struct thread *, int, const char *, int);
if (mtx_recursed((mp))) \
(mp)->mtx_recurse--; \
else { \
- LOCKSTAT_PROFILE_RELEASE_LOCK(LS_MTX_SPIN_UNLOCK_RELEASE, \
- mp); \
+ LOCKSTAT_PROFILE_RELEASE_LOCK(spin__release, mp); \
_mtx_release_lock_quick((mp)); \
- } \
- spinlock_exit(); \
+ } \
+ spinlock_exit(); \
} while (0)
#else /* SMP */
#define __mtx_unlock_spin(mp) do { \
if (mtx_recursed((mp))) \
(mp)->mtx_recurse--; \
else { \
- LOCKSTAT_PROFILE_RELEASE_LOCK(LS_MTX_SPIN_UNLOCK_RELEASE, \
- mp); \
+ LOCKSTAT_PROFILE_RELEASE_LOCK(spin__release, mp); \
(mp)->mtx_lock = MTX_UNOWNED; \
- } \
+ } \
spinlock_exit(); \
} while (0)
#endif /* SMP */
@@ -273,6 +354,10 @@ void _thread_lock_flags(struct thread *, int, const char *, int);
* mtx_trylock_flags(m, opts) is used the same way as mtx_trylock() but accepts
* relevant option flags `opts.'
*
+ * mtx_trylock_spin(m) attempts to acquire MTX_SPIN mutex `m' but doesn't
+ * spin if it cannot. Rather, it returns 0 on failure and non-zero on
+ * success. It always returns failure for recursed lock attempts.
+ *
* mtx_initialized(m) returns non-zero if the lock `m' has been initialized.
*
* mtx_owned(m) returns non-zero if the current thread owns the lock `m'
@@ -282,6 +367,7 @@ void _thread_lock_flags(struct thread *, int, const char *, int);
#define mtx_lock(m) mtx_lock_flags((m), 0)
#define mtx_lock_spin(m) mtx_lock_spin_flags((m), 0)
#define mtx_trylock(m) mtx_trylock_flags((m), 0)
+#define mtx_trylock_spin(m) mtx_trylock_spin_flags((m), 0)
#define mtx_unlock(m) mtx_unlock_flags((m), 0)
#define mtx_unlock_spin(m) mtx_unlock_spin_flags((m), 0)
@@ -301,12 +387,8 @@ struct mtx *mtx_pool_alloc(struct mtx_pool *pool);
mtx_unlock_spin(mtx_pool_find((pool), (ptr)))
/*
- * mtxpool_lockbuilder is a pool of sleep locks that is not witness
- * checked and should only be used for building higher level locks.
- *
* mtxpool_sleep is a general purpose pool of sleep mutexes.
*/
-extern struct mtx_pool *mtxpool_lockbuilder;
extern struct mtx_pool *mtxpool_sleep;
#ifndef LOCK_DEBUG
@@ -319,6 +401,8 @@ extern struct mtx_pool *mtxpool_sleep;
_mtx_unlock_flags((m), (opts), (file), (line))
#define mtx_lock_spin_flags_(m, opts, file, line) \
_mtx_lock_spin_flags((m), (opts), (file), (line))
+#define mtx_trylock_spin_flags_(m, opts, file, line) \
+ _mtx_trylock_spin_flags((m), (opts), (file), (line))
#define mtx_unlock_spin_flags_(m, opts, file, line) \
_mtx_unlock_spin_flags((m), (opts), (file), (line))
#else /* LOCK_DEBUG == 0 && !MUTEX_NOINLINE */
@@ -328,6 +412,8 @@ extern struct mtx_pool *mtxpool_sleep;
__mtx_unlock((m), curthread, (opts), (file), (line))
#define mtx_lock_spin_flags_(m, opts, file, line) \
__mtx_lock_spin((m), curthread, (opts), (file), (line))
+#define mtx_trylock_spin_flags_(m, opts, file, line) \
+ __mtx_trylock_spin((m), curthread, (opts), (file), (line))
#define mtx_unlock_spin_flags_(m, opts, file, line) \
__mtx_unlock_spin((m))
#endif /* LOCK_DEBUG > 0 || MUTEX_NOINLINE */
@@ -353,13 +439,16 @@ extern struct mtx_pool *mtxpool_sleep;
mtx_unlock_spin_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
#define mtx_trylock_flags(m, opts) \
mtx_trylock_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
+#define mtx_trylock_spin_flags(m, opts) \
+ mtx_trylock_spin_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
#define mtx_assert(m, what) \
mtx_assert_((m), (what), __FILE__, __LINE__)
#define mtx_sleep(chan, mtx, pri, wmesg, timo) \
- _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (timo))
+ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), \
+ tick_sbt * (timo), 0, C_HARDCLOCK)
-#define mtx_initialized(m) lock_initalized(&(m)->lock_object)
+#define mtx_initialized(m) lock_initialized(&(m)->lock_object)
#ifndef __rtems__
#define mtx_owned(m) (((m)->mtx_lock & ~MTX_FLAGMASK) == (uintptr_t)curthread)
@@ -411,14 +500,8 @@ do { \
}
#endif
-#define UGAR(rval) do { \
- int _val = (rval); \
- mtx_unlock(&Giant); \
- return (_val); \
-} while (0)
-
struct mtx_args {
- struct mtx *ma_mtx;
+ void *ma_mtx;
const char *ma_desc;
int ma_opts;
};
@@ -432,7 +515,7 @@ struct mtx_args {
SYSINIT(name##_mtx_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \
mtx_sysinit, &name##_args); \
SYSUNINIT(name##_mtx_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \
- mtx_destroy, (mtx))
+ _mtx_destroy, __DEVOLATILE(void *, &(mtx)->mtx_lock))
/*
* The INVARIANTS-enabled mtx_assert() functionality.
diff --git a/freebsd/sys/sys/nlist_aout.h b/freebsd/sys/sys/nlist_aout.h
index fc7a3c78..cb3dd859 100644
--- a/freebsd/sys/sys/nlist_aout.h
+++ b/freebsd/sys/sys/nlist_aout.h
@@ -56,8 +56,6 @@ struct nlist {
} n_un;
#else
const char *n_name; /* symbol name (in memory) */
- int : 8 * (sizeof(long) > sizeof(char *) ?
- sizeof(long) - sizeof(char *) : sizeof(char *) - sizeof(long));
#endif
unsigned char n_type; /* type defines */
char n_other; /* ".type" and binding information */
diff --git a/freebsd/sys/sys/osd.h b/freebsd/sys/sys/osd.h
index 14316ae3..c838e97d 100644
--- a/freebsd/sys/sys/osd.h
+++ b/freebsd/sys/sys/osd.h
@@ -59,6 +59,10 @@ int osd_register(u_int type, osd_destructor_t destructor,
void osd_deregister(u_int type, u_int slot);
int osd_set(u_int type, struct osd *osd, u_int slot, void *value);
+void **osd_reserve(u_int slot);
+int osd_set_reserved(u_int type, struct osd *osd, u_int slot, void **rsv,
+ void *value);
+void osd_free_reserved(void **rsv);
void *osd_get(u_int type, struct osd *osd, u_int slot);
void osd_del(u_int type, struct osd *osd, u_int slot);
int osd_call(u_int type, u_int method, void *obj, void *data);
@@ -71,6 +75,8 @@ void osd_exit(u_int type, struct osd *osd);
osd_deregister(OSD_THREAD, (slot))
#define osd_thread_set(td, slot, value) \
osd_set(OSD_THREAD, &(td)->td_osd, (slot), (value))
+#define osd_thread_set_reserved(td, slot, rsv, value) \
+ osd_set_reserved(OSD_THREAD, &(td)->td_osd, (slot), (rsv), (value))
#define osd_thread_get(td, slot) \
osd_get(OSD_THREAD, &(td)->td_osd, (slot))
#define osd_thread_del(td, slot) do { \
@@ -88,6 +94,8 @@ void osd_exit(u_int type, struct osd *osd);
osd_deregister(OSD_JAIL, (slot))
#define osd_jail_set(pr, slot, value) \
osd_set(OSD_JAIL, &(pr)->pr_osd, (slot), (value))
+#define osd_jail_set_reserved(pr, slot, rsv, value) \
+ osd_set_reserved(OSD_JAIL, &(pr)->pr_osd, (slot), (rsv), (value))
#define osd_jail_get(pr, slot) \
osd_get(OSD_JAIL, &(pr)->pr_osd, (slot))
#define osd_jail_del(pr, slot) \
diff --git a/freebsd/sys/sys/pcpu.h b/freebsd/sys/sys/pcpu.h
index ec3f9f94..2d3f3411 100644
--- a/freebsd/sys/sys/pcpu.h
+++ b/freebsd/sys/sys/pcpu.h
@@ -38,7 +38,11 @@
#endif
#include <sys/_cpuset.h>
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
+#include <sys/_sx.h>
#include <sys/queue.h>
+#include <sys/_rmlock.h>
#include <sys/vmmeter.h>
#include <rtems/bsd/sys/resource.h>
#include <machine/pcpu.h>
@@ -141,15 +145,6 @@ extern uintptr_t dpcpu_off[];
#endif /* _KERNEL */
-/*
- * XXXUPS remove as soon as we have per cpu variable
- * linker sets and can define rm_queue in _rm_lock.h
- */
-struct rm_queue {
- struct rm_queue* volatile rmq_next;
- struct rm_queue* volatile rmq_prev;
-};
-
/*
* This structure maps out the global data that needs to be kept on a
* per-cpu basis. The members are accessed via the PCPU_GET/SET/PTR
@@ -157,6 +152,7 @@ struct rm_queue {
* defined in the PCPU_MD_FIELDS macro defined in <machine/pcpu.h>.
*/
struct pcpu {
+#ifndef __rtems__
struct thread *pc_curthread; /* Current thread */
struct thread *pc_idlethread; /* Idle thread */
struct thread *pc_fpcurthread; /* Fp state owner */
@@ -173,17 +169,9 @@ struct pcpu {
long pc_cp_time[CPUSTATES]; /* statclock ticks */
struct device *pc_device;
void *pc_netisr; /* netisr SWI cookie */
- int pc_dnweight; /* vm_page_dontneed() */
+ int pc_unused1; /* unused field */
int pc_domain; /* Memory domain. */
-
- /*
- * Stuff for read mostly lock
- *
- * XXXUPS remove as soon as we have per cpu variable
- * linker sets.
- */
- struct rm_queue pc_rm_queue;
-
+ struct rm_queue pc_rm_queue; /* rmlock list of trackers */
uintptr_t pc_dynamic; /* Dynamic per-cpu data area */
/*
@@ -197,8 +185,19 @@ struct pcpu {
* if only to make kernel debugging easier.
*/
PCPU_MD_FIELDS;
+#else /* __rtems__ */
+ int pc_dummy;
+#endif /* __rtems__ */
} __aligned(CACHE_LINE_SIZE);
+#ifdef CTASSERT
+/*
+ * To minimize memory waste in per-cpu UMA zones, size of struct pcpu
+ * should be denominator of PAGE_SIZE.
+ */
+CTASSERT((PAGE_SIZE / sizeof(struct pcpu)) * sizeof(struct pcpu) == PAGE_SIZE);
+#endif
+
#ifdef _KERNEL
STAILQ_HEAD(cpuhead, pcpu);
@@ -213,6 +212,25 @@ extern struct pcpu *cpuid_to_pcpu[];
#endif
#define curvidata PCPU_GET(vidata)
+/* Accessor to elements allocated via UMA_ZONE_PCPU zone. */
+static inline void *
+zpcpu_get(void *base)
+{
+
+#ifndef __rtems__
+ return ((char *)(base) + sizeof(struct pcpu) * curcpu);
+#else /* __rtems__ */
+ return ((char *)(base) + sizeof(struct pcpu) * _SMP_Get_current_processor());
+#endif /* __rtems__ */
+}
+
+static inline void *
+zpcpu_get_cpu(void *base, int cpu)
+{
+
+ return ((char *)(base) + sizeof(struct pcpu) * cpu);
+}
+
/*
* Machine dependent callouts. cpu_pcpu_init() is responsible for
* initializing machine dependent fields of struct pcpu, and
diff --git a/freebsd/sys/sys/pipe.h b/freebsd/sys/sys/pipe.h
index c59ecc75..d596b3bb 100755
--- a/freebsd/sys/sys/pipe.h
+++ b/freebsd/sys/sys/pipe.h
@@ -54,9 +54,12 @@
#define PIPENPAGES (BIG_PIPE_SIZE / PAGE_SIZE + 1)
/*
- * See sys_pipe.c for info on what these limits mean.
+ * See sys_pipe.c for info on what these limits mean.
*/
extern long maxpipekva;
+#ifndef __rtems__
+extern struct fileops pipeops;
+#endif /* __rtems__ */
/*
* Pipe buffer information.
@@ -96,6 +99,7 @@ struct pipemapping {
#define PIPE_LWANT 0x200 /* Process wants exclusive access to pointers/data. */
#define PIPE_DIRECTW 0x400 /* Pipe direct write active. */
#define PIPE_DIRECTOK 0x800 /* Direct mode ok. */
+#define PIPE_NAMED 0x1000 /* Is a named pipe. */
/*
* Per-pipe data structure.
@@ -114,6 +118,7 @@ struct pipe {
u_int pipe_state; /* pipe status info */
int pipe_busy; /* busy flag, mostly to handle rundown sanely */
int pipe_present; /* still present? */
+ int pipe_wgen; /* writer generation for named pipe */
ino_t pipe_ino; /* fake inode for stat(2) */
};
@@ -140,5 +145,7 @@ struct pipepair {
#define PIPE_UNLOCK(pipe) mtx_unlock(PIPE_MTX(pipe))
#define PIPE_LOCK_ASSERT(pipe, type) mtx_assert(PIPE_MTX(pipe), (type))
-
+void pipe_dtor(struct pipe *dpipe);
+void pipe_named_ctor(struct pipe **ppipe, struct thread *td);
+void pipeselwakeup(struct pipe *cpipe);
#endif /* !_SYS_PIPE_H_ */
diff --git a/freebsd/sys/sys/priv.h b/freebsd/sys/sys/priv.h
index 1d1e8f20..ec0943aa 100644
--- a/freebsd/sys/sys/priv.h
+++ b/freebsd/sys/sys/priv.h
@@ -45,8 +45,9 @@
* loadable kernel module ABI, and should not be changed across minor
* releases.
*
- * When adding a new privilege, remember to determine if it's appropriate for
- * use in jail, and update the privilege switch in kern_jail.c as necessary.
+ * When adding a new privilege, remember to determine if it's appropriate
+ * for use in jail, and update the privilege switch in prison_priv_check()
+ * in kern_jail.c as necessary.
*/
/*
@@ -111,6 +112,7 @@
#define PRIV_DEBUG_DIFFCRED 80 /* Exempt debugging other users. */
#define PRIV_DEBUG_SUGID 81 /* Exempt debugging setuid proc. */
#define PRIV_DEBUG_UNPRIV 82 /* Exempt unprivileged debug limit. */
+#define PRIV_DEBUG_DENIED 83 /* Exempt P2_NOTRACE. */
/*
* Dtrace privileges.
@@ -132,7 +134,7 @@
#define PRIV_JAIL_REMOVE 112 /* Remove a jail. */
/*
- * Kernel environment priveleges.
+ * Kernel environment privileges.
*/
#define PRIV_KENV_SET 120 /* Set kernel env. variables. */
#define PRIV_KENV_UNSET 121 /* Unset kernel env. variables. */
@@ -158,7 +160,8 @@
#define PRIV_PROC_SETRLIMIT 162 /* Can raise resources limits. */
#define PRIV_PROC_SETLOGINCLASS 163 /* Can call setloginclass(2). */
-/* System V IPC privileges.
+/*
+ * System V IPC privileges.
*/
#define PRIV_IPC_READ 170 /* Can override IPC read perm. */
#define PRIV_IPC_WRITE 171 /* Can override IPC write perm. */
@@ -338,6 +341,8 @@
#define PRIV_NET_SETIFVNET 417 /* Move interface to vnet. */
#define PRIV_NET_SETIFDESCR 418 /* Set interface description. */
#define PRIV_NET_SETIFFIB 419 /* Set interface fib. */
+#define PRIV_NET_VXLAN 420 /* Administer vxlan. */
+#define PRIV_NET_SETVLANPCP 421 /* Set VLAN priority. */
/*
* 802.11-related privileges.
@@ -346,9 +351,9 @@
#define PRIV_NET80211_MANAGE 441 /* Administer 802.11. */
/*
- * AppleTalk privileges.
+ * Placeholder for AppleTalk privileges, not supported anymore.
*/
-#define PRIV_NETATALK_RESERVEDPORT 450 /* Bind low port number. */
+#define _PRIV_NETATALK_RESERVEDPORT 450 /* Bind low port number. */
/*
* ATM privileges.
@@ -389,12 +394,13 @@
#define PRIV_NETINET_REUSEPORT 504 /* Allow [rapid] port/address reuse. */
#define PRIV_NETINET_SETHDROPTS 505 /* Set certain IPv4/6 header options. */
#define PRIV_NETINET_BINDANY 506 /* Allow bind to any address. */
+#define PRIV_NETINET_HASHKEY 507 /* Get and set hash keys for IPv4/6. */
/*
- * IPX/SPX privileges.
+ * Placeholders for IPX/SPX privileges, not supported any more.
*/
-#define PRIV_NETIPX_RESERVEDPORT 520 /* Bind low port number. */
-#define PRIV_NETIPX_RAW 521 /* Open netipx raw socket. */
+#define _PRIV_NETIPX_RESERVEDPORT 520 /* Bind low port number. */
+#define _PRIV_NETIPX_RAW 521 /* Open netipx raw socket. */
/*
* NCP privileges.
@@ -494,9 +500,15 @@
#define PRIV_RCTL_REMOVE_RULE 674
/*
+ * mem(4) privileges.
+ */
+#define PRIV_KMEM_READ 680 /* Open mem/kmem for reading. */
+#define PRIV_KMEM_WRITE 681 /* Open mem/kmem for writing. */
+
+/*
* Track end of privilege list.
*/
-#define _PRIV_HIGHEST 675
+#define _PRIV_HIGHEST 682
/*
* Validate that a named privilege is known by the privilege system. Invalid
diff --git a/freebsd/sys/sys/proc.h b/freebsd/sys/sys/proc.h
index e866552c..4a695ef9 100644
--- a/freebsd/sys/sys/proc.h
+++ b/freebsd/sys/sys/proc.h
@@ -63,6 +63,7 @@
#endif
#include <sys/ucontext.h>
#include <sys/ucred.h>
+#include <sys/_vm_domain.h>
#include <machine/proc.h> /* Machine-dependent proc substruct. */
/*
@@ -148,6 +149,8 @@ struct pargs {
* q - td_contested lock
* r - p_peers lock
* t - thread lock
+ * u - process stat lock
+ * w - process timer lock
* x - created at fork, only changes during single threading in exec
* y - created at first aio, doesn't change until exit or exec at which
* point we are single-threaded and only curthread changes it
@@ -158,6 +161,8 @@ struct pargs {
* for write access.
*/
struct cpuset;
+struct filecaps;
+struct filemon;
struct kaioinfo;
struct kaudit_record;
struct kdtrace_proc;
@@ -170,6 +175,7 @@ struct procdesc;
struct racct;
struct sbuf;
struct sleepqueue;
+struct syscall_args;
struct td_sched;
struct thread;
struct trapframe;
@@ -183,14 +189,14 @@ struct turnstile;
* userland asks for rusage info. Backwards compatibility prevents putting
* this directly in the user-visible rusage struct.
*
- * Locking for p_rux: (cj) means (j) for p_rux and (c) for p_crux.
+ * Locking for p_rux: (cu) means (u) for p_rux and (c) for p_crux.
* Locking for td_rux: (t) for all fields.
*/
struct rusage_ext {
- uint64_t rux_runtime; /* (cj) Real time. */
- uint64_t rux_uticks; /* (cj) Statclock hits in user mode. */
- uint64_t rux_sticks; /* (cj) Statclock hits in sys mode. */
- uint64_t rux_iticks; /* (cj) Statclock hits in intr mode. */
+ uint64_t rux_runtime; /* (cu) Real time. */
+ uint64_t rux_uticks; /* (cu) Statclock hits in user mode. */
+ uint64_t rux_sticks; /* (cu) Statclock hits in sys mode. */
+ uint64_t rux_iticks; /* (cu) Statclock hits in intr mode. */
uint64_t rux_uu; /* (c) Previous user time in usec. */
uint64_t rux_su; /* (c) Previous sys time in usec. */
uint64_t rux_tu; /* (c) Previous total time in usec. */
@@ -235,7 +241,9 @@ struct thread {
struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */
#ifndef __rtems__
struct turnstile *td_turnstile; /* (k) Associated turnstile. */
+ struct rl_q_entry *td_rlqe; /* (k) Associated range lock entry. */
struct umtx_q *td_umtxq; /* (c?) Link for when we're blocked. */
+ struct vm_domain_policy td_vm_dom_policy; /* (c) current numa domain policy */
lwpid_t td_tid; /* (b) Thread ID. */
sigqueue_t td_sigqueue; /* (c) Sigs arrived, not delivered. */
#define td_siglist td_sigqueue.sq_signals
@@ -255,11 +263,9 @@ struct thread {
void *td_wchan; /* (t) Sleep address. */
const char *td_wmesg; /* (t) Reason for sleep. */
#ifndef __rtems__
- u_char td_lastcpu; /* (t) Last cpu we were on. */
- u_char td_oncpu; /* (t) Which cpu we are on. */
volatile u_char td_owepreempt; /* (k*) Preempt on last critical_exit */
u_char td_tsqueue; /* (t) Turnstile queue blocked on. */
- short td_locks; /* (k) Count of non-spin locks. */
+ short td_locks; /* (k) Debug: count of non-spin locks */
short td_rw_rlocks; /* (k) Count of rwlock read locks. */
short td_lk_slocks; /* (k) Count of lockmgr shared locks. */
short td_stopsched; /* (k) Scheduler stopped. */
@@ -272,10 +278,12 @@ struct thread {
#endif /* __rtems__ */
struct ucred *td_ucred; /* (k) Reference to credentials. */
#ifndef __rtems__
+ struct plimit *td_limit; /* (k) Resource limits. */
u_int td_estcpu; /* (t) estimated cpu utilization */
int td_slptick; /* (t) Time at sleep. */
int td_blktick; /* (t) Time spent blocked. */
int td_swvoltick; /* (t) Time at last SW_VOL switch. */
+ int td_swinvoltick; /* (t) Time at last SW_INVOL switch. */
u_int td_cow; /* (*) Number of copy-on-write faults */
struct rusage td_ru; /* (t) rusage information. */
struct rusage_ext td_rux; /* (t) Internal rusage information. */
@@ -287,7 +295,6 @@ struct thread {
u_int td_uticks; /* (t) Statclock hits in user mode. */
int td_intrval; /* (t) Return value for sleepq. */
sigset_t td_oldsigmask; /* (k) Saved mask from pre sigpause. */
- sigset_t td_sigmask; /* (c) Current signal mask. */
volatile u_int td_generation; /* (k) For detection of preemption */
stack_t td_sigstk; /* (k) Stack ptr and on-stack flag. */
int td_xsig; /* (c) Signal for ptrace */
@@ -301,20 +308,31 @@ struct thread {
struct osd td_osd; /* (k) Object specific data. */
struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */
pid_t td_dbg_forked; /* (c) Child pid for debugger. */
-#define td_endzero td_rqindex
+ u_int td_vp_reserv; /* (k) Count of reserved vnodes. */
+ int td_no_sleeping; /* (k) Sleeping disabled count. */
+ int td_dom_rr_idx; /* (k) RR Numa domain selection. */
+ void *td_su; /* (k) FFS SU private */
+ sbintime_t td_sleeptimo; /* (t) Sleep timeout. */
+#define td_endzero td_sigmask
-/* Copied during fork1() or thread_sched_upcall(). */
+/* Copied during fork1() or create_thread(). */
#define td_startcopy td_endzero
+ sigset_t td_sigmask; /* (c) Current signal mask. */
u_char td_rqindex; /* (t) Run queue index. */
u_char td_base_pri; /* (t) Thread base kernel priority. */
u_char td_priority; /* (t) Thread active priority. */
u_char td_pri_class; /* (t) Scheduling class. */
u_char td_user_pri; /* (t) User pri from estcpu and nice. */
u_char td_base_user_pri; /* (t) Base user pri */
+ u_int td_dbg_sc_code; /* (c) Syscall code to debugger. */
+ u_int td_dbg_sc_narg; /* (c) Syscall arg count to debugger.*/
+ uintptr_t td_rb_list; /* (k) Robust list head. */
+ uintptr_t td_rbp_list; /* (k) Robust priv list head. */
+ uintptr_t td_rb_inact; /* (k) Current in-action mutex loc. */
#define td_endcopy td_pcb
/*
- * Fields that must be manually set in fork1() or thread_sched_upcall()
+ * Fields that must be manually set in fork1() or create_thread()
* or already have been set in the allocator, constructor, etc.
*/
struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. */
@@ -325,9 +343,16 @@ struct thread {
TDS_RUNQ,
TDS_RUNNING
} td_state; /* (t) thread state */
-#endif /* __rtems__ */
+ union {
+ register_t tdu_retval[2];
+ off_t tdu_off;
+ } td_uretoff; /* (k) Syscall aux returns. */
+#else /* __rtems__ */
register_t td_retval[2]; /* (k) Syscall aux returns. */
+#endif /* __rtems__ */
#ifndef __rtems__
+#define td_retval td_uretoff.tdu_retval
+ u_int td_cowgen; /* (k) Generation of COW pointers. */
struct callout td_slpcallout; /* (h) Callout for sleep. */
struct trapframe *td_frame; /* (k) */
struct vm_object *td_kstack_obj;/* (a) Kstack object. */
@@ -335,7 +360,6 @@ struct thread {
int td_kstack_pages; /* (a) Size of the kstack. */
volatile u_int td_critnest; /* (k*) Critical section nest level. */
struct mdthread td_md; /* (k) Any machine-dependent fields. */
- struct td_sched *td_sched; /* (*) Scheduler-specific data. */
struct kaudit_record *td_ar; /* (k) Active audit record, if any. */
struct lpohead td_lprof[2]; /* (a) lock profiling objects. */
struct kdtrace_thread *td_dtrace; /* (*) DTrace-specific data. */
@@ -346,11 +370,17 @@ struct thread {
struct proc *td_rfppwait_p; /* (k) The vforked child */
struct vm_page **td_ma; /* (k) uio pages held */
int td_ma_cnt; /* (k) size of *td_ma */
- struct rl_q_entry *td_rlqe; /* (k) Associated range lock entry. */
- u_int td_vp_reserv; /* (k) Count of reserved vnodes. */
+ void *td_emuldata; /* Emulator state data */
+ int td_lastcpu; /* (t) Last cpu we were on. */
+ int td_oncpu; /* (t) Which cpu we are on. */
#endif /* __rtems__ */
};
+struct thread0_storage {
+ struct thread t0st_thread;
+ uint64_t t0st_sched[10];
+};
+
struct mtx *thread_lock_block(struct thread *);
void thread_lock_unblock(struct thread *, struct mtx *);
void thread_lock_set(struct thread *, struct mtx *);
@@ -372,12 +402,15 @@ do { \
KASSERT((__m == &blocked_lock || __m == (lock)), \
("Thread %p lock %p does not match %p", td, __m, (lock))); \
} while (0)
+
+#define TD_LOCKS_INC(td) ((td)->td_locks++)
+#define TD_LOCKS_DEC(td) ((td)->td_locks--)
#else
#define THREAD_LOCKPTR_ASSERT(td, lock)
-#endif
-#define CRITICAL_ASSERT(td) \
- KASSERT((td)->td_critnest >= 1, ("Not in critical section"));
+#define TD_LOCKS_INC(td)
+#define TD_LOCKS_DEC(td)
+#endif
/*
* Flags kept in td_flags:
@@ -392,19 +425,19 @@ do { \
#define TDF_CANSWAP 0x00000040 /* Thread can be swapped. */
#define TDF_SLEEPABORT 0x00000080 /* sleepq_abort was called. */
#define TDF_KTH_SUSP 0x00000100 /* kthread is suspended */
-#define TDF_UNUSED09 0x00000200 /* --available-- */
+#define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */
#define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */
#define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */
-#define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */
+#define TDF_UNUSED12 0x00001000 /* --available-- */
#define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */
#define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */
#define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */
#define TDF_NEEDRESCHED 0x00010000 /* Thread needs to yield. */
#define TDF_NEEDSIGCHK 0x00020000 /* Thread may need signal delivery. */
#define TDF_NOLOAD 0x00040000 /* Ignore during load avg calculations. */
-#define TDF_UNUSED19 0x00080000 /* --available-- */
+#define TDF_SERESTART 0x00080000 /* ERESTART on stop attempts. */
#define TDF_THRWAKEUP 0x00100000 /* Libthr thread must not suspend itself. */
-#define TDF_UNUSED21 0x00200000 /* --available-- */
+#define TDF_SEINTR 0x00200000 /* EINTR on stop attempts. */
#define TDF_SWAPINREQ 0x00400000 /* Swapin request due to wakeup. */
#define TDF_UNUSED23 0x00800000 /* --available-- */
#define TDF_SCHED0 0x01000000 /* Reserved for scheduler private use */
@@ -427,6 +460,10 @@ do { \
#define TDB_STOPATFORK 0x00000080 /* Stop at the return from fork (child
only) */
#define TDB_CHILD 0x00000100 /* New child indicator for ptrace() */
+#define TDB_BORN 0x00000200 /* New LWP indicator for ptrace() */
+#define TDB_EXIT 0x00000400 /* Exiting LWP indicator for ptrace() */
+#define TDB_VFORK 0x00000800 /* vfork indicator for ptrace() */
+#define TDB_FSTP 0x00001000 /* The thread is PT_ATTACH leader */
/*
* "Private" flags kept in td_pflags:
@@ -438,9 +475,9 @@ do { \
#define TDP_BUFNEED 0x00000008 /* Do not recurse into the buf flush */
#define TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */
#define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */
-#define TDP_DEADLKTREAT 0x00000040 /* Lock aquisition - deadlock treatment. */
+#define TDP_DEADLKTREAT 0x00000040 /* Lock acquisition - deadlock treatment. */
#define TDP_NOFAULTING 0x00000080 /* Do not handle page faults. */
-#define TDP_NOSLEEPING 0x00000100 /* Thread is not allowed to sleep on a sq. */
+#define TDP_UNUSED9 0x00000100 /* --available-- */
#define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST. */
#define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. */
#define TDP_SYNCIO 0x00000800 /* Local override, disable async i/o. */
@@ -461,7 +498,7 @@ do { \
#define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */
#define TDP_NERRNO 0x08000000 /* Last errno is already in td_errno */
#define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */
-#define TDP_DEVMEMIO 0x20000000 /* Accessing memory for /dev/mem */
+#define TDP_FORKING 0x20000000 /* Thread is being created through fork() */
#define TDP_EXECVMSPC 0x40000000 /* Execve destroyed old vmspace */
/*
@@ -520,6 +557,11 @@ do { \
#define TD_SET_RUNQ(td) (td)->td_state = TDS_RUNQ
#define TD_SET_CAN_RUN(td) (td)->td_state = TDS_CAN_RUN
+#define TD_SBDRY_INTR(td) \
+ (((td)->td_flags & (TDF_SEINTR | TDF_SERESTART)) != 0)
+#define TD_SBDRY_ERRNO(td) \
+ (((td)->td_flags & TDF_SEINTR) != 0 ? EINTR : ERESTART)
+
/*
* Process structure.
*/
@@ -532,7 +574,7 @@ struct proc {
struct filedesc *p_fd; /* (b) Open files. */
struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */
struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */
- struct plimit *p_limit; /* (c) Process limits. */
+ struct plimit *p_limit; /* (c) Resource limits. */
struct callout p_limco; /* (c) Limit callout handle */
struct sigacts *p_sigacts; /* (x) Signal actions, state (CPU). */
@@ -549,7 +591,15 @@ struct proc {
struct proc *p_pptr; /* (c + e) Pointer to parent process. */
LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. */
LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */
+ struct proc *p_reaper; /* (e) My reaper. */
+ LIST_HEAD(, proc) p_reaplist; /* (e) List of my descendants
+ (if I am reaper). */
+ LIST_ENTRY(proc) p_reapsibling; /* (e) List of siblings - descendants of
+ the same reaper. */
struct mtx p_mtx; /* (n) Lock for this struct. */
+ struct mtx p_statmtx; /* Lock for the stats */
+ struct mtx p_itimmtx; /* Lock for the virt/prof timers */
+ struct mtx p_profmtx; /* Lock for the profiling */
struct ksiginfo *p_ksi; /* Locked by parent proc lock */
sigqueue_t p_sigqueue; /* (c) Sigs not delivered to a td. */
#define p_siglist p_sigqueue.sq_signals
@@ -557,12 +607,12 @@ struct proc {
/* The following fields are all zeroed upon creation in fork. */
#define p_startzero p_oppid
pid_t p_oppid; /* (c + e) Save ppid in ptrace. XXX */
- int p_pad_dbg_child;
struct vmspace *p_vmspace; /* (b) Address space. */
u_int p_swtick; /* (c) Tick when swapped in or out. */
+ u_int p_cowgen; /* (c) Generation of COW pointers. */
struct itimerval p_realtimer; /* (c) Alarm timer. */
struct rusage p_ru; /* (a) Exit information. */
- struct rusage_ext p_rux; /* (cj) Internal resource usage. */
+ struct rusage_ext p_rux; /* (cu) Internal resource usage. */
struct rusage_ext p_crux; /* (c) Internal child resource usage. */
int p_profthreads; /* (c) Num threads in addupc_task. */
volatile int p_exitthreads; /* (j) Number of threads exiting */
@@ -579,6 +629,7 @@ struct proc {
u_int p_stype; /* (c) Stop event type. */
char p_step; /* (c) Process is stopped. */
u_char p_pfsflags; /* (c) Procfs flags. */
+ u_int p_ptevents; /* (c) ptrace() event mask. */
struct nlminfo *p_nlminfo; /* (?) Only used by/for lockd. */
struct kaioinfo *p_aioinfo; /* (y) ASYNC I/O info. */
struct thread *p_singlethread;/* (c + j) If single threading this is it */
@@ -588,6 +639,9 @@ struct proc {
int p_pendingcnt; /* how many signals are pending */
struct itimers *p_itimers; /* (c) POSIX interval timers. */
struct procdesc *p_procdesc; /* (e) Process descriptor, if any. */
+ u_int p_treeflag; /* (e) P_TREE flags */
+ int p_pendingexits; /* (c) Count of pending thread exits. */
+ struct filemon *p_filemon; /* (c) filemon-specific data. */
/* End area that is zeroed on creation. */
#define p_endzero p_magic
@@ -596,18 +650,21 @@ struct proc {
u_int p_magic; /* (b) Magic number. */
int p_osrel; /* (x) osreldate for the
binary (from ELF note, if any) */
- char p_comm[MAXCOMLEN + 1]; /* (b) Process name. */
- struct pgrp *p_pgrp; /* (c + e) Pointer to process group. */
+ char p_comm[MAXCOMLEN + 1]; /* (x) Process name. */
struct sysentvec *p_sysent; /* (b) Syscall dispatch info. */
struct pargs *p_args; /* (c) Process arguments. */
rlim_t p_cpulimit; /* (c) Current CPU limit in seconds. */
signed char p_nice; /* (c) Process "nice" value. */
int p_fibnum; /* in this routing domain XXX MRT */
+ pid_t p_reapsubtree; /* (e) Pid of the direct child of the
+ reaper which spawned
+ our subtree. */
+ u_int p_xexit; /* (c) Exit code. */
+ u_int p_xsig; /* (c) Stop/kill sig. */
/* End area that is copied on creation. */
-#define p_endcopy p_xstat
-
- u_short p_xstat; /* (c) Exit status; also stop sig. */
- struct knlist p_klist; /* (c) Knotes attached to this proc. */
+#define p_endcopy p_xsig
+ struct pgrp *p_pgrp; /* (c + e) Pointer to process group. */
+ struct knlist *p_klist; /* (c) Knotes attached to this proc. */
int p_numthreads; /* (c) Number of threads. */
struct mdproc p_md; /* Any machine-dependent fields. */
struct callout p_itcallout; /* (h + c) Interval timer callout. */
@@ -616,7 +673,6 @@ struct proc {
struct proc *p_leader; /* (b) */
void *p_emuldata; /* (c) Emulator state data. */
struct label *p_label; /* (*) Proc (not subject) MAC label. */
- struct p_sched *p_sched; /* (*) Scheduler-specific data. */
STAILQ_HEAD(, ktr_request) p_ktr; /* (o) KTR event queue. */
LIST_HEAD(, mqueue_notifier) p_mqnotifier; /* (c) mqueue notifiers.*/
struct kdtrace_proc *p_dtrace; /* (*) DTrace-specific data. */
@@ -625,6 +681,8 @@ struct proc {
after fork. */
uint64_t p_prev_runtime; /* (c) Resource usage accounting. */
struct racct *p_racct; /* (b) Resource accounting. */
+ int p_throttled; /* (c) Flag for racct pcpu throttling */
+ struct vm_domain_policy p_vm_dom_policy; /* (c) process default VM domain, or -1 */
/*
 * An orphan is the child that has been re-parented to the
* debugger as a result of attaching to it. Need to keep
@@ -633,24 +691,37 @@ struct proc {
*/
LIST_ENTRY(proc) p_orphan; /* (e) List of orphan processes. */
LIST_HEAD(, proc) p_orphans; /* (e) Pointer to list of orphans. */
- u_char p_throttled; /* (c) Flag for racct pcpu throttling */
#endif /* __rtems__ */
};
#define p_session p_pgrp->pg_session
#define p_pgid p_pgrp->pg_id
-#define NOCPU 0xff /* For when we aren't on a CPU. */
+#define NOCPU (-1) /* For when we aren't on a CPU. */
+#define NOCPU_OLD (255)
+#define MAXCPU_OLD (254)
#define PROC_SLOCK(p) mtx_lock_spin(&(p)->p_slock)
#define PROC_SUNLOCK(p) mtx_unlock_spin(&(p)->p_slock)
#define PROC_SLOCK_ASSERT(p, type) mtx_assert(&(p)->p_slock, (type))
+#define PROC_STATLOCK(p) mtx_lock_spin(&(p)->p_statmtx)
+#define PROC_STATUNLOCK(p) mtx_unlock_spin(&(p)->p_statmtx)
+#define PROC_STATLOCK_ASSERT(p, type) mtx_assert(&(p)->p_statmtx, (type))
+
+#define PROC_ITIMLOCK(p) mtx_lock_spin(&(p)->p_itimmtx)
+#define PROC_ITIMUNLOCK(p) mtx_unlock_spin(&(p)->p_itimmtx)
+#define PROC_ITIMLOCK_ASSERT(p, type) mtx_assert(&(p)->p_itimmtx, (type))
+
+#define PROC_PROFLOCK(p) mtx_lock_spin(&(p)->p_profmtx)
+#define PROC_PROFUNLOCK(p) mtx_unlock_spin(&(p)->p_profmtx)
+#define PROC_PROFLOCK_ASSERT(p, type) mtx_assert(&(p)->p_profmtx, (type))
+
/* These flags are kept in p_flag. */
#define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */
#define P_CONTROLT 0x00002 /* Has a controlling terminal. */
-#define P_KTHREAD 0x00004 /* Kernel thread (*). */
-#define P_FOLLOWFORK 0x00008 /* Attach parent debugger to children. */
+#define P_KPROC 0x00004 /* Kernel process. */
+#define P_UNUSED3 0x00008 /* --available-- */
#define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */
#define P_PROFIL 0x00020 /* Has started profiling. */
#define P_STOPPROF 0x00040 /* Has thread requesting to stop profiling. */
@@ -672,7 +743,7 @@ struct proc {
#define P_SINGLE_BOUNDARY 0x400000 /* Threads should suspend at user boundary. */
#define P_HWPMC 0x800000 /* Process is using HWPMCs */
#define P_JAILED 0x1000000 /* Process is in jail. */
-#define P_ORPHAN 0x2000000 /* Orphaned. */
+#define P_TOTAL_STOP 0x2000000 /* Stopped in stop_all_proc. */
#define P_INEXEC 0x4000000 /* Process is in execve(). */
#define P_STATCHILD 0x8000000 /* Child process stopped or exited. */
#define P_INMEM 0x10000000 /* Loaded into memory. */
@@ -686,6 +757,16 @@ struct proc {
/* These flags are kept in p_flag2. */
#define P2_INHERIT_PROTECTED 0x00000001 /* New children get P_PROTECTED. */
+#define P2_NOTRACE 0x00000002 /* No ptrace(2) attach or coredumps. */
+#define P2_NOTRACE_EXEC 0x00000004 /* Keep P2_NOPTRACE on exec(2). */
+#define P2_AST_SU 0x00000008 /* Handles SU ast for kthreads. */
+#define P2_PTRACE_FSTP 0x00000010 /* SIGSTOP from PT_ATTACH not yet handled. */
+
+/* Flags protected by proctree_lock, kept in p_treeflags. */
+#define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */
+#define P_TREE_FIRST_ORPHAN 0x00000002 /* First element of orphan
+ list */
+#define P_TREE_REAPER 0x00000004 /* Reaper of subtree */
/*
* These were process status values (p_stat), now they are only used in
@@ -707,7 +788,7 @@ struct proc {
#define SW_TYPE_MASK 0xff /* First 8 bits are switch type */
#define SWT_NONE 0 /* Unspecified switch. */
#define SWT_PREEMPT 1 /* Switching due to preemption. */
-#define SWT_OWEPREEMPT 2 /* Switching due to opepreempt. */
+#define SWT_OWEPREEMPT 2 /* Switching due to owepreempt. */
#define SWT_TURNSTILE 3 /* Turnstile contention. */
#define SWT_SLEEPQ 4 /* Sleepq wait. */
#define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */
@@ -728,6 +809,7 @@ struct proc {
#define SINGLE_NO_EXIT 0
#define SINGLE_EXIT 1
#define SINGLE_BOUNDARY 2
+#define SINGLE_ALLPROC 3
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_PARGS);
@@ -755,6 +837,8 @@ extern pid_t pid_max;
#define STOPEVENT(p, e, v) do { \
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, \
+ "checking stopevent %d", (e)); \
if ((p)->p_stops & (e)) { \
PROC_LOCK(p); \
stopevent((p), (e), (v)); \
@@ -797,8 +881,21 @@ extern pid_t pid_max;
#define SESS_LOCKED(s) mtx_owned(&(s)->s_mtx)
#define SESS_LOCK_ASSERT(s, type) mtx_assert(&(s)->s_mtx, (type))
+/*
+ * Non-zero p_lock ensures that:
+ * - exit1() is not performed until p_lock reaches zero;
+ * - the process' threads stack are not swapped out if they are currently
+ * not (P_INMEM).
+ *
+ * PHOLD() asserts that the process (except the current process) is
+ * not exiting, increments p_lock and swaps threads stacks into memory,
+ * if needed.
+ * _PHOLD() is same as PHOLD(), it takes the process locked.
+ * _PHOLD_LITE() also takes the process locked, but comparing with
+ * _PHOLD(), it only guarantees that exit1() is not executed,
+ * faultin() is not called.
+ */
#ifndef __rtems__
-/* Hold process U-area in memory, normally for ptrace/procfs work. */
#define PHOLD(p) do { \
PROC_LOCK(p); \
_PHOLD(p); \
@@ -807,13 +904,19 @@ extern pid_t pid_max;
#define _PHOLD(p) do { \
PROC_LOCK_ASSERT((p), MA_OWNED); \
KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc, \
- ("PHOLD of exiting process")); \
+ ("PHOLD of exiting process %p", p)); \
(p)->p_lock++; \
if (((p)->p_flag & P_INMEM) == 0) \
faultin((p)); \
} while (0)
-#define PROC_ASSERT_HELD(p) do { \
- KASSERT((p)->p_lock > 0, ("process not held")); \
+#define _PHOLD_LITE(p) do { \
+ PROC_LOCK_ASSERT((p), MA_OWNED); \
+ KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc, \
+ ("PHOLD of exiting process %p", p)); \
+ (p)->p_lock++; \
+} while (0)
+#define PROC_ASSERT_HELD(p) do { \
+ KASSERT((p)->p_lock > 0, ("process %p not held", p)); \
} while (0)
#define PRELE(p) do { \
@@ -828,8 +931,13 @@ extern pid_t pid_max;
if (((p)->p_flag & P_WEXIT) && (p)->p_lock == 0) \
wakeup(&(p)->p_lock); \
} while (0)
-#define PROC_ASSERT_NOT_HELD(p) do { \
- KASSERT((p)->p_lock == 0, ("process held")); \
+#define PROC_ASSERT_NOT_HELD(p) do { \
+ KASSERT((p)->p_lock == 0, ("process %p held", p)); \
+} while (0)
+
+#define PROC_UPDATE_COW(p) do { \
+ PROC_LOCK_ASSERT((p), MA_OWNED); \
+ (p)->p_cowgen++; \
} while (0)
#else /* __rtems__ */
#define PHOLD(x) do { } while (0)
@@ -840,17 +948,11 @@ extern pid_t pid_max;
#define thread_safetoswapout(td) ((td)->td_flags & TDF_CANSWAP)
/* Control whether or not it is safe for curthread to sleep. */
-#define THREAD_NO_SLEEPING() do { \
- KASSERT(!(curthread->td_pflags & TDP_NOSLEEPING), \
- ("nested no sleeping")); \
- curthread->td_pflags |= TDP_NOSLEEPING; \
-} while (0)
+#define THREAD_NO_SLEEPING() ((curthread)->td_no_sleeping++)
-#define THREAD_SLEEPING_OK() do { \
- KASSERT((curthread->td_pflags & TDP_NOSLEEPING), \
- ("nested sleeping ok")); \
- curthread->td_pflags &= ~TDP_NOSLEEPING; \
-} while (0)
+#define THREAD_SLEEPING_OK() ((curthread)->td_no_sleeping--)
+
+#define THREAD_CAN_SLEEP() ((curthread)->td_no_sleeping == 0)
#define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash])
extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
@@ -865,10 +967,12 @@ extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl;
extern u_long pgrphash;
extern struct sx allproc_lock;
+extern int allproc_gen;
extern struct sx proctree_lock;
extern struct mtx ppeers_lock;
extern struct proc proc0; /* Process slot for swapper. */
-extern struct thread thread0; /* Primary thread in proc0. */
+extern struct thread0_storage thread0_st; /* Primary thread in proc0. */
+#define thread0 (thread0_st.t0st_thread)
extern struct vmspace vmspace0; /* VM space for proc0. */
extern int hogticks; /* Limit on kernel cpu hogs. */
extern int lastpid;
@@ -890,6 +994,16 @@ struct proc *pfind_locked(pid_t pid);
struct pgrp *pgfind(pid_t); /* Find process group by id. */
struct proc *zpfind(pid_t); /* Find zombie process by id. */
+struct fork_req {
+ int fr_flags;
+ int fr_pages;
+ int *fr_pidp;
+ struct proc **fr_procp;
+ int *fr_pd_fd;
+ int fr_pd_flags;
+ struct filecaps *fr_pd_fcaps;
+};
+
/*
* pget() flags.
*/
@@ -907,13 +1021,22 @@ int pget(pid_t pid, int flags, struct proc **pp);
void ast(struct trapframe *framep);
struct thread *choosethread(void);
+#ifndef __rtems__
+int cr_cansee(struct ucred *u1, struct ucred *u2);
+int cr_canseesocket(struct ucred *cred, struct socket *so);
+#else /* __rtems__ */
+#define cr_cansee(u1, u2) 0
+#define cr_canseesocket(cred, so) 0
+#endif /* __rtems__ */
+int cr_canseeothergids(struct ucred *u1, struct ucred *u2);
+int cr_canseeotheruids(struct ucred *u1, struct ucred *u2);
int cr_cansignal(struct ucred *cred, struct proc *proc, int signum);
int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp,
struct session *sess);
int enterthispgrp(struct proc *p, struct pgrp *pgrp);
void faultin(struct proc *p);
void fixjobc(struct proc *p, struct pgrp *pgrp, int entering);
-int fork1(struct thread *, int, int, struct proc **, int *, int);
+int fork1(struct thread *, struct fork_req *);
void fork_exit(void (*)(void *, struct trapframe *), void *,
struct trapframe *);
void fork_return(struct thread *, struct trapframe *);
@@ -924,6 +1047,7 @@ void kick_proc0(void);
#else /* __rtems__ */
#define kick_proc0()
#endif /* __rtems__ */
+void killjobc(void);
int leavepgrp(struct proc *p);
int maybe_preempt(struct thread *td);
void maybe_yield(void);
@@ -942,11 +1066,14 @@ int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb);
void procinit(void);
void proc_linkup0(struct proc *p, struct thread *td);
void proc_linkup(struct proc *p, struct thread *td);
+struct proc *proc_realparent(struct proc *child);
void proc_reap(struct thread *td, struct proc *p, int *status, int options);
void proc_reparent(struct proc *child, struct proc *newparent);
+void proc_set_traced(struct proc *p, bool stop);
struct pstats *pstats_alloc(void);
void pstats_fork(struct pstats *src, struct pstats *dst);
void pstats_free(struct pstats *ps);
+void reaper_abandon_children(struct proc *p, bool exiting);
#ifndef __rtems__
int securelevel_ge(struct ucred *cr, int level);
int securelevel_gt(struct ucred *cr, int level);
@@ -960,7 +1087,6 @@ int setrunnable(struct thread *);
void setsugid(struct proc *p);
int should_yield(void);
int sigonstack(size_t sp);
-void sleepinit(void);
void stopevent(struct proc *, u_int, u_int);
struct thread *tdfind(lwpid_t, pid_t);
void threadinit(void);
@@ -968,22 +1094,21 @@ void tidhash_add(struct thread *);
void tidhash_remove(struct thread *);
void cpu_idle(int);
int cpu_idle_wakeup(int);
-extern void (*cpu_idle_hook)(void); /* Hook to machdep CPU idler. */
+extern void (*cpu_idle_hook)(sbintime_t); /* Hook to machdep CPU idler. */
void cpu_switch(struct thread *, struct thread *, struct mtx *);
void cpu_throw(struct thread *, struct thread *) __dead2;
void unsleep(struct thread *);
void userret(struct thread *, struct trapframe *);
void cpu_exit(struct thread *);
-void exit1(struct thread *, int) __dead2;
-struct syscall_args;
+void exit1(struct thread *, int, int) __dead2;
+void cpu_copy_thread(struct thread *td, struct thread *td0);
int cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa);
void cpu_fork(struct thread *, struct proc *, struct thread *, int);
-void cpu_set_fork_handler(struct thread *, void (*)(void *), void *);
+void cpu_fork_kthread_handler(struct thread *, void (*)(void *), void *);
void cpu_set_syscall_retval(struct thread *, int);
-void cpu_set_upcall(struct thread *td, struct thread *td0);
#ifndef __rtems__
-void cpu_set_upcall_kse(struct thread *, void (*)(void *), void *,
+void cpu_set_upcall(struct thread *, void (*)(void *), void *,
stack_t *);
#endif /* __rtems__ */
int cpu_set_user_tls(struct thread *, void *tls_base);
@@ -995,27 +1120,35 @@ void cpu_thread_swapin(struct thread *);
void cpu_thread_swapout(struct thread *);
struct thread *thread_alloc(int pages);
int thread_alloc_stack(struct thread *, int pages);
+void thread_cow_get_proc(struct thread *newtd, struct proc *p);
+void thread_cow_get(struct thread *newtd, struct thread *td);
+void thread_cow_free(struct thread *td);
+void thread_cow_update(struct thread *td);
+int thread_create(struct thread *td, struct rtprio *rtp,
+ int (*initialize_thread)(struct thread *, void *), void *thunk);
void thread_exit(void) __dead2;
void thread_free(struct thread *td);
void thread_link(struct thread *td, struct proc *p);
void thread_reap(void);
-int thread_single(int how);
-void thread_single_end(void);
+int thread_single(struct proc *p, int how);
+void thread_single_end(struct proc *p, int how);
void thread_stash(struct thread *td);
void thread_stopped(struct proc *p);
void childproc_stopped(struct proc *child, int reason);
void childproc_continued(struct proc *child);
void childproc_exited(struct proc *child);
int thread_suspend_check(int how);
-void thread_suspend_switch(struct thread *);
+bool thread_suspend_check_needed(void);
+void thread_suspend_switch(struct thread *, struct proc *p);
void thread_suspend_one(struct thread *td);
void thread_unlink(struct thread *td);
void thread_unsuspend(struct proc *p);
-int thread_unsuspend_one(struct thread *td);
-void thread_unthread(struct thread *td);
void thread_wait(struct proc *p);
struct thread *thread_find(struct proc *p, lwpid_t tid);
+void stop_all_proc(void);
+void resume_all_proc(void);
+
#ifndef __rtems__
static __inline int
curthread_pflags_set(int flags)
@@ -1035,6 +1168,13 @@ curthread_pflags_restore(int save)
curthread->td_pflags &= save;
}
+
+static __inline __pure2 struct td_sched *
+td_get_sched(struct thread *td)
+{
+
+ return ((struct td_sched *)&td[1]);
+}
#endif /* __rtems__ */
#endif /* _KERNEL */
diff --git a/freebsd/sys/sys/protosw.h b/freebsd/sys/sys/protosw.h
index b55af4b7..896ec253 100644
--- a/freebsd/sys/sys/protosw.h
+++ b/freebsd/sys/sys/protosw.h
@@ -34,6 +34,7 @@
#define _SYS_PROTOSW_H_
/* Forward declare these structures referenced from prototypes below. */
+struct kaiocb;
struct mbuf;
struct thread;
struct sockaddr;
@@ -64,13 +65,11 @@ struct sockopt;
* similar to the vnode VOP interface.
*/
/* USE THESE FOR YOUR PROTOTYPES ! */
-typedef void pr_input_t (struct mbuf *, int);
-typedef int pr_input6_t (struct mbuf **, int*, int); /* XXX FIX THIS */
-typedef int pr_output_t (struct mbuf *, struct socket *);
+typedef int pr_input_t (struct mbuf **, int*, int);
+typedef int pr_output_t (struct mbuf *, struct socket *, ...);
typedef void pr_ctlinput_t (int, struct sockaddr *, void *);
typedef int pr_ctloutput_t (struct socket *, struct sockopt *);
typedef void pr_init_t (void);
-typedef void pr_destroy_t (void);
typedef void pr_fasttimo_t (void);
typedef void pr_slowtimo_t (void);
typedef void pr_drain_t (void);
@@ -87,7 +86,6 @@ struct protosw {
pr_ctloutput_t *pr_ctloutput; /* control output (from above) */
/* utility hooks */
pr_init_t *pr_init;
- pr_destroy_t *pr_destroy;
pr_fasttimo_t *pr_fasttimo; /* fast timeout (200ms) */
pr_slowtimo_t *pr_slowtimo; /* slow timeout (500ms) */
pr_drain_t *pr_drain; /* flush any excess space possible */
@@ -203,15 +201,17 @@ struct pr_usrreqs {
int (*pru_peeraddr)(struct socket *so, struct sockaddr **nam);
int (*pru_rcvd)(struct socket *so, int flags);
int (*pru_rcvoob)(struct socket *so, struct mbuf *m, int flags);
- int (*pru_send)(struct socket *so, int flags, struct mbuf *m,
+ int (*pru_send)(struct socket *so, int flags, struct mbuf *m,
struct sockaddr *addr, struct mbuf *control,
struct thread *td);
#define PRUS_OOB 0x1
#define PRUS_EOF 0x2
#define PRUS_MORETOCOME 0x4
+#define PRUS_NOTREADY 0x8
+ int (*pru_ready)(struct socket *so, struct mbuf *m, int count);
int (*pru_sense)(struct socket *so, struct stat *sb);
- int (*pru_shutdown)(struct socket *so);
- int (*pru_flush)(struct socket *so, int direction);
+ int (*pru_shutdown)(struct socket *so);
+ int (*pru_flush)(struct socket *so, int direction);
int (*pru_sockaddr)(struct socket *so, struct sockaddr **nam);
int (*pru_sosend)(struct socket *so, struct sockaddr *addr,
struct uio *uio, struct mbuf *top, struct mbuf *control,
@@ -223,17 +223,27 @@ struct pr_usrreqs {
struct ucred *cred, struct thread *td);
void (*pru_sosetlabel)(struct socket *so);
void (*pru_close)(struct socket *so);
+ int (*pru_bindat)(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td);
+ int (*pru_connectat)(int fd, struct socket *so,
+ struct sockaddr *nam, struct thread *td);
+ int (*pru_aio_queue)(struct socket *so, struct kaiocb *job);
};
/*
* All nonvoid pru_*() functions below return EOPNOTSUPP.
*/
int pru_accept_notsupp(struct socket *so, struct sockaddr **nam);
+int pru_aio_queue_notsupp(struct socket *so, struct kaiocb *job);
int pru_attach_notsupp(struct socket *so, int proto, struct thread *td);
int pru_bind_notsupp(struct socket *so, struct sockaddr *nam,
struct thread *td);
+int pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td);
int pru_connect_notsupp(struct socket *so, struct sockaddr *nam,
struct thread *td);
+int pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td);
int pru_connect2_notsupp(struct socket *so1, struct socket *so2);
int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
struct ifnet *ifp, struct thread *td);
@@ -244,6 +254,7 @@ int pru_rcvd_notsupp(struct socket *so, int flags);
int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags);
int pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
struct sockaddr *addr, struct mbuf *control, struct thread *td);
+int pru_ready_notsupp(struct socket *so, struct mbuf *m, int count);
int pru_sense_null(struct socket *so, struct stat *sb);
int pru_shutdown_notsupp(struct socket *so);
int pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam);
@@ -266,9 +277,9 @@ int pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred,
*/
#define PRC_IFDOWN 0 /* interface transition */
#define PRC_ROUTEDEAD 1 /* select new route if possible ??? */
-#define PRC_IFUP 2 /* interface has come back up */
-#define PRC_QUENCH2 3 /* DEC congestion bit says slow down */
-#define PRC_QUENCH 4 /* some one said to slow down */
+#define PRC_IFUP 2 /* interface has come back up */
+/* was PRC_QUENCH2 3 DEC congestion bit says slow down */
+/* was PRC_QUENCH 4 Deprecated by RFC 6633 */
#define PRC_MSGSIZE 5 /* message size forced drop */
#define PRC_HOSTDEAD 6 /* host appears to be down */
#define PRC_HOSTUNREACH 7 /* deprecated (use PRC_UNREACH_HOST) */
@@ -330,6 +341,7 @@ char *prcorequests[] = {
#ifdef _KERNEL
void pfctlinput(int, struct sockaddr *);
void pfctlinput2(int, struct sockaddr *, void *);
+struct domain *pffinddomain(int family);
struct protosw *pffindproto(int family, int protocol, int type);
struct protosw *pffindtype(int family, int type);
int pf_proto_register(int family, struct protosw *npr);
diff --git a/freebsd/sys/sys/racct.h b/freebsd/sys/sys/racct.h
index 3b34891a..9b8143f2 100644
--- a/freebsd/sys/sys/racct.h
+++ b/freebsd/sys/sys/racct.h
@@ -37,9 +37,12 @@
#define _RACCT_H_
#include <sys/cdefs.h>
-#include <sys/queue.h>
#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/stdint.h>
+#include <sys/sysctl.h>
+struct buf;
struct proc;
struct rctl_rule_link;
struct ucred;
@@ -69,7 +72,11 @@ struct ucred;
#define RACCT_SHMSIZE 18
#define RACCT_WALLCLOCK 19
#define RACCT_PCTCPU 20
-#define RACCT_MAX RACCT_PCTCPU
+#define RACCT_READBPS 21
+#define RACCT_WRITEBPS 22
+#define RACCT_READIOPS 23
+#define RACCT_WRITEIOPS 24
+#define RACCT_MAX RACCT_WRITEIOPS
/*
* Resource properties.
@@ -82,17 +89,22 @@ struct ucred;
#define RACCT_DECAYING 0x20
extern int racct_types[];
+extern int racct_enable;
+
+#define ASSERT_RACCT_ENABLED() KASSERT(racct_enable, \
+ ("%s called with !racct_enable", __func__))
/*
* Amount stored in c_resources[] is 10**6 times bigger than what's
* visible to the userland. It gets fixed up when retrieving resource
* usage or adding rules.
*/
-#define RACCT_IS_IN_MILLIONS(X) (racct_types[X] & RACCT_IN_MILLIONS)
+#define RACCT_IS_IN_MILLIONS(X) \
+ ((X) != RACCT_UNDEFINED && (racct_types[(X)] & RACCT_IN_MILLIONS) != 0)
/*
* Resource usage can drop, as opposed to only grow. When the process
- * terminates, its resource usage is freed from the respective
+ * terminates, its resource usage is subtracted from the respective
* per-credential racct containers.
*/
#define RACCT_IS_RECLAIMABLE(X) (racct_types[X] & RACCT_RECLAIMABLE)
@@ -120,8 +132,7 @@ extern int racct_types[];
* When a process terminates, its resource usage is not automatically
* subtracted from per-credential racct containers. Instead, the resource
* usage of per-credential racct containers decays in time.
- * Resource usage can olso drop for such resource.
- * So far, the only such resource is RACCT_PCTCPU.
+ * Resource usage can also drop for such resource.
*/
#define RACCT_IS_DECAYING(X) (racct_types[X] & RACCT_DECAYING)
@@ -141,9 +152,20 @@ struct racct {
LIST_HEAD(, rctl_rule_link) r_rule_links;
};
+SYSCTL_DECL(_kern_racct);
+
+#ifdef RACCT
+
+extern struct mtx racct_lock;
+
+#define RACCT_LOCK() mtx_lock(&racct_lock)
+#define RACCT_UNLOCK() mtx_unlock(&racct_lock)
+#define RACCT_LOCK_ASSERT() mtx_assert(&racct_lock, MA_OWNED)
+
int racct_add(struct proc *p, int resource, uint64_t amount);
void racct_add_cred(struct ucred *cred, int resource, uint64_t amount);
void racct_add_force(struct proc *p, int resource, uint64_t amount);
+void racct_add_buf(struct proc *p, const struct buf *bufp, int is_write);
int racct_set(struct proc *p, int resource, uint64_t amount);
void racct_set_force(struct proc *p, int resource, uint64_t amount);
void racct_sub(struct proc *p, int resource, uint64_t amount);
@@ -161,5 +183,83 @@ void racct_proc_exit(struct proc *p);
void racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
struct ucred *newcred);
void racct_move(struct racct *dest, struct racct *src);
+void racct_proc_throttle(struct proc *p, int timeout);
+
+#else
+
+static inline int
+racct_add(struct proc *p, int resource, uint64_t amount)
+{
+
+ return (0);
+}
+
+static inline void
+racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+}
+
+static inline void
+racct_add_force(struct proc *p, int resource, uint64_t amount)
+{
+}
+
+static inline int
+racct_set(struct proc *p, int resource, uint64_t amount)
+{
+
+ return (0);
+}
+
+static inline void
+racct_set_force(struct proc *p, int resource, uint64_t amount)
+{
+}
+
+static inline void
+racct_sub(struct proc *p, int resource, uint64_t amount)
+{
+}
+
+static inline void
+racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+}
+
+static inline uint64_t
+racct_get_limit(struct proc *p, int resource)
+{
+
+ return (UINT64_MAX);
+}
+
+static inline uint64_t
+racct_get_available(struct proc *p, int resource)
+{
+
+ return (UINT64_MAX);
+}
+
+#define racct_create(x)
+#define racct_destroy(x)
+
+static inline int
+racct_proc_fork(struct proc *parent, struct proc *child)
+{
+
+ return (0);
+}
+
+static inline void
+racct_proc_fork_done(struct proc *child)
+{
+}
+
+static inline void
+racct_proc_exit(struct proc *p)
+{
+}
+
+#endif
#endif /* !_RACCT_H_ */
diff --git a/freebsd/sys/sys/random.h b/freebsd/sys/sys/random.h
index 5cf1611e..396ec2b1 100644
--- a/freebsd/sys/sys/random.h
+++ b/freebsd/sys/sys/random.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2000 Mark R. V. Murray
+ * Copyright (c) 2000-2015 Mark R. V. Murray
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,35 +31,86 @@
#ifdef _KERNEL
-int read_random(void *, int);
+#include <sys/types.h>
+
+#if !defined(KLD_MODULE)
+#if defined(RANDOM_LOADABLE) && defined(RANDOM_YARROW)
+#error "Cannot define both RANDOM_LOADABLE and RANDOM_YARROW"
+#endif
+#endif
+
+struct uio;
+
+#if defined(DEV_RANDOM)
+u_int read_random(void *, u_int);
+int read_random_uio(struct uio *, bool);
+#else
+static __inline int
+read_random_uio(void *a __unused, u_int b __unused)
+{
+ return (0);
+}
+static __inline u_int
+read_random(void *a __unused, u_int b __unused)
+{
+ return (0);
+}
+#endif
/*
- * Note: if you add or remove members of esource, remember to also update the
- * KASSERT regarding what valid members are in random_harvest_internal().
+ * Note: if you add or remove members of random_entropy_source, remember to also update the
+ * KASSERT regarding what valid members are in random_harvest_internal(), and remember the
+ * strings in the static array random_source_descr[] in random_harvestq.c.
+ *
+ * NOTE: complain loudly to markm@ or on the lists if this enum gets more than 32
+ * distinct values (0-31)! ENTROPYSOURCE may be == 32, but not > 32.
*/
-enum esource {
+enum random_entropy_source {
RANDOM_START = 0,
- RANDOM_WRITE = 0,
+ RANDOM_CACHED = 0,
+ /* Environmental sources */
+ RANDOM_ATTACH,
RANDOM_KEYBOARD,
RANDOM_MOUSE,
- RANDOM_NET,
+ RANDOM_NET_TUN,
+ RANDOM_NET_ETHER,
+ RANDOM_NET_NG,
RANDOM_INTERRUPT,
- RANDOM_PURE,
+ RANDOM_SWI,
+ RANDOM_FS_ATIME,
+ RANDOM_UMA, /* Special!! UMA/SLAB Allocator */
+ RANDOM_ENVIRONMENTAL_END = RANDOM_UMA,
+ /* Fast hardware random-number sources from here on. */
+ RANDOM_PURE_OCTEON,
+ RANDOM_PURE_SAFE,
+ RANDOM_PURE_GLXSB,
+ RANDOM_PURE_UBSEC,
+ RANDOM_PURE_HIFN,
+ RANDOM_PURE_RDRAND,
+ RANDOM_PURE_NEHEMIAH,
+ RANDOM_PURE_RNDTEST,
+ RANDOM_PURE_VIRTIO,
+ RANDOM_PURE_BROADCOM,
ENTROPYSOURCE
};
-void random_harvest(void *, u_int, u_int, u_int, enum esource);
-/* Allow the sysadmin to select the broad category of
- * entropy types to harvest
- */
-struct harvest_select {
- int ethernet;
- int point_to_point;
- int interrupt;
- int swi;
-};
+#define RANDOM_HARVEST_EVERYTHING_MASK ((1 << (RANDOM_ENVIRONMENTAL_END + 1)) - 1)
+
+#if defined(DEV_RANDOM)
+void random_harvest_queue(const void *, u_int, u_int, enum random_entropy_source);
+void random_harvest_fast(const void *, u_int, u_int, enum random_entropy_source);
+void random_harvest_direct(const void *, u_int, u_int, enum random_entropy_source);
+#else
+#define random_harvest_queue(a, b, c, d) do {} while (0)
+#define random_harvest_fast(a, b, c, d) do {} while (0)
+#define random_harvest_direct(a, b, c, d) do {} while (0)
+#endif
-extern struct harvest_select harvest;
+#if defined(RANDOM_ENABLE_UMA)
+#define random_harvest_fast_uma(a, b, c, d) random_harvest_fast(a, b, c, d)
+#else /* !defined(RANDOM_ENABLE_UMA) */
+#define random_harvest_fast_uma(a, b, c, d) do {} while (0)
+#endif /* defined(RANDOM_ENABLE_UMA) */
#endif /* _KERNEL */
diff --git a/freebsd/sys/sys/reboot.h b/freebsd/sys/sys/reboot.h
index 6b8e25e6..ebe688e8 100644
--- a/freebsd/sys/sys/reboot.h
+++ b/freebsd/sys/sys/reboot.h
@@ -59,6 +59,7 @@
#define RB_RESERVED1 0x40000 /* reserved for internal use of boot blocks */
#define RB_RESERVED2 0x80000 /* reserved for internal use of boot blocks */
#define RB_PAUSE 0x100000 /* pause after each output line during probe */
+#define RB_REROOT 0x200000 /* unmount the rootfs and mount it again */
#define RB_MULTIPLE 0x20000000 /* use multiple consoles */
#define RB_BOOTINFO 0x80000000 /* have `struct bootinfo *' arg */
diff --git a/freebsd/sys/sys/refcount.h b/freebsd/sys/sys/refcount.h
index b169f542..4611664e 100644
--- a/freebsd/sys/sys/refcount.h
+++ b/freebsd/sys/sys/refcount.h
@@ -29,6 +29,7 @@
#ifndef __SYS_REFCOUNT_H__
#define __SYS_REFCOUNT_H__
+#include <sys/limits.h>
#include <machine/atomic.h>
#ifdef _KERNEL
@@ -48,11 +49,8 @@ static __inline void
refcount_acquire(volatile u_int *count)
{
-#ifndef __rtems__
+ KASSERT(*count < UINT_MAX, ("refcount %p overflowed", count));
atomic_add_acq_int(count, 1);
-#else /* __rtems__ */
- atomic_add_acq_int((volatile int *) count, 1);
-#endif /* __rtems__ */
}
static __inline int
@@ -61,11 +59,7 @@ refcount_release(volatile u_int *count)
u_int old;
/* XXX: Should this have a rel membar? */
-#ifndef __rtems__
old = atomic_fetchadd_int(count, -1);
-#else /* __rtems__ */
- old = atomic_fetchadd_int((volatile int *) count, -1);
-#endif /* __rtems__ */
KASSERT(old > 0, ("negative refcount %p", count));
return (old == 1);
}
diff --git a/freebsd/sys/sys/resourcevar.h b/freebsd/sys/sys/resourcevar.h
index 3dead510..1d290aaa 100644
--- a/freebsd/sys/sys/resourcevar.h
+++ b/freebsd/sys/sys/resourcevar.h
@@ -47,21 +47,22 @@
* Locking key:
* b - created at fork, never changes
* c - locked by proc mtx
- * j - locked by proc slock
* k - only accessed by curthread
+ * w - locked by proc itim lock
+ * w2 - locked by proc prof lock
*/
struct pstats {
#define pstat_startzero p_cru
struct rusage p_cru; /* Stats for reaped children. */
- struct itimerval p_timer[3]; /* (j) Virtual-time timers. */
+ struct itimerval p_timer[3]; /* (w) Virtual-time timers. */
#define pstat_endzero pstat_startcopy
#define pstat_startcopy p_prof
struct uprof { /* Profile arguments. */
- caddr_t pr_base; /* (c + j) Buffer base. */
- u_long pr_size; /* (c + j) Buffer size. */
- u_long pr_off; /* (c + j) PC offset. */
- u_long pr_scale; /* (c + j) PC scaling. */
+ caddr_t pr_base; /* (c + w2) Buffer base. */
+ u_long pr_size; /* (c + w2) Buffer size. */
+ u_long pr_off; /* (c + w2) PC offset. */
+ u_long pr_scale; /* (c + w2) PC scaling. */
} p_prof;
#define pstat_endcopy p_start
struct timeval p_start; /* (b) Starting time. */
@@ -89,7 +90,7 @@ struct racct;
* Locking guide:
* (a) Constant from inception
* (b) Lockless, updated using atomics
- * (c) Locked by global uihashtbl_mtx
+ * (c) Locked by global uihashtbl_lock
* (d) Locked by the ui_vmsize_mtx
*/
struct uidinfo {
@@ -99,9 +100,13 @@ struct uidinfo {
long ui_sbsize; /* (b) socket buffer space consumed */
long ui_proccnt; /* (b) number of processes */
long ui_ptscnt; /* (b) number of pseudo-terminals */
+ long ui_kqcnt; /* (b) number of kqueues */
+ long ui_umtxcnt; /* (b) number of shared umtxs */
uid_t ui_uid; /* (a) uid */
u_int ui_ref; /* (b) reference count */
+#ifdef RACCT
struct racct *ui_racct; /* (a) resource accounting */
+#endif
};
#define UIDINFO_VMSIZE_LOCK(ui) mtx_lock(&((ui)->ui_vmsize_mtx))
@@ -115,6 +120,11 @@ void addupc_intr(struct thread *td, uintfptr_t pc, u_int ticks);
void addupc_task(struct thread *td, uintfptr_t pc, u_int ticks);
void calccru(struct proc *p, struct timeval *up, struct timeval *sp);
void calcru(struct proc *p, struct timeval *up, struct timeval *sp);
+#ifndef __rtems__
+int chgkqcnt(struct uidinfo *uip, int diff, rlim_t max);
+#else /* __rtems__ */
+#define chgkqcnt(uip, diff, max) 0
+#endif /* __rtems__ */
int chgproccnt(struct uidinfo *uip, int diff, rlim_t maxval);
#ifndef __rtems__
int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to,
@@ -130,19 +140,23 @@ rtems_bsd_chgsbsize(u_int *hiwat, u_int to)
#define chgsbsize(uip, hiwat, to, maxval) rtems_bsd_chgsbsize(hiwat, to)
#endif /* __rtems__ */
int chgptscnt(struct uidinfo *uip, int diff, rlim_t maxval);
+int chgumtxcnt(struct uidinfo *uip, int diff, rlim_t maxval);
int fuswintr(void *base);
int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which,
struct rlimit *limp);
struct plimit
*lim_alloc(void);
void lim_copy(struct plimit *dst, struct plimit *src);
-rlim_t lim_cur(struct proc *p, int which);
+rlim_t lim_cur(struct thread *td, int which);
+rlim_t lim_cur_proc(struct proc *p, int which);
void lim_fork(struct proc *p1, struct proc *p2);
void lim_free(struct plimit *limp);
struct plimit
*lim_hold(struct plimit *limp);
-rlim_t lim_max(struct proc *p, int which);
-void lim_rlimit(struct proc *p, int which, struct rlimit *rlp);
+rlim_t lim_max(struct thread *td, int which);
+rlim_t lim_max_proc(struct proc *p, int which);
+void lim_rlimit(struct thread *td, int which, struct rlimit *rlp);
+void lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp);
void ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
struct rusage_ext *rux2);
void rucollect(struct rusage *ru, struct rusage *ru2);
@@ -157,8 +171,11 @@ struct uidinfo
void uifree(struct uidinfo *uip);
void uihashinit(void);
void uihold(struct uidinfo *uip);
+#ifdef RACCT
void ui_racct_foreach(void (*callback)(struct racct *racct,
- void *arg2, void *arg3), void *arg2, void *arg3);
+ void *arg2, void *arg3), void (*pre)(void), void (*post)(void),
+ void *arg2, void *arg3);
+#endif
#endif /* _KERNEL */
#endif /* !_SYS_RESOURCEVAR_H_ */
diff --git a/freebsd/sys/sys/rman.h b/freebsd/sys/sys/rman.h
index 547ff843..4de6022f 100644
--- a/freebsd/sys/sys/rman.h
+++ b/freebsd/sys/sys/rman.h
@@ -47,10 +47,11 @@
#define RF_FIRSTSHARE 0x0020 /* first in sharing list */
#define RF_PREFETCHABLE 0x0040 /* resource is prefetchable */
#define RF_OPTIONAL 0x0080 /* for bus_alloc_resources() */
+#define RF_UNMAPPED 0x0100 /* don't map resource when activating */
#define RF_ALIGNMENT_SHIFT 10 /* alignment size bit starts bit 10 */
#define RF_ALIGNMENT_MASK (0x003F << RF_ALIGNMENT_SHIFT)
- /* resource address alignemnt size bit mask */
+ /* resource address alignment size bit mask */
#define RF_ALIGNMENT_LOG2(x) ((x) << RF_ALIGNMENT_SHIFT)
#define RF_ALIGNMENT(x) (((x) & RF_ALIGNMENT_MASK) >> RF_ALIGNMENT_SHIFT)
@@ -61,6 +62,10 @@ enum rman_type { RMAN_UNINIT = 0, RMAN_GAUGE, RMAN_ARRAY };
*/
#define RM_TEXTLEN 32
+#define RM_MAX_END (~(rman_res_t)0)
+
+#define RMAN_IS_DEFAULT_RANGE(s,e) ((s) == 0 && (e) == RM_MAX_END)
+
/*
* Userspace-exported structures.
*/
@@ -70,8 +75,8 @@ struct u_resource {
uintptr_t r_device; /* device owning this resource */
char r_devname[RM_TEXTLEN]; /* device name XXX obsolete */
- u_long r_start; /* offset in resource space */
- u_long r_size; /* size in resource space */
+ rman_res_t r_start; /* offset in resource space */
+ rman_res_t r_size; /* size in resource space */
u_int r_flags; /* RF_* flags */
};
@@ -79,8 +84,8 @@ struct u_rman {
uintptr_t rm_handle; /* rman uniquifier */
char rm_descr[RM_TEXTLEN]; /* rman description */
- u_long rm_start; /* base of managed region */
- u_long rm_size; /* size of managed region */
+ rman_res_t rm_start; /* base of managed region */
+ rman_res_t rm_size; /* size of managed region */
enum rman_type rm_type; /* region type */
};
@@ -101,6 +106,7 @@ struct resource {
};
struct resource_i;
+struct resource_map;
TAILQ_HEAD(resource_head, resource_i);
@@ -108,47 +114,48 @@ struct rman {
struct resource_head rm_list;
struct mtx *rm_mtx; /* mutex used to protect rm_list */
TAILQ_ENTRY(rman) rm_link; /* link in list of all rmans */
- u_long rm_start; /* index of globally first entry */
- u_long rm_end; /* index of globally last entry */
+ rman_res_t rm_start; /* index of globally first entry */
+ rman_res_t rm_end; /* index of globally last entry */
enum rman_type rm_type; /* what type of resource this is */
const char *rm_descr; /* text descripion of this resource */
};
TAILQ_HEAD(rman_head, rman);
int rman_activate_resource(struct resource *r);
-int rman_adjust_resource(struct resource *r, u_long start, u_long end);
-int rman_await_resource(struct resource *r, int pri, int timo);
-int rman_first_free_region(struct rman *rm, u_long *start, u_long *end);
+int rman_adjust_resource(struct resource *r, rman_res_t start, rman_res_t end);
+int rman_first_free_region(struct rman *rm, rman_res_t *start, rman_res_t *end);
bus_space_handle_t rman_get_bushandle(struct resource *);
bus_space_tag_t rman_get_bustag(struct resource *);
-u_long rman_get_end(struct resource *);
-struct device *rman_get_device(struct resource *);
+rman_res_t rman_get_end(struct resource *);
+device_t rman_get_device(struct resource *);
u_int rman_get_flags(struct resource *);
+void rman_get_mapping(struct resource *, struct resource_map *);
int rman_get_rid(struct resource *);
-u_long rman_get_size(struct resource *);
-u_long rman_get_start(struct resource *);
+rman_res_t rman_get_size(struct resource *);
+rman_res_t rman_get_start(struct resource *);
void *rman_get_virtual(struct resource *);
int rman_deactivate_resource(struct resource *r);
int rman_fini(struct rman *rm);
int rman_init(struct rman *rm);
int rman_init_from_resource(struct rman *rm, struct resource *r);
-int rman_last_free_region(struct rman *rm, u_long *start, u_long *end);
+int rman_last_free_region(struct rman *rm, rman_res_t *start, rman_res_t *end);
uint32_t rman_make_alignment_flags(uint32_t size);
-int rman_manage_region(struct rman *rm, u_long start, u_long end);
+int rman_manage_region(struct rman *rm, rman_res_t start, rman_res_t end);
int rman_is_region_manager(struct resource *r, struct rman *rm);
int rman_release_resource(struct resource *r);
-struct resource *rman_reserve_resource(struct rman *rm, u_long start,
- u_long end, u_long count,
- u_int flags, struct device *dev);
-struct resource *rman_reserve_resource_bound(struct rman *rm, u_long start,
- u_long end, u_long count, u_long bound,
- u_int flags, struct device *dev);
+struct resource *rman_reserve_resource(struct rman *rm, rman_res_t start,
+ rman_res_t end, rman_res_t count,
+ u_int flags, device_t dev);
+struct resource *rman_reserve_resource_bound(struct rman *rm, rman_res_t start,
+ rman_res_t end, rman_res_t count, rman_res_t bound,
+ u_int flags, device_t dev);
void rman_set_bushandle(struct resource *_r, bus_space_handle_t _h);
void rman_set_bustag(struct resource *_r, bus_space_tag_t _t);
-void rman_set_device(struct resource *_r, struct device *_dev);
-void rman_set_end(struct resource *_r, u_long _end);
+void rman_set_device(struct resource *_r, device_t _dev);
+void rman_set_end(struct resource *_r, rman_res_t _end);
+void rman_set_mapping(struct resource *, struct resource_map *);
void rman_set_rid(struct resource *_r, int _rid);
-void rman_set_start(struct resource *_r, u_long _start);
+void rman_set_start(struct resource *_r, rman_res_t _start);
void rman_set_virtual(struct resource *_r, void *_v);
extern struct rman_head rman_head;
diff --git a/freebsd/sys/sys/rmlock.h b/freebsd/sys/sys/rmlock.h
index e71789ac..efd60597 100644
--- a/freebsd/sys/sys/rmlock.h
+++ b/freebsd/sys/sys/rmlock.h
@@ -40,17 +40,18 @@
#ifdef _KERNEL
/*
- * Flags passed to rm_init(9).
+ * Flags passed to rm_init_flags(9).
*/
#define RM_NOWITNESS 0x00000001
#define RM_RECURSE 0x00000002
#define RM_SLEEPABLE 0x00000004
+#define RM_NEW 0x00000008
#ifndef __rtems__
void rm_init(struct rmlock *rm, const char *name);
void rm_init_flags(struct rmlock *rm, const char *name, int opts);
void rm_destroy(struct rmlock *rm);
-int rm_wowned(struct rmlock *rm);
+int rm_wowned(const struct rmlock *rm);
void rm_sysinit(void *arg);
void rm_sysinit_flags(void *arg);
@@ -67,7 +68,7 @@ int _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker,
int trylock);
void _rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker);
#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
-void _rm_assert(struct rmlock *rm, int what, const char *file,
+void _rm_assert(const struct rmlock *rm, int what, const char *file,
int line);
#endif
@@ -99,17 +100,18 @@ void _rm_assert(struct rmlock *rm, int what, const char *file,
tick_sbt * (timo), 0, C_HARDCLOCK)
#else /* __rtems__ */
- #define rm_init(rm, name) rw_init(rm, name)
- #define rm_init_flags(rm, name, opts) rw_init_flags(rm, name, opts)
- #define rm_destroy(rm) rw_destroy(rm)
- #define rm_wowned(rm) rw_wowned(rm)
- #define rm_sysinit(arg) rw_sysinit(arg)
- #define rm_sysinit_flags(arg) rw_sysinit_flags(arg)
-
- #define rm_wlock(rm) rw_wlock((rm))
- #define rm_wunlock(rm) rw_wunlock((rm))
- #define rm_rlock(rm,tracker) rw_rlock((rm))
- #define rm_runlock(rm,tracker) rw_runlock((rm))
+#include <sys/rwlock.h>
+#define rm_init rw_init
+#define rm_init_flags rw_init_flags
+#define rm_destroy rw_destroy
+#define rm_wowned rw_wowned
+#define rm_sysinit rw_sysinit
+#define rm_sysinit_flags rw_sysinit_flags
+#define rm_wlock rw_wlock
+#define rm_wunlock rw_wunlock
+#define rm_rlock(rm, tracker) do { (void)tracker; rw_rlock(rm); } while (0)
+#define rm_runlock(rm, tracker) do { (void)tracker; rw_runlock(rm); } while (0)
+#define rm_sleep rw_sleep
#endif /* __rtems__ */
diff --git a/freebsd/sys/sys/rwlock.h b/freebsd/sys/sys/rwlock.h
index 2dd1a257..e0003840 100644
--- a/freebsd/sys/sys/rwlock.h
+++ b/freebsd/sys/sys/rwlock.h
@@ -99,12 +99,12 @@
/* Acquire a write lock. */
#define __rw_wlock(rw, tid, file, line) do { \
uintptr_t _tid = (uintptr_t)(tid); \
- \
- if (!_rw_write_lock((rw), _tid)) \
+ \
+ if ((rw)->rw_lock != RW_UNLOCKED || !_rw_write_lock((rw), _tid))\
_rw_wlock_hard((rw), _tid, (file), (line)); \
else \
- LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, \
- rw, 0, 0, (file), (line)); \
+ LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, \
+ 0, 0, file, line, LOCKSTAT_WRITER); \
} while (0)
/* Release a write lock. */
@@ -113,8 +113,12 @@
\
if ((rw)->rw_recurse) \
(rw)->rw_recurse--; \
- else if (!_rw_write_unlock((rw), _tid)) \
- _rw_wunlock_hard((rw), _tid, (file), (line)); \
+ else { \
+ LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, \
+ LOCKSTAT_WRITER); \
+ if ((rw)->rw_lock != _tid || !_rw_write_unlock((rw), _tid))\
+ _rw_wunlock_hard((rw), _tid, (file), (line)); \
+ } \
} while (0)
#endif /* __rtems__ */
@@ -123,8 +127,30 @@
* external API and should not be called directly. Wrapper macros should
* be used instead.
*/
-
-#define rw_init(rw, name) rw_init_flags((rw), (name), 0)
+#ifndef __rtems__
+void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts);
+void _rw_destroy(volatile uintptr_t *c);
+void rw_sysinit(void *arg);
+void rw_sysinit_flags(void *arg);
+int _rw_wowned(const volatile uintptr_t *c);
+void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line);
+int __rw_try_wlock(volatile uintptr_t *c, const char *file, int line);
+void _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line);
+void __rw_rlock(volatile uintptr_t *c, const char *file, int line);
+int __rw_try_rlock(volatile uintptr_t *c, const char *file, int line);
+void _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line);
+void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
+ int line);
+void __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid,
+ const char *file, int line);
+int __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line);
+void __rw_downgrade(volatile uintptr_t *c, const char *file, int line);
+#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
+void __rw_assert(const volatile uintptr_t *c, int what, const char *file,
+ int line);
+#endif
+#else /* __rtems__ */
+#define rw_init(rw, n) rw_init_flags(rw, n, 0)
void rw_init_flags(struct rwlock *rw, const char *name, int opts);
void rw_destroy(struct rwlock *rw);
void rw_sysinit(void *arg);
@@ -136,15 +162,50 @@ void _rw_wunlock(struct rwlock *rw, const char *file, int line);
void _rw_rlock(struct rwlock *rw, const char *file, int line);
int _rw_try_rlock(struct rwlock *rw, const char *file, int line);
void _rw_runlock(struct rwlock *rw, const char *file, int line);
-void _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file,
- int line);
-void _rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file,
- int line);
int _rw_try_upgrade(struct rwlock *rw, const char *file, int line);
void _rw_downgrade(struct rwlock *rw, const char *file, int line);
+#endif /* __rtems__ */
+
+#ifndef __rtems__
+/*
+ * Top-level macros to provide lock cookie once the actual rwlock is passed.
+ * They will also prevent passing a malformed object to the rwlock KPI by
+ * failing compilation as the rw_lock reserved member will not be found.
+ */
+#define rw_init(rw, n) \
+ _rw_init_flags(&(rw)->rw_lock, n, 0)
+#define rw_init_flags(rw, n, o) \
+ _rw_init_flags(&(rw)->rw_lock, n, o)
+#define rw_destroy(rw) \
+ _rw_destroy(&(rw)->rw_lock)
+#define rw_wowned(rw) \
+ _rw_wowned(&(rw)->rw_lock)
+#define _rw_wlock(rw, f, l) \
+ _rw_wlock_cookie(&(rw)->rw_lock, f, l)
+#define _rw_try_wlock(rw, f, l) \
+ __rw_try_wlock(&(rw)->rw_lock, f, l)
+#define _rw_wunlock(rw, f, l) \
+ _rw_wunlock_cookie(&(rw)->rw_lock, f, l)
+#define _rw_rlock(rw, f, l) \
+ __rw_rlock(&(rw)->rw_lock, f, l)
+#define _rw_try_rlock(rw, f, l) \
+ __rw_try_rlock(&(rw)->rw_lock, f, l)
+#define _rw_runlock(rw, f, l) \
+ _rw_runlock_cookie(&(rw)->rw_lock, f, l)
+#define _rw_wlock_hard(rw, t, f, l) \
+ __rw_wlock_hard(&(rw)->rw_lock, t, f, l)
+#define _rw_wunlock_hard(rw, t, f, l) \
+ __rw_wunlock_hard(&(rw)->rw_lock, t, f, l)
+#define _rw_try_upgrade(rw, f, l) \
+ __rw_try_upgrade(&(rw)->rw_lock, f, l)
+#define _rw_downgrade(rw, f, l) \
+ __rw_downgrade(&(rw)->rw_lock, f, l)
#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
-void _rw_assert(struct rwlock *rw, int what, const char *file, int line);
+#define _rw_assert(rw, w, f, l) \
+ __rw_assert(&(rw)->rw_lock, w, f, l)
#endif
+#endif /* __rtems__ */
+
/*
* Public interface for lock operations.
@@ -175,17 +236,18 @@ void _rw_assert(struct rwlock *rw, int what, const char *file, int line);
rw_runlock(rw); \
} while (0)
#define rw_sleep(chan, rw, pri, wmesg, timo) \
- _sleep((chan), &(rw)->lock_object, (pri), (wmesg), (timo))
+ _sleep((chan), &(rw)->lock_object, (pri), (wmesg), \
+ tick_sbt * (timo), 0, C_HARDCLOCK)
-#define rw_initialized(rw) lock_initalized(&(rw)->lock_object)
+#define rw_initialized(rw) lock_initialized(&(rw)->lock_object)
struct rw_args {
- struct rwlock *ra_rw;
+ void *ra_rw;
const char *ra_desc;
};
struct rw_args_flags {
- struct rwlock *ra_rw;
+ void *ra_rw;
const char *ra_desc;
int ra_flags;
};
@@ -198,7 +260,7 @@ struct rw_args_flags {
SYSINIT(name##_rw_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \
rw_sysinit, &name##_args); \
SYSUNINIT(name##_rw_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \
- rw_destroy, (rw))
+ _rw_destroy, __DEVOLATILE(void *, &(rw)->rw_lock))
#define RW_SYSINIT_FLAGS(name, rw, desc, flags) \
@@ -210,7 +272,7 @@ struct rw_args_flags {
SYSINIT(name##_rw_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \
rw_sysinit_flags, &name##_args); \
SYSUNINIT(name##_rw_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \
- rw_destroy, (rw))
+ _rw_destroy, __DEVOLATILE(void *, &(rw)->rw_lock))
/*
* Options passed to rw_init_flags().
@@ -220,6 +282,7 @@ struct rw_args_flags {
#define RW_NOWITNESS 0x04
#define RW_QUIET 0x08
#define RW_RECURSE 0x10
+#define RW_NEW 0x20
/*
* The INVARIANTS-enabled rw_assert() functionality.
diff --git a/freebsd/sys/sys/sbuf.h b/freebsd/sys/sys/sbuf.h
index 9816a4cd..580cbd2e 100644
--- a/freebsd/sys/sys/sbuf.h
+++ b/freebsd/sys/sys/sbuf.h
@@ -48,6 +48,7 @@ struct sbuf {
ssize_t s_len; /* current length of string */
#define SBUF_FIXEDLEN 0x00000000 /* fixed length buffer (default) */
#define SBUF_AUTOEXTEND 0x00000001 /* automatically extend buffer */
+#define SBUF_INCLUDENUL 0x00000002 /* nulterm byte is counted in len */
#define SBUF_USRFLAGMSK 0x0000ffff /* mask of flags the user may specify */
#define SBUF_DYNAMIC 0x00010000 /* s_buf must be freed */
#define SBUF_FINISHED 0x00020000 /* set by sbuf_finish() */
@@ -57,6 +58,14 @@ struct sbuf {
ssize_t s_sect_len; /* current length of section */
};
+#ifndef HD_COLUMN_MASK
+#define HD_COLUMN_MASK 0xff
+#define HD_DELIM_MASK 0xff00
+#define HD_OMIT_COUNT (1 << 16)
+#define HD_OMIT_HEX (1 << 17)
+#define HD_OMIT_CHARS (1 << 18)
+#endif /* HD_COLUMN_MASK */
+
__BEGIN_DECLS
/*
* API functions
@@ -64,6 +73,9 @@ __BEGIN_DECLS
struct sbuf *sbuf_new(struct sbuf *, char *, int, int);
#define sbuf_new_auto() \
sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND)
+int sbuf_get_flags(struct sbuf *);
+void sbuf_clear_flags(struct sbuf *, int);
+void sbuf_set_flags(struct sbuf *, int);
void sbuf_clear(struct sbuf *);
int sbuf_setpos(struct sbuf *, ssize_t);
int sbuf_bcat(struct sbuf *, const void *, size_t);
@@ -85,6 +97,8 @@ int sbuf_done(const struct sbuf *);
void sbuf_delete(struct sbuf *);
void sbuf_start_section(struct sbuf *, ssize_t *);
ssize_t sbuf_end_section(struct sbuf *, ssize_t, size_t, int);
+void sbuf_hexdump(struct sbuf *, const void *, int, const char *,
+ int);
#ifdef _KERNEL
struct uio;
diff --git a/freebsd/sys/sys/sdt.h b/freebsd/sys/sys/sdt.h
index ca820f68..25423d76 100644
--- a/freebsd/sys/sys/sdt.h
+++ b/freebsd/sys/sys/sdt.h
@@ -161,7 +161,7 @@ SET_DECLARE(sdt_argtypes_set, struct sdt_argtype);
extern struct sdt_probe sdt_##prov##_##mod##_##func##_##name[1]
#define SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) do { \
- if (sdt_##prov##_##mod##_##func##_##name->id) \
+ if (__predict_false(sdt_##prov##_##mod##_##func##_##name->id)) \
(*sdt_probe_func)(sdt_##prov##_##mod##_##func##_##name->id, \
(uintptr_t) arg0, (uintptr_t) arg1, (uintptr_t) arg2, \
(uintptr_t) arg3, (uintptr_t) arg4); \
@@ -398,7 +398,7 @@ struct sdt_probe {
struct sdt_provider *prov; /* Ptr to the provider structure. */
TAILQ_ENTRY(sdt_probe)
probe_entry; /* SDT probe list entry. */
- TAILQ_HEAD(argtype_list_head, sdt_argtype) argtype_list;
+ TAILQ_HEAD(, sdt_argtype) argtype_list;
const char *mod;
const char *func;
const char *name;
diff --git a/freebsd/sys/sys/seq.h b/freebsd/sys/sys/seq.h
new file mode 100644
index 00000000..82efbdf1
--- /dev/null
+++ b/freebsd/sys/sys/seq.h
@@ -0,0 +1,129 @@
+/*-
+ * Copyright (c) 2014 Mateusz Guzik <mjg@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_SEQ_H_
+#define _SYS_SEQ_H_
+
+#ifdef _KERNEL
+#include <sys/systm.h>
+#endif
+#include <sys/types.h>
+
+/*
+ * seq_t may be included in structs visible to userspace
+ */
+typedef uint32_t seq_t;
+
+#ifdef _KERNEL
+
+/*
+ * Typical usage:
+ *
+ * writers:
+ * lock_exclusive(&obj->lock);
+ * seq_write_begin(&obj->seq);
+ * .....
+ * seq_write_end(&obj->seq);
+ * unlock_exclusive(&obj->unlock);
+ *
+ * readers:
+ * obj_t lobj;
+ * seq_t seq;
+ *
+ * for (;;) {
+ * seq = seq_read(&gobj->seq);
+ * lobj = gobj;
+ * if (seq_consistent(&gobj->seq, seq))
+ * break;
+ * cpu_spinwait();
+ * }
+ * foo(lobj);
+ */
+
+/* A hack to get MPASS macro */
+#include <rtems/bsd/sys/lock.h>
+
+#include <machine/cpu.h>
+
+static __inline bool
+seq_in_modify(seq_t seqp)
+{
+
+ return (seqp & 1);
+}
+
+static __inline void
+seq_write_begin(seq_t *seqp)
+{
+
+ MPASS(!seq_in_modify(*seqp));
+ *seqp += 1;
+ atomic_thread_fence_rel();
+}
+
+static __inline void
+seq_write_end(seq_t *seqp)
+{
+
+ atomic_store_rel_int(seqp, *seqp + 1);
+ MPASS(!seq_in_modify(*seqp));
+}
+
+static __inline seq_t
+seq_read(const seq_t *seqp)
+{
+ seq_t ret;
+
+ for (;;) {
+ ret = atomic_load_acq_int(__DECONST(seq_t *, seqp));
+ if (seq_in_modify(ret)) {
+ cpu_spinwait();
+ continue;
+ }
+ break;
+ }
+
+ return (ret);
+}
+
+static __inline seq_t
+seq_consistent_nomb(const seq_t *seqp, seq_t oldseq)
+{
+
+ return (*seqp == oldseq);
+}
+
+static __inline seq_t
+seq_consistent(const seq_t *seqp, seq_t oldseq)
+{
+
+ atomic_thread_fence_acq();
+ return (seq_consistent_nomb(seqp, oldseq));
+}
+
+#endif /* _KERNEL */
+#endif /* _SYS_SEQ_H_ */
diff --git a/freebsd/sys/sys/sf_buf.h b/freebsd/sys/sys/sf_buf.h
index af420652..b5970d95 100644
--- a/freebsd/sys/sys/sf_buf.h
+++ b/freebsd/sys/sys/sf_buf.h
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 2003-2004 Alan L. Cox <alc@cs.rice.edu>
* All rights reserved.
*
@@ -29,7 +30,158 @@
#ifndef _SYS_SF_BUF_H_
#define _SYS_SF_BUF_H_
+struct sfstat { /* sendfile statistics */
+ uint64_t sf_syscalls; /* times sendfile was called */
+ uint64_t sf_noiocnt; /* times sendfile didn't require I/O */
+ uint64_t sf_iocnt; /* times sendfile had to do disk I/O */
+ uint64_t sf_pages_read; /* pages read as part of a request */
+ uint64_t sf_pages_valid; /* pages were valid for a request */
+ uint64_t sf_rhpages_requested; /* readahead pages requested */
+ uint64_t sf_rhpages_read; /* readahead pages read */
+ uint64_t sf_busy; /* times aborted on a busy page */
+ uint64_t sf_allocfail; /* times sfbuf allocation failed */
+ uint64_t sf_allocwait; /* times sfbuf allocation had to wait */
+};
+
+#ifdef _KERNEL
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_page.h>
+
+/*
+ * Sf_bufs, or sendfile(2) buffers provide a vm_page that is mapped
+ * into kernel address space. Note, that they aren't used only
+ * by sendfile(2)!
+ *
+ * Sf_bufs could be implemented as a feature of vm_page_t, but that
+ * would require growth of the structure. That's why they are implemented
+ * as a separate hash indexed by vm_page address. Implementation lives in
+ * kern/subr_sfbuf.c. Meanwhile, most 64-bit machines have a physical map,
+ * so they don't require this hash at all, thus ignore subr_sfbuf.c.
+ *
+ * Different 32-bit architectures demand different requirements on sf_buf
+ * hash and functions. They request features in machine/vmparam.h, which
+ * enable parts of this file. They can also optionally provide helpers in
+ * machine/sf_buf.h
+ *
+ * Defines are:
+ * SFBUF This machine requires sf_buf hash.
+ * subr_sfbuf.c should be compiled.
+ * SFBUF_CPUSET This machine can perform SFB_CPUPRIVATE mappings,
+ * that do no invalidate cache on the rest of CPUs.
+ * SFBUF_NOMD This machine doesn't have machine/sf_buf.h
+ *
+ * SFBUF_OPTIONAL_DIRECT_MAP Value of this define is used as boolean
+ * variable that tells whether machine is
+ * capable of direct map or not at runtime.
+ * SFBUF_MAP This machine provides its own sf_buf_map() and
+ * sf_buf_unmap().
+ * SFBUF_PROCESS_PAGE This machine provides sf_buf_process_page()
+ * function.
+ */
+
+#ifdef SFBUF
+#if defined(SMP) && defined(SFBUF_CPUSET)
+#include <sys/_cpuset.h>
+#endif
+#include <sys/queue.h>
+
+struct sf_buf {
+ LIST_ENTRY(sf_buf) list_entry; /* list of buffers */
+ TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */
+ vm_page_t m; /* currently mapped page */
+ vm_offset_t kva; /* va of mapping */
+ int ref_count; /* usage of this mapping */
+#if defined(SMP) && defined(SFBUF_CPUSET)
+ cpuset_t cpumask; /* where mapping is valid */
+#endif
+};
+#else /* ! SFBUF */
+struct sf_buf;
+#endif /* SFBUF */
+
+#ifndef SFBUF_NOMD
#include <machine/sf_buf.h>
+#endif
+#ifdef SFBUF_OPTIONAL_DIRECT_MAP
+#include <machine/md_var.h>
+#endif
+
+#ifdef SFBUF
+struct sf_buf *sf_buf_alloc(struct vm_page *, int);
+void sf_buf_free(struct sf_buf *);
+void sf_buf_ref(struct sf_buf *);
+
+static inline vm_offset_t
+sf_buf_kva(struct sf_buf *sf)
+{
+#ifdef SFBUF_OPTIONAL_DIRECT_MAP
+ if (SFBUF_OPTIONAL_DIRECT_MAP)
+ return (SFBUF_PHYS_DMAP(VM_PAGE_TO_PHYS((vm_page_t)sf)));
+#endif
+
+ return (sf->kva);
+}
+
+static inline vm_page_t
+sf_buf_page(struct sf_buf *sf)
+{
+#ifdef SFBUF_OPTIONAL_DIRECT_MAP
+ if (SFBUF_OPTIONAL_DIRECT_MAP)
+ return ((vm_page_t)sf);
+#endif
+
+ return (sf->m);
+}
+
+#ifndef SFBUF_MAP
+#include <vm/pmap.h>
+
+static inline void
+sf_buf_map(struct sf_buf *sf, int flags)
+{
+
+ pmap_qenter(sf->kva, &sf->m, 1);
+}
+
+static inline int
+sf_buf_unmap(struct sf_buf *sf)
+{
+
+ return (0);
+}
+#endif /* SFBUF_MAP */
+
+#if defined(SMP) && defined(SFBUF_CPUSET)
+void sf_buf_shootdown(struct sf_buf *, int);
+#endif
+
+#ifdef SFBUF_PROCESS_PAGE
+boolean_t sf_buf_process_page(vm_page_t, void (*)(struct sf_buf *));
+#endif
+
+#else /* ! SFBUF */
+
+static inline struct sf_buf *
+sf_buf_alloc(struct vm_page *m, int pri)
+{
+
+ return ((struct sf_buf *)m);
+}
+
+static inline void
+sf_buf_free(struct sf_buf *sf)
+{
+}
+
+static inline void
+sf_buf_ref(struct sf_buf *sf)
+{
+}
+#endif /* SFBUF */
/*
* Options to sf_buf_alloc() are specified through its flags argument. This
@@ -42,15 +194,10 @@
#define SFB_DEFAULT 0
#define SFB_NOWAIT 4 /* Return NULL if all bufs are used. */
-struct vm_page;
-
-extern int nsfbufs; /* Number of sendfile(2) bufs alloced */
-extern int nsfbufspeak; /* Peak of nsfbufsused */
-extern int nsfbufsused; /* Number of sendfile(2) bufs in use */
-
-struct sf_buf *
- sf_buf_alloc(struct vm_page *m, int flags);
-void sf_buf_free(struct sf_buf *sf);
-void sf_buf_mext(void *addr, void *args);
-
+extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
+#define SFSTAT_ADD(name, val) \
+ counter_u64_add(sfstat[offsetof(struct sfstat, name) / sizeof(uint64_t)],\
+ (val))
+#define SFSTAT_INC(name) SFSTAT_ADD(name, 1)
+#endif /* _KERNEL */
#endif /* !_SYS_SF_BUF_H_ */
diff --git a/freebsd/sys/sys/signalvar.h b/freebsd/sys/sys/signalvar.h
new file mode 100644
index 00000000..a2a1d0d8
--- /dev/null
+++ b/freebsd/sys/sys/signalvar.h
@@ -0,0 +1,403 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)signalvar.h 8.6 (Berkeley) 2/19/95
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_SIGNALVAR_H_
+#define _SYS_SIGNALVAR_H_
+
+#include <sys/queue.h>
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
+#include <sys/signal.h>
+
+#ifndef __rtems__
+/*
+ * Kernel signal definitions and data structures.
+ */
+
+/*
+ * Logical process signal actions and state, needed only within the process
+ * The mapping between sigacts and proc structures is 1:1 except for rfork()
+ * processes masquerading as threads which use one structure for the whole
+ * group. All members are locked by the included mutex. The reference count
+ * and mutex must be last for the bcopy in sigacts_copy() to work.
+ */
+struct sigacts {
+ sig_t ps_sigact[_SIG_MAXSIG]; /* Disposition of signals. */
+ sigset_t ps_catchmask[_SIG_MAXSIG]; /* Signals to be blocked. */
+ sigset_t ps_sigonstack; /* Signals to take on sigstack. */
+ sigset_t ps_sigintr; /* Signals that interrupt syscalls. */
+ sigset_t ps_sigreset; /* Signals that reset when caught. */
+ sigset_t ps_signodefer; /* Signals not masked while handled. */
+ sigset_t ps_siginfo; /* Signals that want SA_SIGINFO args. */
+ sigset_t ps_sigignore; /* Signals being ignored. */
+ sigset_t ps_sigcatch; /* Signals being caught by user. */
+ sigset_t ps_freebsd4; /* Signals using freebsd4 ucontext. */
+ sigset_t ps_osigset; /* Signals using <= 3.x osigset_t. */
+ sigset_t ps_usertramp; /* SunOS compat; libc sigtramp. XXX */
+ int ps_flag;
+ u_int ps_refcnt;
+ struct mtx ps_mtx;
+};
+
+#define PS_NOCLDWAIT 0x0001 /* No zombies if child dies */
+#define PS_NOCLDSTOP 0x0002 /* No SIGCHLD when children stop. */
+#define PS_CLDSIGIGN 0x0004 /* The SIGCHLD handler is SIG_IGN. */
+
+#ifdef _KERNEL
+
+#ifdef COMPAT_43
+typedef struct {
+ struct osigcontext si_sc;
+ int si_signo;
+ int si_code;
+ union sigval si_value;
+} osiginfo_t;
+
+struct osigaction {
+ union {
+ void (*__sa_handler)(int);
+ void (*__sa_sigaction)(int, osiginfo_t *, void *);
+ } __sigaction_u; /* signal handler */
+ osigset_t sa_mask; /* signal mask to apply */
+ int sa_flags; /* see signal options below */
+};
+
+typedef void __osiginfohandler_t(int, osiginfo_t *, void *);
+#endif /* COMPAT_43 */
+
+/* additional signal action values, used only temporarily/internally */
+#define SIG_CATCH ((__sighandler_t *)2)
+/* #define SIG_HOLD ((__sighandler_t *)3) See signal.h */
+
+/*
+ * get signal action for process and signal; currently only for current process
+ */
+#define SIGACTION(p, sig) (p->p_sigacts->ps_sigact[_SIG_IDX(sig)])
+
+#endif /* _KERNEL */
+
+/*
+ * sigset_t manipulation macros.
+ */
+#define SIGADDSET(set, signo) \
+ ((set).__bits[_SIG_WORD(signo)] |= _SIG_BIT(signo))
+
+#define SIGDELSET(set, signo) \
+ ((set).__bits[_SIG_WORD(signo)] &= ~_SIG_BIT(signo))
+
+#define SIGEMPTYSET(set) \
+ do { \
+ int __i; \
+ for (__i = 0; __i < _SIG_WORDS; __i++) \
+ (set).__bits[__i] = 0; \
+ } while (0)
+
+#define SIGFILLSET(set) \
+ do { \
+ int __i; \
+ for (__i = 0; __i < _SIG_WORDS; __i++) \
+ (set).__bits[__i] = ~0U; \
+ } while (0)
+
+#define SIGISMEMBER(set, signo) \
+ ((set).__bits[_SIG_WORD(signo)] & _SIG_BIT(signo))
+
+#define SIGISEMPTY(set) (__sigisempty(&(set)))
+#define SIGNOTEMPTY(set) (!__sigisempty(&(set)))
+
+#define SIGSETEQ(set1, set2) (__sigseteq(&(set1), &(set2)))
+#define SIGSETNEQ(set1, set2) (!__sigseteq(&(set1), &(set2)))
+
+#define SIGSETOR(set1, set2) \
+ do { \
+ int __i; \
+ for (__i = 0; __i < _SIG_WORDS; __i++) \
+ (set1).__bits[__i] |= (set2).__bits[__i]; \
+ } while (0)
+
+#define SIGSETAND(set1, set2) \
+ do { \
+ int __i; \
+ for (__i = 0; __i < _SIG_WORDS; __i++) \
+ (set1).__bits[__i] &= (set2).__bits[__i]; \
+ } while (0)
+
+#define SIGSETNAND(set1, set2) \
+ do { \
+ int __i; \
+ for (__i = 0; __i < _SIG_WORDS; __i++) \
+ (set1).__bits[__i] &= ~(set2).__bits[__i]; \
+ } while (0)
+
+#define SIGSETLO(set1, set2) ((set1).__bits[0] = (set2).__bits[0])
+#define SIGSETOLD(set, oset) ((set).__bits[0] = (oset))
+
+#define SIG_CANTMASK(set) \
+ SIGDELSET(set, SIGKILL), SIGDELSET(set, SIGSTOP)
+
+#define SIG_STOPSIGMASK(set) \
+ SIGDELSET(set, SIGSTOP), SIGDELSET(set, SIGTSTP), \
+ SIGDELSET(set, SIGTTIN), SIGDELSET(set, SIGTTOU)
+
+#define SIG_CONTSIGMASK(set) \
+ SIGDELSET(set, SIGCONT)
+
+#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))
+
+#define SIG2OSIG(sig, osig) (osig = (sig).__bits[0])
+#define OSIG2SIG(osig, sig) SIGEMPTYSET(sig); (sig).__bits[0] = osig
+
+static __inline int
+__sigisempty(sigset_t *set)
+{
+ int i;
+
+ for (i = 0; i < _SIG_WORDS; i++) {
+ if (set->__bits[i])
+ return (0);
+ }
+ return (1);
+}
+
+static __inline int
+__sigseteq(sigset_t *set1, sigset_t *set2)
+{
+ int i;
+
+ for (i = 0; i < _SIG_WORDS; i++) {
+ if (set1->__bits[i] != set2->__bits[i])
+ return (0);
+ }
+ return (1);
+}
+
+#ifdef COMPAT_FREEBSD6
+struct osigevent {
+ int sigev_notify; /* Notification type */
+ union {
+ int __sigev_signo; /* Signal number */
+ int __sigev_notify_kqueue;
+ } __sigev_u;
+ union sigval sigev_value; /* Signal value */
+};
+#endif
+
+typedef struct ksiginfo {
+ TAILQ_ENTRY(ksiginfo) ksi_link;
+ siginfo_t ksi_info;
+ int ksi_flags;
+ struct sigqueue *ksi_sigq;
+} ksiginfo_t;
+
+#define ksi_signo ksi_info.si_signo
+#define ksi_errno ksi_info.si_errno
+#define ksi_code ksi_info.si_code
+#define ksi_pid ksi_info.si_pid
+#define ksi_uid ksi_info.si_uid
+#define ksi_status ksi_info.si_status
+#define ksi_addr ksi_info.si_addr
+#define ksi_value ksi_info.si_value
+#define ksi_band ksi_info.si_band
+#define ksi_trapno ksi_info.si_trapno
+#define ksi_overrun ksi_info.si_overrun
+#define ksi_timerid ksi_info.si_timerid
+#define ksi_mqd ksi_info.si_mqd
+
+/* bits for ksi_flags */
+#define KSI_TRAP 0x01 /* Generated by trap. */
+#define KSI_EXT 0x02 /* Externally managed ksi. */
+#define KSI_INS 0x04 /* Directly insert ksi, not the copy */
+#define KSI_SIGQ 0x08 /* Generated by sigqueue, might return EAGAIN. */
+#define KSI_HEAD 0x10 /* Insert into head, not tail. */
+#define KSI_COPYMASK (KSI_TRAP|KSI_SIGQ)
+
+#define KSI_ONQ(ksi) ((ksi)->ksi_sigq != NULL)
+
+typedef struct sigqueue {
+ sigset_t sq_signals; /* All pending signals. */
+ sigset_t sq_kill; /* Legacy depth 1 queue. */
+ TAILQ_HEAD(, ksiginfo) sq_list;/* Queued signal info. */
+ struct proc *sq_proc;
+ int sq_flags;
+} sigqueue_t;
+
+/* Flags for sq_flags */
+#define SQ_INIT 0x01
+
+#ifdef _KERNEL
+
+/* Return nonzero if process p has an unmasked pending signal. */
+#define SIGPENDING(td) \
+ ((!SIGISEMPTY((td)->td_siglist) && \
+ !sigsetmasked(&(td)->td_siglist, &(td)->td_sigmask)) || \
+ (!SIGISEMPTY((td)->td_proc->p_siglist) && \
+ !sigsetmasked(&(td)->td_proc->p_siglist, &(td)->td_sigmask)))
+/*
+ * Return the value of the pseudo-expression ((*set & ~*mask) != 0). This
+ * is an optimized version of SIGISEMPTY() on a temporary variable
+ * containing SIGSETNAND(*set, *mask).
+ */
+static __inline int
+sigsetmasked(sigset_t *set, sigset_t *mask)
+{
+ int i;
+
+ for (i = 0; i < _SIG_WORDS; i++) {
+ if (set->__bits[i] & ~mask->__bits[i])
+ return (0);
+ }
+ return (1);
+}
+
+#define ksiginfo_init(ksi) \
+do { \
+ bzero(ksi, sizeof(ksiginfo_t)); \
+} while(0)
+
+#define ksiginfo_init_trap(ksi) \
+do { \
+ ksiginfo_t *kp = ksi; \
+ bzero(kp, sizeof(ksiginfo_t)); \
+ kp->ksi_flags |= KSI_TRAP; \
+} while(0)
+
+static __inline void
+ksiginfo_copy(ksiginfo_t *src, ksiginfo_t *dst)
+{
+ (dst)->ksi_info = src->ksi_info;
+ (dst)->ksi_flags = (src->ksi_flags & KSI_COPYMASK);
+}
+
+static __inline void
+ksiginfo_set_sigev(ksiginfo_t *dst, struct sigevent *sigev)
+{
+ dst->ksi_signo = sigev->sigev_signo;
+ dst->ksi_value = sigev->sigev_value;
+}
+
+struct pgrp;
+struct proc;
+struct sigio;
+struct thread;
+
+/*
+ * Lock the pointers for a sigio object in the underlying objects of
+ * a file descriptor.
+ */
+#define SIGIO_LOCK() mtx_lock(&sigio_lock)
+#define SIGIO_TRYLOCK() mtx_trylock(&sigio_lock)
+#define SIGIO_UNLOCK() mtx_unlock(&sigio_lock)
+#define SIGIO_LOCKED() mtx_owned(&sigio_lock)
+#define SIGIO_ASSERT(type) mtx_assert(&sigio_lock, type)
+
+extern struct mtx sigio_lock;
+
+/* Flags for kern_sigprocmask(). */
+#define SIGPROCMASK_OLD 0x0001
+#define SIGPROCMASK_PROC_LOCKED 0x0002
+#define SIGPROCMASK_PS_LOCKED 0x0004
+
+/*
+ * Modes for sigdeferstop(). Manages behaviour of
+ * thread_suspend_check() in the region delimited by
+ * sigdeferstop()/sigallowstop(). Must be restored to
+ * SIGDEFERSTOP_OFF before returning to userspace.
+ */
+#define SIGDEFERSTOP_NOP 0 /* continue doing whatever is done now */
+#define SIGDEFERSTOP_OFF 1 /* stop ignoring STOPs */
+#define SIGDEFERSTOP_SILENT 2 /* silently ignore STOPs */
+#define SIGDEFERSTOP_EINTR 3 /* ignore STOPs, return EINTR */
+#define SIGDEFERSTOP_ERESTART 4 /* ignore STOPs, return ERESTART */
+
+#define SIGDEFERSTOP_VAL_NCHG (-1) /* placeholder indicating no state change */
+int sigdeferstop_impl(int mode);
+void sigallowstop_impl(int prev);
+
+static inline int
+sigdeferstop(int mode)
+{
+
+ if (mode == SIGDEFERSTOP_NOP)
+ return (SIGDEFERSTOP_VAL_NCHG);
+ return (sigdeferstop_impl(mode));
+}
+
+static inline void
+sigallowstop(int prev)
+{
+
+ if (prev == SIGDEFERSTOP_VAL_NCHG)
+ return;
+ sigallowstop_impl(prev);
+}
+
+int cursig(struct thread *td);
+void execsigs(struct proc *p);
+void gsignal(int pgid, int sig, ksiginfo_t *ksi);
+void killproc(struct proc *p, char *why);
+ksiginfo_t * ksiginfo_alloc(int wait);
+void ksiginfo_free(ksiginfo_t *ksi);
+int pksignal(struct proc *p, int sig, ksiginfo_t *ksi);
+void pgsigio(struct sigio **sigiop, int sig, int checkctty);
+void pgsignal(struct pgrp *pgrp, int sig, int checkctty, ksiginfo_t *ksi);
+int postsig(int sig);
+void kern_psignal(struct proc *p, int sig);
+int ptracestop(struct thread *td, int sig);
+void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *retmask);
+struct sigacts *sigacts_alloc(void);
+void sigacts_copy(struct sigacts *dest, struct sigacts *src);
+void sigacts_free(struct sigacts *ps);
+struct sigacts *sigacts_hold(struct sigacts *ps);
+int sigacts_shared(struct sigacts *ps);
+void sigexit(struct thread *td, int sig) __dead2;
+int sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **);
+int sig_ffs(sigset_t *set);
+void siginit(struct proc *p);
+void signotify(struct thread *td);
+void sigqueue_delete(struct sigqueue *queue, int sig);
+void sigqueue_delete_proc(struct proc *p, int sig);
+void sigqueue_flush(struct sigqueue *queue);
+void sigqueue_init(struct sigqueue *queue, struct proc *p);
+void sigqueue_take(ksiginfo_t *ksi);
+void tdksignal(struct thread *td, int sig, ksiginfo_t *ksi);
+int tdsendsignal(struct proc *p, struct thread *td, int sig,
+ ksiginfo_t *ksi);
+void tdsigcleanup(struct thread *td);
+void tdsignal(struct thread *td, int sig);
+void trapsignal(struct thread *td, ksiginfo_t *ksi);
+
+#endif /* _KERNEL */
+#else /* __rtems__ */
+typedef int ksiginfo_t;
+#endif /* __rtems__ */
+
+#endif /* !_SYS_SIGNALVAR_H_ */
diff --git a/freebsd/sys/sys/sleepqueue.h b/freebsd/sys/sys/sleepqueue.h
index 4c4ea651..d59dc7e5 100644
--- a/freebsd/sys/sys/sleepqueue.h
+++ b/freebsd/sys/sys/sleepqueue.h
@@ -83,8 +83,6 @@ struct thread;
#define SLEEPQ_SX 0x03 /* Used by an sx lock. */
#define SLEEPQ_LK 0x04 /* Used by a lockmgr. */
#define SLEEPQ_INTERRUPTIBLE 0x100 /* Sleep is interruptible. */
-#define SLEEPQ_STOP_ON_BDRY 0x200 /* Stop sleeping thread on
- user mode boundary */
void init_sleepqueues(void);
int sleepq_abort(struct thread *td, int intrval);
@@ -98,7 +96,10 @@ struct sleepqueue *sleepq_lookup(void *wchan);
void sleepq_release(void *wchan);
void sleepq_remove(struct thread *td, void *wchan);
int sleepq_signal(void *wchan, int flags, int pri, int queue);
-void sleepq_set_timeout(void *wchan, int timo);
+void sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt,
+ sbintime_t pr, int flags);
+#define sleepq_set_timeout(wchan, timo) \
+ sleepq_set_timeout_sbt((wchan), tick_sbt * (timo), 0, C_HARDCLOCK)
u_int sleepq_sleepcnt(void *wchan, int queue);
int sleepq_timedwait(void *wchan, int pri);
int sleepq_timedwait_sig(void *wchan, int pri);
@@ -106,5 +107,11 @@ int sleepq_type(void *wchan);
void sleepq_wait(void *wchan, int pri);
int sleepq_wait_sig(void *wchan, int pri);
+#ifdef STACK
+struct sbuf;
+int sleepq_sbuf_print_stacks(struct sbuf *sb, void *wchan, int queue,
+ int *count_stacks_printed);
+#endif
+
#endif /* _KERNEL */
#endif /* !_SYS_SLEEPQUEUE_H_ */
diff --git a/freebsd/sys/sys/smp.h b/freebsd/sys/sys/smp.h
index 776fc3a1..96a3fe59 100644
--- a/freebsd/sys/sys/smp.h
+++ b/freebsd/sys/sys/smp.h
@@ -17,9 +17,52 @@
#ifndef LOCORE
#include <rtems/bsd/sys/cpuset.h>
+#include <sys/queue.h>
/*
- * Topology of a NUMA or HTT system.
+ * Types of nodes in the topological tree.
+ */
+typedef enum {
+ /* No node has this type; can be used in topo API calls. */
+ TOPO_TYPE_DUMMY,
+ /* Processing unit aka computing unit aka logical CPU. */
+ TOPO_TYPE_PU,
+ /* Physical subdivision of a package. */
+ TOPO_TYPE_CORE,
+ /* CPU L1/L2/L3 cache. */
+ TOPO_TYPE_CACHE,
+ /* Package aka chip, equivalent to socket. */
+ TOPO_TYPE_PKG,
+ /* NUMA node. */
+ TOPO_TYPE_NODE,
+ /* Other logical or physical grouping of PUs. */
+ * /* E.g. PUs on the same die, or PUs sharing an FPU. */
+ TOPO_TYPE_GROUP,
+ /* The whole system. */
+ TOPO_TYPE_SYSTEM
+} topo_node_type;
+
+/* Hardware identifier of a topology component. */
+typedef unsigned int hwid_t;
+/* Logical CPU identifier. */
+typedef int cpuid_t;
+
+/* A node in the topology. */
+struct topo_node {
+ struct topo_node *parent;
+ TAILQ_HEAD(topo_children, topo_node) children;
+ TAILQ_ENTRY(topo_node) siblings;
+ cpuset_t cpuset;
+ topo_node_type type;
+ uintptr_t subtype;
+ hwid_t hwid;
+ cpuid_t id;
+ int nchildren;
+ int cpu_count;
+};
+
+/*
+ * Scheduling topology of a NUMA or SMP system.
*
* The top level topology is an array of pointers to groups. Each group
* contains a bitmask of cpus in its group or subgroups. It may also
@@ -52,6 +95,8 @@ typedef struct cpu_group *cpu_group_t;
#define CG_SHARE_L2 2
#define CG_SHARE_L3 3
+#define MAX_CACHE_LEVELS CG_SHARE_L3
+
/*
* Behavior modifiers for load balancing and affinity.
*/
@@ -60,10 +105,29 @@ typedef struct cpu_group *cpu_group_t;
#define CG_FLAG_THREAD (CG_FLAG_HTT | CG_FLAG_SMT) /* Any threading. */
/*
- * Convenience routines for building topologies.
+ * Convenience routines for building and traversing topologies.
*/
#ifdef SMP
+void topo_init_node(struct topo_node *node);
+void topo_init_root(struct topo_node *root);
+struct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid,
+ topo_node_type type, uintptr_t subtype);
+struct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid,
+ topo_node_type type, uintptr_t subtype);
+void topo_promote_child(struct topo_node *child);
+struct topo_node * topo_next_node(struct topo_node *top,
+ struct topo_node *node);
+struct topo_node * topo_next_nonchild_node(struct topo_node *top,
+ struct topo_node *node);
+void topo_set_pu_id(struct topo_node *node, cpuid_t id);
+int topo_analyze(struct topo_node *topo_root, int all, int *pkg_count,
+ int *cores_per_pkg, int *thrs_per_core);
+
+#define TOPO_FOREACH(i, root) \
+ for (i = root; i != NULL; i = topo_next_node(root, i))
+
struct cpu_group *smp_topo(void);
+struct cpu_group *smp_topo_alloc(u_int count);
struct cpu_group *smp_topo_none(void);
struct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
struct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
@@ -71,10 +135,10 @@ struct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);
extern void (*cpustop_restartfunc)(void);
-extern int smp_active;
extern int smp_cpus;
extern volatile cpuset_t started_cpus;
extern volatile cpuset_t stopped_cpus;
+extern volatile cpuset_t suspended_cpus;
extern cpuset_t hlt_cpus_mask;
extern cpuset_t logical_cpus_mask;
#endif /* SMP */
@@ -86,6 +150,7 @@ extern int mp_ncpus;
extern volatile int smp_started;
extern cpuset_t all_cpus;
+extern cpuset_t cpuset_domain[MAXMEMDOM]; /* CPUs in each NUMA domain. */
#else /* __rtems__ */
#define mp_maxid 0U
#define mp_maxcpus 1
@@ -151,7 +216,7 @@ cpu_next(int i)
* cpu_mp_start() will be called so that MP can be enabled. This function
* should do things such as startup secondary processors. It should also
* setup mp_ncpus, all_cpus, and smp_cpus. It should also ensure that
- * smp_active and smp_started are initialized at the appropriate time.
+ * smp_started is initialized at the appropriate time.
* Once cpu_mp_start() returns, machine independent MP startup code will be
* executed and a simple message will be output to the console. Finally,
* cpu_mp_announce() will be called so that machine dependent messages about
@@ -176,11 +241,16 @@ int stop_cpus(cpuset_t);
int stop_cpus_hard(cpuset_t);
#if defined(__amd64__) || defined(__i386__)
int suspend_cpus(cpuset_t);
+int resume_cpus(cpuset_t);
#endif
+
void smp_rendezvous_action(void);
extern struct mtx smp_ipi_mtx;
#endif /* SMP */
+
+int quiesce_all_cpus(const char *, int);
+int quiesce_cpus(cpuset_t, const char *, int);
void smp_no_rendevous_barrier(void *);
void smp_rendezvous(void (*)(void *),
void (*)(void *),
diff --git a/freebsd/sys/sys/sockbuf.h b/freebsd/sys/sys/sockbuf.h
index 76197aea..2c03b3ed 100644
--- a/freebsd/sys/sys/sockbuf.h
+++ b/freebsd/sys/sys/sockbuf.h
@@ -36,6 +36,7 @@
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_sx.h>
+#include <sys/_task.h>
#define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */
@@ -52,6 +53,8 @@
#define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */
#define SB_IN_TOE 0x400 /* socket buffer is in the middle of an operation */
#define SB_AUTOSIZE 0x800 /* automatically size socket buffer */
+#define SB_STOP 0x1000 /* backpressure indicator */
+#define SB_AIO_RUNNING 0x2000 /* AIO operation running */
#define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */
#define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */
@@ -76,31 +79,40 @@ struct xsockbuf {
/*
* Variables for socket buffering.
+ *
+ * Locking key to struct sockbuf:
+ * (a) locked by SOCKBUF_LOCK().
*/
struct sockbuf {
struct selinfo sb_sel; /* process selecting read/write */
struct mtx sb_mtx; /* sockbuf lock */
struct sx sb_sx; /* prevent I/O interlacing */
- short sb_state; /* (c/d) socket state on sockbuf */
+ short sb_state; /* (a) socket state on sockbuf */
#define sb_startzero sb_mb
- struct mbuf *sb_mb; /* (c/d) the mbuf chain */
- struct mbuf *sb_mbtail; /* (c/d) the last mbuf in the chain */
- struct mbuf *sb_lastrecord; /* (c/d) first mbuf of last
+ struct mbuf *sb_mb; /* (a) the mbuf chain */
+ struct mbuf *sb_mbtail; /* (a) the last mbuf in the chain */
+ struct mbuf *sb_lastrecord; /* (a) first mbuf of last
* record in socket buffer */
- struct mbuf *sb_sndptr; /* (c/d) pointer into mbuf chain */
- u_int sb_sndptroff; /* (c/d) byte offset of ptr into chain */
- u_int sb_cc; /* (c/d) actual chars in buffer */
- u_int sb_hiwat; /* (c/d) max actual char count */
- u_int sb_mbcnt; /* (c/d) chars of mbufs used */
- u_int sb_mcnt; /* (c/d) number of mbufs in buffer */
- u_int sb_ccnt; /* (c/d) number of clusters in buffer */
- u_int sb_mbmax; /* (c/d) max chars of mbufs to use */
- u_int sb_ctl; /* (c/d) non-data chars in buffer */
- int sb_lowat; /* (c/d) low water mark */
- int sb_timeo; /* (c/d) timeout for read/write */
- short sb_flags; /* (c/d) flags, see below */
- int (*sb_upcall)(struct socket *, void *, int); /* (c/d) */
- void *sb_upcallarg; /* (c/d) */
+ struct mbuf *sb_sndptr; /* (a) pointer into mbuf chain */
+ struct mbuf *sb_fnrdy; /* (a) pointer to first not ready buffer */
+ u_int sb_sndptroff; /* (a) byte offset of ptr into chain */
+ u_int sb_acc; /* (a) available chars in buffer */
+ u_int sb_ccc; /* (a) claimed chars in buffer */
+ u_int sb_hiwat; /* (a) max actual char count */
+ u_int sb_mbcnt; /* (a) chars of mbufs used */
+ u_int sb_mcnt; /* (a) number of mbufs in buffer */
+ u_int sb_ccnt; /* (a) number of clusters in buffer */
+ u_int sb_mbmax; /* (a) max chars of mbufs to use */
+ u_int sb_ctl; /* (a) non-data chars in buffer */
+ int sb_lowat; /* (a) low water mark */
+ sbintime_t sb_timeo; /* (a) timeout for read/write */
+ short sb_flags; /* (a) flags, see below */
+ int (*sb_upcall)(struct socket *, void *, int); /* (a) */
+ void *sb_upcallarg; /* (a) */
+#ifndef __rtems__
+ TAILQ_HEAD(, kaiocb) sb_aiojobq; /* (a) pending AIO ops */
+ struct task sb_aiotask; /* AIO task */
+#endif /* __rtems__ */
};
#ifdef _KERNEL
@@ -119,10 +131,17 @@ struct sockbuf {
#define SOCKBUF_LOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
#define SOCKBUF_UNLOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)
-void sbappend(struct sockbuf *sb, struct mbuf *m);
-void sbappend_locked(struct sockbuf *sb, struct mbuf *m);
-void sbappendstream(struct sockbuf *sb, struct mbuf *m);
-void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m);
+/*
+ * Socket buffer private mbuf(9) flags.
+ */
+#define M_NOTREADY M_PROTO1 /* m_data not populated yet */
+#define M_BLOCKED M_PROTO2 /* M_NOTREADY in front of m */
+#define M_NOTAVAIL (M_NOTREADY | M_BLOCKED)
+
+void sbappend(struct sockbuf *sb, struct mbuf *m, int flags);
+void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags);
+void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
+void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
struct mbuf *m0, struct mbuf *control);
int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
@@ -135,13 +154,14 @@ int sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
struct mbuf *control);
void sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
-void sbcheck(struct sockbuf *sb);
void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level);
void sbdestroy(struct sockbuf *sb, struct socket *so);
void sbdrop(struct sockbuf *sb, int len);
void sbdrop_locked(struct sockbuf *sb, int len);
+struct mbuf *
+ sbcut_locked(struct sockbuf *sb, int len);
void sbdroprecord(struct sockbuf *sb);
void sbdroprecord_locked(struct sockbuf *sb);
void sbflush(struct sockbuf *sb);
@@ -161,47 +181,59 @@ void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb);
int sbwait(struct sockbuf *sb);
int sblock(struct sockbuf *sb, int flags);
void sbunlock(struct sockbuf *sb);
+void sballoc(struct sockbuf *, struct mbuf *);
+void sbfree(struct sockbuf *, struct mbuf *);
+int sbready(struct sockbuf *, struct mbuf *, int);
+
+/*
+ * Return how much data is available to be taken out of socket
+ * buffer right now.
+ */
+static inline u_int
+sbavail(struct sockbuf *sb)
+{
+
+#if 0
+ SOCKBUF_LOCK_ASSERT(sb);
+#endif
+ return (sb->sb_acc);
+}
+
+/*
+ * Return how much data sits there in the socket buffer
+ * It might be that some data is not yet ready to be read.
+ */
+static inline u_int
+sbused(struct sockbuf *sb)
+{
+
+#if 0
+ SOCKBUF_LOCK_ASSERT(sb);
+#endif
+ return (sb->sb_ccc);
+}
/*
* How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
* This is problematical if the fields are unsigned, as the space might
- * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
- * overflow and return 0. Should use "lmin" but it doesn't exist now.
+ * still be negative (ccc > hiwat or mbcnt > mbmax).
*/
-#define sbspace(sb) \
- ((long) imin((int)((sb)->sb_hiwat - (sb)->sb_cc), \
- (int)((sb)->sb_mbmax - (sb)->sb_mbcnt)))
-
-/* adjust counters in sb reflecting allocation of m */
-#define sballoc(sb, m) { \
- (sb)->sb_cc += (m)->m_len; \
- if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \
- (sb)->sb_ctl += (m)->m_len; \
- (sb)->sb_mbcnt += MSIZE; \
- (sb)->sb_mcnt += 1; \
- if ((m)->m_flags & M_EXT) { \
- (sb)->sb_mbcnt += (m)->m_ext.ext_size; \
- (sb)->sb_ccnt += 1; \
- } \
-}
+static inline long
+sbspace(struct sockbuf *sb)
+{
+ int bleft, mleft; /* size should match sockbuf fields */
+
+#if 0
+ SOCKBUF_LOCK_ASSERT(sb);
+#endif
-/* adjust counters in sb reflecting freeing of m */
-#define sbfree(sb, m) { \
- (sb)->sb_cc -= (m)->m_len; \
- if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \
- (sb)->sb_ctl -= (m)->m_len; \
- (sb)->sb_mbcnt -= MSIZE; \
- (sb)->sb_mcnt -= 1; \
- if ((m)->m_flags & M_EXT) { \
- (sb)->sb_mbcnt -= (m)->m_ext.ext_size; \
- (sb)->sb_ccnt -= 1; \
- } \
- if ((sb)->sb_sndptr == (m)) { \
- (sb)->sb_sndptr = NULL; \
- (sb)->sb_sndptroff = 0; \
- } \
- if ((sb)->sb_sndptroff != 0) \
- (sb)->sb_sndptroff -= (m)->m_len; \
+ if (sb->sb_flags & SB_STOP)
+ return(0);
+
+ bleft = sb->sb_hiwat - sb->sb_ccc;
+ mleft = sb->sb_mbmax - sb->sb_mbcnt;
+
+ return ((bleft < mleft) ? bleft : mleft);
}
#define SB_EMPTY_FIXUP(sb) do { \
@@ -213,13 +245,15 @@ void sbunlock(struct sockbuf *sb);
#ifdef SOCKBUF_DEBUG
void sblastrecordchk(struct sockbuf *, const char *, int);
-#define SBLASTRECORDCHK(sb) sblastrecordchk((sb), __FILE__, __LINE__)
-
void sblastmbufchk(struct sockbuf *, const char *, int);
+void sbcheck(struct sockbuf *, const char *, int);
+#define SBLASTRECORDCHK(sb) sblastrecordchk((sb), __FILE__, __LINE__)
#define SBLASTMBUFCHK(sb) sblastmbufchk((sb), __FILE__, __LINE__)
+#define SBCHECK(sb) sbcheck((sb), __FILE__, __LINE__)
#else
-#define SBLASTRECORDCHK(sb) /* nothing */
-#define SBLASTMBUFCHK(sb) /* nothing */
+#define SBLASTRECORDCHK(sb) do {} while (0)
+#define SBLASTMBUFCHK(sb) do {} while (0)
+#define SBCHECK(sb) do {} while (0)
#endif /* SOCKBUF_DEBUG */
#endif /* _KERNEL */
diff --git a/freebsd/sys/sys/socket.h b/freebsd/sys/sys/socket.h
index fb2a2788..f23b5f69 100644
--- a/freebsd/sys/sys/socket.h
+++ b/freebsd/sys/sys/socket.h
@@ -84,6 +84,16 @@ typedef __uid_t uid_t;
#endif
#endif
+#ifndef _UINT32_T_DECLARED
+typedef __uint32_t uint32_t;
+#define _UINT32_T_DECLARED
+#endif
+
+#ifndef _UINTPTR_T_DECLARED
+typedef __uintptr_t uintptr_t;
+#define _UINTPTR_T_DECLARED
+#endif
+
/*
* Types
*/
@@ -95,6 +105,14 @@ typedef __uid_t uid_t;
#endif
#define SOCK_SEQPACKET 5 /* sequenced packet stream */
+#if __BSD_VISIBLE
+/*
+ * Creation flags, OR'ed into socket() and socketpair() type argument.
+ */
+#define SOCK_CLOEXEC 0x10000000
+#define SOCK_NONBLOCK 0x20000000
+#endif
+
/*
* Option flags per-socket.
*/
@@ -346,6 +364,7 @@ struct sockproto {
#define PF_SCLUSTER AF_SCLUSTER
#define PF_ARP AF_ARP
#define PF_BLUETOOTH AF_BLUETOOTH
+#define PF_IEEE80211 AF_IEEE80211
#define PF_INET_SDP AF_INET_SDP
#define PF_INET6_SDP AF_INET6_SDP
@@ -357,47 +376,8 @@ struct sockproto {
* Second level is protocol family.
* Third level is protocol number.
*
- * Further levels are defined by the individual families below.
+ * Further levels are defined by the individual families.
*/
-#define NET_MAXID AF_MAX
-
-#define CTL_NET_NAMES { \
- { 0, 0 }, \
- { "unix", CTLTYPE_NODE }, \
- { "inet", CTLTYPE_NODE }, \
- { "implink", CTLTYPE_NODE }, \
- { "pup", CTLTYPE_NODE }, \
- { "chaos", CTLTYPE_NODE }, \
- { "xerox_ns", CTLTYPE_NODE }, \
- { "iso", CTLTYPE_NODE }, \
- { "emca", CTLTYPE_NODE }, \
- { "datakit", CTLTYPE_NODE }, \
- { "ccitt", CTLTYPE_NODE }, \
- { "ibm_sna", CTLTYPE_NODE }, \
- { "decnet", CTLTYPE_NODE }, \
- { "dec_dli", CTLTYPE_NODE }, \
- { "lat", CTLTYPE_NODE }, \
- { "hylink", CTLTYPE_NODE }, \
- { "appletalk", CTLTYPE_NODE }, \
- { "route", CTLTYPE_NODE }, \
- { "link_layer", CTLTYPE_NODE }, \
- { "xtp", CTLTYPE_NODE }, \
- { "coip", CTLTYPE_NODE }, \
- { "cnt", CTLTYPE_NODE }, \
- { "rtip", CTLTYPE_NODE }, \
- { "ipx", CTLTYPE_NODE }, \
- { "sip", CTLTYPE_NODE }, \
- { "pip", CTLTYPE_NODE }, \
- { "isdn", CTLTYPE_NODE }, \
- { "key", CTLTYPE_NODE }, \
- { "inet6", CTLTYPE_NODE }, \
- { "natm", CTLTYPE_NODE }, \
- { "atm", CTLTYPE_NODE }, \
- { "hdrcomplete", CTLTYPE_NODE }, \
- { "netgraph", CTLTYPE_NODE }, \
- { "snp", CTLTYPE_NODE }, \
- { "scp", CTLTYPE_NODE }, \
-}
/*
* PF_ROUTE - Routing table
@@ -413,16 +393,6 @@ struct sockproto {
#define NET_RT_IFMALIST 4 /* return multicast address list */
#define NET_RT_IFLISTL 5 /* Survey interface list, using 'l'en
* versions of msghdr structs. */
-#define NET_RT_MAXID 6
-
-#define CTL_NET_RT_NAMES { \
- { 0, 0 }, \
- { "dump", CTLTYPE_STRUCT }, \
- { "flags", CTLTYPE_STRUCT }, \
- { "iflist", CTLTYPE_STRUCT }, \
- { "ifmalist", CTLTYPE_STRUCT }, \
- { "iflistl", CTLTYPE_STRUCT }, \
-}
#endif /* __BSD_VISIBLE */
/*
@@ -451,19 +421,21 @@ struct msghdr {
#define MSG_TRUNC 0x10 /* data discarded before delivery */
#define MSG_CTRUNC 0x20 /* control data lost before delivery */
#define MSG_WAITALL 0x40 /* wait for full request or error */
-#define MSG_NOTIFICATION 0x2000 /* SCTP notification */
+#if __POSIX_VISIBLE >= 200809
+#define MSG_NOSIGNAL 0x20000 /* do not generate SIGPIPE on EOF */
+#endif
#if __BSD_VISIBLE
#define MSG_DONTWAIT 0x80 /* this message should be nonblocking */
#define MSG_EOF 0x100 /* data completes connection */
+#define MSG_NOTIFICATION 0x2000 /* SCTP notification */
#define MSG_NBIO 0x4000 /* FIONBIO mode, used by fifofs */
#define MSG_COMPAT 0x8000 /* used in sendit() */
+#define MSG_CMSG_CLOEXEC 0x40000 /* make received fds close-on-exec */
+#define MSG_WAITFORONE 0x80000 /* for recvmmsg() */
#endif
#ifdef _KERNEL
#define MSG_SOCALLBCK 0x10000 /* for use by socket callbacks - soreceive (TCP) */
#endif
-#if __BSD_VISIBLE
-#define MSG_NOSIGNAL 0x20000 /* do not generate SIGPIPE on EOF */
-#endif
/*
* Header for ancillary data objects in msg_control buffer.
@@ -528,7 +500,7 @@ struct sockcred {
/* given pointer to struct cmsghdr, return pointer to next cmsghdr */
#define CMSG_NXTHDR(mhdr, cmsg) \
- ((char *)(cmsg) == NULL ? CMSG_FIRSTHDR(mhdr) : \
+ ((char *)(cmsg) == (char *)0 ? CMSG_FIRSTHDR(mhdr) : \
((char *)(cmsg) + _ALIGN(((struct cmsghdr *)(cmsg))->cmsg_len) + \
_ALIGN(sizeof(struct cmsghdr)) > \
(char *)(mhdr)->msg_control + (mhdr)->msg_controllen) ? \
@@ -543,7 +515,7 @@ struct sockcred {
#define CMSG_FIRSTHDR(mhdr) \
((mhdr)->msg_controllen >= sizeof(struct cmsghdr) ? \
(struct cmsghdr *)(mhdr)->msg_control : \
- (struct cmsghdr *)NULL)
+ (struct cmsghdr *)0)
#if __BSD_VISIBLE
/* RFC 2292 additions */
@@ -592,10 +564,13 @@ struct omsghdr {
#define SHUT_WR 1 /* shut down the writing side */
#define SHUT_RDWR 2 /* shut down both sides */
+#if __BSD_VISIBLE
+/* for SCTP */
/* we cheat and use the SHUT_XX defines for these */
#define PRU_FLUSH_RD SHUT_RD
#define PRU_FLUSH_WR SHUT_WR
#define PRU_FLUSH_RDWR SHUT_RDWR
+#endif
#if __BSD_VISIBLE
@@ -613,9 +588,23 @@ struct sf_hdtr {
* Sendfile-specific flag(s)
*/
#define SF_NODISKIO 0x00000001
-#define SF_MNOWAIT 0x00000002
+#define SF_MNOWAIT 0x00000002 /* obsolete */
#define SF_SYNC 0x00000004
-#endif
+#define SF_NOCACHE 0x00000010
+#define SF_FLAGS(rh, flags) (((rh) << 16) | (flags))
+
+#ifdef _KERNEL
+#define SF_READAHEAD(flags) ((flags) >> 16)
+#endif /* _KERNEL */
+
+/*
+ * Sendmmsg/recvmmsg specific structure(s)
+ */
+struct mmsghdr {
+ struct msghdr msg_hdr; /* message header */
+ ssize_t msg_len; /* message length */
+};
+#endif /* __BSD_VISIBLE */
#ifndef _KERNEL
@@ -625,6 +614,11 @@ __BEGIN_DECLS
int accept(int, struct sockaddr * __restrict, socklen_t * __restrict);
int bind(int, const struct sockaddr *, socklen_t);
int connect(int, const struct sockaddr *, socklen_t);
+#if __BSD_VISIBLE
+int accept4(int, struct sockaddr * __restrict, socklen_t * __restrict, int);
+int bindat(int, int, const struct sockaddr *, socklen_t);
+int connectat(int, int, const struct sockaddr *, socklen_t);
+#endif
int getpeername(int, struct sockaddr * __restrict, socklen_t * __restrict);
int getsockname(int, struct sockaddr * __restrict, socklen_t * __restrict);
int getsockopt(int, int, int, void * __restrict, socklen_t * __restrict);
@@ -632,12 +626,18 @@ int listen(int, int);
ssize_t recv(int, void *, size_t, int);
ssize_t recvfrom(int, void *, size_t, int, struct sockaddr * __restrict, socklen_t * __restrict);
ssize_t recvmsg(int, struct msghdr *, int);
+#if __BSD_VISIBLE
+struct timespec;
+ssize_t recvmmsg(int, struct mmsghdr * __restrict, size_t, int,
+ const struct timespec * __restrict);
+#endif
ssize_t send(int, const void *, size_t, int);
ssize_t sendto(int, const void *,
size_t, int, const struct sockaddr *, socklen_t);
ssize_t sendmsg(int, const struct msghdr *, int);
#if __BSD_VISIBLE
int sendfile(int, int, off_t, size_t, struct sf_hdtr *, off_t *, int);
+ssize_t sendmmsg(int, struct mmsghdr * __restrict, size_t, int);
int setfib(int);
#endif
int setsockopt(int, int, int, const void *, socklen_t);
diff --git a/freebsd/sys/sys/socketvar.h b/freebsd/sys/sys/socketvar.h
index 94c3b24e..72eaa998 100644
--- a/freebsd/sys/sys/socketvar.h
+++ b/freebsd/sys/sys/socketvar.h
@@ -38,10 +38,12 @@
#include <sys/selinfo.h> /* for struct selinfo */
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <sys/osd.h>
#include <sys/_sx.h>
#include <sys/sockbuf.h>
#include <sys/sockstate.h>
#ifdef _KERNEL
+#include <sys/caprights.h>
#include <sys/sockopt.h>
#endif
@@ -62,7 +64,6 @@ struct socket;
* (a) constant after allocation, no locking required.
* (b) locked by SOCK_LOCK(so).
* (c) locked by SOCKBUF_LOCK(&so->so_rcv).
- * (d) locked by SOCKBUF_LOCK(&so->so_snd).
* (e) locked by ACCEPT_LOCK().
* (f) not locked since integer reads/writes are atomic.
* (g) used only as a sleep/wakeup address, no value.
@@ -76,7 +77,7 @@ struct socket {
short so_state; /* (b) internal state flags SS_* */
int so_qstate; /* (e) internal state flags SQ_* */
void *so_pcb; /* protocol control block */
- struct vnet *so_vnet; /* network stack instance */
+ struct vnet *so_vnet; /* (a) network stack instance */
struct protosw *so_proto; /* (a) protocol handle */
/*
* Variables for connection queuing.
@@ -93,16 +94,15 @@ struct socket {
TAILQ_HEAD(, socket) so_incomp; /* (e) queue of partial unaccepted connections */
TAILQ_HEAD(, socket) so_comp; /* (e) queue of complete unaccepted connections */
TAILQ_ENTRY(socket) so_list; /* (e) list of unaccepted connections */
- u_short so_qlen; /* (e) number of unaccepted connections */
- u_short so_incqlen; /* (e) number of unaccepted incomplete
+ u_int so_qlen; /* (e) number of unaccepted connections */
+ u_int so_incqlen; /* (e) number of unaccepted incomplete
connections */
- u_short so_qlimit; /* (e) max number queued connections */
+ u_int so_qlimit; /* (e) max number queued connections */
short so_timeo; /* (g) connection timeout */
u_short so_error; /* (f) error affecting connection */
struct sigio *so_sigio; /* [sg] information for async I/O or
out of band data (SIGURG) */
u_long so_oobmark; /* (c) chars to oob mark */
- TAILQ_HEAD(, aiocblist) so_aiojobq; /* AIO ops waiting on socket */
struct sockbuf so_rcv, so_snd;
@@ -117,6 +117,7 @@ struct socket {
void *so_accept_filter_arg; /* saved filter args */
char *so_accept_filter_str; /* saved user args */
} *so_accf;
+ struct osd osd; /* Object Specific extensions */
/*
* so_fibnum, so_user_cookie and friends can be used to attach
* some user-specified metadata to a socket, which then can be
@@ -125,6 +126,9 @@ struct socket {
*/
int so_fibnum; /* routing domain for this socket */
uint32_t so_user_cookie;
+
+ void *so_pspare[2]; /* packet pacing / general use */
+ int so_ispare[2]; /* packet pacing / general use */
};
/*
@@ -169,9 +173,9 @@ struct xsocket {
caddr_t so_pcb; /* another convenient handle */
int xso_protocol;
int xso_family;
- u_short so_qlen;
- u_short so_incqlen;
- u_short so_qlimit;
+ u_int so_qlen;
+ u_int so_incqlen;
+ u_int so_qlimit;
short so_timeo;
u_short so_error;
pid_t so_pgid;
@@ -205,7 +209,7 @@ struct xsocket {
/* can we read something from so? */
#define soreadabledata(so) \
- ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \
+ (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \
!TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error)
#define soreadable(so) \
(soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE))
@@ -292,11 +296,32 @@ MALLOC_DECLARE(M_PCB);
MALLOC_DECLARE(M_SONAME);
#endif
+/*
+ * Socket specific helper hook point identifiers
+ * Do not leave holes in the sequence, hook registration is a loop.
+ */
+#define HHOOK_SOCKET_OPT 0
+#define HHOOK_SOCKET_CREATE 1
+#define HHOOK_SOCKET_RCV 2
+#define HHOOK_SOCKET_SND 3
+#define HHOOK_FILT_SOREAD 4
+#define HHOOK_FILT_SOWRITE 5
+#define HHOOK_SOCKET_CLOSE 6
+#define HHOOK_SOCKET_LAST HHOOK_SOCKET_CLOSE
+
+struct socket_hhook_data {
+ struct socket *so;
+ struct mbuf *m;
+ void *hctx; /* hook point specific data*/
+ int status;
+};
+
extern int maxsockets;
extern u_long sb_max;
-extern struct uma_zone *socket_zone;
extern so_gen_t so_gencnt;
+struct file;
+struct filedesc;
struct mbuf;
struct sockaddr;
struct ucred;
@@ -313,16 +338,23 @@ struct uio;
/*
* From uipc_socket and friends
*/
-int sockargs(struct mbuf **mp, caddr_t buf, int buflen, int type);
int getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len);
+int getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp,
+ struct file **fpp, u_int *fflagp);
void soabort(struct socket *so);
int soaccept(struct socket *so, struct sockaddr **nam);
+void soaio_enqueue(struct task *task);
+void soaio_rcv(void *context, int pending);
+void soaio_snd(void *context, int pending);
int socheckuid(struct socket *so, uid_t uid);
int sobind(struct socket *so, struct sockaddr *nam, struct thread *td);
+int sobindat(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td);
int soclose(struct socket *so);
int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td);
+int soconnectat(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td);
int soconnect2(struct socket *so1, struct socket *so2);
-int socow_setup(struct mbuf *m0, struct uio *uio);
int socreate(int dom, struct socket **aso, int type, int proto,
struct ucred *cred, struct thread *td);
int sodisconnect(struct socket *so);
@@ -368,6 +400,11 @@ void soupcall_clear(struct socket *so, int which);
void soupcall_set(struct socket *so, int which,
int (*func)(struct socket *, void *, int), void *arg);
void sowakeup(struct socket *so, struct sockbuf *sb);
+#ifndef __rtems__
+void sowakeup_aio(struct socket *so, struct sockbuf *sb);
+#else /* __rtems__ */
+#define sowakeup_aio(so, sb) (void)0
+#endif /* __rtems__ */
int selsocket(struct socket *so, int events, struct timeval *tv,
struct thread *td);
diff --git a/freebsd/sys/sys/stdint.h b/freebsd/sys/sys/stdint.h
index 4c3bbd8e..2b253137 100644
--- a/freebsd/sys/sys/stdint.h
+++ b/freebsd/sys/sys/stdint.h
@@ -58,21 +58,16 @@ typedef __uint_fast16_t uint_fast16_t;
typedef __uint_fast32_t uint_fast32_t;
typedef __uint_fast64_t uint_fast64_t;
-#ifndef _INTMAX_T_DECLARED
-typedef __intmax_t intmax_t;
-#define _INTMAX_T_DECLARED
-#endif
-#ifndef _UINTMAX_T_DECLARED
-typedef __uintmax_t uintmax_t;
-#define _UINTMAX_T_DECLARED
-#endif
-
/* GNU and Darwin define this and people seem to think it's portable */
#if defined(UINTPTR_MAX) && defined(UINT64_MAX) && (UINTPTR_MAX == UINT64_MAX)
#define __WORDSIZE 64
#else
#define __WORDSIZE 32
#endif
+
+/* Limits of wchar_t. */
+#define WCHAR_MIN __WCHAR_MIN
+#define WCHAR_MAX __WCHAR_MAX
#endif /* __rtems__ */
#endif /* !_SYS_STDINT_H_ */
diff --git a/freebsd/sys/sys/sx.h b/freebsd/sys/sys/sx.h
index 53beea07..c285fa77 100644
--- a/freebsd/sys/sys/sx.h
+++ b/freebsd/sys/sys/sx.h
@@ -45,7 +45,7 @@
#ifdef __rtems__
#define SX_NOINLINE 1
#define _sx_slock _bsd__sx_xlock
-#define _sx_try_slock _bsd__sx_try_xlock
+#define sx_try_slock_ _bsd_sx_try_xlock_
#define _sx_sunlock _bsd__sx_xunlock
#endif /* __rtems__ */
/*
@@ -92,6 +92,8 @@
#ifdef _KERNEL
+#define sx_recurse lock_object.lo_data
+
/*
* Function prototipes. Routines that start with an underscore are not part
* of the public interface and are wrappered with a macro.
@@ -100,14 +102,14 @@ void sx_sysinit(void *arg);
#define sx_init(sx, desc) sx_init_flags((sx), (desc), 0)
void sx_init_flags(struct sx *sx, const char *description, int opts);
void sx_destroy(struct sx *sx);
+int sx_try_slock_(struct sx *sx, const char *file, int line);
+int sx_try_xlock_(struct sx *sx, const char *file, int line);
+int sx_try_upgrade_(struct sx *sx, const char *file, int line);
+void sx_downgrade_(struct sx *sx, const char *file, int line);
int _sx_slock(struct sx *sx, int opts, const char *file, int line);
int _sx_xlock(struct sx *sx, int opts, const char *file, int line);
-int _sx_try_slock(struct sx *sx, const char *file, int line);
-int _sx_try_xlock(struct sx *sx, const char *file, int line);
void _sx_sunlock(struct sx *sx, const char *file, int line);
void _sx_xunlock(struct sx *sx, const char *file, int line);
-int _sx_try_upgrade(struct sx *sx, const char *file, int line);
-void _sx_downgrade(struct sx *sx, const char *file, int line);
int _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts,
const char *file, int line);
int _sx_slock_hard(struct sx *sx, int opts, const char *file, int line);
@@ -115,21 +117,12 @@ void _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int
line);
void _sx_sunlock_hard(struct sx *sx, const char *file, int line);
#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
-void _sx_assert(struct sx *sx, int what, const char *file, int line);
+void _sx_assert(const struct sx *sx, int what, const char *file, int line);
#endif
#ifdef DDB
int sx_chain(struct thread *td, struct thread **ownerp);
#endif
-#define sx_downgrade_(sx, file, line) \
- _sx_downgrade((sx), (file), (line))
-#define sx_try_slock_(sx, file, line) \
- _sx_try_slock((sx), (file), (line))
-#define sx_try_xlock_(sx, file, line) \
- _sx_try_xlock((sx), (file), (line))
-#define sx_try_upgrade_(sx, file, line) \
- _sx_try_upgrade((sx), (file), (line))
-
struct sx_args {
struct sx *sa_sx;
const char *sa_desc;
@@ -164,11 +157,12 @@ __sx_xlock(struct sx *sx, struct thread *td, int opts, const char *file,
uintptr_t tid = (uintptr_t)td;
int error = 0;
- if (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid))
+ if (sx->sx_lock != SX_LOCK_UNLOCKED ||
+ !atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid))
error = _sx_xlock_hard(sx, tid, opts, file, line);
else
- LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_XLOCK_ACQUIRE,
- sx, 0, 0, file, line);
+ LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
+ 0, 0, file, line, LOCKSTAT_WRITER);
return (error);
}
@@ -179,7 +173,11 @@ __sx_xunlock(struct sx *sx, struct thread *td, const char *file, int line)
{
uintptr_t tid = (uintptr_t)td;
- if (!atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
+ if (sx->sx_recurse == 0)
+ LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx,
+ LOCKSTAT_WRITER);
+ if (sx->sx_lock != tid ||
+ !atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
_sx_xunlock_hard(sx, tid, file, line);
}
@@ -194,8 +192,8 @@ __sx_slock(struct sx *sx, int opts, const char *file, int line)
!atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER))
error = _sx_slock_hard(sx, opts, file, line);
else
- LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_SLOCK_ACQUIRE, sx, 0,
- 0, file, line);
+ LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
+ 0, 0, file, line, LOCKSTAT_READER);
return (error);
}
@@ -212,6 +210,7 @@ __sx_sunlock(struct sx *sx, const char *file, int line)
{
uintptr_t x = sx->sx_lock;
+ LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_READER);
if (x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS) ||
!atomic_cmpset_rel_ptr(&sx->sx_lock, x, x - SX_ONE_SHARER))
_sx_sunlock_hard(sx, file, line);
@@ -296,7 +295,8 @@ int sx_xlocked(struct sx *sx);
#define sx_unlock(sx) sx_unlock_((sx), LOCK_FILE, LOCK_LINE)
#define sx_sleep(chan, sx, pri, wmesg, timo) \
- _sleep((chan), &(sx)->lock_object, (pri), (wmesg), (timo))
+ _sleep((chan), &(sx)->lock_object, (pri), (wmesg), \
+ tick_sbt * (timo), 0, C_HARDCLOCK)
/*
* Options passed to sx_init_flags().
@@ -307,6 +307,7 @@ int sx_xlocked(struct sx *sx);
#define SX_QUIET 0x08
#define SX_NOADAPTIVE 0x10
#define SX_RECURSE 0x20
+#define SX_NEW 0x40
/*
* Options passed to sx_*lock_hard().
@@ -321,7 +322,7 @@ int sx_xlocked(struct sx *sx);
#define SA_RECURSED LA_RECURSED
#define SA_NOTRECURSED LA_NOTRECURSED
-/* Backwards compatability. */
+/* Backwards compatibility. */
#define SX_LOCKED LA_LOCKED
#define SX_SLOCKED LA_SLOCKED
#define SX_XLOCKED LA_XLOCKED
diff --git a/freebsd/sys/sys/sysctl.h b/freebsd/sys/sys/sysctl.h
index cfbbc7f3..687fb23b 100644
--- a/freebsd/sys/sys/sysctl.h
+++ b/freebsd/sys/sys/sysctl.h
@@ -48,7 +48,7 @@ struct thread;
* respective subsystem header files.
*/
-#define CTL_MAXNAME 24 /* largest number of components supported */
+#define CTL_MAXNAME 24 /* largest number of components supported */
/*
* Each subsystem defined by sysctl defines a list of variables
@@ -59,10 +59,10 @@ struct thread;
*/
struct ctlname {
char *ctl_name; /* subsystem name */
- int ctl_type; /* type of name */
+ int ctl_type; /* type of name */
};
-#define CTLTYPE 0xf /* Mask for the type */
+#define CTLTYPE 0xf /* mask for the type */
#define CTLTYPE_NODE 1 /* name is a node */
#define CTLTYPE_INT 2 /* name describes an integer */
#define CTLTYPE_STRING 3 /* name describes a string */
@@ -73,35 +73,43 @@ struct ctlname {
#define CTLTYPE_LONG 7 /* name describes a long */
#define CTLTYPE_ULONG 8 /* name describes an unsigned long */
#define CTLTYPE_U64 9 /* name describes an unsigned 64-bit number */
-
-#define CTLFLAG_RD 0x80000000 /* Allow reads of variable */
-#define CTLFLAG_WR 0x40000000 /* Allow writes to the variable */
-#define CTLFLAG_RW (CTLFLAG_RD|CTLFLAG_WR)
-#define CTLFLAG_ANYBODY 0x10000000 /* All users can set this var */
-#define CTLFLAG_SECURE 0x08000000 /* Permit set only if securelevel<=0 */
-#define CTLFLAG_PRISON 0x04000000 /* Prisoned roots can fiddle */
-#define CTLFLAG_DYN 0x02000000 /* Dynamic oid - can be freed */
-#define CTLFLAG_SKIP 0x01000000 /* Skip this sysctl when listing */
-#define CTLMASK_SECURE 0x00F00000 /* Secure level */
-#define CTLFLAG_TUN 0x00080000 /* Tunable variable */
+#define CTLTYPE_U8 0xa /* name describes an unsigned 8-bit number */
+#define CTLTYPE_U16 0xb /* name describes an unsigned 16-bit number */
+#define CTLTYPE_S8 0xc /* name describes a signed 8-bit number */
+#define CTLTYPE_S16 0xd /* name describes a signed 16-bit number */
+#define CTLTYPE_S32 0xe /* name describes a signed 32-bit number */
+#define CTLTYPE_U32 0xf /* name describes an unsigned 32-bit number */
+
+#define CTLFLAG_RD 0x80000000 /* Allow reads of variable */
+#define CTLFLAG_WR 0x40000000 /* Allow writes to the variable */
+#define CTLFLAG_RW (CTLFLAG_RD|CTLFLAG_WR)
+#define CTLFLAG_ANYBODY 0x10000000 /* All users can set this var */
+#define CTLFLAG_SECURE 0x08000000 /* Permit set only if securelevel<=0 */
+#define CTLFLAG_PRISON 0x04000000 /* Prisoned roots can fiddle */
+#define CTLFLAG_DYN 0x02000000 /* Dynamic oid - can be freed */
+#define CTLFLAG_SKIP 0x01000000 /* Skip this sysctl when listing */
+#define CTLMASK_SECURE 0x00F00000 /* Secure level */
+#define CTLFLAG_TUN 0x00080000 /* Default value is loaded from getenv() */
#define CTLFLAG_RDTUN (CTLFLAG_RD|CTLFLAG_TUN)
#define CTLFLAG_RWTUN (CTLFLAG_RW|CTLFLAG_TUN)
-#define CTLFLAG_MPSAFE 0x00040000 /* Handler is MP safe */
-#define CTLFLAG_VNET 0x00020000 /* Prisons with vnet can fiddle */
-#define CTLFLAG_DYING 0x00010000 /* oid is being removed */
-#define CTLFLAG_CAPRD 0x00008000 /* Can be read in capability mode */
-#define CTLFLAG_CAPWR 0x00004000 /* Can be written in capability mode */
-#define CTLFLAG_CAPRW (CTLFLAG_CAPRD|CTLFLAG_CAPWR)
+#define CTLFLAG_MPSAFE 0x00040000 /* Handler is MP safe */
+#define CTLFLAG_VNET 0x00020000 /* Prisons with vnet can fiddle */
+#define CTLFLAG_DYING 0x00010000 /* Oid is being removed */
+#define CTLFLAG_CAPRD 0x00008000 /* Can be read in capability mode */
+#define CTLFLAG_CAPWR 0x00004000 /* Can be written in capability mode */
+#define CTLFLAG_STATS 0x00002000 /* Statistics, not a tuneable */
+#define CTLFLAG_NOFETCH 0x00001000 /* Don't fetch tunable from getenv() */
+#define CTLFLAG_CAPRW (CTLFLAG_CAPRD|CTLFLAG_CAPWR)
/*
- * Secure level. Note that CTLFLAG_SECURE == CTLFLAG_SECURE1.
+ * Secure level. Note that CTLFLAG_SECURE == CTLFLAG_SECURE1.
*
* Secure when the securelevel is raised to at least N.
*/
-#define CTLSHIFT_SECURE 20
-#define CTLFLAG_SECURE1 (CTLFLAG_SECURE | (0 << CTLSHIFT_SECURE))
-#define CTLFLAG_SECURE2 (CTLFLAG_SECURE | (1 << CTLSHIFT_SECURE))
-#define CTLFLAG_SECURE3 (CTLFLAG_SECURE | (2 << CTLSHIFT_SECURE))
+#define CTLSHIFT_SECURE 20
+#define CTLFLAG_SECURE1 (CTLFLAG_SECURE | (0 << CTLSHIFT_SECURE))
+#define CTLFLAG_SECURE2 (CTLFLAG_SECURE | (1 << CTLSHIFT_SECURE))
+#define CTLFLAG_SECURE3 (CTLFLAG_SECURE | (2 << CTLSHIFT_SECURE))
/*
* USE THIS instead of a hardwired number from the categories below
@@ -109,14 +117,14 @@ struct ctlname {
* technology. This is the way nearly all new sysctl variables should
* be implemented.
* e.g. SYSCTL_INT(_parent, OID_AUTO, name, CTLFLAG_RW, &variable, 0, "");
- */
-#define OID_AUTO (-1)
+ */
+#define OID_AUTO (-1)
/*
* The starting number for dynamically-assigned entries. WARNING!
* ALL static sysctl entries should have numbers LESS than this!
*/
-#define CTL_AUTO_START 0x100
+#define CTL_AUTO_START 0x100
#ifdef _KERNEL
#include <sys/linker_set.h>
@@ -129,14 +137,15 @@ struct ctlname {
#endif
#define SYSCTL_HANDLER_ARGS struct sysctl_oid *oidp, void *arg1, \
- intptr_t arg2, struct sysctl_req *req
+ intmax_t arg2, struct sysctl_req *req
/* definitions for sysctl_req 'lock' member */
#define REQ_UNWIRED 1
#define REQ_WIRED 2
/* definitions for sysctl_req 'flags' member */
-#if defined(__amd64__) || defined(__ia64__) || defined(__powerpc64__)
+#if defined(__amd64__) || defined(__powerpc64__) ||\
+ (defined(__mips__) && defined(__mips_n64))
#define SCTL_MASK32 1 /* 32 bit emulation */
#endif
@@ -146,21 +155,21 @@ struct ctlname {
*/
struct sysctl_req {
struct thread *td; /* used for access checking */
- int lock; /* wiring state */
+ int lock; /* wiring state */
void *oldptr;
- size_t oldlen;
- size_t oldidx;
+ size_t oldlen;
+ size_t oldidx;
int (*oldfunc)(struct sysctl_req *, const void *, size_t);
#ifndef __rtems__
void *newptr;
#else /* __rtems__ */
const void *newptr;
#endif /* __rtems__ */
- size_t newlen;
- size_t newidx;
+ size_t newlen;
+ size_t newidx;
int (*newfunc)(struct sysctl_req *, void *, size_t);
- size_t validlen;
- int flags;
+ size_t validlen;
+ int flags;
};
SLIST_HEAD(sysctl_oid_list, sysctl_oid);
@@ -170,29 +179,40 @@ SLIST_HEAD(sysctl_oid_list, sysctl_oid);
* be hidden behind it, expanded by the handler.
*/
struct sysctl_oid {
+ struct sysctl_oid_list oid_children;
struct sysctl_oid_list *oid_parent;
SLIST_ENTRY(sysctl_oid) oid_link;
- int oid_number;
- u_int oid_kind;
+ int oid_number;
+ u_int oid_kind;
void *oid_arg1;
- intptr_t oid_arg2;
+ intmax_t oid_arg2;
const char *oid_name;
- int (*oid_handler)(SYSCTL_HANDLER_ARGS);
+ int (*oid_handler)(SYSCTL_HANDLER_ARGS);
const char *oid_fmt;
- int oid_refcnt;
- u_int oid_running;
+ int oid_refcnt;
+ u_int oid_running;
const char *oid_descr;
};
-#define SYSCTL_IN(r, p, l) (r->newfunc)(r, p, l)
-#define SYSCTL_OUT(r, p, l) (r->oldfunc)(r, p, l)
+#define SYSCTL_IN(r, p, l) (r->newfunc)(r, p, l)
+#define SYSCTL_OUT(r, p, l) (r->oldfunc)(r, p, l)
+#define SYSCTL_OUT_STR(r, p) (r->oldfunc)(r, p, strlen(p) + 1)
+int sysctl_handle_bool(SYSCTL_HANDLER_ARGS);
+int sysctl_handle_8(SYSCTL_HANDLER_ARGS);
+int sysctl_handle_16(SYSCTL_HANDLER_ARGS);
+int sysctl_handle_32(SYSCTL_HANDLER_ARGS);
+int sysctl_handle_64(SYSCTL_HANDLER_ARGS);
int sysctl_handle_int(SYSCTL_HANDLER_ARGS);
int sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS);
int sysctl_handle_long(SYSCTL_HANDLER_ARGS);
-int sysctl_handle_64(SYSCTL_HANDLER_ARGS);
int sysctl_handle_string(SYSCTL_HANDLER_ARGS);
int sysctl_handle_opaque(SYSCTL_HANDLER_ARGS);
+int sysctl_handle_counter_u64(SYSCTL_HANDLER_ARGS);
+int sysctl_handle_counter_u64_array(SYSCTL_HANDLER_ARGS);
+
+int sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS);
+int sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS);
int sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS);
int sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS);
@@ -206,27 +226,26 @@ void sysctl_unregister_oid(struct sysctl_oid *oidp);
/* Declare a static oid to allow child oids to be added to it. */
#ifndef __rtems__
-#define SYSCTL_DECL(name) \
- extern struct sysctl_oid_list sysctl_##name##_children
+#define SYSCTL_DECL(name) \
+ extern struct sysctl_oid sysctl__##name
#else /* __rtems__ */
-#define SYSCTL_DECL(name) \
- extern struct sysctl_oid_list _bsd_sysctl_##name##_children
+#define SYSCTL_DECL(name) \
+ extern struct sysctl_oid _bsd_sysctl__##name
#endif /* __rtems__ */
-/* Hide these in macros */
-#define SYSCTL_CHILDREN(oid_ptr) (struct sysctl_oid_list *) \
- (oid_ptr)->oid_arg1
-#define SYSCTL_CHILDREN_SET(oid_ptr, val) \
- (oid_ptr)->oid_arg1 = (val);
+/* Hide these in macros. */
+#define SYSCTL_CHILDREN(oid_ptr) (&(oid_ptr)->oid_children)
+#define SYSCTL_PARENT(oid_ptr) \
+ (((oid_ptr)->oid_parent != &sysctl__children) ? \
+ __containerof((oid_ptr)->oid_parent, struct sysctl_oid, \
+ oid_children) : (struct sysctl_oid *)NULL)
#ifndef __rtems__
-#define SYSCTL_STATIC_CHILDREN(oid_name) \
- (&sysctl_##oid_name##_children)
+#define SYSCTL_STATIC_CHILDREN(oid_name) (&sysctl__##oid_name.oid_children)
#else /* __rtems__ */
-#define SYSCTL_STATIC_CHILDREN(oid_name) \
- (&_bsd_sysctl_##oid_name##_children)
+#define SYSCTL_STATIC_CHILDREN(oid_name) (&_bsd_sysctl__##oid_name.oid_children)
#endif /* __rtems__ */
-/* === Structs and macros related to context handling === */
+/* === Structs and macros related to context handling. === */
/* All dynamically created sysctls can be tracked in a context list. */
struct sysctl_ctx_entry {
@@ -237,52 +256,88 @@ struct sysctl_ctx_entry {
TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#ifndef __rtems__
-#define SYSCTL_NODE_CHILDREN(parent, name) \
- sysctl_##parent##_##name##_children
+#define SYSCTL_NODE_CHILDREN(parent, name) \
+ sysctl__##parent##_##name.oid_children
#else /* __rtems__ */
-#define SYSCTL_NODE_CHILDREN(parent, name) \
- _bsd_sysctl_##parent##_##name##_children
+#define SYSCTL_NODE_CHILDREN(parent, name) \
+ _bsd_sysctl__##parent##_##name.oid_children
#endif /* __rtems__ */
#ifndef NO_SYSCTL_DESCR
-#define __DESCR(d) d
+#define __DESCR(d) d
#else
-#define __DESCR(d) ""
+#define __DESCR(d) ""
#endif
-/* This constructs a "raw" MIB oid. */
+/* This macro is only for internal use */
+#define SYSCTL_OID_RAW(id, parent_child_head, nbr, name, kind, a1, a2, handler, fmt, descr) \
+ struct sysctl_oid id = { \
+ .oid_parent = (parent_child_head), \
+ .oid_children = SLIST_HEAD_INITIALIZER(&id.oid_children), \
+ .oid_number = (nbr), \
+ .oid_kind = (kind), \
+ .oid_arg1 = (a1), \
+ .oid_arg2 = (a2), \
+ .oid_name = (name), \
+ .oid_handler = (handler), \
+ .oid_fmt = (fmt), \
+ .oid_descr = __DESCR(descr) \
+ }; \
+ DATA_SET(sysctl_set, id)
+
+/* This constructs a static "raw" MIB oid. */
#ifndef __rtems__
-#define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
- static struct sysctl_oid sysctl__##parent##_##name = { \
- &sysctl_##parent##_children, { NULL }, nbr, kind, \
- a1, a2, #name, handler, fmt, 0, 0, __DESCR(descr) }; \
- DATA_SET(sysctl_set, sysctl__##parent##_##name)
+#define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
+ static SYSCTL_OID_RAW(sysctl__##parent##_##name, \
+ SYSCTL_CHILDREN(&sysctl__##parent), \
+ nbr, #name, kind, a1, a2, handler, fmt, descr)
#else /* __rtems__ */
-#define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
- static struct sysctl_oid sysctl__##parent##_##name = { \
- &_bsd_sysctl_##parent##_children, { NULL }, nbr, kind, \
- a1, a2, #name, handler, fmt, 0, 0, __DESCR(descr) }; \
- DATA_SET(sysctl_set, sysctl__##parent##_##name)
+#define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
+ static SYSCTL_OID_RAW(_bsd_sysctl__##parent##_##name, \
+ SYSCTL_CHILDREN(&_bsd_sysctl__##parent), \
+ nbr, #name, kind, a1, a2, handler, fmt, descr)
#endif /* __rtems__ */
-#define SYSCTL_ADD_OID(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
+/* This constructs a global "raw" MIB oid. */
+#ifndef __rtems__
+#define SYSCTL_OID_GLOBAL(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
+ SYSCTL_OID_RAW(sysctl__##parent##_##name, \
+ SYSCTL_CHILDREN(&sysctl__##parent), \
+ nbr, #name, kind, a1, a2, handler, fmt, descr)
+#else /* __rtems__ */
+#define SYSCTL_OID_GLOBAL(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
+ SYSCTL_OID_RAW(_bsd_sysctl__##parent##_##name, \
+ SYSCTL_CHILDREN(&_bsd_sysctl__##parent), \
+ nbr, #name, kind, a1, a2, handler, fmt, descr)
+#endif /* __rtems__ */
+
+#define SYSCTL_ADD_OID(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
sysctl_add_oid(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, __DESCR(descr))
/* This constructs a root node from which other nodes can hang. */
-#define SYSCTL_ROOT_NODE(nbr, name, access, handler, descr) \
- SYSCTL_NODE(, nbr, name, access, handler, descr)
+#ifndef __rtems__
+#define SYSCTL_ROOT_NODE(nbr, name, access, handler, descr) \
+ SYSCTL_OID_RAW(sysctl___##name, &sysctl__children, \
+ nbr, #name, CTLTYPE_NODE|(access), NULL, 0, \
+ handler, "N", descr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_NODE)
+#else /* __rtems__ */
+#define SYSCTL_ROOT_NODE(nbr, name, access, handler, descr) \
+ SYSCTL_OID_RAW(_bsd_sysctl___##name, &sysctl__children, \
+ nbr, #name, CTLTYPE_NODE|(access), NULL, 0, \
+ handler, "N", descr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_NODE)
+#endif /* __rtems__ */
/* This constructs a node from which other oids can hang. */
-#define SYSCTL_NODE(parent, nbr, name, access, handler, descr) \
- struct sysctl_oid_list SYSCTL_NODE_CHILDREN(parent, name); \
- SYSCTL_OID(parent, nbr, name, CTLTYPE_NODE|(access), \
- (void*)&SYSCTL_NODE_CHILDREN(parent, name), 0, handler, "N", descr); \
+#define SYSCTL_NODE(parent, nbr, name, access, handler, descr) \
+ SYSCTL_OID_GLOBAL(parent, nbr, name, CTLTYPE_NODE|(access), \
+ NULL, 0, handler, "N", descr); \
CTASSERT(((access) & CTLTYPE) == 0 || \
((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_NODE)
-#define SYSCTL_ADD_ROOT_NODE(ctx, nbr, name, access, handler, descr) \
- SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(), nbr, name, access, handler, descr)
-
#define SYSCTL_ADD_NODE(ctx, parent, nbr, name, access, handler, descr) \
({ \
CTASSERT(((access) & CTLTYPE) == 0 || \
@@ -291,6 +346,15 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
NULL, 0, handler, "N", __DESCR(descr)); \
})
+#define SYSCTL_ADD_ROOT_NODE(ctx, nbr, name, access, handler, descr) \
+({ \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_NODE); \
+ sysctl_add_oid(ctx, &sysctl__children, nbr, name, \
+ CTLTYPE_NODE|(access), \
+ NULL, 0, handler, "N", __DESCR(descr)); \
+})
+
/* Oid for a string. len can be 0 to indicate '\0' termination. */
#define SYSCTL_STRING(parent, nbr, name, access, arg, len, descr) \
SYSCTL_OID(parent, nbr, name, CTLTYPE_STRING|(access), \
@@ -307,6 +371,184 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
__arg, len, sysctl_handle_string, "A", __DESCR(descr)); \
})
+/* Oid for a bool. If ptr is NULL, val is returned. */
+#define SYSCTL_NULL_BOOL_PTR ((bool *)NULL)
+#define SYSCTL_BOOL(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_U8 | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_bool, "CU", descr); \
+ CTASSERT(((access) & CTLTYPE) == 0 && \
+ sizeof(bool) == sizeof(*(ptr)))
+
+#define SYSCTL_ADD_BOOL(ctx, parent, nbr, name, access, ptr, val, descr) \
+({ \
+ bool *__ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_U8 | CTLFLAG_MPSAFE | (access), \
+ __ptr, val, sysctl_handle_bool, "CU", __DESCR(descr)); \
+})
+
+/* Oid for a signed 8-bit int. If ptr is NULL, val is returned. */
+#define SYSCTL_NULL_S8_PTR ((int8_t *)NULL)
+#define SYSCTL_S8(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_S8 | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_8, "C", descr); \
+ CTASSERT((((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S8) && \
+ sizeof(int8_t) == sizeof(*(ptr)))
+
+#define SYSCTL_ADD_S8(ctx, parent, nbr, name, access, ptr, val, descr) \
+({ \
+ int8_t *__ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S8); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_S8 | CTLFLAG_MPSAFE | (access), \
+ __ptr, val, sysctl_handle_8, "C", __DESCR(descr)); \
+})
+
+/* Oid for an unsigned 8-bit int. If ptr is NULL, val is returned. */
+#define SYSCTL_NULL_U8_PTR ((uint8_t *)NULL)
+#define SYSCTL_U8(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_U8 | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_8, "CU", descr); \
+ CTASSERT((((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U8) && \
+ sizeof(uint8_t) == sizeof(*(ptr)))
+
+#define SYSCTL_ADD_U8(ctx, parent, nbr, name, access, ptr, val, descr) \
+({ \
+ uint8_t *__ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U8); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_U8 | CTLFLAG_MPSAFE | (access), \
+ __ptr, val, sysctl_handle_8, "CU", __DESCR(descr)); \
+})
+
+/* Oid for a signed 16-bit int. If ptr is NULL, val is returned. */
+#define SYSCTL_NULL_S16_PTR ((int16_t *)NULL)
+#define SYSCTL_S16(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_S16 | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_16, "S", descr); \
+ CTASSERT((((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S16) && \
+ sizeof(int16_t) == sizeof(*(ptr)))
+
+#define SYSCTL_ADD_S16(ctx, parent, nbr, name, access, ptr, val, descr) \
+({ \
+ int16_t *__ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S16); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_S16 | CTLFLAG_MPSAFE | (access), \
+ __ptr, val, sysctl_handle_16, "S", __DESCR(descr)); \
+})
+
+/* Oid for an unsigned 16-bit int. If ptr is NULL, val is returned. */
+#define SYSCTL_NULL_U16_PTR ((uint16_t *)NULL)
+#define SYSCTL_U16(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_U16 | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_16, "SU", descr); \
+ CTASSERT((((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U16) && \
+ sizeof(uint16_t) == sizeof(*(ptr)))
+
+#define SYSCTL_ADD_U16(ctx, parent, nbr, name, access, ptr, val, descr) \
+({ \
+ uint16_t *__ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U16); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_U16 | CTLFLAG_MPSAFE | (access), \
+ __ptr, val, sysctl_handle_16, "SU", __DESCR(descr)); \
+})
+
+/* Oid for a signed 32-bit int. If ptr is NULL, val is returned. */
+#define SYSCTL_NULL_S32_PTR ((int32_t *)NULL)
+#define SYSCTL_S32(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_S32 | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_32, "I", descr); \
+ CTASSERT((((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S32) && \
+ sizeof(int32_t) == sizeof(*(ptr)))
+
+#define SYSCTL_ADD_S32(ctx, parent, nbr, name, access, ptr, val, descr) \
+({ \
+ int32_t *__ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S32); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_S32 | CTLFLAG_MPSAFE | (access), \
+ __ptr, val, sysctl_handle_32, "I", __DESCR(descr)); \
+})
+
+/* Oid for an unsigned 32-bit int. If ptr is NULL, val is returned. */
+#define SYSCTL_NULL_U32_PTR ((uint32_t *)NULL)
+#define SYSCTL_U32(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_U32 | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_32, "IU", descr); \
+ CTASSERT((((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U32) && \
+ sizeof(uint32_t) == sizeof(*(ptr)))
+
+#define SYSCTL_ADD_U32(ctx, parent, nbr, name, access, ptr, val, descr) \
+({ \
+ uint32_t *__ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U32); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_U32 | CTLFLAG_MPSAFE | (access), \
+ __ptr, val, sysctl_handle_32, "IU", __DESCR(descr)); \
+})
+
+/* Oid for a signed 64-bit int. If ptr is NULL, val is returned. */
+#define SYSCTL_NULL_S64_PTR ((int64_t *)NULL)
+#define SYSCTL_S64(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_S64 | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_64, "Q", descr); \
+ CTASSERT((((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64) && \
+ sizeof(int64_t) == sizeof(*(ptr)))
+
+#define SYSCTL_ADD_S64(ctx, parent, nbr, name, access, ptr, val, descr) \
+({ \
+ int64_t *__ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_S64 | CTLFLAG_MPSAFE | (access), \
+ __ptr, val, sysctl_handle_64, "Q", __DESCR(descr)); \
+})
+
+/* Oid for an unsigned 64-bit int. If ptr is NULL, val is returned. */
+#define SYSCTL_NULL_U64_PTR ((uint64_t *)NULL)
+#define SYSCTL_U64(parent, nbr, name, access, ptr, val, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \
+ ptr, val, sysctl_handle_64, "QU", descr); \
+ CTASSERT((((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U64) && \
+ sizeof(uint64_t) == sizeof(*(ptr)))
+
+#define SYSCTL_ADD_U64(ctx, parent, nbr, name, access, ptr, val, descr) \
+({ \
+ uint64_t *__ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U64); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \
+ __ptr, val, sysctl_handle_64, "QU", __DESCR(descr)); \
+})
+
/* Oid for an int. If ptr is SYSCTL_NULL_INT_PTR, val is returned. */
#define SYSCTL_NULL_INT_PTR ((int *)NULL)
#define SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) \
@@ -426,20 +668,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
__ptr, 0, sysctl_handle_64, "QU", __DESCR(descr)); \
})
-/* Oid for a 64-bit unsigned counter(9). The pointer must be non NULL. */
-#define SYSCTL_COUNTER_U64(parent, nbr, name, access, ptr, descr) \
- SYSCTL_ASSERT_TYPE(UINT64, ptr, parent, name); \
- SYSCTL_OID(parent, nbr, name, \
- CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \
- ptr, 0, sysctl_handle_counter_u64, "QU", descr)
-
-#define SYSCTL_ADD_COUNTER_U64(ctx, parent, nbr, name, access, ptr, descr)\
- sysctl_add_oid(ctx, parent, nbr, name, \
- CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \
- SYSCTL_ADD_ASSERT_TYPE(UINT64, ptr), 0, \
- sysctl_handle_counter_u64, "QU", __DESCR(descr))
-
-/* Oid for a CPU dependant variable */
+/* Oid for a CPU dependent variable */
#define SYSCTL_ADD_UAUTO(ctx, parent, nbr, name, access, ptr, descr) \
({ \
struct sysctl_oid *__ret; \
@@ -460,6 +689,48 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
__ret; \
})
+/* Oid for a 64-bit unsigned counter(9). The pointer must be non NULL. */
+#define SYSCTL_COUNTER_U64(parent, nbr, name, access, ptr, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \
+ (ptr), 0, sysctl_handle_counter_u64, "QU", descr); \
+ CTASSERT((((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U64) && \
+ sizeof(counter_u64_t) == sizeof(*(ptr)) && \
+ sizeof(uint64_t) == sizeof(**(ptr)))
+
+#define SYSCTL_ADD_COUNTER_U64(ctx, parent, nbr, name, access, ptr, descr) \
+({ \
+ counter_u64_t *__ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U64); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \
+ __ptr, 0, sysctl_handle_counter_u64, "QU", __DESCR(descr)); \
+})
+
+/* Oid for an array of counter(9)s. The pointer and length must be non zero. */
+#define SYSCTL_COUNTER_U64_ARRAY(parent, nbr, name, access, ptr, len, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_OPAQUE | CTLFLAG_MPSAFE | (access), \
+ (ptr), (len), sysctl_handle_counter_u64_array, "S", descr); \
+ CTASSERT((((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE) && \
+ sizeof(counter_u64_t) == sizeof(*(ptr)) && \
+ sizeof(uint64_t) == sizeof(**(ptr)))
+
+#define SYSCTL_ADD_COUNTER_U64_ARRAY(ctx, parent, nbr, name, access, \
+ ptr, len, descr) \
+({ \
+ counter_u64_t *__ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_OPAQUE | CTLFLAG_MPSAFE | (access), \
+ __ptr, len, sysctl_handle_counter_u64_array, "S", \
+ __DESCR(descr)); \
+})
+
/* Oid for an opaque object. Specified by a pointer and a length. */
#define SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr) \
SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|(access), \
@@ -483,7 +754,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
CTASSERT(((access) & CTLTYPE) == 0 || \
((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE)
-#define SYSCTL_ADD_STRUCT(ctx, parent, nbr, name, access, ptr, type, descr) \
+#define SYSCTL_ADD_STRUCT(ctx, parent, nbr, name, access, ptr, type, descr) \
({ \
CTASSERT(((access) & CTLTYPE) == 0 || \
((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE); \
@@ -493,20 +764,56 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
})
/* Oid for a procedure. Specified by a pointer and an arg. */
-#define SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, descr) \
+#define SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, descr) \
SYSCTL_OID(parent, nbr, name, (access), \
ptr, arg, handler, fmt, descr); \
CTASSERT(((access) & CTLTYPE) != 0)
-#define SYSCTL_ADD_PROC(ctx, parent, nbr, name, access, ptr, arg, handler, fmt, descr) \
+#define SYSCTL_ADD_PROC(ctx, parent, nbr, name, access, ptr, arg, handler, fmt, descr) \
({ \
CTASSERT(((access) & CTLTYPE) != 0); \
sysctl_add_oid(ctx, parent, nbr, name, (access), \
(ptr), (arg), (handler), (fmt), __DESCR(descr)); \
})
+/* Oid to handle limits on uma(9) zone specified by pointer. */
+#define SYSCTL_UMA_MAX(parent, nbr, name, access, ptr, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_INT | CTLFLAG_MPSAFE | (access), \
+ (ptr), 0, sysctl_handle_uma_zone_max, "I", descr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT)
+
+#define SYSCTL_ADD_UMA_MAX(ctx, parent, nbr, name, access, ptr, descr) \
+({ \
+ uma_zone_t __ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_INT | CTLFLAG_MPSAFE | (access), \
+ __ptr, 0, sysctl_handle_uma_zone_max, "I", __DESCR(descr)); \
+})
+
+/* Oid to obtain current use of uma(9) zone specified by pointer. */
+#define SYSCTL_UMA_CUR(parent, nbr, name, access, ptr, descr) \
+ SYSCTL_OID(parent, nbr, name, \
+ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \
+ (ptr), 0, sysctl_handle_uma_zone_cur, "I", descr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT)
+
+#define SYSCTL_ADD_UMA_CUR(ctx, parent, nbr, name, access, ptr, descr) \
+({ \
+ uma_zone_t __ptr = (ptr); \
+ CTASSERT(((access) & CTLTYPE) == 0 || \
+ ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT); \
+ sysctl_add_oid(ctx, parent, nbr, name, \
+ CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \
+ __ptr, 0, sysctl_handle_uma_zone_cur, "I", __DESCR(descr)); \
+})
+
/*
- * A macro to generate a read-only sysctl to indicate the presense of optional
+ * A macro to generate a read-only sysctl to indicate the presence of optional
* kernel features.
*/
#define FEATURE(name, desc) \
@@ -528,33 +835,19 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define CTL_MACHDEP 7 /* machine dependent */
#define CTL_USER 8 /* user-level */
#define CTL_P1003_1B 9 /* POSIX 1003.1B */
-#define CTL_MAXID 10 /* number of valid top-level ids */
-
-#define CTL_NAMES { \
- { 0, 0 }, \
- { "kern", CTLTYPE_NODE }, \
- { "vm", CTLTYPE_NODE }, \
- { "vfs", CTLTYPE_NODE }, \
- { "net", CTLTYPE_NODE }, \
- { "debug", CTLTYPE_NODE }, \
- { "hw", CTLTYPE_NODE }, \
- { "machdep", CTLTYPE_NODE }, \
- { "user", CTLTYPE_NODE }, \
- { "p1003_1b", CTLTYPE_NODE }, \
-}
/*
* CTL_KERN identifiers
*/
-#define KERN_OSTYPE 1 /* string: system version */
-#define KERN_OSRELEASE 2 /* string: system release */
-#define KERN_OSREV 3 /* int: system revision */
-#define KERN_VERSION 4 /* string: compile time info */
-#define KERN_MAXVNODES 5 /* int: max vnodes */
-#define KERN_MAXPROC 6 /* int: max processes */
-#define KERN_MAXFILES 7 /* int: max open files */
-#define KERN_ARGMAX 8 /* int: max arguments to exec */
-#define KERN_SECURELVL 9 /* int: system security level */
+#define KERN_OSTYPE 1 /* string: system version */
+#define KERN_OSRELEASE 2 /* string: system release */
+#define KERN_OSREV 3 /* int: system revision */
+#define KERN_VERSION 4 /* string: compile time info */
+#define KERN_MAXVNODES 5 /* int: max vnodes */
+#define KERN_MAXPROC 6 /* int: max processes */
+#define KERN_MAXFILES 7 /* int: max open files */
+#define KERN_ARGMAX 8 /* int: max arguments to exec */
+#define KERN_SECURELVL 9 /* int: system security level */
#define KERN_HOSTNAME 10 /* string: hostname */
#define KERN_HOSTID 11 /* int: host identifier */
#define KERN_CLOCKRATE 12 /* struct: struct clockrate */
@@ -567,14 +860,14 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define KERN_JOB_CONTROL 19 /* int: is job control available */
#define KERN_SAVED_IDS 20 /* int: saved set-user/group-ID */
#define KERN_BOOTTIME 21 /* struct: time kernel was booted */
-#define KERN_NISDOMAINNAME 22 /* string: YP domain name */
-#define KERN_UPDATEINTERVAL 23 /* int: update process sleep time */
-#define KERN_OSRELDATE 24 /* int: kernel release date */
-#define KERN_NTP_PLL 25 /* node: NTP PLL control */
+#define KERN_NISDOMAINNAME 22 /* string: YP domain name */
+#define KERN_UPDATEINTERVAL 23 /* int: update process sleep time */
+#define KERN_OSRELDATE 24 /* int: kernel release date */
+#define KERN_NTP_PLL 25 /* node: NTP PLL control */
#define KERN_BOOTFILE 26 /* string: name of booted kernel */
#define KERN_MAXFILESPERPROC 27 /* int: max open files per proc */
-#define KERN_MAXPROCPERUID 28 /* int: max processes per uid */
-#define KERN_DUMPDEV 29 /* struct cdev *: device to dump on */
+#define KERN_MAXPROCPERUID 28 /* int: max processes per uid */
+#define KERN_DUMPDEV 29 /* struct cdev *: device to dump on */
#define KERN_IPC 30 /* node: anything related to IPC */
#define KERN_DUMMY 31 /* unused */
#define KERN_PS_STRINGS 32 /* int: address of PS_STRINGS */
@@ -583,59 +876,10 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define KERN_IOV_MAX 35 /* int: value of UIO_MAXIOV */
#define KERN_HOSTUUID 36 /* string: host UUID identifier */
#define KERN_ARND 37 /* int: from arc4rand() */
-#define KERN_MAXID 38 /* number of valid kern ids */
-
-#define CTL_KERN_NAMES { \
- { 0, 0 }, \
- { "ostype", CTLTYPE_STRING }, \
- { "osrelease", CTLTYPE_STRING }, \
- { "osrevision", CTLTYPE_INT }, \
- { "version", CTLTYPE_STRING }, \
- { "maxvnodes", CTLTYPE_INT }, \
- { "maxproc", CTLTYPE_INT }, \
- { "maxfiles", CTLTYPE_INT }, \
- { "argmax", CTLTYPE_INT }, \
- { "securelevel", CTLTYPE_INT }, \
- { "hostname", CTLTYPE_STRING }, \
- { "hostid", CTLTYPE_UINT }, \
- { "clockrate", CTLTYPE_STRUCT }, \
- { "vnode", CTLTYPE_STRUCT }, \
- { "proc", CTLTYPE_STRUCT }, \
- { "file", CTLTYPE_STRUCT }, \
- { "profiling", CTLTYPE_NODE }, \
- { "posix1version", CTLTYPE_INT }, \
- { "ngroups", CTLTYPE_INT }, \
- { "job_control", CTLTYPE_INT }, \
- { "saved_ids", CTLTYPE_INT }, \
- { "boottime", CTLTYPE_STRUCT }, \
- { "nisdomainname", CTLTYPE_STRING }, \
- { "update", CTLTYPE_INT }, \
- { "osreldate", CTLTYPE_INT }, \
- { "ntp_pll", CTLTYPE_NODE }, \
- { "bootfile", CTLTYPE_STRING }, \
- { "maxfilesperproc", CTLTYPE_INT }, \
- { "maxprocperuid", CTLTYPE_INT }, \
- { "ipc", CTLTYPE_NODE }, \
- { "dummy", CTLTYPE_INT }, \
- { "ps_strings", CTLTYPE_INT }, \
- { "usrstack", CTLTYPE_INT }, \
- { "logsigexit", CTLTYPE_INT }, \
- { "iov_max", CTLTYPE_INT }, \
- { "hostuuid", CTLTYPE_STRING }, \
- { "arc4rand", CTLTYPE_OPAQUE }, \
-}
-
-/*
- * CTL_VFS identifiers
- */
-#define CTL_VFS_NAMES { \
- { "vfsconf", CTLTYPE_STRUCT }, \
-}
-
/*
* KERN_PROC subtypes
*/
-#define KERN_PROC_ALL 0 /* everything */
+#define KERN_PROC_ALL 0 /* everything */
#define KERN_PROC_PID 1 /* by process id */
#define KERN_PROC_PGRP 2 /* by process group id */
#define KERN_PROC_SESSION 3 /* by session of pid */
@@ -666,11 +910,13 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define KERN_PROC_UMASK 39 /* process umask */
#define KERN_PROC_OSREL 40 /* osreldate for process binary */
#define KERN_PROC_SIGTRAMP 41 /* signal trampoline location */
+#define KERN_PROC_CWD 42 /* process current working directory */
+#define KERN_PROC_NFDS 43 /* number of open file descriptors */
/*
* KERN_IPC identifiers
*/
-#define KIPC_MAXSOCKBUF 1 /* int: max size of a socket buffer */
+#define KIPC_MAXSOCKBUF 1 /* int: max size of a socket buffer */
#define KIPC_SOCKBUF_WASTE 2 /* int: wastage factor in sockbuf */
#define KIPC_SOMAXCONN 3 /* int: max length of connection q */
#define KIPC_MAX_LINKHDR 4 /* int: max length of link header */
@@ -690,26 +936,9 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define HW_PAGESIZE 7 /* int: software page size */
#define HW_DISKNAMES 8 /* strings: disk drive names */
#define HW_DISKSTATS 9 /* struct: diskstats[] */
-#define HW_FLOATINGPT 10 /* int: has HW floating point? */
-#define HW_MACHINE_ARCH 11 /* string: machine architecture */
+#define HW_FLOATINGPT 10 /* int: has HW floating point? */
+#define HW_MACHINE_ARCH 11 /* string: machine architecture */
#define HW_REALMEM 12 /* int: 'real' memory */
-#define HW_MAXID 13 /* number of valid hw ids */
-
-#define CTL_HW_NAMES { \
- { 0, 0 }, \
- { "machine", CTLTYPE_STRING }, \
- { "model", CTLTYPE_STRING }, \
- { "ncpu", CTLTYPE_INT }, \
- { "byteorder", CTLTYPE_INT }, \
- { "physmem", CTLTYPE_ULONG }, \
- { "usermem", CTLTYPE_ULONG }, \
- { "pagesize", CTLTYPE_INT }, \
- { "disknames", CTLTYPE_STRUCT }, \
- { "diskstats", CTLTYPE_STRUCT }, \
- { "floatingpoint", CTLTYPE_INT }, \
- { "machine_arch", CTLTYPE_STRING }, \
- { "realmem", CTLTYPE_ULONG }, \
-}
/*
* CTL_USER definitions
@@ -734,88 +963,34 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define USER_POSIX2_UPE 18 /* int: POSIX2_UPE */
#define USER_STREAM_MAX 19 /* int: POSIX2_STREAM_MAX */
#define USER_TZNAME_MAX 20 /* int: POSIX2_TZNAME_MAX */
-#define USER_MAXID 21 /* number of valid user ids */
-
-#define CTL_USER_NAMES { \
- { 0, 0 }, \
- { "cs_path", CTLTYPE_STRING }, \
- { "bc_base_max", CTLTYPE_INT }, \
- { "bc_dim_max", CTLTYPE_INT }, \
- { "bc_scale_max", CTLTYPE_INT }, \
- { "bc_string_max", CTLTYPE_INT }, \
- { "coll_weights_max", CTLTYPE_INT }, \
- { "expr_nest_max", CTLTYPE_INT }, \
- { "line_max", CTLTYPE_INT }, \
- { "re_dup_max", CTLTYPE_INT }, \
- { "posix2_version", CTLTYPE_INT }, \
- { "posix2_c_bind", CTLTYPE_INT }, \
- { "posix2_c_dev", CTLTYPE_INT }, \
- { "posix2_char_term", CTLTYPE_INT }, \
- { "posix2_fort_dev", CTLTYPE_INT }, \
- { "posix2_fort_run", CTLTYPE_INT }, \
- { "posix2_localedef", CTLTYPE_INT }, \
- { "posix2_sw_dev", CTLTYPE_INT }, \
- { "posix2_upe", CTLTYPE_INT }, \
- { "stream_max", CTLTYPE_INT }, \
- { "tzname_max", CTLTYPE_INT }, \
-}
-
-#define CTL_P1003_1B_ASYNCHRONOUS_IO 1 /* boolean */
-#define CTL_P1003_1B_MAPPED_FILES 2 /* boolean */
-#define CTL_P1003_1B_MEMLOCK 3 /* boolean */
-#define CTL_P1003_1B_MEMLOCK_RANGE 4 /* boolean */
-#define CTL_P1003_1B_MEMORY_PROTECTION 5 /* boolean */
-#define CTL_P1003_1B_MESSAGE_PASSING 6 /* boolean */
-#define CTL_P1003_1B_PRIORITIZED_IO 7 /* boolean */
-#define CTL_P1003_1B_PRIORITY_SCHEDULING 8 /* boolean */
-#define CTL_P1003_1B_REALTIME_SIGNALS 9 /* boolean */
-#define CTL_P1003_1B_SEMAPHORES 10 /* boolean */
-#define CTL_P1003_1B_FSYNC 11 /* boolean */
-#define CTL_P1003_1B_SHARED_MEMORY_OBJECTS 12 /* boolean */
-#define CTL_P1003_1B_SYNCHRONIZED_IO 13 /* boolean */
-#define CTL_P1003_1B_TIMERS 14 /* boolean */
-#define CTL_P1003_1B_AIO_LISTIO_MAX 15 /* int */
-#define CTL_P1003_1B_AIO_MAX 16 /* int */
-#define CTL_P1003_1B_AIO_PRIO_DELTA_MAX 17 /* int */
-#define CTL_P1003_1B_DELAYTIMER_MAX 18 /* int */
-#define CTL_P1003_1B_MQ_OPEN_MAX 19 /* int */
-#define CTL_P1003_1B_PAGESIZE 20 /* int */
-#define CTL_P1003_1B_RTSIG_MAX 21 /* int */
-#define CTL_P1003_1B_SEM_NSEMS_MAX 22 /* int */
-#define CTL_P1003_1B_SEM_VALUE_MAX 23 /* int */
-#define CTL_P1003_1B_SIGQUEUE_MAX 24 /* int */
-#define CTL_P1003_1B_TIMER_MAX 25 /* int */
-
-#define CTL_P1003_1B_MAXID 26
-
-#define CTL_P1003_1B_NAMES { \
- { 0, 0 }, \
- { "asynchronous_io", CTLTYPE_INT }, \
- { "mapped_files", CTLTYPE_INT }, \
- { "memlock", CTLTYPE_INT }, \
- { "memlock_range", CTLTYPE_INT }, \
- { "memory_protection", CTLTYPE_INT }, \
- { "message_passing", CTLTYPE_INT }, \
- { "prioritized_io", CTLTYPE_INT }, \
- { "priority_scheduling", CTLTYPE_INT }, \
- { "realtime_signals", CTLTYPE_INT }, \
- { "semaphores", CTLTYPE_INT }, \
- { "fsync", CTLTYPE_INT }, \
- { "shared_memory_objects", CTLTYPE_INT }, \
- { "synchronized_io", CTLTYPE_INT }, \
- { "timers", CTLTYPE_INT }, \
- { "aio_listio_max", CTLTYPE_INT }, \
- { "aio_max", CTLTYPE_INT }, \
- { "aio_prio_delta_max", CTLTYPE_INT }, \
- { "delaytimer_max", CTLTYPE_INT }, \
- { "mq_open_max", CTLTYPE_INT }, \
- { "pagesize", CTLTYPE_INT }, \
- { "rtsig_max", CTLTYPE_INT }, \
- { "nsems_max", CTLTYPE_INT }, \
- { "sem_value_max", CTLTYPE_INT }, \
- { "sigqueue_max", CTLTYPE_INT }, \
- { "timer_max", CTLTYPE_INT }, \
-}
+
+#define CTL_P1003_1B_ASYNCHRONOUS_IO 1 /* boolean */
+#define CTL_P1003_1B_MAPPED_FILES 2 /* boolean */
+#define CTL_P1003_1B_MEMLOCK 3 /* boolean */
+#define CTL_P1003_1B_MEMLOCK_RANGE 4 /* boolean */
+#define CTL_P1003_1B_MEMORY_PROTECTION 5 /* boolean */
+#define CTL_P1003_1B_MESSAGE_PASSING 6 /* boolean */
+#define CTL_P1003_1B_PRIORITIZED_IO 7 /* boolean */
+#define CTL_P1003_1B_PRIORITY_SCHEDULING 8 /* boolean */
+#define CTL_P1003_1B_REALTIME_SIGNALS 9 /* boolean */
+#define CTL_P1003_1B_SEMAPHORES 10 /* boolean */
+#define CTL_P1003_1B_FSYNC 11 /* boolean */
+#define CTL_P1003_1B_SHARED_MEMORY_OBJECTS 12 /* boolean */
+#define CTL_P1003_1B_SYNCHRONIZED_IO 13 /* boolean */
+#define CTL_P1003_1B_TIMERS 14 /* boolean */
+#define CTL_P1003_1B_AIO_LISTIO_MAX 15 /* int */
+#define CTL_P1003_1B_AIO_MAX 16 /* int */
+#define CTL_P1003_1B_AIO_PRIO_DELTA_MAX 17 /* int */
+#define CTL_P1003_1B_DELAYTIMER_MAX 18 /* int */
+#define CTL_P1003_1B_MQ_OPEN_MAX 19 /* int */
+#define CTL_P1003_1B_PAGESIZE 20 /* int */
+#define CTL_P1003_1B_RTSIG_MAX 21 /* int */
+#define CTL_P1003_1B_SEM_NSEMS_MAX 22 /* int */
+#define CTL_P1003_1B_SEM_VALUE_MAX 23 /* int */
+#define CTL_P1003_1B_SIGQUEUE_MAX 24 /* int */
+#define CTL_P1003_1B_TIMER_MAX 25 /* int */
+
+#define CTL_P1003_1B_MAXID 26
#ifdef _KERNEL
@@ -859,49 +1034,54 @@ extern char kern_ident[];
/* Dynamic oid handling */
struct sysctl_oid *sysctl_add_oid(struct sysctl_ctx_list *clist,
- struct sysctl_oid_list *parent, int nbr, const char *name,
- int kind, void *arg1, intptr_t arg2,
- int (*handler) (SYSCTL_HANDLER_ARGS),
- const char *fmt, const char *descr);
+ struct sysctl_oid_list *parent, int nbr, const char *name, int kind,
+ void *arg1, intmax_t arg2, int (*handler)(SYSCTL_HANDLER_ARGS),
+ const char *fmt, const char *descr);
int sysctl_remove_name(struct sysctl_oid *parent, const char *name, int del,
- int recurse);
+ int recurse);
void sysctl_rename_oid(struct sysctl_oid *oidp, const char *name);
int sysctl_move_oid(struct sysctl_oid *oidp,
- struct sysctl_oid_list *parent);
+ struct sysctl_oid_list *parent);
int sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse);
int sysctl_ctx_init(struct sysctl_ctx_list *clist);
int sysctl_ctx_free(struct sysctl_ctx_list *clist);
struct sysctl_ctx_entry *sysctl_ctx_entry_add(struct sysctl_ctx_list *clist,
- struct sysctl_oid *oidp);
+ struct sysctl_oid *oidp);
struct sysctl_ctx_entry *sysctl_ctx_entry_find(struct sysctl_ctx_list *clist,
- struct sysctl_oid *oidp);
+ struct sysctl_oid *oidp);
int sysctl_ctx_entry_del(struct sysctl_ctx_list *clist,
- struct sysctl_oid *oidp);
+ struct sysctl_oid *oidp);
int kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old,
#ifndef __rtems__
- size_t *oldlenp, void *new, size_t newlen,
+ size_t *oldlenp, void *new, size_t newlen, size_t *retval,
#else /* __rtems__ */
- size_t *oldlenp, const void *newp, size_t newlen,
+ size_t *oldlenp, const void *newp, size_t newlen, size_t *retval,
#endif /* __rtems__ */
- size_t *retval, int flags);
+ int flags);
+int kernel_sysctlbyname(struct thread *td, char *name, void *old,
#ifndef __rtems__
-int kernel_sysctlbyname(struct thread *td, char *name,
- void *old, size_t *oldlenp, void *new, size_t newlen,
- size_t *retval, int flags);
+ size_t *oldlenp, void *new, size_t newlen, size_t *retval,
+#else /* __rtems__ */
+ size_t *oldlenp, const void *newp, size_t newlen, size_t *retval,
+#endif /* __rtems__ */
+ int flags);
int userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
- size_t *oldlenp, int inkernel, void *new, size_t newlen,
- size_t *retval, int flags);
+#ifndef __rtems__
+ size_t *oldlenp, int inkernel, void *new, size_t newlen,
+#else /* __rtems__ */
+ size_t *oldlenp, int inkernel, const void *newp, size_t newlen,
#endif /* __rtems__ */
+ size_t *retval, int flags);
int sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid,
- int *nindx, struct sysctl_req *req);
-void sysctl_lock(void);
-void sysctl_unlock(void);
+ int *nindx, struct sysctl_req *req);
+void sysctl_wlock(void);
+void sysctl_wunlock(void);
int sysctl_wire_old_buffer(struct sysctl_req *req, size_t len);
struct sbuf;
-struct sbuf *sbuf_new_for_sysctl(struct sbuf *, char *, int,
- struct sysctl_req *);
+struct sbuf *sbuf_new_for_sysctl(struct sbuf *, char *, int,
+ struct sysctl_req *);
#else /* !_KERNEL */
#include <sys/cdefs.h>
diff --git a/freebsd/sys/sys/syslog.h b/freebsd/sys/sys/syslog.h
index 6f128314..61bad21c 100644
--- a/freebsd/sys/sys/syslog.h
+++ b/freebsd/sys/sys/syslog.h
@@ -69,7 +69,7 @@ typedef struct _code {
int c_val;
} CODE;
-CODE prioritynames[] = {
+static const CODE prioritynames[] = {
{ "alert", LOG_ALERT, },
{ "crit", LOG_CRIT, },
{ "debug", LOG_DEBUG, },
@@ -122,7 +122,7 @@ CODE prioritynames[] = {
#define LOG_FAC(p) (((p) & LOG_FACMASK) >> 3)
#ifdef SYSLOG_NAMES
-CODE facilitynames[] = {
+static const CODE facilitynames[] = {
{ "auth", LOG_AUTH, },
{ "authpriv", LOG_AUTHPRIV, },
{ "console", LOG_CONSOLE, },
diff --git a/freebsd/sys/sys/sysproto.h b/freebsd/sys/sys/sysproto.h
index 479eeb4c..1ee7bb4e 100644
--- a/freebsd/sys/sys/sysproto.h
+++ b/freebsd/sys/sys/sysproto.h
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/9/sys/kern/syscalls.master 276957 2015-01-11 07:10:43Z dchagin
+ * created from FreeBSD: head/sys/kern/syscalls.master 304395 2016-08-18 10:50:40Z gnn
*/
#ifndef _SYS_SYSPROTO_H_
@@ -12,6 +12,7 @@
#include <sys/signal.h>
#include <sys/acl.h>
#include <rtems/bsd/sys/cpuset.h>
+#include <sys/_ffcounter.h>
#include <sys/_semaphore.h>
#include <sys/ucontext.h>
#include <sys/wait.h>
@@ -166,15 +167,15 @@ struct getsockname_args {
#ifndef __rtems__
struct access_args {
char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
- char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+ char amode_l_[PADL_(int)]; int amode; char amode_r_[PADR_(int)];
};
struct chflags_args {
- char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
- char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+ char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
+ char flags_l_[PADL_(u_long)]; u_long flags; char flags_r_[PADR_(u_long)];
};
struct fchflags_args {
char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
- char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+ char flags_l_[PADL_(u_long)]; u_long flags; char flags_r_[PADR_(u_long)];
};
struct sync_args {
register_t dummy;
@@ -189,7 +190,7 @@ struct getppid_args {
struct dup_args {
char fd_l_[PADL_(u_int)]; u_int fd; char fd_r_[PADR_(u_int)];
};
-struct pipe_args {
+struct freebsd10_pipe_args {
register_t dummy;
};
struct getegid_args {
@@ -283,7 +284,7 @@ struct munmap_args {
char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
};
struct mprotect_args {
- char addr_l_[PADL_(const void *)]; const void * addr; char addr_r_[PADR_(const void *)];
+ char addr_l_[PADL_(void *)]; void * addr; char addr_r_[PADR_(void *)];
char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)];
};
@@ -566,20 +567,6 @@ struct shmsys_args {
char a3_l_[PADL_(int)]; int a3; char a3_r_[PADR_(int)];
char a4_l_[PADL_(int)]; int a4; char a4_r_[PADR_(int)];
};
-struct freebsd6_pread_args {
- char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
- char buf_l_[PADL_(void *)]; void * buf; char buf_r_[PADR_(void *)];
- char nbyte_l_[PADL_(size_t)]; size_t nbyte; char nbyte_r_[PADR_(size_t)];
- char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
- char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
-};
-struct freebsd6_pwrite_args {
- char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
- char buf_l_[PADL_(const void *)]; const void * buf; char buf_r_[PADR_(const void *)];
- char nbyte_l_[PADL_(size_t)]; size_t nbyte; char nbyte_r_[PADR_(size_t)];
- char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
- char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
-};
#endif /* __rtems__ */
struct setfib_args {
char fibnum_l_[PADL_(int)]; int fibnum; char fibnum_r_[PADR_(int)];
@@ -631,31 +618,6 @@ struct getdirentries_args {
char count_l_[PADL_(u_int)]; u_int count; char count_r_[PADR_(u_int)];
char basep_l_[PADL_(long *)]; long * basep; char basep_r_[PADR_(long *)];
};
-struct freebsd6_mmap_args {
- char addr_l_[PADL_(caddr_t)]; caddr_t addr; char addr_r_[PADR_(caddr_t)];
- char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
- char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)];
- char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
- char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
- char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
- char pos_l_[PADL_(off_t)]; off_t pos; char pos_r_[PADR_(off_t)];
-};
-struct freebsd6_lseek_args {
- char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
- char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
- char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
- char whence_l_[PADL_(int)]; int whence; char whence_r_[PADR_(int)];
-};
-struct freebsd6_truncate_args {
- char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
- char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
- char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)];
-};
-struct freebsd6_ftruncate_args {
- char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
- char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
- char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)];
-};
struct sysctl_args {
char name_l_[PADL_(int *)]; int * name; char name_r_[PADR_(int *)];
char namelen_l_[PADL_(u_int)]; u_int namelen; char namelen_r_[PADR_(u_int)];
@@ -766,6 +728,15 @@ struct nanosleep_args {
char rqtp_l_[PADL_(const struct timespec *)]; const struct timespec * rqtp; char rqtp_r_[PADR_(const struct timespec *)];
char rmtp_l_[PADL_(struct timespec *)]; struct timespec * rmtp; char rmtp_r_[PADR_(struct timespec *)];
};
+struct ffclock_getcounter_args {
+ char ffcount_l_[PADL_(ffcounter *)]; ffcounter * ffcount; char ffcount_r_[PADR_(ffcounter *)];
+};
+struct ffclock_setestimate_args {
+ char cest_l_[PADL_(struct ffclock_estimate *)]; struct ffclock_estimate * cest; char cest_r_[PADR_(struct ffclock_estimate *)];
+};
+struct ffclock_getestimate_args {
+ char cest_l_[PADL_(struct ffclock_estimate *)]; struct ffclock_estimate * cest; char cest_r_[PADR_(struct ffclock_estimate *)];
+};
struct clock_getcpuclockid2_args {
char id_l_[PADL_(id_t)]; id_t id; char id_r_[PADR_(id_t)];
char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)];
@@ -782,11 +753,6 @@ struct minherit_args {
struct rfork_args {
char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
};
-struct openbsd_poll_args {
- char fds_l_[PADL_(struct pollfd *)]; struct pollfd * fds; char fds_r_[PADR_(struct pollfd *)];
- char nfds_l_[PADL_(u_int)]; u_int nfds; char nfds_r_[PADR_(u_int)];
- char timeout_l_[PADL_(int)]; int timeout; char timeout_r_[PADR_(int)];
-};
struct issetugid_args {
register_t dummy;
};
@@ -912,18 +878,6 @@ struct aio_cancel_args {
struct aio_error_args {
char aiocbp_l_[PADL_(struct aiocb *)]; struct aiocb * aiocbp; char aiocbp_r_[PADR_(struct aiocb *)];
};
-struct oaio_read_args {
- char aiocbp_l_[PADL_(struct oaiocb *)]; struct oaiocb * aiocbp; char aiocbp_r_[PADR_(struct oaiocb *)];
-};
-struct oaio_write_args {
- char aiocbp_l_[PADL_(struct oaiocb *)]; struct oaiocb * aiocbp; char aiocbp_r_[PADR_(struct oaiocb *)];
-};
-struct olio_listio_args {
- char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
- char acb_list_l_[PADL_(struct oaiocb *const *)]; struct oaiocb *const * acb_list; char acb_list_r_[PADR_(struct oaiocb *const *)];
- char nent_l_[PADL_(int)]; int nent; char nent_r_[PADR_(int)];
- char sig_l_[PADL_(struct osigevent *)]; struct osigevent * sig; char sig_r_[PADR_(struct osigevent *)];
-};
struct yield_args {
register_t dummy;
};
@@ -1124,7 +1078,7 @@ struct __setugid_args {
};
struct eaccess_args {
char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
- char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+ char amode_l_[PADL_(int)]; int amode; char amode_r_[PADR_(int)];
};
struct afs3_syscall_args {
char syscall_l_[PADL_(long)]; long syscall; char syscall_r_[PADR_(long)];
@@ -1170,7 +1124,7 @@ struct kenv_args {
};
struct lchflags_args {
char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
- char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+ char flags_l_[PADL_(u_long)]; u_long flags; char flags_r_[PADR_(u_long)];
};
struct uuidgen_args {
char store_l_[PADL_(struct uuid *)]; struct uuid * store; char store_r_[PADR_(struct uuid *)];
@@ -1336,12 +1290,6 @@ struct thr_kill_args {
char id_l_[PADL_(long)]; long id; char id_r_[PADR_(long)];
char sig_l_[PADL_(int)]; int sig; char sig_r_[PADR_(int)];
};
-struct _umtx_lock_args {
- char umtx_l_[PADL_(struct umtx *)]; struct umtx * umtx; char umtx_r_[PADR_(struct umtx *)];
-};
-struct _umtx_unlock_args {
- char umtx_l_[PADL_(struct umtx *)]; struct umtx * umtx; char umtx_r_[PADR_(struct umtx *)];
-};
struct jail_attach_args {
char jid_l_[PADL_(int)]; int jid; char jid_r_[PADR_(int)];
};
@@ -1583,7 +1531,7 @@ struct cpuset_setaffinity_args {
struct faccessat_args {
char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
- char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
+ char amode_l_[PADL_(int)]; int amode; char amode_r_[PADR_(int)];
char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
};
struct fchmodat_args {
@@ -1708,13 +1656,10 @@ struct lpathconf_args {
char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)];
};
-struct cap_new_args {
- char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
- char rights_l_[PADL_(u_int64_t)]; u_int64_t rights; char rights_r_[PADR_(u_int64_t)];
-};
-struct cap_getrights_args {
+struct __cap_rights_get_args {
+ char version_l_[PADL_(int)]; int version; char version_r_[PADR_(int)];
char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
- char rightsp_l_[PADL_(u_int64_t *)]; u_int64_t * rightsp; char rightsp_r_[PADR_(u_int64_t *)];
+ char rightsp_l_[PADL_(cap_rights_t *)]; cap_rights_t * rightsp; char rightsp_r_[PADR_(cap_rights_t *)];
};
struct cap_enter_args {
register_t dummy;
@@ -1798,12 +1743,94 @@ struct wait6_args {
char wrusage_l_[PADL_(struct __wrusage *)]; struct __wrusage * wrusage; char wrusage_r_[PADR_(struct __wrusage *)];
char info_l_[PADL_(siginfo_t *)]; siginfo_t * info; char info_r_[PADR_(siginfo_t *)];
};
+struct cap_rights_limit_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char rightsp_l_[PADL_(cap_rights_t *)]; cap_rights_t * rightsp; char rightsp_r_[PADR_(cap_rights_t *)];
+};
+struct cap_ioctls_limit_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char cmds_l_[PADL_(const u_long *)]; const u_long * cmds; char cmds_r_[PADR_(const u_long *)];
+ char ncmds_l_[PADL_(size_t)]; size_t ncmds; char ncmds_r_[PADR_(size_t)];
+};
+struct cap_ioctls_get_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char cmds_l_[PADL_(u_long *)]; u_long * cmds; char cmds_r_[PADR_(u_long *)];
+ char maxcmds_l_[PADL_(size_t)]; size_t maxcmds; char maxcmds_r_[PADR_(size_t)];
+};
+struct cap_fcntls_limit_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char fcntlrights_l_[PADL_(uint32_t)]; uint32_t fcntlrights; char fcntlrights_r_[PADR_(uint32_t)];
+};
+struct cap_fcntls_get_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char fcntlrightsp_l_[PADL_(uint32_t *)]; uint32_t * fcntlrightsp; char fcntlrightsp_r_[PADR_(uint32_t *)];
+};
+struct bindat_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
+ char name_l_[PADL_(caddr_t)]; caddr_t name; char name_r_[PADR_(caddr_t)];
+ char namelen_l_[PADL_(int)]; int namelen; char namelen_r_[PADR_(int)];
+};
+struct connectat_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
+ char name_l_[PADL_(caddr_t)]; caddr_t name; char name_r_[PADR_(caddr_t)];
+ char namelen_l_[PADL_(int)]; int namelen; char namelen_r_[PADR_(int)];
+};
+struct chflagsat_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
+ char flags_l_[PADL_(u_long)]; u_long flags; char flags_r_[PADR_(u_long)];
+ char atflag_l_[PADL_(int)]; int atflag; char atflag_r_[PADR_(int)];
+};
+struct accept4_args {
+ char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
+ char name_l_[PADL_(struct sockaddr *__restrict)]; struct sockaddr *__restrict name; char name_r_[PADR_(struct sockaddr *__restrict)];
+ char anamelen_l_[PADL_(__socklen_t *__restrict)]; __socklen_t *__restrict anamelen; char anamelen_r_[PADR_(__socklen_t *__restrict)];
+ char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+};
+struct pipe2_args {
+ char fildes_l_[PADL_(int *)]; int * fildes; char fildes_r_[PADR_(int *)];
+ char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+};
+struct aio_mlock_args {
+ char aiocbp_l_[PADL_(struct aiocb *)]; struct aiocb * aiocbp; char aiocbp_r_[PADR_(struct aiocb *)];
+};
struct procctl_args {
char idtype_l_[PADL_(idtype_t)]; idtype_t idtype; char idtype_r_[PADR_(idtype_t)];
char id_l_[PADL_(id_t)]; id_t id; char id_r_[PADR_(id_t)];
char com_l_[PADL_(int)]; int com; char com_r_[PADR_(int)];
char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)];
};
+struct ppoll_args {
+ char fds_l_[PADL_(struct pollfd *)]; struct pollfd * fds; char fds_r_[PADR_(struct pollfd *)];
+ char nfds_l_[PADL_(u_int)]; u_int nfds; char nfds_r_[PADR_(u_int)];
+ char ts_l_[PADL_(const struct timespec *)]; const struct timespec * ts; char ts_r_[PADR_(const struct timespec *)];
+ char set_l_[PADL_(const sigset_t *)]; const sigset_t * set; char set_r_[PADR_(const sigset_t *)];
+};
+struct futimens_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char times_l_[PADL_(struct timespec *)]; struct timespec * times; char times_r_[PADR_(struct timespec *)];
+};
+struct utimensat_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char times_l_[PADL_(struct timespec *)]; struct timespec * times; char times_r_[PADR_(struct timespec *)];
+ char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
+};
+struct numa_getaffinity_args {
+ char which_l_[PADL_(cpuwhich_t)]; cpuwhich_t which; char which_r_[PADR_(cpuwhich_t)];
+ char id_l_[PADL_(id_t)]; id_t id; char id_r_[PADR_(id_t)];
+ char policy_l_[PADL_(struct vm_domain_policy_entry *)]; struct vm_domain_policy_entry * policy; char policy_r_[PADR_(struct vm_domain_policy_entry *)];
+};
+struct numa_setaffinity_args {
+ char which_l_[PADL_(cpuwhich_t)]; cpuwhich_t which; char which_r_[PADR_(cpuwhich_t)];
+ char id_l_[PADL_(id_t)]; id_t id; char id_r_[PADR_(id_t)];
+ char policy_l_[PADL_(const struct vm_domain_policy_entry *)]; const struct vm_domain_policy_entry * policy; char policy_r_[PADR_(const struct vm_domain_policy_entry *)];
+};
+struct fdatasync_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+};
int nosys(struct thread *, struct nosys_args *);
void sys_sys_exit(struct thread *, struct sys_exit_args *);
int sys_fork(struct thread *, struct fork_args *);
@@ -1840,7 +1867,6 @@ int sys_sync(struct thread *, struct sync_args *);
int sys_kill(struct thread *, struct kill_args *);
int sys_getppid(struct thread *, struct getppid_args *);
int sys_dup(struct thread *, struct dup_args *);
-int sys_pipe(struct thread *, struct pipe_args *);
int sys_getegid(struct thread *, struct getegid_args *);
int sys_profil(struct thread *, struct profil_args *);
int sys_ktrace(struct thread *, struct ktrace_args *);
@@ -1916,8 +1942,6 @@ int sys_rtprio(struct thread *, struct rtprio_args *);
int sys_semsys(struct thread *, struct semsys_args *);
int sys_msgsys(struct thread *, struct msgsys_args *);
int sys_shmsys(struct thread *, struct shmsys_args *);
-int freebsd6_pread(struct thread *, struct freebsd6_pread_args *);
-int freebsd6_pwrite(struct thread *, struct freebsd6_pwrite_args *);
int sys_setfib(struct thread *, struct setfib_args *);
int sys_ntp_adjtime(struct thread *, struct ntp_adjtime_args *);
int sys_setgid(struct thread *, struct setgid_args *);
@@ -1931,10 +1955,6 @@ int sys_fpathconf(struct thread *, struct fpathconf_args *);
int sys_getrlimit(struct thread *, struct __getrlimit_args *);
int sys_setrlimit(struct thread *, struct __setrlimit_args *);
int sys_getdirentries(struct thread *, struct getdirentries_args *);
-int freebsd6_mmap(struct thread *, struct freebsd6_mmap_args *);
-int freebsd6_lseek(struct thread *, struct freebsd6_lseek_args *);
-int freebsd6_truncate(struct thread *, struct freebsd6_truncate_args *);
-int freebsd6_ftruncate(struct thread *, struct freebsd6_ftruncate_args *);
int sys___sysctl(struct thread *, struct sysctl_args *);
int sys_mlock(struct thread *, struct mlock_args *);
int sys_munlock(struct thread *, struct munlock_args *);
@@ -1959,11 +1979,13 @@ int sys_ktimer_settime(struct thread *, struct ktimer_settime_args *);
int sys_ktimer_gettime(struct thread *, struct ktimer_gettime_args *);
int sys_ktimer_getoverrun(struct thread *, struct ktimer_getoverrun_args *);
int sys_nanosleep(struct thread *, struct nanosleep_args *);
+int sys_ffclock_getcounter(struct thread *, struct ffclock_getcounter_args *);
+int sys_ffclock_setestimate(struct thread *, struct ffclock_setestimate_args *);
+int sys_ffclock_getestimate(struct thread *, struct ffclock_getestimate_args *);
int sys_clock_getcpuclockid2(struct thread *, struct clock_getcpuclockid2_args *);
int sys_ntp_gettime(struct thread *, struct ntp_gettime_args *);
int sys_minherit(struct thread *, struct minherit_args *);
int sys_rfork(struct thread *, struct rfork_args *);
-int sys_openbsd_poll(struct thread *, struct openbsd_poll_args *);
int sys_issetugid(struct thread *, struct issetugid_args *);
int sys_lchown(struct thread *, struct lchown_args *);
int sys_aio_read(struct thread *, struct aio_read_args *);
@@ -1996,9 +2018,6 @@ int sys_aio_return(struct thread *, struct aio_return_args *);
int sys_aio_suspend(struct thread *, struct aio_suspend_args *);
int sys_aio_cancel(struct thread *, struct aio_cancel_args *);
int sys_aio_error(struct thread *, struct aio_error_args *);
-int sys_oaio_read(struct thread *, struct oaio_read_args *);
-int sys_oaio_write(struct thread *, struct oaio_write_args *);
-int sys_olio_listio(struct thread *, struct olio_listio_args *);
int sys_yield(struct thread *, struct yield_args *);
int sys_mlockall(struct thread *, struct mlockall_args *);
int sys_munlockall(struct thread *, struct munlockall_args *);
@@ -2090,8 +2109,6 @@ int sys_thr_create(struct thread *, struct thr_create_args *);
int sys_thr_exit(struct thread *, struct thr_exit_args *);
int sys_thr_self(struct thread *, struct thr_self_args *);
int sys_thr_kill(struct thread *, struct thr_kill_args *);
-int sys__umtx_lock(struct thread *, struct _umtx_lock_args *);
-int sys__umtx_unlock(struct thread *, struct _umtx_unlock_args *);
int sys_jail_attach(struct thread *, struct jail_attach_args *);
int sys_extattr_list_fd(struct thread *, struct extattr_list_fd_args *);
int sys_extattr_list_file(struct thread *, struct extattr_list_file_args *);
@@ -2165,8 +2182,7 @@ int sys___semctl(struct thread *, struct __semctl_args *);
int sys_msgctl(struct thread *, struct msgctl_args *);
int sys_shmctl(struct thread *, struct shmctl_args *);
int sys_lpathconf(struct thread *, struct lpathconf_args *);
-int sys_cap_new(struct thread *, struct cap_new_args *);
-int sys_cap_getrights(struct thread *, struct cap_getrights_args *);
+int sys___cap_rights_get(struct thread *, struct __cap_rights_get_args *);
int sys_cap_enter(struct thread *, struct cap_enter_args *);
int sys_cap_getmode(struct thread *, struct cap_getmode_args *);
int sys_pdfork(struct thread *, struct pdfork_args *);
@@ -2183,7 +2199,24 @@ int sys_rctl_remove_rule(struct thread *, struct rctl_remove_rule_args *);
int sys_posix_fallocate(struct thread *, struct posix_fallocate_args *);
int sys_posix_fadvise(struct thread *, struct posix_fadvise_args *);
int sys_wait6(struct thread *, struct wait6_args *);
+int sys_cap_rights_limit(struct thread *, struct cap_rights_limit_args *);
+int sys_cap_ioctls_limit(struct thread *, struct cap_ioctls_limit_args *);
+int sys_cap_ioctls_get(struct thread *, struct cap_ioctls_get_args *);
+int sys_cap_fcntls_limit(struct thread *, struct cap_fcntls_limit_args *);
+int sys_cap_fcntls_get(struct thread *, struct cap_fcntls_get_args *);
+int sys_bindat(struct thread *, struct bindat_args *);
+int sys_connectat(struct thread *, struct connectat_args *);
+int sys_chflagsat(struct thread *, struct chflagsat_args *);
+int sys_accept4(struct thread *, struct accept4_args *);
+int sys_pipe2(struct thread *, struct pipe2_args *);
+int sys_aio_mlock(struct thread *, struct aio_mlock_args *);
int sys_procctl(struct thread *, struct procctl_args *);
+int sys_ppoll(struct thread *, struct ppoll_args *);
+int sys_futimens(struct thread *, struct futimens_args *);
+int sys_utimensat(struct thread *, struct utimensat_args *);
+int sys_numa_getaffinity(struct thread *, struct numa_getaffinity_args *);
+int sys_numa_setaffinity(struct thread *, struct numa_setaffinity_args *);
+int sys_fdatasync(struct thread *, struct fdatasync_args *);
#ifdef COMPAT_43
@@ -2420,6 +2453,66 @@ int freebsd4_sigreturn(struct thread *, struct freebsd4_sigreturn_args *);
#ifdef COMPAT_FREEBSD6
+struct freebsd6_pread_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char buf_l_[PADL_(void *)]; void * buf; char buf_r_[PADR_(void *)];
+ char nbyte_l_[PADL_(size_t)]; size_t nbyte; char nbyte_r_[PADR_(size_t)];
+ char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
+ char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
+};
+struct freebsd6_pwrite_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char buf_l_[PADL_(const void *)]; const void * buf; char buf_r_[PADR_(const void *)];
+ char nbyte_l_[PADL_(size_t)]; size_t nbyte; char nbyte_r_[PADR_(size_t)];
+ char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
+ char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
+};
+struct freebsd6_mmap_args {
+ char addr_l_[PADL_(caddr_t)]; caddr_t addr; char addr_r_[PADR_(caddr_t)];
+ char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
+ char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)];
+ char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
+ char pos_l_[PADL_(off_t)]; off_t pos; char pos_r_[PADR_(off_t)];
+};
+struct freebsd6_lseek_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
+ char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
+ char whence_l_[PADL_(int)]; int whence; char whence_r_[PADR_(int)];
+};
+struct freebsd6_truncate_args {
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
+ char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)];
+};
+struct freebsd6_ftruncate_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
+ char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)];
+};
+struct freebsd6_aio_read_args {
+ char aiocbp_l_[PADL_(struct oaiocb *)]; struct oaiocb * aiocbp; char aiocbp_r_[PADR_(struct oaiocb *)];
+};
+struct freebsd6_aio_write_args {
+ char aiocbp_l_[PADL_(struct oaiocb *)]; struct oaiocb * aiocbp; char aiocbp_r_[PADR_(struct oaiocb *)];
+};
+struct freebsd6_lio_listio_args {
+ char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
+ char acb_list_l_[PADL_(struct oaiocb *const *)]; struct oaiocb *const * acb_list; char acb_list_r_[PADR_(struct oaiocb *const *)];
+ char nent_l_[PADL_(int)]; int nent; char nent_r_[PADR_(int)];
+ char sig_l_[PADL_(struct osigevent *)]; struct osigevent * sig; char sig_r_[PADR_(struct osigevent *)];
+};
+int freebsd6_pread(struct thread *, struct freebsd6_pread_args *);
+int freebsd6_pwrite(struct thread *, struct freebsd6_pwrite_args *);
+int freebsd6_mmap(struct thread *, struct freebsd6_mmap_args *);
+int freebsd6_lseek(struct thread *, struct freebsd6_lseek_args *);
+int freebsd6_truncate(struct thread *, struct freebsd6_truncate_args *);
+int freebsd6_ftruncate(struct thread *, struct freebsd6_ftruncate_args *);
+int freebsd6_aio_read(struct thread *, struct freebsd6_aio_read_args *);
+int freebsd6_aio_write(struct thread *, struct freebsd6_aio_write_args *);
+int freebsd6_lio_listio(struct thread *, struct freebsd6_lio_listio_args *);
#endif /* COMPAT_FREEBSD6 */
@@ -2448,11 +2541,18 @@ int freebsd7_shmctl(struct thread *, struct freebsd7_shmctl_args *);
#endif /* COMPAT_FREEBSD7 */
+
+#ifdef COMPAT_FREEBSD10
+
+int freebsd10_pipe(struct thread *, struct freebsd10_pipe_args *);
+
+#endif /* COMPAT_FREEBSD10 */
+
#define SYS_AUE_syscall AUE_NULL
#define SYS_AUE_exit AUE_EXIT
#define SYS_AUE_fork AUE_FORK
-#define SYS_AUE_read AUE_NULL
-#define SYS_AUE_write AUE_NULL
+#define SYS_AUE_read AUE_READ
+#define SYS_AUE_write AUE_WRITE
#define SYS_AUE_open AUE_OPEN_RWTC
#define SYS_AUE_close AUE_CLOSE
#define SYS_AUE_wait4 AUE_WAIT4
@@ -2489,7 +2589,7 @@ int freebsd7_shmctl(struct thread *, struct freebsd7_shmctl_args *);
#define SYS_AUE_getppid AUE_GETPPID
#define SYS_AUE_olstat AUE_LSTAT
#define SYS_AUE_dup AUE_DUP
-#define SYS_AUE_pipe AUE_PIPE
+#define SYS_AUE_freebsd10_pipe AUE_PIPE
#define SYS_AUE_getegid AUE_GETEGID
#define SYS_AUE_profil AUE_PROFILE
#define SYS_AUE_ktrace AUE_KTRACE
@@ -2649,11 +2749,13 @@ int freebsd7_shmctl(struct thread *, struct freebsd7_shmctl_args *);
#define SYS_AUE_ktimer_gettime AUE_NULL
#define SYS_AUE_ktimer_getoverrun AUE_NULL
#define SYS_AUE_nanosleep AUE_NULL
+#define SYS_AUE_ffclock_getcounter AUE_NULL
+#define SYS_AUE_ffclock_setestimate AUE_NULL
+#define SYS_AUE_ffclock_getestimate AUE_NULL
#define SYS_AUE_clock_getcpuclockid2 AUE_NULL
#define SYS_AUE_ntp_gettime AUE_NULL
#define SYS_AUE_minherit AUE_MINHERIT
#define SYS_AUE_rfork AUE_RFORK
-#define SYS_AUE_openbsd_poll AUE_POLL
#define SYS_AUE_issetugid AUE_ISSETUGID
#define SYS_AUE_lchown AUE_LCHOWN
#define SYS_AUE_aio_read AUE_NULL
@@ -2687,9 +2789,9 @@ int freebsd7_shmctl(struct thread *, struct freebsd7_shmctl_args *);
#define SYS_AUE_aio_suspend AUE_NULL
#define SYS_AUE_aio_cancel AUE_NULL
#define SYS_AUE_aio_error AUE_NULL
-#define SYS_AUE_oaio_read AUE_NULL
-#define SYS_AUE_oaio_write AUE_NULL
-#define SYS_AUE_olio_listio AUE_NULL
+#define SYS_AUE_freebsd6_aio_read AUE_NULL
+#define SYS_AUE_freebsd6_aio_write AUE_NULL
+#define SYS_AUE_freebsd6_lio_listio AUE_NULL
#define SYS_AUE_yield AUE_NULL
#define SYS_AUE_mlockall AUE_MLOCKALL
#define SYS_AUE_munlockall AUE_MUNLOCKALL
@@ -2784,8 +2886,6 @@ int freebsd7_shmctl(struct thread *, struct freebsd7_shmctl_args *);
#define SYS_AUE_thr_exit AUE_NULL
#define SYS_AUE_thr_self AUE_NULL
#define SYS_AUE_thr_kill AUE_NULL
-#define SYS_AUE__umtx_lock AUE_NULL
-#define SYS_AUE__umtx_unlock AUE_NULL
#define SYS_AUE_jail_attach AUE_NULL
#define SYS_AUE_extattr_list_fd AUE_EXTATTR_LIST_FD
#define SYS_AUE_extattr_list_file AUE_EXTATTR_LIST_FILE
@@ -2859,8 +2959,7 @@ int freebsd7_shmctl(struct thread *, struct freebsd7_shmctl_args *);
#define SYS_AUE_msgctl AUE_MSGCTL
#define SYS_AUE_shmctl AUE_SHMCTL
#define SYS_AUE_lpathconf AUE_LPATHCONF
-#define SYS_AUE_cap_new AUE_CAP_NEW
-#define SYS_AUE_cap_getrights AUE_CAP_GETRIGHTS
+#define SYS_AUE___cap_rights_get AUE_CAP_RIGHTS_GET
#define SYS_AUE_cap_enter AUE_CAP_ENTER
#define SYS_AUE_cap_getmode AUE_CAP_GETMODE
#define SYS_AUE_pdfork AUE_PDFORK
@@ -2877,7 +2976,24 @@ int freebsd7_shmctl(struct thread *, struct freebsd7_shmctl_args *);
#define SYS_AUE_posix_fallocate AUE_NULL
#define SYS_AUE_posix_fadvise AUE_NULL
#define SYS_AUE_wait6 AUE_WAIT6
+#define SYS_AUE_cap_rights_limit AUE_CAP_RIGHTS_LIMIT
+#define SYS_AUE_cap_ioctls_limit AUE_CAP_IOCTLS_LIMIT
+#define SYS_AUE_cap_ioctls_get AUE_CAP_IOCTLS_GET
+#define SYS_AUE_cap_fcntls_limit AUE_CAP_FCNTLS_LIMIT
+#define SYS_AUE_cap_fcntls_get AUE_CAP_FCNTLS_GET
+#define SYS_AUE_bindat AUE_BINDAT
+#define SYS_AUE_connectat AUE_CONNECTAT
+#define SYS_AUE_chflagsat AUE_CHFLAGSAT
+#define SYS_AUE_accept4 AUE_ACCEPT
+#define SYS_AUE_pipe2 AUE_PIPE
+#define SYS_AUE_aio_mlock AUE_NULL
#define SYS_AUE_procctl AUE_NULL
+#define SYS_AUE_ppoll AUE_POLL
+#define SYS_AUE_futimens AUE_FUTIMES
+#define SYS_AUE_utimensat AUE_FUTIMESAT
+#define SYS_AUE_numa_getaffinity AUE_NULL
+#define SYS_AUE_numa_setaffinity AUE_NULL
+#define SYS_AUE_fdatasync AUE_FSYNC
#endif /* __rtems__ */
#undef PAD_
diff --git a/freebsd/sys/sys/systm.h b/freebsd/sys/sys/systm.h
index 36b3f59f..d2205a7a 100644
--- a/freebsd/sys/sys/systm.h
+++ b/freebsd/sys/sys/systm.h
@@ -47,6 +47,7 @@
#ifndef __rtems__
extern int cold; /* nonzero if we are doing a cold boot */
+extern int suspend_blocked; /* block suspend due to pending shutdown */
extern int rebooting; /* kern_reboot() has been called. */
#else /* __rtems__ */
/* In RTEMS there is no cold boot and reboot */
@@ -93,18 +94,24 @@ extern int vm_guest; /* Running as virtual machine guest? */
* Detected virtual machine guest types. The intention is to expand
* and/or add to the VM_GUEST_VM type if specific VM functionality is
* ever implemented (e.g. vendor-specific paravirtualization features).
+ * Keep in sync with vm_guest_sysctl_names[].
*/
-enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN };
+enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN, VM_GUEST_HV,
+ VM_GUEST_VMWARE, VM_GUEST_KVM, VM_LAST };
+
+#if defined(WITNESS) || defined(INVARIANT_SUPPORT)
+void kassert_panic(const char *fmt, ...) __printflike(1, 2);
+#endif
#ifdef INVARIANTS /* The option is always available */
#define KASSERT(exp,msg) do { \
if (__predict_false(!(exp))) \
- panic msg; \
+ kassert_panic msg; \
} while (0)
#define VNASSERT(exp, vp, msg) do { \
if (__predict_false(!(exp))) { \
vn_printf(vp, "VNASSERT failed\n"); \
- panic msg; \
+ kassert_panic msg; \
} \
} while (0)
#else
@@ -131,6 +138,12 @@ enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN };
((uintptr_t)&(var) & (sizeof(void *) - 1)) == 0, msg)
/*
+ * Assert that a thread is in critical(9) section.
+ */
+#define CRITICAL_ASSERT(td) \
+ KASSERT((td)->td_critnest >= 1, ("Not in critical section"));
+
+/*
* If we have already panic'd and this is the thread that called
* panic(), then don't block on any mutexes but silently succeed.
* Otherwise, the kernel will deadlock since the scheduler isn't
@@ -162,10 +175,14 @@ extern char **kenvp;
extern const void *zero_region; /* address space maps to a zeroed page */
extern int unmapped_buf_allowed;
-extern int iosize_max_clamp;
-extern int devfs_iosize_max_clamp;
-#define IOSIZE_MAX (iosize_max_clamp ? INT_MAX : SSIZE_MAX)
-#define DEVFS_IOSIZE_MAX (devfs_iosize_max_clamp ? INT_MAX : SSIZE_MAX)
+
+#ifdef __LP64__
+#define IOSIZE_MAX iosize_max()
+#define DEVFS_IOSIZE_MAX devfs_iosize_max()
+#else
+#define IOSIZE_MAX SSIZE_MAX
+#define DEVFS_IOSIZE_MAX SSIZE_MAX
+#endif
/*
* General function declarations.
@@ -183,6 +200,7 @@ struct ucred;
struct uio;
struct _jmp_buf;
struct trapframe;
+struct eventtimer;
#ifndef __rtems__
int setjmp(struct _jmp_buf *) __returns_twice;
@@ -200,9 +218,12 @@ void *hashinit_flags(int count, struct malloc_type *type,
#define HASH_WAITOK 0x00000002
void *phashinit(int count, struct malloc_type *type, u_long *nentries);
+void *phashinit_flags(int count, struct malloc_type *type, u_long *nentries,
+ int flags);
void g_waitidle(void);
void panic(const char *, ...) __dead2 __printflike(1, 2);
+void vpanic(const char *, __va_list) __dead2 __printflike(1, 0);
void cpu_boot(int);
void cpu_flush_dcache(void *, size_t);
@@ -229,15 +250,24 @@ void init_param1(void);
void init_param2(long physpages);
void init_static_kenv(char *, size_t);
void tablefull(const char *);
+#ifdef EARLY_PRINTF
+typedef void early_putc_t(int ch);
+extern early_putc_t *early_putc;
+#endif
int kvprintf(char const *, void (*)(int, void*), void *, int,
__va_list) __printflike(1, 0);
void log(int, const char *, ...) __printflike(2, 3);
void log_console(struct uio *);
+void vlog(int, const char *, __va_list) __printflike(2, 0);
+int asprintf(char **ret, struct malloc_type *mtp, const char *format,
+ ...) __printflike(3, 4);
int printf(const char *, ...) __printflike(1, 2);
int snprintf(char *, size_t, const char *, ...) __printflike(3, 4);
int sprintf(char *buf, const char *, ...) __printflike(2, 3);
int uprintf(const char *, ...) __printflike(1, 2);
int vprintf(const char *, __va_list) __printflike(1, 0);
+int vasprintf(char **ret, struct malloc_type *mtp, const char *format,
+ __va_list ap) __printflike(3, 0);
int vsnprintf(char *, size_t, const char *, __va_list) __printflike(3, 0);
int vsnrprintf(char *, size_t, int, const char *, __va_list) __printflike(4, 0);
int vsprintf(char *buf, const char *, __va_list) __printflike(2, 0);
@@ -249,6 +279,7 @@ u_long strtoul(const char *, char **, int) __nonnull(1);
quad_t strtoq(const char *, char **, int) __nonnull(1);
u_quad_t strtouq(const char *, char **, int) __nonnull(1);
void tprintf(struct proc *p, int pri, const char *, ...) __printflike(3, 4);
+void vtprintf(struct proc *, int, const char *, __va_list) __printflike(3, 0);
void hexdump(const void *ptr, int length, const char *hdr, int flags);
#define HD_COLUMN_MASK 0xff
#define HD_DELIM_MASK 0xff00
@@ -264,6 +295,7 @@ void bzero(void *buf, size_t len) __nonnull(1);
#define bcopy(src, dst, len) memmove((dst), (src), (len))
#define bzero(buf, size) memset((buf), 0, (size))
#endif /* __rtems__ */
+void explicit_bzero(void *, size_t) __nonnull(1);
void *memcpy(void *to, const void *from, size_t len) __nonnull(1) __nonnull(2);
void *memmove(void *dest, const void *src, size_t n) __nonnull(1) __nonnull(2);
@@ -331,7 +363,7 @@ copyout_nofault(const void * __restrict kaddr, void * __restrict udaddr,
#endif /* __rtems__ */
#ifndef __rtems__
-int fubyte(const void *base);
+int fubyte(volatile const void *base);
#else /* __rtems__ */
static inline int
fubyte(const void *base)
@@ -341,17 +373,24 @@ fubyte(const void *base)
return byte_base[0];
}
#endif /* __rtems__ */
-long fuword(const void *base);
-int fuword16(void *base);
-int32_t fuword32(const void *base);
-int64_t fuword64(const void *base);
-int subyte(void *base, int byte);
-int suword(void *base, long word);
-int suword16(void *base, int word);
-int suword32(void *base, int32_t word);
-int suword64(void *base, int64_t word);
+long fuword(volatile const void *base);
+int fuword16(volatile const void *base);
+int32_t fuword32(volatile const void *base);
+int64_t fuword64(volatile const void *base);
+int fueword(volatile const void *base, long *val);
+int fueword32(volatile const void *base, int32_t *val);
+int fueword64(volatile const void *base, int64_t *val);
+int subyte(volatile void *base, int byte);
+int suword(volatile void *base, long word);
+int suword16(volatile void *base, int word);
+int suword32(volatile void *base, int32_t word);
+int suword64(volatile void *base, int64_t word);
uint32_t casuword32(volatile uint32_t *base, uint32_t oldval, uint32_t newval);
-u_long casuword(volatile u_long *p, u_long oldval, u_long newval);
+u_long casuword(volatile u_long *p, u_long oldval, u_long newval);
+int casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp,
+ uint32_t newval);
+int casueword(volatile u_long *p, u_long oldval, u_long *oldvalp,
+ u_long newval);
void realitexpire(void *);
@@ -361,7 +400,9 @@ void hardclock(int usermode, uintfptr_t pc);
void hardclock_cnt(int cnt, int usermode);
void hardclock_cpu(int usermode);
void hardclock_sync(int cpu);
+#ifndef __rtems__
void softclock(void *);
+#endif /* __rtems__ */
void statclock(int usermode);
void statclock_cnt(int cnt, int usermode);
void profclock(int usermode, uintfptr_t pc);
@@ -373,34 +414,26 @@ void startprofclock(struct proc *);
void stopprofclock(struct proc *);
void cpu_startprofclock(void);
void cpu_stopprofclock(void);
-void cpu_idleclock(void);
+sbintime_t cpu_idleclock(void);
void cpu_activeclock(void);
+void cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt);
+void cpu_et_frequency(struct eventtimer *et, uint64_t newfreq);
extern int cpu_deepest_sleep;
extern int cpu_disable_c2_sleep;
extern int cpu_disable_c3_sleep;
-#ifndef __rtems__
-int cr_cansee(struct ucred *u1, struct ucred *u2);
-int cr_canseesocket(struct ucred *cred, struct socket *so);
-int cr_canseeinpcb(struct ucred *cred, struct inpcb *inp);
-#else /* __rtems__ */
-#define cr_cansee(u1, u2) 0
-#define cr_canseesocket(cred, so) 0
-#define cr_canseeinpcb(cred, inp) 0
-#endif /* __rtems__ */
-
-char *getenv(const char *name);
+char *kern_getenv(const char *name);
void freeenv(char *env);
int getenv_int(const char *name, int *data);
int getenv_uint(const char *name, unsigned int *data);
int getenv_long(const char *name, long *data);
int getenv_ulong(const char *name, unsigned long *data);
int getenv_string(const char *name, char *data, int size);
+int getenv_int64(const char *name, int64_t *data);
+int getenv_uint64(const char *name, uint64_t *data);
int getenv_quad(const char *name, quad_t *data);
-#ifndef __rtems__
-int setenv(const char *name, const char *value);
-#endif /* __rtems__ */
-int unsetenv(const char *name);
+int kern_setenv(const char *name, const char *value);
+int kern_unsetenv(const char *name);
int testenv(const char *name);
typedef uint64_t (cpu_tick_f)(void);
@@ -435,28 +468,18 @@ typedef void timeout_t(void *); /* timeout function type */
void callout_handle_init(struct callout_handle *);
struct callout_handle timeout(timeout_t *, void *, int);
void untimeout(timeout_t *, void *, struct callout_handle);
-caddr_t kern_timeout_callwheel_alloc(caddr_t v);
-void kern_timeout_callwheel_init(void);
/* Stubs for obsolete functions that used to be for interrupt management */
#ifdef __rtems__
typedef int intrmask_t;
#endif /* __rtems__ */
-static __inline void spl0(void) { return; }
static __inline intrmask_t splbio(void) { return 0; }
static __inline intrmask_t splcam(void) { return 0; }
static __inline intrmask_t splclock(void) { return 0; }
static __inline intrmask_t splhigh(void) { return 0; }
static __inline intrmask_t splimp(void) { return 0; }
static __inline intrmask_t splnet(void) { return 0; }
-static __inline intrmask_t splsoftcam(void) { return 0; }
-static __inline intrmask_t splsoftclock(void) { return 0; }
-static __inline intrmask_t splsofttty(void) { return 0; }
-static __inline intrmask_t splsoftvm(void) { return 0; }
-static __inline intrmask_t splsofttq(void) { return 0; }
-static __inline intrmask_t splstatclock(void) { return 0; }
static __inline intrmask_t spltty(void) { return 0; }
-static __inline intrmask_t splvm(void) { return 0; }
static __inline void splx(intrmask_t ipl __unused) { return; }
/*
@@ -464,23 +487,35 @@ static __inline void splx(intrmask_t ipl __unused) { return; }
* less often.
*/
int _sleep(void *chan, struct lock_object *lock, int pri, const char *wmesg,
- int timo) __nonnull(1);
+ sbintime_t sbt, sbintime_t pr, int flags) __nonnull(1);
#define msleep(chan, mtx, pri, wmesg, timo) \
- _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (timo))
+ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), \
+ tick_sbt * (timo), 0, C_HARDCLOCK)
+#define msleep_sbt(chan, mtx, pri, wmesg, bt, pr, flags) \
+ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (bt), (pr), \
+ (flags))
#ifndef __rtems__
-int msleep_spin(void *chan, struct mtx *mtx, const char *wmesg, int timo)
- __nonnull(1);
+int msleep_spin_sbt(void *chan, struct mtx *mtx, const char *wmesg,
+ sbintime_t sbt, sbintime_t pr, int flags) __nonnull(1);
#else /* __rtems__ */
-#define msleep_spin(chan, mtx, wmesg, timo) \
- msleep((chan), (mtx), 0, (wmesg), (timo))
+#define msleep_spin_sbt(chan, mtx, wmesg, sbt, pr, flags) \
+ msleep_sbt(chan, mtx, 0, wmesg, sbt, pr, flags)
#endif /* __rtems__ */
+#define msleep_spin(chan, mtx, wmesg, timo) \
+ msleep_spin_sbt((chan), (mtx), (wmesg), tick_sbt * (timo), \
+ 0, C_HARDCLOCK)
+int pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr,
+ int flags);
#ifdef __rtems__
#include <unistd.h>
-#define pause _bsd_pause
#endif /* __rtems__ */
-int pause(const char *wmesg, int timo);
+#define pause(wmesg, timo) \
+ pause_sbt((wmesg), tick_sbt * (timo), 0, C_HARDCLOCK)
#define tsleep(chan, pri, wmesg, timo) \
- _sleep((chan), NULL, (pri), (wmesg), (timo))
+ _sleep((chan), NULL, (pri), (wmesg), tick_sbt * (timo), \
+ 0, C_HARDCLOCK)
+#define tsleep_sbt(chan, pri, wmesg, bt, pr, flags) \
+ _sleep((chan), NULL, (pri), (wmesg), (bt), (pr), (flags))
void wakeup(void *chan) __nonnull(1);
void wakeup_one(void *chan) __nonnull(1);
@@ -492,6 +527,11 @@ struct cdev;
dev_t dev2udev(struct cdev *x);
const char *devtoname(struct cdev *cdev);
+#ifdef __LP64__
+size_t devfs_iosize_max(void);
+size_t iosize_max(void);
+#endif
+
int poll_no_poll(int events);
/* XXX: Should be void nanodelay(u_int nsec); */
@@ -502,7 +542,6 @@ struct root_hold_token;
struct root_hold_token *root_mount_hold(const char *identifier);
void root_mount_rel(struct root_hold_token *h);
-void root_mount_wait(void);
int root_mounted(void);
@@ -511,6 +550,7 @@ int root_mounted(void);
*/
struct unrhdr;
struct unrhdr *new_unrhdr(int low, int high, struct mtx *mutex);
+void init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex);
void delete_unrhdr(struct unrhdr *uh);
void clean_unrhdr(struct unrhdr *uh);
void clean_unrhdrl(struct unrhdr *uh);
@@ -519,31 +559,10 @@ int alloc_unr_specific(struct unrhdr *uh, u_int item);
int alloc_unrl(struct unrhdr *uh);
void free_unr(struct unrhdr *uh, u_int item);
-/*
- * Population count algorithm using SWAR approach
- * - "SIMD Within A Register".
- */
-static __inline uint32_t
-bitcount32(uint32_t x)
-{
-
- x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1);
- x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2);
- x = (x + (x >> 4)) & 0x0f0f0f0f;
- x = (x + (x >> 8));
- x = (x + (x >> 16)) & 0x000000ff;
- return (x);
-}
+void intr_prof_stack_use(struct thread *td, struct trapframe *frame);
-static __inline uint16_t
-bitcount16(uint32_t x)
-{
+extern void (*softdep_ast_cleanup)(void);
- x = (x & 0x5555) + ((x & 0xaaaa) >> 1);
- x = (x & 0x3333) + ((x & 0xcccc) >> 2);
- x = (x + (x >> 4)) & 0x0f0f;
- x = (x + (x >> 8)) & 0x00ff;
- return (x);
-}
+void counted_warning(unsigned *counter, const char *msg);
#endif /* !_SYS_SYSTM_H_ */
diff --git a/freebsd/sys/sys/taskqueue.h b/freebsd/sys/sys/taskqueue.h
index 68000026..a6c66558 100644
--- a/freebsd/sys/sys/taskqueue.h
+++ b/freebsd/sys/sys/taskqueue.h
@@ -36,8 +36,10 @@
#include <sys/queue.h>
#include <sys/_task.h>
#include <sys/_callout.h>
+#include <sys/_cpuset.h>
struct taskqueue;
+struct taskqgroup;
struct thread;
struct timeout_task {
@@ -47,6 +49,17 @@ struct timeout_task {
int f;
};
+enum taskqueue_callback_type {
+ TASKQUEUE_CALLBACK_TYPE_INIT,
+ TASKQUEUE_CALLBACK_TYPE_SHUTDOWN,
+};
+#define TASKQUEUE_CALLBACK_TYPE_MIN TASKQUEUE_CALLBACK_TYPE_INIT
+#define TASKQUEUE_CALLBACK_TYPE_MAX TASKQUEUE_CALLBACK_TYPE_SHUTDOWN
+#define TASKQUEUE_NUM_CALLBACKS TASKQUEUE_CALLBACK_TYPE_MAX + 1
+#define TASKQUEUE_NAMELEN 32
+
+typedef void (*taskqueue_callback_fn)(void *context);
+
/*
* A notification callback function which is called from
* taskqueue_enqueue(). The context argument is given in the call to
@@ -61,6 +74,8 @@ struct taskqueue *taskqueue_create(const char *name, int mflags,
void *context);
int taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
const char *name, ...) __printflike(4, 5);
+int taskqueue_start_threads_cpuset(struct taskqueue **tqp, int count,
+ int pri, cpuset_t *mask, const char *name, ...) __printflike(5, 6);
int taskqueue_enqueue(struct taskqueue *queue, struct task *task);
int taskqueue_enqueue_timeout(struct taskqueue *queue,
struct timeout_task *timeout_task, int ticks);
@@ -77,6 +92,9 @@ void taskqueue_run(struct taskqueue *queue);
void taskqueue_block(struct taskqueue *queue);
void taskqueue_unblock(struct taskqueue *queue);
int taskqueue_member(struct taskqueue *queue, struct thread *td);
+void taskqueue_set_callback(struct taskqueue *queue,
+ enum taskqueue_callback_type cb_type,
+ taskqueue_callback_fn callback, void *context);
#define TASK_INITIALIZER(priority, func, context) \
{ .ta_pending = 0, \
@@ -127,7 +145,7 @@ taskqueue_define_##name(void *arg) \
init; \
} \
\
-SYSINIT(taskqueue_##name, SI_SUB_CONFIGURE, SI_ORDER_SECOND, \
+SYSINIT(taskqueue_##name, SI_SUB_INIT_IF, SI_ORDER_SECOND, \
taskqueue_define_##name, NULL); \
\
struct __hack
@@ -152,7 +170,7 @@ taskqueue_define_##name(void *arg) \
init; \
} \
\
-SYSINIT(taskqueue_##name, SI_SUB_CONFIGURE, SI_ORDER_SECOND, \
+SYSINIT(taskqueue_##name, SI_SUB_INIT_IF, SI_ORDER_SECOND, \
taskqueue_define_##name, NULL); \
\
struct __hack
@@ -182,7 +200,6 @@ TASKQUEUE_DECLARE(thread);
* from a fast interrupt handler context.
*/
TASKQUEUE_DECLARE(fast);
-int taskqueue_enqueue_fast(struct taskqueue *queue, struct task *task);
struct taskqueue *taskqueue_create_fast(const char *name, int mflags,
taskqueue_enqueue_fn enqueue,
void *context);
diff --git a/freebsd/sys/sys/tree.h b/freebsd/sys/sys/tree.h
index 1cce7278..c9df686f 100644
--- a/freebsd/sys/sys/tree.h
+++ b/freebsd/sys/sys/tree.h
@@ -383,16 +383,33 @@ struct { \
#define RB_PROTOTYPE_STATIC(name, type, field, cmp) \
RB_PROTOTYPE_INTERNAL(name, type, field, cmp, __unused static)
#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \
-attr void name##_RB_INSERT_COLOR(struct name *, struct type *); \
-attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
-attr struct type *name##_RB_REMOVE(struct name *, struct type *); \
-attr struct type *name##_RB_INSERT(struct name *, struct type *); \
-attr struct type *name##_RB_FIND(struct name *, struct type *); \
-attr struct type *name##_RB_NFIND(struct name *, struct type *); \
-attr struct type *name##_RB_NEXT(struct type *); \
-attr struct type *name##_RB_PREV(struct type *); \
-attr struct type *name##_RB_MINMAX(struct name *, int); \
- \
+ RB_PROTOTYPE_INSERT_COLOR(name, type, attr); \
+ RB_PROTOTYPE_REMOVE_COLOR(name, type, attr); \
+ RB_PROTOTYPE_INSERT(name, type, attr); \
+ RB_PROTOTYPE_REMOVE(name, type, attr); \
+ RB_PROTOTYPE_FIND(name, type, attr); \
+ RB_PROTOTYPE_NFIND(name, type, attr); \
+ RB_PROTOTYPE_NEXT(name, type, attr); \
+ RB_PROTOTYPE_PREV(name, type, attr); \
+ RB_PROTOTYPE_MINMAX(name, type, attr);
+#define RB_PROTOTYPE_INSERT_COLOR(name, type, attr) \
+ attr void name##_RB_INSERT_COLOR(struct name *, struct type *)
+#define RB_PROTOTYPE_REMOVE_COLOR(name, type, attr) \
+ attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *)
+#define RB_PROTOTYPE_REMOVE(name, type, attr) \
+ attr struct type *name##_RB_REMOVE(struct name *, struct type *)
+#define RB_PROTOTYPE_INSERT(name, type, attr) \
+ attr struct type *name##_RB_INSERT(struct name *, struct type *)
+#define RB_PROTOTYPE_FIND(name, type, attr) \
+ attr struct type *name##_RB_FIND(struct name *, struct type *)
+#define RB_PROTOTYPE_NFIND(name, type, attr) \
+ attr struct type *name##_RB_NFIND(struct name *, struct type *)
+#define RB_PROTOTYPE_NEXT(name, type, attr) \
+ attr struct type *name##_RB_NEXT(struct type *)
+#define RB_PROTOTYPE_PREV(name, type, attr) \
+ attr struct type *name##_RB_PREV(struct type *)
+#define RB_PROTOTYPE_MINMAX(name, type, attr) \
+ attr struct type *name##_RB_MINMAX(struct name *, int)
/* Main rb operation.
* Moves node close to the key of elm to top
@@ -402,6 +419,17 @@ attr struct type *name##_RB_MINMAX(struct name *, int); \
#define RB_GENERATE_STATIC(name, type, field, cmp) \
RB_GENERATE_INTERNAL(name, type, field, cmp, __unused static)
#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr) \
+ RB_GENERATE_INSERT_COLOR(name, type, field, attr) \
+ RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \
+ RB_GENERATE_INSERT(name, type, field, cmp, attr) \
+ RB_GENERATE_REMOVE(name, type, field, attr) \
+ RB_GENERATE_FIND(name, type, field, cmp, attr) \
+ RB_GENERATE_NFIND(name, type, field, cmp, attr) \
+ RB_GENERATE_NEXT(name, type, field, attr) \
+ RB_GENERATE_PREV(name, type, field, attr) \
+ RB_GENERATE_MINMAX(name, type, field, attr)
+
+#define RB_GENERATE_INSERT_COLOR(name, type, field, attr) \
attr void \
name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \
{ \
@@ -444,8 +472,9 @@ name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \
} \
} \
RB_COLOR(head->rbh_root, field) = RB_BLACK; \
-} \
- \
+}
+
+#define RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \
attr void \
name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
{ \
@@ -522,8 +551,9 @@ name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm)
} \
if (elm) \
RB_COLOR(elm, field) = RB_BLACK; \
-} \
- \
+}
+
+#define RB_GENERATE_REMOVE(name, type, field, attr) \
attr struct type * \
name##_RB_REMOVE(struct name *head, struct type *elm) \
{ \
@@ -590,7 +620,8 @@ color: \
name##_RB_REMOVE_COLOR(head, parent, child); \
return (old); \
} \
- \
+
+#define RB_GENERATE_INSERT(name, type, field, cmp, attr) \
/* Inserts a node into the RB tree */ \
attr struct type * \
name##_RB_INSERT(struct name *head, struct type *elm) \
@@ -620,8 +651,9 @@ name##_RB_INSERT(struct name *head, struct type *elm) \
RB_ROOT(head) = elm; \
name##_RB_INSERT_COLOR(head, elm); \
return (NULL); \
-} \
- \
+}
+
+#define RB_GENERATE_FIND(name, type, field, cmp, attr) \
/* Finds the node with the same key as elm */ \
attr struct type * \
name##_RB_FIND(struct name *head, struct type *elm) \
@@ -638,8 +670,9 @@ name##_RB_FIND(struct name *head, struct type *elm) \
return (tmp); \
} \
return (NULL); \
-} \
- \
+}
+
+#define RB_GENERATE_NFIND(name, type, field, cmp, attr) \
/* Finds the first node greater than or equal to the search key */ \
attr struct type * \
name##_RB_NFIND(struct name *head, struct type *elm) \
@@ -659,8 +692,9 @@ name##_RB_NFIND(struct name *head, struct type *elm) \
return (tmp); \
} \
return (res); \
-} \
- \
+}
+
+#define RB_GENERATE_NEXT(name, type, field, attr) \
/* ARGSUSED */ \
attr struct type * \
name##_RB_NEXT(struct type *elm) \
@@ -681,8 +715,9 @@ name##_RB_NEXT(struct type *elm) \
} \
} \
return (elm); \
-} \
- \
+}
+
+#define RB_GENERATE_PREV(name, type, field, attr) \
/* ARGSUSED */ \
attr struct type * \
name##_RB_PREV(struct type *elm) \
@@ -703,8 +738,9 @@ name##_RB_PREV(struct type *elm) \
} \
} \
return (elm); \
-} \
- \
+}
+
+#define RB_GENERATE_MINMAX(name, type, field, attr) \
attr struct type * \
name##_RB_MINMAX(struct name *head, int val) \
{ \
diff --git a/freebsd/sys/sys/tty.h b/freebsd/sys/sys/tty.h
index 00cf4e6c..4d082667 100644
--- a/freebsd/sys/sys/tty.h
+++ b/freebsd/sys/sys/tty.h
@@ -171,8 +171,11 @@ void tty_rel_gone(struct tty *tp);
#define tty_getlock(tp) ((tp)->t_mtx)
/* Device node creation. */
-void tty_makedev(struct tty *tp, struct ucred *cred, const char *fmt, ...)
- __printflike(3, 4);
+int tty_makedevf(struct tty *tp, struct ucred *cred, int flags,
+ const char *fmt, ...) __printflike(4, 5);
+#define TTYMK_CLONING 0x1
+#define tty_makedev(tp, cred, fmt, ...) \
+ (void )tty_makedevf((tp), (cred), 0, (fmt), ## __VA_ARGS__)
#define tty_makealias(tp,fmt,...) \
make_dev_alias((tp)->t_dev, fmt, ## __VA_ARGS__)
diff --git a/freebsd/sys/sys/ttydevsw.h b/freebsd/sys/sys/ttydevsw.h
index 748ae0be..98bebca7 100644
--- a/freebsd/sys/sys/ttydevsw.h
+++ b/freebsd/sys/sys/ttydevsw.h
@@ -54,6 +54,7 @@ typedef int tsw_mmap_t(struct tty *tp, vm_ooffset_t offset,
vm_paddr_t * paddr, int nprot, vm_memattr_t *memattr);
typedef void tsw_pktnotify_t(struct tty *tp, char event);
typedef void tsw_free_t(void *softc);
+typedef bool tsw_busy_t(struct tty *tp);
struct ttydevsw {
unsigned int tsw_flags; /* Default TTY flags. */
@@ -74,21 +75,25 @@ struct ttydevsw {
tsw_free_t *tsw_free; /* Destructor. */
- void *tsw_spare[4]; /* For future use. */
+ tsw_busy_t *tsw_busy; /* Draining output. */
+
+ void *tsw_spare[3]; /* For future use. */
};
static __inline int
ttydevsw_open(struct tty *tp)
{
+
tty_lock_assert(tp, MA_OWNED);
MPASS(!tty_gone(tp));
- return tp->t_devsw->tsw_open(tp);
+ return (tp->t_devsw->tsw_open(tp));
}
static __inline void
ttydevsw_close(struct tty *tp)
{
+
tty_lock_assert(tp, MA_OWNED);
MPASS(!tty_gone(tp));
@@ -98,6 +103,7 @@ ttydevsw_close(struct tty *tp)
static __inline void
ttydevsw_outwakeup(struct tty *tp)
{
+
tty_lock_assert(tp, MA_OWNED);
MPASS(!tty_gone(tp));
@@ -111,6 +117,7 @@ ttydevsw_outwakeup(struct tty *tp)
static __inline void
ttydevsw_inwakeup(struct tty *tp)
{
+
tty_lock_assert(tp, MA_OWNED);
MPASS(!tty_gone(tp));
@@ -124,49 +131,56 @@ ttydevsw_inwakeup(struct tty *tp)
static __inline int
ttydevsw_ioctl(struct tty *tp, u_long cmd, caddr_t data, struct thread *td)
{
+
tty_lock_assert(tp, MA_OWNED);
MPASS(!tty_gone(tp));
- return tp->t_devsw->tsw_ioctl(tp, cmd, data, td);
+ return (tp->t_devsw->tsw_ioctl(tp, cmd, data, td));
}
static __inline int
-ttydevsw_cioctl(struct tty *tp, int unit, u_long cmd, caddr_t data, struct thread *td)
+ttydevsw_cioctl(struct tty *tp, int unit, u_long cmd, caddr_t data,
+ struct thread *td)
{
+
tty_lock_assert(tp, MA_OWNED);
MPASS(!tty_gone(tp));
- return tp->t_devsw->tsw_cioctl(tp, unit, cmd, data, td);
+ return (tp->t_devsw->tsw_cioctl(tp, unit, cmd, data, td));
}
static __inline int
ttydevsw_param(struct tty *tp, struct termios *t)
{
+
MPASS(!tty_gone(tp));
- return tp->t_devsw->tsw_param(tp, t);
+ return (tp->t_devsw->tsw_param(tp, t));
}
static __inline int
ttydevsw_modem(struct tty *tp, int sigon, int sigoff)
{
+
MPASS(!tty_gone(tp));
- return tp->t_devsw->tsw_modem(tp, sigon, sigoff);
+ return (tp->t_devsw->tsw_modem(tp, sigon, sigoff));
}
static __inline int
ttydevsw_mmap(struct tty *tp, vm_ooffset_t offset, vm_paddr_t *paddr,
int nprot, vm_memattr_t *memattr)
{
+
MPASS(!tty_gone(tp));
- return tp->t_devsw->tsw_mmap(tp, offset, paddr, nprot, memattr);
+ return (tp->t_devsw->tsw_mmap(tp, offset, paddr, nprot, memattr));
}
static __inline void
ttydevsw_pktnotify(struct tty *tp, char event)
{
+
tty_lock_assert(tp, MA_OWNED);
MPASS(!tty_gone(tp));
@@ -176,9 +190,20 @@ ttydevsw_pktnotify(struct tty *tp, char event)
static __inline void
ttydevsw_free(struct tty *tp)
{
+
MPASS(tty_gone(tp));
tp->t_devsw->tsw_free(tty_softc(tp));
}
+static __inline bool
+ttydevsw_busy(struct tty *tp)
+{
+
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ return (tp->t_devsw->tsw_busy(tp));
+}
+
#endif /* !_SYS_TTYDEVSW_H_ */
diff --git a/freebsd/sys/sys/ucred.h b/freebsd/sys/sys/ucred.h
index 82e4d9a4..ae3fcdeb 100644
--- a/freebsd/sys/sys/ucred.h
+++ b/freebsd/sys/sys/ucred.h
@@ -37,6 +37,8 @@
struct loginclass;
+#define XU_NGROUPS 16
+
/*
* Credentials.
*
@@ -65,6 +67,7 @@ struct ucred {
struct auditinfo_addr cr_audit; /* Audit properties. */
gid_t *cr_groups; /* groups */
int cr_agroups; /* Available groups */
+ gid_t cr_smallgroups[XU_NGROUPS]; /* storage for small groups */
};
#else /* __rtems__ */
struct ucred;
@@ -73,8 +76,6 @@ struct ucred;
#define FSCRED ((struct ucred *)-1) /* filesystem credential */
#endif /* _KERNEL || _WANT_UCRED */
-#define XU_NGROUPS 16
-
/*
* Flags for cr_flags.
*/
@@ -111,11 +112,12 @@ void change_svuid(struct ucred *newcred, uid_t svuid);
void crcopy(struct ucred *dest, struct ucred *src);
struct ucred *crcopysafe(struct proc *p, struct ucred *cr);
struct ucred *crdup(struct ucred *cr);
-void cred_update_thread(struct thread *td);
+void crextend(struct ucred *cr, int n);
+void proc_set_cred_init(struct proc *p, struct ucred *cr);
+struct ucred *proc_set_cred(struct proc *p, struct ucred *cr);
void crfree(struct ucred *cr);
struct ucred *crget(void);
struct ucred *crhold(struct ucred *cr);
-int crshared(struct ucred *cr);
void cru2x(struct ucred *cr, struct xucred *xcr);
void crsetgroups(struct ucred *cr, int n, gid_t *groups);
int groupmember(gid_t gid, struct ucred *cred);
diff --git a/freebsd/sys/sys/unpcb.h b/freebsd/sys/sys/unpcb.h
index 38a2d1fe..cdb5c4d0 100644
--- a/freebsd/sys/sys/unpcb.h
+++ b/freebsd/sys/sys/unpcb.h
@@ -78,8 +78,8 @@ struct unpcb {
struct unp_head unp_refs; /* referencing socket linked list */
LIST_ENTRY(unpcb) unp_reflink; /* link in unp_refs list */
struct sockaddr_un *unp_addr; /* bound address of socket */
- int unp_cc; /* copy of rcv.sb_cc */
- int unp_mbcnt; /* copy of rcv.sb_mbcnt */
+ int reserved1;
+ int reserved2;
unp_gen_t unp_gencnt; /* generation count of this instance */
short unp_flags; /* flags */
short unp_gcflag; /* Garbage collector flags. */
@@ -107,10 +107,6 @@ struct unpcb {
#define UNP_WANTCRED 0x004 /* credentials wanted */
#define UNP_CONNWAIT 0x008 /* connect blocks until accepted */
-#define UNPGC_REF 0x1 /* unpcb has external ref. */
-#define UNPGC_DEAD 0x2 /* unpcb might be dead. */
-#define UNPGC_SCANNED 0x4 /* Has been scanned. */
-
/*
* These flags are used to handle non-atomicity in connect() and bind()
* operations on a socket: in particular, to avoid races between multiple
@@ -118,6 +114,15 @@ struct unpcb {
*/
#define UNP_CONNECTING 0x010 /* Currently connecting. */
#define UNP_BINDING 0x020 /* Currently binding. */
+#define UNP_NASCENT 0x040 /* Newborn child socket. */
+
+/*
+ * Flags in unp_gcflag.
+ */
+#define UNPGC_REF 0x1 /* unpcb has external ref. */
+#define UNPGC_DEAD 0x2 /* unpcb might be dead. */
+#define UNPGC_SCANNED 0x4 /* Has been scanned. */
+#define UNPGC_IGNORE_RIGHTS 0x8 /* Attached rights are freed */
#define sotounpcb(so) ((struct unpcb *)((so)->so_pcb))
diff --git a/freebsd/sys/sys/user.h b/freebsd/sys/sys/user.h
index 698cad9e..d0da0455 100644
--- a/freebsd/sys/sys/user.h
+++ b/freebsd/sys/sys/user.h
@@ -61,6 +61,7 @@
#ifndef _SYS_SOCKET_VAR_H_
#include <sys/socket.h>
#endif
+#include <sys/caprights.h>
/*
* KERN_PROC subtype ops return arrays of selected proc structure entries:
@@ -83,7 +84,7 @@
* it in two places: function fill_kinfo_proc in sys/kern/kern_proc.c and
* function kvm_proclist in lib/libkvm/kvm_proc.c .
*/
-#define KI_NSPARE_INT 7
+#define KI_NSPARE_INT 4
#define KI_NSPARE_LONG 12
#define KI_NSPARE_PTR 6
@@ -98,7 +99,7 @@
#define TDNAMLEN 16 /* size of returned thread name */
#define COMMLEN 19 /* size of returned ki_comm name */
#define KI_EMULNAMELEN 16 /* size of returned ki_emul */
-#define KI_NGROUPS 16 /* number of groups in ki_groups */
+#define KI_NGROUPS 16 /* number of groups in ki_groups */
#define LOGNAMELEN 17 /* size of returned ki_login */
#define LOGINCLASSLEN 17 /* size of returned ki_loginclass */
@@ -146,7 +147,7 @@ struct kinfo_proc {
gid_t ki_svgid; /* Saved effective group id */
short ki_ngroups; /* number of groups */
short ki_spare_short2; /* unused (just here for alignment) */
- gid_t ki_groups[KI_NGROUPS]; /* groups */
+ gid_t ki_groups[KI_NGROUPS]; /* groups */
vm_size_t ki_size; /* virtual size */
segsz_t ki_rssize; /* current resident set size in pages */
segsz_t ki_swrss; /* resident set size before last swap */
@@ -170,8 +171,8 @@ struct kinfo_proc {
signed char ki_nice; /* Process "nice" value */
char ki_lock; /* Process lock (prevent swap) count */
char ki_rqindex; /* Run queue index */
- u_char ki_oncpu; /* Which cpu we are on */
- u_char ki_lastcpu; /* Last cpu we were on */
+ u_char ki_oncpu_old; /* Which cpu we are on (legacy) */
+ u_char ki_lastcpu_old; /* Last cpu we were on (legacy) */
char ki_tdname[TDNAMLEN+1]; /* thread name */
char ki_wmesg[WMESGLEN+1]; /* wchan message */
char ki_login[LOGNAMELEN+1]; /* setlogin name */
@@ -186,6 +187,9 @@ struct kinfo_proc {
*/
char ki_sparestrings[50]; /* spare string space */
int ki_spareints[KI_NSPARE_INT]; /* spare room for growth */
+ int ki_oncpu; /* Which cpu we are on */
+ int ki_lastcpu; /* Last cpu we were on */
+ int ki_tracer; /* Pid of tracing process */
int ki_flag2; /* P2_* flags */
int ki_fibnum; /* Default FIB number */
u_int ki_cr_flags; /* Credential flags */
@@ -255,8 +259,7 @@ struct user {
#define KF_TYPE_SHM 8
#define KF_TYPE_SEM 9
#define KF_TYPE_PTS 10
-/* no KF_TYPE_CAPABILITY (11), since capabilities wrap other file objects */
-#define KF_TYPE_PROCDESC 12
+#define KF_TYPE_PROCDESC 11
#define KF_TYPE_UNKNOWN 255
#define KF_VTYPE_VNON 0
@@ -273,7 +276,7 @@ struct user {
#define KF_FD_TYPE_CWD -1 /* Current working directory */
#define KF_FD_TYPE_ROOT -2 /* Root directory */
#define KF_FD_TYPE_JAIL -3 /* Jail directory */
-#define KF_FD_TYPE_TRACE -4 /* ptrace vnode */
+#define KF_FD_TYPE_TRACE -4 /* Ktrace vnode */
#define KF_FD_TYPE_TEXT -5 /* Text vnode */
#define KF_FD_TYPE_CTTY -6 /* Controlling terminal */
@@ -292,11 +295,10 @@ struct user {
#define KF_FLAG_TRUNC 0x00001000
#define KF_FLAG_EXCL 0x00002000
#define KF_FLAG_EXEC 0x00004000
-#define KF_FLAG_CAPABILITY 0x00008000
/*
* Old format. Has variable hidden padding due to alignment.
- * This is a compatability hack for pre-build 7.1 packages.
+ * This is a compatibility hack for pre-build 7.1 packages.
*/
#if defined(__amd64__)
#define KINFO_OFILE_SIZE 1328
@@ -323,6 +325,12 @@ struct kinfo_ofile {
};
#if defined(__amd64__) || defined(__i386__)
+/*
+ * This size should never be changed. If you really need to, you must provide
+ * backward ABI compatibility by allocating a new sysctl MIB that will return
+ * the new structure. The current structure has to be returned by the current
+ * sysctl MIB. See how it is done for the kinfo_ofile structure.
+ */
#define KINFO_FILE_SIZE 1392
#endif
@@ -395,7 +403,7 @@ struct kinfo_file {
uint16_t kf_pad1; /* Round to 32 bit alignment. */
int _kf_ispare0; /* Space for more stuff. */
cap_rights_t kf_cap_rights; /* Capability rights. */
- int _kf_ispare[4]; /* Space for more stuff. */
+ uint64_t _kf_cap_spare; /* Space for future cap_rights_t. */
/* Truncated before copyout in sysctl */
char kf_path[PATH_MAX]; /* Path to file, if any. */
#else /* __rtems__ */
@@ -484,6 +492,27 @@ struct kinfo_vmentry {
};
/*
+ * The "vm.objects" sysctl provides a list of all VM objects in the system
+ * via an array of these entries.
+ */
+struct kinfo_vmobject {
+ int kvo_structsize; /* Variable size of record. */
+ int kvo_type; /* Object type: KVME_TYPE_*. */
+ uint64_t kvo_size; /* Object size in pages. */
+ uint64_t kvo_vn_fileid; /* inode number if vnode. */
+ uint32_t kvo_vn_fsid; /* dev_t of vnode location. */
+ int kvo_ref_count; /* Reference count. */
+ int kvo_shadow_count; /* Shadow count. */
+ int kvo_memattr; /* Memory attribute. */
+ uint64_t kvo_resident; /* Number of resident pages. */
+ uint64_t kvo_active; /* Number of active pages. */
+ uint64_t kvo_inactive; /* Number of inactive pages. */
+ uint64_t _kvo_qspare[8];
+ uint32_t _kvo_ispare[8];
+ char kvo_path[PATH_MAX]; /* Pathname, if any. */
+};
+
+/*
* The KERN_PROC_KSTACK sysctl allows a process to dump the kernel stacks of
* another process as a series of entries. Each stack is represented by a
* series of symbol names and offsets as generated by stack_sbuf_print(9).
@@ -516,6 +545,11 @@ struct kinfo_sigtramp {
#define KERN_PROC_NOTHREADS 0x1
#define KERN_PROC_MASK32 0x2
+/* Flags for kern_proc_filedesc_out. */
+#define KERN_FILEDESC_PACK_KINFO 0x00000001U
+
+/* Flags for kern_proc_vmmap_out. */
+#define KERN_VMMAP_PACK_KINFO 0x00000001U
struct sbuf;
/*
@@ -527,9 +561,12 @@ struct sbuf;
* to be locked on enter. On return the process is unlocked.
*/
-int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen);
+int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
+ int flags);
+int kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen);
int kern_proc_out(struct proc *p, struct sbuf *sb, int flags);
-int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb);
+int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
+ int flags);
int vntype_to_kinfo(int vtype);
#endif /* !_KERNEL */
diff --git a/freebsd/sys/sys/uuid.h b/freebsd/sys/sys/uuid.h
new file mode 100644
index 00000000..0748f611
--- /dev/null
+++ b/freebsd/sys/sys/uuid.h
@@ -0,0 +1,85 @@
+/*-
+ * Copyright (c) 2002 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_UUID_H_
+#define _SYS_UUID_H_
+
+#include <sys/cdefs.h>
+
+/* Length of a node address (an IEEE 802 address). */
+#define _UUID_NODE_LEN 6
+
+/*
+ * See also:
+ * http://www.opengroup.org/dce/info/draft-leach-uuids-guids-01.txt
+ * http://www.opengroup.org/onlinepubs/009629399/apdxa.htm
+ *
+ * A DCE 1.1 compatible source representation of UUIDs.
+ */
+struct uuid {
+ uint32_t time_low;
+ uint16_t time_mid;
+ uint16_t time_hi_and_version;
+ uint8_t clock_seq_hi_and_reserved;
+ uint8_t clock_seq_low;
+ uint8_t node[_UUID_NODE_LEN];
+};
+
+#ifdef _KERNEL
+
+#define UUID_NODE_LEN _UUID_NODE_LEN
+
+struct sbuf;
+
+struct uuid *kern_uuidgen(struct uuid *, size_t);
+
+int uuid_ether_add(const uint8_t *);
+int uuid_ether_del(const uint8_t *);
+
+int snprintf_uuid(char *, size_t, struct uuid *);
+int printf_uuid(struct uuid *);
+int sbuf_printf_uuid(struct sbuf *, struct uuid *);
+int parse_uuid(const char *, struct uuid *);
+
+void be_uuid_dec(void const *buf, struct uuid *uuid);
+void be_uuid_enc(void *buf, struct uuid const *uuid);
+void le_uuid_dec(void const *buf, struct uuid *uuid);
+void le_uuid_enc(void *buf, struct uuid const *uuid);
+
+#else /* _KERNEL */
+
+/* XXX namespace pollution? */
+typedef struct uuid uuid_t;
+
+__BEGIN_DECLS
+int uuidgen(struct uuid *, int);
+__END_DECLS
+
+#endif /* _KERNEL */
+
+#endif /* _SYS_UUID_H_ */
diff --git a/freebsd/sys/sys/vmmeter.h b/freebsd/sys/sys/vmmeter.h
index c66016c6..39f03d0d 100644
--- a/freebsd/sys/sys/vmmeter.h
+++ b/freebsd/sys/sys/vmmeter.h
@@ -46,7 +46,7 @@
* c - constant after initialization
* f - locked by vm_page_queue_free_mtx
* p - locked by being in the PCPU and atomicity respect to interrupts
- * q - locked by vm_page_queue_mtx
+ * q - changes are synchronized by the corresponding vm_pagequeue lock
*/
struct vmmeter {
/*
@@ -61,6 +61,7 @@ struct vmmeter {
* Virtual memory activity.
*/
u_int v_vm_faults; /* (p) address memory faults */
+ u_int v_io_faults; /* (p) page faults requiring I/O */
u_int v_cow_faults; /* (p) copy-on-writes faults */
u_int v_cow_optim; /* (p) optimized copy-on-writes faults */
u_int v_zfod; /* (p) pages zero filled on demand */
@@ -75,8 +76,8 @@ struct vmmeter {
u_int v_vnodepgsout; /* (p) vnode pager pages paged out */
u_int v_intrans; /* (p) intransit blocking page faults */
u_int v_reactivated; /* (f) pages reactivated from free list */
- u_int v_pdwakeups; /* (f) times daemon has awaken from sleep */
- u_int v_pdpages; /* (q) pages analyzed by daemon */
+ u_int v_pdwakeups; /* (p) times daemon has awaken from sleep */
+ u_int v_pdpages; /* (p) pages analyzed by daemon */
u_int v_tcached; /* (p) total pages cached */
u_int v_dfree; /* (p) pages freed by daemon */
@@ -96,8 +97,6 @@ struct vmmeter {
u_int v_inactive_target; /* (c) pages desired inactive */
u_int v_inactive_count; /* (q) pages inactive */
u_int v_cache_count; /* (f) pages on cache queue */
- u_int v_cache_min; /* (c) min pages desired on cache queue */
- u_int v_cache_max; /* (c) max pages in cached obj */
u_int v_pageout_free_min; /* (c) min pages reserved for kernel */
u_int v_interrupt_free_min; /* (c) reserved pages for int code */
u_int v_free_severe; /* (c) severe page depletion point */
@@ -112,10 +111,13 @@ struct vmmeter {
u_int v_vforkpages; /* (p) VM pages affected by vfork() */
u_int v_rforkpages; /* (p) VM pages affected by rfork() */
u_int v_kthreadpages; /* (p) VM pages affected by fork() by kernel */
+ u_int v_spare[2];
};
#ifdef _KERNEL
-extern struct vmmeter cnt;
+extern struct vmmeter vm_cnt;
+
+extern u_int vm_pageout_wakeup_thresh;
/*
* Return TRUE if we are under our severe low-free-pages threshold
@@ -123,12 +125,12 @@ extern struct vmmeter cnt;
* This routine is typically used at the user<->system interface to determine
* whether we need to block in order to avoid a low memory deadlock.
*/
-
-static __inline
-int
+static inline int
vm_page_count_severe(void)
{
- return (cnt.v_free_severe > (cnt.v_free_count + cnt.v_cache_count));
+
+ return (vm_cnt.v_free_severe > vm_cnt.v_free_count +
+ vm_cnt.v_cache_count);
}
/*
@@ -138,55 +140,48 @@ vm_page_count_severe(void)
* we can execute potentially very expensive code in terms of memory. It
* is also used by the pageout daemon to calculate when to sleep, when
* to wake waiters up, and when (after making a pass) to become more
- * desparate.
+ * desperate.
*/
-
-static __inline
-int
+static inline int
vm_page_count_min(void)
{
- return (cnt.v_free_min > (cnt.v_free_count + cnt.v_cache_count));
+
+ return (vm_cnt.v_free_min > vm_cnt.v_free_count + vm_cnt.v_cache_count);
}
/*
* Return TRUE if we have not reached our free page target during
* free page recovery operations.
*/
-
-static __inline
-int
+static inline int
vm_page_count_target(void)
{
- return (cnt.v_free_target > (cnt.v_free_count + cnt.v_cache_count));
+
+ return (vm_cnt.v_free_target > vm_cnt.v_free_count +
+ vm_cnt.v_cache_count);
}
/*
* Return the number of pages we need to free-up or cache
* A positive number indicates that we do not have enough free pages.
*/
-
-static __inline
-int
+static inline int
vm_paging_target(void)
{
- return (
- (cnt.v_free_target + cnt.v_cache_min) -
- (cnt.v_free_count + cnt.v_cache_count)
- );
+
+ return (vm_cnt.v_free_target - (vm_cnt.v_free_count +
+ vm_cnt.v_cache_count));
}
/*
* Returns TRUE if the pagedaemon needs to be woken up.
*/
-
-static __inline
-int
+static inline int
vm_paging_needed(void)
{
- return (
- (cnt.v_free_reserved + cnt.v_cache_min) >
- (cnt.v_free_count + cnt.v_cache_count)
- );
+
+ return (vm_cnt.v_free_count + vm_cnt.v_cache_count <
+ vm_pageout_wakeup_thresh);
}
#endif
diff --git a/freebsd/sys/sys/vnode.h b/freebsd/sys/sys/vnode.h
index 5b709f81..a047bc67 100644
--- a/freebsd/sys/sys/vnode.h
+++ b/freebsd/sys/sys/vnode.h
@@ -78,6 +78,7 @@ struct vpollinfo {
* c - namecache mutex
* f - freelist mutex
* i - interlock
+ * I - updated with atomics, 0->1 and 1->0 transitions with interlock held
* m - mount point interlock
* p - pollinfo lock
* u - Only a reference to the vnode is needed to read.
@@ -100,7 +101,6 @@ struct vnode {
* Fields which define the identity of the vnode. These fields are
* owned by the filesystem (XXX: and vgone() ?)
*/
- enum vtype v_type; /* u vnode type */
const char *v_tag; /* u type of underlying data */
struct vop_vector *v_op; /* u vnode operations vector */
void *v_data; /* u private data for fs */
@@ -123,10 +123,10 @@ struct vnode {
} v_un;
/*
- * vfs_hash: (mount + inode) -> vnode hash.
+ * vfs_hash: (mount + inode) -> vnode hash. The hash value
+ * itself is grouped with other int fields, to avoid padding.
*/
LIST_ENTRY(vnode) v_hashlist;
- u_int v_hash;
/*
* VFS_namecache stuff
@@ -136,24 +136,11 @@ struct vnode {
struct namecache *v_cache_dd; /* c Cache entry for .. vnode */
/*
- * clustering stuff
- */
- daddr_t v_cstart; /* v start block of cluster */
- daddr_t v_lasta; /* v last allocation */
- daddr_t v_lastw; /* v last write */
- int v_clen; /* v length of cur. cluster */
-
- /*
* Locking
*/
struct lock v_lock; /* u (if fs don't have one) */
struct mtx v_interlock; /* lock for "i" things */
struct lock *v_vnlock; /* u pointer to vnode lock */
- int v_holdcnt; /* i prevents recycling. */
- int v_usecount; /* i ref count of users */
- u_long v_iflag; /* i vnode flags (see below) */
- u_long v_vflag; /* v vnode flags */
- int v_writecount; /* v ref count of writers */
/*
* The machinery of being a vnode
@@ -168,6 +155,22 @@ struct vnode {
struct label *v_label; /* MAC label for vnode */
struct lockf *v_lockf; /* Byte-level advisory lock list */
struct rangelock v_rl; /* Byte-range lock */
+
+ /*
+ * clustering stuff
+ */
+ daddr_t v_cstart; /* v start block of cluster */
+ daddr_t v_lasta; /* v last allocation */
+ daddr_t v_lastw; /* v last write */
+ int v_clen; /* v length of cur. cluster */
+
+ u_int v_holdcnt; /* I prevents recycling. */
+ u_int v_usecount; /* I ref count of users */
+ u_int v_iflag; /* i vnode flags (see below) */
+ u_int v_vflag; /* v vnode flags */
+ int v_writecount; /* v ref count of writers */
+ u_int v_hash;
+ enum vtype v_type; /* u vnode type */
};
#endif /* defined(_KERNEL) || defined(_KVM_VNODE) */
@@ -232,7 +235,6 @@ struct xvnode {
* are required for writing but the status may be checked with either.
*/
#define VI_MOUNT 0x0020 /* Mount in progress */
-#define VI_AGE 0x0040 /* Insert vnode at head of free list */
#define VI_DOOMED 0x0080 /* This vnode is being recycled */
#define VI_FREE 0x0100 /* This vnode is on the freelist */
#define VI_ACTIVE 0x0200 /* This vnode is on the active list */
@@ -285,6 +287,7 @@ struct vattr {
*/
#define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */
#define VA_EXCLUSIVE 0x02 /* exclusive create request */
+#define VA_SYNC 0x04 /* O_SYNC truncation */
/*
* Flags for ioflag. (high 16 bits used to ask for read-ahead and
@@ -304,6 +307,7 @@ struct vattr {
#define IO_NORMAL 0x0800 /* operate on regular data */
#define IO_NOMACCHECK 0x1000 /* MAC checks unnecessary */
#define IO_BUFLOCKED 0x2000 /* ffs flag; indir buf is locked */
+#define IO_RANGELOCKED 0x4000 /* range locked */
#define IO_SEQMAX 0x7F /* seq heuristic max value */
#define IO_SEQSHIFT 16 /* seq heuristic in upper 16 bits */
@@ -334,6 +338,8 @@ struct vattr {
#define VWRITE_ACL 000040000000 /* change ACL and/or file mode */
#define VWRITE_OWNER 000100000000 /* change file owner */
#define VSYNCHRONIZE 000200000000 /* not used */
+#define VCREAT 000400000000 /* creating new file */
+#define VVERIFY 001000000000 /* verification required */
/*
* Permissions that were traditionally granted only to the file owner.
@@ -368,6 +374,8 @@ struct vattr {
MALLOC_DECLARE(M_VNODE);
#endif
+extern u_int ncsizefactor;
+
/*
* Convert between vnode types and inode formats (since POSIX.1
* defines mode word of stat structure in terms of inode formats).
@@ -393,10 +401,14 @@ extern int vttoif_tab[];
#define V_WAIT 0x0001 /* vn_start_write: sleep for suspend */
#define V_NOWAIT 0x0002 /* vn_start_write: don't sleep for suspend */
#define V_XSLEEP 0x0004 /* vn_start_write: just return after sleep */
+#define V_MNTREF 0x0010 /* vn_start_write: mp is already ref-ed */
#define VR_START_WRITE 0x0001 /* vfs_write_resume: start write atomically */
#define VR_NO_SUSPCLR 0x0002 /* vfs_write_resume: do not clear suspension */
+#define VS_SKIP_UNMOUNT 0x0001 /* vfs_write_suspend: fail if the
+ filesystem is being unmounted */
+
#define VREF(vp) vref(vp)
#ifdef DIAGNOSTIC
@@ -411,7 +423,7 @@ extern int vttoif_tab[];
* Global vnode data.
*/
extern struct vnode *rootvnode; /* root (i.e. "/") vnode */
-extern int async_io_version; /* 0 or POSIX version of AIO i'face */
+extern struct mount *rootdevmp; /* "/dev" mount */
extern int desiredvnodes; /* number of vnodes desired */
extern struct uma_zone *namei_zone;
extern struct vattr va_null; /* predefined null vattr structure */
@@ -424,6 +436,7 @@ extern struct vattr va_null; /* predefined null vattr structure */
#define VN_LOCK_AREC(vp) lockallowrecurse((vp)->v_vnlock)
#define VN_LOCK_ASHARE(vp) lockallowshare((vp)->v_vnlock)
+#define VN_LOCK_DSHARE(vp) lockdisableshare((vp)->v_vnlock)
#endif /* _KERNEL */
@@ -498,30 +511,20 @@ extern struct vnodeop_desc *vnodeop_descs[];
* reliable since if the thread sleeps between changing the lock
* state and checking it with the assert, some other thread could
* change the state. They are good enough for debugging a single
- * filesystem using a single-threaded test.
+ * filesystem using a single-threaded test. Note that the unreliability is
+ * limited to false negatives; efforts were made to ensure that false
+ * positives cannot occur.
*/
void assert_vi_locked(struct vnode *vp, const char *str);
void assert_vi_unlocked(struct vnode *vp, const char *str);
void assert_vop_elocked(struct vnode *vp, const char *str);
-#if 0
-void assert_vop_elocked_other(struct vnode *vp, const char *str);
-#endif
void assert_vop_locked(struct vnode *vp, const char *str);
-#if 0
-void assert_vop_slocked(struct vnode *vp, const char *str);
-#endif
void assert_vop_unlocked(struct vnode *vp, const char *str);
#define ASSERT_VI_LOCKED(vp, str) assert_vi_locked((vp), (str))
#define ASSERT_VI_UNLOCKED(vp, str) assert_vi_unlocked((vp), (str))
#define ASSERT_VOP_ELOCKED(vp, str) assert_vop_elocked((vp), (str))
-#if 0
-#define ASSERT_VOP_ELOCKED_OTHER(vp, str) assert_vop_locked_other((vp), (str))
-#endif
#define ASSERT_VOP_LOCKED(vp, str) assert_vop_locked((vp), (str))
-#if 0
-#define ASSERT_VOP_SLOCKED(vp, str) assert_vop_slocked((vp), (str))
-#endif
#define ASSERT_VOP_UNLOCKED(vp, str) assert_vop_unlocked((vp), (str))
#else /* !DEBUG_VFS_LOCKS */
@@ -529,13 +532,7 @@ void assert_vop_unlocked(struct vnode *vp, const char *str);
#define ASSERT_VI_LOCKED(vp, str) ((void)0)
#define ASSERT_VI_UNLOCKED(vp, str) ((void)0)
#define ASSERT_VOP_ELOCKED(vp, str) ((void)0)
-#if 0
-#define ASSERT_VOP_ELOCKED_OTHER(vp, str)
-#endif
#define ASSERT_VOP_LOCKED(vp, str) ((void)0)
-#if 0
-#define ASSERT_VOP_SLOCKED(vp, str)
-#endif
#define ASSERT_VOP_UNLOCKED(vp, str) ((void)0)
#endif /* DEBUG_VFS_LOCKS */
@@ -566,10 +563,13 @@ vn_canvmio(struct vnode *vp)
/*
* Finally, include the default set of vnode operations.
*/
+typedef void vop_getpages_iodone_t(void *, vm_page_t *, int, int);
#include <rtems/bsd/local/vnode_if.h>
/* vn_open_flags */
#define VN_OPEN_NOAUDIT 0x00000001
+#define VN_OPEN_NOCAPCHECK 0x00000002
+#define VN_OPEN_NAMECACHE 0x00000004
/*
* Public vnode manipulation functions.
@@ -586,23 +586,26 @@ struct nstat;
struct ucred;
struct uio;
struct vattr;
+struct vfsops;
struct vnode;
+typedef int (*vn_get_ino_t)(struct mount *, void *, int, struct vnode **);
+
+int bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn,
+ daddr_t endn);
/* cache_* may belong in namei.h. */
+void cache_changesize(int newhashsize);
#define cache_enter(dvp, vp, cnp) \
cache_enter_time(dvp, vp, cnp, NULL, NULL)
void cache_enter_time(struct vnode *dvp, struct vnode *vp,
struct componentname *cnp, struct timespec *tsp,
struct timespec *dtsp);
-#define cache_lookup(dvp, vpp, cnp) \
- cache_lookup_times(dvp, vpp, cnp, NULL, NULL)
-int cache_lookup_times(struct vnode *dvp, struct vnode **vpp,
+int cache_lookup(struct vnode *dvp, struct vnode **vpp,
struct componentname *cnp, struct timespec *tsp, int *ticksp);
void cache_purge(struct vnode *vp);
void cache_purge_negative(struct vnode *vp);
void cache_purgevfs(struct mount *mp);
int change_dir(struct vnode *vp, struct thread *td);
-int change_root(struct vnode *vp, struct thread *td);
void cvtstat(struct stat *st, struct ostat *ost);
void cvtnstat(struct stat *sb, struct nstat *nsb);
int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
@@ -616,8 +619,6 @@ u_quad_t init_va_filerev(void);
int speedup_syncer(void);
int vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf,
u_int *buflen);
-#define textvp_fullpath(p, rb, rfb) \
- vn_fullpath(FIRST_THREAD_IN_PROC(p), (p)->p_textvp, rb, rfb)
int vn_fullpath(struct thread *td, struct vnode *vn,
char **retbuf, char **freebuf);
int vn_fullpath_global(struct thread *td, struct vnode *vn,
@@ -638,21 +639,22 @@ int vaccess_acl_posix1e(enum vtype type, uid_t file_uid,
struct ucred *cred, int *privused);
void vattr_null(struct vattr *vap);
int vcount(struct vnode *vp);
-void vdrop(struct vnode *);
-void vdropl(struct vnode *);
+#define vdrop(vp) _vdrop((vp), 0)
+#define vdropl(vp) _vdrop((vp), 1)
+void _vdrop(struct vnode *, bool);
int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td);
int vget(struct vnode *vp, int lockflag, struct thread *td);
void vgone(struct vnode *vp);
-void vhold(struct vnode *);
-void vholdl(struct vnode *);
+#define vhold(vp) _vhold((vp), 0)
+#define vholdl(vp) _vhold((vp), 1)
+void _vhold(struct vnode *, bool);
void vinactive(struct vnode *, struct thread *);
int vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo);
-int vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
- off_t length, int blksize);
+int vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length,
+ int blksize);
void vunref(struct vnode *);
void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3);
-#define vprint(label, vp) vn_printf((vp), "%s\n", (label))
-int vrecycle(struct vnode *vp, struct thread *td);
+int vrecycle(struct vnode *vp);
int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off,
struct ucred *cred);
int vn_close(struct vnode *vp,
@@ -665,6 +667,8 @@ int _vn_lock(struct vnode *vp, int flags, char *file, int line);
int vn_open(struct nameidata *ndp, int *flagp, int cmode, struct file *fp);
int vn_open_cred(struct nameidata *ndp, int *flagp, int cmode,
u_int vn_open_flags, struct ucred *cred, struct file *fp);
+int vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
+ struct thread *td, struct file *fp);
void vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end);
int vn_pollrecord(struct vnode *vp, struct thread *p, int events);
int vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base,
@@ -676,7 +680,7 @@ int vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base,
struct ucred *active_cred, struct ucred *file_cred, size_t *aresid,
struct thread *td);
int vn_rlimit_fsize(const struct vnode *vn, const struct uio *uio,
- const struct thread *td);
+ struct thread *td);
int vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
struct ucred *file_cred, struct thread *td);
int vn_start_write(struct vnode *vp, struct mount **mpp, int flags);
@@ -691,6 +695,10 @@ int vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
const char *attrname, struct thread *td);
int vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags,
struct vnode **rvp);
+int vn_vget_ino_gen(struct vnode *vp, vn_get_ino_t alloc,
+ void *alloc_arg, int lkflags, struct vnode **rvp);
+int vn_utimes_perm(struct vnode *vp, struct vattr *vap,
+ struct ucred *cred, struct thread *td);
int vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio);
int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
@@ -708,10 +716,12 @@ int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
int vfs_cache_lookup(struct vop_lookup_args *ap);
void vfs_timestamp(struct timespec *);
-void vfs_write_resume(struct mount *mp);
-void vfs_write_resume_flags(struct mount *mp, int flags);
-int vfs_write_suspend(struct mount *mp);
+void vfs_write_resume(struct mount *mp, int flags);
+int vfs_write_suspend(struct mount *mp, int flags);
+int vfs_write_suspend_umnt(struct mount *mp);
+void vnlru_free(int, struct vfsops *);
int vop_stdbmap(struct vop_bmap_args *);
+int vop_stdfdatasync_buf(struct vop_fdatasync_args *);
int vop_stdfsync(struct vop_fsync_args *);
int vop_stdgetwritemount(struct vop_getwritemount_args *);
int vop_stdgetpages(struct vop_getpages_args *);
@@ -743,33 +753,51 @@ int vop_enoent(struct vop_generic_args *ap);
int vop_enotty(struct vop_generic_args *ap);
int vop_null(struct vop_generic_args *ap);
int vop_panic(struct vop_generic_args *ap);
+int dead_poll(struct vop_poll_args *ap);
+int dead_read(struct vop_read_args *ap);
+int dead_write(struct vop_write_args *ap);
/* These are called from within the actual VOPS. */
+void vop_close_post(void *a, int rc);
void vop_create_post(void *a, int rc);
void vop_deleteextattr_post(void *a, int rc);
void vop_link_post(void *a, int rc);
-void vop_lock_pre(void *a);
-void vop_lock_post(void *a, int rc);
void vop_lookup_post(void *a, int rc);
void vop_lookup_pre(void *a);
void vop_mkdir_post(void *a, int rc);
void vop_mknod_post(void *a, int rc);
+void vop_open_post(void *a, int rc);
+void vop_read_post(void *a, int rc);
+void vop_readdir_post(void *a, int rc);
+void vop_reclaim_post(void *a, int rc);
void vop_remove_post(void *a, int rc);
void vop_rename_post(void *a, int rc);
void vop_rename_pre(void *a);
void vop_rmdir_post(void *a, int rc);
void vop_setattr_post(void *a, int rc);
void vop_setextattr_post(void *a, int rc);
-void vop_strategy_pre(void *a);
void vop_symlink_post(void *a, int rc);
+
+#ifdef DEBUG_VFS_LOCKS
+void vop_strategy_pre(void *a);
+void vop_lock_pre(void *a);
+void vop_lock_post(void *a, int rc);
void vop_unlock_post(void *a, int rc);
void vop_unlock_pre(void *a);
+#else
+#define vop_strategy_pre(x) do { } while (0)
+#define vop_lock_pre(x) do { } while (0)
+#define vop_lock_post(x, y) do { } while (0)
+#define vop_unlock_post(x, y) do { } while (0)
+#define vop_unlock_pre(x) do { } while (0)
+#endif
void vop_rename_fail(struct vop_rename_args *ap);
#define VOP_WRITE_PRE(ap) \
struct vattr va; \
- int error, osize, ooffset, noffset; \
+ int error; \
+ off_t osize, ooffset, noffset; \
\
osize = ooffset = noffset = 0; \
if (!VN_KNLIST_EMPTY((ap)->a_vp)) { \
@@ -777,7 +805,7 @@ void vop_rename_fail(struct vop_rename_args *ap);
if (error) \
return (error); \
ooffset = (ap)->a_uio->uio_offset; \
- osize = va.va_size; \
+ osize = (off_t)va.va_size; \
}
#define VOP_WRITE_POST(ap, ret) \
@@ -793,6 +821,7 @@ void vop_rename_fail(struct vop_rename_args *ap);
void vput(struct vnode *vp);
void vrele(struct vnode *vp);
void vref(struct vnode *vp);
+void vrefl(struct vnode *vp);
int vrefcnt(struct vnode *vp);
void v_addpollinfo(struct vnode *vp);
@@ -811,12 +840,20 @@ extern struct vop_vector default_vnodeops;
#define VOP_ENOENT ((void*)(uintptr_t)vop_enoent)
#define VOP_EOPNOTSUPP ((void*)(uintptr_t)vop_eopnotsupp)
+/* fifo_vnops.c */
+int fifo_printinfo(struct vnode *);
+
/* vfs_hash.c */
typedef int vfs_hash_cmp_t(struct vnode *vp, void *arg);
-int vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg);
+void vfs_hash_changesize(int newhashsize);
+int vfs_hash_get(const struct mount *mp, u_int hash, int flags,
+ struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg);
u_int vfs_hash_index(struct vnode *vp);
-int vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg);
+int vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td,
+ struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg);
+void vfs_hash_ref(const struct mount *mp, u_int hash, struct thread *td,
+ struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg);
void vfs_hash_rehash(struct vnode *vp, u_int hash);
void vfs_hash_remove(struct vnode *vp);
diff --git a/freebsd/sys/v850/include/machine/in_cksum.h b/freebsd/sys/v850/include/machine/in_cksum.h
index 633efa1f..72edfba9 100644
--- a/freebsd/sys/v850/include/machine/in_cksum.h
+++ b/freebsd/sys/v850/include/machine/in_cksum.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
diff --git a/freebsd/sys/vm/uma.h b/freebsd/sys/vm/uma.h
index dbe3c488..1ab51c89 100644
--- a/freebsd/sys/vm/uma.h
+++ b/freebsd/sys/vm/uma.h
@@ -33,8 +33,8 @@
*
*/
-#ifndef VM_UMA_H
-#define VM_UMA_H
+#ifndef _VM_UMA_H_
+#define _VM_UMA_H_
#include <rtems/bsd/sys/param.h> /* For NULL */
#include <sys/malloc.h> /* For M_* */
@@ -50,7 +50,7 @@ typedef struct uma_zone * uma_zone_t;
void zone_drain(uma_zone_t);
-/*
+/*
* Item constructor
*
* Arguments:
@@ -58,7 +58,7 @@ void zone_drain(uma_zone_t);
* arg The arg field passed to uma_zalloc_arg
* size The size of the allocated item
* flags See zalloc flags
- *
+ *
* Returns:
* 0 on success
* errno on failure
@@ -76,7 +76,7 @@ typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags);
* item A pointer to the memory which has been allocated.
* size The size of the item being destructed.
* arg Argument passed through uma_zfree_arg
- *
+ *
* Returns:
* Nothing
*
@@ -87,20 +87,20 @@ typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags);
*/
typedef void (*uma_dtor)(void *mem, int size, void *arg);
-/*
+/*
* Item initializer
*
* Arguments:
* item A pointer to the memory which has been allocated.
* size The size of the item being initialized.
* flags See zalloc flags
- *
+ *
* Returns:
* 0 on success
* errno on failure
*
* Discussion:
- * The initializer is called when the memory is cached in the uma zone.
+ * The initializer is called when the memory is cached in the uma zone.
* The initializer and the destructor should leave the object in the same
* state.
*/
@@ -110,7 +110,7 @@ typedef int (*uma_init)(void *mem, int size, int flags);
* Item discard function
*
* Arguments:
- * item A pointer to memory which has been 'freed' but has not left the
+ * item A pointer to memory which has been 'freed' but has not left the
* zone's cache.
* size The size of the item being discarded.
*
@@ -124,9 +124,19 @@ typedef int (*uma_init)(void *mem, int size, int flags);
typedef void (*uma_fini)(void *mem, int size);
/*
+ * Import new memory into a cache zone.
+ */
+typedef int (*uma_import)(void *arg, void **store, int count, int flags);
+
+/*
+ * Free memory from a cache zone.
+ */
+typedef void (*uma_release)(void *arg, void **store, int count);
+
+/*
* What's the difference between initializing and constructing?
*
- * The item is initialized when it is cached, and this is the state that the
+ * The item is initialized when it is cached, and this is the state that the
* object should be in when returned to the allocator. The purpose of this is
* to remove some code which would otherwise be called on each allocation by
* utilizing a known, stable state. This differs from the constructor which
@@ -167,7 +177,7 @@ typedef void (*uma_fini)(void *mem, int size);
*/
uma_zone_t uma_zcreate(const char *name, size_t size, uma_ctor ctor,
uma_dtor dtor, uma_init uminit, uma_fini fini,
- int align, u_int32_t flags);
+ int align, uint32_t flags);
/*
* Create a secondary uma zone
@@ -211,11 +221,24 @@ uma_zone_t uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
* the only supported.
*
* Returns:
- * Error on failure, 0 on success.
+ * Error on failure, 0 on success.
*/
int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
/*
+ * Create cache-only zones.
+ *
+ * This allows uma's per-cpu cache facilities to handle arbitrary
+ * pointers. Consumers must specify the import and release functions to
+ * fill and destroy caches. UMA does not allocate any memory for these
+ * zones. The 'arg' parameter is passed to import/release and is caller
+ * specific.
+ */
+uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
+ uma_init zinit, uma_fini zfini, uma_import zimport,
+ uma_release zrelease, void *arg, int flags);
+
+/*
* Definitions for uma_zcreate flags
*
* These flags share space with UMA_ZFLAGs in uma_int.h. Be careful not to
@@ -239,7 +262,7 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
* information in the vm_page.
*/
#define UMA_ZONE_SECONDARY 0x0200 /* Zone is a Secondary Zone */
-#define UMA_ZONE_REFCNT 0x0400 /* Allocate refcnts in slabs */
+/* 0x0400 Unused */
#define UMA_ZONE_MAXBUCKET 0x0800 /* Use largest buckets */
#define UMA_ZONE_CACHESPREAD 0x1000 /*
* Spread memory start locations across
@@ -252,6 +275,10 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
* Zone's pages will not be included in
* mini-dumps.
*/
+#define UMA_ZONE_PCPU 0x8000 /*
+ * Allocates mp_maxid + 1 slabs sized to
+ * sizeof(struct pcpu).
+ */
/*
* These flags are shared between the keg and zone. In zones wishing to add
@@ -259,8 +286,8 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
* physical parameters of the request and may not be provided by the consumer.
*/
#define UMA_ZONE_INHERIT \
- (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_HASH | \
- UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB)
+ (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE | \
+ UMA_ZONE_HASH | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU)
/* Definitions for align */
#define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */
@@ -355,7 +382,8 @@ uma_zfree(uma_zone_t zone, void *item)
* A pointer to the allocated memory or NULL on failure.
*/
-typedef void *(*uma_alloc)(uma_zone_t zone, int size, u_int8_t *pflag, int wait);
+typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, uint8_t *pflag,
+ int wait);
/*
* Backend page free routines
@@ -368,7 +396,7 @@ typedef void *(*uma_alloc)(uma_zone_t zone, int size, u_int8_t *pflag, int wait)
* Returns:
* None
*/
-typedef void (*uma_free)(void *item, int size, u_int8_t pflag);
+typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
@@ -403,7 +431,7 @@ void uma_startup(void *bootmem, int boot_pages);
* Discussion:
* uma_startup2 is called by kmeminit() to enable us of uma for malloc.
*/
-
+
void uma_startup2(void);
/*
@@ -432,24 +460,29 @@ void uma_reclaim(void);
void uma_set_align(int align);
/*
- * Switches the backing object of a zone
+ * Set a reserved number of items to hold for M_USE_RESERVE allocations. All
+ * other requests must allocate new backing pages.
+ */
+void uma_zone_reserve(uma_zone_t zone, int nitems);
+
+/*
+ * Reserves the maximum KVA space required by the zone and configures the zone
+ * to use a VM_ALLOC_NOOBJ-based backend allocator.
*
* Arguments:
* zone The zone to update.
- * obj The VM object to use for future allocations.
- * size The size of the object to allocate.
+ * nitems The upper limit on the number of items that can be allocated.
*
* Returns:
- * 0 if kva space can not be allocated
+ * 0 if KVA space can not be allocated
* 1 if successful
*
* Discussion:
- * A NULL object can be used and uma will allocate one for you. Setting
- * the size will limit the amount of memory allocated to this zone.
- *
+ * When the machine supports a direct map and the zone's items are smaller
+ * than a page, the zone will use the direct map instead of allocating KVA
+ * space.
*/
-struct vm_object;
-int uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int size);
+int uma_zone_reserve_kva(uma_zone_t zone, int nitems);
/*
* Sets a high limit on the number of items allowed in a zone
@@ -476,6 +509,31 @@ int uma_zone_set_max(uma_zone_t zone, int nitems);
int uma_zone_get_max(uma_zone_t zone);
/*
+ * Sets a warning to be printed when limit is reached
+ *
+ * Arguments:
+ * zone The zone we will warn about
+ * warning Warning content
+ *
+ * Returns:
+ * Nothing
+ */
+void uma_zone_set_warning(uma_zone_t zone, const char *warning);
+
+/*
+ * Sets a function to run when limit is reached
+ *
+ * Arguments:
+ * zone The zone to which this applies
+ * fx The function to run
+ *
+ * Returns:
+ * Nothing
+ */
+typedef void (*uma_maxaction_t)(uma_zone_t, int);
+void uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t);
+
+/*
* Obtains the approximate current number of items allocated from a zone
*
* Arguments:
@@ -509,7 +567,7 @@ void uma_zone_set_zinit(uma_zone_t zone, uma_init zinit);
void uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini);
/*
- * Replaces the standard page_alloc or obj_alloc functions for this zone
+ * Replaces the standard backend allocator for this zone.
*
* Arguments:
* zone The zone whose backend allocator is being changed.
@@ -569,43 +627,34 @@ void uma_zone_set_freef(uma_zone_t zone, uma_free freef);
void uma_prealloc(uma_zone_t zone, int itemcnt);
/*
- * Used to lookup the reference counter allocated for an item
- * from a UMA_ZONE_REFCNT zone. For UMA_ZONE_REFCNT zones,
- * reference counters are allocated for items and stored in
- * the underlying slab header.
- *
- * Arguments:
- * zone The UMA_ZONE_REFCNT zone to which the item belongs.
- * item The address of the item for which we want a refcnt.
- *
- * Returns:
- * A pointer to a u_int32_t reference counter.
- */
-u_int32_t *uma_find_refcnt(uma_zone_t zone, void *item);
-
-/*
* Used to determine if a fixed-size zone is exhausted.
*
* Arguments:
* zone The zone to check
*
* Returns:
- * Non-zero if zone is exhausted.
+ * Non-zero if zone is exhausted.
*/
int uma_zone_exhausted(uma_zone_t zone);
int uma_zone_exhausted_nolock(uma_zone_t zone);
/*
+ * Common UMA_ZONE_PCPU zones.
+ */
+extern uma_zone_t pcpu_zone_64;
+extern uma_zone_t pcpu_zone_ptr;
+
+/*
* Exported statistics structures to be used by user space monitoring tools.
* Statistics stream consists of a uma_stream_header, followed by a series of
* alternative uma_type_header and uma_type_stat structures.
*/
#define UMA_STREAM_VERSION 0x00000001
struct uma_stream_header {
- u_int32_t ush_version; /* Stream format version. */
- u_int32_t ush_maxcpus; /* Value of MAXCPU for stream. */
- u_int32_t ush_count; /* Number of records. */
- u_int32_t _ush_pad; /* Pad/reserved field. */
+ uint32_t ush_version; /* Stream format version. */
+ uint32_t ush_maxcpus; /* Value of MAXCPU for stream. */
+ uint32_t ush_count; /* Number of records. */
+ uint32_t _ush_pad; /* Pad/reserved field. */
};
#define UTH_MAX_NAME 32
@@ -615,32 +664,35 @@ struct uma_type_header {
* Static per-zone data, some extracted from the supporting keg.
*/
char uth_name[UTH_MAX_NAME];
- u_int32_t uth_align; /* Keg: alignment. */
- u_int32_t uth_size; /* Keg: requested size of item. */
- u_int32_t uth_rsize; /* Keg: real size of item. */
- u_int32_t uth_maxpages; /* Keg: maximum number of pages. */
- u_int32_t uth_limit; /* Keg: max items to allocate. */
+ uint32_t uth_align; /* Keg: alignment. */
+ uint32_t uth_size; /* Keg: requested size of item. */
+ uint32_t uth_rsize; /* Keg: real size of item. */
+ uint32_t uth_maxpages; /* Keg: maximum number of pages. */
+ uint32_t uth_limit; /* Keg: max items to allocate. */
/*
* Current dynamic zone/keg-derived statistics.
*/
- u_int32_t uth_pages; /* Keg: pages allocated. */
- u_int32_t uth_keg_free; /* Keg: items free. */
- u_int32_t uth_zone_free; /* Zone: items free. */
- u_int32_t uth_bucketsize; /* Zone: desired bucket size. */
- u_int32_t uth_zone_flags; /* Zone: flags. */
- u_int64_t uth_allocs; /* Zone: number of allocations. */
- u_int64_t uth_frees; /* Zone: number of frees. */
- u_int64_t uth_fails; /* Zone: number of alloc failures. */
- u_int64_t uth_sleeps; /* Zone: number of alloc sleeps. */
- u_int64_t _uth_reserved1[2]; /* Reserved. */
+ uint32_t uth_pages; /* Keg: pages allocated. */
+ uint32_t uth_keg_free; /* Keg: items free. */
+ uint32_t uth_zone_free; /* Zone: items free. */
+ uint32_t uth_bucketsize; /* Zone: desired bucket size. */
+ uint32_t uth_zone_flags; /* Zone: flags. */
+ uint64_t uth_allocs; /* Zone: number of allocations. */
+ uint64_t uth_frees; /* Zone: number of frees. */
+ uint64_t uth_fails; /* Zone: number of alloc failures. */
+ uint64_t uth_sleeps; /* Zone: number of alloc sleeps. */
+ uint64_t _uth_reserved1[2]; /* Reserved. */
};
struct uma_percpu_stat {
- u_int64_t ups_allocs; /* Cache: number of allocations. */
- u_int64_t ups_frees; /* Cache: number of frees. */
- u_int64_t ups_cache_free; /* Cache: free items in cache. */
- u_int64_t _ups_reserved[5]; /* Reserved. */
+ uint64_t ups_allocs; /* Cache: number of allocations. */
+ uint64_t ups_frees; /* Cache: number of frees. */
+ uint64_t ups_cache_free; /* Cache: free items in cache. */
+ uint64_t _ups_reserved[5]; /* Reserved. */
};
-#endif
+void uma_reclaim_wakeup(void);
+void uma_reclaim_worker(void *);
+
+#endif /* _VM_UMA_H_ */
diff --git a/freebsd/sys/vm/uma_core.c b/freebsd/sys/vm/uma_core.c
index 6bf47a1e..3957a223 100644
--- a/freebsd/sys/vm/uma_core.c
+++ b/freebsd/sys/vm/uma_core.c
@@ -1,7 +1,7 @@
#include <machine/rtems-bsd-kernel-space.h>
/*-
- * Copyright (c) 2002-2005, 2009 Jeffrey Roberson <jeff@FreeBSD.org>
+ * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
* Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
* Copyright (c) 2004-2006 Robert N. M. Watson
* All rights reserved.
@@ -33,7 +33,7 @@
*
* This allocator is intended to replace the multitude of similar object caches
* in the standard FreeBSD kernel. The intent is to be flexible as well as
- * effecient. A primary design goal is to return unused memory to the rest of
+ * efficient. A primary design goal is to return unused memory to the rest of
* the system. This will make the system as a whole more flexible due to the
* ability to move memory to subsystems which most need it instead of leaving
* pools of reserved memory unused.
@@ -61,9 +61,11 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/local/opt_param.h>
+#include <rtems/bsd/local/opt_vm.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/bitset.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/queue.h>
@@ -73,13 +75,18 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/random.h>
+#include <sys/rwlock.h>
#include <sys/sbuf.h>
+#include <sys/sched.h>
#include <sys/smp.h>
+#include <sys/taskqueue.h>
#include <sys/vmmeter.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
@@ -103,6 +110,10 @@ __FBSDID("$FreeBSD$");
#endif
#endif /* __rtems__ */
+#ifdef DEBUG_MEMGUARD
+#include <vm/memguard.h>
+#endif
+
/*
* This is the zone and keg from which all zones are spawned. The idea is that
* even the zone & keg heads are allocated from the allocator, so we use the
@@ -116,7 +127,6 @@ static uma_zone_t zones = &masterzone_z;
/* This is the zone from which all of uma_slab_t's are allocated. */
static uma_zone_t slabzone;
-static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */
/*
* The initial hash tables come out of this zone so they can be allocated
@@ -134,13 +144,19 @@ static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
* Are we allowed to allocate buckets?
*/
static int bucketdisable = 1;
+#else /* __rtems__ */
+#define bucketdisable 0
#endif /* __rtems__ */
/* Linked list of all kegs in the system */
static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
-/* This mutex protects the keg list */
-static struct mtx uma_mtx;
+/* Linked list of all cache-only zones in the system */
+static LIST_HEAD(,uma_zone) uma_cachezones =
+ LIST_HEAD_INITIALIZER(uma_cachezones);
+
+/* This RW lock protects the keg list */
+static struct rwlock_padalign uma_rwlock;
#ifndef __rtems__
/* Linked list of boot time pages */
@@ -148,18 +164,18 @@ static LIST_HEAD(,uma_slab) uma_boot_pages =
LIST_HEAD_INITIALIZER(uma_boot_pages);
/* This mutex protects the boot time pages list */
-static struct mtx uma_boot_pages_mtx;
+static struct mtx_padalign uma_boot_pages_mtx;
+#endif /* __rtems__ */
+
+static struct sx uma_drain_lock;
+#ifndef __rtems__
/* Is the VM done starting up? */
static int booted = 0;
#define UMA_STARTUP 1
#define UMA_STARTUP2 2
#endif /* __rtems__ */
-/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
-static u_int uma_max_ipers;
-static u_int uma_max_ipers_ref;
-
/*
* This is the handle used to schedule events that need to happen
* outside of the allocation fast path.
@@ -178,9 +194,12 @@ struct uma_zctor_args {
uma_dtor dtor;
uma_init uminit;
uma_fini fini;
+ uma_import import;
+ uma_release release;
+ void *arg;
uma_keg_t keg;
int align;
- u_int32_t flags;
+ uint32_t flags;
};
struct uma_kctor_args {
@@ -189,52 +208,53 @@ struct uma_kctor_args {
uma_init uminit;
uma_fini fini;
int align;
- u_int32_t flags;
+ uint32_t flags;
};
struct uma_bucket_zone {
uma_zone_t ubz_zone;
char *ubz_name;
- int ubz_entries;
+ int ubz_entries; /* Number of items it can hold. */
+ int ubz_maxsize; /* Maximum allocation size per-item. */
};
-#define BUCKET_MAX 128
+/*
+ * Compute the actual number of bucket entries to pack them in power
+ * of two sizes for more efficient space utilization.
+ */
+#define BUCKET_SIZE(n) \
+ (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
+
+#define BUCKET_MAX BUCKET_SIZE(256)
struct uma_bucket_zone bucket_zones[] = {
- { NULL, "16 Bucket", 16 },
- { NULL, "32 Bucket", 32 },
- { NULL, "64 Bucket", 64 },
- { NULL, "128 Bucket", 128 },
+ { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
+ { NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
+ { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
+ { NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
+ { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
+ { NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
+ { NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
+ { NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
+ { NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
{ NULL, NULL, 0}
};
-#define BUCKET_SHIFT 4
-#define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
-
-/*
- * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
- * of approximately the right size.
- */
-static uint8_t bucket_size[BUCKET_ZONES];
-
/*
* Flags and enumerations to be passed to internal functions.
*/
-enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
-
-#define ZFREE_STATFAIL 0x00000001 /* Update zone failure statistic. */
-#define ZFREE_STATFREE 0x00000002 /* Update zone free statistic. */
+enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
/* Prototypes.. */
#ifndef __rtems__
-static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
+static void *noobj_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
#endif /* __rtems__ */
-static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
+static void *page_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
#ifndef __rtems__
-static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
+static void *startup_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
#endif /* __rtems__ */
-static void page_free(void *, int, u_int8_t);
+static void page_free(void *, vm_size_t, uint8_t);
static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
@@ -254,29 +274,35 @@ static void hash_free(struct uma_hash *hash);
static void uma_timeout(void *);
static void uma_startup3(void);
static void *zone_alloc_item(uma_zone_t, void *, int);
-static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip,
- int);
+static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
static void bucket_enable(void);
static void bucket_init(void);
-static uma_bucket_t bucket_alloc(int, int);
-static void bucket_free(uma_bucket_t);
+static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
+static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
static void bucket_zone_drain(void);
-static int zone_alloc_bucket(uma_zone_t zone, int flags);
+static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
#ifndef __rtems__
static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
#endif /* __rtems__ */
-static void *slab_alloc_item(uma_zone_t zone, uma_slab_t slab);
+static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
+static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
- uma_fini fini, int align, u_int32_t flags);
-static inline void zone_relock(uma_zone_t zone, uma_keg_t keg);
-static inline void keg_relock(uma_keg_t keg, uma_zone_t zone);
+ uma_fini fini, int align, uint32_t flags);
+static int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
+static void zone_release(uma_zone_t zone, void **bucket, int cnt);
+static void uma_zero_item(void *item, uma_zone_t zone);
void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
+#ifdef INVARIANTS
+static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
+static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
+#endif
+
SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
@@ -285,10 +311,13 @@ SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
+static int zone_warnings = 1;
+SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
+ "Warn when UMA zones becomes full");
+
/*
* This routine checks to see whether or not it's safe to enable buckets.
*/
-
static void
bucket_enable(void)
{
@@ -301,27 +330,20 @@ bucket_enable(void)
* Initialize bucket_zones, the array of zones of buckets of various sizes.
*
* For each zone, calculate the memory required for each bucket, consisting
- * of the header and an array of pointers. Initialize bucket_size[] to point
- * the range of appropriate bucket sizes at the zone.
+ * of the header and an array of pointers.
*/
static void
bucket_init(void)
{
struct uma_bucket_zone *ubz;
- int i;
- int j;
-
- for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
- int size;
+ int size;
- ubz = &bucket_zones[j];
+ for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
size = roundup(sizeof(struct uma_bucket), sizeof(void *));
size += sizeof(void *) * ubz->ubz_entries;
ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
- UMA_ZFLAG_INTERNAL | UMA_ZFLAG_BUCKET);
- for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
- bucket_size[i >> BUCKET_SHIFT] = j;
+ UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
}
}
@@ -332,14 +354,33 @@ bucket_init(void)
static struct uma_bucket_zone *
bucket_zone_lookup(int entries)
{
- int idx;
+ struct uma_bucket_zone *ubz;
+
+ for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
+ if (ubz->ubz_entries >= entries)
+ return (ubz);
+ ubz--;
+ return (ubz);
+}
+
+static int
+bucket_select(int size)
+{
+ struct uma_bucket_zone *ubz;
+
+ ubz = &bucket_zones[0];
+ if (size > ubz->ubz_maxsize)
+ return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
- idx = howmany(entries, 1 << BUCKET_SHIFT);
- return (&bucket_zones[bucket_size[idx]]);
+ for (; ubz->ubz_entries != 0; ubz++)
+ if (ubz->ubz_maxsize < size)
+ break;
+ ubz--;
+ return (ubz->ubz_entries);
}
static uma_bucket_t
-bucket_alloc(int entries, int bflags)
+bucket_alloc(uma_zone_t zone, void *udata, int flags)
{
struct uma_bucket_zone *ubz;
uma_bucket_t bucket;
@@ -354,9 +395,29 @@ bucket_alloc(int entries, int bflags)
if (bucketdisable)
return (NULL);
#endif /* __rtems__ */
-
- ubz = bucket_zone_lookup(entries);
- bucket = zone_alloc_item(ubz->ubz_zone, NULL, bflags);
+ /*
+ * To limit bucket recursion we store the original zone flags
+ * in a cookie passed via zalloc_arg/zfree_arg. This allows the
+ * NOVM flag to persist even through deep recursions. We also
+ * store ZFLAG_BUCKET once we have recursed attempting to allocate
+ * a bucket for a bucket zone so we do not allow infinite bucket
+ * recursion. This cookie will even persist to frees of unused
+ * buckets via the allocation path or bucket allocations in the
+ * free path.
+ */
+ if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
+ udata = (void *)(uintptr_t)zone->uz_flags;
+ else {
+ if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
+ return (NULL);
+ udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
+ }
+ if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
+ flags |= M_NOVM;
+ ubz = bucket_zone_lookup(zone->uz_count);
+ if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
+ ubz++;
+ bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
if (bucket) {
#ifdef INVARIANTS
bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
@@ -369,13 +430,16 @@ bucket_alloc(int entries, int bflags)
}
static void
-bucket_free(uma_bucket_t bucket)
+bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
{
struct uma_bucket_zone *ubz;
+ KASSERT(bucket->ub_cnt == 0,
+ ("bucket_free: Freeing a non free bucket."));
+ if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
+ udata = (void *)(uintptr_t)zone->uz_flags;
ubz = bucket_zone_lookup(bucket->ub_entries);
- zone_free_item(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
- ZFREE_STATFREE);
+ uma_zfree_arg(ubz->ubz_zone, bucket, udata);
}
static void
@@ -387,11 +451,24 @@ bucket_zone_drain(void)
zone_drain(ubz->ubz_zone);
}
-static inline uma_keg_t
-zone_first_keg(uma_zone_t zone)
+static void
+zone_log_warning(uma_zone_t zone)
{
+ static const struct timeval warninterval = { 300, 0 };
+
+ if (!zone_warnings || zone->uz_warning == NULL)
+ return;
- return (LIST_FIRST(&zone->uz_kegs)->kl_keg);
+ if (ratecheck(&zone->uz_ratecheck, &warninterval))
+ printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
+}
+
+static inline void
+zone_maxaction(uma_zone_t zone)
+{
+
+ if (zone->uz_maxaction.ta_func != NULL)
+ taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
}
static void
@@ -466,7 +543,7 @@ keg_timeout(uma_keg_t keg)
KEG_UNLOCK(keg);
hash_free(&oldhash);
- KEG_LOCK(keg);
+ return;
}
}
KEG_UNLOCK(keg);
@@ -487,7 +564,7 @@ zone_timeout(uma_zone_t zone)
* hash A new hash structure with the old hash size in uh_hashsize
*
* Returns:
- * 1 on sucess and 0 on failure.
+ * 1 on success and 0 on failure.
*/
static int
hash_alloc(struct uma_hash *hash)
@@ -578,8 +655,7 @@ hash_free(struct uma_hash *hash)
if (hash->uh_slab_hash == NULL)
return;
if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
- zone_free_item(hashzone,
- hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
+ zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
else
free(hash->uh_slab_hash, M_UMAHASH);
}
@@ -598,21 +674,16 @@ hash_free(struct uma_hash *hash)
static void
bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
{
- void *item;
+ int i;
if (bucket == NULL)
return;
- while (bucket->ub_cnt > 0) {
- bucket->ub_cnt--;
- item = bucket->ub_bucket[bucket->ub_cnt];
-#ifdef INVARIANTS
- bucket->ub_bucket[bucket->ub_cnt] = NULL;
- KASSERT(item != NULL,
- ("bucket_drain: botched ptr, item is NULL"));
-#endif
- zone_free_item(zone, item, NULL, SKIP_DTOR, 0);
- }
+ if (zone->uz_fini)
+ for (i = 0; i < bucket->ub_cnt; i++)
+ zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
+ zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
+ bucket->ub_cnt = 0;
}
/*
@@ -651,9 +722,9 @@ cache_drain(uma_zone_t zone)
bucket_drain(zone, cache->uc_allocbucket);
bucket_drain(zone, cache->uc_freebucket);
if (cache->uc_allocbucket != NULL)
- bucket_free(cache->uc_allocbucket);
+ bucket_free(zone, cache->uc_allocbucket, NULL);
if (cache->uc_freebucket != NULL)
- bucket_free(cache->uc_freebucket);
+ bucket_free(zone, cache->uc_freebucket, NULL);
cache->uc_allocbucket = cache->uc_freebucket = NULL;
}
ZONE_LOCK(zone);
@@ -661,6 +732,92 @@ cache_drain(uma_zone_t zone)
ZONE_UNLOCK(zone);
}
+static void
+cache_shrink(uma_zone_t zone)
+{
+
+ if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
+ return;
+
+ ZONE_LOCK(zone);
+ zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2;
+ ZONE_UNLOCK(zone);
+}
+
+static void
+cache_drain_safe_cpu(uma_zone_t zone)
+{
+ uma_cache_t cache;
+ uma_bucket_t b1, b2;
+
+ if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
+ return;
+
+ b1 = b2 = NULL;
+ ZONE_LOCK(zone);
+ critical_enter();
+ cache = &zone->uz_cpu[curcpu];
+ if (cache->uc_allocbucket) {
+ if (cache->uc_allocbucket->ub_cnt != 0)
+ LIST_INSERT_HEAD(&zone->uz_buckets,
+ cache->uc_allocbucket, ub_link);
+ else
+ b1 = cache->uc_allocbucket;
+ cache->uc_allocbucket = NULL;
+ }
+ if (cache->uc_freebucket) {
+ if (cache->uc_freebucket->ub_cnt != 0)
+ LIST_INSERT_HEAD(&zone->uz_buckets,
+ cache->uc_freebucket, ub_link);
+ else
+ b2 = cache->uc_freebucket;
+ cache->uc_freebucket = NULL;
+ }
+ critical_exit();
+ ZONE_UNLOCK(zone);
+ if (b1)
+ bucket_free(zone, b1, NULL);
+ if (b2)
+ bucket_free(zone, b2, NULL);
+}
+
+#ifndef __rtems__
+/*
+ * Safely drain per-CPU caches of a zone(s) to alloc bucket.
+ * This is an expensive call because it needs to bind to all CPUs
+ * one by one and enter a critical section on each of them in order
+ * to safely access their cache buckets.
+ * Zone lock must not be held when calling this function.
+ */
+static void
+cache_drain_safe(uma_zone_t zone)
+{
+ int cpu;
+
+ /*
+ * Polite bucket sizes shrinking was not enough, shrink aggressively.
+ */
+ if (zone)
+ cache_shrink(zone);
+ else
+ zone_foreach(cache_shrink);
+
+ CPU_FOREACH(cpu) {
+ thread_lock(curthread);
+ sched_bind(curthread, cpu);
+ thread_unlock(curthread);
+
+ if (zone)
+ cache_drain_safe_cpu(zone);
+ else
+ zone_foreach(cache_drain_safe_cpu);
+ }
+ thread_lock(curthread);
+ sched_unbind(curthread);
+ thread_unlock(curthread);
+}
+#endif /* __rtems__ */
+
/*
* Drain the cached buckets from a zone. Expects a locked zone on entry.
*/
@@ -673,19 +830,44 @@ bucket_cache_drain(uma_zone_t zone)
* Drain the bucket queues and free the buckets, we just keep two per
* cpu (alloc/free).
*/
- while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
+ while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
LIST_REMOVE(bucket, ub_link);
ZONE_UNLOCK(zone);
bucket_drain(zone, bucket);
- bucket_free(bucket);
+ bucket_free(zone, bucket, NULL);
ZONE_LOCK(zone);
}
- /* Now we do the free queue.. */
- while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
- LIST_REMOVE(bucket, ub_link);
- bucket_free(bucket);
+ /*
+ * Shrink further bucket sizes. Price of single zone lock collision
+ * is probably lower than the price of a global cache drain.
+ */
+ if (zone->uz_count > zone->uz_count_min)
+ zone->uz_count--;
+}
+
+static void
+keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
+{
+ uint8_t *mem;
+ int i;
+ uint8_t flags;
+
+ mem = slab->us_data;
+ flags = slab->us_flags;
+ i = start;
+ if (keg->uk_fini != NULL) {
+ for (i--; i > -1; i--)
+ keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
+ keg->uk_size);
}
+ if (keg->uk_flags & UMA_ZONE_OFFPAGE)
+ zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
+#ifdef UMA_DEBUG
+ printf("%s: Returning %d bytes.\n", keg->uk_name,
+ PAGE_SIZE * keg->uk_ppera);
+#endif
+ keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
}
/*
@@ -700,9 +882,6 @@ keg_drain(uma_keg_t keg)
struct slabhead freeslabs = { 0 };
uma_slab_t slab;
uma_slab_t n;
- u_int8_t flags;
- u_int8_t *mem;
- int i;
/*
* We don't want to take pages from statically allocated kegs at this
@@ -746,37 +925,7 @@ finished:
while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
- if (keg->uk_fini)
- for (i = 0; i < keg->uk_ipers; i++)
- keg->uk_fini(
- slab->us_data + (keg->uk_rsize * i),
- keg->uk_size);
- flags = slab->us_flags;
- mem = slab->us_data;
-
-#ifndef __rtems__
- if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
- vm_object_t obj;
-
- if (flags & UMA_SLAB_KMEM)
- obj = kmem_object;
- else if (flags & UMA_SLAB_KERNEL)
- obj = kernel_object;
- else
- obj = NULL;
- for (i = 0; i < keg->uk_ppera; i++)
- vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
- obj);
- }
-#endif /* __rtems__ */
- if (keg->uk_flags & UMA_ZONE_OFFPAGE)
- zone_free_item(keg->uk_slabzone, slab, NULL,
- SKIP_NONE, ZFREE_STATFREE);
-#ifdef UMA_DEBUG
- printf("%s: Returning %d bytes.\n",
- keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
-#endif
- keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
+ keg_free_slab(keg, slab, keg->uk_ipers);
}
}
@@ -794,14 +943,14 @@ zone_drain_wait(uma_zone_t zone, int waitok)
while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
if (waitok == M_NOWAIT)
goto out;
- msleep(zone, zone->uz_lock, PVM, "zonedrain", 1);
+ msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
}
zone->uz_flags |= UMA_ZFLAG_DRAINING;
bucket_cache_drain(zone);
ZONE_UNLOCK(zone);
/*
* The DRAINING flag protects us from being freed while
- * we're running. Normally the uma_mtx would protect us but we
+ * we're running. Normally the uma_rwlock would protect us but we
* must be able to release and acquire the right lock for each keg.
*/
zone_foreach_keg(zone, &keg_drain);
@@ -832,28 +981,26 @@ zone_drain(uma_zone_t zone)
static uma_slab_t
keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
{
- uma_slabrefcnt_t slabref;
uma_alloc allocf;
uma_slab_t slab;
- u_int8_t *mem;
- u_int8_t flags;
+ uint8_t *mem;
+ uint8_t flags;
int i;
mtx_assert(&keg->uk_lock, MA_OWNED);
slab = NULL;
+ mem = NULL;
#ifdef UMA_DEBUG
- printf("slab_zalloc: Allocating a new slab for %s\n", keg->uk_name);
+ printf("alloc_slab: Allocating a new slab for %s\n", keg->uk_name);
#endif
allocf = keg->uk_allocf;
KEG_UNLOCK(keg);
if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
- if (slab == NULL) {
- KEG_LOCK(keg);
- return NULL;
- }
+ if (slab == NULL)
+ goto out;
}
/*
@@ -872,13 +1019,12 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
wait |= M_NODUMP;
/* zone is passed for legacy reasons. */
- mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
+ mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
if (mem == NULL) {
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
- zone_free_item(keg->uk_slabzone, slab, NULL,
- SKIP_NONE, ZFREE_STATFREE);
- KEG_LOCK(keg);
- return (NULL);
+ zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
+ slab = NULL;
+ goto out;
}
/* Point the slab into the allocated memory */
@@ -892,19 +1038,11 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
slab->us_keg = keg;
slab->us_data = mem;
slab->us_freecount = keg->uk_ipers;
- slab->us_firstfree = 0;
slab->us_flags = flags;
-
- if (keg->uk_flags & UMA_ZONE_REFCNT) {
- slabref = (uma_slabrefcnt_t)slab;
- for (i = 0; i < keg->uk_ipers; i++) {
- slabref->us_freelist[i].us_refcnt = 0;
- slabref->us_freelist[i].us_item = i+1;
- }
- } else {
- for (i = 0; i < keg->uk_ipers; i++)
- slab->us_freelist[i].us_item = i+1;
- }
+ BIT_FILL(SLAB_SETSIZE, &slab->us_free);
+#ifdef INVARIANTS
+ BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
+#endif
if (keg->uk_init != NULL) {
for (i = 0; i < keg->uk_ipers; i++)
@@ -912,43 +1050,21 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
keg->uk_size, wait) != 0)
break;
if (i != keg->uk_ipers) {
- if (keg->uk_fini != NULL) {
- for (i--; i > -1; i--)
- keg->uk_fini(slab->us_data +
- (keg->uk_rsize * i),
- keg->uk_size);
- }
-#ifndef __rtems__
- if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
- vm_object_t obj;
-
- if (flags & UMA_SLAB_KMEM)
- obj = kmem_object;
- else if (flags & UMA_SLAB_KERNEL)
- obj = kernel_object;
- else
- obj = NULL;
- for (i = 0; i < keg->uk_ppera; i++)
- vsetobj((vm_offset_t)mem +
- (i * PAGE_SIZE), obj);
- }
-#endif /* __rtems__ */
- if (keg->uk_flags & UMA_ZONE_OFFPAGE)
- zone_free_item(keg->uk_slabzone, slab,
- NULL, SKIP_NONE, ZFREE_STATFREE);
- keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
- flags);
- KEG_LOCK(keg);
- return (NULL);
+ keg_free_slab(keg, slab, i);
+ slab = NULL;
+ goto out;
}
}
+out:
KEG_LOCK(keg);
- if (keg->uk_flags & UMA_ZONE_HASH)
- UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
+ if (slab != NULL) {
+ if (keg->uk_flags & UMA_ZONE_HASH)
+ UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
- keg->uk_pages += keg->uk_ppera;
- keg->uk_free += keg->uk_ipers;
+ keg->uk_pages += keg->uk_ppera;
+ keg->uk_free += keg->uk_ipers;
+ }
return (slab);
}
@@ -960,7 +1076,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
* the VM is ready.
*/
static void *
-startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
+startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
{
uma_keg_t keg;
uma_slab_t tmps;
@@ -1021,13 +1137,13 @@ startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
* NULL if M_NOWAIT is set.
*/
static void *
-page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
+page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
{
void *p; /* Returned page */
#ifndef __rtems__
*pflag = UMA_SLAB_KMEM;
- p = (void *) kmem_malloc(kmem_map, bytes, wait);
+ p = (void *) kmem_malloc(kmem_arena, bytes, wait);
#else /* __rtems__ */
*pflag = 0;
p = rtems_bsd_page_alloc(bytes, wait);
@@ -1049,50 +1165,53 @@ page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
* NULL if M_NOWAIT is set.
*/
static void *
-obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
{
- vm_object_t object;
+ TAILQ_HEAD(, vm_page) alloctail;
+ u_long npages;
vm_offset_t retkva, zkva;
- vm_page_t p;
- int pages, startpages;
+ vm_page_t p, p_next;
uma_keg_t keg;
+ TAILQ_INIT(&alloctail);
keg = zone_first_keg(zone);
- object = keg->uk_obj;
- retkva = 0;
- /*
- * This looks a little weird since we're getting one page at a time.
- */
- VM_OBJECT_LOCK(object);
- p = TAILQ_LAST(&object->memq, pglist);
- pages = p != NULL ? p->pindex + 1 : 0;
- startpages = pages;
- zkva = keg->uk_kva + pages * PAGE_SIZE;
- for (; bytes > 0; bytes -= PAGE_SIZE) {
- p = vm_page_alloc(object, pages,
- VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
- if (p == NULL) {
- if (pages != startpages)
- pmap_qremove(retkva, pages - startpages);
- while (pages != startpages) {
- pages--;
- p = TAILQ_LAST(&object->memq, pglist);
- vm_page_unwire(p, 0);
- vm_page_free(p);
- }
- retkva = 0;
- goto done;
+ npages = howmany(bytes, PAGE_SIZE);
+ while (npages > 0) {
+ p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
+ VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
+ if (p != NULL) {
+ /*
+ * Since the page does not belong to an object, its
+ * listq is unused.
+ */
+ TAILQ_INSERT_TAIL(&alloctail, p, listq);
+ npages--;
+ continue;
+ }
+ if (wait & M_WAITOK) {
+ VM_WAIT;
+ continue;
+ }
+
+ /*
+ * Page allocation failed, free intermediate pages and
+ * exit.
+ */
+ TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
+ vm_page_unwire(p, PQ_NONE);
+ vm_page_free(p);
}
+ return (NULL);
+ }
+ *flags = UMA_SLAB_PRIV;
+ zkva = keg->uk_kva +
+ atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
+ retkva = zkva;
+ TAILQ_FOREACH(p, &alloctail, listq) {
pmap_qenter(zkva, &p, 1);
- if (retkva == 0)
- retkva = zkva;
zkva += PAGE_SIZE;
- pages += 1;
}
-done:
- VM_OBJECT_UNLOCK(object);
- *flags = UMA_SLAB_PRIV;
return ((void *)retkva);
}
@@ -1110,19 +1229,19 @@ done:
* Nothing
*/
static void
-page_free(void *mem, int size, u_int8_t flags)
+page_free(void *mem, vm_size_t size, uint8_t flags)
{
#ifndef __rtems__
- vm_map_t map;
+ struct vmem *vmem;
if (flags & UMA_SLAB_KMEM)
- map = kmem_map;
+ vmem = kmem_arena;
else if (flags & UMA_SLAB_KERNEL)
- map = kernel_map;
+ vmem = kernel_arena;
else
panic("UMA: page_free used with invalid flags %d", flags);
- kmem_free(map, (vm_offset_t)mem, size);
+ kmem_free(vmem, (vm_offset_t)mem, size);
#else /* __rtems__ */
if (flags & UMA_SLAB_KERNEL)
free(mem, M_TEMP);
@@ -1160,61 +1279,84 @@ keg_small_init(uma_keg_t keg)
u_int wastedspace;
u_int shsize;
- KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
- rsize = keg->uk_size;
+ if (keg->uk_flags & UMA_ZONE_PCPU) {
+ u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
+
+ keg->uk_slabsize = sizeof(struct pcpu);
+ keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
+ PAGE_SIZE);
+ } else {
+ keg->uk_slabsize = UMA_SLAB_SIZE;
+ keg->uk_ppera = 1;
+ }
- if (rsize < UMA_SMALLEST_UNIT)
- rsize = UMA_SMALLEST_UNIT;
+ /*
+ * Calculate the size of each allocation (rsize) according to
+ * alignment. If the requested size is smaller than we have
+ * allocation bits for we round it up.
+ */
+ rsize = keg->uk_size;
+ if (rsize < keg->uk_slabsize / SLAB_SETSIZE)
+ rsize = keg->uk_slabsize / SLAB_SETSIZE;
if (rsize & keg->uk_align)
rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
-
keg->uk_rsize = rsize;
- keg->uk_ppera = 1;
- if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
+ KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
+ keg->uk_rsize < sizeof(struct pcpu),
+ ("%s: size %u too large", __func__, keg->uk_rsize));
+
+ if (keg->uk_flags & UMA_ZONE_OFFPAGE)
shsize = 0;
- } else if (keg->uk_flags & UMA_ZONE_REFCNT) {
- rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */
- shsize = sizeof(struct uma_slab_refcnt);
- } else {
- rsize += UMA_FRITM_SZ; /* Account for linkage */
+ else
shsize = sizeof(struct uma_slab);
- }
- keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
- KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
+ keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
+ KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
+ ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
+
memused = keg->uk_ipers * rsize + shsize;
- wastedspace = UMA_SLAB_SIZE - memused;
+ wastedspace = keg->uk_slabsize - memused;
/*
* We can't do OFFPAGE if we're internal or if we've been
* asked to not go to the VM for buckets. If we do this we
- * may end up going to the VM (kmem_map) for slabs which we
- * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
- * result of UMA_ZONE_VM, which clearly forbids it.
+ * may end up going to the VM for slabs which we do not
+ * want to do if we're UMA_ZFLAG_CACHEONLY as a result
+ * of UMA_ZONE_VM, which clearly forbids it.
*/
if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
(keg->uk_flags & UMA_ZFLAG_CACHEONLY))
return;
- if ((wastedspace >= UMA_MAX_WASTE) &&
- (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
- keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
- KASSERT(keg->uk_ipers <= 255,
- ("keg_small_init: keg->uk_ipers too high!"));
+ /*
+ * See if using an OFFPAGE slab will limit our waste. Only do
+ * this if it permits more items per-slab.
+ *
+ * XXX We could try growing slabsize to limit max waste as well.
+ * Historically this was not done because the VM could not
+ * efficiently handle contiguous allocations.
+ */
+ if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) &&
+ (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
+ keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
+ KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
+ ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
#ifdef UMA_DEBUG
printf("UMA decided we need offpage slab headers for "
"keg: %s, calculated wastedspace = %d, "
"maximum wasted space allowed = %d, "
"calculated ipers = %d, "
"new wasted space = %d\n", keg->uk_name, wastedspace,
- UMA_MAX_WASTE, keg->uk_ipers,
- UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
+ keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers,
+ keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
#endif
keg->uk_flags |= UMA_ZONE_OFFPAGE;
- if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
- keg->uk_flags |= UMA_ZONE_HASH;
}
+
+ if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
+ (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
+ keg->uk_flags |= UMA_ZONE_HASH;
}
/*
@@ -1231,19 +1373,16 @@ keg_small_init(uma_keg_t keg)
static void
keg_large_init(uma_keg_t keg)
{
- int pages;
+ u_int shsize;
KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
+ KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
+ ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
- pages = keg->uk_size / UMA_SLAB_SIZE;
-
- /* Account for remainder */
- if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
- pages++;
-
- keg->uk_ppera = pages;
+ keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
+ keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE;
keg->uk_ipers = 1;
keg->uk_rsize = keg->uk_size;
@@ -1251,8 +1390,19 @@ keg_large_init(uma_keg_t keg)
if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
return;
- keg->uk_flags |= UMA_ZONE_OFFPAGE;
- if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
+ /* Check whether we have enough space to not do OFFPAGE. */
+ if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) {
+ shsize = sizeof(struct uma_slab);
+ if (shsize & UMA_ALIGN_PTR)
+ shsize = (shsize & ~UMA_ALIGN_PTR) +
+ (UMA_ALIGN_PTR + 1);
+
+ if ((PAGE_SIZE * keg->uk_ppera) - keg->uk_rsize < shsize)
+ keg->uk_flags |= UMA_ZONE_OFFPAGE;
+ }
+
+ if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
+ (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
keg->uk_flags |= UMA_ZONE_HASH;
}
@@ -1264,6 +1414,9 @@ keg_cachespread_init(uma_keg_t keg)
int pages;
int rsize;
+ KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
+ ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
+
alignsize = keg->uk_align + 1;
rsize = keg->uk_size;
/*
@@ -1281,9 +1434,10 @@ keg_cachespread_init(uma_keg_t keg)
pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
keg->uk_rsize = rsize;
keg->uk_ppera = pages;
+ keg->uk_slabsize = UMA_SLAB_SIZE;
keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
- KASSERT(keg->uk_ipers <= uma_max_ipers,
+ KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
keg->uk_ipers));
}
@@ -1308,11 +1462,11 @@ keg_ctor(void *mem, int size, void *udata, int flags)
keg->uk_fini = arg->fini;
keg->uk_align = arg->align;
keg->uk_free = 0;
+ keg->uk_reserve = 0;
keg->uk_pages = 0;
keg->uk_flags = arg->flags;
keg->uk_allocf = page_alloc;
keg->uk_freef = page_free;
- keg->uk_recurse = 0;
keg->uk_slabzone = NULL;
/*
@@ -1327,39 +1481,27 @@ keg_ctor(void *mem, int size, void *udata, int flags)
if (arg->flags & UMA_ZONE_ZINIT)
keg->uk_init = zero_init;
- if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
+ if (arg->flags & UMA_ZONE_MALLOC)
keg->uk_flags |= UMA_ZONE_VTOSLAB;
- /*
- * The +UMA_FRITM_SZ added to uk_size is to account for the
- * linkage that is added to the size in keg_small_init(). If
- * we don't account for this here then we may end up in
- * keg_small_init() with a calculated 'ipers' of 0.
- */
- if (keg->uk_flags & UMA_ZONE_REFCNT) {
- if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
- keg_cachespread_init(keg);
- else if ((keg->uk_size+UMA_FRITMREF_SZ) >
- (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
- keg_large_init(keg);
- else
- keg_small_init(keg);
+ if (arg->flags & UMA_ZONE_PCPU)
+#ifdef SMP
+ keg->uk_flags |= UMA_ZONE_OFFPAGE;
+#else
+ keg->uk_flags &= ~UMA_ZONE_PCPU;
+#endif
+
+ if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
+ keg_cachespread_init(keg);
} else {
- if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
- keg_cachespread_init(keg);
- else if ((keg->uk_size+UMA_FRITM_SZ) >
- (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
+ if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
keg_large_init(keg);
else
keg_small_init(keg);
}
- if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
- if (keg->uk_flags & UMA_ZONE_REFCNT)
- keg->uk_slabzone = slabrefzone;
- else
- keg->uk_slabzone = slabzone;
- }
+ if (keg->uk_flags & UMA_ZONE_OFFPAGE)
+ keg->uk_slabzone = slabzone;
/*
* If we haven't booted yet we need allocations to go through the
@@ -1389,12 +1531,9 @@ keg_ctor(void *mem, int size, void *udata, int flags)
#endif /* __rtems__ */
/*
- * Initialize keg's lock (shared among zones).
+ * Initialize keg's lock
*/
- if (arg->flags & UMA_ZONE_MTXCLASS)
- KEG_LOCK_INIT(keg, 1);
- else
- KEG_LOCK_INIT(keg, 0);
+ KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
/*
* If we're putting the slab header in the actual page we need to
@@ -1405,24 +1544,12 @@ keg_ctor(void *mem, int size, void *udata, int flags)
u_int totsize;
/* Size of the slab struct and free list */
- if (keg->uk_flags & UMA_ZONE_REFCNT)
- totsize = sizeof(struct uma_slab_refcnt) +
- keg->uk_ipers * UMA_FRITMREF_SZ;
- else
- totsize = sizeof(struct uma_slab) +
- keg->uk_ipers * UMA_FRITM_SZ;
+ totsize = sizeof(struct uma_slab);
if (totsize & UMA_ALIGN_PTR)
totsize = (totsize & ~UMA_ALIGN_PTR) +
(UMA_ALIGN_PTR + 1);
- keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;
-
- if (keg->uk_flags & UMA_ZONE_REFCNT)
- totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
- + keg->uk_ipers * UMA_FRITMREF_SZ;
- else
- totsize = keg->uk_pgoff + sizeof(struct uma_slab)
- + keg->uk_ipers * UMA_FRITM_SZ;
+ keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
/*
* The only way the following is possible is if with our
@@ -1431,7 +1558,8 @@ keg_ctor(void *mem, int size, void *udata, int flags)
* mathematically possible for all cases, so we make
* sure here anyway.
*/
- if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
+ totsize = keg->uk_pgoff + sizeof(struct uma_slab);
+ if (totsize > PAGE_SIZE * keg->uk_ppera) {
printf("zone %s ipers %d rsize %d size %d\n",
zone->uz_name, keg->uk_ipers, keg->uk_rsize,
keg->uk_size);
@@ -1451,9 +1579,9 @@ keg_ctor(void *mem, int size, void *udata, int flags)
LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
- mtx_lock(&uma_mtx);
+ rw_wlock(&uma_rwlock);
LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
- mtx_unlock(&uma_mtx);
+ rw_wunlock(&uma_rwlock);
return (0);
}
@@ -1482,17 +1610,47 @@ zone_ctor(void *mem, int size, void *udata, int flags)
zone->uz_frees = 0;
zone->uz_fails = 0;
zone->uz_sleeps = 0;
- zone->uz_fills = zone->uz_count = 0;
+ zone->uz_count = 0;
+ zone->uz_count_min = 0;
zone->uz_flags = 0;
+ zone->uz_warning = NULL;
+ timevalclear(&zone->uz_ratecheck);
keg = arg->keg;
+ ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
+
+ /*
+ * This is a pure cache zone, no kegs.
+ */
+ if (arg->import) {
+ if (arg->flags & UMA_ZONE_VM)
+ arg->flags |= UMA_ZFLAG_CACHEONLY;
+ zone->uz_flags = arg->flags;
+ zone->uz_size = arg->size;
+ zone->uz_import = arg->import;
+ zone->uz_release = arg->release;
+ zone->uz_arg = arg->arg;
+ zone->uz_lockptr = &zone->uz_lock;
+ rw_wlock(&uma_rwlock);
+ LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
+ rw_wunlock(&uma_rwlock);
+ goto out;
+ }
+
+ /*
+ * Use the regular zone/keg/slab allocator.
+ */
+ zone->uz_import = (uma_import)zone_import;
+ zone->uz_release = (uma_release)zone_release;
+ zone->uz_arg = zone;
+
if (arg->flags & UMA_ZONE_SECONDARY) {
KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
zone->uz_init = arg->uminit;
zone->uz_fini = arg->fini;
- zone->uz_lock = &keg->uk_lock;
+ zone->uz_lockptr = &keg->uk_lock;
zone->uz_flags |= UMA_ZONE_SECONDARY;
- mtx_lock(&uma_mtx);
+ rw_wlock(&uma_rwlock);
ZONE_LOCK(zone);
LIST_FOREACH(z, &keg->uk_zones, uz_link) {
if (LIST_NEXT(z, uz_link) == NULL) {
@@ -1501,7 +1659,7 @@ zone_ctor(void *mem, int size, void *udata, int flags)
}
}
ZONE_UNLOCK(zone);
- mtx_unlock(&uma_mtx);
+ rw_wunlock(&uma_rwlock);
} else if (keg == NULL) {
if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
arg->align, arg->flags)) == NULL)
@@ -1522,12 +1680,13 @@ zone_ctor(void *mem, int size, void *udata, int flags)
if (error)
return (error);
}
+
/*
* Link in the first keg.
*/
zone->uz_klink.kl_keg = keg;
LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
- zone->uz_lock = &keg->uk_lock;
+ zone->uz_lockptr = &keg->uk_lock;
zone->uz_size = keg->uk_size;
zone->uz_flags |= (keg->uk_flags &
(UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
@@ -1542,12 +1701,13 @@ zone_ctor(void *mem, int size, void *udata, int flags)
return (0);
}
- if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
- zone->uz_count = BUCKET_MAX;
- else if (keg->uk_ipers <= BUCKET_MAX)
- zone->uz_count = keg->uk_ipers;
+out:
+ if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0)
+ zone->uz_count = bucket_select(zone->uz_size);
else
zone->uz_count = BUCKET_MAX;
+ zone->uz_count_min = zone->uz_count;
+
return (0);
}
@@ -1597,9 +1757,9 @@ zone_dtor(void *arg, int size, void *udata)
if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
cache_drain(zone);
- mtx_lock(&uma_mtx);
+ rw_wlock(&uma_rwlock);
LIST_REMOVE(zone, uz_link);
- mtx_unlock(&uma_mtx);
+ rw_wunlock(&uma_rwlock);
/*
* XXX there are some races here where
* the zone can be drained but zone lock
@@ -1620,13 +1780,13 @@ zone_dtor(void *arg, int size, void *udata)
/*
* We only destroy kegs from non secondary zones.
*/
- if ((zone->uz_flags & UMA_ZONE_SECONDARY) == 0) {
- mtx_lock(&uma_mtx);
+ if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0) {
+ rw_wlock(&uma_rwlock);
LIST_REMOVE(keg, uk_link);
- mtx_unlock(&uma_mtx);
- zone_free_item(kegs, keg, NULL, SKIP_NONE,
- ZFREE_STATFREE);
+ rw_wunlock(&uma_rwlock);
+ zone_free_item(kegs, keg, NULL, SKIP_NONE);
}
+ ZONE_LOCK_FINI(zone);
}
/*
@@ -1645,12 +1805,12 @@ zone_foreach(void (*zfunc)(uma_zone_t))
uma_keg_t keg;
uma_zone_t zone;
- mtx_lock(&uma_mtx);
+ rw_rlock(&uma_rwlock);
LIST_FOREACH(keg, &uma_kegs, uk_link) {
LIST_FOREACH(zone, &keg->uk_zones, uz_link)
zfunc(zone);
}
- mtx_unlock(&uma_mtx);
+ rw_runlock(&uma_rwlock);
}
/* Public functions */
@@ -1661,90 +1821,16 @@ uma_startup(void *bootmem, int boot_pages)
struct uma_zctor_args args;
#ifndef __rtems__
uma_slab_t slab;
-#endif /* __rtems__ */
- u_int slabsize;
- u_int objsize, totsize, wsize;
-#ifndef __rtems__
int i;
#endif /* __rtems__ */
#ifdef UMA_DEBUG
printf("Creating uma keg headers zone and keg.\n");
#endif
- mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
-
- /*
- * Figure out the maximum number of items-per-slab we'll have if
- * we're using the OFFPAGE slab header to track free items, given
- * all possible object sizes and the maximum desired wastage
- * (UMA_MAX_WASTE).
- *
- * We iterate until we find an object size for
- * which the calculated wastage in keg_small_init() will be
- * enough to warrant OFFPAGE. Since wastedspace versus objsize
- * is an overall increasing see-saw function, we find the smallest
- * objsize such that the wastage is always acceptable for objects
- * with that objsize or smaller. Since a smaller objsize always
- * generates a larger possible uma_max_ipers, we use this computed
- * objsize to calculate the largest ipers possible. Since the
- * ipers calculated for OFFPAGE slab headers is always larger than
- * the ipers initially calculated in keg_small_init(), we use
- * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
- * obtain the maximum ipers possible for offpage slab headers.
- *
- * It should be noted that ipers versus objsize is an inversly
- * proportional function which drops off rather quickly so as
- * long as our UMA_MAX_WASTE is such that the objsize we calculate
- * falls into the portion of the inverse relation AFTER the steep
- * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
- *
- * Note that we have 8-bits (1 byte) to use as a freelist index
- * inside the actual slab header itself and this is enough to
- * accomodate us. In the worst case, a UMA_SMALLEST_UNIT sized
- * object with offpage slab header would have ipers =
- * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
- * 1 greater than what our byte-integer freelist index can
- * accomodate, but we know that this situation never occurs as
- * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
- * that we need to go to offpage slab headers. Or, if we do,
- * then we trap that condition below and panic in the INVARIANTS case.
- */
- wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
- totsize = wsize;
- objsize = UMA_SMALLEST_UNIT;
- while (totsize >= wsize) {
- totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
- (objsize + UMA_FRITM_SZ);
- totsize *= (UMA_FRITM_SZ + objsize);
- objsize++;
- }
- if (objsize > UMA_SMALLEST_UNIT)
- objsize--;
- uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);
-
- wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
- totsize = wsize;
- objsize = UMA_SMALLEST_UNIT;
- while (totsize >= wsize) {
- totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
- (objsize + UMA_FRITMREF_SZ);
- totsize *= (UMA_FRITMREF_SZ + objsize);
- objsize++;
- }
- if (objsize > UMA_SMALLEST_UNIT)
- objsize--;
- uma_max_ipers_ref = MAX(UMA_SLAB_SIZE / objsize, 64);
-
- KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
- ("uma_startup: calculated uma_max_ipers values too large!"));
-
-#ifdef UMA_DEBUG
- printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
- printf("Calculated uma_max_ipers_ref (for OFFPAGE) is %d\n",
- uma_max_ipers_ref);
-#endif
+ rw_init(&uma_rwlock, "UMA lock");
/* "manually" create the initial zone */
+ memset(&args, 0, sizeof(args));
args.name = "UMA Kegs";
args.size = sizeof(struct uma_keg);
args.ctor = keg_ctor;
@@ -1762,8 +1848,8 @@ uma_startup(void *bootmem, int boot_pages)
printf("Filling boot free list.\n");
#endif
for (i = 0; i < boot_pages; i++) {
- slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
- slab->us_data = (u_int8_t *)slab;
+ slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE));
+ slab->us_data = (uint8_t *)slab;
slab->us_flags = UMA_SLAB_BOOT;
LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
}
@@ -1787,37 +1873,15 @@ uma_startup(void *bootmem, int boot_pages)
zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
#ifdef UMA_DEBUG
- printf("Initializing pcpu cache locks.\n");
-#endif
-#ifdef UMA_DEBUG
printf("Creating slab and hash zones.\n");
#endif
- /*
- * This is the max number of free list items we'll have with
- * offpage slabs.
- */
- slabsize = uma_max_ipers * UMA_FRITM_SZ;
- slabsize += sizeof(struct uma_slab);
-
/* Now make a zone for slab headers */
slabzone = uma_zcreate("UMA Slabs",
- slabsize,
+ sizeof(struct uma_slab),
NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
- /*
- * We also create a zone for the bigger slabs with reference
- * counts in them, to accomodate UMA_ZONE_REFCNT zones.
- */
- slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
- slabsize += sizeof(struct uma_slab_refcnt);
- slabrefzone = uma_zcreate("UMA RCntSlabs",
- slabsize,
- NULL, NULL, NULL, NULL,
- UMA_ALIGN_PTR,
- UMA_ZFLAG_INTERNAL);
-
hashzone = uma_zcreate("UMA Hash",
sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
NULL, NULL, NULL, NULL,
@@ -1839,6 +1903,7 @@ rtems_bsd_uma_startup(void *unused)
{
(void) unused;
+ sx_init(&uma_drain_lock, "umadrain");
uma_startup(NULL, 0);
}
@@ -1853,6 +1918,7 @@ uma_startup2(void)
{
booted = UMA_STARTUP2;
bucket_enable();
+ sx_init(&uma_drain_lock, "umadrain");
#ifdef UMA_DEBUG
printf("UMA startup2 complete.\n");
#endif
@@ -1870,7 +1936,7 @@ uma_startup3(void)
#ifdef UMA_DEBUG
printf("Starting callout.\n");
#endif
- callout_init(&uma_callout, CALLOUT_MPSAFE);
+ callout_init(&uma_callout, 1);
callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
printf("UMA startup3 complete.\n");
@@ -1879,7 +1945,7 @@ uma_startup3(void)
static uma_keg_t
uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
- int align, u_int32_t flags)
+ int align, uint32_t flags)
{
struct uma_kctor_args args;
@@ -1904,23 +1970,57 @@ uma_set_align(int align)
/* See uma.h */
uma_zone_t
uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
- uma_init uminit, uma_fini fini, int align, u_int32_t flags)
+ uma_init uminit, uma_fini fini, int align, uint32_t flags)
{
struct uma_zctor_args args;
+ uma_zone_t res;
+#ifndef __rtems__
+ bool locked;
+#endif /* __rtems__ */
/* This stuff is essential for the zone ctor */
+ memset(&args, 0, sizeof(args));
args.name = name;
args.size = size;
args.ctor = ctor;
args.dtor = dtor;
args.uminit = uminit;
args.fini = fini;
+#ifdef INVARIANTS
+ /*
+ * If a zone is being created with an empty constructor and
+ * destructor, pass UMA constructor/destructor which checks for
+ * memory use after free.
+ */
+ if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) &&
+ ctor == NULL && dtor == NULL && uminit == NULL && fini == NULL) {
+ args.ctor = trash_ctor;
+ args.dtor = trash_dtor;
+ args.uminit = trash_init;
+ args.fini = trash_fini;
+ }
+#endif
args.align = align;
args.flags = flags;
args.keg = NULL;
- return (zone_alloc_item(zones, &args, M_WAITOK));
+#ifndef __rtems__
+ if (booted < UMA_STARTUP2) {
+ locked = false;
+ } else {
+#endif /* __rtems__ */
+ sx_slock(&uma_drain_lock);
+#ifndef __rtems__
+ locked = true;
+ }
+#endif /* __rtems__ */
+ res = zone_alloc_item(zones, &args, M_WAITOK);
+#ifndef __rtems__
+ if (locked)
+#endif /* __rtems__ */
+ sx_sunlock(&uma_drain_lock);
+ return (res);
}
/* See uma.h */
@@ -1930,8 +2030,13 @@ uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
{
struct uma_zctor_args args;
uma_keg_t keg;
+ uma_zone_t res;
+#ifndef __rtems__
+ bool locked;
+#endif /* __rtems__ */
keg = zone_first_keg(master);
+ memset(&args, 0, sizeof(args));
args.name = name;
args.size = keg->uk_size;
args.ctor = ctor;
@@ -1942,7 +2047,46 @@ uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
args.keg = keg;
+#ifndef __rtems__
+ if (booted < UMA_STARTUP2) {
+ locked = false;
+ } else {
+#endif /* __rtems__ */
+ sx_slock(&uma_drain_lock);
+#ifndef __rtems__
+ locked = true;
+ }
+#endif /* __rtems__ */
/* XXX Attaches only one keg of potentially many. */
+ res = zone_alloc_item(zones, &args, M_WAITOK);
+#ifndef __rtems__
+ if (locked)
+#endif /* __rtems__ */
+ sx_sunlock(&uma_drain_lock);
+ return (res);
+}
+
+/* See uma.h */
+uma_zone_t
+uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
+ uma_init zinit, uma_fini zfini, uma_import zimport,
+ uma_release zrelease, void *arg, int flags)
+{
+ struct uma_zctor_args args;
+
+ memset(&args, 0, sizeof(args));
+ args.name = name;
+ args.size = size;
+ args.ctor = ctor;
+ args.dtor = dtor;
+ args.uminit = zinit;
+ args.fini = zfini;
+ args.import = zimport;
+ args.release = zrelease;
+ args.arg = arg;
+ args.align = 0;
+ args.flags = flags;
+
return (zone_alloc_item(zones, &args, M_WAITOK));
}
@@ -1952,10 +2096,10 @@ zone_lock_pair(uma_zone_t a, uma_zone_t b)
{
if (a < b) {
ZONE_LOCK(a);
- mtx_lock_flags(b->uz_lock, MTX_DUPOK);
+ mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
} else {
ZONE_LOCK(b);
- mtx_lock_flags(a->uz_lock, MTX_DUPOK);
+ mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
}
}
@@ -1994,14 +2138,7 @@ uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
error = EINVAL;
goto out;
}
- /*
- * Both must either be refcnt, or not be refcnt.
- */
- if ((zone->uz_flags & UMA_ZONE_REFCNT) !=
- (master->uz_flags & UMA_ZONE_REFCNT)) {
- error = EINVAL;
- goto out;
- }
+
/*
* The underlying object must be the same size. rsize
* may be different.
@@ -2039,7 +2176,9 @@ void
uma_zdestroy(uma_zone_t zone)
{
- zone_free_item(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
+ sx_slock(&uma_drain_lock);
+ zone_free_item(zones, zone, NULL, SKIP_NONE);
+ sx_sunlock(&uma_drain_lock);
}
/* See uma.h */
@@ -2049,8 +2188,12 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
void *item;
uma_cache_t cache;
uma_bucket_t bucket;
+ int lockfail;
int cpu;
+ /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
+ random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
+
/* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
@@ -2062,7 +2205,27 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
"uma_zalloc_arg: zone \"%s\"", zone->uz_name);
}
-
+ KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
+ ("uma_zalloc_arg: called with spinlock or critical section held"));
+
+#ifdef DEBUG_MEMGUARD
+ if (memguard_cmp_zone(zone)) {
+ item = memguard_alloc(zone->uz_size, flags);
+ if (item != NULL) {
+ if (zone->uz_init != NULL &&
+ zone->uz_init(item, zone->uz_size, flags) != 0)
+ return (NULL);
+ if (zone->uz_ctor != NULL &&
+ zone->uz_ctor(item, zone->uz_size, udata,
+ flags) != 0) {
+ zone->uz_fini(item, zone->uz_size);
+ return (NULL);
+ }
+ return (item);
+ }
+ /* This is unfortunate but should not be fatal. */
+ }
+#endif
/*
* If possible, allocate from the per-CPU cache. There are two
* requirements for safe access to the per-CPU cache: (1) the thread
@@ -2074,60 +2237,62 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
* the current cache; when we re-acquire the critical section, we
* must detect and handle migration if it has occurred.
*/
-zalloc_restart:
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
zalloc_start:
bucket = cache->uc_allocbucket;
-
- if (bucket) {
- if (bucket->ub_cnt > 0) {
- bucket->ub_cnt--;
- item = bucket->ub_bucket[bucket->ub_cnt];
+ if (bucket != NULL && bucket->ub_cnt > 0) {
+ bucket->ub_cnt--;
+ item = bucket->ub_bucket[bucket->ub_cnt];
#ifdef INVARIANTS
- bucket->ub_bucket[bucket->ub_cnt] = NULL;
+ bucket->ub_bucket[bucket->ub_cnt] = NULL;
#endif
- KASSERT(item != NULL,
- ("uma_zalloc: Bucket pointer mangled."));
- cache->uc_allocs++;
- critical_exit();
+ KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
+ cache->uc_allocs++;
+ critical_exit();
+ if (zone->uz_ctor != NULL &&
+ zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
+ atomic_add_long(&zone->uz_fails, 1);
+ zone_free_item(zone, item, udata, SKIP_DTOR);
+ return (NULL);
+ }
#ifdef INVARIANTS
- ZONE_LOCK(zone);
- uma_dbg_alloc(zone, NULL, item);
- ZONE_UNLOCK(zone);
+ uma_dbg_alloc(zone, NULL, item);
#endif
- if (zone->uz_ctor != NULL) {
- if (zone->uz_ctor(item, zone->uz_size,
- udata, flags) != 0) {
- zone_free_item(zone, item, udata,
- SKIP_DTOR, ZFREE_STATFAIL |
- ZFREE_STATFREE);
- return (NULL);
- }
- }
- if (flags & M_ZERO)
- bzero(item, zone->uz_size);
- return (item);
- } else if (cache->uc_freebucket) {
- /*
- * We have run out of items in our allocbucket.
- * See if we can switch with our free bucket.
- */
- if (cache->uc_freebucket->ub_cnt > 0) {
+ if (flags & M_ZERO)
+ uma_zero_item(item, zone);
+ return (item);
+ }
+
+ /*
+ * We have run out of items in our alloc bucket.
+ * See if we can switch with our free bucket.
+ */
+ bucket = cache->uc_freebucket;
+ if (bucket != NULL && bucket->ub_cnt > 0) {
#ifdef UMA_DEBUG_ALLOC
- printf("uma_zalloc: Swapping empty with"
- " alloc.\n");
+ printf("uma_zalloc: Swapping empty with alloc.\n");
#endif
- bucket = cache->uc_freebucket;
- cache->uc_freebucket = cache->uc_allocbucket;
- cache->uc_allocbucket = bucket;
-
- goto zalloc_start;
- }
- }
+ cache->uc_freebucket = cache->uc_allocbucket;
+ cache->uc_allocbucket = bucket;
+ goto zalloc_start;
}
+
+ /*
+ * Discard any empty allocation bucket while we hold no locks.
+ */
+ bucket = cache->uc_allocbucket;
+ cache->uc_allocbucket = NULL;
+ critical_exit();
+ if (bucket != NULL)
+ bucket_free(zone, bucket, udata);
+
+ /* Short-circuit for zones without buckets and low memory. */
+ if (zone->uz_count == 0 || bucketdisable)
+ goto zalloc_item;
+
/*
* Attempt to retrieve the item from the per-CPU cache has failed, so
* we must go back to the zone. This requires the zone lock, so we
@@ -2137,41 +2302,34 @@ zalloc_start:
* thread-local state specific to the cache from prior to releasing
* the critical section.
*/
- critical_exit();
- ZONE_LOCK(zone);
+ lockfail = 0;
+ if (ZONE_TRYLOCK(zone) == 0) {
+ /* Record contention to size the buckets. */
+ ZONE_LOCK(zone);
+ lockfail = 1;
+ }
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
- bucket = cache->uc_allocbucket;
- if (bucket != NULL) {
- if (bucket->ub_cnt > 0) {
- ZONE_UNLOCK(zone);
- goto zalloc_start;
- }
- bucket = cache->uc_freebucket;
- if (bucket != NULL && bucket->ub_cnt > 0) {
- ZONE_UNLOCK(zone);
- goto zalloc_start;
- }
- }
- /* Since we have locked the zone we may as well send back our stats */
- zone->uz_allocs += cache->uc_allocs;
+ /*
+ * Since we have locked the zone we may as well send back our stats.
+ */
+ atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
+ atomic_add_long(&zone->uz_frees, cache->uc_frees);
cache->uc_allocs = 0;
- zone->uz_frees += cache->uc_frees;
cache->uc_frees = 0;
- /* Our old one is now a free bucket */
- if (cache->uc_allocbucket) {
- KASSERT(cache->uc_allocbucket->ub_cnt == 0,
- ("uma_zalloc_arg: Freeing a non free bucket."));
- LIST_INSERT_HEAD(&zone->uz_free_bucket,
- cache->uc_allocbucket, ub_link);
- cache->uc_allocbucket = NULL;
+ /* See if we lost the race to fill the cache. */
+ if (cache->uc_allocbucket != NULL) {
+ ZONE_UNLOCK(zone);
+ goto zalloc_start;
}
- /* Check the free list for a new alloc bucket */
- if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
+ /*
+ * Check the zone's cache of buckets.
+ */
+ if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
KASSERT(bucket->ub_cnt != 0,
("uma_zalloc_arg: Returning an empty bucket."));
@@ -2183,19 +2341,38 @@ zalloc_start:
/* We are no longer associated with this CPU. */
critical_exit();
- /* Bump up our uz_count so we get here less */
- if (zone->uz_count < BUCKET_MAX)
+ /*
+ * We bump the uz count when the cache size is insufficient to
+ * handle the working set.
+ */
+ if (lockfail && zone->uz_count < BUCKET_MAX)
zone->uz_count++;
+ ZONE_UNLOCK(zone);
/*
* Now lets just fill a bucket and put it on the free list. If that
- * works we'll restart the allocation from the begining.
+ * works we'll restart the allocation from the beginning and it
+ * will use the just filled bucket.
*/
- if (zone_alloc_bucket(zone, flags)) {
+ bucket = zone_alloc_bucket(zone, udata, flags);
+ if (bucket != NULL) {
+ ZONE_LOCK(zone);
+ critical_enter();
+ cpu = curcpu;
+ cache = &zone->uz_cpu[cpu];
+ /*
+ * See if we lost the race or were migrated. Cache the
+ * initialized bucket to make this less likely or claim
+ * the memory directly.
+ */
+ if (cache->uc_allocbucket == NULL)
+ cache->uc_allocbucket = bucket;
+ else
+ LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
ZONE_UNLOCK(zone);
- goto zalloc_restart;
+ goto zalloc_start;
}
- ZONE_UNLOCK(zone);
+
/*
* We may not be able to get a bucket so return an actual item.
*/
@@ -2203,7 +2380,9 @@ zalloc_start:
printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif
+zalloc_item:
item = zone_alloc_item(zone, udata, flags);
+
return (item);
}
@@ -2211,9 +2390,13 @@ static uma_slab_t
keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
{
uma_slab_t slab;
+ int reserve;
mtx_assert(&keg->uk_lock, MA_OWNED);
slab = NULL;
+ reserve = 0;
+ if ((flags & M_USE_RESERVE) == 0)
+ reserve = keg->uk_reserve;
for (;;) {
/*
@@ -2221,7 +2404,7 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
* used over those that are totally full. This helps to reduce
* fragmentation.
*/
- if (keg->uk_free != 0) {
+ if (keg->uk_free > reserve) {
if (!LIST_EMPTY(&keg->uk_part_slab)) {
slab = LIST_FIRST(&keg->uk_part_slab);
} else {
@@ -2246,17 +2429,18 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
* If this is not a multi-zone, set the FULL bit.
* Otherwise slab_multi() takes care of it.
*/
- if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0)
+ if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
zone->uz_flags |= UMA_ZFLAG_FULL;
+ zone_log_warning(zone);
+ zone_maxaction(zone);
+ }
if (flags & M_NOWAIT)
break;
zone->uz_sleeps++;
msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
continue;
}
- keg->uk_recurse++;
slab = keg_alloc_slab(keg, zone, flags);
- keg->uk_recurse--;
/*
* If we got a slab here it's safe to mark it partially used
* and return. We assume that the caller is going to remove
@@ -2277,42 +2461,15 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
return (slab);
}
-static inline void
-zone_relock(uma_zone_t zone, uma_keg_t keg)
-{
- if (zone->uz_lock != &keg->uk_lock) {
- KEG_UNLOCK(keg);
- ZONE_LOCK(zone);
- }
-}
-
-static inline void
-keg_relock(uma_keg_t keg, uma_zone_t zone)
-{
- if (zone->uz_lock != &keg->uk_lock) {
- ZONE_UNLOCK(zone);
- KEG_LOCK(keg);
- }
-}
-
static uma_slab_t
zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
{
uma_slab_t slab;
- if (keg == NULL)
+ if (keg == NULL) {
keg = zone_first_keg(zone);
- /*
- * This is to prevent us from recursively trying to allocate
- * buckets. The problem is that if an allocation forces us to
- * grab a new bucket we will call page_alloc, which will go off
- * and cause the vm to allocate vm_map_entries. If we need new
- * buckets there too we will recurse in kmem_alloc and bad
- * things happen. So instead we return a NULL bucket, and make
- * the code that allocates buckets smart enough to deal with it
- */
- if (keg->uk_flags & UMA_ZFLAG_BUCKET && keg->uk_recurse != 0)
- return (NULL);
+ KEG_LOCK(keg);
+ }
for (;;) {
slab = keg_fetch_slab(keg, zone, flags);
@@ -2321,14 +2478,14 @@ zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
if (flags & (M_NOWAIT | M_NOVM))
break;
}
+ KEG_UNLOCK(keg);
return (NULL);
}
#ifndef __rtems__
/*
* uma_zone_fetch_slab_multi: Fetches a slab from one available keg. Returns
- * with the keg locked. Caller must call zone_relock() afterwards if the
- * zone lock is required. On NULL the zone lock is held.
+ * with the keg locked. On NULL no lock is held.
*
* The last pointer is used to seed the search. It is not required.
*/
@@ -2352,12 +2509,11 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
* Use the last slab allocated as a hint for where to start
* the search.
*/
- if (last) {
+ if (last != NULL) {
slab = keg_fetch_slab(last, zone, flags);
if (slab)
return (slab);
- zone_relock(zone, last);
- last = NULL;
+ KEG_UNLOCK(last);
}
/*
* Loop until we have a slab incase of transient failures
@@ -2373,7 +2529,7 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
*/
LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
keg = klink->kl_keg;
- keg_relock(keg, zone);
+ KEG_LOCK(keg);
if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
slab = keg_fetch_slab(keg, zone, flags);
if (slab)
@@ -2383,7 +2539,7 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
full++;
else
empty++;
- zone_relock(zone, keg);
+ KEG_UNLOCK(keg);
}
if (rflags & (M_NOWAIT | M_NOVM))
break;
@@ -2393,10 +2549,15 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
* and sleep so just sleep for a short period and retry.
*/
if (full && !empty) {
+ ZONE_LOCK(zone);
zone->uz_flags |= UMA_ZFLAG_FULL;
zone->uz_sleeps++;
- msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
+ zone_log_warning(zone);
+ zone_maxaction(zone);
+ msleep(zone, zone->uz_lockptr, PVM,
+ "zonelimit", hz/100);
zone->uz_flags &= ~UMA_ZFLAG_FULL;
+ ZONE_UNLOCK(zone);
continue;
}
}
@@ -2405,30 +2566,20 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
#endif /* __rtems__ */
static void *
-slab_alloc_item(uma_zone_t zone, uma_slab_t slab)
+slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
{
- uma_keg_t keg;
- uma_slabrefcnt_t slabref;
void *item;
- u_int8_t freei;
+ uint8_t freei;
- keg = slab->us_keg;
+ MPASS(keg == slab->us_keg);
mtx_assert(&keg->uk_lock, MA_OWNED);
- freei = slab->us_firstfree;
- if (keg->uk_flags & UMA_ZONE_REFCNT) {
- slabref = (uma_slabrefcnt_t)slab;
- slab->us_firstfree = slabref->us_freelist[freei].us_item;
- } else {
- slab->us_firstfree = slab->us_freelist[freei].us_item;
- }
+ freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
+ BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
item = slab->us_data + (keg->uk_rsize * freei);
-
slab->us_freecount--;
keg->uk_free--;
-#ifdef INVARIANTS
- uma_dbg_alloc(zone, slab, item);
-#endif
+
/* Move this slab to the full list */
if (slab->us_freecount == 0) {
LIST_REMOVE(slab, us_link);
@@ -2439,117 +2590,85 @@ slab_alloc_item(uma_zone_t zone, uma_slab_t slab)
}
static int
-zone_alloc_bucket(uma_zone_t zone, int flags)
+zone_import(uma_zone_t zone, void **bucket, int max, int flags)
{
- uma_bucket_t bucket;
uma_slab_t slab;
uma_keg_t keg;
- int16_t saved;
- int max, origflags = flags;
-
- /*
- * Try this zone's free list first so we don't allocate extra buckets.
- */
- if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
- KASSERT(bucket->ub_cnt == 0,
- ("zone_alloc_bucket: Bucket on free list is not empty."));
- LIST_REMOVE(bucket, ub_link);
- } else {
- int bflags;
-
- bflags = (flags & ~M_ZERO);
- if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
- bflags |= M_NOVM;
-
- ZONE_UNLOCK(zone);
- bucket = bucket_alloc(zone->uz_count, bflags);
- ZONE_LOCK(zone);
- }
-
- if (bucket == NULL) {
- return (0);
- }
-
-#ifdef SMP
- /*
- * This code is here to limit the number of simultaneous bucket fills
- * for any given zone to the number of per cpu caches in this zone. This
- * is done so that we don't allocate more memory than we really need.
- */
- if (zone->uz_fills >= mp_ncpus)
- goto done;
-
-#endif
- zone->uz_fills++;
+ int i;
- max = MIN(bucket->ub_entries, zone->uz_count);
- /* Try to keep the buckets totally full */
- saved = bucket->ub_cnt;
slab = NULL;
keg = NULL;
- while (bucket->ub_cnt < max &&
- (slab = zone->uz_slab(zone, keg, flags)) != NULL) {
+ /* Try to keep the buckets totally full */
+ for (i = 0; i < max; ) {
+ if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
+ break;
keg = slab->us_keg;
- while (slab->us_freecount && bucket->ub_cnt < max) {
- bucket->ub_bucket[bucket->ub_cnt++] =
- slab_alloc_item(zone, slab);
+ while (slab->us_freecount && i < max) {
+ bucket[i++] = slab_alloc_item(keg, slab);
+ if (keg->uk_free <= keg->uk_reserve)
+ break;
}
-
- /* Don't block on the next fill */
+ /* Don't grab more than one slab at a time. */
+ flags &= ~M_WAITOK;
flags |= M_NOWAIT;
}
- if (slab)
- zone_relock(zone, keg);
+ if (slab != NULL)
+ KEG_UNLOCK(keg);
+
+ return i;
+}
+
+static uma_bucket_t
+zone_alloc_bucket(uma_zone_t zone, void *udata, int flags)
+{
+ uma_bucket_t bucket;
+ int max;
+
+ /* Don't wait for buckets, preserve caller's NOVM setting. */
+ bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
+ if (bucket == NULL)
+ return (NULL);
+
+ max = MIN(bucket->ub_entries, zone->uz_count);
+ bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
+ max, flags);
/*
- * We unlock here because we need to call the zone's init.
- * It should be safe to unlock because the slab dealt with
- * above is already on the appropriate list within the keg
- * and the bucket we filled is not yet on any list, so we
- * own it.
+ * Initialize the memory if necessary.
*/
- if (zone->uz_init != NULL) {
+ if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
int i;
- ZONE_UNLOCK(zone);
- for (i = saved; i < bucket->ub_cnt; i++)
+ for (i = 0; i < bucket->ub_cnt; i++)
if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
- origflags) != 0)
+ flags) != 0)
break;
/*
* If we couldn't initialize the whole bucket, put the
* rest back onto the freelist.
*/
if (i != bucket->ub_cnt) {
- int j;
-
- for (j = i; j < bucket->ub_cnt; j++) {
- zone_free_item(zone, bucket->ub_bucket[j],
- NULL, SKIP_FINI, 0);
+ zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
+ bucket->ub_cnt - i);
#ifdef INVARIANTS
- bucket->ub_bucket[j] = NULL;
+ bzero(&bucket->ub_bucket[i],
+ sizeof(void *) * (bucket->ub_cnt - i));
#endif
- }
bucket->ub_cnt = i;
}
- ZONE_LOCK(zone);
}
- zone->uz_fills--;
- if (bucket->ub_cnt != 0) {
- LIST_INSERT_HEAD(&zone->uz_full_bucket,
- bucket, ub_link);
- return (1);
+ if (bucket->ub_cnt == 0) {
+ bucket_free(zone, bucket, udata);
+ atomic_add_long(&zone->uz_fails, 1);
+ return (NULL);
}
-#ifdef SMP
-done:
-#endif
- bucket_free(bucket);
- return (0);
+ return (bucket);
}
+
/*
- * Allocates an item for an internal zone
+ * Allocates a single item from a zone.
*
* Arguments
* zone The zone to alloc for.
@@ -2564,7 +2683,6 @@ done:
static void *
zone_alloc_item(uma_zone_t zone, void *udata, int flags)
{
- uma_slab_t slab;
void *item;
item = NULL;
@@ -2572,20 +2690,9 @@ zone_alloc_item(uma_zone_t zone, void *udata, int flags)
#ifdef UMA_DEBUG_ALLOC
printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
- ZONE_LOCK(zone);
-
- slab = zone->uz_slab(zone, NULL, flags);
- if (slab == NULL) {
- zone->uz_fails++;
- ZONE_UNLOCK(zone);
- return (NULL);
- }
-
- item = slab_alloc_item(zone, slab);
-
- zone_relock(zone, slab->us_keg);
- zone->uz_allocs++;
- ZONE_UNLOCK(zone);
+ if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
+ goto fail;
+ atomic_add_long(&zone->uz_allocs, 1);
/*
* We have to call both the zone's init (not the keg's init)
@@ -2595,22 +2702,27 @@ zone_alloc_item(uma_zone_t zone, void *udata, int flags)
*/
if (zone->uz_init != NULL) {
if (zone->uz_init(item, zone->uz_size, flags) != 0) {
- zone_free_item(zone, item, udata, SKIP_FINI,
- ZFREE_STATFAIL | ZFREE_STATFREE);
- return (NULL);
+ zone_free_item(zone, item, udata, SKIP_FINI);
+ goto fail;
}
}
if (zone->uz_ctor != NULL) {
if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
- zone_free_item(zone, item, udata, SKIP_DTOR,
- ZFREE_STATFAIL | ZFREE_STATFREE);
- return (NULL);
+ zone_free_item(zone, item, udata, SKIP_DTOR);
+ goto fail;
}
}
+#ifdef INVARIANTS
+ uma_dbg_alloc(zone, NULL, item);
+#endif
if (flags & M_ZERO)
- bzero(item, zone->uz_size);
+ uma_zero_item(item, zone);
return (item);
+
+fail:
+ atomic_add_long(&zone->uz_fails, 1);
+ return (NULL);
}
/* See uma.h */
@@ -2619,36 +2731,49 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
{
uma_cache_t cache;
uma_bucket_t bucket;
- int bflags;
+ int lockfail;
int cpu;
+ /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
+ random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
+
#ifdef UMA_DEBUG_ALLOC_1
printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
#endif
CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
zone->uz_name);
+ KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
+ ("uma_zfree_arg: called with spinlock or critical section held"));
+
/* uma_zfree(..., NULL) does nothing, to match free(9). */
if (item == NULL)
return;
-
- if (zone->uz_dtor)
- zone->uz_dtor(item, zone->uz_size, udata);
-
+#ifdef DEBUG_MEMGUARD
+ if (is_memguard_addr(item)) {
+ if (zone->uz_dtor != NULL)
+ zone->uz_dtor(item, zone->uz_size, udata);
+ if (zone->uz_fini != NULL)
+ zone->uz_fini(item, zone->uz_size);
+ memguard_free(item);
+ return;
+ }
+#endif
#ifdef INVARIANTS
- ZONE_LOCK(zone);
if (zone->uz_flags & UMA_ZONE_MALLOC)
uma_dbg_free(zone, udata, item);
else
uma_dbg_free(zone, NULL, item);
- ZONE_UNLOCK(zone);
#endif
+ if (zone->uz_dtor != NULL)
+ zone->uz_dtor(item, zone->uz_size, udata);
+
/*
* The race here is acceptable. If we miss it we'll just have to wait
* a little longer for the limits to be reset.
*/
if (zone->uz_flags & UMA_ZFLAG_FULL)
- goto zfree_internal;
+ goto zfree_item;
/*
* If possible, free to the per-CPU cache. There are two
@@ -2667,45 +2792,25 @@ zfree_restart:
cache = &zone->uz_cpu[cpu];
zfree_start:
- bucket = cache->uc_freebucket;
-
- if (bucket) {
- /*
- * Do we have room in our bucket? It is OK for this uz count
- * check to be slightly out of sync.
- */
-
- if (bucket->ub_cnt < bucket->ub_entries) {
- KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
- ("uma_zfree: Freeing to non free bucket index."));
- bucket->ub_bucket[bucket->ub_cnt] = item;
- bucket->ub_cnt++;
- cache->uc_frees++;
- critical_exit();
- return;
- } else if (cache->uc_allocbucket) {
-#ifdef UMA_DEBUG_ALLOC
- printf("uma_zfree: Swapping buckets.\n");
-#endif
- /*
- * We have run out of space in our freebucket.
- * See if we can switch with our alloc bucket.
- */
- if (cache->uc_allocbucket->ub_cnt <
- cache->uc_freebucket->ub_cnt) {
- bucket = cache->uc_freebucket;
- cache->uc_freebucket = cache->uc_allocbucket;
- cache->uc_allocbucket = bucket;
- goto zfree_start;
- }
- }
+ /*
+ * Try to free into the allocbucket first to give LIFO ordering
+ * for cache-hot datastructures. Spill over into the freebucket
+ * if necessary. Alloc will swap them if one runs dry.
+ */
+ bucket = cache->uc_allocbucket;
+ if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
+ bucket = cache->uc_freebucket;
+ if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
+ KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
+ ("uma_zfree: Freeing to non free bucket index."));
+ bucket->ub_bucket[bucket->ub_cnt] = item;
+ bucket->ub_cnt++;
+ cache->uc_frees++;
+ critical_exit();
+ return;
}
+
/*
- * We can get here for two reasons:
- *
- * 1) The buckets are NULL
- * 2) The alloc and free buckets are both somewhat full.
- *
* We must go back the zone, which requires acquiring the zone lock,
* which in turn means we must release and re-acquire the critical
* section. Since the critical section is released, we may be
@@ -2714,32 +2819,35 @@ zfree_start:
* the critical section.
*/
critical_exit();
- ZONE_LOCK(zone);
+ if (zone->uz_count == 0 || bucketdisable)
+ goto zfree_item;
+
+ lockfail = 0;
+ if (ZONE_TRYLOCK(zone) == 0) {
+ /* Record contention to size the buckets. */
+ ZONE_LOCK(zone);
+ lockfail = 1;
+ }
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
- if (cache->uc_freebucket != NULL) {
- if (cache->uc_freebucket->ub_cnt <
- cache->uc_freebucket->ub_entries) {
- ZONE_UNLOCK(zone);
- goto zfree_start;
- }
- if (cache->uc_allocbucket != NULL &&
- (cache->uc_allocbucket->ub_cnt <
- cache->uc_freebucket->ub_cnt)) {
- ZONE_UNLOCK(zone);
- goto zfree_start;
- }
- }
- /* Since we have locked the zone we may as well send back our stats */
- zone->uz_allocs += cache->uc_allocs;
+ /*
+ * Since we have locked the zone we may as well send back our stats.
+ */
+ atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
+ atomic_add_long(&zone->uz_frees, cache->uc_frees);
cache->uc_allocs = 0;
- zone->uz_frees += cache->uc_frees;
cache->uc_frees = 0;
bucket = cache->uc_freebucket;
+ if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
+ ZONE_UNLOCK(zone);
+ goto zfree_start;
+ }
cache->uc_freebucket = NULL;
+ /* We are no longer associated with this CPU. */
+ critical_exit();
/* Can we throw this on the zone full list? */
if (bucket != NULL) {
@@ -2749,97 +2857,53 @@ zfree_start:
/* ub_cnt is pointing to the last free item */
KASSERT(bucket->ub_cnt != 0,
("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
- LIST_INSERT_HEAD(&zone->uz_full_bucket,
- bucket, ub_link);
+ LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
}
- if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
- LIST_REMOVE(bucket, ub_link);
- ZONE_UNLOCK(zone);
- cache->uc_freebucket = bucket;
- goto zfree_start;
- }
- /* We are no longer associated with this CPU. */
- critical_exit();
- /* And the zone.. */
+ /*
+ * We bump the uz count when the cache size is insufficient to
+ * handle the working set.
+ */
+ if (lockfail && zone->uz_count < BUCKET_MAX)
+ zone->uz_count++;
ZONE_UNLOCK(zone);
#ifdef UMA_DEBUG_ALLOC
printf("uma_zfree: Allocating new free bucket.\n");
#endif
- bflags = M_NOWAIT;
-
- if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
- bflags |= M_NOVM;
- bucket = bucket_alloc(zone->uz_count, bflags);
+ bucket = bucket_alloc(zone, udata, M_NOWAIT);
if (bucket) {
- ZONE_LOCK(zone);
- LIST_INSERT_HEAD(&zone->uz_free_bucket,
- bucket, ub_link);
- ZONE_UNLOCK(zone);
+ critical_enter();
+ cpu = curcpu;
+ cache = &zone->uz_cpu[cpu];
+ if (cache->uc_freebucket == NULL) {
+ cache->uc_freebucket = bucket;
+ goto zfree_start;
+ }
+ /*
+ * We lost the race, start over. We have to drop our
+ * critical section to free the bucket.
+ */
+ critical_exit();
+ bucket_free(zone, bucket, udata);
goto zfree_restart;
}
/*
* If nothing else caught this, we'll just do an internal free.
*/
-zfree_internal:
- zone_free_item(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE);
+zfree_item:
+ zone_free_item(zone, item, udata, SKIP_DTOR);
return;
}
-/*
- * Frees an item to an INTERNAL zone or allocates a free bucket
- *
- * Arguments:
- * zone The zone to free to
- * item The item we're freeing
- * udata User supplied data for the dtor
- * skip Skip dtors and finis
- */
static void
-zone_free_item(uma_zone_t zone, void *item, void *udata,
- enum zfreeskip skip, int flags)
+slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
{
- uma_slab_t slab;
- uma_slabrefcnt_t slabref;
- uma_keg_t keg;
- u_int8_t *mem;
- u_int8_t freei;
- int clearfull;
+ uint8_t freei;
- if (skip < SKIP_DTOR && zone->uz_dtor)
- zone->uz_dtor(item, zone->uz_size, udata);
-
- if (skip < SKIP_FINI && zone->uz_fini)
- zone->uz_fini(item, zone->uz_size);
-
- ZONE_LOCK(zone);
-
- if (flags & ZFREE_STATFAIL)
- zone->uz_fails++;
- if (flags & ZFREE_STATFREE)
- zone->uz_frees++;
-
- if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
- mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
- keg = zone_first_keg(zone); /* Must only be one. */
- if (zone->uz_flags & UMA_ZONE_HASH) {
- slab = hash_sfind(&keg->uk_hash, mem);
- } else {
- mem += keg->uk_pgoff;
- slab = (uma_slab_t)mem;
- }
- } else {
- /* This prevents redundant lookups via free(). */
- if ((zone->uz_flags & UMA_ZONE_MALLOC) && udata != NULL)
- slab = (uma_slab_t)udata;
- else
- slab = vtoslab((vm_offset_t)item);
- keg = slab->us_keg;
- keg_relock(keg, zone);
- }
+ mtx_assert(&keg->uk_lock, MA_OWNED);
MPASS(keg == slab->us_keg);
/* Do we need to remove from any lists? */
@@ -2851,49 +2915,102 @@ zone_free_item(uma_zone_t zone, void *item, void *udata,
LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
}
- /* Slab management stuff */
- freei = ((unsigned long)item - (unsigned long)slab->us_data)
- / keg->uk_rsize;
-
-#ifdef INVARIANTS
- if (!skip)
- uma_dbg_free(zone, slab, item);
-#endif
-
- if (keg->uk_flags & UMA_ZONE_REFCNT) {
- slabref = (uma_slabrefcnt_t)slab;
- slabref->us_freelist[freei].us_item = slab->us_firstfree;
- } else {
- slab->us_freelist[freei].us_item = slab->us_firstfree;
- }
- slab->us_firstfree = freei;
+ /* Slab management. */
+ freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
+ BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
slab->us_freecount++;
- /* Zone statistics */
+ /* Keg statistics. */
keg->uk_free++;
+}
+
+static void
+zone_release(uma_zone_t zone, void **bucket, int cnt)
+{
+ void *item;
+ uma_slab_t slab;
+ uma_keg_t keg;
+ uint8_t *mem;
+ int clearfull;
+ int i;
clearfull = 0;
- if (keg->uk_flags & UMA_ZFLAG_FULL) {
- if (keg->uk_pages < keg->uk_maxpages) {
- keg->uk_flags &= ~UMA_ZFLAG_FULL;
- clearfull = 1;
+ keg = zone_first_keg(zone);
+ KEG_LOCK(keg);
+ for (i = 0; i < cnt; i++) {
+ item = bucket[i];
+ if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
+ mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
+ if (zone->uz_flags & UMA_ZONE_HASH) {
+ slab = hash_sfind(&keg->uk_hash, mem);
+ } else {
+ mem += keg->uk_pgoff;
+ slab = (uma_slab_t)mem;
+ }
+ } else {
+ slab = vtoslab((vm_offset_t)item);
+ if (slab->us_keg != keg) {
+ KEG_UNLOCK(keg);
+ keg = slab->us_keg;
+ KEG_LOCK(keg);
+ }
}
+ slab_free_item(keg, slab, item);
+ if (keg->uk_flags & UMA_ZFLAG_FULL) {
+ if (keg->uk_pages < keg->uk_maxpages) {
+ keg->uk_flags &= ~UMA_ZFLAG_FULL;
+ clearfull = 1;
+ }
- /*
- * We can handle one more allocation. Since we're clearing ZFLAG_FULL,
- * wake up all procs blocked on pages. This should be uncommon, so
- * keeping this simple for now (rather than adding count of blocked
- * threads etc).
- */
- wakeup(keg);
+ /*
+ * We can handle one more allocation. Since we're
+ * clearing ZFLAG_FULL, wake up all procs blocked
+ * on pages. This should be uncommon, so keeping this
+ * simple for now (rather than adding count of blocked
+ * threads etc).
+ */
+ wakeup(keg);
+ }
}
+ KEG_UNLOCK(keg);
if (clearfull) {
- zone_relock(zone, keg);
+ ZONE_LOCK(zone);
zone->uz_flags &= ~UMA_ZFLAG_FULL;
wakeup(zone);
ZONE_UNLOCK(zone);
- } else
- KEG_UNLOCK(keg);
+ }
+
+}
+
+/*
+ * Frees a single item to any zone.
+ *
+ * Arguments:
+ * zone The zone to free to
+ * item The item we're freeing
+ * udata User supplied data for the dtor
+ * skip Skip dtors and finis
+ */
+static void
+zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
+{
+
+#ifdef INVARIANTS
+ if (skip == SKIP_NONE) {
+ if (zone->uz_flags & UMA_ZONE_MALLOC)
+ uma_dbg_free(zone, udata, item);
+ else
+ uma_dbg_free(zone, NULL, item);
+ }
+#endif
+ if (skip < SKIP_DTOR && zone->uz_dtor)
+ zone->uz_dtor(item, zone->uz_size, udata);
+
+ if (skip < SKIP_FINI && zone->uz_fini)
+ zone->uz_fini(item, zone->uz_size);
+
+ atomic_add_long(&zone->uz_frees, 1);
+ zone->uz_release(zone->uz_arg, &item, 1);
}
/* See uma.h */
@@ -2902,13 +3019,15 @@ uma_zone_set_max(uma_zone_t zone, int nitems)
{
uma_keg_t keg;
- ZONE_LOCK(zone);
keg = zone_first_keg(zone);
+ if (keg == NULL)
+ return (0);
+ KEG_LOCK(keg);
keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
if (keg->uk_maxpages * keg->uk_ipers < nitems)
keg->uk_maxpages += keg->uk_ppera;
nitems = keg->uk_maxpages * keg->uk_ipers;
- ZONE_UNLOCK(zone);
+ KEG_UNLOCK(keg);
return (nitems);
}
@@ -2920,15 +3039,37 @@ uma_zone_get_max(uma_zone_t zone)
int nitems;
uma_keg_t keg;
- ZONE_LOCK(zone);
keg = zone_first_keg(zone);
+ if (keg == NULL)
+ return (0);
+ KEG_LOCK(keg);
nitems = keg->uk_maxpages * keg->uk_ipers;
- ZONE_UNLOCK(zone);
+ KEG_UNLOCK(keg);
return (nitems);
}
/* See uma.h */
+void
+uma_zone_set_warning(uma_zone_t zone, const char *warning)
+{
+
+ ZONE_LOCK(zone);
+ zone->uz_warning = warning;
+ ZONE_UNLOCK(zone);
+}
+
+/* See uma.h */
+void
+uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
+{
+
+ ZONE_LOCK(zone);
+ TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
+ ZONE_UNLOCK(zone);
+}
+
+/* See uma.h */
int
uma_zone_get_cur(uma_zone_t zone)
{
@@ -2957,12 +3098,13 @@ uma_zone_set_init(uma_zone_t zone, uma_init uminit)
{
uma_keg_t keg;
- ZONE_LOCK(zone);
keg = zone_first_keg(zone);
+ KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
+ KEG_LOCK(keg);
KASSERT(keg->uk_pages == 0,
("uma_zone_set_init on non-empty keg"));
keg->uk_init = uminit;
- ZONE_UNLOCK(zone);
+ KEG_UNLOCK(keg);
}
/* See uma.h */
@@ -2971,18 +3113,20 @@ uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
{
uma_keg_t keg;
- ZONE_LOCK(zone);
keg = zone_first_keg(zone);
+ KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
+ KEG_LOCK(keg);
KASSERT(keg->uk_pages == 0,
("uma_zone_set_fini on non-empty keg"));
keg->uk_fini = fini;
- ZONE_UNLOCK(zone);
+ KEG_UNLOCK(keg);
}
/* See uma.h */
void
uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
{
+
ZONE_LOCK(zone);
KASSERT(zone_first_keg(zone)->uk_pages == 0,
("uma_zone_set_zinit on non-empty keg"));
@@ -2994,6 +3138,7 @@ uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
void
uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
{
+
ZONE_LOCK(zone);
KASSERT(zone_first_keg(zone)->uk_pages == 0,
("uma_zone_set_zfini on non-empty keg"));
@@ -3006,10 +3151,13 @@ uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
void
uma_zone_set_freef(uma_zone_t zone, uma_free freef)
{
+ uma_keg_t keg;
- ZONE_LOCK(zone);
- zone_first_keg(zone)->uk_freef = freef;
- ZONE_UNLOCK(zone);
+ keg = zone_first_keg(zone);
+ KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
+ KEG_LOCK(keg);
+ keg->uk_freef = freef;
+ KEG_UNLOCK(keg);
}
/* See uma.h */
@@ -3019,45 +3167,67 @@ uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
{
uma_keg_t keg;
- ZONE_LOCK(zone);
keg = zone_first_keg(zone);
- keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
+ KEG_LOCK(keg);
keg->uk_allocf = allocf;
- ZONE_UNLOCK(zone);
+ KEG_UNLOCK(keg);
+}
+
+/* See uma.h */
+void
+uma_zone_reserve(uma_zone_t zone, int items)
+{
+ uma_keg_t keg;
+
+ keg = zone_first_keg(zone);
+ if (keg == NULL)
+ return;
+ KEG_LOCK(keg);
+ keg->uk_reserve = items;
+ KEG_UNLOCK(keg);
+
+ return;
}
#ifndef __rtems__
/* See uma.h */
int
-uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
+uma_zone_reserve_kva(uma_zone_t zone, int count)
{
uma_keg_t keg;
vm_offset_t kva;
- int pages;
+ u_int pages;
keg = zone_first_keg(zone);
+ if (keg == NULL)
+ return (0);
pages = count / keg->uk_ipers;
if (pages * keg->uk_ipers < count)
pages++;
- kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
-
- if (kva == 0)
- return (0);
- if (obj == NULL)
- obj = vm_object_allocate(OBJT_PHYS, pages);
- else {
- VM_OBJECT_LOCK_INIT(obj, "uma object");
- _vm_object_allocate(OBJT_PHYS, pages, obj);
- }
- ZONE_LOCK(zone);
+#ifdef UMA_MD_SMALL_ALLOC
+ if (keg->uk_ppera > 1) {
+#else
+ if (1) {
+#endif
+ kva = kva_alloc((vm_size_t)pages * UMA_SLAB_SIZE);
+ if (kva == 0)
+ return (0);
+ } else
+ kva = 0;
+ KEG_LOCK(keg);
keg->uk_kva = kva;
- keg->uk_obj = obj;
+ keg->uk_offset = 0;
keg->uk_maxpages = pages;
- keg->uk_allocf = obj_alloc;
- keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
- ZONE_UNLOCK(zone);
+#ifdef UMA_MD_SMALL_ALLOC
+ keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
+#else
+ keg->uk_allocf = noobj_alloc;
+#endif
+ keg->uk_flags |= UMA_ZONE_NOFREE;
+ KEG_UNLOCK(keg);
+
return (1);
}
@@ -3070,7 +3240,9 @@ uma_prealloc(uma_zone_t zone, int items)
uma_keg_t keg;
keg = zone_first_keg(zone);
- ZONE_LOCK(zone);
+ if (keg == NULL)
+ return;
+ KEG_LOCK(keg);
slabs = items / keg->uk_ipers;
if (slabs * keg->uk_ipers < items)
slabs++;
@@ -3082,49 +3254,70 @@ uma_prealloc(uma_zone_t zone, int items)
LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
slabs--;
}
- ZONE_UNLOCK(zone);
+ KEG_UNLOCK(keg);
}
#endif /* __rtems__ */
/* See uma.h */
-u_int32_t *
-uma_find_refcnt(uma_zone_t zone, void *item)
+static void
+uma_reclaim_locked(bool kmem_danger)
{
- uma_slabrefcnt_t slabref;
- uma_keg_t keg;
- u_int32_t *refcnt;
- int idx;
-
- slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
- (~UMA_SLAB_MASK));
- keg = slabref->us_keg;
- KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
- ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
- idx = ((unsigned long)item - (unsigned long)slabref->us_data)
- / keg->uk_rsize;
- refcnt = &slabref->us_freelist[idx].us_refcnt;
- return refcnt;
-}
-/* See uma.h */
-void
-uma_reclaim(void)
-{
#ifdef UMA_DEBUG
printf("UMA: vm asked us to release pages!\n");
#endif
+ sx_assert(&uma_drain_lock, SA_XLOCKED);
bucket_enable();
zone_foreach(zone_drain);
+#ifndef __rtems__
+ if (vm_page_count_min() || kmem_danger) {
+ cache_drain_safe(NULL);
+ zone_foreach(zone_drain);
+ }
+#endif /* __rtems__ */
/*
* Some slabs may have been freed but this zone will be visited early
* we visit again so that we can free pages that are empty once other
* zones are drained. We have to do the same for buckets.
*/
zone_drain(slabzone);
- zone_drain(slabrefzone);
bucket_zone_drain();
}
+void
+uma_reclaim(void)
+{
+
+ sx_xlock(&uma_drain_lock);
+ uma_reclaim_locked(false);
+ sx_xunlock(&uma_drain_lock);
+}
+
+static int uma_reclaim_needed;
+
+void
+uma_reclaim_wakeup(void)
+{
+
+ uma_reclaim_needed = 1;
+ wakeup(&uma_reclaim_needed);
+}
+
+void
+uma_reclaim_worker(void *arg __unused)
+{
+
+ sx_xlock(&uma_drain_lock);
+ for (;;) {
+ sx_sleep(&uma_reclaim_needed, &uma_drain_lock, PVM,
+ "umarcl", 0);
+ if (uma_reclaim_needed) {
+ uma_reclaim_needed = 0;
+ uma_reclaim_locked(true);
+ }
+ }
+}
+
/* See uma.h */
int
uma_zone_exhausted(uma_zone_t zone)
@@ -3145,11 +3338,11 @@ uma_zone_exhausted_nolock(uma_zone_t zone)
#ifndef __rtems__
void *
-uma_large_malloc(int size, int wait)
+uma_large_malloc(vm_size_t size, int wait)
{
void *mem;
uma_slab_t slab;
- u_int8_t flags;
+ uint8_t flags;
slab = zone_alloc_item(slabzone, NULL, wait);
if (slab == NULL)
@@ -3161,8 +3354,7 @@ uma_large_malloc(int size, int wait)
slab->us_flags = flags | UMA_SLAB_MALLOC;
slab->us_size = size;
} else {
- zone_free_item(slabzone, slab, NULL, SKIP_NONE,
- ZFREE_STATFAIL | ZFREE_STATFREE);
+ zone_free_item(slabzone, slab, NULL, SKIP_NONE);
}
return (mem);
@@ -3171,12 +3363,24 @@ uma_large_malloc(int size, int wait)
void
uma_large_free(uma_slab_t slab)
{
- vsetobj((vm_offset_t)slab->us_data, kmem_object);
+
page_free(slab->us_data, slab->us_size, slab->us_flags);
- zone_free_item(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE);
+ zone_free_item(slabzone, slab, NULL, SKIP_NONE);
}
#endif /* __rtems__ */
+static void
+uma_zero_item(void *item, uma_zone_t zone)
+{
+ int i;
+
+ if (zone->uz_flags & UMA_ZONE_PCPU) {
+ CPU_FOREACH(i)
+ bzero(zpcpu_get_cpu(item, i), zone->uz_size);
+ } else
+ bzero(item, zone->uz_size);
+}
+
void
uma_print_stats(void)
{
@@ -3186,9 +3390,8 @@ uma_print_stats(void)
static void
slab_print(uma_slab_t slab)
{
- printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
- slab->us_keg, slab->us_data, slab->us_freecount,
- slab->us_firstfree);
+ printf("slab: keg %p, data %p, freecount %d\n",
+ slab->us_keg, slab->us_data, slab->us_freecount);
}
static void
@@ -3255,11 +3458,11 @@ uma_print_zone(uma_zone_t zone)
* directly so that we don't have to.
*/
static void
-uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
- u_int64_t *freesp, u_int64_t *sleepsp)
+uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
+ uint64_t *freesp, uint64_t *sleepsp)
{
uma_cache_t cache;
- u_int64_t allocs, frees, sleeps;
+ uint64_t allocs, frees, sleeps;
int cachefree, cpu;
allocs = frees = sleeps = 0;
@@ -3296,12 +3499,12 @@ sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
int count;
count = 0;
- mtx_lock(&uma_mtx);
+ rw_rlock(&uma_rwlock);
LIST_FOREACH(kz, &uma_kegs, uk_link) {
LIST_FOREACH(z, &kz->uk_zones, uz_link)
count++;
}
- mtx_unlock(&uma_mtx);
+ rw_runlock(&uma_rwlock);
return (sysctl_handle_int(oidp, &count, 0, req));
}
@@ -3324,9 +3527,10 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
if (error != 0)
return (error);
sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
+ sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
count = 0;
- mtx_lock(&uma_mtx);
+ rw_rlock(&uma_rwlock);
LIST_FOREACH(kz, &uma_kegs, uk_link) {
LIST_FOREACH(z, &kz->uk_zones, uz_link)
count++;
@@ -3366,7 +3570,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
(LIST_FIRST(&kz->uk_zones) != z))
uth.uth_zone_flags = UTH_ZONE_SECONDARY;
- LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
+ LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
uth.uth_zone_free += bucket->ub_cnt;
uth.uth_allocs = z->uz_allocs;
uth.uth_frees = z->uz_frees;
@@ -3402,24 +3606,146 @@ skip:
ZONE_UNLOCK(z);
}
}
- mtx_unlock(&uma_mtx);
+ rw_runlock(&uma_rwlock);
error = sbuf_finish(&sbuf);
sbuf_delete(&sbuf);
return (error);
}
+int
+sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
+{
+ uma_zone_t zone = *(uma_zone_t *)arg1;
+ int error, max;
+
+ max = uma_zone_get_max(zone);
+ error = sysctl_handle_int(oidp, &max, 0, req);
+ if (error || !req->newptr)
+ return (error);
+
+ uma_zone_set_max(zone, max);
+
+ return (0);
+}
+
+int
+sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
+{
+ uma_zone_t zone = *(uma_zone_t *)arg1;
+ int cur;
+
+ cur = uma_zone_get_cur(zone);
+ return (sysctl_handle_int(oidp, &cur, 0, req));
+}
+
+#ifdef INVARIANTS
+static uma_slab_t
+uma_dbg_getslab(uma_zone_t zone, void *item)
+{
+ uma_slab_t slab;
+ uma_keg_t keg;
+ uint8_t *mem;
+
+ mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
+ if (zone->uz_flags & UMA_ZONE_VTOSLAB) {
+ slab = vtoslab((vm_offset_t)mem);
+ } else {
+ /*
+ * It is safe to return the slab here even though the
+ * zone is unlocked because the item's allocation state
+ * essentially holds a reference.
+ */
+ ZONE_LOCK(zone);
+ keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
+ if (keg->uk_flags & UMA_ZONE_HASH)
+ slab = hash_sfind(&keg->uk_hash, mem);
+ else
+ slab = (uma_slab_t)(mem + keg->uk_pgoff);
+ ZONE_UNLOCK(zone);
+ }
+
+ return (slab);
+}
+
+/*
+ * Set up the slab's freei data such that uma_dbg_free can function.
+ *
+ */
+static void
+uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
+{
+ uma_keg_t keg;
+ int freei;
+
+ if (zone_first_keg(zone) == NULL)
+ return;
+ if (slab == NULL) {
+ slab = uma_dbg_getslab(zone, item);
+ if (slab == NULL)
+ panic("uma: item %p did not belong to zone %s\n",
+ item, zone->uz_name);
+ }
+ keg = slab->us_keg;
+ freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
+
+ if (BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
+ panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
+ item, zone, zone->uz_name, slab, freei);
+ BIT_SET_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
+
+ return;
+}
+
+/*
+ * Verifies freed addresses. Checks for alignment, valid slab membership
+ * and duplicate frees.
+ *
+ */
+static void
+uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
+{
+ uma_keg_t keg;
+ int freei;
+
+ if (zone_first_keg(zone) == NULL)
+ return;
+ if (slab == NULL) {
+ slab = uma_dbg_getslab(zone, item);
+ if (slab == NULL)
+ panic("uma: Freed item %p did not belong to zone %s\n",
+ item, zone->uz_name);
+ }
+ keg = slab->us_keg;
+ freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
+
+ if (freei >= keg->uk_ipers)
+ panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
+ item, zone, zone->uz_name, slab, freei);
+
+ if (((freei * keg->uk_rsize) + slab->us_data) != item)
+ panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
+ item, zone, zone->uz_name, slab, freei);
+
+ if (!BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
+ panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
+ item, zone, zone->uz_name, slab, freei);
+
+ BIT_CLR_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
+}
+#endif /* INVARIANTS */
+
#ifndef __rtems__
#ifdef DDB
DB_SHOW_COMMAND(uma, db_show_uma)
{
- u_int64_t allocs, frees, sleeps;
+ uint64_t allocs, frees, sleeps;
uma_bucket_t bucket;
uma_keg_t kz;
uma_zone_t z;
int cachefree;
- db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
- "Requests", "Sleeps");
+ db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
+ "Free", "Requests", "Sleeps", "Bucket");
LIST_FOREACH(kz, &uma_kegs, uk_link) {
LIST_FOREACH(z, &kz->uk_zones, uz_link) {
if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
@@ -3433,16 +3759,38 @@ DB_SHOW_COMMAND(uma, db_show_uma)
if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
(LIST_FIRST(&kz->uk_zones) != z)))
cachefree += kz->uk_free;
- LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
+ LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
cachefree += bucket->ub_cnt;
- db_printf("%18s %8ju %8jd %8d %12ju %8ju\n", z->uz_name,
- (uintmax_t)kz->uk_size,
+ db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
+ z->uz_name, (uintmax_t)kz->uk_size,
(intmax_t)(allocs - frees), cachefree,
- (uintmax_t)allocs, sleeps);
+ (uintmax_t)allocs, sleeps, z->uz_count);
if (db_pager_quit)
return;
}
}
}
-#endif
+
+DB_SHOW_COMMAND(umacache, db_show_umacache)
+{
+ uint64_t allocs, frees;
+ uma_bucket_t bucket;
+ uma_zone_t z;
+ int cachefree;
+
+ db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
+ "Requests", "Bucket");
+ LIST_FOREACH(z, &uma_cachezones, uz_link) {
+ uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
+ LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
+ cachefree += bucket->ub_cnt;
+ db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
+ z->uz_name, (uintmax_t)z->uz_size,
+ (intmax_t)(allocs - frees), cachefree,
+ (uintmax_t)allocs, z->uz_count);
+ if (db_pager_quit)
+ return;
+ }
+}
+#endif /* DDB */
#endif /* __rtems__ */
diff --git a/freebsd/sys/vm/uma_dbg.c b/freebsd/sys/vm/uma_dbg.c
index 1506674f..0c6be82d 100644
--- a/freebsd/sys/vm/uma_dbg.c
+++ b/freebsd/sys/vm/uma_dbg.c
@@ -35,8 +35,11 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_vm.h>
+
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/bitset.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/queue.h>
@@ -50,28 +53,38 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>
+#include <vm/memguard.h>
-static const u_int32_t uma_junk = 0xdeadc0de;
+static const uint32_t uma_junk = 0xdeadc0de;
/*
* Checks an item to make sure it hasn't been overwritten since it was freed,
* prior to subsequent reallocation.
*
* Complies with standard ctor arg/return
- *
*/
int
trash_ctor(void *mem, int size, void *arg, int flags)
{
int cnt;
- u_int32_t *p;
+ uint32_t *p;
+
+#ifdef DEBUG_MEMGUARD
+ if (is_memguard_addr(mem))
+ return (0);
+#endif
cnt = size / sizeof(uma_junk);
for (p = mem; cnt > 0; cnt--, p++)
if (*p != uma_junk) {
+#ifdef INVARIANTS
+ panic("Memory modified after free %p(%d) val=%x @ %p\n",
+ mem, size, *p, p);
+#else
printf("Memory modified after free %p(%d) val=%x @ %p\n",
mem, size, *p, p);
+#endif
return (0);
}
return (0);
@@ -87,7 +100,12 @@ void
trash_dtor(void *mem, int size, void *arg)
{
int cnt;
- u_int32_t *p;
+ uint32_t *p;
+
+#ifdef DEBUG_MEMGUARD
+ if (is_memguard_addr(mem))
+ return;
+#endif
cnt = size / sizeof(uma_junk);
@@ -124,9 +142,14 @@ int
mtrash_ctor(void *mem, int size, void *arg, int flags)
{
struct malloc_type **ksp;
- u_int32_t *p = mem;
+ uint32_t *p = mem;
int cnt;
+#ifdef DEBUG_MEMGUARD
+ if (is_memguard_addr(mem))
+ return (0);
+#endif
+
size -= sizeof(struct malloc_type *);
ksp = (struct malloc_type **)mem;
ksp += size / sizeof(struct malloc_type *);
@@ -152,7 +175,12 @@ void
mtrash_dtor(void *mem, int size, void *arg)
{
int cnt;
- u_int32_t *p;
+ uint32_t *p;
+
+#ifdef DEBUG_MEMGUARD
+ if (is_memguard_addr(mem))
+ return;
+#endif
size -= sizeof(struct malloc_type *);
cnt = size / sizeof(uma_junk);
@@ -172,6 +200,11 @@ mtrash_init(void *mem, int size, int flags)
{
struct malloc_type **ksp;
+#ifdef DEBUG_MEMGUARD
+ if (is_memguard_addr(mem))
+ return (0);
+#endif
+
mtrash_dtor(mem, size, NULL);
ksp = (struct malloc_type **)mem;
@@ -192,124 +225,3 @@ mtrash_fini(void *mem, int size)
{
(void)mtrash_ctor(mem, size, NULL, 0);
}
-
-static uma_slab_t
-uma_dbg_getslab(uma_zone_t zone, void *item)
-{
- uma_slab_t slab;
- uma_keg_t keg;
- u_int8_t *mem;
-
- mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
- if (zone->uz_flags & UMA_ZONE_VTOSLAB) {
- slab = vtoslab((vm_offset_t)mem);
- } else {
- keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
- if (keg->uk_flags & UMA_ZONE_HASH)
- slab = hash_sfind(&keg->uk_hash, mem);
- else
- slab = (uma_slab_t)(mem + keg->uk_pgoff);
- }
-
- return (slab);
-}
-
-/*
- * Set up the slab's freei data such that uma_dbg_free can function.
- *
- */
-
-void
-uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
-{
- uma_keg_t keg;
- uma_slabrefcnt_t slabref;
- int freei;
-
- if (slab == NULL) {
- slab = uma_dbg_getslab(zone, item);
- if (slab == NULL)
- panic("uma: item %p did not belong to zone %s\n",
- item, zone->uz_name);
- }
- keg = slab->us_keg;
-
- freei = ((unsigned long)item - (unsigned long)slab->us_data)
- / keg->uk_rsize;
-
- if (keg->uk_flags & UMA_ZONE_REFCNT) {
- slabref = (uma_slabrefcnt_t)slab;
- slabref->us_freelist[freei].us_item = 255;
- } else {
- slab->us_freelist[freei].us_item = 255;
- }
-
- return;
-}
-
-/*
- * Verifies freed addresses. Checks for alignment, valid slab membership
- * and duplicate frees.
- *
- */
-
-void
-uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
-{
- uma_keg_t keg;
- uma_slabrefcnt_t slabref;
- int freei;
-
- if (slab == NULL) {
- slab = uma_dbg_getslab(zone, item);
- if (slab == NULL)
- panic("uma: Freed item %p did not belong to zone %s\n",
- item, zone->uz_name);
- }
- keg = slab->us_keg;
-
- freei = ((unsigned long)item - (unsigned long)slab->us_data)
- / keg->uk_rsize;
-
- if (freei >= keg->uk_ipers)
- panic("zone: %s(%p) slab %p freelist %d out of range 0-%d\n",
- zone->uz_name, zone, slab, freei, keg->uk_ipers-1);
-
- if (((freei * keg->uk_rsize) + slab->us_data) != item) {
- printf("zone: %s(%p) slab %p freed address %p unaligned.\n",
- zone->uz_name, zone, slab, item);
- panic("should be %p\n",
- (freei * keg->uk_rsize) + slab->us_data);
- }
-
- if (keg->uk_flags & UMA_ZONE_REFCNT) {
- slabref = (uma_slabrefcnt_t)slab;
- if (slabref->us_freelist[freei].us_item != 255) {
- printf("Slab at %p, freei %d = %d.\n",
- slab, freei, slabref->us_freelist[freei].us_item);
- panic("Duplicate free of item %p from zone %p(%s)\n",
- item, zone, zone->uz_name);
- }
-
- /*
- * When this is actually linked into the slab this will change.
- * Until then the count of valid slabs will make sure we don't
- * accidentally follow this and assume it's a valid index.
- */
- slabref->us_freelist[freei].us_item = 0;
- } else {
- if (slab->us_freelist[freei].us_item != 255) {
- printf("Slab at %p, freei %d = %d.\n",
- slab, freei, slab->us_freelist[freei].us_item);
- panic("Duplicate free of item %p from zone %p(%s)\n",
- item, zone, zone->uz_name);
- }
-
- /*
- * When this is actually linked into the slab this will change.
- * Until then the count of valid slabs will make sure we don't
- * accidentally follow this and assume it's a valid index.
- */
- slab->us_freelist[freei].us_item = 0;
- }
-}
diff --git a/freebsd/sys/vm/uma_dbg.h b/freebsd/sys/vm/uma_dbg.h
index 341cecbf..e3c9df02 100644
--- a/freebsd/sys/vm/uma_dbg.h
+++ b/freebsd/sys/vm/uma_dbg.h
@@ -49,7 +49,4 @@ void mtrash_dtor(void *mem, int size, void *arg);
int mtrash_init(void *mem, int size, int flags);
void mtrash_fini(void *mem, int size);
-void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
-void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
-
#endif /* VM_UMA_DBG_H */
diff --git a/freebsd/sys/vm/uma_int.h b/freebsd/sys/vm/uma_int.h
index d372a8dd..679e2518 100644
--- a/freebsd/sys/vm/uma_int.h
+++ b/freebsd/sys/vm/uma_int.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2002-2005, 2009 Jeffrey Roberson <jeff@FreeBSD.org>
+ * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
* Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
* All rights reserved.
*
@@ -28,6 +28,8 @@
*
*/
+#include <sys/_task.h>
+
/*
* This file includes definitions, structures, prototypes, and inlines that
* should not be used outside of the actual implementation of UMA.
@@ -45,20 +47,9 @@
*
* The uma_slab_t may be embedded in a UMA_SLAB_SIZE chunk of memory or it may
* be allocated off the page from a special slab zone. The free list within a
- * slab is managed with a linked list of indices, which are 8 bit values. If
- * UMA_SLAB_SIZE is defined to be too large I will have to switch to 16bit
- * values. Currently on alpha you can get 250 or so 32 byte items and on x86
- * you can get 250 or so 16byte items. For item sizes that would yield more
- * than 10% memory waste we potentially allocate a separate uma_slab_t if this
- * will improve the number of items per slab that will fit.
- *
- * Other potential space optimizations are storing the 8bit of linkage in space
- * wasted between items due to alignment problems. This may yield a much better
- * memory footprint for certain sizes of objects. Another alternative is to
- * increase the UMA_SLAB_SIZE, or allow for dynamic slab sizes. I prefer
- * dynamic slab sizes because we could stick with 8 bit indices and only use
- * large slab sizes for zones with a lot of waste per slab. This may create
- * inefficiencies in the vm subsystem due to fragmentation in the address space.
+ * slab is managed with a bitmask. For item sizes that would yield more than
+ * 10% memory waste we potentially allocate a separate uma_slab_t if this will
+ * improve the number of items per slab that will fit.
*
* The only really gross cases, with regards to memory waste, are for those
* items that are just over half the page size. You can get nearly 50% waste,
@@ -119,9 +110,11 @@
#define UMA_SLAB_SHIFT PAGE_SHIFT /* Number of bits PAGE_MASK */
#define UMA_BOOT_PAGES 64 /* Pages allocated for startup */
+#define UMA_BOOT_PAGES_ZONES 32 /* Multiplier for pages to reserve */
+ /* if uma_zone > PAGE_SIZE */
-/* Max waste before going to off page slab management */
-#define UMA_MAX_WASTE (UMA_SLAB_SIZE / 10)
+/* Max waste percentage before going to off page slab management */
+#define UMA_MAX_WASTE 10
/*
* I doubt there will be many cases where this is exceeded. This is the initial
@@ -133,14 +126,9 @@
/*
* I should investigate other hashing algorithms. This should yield a low
* number of collisions if the pages are relatively contiguous.
- *
- * This is the same algorithm that most processor caches use.
- *
- * I'm shifting and masking instead of % because it should be faster.
*/
-#define UMA_HASH(h, s) ((((unsigned long)s) >> UMA_SLAB_SHIFT) & \
- (h)->uh_hashmask)
+#define UMA_HASH(h, s) ((((uintptr_t)s) >> UMA_SLAB_SHIFT) & (h)->uh_hashmask)
#define UMA_HASH_INSERT(h, s, mem) \
SLIST_INSERT_HEAD(&(h)->uh_slab_hash[UMA_HASH((h), \
@@ -184,8 +172,8 @@ typedef struct uma_bucket * uma_bucket_t;
struct uma_cache {
uma_bucket_t uc_freebucket; /* Bucket we're freeing to */
uma_bucket_t uc_allocbucket; /* Bucket to allocate from */
- u_int64_t uc_allocs; /* Count of allocations */
- u_int64_t uc_frees; /* Count of frees */
+ uint64_t uc_allocs; /* Count of allocations */
+ uint64_t uc_frees; /* Count of frees */
} UMA_ALIGN;
typedef struct uma_cache * uma_cache_t;
@@ -197,45 +185,54 @@ typedef struct uma_cache * uma_cache_t;
*
*/
struct uma_keg {
- LIST_ENTRY(uma_keg) uk_link; /* List of all kegs */
-
- struct mtx uk_lock; /* Lock for the keg */
+ struct mtx_padalign uk_lock; /* Lock for the keg */
struct uma_hash uk_hash;
- const char *uk_name; /* Name of creating zone. */
LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */
LIST_HEAD(,uma_slab) uk_part_slab; /* partially allocated slabs */
LIST_HEAD(,uma_slab) uk_free_slab; /* empty slab list */
LIST_HEAD(,uma_slab) uk_full_slab; /* full slabs */
- u_int32_t uk_recurse; /* Allocation recursion count */
- u_int32_t uk_align; /* Alignment mask */
- u_int32_t uk_pages; /* Total page count */
- u_int32_t uk_free; /* Count of items free in slabs */
- u_int32_t uk_size; /* Requested size of each item */
- u_int32_t uk_rsize; /* Real size of each item */
- u_int32_t uk_maxpages; /* Maximum number of pages to alloc */
+ uint32_t uk_align; /* Alignment mask */
+ uint32_t uk_pages; /* Total page count */
+ uint32_t uk_free; /* Count of items free in slabs */
+ uint32_t uk_reserve; /* Number of reserved items. */
+ uint32_t uk_size; /* Requested size of each item */
+ uint32_t uk_rsize; /* Real size of each item */
+ uint32_t uk_maxpages; /* Maximum number of pages to alloc */
uma_init uk_init; /* Keg's init routine */
uma_fini uk_fini; /* Keg's fini routine */
uma_alloc uk_allocf; /* Allocation function */
uma_free uk_freef; /* Free routine */
- struct vm_object *uk_obj; /* Zone specific object */
- vm_offset_t uk_kva; /* Base kva for zones with objs */
+ u_long uk_offset; /* Next free offset from base KVA */
+ vm_offset_t uk_kva; /* Zone base KVA */
uma_zone_t uk_slabzone; /* Slab zone backing us, if OFFPAGE */
- u_int16_t uk_pgoff; /* Offset to uma_slab struct */
- u_int16_t uk_ppera; /* pages per allocation from backend */
- u_int16_t uk_ipers; /* Items per slab */
- u_int32_t uk_flags; /* Internal flags */
+ uint16_t uk_slabsize; /* Slab size for this keg */
+ uint16_t uk_pgoff; /* Offset to uma_slab struct */
+ uint16_t uk_ppera; /* pages per allocation from backend */
+ uint16_t uk_ipers; /* Items per slab */
+ uint32_t uk_flags; /* Internal flags */
+
+ /* Least used fields go to the last cache line. */
+ const char *uk_name; /* Name of creating zone. */
+ LIST_ENTRY(uma_keg) uk_link; /* List of all kegs */
};
typedef struct uma_keg * uma_keg_t;
-/* Page management structure */
+/*
+ * Free bits per-slab.
+ */
+#define SLAB_SETSIZE (PAGE_SIZE / UMA_SMALLEST_UNIT)
+BITSET_DEFINE(slabbits, SLAB_SETSIZE);
-/* Sorry for the union, but space efficiency is important */
-struct uma_slab_head {
+/*
+ * The slab structure manages a single contiguous allocation from backing
+ * store and subdivides it into individually allocatable items.
+ */
+struct uma_slab {
uma_keg_t us_keg; /* Keg we live in */
union {
LIST_ENTRY(uma_slab) _us_link; /* slabs in zone */
@@ -244,58 +241,24 @@ struct uma_slab_head {
#endif /* __rtems__ */
} us_type;
SLIST_ENTRY(uma_slab) us_hlink; /* Link for hash table */
- u_int8_t *us_data; /* First item */
- u_int8_t us_flags; /* Page flags see uma.h */
- u_int8_t us_freecount; /* How many are free? */
- u_int8_t us_firstfree; /* First free item index */
-};
-
-/* The standard slab structure */
-struct uma_slab {
- struct uma_slab_head us_head; /* slab header data */
- struct {
- u_int8_t us_item;
- } us_freelist[1]; /* actual number bigger */
-};
-
-/*
- * The slab structure for UMA_ZONE_REFCNT zones for whose items we
- * maintain reference counters in the slab for.
- */
-struct uma_slab_refcnt {
- struct uma_slab_head us_head; /* slab header data */
- struct {
- u_int8_t us_item;
- u_int32_t us_refcnt;
- } us_freelist[1]; /* actual number bigger */
+ uint8_t *us_data; /* First item */
+ struct slabbits us_free; /* Free bitmask. */
+#ifdef INVARIANTS
+ struct slabbits us_debugfree; /* Debug bitmask. */
+#endif
+ uint16_t us_freecount; /* How many are free? */
+ uint8_t us_flags; /* Page flags see uma.h */
+ uint8_t us_pad; /* Pad to 32bits, unused. */
};
-#define us_keg us_head.us_keg
-#define us_link us_head.us_type._us_link
+#define us_link us_type._us_link
#ifndef __rtems__
-#define us_size us_head.us_type._us_size
+#define us_size us_type._us_size
#endif /* __rtems__ */
-#define us_hlink us_head.us_hlink
-#define us_data us_head.us_data
-#define us_flags us_head.us_flags
-#define us_freecount us_head.us_freecount
-#define us_firstfree us_head.us_firstfree
typedef struct uma_slab * uma_slab_t;
-typedef struct uma_slab_refcnt * uma_slabrefcnt_t;
typedef uma_slab_t (*uma_slaballoc)(uma_zone_t, uma_keg_t, int);
-
-/*
- * These give us the size of one free item reference within our corresponding
- * uma_slab structures, so that our calculations during zone setup are correct
- * regardless of what the compiler decides to do with padding the structure
- * arrays within uma_slab.
- */
-#define UMA_FRITM_SZ (sizeof(struct uma_slab) - sizeof(struct uma_slab_head))
-#define UMA_FRITMREF_SZ (sizeof(struct uma_slab_refcnt) - \
- sizeof(struct uma_slab_head))
-
struct uma_klink {
LIST_ENTRY(uma_klink) kl_link;
uma_keg_t kl_keg;
@@ -309,12 +272,12 @@ typedef struct uma_klink *uma_klink_t;
*
*/
struct uma_zone {
- const char *uz_name; /* Text name of the zone */
- struct mtx *uz_lock; /* Lock for the zone (keg's lock) */
+ struct mtx_padalign uz_lock; /* Lock for the zone */
+ struct mtx_padalign *uz_lockptr;
+ const char *uz_name; /* Text name of the zone */
LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */
- LIST_HEAD(,uma_bucket) uz_full_bucket; /* full buckets */
- LIST_HEAD(,uma_bucket) uz_free_bucket; /* Buckets for frees */
+ LIST_HEAD(,uma_bucket) uz_buckets; /* full buckets */
LIST_HEAD(,uma_klink) uz_kegs; /* List of kegs. */
struct uma_klink uz_klink; /* klink for first keg. */
@@ -323,17 +286,26 @@ struct uma_zone {
uma_ctor uz_ctor; /* Constructor for each allocation */
uma_dtor uz_dtor; /* Destructor */
uma_init uz_init; /* Initializer for each item */
- uma_fini uz_fini; /* Discards memory */
+ uma_fini uz_fini; /* Finalizer for each item. */
+ uma_import uz_import; /* Import new memory to cache. */
+ uma_release uz_release; /* Release memory from cache. */
+ void *uz_arg; /* Import/release argument. */
+
+ uint32_t uz_flags; /* Flags inherited from kegs */
+ uint32_t uz_size; /* Size inherited from kegs */
- u_int32_t uz_flags; /* Flags inherited from kegs */
- u_int32_t uz_size; /* Size inherited from kegs */
+ volatile u_long uz_allocs UMA_ALIGN; /* Total number of allocations */
+ volatile u_long uz_fails; /* Total number of alloc failures */
+ volatile u_long uz_frees; /* Total number of frees */
+ uint64_t uz_sleeps; /* Total number of alloc sleeps */
+ uint16_t uz_count; /* Amount of items in full bucket */
+ uint16_t uz_count_min; /* Minimal amount of items there */
- u_int64_t uz_allocs UMA_ALIGN; /* Total number of allocations */
- u_int64_t uz_frees; /* Total number of frees */
- u_int64_t uz_fails; /* Total number of alloc failures */
- u_int64_t uz_sleeps; /* Total number of alloc sleeps */
- uint16_t uz_fills; /* Outstanding bucket fills */
- uint16_t uz_count; /* Highest value ub_ptr can have */
+ /* The next two fields are used to print a rate-limited warnings. */
+ const char *uz_warning; /* Warning to print on failure */
+ struct timeval uz_ratecheck; /* Warnings rate-limiting */
+
+ struct task uz_maxaction; /* Task to run when at limit */
/*
* This HAS to be the last item because we adjust the zone size
@@ -345,23 +317,31 @@ struct uma_zone {
/*
* These flags must not overlap with the UMA_ZONE flags specified in uma.h.
*/
-#define UMA_ZFLAG_BUCKET 0x02000000 /* Bucket zone. */
#define UMA_ZFLAG_MULTI 0x04000000 /* Multiple kegs in the zone. */
#define UMA_ZFLAG_DRAINING 0x08000000 /* Running zone_drain. */
-#define UMA_ZFLAG_PRIVALLOC 0x10000000 /* Use uz_allocf. */
+#define UMA_ZFLAG_BUCKET 0x10000000 /* Bucket zone. */
#define UMA_ZFLAG_INTERNAL 0x20000000 /* No offpage no PCPU. */
#define UMA_ZFLAG_FULL 0x40000000 /* Reached uz_maxpages */
#define UMA_ZFLAG_CACHEONLY 0x80000000 /* Don't ask VM for buckets. */
-#define UMA_ZFLAG_INHERIT (UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | \
- UMA_ZFLAG_BUCKET)
+#define UMA_ZFLAG_INHERIT \
+ (UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | UMA_ZFLAG_BUCKET)
+
+static inline uma_keg_t
+zone_first_keg(uma_zone_t zone)
+{
+ uma_klink_t klink;
+
+ klink = LIST_FIRST(&zone->uz_kegs);
+ return (klink != NULL) ? klink->kl_keg : NULL;
+}
#undef UMA_ALIGN
#ifdef _KERNEL
/* Internal prototypes */
-static __inline uma_slab_t hash_sfind(struct uma_hash *hash, u_int8_t *data);
-void *uma_large_malloc(int size, int wait);
+static __inline uma_slab_t hash_sfind(struct uma_hash *hash, uint8_t *data);
+void *uma_large_malloc(vm_size_t size, int wait);
void uma_large_free(uma_slab_t slab);
/* Lock Macros */
@@ -375,12 +355,25 @@ void uma_large_free(uma_slab_t slab);
mtx_init(&(k)->uk_lock, (k)->uk_name, \
"UMA zone", MTX_DEF | MTX_DUPOK); \
} while (0)
-
+
#define KEG_LOCK_FINI(k) mtx_destroy(&(k)->uk_lock)
#define KEG_LOCK(k) mtx_lock(&(k)->uk_lock)
#define KEG_UNLOCK(k) mtx_unlock(&(k)->uk_lock)
-#define ZONE_LOCK(z) mtx_lock((z)->uz_lock)
-#define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lock)
+
+#define ZONE_LOCK_INIT(z, lc) \
+ do { \
+ if ((lc)) \
+ mtx_init(&(z)->uz_lock, (z)->uz_name, \
+ (z)->uz_name, MTX_DEF | MTX_DUPOK); \
+ else \
+ mtx_init(&(z)->uz_lock, (z)->uz_name, \
+ "UMA zone", MTX_DEF | MTX_DUPOK); \
+ } while (0)
+
+#define ZONE_LOCK(z) mtx_lock((z)->uz_lockptr)
+#define ZONE_TRYLOCK(z) mtx_trylock((z)->uz_lockptr)
+#define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lockptr)
+#define ZONE_LOCK_FINI(z) mtx_destroy(&(z)->uz_lock)
/*
* Find a slab within a hash table. This is used for OFFPAGE zones to lookup
@@ -394,7 +387,7 @@ void uma_large_free(uma_slab_t slab);
* A pointer to a slab if successful, else NULL.
*/
static __inline uma_slab_t
-hash_sfind(struct uma_hash *hash, u_int8_t *data)
+hash_sfind(struct uma_hash *hash, uint8_t *data)
{
uma_slab_t slab;
int hval;
@@ -402,7 +395,7 @@ hash_sfind(struct uma_hash *hash, u_int8_t *data)
hval = UMA_HASH(hash, data);
SLIST_FOREACH(slab, &hash->uh_slab_hash[hval], us_hlink) {
- if ((u_int8_t *)slab->us_data == data)
+ if ((uint8_t *)slab->us_data == data)
return (slab);
}
return (NULL);
@@ -416,15 +409,9 @@ vtoslab(vm_offset_t va)
{
#ifndef __rtems__
vm_page_t p;
- uma_slab_t slab;
p = PHYS_TO_VM_PAGE(pmap_kextract(va));
- slab = (uma_slab_t )p->object;
-
- if (p->flags & PG_SLAB)
- return (slab);
- else
- return (NULL);
+ return ((uma_slab_t)p->plinks.s.pv);
#else /* __rtems__ */
return (rtems_bsd_page_get_object((void *)va));
#endif /* __rtems__ */
@@ -437,32 +424,20 @@ vsetslab(vm_offset_t va, uma_slab_t slab)
vm_page_t p;
p = PHYS_TO_VM_PAGE(pmap_kextract(va));
- p->object = (vm_object_t)slab;
- p->flags |= PG_SLAB;
+ p->plinks.s.pv = slab;
#else /* __rtems__ */
rtems_bsd_page_set_object((void *)va, slab);
#endif /* __rtems__ */
}
-#ifndef __rtems__
-static __inline void
-vsetobj(vm_offset_t va, vm_object_t obj)
-{
- vm_page_t p;
-
- p = PHYS_TO_VM_PAGE(pmap_kextract(va));
- p->object = obj;
- p->flags &= ~PG_SLAB;
-}
-#endif /* __rtems__ */
-
/*
* The following two functions may be defined by architecture specific code
- * if they can provide more effecient allocation functions. This is useful
+ * if they can provide more efficient allocation functions. This is useful
* for using direct mapped addresses.
*/
-void *uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait);
-void uma_small_free(void *mem, int size, u_int8_t flags);
+void *uma_small_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag,
+ int wait);
+void uma_small_free(void *mem, vm_size_t size, uint8_t flags);
#endif /* _KERNEL */
#endif /* VM_UMA_INT_H */
diff --git a/freebsd/sys/vm/vm.h b/freebsd/sys/vm/vm.h
index 17aea47e..1df51fed 100644
--- a/freebsd/sys/vm/vm.h
+++ b/freebsd/sys/vm/vm.h
@@ -109,8 +109,9 @@ typedef struct vm_object *vm_object_t;
typedef int boolean_t;
/*
- * The exact set of memory attributes is machine dependent. However, every
- * machine is required to define VM_MEMATTR_DEFAULT.
+ * The exact set of memory attributes is machine dependent. However,
+ * every machine is required to define VM_MEMATTR_DEFAULT and
+ * VM_MEMATTR_UNCACHEABLE.
*/
typedef char vm_memattr_t; /* memory attribute codes */
@@ -134,10 +135,6 @@ struct kva_md_info {
vm_offset_t buffer_eva;
vm_offset_t clean_sva;
vm_offset_t clean_eva;
- vm_offset_t pager_sva;
- vm_offset_t pager_eva;
- vm_offset_t bio_transient_sva;
- vm_offset_t bio_transient_eva;
};
extern struct kva_md_info kmi;
diff --git a/freebsd/sys/vm/vm_extern.h b/freebsd/sys/vm/vm_extern.h
index 3b5be268..dcb2f3a6 100644
--- a/freebsd/sys/vm/vm_extern.h
+++ b/freebsd/sys/vm/vm_extern.h
@@ -33,30 +33,46 @@
#ifndef _VM_EXTERN_H_
#define _VM_EXTERN_H_
+struct pmap;
struct proc;
struct vmspace;
struct vnode;
+struct vmem;
#ifdef _KERNEL
+struct cdev;
+struct cdevsw;
-int kernacc(void *, int, int);
-vm_offset_t kmem_alloc(vm_map_t, vm_size_t);
-vm_offset_t kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags,
+/* These operate on kernel virtual addresses only. */
+vm_offset_t kva_alloc(vm_size_t);
+void kva_free(vm_offset_t, vm_size_t);
+
+/* These operate on pageable virtual addresses. */
+vm_offset_t kmap_alloc_wait(vm_map_t, vm_size_t);
+void kmap_free_wakeup(vm_map_t, vm_offset_t, vm_size_t);
+
+/* These operate on virtual addresses backed by memory. */
+vm_offset_t kmem_alloc_attr(struct vmem *, vm_size_t size, int flags,
vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
-vm_offset_t kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags,
- vm_paddr_t low, vm_paddr_t high, u_long alignment, u_long boundary,
+vm_offset_t kmem_alloc_contig(struct vmem *, vm_size_t size, int flags,
+ vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
vm_memattr_t memattr);
-vm_offset_t kmem_alloc_nofault(vm_map_t, vm_size_t);
-vm_offset_t kmem_alloc_nofault_space(vm_map_t, vm_size_t, int);
-vm_offset_t kmem_alloc_wait(vm_map_t, vm_size_t);
-void kmem_free(vm_map_t, vm_offset_t, vm_size_t);
-void kmem_free_wakeup(vm_map_t, vm_offset_t, vm_size_t);
-void kmem_init(vm_offset_t, vm_offset_t);
-vm_offset_t kmem_malloc(vm_map_t map, vm_size_t size, int flags);
-int kmem_back(vm_map_t, vm_offset_t, vm_size_t, int);
+vm_offset_t kmem_malloc(struct vmem *, vm_size_t size, int flags);
+void kmem_free(struct vmem *, vm_offset_t, vm_size_t);
+
+/* This provides memory for previously allocated address space. */
+int kmem_back(vm_object_t, vm_offset_t, vm_size_t, int);
+void kmem_unback(vm_object_t, vm_offset_t, vm_size_t);
+
+/* Bootstrapping. */
vm_map_t kmem_suballoc(vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t,
boolean_t);
+void kmem_init(vm_offset_t, vm_offset_t);
+void kmem_init_zero_region(void);
+void kmeminit(void);
+
void swapout_procs(int);
+int kernacc(void *, int, int);
int useracc(void *, int, int);
int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int);
void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t,
@@ -71,13 +87,22 @@ int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
#endif /* __rtems__ */
void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
-int vm_forkproc(struct thread *, struct proc *, struct thread *, struct vmspace *, int);
+int vm_forkproc(struct thread *, struct proc *, struct thread *,
+ struct vmspace *, int);
void vm_waitproc(struct proc *);
-int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, objtype_t, void *, vm_ooffset_t);
+int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int,
+ objtype_t, void *, vm_ooffset_t);
+int vm_mmap_object(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t,
+ vm_prot_t, int, vm_object_t, vm_ooffset_t, boolean_t, struct thread *);
int vm_mmap_to_errno(int rv);
+int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
+ int *, struct cdev *, struct cdevsw *, vm_ooffset_t *, vm_object_t *);
+int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *,
+ struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
void vm_set_page_size(void);
void vm_sync_icache(vm_map_t, vm_offset_t, vm_size_t);
-struct vmspace *vmspace_alloc(vm_offset_t, vm_offset_t);
+typedef int (*pmap_pinit_t)(struct pmap *pmap);
+struct vmspace *vmspace_alloc(vm_offset_t, vm_offset_t, pmap_pinit_t);
struct vmspace *vmspace_fork(struct vmspace *, vm_ooffset_t *);
int vmspace_exec(struct proc *, vm_offset_t, vm_offset_t);
int vmspace_unshare(struct proc *);
@@ -85,6 +110,7 @@ void vmspace_exit(struct thread *);
struct vmspace *vmspace_acquire_ref(struct proc *);
void vmspace_free(struct vmspace *);
void vmspace_exitfree(struct proc *);
+void vmspace_switch_aio(struct vmspace *);
void vnode_pager_setsize(struct vnode *, vm_ooffset_t);
#ifndef __rtems__
int vslock(void *, size_t);
@@ -110,5 +136,6 @@ struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
void vm_imgact_unmap_page(struct sf_buf *sf);
void vm_thread_dispose(struct thread *td);
int vm_thread_new(struct thread *td, int pages);
+int vm_mlock(struct proc *, struct ucred *, const void *, size_t);
#endif /* _KERNEL */
#endif /* !_VM_EXTERN_H_ */
diff --git a/freebsd/sys/x86/include/machine/bus.h b/freebsd/sys/x86/include/machine/bus.h
index 58e98769..91de8cb2 100644
--- a/freebsd/sys/x86/include/machine/bus.h
+++ b/freebsd/sys/x86/include/machine/bus.h
@@ -123,38 +123,22 @@
#define BUS_SPACE_MAXADDR 0xFFFFFFFF
#endif
+#define BUS_SPACE_INVALID_DATA (~0)
#define BUS_SPACE_UNRESTRICTED (~0)
/*
* Map a region of device bus space into CPU virtual address space.
*/
-static __inline int bus_space_map(bus_space_tag_t t, bus_addr_t addr,
- bus_size_t size, int flags,
- bus_space_handle_t *bshp);
-
-static __inline int
-bus_space_map(bus_space_tag_t t __unused, bus_addr_t addr,
- bus_size_t size __unused, int flags __unused,
- bus_space_handle_t *bshp)
-{
-
- *bshp = addr;
- return (0);
-}
+int bus_space_map(bus_space_tag_t tag, bus_addr_t addr, bus_size_t size,
+ int flags, bus_space_handle_t *bshp);
/*
* Unmap a region of device bus space.
*/
-static __inline void bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh,
- bus_size_t size);
-
-static __inline void
-bus_space_unmap(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused,
- bus_size_t size __unused)
-{
-}
+void bus_space_unmap(bus_space_tag_t tag, bus_space_handle_t bsh,
+ bus_size_t size);
/*
* Get a new handle for a subregion of an already-mapped area of bus space.
@@ -214,6 +198,12 @@ static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag,
bus_space_handle_t handle,
bus_size_t offset);
+#ifdef __amd64__
+static __inline uint64_t bus_space_read_8(bus_space_tag_t tag,
+ bus_space_handle_t handle,
+ bus_size_t offset);
+#endif
+
static __inline u_int8_t
bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle,
bus_size_t offset)
@@ -244,8 +234,16 @@ bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle,
return (*(volatile u_int32_t *)(handle + offset));
}
-#if 0 /* Cause a link error for bus_space_read_8 */
-#define bus_space_read_8(t, h, o) !!! bus_space_read_8 unimplemented !!!
+#ifdef __amd64__
+static __inline uint64_t
+bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
+ bus_size_t offset)
+{
+
+ if (tag == X86_BUS_SPACE_IO) /* No 8 byte IO space access on x86 */
+ return (BUS_SPACE_INVALID_DATA);
+ return (*(volatile uint64_t *)(handle + offset));
+}
#endif
/*
@@ -472,6 +470,12 @@ static __inline void bus_space_write_4(bus_space_tag_t tag,
bus_space_handle_t bsh,
bus_size_t offset, u_int32_t value);
+#ifdef __amd64__
+static __inline void bus_space_write_8(bus_space_tag_t tag,
+ bus_space_handle_t bsh,
+ bus_size_t offset, uint64_t value);
+#endif
+
static __inline void
bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh,
bus_size_t offset, u_int8_t value)
@@ -505,8 +509,17 @@ bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh,
*(volatile u_int32_t *)(bsh + offset) = value;
}
-#if 0 /* Cause a link error for bus_space_write_8 */
-#define bus_space_write_8 !!! bus_space_write_8 not implemented !!!
+#ifdef __amd64__
+static __inline void
+bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t bsh,
+ bus_size_t offset, uint64_t value)
+{
+
+ if (tag == X86_BUS_SPACE_IO) /* No 8 byte IO space access on x86 */
+ return;
+ else
+ *(volatile uint64_t *)(bsh + offset) = value;
+}
#endif
/*
diff --git a/freebsd/sys/x86/include/machine/pci_cfgreg.h b/freebsd/sys/x86/include/machine/pci_cfgreg.h
index ea5e3198..733b91c4 100644
--- a/freebsd/sys/x86/include/machine/pci_cfgreg.h
+++ b/freebsd/sys/x86/include/machine/pci_cfgreg.h
@@ -46,7 +46,7 @@
#define CONF2_ENABLE_CHK 0x0e
#define CONF2_ENABLE_RES 0x0e
-u_long hostb_alloc_start(int type, u_long start, u_long end, u_long count);
+rman_res_t hostb_alloc_start(int type, rman_res_t start, rman_res_t end, rman_res_t count);
int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
int pci_cfgregopen(void);
u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes);
diff --git a/freebsd/sys/x86/pci/pci_bus.c b/freebsd/sys/x86/pci/pci_bus.c
index c14b17ff..1b43f53f 100644
--- a/freebsd/sys/x86/pci/pci_bus.c
+++ b/freebsd/sys/x86/pci/pci_bus.c
@@ -47,7 +47,7 @@ __FBSDID("$FreeBSD$");
#ifdef CPU_ELAN
#include <machine/md_var.h>
#endif
-#include <machine/legacyvar.h>
+#include <x86/legacyvar.h>
#include <machine/pci_cfgreg.h>
#include <machine/resource.h>
@@ -94,7 +94,7 @@ legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin)
/* Pass MSI requests up to the nexus. */
-static int
+int
legacy_pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount,
int *irqs)
{
@@ -105,7 +105,7 @@ legacy_pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount,
irqs));
}
-static int
+int
legacy_pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
{
device_t bus;
@@ -135,7 +135,6 @@ legacy_pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr,
slot, func));
pci_ht_map_msi(hostb, *addr);
return (0);
-
}
static const char *
@@ -527,7 +526,7 @@ legacy_pcib_attach(device_t dev)
device_probe_and_attach(pir);
}
#endif
- device_add_child(dev, "pci", bus);
+ device_add_child(dev, "pci", -1);
return bus_generic_attach(dev);
}
@@ -578,12 +577,11 @@ legacy_pcib_write_ivar(device_t dev, device_t child, int which,
SYSCTL_DECL(_hw_pci);
static unsigned long host_mem_start = 0x80000000;
-TUNABLE_ULONG("hw.pci.host_mem_start", &host_mem_start);
SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start,
0, "Limit the host bridge memory to being above this address.");
-u_long
-hostb_alloc_start(int type, u_long start, u_long end, u_long count)
+rman_res_t
+hostb_alloc_start(int type, rman_res_t start, rman_res_t end, rman_res_t count)
{
if (start + count - 1 != end) {
@@ -597,20 +595,41 @@ hostb_alloc_start(int type, u_long start, u_long end, u_long count)
struct resource *
legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
- u_long start, u_long end, u_long count, u_int flags)
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
-#if defined(__rtems__) && defined(__i386__)
- /*
- * FIXME: This is a quick and dirty hack. See pci_reserve_map().
- */
-#else /* __rtems__ */
- start = hostb_alloc_start(type, start, end, count);
-#endif /* __rtems__ */
- return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
- count, flags));
+#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
+ if (type == PCI_RES_BUS)
+ return (pci_domain_alloc_bus(0, child, rid, start, end, count,
+ flags));
+#endif
+ start = hostb_alloc_start(type, start, end, count);
+ return (bus_generic_alloc_resource(dev, child, type, rid, start, end,
+ count, flags));
}
+#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
+int
+legacy_pcib_adjust_resource(device_t dev, device_t child, int type,
+ struct resource *r, rman_res_t start, rman_res_t end)
+{
+
+ if (type == PCI_RES_BUS)
+ return (pci_domain_adjust_bus(0, child, r, start, end));
+ return (bus_generic_adjust_resource(dev, child, type, r, start, end));
+}
+
+int
+legacy_pcib_release_resource(device_t dev, device_t child, int type, int rid,
+ struct resource *r)
+{
+
+ if (type == PCI_RES_BUS)
+ return (pci_domain_release_bus(0, child, rid, r));
+ return (bus_generic_release_resource(dev, child, type, rid, r));
+}
+#endif
+
static device_method_t legacy_pcib_methods[] = {
/* Device interface */
DEVMETHOD(device_identify, legacy_pcib_identify),
@@ -624,8 +643,13 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(bus_read_ivar, legacy_pcib_read_ivar),
DEVMETHOD(bus_write_ivar, legacy_pcib_write_ivar),
DEVMETHOD(bus_alloc_resource, legacy_pcib_alloc_resource),
+#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
+ DEVMETHOD(bus_adjust_resource, legacy_pcib_adjust_resource),
+ DEVMETHOD(bus_release_resource, legacy_pcib_release_resource),
+#else
DEVMETHOD(bus_adjust_resource, bus_generic_adjust_resource),
DEVMETHOD(bus_release_resource, bus_generic_release_resource),
+#endif
DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
DEVMETHOD(bus_setup_intr, bus_generic_setup_intr),
diff --git a/freebsd/usr.bin/netstat/bpf.c b/freebsd/usr.bin/netstat/bpf.c
index 97f2fd81..6b8cb819 100644
--- a/freebsd/usr.bin/netstat/bpf.c
+++ b/freebsd/usr.bin/netstat/bpf.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright (c) 2005 Christian S.J. Peron
* All rights reserved.
@@ -26,6 +30,9 @@
* SUCH DAMAGE.
*/
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -38,7 +45,6 @@ __FBSDID("$FreeBSD$");
#include <sys/user.h>
#include <net/if.h>
-#include <net/if_var.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>
#include <arpa/inet.h>
@@ -48,10 +54,15 @@ __FBSDID("$FreeBSD$");
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <string.h>
#include <unistd.h>
+#include <libxo/xo.h>
#include "netstat.h"
+#ifdef __rtems__
+#include "rtems-bsd-netstat-bpf-data.h"
+#endif /* __rtems__ */
/* print bpf stats */
@@ -70,7 +81,7 @@ bpf_pidname(pid_t pid)
size = sizeof(newkp);
error = sysctl(mib, 4, &newkp, &size, NULL, 0);
if (error < 0) {
- warn("kern.proc.pid failed");
+ xo_warn("kern.proc.pid failed");
return (strdup("??????"));
}
return (strdup(newkp.ki_comm));
@@ -92,6 +103,23 @@ bpf_flags(struct xbpf_d *bd, char *flagbuf)
#endif /* __rtems__ */
*flagbuf++ = bd->bd_locked ? 'l' : '-';
*flagbuf++ = '\0';
+
+ if (bd->bd_promisc)
+ xo_emit("{e:promiscuous/}");
+ if (bd->bd_immediate)
+ xo_emit("{e:immediate/}");
+ if (bd->bd_hdrcmplt)
+ xo_emit("{e:header-complete/}");
+ xo_emit("{e:direction}", (bd->bd_direction == BPF_D_IN) ? "input" :
+ (bd->bd_direction == BPF_D_OUT) ? "output" : "bidirectional");
+ if (bd->bd_feedback)
+ xo_emit("{e:feedback/}");
+#ifndef __rtems__
+ if (bd->bd_async)
+ xo_emit("{e:async/}");
+#endif /* __rtems__ */
+ if (bd->bd_locked)
+ xo_emit("{e:locked/}");
}
void
@@ -105,50 +133,61 @@ bpf_stats(char *ifname)
bzero(&zerostat, sizeof(zerostat));
if (sysctlbyname("net.bpf.stats", NULL, NULL,
&zerostat, sizeof(zerostat)) < 0)
- warn("failed to zero bpf counters");
+ xo_warn("failed to zero bpf counters");
return;
}
if (sysctlbyname("net.bpf.stats", NULL, &size,
NULL, 0) < 0) {
- warn("net.bpf.stats");
+ xo_warn("net.bpf.stats");
return;
}
if (size == 0)
return;
bd = malloc(size);
if (bd == NULL) {
- warn("malloc failed");
+ xo_warn("malloc failed");
return;
}
if (sysctlbyname("net.bpf.stats", bd, &size,
NULL, 0) < 0) {
- warn("net.bpf.stats");
+ xo_warn("net.bpf.stats");
free(bd);
return;
}
- (void) printf("%5s %6s %7s %9s %9s %9s %5s %5s %s\n",
- "Pid", "Netif", "Flags", "Recv", "Drop", "Match", "Sblen",
- "Hblen", "Command");
+ xo_emit("{T:/%5s} {T:/%6s} {T:/%7s} {T:/%9s} {T:/%9s} {T:/%9s} "
+ "{T:/%5s} {T:/%5s} {T:/%s}\n",
+ "Pid", "Netif", "Flags", "Recv", "Drop", "Match",
+ "Sblen", "Hblen", "Command");
+ xo_open_container("bpf-statistics");
+ xo_open_list("bpf-entry");
for (d = &bd[0]; d < &bd[size / sizeof(*d)]; d++) {
if (d->bd_structsize != sizeof(*d)) {
- warnx("bpf_stats_extended: version mismatch");
+ xo_warnx("bpf_stats_extended: version mismatch");
return;
}
if (ifname && strcmp(ifname, d->bd_ifname) != 0)
continue;
- bpf_flags(d, flagbuf);
+ xo_open_instance("bpf-entry");
#ifndef __rtems__
pname = bpf_pidname(d->bd_pid);
#else /* __rtems__ */
pname = "??????";
#endif /* __rtems__ */
- (void) printf("%5d %6s %7s %9ju %9ju %9ju %5d %5d %s\n",
- d->bd_pid, d->bd_ifname, flagbuf,
- d->bd_rcount, d->bd_dcount, d->bd_fcount,
- d->bd_slen, d->bd_hlen, pname);
+ xo_emit("{k:pid/%5d} {k:interface-name/%6s} ",
+ d->bd_pid, d->bd_ifname);
+ bpf_flags(d, flagbuf);
+ xo_emit("{d:flags/%7s} {:received-packets/%9ju} "
+ "{:dropped-packets/%9ju} {:filter-packets/%9ju} "
+ "{:store-buffer-length/%5d} {:hold-buffer-length/%5d} "
+ "{:process/%s}\n",
+ flagbuf, (uintmax_t)d->bd_rcount, (uintmax_t)d->bd_dcount,
+ (uintmax_t)d->bd_fcount, d->bd_slen, d->bd_hlen, pname);
#ifndef __rtems__
free(pname);
#endif /* __rtems__ */
+ xo_close_instance("bpf-entry");
}
+ xo_close_list("bpf-entry");
+ xo_close_container("bpf-statistics");
free(bd);
}
diff --git a/freebsd/usr.bin/netstat/flowtable.c b/freebsd/usr.bin/netstat/flowtable.c
new file mode 100644
index 00000000..fda45657
--- /dev/null
+++ b/freebsd/usr.bin/netstat/flowtable.c
@@ -0,0 +1,100 @@
+#include <machine/rtems-bsd-user-space.h>
+
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
+/*-
+ * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+
+#include <net/flowtable.h>
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+#include "netstat.h"
+#ifdef __rtems__
+#include "rtems-bsd-netstat-flowtable-data.h"
+#endif /* __rtems__ */
+
+/*
+ * Print flowtable statistics.
+ */
+
+static void
+print_stats(struct flowtable_stat *stat)
+{
+
+#define p(f, m) if (stat->f || sflag <= 1) \
+ printf(m, (uintmax_t)stat->f, plural(stat->f))
+#define p2(f, m) if (stat->f || sflag <= 1) \
+ printf(m, (uintmax_t)stat->f, plurales(stat->f))
+
+ p(ft_lookups, "\t%ju lookup%s\n");
+ p(ft_hits, "\t%ju hit%s\n");
+ p2(ft_misses, "\t%ju miss%s\n");
+ p(ft_inserts, "\t%ju insert%s\n");
+ p(ft_collisions, "\t%ju collision%s\n");
+ p(ft_free_checks, "\t%ju free check%s\n");
+ p(ft_frees, "\t%ju free%s\n");
+ p(ft_fail_lle_invalid,
+ "\t%ju lookup%s with not resolved Layer 2 address\n");
+
+#undef p2
+#undef p
+}
+
+void
+flowtable_stats(void)
+{
+ struct flowtable_stat stat;
+
+ if (!live)
+ return;
+
+ if (fetch_stats("net.flowtable.ip4.stat", 0, &stat,
+ sizeof(stat), NULL) == 0) {
+ printf("flowtable for IPv4:\n");
+ print_stats(&stat);
+ }
+
+ if (fetch_stats("net.flowtable.ip6.stat", 0, &stat,
+ sizeof(stat), NULL) == 0) {
+ printf("flowtable for IPv6:\n");
+ print_stats(&stat);
+ }
+}
diff --git a/freebsd/usr.bin/netstat/if.c b/freebsd/usr.bin/netstat/if.c
index 9edff32f..384c8f20 100644
--- a/freebsd/usr.bin/netstat/if.c
+++ b/freebsd/usr.bin/netstat/if.c
@@ -1,6 +1,11 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
/*-
+ * Copyright (c) 2013 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 1983, 1988, 1993
* The Regents of the University of California. All rights reserved.
*
@@ -36,66 +41,103 @@ static char sccsid[] = "@(#)if.c 8.3 (Berkeley) 4/28/95";
#endif
#ifdef __rtems__
-#define RTEMS_BSD_PROGRAM_NO_OPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_SOCKET_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FOPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FCLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_MALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_REALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FREE_WRAP
#include <machine/rtems-bsd-program.h>
#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/types.h>
+#include <rtems/bsd/sys/param.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
-#include <sys/sysctl.h>
#include <sys/time.h>
#include <net/if.h>
-#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/ethernet.h>
-#include <net/pfvar.h>
-#include <net/if_pfsync.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
-#ifndef __rtems__
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif /* __rtems__ */
#include <arpa/inet.h>
+#ifdef PF
+#include <net/pfvar.h>
+#include <net/if_pfsync.h>
+#endif
#include <err.h>
#include <errno.h>
+#include <ifaddrs.h>
#include <libutil.h>
#ifdef INET6
#include <netdb.h>
#endif
#include <signal.h>
+#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sysexits.h>
#include <unistd.h>
+#include <libxo/xo.h>
#include "netstat.h"
+#ifdef __rtems__
+#include "rtems-bsd-netstat-if-data.h"
+#endif /* __rtems__ */
-#define YES 1
-#define NO 0
+static void sidewaysintpr(void);
+
+#ifdef PF
+static const char* pfsyncacts[] = {
+ /* PFSYNC_ACT_CLR */ "clear all request",
+ /* PFSYNC_ACT_INS */ "state insert",
+ /* PFSYNC_ACT_INS_ACK */ "state inserted ack",
+ /* PFSYNC_ACT_UPD */ "state update",
+ /* PFSYNC_ACT_UPD_C */ "compressed state update",
+ /* PFSYNC_ACT_UPD_REQ */ "uncompressed state request",
+ /* PFSYNC_ACT_DEL */ "state delete",
+ /* PFSYNC_ACT_DEL_C */ "compressed state delete",
+ /* PFSYNC_ACT_INS_F */ "fragment insert",
+ /* PFSYNC_ACT_DEL_F */ "fragment delete",
+ /* PFSYNC_ACT_BUS */ "bulk update mark",
+ /* PFSYNC_ACT_TDB */ "TDB replay counter update",
+ /* PFSYNC_ACT_EOF */ "end of frame mark",
+};
-static void sidewaysintpr(int, u_long);
-static void catchalarm(int);
+static const char* pfsyncacts_name[] = {
+ /* PFSYNC_ACT_CLR */ "clear-all-request",
+ /* PFSYNC_ACT_INS */ "state-insert",
+ /* PFSYNC_ACT_INS_ACK */ "state-inserted-ack",
+ /* PFSYNC_ACT_UPD */ "state-update",
+ /* PFSYNC_ACT_UPD_C */ "compressed-state-update",
+ /* PFSYNC_ACT_UPD_REQ */ "uncompressed-state-request",
+ /* PFSYNC_ACT_DEL */ "state-delete",
+ /* PFSYNC_ACT_DEL_C */ "compressed-state-delete",
+ /* PFSYNC_ACT_INS_F */ "fragment-insert",
+ /* PFSYNC_ACT_DEL_F */ "fragment-delete",
+ /* PFSYNC_ACT_BUS */ "bulk-update-mark",
+ /* PFSYNC_ACT_TDB */ "TDB-replay-counter-update",
+ /* PFSYNC_ACT_EOF */ "end-of-frame-mark",
+};
-#ifdef INET6
-static char addr_buf[NI_MAXHOST]; /* for getnameinfo() */
-#endif
+static void
+pfsync_acts_stats(const char *list, const char *desc, uint64_t *a)
+{
+ int i;
+
+ xo_open_list(list);
+ for (i = 0; i < PFSYNC_ACT_MAX; i++, a++) {
+ if (*a || sflag <= 1) {
+ xo_open_instance(list);
+ xo_emit("\t\t{e:name}{:count/%ju} {N:/%s%s %s}\n",
+ pfsyncacts_name[i], (uintmax_t)(*a),
+ pfsyncacts[i], plural(*a), desc);
+ xo_close_instance(list);
+ }
+ }
+ xo_close_list(list);
+}
/*
* Dump pfsync statistics structure.
@@ -103,56 +145,68 @@ static char addr_buf[NI_MAXHOST]; /* for getnameinfo() */
void
pfsync_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct pfsyncstats pfsyncstat, zerostat;
- size_t len = sizeof(struct pfsyncstats);
-
- if (live) {
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet.pfsync.stats", &pfsyncstat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- if (errno != ENOENT)
- warn("sysctl: net.inet.pfsync.stats");
- return;
- }
- } else
- kread(off, &pfsyncstat, len);
+ struct pfsyncstats pfsyncstat;
- printf("%s:\n", name);
+ if (fetch_stats("net.pfsync.stats", off, &pfsyncstat,
+ sizeof(pfsyncstat), kread) != 0)
+ return;
+
+ xo_emit("{T:/%s}:\n", name);
+ xo_open_container(name);
#define p(f, m) if (pfsyncstat.f || sflag <= 1) \
- printf(m, (uintmax_t)pfsyncstat.f, plural(pfsyncstat.f))
-#define p2(f, m) if (pfsyncstat.f || sflag <= 1) \
- printf(m, (uintmax_t)pfsyncstat.f)
-
- p(pfsyncs_ipackets, "\t%ju packet%s received (IPv4)\n");
- p(pfsyncs_ipackets6, "\t%ju packet%s received (IPv6)\n");
- p(pfsyncs_badif, "\t\t%ju packet%s discarded for bad interface\n");
- p(pfsyncs_badttl, "\t\t%ju packet%s discarded for bad ttl\n");
- p(pfsyncs_hdrops, "\t\t%ju packet%s shorter than header\n");
- p(pfsyncs_badver, "\t\t%ju packet%s discarded for bad version\n");
- p(pfsyncs_badauth, "\t\t%ju packet%s discarded for bad HMAC\n");
- p(pfsyncs_badact,"\t\t%ju packet%s discarded for bad action\n");
- p(pfsyncs_badlen, "\t\t%ju packet%s discarded for short packet\n");
- p(pfsyncs_badval, "\t\t%ju state%s discarded for bad values\n");
- p(pfsyncs_stale, "\t\t%ju stale state%s\n");
- p(pfsyncs_badstate, "\t\t%ju failed state lookup/insert%s\n");
- p(pfsyncs_opackets, "\t%ju packet%s sent (IPv4)\n");
- p(pfsyncs_opackets6, "\t%ju packet%s sent (IPv6)\n");
- p2(pfsyncs_onomem, "\t\t%ju send failed due to mbuf memory error\n");
- p2(pfsyncs_oerrors, "\t\t%ju send error\n");
+ xo_emit(m, (uintmax_t)pfsyncstat.f, plural(pfsyncstat.f))
+
+ p(pfsyncs_ipackets, "\t{:received-inet-packets/%ju} "
+ "{N:/packet%s received (IPv4)}\n");
+ p(pfsyncs_ipackets6, "\t{:received-inet6-packets/%ju} "
+ "{N:/packet%s received (IPv6)}\n");
+ pfsync_acts_stats("input-histogram", "received",
+ &pfsyncstat.pfsyncs_iacts[0]);
+ p(pfsyncs_badif, "\t\t/{:dropped-bad-interface/%ju} "
+ "{N:/packet%s discarded for bad interface}\n");
+ p(pfsyncs_badttl, "\t\t{:dropped-bad-ttl/%ju} "
+ "{N:/packet%s discarded for bad ttl}\n");
+ p(pfsyncs_hdrops, "\t\t{:dropped-short-header/%ju} "
+ "{N:/packet%s shorter than header}\n");
+ p(pfsyncs_badver, "\t\t{:dropped-bad-version/%ju} "
+ "{N:/packet%s discarded for bad version}\n");
+ p(pfsyncs_badauth, "\t\t{:dropped-bad-auth/%ju} "
+ "{N:/packet%s discarded for bad HMAC}\n");
+ p(pfsyncs_badact,"\t\t{:dropped-bad-action/%ju} "
+ "{N:/packet%s discarded for bad action}\n");
+ p(pfsyncs_badlen, "\t\t{:dropped-short/%ju} "
+ "{N:/packet%s discarded for short packet}\n");
+ p(pfsyncs_badval, "\t\t{:dropped-bad-values/%ju} "
+ "{N:/state%s discarded for bad values}\n");
+ p(pfsyncs_stale, "\t\t{:dropped-stale-state/%ju} "
+ "{N:/stale state%s}\n");
+ p(pfsyncs_badstate, "\t\t{:dropped-failed-lookup/%ju} "
+ "{N:/failed state lookup\\/insert%s}\n");
+ p(pfsyncs_opackets, "\t{:sent-inet-packets/%ju} "
+ "{N:/packet%s sent (IPv4})\n");
+ p(pfsyncs_opackets6, "\t{:send-inet6-packets/%ju} "
+ "{N:/packet%s sent (IPv6})\n");
+ pfsync_acts_stats("output-histogram", "sent",
+ &pfsyncstat.pfsyncs_oacts[0]);
+ p(pfsyncs_onomem, "\t\t{:discarded-no-memory/%ju} "
+ "{N:/failure%s due to mbuf memory error}\n");
+ p(pfsyncs_oerrors, "\t\t{:send-errors/%ju} "
+ "{N:/send error%s}\n");
#undef p
-#undef p2
+ xo_close_container(name);
}
+#endif /* PF */
/*
* Display a formatted value, or a '-' in the same space.
*/
static void
-show_stat(const char *fmt, int width, u_long value, short showvalue)
+show_stat(const char *fmt, int width, const char *name,
+ u_long value, short showvalue, int div1000)
{
const char *lsep, *rsep;
- char newfmt[32];
+ char newfmt[64];
lsep = "";
if (strncmp(fmt, "LS", 2) == 0) {
@@ -166,542 +220,403 @@ show_stat(const char *fmt, int width, u_long value, short showvalue)
}
if (showvalue == 0) {
/* Print just dash. */
- sprintf(newfmt, "%s%%%ds%s", lsep, width, rsep);
- printf(newfmt, "-");
+ xo_emit("{P:/%s}{D:/%*s}{P:/%s}", lsep, width, "-", rsep);
return;
}
+ /*
+ * XXX: workaround {P:} modifier can't be empty and doesn't seem to
+ * take args... so we need to conditionally include it in the format.
+ */
+#define maybe_pad(pad) do { \
+ if (strlen(pad)) { \
+ snprintf(newfmt, sizeof(newfmt), "{P:%s}", pad); \
+ xo_emit(newfmt); \
+ } \
+} while (0)
+
if (hflag) {
char buf[5];
/* Format in human readable form. */
humanize_number(buf, sizeof(buf), (int64_t)value, "",
- HN_AUTOSCALE, HN_NOSPACE | HN_DECIMAL);
- sprintf(newfmt, "%s%%%ds%s", lsep, width, rsep);
- printf(newfmt, buf);
+ HN_AUTOSCALE, HN_NOSPACE | HN_DECIMAL | \
+ ((div1000) ? HN_DIVISOR_1000 : 0));
+ maybe_pad(lsep);
+ snprintf(newfmt, sizeof(newfmt), "{:%s/%%%ds}", name, width);
+ xo_emit(newfmt, buf);
+ maybe_pad(rsep);
} else {
/* Construct the format string. */
- sprintf(newfmt, "%s%%%d%s%s", lsep, width, fmt, rsep);
- printf(newfmt, value);
+ maybe_pad(lsep);
+ snprintf(newfmt, sizeof(newfmt), "{:%s/%%%d%s}",
+ name, width, fmt);
+ xo_emit(newfmt, value);
+ maybe_pad(rsep);
+ }
+}
+
+/*
+ * Find next multiaddr for a given interface name.
+ */
+static struct ifmaddrs *
+next_ifma(struct ifmaddrs *ifma, const char *name, const sa_family_t family)
+{
+
+ for(; ifma != NULL; ifma = ifma->ifma_next) {
+ struct sockaddr_dl *sdl;
+
+ sdl = (struct sockaddr_dl *)ifma->ifma_name;
+ if (ifma->ifma_addr->sa_family == family &&
+ strcmp(sdl->sdl_data, name) == 0)
+ break;
}
+
+ return (ifma);
}
/*
* Print a description of the network interfaces.
*/
void
-intpr(int interval1, u_long ifnetaddr, void (*pfunc)(char *))
+intpr(void (*pfunc)(char *), int af)
{
- struct ifnet ifnet;
- struct ifnethead ifnethead;
- union {
- struct ifaddr ifa;
- struct in_ifaddr in;
-#ifdef INET6
- struct in6_ifaddr in6;
-#endif
-#ifndef __rtems__
- struct ipx_ifaddr ipx;
-#endif /* __rtems__ */
- } ifaddr;
- u_long ifaddraddr;
- u_long ifaddrfound;
- u_long opackets;
- u_long ipackets;
- u_long obytes;
- u_long ibytes;
- u_long omcasts;
- u_long imcasts;
- u_long oerrors;
- u_long ierrors;
- u_long idrops;
- u_long collisions;
- int drops;
- struct sockaddr *sa = NULL;
- char name[IFNAMSIZ];
- short network_layer;
- short link_layer;
-
- if (ifnetaddr == 0) {
- printf("ifnet: symbol not defined\n");
- return;
- }
- if (interval1) {
- sidewaysintpr(interval1, ifnetaddr);
- return;
+ struct ifaddrs *ifap, *ifa;
+ struct ifmaddrs *ifmap, *ifma;
+ u_int ifn_len_max = 5, ifn_len;
+ u_int has_ipv6 = 0, net_len = 13, addr_len = 17;
+
+ if (interval)
+ return sidewaysintpr();
+
+ if (getifaddrs(&ifap) != 0)
+ err(EX_OSERR, "getifaddrs");
+ if (aflag && getifmaddrs(&ifmap) != 0)
+ err(EX_OSERR, "getifmaddrs");
+
+ if (Wflag) {
+ for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
+ if (interface != NULL &&
+ strcmp(ifa->ifa_name, interface) != 0)
+ continue;
+ if (af != AF_UNSPEC && ifa->ifa_addr->sa_family != af)
+ continue;
+ ifn_len = strlen(ifa->ifa_name);
+ if ((ifa->ifa_flags & IFF_UP) == 0)
+ ++ifn_len;
+ ifn_len_max = MAX(ifn_len_max, ifn_len);
+ if (ifa->ifa_addr->sa_family == AF_INET6)
+ has_ipv6 = 1;
+ }
+ if (has_ipv6) {
+ net_len = 24;
+ addr_len = 39;
+ } else
+ net_len = 18;
}
- if (kread(ifnetaddr, (char *)&ifnethead, sizeof ifnethead) != 0)
- return;
- ifnetaddr = (u_long)TAILQ_FIRST(&ifnethead);
- if (kread(ifnetaddr, (char *)&ifnet, sizeof ifnet) != 0)
- return;
+ xo_open_list("interface");
if (!pfunc) {
- if (Wflag)
- printf("%-7.7s", "Name");
- else
- printf("%-5.5s", "Name");
- printf(" %5.5s %-13.13s %-17.17s %8.8s %5.5s %5.5s",
- "Mtu", "Network", "Address", "Ipkts", "Ierrs", "Idrop");
+ xo_emit("{T:/%-*.*s}", ifn_len_max, ifn_len_max, "Name");
+ xo_emit(" {T:/%5.5s} {T:/%-*.*s} {T:/%-*.*s} {T:/%8.8s} "
+ "{T:/%5.5s} {T:/%5.5s}",
+ "Mtu", net_len, net_len, "Network", addr_len, addr_len,
+ "Address", "Ipkts", "Ierrs", "Idrop");
if (bflag)
- printf(" %10.10s","Ibytes");
- printf(" %8.8s %5.5s", "Opkts", "Oerrs");
+ xo_emit(" {T:/%10.10s}","Ibytes");
+ xo_emit(" {T:/%8.8s} {T:/%5.5s}", "Opkts", "Oerrs");
if (bflag)
- printf(" %10.10s","Obytes");
- printf(" %5s", "Coll");
+ xo_emit(" {T:/%10.10s}","Obytes");
+ xo_emit(" {T:/%5s}", "Coll");
if (dflag)
- printf(" %s", "Drop");
- putchar('\n');
+ xo_emit(" {T:/%5.5s}", "Drop");
+ xo_emit("\n");
}
- ifaddraddr = 0;
- while (ifnetaddr || ifaddraddr) {
- struct sockaddr_in *sockin;
-#ifdef INET6
- struct sockaddr_in6 *sockin6;
-#endif
- char *cp;
- int n, m;
-
- network_layer = 0;
- link_layer = 0;
-
- if (ifaddraddr == 0) {
- if (kread(ifnetaddr, (char *)&ifnet, sizeof ifnet) != 0)
- return;
- strlcpy(name, ifnet.if_xname, sizeof(name));
- ifnetaddr = (u_long)TAILQ_NEXT(&ifnet, if_link);
- if (interface != 0 && strcmp(name, interface) != 0)
- continue;
- cp = index(name, '\0');
- if (pfunc) {
- (*pfunc)(name);
- continue;
- }
+ for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
+ bool network = false, link = false;
+ char *name, *xname, buf[IFNAMSIZ+1];
+ const char *nn, *rn;
- if ((ifnet.if_flags&IFF_UP) == 0)
- *cp++ = '*';
- *cp = '\0';
- ifaddraddr = (u_long)TAILQ_FIRST(&ifnet.if_addrhead);
- }
- ifaddrfound = ifaddraddr;
+ if (interface != NULL && strcmp(ifa->ifa_name, interface) != 0)
+ continue;
- /*
- * Get the interface stats. These may get
- * overriden below on a per-interface basis.
- */
- opackets = ifnet.if_opackets;
- ipackets = ifnet.if_ipackets;
- obytes = ifnet.if_obytes;
- ibytes = ifnet.if_ibytes;
- omcasts = ifnet.if_omcasts;
- imcasts = ifnet.if_imcasts;
- oerrors = ifnet.if_oerrors;
- ierrors = ifnet.if_ierrors;
- idrops = ifnet.if_iqdrops;
- collisions = ifnet.if_collisions;
- drops = ifnet.if_snd.ifq_drops;
-
- if (ifaddraddr == 0) {
- if (Wflag)
- printf("%-7.7s", name);
- else
- printf("%-5.5s", name);
- printf(" %5lu ", ifnet.if_mtu);
- printf("%-13.13s ", "none");
- printf("%-17.17s ", "none");
- } else {
- if (kread(ifaddraddr, (char *)&ifaddr, sizeof ifaddr)
- != 0) {
- ifaddraddr = 0;
- continue;
- }
-#define CP(x) ((char *)(x))
- cp = (CP(ifaddr.ifa.ifa_addr) - CP(ifaddraddr)) +
- CP(&ifaddr);
- sa = (struct sockaddr *)cp;
- if (af != AF_UNSPEC && sa->sa_family != af) {
- ifaddraddr =
- (u_long)TAILQ_NEXT(&ifaddr.ifa, ifa_link);
- continue;
- }
- if (Wflag)
- printf("%-7.7s", name);
- else
- printf("%-5.5s", name);
- printf(" %5lu ", ifnet.if_mtu);
- switch (sa->sa_family) {
- case AF_UNSPEC:
- printf("%-13.13s ", "none");
- printf("%-15.15s ", "none");
- break;
- case AF_INET:
- sockin = (struct sockaddr_in *)sa;
-#ifdef notdef
- /* can't use inet_makeaddr because kernel
- * keeps nets unshifted.
- */
- in = inet_makeaddr(ifaddr.in.ia_subnet,
- INADDR_ANY);
- printf("%-13.13s ", netname(in.s_addr,
- ifaddr.in.ia_subnetmask));
-#else
- printf("%-13.13s ",
- netname(htonl(ifaddr.in.ia_subnet),
- ifaddr.in.ia_subnetmask));
-#endif
- printf("%-17.17s ",
- routename(sockin->sin_addr.s_addr));
+ name = ifa->ifa_name;
- network_layer = 1;
- break;
-#ifdef INET6
- case AF_INET6:
- sockin6 = (struct sockaddr_in6 *)sa;
- in6_fillscopeid(&ifaddr.in6.ia_addr);
- printf("%-13.13s ",
- netname6(&ifaddr.in6.ia_addr,
- &ifaddr.in6.ia_prefixmask.sin6_addr));
- in6_fillscopeid(sockin6);
- getnameinfo(sa, sa->sa_len, addr_buf,
- sizeof(addr_buf), 0, 0, NI_NUMERICHOST);
- printf("%-17.17s ", addr_buf);
-
- network_layer = 1;
- break;
-#endif /*INET6*/
-#ifndef __rtems__
- case AF_IPX:
- {
- struct sockaddr_ipx *sipx =
- (struct sockaddr_ipx *)sa;
- u_long net;
- char netnum[10];
-
- *(union ipx_net *) &net = sipx->sipx_addr.x_net;
- sprintf(netnum, "%lx", (u_long)ntohl(net));
- printf("ipx:%-8s ", netnum);
-/* printf("ipx:%-8s ", netname(net, 0L)); */
- printf("%-17s ",
- ipx_phost((struct sockaddr *)sipx));
- }
-
- network_layer = 1;
- break;
-#endif /* __rtems__ */
+ if (pfunc) {
-#ifndef __rtems__
- case AF_APPLETALK:
- printf("atalk:%-12.12s ",atalk_print(sa,0x10) );
- printf("%-11.11s ",atalk_print(sa,0x0b) );
- break;
-#endif /* __rtems__ */
- case AF_LINK:
- {
- struct sockaddr_dl *sdl =
- (struct sockaddr_dl *)sa;
- char linknum[10];
- cp = (char *)LLADDR(sdl);
- n = sdl->sdl_alen;
- sprintf(linknum, "<Link#%d>", sdl->sdl_index);
- m = printf("%-13.13s ", linknum);
- }
- goto hexprint;
- default:
- m = printf("(%d)", sa->sa_family);
- for (cp = sa->sa_len + (char *)sa;
- --cp > sa->sa_data && (*cp == 0);) {}
- n = cp - sa->sa_data + 1;
- cp = sa->sa_data;
- hexprint:
- while ((--n >= 0) && (m < 30))
- m += printf("%02x%c", *cp++ & 0xff,
- n > 0 ? ':' : ' ');
- m = 32 - m;
- while (m-- > 0)
- putchar(' ');
-
- link_layer = 1;
- break;
- }
+ (*pfunc)(name);
/*
- * Fixup the statistics for interfaces that
- * update stats for their network addresses
+ * Skip all ifaddrs belonging to same interface.
*/
- if (network_layer) {
- opackets = ifaddr.in.ia_ifa.if_opackets;
- ipackets = ifaddr.in.ia_ifa.if_ipackets;
- obytes = ifaddr.in.ia_ifa.if_obytes;
- ibytes = ifaddr.in.ia_ifa.if_ibytes;
+ while(ifa->ifa_next != NULL &&
+ (strcmp(ifa->ifa_next->ifa_name, name) == 0)) {
+ ifa = ifa->ifa_next;
}
-
- ifaddraddr = (u_long)TAILQ_NEXT(&ifaddr.ifa, ifa_link);
+ continue;
}
- show_stat("lu", 8, ipackets, link_layer|network_layer);
- show_stat("lu", 5, ierrors, link_layer);
- show_stat("lu", 5, idrops, link_layer);
- if (bflag)
- show_stat("lu", 10, ibytes, link_layer|network_layer);
+ if (af != AF_UNSPEC && ifa->ifa_addr->sa_family != af)
+ continue;
- show_stat("lu", 8, opackets, link_layer|network_layer);
- show_stat("lu", 5, oerrors, link_layer);
- if (bflag)
- show_stat("lu", 10, obytes, link_layer|network_layer);
+ xo_open_instance("interface");
- show_stat("NRSlu", 5, collisions, link_layer);
- if (dflag)
- show_stat("LSd", 4, drops, link_layer);
- putchar('\n');
+ if ((ifa->ifa_flags & IFF_UP) == 0) {
+ xname = stpcpy(buf, name);
+ *xname++ = '*';
+ *xname = '\0';
+ xname = buf;
+ } else
+ xname = name;
- if (aflag && ifaddrfound) {
- /*
- * Print family's multicast addresses
- */
- struct ifmultiaddr *multiaddr;
- struct ifmultiaddr ifma;
- union {
- struct sockaddr sa;
- struct sockaddr_in in;
+ xo_emit("{d:/%-*.*s}{etk:name}{eq:flags/0x%x}",
+ ifn_len_max, ifn_len_max, xname, name, ifa->ifa_flags);
+
+#define IFA_MTU(ifa) (((struct if_data *)(ifa)->ifa_data)->ifi_mtu)
+ show_stat("lu", 6, "mtu", IFA_MTU(ifa), IFA_MTU(ifa), 0);
+#undef IFA_MTU
+
+ switch (ifa->ifa_addr->sa_family) {
+ case AF_UNSPEC:
+ xo_emit("{:network/%-*.*s} ", net_len, net_len,
+ "none");
+ xo_emit("{:address/%-*.*s} ", addr_len, addr_len,
+ "none");
+ break;
+ case AF_INET:
#ifdef INET6
- struct sockaddr_in6 in6;
+ case AF_INET6:
#endif /* INET6 */
- struct sockaddr_dl dl;
- } msa;
- const char *fmt;
+ nn = netname(ifa->ifa_addr, ifa->ifa_netmask);
+ rn = routename(ifa->ifa_addr, numeric_addr);
+ if (Wflag) {
+ xo_emit("{t:network/%-*s} ", net_len, nn);
+ xo_emit("{t:address/%-*s} ", addr_len, rn);
+ } else {
+ xo_emit("{d:network/%-*.*s}{et:network} ",
+ net_len, net_len, nn, nn);
+ xo_emit("{d:address/%-*.*s}{et:address} ",
+ addr_len, addr_len, rn, rn);
+ }
- TAILQ_FOREACH(multiaddr, &ifnet.if_multiaddrs, ifma_link) {
- if (kread((u_long)multiaddr, (char *)&ifma,
- sizeof ifma) != 0)
- break;
- multiaddr = &ifma;
- if (kread((u_long)ifma.ifma_addr, (char *)&msa,
- sizeof msa) != 0)
- break;
- if (msa.sa.sa_family != sa->sa_family)
- continue;
+ network = true;
+ break;
+ case AF_LINK:
+ {
+ struct sockaddr_dl *sdl;
+ char linknum[10];
+
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ sprintf(linknum, "<Link#%d>", sdl->sdl_index);
+ xo_emit("{t:network/%-*.*s} ", net_len, net_len,
+ linknum);
+ if (sdl->sdl_nlen == 0 &&
+ sdl->sdl_alen == 0 &&
+ sdl->sdl_slen == 0)
+ xo_emit("{P:/%*s} ", addr_len, "");
+ else
+ xo_emit("{t:address/%-*.*s} ", addr_len,
+ addr_len, routename(ifa->ifa_addr, 1));
+ link = true;
+ break;
+ }
+ }
+
+#define IFA_STAT(s) (((struct if_data *)ifa->ifa_data)->ifi_ ## s)
+ show_stat("lu", 8, "received-packets", IFA_STAT(ipackets),
+ link|network, 1);
+ show_stat("lu", 5, "received-errors", IFA_STAT(ierrors),
+ link, 1);
+ show_stat("lu", 5, "dropped-packets", IFA_STAT(iqdrops),
+ link, 1);
+ if (bflag)
+ show_stat("lu", 10, "received-bytes", IFA_STAT(ibytes),
+ link|network, 0);
+ show_stat("lu", 8, "sent-packets", IFA_STAT(opackets),
+ link|network, 1);
+ show_stat("lu", 5, "send-errors", IFA_STAT(oerrors), link, 1);
+ if (bflag)
+ show_stat("lu", 10, "sent-bytes", IFA_STAT(obytes),
+ link|network, 0);
+ show_stat("NRSlu", 5, "collisions", IFA_STAT(collisions),
+ link, 1);
+ if (dflag)
+ show_stat("LSlu", 5, "dropped-packets",
+ IFA_STAT(oqdrops), link, 1);
+ xo_emit("\n");
- fmt = 0;
- switch (msa.sa.sa_family) {
- case AF_INET:
- fmt = routename(msa.in.sin_addr.s_addr);
+ if (!aflag) {
+ xo_close_instance("interface");
+ continue;
+ }
+
+ /*
+ * Print family's multicast addresses.
+ */
+ xo_open_list("multicast-address");
+ for (ifma = next_ifma(ifmap, ifa->ifa_name,
+ ifa->ifa_addr->sa_family);
+ ifma != NULL;
+ ifma = next_ifma(ifma, ifa->ifa_name,
+ ifa->ifa_addr->sa_family)) {
+ const char *fmt = NULL;
+
+ xo_open_instance("multicast-address");
+ switch (ifma->ifma_addr->sa_family) {
+ case AF_LINK:
+ {
+ struct sockaddr_dl *sdl;
+
+ sdl = (struct sockaddr_dl *)ifma->ifma_addr;
+ if (sdl->sdl_type != IFT_ETHER &&
+ sdl->sdl_type != IFT_FDDI)
break;
+ }
+ /* FALLTHROUGH */
+ case AF_INET:
#ifdef INET6
- case AF_INET6:
- in6_fillscopeid(&msa.in6);
- getnameinfo(&msa.sa, msa.sa.sa_len,
- addr_buf, sizeof(addr_buf), 0, 0,
- NI_NUMERICHOST);
- printf("%*s %-19.19s(refs: %d)\n",
- Wflag ? 27 : 25, "",
- addr_buf, ifma.ifma_refcount);
- break;
+ case AF_INET6:
#endif /* INET6 */
- case AF_LINK:
- switch (msa.dl.sdl_type) {
- case IFT_ETHER:
- case IFT_FDDI:
- fmt = ether_ntoa(
- (struct ether_addr *)
- LLADDR(&msa.dl));
- break;
- }
- break;
- }
- if (fmt) {
- printf("%*s %-17.17s",
- Wflag ? 27 : 25, "", fmt);
- if (msa.sa.sa_family == AF_LINK) {
- printf(" %8lu", imcasts);
- printf("%*s",
- bflag ? 17 : 6, "");
- printf(" %8lu", omcasts);
- }
- putchar('\n');
- }
+ fmt = routename(ifma->ifma_addr, numeric_addr);
+ break;
+ }
+ if (fmt) {
+ if (Wflag)
+ xo_emit("{P:/%27s }"
+ "{t:address/%-17s/}", "", fmt);
+ else
+ xo_emit("{P:/%25s }"
+ "{t:address/%-17.17s/}", "", fmt);
+ if (ifma->ifma_addr->sa_family == AF_LINK) {
+ xo_emit(" {:received-packets/%8lu}",
+ IFA_STAT(imcasts));
+ xo_emit("{P:/%*s}", bflag? 17 : 6, "");
+ xo_emit(" {:sent-packets/%8lu}",
+ IFA_STAT(omcasts));
+ }
+ xo_emit("\n");
}
+ xo_close_instance("multicast-address");
+ ifma = ifma->ifma_next;
}
+ xo_close_list("multicast-address");
+ xo_close_instance("interface");
}
+ xo_close_list("interface");
+
+ freeifaddrs(ifap);
+ if (aflag)
+ freeifmaddrs(ifmap);
}
-struct iftot {
- SLIST_ENTRY(iftot) chain;
- char ift_name[IFNAMSIZ]; /* interface name */
+struct iftot {
u_long ift_ip; /* input packets */
u_long ift_ie; /* input errors */
u_long ift_id; /* input drops */
u_long ift_op; /* output packets */
u_long ift_oe; /* output errors */
+ u_long ift_od; /* output drops */
u_long ift_co; /* collisions */
- u_int ift_dr; /* drops */
u_long ift_ib; /* input bytes */
u_long ift_ob; /* output bytes */
};
-u_char signalled; /* set if alarm goes off "early" */
-
/*
- * Print a running summary of interface statistics.
- * Repeat display every interval1 seconds, showing statistics
- * collected over that interval. Assumes that interval1 is non-zero.
- * First line printed at top of screen is always cumulative.
- * XXX - should be rewritten to use ifmib(4).
+ * Obtain stats for interface(s).
*/
static void
-sidewaysintpr(int interval1, u_long off)
+fill_iftot(struct iftot *st)
{
- struct ifnet ifnet;
- u_long firstifnet;
- struct ifnethead ifnethead;
- struct itimerval interval_it;
- struct iftot *iftot, *ip, *ipn, *total, *sum, *interesting;
- int line;
- int oldmask, first;
- u_long interesting_off;
+ struct ifaddrs *ifap, *ifa;
+ bool found = false;
- if (kread(off, (char *)&ifnethead, sizeof ifnethead) != 0)
- return;
- firstifnet = (u_long)TAILQ_FIRST(&ifnethead);
-
- if ((iftot = malloc(sizeof(struct iftot))) == NULL) {
- printf("malloc failed\n");
- exit(1);
- }
- memset(iftot, 0, sizeof(struct iftot));
+ if (getifaddrs(&ifap) != 0)
+ xo_err(EX_OSERR, "getifaddrs");
- interesting = NULL;
- interesting_off = 0;
- for (off = firstifnet, ip = iftot; off;) {
- char name[IFNAMSIZ];
+ bzero(st, sizeof(*st));
- if (kread(off, (char *)&ifnet, sizeof ifnet) != 0)
- break;
- strlcpy(name, ifnet.if_xname, sizeof(name));
- if (interface && strcmp(name, interface) == 0) {
- interesting = ip;
- interesting_off = off;
- }
- snprintf(ip->ift_name, sizeof(ip->ift_name), "(%s)", name);
- if ((ipn = malloc(sizeof(struct iftot))) == NULL) {
- printf("malloc failed\n");
- exit(1);
+ for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
+ if (ifa->ifa_addr->sa_family != AF_LINK)
+ continue;
+ if (interface) {
+ if (strcmp(ifa->ifa_name, interface) == 0)
+ found = true;
+ else
+ continue;
}
- memset(ipn, 0, sizeof(struct iftot));
- SLIST_NEXT(ip, chain) = ipn;
- ip = ipn;
- off = (u_long)TAILQ_NEXT(&ifnet, if_link);
- }
- if (interface && interesting == NULL)
- errx(1, "%s: unknown interface", interface);
- if ((total = malloc(sizeof(struct iftot))) == NULL) {
- printf("malloc failed\n");
- exit(1);
- }
- memset(total, 0, sizeof(struct iftot));
- if ((sum = malloc(sizeof(struct iftot))) == NULL) {
- printf("malloc failed\n");
- exit(1);
+
+ st->ift_ip += IFA_STAT(ipackets);
+ st->ift_ie += IFA_STAT(ierrors);
+ st->ift_id += IFA_STAT(iqdrops);
+ st->ift_ib += IFA_STAT(ibytes);
+ st->ift_op += IFA_STAT(opackets);
+ st->ift_oe += IFA_STAT(oerrors);
+ st->ift_od += IFA_STAT(oqdrops);
+ st->ift_ob += IFA_STAT(obytes);
+ st->ift_co += IFA_STAT(collisions);
}
- memset(sum, 0, sizeof(struct iftot));
+
+ if (interface && found == false)
+ xo_err(EX_DATAERR, "interface %s not found", interface);
+
+ freeifaddrs(ifap);
+}
+
+/*
+ * Set a flag to indicate that a signal from the periodic itimer has been
+ * caught.
+ */
+static sig_atomic_t signalled;
+static void
+catchalarm(int signo __unused)
+{
+ signalled = true;
+}
+
+/*
+ * Print a running summary of interface statistics.
+ * Repeat display every interval seconds, showing statistics
+ * collected over that interval. Assumes that interval is non-zero.
+ * First line printed at top of screen is always cumulative.
+ */
+static void
+sidewaysintpr(void)
+{
+ struct iftot ift[2], *new, *old;
+ struct itimerval interval_it;
+ int oldmask, line;
+
+ new = &ift[0];
+ old = &ift[1];
+ fill_iftot(old);
(void)signal(SIGALRM, catchalarm);
- signalled = NO;
- interval_it.it_interval.tv_sec = interval1;
+ signalled = false;
+ interval_it.it_interval.tv_sec = interval;
interval_it.it_interval.tv_usec = 0;
interval_it.it_value = interval_it.it_interval;
setitimer(ITIMER_REAL, &interval_it, NULL);
- first = 1;
+ xo_open_list("interface-statistics");
+
banner:
- printf("%17s %14s %16s", "input",
- interesting ? interesting->ift_name : "(Total)", "output");
- putchar('\n');
- printf("%10s %5s %5s %10s %10s %5s %10s %5s",
+ xo_emit("{T:/%17s} {T:/%14s} {T:/%16s}\n", "input",
+ interface != NULL ? interface : "(Total)", "output");
+ xo_emit("{T:/%10s} {T:/%5s} {T:/%5s} {T:/%10s} {T:/%10s} {T:/%5s} "
+ "{T:/%10s} {T:/%5s}",
"packets", "errs", "idrops", "bytes", "packets", "errs", "bytes",
"colls");
if (dflag)
- printf(" %5.5s", "drops");
- putchar('\n');
- fflush(stdout);
+ xo_emit(" {T:/%5.5s}", "drops");
+ xo_emit("\n");
+ xo_flush();
line = 0;
+
loop:
- if (interesting != NULL) {
- ip = interesting;
- if (kread(interesting_off, (char *)&ifnet, sizeof ifnet) != 0) {
- printf("???\n");
- exit(1);
- };
- if (!first) {
- show_stat("lu", 10, ifnet.if_ipackets - ip->ift_ip, 1);
- show_stat("lu", 5, ifnet.if_ierrors - ip->ift_ie, 1);
- show_stat("lu", 5, ifnet.if_iqdrops - ip->ift_id, 1);
- show_stat("lu", 10, ifnet.if_ibytes - ip->ift_ib, 1);
- show_stat("lu", 10, ifnet.if_opackets - ip->ift_op, 1);
- show_stat("lu", 5, ifnet.if_oerrors - ip->ift_oe, 1);
- show_stat("lu", 10, ifnet.if_obytes - ip->ift_ob, 1);
- show_stat("NRSlu", 5,
- ifnet.if_collisions - ip->ift_co, 1);
- if (dflag)
- show_stat("LSu", 5,
- ifnet.if_snd.ifq_drops - ip->ift_dr, 1);
- }
- ip->ift_ip = ifnet.if_ipackets;
- ip->ift_ie = ifnet.if_ierrors;
- ip->ift_id = ifnet.if_iqdrops;
- ip->ift_ib = ifnet.if_ibytes;
- ip->ift_op = ifnet.if_opackets;
- ip->ift_oe = ifnet.if_oerrors;
- ip->ift_ob = ifnet.if_obytes;
- ip->ift_co = ifnet.if_collisions;
- ip->ift_dr = ifnet.if_snd.ifq_drops;
- } else {
- sum->ift_ip = 0;
- sum->ift_ie = 0;
- sum->ift_id = 0;
- sum->ift_ib = 0;
- sum->ift_op = 0;
- sum->ift_oe = 0;
- sum->ift_ob = 0;
- sum->ift_co = 0;
- sum->ift_dr = 0;
- for (off = firstifnet, ip = iftot;
- off && SLIST_NEXT(ip, chain) != NULL;
- ip = SLIST_NEXT(ip, chain)) {
- if (kread(off, (char *)&ifnet, sizeof ifnet) != 0) {
- off = 0;
- continue;
- }
- sum->ift_ip += ifnet.if_ipackets;
- sum->ift_ie += ifnet.if_ierrors;
- sum->ift_id += ifnet.if_iqdrops;
- sum->ift_ib += ifnet.if_ibytes;
- sum->ift_op += ifnet.if_opackets;
- sum->ift_oe += ifnet.if_oerrors;
- sum->ift_ob += ifnet.if_obytes;
- sum->ift_co += ifnet.if_collisions;
- sum->ift_dr += ifnet.if_snd.ifq_drops;
- off = (u_long)TAILQ_NEXT(&ifnet, if_link);
- }
- if (!first) {
- show_stat("lu", 10, sum->ift_ip - total->ift_ip, 1);
- show_stat("lu", 5, sum->ift_ie - total->ift_ie, 1);
- show_stat("lu", 5, sum->ift_id - total->ift_id, 1);
- show_stat("lu", 10, sum->ift_ib - total->ift_ib, 1);
- show_stat("lu", 10, sum->ift_op - total->ift_op, 1);
- show_stat("lu", 5, sum->ift_oe - total->ift_oe, 1);
- show_stat("lu", 10, sum->ift_ob - total->ift_ob, 1);
- show_stat("NRSlu", 5, sum->ift_co - total->ift_co, 1);
- if (dflag)
- show_stat("LSu", 5,
- sum->ift_dr - total->ift_dr, 1);
- }
- *total = *sum;
+ if ((noutputs != 0) && (--noutputs == 0)) {
+ xo_close_list("interface-statistics");
+ return;
}
- if (!first)
- putchar('\n');
- fflush(stdout);
- if ((noutputs != 0) && (--noutputs == 0))
- exit(0);
#ifdef __rtems__
{
sigset_t oldmask, desired, empty;
@@ -712,31 +627,56 @@ loop:
sigprocmask(SIG_BLOCK, &desired, &oldmask);
while (!signalled)
sigsuspend(&desired);
- signalled = NO;
+ signalled = false;
sigprocmask(SIG_SETMASK, &oldmask, NULL);
}
#else /* __rtems__ */
oldmask = sigblock(sigmask(SIGALRM));
while (!signalled)
sigpause(0);
- signalled = NO;
+ signalled = false;
sigsetmask(oldmask);
#endif /* __rtems__ */
line++;
- first = 0;
+
+ fill_iftot(new);
+
+ xo_open_instance("stats");
+ show_stat("lu", 10, "received-packets",
+ new->ift_ip - old->ift_ip, 1, 1);
+ show_stat("lu", 5, "received-errors",
+ new->ift_ie - old->ift_ie, 1, 1);
+ show_stat("lu", 5, "dropped-packets",
+ new->ift_id - old->ift_id, 1, 1);
+ show_stat("lu", 10, "received-bytes",
+ new->ift_ib - old->ift_ib, 1, 0);
+ show_stat("lu", 10, "sent-packets",
+ new->ift_op - old->ift_op, 1, 1);
+ show_stat("lu", 5, "send-errors",
+ new->ift_oe - old->ift_oe, 1, 1);
+ show_stat("lu", 10, "sent-bytes",
+ new->ift_ob - old->ift_ob, 1, 0);
+ show_stat("NRSlu", 5, "collisions",
+ new->ift_co - old->ift_co, 1, 1);
+ if (dflag)
+ show_stat("LSlu", 5, "dropped-packets",
+ new->ift_od - old->ift_od, 1, 1);
+ xo_close_instance("stats");
+ xo_emit("\n");
+ xo_flush();
+
+ if (new == &ift[0]) {
+ new = &ift[1];
+ old = &ift[0];
+ } else {
+ new = &ift[0];
+ old = &ift[1];
+ }
+
if (line == 21)
goto banner;
else
goto loop;
- /*NOTREACHED*/
-}
-/*
- * Set a flag to indicate that a signal from the periodic itimer has been
- * caught.
- */
-static void
-catchalarm(int signo __unused)
-{
- signalled = YES;
+ /* NOTREACHED */
}
diff --git a/freebsd/usr.bin/netstat/inet.c b/freebsd/usr.bin/netstat/inet.c
index e67ef0a6..c5a5042d 100644
--- a/freebsd/usr.bin/netstat/inet.c
+++ b/freebsd/usr.bin/netstat/inet.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright (c) 1983, 1988, 1993, 1995
* The Regents of the University of California. All rights reserved.
@@ -35,6 +39,9 @@ static char sccsid[] = "@(#)inet.c 8.5 (Berkeley) 5/24/95";
#endif /* not lint */
#endif
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -68,7 +75,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
-#include <netinet/tcp_debug.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
@@ -80,29 +86,25 @@ __FBSDID("$FreeBSD$");
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <string.h>
#include <unistd.h>
+#include <libxo/xo.h>
#include "netstat.h"
+#include "nl_defs.h"
+#ifdef __rtems__
+#include "rtems-bsd-netstat-inet-data.h"
+#endif /* __rtems__ */
char *inetname(struct in_addr *);
-void inetprint(struct in_addr *, int, const char *, int);
+void inetprint(const char *, struct in_addr *, int, const char *, int,
+ const int);
#ifdef INET6
static int udp_done, tcp_done, sdp_done;
#endif /* INET6 */
-#ifdef __rtems__
-void
-rtems_bsd_netstat_inet_init(void)
-{
-#ifdef INET6
- udp_done = 0;
- tcp_done = 0;
- sdp_done = 0;
-#endif /* INET6 */
-}
-#endif /* __rtems__ */
static int
-pcblist_sysctl(int proto, const char *name, char **bufp, int istcp)
+pcblist_sysctl(int proto, const char *name, char **bufp, int istcp __unused)
{
const char *mibvar;
char *buf;
@@ -127,15 +129,15 @@ pcblist_sysctl(int proto, const char *name, char **bufp, int istcp)
len = 0;
if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) {
if (errno != ENOENT)
- warn("sysctl: %s", mibvar);
+ xo_warn("sysctl: %s", mibvar);
return (0);
}
- if ((buf = malloc(len)) == 0) {
- warnx("malloc %lu bytes", (u_long)len);
+ if ((buf = malloc(len)) == NULL) {
+ xo_warnx("malloc %lu bytes", (u_long)len);
return (0);
}
if (sysctlbyname(mibvar, buf, &len, 0, 0) < 0) {
- warn("sysctl: %s", mibvar);
+ xo_warn("sysctl: %s", mibvar);
free(buf);
return (0);
}
@@ -150,7 +152,7 @@ pcblist_sysctl(int proto, const char *name, char **bufp, int istcp)
static void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{
- xsb->sb_cc = sb->sb_cc;
+ xsb->sb_cc = sb->sb_ccc;
xsb->sb_hiwat = sb->sb_hiwat;
xsb->sb_mbcnt = sb->sb_mbcnt;
xsb->sb_mcnt = sb->sb_mcnt;
@@ -217,15 +219,15 @@ pcblist_kvm(u_long off, char **bufp, int istcp)
len = 2 * sizeof(xig) +
(pcbinfo.ipi_count + pcbinfo.ipi_count / 8) *
sizeof(struct xinpcb);
- if ((buf = malloc(len)) == 0) {
- warnx("malloc %lu bytes", (u_long)len);
+ if ((buf = malloc(len)) == NULL) {
+ xo_warnx("malloc %lu bytes", (u_long)len);
return (0);
}
p = buf;
#define COPYOUT(obj, size) do { \
if (len < (size)) { \
- warnx("buffer size exceeded"); \
+ xo_warnx("buffer size exceeded"); \
goto fail; \
} \
bcopy((obj), p, (size)); \
@@ -306,6 +308,9 @@ fail:
#undef KREAD
}
+#ifdef __rtems__
+static int protopr_first = 1;
+#endif /* __rtems__ */
/*
* Print a summary of connections related to an Internet
* protocol. For TCP, also give state of connection.
@@ -316,6 +321,9 @@ void
protopr(u_long off, const char *name, int af1, int proto)
{
int istcp;
+#ifndef __rtems__
+ static int first = 1;
+#endif /* __rtems__ */
char *buf;
const char *vchar;
struct tcpcb *tp = NULL;
@@ -361,8 +369,8 @@ protopr(u_long off, const char *name, int af1, int proto)
oxig = xig = (struct xinpgen *)buf;
for (xig = (struct xinpgen *)((char *)xig + xig->xig_len);
- xig->xig_len > sizeof(struct xinpgen);
- xig = (struct xinpgen *)((char *)xig + xig->xig_len)) {
+ xig->xig_len > sizeof(struct xinpgen);
+ xig = (struct xinpgen *)((char *)xig + xig->xig_len)) {
if (istcp) {
timer = &((struct xtcpcb *)xig)->xt_timer;
tp = &((struct xtcpcb *)xig)->xt_tp;
@@ -413,179 +421,237 @@ protopr(u_long off, const char *name, int af1, int proto)
))
continue;
- if (!protopr_initialized) {
+#ifndef __rtems__
+ if (first) {
+#else /* __rtems__ */
+ if (protopr_first) {
+#endif /* __rtems__ */
if (!Lflag) {
- printf("Active Internet connections");
+ xo_emit("Active Internet connections");
if (aflag)
- printf(" (including servers)");
+ xo_emit(" (including servers)");
} else
- printf(
+ xo_emit(
"Current listen queue sizes (qlen/incqlen/maxqlen)");
- putchar('\n');
+ xo_emit("\n");
if (Aflag)
- printf("%-*s ", 2 * (int)sizeof(void *), "Tcpcb");
+ xo_emit("{T:/%-*s} ", 2 * (int)sizeof(void *),
+ "Tcpcb");
if (Lflag)
- printf((Aflag && !Wflag) ?
- "%-5.5s %-14.14s %-18.18s" :
- "%-5.5s %-14.14s %-22.22s",
+ xo_emit((Aflag && !Wflag) ?
+ "{T:/%-5.5s} {T:/%-32.32s} {T:/%-18.18s}" :
+ ((!Wflag || af1 == AF_INET) ?
+ "{T:/%-5.5s} {T:/%-32.32s} {T:/%-22.22s}" :
+ "{T:/%-5.5s} {T:/%-32.32s} {T:/%-45.45s}"),
"Proto", "Listen", "Local Address");
else if (Tflag)
- printf((Aflag && !Wflag) ?
- "%-5.5s %-6.6s %-6.6s %-6.6s %-18.18s %s" :
- "%-5.5s %-6.6s %-6.6s %-6.6s %-22.22s %s",
+ xo_emit((Aflag && !Wflag) ?
+ "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-18.18s} {T:/%s}" :
+ ((!Wflag || af1 == AF_INET) ?
+ "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-22.22s} {T:/%s}" :
+ "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-45.45s} {T:/%s}"),
"Proto", "Rexmit", "OOORcv", "0-win",
"Local Address", "Foreign Address");
else {
- printf((Aflag && !Wflag) ?
- "%-5.5s %-6.6s %-6.6s %-18.18s %-18.18s" :
- "%-5.5s %-6.6s %-6.6s %-22.22s %-22.22s",
- "Proto", "Recv-Q", "Send-Q",
- "Local Address", "Foreign Address");
- if (!xflag)
- printf(" (state)");
+ xo_emit((Aflag && !Wflag) ?
+ "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-18.18s} {T:/%-18.18s}" :
+ ((!Wflag || af1 == AF_INET) ?
+ "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-22.22s} {T:/%-22.22s}" :
+ "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-45.45s} {T:/%-45.45s}"),
+ "Proto", "Recv-Q", "Send-Q",
+ "Local Address", "Foreign Address");
+ if (!xflag && !Rflag)
+ xo_emit(" (state)");
}
if (xflag) {
- printf(" %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s",
- "R-MBUF", "S-MBUF", "R-CLUS",
- "S-CLUS", "R-HIWA", "S-HIWA",
- "R-LOWA", "S-LOWA", "R-BCNT",
- "S-BCNT", "R-BMAX", "S-BMAX");
- printf(" %7.7s %7.7s %7.7s %7.7s %7.7s %7.7s",
- "rexmt", "persist", "keep",
- "2msl", "delack", "rcvtime");
+ xo_emit(" {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} "
+ "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} "
+ "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} "
+ "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s}",
+ "R-MBUF", "S-MBUF", "R-CLUS", "S-CLUS",
+ "R-HIWA", "S-HIWA", "R-LOWA", "S-LOWA",
+ "R-BCNT", "S-BCNT", "R-BMAX", "S-BMAX");
+ xo_emit(" {T:/%7.7s} {T:/%7.7s} {T:/%7.7s} "
+ "{T:/%7.7s} {T:/%7.7s} {T:/%7.7s}",
+ "rexmt", "persist", "keep", "2msl",
+ "delack", "rcvtime");
+ } else if (Rflag) {
+ xo_emit(" {T:/%8.8s} {T:/%5.5s}",
+ "flowid", "ftype");
}
- putchar('\n');
- protopr_initialized = 1;
+ xo_emit("\n");
+#ifndef __rtems__
+ first = 0;
+#else /* __rtems__ */
+ protopr_first = 0;
+#endif /* __rtems__ */
}
if (Lflag && so->so_qlimit == 0)
continue;
+ xo_open_instance("socket");
if (Aflag) {
if (istcp)
- printf("%*lx ", 2 * (int)sizeof(void *), (u_long)inp->inp_ppcb);
+ xo_emit("{q:address/%*lx} ",
+ 2 * (int)sizeof(void *),
+ (u_long)inp->inp_ppcb);
else
- printf("%*lx ", 2 * (int)sizeof(void *), (u_long)so->so_pcb);
+ xo_emit("{q:address/%*lx} ",
+ 2 * (int)sizeof(void *),
+ (u_long)so->so_pcb);
}
#ifdef INET6
if ((inp->inp_vflag & INP_IPV6) != 0)
vchar = ((inp->inp_vflag & INP_IPV4) != 0) ?
- "46" : "6 ";
+ "46" : "6";
else
#endif
vchar = ((inp->inp_vflag & INP_IPV4) != 0) ?
- "4 " : " ";
+ "4" : "";
if (istcp && (tp->t_flags & TF_TOE) != 0)
- printf("%-3.3s%-2.2s ", "toe", vchar);
+ xo_emit("{:protocol/%-3.3s%-2.2s/%s%s} ", "toe", vchar);
else
- printf("%-3.3s%-2.2s ", name, vchar);
+ xo_emit("{:protocol/%-3.3s%-2.2s/%s%s} ", name, vchar);
if (Lflag) {
- char buf1[15];
+ char buf1[33];
- snprintf(buf1, 15, "%d/%d/%d", so->so_qlen,
+ snprintf(buf1, sizeof buf1, "%u/%u/%u", so->so_qlen,
so->so_incqlen, so->so_qlimit);
- printf("%-14.14s ", buf1);
+ xo_emit("{:listen-queue-sizes/%-32.32s} ", buf1);
} else if (Tflag) {
if (istcp)
- printf("%6u %6u %6u ", tp->t_sndrexmitpack,
- tp->t_rcvoopack, tp->t_sndzerowin);
+ xo_emit("{:sent-retransmit-packets/%6u} "
+ "{:received-out-of-order-packets/%6u} "
+ "{:sent-zero-window/%6u} ",
+ tp->t_sndrexmitpack, tp->t_rcvoopack,
+ tp->t_sndzerowin);
+ else
+ xo_emit("{P:/%21s}", "");
} else {
- printf("%6u %6u ", so->so_rcv.sb_cc, so->so_snd.sb_cc);
+ xo_emit("{:receive-bytes-waiting/%6u} "
+ "{:send-bytes-waiting/%6u} ",
+ so->so_rcv.sb_cc, so->so_snd.sb_cc);
}
if (numeric_port) {
if (inp->inp_vflag & INP_IPV4) {
- inetprint(&inp->inp_laddr, (int)inp->inp_lport,
- name, 1);
+ inetprint("local", &inp->inp_laddr,
+ (int)inp->inp_lport, name, 1, af1);
if (!Lflag)
- inetprint(&inp->inp_faddr,
- (int)inp->inp_fport, name, 1);
+ inetprint("remote", &inp->inp_faddr,
+ (int)inp->inp_fport, name, 1, af1);
}
#ifdef INET6
else if (inp->inp_vflag & INP_IPV6) {
- inet6print(&inp->in6p_laddr,
+ inet6print("local", &inp->in6p_laddr,
(int)inp->inp_lport, name, 1);
if (!Lflag)
- inet6print(&inp->in6p_faddr,
+ inet6print("remote", &inp->in6p_faddr,
(int)inp->inp_fport, name, 1);
} /* else nothing printed now */
#endif /* INET6 */
} else if (inp->inp_flags & INP_ANONPORT) {
if (inp->inp_vflag & INP_IPV4) {
- inetprint(&inp->inp_laddr, (int)inp->inp_lport,
- name, 1);
+ inetprint("local", &inp->inp_laddr,
+ (int)inp->inp_lport, name, 1, af1);
if (!Lflag)
- inetprint(&inp->inp_faddr,
- (int)inp->inp_fport, name, 0);
+ inetprint("remote", &inp->inp_faddr,
+ (int)inp->inp_fport, name, 0, af1);
}
#ifdef INET6
else if (inp->inp_vflag & INP_IPV6) {
- inet6print(&inp->in6p_laddr,
+ inet6print("local", &inp->in6p_laddr,
(int)inp->inp_lport, name, 1);
if (!Lflag)
- inet6print(&inp->in6p_faddr,
+ inet6print("remote", &inp->in6p_faddr,
(int)inp->inp_fport, name, 0);
} /* else nothing printed now */
#endif /* INET6 */
} else {
if (inp->inp_vflag & INP_IPV4) {
- inetprint(&inp->inp_laddr, (int)inp->inp_lport,
- name, 0);
+ inetprint("local", &inp->inp_laddr,
+ (int)inp->inp_lport, name, 0, af1);
if (!Lflag)
- inetprint(&inp->inp_faddr,
+ inetprint("remote", &inp->inp_faddr,
(int)inp->inp_fport, name,
- inp->inp_lport != inp->inp_fport);
+ inp->inp_lport != inp->inp_fport,
+ af1);
}
#ifdef INET6
else if (inp->inp_vflag & INP_IPV6) {
- inet6print(&inp->in6p_laddr,
+ inet6print("local", &inp->in6p_laddr,
(int)inp->inp_lport, name, 0);
if (!Lflag)
- inet6print(&inp->in6p_faddr,
+ inet6print("remote", &inp->in6p_faddr,
(int)inp->inp_fport, name,
inp->inp_lport != inp->inp_fport);
} /* else nothing printed now */
#endif /* INET6 */
}
if (xflag) {
- printf("%6u %6u %6u %6u %6u %6u %6u %6u %6u %6u %6u %6u",
- so->so_rcv.sb_mcnt, so->so_snd.sb_mcnt,
- so->so_rcv.sb_ccnt, so->so_snd.sb_ccnt,
- so->so_rcv.sb_hiwat, so->so_snd.sb_hiwat,
- so->so_rcv.sb_lowat, so->so_snd.sb_lowat,
- so->so_rcv.sb_mbcnt, so->so_snd.sb_mbcnt,
- so->so_rcv.sb_mbmax, so->so_snd.sb_mbmax);
+ xo_emit("{:receive-mbufs/%6u} {:send-mbufs/%6u} "
+ "{:receive-clusters/%6u} {:send-clusters/%6u} "
+ "{:receive-high-water/%6u} {:send-high-water/%6u} "
+ "{:receive-low-water/%6u} {:send-low-water/%6u} "
+ "{:receive-mbuf-bytes/%6u} {:send-mbuf-bytes/%6u} "
+ "{:receive-mbuf-bytes-max/%6u} "
+ "{:send-mbuf-bytes-max/%6u}",
+ so->so_rcv.sb_mcnt, so->so_snd.sb_mcnt,
+ so->so_rcv.sb_ccnt, so->so_snd.sb_ccnt,
+ so->so_rcv.sb_hiwat, so->so_snd.sb_hiwat,
+ so->so_rcv.sb_lowat, so->so_snd.sb_lowat,
+ so->so_rcv.sb_mbcnt, so->so_snd.sb_mbcnt,
+ so->so_rcv.sb_mbmax, so->so_snd.sb_mbmax);
if (timer != NULL)
- printf(" %4d.%02d %4d.%02d %4d.%02d %4d.%02d %4d.%02d %4d.%02d",
- timer->tt_rexmt / 1000, (timer->tt_rexmt % 1000) / 10,
- timer->tt_persist / 1000, (timer->tt_persist % 1000) / 10,
- timer->tt_keep / 1000, (timer->tt_keep % 1000) / 10,
- timer->tt_2msl / 1000, (timer->tt_2msl % 1000) / 10,
- timer->tt_delack / 1000, (timer->tt_delack % 1000) / 10,
- timer->t_rcvtime / 1000, (timer->t_rcvtime % 1000) / 10);
+ xo_emit(" {:retransmit-timer/%4d.%02d} "
+ "{:persist-timer/%4d.%02d} "
+ "{:keepalive-timer/%4d.%02d} "
+ "{:msl2-timer/%4d.%02d} "
+ "{:delay-ack-timer/%4d.%02d} "
+ "{:inactivity-timer/%4d.%02d}",
+ timer->tt_rexmt / 1000,
+ (timer->tt_rexmt % 1000) / 10,
+ timer->tt_persist / 1000,
+ (timer->tt_persist % 1000) / 10,
+ timer->tt_keep / 1000,
+ (timer->tt_keep % 1000) / 10,
+ timer->tt_2msl / 1000,
+ (timer->tt_2msl % 1000) / 10,
+ timer->tt_delack / 1000,
+ (timer->tt_delack % 1000) / 10,
+ timer->t_rcvtime / 1000,
+ (timer->t_rcvtime % 1000) / 10);
}
- if (istcp && !Lflag && !xflag && !Tflag) {
+ if (istcp && !Lflag && !xflag && !Tflag && !Rflag) {
if (tp->t_state < 0 || tp->t_state >= TCP_NSTATES)
- printf("%d", tp->t_state);
+ xo_emit("{:tcp-state/%d}", tp->t_state);
else {
- printf("%s", tcpstates[tp->t_state]);
+ xo_emit("{:tcp-state/%s}",
+ tcpstates[tp->t_state]);
#if defined(TF_NEEDSYN) && defined(TF_NEEDFIN)
/* Show T/TCP `hidden state' */
if (tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN))
- putchar('*');
+ xo_emit("{:need-syn-or-fin/*}");
#endif /* defined(TF_NEEDSYN) && defined(TF_NEEDFIN) */
}
- }
- putchar('\n');
+ }
+ if (Rflag) {
+ /* XXX: is this right Alfred */
+ xo_emit(" {:flow-id/%08x} {:flow-type/%5d}",
+ inp->inp_flowid,
+ inp->inp_flowtype);
+ }
+ xo_emit("\n");
+ xo_close_instance("socket");
}
if (xig != oxig && xig->xig_gen != oxig->xig_gen) {
if (oxig->xig_count > xig->xig_count) {
- printf("Some %s sockets may have been deleted.\n",
- name);
+ xo_emit("Some {d:lost/%s} sockets may have been "
+ "deleted.\n", name);
} else if (oxig->xig_count < xig->xig_count) {
- printf("Some %s sockets may have been created.\n",
- name);
+ xo_emit("Some {d:created/%s} sockets may have been "
+ "created.\n", name);
} else {
- printf(
- "Some %s sockets may have been created or deleted.\n",
- name);
+ xo_emit("Some {d:changed/%s} sockets may have been "
+ "created or deleted.\n", name);
}
}
free(buf);
@@ -597,8 +663,8 @@ protopr(u_long off, const char *name, int af1, int proto)
void
tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct tcpstat tcpstat, zerostat;
- size_t len = sizeof tcpstat;
+ struct tcpstat tcpstat;
+ uint64_t tcps_states[TCP_NSTATES];
#ifdef INET6
if (tcp_done != 0)
@@ -607,133 +673,240 @@ tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
tcp_done = 1;
#endif
- if (live) {
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet.tcp.stats", &tcpstat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- warn("sysctl: net.inet.tcp.stats");
- return;
- }
- } else
- kread(off, &tcpstat, len);
-
- printf ("%s:\n", name);
-
-#define p(f, m) if (tcpstat.f || sflag <= 1) \
- printf(m, tcpstat.f, plural(tcpstat.f))
-#define p1a(f, m) if (tcpstat.f || sflag <= 1) \
- printf(m, tcpstat.f)
-#define p2(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \
- printf(m, tcpstat.f1, plural(tcpstat.f1), tcpstat.f2, plural(tcpstat.f2))
-#define p2a(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \
- printf(m, tcpstat.f1, plural(tcpstat.f1), tcpstat.f2)
-#define p3(f, m) if (tcpstat.f || sflag <= 1) \
- printf(m, tcpstat.f, pluralies(tcpstat.f))
-
- p(tcps_sndtotal, "\t%lu packet%s sent\n");
- p2(tcps_sndpack,tcps_sndbyte, "\t\t%lu data packet%s (%lu byte%s)\n");
- p2(tcps_sndrexmitpack, tcps_sndrexmitbyte,
- "\t\t%lu data packet%s (%lu byte%s) retransmitted\n");
- p(tcps_sndrexmitbad,
- "\t\t%lu data packet%s unnecessarily retransmitted\n");
- p(tcps_mturesent, "\t\t%lu resend%s initiated by MTU discovery\n");
- p2a(tcps_sndacks, tcps_delack,
- "\t\t%lu ack-only packet%s (%lu delayed)\n");
- p(tcps_sndurg, "\t\t%lu URG only packet%s\n");
- p(tcps_sndprobe, "\t\t%lu window probe packet%s\n");
- p(tcps_sndwinup, "\t\t%lu window update packet%s\n");
- p(tcps_sndctrl, "\t\t%lu control packet%s\n");
- p(tcps_rcvtotal, "\t%lu packet%s received\n");
- p2(tcps_rcvackpack, tcps_rcvackbyte,
- "\t\t%lu ack%s (for %lu byte%s)\n");
- p(tcps_rcvdupack, "\t\t%lu duplicate ack%s\n");
- p(tcps_rcvacktoomuch, "\t\t%lu ack%s for unsent data\n");
- p2(tcps_rcvpack, tcps_rcvbyte,
- "\t\t%lu packet%s (%lu byte%s) received in-sequence\n");
- p2(tcps_rcvduppack, tcps_rcvdupbyte,
- "\t\t%lu completely duplicate packet%s (%lu byte%s)\n");
- p(tcps_pawsdrop, "\t\t%lu old duplicate packet%s\n");
- p2(tcps_rcvpartduppack, tcps_rcvpartdupbyte,
- "\t\t%lu packet%s with some dup. data (%lu byte%s duped)\n");
- p2(tcps_rcvoopack, tcps_rcvoobyte,
- "\t\t%lu out-of-order packet%s (%lu byte%s)\n");
- p2(tcps_rcvpackafterwin, tcps_rcvbyteafterwin,
- "\t\t%lu packet%s (%lu byte%s) of data after window\n");
- p(tcps_rcvwinprobe, "\t\t%lu window probe%s\n");
- p(tcps_rcvwinupd, "\t\t%lu window update packet%s\n");
- p(tcps_rcvafterclose, "\t\t%lu packet%s received after close\n");
- p(tcps_rcvbadsum, "\t\t%lu discarded for bad checksum%s\n");
- p(tcps_rcvbadoff, "\t\t%lu discarded for bad header offset field%s\n");
- p1a(tcps_rcvshort, "\t\t%lu discarded because packet too short\n");
- p1a(tcps_rcvmemdrop, "\t\t%lu discarded due to memory problems\n");
- p(tcps_connattempt, "\t%lu connection request%s\n");
- p(tcps_accepts, "\t%lu connection accept%s\n");
- p(tcps_badsyn, "\t%lu bad connection attempt%s\n");
- p(tcps_listendrop, "\t%lu listen queue overflow%s\n");
- p(tcps_badrst, "\t%lu ignored RSTs in the window%s\n");
- p(tcps_connects, "\t%lu connection%s established (including accepts)\n");
- p2(tcps_closed, tcps_drops,
- "\t%lu connection%s closed (including %lu drop%s)\n");
- p(tcps_cachedrtt, "\t\t%lu connection%s updated cached RTT on close\n");
- p(tcps_cachedrttvar,
- "\t\t%lu connection%s updated cached RTT variance on close\n");
- p(tcps_cachedssthresh,
- "\t\t%lu connection%s updated cached ssthresh on close\n");
- p(tcps_conndrops, "\t%lu embryonic connection%s dropped\n");
- p2(tcps_rttupdated, tcps_segstimed,
- "\t%lu segment%s updated rtt (of %lu attempt%s)\n");
- p(tcps_rexmttimeo, "\t%lu retransmit timeout%s\n");
- p(tcps_timeoutdrop, "\t\t%lu connection%s dropped by rexmit timeout\n");
- p(tcps_persisttimeo, "\t%lu persist timeout%s\n");
- p(tcps_persistdrop, "\t\t%lu connection%s dropped by persist timeout\n");
- p(tcps_finwait2_drops,
- "\t%lu Connection%s (fin_wait_2) dropped because of timeout\n");
- p(tcps_keeptimeo, "\t%lu keepalive timeout%s\n");
- p(tcps_keepprobe, "\t\t%lu keepalive probe%s sent\n");
- p(tcps_keepdrops, "\t\t%lu connection%s dropped by keepalive\n");
- p(tcps_predack, "\t%lu correct ACK header prediction%s\n");
- p(tcps_preddat, "\t%lu correct data packet header prediction%s\n");
-
- p3(tcps_sc_added, "\t%lu syncache entr%s added\n");
- p1a(tcps_sc_retransmitted, "\t\t%lu retransmitted\n");
- p1a(tcps_sc_dupsyn, "\t\t%lu dupsyn\n");
- p1a(tcps_sc_dropped, "\t\t%lu dropped\n");
- p1a(tcps_sc_completed, "\t\t%lu completed\n");
- p1a(tcps_sc_bucketoverflow, "\t\t%lu bucket overflow\n");
- p1a(tcps_sc_cacheoverflow, "\t\t%lu cache overflow\n");
- p1a(tcps_sc_reset, "\t\t%lu reset\n");
- p1a(tcps_sc_stale, "\t\t%lu stale\n");
- p1a(tcps_sc_aborted, "\t\t%lu aborted\n");
- p1a(tcps_sc_badack, "\t\t%lu badack\n");
- p1a(tcps_sc_unreach, "\t\t%lu unreach\n");
- p(tcps_sc_zonefail, "\t\t%lu zone failure%s\n");
- p(tcps_sc_sendcookie, "\t%lu cookie%s sent\n");
- p(tcps_sc_recvcookie, "\t%lu cookie%s received\n");
-
- p(tcps_hc_added, "\t%lu hostcache entrie%s added\n");
- p1a(tcps_hc_bucketoverflow, "\t\t%lu bucket overflow\n");
-
- p(tcps_sack_recovery_episode, "\t%lu SACK recovery episode%s\n");
- p(tcps_sack_rexmits,
- "\t%lu segment rexmit%s in SACK recovery episodes\n");
- p(tcps_sack_rexmit_bytes,
- "\t%lu byte rexmit%s in SACK recovery episodes\n");
- p(tcps_sack_rcv_blocks,
- "\t%lu SACK option%s (SACK blocks) received\n");
- p(tcps_sack_send_blocks, "\t%lu SACK option%s (SACK blocks) sent\n");
- p1a(tcps_sack_sboverflow, "\t%lu SACK scoreboard overflow\n");
-
- p(tcps_ecn_ce, "\t%lu packet%s with ECN CE bit set\n");
- p(tcps_ecn_ect0, "\t%lu packet%s with ECN ECT(0) bit set\n");
- p(tcps_ecn_ect1, "\t%lu packet%s with ECN ECT(1) bit set\n");
- p(tcps_ecn_shs, "\t%lu successful ECN handshake%s\n");
- p(tcps_ecn_rcwnd, "\t%lu time%s ECN reduced the congestion window\n");
-#undef p
-#undef p1a
-#undef p2
-#undef p2a
-#undef p3
+ if (fetch_stats("net.inet.tcp.stats", off, &tcpstat,
+ sizeof(tcpstat), kread_counters) != 0)
+ return;
+
+ if (fetch_stats_ro("net.inet.tcp.states", nl[N_TCPS_STATES].n_value,
+ &tcps_states, sizeof(tcps_states), kread_counters) != 0)
+ return;
+
+ xo_open_container("tcp");
+ xo_emit("{T:/%s}:\n", name);
+
+#define p(f, m) if (tcpstat.f || sflag <= 1) \
+ xo_emit(m, (uintmax_t )tcpstat.f, plural(tcpstat.f))
+#define p1a(f, m) if (tcpstat.f || sflag <= 1) \
+ xo_emit(m, (uintmax_t )tcpstat.f)
+#define p2(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \
+ xo_emit(m, (uintmax_t )tcpstat.f1, plural(tcpstat.f1), \
+ (uintmax_t )tcpstat.f2, plural(tcpstat.f2))
+#define p2a(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \
+ xo_emit(m, (uintmax_t )tcpstat.f1, plural(tcpstat.f1), \
+ (uintmax_t )tcpstat.f2)
+#define p3(f, m) if (tcpstat.f || sflag <= 1) \
+ xo_emit(m, (uintmax_t )tcpstat.f, pluralies(tcpstat.f))
+
+ p(tcps_sndtotal, "\t{:sent-packets/%ju} {N:/packet%s sent}\n");
+ p2(tcps_sndpack,tcps_sndbyte, "\t\t{:sent-data-packets/%ju} "
+ "{N:/data packet%s} ({:sent-data-bytes/%ju} {N:/byte%s})\n");
+ p2(tcps_sndrexmitpack, tcps_sndrexmitbyte, "\t\t"
+ "{:sent-retransmitted-packets/%ju} {N:/data packet%s} "
+ "({:sent-retransmitted-bytes/%ju} {N:/byte%s}) "
+ "{N:retransmitted}\n");
+ p(tcps_sndrexmitbad, "\t\t"
+ "{:sent-unnecessary-retransmitted-packets/%ju} "
+ "{N:/data packet%s unnecessarily retransmitted}\n");
+ p(tcps_mturesent, "\t\t{:sent-resends-by-mtu-discovery/%ju} "
+ "{N:/resend%s initiated by MTU discovery}\n");
+ p2a(tcps_sndacks, tcps_delack, "\t\t{:sent-ack-only-packets/%ju} "
+ "{N:/ack-only packet%s/} ({:sent-packets-delayed/%ju} "
+ "{N:delayed})\n");
+ p(tcps_sndurg, "\t\t{:sent-urg-only-packets/%ju} "
+ "{N:/URG only packet%s}\n");
+ p(tcps_sndprobe, "\t\t{:sent-window-probe-packets/%ju} "
+ "{N:/window probe packet%s}\n");
+ p(tcps_sndwinup, "\t\t{:sent-window-update-packets/%ju} "
+ "{N:/window update packet%s}\n");
+ p(tcps_sndctrl, "\t\t{:sent-control-packets/%ju} "
+ "{N:/control packet%s}\n");
+ p(tcps_rcvtotal, "\t{:received-packets/%ju} "
+ "{N:/packet%s received}\n");
+ p2(tcps_rcvackpack, tcps_rcvackbyte, "\t\t"
+ "{:received-ack-packets/%ju} {N:/ack%s} "
+ "{N:(for} {:received-ack-bytes/%ju} {N:/byte%s})\n");
+ p(tcps_rcvdupack, "\t\t{:received-duplicate-acks/%ju} "
+ "{N:/duplicate ack%s}\n");
+ p(tcps_rcvacktoomuch, "\t\t{:received-acks-for-unsent-data/%ju} "
+ "{N:/ack%s for unsent data}\n");
+ p2(tcps_rcvpack, tcps_rcvbyte, "\t\t"
+ "{:received-in-sequence-packets/%ju} {N:/packet%s} "
+ "({:received-in-sequence-bytes/%ju} {N:/byte%s}) "
+ "{N:received in-sequence}\n");
+ p2(tcps_rcvduppack, tcps_rcvdupbyte, "\t\t"
+ "{:received-completely-duplicate-packets/%ju} "
+ "{N:/completely duplicate packet%s} "
+ "({:received-completely-duplicate-bytes/%ju} {N:/byte%s})\n");
+ p(tcps_pawsdrop, "\t\t{:received-old-duplicate-packets/%ju} "
+ "{N:/old duplicate packet%s}\n");
+ p2(tcps_rcvpartduppack, tcps_rcvpartdupbyte, "\t\t"
+ "{:received-some-duplicate-packets/%ju} "
+ "{N:/packet%s with some dup. data} "
+ "({:received-some-duplicate-bytes/%ju} {N:/byte%s duped/})\n");
+ p2(tcps_rcvoopack, tcps_rcvoobyte, "\t\t{:received-out-of-order/%ju} "
+ "{N:/out-of-order packet%s} "
+ "({:received-out-of-order-bytes/%ju} {N:/byte%s})\n");
+ p2(tcps_rcvpackafterwin, tcps_rcvbyteafterwin, "\t\t"
+ "{:received-after-window-packets/%ju} {N:/packet%s} "
+ "({:received-after-window-bytes/%ju} {N:/byte%s}) "
+ "{N:of data after window}\n");
+ p(tcps_rcvwinprobe, "\t\t{:received-window-probes/%ju} "
+ "{N:/window probe%s}\n");
+ p(tcps_rcvwinupd, "\t\t{:receive-window-update-packets/%ju} "
+ "{N:/window update packet%s}\n");
+ p(tcps_rcvafterclose, "\t\t{:received-after-close-packets/%ju} "
+ "{N:/packet%s received after close}\n");
+ p(tcps_rcvbadsum, "\t\t{:discard-bad-checksum/%ju} "
+ "{N:/discarded for bad checksum%s}\n");
+ p(tcps_rcvbadoff, "\t\t{:discard-bad-header-offset/%ju} "
+ "{N:/discarded for bad header offset field%s}\n");
+ p1a(tcps_rcvshort, "\t\t{:discard-too-short/%ju} "
+ "{N:discarded because packet too short}\n");
+ p1a(tcps_rcvmemdrop, "\t\t{:discard-memory-problems/%ju} "
+ "{N:discarded due to memory problems}\n");
+ p(tcps_connattempt, "\t{:connection-requests/%ju} "
+ "{N:/connection request%s}\n");
+ p(tcps_accepts, "\t{:connections-accepts/%ju} "
+ "{N:/connection accept%s}\n");
+ p(tcps_badsyn, "\t{:bad-connection-attempts/%ju} "
+ "{N:/bad connection attempt%s}\n");
+ p(tcps_listendrop, "\t{:listen-queue-overflows/%ju} "
+ "{N:/listen queue overflow%s}\n");
+ p(tcps_badrst, "\t{:ignored-in-window-resets/%ju} "
+ "{N:/ignored RSTs in the window%s}\n");
+ p(tcps_connects, "\t{:connections-established/%ju} "
+ "{N:/connection%s established (including accepts)}\n");
+ p2(tcps_closed, tcps_drops, "\t{:connections-closed/%ju} "
+ "{N:/connection%s closed (including} "
+ "{:connection-drops/%ju} {N:/drop%s})\n");
+ p(tcps_cachedrtt, "\t\t{:connections-updated-rtt-on-close/%ju} "
+ "{N:/connection%s updated cached RTT on close}\n");
+ p(tcps_cachedrttvar, "\t\t"
+ "{:connections-updated-variance-on-close/%ju} "
+ "{N:/connection%s updated cached RTT variance on close}\n");
+ p(tcps_cachedssthresh, "\t\t"
+ "{:connections-updated-ssthresh-on-close/%ju} "
+ "{N:/connection%s updated cached ssthresh on close}\n");
+ p(tcps_conndrops, "\t{:embryonic-connections-dropped/%ju} "
+ "{N:/embryonic connection%s dropped}\n");
+ p2(tcps_rttupdated, tcps_segstimed, "\t{:segments-updated-rtt/%ju} "
+ "{N:/segment%s updated rtt (of} "
+ "{:segment-update-attempts/%ju} {N:/attempt%s})\n");
+ p(tcps_rexmttimeo, "\t{:retransmit-timeouts/%ju} "
+ "{N:/retransmit timeout%s}\n");
+ p(tcps_timeoutdrop, "\t\t"
+ "{:connections-dropped-by-retransmit-timeout/%ju} "
+ "{N:/connection%s dropped by rexmit timeout}\n");
+ p(tcps_persisttimeo, "\t{:persist-timeout/%ju} "
+ "{N:/persist timeout%s}\n");
+ p(tcps_persistdrop, "\t\t"
+ "{:connections-dropped-by-persist-timeout/%ju} "
+ "{N:/connection%s dropped by persist timeout}\n");
+ p(tcps_finwait2_drops, "\t"
+ "{:connections-dropped-by-finwait2-timeout/%ju} "
+ "{N:/Connection%s (fin_wait_2) dropped because of timeout}\n");
+ p(tcps_keeptimeo, "\t{:keepalive-timeout/%ju} "
+ "{N:/keepalive timeout%s}\n");
+ p(tcps_keepprobe, "\t\t{:keepalive-probes/%ju} "
+ "{N:/keepalive probe%s sent}\n");
+ p(tcps_keepdrops, "\t\t{:connections-dropped-by-keepalives/%ju} "
+ "{N:/connection%s dropped by keepalive}\n");
+ p(tcps_predack, "\t{:ack-header-predictions/%ju} "
+ "{N:/correct ACK header prediction%s}\n");
+ p(tcps_preddat, "\t{:data-packet-header-predictions/%ju} "
+ "{N:/correct data packet header prediction%s}\n");
+
+ xo_open_container("syncache");
+
+ p3(tcps_sc_added, "\t{:entries-added/%ju} "
+ "{N:/syncache entr%s added}\n");
+ p1a(tcps_sc_retransmitted, "\t\t{:retransmitted/%ju} "
+ "{N:/retransmitted}\n");
+ p1a(tcps_sc_dupsyn, "\t\t{:duplicates/%ju} {N:/dupsyn}\n");
+ p1a(tcps_sc_dropped, "\t\t{:dropped/%ju} {N:/dropped}\n");
+ p1a(tcps_sc_completed, "\t\t{:completed/%ju} {N:/completed}\n");
+ p1a(tcps_sc_bucketoverflow, "\t\t{:bucket-overflow/%ju} "
+ "{N:/bucket overflow}\n");
+ p1a(tcps_sc_cacheoverflow, "\t\t{:cache-overflow/%ju} "
+ "{N:/cache overflow}\n");
+ p1a(tcps_sc_reset, "\t\t{:reset/%ju} {N:/reset}\n");
+ p1a(tcps_sc_stale, "\t\t{:stale/%ju} {N:/stale}\n");
+ p1a(tcps_sc_aborted, "\t\t{:aborted/%ju} {N:/aborted}\n");
+ p1a(tcps_sc_badack, "\t\t{:bad-ack/%ju} {N:/badack}\n");
+ p1a(tcps_sc_unreach, "\t\t{:unreachable/%ju} {N:/unreach}\n");
+ p(tcps_sc_zonefail, "\t\t{:zone-failures/%ju} {N:/zone failure%s}\n");
+ p(tcps_sc_sendcookie, "\t{:sent-cookies/%ju} {N:/cookie%s sent}\n");
+ p(tcps_sc_recvcookie, "\t{:received-cookies/%ju} "
+ "{N:/cookie%s received}\n");
+
+ xo_close_container("syncache");
+
+ xo_open_container("hostcache");
+
+ p3(tcps_hc_added, "\t{:entries-added/%ju} "
+ "{N:/hostcache entr%s added}\n");
+ p1a(tcps_hc_bucketoverflow, "\t\t{:buffer-overflows/%ju} "
+ "{N:/bucket overflow}\n");
+
+ xo_close_container("hostcache");
+
+ xo_open_container("sack");
+
+ p(tcps_sack_recovery_episode, "\t{:recovery-episodes/%ju} "
+ "{N:/SACK recovery episode%s}\n");
+ p(tcps_sack_rexmits, "\t{:segment-retransmits/%ju} "
+ "{N:/segment rexmit%s in SACK recovery episodes}\n");
+ p(tcps_sack_rexmit_bytes, "\t{:byte-retransmits/%ju} "
+ "{N:/byte rexmit%s in SACK recovery episodes}\n");
+ p(tcps_sack_rcv_blocks, "\t{:received-blocks/%ju} "
+ "{N:/SACK option%s (SACK blocks) received}\n");
+ p(tcps_sack_send_blocks, "\t{:sent-option-blocks/%ju} "
+ "{N:/SACK option%s (SACK blocks) sent}\n");
+ p1a(tcps_sack_sboverflow, "\t{:scoreboard-overflows/%ju} "
+ "{N:/SACK scoreboard overflow}\n");
+
+ xo_close_container("sack");
+ xo_open_container("ecn");
+
+ p(tcps_ecn_ce, "\t{:ce-packets/%ju} "
+ "{N:/packet%s with ECN CE bit set}\n");
+ p(tcps_ecn_ect0, "\t{:ect0-packets/%ju} "
+ "{N:/packet%s with ECN ECT(0) bit set}\n");
+ p(tcps_ecn_ect1, "\t{:ect1-packets/%ju} "
+ "{N:/packet%s with ECN ECT(1) bit set}\n");
+ p(tcps_ecn_shs, "\t{:handshakes/%ju} "
+ "{N:/successful ECN handshake%s}\n");
+ p(tcps_ecn_rcwnd, "\t{:congestion-reductions/%ju} "
+ "{N:/time%s ECN reduced the congestion window}\n");
+ #undef p
+ #undef p1a
+ #undef p2
+ #undef p2a
+ #undef p3
+ xo_close_container("ecn");
+
+ xo_open_container("TCP connection count by state");
+ xo_emit("{T:/TCP connection count by state}:\n");
+ for (int i = 0; i < TCP_NSTATES; i++) {
+ /*
+ * XXXGL: is there a way in libxo to use %s
+ * in the "content string" of a format
+ * string? I failed to do that, that's why
+ * a temporary buffer is used to construct
+ * format string for xo_emit().
+ */
+ char fmtbuf[80];
+
+ if (sflag > 1 && tcps_states[i] == 0)
+ continue;
+ snprintf(fmtbuf, sizeof(fmtbuf), "\t{:%s/%%ju} "
+ "{Np:/connection ,connections} in %s state\n",
+ tcpstates[i], tcpstates[i]);
+ xo_emit(fmtbuf, (uintmax_t )tcps_states[i]);
+ }
+ xo_close_container("TCP connection count by state");
+
+ xo_close_container("tcp");
}
/*
@@ -742,9 +915,8 @@ tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
void
udp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct udpstat udpstat, zerostat;
- size_t len = sizeof udpstat;
- u_long delivered;
+ struct udpstat udpstat;
+ uint64_t delivered;
#ifdef INET6
if (udp_done != 0)
@@ -753,32 +925,36 @@ udp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
udp_done = 1;
#endif
- if (live) {
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet.udp.stats", &udpstat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- warn("sysctl: net.inet.udp.stats");
- return;
- }
- } else
- kread(off, &udpstat, len);
+ if (fetch_stats("net.inet.udp.stats", off, &udpstat,
+ sizeof(udpstat), kread_counters) != 0)
+ return;
+
+ xo_open_container("udp");
+ xo_emit("{T:/%s}:\n", name);
- printf("%s:\n", name);
#define p(f, m) if (udpstat.f || sflag <= 1) \
- printf(m, udpstat.f, plural(udpstat.f))
+ xo_emit("\t" m, (uintmax_t)udpstat.f, plural(udpstat.f))
#define p1a(f, m) if (udpstat.f || sflag <= 1) \
- printf(m, udpstat.f)
- p(udps_ipackets, "\t%lu datagram%s received\n");
- p1a(udps_hdrops, "\t%lu with incomplete header\n");
- p1a(udps_badlen, "\t%lu with bad data length field\n");
- p1a(udps_badsum, "\t%lu with bad checksum\n");
- p1a(udps_nosum, "\t%lu with no checksum\n");
- p1a(udps_noport, "\t%lu dropped due to no socket\n");
- p(udps_noportbcast,
- "\t%lu broadcast/multicast datagram%s undelivered\n");
- p1a(udps_fullsock, "\t%lu dropped due to full socket buffers\n");
- p1a(udpps_pcbhashmiss, "\t%lu not for hashed pcb\n");
+ xo_emit("\t" m, (uintmax_t)udpstat.f)
+
+ p(udps_ipackets, "{:received-datagrams/%ju} "
+ "{N:/datagram%s received}\n");
+ p1a(udps_hdrops, "{:dropped-incomplete-headers/%ju} "
+ "{N:/with incomplete header}\n");
+ p1a(udps_badlen, "{:dropped-bad-data-length/%ju} "
+ "{N:/with bad data length field}\n");
+ p1a(udps_badsum, "{:dropped-bad-checksum/%ju} "
+ "{N:/with bad checksum}\n");
+ p1a(udps_nosum, "{:dropped-no-checksum/%ju} "
+ "{N:/with no checksum}\n");
+ p1a(udps_noport, "{:dropped-no-socket/%ju} "
+ "{N:/dropped due to no socket}\n");
+ p(udps_noportbcast, "{:dropped-broadcast-multicast/%ju} "
+ "{N:/broadcast\\/multicast datagram%s undelivered}\n");
+ p1a(udps_fullsock, "{:dropped-full-socket-buffer/%ju} "
+ "{N:/dropped due to full socket buffers}\n");
+ p1a(udpps_pcbhashmiss, "{:not-for-hashed-pcb/%ju} "
+ "{N:/not for hashed pcb}\n");
delivered = udpstat.udps_ipackets -
udpstat.udps_hdrops -
udpstat.udps_badlen -
@@ -787,13 +963,15 @@ udp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
udpstat.udps_noportbcast -
udpstat.udps_fullsock;
if (delivered || sflag <= 1)
- printf("\t%lu delivered\n", delivered);
- p(udps_opackets, "\t%lu datagram%s output\n");
+ xo_emit("\t{:delivered-packets/%ju} {N:/delivered}\n",
+ (uintmax_t)delivered);
+ p(udps_opackets, "{:output-packets/%ju} {N:/datagram%s output}\n");
/* the next statistic is cumulative in udps_noportbcast */
- p(udps_filtermcast,
- "\t%lu time%s multicast source filter matched\n");
+ p(udps_filtermcast, "{:multicast-source-filter-matches/%ju} "
+ "{N:/time%s multicast source filter matched}\n");
#undef p
#undef p1a
+ xo_close_container("udp");
}
/*
@@ -802,49 +980,53 @@ udp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
void
carp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct carpstats carpstat, zerostat;
- size_t len = sizeof(struct carpstats);
+ struct carpstats carpstat;
- if (live) {
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet.carp.stats", &carpstat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- if (errno != ENOENT)
- warn("sysctl: net.inet.carp.stats");
- return;
- }
- } else {
- if (off == 0)
- return;
- kread(off, &carpstat, len);
- }
+ if (fetch_stats("net.inet.carp.stats", off, &carpstat,
+ sizeof(carpstat), kread_counters) != 0)
+ return;
- printf("%s:\n", name);
+ xo_open_container(name);
+ xo_emit("{T:/%s}:\n", name);
#define p(f, m) if (carpstat.f || sflag <= 1) \
- printf(m, (uintmax_t)carpstat.f, plural(carpstat.f))
+ xo_emit(m, (uintmax_t)carpstat.f, plural(carpstat.f))
#define p2(f, m) if (carpstat.f || sflag <= 1) \
- printf(m, (uintmax_t)carpstat.f)
-
- p(carps_ipackets, "\t%ju packet%s received (IPv4)\n");
- p(carps_ipackets6, "\t%ju packet%s received (IPv6)\n");
- p(carps_badttl, "\t\t%ju packet%s discarded for wrong TTL\n");
- p(carps_hdrops, "\t\t%ju packet%s shorter than header\n");
- p(carps_badsum, "\t\t%ju discarded for bad checksum%s\n");
- p(carps_badver, "\t\t%ju discarded packet%s with a bad version\n");
- p2(carps_badlen, "\t\t%ju discarded because packet too short\n");
- p2(carps_badauth, "\t\t%ju discarded for bad authentication\n");
- p2(carps_badvhid, "\t\t%ju discarded for bad vhid\n");
- p2(carps_badaddrs, "\t\t%ju discarded because of a bad address list\n");
- p(carps_opackets, "\t%ju packet%s sent (IPv4)\n");
- p(carps_opackets6, "\t%ju packet%s sent (IPv6)\n");
- p2(carps_onomem, "\t\t%ju send failed due to mbuf memory error\n");
+ xo_emit(m, (uintmax_t)carpstat.f)
+
+ p(carps_ipackets, "\t{:received-inet-packets/%ju} "
+ "{N:/packet%s received (IPv4)}\n");
+ p(carps_ipackets6, "\t{:received-inet6-packets/%ju} "
+ "{N:/packet%s received (IPv6)}\n");
+ p(carps_badttl, "\t\t{:dropped-wrong-ttl/%ju} "
+ "{N:/packet%s discarded for wrong TTL}\n");
+ p(carps_hdrops, "\t\t{:dropped-short-header/%ju} "
+ "{N:/packet%s shorter than header}\n");
+ p(carps_badsum, "\t\t{:dropped-bad-checksum/%ju} "
+ "{N:/discarded for bad checksum%s}\n");
+ p(carps_badver, "\t\t{:dropped-bad-version/%ju} "
+ "{N:/discarded packet%s with a bad version}\n");
+ p2(carps_badlen, "\t\t{:dropped-short-packet/%ju} "
+ "{N:/discarded because packet too short}\n");
+ p2(carps_badauth, "\t\t{:dropped-bad-authentication/%ju} "
+ "{N:/discarded for bad authentication}\n");
+ p2(carps_badvhid, "\t\t{:dropped-bad-vhid/%ju} "
+ "{N:/discarded for bad vhid}\n");
+ p2(carps_badaddrs, "\t\t{:dropped-bad-address-list/%ju} "
+ "{N:/discarded because of a bad address list}\n");
+ p(carps_opackets, "\t{:sent-inet-packets/%ju} "
+ "{N:/packet%s sent (IPv4)}\n");
+ p(carps_opackets6, "\t{:sent-inet6-packets/%ju} "
+ "{N:/packet%s sent (IPv6)}\n");
+ p2(carps_onomem, "\t\t{:send-failed-memory-error/%ju} "
+ "{N:/send failed due to mbuf memory error}\n");
#if notyet
- p(carps_ostates, "\t\t%s state update%s sent\n");
+ p(carps_ostates, "\t\t{:send-state-updates/%s} "
+ "{N:/state update%s sent}\n");
#endif
#undef p
#undef p2
+ xo_close_container(name);
}
/*
@@ -853,62 +1035,83 @@ carp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
void
ip_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct ipstat ipstat, zerostat;
- size_t len = sizeof ipstat;
+ struct ipstat ipstat;
- if (live) {
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet.ip.stats", &ipstat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- warn("sysctl: net.inet.ip.stats");
- return;
- }
- } else
- kread(off, &ipstat, len);
+ if (fetch_stats("net.inet.ip.stats", off, &ipstat,
+ sizeof(ipstat), kread_counters) != 0)
+ return;
- printf("%s:\n", name);
+ xo_open_container(name);
+ xo_emit("{T:/%s}:\n", name);
#define p(f, m) if (ipstat.f || sflag <= 1) \
- printf(m, ipstat.f, plural(ipstat.f))
+ xo_emit(m, (uintmax_t )ipstat.f, plural(ipstat.f))
#define p1a(f, m) if (ipstat.f || sflag <= 1) \
- printf(m, ipstat.f)
-
- p(ips_total, "\t%lu total packet%s received\n");
- p(ips_badsum, "\t%lu bad header checksum%s\n");
- p1a(ips_toosmall, "\t%lu with size smaller than minimum\n");
- p1a(ips_tooshort, "\t%lu with data size < data length\n");
- p1a(ips_toolong, "\t%lu with ip length > max ip packet size\n");
- p1a(ips_badhlen, "\t%lu with header length < data size\n");
- p1a(ips_badlen, "\t%lu with data length < header length\n");
- p1a(ips_badoptions, "\t%lu with bad options\n");
- p1a(ips_badvers, "\t%lu with incorrect version number\n");
- p(ips_fragments, "\t%lu fragment%s received\n");
- p(ips_fragdropped, "\t%lu fragment%s dropped (dup or out of space)\n");
- p(ips_fragtimeout, "\t%lu fragment%s dropped after timeout\n");
- p(ips_reassembled, "\t%lu packet%s reassembled ok\n");
- p(ips_delivered, "\t%lu packet%s for this host\n");
- p(ips_noproto, "\t%lu packet%s for unknown/unsupported protocol\n");
- p(ips_forward, "\t%lu packet%s forwarded");
- p(ips_fastforward, " (%lu packet%s fast forwarded)");
+ xo_emit(m, (uintmax_t )ipstat.f)
+
+ p(ips_total, "\t{:received-packets/%ju} "
+ "{N:/total packet%s received}\n");
+ p(ips_badsum, "\t{:dropped-bad-checksum/%ju} "
+ "{N:/bad header checksum%s}\n");
+ p1a(ips_toosmall, "\t{:dropped-below-minimum-size/%ju} "
+ "{N:/with size smaller than minimum}\n");
+ p1a(ips_tooshort, "\t{:dropped-short-packets/%ju} "
+ "{N:/with data size < data length}\n");
+ p1a(ips_toolong, "\t{:dropped-too-long/%ju} "
+ "{N:/with ip length > max ip packet size}\n");
+ p1a(ips_badhlen, "\t{:dropped-short-header-length/%ju} "
+ "{N:/with header length < data size}\n");
+ p1a(ips_badlen, "\t{:dropped-short-data/%ju} "
+ "{N:/with data length < header length}\n");
+ p1a(ips_badoptions, "\t{:dropped-bad-options/%ju} "
+ "{N:/with bad options}\n");
+ p1a(ips_badvers, "\t{:dropped-bad-version/%ju} "
+ "{N:/with incorrect version number}\n");
+ p(ips_fragments, "\t{:received-fragments/%ju} "
+ "{N:/fragment%s received}\n");
+ p(ips_fragdropped, "\t{:dropped-fragments/%ju} "
+ "{N:/fragment%s dropped (dup or out of space)}\n");
+ p(ips_fragtimeout, "\t{:dropped-fragments-after-timeout/%ju} "
+ "{N:/fragment%s dropped after timeout}\n");
+ p(ips_reassembled, "\t{:reassembled-packets/%ju} "
+ "{N:/packet%s reassembled ok}\n");
+ p(ips_delivered, "\t{:received-local-packets/%ju} "
+ "{N:/packet%s for this host}\n");
+ p(ips_noproto, "\t{:dropped-unknown-protocol/%ju} "
+ "{N:/packet%s for unknown\\/unsupported protocol}\n");
+ p(ips_forward, "\t{:forwarded-packets/%ju} "
+ "{N:/packet%s forwarded}");
+ p(ips_fastforward, " ({:fast-forwarded-packets/%ju} "
+ "{N:/packet%s fast forwarded})");
if (ipstat.ips_forward || sflag <= 1)
- putchar('\n');
- p(ips_cantforward, "\t%lu packet%s not forwardable\n");
- p(ips_notmember,
- "\t%lu packet%s received for unknown multicast group\n");
- p(ips_redirectsent, "\t%lu redirect%s sent\n");
- p(ips_localout, "\t%lu packet%s sent from this host\n");
- p(ips_rawout, "\t%lu packet%s sent with fabricated ip header\n");
- p(ips_odropped,
- "\t%lu output packet%s dropped due to no bufs, etc.\n");
- p(ips_noroute, "\t%lu output packet%s discarded due to no route\n");
- p(ips_fragmented, "\t%lu output datagram%s fragmented\n");
- p(ips_ofragments, "\t%lu fragment%s created\n");
- p(ips_cantfrag, "\t%lu datagram%s that can't be fragmented\n");
- p(ips_nogif, "\t%lu tunneling packet%s that can't find gif\n");
- p(ips_badaddr, "\t%lu datagram%s with bad address in header\n");
+ xo_emit("\n");
+ p(ips_cantforward, "\t{:packets-cannot-forward/%ju} "
+ "{N:/packet%s not forwardable}\n");
+ p(ips_notmember, "\t{:received-unknown-multicast-group/%ju} "
+ "{N:/packet%s received for unknown multicast group}\n");
+ p(ips_redirectsent, "\t{:redirects-sent/%ju} "
+ "{N:/redirect%s sent}\n");
+ p(ips_localout, "\t{:sent-packets/%ju} "
+ "{N:/packet%s sent from this host}\n");
+ p(ips_rawout, "\t{:send-packets-fabricated-header/%ju} "
+ "{N:/packet%s sent with fabricated ip header}\n");
+ p(ips_odropped, "\t{:discard-no-mbufs/%ju} "
+ "{N:/output packet%s dropped due to no bufs, etc.}\n");
+ p(ips_noroute, "\t{:discard-no-route/%ju} "
+ "{N:/output packet%s discarded due to no route}\n");
+ p(ips_fragmented, "\t{:sent-fragments/%ju} "
+ "{N:/output datagram%s fragmented}\n");
+ p(ips_ofragments, "\t{:fragments-created/%ju} "
+ "{N:/fragment%s created}\n");
+ p(ips_cantfrag, "\t{:discard-cannot-fragment/%ju} "
+ "{N:/datagram%s that can't be fragmented}\n");
+ p(ips_nogif, "\t{:discard-tunnel-no-gif/%ju} "
+ "{N:/tunneling packet%s that can't find gif}\n");
+ p(ips_badaddr, "\t{:discard-bad-address/%ju} "
+ "{N:/datagram%s with bad address in header}\n");
#undef p
#undef p1a
+ xo_close_container(name);
}
/*
@@ -917,42 +1120,46 @@ ip_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
void
arp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct arpstat arpstat, zerostat;
- size_t len = sizeof(arpstat);
+ struct arpstat arpstat;
- if (live) {
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.link.ether.arp.stats", &arpstat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- warn("sysctl: net.link.ether.arp.stats");
- return;
- }
- } else
- kread(off, &arpstat, len);
+ if (fetch_stats("net.link.ether.arp.stats", off, &arpstat,
+ sizeof(arpstat), kread_counters) != 0)
+ return;
- printf("%s:\n", name);
+ xo_open_container(name);
+ xo_emit("{T:/%s}:\n", name);
#define p(f, m) if (arpstat.f || sflag <= 1) \
- printf(m, arpstat.f, plural(arpstat.f))
+ xo_emit("\t" m, (uintmax_t)arpstat.f, plural(arpstat.f))
#define p2(f, m) if (arpstat.f || sflag <= 1) \
- printf(m, arpstat.f, pluralies(arpstat.f))
-
- p(txrequests, "\t%lu ARP request%s sent\n");
- p2(txreplies, "\t%lu ARP repl%s sent\n");
- p(rxrequests, "\t%lu ARP request%s received\n");
- p2(rxreplies, "\t%lu ARP repl%s received\n");
- p(received, "\t%lu ARP packet%s received\n");
- p(dropped, "\t%lu total packet%s dropped due to no ARP entry\n");
- p(timeouts, "\t%lu ARP entry%s timed out\n");
- p(dupips, "\t%lu Duplicate IP%s seen\n");
+ xo_emit("\t" m, (uintmax_t)arpstat.f, pluralies(arpstat.f))
+
+ p(txrequests, "{:sent-requests/%ju} {N:/ARP request%s sent}\n");
+ p2(txreplies, "{:sent-replies/%ju} {N:/ARP repl%s sent}\n");
+ p(rxrequests, "{:received-requests/%ju} "
+ "{N:/ARP request%s received}\n");
+ p2(rxreplies, "{:received-replies/%ju} "
+ "{N:/ARP repl%s received}\n");
+ p(received, "{:received-packets/%ju} "
+ "{N:/ARP packet%s received}\n");
+ p(dropped, "{:dropped-no-entry/%ju} "
+ "{N:/total packet%s dropped due to no ARP entry}\n");
+ p(timeouts, "{:entries-timeout/%ju} "
+ "{N:/ARP entry%s timed out}\n");
+ p(dupips, "{:dropped-duplicate-address/%ju} "
+ "{N:/Duplicate IP%s seen}\n");
#undef p
#undef p2
+ xo_close_container(name);
}
+#ifndef __rtems__
+static const char *icmpnames[ICMP_MAXTYPE + 1] = {
+#else /* __rtems__ */
static const char *const icmpnames[ICMP_MAXTYPE + 1] = {
+#endif /* __rtems__ */
"echo reply", /* RFC 792 */
"#1",
"#2",
@@ -1002,69 +1209,91 @@ static const char *const icmpnames[ICMP_MAXTYPE + 1] = {
void
icmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct icmpstat icmpstat, zerostat;
- int i, first;
+ struct icmpstat icmpstat;
size_t len;
+ int i, first;
- len = sizeof icmpstat;
- if (live) {
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet.icmp.stats", &icmpstat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- warn("sysctl: net.inet.icmp.stats");
- return;
- }
- } else
- kread(off, &icmpstat, len);
+ if (fetch_stats("net.inet.icmp.stats", off, &icmpstat,
+ sizeof(icmpstat), kread_counters) != 0)
+ return;
- printf("%s:\n", name);
+ xo_open_container(name);
+ xo_emit("{T:/%s}:\n", name);
#define p(f, m) if (icmpstat.f || sflag <= 1) \
- printf(m, icmpstat.f, plural(icmpstat.f))
+ xo_emit(m, icmpstat.f, plural(icmpstat.f))
#define p1a(f, m) if (icmpstat.f || sflag <= 1) \
- printf(m, icmpstat.f)
+ xo_emit(m, icmpstat.f)
#define p2(f, m) if (icmpstat.f || sflag <= 1) \
- printf(m, icmpstat.f, plurales(icmpstat.f))
+ xo_emit(m, icmpstat.f, plurales(icmpstat.f))
+
+ p(icps_error, "\t{:icmp-calls/%lu} "
+ "{N:/call%s to icmp_error}\n");
+ p(icps_oldicmp, "\t{:errors-not-from-message/%lu} "
+ "{N:/error%s not generated in response to an icmp message}\n");
- p(icps_error, "\t%lu call%s to icmp_error\n");
- p(icps_oldicmp,
- "\t%lu error%s not generated in response to an icmp message\n");
- for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++)
+ for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) {
if (icmpstat.icps_outhist[i] != 0) {
if (first) {
- printf("\tOutput histogram:\n");
+ xo_open_list("output-histogram");
+ xo_emit("\tOutput histogram:\n");
first = 0;
}
+ xo_open_instance("output-histogram");
if (icmpnames[i] != NULL)
- printf("\t\t%s: %lu\n", icmpnames[i],
- icmpstat.icps_outhist[i]);
+ xo_emit("\t\t{k:name/%s}: {:count/%lu}\n",
+ icmpnames[i], icmpstat.icps_outhist[i]);
else
- printf("\t\tunknown ICMP #%d: %lu\n", i,
- icmpstat.icps_outhist[i]);
+ xo_emit("\t\tunknown ICMP #{k:name/%d}: "
+ "{:count/%lu}\n",
+ i, icmpstat.icps_outhist[i]);
+ xo_close_instance("output-histogram");
}
- p(icps_badcode, "\t%lu message%s with bad code fields\n");
- p(icps_tooshort, "\t%lu message%s less than the minimum length\n");
- p(icps_checksum, "\t%lu message%s with bad checksum\n");
- p(icps_badlen, "\t%lu message%s with bad length\n");
- p1a(icps_bmcastecho, "\t%lu multicast echo requests ignored\n");
- p1a(icps_bmcasttstamp, "\t%lu multicast timestamp requests ignored\n");
- for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++)
+ }
+ if (!first)
+ xo_close_list("output-histogram");
+
+ p(icps_badcode, "\t{:dropped-bad-code/%lu} "
+ "{N:/message%s with bad code fields}\n");
+ p(icps_tooshort, "\t{:dropped-too-short/%lu} "
+ "{N:/message%s less than the minimum length}\n");
+ p(icps_checksum, "\t{:dropped-bad-checksum/%lu} "
+ "{N:/message%s with bad checksum}\n");
+ p(icps_badlen, "\t{:dropped-bad-length/%lu} "
+ "{N:/message%s with bad length}\n");
+ p1a(icps_bmcastecho, "\t{:dropped-multicast-echo/%lu} "
+ "{N:/multicast echo requests ignored}\n");
+ p1a(icps_bmcasttstamp, "\t{:dropped-multicast-timestamp/%lu} "
+ "{N:/multicast timestamp requests ignored}\n");
+
+ for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) {
if (icmpstat.icps_inhist[i] != 0) {
if (first) {
- printf("\tInput histogram:\n");
+ xo_open_list("input-histogram");
+ xo_emit("\tInput histogram:\n");
first = 0;
}
+ xo_open_instance("input-histogram");
if (icmpnames[i] != NULL)
- printf("\t\t%s: %lu\n", icmpnames[i],
- icmpstat.icps_inhist[i]);
+ xo_emit("\t\t{k:name/%s}: {:count/%lu}\n",
+ icmpnames[i],
+ icmpstat.icps_inhist[i]);
else
- printf("\t\tunknown ICMP #%d: %lu\n", i,
- icmpstat.icps_inhist[i]);
+ xo_emit(
+ "\t\tunknown ICMP #{k:name/%d}: {:count/%lu}\n",
+ i, icmpstat.icps_inhist[i]);
+ xo_close_instance("input-histogram");
}
- p(icps_reflect, "\t%lu message response%s generated\n");
- p2(icps_badaddr, "\t%lu invalid return address%s\n");
- p(icps_noroute, "\t%lu no return route%s\n");
+ }
+ if (!first)
+ xo_close_list("input-histogram");
+
+ p(icps_reflect, "\t{:sent-packets/%lu} "
+ "{N:/message response%s generated}\n");
+ p2(icps_badaddr, "\t{:discard-invalid-return-address/%lu} "
+ "{N:/invalid return address%s}\n");
+ p(icps_noroute, "\t{:discard-no-route/%lu} "
+ "{N:/no return route%s}\n");
#undef p
#undef p1a
#undef p2
@@ -1073,51 +1302,12 @@ icmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
if (sysctlbyname("net.inet.icmp.maskrepl", &i, &len, NULL, 0) <
0)
return;
- printf("\tICMP address mask responses are %sabled\n",
- i ? "en" : "dis");
+ xo_emit("\tICMP address mask responses are "
+ "{q:icmp-address-responses/%sabled}\n", i ? "en" : "dis");
}
-}
-#ifndef BURN_BRIDGES
-/*
- * Dump IGMP statistics structure (pre 8.x kernel).
- */
-static void
-igmp_stats_live_old(const char *name)
-{
- struct oigmpstat oigmpstat, zerostat;
- size_t len = sizeof(oigmpstat);
-
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet.igmp.stats", &oigmpstat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- warn("sysctl: net.inet.igmp.stats");
- return;
- }
-
- printf("%s:\n", name);
-
-#define p(f, m) if (oigmpstat.f || sflag <= 1) \
- printf(m, oigmpstat.f, plural(oigmpstat.f))
-#define py(f, m) if (oigmpstat.f || sflag <= 1) \
- printf(m, oigmpstat.f, oigmpstat.f != 1 ? "ies" : "y")
- p(igps_rcv_total, "\t%u message%s received\n");
- p(igps_rcv_tooshort, "\t%u message%s received with too few bytes\n");
- p(igps_rcv_badsum, "\t%u message%s received with bad checksum\n");
- py(igps_rcv_queries, "\t%u membership quer%s received\n");
- py(igps_rcv_badqueries,
- "\t%u membership quer%s received with invalid field(s)\n");
- p(igps_rcv_reports, "\t%u membership report%s received\n");
- p(igps_rcv_badreports,
- "\t%u membership report%s received with invalid field(s)\n");
- p(igps_rcv_ourreports,
-"\t%u membership report%s received for groups to which we belong\n");
- p(igps_snd_reports, "\t%u membership report%s sent\n");
-#undef p
-#undef py
+ xo_close_container(name);
}
-#endif /* !BURN_BRIDGES */
/*
* Dump IGMP statistics structure.
@@ -1125,80 +1315,66 @@ igmp_stats_live_old(const char *name)
void
igmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct igmpstat igmpstat, zerostat;
- size_t len;
-
-#ifndef BURN_BRIDGES
- if (live) {
- /*
- * Detect if we are being run against a pre-IGMPv3 kernel.
- * We cannot do this for a core file as the legacy
- * struct igmpstat has no size field, nor does it
- * export it in any readily-available symbols.
- */
- len = 0;
- if (sysctlbyname("net.inet.igmp.stats", NULL, &len, NULL,
- 0) < 0) {
- warn("sysctl: net.inet.igmp.stats");
- return;
- }
- if (len < sizeof(igmpstat)) {
- igmp_stats_live_old(name);
- return;
- }
- }
-#endif /* !BURN_BRIDGES */
+ struct igmpstat igmpstat;
- len = sizeof(igmpstat);
- if (live) {
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet.igmp.stats", &igmpstat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- warn("sysctl: net.inet.igmp.stats");
- return;
- }
- } else {
- len = sizeof(igmpstat);
- kread(off, &igmpstat, len);
- }
+ if (fetch_stats("net.inet.igmp.stats", 0, &igmpstat,
+ sizeof(igmpstat), kread) != 0)
+ return;
if (igmpstat.igps_version != IGPS_VERSION_3) {
- warnx("%s: version mismatch (%d != %d)", __func__,
+ xo_warnx("%s: version mismatch (%d != %d)", __func__,
igmpstat.igps_version, IGPS_VERSION_3);
}
if (igmpstat.igps_len != IGPS_VERSION3_LEN) {
- warnx("%s: size mismatch (%d != %d)", __func__,
+ xo_warnx("%s: size mismatch (%d != %d)", __func__,
igmpstat.igps_len, IGPS_VERSION3_LEN);
}
- printf("%s:\n", name);
+ xo_open_container(name);
+ xo_emit("{T:/%s}:\n", name);
#define p64(f, m) if (igmpstat.f || sflag <= 1) \
- printf(m, (uintmax_t) igmpstat.f, plural(igmpstat.f))
+ xo_emit(m, (uintmax_t) igmpstat.f, plural(igmpstat.f))
#define py64(f, m) if (igmpstat.f || sflag <= 1) \
- printf(m, (uintmax_t) igmpstat.f, pluralies(igmpstat.f))
- p64(igps_rcv_total, "\t%ju message%s received\n");
- p64(igps_rcv_tooshort, "\t%ju message%s received with too few bytes\n");
- p64(igps_rcv_badttl, "\t%ju message%s received with wrong TTL\n");
- p64(igps_rcv_badsum, "\t%ju message%s received with bad checksum\n");
- py64(igps_rcv_v1v2_queries, "\t%ju V1/V2 membership quer%s received\n");
- py64(igps_rcv_v3_queries, "\t%ju V3 membership quer%s received\n");
- py64(igps_rcv_badqueries,
- "\t%ju membership quer%s received with invalid field(s)\n");
- py64(igps_rcv_gen_queries, "\t%ju general quer%s received\n");
- py64(igps_rcv_group_queries, "\t%ju group quer%s received\n");
- py64(igps_rcv_gsr_queries, "\t%ju group-source quer%s received\n");
- py64(igps_drop_gsr_queries, "\t%ju group-source quer%s dropped\n");
- p64(igps_rcv_reports, "\t%ju membership report%s received\n");
- p64(igps_rcv_badreports,
- "\t%ju membership report%s received with invalid field(s)\n");
- p64(igps_rcv_ourreports,
-"\t%ju membership report%s received for groups to which we belong\n");
- p64(igps_rcv_nora, "\t%ju V3 report%s received without Router Alert\n");
- p64(igps_snd_reports, "\t%ju membership report%s sent\n");
+ xo_emit(m, (uintmax_t) igmpstat.f, pluralies(igmpstat.f))
+
+ p64(igps_rcv_total, "\t{:received-messages/%ju} "
+ "{N:/message%s received}\n");
+ p64(igps_rcv_tooshort, "\t{:dropped-too-short/%ju} "
+ "{N:/message%s received with too few bytes}\n");
+ p64(igps_rcv_badttl, "\t{:dropped-wrong-ttl/%ju} "
+ "{N:/message%s received with wrong TTL}\n");
+ p64(igps_rcv_badsum, "\t{:dropped-bad-checksum/%ju} "
+ "{N:/message%s received with bad checksum}\n");
+ py64(igps_rcv_v1v2_queries, "\t{:received-membership-queries/%ju} "
+ "{N:/V1\\/V2 membership quer%s received}\n");
+ py64(igps_rcv_v3_queries, "\t{:received-v3-membership-queries/%ju} "
+ "{N:/V3 membership quer%s received}\n");
+ py64(igps_rcv_badqueries, "\t{:dropped-membership-queries/%ju} "
+ "{N:/membership quer%s received with invalid field(s)}\n");
+ py64(igps_rcv_gen_queries, "\t{:received-general-queries/%ju} "
+ "{N:/general quer%s received}\n");
+ py64(igps_rcv_group_queries, "\t{:received-group-queries/%ju} "
+ "{N:/group quer%s received}\n");
+ py64(igps_rcv_gsr_queries, "\t{:received-group-source-queries/%ju} "
+ "{N:/group-source quer%s received}\n");
+ py64(igps_drop_gsr_queries, "\t{:dropped-group-source-queries/%ju} "
+ "{N:/group-source quer%s dropped}\n");
+ p64(igps_rcv_reports, "\t{:received-membership-requests/%ju} "
+ "{N:/membership report%s received}\n");
+ p64(igps_rcv_badreports, "\t{:dropped-membership-reports/%ju} "
+ "{N:/membership report%s received with invalid field(s)}\n");
+ p64(igps_rcv_ourreports, "\t"
+ "{:received-membership-reports-matching/%ju} "
+ "{N:/membership report%s received for groups to which we belong}"
+ "\n");
+ p64(igps_rcv_nora, "\t{:received-v3-reports-no-router-alert/%ju} "
+ "{N:/V3 report%s received without Router Alert}\n");
+ p64(igps_snd_reports, "\t{:sent-membership-reports/%ju} "
+ "{N:/membership report%s sent}\n");
#undef p64
#undef py64
+ xo_close_container(name);
}
/*
@@ -1208,72 +1384,86 @@ void
pim_stats(u_long off __unused, const char *name, int af1 __unused,
int proto __unused)
{
- struct pimstat pimstat, zerostat;
- size_t len = sizeof pimstat;
+ struct pimstat pimstat;
- if (live) {
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet.pim.stats", &pimstat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- if (errno != ENOENT)
- warn("sysctl: net.inet.pim.stats");
- return;
- }
- } else {
- if (off == 0)
- return;
- kread(off, &pimstat, len);
- }
+ if (fetch_stats("net.inet.pim.stats", off, &pimstat,
+ sizeof(pimstat), kread_counters) != 0)
+ return;
- printf("%s:\n", name);
+ xo_open_container(name);
+ xo_emit("{T:/%s}:\n", name);
#define p(f, m) if (pimstat.f || sflag <= 1) \
- printf(m, (uintmax_t)pimstat.f, plural(pimstat.f))
+ xo_emit(m, (uintmax_t)pimstat.f, plural(pimstat.f))
#define py(f, m) if (pimstat.f || sflag <= 1) \
- printf(m, (uintmax_t)pimstat.f, pimstat.f != 1 ? "ies" : "y")
- p(pims_rcv_total_msgs, "\t%ju message%s received\n");
- p(pims_rcv_total_bytes, "\t%ju byte%s received\n");
- p(pims_rcv_tooshort, "\t%ju message%s received with too few bytes\n");
- p(pims_rcv_badsum, "\t%ju message%s received with bad checksum\n");
- p(pims_rcv_badversion, "\t%ju message%s received with bad version\n");
- p(pims_rcv_registers_msgs, "\t%ju data register message%s received\n");
- p(pims_rcv_registers_bytes, "\t%ju data register byte%s received\n");
- p(pims_rcv_registers_wrongiif,
- "\t%ju data register message%s received on wrong iif\n");
- p(pims_rcv_badregisters, "\t%ju bad register%s received\n");
- p(pims_snd_registers_msgs, "\t%ju data register message%s sent\n");
- p(pims_snd_registers_bytes, "\t%ju data register byte%s sent\n");
+ xo_emit(m, (uintmax_t)pimstat.f, pimstat.f != 1 ? "ies" : "y")
+
+ p(pims_rcv_total_msgs, "\t{:received-messages/%ju} "
+ "{N:/message%s received}\n");
+ p(pims_rcv_total_bytes, "\t{:received-bytes/%ju} "
+ "{N:/byte%s received}\n");
+ p(pims_rcv_tooshort, "\t{:dropped-too-short/%ju} "
+ "{N:/message%s received with too few bytes}\n");
+ p(pims_rcv_badsum, "\t{:dropped-bad-checksum/%ju} "
+ "{N:/message%s received with bad checksum}\n");
+ p(pims_rcv_badversion, "\t{:dropped-bad-version/%ju} "
+ "{N:/message%s received with bad version}\n");
+ p(pims_rcv_registers_msgs, "\t{:received-data-register-messages/%ju} "
+ "{N:/data register message%s received}\n");
+ p(pims_rcv_registers_bytes, "\t{:received-data-register-bytes/%ju} "
+ "{N:/data register byte%s received}\n");
+ p(pims_rcv_registers_wrongiif, "\t"
+ "{:received-data-register-wrong-interface/%ju} "
+ "{N:/data register message%s received on wrong iif}\n");
+ p(pims_rcv_badregisters, "\t{:received-bad-registers/%ju} "
+ "{N:/bad register%s received}\n");
+ p(pims_snd_registers_msgs, "\t{:sent-data-register-messages/%ju} "
+ "{N:/data register message%s sent}\n");
+ p(pims_snd_registers_bytes, "\t{:sent-data-register-bytes/%ju} "
+ "{N:/data register byte%s sent}\n");
#undef p
#undef py
+ xo_close_container(name);
}
/*
* Pretty print an Internet address (net address + port).
*/
void
-inetprint(struct in_addr *in, int port, const char *proto, int num_port)
+inetprint(const char *container, struct in_addr *in, int port,
+ const char *proto, int num_port, const int af1)
{
struct servent *sp = 0;
char line[80], *cp;
int width;
+ if (container)
+ xo_open_container(container);
+
if (Wflag)
sprintf(line, "%s.", inetname(in));
else
sprintf(line, "%.*s.", (Aflag && !num_port) ? 12 : 16, inetname(in));
- cp = index(line, '\0');
+ cp = strchr(line, '\0');
if (!num_port && port)
sp = getservbyport((int)port, proto);
if (sp || port == 0)
sprintf(cp, "%.15s ", sp ? sp->s_name : "*");
else
sprintf(cp, "%d ", ntohs((u_short)port));
- width = (Aflag && !Wflag) ? 18 : 22;
+ width = (Aflag && !Wflag) ? 18 :
+ ((!Wflag || af1 == AF_INET) ? 22 : 45);
if (Wflag)
- printf("%-*s ", width, line);
+ xo_emit("{d:target/%-*s} ", width, line);
else
- printf("%-*.*s ", width, width, line);
+ xo_emit("{d:target/%-*.*s} ", width, width, line);
+
+ int alen = cp - line - 1, plen = strlen(cp) - 1;
+ xo_emit("{e:address/%*.*s}{e:port/%*.*s}", alen, alen, line, plen,
+ plen, cp);
+
+ if (container)
+ xo_close_container(container);
}
/*
@@ -1299,7 +1489,7 @@ inetname(struct in_addr *inp)
if (np)
cp = np->n_name;
}
- if (cp == 0) {
+ if (cp == NULL) {
hp = gethostbyaddr((char *)inp, sizeof (*inp), AF_INET);
if (hp) {
cp = hp->h_name;
diff --git a/freebsd/usr.bin/netstat/inet6.c b/freebsd/usr.bin/netstat/inet6.c
index 07086318..941a2000 100644
--- a/freebsd/usr.bin/netstat/inet6.c
+++ b/freebsd/usr.bin/netstat/inet6.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
/* BSDI inet.c,v 2.3 1995/10/24 02:19:29 prb Exp */
/*-
* Copyright (c) 1983, 1988, 1993
@@ -36,6 +40,9 @@ static char sccsid[] = "@(#)inet6.c 8.4 (Berkeley) 4/20/94";
#endif /* not lint */
#endif
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -46,11 +53,9 @@ __FBSDID("$FreeBSD$");
#include <sys/ioctl.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
-#include <sys/sysctl.h>
#include <net/route.h>
#include <net/if.h>
-#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
@@ -67,12 +72,15 @@ __FBSDID("$FreeBSD$");
#include <err.h>
#include <stdint.h>
#include <stdio.h>
+#include <stdbool.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
+#include <libxo/xo.h>
#include "netstat.h"
-
-struct socket sockb;
+#ifdef __rtems__
+#include "rtems-bsd-netstat-inet6-data.h"
+#endif /* __rtems__ */
char *inet6name(struct in6_addr *);
@@ -211,11 +219,11 @@ static const char *ip6nh[] = {
"#129",
"#130",
"#131",
- "#132",
+ "SCTP",
"#133",
"#134",
"#135",
- "#136",
+ "UDPLite",
"#137",
"#138",
"#139",
@@ -337,7 +345,7 @@ static const char *ip6nh[] = {
"#255",
};
-static char *srcrule_str[] = {
+static const char *srcrule_str[] = {
"first candidate",
"same address",
"appropriate scope",
@@ -347,7 +355,7 @@ static char *srcrule_str[] = {
"matching label",
"public/temporary address",
"alive interface",
- "preferred interface",
+ "better virtual status",
"preferred source",
"rule #11",
"rule #12",
@@ -362,165 +370,248 @@ static char *srcrule_str[] = {
void
ip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct ip6stat ip6stat, zerostat;
+ struct ip6stat ip6stat;
int first, i;
- size_t len;
-
- len = sizeof ip6stat;
- if (live) {
- memset(&ip6stat, 0, len);
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet6.ip6.stats", &ip6stat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- if (errno != ENOENT)
- warn("sysctl: net.inet6.ip6.stats");
- return;
- }
- } else
- kread(off, &ip6stat, len);
- printf("%s:\n", name);
+ if (fetch_stats("net.inet6.ip6.stats", off, &ip6stat,
+ sizeof(ip6stat), kread_counters) != 0)
+ return;
+
+ xo_open_container(name);
+ xo_emit("{T:/%s}:\n", name);
#define p(f, m) if (ip6stat.f || sflag <= 1) \
- printf(m, (uintmax_t)ip6stat.f, plural(ip6stat.f))
+ xo_emit(m, (uintmax_t)ip6stat.f, plural(ip6stat.f))
#define p1a(f, m) if (ip6stat.f || sflag <= 1) \
- printf(m, (uintmax_t)ip6stat.f)
-
- p(ip6s_total, "\t%ju total packet%s received\n");
- p1a(ip6s_toosmall, "\t%ju with size smaller than minimum\n");
- p1a(ip6s_tooshort, "\t%ju with data size < data length\n");
- p1a(ip6s_badoptions, "\t%ju with bad options\n");
- p1a(ip6s_badvers, "\t%ju with incorrect version number\n");
- p(ip6s_fragments, "\t%ju fragment%s received\n");
- p(ip6s_fragdropped, "\t%ju fragment%s dropped (dup or out of space)\n");
- p(ip6s_fragtimeout, "\t%ju fragment%s dropped after timeout\n");
- p(ip6s_fragoverflow, "\t%ju fragment%s that exceeded limit\n");
- p(ip6s_reassembled, "\t%ju packet%s reassembled ok\n");
- p(ip6s_delivered, "\t%ju packet%s for this host\n");
- p(ip6s_forward, "\t%ju packet%s forwarded\n");
- p(ip6s_cantforward, "\t%ju packet%s not forwardable\n");
- p(ip6s_redirectsent, "\t%ju redirect%s sent\n");
- p(ip6s_localout, "\t%ju packet%s sent from this host\n");
- p(ip6s_rawout, "\t%ju packet%s sent with fabricated ip header\n");
- p(ip6s_odropped, "\t%ju output packet%s dropped due to no bufs, etc.\n");
- p(ip6s_noroute, "\t%ju output packet%s discarded due to no route\n");
- p(ip6s_fragmented, "\t%ju output datagram%s fragmented\n");
- p(ip6s_ofragments, "\t%ju fragment%s created\n");
- p(ip6s_cantfrag, "\t%ju datagram%s that can't be fragmented\n");
- p(ip6s_badscope, "\t%ju packet%s that violated scope rules\n");
- p(ip6s_notmember, "\t%ju multicast packet%s which we don't join\n");
+ xo_emit(m, (uintmax_t)ip6stat.f)
+
+ p(ip6s_total, "\t{:received-packets/%ju} "
+ "{N:/total packet%s received}\n");
+ p1a(ip6s_toosmall, "\t{:dropped-below-minimum-size/%ju} "
+ "{N:/with size smaller than minimum}\n");
+ p1a(ip6s_tooshort, "\t{:dropped-short-packets/%ju} "
+ "{N:/with data size < data length}\n");
+ p1a(ip6s_badoptions, "\t{:dropped-bad-options/%ju} "
+ "{N:/with bad options}\n");
+ p1a(ip6s_badvers, "\t{:dropped-bad-version/%ju} "
+ "{N:/with incorrect version number}\n");
+ p(ip6s_fragments, "\t{:received-fragments/%ju} "
+ "{N:/fragment%s received}\n");
+ p(ip6s_fragdropped, "\t{:dropped-fragment/%ju} "
+ "{N:/fragment%s dropped (dup or out of space)}\n");
+ p(ip6s_fragtimeout, "\t{:dropped-fragment-after-timeout/%ju} "
+ "{N:/fragment%s dropped after timeout}\n");
+ p(ip6s_fragoverflow, "\t{:dropped-fragments-overflow/%ju} "
+ "{N:/fragment%s that exceeded limit}\n");
+ p(ip6s_reassembled, "\t{:reassembled-packets/%ju} "
+ "{N:/packet%s reassembled ok}\n");
+ p(ip6s_delivered, "\t{:received-local-packets/%ju} "
+ "{N:/packet%s for this host}\n");
+ p(ip6s_forward, "\t{:forwarded-packets/%ju} "
+ "{N:/packet%s forwarded}\n");
+ p(ip6s_cantforward, "\t{:packets-not-forwardable/%ju} "
+ "{N:/packet%s not forwardable}\n");
+ p(ip6s_redirectsent, "\t{:sent-redirects/%ju} "
+ "{N:/redirect%s sent}\n");
+ p(ip6s_localout, "\t{:sent-packets/%ju} "
+ "{N:/packet%s sent from this host}\n");
+ p(ip6s_rawout, "\t{:send-packets-fabricated-header/%ju} "
+ "{N:/packet%s sent with fabricated ip header}\n");
+ p(ip6s_odropped, "\t{:discard-no-mbufs/%ju} "
+ "{N:/output packet%s dropped due to no bufs, etc.}\n");
+ p(ip6s_noroute, "\t{:discard-no-route/%ju} "
+ "{N:/output packet%s discarded due to no route}\n");
+ p(ip6s_fragmented, "\t{:sent-fragments/%ju} "
+ "{N:/output datagram%s fragmented}\n");
+ p(ip6s_ofragments, "\t{:fragments-created/%ju} "
+ "{N:/fragment%s created}\n");
+ p(ip6s_cantfrag, "\t{:discard-cannot-fragment/%ju} "
+ "{N:/datagram%s that can't be fragmented}\n");
+ p(ip6s_badscope, "\t{:discard-scope-violations/%ju} "
+ "{N:/packet%s that violated scope rules}\n");
+ p(ip6s_notmember, "\t{:multicast-no-join-packets/%ju} "
+ "{N:/multicast packet%s which we don't join}\n");
for (first = 1, i = 0; i < IP6S_HDRCNT; i++)
if (ip6stat.ip6s_nxthist[i] != 0) {
if (first) {
- printf("\tInput histogram:\n");
+ xo_emit("\t{T:Input histogram}:\n");
+ xo_open_list("input-histogram");
first = 0;
}
- printf("\t\t%s: %ju\n", ip6nh[i],
+ xo_open_instance("input-histogram");
+ xo_emit("\t\t{k:name/%s}: {:count/%ju}\n", ip6nh[i],
(uintmax_t)ip6stat.ip6s_nxthist[i]);
+ xo_close_instance("input-histogram");
}
- printf("\tMbuf statistics:\n");
- printf("\t\t%ju one mbuf\n", (uintmax_t)ip6stat.ip6s_m1);
+ if (!first)
+ xo_close_list("input-histogram");
+
+ xo_open_container("mbuf-statistics");
+ xo_emit("\t{T:Mbuf statistics}:\n");
+ xo_emit("\t\t{:one-mbuf/%ju} {N:/one mbuf}\n",
+ (uintmax_t)ip6stat.ip6s_m1);
for (first = 1, i = 0; i < IP6S_M2MMAX; i++) {
char ifbuf[IFNAMSIZ];
if (ip6stat.ip6s_m2m[i] != 0) {
if (first) {
- printf("\t\ttwo or more mbuf:\n");
+ xo_emit("\t\t{N:two or more mbuf}:\n");
+ xo_open_list("mbuf-data");
first = 0;
}
- printf("\t\t\t%s= %ju\n",
+ xo_open_instance("mbuf-data");
+ xo_emit("\t\t\t{k:name/%s}= {:count/%ju}\n",
if_indextoname(i, ifbuf),
(uintmax_t)ip6stat.ip6s_m2m[i]);
+ xo_close_instance("mbuf-data");
}
}
- printf("\t\t%ju one ext mbuf\n",
+ if (!first)
+ xo_close_list("mbuf-data");
+ xo_emit("\t\t{:one-extra-mbuf/%ju} {N:one ext mbuf}\n",
(uintmax_t)ip6stat.ip6s_mext1);
- printf("\t\t%ju two or more ext mbuf\n",
- (uintmax_t)ip6stat.ip6s_mext2m);
- p(ip6s_exthdrtoolong,
- "\t%ju packet%s whose headers are not contiguous\n");
- p(ip6s_nogif, "\t%ju tunneling packet%s that can't find gif\n");
- p(ip6s_toomanyhdr,
- "\t%ju packet%s discarded because of too many headers\n");
+ xo_emit("\t\t{:two-or-more-extra-mbufs/%ju} "
+ "{N:/two or more ext mbuf}\n", (uintmax_t)ip6stat.ip6s_mext2m);
+ xo_close_container("mbuf-statistics");
+
+ p(ip6s_exthdrtoolong, "\t{:dropped-header-too-long/%ju} "
+ "{N:/packet%s whose headers are not contiguous}\n");
+ p(ip6s_nogif, "\t{:discard-tunnel-no-gif/%ju} "
+ "{N:/tunneling packet%s that can't find gif}\n");
+ p(ip6s_toomanyhdr, "\t{:dropped-too-many-headers/%ju} "
+ "{N:/packet%s discarded because of too many headers}\n");
/* for debugging source address selection */
#define PRINT_SCOPESTAT(s,i) do {\
switch(i) { /* XXX hardcoding in each case */\
case 1:\
- p(s, "\t\t%ju interface-local%s\n");\
+ p(s, "\t\t{ke:name/interface-locals}{:count/%ju} " \
+ "{N:/interface-local%s}\n"); \
break;\
case 2:\
- p(s,"\t\t%ju link-local%s\n");\
+ p(s,"\t\t{ke:name/link-locals}{:count/%ju} " \
+ "{N:/link-local%s}\n"); \
break;\
case 5:\
- p(s,"\t\t%ju site-local%s\n");\
+ p(s,"\t\t{ke:name/site-locals}{:count/%ju} " \
+ "{N:/site-local%s}\n");\
break;\
case 14:\
- p(s,"\t\t%ju global%s\n");\
+ p(s,"\t\t{ke:name/globals}{:count/%ju} " \
+ "{N:/global%s}\n");\
break;\
default:\
- printf("\t\t%ju addresses scope=%x\n",\
- (uintmax_t)ip6stat.s, i);\
+ xo_emit("\t\t{qke:name/%#x}{:count/%ju} " \
+ "{N:/addresses scope=%#x}\n",\
+ i, (uintmax_t)ip6stat.s, i); \
}\
} while (0);
- p(ip6s_sources_none,
- "\t%ju failure%s of source address selection\n");
+ xo_open_container("source-address-selection");
+ p(ip6s_sources_none, "\t{:address-selection-failures/%ju} "
+ "{N:/failure%s of source address selection}\n");
+
for (first = 1, i = 0; i < IP6S_SCOPECNT; i++) {
if (ip6stat.ip6s_sources_sameif[i]) {
if (first) {
- printf("\tsource addresses on an outgoing I/F\n");
+ xo_open_list("outgoing-interface");
+ xo_emit("\tsource addresses on an outgoing "
+ "I/F\n");
first = 0;
}
+ xo_open_instance("outgoing-interface");
PRINT_SCOPESTAT(ip6s_sources_sameif[i], i);
+ xo_close_instance("outgoing-interface");
}
}
+ if (!first)
+ xo_close_list("outgoing-interface");
+
for (first = 1, i = 0; i < IP6S_SCOPECNT; i++) {
if (ip6stat.ip6s_sources_otherif[i]) {
if (first) {
- printf("\tsource addresses on a non-outgoing I/F\n");
+ xo_open_list("non-outgoing-interface");
+ xo_emit("\tsource addresses on a non-outgoing "
+ "I/F\n");
first = 0;
}
+ xo_open_instance("non-outgoing-interface");
PRINT_SCOPESTAT(ip6s_sources_otherif[i], i);
+ xo_close_instance("non-outgoing-interface");
}
}
+ if (!first)
+ xo_close_list("non-outgoing-interface");
+
for (first = 1, i = 0; i < IP6S_SCOPECNT; i++) {
if (ip6stat.ip6s_sources_samescope[i]) {
if (first) {
- printf("\tsource addresses of same scope\n");
+ xo_open_list("same-source");
+ xo_emit("\tsource addresses of same scope\n");
first = 0;
}
+ xo_open_instance("same-source");
PRINT_SCOPESTAT(ip6s_sources_samescope[i], i);
+ xo_close_instance("same-source");
}
}
+ if (!first)
+ xo_close_list("same-source");
+
for (first = 1, i = 0; i < IP6S_SCOPECNT; i++) {
if (ip6stat.ip6s_sources_otherscope[i]) {
if (first) {
- printf("\tsource addresses of a different scope\n");
+ xo_open_list("different-scope");
+ xo_emit("\tsource addresses of a different "
+ "scope\n");
first = 0;
}
+ xo_open_instance("different-scope");
PRINT_SCOPESTAT(ip6s_sources_otherscope[i], i);
+ xo_close_instance("different-scope");
}
}
+ if (!first)
+ xo_close_list("different-scope");
+
for (first = 1, i = 0; i < IP6S_SCOPECNT; i++) {
if (ip6stat.ip6s_sources_deprecated[i]) {
if (first) {
- printf("\tdeprecated source addresses\n");
+ xo_open_list("deprecated-source");
+ xo_emit("\tdeprecated source addresses\n");
first = 0;
}
+ xo_open_instance("deprecated-source");
PRINT_SCOPESTAT(ip6s_sources_deprecated[i], i);
+ xo_close_instance("deprecated-source");
}
}
+ if (!first)
+ xo_close_list("deprecated-source");
- printf("\tSource addresses selection rule applied:\n");
- for (i = 0; i < IP6S_RULESMAX; i++) {
- if (ip6stat.ip6s_sources_rule[i])
- printf("\t\t%ju %s\n",
- (uintmax_t)ip6stat.ip6s_sources_rule[i],
- srcrule_str[i]);
+ for (first = 1, i = 0; i < IP6S_RULESMAX; i++) {
+ if (ip6stat.ip6s_sources_rule[i]) {
+ if (first) {
+ xo_open_list("rules-applied");
+ xo_emit("\t{T:Source addresses selection "
+ "rule applied}:\n");
+ first = 0;
+ }
+ xo_open_instance("rules-applied");
+ xo_emit("\t\t{ke:name/%s}{:count/%ju} {d:name/%s}\n",
+ srcrule_str[i],
+ (uintmax_t)ip6stat.ip6s_sources_rule[i],
+ srcrule_str[i]);
+ xo_close_instance("rules-applied");
+ }
}
+ if (!first)
+ xo_close_list("rules-applied");
+
+ xo_close_container("source-address-selection");
+
#undef p
#undef p1a
+ xo_close_container(name);
}
/*
@@ -531,52 +622,74 @@ ip6_ifstats(char *ifname)
{
struct in6_ifreq ifr;
int s;
-#define p(f, m) if (ifr.ifr_ifru.ifru_stat.f || sflag <= 1) \
- printf(m, (uintmax_t)ifr.ifr_ifru.ifru_stat.f, plural(ifr.ifr_ifru.ifru_stat.f))
-#define p_5(f, m) if (ifr.ifr_ifru.ifru_stat.f || sflag <= 1) \
- printf(m, (uintmax_t)ip6stat.f)
+
+#define p(f, m) if (ifr.ifr_ifru.ifru_stat.f || sflag <= 1) \
+ xo_emit(m, (uintmax_t)ifr.ifr_ifru.ifru_stat.f, \
+ plural(ifr.ifr_ifru.ifru_stat.f))
if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
- perror("Warning: socket(AF_INET6)");
+ xo_warn("Warning: socket(AF_INET6)");
return;
}
strcpy(ifr.ifr_name, ifname);
if (ioctl(s, SIOCGIFSTAT_IN6, (char *)&ifr) < 0) {
if (errno != EPFNOSUPPORT)
- perror("Warning: ioctl(SIOCGIFSTAT_IN6)");
+ xo_warn("Warning: ioctl(SIOCGIFSTAT_IN6)");
goto end;
}
- printf("ip6 on %s:\n", ifr.ifr_name);
- p(ifs6_in_receive, "\t%ju total input datagram%s\n");
- p(ifs6_in_hdrerr, "\t%ju datagram%s with invalid header received\n");
- p(ifs6_in_toobig, "\t%ju datagram%s exceeded MTU received\n");
- p(ifs6_in_noroute, "\t%ju datagram%s with no route received\n");
- p(ifs6_in_addrerr, "\t%ju datagram%s with invalid dst received\n");
- p(ifs6_in_protounknown, "\t%ju datagram%s with unknown proto received\n");
- p(ifs6_in_truncated, "\t%ju truncated datagram%s received\n");
- p(ifs6_in_discard, "\t%ju input datagram%s discarded\n");
- p(ifs6_in_deliver,
- "\t%ju datagram%s delivered to an upper layer protocol\n");
- p(ifs6_out_forward, "\t%ju datagram%s forwarded to this interface\n");
- p(ifs6_out_request,
- "\t%ju datagram%s sent from an upper layer protocol\n");
- p(ifs6_out_discard, "\t%ju total discarded output datagram%s\n");
- p(ifs6_out_fragok, "\t%ju output datagram%s fragmented\n");
- p(ifs6_out_fragfail, "\t%ju output datagram%s failed on fragment\n");
- p(ifs6_out_fragcreat, "\t%ju output datagram%s succeeded on fragment\n");
- p(ifs6_reass_reqd, "\t%ju incoming datagram%s fragmented\n");
- p(ifs6_reass_ok, "\t%ju datagram%s reassembled\n");
- p(ifs6_reass_fail, "\t%ju datagram%s failed on reassembly\n");
- p(ifs6_in_mcast, "\t%ju multicast datagram%s received\n");
- p(ifs6_out_mcast, "\t%ju multicast datagram%s sent\n");
-
- end:
- close(s);
+ xo_emit("{T:/ip6 on %s}:\n", ifr.ifr_name);
+
+ xo_open_instance("ip6-interface-statistics");
+ xo_emit("{ke:name/%s}", ifr.ifr_name);
+
+ p(ifs6_in_receive, "\t{:received-packets/%ju} "
+ "{N:/total input datagram%s}\n");
+ p(ifs6_in_hdrerr, "\t{:dropped-invalid-header/%ju} "
+ "{N:/datagram%s with invalid header received}\n");
+ p(ifs6_in_toobig, "\t{:dropped-mtu-exceeded/%ju} "
+ "{N:/datagram%s exceeded MTU received}\n");
+ p(ifs6_in_noroute, "\t{:dropped-no-route/%ju} "
+ "{N:/datagram%s with no route received}\n");
+ p(ifs6_in_addrerr, "\t{:dropped-invalid-destination/%ju} "
+ "{N:/datagram%s with invalid dst received}\n");
+ p(ifs6_in_protounknown, "\t{:dropped-unknown-protocol/%ju} "
+ "{N:/datagram%s with unknown proto received}\n");
+ p(ifs6_in_truncated, "\t{:dropped-truncated/%ju} "
+ "{N:/truncated datagram%s received}\n");
+ p(ifs6_in_discard, "\t{:dropped-discarded/%ju} "
+ "{N:/input datagram%s discarded}\n");
+ p(ifs6_in_deliver, "\t{:received-valid-packets/%ju} "
+ "{N:/datagram%s delivered to an upper layer protocol}\n");
+ p(ifs6_out_forward, "\t{:sent-forwarded/%ju} "
+ "{N:/datagram%s forwarded to this interface}\n");
+ p(ifs6_out_request, "\t{:sent-packets/%ju} "
+ "{N:/datagram%s sent from an upper layer protocol}\n");
+ p(ifs6_out_discard, "\t{:discard-packets/%ju} "
+ "{N:/total discarded output datagram%s}\n");
+ p(ifs6_out_fragok, "\t{:discard-fragments/%ju} "
+ "{N:/output datagram%s fragmented}\n");
+ p(ifs6_out_fragfail, "\t{:fragments-failed/%ju} "
+ "{N:/output datagram%s failed on fragment}\n");
+ p(ifs6_out_fragcreat, "\t{:fragments-created/%ju} "
+ "{N:/output datagram%s succeeded on fragment}\n");
+ p(ifs6_reass_reqd, "\t{:reassembly-required/%ju} "
+ "{N:/incoming datagram%s fragmented}\n");
+ p(ifs6_reass_ok, "\t{:reassembled-packets/%ju} "
+ "{N:/datagram%s reassembled}\n");
+ p(ifs6_reass_fail, "\t{:reassembly-failed/%ju} "
+ "{N:/datagram%s failed on reassembly}\n");
+ p(ifs6_in_mcast, "\t{:received-multicast/%ju} "
+ "{N:/multicast datagram%s received}\n");
+ p(ifs6_out_mcast, "\t{:sent-multicast/%ju} "
+ "{N:/multicast datagram%s sent}\n");
+
+ end:
+ xo_close_instance("ip6-interface-statistics");
+ close(s);
#undef p
-#undef p_5
}
static const char *icmp6names[] = {
@@ -844,88 +957,119 @@ static const char *icmp6names[] = {
void
icmp6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct icmp6stat icmp6stat, zerostat;
+ struct icmp6stat icmp6stat;
int i, first;
- size_t len;
-
- len = sizeof icmp6stat;
- if (live) {
- memset(&icmp6stat, 0, len);
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet6.icmp6.stats", &icmp6stat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- if (errno != ENOENT)
- warn("sysctl: net.inet6.icmp6.stats");
- return;
- }
- } else
- kread(off, &icmp6stat, len);
- printf("%s:\n", name);
+ if (fetch_stats("net.inet6.icmp6.stats", off, &icmp6stat,
+ sizeof(icmp6stat), kread_counters) != 0)
+ return;
+
+ xo_emit("{T:/%s}:\n", name);
+ xo_open_container(name);
#define p(f, m) if (icmp6stat.f || sflag <= 1) \
- printf(m, (uintmax_t)icmp6stat.f, plural(icmp6stat.f))
+ xo_emit(m, (uintmax_t)icmp6stat.f, plural(icmp6stat.f))
#define p_5(f, m) if (icmp6stat.f || sflag <= 1) \
- printf(m, (uintmax_t)icmp6stat.f)
+ xo_emit(m, (uintmax_t)icmp6stat.f)
- p(icp6s_error, "\t%ju call%s to icmp6_error\n");
- p(icp6s_canterror,
- "\t%ju error%s not generated in response to an icmp6 message\n");
- p(icp6s_toofreq,
- "\t%ju error%s not generated because of rate limitation\n");
+ p(icp6s_error, "\t{:icmp6-calls/%ju} "
+ "{N:/call%s to icmp6_error}\n");
+ p(icp6s_canterror, "\t{:errors-not-generated-from-message/%ju} "
+ "{N:/error%s not generated in response to an icmp6 message}\n");
+ p(icp6s_toofreq, "\t{:errors-discarded-by-rate-limitation/%ju} "
+ "{N:/error%s not generated because of rate limitation}\n");
#define NELEM (int)(sizeof(icmp6stat.icp6s_outhist)/sizeof(icmp6stat.icp6s_outhist[0]))
for (first = 1, i = 0; i < NELEM; i++)
if (icmp6stat.icp6s_outhist[i] != 0) {
if (first) {
- printf("\tOutput histogram:\n");
+ xo_open_list("output-histogram");
+ xo_emit("\t{T:Output histogram}:\n");
first = 0;
}
- printf("\t\t%s: %ju\n", icmp6names[i],
+ xo_open_instance("output-histogram");
+ xo_emit("\t\t{k:name/%s}: {:count/%ju}\n",
+ icmp6names[i],
(uintmax_t)icmp6stat.icp6s_outhist[i]);
+ xo_close_instance("output-histogram");
}
+ if (!first)
+ xo_close_list("output-histogram");
#undef NELEM
- p(icp6s_badcode, "\t%ju message%s with bad code fields\n");
- p(icp6s_tooshort, "\t%ju message%s < minimum length\n");
- p(icp6s_checksum, "\t%ju bad checksum%s\n");
- p(icp6s_badlen, "\t%ju message%s with bad length\n");
+
+ p(icp6s_badcode, "\t{:dropped-bad-code/%ju} "
+ "{N:/message%s with bad code fields}\n");
+ p(icp6s_tooshort, "\t{:dropped-too-short/%ju} "
+ "{N:/message%s < minimum length}\n");
+ p(icp6s_checksum, "\t{:dropped-bad-checksum/%ju} "
+ "{N:/bad checksum%s}\n");
+ p(icp6s_badlen, "\t{:dropped-bad-length/%ju} "
+ "{N:/message%s with bad length}\n");
#define NELEM (int)(sizeof(icmp6stat.icp6s_inhist)/sizeof(icmp6stat.icp6s_inhist[0]))
for (first = 1, i = 0; i < NELEM; i++)
if (icmp6stat.icp6s_inhist[i] != 0) {
if (first) {
- printf("\tInput histogram:\n");
+ xo_open_list("input-histogram");
+ xo_emit("\t{T:Input histogram}:\n");
first = 0;
}
- printf("\t\t%s: %ju\n", icmp6names[i],
+ xo_open_instance("input-histogram");
+ xo_emit("\t\t{k:name/%s}: {:count/%ju}\n",
+ icmp6names[i],
(uintmax_t)icmp6stat.icp6s_inhist[i]);
+ xo_close_instance("input-histogram");
}
+ if (!first)
+ xo_close_list("input-histogram");
#undef NELEM
- printf("\tHistogram of error messages to be generated:\n");
- p_5(icp6s_odst_unreach_noroute, "\t\t%ju no route\n");
- p_5(icp6s_odst_unreach_admin, "\t\t%ju administratively prohibited\n");
- p_5(icp6s_odst_unreach_beyondscope, "\t\t%ju beyond scope\n");
- p_5(icp6s_odst_unreach_addr, "\t\t%ju address unreachable\n");
- p_5(icp6s_odst_unreach_noport, "\t\t%ju port unreachable\n");
- p_5(icp6s_opacket_too_big, "\t\t%ju packet too big\n");
- p_5(icp6s_otime_exceed_transit, "\t\t%ju time exceed transit\n");
- p_5(icp6s_otime_exceed_reassembly, "\t\t%ju time exceed reassembly\n");
- p_5(icp6s_oparamprob_header, "\t\t%ju erroneous header field\n");
- p_5(icp6s_oparamprob_nextheader, "\t\t%ju unrecognized next header\n");
- p_5(icp6s_oparamprob_option, "\t\t%ju unrecognized option\n");
- p_5(icp6s_oredirect, "\t\t%ju redirect\n");
- p_5(icp6s_ounknown, "\t\t%ju unknown\n");
-
- p(icp6s_reflect, "\t%ju message response%s generated\n");
- p(icp6s_nd_toomanyopt, "\t%ju message%s with too many ND options\n");
- p(icp6s_nd_badopt, "\t%ju message%s with bad ND options\n");
- p(icp6s_badns, "\t%ju bad neighbor solicitation message%s\n");
- p(icp6s_badna, "\t%ju bad neighbor advertisement message%s\n");
- p(icp6s_badrs, "\t%ju bad router solicitation message%s\n");
- p(icp6s_badra, "\t%ju bad router advertisement message%s\n");
- p(icp6s_badredirect, "\t%ju bad redirect message%s\n");
- p(icp6s_pmtuchg, "\t%ju path MTU change%s\n");
+ xo_emit("\t{T:Histogram of error messages to be generated}:\n");
+ xo_open_container("errors");
+ p_5(icp6s_odst_unreach_noroute, "\t\t{:no-route/%ju} "
+ "{N:/no route}\n");
+ p_5(icp6s_odst_unreach_admin, "\t\t{:admin-prohibited/%ju} "
+ "{N:/administratively prohibited}\n");
+ p_5(icp6s_odst_unreach_beyondscope, "\t\t{:beyond-scope/%ju} "
+ "{N:/beyond scope}\n");
+ p_5(icp6s_odst_unreach_addr, "\t\t{:address-unreachable/%ju} "
+ "{N:/address unreachable}\n");
+ p_5(icp6s_odst_unreach_noport, "\t\t{:port-unreachable/%ju} "
+ "{N:/port unreachable}\n");
+ p_5(icp6s_opacket_too_big, "\t\t{:packet-too-big/%ju} "
+ "{N:/packet too big}\n");
+ p_5(icp6s_otime_exceed_transit, "\t\t{:time-exceed-transmit/%ju} "
+ "{N:/time exceed transit}\n");
+ p_5(icp6s_otime_exceed_reassembly, "\t\t{:time-exceed-reassembly/%ju} "
+ "{N:/time exceed reassembly}\n");
+ p_5(icp6s_oparamprob_header, "\t\t{:bad-header/%ju} "
+ "{N:/erroneous header field}\n");
+ p_5(icp6s_oparamprob_nextheader, "\t\t{:bad-next-header/%ju} "
+ "{N:/unrecognized next header}\n");
+ p_5(icp6s_oparamprob_option, "\t\t{:bad-option/%ju} "
+ "{N:/unrecognized option}\n");
+ p_5(icp6s_oredirect, "\t\t{:redirects/%ju} "
+ "{N:/redirect}\n");
+ p_5(icp6s_ounknown, "\t\t{:unknown/%ju} {N:unknown}\n");
+
+ p(icp6s_reflect, "\t{:reflect/%ju} "
+ "{N:/message response%s generated}\n");
+ p(icp6s_nd_toomanyopt, "\t{:too-many-nd-options/%ju} "
+ "{N:/message%s with too many ND options}\n");
+ p(icp6s_nd_badopt, "\t{:bad-nd-options/%ju} "
+ "{N:/message%s with bad ND options}\n");
+ p(icp6s_badns, "\t{:bad-neighbor-solicitation/%ju} "
+ "{N:/bad neighbor solicitation message%s}\n");
+ p(icp6s_badna, "\t{:bad-neighbor-advertisement/%ju} "
+ "{N:/bad neighbor advertisement message%s}\n");
+ p(icp6s_badrs, "\t{:bad-router-solicitation/%ju} "
+ "{N:/bad router solicitation message%s}\n");
+ p(icp6s_badra, "\t{:bad-router-advertisement/%ju} "
+ "{N:/bad router advertisement message%s}\n");
+ p(icp6s_badredirect, "\t{:bad-redirect/%ju} "
+ "{N:/bad redirect message%s}\n");
+ xo_close_container("errors");
+ p(icp6s_pmtuchg, "\t{:path-mtu-changes/%ju} {N:/path MTU change%s}\n");
#undef p
#undef p_5
+ xo_close_container(name);
}
/*
@@ -936,61 +1080,102 @@ icmp6_ifstats(char *ifname)
{
struct in6_ifreq ifr;
int s;
-#define p(f, m) if (ifr.ifr_ifru.ifru_icmp6stat.f || sflag <= 1) \
- printf(m, (uintmax_t)ifr.ifr_ifru.ifru_icmp6stat.f, plural(ifr.ifr_ifru.ifru_icmp6stat.f))
-#define p2(f, m) if (ifr.ifr_ifru.ifru_icmp6stat.f || sflag <= 1) \
- printf(m, (uintmax_t)ifr.ifr_ifru.ifru_icmp6stat.f, pluralies(ifr.ifr_ifru.ifru_icmp6stat.f))
+
+#define p(f, m) if (ifr.ifr_ifru.ifru_icmp6stat.f || sflag <= 1) \
+ xo_emit(m, (uintmax_t)ifr.ifr_ifru.ifru_icmp6stat.f, \
+ plural(ifr.ifr_ifru.ifru_icmp6stat.f))
+#define p2(f, m) if (ifr.ifr_ifru.ifru_icmp6stat.f || sflag <= 1) \
+ xo_emit(m, (uintmax_t)ifr.ifr_ifru.ifru_icmp6stat.f, \
+ pluralies(ifr.ifr_ifru.ifru_icmp6stat.f))
if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
- perror("Warning: socket(AF_INET6)");
+ xo_warn("Warning: socket(AF_INET6)");
return;
}
strcpy(ifr.ifr_name, ifname);
if (ioctl(s, SIOCGIFSTAT_ICMP6, (char *)&ifr) < 0) {
if (errno != EPFNOSUPPORT)
- perror("Warning: ioctl(SIOCGIFSTAT_ICMP6)");
+ xo_warn("Warning: ioctl(SIOCGIFSTAT_ICMP6)");
goto end;
}
- printf("icmp6 on %s:\n", ifr.ifr_name);
- p(ifs6_in_msg, "\t%ju total input message%s\n");
- p(ifs6_in_error, "\t%ju total input error message%s\n");
- p(ifs6_in_dstunreach, "\t%ju input destination unreachable error%s\n");
- p(ifs6_in_adminprohib, "\t%ju input administratively prohibited error%s\n");
- p(ifs6_in_timeexceed, "\t%ju input time exceeded error%s\n");
- p(ifs6_in_paramprob, "\t%ju input parameter problem error%s\n");
- p(ifs6_in_pkttoobig, "\t%ju input packet too big error%s\n");
- p(ifs6_in_echo, "\t%ju input echo request%s\n");
- p2(ifs6_in_echoreply, "\t%ju input echo repl%s\n");
- p(ifs6_in_routersolicit, "\t%ju input router solicitation%s\n");
- p(ifs6_in_routeradvert, "\t%ju input router advertisement%s\n");
- p(ifs6_in_neighborsolicit, "\t%ju input neighbor solicitation%s\n");
- p(ifs6_in_neighboradvert, "\t%ju input neighbor advertisement%s\n");
- p(ifs6_in_redirect, "\t%ju input redirect%s\n");
- p2(ifs6_in_mldquery, "\t%ju input MLD quer%s\n");
- p(ifs6_in_mldreport, "\t%ju input MLD report%s\n");
- p(ifs6_in_mlddone, "\t%ju input MLD done%s\n");
-
- p(ifs6_out_msg, "\t%ju total output message%s\n");
- p(ifs6_out_error, "\t%ju total output error message%s\n");
- p(ifs6_out_dstunreach, "\t%ju output destination unreachable error%s\n");
- p(ifs6_out_adminprohib, "\t%ju output administratively prohibited error%s\n");
- p(ifs6_out_timeexceed, "\t%ju output time exceeded error%s\n");
- p(ifs6_out_paramprob, "\t%ju output parameter problem error%s\n");
- p(ifs6_out_pkttoobig, "\t%ju output packet too big error%s\n");
- p(ifs6_out_echo, "\t%ju output echo request%s\n");
- p2(ifs6_out_echoreply, "\t%ju output echo repl%s\n");
- p(ifs6_out_routersolicit, "\t%ju output router solicitation%s\n");
- p(ifs6_out_routeradvert, "\t%ju output router advertisement%s\n");
- p(ifs6_out_neighborsolicit, "\t%ju output neighbor solicitation%s\n");
- p(ifs6_out_neighboradvert, "\t%ju output neighbor advertisement%s\n");
- p(ifs6_out_redirect, "\t%ju output redirect%s\n");
- p2(ifs6_out_mldquery, "\t%ju output MLD quer%s\n");
- p(ifs6_out_mldreport, "\t%ju output MLD report%s\n");
- p(ifs6_out_mlddone, "\t%ju output MLD done%s\n");
-
- end:
+ xo_emit("{T:/icmp6 on %s}:\n", ifr.ifr_name);
+
+ xo_open_instance("icmp6-interface-statistics");
+ xo_emit("{ke:name/%s}", ifr.ifr_name);
+ p(ifs6_in_msg, "\t{:received-packets/%ju} "
+ "{N:/total input message%s}\n");
+ p(ifs6_in_error, "\t{:received-errors/%ju} "
+ "{N:/total input error message%s}\n");
+ p(ifs6_in_dstunreach, "\t{:received-destination-unreachable/%ju} "
+ "{N:/input destination unreachable error%s}\n");
+ p(ifs6_in_adminprohib, "\t{:received-admin-prohibited/%ju} "
+ "{N:/input administratively prohibited error%s}\n");
+ p(ifs6_in_timeexceed, "\t{:received-time-exceeded/%ju} "
+ "{N:/input time exceeded error%s}\n");
+ p(ifs6_in_paramprob, "\t{:received-bad-parameter/%ju} "
+ "{N:/input parameter problem error%s}\n");
+ p(ifs6_in_pkttoobig, "\t{:received-packet-too-big/%ju} "
+ "{N:/input packet too big error%s}\n");
+ p(ifs6_in_echo, "\t{:received-echo-requests/%ju} "
+ "{N:/input echo request%s}\n");
+ p2(ifs6_in_echoreply, "\t{:received-echo-replies/%ju} "
+ "{N:/input echo repl%s}\n");
+ p(ifs6_in_routersolicit, "\t{:received-router-solicitation/%ju} "
+ "{N:/input router solicitation%s}\n");
+ p(ifs6_in_routeradvert, "\t{:received-router-advertisement/%ju} "
+ "{N:/input router advertisement%s}\n");
+ p(ifs6_in_neighborsolicit, "\t{:received-neighbor-solicitation/%ju} "
+ "{N:/input neighbor solicitation%s}\n");
+ p(ifs6_in_neighboradvert, "\t{:received-neighbor-advertisement/%ju} "
+ "{N:/input neighbor advertisement%s}\n");
+ p(ifs6_in_redirect, "\t{:received-redirects/%ju} "
+ "{N:/input redirect%s}\n");
+ p2(ifs6_in_mldquery, "\t{:received-mld-queries/%ju} "
+ "{N:/input MLD quer%s}\n");
+ p(ifs6_in_mldreport, "\t{:received-mld-reports/%ju} "
+ "{N:/input MLD report%s}\n");
+ p(ifs6_in_mlddone, "\t{:received-mld-done/%ju} "
+ "{N:/input MLD done%s}\n");
+
+ p(ifs6_out_msg, "\t{:sent-packets/%ju} "
+ "{N:/total output message%s}\n");
+ p(ifs6_out_error, "\t{:sent-errors/%ju} "
+ "{N:/total output error message%s}\n");
+ p(ifs6_out_dstunreach, "\t{:sent-destination-unreachable/%ju} "
+ "{N:/output destination unreachable error%s}\n");
+ p(ifs6_out_adminprohib, "\t{:sent-admin-prohibited/%ju} "
+ "{N:/output administratively prohibited error%s}\n");
+ p(ifs6_out_timeexceed, "\t{:sent-time-exceeded/%ju} "
+ "{N:/output time exceeded error%s}\n");
+ p(ifs6_out_paramprob, "\t{:sent-bad-parameter/%ju} "
+ "{N:/output parameter problem error%s}\n");
+ p(ifs6_out_pkttoobig, "\t{:sent-packet-too-big/%ju} "
+ "{N:/output packet too big error%s}\n");
+ p(ifs6_out_echo, "\t{:sent-echo-requests/%ju} "
+ "{N:/output echo request%s}\n");
+ p2(ifs6_out_echoreply, "\t{:sent-echo-replies/%ju} "
+ "{N:/output echo repl%s}\n");
+ p(ifs6_out_routersolicit, "\t{:sent-router-solicitation/%ju} "
+ "{N:/output router solicitation%s}\n");
+ p(ifs6_out_routeradvert, "\t{:sent-router-advertisement/%ju} "
+ "{N:/output router advertisement%s}\n");
+ p(ifs6_out_neighborsolicit, "\t{:sent-neighbor-solicitation/%ju} "
+ "{N:/output neighbor solicitation%s}\n");
+ p(ifs6_out_neighboradvert, "\t{:sent-neighbor-advertisement/%ju} "
+ "{N:/output neighbor advertisement%s}\n");
+ p(ifs6_out_redirect, "\t{:sent-redirects/%ju} "
+ "{N:/output redirect%s}\n");
+ p2(ifs6_out_mldquery, "\t{:sent-mld-queries/%ju} "
+ "{N:/output MLD quer%s}\n");
+ p(ifs6_out_mldreport, "\t{:sent-mld-reports/%ju} "
+ "{N:/output MLD report%s}\n");
+ p(ifs6_out_mlddone, "\t{:sent-mld-dones/%ju} "
+ "{N:/output MLD done%s}\n");
+
+end:
+ xo_close_instance("icmp6-interface-statistics");
close(s);
#undef p
}
@@ -1001,36 +1186,34 @@ icmp6_ifstats(char *ifname)
void
pim6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct pim6stat pim6stat, zerostat;
- size_t len = sizeof pim6stat;
-
- if (live) {
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet6.pim.stats", &pim6stat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- if (errno != ENOENT)
- warn("sysctl: net.inet6.pim.stats");
- return;
- }
- } else {
- if (off == 0)
- return;
- kread(off, &pim6stat, len);
- }
+ struct pim6stat pim6stat;
+
+ if (fetch_stats("net.inet6.pim.stats", off, &pim6stat,
+ sizeof(pim6stat), kread) != 0)
+ return;
- printf("%s:\n", name);
+ xo_emit("{T:/%s}:\n", name);
+ xo_open_container(name);
#define p(f, m) if (pim6stat.f || sflag <= 1) \
- printf(m, (uintmax_t)pim6stat.f, plural(pim6stat.f))
- p(pim6s_rcv_total, "\t%ju message%s received\n");
- p(pim6s_rcv_tooshort, "\t%ju message%s received with too few bytes\n");
- p(pim6s_rcv_badsum, "\t%ju message%s received with bad checksum\n");
- p(pim6s_rcv_badversion, "\t%ju message%s received with bad version\n");
- p(pim6s_rcv_registers, "\t%ju register%s received\n");
- p(pim6s_rcv_badregisters, "\t%ju bad register%s received\n");
- p(pim6s_snd_registers, "\t%ju register%s sent\n");
+ xo_emit(m, (uintmax_t)pim6stat.f, plural(pim6stat.f))
+
+ p(pim6s_rcv_total, "\t{:received-packets/%ju} "
+ "{N:/message%s received}\n");
+ p(pim6s_rcv_tooshort, "\t{:dropped-too-short/%ju} "
+ "{N:/message%s received with too few bytes}\n");
+ p(pim6s_rcv_badsum, "\t{:dropped-bad-checksum/%ju} "
+ "{N:/message%s received with bad checksum}\n");
+ p(pim6s_rcv_badversion, "\t{:dropped-bad-version/%ju} "
+ "{N:/message%s received with bad version}\n");
+ p(pim6s_rcv_registers, "\t{:received-registers/%ju} "
+ "{N:/register%s received}\n");
+ p(pim6s_rcv_badregisters, "\t{:received-bad-registers/%ju} "
+ "{N:/bad register%s received}\n");
+ p(pim6s_snd_registers, "\t{:sent-registers/%ju} "
+ "{N:/register%s sent}\n");
#undef p
+ xo_close_container(name);
}
/*
@@ -1039,44 +1222,43 @@ pim6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
void
rip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct rip6stat rip6stat, zerostat;
+ struct rip6stat rip6stat;
u_quad_t delivered;
- size_t len;
-
- len = sizeof(rip6stat);
- if (live) {
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet6.ip6.rip6stats", &rip6stat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- if (errno != ENOENT)
- warn("sysctl: net.inet6.ip6.rip6stats");
- return;
- }
- } else
- kread(off, &rip6stat, len);
- printf("%s:\n", name);
+ if (fetch_stats("net.inet6.ip6.rip6stats", off, &rip6stat,
+ sizeof(rip6stat), kread_counters) != 0)
+ return;
+
+ xo_emit("{T:/%s}:\n", name);
+ xo_open_container(name);
#define p(f, m) if (rip6stat.f || sflag <= 1) \
- printf(m, (uintmax_t)rip6stat.f, plural(rip6stat.f))
- p(rip6s_ipackets, "\t%ju message%s received\n");
- p(rip6s_isum, "\t%ju checksum calculation%s on inbound\n");
- p(rip6s_badsum, "\t%ju message%s with bad checksum\n");
- p(rip6s_nosock, "\t%ju message%s dropped due to no socket\n");
- p(rip6s_nosockmcast,
- "\t%ju multicast message%s dropped due to no socket\n");
- p(rip6s_fullsock,
- "\t%ju message%s dropped due to full socket buffers\n");
+ xo_emit(m, (uintmax_t)rip6stat.f, plural(rip6stat.f))
+
+ p(rip6s_ipackets, "\t{:received-packets/%ju} "
+ "{N:/message%s received}\n");
+ p(rip6s_isum, "\t{:input-checksum-computation/%ju} "
+ "{N:/checksum calculation%s on inbound}\n");
+ p(rip6s_badsum, "\t{:received-bad-checksum/%ju} "
+ "{N:/message%s with bad checksum}\n");
+ p(rip6s_nosock, "\t{:dropped-no-socket/%ju} "
+ "{N:/message%s dropped due to no socket}\n");
+ p(rip6s_nosockmcast, "\t{:dropped-multicast-no-socket/%ju} "
+ "{N:/multicast message%s dropped due to no socket}\n");
+ p(rip6s_fullsock, "\t{:dropped-full-socket-buffer/%ju} "
+ "{N:/message%s dropped due to full socket buffers}\n");
delivered = rip6stat.rip6s_ipackets -
rip6stat.rip6s_badsum -
rip6stat.rip6s_nosock -
rip6stat.rip6s_nosockmcast -
rip6stat.rip6s_fullsock;
if (delivered || sflag <= 1)
- printf("\t%ju delivered\n", (uintmax_t)delivered);
- p(rip6s_opackets, "\t%ju datagram%s output\n");
+ xo_emit("\t{:delivered-packets/%ju} {N:/delivered}\n",
+ (uintmax_t)delivered);
+ p(rip6s_opackets, "\t{:sent-packets/%ju} "
+ "{N:/datagram%s output}\n");
#undef p
+ xo_close_container(name);
}
/*
@@ -1094,15 +1276,19 @@ rip6_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
};
void
-inet6print(struct in6_addr *in6, int port, const char *proto, int numeric)
+inet6print(const char *container, struct in6_addr *in6, int port,
+ const char *proto, int numeric)
{
struct servent *sp = 0;
char line[80], *cp;
int width;
- sprintf(line, "%.*s.", Wflag ? 39 :
- (Aflag && !numeric) ? 12 : 16, inet6name(in6));
- cp = index(line, '\0');
+ if (container)
+ xo_open_container(container);
+
+ sprintf(line, "%.*s.", Wflag ? 39 : (Aflag && !numeric) ? 12 : 16,
+ inet6name(in6));
+ cp = strchr(line, '\0');
if (!numeric && port)
GETSERVBYPORT6(port, proto, sp);
if (sp || port == 0)
@@ -1110,7 +1296,15 @@ inet6print(struct in6_addr *in6, int port, const char *proto, int numeric)
else
sprintf(cp, "%d", ntohs((u_short)port));
width = Wflag ? 45 : Aflag ? 18 : 22;
- printf("%-*.*s ", width, width, line);
+
+ xo_emit("{d:target/%-*.*s} ", width, width, line);
+
+ int alen = cp - line - 1, plen = strlen(cp) - 1;
+ xo_emit("{e:address/%*.*s}{e:port/%*.*s}", alen, alen, line, plen,
+ plen, cp);
+
+ if (container)
+ xo_close_container(container);
}
/*
@@ -1122,38 +1316,45 @@ inet6print(struct in6_addr *in6, int port, const char *proto, int numeric)
char *
inet6name(struct in6_addr *in6p)
{
- char *cp;
+ struct sockaddr_in6 sin6;
+ char hbuf[NI_MAXHOST], *cp;
static char line[50];
- struct hostent *hp;
static char domain[MAXHOSTNAMELEN];
static int first = 1;
+ int flags, error;
+ if (IN6_IS_ADDR_UNSPECIFIED(in6p)) {
+ strcpy(line, "*");
+ return (line);
+ }
if (first && !numeric_addr) {
first = 0;
if (gethostname(domain, MAXHOSTNAMELEN) == 0 &&
- (cp = index(domain, '.')))
+ (cp = strchr(domain, '.')))
(void) strcpy(domain, cp + 1);
else
domain[0] = 0;
}
- cp = 0;
- if (!numeric_addr && !IN6_IS_ADDR_UNSPECIFIED(in6p)) {
- hp = gethostbyaddr((char *)in6p, sizeof(*in6p), AF_INET6);
- if (hp) {
- if ((cp = index(hp->h_name, '.')) &&
- !strcmp(cp + 1, domain))
- *cp = 0;
- cp = hp->h_name;
- }
- }
- if (IN6_IS_ADDR_UNSPECIFIED(in6p))
- strcpy(line, "*");
- else if (cp)
- strcpy(line, cp);
- else
+ memset(&sin6, 0, sizeof(sin6));
+ memcpy(&sin6.sin6_addr, in6p, sizeof(*in6p));
+ sin6.sin6_family = AF_INET6;
+ /* XXX: in6p.s6_addr[2] can contain scopeid. */
+ in6_fillscopeid(&sin6);
+ flags = (numeric_addr) ? NI_NUMERICHOST : 0;
+ error = getnameinfo((struct sockaddr *)&sin6, sizeof(sin6), hbuf,
+ sizeof(hbuf), NULL, 0, flags);
+ if (error == 0) {
+ if ((flags & NI_NUMERICHOST) == 0 &&
+ (cp = strchr(hbuf, '.')) &&
+ !strcmp(cp + 1, domain))
+ *cp = 0;
+ strcpy(line, hbuf);
+ } else {
+ /* XXX: this should not happen. */
sprintf(line, "%s",
- inet_ntop(AF_INET6, (void *)in6p, ntop_buf,
+ inet_ntop(AF_INET6, (void *)&sin6.sin6_addr, ntop_buf,
sizeof(ntop_buf)));
+ }
return (line);
}
#endif /*INET6*/
diff --git a/freebsd/usr.bin/netstat/ipsec.c b/freebsd/usr.bin/netstat/ipsec.c
index 53dfdbe1..5b7c185b 100644
--- a/freebsd/usr.bin/netstat/ipsec.c
+++ b/freebsd/usr.bin/netstat/ipsec.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
/* $KAME: ipsec.c,v 1.33 2003/07/25 09:54:32 itojun Exp $ */
/*-
@@ -90,6 +94,9 @@ static char sccsid[] = "@(#)inet.c 8.5 (Berkeley) 5/24/95";
#endif /* not lint */
#endif
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -109,9 +116,14 @@ __FBSDID("$FreeBSD$");
#include <stdint.h>
#include <stdio.h>
+#include <stdbool.h>
#include <string.h>
#include <unistd.h>
+#include <libxo/xo.h>
#include "netstat.h"
+#ifdef __rtems__
+#include "rtems-bsd-netstat-ipsec-data.h"
+#endif /* __rtems__ */
#ifdef IPSEC
struct val2str {
@@ -157,6 +169,9 @@ static struct val2str ipsec_espnames[] = {
#ifdef SADB_X_EALG_AESCTR
{ SADB_X_EALG_AESCTR, "aes-ctr", },
#endif
+#ifdef SADB_X_EALG_AESGCM16
+ { SADB_X_EALG_AESGCM16, "aes-gcm-16", },
+#endif
{ -1, NULL },
};
@@ -168,98 +183,44 @@ static struct val2str ipsec_compnames[] = {
{ -1, NULL },
};
-static void ipsec_hist(const u_quad_t *hist, size_t histmax,
- const struct val2str *name, const char *title);
static void print_ipsecstats(const struct ipsecstat *ipsecstat);
-
-/*
- * Dump IPSEC statistics structure.
- */
-static void
-ipsec_hist(const u_quad_t *hist, size_t histmax, const struct val2str *name,
- const char *title)
-{
- int first;
- size_t proto;
- const struct val2str *p;
-
- first = 1;
- for (proto = 0; proto < histmax; proto++) {
- if (hist[proto] <= 0)
- continue;
- if (first) {
- printf("\t%s histogram:\n", title);
- first = 0;
- }
- for (p = name; p && p->str; p++) {
- if (p->val == (int)proto)
- break;
- }
- if (p && p->str) {
- printf("\t\t%s: %ju\n", p->str, (uintmax_t)hist[proto]);
- } else {
- printf("\t\t#%ld: %ju\n", (long)proto,
- (uintmax_t)hist[proto]);
- }
- }
-}
-
static void
print_ipsecstats(const struct ipsecstat *ipsecstat)
{
+ xo_open_container("ipsec-statistics");
+
#define p(f, m) if (ipsecstat->f || sflag <= 1) \
- printf(m, (uintmax_t)ipsecstat->f, plural(ipsecstat->f))
-#define pes(f, m) if (ipsecstat->f || sflag <= 1) \
- printf(m, (uintmax_t)ipsecstat->f, plurales(ipsecstat->f))
-#define hist(f, n, t) \
- ipsec_hist((f), sizeof(f)/sizeof(f[0]), (n), (t));
-
- p(in_success, "\t%ju inbound packet%s processed successfully\n");
- p(in_polvio, "\t%ju inbound packet%s violated process security "
- "policy\n");
- p(in_nosa, "\t%ju inbound packet%s with no SA available\n");
- p(in_inval, "\t%ju invalid inbound packet%s\n");
- p(in_nomem, "\t%ju inbound packet%s failed due to insufficient memory\n");
- p(in_badspi, "\t%ju inbound packet%s failed getting SPI\n");
- p(in_ahreplay, "\t%ju inbound packet%s failed on AH replay check\n");
- p(in_espreplay, "\t%ju inbound packet%s failed on ESP replay check\n");
- p(in_ahauthsucc, "\t%ju inbound packet%s considered authentic\n");
- p(in_ahauthfail, "\t%ju inbound packet%s failed on authentication\n");
- hist(ipsecstat->in_ahhist, ipsec_ahnames, "AH input");
- hist(ipsecstat->in_esphist, ipsec_espnames, "ESP input");
- hist(ipsecstat->in_comphist, ipsec_compnames, "IPComp input");
-
- p(out_success, "\t%ju outbound packet%s processed successfully\n");
- p(out_polvio, "\t%ju outbound packet%s violated process security "
- "policy\n");
- p(out_nosa, "\t%ju outbound packet%s with no SA available\n");
- p(out_inval, "\t%ju invalid outbound packet%s\n");
- p(out_nomem, "\t%ju outbound packet%s failed due to insufficient memory\n");
- p(out_noroute, "\t%ju outbound packet%s with no route\n");
- hist(ipsecstat->out_ahhist, ipsec_ahnames, "AH output");
- hist(ipsecstat->out_esphist, ipsec_espnames, "ESP output");
- hist(ipsecstat->out_comphist, ipsec_compnames, "IPComp output");
- p(spdcachelookup, "\t%ju SPD cache lookup%s\n");
- pes(spdcachemiss, "\t%ju SPD cache miss%s\n");
-#undef pes
-#undef hist
- p(ips_in_polvio, "\t%ju inbound packet%s violated process "
- "security policy\n");
- p(ips_out_polvio, "\t%ju outbound packet%s violated process "
- "security policy\n");
- p(ips_out_nosa, "\t%ju outbound packet%s with no SA available\n");
- p(ips_out_nomem, "\t%ju outbound packet%s failed due to "
- "insufficient memory\n");
- p(ips_out_noroute, "\t%ju outbound packet%s with no route "
- "available\n");
- p(ips_out_inval, "\t%ju invalid outbound packet%s\n");
- p(ips_out_bundlesa, "\t%ju outbound packet%s with bundled SAs\n");
- p(ips_mbcoalesced, "\t%ju mbuf%s coalesced during clone\n");
- p(ips_clcoalesced, "\t%ju cluster%s coalesced during clone\n");
- p(ips_clcopied, "\t%ju cluster%s copied during clone\n");
- p(ips_mbinserted, "\t%ju mbuf%s inserted during makespace\n");
+ xo_emit(m, (uintmax_t)ipsecstat->f, plural(ipsecstat->f))
+
+ p(ips_in_polvio, "\t{:dropped-policy-violation/%ju} "
+ "{N:/inbound packet%s violated process security policy}\n");
+ p(ips_in_nomem, "\t{:dropped-no-memory/%ju} "
+ "{N:/inbound packet%s failed due to insufficient memory}\n");
+ p(ips_in_inval, "\t{:dropped-invalid/%ju} "
+ "{N:/invalid inbound packet%s}\n");
+ p(ips_out_polvio, "\t{:discarded-policy-violation/%ju} "
+ "{N:/outbound packet%s violated process security policy}\n");
+ p(ips_out_nosa, "\t{:discarded-no-sa/%ju} "
+ "{N:/outbound packet%s with no SA available}\n");
+ p(ips_out_nomem, "\t{:discarded-no-memory/%ju} "
+ "{N:/outbound packet%s failed due to insufficient memory}\n");
+ p(ips_out_noroute, "\t{:discarded-no-route/%ju} "
+ "{N:/outbound packet%s with no route available}\n");
+ p(ips_out_inval, "\t{:discarded-invalid/%ju} "
+ "{N:/invalid outbound packet%s}\n");
+ p(ips_out_bundlesa, "\t{:send-bundled-sa/%ju} "
+ "{N:/outbound packet%s with bundled SAs}\n");
+ p(ips_mbcoalesced, "\t{:mbufs-coalesced-during-clone/%ju} "
+ "{N:/mbuf%s coalesced during clone}\n");
+ p(ips_clcoalesced, "\t{:clusters-coalesced-during-clone/%ju} "
+ "{N:/cluster%s coalesced during clone}\n");
+ p(ips_clcopied, "\t{:clusters-copied-during-clone/%ju} "
+ "{N:/cluster%s copied during clone}\n");
+ p(ips_mbinserted, "\t{:mbufs-inserted/%ju} "
+ "{N:/mbuf%s inserted during makespace}\n");
#undef p
+ xo_close_container("ipsec-statistics");
}
void
@@ -267,17 +228,22 @@ ipsec_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
struct ipsecstat ipsecstat;
- if (off == 0)
- return;
- printf ("%s:\n", name);
- kread(off, (char *)&ipsecstat, sizeof(ipsecstat));
+ if (strcmp(name, "ipsec6") == 0) {
+ if (fetch_stats("net.inet6.ipsec6.ipsecstats", off,&ipsecstat,
+ sizeof(ipsecstat), kread_counters) != 0)
+ return;
+ } else {
+ if (fetch_stats("net.inet.ipsec.ipsecstats", off, &ipsecstat,
+ sizeof(ipsecstat), kread_counters) != 0)
+ return;
+ }
+
+ xo_emit("{T:/%s}:\n", name);
print_ipsecstats(&ipsecstat);
}
-static void ipsec_hist_new(const u_int32_t *hist, size_t histmax,
- const struct val2str *name, const char *title);
static void print_ahstats(const struct ahstat *ahstat);
static void print_espstats(const struct espstat *espstat);
static void print_ipcompstats(const struct ipcompstat *ipcompstat);
@@ -286,8 +252,8 @@ static void print_ipcompstats(const struct ipcompstat *ipcompstat);
* Dump IPSEC statistics structure.
*/
static void
-ipsec_hist_new(const u_int32_t *hist, size_t histmax,
- const struct val2str *name, const char *title)
+ipsec_hist_new(const uint64_t *hist, size_t histmax,
+ const struct val2str *name, const char *title, const char *cname)
{
int first;
size_t proto;
@@ -298,56 +264,72 @@ ipsec_hist_new(const u_int32_t *hist, size_t histmax,
if (hist[proto] <= 0)
continue;
if (first) {
- printf("\t%s histogram:\n", title);
+ xo_open_list(cname);
+ xo_emit("\t{T:/%s histogram}:\n", title);
first = 0;
}
+ xo_open_instance(cname);
for (p = name; p && p->str; p++) {
if (p->val == (int)proto)
break;
}
if (p && p->str) {
- printf("\t\t%s: %u\n", p->str, hist[proto]);
+ xo_emit("\t\t{k:name}: {:count/%ju}\n", p->str,
+ (uintmax_t)hist[proto]);
} else {
- printf("\t\t#%lu: %u\n", (unsigned long)proto,
- hist[proto]);
+ xo_emit("\t\t#{k:name/%lu}: {:count/%ju}\n",
+ (unsigned long)proto, (uintmax_t)hist[proto]);
}
+ xo_close_instance(cname);
}
+ if (!first)
+ xo_close_list(cname);
}
static void
print_ahstats(const struct ahstat *ahstat)
{
-#define p32(f, m) if (ahstat->f || sflag <= 1) \
- printf("\t%u" m, (unsigned int)ahstat->f, plural(ahstat->f))
-#define p64(f, m) if (ahstat->f || sflag <= 1) \
- printf("\t%ju" m, (uintmax_t)ahstat->f, plural(ahstat->f))
-#define hist(f, n, t) \
- ipsec_hist_new((f), sizeof(f)/sizeof(f[0]), (n), (t));
-
- p32(ahs_hdrops, " packet%s shorter than header shows\n");
- p32(ahs_nopf, " packet%s dropped; protocol family not supported\n");
- p32(ahs_notdb, " packet%s dropped; no TDB\n");
- p32(ahs_badkcr, " packet%s dropped; bad KCR\n");
- p32(ahs_qfull, " packet%s dropped; queue full\n");
- p32(ahs_noxform, " packet%s dropped; no transform\n");
- p32(ahs_wrap, " replay counter wrap%s\n");
- p32(ahs_badauth, " packet%s dropped; bad authentication detected\n");
- p32(ahs_badauthl, " packet%s dropped; bad authentication length\n");
- p32(ahs_replay, " possible replay packet%s detected\n");
- p32(ahs_input, " packet%s in\n");
- p32(ahs_output, " packet%s out\n");
- p32(ahs_invalid, " packet%s dropped; invalid TDB\n");
- p64(ahs_ibytes, " byte%s in\n");
- p64(ahs_obytes, " byte%s out\n");
- p32(ahs_toobig, " packet%s dropped; larger than IP_MAXPACKET\n");
- p32(ahs_pdrops, " packet%s blocked due to policy\n");
- p32(ahs_crypto, " crypto processing failure%s\n");
- p32(ahs_tunnel, " tunnel sanity check failure%s\n");
- hist(ahstat->ahs_hist, ipsec_ahnames, "AH output");
-
-#undef p32
-#undef p64
+ xo_open_container("ah-statictics");
+
+#define p(f, n, m) if (ahstat->f || sflag <= 1) \
+ xo_emit("\t{:" n "/%ju} {N:/" m "}\n", \
+ (uintmax_t)ahstat->f, plural(ahstat->f))
+#define hist(f, n, t, c) \
+ ipsec_hist_new((f), sizeof(f)/sizeof(f[0]), (n), (t), (c))
+
+ p(ahs_hdrops, "dropped-short-header",
+ "packet%s shorter than header shows");
+ p(ahs_nopf, "dropped-bad-protocol",
+ "packet%s dropped; protocol family not supported");
+ p(ahs_notdb, "dropped-no-tdb", "packet%s dropped; no TDB");
+ p(ahs_badkcr, "dropped-bad-kcr", "packet%s dropped; bad KCR");
+ p(ahs_qfull, "dropped-queue-full", "packet%s dropped; queue full");
+ p(ahs_noxform, "dropped-no-transform",
+ "packet%s dropped; no transform");
+ p(ahs_wrap, "replay-counter-wraps", "replay counter wrap%s");
+ p(ahs_badauth, "dropped-bad-auth",
+ "packet%s dropped; bad authentication detected");
+ p(ahs_badauthl, "dropped-bad-auth-level",
+ "packet%s dropped; bad authentication length");
+ p(ahs_replay, "possile-replay-detected",
+ "possible replay packet%s detected");
+ p(ahs_input, "received-packets", "packet%s in");
+ p(ahs_output, "send-packets", "packet%s out");
+ p(ahs_invalid, "dropped-bad-tdb", "packet%s dropped; invalid TDB");
+ p(ahs_ibytes, "received-bytes", "byte%s in");
+ p(ahs_obytes, "send-bytes", "byte%s out");
+ p(ahs_toobig, "dropped-too-large",
+ "packet%s dropped; larger than IP_MAXPACKET");
+ p(ahs_pdrops, "dropped-policy-violation",
+ "packet%s blocked due to policy");
+ p(ahs_crypto, "crypto-failures", "crypto processing failure%s");
+ p(ahs_tunnel, "tunnel-failures", "tunnel sanity check failure%s");
+ hist(ahstat->ahs_hist, ipsec_ahnames,
+ "AH output", "ah-output-histogram");
+
+#undef p
#undef hist
+ xo_close_container("ah-statictics");
}
void
@@ -355,10 +337,11 @@ ah_stats(u_long off, const char *name, int family __unused, int proto __unused)
{
struct ahstat ahstat;
- if (off == 0)
+ if (fetch_stats("net.inet.ah.stats", off, &ahstat,
+ sizeof(ahstat), kread_counters) != 0)
return;
- printf ("%s:\n", name);
- kread(off, (char *)&ahstat, sizeof(ahstat));
+
+ xo_emit("{T:/%s}:\n", name);
print_ahstats(&ahstat);
}
@@ -366,38 +349,47 @@ ah_stats(u_long off, const char *name, int family __unused, int proto __unused)
static void
print_espstats(const struct espstat *espstat)
{
-#define p32(f, m) if (espstat->f || sflag <= 1) \
- printf("\t%u" m, (unsigned int)espstat->f, plural(espstat->f))
-#define p64(f, m) if (espstat->f || sflag <= 1) \
- printf("\t%ju" m, (uintmax_t)espstat->f, plural(espstat->f))
-#define hist(f, n, t) \
- ipsec_hist_new((f), sizeof(f)/sizeof(f[0]), (n), (t));
-
- p32(esps_hdrops, " packet%s shorter than header shows\n");
- p32(esps_nopf, " packet%s dropped; protocol family not supported\n");
- p32(esps_notdb, " packet%s dropped; no TDB\n");
- p32(esps_badkcr, " packet%s dropped; bad KCR\n");
- p32(esps_qfull, " packet%s dropped; queue full\n");
- p32(esps_noxform, " packet%s dropped; no transform\n");
- p32(esps_badilen, " packet%s dropped; bad ilen\n");
- p32(esps_wrap, " replay counter wrap%s\n");
- p32(esps_badenc, " packet%s dropped; bad encryption detected\n");
- p32(esps_badauth, " packet%s dropped; bad authentication detected\n");
- p32(esps_replay, " possible replay packet%s detected\n");
- p32(esps_input, " packet%s in\n");
- p32(esps_output, " packet%s out\n");
- p32(esps_invalid, " packet%s dropped; invalid TDB\n");
- p64(esps_ibytes, " byte%s in\n");
- p64(esps_obytes, " byte%s out\n");
- p32(esps_toobig, " packet%s dropped; larger than IP_MAXPACKET\n");
- p32(esps_pdrops, " packet%s blocked due to policy\n");
- p32(esps_crypto, " crypto processing failure%s\n");
- p32(esps_tunnel, " tunnel sanity check failure%s\n");
- hist(espstat->esps_hist, ipsec_espnames, "ESP output");
-
-#undef p32
-#undef p64
+ xo_open_container("esp-statictics");
+#define p(f, n, m) if (espstat->f || sflag <= 1) \
+ xo_emit("\t{:" n "/%ju} {N:/" m "}\n", \
+ (uintmax_t)espstat->f, plural(espstat->f))
+#define hist(f, n, t, c) \
+ ipsec_hist_new((f), sizeof(f)/sizeof(f[0]), (n), (t), (c));
+
+ p(esps_hdrops, "dropped-short-header",
+ "packet%s shorter than header shows");
+ p(esps_nopf, "dropped-bad-protocol",
+ "packet%s dropped; protocol family not supported");
+ p(esps_notdb, "dropped-no-tdb", "packet%s dropped; no TDB");
+ p(esps_badkcr, "dropped-bad-kcr", "packet%s dropped; bad KCR");
+ p(esps_qfull, "dropped-queue-full", "packet%s dropped; queue full");
+ p(esps_noxform, "dropped-no-transform",
+ "packet%s dropped; no transform");
+ p(esps_badilen, "dropped-bad-length", "packet%s dropped; bad ilen");
+ p(esps_wrap, "replay-counter-wraps", "replay counter wrap%s");
+ p(esps_badenc, "dropped-bad-crypto",
+ "packet%s dropped; bad encryption detected");
+ p(esps_badauth, "dropped-bad-auth",
+ "packet%s dropped; bad authentication detected");
+ p(esps_replay, "possible-replay-detected",
+ "possible replay packet%s detected");
+ p(esps_input, "received-packets", "packet%s in");
+ p(esps_output, "sent-packets", "packet%s out");
+ p(esps_invalid, "dropped-bad-tdb", "packet%s dropped; invalid TDB");
+ p(esps_ibytes, "receieve-bytes", "byte%s in");
+ p(esps_obytes, "sent-bytes", "byte%s out");
+ p(esps_toobig, "dropped-too-large",
+ "packet%s dropped; larger than IP_MAXPACKET");
+ p(esps_pdrops, "dropped-policy-violation",
+ "packet%s blocked due to policy");
+ p(esps_crypto, "crypto-failures", "crypto processing failure%s");
+ p(esps_tunnel, "tunnel-failures", "tunnel sanity check failure%s");
+ hist(espstat->esps_hist, ipsec_espnames,
+ "ESP output", "esp-output-histogram");
+
+#undef p
#undef hist
+ xo_close_container("esp-statictics");
}
void
@@ -405,10 +397,11 @@ esp_stats(u_long off, const char *name, int family __unused, int proto __unused)
{
struct espstat espstat;
- if (off == 0)
+ if (fetch_stats("net.inet.esp.stats", off, &espstat,
+ sizeof(espstat), kread_counters) != 0)
return;
- printf ("%s:\n", name);
- kread(off, (char *)&espstat, sizeof(espstat));
+
+ xo_emit("{T:/%s}:\n", name);
print_espstats(&espstat);
}
@@ -416,43 +409,44 @@ esp_stats(u_long off, const char *name, int family __unused, int proto __unused)
static void
print_ipcompstats(const struct ipcompstat *ipcompstat)
{
- uint32_t version;
-#define p32(f, m) if (ipcompstat->f || sflag <= 1) \
- printf("\t%u" m, (unsigned int)ipcompstat->f, plural(ipcompstat->f))
-#define p64(f, m) if (ipcompstat->f || sflag <= 1) \
- printf("\t%ju" m, (uintmax_t)ipcompstat->f, plural(ipcompstat->f))
-#define hist(f, n, t) \
- ipsec_hist_new((f), sizeof(f)/sizeof(f[0]), (n), (t));
-
-#ifndef IPCOMPSTAT_VERSION
- version = 0;
-#else
- version = ipcompstat->version;
-#endif
- p32(ipcomps_hdrops, " packet%s shorter than header shows\n");
- p32(ipcomps_nopf, " packet%s dropped; protocol family not supported\n");
- p32(ipcomps_notdb, " packet%s dropped; no TDB\n");
- p32(ipcomps_badkcr, " packet%s dropped; bad KCR\n");
- p32(ipcomps_qfull, " packet%s dropped; queue full\n");
- p32(ipcomps_noxform, " packet%s dropped; no transform\n");
- p32(ipcomps_wrap, " replay counter wrap%s\n");
- p32(ipcomps_input, " packet%s in\n");
- p32(ipcomps_output, " packet%s out\n");
- p32(ipcomps_invalid, " packet%s dropped; invalid TDB\n");
- p64(ipcomps_ibytes, " byte%s in\n");
- p64(ipcomps_obytes, " byte%s out\n");
- p32(ipcomps_toobig, " packet%s dropped; larger than IP_MAXPACKET\n");
- p32(ipcomps_pdrops, " packet%s blocked due to policy\n");
- p32(ipcomps_crypto, " crypto processing failure%s\n");
- hist(ipcompstat->ipcomps_hist, ipsec_compnames, "COMP output");
- if (version >= 1) {
- p32(ipcomps_threshold, " packet%s sent uncompressed; size < compr. algo. threshold\n");
- p32(ipcomps_uncompr, " packet%s sent uncompressed; compression was useless\n");
- }
+ xo_open_container("ipcomp-statictics");
+
+#define p(f, n, m) if (ipcompstat->f || sflag <= 1) \
+ xo_emit("\t{:" n "/%ju} {N:/" m "}\n", \
+ (uintmax_t)ipcompstat->f, plural(ipcompstat->f))
+#define hist(f, n, t, c) \
+ ipsec_hist_new((f), sizeof(f)/sizeof(f[0]), (n), (t), (c));
+
+ p(ipcomps_hdrops, "dropped-short-header",
+ "packet%s shorter than header shows");
+ p(ipcomps_nopf, "dropped-bad-protocol",
+ "packet%s dropped; protocol family not supported");
+ p(ipcomps_notdb, "dropped-no-tdb", "packet%s dropped; no TDB");
+ p(ipcomps_badkcr, "dropped-bad-kcr", "packet%s dropped; bad KCR");
+ p(ipcomps_qfull, "dropped-queue-full", "packet%s dropped; queue full");
+ p(ipcomps_noxform, "dropped-no-transform",
+ "packet%s dropped; no transform");
+ p(ipcomps_wrap, "replay-counter-wraps", "replay counter wrap%s");
+ p(ipcomps_input, "receieve-packets", "packet%s in");
+ p(ipcomps_output, "sent-packets", "packet%s out");
+ p(ipcomps_invalid, "dropped-bad-tdb", "packet%s dropped; invalid TDB");
+ p(ipcomps_ibytes, "receieved-bytes", "byte%s in");
+ p(ipcomps_obytes, "sent-bytes", "byte%s out");
+ p(ipcomps_toobig, "dropped-too-large",
+ "packet%s dropped; larger than IP_MAXPACKET");
+ p(ipcomps_pdrops, "dropped-policy-violation",
+ "packet%s blocked due to policy");
+ p(ipcomps_crypto, "crypto-failure", "crypto processing failure%s");
+ hist(ipcompstat->ipcomps_hist, ipsec_compnames,
+ "COMP output", "comp-output-histogram");
+ p(ipcomps_threshold, "sent-uncompressed-small-packets",
+ "packet%s sent uncompressed; size < compr. algo. threshold");
+ p(ipcomps_uncompr, "sent-uncompressed-useless-packets",
+ "packet%s sent uncompressed; compression was useless");
-#undef p32
-#undef p64
+#undef p
#undef hist
+ xo_close_container("ipcomp-statictics");
}
void
@@ -461,10 +455,11 @@ ipcomp_stats(u_long off, const char *name, int family __unused,
{
struct ipcompstat ipcompstat;
- if (off == 0)
+ if (fetch_stats("net.inet.ipcomp.stats", off, &ipcompstat,
+ sizeof(ipcompstat), kread_counters) != 0)
return;
- printf ("%s:\n", name);
- kread(off, (char *)&ipcompstat, sizeof(ipcompstat));
+
+ xo_emit("{T:/%s}:\n", name);
print_ipcompstats(&ipcompstat);
}
diff --git a/freebsd/usr.bin/netstat/main.c b/freebsd/usr.bin/netstat/main.c
index a1d66376..a58ee648 100644
--- a/freebsd/usr.bin/netstat/main.c
+++ b/freebsd/usr.bin/netstat/main.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright (c) 1983, 1988, 1993
* Regents of the University of California. All rights reserved.
@@ -30,7 +34,7 @@
*/
#ifndef lint
-char const copyright[] =
+static char const copyright[] =
"@(#) Copyright (c) 1983, 1988, 1993\n\
Regents of the University of California. All rights reserved.\n";
#endif /* not lint */
@@ -44,15 +48,6 @@ static char sccsid[] = "@(#)main.c 8.4 (Berkeley) 3/1/94";
#ifdef __rtems__
#define __need_getopt_newlib
#include <getopt.h>
-#define RTEMS_BSD_PROGRAM_NO_OPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_SOCKET_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FOPEN_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FCLOSE_WRAP
-#define RTEMS_BSD_PROGRAM_NO_MALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_CALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_REALLOC_WRAP
-#define RTEMS_BSD_PROGRAM_NO_FREE_WRAP
#include <machine/rtems-bsd-program.h>
#include <machine/rtems-bsd-commands.h>
#endif /* __rtems__ */
@@ -64,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <sys/sysctl.h>
#include <netinet/in.h>
@@ -82,129 +78,21 @@ __FBSDID("$FreeBSD$");
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include "netstat.h"
+#include "nl_defs.h"
+#include <libxo/xo.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-main-data.h"
+#endif /* __rtems__ */
-static struct nlist nl[] = {
-#define N_IFNET 0
- { .n_name = "_ifnet" },
-#define N_RTSTAT 1
- { .n_name = "_rtstat" },
-#define N_RTREE 2
- { .n_name = "_rt_tables"},
-#define N_MRTSTAT 3
- { .n_name = "_mrtstat" },
-#define N_MFCHASHTBL 4
- { .n_name = "_mfchashtbl" },
-#define N_VIFTABLE 5
- { .n_name = "_viftable" },
-#define N_IPX 6
- { .n_name = "_ipxpcb_list"},
-#define N_IPXSTAT 7
- { .n_name = "_ipxstat"},
-#define N_SPXSTAT 8
- { .n_name = "_spx_istat"},
-#define N_DDPSTAT 9
- { .n_name = "_ddpstat"},
-#define N_DDPCB 10
- { .n_name = "_ddpcb"},
-#define N_NGSOCKS 11
- { .n_name = "_ngsocklist"},
-#define N_IP6STAT 12
- { .n_name = "_ip6stat" },
-#define N_ICMP6STAT 13
- { .n_name = "_icmp6stat" },
-#define N_IPSECSTAT 14
- { .n_name = "_ipsec4stat" },
-#define N_IPSEC6STAT 15
- { .n_name = "_ipsec6stat" },
-#define N_PIM6STAT 16
- { .n_name = "_pim6stat" },
-#define N_MRT6STAT 17
- { .n_name = "_mrt6stat" },
-#define N_MF6CTABLE 18
- { .n_name = "_mf6ctable" },
-#define N_MIF6TABLE 19
- { .n_name = "_mif6table" },
-#define N_PFKEYSTAT 20
- { .n_name = "_pfkeystat" },
-#define N_MBSTAT 21
- { .n_name = "_mbstat" },
-#define N_MBTYPES 22
- { .n_name = "_mbtypes" },
-#define N_NMBCLUSTERS 23
- { .n_name = "_nmbclusters" },
-#define N_NMBUFS 24
- { .n_name = "_nmbufs" },
-#define N_MBHI 25
- { .n_name = "_mbuf_hiwm" },
-#define N_CLHI 26
- { .n_name = "_clust_hiwm" },
-#define N_NCPUS 27
- { .n_name = "_smp_cpus" },
-#define N_PAGESZ 28
- { .n_name = "_pagesize" },
-#define N_MBPSTAT 29
- { .n_name = "_mb_statpcpu" },
-#define N_RTTRASH 30
- { .n_name = "_rttrash" },
-#define N_MBLO 31
- { .n_name = "_mbuf_lowm" },
-#define N_CLLO 32
- { .n_name = "_clust_lowm" },
-#define N_CARPSTAT 33
- { .n_name = "_carpstats" },
-#define N_PFSYNCSTAT 34
- { .n_name = "_pfsyncstats" },
-#define N_AHSTAT 35
- { .n_name = "_ahstat" },
-#define N_ESPSTAT 36
- { .n_name = "_espstat" },
-#define N_IPCOMPSTAT 37
- { .n_name = "_ipcompstat" },
-#define N_TCPSTAT 38
- { .n_name = "_tcpstat" },
-#define N_UDPSTAT 39
- { .n_name = "_udpstat" },
-#define N_IPSTAT 40
- { .n_name = "_ipstat" },
-#define N_ICMPSTAT 41
- { .n_name = "_icmpstat" },
-#define N_IGMPSTAT 42
- { .n_name = "_igmpstat" },
-#define N_PIMSTAT 43
- { .n_name = "_pimstat" },
-#define N_TCBINFO 44
- { .n_name = "_tcbinfo" },
-#define N_UDBINFO 45
- { .n_name = "_udbinfo" },
-#define N_DIVCBINFO 46
- { .n_name = "_divcbinfo" },
-#define N_RIPCBINFO 47
- { .n_name = "_ripcbinfo" },
-#define N_UNP_COUNT 48
- { .n_name = "_unp_count" },
-#define N_UNP_GENCNT 49
- { .n_name = "_unp_gencnt" },
-#define N_UNP_DHEAD 50
- { .n_name = "_unp_dhead" },
-#define N_UNP_SHEAD 51
- { .n_name = "_unp_shead" },
-#define N_RIP6STAT 52
- { .n_name = "_rip6stat" },
-#define N_SCTPSTAT 53
- { .n_name = "_sctpstat" },
-#define N_MFCTABLESIZE 54
- { .n_name = "_mfctablesize" },
-#define N_ARPSTAT 55
- { .n_name = "_arpstat" },
-#define N_UNP_SPHEAD 56
- { .n_name = "unp_sphead" },
- { .n_name = NULL },
-};
-
-struct protox {
+#ifndef __rtems__
+static struct protox {
+#else /* __rtems__ */
+static const struct protox {
+#endif /* __rtems__ */
int pr_index; /* index into nlist of cb head */
int pr_sindex; /* index into nlist of stat block */
u_char pr_wanted; /* 1 if wanted, 0 otherwise */
@@ -216,7 +104,7 @@ struct protox {
const char *pr_name; /* well-known name */
int pr_usesysctl; /* non-zero if we use sysctl, not kvm */
int pr_protocol;
-} static const protox[] = {
+} protox[] = {
{ N_TCBINFO, N_TCPSTAT, 1, protopr,
tcp_stats, NULL, "tcp", 1, IPPROTO_TCP },
{ N_UDBINFO, N_UDPSTAT, 1, protopr,
@@ -238,21 +126,23 @@ struct protox {
{ N_RIPCBINFO, N_IGMPSTAT, 1, protopr,
igmp_stats, NULL, "igmp", 1, IPPROTO_IGMP },
#ifdef IPSEC
- { -1, N_IPSECSTAT, 1, NULL, /* keep as compat */
- ipsec_stats, NULL, "ipsec", 0, 0},
+ { -1, N_IPSEC4STAT, 1, NULL, /* keep as compat */
+ ipsec_stats, NULL, "ipsec", 1, 0},
{ -1, N_AHSTAT, 1, NULL,
- ah_stats, NULL, "ah", 0, 0},
+ ah_stats, NULL, "ah", 1, 0},
{ -1, N_ESPSTAT, 1, NULL,
- esp_stats, NULL, "esp", 0, 0},
+ esp_stats, NULL, "esp", 1, 0},
{ -1, N_IPCOMPSTAT, 1, NULL,
- ipcomp_stats, NULL, "ipcomp", 0, 0},
+ ipcomp_stats, NULL, "ipcomp", 1, 0},
#endif
{ N_RIPCBINFO, N_PIMSTAT, 1, protopr,
pim_stats, NULL, "pim", 1, IPPROTO_PIM },
- { -1, N_CARPSTAT, 1, NULL,
+ { -1, N_CARPSTATS, 1, NULL,
carp_stats, NULL, "carp", 1, 0 },
- { -1, N_PFSYNCSTAT, 1, NULL,
+#ifdef PF
+ { -1, N_PFSYNCSTATS, 1, NULL,
pfsync_stats, NULL, "pfsync", 1, 0 },
+#endif
{ -1, N_ARPSTAT, 1, NULL,
arp_stats, NULL, "arp", 1, 0 },
{ -1, -1, 0, NULL,
@@ -260,7 +150,11 @@ struct protox {
};
#ifdef INET6
+#ifndef __rtems__
+static struct protox ip6protox[] = {
+#else /* __rtems__ */
static const struct protox ip6protox[] = {
+#endif /* __rtems__ */
{ N_TCBINFO, N_TCPSTAT, 1, protopr,
tcp_stats, NULL, "tcp", 1, IPPROTO_TCP },
{ N_UDBINFO, N_UDPSTAT, 1, protopr,
@@ -275,7 +169,7 @@ static const struct protox ip6protox[] = {
#endif
#ifdef IPSEC
{ -1, N_IPSEC6STAT, 1, NULL,
- ipsec_stats, NULL, "ipsec6", 0, 0 },
+ ipsec_stats, NULL, "ipsec6", 1, 0 },
#endif
#ifdef notyet
{ -1, N_PIM6STAT, 1, NULL,
@@ -289,7 +183,11 @@ static const struct protox ip6protox[] = {
#endif /*INET6*/
#ifdef IPSEC
+#ifndef __rtems__
+static struct protox pfkeyprotox[] = {
+#else /* __rtems__ */
static const struct protox pfkeyprotox[] = {
+#endif /* __rtems__ */
{ -1, N_PFKEYSTAT, 1, NULL,
pfkey_stats, NULL, "pfkey", 0, 0 },
{ -1, -1, 0, NULL,
@@ -297,36 +195,26 @@ static const struct protox pfkeyprotox[] = {
};
#endif
-#ifndef __rtems__
-static const struct protox atalkprotox[] = {
- { N_DDPCB, N_DDPSTAT, 1, atalkprotopr,
- ddp_stats, NULL, "ddp", 0, 0 },
- { -1, -1, 0, NULL,
- NULL, NULL, NULL, 0, 0 }
-};
-#endif
#ifdef NETGRAPH
+#ifndef __rtems__
+static struct protox netgraphprotox[] = {
+#else /* __rtems__ */
static const struct protox netgraphprotox[] = {
- { N_NGSOCKS, -1, 1, netgraphprotopr,
+#endif /* __rtems__ */
+ { N_NGSOCKLIST, -1, 1, netgraphprotopr,
NULL, NULL, "ctrl", 0, 0 },
- { N_NGSOCKS, -1, 1, netgraphprotopr,
+ { N_NGSOCKLIST, -1, 1, netgraphprotopr,
NULL, NULL, "data", 0, 0 },
{ -1, -1, 0, NULL,
NULL, NULL, NULL, 0, 0 }
};
#endif
-#ifdef IPX
-static const struct protox ipxprotox[] = {
- { N_IPX, N_IPXSTAT, 1, ipxprotopr,
- ipx_stats, NULL, "ipx", 0, 0 },
- { N_IPX, N_SPXSTAT, 1, ipxprotopr,
- spx_stats, NULL, "spx", 0, 0 },
- { -1, -1, 0, NULL,
- NULL, NULL, 0, 0, 0 }
-};
-#endif
-static const struct protox *protoprotox[] = {
+#ifndef __rtems__
+static struct protox *protoprotox[] = {
+#else /* __rtems__ */
+static const struct protox *const protoprotox[] = {
+#endif /* __rtems__ */
protox,
#ifdef INET6
ip6protox,
@@ -334,26 +222,30 @@ static const struct protox *protoprotox[] = {
#ifdef IPSEC
pfkeyprotox,
#endif
-#ifdef IPX
- ipxprotox,
-#endif
-#ifndef __rtems__
- atalkprotox, NULL };
-#else
- NULL };
-#endif
+ NULL };
-static void printproto(const struct protox *, const char *);
+#ifndef __rtems__
+static void printproto(struct protox *, const char *, bool *);
+#else /* __rtems__ */
+static void printproto(const struct protox *, const char *, bool *);
+#endif /* __rtems__ */
static void usage(void);
+#ifndef __rtems__
+static struct protox *name2protox(const char *);
+static struct protox *knownname(const char *);
+#else /* __rtems__ */
static const struct protox *name2protox(const char *);
static const struct protox *knownname(const char *);
+#endif /* __rtems__ */
+
+static int kresolve_list(struct nlist *_nl);
static kvm_t *kvmd;
static char *nlistf = NULL, *memf = NULL;
int Aflag; /* show addresses of protocol control block */
int aflag; /* show all sockets (including servers) */
-int Bflag; /* show information about bpf consumers */
+static int Bflag; /* show information about bpf consumers */
int bflag; /* show i/f total bytes in/out */
int dflag; /* show i/f dropped packets */
int gflag; /* show group (multicast) routing or stats */
@@ -366,9 +258,10 @@ int numeric_addr; /* show addresses numerically */
int numeric_port; /* show ports numerically */
static int pflag; /* show given protocol */
#ifndef __rtems__
-int Qflag; /* show netisr information */
+static int Qflag; /* show netisr information */
#endif /* __rtems__ */
int rflag; /* show routing tables (or routing stats) */
+int Rflag; /* show flow / RSS statistics */
int sflag; /* show protocol statistics */
int Wflag; /* wide display */
int Tflag; /* TCP Information */
@@ -380,60 +273,27 @@ int interval; /* repeat interval for i/f stats */
char *interface; /* desired i/f for stats, or NULL for all i/fs */
int unit; /* unit number for above */
-int af; /* address family */
+static int af; /* address family */
int live; /* true if we are examining a live system */
#ifdef __rtems__
-int protopr_initialized;
-int do_rtent;
-struct radix_node_head **rt_tables;
-
static int main(int argc, char *argv[]);
-int rtems_bsd_command_netstat(int argc, char *argv[])
+RTEMS_LINKER_RWSET(bsd_prog_netstat, char);
+
+int
+rtems_bsd_command_netstat(int argc, char *argv[])
{
int exit_code;
+ void *data_begin;
+ size_t data_size;
- rtems_bsd_program_lock();
-
- nlistf = NULL;
- memf = NULL;
-
- Aflag = 0;
- aflag = 0;
- Bflag = 0;
- bflag = 0;
- dflag = 0;
- gflag = 0;
- hflag = 0;
- iflag = 0;
- Lflag = 0;
- mflag = 0;
- noutputs = 0;
- numeric_addr = 0;
- numeric_port = 0;
- pflag = 0;
- rflag = 0;
- sflag = 0;
- Wflag = 0;
- xflag = 0;
- zflag = 0;
- interval = 0;
- interface = 0;
- unit = 0;
- af = 0;
- live = 0;
-
- protopr_initialized = 0;
- do_rtent = 0;
-
- rtems_bsd_netstat_inet_init();
-
- exit_code = rtems_bsd_program_call_main("netstat", main, argc, argv);
-
- free(rt_tables);
- rt_tables = NULL;
+ data_begin = RTEMS_LINKER_SET_BEGIN(bsd_prog_netstat);
+ data_size = RTEMS_LINKER_SET_SIZE(bsd_prog_netstat);
+ rtems_bsd_program_lock();
+ exit_code = rtems_bsd_program_call_main_with_data_restore("netstat",
+ main, argc, argv, data_begin, data_size);
rtems_bsd_program_unlock();
return exit_code;
@@ -442,8 +302,15 @@ int rtems_bsd_command_netstat(int argc, char *argv[])
int
main(int argc, char *argv[])
{
+#ifndef __rtems__
+ struct protox *tp = NULL; /* for printing cblocks & stats */
+#else /* __rtems__ */
const struct protox *tp = NULL; /* for printing cblocks & stats */
+#endif /* __rtems__ */
int ch;
+ int fib = -1;
+ char *endptr;
+ bool first = true;
#ifdef __rtems__
struct getopt_data getopt_data;
memset(&getopt_data, 0, sizeof(getopt_data));
@@ -456,7 +323,11 @@ main(int argc, char *argv[])
af = AF_UNSPEC;
- while ((ch = getopt(argc, argv, "46AaBbdf:ghI:iLlM:mN:np:Qq:rSTsuWw:xz"))
+ argc = xo_parse_args(argc, argv);
+ if (argc < 0)
+ exit(EXIT_FAILURE);
+
+ while ((ch = getopt(argc, argv, "46AaBbdF:f:ghI:iLlM:mN:np:Qq:RrSTsuWw:xz"))
!= -1)
switch(ch) {
case '4':
@@ -488,10 +359,14 @@ main(int argc, char *argv[])
case 'd':
dflag = 1;
break;
+ case 'F':
+ fib = strtol(optarg, &endptr, 0);
+ if (*endptr != '\0' ||
+ (fib == 0 && (errno == EINVAL || errno == ERANGE)))
+ xo_errx(1, "%s: invalid fib", optarg);
+ break;
case 'f':
- if (strcmp(optarg, "ipx") == 0)
- af = AF_IPX;
- else if (strcmp(optarg, "inet") == 0)
+ if (strcmp(optarg, "inet") == 0)
af = AF_INET;
#ifdef INET6
else if (strcmp(optarg, "inet6") == 0)
@@ -501,10 +376,9 @@ main(int argc, char *argv[])
else if (strcmp(optarg, "pfkey") == 0)
af = PF_KEY;
#endif
- else if (strcmp(optarg, "unix") == 0)
+ else if (strcmp(optarg, "unix") == 0 ||
+ strcmp(optarg, "local") == 0)
af = AF_UNIX;
- else if (strcmp(optarg, "atalk") == 0)
- af = AF_APPLETALK;
#ifdef NETGRAPH
else if (strcmp(optarg, "ng") == 0
|| strcmp(optarg, "netgraph") == 0)
@@ -513,7 +387,8 @@ main(int argc, char *argv[])
else if (strcmp(optarg, "link") == 0)
af = AF_LINK;
else {
- errx(1, "%s: unknown address family", optarg);
+ xo_errx(1, "%s: unknown address family",
+ optarg);
}
break;
case 'g':
@@ -526,7 +401,12 @@ main(int argc, char *argv[])
char *cp;
iflag = 1;
- for (cp = interface = optarg; isalpha((unsigned char) *cp); cp++)
+#ifndef __rtems__
+ for (cp = interface = optarg; isalpha(*cp); cp++)
+#else /* __rtems__ */
+ for (cp = interface = optarg; isalpha(
+ (unsigned char) *cp); cp++)
+#endif /* __rtems__ */
continue;
unit = atoi(cp);
break;
@@ -551,9 +431,8 @@ main(int argc, char *argv[])
break;
case 'p':
if ((tp = name2protox(optarg)) == NULL) {
- errx(1,
- "%s: unknown or uninstrumented protocol",
- optarg);
+ xo_errx(1, "%s: unknown or uninstrumented "
+ "protocol", optarg);
}
pflag = 1;
break;
@@ -570,6 +449,9 @@ main(int argc, char *argv[])
case 'r':
rflag = 1;
break;
+ case 'R':
+ Rflag = 1;
+ break;
case 's':
++sflag;
break;
@@ -606,7 +488,11 @@ main(int argc, char *argv[])
#define BACKWARD_COMPATIBILITY
#ifdef BACKWARD_COMPATIBILITY
if (*argv) {
+#ifndef __rtems__
+ if (isdigit(**argv)) {
+#else /* __rtems__ */
if (isdigit((unsigned char) **argv)) {
+#endif /* __rtems__ */
interval = atoi(*argv);
if (interval <= 0)
usage();
@@ -626,33 +512,38 @@ main(int argc, char *argv[])
* guys can't print interesting stuff from kernel memory.
*/
live = (nlistf == NULL && memf == NULL);
- if (!live)
- setgid(getgid());
+ if (!live) {
+ if (setgid(getgid()) != 0)
+ xo_err(-1, "setgid");
+ }
- if (xflag && Tflag)
- errx(1, "-x and -T are incompatible, pick one.");
+ if (xflag && Tflag)
+ xo_errx(1, "-x and -T are incompatible, pick one.");
if (Bflag) {
if (!live)
usage();
bpf_stats(interface);
+ xo_finish();
exit(0);
}
if (mflag) {
if (!live) {
if (kread(0, NULL, 0) == 0)
- mbpr(kvmd, nl[N_MBSTAT].n_value);
+ mbpr(kvmd, nl[N_SFSTAT].n_value);
} else
mbpr(NULL, 0);
+ xo_finish();
exit(0);
}
#ifndef __rtems__
if (Qflag) {
if (!live) {
if (kread(0, NULL, 0) == 0)
- netisr_stats(kvmd);
+ netisr_stats();
} else
- netisr_stats(NULL);
+ netisr_stats();
+ xo_finish();
exit(0);
}
#endif /* __rtems__ */
@@ -670,109 +561,162 @@ main(int argc, char *argv[])
* used for the queries, which is slower.
*/
#endif
- kread(0, NULL, 0);
if (iflag && !sflag) {
- intpr(interval, nl[N_IFNET].n_value, NULL);
+ xo_open_container("statistics");
+ intpr(NULL, af);
+ xo_close_container("statistics");
+ xo_finish();
exit(0);
}
if (rflag) {
- if (sflag)
- rt_stats(nl[N_RTSTAT].n_value, nl[N_RTTRASH].n_value);
- else
- routepr(nl[N_RTREE].n_value);
+ xo_open_container("statistics");
+ if (sflag) {
+ rt_stats();
+ flowtable_stats();
+ } else
+ routepr(fib, af);
+ xo_close_container("statistics");
+ xo_finish();
exit(0);
}
+
if (gflag) {
+ xo_open_container("statistics");
if (sflag) {
if (af == AF_INET || af == AF_UNSPEC)
- mrt_stats(nl[N_MRTSTAT].n_value);
+ mrt_stats();
#ifdef INET6
if (af == AF_INET6 || af == AF_UNSPEC)
- mrt6_stats(nl[N_MRT6STAT].n_value);
+ mrt6_stats();
#endif
} else {
if (af == AF_INET || af == AF_UNSPEC)
- mroutepr(nl[N_MFCHASHTBL].n_value,
- nl[N_MFCTABLESIZE].n_value,
- nl[N_VIFTABLE].n_value);
+ mroutepr();
#ifdef INET6
if (af == AF_INET6 || af == AF_UNSPEC)
- mroute6pr(nl[N_MF6CTABLE].n_value,
- nl[N_MIF6TABLE].n_value);
+ mroute6pr();
#endif
}
+ xo_close_container("statistics");
+ xo_finish();
exit(0);
}
+ /* Load all necessary kvm symbols */
+ kresolve_list(nl);
+
if (tp) {
- printproto(tp, tp->pr_name);
+ xo_open_container("statistics");
+ printproto(tp, tp->pr_name, &first);
+ if (!first)
+ xo_close_list("socket");
+ xo_close_container("statistics");
+ xo_finish();
exit(0);
}
+
+ xo_open_container("statistics");
if (af == AF_INET || af == AF_UNSPEC)
for (tp = protox; tp->pr_name; tp++)
- printproto(tp, tp->pr_name);
+ printproto(tp, tp->pr_name, &first);
#ifdef INET6
if (af == AF_INET6 || af == AF_UNSPEC)
for (tp = ip6protox; tp->pr_name; tp++)
- printproto(tp, tp->pr_name);
+ printproto(tp, tp->pr_name, &first);
#endif /*INET6*/
#ifdef IPSEC
if (af == PF_KEY || af == AF_UNSPEC)
for (tp = pfkeyprotox; tp->pr_name; tp++)
- printproto(tp, tp->pr_name);
+ printproto(tp, tp->pr_name, &first);
#endif /*IPSEC*/
-#ifdef IPX
- if (af == AF_IPX || af == AF_UNSPEC) {
- for (tp = ipxprotox; tp->pr_name; tp++)
- printproto(tp, tp->pr_name);
- }
-#endif /* IPX */
-#ifndef __rtems__
- if (af == AF_APPLETALK || af == AF_UNSPEC)
- for (tp = atalkprotox; tp->pr_name; tp++)
- printproto(tp, tp->pr_name);
-#endif
#ifdef NETGRAPH
if (af == AF_NETGRAPH || af == AF_UNSPEC)
for (tp = netgraphprotox; tp->pr_name; tp++)
- printproto(tp, tp->pr_name);
+ printproto(tp, tp->pr_name, &first);
#endif /* NETGRAPH */
#ifndef __rtems__
if ((af == AF_UNIX || af == AF_UNSPEC) && !sflag)
unixpr(nl[N_UNP_COUNT].n_value, nl[N_UNP_GENCNT].n_value,
nl[N_UNP_DHEAD].n_value, nl[N_UNP_SHEAD].n_value,
- nl[N_UNP_SPHEAD].n_value);
-#endif
+ nl[N_UNP_SPHEAD].n_value, &first);
+#endif /* __rtems__ */
+
+ if (!first)
+ xo_close_list("socket");
+ xo_close_container("statistics");
+ xo_finish();
exit(0);
}
+static int
+fetch_stats_internal(const char *sysctlname, u_long off, void *stats,
+ size_t len, kreadfn_t kreadfn, int zero)
+{
+ int error;
+
+ if (live) {
+ memset(stats, 0, len);
+ if (zero)
+ error = sysctlbyname(sysctlname, NULL, NULL, stats,
+ len);
+ else
+ error = sysctlbyname(sysctlname, stats, &len, NULL, 0);
+ if (error == -1 && errno != ENOENT)
+ xo_warn("sysctl %s", sysctlname);
+ } else {
+ if (off == 0)
+ return (1);
+ error = kreadfn(off, stats, len);
+ }
+ return (error);
+}
+
+int
+fetch_stats(const char *sysctlname, u_long off, void *stats,
+ size_t len, kreadfn_t kreadfn)
+{
+
+ return (fetch_stats_internal(sysctlname, off, stats, len, kreadfn,
+ zflag));
+}
+
+int
+fetch_stats_ro(const char *sysctlname, u_long off, void *stats,
+ size_t len, kreadfn_t kreadfn)
+{
+
+ return (fetch_stats_internal(sysctlname, off, stats, len, kreadfn, 0));
+}
+
/*
* Print out protocol statistics or control blocks (per sflag).
* If the interface was not specifically requested, and the symbol
* is not in the namelist, ignore this one.
*/
static void
-printproto(tp, name)
- const struct protox *tp;
- const char *name;
+#ifndef __rtems__
+printproto(struct protox *tp, const char *name, bool *first)
+#else /* __rtems__ */
+printproto(const struct protox *tp, const char *name, bool *first)
+#endif /* __rtems__ */
{
void (*pr)(u_long, const char *, int, int);
u_long off;
+ bool doingdblocks = false;
if (sflag) {
if (iflag) {
if (tp->pr_istats)
- intpr(interval, nl[N_IFNET].n_value,
- tp->pr_istats);
+ intpr(tp->pr_istats, af);
else if (pflag)
- printf("%s: no per-interface stats routine\n",
+ xo_message("%s: no per-interface stats routine",
tp->pr_name);
return;
} else {
pr = tp->pr_stats;
if (!pr) {
if (pflag)
- printf("%s: no stats routine\n",
+ xo_message("%s: no stats routine",
tp->pr_name);
return;
}
@@ -780,34 +724,99 @@ printproto(tp, name)
off = 0;
else if (tp->pr_sindex < 0) {
if (pflag)
- printf(
- "%s: stats routine doesn't work on cores\n",
- tp->pr_name);
+ xo_message("%s: stats routine doesn't "
+ "work on cores", tp->pr_name);
return;
} else
off = nl[tp->pr_sindex].n_value;
}
} else {
+ doingdblocks = true;
pr = tp->pr_cblocks;
if (!pr) {
if (pflag)
- printf("%s: no PCB routine\n", tp->pr_name);
+ xo_message("%s: no PCB routine", tp->pr_name);
return;
}
if (tp->pr_usesysctl && live)
off = 0;
else if (tp->pr_index < 0) {
if (pflag)
- printf(
- "%s: PCB routine doesn't work on cores\n",
- tp->pr_name);
+ xo_message("%s: PCB routine doesn't work on "
+ "cores", tp->pr_name);
return;
} else
off = nl[tp->pr_index].n_value;
}
if (pr != NULL && (off || (live && tp->pr_usesysctl) ||
- af != AF_UNSPEC))
+ af != AF_UNSPEC)) {
+ if (doingdblocks && *first) {
+ xo_open_list("socket");
+ *first = false;
+ }
+
(*pr)(off, name, af, tp->pr_protocol);
+ }
+}
+
+static int
+kvmd_init(void)
+{
+ char errbuf[_POSIX2_LINE_MAX];
+
+ if (kvmd != NULL)
+ return (0);
+
+ kvmd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, errbuf);
+ if (setgid(getgid()) != 0)
+ xo_err(-1, "setgid");
+
+ if (kvmd == NULL) {
+ xo_warnx("kvm not available: %s", errbuf);
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*
+ * Resolve symbol list, return 0 on success.
+ */
+static int
+kresolve_list(struct nlist *_nl)
+{
+
+ if ((kvmd == NULL) && (kvmd_init() != 0))
+ return (-1);
+
+ if (_nl[0].n_type != 0)
+ return (0);
+
+ if (kvm_nlist(kvmd, _nl) < 0) {
+ if (nlistf)
+ xo_errx(1, "%s: kvm_nlist: %s", nlistf,
+ kvm_geterr(kvmd));
+ else
+ xo_errx(1, "kvm_nlist: %s", kvm_geterr(kvmd));
+ }
+
+ return (0);
+}
+
+/*
+ * Wrapper of kvm_dpcpu_setcpu().
+ */
+void
+kset_dpcpu(u_int cpuid)
+{
+
+ if ((kvmd == NULL) && (kvmd_init() != 0))
+ xo_errx(-1, "%s: kvm is not available", __func__);
+
+ if (kvm_dpcpu_setcpu(kvmd, cpuid) < 0)
+ xo_errx(-1, "%s: kvm_dpcpu_setcpu(%u): %s", __func__,
+ cpuid, kvm_geterr(kvmd));
+ return;
}
/*
@@ -816,40 +825,70 @@ printproto(tp, name)
int
kread(u_long addr, void *buf, size_t size)
{
- char errbuf[_POSIX2_LINE_MAX];
- if (kvmd == NULL) {
- kvmd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, errbuf);
- setgid(getgid());
- if (kvmd != NULL) {
- if (kvm_nlist(kvmd, nl) < 0) {
- if (nlistf)
- errx(1, "%s: kvm_nlist: %s", nlistf,
- kvm_geterr(kvmd));
- else
- errx(1, "kvm_nlist: %s", kvm_geterr(kvmd));
- }
+ if (kvmd_init() < 0)
+ return (-1);
- if (nl[0].n_type == 0) {
- if (nlistf)
- errx(1, "%s: no namelist", nlistf);
- else
- errx(1, "no namelist");
- }
- } else {
- warnx("kvm not available: %s", errbuf);
- return(-1);
- }
- }
if (!buf)
return (0);
if (kvm_read(kvmd, addr, buf, size) != (ssize_t)size) {
- warnx("%s", kvm_geterr(kvmd));
+ xo_warnx("%s", kvm_geterr(kvmd));
return (-1);
}
return (0);
}
+/*
+ * Read single counter(9).
+ */
+uint64_t
+kread_counter(u_long addr)
+{
+
+ if (kvmd_init() < 0)
+ return (-1);
+
+ return (kvm_counter_u64_fetch(kvmd, addr));
+}
+
+/*
+ * Read an array of N counters in kernel memory into array of N uint64_t's.
+ */
+int
+kread_counters(u_long addr, void *buf, size_t size)
+{
+#ifndef __rtems__
+ uint64_t *c;
+ u_long *counters;
+ size_t i, n;
+
+ if (kvmd_init() < 0)
+ return (-1);
+
+ if (size % sizeof(uint64_t) != 0) {
+ xo_warnx("kread_counters: invalid counter set size");
+ return (-1);
+ }
+
+ n = size / sizeof(uint64_t);
+ if ((counters = malloc(n * sizeof(u_long))) == NULL)
+ xo_err(-1, "malloc");
+ if (kread(addr, counters, n * sizeof(u_long)) < 0) {
+ free(counters);
+ return (-1);
+ }
+
+ c = buf;
+ for (i = 0; i < n; i++)
+ c[i] = kvm_counter_u64_fetch(kvmd, counters[i]);
+
+ free(counters);
+ return (0);
+#else /* __rtems__ */
+ return (-1);
+#endif /* __rtems__ */
+}
+
const char *
plural(uintmax_t n)
{
@@ -871,10 +910,18 @@ pluralies(uintmax_t n)
/*
* Find the protox for the given "well-known" name.
*/
+#ifndef __rtems__
+static struct protox *
+#else /* __rtems__ */
static const struct protox *
+#endif /* __rtems__ */
knownname(const char *name)
{
- const struct protox **tpp, *tp;
+#ifndef __rtems__
+ struct protox **tpp, *tp;
+#else /* __rtems__ */
+ const struct protox *const *tpp, *tp;
+#endif /* __rtems__ */
for (tpp = protoprotox; *tpp; tpp++)
for (tp = *tpp; tp->pr_name; tp++)
@@ -886,10 +933,18 @@ knownname(const char *name)
/*
* Find the protox corresponding to name.
*/
+#ifndef __rtems__
+static struct protox *
+#else /* __rtems__ */
static const struct protox *
+#endif /* __rtems__ */
name2protox(const char *name)
{
+#ifndef __rtems__
+ struct protox *tp;
+#else /* __rtems__ */
const struct protox *tp;
+#endif /* __rtems__ */
char **alias; /* alias from p->aliases */
struct protoent *p;
@@ -916,22 +971,25 @@ name2protox(const char *name)
static void
usage(void)
{
- (void)fprintf(stderr, "%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n",
-"usage: netstat [-46AaLnSTWx] [-f protocol_family | -p protocol]\n"
+ (void)xo_error("%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n",
+"usage: netstat [-46AaLnRSTWx] [-f protocol_family | -p protocol]\n"
" [-M core] [-N system]",
" netstat -i | -I interface [-46abdhnW] [-f address_family]\n"
" [-M core] [-N system]",
-" netstat -w wait [-I interface] [-46d] [-M core] [-N system] [-q howmany]",
-" netstat -s [-s] [-46z] [-f protocol_family | -p protocol]\n"
-" [-M core] [-N system]",
-" netstat -i | -I interface [-46s] [-f protocol_family | -p protocol]\n"
+" netstat -w wait [-I interface] [-46d] [-M core] [-N system]\n"
+" [-q howmany]",
+" netstat -s [-46sz] [-f protocol_family | -p protocol]\n"
" [-M core] [-N system]",
+" netstat -i | -I interface -s [-46s]\n"
+" [-f protocol_family | -p protocol] [-M core] [-N system]",
" netstat -m [-M core] [-N system]",
-" netstat -B [-I interface]",
-" netstat -r [-46AanW] [-f address_family] [-M core] [-N system]",
+" netstat -B [-z] [-I interface]",
+" netstat -r [-46AnW] [-F fibnum] [-f address_family]\n"
+" [-M core] [-N system]",
" netstat -rs [-s] [-M core] [-N system]",
" netstat -g [-46W] [-f address_family] [-M core] [-N system]",
" netstat -gs [-46s] [-f address_family] [-M core] [-N system]",
" netstat -Q");
+ xo_finish();
exit(1);
}
diff --git a/freebsd/usr.bin/netstat/mbuf.c b/freebsd/usr.bin/netstat/mbuf.c
index a005d167..81f57292 100644
--- a/freebsd/usr.bin/netstat/mbuf.c
+++ b/freebsd/usr.bin/netstat/mbuf.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright (c) 1983, 1988, 1993
* The Regents of the University of California.
@@ -41,12 +45,16 @@ static char sccsid[] = "@(#)mbuf.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
#endif
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
+#include <sys/sf_buf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
@@ -57,8 +65,13 @@ __FBSDID("$FreeBSD$");
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <string.h>
+#include <libxo/xo.h>
#include "netstat.h"
+#ifdef __rtems__
+#include "rtems-bsd-netstat-mbuf-data.h"
+#endif /* __rtems__ */
/*
* Print mbuf statistics.
@@ -70,26 +83,26 @@ mbpr(void *kvmd, u_long mbaddr)
struct memory_type *mtp;
uintmax_t mbuf_count, mbuf_bytes, mbuf_free, mbuf_failures, mbuf_size;
uintmax_t mbuf_sleeps;
- uintmax_t cluster_count, cluster_bytes, cluster_limit, cluster_free;
+ uintmax_t cluster_count, cluster_limit, cluster_free;
uintmax_t cluster_failures, cluster_size, cluster_sleeps;
uintmax_t packet_count, packet_bytes, packet_free, packet_failures;
uintmax_t packet_sleeps;
- uintmax_t tag_count, tag_bytes;
- uintmax_t jumbop_count, jumbop_bytes, jumbop_limit, jumbop_free;
+ uintmax_t tag_bytes;
+ uintmax_t jumbop_count, jumbop_limit, jumbop_free;
uintmax_t jumbop_failures, jumbop_sleeps, jumbop_size;
- uintmax_t jumbo9_count, jumbo9_bytes, jumbo9_limit, jumbo9_free;
+ uintmax_t jumbo9_count, jumbo9_limit, jumbo9_free;
uintmax_t jumbo9_failures, jumbo9_sleeps, jumbo9_size;
- uintmax_t jumbo16_count, jumbo16_bytes, jumbo16_limit, jumbo16_free;
+ uintmax_t jumbo16_count, jumbo16_limit, jumbo16_free;
uintmax_t jumbo16_failures, jumbo16_sleeps, jumbo16_size;
uintmax_t bytes_inuse, bytes_incache, bytes_total;
int nsfbufs, nsfbufspeak, nsfbufsused;
- struct mbstat mbstat;
+ struct sfstat sfstat;
size_t mlen;
int error;
mtlp = memstat_mtl_alloc();
if (mtlp == NULL) {
- warn("memstat_mtl_alloc");
+ xo_warn("memstat_mtl_alloc");
return;
}
@@ -99,7 +112,7 @@ mbpr(void *kvmd, u_long mbaddr)
*/
if (live) {
if (memstat_sysctl_all(mtlp, 0) < 0) {
- warnx("memstat_sysctl_all: %s",
+ xo_warnx("memstat_sysctl_all: %s",
memstat_strerror(memstat_mtl_geterror(mtlp)));
goto out;
}
@@ -108,10 +121,10 @@ mbpr(void *kvmd, u_long mbaddr)
if (memstat_kvm_all(mtlp, kvmd) < 0) {
error = memstat_mtl_geterror(mtlp);
if (error == MEMSTAT_ERROR_KVM)
- warnx("memstat_kvm_all: %s",
+ xo_warnx("memstat_kvm_all: %s",
kvm_geterr(kvmd));
else
- warnx("memstat_kvm_all: %s",
+ xo_warnx("memstat_kvm_all: %s",
memstat_strerror(error));
goto out;
}
@@ -123,7 +136,7 @@ mbpr(void *kvmd, u_long mbaddr)
mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_MEM_NAME);
if (mtp == NULL) {
- warnx("memstat_mtl_find: zone %s not found", MBUF_MEM_NAME);
+ xo_warnx("memstat_mtl_find: zone %s not found", MBUF_MEM_NAME);
goto out;
}
mbuf_count = memstat_get_count(mtp);
@@ -135,7 +148,7 @@ mbpr(void *kvmd, u_long mbaddr)
mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_PACKET_MEM_NAME);
if (mtp == NULL) {
- warnx("memstat_mtl_find: zone %s not found",
+ xo_warnx("memstat_mtl_find: zone %s not found",
MBUF_PACKET_MEM_NAME);
goto out;
}
@@ -147,12 +160,11 @@ mbpr(void *kvmd, u_long mbaddr)
mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_CLUSTER_MEM_NAME);
if (mtp == NULL) {
- warnx("memstat_mtl_find: zone %s not found",
+ xo_warnx("memstat_mtl_find: zone %s not found",
MBUF_CLUSTER_MEM_NAME);
goto out;
}
cluster_count = memstat_get_count(mtp);
- cluster_bytes = memstat_get_bytes(mtp);
cluster_limit = memstat_get_countlimit(mtp);
cluster_free = memstat_get_free(mtp);
cluster_failures = memstat_get_failures(mtp);
@@ -162,25 +174,22 @@ mbpr(void *kvmd, u_long mbaddr)
#ifndef __rtems__
mtp = memstat_mtl_find(mtlp, ALLOCATOR_MALLOC, MBUF_TAG_MEM_NAME);
if (mtp == NULL) {
- warnx("memstat_mtl_find: malloc type %s not found",
+ xo_warnx("memstat_mtl_find: malloc type %s not found",
MBUF_TAG_MEM_NAME);
goto out;
}
- tag_count = memstat_get_count(mtp);
tag_bytes = memstat_get_bytes(mtp);
#else /* __rtems__ */
- tag_count = 0;
tag_bytes = 0;
#endif /* __rtems__ */
mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_JUMBOP_MEM_NAME);
if (mtp == NULL) {
- warnx("memstat_mtl_find: zone %s not found",
+ xo_warnx("memstat_mtl_find: zone %s not found",
MBUF_JUMBOP_MEM_NAME);
goto out;
}
jumbop_count = memstat_get_count(mtp);
- jumbop_bytes = memstat_get_bytes(mtp);
jumbop_limit = memstat_get_countlimit(mtp);
jumbop_free = memstat_get_free(mtp);
jumbop_failures = memstat_get_failures(mtp);
@@ -189,12 +198,11 @@ mbpr(void *kvmd, u_long mbaddr)
mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_JUMBO9_MEM_NAME);
if (mtp == NULL) {
- warnx("memstat_mtl_find: zone %s not found",
+ xo_warnx("memstat_mtl_find: zone %s not found",
MBUF_JUMBO9_MEM_NAME);
goto out;
}
jumbo9_count = memstat_get_count(mtp);
- jumbo9_bytes = memstat_get_bytes(mtp);
jumbo9_limit = memstat_get_countlimit(mtp);
jumbo9_free = memstat_get_free(mtp);
jumbo9_failures = memstat_get_failures(mtp);
@@ -203,48 +211,55 @@ mbpr(void *kvmd, u_long mbaddr)
mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_JUMBO16_MEM_NAME);
if (mtp == NULL) {
- warnx("memstat_mtl_find: zone %s not found",
+ xo_warnx("memstat_mtl_find: zone %s not found",
MBUF_JUMBO16_MEM_NAME);
goto out;
}
jumbo16_count = memstat_get_count(mtp);
- jumbo16_bytes = memstat_get_bytes(mtp);
jumbo16_limit = memstat_get_countlimit(mtp);
jumbo16_free = memstat_get_free(mtp);
jumbo16_failures = memstat_get_failures(mtp);
jumbo16_sleeps = memstat_get_sleeps(mtp);
jumbo16_size = memstat_get_size(mtp);
- printf("%ju/%ju/%ju mbufs in use (current/cache/total)\n",
+ xo_open_container("mbuf-statistics");
+
+ xo_emit("{:mbuf-current/%ju}/{:mbuf-cache/%ju}/{:mbuf-total/%ju} "
+ "{N:mbufs in use (current\\/cache\\/total)}\n",
mbuf_count + packet_count, mbuf_free + packet_free,
mbuf_count + packet_count + mbuf_free + packet_free);
- printf("%ju/%ju/%ju/%ju mbuf clusters in use "
- "(current/cache/total/max)\n",
+ xo_emit("{:cluster-current/%ju}/{:cluster-cache/%ju}/"
+ "{:cluster-total/%ju}/{:cluster-max/%ju} "
+ "{N:mbuf clusters in use (current\\/cache\\/total\\/max)}\n",
cluster_count - packet_free, cluster_free + packet_free,
cluster_count + cluster_free, cluster_limit);
- printf("%ju/%ju mbuf+clusters out of packet secondary zone in use "
- "(current/cache)\n",
+ xo_emit("{:packet-count/%ju}/{:packet-free/%ju} "
+ "{N:mbuf+clusters out of packet secondary zone in use "
+ "(current\\/cache)}\n",
packet_count, packet_free);
- printf("%ju/%ju/%ju/%ju %juk (page size) jumbo clusters in use "
- "(current/cache/total/max)\n",
+ xo_emit("{:jumbo-count/%ju}/{:jumbo-cache/%ju}/{:jumbo-total/%ju}/"
+ "{:jumbo-max/%ju} {:jumbo-page-size/%ju}{U:k} {N:(page size)} "
+ "{N:jumbo clusters in use (current\\/cache\\/total\\/max)}\n",
jumbop_count, jumbop_free, jumbop_count + jumbop_free,
jumbop_limit, jumbop_size / 1024);
- printf("%ju/%ju/%ju/%ju 9k jumbo clusters in use "
- "(current/cache/total/max)\n",
+ xo_emit("{:jumbo9-count/%ju}/{:jumbo9-cache/%ju}/"
+ "{:jumbo9-total/%ju}/{:jumbo9-max/%ju} "
+ "{N:9k jumbo clusters in use (current\\/cache\\/total\\/max)}\n",
jumbo9_count, jumbo9_free, jumbo9_count + jumbo9_free,
jumbo9_limit);
- printf("%ju/%ju/%ju/%ju 16k jumbo clusters in use "
- "(current/cache/total/max)\n",
+ xo_emit("{:jumbo16-count/%ju}/{:jumbo16-cache/%ju}/"
+ "{:jumbo16-total/%ju}/{:jumbo16-limit/%ju} "
+ "{N:16k jumbo clusters in use (current\\/cache\\/total\\/max)}\n",
jumbo16_count, jumbo16_free, jumbo16_count + jumbo16_free,
jumbo16_limit);
#if 0
- printf("%ju mbuf tags in use\n", tag_count);
+ xo_emit("{:tag-count/%ju} {N:mbuf tags in use}\n", tag_count);
#endif
/*-
@@ -292,48 +307,74 @@ mbpr(void *kvmd, u_long mbaddr)
*/
bytes_total = bytes_inuse + bytes_incache;
- printf("%juK/%juK/%juK bytes allocated to network "
- "(current/cache/total)\n", bytes_inuse / 1024,
- bytes_incache / 1024, bytes_total / 1024);
+ xo_emit("{:bytes-in-use/%ju}{U:K}/{:bytes-in-cache/%ju}{U:K}/"
+ "{:bytes-total/%ju}{U:K} "
+ "{N:bytes allocated to network (current\\/cache\\/total)}\n",
+ bytes_inuse / 1024, bytes_incache / 1024, bytes_total / 1024);
- printf("%ju/%ju/%ju requests for mbufs denied (mbufs/clusters/"
- "mbuf+clusters)\n", mbuf_failures, cluster_failures,
- packet_failures);
- printf("%ju/%ju/%ju requests for mbufs delayed (mbufs/clusters/"
- "mbuf+clusters)\n", mbuf_sleeps, cluster_sleeps,
- packet_sleeps);
+ xo_emit("{:mbuf-failures/%ju}/{:cluster-failures/%ju}/"
+ "{:packet-failures/%ju} {N:requests for mbufs denied "
+ "(mbufs\\/clusters\\/mbuf+clusters)}\n",
+ mbuf_failures, cluster_failures, packet_failures);
+ xo_emit("{:mbuf-sleeps/%ju}/{:cluster-sleeps/%ju}/{:packet-sleeps/%ju} "
+ "{N:requests for mbufs delayed "
+ "(mbufs\\/clusters\\/mbuf+clusters)}\n",
+ mbuf_sleeps, cluster_sleeps, packet_sleeps);
- printf("%ju/%ju/%ju requests for jumbo clusters delayed "
- "(%juk/9k/16k)\n", jumbop_sleeps, jumbo9_sleeps,
- jumbo16_sleeps, jumbop_size / 1024);
- printf("%ju/%ju/%ju requests for jumbo clusters denied "
- "(%juk/9k/16k)\n", jumbop_failures, jumbo9_failures,
- jumbo16_failures, jumbop_size / 1024);
+ xo_emit("{:jumbop-sleeps/%ju}/{:jumbo9-sleeps/%ju}/"
+ "{:jumbo16-sleeps/%ju} {N:/requests for jumbo clusters delayed "
+ "(%juk\\/9k\\/16k)}\n",
+ jumbop_sleeps, jumbo9_sleeps, jumbo16_sleeps, jumbop_size / 1024);
+ xo_emit("{:jumbop-failures/%ju}/{:jumbo9-failures/%ju}/"
+ "{:jumbo16-failures/%ju} {N:/requests for jumbo clusters denied "
+ "(%juk\\/9k\\/16k)}\n",
+ jumbop_failures, jumbo9_failures, jumbo16_failures,
+ jumbop_size / 1024);
- if (live) {
- mlen = sizeof(nsfbufs);
- if (!sysctlbyname("kern.ipc.nsfbufs", &nsfbufs, &mlen, NULL,
- 0) &&
- !sysctlbyname("kern.ipc.nsfbufsused", &nsfbufsused,
- &mlen, NULL, 0) &&
- !sysctlbyname("kern.ipc.nsfbufspeak", &nsfbufspeak,
- &mlen, NULL, 0))
- printf("%d/%d/%d sfbufs in use (current/peak/max)\n",
- nsfbufsused, nsfbufspeak, nsfbufs);
- mlen = sizeof(mbstat);
- if (sysctlbyname("kern.ipc.mbstat", &mbstat, &mlen, NULL, 0)) {
- warn("kern.ipc.mbstat");
- goto out;
- }
- } else {
- if (kread(mbaddr, (char *)&mbstat, sizeof mbstat) != 0)
- goto out;
- }
- printf("%lu requests for sfbufs denied\n", mbstat.sf_allocfail);
- printf("%lu requests for sfbufs delayed\n", mbstat.sf_allocwait);
- printf("%lu requests for I/O initiated by sendfile\n",
- mbstat.sf_iocnt);
- printf("%lu calls to protocol drain routines\n", mbstat.m_drain);
+ mlen = sizeof(nsfbufs);
+ if (live &&
+ sysctlbyname("kern.ipc.nsfbufs", &nsfbufs, &mlen, NULL, 0) == 0 &&
+ sysctlbyname("kern.ipc.nsfbufsused", &nsfbufsused, &mlen,
+ NULL, 0) == 0 &&
+ sysctlbyname("kern.ipc.nsfbufspeak", &nsfbufspeak, &mlen,
+ NULL, 0) == 0)
+ xo_emit("{:nsfbufs-current/%d}/{:nsfbufs-peak/%d}/"
+ "{:nsfbufs/%d} "
+ "{N:sfbufs in use (current\\/peak\\/max)}\n",
+ nsfbufsused, nsfbufspeak, nsfbufs);
+
+ if (fetch_stats("kern.ipc.sfstat", mbaddr, &sfstat, sizeof(sfstat),
+ kread_counters) != 0)
+ goto out;
+
+ xo_emit("{:sendfile-syscalls/%ju} {N:sendfile syscalls}\n",
+ (uintmax_t)sfstat.sf_syscalls);
+ xo_emit("{:sendfile-no-io/%ju} "
+ "{N:sendfile syscalls completed without I\\/O request}\n",
+ (uintmax_t)sfstat.sf_noiocnt);
+ xo_emit("{:sendfile-io-count/%ju} "
+ "{N:requests for I\\/O initiated by sendfile}\n",
+ (uintmax_t)sfstat.sf_iocnt);
+ xo_emit("{:sendfile-pages-sent/%ju} "
+ "{N:pages read by sendfile as part of a request}\n",
+ (uintmax_t)sfstat.sf_pages_read);
+ xo_emit("{:sendfile-pages-valid/%ju} "
+ "{N:pages were valid at time of a sendfile request}\n",
+ (uintmax_t)sfstat.sf_pages_valid);
+ xo_emit("{:sendfile-requested-readahead/%ju} "
+ "{N:pages were requested for read ahead by applications}\n",
+ (uintmax_t)sfstat.sf_rhpages_requested);
+ xo_emit("{:sendfile-readahead/%ju} "
+ "{N:pages were read ahead by sendfile}\n",
+ (uintmax_t)sfstat.sf_rhpages_read);
+ xo_emit("{:sendfile-busy-encounters/%ju} "
+ "{N:times sendfile encountered an already busy page}\n",
+ (uintmax_t)sfstat.sf_busy);
+ xo_emit("{:sfbufs-alloc-failed/%ju} {N:requests for sfbufs denied}\n",
+ (uintmax_t)sfstat.sf_allocfail);
+ xo_emit("{:sfbufs-alloc-wait/%ju} {N:requests for sfbufs delayed}\n",
+ (uintmax_t)sfstat.sf_allocwait);
out:
+ xo_close_container("mbuf-statistics");
memstat_mtl_free(mtlp);
}
diff --git a/freebsd/usr.bin/netstat/mroute.c b/freebsd/usr.bin/netstat/mroute.c
index 7a860cd3..4d9d5b1d 100644
--- a/freebsd/usr.bin/netstat/mroute.c
+++ b/freebsd/usr.bin/netstat/mroute.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright (c) 1989 Stephen Deering
* Copyright (c) 1992, 1993
@@ -39,6 +43,9 @@
* @(#)mroute.c 8.2 (Berkeley) 4/28/95
*/
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -70,8 +77,14 @@ __FBSDID("$FreeBSD$");
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <libxo/xo.h>
#include "netstat.h"
-
+#include "nl_defs.h"
+#ifdef __rtems__
+#include "rtems-bsd-netstat-mroute-data.h"
+#endif /* __rtems__ */
static void print_bw_meter(struct bw_meter *, int *);
static void print_mfc(struct mfc *, int, int *);
@@ -79,105 +92,128 @@ static void print_mfc(struct mfc *, int, int *);
static void
print_bw_meter(struct bw_meter *bw_meter, int *banner_printed)
{
- char s0[256], s1[256], s2[256], s3[256];
+ char s1[256], s2[256], s3[256];
struct timeval now, end, delta;
gettimeofday(&now, NULL);
if (! *banner_printed) {
- printf(" Bandwidth Meters\n");
- printf(" %-30s", "Measured(Start|Packets|Bytes)");
- printf(" %s", "Type");
- printf(" %-30s", "Thresh(Interval|Packets|Bytes)");
- printf(" Remain");
- printf("\n");
+ xo_open_list("bandwidth-meter");
+ xo_emit(" {T:Bandwidth Meters}\n");
+ xo_emit(" {T:/%-30s}", "Measured(Start|Packets|Bytes)");
+ xo_emit(" {T:/%s}", "Type");
+ xo_emit(" {T:/%-30s}", "Thresh(Interval|Packets|Bytes)");
+ xo_emit(" {T:Remain}");
+ xo_emit("\n");
*banner_printed = 1;
}
+ xo_open_instance("bandwidth-meter");
+
/* The measured values */
- if (bw_meter->bm_flags & BW_METER_UNIT_PACKETS)
+ if (bw_meter->bm_flags & BW_METER_UNIT_PACKETS) {
sprintf(s1, "%ju", (uintmax_t)bw_meter->bm_measured.b_packets);
- else
+ xo_emit("{e:measured-packets/%ju}",
+ (uintmax_t)bw_meter->bm_measured.b_packets);
+ } else
sprintf(s1, "?");
- if (bw_meter->bm_flags & BW_METER_UNIT_BYTES)
+ if (bw_meter->bm_flags & BW_METER_UNIT_BYTES) {
sprintf(s2, "%ju", (uintmax_t)bw_meter->bm_measured.b_bytes);
- else
+ xo_emit("{e:measured-bytes/%ju}",
+ (uintmax_t)bw_meter->bm_measured.b_bytes);
+ } else
sprintf(s2, "?");
- sprintf(s0, "%lu.%lu|%s|%s",
- (u_long)bw_meter->bm_start_time.tv_sec,
- (u_long)bw_meter->bm_start_time.tv_usec,
- s1, s2);
- printf(" %-30s", s0);
+ xo_emit(" {[:-30}{:start-time/%lu.%06lu}|{q:measured-packets/%s}"
+ "|{q:measured-bytes%s}{]:}",
+ (u_long)bw_meter->bm_start_time.tv_sec,
+ (u_long)bw_meter->bm_start_time.tv_usec, s1, s2);
/* The type of entry */
- sprintf(s0, "%s", "?");
- if (bw_meter->bm_flags & BW_METER_GEQ)
- sprintf(s0, "%s", ">=");
- else if (bw_meter->bm_flags & BW_METER_LEQ)
- sprintf(s0, "%s", "<=");
- printf(" %-3s", s0);
+ xo_emit(" {t:type/%-3s}", (bw_meter->bm_flags & BW_METER_GEQ) ? ">=" :
+ (bw_meter->bm_flags & BW_METER_LEQ) ? "<=" : "?");
/* The threshold values */
- if (bw_meter->bm_flags & BW_METER_UNIT_PACKETS)
+ if (bw_meter->bm_flags & BW_METER_UNIT_PACKETS) {
sprintf(s1, "%ju", (uintmax_t)bw_meter->bm_threshold.b_packets);
- else
+ xo_emit("{e:threshold-packets/%ju}",
+ (uintmax_t)bw_meter->bm_threshold.b_packets);
+ } else
sprintf(s1, "?");
- if (bw_meter->bm_flags & BW_METER_UNIT_BYTES)
+ if (bw_meter->bm_flags & BW_METER_UNIT_BYTES) {
sprintf(s2, "%ju", (uintmax_t)bw_meter->bm_threshold.b_bytes);
- else
+ xo_emit("{e:threshold-bytes/%ju}",
+ (uintmax_t)bw_meter->bm_threshold.b_bytes);
+ } else
sprintf(s2, "?");
- sprintf(s0, "%lu.%lu|%s|%s",
- (u_long)bw_meter->bm_threshold.b_time.tv_sec,
- (u_long)bw_meter->bm_threshold.b_time.tv_usec,
- s1, s2);
- printf(" %-30s", s0);
+
+ xo_emit(" {[:-30}{:threshold-time/%lu.%06lu}|{q:threshold-packets/%s}"
+ "|{q:threshold-bytes%s}{]:}",
+ (u_long)bw_meter->bm_threshold.b_time.tv_sec,
+ (u_long)bw_meter->bm_threshold.b_time.tv_usec, s1, s2);
/* Remaining time */
timeradd(&bw_meter->bm_start_time,
&bw_meter->bm_threshold.b_time, &end);
if (timercmp(&now, &end, <=)) {
timersub(&end, &now, &delta);
- sprintf(s3, "%lu.%lu",
+ sprintf(s3, "%lu.%06lu",
(u_long)delta.tv_sec,
(u_long)delta.tv_usec);
} else {
/* Negative time */
timersub(&now, &end, &delta);
- sprintf(s3, "-%lu.%lu",
+ sprintf(s3, "-%lu.06%lu",
(u_long)delta.tv_sec,
(u_long)delta.tv_usec);
}
- printf(" %s", s3);
+ xo_emit(" {:remaining-time/%s}", s3);
+
+ xo_open_instance("bandwidth-meter");
- printf("\n");
+ xo_emit("\n");
}
static void
print_mfc(struct mfc *m, int maxvif, int *banner_printed)
{
+ struct sockaddr_in sin;
+ struct sockaddr *sa = (struct sockaddr *)&sin;
struct bw_meter bw_meter, *bwm;
int bw_banner_printed;
int error;
vifi_t vifi;
bw_banner_printed = 0;
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
if (! *banner_printed) {
- printf("\nIPv4 Multicast Forwarding Table\n"
- " Origin Group "
- " Packets In-Vif Out-Vifs:Ttls\n");
+ xo_open_list("multicast-forwarding-entry");
+ xo_emit("\n{T:IPv4 Multicast Forwarding Table}\n"
+ " {T:Origin} {T:Group} "
+ " {T:Packets In-Vif} {T:Out-Vifs:Ttls}\n");
*banner_printed = 1;
}
- printf(" %-15.15s", routename(m->mfc_origin.s_addr));
- printf(" %-15.15s", routename(m->mfc_mcastgrp.s_addr));
- printf(" %9lu", m->mfc_pkt_cnt);
- printf(" %3d ", m->mfc_parent);
+ memcpy(&sin.sin_addr, &m->mfc_origin, sizeof(sin.sin_addr));
+ xo_emit(" {:origin-address/%-15.15s}", routename(sa, numeric_addr));
+ memcpy(&sin.sin_addr, &m->mfc_mcastgrp, sizeof(sin.sin_addr));
+ xo_emit(" {:group-address/%-15.15s}",
+ routename(sa, numeric_addr));
+ xo_emit(" {:sent-packets/%9lu}", m->mfc_pkt_cnt);
+ xo_emit(" {:parent/%3d} ", m->mfc_parent);
+ xo_open_list("vif-ttl");
for (vifi = 0; vifi <= maxvif; vifi++) {
- if (m->mfc_ttls[vifi] > 0)
- printf(" %u:%u", vifi, m->mfc_ttls[vifi]);
+ if (m->mfc_ttls[vifi] > 0) {
+ xo_open_instance("vif-ttl");
+ xo_emit(" {k:vif/%u}:{:ttl/%u}", vifi,
+ m->mfc_ttls[vifi]);
+ xo_close_instance("vif-ttl");
+ }
}
- printf("\n");
+ xo_close_list("vif-ttl");
+ xo_emit("\n");
/*
* XXX We break the rules and try to use KVM to read the
@@ -192,14 +228,19 @@ print_mfc(struct mfc *m, int maxvif, int *banner_printed)
print_bw_meter(&bw_meter, &bw_banner_printed);
bwm = bw_meter.bm_mfc_next;
}
+ if (banner_printed)
+ xo_close_list("bandwidth-meter");
}
void
-mroutepr(u_long pmfchashtbl, u_long pmfctablesize, u_long pviftbl)
+mroutepr()
{
+ struct sockaddr_in sin;
+ struct sockaddr *sa = (struct sockaddr *)&sin;
struct vif viftable[MAXVIFS];
struct vif *v;
struct mfc *m;
+ u_long pmfchashtbl, pmfctablesize, pviftbl;
int banner_printed;
int saved_numeric_addr;
size_t len;
@@ -208,6 +249,10 @@ mroutepr(u_long pmfchashtbl, u_long pmfctablesize, u_long pviftbl)
saved_numeric_addr = numeric_addr;
numeric_addr = 1;
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
+
/*
* TODO:
* The VIF table will move to hanging off the struct if_info for
@@ -222,23 +267,27 @@ mroutepr(u_long pmfchashtbl, u_long pmfctablesize, u_long pviftbl)
* functionality was deprecated, as PIM does not use it.
*/
maxvif = 0;
+ pmfchashtbl = pmfctablesize = pviftbl = 0;
len = sizeof(viftable);
if (live) {
if (sysctlbyname("net.inet.ip.viftable", viftable, &len, NULL,
0) < 0) {
- warn("sysctl: net.inet.ip.viftable");
+ xo_warn("sysctl: net.inet.ip.viftable");
return;
}
- } else
-#ifndef __rtems__
+ } else {
+ pmfchashtbl = nl[N_MFCHASHTBL].n_value;
+ pmfctablesize = nl[N_MFCTABLESIZE].n_value;
+ pviftbl = nl[N_VIFTABLE].n_value;
+
+ if (pmfchashtbl == 0 || pmfctablesize == 0 || pviftbl == 0) {
+ xo_warnx("No IPv4 MROUTING kernel support.");
+ return;
+ }
+
kread(pviftbl, (char *)viftable, sizeof(viftable));
-#else /* __rtems__ */
- {
- warnx("mroutepr: not implemented");
- return;
}
-#endif /* __rtems__ */
banner_printed = 0;
for (vifi = 0, v = viftable; vifi < MAXVIFS; ++vifi, ++v) {
@@ -247,23 +296,31 @@ mroutepr(u_long pmfchashtbl, u_long pmfctablesize, u_long pviftbl)
maxvif = vifi;
if (!banner_printed) {
- printf("\nIPv4 Virtual Interface Table\n"
- " Vif Thresh Local-Address "
- "Remote-Address Pkts-In Pkts-Out\n");
+ xo_emit("\n{T:IPv4 Virtual Interface Table\n"
+ " Vif Thresh Local-Address "
+ "Remote-Address Pkts-In Pkts-Out}\n");
banner_printed = 1;
+ xo_open_list("vif");
}
- printf(" %2u %6u %-15.15s",
+ xo_open_instance("vif");
+ memcpy(&sin.sin_addr, &v->v_lcl_addr, sizeof(sin.sin_addr));
+ xo_emit(" {:vif/%2u} {:threshold/%6u} {:route/%-15.15s}",
/* opposite math of add_vif() */
vifi, v->v_threshold,
- routename(v->v_lcl_addr.s_addr));
- printf(" %-15.15s", (v->v_flags & VIFF_TUNNEL) ?
- routename(v->v_rmt_addr.s_addr) : "");
-
- printf(" %9lu %9lu\n", v->v_pkt_in, v->v_pkt_out);
+ routename(sa, numeric_addr));
+ memcpy(&sin.sin_addr, &v->v_rmt_addr, sizeof(sin.sin_addr));
+ xo_emit(" {:source/%-15.15s}", (v->v_flags & VIFF_TUNNEL) ?
+ routename(sa, numeric_addr) : "");
+
+ xo_emit(" {:received-packets/%9lu} {:sent-packets/%9lu}\n",
+ v->v_pkt_in, v->v_pkt_out);
+ xo_close_instance("vif");
}
- if (!banner_printed)
- printf("\nIPv4 Virtual Interface Table is empty\n");
+ if (banner_printed)
+ xo_close_list("vif");
+ else
+ xo_emit("\n{T:IPv4 Virtual Interface Table is empty}\n");
banner_printed = 0;
@@ -283,19 +340,19 @@ mroutepr(u_long pmfchashtbl, u_long pmfctablesize, u_long pviftbl)
len = 0;
if (sysctlbyname("net.inet.ip.mfctable", NULL, &len, NULL,
0) < 0) {
- warn("sysctl: net.inet.ip.mfctable");
+ xo_warn("sysctl: net.inet.ip.mfctable");
return;
}
mfctable = malloc(len);
if (mfctable == NULL) {
- warnx("malloc %lu bytes", (u_long)len);
+ xo_warnx("malloc %lu bytes", (u_long)len);
return;
}
if (sysctlbyname("net.inet.ip.mfctable", mfctable, &len, NULL,
0) < 0) {
free(mfctable);
- warn("sysctl: net.inet.ip.mfctable");
+ xo_warn("sysctl: net.inet.ip.mfctable");
return;
}
@@ -304,8 +361,10 @@ mroutepr(u_long pmfchashtbl, u_long pmfctablesize, u_long pviftbl)
print_mfc(m++, maxvif, &banner_printed);
len -= sizeof(*m);
}
+ if (banner_printed)
+ xo_close_list("multicast-forwarding-entry");
if (len != 0)
- warnx("print_mfc: %lu trailing bytes", (u_long)len);
+ xo_warnx("print_mfc: %lu trailing bytes", (u_long)len);
free(mfctable);
} else {
@@ -318,14 +377,14 @@ mroutepr(u_long pmfchashtbl, u_long pmfctablesize, u_long pviftbl)
error = kread(pmfctablesize, (char *)&mfctablesize,
sizeof(u_long));
if (error) {
- warn("kread: mfctablesize");
+ xo_warn("kread: mfctablesize");
return;
}
len = sizeof(*mfchashtbl) * mfctablesize;
mfchashtbl = malloc(len);
if (mfchashtbl == NULL) {
- warnx("malloc %lu bytes", (u_long)len);
+ xo_warnx("malloc %lu bytes", (u_long)len);
return;
}
kread(pmfchashtbl, (char *)&mfchashtbl, len);
@@ -336,6 +395,8 @@ mroutepr(u_long pmfchashtbl, u_long pmfctablesize, u_long pviftbl)
print_mfc(m, maxvif, &banner_printed);
}
}
+ if (banner_printed)
+ xo_close_list("multicast-forwarding-entry");
free(mfchashtbl);
#else /* __rtems__ */
@@ -345,55 +406,65 @@ mroutepr(u_long pmfchashtbl, u_long pmfctablesize, u_long pviftbl)
}
if (!banner_printed)
- printf("\nIPv4 Multicast Forwarding Table is empty\n");
+ xo_emit("\n{T:IPv4 Multicast Forwarding Table is empty}\n");
- printf("\n");
+ xo_emit("\n");
numeric_addr = saved_numeric_addr;
}
void
-mrt_stats(u_long mstaddr)
+mrt_stats()
{
struct mrtstat mrtstat;
- size_t len = sizeof mrtstat;
+ u_long mstaddr;
- if (live) {
- if (sysctlbyname("net.inet.ip.mrtstat", &mrtstat, &len, NULL,
- 0) < 0) {
- warn("sysctl: net.inet.ip.mrtstat");
- return;
- }
- } else
-#ifndef __rtems__
- kread(mstaddr, (char *)&mrtstat, sizeof(mrtstat));
-#else /* __rtems__ */
- {
- warnx("mrt_stats: not implemented");
+ mstaddr = nl[N_MRTSTAT].n_value;
+
+ if (mstaddr == 0) {
+ fprintf(stderr, "No IPv4 MROUTING kernel support.\n");
return;
}
-#endif /* __rtems__ */
- printf("IPv4 multicast forwarding:\n");
+ if (fetch_stats("net.inet.ip.mrtstat", mstaddr, &mrtstat,
+ sizeof(mrtstat), kread_counters) != 0)
+ return;
+
+ xo_emit("{T:IPv4 multicast forwarding}:\n");
#define p(f, m) if (mrtstat.f || sflag <= 1) \
- printf(m, mrtstat.f, plural(mrtstat.f))
+ xo_emit(m, (uintmax_t)mrtstat.f, plural(mrtstat.f))
#define p2(f, m) if (mrtstat.f || sflag <= 1) \
- printf(m, mrtstat.f, plurales(mrtstat.f))
-
- p(mrts_mfc_lookups, "\t%lu multicast forwarding cache lookup%s\n");
- p2(mrts_mfc_misses, "\t%lu multicast forwarding cache miss%s\n");
- p(mrts_upcalls, "\t%lu upcall%s to multicast routing daemon\n");
- p(mrts_upq_ovflw, "\t%lu upcall queue overflow%s\n");
+ xo_emit(m, (uintmax_t)mrtstat.f, plurales(mrtstat.f))
+
+ xo_open_container("multicast-statistics");
+
+ p(mrts_mfc_lookups, "\t{:cache-lookups/%ju} "
+ "{N:/multicast forwarding cache lookup%s}\n");
+ p2(mrts_mfc_misses, "\t{:cache-misses/%ju} "
+ "{N:/multicast forwarding cache miss%s}\n");
+ p(mrts_upcalls, "\t{:upcalls-total/%ju} "
+ "{N:/upcall%s to multicast routing daemon}\n");
+ p(mrts_upq_ovflw, "\t{:upcall-overflows/%ju} "
+ "{N:/upcall queue overflow%s}\n");
p(mrts_upq_sockfull,
- "\t%lu upcall%s dropped due to full socket buffer\n");
- p(mrts_cache_cleanups, "\t%lu cache cleanup%s\n");
- p(mrts_no_route, "\t%lu datagram%s with no route for origin\n");
- p(mrts_bad_tunnel, "\t%lu datagram%s arrived with bad tunneling\n");
- p(mrts_cant_tunnel, "\t%lu datagram%s could not be tunneled\n");
- p(mrts_wrong_if, "\t%lu datagram%s arrived on wrong interface\n");
- p(mrts_drop_sel, "\t%lu datagram%s selectively dropped\n");
- p(mrts_q_overflow, "\t%lu datagram%s dropped due to queue overflow\n");
- p(mrts_pkt2large, "\t%lu datagram%s dropped for being too large\n");
+ "\t{:upcalls-dropped-full-buffer/%ju} "
+ "{N:/upcall%s dropped due to full socket buffer}\n");
+ p(mrts_cache_cleanups, "\t{:cache-cleanups/%ju} "
+ "{N:/cache cleanup%s}\n");
+ p(mrts_no_route, "\t{:dropped-no-origin/%ju} "
+ "{N:/datagram%s with no route for origin}\n");
+ p(mrts_bad_tunnel, "\t{:dropped-bad-tunnel/%ju} "
+ "{N:/datagram%s arrived with bad tunneling}\n");
+ p(mrts_cant_tunnel, "\t{:dropped-could-not-tunnel/%ju} "
+ "{N:/datagram%s could not be tunneled}\n");
+ p(mrts_wrong_if, "\t{:dropped-wrong-incoming-interface/%ju} "
+ "{N:/datagram%s arrived on wrong interface}\n");
+ p(mrts_drop_sel, "\t{:dropped-selectively/%ju} "
+ "{N:/datagram%s selectively dropped}\n");
+ p(mrts_q_overflow, "\t{:dropped-queue-overflow/%ju} "
+ "{N:/datagram%s dropped due to queue overflow}\n");
+ p(mrts_pkt2large, "\t{:dropped-too-large/%ju} "
+ "{N:/datagram%s dropped for being too large}\n");
#undef p2
#undef p
diff --git a/freebsd/usr.bin/netstat/mroute6.c b/freebsd/usr.bin/netstat/mroute6.c
index 3a5b25b0..8a5ca63d 100644
--- a/freebsd/usr.bin/netstat/mroute6.c
+++ b/freebsd/usr.bin/netstat/mroute6.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright (C) 1998 WIDE Project.
* All rights reserved.
@@ -67,6 +71,9 @@
* @(#)mroute.c 8.2 (Berkeley) 4/28/95
*/
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -81,7 +88,6 @@ __FBSDID("$FreeBSD$");
#include <sys/time.h>
#include <net/if.h>
-#include <net/if_var.h>
#include <net/route.h>
#include <netinet/in.h>
@@ -90,24 +96,29 @@ __FBSDID("$FreeBSD$");
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
+#include <libxo/xo.h>
#define KERNEL 1
#include <netinet6/ip6_mroute.h>
#undef KERNEL
#include "netstat.h"
+#ifdef __rtems__
+#include "rtems-bsd-netstat-mroute6-data.h"
+#endif /* __rtems__ */
#define WID_ORG (Wflag ? 39 : (numeric_addr ? 29 : 18)) /* width of origin column */
#define WID_GRP (Wflag ? 18 : (numeric_addr ? 16 : 18)) /* width of group column */
void
-mroute6pr(u_long mfcaddr, u_long mifaddr)
+mroute6pr()
{
struct mf6c *mf6ctable[MF6CTBLSIZ], *mfcp;
- struct mif6 mif6table[MAXMIFS];
+ struct mif6_sctl mif6table[MAXMIFS];
struct mf6c mfc;
struct rtdetq rte, *rtep;
- struct mif6 *mifp;
+ struct mif6_sctl *mifp;
mifi_t mifi;
int i;
int banner_printed;
@@ -116,72 +127,56 @@ mroute6pr(u_long mfcaddr, u_long mifaddr)
long int waitings;
size_t len;
+ if (live == 0)
+ return;
+
len = sizeof(mif6table);
- if (live) {
- if (sysctlbyname("net.inet6.ip6.mif6table", mif6table, &len,
- NULL, 0) < 0) {
- warn("sysctl: net.inet6.ip6.mif6table");
- return;
- }
- } else
-#ifndef __rtems__
- kread(mifaddr, (char *)mif6table, sizeof(mif6table));
-#else /* __rtems__ */
- {
- warnx("mroute6pr: not implemented");
+ if (sysctlbyname("net.inet6.ip6.mif6table", mif6table, &len, NULL, 0) <
+ 0) {
+ xo_warn("sysctl: net.inet6.ip6.mif6table");
return;
}
-#endif /* __rtems__ */
saved_numeric_addr = numeric_addr;
numeric_addr = 1;
banner_printed = 0;
for (mifi = 0, mifp = mif6table; mifi < MAXMIFS; ++mifi, ++mifp) {
- struct ifnet ifnet;
char ifname[IFNAMSIZ];
- if (mifp->m6_ifp == NULL)
+ if (mifp->m6_ifp == 0)
continue;
- /* XXX KVM */
- kread((u_long)mifp->m6_ifp, (char *)&ifnet, sizeof(ifnet));
-
maxmif = mifi;
if (!banner_printed) {
- printf("\nIPv6 Multicast Interface Table\n"
- " Mif Rate PhyIF "
- "Pkts-In Pkts-Out\n");
+ xo_open_list("multicast-interface");
+ xo_emit("\n{T:IPv6 Multicast Interface Table}\n"
+ "{T: Mif Rate PhyIF Pkts-In Pkts-Out}\n");
banner_printed = 1;
}
- printf(" %2u %4d",
- mifi, mifp->m6_rate_limit);
- printf(" %5s", (mifp->m6_flags & MIFF_REGISTER) ?
- "reg0" : if_indextoname(ifnet.if_index, ifname));
+ xo_open_instance("multicast-interface");
+ xo_emit(" {:mif/%2u} {:rate-limit/%4d}",
+ mifi, mifp->m6_rate_limit);
+ xo_emit(" {:ifname/%5s}", (mifp->m6_flags & MIFF_REGISTER) ?
+ "reg0" : if_indextoname(mifp->m6_ifp, ifname));
- printf(" %9ju %9ju\n", (uintmax_t)mifp->m6_pkt_in,
+ xo_emit(" {:received-packets/%9ju} {:sent-packets/%9ju}\n",
+ (uintmax_t)mifp->m6_pkt_in,
(uintmax_t)mifp->m6_pkt_out);
+ xo_close_instance("multicast-interface");
}
- if (!banner_printed)
- printf("\nIPv6 Multicast Interface Table is empty\n");
+ if (banner_printed)
+ xo_open_list("multicast-interface");
+ else
+ xo_emit("\n{T:IPv6 Multicast Interface Table is empty}\n");
len = sizeof(mf6ctable);
- if (live) {
- if (sysctlbyname("net.inet6.ip6.mf6ctable", mf6ctable, &len,
- NULL, 0) < 0) {
- warn("sysctl: net.inet6.ip6.mf6ctable");
- return;
- }
- } else
-#ifndef __rtems__
- kread(mfcaddr, (char *)mf6ctable, sizeof(mf6ctable));
-#else /* __rtems__ */
- {
- warnx("mroute6pr: not implemented");
+ if (sysctlbyname("net.inet6.ip6.mf6ctable", mf6ctable, &len, NULL, 0) <
+ 0) {
+ xo_warn("sysctl: net.inet6.ip6.mf6ctable");
return;
}
-#endif /* __rtems__ */
banner_printed = 0;
@@ -190,19 +185,26 @@ mroute6pr(u_long mfcaddr, u_long mifaddr)
while(mfcp) {
kread((u_long)mfcp, (char *)&mfc, sizeof(mfc));
if (!banner_printed) {
- printf ("\nIPv6 Multicast Forwarding Cache\n");
- printf(" %-*.*s %-*.*s %s",
- WID_ORG, WID_ORG, "Origin",
- WID_GRP, WID_GRP, "Group",
- " Packets Waits In-Mif Out-Mifs\n");
+ xo_open_list("multicast-forwarding-cache");
+ xo_emit("\n"
+ "{T:IPv6 Multicast Forwarding Cache}\n");
+ xo_emit(" {T:%-*.*s} {T:%-*.*s} {T:%s}",
+ WID_ORG, WID_ORG, "Origin",
+ WID_GRP, WID_GRP, "Group",
+ " Packets Waits In-Mif Out-Mifs\n");
banner_printed = 1;
}
- printf(" %-*.*s", WID_ORG, WID_ORG,
- routename6(&mfc.mf6c_origin));
- printf(" %-*.*s", WID_GRP, WID_GRP,
- routename6(&mfc.mf6c_mcastgrp));
- printf(" %9ju", (uintmax_t)mfc.mf6c_pkt_cnt);
+ xo_open_instance("multicast-forwarding-cache");
+
+ xo_emit(" {:origin/%-*.*s}", WID_ORG, WID_ORG,
+ routename(sin6tosa(&mfc.mf6c_origin),
+ numeric_addr));
+ xo_emit(" {:group/%-*.*s}", WID_GRP, WID_GRP,
+ routename(sin6tosa(&mfc.mf6c_mcastgrp),
+ numeric_addr));
+ xo_emit(" {:total-packets/%9ju}",
+ (uintmax_t)mfc.mf6c_pkt_cnt);
for (waitings = 0, rtep = mfc.mf6c_stall; rtep; ) {
waitings++;
@@ -210,74 +212,79 @@ mroute6pr(u_long mfcaddr, u_long mifaddr)
kread((u_long)rtep, (char *)&rte, sizeof(rte));
rtep = rte.next;
}
- printf(" %3ld", waitings);
+ xo_emit(" {:waitings/%3ld}", waitings);
if (mfc.mf6c_parent == MF6C_INCOMPLETE_PARENT)
- printf(" --- ");
+ xo_emit(" --- ");
else
- printf(" %3d ", mfc.mf6c_parent);
+ xo_emit(" {:parent/%3d} ", mfc.mf6c_parent);
+ xo_open_list("mif");
for (mifi = 0; mifi <= maxmif; mifi++) {
if (IF_ISSET(mifi, &mfc.mf6c_ifset))
- printf(" %u", mifi);
+ xo_emit(" {l:%u}", mifi);
}
- printf("\n");
+ xo_close_list("mif");
+ xo_emit("\n");
mfcp = mfc.mf6c_next;
+ xo_close_instance("multicast-forwarding-cache");
}
}
- if (!banner_printed)
- printf("\nIPv6 Multicast Forwarding Table is empty\n");
+ if (banner_printed)
+ xo_close_list("multicast-forwarding-cache");
+ else
+ xo_emit("\n{T:IPv6 Multicast Forwarding Table is empty}\n");
- printf("\n");
+ xo_emit("\n");
numeric_addr = saved_numeric_addr;
}
void
-mrt6_stats(u_long mstaddr)
+mrt6_stats()
{
struct mrt6stat mrtstat;
- size_t len = sizeof mrtstat;
- if (live) {
- if (sysctlbyname("net.inet6.ip6.mrt6stat", &mrtstat, &len,
- NULL, 0) < 0) {
- warn("sysctl: net.inet6.ip6.mrt6stat");
- return;
- }
- } else
-#ifndef __rtems__
- kread(mstaddr, (char *)&mrtstat, sizeof(mrtstat));
-#else /* __rtems__ */
- {
- warnx("mrt6_stats: not implemented");
+ if (fetch_stats("net.inet6.ip6.mrt6stat", 0, &mrtstat,
+ sizeof(mrtstat), kread_counters) != 0)
return;
- }
-#endif /* __rtems__ */
- printf("IPv6 multicast forwarding:\n");
+ xo_open_container("multicast-statistics");
+ xo_emit("{T:IPv6 multicast forwarding}:\n");
#define p(f, m) if (mrtstat.f || sflag <= 1) \
- printf(m, (uintmax_t)mrtstat.f, plural(mrtstat.f))
+ xo_emit(m, (uintmax_t)mrtstat.f, plural(mrtstat.f))
#define p2(f, m) if (mrtstat.f || sflag <= 1) \
- printf(m, (uintmax_t)mrtstat.f, plurales(mrtstat.f))
-
- p(mrt6s_mfc_lookups, "\t%ju multicast forwarding cache lookup%s\n");
- p2(mrt6s_mfc_misses, "\t%ju multicast forwarding cache miss%s\n");
- p(mrt6s_upcalls, "\t%ju upcall%s to multicast routing daemon\n");
- p(mrt6s_upq_ovflw, "\t%ju upcall queue overflow%s\n");
- p(mrt6s_upq_sockfull,
- "\t%ju upcall%s dropped due to full socket buffer\n");
- p(mrt6s_cache_cleanups, "\t%ju cache cleanup%s\n");
- p(mrt6s_no_route, "\t%ju datagram%s with no route for origin\n");
- p(mrt6s_bad_tunnel, "\t%ju datagram%s arrived with bad tunneling\n");
- p(mrt6s_cant_tunnel, "\t%ju datagram%s could not be tunneled\n");
- p(mrt6s_wrong_if, "\t%ju datagram%s arrived on wrong interface\n");
- p(mrt6s_drop_sel, "\t%ju datagram%s selectively dropped\n");
- p(mrt6s_q_overflow,
- "\t%ju datagram%s dropped due to queue overflow\n");
- p(mrt6s_pkt2large, "\t%ju datagram%s dropped for being too large\n");
+ xo_emit(m, (uintmax_t)mrtstat.f, plurales(mrtstat.f))
+
+ p(mrt6s_mfc_lookups, "\t{:cache-lookups/%ju} "
+ "{N:/multicast forwarding cache lookup%s}\n");
+ p2(mrt6s_mfc_misses, "\t{:cache-misses/%ju} "
+ "{N:/multicast forwarding cache miss%s}\n");
+ p(mrt6s_upcalls, "\t{:upcalls/%ju} "
+ "{N:/upcall%s to multicast routing daemon}\n");
+ p(mrt6s_upq_ovflw, "\t{:upcall-overflows/%ju} "
+ "{N:/upcall queue overflow%s}\n");
+ p(mrt6s_upq_sockfull, "\t{:upcalls-dropped-full-buffer/%ju} "
+ "{N:/upcall%s dropped due to full socket buffer}\n");
+ p(mrt6s_cache_cleanups, "\t{:cache-cleanups/%ju} "
+ "{N:/cache cleanup%s}\n");
+ p(mrt6s_no_route, "\t{:dropped-no-origin/%ju} "
+ "{N:/datagram%s with no route for origin}\n");
+ p(mrt6s_bad_tunnel, "\t{:dropped-bad-tunnel/%ju} "
+ "{N:/datagram%s arrived with bad tunneling}\n");
+ p(mrt6s_cant_tunnel, "\t{:dropped-could-not-tunnel/%ju} "
+ "{N:/datagram%s could not be tunneled}\n");
+ p(mrt6s_wrong_if, "\t{:dropped-wrong-incoming-interface/%ju} "
+ "{N:/datagram%s arrived on wrong interface}\n");
+ p(mrt6s_drop_sel, "\t{:dropped-selectively/%ju} "
+ "{N:/datagram%s selectively dropped}\n");
+ p(mrt6s_q_overflow, "\t{:dropped-queue-overflow/%ju} "
+ "{N:/datagram%s dropped due to queue overflow}\n");
+ p(mrt6s_pkt2large, "\t{:dropped-too-large/%ju} "
+ "{N:/datagram%s dropped for being too large}\n");
#undef p2
#undef p
+ xo_close_container("multicast-statistics");
}
#endif /*INET6*/
diff --git a/freebsd/usr.bin/netstat/netstat.h b/freebsd/usr.bin/netstat/netstat.h
index b25b40c6..042a1c78 100644
--- a/freebsd/usr.bin/netstat/netstat.h
+++ b/freebsd/usr.bin/netstat/netstat.h
@@ -32,12 +32,10 @@
#include <sys/cdefs.h>
-#ifdef __rtems__
-#define rt_tables netstat_rt_tables
-#define routename rtems_shell_netstats_routername
-#define netname rtems_shell_netstats_netname
-#define sotoxsocket rtems_shell_netstats_sotoxsocket
-#endif /* __rtems__ */
+#define satosin(sa) ((struct sockaddr_in *)(sa))
+#define satosin6(sa) ((struct sockaddr_in6 *)(sa))
+#define sin6tosa(sin6) ((struct sockaddr *)(sin6))
+
extern int Aflag; /* show addresses of protocol control block */
extern int aflag; /* show all sockets (including servers) */
extern int bflag; /* show i/f total bytes in/out */
@@ -51,6 +49,7 @@ extern int noutputs; /* how much outputs before we exit */
extern int numeric_addr; /* show addresses numerically */
extern int numeric_port; /* show ports numerically */
extern int rflag; /* show routing tables (or routing stats) */
+extern int Rflag; /* show flowid / RSS information */
extern int sflag; /* show protocol statistics */
extern int Tflag; /* show TCP control block info */
extern int Wflag; /* wide display */
@@ -62,15 +61,16 @@ extern int interval; /* repeat interval for i/f stats */
extern char *interface; /* desired i/f for stats, or NULL for all i/fs */
extern int unit; /* unit number for above */
-extern int af; /* address family */
extern int live; /* true if we are examining a live system */
-#ifdef __rtems__
-extern int protopr_initialized;
-extern int do_rtent;
-extern struct radix_node_head **rt_tables;
-#endif /* __rtems__ */
+
+typedef int kreadfn_t(u_long, void *, size_t);
+int fetch_stats(const char *, u_long, void *, size_t, kreadfn_t);
+int fetch_stats_ro(const char *, u_long, void *, size_t, kreadfn_t);
int kread(u_long addr, void *buf, size_t size);
+uint64_t kread_counter(u_long addr);
+int kread_counters(u_long addr, void *buf, size_t size);
+void kset_dpcpu(u_int);
const char *plural(uintmax_t);
const char *plurales(uintmax_t);
const char *pluralies(uintmax_t);
@@ -107,15 +107,13 @@ void icmp6_stats(u_long, const char *, int, int);
void icmp6_ifstats(char *);
void pim6_stats(u_long, const char *, int, int);
void rip6_stats(u_long, const char *, int, int);
-void mroute6pr(u_long, u_long);
-void mrt6_stats(u_long);
+void mroute6pr(void);
+void mrt6_stats(void);
struct sockaddr_in6;
struct in6_addr;
void in6_fillscopeid(struct sockaddr_in6 *);
-char *routename6(struct sockaddr_in6 *);
-const char *netname6(struct sockaddr_in6 *, struct in6_addr *);
-void inet6print(struct in6_addr *, int, const char *, int);
+void inet6print(const char *, struct in6_addr *, int, const char *, int);
#endif /*INET6*/
#ifdef IPSEC
@@ -124,60 +122,30 @@ void pfkey_stats(u_long, const char *, int, int);
void mbpr(void *, u_long);
-void netisr_stats(void *);
+void netisr_stats(void);
void hostpr(u_long, u_long);
void impstats(u_long, u_long);
-void intpr(int, u_long, void (*)(char *));
+void intpr(void (*)(char *), int);
-void pr_rthdr(int);
void pr_family(int);
-void rt_stats(u_long, u_long);
-char *ipx_pnet(struct sockaddr *);
-char *ipx_phost(struct sockaddr *);
-char *ns_phost(struct sockaddr *);
-void upHex(char *);
-
-char *routename(in_addr_t);
-char *netname(in_addr_t, u_long);
-char *atalk_print(struct sockaddr *, int);
-char *atalk_print2(struct sockaddr *, struct sockaddr *, int);
-char *ipx_print(struct sockaddr *);
-char *ns_print(struct sockaddr *);
-void routepr(u_long);
-
-void ipxprotopr(u_long, const char *, int, int);
-void spx_stats(u_long, const char *, int, int);
-void ipx_stats(u_long, const char *, int, int);
-void ipxerr_stats(u_long, const char *, int, int);
-
-void nsprotopr(u_long, const char *, int, int);
-void spp_stats(u_long, const char *, int, int);
-void idp_stats(u_long, const char *, int, int);
-void nserr_stats(u_long, const char *, int, int);
-
-void atalkprotopr(u_long, const char *, int, int);
-void ddp_stats(u_long, const char *, int, int);
+void rt_stats(void);
+void flowtable_stats(void);
+
+char *routename(struct sockaddr *, int);
+const char *netname(struct sockaddr *, struct sockaddr *);
+void routepr(int, int);
#ifdef NETGRAPH
void netgraphprotopr(u_long, const char *, int, int);
#endif
-void unixpr(u_long, u_long, u_long, u_long, u_long);
-
-void esis_stats(u_long, const char *, int, int);
-void clnp_stats(u_long, const char *, int, int);
-void cltp_stats(u_long, const char *, int, int);
-void iso_protopr(u_long, const char *, int, int);
-void iso_protopr1(u_long, int);
-void tp_protopr(u_long, const char *, int, int);
-void tp_inproto(u_long);
-void tp_stats(caddr_t, caddr_t);
+void unixpr(u_long, u_long, u_long, u_long, u_long, bool *);
-void mroutepr(u_long, u_long, u_long);
-void mrt_stats(u_long);
+void mroutepr(void);
+void mrt_stats(void);
void bpf_stats(char *);
#ifdef __rtems__
-void rtems_bsd_netstat_inet_init(void);
+#include <nlist.h> /* necessary for global "nl" variable */
#endif /* __rtems__ */
diff --git a/freebsd/usr.bin/netstat/nl_defs.h b/freebsd/usr.bin/netstat/nl_defs.h
new file mode 100644
index 00000000..b043d45e
--- /dev/null
+++ b/freebsd/usr.bin/netstat/nl_defs.h
@@ -0,0 +1,58 @@
+#include <nlist.h>
+#ifndef __rtems__
+extern const struct nlist nl[];
+#else /* __rtems__ */
+extern struct nlist nl[];
+#endif /* __rtems__ */
+#define N_AHSTAT 0
+#define N_ARPSTAT 1
+#define N_CARPSTATS 2
+#define N_DIVCBINFO 3
+#define N_ESPSTAT 4
+#define N_ICMP6STAT 5
+#define N_ICMPSTAT 6
+#define N_IGMPSTAT 7
+#define N_IP6STAT 8
+#define N_IPCOMPSTAT 9
+#define N_IPSEC4STAT 10
+#define N_IPSEC6STAT 11
+#define N_IPSTAT 12
+#define N_MF6CTABLE 13
+#define N_MFCHASHTBL 14
+#define N_MFCTABLESIZE 15
+#define N_MIF6TABLE 16
+#define N_MRT6STAT 17
+#define N_MRTSTAT 18
+#define N_NETISR_BINDTHREADS 19
+#define N_NETISR_DEFAULTQLIMIT 20
+#define N_NETISR_DISPATCH_POLICY 21
+#define N_NETISR_MAXPROT 22
+#define N_NETISR_MAXQLIMIT 23
+#define N_NETISR_MAXTHREADS 24
+#define N_NETISR_PROTO 25
+#define N_NGSOCKLIST 26
+#define N_NWS 27
+#define N_NWS_ARRAY 28
+#define N_NWS_COUNT 29
+#define N_PFKEYSTAT 30
+#define N_PFSYNCSTATS 31
+#define N_PIM6STAT 32
+#define N_PIMSTAT 33
+#define N_RIP6STAT 34
+#define N_RIPCBINFO 35
+#define N_RTREE 36
+#define N_RTSTAT 37
+#define N_RTTRASH 38
+#define N_SCTPSTAT 39
+#define N_SFSTAT 40
+#define N_TCBINFO 41
+#define N_TCPSTAT 42
+#define N_TCPS_STATES 43
+#define N_UDBINFO 44
+#define N_UDPSTAT 45
+#define N_UNP_COUNT 46
+#define N_UNP_DHEAD 47
+#define N_UNP_GENCNT 48
+#define N_UNP_SHEAD 49
+#define N_UNP_SPHEAD 50
+#define N_VIFTABLE 51
diff --git a/freebsd/usr.bin/netstat/nl_symbols.c b/freebsd/usr.bin/netstat/nl_symbols.c
new file mode 100644
index 00000000..d719f8f5
--- /dev/null
+++ b/freebsd/usr.bin/netstat/nl_symbols.c
@@ -0,0 +1,75 @@
+#include <machine/rtems-bsd-user-space.h>
+
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
+#include <rtems/bsd/sys/param.h>
+#include <nlist.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-nl_symbols-data.h"
+#endif /* __rtems__ */
+#ifndef __rtems__
+const struct nlist nl[] = {
+#else /* __rtems__ */
+/* This is not as constant as it seems. The call to kresolve_list(..) in main.c
+ * might change something. */
+struct nlist nl[] = {
+#endif /* __rtems__ */
+ { .n_name = "_ahstat" },
+ { .n_name = "_arpstat" },
+ { .n_name = "_carpstats" },
+ { .n_name = "_divcbinfo" },
+ { .n_name = "_espstat" },
+ { .n_name = "_icmp6stat" },
+ { .n_name = "_icmpstat" },
+ { .n_name = "_igmpstat" },
+ { .n_name = "_ip6stat" },
+ { .n_name = "_ipcompstat" },
+ { .n_name = "_ipsec4stat" },
+ { .n_name = "_ipsec6stat" },
+ { .n_name = "_ipstat" },
+ { .n_name = "_mf6ctable" },
+ { .n_name = "_mfchashtbl" },
+ { .n_name = "_mfctablesize" },
+ { .n_name = "_mif6table" },
+ { .n_name = "_mrt6stat" },
+ { .n_name = "_mrtstat" },
+ { .n_name = "_netisr_bindthreads" },
+ { .n_name = "_netisr_defaultqlimit" },
+ { .n_name = "_netisr_dispatch_policy" },
+ { .n_name = "_netisr_maxprot" },
+ { .n_name = "_netisr_maxqlimit" },
+ { .n_name = "_netisr_maxthreads" },
+ { .n_name = "_netisr_proto" },
+ { .n_name = "_ngsocklist" },
+ { .n_name = "_nws" },
+ { .n_name = "_nws_array" },
+ { .n_name = "_nws_count" },
+ { .n_name = "_pfkeystat" },
+ { .n_name = "_pfsyncstats" },
+ { .n_name = "_pim6stat" },
+ { .n_name = "_pimstat" },
+ { .n_name = "_rip6stat" },
+ { .n_name = "_ripcbinfo" },
+ { .n_name = "_rtree" },
+ { .n_name = "_rtstat" },
+ { .n_name = "_rttrash" },
+ { .n_name = "_sctpstat" },
+ { .n_name = "_sfstat" },
+ { .n_name = "_tcbinfo" },
+ { .n_name = "_tcpstat" },
+ { .n_name = "_tcps_states" },
+ { .n_name = "_udbinfo" },
+ { .n_name = "_udpstat" },
+ { .n_name = "_unp_count" },
+ { .n_name = "_unp_dhead" },
+ { .n_name = "_unp_gencnt" },
+ { .n_name = "_unp_shead" },
+ { .n_name = "_unp_sphead" },
+ { .n_name = "_viftable" },
+ { .n_name = NULL },
+};
diff --git a/freebsd/usr.bin/netstat/pfkey.c b/freebsd/usr.bin/netstat/pfkey.c
index 45fcb977..8feb91e9 100644
--- a/freebsd/usr.bin/netstat/pfkey.c
+++ b/freebsd/usr.bin/netstat/pfkey.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
/* $NetBSD: inet.c,v 1.35.2.1 1999/04/29 14:57:08 perry Exp $ */
/* $KAME: ipsec.c,v 1.25 2001/03/12 09:04:39 itojun Exp $ */
/*-
@@ -65,6 +69,9 @@ static char sccsid[] = "@(#)inet.c 8.5 (Berkeley) 5/24/95";
#endif /* not lint */
#endif
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -83,7 +90,12 @@ __FBSDID("$FreeBSD$");
#include <stdio.h>
#include <string.h>
#include <unistd.h>
+#include <stdbool.h>
+#include <libxo/xo.h>
#include "netstat.h"
+#ifdef __rtems__
+#include "rtems-bsd-netstat-pfkey-data.h"
+#endif /* __rtems__ */
#ifdef IPSEC
@@ -101,8 +113,7 @@ static const char *pfkey_msgtype_names (int);
static const char *
pfkey_msgtype_names(int x)
{
- const int max =
- sizeof(pfkey_msgtypenames)/sizeof(pfkey_msgtypenames[0]);
+ const int max = nitems(pfkey_msgtypenames);
static char buf[20];
if (x < max && pfkey_msgtypenames[x])
@@ -120,59 +131,89 @@ pfkey_stats(u_long off, const char *name, int family __unused,
if (off == 0)
return;
- printf ("%s:\n", name);
- kread(off, (char *)&pfkeystat, sizeof(pfkeystat));
+ xo_emit("{T:/%s}:\n", name);
+ xo_open_container(name);
+ kread_counters(off, (char *)&pfkeystat, sizeof(pfkeystat));
#define p(f, m) if (pfkeystat.f || sflag <= 1) \
- printf(m, (uintmax_t)pfkeystat.f, plural(pfkeystat.f))
+ xo_emit(m, (uintmax_t)pfkeystat.f, plural(pfkeystat.f))
/* userland -> kernel */
- p(out_total, "\t%ju request%s sent from userland\n");
- p(out_bytes, "\t%ju byte%s sent from userland\n");
+ p(out_total, "\t{:sent-requests/%ju} "
+ "{N:/request%s sent from userland}\n");
+ p(out_bytes, "\t{:sent-bytes/%ju} "
+ "{N:/byte%s sent from userland}\n");
for (first = 1, type = 0;
- type < sizeof(pfkeystat.out_msgtype)/sizeof(pfkeystat.out_msgtype[0]);
- type++) {
+ type<sizeof(pfkeystat.out_msgtype)/sizeof(pfkeystat.out_msgtype[0]);
+ type++) {
if (pfkeystat.out_msgtype[type] <= 0)
continue;
if (first) {
- printf("\thistogram by message type:\n");
+ xo_open_list("output-histogram");
+ xo_emit("\t{T:histogram by message type}:\n");
first = 0;
}
- printf("\t\t%s: %ju\n", pfkey_msgtype_names(type),
- (uintmax_t)pfkeystat.out_msgtype[type]);
+ xo_open_instance("output-histogram");
+ xo_emit("\t\t{k::type/%s}: {:count/%ju}\n",
+ pfkey_msgtype_names(type),
+ (uintmax_t)pfkeystat.out_msgtype[type]);
+ xo_close_instance("output-histogram");
}
- p(out_invlen, "\t%ju message%s with invalid length field\n");
- p(out_invver, "\t%ju message%s with invalid version field\n");
- p(out_invmsgtype, "\t%ju message%s with invalid message type field\n");
- p(out_tooshort, "\t%ju message%s too short\n");
- p(out_nomem, "\t%ju message%s with memory allocation failure\n");
- p(out_dupext, "\t%ju message%s with duplicate extension\n");
- p(out_invexttype, "\t%ju message%s with invalid extension type\n");
- p(out_invsatype, "\t%ju message%s with invalid sa type\n");
- p(out_invaddr, "\t%ju message%s with invalid address extension\n");
+ if (!first)
+ xo_close_list("output-histogram");
+
+ p(out_invlen, "\t{:dropped-bad-length/%ju} "
+ "{N:/message%s with invalid length field}\n");
+ p(out_invver, "\t{:dropped-bad-version/%ju} "
+ "{N:/message%s with invalid version field}\n");
+ p(out_invmsgtype, "\t{:dropped-bad-type/%ju} "
+ "{N:/message%s with invalid message type field}\n");
+ p(out_tooshort, "\t{:dropped-too-short/%ju} "
+ "{N:/message%s too short}\n");
+ p(out_nomem, "\t{:dropped-no-memory/%ju} "
+ "{N:/message%s with memory allocation failure}\n");
+ p(out_dupext, "\t{:dropped-duplicate-extension/%ju} "
+ "{N:/message%s with duplicate extension}\n");
+ p(out_invexttype, "\t{:dropped-bad-extension/%ju} "
+ "{N:/message%s with invalid extension type}\n");
+ p(out_invsatype, "\t{:dropped-bad-sa-type/%ju} "
+ "{N:/message%s with invalid sa type}\n");
+ p(out_invaddr, "\t{:dropped-bad-address-extension/%ju} "
+ "{N:/message%s with invalid address extension}\n");
/* kernel -> userland */
- p(in_total, "\t%ju request%s sent to userland\n");
- p(in_bytes, "\t%ju byte%s sent to userland\n");
+ p(in_total, "\t{:received-requests/%ju} "
+ "{N:/request%s sent to userland}\n");
+ p(in_bytes, "\t{:received-bytes/%ju} "
+ "{N:/byte%s sent to userland}\n");
for (first = 1, type = 0;
- type < sizeof(pfkeystat.in_msgtype)/sizeof(pfkeystat.in_msgtype[0]);
- type++) {
+ type < sizeof(pfkeystat.in_msgtype)/sizeof(pfkeystat.in_msgtype[0]);
+ type++) {
if (pfkeystat.in_msgtype[type] <= 0)
continue;
if (first) {
- printf("\thistogram by message type:\n");
+ xo_open_list("input-histogram");
+ xo_emit("\t{T:histogram by message type}:\n");
first = 0;
}
- printf("\t\t%s: %ju\n", pfkey_msgtype_names(type),
- (uintmax_t)pfkeystat.in_msgtype[type]);
+ xo_open_instance("input-histogram");
+ xo_emit("\t\t{k:type/%s}: {:count/%ju}\n",
+ pfkey_msgtype_names(type),
+ (uintmax_t)pfkeystat.in_msgtype[type]);
+ xo_close_instance("input-histogram");
}
- p(in_msgtarget[KEY_SENDUP_ONE],
- "\t%ju message%s toward single socket\n");
- p(in_msgtarget[KEY_SENDUP_ALL],
- "\t%ju message%s toward all sockets\n");
+ if (!first)
+ xo_close_list("input-histogram");
+ p(in_msgtarget[KEY_SENDUP_ONE], "\t{:received-one-socket/%ju} "
+ "{N:/message%s toward single socket}\n");
+ p(in_msgtarget[KEY_SENDUP_ALL], "\t{:received-all-sockets/%ju} "
+ "{N:/message%s toward all sockets}\n");
p(in_msgtarget[KEY_SENDUP_REGISTERED],
- "\t%ju message%s toward registered sockets\n");
- p(in_nomem, "\t%ju message%s with memory allocation failure\n");
+ "\t{:received-registered-sockets/%ju} "
+ "{N:/message%s toward registered sockets}\n");
+ p(in_nomem, "\t{:discarded-no-memory/%ju} "
+ "{N:/message%s with memory allocation failure}\n");
#undef p
+ xo_close_container(name);
}
#endif /* IPSEC */
diff --git a/freebsd/usr.bin/netstat/route.c b/freebsd/usr.bin/netstat/route.c
index afa55cee..bed58b06 100644
--- a/freebsd/usr.bin/netstat/route.c
+++ b/freebsd/usr.bin/netstat/route.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright (c) 1983, 1988, 1993
* The Regents of the University of California. All rights reserved.
@@ -35,203 +39,146 @@ static char sccsid[] = "From: @(#)route.c 8.6 (Berkeley) 4/28/95";
#endif /* not lint */
#endif
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
+#ifdef __rtems__
+#include <rtems/rtems/clock.h>
+#endif /* __rtems__ */
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <sys/sysctl.h>
#include <sys/time.h>
#include <net/ethernet.h>
#include <net/if.h>
-#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
-#include <net/radix.h>
#include <net/route.h>
#include <netinet/in.h>
-#ifdef __rtems__
-/* no IPX on RTEMS */
-/* no AppleTalk on RTEMS */
-#else /* __rtems__ */
-#include <netipx/ipx.h>
-#include <netatalk/at.h>
-#endif /* __rtems__ */
-#ifdef __rtems__
-/* why isn't this protected by a NETGRAPH define */
-#else /* __rtems__ */
+#ifndef __rtems__
#include <netgraph/ng_socket.h>
#endif /* __rtems__ */
-#include <sys/sysctl.h>
-
#include <arpa/inet.h>
+#include <ifaddrs.h>
#include <libutil.h>
#include <netdb.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include <err.h>
+#include <libxo/xo.h>
#include "netstat.h"
-
-#define kget(p, d) (kread((u_long)(p), (char *)&(d), sizeof (d)))
+#include "nl_defs.h"
+#ifdef __rtems__
+#include "rtems-bsd-netstat-route-data.h"
+#endif /* __rtems__ */
/*
* Definitions for showing gateway flags.
*/
-struct bits {
+#ifndef __rtems__
+static struct bits {
+#else /* __rtems__ */
+static const struct bits {
+#endif /* __rtems__ */
u_long b_mask;
char b_val;
-} static const bits[] = {
- { RTF_UP, 'U' },
- { RTF_GATEWAY, 'G' },
- { RTF_HOST, 'H' },
- { RTF_REJECT, 'R' },
- { RTF_DYNAMIC, 'D' },
- { RTF_MODIFIED, 'M' },
- { RTF_DONE, 'd' }, /* Completed -- for routing messages only */
- { RTF_XRESOLVE, 'X' },
- { RTF_STATIC, 'S' },
- { RTF_PROTO1, '1' },
- { RTF_PROTO2, '2' },
- { RTF_PRCLONING,'c' },
- { RTF_PROTO3, '3' },
- { RTF_BLACKHOLE,'B' },
- { RTF_BROADCAST,'b' },
+ const char *b_name;
+} bits[] = {
+ { RTF_UP, 'U', "up" },
+ { RTF_GATEWAY, 'G', "gateway" },
+ { RTF_HOST, 'H', "host" },
+ { RTF_REJECT, 'R', "reject" },
+ { RTF_DYNAMIC, 'D', "dynamic" },
+ { RTF_MODIFIED, 'M', "modified" },
+ { RTF_DONE, 'd', "done" }, /* Completed -- for routing msgs only */
+ { RTF_XRESOLVE, 'X', "xresolve" },
+ { RTF_STATIC, 'S', "static" },
+ { RTF_PROTO1, '1', "proto1" },
+ { RTF_PROTO2, '2', "proto2" },
+ { RTF_PROTO3, '3', "proto3" },
+ { RTF_BLACKHOLE,'B', "blackhole" },
+ { RTF_BROADCAST,'b', "broadcast" },
#ifdef RTF_LLINFO
- { RTF_LLINFO, 'L' },
-#endif
-#ifdef RTF_WASCLONED
- { RTF_WASCLONED,'W' },
+ { RTF_LLINFO, 'L', "llinfo" },
#endif
-#ifdef RTF_CLONING
- { RTF_CLONING, 'C' },
-#endif
- { 0 , 0 }
+ { 0 , 0, NULL }
};
-typedef union {
- long dummy; /* Helps align structure. */
- struct sockaddr u_sa;
- u_short u_data[128];
-} sa_u;
-
-static sa_u pt_u;
-
-static struct rtentry rtentry;
-static struct radix_node rnode;
-static struct radix_mask rmask;
-
-static const int NewTree = 0;
-
-static struct timespec uptime;
-
-static struct sockaddr *kgetsa(struct sockaddr *);
-static void size_cols(int ef, struct radix_node *rn);
-static void size_cols_tree(struct radix_node *rn);
-static void size_cols_rtentry(struct rtentry *rt);
-static void p_tree(struct radix_node *);
-static void p_rtnode(void);
-static void ntreestuff(void);
-static void np_rtentry(struct rt_msghdr *);
-static void p_sockaddr(struct sockaddr *, struct sockaddr *, int, int);
+struct ifmap_entry {
+ char ifname[IFNAMSIZ];
+};
+static struct ifmap_entry *ifmap;
+static int ifmap_size;
+static struct timespec uptime;
+
+static const char *netname4(in_addr_t, in_addr_t);
+static const char *netname6(struct sockaddr_in6 *, struct sockaddr_in6 *);
+static void p_rtable_sysctl(int, int);
+static void p_rtentry_sysctl(const char *name, struct rt_msghdr *);
+static int p_sockaddr(const char *name, struct sockaddr *, struct sockaddr *,
+ int, int);
static const char *fmt_sockaddr(struct sockaddr *sa, struct sockaddr *mask,
int flags);
static void p_flags(int, const char *);
static const char *fmt_flags(int f);
-static void p_rtentry(struct rtentry *);
static void domask(char *, in_addr_t, u_long);
+
/*
* Print routing tables.
*/
void
-routepr(u_long rtree)
+routepr(int fibnum, int af)
{
- struct radix_node_head **rnhp, *rnh, head;
size_t intsize;
- int fam, fibnum, numfibs;
+ int numfibs;
+
+ if (live == 0)
+ return;
intsize = sizeof(int);
- if (sysctlbyname("net.my_fibnum", &fibnum, &intsize, NULL, 0) == -1)
+ if (fibnum == -1 &&
+ sysctlbyname("net.my_fibnum", &fibnum, &intsize, NULL, 0) == -1)
fibnum = 0;
if (sysctlbyname("net.fibs", &numfibs, &intsize, NULL, 0) == -1)
numfibs = 1;
- rt_tables = calloc(numfibs * (AF_MAX+1),
- sizeof(struct radix_node_head *));
- if (rt_tables == NULL)
- err(EX_OSERR, "memory allocation failed");
+ if (fibnum < 0 || fibnum > numfibs - 1)
+ errx(EX_USAGE, "%d: invalid fib", fibnum);
/*
* Since kernel & userland use different timebase
* (time_uptime vs time_second) and we are reading kernel memory
- * directly we should do rt_rmx.rmx_expire --> expire_time conversion.
+ * directly we should do rt_expire --> expire_time conversion.
*/
-#ifdef __rtems__
- {
- rtems_clock_get_uptime(&uptime);
- }
-#else /* __rtems__ */
+#ifndef __rtems__
if (clock_gettime(CLOCK_UPTIME, &uptime) < 0)
err(EX_OSERR, "clock_gettime() failed");
+#else /* __rtems__ */
+ rtems_clock_get_uptime(&uptime);
#endif /* __rtems__ */
- printf("Routing tables\n");
-
- if (Aflag == 0 && NewTree)
- ntreestuff();
- else {
- if (rtree == 0) {
- printf("rt_tables: symbol not in namelist\n");
- return;
- }
-
- if (kread((u_long)(rtree), (char *)(rt_tables), (numfibs *
- (AF_MAX+1) * sizeof(struct radix_node_head *))) != 0)
- return;
- for (fam = 0; fam <= AF_MAX; fam++) {
- int tmpfib;
-
- switch (fam) {
- case AF_INET6:
- case AF_INET:
- tmpfib = fibnum;
- break;
- default:
- tmpfib = 0;
- }
- rnhp = (struct radix_node_head **)*rt_tables;
- /* Calculate the in-kernel address. */
- rnhp += tmpfib * (AF_MAX+1) + fam;
- /* Read the in kernel rhn pointer. */
- if (kget(rnhp, rnh) != 0)
- continue;
- if (rnh == NULL)
- continue;
- /* Read the rnh data. */
- if (kget(rnh, head) != 0)
- continue;
- if (fam == AF_UNSPEC) {
- if (Aflag && af == 0) {
- printf("Netmasks:\n");
- p_tree(head.rnh_treetop);
- }
- } else if (af == AF_UNSPEC || af == fam) {
- size_cols(fam, head.rnh_treetop);
- pr_family(fam);
- do_rtent = 1;
- pr_rthdr(fam);
- p_tree(head.rnh_treetop);
- }
- }
- }
+ xo_open_container("route-information");
+ xo_emit("{T:Routing tables}");
+ if (fibnum)
+ xo_emit(" ({L:fib}: {:fib/%d})", fibnum);
+ xo_emit("\n");
+ p_rtable_sysctl(fibnum, af);
+ xo_close_container("route-information");
}
+
/*
* Print address family header before a section of the routing table.
*/
@@ -249,15 +196,9 @@ pr_family(int af1)
afname = "Internet6";
break;
#endif /*INET6*/
- case AF_IPX:
- afname = "IPX";
- break;
case AF_ISO:
afname = "ISO";
break;
- case AF_APPLETALK:
- afname = "AppleTalk";
- break;
case AF_CCITT:
afname = "X.25";
break;
@@ -269,166 +210,52 @@ pr_family(int af1)
break;
}
if (afname)
- printf("\n%s:\n", afname);
+ xo_emit("\n{k:address-family/%s}:\n", afname);
else
- printf("\nProtocol Family %d:\n", af1);
+ xo_emit("\n{L:Protocol Family} {k:address-family/%d}:\n", af1);
}
/* column widths; each followed by one space */
#ifndef INET6
#define WID_DST_DEFAULT(af) 18 /* width of destination column */
#define WID_GW_DEFAULT(af) 18 /* width of gateway column */
-#define WID_IF_DEFAULT(af) (Wflag ? 8 : 6) /* width of netif column */
+#define WID_IF_DEFAULT(af) (Wflag ? 10 : 8) /* width of netif column */
#else
#define WID_DST_DEFAULT(af) \
((af) == AF_INET6 ? (numeric_addr ? 33: 18) : 18)
#define WID_GW_DEFAULT(af) \
((af) == AF_INET6 ? (numeric_addr ? 29 : 18) : 18)
-#define WID_IF_DEFAULT(af) ((af) == AF_INET6 ? 8 : (Wflag ? 8 : 6))
+#define WID_IF_DEFAULT(af) ((af) == AF_INET6 ? 8 : (Wflag ? 10 : 8))
#endif /*INET6*/
static int wid_dst;
static int wid_gw;
static int wid_flags;
-static int wid_refs;
-static int wid_use;
+static int wid_pksent;
static int wid_mtu;
static int wid_if;
static int wid_expire;
-static void
-size_cols(int ef __unused, struct radix_node *rn)
-{
- wid_dst = WID_DST_DEFAULT(ef);
- wid_gw = WID_GW_DEFAULT(ef);
- wid_flags = 6;
- wid_refs = 6;
- wid_use = 8;
- wid_mtu = 6;
- wid_if = WID_IF_DEFAULT(ef);
- wid_expire = 6;
-
- if (Wflag)
- size_cols_tree(rn);
-}
-
-static void
-size_cols_tree(struct radix_node *rn)
-{
-again:
- if (kget(rn, rnode) != 0)
- return;
- if (!(rnode.rn_flags & RNF_ACTIVE))
- return;
- if (rnode.rn_bit < 0) {
- if ((rnode.rn_flags & RNF_ROOT) == 0) {
- if (kget(rn, rtentry) != 0)
- return;
- size_cols_rtentry(&rtentry);
- }
- if ((rn = rnode.rn_dupedkey))
- goto again;
- } else {
- rn = rnode.rn_right;
- size_cols_tree(rnode.rn_left);
- size_cols_tree(rn);
- }
-}
-
-static void
-size_cols_rtentry(struct rtentry *rt)
-{
- static struct ifnet ifnet, *lastif;
- static char buffer[100];
- const char *bp;
- struct sockaddr *sa;
- sa_u addr, mask;
- int len;
-
- bzero(&addr, sizeof(addr));
- if ((sa = kgetsa(rt_key(rt))))
- bcopy(sa, &addr, sa->sa_len);
- bzero(&mask, sizeof(mask));
- if (rt_mask(rt) && (sa = kgetsa(rt_mask(rt))))
- bcopy(sa, &mask, sa->sa_len);
- bp = fmt_sockaddr(&addr.u_sa, &mask.u_sa, rt->rt_flags);
- len = strlen(bp);
- wid_dst = MAX(len, wid_dst);
-
- bp = fmt_sockaddr(kgetsa(rt->rt_gateway), NULL, RTF_HOST);
- len = strlen(bp);
- wid_gw = MAX(len, wid_gw);
-
- bp = fmt_flags(rt->rt_flags);
- len = strlen(bp);
- wid_flags = MAX(len, wid_flags);
-
- if (addr.u_sa.sa_family == AF_INET || Wflag) {
- len = snprintf(buffer, sizeof(buffer), "%d", rt->rt_refcnt);
- wid_refs = MAX(len, wid_refs);
- len = snprintf(buffer, sizeof(buffer), "%lu", rt->rt_use);
- wid_use = MAX(len, wid_use);
- if (Wflag && rt->rt_rmx.rmx_mtu != 0) {
- len = snprintf(buffer, sizeof(buffer),
- "%lu", rt->rt_rmx.rmx_mtu);
- wid_mtu = MAX(len, wid_mtu);
- }
- }
- if (rt->rt_ifp) {
- if (rt->rt_ifp != lastif) {
- if (kget(rt->rt_ifp, ifnet) == 0)
- len = strlen(ifnet.if_xname);
- else
- len = strlen("---");
- lastif = rt->rt_ifp;
- wid_if = MAX(len, wid_if);
- }
- if (rt->rt_rmx.rmx_expire) {
- time_t expire_time;
-
- if ((expire_time =
- rt->rt_rmx.rmx_expire - uptime.tv_sec) > 0) {
- len = snprintf(buffer, sizeof(buffer), "%d",
- (int)expire_time);
- wid_expire = MAX(len, wid_expire);
- }
- }
- }
-}
-
-
/*
* Print header for routing table columns.
*/
-void
-pr_rthdr(int af1)
+static void
+pr_rthdr(int af1 __unused)
{
- if (Aflag)
- printf("%-8.8s ","Address");
- if (af1 == AF_INET || Wflag) {
- if (Wflag) {
- printf("%-*.*s %-*.*s %-*.*s %*.*s %*.*s %*.*s %*.*s %*s\n",
- wid_dst, wid_dst, "Destination",
- wid_gw, wid_gw, "Gateway",
- wid_flags, wid_flags, "Flags",
- wid_refs, wid_refs, "Refs",
- wid_use, wid_use, "Use",
- wid_mtu, wid_mtu, "Mtu",
- wid_if, wid_if, "Netif",
- wid_expire, "Expire");
- } else {
- printf("%-*.*s %-*.*s %-*.*s %*.*s %*.*s %*.*s %*s\n",
- wid_dst, wid_dst, "Destination",
- wid_gw, wid_gw, "Gateway",
- wid_flags, wid_flags, "Flags",
- wid_refs, wid_refs, "Refs",
- wid_use, wid_use, "Use",
- wid_if, wid_if, "Netif",
- wid_expire, "Expire");
- }
+ if (Wflag) {
+ xo_emit("{T:/%-*.*s} {T:/%-*.*s} {T:/%-*.*s} {T:/%*.*s} "
+ "{T:/%*.*s} {T:/%*.*s} {T:/%*s}\n",
+ wid_dst, wid_dst, "Destination",
+ wid_gw, wid_gw, "Gateway",
+ wid_flags, wid_flags, "Flags",
+ wid_pksent, wid_pksent, "Use",
+ wid_mtu, wid_mtu, "Mtu",
+ wid_if, wid_if, "Netif",
+ wid_expire, "Expire");
} else {
- printf("%-*.*s %-*.*s %-*.*s %*.*s %*s\n",
+ xo_emit("{T:/%-*.*s} {T:/%-*.*s} {T:/%-*.*s} {T:/%*.*s} "
+ "{T:/%*s}\n",
wid_dst, wid_dst, "Destination",
wid_gw, wid_gw, "Gateway",
wid_flags, wid_flags, "Flags",
@@ -437,309 +264,272 @@ pr_rthdr(int af1)
}
}
-static struct sockaddr *
-kgetsa(struct sockaddr *dst)
+static void
+p_rtable_sysctl(int fibnum, int af)
{
+ size_t needed;
+ int mib[7];
+ char *buf, *next, *lim;
+ struct rt_msghdr *rtm;
+ struct sockaddr *sa;
+ int fam = AF_UNSPEC, ifindex = 0, size;
+ int need_table_close = false;
- if (kget(dst, pt_u.u_sa) != 0)
- return (NULL);
- if (pt_u.u_sa.sa_len > sizeof (pt_u.u_sa))
- kread((u_long)dst, (char *)pt_u.u_data, pt_u.u_sa.sa_len);
- return (&pt_u.u_sa);
-}
+ struct ifaddrs *ifap, *ifa;
+ struct sockaddr_dl *sdl;
-static void
-p_tree(struct radix_node *rn)
-{
+ /*
+ * Retrieve interface list at first
+ * since we need #ifindex -> if_xname match
+ */
+ if (getifaddrs(&ifap) != 0)
+ err(EX_OSERR, "getifaddrs");
-again:
- if (kget(rn, rnode) != 0)
- return;
- if (!(rnode.rn_flags & RNF_ACTIVE))
- return;
- if (rnode.rn_bit < 0) {
- if (Aflag)
- printf("%-8.8lx ", (u_long)rn);
- if (rnode.rn_flags & RNF_ROOT) {
- if (Aflag)
- printf("(root node)%s",
- rnode.rn_dupedkey ? " =>\n" : "\n");
- } else if (do_rtent) {
- if (kget(rn, rtentry) == 0) {
- p_rtentry(&rtentry);
- if (Aflag)
- p_rtnode();
- }
- } else {
- p_sockaddr(kgetsa((struct sockaddr *)rnode.rn_key),
- NULL, 0, 44);
- putchar('\n');
- }
- if ((rn = rnode.rn_dupedkey))
- goto again;
- } else {
- if (Aflag && do_rtent) {
- printf("%-8.8lx ", (u_long)rn);
- p_rtnode();
+ for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
+
+ if (ifa->ifa_addr->sa_family != AF_LINK)
+ continue;
+
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ ifindex = sdl->sdl_index;
+
+ if (ifindex >= ifmap_size) {
+ size = roundup(ifindex + 1, 32) *
+ sizeof(struct ifmap_entry);
+ if ((ifmap = realloc(ifmap, size)) == NULL)
+ errx(2, "realloc(%d) failed", size);
+ memset(&ifmap[ifmap_size], 0,
+ size - ifmap_size *
+ sizeof(struct ifmap_entry));
+
+ ifmap_size = roundup(ifindex + 1, 32);
}
- rn = rnode.rn_right;
- p_tree(rnode.rn_left);
- p_tree(rn);
- }
-}
-static char nbuf[20];
+ if (*ifmap[ifindex].ifname != '\0')
+ continue;
-static void
-p_rtnode(void)
-{
- struct radix_mask *rm = rnode.rn_mklist;
-
- if (rnode.rn_bit < 0) {
- if (rnode.rn_mask) {
- printf("\t mask ");
- p_sockaddr(kgetsa((struct sockaddr *)rnode.rn_mask),
- NULL, 0, -1);
- } else if (rm == 0)
- return;
- } else {
- sprintf(nbuf, "(%d)", rnode.rn_bit);
- printf("%6.6s %8.8lx : %8.8lx", nbuf, (u_long)rnode.rn_left, (u_long)rnode.rn_right);
- }
- while (rm) {
- if (kget(rm, rmask) != 0)
- break;
- sprintf(nbuf, " %d refs, ", rmask.rm_refs);
- printf(" mk = %8.8lx {(%d),%s",
- (u_long)rm, -1 - rmask.rm_bit, rmask.rm_refs ? nbuf : " ");
- if (rmask.rm_flags & RNF_NORMAL) {
- struct radix_node rnode_aux;
- printf(" <normal>, ");
- if (kget(rmask.rm_leaf, rnode_aux) == 0)
- p_sockaddr(kgetsa((struct sockaddr *)rnode_aux.rn_mask),
- NULL, 0, -1);
- else
- p_sockaddr(NULL, NULL, 0, -1);
- } else
- p_sockaddr(kgetsa((struct sockaddr *)rmask.rm_mask),
- NULL, 0, -1);
- putchar('}');
- if ((rm = rmask.rm_mklist))
- printf(" ->");
+ strlcpy(ifmap[ifindex].ifname, ifa->ifa_name, IFNAMSIZ);
}
- putchar('\n');
-}
-static void
-ntreestuff(void)
-{
- size_t needed;
- int mib[6];
- char *buf, *next, *lim;
- struct rt_msghdr *rtm;
+ freeifaddrs(ifap);
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
mib[2] = 0;
- mib[3] = 0;
+ mib[3] = af;
mib[4] = NET_RT_DUMP;
mib[5] = 0;
- if (sysctl(mib, 6, NULL, &needed, NULL, 0) < 0) {
- err(1, "sysctl: net.route.0.0.dump estimate");
- }
-
- if ((buf = malloc(needed)) == 0) {
+ mib[6] = fibnum;
+ if (sysctl(mib, nitems(mib), NULL, &needed, NULL, 0) < 0)
+ err(EX_OSERR, "sysctl: net.route.0.%d.dump.%d estimate", af,
+ fibnum);
+ if ((buf = malloc(needed)) == NULL)
errx(2, "malloc(%lu)", (unsigned long)needed);
- }
- if (sysctl(mib, 6, buf, &needed, NULL, 0) < 0) {
- err(1, "sysctl: net.route.0.0.dump");
- }
+ if (sysctl(mib, nitems(mib), buf, &needed, NULL, 0) < 0)
+ err(1, "sysctl: net.route.0.%d.dump.%d", af, fibnum);
lim = buf + needed;
+ xo_open_container("route-table");
+ xo_open_list("rt-family");
for (next = buf; next < lim; next += rtm->rtm_msglen) {
rtm = (struct rt_msghdr *)next;
- np_rtentry(rtm);
+ if (rtm->rtm_version != RTM_VERSION)
+ continue;
+ /*
+ * Peek inside header to determine AF
+ */
+ sa = (struct sockaddr *)(rtm + 1);
+ /* Only print family first time. */
+ if (fam != sa->sa_family) {
+ if (need_table_close) {
+ xo_close_list("rt-entry");
+ xo_close_instance("rt-family");
+ }
+ need_table_close = true;
+
+ fam = sa->sa_family;
+ wid_dst = WID_DST_DEFAULT(fam);
+ wid_gw = WID_GW_DEFAULT(fam);
+ wid_flags = 6;
+ wid_pksent = 8;
+ wid_mtu = 6;
+ wid_if = WID_IF_DEFAULT(fam);
+ wid_expire = 6;
+ xo_open_instance("rt-family");
+ pr_family(fam);
+ xo_open_list("rt-entry");
+
+ pr_rthdr(fam);
+ }
+ p_rtentry_sysctl("rt-entry", rtm);
}
+ if (need_table_close) {
+ xo_close_list("rt-entry");
+ xo_close_instance("rt-family");
+ }
+ xo_close_list("rt-family");
+ xo_close_container("route-table");
+ free(buf);
}
static void
-np_rtentry(struct rt_msghdr *rtm)
+p_rtentry_sysctl(const char *name, struct rt_msghdr *rtm)
{
- struct sockaddr *sa = (struct sockaddr *)(rtm + 1);
-#ifdef notdef
- static int masks_done, banner_printed;
-#endif
- static int old_af;
- int af1 = 0, interesting = RTF_UP | RTF_GATEWAY | RTF_HOST;
-
-#ifdef notdef
- /* for the moment, netmasks are skipped over */
- if (!banner_printed) {
- printf("Netmasks:\n");
- banner_printed = 1;
+ struct sockaddr *sa, *addr[RTAX_MAX];
+ char buffer[128];
+ char prettyname[128];
+ int i, protrusion;
+
+ xo_open_instance(name);
+ sa = (struct sockaddr *)(rtm + 1);
+ for (i = 0; i < RTAX_MAX; i++) {
+ if (rtm->rtm_addrs & (1 << i))
+ addr[i] = sa;
+ sa = (struct sockaddr *)((char *)sa + SA_SIZE(sa));
+ }
+
+ protrusion = p_sockaddr("destination", addr[RTAX_DST],
+ addr[RTAX_NETMASK],
+ rtm->rtm_flags, wid_dst);
+ protrusion = p_sockaddr("gateway", addr[RTAX_GATEWAY], NULL, RTF_HOST,
+ wid_gw - protrusion);
+ snprintf(buffer, sizeof(buffer), "{[:-%d}{:flags/%%s}{]:} ",
+ wid_flags - protrusion);
+ p_flags(rtm->rtm_flags, buffer);
+ if (Wflag) {
+ xo_emit("{t:use/%*lu} ", wid_pksent, rtm->rtm_rmx.rmx_pksent);
+
+ if (rtm->rtm_rmx.rmx_mtu != 0)
+ xo_emit("{t:mtu/%*lu} ", wid_mtu, rtm->rtm_rmx.rmx_mtu);
+ else
+ xo_emit("{P:/%*s} ", wid_mtu, "");
}
- if (masks_done == 0) {
- if (rtm->rtm_addrs != RTA_DST ) {
- masks_done = 1;
- af1 = sa->sa_family;
- }
- } else
-#endif
- af1 = sa->sa_family;
- if (af1 != old_af) {
- pr_family(af1);
- old_af = af1;
+
+ memset(prettyname, 0, sizeof(prettyname));
+ if (rtm->rtm_index < ifmap_size) {
+ strlcpy(prettyname, ifmap[rtm->rtm_index].ifname,
+ sizeof(prettyname));
+ if (*prettyname == '\0')
+ strlcpy(prettyname, "---", sizeof(prettyname));
}
- if (rtm->rtm_addrs == RTA_DST)
- p_sockaddr(sa, NULL, 0, 36);
- else {
- p_sockaddr(sa, NULL, rtm->rtm_flags, 16);
- sa = (struct sockaddr *)(SA_SIZE(sa) + (char *)sa);
- p_sockaddr(sa, NULL, 0, 18);
+
+ if (Wflag)
+ xo_emit("{t:interface-name/%*s}", wid_if, prettyname);
+ else
+ xo_emit("{t:interface-name/%*.*s}", wid_if, wid_if,
+ prettyname);
+ if (rtm->rtm_rmx.rmx_expire) {
+ time_t expire_time;
+
+ if ((expire_time = rtm->rtm_rmx.rmx_expire - uptime.tv_sec) > 0)
+ xo_emit(" {:expire-time/%*d}", wid_expire,
+ (int)expire_time);
}
- p_flags(rtm->rtm_flags & interesting, "%-6.6s ");
- putchar('\n');
+
+ xo_emit("\n");
+ xo_close_instance(name);
}
-static void
-p_sockaddr(struct sockaddr *sa, struct sockaddr *mask, int flags, int width)
+static int
+p_sockaddr(const char *name, struct sockaddr *sa, struct sockaddr *mask,
+ int flags, int width)
{
const char *cp;
+ char buf[128];
+ int protrusion;
cp = fmt_sockaddr(sa, mask, flags);
- if (width < 0 )
- printf("%s ", cp);
- else {
- if (numeric_addr)
- printf("%-*s ", width, cp);
- else
- printf("%-*.*s ", width, width, cp);
+ if (width < 0) {
+ snprintf(buf, sizeof(buf), "{:%s/%%s} ", name);
+ xo_emit(buf, cp);
+ protrusion = 0;
+ } else {
+ if (Wflag != 0 || numeric_addr) {
+ snprintf(buf, sizeof(buf), "{[:%d}{:%s/%%s}{]:} ",
+ -width, name);
+ xo_emit(buf, cp);
+ protrusion = strlen(cp) - width;
+ if (protrusion < 0)
+ protrusion = 0;
+ } else {
+ snprintf(buf, sizeof(buf), "{[:%d}{:%s/%%-.*s}{]:} ",
+ -width, name);
+ xo_emit(buf, width, cp);
+ protrusion = 0;
+ }
}
+ return (protrusion);
}
static const char *
fmt_sockaddr(struct sockaddr *sa, struct sockaddr *mask, int flags)
{
- static char workbuf[128];
+ static char buf[128];
const char *cp;
if (sa == NULL)
return ("null");
switch(sa->sa_family) {
- case AF_INET:
- {
- struct sockaddr_in *sockin = (struct sockaddr_in *)sa;
-
- if ((sockin->sin_addr.s_addr == INADDR_ANY) &&
- mask &&
- ntohl(((struct sockaddr_in *)mask)->sin_addr.s_addr)
- ==0L)
- cp = "default" ;
- else if (flags & RTF_HOST)
- cp = routename(sockin->sin_addr.s_addr);
- else if (mask)
- cp = netname(sockin->sin_addr.s_addr,
- ntohl(((struct sockaddr_in *)mask)
- ->sin_addr.s_addr));
- else
- cp = netname(sockin->sin_addr.s_addr, 0L);
- break;
- }
-
#ifdef INET6
case AF_INET6:
- {
- struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa;
-
- in6_fillscopeid(sa6);
-
+ /*
+ * The sa6->sin6_scope_id must be filled here because
+ * this sockaddr is extracted from kmem(4) directly
+ * and has KAME-specific embedded scope id in
+ * sa6->sin6_addr.s6_addr[2].
+ */
+ in6_fillscopeid(satosin6(sa));
+ /* FALLTHROUGH */
+#endif /*INET6*/
+ case AF_INET:
if (flags & RTF_HOST)
- cp = routename6(sa6);
+ cp = routename(sa, numeric_addr);
else if (mask)
- cp = netname6(sa6,
- &((struct sockaddr_in6 *)mask)->sin6_addr);
- else {
- cp = netname6(sa6, NULL);
- }
- break;
- }
-#endif /*INET6*/
-
-#ifndef __rtems__
- case AF_IPX:
- {
- struct ipx_addr work = ((struct sockaddr_ipx *)sa)->sipx_addr;
- if (ipx_nullnet(satoipx_addr(work)))
- cp = "default";
- else
- cp = ipx_print(sa);
- break;
- }
- case AF_APPLETALK:
- {
- if (!(flags & RTF_HOST) && mask)
- cp = atalk_print2(sa,mask,9);
+ cp = netname(sa, mask);
else
- cp = atalk_print(sa,11);
+ cp = netname(sa, NULL);
break;
- }
-#endif /* __rtems__ */
case AF_NETGRAPH:
{
#ifdef __rtems__
/* netgraph not supported yet */
err(EX_OSERR, "memory allocation failed");
#else /* __rtems__ */
- strlcpy(workbuf, ((struct sockaddr_ng *)sa)->sg_data,
- sizeof(workbuf));
- cp = workbuf;
+ strlcpy(buf, ((struct sockaddr_ng *)sa)->sg_data,
+ sizeof(buf));
+ cp = buf;
#endif /* __rtems__ */
break;
}
-
case AF_LINK:
{
+#if 0
struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
- if (sdl->sdl_nlen == 0 && sdl->sdl_alen == 0 &&
- sdl->sdl_slen == 0) {
- (void) sprintf(workbuf, "link#%d", sdl->sdl_index);
- cp = workbuf;
- } else
- switch (sdl->sdl_type) {
-
- case IFT_ETHER:
- case IFT_L2VLAN:
- case IFT_BRIDGE:
- if (sdl->sdl_alen == ETHER_ADDR_LEN) {
- cp = ether_ntoa((struct ether_addr *)
- (sdl->sdl_data + sdl->sdl_nlen));
- break;
- }
- /* FALLTHROUGH */
- default:
- cp = link_ntoa(sdl);
- break;
- }
+ /* Interface route. */
+ if (sdl->sdl_nlen)
+ cp = sdl->sdl_data;
+ else
+#endif
+ cp = routename(sa, 1);
break;
}
-
default:
{
u_char *s = (u_char *)sa->sa_data, *slim;
char *cq, *cqlim;
- cq = workbuf;
+ cq = buf;
slim = sa->sa_len + (u_char *) sa;
- cqlim = cq + sizeof(workbuf) - 6;
+ cqlim = cq + sizeof(buf) - 6;
cq += sprintf(cq, "(%d)", sa->sa_family);
while (s < slim && cq < cqlim) {
cq += sprintf(cq, " %02x", *s++);
if (s < slim)
cq += sprintf(cq, "%02x", *s++);
}
- cp = workbuf;
+ cp = buf;
}
}
@@ -749,7 +539,19 @@ fmt_sockaddr(struct sockaddr *sa, struct sockaddr *mask, int flags)
static void
p_flags(int f, const char *format)
{
- printf(format, fmt_flags(f));
+#ifndef __rtems__
+ struct bits *p;
+#else /* __rtems__ */
+ const struct bits *p;
+#endif /* __rtems__ */
+
+ xo_emit(format, fmt_flags(f));
+
+ xo_open_list("flags_pretty");
+ for (p = bits; p->b_mask; p++)
+ if (p->b_mask & f)
+ xo_emit("{le:flags_pretty/%s}", p->b_name);
+ xo_close_list("flags_pretty");
}
static const char *
@@ -757,7 +559,11 @@ fmt_flags(int f)
{
static char name[33];
char *flags;
+#ifndef __rtems__
+ struct bits *p = bits;
+#else /* __rtems__ */
const struct bits *p = bits;
+#endif /* __rtems__ */
for (flags = name; p->b_mask; p++)
if (p->b_mask & f)
@@ -766,81 +572,36 @@ fmt_flags(int f)
return (name);
}
-static void
-p_rtentry(struct rtentry *rt)
-{
- static struct ifnet ifnet, *lastif;
- static char buffer[128];
- static char prettyname[128];
- struct sockaddr *sa;
- sa_u addr, mask;
-
- bzero(&addr, sizeof(addr));
- if ((sa = kgetsa(rt_key(rt))))
- bcopy(sa, &addr, sa->sa_len);
- bzero(&mask, sizeof(mask));
- if (rt_mask(rt) && (sa = kgetsa(rt_mask(rt))))
- bcopy(sa, &mask, sa->sa_len);
- p_sockaddr(&addr.u_sa, &mask.u_sa, rt->rt_flags, wid_dst);
- p_sockaddr(kgetsa(rt->rt_gateway), NULL, RTF_HOST, wid_gw);
- snprintf(buffer, sizeof(buffer), "%%-%d.%ds ", wid_flags, wid_flags);
- p_flags(rt->rt_flags, buffer);
- if (addr.u_sa.sa_family == AF_INET || Wflag) {
- printf("%*d %*lu ", wid_refs, rt->rt_refcnt,
- wid_use, rt->rt_use);
- if (Wflag) {
- if (rt->rt_rmx.rmx_mtu != 0)
- printf("%*lu ", wid_mtu, rt->rt_rmx.rmx_mtu);
- else
- printf("%*s ", wid_mtu, "");
- }
- }
- if (rt->rt_ifp) {
- if (rt->rt_ifp != lastif) {
- if (kget(rt->rt_ifp, ifnet) == 0)
- strlcpy(prettyname, ifnet.if_xname,
- sizeof(prettyname));
- else
- strlcpy(prettyname, "---", sizeof(prettyname));
- lastif = rt->rt_ifp;
- }
- printf("%*.*s", wid_if, wid_if, prettyname);
- if (rt->rt_rmx.rmx_expire) {
- time_t expire_time;
-
- if ((expire_time =
- rt->rt_rmx.rmx_expire - uptime.tv_sec) > 0)
- printf(" %*d", wid_expire, (int)expire_time);
- }
- if (rt->rt_nodes[0].rn_dupedkey)
- printf(" =>");
- }
- putchar('\n');
-}
-
char *
-routename(in_addr_t in)
+routename(struct sockaddr *sa, int flags)
{
- char *cp;
- static char line[MAXHOSTNAMELEN];
- struct hostent *hp;
-
- cp = 0;
- if (!numeric_addr) {
- hp = gethostbyaddr(&in, sizeof (struct in_addr), AF_INET);
- if (hp) {
- cp = hp->h_name;
- trimdomain(cp, strlen(cp));
+ static char line[NI_MAXHOST];
+ int error, f;
+
+ f = (flags) ? NI_NUMERICHOST : 0;
+ error = getnameinfo(sa, sa->sa_len, line, sizeof(line),
+ NULL, 0, f);
+ if (error) {
+ const void *src;
+ switch (sa->sa_family) {
+#ifdef INET
+ case AF_INET:
+ src = &satosin(sa)->sin_addr;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ src = &satosin6(sa)->sin6_addr;
+ break;
+#endif /* INET6 */
+ default:
+ return(line);
}
+ inet_ntop(sa->sa_family, src, line, sizeof(line) - 1);
+ return (line);
}
- if (cp) {
- strlcpy(line, cp, sizeof(line));
- } else {
-#define C(x) ((x) & 0xff)
- in = ntohl(in);
- sprintf(line, "%u.%u.%u.%u",
- C(in >> 24), C(in >> 16), C(in >> 8), C(in));
- }
+ trimdomain(line, strlen(line));
+
return (line);
}
@@ -855,7 +616,7 @@ domask(char *dst, in_addr_t addr __unused, u_long mask)
{
int b, i;
- if (mask == 0 || (!numeric_addr && NSHIFT(mask) != 0)) {
+ if (mask == 0) {
*dst = '\0';
return;
}
@@ -880,30 +641,61 @@ domask(char *dst, in_addr_t addr __unused, u_long mask)
/*
* Return the name of the network whose address is given.
- * The address is assumed to be that of a net or subnet, not a host.
*/
-char *
-netname(in_addr_t in, u_long mask)
+const char *
+netname(struct sockaddr *sa, struct sockaddr *mask)
+{
+ switch (sa->sa_family) {
+ case AF_INET:
+ if (mask != NULL)
+ return (netname4(satosin(sa)->sin_addr.s_addr,
+ satosin(mask)->sin_addr.s_addr));
+ else
+ return (netname4(satosin(sa)->sin_addr.s_addr,
+ INADDR_ANY));
+ break;
+#ifdef INET6
+ case AF_INET6:
+ return (netname6(satosin6(sa), satosin6(mask)));
+#endif /* INET6 */
+ default:
+ return (NULL);
+ }
+}
+
+static const char *
+netname4(in_addr_t in, in_addr_t mask)
{
char *cp = 0;
- static char line[MAXHOSTNAMELEN];
+ static char line[MAXHOSTNAMELEN + sizeof("/xx")];
+ char nline[INET_ADDRSTRLEN];
struct netent *np = 0;
in_addr_t i;
+ if (in == INADDR_ANY && mask == 0) {
+ strlcpy(line, "default", sizeof(line));
+ return (line);
+ }
+
+ /* It is ok to supply host address. */
+ in &= mask;
+
i = ntohl(in);
if (!numeric_addr && i) {
- np = getnetbyaddr(i >> NSHIFT(mask), AF_INET);
+ np = getnetbyaddr(i >> NSHIFT(ntohl(mask)), AF_INET);
if (np != NULL) {
cp = np->n_name;
trimdomain(cp, strlen(cp));
}
}
- if (cp != NULL) {
+ if (cp != NULL)
strlcpy(line, cp, sizeof(line));
- } else {
- inet_ntop(AF_INET, &in, line, sizeof(line) - 1);
+ else {
+ inet_ntop(AF_INET, &in, nline, sizeof(nline));
+ strlcpy(line, nline, sizeof(line));
+ domask(line + strlen(line), i, ntohl(mask));
}
- domask(line + strlen(line), i, mask);
+
return (line);
}
@@ -921,58 +713,53 @@ in6_fillscopeid(struct sockaddr_in6 *sa6)
if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr) ||
IN6_IS_ADDR_MC_NODELOCAL(&sa6->sin6_addr) ||
IN6_IS_ADDR_MC_LINKLOCAL(&sa6->sin6_addr)) {
- /* XXX: override is ok? */
- sa6->sin6_scope_id =
- ntohs(*(u_int16_t *)&sa6->sin6_addr.s6_addr[2]);
+ if (sa6->sin6_scope_id == 0)
+ sa6->sin6_scope_id =
+ ntohs(*(u_int16_t *)&sa6->sin6_addr.s6_addr[2]);
sa6->sin6_addr.s6_addr[2] = sa6->sin6_addr.s6_addr[3] = 0;
}
#endif
}
-const char *
-netname6(struct sockaddr_in6 *sa6, struct in6_addr *mask)
+/* Mask to length table. To check an invalid value, (length + 1) is used. */
+static int masktolen[256] = {
+ [0xff] = 8 + 1,
+ [0xfe] = 7 + 1,
+ [0xfc] = 6 + 1,
+ [0xf8] = 5 + 1,
+ [0xf0] = 4 + 1,
+ [0xe0] = 3 + 1,
+ [0xc0] = 2 + 1,
+ [0x80] = 1 + 1,
+ [0x00] = 0 + 1,
+};
+
+static const char *
+netname6(struct sockaddr_in6 *sa6, struct sockaddr_in6 *mask)
{
- static char line[MAXHOSTNAMELEN];
- u_char *p = (u_char *)mask;
- u_char *lim;
- int masklen, illegal = 0, flag = 0;
+ static char line[NI_MAXHOST + sizeof("/xxx") - 1];
+ struct sockaddr_in6 addr;
+ char nline[NI_MAXHOST];
+ u_char *p, *lim;
+ int masklen, illegal = 0, i;
if (mask) {
+ p = (u_char *)&mask->sin6_addr;
for (masklen = 0, lim = p + 16; p < lim; p++) {
- switch (*p) {
- case 0xff:
- masklen += 8;
- break;
- case 0xfe:
- masklen += 7;
- break;
- case 0xfc:
- masklen += 6;
- break;
- case 0xf8:
- masklen += 5;
- break;
- case 0xf0:
- masklen += 4;
- break;
- case 0xe0:
- masklen += 3;
- break;
- case 0xc0:
- masklen += 2;
- break;
- case 0x80:
- masklen += 1;
- break;
- case 0x00:
- break;
- default:
- illegal ++;
- break;
- }
+ if (masktolen[*p] > 0)
+ /* -1 is required. */
+ masklen += masktolen[*p] - 1;
+ else
+ illegal++;
}
if (illegal)
- fprintf(stderr, "illegal prefixlen\n");
+ xo_error("illegal prefixlen\n");
+
+ memcpy(&addr, sa6, sizeof(addr));
+ for (i = 0; i < 16; ++i)
+ addr.sin6_addr.s6_addr[i] &=
+ mask->sin6_addr.s6_addr[i];
+ sa6 = &addr;
}
else
masklen = 128;
@@ -980,37 +767,17 @@ netname6(struct sockaddr_in6 *sa6, struct in6_addr *mask)
if (masklen == 0 && IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr))
return("default");
+ getnameinfo((struct sockaddr *)sa6, sa6->sin6_len, nline, sizeof(nline),
+ NULL, 0, NI_NUMERICHOST);
if (numeric_addr)
- flag |= NI_NUMERICHOST;
- getnameinfo((struct sockaddr *)sa6, sa6->sin6_len, line, sizeof(line),
- NULL, 0, flag);
-
- if (numeric_addr)
+ strlcpy(line, nline, sizeof(line));
+ else
+ getnameinfo((struct sockaddr *)sa6, sa6->sin6_len, line,
+ sizeof(line), NULL, 0, 0);
+ if (numeric_addr || strcmp(line, nline) == 0)
sprintf(&line[strlen(line)], "/%d", masklen);
- return line;
-}
-
-char *
-routename6(struct sockaddr_in6 *sa6)
-{
- static char line[MAXHOSTNAMELEN];
- int flag = 0;
- /* use local variable for safety */
- struct sockaddr_in6 sa6_local;
-
- sa6_local.sin6_family = AF_INET6;
- sa6_local.sin6_len = sizeof(sa6_local);
- sa6_local.sin6_addr = sa6->sin6_addr;
- sa6_local.sin6_scope_id = sa6->sin6_scope_id;
-
- if (numeric_addr)
- flag |= NI_NUMERICHOST;
-
- getnameinfo((struct sockaddr *)&sa6_local, sa6_local.sin6_len,
- line, sizeof(line), NULL, 0, flag);
-
- return line;
+ return (line);
}
#endif /*INET6*/
@@ -1018,142 +785,41 @@ routename6(struct sockaddr_in6 *sa6)
* Print routing statistics
*/
void
-rt_stats(u_long rtsaddr, u_long rttaddr)
+rt_stats(void)
{
struct rtstat rtstat;
+ u_long rtsaddr, rttaddr;
int rttrash;
- if (rtsaddr == 0) {
- printf("rtstat: symbol not in namelist\n");
+ if ((rtsaddr = nl[N_RTSTAT].n_value) == 0) {
+ xo_emit("{W:rtstat: symbol not in namelist}\n");
return;
}
- if (rttaddr == 0) {
- printf("rttrash: symbol not in namelist\n");
+ if ((rttaddr = nl[N_RTTRASH].n_value) == 0) {
+ xo_emit("{W:rttrash: symbol not in namelist}\n");
return;
}
kread(rtsaddr, (char *)&rtstat, sizeof (rtstat));
kread(rttaddr, (char *)&rttrash, sizeof (rttrash));
- printf("routing:\n");
+ xo_emit("{T:routing}:\n");
#define p(f, m) if (rtstat.f || sflag <= 1) \
- printf(m, rtstat.f, plural(rtstat.f))
-
- p(rts_badredirect, "\t%hu bad routing redirect%s\n");
- p(rts_dynamic, "\t%hu dynamically created route%s\n");
- p(rts_newgateway, "\t%hu new gateway%s due to redirects\n");
- p(rts_unreach, "\t%hu destination%s found unreachable\n");
- p(rts_wildcard, "\t%hu use%s of a wildcard route\n");
+ xo_emit(m, rtstat.f, plural(rtstat.f))
+
+ p(rts_badredirect, "\t{:bad-redirects/%hu} "
+ "{N:/bad routing redirect%s}\n");
+ p(rts_dynamic, "\t{:dynamically-created/%hu} "
+ "{N:/dynamically created route%s}\n");
+ p(rts_newgateway, "\t{:new-gateways/%hu} "
+ "{N:/new gateway%s due to redirects}\n");
+ p(rts_unreach, "\t{:unreachable-destination/%hu} "
+ "{N:/destination%s found unreachable}\n");
+ p(rts_wildcard, "\t{:wildcard-uses/%hu} "
+ "{N:/use%s of a wildcard route}\n");
#undef p
if (rttrash || sflag <= 1)
- printf("\t%u route%s not in table but not freed\n",
+ xo_emit("\t{:unused-but-not-freed/%u} "
+ "{N:/route%s not in table but not freed}\n",
rttrash, plural(rttrash));
}
-
-#ifndef __rtems__
-char *
-ipx_print(struct sockaddr *sa)
-{
- u_short port;
- struct servent *sp = 0;
- const char *net = "", *host = "";
- char *p;
- u_char *q;
- struct ipx_addr work = ((struct sockaddr_ipx *)sa)->sipx_addr;
- static char mybuf[50];
- char cport[10], chost[15], cnet[15];
-
- port = ntohs(work.x_port);
-
- if (ipx_nullnet(work) && ipx_nullhost(work)) {
-
- if (port) {
- if (sp)
- sprintf(mybuf, "*.%s", sp->s_name);
- else
- sprintf(mybuf, "*.%x", port);
- } else
- sprintf(mybuf, "*.*");
-
- return (mybuf);
- }
-
- if (ipx_wildnet(work))
- net = "any";
- else if (ipx_nullnet(work))
- net = "*";
- else {
- q = work.x_net.c_net;
- sprintf(cnet, "%02x%02x%02x%02x",
- q[0], q[1], q[2], q[3]);
- for (p = cnet; *p == '0' && p < cnet + 8; p++)
- continue;
- net = p;
- }
-
- if (ipx_wildhost(work))
- host = "any";
- else if (ipx_nullhost(work))
- host = "*";
- else {
- q = work.x_host.c_host;
- sprintf(chost, "%02x%02x%02x%02x%02x%02x",
- q[0], q[1], q[2], q[3], q[4], q[5]);
- for (p = chost; *p == '0' && p < chost + 12; p++)
- continue;
- host = p;
- }
-
- if (port) {
- if (strcmp(host, "*") == 0)
- host = "";
- if (sp)
- snprintf(cport, sizeof(cport),
- "%s%s", *host ? "." : "", sp->s_name);
- else
- snprintf(cport, sizeof(cport),
- "%s%x", *host ? "." : "", port);
- } else
- *cport = 0;
-
- snprintf(mybuf, sizeof(mybuf), "%s.%s%s", net, host, cport);
- return(mybuf);
-}
-
-char *
-ipx_phost(struct sockaddr *sa)
-{
- struct sockaddr_ipx *sipx = (struct sockaddr_ipx *)sa;
- struct sockaddr_ipx work;
- static union ipx_net ipx_zeronet;
- char *p;
-
- work = *sipx;
-
- work.sipx_addr.x_port = 0;
- work.sipx_addr.x_net = ipx_zeronet;
- p = ipx_print((struct sockaddr *)&work);
- if (strncmp("*.", p, 2) == 0) p += 2;
-
- return(p);
-}
-#endif /* __rtems__ */
-
-void
-upHex(char *p0)
-{
- char *p = p0;
-
- for (; *p; p++)
- switch (*p) {
-
- case 'a':
- case 'b':
- case 'c':
- case 'd':
- case 'e':
- case 'f':
- *p += ('A' - 'a');
- break;
- }
-}
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-bpf-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-bpf-data.h
new file mode 100644
index 00000000..addc5819
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-bpf-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* bpf.c */
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-data.h
new file mode 100644
index 00000000..ad92820c
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-data.h
@@ -0,0 +1,41 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+/* bpf.c */
+/* flowtable.c */
+/* if.c */
+/* inet6.c */
+/* inet.c */
+/* ipsec.c */
+/* main.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int Aflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int aflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int bflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int dflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int gflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int hflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int iflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int Lflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int mflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int noutputs);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int numeric_addr);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int numeric_port);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int rflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int Rflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int sflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int Wflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int Tflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int xflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int zflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int interval);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern char *interface);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int unit);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern int live);
+/* mbuf.c */
+/* mroute6.c */
+/* mroute.c */
+/* nl_symbols.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, extern struct nlist nl[53]);
+/* pfkey.c */
+/* route.c */
+/* sctp.c */
+/* unix.c */
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-flowtable-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-flowtable-data.h
new file mode 100644
index 00000000..47f5ff7b
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-flowtable-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* flowtable.c */
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-if-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-if-data.h
new file mode 100644
index 00000000..9e12b65e
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-if-data.h
@@ -0,0 +1,5 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* if.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static sig_atomic_t signalled);
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-inet-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-inet-data.h
new file mode 100644
index 00000000..668af81a
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-inet-data.h
@@ -0,0 +1,8 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* inet.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int udp_done);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int tcp_done);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int sdp_done);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int protopr_first);
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-inet6-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-inet6-data.h
new file mode 100644
index 00000000..2cf306a2
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-inet6-data.h
@@ -0,0 +1,8 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* inet6.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static char ntop_buf[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static char const *ip6nh[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static char const *srcrule_str[]);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static char const *icmp6names[]);
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-ipsec-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-ipsec-data.h
new file mode 100644
index 00000000..773181e5
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-ipsec-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* ipsec.c */
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-main-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-main-data.h
new file mode 100644
index 00000000..8adef4dd
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-main-data.h
@@ -0,0 +1,10 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* main.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static kvm_t *kvmd);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static char *nlistf);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static char *memf);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int Bflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int pflag);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int af);
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-mbuf-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-mbuf-data.h
new file mode 100644
index 00000000..debb7485
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-mbuf-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* mbuf.c */
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-mroute-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-mroute-data.h
new file mode 100644
index 00000000..06942d57
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-mroute-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* mroute.c */
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-mroute6-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-mroute6-data.h
new file mode 100644
index 00000000..91288a71
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-mroute6-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* mroute6.c */
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-namespace.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-namespace.h
new file mode 100644
index 00000000..dbbd2c90
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-namespace.h
@@ -0,0 +1,83 @@
+/* generated by userspace-header-gen.py */
+/* bpf.c */
+#define bpf_stats _bsd_netstat_bpf_stats
+/* flowtable.c */
+#define flowtable_stats _bsd_netstat_flowtable_stats
+/* if.c */
+#define intpr _bsd_netstat_intpr
+/* inet6.c */
+#define inet6name _bsd_netstat_inet6name
+#define inet6print _bsd_netstat_inet6print
+#define rip6_stats _bsd_netstat_rip6_stats
+#define pim6_stats _bsd_netstat_pim6_stats
+#define icmp6_ifstats _bsd_netstat_icmp6_ifstats
+#define icmp6_stats _bsd_netstat_icmp6_stats
+#define ip6_ifstats _bsd_netstat_ip6_ifstats
+#define ip6_stats _bsd_netstat_ip6_stats
+/* inet.c */
+#define inetname _bsd_netstat_inetname
+#define inetprint _bsd_netstat_inetprint
+#define pim_stats _bsd_netstat_pim_stats
+#define igmp_stats _bsd_netstat_igmp_stats
+#define icmp_stats _bsd_netstat_icmp_stats
+#define arp_stats _bsd_netstat_arp_stats
+#define ip_stats _bsd_netstat_ip_stats
+#define carp_stats _bsd_netstat_carp_stats
+#define udp_stats _bsd_netstat_udp_stats
+#define tcp_stats _bsd_netstat_tcp_stats
+#define protopr _bsd_netstat_protopr
+#define sotoxsocket _bsd_netstat_sotoxsocket
+/* ipsec.c */
+/* main.c */
+#define Aflag _bsd_netstat_Aflag
+#define aflag _bsd_netstat_aflag
+#define bflag _bsd_netstat_bflag
+#define dflag _bsd_netstat_dflag
+#define gflag _bsd_netstat_gflag
+#define hflag _bsd_netstat_hflag
+#define iflag _bsd_netstat_iflag
+#define Lflag _bsd_netstat_Lflag
+#define mflag _bsd_netstat_mflag
+#define noutputs _bsd_netstat_noutputs
+#define numeric_addr _bsd_netstat_numeric_addr
+#define numeric_port _bsd_netstat_numeric_port
+#define rflag _bsd_netstat_rflag
+#define Rflag _bsd_netstat_Rflag
+#define sflag _bsd_netstat_sflag
+#define Wflag _bsd_netstat_Wflag
+#define Tflag _bsd_netstat_Tflag
+#define xflag _bsd_netstat_xflag
+#define zflag _bsd_netstat_zflag
+#define interval _bsd_netstat_interval
+#define interface _bsd_netstat_interface
+#define unit _bsd_netstat_unit
+#define live _bsd_netstat_live
+#define pluralies _bsd_netstat_pluralies
+#define plurales _bsd_netstat_plurales
+#define plural _bsd_netstat_plural
+#define kread_counters _bsd_netstat_kread_counters
+#define kread_counter _bsd_netstat_kread_counter
+#define kread _bsd_netstat_kread
+#define kset_dpcpu _bsd_netstat_kset_dpcpu
+#define fetch_stats_ro _bsd_netstat_fetch_stats_ro
+#define fetch_stats _bsd_netstat_fetch_stats
+/* mbuf.c */
+#define mbpr _bsd_netstat_mbpr
+/* mroute6.c */
+#define mrt6_stats _bsd_netstat_mrt6_stats
+#define mroute6pr _bsd_netstat_mroute6pr
+/* mroute.c */
+#define mrt_stats _bsd_netstat_mrt_stats
+#define mroutepr _bsd_netstat_mroutepr
+/* nl_symbols.c */
+#define nl _bsd_netstat_nl
+/* pfkey.c */
+/* route.c */
+#define rt_stats _bsd_netstat_rt_stats
+#define in6_fillscopeid _bsd_netstat_in6_fillscopeid
+#define netname _bsd_netstat_netname
+#define routename _bsd_netstat_routename
+#define pr_family _bsd_netstat_pr_family
+#define routepr _bsd_netstat_routepr
+/* sctp.c */
+/* unix.c */
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-nl_symbols-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-nl_symbols-data.h
new file mode 100644
index 00000000..b1e466dc
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-nl_symbols-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* nl_symbols.c */
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-pfkey-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-pfkey-data.h
new file mode 100644
index 00000000..e1505d6d
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-pfkey-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* pfkey.c */
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-route-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-route-data.h
new file mode 100644
index 00000000..d701a079
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-route-data.h
@@ -0,0 +1,15 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* route.c */
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static struct ifmap_entry *ifmap);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int ifmap_size);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static struct timespec uptime);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int wid_dst);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int wid_gw);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int wid_flags);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int wid_pksent);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int wid_mtu);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int wid_if);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int wid_expire);
+RTEMS_LINKER_RWSET_CONTENT(bsd_prog_netstat, static int masktolen[]);
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-sctp-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-sctp-data.h
new file mode 100644
index 00000000..9c086d3a
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-sctp-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* sctp.c */
diff --git a/freebsd/usr.bin/netstat/rtems-bsd-netstat-unix-data.h b/freebsd/usr.bin/netstat/rtems-bsd-netstat-unix-data.h
new file mode 100644
index 00000000..3edf8adb
--- /dev/null
+++ b/freebsd/usr.bin/netstat/rtems-bsd-netstat-unix-data.h
@@ -0,0 +1,4 @@
+/* generated by userspace-header-gen.py */
+#include <rtems/linkersets.h>
+#include "rtems-bsd-netstat-data.h"
+/* unix.c */
diff --git a/freebsd/usr.bin/netstat/sctp.c b/freebsd/usr.bin/netstat/sctp.c
index 4f27b2ca..88998c2f 100644
--- a/freebsd/usr.bin/netstat/sctp.c
+++ b/freebsd/usr.bin/netstat/sctp.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright (c) 2001-2007, by Weongyo Jeong. All rights reserved.
* Copyright (c) 2011, by Michael Tuexen. All rights reserved.
@@ -37,6 +41,9 @@ static char sccsid[] = "@(#)sctp.c 0.1 (Berkeley) 4/18/2007";
#endif /* not lint */
#endif
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -60,9 +67,14 @@ __FBSDID("$FreeBSD$");
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include "netstat.h"
+#include <libxo/xo.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-sctp-data.h"
+#endif /* __rtems__ */
#ifdef SCTP
@@ -79,7 +91,7 @@ static void sctp_statesprint(uint32_t state);
#define NETSTAT_SCTP_STATES_SHUTDOWN_ACK_SENT 0x8
#define NETSTAT_SCTP_STATES_SHUTDOWN_PENDING 0x9
-char *sctpstates[] = {
+static const char *sctpstates[] = {
"CLOSED",
"BOUND",
"LISTEN",
@@ -92,114 +104,39 @@ char *sctpstates[] = {
"SHUTDOWN_PENDING"
};
-LIST_HEAD(xladdr_list, xladdr_entry) xladdr_head;
+static LIST_HEAD(xladdr_list, xladdr_entry) xladdr_head;
struct xladdr_entry {
struct xsctp_laddr *xladdr;
LIST_ENTRY(xladdr_entry) xladdr_entries;
};
-LIST_HEAD(xraddr_list, xraddr_entry) xraddr_head;
+static LIST_HEAD(xraddr_list, xraddr_entry) xraddr_head;
struct xraddr_entry {
- struct xsctp_raddr *xraddr;
- LIST_ENTRY(xraddr_entry) xraddr_entries;
+ struct xsctp_raddr *xraddr;
+ LIST_ENTRY(xraddr_entry) xraddr_entries;
};
-/*
- * Construct an Internet address representation.
- * If numeric_addr has been supplied, give
- * numeric value, otherwise try for symbolic name.
- */
#ifdef INET
-static char *
-inetname(struct in_addr *inp)
-{
- char *cp;
- static char line[MAXHOSTNAMELEN];
- struct hostent *hp;
- struct netent *np;
-
- cp = 0;
- if (!numeric_addr && inp->s_addr != INADDR_ANY) {
- int net = inet_netof(*inp);
- int lna = inet_lnaof(*inp);
-
- if (lna == INADDR_ANY) {
- np = getnetbyaddr(net, AF_INET);
- if (np)
- cp = np->n_name;
- }
- if (cp == 0) {
- hp = gethostbyaddr((char *)inp, sizeof (*inp), AF_INET);
- if (hp) {
- cp = hp->h_name;
- trimdomain(cp, strlen(cp));
- }
- }
- }
- if (inp->s_addr == INADDR_ANY)
- strcpy(line, "*");
- else if (cp) {
- strlcpy(line, cp, sizeof(line));
- } else {
- inp->s_addr = ntohl(inp->s_addr);
-#define C(x) ((u_int)((x) & 0xff))
- sprintf(line, "%u.%u.%u.%u", C(inp->s_addr >> 24),
- C(inp->s_addr >> 16), C(inp->s_addr >> 8), C(inp->s_addr));
- inp->s_addr = htonl(inp->s_addr);
- }
- return (line);
-}
+char *
+inetname(struct in_addr *inp);
#endif
#ifdef INET6
-static char ntop_buf[INET6_ADDRSTRLEN];
-
-static char *
-inet6name(struct in6_addr *in6p)
-{
- char *cp;
- static char line[50];
- struct hostent *hp;
- static char domain[MAXHOSTNAMELEN];
- static int first = 1;
-
- if (first && !numeric_addr) {
- first = 0;
- if (gethostname(domain, MAXHOSTNAMELEN) == 0 &&
- (cp = index(domain, '.')))
- (void) strcpy(domain, cp + 1);
- else
- domain[0] = 0;
- }
- cp = 0;
- if (!numeric_addr && !IN6_IS_ADDR_UNSPECIFIED(in6p)) {
- hp = gethostbyaddr((char *)in6p, sizeof(*in6p), AF_INET6);
- if (hp) {
- if ((cp = index(hp->h_name, '.')) &&
- !strcmp(cp + 1, domain))
- *cp = 0;
- cp = hp->h_name;
- }
- }
- if (IN6_IS_ADDR_UNSPECIFIED(in6p))
- strcpy(line, "*");
- else if (cp)
- strcpy(line, cp);
- else
- sprintf(line, "%s",
- inet_ntop(AF_INET6, (void *)in6p, ntop_buf,
- sizeof(ntop_buf)));
- return (line);
-}
+char *
+inet6name(struct in6_addr *in6p);
#endif
static void
-sctp_print_address(union sctp_sockstore *address, int port, int num_port)
+sctp_print_address(const char *container, union sctp_sockstore *address,
+ int port, int num_port)
{
struct servent *sp = 0;
char line[80], *cp;
int width;
+ if (container)
+ xo_open_container(container);
+
switch (address->sa.sa_family) {
#ifdef INET
case AF_INET:
@@ -215,7 +152,7 @@ sctp_print_address(union sctp_sockstore *address, int port, int num_port)
sprintf(line, "%.*s.", Wflag ? 39 : 16, "");
break;
}
- cp = index(line, '\0');
+ cp = strchr(line, '\0');
if (!num_port && port)
sp = getservbyport((int)port, "sctp");
if (sp || port == 0)
@@ -223,7 +160,14 @@ sctp_print_address(union sctp_sockstore *address, int port, int num_port)
else
sprintf(cp, "%d ", ntohs((u_short)port));
width = Wflag ? 45 : 22;
- printf("%-*.*s ", width, width, line);
+ xo_emit("{d:target/%-*.*s} ", width, width, line);
+
+ int alen = cp - line - 1, plen = strlen(cp) - 1;
+ xo_emit("{e:address/%*.*s}{e:port/%*.*s}", alen, alen, line, plen,
+ plen, cp);
+
+ if (container)
+ xo_close_container(container);
}
static int
@@ -299,7 +243,7 @@ sctp_process_tcb(struct xsctp_tcb *xstcb,
prev_xl = xl;
xl = malloc(sizeof(struct xladdr_entry));
if (xl == NULL) {
- warnx("malloc %lu bytes",
+ xo_warnx("malloc %lu bytes",
(u_long)sizeof(struct xladdr_entry));
goto out;
}
@@ -320,7 +264,7 @@ sctp_process_tcb(struct xsctp_tcb *xstcb,
prev_xr = xr;
xr = malloc(sizeof(struct xraddr_entry));
if (xr == NULL) {
- warnx("malloc %lu bytes",
+ xo_warnx("malloc %lu bytes",
(u_long)sizeof(struct xraddr_entry));
goto out;
}
@@ -335,26 +279,29 @@ sctp_process_tcb(struct xsctp_tcb *xstcb,
/*
* Let's print the address infos.
*/
+ xo_open_list("address");
xl = LIST_FIRST(&xladdr_head);
xr = LIST_FIRST(&xraddr_head);
- x_max = (xl_total > xr_total) ? xl_total : xr_total;
+ x_max = MAX(xl_total, xr_total);
for (i = 0; i < x_max; i++) {
+ xo_open_instance("address");
+
if (((*indent == 0) && i > 0) || *indent > 0)
- printf("%-12s ", " ");
+ xo_emit("{P:/%-12s} ", " ");
if (xl != NULL) {
- sctp_print_address(&(xl->xladdr->address),
+ sctp_print_address("local", &(xl->xladdr->address),
htons(xstcb->local_port), numeric_port);
} else {
if (Wflag) {
- printf("%-45s ", " ");
+ xo_emit("{P:/%-45s} ", " ");
} else {
- printf("%-22s ", " ");
+ xo_emit("{P:/%-22s} ", " ");
}
}
if (xr != NULL && !Lflag) {
- sctp_print_address(&(xr->xraddr->address),
+ sctp_print_address("remote", &(xr->xraddr->address),
htons(xstcb->remote_port), numeric_port);
}
@@ -367,7 +314,8 @@ sctp_process_tcb(struct xsctp_tcb *xstcb,
sctp_statesprint(xstcb->state);
if (i < x_max)
- putchar('\n');
+ xo_emit("\n");
+ xo_close_instance("address");
}
out:
@@ -395,7 +343,7 @@ sctp_process_inpcb(struct xsctp_inpcb *xinpcb,
{
int indent = 0, xladdr_total = 0, is_listening = 0;
static int first = 1;
- char *tname, *pname;
+ const char *tname, *pname;
struct xsctp_tcb *xstcb;
struct xsctp_laddr *xladdr;
size_t offset_laddr;
@@ -406,30 +354,34 @@ sctp_process_inpcb(struct xsctp_inpcb *xinpcb,
if (first) {
if (!Lflag) {
- printf("Active SCTP associations");
+ xo_emit("Active SCTP associations");
if (aflag)
- printf(" (including servers)");
+ xo_emit(" (including servers)");
} else
- printf("Current listen queue sizes (qlen/maxqlen)");
- putchar('\n');
+ xo_emit("Current listen queue sizes (qlen/maxqlen)");
+ xo_emit("\n");
if (Lflag)
- printf("%-6.6s %-5.5s %-8.8s %-22.22s\n",
+ xo_emit("{T:/%-6.6s} {T:/%-5.5s} {T:/%-8.8s} "
+ "{T:/%-22.22s}\n",
"Proto", "Type", "Listen", "Local Address");
else
if (Wflag)
- printf("%-6.6s %-5.5s %-45.45s %-45.45s %s\n",
+ xo_emit("{T:/%-6.6s} {T:/%-5.5s} {T:/%-45.45s} "
+ "{T:/%-45.45s} {T:/%s}\n",
"Proto", "Type",
"Local Address", "Foreign Address",
"(state)");
else
- printf("%-6.6s %-5.5s %-22.22s %-22.22s %s\n",
+ xo_emit("{T:/%-6.6s} {T:/%-5.5s} {T:/%-22.22s} "
+ "{T:/%-22.22s} {T:/%s}\n",
"Proto", "Type",
"Local Address", "Foreign Address",
"(state)");
first = 0;
}
xladdr = (struct xsctp_laddr *)(buf + *offset);
- if (Lflag && !is_listening) {
+ if ((!aflag && is_listening) ||
+ (Lflag && !is_listening)) {
sctp_skip_xinpcb_ifneed(buf, buflen, offset);
return;
}
@@ -449,30 +401,42 @@ sctp_process_inpcb(struct xsctp_inpcb *xinpcb,
tname = "????";
if (Lflag) {
- char buf1[9];
-
- snprintf(buf1, 9, "%hu/%hu", xinpcb->qlen, xinpcb->maxqlen);
- printf("%-6.6s %-5.5s ", pname, tname);
- printf("%-8.8s ", buf1);
+ char buf1[22];
+
+ snprintf(buf1, sizeof buf1, "%u/%u",
+ xinpcb->qlen, xinpcb->maxqlen);
+ xo_emit("{:protocol/%-6.6s/%s} {:type/%-5.5s/%s} ",
+ pname, tname);
+ xo_emit("{d:queues/%-8.8s}{e:queue-len/%hu}"
+ "{e:max-queue-len/%hu} ",
+ buf1, xinpcb->qlen, xinpcb->maxqlen);
}
offset_laddr = *offset;
process_closed = 0;
+
+ xo_open_list("local-address");
retry:
while (*offset < buflen) {
xladdr = (struct xsctp_laddr *)(buf + *offset);
*offset += sizeof(struct xsctp_laddr);
if (xladdr->last) {
if (aflag && !Lflag && (xladdr_total == 0) && process_closed) {
- printf("%-6.6s %-5.5s ", pname, tname);
+ xo_open_instance("local-address");
+
+ xo_emit("{:protocol/%-6.6s/%s} "
+ "{:type/%-5.5s/%s} ", pname, tname);
if (Wflag) {
- printf("%-91.91s CLOSED", " ");
+ xo_emit("{P:/%-91.91s/%s} "
+ "{:state/CLOSED}", " ");
} else {
- printf("%-45.45s CLOSED", " ");
+ xo_emit("{P:/%-45.45s/%s} "
+ "{:state/CLOSED}", " ");
}
+ xo_close_instance("local-address");
}
if (process_closed || is_listening) {
- putchar('\n');
+ xo_emit("\n");
}
break;
}
@@ -480,31 +444,41 @@ retry:
if (!Lflag && !is_listening && !process_closed)
continue;
+ xo_open_instance("local-address");
+
if (xladdr_total == 0) {
- printf("%-6.6s %-5.5s ", pname, tname);
+ if (!Lflag) {
+ xo_emit("{:protocol/%-6.6s/%s} "
+ "{:type/%-5.5s/%s} ", pname, tname);
+ }
} else {
- putchar('\n');
- printf((Lflag) ?
- "%-21.21s " : "%-12.12s ", " ");
+ xo_emit("\n");
+ xo_emit(Lflag ? "{P:/%-21.21s} " : "{P:/%-12.12s} ",
+ " ");
}
- sctp_print_address(&(xladdr->address),
+ sctp_print_address("local", &(xladdr->address),
htons(xinpcb->local_port), numeric_port);
if (aflag && !Lflag && xladdr_total == 0) {
if (Wflag) {
if (process_closed) {
- printf("%-45.45s CLOSED", " ");
+ xo_emit("{P:/%-45.45s} "
+ "{:state/CLOSED}", " ");
} else {
- printf("%-45.45s LISTEN", " ");
+ xo_emit("{P:/%-45.45s} "
+ "{:state/LISTEN}", " ");
}
} else {
if (process_closed) {
- printf("%-22.22s CLOSED", " ");
+ xo_emit("{P:/%-22.22s} "
+ "{:state/CLOSED}", " ");
} else {
- printf("%-22.22s LISTEN", " ");
+ xo_emit("{P:/%-22.22s} "
+ "{:state/LISTEN}", " ");
}
}
}
xladdr_total++;
+ xo_close_instance("local-address");
}
xstcb = (struct xsctp_tcb *)(buf + *offset);
@@ -515,12 +489,15 @@ retry:
goto retry;
}
while (xstcb->last == 0 && *offset < buflen) {
- printf("%-6.6s %-5.5s ", pname, tname);
+ xo_emit("{:protocol/%-6.6s/%s} {:type/%-5.5s/%s} ",
+ pname, tname);
sctp_process_tcb(xstcb, buf, buflen, offset, &indent);
indent++;
xstcb = (struct xsctp_tcb *)(buf + *offset);
*offset += sizeof(struct xsctp_tcb);
}
+
+ xo_close_list("local-address");
}
/*
@@ -529,7 +506,7 @@ retry:
*/
void
sctp_protopr(u_long off __unused,
- const char *name, int af1, int proto)
+ const char *name __unused, int af1 __unused, int proto)
{
char *buf;
const char *mibvar = "net.inet.sctp.assoclist";
@@ -542,15 +519,15 @@ sctp_protopr(u_long off __unused,
if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) {
if (errno != ENOENT)
- warn("sysctl: %s", mibvar);
+ xo_warn("sysctl: %s", mibvar);
return;
}
- if ((buf = malloc(len)) == 0) {
- warnx("malloc %lu bytes", (u_long)len);
+ if ((buf = malloc(len)) == NULL) {
+ xo_warnx("malloc %lu bytes", (u_long)len);
return;
}
if (sysctlbyname(mibvar, buf, &len, 0, 0) < 0) {
- warn("sysctl: %s", mibvar);
+ xo_warn("sysctl: %s", mibvar);
free(buf);
return;
}
@@ -574,33 +551,42 @@ sctp_statesprint(uint32_t state)
int idx;
switch (state) {
- case SCTP_STATE_COOKIE_WAIT:
+ case SCTP_CLOSED:
+ idx = NETSTAT_SCTP_STATES_CLOSED;
+ break;
+ case SCTP_BOUND:
+ idx = NETSTAT_SCTP_STATES_BOUND;
+ break;
+ case SCTP_LISTEN:
+ idx = NETSTAT_SCTP_STATES_LISTEN;
+ break;
+ case SCTP_COOKIE_WAIT:
idx = NETSTAT_SCTP_STATES_COOKIE_WAIT;
break;
- case SCTP_STATE_COOKIE_ECHOED:
+ case SCTP_COOKIE_ECHOED:
idx = NETSTAT_SCTP_STATES_COOKIE_ECHOED;
break;
- case SCTP_STATE_OPEN:
+ case SCTP_ESTABLISHED:
idx = NETSTAT_SCTP_STATES_ESTABLISHED;
break;
- case SCTP_STATE_SHUTDOWN_SENT:
+ case SCTP_SHUTDOWN_SENT:
idx = NETSTAT_SCTP_STATES_SHUTDOWN_SENT;
break;
- case SCTP_STATE_SHUTDOWN_RECEIVED:
+ case SCTP_SHUTDOWN_RECEIVED:
idx = NETSTAT_SCTP_STATES_SHUTDOWN_RECEIVED;
break;
- case SCTP_STATE_SHUTDOWN_ACK_SENT:
+ case SCTP_SHUTDOWN_ACK_SENT:
idx = NETSTAT_SCTP_STATES_SHUTDOWN_ACK_SENT;
break;
- case SCTP_STATE_SHUTDOWN_PENDING:
+ case SCTP_SHUTDOWN_PENDING:
idx = NETSTAT_SCTP_STATES_SHUTDOWN_PENDING;
break;
default:
- printf("UNKNOWN 0x%08x", state);
+ xo_emit("UNKNOWN {:state/0x%08x}", state);
return;
}
- printf("%s", sctpstates[idx]);
+ xo_emit("{:state/%s}", sctpstates[idx]);
}
/*
@@ -609,105 +595,160 @@ sctp_statesprint(uint32_t state)
void
sctp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
{
- struct sctpstat sctpstat, zerostat;
- size_t len = sizeof(sctpstat);
-
- if (live) {
- if (zflag)
- memset(&zerostat, 0, len);
- if (sysctlbyname("net.inet.sctp.stats", &sctpstat, &len,
- zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
- if (errno != ENOENT)
- warn("sysctl: net.inet.sctp.stats");
- return;
- }
- } else
- kread(off, &sctpstat, len);
+ struct sctpstat sctpstat;
+
+ if (fetch_stats("net.inet.sctp.stats", off, &sctpstat,
+ sizeof(sctpstat), kread) != 0)
+ return;
- printf ("%s:\n", name);
+ xo_open_container(name);
+ xo_emit("{T:/%s}:\n", name);
#define p(f, m) if (sctpstat.f || sflag <= 1) \
- printf(m, (uintmax_t)sctpstat.f, plural(sctpstat.f))
+ xo_emit(m, (uintmax_t)sctpstat.f, plural(sctpstat.f))
#define p1a(f, m) if (sctpstat.f || sflag <= 1) \
- printf(m, (uintmax_t)sctpstat.f)
+ xo_emit(m, (uintmax_t)sctpstat.f)
/*
* input statistics
*/
- p(sctps_recvpackets, "\t%ju input packet%s\n");
- p(sctps_recvdatagrams, "\t\t%ju datagram%s\n");
- p(sctps_recvpktwithdata, "\t\t%ju packet%s that had data\n");
- p(sctps_recvsacks, "\t\t%ju input SACK chunk%s\n");
- p(sctps_recvdata, "\t\t%ju input DATA chunk%s\n");
- p(sctps_recvdupdata, "\t\t%ju duplicate DATA chunk%s\n");
- p(sctps_recvheartbeat, "\t\t%ju input HB chunk%s\n");
- p(sctps_recvheartbeatack, "\t\t%ju HB-ACK chunk%s\n");
- p(sctps_recvecne, "\t\t%ju input ECNE chunk%s\n");
- p(sctps_recvauth, "\t\t%ju input AUTH chunk%s\n");
- p(sctps_recvauthmissing, "\t\t%ju chunk%s missing AUTH\n");
- p(sctps_recvivalhmacid, "\t\t%ju invalid HMAC id%s received\n");
- p(sctps_recvivalkeyid, "\t\t%ju invalid secret id%s received\n");
- p1a(sctps_recvauthfailed, "\t\t%ju auth failed\n");
- p1a(sctps_recvexpress, "\t\t%ju fast path receives all one chunk\n");
- p1a(sctps_recvexpressm, "\t\t%ju fast path multi-part data\n");
+ p(sctps_recvpackets, "\t{:received-packets/%ju} "
+ "{N:/input packet%s}\n");
+ p(sctps_recvdatagrams, "\t\t{:received-datagrams/%ju} "
+ "{N:/datagram%s}\n");
+ p(sctps_recvpktwithdata, "\t\t{:received-with-data/%ju} "
+ "{N:/packet%s that had data}\n");
+ p(sctps_recvsacks, "\t\t{:received-sack-chunks/%ju} "
+ "{N:/input SACK chunk%s}\n");
+ p(sctps_recvdata, "\t\t{:received-data-chunks/%ju} "
+ "{N:/input DATA chunk%s}\n");
+ p(sctps_recvdupdata, "\t\t{:received-duplicate-data-chunks/%ju} "
+ "{N:/duplicate DATA chunk%s}\n");
+ p(sctps_recvheartbeat, "\t\t{:received-hb-chunks/%ju} "
+ "{N:/input HB chunk%s}\n");
+ p(sctps_recvheartbeatack, "\t\t{:received-hb-ack-chunks/%ju} "
+ "{N:/HB-ACK chunk%s}\n");
+ p(sctps_recvecne, "\t\t{:received-ecne-chunks/%ju} "
+ "{N:/input ECNE chunk%s}\n");
+ p(sctps_recvauth, "\t\t{:received-auth-chunks/%ju} "
+ "{N:/input AUTH chunk%s}\n");
+ p(sctps_recvauthmissing, "\t\t{:dropped-missing-auth/%ju} "
+ "{N:/chunk%s missing AUTH}\n");
+ p(sctps_recvivalhmacid, "\t\t{:dropped-invalid-hmac/%ju} "
+ "{N:/invalid HMAC id%s received}\n");
+ p(sctps_recvivalkeyid, "\t\t{:dropped-invalid-secret/%ju} "
+ "{N:/invalid secret id%s received}\n");
+ p1a(sctps_recvauthfailed, "\t\t{:dropped-auth-failed/%ju} "
+ "{N:/auth failed}\n");
+ p1a(sctps_recvexpress, "\t\t{:received-fast-path/%ju} "
+ "{N:/fast path receives all one chunk}\n");
+ p1a(sctps_recvexpressm, "\t\t{:receives-fast-path-multipart/%ju} "
+ "{N:/fast path multi-part data}\n");
/*
* output statistics
*/
- p(sctps_sendpackets, "\t%ju output packet%s\n");
- p(sctps_sendsacks, "\t\t%ju output SACK%s\n");
- p(sctps_senddata, "\t\t%ju output DATA chunk%s\n");
- p(sctps_sendretransdata, "\t\t%ju retransmitted DATA chunk%s\n");
- p(sctps_sendfastretrans, "\t\t%ju fast retransmitted DATA chunk%s\n");
- p(sctps_sendmultfastretrans, "\t\t%ju FR'%s that happened more "
- "than once to same chunk\n");
- p(sctps_sendheartbeat, "\t\t%ju output HB chunk%s\n");
- p(sctps_sendecne, "\t\t%ju output ECNE chunk%s\n");
- p(sctps_sendauth, "\t\t%ju output AUTH chunk%s\n");
- p1a(sctps_senderrors, "\t\t%ju ip_output error counter\n");
+ p(sctps_sendpackets, "\t{:sent-packets/%ju} "
+ "{N:/output packet%s}\n");
+ p(sctps_sendsacks, "\t\t{:sent-sacks/%ju} "
+ "{N:/output SACK%s}\n");
+ p(sctps_senddata, "\t\t{:sent-data-chunks/%ju} "
+ "{N:/output DATA chunk%s}\n");
+ p(sctps_sendretransdata, "\t\t{:sent-retransmitted-data-chunks/%ju} "
+ "{N:/retransmitted DATA chunk%s}\n");
+ p(sctps_sendfastretrans, "\t\t"
+ "{:sent-fast-retransmitted-data-chunks/%ju} "
+ "{N:/fast retransmitted DATA chunk%s}\n");
+ p(sctps_sendmultfastretrans, "\t\t"
+ "{:sent-fast-retransmitted-data-chunk-multiple-times/%ju} "
+ "{N:/FR'%s that happened more than once to same chunk}\n");
+ p(sctps_sendheartbeat, "\t\t{:sent-hb-chunks/%ju} "
+ "{N:/output HB chunk%s}\n");
+ p(sctps_sendecne, "\t\t{:sent-ecne-chunks/%ju} "
+ "{N:/output ECNE chunk%s}\n");
+ p(sctps_sendauth, "\t\t{:sent-auth-chunks/%ju} "
+ "{N:/output AUTH chunk%s}\n");
+ p1a(sctps_senderrors, "\t\t{:send-errors/%ju} "
+ "{N:/ip_output error counter}\n");
/*
* PCKDROPREP statistics
*/
- printf("\tPacket drop statistics:\n");
- p1a(sctps_pdrpfmbox, "\t\t%ju from middle box\n");
- p1a(sctps_pdrpfehos, "\t\t%ju from end host\n");
- p1a(sctps_pdrpmbda, "\t\t%ju with data\n");
- p1a(sctps_pdrpmbct, "\t\t%ju non-data, non-endhost\n");
- p1a(sctps_pdrpbwrpt, "\t\t%ju non-endhost, bandwidth rep only\n");
- p1a(sctps_pdrpcrupt, "\t\t%ju not enough for chunk header\n");
- p1a(sctps_pdrpnedat, "\t\t%ju not enough data to confirm\n");
- p1a(sctps_pdrppdbrk, "\t\t%ju where process_chunk_drop said break\n");
- p1a(sctps_pdrptsnnf, "\t\t%ju failed to find TSN\n");
- p1a(sctps_pdrpdnfnd, "\t\t%ju attempt reverse TSN lookup\n");
- p1a(sctps_pdrpdiwnp, "\t\t%ju e-host confirms zero-rwnd\n");
- p1a(sctps_pdrpdizrw, "\t\t%ju midbox confirms no space\n");
- p1a(sctps_pdrpbadd, "\t\t%ju data did not match TSN\n");
- p(sctps_pdrpmark, "\t\t%ju TSN'%s marked for Fast Retran\n");
+ xo_emit("\t{T:Packet drop statistics}:\n");
+ xo_open_container("drop-statistics");
+ p1a(sctps_pdrpfmbox, "\t\t{:middle-box/%ju} "
+ "{N:/from middle box}\n");
+ p1a(sctps_pdrpfehos, "\t\t{:end-host/%ju} "
+ "{N:/from end host}\n");
+ p1a(sctps_pdrpmbda, "\t\t{:with-data/%ju} "
+ "{N:/with data}\n");
+ p1a(sctps_pdrpmbct, "\t\t{:non-data/%ju} "
+ "{N:/non-data, non-endhost}\n");
+ p1a(sctps_pdrpbwrpt, "\t\t{:non-endhost/%ju} "
+ "{N:/non-endhost, bandwidth rep only}\n");
+ p1a(sctps_pdrpcrupt, "\t\t{:short-header/%ju} "
+ "{N:/not enough for chunk header}\n");
+ p1a(sctps_pdrpnedat, "\t\t{:short-data/%ju} "
+ "{N:/not enough data to confirm}\n");
+ p1a(sctps_pdrppdbrk, "\t\t{:chunk-break/%ju} "
+ "{N:/where process_chunk_drop said break}\n");
+ p1a(sctps_pdrptsnnf, "\t\t{:tsn-not-found/%ju} "
+ "{N:/failed to find TSN}\n");
+ p1a(sctps_pdrpdnfnd, "\t\t{:reverse-tsn/%ju} "
+ "{N:/attempt reverse TSN lookup}\n");
+ p1a(sctps_pdrpdiwnp, "\t\t{:confirmed-zero-window/%ju} "
+ "{N:/e-host confirms zero-rwnd}\n");
+ p1a(sctps_pdrpdizrw, "\t\t{:middle-box-no-space/%ju} "
+ "{N:/midbox confirms no space}\n");
+ p1a(sctps_pdrpbadd, "\t\t{:bad-data/%ju} "
+ "{N:/data did not match TSN}\n");
+ p(sctps_pdrpmark, "\t\t{:tsn-marked-fast-retransmission/%ju} "
+ "{N:/TSN'%s marked for Fast Retran}\n");
+ xo_close_container("drop-statistics");
/*
* Timeouts
*/
- printf("\tTimeouts:\n");
- p(sctps_timoiterator, "\t\t%ju iterator timer%s fired\n");
- p(sctps_timodata, "\t\t%ju T3 data time out%s\n");
- p(sctps_timowindowprobe, "\t\t%ju window probe (T3) timer%s fired\n");
- p(sctps_timoinit, "\t\t%ju INIT timer%s fired\n");
- p(sctps_timosack, "\t\t%ju sack timer%s fired\n");
- p(sctps_timoshutdown, "\t\t%ju shutdown timer%s fired\n");
- p(sctps_timoheartbeat, "\t\t%ju heartbeat timer%s fired\n");
- p1a(sctps_timocookie, "\t\t%ju a cookie timeout fired\n");
- p1a(sctps_timosecret, "\t\t%ju an endpoint changed its cookie"
+ xo_emit("\t{T:Timeouts}:\n");
+ xo_open_container("timeouts");
+ p(sctps_timoiterator, "\t\t{:iterator/%ju} "
+ "{N:/iterator timer%s fired}\n");
+ p(sctps_timodata, "\t\t{:t3-data/%ju} "
+ "{N:/T3 data time out%s}\n");
+ p(sctps_timowindowprobe, "\t\t{:window-probe/%ju} "
+ "{N:/window probe (T3) timer%s fired}\n");
+ p(sctps_timoinit, "\t\t{:init-timer/%ju} "
+ "{N:/INIT timer%s fired}\n");
+ p(sctps_timosack, "\t\t{:sack-timer/%ju} "
+ "{N:/sack timer%s fired}\n");
+ p(sctps_timoshutdown, "\t\t{:shutdown-timer/%ju} "
+ "{N:/shutdown timer%s fired}\n");
+ p(sctps_timoheartbeat, "\t\t{:heartbeat-timer/%ju} "
+ "{N:/heartbeat timer%s fired}\n");
+ p1a(sctps_timocookie, "\t\t{:cookie-timer/%ju} "
+ "{N:/a cookie timeout fired}\n");
+ p1a(sctps_timosecret, "\t\t{:endpoint-changed-cookie/%ju} "
+	    "{N:/an endpoint changed its cookie}"
"secret\n");
- p(sctps_timopathmtu, "\t\t%ju PMTU timer%s fired\n");
- p(sctps_timoshutdownack, "\t\t%ju shutdown ack timer%s fired\n");
- p(sctps_timoshutdownguard, "\t\t%ju shutdown guard timer%s fired\n");
- p(sctps_timostrmrst, "\t\t%ju stream reset timer%s fired\n");
- p(sctps_timoearlyfr, "\t\t%ju early FR timer%s fired\n");
- p1a(sctps_timoasconf, "\t\t%ju an asconf timer fired\n");
- p1a(sctps_timoautoclose, "\t\t%ju auto close timer fired\n");
- p(sctps_timoassockill, "\t\t%ju asoc free timer%s expired\n");
- p(sctps_timoinpkill, "\t\t%ju inp free timer%s expired\n");
+ p(sctps_timopathmtu, "\t\t{:pmtu-timer/%ju} "
+ "{N:/PMTU timer%s fired}\n");
+ p(sctps_timoshutdownack, "\t\t{:shutdown-timer/%ju} "
+ "{N:/shutdown ack timer%s fired}\n");
+ p(sctps_timoshutdownguard, "\t\t{:shutdown-guard-timer/%ju} "
+ "{N:/shutdown guard timer%s fired}\n");
+ p(sctps_timostrmrst, "\t\t{:stream-reset-timer/%ju} "
+ "{N:/stream reset timer%s fired}\n");
+ p(sctps_timoearlyfr, "\t\t{:early-fast-retransmission-timer/%ju} "
+ "{N:/early FR timer%s fired}\n");
+ p1a(sctps_timoasconf, "\t\t{:asconf-timer/%ju} "
+ "{N:/an asconf timer fired}\n");
+ p1a(sctps_timoautoclose, "\t\t{:auto-close-timer/%ju} "
+ "{N:/auto close timer fired}\n");
+ p(sctps_timoassockill, "\t\t{:asoc-free-timer/%ju} "
+ "{N:/asoc free timer%s expired}\n");
+ p(sctps_timoinpkill, "\t\t{:input-free-timer/%ju} "
+ "{N:/inp free timer%s expired}\n");
+ xo_close_container("timeouts");
#if 0
/*
@@ -729,60 +770,86 @@ sctp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
/*
* Others
*/
- p1a(sctps_hdrops, "\t%ju packet shorter than header\n");
- p1a(sctps_badsum, "\t%ju checksum error\n");
- p1a(sctps_noport, "\t%ju no endpoint for port\n");
- p1a(sctps_badvtag, "\t%ju bad v-tag\n");
- p1a(sctps_badsid, "\t%ju bad SID\n");
- p1a(sctps_nomem, "\t%ju no memory\n");
- p1a(sctps_fastretransinrtt, "\t%ju number of multiple FR in a RTT "
- "window\n");
+ p1a(sctps_hdrops, "\t{:dropped-too-short/%ju} "
+ "{N:/packet shorter than header}\n");
+ p1a(sctps_badsum, "\t{:dropped-bad-checksum/%ju} "
+ "{N:/checksum error}\n");
+ p1a(sctps_noport, "\t{:dropped-no-endpoint/%ju} "
+ "{N:/no endpoint for port}\n");
+ p1a(sctps_badvtag, "\t{:dropped-bad-v-tag/%ju} "
+ "{N:/bad v-tag}\n");
+ p1a(sctps_badsid, "\t{:dropped-bad-sid/%ju} "
+ "{N:/bad SID}\n");
+ p1a(sctps_nomem, "\t{:dropped-no-memory/%ju} "
+ "{N:/no memory}\n");
+ p1a(sctps_fastretransinrtt, "\t{:multiple-fast-retransmits-in-rtt/%ju} "
+	    "{N:/number of multiple FR in a RTT window}\n");
#if 0
p(sctps_markedretrans, "\t%ju TODO:sctps_markedretrans\n");
#endif
- p1a(sctps_naglesent, "\t%ju RFC813 allowed sending\n");
- p1a(sctps_naglequeued, "\t%ju RFC813 does not allow sending\n");
- p1a(sctps_maxburstqueued, "\t%ju times max burst prohibited sending\n");
- p1a(sctps_ifnomemqueued, "\t%ju look ahead tells us no memory in "
- "interface\n");
- p(sctps_windowprobed, "\t%ju number%s of window probes sent\n");
- p(sctps_lowlevelerr, "\t%ju time%s an output error to clamp "
- "down on next user send\n");
- p(sctps_lowlevelerrusr, "\t%ju time%s sctp_senderrors were "
- "caused from a user\n");
- p(sctps_datadropchklmt, "\t%ju number of in data drop%s due to "
- "chunk limit reached\n");
- p(sctps_datadroprwnd, "\t%ju number of in data drop%s due to rwnd "
- "limit reached\n");
- p(sctps_ecnereducedcwnd, "\t%ju time%s a ECN reduced "
- "the cwnd\n");
- p1a(sctps_vtagexpress, "\t%ju used express lookup via vtag\n");
- p1a(sctps_vtagbogus, "\t%ju collision in express lookup\n");
- p(sctps_primary_randry, "\t%ju time%s the sender ran dry "
- "of user data on primary\n");
- p1a(sctps_cmt_randry, "\t%ju same for above\n");
- p(sctps_slowpath_sack, "\t%ju sack%s the slow way\n");
- p(sctps_wu_sacks_sent, "\t%ju window update only sack%s sent\n");
- p(sctps_sends_with_flags, "\t%ju send%s with sinfo_flags !=0\n");
- p(sctps_sends_with_unord, "\t%ju unordered send%s\n");
- p(sctps_sends_with_eof, "\t%ju send%s with EOF flag set\n");
- p(sctps_sends_with_abort, "\t%ju send%s with ABORT flag set\n");
- p(sctps_protocol_drain_calls, "\t%ju time%s protocol drain called\n");
- p(sctps_protocol_drains_done, "\t%ju time%s we did a protocol "
- "drain\n");
- p(sctps_read_peeks, "\t%ju time%s recv was called with peek\n");
- p(sctps_cached_chk, "\t%ju cached chunk%s used\n");
- p1a(sctps_cached_strmoq, "\t%ju cached stream oq's used\n");
- p(sctps_left_abandon, "\t%ju unread message%s abandonded by close\n");
- p1a(sctps_send_burst_avoid, "\t%ju send burst avoidance, already "
- "max burst inflight to net\n");
- p1a(sctps_send_cwnd_avoid, "\t%ju send cwnd full avoidance, already "
- "max burst inflight to net\n");
- p(sctps_fwdtsn_map_over, "\t%ju number of map array over-run%s via "
- "fwd-tsn's\n");
+ p1a(sctps_naglesent, "\t{:rfc813-sent/%ju} "
+ "{N:/RFC813 allowed sending}\n");
+ p1a(sctps_naglequeued, "\t{:rfc813-queued/%ju} "
+ "{N:/RFC813 does not allow sending}\n");
+ p1a(sctps_maxburstqueued, "\t{:max-burst-queued/%ju} "
+ "{N:/times max burst prohibited sending}\n");
+ p1a(sctps_ifnomemqueued, "\t{:no-memory-in-interface/%ju} "
+ "{N:/look ahead tells us no memory in interface}\n");
+ p(sctps_windowprobed, "\t{:sent-window-probes/%ju} "
+ "{N:/number%s of window probes sent}\n");
+ p(sctps_lowlevelerr, "\t{:low-level-err/%ju} "
+ "{N:/time%s an output error to clamp down on next user send}\n");
+ p(sctps_lowlevelerrusr, "\t{:low-level-user-error/%ju} "
+ "{N:/time%s sctp_senderrors were caused from a user}\n");
+ p(sctps_datadropchklmt, "\t{:dropped-chunk-limit/%ju} "
+ "{N:/number of in data drop%s due to chunk limit reached}\n");
+ p(sctps_datadroprwnd, "\t{:dropped-rwnd-limit/%ju} "
+ "{N:/number of in data drop%s due to rwnd limit reached}\n");
+ p(sctps_ecnereducedcwnd, "\t{:ecn-reduced-cwnd/%ju} "
+ "{N:/time%s a ECN reduced the cwnd}\n");
+ p1a(sctps_vtagexpress, "\t{:v-tag-express-lookup/%ju} "
+ "{N:/used express lookup via vtag}\n");
+ p1a(sctps_vtagbogus, "\t{:v-tag-collision/%ju} "
+ "{N:/collision in express lookup}\n");
+ p(sctps_primary_randry, "\t{:sender-ran-dry/%ju} "
+ "{N:/time%s the sender ran dry of user data on primary}\n");
+ p1a(sctps_cmt_randry, "\t{:cmt-ran-dry/%ju} "
+ "{N:/same for above}\n");
+ p(sctps_slowpath_sack, "\t{:slow-path-sack/%ju} "
+ "{N:/sack%s the slow way}\n");
+ p(sctps_wu_sacks_sent, "\t{:sent-window-update-only-sack/%ju} "
+ "{N:/window update only sack%s sent}\n");
+ p(sctps_sends_with_flags, "\t{:sent-with-sinfo/%ju} "
+ "{N:/send%s with sinfo_flags !=0}\n");
+ p(sctps_sends_with_unord, "\t{:sent-with-unordered/%ju} "
+ "{N:/unordered send%s}\n");
+ p(sctps_sends_with_eof, "\t{:sent-with-eof/%ju} "
+ "{N:/send%s with EOF flag set}\n");
+ p(sctps_sends_with_abort, "\t{:sent-with-abort/%ju} "
+ "{N:/send%s with ABORT flag set}\n");
+ p(sctps_protocol_drain_calls, "\t{:protocol-drain-called/%ju} "
+ "{N:/time%s protocol drain called}\n");
+ p(sctps_protocol_drains_done, "\t{:protocol-drain/%ju} "
+ "{N:/time%s we did a protocol drain}\n");
+ p(sctps_read_peeks, "\t{:read-with-peek/%ju} "
+ "{N:/time%s recv was called with peek}\n");
+ p(sctps_cached_chk, "\t{:cached-chunks/%ju} "
+ "{N:/cached chunk%s used}\n");
+ p1a(sctps_cached_strmoq, "\t{:cached-output-queue-used/%ju} "
+ "{N:/cached stream oq's used}\n");
+ p(sctps_left_abandon, "\t{:messages-abandoned/%ju} "
+ "{N:/unread message%s abandonded by close}\n");
+ p1a(sctps_send_burst_avoid, "\t{:send-burst-avoidance/%ju} "
+ "{N:/send burst avoidance, already max burst inflight to net}\n");
+ p1a(sctps_send_cwnd_avoid, "\t{:send-cwnd-avoidance/%ju} "
+ "{N:/send cwnd full avoidance, already max burst inflight "
+ "to net}\n");
+ p(sctps_fwdtsn_map_over, "\t{:tsn-map-overruns/%ju} "
+ "{N:/number of map array over-run%s via fwd-tsn's}\n");
#undef p
#undef p1a
+ xo_close_container(name);
}
#endif /* SCTP */
diff --git a/freebsd/usr.bin/netstat/unix.c b/freebsd/usr.bin/netstat/unix.c
index afb35113..63f2b0ee 100644
--- a/freebsd/usr.bin/netstat/unix.c
+++ b/freebsd/usr.bin/netstat/unix.c
@@ -1,5 +1,9 @@
#include <machine/rtems-bsd-user-space.h>
+#ifdef __rtems__
+#include "rtems-bsd-netstat-namespace.h"
+#endif /* __rtems__ */
+
/*-
* Copyright (c) 1983, 1988, 1993
* The Regents of the University of California. All rights reserved.
@@ -35,6 +39,9 @@ static char sccsid[] = "@(#)unix.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
#endif
+#ifdef __rtems__
+#include <machine/rtems-bsd-program.h>
+#endif /* __rtems__ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -59,9 +66,14 @@ __FBSDID("$FreeBSD$");
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <strings.h>
#include <kvm.h>
+#include <libxo/xo.h>
#include "netstat.h"
+#ifdef __rtems__
+#include "rtems-bsd-netstat-unix-data.h"
+#endif /* __rtems__ */
static void unixdomainpr(struct xunpcb *, struct xsocket *);
@@ -80,15 +92,15 @@ pcblist_sysctl(int type, char **bufp)
len = 0;
if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) {
if (errno != ENOENT)
- warn("sysctl: %s", mibvar);
+ xo_warn("sysctl: %s", mibvar);
return (-1);
}
- if ((buf = malloc(len)) == 0) {
- warnx("malloc %lu bytes", (u_long)len);
+ if ((buf = malloc(len)) == NULL) {
+ xo_warnx("malloc %lu bytes", (u_long)len);
return (-2);
}
if (sysctlbyname(mibvar, buf, &len, 0, 0) < 0) {
- warn("sysctl: %s", mibvar);
+ xo_warn("sysctl: %s", mibvar);
free(buf);
return (-2);
}
@@ -117,15 +129,15 @@ pcblist_kvm(u_long count_off, u_long gencnt_off, u_long head_off, char **bufp)
return (-1);
kread(count_off, &unp_count, sizeof(unp_count));
len = 2 * sizeof(xug) + (unp_count + unp_count / 8) * sizeof(xu);
- if ((buf = malloc(len)) == 0) {
- warnx("malloc %lu bytes", (u_long)len);
+ if ((buf = malloc(len)) == NULL) {
+ xo_warnx("malloc %lu bytes", (u_long)len);
return (-2);
}
p = buf;
#define COPYOUT(obj, size) do { \
if (len < (size)) { \
- warnx("buffer size exceeded"); \
+ xo_warnx("buffer size exceeded"); \
goto fail; \
} \
bcopy((obj), p, (size)); \
@@ -195,7 +207,7 @@ fail:
#ifndef __rtems__
void
unixpr(u_long count_off, u_long gencnt_off, u_long dhead_off, u_long shead_off,
- u_long sphead_off)
+ u_long sphead_off, bool *first)
{
char *buf;
int ret, type;
@@ -204,6 +216,7 @@ unixpr(u_long count_off, u_long gencnt_off, u_long dhead_off, u_long shead_off,
struct xunpcb *xunp;
u_long head_off;
+ buf = NULL;
for (type = SOCK_STREAM; type <= SOCK_SEQPACKET; type++) {
if (live)
ret = pcblist_sysctl(type, &buf);
@@ -232,26 +245,35 @@ unixpr(u_long count_off, u_long gencnt_off, u_long dhead_off, u_long shead_off,
oxug = xug = (struct xunpgen *)buf;
for (xug = (struct xunpgen *)((char *)xug + xug->xug_len);
- xug->xug_len > sizeof(struct xunpgen);
- xug = (struct xunpgen *)((char *)xug + xug->xug_len)) {
+ xug->xug_len > sizeof(struct xunpgen);
+ xug = (struct xunpgen *)((char *)xug + xug->xug_len)) {
xunp = (struct xunpcb *)xug;
so = &xunp->xu_socket;
/* Ignore PCBs which were freed during copyout. */
if (xunp->xu_unp.unp_gencnt > oxug->xug_gen)
continue;
+ if (*first) {
+ xo_open_list("socket");
+ *first = false;
+ }
+ xo_open_instance("socket");
unixdomainpr(xunp, so);
+ xo_close_instance("socket");
}
if (xug != oxug && xug->xug_gen != oxug->xug_gen) {
if (oxug->xug_count > xug->xug_count) {
- printf("Some %s sockets may have been deleted.\n",
- socktype[type]);
+ xo_emit("Some {:type/%s} sockets may have "
+ "been {:action/deleted}.\n",
+ socktype[type]);
} else if (oxug->xug_count < xug->xug_count) {
- printf("Some %s sockets may have been created.\n",
- socktype[type]);
+ xo_emit("Some {:type/%s} sockets may have "
+ "been {:action/created}.\n",
+ socktype[type]);
} else {
- printf("Some %s sockets may have been created or deleted",
- socktype[type]);
+ xo_emit("Some {:type/%s} sockets may have "
+ "been {:action/created or deleted}",
+ socktype[type]);
}
}
free(buf);
@@ -266,7 +288,26 @@ unixdomainpr(struct xunpcb *xunp, struct xsocket *so)
struct unpcb *unp;
struct sockaddr_un *sa;
static int first = 1;
- char buf1[15];
+ char buf1[33];
+ static const char *titles[2] = {
+ "{T:/%-8.8s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%8.8s} "
+ "{T:/%8.8s} {T:/%8.8s} {T:/%8.8s} {T:Addr}\n",
+ "{T:/%-16.16s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%16.16s} "
+ "{T:/%16.16s} {T:/%16.16s} {T:/%16.16s} {T:Addr}\n"
+ };
+ static const char *format[2] = {
+ "{q:address/%8lx} {t:type/%-6.6s} "
+ "{:receive-bytes-waiting/%6u} "
+ "{:send-bytes-waiting/%6u} "
+ "{q:vnode/%8lx} {q:connection/%8lx} "
+ "{q:first-reference/%8lx} {q:next-reference/%8lx}",
+ "{q:address/%16lx} {t:type/%-6.6s} "
+ "{:receive-bytes-waiting/%6u} "
+ "{:send-bytes-waiting/%6u} "
+ "{q:vnode/%16lx} {q:connection/%16lx} "
+ "{q:first-reference/%16lx} {q:next-reference/%16lx}"
+ };
+ int fmt = (sizeof(void *) == 8) ? 1 : 0;
unp = &xunp->xu_unp;
if (unp->unp_addr)
@@ -275,9 +316,8 @@ unixdomainpr(struct xunpcb *xunp, struct xsocket *so)
sa = (struct sockaddr_un *)0;
if (first && !Lflag) {
- printf("Active UNIX domain sockets\n");
- printf(
-"%-8.8s %-6.6s %-6.6s %-6.6s %8.8s %8.8s %8.8s %8.8s Addr\n",
+ xo_emit("{T:Active UNIX domain sockets}\n");
+ xo_emit(titles[fmt],
"Address", "Type", "Recv-Q", "Send-Q",
"Inode", "Conn", "Refs", "Nextref");
first = 0;
@@ -287,20 +327,23 @@ unixdomainpr(struct xunpcb *xunp, struct xsocket *so)
return;
if (Lflag) {
- snprintf(buf1, 15, "%d/%d/%d", so->so_qlen,
+ snprintf(buf1, sizeof buf1, "%u/%u/%u", so->so_qlen,
so->so_incqlen, so->so_qlimit);
- printf("unix %-14.14s", buf1);
+ xo_emit("unix {d:socket/%-32.32s}{e:queue-length/%u}"
+ "{e:incomplete-queue-length/%u}{e:queue-limit/%u}",
+ buf1, so->so_qlen, so->so_incqlen, so->so_qlimit);
} else {
- printf("%8lx %-6.6s %6u %6u %8lx %8lx %8lx %8lx",
+ xo_emit(format[fmt],
(long)so->so_pcb, socktype[so->so_type], so->so_rcv.sb_cc,
- so->so_snd.sb_cc, (long)unp->unp_vnode, (long)unp->unp_conn,
+ so->so_snd.sb_cc, (long)unp->unp_vnode,
+ (long)unp->unp_conn,
(long)LIST_FIRST(&unp->unp_refs),
(long)LIST_NEXT(unp, unp_reflink));
}
if (sa)
- printf(" %.*s",
+ xo_emit(" {:path/%.*s}",
(int)(sa->sun_len - offsetof(struct sockaddr_un, sun_path)),
sa->sun_path);
- putchar('\n');
+ xo_emit("\n");
}
#endif /* __rtems__ */
diff --git a/libbsd.py b/libbsd.py
index d7a0a5f7..08f3a988 100755
--- a/libbsd.py
+++ b/libbsd.py
@@ -56,6 +56,8 @@ def rtems(mm):
'local/usb_if.c',
'local/mmcbus_if.c',
'local/mmcbr_if.c',
+ 'local/if_dwc_if.c',
+ 'local/gpio_if.c',
'rtems/ipsec_get_policylen.c',
'rtems/rtems-bsd-arp-processor.c',
'rtems/rtems-bsd-allocator-domain-size.c',
@@ -210,6 +212,8 @@ def base(mm):
'sys/security/mac/mac_framework.h',
'sys/sys/acl.h',
'sys/sys/aio.h',
+ 'sys/sys/_bitset.h',
+ 'sys/sys/bitset.h',
'sys/sys/bitstring.h',
'sys/sys/bufobj.h',
'sys/sys/buf_ring.h',
@@ -219,15 +223,21 @@ def base(mm):
'sys/sys/_callout.h',
'sys/sys/callout.h',
'sys/sys/capability.h',
+ 'sys/sys/caprights.h',
+ 'sys/sys/capsicum.h',
'sys/sys/condvar.h',
'sys/sys/conf.h',
+ 'sys/sys/counter.h',
'sys/sys/cpu.h',
+ 'sys/sys/_cpuset.h',
'sys/sys/ctype.h',
'sys/sys/domain.h',
'sys/sys/eventhandler.h',
+ 'sys/sys/fail.h',
'sys/sys/filedesc.h',
'sys/sys/file.h',
'sys/sys/fnv_hash.h',
+ 'sys/sys/gpio.h',
'sys/sys/hash.h',
'sys/sys/hhook.h',
'sys/sys/interrupt.h',
@@ -237,6 +247,7 @@ def base(mm):
'sys/sys/kobj.h',
'sys/sys/kthread.h',
'sys/sys/ktr.h',
+ 'sys/sys/ktr_class.h',
'sys/sys/libkern.h',
'sys/sys/limits.h',
'sys/sys/linker.h',
@@ -258,6 +269,7 @@ def base(mm):
'sys/sys/_null.h',
'sys/sys/osd.h',
'sys/sys/pcpu.h',
+ 'sys/sys/_pctrie.h',
'sys/sys/pipe.h',
'sys/sys/priv.h',
'sys/sys/proc.h',
@@ -278,8 +290,10 @@ def base(mm):
'sys/sys/sdt.h',
'sys/sys/selinfo.h',
'sys/sys/_semaphore.h',
+ 'sys/sys/seq.h',
'sys/sys/sf_buf.h',
'sys/sys/sigio.h',
+ 'sys/sys/signalvar.h',
'sys/sys/smp.h',
'sys/sys/sleepqueue.h',
'sys/sys/_sockaddr_storage.h',
@@ -305,6 +319,8 @@ def base(mm):
'sys/sys/ucred.h',
'sys/sys/un.h',
'sys/sys/unpcb.h',
+ 'sys/sys/_unrhdr.h',
+ 'sys/sys/uuid.h',
'sys/sys/vmmeter.h',
'sys/sys/vnode.h',
'sys/vm/uma_dbg.h',
@@ -333,14 +349,17 @@ def base(mm):
'sys/kern/kern_sysctl.c',
'sys/kern/kern_time.c',
'sys/kern/kern_timeout.c',
+ 'sys/kern/kern_uuid.c',
'sys/kern/subr_bufring.c',
'sys/kern/subr_bus.c',
+ 'sys/kern/subr_counter.c',
'sys/kern/subr_eventhandler.c',
'sys/kern/subr_hash.c',
'sys/kern/subr_hints.c',
'sys/kern/subr_kobj.c',
'sys/kern/subr_lock.c',
'sys/kern/subr_module.c',
+ 'sys/kern/subr_pcpu.c',
'sys/kern/subr_prf.c',
'sys/kern/subr_rman.c',
'sys/kern/subr_sbuf.c',
@@ -354,13 +373,14 @@ def base(mm):
'sys/kern/uipc_domain.c',
'sys/kern/uipc_mbuf2.c',
'sys/kern/uipc_mbuf.c',
+ 'sys/kern/uipc_mbufhash.c',
'sys/kern/uipc_sockbuf.c',
'sys/kern/uipc_socket.c',
'sys/kern/uipc_usrreq.c',
'sys/libkern/bcd.c',
- 'sys/libkern/arc4random.c',
- 'sys/libkern/fls.c',
'sys/libkern/inet_ntoa.c',
+ 'sys/libkern/jenkins_hash.c',
+ 'sys/libkern/murmur3_32.c',
'sys/libkern/random.c',
'sys/vm/uma_core.c',
'sys/vm/uma_dbg.c',
@@ -795,11 +815,13 @@ def cam(mm):
mod = builder.Module('cam')
mod.addKernelSpaceHeaderFiles(
[
+ 'sys/dev/nvme/nvme.h',
'sys/sys/ata.h',
'sys/cam/cam.h',
'sys/cam/cam_ccb.h',
'sys/cam/cam_sim.h',
'sys/cam/cam_xpt_sim.h',
+ 'sys/cam/nvme/nvme_all.h',
'sys/cam/scsi/scsi_all.h',
'sys/cam/scsi/scsi_da.h',
'sys/cam/ata/ata_all.h',
@@ -838,6 +860,7 @@ def dev_net(mm):
'sys/net/if_dl.h',
'sys/net/if.h',
'sys/net/if_media.h',
+ 'sys/net/ifq.h',
'sys/net/if_types.h',
'sys/net/if_var.h',
'sys/net/vnet.h',
@@ -846,6 +869,7 @@ def dev_net(mm):
'sys/dev/tsec/if_tsecreg.h',
'sys/dev/cadence/if_cgem_hw.h',
'sys/dev/dwc/if_dwc.h',
+ 'sys/dev/dwc/if_dwcvar.h',
'sys/arm/xilinx/zy7_slcr.h',
]
)
@@ -878,7 +902,6 @@ def dev_nic(mm):
mod.addKernelSpaceHeaderFiles(
[
'sys/sys/pciio.h',
- 'sys/dev/random/randomdev_soft.h',
'sys/sys/eventvar.h',
'sys/sys/kenv.h',
'sys/isa/isavar.h',
@@ -912,7 +935,6 @@ def dev_nic(mm):
)
mod.addKernelSpaceSourceFiles(
[
- 'sys/dev/random/harvest.c',
'sys/netinet/tcp_hostcache.c',
'sys/dev/led/led.c',
],
@@ -927,7 +949,7 @@ def dev_nic_re(mm):
mod = builder.Module('dev_nic_re')
mod.addKernelSpaceHeaderFiles(
[
- 'sys/pci/if_rlreg.h',
+ 'sys/dev/rl/if_rlreg.h',
]
)
mod.addKernelSpaceSourceFiles(
@@ -1132,7 +1154,6 @@ def net(mm):
'sys/net/if_media.h',
'sys/net/if_mib.h',
'sys/net/if_sppp.h',
- 'sys/net/if_stf.h',
'sys/net/if_tap.h',
'sys/net/if_tapvar.h',
'sys/net/if_tun.h',
@@ -1149,8 +1170,13 @@ def net(mm):
'sys/net/radix_mpath.h',
'sys/net/raw_cb.h',
'sys/net/route.h',
+ 'sys/net/route_var.h',
+ 'sys/net/rss_config.h',
+ 'sys/net/sff8436.h',
+ 'sys/net/sff8472.h',
'sys/net/slcompress.h',
'sys/net/vnet.h',
+ 'sys/netgraph/ng_socket.h',
]
)
mod.addKernelSpaceSourceFiles(
@@ -1165,10 +1191,8 @@ def net(mm):
'sys/net/if_dead.c',
'sys/net/if_disc.c',
'sys/net/if_edsc.c',
- 'sys/net/if_ef.c',
'sys/net/if_enc.c',
'sys/net/if_epair.c',
- 'sys/net/if_faith.c',
'sys/net/if_fddisubr.c',
'sys/net/if_fwsubr.c',
'sys/net/if_gif.c',
@@ -1212,17 +1236,19 @@ def netinet(mm):
mod = builder.Module('netinet')
mod.addKernelSpaceHeaderFiles(
[
- 'sys/netinet/cc.h',
+ 'sys/netinet/cc/cc.h',
'sys/netinet/cc/cc_module.h',
+ 'sys/netinet/in_fib.h',
'sys/netinet/icmp6.h',
'sys/netinet/icmp_var.h',
'sys/netinet/if_atm.h',
'sys/netinet/if_ether.h',
'sys/netinet/igmp.h',
'sys/netinet/igmp_var.h',
- 'sys/netinet/in_gif.h',
'sys/netinet/in.h',
+ 'sys/netinet/in_kdtrace.h',
'sys/netinet/in_pcb.h',
+ 'sys/netinet/in_rss.h',
'sys/netinet/in_systm.h',
'sys/netinet/in_var.h',
'sys/netinet/ip6.h',
@@ -1232,17 +1258,12 @@ def netinet(mm):
'sys/netinet/ip_ecn.h',
'sys/netinet/ip_encap.h',
'sys/netinet/ip_fw.h',
- 'sys/netinet/ip_gre.h',
'sys/netinet/ip.h',
'sys/netinet/ip_icmp.h',
'sys/netinet/ip_ipsec.h',
'sys/netinet/ip_mroute.h',
'sys/netinet/ip_options.h',
'sys/netinet/ip_var.h',
- 'sys/netpfil/ipfw/dn_heap.h',
- 'sys/netpfil/ipfw/dn_sched.h',
- 'sys/netpfil/ipfw/ip_dn_private.h',
- 'sys/netpfil/ipfw/ip_fw_private.h',
'sys/netinet/pim.h',
'sys/netinet/pim_var.h',
'sys/netinet/sctp_asconf.h',
@@ -1281,6 +1302,7 @@ def netinet(mm):
'sys/netinet/tcp_var.h',
'sys/netinet/toecore.h',
'sys/netinet/udp.h',
+ 'sys/netinet/udplite.h',
'sys/netinet/udp_var.h',
'sys/netinet/libalias/alias_local.h',
'sys/netinet/libalias/alias.h',
@@ -1300,6 +1322,7 @@ def netinet(mm):
'sys/netinet/if_ether.c',
'sys/netinet/igmp.c',
'sys/netinet/in.c',
+ 'sys/netinet/in_fib.c',
'sys/netinet/in_gif.c',
'sys/netinet/in_mcast.c',
'sys/netinet/in_pcb.c',
@@ -1317,6 +1340,7 @@ def netinet(mm):
'sys/netinet/ip_mroute.c',
'sys/netinet/ip_options.c',
'sys/netinet/ip_output.c',
+ 'sys/netinet/ip_reass.c',
'sys/netinet/raw_ip.c',
'sys/netinet/sctp_asconf.c',
'sys/netinet/sctp_auth.c',
@@ -1345,22 +1369,6 @@ def netinet(mm):
'sys/netinet/tcp_timer.c',
'sys/netinet/tcp_timewait.c',
'sys/netinet/tcp_usrreq.c',
- 'sys/netpfil/ipfw/dn_heap.c',
- 'sys/netpfil/ipfw/dn_sched_fifo.c',
- 'sys/netpfil/ipfw/dn_sched_prio.c',
- 'sys/netpfil/ipfw/dn_sched_qfq.c',
- 'sys/netpfil/ipfw/dn_sched_rr.c',
- 'sys/netpfil/ipfw/dn_sched_wf2q.c',
- 'sys/netpfil/ipfw/ip_dn_glue.c',
- 'sys/netpfil/ipfw/ip_dn_io.c',
- 'sys/netpfil/ipfw/ip_dummynet.c',
- 'sys/netpfil/ipfw/ip_fw2.c',
- #'sys/netpfil/ipfw/ip_fw_dynamic.c',
- 'sys/netpfil/ipfw/ip_fw_log.c',
- 'sys/netpfil/ipfw/ip_fw_nat.c',
- 'sys/netpfil/ipfw/ip_fw_pfil.c',
- 'sys/netpfil/ipfw/ip_fw_sockopt.c',
- 'sys/netpfil/ipfw/ip_fw_table.c',
'sys/netinet/udp_usrreq.c',
'sys/netinet/libalias/alias_dummy.c',
'sys/netinet/libalias/alias_pptp.c',
@@ -1389,10 +1397,11 @@ def netinet6(mm):
mod.addKernelSpaceHeaderFiles(
[
'sys/netinet6/icmp6.h',
- 'sys/netinet6/in6_gif.h',
+ 'sys/netinet6/in6_fib.h',
'sys/netinet6/in6.h',
'sys/netinet6/in6_ifattach.h',
'sys/netinet6/in6_pcb.h',
+ 'sys/netinet6/in6_rss.h',
'sys/netinet6/in6_var.h',
'sys/netinet6/ip6_ecn.h',
'sys/netinet6/ip6.h',
@@ -1421,6 +1430,7 @@ def netinet6(mm):
'sys/netinet6/icmp6.c',
'sys/netinet6/in6.c',
'sys/netinet6/in6_cksum.c',
+ 'sys/netinet6/in6_fib.c',
'sys/netinet6/in6_gif.c',
'sys/netinet6/in6_ifattach.c',
'sys/netinet6/in6_mcast.c',
@@ -1582,26 +1592,34 @@ def opencrypto(mm):
mod = builder.Module('opencrypto')
mod.addKernelSpaceHeaderFiles(
[
- 'sys/opencrypto/deflate.h',
- 'sys/opencrypto/xform.h',
+ 'sys/opencrypto/cast.h',
+ 'sys/opencrypto/castsb.h',
+ 'sys/opencrypto/cryptodev.h',
'sys/opencrypto/cryptosoft.h',
+ 'sys/opencrypto/deflate.h',
+ 'sys/opencrypto/gfmult.h',
+ 'sys/opencrypto/gmac.h',
'sys/opencrypto/rmd160.h',
- 'sys/opencrypto/cryptodev.h',
- 'sys/opencrypto/castsb.h',
'sys/opencrypto/skipjack.h',
- 'sys/opencrypto/cast.h',
+ 'sys/opencrypto/xform_auth.h',
+ 'sys/opencrypto/xform_comp.h',
+ 'sys/opencrypto/xform_enc.h',
+ 'sys/opencrypto/xform.h',
+ 'sys/opencrypto/xform_userland.h',
]
)
mod.addKernelSpaceSourceFiles(
[
- 'sys/opencrypto/crypto.c',
- 'sys/opencrypto/deflate.c',
- 'sys/opencrypto/cryptosoft.c',
+ 'sys/opencrypto/cast.c',
'sys/opencrypto/criov.c',
+ 'sys/opencrypto/crypto.c',
+ 'sys/opencrypto/cryptodeflate.c',
+ 'sys/opencrypto/cryptosoft.c',
+ 'sys/opencrypto/gfmult.c',
+ 'sys/opencrypto/gmac.c',
'sys/opencrypto/rmd160.c',
- 'sys/opencrypto/xform.c',
'sys/opencrypto/skipjack.c',
- 'sys/opencrypto/cast.c',
+ 'sys/opencrypto/xform.c',
],
mm.generator['source']()
)
@@ -1614,46 +1632,52 @@ def crypto(mm):
mod = builder.Module('crypto')
mod.addKernelSpaceHeaderFiles(
[
- #'crypto/aesni/aesni.h',
+ 'sys/crypto/skein/skein_iv.h',
+ 'sys/crypto/skein/skein_freebsd.h',
+ 'sys/crypto/skein/skein.h',
+ 'sys/crypto/skein/skein_debug.h',
+ 'sys/crypto/skein/skein_port.h',
+ 'sys/crypto/rc4/rc4.h',
+ 'sys/crypto/sha2/sha384.h',
+ 'sys/crypto/sha2/sha256.h',
+ 'sys/crypto/sha2/sha512t.h',
+ 'sys/crypto/sha2/sha512.h',
'sys/crypto/sha1.h',
- 'sys/crypto/sha2/sha2.h',
- 'sys/crypto/rijndael/rijndael.h',
- 'sys/crypto/rijndael/rijndael_local.h',
+ 'sys/crypto/siphash/siphash.h',
'sys/crypto/rijndael/rijndael-api-fst.h',
- 'sys/crypto/des/des.h',
+ 'sys/crypto/rijndael/rijndael_local.h',
+ 'sys/crypto/rijndael/rijndael.h',
+ 'sys/crypto/camellia/camellia.h',
'sys/crypto/des/spr.h',
+ 'sys/crypto/des/des_locl.h',
+ 'sys/crypto/des/des.h',
'sys/crypto/des/podd.h',
'sys/crypto/des/sk.h',
- 'sys/crypto/des/des_locl.h',
'sys/crypto/blowfish/bf_pi.h',
- 'sys/crypto/blowfish/bf_locl.h',
'sys/crypto/blowfish/blowfish.h',
- 'sys/crypto/rc4/rc4.h',
- #'crypto/via/padlock.h',
- 'sys/crypto/camellia/camellia.h',
+ 'sys/crypto/blowfish/bf_locl.h',
]
)
mod.addKernelSpaceSourceFiles(
[
- #'crypto/aesni/aesni.c',
- #'crypto/aesni/aesni_wrap.c',
+ 'sys/crypto/skein/skein_block.c',
+ 'sys/crypto/skein/skein.c',
+ 'sys/crypto/rc4/rc4.c',
+ 'sys/crypto/sha2/sha256c.c',
+ 'sys/crypto/sha2/sha512c.c',
+ 'sys/crypto/siphash/siphash.c',
'sys/crypto/sha1.c',
- 'sys/crypto/sha2/sha2.c',
- 'sys/crypto/rijndael/rijndael-alg-fst.c',
'sys/crypto/rijndael/rijndael-api.c',
+ 'sys/crypto/rijndael/rijndael-alg-fst.c',
'sys/crypto/rijndael/rijndael-api-fst.c',
- 'sys/crypto/des/des_setkey.c',
+ 'sys/crypto/camellia/camellia-api.c',
+ 'sys/crypto/camellia/camellia.c',
'sys/crypto/des/des_enc.c',
+ 'sys/crypto/des/des_setkey.c',
'sys/crypto/des/des_ecb.c',
- 'sys/crypto/blowfish/bf_enc.c',
'sys/crypto/blowfish/bf_skey.c',
+ 'sys/crypto/blowfish/bf_enc.c',
'sys/crypto/blowfish/bf_ecb.c',
- 'sys/crypto/rc4/rc4.c',
- #'crypto/via/padlock.c',
- #'crypto/via/padlock_cipher.c',
- #'crypto/via/padlock_hash.c',
- 'sys/crypto/camellia/camellia-api.c',
- 'sys/crypto/camellia/camellia.c',
],
mm.generator['source']()
)
@@ -1666,31 +1690,34 @@ def altq(mm):
mod = builder.Module('altq')
mod.addKernelSpaceHeaderFiles(
[
- 'sys/contrib/altq/altq/altq_rmclass.h',
- 'sys/contrib/altq/altq/altq_cbq.h',
- 'sys/contrib/altq/altq/altq_var.h',
- 'sys/contrib/altq/altq/altqconf.h',
- 'sys/contrib/altq/altq/altq.h',
- 'sys/contrib/altq/altq/altq_hfsc.h',
- 'sys/contrib/altq/altq/altq_red.h',
- 'sys/contrib/altq/altq/altq_classq.h',
- 'sys/contrib/altq/altq/altq_priq.h',
- 'sys/contrib/altq/altq/altq_rmclass_debug.h',
- 'sys/contrib/altq/altq/altq_cdnr.h',
- 'sys/contrib/altq/altq/altq_rio.h',
- 'sys/contrib/altq/altq/if_altq.h',
+ 'sys/net/altq/altq_cbq.h',
+ 'sys/net/altq/altq_cdnr.h',
+ 'sys/net/altq/altq_classq.h',
+ 'sys/net/altq/altq_codel.h',
+ 'sys/net/altq/altq_fairq.h',
+ 'sys/net/altq/altq.h',
+ 'sys/net/altq/altq_hfsc.h',
+ 'sys/net/altq/altq_priq.h',
+ 'sys/net/altq/altq_red.h',
+ 'sys/net/altq/altq_rio.h',
+ 'sys/net/altq/altq_rmclass_debug.h',
+ 'sys/net/altq/altq_rmclass.h',
+ 'sys/net/altq/altq_var.h',
+ 'sys/net/altq/if_altq.h',
]
)
mod.addKernelSpaceSourceFiles(
[
- 'sys/contrib/altq/altq/altq_rmclass.c',
- 'sys/contrib/altq/altq/altq_rio.c',
- 'sys/contrib/altq/altq/altq_subr.c',
- 'sys/contrib/altq/altq/altq_cdnr.c',
- 'sys/contrib/altq/altq/altq_priq.c',
- 'sys/contrib/altq/altq/altq_cbq.c',
- 'sys/contrib/altq/altq/altq_hfsc.c',
- 'sys/contrib/altq/altq/altq_red.c',
+ 'sys/net/altq/altq_cbq.c',
+ 'sys/net/altq/altq_cdnr.c',
+ 'sys/net/altq/altq_codel.c',
+ 'sys/net/altq/altq_fairq.c',
+ 'sys/net/altq/altq_hfsc.c',
+ 'sys/net/altq/altq_priq.c',
+ 'sys/net/altq/altq_red.c',
+ 'sys/net/altq/altq_rio.c',
+ 'sys/net/altq/altq_rmclass.c',
+ 'sys/net/altq/altq_subr.c',
],
mm.generator['source']()
)
@@ -1703,26 +1730,77 @@ def pf(mm):
mod = builder.Module('pf')
mod.addKernelSpaceHeaderFiles(
[
- 'sys/contrib/pf/net/if_pflog.h',
- 'sys/contrib/pf/net/if_pflow.h',
- 'sys/contrib/pf/net/if_pfsync.h',
- 'sys/contrib/pf/net/pfvar.h',
- 'sys/contrib/pf/net/pf_mtag.h',
+ 'sys/net/if_pflog.h',
+ 'sys/net/if_pfsync.h',
+ 'sys/net/pfvar.h',
+ 'sys/netpfil/pf/pf_altq.h',
+ 'sys/netpfil/pf/pf.h',
+ 'sys/netpfil/pf/pf_mtag.h',
+ ]
+ )
+ mod.addKernelSpaceSourceFiles(
+ [
+ 'sys/netpfil/pf/if_pflog.c',
+ 'sys/netpfil/pf/if_pfsync.c',
+ 'sys/netpfil/pf/in4_cksum.c',
+ 'sys/netpfil/pf/pf.c',
+ 'sys/netpfil/pf/pf_if.c',
+ 'sys/netpfil/pf/pf_ioctl.c',
+ 'sys/netpfil/pf/pf_lb.c',
+ 'sys/netpfil/pf/pf_norm.c',
+ 'sys/netpfil/pf/pf_osfp.c',
+ 'sys/netpfil/pf/pf_ruleset.c',
+ 'sys/netpfil/pf/pf_table.c',
+ ],
+ mm.generator['source']()
+ )
+ return mod
+
+def ipfw(mm):
+ mod = builder.Module('ipfw')
+ mod.addKernelSpaceHeaderFiles(
+ [
+ 'sys/netinet6/ip_fw_nat64.h',
+ 'sys/netinet6/ip_fw_nptv6.h',
+ 'sys/netpfil/ipfw/dn_aqm_codel.h',
+ 'sys/netpfil/ipfw/dn_aqm.h',
+ 'sys/netpfil/ipfw/dn_aqm_pie.h',
+ 'sys/netpfil/ipfw/dn_heap.h',
+ 'sys/netpfil/ipfw/dn_sched_fq_codel.h',
+ 'sys/netpfil/ipfw/dn_sched_fq_codel_helper.h',
+ 'sys/netpfil/ipfw/dn_sched.h',
+ 'sys/netpfil/ipfw/ip_dn_private.h',
+ 'sys/netpfil/ipfw/ip_fw_private.h',
+ 'sys/netpfil/ipfw/ip_fw_table.h',
+ 'sys/netpfil/ipfw/nat64/ip_fw_nat64.h',
+ 'sys/netpfil/ipfw/nat64/nat64lsn.h',
+ 'sys/netpfil/ipfw/nat64/nat64stl.h',
+ 'sys/netpfil/ipfw/nat64/nat64_translate.h',
+ 'sys/netpfil/ipfw/nptv6/nptv6.h',
]
)
mod.addKernelSpaceSourceFiles(
[
- 'sys/contrib/pf/net/if_pflog.c',
- 'sys/contrib/pf/net/if_pfsync.c',
- 'sys/contrib/pf/net/pf.c',
- 'sys/contrib/pf/net/pf_if.c',
- 'sys/contrib/pf/net/pf_ioctl.c',
- 'sys/contrib/pf/net/pf_lb.c',
- 'sys/contrib/pf/net/pf_norm.c',
- 'sys/contrib/pf/net/pf_osfp.c',
- 'sys/contrib/pf/net/pf_ruleset.c',
- 'sys/contrib/pf/net/pf_table.c',
- 'sys/contrib/pf/netinet/in4_cksum.c',
+ 'sys/netpfil/ipfw/ip_fw2.c',
+ 'sys/netpfil/ipfw/ip_fw_bpf.c',
+ 'sys/netpfil/ipfw/ip_fw_dynamic.c',
+ 'sys/netpfil/ipfw/ip_fw_eaction.c',
+ 'sys/netpfil/ipfw/ip_fw_iface.c',
+ 'sys/netpfil/ipfw/ip_fw_log.c',
+ 'sys/netpfil/ipfw/ip_fw_nat.c',
+ 'sys/netpfil/ipfw/ip_fw_pfil.c',
+ 'sys/netpfil/ipfw/ip_fw_sockopt.c',
+ 'sys/netpfil/ipfw/ip_fw_table_algo.c',
+ 'sys/netpfil/ipfw/ip_fw_table.c',
+ 'sys/netpfil/ipfw/ip_fw_table_value.c',
+ 'sys/netpfil/ipfw/nat64/ip_fw_nat64.c',
+ 'sys/netpfil/ipfw/nat64/nat64lsn.c',
+ 'sys/netpfil/ipfw/nat64/nat64lsn_control.c',
+ 'sys/netpfil/ipfw/nat64/nat64stl.c',
+ 'sys/netpfil/ipfw/nat64/nat64stl_control.c',
+ 'sys/netpfil/ipfw/nat64/nat64_translate.c',
+ 'sys/netpfil/ipfw/nptv6/ip_fw_nptv6.c',
+ 'sys/netpfil/ipfw/nptv6/nptv6.c',
],
mm.generator['source']()
)
@@ -1775,8 +1853,13 @@ def user_space(mm):
mod = builder.Module('user_space')
mod.addUserSpaceHeaderFiles(
[
- 'contrib/pf/pfctl/pfctl.h',
- 'contrib/pf/pfctl/pfctl_parser.h',
+ 'contrib/libxo/libxo/xo_buf.h',
+ 'contrib/libxo/libxo/xo_encoder.h',
+ 'contrib/libxo/libxo/xo.h',
+ 'contrib/libxo/libxo/xo_humanize.h',
+ 'contrib/libxo/libxo/xo_wcwidth.h',
+ 'sbin/pfctl/pfctl.h',
+ 'sbin/pfctl/pfctl_parser.h',
'include/arpa/ftp.h',
'include/arpa/inet.h',
'include/arpa/nameser_compat.h',
@@ -1861,7 +1944,6 @@ def user_space(mm):
'sbin/dhclient/privsep.h',
'sbin/dhclient/tree.h',
'sbin/ifconfig/ifconfig.h',
- 'sbin/ifconfig/regdomain.h',
'usr.bin/netstat/netstat.h'
]
)
@@ -1880,7 +1962,7 @@ def user_space(mm):
mm.generator['convert'](),
mm.generator['convert'](),
mm.generator['route-keywords']()))
- mod.addFile(mm.generator['file']('contrib/pf/pfctl/parse.y',
+ mod.addFile(mm.generator['file']('sbin/pfctl/parse.y',
mm.generator['freebsd-path'](),
mm.generator['convert'](),
mm.generator['convert'](),
@@ -1918,15 +2000,8 @@ def user_space(mm):
mod.addUserSpaceSourceFiles(
[
'bin/hostname/hostname.c',
- 'contrib/pf/pfctl/pfctl_altq.c',
- 'contrib/pf/pfctl/pfctl.c',
- 'contrib/pf/pfctl/pfctl_optimize.c',
- 'contrib/pf/pfctl/pfctl_osfp.c',
- 'contrib/pf/pfctl/pfctl_parser.c',
- 'contrib/pf/pfctl/pfctl_qstats.c',
- 'contrib/pf/pfctl/pfctl_radix.c',
- 'contrib/pf/pfctl/pfctl_table.c',
- 'contrib/pf/pfctl/pf_print_state.c',
+ 'contrib/libxo/libxo/libxo.c',
+ 'contrib/libxo/libxo/xo_encoder.c',
'lib/libc/gen/err.c',
'lib/libc/gen/feature_present.c',
'lib/libc/gen/getdomainname.c',
@@ -2102,7 +2177,6 @@ def user_space(mm):
'sbin/ifconfig/af_link.c',
'sbin/ifconfig/af_nd6.c',
'sbin/ifconfig/ifbridge.c',
- 'sbin/ifconfig/ifcarp.c',
'sbin/ifconfig/ifclone.c',
'sbin/ifconfig/ifconfig.c',
'sbin/ifconfig/ifgif.c',
@@ -2113,15 +2187,27 @@ def user_space(mm):
'sbin/ifconfig/ifmedia.c',
'sbin/ifconfig/ifpfsync.c',
'sbin/ifconfig/ifvlan.c',
+ 'sbin/ifconfig/sfp.c',
+ 'sbin/pfctl/pfctl_altq.c',
+ 'sbin/pfctl/pfctl.c',
+ 'sbin/pfctl/pfctl_optimize.c',
+ 'sbin/pfctl/pfctl_osfp.c',
+ 'sbin/pfctl/pfctl_parser.c',
+ 'sbin/pfctl/pfctl_qstats.c',
+ 'sbin/pfctl/pfctl_radix.c',
+ 'sbin/pfctl/pfctl_table.c',
+ 'sbin/pfctl/pf_print_state.c',
'sbin/ping6/ping6.c',
'sbin/ping/ping.c',
'sbin/route/route.c',
'sbin/sysctl/sysctl.c',
'usr.bin/netstat/bpf.c',
+ 'usr.bin/netstat/flowtable.c',
'usr.bin/netstat/if.c',
'usr.bin/netstat/inet6.c',
'usr.bin/netstat/inet.c',
'usr.bin/netstat/ipsec.c',
+ 'usr.bin/netstat/nl_symbols.c',
'usr.bin/netstat/main.c',
'usr.bin/netstat/mbuf.c',
'usr.bin/netstat/mroute6.c',
@@ -2131,7 +2217,7 @@ def user_space(mm):
'usr.bin/netstat/sctp.c',
'usr.bin/netstat/unix.c',
],
- mm.generator['source']('-DINET6')
+ mm.generator['source'](['-DINET6', '-DINET'])
)
return mod
@@ -2683,6 +2769,7 @@ def sources(mm):
mm.addModule(crypto(mm))
mm.addModule(altq(mm))
mm.addModule(pf(mm))
+ mm.addModule(ipfw(mm))
mm.addModule(dev_net(mm))
# Add PCI
diff --git a/libbsd.txt b/libbsd.txt
index 02b03fdf..2d0a012c 100644
--- a/libbsd.txt
+++ b/libbsd.txt
@@ -776,10 +776,7 @@ detail and debug level information from the command.
== FreeBSD version of imported files and directories
-. sys/dev/dwc/*, trunk, 2015-03-26, cfc3df2b8f708ce8494d9d556e3472a5c8c21b8a
-. sys/dev/mmc/*, trunk, 2016-08-23, 9fe7c416e6abb28b1398fd3e5687099846800cfd
-. sys/dev/usb/*, trunk, 2016-08-23, 9fe7c416e6abb28b1398fd3e5687099846800cfd
-. *, stable/9, 2015-04-08, 99a648a912e81e29d9c4c159cbbe263462f2d719
+. *, trunk, 2016-08-23, 9fe7c416e6abb28b1398fd3e5687099846800cfd
== How to import code from FreeBSD
diff --git a/libbsd_waf.py b/libbsd_waf.py
index c440015f..e9b17e7f 100644
--- a/libbsd_waf.py
+++ b/libbsd_waf.py
@@ -63,8 +63,8 @@ def build(bld):
includes += ["%s" % (i[2:].replace("@CPU@", "x86"))]
includes += ["rtemsbsd/include"]
includes += ["freebsd/sys"]
- includes += ["freebsd/sys/contrib/altq"]
includes += ["freebsd/sys/contrib/pf"]
+ includes += ["freebsd/sys/net"]
includes += ["freebsd/include"]
includes += ["freebsd/lib/libc/include"]
includes += ["freebsd/lib/libc/isc/include"]
@@ -74,6 +74,7 @@ def build(bld):
includes += ["freebsd/lib/libmemstat"]
includes += ["freebsd/lib/libipsec"]
includes += ["freebsd/contrib/libpcap"]
+ includes += ["freebsd/contrib/libxo"]
includes += ["rtemsbsd/sys"]
includes += ["mDNSResponder/mDNSCore"]
includes += ["mDNSResponder/mDNSShared"]
@@ -195,17 +196,6 @@ def build(bld):
source = "freebsd/contrib/libpcap/grammar.c")
libbsd_use += ["yacc_pcap"]
if bld.env.AUTO_REGEN:
- bld(target = "freebsd/contrib/pf/pfctl/parse.c",
- source = "freebsd/contrib/pf/pfctl/parse.y",
- rule = host_shell + "${YACC} -b pfctly -d -p pfctly ${SRC} && sed -e '/YY_BUF_SIZE/s/16384/1024/' < pfctly.tab.c > ${TGT} && rm -f pfctly.tab.c && mv pfctly.tab.h freebsd/contrib/pf/pfctl/parse.h")
- bld.objects(target = "yacc_pfctly",
- features = "c",
- cflags = cflags,
- includes = [] + includes,
- defines = defines + [],
- source = "freebsd/contrib/pf/pfctl/parse.c")
- libbsd_use += ["yacc_pfctly"]
- if bld.env.AUTO_REGEN:
bld(target = "freebsd/lib/libc/net/nsparser.c",
source = "freebsd/lib/libc/net/nsparser.y",
rule = host_shell + "${YACC} -b _nsyy -d -p _nsyy ${SRC} && sed -e '/YY_BUF_SIZE/s/16384/1024/' < _nsyy.tab.c > ${TGT} && rm -f _nsyy.tab.c && mv _nsyy.tab.h freebsd/lib/libc/net/nsparser.h")
@@ -227,18 +217,22 @@ def build(bld):
defines = defines + [],
source = "freebsd/lib/libipsec/policy_parse.c")
libbsd_use += ["yacc___libipsecyy"]
+ if bld.env.AUTO_REGEN:
+ bld(target = "freebsd/sbin/pfctl/parse.c",
+ source = "freebsd/sbin/pfctl/parse.y",
+ rule = host_shell + "${YACC} -b pfctly -d -p pfctly ${SRC} && sed -e '/YY_BUF_SIZE/s/16384/1024/' < pfctly.tab.c > ${TGT} && rm -f pfctly.tab.c && mv pfctly.tab.h freebsd/sbin/pfctl/parse.h")
+ bld.objects(target = "yacc_pfctly",
+ features = "c",
+ cflags = cflags,
+ includes = [] + includes,
+ defines = defines + [],
+ source = "freebsd/sbin/pfctl/parse.c")
+ libbsd_use += ["yacc_pfctly"]
# Objects built with different CFLAGS
objs01_source = ['freebsd/bin/hostname/hostname.c',
- 'freebsd/contrib/pf/pfctl/pf_print_state.c',
- 'freebsd/contrib/pf/pfctl/pfctl.c',
- 'freebsd/contrib/pf/pfctl/pfctl_altq.c',
- 'freebsd/contrib/pf/pfctl/pfctl_optimize.c',
- 'freebsd/contrib/pf/pfctl/pfctl_osfp.c',
- 'freebsd/contrib/pf/pfctl/pfctl_parser.c',
- 'freebsd/contrib/pf/pfctl/pfctl_qstats.c',
- 'freebsd/contrib/pf/pfctl/pfctl_radix.c',
- 'freebsd/contrib/pf/pfctl/pfctl_table.c',
+ 'freebsd/contrib/libxo/libxo/libxo.c',
+ 'freebsd/contrib/libxo/libxo/xo_encoder.c',
'freebsd/lib/libc/gen/err.c',
'freebsd/lib/libc/gen/feature_present.c',
'freebsd/lib/libc/gen/getdomainname.c',
@@ -414,7 +408,6 @@ def build(bld):
'freebsd/sbin/ifconfig/af_link.c',
'freebsd/sbin/ifconfig/af_nd6.c',
'freebsd/sbin/ifconfig/ifbridge.c',
- 'freebsd/sbin/ifconfig/ifcarp.c',
'freebsd/sbin/ifconfig/ifclone.c',
'freebsd/sbin/ifconfig/ifconfig.c',
'freebsd/sbin/ifconfig/ifgif.c',
@@ -425,11 +418,22 @@ def build(bld):
'freebsd/sbin/ifconfig/ifmedia.c',
'freebsd/sbin/ifconfig/ifpfsync.c',
'freebsd/sbin/ifconfig/ifvlan.c',
+ 'freebsd/sbin/ifconfig/sfp.c',
+ 'freebsd/sbin/pfctl/pf_print_state.c',
+ 'freebsd/sbin/pfctl/pfctl.c',
+ 'freebsd/sbin/pfctl/pfctl_altq.c',
+ 'freebsd/sbin/pfctl/pfctl_optimize.c',
+ 'freebsd/sbin/pfctl/pfctl_osfp.c',
+ 'freebsd/sbin/pfctl/pfctl_parser.c',
+ 'freebsd/sbin/pfctl/pfctl_qstats.c',
+ 'freebsd/sbin/pfctl/pfctl_radix.c',
+ 'freebsd/sbin/pfctl/pfctl_table.c',
'freebsd/sbin/ping/ping.c',
'freebsd/sbin/ping6/ping6.c',
'freebsd/sbin/route/route.c',
'freebsd/sbin/sysctl/sysctl.c',
'freebsd/usr.bin/netstat/bpf.c',
+ 'freebsd/usr.bin/netstat/flowtable.c',
'freebsd/usr.bin/netstat/if.c',
'freebsd/usr.bin/netstat/inet.c',
'freebsd/usr.bin/netstat/inet6.c',
@@ -438,6 +442,7 @@ def build(bld):
'freebsd/usr.bin/netstat/mbuf.c',
'freebsd/usr.bin/netstat/mroute.c',
'freebsd/usr.bin/netstat/mroute6.c',
+ 'freebsd/usr.bin/netstat/nl_symbols.c',
'freebsd/usr.bin/netstat/pfkey.c',
'freebsd/usr.bin/netstat/route.c',
'freebsd/usr.bin/netstat/sctp.c',
@@ -446,7 +451,7 @@ def build(bld):
features = "c",
cflags = cflags,
includes = [] + includes,
- defines = defines + ['INET6'],
+ defines = defines + ['INET', 'INET6'],
source = objs01_source)
libbsd_use += ["objs01"]
@@ -696,25 +701,6 @@ def build(bld):
source = ['freebsd/sys/arm/xilinx/zy7_slcr.c',
'freebsd/sys/cam/cam.c',
'freebsd/sys/cam/scsi/scsi_all.c',
- 'freebsd/sys/contrib/altq/altq/altq_cbq.c',
- 'freebsd/sys/contrib/altq/altq/altq_cdnr.c',
- 'freebsd/sys/contrib/altq/altq/altq_hfsc.c',
- 'freebsd/sys/contrib/altq/altq/altq_priq.c',
- 'freebsd/sys/contrib/altq/altq/altq_red.c',
- 'freebsd/sys/contrib/altq/altq/altq_rio.c',
- 'freebsd/sys/contrib/altq/altq/altq_rmclass.c',
- 'freebsd/sys/contrib/altq/altq/altq_subr.c',
- 'freebsd/sys/contrib/pf/net/if_pflog.c',
- 'freebsd/sys/contrib/pf/net/if_pfsync.c',
- 'freebsd/sys/contrib/pf/net/pf.c',
- 'freebsd/sys/contrib/pf/net/pf_if.c',
- 'freebsd/sys/contrib/pf/net/pf_ioctl.c',
- 'freebsd/sys/contrib/pf/net/pf_lb.c',
- 'freebsd/sys/contrib/pf/net/pf_norm.c',
- 'freebsd/sys/contrib/pf/net/pf_osfp.c',
- 'freebsd/sys/contrib/pf/net/pf_ruleset.c',
- 'freebsd/sys/contrib/pf/net/pf_table.c',
- 'freebsd/sys/contrib/pf/netinet/in4_cksum.c',
'freebsd/sys/crypto/blowfish/bf_ecb.c',
'freebsd/sys/crypto/blowfish/bf_enc.c',
'freebsd/sys/crypto/blowfish/bf_skey.c',
@@ -728,7 +714,11 @@ def build(bld):
'freebsd/sys/crypto/rijndael/rijndael-api-fst.c',
'freebsd/sys/crypto/rijndael/rijndael-api.c',
'freebsd/sys/crypto/sha1.c',
- 'freebsd/sys/crypto/sha2/sha2.c',
+ 'freebsd/sys/crypto/sha2/sha256c.c',
+ 'freebsd/sys/crypto/sha2/sha512c.c',
+ 'freebsd/sys/crypto/siphash/siphash.c',
+ 'freebsd/sys/crypto/skein/skein.c',
+ 'freebsd/sys/crypto/skein/skein_block.c',
'freebsd/sys/dev/bce/if_bce.c',
'freebsd/sys/dev/bfe/if_bfe.c',
'freebsd/sys/dev/bge/if_bge.c',
@@ -774,7 +764,6 @@ def build(bld):
'freebsd/sys/dev/pci/pci.c',
'freebsd/sys/dev/pci/pci_pci.c',
'freebsd/sys/dev/pci/pci_user.c',
- 'freebsd/sys/dev/random/harvest.c',
'freebsd/sys/dev/re/if_re.c',
'freebsd/sys/dev/smc/if_smc.c',
'freebsd/sys/dev/tsec/if_tsec.c',
@@ -820,14 +809,17 @@ def build(bld):
'freebsd/sys/kern/kern_sysctl.c',
'freebsd/sys/kern/kern_time.c',
'freebsd/sys/kern/kern_timeout.c',
+ 'freebsd/sys/kern/kern_uuid.c',
'freebsd/sys/kern/subr_bufring.c',
'freebsd/sys/kern/subr_bus.c',
+ 'freebsd/sys/kern/subr_counter.c',
'freebsd/sys/kern/subr_eventhandler.c',
'freebsd/sys/kern/subr_hash.c',
'freebsd/sys/kern/subr_hints.c',
'freebsd/sys/kern/subr_kobj.c',
'freebsd/sys/kern/subr_lock.c',
'freebsd/sys/kern/subr_module.c',
+ 'freebsd/sys/kern/subr_pcpu.c',
'freebsd/sys/kern/subr_prf.c',
'freebsd/sys/kern/subr_rman.c',
'freebsd/sys/kern/subr_sbuf.c',
@@ -842,15 +834,26 @@ def build(bld):
'freebsd/sys/kern/uipc_domain.c',
'freebsd/sys/kern/uipc_mbuf.c',
'freebsd/sys/kern/uipc_mbuf2.c',
+ 'freebsd/sys/kern/uipc_mbufhash.c',
'freebsd/sys/kern/uipc_sockbuf.c',
'freebsd/sys/kern/uipc_socket.c',
'freebsd/sys/kern/uipc_syscalls.c',
'freebsd/sys/kern/uipc_usrreq.c',
- 'freebsd/sys/libkern/arc4random.c',
'freebsd/sys/libkern/bcd.c',
- 'freebsd/sys/libkern/fls.c',
'freebsd/sys/libkern/inet_ntoa.c',
+ 'freebsd/sys/libkern/jenkins_hash.c',
+ 'freebsd/sys/libkern/murmur3_32.c',
'freebsd/sys/libkern/random.c',
+ 'freebsd/sys/net/altq/altq_cbq.c',
+ 'freebsd/sys/net/altq/altq_cdnr.c',
+ 'freebsd/sys/net/altq/altq_codel.c',
+ 'freebsd/sys/net/altq/altq_fairq.c',
+ 'freebsd/sys/net/altq/altq_hfsc.c',
+ 'freebsd/sys/net/altq/altq_priq.c',
+ 'freebsd/sys/net/altq/altq_red.c',
+ 'freebsd/sys/net/altq/altq_rio.c',
+ 'freebsd/sys/net/altq/altq_rmclass.c',
+ 'freebsd/sys/net/altq/altq_subr.c',
'freebsd/sys/net/bpf.c',
'freebsd/sys/net/bpf_buffer.c',
'freebsd/sys/net/bpf_filter.c',
@@ -865,11 +868,9 @@ def build(bld):
'freebsd/sys/net/if_dead.c',
'freebsd/sys/net/if_disc.c',
'freebsd/sys/net/if_edsc.c',
- 'freebsd/sys/net/if_ef.c',
'freebsd/sys/net/if_enc.c',
'freebsd/sys/net/if_epair.c',
'freebsd/sys/net/if_ethersubr.c',
- 'freebsd/sys/net/if_faith.c',
'freebsd/sys/net/if_fddisubr.c',
'freebsd/sys/net/if_fwsubr.c',
'freebsd/sys/net/if_gif.c',
@@ -904,6 +905,7 @@ def build(bld):
'freebsd/sys/netinet/if_ether.c',
'freebsd/sys/netinet/igmp.c',
'freebsd/sys/netinet/in.c',
+ 'freebsd/sys/netinet/in_fib.c',
'freebsd/sys/netinet/in_gif.c',
'freebsd/sys/netinet/in_mcast.c',
'freebsd/sys/netinet/in_pcb.c',
@@ -921,6 +923,7 @@ def build(bld):
'freebsd/sys/netinet/ip_mroute.c',
'freebsd/sys/netinet/ip_options.c',
'freebsd/sys/netinet/ip_output.c',
+ 'freebsd/sys/netinet/ip_reass.c',
'freebsd/sys/netinet/libalias/alias.c',
'freebsd/sys/netinet/libalias/alias_cuseeme.c',
'freebsd/sys/netinet/libalias/alias_db.c',
@@ -969,6 +972,7 @@ def build(bld):
'freebsd/sys/netinet6/icmp6.c',
'freebsd/sys/netinet6/in6.c',
'freebsd/sys/netinet6/in6_cksum.c',
+ 'freebsd/sys/netinet6/in6_fib.c',
'freebsd/sys/netinet6/in6_gif.c',
'freebsd/sys/netinet6/in6_ifattach.c',
'freebsd/sys/netinet6/in6_mcast.c',
@@ -990,26 +994,44 @@ def build(bld):
'freebsd/sys/netinet6/scope6.c',
'freebsd/sys/netinet6/sctp6_usrreq.c',
'freebsd/sys/netinet6/udp6_usrreq.c',
- 'freebsd/sys/netpfil/ipfw/dn_heap.c',
- 'freebsd/sys/netpfil/ipfw/dn_sched_fifo.c',
- 'freebsd/sys/netpfil/ipfw/dn_sched_prio.c',
- 'freebsd/sys/netpfil/ipfw/dn_sched_qfq.c',
- 'freebsd/sys/netpfil/ipfw/dn_sched_rr.c',
- 'freebsd/sys/netpfil/ipfw/dn_sched_wf2q.c',
- 'freebsd/sys/netpfil/ipfw/ip_dn_glue.c',
- 'freebsd/sys/netpfil/ipfw/ip_dn_io.c',
- 'freebsd/sys/netpfil/ipfw/ip_dummynet.c',
'freebsd/sys/netpfil/ipfw/ip_fw2.c',
+ 'freebsd/sys/netpfil/ipfw/ip_fw_bpf.c',
+ 'freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c',
+ 'freebsd/sys/netpfil/ipfw/ip_fw_eaction.c',
+ 'freebsd/sys/netpfil/ipfw/ip_fw_iface.c',
'freebsd/sys/netpfil/ipfw/ip_fw_log.c',
'freebsd/sys/netpfil/ipfw/ip_fw_nat.c',
'freebsd/sys/netpfil/ipfw/ip_fw_pfil.c',
'freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c',
'freebsd/sys/netpfil/ipfw/ip_fw_table.c',
+ 'freebsd/sys/netpfil/ipfw/ip_fw_table_algo.c',
+ 'freebsd/sys/netpfil/ipfw/ip_fw_table_value.c',
+ 'freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.c',
+ 'freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c',
+ 'freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c',
+ 'freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c',
+ 'freebsd/sys/netpfil/ipfw/nat64/nat64stl.c',
+ 'freebsd/sys/netpfil/ipfw/nat64/nat64stl_control.c',
+ 'freebsd/sys/netpfil/ipfw/nptv6/ip_fw_nptv6.c',
+ 'freebsd/sys/netpfil/ipfw/nptv6/nptv6.c',
+ 'freebsd/sys/netpfil/pf/if_pflog.c',
+ 'freebsd/sys/netpfil/pf/if_pfsync.c',
+ 'freebsd/sys/netpfil/pf/in4_cksum.c',
+ 'freebsd/sys/netpfil/pf/pf.c',
+ 'freebsd/sys/netpfil/pf/pf_if.c',
+ 'freebsd/sys/netpfil/pf/pf_ioctl.c',
+ 'freebsd/sys/netpfil/pf/pf_lb.c',
+ 'freebsd/sys/netpfil/pf/pf_norm.c',
+ 'freebsd/sys/netpfil/pf/pf_osfp.c',
+ 'freebsd/sys/netpfil/pf/pf_ruleset.c',
+ 'freebsd/sys/netpfil/pf/pf_table.c',
'freebsd/sys/opencrypto/cast.c',
'freebsd/sys/opencrypto/criov.c',
'freebsd/sys/opencrypto/crypto.c',
+ 'freebsd/sys/opencrypto/cryptodeflate.c',
'freebsd/sys/opencrypto/cryptosoft.c',
- 'freebsd/sys/opencrypto/deflate.c',
+ 'freebsd/sys/opencrypto/gfmult.c',
+ 'freebsd/sys/opencrypto/gmac.c',
'freebsd/sys/opencrypto/rmd160.c',
'freebsd/sys/opencrypto/skipjack.c',
'freebsd/sys/opencrypto/xform.c',
@@ -1034,6 +1056,8 @@ def build(bld):
'rtemsbsd/local/bus_if.c',
'rtemsbsd/local/cryptodev_if.c',
'rtemsbsd/local/device_if.c',
+ 'rtemsbsd/local/gpio_if.c',
+ 'rtemsbsd/local/if_dwc_if.c',
'rtemsbsd/local/miibus_if.c',
'rtemsbsd/local/mmcbr_if.c',
'rtemsbsd/local/mmcbus_if.c',
@@ -1193,7 +1217,6 @@ def build(bld):
header_paths = [('rtemsbsd/include', '*.h', ''),
('rtemsbsd/mghttpd', 'mongoose.h', 'mghttpd'),
('freebsd/include', '*.h', ''),
- ('freebsd/sys/contrib/altq/altq', '*.h', 'altq'),
('freebsd/sys/bsm', '*.h', 'bsm'),
('freebsd/sys/cam', '*.h', 'cam'),
('freebsd/sys/net', '*.h', 'net'),
diff --git a/rtemsbsd/include/bsp/nexus-devices.h b/rtemsbsd/include/bsp/nexus-devices.h
index 39d49df4..0b4ec62b 100644
--- a/rtemsbsd/include/bsp/nexus-devices.h
+++ b/rtemsbsd/include/bsp/nexus-devices.h
@@ -54,7 +54,7 @@ RTEMS_BSD_DRIVER_FEC;
#include <bsp/irq.h>
-RTEMS_BSD_DRIVER_XILINX_ZYNQ_SLCR0;
+RTEMS_BSD_DRIVER_XILINX_ZYNQ_SLCR;
RTEMS_BSD_DRIVER_XILINX_ZYNQ_CGEM0(ZYNQ_IRQ_ETHERNET_0);
RTEMS_BSD_DRIVER_E1000PHY;
diff --git a/rtemsbsd/include/ddb/ddb.h b/rtemsbsd/include/ddb/ddb.h
index 936ffd88..db4c7a35 100644
--- a/rtemsbsd/include/ddb/ddb.h
+++ b/rtemsbsd/include/ddb/ddb.h
@@ -1 +1 @@
-/* EMPTY */
+#include <sys/kernel.h>
diff --git a/rtemsbsd/include/machine/counter.h b/rtemsbsd/include/machine/counter.h
new file mode 100644
index 00000000..2e38c00c
--- /dev/null
+++ b/rtemsbsd/include/machine/counter.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016 embedded brains GmbH. All rights reserved.
+ *
+ * embedded brains GmbH
+ * Dornierstr. 4
+ * 82178 Puchheim
+ * Germany
+ * <rtems@embedded-brains.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __MACHINE_COUNTER_H__
+#define __MACHINE_COUNTER_H__
+
+#include <sys/pcpu.h>
+
+#include <rtems/score/isrlevel.h>
+
+#ifdef IN_SUBR_COUNTER_C
+
+static inline uint64_t
+counter_u64_read_one(uint64_t *p, int cpu)
+{
+
+ return (*((uint64_t *)((char *)p + sizeof(struct pcpu) * cpu)));
+}
+
+static inline uint64_t
+counter_u64_fetch_inline(uint64_t *p)
+{
+ uint64_t r;
+ uint32_t cpu;
+
+ r = 0;
+ for (cpu = 0; cpu < _SMP_Get_processor_count(); ++cpu) {
+ r += counter_u64_read_one((uint64_t *)p, cpu);
+ }
+
+ return (r);
+}
+
+static inline void
+counter_u64_zero_inline(counter_u64_t c)
+{
+ uint32_t cpu;
+
+ for (cpu = 0; cpu < _SMP_Get_processor_count(); ++cpu) {
+ *((uint64_t *)((char *)c + sizeof(struct pcpu) * cpu)) = 0;
+ }
+}
+#endif
+
+static inline void
+counter_u64_add_protected(counter_u64_t c, int64_t inc)
+{
+
+ *(uint64_t *)zpcpu_get(c) += inc;
+}
+
+static inline void
+counter_u64_add(counter_u64_t c, int64_t inc)
+{
+ ISR_Level level;
+
+ _ISR_Local_disable(level);
+ counter_u64_add_protected(c, inc);
+ _ISR_Local_enable(level);
+}
+
+#endif /* ! __MACHINE_COUNTER_H__ */
diff --git a/rtemsbsd/include/machine/pcpu.h b/rtemsbsd/include/machine/pcpu.h
index b09f0942..f690b097 100644
--- a/rtemsbsd/include/machine/pcpu.h
+++ b/rtemsbsd/include/machine/pcpu.h
@@ -40,6 +40,8 @@
#ifndef _RTEMS_BSD_MACHINE_PCPU_H_
#define _RTEMS_BSD_MACHINE_PCPU_H_
+#include <rtems/score/smp.h>
+
struct thread;
struct thread *
diff --git a/rtemsbsd/include/machine/rtems-bsd-kernel-namespace.h b/rtemsbsd/include/machine/rtems-bsd-kernel-namespace.h
index 059ca1c2..a5642ef7 100644
--- a/rtemsbsd/include/machine/rtems-bsd-kernel-namespace.h
+++ b/rtemsbsd/include/machine/rtems-bsd-kernel-namespace.h
@@ -2,13 +2,6 @@
#error "the header file <machine/rtems-bsd-kernel-space.h> must be included first"
#endif
-#define aarp_clean _bsd_aarp_clean
-#define aarpintr _bsd_aarpintr
-#define aarp_org_code _bsd_aarp_org_code
-#define aarpprobe _bsd_aarpprobe
-#define aarpresolve _bsd_aarpresolve
-#define aarptab_mtx _bsd_aarptab_mtx
-#define aarptnew _bsd_aarptnew
#define accept_filt_add _bsd_accept_filt_add
#define accept_filt_del _bsd_accept_filt_del
#define accept_filt_generic_mod_event _bsd_accept_filt_generic_mod_event
@@ -17,8 +10,18 @@
#define AddFragmentPtrLink _bsd_AddFragmentPtrLink
#define AddLink _bsd_AddLink
#define AddPptp _bsd_AddPptp
+#define addr_hash _bsd_addr_hash
+#define addr_kfib _bsd_addr_kfib
+#define addr_radix _bsd_addr_radix
#define addrsel_policy_init _bsd_addrsel_policy_init
#define AddSeq _bsd_AddSeq
+#define add_table_entry _bsd_add_table_entry
+#define add_toperation_state _bsd_add_toperation_state
+#define AES_GMAC_Final _bsd_AES_GMAC_Final
+#define AES_GMAC_Init _bsd_AES_GMAC_Init
+#define AES_GMAC_Reinit _bsd_AES_GMAC_Reinit
+#define AES_GMAC_Setkey _bsd_AES_GMAC_Setkey
+#define AES_GMAC_Update _bsd_AES_GMAC_Update
#define aio_swake _bsd_aio_swake
#define AliasSctpInit _bsd_AliasSctpInit
#define AliasSctpTerm _bsd_AliasSctpTerm
@@ -39,8 +42,6 @@
#define altq_remove _bsd_altq_remove
#define altq_remove_queue _bsd_altq_remove_queue
#define altqs_inactive_open _bsd_altqs_inactive_open
-#define arc4rand _bsd_arc4rand
-#define arc4rand_iniseed_state _bsd_arc4rand_iniseed_state
#define arcbroadcastaddr _bsd_arcbroadcastaddr
#define arc_frag_init _bsd_arc_frag_init
#define arc_frag_next _bsd_arc_frag_next
@@ -51,22 +52,12 @@
#define arc_isphds _bsd_arc_isphds
#define arc_output _bsd_arc_output
#define arc_storelladdr _bsd_arc_storelladdr
+#define arp_announce_ifaddr _bsd_arp_announce_ifaddr
#define arp_ifinit _bsd_arp_ifinit
-#define arp_ifinit2 _bsd_arp_ifinit2
-#define arp_ifscrub _bsd_arp_ifscrub
#define arprequest _bsd_arprequest
#define arpresolve _bsd_arpresolve
+#define arpresolve_addr _bsd_arpresolve_addr
#define arpstat _bsd_arpstat
-#define at1intr _bsd_at1intr
-#define at2intr _bsd_at2intr
-#define at_broadcast _bsd_at_broadcast
-#define at_cksum _bsd_at_cksum
-#define at_control _bsd_at_control
-#define at_ifaddrhead _bsd_at_ifaddrhead
-#define at_ifaddr_rw _bsd_at_ifaddr_rw
-#define at_ifawithnet _bsd_at_ifawithnet
-#define at_ifawithnet_locked _bsd_at_ifawithnet_locked
-#define at_inithead _bsd_at_inithead
#define atm_event _bsd_atm_event
#define atm_getvccs _bsd_atm_getvccs
#define atm_harp_attach_p _bsd_atm_harp_attach_p
@@ -79,14 +70,6 @@
#define atm_output _bsd_atm_output
#define atmresolve _bsd_atmresolve
#define atm_rtrequest _bsd_atm_rtrequest
-#define at_org_code _bsd_at_org_code
-#define at_pcballoc _bsd_at_pcballoc
-#define at_pcbconnect _bsd_at_pcbconnect
-#define at_pcbdetach _bsd_at_pcbdetach
-#define at_pcbdisconnect _bsd_at_pcbdisconnect
-#define at_pcbsetaddr _bsd_at_pcbsetaddr
-#define at_sockaddr _bsd_at_sockaddr
-#define attach_dll _bsd_attach_dll
#define auth_hash_hmac_md5 _bsd_auth_hash_hmac_md5
#define auth_hash_hmac_ripemd_160 _bsd_auth_hash_hmac_ripemd_160
#define auth_hash_hmac_sha1 _bsd_auth_hash_hmac_sha1
@@ -95,6 +78,9 @@
#define auth_hash_hmac_sha2_512 _bsd_auth_hash_hmac_sha2_512
#define auth_hash_key_md5 _bsd_auth_hash_key_md5
#define auth_hash_key_sha1 _bsd_auth_hash_key_sha1
+#define auth_hash_nist_gmac_aes_128 _bsd_auth_hash_nist_gmac_aes_128
+#define auth_hash_nist_gmac_aes_192 _bsd_auth_hash_nist_gmac_aes_192
+#define auth_hash_nist_gmac_aes_256 _bsd_auth_hash_nist_gmac_aes_256
#define auth_hash_null _bsd_auth_hash_null
#define autoinc_step _bsd_autoinc_step
#define badport_bandlim _bsd_badport_bandlim
@@ -325,6 +311,8 @@
#define bce_xi90_rv2p_proc2 _bsd_bce_xi90_rv2p_proc2
#define bce_xi_rv2p_proc1 _bsd_bce_xi_rv2p_proc1
#define bce_xi_rv2p_proc2 _bsd_bce_xi_rv2p_proc2
+#define be_uuid_dec _bsd_be_uuid_dec
+#define be_uuid_enc _bsd_be_uuid_enc
#define BF_decrypt _bsd_BF_decrypt
#define BF_ecb_encrypt _bsd_BF_ecb_encrypt
#define BF_encrypt _bsd_BF_encrypt
@@ -346,15 +334,11 @@
#define bpf_mtap _bsd_bpf_mtap
#define bpf_mtap2 _bsd_bpf_mtap2
#define bpf_tap _bsd_bpf_tap
-#define bridge_cloner _bsd_bridge_cloner
-#define bridge_cloner_data _bsd_bridge_cloner_data
#define bridge_control_table _bsd_bridge_control_table
#define bridge_control_table_size _bsd_bridge_control_table_size
-#define bridge_detach_cookie _bsd_bridge_detach_cookie
#define bridge_dn_p _bsd_bridge_dn_p
#define bridge_input_p _bsd_bridge_input_p
#define bridge_linkstate_p _bsd_bridge_linkstate_p
-#define bridge_list _bsd_bridge_list
#define bridge_output_p _bsd_bridge_output_p
#define bridge_rtable_prune_period _bsd_bridge_rtable_prune_period
#define bridge_rtnode_zone _bsd_bridge_rtnode_zone
@@ -382,6 +366,8 @@
#define bstp_set_protocol _bsd_bstp_set_protocol
#define bstp_set_ptp _bsd_bstp_set_ptp
#define bstp_stop _bsd_bstp_stop
+#define bt_tickthreshold _bsd_bt_tickthreshold
+#define bt_timethreshold _bsd_bt_timethreshold
#define bucket_zones _bsd_bucket_zones
#define buf_ring_alloc _bsd_buf_ring_alloc
#define buf_ring_free _bsd_buf_ring_free
@@ -425,14 +411,19 @@
#define bus_generic_describe_intr _bsd_bus_generic_describe_intr
#define bus_generic_detach _bsd_bus_generic_detach
#define bus_generic_driver_added _bsd_bus_generic_driver_added
+#define bus_generic_get_bus_tag _bsd_bus_generic_get_bus_tag
+#define bus_generic_get_cpus _bsd_bus_generic_get_cpus
#define bus_generic_get_dma_tag _bsd_bus_generic_get_dma_tag
+#define bus_generic_get_domain _bsd_bus_generic_get_domain
#define bus_generic_get_resource_list _bsd_bus_generic_get_resource_list
+#define bus_generic_map_resource _bsd_bus_generic_map_resource
#define bus_generic_new_pass _bsd_bus_generic_new_pass
#define bus_generic_print_child _bsd_bus_generic_print_child
#define bus_generic_probe _bsd_bus_generic_probe
#define bus_generic_read_ivar _bsd_bus_generic_read_ivar
#define bus_generic_release_resource _bsd_bus_generic_release_resource
#define bus_generic_resume _bsd_bus_generic_resume
+#define bus_generic_resume_child _bsd_bus_generic_resume_child
#define bus_generic_rl_alloc_resource _bsd_bus_generic_rl_alloc_resource
#define bus_generic_rl_delete_resource _bsd_bus_generic_rl_delete_resource
#define bus_generic_rl_get_resource _bsd_bus_generic_rl_get_resource
@@ -441,12 +432,20 @@
#define bus_generic_setup_intr _bsd_bus_generic_setup_intr
#define bus_generic_shutdown _bsd_bus_generic_shutdown
#define bus_generic_suspend _bsd_bus_generic_suspend
+#define bus_generic_suspend_child _bsd_bus_generic_suspend_child
#define bus_generic_teardown_intr _bsd_bus_generic_teardown_intr
+#define bus_generic_unmap_resource _bsd_bus_generic_unmap_resource
#define bus_generic_write_ivar _bsd_bus_generic_write_ivar
+#define bus_get_bus_tag _bsd_bus_get_bus_tag
+#define bus_get_cpus _bsd_bus_get_cpus
#define bus_get_dma_tag _bsd_bus_get_dma_tag
+#define bus_get_domain _bsd_bus_get_domain
#define bus_get_resource _bsd_bus_get_resource
#define bus_get_resource_count _bsd_bus_get_resource_count
#define bus_get_resource_start _bsd_bus_get_resource_start
+#define bus_map_resource _bsd_bus_map_resource
+#define bus_null_rescan _bsd_bus_null_rescan
+#define bus_print_child_domain _bsd_bus_print_child_domain
#define bus_print_child_footer _bsd_bus_print_child_footer
#define bus_print_child_header _bsd_bus_print_child_header
#define bus_release_resource _bsd_bus_release_resource
@@ -455,17 +454,16 @@
#define bus_set_resource _bsd_bus_set_resource
#define bus_setup_intr _bsd_bus_setup_intr
#define bus_teardown_intr _bsd_bus_teardown_intr
+#define bus_unmap_resource _bsd_bus_unmap_resource
#define callout_handle_init _bsd_callout_handle_init
#define callout_init _bsd_callout_init
#define _callout_init_lock _bsd__callout_init_lock
-#define callout_new_inserted _bsd_callout_new_inserted
-#define callout_reset_on _bsd_callout_reset_on
+#define callout_process _bsd_callout_process
+#define callout_reset_sbt_on _bsd_callout_reset_sbt_on
#define callout_schedule _bsd_callout_schedule
#define callout_schedule_on _bsd_callout_schedule_on
#define _callout_stop_safe _bsd__callout_stop_safe
-#define callout_tick _bsd_callout_tick
-#define callout_tickstofirst _bsd_callout_tickstofirst
-#define callwheelbits _bsd_callwheelbits
+#define callout_when _bsd_callout_when
#define callwheelmask _bsd_callwheelmask
#define callwheelsize _bsd_callwheelsize
#define camellia_decrypt _bsd_camellia_decrypt
@@ -491,48 +489,61 @@
#define cam_status_table _bsd_cam_status_table
#define cam_strmatch _bsd_cam_strmatch
#define cam_strvis _bsd_cam_strvis
+#define cam_strvis_sbuf _bsd_cam_strvis_sbuf
#define carp6_input _bsd_carp6_input
-#define carp_carpdev_state _bsd_carp_carpdev_state
-#define carp_cloner _bsd_carp_cloner
-#define carp_cloner_data _bsd_carp_cloner_data
+#define carp_attach _bsd_carp_attach
+#define carp_attach_p _bsd_carp_attach_p
+#define carp_demote_adj_p _bsd_carp_demote_adj_p
+#define carp_detach _bsd_carp_detach
+#define carp_detach_p _bsd_carp_detach_p
#define carp_forus _bsd_carp_forus
#define carp_forus_p _bsd_carp_forus_p
+#define carp_get_vhid_p _bsd_carp_get_vhid_p
#define carp_iamatch _bsd_carp_iamatch
#define carp_iamatch6 _bsd_carp_iamatch6
#define carp_iamatch6_p _bsd_carp_iamatch6_p
#define carp_iamatch_p _bsd_carp_iamatch_p
#define carp_input _bsd_carp_input
+#define carp_ioctl _bsd_carp_ioctl
+#define carp_ioctl_p _bsd_carp_ioctl_p
#define carp_linkstate_p _bsd_carp_linkstate_p
#define carp_macmatch6 _bsd_carp_macmatch6
#define carp_macmatch6_p _bsd_carp_macmatch6_p
-#define carp_opts _bsd_carp_opts
+#define carp_master _bsd_carp_master
+#define carp_master_p _bsd_carp_master_p
#define carp_output _bsd_carp_output
#define carp_output_p _bsd_carp_output_p
#define carpstats _bsd_carpstats
-#define carp_suppress_preempt _bsd_carp_suppress_preempt
#define cast_decrypt _bsd_cast_decrypt
#define cast_encrypt _bsd_cast_encrypt
#define cast_setkey _bsd_cast_setkey
+#define cc_ack_received _bsd_cc_ack_received
#define cc_cong_signal _bsd_cc_cong_signal
+#define cc_conn_init _bsd_cc_conn_init
#define cc_cpu _bsd_cc_cpu
#define cc_deregister_algo _bsd_cc_deregister_algo
#define cc_list _bsd_cc_list
#define cc_list_lock _bsd_cc_list_lock
#define cc_modevent _bsd_cc_modevent
+#define cc_post_recovery _bsd_cc_post_recovery
#define cc_register_algo _bsd_cc_register_algo
#define cgem_set_ref_clk _bsd_cgem_set_ref_clk
+#define classify_opcode_kidx _bsd_classify_opcode_kidx
#define clean_unrhdr _bsd_clean_unrhdr
#define clean_unrhdrl _bsd_clean_unrhdrl
-#define cleanup_pf_zone _bsd_cleanup_pf_zone
#define ClearCheckNewLink _bsd_ClearCheckNewLink
#define clk_intr_event _bsd_clk_intr_event
#define comp_algo_deflate _bsd_comp_algo_deflate
+#define compare_ifidx _bsd_compare_ifidx
+#define compare_numarray _bsd_compare_numarray
#define config_intrhook_disestablish _bsd_config_intrhook_disestablish
#define config_intrhook_establish _bsd_config_intrhook_establish
-#define convert_rule_to_7 _bsd_convert_rule_to_7
-#define convert_rule_to_8 _bsd_convert_rule_to_8
-#define copy_data_helper_compat _bsd_copy_data_helper_compat
#define copyiniov _bsd_copyiniov
+#define counter_u64_alloc _bsd_counter_u64_alloc
+#define counter_u64_fetch _bsd_counter_u64_fetch
+#define counter_u64_free _bsd_counter_u64_free
+#define counter_u64_zero _bsd_counter_u64_zero
+#define create_objects_compat _bsd_create_objects_compat
#define crypto_apply _bsd_crypto_apply
#define crypto_copyback _bsd_crypto_copyback
#define crypto_copydata _bsd_crypto_copydata
@@ -550,6 +561,7 @@
#define crypto_kdispatch _bsd_crypto_kdispatch
#define crypto_kdone _bsd_crypto_kdone
#define crypto_kregister _bsd_crypto_kregister
+#define crypto_mbuftoiov _bsd_crypto_mbuftoiov
#define crypto_modevent _bsd_crypto_modevent
#define crypto_newsession _bsd_crypto_newsession
#define crypto_register _bsd_crypto_register
@@ -557,6 +569,8 @@
#define crypto_unregister _bsd_crypto_unregister
#define crypto_unregister_all _bsd_crypto_unregister_all
#define crypto_userasymcrypto _bsd_crypto_userasymcrypto
+#define ctl3_lock _bsd_ctl3_lock
+#define ctl3_rewriters _bsd_ctl3_rewriters
#define cuio_apply _bsd_cuio_apply
#define cuio_copyback _bsd_cuio_copyback
#define cuio_copydata _bsd_cuio_copydata
@@ -565,31 +579,28 @@
#define cv_destroy _bsd_cv_destroy
#define cv_init _bsd_cv_init
#define cv_signal _bsd_cv_signal
-#define _cv_timedwait _bsd__cv_timedwait
+#define _cv_timedwait_sbt _bsd__cv_timedwait_sbt
#define _cv_wait _bsd__cv_wait
#define _cv_wait_sig _bsd__cv_wait_sig
#define _cv_wait_unlock _bsd__cv_wait_unlock
-#define dad_ignore_ns _bsd_dad_ignore_ns
-#define dad_init _bsd_dad_init
-#define dad_maxtry _bsd_dad_maxtry
-#define ddpcb_list _bsd_ddpcb_list
-#define ddp_cksum _bsd_ddp_cksum
-#define ddp_init _bsd_ddp_init
-#define ddp_list_mtx _bsd_ddp_list_mtx
-#define ddp_output _bsd_ddp_output
-#define ddp_route _bsd_ddp_route
-#define ddp_search _bsd_ddp_search
-#define ddp_usrreqs _bsd_ddp_usrreqs
-#define debug_pfugidhack _bsd_debug_pfugidhack
+#define deembed_scopeid _bsd_deembed_scopeid
#define default_cc_ptr _bsd_default_cc_ptr
-#define defer _bsd_defer
+#define default_eaction_typename _bsd_default_eaction_typename
#define deflate_global _bsd_deflate_global
+#define defrouter_del _bsd_defrouter_del
#define defrouter_lookup _bsd_defrouter_lookup
+#define defrouter_lookup_locked _bsd_defrouter_lookup_locked
+#define defrouter_ref _bsd_defrouter_ref
+#define defrouter_rele _bsd_defrouter_rele
+#define defrouter_remove _bsd_defrouter_remove
#define defrouter_reset _bsd_defrouter_reset
#define defrouter_select _bsd_defrouter_select
-#define defrtrlist_del _bsd_defrtrlist_del
+#define defrouter_unlink _bsd_defrouter_unlink
#define DELAY _bsd_DELAY
#define delete_unrhdr _bsd_delete_unrhdr
+#define del_table_entry _bsd_del_table_entry
+#define del_toperation_state _bsd_del_toperation_state
+#define deregister_tcp_functions _bsd_deregister_tcp_functions
#define des_check_key _bsd_des_check_key
#define des_check_key_parity _bsd_des_check_key_parity
#define des_decrypt3 _bsd_des_decrypt3
@@ -609,8 +620,6 @@
#define des_SPtrans _bsd_des_SPtrans
#define dest6_input _bsd_dest6_input
#define destroy_dev _bsd_destroy_dev
-#define detach_dll _bsd_detach_dll
-#define detach_handler _bsd_detach_handler
#define devclass_add_driver _bsd_devclass_add_driver
#define devclass_create _bsd_devclass_create
#define devclass_delete_driver _bsd_devclass_delete_driver
@@ -661,6 +670,7 @@
#define device_is_attached _bsd_device_is_attached
#define device_is_enabled _bsd_device_is_enabled
#define device_is_quiet _bsd_device_is_quiet
+#define device_is_suspended _bsd_device_is_suspended
#define device_printf _bsd_device_printf
#define device_print_prettyname _bsd_device_print_prettyname
#define device_probe _bsd_device_probe
@@ -671,6 +681,7 @@
#define device_set_desc _bsd_device_set_desc
#define device_set_desc_copy _bsd_device_set_desc_copy
#define device_set_devclass _bsd_device_set_devclass
+#define device_set_devclass_fixed _bsd_device_set_devclass_fixed
#define device_set_driver _bsd_device_set_driver
#define device_set_flags _bsd_device_set_flags
#define device_set_ivars _bsd_device_set_ivars
@@ -680,43 +691,28 @@
#define device_shutdown _bsd_device_shutdown
#define device_unbusy _bsd_device_unbusy
#define device_verbose _bsd_device_verbose
+#define dev_lock _bsd_dev_lock
+#define devmtx _bsd_devmtx
+#define dev_refl _bsd_dev_refl
+#define dev_refthread _bsd_dev_refthread
+#define dev_relthread _bsd_dev_relthread
+#define dev_unlock _bsd_dev_unlock
#define DifferentialChecksum _bsd_DifferentialChecksum
-#define disc_cloner _bsd_disc_cloner
-#define disc_cloner_data _bsd_disc_cloner_data
#define div_protosw _bsd_div_protosw
#define div_usrreqs _bsd_div_usrreqs
-#define dll_chain _bsd_dll_chain
-#define dn_c_copy_fs _bsd_dn_c_copy_fs
-#define dn_c_copy_pipe _bsd_dn_c_copy_pipe
-#define dn_c_copy_q _bsd_dn_c_copy_q
-#define dn_cfg _bsd_dn_cfg
-#define dn_compat_calc_size _bsd_dn_compat_calc_size
-#define dn_compat_copy_pipe _bsd_dn_compat_copy_pipe
-#define dn_compat_copy_queue _bsd_dn_compat_copy_queue
-#define dn_drain_queue _bsd_dn_drain_queue
-#define dn_drain_scheduler _bsd_dn_drain_scheduler
-#define dn_enqueue _bsd_dn_enqueue
-#define dn_free_pkts _bsd_dn_free_pkts
-#define dn_ht_entries _bsd_dn_ht_entries
-#define dn_ht_find _bsd_dn_ht_find
-#define dn_ht_free _bsd_dn_ht_free
-#define dn_ht_init _bsd_dn_ht_init
-#define dn_ht_scan _bsd_dn_ht_scan
-#define dn_ht_scan_bucket _bsd_dn_ht_scan_bucket
-#define dn_reschedule _bsd_dn_reschedule
-#define dn_sched_modevent _bsd_dn_sched_modevent
-#define do_config _bsd_do_config
#define do_getopt_accept_filter _bsd_do_getopt_accept_filter
#define domain_add _bsd_domain_add
#define domain_init _bsd_domain_init
#define domain_init_status _bsd_domain_init_status
#define domains _bsd_domains
#define do_setopt_accept_filter _bsd_do_setopt_accept_filter
+#define drbr_dequeue_drv _bsd_drbr_dequeue_drv
+#define drbr_enqueue_drv _bsd_drbr_enqueue_drv
+#define drbr_inuse_drv _bsd_drbr_inuse_drv
+#define drbr_needs_enqueue_drv _bsd_drbr_needs_enqueue_drv
#define driver_module_handler _bsd_driver_module_handler
#define dummy_def _bsd_dummy_def
-#define dummynet_get _bsd_dummynet_get
-#define dummynet_io _bsd_dummynet_io
-#define dummynet_task _bsd_dummynet_task
+#define dwc_driver _bsd_dwc_driver
#define dwc_otg_filter_interrupt _bsd_dwc_otg_filter_interrupt
#define dwc_otg_init _bsd_dwc_otg_init
#define dwc_otg_interrupt _bsd_dwc_otg_interrupt
@@ -820,6 +816,7 @@
#define e1000_init_hw_82575 _bsd_e1000_init_hw_82575
#define e1000_init_hw_i210 _bsd_e1000_init_hw_i210
#define e1000_initialize_M88E1512_phy _bsd_e1000_initialize_M88E1512_phy
+#define e1000_initialize_M88E1543_phy _bsd_e1000_initialize_M88E1543_phy
#define e1000_init_mac_ops_generic _bsd_e1000_init_mac_ops_generic
#define e1000_init_mac_params _bsd_e1000_init_mac_params
#define e1000_init_mbx_ops_generic _bsd_e1000_init_mbx_ops_generic
@@ -1024,17 +1021,9 @@
#define e1000_write_vfta _bsd_e1000_write_vfta
#define e1000_write_vfta_generic _bsd_e1000_write_vfta_generic
#define e1000_write_xmdio_reg _bsd_e1000_write_xmdio_reg
-#define edsc_cloner _bsd_edsc_cloner
-#define edsc_cloner_data _bsd_edsc_cloner_data
-#define ef_inputp _bsd_ef_inputp
-#define ef_outputp _bsd_ef_outputp
-#define ehci_bus_methods _bsd_ehci_bus_methods
#define ehci_detach _bsd_ehci_detach
-#define ehci_device_bulk_methods _bsd_ehci_device_bulk_methods
-#define ehci_device_ctrl_methods _bsd_ehci_device_ctrl_methods
-#define ehci_device_intr_methods _bsd_ehci_device_intr_methods
-#define ehci_device_isoc_fs_methods _bsd_ehci_device_isoc_fs_methods
-#define ehci_device_isoc_hs_methods _bsd_ehci_device_isoc_hs_methods
+#define ehci_get_port_speed_hostc _bsd_ehci_get_port_speed_hostc
+#define ehci_get_port_speed_portsc _bsd_ehci_get_port_speed_portsc
#define ehci_init _bsd_ehci_init
#define ehci_interrupt _bsd_ehci_interrupt
#define ehci_iterate_hw_softc _bsd_ehci_iterate_hw_softc
@@ -1042,7 +1031,6 @@
#define ehci_resume _bsd_ehci_resume
#define ehci_suspend _bsd_ehci_suspend
#define em_devclass _bsd_em_devclass
-#define em_display_debug_stats _bsd_em_display_debug_stats
#define em_driver_version _bsd_em_driver_version
#define encap4_input _bsd_encap4_input
#define encap6_input _bsd_encap6_input
@@ -1051,11 +1039,10 @@
#define encap_detach _bsd_encap_detach
#define encap_getarg _bsd_encap_getarg
#define encap_init _bsd_encap_init
-#define encaptab _bsd_encaptab
-#define enc_cloner _bsd_enc_cloner
-#define enc_cloner_data _bsd_enc_cloner_data
-#define encif _bsd_encif
#define enc_xform_3des _bsd_enc_xform_3des
+#define enc_xform_aes_icm _bsd_enc_xform_aes_icm
+#define enc_xform_aes_nist_gcm _bsd_enc_xform_aes_nist_gcm
+#define enc_xform_aes_nist_gmac _bsd_enc_xform_aes_nist_gmac
#define enc_xform_aes_xts _bsd_enc_xform_aes_xts
#define enc_xform_arc4 _bsd_enc_xform_arc4
#define enc_xform_blf _bsd_enc_xform_blf
@@ -1065,13 +1052,13 @@
#define enc_xform_null _bsd_enc_xform_null
#define enc_xform_rijndael128 _bsd_enc_xform_rijndael128
#define enc_xform_skipjack _bsd_enc_xform_skipjack
+#define eopnotsupp _bsd_eopnotsupp
#define ether_crc32_be _bsd_ether_crc32_be
#define ether_crc32_le _bsd_ether_crc32_le
#define ether_demux _bsd_ether_demux
#define ether_ifattach _bsd_ether_ifattach
#define ether_ifdetach _bsd_ether_ifdetach
#define ether_ioctl _bsd_ether_ioctl
-#define ether_ipfw_chk _bsd_ether_ipfw_chk
#define ethermulticastaddr_slowprotocols _bsd_ethermulticastaddr_slowprotocols
#define ether_output _bsd_ether_output
#define ether_output_frame _bsd_ether_output_frame
@@ -1082,15 +1069,18 @@
#define eventhandler_find_list _bsd_eventhandler_find_list
#define eventhandler_prune_list _bsd_eventhandler_prune_list
#define eventhandler_register _bsd_eventhandler_register
-#define export_pflow_ptr _bsd_export_pflow_ptr
-#define faith_cloner _bsd_faith_cloner
-#define faith_cloner_data _bsd_faith_cloner_data
-#define faithoutput _bsd_faithoutput
-#define faithprefix_p _bsd_faithprefix_p
#define fddi_ifattach _bsd_fddi_ifattach
#define fddi_ifdetach _bsd_fddi_ifdetach
#define fddi_ioctl _bsd_fddi_ioctl
+#define fib4_free_nh_ext _bsd_fib4_free_nh_ext
+#define fib4_lookup_nh_basic _bsd_fib4_lookup_nh_basic
+#define fib4_lookup_nh_ext _bsd_fib4_lookup_nh_ext
+#define fib6_free_nh_ext _bsd_fib6_free_nh_ext
+#define fib6_lookup_nh_basic _bsd_fib6_lookup_nh_basic
+#define fib6_lookup_nh_ext _bsd_fib6_lookup_nh_ext
#define FindAliasAddress _bsd_FindAliasAddress
+#define find_and_ref_tcp_fb _bsd_find_and_ref_tcp_fb
+#define find_and_ref_tcp_functions _bsd_find_and_ref_tcp_functions
#define FindFragmentIn1 _bsd_FindFragmentIn1
#define FindFragmentIn2 _bsd_FindFragmentIn2
#define FindFragmentPtr _bsd_FindFragmentPtr
@@ -1116,7 +1106,8 @@
#define firewire_input _bsd_firewire_input
#define firewire_ioctl _bsd_firewire_ioctl
#define first_handler _bsd_first_handler
-#define fls _bsd_fls
+#define flow_hash _bsd_flow_hash
+#define flush_table _bsd_flush_table
#define frag6_drain _bsd_frag6_drain
#define frag6_init _bsd_frag6_init
#define frag6_input _bsd_frag6_input
@@ -1124,10 +1115,14 @@
#define free_unr _bsd_free_unr
#define fw_one_pass _bsd_fw_one_pass
#define fw_tables_max _bsd_fw_tables_max
+#define fw_tables_sets _bsd_fw_tables_sets
#define fw_verbose _bsd_fw_verbose
#define GetAckModified _bsd_GetAckModified
#define GetAliasAddress _bsd_GetAliasAddress
#define GetAliasPort _bsd_GetAliasPort
+#define getcreddomainname _bsd_getcreddomainname
+#define getcredhostid _bsd_getcredhostid
+#define getcredhostname _bsd_getcredhostname
#define GetDefaultAliasAddress _bsd_GetDefaultAliasAddress
#define GetDeltaAckIn _bsd_GetDeltaAckIn
#define GetDeltaSeqOut _bsd_GetDeltaSeqOut
@@ -1142,38 +1137,20 @@
#define getsockaddr _bsd_getsockaddr
#define GetStateIn _bsd_GetStateIn
#define GetStateOut _bsd_GetStateOut
+#define gf128_genmultable _bsd_gf128_genmultable
+#define gf128_genmultable4 _bsd_gf128_genmultable4
+#define gf128_mul _bsd_gf128_mul
+#define gf128_mul4 _bsd_gf128_mul4
+#define gf128_mul4b _bsd_gf128_mul4b
#define Giant _bsd_Giant
-#define gif_cloner _bsd_gif_cloner
-#define gif_cloner_data _bsd_gif_cloner_data
-#define gif_delete_tunnel _bsd_gif_delete_tunnel
#define gif_encapcheck _bsd_gif_encapcheck
-#define gif_encapcheck4 _bsd_gif_encapcheck4
-#define gif_encapcheck6 _bsd_gif_encapcheck6
#define gif_input _bsd_gif_input
-#define gif_ioctl _bsd_gif_ioctl
#define gif_output _bsd_gif_output
-#define gif_set_tunnel _bsd_gif_set_tunnel
-#define gre_cloner _bsd_gre_cloner
-#define gre_cloner_data _bsd_gre_cloner_data
-#define gre_in_cksum _bsd_gre_in_cksum
#define gre_input _bsd_gre_input
-#define gre_mobile_input _bsd_gre_mobile_input
-#define gre_mtx _bsd_gre_mtx
-#define gre_softc_list _bsd_gre_softc_list
-#define handler_chain _bsd_handler_chain
-#define handler_chain_destroy _bsd_handler_chain_destroy
-#define handler_chain_init _bsd_handler_chain_init
-#define handler_rw _bsd_handler_rw
#define handlers _bsd_handlers
-#define harvest _bsd_harvest
#define hashdestroy _bsd_hashdestroy
#define hashinit _bsd_hashinit
#define hashinit_flags _bsd_hashinit_flags
-#define heap_extract _bsd_heap_extract
-#define heap_free _bsd_heap_free
-#define heap_init _bsd_heap_init
-#define heap_insert _bsd_heap_insert
-#define heap_scan _bsd_heap_scan
#define hex2ascii_data _bsd_hex2ascii_data
#define hhook_add_hook _bsd_hhook_add_hook
#define hhook_add_hook_lookup _bsd_hhook_add_hook_lookup
@@ -1188,6 +1165,7 @@
#define hhook_remove_hook _bsd_hhook_remove_hook
#define hhook_remove_hook_lookup _bsd_hhook_remove_hook_lookup
#define hhook_run_hooks _bsd_hhook_run_hooks
+#define hhook_run_tcp_est_in _bsd_hhook_run_tcp_est_in
#define hhook_vhead_list _bsd_hhook_vhead_list
#define hid_end_parse _bsd_hid_end_parse
#define hid_get_data _bsd_hid_get_data
@@ -1203,7 +1181,6 @@
#define hid_start_parse _bsd_hid_start_parse
#define hmac_ipad_buffer _bsd_hmac_ipad_buffer
#define hmac_opad_buffer _bsd_hmac_opad_buffer
-#define hostb_alloc_start _bsd_hostb_alloc_start
#define HouseKeeping _bsd_HouseKeeping
#define hz _bsd_hz
#define icmp6_ctloutput _bsd_icmp6_ctloutput
@@ -1226,6 +1203,8 @@
#define icmp_input _bsd_icmp_input
#define icmpstat _bsd_icmpstat
#define ifa_add_loopback_route _bsd_ifa_add_loopback_route
+#define ifa_alloc _bsd_ifa_alloc
+#define iface_idx _bsd_iface_idx
#define if_addgroup _bsd_if_addgroup
#define if_addmulti _bsd_if_addmulti
#define ifaddr_byindex _bsd_ifaddr_byindex
@@ -1237,50 +1216,68 @@
#define ifa_ifwithaddr_check _bsd_ifa_ifwithaddr_check
#define ifa_ifwithbroadaddr _bsd_ifa_ifwithbroadaddr
#define ifa_ifwithdstaddr _bsd_ifa_ifwithdstaddr
-#define ifa_ifwithdstaddr_fib _bsd_ifa_ifwithdstaddr_fib
#define ifa_ifwithnet _bsd_ifa_ifwithnet
-#define ifa_ifwithnet_fib _bsd_ifa_ifwithnet_fib
#define ifa_ifwithroute _bsd_ifa_ifwithroute
-#define ifa_ifwithroute_fib _bsd_ifa_ifwithroute_fib
-#define ifa_init _bsd_ifa_init
#define if_allmulti _bsd_if_allmulti
#define if_alloc _bsd_if_alloc
#define ifaof_ifpforaddr _bsd_ifaof_ifpforaddr
+#define ifa_preferred _bsd_ifa_preferred
#define ifa_ref _bsd_ifa_ref
+#define ifa_switch_loopback_route _bsd_ifa_switch_loopback_route
#define if_attach _bsd_if_attach
+#define if_bpfmtap _bsd_if_bpfmtap
#define ifc_alloc_unit _bsd_ifc_alloc_unit
#define ifc_free_unit _bsd_ifc_free_unit
-#define if_clone_attach _bsd_if_clone_attach
+#define if_clearhwassist _bsd_if_clearhwassist
+#define if_clone_addgroup _bsd_if_clone_addgroup
+#define if_clone_advanced _bsd_if_clone_advanced
#define if_clone_create _bsd_if_clone_create
#define if_clone_destroy _bsd_if_clone_destroy
#define if_clone_destroyif _bsd_if_clone_destroyif
#define if_clone_detach _bsd_if_clone_detach
-#define if_clone_init _bsd_if_clone_init
+#define if_clone_findifc _bsd_if_clone_findifc
#define if_clone_list _bsd_if_clone_list
#define if_cloners _bsd_if_cloners
+#define if_clone_simple _bsd_if_clone_simple
#define ifc_name2unit _bsd_ifc_name2unit
-#define ifc_simple_attach _bsd_ifc_simple_attach
-#define ifc_simple_create _bsd_ifc_simple_create
-#define ifc_simple_destroy _bsd_ifc_simple_destroy
-#define ifc_simple_match _bsd_ifc_simple_match
+#define if_data_copy _bsd_if_data_copy
#define if_dead _bsd_if_dead
#define if_delallmulti _bsd_if_delallmulti
#define if_delgroup _bsd_if_delgroup
#define if_delmulti _bsd_if_delmulti
#define if_delmulti_ifma _bsd_if_delmulti_ifma
+#define if_dequeue _bsd_if_dequeue
#define if_deregister_com_alloc _bsd_if_deregister_com_alloc
#define if_detach _bsd_if_detach
#define if_down _bsd_if_down
+#define if_etherbpfmtap _bsd_if_etherbpfmtap
#define if_findmulti _bsd_if_findmulti
#define if_free _bsd_if_free
-#define if_free_type _bsd_if_free_type
+#define if_getamcount _bsd_if_getamcount
+#define if_getbaudrate _bsd_if_getbaudrate
+#define if_getcapabilities _bsd_if_getcapabilities
+#define if_getcapenable _bsd_if_getcapenable
+#define if_get_counter_default _bsd_if_get_counter_default
+#define if_getdname _bsd_if_getdname
+#define if_getdrvflags _bsd_if_getdrvflags
+#define if_getflags _bsd_if_getflags
+#define if_gethandle _bsd_if_gethandle
+#define if_gethwassist _bsd_if_gethwassist
+#define if_getifaddr _bsd_if_getifaddr
+#define if_getlladdr _bsd_if_getlladdr
+#define if_getmtu _bsd_if_getmtu
+#define if_getmtu_family _bsd_if_getmtu_family
+#define if_getsoftc _bsd_if_getsoftc
+#define if_getvtag _bsd_if_getvtag
#define ifg_head _bsd_ifg_head
#define if_handoff _bsd_if_handoff
#define if_hw_tsomax_common _bsd_if_hw_tsomax_common
#define if_hw_tsomax_update _bsd_if_hw_tsomax_update
+#define if_inc_counter _bsd_if_inc_counter
#define if_index _bsd_if_index
#define ifindex_table _bsd_ifindex_table
#define if_initname _bsd_if_initname
+#define if_input _bsd_if_input
#define ifioctl _bsd_ifioctl
#define if_link_state_change _bsd_if_link_state_change
#define if_maddr_rlock _bsd_if_maddr_rlock
@@ -1292,6 +1289,9 @@
#define ifmedia_list_add _bsd_ifmedia_list_add
#define ifmedia_removeall _bsd_ifmedia_removeall
#define ifmedia_set _bsd_ifmedia_set
+#define if_multiaddr_array _bsd_if_multiaddr_array
+#define if_multiaddr_count _bsd_if_multiaddr_count
+#define if_multi_apply _bsd_if_multi_apply
#define ifnet _bsd_ifnet
#define ifnet_byindex _bsd_ifnet_byindex
#define ifnet_byindex_locked _bsd_ifnet_byindex_locked
@@ -1307,13 +1307,43 @@
#define if_ref _bsd_if_ref
#define if_register_com_alloc _bsd_if_register_com_alloc
#define if_rele _bsd_if_rele
+#define if_sendq_empty _bsd_if_sendq_empty
+#define if_sendq_prepend _bsd_if_sendq_prepend
+#define if_setbaudrate _bsd_if_setbaudrate
+#define if_setcapabilities _bsd_if_setcapabilities
+#define if_setcapabilitiesbit _bsd_if_setcapabilitiesbit
+#define if_setcapenable _bsd_if_setcapenable
+#define if_setcapenablebit _bsd_if_setcapenablebit
+#define if_setdev _bsd_if_setdev
+#define if_setdrvflagbits _bsd_if_setdrvflagbits
+#define if_setdrvflags _bsd_if_setdrvflags
+#define if_setflagbits _bsd_if_setflagbits
+#define if_setflags _bsd_if_setflags
+#define if_setgetcounterfn _bsd_if_setgetcounterfn
+#define if_sethwassist _bsd_if_sethwassist
+#define if_sethwassistbits _bsd_if_sethwassistbits
+#define if_setifheaderlen _bsd_if_setifheaderlen
+#define if_setinitfn _bsd_if_setinitfn
+#define if_setioctlfn _bsd_if_setioctlfn
#define if_setlladdr _bsd_if_setlladdr
+#define if_setmtu _bsd_if_setmtu
+#define if_setqflushfn _bsd_if_setqflushfn
+#define if_setrcvif _bsd_if_setrcvif
+#define if_setsendqlen _bsd_if_setsendqlen
+#define if_setsendqready _bsd_if_setsendqready
+#define if_setsoftc _bsd_if_setsoftc
+#define if_setstartfn _bsd_if_setstartfn
+#define if_settransmitfn _bsd_if_settransmitfn
+#define if_setupmultiaddr _bsd_if_setupmultiaddr
+#define if_setvtag _bsd_if_setvtag
#define if_simloop _bsd_if_simloop
#define if_start _bsd_if_start
+#define if_togglecapenable _bsd_if_togglecapenable
#define ifunit _bsd_ifunit
#define ifunit_ref _bsd_ifunit_ref
#define if_up _bsd_if_up
-#define igb_display_debug_stats _bsd_igb_display_debug_stats
+#define if_vlancap _bsd_if_vlancap
+#define if_vlantrunkinuse _bsd_if_vlantrunkinuse
#define igb_driver_version _bsd_igb_driver_version
#define igmp_change_state _bsd_igmp_change_state
#define igmp_domifattach _bsd_igmp_domifattach
@@ -1331,33 +1361,40 @@
#define in6_addrscope _bsd_in6_addrscope
#define in6_are_prefix_equal _bsd_in6_are_prefix_equal
#define in6_cksum _bsd_in6_cksum
+#define in6_cksum_partial _bsd_in6_cksum_partial
#define in6_cksum_pseudo _bsd_in6_cksum_pseudo
#define in6_clearscope _bsd_in6_clearscope
#define in6_control _bsd_in6_control
+#define in6_delayed_cksum _bsd_in6_delayed_cksum
#define in6_domifattach _bsd_in6_domifattach
#define in6_domifdetach _bsd_in6_domifdetach
+#define in6_domifmtu _bsd_in6_domifmtu
#define in6_get_hw_ifid _bsd_in6_get_hw_ifid
+#define in6_getlinkifnet _bsd_in6_getlinkifnet
#define in6_getpeeraddr _bsd_in6_getpeeraddr
#define in6_getscope _bsd_in6_getscope
+#define in6_getscopezone _bsd_in6_getscopezone
#define in6_getsockaddr _bsd_in6_getsockaddr
#define in6_get_tmpifid _bsd_in6_get_tmpifid
#define in6_gif_attach _bsd_in6_gif_attach
-#define in6_gif_detach _bsd_in6_gif_detach
-#define in6_gif_input _bsd_in6_gif_input
+#define in6_gif_encapcheck _bsd_in6_gif_encapcheck
#define in6_gif_output _bsd_in6_gif_output
-#define in6_gif_protosw _bsd_in6_gif_protosw
#define in6_if2idlen _bsd_in6_if2idlen
-#define in6_ifaddloop _bsd_in6_ifaddloop
+#define in6_ifaddrhashtbl _bsd_in6_ifaddrhashtbl
#define in6_ifaddrhead _bsd_in6_ifaddrhead
+#define in6_ifaddrhmask _bsd_in6_ifaddrhmask
#define in6_ifaddr_lock _bsd_in6_ifaddr_lock
#define in6ifa_ifpforlinklocal _bsd_in6ifa_ifpforlinklocal
#define in6ifa_ifpwithaddr _bsd_in6ifa_ifpwithaddr
+#define in6ifa_ifwithaddr _bsd_in6ifa_ifwithaddr
#define in6ifa_llaonifp _bsd_in6ifa_llaonifp
#define in6_ifattach _bsd_in6_ifattach
+#define in6_ifattach_destroy _bsd_in6_ifattach_destroy
#define in6_ifawithifp _bsd_in6_ifawithifp
#define in6_ifdetach _bsd_in6_ifdetach
+#define in6_ifdetach_destroy _bsd_in6_ifdetach_destroy
#define in6if_do_dad _bsd_in6if_do_dad
-#define in6_ifremloop _bsd_in6_ifremloop
+#define in6_ifhasaddr _bsd_in6_ifhasaddr
#define in6_if_up _bsd_in6_if_up
#define in6_inithead _bsd_in6_inithead
#define in6_is_addr_deprecated _bsd_in6_is_addr_deprecated
@@ -1387,23 +1424,22 @@
#define in6m_record_source _bsd_in6m_record_source
#define in6m_release_locked _bsd_in6m_release_locked
#define in6_multi_mtx _bsd_in6_multi_mtx
+#define in6_newaddrmsg _bsd_in6_newaddrmsg
#define in6_nigroup _bsd_in6_nigroup
#define in6_nigroup_oldmcprefix _bsd_in6_nigroup_oldmcprefix
#define in6_pcbbind _bsd_in6_pcbbind
#define in6_pcbconnect _bsd_in6_pcbconnect
#define in6_pcbconnect_mbuf _bsd_in6_pcbconnect_mbuf
#define in6_pcbdisconnect _bsd_in6_pcbdisconnect
-#define in6_pcbladdr _bsd_in6_pcbladdr
#define in6_pcblookup _bsd_in6_pcblookup
-#define in6_pcblookup_hash_locked _bsd_in6_pcblookup_hash_locked
#define in6_pcblookup_local _bsd_in6_pcblookup_local
#define in6_pcblookup_mbuf _bsd_in6_pcblookup_mbuf
#define in6_pcbnotify _bsd_in6_pcbnotify
#define in6_pcbpurgeif0 _bsd_in6_pcbpurgeif0
#define in6_pcbsetport _bsd_in6_pcbsetport
#define in6_prefixlen2mask _bsd_in6_prefixlen2mask
+#define in6_prepare_ifra _bsd_in6_prepare_ifra
#define in6_purgeaddr _bsd_in6_purgeaddr
-#define in6_purgeif _bsd_in6_purgeif
#define in6_rtalloc _bsd_in6_rtalloc
#define in6_rtalloc1 _bsd_in6_rtalloc1
#define in6_rtalloc_ign _bsd_in6_rtalloc_ign
@@ -1413,7 +1449,8 @@
#define in6_selecthlim _bsd_in6_selecthlim
#define in6_selectroute _bsd_in6_selectroute
#define in6_selectroute_fib _bsd_in6_selectroute_fib
-#define in6_selectsrc _bsd_in6_selectsrc
+#define in6_selectsrc_addr _bsd_in6_selectsrc_addr
+#define in6_selectsrc_socket _bsd_in6_selectsrc_socket
#define in6_setmaxmtu _bsd_in6_setmaxmtu
#define in6_setscope _bsd_in6_setscope
#define in6_sin_2_v4mapsin6 _bsd_in6_sin_2_v4mapsin6
@@ -1421,6 +1458,7 @@
#define in6_sin6_2_sin _bsd_in6_sin6_2_sin
#define in6_sin6_2_sin_in_sock _bsd_in6_sin6_2_sin_in_sock
#define in6_sockaddr _bsd_in6_sockaddr
+#define in6_splitscope _bsd_in6_splitscope
#define in6_src_ioctl _bsd_in6_src_ioctl
#define in6_tmpaddrtimer _bsd_in6_tmpaddrtimer
#define in6_tmpaddrtimer_ch _bsd_in6_tmpaddrtimer_ch
@@ -1428,6 +1466,7 @@
#define in6_update_ifa _bsd_in6_update_ifa
#define in6_v4mapsin6_sockaddr _bsd_in6_v4mapsin6_sockaddr
#define in_addmulti _bsd_in_addmulti
+#define in_addprefix _bsd_in_addprefix
#define in_addword _bsd_in_addword
#define in_broadcast _bsd_in_broadcast
#define in_canforward _bsd_in_canforward
@@ -1451,31 +1490,36 @@
#define in_getpeeraddr _bsd_in_getpeeraddr
#define in_getsockaddr _bsd_in_getsockaddr
#define in_gif_attach _bsd_in_gif_attach
-#define in_gif_detach _bsd_in_gif_detach
-#define in_gif_input _bsd_in_gif_input
+#define in_gif_encapcheck _bsd_in_gif_encapcheck
#define in_gif_output _bsd_in_gif_output
-#define in_gif_protosw _bsd_in_gif_protosw
+#define in_gre_attach _bsd_in_gre_attach
+#define in_gre_output _bsd_in_gre_output
+#define in_ifaddr_broadcast _bsd_in_ifaddr_broadcast
#define in_ifaddrhashtbl _bsd_in_ifaddrhashtbl
#define in_ifaddrhead _bsd_in_ifaddrhead
#define in_ifaddrhmask _bsd_in_ifaddrhmask
#define in_ifaddr_lock _bsd_in_ifaddr_lock
#define in_ifadown _bsd_in_ifadown
#define in_ifdetach _bsd_in_ifdetach
-#define in_ifscrub _bsd_in_ifscrub
+#define in_ifhasaddr _bsd_in_ifhasaddr
+#define in_ifscrub_all _bsd_in_ifscrub_all
#define in_inithead _bsd_in_inithead
#define init_machclk _bsd_init_machclk
#define init_sin6 _bsd_init_sin6
#define init_sleepqueues _bsd_init_sleepqueues
-#define init_zone_var _bsd_init_zone_var
+#define init_unrhdr _bsd_init_unrhdr
#define in_joingroup _bsd_in_joingroup
#define in_joingroup_locked _bsd_in_joingroup_locked
#define in_leavegroup _bsd_in_leavegroup
#define in_leavegroup_locked _bsd_in_leavegroup_locked
#define in_localaddr _bsd_in_localaddr
#define in_localip _bsd_in_localip
+#define in_losing _bsd_in_losing
#define in_mcast_loop _bsd_in_mcast_loop
#define inm_clear_recorded _bsd_inm_clear_recorded
#define inm_commit _bsd_inm_commit
+#define inm_lookup _bsd_inm_lookup
+#define inm_lookup_locked _bsd_inm_lookup_locked
#define inm_print _bsd_inm_print
#define inm_record_source _bsd_inm_record_source
#define inm_release_locked _bsd_inm_release_locked
@@ -1484,6 +1528,7 @@
#define inp_apply_all _bsd_inp_apply_all
#define in_pcballoc _bsd_in_pcballoc
#define in_pcbbind _bsd_in_pcbbind
+#define in_pcbbind_check_bindmulti _bsd_in_pcbbind_check_bindmulti
#define in_pcbbind_setup _bsd_in_pcbbind_setup
#define in_pcbconnect _bsd_in_pcbconnect
#define in_pcbconnect_mbuf _bsd_in_pcbconnect_mbuf
@@ -1496,6 +1541,7 @@
#define in_pcbinfo_init _bsd_in_pcbinfo_init
#define in_pcbinshash _bsd_in_pcbinshash
#define in_pcbinshash_nopcbgroup _bsd_in_pcbinshash_nopcbgroup
+#define in_pcbladdr _bsd_in_pcbladdr
#define in_pcblookup _bsd_in_pcblookup
#define in_pcblookup_local _bsd_in_pcblookup_local
#define in_pcblookup_mbuf _bsd_in_pcblookup_mbuf
@@ -1522,20 +1568,13 @@
#define inp_so_options _bsd_inp_so_options
#define inp_wlock _bsd_inp_wlock
#define inp_wunlock _bsd_inp_wunlock
-#define in_rtalloc _bsd_in_rtalloc
-#define in_rtalloc1 _bsd_in_rtalloc1
#define in_rtalloc_ign _bsd_in_rtalloc_ign
-#define in_rtqdrain _bsd_in_rtqdrain
#define in_rtredirect _bsd_in_rtredirect
-#define in_rtrequest _bsd_in_rtrequest
-#define in_setmatchfunc _bsd_in_setmatchfunc
+#define in_scrubprefix _bsd_in_scrubprefix
#define in_sockaddr _bsd_in_sockaddr
-#define in_stf_input _bsd_in_stf_input
-#define in_stf_protosw _bsd_in_stf_protosw
#define intr_event_add_handler _bsd_intr_event_add_handler
#define intr_event_create _bsd_intr_event_create
#define intr_event_execute_handlers _bsd_intr_event_execute_handlers
-#define intrproc _bsd_intrproc
#define ip6_accept_rtadv _bsd_ip6_accept_rtadv
#define ip6_auto_flowlabel _bsd_ip6_auto_flowlabel
#define ip6_auto_linklocal _bsd_ip6_auto_linklocal
@@ -1545,22 +1584,21 @@
#define ip6_dad_count _bsd_ip6_dad_count
#define ip6_defhlim _bsd_ip6_defhlim
#define ip6_defmcasthlim _bsd_ip6_defmcasthlim
+#define ip6_deletefraghdr _bsd_ip6_deletefraghdr
#define ip6_desync_factor _bsd_ip6_desync_factor
#define ip6_ecn_egress _bsd_ip6_ecn_egress
#define ip6_ecn_ingress _bsd_ip6_ecn_ingress
#define ip6_forward _bsd_ip6_forward
#define ip6_forwarding _bsd_ip6_forwarding
+#define ip6_fragment _bsd_ip6_fragment
#define ip6_freemoptions _bsd_ip6_freemoptions
#define ip6_freepcbopts _bsd_ip6_freepcbopts
-#define ip6_getdstifaddr _bsd_ip6_getdstifaddr
#define ip6_getmoptions _bsd_ip6_getmoptions
#define ip6_get_prevhdr _bsd_ip6_get_prevhdr
-#define ip6_gif_hlim _bsd_ip6_gif_hlim
#define ip6_hdrnestlimit _bsd_ip6_hdrnestlimit
#define ip6_init _bsd_ip6_init
#define ip6_initpktopts _bsd_ip6_initpktopts
#define ip6_input _bsd_ip6_input
-#define ip6_keepfaith _bsd_ip6_keepfaith
#define ip6_lasthdr _bsd_ip6_lasthdr
#define ip6_log_interval _bsd_ip6_log_interval
#define ip6_log_time _bsd_ip6_log_time
@@ -1617,54 +1655,135 @@
#define ip_ctloutput _bsd_ip_ctloutput
#define ip_defttl _bsd_ip_defttl
#define ip_divert_ptr _bsd_ip_divert_ptr
-#define ipdn_bound_var _bsd_ipdn_bound_var
#define ip_dn_ctl_ptr _bsd_ip_dn_ctl_ptr
#define ip_dn_io_ptr _bsd_ip_dn_io_ptr
-#define ipdn_q_find _bsd_ipdn_q_find
-#define ipdn_si_find _bsd_ipdn_si_find
#define ip_dooptions _bsd_ip_dooptions
#define ip_doopts _bsd_ip_doopts
-#define ip_do_randomid _bsd_ip_do_randomid
#define ip_drain _bsd_ip_drain
-#define ip_dummynet_compat _bsd_ip_dummynet_compat
#define ip_ecn_egress _bsd_ip_ecn_egress
#define ip_ecn_ingress _bsd_ip_ecn_ingress
-#define ip_fastforward _bsd_ip_fastforward
+#define ip_fillid _bsd_ip_fillid
#define ip_forward _bsd_ip_forward
#define ipforwarding _bsd_ipforwarding
#define ip_fragment _bsd_ip_fragment
-#define ipfw_add_rule _bsd_ipfw_add_rule
-#define ipfw_add_table_entry _bsd_ipfw_add_table_entry
+#define ipfw_add_eaction _bsd_ipfw_add_eaction
+#define ipfw_add_obj_rewriter _bsd_ipfw_add_obj_rewriter
+#define ipfw_add_sopt_handler _bsd_ipfw_add_sopt_handler
+#define ipfw_add_table_algo _bsd_ipfw_add_table_algo
+#define ipfw_alloc_rule _bsd_ipfw_alloc_rule
#define ipfw_attach_hooks _bsd_ipfw_attach_hooks
-#define ipfw_check_hook _bsd_ipfw_check_hook
+#define ipfw_bpf_init _bsd_ipfw_bpf_init
+#define ipfw_bpf_mtap2 _bsd_ipfw_bpf_mtap2
+#define ipfw_bpf_uninit _bsd_ipfw_bpf_uninit
+#define ipfw_check_frame _bsd_ipfw_check_frame
+#define ipfw_check_object_name_generic _bsd_ipfw_check_object_name_generic
+#define ipfw_check_packet _bsd_ipfw_check_packet
#define ipfw_chg_hook _bsd_ipfw_chg_hook
#define ipfw_chk _bsd_ipfw_chk
#define ip_fw_chk_ptr _bsd_ip_fw_chk_ptr
#define ipfw_count_table _bsd_ipfw_count_table
#define ipfw_count_xtable _bsd_ipfw_count_xtable
-#define ipfw_ctl _bsd_ipfw_ctl
+#define ipfw_ctl3 _bsd_ipfw_ctl3
#define ip_fw_ctl_ptr _bsd_ip_fw_ctl_ptr
-#define ipfw_del_table_entry _bsd_ipfw_del_table_entry
+#define ipfw_del_eaction _bsd_ipfw_del_eaction
+#define ipfw_del_obj_rewriter _bsd_ipfw_del_obj_rewriter
+#define ipfw_del_sopt_handler _bsd_ipfw_del_sopt_handler
+#define ipfw_del_table_algo _bsd_ipfw_del_table_algo
+#define ipfw_destroy_counters _bsd_ipfw_destroy_counters
+#define ipfw_destroy_obj_rewriter _bsd_ipfw_destroy_obj_rewriter
+#define ipfw_destroy_skipto_cache _bsd_ipfw_destroy_skipto_cache
+#define ipfw_destroy_sopt_handler _bsd_ipfw_destroy_sopt_handler
+#define ipfw_destroy_srv _bsd_ipfw_destroy_srv
#define ipfw_destroy_tables _bsd_ipfw_destroy_tables
-#define ipfw_dump_table _bsd_ipfw_dump_table
-#define ipfw_dump_xtable _bsd_ipfw_dump_xtable
+#define ipfw_dump_states _bsd_ipfw_dump_states
+#define ipfw_dump_table_legacy _bsd_ipfw_dump_table_legacy
+#define ipfw_dyn_get_count _bsd_ipfw_dyn_get_count
+#define ipfw_dyn_init _bsd_ipfw_dyn_init
+#define ipfw_dyn_len _bsd_ipfw_dyn_len
+#define ipfw_dyn_uninit _bsd_ipfw_dyn_uninit
+#define ipfw_dyn_unlock _bsd_ipfw_dyn_unlock
+#define ipfw_eaction_init _bsd_ipfw_eaction_init
+#define ipfw_eaction_uninit _bsd_ipfw_eaction_uninit
+#define ipfw_expire_dyn_rules _bsd_ipfw_expire_dyn_rules
+#define ipfw_export_obj_ntlv _bsd_ipfw_export_obj_ntlv
+#define ipfw_export_table_ntlv _bsd_ipfw_export_table_ntlv
+#define ipfw_export_table_value_legacy _bsd_ipfw_export_table_value_legacy
+#define ipfw_export_table_value_v1 _bsd_ipfw_export_table_value_v1
+#define ipfw_find_name_tlv_type _bsd_ipfw_find_name_tlv_type
#define ipfw_find_rule _bsd_ipfw_find_rule
-#define ipfw_flush_table _bsd_ipfw_flush_table
+#define ipfw_foreach_table_tentry _bsd_ipfw_foreach_table_tentry
+#define ipfw_garbage_table_values _bsd_ipfw_garbage_table_values
+#define ipfw_get_dynamic _bsd_ipfw_get_dynamic
+#define ipfw_get_sopt_header _bsd_ipfw_get_sopt_header
+#define ipfw_get_sopt_space _bsd_ipfw_get_sopt_space
+#define ipfw_get_table_objhash _bsd_ipfw_get_table_objhash
+#define ipfw_iface_add_notify _bsd_ipfw_iface_add_notify
+#define ipfw_iface_del_notify _bsd_ipfw_iface_del_notify
+#define ipfw_iface_destroy _bsd_ipfw_iface_destroy
+#define ipfw_iface_init _bsd_ipfw_iface_init
+#define ipfw_iface_ref _bsd_ipfw_iface_ref
+#define ipfw_iface_unref _bsd_ipfw_iface_unref
+#define ipfw_import_table_value_legacy _bsd_ipfw_import_table_value_legacy
+#define ipfw_import_table_value_v1 _bsd_ipfw_import_table_value_v1
+#define ipfw_init_counters _bsd_ipfw_init_counters
+#define ipfw_init_obj_rewriter _bsd_ipfw_init_obj_rewriter
+#define ipfw_init_skipto_cache _bsd_ipfw_init_skipto_cache
+#define ipfw_init_sopt_handler _bsd_ipfw_init_sopt_handler
+#define ipfw_init_srv _bsd_ipfw_init_srv
#define ipfw_init_tables _bsd_ipfw_init_tables
+#define ipfw_install_state _bsd_ipfw_install_state
+#define ipfw_is_dyn_rule _bsd_ipfw_is_dyn_rule
+#define ipfw_link_table_values _bsd_ipfw_link_table_values
#define ipfw_log _bsd_ipfw_log
-#define ipfw_log_bpf _bsd_ipfw_log_bpf
+#define ipfw_lookup_dyn_rule _bsd_ipfw_lookup_dyn_rule
#define ipfw_lookup_table _bsd_ipfw_lookup_table
#define ipfw_lookup_table_extended _bsd_ipfw_lookup_table_extended
+#define ipfw_match_range _bsd_ipfw_match_range
+#define ipfw_nat64lsn _bsd_ipfw_nat64lsn
+#define ipfw_nat64stl _bsd_ipfw_nat64stl
#define ipfw_nat_cfg_ptr _bsd_ipfw_nat_cfg_ptr
#define ipfw_nat_del_ptr _bsd_ipfw_nat_del_ptr
#define ipfw_nat_get_cfg_ptr _bsd_ipfw_nat_get_cfg_ptr
#define ipfw_nat_get_log_ptr _bsd_ipfw_nat_get_log_ptr
#define ipfw_nat_ptr _bsd_ipfw_nat_ptr
#define ipfw_nat_ready _bsd_ipfw_nat_ready
+#define ipfw_objhash_add _bsd_ipfw_objhash_add
+#define ipfw_objhash_alloc_idx _bsd_ipfw_objhash_alloc_idx
+#define ipfw_objhash_bitmap_alloc _bsd_ipfw_objhash_bitmap_alloc
+#define ipfw_objhash_bitmap_free _bsd_ipfw_objhash_bitmap_free
+#define ipfw_objhash_bitmap_merge _bsd_ipfw_objhash_bitmap_merge
+#define ipfw_objhash_bitmap_swap _bsd_ipfw_objhash_bitmap_swap
+#define ipfw_objhash_count _bsd_ipfw_objhash_count
+#define ipfw_objhash_count_type _bsd_ipfw_objhash_count_type
+#define ipfw_objhash_create _bsd_ipfw_objhash_create
+#define ipfw_objhash_del _bsd_ipfw_objhash_del
+#define ipfw_objhash_destroy _bsd_ipfw_objhash_destroy
+#define ipfw_objhash_find_type _bsd_ipfw_objhash_find_type
+#define ipfw_objhash_foreach _bsd_ipfw_objhash_foreach
+#define ipfw_objhash_foreach_type _bsd_ipfw_objhash_foreach_type
+#define ipfw_objhash_free_idx _bsd_ipfw_objhash_free_idx
+#define ipfw_objhash_lookup_kidx _bsd_ipfw_objhash_lookup_kidx
+#define ipfw_objhash_lookup_name _bsd_ipfw_objhash_lookup_name
+#define ipfw_objhash_lookup_name_type _bsd_ipfw_objhash_lookup_name_type
+#define ipfw_objhash_lookup_table_kidx _bsd_ipfw_objhash_lookup_table_kidx
+#define ipfw_objhash_same_name _bsd_ipfw_objhash_same_name
+#define ipfw_objhash_set_funcs _bsd_ipfw_objhash_set_funcs
+#define ipfw_obj_manage_sets _bsd_ipfw_obj_manage_sets
+#define ipfw_reap_add _bsd_ipfw_reap_add
#define ipfw_reap_rules _bsd_ipfw_reap_rules
+#define ipfw_ref_table _bsd_ipfw_ref_table
#define ipfw_resize_tables _bsd_ipfw_resize_tables
-#define ip_gif_ttl _bsd_ip_gif_ttl
-#define ip_id _bsd_ip_id
+#define ipfw_run_eaction _bsd_ipfw_run_eaction
+#define ipfw_send_pkt _bsd_ipfw_send_pkt
+#define ipfw_switch_tables_namespace _bsd_ipfw_switch_tables_namespace
+#define ipfw_table_algo_destroy _bsd_ipfw_table_algo_destroy
+#define ipfw_table_algo_init _bsd_ipfw_table_algo_init
+#define ipfw_table_value_destroy _bsd_ipfw_table_value_destroy
+#define ipfw_table_value_init _bsd_ipfw_table_value_init
+#define ipfw_unref_table _bsd_ipfw_unref_table
+#define ipfw_unref_table_values _bsd_ipfw_unref_table_values
+#define ipfw_vnet_ready _bsd_ipfw_vnet_ready
+#define ip_gre_ttl _bsd_ip_gre_ttl
#define ip_init _bsd_ip_init
#define ip_input _bsd_ip_input
#define ip_insertoptions _bsd_ip_insertoptions
@@ -1703,22 +1822,24 @@
#define ipproto_register _bsd_ipproto_register
#define ipproto_unregister _bsd_ipproto_unregister
#define ip_protox _bsd_ip_protox
-#define ip_randomid _bsd_ip_randomid
#define ip_reass _bsd_ip_reass
+#define ipreass_drain _bsd_ipreass_drain
+#define ipreass_init _bsd_ipreass_init
+#define ipreass_slowtimo _bsd_ipreass_slowtimo
#define ip_rsvpd _bsd_ip_rsvpd
#define ip_rsvp_done _bsd_ip_rsvp_done
#define ip_rsvp_force_done _bsd_ip_rsvp_force_done
#define ip_rsvp_init _bsd_ip_rsvp_init
#define ip_rsvp_vif _bsd_ip_rsvp_vif
-#define ip_rtaddr _bsd_ip_rtaddr
#define ip_savecontrol _bsd_ip_savecontrol
-#define ipsec_bpf _bsd_ipsec_bpf
-#define ipsec_filter _bsd_ipsec_filter
+#define ipsec_hhh_in _bsd_ipsec_hhh_in
+#define ipsec_hhh_out _bsd_ipsec_hhh_out
#define ip_slowtimo _bsd_ip_slowtimo
#define ip_srcroute _bsd_ip_srcroute
#define ipstat _bsd_ipstat
#define ip_stripoptions _bsd_ip_stripoptions
#define iptime _bsd_iptime
+#define ip_tryforward _bsd_ip_tryforward
#define iso88025_ifattach _bsd_iso88025_ifattach
#define iso88025_ifdetach _bsd_iso88025_ifdetach
#define iso88025_input _bsd_iso88025_input
@@ -1727,10 +1848,15 @@
#define itimerfix _bsd_itimerfix
#define jailed _bsd_jailed
#define jailed_without_vnet _bsd_jailed_without_vnet
+#define jenkins_hash _bsd_jenkins_hash
+#define jenkins_hash32 _bsd_jenkins_hash32
#define kernel_sysctl _bsd_kernel_sysctl
#define kern_getsockname _bsd_kern_getsockname
+#define kern_kevent_anonymous _bsd_kern_kevent_anonymous
+#define kern_pipe _bsd_kern_pipe
#define kern_select _bsd_kern_select
#define kern_socketpair _bsd_kern_socketpair
+#define kern_uuidgen _bsd_kern_uuidgen
#define khelp_add_hhook _bsd_khelp_add_hhook
#define khelp_deregister_helper _bsd_khelp_deregister_helper
#define khelp_destroy_osd _bsd_khelp_destroy_osd
@@ -1747,13 +1873,14 @@
#define kmod_tcpstat_inc _bsd_kmod_tcpstat_inc
#define kmod_udpstat_inc _bsd_kmod_udpstat_inc
#define knlist_add _bsd_knlist_add
+#define knlist_alloc _bsd_knlist_alloc
#define knlist_cleardel _bsd_knlist_cleardel
#define knlist_destroy _bsd_knlist_destroy
+#define knlist_detach _bsd_knlist_detach
#define knlist_empty _bsd_knlist_empty
#define knlist_init _bsd_knlist_init
#define knlist_init_mtx _bsd_knlist_init_mtx
#define knlist_remove _bsd_knlist_remove
-#define knlist_remove_inevent _bsd_knlist_remove_inevent
#define knote _bsd_knote
#define knote_fdclose _bsd_knote_fdclose
#define kobj_class_compile _bsd_kobj_class_compile
@@ -1787,11 +1914,8 @@
#define lacp_req _bsd_lacp_req
#define lacp_select_tx_port _bsd_lacp_select_tx_port
#define lacp_stop _bsd_lacp_stop
-#define lagg_cloner _bsd_lagg_cloner
-#define lagg_cloner_data _bsd_lagg_cloner_data
#define lagg_detach_cookie _bsd_lagg_detach_cookie
#define lagg_enqueue _bsd_lagg_enqueue
-#define lagg_hashmbuf _bsd_lagg_hashmbuf
#define lagg_input_p _bsd_lagg_input_p
#define lagg_linkstate_p _bsd_lagg_linkstate_p
#define lagg_list _bsd_lagg_list
@@ -1800,16 +1924,10 @@
#define led_create_state _bsd_led_create_state
#define led_destroy _bsd_led_destroy
#define led_set _bsd_led_set
-#define legacy_pcib_alloc_resource _bsd_legacy_pcib_alloc_resource
-#define legacy_pcib_driver _bsd_legacy_pcib_driver
-#define legacy_pcib_map_msi _bsd_legacy_pcib_map_msi
-#define legacy_pcib_maxslots _bsd_legacy_pcib_maxslots
-#define legacy_pcib_read_config _bsd_legacy_pcib_read_config
-#define legacy_pcib_read_ivar _bsd_legacy_pcib_read_ivar
-#define legacy_pcib_write_config _bsd_legacy_pcib_write_config
-#define legacy_pcib_write_ivar _bsd_legacy_pcib_write_ivar
#define legal_vif_num _bsd_legal_vif_num
#define lem_driver_version _bsd_lem_driver_version
+#define le_uuid_dec _bsd_le_uuid_dec
+#define le_uuid_enc _bsd_le_uuid_enc
#define LibAliasAddServer _bsd_LibAliasAddServer
#define LibAliasAttachHandlers _bsd_LibAliasAttachHandlers
#define LibAliasCheckNewLink _bsd_LibAliasCheckNewLink
@@ -1835,14 +1953,48 @@
#define LibAliasSetTarget _bsd_LibAliasSetTarget
#define LibAliasUnaliasOut _bsd_LibAliasUnaliasOut
#define LibAliasUninit _bsd_LibAliasUninit
+#define _libmd_SHA512_224_Final _bsd__libmd_SHA512_224_Final
+#define _libmd_SHA512_224_Init _bsd__libmd_SHA512_224_Init
+#define _libmd_SHA512_224_Update _bsd__libmd_SHA512_224_Update
+#define _libmd_SHA512_256_Final _bsd__libmd_SHA512_256_Final
+#define _libmd_SHA512_256_Init _bsd__libmd_SHA512_256_Init
+#define _libmd_SHA512_256_Update _bsd__libmd_SHA512_256_Update
+#define _libmd_SKEIN1024_Final _bsd__libmd_SKEIN1024_Final
+#define _libmd_SKEIN1024_Init _bsd__libmd_SKEIN1024_Init
+#define _libmd_SKEIN1024_Update _bsd__libmd_SKEIN1024_Update
+#define _libmd_SKEIN256_Final _bsd__libmd_SKEIN256_Final
+#define _libmd_SKEIN256_Init _bsd__libmd_SKEIN256_Init
+#define _libmd_SKEIN256_Update _bsd__libmd_SKEIN256_Update
+#define _libmd_SKEIN512_Final _bsd__libmd_SKEIN512_Final
+#define _libmd_SKEIN512_Init _bsd__libmd_SKEIN512_Init
+#define _libmd_SKEIN512_Update _bsd__libmd_SKEIN512_Update
+#define link_alloc_sdl _bsd_link_alloc_sdl
+#define link_free_sdl _bsd_link_free_sdl
+#define link_init_sdl _bsd_link_init_sdl
+#define link_pfil_hook _bsd_link_pfil_hook
#define lla_rt_output _bsd_lla_rt_output
#define llentry_alloc _bsd_llentry_alloc
#define llentry_free _bsd_llentry_free
+#define lltable_allocate_htbl _bsd_lltable_allocate_htbl
+#define lltable_alloc_entry _bsd_lltable_alloc_entry
+#define lltable_calc_llheader _bsd_lltable_calc_llheader
+#define lltable_delete_addr _bsd_lltable_delete_addr
+#define lltable_drop_entry_queue _bsd_lltable_drop_entry_queue
+#define lltable_fill_sa_entry _bsd_lltable_fill_sa_entry
+#define lltable_foreach_lle _bsd_lltable_foreach_lle
#define lltable_free _bsd_lltable_free
-#define lltable_init _bsd_lltable_init
+#define lltable_free_entry _bsd_lltable_free_entry
+#define lltable_get_af _bsd_lltable_get_af
+#define lltable_get_ifp _bsd_lltable_get_ifp
+#define lltable_link _bsd_lltable_link
+#define lltable_link_entry _bsd_lltable_link_entry
#define lltable_prefix_free _bsd_lltable_prefix_free
#define lltable_rwlock _bsd_lltable_rwlock
+#define lltable_set_entry_addr _bsd_lltable_set_entry_addr
#define lltable_sysctl_dumparp _bsd_lltable_sysctl_dumparp
+#define lltable_try_set_entry_addr _bsd_lltable_try_set_entry_addr
+#define lltable_unlink_entry _bsd_lltable_unlink_entry
+#define lltable_update_ifaddr _bsd_lltable_update_ifaddr
#define lock_classes _bsd_lock_classes
#define lock_class_mtx_sleep _bsd_lock_class_mtx_sleep
#define lock_class_mtx_spin _bsd_lock_class_mtx_spin
@@ -1850,8 +2002,6 @@
#define lock_class_sx _bsd_lock_class_sx
#define lock_destroy _bsd_lock_destroy
#define lock_init _bsd_lock_init
-#define lo_cloner _bsd_lo_cloner
-#define lo_cloner_data _bsd_lo_cloner_data
#define log _bsd_log
#define loif _bsd_loif
#define loioctl _bsd_loioctl
@@ -1861,40 +2011,47 @@
#define machclk_freq _bsd_machclk_freq
#define machclk_per_tick _bsd_machclk_per_tick
#define machclk_usepcc _bsd_machclk_usepcc
-#define m_addr_chg_pf_p _bsd_m_addr_chg_pf_p
#define m_adj _bsd_m_adj
#define make_dev _bsd_make_dev
+#define make_dev_args_init_impl _bsd_make_dev_args_init_impl
+#define make_dev_s _bsd_make_dev_s
#define M_ALIAS _bsd_M_ALIAS
-#define m_align _bsd_m_align
#define malloc_init _bsd_malloc_init
#define malloc_uninit _bsd_malloc_uninit
#define m_append _bsd_m_append
#define m_apply _bsd_m_apply
+#define maxpipekva _bsd_maxpipekva
#define maxusers _bsd_maxusers
+#define mb_dupcl _bsd_mb_dupcl
#define mb_free_ext _bsd_mb_free_ext
#define M_BPF _bsd_M_BPF
#define M_BPFJIT _bsd_M_BPFJIT
#define M_CAMSIM _bsd_M_CAMSIM
#define m_cat _bsd_m_cat
+#define m_catpkt _bsd_m_catpkt
+#define m_clget _bsd_m_clget
+#define m_cljget _bsd_m_cljget
#define m_collapse _bsd_m_collapse
#define m_copyback _bsd_m_copyback
#define m_copydata _bsd_m_copydata
#define m_copym _bsd_m_copym
-#define m_copymdata _bsd_m_copymdata
#define m_copypacket _bsd_m_copypacket
#define m_copyup _bsd_m_copyup
#define M_CRYPTO_DATA _bsd_M_CRYPTO_DATA
#define m_defrag _bsd_m_defrag
#define m_demote _bsd_m_demote
+#define m_demote_pkthdr _bsd_m_demote_pkthdr
#define M_DEVBUF _bsd_M_DEVBUF
#define m_devget _bsd_m_devget
-#define M_DN_HEAP _bsd_M_DN_HEAP
-#define M_DUMMYNET _bsd_M_DUMMYNET
#define m_dup _bsd_m_dup
#define m_dup_pkthdr _bsd_m_dup_pkthdr
+#define m_ether_tcpip_hash _bsd_m_ether_tcpip_hash
+#define m_ether_tcpip_hash_init _bsd_m_ether_tcpip_hash_init
#define m_extadd _bsd_m_extadd
#define m_fixhdr _bsd_m_fixhdr
#define m_freem _bsd_m_freem
+#define m_get2 _bsd_m_get2
+#define m_getjcl _bsd_m_getjcl
#define m_getm2 _bsd_m_getm2
#define m_getptr _bsd_m_getptr
#define M_IFADDR _bsd_M_IFADDR
@@ -1905,7 +2062,8 @@
#define mii_bitbang_writereg _bsd_mii_bitbang_writereg
#define miibus_devclass _bsd_miibus_devclass
#define miibus_driver _bsd_miibus_driver
-#define mii_down _bsd_mii_down
+#define mii_dev_mac_match _bsd_mii_dev_mac_match
+#define mii_dev_mac_softc _bsd_mii_dev_mac_softc
#define mii_mediachg _bsd_mii_mediachg
#define mii_oui _bsd_mii_oui
#define mii_phy_add_media _bsd_mii_phy_add_media
@@ -1913,8 +2071,9 @@
#define mii_phy_detach _bsd_mii_phy_detach
#define mii_phy_dev_attach _bsd_mii_phy_dev_attach
#define mii_phy_dev_probe _bsd_mii_phy_dev_probe
-#define mii_phy_down _bsd_mii_phy_down
#define mii_phy_flowstatus _bsd_mii_phy_flowstatus
+#define mii_phy_mac_match _bsd_mii_phy_mac_match
+#define mii_phy_mac_softc _bsd_mii_phy_mac_softc
#define mii_phy_match _bsd_mii_phy_match
#define mii_phy_match_gen _bsd_mii_phy_match_gen
#define mii_phy_reset _bsd_mii_phy_reset
@@ -1925,7 +2084,6 @@
#define mii_tick _bsd_mii_tick
#define M_IOV _bsd_M_IOV
#define M_IP6NDP _bsd_M_IP6NDP
-#define M_IP6OPT _bsd_M_IP6OPT
#define M_IPFW _bsd_M_IPFW
#define M_IPFW_TBL _bsd_M_IPFW_TBL
#define mi_startup _bsd_mi_startup
@@ -1939,14 +2097,16 @@
#define m_length _bsd_m_length
#define M_LLTABLE _bsd_M_LLTABLE
#define m_mbuftouio _bsd_m_mbuftouio
+#define mmc_devclass _bsd_mmc_devclass
+#define mmc_driver _bsd_mmc_driver
#define m_megapullup _bsd_m_megapullup
#define m_move_pkthdr _bsd_m_move_pkthdr
+#define M_NAT64LSN _bsd_M_NAT64LSN
#define module_lookupbyname _bsd_module_lookupbyname
#define module_register _bsd_module_register
#define module_register_init _bsd_module_register_init
#define module_release _bsd_module_release
#define modules_sx _bsd_modules_sx
-#define M_OFWPROP _bsd_M_OFWPROP
#define M_PCB _bsd_M_PCB
#define m_pkthdr_init _bsd_m_pkthdr_init
#define m_prepend _bsd_m_prepend
@@ -1958,7 +2118,6 @@
#define M_RTABLE _bsd_M_RTABLE
#define mrt_ioctl _bsd_mrt_ioctl
#define m_sanity _bsd_m_sanity
-#define MSFail _bsd_MSFail
#define M_SONAME _bsd_M_SONAME
#define m_split _bsd_m_split
#define m_tag_alloc _bsd_m_tag_alloc
@@ -1970,6 +2129,7 @@
#define m_tag_free_default _bsd_m_tag_free_default
#define m_tag_locate _bsd_m_tag_locate
#define M_TAP _bsd_M_TAP
+#define M_TCPFUNCTIONS _bsd_M_TCPFUNCTIONS
#define M_TCPLOG _bsd_M_TCPLOG
#define M_TEMP _bsd_M_TEMP
#define mtrash_ctor _bsd_mtrash_ctor
@@ -1986,51 +2146,72 @@
#define mtx_pool_find _bsd_mtx_pool_find
#define mtx_recursed _bsd_mtx_recursed
#define mtx_sysinit _bsd_mtx_sysinit
-#define _mtx_trylock _bsd__mtx_trylock
+#define mtx_trylock_flags_ _bsd_mtx_trylock_flags_
#define _mtx_unlock_flags _bsd__mtx_unlock_flags
#define m_uiotombuf _bsd_m_uiotombuf
#define m_unshare _bsd_m_unshare
+#define murmur3_32_hash _bsd_murmur3_32_hash
+#define murmur3_32_hash32 _bsd_murmur3_32_hash32
#define M_USB _bsd_M_USB
#define M_USBDEV _bsd_M_USBDEV
-#define M_USBHC _bsd_M_USBHC
#define mutex_init _bsd_mutex_init
#define M_XDATA _bsd_M_XDATA
+#define nat64_allow_private _bsd_nat64_allow_private
+#define nat64_debug _bsd_nat64_debug
+#define nat64_do_handle_ip4 _bsd_nat64_do_handle_ip4
+#define nat64_do_handle_ip6 _bsd_nat64_do_handle_ip6
+#define nat64_getlasthdr _bsd_nat64_getlasthdr
+#define nat64_handle_icmp6 _bsd_nat64_handle_icmp6
+#define nat64lsn_destroy_instance _bsd_nat64lsn_destroy_instance
+#define nat64lsn_dump_state _bsd_nat64lsn_dump_state
+#define nat64lsn_eid _bsd_nat64lsn_eid
+#define nat64lsn_init _bsd_nat64lsn_init
+#define nat64lsn_init_instance _bsd_nat64lsn_init_instance
+#define nat64lsn_init_internal _bsd_nat64lsn_init_internal
+#define nat64lsn_rproto_map _bsd_nat64lsn_rproto_map
+#define nat64lsn_start_instance _bsd_nat64lsn_start_instance
+#define nat64lsn_uninit _bsd_nat64lsn_uninit
+#define nat64lsn_uninit_internal _bsd_nat64lsn_uninit_internal
+#define nat64stl_eid _bsd_nat64stl_eid
+#define nat64stl_init _bsd_nat64stl_init
+#define nat64stl_uninit _bsd_nat64stl_uninit
+#define nd6_add_ifa_lle _bsd_nd6_add_ifa_lle
+#define nd6_alloc _bsd_nd6_alloc
#define nd6_cache_lladdr _bsd_nd6_cache_lladdr
-#define nd6_dad_duplicated _bsd_nd6_dad_duplicated
+#define nd6_dad_init _bsd_nd6_dad_init
#define nd6_dad_start _bsd_nd6_dad_start
#define nd6_dad_stop _bsd_nd6_dad_stop
#define nd6_defifindex _bsd_nd6_defifindex
+#define nd6_flush_holdchain _bsd_nd6_flush_holdchain
+#define nd6_grab_holdchain _bsd_nd6_grab_holdchain
#define nd6_ifattach _bsd_nd6_ifattach
#define nd6_ifdetach _bsd_nd6_ifdetach
#define nd6_ifptomac _bsd_nd6_ifptomac
#define nd6_init _bsd_nd6_init
#define nd6_ioctl _bsd_nd6_ioctl
#define nd6_is_addr_neighbor _bsd_nd6_is_addr_neighbor
-#define nd6_llinfo_settimer _bsd_nd6_llinfo_settimer
-#define nd6_llinfo_settimer_locked _bsd_nd6_llinfo_settimer_locked
+#define nd6_llinfo_setstate _bsd_nd6_llinfo_setstate
+#define nd6_lock _bsd_nd6_lock
#define nd6_lookup _bsd_nd6_lookup
#define nd6_na_input _bsd_nd6_na_input
#define nd6_na_output _bsd_nd6_na_output
-#define nd6_need_cache _bsd_nd6_need_cache
#define nd6_ns_input _bsd_nd6_ns_input
#define nd6_ns_output _bsd_nd6_ns_output
-#define nd6_nud_hint _bsd_nd6_nud_hint
#define nd6_option _bsd_nd6_option
#define nd6_option_init _bsd_nd6_option_init
#define nd6_options _bsd_nd6_options
-#define nd6_output _bsd_nd6_output
-#define nd6_output_flush _bsd_nd6_output_flush
-#define nd6_output_lle _bsd_nd6_output_lle
+#define nd6_output_ifp _bsd_nd6_output_ifp
#define nd6_prefix_lookup _bsd_nd6_prefix_lookup
#define nd6_prelist_add _bsd_nd6_prelist_add
#define nd6_purge _bsd_nd6_purge
#define nd6_ra_input _bsd_nd6_ra_input
#define nd6_recalc_reachtm_interval _bsd_nd6_recalc_reachtm_interval
+#define nd6_rem_ifa_lle _bsd_nd6_rem_ifa_lle
+#define nd6_resolve _bsd_nd6_resolve
+#define nd6_resolve_addr _bsd_nd6_resolve_addr
#define nd6_rs_input _bsd_nd6_rs_input
-#define nd6_rtrequest _bsd_nd6_rtrequest
#define nd6_setdefaultiface _bsd_nd6_setdefaultiface
#define nd6_setmtu _bsd_nd6_setmtu
-#define nd6_storelladdr _bsd_nd6_storelladdr
#define nd6_timer _bsd_nd6_timer
#define nd6_timer_ch _bsd_nd6_timer_ch
#define nd_defrouter _bsd_nd_defrouter
@@ -2067,44 +2248,13 @@
#define ng_ipfw_input_p _bsd_ng_ipfw_input_p
#define norule_counter _bsd_norule_counter
#define nousrreqs _bsd_nousrreqs
+#define nptv6_init _bsd_nptv6_init
+#define nptv6_uninit _bsd_nptv6_uninit
#define null_class _bsd_null_class
#define null_filtops _bsd_null_filtops
-#define num_cam_status_entries _bsd_num_cam_status_entries
-#define OF_call_method _bsd_OF_call_method
-#define OF_canon _bsd_OF_canon
-#define OF_child _bsd_OF_child
-#define OF_claim _bsd_OF_claim
-#define OF_close _bsd_OF_close
-#define OF_enter _bsd_OF_enter
-#define OF_exit _bsd_OF_exit
-#define OF_finddevice _bsd_OF_finddevice
-#define OF_getprop _bsd_OF_getprop
-#define OF_getprop_alloc _bsd_OF_getprop_alloc
-#define OF_getproplen _bsd_OF_getproplen
-#define OF_init _bsd_OF_init
-#define OF_install _bsd_OF_install
-#define OF_instance_to_package _bsd_OF_instance_to_package
-#define OF_instance_to_path _bsd_OF_instance_to_path
-#define OF_interpret _bsd_OF_interpret
-#define OF_nextprop _bsd_OF_nextprop
-#define OF_open _bsd_OF_open
-#define OF_package_to_path _bsd_OF_package_to_path
-#define OF_parent _bsd_OF_parent
-#define OF_peer _bsd_OF_peer
-#define OF_printf _bsd_OF_printf
-#define OF_read _bsd_OF_read
-#define OF_release _bsd_OF_release
-#define OF_searchprop _bsd_OF_searchprop
-#define OF_seek _bsd_OF_seek
-#define OF_setprop _bsd_OF_setprop
-#define OF_test _bsd_OF_test
-#define OF_write _bsd_OF_write
-#define ohci_bus_methods _bsd_ohci_bus_methods
+#define nullop _bsd_nullop
+#define number_array _bsd_number_array
#define ohci_detach _bsd_ohci_detach
-#define ohci_device_bulk_methods _bsd_ohci_device_bulk_methods
-#define ohci_device_ctrl_methods _bsd_ohci_device_ctrl_methods
-#define ohci_device_intr_methods _bsd_ohci_device_intr_methods
-#define ohci_device_isoc_methods _bsd_ohci_device_isoc_methods
#define ohci_init _bsd_ohci_init
#define ohci_interrupt _bsd_ohci_interrupt
#define ohci_iterate_hw_softc _bsd_ohci_iterate_hw_softc
@@ -2112,17 +2262,27 @@
#define osd_del _bsd_osd_del
#define osd_deregister _bsd_osd_deregister
#define osd_exit _bsd_osd_exit
+#define osd_free_reserved _bsd_osd_free_reserved
#define osd_get _bsd_osd_get
+#define osdm _bsd_osdm
#define osd_register _bsd_osd_register
+#define osd_reserve _bsd_osd_reserve
#define osd_set _bsd_osd_set
+#define osd_set_reserved _bsd_osd_set_reserved
#define page_heap_mtx _bsd_page_heap_mtx
#define panic _bsd_panic
+#define parse_uuid _bsd_parse_uuid
+#define pause_sbt _bsd_pause_sbt
#define pci_activate_resource _bsd_pci_activate_resource
+#define pci_add_bar _bsd_pci_add_bar
#define pci_add_child _bsd_pci_add_child
#define pci_add_children _bsd_pci_add_children
#define pci_add_resources _bsd_pci_add_resources
+#define pci_add_resources_ea _bsd_pci_add_resources_ea
+#define pci_alloc_devinfo_method _bsd_pci_alloc_devinfo_method
#define pci_alloc_msi_method _bsd_pci_alloc_msi_method
#define pci_alloc_msix_method _bsd_pci_alloc_msix_method
+#define pci_alloc_multi_resource _bsd_pci_alloc_multi_resource
#define pci_alloc_resource _bsd_pci_alloc_resource
#define pci_assign_interrupt_method _bsd_pci_assign_interrupt_method
#define pci_attach_common _bsd_pci_attach_common
@@ -2131,17 +2291,19 @@
#define pcib_alloc_resource _bsd_pcib_alloc_resource
#define pci_bar_enabled _bsd_pci_bar_enabled
#define pcib_attach _bsd_pcib_attach
+#define pcib_attach_child _bsd_pcib_attach_child
#define pcib_attach_common _bsd_pcib_attach_common
+#define pcib_bridge_init _bsd_pcib_bridge_init
+#define pcib_child_present _bsd_pcib_child_present
+#define pcib_detach _bsd_pcib_detach
#define pcib_driver _bsd_pcib_driver
#define pcibios_pcib_route_interrupt _bsd_pcibios_pcib_route_interrupt
#define pcib_map_msi _bsd_pcib_map_msi
#define pcib_maxslots _bsd_pcib_maxslots
-#define pcib_read_config _bsd_pcib_read_config
#define pcib_read_ivar _bsd_pcib_read_ivar
#define pcib_release_msi _bsd_pcib_release_msi
#define pcib_release_msix _bsd_pcib_release_msix
#define pcib_route_interrupt _bsd_pcib_route_interrupt
-#define pcib_write_config _bsd_pcib_write_config
#define pcib_write_ivar _bsd_pcib_write_ivar
#define pcicdev _bsd_pcicdev
#define pci_cfgregopen _bsd_pci_cfgregopen
@@ -2149,68 +2311,85 @@
#define pci_cfgregwrite _bsd_pci_cfgregwrite
#define pci_cfg_restore _bsd_pci_cfg_restore
#define pci_cfg_save _bsd_pci_cfg_save
+#define pci_child_added_method _bsd_pci_child_added_method
+#define pci_child_deleted _bsd_pci_child_deleted
+#define pci_child_detached _bsd_pci_child_detached
#define pci_child_location_str_method _bsd_pci_child_location_str_method
#define pci_child_pnpinfo_str_method _bsd_pci_child_pnpinfo_str_method
#define pci_deactivate_resource _bsd_pci_deactivate_resource
-#define pci_delete_child _bsd_pci_delete_child
#define pci_delete_resource _bsd_pci_delete_resource
#define pci_devq _bsd_pci_devq
#define pci_disable_busmaster_method _bsd_pci_disable_busmaster_method
#define pci_disable_io_method _bsd_pci_disable_io_method
+#define pci_disable_msi_method _bsd_pci_disable_msi_method
#define pci_do_power_resume _bsd_pci_do_power_resume
#define pci_do_power_suspend _bsd_pci_do_power_suspend
#define pci_driver _bsd_pci_driver
#define pci_driver_added _bsd_pci_driver_added
+#define pcie_adjust_config _bsd_pcie_adjust_config
+#define pci_ea_is_enabled _bsd_pci_ea_is_enabled
#define pci_enable_busmaster_method _bsd_pci_enable_busmaster_method
#define pci_enable_io_method _bsd_pci_enable_io_method
+#define pci_enable_msi_method _bsd_pci_enable_msi_method
+#define pci_enable_msix_method _bsd_pci_enable_msix_method
+#define pcie_read_config _bsd_pcie_read_config
+#define pcie_write_config _bsd_pcie_write_config
#define pci_fetch_vpd_list _bsd_pci_fetch_vpd_list
#define pci_find_bar _bsd_pci_find_bar
#define pci_find_bsf _bsd_pci_find_bsf
+#define pci_find_cap_method _bsd_pci_find_cap_method
#define pci_find_class _bsd_pci_find_class
#define pci_find_dbsf _bsd_pci_find_dbsf
#define pci_find_extcap_method _bsd_pci_find_extcap_method
+#define pci_find_htcap_method _bsd_pci_find_htcap_method
+#define pci_find_pcie_root_port _bsd_pci_find_pcie_root_port
#define pci_freecfg _bsd_pci_freecfg
#define pci_generation _bsd_pci_generation
+#define pci_get_dma_tag _bsd_pci_get_dma_tag
+#define pci_get_max_payload _bsd_pci_get_max_payload
#define pci_get_max_read_req _bsd_pci_get_max_read_req
#define pci_get_powerstate_method _bsd_pci_get_powerstate_method
#define pci_get_resource_list _bsd_pci_get_resource_list
#define pci_get_vpd_ident_method _bsd_pci_get_vpd_ident_method
#define pci_get_vpd_readonly_method _bsd_pci_get_vpd_readonly_method
#define pci_ht_map_msi _bsd_pci_ht_map_msi
+#define pci_mapsize _bsd_pci_mapsize
#define pci_msi_count_method _bsd_pci_msi_count_method
#define pci_msi_device_blacklisted _bsd_pci_msi_device_blacklisted
#define pci_msix_count_method _bsd_pci_msix_count_method
#define pci_msix_device_blacklisted _bsd_pci_msix_device_blacklisted
+#define pci_msix_pba_bar_method _bsd_pci_msix_pba_bar_method
+#define pci_msix_table_bar_method _bsd_pci_msix_table_bar_method
#define pci_numdevs _bsd_pci_numdevs
#define pci_pending_msix _bsd_pci_pending_msix
#define pci_print_child _bsd_pci_print_child
#define pci_print_verbose _bsd_pci_print_verbose
#define pci_probe_nomatch _bsd_pci_probe_nomatch
+#define pci_read_bar _bsd_pci_read_bar
#define pci_read_config_method _bsd_pci_read_config_method
#define pci_read_device _bsd_pci_read_device
#define pci_read_ivar _bsd_pci_read_ivar
#define pci_release_msi_method _bsd_pci_release_msi_method
#define pci_release_resource _bsd_pci_release_resource
#define pci_remap_msix_method _bsd_pci_remap_msix_method
+#define pci_rescan_method _bsd_pci_rescan_method
#define pci_restore_state _bsd_pci_restore_state
#define pci_resume _bsd_pci_resume
+#define pci_resume_child _bsd_pci_resume_child
#define pci_save_state _bsd_pci_save_state
#define pci_set_max_read_req _bsd_pci_set_max_read_req
#define pci_set_powerstate_method _bsd_pci_set_powerstate_method
#define pci_setup_intr _bsd_pci_setup_intr
-#define pci_suspend _bsd_pci_suspend
+#define pci_suspend_child _bsd_pci_suspend_child
#define pci_teardown_intr _bsd_pci_teardown_intr
#define pci_write_config_method _bsd_pci_write_config_method
#define pci_write_ivar _bsd_pci_write_ivar
#define pcpu_entry_epair_dpcpu _bsd_pcpu_entry_epair_dpcpu
-#define pf_addr_copyout _bsd_pf_addr_copyout
+#define pcpu_zone_64 _bsd_pcpu_zone_64
+#define pcpu_zone_ptr _bsd_pcpu_zone_ptr
+#define pf_addr_cmp _bsd_pf_addr_cmp
#define pf_addrcpy _bsd_pf_addrcpy
#define pf_addr_inc _bsd_pf_addr_inc
-#define pf_addr_setup _bsd_pf_addr_setup
-#define pf_addr_wrap_neq _bsd_pf_addr_wrap_neq
-#define pf_add_threshold _bsd_pf_add_threshold
-#define pf_alloc_state_key _bsd_pf_alloc_state_key
-#define pf_altq_pl _bsd_pf_altq_pl
#define pf_altqs _bsd_pf_altqs
#define pf_altqs_active _bsd_pf_altqs_active
#define pf_altqs_inactive _bsd_pf_altqs_inactive
@@ -2236,227 +2415,146 @@
#define pf_anchor_remove _bsd_pf_anchor_remove
#define pf_anchors _bsd_pf_anchors
#define pf_anchor_setup _bsd_pf_anchor_setup
-#define pf_anchor_stack _bsd_pf_anchor_stack
-#define pfattach _bsd_pfattach
-#define pf_begin_rules _bsd_pf_begin_rules
-#define pf_cache_pl _bsd_pf_cache_pl
-#define pf_cachequeue _bsd_pf_cachequeue
-#define pf_cache_tree _bsd_pf_cache_tree
-#define pf_calc_mss _bsd_pf_calc_mss
#define pf_calc_skip_steps _bsd_pf_calc_skip_steps
-#define pf_cent_pl _bsd_pf_cent_pl
#define pf_change_a _bsd_pf_change_a
-#define pf_change_a6 _bsd_pf_change_a6
-#define pf_change_ap _bsd_pf_change_ap
-#define pf_change_icmp _bsd_pf_change_icmp
-#define pf_check_congestion _bsd_pf_check_congestion
-#define pf_check_proto_cksum _bsd_pf_check_proto_cksum
-#define pf_check_threshold _bsd_pf_check_threshold
+#define pf_change_proto_a _bsd_pf_change_proto_a
#define pf_cksum_fixup _bsd_pf_cksum_fixup
-#define pf_commit_rules _bsd_pf_commit_rules
-#define pf_compare_state_keys _bsd_pf_compare_state_keys
-#define pf_consistency_lock _bsd_pf_consistency_lock
+#define pf_cleanup _bsd_pf_cleanup
#define pfctlinput _bsd_pfctlinput
#define pfctlinput2 _bsd_pfctlinput2
#define pf_default_rule _bsd_pf_default_rule
-#define pf_detach_state _bsd_pf_detach_state
#define pf_dev _bsd_pf_dev
-#define pf_empty_pool _bsd_pf_empty_pool
#define pf_end_threads _bsd_pf_end_threads
-#define pf_find_anchor _bsd_pf_find_anchor
-#define pf_find_fragment _bsd_pf_find_fragment
+#define pffinddomain _bsd_pffinddomain
#define pf_find_or_create_ruleset _bsd_pf_find_or_create_ruleset
#define pffindproto _bsd_pffindproto
#define pf_find_ruleset _bsd_pf_find_ruleset
-#define pf_find_state _bsd_pf_find_state
+#define pf_find_src_node _bsd_pf_find_src_node
#define pf_find_state_all _bsd_pf_find_state_all
#define pf_find_state_byid _bsd_pf_find_state_byid
#define pffindtype _bsd_pffindtype
-#define pf_flush_fragments _bsd_pf_flush_fragments
-#define pf_fragcache _bsd_pf_fragcache
-#define pf_frag_pl _bsd_pf_frag_pl
-#define pf_fragqueue _bsd_pf_fragqueue
-#define pf_frag_tree _bsd_pf_frag_tree
#define pf_frag_tree_RB_FIND _bsd_pf_frag_tree_RB_FIND
#define pf_frag_tree_RB_INSERT _bsd_pf_frag_tree_RB_INSERT
-#define pf_frag_tree_RB_INSERT_COLOR _bsd_pf_frag_tree_RB_INSERT_COLOR
#define pf_frag_tree_RB_MINMAX _bsd_pf_frag_tree_RB_MINMAX
#define pf_frag_tree_RB_NEXT _bsd_pf_frag_tree_RB_NEXT
#define pf_frag_tree_RB_NFIND _bsd_pf_frag_tree_RB_NFIND
#define pf_frag_tree_RB_PREV _bsd_pf_frag_tree_RB_PREV
#define pf_frag_tree_RB_REMOVE _bsd_pf_frag_tree_RB_REMOVE
#define pf_frag_tree_RB_REMOVE_COLOR _bsd_pf_frag_tree_RB_REMOVE_COLOR
-#define pf_free_fragment _bsd_pf_free_fragment
+#define pf_free_rule _bsd_pf_free_rule
+#define pf_free_src_nodes _bsd_pf_free_src_nodes
#define pf_free_state _bsd_pf_free_state
-#define pf_frent_pl _bsd_pf_frent_pl
-#define pf_get_mss _bsd_pf_get_mss
-#define pf_get_pool _bsd_pf_get_pool
+#define pf_get_mtag _bsd_pf_get_mtag
#define pf_get_ruleset_number _bsd_pf_get_ruleset_number
-#define pf_get_sport _bsd_pf_get_sport
#define pf_get_translation _bsd_pf_get_translation
-#define pf_get_wscale _bsd_pf_get_wscale
-#define pf_hash _bsd_pf_hash
-#define pf_hash_rule _bsd_pf_hash_rule
-#define pf_hash_rule_addr _bsd_pf_hash_rule_addr
-#define pfi_address_add _bsd_pfi_address_add
-#define pfi_addr_pl _bsd_pfi_addr_pl
+#define pf_hashmask _bsd_pf_hashmask
+#define pf_hashseed _bsd_pf_hashseed
#define pfi_all _bsd_pfi_all
#define pfi_attach_cookie _bsd_pfi_attach_cookie
#define pfi_attach_group_cookie _bsd_pfi_attach_group_cookie
-#define pfi_attach_group_event _bsd_pfi_attach_group_event
-#define pfi_attach_ifgroup _bsd_pfi_attach_ifgroup
-#define pfi_attach_ifnet _bsd_pfi_attach_ifnet
-#define pfi_attach_ifnet_event _bsd_pfi_attach_ifnet_event
-#define pfi_buffer _bsd_pfi_buffer
-#define pfi_buffer_cnt _bsd_pfi_buffer_cnt
-#define pfi_buffer_max _bsd_pfi_buffer_max
#define pfi_change_group_cookie _bsd_pfi_change_group_cookie
-#define pfi_change_group_event _bsd_pfi_change_group_event
#define pfi_cleanup _bsd_pfi_cleanup
+#define pfi_cleanup_vnet _bsd_pfi_cleanup_vnet
#define pfi_clear_flags _bsd_pfi_clear_flags
#define pfi_detach_cookie _bsd_pfi_detach_cookie
#define pfi_detach_group_cookie _bsd_pfi_detach_group_cookie
-#define pfi_detach_group_event _bsd_pfi_detach_group_event
-#define pfi_detach_ifgroup _bsd_pfi_detach_ifgroup
-#define pfi_detach_ifnet _bsd_pfi_detach_ifnet
-#define pfi_detach_ifnet_event _bsd_pfi_detach_ifnet_event
+#define pf_idhash _bsd_pf_idhash
#define pfi_dynaddr_copyout _bsd_pfi_dynaddr_copyout
#define pfi_dynaddr_remove _bsd_pfi_dynaddr_remove
#define pfi_dynaddr_setup _bsd_pfi_dynaddr_setup
-#define pfi_dynaddr_update _bsd_pfi_dynaddr_update
#define pfi_get_ifaces _bsd_pfi_get_ifaces
-#define pfi_group_change _bsd_pfi_group_change
-#define pfi_ifaddr_event _bsd_pfi_ifaddr_event
#define pfi_ifaddr_event_cookie _bsd_pfi_ifaddr_event_cookie
-#define pfi_if_compare _bsd_pfi_if_compare
#define pfi_ifhead_RB_FIND _bsd_pfi_ifhead_RB_FIND
#define pfi_ifhead_RB_INSERT _bsd_pfi_ifhead_RB_INSERT
-#define pfi_ifhead_RB_INSERT_COLOR _bsd_pfi_ifhead_RB_INSERT_COLOR
#define pfi_ifhead_RB_MINMAX _bsd_pfi_ifhead_RB_MINMAX
#define pfi_ifhead_RB_NEXT _bsd_pfi_ifhead_RB_NEXT
#define pfi_ifhead_RB_NFIND _bsd_pfi_ifhead_RB_NFIND
#define pfi_ifhead_RB_PREV _bsd_pfi_ifhead_RB_PREV
#define pfi_ifhead_RB_REMOVE _bsd_pfi_ifhead_RB_REMOVE
#define pfi_ifhead_RB_REMOVE_COLOR _bsd_pfi_ifhead_RB_REMOVE_COLOR
-#define pfi_ifs _bsd_pfi_ifs
#define pfi_initialize _bsd_pfi_initialize
-#define pfi_instance_add _bsd_pfi_instance_add
-#define pfi_kifaddr_update _bsd_pfi_kifaddr_update
-#define pfi_kif_get _bsd_pfi_kif_get
+#define pfi_initialize_vnet _bsd_pfi_initialize_vnet
+#define pfi_kif_attach _bsd_pfi_kif_attach
+#define pfi_kif_find _bsd_pfi_kif_find
#define pfi_kif_match _bsd_pfi_kif_match
+#define pfi_kif_purge _bsd_pfi_kif_purge
#define pfi_kif_ref _bsd_pfi_kif_ref
#define pfi_kif_unref _bsd_pfi_kif_unref
-#define pfi_kif_update _bsd_pfi_kif_update
#define pfil_add_hook _bsd_pfil_add_hook
#define pfil_head_get _bsd_pfil_head_get
#define pfil_head_list _bsd_pfil_head_list
#define pfil_head_register _bsd_pfil_head_register
#define pfil_head_unregister _bsd_pfil_head_unregister
+#define pfil_lock _bsd_pfil_lock
#define pfil_remove_hook _bsd_pfil_remove_hook
+#define pfil_rlock _bsd_pfil_rlock
#define pfil_run_hooks _bsd_pfil_run_hooks
+#define pfil_runlock _bsd_pfil_runlock
+#define pfil_try_rlock _bsd_pfil_try_rlock
+#define pfil_wlock _bsd_pfil_wlock
+#define pfil_wowned _bsd_pfil_wowned
+#define pfil_wunlock _bsd_pfil_wunlock
#define pfi_match_addr _bsd_pfi_match_addr
+#define PFI_MTYPE _bsd_PFI_MTYPE
+#define pf_initialize _bsd_pf_initialize
#define pf_init_ruleset _bsd_pf_init_ruleset
-#define pf_init_threshold _bsd_pf_init_threshold
-#define pf_insert_src_node _bsd_pf_insert_src_node
-#define pfioctl _bsd_pfioctl
-#define pf_ip2key _bsd_pf_ip2key
+#define pf_intr _bsd_pf_intr
+#define pf_ioctl_lock _bsd_pf_ioctl_lock
#define pfi_set_flags _bsd_pfi_set_flags
-#define pfi_skip_if _bsd_pfi_skip_if
-#define pfi_table_update _bsd_pfi_table_update
-#define pfi_unmask _bsd_pfi_unmask
-#define pfi_update _bsd_pfi_update
#define pfi_update_status _bsd_pfi_update_status
-#define pflogattach _bsd_pflogattach
-#define pflog_cloner _bsd_pflog_cloner
-#define pflog_cloner_data _bsd_pflog_cloner_data
-#define pflogif_list _bsd_pflogif_list
+#define pf_keyhash _bsd_pf_keyhash
+#define pf_limits _bsd_pf_limits
#define pflogifs _bsd_pflogifs
-#define pflogioctl _bsd_pflogioctl
-#define pflogoutput _bsd_pflogoutput
-#define pflog_packet _bsd_pflog_packet
#define pflog_packet_ptr _bsd_pflog_packet_ptr
-#define pflogstart _bsd_pflogstart
#define pf_main_anchor _bsd_pf_main_anchor
#define pf_map_addr _bsd_pf_map_addr
-#define pf_match _bsd_pf_match
#define pf_match_addr _bsd_pf_match_addr
#define pf_match_addr_range _bsd_pf_match_addr_range
-#define pf_match_gid _bsd_pf_match_gid
#define pf_match_port _bsd_pf_match_port
#define pf_match_tag _bsd_pf_match_tag
-#define pf_match_translation _bsd_pf_match_translation
-#define pf_match_uid _bsd_pf_match_uid
-#define pf_modulate_sack _bsd_pf_modulate_sack
-#define pf_mv_pool _bsd_pf_mv_pool
-#define pf_ncache _bsd_pf_ncache
-#define pf_nfrents _bsd_pf_nfrents
+#define pf_mtag_cleanup _bsd_pf_mtag_cleanup
+#define pf_mtag_initialize _bsd_pf_mtag_initialize
+#define pf_mtag_z _bsd_pf_mtag_z
+#define pf_normalize_cleanup _bsd_pf_normalize_cleanup
#define pf_normalize_init _bsd_pf_normalize_init
#define pf_normalize_ip _bsd_pf_normalize_ip
#define pf_normalize_ip6 _bsd_pf_normalize_ip6
#define pf_normalize_tcp _bsd_pf_normalize_tcp
#define pf_normalize_tcp_cleanup _bsd_pf_normalize_tcp_cleanup
#define pf_normalize_tcp_init _bsd_pf_normalize_tcp_init
-#define pf_normalize_tcpopt _bsd_pf_normalize_tcpopt
#define pf_normalize_tcp_stateful _bsd_pf_normalize_tcp_stateful
#define pf_osfp_add _bsd_pf_osfp_add
-#define pf_osfp_cleanup _bsd_pf_osfp_cleanup
-#define pf_osfp_find _bsd_pf_osfp_find
-#define pf_osfp_find_exact _bsd_pf_osfp_find_exact
#define pf_osfp_fingerprint _bsd_pf_osfp_fingerprint
-#define pf_osfp_fingerprint_hdr _bsd_pf_osfp_fingerprint_hdr
#define pf_osfp_flush _bsd_pf_osfp_flush
#define pf_osfp_get _bsd_pf_osfp_get
-#define pf_osfp_initialize _bsd_pf_osfp_initialize
-#define pf_osfp_insert _bsd_pf_osfp_insert
-#define pf_osfp_list _bsd_pf_osfp_list
#define pf_osfp_match _bsd_pf_osfp_match
-#define pf_osfp_validate _bsd_pf_osfp_validate
#define pf_pabuf _bsd_pf_pabuf
-#define pf_pkt_addr_changed _bsd_pf_pkt_addr_changed
-#define pf_pooladdr_pl _bsd_pf_pooladdr_pl
-#define pf_pool_limits _bsd_pf_pool_limits
#define pf_poolmask _bsd_pf_poolmask
#define pf_print_flags _bsd_pf_print_flags
#define pf_print_host _bsd_pf_print_host
#define pf_print_state _bsd_pf_print_state
-#define pf_print_state_parts _bsd_pf_print_state_parts
+#define pf_proto_cksum_fixup _bsd_pf_proto_cksum_fixup
#define pf_proto_register _bsd_pf_proto_register
#define pf_proto_unregister _bsd_pf_proto_unregister
#define pf_pull_hdr _bsd_pf_pull_hdr
#define pf_purge_expired_fragments _bsd_pf_purge_expired_fragments
#define pf_purge_expired_src_nodes _bsd_pf_purge_expired_src_nodes
-#define pf_purge_expired_states _bsd_pf_purge_expired_states
#define pf_purge_thread _bsd_pf_purge_thread
#define pf_qids _bsd_pf_qids
#define pfr_add_addrs _bsd_pfr_add_addrs
#define pfr_add_tables _bsd_pfr_add_tables
#define pfr_attach_table _bsd_pfr_attach_table
-#define pfr_clean_node_mask _bsd_pfr_clean_node_mask
+#define pfr_cleanup _bsd_pfr_cleanup
#define pfr_clr_addrs _bsd_pfr_clr_addrs
#define pfr_clr_astats _bsd_pfr_clr_astats
#define pfr_clr_tables _bsd_pfr_clr_tables
#define pfr_clr_tstats _bsd_pfr_clr_tstats
-#define pfr_clstats_kentries _bsd_pfr_clstats_kentries
-#define pfr_clstats_ktable _bsd_pfr_clstats_ktable
-#define pfr_clstats_ktables _bsd_pfr_clstats_ktables
-#define pfr_commit_ktable _bsd_pfr_commit_ktable
-#define pfr_copyout_addr _bsd_pfr_copyout_addr
-#define pfr_create_kentry _bsd_pfr_create_kentry
-#define pfr_create_ktable _bsd_pfr_create_ktable
#define pfr_del_addrs _bsd_pfr_del_addrs
#define pfr_del_tables _bsd_pfr_del_tables
-#define pfr_destroy_kentries _bsd_pfr_destroy_kentries
-#define pfr_destroy_kentry _bsd_pfr_destroy_kentry
-#define pfr_destroy_ktable _bsd_pfr_destroy_ktable
-#define pfr_destroy_ktables _bsd_pfr_destroy_ktables
#define pfr_detach_table _bsd_pfr_detach_table
#define pfr_dynaddr_update _bsd_pfr_dynaddr_update
-#define pf_reassemble _bsd_pf_reassemble
-#define pf_remove_fragment _bsd_pf_remove_fragment
+#define pf_refragment6 _bsd_pf_refragment6
#define pf_remove_if_empty_ruleset _bsd_pf_remove_if_empty_ruleset
-#define pfr_enqueue_addrs _bsd_pfr_enqueue_addrs
-#define pfr_ffaddr _bsd_pfr_ffaddr
-#define pfr_fix_anchor _bsd_pfr_fix_anchor
#define pfr_get_addrs _bsd_pfr_get_addrs
#define pfr_get_astats _bsd_pfr_get_astats
#define pfr_get_tables _bsd_pfr_get_tables
@@ -2466,209 +2564,75 @@
#define pfr_ina_define _bsd_pfr_ina_define
#define pfr_ina_rollback _bsd_pfr_ina_rollback
#define pfr_initialize _bsd_pfr_initialize
-#define pfr_insert_kentries _bsd_pfr_insert_kentries
#define pfr_insert_kentry _bsd_pfr_insert_kentry
-#define pfr_insert_ktable _bsd_pfr_insert_ktable
-#define pfr_insert_ktables _bsd_pfr_insert_ktables
-#define pfr_kcounters_pl _bsd_pfr_kcounters_pl
-#define pfr_kentry_byidx _bsd_pfr_kentry_byidx
-#define pfr_kentry_pl _bsd_pfr_kentry_pl
#define pfr_ktable_cnt _bsd_pfr_ktable_cnt
-#define pfr_ktable_compare _bsd_pfr_ktable_compare
#define pfr_ktablehead_RB_FIND _bsd_pfr_ktablehead_RB_FIND
#define pfr_ktablehead_RB_INSERT _bsd_pfr_ktablehead_RB_INSERT
-#define pfr_ktablehead_RB_INSERT_COLOR _bsd_pfr_ktablehead_RB_INSERT_COLOR
#define pfr_ktablehead_RB_MINMAX _bsd_pfr_ktablehead_RB_MINMAX
#define pfr_ktablehead_RB_NEXT _bsd_pfr_ktablehead_RB_NEXT
#define pfr_ktablehead_RB_NFIND _bsd_pfr_ktablehead_RB_NFIND
#define pfr_ktablehead_RB_PREV _bsd_pfr_ktablehead_RB_PREV
#define pfr_ktablehead_RB_REMOVE _bsd_pfr_ktablehead_RB_REMOVE
#define pfr_ktablehead_RB_REMOVE_COLOR _bsd_pfr_ktablehead_RB_REMOVE_COLOR
-#define pfr_ktable_pl _bsd_pfr_ktable_pl
#define pfr_ktables _bsd_pfr_ktables
-#define pfr_lookup_addr _bsd_pfr_lookup_addr
-#define pfr_lookup_table _bsd_pfr_lookup_table
-#define pfr_mark_addrs _bsd_pfr_mark_addrs
-#define pfr_mask _bsd_pfr_mask
#define pfr_match_addr _bsd_pfr_match_addr
-#define pf_rm_rule _bsd_pf_rm_rule
#define pfr_nulltable _bsd_pfr_nulltable
-#define pf_rollback_rules _bsd_pf_rollback_rules
#define pf_routable _bsd_pf_routable
-#define pf_route _bsd_pf_route
-#define pf_route6 _bsd_pf_route6
#define pfr_pool_get _bsd_pfr_pool_get
-#define pfr_prepare_network _bsd_pfr_prepare_network
-#define pfr_remove_kentries _bsd_pfr_remove_kentries
-#define pfr_reset_feedback _bsd_pfr_reset_feedback
-#define pfr_route_kentry _bsd_pfr_route_kentry
#define pfr_set_addrs _bsd_pfr_set_addrs
-#define pfr_setflags_ktable _bsd_pfr_setflags_ktable
-#define pfr_setflags_ktables _bsd_pfr_setflags_ktables
#define pfr_set_tflags _bsd_pfr_set_tflags
-#define pfr_sin _bsd_pfr_sin
-#define pfr_sin6 _bsd_pfr_sin6
-#define pfr_skip_table _bsd_pfr_skip_table
-#define pfr_table_count _bsd_pfr_table_count
-#define pf_rtlabel_add _bsd_pf_rtlabel_add
-#define pf_rtlabel_copyout _bsd_pf_rtlabel_copyout
-#define pf_rtlabel_match _bsd_pf_rtlabel_match
-#define pf_rtlabel_remove _bsd_pf_rtlabel_remove
#define pfr_tst_addrs _bsd_pfr_tst_addrs
-#define pf_rule_pl _bsd_pf_rule_pl
-#define pfr_unroute_kentry _bsd_pfr_unroute_kentry
+#define pf_rules_lock _bsd_pf_rules_lock
#define pfr_update_stats _bsd_pfr_update_stats
-#define pfr_validate_addr _bsd_pfr_validate_addr
-#define pfr_validate_table _bsd_pfr_validate_table
-#define pfr_walktree _bsd_pfr_walktree
-#define pf_scrub_ip _bsd_pf_scrub_ip
-#define pf_scrub_ip6 _bsd_pf_scrub_ip6
-#define pf_send_tcp _bsd_pf_send_tcp
-#define pf_set_rt_ifp _bsd_pf_set_rt_ifp
-#define pf_setup_pfsync_matching _bsd_pf_setup_pfsync_matching
#define pf_socket_lookup _bsd_pf_socket_lookup
-#define pf_src_connlimit _bsd_pf_src_connlimit
-#define pf_src_tree_pl _bsd_pf_src_tree_pl
-#define pf_src_tree_RB_FIND _bsd_pf_src_tree_RB_FIND
-#define pf_src_tree_RB_INSERT _bsd_pf_src_tree_RB_INSERT
-#define pf_src_tree_RB_INSERT_COLOR _bsd_pf_src_tree_RB_INSERT_COLOR
-#define pf_src_tree_RB_MINMAX _bsd_pf_src_tree_RB_MINMAX
-#define pf_src_tree_RB_NEXT _bsd_pf_src_tree_RB_NEXT
-#define pf_src_tree_RB_NFIND _bsd_pf_src_tree_RB_NFIND
-#define pf_src_tree_RB_PREV _bsd_pf_src_tree_RB_PREV
-#define pf_src_tree_RB_REMOVE _bsd_pf_src_tree_RB_REMOVE
-#define pf_src_tree_RB_REMOVE_COLOR _bsd_pf_src_tree_RB_REMOVE_COLOR
-#define pf_src_tree_remove_state _bsd_pf_src_tree_remove_state
+#define pf_srchash _bsd_pf_srchash
+#define pf_srchashmask _bsd_pf_srchashmask
#define pf_state_expires _bsd_pf_state_expires
+#define pf_stateid _bsd_pf_stateid
#define pf_state_insert _bsd_pf_state_insert
-#define pf_state_item_pl _bsd_pf_state_item_pl
-#define pf_state_key_attach _bsd_pf_state_key_attach
-#define pf_state_key_detach _bsd_pf_state_key_detach
-#define pf_state_key_pl _bsd_pf_state_key_pl
+#define pf_state_key_clone _bsd_pf_state_key_clone
#define pf_state_key_setup _bsd_pf_state_key_setup
-#define pf_state_pl _bsd_pf_state_pl
-#define pf_state_scrub_pl _bsd_pf_state_scrub_pl
-#define pf_statetbl _bsd_pf_statetbl
-#define pf_state_tree_id_RB_FIND _bsd_pf_state_tree_id_RB_FIND
-#define pf_state_tree_id_RB_INSERT _bsd_pf_state_tree_id_RB_INSERT
-#define pf_state_tree_id_RB_INSERT_COLOR _bsd_pf_state_tree_id_RB_INSERT_COLOR
-#define pf_state_tree_id_RB_MINMAX _bsd_pf_state_tree_id_RB_MINMAX
-#define pf_state_tree_id_RB_NEXT _bsd_pf_state_tree_id_RB_NEXT
-#define pf_state_tree_id_RB_NFIND _bsd_pf_state_tree_id_RB_NFIND
-#define pf_state_tree_id_RB_PREV _bsd_pf_state_tree_id_RB_PREV
-#define pf_state_tree_id_RB_REMOVE _bsd_pf_state_tree_id_RB_REMOVE
-#define pf_state_tree_id_RB_REMOVE_COLOR _bsd_pf_state_tree_id_RB_REMOVE_COLOR
-#define pf_state_tree_RB_FIND _bsd_pf_state_tree_RB_FIND
-#define pf_state_tree_RB_INSERT _bsd_pf_state_tree_RB_INSERT
-#define pf_state_tree_RB_INSERT_COLOR _bsd_pf_state_tree_RB_INSERT_COLOR
-#define pf_state_tree_RB_MINMAX _bsd_pf_state_tree_RB_MINMAX
-#define pf_state_tree_RB_NEXT _bsd_pf_state_tree_RB_NEXT
-#define pf_state_tree_RB_NFIND _bsd_pf_state_tree_RB_NFIND
-#define pf_state_tree_RB_PREV _bsd_pf_state_tree_RB_PREV
-#define pf_state_tree_RB_REMOVE _bsd_pf_state_tree_RB_REMOVE
-#define pf_state_tree_RB_REMOVE_COLOR _bsd_pf_state_tree_RB_REMOVE_COLOR
+#define pf_state_key_z _bsd_pf_state_key_z
+#define pf_state_scrub_z _bsd_pf_state_scrub_z
+#define pf_state_z _bsd_pf_state_z
#define pf_status _bsd_pf_status
#define pf_step_into_anchor _bsd_pf_step_into_anchor
#define pf_step_out_of_anchor _bsd_pf_step_out_of_anchor
-#define pfsync_acts _bsd_pfsync_acts
-#define pfsync_alloc_scrub_memory _bsd_pfsync_alloc_scrub_memory
-#define pfsyncattach _bsd_pfsyncattach
-#define pfsync_bulk_fail _bsd_pfsync_bulk_fail
-#define pfsync_bulk_start _bsd_pfsync_bulk_start
-#define pfsync_bulk_status _bsd_pfsync_bulk_status
-#define pfsync_bulk_update _bsd_pfsync_bulk_update
-#define pfsync_clear_states _bsd_pfsync_clear_states
+#define pf_swi_cookie _bsd_pf_swi_cookie
#define pfsync_clear_states_ptr _bsd_pfsync_clear_states_ptr
-#define pfsync_clone_create _bsd_pfsync_clone_create
-#define pfsync_clone_destroy _bsd_pfsync_clone_destroy
#define pfsync_cloner _bsd_pfsync_cloner
-#define pfsync_cloner_data _bsd_pfsync_cloner_data
-#define pfsync_defer _bsd_pfsync_defer
#define pfsync_defer_ptr _bsd_pfsync_defer_ptr
-#define pfsync_deferred _bsd_pfsync_deferred
-#define pfsync_defer_tmo _bsd_pfsync_defer_tmo
-#define pfsync_delete_state _bsd_pfsync_delete_state
#define pfsync_delete_state_ptr _bsd_pfsync_delete_state_ptr
-#define pfsync_drop _bsd_pfsync_drop
-#define pfsync_if_dequeue _bsd_pfsync_if_dequeue
-#define pfsync_in_bus _bsd_pfsync_in_bus
-#define pfsync_in_clr _bsd_pfsync_in_clr
-#define pfsync_in_del _bsd_pfsync_in_del
-#define pfsync_in_del_c _bsd_pfsync_in_del_c
-#define pfsync_in_eof _bsd_pfsync_in_eof
-#define pfsync_in_error _bsd_pfsync_in_error
-#define pfsync_in_iack _bsd_pfsync_in_iack
-#define pfsync_in_ins _bsd_pfsync_in_ins
-#define pfsync_input _bsd_pfsync_input
-#define pfsync_insert_state _bsd_pfsync_insert_state
#define pfsync_insert_state_ptr _bsd_pfsync_insert_state_ptr
-#define pfsync_in_tdb _bsd_pfsync_in_tdb
-#define pfsync_ints _bsd_pfsync_ints
-#define pfsync_in_upd _bsd_pfsync_in_upd
-#define pfsync_in_upd_c _bsd_pfsync_in_upd_c
-#define pfsync_in_ureq _bsd_pfsync_in_ureq
-#define pfsyncioctl _bsd_pfsyncioctl
-#define pfsync_out_del _bsd_pfsync_out_del
-#define pfsync_out_iack _bsd_pfsync_out_iack
-#define pfsyncoutput _bsd_pfsyncoutput
-#define pfsync_out_state _bsd_pfsync_out_state
-#define pfsync_out_upd_c _bsd_pfsync_out_upd_c
-#define pfsync_q_del _bsd_pfsync_q_del
-#define pfsync_q_ins _bsd_pfsync_q_ins
-#define pfsync_qs _bsd_pfsync_qs
-#define pfsync_request_update _bsd_pfsync_request_update
-#define pfsync_sendout _bsd_pfsync_sendout
-#define pfsync_send_plus _bsd_pfsync_send_plus
-#define pfsyncstart _bsd_pfsyncstart
#define pfsync_state_export _bsd_pfsync_state_export
-#define pfsync_state_import _bsd_pfsync_state_import
#define pfsync_state_import_ptr _bsd_pfsync_state_import_ptr
-#define pfsync_state_in_use _bsd_pfsync_state_in_use
-#define pfsync_state_in_use_ptr _bsd_pfsync_state_in_use_ptr
-#define pfsync_sysctl _bsd_pfsync_sysctl
-#define pfsync_timeout _bsd_pfsync_timeout
-#define pfsync_tmos _bsd_pfsync_tmos
-#define pfsync_undefer _bsd_pfsync_undefer
-#define pfsync_up _bsd_pfsync_up
-#define pfsync_update_state _bsd_pfsync_update_state
#define pfsync_update_state_ptr _bsd_pfsync_update_state_ptr
-#define pfsync_update_state_req _bsd_pfsync_update_state_req
-#define pfsync_upds _bsd_pfsync_upds
-#define pfsync_upd_tcp _bsd_pfsync_upd_tcp
-#define pfsync_up_ptr _bsd_pfsync_up_ptr
-#define pf_tag2tagname _bsd_pf_tag2tagname
-#define pf_tagname2tag _bsd_pf_tagname2tag
#define pf_tag_packet _bsd_pf_tag_packet
-#define pf_tag_ref _bsd_pf_tag_ref
#define pf_tags _bsd_pf_tags
-#define pf_tag_unref _bsd_pf_tag_unref
-#define pf_task_mtx _bsd_pf_task_mtx
-#define pf_tbladdr_copyout _bsd_pf_tbladdr_copyout
-#define pf_tbladdr_remove _bsd_pf_tbladdr_remove
-#define pf_tbladdr_setup _bsd_pf_tbladdr_setup
-#define pf_tcp_iss _bsd_pf_tcp_iss
#define pf_tcp_iss_off _bsd_pf_tcp_iss_off
#define pf_tcp_secret _bsd_pf_tcp_secret
#define pf_tcp_secret_ctx _bsd_pf_tcp_secret_ctx
#define pf_tcp_secret_init _bsd_pf_tcp_secret_init
-#define pf_tcp_track_full _bsd_pf_tcp_track_full
-#define pf_tcp_track_sloppy _bsd_pf_tcp_track_sloppy
#define pf_test _bsd_pf_test
#define pf_test6 _bsd_pf_test6
-#define pf_test_fragment _bsd_pf_test_fragment
-#define pf_test_rule _bsd_pf_test_rule
-#define pf_test_state_icmp _bsd_pf_test_state_icmp
-#define pf_test_state_other _bsd_pf_test_state_other
-#define pf_test_state_tcp _bsd_pf_test_state_tcp
-#define pf_test_state_udp _bsd_pf_test_state_udp
+#define pf_unlinked_rules _bsd_pf_unlinked_rules
+#define pf_unlink_src_node _bsd_pf_unlink_src_node
#define pf_unlink_state _bsd_pf_unlink_state
+#define pf_unlnkdrules_mtx _bsd_pf_unlnkdrules_mtx
+#define pf_unload_vnet_purge _bsd_pf_unload_vnet_purge
+#define pf_vnet_active _bsd_pf_vnet_active
#define pfxlist_onlink_check _bsd_pfxlist_onlink_check
#define pgsigio _bsd_pgsigio
#define phashinit _bsd_phashinit
+#define phashinit_flags _bsd_phashinit_flags
#define pim6_input _bsd_pim6_input
#define pim_input _bsd_pim_input
+#define pipe_dtor _bsd_pipe_dtor
+#define pipe_named_ctor _bsd_pipe_named_ctor
+#define pipeselwakeup _bsd_pipeselwakeup
#define pmtu_expire _bsd_pmtu_expire
#define pmtu_probe _bsd_pmtu_probe
+#define poll_no_poll _bsd_poll_no_poll
#define ppsratecheck _bsd_ppsratecheck
#define prelist_remove _bsd_prelist_remove
#define preload_addr_relocate _bsd_preload_addr_relocate
@@ -2682,6 +2646,7 @@
#define preload_search_info _bsd_preload_search_info
#define preload_search_next_name _bsd_preload_search_next_name
#define printf _bsd_printf
+#define printf_uuid _bsd_printf_uuid
#define prison0 _bsd_prison0
#define prison_check _bsd_prison_check
#define prison_check_af _bsd_prison_check_af
@@ -2702,9 +2667,12 @@
#define ProxyCheck _bsd_ProxyCheck
#define ProxyModify _bsd_ProxyModify
#define pru_accept_notsupp _bsd_pru_accept_notsupp
+#define pru_aio_queue_notsupp _bsd_pru_aio_queue_notsupp
#define pru_attach_notsupp _bsd_pru_attach_notsupp
+#define pru_bindat_notsupp _bsd_pru_bindat_notsupp
#define pru_bind_notsupp _bsd_pru_bind_notsupp
#define pru_connect2_notsupp _bsd_pru_connect2_notsupp
+#define pru_connectat_notsupp _bsd_pru_connectat_notsupp
#define pru_connect_notsupp _bsd_pru_connect_notsupp
#define pru_control_notsupp _bsd_pru_control_notsupp
#define pru_disconnect_notsupp _bsd_pru_disconnect_notsupp
@@ -2712,6 +2680,7 @@
#define pru_peeraddr_notsupp _bsd_pru_peeraddr_notsupp
#define pru_rcvd_notsupp _bsd_pru_rcvd_notsupp
#define pru_rcvoob_notsupp _bsd_pru_rcvoob_notsupp
+#define pru_ready_notsupp _bsd_pru_ready_notsupp
#define pru_send_notsupp _bsd_pru_send_notsupp
#define pru_sense_null _bsd_pru_sense_null
#define pru_shutdown_notsupp _bsd_pru_shutdown_notsupp
@@ -2719,10 +2688,6 @@
#define pru_sopoll_notsupp _bsd_pru_sopoll_notsupp
#define pru_soreceive_notsupp _bsd_pru_soreceive_notsupp
#define pru_sosend_notsupp _bsd_pru_sosend_notsupp
-#define random_harvest _bsd_random_harvest
-#define random_set_wakeup_exit _bsd_random_set_wakeup_exit
-#define random_yarrow_deinit_harvester _bsd_random_yarrow_deinit_harvester
-#define random_yarrow_init_harvester _bsd_random_yarrow_init_harvester
#define ratecheck _bsd_ratecheck
#define raw_attach _bsd_raw_attach
#define rawcb_list _bsd_rawcb_list
@@ -2737,8 +2702,8 @@
#define rc4_init _bsd_rc4_init
#define read_dsfield _bsd_read_dsfield
#define read_machclk _bsd_read_machclk
-#define read_random _bsd_read_random
#define registered_toedevs _bsd_registered_toedevs
+#define register_tcp_functions _bsd_register_tcp_functions
#define resource_disabled _bsd_resource_disabled
#define resource_find_match _bsd_resource_find_match
#define resource_int_value _bsd_resource_int_value
@@ -2753,10 +2718,14 @@
#define resource_list_print_type _bsd_resource_list_print_type
#define resource_list_purge _bsd_resource_list_purge
#define resource_list_release _bsd_resource_list_release
+#define resource_list_release_active _bsd_resource_list_release_active
#define resource_list_reserve _bsd_resource_list_reserve
#define resource_list_reserved _bsd_resource_list_reserved
#define resource_list_unreserve _bsd_resource_list_unreserve
#define resource_string_value _bsd_resource_string_value
+#define resource_unset_value _bsd_resource_unset_value
+#define rib_free_info _bsd_rib_free_info
+#define rib_lookup_info _bsd_rib_lookup_info
#define rijndael_blockDecrypt _bsd_rijndael_blockDecrypt
#define rijndael_blockEncrypt _bsd_rijndael_blockEncrypt
#define rijndael_cipherInit _bsd_rijndael_cipherInit
@@ -2774,8 +2743,6 @@
#define rip6_ctloutput _bsd_rip6_ctloutput
#define rip6_input _bsd_rip6_input
#define rip6_output _bsd_rip6_output
-#define rip6_recvspace _bsd_rip6_recvspace
-#define rip6_sendspace _bsd_rip6_sendspace
#define rip6stat _bsd_rip6stat
#define rip6_usrreqs _bsd_rip6_usrreqs
#define ripcb _bsd_ripcb
@@ -2798,6 +2765,7 @@
#define rman_get_device _bsd_rman_get_device
#define rman_get_end _bsd_rman_get_end
#define rman_get_flags _bsd_rman_get_flags
+#define rman_get_mapping _bsd_rman_get_mapping
#define rman_get_rid _bsd_rman_get_rid
#define rman_get_size _bsd_rman_get_size
#define rman_get_start _bsd_rman_get_start
@@ -2816,6 +2784,7 @@
#define rman_set_bustag _bsd_rman_set_bustag
#define rman_set_device _bsd_rman_set_device
#define rman_set_end _bsd_rman_set_end
+#define rman_set_mapping _bsd_rman_set_mapping
#define rman_set_rid _bsd_rman_set_rid
#define rman_set_start _bsd_rman_set_start
#define rman_set_virtual _bsd_rman_set_virtual
@@ -2825,19 +2794,21 @@
#define RMD160Update _bsd_RMD160Update
#define rn4_mpath_inithead _bsd_rn4_mpath_inithead
#define rn6_mpath_inithead _bsd_rn6_mpath_inithead
-#define rn_addmask _bsd_rn_addmask
-#define rn_addmask_r _bsd_rn_addmask_r
#define rn_addroute _bsd_rn_addroute
#define rn_delete _bsd_rn_delete
#define rn_detachhead _bsd_rn_detachhead
-#define rn_init _bsd_rn_init
#define rn_inithead _bsd_rn_inithead
+#define rn_inithead_internal _bsd_rn_inithead_internal
#define rn_lookup _bsd_rn_lookup
#define rn_match _bsd_rn_match
#define rn_mpath_capable _bsd_rn_mpath_capable
#define rn_mpath_count _bsd_rn_mpath_count
#define rn_mpath_next _bsd_rn_mpath_next
#define rn_refines _bsd_rn_refines
+#define rn_walktree _bsd_rn_walktree
+#define rn_walktree_from _bsd_rn_walktree_from
+#define rollback_table_values _bsd_rollback_table_values
+#define rollback_toperation_state _bsd_rollback_toperation_state
#define root_bus _bsd_root_bus
#define root_bus_configure _bsd_root_bus_configure
#define root_devclass _bsd_root_devclass
@@ -2848,38 +2819,35 @@
#define rt6_flush _bsd_rt6_flush
#define rt_add_addr_allfibs _bsd_rt_add_addr_allfibs
#define rt_addrmsg _bsd_rt_addrmsg
-#define rtalloc _bsd_rtalloc
#define rtalloc1 _bsd_rtalloc1
#define rtalloc1_fib _bsd_rtalloc1_fib
-#define rtalloc_fib _bsd_rtalloc_fib
-#define rtalloc_ign _bsd_rtalloc_ign
#define rtalloc_ign_fib _bsd_rtalloc_ign_fib
#define rtalloc_mpath_fib _bsd_rtalloc_mpath_fib
-#define rtexpunge _bsd_rtexpunge
+#define rt_flushifroutes _bsd_rt_flushifroutes
+#define rt_flushifroutes_af _bsd_rt_flushifroutes_af
+#define rt_foreach_fib_walk _bsd_rt_foreach_fib_walk
+#define rt_foreach_fib_walk_del _bsd_rt_foreach_fib_walk_del
#define rtfree _bsd_rtfree
-#define rt_getifa _bsd_rt_getifa
#define rt_getifa_fib _bsd_rt_getifa_fib
#define rt_ieee80211msg _bsd_rt_ieee80211msg
#define rt_ifannouncemsg _bsd_rt_ifannouncemsg
#define rt_ifmsg _bsd_rt_ifmsg
#define rtinit _bsd_rtinit
-#define rtinit_fib _bsd_rtinit_fib
-#define rtioctl _bsd_rtioctl
#define rtioctl_fib _bsd_rtioctl_fib
#define rt_maskedcopy _bsd_rt_maskedcopy
#define rt_missmsg _bsd_rt_missmsg
#define rt_missmsg_fib _bsd_rt_missmsg_fib
+#define rt_mpath_capable _bsd_rt_mpath_capable
#define rt_mpath_conflict _bsd_rt_mpath_conflict
#define rt_mpath_deldup _bsd_rt_mpath_deldup
#define rt_mpath_matchgate _bsd_rt_mpath_matchgate
+#define rt_mpath_select _bsd_rt_mpath_select
#define rt_newaddrmsg _bsd_rt_newaddrmsg
#define rt_newaddrmsg_fib _bsd_rt_newaddrmsg_fib
#define rt_newmaddrmsg _bsd_rt_newmaddrmsg
#define rt_numfibs _bsd_rt_numfibs
#define rto_logging _bsd_rto_logging
-#define rtredirect _bsd_rtredirect
#define rtredirect_fib _bsd_rtredirect_fib
-#define rtrequest _bsd_rtrequest
#define rtrequest1_fib _bsd_rtrequest1_fib
#define rtrequest_fib _bsd_rtrequest_fib
#define rt_routemsg _bsd_rt_routemsg
@@ -2888,9 +2856,13 @@
#define rtsock_mtx _bsd_rtsock_mtx
#define rtsock_routemsg _bsd_rtsock_routemsg
#define rtstat _bsd_rtstat
+#define rt_table_destroy _bsd_rt_table_destroy
+#define rt_table_init _bsd_rt_table_init
#define rt_tables _bsd_rt_tables
+#define rt_tables_get_gen _bsd_rt_tables_get_gen
#define rt_tables_get_rnh _bsd_rt_tables_get_rnh
#define rttrash _bsd_rttrash
+#define rt_updatemtu _bsd_rt_updatemtu
#define rw_destroy _bsd_rw_destroy
#define _rw_downgrade _bsd__rw_downgrade
#define rw_init_flags _bsd_rw_init_flags
@@ -2905,10 +2877,13 @@
#define rw_wowned _bsd_rw_wowned
#define _rw_wunlock _bsd__rw_wunlock
#define sa6_any _bsd_sa6_any
+#define sa6_checkzone _bsd_sa6_checkzone
+#define sa6_checkzone_ifp _bsd_sa6_checkzone_ifp
#define sa6_embedscope _bsd_sa6_embedscope
#define sa6_recoverscope _bsd_sa6_recoverscope
#define sack_array _bsd_sack_array
#define sack_hole_zone _bsd_sack_hole_zone
+#define sballoc _bsd_sballoc
#define sbappend _bsd_sbappend
#define sbappendaddr _bsd_sbappendaddr
#define sbappendaddr_locked _bsd_sbappendaddr_locked
@@ -2922,6 +2897,7 @@
#define sbappendstream_locked _bsd_sbappendstream_locked
#define sbcompress _bsd_sbcompress
#define sbcreatecontrol _bsd_sbcreatecontrol
+#define sbcut_locked _bsd_sbcut_locked
#define sbdestroy _bsd_sbdestroy
#define sbdrop _bsd_sbdrop
#define sbdrop_locked _bsd_sbdrop_locked
@@ -2929,9 +2905,11 @@
#define sbdroprecord_locked _bsd_sbdroprecord_locked
#define sbflush _bsd_sbflush
#define sbflush_locked _bsd_sbflush_locked
+#define sbfree _bsd_sbfree
#define sblock _bsd_sblock
#define sb_max _bsd_sb_max
#define sb_max_adj _bsd_sb_max_adj
+#define sbready _bsd_sbready
#define sbrelease _bsd_sbrelease
#define sbrelease_internal _bsd_sbrelease_internal
#define sbrelease_locked _bsd_sbrelease_locked
@@ -2940,11 +2918,14 @@
#define sbsndmbuf _bsd_sbsndmbuf
#define sbsndptr _bsd_sbsndptr
#define sbtoxsockbuf _bsd_sbtoxsockbuf
+#define sbt_tickthreshold _bsd_sbt_tickthreshold
+#define sbt_timethreshold _bsd_sbt_timethreshold
#define sbuf_bcat _bsd_sbuf_bcat
#define sbuf_bcopyin _bsd_sbuf_bcopyin
#define sbuf_bcpy _bsd_sbuf_bcpy
#define sbuf_cat _bsd_sbuf_cat
#define sbuf_clear _bsd_sbuf_clear
+#define sbuf_clear_flags _bsd_sbuf_clear_flags
#define sbuf_copyin _bsd_sbuf_copyin
#define sbuf_cpy _bsd_sbuf_cpy
#define sbuf_data _bsd_sbuf_data
@@ -2953,12 +2934,15 @@
#define sbuf_end_section _bsd_sbuf_end_section
#define sbuf_error _bsd_sbuf_error
#define sbuf_finish _bsd_sbuf_finish
+#define sbuf_get_flags _bsd_sbuf_get_flags
#define sbuf_len _bsd_sbuf_len
#define sbuf_new _bsd_sbuf_new
#define sbuf_new_for_sysctl _bsd_sbuf_new_for_sysctl
#define sbuf_printf _bsd_sbuf_printf
+#define sbuf_printf_uuid _bsd_sbuf_printf_uuid
#define sbuf_putc _bsd_sbuf_putc
#define sbuf_set_drain _bsd_sbuf_set_drain
+#define sbuf_set_flags _bsd_sbuf_set_flags
#define sbuf_setpos _bsd_sbuf_setpos
#define sbuf_start_section _bsd_sbuf_start_section
#define sbuf_trim _bsd_sbuf_trim
@@ -2973,23 +2957,55 @@
#define scope6_init _bsd_scope6_init
#define scope6_ioctl _bsd_scope6_ioctl
#define scope6_setdefault _bsd_scope6_setdefault
+#define scsi_attrib_ascii_sbuf _bsd_scsi_attrib_ascii_sbuf
+#define scsi_attrib_hexdump_sbuf _bsd_scsi_attrib_hexdump_sbuf
+#define scsi_attrib_int_sbuf _bsd_scsi_attrib_int_sbuf
+#define scsi_attrib_prefix_sbuf _bsd_scsi_attrib_prefix_sbuf
+#define scsi_attrib_sbuf _bsd_scsi_attrib_sbuf
+#define scsi_attrib_text_sbuf _bsd_scsi_attrib_text_sbuf
+#define scsi_attrib_value_sbuf _bsd_scsi_attrib_value_sbuf
+#define scsi_attrib_vendser_sbuf _bsd_scsi_attrib_vendser_sbuf
+#define scsi_attrib_volcoh_sbuf _bsd_scsi_attrib_volcoh_sbuf
#define scsi_devid_is_lun_eui64 _bsd_scsi_devid_is_lun_eui64
#define scsi_devid_is_lun_naa _bsd_scsi_devid_is_lun_naa
#define scsi_devid_is_lun_name _bsd_scsi_devid_is_lun_name
#define scsi_devid_is_lun_t10 _bsd_scsi_devid_is_lun_t10
#define scsi_devid_is_naa_ieee_reg _bsd_scsi_devid_is_naa_ieee_reg
+#define scsi_devid_is_port_naa _bsd_scsi_devid_is_port_naa
#define scsi_devid_is_sas_target _bsd_scsi_devid_is_sas_target
#define scsi_extract_sense_len _bsd_scsi_extract_sense_len
+#define scsi_find_attrib_entry _bsd_scsi_find_attrib_entry
+#define scsi_get_attrib_entry _bsd_scsi_get_attrib_entry
#define scsi_get_devid _bsd_scsi_get_devid
+#define scsi_get_devid_desc _bsd_scsi_get_devid_desc
+#define scsi_get_nv _bsd_scsi_get_nv
#define scsi_get_sense_key _bsd_scsi_get_sense_key
#define scsi_inquiry _bsd_scsi_inquiry
+#define scsi_log_select _bsd_scsi_log_select
+#define scsi_log_sense _bsd_scsi_log_sense
+#define scsi_mam_attr_table _bsd_scsi_mam_attr_table
+#define scsi_mode_select _bsd_scsi_mode_select
+#define scsi_mode_select_len _bsd_scsi_mode_select_len
+#define scsi_mode_sense _bsd_scsi_mode_sense
+#define scsi_mode_sense_len _bsd_scsi_mode_sense_len
+#define scsi_nv_to_str _bsd_scsi_nv_to_str
+#define scsi_parse_transportid _bsd_scsi_parse_transportid
+#define scsi_parse_transportid_64bit _bsd_scsi_parse_transportid_64bit
+#define scsi_parse_transportid_iscsi _bsd_scsi_parse_transportid_iscsi
+#define scsi_parse_transportid_rdma _bsd_scsi_parse_transportid_rdma
+#define scsi_parse_transportid_sop _bsd_scsi_parse_transportid_sop
+#define scsi_parse_transportid_spi _bsd_scsi_parse_transportid_spi
+#define scsi_prevent _bsd_scsi_prevent
#define scsi_print_inquiry _bsd_scsi_print_inquiry
#define scsi_print_inquiry_short _bsd_scsi_print_inquiry_short
+#define scsi_proto_map _bsd_scsi_proto_map
#define scsi_read_capacity _bsd_scsi_read_capacity
#define scsi_read_write _bsd_scsi_read_write
+#define scsi_request_sense _bsd_scsi_request_sense
#define scsi_set_sense_data _bsd_scsi_set_sense_data
#define scsi_set_sense_data_va _bsd_scsi_set_sense_data_va
#define scsi_test_unit_ready _bsd_scsi_test_unit_ready
+#define scsi_transportid_sbuf _bsd_scsi_transportid_sbuf
#define scsi_write_same _bsd_scsi_write_same
#define SCTP6_ARE_ADDR_EQUAL _bsd_SCTP6_ARE_ADDR_EQUAL
#define sctp6_ctlinput _bsd_sctp6_ctlinput
@@ -3011,7 +3027,6 @@
#define sctp_addr_change _bsd_sctp_addr_change
#define sctp_add_remote_addr _bsd_sctp_add_remote_addr
#define sctp_addr_mgmt_ep_sa _bsd_sctp_addr_mgmt_ep_sa
-#define sctp_add_stream_reset_out _bsd_sctp_add_stream_reset_out
#define sctp_add_stream_reset_result _bsd_sctp_add_stream_reset_result
#define sctp_add_stream_reset_result_tsn _bsd_sctp_add_stream_reset_result_tsn
#define sctp_add_to_readq _bsd_sctp_add_to_readq
@@ -3024,7 +3039,6 @@
#define sctp_alloc_key _bsd_sctp_alloc_key
#define sctp_alloc_sharedkey _bsd_sctp_alloc_sharedkey
#define sctp_aloc_assoc _bsd_sctp_aloc_assoc
-#define sctp_append_to_readq _bsd_sctp_append_to_readq
#define sctp_arethere_unrecognized_parameters _bsd_sctp_arethere_unrecognized_parameters
#define sctp_asconf_cleanup _bsd_sctp_asconf_cleanup
#define sctp_asconf_iterator_end _bsd_sctp_asconf_iterator_end
@@ -3044,7 +3058,6 @@
#define sctp_auth_key_release _bsd_sctp_auth_key_release
#define sctp_auth_setactivekey _bsd_sctp_auth_setactivekey
#define sctp_auth_setactivekey_ep _bsd_sctp_auth_setactivekey_ep
-#define sctp_auth_set_default_chunks _bsd_sctp_auth_set_default_chunks
#define sctp_autoclose_timer _bsd_sctp_autoclose_timer
#define sctp_bindx_add_address _bsd_sctp_bindx_add_address
#define sctp_bindx_delete_address _bsd_sctp_bindx_delete_address
@@ -3059,6 +3072,7 @@
#define sctp_check_address_list _bsd_sctp_check_address_list
#define sctp_CheckTimers _bsd_sctp_CheckTimers
#define sctp_chunk_output _bsd_sctp_chunk_output
+#define sctp_clean_up_stream _bsd_sctp_clean_up_stream
#define sctp_clear_cachedkeys _bsd_sctp_clear_cachedkeys
#define sctp_clear_cachedkeys_ep _bsd_sctp_clear_cachedkeys_ep
#define sctp_clear_chunklist _bsd_sctp_clear_chunklist
@@ -3077,7 +3091,6 @@
#define sctp_copy_skeylist _bsd_sctp_copy_skeylist
#define sctp_ctlinput _bsd_sctp_ctlinput
#define sctp_ctloutput _bsd_sctp_ctloutput
-#define sctp_cwnd_adjust _bsd_sctp_cwnd_adjust
#define sctp_deact_sharedkey _bsd_sctp_deact_sharedkey
#define sctp_deact_sharedkey_ep _bsd_sctp_deact_sharedkey_ep
#define sctp_default_supported_hmaclist _bsd_sctp_default_supported_hmaclist
@@ -3115,7 +3128,6 @@
#define sctp_find_sharedkey _bsd_sctp_find_sharedkey
#define sctp_find_stream_reset _bsd_sctp_find_stream_reset
#define sctp_find_vrf _bsd_sctp_find_vrf
-#define sctp_finish _bsd_sctp_finish
#define sctp_fix_ecn_echo _bsd_sctp_fix_ecn_echo
#define sctp_flush _bsd_sctp_flush
#define sctp_free_assoc _bsd_sctp_free_assoc
@@ -3160,7 +3172,6 @@
#define sctp_inpcb_free _bsd_sctp_inpcb_free
#define sctp_input _bsd_sctp_input
#define sctp_input_with_port _bsd_sctp_input_with_port
-#define sctp_insert_laddr _bsd_sctp_insert_laddr
#define sctp_insert_sharedkey _bsd_sctp_insert_sharedkey
#define sctp_is_address_in_scope _bsd_sctp_is_address_in_scope
#define sctp_is_address_on_local_host _bsd_sctp_is_address_on_local_host
@@ -3184,8 +3195,6 @@
#define sctp_log_lock _bsd_sctp_log_lock
#define sctp_log_map _bsd_sctp_log_map
#define sctp_log_maxburst _bsd_sctp_log_maxburst
-#define sctp_log_mb _bsd_sctp_log_mb
-#define sctp_log_mbcnt _bsd_sctp_log_mbcnt
#define sctp_log_nagle_event _bsd_sctp_log_nagle_event
#define sctp_log_rwnd _bsd_sctp_log_rwnd
#define sctp_log_rwnd_set _bsd_sctp_log_rwnd_set
@@ -3193,7 +3202,7 @@
#define sctp_log_strm_del _bsd_sctp_log_strm_del
#define sctp_log_strm_del_alt _bsd_sctp_log_strm_del_alt
#define sctp_lower_sosend _bsd_sctp_lower_sosend
-#define sctp_map_lookup_tab _bsd_sctp_map_lookup_tab
+#define sctp_map_assoc_state _bsd_sctp_map_assoc_state
#define sctp_mark_ifa_addr_down _bsd_sctp_mark_ifa_addr_down
#define sctp_mark_ifa_addr_up _bsd_sctp_mark_ifa_addr_up
#define SCTP_M_ASC_ADDR _bsd_SCTP_M_ASC_ADDR
@@ -3246,7 +3255,6 @@
#define sctp_queue_op_err _bsd_sctp_queue_op_err
#define sctp_recover_scope _bsd_sctp_recover_scope
#define sctp_release_pr_sctp_chunk _bsd_sctp_release_pr_sctp_chunk
-#define sctp_remove_laddr _bsd_sctp_remove_laddr
#define sctp_remove_net _bsd_sctp_remove_net
#define sctp_report_all_outbound _bsd_sctp_report_all_outbound
#define sctp_reset_in_stream _bsd_sctp_reset_in_stream
@@ -3261,6 +3269,7 @@
#define sctp_send_cookie_ack _bsd_sctp_send_cookie_ack
#define sctp_send_cookie_echo _bsd_sctp_send_cookie_echo
#define sctp_send_cwr _bsd_sctp_send_cwr
+#define sctp_send_deferred_reset_response _bsd_sctp_send_deferred_reset_response
#define sctp_send_ecn_echo _bsd_sctp_send_ecn_echo
#define sctp_send_hb _bsd_sctp_send_hb
#define sctp_send_heartbeat_ack _bsd_sctp_send_heartbeat_ack
@@ -3274,10 +3283,10 @@
#define sctp_send_shutdown_ack _bsd_sctp_send_shutdown_ack
#define sctp_send_shutdown_complete _bsd_sctp_send_shutdown_complete
#define sctp_send_shutdown_complete2 _bsd_sctp_send_shutdown_complete2
+#define sctp_send_stream_reset_out_if_possible _bsd_sctp_send_stream_reset_out_if_possible
#define sctp_send_str_reset_req _bsd_sctp_send_str_reset_req
#define sctp_serialize_auth_chunks _bsd_sctp_serialize_auth_chunks
#define sctp_serialize_hmaclist _bsd_sctp_serialize_hmaclist
-#define sctp_service_queues _bsd_sctp_service_queues
#define sctp_set_key _bsd_sctp_set_key
#define sctp_set_primary_addr _bsd_sctp_set_primary_addr
#define sctp_set_primary_ip_address _bsd_sctp_set_primary_ip_address
@@ -3299,7 +3308,6 @@
#define sctp_swap_inpcb_for_listen _bsd_sctp_swap_inpcb_for_listen
#define sctp_t1init_timer _bsd_sctp_t1init_timer
#define sctp_t3rxt_timer _bsd_sctp_t3rxt_timer
-#define sctp_threshold_management _bsd_sctp_threshold_management
#define sctp_timeout_handler _bsd_sctp_timeout_handler
#define sctp_timer_start _bsd_sctp_timer_start
#define sctp_timer_stop _bsd_sctp_timer_stop
@@ -3318,7 +3326,7 @@
#define sctp_verify_hmac_param _bsd_sctp_verify_hmac_param
#define sctp_wakeup_iterator _bsd_sctp_wakeup_iterator
#define sctp_wakeup_log _bsd_sctp_wakeup_log
-#define sdhci_debug _bsd_sdhci_debug
+#define sctp_wakeup_the_read_socket _bsd_sctp_wakeup_the_read_socket
#define seldrain _bsd_seldrain
#define selrecord _bsd_selrecord
#define seltdfini _bsd_seltdfini
@@ -3342,27 +3350,63 @@
#define sha1_loop _bsd_sha1_loop
#define sha1_pad _bsd_sha1_pad
#define sha1_result _bsd_sha1_result
-#define SHA256_Data _bsd_SHA256_Data
-#define SHA256_End _bsd_SHA256_End
#define SHA256_Final _bsd_SHA256_Final
#define SHA256_Init _bsd_SHA256_Init
#define SHA256_Update _bsd_SHA256_Update
-#define SHA384_Data _bsd_SHA384_Data
-#define SHA384_End _bsd_SHA384_End
#define SHA384_Final _bsd_SHA384_Final
#define SHA384_Init _bsd_SHA384_Init
#define SHA384_Update _bsd_SHA384_Update
-#define SHA512_Data _bsd_SHA512_Data
-#define SHA512_End _bsd_SHA512_End
#define SHA512_Final _bsd_SHA512_Final
#define SHA512_Init _bsd_SHA512_Init
#define SHA512_Update _bsd_SHA512_Update
+#define SipHash24_TestVectors _bsd_SipHash24_TestVectors
+#define SipHash_End _bsd_SipHash_End
+#define SipHash_Final _bsd_SipHash_Final
+#define SipHash_InitX _bsd_SipHash_InitX
+#define SipHash_SetKey _bsd_SipHash_SetKey
+#define SipHash_Update _bsd_SipHash_Update
+#define SipHashX _bsd_SipHashX
+#define Skein1024_Final _bsd_Skein1024_Final
+#define Skein1024_Final_Pad _bsd_Skein1024_Final_Pad
+#define Skein1024_Init _bsd_Skein1024_Init
+#define Skein1024_InitExt _bsd_Skein1024_InitExt
+#define SKEIN1024_IV_1024 _bsd_SKEIN1024_IV_1024
+#define SKEIN1024_IV_384 _bsd_SKEIN1024_IV_384
+#define SKEIN1024_IV_512 _bsd_SKEIN1024_IV_512
+#define Skein1024_Output _bsd_Skein1024_Output
+#define Skein1024_Process_Block _bsd_Skein1024_Process_Block
+#define Skein1024_Update _bsd_Skein1024_Update
+#define Skein_256_Final _bsd_Skein_256_Final
+#define Skein_256_Final_Pad _bsd_Skein_256_Final_Pad
+#define Skein_256_Init _bsd_Skein_256_Init
+#define Skein_256_InitExt _bsd_Skein_256_InitExt
+#define SKEIN_256_IV_128 _bsd_SKEIN_256_IV_128
+#define SKEIN_256_IV_160 _bsd_SKEIN_256_IV_160
+#define SKEIN_256_IV_224 _bsd_SKEIN_256_IV_224
+#define SKEIN_256_IV_256 _bsd_SKEIN_256_IV_256
+#define Skein_256_Output _bsd_Skein_256_Output
+#define Skein_256_Process_Block _bsd_Skein_256_Process_Block
+#define Skein_256_Update _bsd_Skein_256_Update
+#define Skein_512_Final _bsd_Skein_512_Final
+#define Skein_512_Final_Pad _bsd_Skein_512_Final_Pad
+#define Skein_512_Init _bsd_Skein_512_Init
+#define Skein_512_InitExt _bsd_Skein_512_InitExt
+#define SKEIN_512_IV_128 _bsd_SKEIN_512_IV_128
+#define SKEIN_512_IV_160 _bsd_SKEIN_512_IV_160
+#define SKEIN_512_IV_224 _bsd_SKEIN_512_IV_224
+#define SKEIN_512_IV_256 _bsd_SKEIN_512_IV_256
+#define SKEIN_512_IV_384 _bsd_SKEIN_512_IV_384
+#define SKEIN_512_IV_512 _bsd_SKEIN_512_IV_512
+#define Skein_512_Output _bsd_Skein_512_Output
+#define Skein_512_Process_Block _bsd_Skein_512_Process_Block
+#define Skein_512_Update _bsd_Skein_512_Update
+#define Skein_Get64_LSB_First _bsd_Skein_Get64_LSB_First
+#define Skein_Put64_LSB_First _bsd_Skein_Put64_LSB_First
#define skipjack_backwards _bsd_skipjack_backwards
#define skipjack_forwards _bsd_skipjack_forwards
#define sl_compress_init _bsd_sl_compress_init
#define sl_compress_tcp _bsd_sl_compress_tcp
#define _sleep _bsd__sleep
-#define sleepinit _bsd_sleepinit
#define sleepq_add _bsd_sleepq_add
#define sleepq_alloc _bsd_sleepq_alloc
#define sleepq_broadcast _bsd_sleepq_broadcast
@@ -3371,7 +3415,7 @@
#define sleepq_lookup _bsd_sleepq_lookup
#define sleepq_release _bsd_sleepq_release
#define sleepq_remove _bsd_sleepq_remove
-#define sleepq_set_timeout _bsd_sleepq_set_timeout
+#define sleepq_set_timeout_sbt _bsd_sleepq_set_timeout_sbt
#define sleepq_signal _bsd_sleepq_signal
#define sleepq_sleepcnt _bsd_sleepq_sleepcnt
#define sleepq_timedwait _bsd_sleepq_timedwait
@@ -3387,26 +3431,27 @@
#define smc_miibus_writereg _bsd_smc_miibus_writereg
#define smc_probe _bsd_smc_probe
#define snprintf _bsd_snprintf
+#define snprintf_uuid _bsd_snprintf_uuid
#define soabort _bsd_soabort
#define soaccept _bsd_soaccept
#define sobind _bsd_sobind
+#define sobindat _bsd_sobindat
#define socantrcvmore _bsd_socantrcvmore
#define socantrcvmore_locked _bsd_socantrcvmore_locked
#define socantsendmore _bsd_socantsendmore
#define socantsendmore_locked _bsd_socantsendmore_locked
-#define sockargs _bsd_sockargs
+#define socket_hhh _bsd_socket_hhh
#define socketops _bsd_socketops
-#define socket_zone _bsd_socket_zone
#define soclose _bsd_soclose
#define soconnect _bsd_soconnect
#define soconnect2 _bsd_soconnect2
+#define soconnectat _bsd_soconnectat
#define socreate _bsd_socreate
#define sodisconnect _bsd_sodisconnect
#define sodupsockaddr _bsd_sodupsockaddr
#define so_error_get _bsd_so_error_get
#define so_error_set _bsd_so_error_set
#define sofree _bsd_sofree
-#define softclock _bsd_softclock
#define so_gencnt _bsd_so_gencnt
#define sogetopt _bsd_sogetopt
#define sohasoutofband _bsd_sohasoutofband
@@ -3422,6 +3467,9 @@
#define solisten_proto_check _bsd_solisten_proto_check
#define so_lock _bsd_so_lock
#define sonewconn _bsd_sonewconn
+#define soo_close _bsd_soo_close
+#define soo_ioctl _bsd_soo_ioctl
+#define soo_poll _bsd_soo_poll
#define sooptcopyin _bsd_sooptcopyin
#define sooptcopyout _bsd_sooptcopyout
#define soopt_getm _bsd_soopt_getm
@@ -3429,6 +3477,8 @@
#define so_options_set _bsd_so_options_set
#define soopt_mcopyin _bsd_soopt_mcopyin
#define soopt_mcopyout _bsd_soopt_mcopyout
+#define soo_read _bsd_soo_read
+#define soo_write _bsd_soo_write
#define sopoll _bsd_sopoll
#define sopoll_generic _bsd_sopoll_generic
#define so_protosw_get _bsd_so_protosw_get
@@ -3476,8 +3526,6 @@
#define _start_set_nexus _bsd__start_set_nexus
#define _start_set_sysctl_set _bsd__start_set_sysctl_set
#define _start_set_sysinit_set _bsd__start_set_sysinit_set
-#define state_list _bsd_state_list
-#define stf_cloner _bsd_stf_cloner
#define _stop_set_modmetadata_set _bsd__stop_set_modmetadata_set
#define _stop_set_nexus _bsd__stop_set_nexus
#define _stop_set_sysctl_set _bsd__stop_set_sysctl_set
@@ -3486,11 +3534,11 @@
#define swi_add _bsd_swi_add
#define swi_sched _bsd_swi_sched
#define sx_destroy _bsd_sx_destroy
-#define _sx_downgrade _bsd__sx_downgrade
+#define sx_downgrade_ _bsd_sx_downgrade_
#define sx_init_flags _bsd_sx_init_flags
#define sx_sysinit _bsd_sx_sysinit
-#define _sx_try_upgrade _bsd__sx_try_upgrade
-#define _sx_try_xlock _bsd__sx_try_xlock
+#define sx_try_upgrade_ _bsd_sx_try_upgrade_
+#define sx_try_xlock_ _bsd_sx_try_xlock_
#define _sx_xlock _bsd__sx_xlock
#define sx_xlocked _bsd_sx_xlocked
#define _sx_xunlock _bsd__sx_xunlock
@@ -3499,8 +3547,6 @@
#define syncache_chkrst _bsd_syncache_chkrst
#define syncache_expand _bsd_syncache_expand
#define syncache_init _bsd_syncache_init
-#define syncache_lookup _bsd_syncache_lookup
-#define syncache_pcbcount _bsd_syncache_pcbcount
#define syncache_pcblist _bsd_syncache_pcblist
#define syncache_unreach _bsd_syncache_unreach
#define sysctl_add_oid _bsd_sysctl_add_oid
@@ -3519,75 +3565,81 @@
#define sysctl_ctx_entry_find _bsd_sysctl_ctx_entry_find
#define sysctl_ctx_free _bsd_sysctl_ctx_free
#define sysctl_ctx_init _bsd_sysctl_ctx_init
-#define sysctl__debug_children _bsd_sysctl__debug_children
-#define sysctl__dev_children _bsd_sysctl__dev_children
+#define sysctl___debug _bsd_sysctl___debug
+#define sysctl___dev _bsd_sysctl___dev
#define sysctl_find_oid _bsd_sysctl_find_oid
+#define sysctl_handle_16 _bsd_sysctl_handle_16
+#define sysctl_handle_32 _bsd_sysctl_handle_32
#define sysctl_handle_64 _bsd_sysctl_handle_64
+#define sysctl_handle_8 _bsd_sysctl_handle_8
+#define sysctl_handle_bool _bsd_sysctl_handle_bool
+#define sysctl_handle_counter_u64 _bsd_sysctl_handle_counter_u64
+#define sysctl_handle_counter_u64_array _bsd_sysctl_handle_counter_u64_array
#define sysctl_handle_int _bsd_sysctl_handle_int
#define sysctl_handle_long _bsd_sysctl_handle_long
#define sysctl_handle_opaque _bsd_sysctl_handle_opaque
#define sysctl_handle_string _bsd_sysctl_handle_string
-#define sysctl__hw_atm_children _bsd_sysctl__hw_atm_children
-#define sysctl__hw_bus_children _bsd_sysctl__hw_bus_children
-#define sysctl__hw_children _bsd_sysctl__hw_children
-#define sysctl__hw_pci_children _bsd_sysctl__hw_pci_children
-#define sysctl__hw_sdhci_children _bsd_sysctl__hw_sdhci_children
-#define sysctl__hw_usb_children _bsd_sysctl__hw_usb_children
-#define sysctl__hw_zynq_children _bsd_sysctl__hw_zynq_children
-#define sysctl__kern_cam_children _bsd_sysctl__kern_cam_children
-#define sysctl__kern_children _bsd_sysctl__kern_children
-#define sysctl__kern_features_children _bsd_sysctl__kern_features_children
-#define sysctl__kern_ipc_children _bsd_sysctl__kern_ipc_children
-#define sysctl_lock _bsd_sysctl_lock
+#define sysctl_handle_uma_zone_cur _bsd_sysctl_handle_uma_zone_cur
+#define sysctl_handle_uma_zone_max _bsd_sysctl_handle_uma_zone_max
+#define sysctl___hw _bsd_sysctl___hw
+#define sysctl___hw_atm _bsd_sysctl___hw_atm
+#define sysctl___hw_bus _bsd_sysctl___hw_bus
+#define sysctl___hw_pci _bsd_sysctl___hw_pci
+#define sysctl___hw_usb _bsd_sysctl___hw_usb
+#define sysctl___hw_zynq _bsd_sysctl___hw_zynq
+#define sysctl___kern _bsd_sysctl___kern
+#define sysctl___kern_cam _bsd_sysctl___kern_cam
+#define sysctl___kern_features _bsd_sysctl___kern_features
+#define sysctl___kern_ipc _bsd_sysctl___kern_ipc
#define sysctl_move_oid _bsd_sysctl_move_oid
#define sysctl_msec_to_ticks _bsd_sysctl_msec_to_ticks
-#define sysctl__net_bpf_children _bsd_sysctl__net_bpf_children
-#define sysctl__net_bpf_jitter_children _bsd_sysctl__net_bpf_jitter_children
-#define sysctl__net_children _bsd_sysctl__net_children
-#define sysctl__net_inet6_children _bsd_sysctl__net_inet6_children
-#define sysctl__net_inet6_icmp6_children _bsd_sysctl__net_inet6_icmp6_children
-#define sysctl__net_inet6_icmp6_nd6_drlist_children _bsd_sysctl__net_inet6_icmp6_nd6_drlist_children
-#define sysctl__net_inet6_icmp6_nd6_prlist_children _bsd_sysctl__net_inet6_icmp6_nd6_prlist_children
-#define sysctl__net_inet6_ip6_children _bsd_sysctl__net_inet6_ip6_children
-#define sysctl__net_inet6_ip6_fw_children _bsd_sysctl__net_inet6_ip6_fw_children
-#define sysctl__net_inet6_mld_children _bsd_sysctl__net_inet6_mld_children
-#define sysctl__net_inet6_tcp6_children _bsd_sysctl__net_inet6_tcp6_children
-#define sysctl__net_inet6_udp6_children _bsd_sysctl__net_inet6_udp6_children
-#define sysctl__net_inet_accf_children _bsd_sysctl__net_inet_accf_children
-#define sysctl__net_inet_carp_children _bsd_sysctl__net_inet_carp_children
-#define sysctl__net_inet_children _bsd_sysctl__net_inet_children
-#define sysctl__net_inet_icmp_children _bsd_sysctl__net_inet_icmp_children
-#define sysctl__net_inet_igmp_children _bsd_sysctl__net_inet_igmp_children
-#define sysctl__net_inet_ip_alias_children _bsd_sysctl__net_inet_ip_alias_children
-#define sysctl__net_inet_ip_children _bsd_sysctl__net_inet_ip_children
-#define sysctl__net_inet_ip_fw_children _bsd_sysctl__net_inet_ip_fw_children
-#define sysctl__net_inet_pim_children _bsd_sysctl__net_inet_pim_children
-#define sysctl__net_inet_raw_children _bsd_sysctl__net_inet_raw_children
-#define sysctl__net_inet_tcp_cc_children _bsd_sysctl__net_inet_tcp_cc_children
-#define sysctl__net_inet_tcp_children _bsd_sysctl__net_inet_tcp_children
-#define sysctl__net_inet_tcp_experimental_children _bsd_sysctl__net_inet_tcp_experimental_children
-#define sysctl__net_inet_tcp_sack_children _bsd_sysctl__net_inet_tcp_sack_children
-#define sysctl__net_inet_udp_children _bsd_sysctl__net_inet_udp_children
-#define sysctl__net_link_children _bsd_sysctl__net_link_children
-#define sysctl__net_link_ether_children _bsd_sysctl__net_link_ether_children
-#define sysctl__net_link_generic_children _bsd_sysctl__net_link_generic_children
-#define sysctl__net_pfsync_children _bsd_sysctl__net_pfsync_children
+#define sysctl___net _bsd_sysctl___net
+#define sysctl___net_accf _bsd_sysctl___net_accf
+#define sysctl___net_bpf _bsd_sysctl___net_bpf
+#define sysctl___net_bpf_jitter _bsd_sysctl___net_bpf_jitter
+#define sysctl___net_inet _bsd_sysctl___net_inet
+#define sysctl___net_inet6 _bsd_sysctl___net_inet6
+#define sysctl___net_inet6_icmp6 _bsd_sysctl___net_inet6_icmp6
+#define sysctl___net_inet6_ip6 _bsd_sysctl___net_inet6_ip6
+#define sysctl___net_inet6_ip6_fw _bsd_sysctl___net_inet6_ip6_fw
+#define sysctl___net_inet6_mld _bsd_sysctl___net_inet6_mld
+#define sysctl___net_inet6_tcp6 _bsd_sysctl___net_inet6_tcp6
+#define sysctl___net_inet6_udp6 _bsd_sysctl___net_inet6_udp6
+#define sysctl___net_inet_accf _bsd_sysctl___net_inet_accf
+#define sysctl___net_inet_carp _bsd_sysctl___net_inet_carp
+#define sysctl___net_inet_icmp _bsd_sysctl___net_inet_icmp
+#define sysctl___net_inet_igmp _bsd_sysctl___net_inet_igmp
+#define sysctl___net_inet_ip _bsd_sysctl___net_inet_ip
+#define sysctl___net_inet_ip_alias _bsd_sysctl___net_inet_ip_alias
+#define sysctl___net_inet_ip_fw _bsd_sysctl___net_inet_ip_fw
+#define sysctl___net_inet_pim _bsd_sysctl___net_inet_pim
+#define sysctl___net_inet_raw _bsd_sysctl___net_inet_raw
+#define sysctl___net_inet_tcp _bsd_sysctl___net_inet_tcp
+#define sysctl___net_inet_tcp_cc _bsd_sysctl___net_inet_tcp_cc
+#define sysctl___net_inet_tcp_lro _bsd_sysctl___net_inet_tcp_lro
+#define sysctl___net_inet_tcp_sack _bsd_sysctl___net_inet_tcp_sack
+#define sysctl___net_inet_udp _bsd_sysctl___net_inet_udp
+#define sysctl___net_link _bsd_sysctl___net_link
+#define sysctl___net_link_ether _bsd_sysctl___net_link_ether
+#define sysctl___net_link_generic _bsd_sysctl___net_link_generic
+#define sysctl___net_link_lagg _bsd_sysctl___net_link_lagg
+#define sysctl___net_link_lagg_lacp _bsd_sysctl___net_link_lagg_lacp
+#define sysctl___net_pf _bsd_sysctl___net_pf
+#define sysctl___net_pfsync _bsd_sysctl___net_pfsync
#define sysctl_register_oid _bsd_sysctl_register_oid
#define sysctl_remove_name _bsd_sysctl_remove_name
#define sysctl_remove_oid _bsd_sysctl_remove_oid
#define sysctl_rename_oid _bsd_sysctl_rename_oid
-#define sysctl__security_children _bsd_sysctl__security_children
-#define sysctl__sysctl_children _bsd_sysctl__sysctl_children
-#define sysctl_unlock _bsd_sysctl_unlock
+#define sysctl___security _bsd_sysctl___security
+#define sysctl___sysctl _bsd_sysctl___sysctl
#define sysctl_unregister_oid _bsd_sysctl_unregister_oid
-#define sysctl__vm_children _bsd_sysctl__vm_children
+#define sysctl___vm _bsd_sysctl___vm
#define sysctl_wire_old_buffer _bsd_sysctl_wire_old_buffer
+#define sysctl_wlock _bsd_sysctl_wlock
+#define sysctl_wunlock _bsd_sysctl_wunlock
+#define sys_listen _bsd_sys_listen
#define system_base_info _bsd_system_base_info
-#define tag2tagname _bsd_tag2tagname
-#define tagname2tag _bsd_tagname2tag
-#define tag_unref _bsd_tag_unref
-#define tap_cloner _bsd_tap_cloner
-#define tap_cloner_data _bsd_tap_cloner_data
+#define TB_DRAIN_WAITER _bsd_TB_DRAIN_WAITER
#define tbr_dequeue_ptr _bsd_tbr_dequeue_ptr
#define tbr_get _bsd_tbr_get
#define tbr_set _bsd_tbr_set
@@ -3606,30 +3658,36 @@
#define tcp_ccalgounload _bsd_tcp_ccalgounload
#define tcp_clean_sackreport _bsd_tcp_clean_sackreport
#define tcp_close _bsd_tcp_close
+#define tcp_compute_pipe _bsd_tcp_compute_pipe
#define tcp_ctlinput _bsd_tcp_ctlinput
#define tcp_ctloutput _bsd_tcp_ctloutput
#define tcp_debug_mtx _bsd_tcp_debug_mtx
+#define tcp_default_ctloutput _bsd_tcp_default_ctloutput
#define tcp_delack_enabled _bsd_tcp_delack_enabled
#define tcp_delacktime _bsd_tcp_delacktime
#define tcp_discardcb _bsd_tcp_discardcb
#define tcp_do_autorcvbuf _bsd_tcp_do_autorcvbuf
#define tcp_do_autosndbuf _bsd_tcp_do_autosndbuf
#define tcp_do_ecn _bsd_tcp_do_ecn
-#define tcp_do_initcwnd10 _bsd_tcp_do_initcwnd10
+#define tcp_dooptions _bsd_tcp_dooptions
#define tcp_do_rfc1323 _bsd_tcp_do_rfc1323
#define tcp_do_rfc3042 _bsd_tcp_do_rfc3042
#define tcp_do_rfc3390 _bsd_tcp_do_rfc3390
#define tcp_do_rfc3465 _bsd_tcp_do_rfc3465
+#define tcp_do_rfc6675_pipe _bsd_tcp_do_rfc6675_pipe
#define tcp_do_sack _bsd_tcp_do_sack
+#define tcp_do_segment _bsd_tcp_do_segment
#define tcp_do_tso _bsd_tcp_do_tso
#define tcp_drain _bsd_tcp_drain
#define tcp_drop _bsd_tcp_drop
#define tcp_drop_syn_sent _bsd_tcp_drop_syn_sent
+#define tcp_dropwithreset _bsd_tcp_dropwithreset
#define tcp_ecn_maxretries _bsd_tcp_ecn_maxretries
#define tcp_fast_finwait2_recycle _bsd_tcp_fast_finwait2_recycle
#define tcp_fini _bsd_tcp_fini
#define tcp_finwait2_timeout _bsd_tcp_finwait2_timeout
#define tcp_free_sackholes _bsd_tcp_free_sackholes
+#define tcp_function_lock _bsd_tcp_function_lock
#define tcp_hc_get _bsd_tcp_hc_get
#define tcp_hc_getmtu _bsd_tcp_hc_getmtu
#define tcp_hc_init _bsd_tcp_hc_init
@@ -3637,8 +3695,12 @@
#define tcp_hc_updatemtu _bsd_tcp_hc_updatemtu
#define tcp_hhh _bsd_tcp_hhh
#define tcp_init _bsd_tcp_init
+#define tcp_initcwnd_segments _bsd_tcp_initcwnd_segments
+#define tcp_inpinfo_lock_add _bsd_tcp_inpinfo_lock_add
+#define tcp_inpinfo_lock_del _bsd_tcp_inpinfo_lock_del
#define tcp_input _bsd_tcp_input
#define tcp_insecure_rst _bsd_tcp_insecure_rst
+#define tcp_insecure_syn _bsd_tcp_insecure_syn
#define tcpip_fillheaders _bsd_tcpip_fillheaders
#define tcpip_maketemplate _bsd_tcpip_maketemplate
#define tcp_keepcnt _bsd_tcp_keepcnt
@@ -3649,20 +3711,25 @@
#define tcp_log_in_vain _bsd_tcp_log_in_vain
#define tcp_log_vain _bsd_tcp_log_vain
#define tcp_lro_flush _bsd_tcp_lro_flush
+#define tcp_lro_flush_all _bsd_tcp_lro_flush_all
+#define tcp_lro_flush_inactive _bsd_tcp_lro_flush_inactive
#define tcp_lro_free _bsd_tcp_lro_free
#define tcp_lro_init _bsd_tcp_lro_init
+#define tcp_lro_init_args _bsd_tcp_lro_init_args
+#define tcp_lro_queue_mbuf _bsd_tcp_lro_queue_mbuf
#define tcp_lro_rx _bsd_tcp_lro_rx
#define tcp_maxmtu _bsd_tcp_maxmtu
#define tcp_maxmtu6 _bsd_tcp_maxmtu6
#define tcp_maxpersistidle _bsd_tcp_maxpersistidle
+#define tcp_maxseg _bsd_tcp_maxseg
#define tcp_minmss _bsd_tcp_minmss
#define tcp_msl _bsd_tcp_msl
#define tcp_mss _bsd_tcp_mss
#define tcp_mssdflt _bsd_tcp_mssdflt
#define tcp_mssopt _bsd_tcp_mssopt
#define tcp_mss_update _bsd_tcp_mss_update
-#define tcp_mtudisc _bsd_tcp_mtudisc
#define tcp_new_isn _bsd_tcp_new_isn
+#define tcp_newreno_partial_ack _bsd_tcp_newreno_partial_ack
#define tcp_newtcpcb _bsd_tcp_newtcpcb
#define tcp_offload_connect _bsd_tcp_offload_connect
#define tcp_offload_ctloutput _bsd_tcp_offload_ctloutput
@@ -3670,11 +3737,14 @@
#define tcp_offload_input _bsd_tcp_offload_input
#define tcp_offload_output _bsd_tcp_offload_output
#define tcp_offload_rcvd _bsd_tcp_offload_rcvd
-#define tcp_offload_syncache_add _bsd_tcp_offload_syncache_add
#define tcp_output _bsd_tcp_output
+#define tcp_persmax _bsd_tcp_persmax
+#define tcp_persmin _bsd_tcp_persmin
+#define tcp_pulloutofband _bsd_tcp_pulloutofband
#define tcp_reass _bsd_tcp_reass
#define tcp_reass_flush _bsd_tcp_reass_flush
-#define tcp_reass_init _bsd_tcp_reass_init
+#define tcp_reass_global_init _bsd_tcp_reass_global_init
+#define tc_precexp _bsd_tc_precexp
#define tcp_recvspace _bsd_tcp_recvspace
#define tcp_respond _bsd_tcp_respond
#define tcp_rexmit_min _bsd_tcp_rexmit_min
@@ -3691,31 +3761,42 @@
#define tcp_sendspace _bsd_tcp_sendspace
#define tcp_setpersist _bsd_tcp_setpersist
#define tcp_slowtimo _bsd_tcp_slowtimo
+#define tcps_states _bsd_tcps_states
#define tcpstat _bsd_tcpstat
+#define tcp_state_change _bsd_tcp_state_change
#define tcp_syn_backoff _bsd_tcp_syn_backoff
#define tcp_timer_2msl _bsd_tcp_timer_2msl
#define tcp_timer_activate _bsd_tcp_timer_activate
#define tcp_timer_active _bsd_tcp_timer_active
#define tcp_timer_delack _bsd_tcp_timer_delack
+#define tcp_timer_discard _bsd_tcp_timer_discard
#define tcp_timer_keep _bsd_tcp_timer_keep
#define tcp_timer_persist _bsd_tcp_timer_persist
#define tcp_timer_rexmt _bsd_tcp_timer_rexmt
+#define tcp_timer_stop _bsd_tcp_timer_stop
#define tcp_timer_to_xtimer _bsd_tcp_timer_to_xtimer
#define tcp_trace _bsd_tcp_trace
#define tcp_tw_2msl_scan _bsd_tcp_tw_2msl_scan
#define tcp_twcheck _bsd_tcp_twcheck
#define tcp_twclose _bsd_tcp_twclose
#define tcp_tw_init _bsd_tcp_tw_init
-#define tcp_twrespond _bsd_tcp_twrespond
#define tcp_twstart _bsd_tcp_twstart
#define tcp_tw_zone_change _bsd_tcp_tw_zone_change
#define tcp_update_sack_list _bsd_tcp_update_sack_list
#define tcp_usrreqs _bsd_tcp_usrreqs
#define tcp_v6mssdflt _bsd_tcp_v6mssdflt
+#define tcp_xmit_timer _bsd_tcp_xmit_timer
+#define tc_ref _bsd_tc_ref
+#define tc_tick_bt _bsd_tc_tick_bt
+#define tc_tick_sbt _bsd_tc_tick_sbt
+#define tc_unref _bsd_tc_unref
+#define t_functions _bsd_t_functions
+#define t_functions_inited _bsd_t_functions_inited
#define tick _bsd_tick
#define ticket_altqs_active _bsd_ticket_altqs_active
#define ticket_altqs_inactive _bsd_ticket_altqs_inactive
#define ticket_pabuf _bsd_ticket_pabuf
+#define tick_sbt _bsd_tick_sbt
#define timeout _bsd_timeout
#define _timeout_task_init _bsd__timeout_task_init
#define timevaladd _bsd_timevaladd
@@ -3724,8 +3805,6 @@
#define trash_dtor _bsd_trash_dtor
#define trash_fini _bsd_trash_fini
#define trash_init _bsd_trash_init
-#define tree_id _bsd_tree_id
-#define tree_src_tracking _bsd_tree_src_tracking
#define tsec_attach _bsd_tsec_attach
#define tsec_detach _bsd_tsec_detach
#define tsec_devclass _bsd_tsec_devclass
@@ -3733,20 +3812,17 @@
#define tsec_miibus_readreg _bsd_tsec_miibus_readreg
#define tsec_miibus_statchg _bsd_tsec_miibus_statchg
#define tsec_miibus_writereg _bsd_tsec_miibus_writereg
+#define tsec_phy_mtx _bsd_tsec_phy_mtx
#define tsec_receive_intr _bsd_tsec_receive_intr
#define tsec_resume _bsd_tsec_resume
#define tsec_shutdown _bsd_tsec_shutdown
#define tsec_suspend _bsd_tsec_suspend
#define tsec_transmit_intr _bsd_tsec_transmit_intr
-#define tun_cloner _bsd_tun_cloner
-#define tun_cloner_data _bsd_tun_cloner_data
#define tvtohz _bsd_tvtohz
#define udb _bsd_udb
#define udbinfo _bsd_udbinfo
#define udp6_ctlinput _bsd_udp6_ctlinput
#define udp6_input _bsd_udp6_input
-#define udp6_recvspace _bsd_udp6_recvspace
-#define udp6_sendspace _bsd_udp6_sendspace
#define udp6_usrreqs _bsd_udp6_usrreqs
#define udp_blackhole _bsd_udp_blackhole
#define udp_cksum _bsd_udp_cksum
@@ -3755,6 +3831,9 @@
#define udp_discardcb _bsd_udp_discardcb
#define udp_init _bsd_udp_init
#define udp_input _bsd_udp_input
+#define udplite6_ctlinput _bsd_udplite6_ctlinput
+#define udplite_ctlinput _bsd_udplite_ctlinput
+#define udplite_init _bsd_udplite_init
#define udp_log_in_vain _bsd_udp_log_in_vain
#define udp_newudpcb _bsd_udp_newudpcb
#define udp_notify _bsd_udp_notify
@@ -3772,16 +3851,18 @@
#define uiomove _bsd_uiomove
#define uiomove_nofault _bsd_uiomove_nofault
#define ukphy_status _bsd_ukphy_status
+#define ulitecb _bsd_ulitecb
+#define ulitecbinfo _bsd_ulitecbinfo
#define uma_align_cache _bsd_uma_align_cache
-#define uma_dbg_alloc _bsd_uma_dbg_alloc
-#define uma_dbg_free _bsd_uma_dbg_free
-#define uma_find_refcnt _bsd_uma_find_refcnt
#define uma_print_stats _bsd_uma_print_stats
#define uma_print_zone _bsd_uma_print_zone
#define uma_reclaim _bsd_uma_reclaim
+#define uma_reclaim_wakeup _bsd_uma_reclaim_wakeup
+#define uma_reclaim_worker _bsd_uma_reclaim_worker
#define uma_set_align _bsd_uma_set_align
#define uma_startup _bsd_uma_startup
#define uma_zalloc_arg _bsd_uma_zalloc_arg
+#define uma_zcache_create _bsd_uma_zcache_create
#define uma_zcreate _bsd_uma_zcreate
#define uma_zdestroy _bsd_uma_zdestroy
#define uma_zfree_arg _bsd_uma_zfree_arg
@@ -3789,15 +3870,19 @@
#define uma_zone_exhausted_nolock _bsd_uma_zone_exhausted_nolock
#define uma_zone_get_cur _bsd_uma_zone_get_cur
#define uma_zone_get_max _bsd_uma_zone_get_max
+#define uma_zone_reserve _bsd_uma_zone_reserve
#define uma_zone_set_allocf _bsd_uma_zone_set_allocf
#define uma_zone_set_fini _bsd_uma_zone_set_fini
#define uma_zone_set_freef _bsd_uma_zone_set_freef
#define uma_zone_set_init _bsd_uma_zone_set_init
#define uma_zone_set_max _bsd_uma_zone_set_max
+#define uma_zone_set_maxaction _bsd_uma_zone_set_maxaction
+#define uma_zone_set_warning _bsd_uma_zone_set_warning
#define uma_zone_set_zfini _bsd_uma_zone_set_zfini
#define uma_zone_set_zinit _bsd_uma_zone_set_zinit
#define uma_zsecond_create _bsd_uma_zsecond_create
#define untimeout _bsd_untimeout
+#define update_opcode_kidx _bsd_update_opcode_kidx
#define usb_alloc_device _bsd_usb_alloc_device
#define usb_alloc_mbufs _bsd_usb_alloc_mbufs
#define usb_bdma_done_event _bsd_usb_bdma_done_event
@@ -4037,6 +4122,9 @@
#define usb_test_quirk _bsd_usb_test_quirk
#define usb_test_quirk_p _bsd_usb_test_quirk_p
#define usb_trim_spaces _bsd_usb_trim_spaces
+#define uuid_ether_add _bsd_uuid_ether_add
+#define uuid_ether_del _bsd_uuid_ether_del
+#define vectors _bsd_vectors
#define verbose_limit _bsd_verbose_limit
#define vlan_cookie_p _bsd_vlan_cookie_p
#define vlan_devat_p _bsd_vlan_devat_p
@@ -4046,16 +4134,15 @@
#define vlan_tag_p _bsd_vlan_tag_p
#define vlan_trunk_cap_p _bsd_vlan_trunk_cap_p
#define vlan_trunkdev_p _bsd_vlan_trunkdev_p
-#define vmnet_cloner _bsd_vmnet_cloner
-#define vmnet_cloner_data _bsd_vmnet_cloner_data
+#define vlog _bsd_vlog
#define vnet_if_clone_init _bsd_vnet_if_clone_init
+#define vnet_ipfw_iface_destroy _bsd_vnet_ipfw_iface_destroy
#define vprintf _bsd_vprintf
#define vsnprintf _bsd_vsnprintf
#define vsnrprintf _bsd_vsnrprintf
#define vsprintf _bsd_vsprintf
#define wakeup _bsd_wakeup
#define wakeup_one _bsd_wakeup_one
-#define walk_dll_chain _bsd_walk_dll_chain
#define window_deflate _bsd_window_deflate
#define window_inflate _bsd_window_inflate
#define write_dsfield _bsd_write_dsfield
@@ -4068,13 +4155,22 @@
#define xpt_bus_register _bsd_xpt_bus_register
#define xpt_done _bsd_xpt_done
#define z_alloc _bsd_z_alloc
-#define zeroin6_addr _bsd_zeroin6_addr
#define z_free _bsd_z_free
#define zone_clust _bsd_zone_clust
#define zone_drain _bsd_zone_drain
-#define zone_ext_refcnt _bsd_zone_ext_refcnt
#define zone_jumbo16 _bsd_zone_jumbo16
#define zone_jumbo9 _bsd_zone_jumbo9
#define zone_jumbop _bsd_zone_jumbop
#define zone_mbuf _bsd_zone_mbuf
#define zone_pack _bsd_zone_pack
+#define zy7_pl_fclk_disable _bsd_zy7_pl_fclk_disable
+#define zy7_pl_fclk_enable _bsd_zy7_pl_fclk_enable
+#define zy7_pl_fclk_enabled _bsd_zy7_pl_fclk_enabled
+#define zy7_pl_fclk_get_freq _bsd_zy7_pl_fclk_get_freq
+#define zy7_pl_fclk_get_source _bsd_zy7_pl_fclk_get_source
+#define zy7_pl_fclk_set_freq _bsd_zy7_pl_fclk_set_freq
+#define zy7_pl_fclk_set_source _bsd_zy7_pl_fclk_set_source
+#define zy7_pl_level_shifters_disable _bsd_zy7_pl_level_shifters_disable
+#define zy7_pl_level_shifters_enable _bsd_zy7_pl_level_shifters_enable
+#define zy7_pl_level_shifters_enabled _bsd_zy7_pl_level_shifters_enabled
+#define zy7_slcr_postload_pl _bsd_zy7_slcr_postload_pl
diff --git a/rtemsbsd/include/machine/rtems-bsd-kernel-space.h b/rtemsbsd/include/machine/rtems-bsd-kernel-space.h
index 44f76fe1..49dc3aad 100644
--- a/rtemsbsd/include/machine/rtems-bsd-kernel-space.h
+++ b/rtemsbsd/include/machine/rtems-bsd-kernel-space.h
@@ -47,6 +47,8 @@
#define __GLOBL1(sym) __asm__(".globl " #sym)
#define __GLOBL(sym) __GLOBL1(sym)
+#define O_CLOEXEC 0
+
#define __FreeBSD__ 1
/* General define to activate BSD kernel parts */
diff --git a/rtemsbsd/include/machine/rtems-bsd-nexus-bus.h b/rtemsbsd/include/machine/rtems-bsd-nexus-bus.h
index 5a3458c6..a408c172 100644
--- a/rtemsbsd/include/machine/rtems-bsd-nexus-bus.h
+++ b/rtemsbsd/include/machine/rtems-bsd-nexus-bus.h
@@ -37,7 +37,7 @@
* Driver Summary is:
*
* Devices:
- * RTEMS_BSD_DRIVER_XILINX_ZYNQ_SLCR0
+ * RTEMS_BSD_DRIVER_XILINX_ZYNQ_SLCR
*
* Buses:
* RTEMS_BSD_DRIVER_PC_LEGACY
@@ -96,14 +96,14 @@ extern "C" {
**/
/*
- * Xilinx Zynq System Level Control Core 0 (SLCR0).
+ * Xilinx Zynq System Level Control Registers (SLCR).
*/
#if !defined(RTEMS_BSD_DRIVER_XILINX_ZYNQ_SLCR)
/*
* Hard IP part of the Zynq so a fixed address.
*/
- #define RTEMS_BSD_DRIVER_XILINX_ZYNQ_SLCR0 \
- static const rtems_bsd_device_resource zy7_slcr0_res[] = { \
+ #define RTEMS_BSD_DRIVER_XILINX_ZYNQ_SLCR \
+ static const rtems_bsd_device_resource zy7_slcr_res[] = { \
{ \
.type = RTEMS_BSD_RES_MEMORY, \
.start_request = 0, \
@@ -111,9 +111,9 @@ extern "C" {
} \
}; \
RTEMS_BSD_DEFINE_NEXUS_DEVICE(zy7_slcr, 0, \
- RTEMS_ARRAY_SIZE(zy7_slcr0_res), \
- &zy7_slcr0_res[0])
-#endif /* RTEMS_BSD_DRIVER_XILINX_ZYNQ_SLCR0 */
+ RTEMS_ARRAY_SIZE(zy7_slcr_res), \
+ &zy7_slcr_res[0])
+#endif /* RTEMS_BSD_DRIVER_XILINX_ZYNQ_SLCR */
/**
** Physical Buses
diff --git a/rtemsbsd/include/machine/rtems-bsd-user-space.h b/rtemsbsd/include/machine/rtems-bsd-user-space.h
index 536ef8e2..3bf971fd 100644
--- a/rtemsbsd/include/machine/rtems-bsd-user-space.h
+++ b/rtemsbsd/include/machine/rtems-bsd-user-space.h
@@ -63,12 +63,18 @@
#define CLOCK_MONOTONIC_FAST CLOCK_MONOTONIC
#endif
+#ifndef CLOCK_REALTIME_FAST
+#define CLOCK_REALTIME_FAST CLOCK_REALTIME
+#endif
+
#ifndef CLOCK_UPTIME
#define CLOCK_UPTIME CLOCK_MONOTONIC
#endif
#define SIZE_T_MAX SIZE_MAX
+#define __libc_sigprocmask(a, b, c) sigprocmask(a, b, c)
+
__BEGIN_DECLS
/* FIXME: Add to Newlib */
diff --git a/rtemsbsd/include/rtems/bsd/local/bus_if.h b/rtemsbsd/include/rtems/bsd/local/bus_if.h
index c95c7015..2376b896 100644
--- a/rtemsbsd/include/rtems/bsd/local/bus_if.h
+++ b/rtemsbsd/include/rtems/bsd/local/bus_if.h
@@ -92,7 +92,7 @@ typedef int bus_read_ivar_t(device_t _dev, device_t _child, int _index,
* @param _child the child device whose instance variable is
* being read
* @param _index the instance variable to read
- * @param _result a loction to recieve the instance variable
+ * @param _result a location to receive the instance variable
* value
*
* @retval 0 success
@@ -215,7 +215,9 @@ typedef device_t bus_add_child_t(device_t _dev, u_int _order, const char *_name,
* For busses which use use drivers supporting DEVICE_IDENTIFY() to
* enumerate their devices, this method is used to create new
* device instances. The new device will be added after the last
- * existing child with the same order.
+ * existing child with the same order. Implementations of bus_add_child
+ * call device_add_child_ordered to add the child and often add
+ * a suitable ivar to the device specific to that bus.
*
* @param _dev the bus device which will be the parent of the
* new child device
@@ -237,13 +239,35 @@ static __inline device_t BUS_ADD_CHILD(device_t _dev, u_int _order,
return ((bus_add_child_t *) _m)(_dev, _order, _name, _unit);
}
+/** @brief Unique descriptor for the BUS_RESCAN() method */
+extern struct kobjop_desc bus_rescan_desc;
+/** @brief A function implementing the BUS_RESCAN() method */
+typedef int bus_rescan_t(device_t _dev);
+/**
+ * @brief Rescan the bus
+ *
+ * This method is called by a parent bridge or devctl to trigger a bus
+ * rescan. The rescan should delete devices no longer present and
+ * enumerate devices that have newly arrived.
+ *
+ * @param _dev the bus device
+ */
+
+static __inline int BUS_RESCAN(device_t _dev)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)_dev)->ops,bus_rescan);
+ return ((bus_rescan_t *) _m)(_dev);
+}
+
/** @brief Unique descriptor for the BUS_ALLOC_RESOURCE() method */
extern struct kobjop_desc bus_alloc_resource_desc;
/** @brief A function implementing the BUS_ALLOC_RESOURCE() method */
typedef struct resource * bus_alloc_resource_t(device_t _dev, device_t _child,
int _type, int *_rid,
- u_long _start, u_long _end,
- u_long _count, u_int _flags);
+ rman_res_t _start,
+ rman_res_t _end,
+ rman_res_t _count, u_int _flags);
/**
* @brief Allocate a system resource
*
@@ -260,9 +284,9 @@ typedef struct resource * bus_alloc_resource_t(device_t _dev, device_t _child,
* @param _type the type of resource to allocate
* @param _rid a pointer to the resource identifier
* @param _start hint at the start of the resource range - pass
- * @c 0UL for any start address
+ * @c 0 for any start address
* @param _end hint at the end of the resource range - pass
- * @c ~0UL for any end address
+ * @c ~0 for any end address
* @param _count hint at the size of range required - pass @c 1
* for any size
* @param _flags any extra flags to control the resource
@@ -275,8 +299,10 @@ typedef struct resource * bus_alloc_resource_t(device_t _dev, device_t _child,
static __inline struct resource * BUS_ALLOC_RESOURCE(device_t _dev,
device_t _child, int _type,
- int *_rid, u_long _start,
- u_long _end, u_long _count,
+ int *_rid,
+ rman_res_t _start,
+ rman_res_t _end,
+ rman_res_t _count,
u_int _flags)
{
kobjop_t _m;
@@ -293,8 +319,9 @@ typedef int bus_activate_resource_t(device_t _dev, device_t _child, int _type,
* @brief Activate a resource
*
* Activate a resource previously allocated with
- * BUS_ALLOC_RESOURCE(). This may for instance map a memory region
- * into the kernel's virtual address space.
+ * BUS_ALLOC_RESOURCE(). This may enable decoding of this resource in a
+ * device for instance. It will also establish a mapping for the resource
+ * unless RF_UNMAPPED was set when allocating the resource.
*
* @param _dev the parent device of @p _child
* @param _child the device which allocated the resource
@@ -312,6 +339,67 @@ static __inline int BUS_ACTIVATE_RESOURCE(device_t _dev, device_t _child,
return ((bus_activate_resource_t *) _m)(_dev, _child, _type, _rid, _r);
}
+/** @brief Unique descriptor for the BUS_MAP_RESOURCE() method */
+extern struct kobjop_desc bus_map_resource_desc;
+/** @brief A function implementing the BUS_MAP_RESOURCE() method */
+typedef int bus_map_resource_t(device_t _dev, device_t _child, int _type,
+ struct resource *_r,
+ struct resource_map_request *_args,
+ struct resource_map *_map);
+/**
+ * @brief Map a resource
+ *
+ * Allocate a mapping for a range of an active resource. The mapping
+ * is described by a struct resource_map object. This may for instance
+ * map a memory region into the kernel's virtual address space.
+ *
+ * @param _dev the parent device of @p _child
+ * @param _child the device which allocated the resource
+ * @param _type the type of resource
+ * @param _r the resource to map
+ * @param _args optional attributes of the mapping
+ * @param _map the mapping
+ */
+
+static __inline int BUS_MAP_RESOURCE(device_t _dev, device_t _child, int _type,
+ struct resource *_r,
+ struct resource_map_request *_args,
+ struct resource_map *_map)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)_dev)->ops,bus_map_resource);
+ return ((bus_map_resource_t *) _m)(_dev, _child, _type, _r, _args, _map);
+}
+
+/** @brief Unique descriptor for the BUS_UNMAP_RESOURCE() method */
+extern struct kobjop_desc bus_unmap_resource_desc;
+/** @brief A function implementing the BUS_UNMAP_RESOURCE() method */
+typedef int bus_unmap_resource_t(device_t _dev, device_t _child, int _type,
+ struct resource *_r,
+ struct resource_map *_map);
+/**
+ * @brief Unmap a resource
+ *
+ * Release a mapping previously allocated with
+ * BUS_MAP_RESOURCE(). This may for instance unmap a memory region
+ * from the kernel's virtual address space.
+ *
+ * @param _dev the parent device of @p _child
+ * @param _child the device which allocated the resource
+ * @param _type the type of resource
+ * @param _r the resource
+ * @param _map the mapping to release
+ */
+
+static __inline int BUS_UNMAP_RESOURCE(device_t _dev, device_t _child,
+ int _type, struct resource *_r,
+ struct resource_map *_map)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)_dev)->ops,bus_unmap_resource);
+ return ((bus_unmap_resource_t *) _m)(_dev, _child, _type, _r, _map);
+}
+
/** @brief Unique descriptor for the BUS_DEACTIVATE_RESOURCE() method */
extern struct kobjop_desc bus_deactivate_resource_desc;
/** @brief A function implementing the BUS_DEACTIVATE_RESOURCE() method */
@@ -321,8 +409,7 @@ typedef int bus_deactivate_resource_t(device_t _dev, device_t _child, int _type,
* @brief Deactivate a resource
*
* Deactivate a resource previously allocated with
- * BUS_ALLOC_RESOURCE(). This may for instance unmap a memory region
- * from the kernel's virtual address space.
+ * BUS_ALLOC_RESOURCE().
*
* @param _dev the parent device of @p _child
* @param _child the device which allocated the resource
@@ -344,8 +431,8 @@ static __inline int BUS_DEACTIVATE_RESOURCE(device_t _dev, device_t _child,
extern struct kobjop_desc bus_adjust_resource_desc;
/** @brief A function implementing the BUS_ADJUST_RESOURCE() method */
typedef int bus_adjust_resource_t(device_t _dev, device_t _child, int _type,
- struct resource *_res, u_long _start,
- u_long _end);
+ struct resource *_res, rman_res_t _start,
+ rman_res_t _end);
/**
* @brief Adjust a resource
*
@@ -364,7 +451,7 @@ typedef int bus_adjust_resource_t(device_t _dev, device_t _child, int _type,
static __inline int BUS_ADJUST_RESOURCE(device_t _dev, device_t _child,
int _type, struct resource *_res,
- u_long _start, u_long _end)
+ rman_res_t _start, rman_res_t _end)
{
kobjop_t _m;
KOBJOPLOOKUP(((kobj_t)_dev)->ops,bus_adjust_resource);
@@ -425,7 +512,7 @@ typedef int bus_setup_intr_t(device_t _dev, device_t _child,
* triggers
* @param _arg a value to use as the single argument in calls
* to @p _intr
- * @param _cookiep a pointer to a location to recieve a cookie
+ * @param _cookiep a pointer to a location to receive a cookie
* value that may be used to remove the interrupt
* handler
*/
@@ -472,7 +559,7 @@ static __inline int BUS_TEARDOWN_INTR(device_t _dev, device_t _child,
extern struct kobjop_desc bus_set_resource_desc;
/** @brief A function implementing the BUS_SET_RESOURCE() method */
typedef int bus_set_resource_t(device_t _dev, device_t _child, int _type,
- int _rid, u_long _start, u_long _count);
+ int _rid, rman_res_t _start, rman_res_t _count);
/**
* @brief Define a resource which can be allocated with
* BUS_ALLOC_RESOURCE().
@@ -492,7 +579,8 @@ typedef int bus_set_resource_t(device_t _dev, device_t _child, int _type,
*/
static __inline int BUS_SET_RESOURCE(device_t _dev, device_t _child, int _type,
- int _rid, u_long _start, u_long _count)
+ int _rid, rman_res_t _start,
+ rman_res_t _count)
{
kobjop_t _m;
KOBJOPLOOKUP(((kobj_t)_dev)->ops,bus_set_resource);
@@ -503,7 +591,8 @@ static __inline int BUS_SET_RESOURCE(device_t _dev, device_t _child, int _type,
extern struct kobjop_desc bus_get_resource_desc;
/** @brief A function implementing the BUS_GET_RESOURCE() method */
typedef int bus_get_resource_t(device_t _dev, device_t _child, int _type,
- int _rid, u_long *_startp, u_long *_countp);
+ int _rid, rman_res_t *_startp,
+ rman_res_t *_countp);
/**
* @brief Describe a resource
*
@@ -514,14 +603,15 @@ typedef int bus_get_resource_t(device_t _dev, device_t _child, int _type,
* @param _child the device which owns the resource
* @param _type the type of resource
* @param _rid the resource identifier
- * @param _start the address of a location to recieve the start
+ * @param _start the address of a location to receive the start
* index of the resource range
- * @param _count the address of a location to recieve the size
+ * @param _count the address of a location to receive the size
* of the resource range
*/
static __inline int BUS_GET_RESOURCE(device_t _dev, device_t _child, int _type,
- int _rid, u_long *_startp, u_long *_countp)
+ int _rid, rman_res_t *_startp,
+ rman_res_t *_countp)
{
kobjop_t _m;
KOBJOPLOOKUP(((kobj_t)_dev)->ops,bus_get_resource);
@@ -612,8 +702,15 @@ typedef int bus_child_pnpinfo_str_t(device_t _dev, device_t _child, char *_buf,
/**
* @brief Returns the pnp info for this device.
*
- * Return it as a string. If the string is insufficient for the
- * storage, then return EOVERFLOW.
+ * Return it as a string. If the storage is insufficient for the
+ * string, then return EOVERFLOW.
+ *
+ * The string must be formatted as a space-separated list of
+ * name=value pairs. Names may only contain alphanumeric characters,
+ * underscores ('_') and hyphens ('-'). Values can contain any
+ * non-whitespace characters. Values containing whitespace can be
+ * quoted with double quotes ('"'). Double quotes and backslashes in
+ * quoted values can be escaped with backslashes ('\').
*
* @param _dev the parent device of @p _child
* @param _child the device which is being examined
@@ -638,9 +735,16 @@ typedef int bus_child_location_str_t(device_t _dev, device_t _child, char *_buf,
/**
* @brief Returns the location for this device.
*
- * Return it as a string. If the string is insufficient for the
- * storage, then return EOVERFLOW.
- *
+ * Return it as a string. If the storage is insufficient for the
+ * string, then return EOVERFLOW.
+ *
+ * The string must be formatted as a space-separated list of
+ * name=value pairs. Names may only contain alphanumeric characters,
+ * underscores ('_') and hyphens ('-'). Values can contain any
+ * non-whitespace characters. Values containing whitespace can be
+ * quoted with double quotes ('"'). Double quotes and backslashes in
+ * quoted values can be escaped with backslashes ('\').
+ *
* @param _dev the parent device of @p _child
* @param _child the device which is being examined
* @param _buf the address of a buffer to receive the location
@@ -777,6 +881,24 @@ static __inline bus_dma_tag_t BUS_GET_DMA_TAG(device_t _dev, device_t _child)
return ((bus_get_dma_tag_t *) _m)(_dev, _child);
}
+/** @brief Unique descriptor for the BUS_GET_BUS_TAG() method */
+extern struct kobjop_desc bus_get_bus_tag_desc;
+/** @brief A function implementing the BUS_GET_BUS_TAG() method */
+typedef bus_space_tag_t bus_get_bus_tag_t(device_t _dev, device_t _child);
+/**
+ * @brief Returns bus_space_tag_t for use w/ devices on the bus.
+ *
+ * @param _dev the parent device of @p _child
+ * @param _child the device to which the tag will belong
+ */
+
+static __inline bus_space_tag_t BUS_GET_BUS_TAG(device_t _dev, device_t _child)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)_dev)->ops,bus_get_bus_tag);
+ return ((bus_get_bus_tag_t *) _m)(_dev, _child);
+}
+
/** @brief Unique descriptor for the BUS_HINT_DEVICE_UNIT() method */
extern struct kobjop_desc bus_hint_device_unit_desc;
/** @brief A function implementing the BUS_HINT_DEVICE_UNIT() method */
@@ -835,4 +957,84 @@ static __inline int BUS_REMAP_INTR(device_t _dev, device_t _child, u_int _irq)
return ((bus_remap_intr_t *) _m)(_dev, _child, _irq);
}
+/** @brief Unique descriptor for the BUS_SUSPEND_CHILD() method */
+extern struct kobjop_desc bus_suspend_child_desc;
+/** @brief A function implementing the BUS_SUSPEND_CHILD() method */
+typedef int bus_suspend_child_t(device_t _dev, device_t _child);
+/**
+ * @brief Suspend a given child
+ *
+ * @param _dev the parent device of @p _child
+ * @param _child the device to suspend
+ */
+
+static __inline int BUS_SUSPEND_CHILD(device_t _dev, device_t _child)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)_dev)->ops,bus_suspend_child);
+ return ((bus_suspend_child_t *) _m)(_dev, _child);
+}
+
+/** @brief Unique descriptor for the BUS_RESUME_CHILD() method */
+extern struct kobjop_desc bus_resume_child_desc;
+/** @brief A function implementing the BUS_RESUME_CHILD() method */
+typedef int bus_resume_child_t(device_t _dev, device_t _child);
+/**
+ * @brief Resume a given child
+ *
+ * @param _dev the parent device of @p _child
+ * @param _child the device to resume
+ */
+
+static __inline int BUS_RESUME_CHILD(device_t _dev, device_t _child)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)_dev)->ops,bus_resume_child);
+ return ((bus_resume_child_t *) _m)(_dev, _child);
+}
+
+/** @brief Unique descriptor for the BUS_GET_DOMAIN() method */
+extern struct kobjop_desc bus_get_domain_desc;
+/** @brief A function implementing the BUS_GET_DOMAIN() method */
+typedef int bus_get_domain_t(device_t _dev, device_t _child, int *_domain);
+/**
+ * @brief Get the VM domain handle for the given bus and child.
+ *
+ * @param _dev the bus device
+ * @param _child the child device
+ * @param _domain a pointer to the bus's domain handle identifier
+ */
+
+static __inline int BUS_GET_DOMAIN(device_t _dev, device_t _child, int *_domain)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)_dev)->ops,bus_get_domain);
+ return ((bus_get_domain_t *) _m)(_dev, _child, _domain);
+}
+
+/** @brief Unique descriptor for the BUS_GET_CPUS() method */
+extern struct kobjop_desc bus_get_cpus_desc;
+/** @brief A function implementing the BUS_GET_CPUS() method */
+typedef int bus_get_cpus_t(device_t _dev, device_t _child, enum cpu_sets _op,
+ size_t _setsize, cpuset_t *_cpuset);
+/**
+ * @brief Request a set of CPUs
+ *
+ * @param _dev the bus device
+ * @param _child the child device
+ * @param _op type of CPUs to request
+ * @param _setsize the size of the set passed in _cpuset
+ * @param _cpuset a pointer to a cpuset to receive the requested
+ * set of CPUs
+ */
+
+static __inline int BUS_GET_CPUS(device_t _dev, device_t _child,
+ enum cpu_sets _op, size_t _setsize,
+ cpuset_t *_cpuset)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)_dev)->ops,bus_get_cpus);
+ return ((bus_get_cpus_t *) _m)(_dev, _child, _op, _setsize, _cpuset);
+}
+
#endif /* _bus_if_h_ */
diff --git a/rtemsbsd/include/rtems/bsd/local/device_if.h b/rtemsbsd/include/rtems/bsd/local/device_if.h
index 7a2cbc72..e27c2b52 100644
--- a/rtemsbsd/include/rtems/bsd/local/device_if.h
+++ b/rtemsbsd/include/rtems/bsd/local/device_if.h
@@ -52,28 +52,29 @@ typedef int device_probe_t(device_t dev);
* the probe before returning. The return value of DEVICE_PROBE()
* is used to elect which driver is used - the driver which returns
* the largest non-error value wins the election and attaches to
- * the device.
+ * the device. Common non-error values are described in the
+ * DEVICE_PROBE(9) manual page.
*
* If a driver matches the hardware, it should set the device
* description string using device_set_desc() or
- * device_set_desc_copy(). This string is
- * used to generate an informative message when DEVICE_ATTACH()
- * is called.
+ * device_set_desc_copy(). This string is used to generate an
+ * informative message when DEVICE_ATTACH() is called.
*
* As a special case, if a driver returns zero, the driver election
* is cut short and that driver will attach to the device
- * immediately.
+ * immediately. This should rarely be used.
*
- * For example, a probe method for a pci device driver might look
+ * For example, a probe method for a PCI device driver might look
* like this:
*
* @code
- * int foo_probe(device_t dev)
+ * int
+ * foo_probe(device_t dev)
* {
* if (pci_get_vendor(dev) == FOOVENDOR &&
* pci_get_device(dev) == FOODEVICE) {
* device_set_desc(dev, "Foo device");
- * return (0);
+ * return (BUS_PROBE_DEFAULT);
* }
* return (ENXIO);
* }
@@ -88,7 +89,8 @@ typedef int device_probe_t(device_t dev);
*
* @param dev the device to probe
*
- * @retval 0 if the driver strongly matches this device
+ * @retval 0 if this is the only possible driver for this
+ * device
* @retval negative if the driver can match this device - the
* least negative value is used to select the
* driver
@@ -337,4 +339,33 @@ static __inline int DEVICE_QUIESCE(device_t dev)
return ((device_quiesce_t *) _m)(dev);
}
+/** @brief Unique descriptor for the DEVICE_REGISTER() method */
+extern struct kobjop_desc device_register_desc;
+/** @brief A function implementing the DEVICE_REGISTER() method */
+typedef void * device_register_t(device_t dev);
+/**
+ * @brief This is called when the driver is asked to register handlers.
+ *
+ *
+ * To include this method in a device driver, use a line like this
+ * in the driver's method list:
+ *
+ * @code
+ * KOBJMETHOD(device_register, foo_register)
+ * @endcode
+ *
+ * @param dev the device for which handlers are being registered
+ *
+ * @retval NULL method not implemented
+ * @retval non-NULL a pointer to implementation specific static driver state
+ *
+ */
+
+static __inline void * DEVICE_REGISTER(device_t dev)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,device_register);
+ return ((device_register_t *) _m)(dev);
+}
+
#endif /* _device_if_h_ */
diff --git a/rtemsbsd/include/rtems/bsd/local/gpio_if.h b/rtemsbsd/include/rtems/bsd/local/gpio_if.h
new file mode 100644
index 00000000..1d42f82b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/gpio_if.h
@@ -0,0 +1,151 @@
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from source file
+ * freebsd-org/sys/dev/gpio/gpio_if.m
+ * with
+ * makeobjops.awk
+ *
+ * See the source file for legal information
+ */
+
+
+#ifndef _gpio_if_h_
+#define _gpio_if_h_
+
+
+#include <dev/ofw/openfirm.h>
+
+/** @brief Unique descriptor for the GPIO_GET_BUS() method */
+extern struct kobjop_desc gpio_get_bus_desc;
+/** @brief A function implementing the GPIO_GET_BUS() method */
+typedef device_t gpio_get_bus_t(device_t dev);
+
+static __inline device_t GPIO_GET_BUS(device_t dev)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,gpio_get_bus);
+ return ((gpio_get_bus_t *) _m)(dev);
+}
+
+/** @brief Unique descriptor for the GPIO_PIN_MAX() method */
+extern struct kobjop_desc gpio_pin_max_desc;
+/** @brief A function implementing the GPIO_PIN_MAX() method */
+typedef int gpio_pin_max_t(device_t dev, int *maxpin);
+
+static __inline int GPIO_PIN_MAX(device_t dev, int *maxpin)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,gpio_pin_max);
+ return ((gpio_pin_max_t *) _m)(dev, maxpin);
+}
+
+/** @brief Unique descriptor for the GPIO_PIN_SET() method */
+extern struct kobjop_desc gpio_pin_set_desc;
+/** @brief A function implementing the GPIO_PIN_SET() method */
+typedef int gpio_pin_set_t(device_t dev, uint32_t pin_num, uint32_t pin_value);
+
+static __inline int GPIO_PIN_SET(device_t dev, uint32_t pin_num,
+ uint32_t pin_value)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,gpio_pin_set);
+ return ((gpio_pin_set_t *) _m)(dev, pin_num, pin_value);
+}
+
+/** @brief Unique descriptor for the GPIO_PIN_GET() method */
+extern struct kobjop_desc gpio_pin_get_desc;
+/** @brief A function implementing the GPIO_PIN_GET() method */
+typedef int gpio_pin_get_t(device_t dev, uint32_t pin_num, uint32_t *pin_value);
+
+static __inline int GPIO_PIN_GET(device_t dev, uint32_t pin_num,
+ uint32_t *pin_value)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,gpio_pin_get);
+ return ((gpio_pin_get_t *) _m)(dev, pin_num, pin_value);
+}
+
+/** @brief Unique descriptor for the GPIO_PIN_TOGGLE() method */
+extern struct kobjop_desc gpio_pin_toggle_desc;
+/** @brief A function implementing the GPIO_PIN_TOGGLE() method */
+typedef int gpio_pin_toggle_t(device_t dev, uint32_t pin_num);
+
+static __inline int GPIO_PIN_TOGGLE(device_t dev, uint32_t pin_num)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,gpio_pin_toggle);
+ return ((gpio_pin_toggle_t *) _m)(dev, pin_num);
+}
+
+/** @brief Unique descriptor for the GPIO_PIN_GETCAPS() method */
+extern struct kobjop_desc gpio_pin_getcaps_desc;
+/** @brief A function implementing the GPIO_PIN_GETCAPS() method */
+typedef int gpio_pin_getcaps_t(device_t dev, uint32_t pin_num, uint32_t *caps);
+
+static __inline int GPIO_PIN_GETCAPS(device_t dev, uint32_t pin_num,
+ uint32_t *caps)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,gpio_pin_getcaps);
+ return ((gpio_pin_getcaps_t *) _m)(dev, pin_num, caps);
+}
+
+/** @brief Unique descriptor for the GPIO_PIN_GETFLAGS() method */
+extern struct kobjop_desc gpio_pin_getflags_desc;
+/** @brief A function implementing the GPIO_PIN_GETFLAGS() method */
+typedef int gpio_pin_getflags_t(device_t dev, uint32_t pin_num,
+ uint32_t *flags);
+
+static __inline int GPIO_PIN_GETFLAGS(device_t dev, uint32_t pin_num,
+ uint32_t *flags)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,gpio_pin_getflags);
+ return ((gpio_pin_getflags_t *) _m)(dev, pin_num, flags);
+}
+
+/** @brief Unique descriptor for the GPIO_PIN_GETNAME() method */
+extern struct kobjop_desc gpio_pin_getname_desc;
+/** @brief A function implementing the GPIO_PIN_GETNAME() method */
+typedef int gpio_pin_getname_t(device_t dev, uint32_t pin_num, char *name);
+
+static __inline int GPIO_PIN_GETNAME(device_t dev, uint32_t pin_num, char *name)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,gpio_pin_getname);
+ return ((gpio_pin_getname_t *) _m)(dev, pin_num, name);
+}
+
+/** @brief Unique descriptor for the GPIO_PIN_SETFLAGS() method */
+extern struct kobjop_desc gpio_pin_setflags_desc;
+/** @brief A function implementing the GPIO_PIN_SETFLAGS() method */
+typedef int gpio_pin_setflags_t(device_t dev, uint32_t pin_num, uint32_t flags);
+
+static __inline int GPIO_PIN_SETFLAGS(device_t dev, uint32_t pin_num,
+ uint32_t flags)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,gpio_pin_setflags);
+ return ((gpio_pin_setflags_t *) _m)(dev, pin_num, flags);
+}
+
+/** @brief Unique descriptor for the GPIO_MAP_GPIOS() method */
+extern struct kobjop_desc gpio_map_gpios_desc;
+/** @brief A function implementing the GPIO_MAP_GPIOS() method */
+typedef int gpio_map_gpios_t(device_t bus, phandle_t dev, phandle_t gparent,
+ int gcells, pcell_t *gpios, uint32_t *pin,
+ uint32_t *flags);
+
+static __inline int GPIO_MAP_GPIOS(device_t bus, phandle_t dev,
+ phandle_t gparent, int gcells,
+ pcell_t *gpios, uint32_t *pin,
+ uint32_t *flags)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)bus)->ops,gpio_map_gpios);
+ return ((gpio_map_gpios_t *) _m)(bus, dev, gparent, gcells, gpios, pin, flags);
+}
+
+#endif /* _gpio_if_h_ */
diff --git a/rtemsbsd/include/rtems/bsd/local/if_dwc_if.h b/rtemsbsd/include/rtems/bsd/local/if_dwc_if.h
new file mode 100644
index 00000000..17aa3b8e
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/if_dwc_if.h
@@ -0,0 +1,55 @@
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from source file
+ * freebsd-org/sys/dev/dwc/if_dwc_if.m
+ * with
+ * makeobjops.awk
+ *
+ * See the source file for legal information
+ */
+
+
+#ifndef _if_dwc_if_h_
+#define _if_dwc_if_h_
+
+
+
+/** @brief Unique descriptor for the IF_DWC_INIT() method */
+extern struct kobjop_desc if_dwc_init_desc;
+/** @brief A function implementing the IF_DWC_INIT() method */
+typedef int if_dwc_init_t(device_t dev);
+
+static __inline int IF_DWC_INIT(device_t dev)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,if_dwc_init);
+ return ((if_dwc_init_t *) _m)(dev);
+}
+
+/** @brief Unique descriptor for the IF_DWC_MAC_TYPE() method */
+extern struct kobjop_desc if_dwc_mac_type_desc;
+/** @brief A function implementing the IF_DWC_MAC_TYPE() method */
+typedef int if_dwc_mac_type_t(device_t dev);
+
+static __inline int IF_DWC_MAC_TYPE(device_t dev)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,if_dwc_mac_type);
+ return ((if_dwc_mac_type_t *) _m)(dev);
+}
+
+/** @brief Unique descriptor for the IF_DWC_MII_CLK() method */
+extern struct kobjop_desc if_dwc_mii_clk_desc;
+/** @brief A function implementing the IF_DWC_MII_CLK() method */
+typedef int if_dwc_mii_clk_t(device_t dev);
+
+static __inline int IF_DWC_MII_CLK(device_t dev)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,if_dwc_mii_clk);
+ return ((if_dwc_mii_clk_t *) _m)(dev);
+}
+
+#endif /* _if_dwc_if_h_ */
diff --git a/rtemsbsd/include/rtems/bsd/local/miidevs.h b/rtemsbsd/include/rtems/bsd/local/miidevs.h
index 82f0102b..2199229d 100644
--- a/rtemsbsd/include/rtems/bsd/local/miidevs.h
+++ b/rtemsbsd/include/rtems/bsd/local/miidevs.h
@@ -187,8 +187,14 @@
#define MII_STR_BROADCOM_BCM5400 "BCM5400 1000BASE-T media interface"
#define MII_MODEL_BROADCOM_BCM5401 0x0005
#define MII_STR_BROADCOM_BCM5401 "BCM5401 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5402 0x0006
+#define MII_STR_BROADCOM_BCM5402 "BCM5402 1000BASE-T media interface"
#define MII_MODEL_BROADCOM_BCM5411 0x0007
#define MII_STR_BROADCOM_BCM5411 "BCM5411 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5404 0x0008
+#define MII_STR_BROADCOM_BCM5404 "BCM5404 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5424 0x000a
+#define MII_STR_BROADCOM_BCM5424 "BCM5424/BCM5234 1000BASE-T media interface"
#define MII_MODEL_BROADCOM_BCM5464 0x000b
#define MII_STR_BROADCOM_BCM5464 "BCM5464 1000BASE-T media interface"
#define MII_MODEL_BROADCOM_BCM5461 0x000c
@@ -219,10 +225,16 @@
#define MII_STR_BROADCOM_BCM5780 "BCM5780 1000BASE-T media interface"
#define MII_MODEL_BROADCOM_BCM5708C 0x0036
#define MII_STR_BROADCOM_BCM5708C "BCM5708C 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM_BCM5466 0x003b
+#define MII_STR_BROADCOM_BCM5466 "BCM5466 1000BASE-T media interface"
#define MII_MODEL_BROADCOM2_BCM5325 0x0003
#define MII_STR_BROADCOM2_BCM5325 "BCM5325 10/100 5-port PHY switch"
#define MII_MODEL_BROADCOM2_BCM5906 0x0004
#define MII_STR_BROADCOM2_BCM5906 "BCM5906 10/100baseTX media interface"
+#define MII_MODEL_BROADCOM2_BCM5478 0x0008
+#define MII_STR_BROADCOM2_BCM5478 "BCM5478 1000BASE-T media interface"
+#define MII_MODEL_BROADCOM2_BCM5488 0x0009
+#define MII_STR_BROADCOM2_BCM5488 "BCM5488 1000BASE-T media interface"
#define MII_MODEL_BROADCOM2_BCM5481 0x000a
#define MII_STR_BROADCOM2_BCM5481 "BCM5481 1000BASE-T media interface"
#define MII_MODEL_BROADCOM2_BCM5482 0x000b
@@ -230,7 +242,7 @@
#define MII_MODEL_BROADCOM2_BCM5755 0x000c
#define MII_STR_BROADCOM2_BCM5755 "BCM5755 1000BASE-T media interface"
#define MII_MODEL_BROADCOM2_BCM5754 0x000e
-#define MII_STR_BROADCOM2_BCM5754 "BCM5754/5787 1000BASE-T media interface"
+#define MII_STR_BROADCOM2_BCM5754 "BCM5754/BCM5787 1000BASE-T media interface"
#define MII_MODEL_BROADCOM2_BCM5708S 0x0015
#define MII_STR_BROADCOM2_BCM5708S "BCM5708S 1000/2500baseSX PHY"
#define MII_MODEL_BROADCOM2_BCM5785 0x0016
@@ -411,6 +423,8 @@
/* Micrel PHYs */
#define MII_MODEL_MICREL_KSZ9021 0x0021
#define MII_STR_MICREL_KSZ9021 "Micrel KSZ9021 10/100/1000 PHY"
+#define MII_MODEL_MICREL_KSZ9031 0x0022
+#define MII_STR_MICREL_KSZ9031 "Micrel KSZ9031 10/100/1000 PHY"
/* Myson Technology PHYs */
#define MII_MODEL_xxMYSON_MTD972 0x0000
@@ -505,3 +519,5 @@
/* SMC */
#define MII_MODEL_SMC_LAN8710A 0x000F
#define MII_STR_SMC_LAN8710A "SMC LAN8710A 10/100 interface"
+#define MII_MODEL_SMC_LAN8700 0x000C
+#define MII_STR_SMC_LAN8700 "SMC LAN8700 10/100 interface"
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_callout_profiling.h b/rtemsbsd/include/rtems/bsd/local/opt_callout_profiling.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_callout_profiling.h
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_em.h b/rtemsbsd/include/rtems/bsd/local/opt_em.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_em.h
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_ifmedia.h b/rtemsbsd/include/rtems/bsd/local/opt_ifmedia.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_ifmedia.h
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_kld.h b/rtemsbsd/include/rtems/bsd/local/opt_kld.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_kld.h
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_kqueue.h b/rtemsbsd/include/rtems/bsd/local/opt_kqueue.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_kqueue.h
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_kstack_usage_prof.h b/rtemsbsd/include/rtems/bsd/local/opt_kstack_usage_prof.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_kstack_usage_prof.h
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_pci.h b/rtemsbsd/include/rtems/bsd/local/opt_pci.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_pci.h
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_rss.h b/rtemsbsd/include/rtems/bsd/local/opt_rss.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_rss.h
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_stack.h b/rtemsbsd/include/rtems/bsd/local/opt_stack.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_stack.h
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_timer.h b/rtemsbsd/include/rtems/bsd/local/opt_timer.h
new file mode 100644
index 00000000..188d2f49
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_timer.h
@@ -0,0 +1 @@
+#define NO_EVENTTIMERS 1
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_verbose_sysinit.h b/rtemsbsd/include/rtems/bsd/local/opt_verbose_sysinit.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_verbose_sysinit.h
diff --git a/rtemsbsd/include/rtems/bsd/local/opt_vm.h b/rtemsbsd/include/rtems/bsd/local/opt_vm.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/rtems/bsd/local/opt_vm.h
diff --git a/rtemsbsd/include/rtems/bsd/local/pci_if.h b/rtemsbsd/include/rtems/bsd/local/pci_if.h
index d4152556..2f11c95a 100644
--- a/rtemsbsd/include/rtems/bsd/local/pci_if.h
+++ b/rtemsbsd/include/rtems/bsd/local/pci_if.h
@@ -14,6 +14,14 @@
#ifndef _pci_if_h_
#define _pci_if_h_
+
+struct nvlist;
+
+enum pci_id_type {
+ PCI_ID_RID,
+ PCI_ID_MSI,
+};
+
/** @brief Unique descriptor for the PCI_READ_CONFIG() method */
extern struct kobjop_desc pci_read_config_desc;
/** @brief A function implementing the PCI_READ_CONFIG() method */
@@ -154,6 +162,20 @@ static __inline int PCI_ASSIGN_INTERRUPT(device_t dev, device_t child)
return ((pci_assign_interrupt_t *) _m)(dev, child);
}
+/** @brief Unique descriptor for the PCI_FIND_CAP() method */
+extern struct kobjop_desc pci_find_cap_desc;
+/** @brief A function implementing the PCI_FIND_CAP() method */
+typedef int pci_find_cap_t(device_t dev, device_t child, int capability,
+ int *capreg);
+
+static __inline int PCI_FIND_CAP(device_t dev, device_t child, int capability,
+ int *capreg)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,pci_find_cap);
+ return ((pci_find_cap_t *) _m)(dev, child, capability, capreg);
+}
+
/** @brief Unique descriptor for the PCI_FIND_EXTCAP() method */
extern struct kobjop_desc pci_find_extcap_desc;
/** @brief A function implementing the PCI_FIND_EXTCAP() method */
@@ -168,6 +190,20 @@ static __inline int PCI_FIND_EXTCAP(device_t dev, device_t child,
return ((pci_find_extcap_t *) _m)(dev, child, capability, capreg);
}
+/** @brief Unique descriptor for the PCI_FIND_HTCAP() method */
+extern struct kobjop_desc pci_find_htcap_desc;
+/** @brief A function implementing the PCI_FIND_HTCAP() method */
+typedef int pci_find_htcap_t(device_t dev, device_t child, int capability,
+ int *capreg);
+
+static __inline int PCI_FIND_HTCAP(device_t dev, device_t child, int capability,
+ int *capreg)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,pci_find_htcap);
+ return ((pci_find_htcap_t *) _m)(dev, child, capability, capreg);
+}
+
/** @brief Unique descriptor for the PCI_ALLOC_MSI() method */
extern struct kobjop_desc pci_alloc_msi_desc;
/** @brief A function implementing the PCI_ALLOC_MSI() method */
@@ -192,6 +228,46 @@ static __inline int PCI_ALLOC_MSIX(device_t dev, device_t child, int *count)
return ((pci_alloc_msix_t *) _m)(dev, child, count);
}
+/** @brief Unique descriptor for the PCI_ENABLE_MSI() method */
+extern struct kobjop_desc pci_enable_msi_desc;
+/** @brief A function implementing the PCI_ENABLE_MSI() method */
+typedef void pci_enable_msi_t(device_t dev, device_t child, uint64_t address,
+ uint16_t data);
+
+static __inline void PCI_ENABLE_MSI(device_t dev, device_t child,
+ uint64_t address, uint16_t data)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,pci_enable_msi);
+ ((pci_enable_msi_t *) _m)(dev, child, address, data);
+}
+
+/** @brief Unique descriptor for the PCI_ENABLE_MSIX() method */
+extern struct kobjop_desc pci_enable_msix_desc;
+/** @brief A function implementing the PCI_ENABLE_MSIX() method */
+typedef void pci_enable_msix_t(device_t dev, device_t child, u_int index,
+ uint64_t address, uint32_t data);
+
+static __inline void PCI_ENABLE_MSIX(device_t dev, device_t child, u_int index,
+ uint64_t address, uint32_t data)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,pci_enable_msix);
+ ((pci_enable_msix_t *) _m)(dev, child, index, address, data);
+}
+
+/** @brief Unique descriptor for the PCI_DISABLE_MSI() method */
+extern struct kobjop_desc pci_disable_msi_desc;
+/** @brief A function implementing the PCI_DISABLE_MSI() method */
+typedef void pci_disable_msi_t(device_t dev, device_t child);
+
+static __inline void PCI_DISABLE_MSI(device_t dev, device_t child)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,pci_disable_msi);
+ ((pci_disable_msi_t *) _m)(dev, child);
+}
+
/** @brief Unique descriptor for the PCI_REMAP_MSIX() method */
extern struct kobjop_desc pci_remap_msix_desc;
/** @brief A function implementing the PCI_REMAP_MSIX() method */
@@ -242,4 +318,109 @@ static __inline int PCI_MSIX_COUNT(device_t dev, device_t child)
return ((pci_msix_count_t *) _m)(dev, child);
}
+/** @brief Unique descriptor for the PCI_MSIX_PBA_BAR() method */
+extern struct kobjop_desc pci_msix_pba_bar_desc;
+/** @brief A function implementing the PCI_MSIX_PBA_BAR() method */
+typedef int pci_msix_pba_bar_t(device_t dev, device_t child);
+
+static __inline int PCI_MSIX_PBA_BAR(device_t dev, device_t child)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,pci_msix_pba_bar);
+ return ((pci_msix_pba_bar_t *) _m)(dev, child);
+}
+
+/** @brief Unique descriptor for the PCI_MSIX_TABLE_BAR() method */
+extern struct kobjop_desc pci_msix_table_bar_desc;
+/** @brief A function implementing the PCI_MSIX_TABLE_BAR() method */
+typedef int pci_msix_table_bar_t(device_t dev, device_t child);
+
+static __inline int PCI_MSIX_TABLE_BAR(device_t dev, device_t child)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,pci_msix_table_bar);
+ return ((pci_msix_table_bar_t *) _m)(dev, child);
+}
+
+/** @brief Unique descriptor for the PCI_GET_ID() method */
+extern struct kobjop_desc pci_get_id_desc;
+/** @brief A function implementing the PCI_GET_ID() method */
+typedef int pci_get_id_t(device_t dev, device_t child, enum pci_id_type type,
+ uintptr_t *id);
+
+static __inline int PCI_GET_ID(device_t dev, device_t child,
+ enum pci_id_type type, uintptr_t *id)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,pci_get_id);
+ return ((pci_get_id_t *) _m)(dev, child, type, id);
+}
+
+/** @brief Unique descriptor for the PCI_ALLOC_DEVINFO() method */
+extern struct kobjop_desc pci_alloc_devinfo_desc;
+/** @brief A function implementing the PCI_ALLOC_DEVINFO() method */
+typedef struct pci_devinfo * pci_alloc_devinfo_t(device_t dev);
+
+static __inline struct pci_devinfo * PCI_ALLOC_DEVINFO(device_t dev)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,pci_alloc_devinfo);
+ return ((pci_alloc_devinfo_t *) _m)(dev);
+}
+
+/** @brief Unique descriptor for the PCI_CHILD_ADDED() method */
+extern struct kobjop_desc pci_child_added_desc;
+/** @brief A function implementing the PCI_CHILD_ADDED() method */
+typedef void pci_child_added_t(device_t dev, device_t child);
+
+static __inline void PCI_CHILD_ADDED(device_t dev, device_t child)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,pci_child_added);
+ ((pci_child_added_t *) _m)(dev, child);
+}
+
+/** @brief Unique descriptor for the PCI_IOV_ATTACH() method */
+extern struct kobjop_desc pci_iov_attach_desc;
+/** @brief A function implementing the PCI_IOV_ATTACH() method */
+typedef int pci_iov_attach_t(device_t dev, device_t child,
+ struct nvlist *pf_schema, struct nvlist *vf_schema,
+ const char *name);
+
+static __inline int PCI_IOV_ATTACH(device_t dev, device_t child,
+ struct nvlist *pf_schema,
+ struct nvlist *vf_schema, const char *name)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,pci_iov_attach);
+ return ((pci_iov_attach_t *) _m)(dev, child, pf_schema, vf_schema, name);
+}
+
+/** @brief Unique descriptor for the PCI_IOV_DETACH() method */
+extern struct kobjop_desc pci_iov_detach_desc;
+/** @brief A function implementing the PCI_IOV_DETACH() method */
+typedef int pci_iov_detach_t(device_t dev, device_t child);
+
+static __inline int PCI_IOV_DETACH(device_t dev, device_t child)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,pci_iov_detach);
+ return ((pci_iov_detach_t *) _m)(dev, child);
+}
+
+/** @brief Unique descriptor for the PCI_CREATE_IOV_CHILD() method */
+extern struct kobjop_desc pci_create_iov_child_desc;
+/** @brief A function implementing the PCI_CREATE_IOV_CHILD() method */
+typedef device_t pci_create_iov_child_t(device_t bus, device_t pf, uint16_t rid,
+ uint16_t vid, uint16_t did);
+
+static __inline device_t PCI_CREATE_IOV_CHILD(device_t bus, device_t pf,
+ uint16_t rid, uint16_t vid,
+ uint16_t did)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)bus)->ops,pci_create_iov_child);
+ return ((pci_create_iov_child_t *) _m)(bus, pf, rid, vid, did);
+}
+
#endif /* _pci_if_h_ */
diff --git a/rtemsbsd/include/rtems/bsd/local/pcib_if.h b/rtemsbsd/include/rtems/bsd/local/pcib_if.h
index d8f20c78..a7c06c31 100644
--- a/rtemsbsd/include/rtems/bsd/local/pcib_if.h
+++ b/rtemsbsd/include/rtems/bsd/local/pcib_if.h
@@ -14,6 +14,9 @@
#ifndef _pcib_if_h_
#define _pcib_if_h_
+
+#include "pci_if.h"
+
/** @brief Unique descriptor for the PCIB_MAXSLOTS() method */
extern struct kobjop_desc pcib_maxslots_desc;
/** @brief A function implementing the PCIB_MAXSLOTS() method */
@@ -26,6 +29,18 @@ static __inline int PCIB_MAXSLOTS(device_t dev)
return ((pcib_maxslots_t *) _m)(dev);
}
+/** @brief Unique descriptor for the PCIB_MAXFUNCS() method */
+extern struct kobjop_desc pcib_maxfuncs_desc;
+/** @brief A function implementing the PCIB_MAXFUNCS() method */
+typedef int pcib_maxfuncs_t(device_t dev);
+
+static __inline int PCIB_MAXFUNCS(device_t dev)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)dev)->ops,pcib_maxfuncs);
+ return ((pcib_maxfuncs_t *) _m)(dev);
+}
+
/** @brief Unique descriptor for the PCIB_READ_CONFIG() method */
extern struct kobjop_desc pcib_read_config_desc;
/** @brief A function implementing the PCIB_READ_CONFIG() method */
@@ -147,4 +162,56 @@ static __inline int PCIB_POWER_FOR_SLEEP(device_t pcib, device_t dev,
return ((pcib_power_for_sleep_t *) _m)(pcib, dev, pstate);
}
+/** @brief Unique descriptor for the PCIB_GET_ID() method */
+extern struct kobjop_desc pcib_get_id_desc;
+/** @brief A function implementing the PCIB_GET_ID() method */
+typedef int pcib_get_id_t(device_t pcib, device_t dev, enum pci_id_type type,
+ uintptr_t *id);
+
+static __inline int PCIB_GET_ID(device_t pcib, device_t dev,
+ enum pci_id_type type, uintptr_t *id)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)pcib)->ops,pcib_get_id);
+ return ((pcib_get_id_t *) _m)(pcib, dev, type, id);
+}
+
+/** @brief Unique descriptor for the PCIB_TRY_ENABLE_ARI() method */
+extern struct kobjop_desc pcib_try_enable_ari_desc;
+/** @brief A function implementing the PCIB_TRY_ENABLE_ARI() method */
+typedef int pcib_try_enable_ari_t(device_t pcib, device_t dev);
+
+static __inline int PCIB_TRY_ENABLE_ARI(device_t pcib, device_t dev)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)pcib)->ops,pcib_try_enable_ari);
+ return ((pcib_try_enable_ari_t *) _m)(pcib, dev);
+}
+
+/** @brief Unique descriptor for the PCIB_ARI_ENABLED() method */
+extern struct kobjop_desc pcib_ari_enabled_desc;
+/** @brief A function implementing the PCIB_ARI_ENABLED() method */
+typedef int pcib_ari_enabled_t(device_t pcib);
+
+static __inline int PCIB_ARI_ENABLED(device_t pcib)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)pcib)->ops,pcib_ari_enabled);
+ return ((pcib_ari_enabled_t *) _m)(pcib);
+}
+
+/** @brief Unique descriptor for the PCIB_DECODE_RID() method */
+extern struct kobjop_desc pcib_decode_rid_desc;
+/** @brief A function implementing the PCIB_DECODE_RID() method */
+typedef void pcib_decode_rid_t(device_t pcib, uint16_t rid, int *bus, int *slot,
+ int *func);
+
+static __inline void PCIB_DECODE_RID(device_t pcib, uint16_t rid, int *bus,
+ int *slot, int *func)
+{
+ kobjop_t _m;
+ KOBJOPLOOKUP(((kobj_t)pcib)->ops,pcib_decode_rid);
+ ((pcib_decode_rid_t *) _m)(pcib, rid, bus, slot, func);
+}
+
#endif /* _pcib_if_h_ */
diff --git a/rtemsbsd/include/rtems/bsd/sys/cpuset.h b/rtemsbsd/include/rtems/bsd/sys/cpuset.h
index 9048c2d9..f5940682 100644
--- a/rtemsbsd/include/rtems/bsd/sys/cpuset.h
+++ b/rtemsbsd/include/rtems/bsd/sys/cpuset.h
@@ -33,64 +33,36 @@
#define _RTEMS_BSD_SYS_CPUSET_H_
#include <sys/_cpuset.h>
+#include <sys/_bitset.h>
+#include <sys/bitset.h>
#define CPUSETBUFSIZ ((2 + sizeof(long) * 2) * _NCPUWORDS)
-#define CPU_SETOF(n, p) do { \
- CPU_ZERO(p); \
- ((p)->__bits[__cpuset_word(n)] = __cpuset_mask(n)); \
-} while (0)
-
-/* Is p full set. */
-#define CPU_ISFULLSET(p) __extension__ ({ \
- __size_t __i; \
- for (__i = 0; __i < _NCPUWORDS; __i++) \
- if ((p)->__bits[__i] != (long)-1) \
- break; \
- __i == _NCPUWORDS; \
-})
-
-/* Is c a subset of p. */
-#define CPU_SUBSET(p, c) __extension__ ({ \
- __size_t __i; \
- for (__i = 0; __i < _NCPUWORDS; __i++) \
- if (((c)->__bits[__i] & \
- (p)->__bits[__i]) != \
- (c)->__bits[__i]) \
- break; \
- __i == _NCPUWORDS; \
-})
-
-/* Are there any common bits between b & c? */
-#define CPU_OVERLAP(p, c) __extension__ ({ \
- __size_t __i; \
- for (__i = 0; __i < _NCPUWORDS; __i++) \
- if (((c)->__bits[__i] & \
- (p)->__bits[__i]) != 0) \
- break; \
- __i != _NCPUWORDS; \
-})
-
-#define CPU_CLR_ATOMIC(n, p) \
- atomic_clear_long(&(p)->__bits[__cpuset_word(n)], __cpuset_mask(n))
-
-#define CPU_SET_ATOMIC(n, p) \
- atomic_set_long(&(p)->__bits[__cpuset_word(n)], __cpuset_mask(n))
-
-/* Convenience functions catering special cases. */
-#define CPU_OR_ATOMIC(d, s) do { \
- __size_t __i; \
- for (__i = 0; __i < _NCPUWORDS; __i++) \
- atomic_set_long(&(d)->__bits[__i], \
- (s)->__bits[__i]); \
-} while (0)
-
-#define CPU_COPY_STORE_REL(f, t) do { \
- __size_t __i; \
- for (__i = 0; __i < _NCPUWORDS; __i++) \
- atomic_store_rel_long(&(t)->__bits[__i], \
- (f)->__bits[__i]); \
-} while (0)
+#define CPU_CLR(n, p) BIT_CLR(CPU_SETSIZE, n, p)
+#define CPU_COPY(f, t) BIT_COPY(CPU_SETSIZE, f, t)
+#define CPU_ISSET(n, p) BIT_ISSET(CPU_SETSIZE, n, p)
+#define CPU_SET(n, p) BIT_SET(CPU_SETSIZE, n, p)
+#define CPU_ZERO(p) BIT_ZERO(CPU_SETSIZE, p)
+#define CPU_FILL(p) BIT_FILL(CPU_SETSIZE, p)
+#define CPU_SETOF(n, p) BIT_SETOF(CPU_SETSIZE, n, p)
+#define CPU_EMPTY(p) BIT_EMPTY(CPU_SETSIZE, p)
+#define CPU_ISFULLSET(p) BIT_ISFULLSET(CPU_SETSIZE, p)
+#define CPU_SUBSET(p, c) BIT_SUBSET(CPU_SETSIZE, p, c)
+#define CPU_OVERLAP(p, c) BIT_OVERLAP(CPU_SETSIZE, p, c)
+#define CPU_CMP(p, c) BIT_CMP(CPU_SETSIZE, p, c)
+#define CPU_OR(d, s) BIT_OR(CPU_SETSIZE, d, s)
+#define CPU_AND(d, s) BIT_AND(CPU_SETSIZE, d, s)
+#define CPU_NAND(d, s) BIT_NAND(CPU_SETSIZE, d, s)
+#define CPU_CLR_ATOMIC(n, p) BIT_CLR_ATOMIC(CPU_SETSIZE, n, p)
+#define CPU_SET_ATOMIC(n, p) BIT_SET_ATOMIC(CPU_SETSIZE, n, p)
+#define CPU_SET_ATOMIC_ACQ(n, p) BIT_SET_ATOMIC_ACQ(CPU_SETSIZE, n, p)
+#define CPU_AND_ATOMIC(n, p) BIT_AND_ATOMIC(CPU_SETSIZE, n, p)
+#define CPU_OR_ATOMIC(d, s) BIT_OR_ATOMIC(CPU_SETSIZE, d, s)
+#define CPU_COPY_STORE_REL(f, t) BIT_COPY_STORE_REL(CPU_SETSIZE, f, t)
+#define CPU_FFS(p) BIT_FFS(CPU_SETSIZE, p)
+#define CPU_COUNT(p) BIT_COUNT(CPU_SETSIZE, p)
+#define CPUSET_FSET BITSET_FSET(_NCPUWORDS)
+#define CPUSET_T_INITIALIZER BITSET_T_INITIALIZER
/*
* Valid cpulevel_t values.
@@ -107,6 +79,7 @@
#define CPU_WHICH_CPUSET 3 /* Specifies a set id. */
#define CPU_WHICH_IRQ 4 /* Specifies an irq #. */
#define CPU_WHICH_JAIL 5 /* Specifies a jail id. */
+#define CPU_WHICH_DOMAIN 6 /* Specifies a NUMA domain id. */
/*
* Reserved cpuset identifiers.
@@ -115,6 +88,8 @@
#define CPUSET_DEFAULT 0
#ifdef _KERNEL
+#include <sys/queue.h>
+
LIST_HEAD(setlist, cpuset);
/*
@@ -145,16 +120,23 @@ struct cpuset {
extern cpuset_t *cpuset_root;
struct prison;
struct proc;
+struct thread;
struct cpuset *cpuset_thread0(void);
struct cpuset *cpuset_ref(struct cpuset *);
void cpuset_rel(struct cpuset *);
int cpuset_setthread(lwpid_t id, cpuset_t *);
+int cpuset_setithread(lwpid_t id, int cpu);
int cpuset_create_root(struct prison *, struct cpuset **);
int cpuset_setproc_update_set(struct proc *, struct cpuset *);
-int cpusetobj_ffs(const cpuset_t *);
+int cpuset_which(cpuwhich_t, id_t, struct proc **,
+ struct thread **, struct cpuset **);
+
char *cpusetobj_strprint(char *, const cpuset_t *);
int cpusetobj_strscan(cpuset_t *, const char *);
+#ifdef DDB
+void ddb_display_cpuset(const cpuset_t *);
+#endif
#else
__BEGIN_DECLS
diff --git a/rtemsbsd/include/rtems/bsd/sys/lock.h b/rtemsbsd/include/rtems/bsd/sys/lock.h
index f252976c..0456d0da 100644
--- a/rtemsbsd/include/rtems/bsd/sys/lock.h
+++ b/rtemsbsd/include/rtems/bsd/sys/lock.h
@@ -175,7 +175,7 @@ struct lock_class {
#define LOCK_LOG_DESTROY(lo, flags) LOCK_LOG_INIT(lo, flags)
-#define lock_initalized(lo) ((lo)->lo_flags & LO_INITIALIZED)
+#define lock_initialized(lo) ((lo)->lo_flags & LO_INITIALIZED)
/*
* Helpful macros for quickly coming up with assertions with informative
diff --git a/rtemsbsd/include/rtems/bsd/sys/param.h b/rtemsbsd/include/rtems/bsd/sys/param.h
index 2bc6cce9..a21e94aa 100644
--- a/rtemsbsd/include/rtems/bsd/sys/param.h
+++ b/rtemsbsd/include/rtems/bsd/sys/param.h
@@ -40,6 +40,7 @@
#ifdef __rtems__
#include <sys/param.h>
+#include <sys/_bitset.h>
#endif /* __rtems__ */
#include <sys/_null.h>
@@ -61,11 +62,16 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 903000 /* Master, propagated to newvers */
+#define __FreeBSD_version 1200003 /* Master, propagated to newvers */
#ifdef _KERNEL
-#define P_OSREL_SIGSEGV 700004
-#define P_OSREL_MAP_ANON 800104
+#define P_OSREL_SIGWAIT 700000
+#define P_OSREL_SIGSEGV 700004
+#define P_OSREL_MAP_ANON 800104
+#define P_OSREL_MAP_FSTRICT 1100036
+#define P_OSREL_SHUTDOWN_ENOTCONN 1100077
+
+#define P_OSREL_MAJOR(x) ((x) / 100000)
#endif
#ifndef LOCORE
diff --git a/rtemsbsd/include/sys/_vm_domain.h b/rtemsbsd/include/sys/_vm_domain.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/sys/_vm_domain.h
diff --git a/rtemsbsd/include/sys/fail.h b/rtemsbsd/include/sys/fail.h
deleted file mode 100644
index 936ffd88..00000000
--- a/rtemsbsd/include/sys/fail.h
+++ /dev/null
@@ -1 +0,0 @@
-/* EMPTY */
diff --git a/rtemsbsd/include/sys/signalvar.h b/rtemsbsd/include/sys/signalvar.h
deleted file mode 100644
index 936ffd88..00000000
--- a/rtemsbsd/include/sys/signalvar.h
+++ /dev/null
@@ -1 +0,0 @@
-/* EMPTY */
diff --git a/rtemsbsd/include/sys/stack.h b/rtemsbsd/include/sys/stack.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/sys/stack.h
diff --git a/rtemsbsd/include/sys/stdatomic.h b/rtemsbsd/include/sys/stdatomic.h
new file mode 100644
index 00000000..2643cf37
--- /dev/null
+++ b/rtemsbsd/include/sys/stdatomic.h
@@ -0,0 +1 @@
+#include <stdatomic.h>
diff --git a/rtemsbsd/include/sys/zlib.h b/rtemsbsd/include/sys/zlib.h
new file mode 100644
index 00000000..4470a1fd
--- /dev/null
+++ b/rtemsbsd/include/sys/zlib.h
@@ -0,0 +1 @@
+#include <zlib.h>
diff --git a/rtemsbsd/include/vm/memguard.h b/rtemsbsd/include/vm/memguard.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/vm/memguard.h
diff --git a/rtemsbsd/include/vm/vm_domain.h b/rtemsbsd/include/vm/vm_domain.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/rtemsbsd/include/vm/vm_domain.h
diff --git a/rtemsbsd/include/vm/vm_map.h b/rtemsbsd/include/vm/vm_map.h
index 936ffd88..086ac13b 100644
--- a/rtemsbsd/include/vm/vm_map.h
+++ b/rtemsbsd/include/vm/vm_map.h
@@ -1 +1 @@
-/* EMPTY */
+#include <sys/sx.h>
diff --git a/rtemsbsd/local/bus_if.c b/rtemsbsd/local/bus_if.c
index 558888af..169ea7d5 100644
--- a/rtemsbsd/local/bus_if.c
+++ b/rtemsbsd/local/bus_if.c
@@ -24,8 +24,8 @@
static struct resource *
null_alloc_resource(device_t dev, device_t child,
- int type, int *rid, u_long start, u_long end,
- u_long count, u_int flags)
+ int type, int *rid, rman_res_t start, rman_res_t end,
+ rman_res_t count, u_int flags)
{
return (0);
}
@@ -47,243 +47,155 @@ null_add_child(device_t bus, int order, const char *name,
panic("bus_add_child is not implemented");
}
-struct kobj_method bus_print_child_method_default = {
- &bus_print_child_desc, (kobjop_t) bus_generic_print_child
-};
-
struct kobjop_desc bus_print_child_desc = {
- 0, &bus_print_child_method_default
-};
-
-struct kobj_method bus_probe_nomatch_method_default = {
- &bus_probe_nomatch_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_print_child_desc, (kobjop_t)bus_generic_print_child }
};
struct kobjop_desc bus_probe_nomatch_desc = {
- 0, &bus_probe_nomatch_method_default
-};
-
-struct kobj_method bus_read_ivar_method_default = {
- &bus_read_ivar_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_probe_nomatch_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_read_ivar_desc = {
- 0, &bus_read_ivar_method_default
-};
-
-struct kobj_method bus_write_ivar_method_default = {
- &bus_write_ivar_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_read_ivar_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_write_ivar_desc = {
- 0, &bus_write_ivar_method_default
-};
-
-struct kobj_method bus_child_deleted_method_default = {
- &bus_child_deleted_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_write_ivar_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_child_deleted_desc = {
- 0, &bus_child_deleted_method_default
-};
-
-struct kobj_method bus_child_detached_method_default = {
- &bus_child_detached_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_child_deleted_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_child_detached_desc = {
- 0, &bus_child_detached_method_default
-};
-
-struct kobj_method bus_driver_added_method_default = {
- &bus_driver_added_desc, (kobjop_t) bus_generic_driver_added
+ 0, { &bus_child_detached_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_driver_added_desc = {
- 0, &bus_driver_added_method_default
-};
-
-struct kobj_method bus_add_child_method_default = {
- &bus_add_child_desc, (kobjop_t) null_add_child
+ 0, { &bus_driver_added_desc, (kobjop_t)bus_generic_driver_added }
};
struct kobjop_desc bus_add_child_desc = {
- 0, &bus_add_child_method_default
+ 0, { &bus_add_child_desc, (kobjop_t)null_add_child }
};
-struct kobj_method bus_alloc_resource_method_default = {
- &bus_alloc_resource_desc, (kobjop_t) null_alloc_resource
+struct kobjop_desc bus_rescan_desc = {
+ 0, { &bus_rescan_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_alloc_resource_desc = {
- 0, &bus_alloc_resource_method_default
-};
-
-struct kobj_method bus_activate_resource_method_default = {
- &bus_activate_resource_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_alloc_resource_desc, (kobjop_t)null_alloc_resource }
};
struct kobjop_desc bus_activate_resource_desc = {
- 0, &bus_activate_resource_method_default
+ 0, { &bus_activate_resource_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method bus_deactivate_resource_method_default = {
- &bus_deactivate_resource_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc bus_map_resource_desc = {
+ 0, { &bus_map_resource_desc, (kobjop_t)bus_generic_map_resource }
};
-struct kobjop_desc bus_deactivate_resource_desc = {
- 0, &bus_deactivate_resource_method_default
+struct kobjop_desc bus_unmap_resource_desc = {
+ 0, { &bus_unmap_resource_desc, (kobjop_t)bus_generic_unmap_resource }
};
-struct kobj_method bus_adjust_resource_method_default = {
- &bus_adjust_resource_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc bus_deactivate_resource_desc = {
+ 0, { &bus_deactivate_resource_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_adjust_resource_desc = {
- 0, &bus_adjust_resource_method_default
-};
-
-struct kobj_method bus_release_resource_method_default = {
- &bus_release_resource_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_adjust_resource_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_release_resource_desc = {
- 0, &bus_release_resource_method_default
-};
-
-struct kobj_method bus_setup_intr_method_default = {
- &bus_setup_intr_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_release_resource_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_setup_intr_desc = {
- 0, &bus_setup_intr_method_default
-};
-
-struct kobj_method bus_teardown_intr_method_default = {
- &bus_teardown_intr_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_setup_intr_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_teardown_intr_desc = {
- 0, &bus_teardown_intr_method_default
-};
-
-struct kobj_method bus_set_resource_method_default = {
- &bus_set_resource_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_teardown_intr_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_set_resource_desc = {
- 0, &bus_set_resource_method_default
-};
-
-struct kobj_method bus_get_resource_method_default = {
- &bus_get_resource_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_set_resource_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_get_resource_desc = {
- 0, &bus_get_resource_method_default
-};
-
-struct kobj_method bus_delete_resource_method_default = {
- &bus_delete_resource_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_get_resource_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_delete_resource_desc = {
- 0, &bus_delete_resource_method_default
-};
-
-struct kobj_method bus_get_resource_list_method_default = {
- &bus_get_resource_list_desc, (kobjop_t) bus_generic_get_resource_list
+ 0, { &bus_delete_resource_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_get_resource_list_desc = {
- 0, &bus_get_resource_list_method_default
-};
-
-struct kobj_method bus_child_present_method_default = {
- &bus_child_present_desc, (kobjop_t) bus_generic_child_present
+ 0, { &bus_get_resource_list_desc, (kobjop_t)bus_generic_get_resource_list }
};
struct kobjop_desc bus_child_present_desc = {
- 0, &bus_child_present_method_default
-};
-
-struct kobj_method bus_child_pnpinfo_str_method_default = {
- &bus_child_pnpinfo_str_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_child_present_desc, (kobjop_t)bus_generic_child_present }
};
struct kobjop_desc bus_child_pnpinfo_str_desc = {
- 0, &bus_child_pnpinfo_str_method_default
-};
-
-struct kobj_method bus_child_location_str_method_default = {
- &bus_child_location_str_desc, (kobjop_t) kobj_error_method
+ 0, { &bus_child_pnpinfo_str_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_child_location_str_desc = {
- 0, &bus_child_location_str_method_default
-};
-
-struct kobj_method bus_bind_intr_method_default = {
- &bus_bind_intr_desc, (kobjop_t) bus_generic_bind_intr
+ 0, { &bus_child_location_str_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc bus_bind_intr_desc = {
- 0, &bus_bind_intr_method_default
-};
-
-struct kobj_method bus_config_intr_method_default = {
- &bus_config_intr_desc, (kobjop_t) bus_generic_config_intr
+ 0, { &bus_bind_intr_desc, (kobjop_t)bus_generic_bind_intr }
};
struct kobjop_desc bus_config_intr_desc = {
- 0, &bus_config_intr_method_default
-};
-
-struct kobj_method bus_describe_intr_method_default = {
- &bus_describe_intr_desc, (kobjop_t) bus_generic_describe_intr
+ 0, { &bus_config_intr_desc, (kobjop_t)bus_generic_config_intr }
};
struct kobjop_desc bus_describe_intr_desc = {
- 0, &bus_describe_intr_method_default
+ 0, { &bus_describe_intr_desc, (kobjop_t)bus_generic_describe_intr }
};
-struct kobj_method bus_hinted_child_method_default = {
- &bus_hinted_child_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc bus_hinted_child_desc = {
+ 0, { &bus_hinted_child_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc bus_hinted_child_desc = {
- 0, &bus_hinted_child_method_default
+struct kobjop_desc bus_get_dma_tag_desc = {
+ 0, { &bus_get_dma_tag_desc, (kobjop_t)bus_generic_get_dma_tag }
};
-struct kobj_method bus_get_dma_tag_method_default = {
- &bus_get_dma_tag_desc, (kobjop_t) bus_generic_get_dma_tag
+struct kobjop_desc bus_get_bus_tag_desc = {
+ 0, { &bus_get_bus_tag_desc, (kobjop_t)bus_generic_get_bus_tag }
};
-struct kobjop_desc bus_get_dma_tag_desc = {
- 0, &bus_get_dma_tag_method_default
+struct kobjop_desc bus_hint_device_unit_desc = {
+ 0, { &bus_hint_device_unit_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method bus_hint_device_unit_method_default = {
- &bus_hint_device_unit_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc bus_new_pass_desc = {
+ 0, { &bus_new_pass_desc, (kobjop_t)bus_generic_new_pass }
};
-struct kobjop_desc bus_hint_device_unit_desc = {
- 0, &bus_hint_device_unit_method_default
+struct kobjop_desc bus_remap_intr_desc = {
+ 0, { &bus_remap_intr_desc, (kobjop_t)null_remap_intr }
};
-struct kobj_method bus_new_pass_method_default = {
- &bus_new_pass_desc, (kobjop_t) bus_generic_new_pass
+struct kobjop_desc bus_suspend_child_desc = {
+ 0, { &bus_suspend_child_desc, (kobjop_t)bus_generic_suspend_child }
};
-struct kobjop_desc bus_new_pass_desc = {
- 0, &bus_new_pass_method_default
+struct kobjop_desc bus_resume_child_desc = {
+ 0, { &bus_resume_child_desc, (kobjop_t)bus_generic_resume_child }
};
-struct kobj_method bus_remap_intr_method_default = {
- &bus_remap_intr_desc, (kobjop_t) null_remap_intr
+struct kobjop_desc bus_get_domain_desc = {
+ 0, { &bus_get_domain_desc, (kobjop_t)bus_generic_get_domain }
};
-struct kobjop_desc bus_remap_intr_desc = {
- 0, &bus_remap_intr_method_default
+struct kobjop_desc bus_get_cpus_desc = {
+ 0, { &bus_get_cpus_desc, (kobjop_t)bus_generic_get_cpus }
};
diff --git a/rtemsbsd/local/device_if.c b/rtemsbsd/local/device_if.c
index 1c04d55d..20ab6221 100644
--- a/rtemsbsd/local/device_if.c
+++ b/rtemsbsd/local/device_if.c
@@ -37,70 +37,47 @@ static int null_resume(device_t dev)
static int null_quiesce(device_t dev)
{
- return EOPNOTSUPP;
+ return 0;
}
-struct kobj_method device_probe_method_default = {
- &device_probe_desc, (kobjop_t) kobj_error_method
-};
+static void * null_register(device_t dev)
+{
+ return NULL;
+}
struct kobjop_desc device_probe_desc = {
- 0, &device_probe_method_default
-};
-
-struct kobj_method device_identify_method_default = {
- &device_identify_desc, (kobjop_t) kobj_error_method
+ 0, { &device_probe_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc device_identify_desc = {
- 0, &device_identify_method_default
-};
-
-struct kobj_method device_attach_method_default = {
- &device_attach_desc, (kobjop_t) kobj_error_method
+ 0, { &device_identify_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc device_attach_desc = {
- 0, &device_attach_method_default
-};
-
-struct kobj_method device_detach_method_default = {
- &device_detach_desc, (kobjop_t) kobj_error_method
+ 0, { &device_attach_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc device_detach_desc = {
- 0, &device_detach_method_default
-};
-
-struct kobj_method device_shutdown_method_default = {
- &device_shutdown_desc, (kobjop_t) null_shutdown
+ 0, { &device_detach_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc device_shutdown_desc = {
- 0, &device_shutdown_method_default
-};
-
-struct kobj_method device_suspend_method_default = {
- &device_suspend_desc, (kobjop_t) null_suspend
+ 0, { &device_shutdown_desc, (kobjop_t)null_shutdown }
};
struct kobjop_desc device_suspend_desc = {
- 0, &device_suspend_method_default
-};
-
-struct kobj_method device_resume_method_default = {
- &device_resume_desc, (kobjop_t) null_resume
+ 0, { &device_suspend_desc, (kobjop_t)null_suspend }
};
struct kobjop_desc device_resume_desc = {
- 0, &device_resume_method_default
+ 0, { &device_resume_desc, (kobjop_t)null_resume }
};
-struct kobj_method device_quiesce_method_default = {
- &device_quiesce_desc, (kobjop_t) null_quiesce
+struct kobjop_desc device_quiesce_desc = {
+ 0, { &device_quiesce_desc, (kobjop_t)null_quiesce }
};
-struct kobjop_desc device_quiesce_desc = {
- 0, &device_quiesce_method_default
+struct kobjop_desc device_register_desc = {
+ 0, { &device_register_desc, (kobjop_t)null_register }
};
diff --git a/rtemsbsd/local/gpio_if.c b/rtemsbsd/local/gpio_if.c
new file mode 100644
index 00000000..ca15c8bd
--- /dev/null
+++ b/rtemsbsd/local/gpio_if.c
@@ -0,0 +1,88 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from source file
+ * freebsd-org/sys/dev/gpio/gpio_if.m
+ * with
+ * makeobjops.awk
+ *
+ * See the source file for legal information
+ */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/queue.h>
+#include <sys/kernel.h>
+#include <sys/kobj.h>
+#include <sys/bus.h>
+#include <sys/gpio.h>
+#include <rtems/bsd/local/gpio_if.h>
+
+
+static device_t
+gpio_default_get_bus(void)
+{
+
+ return (NULL);
+}
+
+static int
+gpio_default_map_gpios(device_t bus, phandle_t dev,
+ phandle_t gparent, int gcells, pcell_t *gpios, uint32_t *pin,
+ uint32_t *flags)
+{
+ /* Propagate up the bus hierarchy until someone handles it. */
+ if (device_get_parent(bus) != NULL)
+ return (GPIO_MAP_GPIOS(device_get_parent(bus), dev,
+ gparent, gcells, gpios, pin, flags));
+
+ /* If that fails, then assume the FreeBSD defaults. */
+ *pin = gpios[0];
+ if (gcells == 2 || gcells == 3)
+ *flags = gpios[gcells - 1];
+
+ return (0);
+}
+
+struct kobjop_desc gpio_get_bus_desc = {
+ 0, { &gpio_get_bus_desc, (kobjop_t)gpio_default_get_bus }
+};
+
+struct kobjop_desc gpio_pin_max_desc = {
+ 0, { &gpio_pin_max_desc, (kobjop_t)kobj_error_method }
+};
+
+struct kobjop_desc gpio_pin_set_desc = {
+ 0, { &gpio_pin_set_desc, (kobjop_t)kobj_error_method }
+};
+
+struct kobjop_desc gpio_pin_get_desc = {
+ 0, { &gpio_pin_get_desc, (kobjop_t)kobj_error_method }
+};
+
+struct kobjop_desc gpio_pin_toggle_desc = {
+ 0, { &gpio_pin_toggle_desc, (kobjop_t)kobj_error_method }
+};
+
+struct kobjop_desc gpio_pin_getcaps_desc = {
+ 0, { &gpio_pin_getcaps_desc, (kobjop_t)kobj_error_method }
+};
+
+struct kobjop_desc gpio_pin_getflags_desc = {
+ 0, { &gpio_pin_getflags_desc, (kobjop_t)kobj_error_method }
+};
+
+struct kobjop_desc gpio_pin_getname_desc = {
+ 0, { &gpio_pin_getname_desc, (kobjop_t)kobj_error_method }
+};
+
+struct kobjop_desc gpio_pin_setflags_desc = {
+ 0, { &gpio_pin_setflags_desc, (kobjop_t)kobj_error_method }
+};
+
+struct kobjop_desc gpio_map_gpios_desc = {
+ 0, { &gpio_map_gpios_desc, (kobjop_t)gpio_default_map_gpios }
+};
+
diff --git a/rtemsbsd/local/if_dwc_if.c b/rtemsbsd/local/if_dwc_if.c
new file mode 100644
index 00000000..cd8b8922
--- /dev/null
+++ b/rtemsbsd/local/if_dwc_if.c
@@ -0,0 +1,52 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from source file
+ * freebsd-org/sys/dev/dwc/if_dwc_if.m
+ * with
+ * makeobjops.awk
+ *
+ * See the source file for legal information
+ */
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/queue.h>
+#include <sys/kernel.h>
+#include <sys/kobj.h>
+#include <rtems/bsd/local/if_dwc_if.h>
+
+#include <dev/dwc/if_dwc.h>
+
+static int
+if_dwc_default_init(device_t dev)
+{
+ return (0);
+}
+
+static int
+if_dwc_default_mac_type(device_t dev)
+{
+ return (DWC_GMAC);
+}
+
+static int
+if_dwc_default_mii_clk(device_t dev)
+{
+ return (GMAC_MII_CLK_25_35M_DIV16);
+}
+
+struct kobjop_desc if_dwc_init_desc = {
+ 0, { &if_dwc_init_desc, (kobjop_t)if_dwc_default_init }
+};
+
+struct kobjop_desc if_dwc_mac_type_desc = {
+ 0, { &if_dwc_mac_type_desc, (kobjop_t)if_dwc_default_mac_type }
+};
+
+struct kobjop_desc if_dwc_mii_clk_desc = {
+ 0, { &if_dwc_mii_clk_desc, (kobjop_t)if_dwc_default_mii_clk }
+};
+
diff --git a/rtemsbsd/local/miibus_if.c b/rtemsbsd/local/miibus_if.c
index 433e7a67..fdb30e81 100644
--- a/rtemsbsd/local/miibus_if.c
+++ b/rtemsbsd/local/miibus_if.c
@@ -19,43 +19,23 @@
#include <sys/bus.h>
#include <rtems/bsd/local/miibus_if.h>
-struct kobj_method miibus_readreg_method_default = {
- &miibus_readreg_desc, (kobjop_t) kobj_error_method
-};
-
struct kobjop_desc miibus_readreg_desc = {
- 0, &miibus_readreg_method_default
-};
-
-struct kobj_method miibus_writereg_method_default = {
- &miibus_writereg_desc, (kobjop_t) kobj_error_method
+ 0, { &miibus_readreg_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc miibus_writereg_desc = {
- 0, &miibus_writereg_method_default
-};
-
-struct kobj_method miibus_statchg_method_default = {
- &miibus_statchg_desc, (kobjop_t) kobj_error_method
+ 0, { &miibus_writereg_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc miibus_statchg_desc = {
- 0, &miibus_statchg_method_default
-};
-
-struct kobj_method miibus_linkchg_method_default = {
- &miibus_linkchg_desc, (kobjop_t) kobj_error_method
+ 0, { &miibus_statchg_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc miibus_linkchg_desc = {
- 0, &miibus_linkchg_method_default
-};
-
-struct kobj_method miibus_mediainit_method_default = {
- &miibus_mediainit_desc, (kobjop_t) kobj_error_method
+ 0, { &miibus_linkchg_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc miibus_mediainit_desc = {
- 0, &miibus_mediainit_method_default
+ 0, { &miibus_mediainit_desc, (kobjop_t)kobj_error_method }
};
diff --git a/rtemsbsd/local/mmcbr_if.c b/rtemsbsd/local/mmcbr_if.c
index 1ce2d263..83c8f03e 100644
--- a/rtemsbsd/local/mmcbr_if.c
+++ b/rtemsbsd/local/mmcbr_if.c
@@ -22,43 +22,23 @@
#include <dev/mmc/mmcreg.h>
#include <rtems/bsd/local/mmcbr_if.h>
-struct kobj_method mmcbr_update_ios_method_default = {
- &mmcbr_update_ios_desc, (kobjop_t) kobj_error_method
-};
-
struct kobjop_desc mmcbr_update_ios_desc = {
- 0, &mmcbr_update_ios_method_default
-};
-
-struct kobj_method mmcbr_request_method_default = {
- &mmcbr_request_desc, (kobjop_t) kobj_error_method
+ 0, { &mmcbr_update_ios_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc mmcbr_request_desc = {
- 0, &mmcbr_request_method_default
-};
-
-struct kobj_method mmcbr_get_ro_method_default = {
- &mmcbr_get_ro_desc, (kobjop_t) kobj_error_method
+ 0, { &mmcbr_request_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc mmcbr_get_ro_desc = {
- 0, &mmcbr_get_ro_method_default
-};
-
-struct kobj_method mmcbr_acquire_host_method_default = {
- &mmcbr_acquire_host_desc, (kobjop_t) kobj_error_method
+ 0, { &mmcbr_get_ro_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc mmcbr_acquire_host_desc = {
- 0, &mmcbr_acquire_host_method_default
-};
-
-struct kobj_method mmcbr_release_host_method_default = {
- &mmcbr_release_host_desc, (kobjop_t) kobj_error_method
+ 0, { &mmcbr_acquire_host_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc mmcbr_release_host_desc = {
- 0, &mmcbr_release_host_method_default
+ 0, { &mmcbr_release_host_desc, (kobjop_t)kobj_error_method }
};
diff --git a/rtemsbsd/local/mmcbus_if.c b/rtemsbsd/local/mmcbus_if.c
index a757a561..fc5e6ffb 100644
--- a/rtemsbsd/local/mmcbus_if.c
+++ b/rtemsbsd/local/mmcbus_if.c
@@ -21,27 +21,15 @@
#include <dev/mmc/bridge.h>
#include <rtems/bsd/local/mmcbus_if.h>
-struct kobj_method mmcbus_wait_for_request_method_default = {
- &mmcbus_wait_for_request_desc, (kobjop_t) kobj_error_method
-};
-
struct kobjop_desc mmcbus_wait_for_request_desc = {
- 0, &mmcbus_wait_for_request_method_default
-};
-
-struct kobj_method mmcbus_acquire_bus_method_default = {
- &mmcbus_acquire_bus_desc, (kobjop_t) kobj_error_method
+ 0, { &mmcbus_wait_for_request_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc mmcbus_acquire_bus_desc = {
- 0, &mmcbus_acquire_bus_method_default
-};
-
-struct kobj_method mmcbus_release_bus_method_default = {
- &mmcbus_release_bus_desc, (kobjop_t) kobj_error_method
+ 0, { &mmcbus_acquire_bus_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc mmcbus_release_bus_desc = {
- 0, &mmcbus_release_bus_method_default
+ 0, { &mmcbus_release_bus_desc, (kobjop_t)kobj_error_method }
};
diff --git a/rtemsbsd/local/pci_if.c b/rtemsbsd/local/pci_if.c
index 7531ee15..f8ea8d96 100644
--- a/rtemsbsd/local/pci_if.c
+++ b/rtemsbsd/local/pci_if.c
@@ -12,11 +12,12 @@
* See the source file for legal information
*/
-#include <rtems/bsd/sys/param.h>
+#include <sys/param.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/kobj.h>
#include <sys/bus.h>
+#include <dev/pci/pcivar.h>
#include <rtems/bsd/local/pci_if.h>
@@ -26,147 +27,141 @@ null_msi_count(device_t dev, device_t child)
return (0);
}
-struct kobj_method pci_read_config_method_default = {
- &pci_read_config_desc, (kobjop_t) kobj_error_method
-};
+static int
+null_msix_bar(device_t dev, device_t child)
+{
+ return (-1);
+}
-struct kobjop_desc pci_read_config_desc = {
- 0, &pci_read_config_method_default
-};
+static device_t
+null_create_iov_child(device_t bus, device_t pf, uint16_t rid,
+ uint16_t vid, uint16_t did)
+{
+ device_printf(bus, "PCI_IOV not implemented on this bus.\n");
+ return (NULL);
+}
-struct kobj_method pci_write_config_method_default = {
- &pci_write_config_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pci_read_config_desc = {
+ 0, { &pci_read_config_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc pci_write_config_desc = {
- 0, &pci_write_config_method_default
-};
-
-struct kobj_method pci_get_powerstate_method_default = {
- &pci_get_powerstate_desc, (kobjop_t) kobj_error_method
+ 0, { &pci_write_config_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc pci_get_powerstate_desc = {
- 0, &pci_get_powerstate_method_default
-};
-
-struct kobj_method pci_set_powerstate_method_default = {
- &pci_set_powerstate_desc, (kobjop_t) kobj_error_method
+ 0, { &pci_get_powerstate_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc pci_set_powerstate_desc = {
- 0, &pci_set_powerstate_method_default
-};
-
-struct kobj_method pci_get_vpd_ident_method_default = {
- &pci_get_vpd_ident_desc, (kobjop_t) kobj_error_method
+ 0, { &pci_set_powerstate_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc pci_get_vpd_ident_desc = {
- 0, &pci_get_vpd_ident_method_default
+ 0, { &pci_get_vpd_ident_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pci_get_vpd_readonly_method_default = {
- &pci_get_vpd_readonly_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pci_get_vpd_readonly_desc = {
+ 0, { &pci_get_vpd_readonly_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc pci_get_vpd_readonly_desc = {
- 0, &pci_get_vpd_readonly_method_default
+struct kobjop_desc pci_enable_busmaster_desc = {
+ 0, { &pci_enable_busmaster_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pci_enable_busmaster_method_default = {
- &pci_enable_busmaster_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pci_disable_busmaster_desc = {
+ 0, { &pci_disable_busmaster_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc pci_enable_busmaster_desc = {
- 0, &pci_enable_busmaster_method_default
+struct kobjop_desc pci_enable_io_desc = {
+ 0, { &pci_enable_io_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pci_disable_busmaster_method_default = {
- &pci_disable_busmaster_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pci_disable_io_desc = {
+ 0, { &pci_disable_io_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc pci_disable_busmaster_desc = {
- 0, &pci_disable_busmaster_method_default
+struct kobjop_desc pci_assign_interrupt_desc = {
+ 0, { &pci_assign_interrupt_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pci_enable_io_method_default = {
- &pci_enable_io_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pci_find_cap_desc = {
+ 0, { &pci_find_cap_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc pci_enable_io_desc = {
- 0, &pci_enable_io_method_default
+struct kobjop_desc pci_find_extcap_desc = {
+ 0, { &pci_find_extcap_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pci_disable_io_method_default = {
- &pci_disable_io_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pci_find_htcap_desc = {
+ 0, { &pci_find_htcap_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc pci_disable_io_desc = {
- 0, &pci_disable_io_method_default
+struct kobjop_desc pci_alloc_msi_desc = {
+ 0, { &pci_alloc_msi_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pci_assign_interrupt_method_default = {
- &pci_assign_interrupt_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pci_alloc_msix_desc = {
+ 0, { &pci_alloc_msix_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc pci_assign_interrupt_desc = {
- 0, &pci_assign_interrupt_method_default
+struct kobjop_desc pci_enable_msi_desc = {
+ 0, { &pci_enable_msi_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pci_find_extcap_method_default = {
- &pci_find_extcap_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pci_enable_msix_desc = {
+ 0, { &pci_enable_msix_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc pci_find_extcap_desc = {
- 0, &pci_find_extcap_method_default
+struct kobjop_desc pci_disable_msi_desc = {
+ 0, { &pci_disable_msi_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pci_alloc_msi_method_default = {
- &pci_alloc_msi_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pci_remap_msix_desc = {
+ 0, { &pci_remap_msix_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc pci_alloc_msi_desc = {
- 0, &pci_alloc_msi_method_default
+struct kobjop_desc pci_release_msi_desc = {
+ 0, { &pci_release_msi_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pci_alloc_msix_method_default = {
- &pci_alloc_msix_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pci_msi_count_desc = {
+ 0, { &pci_msi_count_desc, (kobjop_t)null_msi_count }
};
-struct kobjop_desc pci_alloc_msix_desc = {
- 0, &pci_alloc_msix_method_default
+struct kobjop_desc pci_msix_count_desc = {
+ 0, { &pci_msix_count_desc, (kobjop_t)null_msi_count }
};
-struct kobj_method pci_remap_msix_method_default = {
- &pci_remap_msix_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pci_msix_pba_bar_desc = {
+ 0, { &pci_msix_pba_bar_desc, (kobjop_t)null_msix_bar }
};
-struct kobjop_desc pci_remap_msix_desc = {
- 0, &pci_remap_msix_method_default
+struct kobjop_desc pci_msix_table_bar_desc = {
+ 0, { &pci_msix_table_bar_desc, (kobjop_t)null_msix_bar }
};
-struct kobj_method pci_release_msi_method_default = {
- &pci_release_msi_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pci_get_id_desc = {
+ 0, { &pci_get_id_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc pci_release_msi_desc = {
- 0, &pci_release_msi_method_default
+struct kobjop_desc pci_alloc_devinfo_desc = {
+ 0, { &pci_alloc_devinfo_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pci_msi_count_method_default = {
- &pci_msi_count_desc, (kobjop_t) null_msi_count
+struct kobjop_desc pci_child_added_desc = {
+ 0, { &pci_child_added_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc pci_msi_count_desc = {
- 0, &pci_msi_count_method_default
+struct kobjop_desc pci_iov_attach_desc = {
+ 0, { &pci_iov_attach_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pci_msix_count_method_default = {
- &pci_msix_count_desc, (kobjop_t) null_msi_count
+struct kobjop_desc pci_iov_detach_desc = {
+ 0, { &pci_iov_detach_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc pci_msix_count_desc = {
- 0, &pci_msix_count_method_default
+struct kobjop_desc pci_create_iov_child_desc = {
+ 0, { &pci_create_iov_child_desc, (kobjop_t)null_create_iov_child }
};
diff --git a/rtemsbsd/local/pcib_if.c b/rtemsbsd/local/pcib_if.c
index 07d7abb5..d0c2aa89 100644
--- a/rtemsbsd/local/pcib_if.c
+++ b/rtemsbsd/local/pcib_if.c
@@ -17,7 +17,9 @@
#include <sys/kernel.h>
#include <sys/kobj.h>
#include <sys/bus.h>
+#include <sys/rman.h>
#include <dev/pci/pcivar.h>
+#include <dev/pci/pcib_private.h>
#include <rtems/bsd/local/pcib_if.h>
@@ -27,83 +29,70 @@ null_route_interrupt(device_t pcib, device_t dev, int pin)
return (PCI_INVALID_IRQ);
}
-struct kobj_method pcib_maxslots_method_default = {
- &pcib_maxslots_desc, (kobjop_t) kobj_error_method
-};
+static int
+pcib_null_ari_enabled(device_t pcib)
+{
+
+ return (0);
+}
struct kobjop_desc pcib_maxslots_desc = {
- 0, &pcib_maxslots_method_default
+ 0, { &pcib_maxslots_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pcib_read_config_method_default = {
- &pcib_read_config_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pcib_maxfuncs_desc = {
+ 0, { &pcib_maxfuncs_desc, (kobjop_t)pcib_maxfuncs }
};
struct kobjop_desc pcib_read_config_desc = {
- 0, &pcib_read_config_method_default
-};
-
-struct kobj_method pcib_write_config_method_default = {
- &pcib_write_config_desc, (kobjop_t) kobj_error_method
+ 0, { &pcib_read_config_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc pcib_write_config_desc = {
- 0, &pcib_write_config_method_default
-};
-
-struct kobj_method pcib_route_interrupt_method_default = {
- &pcib_route_interrupt_desc, (kobjop_t) null_route_interrupt
+ 0, { &pcib_write_config_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc pcib_route_interrupt_desc = {
- 0, &pcib_route_interrupt_method_default
-};
-
-struct kobj_method pcib_alloc_msi_method_default = {
- &pcib_alloc_msi_desc, (kobjop_t) kobj_error_method
+ 0, { &pcib_route_interrupt_desc, (kobjop_t)null_route_interrupt }
};
struct kobjop_desc pcib_alloc_msi_desc = {
- 0, &pcib_alloc_msi_method_default
-};
-
-struct kobj_method pcib_release_msi_method_default = {
- &pcib_release_msi_desc, (kobjop_t) kobj_error_method
+ 0, { &pcib_alloc_msi_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc pcib_release_msi_desc = {
- 0, &pcib_release_msi_method_default
+ 0, { &pcib_release_msi_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pcib_alloc_msix_method_default = {
- &pcib_alloc_msix_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pcib_alloc_msix_desc = {
+ 0, { &pcib_alloc_msix_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc pcib_alloc_msix_desc = {
- 0, &pcib_alloc_msix_method_default
+struct kobjop_desc pcib_release_msix_desc = {
+ 0, { &pcib_release_msix_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pcib_release_msix_method_default = {
- &pcib_release_msix_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pcib_map_msi_desc = {
+ 0, { &pcib_map_msi_desc, (kobjop_t)kobj_error_method }
};
-struct kobjop_desc pcib_release_msix_desc = {
- 0, &pcib_release_msix_method_default
+struct kobjop_desc pcib_power_for_sleep_desc = {
+ 0, { &pcib_power_for_sleep_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pcib_map_msi_method_default = {
- &pcib_map_msi_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pcib_get_id_desc = {
+ 0, { &pcib_get_id_desc, (kobjop_t)pcib_get_id }
};
-struct kobjop_desc pcib_map_msi_desc = {
- 0, &pcib_map_msi_method_default
+struct kobjop_desc pcib_try_enable_ari_desc = {
+ 0, { &pcib_try_enable_ari_desc, (kobjop_t)kobj_error_method }
};
-struct kobj_method pcib_power_for_sleep_method_default = {
- &pcib_power_for_sleep_desc, (kobjop_t) kobj_error_method
+struct kobjop_desc pcib_ari_enabled_desc = {
+ 0, { &pcib_ari_enabled_desc, (kobjop_t)pcib_null_ari_enabled }
};
-struct kobjop_desc pcib_power_for_sleep_desc = {
- 0, &pcib_power_for_sleep_method_default
+struct kobjop_desc pcib_decode_rid_desc = {
+ 0, { &pcib_decode_rid_desc, (kobjop_t)pcib_decode_rid }
};
diff --git a/rtemsbsd/local/usb_if.c b/rtemsbsd/local/usb_if.c
index d88ab545..4f334ec8 100644
--- a/rtemsbsd/local/usb_if.c
+++ b/rtemsbsd/local/usb_if.c
@@ -19,19 +19,11 @@
#include <sys/bus.h>
#include <rtems/bsd/local/usb_if.h>
-struct kobj_method usb_handle_request_method_default = {
- &usb_handle_request_desc, (kobjop_t) kobj_error_method
-};
-
struct kobjop_desc usb_handle_request_desc = {
- 0, &usb_handle_request_method_default
-};
-
-struct kobj_method usb_take_controller_method_default = {
- &usb_take_controller_desc, (kobjop_t) kobj_error_method
+ 0, { &usb_handle_request_desc, (kobjop_t)kobj_error_method }
};
struct kobjop_desc usb_take_controller_desc = {
- 0, &usb_take_controller_method_default
+ 0, { &usb_take_controller_desc, (kobjop_t)kobj_error_method }
};
diff --git a/rtemsbsd/rtems/generate_kvm_symbols b/rtemsbsd/rtems/generate_kvm_symbols
index 8097c516..5be75664 100755
--- a/rtemsbsd/rtems/generate_kvm_symbols
+++ b/rtemsbsd/rtems/generate_kvm_symbols
@@ -8,62 +8,76 @@ while read sym
do
symbols="${symbols} ${sym}"
done <<EOF
-_ifnet
-_rtstat
-_rt_tables
-_mrtstat
-_mfchashtbl
-_viftable
-_ipxpcb_list
-_ipxstat
-_spx_istat
-_ddpstat
+_ahstat
+_arpstat
+_carpstats
+_clust_hiwm
+_clust_lowm
_ddpcb
-_ngsocklist
-_ip6stat
+_ddpstat
+_divcbinfo
+_espstat
_icmp6stat
+_icmpstat
+_ifnet
+_igmpstat
+_ip6stat
+_ipcompstat
_ipsec4stat
_ipsec6stat
-_pim6stat
-_mrt6stat
-_mf6ctable
-_mif6table
-_pfkeystat
+_ipstat
+_ipxpcb_list
+_ipxstat
_mbstat
+_mb_statpcpu
_mbtypes
+_mbuf_hiwm
+_mbuf_lowm
+_mf6ctable
+_mfchashtbl
+_mfctablesize
+_mif6table
+_mrt6stat
+_mrtstat
+_netisr_bindthreads
+_netisr_defaultqlimit
+_netisr_dispatch_policy
+_netisr_maxprot
+_netisr_maxqlimit
+_netisr_maxthreads
+_netisr_proto
+_ngsocklist
_nmbclusters
_nmbufs
-_mbuf_hiwm
-_clust_hiwm
-_smp_cpus
+_nws
+_nws_array
+_nws_count
_pagesize
-_mb_statpcpu
-_rttrash
-_mbuf_lowm
-_clust_lowm
-_carpstats
+_pfkeystat
_pfsyncstats
-_ahstat
-_espstat
-_ipcompstat
-_tcpstat
-_udpstat
-_ipstat
-_icmpstat
-_igmpstat
+_pim6stat
_pimstat
+_rip6stat
+_ripcbinfo
+_rtree
+_rtstat
+_rt_tables
+_rttrash
+_sctpstat
+_sfstat
+_smp_cpus
+_spx_istat
_tcbinfo
+_tcps_states
+_tcpstat
_udbinfo
-_divcbinfo
-_ripcbinfo
+_udpstat
_unp_count
-_unp_gencnt
_unp_dhead
+_unp_gencnt
_unp_shead
-_rip6stat
-_sctpstat
-_mfctablesize
-_arpstat
+_unp_sphead
+_viftable
EOF
cat <<EOF
@@ -85,8 +99,7 @@ cat <<EOF
EOF
for sym in ${symbols}
do
- no_underscore=`echo $sym | sed -e 's/^_//' `
- echo "extern int ${no_underscore} __attribute((weak));"
+ echo "extern int _bsd${sym} __attribute((weak));"
done
cat <<EOF
@@ -99,8 +112,7 @@ const kvm_symval_t rtems_kvm_symbols[] = {
EOF
for sym in ${symbols}
do
- no_underscore=`echo $sym | sed -e 's/^_//' `
- echo " { \"${sym}\", (uintptr_t) &${no_underscore} },"
+ echo " { \"${sym}\", (uintptr_t) &_bsd${sym} },"
done
cat <<EOF
diff --git a/rtemsbsd/rtems/rtems-kernel-init.c b/rtemsbsd/rtems/rtems-kernel-init.c
index a60024a0..d6ba174c 100644
--- a/rtemsbsd/rtems/rtems-kernel-init.c
+++ b/rtemsbsd/rtems/rtems-kernel-init.c
@@ -69,6 +69,14 @@ void mi_startup(void);
int hz;
int tick;
+sbintime_t tick_sbt;
+struct bintime bt_timethreshold;
+struct bintime bt_tickthreshold;
+sbintime_t sbt_timethreshold;
+sbintime_t sbt_tickthreshold;
+struct bintime tc_tick_bt;
+sbintime_t tc_tick_sbt;
+int tc_precexp;
int maxusers; /* base tunable */
static SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD, NULL,
@@ -91,6 +99,18 @@ volatile uint32_t _Watchdog_Ticks_since_boot;
extern volatile int32_t _bsd_ticks
__attribute__ ((__alias__("_Watchdog_Ticks_since_boot")));
+__attribute__((__weak__)) void _arc4random_getentropy_fail(void)
+{
+
+}
+
+__attribute__((__weak__)) int getentropy(void *buf, size_t n)
+{
+
+ memset(buf, 0, n);
+ return (0);
+}
+
rtems_status_code
rtems_bsd_initialize(void)
{
@@ -99,6 +119,15 @@ rtems_bsd_initialize(void)
hz = (int) rtems_clock_get_ticks_per_second();
tick = 1000000 / hz;
+ tick_sbt = SBT_1S / hz;
+ FREQ2BT(hz, &tc_tick_bt);
+ tc_tick_sbt = bttosbt(tc_tick_bt);
+ tc_precexp = 31;
+ bt_timethreshold.sec = INT_MAX;
+ bt_timethreshold.frac = ~(uint64_t)0;
+ bt_tickthreshold = bt_timethreshold;
+ sbt_timethreshold = bttosbt(bt_timethreshold);
+ sbt_tickthreshold = bttosbt(bt_tickthreshold);
maxusers = 1;
maxid_maxcpus = (int) rtems_get_processor_count();
diff --git a/rtemsbsd/rtems/rtems-kernel-irqs.c b/rtemsbsd/rtems/rtems-kernel-irqs.c
index 4331c05d..41331b57 100644
--- a/rtemsbsd/rtems/rtems-kernel-irqs.c
+++ b/rtemsbsd/rtems/rtems-kernel-irqs.c
@@ -60,4 +60,4 @@ irqs_sysinit(void)
);
BSD_ASSERT(status == RTEMS_SUCCESSFUL);
}
-SYSINIT(irqs, SI_SUB_SETTINGS, SI_ORDER_ANY, irqs_sysinit, NULL);
+SYSINIT(irqs, SI_SUB_TUNABLES, SI_ORDER_ANY, irqs_sysinit, NULL);
diff --git a/rtemsbsd/rtems/rtems-kernel-jail.c b/rtemsbsd/rtems/rtems-kernel-jail.c
index 663c9348..2499846f 100644
--- a/rtemsbsd/rtems/rtems-kernel-jail.c
+++ b/rtemsbsd/rtems/rtems-kernel-jail.c
@@ -162,7 +162,7 @@ prison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia6)
* doesn't allow IPv4. Address passed in in NBO.
*/
int
-prison_check_ip4(struct ucred *cred, struct in_addr *ia)
+prison_check_ip4(const struct ucred *cred, const struct in_addr *ia)
{
return 0;
}
@@ -173,7 +173,7 @@ prison_check_ip4(struct ucred *cred, struct in_addr *ia)
* NOTE: RTEMS does not restrict via a jail so return 0.
*/
int
-prison_check_ip6(struct ucred *cred, struct in6_addr *ia6)
+prison_check_ip6(const struct ucred *cred, const struct in6_addr *ia6)
{
return 0;
}
diff --git a/rtemsbsd/rtems/rtems-kernel-malloc.c b/rtemsbsd/rtems/rtems-kernel-malloc.c
index 3c9d877f..b61f2911 100644
--- a/rtemsbsd/rtems/rtems-kernel-malloc.c
+++ b/rtemsbsd/rtems/rtems-kernel-malloc.c
@@ -50,9 +50,6 @@ MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory");
MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");
-MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
-MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
-
MALLOC_DEFINE(M_IOV, "iov", "large iov's");
void
diff --git a/rtemsbsd/rtems/rtems-kernel-mutex.c b/rtemsbsd/rtems/rtems-kernel-mutex.c
index 84ea9e53..8e8e1cd5 100644
--- a/rtemsbsd/rtems/rtems-kernel-mutex.c
+++ b/rtemsbsd/rtems/rtems-kernel-mutex.c
@@ -146,7 +146,7 @@ _mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
}
int
-_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
+mtx_trylock_flags_(struct mtx *m, int opts, const char *file, int line)
{
return (rtems_bsd_mutex_trylock(&m->lock_object, &m->mutex));
}
diff --git a/rtemsbsd/rtems/rtems-kernel-nexus.c b/rtemsbsd/rtems/rtems-kernel-nexus.c
index a9c2427a..04638cca 100644
--- a/rtemsbsd/rtems/rtems-kernel-nexus.c
+++ b/rtemsbsd/rtems/rtems-kernel-nexus.c
@@ -121,9 +121,9 @@ nexus_probe(device_t dev)
}
static bool
-nexus_get_start(const rtems_bsd_device *nd, int type, u_long *start)
+nexus_get_start(const rtems_bsd_device *nd, int type, rman_res_t *start)
{
- u_long sr = *start;
+ u_long sr = (u_long)*start;
size_t i;
for (i = 0; i < nd->resource_count; ++i) {
@@ -141,7 +141,7 @@ nexus_get_start(const rtems_bsd_device *nd, int type, u_long *start)
static struct resource *
nexus_alloc_resource(device_t bus, device_t child, int type, int *rid,
- u_long start, u_long end, u_long count, u_int flags)
+ rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
struct resource *res = NULL;
struct rman *rm;
diff --git a/rtemsbsd/rtems/rtems-kernel-page.c b/rtemsbsd/rtems/rtems-kernel-page.c
index cfcbb1a0..8f8c235a 100644
--- a/rtemsbsd/rtems/rtems-kernel-page.c
+++ b/rtemsbsd/rtems/rtems-kernel-page.c
@@ -94,10 +94,12 @@ rtems_bsd_page_alloc(uintptr_t size_in_bytes, int wait)
mtx_unlock(&page_heap_mtx);
#ifdef INVARIANTS
- if (addr != NULL) {
+ wait |= M_ZERO;
+#endif
+
+ if (addr != NULL && (wait & M_ZERO) != 0) {
memset(addr, 0, size_in_bytes);
}
-#endif
return (addr);
}
diff --git a/rtemsbsd/rtems/rtems-kernel-sx.c b/rtemsbsd/rtems/rtems-kernel-sx.c
index 84523a9a..ea0cf04e 100644
--- a/rtemsbsd/rtems/rtems-kernel-sx.c
+++ b/rtemsbsd/rtems/rtems-kernel-sx.c
@@ -146,7 +146,7 @@ _sx_xlock(struct sx *sx, int opts, const char *file, int line)
}
int
-_sx_try_xlock(struct sx *sx, const char *file, int line)
+sx_try_xlock_(struct sx *sx, const char *file, int line)
{
return (rtems_bsd_mutex_trylock(&sx->lock_object, &sx->mutex));
}
@@ -158,13 +158,13 @@ _sx_xunlock(struct sx *sx, const char *file, int line)
}
int
-_sx_try_upgrade(struct sx *sx, const char *file, int line)
+sx_try_upgrade_(struct sx *sx, const char *file, int line)
{
return (1);
}
void
-_sx_downgrade(struct sx *sx, const char *file, int line)
+sx_downgrade_(struct sx *sx, const char *file, int line)
{
/* Do nothing */
}
diff --git a/rtemsbsd/rtems/rtems-legacy-rtrequest.c b/rtemsbsd/rtems/rtems-legacy-rtrequest.c
index e1269be9..2223ab09 100644
--- a/rtemsbsd/rtems/rtems-legacy-rtrequest.c
+++ b/rtemsbsd/rtems/rtems-legacy-rtrequest.c
@@ -44,7 +44,8 @@ rtems_bsdnet_rtrequest(int req, struct sockaddr *dst, struct sockaddr *gateway,
{
int error;
- error = rtrequest(req, dst, gateway, netmask, flags, net_nrt);
+ error = rtrequest_fib(req, dst, gateway, netmask, flags, net_nrt,
+ BSD_DEFAULT_FIB);
if (error != 0) {
errno = error;
diff --git a/rtemsbsd/sys/dev/ffec/if_ffec_mcf548x.c b/rtemsbsd/sys/dev/ffec/if_ffec_mcf548x.c
index 875fae57..a6d55b19 100644
--- a/rtemsbsd/sys/dev/ffec/if_ffec_mcf548x.c
+++ b/rtemsbsd/sys/dev/ffec/if_ffec_mcf548x.c
@@ -1145,7 +1145,7 @@ static MCD_bufDescFec *fec_init_rx_dma(
for (bdIndex = 0; bdIndex < bdCount; ++bdIndex) {
bool bdIsLast = bdIndex == bdCount - 1;
- mbufs[bdIndex] = fec_add_mbuf(M_WAIT, ifp, &bdRing[bdIndex], bdIsLast);
+ mbufs[bdIndex] = fec_add_mbuf(M_WAITOK, ifp, &bdRing[bdIndex], bdIsLast);
}
return bdRing;
@@ -1539,7 +1539,6 @@ static int fec_attach(device_t dev)
IFQ_SET_MAXLEN(&ifp->if_snd, TX_BUF_COUNT - 1);
ifp->if_snd.ifq_drv_maxlen = TX_BUF_COUNT - 1;
IFQ_SET_READY(&ifp->if_snd);
- ifp->if_data.ifi_hdrlen = sizeof(struct ether_header);
/*
* Attach the interface
diff --git a/rtemsbsd/sys/dev/tsec/if_tsec_nexus.c b/rtemsbsd/sys/dev/tsec/if_tsec_nexus.c
index 72dfafa1..b56a598d 100644
--- a/rtemsbsd/sys/dev/tsec/if_tsec_nexus.c
+++ b/rtemsbsd/sys/dev/tsec/if_tsec_nexus.c
@@ -152,7 +152,6 @@ tsec_fdt_attach(device_t dev)
sc->dev = dev;
/* FIXME */
- sc->phy_sc = sc;
sc->phyaddr = -1;
/* Init timer */
diff --git a/rtemsbsd/sys/dev/usb/controller/ehci_mpc83xx.c b/rtemsbsd/sys/dev/usb/controller/ehci_mpc83xx.c
index 00fbaef0..906fc742 100644
--- a/rtemsbsd/sys/dev/usb/controller/ehci_mpc83xx.c
+++ b/rtemsbsd/sys/dev/usb/controller/ehci_mpc83xx.c
@@ -254,7 +254,7 @@ ehci_mpc83xx_attach(device_t self)
);
BSD_ASSERT_SC(sc);
- e->sc_flags |= EHCI_SCFLG_SETMODE | EHCI_SCFLG_NORESTERM | EHCI_SCFLG_TT;
+ e->sc_flags |= EHCI_SCFLG_NORESTERM | EHCI_SCFLG_TT;
/* EHCI intitialization */
ue = ehci_init(e);
diff --git a/rtemsbsd/sys/fs/devfs/devfs_devs.c b/rtemsbsd/sys/fs/devfs/devfs_devs.c
index 75c9e270..85929e81 100755
--- a/rtemsbsd/sys/fs/devfs/devfs_devs.c
+++ b/rtemsbsd/sys/fs/devfs/devfs_devs.c
@@ -98,7 +98,7 @@ devfs_imfs_readv(rtems_libio_t *iop, const struct iovec *iov, int iovcnt,
struct cdev *cdev = devfs_imfs_get_context_by_iop(iop);
struct thread *td = rtems_bsd_get_curthread_or_null();
struct uio uio = {
- .uio_iov = iov,
+ .uio_iov = __DECONST(struct iovec *, iov),
.uio_iovcnt = iovcnt,
.uio_offset = 0,
.uio_resid = total,
@@ -140,7 +140,7 @@ devfs_imfs_writev(rtems_libio_t *iop, const struct iovec *iov, int iovcnt,
struct cdev *cdev = devfs_imfs_get_context_by_iop(iop);
struct thread *td = rtems_bsd_get_curthread_or_null();
struct uio uio = {
- .uio_iov = iov,
+ .uio_iov = __DECONST(struct iovec *, iov),
.uio_iovcnt = iovcnt,
.uio_offset = 0,
.uio_resid = total,
@@ -168,7 +168,7 @@ static ssize_t
devfs_imfs_write(rtems_libio_t *iop, const void *buffer, size_t count)
{
struct iovec iov = {
- .iov_base = buffer,
+ .iov_base = __DECONST(void *, buffer),
.iov_len = count
};
@@ -237,15 +237,11 @@ devfs_alloc(int flags)
{
struct cdev *cdev;
- cdev = malloc(sizeof *cdev, M_TEMP, 0);
- if (cdev == NULL)
+ cdev = malloc(sizeof *cdev, M_TEMP, M_ZERO);
+ if (cdev != NULL)
return (NULL);
- memset(cdev, 0, sizeof *cdev);
- cdev->si_name = cdev->__si_namebuf;
- memcpy(cdev->__si_pathstruct.__si_dir, rtems_cdev_directory,
- sizeof(rtems_cdev_directory) - 1);
-
+ memcpy(cdev->si_path, rtems_cdev_directory, sizeof(cdev->si_path));
return (cdev);
}
diff --git a/rtemsbsd/sys/net/if_ppp.c b/rtemsbsd/sys/net/if_ppp.c
index b93dc0db..8f8d29c6 100644
--- a/rtemsbsd/sys/net/if_ppp.c
+++ b/rtemsbsd/sys/net/if_ppp.c
@@ -107,6 +107,7 @@
#include <machine/bus.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_types.h>
#include <net/netisr.h>
#include <net/route.h>
@@ -918,7 +919,7 @@ pppsioctl(struct ifnet *ifp, ioctl_command_t cmd, caddr_t data)
* Packet is placed in Information field of PPP frame.
*/
int
-pppoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
+pppoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
struct route *rtp)
{
register struct ppp_softc *sc = ifp->if_softc;
@@ -987,7 +988,7 @@ pppoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
* (This assumes M_LEADINGSPACE is always 0 for a cluster mbuf.)
*/
if (M_LEADINGSPACE(m0) < PPP_HDRLEN) {
- m0 = m_prepend(m0, PPP_HDRLEN, M_DONTWAIT);
+ m0 = m_prepend(m0, PPP_HDRLEN, M_NOWAIT);
if (m0 == 0) {
error = ENOBUFS;
goto bad;
@@ -1063,9 +1064,9 @@ pppoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
} else {
ifq = (m0->m_flags & M_HIGHPRI)? &sc->sc_fastq: &ifp->if_snd;
if (_IF_QFULL(ifq) && dst->sa_family != AF_UNSPEC) {
- IFQ_INC_DROPS(ifq);
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
splx(s);
- sc->sc_ifp->if_oerrors++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
sc->sc_stats.ppp_oerrors++;
error = ENOBUFS;
goto bad;
@@ -1074,8 +1075,8 @@ pppoutput(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
(*sc->sc_start)(sc);
}
ifp->if_lastchange = ppp_time;
- ifp->if_opackets++;
- ifp->if_obytes += len;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
splx(s);
return (0);
@@ -1115,8 +1116,8 @@ ppp_requeue(struct ppp_softc *sc)
m->m_nextpkt = NULL;
ifq = (m->m_flags & M_HIGHPRI)? &sc->sc_fastq: &sc->sc_ifp->if_snd;
if (_IF_QFULL(ifq)) {
- IFQ_INC_DROPS(ifq);
- sc->sc_ifp->if_oerrors++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
sc->sc_stats.ppp_oerrors++;
} else
IF_ENQUEUE(ifq, m);
@@ -1517,13 +1518,13 @@ ppp_inproc(struct ppp_softc *sc, struct mbuf *m)
}
/* Copy the PPP and IP headers into a new mbuf. */
- MGETHDR(mp, M_DONTWAIT, MT_DATA);
+ MGETHDR(mp, M_NOWAIT, MT_DATA);
if (mp == NULL)
goto bad;
mp->m_len = 0;
mp->m_next = NULL;
if (hlen + PPP_HDRLEN > MHLEN) {
- MCLGET(mp, M_DONTWAIT);
+ MCLGET(mp, M_NOWAIT);
if (M_TRAILINGSPACE(mp) < hlen + PPP_HDRLEN) {
m_freem(mp);
goto bad; /* lose if big headers and no clusters */
@@ -1581,7 +1582,7 @@ ppp_inproc(struct ppp_softc *sc, struct mbuf *m)
* whole cluster on it.
*/
if (ilen <= MHLEN && M_IS_CLUSTER(m)) {
- MGETHDR(mp, M_DONTWAIT, MT_DATA);
+ MGETHDR(mp, M_NOWAIT, MT_DATA);
if (mp != NULL) {
m_copydata(m, 0, ilen, mtod(mp, caddr_t));
/* instead of freeing - return cluster mbuf so it can be reused */
@@ -1661,11 +1662,10 @@ ppp_inproc(struct ppp_softc *sc, struct mbuf *m)
*/
s = splimp();
if (_IF_QFULL(inq)) {
- IFQ_INC_DROPS(inq);
splx(s);
if (sc->sc_flags & SC_DEBUG)
printf("ppp%d: input queue full\n", ppp_unit(sc));
- ifp->if_iqdrops++;
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
goto bad;
}
IF_ENQUEUE(inq, m);
@@ -1674,8 +1674,8 @@ ppp_inproc(struct ppp_softc *sc, struct mbuf *m)
break;
}
- ifp->if_ipackets++;
- ifp->if_ibytes += ilen;
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, ilen);
microtime(&ppp_time);
ifp->if_lastchange = ppp_time;
@@ -1687,7 +1687,7 @@ ppp_inproc(struct ppp_softc *sc, struct mbuf *m)
bad:
m_freem(m);
- sc->sc_ifp->if_ierrors++;
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
sc->sc_stats.ppp_ierrors++;
return mf;
}
diff --git a/rtemsbsd/sys/net/if_pppvar.h b/rtemsbsd/sys/net/if_pppvar.h
index b0a5ba91..fdfb56df 100644
--- a/rtemsbsd/sys/net/if_pppvar.h
+++ b/rtemsbsd/sys/net/if_pppvar.h
@@ -131,8 +131,8 @@ struct ppp_softc {
struct ppp_softc *pppalloc(pid_t pid);
void pppdealloc(struct ppp_softc *sc);
-int pppoutput(struct ifnet *, struct mbuf *,
- struct sockaddr *, struct route *);
+int pppoutput(struct ifnet *, struct mbuf *, const struct sockaddr *,
+ struct route *);
int pppioctl(struct ppp_softc *sc, ioctl_command_t cmd, caddr_t data,
int flag, struct proc *p);
struct mbuf *ppp_dequeue(struct ppp_softc *sc);
diff --git a/rtemsbsd/sys/net/ppp_tty.c b/rtemsbsd/sys/net/ppp_tty.c
index d1945941..71105c94 100644
--- a/rtemsbsd/sys/net/ppp_tty.c
+++ b/rtemsbsd/sys/net/ppp_tty.c
@@ -377,14 +377,14 @@ pppwrite(struct rtems_termios_tty *tty, rtems_libio_rw_args_t *rw_args)
struct mbuf **mp;
for (mp = &m0; maximum; mp = &m->m_next) {
- MGET(m, M_WAIT, MT_DATA);
+ MGET(m, M_WAITOK, MT_DATA);
if ((*mp = m) == NULL) {
m_freem(m0);
return (ENOBUFS);
}
m->m_len = 0;
if (maximum >= MCLBYTES / 2) {
- MCLGET(m, M_DONTWAIT);
+ MCLGET(m, M_NOWAIT);
}
len = M_TRAILINGSPACE(m);
if (len > maximum) {
@@ -683,11 +683,11 @@ pppgetm(struct ppp_softc *sc)
mp = &sc->sc_m;
for (len = sc->sc_mru + PPP_HDRLEN + PPP_FCSLEN; len > 0; ){
if ((m = *mp) == NULL) {
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ MGETHDR(m, M_NOWAIT, MT_DATA);
if (m == NULL)
break;
*mp = m;
- MCLGET(m, M_DONTWAIT);
+ MCLGET(m, M_NOWAIT);
}
len -= M_DATASIZE(m);
mp = &m->m_next;
@@ -708,8 +708,8 @@ pppallocmbuf(struct ppp_softc *sc, struct mbuf **mp)
m = *mp;
if ( m == NULL ) {
/* get mbuf header */
- MGETHDR(m, M_WAIT, MT_DATA);
- MCLGET(m, M_WAIT);
+ MGETHDR(m, M_WAITOK, MT_DATA);
+ MCLGET(m, M_WAITOK);
*mp = m;
}
@@ -769,7 +769,7 @@ pppinput(int c, struct rtems_termios_tty *tp)
sc->sc_flags |= SC_PKTLOST; /* note the dropped packet */
if ((sc->sc_flags & (SC_FLUSH | SC_ESCAPED)) == 0){
/* bad fcs error */
- sc->sc_ifp->if_ierrors++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1);
sc->sc_stats.ppp_ierrors++;
} else
sc->sc_flags &= ~(SC_FLUSH | SC_ESCAPED);
@@ -779,7 +779,7 @@ pppinput(int c, struct rtems_termios_tty *tp)
if (ilen < PPP_HDRLEN + PPP_FCSLEN) {
if (ilen) {
/* too short error */
- sc->sc_ifp->if_ierrors++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1);
sc->sc_stats.ppp_ierrors++;
sc->sc_flags |= SC_PKTLOST;
}
@@ -898,7 +898,7 @@ pppinput(int c, struct rtems_termios_tty *tp)
flush:
if (!(sc->sc_flags & SC_FLUSH)) {
- sc->sc_ifp->if_ierrors++;
+ if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1);
sc->sc_stats.ppp_ierrors++;
sc->sc_flags |= SC_FLUSH;
}
diff --git a/testsuite/netshell01/shellconfig.c b/testsuite/netshell01/shellconfig.c
index 310f1379..0798127b 100644
--- a/testsuite/netshell01/shellconfig.c
+++ b/testsuite/netshell01/shellconfig.c
@@ -10,6 +10,7 @@
#define CONFIGURE_SHELL_COMMANDS_ALL
#define CONFIGURE_SHELL_USER_COMMANDS \
+ &rtems_shell_SYSCTL_Command, \
&rtems_shell_HOSTNAME_Command, \
&rtems_shell_PING_Command, \
&rtems_shell_ROUTE_Command, \